Download files and directories using iBridges#
This section shows how you can download data from iRODS using iBridges. These are just a few examples of how you could use it within Python.
For the full documentation and usage go to:
https://ibridges.readthedocs.io/en/stable/
git repository:
https://github.com/UtrechtUniversity/iBridges/
Authentication#
All iRODS clients (icommands and APIs) expect the above parameters to be stored in a special folder. This folder is called .irods and it lies in your home directory:
Mac: /Users/<user>/.irods
Linux: /home/<user>/.irods
Windows: C:\Users\<user>\.irods
You can store the irods_environment.json in that folder and make sure that its extension is json.
Note that on Windows, text editors usually save files with the .txt extension, so please watch out for this. Below we provide a code snippet which saves your personal UNLOCK iRODS information in the right place.
# Set irods environment location and username
# irods_env_dir: folder that holds the iRODS environment file ("~" is expanded
# to your home directory by the cells below).
irods_env_dir = "~/.irods"
# irods_env_file: name of the environment file inside that folder.
irods_env_file = "irods_environment.json"
# Placeholder: replace <SRAM username> with your own SRAM username as a quoted
# string; it is written to the "irods_user_name" entry of the environment file.
username = <SRAM username>
If you already have an environment file in place, you can skip the next cell and go to “Start a session”
from pathlib import Path
import json

# Create the iRODS environment directory defined above if it does not exist yet.
# The expanded path is kept in its own variable so that irods_env_dir stays the
# plain string defined above; the "Start a session" cell concatenates it with
# "+" and would raise a TypeError if it had been replaced by a Path object.
env_dir = Path(irods_env_dir).expanduser()
# exist_ok makes this a no-op when the folder is already there; parents=True
# also covers a custom, nested irods_env_dir.
env_dir.mkdir(parents=True, exist_ok=True)

# Connection settings that are saved as the environment file in the .irods folder.
env = {
    "irods_host": "unlock-icat.irods.surfsara.nl",
    "irods_port": 1247,
    "irods_user_name": username,
    "irods_zone_name": "unlock",
    "irods_authentication_scheme": "pam",
    "irods_encryption_algorithm": "AES-256-CBC",
    "irods_encryption_key_size": 32,
    "irods_encryption_num_hash_rounds": 16,
    "irods_encryption_salt_size": 8,
    "irods_client_server_policy": "CS_NEG_REQUIRE",
    "irods_client_server_negotiation": "request_server_negotiation"
}

# Use the file name configured above instead of repeating it as a literal.
env_file = env_dir / irods_env_file
with open(env_file, "w", encoding="utf-8") as write_json:
    json.dump(env, write_json, indent=2)

# Report whether the file is now present on disk.
if env_file.is_file():
    print("Created environment file at", env_file)
else:
    print("Failed to created environment file at", env_file)
Start a session! It will ask you for the SRAM token#
from pathlib import Path
from ibridges import Session
from getpass import getpass

# Build the path to the environment file with pathlib. Wrapping irods_env_dir in
# Path() makes this work whether it is still the "~/.irods" string or has already
# been turned into a Path by the environment-creation cell (Path + str would
# raise a TypeError).
env_file = Path(irods_env_dir).expanduser() / irods_env_file

# Ask for the SRAM token; getpass does not echo the input.
password = getpass()
session = Session(env_file, password=password)
if session:
    print("Session succesfully established")
Downloading files#
# Placeholders: replace <investigation> and <study> with quoted strings. Both are
# concatenated into the local download path and the iRODS search patterns below.
investigation = <investigation>
study = <study>
Create a local download directory#
from ibridges import IrodsPath

# Local target folder: ./unlock_downloads/<investigation>/<study>
# (kept as a string because later cells append to it).
download_path = "/".join(("./unlock_downloads", investigation, study))

# Expand a leading "~" if present and create the folder, including any missing
# parents; an already existing folder is left untouched.
download_dir = Path(download_path).expanduser()
download_dir.mkdir(parents=True, exist_ok=True)
Download a single file or directory#
Use the full iRODS path
You will receive a dictionary with changes, which you can also retrieve beforehand with the option dry_run=True.
Existing local data will not be overwritten. Please use the option overwrite=True if you want to overwrite your local data
from ibridges import IrodsPath
from ibridges import download

# iRODS paths always use "/" as separator, so they are wrapped in IrodsPath and
# not in pathlib.Path: on Windows a pathlib.Path would rewrite the separators to
# "\" and the path would no longer point to the intended iRODS location.

# Download a single data object (file) into the local download directory.
irods_file = IrodsPath(session, "/unlock/home/wur.fdp/stu_bmock12_prjna496047/obs_bmock12_mocktest_cwl/sam_bmock12_synthetic_metagenome/metagenomic_other_illumina/asy_illumina_srr8073716/data/SRR8073716_1.fastq.gz")
download(session, irods_file, download_dir)

# Download a whole collection (directory) into the local download directory.
irods_dir = IrodsPath(session, "/unlock/home/wur.fdp/stu_bmock12_prjna496047/obs_bmock12_mocktest_cwl/sam_bmock12_synthetic_metagenome/metagenomic_other_illumina/asy_illumina_srr8073716")
download(session, irods_dir, download_dir)
Download multiple files and directories with a search#
Likely you would like to download multiple files or directories (collections in iRODS)
For directories:
This will download directories that will have a hit with your “search”.
It will skip the download when the directory exists AND is not empty.
Set the variable overwrite to True to change this behaviour.
“%” denotes a wildcard in your search string.
import os
from ibridges import IrodsPath
from ibridges import search_data
from ibridges import download

# "%" is a wildcard: search this study for paths ending in "3_PICRUSt2".
search = "/unlock/home/wur."+investigation+"/stu_"+study+"%3_PICRUSt2"
data = search_data(session, path=IrodsPath(session, search))

# Set to True to download again into local directories that already hold data.
overwrite = False

# Set counters
downloaded, skipped = 0, 0
# Every collection seen so far; a collection that shows up in several search
# hits is counted as skipped at most once.
unique_folders = set()
for item in data:
    collection = item["COLL_NAME"]
    # The second-to-last component of the collection path names the local sub-directory.
    run = collection.split("/")[-2]
    local_destination = Path(download_path).expanduser() / run

    # Download when forced, or when the local directory is missing or still empty.
    missing_or_empty = (
        not local_destination.exists() or not any(local_destination.iterdir())
    )
    if overwrite or missing_or_empty:
        # exist_ok=True: with overwrite=True the directory may already exist,
        # and a plain mkdir() would then raise FileExistsError.
        local_destination.mkdir(parents=True, exist_ok=True)
        download(session, collection, local_destination, overwrite=overwrite)
        downloaded += 1
    elif collection not in unique_folders:
        skipped += 1
    unique_folders.add(collection)

print("\nDownloaded: ", downloaded)
print("Skipped: ", skipped)
print("Total",len(unique_folders))
For files:
from ibridges import IrodsPath
from ibridges import search_data
from ibridges import download
from pathlib import Path
import os
import re

# "%" is a wildcard: search everything below the "2_Classification" collections of this study.
search = "/unlock/home/wur."+investigation+"/stu_"+study+"%2_Classification/%"
data = search_data(session, path=IrodsPath(session, search))

# Download the desired files
# in this case only files that have ".ttl" in the file name. The pattern is a
# regex, so the dot is escaped; an unescaped "." would match any character.
overwrite = False
pattern = r"\.ttl"
downloaded, skipped = 0, 0
# Tip: loop over itertools.islice(data, 10) to try the first 10 hits only.
for item in data:
    data_name = item["DATA_NAME"]
    if not re.search(pattern, data_name):
        continue

    irods_path = IrodsPath(session, item["COLL_NAME"], data_name)
    # NOTE(review): every hit is stored directly in download_path, so files with
    # the same name from different collections map to the same local file and
    # all but the first are reported as skipped.
    local_destination = Path(download_path).expanduser() / data_name
    if overwrite or not local_destination.is_file():
        print("Downloading ", data_name)
        download(session, irods_path, local_destination, overwrite=overwrite)
        downloaded += 1
    else:
        print("Skipped ", data_name)
        skipped += 1

print("\nDownloaded: ", downloaded)
print("Skipped: ", skipped)
print("Total: ", downloaded+skipped)