Download files and directories using iBridges#
This section shows how you can download data from iRODS using iBridges. These are just a few examples of how you could use it within Python.
For the full documentation and usage go to:
https://ibridges.readthedocs.io/en/stable/
git repository:
https://github.com/UtrechtUniversity/iBridges/
Installation#
pip install ibridges
Authentication#
All iRODS clients (icommands and APIs) expect the connection parameters to be stored in a special folder. This folder is called .irods and it is located in your home directory:
Mac: /Users/<user>/.irods
Linux: /home/<user>/.irods
Windows: C:\Users\<user>\.irods
Store the irods_environment.json file in that folder and make sure that its extension is .json.
Note that text editors on Windows often save files with the .txt extension, so please watch out for this. Below we provide a code snippet which saves your personal UNLOCK iRODS information in the right place.
# Location of the iRODS client configuration: the directory and the file inside it
irods_env_dir, irods_env_file = "~/.irods", "irods_environment.json"

# Your SRAM username (replace the placeholder with your own)
username = "<SRAM username>"
If you already have an environment file in place, you can skip the next cell and go to “Start a session”.
from pathlib import Path
import json
# Create the iRODS environment directory defined above if it does not exist yet
irods_env_dir = Path(irods_env_dir).expanduser()
irods_env_dir.mkdir(parents=True, exist_ok=True)

# Connection settings that are written to the environment file in the .irods folder
env = {
    "irods_host": "data.m-unlock.nl",
    "irods_port": 1247,
    # Use the SRAM username set above instead of a literal placeholder string
    "irods_user_name": username,
    "irods_zone_name": "unlock",
    "irods_authentication_scheme": "pam_password",
    "irods_encryption_algorithm": "AES-256-CBC",
    "irods_encryption_key_size": 32,
    "irods_encryption_num_hash_rounds": 16,
    "irods_encryption_salt_size": 8,
    "irods_client_server_policy": "CS_NEG_REQUIRE",
    "irods_client_server_negotiation": "request_server_negotiation",
}

# Write to the configured file name so that the session cell reads the same file
env_file = irods_env_dir / irods_env_file
with open(env_file, "w", encoding="utf-8") as write_json:
    json.dump(env, write_json, indent=2)

# Report whether the environment file is now present on disk
if env_file.is_file():
    print("Created environment file at", env_file)
else:
    print("Failed to create environment file at", env_file)
Start a session! It will ask you for the SRAM token#
from pathlib import Path
from ibridges import Session
from getpass import getpass
# Build the location of the environment file and expand "~" to the home directory
env_loc = Path(irods_env_dir).joinpath(irods_env_file)
env_file = env_loc.expanduser()

# Prompt for the SRAM token without echoing it, then open the iRODS session
password = getpass()
session = Session(env_file, password=password)

if session:
    print("Session succesfully established")
Downloading files#
# Define investigation and study variables (replace the placeholders with your own identifiers)
investigation = "<investigation>"
study = "<study>"
Create a local download directory#
from ibridges import IrodsPath
# Local target location: ./unlock_downloads/<investigation>/<study>
download_path = f"./unlock_downloads/{investigation}/{study}"

# Make sure the target directory (including missing parents) exists
download_dir = Path(download_path).expanduser()
download_dir.mkdir(parents=True, exist_ok=True)
Download a single file or directory#
Use the full iRODS path
You will receive a dictionary with changes, which you can also retrieve beforehand with the option dry_run=True.
Existing local data will not be overwritten. Please use the option overwrite=True if you want to overwrite your local data
from ibridges import download
# Define the file path on iRODS and download it to the local directory.
# IrodsPath keeps the "/"-separated iRODS path intact on every operating system,
# whereas pathlib.Path would apply the path rules of the local machine.
irods_file = IrodsPath(session, f"/unlock/home/wur.{investigation}/stu_{study}/path/to/file.file_extension")
download(session, irods_file, download_dir)

# Define the directory (collection) path on iRODS and download it to the local directory
irods_dir = IrodsPath(session, f"/unlock/home/wur.{investigation}/stu_{study}/path/to/directory")
download(session, irods_dir, download_dir)
Download multiple files and directories with a search#
Likely you would like to download multiple files or directories (collections in iRODS)
For directories:
This will recursively download directories that match your “path_pattern” search.
It will skip the download when the directory exists AND is not empty.
Set the variable overwrite to True to change this behaviour.
“%” denotes a wildcard in your search string
from ibridges import IrodsPath, search_data, download
from pathlib import Path
import os
# Define specified folder pattern (e.g. specific PICRUSt output); "%" is a wildcard
folder_pattern = "%3_PICRUSt2"

# Define the iRODS location below which to search
search = f"/unlock/home/wur.{investigation}/stu_{study}/"
data = search_data(session, path=IrodsPath(session, search), path_pattern=folder_pattern)

# Set options and counters
overwrite = False
downloaded, skipped = 0, 0
unique_folders = []

# Iterate over search results and download folders
for item in data:
    # Only process directories (collections); other hits are ignored
    if not item.collection_exists():
        continue

    # Record every distinct collection once so the summary counts folders
    if item not in unique_folders:
        unique_folders.append(item)

    run = item.parent.name  # name of the collection that contains the hit
    local_destination = Path(download_path) / run  # local folder for this run

    # Download when forced, when the destination is missing, or when it is still empty
    if overwrite or not local_destination.exists() or not os.listdir(local_destination):
        local_destination.mkdir(parents=True, exist_ok=True)
        download(session, item, local_destination, overwrite=overwrite)
        downloaded += 1
    else:
        skipped += 1

# Print download summary
print("\nDownloaded: ", downloaded)
print("Skipped: ", skipped)
print("Total unique folders processed:", len(unique_folders))
For files:
from ibridges import IrodsPath, search_data, download
from pathlib import Path
import os
# Define specified file pattern (e.g. ttl files); "%" is a wildcard
pattern = "%.ttl"

# Define the iRODS location below which to search
search = f"/unlock/home/wur.{investigation}/stu_{study}/"
data = search_data(session, path=IrodsPath(session, search), path_pattern=pattern)

# Set options and counters
overwrite = False
downloaded, skipped = 0, 0

# All matching files are placed directly in the base local directory
local_destination = Path(download_path)

# Iterate over search results and download files
for item in data:
    # Only process files (data objects); other hits are ignored
    if not item.dataobject_exists():
        continue

    local_destination.mkdir(parents=True, exist_ok=True)
    file_destination = local_destination / item.name  # local file path

    # Download only if the file is not present locally, unless overwriting is requested
    if not file_destination.exists() or overwrite:
        download(session, item, file_destination, overwrite=overwrite)
        downloaded += 1
    else:
        skipped += 1

# Print download summary
print("\nDownloaded: ", downloaded)
print("Skipped: ", skipped)