Snakemake workflow for downloading and converting MRI data to BIDS


Snakemake workflow for downloading and converting EpLink MRI data to BIDS. See http://github.com/khanlab/eplink-spred2bids-eeg for the EEG workflow.

Prerequisites:

You need BrainCODE SPRED (XNAT) access to the EpLink EPL31 project. Put your username and password in environment variables named `SPRED_USER` and `SPRED_PASS`, respectively.
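Before starting a long run, you can verify that the credentials work. A minimal sketch, assuming the same `spred_url` value used in the workflow config:

```python
import os
import xnat

# Hypothetical pre-flight check (not part of the workflow): connect with the
# same credentials the download scripts use, and fail fast if they are wrong.
spred_url = '<spred_url from the config file>'
with xnat.connect(spred_url, user=os.environ['SPRED_USER'],
                  password=os.environ['SPRED_PASS']) as connection:
    print('SPRED connection OK')
```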

Instructions:

  1. Clone this repository and install with `pip install <path_to_cloned_repo>`

  2. Update the config file to change the `tmp_download` folder to a local disk with enough free space (see the space-check sketch after this list).

  3. Run Snakemake with a dry-run first:

     ```
     snakemake -np
     ```

  4. If everything looks fine, run with the desired number of parallel cores, e.g. 4:

     ```
     snakemake --cores 4
     ```
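Downloads from SPRED can be large, so it helps to confirm the `tmp_download` disk has room before starting. A minimal sketch (the path shown is hypothetical; substitute the `tmp_download` value from your config):

```python
import shutil

# Hypothetical path -- substitute the tmp_download folder from the config file
tmp_download = '/scratch/tmp_download'

# Report free space on the disk holding tmp_download
free_gb = shutil.disk_usage(tmp_download).free / 1e9
print(f'{free_gb:.1f} GB free at {tmp_download}')
```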

Code Snippets

Selected rule fragments and helper scripts from the workflow:

```
script:
    '../scripts/download_zip.py'
```
```
script:
    '../scripts/convert_dcm_to_bids.py'
```
```
script: '../scripts/create_bval_bvec_pepolar.py'
```
```
run:
    # Copy each input JSON sidecar and .bidsignore template
    # to every expected output location
    for out_dd_json in output.dd_jsons:
        shell('cp {input.dd_json} {out_dd_json}')
    for out_rest_json in output.rest_jsons:
        shell('cp {input.rest_json} {out_rest_json}')
    for out_movie_json in output.movie_jsons:
        shell('cp {input.movie_json} {out_movie_json}')
    for out_bidsignore in output.bidsignores:
        shell('cp {input.bidsignore} {out_bidsignore}')
```
```
script:
    '../scripts/get_subject_by_suffix_table.py'
```
```
script:
    '../scripts/get_subjects_table.py'
```
```
run:
    import pandas as pd
    from glob import glob
    from pathlib import Path

    # Each failed-conversion log is named sub-{site}{subject}_ses-{session}_{suffix}.txt;
    # e.g. (hypothetical) sub-ABC001_ses-01_T1w.txt -> site ABC, subject 001, session 01
    bad_logs = [Path(bad_log).name for bad_log in sorted(glob(f'{input.log_dir}/sub-*_ses-*_{wildcards.suffix}.txt'))]
    sessions = [bad_log.split('_')[1].split('-')[1] for bad_log in bad_logs]
    sites = [bad_log.split('_')[0].split('-')[1][:3] for bad_log in bad_logs]
    subjects = [bad_log.split('_')[0].split('-')[1][3:] for bad_log in bad_logs]
    pd.DataFrame({'site': sites, 'subject': subjects, 'session': sessions}).to_csv(output.tsv, sep='\t', index=False)
```
```
run:
    import pandas as pd

    df_all = pd.read_table(input.all_tsv, dtype={'subject': str})
    df_failed = pd.read_table(input.failed_tsv, dtype={'subject': str})

    # Merge on the three key columns with an indicator column
    merged_df = pd.merge(df_all, df_failed, on=['subject', 'site', 'session'], how='outer', indicator=True)

    # Keep rows present only in the left dataframe (df_all), i.e. scans that did not fail
    df1_filtered = merged_df[merged_df['_merge'] == 'left_only']

    # Remove the indicator column and write out
    df1_filtered = df1_filtered.drop(columns=['_merge'])
    df1_filtered.to_csv(output.tsv, sep='\t', index=False)
```
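This is the standard pandas anti-join pattern. A toy example with hypothetical rows, showing what `left_only` keeps:

```python
import pandas as pd

# Hypothetical tables: two subjects total, one of which failed
df_all = pd.DataFrame({'site': ['ABC', 'ABC'], 'subject': ['001', '002'], 'session': ['01', '01']})
df_failed = pd.DataFrame({'site': ['ABC'], 'subject': ['002'], 'session': ['01']})

merged = pd.merge(df_all, df_failed, on=['subject', 'site', 'session'], how='outer', indicator=True)
print(merged[merged['_merge'] == 'left_only'].drop(columns=['_merge']))
# keeps only subject 001, the one absent from df_failed
```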
```python
from snakemake.shell import shell
from glob import glob
import tempfile
import shutil
from pathlib import Path

# Normalize output.nii to a list, whether the rule declared one file or several
if isinstance(snakemake.output.nii, str):
    nii_list = [snakemake.output.nii]
else:
    nii_list = list(snakemake.output.nii)

# get the set of extensions expected across all outputs (e.g. .nii.gz, .json)
out_exts = list(set([''.join(Path(f).suffixes) for f in snakemake.output]))
print(out_exts)
print(nii_list)


with tempfile.TemporaryDirectory() as tmp_dir:

    # unzip the downloaded scan to tmp_dir
    shell('unzip -d {tmp_dir} {snakemake.input.zip_file}')

    # run dcm2niix, outputting to the tmp_dir root
    shell('./deps/dcm2niix -d 9 -z y -f output {tmp_dir} &> {snakemake.log}')

    # remove the log if we reach this point (i.e. if the shell command succeeded)
    shell('rm -f {snakemake.log}')

    # dcm2niix nominally creates an output.nii.gz file (along with .json),
    # but it may add an unknown suffix to the output file name too, so we
    # glob for the nii.gz files and take the basename of each
    base_names = [str(Path(nii).name).split('.')[0] for nii in sorted(glob(f'{tmp_dir}/output*.nii.gz'))]

    print(base_names)
    print(nii_list)

    # now copy each {tmp_dir}/{base_name}{ext} to {out_dir}/{out_name}{ext}
    for base_name, nii in zip(base_names, nii_list):

        out_dir = Path(nii).parent

        # filename without folder or .nii.gz
        out_name = str(Path(nii).name).split('.')[0]

        out_dir.mkdir(exist_ok=True)
        for ext in out_exts:
            src = Path(tmp_dir) / (base_name + ext)
            dest = out_dir / (out_name + ext)
            print(f'copying {src} to {dest}')
            shutil.copy(src, dest)
```
```python
import nibabel as nib

def create_bval_bvec_files(input_file, output_bval, output_bvec):
    # Number of volumes: 4th dimension if the image is 4D, else a single volume
    img = nib.load(input_file)
    if len(img.shape) == 4:
        N = img.shape[-1]
    else:
        N = 1
    print(N)

    # bval: one zero per volume (pepolar scans carry no diffusion weighting)
    with open(output_bval, 'w') as f:
        for _ in range(N):
            f.write('0 ')
        f.write('\n')

    # bvec: three rows (x, y, z) of N zeros each
    with open(output_bvec, 'w') as f:
        for _ in range(3):
            for _ in range(N):
                f.write('0 ')
            f.write('\n')

# Usage:
create_bval_bvec_files(snakemake.input.nii, snakemake.output.bval, snakemake.output.bvec)
```
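For illustration, calling the function on a small synthetic 4D image (paths are hypothetical) yields all-zero gradient files:

```python
import numpy as np
import nibabel as nib

# Hypothetical 4D image with 3 volumes
nib.save(nib.Nifti1Image(np.zeros((4, 4, 4, 3)), affine=np.eye(4)), '/tmp/pepolar.nii.gz')
create_bval_bvec_files('/tmp/pepolar.nii.gz', '/tmp/pepolar.bval', '/tmp/pepolar.bvec')
# /tmp/pepolar.bval -> "0 0 0"
# /tmp/pepolar.bvec -> three lines of "0 0 0"
```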
```python
import pandas as pd
import xnat
import os

# Select the scan URI matching the rule's query; the first match is downloaded
df = pd.read_table(snakemake.input.tsv, dtype={'subject': str}).query(snakemake.params.query)
selected_uris = df['scan_uri'].to_list()
uri = selected_uris[0]

with xnat.connect(snakemake.config['spred_url'], user=os.environ['SPRED_USER'], password=os.environ['SPRED_PASS']) as xnat_connection:
    scanobj = xnat_connection.create_object(uri)
    scanobj.download(snakemake.output.zip_file)
```
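Note that `selected_uris[0]` silently takes the first match. If you want ambiguous queries to fail loudly instead, a sketch of an optional guard (not in the original script):

```python
# Hypothetical guard before taking selected_uris[0]
if not selected_uris:
    raise ValueError(f'no scans matched query: {snakemake.params.query}')
if len(selected_uris) > 1:
    print(f'warning: {len(selected_uris)} scans matched; downloading the first')
```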
```python
import pandas as pd

# Filter the full scans table with the rule's pandas query, then sort and write out
df = pd.read_table(snakemake.input.tsv, dtype={'subject': str})
df = df.query(snakemake.params.query)
df.sort_values(by=['site', 'subject', 'session']).to_csv(snakemake.output.tsv, sep='\t', index=False)
```
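`params.query` is an ordinary pandas `DataFrame.query` expression over the columns of the scans table. A toy example with hypothetical values:

```python
import pandas as pd

df = pd.DataFrame({'site': ['ABC', 'ABC'], 'subject': ['001', '002'],
                   'session': ['01', '01'],
                   'series_description': ['t1_mprage', 'rest_bold']})
print(df.query('series_description == "t1_mprage"'))  # keeps only the first row
```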
```python
import xnat
import pandas as pd
import os

xnat_connection = xnat.connect(snakemake.config['spred_url'], user=os.environ['SPRED_USER'], password=os.environ['SPRED_PASS'])
project_id = snakemake.config['project_id']

df = pd.DataFrame(columns=['site', 'subject', 'session', 'series_description', 'scan_uri'])

for session in snakemake.config['session_lut'].keys():

    mr_session = snakemake.config['session_lut'][session]
    for site in snakemake.config['sites']:

        site_id = f'{project_id}_{site}'
        subjects = [row[0] for row in xnat_connection.projects[site_id].subjects.tabulate(columns=['label'])]

        for subject in subjects:

            subject_id = subject.split('_')[2]  # strip off all but the numeric part of the ID

            try:
                exp_uri = f'/data/projects/{site_id}/experiments/{subject}_{mr_session}_SE01_MR'
                print(exp_uri)
                exp = xnat_connection.create_object(exp_uri)
            except Exception:
                # subject does not have an MRI session
                continue

            # now get the scans
            scans = exp.scans
            for scan in scans.values():
                scan_uri = scan.uri
                # scan.data is a dict like {'series_description': 'gre_field_mapping',
                # 'parameters/imageType': 'ORIGINAL\\\\PRIMARY\\\\M\\\\ND', 'parameters/tr': 500, ...}
                scan_metadata = scan.data
                series_description = scan_metadata.get('series_description', '')

                # split imageType on the escaped backslashes into one column per component
                if 'parameters/imageType' in scan_metadata:
                    image_type = scan_metadata['parameters/imageType'].split('\\\\')
                    image_types = {f'image_type_{i}': [image_type[i]] for i in range(len(image_type))}
                else:
                    image_types = {}

                df = pd.concat([df, pd.DataFrame({'subject': [subject_id], 'site': [site], 'session': [session], **image_types, 'series_description': [series_description], 'scan_uri': [scan_uri]})], ignore_index=True)

df.to_csv(snakemake.output.tsv, sep='\t', index=False)

xnat_connection.disconnect()
```

URL: https://github.com/khanlab/eplink-spred2bids