Workflow Steps and Code Snippets
372 tagged steps and code snippets that match the keyword JSON
Snakemake workflow for building an ANTS template (from antsMultivariateTemplateConstruction2.sh)
import json

import nibabel as nib
import numpy as np

img = nib.load(snakemake.input[0])

print('config:')
print(snakemake.config)

hdr = img.header

# image geometry: voxel grid shape, voxel sizes, and world-space origin
shape = np.array(hdr.get_data_shape()).astype('float').tolist()
zooms = np.array(hdr.get_zooms()).astype('float').tolist()

affine = img.affine
origin = affine @ np.array([0, 0, 0, 1]).T
origin = origin[0:3].astype('float').tolist()

template_dict = dict()

# add extras from config file
template_dict.update(snakemake.config['template_description_extras'])

# add shape, zooms, origin for the resolution
template_dict.update(
    {'res': {'{res:02d}'.format(res=snakemake.config['resolution_index']): {
        'origin': origin,
        'shape': shape,
        'zooms': zooms}}})

with open(snakemake.output[0], 'w') as outfile:
    json.dump(template_dict, outfile, indent=2)
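For reference, a minimal sketch of the template-description JSON this script emits, with all values invented for illustration (the top-level extras depend on template_description_extras in the config):

import json

# Hypothetical output of the script above; 'res' maps a zero-padded
# resolution index to the image geometry (all values invented).
example = {
    "Name": "MyTemplate",  # assumed to come from template_description_extras
    "res": {
        "01": {
            "origin": [-96.0, -132.0, -78.0],
            "shape": [193.0, 229.0, 193.0],
            "zooms": [1.0, 1.0, 1.0],
        }
    },
}
print(json.dumps(example, indent=2))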
Neuroimaging Data Processing Workflow
import numpy as np
import nibabel as nib
import json

# load up tissue probability, warped from template
tissue_prob_vol = dict()

for label, nii in zip(snakemake.config['tissue_labels'], snakemake.input.tissue_priors):
    print(label)
    print(nii)
    tissue_prob_vol[label] = nib.load(nii).get_fdata()

# load up k-class tissue segmentation
tissue_k_seg = nib.load(snakemake.input.seg_channels_4d)
print(tissue_k_seg.shape)

sim_prior_k = np.zeros([len(snakemake.config['tissue_labels']), tissue_k_seg.shape[3]])

# for each prior, need to find the channel that best fits
for i, label in enumerate(snakemake.config['tissue_labels']):
    for k in range(tissue_k_seg.shape[3]):
        print(f'Computing overlap of {label} prior and channel {k}... ')

        # compute intersection over union
        s1 = tissue_prob_vol[label] > 0.5
        s2 = tissue_k_seg.slicer[:, :, :, k].get_fdata() > 0.5
        sim_prior_k[i, k] = np.sum(np.logical_and(s1, s2).flat) / np.sum(np.logical_or(s1, s2).flat)

print('Overlap table:')
print(sim_prior_k)

label_to_k_dict = dict()

for i, label in enumerate(snakemake.config['tissue_labels']):
    label_to_k_dict[label] = int(np.argmax(sim_prior_k[i, :]))

    # write nii to file
    print('writing image at channel {} to output file: {}'.format(
        label_to_k_dict[label], snakemake.output.tissue_segs[i]))
    nib.save(tissue_k_seg.slicer[:, :, :, label_to_k_dict[label]],
             snakemake.output.tissue_segs[i])

with open(snakemake.output.mapping_json, 'w') as outfile:
    json.dump(label_to_k_dict, outfile, indent=4)
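The channel matching above hinges on intersection-over-union. A toy sketch with invented 1-D arrays shows the binarize-then-IoU step in isolation:

import numpy as np

# Toy IoU computation: binarize both maps at 0.5, then
# IoU = |A AND B| / |A OR B| (arrays invented for illustration).
prior = np.array([0.9, 0.8, 0.1, 0.2])    # hypothetical tissue prior
channel = np.array([0.7, 0.4, 0.6, 0.1])  # hypothetical segmentation channel

s1 = prior > 0.5
s2 = channel > 0.5
iou = np.logical_and(s1, s2).sum() / np.logical_or(s1, s2).sum()
print(iou)  # 1/3: one voxel in the intersection, three in the union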
Workflow for processing ISMRM Diffusion Study - Best Practices in Image Preprocessing data
run:
    import json
    with open(input.json) as f:
        json_dict = json.load(f)
    json_dict['PhaseEncodingDirection'] = 'j'
    with open(output.json, 'w') as f:
        json.dump(json_dict, f, indent=4)
run:
    import json
    with open(input.json) as f:
        json_dict = json.load(f)
    json_dict['PhaseEncodingDirection'] = 'j-'
    with open(output.json, 'w') as f:
        json.dump(json_dict, f, indent=4)
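The two run blocks differ only in the value written ('j' vs 'j-', the two opposite phase-encoding polarities along the second voxel axis in BIDS). A consolidated sketch, with a hypothetical helper name, would parameterize the direction:

import json

def set_phase_encoding(in_json, out_json, direction):
    # direction is 'j' or 'j-'; the function name and signature are
    # hypothetical, not part of the original workflow.
    with open(in_json) as f:
        json_dict = json.load(f)
    json_dict['PhaseEncodingDirection'] = direction
    with open(out_json, 'w') as f:
        json.dump(json_dict, f, indent=4)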
Template building for the Cam-CAN dataset
import nibabel as nib
import numpy as np
import json

img = nib.load(snakemake.input[0])

hdr = img.header
shape = np.array(hdr.get_data_shape()).astype('float').tolist()
zooms = np.array(hdr.get_zooms()).astype('float').tolist()

affine = img.affine
origin = affine @ np.array([0, 0, 0, 1]).T
origin = origin[0:3].astype('float').tolist()

template_dict = dict()

# add extras from config file
template_dict.update(snakemake.config['template_description_extras'])

# add shape, zooms, origin for the resolution
template_dict.update(
    {'res': {'{res:02d}'.format(res=snakemake.config['resolution_index']): {
        'origin': origin,
        'shape': shape,
        'zooms': zooms}}})

# add cohorts to json, with full list of subjects for each cohort
cohort_dict = dict()
for cohort in snakemake.params.cohorts:
    cohort_dict[cohort] = {'participants': snakemake.params.subjects[cohort]}

template_dict.update({'cohort': cohort_dict})

with open(snakemake.output[0], 'w') as outfile:
    json.dump(template_dict, outfile, indent=2)
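A minimal sketch of the extra 'cohort' block this adds, with cohort names and participant IDs invented:

# Hypothetical 'cohort' entry built by the loop above (all IDs invented):
cohort_dict = {
    "young": {"participants": ["sub-001", "sub-002"]},
    "old": {"participants": ["sub-101", "sub-102"]},
}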
nnU-Net training workflow for fetal BOLD MRI
import json

# load template json
with open(snakemake.input.template_json) as f:
    dataset = json.load(f)

dataset['training'] = [{'image': img, 'label': lbl}
                       for img, lbl in zip(snakemake.params.training_imgs_nosuffix,
                                           snakemake.input.training_lbls)]
dataset['numTraining'] = len(dataset['training'])

# write modified json
with open(snakemake.output.dataset_json, 'w') as f:
    json.dump(dataset, f, indent=4)
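A sketch of the resulting dataset JSON, with file paths invented; each training case becomes an image/label pair, plus the case count:

# Hypothetical result after the update above (paths invented):
dataset_example = {
    "training": [
        {"image": "./imagesTr/case_0000", "label": "./labelsTr/case_0000.nii.gz"},
        {"image": "./imagesTr/case_0001", "label": "./labelsTr/case_0001.nii.gz"},
    ],
    "numTraining": 2,
}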
Snakemake workflow for pre-processing DWI data to re-train using b=1000 HCP data
import json
import nibabel as nib
import numpy as np
import sys

with open(snakemake.input.shells) as f:
    shells_dict = json.load(f)

# get bval parameter:
bval = snakemake.params.bval

# input dwi
dwi_nib = nib.load(snakemake.input.dwi)
print(dwi_nib.shape)

# create output shape
newshape = np.array(dwi_nib.shape[:3])
avg_shell = np.zeros(newshape.astype(int))

indices = shells_dict['shell_to_vol'][bval]

if len(dwi_nib.shape) == 3 and len(indices) == 1 and indices[0] == 0:
    # we have 3d vol (e.g. b0 only), so just grab it..
    avg_shell = dwi_nib.get_fdata()[:, :, :]
elif len(dwi_nib.shape) == 4:
    # otherwise, pick out indices and average
    avg_shell = np.mean(dwi_nib.get_fdata()[:, :, :, indices], 3)
else:
    # if not either of these cases, then something weird with indices and volumes
    print('unable to map indices to get avg shell')
    sys.exit()

# now save as image
avg_shell_nii = nib.Nifti1Image(avg_shell, affine=dwi_nib.affine)
avg_shell_nii.to_filename(snakemake.output[0])
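The 4D branch is a plain indexed mean over the volume axis; a toy sketch with invented shapes and indices:

import numpy as np

# Average the volumes of one shell (axis 3 is the volume axis);
# shapes and indices invented for illustration.
dwi = np.random.rand(4, 4, 4, 6)  # hypothetical 4D DWI, 6 volumes
indices = [1, 3, 5]               # hypothetical volumes in the b=1000 shell
avg_shell = np.mean(dwi[:, :, :, indices], 3)
print(avg_shell.shape)            # (4, 4, 4)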
import numpy as np
import json

# load bvals
bvals = np.loadtxt(snakemake.input[0])

# if just single bval (i.e. rev ph enc b0), then just set manually
if bvals.size == 1:
    out_dict = dict()
    shells = [np.around(bvals, -2).astype('int').tolist()]
    out_dict['shells'] = shells
    out_dict['vol_to_shell'] = shells
    out_dict['shell_to_vol'] = {str(shells[0]): [0]}  # point to index 0

# otherwise try to find shells
else:
    shells = []

    # histogram is used to find number of shells, with anywhere from 10 to 100 bins;
    # want to use the highest number of bins that doesn't split up the shells,
    # so that the bin centers can then be used as the shell bvalues..
    for i, nbins in enumerate(np.arange(10, 100, 5)):
        counts, bin_edges = np.histogram(bvals, bins=nbins)
        bin_lhs = bin_edges[:-1]
        bin_rhs = bin_edges[1:]
        bin_centers = (bin_lhs + bin_rhs) / 2
        shells.append(bin_centers[np.where(counts > 0)])

    # get number of shells for each bin-width choice
    nshells = [len(s) for s in shells]
    print('nshells')
    print(nshells)

    # use the highest number of bins that still produces the minimal number of shells
    min_nshells = np.min(nshells)
    possible_shells = np.where(nshells == min_nshells)[0]
    chosen_shells = shells[possible_shells[-1]]

    # round to nearest 100
    chosen_shells = np.around(chosen_shells, -2).astype('int')
    print('chosen_shells')
    print(chosen_shells)

    # write to file
    # np.savetxt(snakemake.output[0], chosen_shells, fmt='%d')

    # get bval indices, by mindist to shell
    rep_shells = np.tile(chosen_shells, [len(bvals), 1])
    rep_bvals = np.tile(bvals, [len(chosen_shells), 1]).T
    print(rep_shells)
    print(rep_bvals)

    # abs diff between bvals and shells
    diff = np.abs(rep_bvals - rep_shells)
    shell_ind = np.argmin(diff, 1)
    shell_ind = chosen_shells[shell_ind]

    # get a list of indices mapping shells to vols
    shell_to_vol = dict()
    for shell in chosen_shells.tolist():
        shell_to_vol[shell] = np.where(shell_ind == int(shell))[0].tolist()

    out_dict = dict()
    out_dict['shells'] = chosen_shells.tolist()
    out_dict['vol_to_shell'] = shell_ind.tolist()
    out_dict['shell_to_vol'] = shell_to_vol

# write to json, shells and their indices
with open(snakemake.output[0], 'w') as outfile:
    json.dump(out_dict, outfile, indent=4)
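For concreteness, a hedged example of the JSON this produces for bvals of roughly [5, 995, 1005, 2000] (values invented). Note that json.dump stringifies the integer shell keys, which is why the averaging script above indexes shell_to_vol with a string bval:

# Hypothetical output for bvals of approximately [5, 995, 1005, 2000]:
out_example = {
    "shells": [0, 1000, 2000],
    "vol_to_shell": [0, 1000, 1000, 2000],
    "shell_to_vol": {"0": [0], "1000": [1, 2], "2000": [3]},
}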
Simulation Workflow for Human Allele Frequency Change Study (v1.0.2)
import demes
import os
import demesdraw

sc = snakemake.wildcards['sc']


class Scenario:
    pulse_times = [150, 130, 110, 90, 70, 50, 30, 10]

    def __init__(self, name: str, N_anc: int, pop_sizes: list[int],
                 pulses: list[list], path: str):
        assert len(pulses[0]) == N_anc
        assert len(pop_sizes) == (N_anc + 1)
        assert len(pulses) == len(self.pulse_times)
        self.name = name
        self.N_anc = N_anc
        self.pop_sizes = pop_sizes
        self.pulses = pulses
        self.file_prefix = path + '/scenario_' + name
        self.plot = f"{self.file_prefix}.svg"

    def build(self):
        b = demes.Builder(
            description=self.name,
            time_units="generations",
            generation_time=1,
        )
        b.add_deme(
            "Pop0",
            description="Ancestral 1",
            epochs=[dict(end_time=0, start_size=self.pop_sizes[0])],
        )
        start = 1500
        for i in range(1, self.N_anc):
            b.add_deme(
                f"Pop{i}",
                description=f"Ancestral {i + 1}",
                ancestors=["Pop0"],
                start_time=start,
                epochs=[dict(end_time=0, start_size=self.pop_sizes[i])],
            )
            start -= 200
        b.add_deme(
            f"Pop{self.N_anc}",
            description="Admixed",
            ancestors=["Pop0"],
            start_time=200,
            epochs=[dict(end_time=0, start_size=self.pop_sizes[self.N_anc])],
        )
        for t, p in zip(self.pulse_times, self.pulses):
            b.add_pulse(
                sources=[f"Pop{i}" for i in range(self.N_anc)],
                dest=f"Pop{self.N_anc}",
                proportions=p,
                time=t,
            )
        self.graph = b.resolve()
        demes.dump(self.graph, self.file_prefix + '.yaml')
        demes.dump(self.graph, self.file_prefix + '.json', format='json', simplified=False)
        ax = demesdraw.tubes(self.graph, log_time=True)
        ax.figure.savefig(self.plot)


# ensure pulses 0s are floats!
sc_dict = dict()

# Scenario 2NGF (No Gene Flow)
sc_dict['2NGF'] = Scenario(
    name="2NGF",
    N_anc=2,
    pop_sizes=[10_000, 10_000, 10_000],
    pulses=[
        [.0, .0],
        [.0, .0],
        [.0, .0],
        [.0, .0],
        [.0, .0],
        [.0, .0],
        [.0, .0],
        [.0, .0],
    ],
    path=snakemake.params['outdir'],
)

# Scenario 2A
sc_dict['2A'] = Scenario(
    name="2A",
    N_anc=2,
    pop_sizes=[10_000, 10_000, 10_000],
    pulses=[
        [.0, 0.2],
        [.0, 0.2],
        [.0, 0.2],
        [.0, .0],
        [.0, .0],
        [.0, 0.2],
        [.0, 0.2],
        [.0, 0.2],
    ],
    path=snakemake.params['outdir'],
)

# Scenario 2B
sc_dict['2B'] = Scenario(
    name="2B",
    N_anc=2,
    pop_sizes=[10_000, 10_000, 10_000],
    pulses=[
        [.0, 0.2],
        [.0, 0.2],
        [.0, 0.2],
        [.2, .0],
        [.2, .0],
        [.0, 0.2],
        [.0, 0.2],
        [.0, 0.2],
    ],
    path=snakemake.params['outdir'],
)

# Scenario 2C
sc_dict['2C'] = Scenario(
    name="2C",
    N_anc=2,
    pop_sizes=[10_000, 1_000, 5_000],
    pulses=[
        [.0, 0.2],
        [.0, 0.2],
        [.0, 0.2],
        [.2, .0],
        [.2, .0],
        [.0, 0.2],
        [.0, 0.2],
        [.0, 0.2],
    ],
    path=snakemake.params['outdir'],
)

# Scenario 3A
sc_dict['3A'] = Scenario(
    name="3A",
    N_anc=3,
    pop_sizes=[10_000, 10_000, 10_000, 10_000],
    pulses=[
        [.0, .2, .0],
        [.0, .2, .0],
        [.0, .2, .0],
        [.2, .0, .0],
        [.0, .0, .0],
        [.0, .0, .2],
        [.0, .0, .2],
        [.0, .0, .2],
    ],
    path=snakemake.params['outdir'],
)

# Scenario 3B
sc_dict['3B'] = Scenario(
    name="3B",
    N_anc=3,
    pop_sizes=[5_000, 1_000, 10_000, 5_000],
    pulses=[
        [.0, .2, .0],
        [.0, .0, .2],
        [.0, .2, .0],
        [.2, .0, .0],
        [.2, .0, .0],
        [.0, .0, .2],
        [.0, .2, .0],
        [.0, .0, .2],
    ],
    path=snakemake.params['outdir'],
)

sc_dict[sc].build()
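The Snakefile selects one scenario via the sc wildcard and calls build(), which resolves the demes graph and writes the YAML/JSON/SVG outputs. Outside the workflow, the same demes API can be exercised with a minimal two-deme sketch (all names, sizes, and the filename invented):

import demes

# Minimal demes graph: one ancestral deme, one admixed deme,
# and a single pulse; the output filename is hypothetical.
b = demes.Builder(time_units="generations", generation_time=1)
b.add_deme("Pop0", epochs=[dict(end_time=0, start_size=10_000)])
b.add_deme("Pop1", ancestors=["Pop0"], start_time=200,
           epochs=[dict(end_time=0, start_size=10_000)])
b.add_pulse(sources=["Pop0"], dest="Pop1", proportions=[0.2], time=50)
graph = b.resolve()
demes.dump(graph, "example.yaml")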
Assembly pipeline for 10X Chromium genomes of Mytilus species (v1.0)
run:
    import os
    import subprocess
    import json

    if os.path.exists(output.fasta):
        os.remove(output.fasta)

    # Get clusters for Mollusca taxid=6447
    url = 'http://www.orthodb.org/search?level=6447&limit=50000'
    cmd = f'wget "{url}" -O {output.clustids}'
    subprocess.run(cmd, shell=True, check=True, stderr=subprocess.DEVNULL)

    # Loop over each cluster and append its sequences to the output fasta
    with open(output.clustids, 'r') as fr:
        clusters = json.load(fr)
    for C_id in clusters['data']:
        url = f'http://www.orthodb.org/fasta?id={C_id}&species=all'
        cmd = f'wget "{url}" -O - >> {output.fasta}'
        subprocess.run(cmd, shell=True, check=True, stderr=subprocess.DEVNULL)
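Shelling out to wget works, but a pure-Python variant avoids the subprocess dependency. A sketch using the same OrthoDB URLs, with a hypothetical output path:

import json
import urllib.request

# Fetch the cluster list, then append each cluster's fasta to one file
# (the output filename here is invented).
with urllib.request.urlopen('http://www.orthodb.org/search?level=6447&limit=50000') as r:
    clusters = json.load(r)

with open('mollusca_orthodb.fasta', 'wb') as fw:
    for c_id in clusters['data']:
        url = f'http://www.orthodb.org/fasta?id={c_id}&species=all'
        with urllib.request.urlopen(url) as resp:
            fw.write(resp.read())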
Format / EDAM
JSON
JavaScript Object Notation; a lightweight, text-based format representing tree-structured data as key-value pairs.
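For instance, a nested Python dict maps directly onto JSON text:

import json

# Key-value pairs with nested lists/objects serialize to readable text.
print(json.dumps({"name": "example", "values": [1, 2, 3]}, indent=2))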