Workflow Steps and Code Snippets

24214 tagged steps and code snippets from all workflows

Snakemake workflow: Reconstructing raw tomography data using tomopy. (Version 1)

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
import tomopy
import dxchange
import numpy as np
from glob import glob

def reconstruction(path_to_output, tifFile, numDarks, numFlats, numProjs, rot_center_init):
    #Load Raw Data into Memory
    tifRef = tifFile
    firstIndex = 1
    #Read Darks
    darkStart = firstIndex
    darkStop = darkStart + numDarks 
    dark = dxchange.reader.read_tiff_stack(tifRef, [i for i in range(darkStart, darkStop)])
    #Read Flats
    flatStart = darkStop
    flatStop = flatStart + numFlats
    flat = dxchange.reader.read_tiff_stack(tifRef, [i for i in range(flatStart, flatStop)])
    #Read Projections
    projStart = flatStop
    projStop = projStart + numProjs
    proj = dxchange.reader.read_tiff_stack(tifRef, [i for i in range(projStart, projStop)])
    print ("Shape of projection cube", proj.shape)
    #Define equiangular angles (angles here not part of raw data)
    theta = tomopy.angles(proj.shape[0])
    #Start Reconstruction
    #Correct for dark and flat
    threads = 12
    proj = tomopy.normalize(proj, flat, dark, ncore=threads)
    proj = tomopy.minus_log(proj, ncore=threads)
    rot_center = rot_center_init
    #Inverse Radon
    recon = tomopy.recon(proj, theta, center=rot_center, algorithm='gridrec', sinogram_order=False, ncore=threads)
    #Cosmetics
    recon = tomopy.circ_mask(recon, axis=0, ratio=0.95, ncore=threads)
    print("Reconstruction finished")
    dxchange.write_tiff_stack(recon, fname=path_to_output + '/recon')
    print ("Finished")

def main():
    path_to_directory = snakemake.input[0]
    fnames = glob(path_to_directory + '/' + '*.tif')
    tiff_file_example = fnames[0]
    path_to_output = snakemake.output[0]
    numDarks_magma = snakemake.params.numDarks
    numFlats_magma = snakemake.params.numFlats
    numProjs_magma = snakemake.params.numProjs
    rot_center_magma = snakemake.params.rotCenter
    reconstruction(path_to_output, tiff_file_example, numDarks_magma, numFlats_magma, numProjs_magma, rot_center_magma)

if __name__ == '__main__':
    main()
42
43
wrapper:
    "0.59.2/bio/multiqc"
38
39
shell:
    "samtools dict {input} > {output} 2> {log} "
70
71
shell:
    "rbt vcf-fix-iupac-alleles < {input} | bcftools view -Oz > {output}"
84
85
wrapper:
    "0.59.2/bio/tabix"
10
11
12
13
shell:
    "(bcftools view --apply-filters PASS --output-type u {input} | "
    "rbt vcf-to-txt -g --fmt DP AD --info ANN | "
    "gzip > {output}) 2> {log}"
 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
import sys
sys.stderr = open(snakemake.log[0], "w")

import common
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import seaborn as sns

calls = pd.read_table(snakemake.input[0], header=[0, 1])
samples = [name for name in calls.columns.levels[0] if name != "VARIANT"]
sample_info = calls.loc[:, samples].stack([0, 1]).unstack().reset_index(1, drop=False)
sample_info = sample_info.rename({"level_1": "sample"}, axis=1)

sample_info = sample_info[sample_info["DP"] > 0]
sample_info["freq"] = sample_info["AD"] / sample_info["DP"]
sample_info.index = np.arange(sample_info.shape[0])

plt.figure()

sns.stripplot(x="sample", y="freq", data=sample_info, jitter=True)
plt.ylabel("allele frequency")
plt.xticks(rotation="vertical")

plt.savefig(snakemake.output.freqs)

plt.figure()

sns.stripplot(x="sample", y="DP", data=sample_info, jitter=True)
plt.ylabel("read depth")
plt.xticks(rotation="vertical")

plt.savefig(snakemake.output.depths)
42
43
wrapper:
    "0.59.2/bio/multiqc"