FABLE: Automated and reproducible snakemake workflow

public public 1yr ago 0 bookmarks

FABLE is an automated and reproducible snakemake workflow tailored to Oxford Nanopore Sequencing reads. After easy installation

Code Snippets

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import pathlib, os

# Create an empty dataframe 
df = pd.DataFrame()

# get list of benchmark files 
bench_files = pathlib.Path("benchmarks").glob("*.tsv")

for bench_tsv_file in bench_files:
    tmp_df = pd.read_csv(bench_tsv_file, sep='\t')
    tmp_df.insert(0,"Process",bench_tsv_file.stem)
    df = df.append(tmp_df,ignore_index=True)
    df['Process'] = pd.Categorical(df.Process, categories=["Porechop", "NanoQ", "FastQC", "Pre-alignment_NanoPlot", "Minimap2", "Post-alignment_NanoPlot" ], ordered=True)
    df=df.sort_values('Process')
    df = df.reset_index(drop=True)

# replace negligible values to 0
df = df.replace('-', np.NaN)
df=df.replace(np.nan, 0)

df['cumsum_cpu_time'] = df['cpu_time'].cumsum()
df['cumsum_max_vms']=df['max_vms'].cumsum()

print(df)
x_idx = np.arange(df.shape[0])

fig, (ax1,ax2)= plt.subplots(nrows=1, ncols=2, figsize=(12, 4))

# c = ['hotpink', 'palevioletred', 'pink', 'plum','thistle','lavender']
c = ['thistle','palevioletred','pink','lavender','lightsteelblue','powderblue']

# bar plot for cpu_time on ax1
df.plot(x='Process', y='cpu_time', kind='bar', color=c, ax=ax1)

# line plot for cumsum_cpu_time on ax1
df.plot(x='Process', y='cumsum_cpu_time', kind='line', marker='o', markersize=2, color='black', ax=ax1)

# bar plot for max_vms on ax2
df.plot(x='Process', y='max_vms', kind='bar', color=c, ax=ax2)

# line plot for cumsum_max_vms on ax2
df.plot(x='Process', y='cumsum_max_vms', kind='line', marker='o', markersize=2, color='black', ax=ax2)

plt.tight_layout()
ax1.title.set_text('CPU time')
ax2.title.set_text('Maximum VMS')
ax1.set_xticklabels(df['Process'],rotation = 30, horizontalalignment='right', fontsize='small')
ax1.set_ylabel('Seconds')
ax2.set_xticklabels(df['Process'],rotation = 30, horizontalalignment='right', fontsize='small')
ax2.set_ylabel('MegaBytes')
ax1.legend(loc='upper left', bbox_to_anchor=(0, 1.17), fontsize=7, fancybox=True, ncol=1)
ax2.legend(loc='upper left', bbox_to_anchor=(0, 1.17), fontsize=7, fancybox=True, ncol=1)
plt.savefig(snakemake.output[0], bbox_inches="tight")
 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import pathlib, os

#Create an empty dataframe first.
df = pd.DataFrame()

# get list of benchmark files in the 
bench_files = pathlib.Path("benchmarks").glob("*.tsv")
# bench_files = sorted(bench_files, key=os.path.getmtime)

#for bench_tsv_file in pathlib.Path('./workdir_mm2/benchmarks').glob("*.tsv"):
for bench_tsv_file in bench_files:
    #print(bench_tsv_file)
    tmp_df = pd.read_csv(bench_tsv_file, sep='\t')
    tmp_df.insert(0,"Process",bench_tsv_file.stem)
    df = df.append(tmp_df,ignore_index=True)
    df['Process'] = pd.Categorical(df.Process, categories=["Porechop", "NanoQ", "FastQC", "Pre-alignment_NanoPlot", "Vulcan", "Post-alignment_NanoPlot" ], ordered=True)
    df=df.sort_values('Process')
    df = df.reset_index(drop=True)

df['cumsum_cpu_time'] = df['cpu_time'].cumsum()
df['cumsum_max_vms']=df['max_vms'].cumsum()

print(df)
x_idx = np.arange(df.shape[0])

fig, (ax1,ax2)= plt.subplots(nrows=1, ncols=2, figsize=(12, 4))

# c = ['hotpink','palevioletred', 'pink', 'plum','thistle','lavender']
c = ['thistle','palevioletred','pink','lavender','lightsteelblue','powderblue']


# bar plot for cpu_time on ax1
df.plot(x='Process', y='cpu_time', kind='bar', color=c, ax=ax1)

# line plot for cumsum_cpu_time on ax1
df.plot(x='Process', y='cumsum_cpu_time', kind='line', marker='o', markersize=2, color='black', ax=ax1)

# bar plot for max_vms on ax2
df.plot(x='Process', y='max_vms', kind='bar', color=c, ax=ax2)

# line plot for cumsum_max_vms on ax2
df.plot(x='Process', y='cumsum_max_vms', kind='line', marker='o', markersize=2, color='black', ax=ax2)

plt.tight_layout()
ax1.title.set_text('CPU time')
ax2.title.set_text('Maximum VMS')
ax1.set_xticklabels(df['Process'],rotation = 30, horizontalalignment='right', fontsize='small')
ax1.set_ylabel('Seconds')
ax2.set_xticklabels(df['Process'],rotation = 30, horizontalalignment='right', fontsize='small')
ax2.set_ylabel('MegaBytes')
ax1.legend(loc='upper left', bbox_to_anchor=(0, 1.17), fontsize=7, fancybox=True, ncol=1)
ax2.legend(loc='upper left', bbox_to_anchor=(0, 1.17), fontsize=7, fancybox=True, ncol=1)
plt.savefig(snakemake.output[0], bbox_inches="tight")
36
37
shell: 
    "porechop -i {input} -t {threads} -o {output}"
65
66
shell:
    "fastqc --threads {threads} --outdir {params.outdir} {input}"
80
81
shell:
    "NanoPlot -t {threads} --N50 --fastq {input} --outdir {params.outdir} -p {params.prefix}"
95
96
shell:
    "vulcan -ont -t {threads} -r {input.ref} -i {input.fastq} -o {params.prefix}"
103
104
shell:
    "samtools index {input}"
118
119
shell:
    "NanoPlot -t {threads} --N50 --bam {input} -p {params.prefix} -o {params.outdir}"
126
127
shell:
    "bedtools genomecov -ibam {input} -bg > {output}"
134
135
shell:
    "faSize -detailed -tab {input} > {output}"
143
144
shell:
    "bedGraphToBigWig {input.bedgraph} {input.chrsize} {output}"
151
152
shell:
    "samtools stats {input} | grep ^SN | cut -f 2- > {output}"
159
160
shell:
    "mv {input} {output}"
167
168
shell:
    "mv {input} {output}"
177
178
script:
    "scripts/vulcan_plot_benchmark.py"
189
190
shell:
    "multiqc {input} -o report/MultiQC"
201
202
shell:
    "minimap2 -ax map-ont {input.ref} {input.fastq} > {output}"
209
210
shell:
    "samtools sort {input} -o {output}"
217
218
shell:
    "samtools index {input}"
232
233
shell:
    "NanoPlot -t {threads} --N50 --bam {input} -p {params.prefix} -o {params.outdir}"
240
241
shell:
    "bedtools genomecov -ibam {input} -bg > {output}"
248
249
shell:
    "faSize -detailed -tab {input} > {output}"
257
258
shell:
    "bedGraphToBigWig {input.bedgraph} {input.chrsize} {output}"
265
266
shell:
    "samtools stats {input} | grep ^SN | cut -f 2- > {output}"
273
274
shell:
    "mv {input} {output}"
281
282
shell:
    "mv {input} {output}"
291
292
script:
    "scripts/mm2_plot_benchmark.py"
303
304
shell:
    "multiqc {input} --outdir report/MultiQC"
ShowHide 19 more snippets with no or duplicated tags.

Login to post a comment if you would like to share your experience with this workflow.

Do you know this workflow well? If so, you can request seller status , and start supporting this workflow.

Free

Created: 1yr ago
Updated: 1yr ago
Maitainers: public
URL: https://github.com/kaiseriskera/FABLE
Name: fable
Version: 1
Badge:
workflow icon

Insert copied code into your website to add a link to this workflow.

Downloaded: 0
Copyright: Public Domain
License: Apache License 2.0
  • Future updates

Related Workflows

cellranger-snakemake-gke
snakemake workflow to run cellranger on a given bucket using gke.
A Snakemake workflow for running cellranger on a given bucket using Google Kubernetes Engine. The usage of this workflow ...