Snakemake workflow: Bioinfo_Macro_Microbial_Metatranscriptomics

public public 1yr ago 0 bookmarks

Snakemake workflow: Bioinfo_Macro_Microbial_Metatranscriptomics

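A Snakemake workflow for microbial metatranscriptomics: read trimming and quality control (fastp, FastQC, MultiQC), rRNA detection (ribodetector), taxonomic classification (Kraken2), and read alignment with STAR and Bowtie2 followed by coverage estimation with CoverM.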

Usage

The usage of this workflow is described in the Snakemake Workflow Catalog.

Code Snippets

17
18
19
20
21
22
23
24
25
shell:
    """
    bowtie2-build \
        --threads {threads} \
        {params.extra} \
        {input.reference} \
        {output.prefix} \
    2> {log} 1>&2
    """
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
# Align the read pair with bowtie2 against the index at {params.index_prefix}
# and stream the alignments directly into `samtools sort`, which writes the
# sorted result to {output.cram} (no intermediate SAM file is created).
#   --rg-id / --rg        set the read-group ID and extra read-group fields
#   -l 9                  highest compression level for the sorted output
#   -m                    per-thread sort memory, taken from params
#   --reference           reference FASTA handed to samtools for the output
# NOTE(review): the same {threads} value is given to both bowtie2 and
# samtools sort, which run concurrently — confirm this oversubscription is
# intended.
# The whole pipeline is wrapped in ( ... ) so that stderr and stdout of both
# tools end up in {log}.
shell:
    """
    (bowtie2 \
        -x {params.index_prefix} \
        -1 {input.forward_} \
        -2 {input.reverse_} \
        --threads {threads} \
        --rg-id '{params.rg_id}' \
        --rg '{params.rg_extra}' \
        {params.extra} \
    | samtools sort \
        -l 9 \
        -M \
        -m {params.samtools_mem} \
        -o {output.cram} \
        --reference {input.reference} \
        --threads {threads} \
    ) 2> {log} 1>&2
    """
15
16
17
18
19
20
21
22
23
24
25
# Convert {input.cram} to BAM with `samtools view`.
#   --reference    reference FASTA used to read the input
#   --output-fmt   force BAM output
#   --output       destination file, {output.bam}
# `--threads` is passed exactly once (it was previously duplicated).
# stderr and stdout of samtools are both written to {log}.
shell:
    """
    samtools view \
        --threads {threads} \
        --reference {input.reference} \
        --output-fmt BAM \
        --output {output.bam} \
        {input.cram} \
    2> {log} 1>&2
    """
45
46
47
48
49
50
51
52
53
54
55
shell:
    """
    coverm genome \
        --bam-files {input.bams} \
        --methods {params.methods} \
        --separator "^" \
        --threads {threads} \
        --min-covered-fraction {params.min_covered_fraction} \
    > {output} \
    2> {log}
    """
74
75
76
77
78
79
80
81
82
# Compute per-contig coverage with CoverM from the BAM files in {input.bams},
# using the method(s) in {params.methods} and counting only properly paired
# reads. The table goes to {output}; CoverM's stderr goes to {log}.
# `--threads {threads}` is passed so the tool uses the cores Snakemake
# allots to the rule, matching the `coverm genome` invocation.
shell:
    """
    coverm contig \
        --bam-files {input.bams} \
        --methods {params.methods} \
        --proper-pairs-only \
        --threads {threads} \
    > {output} \
    2> {log}
    """
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
shell:
    """
    fastp \
        --in1 {input.forward_} \
        --in2 {input.reverse_} \
        --out1 {output.forward_} \
        --out2 {output.reverse_} \
        --unpaired1 {output.unpaired1} \
        --unpaired2 {output.unpaired2} \
        --html {output.html} \
        --json {output.json} \
        --compression 1 \
        --verbose \
        --adapter_sequence {params.adapter_forward} \
        --adapter_sequence_r2 {params.adapter_reverse} \
        --thread {threads} \
        {params.extra} \
    2> {log} 1>&2
    """
12
13
shell:
    "fastqc {input} 2> {log} 1>&2"
21
22
23
24
25
26
27
28
29
30
31
32
33
# Classify the paired, gzip-compressed reads with Kraken2 against
# {input.database}; the summary report is written to {output.report}.
#
# The per-read classifications are left on stdout (Kraken2's default when
# --output is not given) and piped into pigz, which writes {output.out_gz}.
# A real pipeline is used instead of `--output >(pigz ...)`: bash does not
# wait for a process substitution to finish, so the job could end before the
# compressed file was completely written, and a pigz failure would go
# unnoticed. With a pipe, the shell waits for pigz and (assuming Snakemake's
# default bash strict mode with pipefail) a failure in either stage fails
# the job.
#
# The ( ... ) group collects stderr of both kraken2 and pigz in {log}.
shell:
    """
    (kraken2 \
        --db {input.database} \
        --threads {threads} \
        --paired \
        --gzip-compressed \
        --report {output.report} \
        {input.forward_} \
        {input.reverse_} \
    | pigz -11 \
    > {output.out_gz} \
    ) 2> {log}
    """
13
14
15
16
17
# Create a symbolic link at each output path that points to the canonical
# (absolute, symlink-resolved) location of the corresponding input file.
# The command substitutions are double-quoted so that a resolved path
# containing whitespace or glob characters is passed to `ln` as one argument.
shell:
    """
    ln --symbolic "$(readlink --canonicalize {input.forward_})" {output.forward_}
    ln --symbolic "$(readlink --canonicalize {input.reverse_})" {output.reverse_}
    """
11
12
shell:
    "gzip --decompress --stdout {input.fa} > {output.fa} 2> {log}"
25
26
shell:
    "gzip --decompress --stdout {input.gtf} > {output.gtf}"
41
42
43
44
45
46
47
48
shell:
    """
    (gzip -dc {input.fna} \
    | bgzip \
        -@ {threads} \
        -l 9 \
    > {output.fna}) 2> {log}
    """
25
26
27
28
29
30
31
32
33
34
35
36
shell:
    """
    multiqc \
        --title {params.library} \
        --force \
        --filename {params.library} \
        --outdir {params.out_dir} \
        --dirs \
        --dirs-depth 1 \
        {input} \
    2> {log} 1>&2
    """
13
14
15
16
17
18
19
20
21
22
shell:
    """
    multiqc \
        --filename reads \
        --title reads \
        --force \
        --outdir {params.dir} \
        {input} \
    2> {log} 1>&2
    """
37
38
39
40
41
42
43
44
45
46
shell:
    """
    multiqc \
        --title fastp \
        --force \
        --filename fastp \
        --outdir {params.dir} \
        {input} \
    2> {log} 1>&2
    """
61
62
63
64
65
66
67
68
69
70
shell:
    """
    multiqc \
        --title ribodetector \
        --force \
        --filename ribodetector \
        --outdir {params.dir} \
        {input} \
    2> {log} 1>&2
    """
85
86
87
88
89
90
91
92
93
94
shell:
    """
    multiqc \
        --title kraken2 \
        --force \
        --filename kraken2 \
        --outdir {params.dir} \
        {input} \
    2> {log} 1>&2
    """
109
110
111
112
113
114
115
116
117
118
shell:
    """
    multiqc \
        --title star \
        --force \
        --filename star \
        --outdir {params.dir} \
        {input} \
    2> {log} 1>&2
    """
133
134
135
136
137
138
139
140
141
142
shell:
    """
    multiqc \
        --title bowtie2 \
        --force \
        --filename bowtie2 \
        --outdir {params.dir} \
        {input} \
    2> {log} 1>&2
    """
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
shell:
    """
    ribodetector_cpu \
        --input \
            {input.forward_} \
            {input.reverse_} \
        --output \
            {output.forward_} \
            {output.reverse_} \
        --len {params.average_length} \
        --ensure rrna \
        --threads {threads} \
        --chunk_size {params.chunk_size} \
    2> {log} 1>&2
    """
11
12
shell:
    "samtools index {input} 2> {log} 1>&2"
25
26
shell:
    "samtools index {input} 2> {log} 1>&2"
39
40
shell:
    "samtools dict {input} --output {output} 2> {log} 1>&2"
53
54
shell:
    "samtools dict {input} --output {output} 2> {log} 1>&2"
67
68
shell:
    "tabix {input} 2> {log} 1>&2"
81
82
shell:
    "bgzip {input} 2> {log} 1>&2"
97
98
shell:
    "samtools stats --reference {input.reference} {input.bam} > {output.tsv} 2> {log}"
113
114
shell:
    "samtools stats --reference {input.reference} {input.cram} > {output.tsv} 2> {log}"
128
129
# Write flag-based alignment counts for {input.bam} to {output.txt};
# samtools' stderr goes to {log}. Uses the documented subcommand name
# `flagstat` rather than the plural spelling.
shell:
    "samtools flagstat {input.bam} > {output.txt} 2> {log}"
143
144
# Write flag-based alignment counts for {input.cram} to {output.txt};
# samtools' stderr goes to {log}. Uses the documented subcommand name
# `flagstat` rather than the plural spelling.
shell:
    "samtools flagstat {input.cram} > {output.txt} 2> {log}"
158
159
shell:
    "samtools idxstats {input.bam} > {output.tsv} 2> {log}"
173
174
shell:
    "samtools idxstats {input.cram} > {output.tsv} 2> {log}"
18
19
20
21
22
23
24
25
26
27
28
shell:
    """
    STAR \
        --runMode genomeGenerate \
        --runThreadN {threads} \
        --genomeDir {output.folder} \
        --genomeFastaFiles {input.dna} \
        --sjdbGTFfile {input.gtf} \
        --sjdbOverhang {params.sjdbOverhang} \
    2> {log} 1>&2
    """
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
# Align the read pair ({input.r1}, {input.r2}) with STAR against the index
# in {input.index}; output files are written under {params.out_prefix}.
#
# Step 1: raise the open-file limit to 90000 for this shell. This first
#         command truncates {log} (`2>`).
#         NOTE(review): presumably needed because STAR's coordinate sort
#         opens many temporary files — confirm. If the hard limit is lower
#         than 90000 the ulimit call fails.
# Step 2: run STAR, appending (`2>>`) to {log} so any ulimit message is kept.
#   --outSAMtype BAM SortedByCoordinate   coordinate-sorted BAM output
#   --outSAMunmapped Within KeepPairs     keep unmapped reads in the BAM
#   --outReadsUnmapped Fastx              also write unmapped reads separately
#   --readFilesCommand "gzip -cd"         inputs are decompressed on the fly
#   --quantMode GeneCounts                per-gene read counts
shell:
    """
    ulimit -n 90000 2> {log} 1>&2

    STAR \
        --runMode alignReads \
        --runThreadN {threads} \
        --genomeDir {input.index} \
        --readFilesIn \
            {input.r1} \
            {input.r2} \
        --outFileNamePrefix {params.out_prefix} \
        --outSAMtype BAM SortedByCoordinate \
        --outSAMunmapped Within KeepPairs \
        --outReadsUnmapped Fastx \
        --readFilesCommand "gzip -cd" \
        --quantMode GeneCounts \
    2>> {log} 1>&2
    """
102
103
104
105
106
107
108
109
110
111
112
113
114
115
shell:
    """
    samtools sort \
        -l 9 \
        -m 1G \
        -M \
        -o {output.cram} \
        --output-fmt CRAM \
        --reference {input.reference} \
        -@ {threads} \
        --write-index \
        {input.bam} \
    2> {log} 1>&2
    """
Show/Hide 25 more snippets with no or duplicated tags.

Login to post a comment if you would like to share your experience with this workflow.

Do you know this workflow well? If so, you can request seller status, and start supporting this workflow.

Free

Created: 1yr ago
Updated: 1yr ago
Maintainers: public
URL: https://github.com/3d-omics/Bioinfo_Micro_Microbial_Metatranscriptomics
Name: bioinfo_micro_microbial_metatranscriptomics
Version: 1
Badge:
workflow icon

Insert copied code into your website to add a link to this workflow.

Downloaded: 0
Copyright: Public Domain
License: MIT License
  • Future updates

Related Workflows

cellranger-snakemake-gke
snakemake workflow to run cellranger on a given bucket using gke.
A Snakemake workflow for running cellranger on a given bucket using Google Kubernetes Engine. The usage of this workflow ...