RASflow: RNA-Seq Analysis Snakemake Workflow

public public 1yr ago Version: v2.0 0 bookmarks

RASflow is a modular, flexible, and user-friendly RNA-Seq analysis workflow. RASflow can be applied to both model and non-model organisms. It supports mapping RNA-Seq raw reads to both genome and transcriptome (downloaded from a public database or homemade by users), and it can do both transcript- and gene-level Differential Expression Analysis (DEA) when a transcriptome is used as the mapping reference. It requires little programming skill for basic use.

Code Snippets

9
# Split every line of the input file ($1) on double-quote characters and write
# fields 2 and 6, separated by a tab, to the output file ($2).
# Both paths are quoted so that names containing spaces or glob characters are
# passed to awk and to the redirection as single words.
awk -F'"' '{print $2"\t"$6}' "$1" > "$2"
18
19
shell:
    # Build a HISAT2 index from the transcriptome in input.trans, written under
    # the prefix params.index, using config[NCORE] threads (-p).
    "hisat2-build -p {config[NCORE]} {input.trans} {params.index}"
27
28
29
run:
    # Copy the sample's R1 and R2 FASTQ files from the location in
    # config[NELSIN] with scp, using input.key as the identity file (-i).
    # R1 goes to output.forward, R2 to output.reverse.
    shell("scp -i {input.key} {config[NELSIN]}/{wildcards.sample}_*_R1_001.fastq.gz {output.forward}")
    shell("scp -i {input.key} {config[NELSIN]}/{wildcards.sample}_*_R2_001.fastq.gz {output.reverse}")
43
44
45
shell:
    # Align the paired reads (-1/-2) against the index at params.index with
    # HISAT2, writing SAM to output.sam; only if that succeeds (&&), convert the
    # SAM file to BAM with samtools view.
    "hisat2 -p {config[NCORE]} -x {params.index} -1 {input.forward} -2 {input.reverse} -S {output.sam}"
    " && samtools view -b -S {output.sam} > {output.bam}"
52
53
shell:
    # Sort the BAM file into output.sort, then index the sorted file.
    # The index step runs only when sorting succeeds.
    "samtools sort {input.bam} -o {output.sort} && samtools index {output.sort}"
60
61
shell:
    # Write the samtools idxstats report for the sorted BAM to output.count.
    "samtools idxstats {input.sort} > {output.count}"
68
69
shell:
    # Change into ../scripts, compile sumgenescod.java with opencsv-1.8.jar on
    # the classpath, then run the compiled class on codgenelist.csv and the
    # rule's input. Each step runs only if the previous one succeeded.
    # NOTE(review): paths are relative to the working directory, and {input} is
    # resolved after the cd -- confirm the input path is valid from ../scripts.
    "cd ../scripts && javac -cp opencsv-1.8.jar:. sumgenescod.java && java -cp opencsv-1.8.jar:. sumgenescod codgenelist.csv {input}"
76
77
shell:
    # Run ../scripts/formatCount.sh with the gene-count file as its first
    # argument and the formatted output path as its second.
    "sh ../scripts/formatCount.sh {input.geneCount} {output.formatCount}"
28
29
30
run:
    # Fetch the paired-end FASTQ files for this sample with scp, using
    # params.key as the identity file (-i). The file matching *R1* under
    # params.input_path is copied to output.forward, the one matching *R2* to
    # output.reverse.
    # Each shell() call is its own statement; the stray trailing comma that
    # previously wrapped the first call in a discarded tuple has been removed.
    shell("scp -i {params.key} {params.input_path}/{wildcards.sample}_*R1*.f*q.gz {output.forward}")
    shell("scp -i {params.key} {params.input_path}/{wildcards.sample}_*R2*.f*q.gz {output.reverse}")
39
40
41
42
43
shell:
    # Fetch the single-end FASTQ file for this sample with scp.
    # extglob is enabled first so the bash pattern ?(...) (zero or one
    # occurrence) is recognised: the sample name may optionally be followed by
    # "_<anything>" and/or ".<anything>" before the .f*q.gz extension.
    """
    shopt -s extglob
    scp -i {params.key} {params.input_path}/{wildcards.sample}?(_*)?(.*).f*q.gz {output.read}
    """
53
54
55
shell:
    # Create the index directory, build the HISAT2 genome index under the prefix
    # params.index with config[NCORE] threads, then extract known splice sites
    # from the annotation in config[ANNOTATION] into output.splicesites.
    # mkdir -p keeps the step from failing when the directory already exists,
    # and the leading space on the second string keeps "&&" separated from the
    # index prefix after the two literals are concatenated.
    "mkdir -p {output.indexes} && hisat2-build -p {config[NCORE]} {input.genome} {params.index}"
    " && hisat2_extract_splice_sites.py {config[ANNOTATION]} > {output.splicesites}"
71
72
73
run:
    # Align the paired reads (-1/-2) with HISAT2 against params.index, passing
    # the splice-site file via --known-splicesite-infile, and write SAM.
    shell("hisat2 -p {config[NCORE]} --known-splicesite-infile {input.splicesites} -x {params.index} -1 {input.forward} -2 {input.reverse} -S {output.sam}")
    # Convert the SAM output to BAM, giving samtools config[NCORE] extra threads (-@).
    shell("samtools view -@ {config[NCORE]} -b -S {output.sam} > {output.bam}")
87
88
89
run:
    # Align unpaired reads (-U) with HISAT2 against params.index, passing the
    # splice-site file via --known-splicesite-infile, and write SAM.
    # The single-end read file is taken from input.forward.
    shell("hisat2 -p {config[NCORE]} --known-splicesite-infile {input.splicesites} -x {params.index} -U {input.forward} -S {output.sam}")
    # Convert the SAM output to BAM, giving samtools config[NCORE] extra threads (-@).
    shell("samtools view -@ {config[NCORE]} -b -S {output.sam} > {output.bam}")
96
97
shell:
    # Sort the BAM file into output.sort using config[NCORE] extra threads (-@).
    "samtools sort -@ {config[NCORE]} {input.bam} -o {output.sort}"
106
107
108
109
110
111
112
113
run:
    # Count reads per feature for one sample with the tool named in
    # config["COUNTER"], leaving a two-column table in output.count.
    counter = config["COUNTER"]
    if counter == "featureCounts":
        # The paired and single-end commands differ only in -p, so the flag is
        # computed once. Any END value other than "pair" is treated as single-end.
        paired_flag = "-p" if config["END"] == "pair" else ""
        # After counting, drop the first two lines of featureCounts' output and
        # keep columns 1 and 7, then replace output.count with that reduced
        # table by way of a per-sample temporary file.
        shell(
            "featureCounts {paired_flag} -T {config[NCORE]} -t exon -g {config[ATTRIBUTE]} -a {input.annotation} -o {output.count} {input.sort}"
            " && tail -n +3 {output.count} | cut -f1,7 > temp.{wildcards.sample} && mv temp.{wildcards.sample} {output.count}",
            paired_flag=paired_flag,
        )
    elif counter == "htseq-count":
        # sed removes the summary lines that start with "__".
        shell("htseq-count -f bam -i {config[ATTRIBUTE]} -s no -t exon {input.sort} {input.annotation} | sed '/^__/ d' > {output.count}")
    else:
        # Fail with a clear message instead of silently producing no output.
        raise ValueError(
            "Unsupported COUNTER {!r}: expected 'featureCounts' or 'htseq-count'".format(counter)
        )
121
122
shell:
    # Run Qualimap bamqc on the sorted BAM with config[NCORE] threads (-nt),
    # writing the report into output.bamqc. The Java heap size is fixed at 6G.
    "qualimap bamqc -bam {input.sort} -nt {config[NCORE]} --java-mem-size=6G -outdir {output.bamqc}"
130
131
shell:
    # Aggregate the bamqc results and the count summaries into one MultiQC
    # report written to output.report.
    "multiqc {input.bamqc} {input.count_summary} --filename {output.report}"
138
139
shell:
    # Aggregate only the count summaries into a MultiQC report written to
    # output.report.
    "multiqc {input.count_summary} --filename {output.report}"
23
24
25
run:
    # Fetch the paired-end FASTQ files for this sample with scp, using
    # params.key as the identity file (-i). The file matching *R1* under
    # params.input_path is copied to output.forward, the one matching *R2* to
    # output.reverse.
    # Each shell() call is its own statement; the stray trailing comma that
    # previously wrapped the first call in a discarded tuple has been removed.
    shell("scp -i {params.key} {params.input_path}/{wildcards.sample}_*R1*.f*q.gz {output.forward}")
    shell("scp -i {params.key} {params.input_path}/{wildcards.sample}_*R2*.f*q.gz {output.reverse}")
36
37
38
shell:
    # Run FastQC on the forward reads and, if that succeeds, on the reverse
    # reads, writing results into params.outputpath.
    # The thread count is passed through shell arithmetic ($((N+0))).
    # NOTE(review): presumably this coerces NCORE to a plain integer -- confirm.
    "fastqc -t $(({config[NCORE]}+0)) -o {params.outputpath} {input.forward} && "
    "fastqc -t $(({config[NCORE]}+0)) -o {params.outputpath} {input.reverse}"
48
49
shell:
    # Aggregate everything MultiQC finds under params.path into the report at
    # output.report.
    "multiqc {params.path} --filename {output.report}"
58
59
60
61
62
shell:
    # Fetch the single-end FASTQ file for this sample with scp.
    # extglob is enabled first so the bash pattern ?(...) (zero or one
    # occurrence) is recognised: the sample name may optionally be followed by
    # "_<anything>" and/or ".<anything>" before the .f*q.gz extension.
    """
    shopt -s extglob
    scp -i {params.key} {params.input_path}/{wildcards.sample}?(_*)?(.*).f*q.gz {output.read}
    """
71
72
shell:
    # Run FastQC on the single-end read file, writing results into
    # params.outputpath. The thread count is passed through shell arithmetic.
    # NOTE(review): presumably the "+0" coerces NCORE to a plain integer -- confirm.
    "fastqc -t $(({config[NCORE]}+0)) -o {params.outputpath} {input.read}"
81
82
shell:
    # Aggregate everything MultiQC finds under params.path into the report at
    # output.report.
    "multiqc {params.path} --filename {output.report}"
27
28
29
run:
    # Fetch the paired-end FASTQ files for this sample with scp, using
    # params.key as the identity file (-i). The file matching *R1* under
    # params.input_path is copied to output.forward_read, the one matching *R2*
    # to output.reverse_read.
    # Each shell() call is its own statement; the stray trailing comma that
    # previously wrapped the first call in a discarded tuple has been removed.
    shell("scp -i {params.key} {params.input_path}/{wildcards.sample}_*R1*.f*q.gz {output.forward_read}")
    shell("scp -i {params.key} {params.input_path}/{wildcards.sample}_*R2*.f*q.gz {output.reverse_read}")
38
39
40
41
42
shell:
    # Fetch the single-end FASTQ file for this sample with scp.
    # extglob is enabled first so the bash pattern ?(...) (zero or one
    # occurrence) is recognised: the sample name may optionally be followed by
    # "_<anything>" and/or ".<anything>" before the .f*q.gz extension.
    """
    shopt -s extglob
    scp -i {params.key} {params.input_path}/{wildcards.sample}?(_*)?(.*).f*q.gz {output.read}
    """
49
50
shell:
    # Build a Salmon index of type "quasi" with k-mer length 31 from the rule's
    # input, writing it to the rule's output with config[NCORE] threads.
    "salmon index -t {input} -i {output} --type quasi -k 31 -p {config[NCORE]}"
61
62
63
64
65
shell:
    # Quantify the paired reads (-1/-2) with Salmon against input.index, with
    # automatic library-type detection (-l A), writing to output.quant_dir.
    # Then skip the header line of quant.sf and write columns 1 and 4,
    # tab-separated, to output.tpm.
    # Braces are doubled so Snakemake's formatting leaves them for awk, and
    # "\\t" reaches awk as the two-character escape \t.
    """
    salmon quant -i {input.index} -l A -1 {input.forward_read} -2 {input.reverse_read} -o {output.quant_dir} -p {config[NCORE]} --seqBias --useVBOpt --validateMappings
    awk 'NR==1{{next}}{{print $1"\\t"$4}}' {output.quant_dir}/quant.sf > {output.tpm}
    """
74
75
76
77
78
shell:
    # Quantify the single-end reads (-r) with Salmon against input.index, with
    # automatic library-type detection (-l A), writing to output.quant_dir.
    # Then skip the header line of quant.sf and write columns 1 and 4,
    # tab-separated, to output.tpm.
    # Braces are doubled so Snakemake's formatting leaves them for awk, and
    # "\\t" reaches awk as the two-character escape \t.
    """
    salmon quant -i {input.index} -l A -r {input.read} -o {output.quant_dir} -p {config[NCORE]} --seqBias --useVBOpt --validateMappings
    awk 'NR==1{{next}}{{print $1"\\t"$4}}' {output.quant_dir}/quant.sf > {output.tpm}
    """
85
86
shell:
    # Aggregate the rule's inputs into a MultiQC report written to the rule's
    # output.
    "multiqc {input} --filename {output}"
31
32
33
run:
    # Fetch the paired-end FASTQ files for this sample with scp, using
    # params.key as the identity file (-i). The file matching *R1* under
    # params.input_path is copied to output.forward, the one matching *R2* to
    # output.reverse.
    # Each shell() call is its own statement; the stray trailing comma that
    # previously wrapped the first call in a discarded tuple has been removed.
    shell("scp -i {params.key} {params.input_path}/{wildcards.sample}_*R1*.f*q.gz {output.forward}")
    shell("scp -i {params.key} {params.input_path}/{wildcards.sample}_*R2*.f*q.gz {output.reverse}")
44
45
shell:
    # Trim the paired reads with Trim Galore (--paired) and run FastQC on the
    # result (--fastqc). Output goes into params.outputpath, named after the
    # sample (--basename).
    # The core count is fixed at 4 (-j 4) rather than read from config[NCORE].
    "trim_galore --fastqc -j 4 --paired --basename {wildcards.sample} -o {params.outputpath} {input.forward} {input.reverse}"
54
55
56
57
58
shell:
    # Fetch the single-end FASTQ file for this sample with scp.
    # extglob is enabled first so the bash pattern ?(...) (zero or one
    # occurrence) is recognised: the sample name may optionally be followed by
    # "_<anything>" and/or ".<anything>" before the .f*q.gz extension.
    """
    shopt -s extglob
    scp -i {params.key} {params.input_path}/{wildcards.sample}?(_*)?(.*).f*q.gz {output.read}
    """
67
68
shell:
    # Trim the single-end reads with Trim Galore and run FastQC on the result
    # (--fastqc). Output goes into params.outputpath, named after the sample
    # (--basename).
    # The core count is fixed at 4 (-j 4) rather than read from config[NCORE].
    "trim_galore --fastqc -j 4 --basename {wildcards.sample} -o {params.outputpath} {input.read}"
77
78
shell:
    # Aggregate everything MultiQC finds under params.path into the report at
    # output.report.
    "multiqc {params.path} --filename {output.report}"
Show/Hide 24 more snippets with no or duplicated tags.

Login to post a comment if you would like to share your experience with this workflow.

Do you know this workflow well? If so, you can request seller status, and start supporting this workflow.

Free

Created: 1yr ago
Updated: 1yr ago
Maintainers: public
URL: https://github.com/zhxiaokang/RASflow
Name: rasflow-rna-seq-analysis-snakemake-workflow
Version: v2.0
Badge:
workflow icon

Insert copied code into your website to add a link to this workflow.

Other Versions:
Downloaded: 0
Copyright: Public Domain
License: Creative Commons Zero v1.0 Universal
  • Future updates

Related Workflows

cellranger-snakemake-gke
snakemake workflow to run cellranger on a given bucket using gke.
A Snakemake workflow for running cellranger on a given bucket using Google Kubernetes Engine. The usage of this workflow ...