NonSpliced RNAseq workflow

public public 1yr ago Version: Version 1 0 bookmarks

Workflow for NonSpliced RNAseq data with multiple aligners.

Steps:
- workflow_quality.cwl: - FastQC (quality control) - fastp (trimming) - bowtie2 (read mapping) - sam_to_sorted-bam - featurecounts (transcript read counts) - kallisto (transcript [pseudo]counts)

Code Snippets

53
54
55
56
57
58
59
60
61
62
63
64
65
66
# Executable: bbduk.sh from the pinned BBMap v38.95 installation.
baseCommand:
  - /unlock/infrastructure/binaries/BBMap/BBMap_v38.95/bbduk.sh
# Each entry is emitted as one fused token (separate: false), i.e. the
# prefix is glued directly onto the evaluated value.
arguments:
  # Memory flag built from the `memory` input: -Xmx<memory>M
  - valueFrom: "$(inputs.memory)M"
    prefix: '-Xmx'
    separate: false
  # First read output, named after the `identifier` input.
  - valueFrom: "$(inputs.identifier)_1.fq.gz"
    prefix: 'out='
    separate: false
  # Second read output.
  - valueFrom: "$(inputs.identifier)_2.fq.gz"
    prefix: 'out2='
    separate: false
  # Statistics report file.
  - valueFrom: "$(inputs.identifier)_bbduk-stats.txt"
    prefix: 'stats='
    separate: false
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
# Executable: bbmap.sh from the pinned BBMap v38.95 installation.
baseCommand: [/unlock/infrastructure/binaries/BBMap/BBMap_v38.95/bbmap.sh]

# Fixed BBMap options plus report/output names derived from the
# `identifier` input; the memory flag is built from the `memory` input.
arguments:
  - "-Xmx$(inputs.memory)M"
  - "printunmappedcount"
  - "overwrite=true"
  - "bloom=t"
  - "statsfile=$(inputs.identifier)_BBMap_stats.txt"
  - "covstats=$(inputs.identifier)_BBMap_covstats.txt"
  # Filtered read outputs: the mapped reads (outm1/outm2) when
  # `output_mapped` is truthy, otherwise the unmapped reads (outu1/outu2).
  # The two options are returned as an array so that each one is passed as
  # its own argument, rather than as a single string joined by the
  # whitespace left over from an in-string line continuation.
  - |
    ${
      var key = inputs.output_mapped ? 'outm' : 'outu';
      return [
        key + '1=' + inputs.identifier + '_filtered_1.fq.gz',
        key + '2=' + inputs.identifier + '_filtered_2.fq.gz'
      ];
    }
  # Disabled options, kept for reference:
  # - "fast"
  # - "minratio=0.9"
  # - "maxindel=3"
  # - "bwr=0.16"
  # - "bw=12"
  # - "minhits=2"
  # - "qtrim=r"
  # - "trimq=10"
  # - "untrim"
  # - "idtag"
  # - "kfilter=25"
  # - "maxsites=1"
  # - "k=14"
  # - "nodisk=t"
  # - "out=$(inputs.identifier)_BBMap.sam"
  # - "rpkm=$(inputs.identifier).rpkm"
61
62
63
64
65
66
67
# Executable: bowtie2 v2.4.5 from the pinned infrastructure path.
baseCommand:
  - /unlock/infrastructure/binaries/bowtie2/bowtie2-v2.4.5/bowtie2

# Output file names are derived from the `prefix` input.
arguments:
  # SAM alignment output.
  - valueFrom: "$(inputs.prefix)_bowtie2.sam"
    prefix: '-S'
  # Alignment metrics file.
  - valueFrom: "$(inputs.prefix)_bowtie2_metrics.txt"
    prefix: '--met-file'
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
# Output naming for fastp; every file name is derived from the
# `identifier` input.
arguments:
  # First read output (always written).
  - prefix: --out1
    valueFrom: $(inputs.identifier)_fastp_1.fq.gz
  # Second read output, only when `reverse_reads` is supplied. Returning
  # null makes the runner omit both the prefix and the value, so no empty
  # string ends up on the command line.
  - prefix: --out2
    valueFrom: |
      ${
        if (inputs.reverse_reads){
          return inputs.identifier + "_fastp_2.fq.gz";
        } else {
          return null;
        }
      }
  # Merged read output, only when `merge_reads` is set.
  # NOTE(review): unlike the other outputs, this file name has no underscore
  # between the identifier and "merged". It is kept as-is because the
  # matching output glob is not visible here; confirm before renaming.
  - prefix: --merged_out
    valueFrom: |
      ${
        if (inputs.merge_reads){
          return inputs.identifier + "merged_fastp.fq.gz";
        } else {
          return null;
        }
      }

  # HTML and JSON reports.
  - prefix: "-h"
    valueFrom: $(inputs.identifier)_fastp.html
  - prefix: "-j"
    valueFrom: $(inputs.identifier)_fastp.json


# Executable: fastp v0.23.2 from the pinned infrastructure path.
baseCommand: [/unlock/infrastructure/binaries/fastp/fastp-v0.23.2/fastp]
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
# Executable: FastQC v0.11.9 from the pinned infrastructure path.
baseCommand:
  - /unlock/infrastructure/binaries/FastQC/FastQC_v0.11.9/fastqc

label: 'FASTQC'
doc: |
  Performs quality control on FASTQ files

requirements:
  - class: InlineJavascriptRequirement
  # Stage an empty, writable directory named FASTQC; it is the target
  # passed to --outdir below.
  - class: InitialWorkDirRequirement
    listing:
      - entryname: 'FASTQC'
        entry: "$({class: 'Directory', listing: []})"
        writable: true

arguments:
  - '--outdir'
  - 'FASTQC'

inputs:
  # Single optional file passed after the --nano flag (position 101).
  nanopore:
    label: FASTQ files list
    doc: FastQ files list
    type: File?
    inputBinding:
      prefix: --nano
      position: 101
  # Optional list of files passed positionally (position 100).
  fastq:
    label: FASTQ file list
    doc: FastQ file list
    type: File[]?
    inputBinding:
      position: 100
  # Optional list of paths given as plain strings (position 102).
  fastq_path:
    label: FastQ file paths
    doc: FastQ file path list
    # type: File[]?
    type: string[]?
    inputBinding:
      position: 102
  # Value for --threads; defaults to 1.
  threads:
    type: int?
    default: 1
    inputBinding:
      prefix: --threads
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
# Executable: kraken2 2.0.9-beta from the pinned infrastructure path.
baseCommand:
  - /unlock/infrastructure/binaries/kraken2-2.0.9-beta/kraken2

label: 'Kraken2 metagenomics read classification'
doc: |
  Kraken2 metagenomics read classification.

  Updated databases available at: https://benlangmead.github.io/aws-indexes/k2 (e.g. PlusPF-8)
  Original db: https://ccb.jhu.edu/software/kraken2/index.shtml?t=downloads

requirements:
  - class: InlineJavascriptRequirement

# Output names combine the `identifier` input with the last path component
# of the `database` string (the text after its final '/').
arguments:
  - prefix: --output
    valueFrom: "$(inputs.identifier)_$(inputs.database.split( '/' ).pop())_kraken2.txt"
  - prefix: --report
    valueFrom: "$(inputs.identifier)_$(inputs.database.split( '/' ).pop())_kraken2_report.txt"
  - '--report-zero-counts'
  - '--use-names'

# Tool inputs. Read files are placed at positions 100-102; the boolean
# switches are placed earlier (positions 2-3).
inputs:
  # Value for --threads; defaults to 1.
  threads:
    type: int?
    default: 1
    inputBinding:
      prefix: --threads
  # Used only to build output file names (no inputBinding).
  identifier:
    type: string
    doc: Identifier for this dataset used in this workflow
    label: identifier used
  # Passed as a plain string after --db.
  database:
    type: string
    doc: database location of kraken2
    inputBinding:
      prefix: --db

  # Short reads
  forward_reads:
    type: File?
    inputBinding:
      position: 100
  reverse_reads:
    type: File?
    inputBinding:
      position: 101
  paired_end:
    type: boolean?
    doc: "data paired end (separate files)"
    inputBinding:
      position: 2
      prefix: "--paired"
    default: false

  # Long reads
  nanopore:  # Oxford Nanopore Technologies reads in FASTQ
    type: File?
    inputBinding:
      position: 102

  # Compression switches for the input reads.
  gzip:
    type: boolean?
    doc: "input data is gzip compressed"
    inputBinding:
      position: 3
      prefix: '--gzip-compressed'
    default: false
  bzip2:
    type: boolean?
    doc: "input data is bzip2 compressed"
    inputBinding:
      position: 3
      prefix: '--bzip2-compressed'
    default: false
13
# Run script.sh through bash (presumably the wrapper staged into the
# working directory by this tool; confirm against the full file).
baseCommand:
  - bash
  - script.sh
CWL From line 13 of krona/krona.cwl
27
28
29
30
31
32
33
# Wrapper script: activates the Krona v2.8.1 conda environment and then runs
# ktImportTaxonomy with fixed -t 5 -m 3 options (per Krona's usage these
# select the taxonomy-ID and magnitude columns; confirm against the report
# format in use), forwarding all script arguments.
- entryname: script.sh
  # "$@" is quoted so that each forwarded argument stays a single word even
  # when it contains spaces or glob characters.
  entry: |-
    #!/bin/bash
    source /root/miniconda/bin/activate
    conda init bash
    conda activate /unlock/infrastructure/conda/krona_v2.8.1
    ktImportTaxonomy -t 5 -m 3 "$@"
33
34
35
36
37
# Executable: featureCounts from the pinned subread 2.0.1 installation.
baseCommand:
  - /unlock/infrastructure/binaries/subread-2.0.1/bin/featureCounts

arguments:
  # Output table, named after the `prefix` input.
  - valueFrom: "$(inputs.prefix)_FeatureCounts.txt"
    prefix: '-o'
148
149
150
151
152
153
# Executable: kallisto v0.46.1, always invoked with the `quant` subcommand.
baseCommand:
  - /unlock/infrastructure/binaries/kallisto/kallisto_v0.46.1/kallisto
  - quant

arguments:
  # Output directory, emitted as one fused token:
  # --output-dir=<prefix>_kallisto
  - valueFrom: "$(inputs.prefix)_kallisto"
    prefix: '--output-dir='
    separate: false
37
38
39
40
41
42
43
44
45
arguments:
    # Builds a two-stage shell pipeline: `samtools view -hu` on the `sam`
    # input piped into `samtools sort`, writing <identifier>.bam. Both stages
    # get the `threads` input via -@. shellQuote is disabled so the pipe
    # character is interpreted by the shell instead of being quoted.
    - shellQuote: false
      valueFrom: >
        ${
          var samtools_path = "/unlock/infrastructure/binaries/samtools/samtools-v1.15/bin/samtools";
          // Use the staged local path of the input; `location` is an IRI
          // and is not guaranteed to be a path usable inside the job.
          var view_cmd = samtools_path + " view -@ " + inputs.threads + " -hu " + inputs.sam.path;
          var sort_cmd = samtools_path + " sort -@ " + inputs.threads + " -o " + inputs.identifier + ".bam";
          return view_cmd + " | " + sort_cmd;
        }
ShowHide 2 more snippets with no or duplicated tags.

Login to post a comment if you would like to share your experience with this workflow.

Do you know this workflow well? If so, you can request seller status and start supporting this workflow.

Free

Created: 1yr ago
Updated: 1yr ago
Maintainers: public
URL: https://git.wur.nl/unlock/cwl/-/blob/master/cwl/workflows/workflow_RNAseq_NonSpliced.cwl
Name: nonspliced-rnaseq-workflow
Version: Version 1
Badge:
workflow icon

Insert copied code into your website to add a link to this workflow.

Downloaded: 0
Copyright: Public Domain
License: None
  • Future updates

Related Workflows

cellranger-snakemake-gke
snakemake workflow to run cellranger on a given bucket using gke.
A Snakemake workflow for running cellranger on a given bucket using Google Kubernetes Engine. The usage of this workflow ...