Spliced RNAseq workflow

public 1yr ago Version: Version 1 0 bookmarks

View Workflow

Workflow for Spliced RNAseq data. Steps:

workflow_quality.cwl:
- FastQC (Read Quality Control)
- fastp (Read Trimming)
- STAR (Read mapping)
- featurecounts (transcript read counts)
- kallisto (transcript [pseudo]counts)

Code Snippets

# BBDuk invocation: every fixed argument below is a "key=value" style token,
# so the prefix and the value are glued together with `separate: false`.
baseCommand:
  - /unlock/infrastructure/binaries/BBMap/BBMap_v38.95/bbduk.sh
arguments:
  # JVM heap option, rendered as -Xmx<memory>M.
  - valueFrom: $(inputs.memory)M
    prefix: '-Xmx'
    separate: false
  # First output read file, named after the dataset identifier.
  - valueFrom: $(inputs.identifier)_1.fq.gz
    prefix: 'out='
    separate: false
  # Second output read file, named after the dataset identifier.
  - valueFrom: $(inputs.identifier)_2.fq.gz
    prefix: 'out2='
    separate: false
  # Statistics report written next to the read files.
  - valueFrom: $(inputs.identifier)_bbduk-stats.txt
    prefix: 'stats='
    separate: false

CWL BBMap From line 53 of bbmap/bbduk_filter.cwl

baseCommand: [/unlock/infrastructure/binaries/BBMap/BBMap_v38.95/bbmap.sh]

arguments:
  # JVM heap option, rendered as -Xmx<memory>M.
  - "-Xmx$(inputs.memory)M"
  - "printunmappedcount"
  - "overwrite=true"
  - "bloom=t"
  # Report files are named after the dataset identifier.
  - "statsfile=$(inputs.identifier)_BBMap_stats.txt"
  - "covstats=$(inputs.identifier)_BBMap_covstats.txt"
  # Output selection: when inputs.output_mapped is truthy the outm1/outm2
  # options are used, otherwise outu1/outu2. Each option is produced by its
  # own expression so it reaches the tool as a separate command-line argument.
  # A single string built with a backslash line continuation would carry both
  # options, plus the indentation of the continued line, in one argument.
  - |-
    ${
      var option = inputs.output_mapped ? 'outm1=' : 'outu1=';
      return option + inputs.identifier + '_filtered_1.fq.gz';
    }
  - |-
    ${
      var option = inputs.output_mapped ? 'outm2=' : 'outu2=';
      return option + inputs.identifier + '_filtered_2.fq.gz';
    }
  # Optional tuning flags, currently disabled:
  # - "fast"
  # - "minratio=0.9"
  # - "maxindel=3"
  # - "bwr=0.16"
  # - "bw=12"
  # - "minhits=2"
  # - "qtrim=r"
  # - "trimq=10"
  # - "untrim"
  # - "idtag"
  # - "kfilter=25"
  # - "maxsites=1"
  # - "k=14"
  # - "nodisk=t"
  # - "out=$(inputs.identifier)_BBMap.sam"
  # - "rpkm=$(inputs.identifier).rpkm"

CWL BBMap From line 80 of bbmap/bbmap_filter-reads.cwl

arguments:
  # Forward-read output is always written.
  - prefix: --out1
    valueFrom: $(inputs.identifier)_fastp_1.fq.gz
  # Reverse-read output: only present when reverse reads were supplied.
  # Returning null drops both the flag and its value from the command line,
  # whereas returning '' would leave empty-string arguments behind.
  - prefix: --out2
    valueFrom: |-
      ${
        if (inputs.reverse_reads){
          return inputs.identifier + "_fastp_2.fq.gz";
        }
        return null;
      }
  # Merged-read output: only present when merging was requested.
  # NOTE(review): unlike the other outputs there is no "_" between the
  # identifier and "merged"; left unchanged because the matching output glob
  # is not visible here - confirm before renaming.
  - prefix: --merged_out
    valueFrom: |-
      ${
        if (inputs.merge_reads){
          return inputs.identifier + "merged_fastp.fq.gz";
        }
        return null;
      }

  # HTML and JSON reports, named after the dataset identifier.
  - prefix: "-h"
    valueFrom: $(inputs.identifier)_fastp.html
  - prefix: "-j"
    valueFrom: $(inputs.identifier)_fastp.json


baseCommand: [/unlock/infrastructure/binaries/fastp/fastp-v0.23.2/fastp]

CWL fastp From line 72 of fastp/fastp.cwl

baseCommand:
  - /unlock/infrastructure/binaries/FastQC/FastQC_v0.11.9/fastqc

label: FASTQC
doc: |
    Performs quality control on FASTQ files

requirements:
  - class: InlineJavascriptRequirement
  - class: InitialWorkDirRequirement
    listing:
      # Stage an empty, writable directory named FASTQC in the working
      # directory; it is the target of --outdir below.
      - entryname: FASTQC
        entry: "$({class: 'Directory', listing: []})"
        writable: true

arguments:
  - '--outdir'
  - FASTQC

inputs:
  # Single optional file, emitted after the --nano prefix at position 101.
  nanopore:
    label: FASTQ files list
    doc: FastQ files list
    type: File?
    inputBinding:
      prefix: --nano
      position: 101
  # Optional list of files, emitted as plain positional arguments.
  fastq:
    label: FASTQ file list
    doc: FastQ file list
    type: File[]?
    inputBinding:
      position: 100
  # Optional list of paths given as strings rather than File objects.
  fastq_path:
    label: FastQ file paths
    doc: FastQ file path list
    # type: File[]?
    type: string[]?
    inputBinding:
      position: 102
  # Value forwarded to --threads; defaults to 1.
  threads:
    type: int?
    default: 1
    inputBinding:
      prefix: --threads

CWL FastQC From line 13 of fastqc/fastqc.cwl

baseCommand: [ /unlock/infrastructure/binaries/kraken2-2.0.9-beta/kraken2 ]

label: "Kraken2 metagenomics read classification"
doc: |
    Kraken2 metagenomics read classification.

    Updated databases available at: https://benlangmead.github.io/aws-indexes/k2 (e.g. PlusPF-8)
    Original db: https://ccb.jhu.edu/software/kraken2/index.shtml?t=downloads

requirements:
  - class: InlineJavascriptRequirement

arguments:
  # Output and report names combine the dataset identifier with the last
  # "/"-separated component of the database location.
  # NOTE(review): a database value ending in "/" makes pop() return an empty
  # string, so the database part of the file name would be empty.
  - valueFrom: $(inputs.identifier)_$(inputs.database.split( '/' ).pop())_kraken2.txt
    prefix: --output
  - valueFrom: $(inputs.identifier)_$(inputs.database.split( '/' ).pop())_kraken2_report.txt
    prefix: --report
  - "--report-zero-counts"
  - "--use-names"

inputs:
  threads:
    type: int?
    default: 1
    inputBinding:
      prefix: --threads
  identifier:
    type: string
    doc: Identifier for this dataset used in this workflow
    label: identifier used
  # Passed as a string (not a Directory), so it is also usable in the
  # file-name expressions above.
  database:
    type: string
    doc: database location of kraken2
    inputBinding:
      prefix: --db

  # Short reads
  forward_reads:
    type: File?
    inputBinding:
      position: 100
  reverse_reads:
    type: File?
    inputBinding:
      position: 101
  paired_end:
    type:
    - "null"
    - boolean
    doc: "data paired end (separate files)"
    inputBinding:
      position: 2
      prefix: "--paired"
    default: false

  # Long reads
  nanopore: # Oxford Nanopore Technologies reads in FASTQ
    type: File?
    inputBinding:
      position: 102

  # Compression flags for the input files.
  gzip:
    type:
    - "null"
    - boolean
    doc: "input data is gzip compressed"
    inputBinding:
      position: 3
      prefix: '--gzip-compressed'
    default: false
  bzip2:
    type:
    - "null"
    - boolean
    doc: "input data is bzip2 compressed"
    inputBinding:
      position: 3
      prefix: '--bzip2-compressed'
    default: false

CWL kraken2 From line 6 of kraken2/kraken2.cwl

# Run the staged wrapper script through bash.
baseCommand:
  - bash
  - script.sh

CWL From line 13 of krona/krona.cwl

# Wrapper script staged into the working directory: it activates the Krona
# conda environment and then runs ktImportTaxonomy with the tool's arguments.
- entryname: script.sh
  entry: |-
    #!/bin/bash
    source /root/miniconda/bin/activate
    conda init bash
    conda activate /unlock/infrastructure/conda/krona_v2.8.1
    # "$@" is quoted so that each argument is forwarded unchanged, even when
    # it contains whitespace (for example a file path with spaces).
    ktImportTaxonomy -t 5 -m 3 "$@"

CWL Krona From line 27 of krona/krona.cwl

# featureCounts invocation: the -o target is <prefix>_FeatureCounts.txt.
arguments:
  - valueFrom: $(inputs.prefix)_FeatureCounts.txt
    prefix: '-o'

baseCommand:
  - /unlock/infrastructure/binaries/subread-2.0.1/bin/featureCounts

CWL FeatureCounts From line 33 of RNAseq/featurecounts.cwl

# kallisto quant invocation: the output directory is <prefix>_kallisto and is
# attached directly to the option (--output-dir=<value>).
arguments:
  - valueFrom: $(inputs.prefix)_kallisto
    prefix: '--output-dir='
    separate: false

baseCommand:
  - /unlock/infrastructure/binaries/kallisto/kallisto_v0.46.1/kallisto
  - quant

CWL Quant From line 148 of kallisto/kallisto_quant.cwl

# STAR is always started with --runMode alignReads.
baseCommand:
  - /unlock/infrastructure/binaries/STAR-2.7.3a/bin/Linux_x86_64/STAR
  - '--runMode'
  - alignReads

inputs:
  # Directory forwarded to --genomeDir.
  genomeDir:
    type: Directory
    inputBinding:
      prefix: --genomeDir

  # One file or a list of files; a list is joined with commas.
  # The prefix carries a trailing space and is attached with
  # `separate: false`, so the quotes around it must be kept.
  forward_reads:
    type:
      - File
      - File[]
    inputBinding:
      position: 1
      prefix: "--readFilesIn "
      separate: false
      itemSeparator: ','

  # Optional counterpart of forward_reads; emitted right after it
  # (position 2) with an empty prefix and the same comma joining.
  reverse_reads:
    type:
      - "null"
      - File
      - File[]
    inputBinding:
      position: 2
      prefix: ""
      separate: false
      itemSeparator: ','

  # Optional Inputs
  # Each input below is forwarded to STAR under the option named in its prefix
  # and is omitted from the command line when not supplied.
  threads:
    type: int?
    inputBinding:
      prefix: "--runThreadN"

  OutFileNamePrefix:
    type: string?
    inputBinding:
      prefix: "--outFileNamePrefix"

  quantMode:
    type:
     - "null"
     - type: enum
       symbols:
        - None
        - TranscriptomeSAM
        - GeneCounts
    doc: Run with gene quantification
    inputBinding:
      prefix: "--quantMode"

  sjdbGTFfile:
    type: File?
    inputBinding:
      prefix: "--sjdbGTFfile"

  Overhang:
    type: int?
    inputBinding:
      prefix: "--sjdbOverhang"

  sjdbGTFtagExonParentGene:
    type: string?
    doc: GTF attribute name for parent gene ID (default gene_id)
    inputBinding:
      prefix: "--sjdbGTFtagExonParentGene"

  sjdbGTFtagExonParentGeneName:
    type: string?
    doc: GTF attribute name for parent gene name
    inputBinding:
      prefix: "--sjdbGTFtagExonParentGeneName"

  sjdbGTFtagExonParentGeneType:
    type: string?
    doc: GTF attribute name for parent gene type
    inputBinding:
      prefix: "--sjdbGTFtagExonParentGeneType"

  # Optional enum forwarded to --outFilterType.
  OutFilterType:
    type:
      - "null"
      - type: enum
        symbols: [Normal, BySJout]
    inputBinding:
      prefix: --outFilterType

  # Optional enum forwarded to --outFilterIntronMotifs.
  OutFilterIntronMotifs:
    type:
      - "null"
      - type: enum
        symbols: [None, RemoveNoncanonical, RemoveNoncanonicalUnannotated]
    inputBinding:
      prefix: --outFilterIntronMotifs

  # List of words forwarded to --outSAMtype; defaults to
  # "BAM SortedByCoordinate" when the caller gives nothing.
  outSAMtype:
    doc: |
      strings: type of SAM/BAM output
      1st word:
      BAM  ... output BAM without sorting
      SAM  ... output SAM without sorting
      None ... no SAM/BAM output
      2nd, 3rd:
      Unsorted           ... standard unsorted
      SortedByCoordinate ... sorted by coordinate. This option will allocate extra memory for sorting which can be specified by --limitBAMsortRAM.
    type:
      type: array
      items: string
    default:
      - BAM
      - SortedByCoordinate
    inputBinding:
      prefix: --outSAMtype

  # Command forwarded to --readFilesCommand; zcat unless overridden.
  ReadFilesCommand:
    type: string?
    default: zcat
    inputBinding:
      prefix: --readFilesCommand

  # Optional integer settings, each forwarded under the option in its prefix.
  AlignIntronMin:
    type: int?
    inputBinding:
      prefix: --alignIntronMin

  AlignIntronMax:
    type: int?
    inputBinding:
      prefix: --alignIntronMax

  AlignMatesGapMax:
    type: int?
    inputBinding:
      prefix: --alignMatesGapMax

  AlignSJoverhangMin:
    type: int?
    inputBinding:
      prefix: --alignSJoverhangMin

  AlignSJDBoverhangMin:
    type: int?
    inputBinding:
      prefix: --alignSJDBoverhangMin

  SeedSearchStartLmax:
    type: int?
    inputBinding:
      prefix: --seedSearchStartLmax

  # Optional enum forwarded to --chimOutType. Without an inputBinding a
  # supplied value would be accepted but never placed on the command line,
  # unlike every other input of this tool.
  ChimOutType:
    type:
     - "null"
     - type: enum
       symbols:
        - Junctions
        - SeparateSAMold
        - WithinBAM
        - "WithinBAM HardClip"
        - "WithinBAM SoftClip"
    inputBinding:
      prefix: "--chimOutType"

  # Optional numeric settings, each forwarded under the option in its prefix.
  ChimSegmentMin:
    type: int?
    inputBinding:
      prefix: --chimSegmentMin

  ChimJunctionOverhangMin:
    type: int?
    inputBinding:
      prefix: --chimJunctionOverhangMin

  OutFilterMultimapNmax:
    type: int?
    inputBinding:
      prefix: --outFilterMultimapNmax

  OutFilterMismatchNmax:
    type: int?
    inputBinding:
      prefix: --outFilterMismatchNmax

  # The only floating-point setting in this group.
  OutFilterMismatchNoverLmax:
    type: double?
    inputBinding:
      prefix: --outFilterMismatchNoverLmax

  # Optional enum forwarded to --outReadsUnmapped.
  OutReadsUnmapped:
    type:
      - "null"
      - type: enum
        symbols: [None, Fastx]
    inputBinding:
      prefix: --outReadsUnmapped

  # Optional enum forwarded to --outSAMstrandField.
  OutSAMstrandField:
    type:
      - "null"
      - type: enum
        symbols: [None, intronMotif]
    inputBinding:
      prefix: --outSAMstrandField

  # Optional enum forwarded to --outSAMunmapped; one symbol contains a space.
  OutSAMunmapped:
    type:
      - "null"
      - type: enum
        symbols: [None, Within, "Within KeepPairs"]
    inputBinding:
      prefix: --outSAMunmapped

  # Optional integer forwarded to --outSAMmapqUnique.
  OutSAMmapqUnique:
    type: int?
    inputBinding:
      prefix: --outSAMmapqUnique

  # Optional enum forwarded to --outSAMmode.
  OutSamMode:
    type:
      - "null"
      - type: enum
        symbols: [None, Full, NoQS]
    inputBinding:
      prefix: --outSAMmode

  # Optional integer forwarded to --limitOutSAMoneReadBytes.
  LimitOutSAMoneReadBytes:
    type: int?
    inputBinding:
      prefix: --limitOutSAMoneReadBytes

  # Optional enum forwarded to --genomeLoad.
  GenomeLoad:
    type:
      - "null"
      - type: enum
        symbols:
          - LoadAndKeep
          - LoadAndRemove
          - LoadAndExit
          - Remove
          - NoSharedMemory
    inputBinding:
      prefix: --genomeLoad