Analysis of RNA sequencing data obtained from organisms with a reference genome and annotation, followed by prediction of RNA editing sites using RDDpred
Introduction
vibbits/rnaseq-editing is a bioinformatics pipeline that can be used to analyse RNA sequencing data obtained from organisms with a reference genome and annotation, followed by a prediction step that identifies RNA editing sites using RDDpred.
The pipeline is largely based on the nf-core RNAseq pipeline.
The initial nf-core pipeline is built using Nextflow, a workflow tool to run tasks across multiple compute infrastructures in a very portable manner. It uses Docker/Singularity containers, making installation trivial and results highly reproducible. The Nextflow DSL2 implementation of this pipeline uses one container per process, which makes it much easier to maintain and update software dependencies. Where possible, these processes have been submitted to and installed from nf-core/modules in order to make them available to all nf-core pipelines, and to everyone within the Nextflow community!
Code Snippets
39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 | """ bedtools \\ genomecov \\ -ibam $bam \\ -bg \\ -strand + \\ $options.args \\ | bedtools sort > ${prefix_forward}.bedGraph bedtools \\ genomecov \\ -ibam $bam \\ -bg \\ -strand - \\ $options.args \\ | bedtools sort > ${prefix_reverse}.bedGraph bedtools --version | sed -e "s/bedtools v//g" > ${software}.version.txt """ |
34 35 36 37 38 | """ fasta2gtf.py -o ${add_fasta.baseName}.gtf $biotype_name $add_fasta cat $fasta $add_fasta > ${name}.fasta cat $gtf ${add_fasta.baseName}.gtf > ${name}.gtf """ |
43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 | """ deseq2_qc.r \\ --count_file $counts \\ --outdir ./ \\ --cores $task.cpus \\ $options.args if [ -f "R_sessionInfo.log" ]; then sed "s/deseq2_pca/${label_lower}_deseq2_pca/g" <$pca_header_multiqc >tmp.txt sed -i -e "s/DESeq2 PCA/${label_upper} DESeq2 PCA/g" tmp.txt cat tmp.txt *.pca.vals.txt > ${label_lower}.pca.vals_mqc.tsv sed "s/deseq2_clustering/${label_lower}_deseq2_clustering/g" <$clustering_header_multiqc >tmp.txt sed -i -e "s/DESeq2 sample/${label_upper} DESeq2 sample/g" tmp.txt cat tmp.txt *.sample.dists.txt > ${label_lower}.sample.dists_mqc.tsv fi Rscript -e "library(DESeq2); write(x=as.character(packageVersion('DESeq2')), file='${software}.version.txt')" """ |
47 48 49 50 | """ dupradar.r $bam $prefix $gtf $strandedness $paired_end $task.cpus Rscript -e "library(dupRadar); write(x=as.character(packageVersion('dupRadar')), file='${software}.version.txt')" """ |
34 35 36 37 38 | """ samtools faidx $fasta cut -f 1,2 ${fasta}.fai > ${fasta}.sizes echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//' > ${software}.version.txt """ |
32 33 34 35 36 | """ echo $workflow.manifest.version > pipeline.version.txt echo $workflow.nextflow.version > nextflow.version.txt scrape_software_versions.py &> software_versions_mqc.yaml """ |
27 28 29 | """ gtf2bed $gtf > ${gtf.baseName}.bed """ |
32 33 34 | """ filter_gtf_for_genes_in_genome.py --gtf $gtf --fasta $fasta -o ${fasta.baseName}_genes.gtf """ |
35 36 37 38 | """ cut -f 1,7 $count | tail -n +3 | cat $header - >> ${prefix}.biotype_counts_mqc.tsv mqc_features_stat.py ${prefix}.biotype_counts_mqc.tsv -s $meta.id -f rRNA -o ${prefix}.biotype_counts_rrna_mqc.tsv """ |
30 31 32 33 | """ echo "Sample\tSTAR uniquely mapped reads (%)" > fail_mapped_samples_mqc.tsv echo "${fail_mapped.join('\n')}" >> fail_mapped_samples_mqc.tsv """ |
35 36 37 | """ touch fail_mapped_samples_mqc.tsv """ |
30 31 32 33 | """ echo "Sample\tProvided strandedness\tInferred strandedness\tSense (%)\tAntisense (%)\tUndetermined (%)" > fail_strand_check_mqc.tsv echo "${fail_strand.join('\n')}" >> fail_strand_check_mqc.tsv """ |
35 36 37 | """ touch fail_strand_check_mqc.tsv """ |
52 53 54 | """ multiqc -f $options.args $custom_config . """ |
30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 | """ mkdir -p tmp/genes cut -f 1,2 `ls ./genes/* | head -n 1` > gene_ids.txt for fileid in `ls ./genes/*`; do samplename=`basename \$fileid | sed s/\\.genes.results\$//g` echo \$samplename > tmp/genes/\${samplename}.counts.txt cut -f 5 \${fileid} | tail -n+2 >> tmp/genes/\${samplename}.counts.txt echo \$samplename > tmp/genes/\${samplename}.tpm.txt cut -f 6 \${fileid} | tail -n+2 >> tmp/genes/\${samplename}.tpm.txt done mkdir -p tmp/isoforms cut -f 1,2 `ls ./isoforms/* | head -n 1` > transcript_ids.txt for fileid in `ls ./isoforms/*`; do samplename=`basename \$fileid | sed s/\\.isoforms.results\$//g` echo \$samplename > tmp/isoforms/\${samplename}.counts.txt cut -f 5 \${fileid} | tail -n+2 >> tmp/isoforms/\${samplename}.counts.txt echo \$samplename > tmp/isoforms/\${samplename}.tpm.txt cut -f 6 \${fileid} | tail -n+2 >> tmp/isoforms/\${samplename}.tpm.txt done paste gene_ids.txt tmp/genes/*.counts.txt > rsem.merged.gene_counts.tsv paste gene_ids.txt tmp/genes/*.tpm.txt > rsem.merged.gene_tpm.tsv paste transcript_ids.txt tmp/isoforms/*.counts.txt > rsem.merged.transcript_counts.tsv paste transcript_ids.txt tmp/isoforms/*.tpm.txt > rsem.merged.transcript_tpm.tsv """ |
36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 | """ mkdir -p tmp/genes_counts echo "${params.gtf_group_features}" > gene_ids.txt cut -f 1 `ls ./genes_counts/* | head -n 1` | tail -n +2 >> gene_ids.txt for fileid in `ls ./genes_counts/*`; do filename=`basename \$fileid` cut -f 2 \${fileid} > tmp/genes_counts/\${filename} done mkdir -p tmp/genes_tpm for fileid in `ls ./genes_tpm/*`; do filename=`basename \$fileid` cut -f 2 \${fileid} > tmp/genes_tpm/\${filename} done mkdir -p tmp/genes_counts_length_scaled for fileid in `ls ./genes_counts_length_scaled/*`; do filename=`basename \$fileid` cut -f 2 \${fileid} > tmp/genes_counts_length_scaled/\${filename} done mkdir -p tmp/genes_tpm_length_scaled for fileid in `ls ./genes_tpm_length_scaled/*`; do filename=`basename \$fileid` cut -f 2 \${fileid} > tmp/genes_tpm_length_scaled/\${filename} done mkdir -p tmp/genes_counts_scaled for fileid in `ls ./genes_counts_scaled/*`; do filename=`basename \$fileid` cut -f 2 \${fileid} > tmp/genes_counts_scaled/\${filename} done mkdir -p tmp/genes_tpm_scaled for fileid in `ls ./genes_tpm_scaled/*`; do filename=`basename \$fileid` cut -f 2 \${fileid} > tmp/genes_tpm_scaled/\${filename} done mkdir -p tmp/isoforms_counts echo "transcript_id" > transcript_ids.txt cut -f 1 `ls ./isoforms_counts/* | head -n 1` | tail -n +2 >> transcript_ids.txt for fileid in `ls ./isoforms_counts/*`; do filename=`basename \$fileid` cut -f 2 \${fileid} > tmp/isoforms_counts/\${filename} done mkdir -p tmp/isoforms_tpm for fileid in `ls ./isoforms_tpm/*`; do filename=`basename \$fileid` cut -f 2 \${fileid} > tmp/isoforms_tpm/\${filename} done paste gene_ids.txt tmp/genes_counts/* > salmon.merged.gene_counts.tsv paste gene_ids.txt tmp/genes_tpm/* > salmon.merged.gene_tpm.tsv paste gene_ids.txt tmp/genes_counts_length_scaled/* > salmon.merged.gene_counts_length_scaled.tsv paste 
gene_ids.txt tmp/genes_counts_scaled/* > salmon.merged.gene_counts_scaled.tsv paste transcript_ids.txt tmp/isoforms_counts/* > salmon.merged.transcript_counts.tsv paste transcript_ids.txt tmp/isoforms_tpm/* > salmon.merged.transcript_tpm.tsv """ |
28 29 30 31 32 33 34 35 | """ salmon_tx2gene.py \\ --gtf $gtf \\ --salmon salmon \\ --id $params.gtf_group_features \\ --extra $params.gtf_extra_attributes \\ -o salmon_tx2gene.tsv """ |
33 34 35 36 | """ salmon_tximport.r NULL salmon salmon.merged Rscript -e "library(tximeta); write(x=as.character(packageVersion('tximeta')), file='bioconductor-tximeta.version.txt')" """ |
31 32 33 34 35 | """ check_samplesheet.py \\ $samplesheet \\ samplesheet.valid.csv """ |
32 33 34 | """ cat ${readList.sort().join(' ')} > ${prefix}.merged.fastq.gz """ |
41 42 43 44 | """ cat ${read1.sort().join(' ')} > ${prefix}_1.merged.fastq.gz cat ${read2.sort().join(' ')} > ${prefix}_2.merged.fastq.gz """ |
38 39 40 41 42 | """ [ ! -f ${prefix}.fastq.gz ] && ln -s $reads ${prefix}.fastq.gz fastqc $options.args --threads $task.cpus ${prefix}.fastq.gz fastqc --version | sed -e "s/FastQC v//g" > ${software}.version.txt """ |
44 45 46 47 48 49 | """ [ ! -f ${prefix}_1.fastq.gz ] && ln -s ${reads[0]} ${prefix}_1.fastq.gz [ ! -f ${prefix}_2.fastq.gz ] && ln -s ${reads[1]} ${prefix}_2.fastq.gz fastqc $options.args --threads $task.cpus ${prefix}_1.fastq.gz ${prefix}_2.fastq.gz fastqc --version | sed -e "s/FastQC v//g" > ${software}.version.txt """ |
30 31 32 33 | """ gffread $gff $options.args -o ${gff.baseName}.gtf echo \$(gffread --version 2>&1) > ${software}.version.txt """ |
48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 | """ INDEX=`find -L ./ -name "*.1.ht2" | sed 's/.1.ht2//'` hisat2 \\ -x \$INDEX \\ -U $reads \\ $strandedness \\ --known-splicesite-infile $splicesites \\ --summary-file ${prefix}.hisat2.summary.log \\ --threads $task.cpus \\ $seq_center \\ $unaligned \\ $options.args \\ | samtools view -bS -F 4 -F 256 - > ${prefix}.bam echo $VERSION > ${software}.version.txt """ |
66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 | """ INDEX=`find -L ./ -name "*.1.ht2" | sed 's/.1.ht2//'` hisat2 \\ -x \$INDEX \\ -1 ${reads[0]} \\ -2 ${reads[1]} \\ $strandedness \\ --known-splicesite-infile $splicesites \\ --summary-file ${prefix}.hisat2.summary.log \\ --threads $task.cpus \\ $seq_center \\ $unaligned \\ --no-mixed \\ --no-discordant \\ $options.args \\ | samtools view -bS -F 4 -F 8 -F 256 - > ${prefix}.bam if [ -f ${prefix}.unmapped.fastq.1.gz ]; then mv ${prefix}.unmapped.fastq.1.gz ${prefix}.unmapped_1.fastq.gz fi if [ -f ${prefix}.unmapped.fastq.2.gz ]; then mv ${prefix}.unmapped.fastq.2.gz ${prefix}.unmapped_2.fastq.gz fi echo $VERSION > ${software}.version.txt """ |
57 58 59 60 61 62 63 64 65 66 67 68 69 | """ mkdir hisat2 $extract_exons hisat2-build \\ -p $task.cpus \\ $ss \\ $exon \\ $options.args \\ $fasta \\ hisat2/${fasta.baseName} echo $VERSION > ${software}.version.txt """ |
32 33 34 35 | """ hisat2_extract_splice_sites.py $gtf > ${gtf.baseName}.splice_sites.txt echo $VERSION > ${software}.version.txt """ |
38 39 40 41 42 43 44 45 46 47 48 | """ picard \\ -Xmx${avail_mem}g \\ MarkDuplicates \\ $options.args \\ INPUT=$bam \\ OUTPUT=${prefix}.bam \\ METRICS_FILE=${prefix}.MarkDuplicates.metrics.txt echo \$(picard MarkDuplicates --version 2>&1) | grep -o 'Version:.*' | cut -f2- -d: > ${software}.version.txt """ |
34 35 36 37 38 39 40 41 42 43 44 | """ preseq \\ lc_extrap \\ $options.args \\ $paired_end \\ -output ${prefix}.ccurve.txt \\ $bam cp .command.err ${prefix}.command.log echo \$(preseq 2>&1) | sed 's/^.*Version: //; s/Usage:.*\$//' > ${software}.version.txt """ |
45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 | """ unset DISPLAY mkdir tmp export _JAVA_OPTIONS=-Djava.io.tmpdir=./tmp qualimap \\ --java-mem-size=$memory \\ rnaseq \\ $options.args \\ -bam $bam \\ -gtf $gtf \\ -p $strandedness \\ $paired_end \\ -outdir $prefix echo \$(qualimap 2>&1) | sed 's/^.*QualiMap v.//; s/Built.*\$//' > ${software}.version.txt """ |
54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 | """ [ ! -f $negative ] && ln -s $negative ${params.neg_site_list} [ ! -f $positive ] && ln -s $positive ${params.pos_site_list} python /code/RDDpred.py \\ -rsf ${params.fasta} \\ -rbl $groupfile \\ -pni $cores \\ -ops $groups \\ -psl $positive \\ -nsl $negative \\ $options.args echo '1.1.4' > ${software}.version.txt rm $groupfile """ |
47 48 49 50 51 52 53 54 55 56 57 58 59 60 | """ INDEX=`find -L ./ -name "*.grp" | sed 's/.grp//'` rsem-calculate-expression \\ --num-threads $task.cpus \\ --temporary-folder ./tmp/ \\ $strandedness \\ $paired_end \\ $options.args \\ $reads \\ \$INDEX \\ $prefix rsem-calculate-expression --version | sed -e "s/Current version: RSEM v//g" > ${software}.version.txt """ |
39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 | """ STAR \\ --runMode genomeGenerate \\ --genomeDir rsem/ \\ --genomeFastaFiles $fasta \\ --sjdbGTFfile $gtf \\ --runThreadN $task.cpus \\ $memory \\ $options.args2 rsem-prepare-reference \\ --gtf $gtf \\ --num-threads $task.cpus \\ ${args.join(' ')} \\ $fasta \\ rsem/genome rsem-calculate-expression --version | sed -e "s/Current version: RSEM v//g" > ${software}.version.txt """ |
59 60 61 62 63 64 65 66 67 68 | """ rsem-prepare-reference \\ --gtf $gtf \\ --num-threads $task.cpus \\ $options.args \\ $fasta \\ rsem/genome rsem-calculate-expression --version | sed -e "s/Current version: RSEM v//g" > ${software}.version.txt """ |
36 37 38 39 40 41 42 43 | """ bam_stat.py \\ -i $bam \\ $options.args \\ > ${prefix}.bam_stat.txt bam_stat.py --version | sed -e "s/bam_stat.py //g" > ${software}.version.txt """ |
36 37 38 39 40 41 42 43 44 | """ infer_experiment.py \\ -i $bam \\ -r $bed \\ $options.args \\ > ${prefix}.infer_experiment.txt infer_experiment.py --version | sed -e "s/infer_experiment.py //g" > ${software}.version.txt """ |
43 44 45 46 47 48 49 50 51 52 53 | """ inner_distance.py \\ -i $bam \\ -r $bed \\ -o $prefix \\ $options.args \\ > stdout.txt head -n 2 stdout.txt > ${prefix}.inner_distance_mean.txt inner_distance.py --version | sed -e "s/inner_distance.py //g" > ${software}.version.txt """ |
55 56 57 | """ inner_distance.py --version | sed -e "s/inner_distance.py //g" > ${software}.version.txt """ |
42 43 44 45 46 47 48 49 50 51 | """ junction_annotation.py \\ -i $bam \\ -r $bed \\ -o $prefix \\ $options.args \\ 2> ${prefix}.junction_annotation.log junction_annotation.py --version | sed -e "s/junction_annotation.py //g" > ${software}.version.txt """ |
39 40 41 42 43 44 45 46 47 | """ junction_saturation.py \\ -i $bam \\ -r $bed \\ -o $prefix \\ $options.args junction_saturation.py --version | sed -e "s/junction_saturation.py //g" > ${software}.version.txt """ |
37 38 39 40 41 42 43 44 | """ read_distribution.py \\ -i $bam \\ -r $bed \\ > ${prefix}.read_distribution.txt read_distribution.py --version | sed -e "s/read_distribution.py //g" > ${software}.version.txt """ |
38 39 40 41 42 43 44 45 | """ read_duplication.py \\ -i $bam \\ -o $prefix \\ $options.args read_duplication.py --version | sed -e "s/read_duplication.py //g" > ${software}.version.txt """ |
37 38 39 40 41 42 43 44 45 46 47 48 49 50 | """ $get_decoy_ids sed -i.bak -e 's/>//g' decoys.txt cat $transcript_fasta $genome_fasta > $gentrome salmon \\ index \\ --threads $task.cpus \\ -t $gentrome \\ -d decoys.txt \\ $options.args \\ -i salmon salmon --version | sed -e "s/salmon //g" > ${software}.version.txt """ |
65 66 67 68 69 70 71 72 73 74 75 76 | """ salmon quant \\ --geneMap $gtf \\ --threads $task.cpus \\ --libType=$strandedness \\ $reference \\ $input_reads \\ $options.args \\ -o $prefix salmon --version | sed -e "s/salmon //g" > ${software}.version.txt """ |
38 39 40 41 | """ samtools flagstat $bam > ${bam}.flagstat echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//' > ${software}.version.txt """ |
39 40 41 42 | """ samtools idxstats $bam > ${bam}.idxstats echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//' > ${software}.version.txt """ |
35 36 37 38 | """ samtools index $options.args $bam echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//' > ${software}.version.txt """ |
35 36 37 38 | """ samtools sort $options.args -@ $task.cpus -o ${prefix}.bam -T $prefix $bam echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//' > ${software}.version.txt """ |
34 35 36 37 | """ samtools stats $bam > ${bam}.stats echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//' > ${software}.version.txt """ |
37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 | """ sortmerna \\ $Refs \\ --reads $reads \\ --threads $task.cpus \\ --workdir . \\ --aligned rRNA_reads \\ --other non_rRNA_reads \\ $options.args gzip -f < non_rRNA_reads.fq > ${prefix}.fastq.gz mv rRNA_reads.log ${prefix}.sortmerna.log echo \$(sortmerna --version 2>&1) | sed 's/^.*SortMeRNA version //; s/ Build Date.*\$//' > ${software}.version.txt """ |
53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 | """ sortmerna \\ $Refs \\ --reads ${reads[0]} \\ --reads ${reads[1]} \\ --threads $task.cpus \\ --workdir . \\ --aligned rRNA_reads \\ --other non_rRNA_reads \\ --paired_in \\ --out2 \\ $options.args gzip -f < non_rRNA_reads_fwd.fq > ${prefix}_1.fastq.gz gzip -f < non_rRNA_reads_rev.fq > ${prefix}_2.fastq.gz mv rRNA_reads.log ${prefix}.sortmerna.log echo \$(sortmerna --version 2>&1) | sed 's/^.*SortMeRNA version //; s/ Build Date.*\$//' > ${software}.version.txt """ |
52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 | """ STAR \\ --genomeDir $index \\ --readFilesIn $reads \\ --runThreadN $task.cpus \\ --outFileNamePrefix $prefix. \\ $out_sam_type \\ $ignore_gtf \\ $seq_center \\ $options.args $mv_unsorted_bam if [ -f ${prefix}.Unmapped.out.mate1 ]; then mv ${prefix}.Unmapped.out.mate1 ${prefix}.unmapped_1.fastq gzip ${prefix}.unmapped_1.fastq fi if [ -f ${prefix}.Unmapped.out.mate2 ]; then mv ${prefix}.Unmapped.out.mate2 ${prefix}.unmapped_2.fastq gzip ${prefix}.unmapped_2.fastq fi STAR --version | sed -e "s/STAR_//g" > ${software}.version.txt """ |
35 36 37 38 39 40 41 42 43 44 45 46 47 | """ mkdir star STAR \\ --runMode genomeGenerate \\ --genomeDir star/ \\ --genomeFastaFiles $fasta \\ --sjdbGTFfile $gtf \\ --runThreadN $task.cpus \\ $memory \\ $options.args STAR --version | sed -e "s/STAR_//g" > ${software}.version.txt """ |
49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 | """ samtools faidx $fasta NUM_BASES=`gawk '{sum = sum + \$2}END{if ((log(sum)/log(2))/2 - 1 > 14) {printf "%.0f", 14} else {printf "%.0f", (log(sum)/log(2))/2 - 1}}' ${fasta}.fai` mkdir star STAR \\ --runMode genomeGenerate \\ --genomeDir star/ \\ --genomeFastaFiles $fasta \\ --sjdbGTFfile $gtf \\ --runThreadN $task.cpus \\ --genomeSAindexNbases \$NUM_BASES \\ $memory \\ $options.args STAR --version | sed -e "s/STAR_//g" > ${software}.version.txt """ |
42 43 44 45 46 47 48 49 50 51 52 53 54 55 | """ stringtie \\ $bam \\ $strandedness \\ -G $gtf \\ -o ${prefix}.transcripts.gtf \\ -A ${prefix}.gene.abundance.txt \\ -C ${prefix}.coverage.gtf \\ -b ${prefix}.ballgown \\ -p $task.cpus \\ $options.args echo \$(stringtie --version 2>&1) > ${software}.version.txt """ |
40 41 42 43 44 45 46 47 48 49 50 51 | """ featureCounts \\ $options.args \\ $paired_end \\ -T $task.cpus \\ -a $annotation \\ -s $strandedness \\ -o ${prefix}.featureCounts.txt \\ ${bams.join(' ')} echo \$(featureCounts -v 2>&1) | sed -e "s/featureCounts v//g" > ${software}.version.txt """ |
54 55 56 57 58 59 60 61 62 63 64 | """ [ ! -f ${prefix}.fastq.gz ] && ln -s $reads ${prefix}.fastq.gz trim_galore \\ $options.args \\ --cores $cores \\ --gzip \\ $c_r1 \\ $tpc_r1 \\ ${prefix}.fastq.gz echo \$(trim_galore --version 2>&1) | sed 's/^.*version //; s/Last.*\$//' > ${software}.version.txt """ |
66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 | """ [ ! -f ${prefix}_1.fastq.gz ] && ln -s ${reads[0]} ${prefix}_1.fastq.gz [ ! -f ${prefix}_2.fastq.gz ] && ln -s ${reads[1]} ${prefix}_2.fastq.gz trim_galore \\ $options.args \\ --cores $cores \\ --paired \\ --gzip \\ $c_r1 \\ $c_r2 \\ $tpc_r1 \\ $tpc_r2 \\ ${prefix}_1.fastq.gz \\ ${prefix}_2.fastq.gz echo \$(trim_galore --version 2>&1) | sed 's/^.*version //; s/Last.*\$//' > ${software}.version.txt """ |
55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 | """ [ ! -f ${prefix}.fastq.gz ] && ln -s $reads ${prefix}.fastq.gz [ ! -f $c_adapter ] && ln -s ${params.adapter_file} $c_adapter trimmomatic SE\\ $options.args \\ -threads $cores \\ ${prefix}_1.fastq.gz \\ ${prefix}_1.fq.gz \\ ILLUMINACLIP:${params.adapter_file}:2:30:10:1:true \\ $c_lead \\ $c_trail \\ $c_extra echo '0.39' > ${software}.version.txt """ |
71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 | """ [ ! -f ${prefix}_1.fastq.gz ] && ln -s ${reads[0]} ${prefix}_1.fastq.gz [ ! -f ${prefix}_2.fastq.gz ] && ln -s ${reads[1]} ${prefix}_2.fastq.gz [ ! -f $c_adapter ] && ln -s ${params.adapter_file} $c_adapter trimmomatic PE\\ $options.args \\ -threads $cores \\ ${prefix}_1.fastq.gz \\ ${prefix}_2.fastq.gz \\ ${prefix}_1.fq.gz \\ ${prefix}_U_1.fastq.gz \\ ${prefix}_2.fq.gz \\ ${prefix}_U_2.fastq.gz \\ ILLUMINACLIP:${params.adapter_file}:2:30:10:1:true \\ $c_lead \\ $c_trail \\ $c_extra echo '0.39' > ${software}.version.txt """ |
34 35 36 37 38 39 40 41 | """ bedClip \\ $bedgraph \\ $sizes \\ ${prefix}.bedGraph echo $VERSION > ${software}.version.txt """ |
34 35 36 37 | """ bedGraphToBigWig $bedgraph $sizes ${prefix}.bigWig echo $VERSION > ${software}.version.txt """ |
32 33 34 35 36 37 38 39 40 | """ umi_tools dedup \\ -I $bam \\ -S ${prefix}.bam \\ $paired \\ $options.args echo \$(umi_tools --version 2>&1) | sed 's/^.*UMI-tools version://; s/ *\$//' > ${software}.version.txt """ |
33 34 35 36 37 38 39 40 41 42 | """ umi_tools \\ extract \\ -I $reads \\ -S ${prefix}.umi_extract.fastq.gz \\ $options.args \\ > ${prefix}.umi_extract.log echo \$(umi_tools --version 2>&1) | sed 's/^.*UMI-tools version://; s/ *\$//' > ${software}.version.txt """ |
44 45 46 47 48 49 50 51 52 53 54 55 | """ umi_tools \\ extract \\ -I ${reads[0]} \\ --read2-in=${reads[1]} \\ -S ${prefix}.umi_extract_1.fastq.gz \\ --read2-out=${prefix}.umi_extract_2.fastq.gz \\ $options.args \\ > ${prefix}.umi_extract.log echo \$(umi_tools --version 2>&1) | sed 's/^.*UMI-tools version://; s/ *\$//' > ${software}.version.txt """ |
31 32 33 34 | """ tar -xzvf $options.args $archive echo \$(tar --version 2>&1) | sed 's/^.*(GNU tar) //; s/ Copyright.*\$//' > ${software}.version.txt """ |
Support
- Future updates