Cleaning transcriptome annotations with ab initio assemblers

public public 1yr ago 0 bookmarks

Introduction

AnnotationCleaner is designed to automate the construction of ab initio assembled transcriptomes using several different tools. This will allow you to test the impact of assembler choice on downstream applications. Currently, the a

Code Snippets

91
92
wrapper:
    "v2.1.1/bio/samtools/sort"
19
20
21
22
23
24
25
shell:
    """
    portcullis prep -t {threads} -o prepare_portcullis {params.extra_prep} {input.fasta} {input.bams} 1> {log} 2>&1
    portcullis junc -t {threads} --orientation {params.orientation} \
    --strandedness {params.strandedness} -o results/identify_junctions/junctions {params.extra_junc} \
    prepare_portcullis/ 1> {log} 2>&1
    """
46
47
48
49
50
shell:
    """
    mikado configure --list {input.mlist} --scoring {params.scoring} --reference {input.reference} \
    --junctions {input.junctions} -bt {input.proteins} -od results/mikado_configure/ {params.extra} -t {threads} {output} 1> {log} 2>&1
    """  
67
68
shell:
    "mikado prepare --json-conf {input} -od results/mikado_prepare/ {params.extra} 1> {log} 2>&1"
84
85
86
87
shell:
    """
    TransDecoder.LongOrfs -t {input.fasta} --output_dir results/identify_orfs/ {params.extra} 2> {log}
    """
102
103
104
105
shell:
    """
    TransDecoder.Predict -t {input.transcripts} --output_dir results/identify_orfs/ 2> {log}
    """
SnakeMake From line 102 of rules/mikado.smk
122
123
124
125
shell:
    """
    makeblastdb -in {input.proteins} -dbtype prot -parse_seqids {params.extra} -out results/mikado_blastdb/mikado_blastdb 1> {log} 2>&1
    """
SnakeMake From line 122 of rules/mikado.smk
142
143
144
145
shell:
    """
    pyfasta split -n {params.nsub} {input.fasta}
    """
160
161
162
163
164
shell:
    """
    blastx {params.extra} -outfmt "6 qseqid sseqid pident length mismatch gapopen qstart qend sstart send evalue bitscore ppos btop" \
    -num_threads {threads} -query {input.fasta} -db results/mikado_blastdb/mikado_blastdb -out {output.mikado_blast} 2> {log}
    """
SnakeMake From line 160 of rules/mikado.smk
184
185
186
187
188
shell:
    """
    mikado serialise --json-conf {input.mconfig} --transcripts {input.transcripts} --orfs {input.orfs} -od results/mikado_serialise/ \
    --junctions {input.junctions} -p {threads} --tsv results/mikado_blast/ --blast_targets {input.blast_db} {params.extra} 1> {log} 2>&1
    """
220
221
222
223
224
shell:
    """
    blastx {params.extra} -outfmt "6 qseqid sseqid pident length mismatch gapopen qstart qend sstart send evalue bitscore ppos btop" \
    -num_threads {threads} -query {input.fasta} -db results/mikado_blastdb/mikado_blastdb -out {output.mikado_blast} 2> {log}
    """
SnakeMake From line 220 of rules/mikado.smk
247
248
249
250
251
shell:
    """
    mikado serialise --json-conf {input.mconfig} --transcripts {input.transcripts} --orfs {input.orfs} -od results/mikado_serialise/ \
    --junctions {input.junctions} --tsv {input.blast} --blast_targets {input.blast_db} {params.extra} 1> {log} 2>&1
    """
269
270
shell:
    "mikado pick --configuration {input.mconfig} -db {input.db} --loci_out mikado.loci.gff3 --subloci_out mikado.subloci.gff3 -od results/mikado_pick/ {params.extra} 1> {log} 2>&1"
285
286
287
288
289
290
shell:
    """
    mikado compare -r {input.reference} --index 1> {log} 2>&1
    mikado compare -r {input.reference} -p {input.mikado_out} -o results/mikado_compare/compare 1> {log} 2>&1
    touch {output.dummy}
    """
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
run:

    scallop = params.scallop
    stringtie = params.stringtie
    provided_annotations = params.provided_annotations

    with open(output[0], 'w') as f:

        if scallop["use_scallop"]:

            scallop.pop("use_scallop")
            taco = scallop.pop("use_taco")
            paths = SCALLOP_PATHS

            counter = 1
            for p in paths:

                row = [str(value) for value in scallop.values()]

                row.insert(0, str(p))

                row[1] = row[1] + str(counter)
                counter = counter + 1

                f.write('\t'.join(row) + '\n')

        if stringtie["use_stringtie"]:

            stringtie.pop("use_stringtie")
            taco = stringtie.pop("use_taco")
            merge = stringtie.pop("use_merge")
            paths = STRINGTIE_PATHS

            counter = 1
            for p in paths:

                row = [str(value) for value in stringtie.values()]

                row.insert(0, str(p))

                row[1] = row[1] + str(counter)
                counter = counter + 1

                f.write('\t'.join(row) + '\n')


        if ~bool(provided_annotations):

            for key, inner_dict in provided_annotations.items():

                row = [str(value) for value in inner_dict.values()]

                row.insert(1, key)

                f.write('\t'.join(row) + '\n')
13
14
shell:
    "scallop -i {input} -o {output.gtf} {params.extra} 1> {log} 2>&1"
22
23
24
25
run:
    with open(output[0], "w") as file:
        for path in input:
            file.write(path + "\n")
41
42
43
44
45
46
shell:
    """
    taco_run -o ./results/scallop_taco/ -p {threads} {params.extra_taco} {input} 1> {log} 2>&1
    taco_refcomp -o ./results/scallop_taco_refcomp/ -r {params.gtf} -t ./results/scallop_taco/assembly.gtf {params.extra_refcomp} 1> {log} 2>&1
    touch {output.output_dummy}
    """
14
15
shell:
    "stringtie -o {output} -p {threads} -G {params.gtf} {params.extra} {input} 1> {log} 2>&1"
30
31
shell:
    "stringtie --merge -p {threads} -G {params.gtf} -o {output} {params.extra} {input}"
39
40
41
42
run:
    with open(output[0], "w") as file:
        for path in input:
            file.write(path + "\n")
58
59
60
61
62
63
shell:
    """
    taco_run -o ./results/stringtie_taco/ -p {threads} {params.extra_taco} {input} 1> {log} 2>&1
    taco_refcomp -o ./results/stringtie_taco_refcomp/ -r {params.gtf} -t ./results/stringtie_taco/assembly.gtf {params.extra_refcomp} 1> {log} 2>&1
    touch {output.output_dummy}
    """
 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
__author__ = "Johannes Köster"
__copyright__ = "Copyright 2016, Johannes Köster"
__email__ = "[email protected]"
__license__ = "MIT"


import tempfile
from pathlib import Path
from snakemake.shell import shell
from snakemake_wrapper_utils.snakemake import get_mem
from snakemake_wrapper_utils.samtools import get_samtools_opts


samtools_opts = get_samtools_opts(snakemake)
extra = snakemake.params.get("extra", "")
log = snakemake.log_fmt_shell(stdout=True, stderr=True)

mem_per_thread_mb = int(get_mem(snakemake) / snakemake.threads)

with tempfile.TemporaryDirectory() as tmpdir:
    tmp_prefix = Path(tmpdir) / "samtools_sort"

    shell(
        "samtools sort {samtools_opts} -m {mem_per_thread_mb}M {extra} -T {tmp_prefix} {snakemake.input[0]} {log}"
    )
ShowHide 16 more snippets with no or duplicated tags.

Login to post a comment if you would like to share your experience with this workflow.

Do you know this workflow well? If so, you can request seller status , and start supporting this workflow.

Free

Created: 1yr ago
Updated: 1yr ago
Maitainers: public
URL: https://github.com/isaacvock/AnnotationCleaner
Name: annotationcleaner
Version: 1
Badge:
workflow icon

Insert copied code into your website to add a link to this workflow.

Downloaded: 0
Copyright: Public Domain
License: None
  • Future updates

Related Workflows

cellranger-snakemake-gke
snakemake workflow to run cellranger on a given bucket using gke.
A Snakemake workflow for running cellranger on a given bucket using Google Kubernetes Engine. The usage of this workflow ...