Snakemake based pipeline for atacseq dataanalysis

public public 1yr ago Version: 2 0 bookmarks

Snakemake based pipeline for ATAC seq data analysis

Overview

Implemented workflow

dag

Code Snippets

20
21
wrapper:
	"v1.24.0/bio/bowtie2/align"
31
32
wrapper:
	"v1.23.5-48-gf27313f0/bio/samtools/sort"
15
16
17
18
19
20
21
22
23
shell:
	'bamCoverage --bam {input.remdup_bam} '
	'--outFileName {output} '
	'--outFileFormat bigwig '
	'--numberOfProcessors {threads} '
	'--extendReads '
	'--normalizeUsing {params.normalize_using} '
	'--binSize {params.bin_size} '
	'--smoothLength {params.smooth_length} 2>&1 | tee {log}'
15
16
wrapper:
	"v1.24.0/bio/macs2/callpeak"
14
15
wrapper:
	"v1.23.5-48-gf27313f0/bio/picard/markduplicates"
27
28
wrapper:
	"v1.24.0/bio/bedtools/intersect"
38
39
wrapper:
	"v1.25.0-52-g79342b73/bio/samtools/index"
55
56
57
58
59
60
61
62
63
shell:
	'samtools idxstats '
	'{input.bam} | cut -f 1 | grep -v chrM | xargs samtools view '
	'--threads {threads} '
	'--write-index '
	'-F {params.remove_reads} '
	'-f {params.keep_reads} '
	'-q {params.mapqual} '
	'-o {output.bam}##idx##{output.idx} {input.bam}'
75
76
wrapper:
	"v1.23.5-48-gf27313f0/bio/samtools/sort"
86
87
wrapper:
	"v1.24.0/bio/samtools/flagstat"
100
101
102
103
104
105
106
shell:
	'run_spp.R '
	'-c={input} '
	'-p={threads} '
	'-savp '
	'-odir=results/qc/phantompeakqual '
	'-out={output.stats} 2>&1 | tee {log}'
16
17
18
19
20
21
22
23
24
shell:
	'ataqv '
	'--peak-file {input.peaks} '
	'--threads {threads} '
	'--metrics-file {output} '
	'--name {wildcards.sample} '
	'--tss-file {params.tssfile} '
	'--excluded-region-file {params.excludedregionfile} '
	'{params.organism} {input.bam} 2>&1 | tee {log}'
34
35
36
37
38
shell:
	'mkarv '
	'--concurrency {threads} '
	'--force '
	'results/qc/ataqv/report {input}'
51
52
53
54
55
56
shell:
	'multiqc '
	'--force '
	'--outdir results/qc/multiqc '
	'--zip-data-dir '
	'. 2>&1 | tee {log}'
10
11
wrapper:
	"v1.24.0/bio/fastqc"
26
27
28
29
30
31
32
33
shell:
	'trim_galore '
	'--gzip '
	'--output_dir results/qc/trimgalore '
	'--cores {params.threads_actual} '
	'--basename {wildcards.sample} '
	'--paired --no_report_file '
	'{input[0]} {input[1]} 2>&1 | tee {log}'
 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
__author__ = "Johannes Köster, Christopher Schröder"
__copyright__ = "Copyright 2016, Johannes Köster"
__email__ = "[email protected]"
__license__ = "MIT"


import tempfile
from snakemake.shell import shell
from snakemake_wrapper_utils.java import get_java_opts

log = snakemake.log_fmt_shell()

extra = snakemake.params.get("extra", "")
java_opts = get_java_opts(snakemake)

bams = snakemake.input.bams
if isinstance(bams, str):
    bams = [bams]
bams = list(map("--INPUT {}".format, bams))

if snakemake.output.bam.endswith(".cram"):
    output = "/dev/stdout"
    if snakemake.params.embed_ref:
        view_options = "-O cram,embed_ref"
    else:
        view_options = "-O cram"
    convert = f" | samtools view -@ {snakemake.threads} {view_options} --reference {snakemake.input.ref} -o {snakemake.output.bam}"
else:
    output = snakemake.output.bam
    convert = ""

with tempfile.TemporaryDirectory() as tmpdir:
    shell(
        "(picard MarkDuplicates"  # Tool and its subcommand
        " {java_opts}"  # Automatic java option
        " {extra}"  # User defined parmeters
        " {bams}"  # Input bam(s)
        " --TMP_DIR {tmpdir}"
        " --OUTPUT {output}"  # Output bam
        " --METRICS_FILE {snakemake.output.metrics}"  # Output metrics
        " {convert} ) {log}"  # Logging
    )
 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
__author__ = "Johannes Köster"
__copyright__ = "Copyright 2016, Johannes Köster"
__email__ = "[email protected]"
__license__ = "MIT"


import tempfile
from pathlib import Path
from snakemake.shell import shell
from snakemake_wrapper_utils.samtools import get_samtools_opts


samtools_opts = get_samtools_opts(snakemake)
extra = snakemake.params.get("extra", "")
log = snakemake.log_fmt_shell(stdout=True, stderr=True)


with tempfile.TemporaryDirectory() as tmpdir:
    tmp_prefix = Path(tmpdir) / "samtools_fastq.sort_"

    shell(
        "samtools sort {samtools_opts} {extra} -T {tmp_prefix} {snakemake.input[0]} {log}"
    )
 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
__author__ = "Jan Forster"
__copyright__ = "Copyright 2019, Jan Forster"
__email__ = "[email protected]"
__license__ = "MIT"

from snakemake.shell import shell

## Extract arguments
extra = snakemake.params.get("extra", "")
log = snakemake.log_fmt_shell(stdout=True, stderr=True)

shell(
    "(bedtools intersect"
    " {extra}"
    " -a {snakemake.input.left}"
    " -b {snakemake.input.right}"
    " > {snakemake.output})"
    " {log}"
)
 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
__author__ = "Johannes Köster"
__copyright__ = "Copyright 2016, Johannes Köster"
__email__ = "[email protected]"
__license__ = "MIT"


import os
from snakemake.shell import shell
from snakemake_wrapper_utils.samtools import get_samtools_opts


samtools_opts = get_samtools_opts(snakemake)
extra = snakemake.params.get("extra", "")
log = snakemake.log_fmt_shell(stdout=True, stderr=True)


n = len(snakemake.input.sample)
assert (
    n == 1 or n == 2
), "input->sample must have 1 (single-end) or 2 (paired-end) elements."

if n == 1:
    reads = "-U {}".format(*snakemake.input.sample)
else:
    reads = "-1 {} -2 {}".format(*snakemake.input.sample)


index = os.path.commonprefix(snakemake.input.idx).rstrip(".")


shell(
    "(bowtie2"
    " --threads {snakemake.threads}"
    " {reads} "
    " -x {index}"
    " {extra}"
    "| samtools view"
    " {samtools_opts}"
    " -"
    ") {log}"
)
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
__author__ = "Julian de Ruiter"
__copyright__ = "Copyright 2017, Julian de Ruiter"
__email__ = "[email protected]"
__license__ = "MIT"


from os import path
import re
from tempfile import TemporaryDirectory

from snakemake.shell import shell

log = snakemake.log_fmt_shell(stdout=True, stderr=True)


def basename_without_ext(file_path):
    """Returns basename of file path, without the file extension."""

    base = path.basename(file_path)
    # Remove file extension(s) (similar to the internal fastqc approach)
    base = re.sub("\\.gz$", "", base)
    base = re.sub("\\.bz2$", "", base)
    base = re.sub("\\.txt$", "", base)
    base = re.sub("\\.fastq$", "", base)
    base = re.sub("\\.fq$", "", base)
    base = re.sub("\\.sam$", "", base)
    base = re.sub("\\.bam$", "", base)

    return base


# Run fastqc, since there can be race conditions if multiple jobs
# use the same fastqc dir, we create a temp dir.
with TemporaryDirectory() as tempdir:
    shell(
        "fastqc {snakemake.params} -t {snakemake.threads} "
        "--outdir {tempdir:q} {snakemake.input[0]:q}"
        " {log}"
    )

    # Move outputs into proper position.
    output_base = basename_without_ext(snakemake.input[0])
    html_path = path.join(tempdir, output_base + "_fastqc.html")
    zip_path = path.join(tempdir, output_base + "_fastqc.zip")

    if snakemake.output.html != html_path:
        shell("mv {html_path:q} {snakemake.output.html:q}")

    if snakemake.output.zip != zip_path:
        shell("mv {zip_path:q} {snakemake.output.zip:q}")
 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
__author__ = "Antonie Vietor"
__copyright__ = "Copyright 2020, Antonie Vietor"
__email__ = "[email protected]"
__license__ = "MIT"

import os
import sys
from snakemake.shell import shell

log = snakemake.log_fmt_shell(stdout=True, stderr=True)

in_contr = snakemake.input.get("control")
params = "{}".format(snakemake.params)
opt_input = ""
out_dir = ""

ext = "_peaks.xls"
out_file = [o for o in snakemake.output if o.endswith(ext)][0]
out_name = os.path.basename(out_file[: -len(ext)])
out_dir = os.path.dirname(out_file)

if in_contr:
    opt_input = "-c {contr}".format(contr=in_contr)

if out_dir:
    out_dir = "--outdir {dir}".format(dir=out_dir)

if any(out.endswith(("_peaks.narrowPeak", "_summits.bed")) for out in snakemake.output):
    if any(
        out.endswith(("_peaks.broadPeak", "_peaks.gappedPeak"))
        for out in snakemake.output
    ):
        sys.exit(
            "Output files with _peaks.narrowPeak and/or _summits.bed extensions cannot be created together with _peaks.broadPeak and/or _peaks.gappedPeak extended output files.\n"
            "For usable extensions please see https://snakemake-wrappers.readthedocs.io/en/stable/wrappers/macs2/callpeak.html.\n"
        )
    else:
        if " --broad" in params:
            sys.exit(
                "If --broad option in params is given, the _peaks.narrowPeak and _summits.bed files will not be created. \n"
                "Remove --broad option from params if these files are needed.\n"
            )

if any(
    out.endswith(("_peaks.broadPeak", "_peaks.gappedPeak")) for out in snakemake.output
):
    if "--broad " not in params and not params.endswith("--broad"):
        params += " --broad "

if any(
    out.endswith(("_treat_pileup.bdg", "_control_lambda.bdg"))
    for out in snakemake.output
):
    if all(p not in params for p in ["--bdg", "-B"]):
        params += " --bdg "
else:
    if any(p in params for p in ["--bdg", "-B"]):
        sys.exit(
            "If --bdg or -B option in params is given, the _control_lambda.bdg and _treat_pileup.bdg extended files must be specified in output. \n"
        )

shell(
    "(macs2 callpeak "
    "-t {snakemake.input.treatment} "
    "{opt_input} "
    "{out_dir} "
    "-n {out_name} "
    "{params}) {log}"
)
 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
__author__ = "Christopher Preusch"
__copyright__ = "Copyright 2017, Christopher Preusch"
__email__ = "cpreusch[at]ust.hk"
__license__ = "MIT"


from snakemake.shell import shell
from snakemake_wrapper_utils.samtools import get_samtools_opts

samtools_opts = get_samtools_opts(
    snakemake, parse_write_index=False, parse_output=False, parse_output_format=False
)
extra = snakemake.params.get("extra", "")
log = snakemake.log_fmt_shell(stdout=False, stderr=True)

shell(
    "samtools flagstat {samtools_opts} {extra} {snakemake.input[0]} > {snakemake.output[0]} {log}"
)
 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
__author__ = "Johannes Köster"
__copyright__ = "Copyright 2016, Johannes Köster"
__email__ = "[email protected]"
__license__ = "MIT"


from snakemake.shell import shell

extra = snakemake.params.get("extra", "")
log = snakemake.log_fmt_shell(stdout=True, stderr=True)

# Samtools takes additional threads through its option -@
# One thread for samtools merge
# Other threads are *additional* threads passed to the '-@' argument
threads = "" if snakemake.threads <= 1 else " -@ {} ".format(snakemake.threads - 1)

shell(
    "samtools index {threads} {extra} {snakemake.input[0]} {snakemake.output[0]} {log}"
)
ShowHide 17 more snippets with no or duplicated tags.

Login to post a comment if you would like to share your experience with this workflow.

Do you know this workflow well? If so, you can request seller status , and start supporting this workflow.

Free

Created: 1yr ago
Updated: 1yr ago
Maitainers: public
URL: https://github.com/pd321/atacseq
Name: atacseq
Version: 2
Badge:
workflow icon

Insert copied code into your website to add a link to this workflow.

Other Versions:
Downloaded: 0
Copyright: Public Domain
License: None
  • Future updates

Related Workflows

cellranger-snakemake-gke
snakemake workflow to run cellranger on a given bucket using gke.
A Snakemake workflow for running cellranger on a given bucket using Google Kubernetes Engine. The usage of this workflow ...