Setting Up Dependencies and Conda Environment for trio_call_phase_pipeline

public public 8mo ago 0 bookmarks

/panguangze/trio_call_phase_pipeline

install:

dysgu https://github.com/panguangze/dysgu.git master branch

pedHap https://github.com/panguangze/pedHapCpp.git main branch

conda create -c conda-forge -c bioconda --name snakemake snakemake snakedeploy

conda activate snakemake

Code Snippets

17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
    wrapper:
        "master/bio/{}".format(config["bwa_mem"]["wrapper"])


# Merge the per-unit sorted BAMs of one sample into a single BAM via the
# samtools/merge wrapper. `--write-index` is appended to the configured merge
# parameters and a `.bam.csi` index is declared as a second output.
rule samtools_merge:
    input:
        # One "{sample}-{unit}.sorted.bam" per unit found for this sample in the
        # `samples` table (defined outside this snippet).
        # NOTE(review): `w` is the whole wildcards object, not `w.sample`; this
        # presumably relies on `sample` being the only wildcard — confirm.
        lambda w: expand(
            "results/mapped/{sample}-{unit}.sorted.bam",
            sample=w,
            unit=samples.loc[w].unit,
        ),
    output:
        bam="results/mapped/{sample}.bam",
        idx="results/mapped/{sample}.bam.csi",
    log:
        "results/logs/samtools_merge/{sample}.log",
    params:
        config["samtools_merge"]["params"] + " --write-index",  # optional additional parameters as string
    threads: config["samtools_merge"]["threads"]  # Samtools takes additional threads through its option -@
    wrapper:
        "master/bio/samtools/merge"
12
13
wrapper:
    "master/bio/bcftools/concat"
27
28
wrapper:
    "master/bio/bcftools/index"
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
run:
    shell(
        "mkdir -p {params.lst_dir} && "
        "{params.bin_path} "
        "{params.ngs_params} "
        "--bam {input.bams[0]} "
        "--VCF {input.vcf} "
        "--idx 0 "
        "--out {params.lst_dir}/0.lst && sort -k3 -n {params.lst_dir}/0.lst > {params.lst_dir}/0.s.lst"
    )
    shell(
        "{params.bin_path} "
        "{params.ngs_params} "
        "--bam {input.bams[1]} "
        "--VCF {input.vcf} "
        "--idx 1 "
        "--out {params.lst_dir}/1.lst && sort -k3 -n {params.lst_dir}/1.lst > {params.lst_dir}/1.s.lst"
    )
    shell(
        "{params.bin_path} "
        "{params.ngs_params} "
        "--bam {input.bams[2]} "
        "--VCF {input.vcf} "
        "--idx 2 "
        "--out {params.lst_dir}/2.lst && sort -k3 -n {params.lst_dir}/2.lst > {params.lst_dir}/2.s.lst"
    )
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
run:
    shell(
        "mkdir -p {output.phased_dir} && "
        "{params.bin_path} "
        "{params.ngs_params} "
        "-v {input.vcf} "
        "-f {params.lst_dir}/0.s.lst "
        "-o  {output.phased_dir}/0.spec.vcf "
        "--idx 0  && {params.bgzip_path} {output.phased_dir}/0.spec.vcf" 
    )
    shell(
        "{params.bin_path} "
        "{params.ngs_params} "
        "-v {input.vcf} "
        "-f {params.lst_dir}/1.s.lst "
        "-o  {output.phased_dir}/1.spec.vcf "
        "--idx 1  && {params.bgzip_path} {output.phased_dir}/1.spec.vcf" 
    )
    shell(
        "{params.bin_path} "
        "{params.ngs_params} "
        "-v {input.vcf} "
        "-f {params.lst_dir}/2.s.lst "
        "-o  {output.phased_dir}/2.spec.vcf "
        "--idx 2  && {params.bgzip_path} {output.phased_dir}/2.spec.vcf" 
    )
125
126
127
128
129
run:
    shell("{params.tabix_path} -f {input.phased_dir}/0.spec.vcf.gz")
    shell("{params.tabix_path} -f {input.phased_dir}/1.spec.vcf.gz")
    shell("{params.tabix_path} -f {input.phased_dir}/2.spec.vcf.gz")
    shell("touch {output}")
SnakeMake From line 125 of rules/phase.smk
142
143
144
145
146
147
shell:
    "{params.bin_path} "
    "{params.run_params} "
    "--vcf {params.vcf} "
    "--out {output.vcf} "
    "--homo_recom {output.recom} "
SnakeMake From line 142 of rules/phase.smk
17
18
wrapper:
    "master/bio/fastp"
37
38
wrapper:
    "master/bio/fastp"
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
shell:
    # Joint DeepTrio run over one trio. Index 0 of input.bams and of
    # params.sample_names is passed as the child, index 1 as parent1 and
    # index 2 as parent2. Per-sample VCFs and gVCFs are written below
    # {output.out_dir}, named after the corresponding sample.
    "run_deeptrio "
    "--model_type WGS "
    "--ref {params.ref} "
    "--reads_child {input.bams[0]} "
    "--reads_parent1 {input.bams[1]} "
    "--reads_parent2 {input.bams[2]} "
    "--output_vcf_child {output.out_dir}/{params.sample_names[0]}.vcf.gz "
    "--output_vcf_parent1 {output.out_dir}/{params.sample_names[1]}.vcf.gz "
    "--output_vcf_parent2 {output.out_dir}/{params.sample_names[2]}.vcf.gz "
    "--sample_name_child {params.sample_names[0]} "
    "--sample_name_parent1 {params.sample_names[1]} "
    "--sample_name_parent2 {params.sample_names[2]} "
    "--num_shards {threads}  "
    # Use the named out_dir like every other path in this command: a bare
    # {output} expands to ALL output files joined by spaces, which would
    # break this path as soon as the rule declares more than one output.
    "--intermediate_results_dir {output.out_dir}/intermediate_results_dir "
    "--output_gvcf_child {output.out_dir}/{params.sample_names[0]}.g.vcf.gz "
    "--output_gvcf_parent1 {output.out_dir}/{params.sample_names[1]}.g.vcf.gz "
    "--output_gvcf_parent2 {output.out_dir}/{params.sample_names[2]}.g.vcf.gz "
67
68
69
70
71
72
73
74
75
76
77
78
shell:
    "glnexus_cli "
    "--config DeepVariantWGS "
    "--dir {output.scratch} "
    "--threads {threads} "
    "{input.gvcfs}/{params.sample_names[0]}.g.vcf.gz "
    "{input.gvcfs}/{params.sample_names[1]}.g.vcf.gz "
    "{input.gvcfs}/{params.sample_names[2]}.g.vcf.gz "
    "2> {log} "
    "| bcftools view - "
    "| bgzip -c "
    "> {output.vcf} "
93
94
wrapper:
    "master/bio/bcftools/index"
106
107
wrapper:
    "master/bio/bcftools/view"
22
23
24
25
26
27
28
29
shell:
    "{params.bin_path} run "
    "-p {threads} "
    "{params.run_params} "
    "{params.ref} "
    "{output.tmp_dir} "
    "{input.bam} "
    "-o {output.out_vcf}"
46
47
48
49
50
shell:
    "{params.bin_path} merge "
    "{params.merge_trio_params} "
    "{input.vcfs} "
    "-o {output.out_vcf}"
63
64
wrapper:
    "master/bio/bcftools/filter"
78
79
wrapper:
    "master/bio/bcftools/index"
 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
__author__ = "Johannes Köster"
__copyright__ = "Copyright 2016, Johannes Köster"
__email__ = "[email protected]"
__license__ = "MIT"

# Wrapper script: runs `bcftools concat` over the files in `snakemake.input.calls`.
# `snakemake` is not defined in this script; it is expected to be provided by the
# Snakemake runtime that executes the wrapper.

from snakemake.shell import shell
from snakemake_wrapper_utils.bcftools import get_bcftools_opts


# Option string built by the shared helper from the `snakemake` object;
# reference and memory parsing are switched off for this command.
bcftools_opts = get_bcftools_opts(snakemake, parse_ref=False, parse_memory=False)
# Free-form extra arguments from the rule's params (empty string when unset).
extra = snakemake.params.get("extra", "")
# NOTE(review): with stdout=True the command's stdout presumably goes to the log,
# so the output path has to come in through `bcftools_opts` — confirm.
log = snakemake.log_fmt_shell(stdout=True, stderr=True)


# The {placeholders} refer to the module-level names defined above.
shell("bcftools concat {bcftools_opts} {extra} {snakemake.input.calls} {log}")
 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
__author__ = "Patrik Smeds"
__copyright__ = "Copyright 2021, Patrik Smeds"
__email__ = "[email protected]"
__license__ = "MIT"

# Wrapper script: runs `bcftools filter` on the first input file.
# `snakemake` is not defined in this script; it is expected to be provided by the
# Snakemake runtime that executes the wrapper.

from snakemake.shell import shell
from snakemake_wrapper_utils.bcftools import get_bcftools_opts


# Option string built by the shared helper from the `snakemake` object;
# reference, sample and memory parsing are switched off for this command.
bcftools_opts = get_bcftools_opts(
    snakemake, parse_ref=False, parse_samples=False, parse_memory=False
)
# Free-form extra arguments from the rule's params (empty string when unset).
extra = snakemake.params.get("extra", "")
# Only stderr is sent to the log here (stdout=False).
log = snakemake.log_fmt_shell(stdout=False, stderr=True)
# Filter arguments from the rule's params. Deliberately NOT named `filter`, so the
# Python builtin of that name is not shadowed; the shell template below uses the
# matching placeholder.
filter_expr = snakemake.params.get("filter", "")


# The command handles exactly one output file; reject rules declaring more.
# ValueError (a subclass of Exception) matches the error style of the sibling
# bcftools index wrapper and is still caught by any `except Exception`.
if len(snakemake.output) > 1:
    raise ValueError(
        "Only one output file expected, got: " + str(len(snakemake.output))
    )


# The {placeholders} refer to the module-level names defined above.
shell(
    "bcftools filter"
    " {bcftools_opts}"
    " {filter_expr}"
    " {extra}"
    " {snakemake.input[0]}"
    " {log}"
)
 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
__author__ = "Johannes Köster"
__copyright__ = "Copyright 2016, Johannes Köster"
__email__ = "[email protected]"
__license__ = "MIT"

# Wrapper script: runs `bcftools index` on the first input file and picks the
# index format (--tbi / --csi) from the suffix of the first output file.
# `snakemake` is not defined in this script; it is expected to be provided by the
# Snakemake runtime that executes the wrapper.

from snakemake.shell import shell
from snakemake_wrapper_utils.bcftools import get_bcftools_opts


# Option string built by the shared helper; reference, output-format and memory
# parsing are switched off for this command.
bcftools_opts = get_bcftools_opts(
    snakemake, parse_ref=False, parse_output_format=False, parse_memory=False
)
extra = snakemake.params.get("extra", "")
log = snakemake.log_fmt_shell(stdout=True, stderr=True)


# The index format is derived below, so the user must not pass it explicitly.
if any(format_flag in extra for format_flag in ("--tbi", "--csi")):
    raise ValueError(
        "You have specified index format (`--tbi/--csi`) in `params.extra`; this is automatically infered from the first output file."
    )

# Append the flag matching the output suffix; the `else` arm of the loop only
# runs when neither suffix matched.
for suffix, index_flag in ((".tbi", " --tbi"), (".csi", " --csi")):
    if snakemake.output[0].endswith(suffix):
        extra += index_flag
        break
else:
    raise ValueError("invalid index file format ('.tbi', '.csi').")


# The {placeholders} refer to the module-level names defined above.
shell("bcftools index {bcftools_opts} {extra} {snakemake.input[0]} {log}")
 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
__author__ = "Johannes Köster"
__copyright__ = "Copyright 2016, Johannes Köster"
__email__ = "[email protected]"
__license__ = "MIT"

# Wrapper script: runs `bcftools view` on the first input file only
# (`snakemake.input[0]`). `snakemake` is not defined in this script; it is
# expected to be provided by the Snakemake runtime that executes the wrapper.

from snakemake.shell import shell
from snakemake_wrapper_utils.bcftools import get_bcftools_opts

# Option string built by the shared helper from the `snakemake` object;
# reference and memory parsing are switched off for this command.
bcftools_opts = get_bcftools_opts(snakemake, parse_ref=False, parse_memory=False)
# Free-form extra arguments from the rule's params (empty string when unset).
extra = snakemake.params.get("extra", "")
# NOTE(review): with stdout=True the command's stdout presumably goes to the log,
# so the output path has to come in through `bcftools_opts` — confirm.
log = snakemake.log_fmt_shell(stdout=True, stderr=True)

# The {placeholders} refer to the module-level names defined above.
shell("bcftools view {bcftools_opts} {extra} {snakemake.input[0]} {log}")
 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
__author__ = "Sebastian Kurscheid"
__copyright__ = "Copyright 2019, Sebastian Kurscheid"
__email__ = "[email protected]"
__license__ = "MIT"

from snakemake.shell import shell
import re

# Wrapper script: assembles a fastp command line from the rule's input, output
# and params and runs it through shell(). `snakemake` is not defined in this
# script; it is expected to be provided by the Snakemake runtime.
# The names extra, adapters, reads, trimmed, json, html and log are referenced by
# the placeholders of the final shell() template and therefore keep these names.

extra = snakemake.params.get("extra", "")
adapters = snakemake.params.get("adapters", "")
log = snakemake.log_fmt_shell(stdout=True, stderr=True)


# Input reads: exactly one entry (single-end) or two entries (paired-end).
n = len(snakemake.input.sample)
assert n in (
    1,
    2,
), "input->sample must have 1 (single-end) or 2 (paired-end) elements."
paired_end = n == 2

if paired_end:
    reads = "--in1 {} --in2 {}".format(*snakemake.input.sample)
else:
    reads = "--in1 {}".format(snakemake.input.sample)


# Trimmed outputs: stays empty unless output.trimmed is declared.
trimmed = ""
if snakemake.output.get("trimmed", None):
    if paired_end:
        trimmed = "--out1 {} --out2 {}".format(*snakemake.output.trimmed)

        # Unpaired reads: a single shared file is passed to both options,
        # otherwise each mate's file is added independently when declared.
        unpaired = snakemake.output.get("unpaired", None)
        if unpaired:
            trimmed += f" --unpaired1 {unpaired} --unpaired2 {unpaired}"
        else:
            unpaired1 = snakemake.output.get("unpaired1", None)
            if unpaired1:
                trimmed += f" --unpaired1 {unpaired1}"
            unpaired2 = snakemake.output.get("unpaired2", None)
            if unpaired2:
                trimmed += f" --unpaired2 {unpaired2}"

        # Merged paired-end reads are only accepted when '--merge' was requested
        # through params.extra.
        merged = snakemake.output.get("merged", None)
        if merged and not re.search(r"--merge\b", extra):
            raise ValueError(
                "output.merged specified but '--merge' option missing from params.extra"
            )
        if merged:
            trimmed += f" --merged_out {merged}"
    else:
        trimmed = "--out1 {}".format(snakemake.output.trimmed)


# Reads that failed filtering (appended regardless of whether trimmed output
# was declared).
failed = snakemake.output.get("failed", None)
if failed:
    trimmed += f" --failed_out {failed}"


# Report files; output.html and output.json are accessed unconditionally.
html = "--html {}".format(snakemake.output.html)
json = "--json {}".format(snakemake.output.json)


# The {placeholders} refer to the module-level names defined above.
shell(
    "(fastp --thread {snakemake.threads} "
    "{extra} "
    "{adapters} "
    "{reads} "
    "{trimmed} "
    "{json} "
    "{html} ) {log}"
)
 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
__author__ = "Johannes Köster"
__copyright__ = "Copyright 2016, Johannes Köster"
__email__ = "[email protected]"
__license__ = "MIT"

# Wrapper script: runs `samtools merge` over all input files (`snakemake.input`).
# `snakemake` is not defined in this script; it is expected to be provided by the
# Snakemake runtime that executes the wrapper.

from snakemake.shell import shell
from snakemake_wrapper_utils.samtools import get_samtools_opts

# Option string built by the shared helper from the `snakemake` object.
samtools_opts = get_samtools_opts(snakemake)
# Free-form extra arguments from the rule's params (empty string when unset).
extra = snakemake.params.get("extra", "")
# NOTE(review): with stdout=True the command's stdout presumably goes to the log,
# so the output path has to come in through `samtools_opts` — confirm.
log = snakemake.log_fmt_shell(stdout=True, stderr=True)

# The {placeholders} refer to the module-level names defined above.
shell("samtools merge {samtools_opts} {extra} {snakemake.input} {log}")
Show/Hide 18 more snippets with no or duplicated tags.

Free

Created: 8mo ago
Updated: 8mo ago
Maintainers: public
URL: https://github.com/panguangze/trio_call_phase_pipeline
Name: trio_call_phase_pipeline
Version: 1
Badge:
workflow icon

Insert copied code into your website to add a link to this workflow.

Downloaded: 0
Copyright: Public Domain
License: MIT License
  • Future updates

Related Workflows

cellranger-snakemake-gke
snakemake workflow to run cellranger on a given bucket using gke.
A Snakemake workflow for running cellranger on a given bucket using Google Kubernetes Engine. The usage of this workflow ...