Processing pipeline for public multiome and scATAC-seq immune datasets
Help improve this workflow!
This workflow has been published but could be further improved with some additional meta data:- Keyword(s) in categories input, output, operation, topic
You can help improve this workflow by suggesting the addition or removal of keywords, suggest changes and report issues, or request to become a maintainer of the Workflow .
Processing pipeline for public single-cell epigenomic immune datasets.
Installing dependencies
All the required dependencies needed to run the workflow can be installed automatically by creating a new conda environment.
First ensure that conda or mamba is installed and available.
To create a new environment with the dependencies installed, run:
# using conda
conda env create -f environment.yaml
# using mamba
mamba env create -f environment.yaml
Running the workflow
This workflow involves downloading data from AWS using the AWS
command line tools. To enable the download, you will first need
to create an AWS account and set up the AWS command line tools by
running
aws configure
.
Note that some of the data downloaded
may incur charges from AWS
.
To run the Snakemake workflow, first activate the conda environment containing the required dependencies:
conda activate immune
Next, run
snakemake
with the desired options. Setting the
-j
parameter
controls the maximum number of cores used by the workflow:
snakemake -j 24
See the snakemake documentation for a complete list of available options.
Datasets
PBMC
pbmc_atac_500
pbmc_atac_1k
pbmc_atac_5k
pbmc_atac_10k
pbmc_atac_10k_chromium
pbmc_atac_10k_chromiumX
pbmc_multiome_3k_sorted
pbmc_multiome_3k_unsorted
pbmc_multiome_10k_sorted
pbmc_multiome_10k_unsorted
pbmc_multiome_10k_chromium
pbmc_multiome_10k_chromiumX
pbmc_reference
BMMC
bmmc_atac
bmmc_multiome
bmmc_reference
Code Snippets
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 | library(Signac) library(Seurat) library(SummarizedExperiment) library(future) library(GenomicRanges) plan("multicore", workers = 8) options(future.globals.maxSize = 100 * 1024 ^ 3) ### Load processed object for the metadata orig.obj <- readRDS("data/bmmc_atac/scATAC-Healthy-Hematopoiesis-191120.rds") annot <- readRDS("data/annotations_hg38.rds") # get all fragment files frag.files <- list.files("data/bmmc_atac/", pattern = "^scATAC_", full.names = TRUE) frag.files <- paste0(frag.files, "/fragments.tsv.gz") names(frag.files) <- list.files("data/bmmc_atac/", pattern = "^scATAC_") # call peaks on each and combine pk.all <- list() for (i in seq_along(frag.files)) { pks <- CallPeaks( object = frag.files[[i]] ) pk.all[[i]] <- pks } gr.combined <- Reduce(f = c, x = pk.all) gr.combined <- GenomicRanges::reduce(gr.combined) gr.combined <- keepStandardChromosomes(gr.combined, pruning.mode = "coarse") # quantify and create objects md <- colData(orig.obj) obj.list <- list() for (i in seq_along(frag.files)) { obj_ident <- gsub(pattern = "scATAC_", x = names(frag.files)[[i]], replacement = "") cells.keep <- md[md$Group == obj_ident, "Barcode"] frags <- CreateFragmentObject( path = frag.files[[i]], cells = cells.keep ) peakcounts <- FeatureMatrix( fragments = frags, features = gr.combined, cells = cells.keep ) obj <- CreateChromatinAssay(counts = peakcounts, fragments = frags) obj <- CreateSeuratObject(counts = obj, assay = "ATAC") obj <- RenameCells(obj, add.cell.id = obj_ident) obj.list[[i]] <- obj } rownames(md) <- paste0(md$Group, "_", md$Barcode) md <- as.data.frame(md) bmmc <- merge(x = obj.list[[1]], y = obj.list[2:length(obj.list)]) bmmc <- AddMetaData(object = bmmc, metadata = md) Annotation(bmmc) <- annot bmmc$tissue <- sapply(strsplit(bmmc$Group, "_"), `[[`, 1) bmmc$sample <- sapply(strsplit(bmmc$Group, "_"), `[[`, 2) # remove PBMC Idents(bmmc) <- "tissue" bmmc <- subset(bmmc, idents = "PBMC", invert = TRUE) bmmc <- NucleosomeSignal(bmmc) # process bmmc <- FindTopFeatures(bmmc) bmmc <- RunTFIDF(bmmc) bmmc <- RunSVD(bmmc) bmmc <- RunUMAP(bmmc, reduction = "lsi", dims = 2:40) ga <- GeneActivity(bmmc) bmmc[["GA"]] <- CreateAssayObject(counts = ga) DefaultAssay(bmmc) <- "GA" bmmc <- NormalizeData(bmmc) DefaultAssay(bmmc) <- "ATAC" # quantify multiome peaks in atac bmmc_multiome <- readRDS("objects/bmmc_multiome.rds") get_idf <- function(x) { rsums <- rowSums(x = x) idf <- ncol(x = x) / rsums idf <- log(1 + idf) return(idf) } multiome_idf <- get_idf(x = GetAssayData(bmmc_multiome, assay = "peaks", slot = "counts")) multiome_counts <- FeatureMatrix( fragments = Fragments(bmmc), features = granges(bmmc_multiome), cells = colnames(bmmc) ) bmmc[['peaks']] <- CreateChromatinAssay( counts = multiome_counts, fragments = Fragments(bmmc), annotation = Annotation(bmmc) ) bmmc <- subset(bmmc, subset = nucleosome_signal < 5 & nCount_peaks < 50000) DefaultAssay(bmmc) <- "peaks" bmmc <- FindTopFeatures(bmmc) bmmc <- RunTFIDF(bmmc, idf = multiome_idf) bmmc <- RunSVD(bmmc, scale.embeddings = FALSE) bmmc <- RunUMAP( object = bmmc, reduction = "lsi", dims = 2:30, return.model = TRUE, reduction.name = "umap.peaks" ) # integrate embeddings atac <- SplitObject(bmmc, split.by = "Group") atac <- lapply(atac, RunSVD, scale.embeddings = FALSE) atac_anchors <- FindIntegrationAnchors( object.list = atac, reduction = "rlsi", dims = 2:30 ) atac_integrated <- IntegrateEmbeddings( anchorset = atac_anchors, new.reduction.name = "integrated_lsi", reductions = bmmc[["lsi"]], dims.to.integrate = 1:30 ) bmmc[["integrated_lsi"]] <- atac_integrated[["integrated_lsi"]] saveRDS(object = bmmc, file = "objects/bmmc_atac.rds") |
1
of
processing_code/bmmc_atac.R
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 | library(Signac) library(Seurat) library(Matrix) rna_counts <- readMM(file = "data/bmmc_multiome/rna/counts.mtx") rna_metadata <- read.table(file = "data/bmmc_multiome/rna/metadata.csv", sep = ",", header = TRUE, row.names = 1) genes <- read.table(file = "data/bmmc_multiome/rna/genes.csv", sep = ",", header = TRUE, row.names = 1) rna_counts <- t(rna_counts) rna_counts <- as(rna_counts, "dgCMatrix") rownames(rna_counts) <- rownames(genes) colnames(rna_counts) <- rownames(rna_metadata) annotations <- readRDS("data/annotations_hg38.rds") multiome <- CreateSeuratObject(counts = rna_counts, project = "multiome", meta.data = rna_metadata) atac_counts <- readMM(file = "data/bmmc_multiome/atac/counts.mtx") atac_metadata <- read.table(file = "data/bmmc_multiome/atac/metadata.csv", sep = ",", header = TRUE, row.names = 1) peaks <- read.table(file = "data/bmmc_multiome/atac/peaks.csv", sep = ",", header = TRUE, row.names = 1) atac_counts <- t(atac_counts) atac_counts <- as(atac_counts, "dgCMatrix") rownames(atac_counts) <- rownames(peaks) colnames(atac_counts) <- rownames(atac_metadata) common_cells <- intersect(colnames(multiome), colnames(atac_counts)) atac_counts <- atac_counts[, common_cells] multiome <- multiome[, common_cells] multiome[["peaks"]] <- CreateChromatinAssay(counts = atac_counts, annotation = annotations) # create fragment file for each sample # name = name of cell in object # element = name of cell in fragment file frags <- list.files("data/bmmc_multiome", pattern = "*.tsv.gz$", recursive = TRUE, full.names = TRUE) fraglist <- list() for (i in seq_along(along.with = frags)) { f <- frags[[i]] sample.name <- gsub(pattern = "data/bmmc_multiome/", replacement = "", x = f) sample.name <- gsub(pattern = "/atac_fragments.tsv.gz", replacement = "", x = sample.name) cells.object <- colnames(multiome)[grepl(sample.name, colnames(multiome))] cells.fragfile <- sapply(cells.object, function(x) { a <- substr(x, start = 1, stop = 16) paste0(a, "-1") }, USE.NAMES = TRUE) fraglist[[i]] <- CreateFragmentObject(path = f, cells = cells.fragfile) } # process RNA DefaultAssay(multiome) <- "RNA" multiome <- FindVariableFeatures(multiome) multiome <- NormalizeData(multiome) multiome <- SCTransform(multiome) multiome <- RunPCA(multiome) multiome <- RunUMAP( object = multiome, reduction = "pca", dims = 1:40, reduction.name = "umap.sct", return.model = TRUE ) # process ATAC DefaultAssay(multiome) <- "peaks" Fragments(multiome) <- fraglist multiome <- FindTopFeatures(multiome) multiome <- RunTFIDF(multiome) multiome <- RunSVD(multiome) multiome <- RunUMAP( object = multiome, reduction = "lsi", dims = 2:40, reduction.name = "umap.atac", return.model = TRUE ) saveRDS(object = multiome, file = "objects/bmmc_multiome.rds") |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 | library(Seurat) obj <- readRDS("data/bmmc_reference/fullref.Rds") obj <- NormalizeData(obj) obj <- FindVariableFeatures(obj) obj <- ScaleData(obj) obj <- RunPCA(obj) obj <- RunUMAP( object = obj, reduction = "pca", dims = 1:40, reduction.name = "umap", return.model = TRUE ) obj$dataset <- 'reference' saveRDS(obj, "objects/bmmc_reference.rds") |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 | import anndata import sys import os from scipy import io import pandas as pd basepath = sys.argv[1] bmmc = anndata.read_h5ad(basepath + "/GSE194122_openproblems_neurips2021_multiome_BMMC_processed.h5ad") df = bmmc.var atac_features = df.index[df['feature_types'] == 'ATAC'].tolist() rna_features = df.index[df['feature_types'] == 'GEX'].tolist() atac = bmmc[:, atac_features] rna = bmmc[:, rna_features] # RNA if not os.path.isdir(basepath + "/rna"): os.makedirs(basepath + "/rna") rnacounts = rna.layers["counts"] io.mmwrite(target=basepath + "/rna/counts.mtx", a=rnacounts) genenames = rna.var rna_md = rna.obs genenames.to_csv(basepath + "/rna/genes.csv") rna_md.to_csv(basepath + "/rna/metadata.csv") # ATAC if not os.path.isdir(basepath + "/atac"): os.makedirs(basepath + "/atac") ataccounts = atac.layers["counts"] io.mmwrite(target=basepath + "/atac/counts.mtx", a=ataccounts) peaknames = atac.var atac_md = atac.obs peaknames.to_csv(basepath + "/atac/peaks.csv") atac_md.to_csv(basepath + "/atac/metadata.csv") |
1 2 3 4 5 6 7 8 9 10 11 12 13 | library(Signac) library(EnsDb.Hsapiens.v86) library(GenomeInfoDb) # extract gene annotations from EnsDb annotations.hg38 <- GetGRangesFromEnsDb(ensdb = EnsDb.Hsapiens.v86) # change to UCSC style since the data was mapped to hg38 seqlevelsStyle(annotations.hg38) <- 'UCSC' genome(annotations.hg38) <- "hg38" # save saveRDS(object = annotations.hg38, file = "data/annotations_hg38.rds") |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 | library(Seurat) library(Signac) library(GenomicRanges) args = commandArgs(trailingOnly = TRUE) ncore <- as.numeric(args[1]) annot.path <- args[2] peak.path <- args[3] sample_name <- args[4] annot <- readRDS(annot.path) peaks <- read.table(file = peak.path, col.names = c("chrom", "start", "end")) peaks <- makeGRangesFromDataFrame(df = peaks) peaks <- keepStandardChromosomes(peaks, pruning.mode = "coarse") message("Using ", ncore, " cores") if (ncore > 1) { library(future) plan("multicore", workers = ncore) options(future.globals.maxSize = 100 * 1024^3) } # create object countfile <- list.files(paste0("data/pbmc_atac/", sample_name), pattern = "*.h5$", full.names = TRUE) frags <- list.files(paste0("data/pbmc_atac/", sample_name), pattern = "*.tsv.gz$", full.names = TRUE) md_file <- list.files(paste0("data/pbmc_atac/", sample_name), patter = "*.csv$", full.names = TRUE) counts <- Read10X_h5(filename = countfile) metadata <- read.table(file = md_file, sep = ",", header = TRUE, row.names = 1) assay <- CreateChromatinAssay(counts = counts, fragments = frags, genome = "hg38", annotation = annot, sep = c(":", "-")) obj <- CreateSeuratObject(counts = assay, meta.data = metadata, assay = "ATAC") # process ATAC DefaultAssay(obj) <- "ATAC" obj <- NucleosomeSignal(obj) obj <- TSSEnrichment(obj) # add gene activity ga <- GeneActivity(object = obj) obj[["GA"]] <- CreateAssayObject(counts = ga) obj <- NormalizeData(object = obj, assay = 'GA') # create assay containing shared peaks common_counts <- FeatureMatrix( fragments = Fragments(obj)[[1]], features = peaks, cells = colnames(obj) ) obj[["common"]] <- CreateChromatinAssay(counts = common_counts, fragments = frags, annotation = annot, genome = "hg38") DefaultAssay(obj) <- "common" obj <- FindTopFeatures(obj, min.cutoff = "q5") obj <- RunTFIDF(obj) obj <- RunSVD(obj) obj <- RunUMAP(obj, reduction = "lsi", dims = 2:30, reduction.name = "umap.atac", store.model = TRUE) # save object saveRDS(object = obj, file = paste0("objects/", sample_name, ".rds")) |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 | library(Seurat) library(Signac) library(GenomicRanges) args = commandArgs(trailingOnly = TRUE) ncore <- as.numeric(args[1]) annot.path <- args[2] peak.path <- args[3] sample_name <- args[4] ref <- args[5] annot <- readRDS(annot.path) peaks <- read.table(file = peak.path, col.names = c("chrom", "start", "end")) peaks <- makeGRangesFromDataFrame(df = peaks) peaks <- keepStandardChromosomes(peaks, pruning.mode = "coarse") message("Using ", ncore, " cores") if (ncore > 1) { library(future) plan("multicore", workers = ncore) options(future.globals.maxSize = 100 * 1024^3) } # create object countfile <- list.files(paste0("data/pbmc_multiome/", sample_name), pattern = "*.h5$", full.names = TRUE) frags <- list.files(paste0("data/pbmc_multiome/", sample_name), pattern = "*.tsv.gz$", full.names = TRUE) md_file <- list.files(paste0("data/pbmc_multiome/", sample_name), patter = "*.csv$", full.names = TRUE) counts <- Read10X_h5(filename = countfile) metadata <- read.table(file = md_file, sep = ",", header = TRUE, row.names = 1) obj <- CreateSeuratObject(counts = counts$`Gene Expression`, assay = "RNA", meta.data = metadata) obj[["ATAC"]] <- CreateChromatinAssay(counts = counts$Peaks, fragments = frags, genome = "hg38", annotation = annot, sep = c(":", "-")) # filter low/high count cells obj <- subset(obj, subset = nCount_ATAC > 2000 & nCount_ATAC < 70000 & nCount_RNA > 500 & nCount_RNA < 15000) # process ATAC DefaultAssay(obj) <- "ATAC" obj <- NucleosomeSignal(obj) obj <- TSSEnrichment(obj) # create assay containing shared peaks common_counts <- FeatureMatrix( fragments = Fragments(obj)[[1]], features = peaks, cells = colnames(obj) ) obj[["common"]] <- CreateChromatinAssay(counts = common_counts, fragments = frags, annotation = annot, genome = "hg38") DefaultAssay(obj) <- "common" obj <- FindTopFeatures(obj, min.cutoff = "q5") obj <- RunTFIDF(obj) obj <- RunSVD(obj) obj <- RunUMAP(obj, reduction = "lsi", dims = 2:30, reduction.name = "umap.atac", store.model = TRUE) # process RNA DefaultAssay(obj) <- "RNA" obj <- FindVariableFeatures(obj) obj <- NormalizeData(obj) obj <- ScaleData(obj) obj <- RunPCA(obj) obj <- RunUMAP(obj, reduction = "pca", dims = 1:30, reduction.name = "umap.rna", store.model = TRUE) # map to reference reference <- readRDS(ref) anchors <- FindTransferAnchors( reference = reference, query = obj, reference.assay = "SCT", query.assay = "RNA", dims = 1:30, normalization.method = "SCT", recompute.residuals = TRUE, reduction = 'cca' ) pred <- TransferData( anchorset = anchors, refdata = reference$celltype.l2, weight.reduction = obj[['pca']], dims = 1:30 ) obj <- AddMetaData(object = obj, metadata = pred) # save object saveRDS(object = obj, file = paste0("objects/", sample_name, ".rds")) |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 | library(Seurat) library(SeuratDisk) if (!requireNamespace("SeuratDisk", quietly = TRUE)) { if (!requireNamespace("remotes", quietly = TRUE)) { options(repos=list('CRAN'="https://cloud.r-project.org")) install.packages("remotes") } remotes::install_github("mojaveazure/seurat-disk") } # load preprocessed reference obj <- LoadH5Seurat(file = "data/pbmc_reference/pbmc_multimodal.h5seurat") # add raw RNA counts counts <- Matrix::readMM("data/pbmc_reference/GSM5008737_RNA_3P-matrix.mtx.gz") features <- read.table("data/pbmc_reference/GSM5008737_RNA_3P-features.tsv.gz", sep = "\t") cells <- read.table("data/pbmc_reference/GSM5008737_RNA_3P-barcodes.tsv.gz", sep = "\t") rownames(counts) <- features$V1 colnames(counts) <- cells$V1 counts <- as(counts, "dgCMatrix") obj[["RNA"]] <- CreateAssayObject(counts = counts) DefaultAssay(obj) <- "RNA" obj <- FindVariableFeatures(obj) obj <- NormalizeData(obj) # compute SCT residuals for reference obj <- SCTransform( object = obj, reference.SCT.model = slot(object = obj[['SCT']], name = "SCTModel.list")[[1]], residual.features = VariableFeatures(obj[["SCT"]]), assay = "RNA", new.assay.name = "SCT", verbose = FALSE ) obj$dataset <- "reference" saveRDS(obj, "objects/pbmc_reference.rds") |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 | library(Seurat) library(ggplot2) args <- commandArgs(trailingOnly = TRUE) samplename <- as.character(args[[1]]) obj <- readRDS(paste0("objects/", samplename, ".rds")) if (grepl("reference", samplename)) { gb <- 'celltype.l2' } else { annot <- read.table(paste0("annotations/", samplename, ".tsv.gz"), sep = "\t") obj <- AddMetaData(obj, annot) gb <- 'predicted.id' } p <- DimPlot( object = obj, label = TRUE, repel = TRUE, group.by = gb, pt.size = 0.1 ) + NoLegend() ggsave( filename = paste0("plots/", samplename, ".png"), plot = p, height = 6, width = 7 ) |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 | library(Signac) library(Seurat) args <- commandArgs(trailingOnly = TRUE) samplename <- as.character(args[[1]]) query <- readRDS(paste0("objects/", samplename, ".rds")) if (grepl('reference', samplename)) { # reference object given as input pred <- query[[]] write.table( x = pred, file = paste0("annotations/", samplename, ".tsv"), sep = "\t", quote = FALSE ) } else { # map query to reference is_multiome <- grepl("multiome", samplename) is_pbmc <- grepl("pbmc", samplename) if (is_pbmc) { reference <- readRDS("objects/pbmc_reference.rds") } else { reference <- readRDS("objects/bmmc_reference.rds") } if (is_multiome & is_pbmc) { # pbmc multiome normalization.method <- "SCT" recompute.residuals <- TRUE query.assay <- 'RNA' reference.assay <- 'SCT' reduction <- 'pcaproject' weight.reduction <- 'pcaproject' wt.dims <- 1:40 } else if (is_multiome) { # bmmc multiome normalization.method <- "LogNormalize" recompute.residuals <- FALSE query.assay <- 'RNA' reference.assay <- 'RNA' reduction <- 'pcaproject' weight.reduction <- 'pcaproject' wt.dims <- 1:40 } else { # atac normalization.method <- 'LogNormalize' recompute.residuals <- FALSE query.assay <- 'GA' reference.assay <- 'RNA' reduction <- 'cca' weight.reduction <- query[['lsi']] wt.dims <- 2:30 } anchors <- FindTransferAnchors( reference = reference, query = query, normalization.method = normalization.method, recompute.residuals = recompute.residuals, reference.assay = reference.assay, query.assay = query.assay, reduction = reduction, dims = 1:40 ) pred <- TransferData( anchorset = anchors, refdata = reference$celltype.l2, weight.reduction = weight.reduction, dims = wt.dims ) write.table( x = pred, file = paste0("annotations/", samplename, ".tsv"), sep = "\t", quote = FALSE ) } |
50 51 52 53 | shell: """ wget -i {input} -P data/pbmc_multiome/{wildcards.dset} """ |
60 61 62 63 | shell: """ wget -i {input} -P data/pbmc_atac/{wildcards.dset} """ |
70 71 72 73 74 | shell: """ wget -i {input} -P data/bmmc_atac aws s3 sync s3://mpal-hg38/public/ ./data/bmmc_atac/ --request-payer """ |
81 82 83 84 85 86 | shell: """ wget -i {input} -P data/bmmc_multiome cd data/bmmc_multiome gzip -d GSE194122_openproblems_neurips2021_multiome_BMMC_processed.h5ad.gz """ |
92 93 94 95 | shell: """ aws s3 sync s3://openproblems-bio/public/post_competition/multiome ./data/bmmc_multiome """ |
101 102 103 104 | shell: """ aws s3 cp s3://bmmc-reference/public/fullref.Rds {output} --request-payer """ |
111 112 113 114 | shell: """ wget -i {input} -P data/{wildcards.dset} """ |
120 | shell: "wget https://cf.10xgenomics.com/samples/cell-arc/2.0.0/pbmc_granulocyte_sorted_10k/pbmc_granulocyte_sorted_10k_atac_peaks.bed -O {output}" |
136 137 138 139 | shell: """ python processing_code/convert_h5ad.py data/bmmc_multiome """ |
153 154 155 156 | shell: """ Rscript processing_code/bmmc_multiome.R """ |
166 167 168 169 | shell: """ Rscript processing_code/bmmc_atac.R """ |
176 177 178 179 | shell: """ Rscript processing_code/bmmc_reference.R """ |
186 | shell: "Rscript processing_code/get_annotations.R" |
193 | shell: "Rscript processing_code/pbmc_reference.R" |
204 205 206 207 208 209 210 211 212 | shell: """ Rscript processing_code/pbmc_multiome.R \ {threads} \ {input.annot} \ {input.peaks} \ {wildcards.sample} \ {input.ref} """ |
222 223 224 225 226 227 228 229 | shell: """ Rscript processing_code/pbmc_atac.R \ {threads} \ {input.annot} \ {input.peaks} \ {wildcards.sample} """ |
241 242 243 244 245 | shell: """ Rscript processing_code/refmap.R {wildcards.sample} gzip annotations/{wildcards.sample}.tsv """ |
254 255 256 257 | shell: """ Rscript processing_code/plot.R {wildcards.sample} """ |
Support
- Future updates