Workflow Steps and Code Snippets

5 tagged steps and code snippets that match keyword TxDb.Hsapiens.UCSC.hg19.knownGene

Snakemake pipeline for calling CNAs from Affymetrix (now Thermo Fisher) CytoScan and OncoScan arrays

 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
# -- 0.1 Activate the project-local renv library before loading packages
renv::activate()

library(EaCoN)
library(data.table)
library(qs)
library(GenomicRanges)
library(TxDb.Hsapiens.UCSC.hg19.knownGene)
library(S4Vectors)

# -- 0.2 Parse snakemake arguments
# `snakemake` is injected by the Snakemake R script runner at execution time
input <- snakemake@input
params <- snakemake@params
output <- snakemake@output

# -- 1. Read in gamma files
# Recursively collect every gammaEval text file under the output directory
gammaFiles <- list.files(input$out_dir, '.*gammaEval.*txt', recursive=TRUE, 
    full.names=TRUE)
print(gammaFiles)


# -- 2. Load pancanPloidy data as reference
# `pancan.obj.segless` is created in the workspace by the data() call above
data(pancanPloidy.noSegs)
pancan.ploidy <- pancan.obj.segless$breaks$ploidy

# Read each gamma table and pair it with its sample name.
# NOTE(review): the sample name is taken from the second path component
# (`strsplit(file, '/')[[1]][2]`) -- assumes paths like out_dir/<sample>/...;
# confirm against the actual directory layout.
all.fits <- lapply(gammaFiles, function(file) {
    list(fit = fread(file, sep = '\t'),
        sample = strsplit(file, '/')[[1]][2]
    )
})
names(all.fits) <- sapply(all.fits, function(x) x$sample)

.fitsToVector <- function(fit.list) {
    #' Convert a list of gamma-fit tables into a named list of fit vectors.
    #'
    #' @param fit.list A list in which each element is a list whose first
    #'   element `fit` is a two-column table (column 1: metric names,
    #'   column 2: values) and whose second element is the sample name.
    #' @return A named list (one element per sample) of named lists mapping
    #'   lower-cased metric names to their values.
    #'
    #' Fixes vs. previous version: removed the stray debug `print(fit)` and
    #' replaced the list-growing `c()` loop with a preallocating lapply().
    newList <- lapply(fit.list, function(entry) {
        fit <- as.list(entry$fit[[2]])
        names(fit) <- tolower(unlist(entry$fit[[1]]))
        fit
    })
    # Sample name is positionally the second element of each entry
    names(newList) <- unlist(lapply(fit.list, function(entry) entry[[2]]))
    newList
}

# Flatten the fit tables into per-sample named lists
vec.fits <- .fitsToVector(all.fits)

# -- 3. Annotated the RDS data associated with the gamma files

# Change to fix error in annotateRDS.Batch (it expects to run inside 'procdata')
setwd('procdata')

print('Starting annotation...')
gr.cnv <- EaCoN:::annotateRDS.Batch(
    all.fits,
    'ASCAT',
    nthread = params$nthreads,
    gamma.method = 'score',
    pancan.ploidy = pancan.ploidy
)
print('finished annotation')

setwd('..')

# Save raw results object to disk
# NOTE(review): `nthread=` relies on partial matching of qsave's `nthreads`
# argument -- consider spelling it out.
qsave(gr.cnv, file = file.path(input$out_dir, 
    paste0(params$analysis_name, 'optimal_gamma_list.qs')), 
    nthread=params$nthreads)

## ---- Create GRangesList of segmentation results and save them to disk

# Extract segmentation data.frames from the results object
segmentation_df_list <- lapply(gr.cnv, function(x) x$seg)

# Convert all data.frames to GRanges and return in a list
list_of_gRanges <- lapply(segmentation_df_list, function(x) 
    makeGRangesFromDataFrame(x, keep.extra.columns = TRUE))

# Convert list of GRanges objects into GRangesList object
cnv_grList <- GRangesList(list_of_gRanges)

# Save GRangesList to disk for downstream analysis
qsave(cnv_grList, file = file.path(params$results, 
    paste0(params$analysis_name, '_grList.qs')), nthread=params$nthreads)

Easy Copy Number Analysis (EaCoN) Pipeline

 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
# -- 0.1 Activate the project-local renv library before loading packages
renv::activate()
library(EaCoN)
library(data.table)
library(qs)
library(GenomicRanges)
library(TxDb.Hsapiens.UCSC.hg19.knownGene)
library(org.Hs.eg.db)
library(BiocParallel)
library(RaggedExperiment)

# -- 0.2 Parse snakemake parameters
# `snakemake` is injected by the Snakemake R script runner at execution time
input <- snakemake@input
params <- snakemake@params
output <- snakemake@output

# -- 0.3 Load utility functions (provides buildGRangesFromASCNAndL2R, etc.)
source(file.path("scripts", "utils.R"))

# -- 1. Load the optimal gamma for each sample (one row per sample)
best_fits <- fread(input[[1]])

# -- 2. Find the .RDS files associated with the best fits

# Search one sample directory for RDS files whose relative path matches
# `pattern`; returns the matching full paths.
.find_matching_rds <- function(dir, pattern) {
    grep(pattern=pattern, list.files(dir, recursive=TRUE, full.names=TRUE),
        value=TRUE)
}

sample_dirs <- file.path(params$out_dir, best_fits$sample_name)
gamma_patterns <- paste0(".*gamma", sprintf("%.2f", best_fits$gamma), "/.*RDS$")

# Map() yields one entry per sample, named by the sample directory path
best_fit_files <- Map(.find_matching_rds, sample_dirs, gamma_patterns)
l2r_files <- Map(.find_matching_rds, sample_dirs, ".*L2R/.*RDS$")

# -- 3. Load the best fit ASCN and L2R data and build GRanges objects

.build_granges_from_cnv <- function(ascn, l2r) {
    # Deserialize the ASCN and L2R results and merge them into a single
    # GRanges object (buildGRangesFromASCNAndL2R comes from scripts/utils.R).
    buildGRangesFromASCNAndL2R(readRDS(ascn), readRDS(l2r))
}

# Configure the parallel backend with the requested number of workers
BPPARAM <- BiocParallel::bpparam()
BiocParallel::bpworkers(BPPARAM) <- params$nthreads
gr_list <- BiocParallel::bpmapply(.build_granges_from_cnv,
    best_fit_files, l2r_files,
    SIMPLIFY=FALSE, USE.NAMES=TRUE, BPPARAM=BPPARAM
)

# Names are full directory paths at this point; keep only the sample folder
names(gr_list) <- basename(names(gr_list))

# -- 4. Construct a RaggedExperiment object
ragged_exp <- as(GRangesList(gr_list), "RaggedExperiment")

# include annotated bins to summarize the RaggedExperiment with
txdb <- TxDb.Hsapiens.UCSC.hg19.knownGene
genome_bins <- binReferenceGenome()
annotated_bins <- annotateGRangesWithTxDB(genome_bins, txdb=txdb)

# include annotated genes to summarize the RaggedExperiment with
gene_granges <- genes(txdb)
annotated_genes <- annotateGRangesWithTxDB(gene_granges, txdb=txdb)

metadata(ragged_exp) <- list(
    annotated_genome_bins=annotated_bins,
    annotated_genes=annotated_genes,
    # Reducer for summarizing overlapping ranges: numeric scores are
    # averaged; categorical scores collapse into "value:count" pairs.
    simplifyReduce=function(scores, ranges, qranges) {
        if (is.numeric(scores)) {
            x <- mean(scores, na.rm=TRUE)
        } else {
            count_list <- as.list(table(scores))
            x <- paste0(
                paste0(names(count_list), ":", unlist(count_list)),
                collapse=","
            )
        }
        return(x)
    }
)

# -- Save files to disk
# Fixed: previously read `params$nthread` (NULL -- every other use in this
# workflow is `params$nthreads`), which silently disabled multithreading.
qsave(ragged_exp, file=output[[1]], nthreads=params$nthreads)

A Snakemake based modular Workflow that facilitates RNA-Seq analyses with a special focus on splicing

  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
library("FRASER")

library("TxDb.Hsapiens.UCSC.hg19.knownGene")
library("org.Hs.eg.db")

# Requirements: 1. A sample annotation table.
# 2. Two count matrices are needed: one containing counts for the splice junctions, i.e. the
# split-read counts, and one containing the splice-site counts, i.e. the counts of non-split
# reads overlapping the splice sites present in the splice junctions.


set_up_fraser_dataset_object <- function(sample_annotation_file_path) {
  #' Build a FraserDataSet from a sample annotation table.
  #'
  #' @param sample_annotation_file_path     Path to the tab-separated sample
  #'   annotation file (must contain a `bamFile` column)
  #'
  #' @return FRASER object with RNA read counts

  # Load the annotation table
  sample_table <- fread(sample_annotation_file_path, header=TRUE, sep="\t",
                        stringsAsFactors=FALSE)
  # FRASER requires bamFile entries to be proper file paths
  sample_table$bamFile <- file.path(sample_table$bamFile)

  # --------------- Creating a FRASER object ----------------
  fraser_settings <- FraserDataSet(colData=sample_table, name="Fraser Dataset")

  # Count reads directly from the BAM files and return the resulting object
  countRNAData(fraser_settings)
}


run_filtering <- function(fraser_object,
                          plot_filter_expression_file, plot_cor_psi5_heatmap_file,
                          plot_cor_psi3_heatmap_file, plot_cor_theta_heatmap_file) {
  #' Compute PSI values, filter low-expression junctions, and plot QC heatmaps.
  #'
  #' @param fraser_object     FRASER object with counted reads
  #' @param plot_filter_expression_file     Output jpeg for the filter-expression plot
  #' @param plot_cor_psi5_heatmap_file     Output jpeg for the psi5 correlation heatmap
  #' @param plot_cor_psi3_heatmap_file     Output jpeg for the psi3 correlation heatmap
  #' @param plot_cor_theta_heatmap_file     Output jpeg for the theta correlation heatmap
  #'
  #' @return Filtered FRASER object

  # Plot one sample-correlation heatmap to `file`. The device is registered
  # with on.exit() so it is closed even when plotting fails -- the previous
  # version's tryCatch left a dangling jpeg device open on error.
  .plot_cor_heatmap <- function(fds, type, file) {
    jpeg(file, width=800, height=800)
    on.exit(dev.off(), add=TRUE)
    tryCatch(
      plotCountCorHeatmap(fds, type=type, logit=TRUE, normalized=FALSE),
      error = function(e) {
        print("Error in creating Heatmap of the sample correlation")
        print(e)
      }
    )
  }

  # --------------- Filtering ----------------
  # Compute the main splicing metric: the PSI value
  fds <- calculatePSIValues(fraser_object)
  # Run filters on junctions: at least one sample must have 20 reads.
  # filter=FALSE so we can plot first and apply the subsetting afterwards.
  fds <- filterExpressionAndVariability(fds,
                                        minExpressionInOneSample=20,
                                        minDeltaPsi=0.0,  # keep junctions regardless of PSI spread
                                        filter=FALSE      # only flag passing introns, do not subset yet
                                        )

  # Plot filtering results
  jpeg(plot_filter_expression_file, width=800, height=800)
  print(plotFilterExpression(fds, bins=100))
  dev.off()

  # Finally apply filter results: keep only junctions that passed
  fds_filtered <- fds[mcols(fds, type="j")[,"passed"],]

  # ---------------- Heatmaps of sample correlations ----------------
  .plot_cor_heatmap(fds_filtered, "psi5", plot_cor_psi5_heatmap_file)
  .plot_cor_heatmap(fds_filtered, "psi3", plot_cor_psi3_heatmap_file)
  .plot_cor_heatmap(fds_filtered, "theta", plot_cor_theta_heatmap_file)

  return(fds_filtered)
}


detect_dif_splice <- function(fraser_object, output_fraser_analysis_set_object_file,
                              plot_normalized_cor_psi5_heatmap_file,
                              plot_normalized_cor_psi3_heatmap_file,
                              plot_normalized_cor_theta_heatmap_file) {
  #' Fit the FRASER splicing model, plot normalized correlation heatmaps,
  #' annotate introns with gene symbols, and save the dataset to disk.
  #'
  #' @param fraser_object     Filtered FRASER object
  #' @param output_fraser_analysis_set_object_file     Target path for the saved dataset
  #' @param plot_normalized_cor_psi5_heatmap_file     Output jpeg (psi5, normalized)
  #' @param plot_normalized_cor_psi3_heatmap_file     Output jpeg (psi3, normalized)
  #' @param plot_normalized_cor_theta_heatmap_file     Output jpeg (theta, normalized)
  #'
  #' @return Fitted and annotated FRASER object

  # Plot one normalized sample-correlation heatmap. on.exit() guarantees the
  # jpeg device is closed even when plotting fails -- the previous version
  # leaked an open graphics device on error.
  .plot_normalized_heatmap <- function(fds, type, file) {
    jpeg(file, width=800, height=800)
    on.exit(dev.off(), add=TRUE)
    tryCatch(
      plotCountCorHeatmap(fds, type=type, normalized=TRUE, logit=TRUE),
      error = function(e) {
        print("Error in creating Heatmap of the sample correlation")
        print(e)
      }
    )
  }

  # ----------------- Detection of differential splicing -----------------
  # 1. Fitting the splicing model: normalize data and correct for confounding
  # effects using a denoising autoencoder. Computationally heavy on
  # real-size datasets and can take a while.
  #
  # q: The encoding dimension used during fitting. Can be tuned with
  # optimHyperParams; see https://rdrr.io/bioc/FRASER/man/optimHyperParams.html
  fds <- FRASER(fraser_object, q=c(psi5=3, psi3=5, theta=2))

  # Check normalization results in heatmaps
  .plot_normalized_heatmap(fds, "psi5", plot_normalized_cor_psi5_heatmap_file)
  .plot_normalized_heatmap(fds, "psi3", plot_normalized_cor_psi3_heatmap_file)
  .plot_normalized_heatmap(fds, "theta", plot_normalized_cor_theta_heatmap_file)

  # 2. Differential splicing analysis
  # 2.1 annotate introns with the HGNC symbols of the corresponding gene
  txdb <- TxDb.Hsapiens.UCSC.hg19.knownGene
  orgDb <- org.Hs.eg.db
  fds <- annotateRangesWithTxDb(fds, txdb=txdb, orgDb=orgDb)

  # 2.2 Save results (writes RDS files into the savedObjects folder)
  print("Saving FraserAnalysisDataSetTest results")
  saveFraserDataSet(fds, dir=dirname(dirname(output_fraser_analysis_set_object_file)),
                    name=basename(output_fraser_analysis_set_object_file))

  return(fds)
}


main_function <- function() {
  #' Entry point: wire snakemake inputs/outputs into the FRASER workflow
  #' (dataset construction -> filtering -> differential splicing).
  in_sample_annotation_file <- snakemake@input[["sample_annotation_file"]]

  # Output: Plot files - After filtering, no normalization
  plot_filter_expression_file <- snakemake@output[["plot_filter_expression_file"]]
  plot_cor_psi5_heatmap_file <- snakemake@output[["plot_cor_psi5_heatmap_file"]]
  plot_cor_psi3_heatmap_file <- snakemake@output[["plot_cor_psi3_heatmap_file"]]
  plot_cor_theta_heatmap_file <- snakemake@output[["plot_cor_theta_heatmap_file"]]

  # TODO: Set plotType to "sampleCorrelation"; however, these plots are not helpful and can be ignored...
  # plot_cor_psi5_top100_heatmap_file <- snakemake@output[["plot_cor_psi5_top100_heatmap_file"]]
  # plot_cor_psi3_top100_heatmap_file <- snakemake@output[["plot_cor_psi3_top100_heatmap_file"]]
  # plot_cor_theta_top100_heatmap_file <- snakemake@output[["plot_cor_theta_top100_heatmap_file"]]

  # Output: Plot files - After filtering, normalization
  plot_normalized_cor_psi5_heatmap_file <- snakemake@output[["plot_normalized_cor_psi5_heatmap_file"]]
  plot_normalized_cor_psi3_heatmap_file <- snakemake@output[["plot_normalized_cor_psi3_heatmap_file"]]
  plot_normalized_cor_theta_heatmap_file <- snakemake@output[["plot_normalized_cor_theta_heatmap_file"]]

  # Output: Differential splicing analysis
  output_fraser_dataset_object_file <- snakemake@output[["fraser_data_set_object_file"]]


  # TODO: Integrate additional count files from external resources -> Failed...
  # additional_junction_counts_file <- snakemake@params[["additional_junction_counts_file"]]
  # additional_splice_site_counts_file <- snakemake@params[["additional_splice_site_counts_file"]]

  # Register a parallel backend sized by the rule's thread allocation
  threads <- snakemake@threads
  register(MulticoreParam(workers=threads))

  # 1. Create FRASER object
  fraser_obj <- set_up_fraser_dataset_object(in_sample_annotation_file)
  print("FRASER: FRASER dataset object created")

  # 2. Run filtering
  filtered_fraser_obj <- run_filtering(fraser_obj,
                                       plot_filter_expression_file,
                                       plot_cor_psi5_heatmap_file,
                                       plot_cor_psi3_heatmap_file,
                                       plot_cor_theta_heatmap_file)
  print("FRASER: Filtering done")

  # 3. Detect differential splicing
  detect_dif_splice(filtered_fraser_obj, output_fraser_dataset_object_file,
                    plot_normalized_cor_psi5_heatmap_file,
                    plot_normalized_cor_psi3_heatmap_file,
                    plot_normalized_cor_theta_heatmap_file
                    )
  print("FRASER: Differential splicing analysis done")
}

main_function()

MPRA GWAS Builder: snakemake workflow

  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
# NOTE(review): save.image() at the very top dumps the (mostly empty)
# workspace -- looks like a debugging leftover; confirm it is still needed.
save.image("logs/intersect_epigenome.RData")


# Redirect messages and printed output to the snakemake log file
log <- file(snakemake@log[[1]], open="wt")
sink(log, type = "message")
sink(log, type = "output")

## Load packages
# library(SNPlocs.Hsapiens.dbSNP144.GRCh37)
# library(SNPlocs.Hsapiens.dbSNP151.GRCh38)
# library(BSgenome.Hsapiens.UCSC.hg19)
# library(TxDb.Hsapiens.UCSC.hg19.knownGene)
# library(VariantAnnotation)
# library(rtracklayer)
# library(plyranges)


## Set up project
# library(ProjectTemplate)
# load.project()

# str(snakemake@config$epigenome)

library(rtracklayer)
library(plyranges)
library(tidyverse)

# Fix the RNG seed from the workflow config for reproducibility
set.seed(snakemake@config$seed)

## Load data
# ldlink_full_results <- read_tsv("./data/raw/lib3_design/ldlink_full_results.txt")
# haploreg_full_results <- read_tsv("./data/raw/lib3_design/haploreg_full_results.txt")

# LD SNP table produced by an upstream rule
ld_snps <- read_tsv(snakemake@input$ld_snps)

# Chain file for lifting hg19 coordinates over to hg38
hg19_to_hg38_chain <- import.chain("assets/hg19ToHg38.over.chain")

# Read one narrowPeak file, lifting hg19 peaks over to hg38 coordinates.
.load_peak_bed <- function(bedfile, genome) {
    peaks <- read_narrowpeaks(bedfile)
    if (genome == "hg19") {
        peaks <- unlist(liftOver(peaks, hg19_to_hg38_chain))
    }
    peaks
}

if ("epigenome_csv" %in% names(snakemake@config) && file.exists(snakemake@config$epigenome_csv)) {

    # Peak sets described by a CSV manifest (columns: name, bedfile, genome)
    epigenome_csv <- read_csv(snakemake@config$epigenome_csv)
    epigenome_keys <- epigenome_csv$name
    epigenome_bed <- map2(epigenome_csv$bedfile, epigenome_csv$genome, .load_peak_bed)

} else {

    # Peak sets defined inline in the workflow config
    epigenome_keys <- names(snakemake@config$epigenome)
    epigenome_bed <- map(snakemake@config$epigenome,
                         function(entry) .load_peak_bed(entry$bedfile, entry$genome))
}

# Sanitize keys and merge peak sets that share a key into one reduced range set
epigenome_df <- tibble(key = epigenome_keys,
                       bed = epigenome_bed) %>%
    mutate(key = str_replace_all(key, "[^A-Za-z0-9_]", "_")) %>%
    group_by(key) %>%
    summarise(bed = list(reduce(bed, union_ranges)))

epigenome_keys <- epigenome_df$key
epigenome_bed <- epigenome_df$bed %>% set_names(epigenome_df$key)

# Build a GRanges of LD SNPs from their b38 coordinates (1-bp ranges)
ld_snps_gr <- ld_snps %>%
    filter(!is.na(coord_b38)) %>%
    extract(coord_b38, c("chr", "pos"), "(chr[0-9XY]+):(\\d+)", remove = F) %>%
    mutate(start = pos, end = pos) %>%
    select(-pos) %>%
    makeGRangesFromDataFrame(keep.extra.columns = T)

# For each peak set, a character vector of "chr:start-end" labels in the
# same order as the ranges themselves
epigenome_ranges <- map(epigenome_bed,
    ~ as_tibble(.) %>%
    mutate(range = paste0(seqnames, ":", start, "-", end)) %>%
    pull(range))

# For every SNP, record the label of the first overlapping peak in each peak
# set (NA when the SNP hits no peak): findOverlaps(select="first") returns an
# index per SNP which is used to subset the label vector
mcols(ld_snps_gr) <- cbind(mcols(ld_snps_gr),
    map2_dfc(epigenome_ranges, epigenome_bed, ~ .x[findOverlaps(ld_snps_gr, .y, maxgap = 0, select = "first")]))


# Load eQTL tables (one per tissue) when configured; otherwise use an empty
# table so the downstream join is a no-op
if (!is.null(snakemake@config$eqtls)) {
    eqtls <-
        map_dfr(snakemake@config$eqtls,
            ~ read_tsv(.$file), .id = "tissue")

    # Parse chromosome and position out of GTEx-style variant ids
    eqtls <- eqtls %>%
        extract(variant_id, c("chr", "pos"), "^(chr[0-9XY]+)_(\\d+)", remove = F) %>%
        mutate(pos = as.integer(pos))
} else {
    eqtls <- tibble(chr = character(), pos = integer(), variant_id = character())
}




# Flatten the annotated SNPs back to a table: collapse the per-peak-set
# columns into a single semicolon-separated `Epigenome` column and attach
# matching eQTL variant ids by genomic position
ld_snps_epigenome <- ld_snps_gr %>%
    as_tibble(.name_repair = "minimal") %>%
    select(-end, -width, -strand) %>%
    dplyr::rename(chr = seqnames,
                  pos = start) %>%
    mutate(across(all_of(epigenome_keys), ~ ifelse(!is.na(.), cur_column(), NA), .names = "{.col}_dummy")) %>%
    unite(Epigenome, ends_with("_dummy"), sep = ";", na.rm = T) %>%
    left_join(eqtls %>% distinct(chr, pos, eQTL = variant_id))


write_tsv(ld_snps_epigenome, snakemake@output$epigenome)


# Per-peak-set summary statistics: number of peaks and total covered width
peak_stats <- tibble(peakset = epigenome_keys, bed = epigenome_bed) %>%
    mutate(peak_num = map_int(epigenome_bed, length),
           peak_width = map_int(epigenome_bed, ~ sum(width(.)))) %>%
    select(-bed)

write_csv(peak_stats, snakemake@output$peak_stats)

A Snakemake workflow to analyse and visualise Illumina Infinium Methylation arrays

 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
addAnno <- function(dmrs, outputLoc = "nearestLocation", featureLocForDistance="TSS", 
                    bindingRegion=c(-2000, 2000), organism = "hg38"){
    #' Annotate differentially methylated regions (DMRs) with nearby genes.
    #'
    #' @param dmrs Object coercible to GRanges (e.g. DMRcate output).
    #' @param outputLoc Passed to annotatePeakInBatch `output` (e.g. "nearestLocation").
    #' @param featureLocForDistance Feature anchor for distance calculation (e.g. "TSS").
    #' @param bindingRegion Window around the anchor, e.g. c(-2000, 2000).
    #' @param organism Genome build: "hg38" or "hg19".
    #'
    #' @return GRanges of annotated DMRs with a `symbol` column of gene symbols.

    library(GenomicRanges)
    library(ChIPpeakAnno)
    library(org.Hs.eg.db)

    dmrs <- GRanges(dmrs)

    # Select the TxDb matching the requested genome build
    if (organism == "hg38") {
        library(TxDb.Hsapiens.UCSC.hg38.knownGene)
        annoData <- toGRanges(TxDb.Hsapiens.UCSC.hg38.knownGene)
    } else if (organism == "hg19") {
        library(TxDb.Hsapiens.UCSC.hg19.knownGene)
        annoData <- toGRanges(TxDb.Hsapiens.UCSC.hg19.knownGene)
    } else {
        # Previously an unsupported build left `annoData` undefined and the
        # function failed later with a cryptic error; fail fast instead.
        stop("Unsupported organism '", organism,
             "': expected \"hg38\" or \"hg19\"", call. = FALSE)
    }

    # Harmonize chromosome naming (e.g. "1" vs "chr1") before overlapping
    seqlevelsStyle(dmrs) <- seqlevelsStyle(annoData)

    anno_dmrs <- annotatePeakInBatch(dmrs, AnnotationData = annoData, 
                                    output = outputLoc, 
                                    FeatureLocForDistance = featureLocForDistance,
                                    bindingRegion = bindingRegion)

    # Map Entrez feature ids to gene symbols
    anno_dmrs$symbol <- xget(anno_dmrs$feature, org.Hs.egSYMBOL)

    return(anno_dmrs)

}


main <- function(input, output, params, log) {
    #' Run DMR annotation for the snakemake rule and write csv/bed/rds output.
    #'
    #' @param input snakemake input list; `input$rds` is the DMR RDS file.
    #' @param output snakemake output list with `csv`, `bed` and `rds` targets.
    #' @param params snakemake params: `output` (annotation mode),
    #'   `featureLocForDistance`, `bindingRegion`, `organism`.
    #' @param log snakemake log list with `out` and `err` paths.

    # Log: redirect stdout/messages to the rule's log files. on.exit()
    # reverts the sinks and closes the connections when the function exits
    # (previously the sinks and file connections were left open).
    out <- file(log$out, open = "wt")
    err <- file(log$err, open = "wt")
    sink(out, type = "output")
    sink(err, type = "message")
    on.exit({
        sink(type = "message")
        sink(type = "output")
        close(err)
        close(out)
    }, add = TRUE)

    # Script
    library(minfi)
    library(DMRcate)
    library(rtracklayer)

    dmrs <- readRDS(input$rds)

    # params
    outputLoc <- params$output # "nearestLocation"
    featureLocForDistance <- params$featureLocForDistance # "TSS"
    bindingRegion <- params$bindingRegion  # c(-2000, 2000)
    organism <- params$organism

    # output 
    save <- output$csv

    # run annotation
    dmrs <- addAnno(dmrs, outputLoc, featureLocForDistance, bindingRegion, organism)

    # save output
    write.csv(as.data.frame(dmrs), save)

    rtracklayer::export(dmrs, output$bed) 

    saveRDS(dmrs, file = output$rds)

}

main(snakemake@input, snakemake@output, snakemake@params, snakemake@log)
data / bioconductor

TxDb.Hsapiens.UCSC.hg19.knownGene

Annotation package for TxDb object(s): exposes annotation databases generated from UCSC as TxDb objects.