Workflow Steps and Code Snippets

191 tagged steps and code snippets that match keyword DESeq2

  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
log <- file(snakemake@log[[1]], open="wt")
sink(log)
sink(log, type="message")

library("DESeq2")

counts <- read.csv(snakemake@input[["counts"]], sep="\t", row.names="Geneid")
counts <- subset(counts, select = -c(Chr, Start, End, Strand, Length))

samples <- read.csv(snakemake@params[["samples"]], sep="\t", stringsAsFactors = TRUE)
units <- read.csv(snakemake@params[["units"]], sep="\t", stringsAsFactors = TRUE)

coldata <- merge(unique(samples), units, by='sample')
coldata <- subset(coldata, select=-c(fq1, fq2))
coldata[] <- lapply(coldata, as.factor)
rownames(coldata) <- paste(coldata$sample, paste("t",coldata$time,sep=""), coldata$replicate, sep=".")

# Speciffic steps needed for our analysis
#--------------------------------------------------
counts$G_M.t0.1 <- counts$G.t0.1
counts$G_M.t0.2 <- counts$G.t0.2

zero_rows <- data.frame(rbind(
  c("G_M", "mitomycin", 1, 0),
  c("G_M", "mitomycin", 2, 0)
))
colnames(zero_rows) <- colnames(coldata)
rownames(zero_rows) <- c("G_M.t0.1", "G_M.t0.2")
coldata <- rbind(coldata, zero_rows)
#--------------------------------------------------

# Filter rows with less than 5 counts in at least 2 samples
filter <- apply(counts, 1, function(x) length(x[x>5])>=2)
filtered <- counts[filter,]

# Create a DESeq data set with a full desing.
ddsFull <- DESeqDataSetFromMatrix(
    countData = filtered,
    colData = coldata,
    design = as.formula(snakemake@params[["model"]]))

# Use Likelihood-ratio test on a reduced model.
dds <- DESeq(ddsFull, test="LRT", reduced = as.formula(snakemake@params[["reduced_model"]]))
saveRDS(dds, file = snakemake@output[["rds"]])

alpha <- as.numeric(snakemake@params[["alpha"]])
result <- results(dds, alpha=alpha)

# Construct a list of shrunken LFC for the coefficients of the model.
logfc.list <- list()
logfc.list[[1]] <- data.frame(result)
for (i in 2:length(resultsNames(dds))) {
    coeff = resultsNames(dds)[i]
    res.t <- results(dds, name=coeff, test="Wald", alpha = alpha)
    out.t <- as.data.frame(res.t)
    out.t <- cbind(geneid = rownames(out.t), out.t)
    write.table(
        out.t,
        file=paste("deseq2/", coeff, ".tsv", sep=""),
        sep="\t",
        quote=FALSE,
        row.names=FALSE
    )

    res_shrunken <- lfcShrink(dds, coef=coeff, type="apeglm", res = res.t)
    out.table <- as.data.frame(res_shrunken)
    out.table <- cbind(geneid = rownames(out.table), out.table)
    write.table(
        out.table,
        file=paste("deseq2/", coeff, "_shrunken.tsv", sep=""),
        sep="\t",
        quote=FALSE,
        row.names=FALSE
    )

    svg(paste("deseq2/", coeff, "ma_plot.svg", sep=""))
    plotMA(result, ylim=c(-2,2))
    dev.off()

    # Add the shrunken log2FC to the list.
    lfc <- res_shrunken[, "log2FoldChange", drop=FALSE]
    colnames(lfc) <- c(paste(coeff, "sLFC", sep="."))
    logfc.list[[i]] <- lfc
}

# Write combined results in the main output.
out.result <- do.call(cbind, logfc.list)
out.result <- out.result[order(out.result$padj),]
out.result <- cbind(geneid = rownames(out.result), out.result)
  write.table(
    out.result,
    file=snakemake@output[["table"]],
    sep="\t",
    quote=FALSE,
    row.names=FALSE
  )

# Use variance stabilising transformation and then plot PCA.
vsd <- vst(ddsFull, blind=FALSE)
svg(snakemake@output[["pca_plot"]])
plotPCA(vsd, intgroup=c("condition", "time"), ntop = 500)
dev.off()

Snakemake workflow: rna-seq-star-deseq2

40
41
script:
    "../scripts/deseq2-init.R"
76
77
script:
    "../scripts/deseq2.R"
 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
log <- file(snakemake@log[[1]], open="wt")
sink(log)
sink(log, type="message")

library("DESeq2")

parallel <- FALSE
if (snakemake@threads > 1) {
    library("BiocParallel")
    # setup parallelization
    register(MulticoreParam(snakemake@threads))
    parallel <- TRUE
}

# colData and countData must have the same sample order, but this is ensured
# by the way we create the count matrix
cts <- read.table(snakemake@input[["counts"]], header=TRUE, row.names="gene", check.names=FALSE)
coldata <- read.table(snakemake@params[["samples"]], header=TRUE, row.names="sample", check.names=FALSE)

dds <- DESeqDataSetFromMatrix(countData=cts,
                              colData=coldata,
                              design=~ condition)

# remove uninformative columns
dds <- dds[ rowSums(counts(dds)) > 1, ]
# normalization and preprocessing
dds <- DESeq(dds, parallel=parallel)

saveRDS(dds, file=snakemake@output[[1]])
 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
log <- file(snakemake@log[[1]], open="wt")
sink(log)
sink(log, type="message")

library("DESeq2")

parallel <- FALSE
if (snakemake@threads > 1) {
    library("BiocParallel")
    # setup parallelization
    register(MulticoreParam(snakemake@threads))
    parallel <- TRUE
}

dds <- readRDS(snakemake@input[[1]])

contrast <- c("condition", snakemake@params[["contrast"]])
res <- results(dds, contrast=contrast, parallel=parallel)
# shrink fold changes for lowly expressed genes
res <- lfcShrink(dds, contrast=contrast, res=res)
# sort by p-value
res <- res[order(res$padj),]
# TODO explore IHW usage


# store results
svg(snakemake@output[["ma_plot"]])
plotMA(res, ylim=c(-2,2))
dev.off()

write.table(as.data.frame(res), file=snakemake@output[["table"]])
 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
log <- file(snakemake@log[[1]], open="wt")
sink(log)
sink(log, type="message")

library("DESeq2")

# load deseq2 data
dds <- readRDS(snakemake@input[[1]])

# obtain normalized counts
counts <- rlog(dds, blind=FALSE)
svg(snakemake@output[[1]])
plotPCA(counts, intgroup=snakemake@params[["pca_labels"]])
dev.off()
 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
library(biomformat)
library(DESeq2)

# Set logging information
log <- file(snakemake@log[[1]], open="wt")
sink(log)
sink(log, type="message")

# Load the input table
print("Loading table...")
table <- biomformat::read_biom(snakemake@input[["table"]])
table <- as.matrix(biomformat::biom_data(table))

# Load the metadata
print("Loading metadata...")
metadata <- read.table(snakemake@input[["metadata"]], sep="\t", header=T,
                       row.names=1)

covariate <- snakemake@params[[1]][["factor_name"]]
target <- snakemake@params[[1]][["target_level"]]
reference <- snakemake@params[[1]][["reference_level"]]
confounders <- snakemake@params[[1]][["confounders"]]

# Harmonize table and metadata samples
print("Harmonizing table and metadata samples...")
samples <- colnames(table)
metadata <- subset(metadata, rownames(metadata) %in% samples)
metadata[[covariate]] <- as.factor(metadata[[covariate]])
metadata[[covariate]] <- relevel(metadata[[covariate]], reference)
sample_order <- row.names(metadata)
table <- table[, sample_order]
row.names(table) <- paste0("F_", row.names(table)) # Append F_ to features to avoid R renaming

# Create the design formula
print("Creating design formula...")
design.formula <- paste0("~", covariate)
if (length(confounders) != 0) {
    confounders_form = paste(confounders, collapse=" + ")
    design.formula <- paste0(design.formula, " + ", confounders_form)
}
design.formula <- as.formula(design.formula)

# Run DESeq2
print("Running DESeq2...")
dds <- DESeq2::DESeqDataSetFromMatrix(
    countData=table,
    colData=metadata,
    design=design.formula
)
dds.results <- DESeq2::DESeq(dds, sfType="poscounts")
saveRDS(dds.results, snakemake@output[[2]])
print("Saved RDS!")

# Create results table and modify result names
results <- DESeq2::results(
    dds.results,
    format="DataFrame",
    tidy=TRUE,
    cooksCutoff=FALSE,
    pAdjustMethod="BH",
    contrast=c(covariate, target, reference)
)
row.names(results) <- gsub("^F_", "", row.names(table))

# Save results to output file
write.table(results, file=snakemake@output[[1]], sep="\t")
print("Saved differentials!")
tool / biotools

DESeq2

R/Bioconductor package for differential gene expression analysis based on the negative binomial distribution. Estimate variance-mean dependence in count data from high-throughput sequencing assays and test for differential expression based on a model using the negative binomial distribution.