Workflow Steps and Code Snippets

1 tagged steps and code snippets that match keyword GenomeInfoDbData

A single-cell RNA-seq pipeline (v1.0.6)

  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
log <- file(snakemake@log[[1]], open = "wt")
sink(log)
sink(log, type = "message")

message("CONFIGURATION STEP")
# A. Parameters: 
# 1. Load libraries. 
suppressMessages(library("GenomeInfoDbData"))
suppressMessages(library("Seurat"))
suppressMessages(library("RColorBrewer"))
suppressMessages(library("slingshot"))
suppressMessages(library("SingleCellExperiment"))
suppressMessages(library("rmarkdown"))
suppressMessages(library("gam"))
suppressMessages(library("pheatmap"))
if (snakemake@params[["graphics"]]) {suppressMessages(library("rgl"))}
if (snakemake@params[["graphics"]]) {suppressMessages(library("htmltools"))}
message("1. Libraries were loaded.")

# 2. Folder configuration. 
dir.name = snakemake@params[["output_dir"]]
input_data = snakemake@input[["seurat_obj"]]
folders = c("1_preprocessing", "2_normalization", "3_clustering", "4_degs", "5_gs", "6_traj_in", "7_func_analysis")
message("2. Folder paths were set.")

# 3. Get variables from Snakemake.  
selected_res = snakemake@params[["selected_res"]]
start.clus = snakemake@params[["start_clus"]]
end.clus = snakemake@params[["end_clus"]]
n_var_genes = snakemake@params[["n_var_genes"]]
n_plotted_genes = snakemake@params[["n_plotted_genes"]]
random_seed = snakemake@params[["random_seed"]]
pc = snakemake@params[["pc"]]
graphics = snakemake@params[["graphics"]]
ram = snakemake@resources[["mem_mb"]]
threads = snakemake@threads
message("3. Parameters were loaded.")

# 4. Analysis configuration. 
# RAM configuration.
options(future.globals.maxSize = ram*1024^2)
# Set seed.
if (is.numeric(random_seed)) {
  set.seed(random_seed)
}
message(paste0("4. Seed was set at ", random_seed, "."))
message("Configuration finished.")
message("\n")

message("PROCESSING STEP")
# 10. Trajectory inference analysis using slingshot.
# 10.1. Seurat object is converted to SingleCellExperiment object (required by slingshot) & cluster resolution is selected. 
seurat <- readRDS(input_data)
assay_type <- seurat@active.assay
cluster_res <- paste0(assay_type,"_snn_res.",selected_res)
if (!(cluster_res %in% colnames(seurat@meta.data))){
  stop("Specified resolution is not available.")
}
seurat.sim <- as.SingleCellExperiment(seurat)
message("1. Seurat object was loaded.")

if (graphics){
  # Running UMAP to get 3 dimensions.
  seurat3D <- RunUMAP(seurat,dims = 1:pc, n.components = 3, verbose = FALSE)
  seurat.sim <- as.SingleCellExperiment(seurat)
  seurat3D.sim <- as.SingleCellExperiment(seurat3D)
  message("1.5. 3D UMAP was calculated.")
}

# 10.2. Slingshot algorithm (dimensions = UMAP). There are 4 options depending of the start and end cluster in the following trajectory:
if(is.numeric(start.clus) == FALSE && is.numeric(end.clus) == FALSE){
  seurat.sim <- slingshot(seurat.sim, clusterLabels=cluster_res, reducedDim="UMAP")
  if (graphics) {seurat3D.sim <- slingshot(seurat3D.sim, clusterLabels=cluster_res, reducedDim="UMAP")}
  const = FALSE
} else if(is.numeric(start.clus) == TRUE && is.numeric(end.clus) == FALSE){
  seurat.sim <- slingshot(seurat.sim, clusterLabels=cluster_res, reducedDim="UMAP", start.clus = start.clus)
  if (graphics) {seurat3D.sim <- slingshot(seurat3D.sim, clusterLabels=cluster_res, reducedDim="UMAP", start.clus = start.clus)}
  const = TRUE
} else if(is.numeric(start.clus) == FALSE && is.numeric(end.clus) == TRUE){
  seurat.sim <- slingshot(seurat.sim, clusterLabels=cluster_res, reducedDim="UMAP", end.clus = end.clus)
  if (graphics) {seurat3D.sim <- slingshot(seurat3D.sim, clusterLabels=cluster_res, reducedDim="UMAP", end.clus = end.clus)}
  const = TRUE
} else if(is.numeric(start.clus) == TRUE && is.numeric(end.clus) == TRUE){
  seurat.sim <- slingshot(seurat.sim, clusterLabels=cluster_res, reducedDim="UMAP", start.clus = start.clus, end.clus=end.clus)
  if (graphics) {seurat3D.sim <- slingshot(seurat3D.sim, clusterLabels=cluster_res, reducedDim="UMAP", start.clus = start.clus, end.clus=end.clus)}
  const = TRUE
}
message("2. Slingshot trajectories were calculated.")

# 10.3. Plots generation.
# 10.3.1. Set the number of colours from the palette and extend it.
n_col <- length(levels(seurat.sim@colData[, cluster_res]))
getPalette <- colorRampPalette(brewer.pal(9,'Set1'))

if (graphics) {
  # 10.3.2. Set a good window size for the 3D plots.
  r3dDefaults$windowRect <- c(0,50, 1024, 720) 

  # 10.3.3. Curves 3D plot with legend --> HTML output.
  plot3d(reducedDims(seurat3D.sim)$UMAP[,1:3], col = getPalette(n_col)[seurat3D.sim@colData[, cluster_res]])
  plot3d.SlingshotDataSet(SlingshotDataSet(seurat3D.sim), type = "curves", add = TRUE, lwd =3)
  legend3d("topright", legend=paste0("Cluster - ", levels(seurat3D.sim@colData[, cluster_res])), pch=16, col=getPalette(n_col), inset=c(0))
  p_curves <- rglwidget(width = 1240, height = 1024)
  htmltools::save_html(htmltools::tagList(p_curves), file = paste0(dir.name, "/", folders[6], "/", paste0("3D_curves_", selected_res, "_res.html")))

  # 10.3.4. Lineage 3D plot with legend --> HTML output.
  plot3d(reducedDims(seurat3D.sim)$UMAP[,1:3], col = getPalette(n_col)[seurat3D.sim@colData[, cluster_res]])
  plot3d.SlingshotDataSet(SlingshotDataSet(seurat3D.sim), type = "lineages", add = TRUE, lwd =3)
  legend3d("topright", legend=paste0("Cluster - ", levels(seurat3D.sim@colData[, cluster_res])), pch=16, col=getPalette(n_col), inset=c(0))
  p_lineages <- rglwidget(width = 1240, height = 1024)
  htmltools::save_html(htmltools::tagList(p_lineages), file = paste0(dir.name, "/", folders[6], "/", paste0("3D_lineages_", selected_res, "_res.html")))
}
# 10.3.5. Curves 2D plot with legend --> pdf output. 
pdf(paste0(dir.name, "/", folders[6], "/2D_curves_", selected_res, "_res.pdf"), width = 11, height = 11)
plot(reducedDims(seurat.sim)$UMAP, col = getPalette(n_col)[seurat.sim@colData[, cluster_res]],
     pch=16, asp = 1, main = "2D curves - Clusters Trajectories")
legend("topright", legend=paste0("Cluster - ", levels(seurat.sim@colData[, cluster_res])), pch=16, col=getPalette(n_col))
lines(SlingshotDataSet(seurat.sim), lwd=2, col = 'black')
dev.off()

# 10.3.6. Lineage 2D plot with legend --> pdf output.
pdf(paste0(dir.name, "/", folders[6], "/2D_lineages_", selected_res, "_res.pdf"), width = 11, height = 11)
plot(reducedDims(seurat.sim)$UMAP, col = getPalette(n_col)[seurat.sim@colData[, cluster_res]],
     pch=16, asp = 1, main = "2D lineages - Clusters Trajectories")
legend("topright", legend=paste0("Cluster - ", levels(seurat.sim@colData[, cluster_res])), pch=16, col=getPalette(n_col))
lines(SlingshotDataSet(seurat.sim), lwd=2, type = 'lineages', col = 'black', show.constraints = const)
dev.off()
message("3. Trajectory lots were done.")

# 10.4. Temporally expressed genes heatmap.
# 10.4.1. Pseudotime is obtained.
t <- seurat.sim$slingPseudotime_1

# 10.4.2. Get the n variable genes.
Y <- assays(seurat.sim)$logcounts
var_genes <- names(sort(apply(Y,1,var),decreasing = TRUE))[1:n_var_genes]
Y <- Y[var_genes,]

# 10.4.3. Fitting a gam model.
gam.pval <- apply(Y,1,function(z){
  d <- data.frame(z=z, t=t)
  suppressWarnings({
    tmp <- suppressWarnings(gam(z ~ lo(t), data=d))
  })
  p <- summary(tmp)[3][[1]][2,3]
  p
})

# 10.4.4. Get the top genes selected by p-val and plot the heatmap.
topgenes <- names(sort(gam.pval, decreasing = FALSE))[1:n_plotted_genes]
heatdata <- assays(seurat.sim)$logcounts[topgenes, order(t, na.last = NA)]
heatclus <- seurat.sim@colData[,cluster_res][order(t, na.last = NA)]
annotation <- data.frame("Cluster" = heatclus, row.names = colnames(heatdata))
ann_colors <- list("Cluster" = setNames(getPalette(n_col),
                                        levels(heatclus)))
# 10.4.5. Plot the genes.
pheatmap(heatdata, cluster_cols = FALSE, 
         color =  colorRampPalette(c("yellow", "red"))(100),
         annotation_col = annotation, annotation_colors = ann_colors,
         show_colnames = FALSE, filename = paste0(dir.name, "/", folders[6], "/temporally_expressed_heatmaps_", selected_res, "_res.pdf"))
message("4. Pseudotime heatmap was obtained.")

# Save used objects.
if (graphics) {save(seurat.sim, seurat3D.sim, file = paste0(dir.name, "/", folders[6], "/slingshot_sce_objects.RData"))
} else {save(seurat.sim, file = paste0(dir.name, "/", folders[6], "/slingshot_sce_objects.RData"))}
message("5. R objects were saved.")
data / bioconductor

GenomeInfoDbData

Species and taxonomy ID look up tables used by GenomeInfoDb: Files for mapping between NCBI taxonomy ID and species. Used by functions in the GenomeInfoDb package.