Help improve this workflow!
This workflow has been published but could be further improved with some additional meta data:- Keyword(s) in categories input, output, operation, topic
You can help improve this workflow by suggesting the addition or removal of keywords, suggest changes and report issues, or request to become a maintainer of the Workflow .
Introduction
Cellsnake can be run directly using the snakemake workflow. We recommend the wrapper but the snakemake workflow give more control in some use cases.
The main cellsnake repo is here : https://github.com/sinanugur/cellsnake
Installation
You may pull the workflow from the GitHub repo and create a clean environment. Mamba installation is highly recommended.
conda install mamba -c conda-forge # to install Mamba
git clone https://github.com/sinanugur/scrna-workflow.git
cd scrna-workflow
mamba env create --name scrna-workflow --file environment.yml
conda activate scrna-workflow
For Apple Silicon (i.e. M1, M2 etc.) architecture, you have to put CONDA_SUBDIR=osx-64 before creating the environment.
CONDA_SUBDIR=osx-64 mamba env create --name scrna-workflow --file environment.yml
conda activate scrna-workflow
After the environment created and activated successfully, to install all the required R packages, you should run the installation script, this may take some time:
bash install_r_packages.sh
Quick Start Example
You can start a minimal run by calling, sample runs are expected in data folder.
snakemake -j 10 --config datafolder=data option=minimal
Then we can run integration.
snakemake -j 10 --config option=integration
Now it is time to work on the integrated sample. We can run standard workflow on the integrated object which is always generates at the same location.
snakemake -j 10 --config datafolder=analyses_integrated/seurat/integrated.rds option=standard is_integrated_sample=True --rerun-incomplete
You may change some of the options or you may provide a config file as well, for example.
snakemake -j 10 --config datafolder=analyses_integrated/seurat/integrated.rds option=standard is_integrated_sample=True --configfile=config.yaml --rerun-incomplete
Code Snippets
6 7 8 9 | shell: """ {cellsnake_path}workflow/scripts/scrna-seurat-integration.R --rds "{input}" --sampleid {integration_id} --output.rds {output} --reduction {reduction} --dims {dims} """ |
23 24 25 26 27 28 29 | run: shell(""" rm -r {output.outdir}/counts; {cellsnake_path}workflow/mg2sc/src/scMeG-kraken.py --input {input.bam} --outdir {output.outdir} --DBpath {kraken_db_folder} --threads {threads} --minimum-hit-groups {min_hit_groups} --confidence {confidence} --prefix {wildcards.sample} """) #if kraken_extra_files is False: # shell("rm -r {output.kr} {output.classified}") |
39 40 | shell: "{cellsnake_path}workflow/scripts/scrna-kraken2-collapse.py {input.outdir}/counts {input.hierarchy} {wildcards.taxa} {output}" |
47 48 | shell: """Rscript -e 'SeuratDisk::Convert("{input}",dest = "h5seurat", overwrite = TRUE)'""" |
56 57 | shell: "{cellsnake_path}workflow/scripts/scrna-kraken2-data-parser.R --h5seurat {input} --output.rds {output.microbiome_rds} --output.plot {output.plot} --sampleid {wildcards.sample} --taxa {wildcards.taxa} --min.features {microbiome_min_features} --min.cells {microbiome_min_cells}" |
67 68 | shell: "{cellsnake_path}workflow/scripts/scrna-microbiome-dimplot.R --rds {input.rds} --microbiome.rds {input.microbiome_rds} --dimplot {output.dimplot} --tplot {output.tplot} --reduction.type {wildcards.reduction} --taxa {wildcards.taxa}" |
79 80 | shell: "{cellsnake_path}workflow/scripts/scrna-microbiome-sigplot.R --rds {input.rds} --microbiome.rds {input.microbiome_rds} --taxa {wildcards.taxa} --sigtable {output.xlsx} --idents {wildcards.i}" |
89 90 | shell: """{cellsnake_path}workflow/scripts/scrna-combine-microbiome-rds.R --rds "{input}" --sampleid {integration_id} --output.rds {output}""" |
42 43 44 45 46 | run: if is_integrated_sample is True: shell("cp {input.raw} {output.rds}") else: shell("{cellsnake_path}workflow/scripts/scrna-read-qc.R --data.dir {input.raw} --output.rds {output.rds} --sampleid {wildcards.sample} --percent.rp {percent_rp} --percent.mt {wildcards.percent_mt} --min.features {min_features} --max.features {max_features} --max.molecules {max_molecules} --min.cells {min_cells} --before.violin.plot {output.before} --after.violin.plot {output.after} {params.mt_param}") |
58 59 60 61 62 | run: if is_integrated_sample is True: shell("{cellsnake_path}workflow/scripts/scrna-clusteringtree.R --rds {input} --clplot {output.clustree} --integration") else: shell("{cellsnake_path}workflow/scripts/scrna-clusteringtree.R --rds {input} --scale.factor {scale_factor} --nfeature {highly_variable_features} --variable.selection.method {variable_selection_method} --normalization.method {normalization_method} --clplot {output.clustree} --heplot {output.heatmap} --hvfplot {output.hvfplot} --jeplot {output.jackandelbow}") |
80 81 82 83 | shell: "{cellsnake_path}workflow/scripts/scrna-normalization-pca.R --rds {input} {params.doublet_filter} --normalization.method {normalization_method} --cpu {threads} " "--scale.factor {scale_factor} --reference {singler_ref} --variable.selection.method {variable_selection_method} --nfeature {highly_variable_features} --resolution {params.paramaters[resolution]} " "--output.rds {output.rds} {umap_plot} {tsne_plot} {params.integration}" |
94 95 | shell: "{cellsnake_path}workflow/scripts/scrna-metrics.R --rds {input.rds} --ccplot {output.ccplot} --ccbarplot {output.ccbarplot} --html {output.html} --idents {wildcards.i} --xlsx {output.xlsx}" |
106 107 | shell: "{cellsnake_path}workflow/scripts/scrna-technicals.R --rds {input.rds} --fplot {output.fplot} --cplot {output.cplot} --mtplot {output.mtplot} --rpplot {output.rpplot}" |
120 121 | shell: "{cellsnake_path}workflow/scripts/scrna-dimplot.R --rds {input} --reduction.type {wildcards.reduction} --pdfplot {output.pl} --htmlplot {output.html} --idents {wildcards.i} --percentage {min_percentage_to_plot} {show_labels}" |
135 136 | shell: "{cellsnake_path}workflow/scripts/scrna-dimplot.R --rds {input.rds} --reduction.type {wildcards.reduction} --pdfplot {output.pl} --htmlplot {output.html} --csv {input.prediction} --idents singler --percentage {min_percentage_to_plot} {show_labels}" |
147 148 | shell: "{cellsnake_path}workflow/scripts/scrna-find-markers.R --rds {input} --idents {wildcards.i} --logfc.threshold {logfc_threshold} --test.use {test_use} --output.rds {output.rds}" |
159 160 | shell: "{cellsnake_path}workflow/scripts/scrna-marker-tables.R --rds {input} --output.xlsx.positive {output.positive} --output.xlsx.all {output.allmarkers}" |
168 169 | shell: "{cellsnake_path}workflow/scripts/scrna-top-marker-plot.R --xlsx {input} --output.plot {output}" |
177 178 | shell: "{cellsnake_path}workflow/scripts/scrna-marker-heatmap.R --rds {input.rds} --xlsx {input.excel} --output.plot {output} --idents {wildcards.i}" |
189 190 | shell: "{cellsnake_path}workflow/scripts/scrna-marker-plots.R --rds {input.rds} --xlsx {input.excel} --top_n {marker_plots_per_cluster_n} --output.plot.dir {output} --reduction.type {wildcards.reduction}" |
203 204 | shell: """{cellsnake_path}workflow/scripts/scrna-selected-marker-plots.R --rds {input} --gene "{params.gene}" --output.plot.dir {params.sdir} --reduction.type {wildcards.reduction} --idents {wildcards.i}""" |
214 215 | shell: "{cellsnake_path}workflow/scripts/scrna-convert-to-h5ad.R --rds {input.rds} --output {output}" |
222 223 | shell: "{cellsnake_path}workflow/scripts/scrna-singler-annotation.R --rds {input} --output {output.prediction} --reference {singler_ref} --granulation {singler_granulation}" |
236 237 | shell: "{cellsnake_path}workflow/scripts/scrna-singler-plots.R --rds {input.rds} --prediction {input.prediction} --sheplot {output.sheplot} --pheplot {output.pheplot} --sheplottop {output.sheplottop} --xlsx {output.xlsx} --idents {wildcards.i}" |
250 251 | shell: "{cellsnake_path}workflow/scripts/scrna-celltypist.py {input} {output.dotplot} {output.outputdir} {output.xlsx} {celltypist_model} {wildcards.i}" |
260 261 262 263 | shell: """ {cellsnake_path}workflow/scripts/scrna-celltypist.R --rds {input.rds} --csv {input.csv} --output.tsne.plot {output.tsne} --output.umap.plot {output.umap} --percentage {min_percentage_to_plot} {show_labels} """ |
275 276 | shell: "{cellsnake_path}workflow/scripts/scrna-kegg.R --xlsx {input} --output.rds {output.rds} --mapping {mapping} --organism {organism} --output.kegg {output.kegg} --output.mkegg {output.mkegg} --output.gse {output.gse} --output.mgse {output.mgse}" |
285 286 | shell: "{cellsnake_path}workflow/scripts/scrna-go_analysis.R --xlsx {input} --output.rds {output.rds} --mapping {mapping} --output.go {output.go} --output.gse {output.gse}" |
295 296 | shell: "{cellsnake_path}workflow/scripts/scrna-find-pairwise-markers.R --rds {input.rds} --logfc.threshold {logfc_threshold} --test.use {test_use} --output.rds {output.rds} --metadata {input.metadata} --metadata.column {wildcards.i}" |
306 307 | shell: "{cellsnake_path}workflow/scripts/scrna-marker-tables.R --rds {input} --output.xlsx.positive {output.positive} --output.xlsx.all {output.allmarkers}" |
314 315 | shell: "{cellsnake_path}workflow/scripts/scrna-volcano.R --rds {input.rds} --vplot {output.pdf}" |
327 328 329 330 331 | run: if wildcards.i == "majority_voting": shell("{cellsnake_path}workflow/scripts/scrna-gsea.R --idents {wildcards.i} --rds {input.rds} --gseafile {input.gseafile} --output.xlsx {output.xlsx} --csv {input.csv}") else: shell("{cellsnake_path}workflow/scripts/scrna-gsea.R --idents {wildcards.i} --rds {input.rds} --gseafile {input.gseafile} --output.xlsx {output.xlsx}") |
341 342 343 344 345 | run: if wildcards.i == "majority_voting": shell("{cellsnake_path}workflow/scripts/scrna-cellchat.R --rds {input.rds} --species {species} --idents {wildcards.i} --output {output.cellchatrds} --csv {input.csv}") else: shell("{cellsnake_path}workflow/scripts/scrna-cellchat.R --rds {input.rds} --species {species} --idents {wildcards.i} --output {output.cellchatrds}") |
352 353 | shell: "{cellsnake_path}workflow/scripts/scrna-cellchat_plots.R --rds {input.cellchatrds} --output.dir {output.outputdir}" |
362 363 | shell: "{cellsnake_path}workflow/scripts/scrna-monocle3.R --rds {input.rds} --output.dir {params.outputdir} --pplot {output.pplot}" |
6 7 | shell: "{cellsnake_path}workflow/scripts/scrna-subset_final_rds.R --rds {input.rds} --output.rds {output.rds} --keywords {keywords} --column {subset_column} --metadata {metadata} {exact}" |
4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 | option_list <- list( optparse::make_option(c("--rds"), type = "character", default = NULL, help = "Cellchat object", metavar = "character" ), optparse::make_option(c("--output.dir"), type = "character", default = NULL, help = "Output directory", metavar = "character" ) ) opt_parser <- optparse::OptionParser(option_list = option_list) opt <- optparse::parse_args(opt_parser) if (is.null(opt$rds)) { optparse::print_help(opt_parser) stop("At least one argument must be supplied (rds file)", call. = FALSE) } require(Seurat) require(tidyverse) require(CellChat) require(ggalluvial) cellchat <- readRDS(file = opt$rds) technicalDetailsPathStep2ResolutionCellChat <- "" pathToPlotsStep2ResolutionCellChat <- paste0(opt$output.dir, "/") dir.create(pathToPlotsStep2ResolutionCellChat, showWarnings = F) groupSize <- as.numeric(table(cellchat@idents)) par(mfrow = c(1, 2), xpd = TRUE) fName <- "Number-of-interactions" pdf(paste(pathToPlotsStep2ResolutionCellChat, fName, ".pdf", sep = ""), 10, 10) print(netVisual_circle(cellchat@net$count, vertex.weight = groupSize, weight.scale = T, label.edge = F, title.name = "Number of interactions")) dev.off() fName <- "Interaction-strength" pdf(paste(pathToPlotsStep2ResolutionCellChat, fName, ".pdf", sep = ""), 10, 10) print(netVisual_circle(cellchat@net$weight, vertex.weight = groupSize, weight.scale = T, label.edge = F, title.name = "Interaction weights/strength")) dev.off() pathToPlotsStep2ResolutionCellChatClusters <- paste0(pathToPlotsStep2ResolutionCellChat, "1-clusters", "/") dir.create(pathToPlotsStep2ResolutionCellChatClusters, showWarnings = F) mat <- cellchat@net$weight par(mfrow = c(3, 4), xpd = TRUE) for (i in 1:nrow(mat)) { mat2 <- matrix(0, nrow = nrow(mat), ncol = ncol(mat), dimnames = dimnames(mat)) mat2[i, ] <- mat[i, ] fName <- stringr::str_replace_all(paste0("edge-weights-for-", rownames(mat)[i]), "/", "_") pdf(paste(pathToPlotsStep2ResolutionCellChatClusters, fName, ".pdf", sep = ""), 10, 10) print(netVisual_circle(mat2, vertex.weight = groupSize, weight.scale = T, edge.weight.max = max(mat), title.name = rownames(mat)[i])) dev.off() } pathToPlotsStep2ResolutionCellChatSignPath <- paste0(pathToPlotsStep2ResolutionCellChat, "2-significant-pathways", "/") dir.create(pathToPlotsStep2ResolutionCellChatSignPath, showWarnings = F) # All the signaling pathways showing significant communications allPathways <- cellchat@netP$pathways for (pathways.show in allPathways) { pathToPlotsStep2ResolutionCellChatSignPathCicrcle <- paste0(pathToPlotsStep2ResolutionCellChatSignPath, "circle-plots", "/") dir.create(pathToPlotsStep2ResolutionCellChatSignPathCicrcle, showWarnings = F) # Hierarchy plot # Here we define `vertex.receive` so that the left portion of the hierarchy plot shows signaling to fibroblast and the right portion shows signaling to immune cells vertex.receiver <- seq(1, 4) # a numeric vector. netVisual_aggregate(cellchat, signaling = pathways.show, vertex.receiver = vertex.receiver) # Circle plot par(mfrow = c(1, 1)) fName <- pathways.show pdf(paste(pathToPlotsStep2ResolutionCellChatSignPathCicrcle, fName, ".pdf", sep = ""), 10, 10) print(netVisual_aggregate(cellchat, signaling = pathways.show, layout = "circle")) dev.off() pathToPlotsStep2ResolutionCellChatSignPathchord <- paste0(pathToPlotsStep2ResolutionCellChatSignPath, "chord-plots", "/") dir.create(pathToPlotsStep2ResolutionCellChatSignPathchord, showWarnings = F) # Chord diagram par(mfrow = c(1, 1)) pdf(paste(pathToPlotsStep2ResolutionCellChatSignPathchord, fName, ".pdf", sep = ""), 10, 10) print(netVisual_aggregate(cellchat, signaling = pathways.show, layout = "chord")) dev.off() pathToPlotsStep2ResolutionCellChatSignPathHeatmap <- paste0(pathToPlotsStep2ResolutionCellChatSignPath, "heatmap-plots", "/") dir.create(pathToPlotsStep2ResolutionCellChatSignPathHeatmap, showWarnings = F) # Heatmap par(mfrow = c(1, 1)) pdf(paste(pathToPlotsStep2ResolutionCellChatSignPathHeatmap, fName, ".pdf", sep = ""), 10, 10) print(netVisual_heatmap(cellchat, signaling = pathways.show, color.heatmap = "Reds")) dev.off() } #> Do heatmap based on a single object # Chord diagram # group.cellType <- c(rep("FIB", 4), rep("DC", 4), rep("TC", 4)) # grouping cell clusters into fibroblast, DC and TC cells # names(group.cellType) <- levels(cellchat@idents) # netVisual_chord_cell(cellchat, signaling = pathways.show, group = group.cellType, title.name = paste0(pathways.show, " signaling network")) #> Plot the aggregated cell-cell communication network at the signaling pathway level pathToPlotsStep2ResolutionCellChatContributionOfLRpairs <- paste0(pathToPlotsStep2ResolutionCellChat, "3-ligand-receptor-pairs", "/") dir.create(pathToPlotsStep2ResolutionCellChatContributionOfLRpairs, showWarnings = F) fName <- "all-pathways-contribution" tryCatch( { pdf(paste(pathToPlotsStep2ResolutionCellChatContributionOfLRpairs, fName, ".pdf", sep = ""), 5, 20) print(netAnalysis_contribution(cellchat, signaling = allPathways)) dev.off() }, error = function(e) { dev.off() } ) # Compute the contribution of each ligand-receptor pair to the overall signaling pathway and visualize cell-cell communication mediated by a single ligand-receptor pair for (pathways.show in allPathways) { pathToPlotsStep2ResolutionCellChatContributionOfLRpairsCircle <- paste0(pathToPlotsStep2ResolutionCellChatContributionOfLRpairs, "circle", "/") dir.create(pathToPlotsStep2ResolutionCellChatContributionOfLRpairsCircle, showWarnings = F) pairLR.CXCL <- extractEnrichedLR(cellchat, signaling = pathways.show, geneLR.return = FALSE) LR.show <- pairLR.CXCL[1, ] # show one ligand-receptor pair # Hierarchy plot vertex.receiver <- seq(1, 4) # a numeric vector fName <- pathways.show pdf(paste(pathToPlotsStep2ResolutionCellChatContributionOfLRpairsCircle, fName, ".pdf", sep = ""), 10, 10) print(netVisual_individual(cellchat, signaling = pathways.show, pairLR.use = LR.show, layout = "circle")) dev.off() pathToPlotsStep2ResolutionCellChatContributionOfLRpairsChord <- paste0(pathToPlotsStep2ResolutionCellChatContributionOfLRpairs, "chord", "/") dir.create(pathToPlotsStep2ResolutionCellChatContributionOfLRpairsChord, showWarnings = F) #> [[1]] # Chord diagram pdf(paste(pathToPlotsStep2ResolutionCellChatContributionOfLRpairsChord, fName, ".pdf", sep = ""), 10, 10) print(netVisual_individual(cellchat, signaling = pathways.show, pairLR.use = LR.show, layout = "chord")) dev.off() } pathToPlotsStep2ResolutionCellChatCellCellC <- paste0(pathToPlotsStep2ResolutionCellChat, "4-cell-cell-communication", "/") dir.create(pathToPlotsStep2ResolutionCellChatCellCellC, showWarnings = F) # show all the significant interactions (L-R pairs) from some cell groups (defined by 'sources.use') to other cell groups (defined by 'targets.use') fName <- "significant-interactions-for-cell-groups" pdf(paste(pathToPlotsStep2ResolutionCellChatCellCellC, fName, ".pdf", sep = ""), 50, 20) print(netVisual_bubble(cellchat, remove.isolate = FALSE)) dev.off() # netVisual_bubble(cellchat, sources.use = 4, targets.use = c(5:11), remove.isolate = FALSE) # show all the significant interactions (L-R pairs) associated with certain signaling pathways fName <- "significant-interactoions-for-singaling-pathways" pdf(paste(pathToPlotsStep2ResolutionCellChatCellCellC, fName, ".pdf", sep = ""), 50, 20) print(netVisual_bubble(cellchat, signaling = allPathways, remove.isolate = FALSE)) dev.off() # show all the significant interactions (L-R pairs) based on user's input (defined by `pairLR.use`) # pairLR.use <- extractEnrichedLR(cellchat, signaling = c("CCL","CXCL","FGF")) # netVisual_bubble(cellchat, sources.use = c(3,4), targets.use = c(5:8), pairLR.use = pairLR.use, remove.isolate = TRUE) #> Comparing communications on a single object #> pathToPlotsStep2ResolutionCellChatCellCellCAllInteractions <- paste0(pathToPlotsStep2ResolutionCellChatCellCellC, "interactions", "/") dir.create(pathToPlotsStep2ResolutionCellChatCellCellCAllInteractions, showWarnings = F) # show all the significant interactions (L-R pairs) from some cell groups (defined by 'sources.use') to other cell groups (defined by 'targets.use') # show all the interactions sending from Inflam.FIB if (FALSE) { # commented out because it takes a lot of time for (g1 in unique(cellchat@meta$identity)) { for (g2 in unique(cellchat@meta$identity)) { tryCatch( { fName <- paste0("from-", g1, "-to-", g2) pdf(paste(pathToPlotsStep2ResolutionCellChatCellCellCAllInteractions, fName, ".pdf", sep = ""), 50, 20) print(netVisual_chord_gene(cellchat, sources.use = g1, targets.use = g2, lab.cex = 0.5, legend.pos.y = 30)) dev.off() }, error = function(cond) { dev.off() }, finally = {} ) } } pathToPlotsStep2ResolutionCellChatCellCellCAllInteractionsWithP <- paste0(pathToPlotsStep2ResolutionCellChatCellCellC, "interactions-with-pathways", "/") dir.create(pathToPlotsStep2ResolutionCellChatCellCellCAllInteractionsWithP, showWarnings = F) for (g1 in unique(cellchat@meta$identity)) { for (g2 in unique(cellchat@meta$identity)) { for (pathways.show in allPathways) { tryCatch( { fName <- paste0("from-", g1, "-to-", g2, "-pathway-", pathways.show) pdf(paste(pathToPlotsStep2ResolutionCellChatCellCellCAllInteractionsWithP, fName, ".pdf", sep = ""), 50, 20) print(netVisual_chord_gene(cellchat, sources.use = g1, targets.use = g2, signaling = pathways.show, lab.cex = 0.5, legend.pos.y = 30)) dev.off() }, error = function(cond) { dev.off() }, finally = {} ) } } } } pathToPlotsStep2ResolutionGE <- paste0(pathToPlotsStep2ResolutionCellChat, "5-gene-expression", "/") dir.create(pathToPlotsStep2ResolutionGE, showWarnings = F) for (pathways.show in allPathways) { fName <- paste0("gene-expression", "-pathway-", pathways.show) pdf(paste(pathToPlotsStep2ResolutionGE, fName, ".pdf", sep = ""), 20, 10) print(plotGeneExpression(cellchat, signaling = pathways.show)) dev.off() } pathToPlotsStep2ResolutionCC <- paste0(pathToPlotsStep2ResolutionCellChat, "6-system-analysis-of-cc-communication-network", "/") dir.create(pathToPlotsStep2ResolutionCC, showWarnings = F) pathToPlotsStep2ResolutionCCNCS <- paste0(pathToPlotsStep2ResolutionCC, "network-centrality-scores/") dir.create(pathToPlotsStep2ResolutionCCNCS, showWarnings = F) # Compute the network centrality scores cellchat <- netAnalysis_computeCentrality(cellchat, slot.name = "netP") # the slot 'netP' means the inferred intercellular communication network of signaling pathways # Visualize the computed centrality scores using heatmap, allowing ready identification of major signaling roles of cell groups for (pathways.show in allPathways) { fName <- pathways.show pdf(paste(pathToPlotsStep2ResolutionCCNCS, fName, ".pdf", sep = ""), 5, 5) print(netAnalysis_signalingRole_network(cellchat, signaling = pathways.show, font.size = 10)) dev.off() } pathToPlotsStep2ResolutionCCDSR <- paste0(pathToPlotsStep2ResolutionCC, "dominant-senders-and-receivers/") dir.create(pathToPlotsStep2ResolutionCCDSR, showWarnings = F) # Visualize the dominant senders (sources) and receivers (targets) in a 2D space # We also provide another intutive way to visualize the dominant senders (sources) and receivers (targets) in a 2D space using scatter plot. # Signaling role analysis on the aggregated cell-cell communication network from all signaling pathways fName <- "aggregated-results" pdf(paste(pathToPlotsStep2ResolutionCC, fName, ".pdf", sep = ""), 10, 10) print(netAnalysis_signalingRole_scatter(cellchat)) dev.off() pathToPlotsStep2ResolutionCCDSRSP <- paste0(pathToPlotsStep2ResolutionCCDSR, "scatter/") dir.create(pathToPlotsStep2ResolutionCCDSRSP, showWarnings = F) pathToPlotsStep2ResolutionCCDH <- paste0(pathToPlotsStep2ResolutionCCDSR, "heatmap/") dir.create(pathToPlotsStep2ResolutionCCDH, showWarnings = F) for (pathways.show in allPathways) { #> Signaling role analysis on the aggregated cell-cell communication network from all signaling pathways # Signaling role analysis on the cell-cell communication networks of interest fName <- pathways.show tryCatch( { pdf(paste(pathToPlotsStep2ResolutionCCDSRSP, fName, ".pdf", sep = ""), 10, 20) print(netAnalysis_signalingRole_scatter(cellchat, signaling = pathways.show)) dev.off() }, error = function(cond) { dev.off() }, finally = {} ) tryCatch( { # Signaling role analysis on the cell-cell communication networks of interest pdf(paste(pathToPlotsStep2ResolutionCCDH, fName, ".pdf", sep = ""), 10, 20) print(netAnalysis_signalingRole_heatmap(cellchat, signaling = pathways.show)) dev.off() }, error = function(cond) { dev.off() }, finally = {} ) } # Signaling role analysis on the aggregated cell-cell communication network from all signaling pathways ht1 <- netAnalysis_signalingRole_heatmap(cellchat, pattern = "outgoing") ht2 <- netAnalysis_signalingRole_heatmap(cellchat, pattern = "incoming") fName <- "aggregated-results-incoming-outgoing" pdf(paste(pathToPlotsStep2ResolutionCC, fName, ".pdf", sep = ""), 10, 5) print(ht1 + ht2) dev.off() # Identify global communication patterns to explore how multiple cell types and signaling pathways coordinate together pathToPlotsStep2ResolutionOCPWSC <- paste0(pathToPlotsStep2ResolutionCellChat, "7-communication-pattern-of-secreting-cells/") dir.create(pathToPlotsStep2ResolutionOCPWSC, showWarnings = F) # write("In outgoing: You look at Cophenetic and Silhouette values and when they begin to drop suddenly then you look at the number (one before dropping), opposite with icoming pattern", paste0(pathToPlotsStep2ResolutionOCPWSC, "instruction.txt")) library(NMF) library(ggalluvial) pathToPlotsStep2ResolutionOCPWSCOut <- paste0(pathToPlotsStep2ResolutionOCPWSC, "outgoing/") dir.create(pathToPlotsStep2ResolutionOCPWSCOut, showWarnings = F) out <- selectK(cellchat, pattern = "outgoing") fName <- "outgoing-communication-pattern" pdf(paste(pathToPlotsStep2ResolutionOCPWSCOut, fName, ".pdf", sep = ""), 10, 5) print(out) dev.off() pathToPlotsStep2ResolutionOCPWSCHeatmap <- paste0(pathToPlotsStep2ResolutionOCPWSCOut, "heatmap/") dir.create(pathToPlotsStep2ResolutionOCPWSCHeatmap, showWarnings = F) pathToPlotsStep2ResolutionOCPWSCRiver <- paste0(pathToPlotsStep2ResolutionOCPWSCOut, "river/") dir.create(pathToPlotsStep2ResolutionOCPWSCRiver, showWarnings = F) pathToPlotsStep2ResolutionOCPWSCDot <- paste0(pathToPlotsStep2ResolutionOCPWSCOut, "dot/") dir.create(pathToPlotsStep2ResolutionOCPWSCDot, showWarnings = F) for (nPatterns in c(1:10)) { tryCatch( { fName <- paste0("pattern-", nPatterns) cellchat <- identifyCommunicationPatterns(cellchat, pattern = "outgoing", k = nPatterns) pdf(paste(pathToPlotsStep2ResolutionOCPWSCHeatmap, fName, ".pdf", sep = ""), 10, 10) print(identifyCommunicationPatterns(cellchat, pattern = "outgoing", k = nPatterns)) dev.off() pdf(paste(pathToPlotsStep2ResolutionOCPWSCRiver, fName, ".pdf", sep = ""), 10, 10) print(netAnalysis_river(cellchat, pattern = "outgoing")) dev.off() pdf(paste(pathToPlotsStep2ResolutionOCPWSCDot, fName, ".pdf", sep = ""), 10, 10) print(netAnalysis_dot(cellchat, pattern = "outgoing")) dev.off() }, error = function(cond) {}, finally = {} ) } pathToPlotsStep2ResolutionOCPWSCIn <- paste0(pathToPlotsStep2ResolutionOCPWSC, "incoming/") dir.create(pathToPlotsStep2ResolutionOCPWSCIn, showWarnings = F) out <- selectK(cellchat, pattern = "incoming") fName <- "incoming-communication-pattern" pdf(paste(pathToPlotsStep2ResolutionOCPWSCIn, fName, ".pdf", sep = ""), 10, 10) print(out) dev.off() pathToPlotsStep2ResolutionOCPWSCHeatmap <- paste0(pathToPlotsStep2ResolutionOCPWSCIn, "heatmap/") dir.create(pathToPlotsStep2ResolutionOCPWSCHeatmap, showWarnings = F) pathToPlotsStep2ResolutionOCPWSCRiver <- paste0(pathToPlotsStep2ResolutionOCPWSCIn, "river/") dir.create(pathToPlotsStep2ResolutionOCPWSCRiver, showWarnings = F) pathToPlotsStep2ResolutionOCPWSCDot <- paste0(pathToPlotsStep2ResolutionOCPWSCIn, "dot/") dir.create(pathToPlotsStep2ResolutionOCPWSCDot, showWarnings = F) for (nPatterns in 1:10) { tryCatch( { fName <- paste0("pattern-", nPatterns) cellchat <- identifyCommunicationPatterns(cellchat, pattern = "incoming", k = nPatterns) pdf(paste(pathToPlotsStep2ResolutionOCPWSCHeatmap, fName, ".pdf", sep = ""), 10, 10) print(identifyCommunicationPatterns(cellchat, pattern = "incoming", k = nPatterns)) dev.off() pdf(paste(pathToPlotsStep2ResolutionOCPWSCRiver, fName, ".pdf", sep = ""), 10, 10) print(netAnalysis_river(cellchat, pattern = "incoming")) dev.off() pdf(paste(pathToPlotsStep2ResolutionOCPWSCDot, fName, ".pdf", sep = ""), 10, 10) print(netAnalysis_dot(cellchat, pattern = "incoming")) dev.off() }, error = function(cond) {}, finally = {} ) } |
3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 | import sys import scanpy as sc import pandas as pd import celltypist data = sc.read_h5ad(sys.argv[1]) dotplot_file_name=sys.argv[2] output_analyses_directory=sys.argv[3] output_xlsx=sys.argv[4] model=sys.argv[5] idents= 'seurat_clusters' if sys.argv[6] is None else sys.argv[6] predictions = celltypist.annotate(data, model = model, majority_voting = True) adata = predictions.to_adata() #'seurat_clusters' dotplot=celltypist.dotplot(predictions, use_as_reference = idents, use_as_prediction = 'majority_voting', show=False,return_fig=True) dotplot.savefig(dotplot_file_name) predictions.to_table(output_analyses_directory) table = pd.crosstab(adata.obs.seurat_clusters, adata.obs.majority_voting) table.to_excel(output_xlsx) |
3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 | option_list <- list( optparse::make_option(c("--sampleid"), type = "character", default = NULL, help = "Sample ID", metavar = "character" ), optparse::make_option(c("--rds"), type = "character", default = NULL, help = "A list of RDS files of Seurat objects", metavar = "character" ), optparse::make_option(c("--csv"), type = "character", default = NULL, help = "Celltypist prediction file", metavar = "character" ), optparse::make_option(c("--output.tsne.plot"), type = "character", default = NULL, help = "Output tsne file name", metavar = "character" ), optparse::make_option(c("--output.umap.plot"), type = "character", default = NULL, help = "Output umap file name", metavar = "character" ), optparse::make_option(c("--percentage"), type = "double", default = 5, help = "Cluster mimnimum percentage to plot", metavar = "double" ), optparse::make_option(c("--labels"), action = "store_true", default = FALSE, help = "Print labels on the plot") ) opt_parser <- optparse::OptionParser(option_list = option_list) opt <- optparse::parse_args(opt_parser) if (is.null(opt$rds) || is.null(opt$csv)) { optparse::print_help(opt_parser) stop("At least one argument must be supplied (rds and sampleid)", call. = FALSE) } require(patchwork) require(tidyverse) require(Seurat) tryCatch( { source("workflow/scripts/scrna-functions.R") }, error = function(cond) { source(paste0(system("python -c 'import os; import cellsnake; print(os.path.dirname(cellsnake.__file__))'", intern = TRUE), "/scrna/workflow/scripts/scrna-functions.R")) } ) scrna <- readRDS(file = opt$rds) DefaultAssay(scrna) <- "RNA" celltypist <- read.csv( opt$csv, row.names = 1 ) scrna@meta.data <- scrna@meta.data %>% tibble::rownames_to_column("barcodes") %>% dplyr::left_join(celltypist %>% as.data.frame() %>% rownames_to_column("barcodes"), by = "barcodes") %>% tibble::column_to_rownames("barcodes") n <- length(scrna@meta.data %>% pull(majority_voting) %>% unique()) palette <- function_color_palette(n) palette <- function_color_palette(n) palette <- setNames(palette, scrna@meta.data %>% pull(majority_voting) %>% unique()) breaks <- scrna@meta.data %>% dplyr::select(majority_voting) %>% dplyr::count(majority_voting) %>% dplyr::mutate(perc = (n * 100) / sum(n)) %>% dplyr::filter(perc >= opt$percentage) %>% dplyr::select(-n, -perc) %>% distinct() %>% pull() %>% as.character() p1 <- DimPlot(scrna, reduction = "tsne", label = opt$labels, group.by = "majority_voting", repel = TRUE, raster = FALSE) & scale_color_manual(values = palette, breaks = breaks) & theme(plot.title = element_blank()) & theme(legend.direction = "horizontal", legend.text = element_text(size = 7)) & guides(colour = guide_legend(ncol = 2, override.aes = list(size = 7))) p2 <- DimPlot(scrna, reduction = "umap", label = opt$labels, group.by = "majority_voting", repel = TRUE, raster = FALSE) & scale_color_manual(values = palette, breaks = breaks) & theme(plot.title = element_blank()) & theme(legend.direction = "horizontal", legend.text = element_text(size = 7)) & guides(colour = guide_legend(ncol = 2, override.aes = list(size = 7))) m <- max(str_count(breaks)) w <- c(7.5 + (m * 0.08) * (floor(length(breaks) / 11) + 1)) (p1 / guide_area()) + plot_layout(heights = c(2.5, 1), widths = c(1, 0.6), guides = "collect") -> p1 (p2 / guide_area()) + plot_layout(heights = c(2.5, 1), widths = c(1, 0.6), guides = "collect") -> p2 ggsave(plot = p1, filename = opt$output.tsne.plot, width = 7.5, height = 8) ggsave(plot = p2, filename = opt$output.umap.plot, width = 7.5, height = 8) |
3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 | option_list <- list( optparse::make_option(c("--sampleid"), type = "character", default = NULL, help = "Sample ID", metavar = "character" ), optparse::make_option(c("-r", "--rds"), type = "character", default = NULL, help = "A list of RDS files of Seurat objects", metavar = "character" ), optparse::make_option(c("--output.rds"), type = "character", default = "output.rds", help = "Output RDS file name [default= %default]", metavar = "character" ) ) opt_parser <- optparse::OptionParser(option_list = option_list) opt <- optparse::parse_args(opt_parser) if (is.null(opt$rds) || is.null(opt$sampleid)) { optparse::print_help(opt_parser) stop("At least one argument must be supplied (rds and sampleid)", call. = FALSE) } require(tidyverse) require(Seurat) require(patchwork) files <- unlist(strsplit(opt$rds, " ")) print(files) for (i in files) { if (!exists("scrna")) { scrna <- readRDS(file = i) } else { scrna <- bind_rows(scrna, readRDS(file = i)) } } saveRDS(scrna, file = opt$output.rds) |
3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 | option_list <- list( optparse::make_option(c("--rds"), type = "character", default = NULL, help = "A list of RDS files of Seurat objects", metavar = "character" ), optparse::make_option(c("--output"), type = "character", default = "output.h5ad", help = "Output h5ad file name", metavar = "character" ) ) opt_parser <- optparse::OptionParser(option_list = option_list) opt <- optparse::parse_args(opt_parser) if (is.null(opt$rds)) { optparse::print_help(opt_parser) stop("At least one argument must be supplied (rds file and sampleid)", call. = FALSE) } if (!requireNamespace("SeuratDisk", quietly = TRUE)) { remotes::install_github("mojaveazure/seurat-disk", upgrade = "never") } require(Seurat) require(SeuratDisk) require(tidyverse) scrna <- readRDS(file = opt$rds) UpdateSeuratObject(scrna) -> scrna DefaultAssay(scrna) <- "RNA" DietSeurat(scrna) -> scrna scrna@meta.data %>% dplyr::mutate(dplyr::across(where(is.factor), as.character)) -> scrna@meta.data output_file_name <- str_remove_all(opt$output, ".h5ad$") SaveH5Seurat(scrna, filename = paste0(output_file_name, ".h5Seurat"), overwrite = TRUE) SeuratDisk::Convert(paste0(output_file_name, ".h5Seurat"), dest = "h5ad", overwrite = TRUE) |
4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 | option_list <- list( optparse::make_option(c("--rds"), type = "character", default = NULL, help = "Processed rds file of a Seurat object", metavar = "character" ), optparse::make_option(c("--reduction.type"), type = "character", default = "umap", help = "Reduction type, umap or tsne", metavar = "character" ), optparse::make_option(c("--pdfplot"), type = "character", default = "reduction.pdf", help = "Plot file name", metavar = "character" ), optparse::make_option(c("--htmlplot"), type = "character", default = "reduction.html", help = "Plot file name", metavar = "character" ), optparse::make_option(c("--csv"), type = "character", default = NULL, help = "CSV meta file or this can be a singler RDS file", metavar = "character" ), optparse::make_option(c("--idents"), type = "character", default = "seurat_clusters", help = "Meta data column name for marker analysis", metavar = "character" ), optparse::make_option(c("--percentage"), type = "double", default = 5, help = "Cluster minimum percentage to plot", metavar = "double" ), optparse::make_option(c("--labels"), action = "store_true", default = FALSE, help = "Print labels on the plot") ) opt_parser <- optparse::OptionParser(option_list = option_list) opt <- optparse::parse_args(opt_parser) if (is.null(opt$rds)) { optparse::print_help(opt_parser) stop("At least one argument must be supplied (rds file)", call. = FALSE) } require(patchwork) require(plotly) require(Seurat) require(tidyverse) tryCatch( { source("workflow/scripts/scrna-functions.R") }, error = function(cond) { source(paste0(system("python -c 'import os; import cellsnake; print(os.path.dirname(cellsnake.__file__))'", intern = TRUE), "/scrna/workflow/scripts/scrna-functions.R")) } ) scrna <- readRDS(file = opt$rds) DefaultAssay(scrna) <- "RNA" if (!is.null(opt$csv)) { try({ metadata <- read.csv( opt$csv, row.names = 1 ) scrna@meta.data <- scrna@meta.data %>% tibble::rownames_to_column("barcodes") %>% dplyr::left_join(metadata %>% as.data.frame() %>% rownames_to_column("barcodes"), by = "barcodes") %>% tibble::column_to_rownames("barcodes") }) try({ pred <- readRDS(opt$csv) AddMetaData(scrna, pred["pruned.labels"] %>% as.data.frame() %>% dplyr::select(singler = pruned.labels)) -> scrna }) } ###################### dimplot###################### Idents(object = scrna) <- scrna@meta.data[[opt$idents]] n <- length(Idents(scrna) %>% unique()) palette <- function_color_palette(n) palette <- setNames(palette, Idents(scrna) %>% unique()) breaks <- scrna@meta.data %>% dplyr::select(opt$idents) %>% dplyr::count(get(opt$idents)) %>% dplyr::mutate(perc = (n * 100) / sum(n)) %>% dplyr::filter(perc >= opt$percentage) %>% dplyr::select(-n, -perc) %>% distinct() %>% pull() %>% as.character() p1 <- DimPlot(scrna, reduction = opt$reduction.type, raster = FALSE) & scale_color_manual(values = palette) ggplotly(p1) -> p1_plotly p1_plotly %>% htmlwidgets::saveWidget(file = opt$htmlplot, selfcontained = T) m <- max(str_count(breaks)) w <- c(8 + (m * 0.09) * (floor(length(breaks) / 11) + 1)) p1 <- DimPlot(scrna, reduction = opt$reduction.type, label = opt$labels, repel = TRUE, raster = FALSE) & scale_color_manual(values = palette, breaks = breaks) & theme(legend.direction = "horizontal", legend.text = element_text(size = 7)) & guides(colour = guide_legend(ncol = 3, override.aes = list(size = 7))) (p1 / guide_area()) + plot_layout(heights = c(2.5, 1), widths = c(1, 0.6), guides = "collect") -> p1 ggsave(plot = p1, filename = opt$pdfplot, width = 7.5, height = 8) |
4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 | option_list <- list( optparse::make_option(c("--rds"), type = "character", default = NULL, help = "Processed rds file of a Seurat object", metavar = "character" ), optparse::make_option(c("--logfc.threshold"), type = "double", default = 0.25, help = "LogFC [default= %default]", metavar = "character" ), optparse::make_option(c("--test.use"), type = "character", default = "wilcox", help = "Test use [default= %default]", metavar = "character" ), optparse::make_option(c("--output.rds"), type = "character", default = NULL, help = "RDS table of all markers", metavar = "character" ), optparse::make_option(c("--idents"), type = "character", default = "seurat_clusters", help = "Meta data column name for marker analysis", metavar = "character" ) ) opt_parser <- optparse::OptionParser(option_list = option_list) opt <- optparse::parse_args(opt_parser) if (is.null(opt$rds)) { optparse::print_help(opt_parser) stop("At least one argument must be supplied (rds file)", call. = FALSE) } require(Seurat) require(tidyverse) # try({source("workflow/scripts/scrna-functions.R")}) # try({source(paste0(system("python -c 'import os; import cellsnake; print(os.path.dirname(cellsnake.__file__))'", intern = TRUE),"/scrna/workflow/scripts/scrna-functions.R"))}) scrna <- readRDS(file = opt$rds) DefaultAssay(scrna) <- "RNA" # RNA_=paste0("RNA_snn_res.",opt$resolution) # Idents(object = scrna) <- [email protected][[RNA_]] Idents(object = scrna) <- scrna@meta.data[[opt$idents]] all_markers <- FindAllMarkers(scrna, logfc.threshold = opt$logfc.threshold, test.use = opt$test.use) saveRDS(all_markers, file = opt$output.rds) |
4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 | option_list <- list( optparse::make_option(c("--rds"), type = "character", default = NULL, help = "Processed rds file of a Seurat object", metavar = "character" ), optparse::make_option(c("--logfc.threshold"), type = "double", default = 0.25, help = "LogFC [default= %default]", metavar = "character" ), optparse::make_option(c("--test.use"), type = "character", default = "wilcox", help = "Test use [default= %default]", metavar = "character" ), optparse::make_option(c("--output.rds"), type = "character", default = NULL, help = "RDS table of all markers", metavar = "character" ), optparse::make_option(c("--metadata.column"), type = "character", default = "condition", help = "Meta data column name for marker analysis", metavar = "character" ), optparse::make_option(c("--metadata"), type = "character", default = NULL, help = "Metadata filename", metavar = "character" ) ) opt_parser <- optparse::OptionParser(option_list = option_list) opt <- optparse::parse_args(opt_parser) if (is.null(opt$rds)) { optparse::print_help(opt_parser) stop("At least one argument must be supplied (rds file)", call. = FALSE) } require(Seurat) require(tidyverse) function_read_metadata <- function(opt) { file_type <- tools::file_ext(opt$metadata) possible_separators <- c(",", ";", "|", "\t") if (tolower(file_type) %in% c("csv", "tsv", "txt")) { for (sep in possible_separators) { tryCatch( { df <- read.csv(opt$metadata, sep = sep) return(df) }, error = function(e) { message(paste("Unable to read file with separator", sep)) } ) } } else if (tolower(file_type) %in% c("xls", "xlsx")) { for (sep in possible_separators) { tryCatch( { df <- openxlsx::read.xlsx(opt$metadata) return(df) }, error = function(e) { message(paste("Unable to read file with separator", sep)) } ) } } else { message("Unsupported file type") return(NULL) } message("Unable to read file with any separator") return(NULL) } scrna <- readRDS(file = opt$rds) DefaultAssay(scrna) <- "RNA" function_read_metadata(opt) -> df scrna@meta.data %>% dplyr::left_join(df, by = c("orig.ident" = names(df)[1])) -> scrna@meta.data # RNA_=paste0("RNA_snn_res.",opt$resolution) # Idents(object = scrna) <- [email protected][[RNA_]] Idents(object = scrna) <- scrna@meta.data[[opt$metadata.column]] combn(as.character(unique(Idents(scrna))), 2) -> pairwise results <- list() for (i in 1:ncol(pairwise)) { pair_ <- paste0(pairwise[1, i], " vs ", pairwise[2, i]) markers <- FindMarkers(scrna, ident.1 = pairwise[1, i], ident.2 = pairwise[2, i], logfc.threshold = opt$logfc.threshold, test.use = opt$test.use) results[[pair_]] <- markers pair_ <- paste0(pairwise[2, i], " vs ", pairwise[1, i]) markers <- FindMarkers(scrna, ident.1 = pairwise[2, i], ident.2 = pairwise[1, i], logfc.threshold = opt$logfc.threshold, test.use = opt$test.use) results[[pair_]] <- markers } results %>% map(~ rownames_to_column(., var = "gene")) %>% bind_rows(.id = "condition") -> all_markers saveRDS(all_markers, file = opt$output.rds) |
2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 | option_list <- list( optparse::make_option(c("--xlsx"), type = "character", default = NULL, help = "Excel table of markers", metavar = "character" ), optparse::make_option(c("--output.rds"), type = "character", default = NULL, help = "Output RDS file name", metavar = "character" ), optparse::make_option(c("--output.go"), type = "character", default = NULL, help = "Output kegg excel file name", metavar = "character" ), optparse::make_option(c("--output.gse"), type = "character", default = NULL, help = "Output gse excel file name", metavar = "character" ), optparse::make_option(c("--mapping"), type = "character", default = "org.Hs.eg.db", help = "Mapping", metavar = "character" ), optparse::make_option(c("--pval"), type = "double", default = 0.05, help = "P value treshold [default= %default]", metavar = "character" ), optparse::make_option(c("--logfc.treshold"), type = "double", default = 1.5, help = "LogFC [default= %default]", metavar = "character" ) ) # require(clusterProfiler) require(tidyverse) try({ if (!requireNamespace(opt$mapping, quietly = TRUE)) { BiocManager::install(opt$mapping, update = TRUE) } }) opt_parser <- optparse::OptionParser(option_list = option_list) opt <- optparse::parse_args(opt_parser) require(opt$mapping, character.only = T) if (is.null(opt$xlsx)) { optparse::print_help(opt_parser) stop("Arguments must be supplied", call. = FALSE) } All_Features <- openxlsx::read.xlsx(opt$xlsx) function_enrichment_go_singlecell <- function(results, p = 0.05, f = 1.5) { print(results %>% distinct(cluster) %>% pull()) results %>% as.data.frame() %>% dplyr::filter(p_val_adj < p) %>% arrange(desc(avg_log2FC)) %>% dplyr::select(gene, avg_log2FC) %>% dplyr::mutate(GeneID = mapIds(get(opt$mapping), keys = gene, column = "ENTREZID", keytype = "SYMBOL", multiVals = "first")) %>% dplyr::filter(!is.na(GeneID), !is.na(avg_log2FC), !duplicated(GeneID)) %>% dplyr::select(3, 2) %>% deframe() -> geneList gene <- names(geneList)[abs(geneList) > f] tryCatch( { kk <- clusterProfiler::enrichGO( gene = gene, universe = names(geneList), OrgDb = get(opt$mapping), ont = "ALL", pAdjustMethod = "fdr", pvalueCutoff = 1, readable = TRUE ) }, error = function(e) { kk <- NULL } ) -> kk tryCatch( { kk2 <- clusterProfiler::gseGO( geneList = names(geneList), OrgDb = get(opt$mapping), ont = "ALL", minGSSize = 2, maxGSSize = 500, pvalueCutoff = 1, verbose = FALSE, eps = 0 ) }, error = function(e) { kk2 <- NULL } ) -> kk2 return(list(kk, kk2)) } All_Features %>% split(.$cluster) %>% purrr::map(~ function_enrichment_go_singlecell(., p = opt$pval, f = opt$logfc.treshold)) -> all_go_results saveRDS(all_go_results, opt$output.rds) openxlsx::write.xlsx(all_go_results %>% keep(~ !is.null(.[[1]])) %>% map(~ .[[1]]@result) %>% bind_rows(.id = "cluster") %>% as_tibble(), opt$output.go) openxlsx::write.xlsx(all_go_results %>% keep(~ !is.null(.[[2]])) %>% map(~ .[[2]]@result) %>% bind_rows(.id = "cluster") %>% as_tibble(), opt$output.gse) |
2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 | option_list <- list( optparse::make_option(c("--xlsx"), type = "character", default = NULL, help = "Excel table of markers", metavar = "character" ), optparse::make_option(c("--output.rds"), type = "character", default = NULL, help = "Output RDS file name", metavar = "character" ), optparse::make_option(c("--output.kegg"), type = "character", default = NULL, help = "Output kegg excel file name", metavar = "character" ), optparse::make_option(c("--output.mkegg"), type = "character", default = NULL, help = "Output kegg excel file name", metavar = "character" ), optparse::make_option(c("--output.gse"), type = "character", default = NULL, help = "Output gse excel file name", metavar = "character" ), optparse::make_option(c("--output.mgse"), type = "character", default = NULL, help = "Output gse excel file name", metavar = "character" ), optparse::make_option(c("--mapping"), type = "character", default = "org.Hs.eg.db", help = "Mapping", metavar = "character" ), optparse::make_option(c("--organism"), type = "character", default = "hsa", help = "Organism code, look at https://www.genome.jp/kegg/catalog/org_list.html", metavar = "character" ), optparse::make_option(c("--pval"), type = "double", default = 0.05, help = "P value treshold [default= %default]", metavar = "character" ), optparse::make_option(c("--logfc.treshold"), type = "double", default = 1, help = "LogFC [default= %default]", metavar = "character" ) ) # require(clusterProfiler) require(tidyverse) try({ if (!requireNamespace(opt$mapping, quietly = TRUE)) { BiocManager::install(opt$mapping) } }) opt_parser <- optparse::OptionParser(option_list = option_list) opt <- optparse::parse_args(opt_parser) require(opt$mapping, character.only = T) if (is.null(opt$xlsx)) { optparse::print_help(opt_parser) stop("Arguments must be supplied", call. = FALSE) } All_Features <- openxlsx::read.xlsx(opt$xlsx) function_enrichment_kegg_singlecell <- function(results, p = 0.05, f = 1.5) { print(results %>% distinct(cluster) %>% dplyr::pull()) results %>% as.data.frame() %>% dplyr::filter(p_val_adj < p) %>% arrange(desc(avg_log2FC)) %>% dplyr::select(gene, avg_log2FC) %>% dplyr::mutate(GeneID = mapIds(get(opt$mapping), keys = gene, column = "ENTREZID", keytype = "SYMBOL", multiVals = "first")) %>% dplyr::filter(!is.na(GeneID), !is.na(avg_log2FC), !duplicated(GeneID)) %>% dplyr::select(3, 2) %>% deframe() -> geneList gene <- names(geneList)[abs(geneList) > f] # as recommended by the documentation tryCatch( { kk1 <- clusterProfiler::enrichKEGG( gene = gene, organism = opt$organism, pAdjustMethod = "fdr", minGSSize = 2, pvalueCutoff = 1 ) }, error = function(e) { kk1 <- NULL } ) -> kk1 tryCatch( { kk2 <- clusterProfiler::gseKEGG( geneList = geneList, organism = opt$organism, pvalueCutoff = 1, pAdjustMethod = "fdr", minGSSize = 2, eps = 0, verbose = FALSE ) }, error = function(e) { kk2 <- NULL } ) -> kk2 tryCatch( { kk3 <- clusterProfiler::enrichMKEGG( gene = gene, organism = opt$organism, pvalueCutoff = 1, minGSSize = 2, pAdjustMethod = "fdr", ) }, error = function(e) { kk3 <- NULL } ) -> kk3 tryCatch( { kk4 <- clusterProfiler::gseMKEGG( geneList = geneList, organism = opt$organism, minGSSize = 2, keyType = "kegg", pAdjustMethod = "fdr", pvalueCutoff = 1 ) }, error = function(e) { kk4 <- NULL } ) -> kk4 return(list(kk1, kk2, kk3, kk4)) } All_Features %>% split(.$cluster) %>% purrr::map(~ function_enrichment_kegg_singlecell(., p = opt$pval, f = opt$logfc.treshold)) -> all_kegg_results saveRDS(all_kegg_results, opt$output.rds) openxlsx::write.xlsx(all_kegg_results %>% keep(~ !is.null(.[[1]])) %>% map(~ .[[1]]@result) %>% bind_rows(.id = "cluster") %>% as_tibble(), opt$output.kegg) openxlsx::write.xlsx(all_kegg_results %>% keep(~ !is.null(.[[2]])) %>% map(~ .[[2]]@result) %>% bind_rows(.id = "cluster") %>% as_tibble(), opt$output.gse) openxlsx::write.xlsx(all_kegg_results %>% keep(~ !is.null(.[[3]])) %>% map(~ .[[3]]@result) %>% bind_rows(.id = "cluster") %>% as_tibble(), opt$output.mkegg) openxlsx::write.xlsx(all_kegg_results %>% keep(~ !is.null(.[[4]])) %>% map(~ .[[4]]@result) %>% bind_rows(.id = "cluster") %>% as_tibble(), opt$output.mgse) |
4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 | import scanpy as sc, anndata as ad import sys import anndata import pandas as pd import numpy as np import scipy.sparse def taxonomy_compare(A, B): ''' A helper function to compare the level of two entries in the kraken hierarchy. Returns 1 if A is higher in the hierarchy, 0 if A and B are equal, -1 if B is higher. Input ----- A, B : ``str`` Two strings taken from column four of hierarchy.txt, capturing the taxonomy levels of the records. ''' #the order of the taxonomy levels order = 'RDKPCOFGS' #get the levels of A and B in this hierarchy levA = order.find(A[0]) levB = order.find(B[0]) if levA < levB: #A is higher in the hierarchy return 1 elif levA > levB: #B is higher in the hierarchy return -1 else: #A and B are at the same level, so need more detailed investigation #need to turn to positions [1:], which capture the more detailed sub-hierarchy #(and are regular integers, if present) if A[1:] < B[1:]: #A is higher in the hierarchy return 1 elif A[1:] > B[1:]: #B is higher in the hierarchy return -1 else: #they're the same return 0 def identify_level(hierarchy, target): ''' A helper function that identifies each record's corresponding entry of the appropriate level, for the entire kraken hierarchy file. Input as in collapse_taxonomy(). Outputs a dictionary with the record ID as the key and the appropriate hierarchy level as the value, with entries higher in the hierarchy than the desired target level having "" as the value. ''' #load the file hierarchy = pd.read_table(hierarchy, comment='#', header=None) #we start off with no value tax_value = '' translation = {0:''} for level, tax_id, tax_name in zip(hierarchy.iloc[:,3], hierarchy.iloc[:,4], hierarchy.iloc[:,5]): #need to strip off all the left side space from the name tax_name = tax_name.lstrip() #check how our encountered level compares to the target level comp = taxonomy_compare(level, target) if comp == 1: #our record is higher in the hierarchy than the target level #so it can't have a value of the desired level assigned tax_value = '' elif comp == 0: #our record is of the exact correct hierarchy level #so it's the new value for anything lower in the hierarchy tax_value = tax_name #if comp == -1, then it's lower and the current tax_value should be usedd #one way or another, can store the value now translation[tax_id] = tax_value #and once we go across the whole thing, return the dictionary return translation def collapse_taxonomy(adata, hierarchy, target): ''' Collapse the single cell kraken pipeline counts based on taxonomy hierarchy, with all information more specific than the desired depth summed up. Returns an AnnData with the variable space being the summed counts at the specified point in the taxonomy hierarchy. Input ----- adata : ``AnnData`` Single cell kraken pipeline output, with the kraken taxonomy IDs in ``adata.var['gene_ids']``. hierarchy : filename Path to the ``hierarchy.txt`` file provided as part of the output of the kraken pipeline. target : ``str`` The desired hierarchy level to sum up to. Accepted values are ``["domain", "kingdom", "phylum", "class", "order", "family", "genus", "species"]`` or any value of the fourth column of ``hierarchy``. ''' #if the user provides the target as a full name, extract and capitalise the first letter if len(target)>2: target = target[0].upper() #obtain the translation of the taxonomy IDs to the appropriate target level translation = identify_level(hierarchy, target) #use this translation to get the target level values for the IDs in the object levels = np.array([translation[i] for i in adata.var['gene_ids']]) #prepare the output var names and collapse the count matrix accordingly #(remove '' as it's of no interest) var_names = np.unique(levels) var_names = var_names[var_names != ''] #this goes better on CSC holder = adata.X.tocsc() c = scipy.sparse.csr_matrix(np.hstack([np.sum(holder[:,levels==n],1) for n in var_names])) #create output object bdata = anndata.AnnData(c, obs=adata.obs) bdata.var_names = var_names return bdata adata=sc.read_10x_mtx(sys.argv[1]) #kraken2 pipeline output hierarchy=sys.argv[2] #hierarchy file taxa=sys.argv[3] #collapse to what, for example genus output=sys.argv[4] #output h5ad file adata=collapse_taxonomy(adata,hierarchy,taxa) adata.write_h5ad(output) |
3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 | option_list <- list( optparse::make_option(c("--min.cells"), type = "integer", default = 1, help = "Min cells [default= %default]", metavar = "integer" ), optparse::make_option(c("--min.features"), type = "integer", default = 3, help = "Min features [default= %default]", metavar = "character" ), optparse::make_option(c("--data.dir"), type = "character", default = NULL, help = "Data directory", metavar = "character" ), optparse::make_option(c("--h5seurat"), type = "character", default = NULL, help = "Data directory", metavar = "character" ), optparse::make_option(c("--sampleid"), type = "character", default = NULL, help = "Sample ID", metavar = "character" ), optparse::make_option(c("--output.rds"), type = "character", default = "output.rds", help = "Output RDS file name [default= %default]", metavar = "character" ), optparse::make_option(c("--output.plot"), type = "character", default = "output.pdf", help = "Output barplot file name [default= %default]", metavar = "character" ), optparse::make_option(c("--taxa"), type = "character", default = "genus", help = "Taxonomic level", metavar = "character" ) ) opt_parser <- optparse::OptionParser(option_list = option_list) opt <- optparse::parse_args(opt_parser) if (is.null(opt$h5seurat)) { optparse::print_help(opt_parser) stop("At least one argument must be supplied (data.dir and sampleid)", call. = FALSE) } require(optparse) require(tidyverse) require(Seurat) require(SeuratDisk) tryCatch( { source("workflow/scripts/scrna-functions.R") }, error = function(cond) { source(paste0(system("python -c 'import os; import cellsnake; print(os.path.dirname(cellsnake.__file__))'", intern = TRUE), "/scrna/workflow/scripts/scrna-functions.R")) } ) # nFeature_RNA is the number of genes detected in each cell. nCount_RNA is the total number of molecules detected within a cell. # CreateSeuratObject(scrna.data,min.cells = 1,min.features = 5) -> scrna tryCatch( { CreateSeuratObject(LoadH5Seurat(opt$h5seurat)[["RNA"]]@counts, min.cells = opt$min.cells, min.features = opt$min.features) -> scrna head(scrna) scrna <- RenameCells(object = scrna, add.cell.id = make.names(opt$sampleid)) # add cell.id to cell name scrna[["RNA"]]@counts %>% as.matrix() %>% t() %>% as.data.frame() %>% select(-starts_with("Homo")) -> df }, error = function(e) { df <- NULL } ) -> df pdf(opt$output.plot, width = 6, height = 9) try({ df %>% rownames_to_column("barcode") %>% gather(group, umi, -barcode) %>% group_by(group) %>% summarise(sum = log2(sum(umi))) %>% arrange(desc(sum)) %>% slice_max(n = 50, order_by = sum) %>% ggplot(aes(reorder(group, sum), sum)) + geom_col() + coord_flip() + ggthemes::theme_few() + ylab("Total log2-Expression (Microbiome)") + xlab(opt$taxa) + ggtitle(opt$sampleid) + theme(axis.title = element_text(size = 12)) -> p1 print(p1) }) dev.off() saveRDS(df, file = opt$output.rds) |
5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 | option_list <- list( optparse::make_option(c("--rds"), type = "character", default = NULL, help = "Processed rds file of a Seurat object", metavar = "character" ), optparse::make_option(c("--xlsx"), type = "character", default = NULL, help = "Excel table of markers for input", metavar = "character" ), optparse::make_option(c("--output.plot"), type = "character", default = "output.pdf", help = "Output plot directory", metavar = "character" ), optparse::make_option(c("--idents"), type = "character", default = "seurat_clusters", help = "Meta data column name for marker analysis", metavar = "character" ) ) opt_parser <- optparse::OptionParser(option_list = option_list) opt <- optparse::parse_args(opt_parser) if (is.null(opt$rds)) { optparse::print_help(opt_parser) stop("At least one argument must be supplied (rds file)", call. = FALSE) } require(Seurat) require(tidyverse) # try({source("workflow/scripts/scrna-functions.R")}) # try({source(paste0(system("python -c 'import os; import cellsnake; print(os.path.dirname(cellsnake.__file__))'", intern = TRUE),"/scrna/workflow/scripts/scrna-functions.R"))}) scrna <- readRDS(file = opt$rds) DefaultAssay(scrna) <- "RNA" Idents(object = scrna) <- scrna@meta.data[[opt$idents]] n <- length(Idents(scrna) %>% unique()) Positive_Features <- openxlsx::read.xlsx(opt$xlsx) Positive_Features %>% group_by(cluster) %>% slice_max(n = 5, order_by = avg_log2FC) -> df df %>% distinct(gene) %>% pull() -> not.all.genes scrna <- ScaleData(scrna, features = not.all.genes) DoHeatmap(object = scrna, features = not.all.genes, label = F) & theme(axis.text.y = element_text(size = 5)) & scale_fill_continuous(type = "viridis") -> p1 ggsave(opt$output.plot, p1, height = 4 + (n * 0.2), width = 5 + (n * 0.05), useDingbats = TRUE) |
2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 | option_list <- list( optparse::make_option(c("--rds"), type = "character", default = NULL, help = "Processed rds file of a Seurat object", metavar = "character" ), optparse::make_option(c("--top_n"), type = "integer", default = 20, help = "How many features to plot per cluster [default= %default]", metavar = "integer" ), optparse::make_option(c("--xlsx"), type = "character", default = NULL, help = "Excel table of markers for input", metavar = "character" ), optparse::make_option(c("--output.plot.dir"), type = "character", default = NULL, help = "Output plot directory", metavar = "character" ), optparse::make_option(c("--reduction.type"), type = "character", default = "umap", help = "Reduction type, umap or tsne", metavar = "character" ), optparse::make_option(c("--idents"), type = "character", default = "seurat_clusters", help = "Meta data column name for marker analysis", metavar = "character" ) ) opt_parser <- optparse::OptionParser(option_list = option_list) opt <- optparse::parse_args(opt_parser) try( { source("workflow/scripts/scrna-functions.R") }, silent = TRUE ) try( { source(paste0(system("python -c 'import os; import cellsnake; print(os.path.dirname(cellsnake.__file__))'", intern = TRUE), "/scrna/workflow/scripts/scrna-functions.R")) }, silent = TRUE ) if (is.null(opt$rds)) { optparse::print_help(opt_parser) stop("At least one argument must be supplied (rds file)", call. = FALSE) } require(Seurat) require(viridis) require(tidyverse) scrna <- readRDS(file = opt$rds) DefaultAssay(scrna) <- "RNA" Idents(object = scrna) <- scrna@meta.data[[opt$idents]] n <- length(Idents(scrna) %>% unique()) palette <- function_color_palette(n) Positive_Features <- openxlsx::read.xlsx(opt$xlsx) %>% group_by(cluster) %>% slice_min(order_by = p_val_adj, n = opt$top_n) %>% select(cluster, gene) for (d in (Positive_Features %>% distinct(cluster) %>% pull())) { dir.create(paste0(opt$output.plot.dir, "/cluster", d, "/"), recursive = TRUE) } options(warn = -1) suppressMessages(for (i in 1:nrow(Positive_Features)) { gene <- Positive_Features[i, ]$gene cluster <- Positive_Features[i, ]$cluster p1 <- FeaturePlot(scrna, reduction = opt$reduction.type, features = gene, raster = FALSE) & scale_color_continuous(type = "viridis") & labs(color = "Expression") & theme(axis.text = element_text(size = 12)) p2 <- DotPlot(scrna, features = gene) & scale_color_continuous(type = "viridis") & labs(color = "Average Expression", size = "Percent Expressed") & ylab("Identity") & theme(axis.title.x = element_blank(), axis.text = element_text(size = 12)) & theme(legend.position = "right") p3 <- VlnPlot(scrna, features = gene) & ggthemes::theme_hc() & scale_fill_manual(values = palette) & theme(legend.position = "right", axis.text = element_text(size = 12)) & labs(fill = "") & xlab("Identity") & ylab("Expression Level") suppressWarnings(((p1 | p2) / p3) -> wp) ggsave(paste0(opt$output.plot.dir, "/cluster", cluster, "/", gene, ".pdf"), wp, height = 5 + (n * 0.15), width = 6 + (n * 0.15), useDingbats = TRUE) }) |
4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 | option_list <- list( optparse::make_option(c("--rds"), type = "character", default = NULL, help = "RDS file of marker data frame", metavar = "character" ), optparse::make_option(c("--output.xlsx.positive"), type = "character", default = NULL, help = "Excel table of positive markers", metavar = "character" ), optparse::make_option(c("--output.xlsx.all"), type = "character", default = NULL, help = "Excel table of all markers", metavar = "character" ) ) opt_parser <- optparse::OptionParser(option_list = option_list) opt <- optparse::parse_args(opt_parser) if (is.null(opt$rds)) { optparse::print_help(opt_parser) stop("At least one argument must be supplied (rds file)", call. = FALSE) } require(tidyverse) # try({source("workflow/scripts/scrna-functions.R")}) # try({source(paste0(system("python -c 'import os; import cellsnake; print(os.path.dirname(cellsnake.__file__))'", intern = TRUE),"/scrna/workflow/scripts/scrna-functions.R"))}) all_markers <- readRDS(file = opt$rds) openxlsx::write.xlsx(all_markers, file = opt$output.xlsx.all) openxlsx::write.xlsx(all_markers %>% filter(avg_log2FC > 0), file = opt$output.xlsx.positive) |
2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 | option_list <- list( optparse::make_option(c("--rds"), type = "character", default = NULL, help = "Processed rds file of a Seurat object", metavar = "character" ), optparse::make_option(c("--sampleid"), type = "character", default = NULL, help = "Sample ID", metavar = "character" ), optparse::make_option(c("--idents"), type = "character", default = "seurat_clusters", help = "Meta data column name for marker analysis", metavar = "character" ), optparse::make_option(c("--ccplot"), type = "character", default = "ccplot.pdf", help = "Cell cluster count plot", metavar = "character" ), optparse::make_option(c("--ccbarplot"), type = "character", default = "ccbarplot.pdf", help = "Cell cluster count plot", metavar = "character" ), optparse::make_option(c("--htmlplot"), type = "character", default = "htmlplot.pdf", help = "Cell cluster html plot", metavar = "character" ), optparse::make_option(c("--xlsx"), type = "character", default = "metrics.xlsx", help = "Metrics table output", metavar = "character" ) ) opt_parser <- optparse::OptionParser(option_list = option_list) opt <- optparse::parse_args(opt_parser) if (is.null(opt$rds)) { optparse::print_help(opt_parser) stop("At least one argument must be supplied (rds file and sampleid)", call. = FALSE) } require(plotly) require(ggpubr) require(Seurat) require(tidyverse) # try({source("workflow/scripts/scrna-functions.R")}) # try({source(paste0(system("python -c 'import os; import cellsnake; print(os.path.dirname(cellsnake.__file__))'", intern = TRUE),"/scrna/workflow/scripts/scrna-functions.R"))}) scrna <- readRDS(file = opt$rds) scrna@meta.data %>% dplyr::add_count(orig.ident) %>% dplyr::mutate(total_clusters = length(unique(get(opt$idents)))) %>% distinct(orig.ident, n, total_clusters) %>% dplyr::select("Sample Name" = orig.ident, "Total Cells" = n, "Total Clusters" = total_clusters) %>% ggtexttable(rows = NULL, theme = ttheme("light")) -> p1 id <- length(unique(scrna@meta.data$orig.ident)) ggsave(opt$ccplot, p1, height = 7 + (id * 0.2)) scrna@meta.data %>% dplyr::group_by(orig.ident, get(opt$idents)) %>% dplyr::count() %>% dplyr::select("Sample Name" = 1, "Cluster" = 2, "Total Cells" = 3) -> df df %>% openxlsx::write.xlsx(opt$xlsx) write_tsv(df, file = opt$xlsx %>% str_replace(".xlsx", ".tsv")) df %>% ggplot(aes(x = Cluster, y = `Total Cells`, fill = `Sample Name`)) + geom_col() + ggthemes::theme_hc() + theme(legend.title = element_blank()) + guides(colour = guide_legend(ncol = 3, override.aes = list(size = 7))) -> p2 n <- length(unique(scrna@meta.data[opt$idents])) ggsave(opt$ccbarplot, p2, height = 5.2 + (id * 0.09), width = 6 + (n * 0.23)) ggplotly(p2) -> p1_plotly p1_plotly %>% htmlwidgets::saveWidget(file = opt$htmlplot, selfcontained = T) |
4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 | option_list <- list( optparse::make_option(c("--rds"), type = "character", default = NULL, help = "Processed rds file of a Seurat object", metavar = "character" ), optparse::make_option(c("--microbiome.rds"), type = "character", default = NULL, help = "Microbiome rds file", metavar = "character" ), optparse::make_option(c("--reduction.type"), type = "character", default = "umap", help = "Reduction type, umap or tsne", metavar = "character" ), optparse::make_option(c("--dimplot"), type = "character", default = "micdimplot.pdf", help = "Plot file name", metavar = "character" ), optparse::make_option(c("--tplot"), type = "character", default = "tplot.pdf", help = "Total microbiome dimplot file name", metavar = "character" ), optparse::make_option(c("--taxa"), type = "character", default = "genus", help = "Taxonomic level", metavar = "character" ) ) opt_parser <- optparse::OptionParser(option_list = option_list) opt <- optparse::parse_args(opt_parser) if (is.null(opt$rds)) { optparse::print_help(opt_parser) stop("At least one argument must be supplied (rds file)", call. = FALSE) } require(patchwork) require(Seurat) # require(randomcoloR) require(viridis) require(tidyverse) try( { source("workflow/scripts/scrna-functions.R") }, silent = TRUE ) try( { source(paste0(system("python -c 'import os; import cellsnake; print(os.path.dirname(cellsnake.__file__))'", intern = TRUE), "/scrna/workflow/scripts/scrna-functions.R")) }, silent = TRUE ) scrna <- readRDS(file = opt$rds) DefaultAssay(scrna) <- "RNA" microbiome <- readRDS(file = opt$microbiome.rds) tryCatch( { AddMetaData(scrna, microbiome %>% rownames_to_column("barcodes") %>% gather(taxa, umi, -barcodes) %>% dplyr::group_by(taxa) %>% dplyr::mutate(sum = sum(umi, na.rm = T)) %>% ungroup() %>% dplyr::mutate(taxa = ifelse(sum >= min(sort(unique(sum), decreasing = T)[1:11], na.rm = T), paste0(opt$taxa, "_", taxa), paste0(opt$taxa, "_", "others"))) %>% dplyr::select(-sum) %>% dplyr::group_by(barcodes, taxa) %>% dplyr::summarise(sum = sum(umi, na.rm = T)) %>% ungroup() %>% spread(taxa, sum) %>% column_to_rownames("barcodes")) -> scrna }, error = function(e) { print(e) print("No microbiome data") scrna@meta.data <- scrna@meta.data %>% dplyr::mutate(!!paste0(opt$taxa, "_nodetected") := 0) return(scrna) } ) -> scrna head(scrna) # p1 <- DimPlot(scrna, reduction = opt$reduction.type, label = TRUE) & theme_cellsnake_classic() & scale_color_manual(values = palette) scrna@meta.data %>% dplyr::mutate(`Total log2-Expression (Microbiome)` = log2(rowSums(across(starts_with(opt$taxa))) + 1)) %>% dplyr::mutate(across(contains("Microbiome"), ~ replace(., .x == 0, NA))) %>% dplyr::mutate(across(contains("genus"), ~ replace(., .x == 0, NA))) -> scrna@meta.data # scrna %>% # dplyr::select(barcodes = .cell, orig.ident, contains(opt$taxa), starts_with(opt$reduction.type)) %>% # gather(taxa, umi, starts_with(opt$taxa)) %>% # dplyr::select(barcodes, orig.ident, x = 3, y = 4, taxa, umi) %>% # replace(is.na(.), 0) %>% # ggplot(aes(x = x, y = y, color = log2(umi + 1))) + # geom_point(size = 0.2) + # labs(color = "Log2-UMI") + # theme(axis.text = element_text(size = 12)) + # scale_color_viridis(option = "magma", direction = -1, alpha = 0.8, na.value = "white") + # ggthemes::theme_few() + # facet_wrap(~taxa) -> p1 # ggsave(plot = p1, filename = opt$dimplot, width = 13, height = 9) plotting_taxas <- scrna@meta.data %>% dplyr::select(starts_with(opt$taxa)) %>% select( where( ~ !all(is.na(.x)) ) ) %>% colnames() %>% unique() print(plotting_taxas) pdf(opt$dimplot, width = 7, height = 7) for (i in plotting_taxas) { try({ FeaturePlot(scrna, features = i, pt.size = 0.1, reduction = opt$reduction.type, raster = FALSE) & scale_color_continuous(type = "viridis", na.value = "gray96") -> p1 p1 <- (p1 / guide_area()) + plot_layout(heights = c(2.5, 1), widths = c(1, 0.6), guides = "collect") print(p1) }) } dev.off() pdf(opt$tplot, width = 7.5, height = 8) try({ FeaturePlot(scrna, features = "Total log2-Expression (Microbiome)", pt.size = 0.1, reduction = opt$reduction.type, raster = FALSE) & scale_color_continuous(type = "viridis", na.value = "gray96") -> p2 p2 <- (p2 / guide_area()) + plot_layout(heights = c(2.5, 1), widths = c(1, 0.6), guides = "collect") print(p2) }) dev.off() |
4
of
scripts/scrna-microbiome-dimplot.R
4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 | option_list <- list( optparse::make_option(c("--rds"), type = "character", default = NULL, help = "Processed rds file of a Seurat object", metavar = "character" ), optparse::make_option(c("--microbiome.rds"), type = "character", default = NULL, help = "Microbiome rds file", metavar = "character" ), optparse::make_option(c("--sigplot"), type = "character", default = "sigplot.pdf", help = "Plot file name", metavar = "character" ), optparse::make_option(c("--sigtable"), type = "character", default = NULL, help = "Excel table of positive markers", metavar = "character" ), optparse::make_option(c("--taxa"), type = "character", default = "genus", help = "Taxonomic level", metavar = "character" ), optparse::make_option(c("--idents"), type = "character", default = "seurat_clusters", help = "Meta data column name", metavar = "character" ) ) opt_parser <- optparse::OptionParser(option_list = option_list) opt <- optparse::parse_args(opt_parser) if (is.null(opt$rds)) { optparse::print_help(opt_parser) stop("At least one argument must be supplied (rds file)", call. = FALSE) } require(Seurat) # require(randomcoloR) require(tidyseurat) require(viridis) require(tidyverse) try( { source("workflow/scripts/scrna-functions.R") }, silent = TRUE ) try( { source(paste0(system("python -c 'import os; import cellsnake; print(os.path.dirname(cellsnake.__file__))'", intern = TRUE), "/scrna/workflow/scripts/scrna-functions.R")) }, silent = TRUE ) scrna <- readRDS(file = opt$rds) DefaultAssay(scrna) <- "RNA" microbiome <- readRDS(file = opt$microbiome.rds) # p1 <- DimPlot(scrna, reduction = opt$reduction.type, label = TRUE) & theme_cellsnake_classic() & scale_color_manual(values = palette) tryCatch( { AddMetaData(scrna, microbiome %>% rownames_to_column("barcodes") %>% gather(taxa, umi, -barcodes) %>% dplyr::group_by(taxa) %>% dplyr::mutate(sum = sum(umi, na.rm = T)) %>% ungroup() %>% dplyr::mutate(taxa = paste0(opt$taxa, "_", taxa)) %>% dplyr::select(-sum) %>% dplyr::group_by(barcodes, taxa) %>% dplyr::summarise(sum = sum(umi, na.rm = T)) %>% ungroup() %>% spread(taxa, sum) %>% column_to_rownames("barcodes")) -> scrna scrna %>% dplyr::select(orig.ident, one_of(opt$idents), starts_with(opt$taxa)) %>% gather(taxa, umi, starts_with(opt$taxa)) %>% group_by(across(opt$idents), orig.ident, taxa) %>% dplyr::mutate(total = sum(umi, na.rm = T)) %>% # group_by(taxa, orig.ident, across(opt$idents)) %>% dplyr::mutate(cell = n()) %>% dplyr::ungroup() %>% distinct(orig.ident, across(opt$idents), taxa, total, cell) %>% group_by(taxa) %>% dplyr::mutate(v3 = sum(total) - total, v4 = sum(cell) - cell) %>% rowwise() %>% # dplyr::mutate(p = fisher.test(matrix(c(total, cell, v3, v4), ncol = 2), alternative = "greater")$p.value) %>% ungroup() %>% # dplyr::mutate(p = p.adjust(p)) %>% dplyr::mutate(`Taxa reads in this group` = total, `Cells in this group` = cell, `Total reads for this taxa` = v3 + total, `Total cells` = v4 + cell) %>% dplyr::filter(`Taxa reads in this group` > 0) %>% dplyr::select(-total, -cell, -v3, -v4) %>% arrange(desc(`Taxa reads in this group`)) -> df }, error = function(e) { print(e) df <- data.frame(orig.ident = character(), `Taxa reads in this group` = numeric(), `Cells in this group` = numeric(), `Total reads for this taxa` = numeric(), `Total cells` = numeric()) } ) -> df # [email protected] %>% # dplyr::select(starts_with(opt$idents)) %>% # pull() %>% # unique() %>% # length() -> n head(df) openxlsx::write.xlsx(df, file = opt$sigtable) write_tsv(df, file = opt$sigtable %>% str_replace(".xlsx", ".tsv")) |
2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 | option_list <- list( optparse::make_option(c("--rds"), type = "character", default = NULL, help = "Processed rds file of a Seurat object", metavar = "character" ), optparse::make_option(c("--pplot"), type = "character", default = NULL, help = "Partition plot", metavar = "character" ), optparse::make_option(c("--output.dir"), type = "character", default = NULL, help = "Output plot directory", metavar = "character" ) ) opt_parser <- optparse::OptionParser(option_list = option_list) opt <- optparse::parse_args(opt_parser) if (!requireNamespace("SeuratWrappers", quietly = TRUE)) { remotes::install_github("satijalab/seurat-wrappers", upgrade = "never") } if (!requireNamespace("monocle3", quietly = TRUE)) { remotes::install_version("igraph", "1.4.3") remotes::install_github("cole-trapnell-lab/monocle3", upgrade = "never") } require(monocle3) require(Seurat) require(SeuratWrappers) require(tidyverse) require(patchwork) require(magrittr) if (is.null(opt$rds)) { optparse::print_help(opt_parser) stop("At least one argument must be supplied (rds file)", call. = FALSE) } scrna <- readRDS(file = opt$rds) cds <- as.cell_data_set(scrna) tryCatch( { cds <- cluster_cells(cds) }, error = function(e) { cds <- cluster_cells(cds, cluster_method = "louvain") } ) -> cds p1 <- plot_cells(cds, color_cells_by = "singler", show_trajectory_graph = FALSE) p2 <- plot_cells(cds, color_cells_by = "partition", show_trajectory_graph = FALSE) wrap_plots(p1, p2) ggsave(opt$pplot, width = 11, height = 5.5) as.Seurat(cds, assay = NULL) -> scrna scrna@meta.data %>% dplyr::count(monocle3_partitions) %>% dplyr::mutate(perc = (n * 100) / sum(n)) %>% dplyr::filter(perc >= 5, n > 200) %>% pull(monocle3_partitions) %>% as.vector() -> partitions print(partitions) for (i in partitions) { integrated.sub <- subset(scrna, monocle3_partitions == i) cds2 <- as.cell_data_set(integrated.sub) if (i != "1") { cds2 <- cluster_cells(cds2) } cds2 <- learn_graph(cds2) p1 <- plot_cells(cds2, label_groups_by_cluster = FALSE, label_leaves = FALSE, label_branch_points = FALSE) # max.avp <- which.max(unlist(FetchData(integrated.sub, "AVP"))) # max.avp <- colnames(integrated.sub)[max.avp] # cds2 <- order_cells(cds2, root_cells = max.avp) # p2 <- plot_cells(cds2, color_cells_by = "pseudotime", label_cell_groups = FALSE, label_leaves = FALSE,label_branch_points = FALSE) # wrap_plots(p1, p2) ggsave(paste0(opt$output.dir, "/plot_monocle-partition-", i, ".pdf"), plot = p1, width = 6, height = 5.5) } |
4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 | option_list <- list( optparse::make_option(c("--scale.factor"), type = "integer", default = 10000, help = "Scale factor [default= %default]", metavar = "character" ), optparse::make_option(c("--nfeatures"), type = "integer", default = 2000, help = "Highly variable features [default= %default]", metavar = "integer" ), optparse::make_option(c("--variable.selection.method"), type = "character", default = "vst", help = "Find variable features selection method [default= %default]", metavar = "character" ), optparse::make_option(c("--rds"), type = "character", default = NULL, help = "A RAW rds file of a Seurat object", metavar = "character" ), optparse::make_option(c("--normalization.method"), type = "character", default = "LogNormalize", help = "Normalization method[default= %default]", metavar = "character" ), optparse::make_option(c("--doublet.filter"), action = "store_true", default = FALSE), optparse::make_option(c("--integration"), action = "store_true", default = FALSE), optparse::make_option(c("--umap"), action = "store_true", default = FALSE), optparse::make_option(c("--tsne"), action = "store_true", default = FALSE), optparse::make_option(c("--resolution"), type = "character", default = "0.8", help = "Resolution [default= %default]", metavar = "character" ), optparse::make_option(c("--output.rds"), type = "character", default = "output.rds", help = "Output RDS file name [default= %default]", metavar = "character" ), optparse::make_option(c("--cpu"), type = "integer", default = 5, help = "Number of CPU for parallel run [default= %default]", metavar = "character" ), optparse::make_option(c("--reference"), type = "character", default = "HumanPrimaryCellAtlasData", help = "SingleR reference for annotation", metavar = "character" ) ) opt_parser <- optparse::OptionParser(option_list = option_list) opt <- optparse::parse_args(opt_parser) if (is.null(opt$rds)) { optparse::print_help(opt_parser) stop("At least one argument must be supplied (rds file)", call. = FALSE) } require(optparse) require(SingleR) require(celldex) require(tidyverse) require(Seurat) require(patchwork) try( { source("workflow/scripts/scrna-functions.R") }, silent = TRUE ) try( { source(paste0(system("python -c 'import os; import cellsnake; print(os.path.dirname(cellsnake.__file__))'", intern = TRUE), "/scrna/workflow/scripts/scrna-functions.R")) }, silent = TRUE ) scrna <- readRDS(file = opt$rds) if (isFALSE(opt$integration)) { scrna <- NormalizeData(scrna, normalization.method = opt$normalization.method, scale.factor = opt$scale.factor) scrna <- FindVariableFeatures(scrna, selection.method = opt$variable.selection.method, nfeatures = opt$nfeatures) } else { try({ DefaultAssay(scrna) <- "integrated" }) # for now only for Seurat, Harmony will come } # all.genes <- rownames(scrna) memory requirements can be large if using all genes not.all.genes <- VariableFeatures(scrna) # only variable features scrna <- ScaleData(scrna, features = not.all.genes) scrna <- RunPCA(scrna, features = not.all.genes) dimensionReduction <- function_pca_dimensions(scrna) scrna <- FindNeighbors(scrna, dims = 1:dimensionReduction) if (!opt$resolution %in% c("auto", "AUTO", "Auto")) { scrna <- FindClusters(scrna, resolution = as.numeric(opt$resolution)) } else { if (!requireNamespace("MultiKParallel", quietly = TRUE)) { remotes::install_github("sinanugur/MultiKParallel", upgrade = "never") } require(MultiKParallel) scrna_tmp <- FindClusters(scrna, resolution = seq(0.2, 2.5, 0.15)) multik <- MultiKParallel(scrna_tmp, reps = 10, seed = 255, resolution = seq(0.2, 2.5, 0.15), numCores = opt$cpu, nPC = dimensionReduction) multik$k %>% tibble::as_tibble() %>% count(value) %>% filter(n == max(n)) %>% slice(1) %>% pull(value) -> K scrna_tmp[[]] %>% as.data.frame() %>% select(starts_with("RNA")) %>% rownames_to_column("barcode") %>% mutate(across(where(is.factor), as.character)) %>% as_tibble() %>% gather(res, clu, contains("RNA")) %>% distinct(res, clu) %>% count(res) %>% mutate(res = str_remove(res, "RNA_snn_res.")) %>% mutate(diff = abs(n - K)) %>% slice_min(order_by = diff) %>% pull(res) %>% as.numeric() -> optimal_resolution scrna <- FindClusters(scrna, resolution = optimal_resolution) } if (opt$umap) { scrna <- RunUMAP(scrna, dims = 1:dimensionReduction) } if (opt$tsne) { scrna <- RunTSNE(scrna, dims = 1:dimensionReduction, check_duplicates = FALSE) } if (opt$doublet.filter) { if (!requireNamespace("DoubletFinder", quietly = TRUE)) { remotes::install_github("chris-mcginnis-ucsf/DoubletFinder", upgrade = "never") } require(DoubletFinder) homotypic.prop <- modelHomotypic(scrna$seurat_clusters) nExp_poi <- round(0.075 * nrow(scrna@meta.data)) ## Assuming 7.5% doublet formation rate - tailor for your dataset nExp_poi.adj <- round(nExp_poi * (1 - homotypic.prop)) ## Run DoubletFinder with varying classification stringencies ---------------------------------------------------------------- scrna <- doubletFinder_v3(scrna, PCs = 1:10, pN = 0.25, pK = 0.09, nExp = nExp_poi, reuse.pANN = FALSE, sct = FALSE) scrna <- doubletFinder_v3(scrna, PCs = 1:10, pN = 0.25, pK = 0.09, nExp = nExp_poi.adj, reuse.pANN = paste0("pANN_0.25_0.09_", nExp_poi), sct = FALSE) scrna@meta.data %>% tibble::rownames_to_column("barcodes") %>% select(barcodes, starts_with("DF")) %>% select(barcodes, DoubletFinder = 2) -> doublet_df scrna@meta.data <- scrna@meta.data %>% select(!starts_with("DF")) %>% select(!starts_with("pANN")) %>% tibble::rownames_to_column("barcodes") %>% dplyr::left_join(doublet_df, by = "barcodes") %>% tibble::column_to_rownames("barcodes") subset(scrna, subset = DoubletFinder == "Singlet") -> scrna # scrna$DoubletFinder <- NULL } # RNA_=paste0("RNA_snn_res.",opt$resolution) # metrics <- table([email protected][["seurat_clusters"]], [email protected]$orig.ident) # p1 <- DimPlot(scrna, reduction = "pca", label = TRUE, label.size = 10) # celltype annotation with SingleR ref <- get(opt$reference)() DefaultAssay(scrna) <- "RNA" smObjSCE <- as.SingleCellExperiment(scrna) pred <- SingleR(test = smObjSCE, ref = ref, labels = ref$label.fine) AddMetaData(scrna, pred["pruned.labels"] %>% as.data.frame() %>% dplyr::select(singler = pruned.labels)) -> scrna attr(scrna, "SingleRref") <- opt$reference try({ DefaultAssay(scrna) <- "integrated" }) # output files saveRDS(scrna, file = opt$output.rds) |
2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 | option_list <- list( optparse::make_option(c("--rds"), type = "character", default = NULL, help = "Processed rds file of a Seurat object", metavar = "character" ), optparse::make_option(c("--tsv"), type = "character", default = NULL, help = "A text file contains the gene list", metavar = "character" ), optparse::make_option(c("--gene"), type = "character", default = NULL, help = "A list of genes to create plots", metavar = "character" ), optparse::make_option(c("--output.plot.dir"), type = "character", default = NULL, help = "Output plot directory", metavar = "character" ), optparse::make_option(c("--reduction.type"), type = "character", default = "umap", help = "Reduction type, umap or tsne", metavar = "character" ), optparse::make_option(c("--idents"), type = "character", default = "seurat_clusters", help = "Meta data column name", metavar = "character" ) ) opt_parser <- optparse::OptionParser(option_list = option_list) opt <- optparse::parse_args(opt_parser) try( { source("workflow/scripts/scrna-functions.R") }, silent = TRUE ) try( { source(paste0(system("python -c 'import os; import cellsnake; print(os.path.dirname(cellsnake.__file__))'", intern = TRUE), "/scrna/workflow/scripts/scrna-functions.R")) }, silent = TRUE ) if (is.null(opt$rds)) { optparse::print_help(opt_parser) stop("At least one argument must be supplied (rds file)", call. = FALSE) } require(Seurat) require(tidyverse) require(viridis) markers <- c() if (!is.null(opt$tsv)) { markers <- read_tsv(opt$tsv, col_names = FALSE) %>% pull() } scrna <- readRDS(file = opt$rds) DefaultAssay(scrna) <- "RNA" # RNA_=paste0("RNA_snn_res.",opt$resolution) Idents(object = scrna) <- scrna@meta.data[[opt$idents]] if (!is.null(opt$gene)) { markers <- c(unlist(strsplit(opt$gene, " ")), markers) } # identification=opt$idents # FetchData(scrna,c(identification,"UMAP_1","UMAP_2","TSNE_1","TSNE_2",markers)) %>% rownames_to_column("barcodes") %>% gather(gene,expr,5:last_col()) %>% group_by(gene,across(identification)) %>% dplyr::rename(clusters:=identification) %>% ungroup() -> plot_df # print(head(plot_df)) dir.create(opt$output.plot.dir, recursive = TRUE) suppressMessages(for (i in markers) { n <- length(Idents(scrna) %>% unique()) palette <- function_color_palette(n) tryCatch( { p1 <- FeaturePlot(scrna, reduction = opt$reduction.type, features = i, raster = FALSE) & scale_color_continuous(type = "viridis") & labs(color = "Expression") & theme(axis.text = element_text(size = 10)) p2 <- DotPlot(scrna, features = i) & scale_color_continuous(type = "viridis") & labs(color = "Average Expression", size = "Percent Expressed") & ylab("Identity") & theme(axis.title.x = element_blank(), axis.text = element_text(size = 10)) & theme(legend.position = "right") p3 <- VlnPlot(scrna, features = i) & ggthemes::theme_hc() & scale_fill_manual(values = palette) & theme(legend.position = "right", axis.text = element_text(size = 10)) & labs(fill = "") & xlab("Identity") & ylab("Expression Level") # p1 <- plot_df %>% filter(gene == i) %>% ggplot(aes(x=UMAP_1,y=UMAP_2,color=expr)) + geom_point(size=0.3) + theme_cellsnake_classic() + scale_color_continuous(type = "viridis") + labs(color="Expression") + theme(axis.text = element_text(size=12)) # p2 <- plot_df %>% group_by(gene,clusters) %>% summarise(percent=100*(length(expr[expr>0])/n()),average=mean(expr)) %>% filter(gene == i) %>% ggplot(aes(x=gene,y=clusters,size=percent,color=average)) + geom_point() + theme_cellsnake_classic() + scale_color_continuous(type = "viridis") + labs(color="Average Expression",size="Percent Expressed")+ ylab("Identity") + theme(axis.title.x = element_blank(),axis.text = element_text(size=12)) + theme(legend.position = "right") # p3 <- plot_df %>% ungroup() %>% filter(gene == i) %>% ggplot(aes(x=clusters,y=log10(expr),fill=clusters)) + geom_violin() + theme_cellsnake_classic() + geom_jitter(size=0.3,shape=20, position=position_jitter(0.2)) + scale_fill_manual(values = palette) + theme(legend.position = "right",axis.text = element_text(size = 12)) + labs(fill="") + xlab("Identity") + ylab("Log10-Expression Level") suppressWarnings(((p1 | p2) / p3) -> wp) }, error = function(cond) { wp <- plot.new() }, finally = {} ) -> wp ggsave(paste0(opt$output.plot.dir, "/", i, ".pdf"), wp, height = 5 + (n * 0.15), width = 7 + (n * 0.15), useDingbats = TRUE) }) |
3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 | option_list <- list( optparse::make_option(c("--sampleid"), type = "character", default = NULL, help = "Sample ID", metavar = "character" ), optparse::make_option(c("-r", "--rds"), type = "character", default = NULL, help = "A list of RDS files of Seurat objects", metavar = "character" ), optparse::make_option(c("--output.rds"), type = "character", default = "output.rds", help = "Output RDS file name [default= %default]", metavar = "character" ), optparse::make_option(c("--cca.dims"), type = "integer", default = 30, help = "Which dimensions to use from the CCA to specify the neighbor search space 1 to [default= %default]", metavar = "character" ), optparse::make_option(c("--reduction"), type = "character", default = "cca", help = "Integration reduction type [default= %default]", metavar = "character" ) ) opt_parser <- optparse::OptionParser(option_list = option_list) opt <- optparse::parse_args(opt_parser) if (is.null(opt$rds) || is.null(opt$sampleid)) { optparse::print_help(opt_parser) stop("At least one argument must be supplied (rds and sampleid)", call. = FALSE) } require(tidyverse) require(Seurat) require(patchwork) try( { source("workflow/scripts/scrna-functions.R") }, silent = TRUE ) try( { source(paste0(system("python -c 'import os; import cellsnake; print(os.path.dirname(cellsnake.__file__))'", intern = TRUE), "/scrna/workflow/scripts/scrna-functions.R")) }, silent = TRUE ) files <- unlist(strsplit(opt$rds, " ")) print(files) for (i in files) { if (!exists("scrna_list")) { scrna_list <- list(readRDS(file = i)) } else { scrna_list <- append(scrna_list, readRDS(file = i)) } } scrna_anchors <- FindIntegrationAnchors(object.list = scrna_list, dims = 1:opt$cca.dims, reduction = opt$reduction) scrna <- IntegrateData(anchorset = scrna_anchors, dims = 1:opt$cca.dims) saveRDS(scrna, file = opt$output.rds) |
3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 | option_list <- list( optparse::make_option(c("--rds"), type = "character", default = NULL, help = "A list of RDS files of Seurat objects", metavar = "character" ), optparse::make_option(c("--output"), type = "character", default = "pred.rds", help = "Output prediction file", metavar = "character" ), optparse::make_option(c("--reference"), type = "character", default = "HumanPrimaryCellAtlasData", help = "SingleR reference", metavar = "character" ), optparse::make_option(c("--granulation"), type = "character", default = "label.main", help = "SingleR granulation level", metavar = "character" ) ) opt_parser <- optparse::OptionParser(option_list = option_list) opt <- optparse::parse_args(opt_parser) if (is.null(opt$rds)) { optparse::print_help(opt_parser) stop("At least one argument must be supplied (rds and sampleid)", call. = FALSE) } require(optparse) require(SingleR) require(SingleCellExperiment) require(Seurat) scrna <- readRDS(file = opt$rds) DefaultAssay(scrna) <- "RNA" # celltype annotation with SingleR ref <- get(opt$reference)() smObjSCE <- as.SingleCellExperiment(scrna) pred <- SingleR(test = smObjSCE, ref = ref, labels = ref[[opt$granulation]]) saveRDS(pred, file = opt$output) |
3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 | option_list <- list( optparse::make_option(c("--rds"), type = "character", default = NULL, help = "A list of RDS files of Seurat objects", metavar = "character" ), optparse::make_option(c("--sheplot"), type = "character", default = "sheplot.pdf", help = "Output score heatmap plot file name", metavar = "character" ), optparse::make_option(c("--sheplottop"), type = "character", default = "sheplot.pdf", help = "Output score heatmap plot file name, top 20", metavar = "character" ), optparse::make_option(c("--pheplot"), type = "character", default = "pheplot.pdf", help = "Output heatmap plot file name", metavar = "character" ), optparse::make_option(c("--idents"), type = "character", default = "seurat_clusters", help = "Meta data column name", metavar = "character" ), optparse::make_option(c("--csv"), type = "character", default = NULL, help = "A meta data table", metavar = "character" ), optparse::make_option(c("--prediction"), type = "character", default = "pred.rds", help = "Input prediction file", metavar = "character" ), optparse::make_option(c("--xlsx"), type = "character", default = "predictions.xlsx", help = "Input prediction file", metavar = "character" ) ) opt_parser <- optparse::OptionParser(option_list = option_list) opt <- optparse::parse_args(opt_parser) if (is.null(opt$rds)) { optparse::print_help(opt_parser) stop("At least one argument must be supplied (rds and sampleid)", call. = FALSE) } require(optparse) require(SingleR) # require(celldex) require(pheatmap) require(Seurat) require(tidyverse) scrna <- readRDS(file = opt$rds) DefaultAssay(scrna) <- "RNA" # celltype annotation with SingleR pred <- readRDS(opt$prediction) plotScoreHeatmap(pred) -> p1 ggsave(plot = p1, filename = opt$sheplot, width = 15, height = 8) plotScoreHeatmap(pred, show.labels = F, max.labels = 20) -> p1 ggsave(plot = p1, filename = opt$sheplottop, width = 7, height = 4) tab <- table(Assigned = pred$pruned.labels, Cluster = scrna@meta.data[[opt$idents]]) pheatmap(log2(tab + 10), color = colorRampPalette(c("white", "blue"))(101)) -> p1 n1 <- length(unique(scrna$seurat_clusters)) n2 <- length(unique(pred$pruned.labels)) ggsave(plot = p1, filename = opt$pheplot, width = 6 + (n1 * 0.10), height = 4 + (n2 * 0.10)) tab %>% openxlsx::write.xlsx(file = opt$xlsx, row.names = F) |
4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 | option_list <- list( optparse::make_option(c("--rds"), type = "character", default = NULL, help = "Processed rds file of a Seurat object", metavar = "character" ), optparse::make_option(c("--keywords"), type = "character", default = NULL, help = "Keywords to slice object", metavar = "character" ), optparse::make_option(c("--column"), type = "character", default = NULL, help = "Meta data column name for selecting the slice", metavar = "character" ), optparse::make_option(c("--output.rds"), type = "character", default = "output.rds", help = "Output RDS file name [default= %default]", metavar = "character" ), optparse::make_option(c("--metadata"), type = "character", default = NULL, help = "Metadata filename", metavar = "character" ), optparse::make_option(c("--exact"), action = "store_true", default = FALSE, help = "Exact match, otherwise pattern match" ) ) opt_parser <- optparse::OptionParser(option_list = option_list) opt <- optparse::parse_args(opt_parser) if (is.null(opt$rds)) { optparse::print_help(opt_parser) stop("At least one argument must be supplied (rds file)", call. = FALSE) } require(Seurat) require(tidyverse) scrna <- readRDS(file = opt$rds) function_read_metadata <- function(opt) { file_type <- tools::file_ext(opt$metadata) possible_separators <- c(",", ";", "|", "\t") if (tolower(file_type) %in% c("csv", "tsv", "txt")) { for (sep in possible_separators) { tryCatch( { df <- read.csv(opt$metadata, sep = sep) return(df) }, error = function(e) { message(paste("Unable to read file with separator", sep)) } ) } } else if (tolower(file_type) %in% c("xls", "xlsx")) { for (sep in possible_separators) { tryCatch( { df <- openxlsx::read.xlsx(opt$metadata) return(df) }, error = function(e) { message(paste("Unable to read file with separator", sep)) } ) } } else { message("Unsupported file type") return(NULL) } message("Unable to read file with any separator") return(NULL) } if (!is.null(opt$metadata)) { function_read_metadata(opt) -> df if (is.null(df)) { message("No metadata file provided, skipping metadata merge") } else { scrna@meta.data %>% dplyr::left_join(df, by = c("orig.ident" = names(df)[1])) -> scrna@meta.data } } function_subset_by_idents <- function(scrna, opt) { if (!is.null(opt$keywords)) { keywords <- strsplit(opt$keywords, ",")[[1]] patterns <- paste0("(?i)", "(", paste(keywords, collapse = "|"), ")") scrna@meta.data %>% { if (!opt$exact) { if (is.null(opt$column)) dplyr::filter(., if_any(everything(), str_detect, pattern = patterns)) else dplyr::filter(., if_any(opt$column, str_detect, pattern = patterns)) } else { dplyr::filter(., get(opt$column) %in% keywords) } } %>% rownames() -> cells scrna <- subset(scrna, cells = cells) return(scrna) } } function_subset_by_idents(scrna, opt) -> scrna head(scrna) saveRDS(scrna, file = opt$output.rds) |
2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 | option_list <- list( optparse::make_option(c("--rds"), type = "character", default = NULL, help = "Processed rds file of a Seurat object", metavar = "character" ), optparse::make_option(c("--sampleid"), type = "character", default = NULL, help = "Sample ID", metavar = "character" ), optparse::make_option(c("--fplot"), type = "character", default = NULL, help = "nFeature plot", metavar = "character" ), optparse::make_option(c("--cplot"), type = "character", default = NULL, help = "nCount plot", metavar = "character" ), optparse::make_option(c("--mtplot"), type = "character", default = NULL, help = "Percent MT plot", metavar = "character" ), optparse::make_option(c("--rpplot"), type = "character", default = NULL, help = "Ribo plot", metavar = "character" ) ) opt_parser <- optparse::OptionParser(option_list = option_list) opt <- optparse::parse_args(opt_parser) if (is.null(opt$rds)) { optparse::print_help(opt_parser) stop("At least one argument must be supplied (rds file and sampleid)", call. = FALSE) } require(Seurat) require(tidyverse) # try({source("workflow/scripts/scrna-functions.R")}) # try({source(paste0(system("python -c 'import os; import cellsnake; print(os.path.dirname(cellsnake.__file__))'", intern = TRUE),"/scrna/workflow/scripts/scrna-functions.R"))}) scrna <- readRDS(file = opt$rds) FeaturePlot(scrna, features = "nFeature_RNA", pt.size = 0.1, raster = FALSE) ggsave(opt$fplot, width = 7, height = 5) FeaturePlot(scrna, features = "nCount_RNA", pt.size = 0.1, raster = FALSE) ggsave(opt$cplot, width = 7, height = 5) FeaturePlot(scrna, features = "percent.mt", pt.size = 0.1, raster = FALSE) ggsave(opt$mtplot, width = 7, height = 5) FeaturePlot(scrna, features = "percent.rp", pt.size = 0.1, raster = FALSE) ggsave(opt$rpplot, width = 7, height = 5) |
2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 | option_list <- list( optparse::make_option(c("--xlsx"), type = "character", default = NULL, help = "Excel table of markers for input", metavar = "character" ), optparse::make_option(c("--output.plot"), type = "character", default = "output.pdf", help = "Output plot directory", metavar = "character" ) ) opt_parser <- optparse::OptionParser(option_list = option_list) opt <- optparse::parse_args(opt_parser) # try({source("workflow/scripts/scrna-functions.R")},silent=TRUE) # try({source(paste0(system("python -c 'import os; import cellsnake; print(os.path.dirname(cellsnake.__file__))'", intern = TRUE),"/scrna/workflow/scripts/scrna-functions.R"))},silent=TRUE) if (is.null(opt$xlsx)) { optparse::print_help(opt_parser) stop("At least one argument must be supplied (rds file)", call. = FALSE) } require(tidyverse) Positive_Features <- openxlsx::read.xlsx(opt$xlsx) Positive_Features %>% group_by(cluster) %>% slice_max(n = 20, order_by = avg_log2FC) -> df Positive_Features %>% distinct(cluster) %>% pull() -> clusters pdf(opt$output.plot, width = 6, height = 6) for (i in clusters) { df %>% dplyr::filter(cluster %in% i) -> df2 maxFC <- (df2 %>% pull(avg_log2FC) %>% max()) + 1 try({ df2 %>% dplyr::mutate(n = dense_rank(dplyr::desc(avg_log2FC))) %>% ggplot(aes(x = n, y = avg_log2FC, label = gene)) + geom_text(angle = 75, size = 4) + ggthemes::theme_few() + ylim(c(0, maxFC)) + coord_cartesian(clip = "off", expand = TRUE) + ggtitle("Top markers", subtitle = paste(i, "vs all")) -> p1 print(p1) }) } dev.off() |
4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 | option_list <- list( optparse::make_option(c("--rds"), type = "character", default = NULL, help = "RDS file of marker data frame", metavar = "character" ), optparse::make_option(c("--vplot"), type = "character", default = "vplot.pdf", help = "Output volcano plot name", metavar = "character" ) ) opt_parser <- optparse::OptionParser(option_list = option_list) opt <- optparse::parse_args(opt_parser) if (is.null(opt$rds)) { optparse::print_help(opt_parser) stop("At least one argument must be supplied (rds file)", call. = FALSE) } require(EnhancedVolcano) require(tidyverse) # try({source("workflow/scripts/scrna-functions.R")}) # try({source(paste0(system("python -c 'import os; import cellsnake; print(os.path.dirname(cellsnake.__file__))'", intern = TRUE),"/scrna/workflow/scripts/scrna-functions.R"))}) all_markers <- readRDS(file = opt$rds) pdf(opt$vplot, width = 7, height = 7) for (i in unique(all_markers$condition)) { markers <- all_markers %>% filter(condition == i) print(EnhancedVolcano(markers, x = "avg_log2FC", y = "p_val_adj", lab = markers %>% pull(gene), FCcutoff = 0.75, subtitle = i)) } dev.off() |
Support
- Future updates