BioWorkflows: scRNA Seurat workflow

shell:
    """
    {cellsnake_path}workflow/scripts/scrna-seurat-integration.R --rds "{input}" --sampleid {integration_id} --output.rds {output} --reduction {reduction} --dims {dims}
    """

SnakeMake From line 6 of rules/integration.smk

run:
    shell("""
        rm -r {output.outdir}/counts;
        {cellsnake_path}workflow/mg2sc/src/scMeG-kraken.py --input {input.bam} --outdir {output.outdir} --DBpath {kraken_db_folder} --threads {threads} --minimum-hit-groups {min_hit_groups} --confidence {confidence} --prefix {wildcards.sample}
        """)
    #if kraken_extra_files is False:
    #    shell("rm -r {output.kr} {output.classified}")

SnakeMake From line 23 of rules/microbiome.smk

shell:
    "{cellsnake_path}workflow/scripts/scrna-kraken2-collapse.py {input.outdir}/counts {input.hierarchy} {wildcards.taxa} {output}"

SnakeMake From line 39 of rules/microbiome.smk

shell:
    """Rscript -e 'SeuratDisk::Convert("{input}",dest = "h5seurat", overwrite = TRUE)'"""

SnakeMake From line 47 of rules/microbiome.smk

shell:
    "{cellsnake_path}workflow/scripts/scrna-kraken2-data-parser.R --h5seurat {input} --output.rds {output.microbiome_rds} --output.plot {output.plot} --sampleid {wildcards.sample} --taxa {wildcards.taxa} --min.features {microbiome_min_features} --min.cells {microbiome_min_cells}"

SnakeMake From line 56 of rules/microbiome.smk

shell:
    "{cellsnake_path}workflow/scripts/scrna-microbiome-dimplot.R --rds {input.rds} --microbiome.rds {input.microbiome_rds} --dimplot {output.dimplot} --tplot {output.tplot} --reduction.type {wildcards.reduction} --taxa {wildcards.taxa}"

SnakeMake From line 67 of rules/microbiome.smk

shell:
    "{cellsnake_path}workflow/scripts/scrna-microbiome-sigplot.R --rds {input.rds} --microbiome.rds {input.microbiome_rds} --taxa {wildcards.taxa}  --sigtable {output.xlsx} --idents {wildcards.i}"

SnakeMake From line 79 of rules/microbiome.smk

shell:
    """{cellsnake_path}workflow/scripts/scrna-combine-microbiome-rds.R --rds "{input}" --sampleid {integration_id} --output.rds {output}"""

SnakeMake From line 89 of rules/microbiome.smk

run:
    if is_integrated_sample is True:
        shell("cp {input.raw} {output.rds}")
    else:
        shell("{cellsnake_path}workflow/scripts/scrna-read-qc.R --data.dir {input.raw} --output.rds {output.rds} --sampleid {wildcards.sample} --percent.rp {percent_rp} --percent.mt {wildcards.percent_mt} --min.features {min_features} --max.features {max_features} --max.molecules {max_molecules}  --min.cells {min_cells} --before.violin.plot {output.before} --after.violin.plot {output.after} {params.mt_param}")

SnakeMake From line 42 of rules/seurat.smk

run:
    if is_integrated_sample is True:
        shell("{cellsnake_path}workflow/scripts/scrna-clusteringtree.R --rds {input} --clplot {output.clustree} --integration")
    else:
        shell("{cellsnake_path}workflow/scripts/scrna-clusteringtree.R --rds {input} --scale.factor {scale_factor} --nfeature {highly_variable_features} --variable.selection.method {variable_selection_method} --normalization.method {normalization_method} --clplot {output.clustree} --heplot {output.heatmap} --hvfplot {output.hvfplot} --jeplot {output.jackandelbow}")

SnakeMake From line 58 of rules/seurat.smk

shell:
    "{cellsnake_path}workflow/scripts/scrna-normalization-pca.R --rds {input} {params.doublet_filter} --normalization.method {normalization_method} --cpu {threads} "
    "--scale.factor {scale_factor} --reference {singler_ref} --variable.selection.method {variable_selection_method} --nfeature {highly_variable_features} --resolution {params.paramaters[resolution]} "
    "--output.rds {output.rds} {umap_plot} {tsne_plot} {params.integration}"

SnakeMake From line 80 of rules/seurat.smk

shell:
    "{cellsnake_path}workflow/scripts/scrna-metrics.R --rds {input.rds} --ccplot {output.ccplot} --ccbarplot {output.ccbarplot} --html {output.html} --idents {wildcards.i} --xlsx {output.xlsx}"

SnakeMake From line 94 of rules/seurat.smk

shell:
    "{cellsnake_path}workflow/scripts/scrna-technicals.R --rds {input.rds} --fplot {output.fplot} --cplot {output.cplot} --mtplot {output.mtplot} --rpplot {output.rpplot}"

SnakeMake From line 106 of rules/seurat.smk

shell:
    "{cellsnake_path}workflow/scripts/scrna-dimplot.R --rds {input} --reduction.type {wildcards.reduction} --pdfplot {output.pl} --htmlplot {output.html} --idents {wildcards.i} --percentage {min_percentage_to_plot} {show_labels}"

SnakeMake From line 120 of rules/seurat.smk

shell:
    "{cellsnake_path}workflow/scripts/scrna-dimplot.R --rds {input.rds} --reduction.type {wildcards.reduction} --pdfplot {output.pl} --htmlplot {output.html} --csv {input.prediction} --idents singler --percentage {min_percentage_to_plot}  {show_labels}"

SnakeMake From line 135 of rules/seurat.smk

shell:
    "{cellsnake_path}workflow/scripts/scrna-find-markers.R --rds {input} --idents {wildcards.i} --logfc.threshold {logfc_threshold} --test.use {test_use}  --output.rds {output.rds}"

SnakeMake From line 147 of rules/seurat.smk

shell:
    "{cellsnake_path}workflow/scripts/scrna-marker-tables.R --rds {input} --output.xlsx.positive {output.positive} --output.xlsx.all {output.allmarkers}"

SnakeMake From line 159 of rules/seurat.smk

shell:
    "{cellsnake_path}workflow/scripts/scrna-top-marker-plot.R --xlsx {input} --output.plot {output}" 

SnakeMake From line 168 of rules/seurat.smk

shell:
    "{cellsnake_path}workflow/scripts/scrna-marker-heatmap.R --rds {input.rds} --xlsx {input.excel} --output.plot {output} --idents {wildcards.i}" 

SnakeMake From line 177 of rules/seurat.smk

shell:
    "{cellsnake_path}workflow/scripts/scrna-marker-plots.R --rds {input.rds} --xlsx {input.excel} --top_n {marker_plots_per_cluster_n} --output.plot.dir {output} --reduction.type {wildcards.reduction}"

SnakeMake From line 189 of rules/seurat.smk

shell:
    """{cellsnake_path}workflow/scripts/scrna-selected-marker-plots.R --rds {input} --gene "{params.gene}" --output.plot.dir {params.sdir} --reduction.type {wildcards.reduction} --idents {wildcards.i}"""

SnakeMake From line 203 of rules/seurat.smk

shell:
    "{cellsnake_path}workflow/scripts/scrna-convert-to-h5ad.R --rds {input.rds} --output {output}"

SnakeMake From line 214 of rules/seurat.smk

shell:
    "{cellsnake_path}workflow/scripts/scrna-singler-annotation.R --rds {input} --output {output.prediction} --reference {singler_ref} --granulation {singler_granulation}"

SnakeMake From line 222 of rules/seurat.smk

shell:
    "{cellsnake_path}workflow/scripts/scrna-singler-plots.R --rds {input.rds} --prediction {input.prediction} --sheplot {output.sheplot} --pheplot {output.pheplot} --sheplottop {output.sheplottop} --xlsx {output.xlsx} --idents {wildcards.i}"

SnakeMake From line 236 of rules/seurat.smk

shell:
    "{cellsnake_path}workflow/scripts/scrna-celltypist.py {input} {output.dotplot} {output.outputdir} {output.xlsx} {celltypist_model} {wildcards.i}"

SnakeMake From line 250 of rules/seurat.smk

shell:
    """
    {cellsnake_path}workflow/scripts/scrna-celltypist.R --rds {input.rds} --csv {input.csv} --output.tsne.plot {output.tsne} --output.umap.plot {output.umap} --percentage {min_percentage_to_plot} {show_labels}
    """

SnakeMake From line 260 of rules/seurat.smk

shell:
    "{cellsnake_path}workflow/scripts/scrna-kegg.R --xlsx {input} --output.rds {output.rds} --mapping {mapping} --organism {organism} --output.kegg {output.kegg} --output.mkegg {output.mkegg}  --output.gse {output.gse} --output.mgse {output.mgse}"

SnakeMake From line 275 of rules/seurat.smk

shell:
    "{cellsnake_path}workflow/scripts/scrna-go_analysis.R --xlsx {input} --output.rds {output.rds} --mapping {mapping} --output.go {output.go} --output.gse {output.gse}"

SnakeMake From line 285 of rules/seurat.smk

shell:
    "{cellsnake_path}workflow/scripts/scrna-find-pairwise-markers.R --rds {input.rds} --logfc.threshold {logfc_threshold} --test.use {test_use} --output.rds {output.rds} --metadata {input.metadata} --metadata.column {wildcards.i}"

SnakeMake From line 295 of rules/seurat.smk

shell:
    "{cellsnake_path}workflow/scripts/scrna-marker-tables.R --rds {input} --output.xlsx.positive {output.positive} --output.xlsx.all {output.allmarkers}"

SnakeMake From line 306 of rules/seurat.smk

shell:
    "{cellsnake_path}workflow/scripts/scrna-volcano.R --rds {input.rds} --vplot {output.pdf}"

SnakeMake From line 314 of rules/seurat.smk

run:
    if wildcards.i == "majority_voting":
        shell("{cellsnake_path}workflow/scripts/scrna-gsea.R --idents {wildcards.i} --rds {input.rds} --gseafile {input.gseafile} --output.xlsx {output.xlsx}  --csv {input.csv}")
    else:
        shell("{cellsnake_path}workflow/scripts/scrna-gsea.R --idents {wildcards.i} --rds {input.rds} --gseafile {input.gseafile} --output.xlsx {output.xlsx}")

SnakeMake From line 327 of rules/seurat.smk

run:
    if wildcards.i == "majority_voting":
        shell("{cellsnake_path}workflow/scripts/scrna-cellchat.R --rds {input.rds} --species {species} --idents {wildcards.i} --output {output.cellchatrds} --csv {input.csv}")
    else:
        shell("{cellsnake_path}workflow/scripts/scrna-cellchat.R --rds {input.rds} --species {species} --idents {wildcards.i} --output {output.cellchatrds}")

SnakeMake From line 341 of rules/seurat.smk

shell:
    "{cellsnake_path}workflow/scripts/scrna-cellchat_plots.R --rds {input.cellchatrds} --output.dir {output.outputdir}"

SnakeMake From line 352 of rules/seurat.smk

shell:
    "{cellsnake_path}workflow/scripts/scrna-monocle3.R --rds {input.rds} --output.dir {params.outputdir} --pplot {output.pplot}"

SnakeMake From line 362 of rules/seurat.smk

shell:
    "{cellsnake_path}workflow/scripts/scrna-subset_final_rds.R --rds {input.rds} --output.rds {output.rds} --keywords {keywords} --column {subset_column} --metadata {metadata} {exact}"

SnakeMake From line 6 of rules/subset.smk

option_list <- list(
  optparse::make_option(c("--rds"),
    type = "character", default = NULL,
    help = "Cellchat object", metavar = "character"
  ),
  optparse::make_option(c("--output.dir"),
    type = "character", default = NULL,
    help = "Output directory", metavar = "character"
  )
)



opt_parser <- optparse::OptionParser(option_list = option_list)
opt <- optparse::parse_args(opt_parser)

if (is.null(opt$rds)) {
  optparse::print_help(opt_parser)
  stop("At least one argument must be supplied (rds file)", call. = FALSE)
}

require(Seurat)
require(tidyverse)
require(CellChat)
require(ggalluvial)

cellchat <- readRDS(file = opt$rds)




technicalDetailsPathStep2ResolutionCellChat <- ""
pathToPlotsStep2ResolutionCellChat <- paste0(opt$output.dir, "/")
dir.create(pathToPlotsStep2ResolutionCellChat, showWarnings = F)



groupSize <- as.numeric(table(cellchat@idents))
par(mfrow = c(1, 2), xpd = TRUE)
fName <- "Number-of-interactions"
pdf(paste(pathToPlotsStep2ResolutionCellChat, fName, ".pdf", sep = ""), 10, 10)
print(netVisual_circle(cellchat@net$count, vertex.weight = groupSize, weight.scale = T, label.edge = F, title.name = "Number of interactions"))
dev.off()

fName <- "Interaction-strength"
pdf(paste(pathToPlotsStep2ResolutionCellChat, fName, ".pdf", sep = ""), 10, 10)
print(netVisual_circle(cellchat@net$weight, vertex.weight = groupSize, weight.scale = T, label.edge = F, title.name = "Interaction weights/strength"))
dev.off()

pathToPlotsStep2ResolutionCellChatClusters <- paste0(pathToPlotsStep2ResolutionCellChat, "1-clusters", "/")
dir.create(pathToPlotsStep2ResolutionCellChatClusters, showWarnings = F)


mat <- cellchat@net$weight
par(mfrow = c(3, 4), xpd = TRUE)
for (i in 1:nrow(mat)) {
  mat2 <- matrix(0, nrow = nrow(mat), ncol = ncol(mat), dimnames = dimnames(mat))
  mat2[i, ] <- mat[i, ]
  fName <- stringr::str_replace_all(paste0("edge-weights-for-", rownames(mat)[i]), "/", "_")
  pdf(paste(pathToPlotsStep2ResolutionCellChatClusters, fName, ".pdf", sep = ""), 10, 10)
  print(netVisual_circle(mat2, vertex.weight = groupSize, weight.scale = T, edge.weight.max = max(mat), title.name = rownames(mat)[i]))
  dev.off()
}

pathToPlotsStep2ResolutionCellChatSignPath <- paste0(pathToPlotsStep2ResolutionCellChat, "2-significant-pathways", "/")
dir.create(pathToPlotsStep2ResolutionCellChatSignPath, showWarnings = F)

# All the signaling pathways showing significant communications
allPathways <- cellchat@netP$pathways
for (pathways.show in allPathways)
{
  pathToPlotsStep2ResolutionCellChatSignPathCicrcle <- paste0(pathToPlotsStep2ResolutionCellChatSignPath, "circle-plots", "/")
  dir.create(pathToPlotsStep2ResolutionCellChatSignPathCicrcle, showWarnings = F)

  # Hierarchy plot
  # Here we define `vertex.receive` so that the left portion of the hierarchy plot shows signaling to fibroblast and the right portion shows signaling to immune cells
  vertex.receiver <- seq(1, 4) # a numeric vector.
  netVisual_aggregate(cellchat, signaling = pathways.show, vertex.receiver = vertex.receiver)
  # Circle plot
  par(mfrow = c(1, 1))
  fName <- pathways.show
  pdf(paste(pathToPlotsStep2ResolutionCellChatSignPathCicrcle, fName, ".pdf", sep = ""), 10, 10)
  print(netVisual_aggregate(cellchat, signaling = pathways.show, layout = "circle"))
  dev.off()

  pathToPlotsStep2ResolutionCellChatSignPathchord <- paste0(pathToPlotsStep2ResolutionCellChatSignPath, "chord-plots", "/")
  dir.create(pathToPlotsStep2ResolutionCellChatSignPathchord, showWarnings = F)
  # Chord diagram
  par(mfrow = c(1, 1))
  pdf(paste(pathToPlotsStep2ResolutionCellChatSignPathchord, fName, ".pdf", sep = ""), 10, 10)
  print(netVisual_aggregate(cellchat, signaling = pathways.show, layout = "chord"))
  dev.off()

  pathToPlotsStep2ResolutionCellChatSignPathHeatmap <- paste0(pathToPlotsStep2ResolutionCellChatSignPath, "heatmap-plots", "/")
  dir.create(pathToPlotsStep2ResolutionCellChatSignPathHeatmap, showWarnings = F)
  # Heatmap
  par(mfrow = c(1, 1))

  pdf(paste(pathToPlotsStep2ResolutionCellChatSignPathHeatmap, fName, ".pdf", sep = ""), 10, 10)
  print(netVisual_heatmap(cellchat, signaling = pathways.show, color.heatmap = "Reds"))
  dev.off()
}


#> Do heatmap based on a single object


# Chord diagram
# group.cellType <- c(rep("FIB", 4), rep("DC", 4), rep("TC", 4)) # grouping cell clusters into fibroblast, DC and TC cells
# names(group.cellType) <- levels(cellchat@idents)
# netVisual_chord_cell(cellchat, signaling = pathways.show, group = group.cellType, title.name = paste0(pathways.show, " signaling network"))
#> Plot the aggregated cell-cell communication network at the signaling pathway level


pathToPlotsStep2ResolutionCellChatContributionOfLRpairs <- paste0(pathToPlotsStep2ResolutionCellChat, "3-ligand-receptor-pairs", "/")
dir.create(pathToPlotsStep2ResolutionCellChatContributionOfLRpairs, showWarnings = F)
fName <- "all-pathways-contribution"
tryCatch(
  {
    pdf(paste(pathToPlotsStep2ResolutionCellChatContributionOfLRpairs, fName, ".pdf", sep = ""), 5, 20)
    print(netAnalysis_contribution(cellchat, signaling = allPathways))
    dev.off()
  },
  error = function(e) {
    dev.off()
  }
)

# Compute the contribution of each ligand-receptor pair to the overall signaling pathway and visualize cell-cell communication mediated by a single ligand-receptor pair
for (pathways.show in allPathways)
{
  pathToPlotsStep2ResolutionCellChatContributionOfLRpairsCircle <- paste0(pathToPlotsStep2ResolutionCellChatContributionOfLRpairs, "circle", "/")
  dir.create(pathToPlotsStep2ResolutionCellChatContributionOfLRpairsCircle, showWarnings = F)

  pairLR.CXCL <- extractEnrichedLR(cellchat, signaling = pathways.show, geneLR.return = FALSE)
  LR.show <- pairLR.CXCL[1, ] # show one ligand-receptor pair
  # Hierarchy plot
  vertex.receiver <- seq(1, 4) # a numeric vector
  fName <- pathways.show
  pdf(paste(pathToPlotsStep2ResolutionCellChatContributionOfLRpairsCircle, fName, ".pdf", sep = ""), 10, 10)
  print(netVisual_individual(cellchat, signaling = pathways.show, pairLR.use = LR.show, layout = "circle"))
  dev.off()

  pathToPlotsStep2ResolutionCellChatContributionOfLRpairsChord <- paste0(pathToPlotsStep2ResolutionCellChatContributionOfLRpairs, "chord", "/")
  dir.create(pathToPlotsStep2ResolutionCellChatContributionOfLRpairsChord, showWarnings = F)

  #> [[1]]
  # Chord diagram
  pdf(paste(pathToPlotsStep2ResolutionCellChatContributionOfLRpairsChord, fName, ".pdf", sep = ""), 10, 10)
  print(netVisual_individual(cellchat, signaling = pathways.show, pairLR.use = LR.show, layout = "chord"))
  dev.off()
}

pathToPlotsStep2ResolutionCellChatCellCellC <- paste0(pathToPlotsStep2ResolutionCellChat, "4-cell-cell-communication", "/")
dir.create(pathToPlotsStep2ResolutionCellChatCellCellC, showWarnings = F)

# show all the significant interactions (L-R pairs) from some cell groups (defined by 'sources.use') to other cell groups (defined by 'targets.use')
fName <- "significant-interactions-for-cell-groups"
pdf(paste(pathToPlotsStep2ResolutionCellChatCellCellC, fName, ".pdf", sep = ""), 50, 20)
print(netVisual_bubble(cellchat, remove.isolate = FALSE))
dev.off()
# netVisual_bubble(cellchat, sources.use = 4, targets.use = c(5:11), remove.isolate = FALSE)


# show all the significant interactions (L-R pairs) associated with certain signaling pathways
fName <- "significant-interactoions-for-singaling-pathways"
pdf(paste(pathToPlotsStep2ResolutionCellChatCellCellC, fName, ".pdf", sep = ""), 50, 20)
print(netVisual_bubble(cellchat, signaling = allPathways, remove.isolate = FALSE))
dev.off()

# show all the significant interactions (L-R pairs) based on user's input (defined by `pairLR.use`)
# pairLR.use <- extractEnrichedLR(cellchat, signaling = c("CCL","CXCL","FGF"))
# netVisual_bubble(cellchat, sources.use = c(3,4), targets.use = c(5:8), pairLR.use = pairLR.use, remove.isolate = TRUE)
#> Comparing communications on a single object
#>

pathToPlotsStep2ResolutionCellChatCellCellCAllInteractions <- paste0(pathToPlotsStep2ResolutionCellChatCellCellC, "interactions", "/")
dir.create(pathToPlotsStep2ResolutionCellChatCellCellCAllInteractions, showWarnings = F)

# show all the significant interactions (L-R pairs) from some cell groups (defined by 'sources.use') to other cell groups (defined by 'targets.use')
# show all the interactions sending from Inflam.FIB

if (FALSE) { # commented out because it takes a lot of time
  for (g1 in unique(cellchat@meta$identity))
  {
    for (g2 in unique(cellchat@meta$identity))
    {
      tryCatch(
        {
          fName <- paste0("from-", g1, "-to-", g2)
          pdf(paste(pathToPlotsStep2ResolutionCellChatCellCellCAllInteractions, fName, ".pdf", sep = ""), 50, 20)
          print(netVisual_chord_gene(cellchat, sources.use = g1, targets.use = g2, lab.cex = 0.5, legend.pos.y = 30))
          dev.off()
        },
        error = function(cond) {
          dev.off()
        },
        finally = {}
      )
    }
  }

  pathToPlotsStep2ResolutionCellChatCellCellCAllInteractionsWithP <- paste0(pathToPlotsStep2ResolutionCellChatCellCellC, "interactions-with-pathways", "/")
  dir.create(pathToPlotsStep2ResolutionCellChatCellCellCAllInteractionsWithP, showWarnings = F)

  for (g1 in unique(cellchat@meta$identity))
  {
    for (g2 in unique(cellchat@meta$identity))
    {
      for (pathways.show in allPathways)
      {
        tryCatch(
          {
            fName <- paste0("from-", g1, "-to-", g2, "-pathway-", pathways.show)
            pdf(paste(pathToPlotsStep2ResolutionCellChatCellCellCAllInteractionsWithP, fName, ".pdf", sep = ""), 50, 20)
            print(netVisual_chord_gene(cellchat, sources.use = g1, targets.use = g2, signaling = pathways.show, lab.cex = 0.5, legend.pos.y = 30))
            dev.off()
          },
          error = function(cond) {
            dev.off()
          },
          finally = {}
        )
      }
    }
  }
}

pathToPlotsStep2ResolutionGE <- paste0(pathToPlotsStep2ResolutionCellChat, "5-gene-expression", "/")
dir.create(pathToPlotsStep2ResolutionGE, showWarnings = F)

for (pathways.show in allPathways)
{
  fName <- paste0("gene-expression", "-pathway-", pathways.show)
  pdf(paste(pathToPlotsStep2ResolutionGE, fName, ".pdf", sep = ""), 20, 10)
  print(plotGeneExpression(cellchat, signaling = pathways.show))
  dev.off()
}

pathToPlotsStep2ResolutionCC <- paste0(pathToPlotsStep2ResolutionCellChat, "6-system-analysis-of-cc-communication-network", "/")
dir.create(pathToPlotsStep2ResolutionCC, showWarnings = F)

pathToPlotsStep2ResolutionCCNCS <- paste0(pathToPlotsStep2ResolutionCC, "network-centrality-scores/")
dir.create(pathToPlotsStep2ResolutionCCNCS, showWarnings = F)
# Compute the network centrality scores
cellchat <- netAnalysis_computeCentrality(cellchat, slot.name = "netP") # the slot 'netP' means the inferred intercellular communication network of signaling pathways
# Visualize the computed centrality scores using heatmap, allowing ready identification of major signaling roles of cell groups
for (pathways.show in allPathways)
{
  fName <- pathways.show
  pdf(paste(pathToPlotsStep2ResolutionCCNCS, fName, ".pdf", sep = ""), 5, 5)
  print(netAnalysis_signalingRole_network(cellchat, signaling = pathways.show, font.size = 10))
  dev.off()
}

pathToPlotsStep2ResolutionCCDSR <- paste0(pathToPlotsStep2ResolutionCC, "dominant-senders-and-receivers/")
dir.create(pathToPlotsStep2ResolutionCCDSR, showWarnings = F)

# Visualize the dominant senders (sources) and receivers (targets) in a 2D space

# We also provide another intutive way to visualize the dominant senders (sources) and receivers (targets) in a 2D space using scatter plot.

# Signaling role analysis on the aggregated cell-cell communication network from all signaling pathways
fName <- "aggregated-results"
pdf(paste(pathToPlotsStep2ResolutionCC, fName, ".pdf", sep = ""), 10, 10)
print(netAnalysis_signalingRole_scatter(cellchat))
dev.off()

pathToPlotsStep2ResolutionCCDSRSP <- paste0(pathToPlotsStep2ResolutionCCDSR, "scatter/")
dir.create(pathToPlotsStep2ResolutionCCDSRSP, showWarnings = F)

pathToPlotsStep2ResolutionCCDH <- paste0(pathToPlotsStep2ResolutionCCDSR, "heatmap/")
dir.create(pathToPlotsStep2ResolutionCCDH, showWarnings = F)

for (pathways.show in allPathways)
{
  #> Signaling role analysis on the aggregated cell-cell communication network from all signaling pathways
  # Signaling role analysis on the cell-cell communication networks of interest

  fName <- pathways.show

  tryCatch(
    {
      pdf(paste(pathToPlotsStep2ResolutionCCDSRSP, fName, ".pdf", sep = ""), 10, 20)
      print(netAnalysis_signalingRole_scatter(cellchat, signaling = pathways.show))
      dev.off()
    },
    error = function(cond) {
      dev.off()
    },
    finally = {}
  )

  tryCatch(
    {
      # Signaling role analysis on the cell-cell communication networks of interest
      pdf(paste(pathToPlotsStep2ResolutionCCDH, fName, ".pdf", sep = ""), 10, 20)
      print(netAnalysis_signalingRole_heatmap(cellchat, signaling = pathways.show))
      dev.off()
    },
    error = function(cond) {
      dev.off()
    },
    finally = {}
  )
}

# Signaling role analysis on the aggregated cell-cell communication network from all signaling pathways
ht1 <- netAnalysis_signalingRole_heatmap(cellchat, pattern = "outgoing")
ht2 <- netAnalysis_signalingRole_heatmap(cellchat, pattern = "incoming")
fName <- "aggregated-results-incoming-outgoing"
pdf(paste(pathToPlotsStep2ResolutionCC, fName, ".pdf", sep = ""), 10, 5)
print(ht1 + ht2)
dev.off()



# Identify global communication patterns to explore how multiple cell types and signaling pathways coordinate together

pathToPlotsStep2ResolutionOCPWSC <- paste0(pathToPlotsStep2ResolutionCellChat, "7-communication-pattern-of-secreting-cells/")
dir.create(pathToPlotsStep2ResolutionOCPWSC, showWarnings = F)

# write("In outgoing: You look at Cophenetic and Silhouette values and when they begin to drop suddenly then you look at the number (one before dropping), opposite with icoming pattern", paste0(pathToPlotsStep2ResolutionOCPWSC, "instruction.txt"))

library(NMF)
library(ggalluvial)

pathToPlotsStep2ResolutionOCPWSCOut <- paste0(pathToPlotsStep2ResolutionOCPWSC, "outgoing/")
dir.create(pathToPlotsStep2ResolutionOCPWSCOut, showWarnings = F)

out <- selectK(cellchat, pattern = "outgoing")

fName <- "outgoing-communication-pattern"
pdf(paste(pathToPlotsStep2ResolutionOCPWSCOut, fName, ".pdf", sep = ""), 10, 5)
print(out)
dev.off()

pathToPlotsStep2ResolutionOCPWSCHeatmap <- paste0(pathToPlotsStep2ResolutionOCPWSCOut, "heatmap/")
dir.create(pathToPlotsStep2ResolutionOCPWSCHeatmap, showWarnings = F)

pathToPlotsStep2ResolutionOCPWSCRiver <- paste0(pathToPlotsStep2ResolutionOCPWSCOut, "river/")
dir.create(pathToPlotsStep2ResolutionOCPWSCRiver, showWarnings = F)

pathToPlotsStep2ResolutionOCPWSCDot <- paste0(pathToPlotsStep2ResolutionOCPWSCOut, "dot/")
dir.create(pathToPlotsStep2ResolutionOCPWSCDot, showWarnings = F)


for (nPatterns in c(1:10))
{
  tryCatch(
    {
      fName <- paste0("pattern-", nPatterns)
      cellchat <- identifyCommunicationPatterns(cellchat, pattern = "outgoing", k = nPatterns)
      pdf(paste(pathToPlotsStep2ResolutionOCPWSCHeatmap, fName, ".pdf", sep = ""), 10, 10)
      print(identifyCommunicationPatterns(cellchat, pattern = "outgoing", k = nPatterns))
      dev.off()

      pdf(paste(pathToPlotsStep2ResolutionOCPWSCRiver, fName, ".pdf", sep = ""), 10, 10)
      print(netAnalysis_river(cellchat, pattern = "outgoing"))
      dev.off()

      pdf(paste(pathToPlotsStep2ResolutionOCPWSCDot, fName, ".pdf", sep = ""), 10, 10)
      print(netAnalysis_dot(cellchat, pattern = "outgoing"))
      dev.off()
    },
    error = function(cond) {},
    finally = {}
  )
}




pathToPlotsStep2ResolutionOCPWSCIn <- paste0(pathToPlotsStep2ResolutionOCPWSC, "incoming/")
dir.create(pathToPlotsStep2ResolutionOCPWSCIn, showWarnings = F)

out <- selectK(cellchat, pattern = "incoming")

fName <- "incoming-communication-pattern"
pdf(paste(pathToPlotsStep2ResolutionOCPWSCIn, fName, ".pdf", sep = ""), 10, 10)
print(out)
dev.off()

pathToPlotsStep2ResolutionOCPWSCHeatmap <- paste0(pathToPlotsStep2ResolutionOCPWSCIn, "heatmap/")
dir.create(pathToPlotsStep2ResolutionOCPWSCHeatmap, showWarnings = F)


pathToPlotsStep2ResolutionOCPWSCRiver <- paste0(pathToPlotsStep2ResolutionOCPWSCIn, "river/")
dir.create(pathToPlotsStep2ResolutionOCPWSCRiver, showWarnings = F)

pathToPlotsStep2ResolutionOCPWSCDot <- paste0(pathToPlotsStep2ResolutionOCPWSCIn, "dot/")
dir.create(pathToPlotsStep2ResolutionOCPWSCDot, showWarnings = F)


for (nPatterns in 1:10)
{
  tryCatch(
    {
      fName <- paste0("pattern-", nPatterns)
      cellchat <- identifyCommunicationPatterns(cellchat, pattern = "incoming", k = nPatterns)
      pdf(paste(pathToPlotsStep2ResolutionOCPWSCHeatmap, fName, ".pdf", sep = ""), 10, 10)
      print(identifyCommunicationPatterns(cellchat, pattern = "incoming", k = nPatterns))
      dev.off()

      pdf(paste(pathToPlotsStep2ResolutionOCPWSCRiver, fName, ".pdf", sep = ""), 10, 10)
      print(netAnalysis_river(cellchat, pattern = "incoming"))
      dev.off()

      pdf(paste(pathToPlotsStep2ResolutionOCPWSCDot, fName, ".pdf", sep = ""), 10, 10)
      print(netAnalysis_dot(cellchat, pattern = "incoming"))
      dev.off()
    },
    error = function(cond) {},
    finally = {}
  )
}

R tidyverse Seurat ggalluvial NMF From line 4 of scripts/scrna-cellchat_plots.R

import sys
import scanpy as sc
import pandas as pd
import celltypist

data = sc.read_h5ad(sys.argv[1])
dotplot_file_name=sys.argv[2]
output_analyses_directory=sys.argv[3]
output_xlsx=sys.argv[4]
model=sys.argv[5]
idents= 'seurat_clusters' if sys.argv[6] is None else sys.argv[6]

predictions = celltypist.annotate(data, model = model, majority_voting = True)

adata = predictions.to_adata()
#'seurat_clusters'
dotplot=celltypist.dotplot(predictions, use_as_reference = idents, use_as_prediction = 'majority_voting', show=False,return_fig=True)

dotplot.savefig(dotplot_file_name)

predictions.to_table(output_analyses_directory)

table = pd.crosstab(adata.obs.seurat_clusters, adata.obs.majority_voting)
table.to_excel(output_xlsx)

Python Pandas celltypist From line 3 of scripts/scrna-celltypist.py

option_list <- list(
      optparse::make_option(c("--sampleid"),
            type = "character", default = NULL,
            help = "Sample ID", metavar = "character"
      ),
      optparse::make_option(c("--rds"),
            type = "character", default = NULL,
            help = "A list of RDS files of Seurat objects", metavar = "character"
      ),
      optparse::make_option(c("--csv"),
            type = "character", default = NULL,
            help = "Celltypist prediction file", metavar = "character"
      ),
      optparse::make_option(c("--output.tsne.plot"),
            type = "character", default = NULL,
            help = "Output tsne file name", metavar = "character"
      ),
      optparse::make_option(c("--output.umap.plot"),
            type = "character", default = NULL,
            help = "Output umap file name", metavar = "character"
      ),
      optparse::make_option(c("--percentage"),
            type = "double", default = 5,
            help = "Cluster mimnimum percentage to plot", metavar = "double"
      ), optparse::make_option(c("--labels"), action = "store_true", default = FALSE, help = "Print labels on the plot")
)

opt_parser <- optparse::OptionParser(option_list = option_list)
opt <- optparse::parse_args(opt_parser)

if (is.null(opt$rds) || is.null(opt$csv)) {
      optparse::print_help(opt_parser)
      stop("At least one argument must be supplied (rds and sampleid)", call. = FALSE)
}

require(patchwork)
require(tidyverse)
require(Seurat)


tryCatch(
      {
            source("workflow/scripts/scrna-functions.R")
      },
      error = function(cond) {
            source(paste0(system("python -c 'import os; import cellsnake; print(os.path.dirname(cellsnake.__file__))'", intern = TRUE), "/scrna/workflow/scripts/scrna-functions.R"))
      }
)


scrna <- readRDS(file = opt$rds)
DefaultAssay(scrna) <- "RNA"

celltypist <- read.csv(
      opt$csv,
      row.names = 1
)

scrna@meta.data <- scrna@meta.data %>%
      tibble::rownames_to_column("barcodes") %>%
      dplyr::left_join(celltypist %>% as.data.frame() %>% rownames_to_column("barcodes"), by = "barcodes") %>%
      tibble::column_to_rownames("barcodes")


n <- length(scrna@meta.data %>% pull(majority_voting) %>% unique())

palette <- function_color_palette(n)
palette <- function_color_palette(n)
palette <- setNames(palette, scrna@meta.data %>% pull(majority_voting) %>% unique())

breaks <- scrna@meta.data %>%
      dplyr::select(majority_voting) %>%
      dplyr::count(majority_voting) %>%
      dplyr::mutate(perc = (n * 100) / sum(n)) %>%
      dplyr::filter(perc >= opt$percentage) %>%
      dplyr::select(-n, -perc) %>%
      distinct() %>%
      pull() %>%
      as.character()

p1 <- DimPlot(scrna, reduction = "tsne", label = opt$labels, group.by = "majority_voting", repel = TRUE, raster = FALSE) &
      scale_color_manual(values = palette, breaks = breaks) & theme(plot.title = element_blank()) &
      theme(legend.direction = "horizontal", legend.text = element_text(size = 7)) &
      guides(colour = guide_legend(ncol = 2, override.aes = list(size = 7)))
p2 <- DimPlot(scrna, reduction = "umap", label = opt$labels, group.by = "majority_voting", repel = TRUE, raster = FALSE) &
      scale_color_manual(values = palette, breaks = breaks) & theme(plot.title = element_blank()) &
      theme(legend.direction = "horizontal", legend.text = element_text(size = 7)) &
      guides(colour = guide_legend(ncol = 2, override.aes = list(size = 7)))


m <- max(str_count(breaks))

w <- c(7.5 + (m * 0.08) * (floor(length(breaks) / 11) + 1))

(p1 / guide_area()) + plot_layout(heights = c(2.5, 1), widths = c(1, 0.6), guides = "collect") -> p1
(p2 / guide_area()) + plot_layout(heights = c(2.5, 1), widths = c(1, 0.6), guides = "collect") -> p2


ggsave(plot = p1, filename = opt$output.tsne.plot, width = 7.5, height = 8)
ggsave(plot = p2, filename = opt$output.umap.plot, width = 7.5, height = 8)

R tidyverse Seurat patcHwork From line 3 of scripts/scrna-celltypist.R

option_list <- list(
    optparse::make_option(c("--sampleid"),
        type = "character", default = NULL,
        help = "Sample ID", metavar = "character"
    ),
    optparse::make_option(c("-r", "--rds"),
        type = "character", default = NULL,
        help = "A list of RDS files of Seurat objects", metavar = "character"
    ),
    optparse::make_option(c("--output.rds"),
        type = "character", default = "output.rds",
        help = "Output RDS file name [default= %default]", metavar = "character"
    )
)

opt_parser <- optparse::OptionParser(option_list = option_list)
opt <- optparse::parse_args(opt_parser)

if (is.null(opt$rds) || is.null(opt$sampleid)) {
    optparse::print_help(opt_parser)
    stop("At least one argument must be supplied (rds and sampleid)", call. = FALSE)
}

require(tidyverse)
require(Seurat)
require(patchwork)


files <- unlist(strsplit(opt$rds, " "))
print(files)
for (i in files) {
    if (!exists("scrna")) {
        scrna <- readRDS(file = i)
    } else {
        scrna <- bind_rows(scrna, readRDS(file = i))
    }
}


saveRDS(scrna, file = opt$output.rds)

R tidyverse Seurat patcHwork From line 3 of scripts/scrna-combine-microbiome-rds.R

option_list <- list(
  optparse::make_option(c("--rds"),
    type = "character", default = NULL,
    help = "A list of RDS files of Seurat objects", metavar = "character"
  ),
  optparse::make_option(c("--output"),
    type = "character", default = "output.h5ad",
    help = "Output h5ad file name", metavar = "character"
  )
)

opt_parser <- optparse::OptionParser(option_list = option_list)
opt <- optparse::parse_args(opt_parser)

if (is.null(opt$rds)) {
  optparse::print_help(opt_parser)
  stop("At least one argument must be supplied (rds file and sampleid)", call. = FALSE)
}

if (!requireNamespace("SeuratDisk", quietly = TRUE)) {
  remotes::install_github("mojaveazure/seurat-disk", upgrade = "never")
}
require(Seurat)
require(SeuratDisk)
require(tidyverse)


scrna <- readRDS(file = opt$rds)

UpdateSeuratObject(scrna) -> scrna
DefaultAssay(scrna) <- "RNA"

DietSeurat(scrna) -> scrna

scrna@meta.data %>% dplyr::mutate(dplyr::across(where(is.factor), as.character)) -> scrna@meta.data


output_file_name <- str_remove_all(opt$output, ".h5ad$")

SaveH5Seurat(scrna, filename = paste0(output_file_name, ".h5Seurat"), overwrite = TRUE)
SeuratDisk::Convert(paste0(output_file_name, ".h5Seurat"), dest = "h5ad", overwrite = TRUE)

R tidyverse Seurat SeuratDisk From line 3 of scripts/scrna-convert-to-h5ad.R

option_list <- list(
      optparse::make_option(c("--rds"),
            type = "character", default = NULL,
            help = "Processed rds file of a Seurat object", metavar = "character"
      ),
      optparse::make_option(c("--reduction.type"),
            type = "character", default = "umap",
            help = "Reduction type, umap or tsne", metavar = "character"
      ),
      optparse::make_option(c("--pdfplot"),
            type = "character", default = "reduction.pdf",
            help = "Plot file name", metavar = "character"
      ),
      optparse::make_option(c("--htmlplot"),
            type = "character", default = "reduction.html",
            help = "Plot file name", metavar = "character"
      ),
      optparse::make_option(c("--csv"),
            type = "character", default = NULL,
            help = "CSV meta file or this can be a singler RDS file", metavar = "character"
      ),
      optparse::make_option(c("--idents"),
            type = "character", default = "seurat_clusters",
            help = "Meta data column name for marker analysis", metavar = "character"
      ),
      optparse::make_option(c("--percentage"),
            type = "double", default = 5,
            help = "Cluster minimum percentage to plot", metavar = "double"
      ),
      optparse::make_option(c("--labels"), action = "store_true", default = FALSE, help = "Print labels on the plot")
)



opt_parser <- optparse::OptionParser(option_list = option_list)
opt <- optparse::parse_args(opt_parser)

if (is.null(opt$rds)) {
      optparse::print_help(opt_parser)
      stop("At least one argument must be supplied (rds file)", call. = FALSE)
}

require(patchwork)
require(plotly)
require(Seurat)
require(tidyverse)

tryCatch(
      {
            source("workflow/scripts/scrna-functions.R")
      },
      error = function(cond) {
            source(paste0(system("python -c 'import os; import cellsnake; print(os.path.dirname(cellsnake.__file__))'", intern = TRUE), "/scrna/workflow/scripts/scrna-functions.R"))
      }
)



scrna <- readRDS(file = opt$rds)
DefaultAssay(scrna) <- "RNA"


if (!is.null(opt$csv)) {
      try({
            metadata <- read.csv(
                  opt$csv,
                  row.names = 1
            )

            scrna@meta.data <- scrna@meta.data %>%
                  tibble::rownames_to_column("barcodes") %>%
                  dplyr::left_join(metadata %>% as.data.frame() %>% rownames_to_column("barcodes"), by = "barcodes") %>%
                  tibble::column_to_rownames("barcodes")
      })
      try({
            pred <- readRDS(opt$csv)
            AddMetaData(scrna, pred["pruned.labels"] %>% as.data.frame() %>% dplyr::select(singler = pruned.labels)) -> scrna
      })
}





###################### dimplot######################
Idents(object = scrna) <- scrna@meta.data[[opt$idents]]

n <- length(Idents(scrna) %>% unique())

palette <- function_color_palette(n)
palette <- setNames(palette, Idents(scrna) %>% unique())

breaks <- scrna@meta.data %>%
      dplyr::select(opt$idents) %>%
      dplyr::count(get(opt$idents)) %>%
      dplyr::mutate(perc = (n * 100) / sum(n)) %>%
      dplyr::filter(perc >= opt$percentage) %>%
      dplyr::select(-n, -perc) %>%
      distinct() %>%
      pull() %>%
      as.character()


p1 <- DimPlot(scrna, reduction = opt$reduction.type, raster = FALSE) & scale_color_manual(values = palette)

ggplotly(p1) -> p1_plotly

p1_plotly %>% htmlwidgets::saveWidget(file = opt$htmlplot, selfcontained = T)

m <- max(str_count(breaks))

w <- c(8 + (m * 0.09) * (floor(length(breaks) / 11) + 1))


p1 <- DimPlot(scrna, reduction = opt$reduction.type, label = opt$labels, repel = TRUE, raster = FALSE) &
      scale_color_manual(values = palette, breaks = breaks) &
      theme(legend.direction = "horizontal", legend.text = element_text(size = 7)) &
      guides(colour = guide_legend(ncol = 3, override.aes = list(size = 7)))

(p1 / guide_area()) + plot_layout(heights = c(2.5, 1), widths = c(1, 0.6), guides = "collect") -> p1

ggsave(plot = p1, filename = opt$pdfplot, width = 7.5, height = 8)

R tidyverse Seurat patcHwork plotly From line 4 of scripts/scrna-dimplot.R

option_list <- list(
  optparse::make_option(c("--rds"),
    type = "character", default = NULL,
    help = "Processed rds file of a Seurat object", metavar = "character"
  ),
  optparse::make_option(c("--logfc.threshold"),
    type = "double", default = 0.25,
    help = "LogFC [default= %default]", metavar = "character"
  ),
  optparse::make_option(c("--test.use"),
    type = "character", default = "wilcox",
    help = "Test use [default= %default]", metavar = "character"
  ),
  optparse::make_option(c("--output.rds"),
    type = "character", default = NULL,
    help = "RDS table of all markers", metavar = "character"
  ),
  optparse::make_option(c("--idents"),
    type = "character", default = "seurat_clusters",
    help = "Meta data column name for marker analysis", metavar = "character"
  )
)



opt_parser <- optparse::OptionParser(option_list = option_list)
opt <- optparse::parse_args(opt_parser)

if (is.null(opt$rds)) {
  optparse::print_help(opt_parser)
  stop("At least one argument must be supplied (rds file)", call. = FALSE)
}

require(Seurat)
require(tidyverse)
# try({source("workflow/scripts/scrna-functions.R")})
# try({source(paste0(system("python -c 'import os; import cellsnake; print(os.path.dirname(cellsnake.__file__))'", intern = TRUE),"/scrna/workflow/scripts/scrna-functions.R"))})


scrna <- readRDS(file = opt$rds)
DefaultAssay(scrna) <- "RNA"



# RNA_=paste0("RNA_snn_res.",opt$resolution)
# Idents(object = scrna) <- [email protected][[RNA_]]
Idents(object = scrna) <- scrna@meta.data[[opt$idents]]


all_markers <- FindAllMarkers(scrna, logfc.threshold = opt$logfc.threshold, test.use = opt$test.use)



saveRDS(all_markers, file = opt$output.rds)

R tidyverse Seurat From line 4 of scripts/scrna-find-markers.R

option_list <- list(
    optparse::make_option(c("--rds"),
        type = "character", default = NULL,
        help = "Processed rds file of a Seurat object", metavar = "character"
    ),
    optparse::make_option(c("--logfc.threshold"),
        type = "double", default = 0.25,
        help = "LogFC [default= %default]", metavar = "character"
    ),
    optparse::make_option(c("--test.use"),
        type = "character", default = "wilcox",
        help = "Test use [default= %default]", metavar = "character"
    ),
    optparse::make_option(c("--output.rds"),
        type = "character", default = NULL,
        help = "RDS table of all markers", metavar = "character"
    ),
    optparse::make_option(c("--metadata.column"),
        type = "character", default = "condition",
        help = "Meta data column name for marker analysis", metavar = "character"
    ),
    optparse::make_option(c("--metadata"),
        type = "character", default = NULL,
        help = "Metadata filename", metavar = "character"
    )
)



opt_parser <- optparse::OptionParser(option_list = option_list)
opt <- optparse::parse_args(opt_parser)

if (is.null(opt$rds)) {
    optparse::print_help(opt_parser)
    stop("At least one argument must be supplied (rds file)", call. = FALSE)
}

require(Seurat)
require(tidyverse)

function_read_metadata <- function(opt) {
    file_type <- tools::file_ext(opt$metadata)
    possible_separators <- c(",", ";", "|", "\t")

    if (tolower(file_type) %in% c("csv", "tsv", "txt")) {
        for (sep in possible_separators) {
            tryCatch(
                {
                    df <- read.csv(opt$metadata, sep = sep)
                    return(df)
                },
                error = function(e) {
                    message(paste("Unable to read file with separator", sep))
                }
            )
        }
    } else if (tolower(file_type) %in% c("xls", "xlsx")) {
        for (sep in possible_separators) {
            tryCatch(
                {
                    df <- openxlsx::read.xlsx(opt$metadata)
                    return(df)
                },
                error = function(e) {
                    message(paste("Unable to read file with separator", sep))
                }
            )
        }
    } else {
        message("Unsupported file type")
        return(NULL)
    }

    message("Unable to read file with any separator")
    return(NULL)
}


scrna <- readRDS(file = opt$rds)
DefaultAssay(scrna) <- "RNA"

function_read_metadata(opt) -> df


scrna@meta.data %>% dplyr::left_join(df, by = c("orig.ident" = names(df)[1])) -> scrna@meta.data


# RNA_=paste0("RNA_snn_res.",opt$resolution)
# Idents(object = scrna) <- [email protected][[RNA_]]
Idents(object = scrna) <- scrna@meta.data[[opt$metadata.column]]

combn(as.character(unique(Idents(scrna))), 2) -> pairwise



results <- list()
for (i in 1:ncol(pairwise)) {
    pair_ <- paste0(pairwise[1, i], " vs ", pairwise[2, i])
    markers <- FindMarkers(scrna, ident.1 = pairwise[1, i], ident.2 = pairwise[2, i], logfc.threshold = opt$logfc.threshold, test.use = opt$test.use)
    results[[pair_]] <- markers

    pair_ <- paste0(pairwise[2, i], " vs ", pairwise[1, i])
    markers <- FindMarkers(scrna, ident.1 = pairwise[2, i], ident.2 = pairwise[1, i], logfc.threshold = opt$logfc.threshold, test.use = opt$test.use)
    results[[pair_]] <- markers
}

results %>%
    map(~ rownames_to_column(., var = "gene")) %>%
    bind_rows(.id = "condition") -> all_markers


saveRDS(all_markers, file = opt$output.rds)

R tidyverse Seurat From line 4 of scripts/scrna-find-pairwise-markers.R

option_list <- list(
  optparse::make_option(c("--xlsx"),
    type = "character", default = NULL,
    help = "Excel table of markers", metavar = "character"
  ),
  optparse::make_option(c("--output.rds"),
    type = "character", default = NULL,
    help = "Output RDS file name", metavar = "character"
  ),
  optparse::make_option(c("--output.go"),
    type = "character", default = NULL,
    help = "Output kegg excel file name", metavar = "character"
  ),
  optparse::make_option(c("--output.gse"),
    type = "character", default = NULL,
    help = "Output gse excel file name", metavar = "character"
  ),
  optparse::make_option(c("--mapping"),
    type = "character", default = "org.Hs.eg.db",
    help = "Mapping", metavar = "character"
  ),
  optparse::make_option(c("--pval"),
    type = "double", default = 0.05,
    help = "P value treshold [default= %default]", metavar = "character"
  ),
  optparse::make_option(c("--logfc.treshold"),
    type = "double", default = 1.5,
    help = "LogFC [default= %default]", metavar = "character"
  )
)
# require(clusterProfiler)
require(tidyverse)

try({
  if (!requireNamespace(opt$mapping, quietly = TRUE)) {
    BiocManager::install(opt$mapping, update = TRUE)
  }
})


opt_parser <- optparse::OptionParser(option_list = option_list)
opt <- optparse::parse_args(opt_parser)

require(opt$mapping, character.only = T)

if (is.null(opt$xlsx)) {
  optparse::print_help(opt_parser)
  stop("Arguments must be supplied", call. = FALSE)
}


All_Features <- openxlsx::read.xlsx(opt$xlsx)


function_enrichment_go_singlecell <- function(results, p = 0.05, f = 1.5) {
  print(results %>% distinct(cluster) %>% pull())
  results %>%
    as.data.frame() %>%
    dplyr::filter(p_val_adj < p) %>%
    arrange(desc(avg_log2FC)) %>%
    dplyr::select(gene, avg_log2FC) %>%
    dplyr::mutate(GeneID = mapIds(get(opt$mapping), keys = gene, column = "ENTREZID", keytype = "SYMBOL", multiVals = "first")) %>%
    dplyr::filter(!is.na(GeneID), !is.na(avg_log2FC), !duplicated(GeneID)) %>%
    dplyr::select(3, 2) %>%
    deframe() -> geneList


  gene <- names(geneList)[abs(geneList) > f]

  tryCatch(
    {
      kk <- clusterProfiler::enrichGO(
        gene = gene,
        universe = names(geneList),
        OrgDb = get(opt$mapping),
        ont = "ALL",
        pAdjustMethod = "fdr",
        pvalueCutoff = 1,
        readable = TRUE
      )
    },
    error = function(e) {
      kk <- NULL
    }
  ) -> kk

  tryCatch(
    {
      kk2 <- clusterProfiler::gseGO(
        geneList = names(geneList),
        OrgDb = get(opt$mapping),
        ont = "ALL",
        minGSSize = 2,
        maxGSSize = 500,
        pvalueCutoff = 1,
        verbose = FALSE,
        eps = 0
      )
    },
    error = function(e) {
      kk2 <- NULL
    }
  ) -> kk2





  return(list(kk, kk2))
}


All_Features %>%
  split(.$cluster) %>%
  purrr::map(~ function_enrichment_go_singlecell(., p = opt$pval, f = opt$logfc.treshold)) -> all_go_results



saveRDS(all_go_results, opt$output.rds)

openxlsx::write.xlsx(all_go_results %>% keep(~ !is.null(.[[1]])) %>% map(~ .[[1]]@result) %>% bind_rows(.id = "cluster") %>% as_tibble(), opt$output.go)
openxlsx::write.xlsx(all_go_results %>% keep(~ !is.null(.[[2]])) %>% map(~ .[[2]]@result) %>% bind_rows(.id = "cluster") %>% as_tibble(), opt$output.gse)

R tidyverse clusterProfiler From line 2 of scripts/scrna-go_analysis.R

option_list <- list(
  optparse::make_option(c("--xlsx"),
    type = "character", default = NULL,
    help = "Excel table of markers", metavar = "character"
  ),
  optparse::make_option(c("--output.rds"),
    type = "character", default = NULL,
    help = "Output RDS file name", metavar = "character"
  ),
  optparse::make_option(c("--output.kegg"),
    type = "character", default = NULL,
    help = "Output kegg excel file name", metavar = "character"
  ),
  optparse::make_option(c("--output.mkegg"),
    type = "character", default = NULL,
    help = "Output kegg excel file name", metavar = "character"
  ),
  optparse::make_option(c("--output.gse"),
    type = "character", default = NULL,
    help = "Output gse excel file name", metavar = "character"
  ),
  optparse::make_option(c("--output.mgse"),
    type = "character", default = NULL,
    help = "Output gse excel file name", metavar = "character"
  ),
  optparse::make_option(c("--mapping"),
    type = "character", default = "org.Hs.eg.db",
    help = "Mapping", metavar = "character"
  ),
  optparse::make_option(c("--organism"),
    type = "character", default = "hsa",
    help = "Organism code, look at https://www.genome.jp/kegg/catalog/org_list.html", metavar = "character"
  ),
  optparse::make_option(c("--pval"),
    type = "double", default = 0.05,
    help = "P value treshold [default= %default]", metavar = "character"
  ),
  optparse::make_option(c("--logfc.treshold"),
    type = "double", default = 1,
    help = "LogFC [default= %default]", metavar = "character"
  )
)
# require(clusterProfiler)
require(tidyverse)

try({
  if (!requireNamespace(opt$mapping, quietly = TRUE)) {
    BiocManager::install(opt$mapping)
  }
})


opt_parser <- optparse::OptionParser(option_list = option_list)
opt <- optparse::parse_args(opt_parser)
require(opt$mapping, character.only = T)
if (is.null(opt$xlsx)) {
  optparse::print_help(opt_parser)
  stop("Arguments must be supplied", call. = FALSE)
}


All_Features <- openxlsx::read.xlsx(opt$xlsx)


function_enrichment_kegg_singlecell <- function(results, p = 0.05, f = 1.5) {
  print(results %>% distinct(cluster) %>% dplyr::pull())
  results %>%
    as.data.frame() %>%
    dplyr::filter(p_val_adj < p) %>%
    arrange(desc(avg_log2FC)) %>%
    dplyr::select(gene, avg_log2FC) %>%
    dplyr::mutate(GeneID = mapIds(get(opt$mapping), keys = gene, column = "ENTREZID", keytype = "SYMBOL", multiVals = "first")) %>%
    dplyr::filter(!is.na(GeneID), !is.na(avg_log2FC), !duplicated(GeneID)) %>%
    dplyr::select(3, 2) %>%
    deframe() -> geneList


  gene <- names(geneList)[abs(geneList) > f] # as recommended by the documentation

  tryCatch(
    {
      kk1 <- clusterProfiler::enrichKEGG(
        gene = gene,
        organism = opt$organism,
        pAdjustMethod = "fdr",
        minGSSize = 2,
        pvalueCutoff = 1
      )
    },
    error = function(e) {
      kk1 <- NULL
    }
  ) -> kk1

  tryCatch(
    {
      kk2 <- clusterProfiler::gseKEGG(
        geneList = geneList,
        organism = opt$organism,
        pvalueCutoff = 1,
        pAdjustMethod = "fdr",
        minGSSize = 2,
        eps = 0,
        verbose = FALSE
      )
    },
    error = function(e) {
      kk2 <- NULL
    }
  ) -> kk2

  tryCatch(
    {
      kk3 <- clusterProfiler::enrichMKEGG(
        gene = gene,
        organism = opt$organism,
        pvalueCutoff = 1,
        minGSSize = 2,
        pAdjustMethod = "fdr",
      )
    },
    error = function(e) {
      kk3 <- NULL
    }
  ) -> kk3

  tryCatch(
    {
      kk4 <- clusterProfiler::gseMKEGG(
        geneList = geneList,
        organism = opt$organism,
        minGSSize = 2,
        keyType = "kegg",
        pAdjustMethod = "fdr",
        pvalueCutoff = 1
      )
    },
    error = function(e) {
      kk4 <- NULL
    }
  ) -> kk4

  return(list(kk1, kk2, kk3, kk4))
}


All_Features %>%
  split(.$cluster) %>%
  purrr::map(~ function_enrichment_kegg_singlecell(., p = opt$pval, f = opt$logfc.treshold)) -> all_kegg_results



saveRDS(all_kegg_results, opt$output.rds)

openxlsx::write.xlsx(all_kegg_results %>% keep(~ !is.null(.[[1]])) %>% map(~ .[[1]]@result) %>% bind_rows(.id = "cluster") %>% as_tibble(), opt$output.kegg)
openxlsx::write.xlsx(all_kegg_results %>% keep(~ !is.null(.[[2]])) %>% map(~ .[[2]]@result) %>% bind_rows(.id = "cluster") %>% as_tibble(), opt$output.gse)
openxlsx::write.xlsx(all_kegg_results %>% keep(~ !is.null(.[[3]])) %>% map(~ .[[3]]@result) %>% bind_rows(.id = "cluster") %>% as_tibble(), opt$output.mkegg)
openxlsx::write.xlsx(all_kegg_results %>% keep(~ !is.null(.[[4]])) %>% map(~ .[[4]]@result) %>% bind_rows(.id = "cluster") %>% as_tibble(), opt$output.mgse)

R tidyverse clusterProfiler From line 2 of scripts/scrna-kegg.R

import scanpy as sc, anndata as ad
import sys

import anndata
import pandas as pd
import numpy as np
import scipy.sparse


def taxonomy_compare(A, B):
	'''
	A helper function to compare the level of two entries in the kraken hierarchy. Returns
	1 if A is higher in the hierarchy, 0 if A and B are equal, -1 if B is higher.

	Input
	-----
	A, B : ``str``
		Two strings taken from column four of hierarchy.txt, capturing the taxonomy 
		levels of the records.
	'''
	#the order of the taxonomy levels
	order = 'RDKPCOFGS'
	#get the levels of A and B in this hierarchy
	levA = order.find(A[0])
	levB = order.find(B[0])
	if levA < levB:
		#A is higher in the hierarchy
		return 1
	elif levA > levB:
		#B is higher in the hierarchy
		return -1
	else:
		#A and B are at the same level, so need more detailed investigation
		#need to turn to positions [1:], which capture the more detailed sub-hierarchy
		#(and are regular integers, if present)
		if A[1:] < B[1:]:
			#A is higher in the hierarchy
			return 1
		elif A[1:] > B[1:]:
			#B is higher in the hierarchy
			return -1
		else:
			#they're the same
			return 0

def identify_level(hierarchy, target):
	'''
	A helper function that identifies each record's corresponding entry of the appropriate
	level, for the entire kraken hierarchy file. Input as in collapse_taxonomy(). Outputs
	a dictionary with the record ID as the key and the appropriate hierarchy level as 
	the value, with entries higher in the hierarchy than the desired target level having 
	"" as the value.
	'''
	#load the file
	hierarchy = pd.read_table(hierarchy, comment='#', header=None)
	#we start off with no value
	tax_value = ''
	translation = {0:''}
	for level, tax_id, tax_name in zip(hierarchy.iloc[:,3], hierarchy.iloc[:,4], hierarchy.iloc[:,5]):
		#need to strip off all the left side space from the name
		tax_name = tax_name.lstrip()
		#check how our encountered level compares to the target level
		comp = taxonomy_compare(level, target)
		if comp == 1:
			#our record is higher in the hierarchy than the target level
			#so it can't have a value of the desired level assigned
			tax_value = ''
		elif comp == 0:
			#our record is of the exact correct hierarchy level
			#so it's the new value for anything lower in the hierarchy
			tax_value = tax_name
		#if comp == -1, then it's lower and the current tax_value should be usedd
		#one way or another, can store the value now
		translation[tax_id] = tax_value
	#and once we go across the whole thing, return the dictionary
	return translation

def collapse_taxonomy(adata, hierarchy, target):
	'''
	Collapse the single cell kraken pipeline counts based on taxonomy hierarchy, with all
	information more specific than the desired depth summed up. Returns an AnnData with the
	variable space being the summed counts at the specified point in the taxonomy hierarchy.

	Input
	-----
	adata : ``AnnData``
		Single cell kraken pipeline output, with the kraken taxonomy IDs in 
		``adata.var['gene_ids']``.
	hierarchy : filename
		Path to the ``hierarchy.txt`` file provided as part of the output of the kraken 
		pipeline.
	target : ``str``
		The desired hierarchy level to sum up to. Accepted values are ``["domain", "kingdom", 
		"phylum", "class", "order", "family", "genus", "species"]`` or any value of the 
		fourth column of ``hierarchy``.
	'''
	#if the user provides the target as a full name, extract and capitalise the first letter
	if len(target)>2:
		target = target[0].upper()
	#obtain the translation of the taxonomy IDs to the appropriate target level
	translation = identify_level(hierarchy, target)
	#use this translation to get the target level values for the IDs in the object
	levels = np.array([translation[i] for i in adata.var['gene_ids']])
	#prepare the output var names and collapse the count matrix accordingly
	#(remove '' as it's of no interest)
	var_names = np.unique(levels)
	var_names = var_names[var_names != '']
	#this goes better on CSC
	holder = adata.X.tocsc()
	c = scipy.sparse.csr_matrix(np.hstack([np.sum(holder[:,levels==n],1) for n in var_names]))
	#create output object
	bdata = anndata.AnnData(c, obs=adata.obs)
	bdata.var_names = var_names
	return bdata


adata=sc.read_10x_mtx(sys.argv[1]) #kraken2 pipeline output
hierarchy=sys.argv[2] #hierarchy file
taxa=sys.argv[3] #collapse to what, for example genus
output=sys.argv[4] #output h5ad file


adata=collapse_taxonomy(adata,hierarchy,taxa)

adata.write_h5ad(output)

Python Pandas numpy scipy Kraken anndata From line 4 of scripts/scrna-kraken2-collapse.py

option_list <- list(
  optparse::make_option(c("--min.cells"),
    type = "integer", default = 1,
    help = "Min cells [default= %default]", metavar = "integer"
  ),
  optparse::make_option(c("--min.features"),
    type = "integer", default = 3,
    help = "Min features [default= %default]", metavar = "character"
  ),
  optparse::make_option(c("--data.dir"),
    type = "character", default = NULL,
    help = "Data directory", metavar = "character"
  ),
  optparse::make_option(c("--h5seurat"),
    type = "character", default = NULL,
    help = "Data directory", metavar = "character"
  ),
  optparse::make_option(c("--sampleid"),
    type = "character", default = NULL,
    help = "Sample ID", metavar = "character"
  ),
  optparse::make_option(c("--output.rds"),
    type = "character", default = "output.rds",
    help = "Output RDS file name [default= %default]", metavar = "character"
  ),
  optparse::make_option(c("--output.plot"),
    type = "character", default = "output.pdf",
    help = "Output barplot file name [default= %default]", metavar = "character"
  ),
  optparse::make_option(c("--taxa"),
    type = "character", default = "genus",
    help = "Taxonomic level", metavar = "character"
  )
)

opt_parser <- optparse::OptionParser(option_list = option_list)
opt <- optparse::parse_args(opt_parser)

if (is.null(opt$h5seurat)) {
  optparse::print_help(opt_parser)
  stop("At least one argument must be supplied (data.dir and sampleid)", call. = FALSE)
}

require(optparse)
require(tidyverse)
require(Seurat)
require(SeuratDisk)
tryCatch(
  {
    source("workflow/scripts/scrna-functions.R")
  },
  error = function(cond) {
    source(paste0(system("python -c 'import os; import cellsnake; print(os.path.dirname(cellsnake.__file__))'", intern = TRUE), "/scrna/workflow/scripts/scrna-functions.R"))
  }
)
# nFeature_RNA is the number of genes detected in each cell. nCount_RNA is the total number of molecules detected within a cell.


# CreateSeuratObject(scrna.data,min.cells = 1,min.features = 5) -> scrna



tryCatch(
  {
    CreateSeuratObject(LoadH5Seurat(opt$h5seurat)[["RNA"]]@counts, min.cells = opt$min.cells, min.features = opt$min.features) -> scrna


    head(scrna)
    scrna <- RenameCells(object = scrna, add.cell.id = make.names(opt$sampleid)) # add cell.id to cell name
    scrna[["RNA"]]@counts %>%
      as.matrix() %>%
      t() %>%
      as.data.frame() %>%
      select(-starts_with("Homo")) -> df
  },
  error = function(e) {
    df <- NULL
  }
) -> df


pdf(opt$output.plot, width = 6, height = 9)
try({
  df %>%
    rownames_to_column("barcode") %>%
    gather(group, umi, -barcode) %>%
    group_by(group) %>%
    summarise(sum = log2(sum(umi))) %>%
    arrange(desc(sum)) %>%
    slice_max(n = 50, order_by = sum) %>%
    ggplot(aes(reorder(group, sum), sum)) +
    geom_col() +
    coord_flip() +
    ggthemes::theme_few() +
    ylab("Total log2-Expression (Microbiome)") +
    xlab(opt$taxa) +
    ggtitle(opt$sampleid) +
    theme(axis.title = element_text(size = 12)) -> p1
  print(p1)
})
dev.off()

saveRDS(df, file = opt$output.rds)

R ggplot2 tidyverse Seurat optparse SeuratDisk From line 3 of scripts/scrna-kraken2-data-parser.R

option_list <- list(
    optparse::make_option(c("--rds"),
        type = "character", default = NULL,
        help = "Processed rds file of a Seurat object", metavar = "character"
    ),
    optparse::make_option(c("--xlsx"),
        type = "character", default = NULL,
        help = "Excel table of markers for input", metavar = "character"
    ),
    optparse::make_option(c("--output.plot"),
        type = "character", default = "output.pdf",
        help = "Output plot directory", metavar = "character"
    ),
    optparse::make_option(c("--idents"),
        type = "character", default = "seurat_clusters",
        help = "Meta data column name for marker analysis", metavar = "character"
    )
)




opt_parser <- optparse::OptionParser(option_list = option_list)
opt <- optparse::parse_args(opt_parser)

if (is.null(opt$rds)) {
    optparse::print_help(opt_parser)
    stop("At least one argument must be supplied (rds file)", call. = FALSE)
}

require(Seurat)
require(tidyverse)
# try({source("workflow/scripts/scrna-functions.R")})
# try({source(paste0(system("python -c 'import os; import cellsnake; print(os.path.dirname(cellsnake.__file__))'", intern = TRUE),"/scrna/workflow/scripts/scrna-functions.R"))})

scrna <- readRDS(file = opt$rds)
DefaultAssay(scrna) <- "RNA"

Idents(object = scrna) <- scrna@meta.data[[opt$idents]]

n <- length(Idents(scrna) %>% unique())

Positive_Features <- openxlsx::read.xlsx(opt$xlsx)


Positive_Features %>%
    group_by(cluster) %>%
    slice_max(n = 5, order_by = avg_log2FC) -> df


df %>%
    distinct(gene) %>%
    pull() -> not.all.genes

scrna <- ScaleData(scrna, features = not.all.genes)

DoHeatmap(object = scrna, features = not.all.genes, label = F) & theme(axis.text.y = element_text(size = 5)) & scale_fill_continuous(type = "viridis") -> p1

ggsave(opt$output.plot, p1, height = 4 + (n * 0.2), width = 5 + (n * 0.05), useDingbats = TRUE)

R tidyverse Seurat From line 5 of scripts/scrna-marker-heatmap.R

option_list <- list(
    optparse::make_option(c("--rds"),
        type = "character", default = NULL,
        help = "Processed rds file of a Seurat object", metavar = "character"
    ),
    optparse::make_option(c("--top_n"),
        type = "integer", default = 20,
        help = "How many features to plot per cluster [default= %default]", metavar = "integer"
    ),
    optparse::make_option(c("--xlsx"),
        type = "character", default = NULL,
        help = "Excel table of markers for input", metavar = "character"
    ),
    optparse::make_option(c("--output.plot.dir"),
        type = "character", default = NULL,
        help = "Output plot directory", metavar = "character"
    ),
    optparse::make_option(c("--reduction.type"),
        type = "character", default = "umap",
        help = "Reduction type, umap or tsne", metavar = "character"
    ),
    optparse::make_option(c("--idents"),
        type = "character", default = "seurat_clusters",
        help = "Meta data column name for marker analysis", metavar = "character"
    )
)



opt_parser <- optparse::OptionParser(option_list = option_list)
opt <- optparse::parse_args(opt_parser)

try(
    {
        source("workflow/scripts/scrna-functions.R")
    },
    silent = TRUE
)
try(
    {
        source(paste0(system("python -c 'import os; import cellsnake; print(os.path.dirname(cellsnake.__file__))'", intern = TRUE), "/scrna/workflow/scripts/scrna-functions.R"))
    },
    silent = TRUE
)

if (is.null(opt$rds)) {
    optparse::print_help(opt_parser)
    stop("At least one argument must be supplied (rds file)", call. = FALSE)
}

require(Seurat)
require(viridis)
require(tidyverse)




scrna <- readRDS(file = opt$rds)
DefaultAssay(scrna) <- "RNA"
Idents(object = scrna) <- scrna@meta.data[[opt$idents]]

n <- length(Idents(scrna) %>% unique())

palette <- function_color_palette(n)


Positive_Features <- openxlsx::read.xlsx(opt$xlsx) %>%
    group_by(cluster) %>%
    slice_min(order_by = p_val_adj, n = opt$top_n) %>%
    select(cluster, gene)


for (d in (Positive_Features %>% distinct(cluster) %>% pull())) {
    dir.create(paste0(opt$output.plot.dir, "/cluster", d, "/"), recursive = TRUE)
}

options(warn = -1)




suppressMessages(for (i in 1:nrow(Positive_Features)) {
    gene <- Positive_Features[i, ]$gene
    cluster <- Positive_Features[i, ]$cluster

    p1 <- FeaturePlot(scrna, reduction = opt$reduction.type, features = gene, raster = FALSE) & scale_color_continuous(type = "viridis") & labs(color = "Expression") & theme(axis.text = element_text(size = 12))
    p2 <- DotPlot(scrna, features = gene) & scale_color_continuous(type = "viridis") & labs(color = "Average Expression", size = "Percent Expressed") & ylab("Identity") & theme(axis.title.x = element_blank(), axis.text = element_text(size = 12)) & theme(legend.position = "right")
    p3 <- VlnPlot(scrna, features = gene) & ggthemes::theme_hc() & scale_fill_manual(values = palette) & theme(legend.position = "right", axis.text = element_text(size = 12)) & labs(fill = "") & xlab("Identity") & ylab("Expression Level")

    suppressWarnings(((p1 | p2) / p3) -> wp)

    ggsave(paste0(opt$output.plot.dir, "/cluster", cluster, "/", gene, ".pdf"), wp, height = 5 + (n * 0.15), width = 6 + (n * 0.15), useDingbats = TRUE)
})

R tidyverse Seurat viridis From line 2 of scripts/scrna-marker-plots.R

option_list <- list(
    optparse::make_option(c("--rds"),
        type = "character", default = NULL,
        help = "RDS file of marker data frame", metavar = "character"
    ),
    optparse::make_option(c("--output.xlsx.positive"),
        type = "character", default = NULL,
        help = "Excel table of positive markers", metavar = "character"
    ),
    optparse::make_option(c("--output.xlsx.all"),
        type = "character", default = NULL,
        help = "Excel table of all markers", metavar = "character"
    )
)



opt_parser <- optparse::OptionParser(option_list = option_list)
opt <- optparse::parse_args(opt_parser)

if (is.null(opt$rds)) {
    optparse::print_help(opt_parser)
    stop("At least one argument must be supplied (rds file)", call. = FALSE)
}


require(tidyverse)
# try({source("workflow/scripts/scrna-functions.R")})
# try({source(paste0(system("python -c 'import os; import cellsnake; print(os.path.dirname(cellsnake.__file__))'", intern = TRUE),"/scrna/workflow/scripts/scrna-functions.R"))})


all_markers <- readRDS(file = opt$rds)



openxlsx::write.xlsx(all_markers, file = opt$output.xlsx.all)

openxlsx::write.xlsx(all_markers %>% filter(avg_log2FC > 0), file = opt$output.xlsx.positive)

R tidyverse From line 4 of scripts/scrna-marker-tables.R

option_list <- list(
  optparse::make_option(c("--rds"),
    type = "character", default = NULL,
    help = "Processed rds file of a Seurat object", metavar = "character"
  ),
  optparse::make_option(c("--sampleid"),
    type = "character", default = NULL,
    help = "Sample ID", metavar = "character"
  ),
  optparse::make_option(c("--idents"),
    type = "character", default = "seurat_clusters",
    help = "Meta data column name for marker analysis", metavar = "character"
  ),
  optparse::make_option(c("--ccplot"),
    type = "character", default = "ccplot.pdf",
    help = "Cell cluster count plot", metavar = "character"
  ),
  optparse::make_option(c("--ccbarplot"),
    type = "character", default = "ccbarplot.pdf",
    help = "Cell cluster count plot", metavar = "character"
  ),
  optparse::make_option(c("--htmlplot"),
    type = "character", default = "htmlplot.pdf",
    help = "Cell cluster html plot", metavar = "character"
  ),
  optparse::make_option(c("--xlsx"),
    type = "character", default = "metrics.xlsx",
    help = "Metrics table output", metavar = "character"
  )
)

opt_parser <- optparse::OptionParser(option_list = option_list)
opt <- optparse::parse_args(opt_parser)

if (is.null(opt$rds)) {
  optparse::print_help(opt_parser)
  stop("At least one argument must be supplied (rds file and sampleid)", call. = FALSE)
}

require(plotly)
require(ggpubr)
require(Seurat)
require(tidyverse)
# try({source("workflow/scripts/scrna-functions.R")})
# try({source(paste0(system("python -c 'import os; import cellsnake; print(os.path.dirname(cellsnake.__file__))'", intern = TRUE),"/scrna/workflow/scripts/scrna-functions.R"))})



scrna <- readRDS(file = opt$rds)



scrna@meta.data %>%
  dplyr::add_count(orig.ident) %>%
  dplyr::mutate(total_clusters = length(unique(get(opt$idents)))) %>%
  distinct(orig.ident, n, total_clusters) %>%
  dplyr::select("Sample Name" = orig.ident, "Total Cells" = n, "Total Clusters" = total_clusters) %>%
  ggtexttable(rows = NULL, theme = ttheme("light")) -> p1

id <- length(unique(scrna@meta.data$orig.ident))

ggsave(opt$ccplot, p1, height = 7 + (id * 0.2))

scrna@meta.data %>%
  dplyr::group_by(orig.ident, get(opt$idents)) %>%
  dplyr::count() %>%
  dplyr::select("Sample Name" = 1, "Cluster" = 2, "Total Cells" = 3) -> df

df %>% openxlsx::write.xlsx(opt$xlsx)

write_tsv(df, file = opt$xlsx %>% str_replace(".xlsx", ".tsv"))

df %>% ggplot(aes(x = Cluster, y = `Total Cells`, fill = `Sample Name`)) +
  geom_col() +
  ggthemes::theme_hc() +
  theme(legend.title = element_blank()) +
  guides(colour = guide_legend(ncol = 3, override.aes = list(size = 7))) -> p2
n <- length(unique(scrna@meta.data[opt$idents]))



ggsave(opt$ccbarplot, p2, height = 5.2 + (id * 0.09), width = 6 + (n * 0.23))


ggplotly(p2) -> p1_plotly

p1_plotly %>% htmlwidgets::saveWidget(file = opt$htmlplot, selfcontained = T)

R ggplot2 tidyverse Seurat ggpubr plotly From line 2 of scripts/scrna-metrics.R

option_list <- list(
  optparse::make_option(c("--rds"),
    type = "character", default = NULL,
    help = "Processed rds file of a Seurat object", metavar = "character"
  ),
  optparse::make_option(c("--microbiome.rds"),
    type = "character", default = NULL,
    help = "Microbiome rds file", metavar = "character"
  ),
  optparse::make_option(c("--reduction.type"),
    type = "character", default = "umap",
    help = "Reduction type, umap or tsne", metavar = "character"
  ),
  optparse::make_option(c("--dimplot"),
    type = "character", default = "micdimplot.pdf",
    help = "Plot file name", metavar = "character"
  ),
  optparse::make_option(c("--tplot"),
    type = "character", default = "tplot.pdf",
    help = "Total microbiome dimplot file name", metavar = "character"
  ),
  optparse::make_option(c("--taxa"),
    type = "character", default = "genus",
    help = "Taxonomic level", metavar = "character"
  )
)



opt_parser <- optparse::OptionParser(option_list = option_list)
opt <- optparse::parse_args(opt_parser)

if (is.null(opt$rds)) {
  optparse::print_help(opt_parser)
  stop("At least one argument must be supplied (rds file)", call. = FALSE)
}

require(patchwork)
require(Seurat)
# require(randomcoloR)

require(viridis)
require(tidyverse)

try(
  {
    source("workflow/scripts/scrna-functions.R")
  },
  silent = TRUE
)
try(
  {
    source(paste0(system("python -c 'import os; import cellsnake; print(os.path.dirname(cellsnake.__file__))'", intern = TRUE), "/scrna/workflow/scripts/scrna-functions.R"))
  },
  silent = TRUE
)

scrna <- readRDS(file = opt$rds)
DefaultAssay(scrna) <- "RNA"


microbiome <- readRDS(file = opt$microbiome.rds)


tryCatch(
  {
    AddMetaData(scrna, microbiome %>% rownames_to_column("barcodes") %>% gather(taxa, umi, -barcodes) %>% dplyr::group_by(taxa) %>% dplyr::mutate(sum = sum(umi, na.rm = T)) %>% ungroup() %>%
      dplyr::mutate(taxa = ifelse(sum >= min(sort(unique(sum), decreasing = T)[1:11], na.rm = T), paste0(opt$taxa, "_", taxa), paste0(opt$taxa, "_", "others"))) %>%
      dplyr::select(-sum) %>% dplyr::group_by(barcodes, taxa) %>% dplyr::summarise(sum = sum(umi, na.rm = T)) %>% ungroup() %>% spread(taxa, sum) %>% column_to_rownames("barcodes")) -> scrna
  },
  error = function(e) {
    print(e)
    print("No microbiome data")


    scrna@meta.data <- scrna@meta.data %>% dplyr::mutate(!!paste0(opt$taxa, "_nodetected") := 0)
    return(scrna)
  }
) -> scrna

head(scrna)
# p1 <- DimPlot(scrna, reduction = opt$reduction.type, label = TRUE) & theme_cellsnake_classic() & scale_color_manual(values = palette)

scrna@meta.data %>%
  dplyr::mutate(`Total log2-Expression (Microbiome)` = log2(rowSums(across(starts_with(opt$taxa))) + 1)) %>%
  dplyr::mutate(across(contains("Microbiome"), ~ replace(., .x == 0, NA))) %>%
  dplyr::mutate(across(contains("genus"), ~ replace(., .x == 0, NA))) -> scrna@meta.data


# scrna %>%
#  dplyr::select(barcodes = .cell, orig.ident, contains(opt$taxa), starts_with(opt$reduction.type)) %>%
#  gather(taxa, umi, starts_with(opt$taxa)) %>%
#  dplyr::select(barcodes, orig.ident, x = 3, y = 4, taxa, umi) %>%
#  replace(is.na(.), 0) %>%
#  ggplot(aes(x = x, y = y, color = log2(umi + 1))) +
#  geom_point(size = 0.2) +
#  labs(color = "Log2-UMI") +
#  theme(axis.text = element_text(size = 12)) +
#  scale_color_viridis(option = "magma", direction = -1, alpha = 0.8, na.value = "white") +
#  ggthemes::theme_few() +
#  facet_wrap(~taxa) -> p1


# ggsave(plot = p1, filename = opt$dimplot, width = 13, height = 9)




plotting_taxas <- scrna@meta.data %>%
  dplyr::select(starts_with(opt$taxa)) %>%
  select(
    where(
      ~ !all(is.na(.x))
    )
  ) %>%
  colnames() %>%
  unique()

print(plotting_taxas)

pdf(opt$dimplot, width = 7, height = 7)
for (i in plotting_taxas) {
  try({
    FeaturePlot(scrna, features = i, pt.size = 0.1, reduction = opt$reduction.type, raster = FALSE) &
      scale_color_continuous(type = "viridis", na.value = "gray96") -> p1

    p1 <- (p1 / guide_area()) + plot_layout(heights = c(2.5, 1), widths = c(1, 0.6), guides = "collect")
    print(p1)
  })
}
dev.off()


pdf(opt$tplot, width = 7.5, height = 8)
try({
  FeaturePlot(scrna, features = "Total log2-Expression (Microbiome)", pt.size = 0.1, reduction = opt$reduction.type, raster = FALSE) &
    scale_color_continuous(type = "viridis", na.value = "gray96") -> p2

  p2 <- (p2 / guide_area()) + plot_layout(heights = c(2.5, 1), widths = c(1, 0.6), guides = "collect")
  print(p2)
})
dev.off()

R ggplot2 tidyverse Seurat patcHwork viridis Magma randomcoloR From line 4 of scripts/scrna-microbiome-dimplot.R

option_list <- list(
  optparse::make_option(c("--rds"),
    type = "character", default = NULL,
    help = "Processed rds file of a Seurat object", metavar = "character"
  ),
  optparse::make_option(c("--microbiome.rds"),
    type = "character", default = NULL,
    help = "Microbiome rds file", metavar = "character"
  ),
  optparse::make_option(c("--sigplot"),
    type = "character", default = "sigplot.pdf",
    help = "Plot file name", metavar = "character"
  ),
  optparse::make_option(c("--sigtable"),
    type = "character", default = NULL,
    help = "Excel table of positive markers", metavar = "character"
  ),
  optparse::make_option(c("--taxa"),
    type = "character", default = "genus",
    help = "Taxonomic level", metavar = "character"
  ),
  optparse::make_option(c("--idents"),
    type = "character", default = "seurat_clusters",
    help = "Meta data column name", metavar = "character"
  )
)



opt_parser <- optparse::OptionParser(option_list = option_list)
opt <- optparse::parse_args(opt_parser)

if (is.null(opt$rds)) {
  optparse::print_help(opt_parser)
  stop("At least one argument must be supplied (rds file)", call. = FALSE)
}

require(Seurat)
# require(randomcoloR)
require(tidyseurat)
require(viridis)
require(tidyverse)

try(
  {
    source("workflow/scripts/scrna-functions.R")
  },
  silent = TRUE
)
try(
  {
    source(paste0(system("python -c 'import os; import cellsnake; print(os.path.dirname(cellsnake.__file__))'", intern = TRUE), "/scrna/workflow/scripts/scrna-functions.R"))
  },
  silent = TRUE
)

scrna <- readRDS(file = opt$rds)
DefaultAssay(scrna) <- "RNA"


microbiome <- readRDS(file = opt$microbiome.rds)






# p1 <- DimPlot(scrna, reduction = opt$reduction.type, label = TRUE) & theme_cellsnake_classic() & scale_color_manual(values = palette)

tryCatch(
  {
    AddMetaData(scrna, microbiome %>% rownames_to_column("barcodes") %>% gather(taxa, umi, -barcodes) %>% dplyr::group_by(taxa) %>% dplyr::mutate(sum = sum(umi, na.rm = T)) %>% ungroup() %>%
      dplyr::mutate(taxa = paste0(opt$taxa, "_", taxa)) %>%
      dplyr::select(-sum) %>% dplyr::group_by(barcodes, taxa) %>% dplyr::summarise(sum = sum(umi, na.rm = T)) %>% ungroup() %>% spread(taxa, sum) %>% column_to_rownames("barcodes")) -> scrna

    scrna %>%
      dplyr::select(orig.ident, one_of(opt$idents), starts_with(opt$taxa)) %>%
      gather(taxa, umi, starts_with(opt$taxa)) %>%
      group_by(across(opt$idents), orig.ident, taxa) %>%
      dplyr::mutate(total = sum(umi, na.rm = T)) %>%
      # group_by(taxa, orig.ident, across(opt$idents)) %>%
      dplyr::mutate(cell = n()) %>%
      dplyr::ungroup() %>%
      distinct(orig.ident, across(opt$idents), taxa, total, cell) %>%
      group_by(taxa) %>%
      dplyr::mutate(v3 = sum(total) - total, v4 = sum(cell) - cell) %>%
      rowwise() %>%
      # dplyr::mutate(p = fisher.test(matrix(c(total, cell, v3, v4), ncol = 2), alternative = "greater")$p.value) %>%
      ungroup() %>%
      # dplyr::mutate(p = p.adjust(p)) %>%
      dplyr::mutate(`Taxa reads in this group` = total, `Cells in this group` = cell, `Total reads for this taxa` = v3 + total, `Total cells` = v4 + cell) %>%
      dplyr::filter(`Taxa reads in this group` > 0) %>%
      dplyr::select(-total, -cell, -v3, -v4) %>%
      arrange(desc(`Taxa reads in this group`)) -> df
  },
  error = function(e) {
    print(e)
    df <- data.frame(orig.ident = character(), `Taxa reads in this group` = numeric(), `Cells in this group` = numeric(), `Total reads for this taxa` = numeric(), `Total cells` = numeric())
  }
) -> df

# [email protected] %>%
#  dplyr::select(starts_with(opt$idents)) %>%
#  pull() %>%
#  unique() %>%
#  length() -> n

head(df)

openxlsx::write.xlsx(df, file = opt$sigtable)

write_tsv(df, file = opt$sigtable %>% str_replace(".xlsx", ".tsv"))

R tidyverse Seurat viridis randomcoloR From line 4 of scripts/scrna-microbiome-sigplot.R

option_list <- list(
    optparse::make_option(c("--rds"),
        type = "character", default = NULL,
        help = "Processed rds file of a Seurat object", metavar = "character"
    ),
    optparse::make_option(c("--pplot"),
        type = "character", default = NULL,
        help = "Partition plot", metavar = "character"
    ),
    optparse::make_option(c("--output.dir"),
        type = "character", default = NULL,
        help = "Output plot directory", metavar = "character"
    )
)



opt_parser <- optparse::OptionParser(option_list = option_list)
opt <- optparse::parse_args(opt_parser)

if (!requireNamespace("SeuratWrappers", quietly = TRUE)) {
    remotes::install_github("satijalab/seurat-wrappers", upgrade = "never")
}

if (!requireNamespace("monocle3", quietly = TRUE)) {
    remotes::install_version("igraph", "1.4.3")
    remotes::install_github("cole-trapnell-lab/monocle3", upgrade = "never")
}

require(monocle3)
require(Seurat)
require(SeuratWrappers)
require(tidyverse)
require(patchwork)
require(magrittr)


if (is.null(opt$rds)) {
    optparse::print_help(opt_parser)
    stop("At least one argument must be supplied (rds file)", call. = FALSE)
}
scrna <- readRDS(file = opt$rds)


cds <- as.cell_data_set(scrna)

tryCatch(
    {
        cds <- cluster_cells(cds)
    },
    error = function(e) {
        cds <- cluster_cells(cds, cluster_method = "louvain")
    }
) -> cds


p1 <- plot_cells(cds, color_cells_by = "singler", show_trajectory_graph = FALSE)
p2 <- plot_cells(cds, color_cells_by = "partition", show_trajectory_graph = FALSE)


wrap_plots(p1, p2)

ggsave(opt$pplot, width = 11, height = 5.5)


as.Seurat(cds, assay = NULL) -> scrna
scrna@meta.data %>%
    dplyr::count(monocle3_partitions) %>%
    dplyr::mutate(perc = (n * 100) / sum(n)) %>%
    dplyr::filter(perc >= 5, n > 200) %>%
    pull(monocle3_partitions) %>%
    as.vector() -> partitions

print(partitions)
for (i in partitions) {
    integrated.sub <- subset(scrna, monocle3_partitions == i)


    cds2 <- as.cell_data_set(integrated.sub)
    if (i != "1") {
        cds2 <- cluster_cells(cds2)
    }
    cds2 <- learn_graph(cds2)
    p1 <- plot_cells(cds2, label_groups_by_cluster = FALSE, label_leaves = FALSE, label_branch_points = FALSE)

    # max.avp <- which.max(unlist(FetchData(integrated.sub, "AVP")))
    # max.avp <- colnames(integrated.sub)[max.avp]
    # cds2 <- order_cells(cds2, root_cells = max.avp)
    # p2 <- plot_cells(cds2, color_cells_by = "pseudotime", label_cell_groups = FALSE, label_leaves = FALSE,label_branch_points = FALSE)

    # wrap_plots(p1, p2)
    ggsave(paste0(opt$output.dir, "/plot_monocle-partition-", i, ".pdf"), plot = p1, width = 6, height = 5.5)
}

R tidyverse Seurat magrittr patcHwork SeuratWrappers From line 2 of scripts/scrna-monocle3.R

option_list <- list(
  optparse::make_option(c("--scale.factor"),
    type = "integer", default = 10000,
    help = "Scale factor [default= %default]", metavar = "character"
  ),
  optparse::make_option(c("--nfeatures"),
    type = "integer", default = 2000,
    help = "Highly variable features [default= %default]", metavar = "integer"
  ),
  optparse::make_option(c("--variable.selection.method"),
    type = "character", default = "vst",
    help = "Find variable features selection method [default= %default]", metavar = "character"
  ),
  optparse::make_option(c("--rds"),
    type = "character", default = NULL,
    help = "A RAW rds file of a Seurat object", metavar = "character"
  ),
  optparse::make_option(c("--normalization.method"),
    type = "character", default = "LogNormalize",
    help = "Normalization method[default= %default]", metavar = "character"
  ),
  optparse::make_option(c("--doublet.filter"), action = "store_true", default = FALSE),
  optparse::make_option(c("--integration"), action = "store_true", default = FALSE),
  optparse::make_option(c("--umap"), action = "store_true", default = FALSE),
  optparse::make_option(c("--tsne"), action = "store_true", default = FALSE),
  optparse::make_option(c("--resolution"),
    type = "character", default = "0.8",
    help = "Resolution [default= %default]", metavar = "character"
  ),
  optparse::make_option(c("--output.rds"),
    type = "character", default = "output.rds",
    help = "Output RDS file name [default= %default]", metavar = "character"
  ),
  optparse::make_option(c("--cpu"),
    type = "integer", default = 5,
    help = "Number of CPU for parallel run [default= %default]", metavar = "character"
  ),
  optparse::make_option(c("--reference"),
    type = "character", default = "HumanPrimaryCellAtlasData",
    help = "SingleR reference for annotation", metavar = "character"
  )
)

opt_parser <- optparse::OptionParser(option_list = option_list)
opt <- optparse::parse_args(opt_parser)

if (is.null(opt$rds)) {
  optparse::print_help(opt_parser)
  stop("At least one argument must be supplied (rds file)", call. = FALSE)
}

require(optparse)
require(SingleR)
require(celldex)
require(tidyverse)
require(Seurat)
require(patchwork)



try(
  {
    source("workflow/scripts/scrna-functions.R")
  },
  silent = TRUE
)
try(
  {
    source(paste0(system("python -c 'import os; import cellsnake; print(os.path.dirname(cellsnake.__file__))'", intern = TRUE), "/scrna/workflow/scripts/scrna-functions.R"))
  },
  silent = TRUE
)


scrna <- readRDS(file = opt$rds)

if (isFALSE(opt$integration)) {
  scrna <- NormalizeData(scrna, normalization.method = opt$normalization.method, scale.factor = opt$scale.factor)
  scrna <- FindVariableFeatures(scrna, selection.method = opt$variable.selection.method, nfeatures = opt$nfeatures)
} else {
  try({
    DefaultAssay(scrna) <- "integrated"
  }) # for now only for Seurat, Harmony will come
}


# all.genes <- rownames(scrna) memory requirements can be large if using all genes
not.all.genes <- VariableFeatures(scrna) # only variable features

scrna <- ScaleData(scrna, features = not.all.genes)
scrna <- RunPCA(scrna, features = not.all.genes)
dimensionReduction <- function_pca_dimensions(scrna)
scrna <- FindNeighbors(scrna, dims = 1:dimensionReduction)

if (!opt$resolution %in% c("auto", "AUTO", "Auto")) {
  scrna <- FindClusters(scrna, resolution = as.numeric(opt$resolution))
} else {
  if (!requireNamespace("MultiKParallel", quietly = TRUE)) {
    remotes::install_github("sinanugur/MultiKParallel", upgrade = "never")
  }
  require(MultiKParallel)
  scrna_tmp <- FindClusters(scrna, resolution = seq(0.2, 2.5, 0.15))
  multik <- MultiKParallel(scrna_tmp, reps = 10, seed = 255, resolution = seq(0.2, 2.5, 0.15), numCores = opt$cpu, nPC = dimensionReduction)
  multik$k %>%
    tibble::as_tibble() %>%
    count(value) %>%
    filter(n == max(n)) %>%
    slice(1) %>%
    pull(value) -> K

  scrna_tmp[[]] %>%
    as.data.frame() %>%
    select(starts_with("RNA")) %>%
    rownames_to_column("barcode") %>%
    mutate(across(where(is.factor), as.character)) %>%
    as_tibble() %>%
    gather(res, clu, contains("RNA")) %>%
    distinct(res, clu) %>%
    count(res) %>%
    mutate(res = str_remove(res, "RNA_snn_res.")) %>%
    mutate(diff = abs(n - K)) %>%
    slice_min(order_by = diff) %>%
    pull(res) %>%
    as.numeric() -> optimal_resolution


  scrna <- FindClusters(scrna, resolution = optimal_resolution)
}



if (opt$umap) {
  scrna <- RunUMAP(scrna, dims = 1:dimensionReduction)
}


if (opt$tsne) {
  scrna <- RunTSNE(scrna, dims = 1:dimensionReduction, check_duplicates = FALSE)
}



if (opt$doublet.filter) {
  if (!requireNamespace("DoubletFinder", quietly = TRUE)) {
    remotes::install_github("chris-mcginnis-ucsf/DoubletFinder", upgrade = "never")
  }
  require(DoubletFinder)

  homotypic.prop <- modelHomotypic(scrna$seurat_clusters)
  nExp_poi <- round(0.075 * nrow(scrna@meta.data)) ## Assuming 7.5% doublet formation rate - tailor for your dataset
  nExp_poi.adj <- round(nExp_poi * (1 - homotypic.prop))

  ## Run DoubletFinder with varying classification stringencies ----------------------------------------------------------------
  scrna <- doubletFinder_v3(scrna, PCs = 1:10, pN = 0.25, pK = 0.09, nExp = nExp_poi, reuse.pANN = FALSE, sct = FALSE)
  scrna <- doubletFinder_v3(scrna, PCs = 1:10, pN = 0.25, pK = 0.09, nExp = nExp_poi.adj, reuse.pANN = paste0("pANN_0.25_0.09_", nExp_poi), sct = FALSE)


  scrna@meta.data %>%
    tibble::rownames_to_column("barcodes") %>%
    select(barcodes, starts_with("DF")) %>%
    select(barcodes, DoubletFinder = 2) -> doublet_df

  scrna@meta.data <- scrna@meta.data %>%
    select(!starts_with("DF")) %>%
    select(!starts_with("pANN")) %>%
    tibble::rownames_to_column("barcodes") %>%
    dplyr::left_join(doublet_df, by = "barcodes") %>%
    tibble::column_to_rownames("barcodes")


  subset(scrna, subset = DoubletFinder == "Singlet") -> scrna
  # scrna$DoubletFinder <- NULL
}




# RNA_=paste0("RNA_snn_res.",opt$resolution)

# metrics <- table([email protected][["seurat_clusters"]], [email protected]$orig.ident)

# p1 <- DimPlot(scrna, reduction = "pca", label = TRUE, label.size = 10)

# celltype annotation with SingleR
ref <- get(opt$reference)()



DefaultAssay(scrna) <- "RNA"
smObjSCE <- as.SingleCellExperiment(scrna)
pred <- SingleR(test = smObjSCE, ref = ref, labels = ref$label.fine)
AddMetaData(scrna, pred["pruned.labels"] %>% as.data.frame() %>% dplyr::select(singler = pruned.labels)) -> scrna
attr(scrna, "SingleRref") <- opt$reference

try({
  DefaultAssay(scrna) <- "integrated"
})


# output files
saveRDS(scrna, file = opt$output.rds)

R tidyverse Seurat optparse patcHwork SingleR From line 4 of scripts/scrna-normalization-pca.R

option_list <- list(
  optparse::make_option(c("--rds"),
    type = "character", default = NULL,
    help = "Processed rds file of a Seurat object", metavar = "character"
  ),
  optparse::make_option(c("--tsv"),
    type = "character", default = NULL,
    help = "A text file contains the gene list", metavar = "character"
  ),
  optparse::make_option(c("--gene"),
    type = "character", default = NULL,
    help = "A list of genes to create plots", metavar = "character"
  ),
  optparse::make_option(c("--output.plot.dir"),
    type = "character", default = NULL,
    help = "Output plot directory", metavar = "character"
  ),
  optparse::make_option(c("--reduction.type"),
    type = "character", default = "umap",
    help = "Reduction type, umap or tsne", metavar = "character"
  ),
  optparse::make_option(c("--idents"),
    type = "character", default = "seurat_clusters",
    help = "Meta data column name", metavar = "character"
  )
)



opt_parser <- optparse::OptionParser(option_list = option_list)
opt <- optparse::parse_args(opt_parser)

try(
  {
    source("workflow/scripts/scrna-functions.R")
  },
  silent = TRUE
)
try(
  {
    source(paste0(system("python -c 'import os; import cellsnake; print(os.path.dirname(cellsnake.__file__))'", intern = TRUE), "/scrna/workflow/scripts/scrna-functions.R"))
  },
  silent = TRUE
)

if (is.null(opt$rds)) {
  optparse::print_help(opt_parser)
  stop("At least one argument must be supplied (rds file)", call. = FALSE)
}

require(Seurat)
require(tidyverse)
require(viridis)

markers <- c()

if (!is.null(opt$tsv)) {
  markers <- read_tsv(opt$tsv, col_names = FALSE) %>% pull()
}



scrna <- readRDS(file = opt$rds)
DefaultAssay(scrna) <- "RNA"

# RNA_=paste0("RNA_snn_res.",opt$resolution)


Idents(object = scrna) <- scrna@meta.data[[opt$idents]]


if (!is.null(opt$gene)) {
  markers <- c(unlist(strsplit(opt$gene, " ")), markers)
}

# identification=opt$idents



# FetchData(scrna,c(identification,"UMAP_1","UMAP_2","TSNE_1","TSNE_2",markers)) %>% rownames_to_column("barcodes") %>% gather(gene,expr,5:last_col()) %>% group_by(gene,across(identification)) %>% dplyr::rename(clusters:=identification) %>% ungroup() -> plot_df


# print(head(plot_df))
dir.create(opt$output.plot.dir, recursive = TRUE)

suppressMessages(for (i in markers) {
  n <- length(Idents(scrna) %>% unique())
  palette <- function_color_palette(n)

  tryCatch(
    {
      p1 <- FeaturePlot(scrna, reduction = opt$reduction.type, features = i, raster = FALSE) & scale_color_continuous(type = "viridis") & labs(color = "Expression") & theme(axis.text = element_text(size = 10))
      p2 <- DotPlot(scrna, features = i) & scale_color_continuous(type = "viridis") & labs(color = "Average Expression", size = "Percent Expressed") & ylab("Identity") & theme(axis.title.x = element_blank(), axis.text = element_text(size = 10)) & theme(legend.position = "right")
      p3 <- VlnPlot(scrna, features = i) & ggthemes::theme_hc() & scale_fill_manual(values = palette) & theme(legend.position = "right", axis.text = element_text(size = 10)) & labs(fill = "") & xlab("Identity") & ylab("Expression Level")

      # p1 <- plot_df %>% filter(gene == i) %>%       ggplot(aes(x=UMAP_1,y=UMAP_2,color=expr)) + geom_point(size=0.3) + theme_cellsnake_classic() + scale_color_continuous(type = "viridis") + labs(color="Expression") + theme(axis.text = element_text(size=12))
      # p2 <- plot_df %>% group_by(gene,clusters)  %>% summarise(percent=100*(length(expr[expr>0])/n()),average=mean(expr)) %>% filter(gene == i) %>%       ggplot(aes(x=gene,y=clusters,size=percent,color=average)) + geom_point() + theme_cellsnake_classic() +       scale_color_continuous(type = "viridis") + labs(color="Average Expression",size="Percent Expressed")+ ylab("Identity") + theme(axis.title.x = element_blank(),axis.text = element_text(size=12)) + theme(legend.position = "right")
      # p3 <- plot_df %>% ungroup() %>% filter(gene == i) %>%       ggplot(aes(x=clusters,y=log10(expr),fill=clusters)) + geom_violin() + theme_cellsnake_classic() + geom_jitter(size=0.3,shape=20, position=position_jitter(0.2)) +       scale_fill_manual(values = palette) + theme(legend.position = "right",axis.text = element_text(size = 12)) + labs(fill="") + xlab("Identity") + ylab("Log10-Expression Level")

      suppressWarnings(((p1 | p2) / p3) -> wp)
    },
    error = function(cond) {
      wp <- plot.new()
    },
    finally = {}
  ) -> wp
  ggsave(paste0(opt$output.plot.dir, "/", i, ".pdf"), wp, height = 5 + (n * 0.15), width = 7 + (n * 0.15), useDingbats = TRUE)
})

R ggplot2 tidyverse Seurat viridis From line 2 of scripts/scrna-selected-marker-plots.R

option_list <- list(
    optparse::make_option(c("--sampleid"),
        type = "character", default = NULL,
        help = "Sample ID", metavar = "character"
    ),
    optparse::make_option(c("-r", "--rds"),
        type = "character", default = NULL,
        help = "A list of RDS files of Seurat objects", metavar = "character"
    ),
    optparse::make_option(c("--output.rds"),
        type = "character", default = "output.rds",
        help = "Output RDS file name [default= %default]", metavar = "character"
    ),
    optparse::make_option(c("--cca.dims"),
        type = "integer", default = 30,
        help = "Which dimensions to use from the CCA to specify the neighbor search space 1 to [default= %default]", metavar = "character"
    ),
    optparse::make_option(c("--reduction"),
        type = "character", default = "cca",
        help = "Integration reduction type [default= %default]", metavar = "character"
    )
)

opt_parser <- optparse::OptionParser(option_list = option_list)
opt <- optparse::parse_args(opt_parser)

if (is.null(opt$rds) || is.null(opt$sampleid)) {
    optparse::print_help(opt_parser)
    stop("At least one argument must be supplied (rds and sampleid)", call. = FALSE)
}

require(tidyverse)
require(Seurat)
require(patchwork)
try(
    {
        source("workflow/scripts/scrna-functions.R")
    },
    silent = TRUE
)
try(
    {
        source(paste0(system("python -c 'import os; import cellsnake; print(os.path.dirname(cellsnake.__file__))'", intern = TRUE), "/scrna/workflow/scripts/scrna-functions.R"))
    },
    silent = TRUE
)


files <- unlist(strsplit(opt$rds, " "))
print(files)
for (i in files) {
    if (!exists("scrna_list")) {
        scrna_list <- list(readRDS(file = i))
    } else {
        scrna_list <- append(scrna_list, readRDS(file = i))
    }
}



scrna_anchors <- FindIntegrationAnchors(object.list = scrna_list, dims = 1:opt$cca.dims, reduction = opt$reduction)


scrna <- IntegrateData(anchorset = scrna_anchors, dims = 1:opt$cca.dims)


saveRDS(scrna, file = opt$output.rds)

R tidyverse Seurat patcHwork From line 3 of scripts/scrna-seurat-integration.R

option_list <- list(
    optparse::make_option(c("--rds"),
        type = "character", default = NULL,
        help = "A list of RDS files of Seurat objects", metavar = "character"
    ),
    optparse::make_option(c("--output"),
        type = "character", default = "pred.rds",
        help = "Output prediction file", metavar = "character"
    ),
    optparse::make_option(c("--reference"),
        type = "character", default = "HumanPrimaryCellAtlasData",
        help = "SingleR reference", metavar = "character"
    ),
    optparse::make_option(c("--granulation"),
        type = "character", default = "label.main",
        help = "SingleR granulation level", metavar = "character"
    )
)


opt_parser <- optparse::OptionParser(option_list = option_list)
opt <- optparse::parse_args(opt_parser)

if (is.null(opt$rds)) {
    optparse::print_help(opt_parser)
    stop("At least one argument must be supplied (rds and sampleid)", call. = FALSE)
}


require(optparse)
require(SingleR)
require(SingleCellExperiment)
require(Seurat)
scrna <- readRDS(file = opt$rds)
DefaultAssay(scrna) <- "RNA"



# celltype annotation with SingleR
ref <- get(opt$reference)()

smObjSCE <- as.SingleCellExperiment(scrna)
pred <- SingleR(test = smObjSCE, ref = ref, labels = ref[[opt$granulation]])

saveRDS(pred, file = opt$output)

R Seurat optparse SingleCellExperiment SingleR From line 3 of scripts/scrna-singler-annotation.R

option_list <- list(
      optparse::make_option(c("--rds"),
            type = "character", default = NULL,
            help = "A list of RDS files of Seurat objects", metavar = "character"
      ),
      optparse::make_option(c("--sheplot"),
            type = "character", default = "sheplot.pdf",
            help = "Output score heatmap plot file name", metavar = "character"
      ),
      optparse::make_option(c("--sheplottop"),
            type = "character", default = "sheplot.pdf",
            help = "Output score heatmap plot file name, top 20", metavar = "character"
      ),
      optparse::make_option(c("--pheplot"),
            type = "character", default = "pheplot.pdf",
            help = "Output heatmap plot file name", metavar = "character"
      ),
      optparse::make_option(c("--idents"),
            type = "character", default = "seurat_clusters",
            help = "Meta data column name", metavar = "character"
      ),
      optparse::make_option(c("--csv"),
            type = "character", default = NULL,
            help = "A meta data table", metavar = "character"
      ),
      optparse::make_option(c("--prediction"),
            type = "character", default = "pred.rds",
            help = "Input prediction file", metavar = "character"
      ),
      optparse::make_option(c("--xlsx"),
            type = "character", default = "predictions.xlsx",
            help = "Input prediction file", metavar = "character"
      )
)


opt_parser <- optparse::OptionParser(option_list = option_list)
opt <- optparse::parse_args(opt_parser)

if (is.null(opt$rds)) {
      optparse::print_help(opt_parser)
      stop("At least one argument must be supplied (rds and sampleid)", call. = FALSE)
}


require(optparse)
require(SingleR)
# require(celldex)
require(pheatmap)
require(Seurat)
require(tidyverse)

scrna <- readRDS(file = opt$rds)
DefaultAssay(scrna) <- "RNA"



# celltype annotation with SingleR
pred <- readRDS(opt$prediction)


plotScoreHeatmap(pred) -> p1
ggsave(plot = p1, filename = opt$sheplot, width = 15, height = 8)


plotScoreHeatmap(pred, show.labels = F, max.labels = 20) -> p1
ggsave(plot = p1, filename = opt$sheplottop, width = 7, height = 4)


tab <- table(Assigned = pred$pruned.labels, Cluster = scrna@meta.data[[opt$idents]])


pheatmap(log2(tab + 10), color = colorRampPalette(c("white", "blue"))(101)) -> p1



n1 <- length(unique(scrna$seurat_clusters))
n2 <- length(unique(pred$pruned.labels))

ggsave(plot = p1, filename = opt$pheplot, width = 6 + (n1 * 0.10), height = 4 + (n2 * 0.10))

tab %>% openxlsx::write.xlsx(file = opt$xlsx, row.names = F)

R tidyverse Seurat optparse pheatmap SingleR From line 3 of scripts/scrna-singler-plots.R

option_list <- list(
    optparse::make_option(c("--rds"),
        type = "character", default = NULL,
        help = "Processed rds file of a Seurat object", metavar = "character"
    ),
    optparse::make_option(c("--keywords"),
        type = "character", default = NULL,
        help = "Keywords to slice object", metavar = "character"
    ),
    optparse::make_option(c("--column"),
        type = "character", default = NULL,
        help = "Meta data column name for selecting the slice", metavar = "character"
    ),
    optparse::make_option(c("--output.rds"),
        type = "character", default = "output.rds",
        help = "Output RDS file name [default= %default]", metavar = "character"
    ),
    optparse::make_option(c("--metadata"),
        type = "character", default = NULL,
        help = "Metadata filename", metavar = "character"
    ),
    optparse::make_option(c("--exact"),
        action = "store_true", default = FALSE,
        help = "Exact match, otherwise pattern match"
    )
)



opt_parser <- optparse::OptionParser(option_list = option_list)
opt <- optparse::parse_args(opt_parser)

if (is.null(opt$rds)) {
    optparse::print_help(opt_parser)
    stop("At least one argument must be supplied (rds file)", call. = FALSE)
}


require(Seurat)
require(tidyverse)



scrna <- readRDS(file = opt$rds)
function_read_metadata <- function(opt) {
    file_type <- tools::file_ext(opt$metadata)
    possible_separators <- c(",", ";", "|", "\t")

    if (tolower(file_type) %in% c("csv", "tsv", "txt")) {
        for (sep in possible_separators) {
            tryCatch(
                {
                    df <- read.csv(opt$metadata, sep = sep)
                    return(df)
                },
                error = function(e) {
                    message(paste("Unable to read file with separator", sep))
                }
            )
        }
    } else if (tolower(file_type) %in% c("xls", "xlsx")) {
        for (sep in possible_separators) {
            tryCatch(
                {
                    df <- openxlsx::read.xlsx(opt$metadata)
                    return(df)
                },
                error = function(e) {
                    message(paste("Unable to read file with separator", sep))
                }
            )
        }
    } else {
        message("Unsupported file type")
        return(NULL)
    }

    message("Unable to read file with any separator")
    return(NULL)
}

if (!is.null(opt$metadata)) {
    function_read_metadata(opt) -> df
    if (is.null(df)) {
        message("No metadata file provided, skipping metadata merge")
    } else {
        scrna@meta.data %>% dplyr::left_join(df, by = c("orig.ident" = names(df)[1])) -> scrna@meta.data
    }
}




function_subset_by_idents <- function(scrna, opt) {
    if (!is.null(opt$keywords)) {
        keywords <- strsplit(opt$keywords, ",")[[1]]
        patterns <- paste0("(?i)", "(", paste(keywords, collapse = "|"), ")")

        scrna@meta.data %>%
            {
                if (!opt$exact) {
                    if (is.null(opt$column)) dplyr::filter(., if_any(everything(), str_detect, pattern = patterns)) else dplyr::filter(., if_any(opt$column, str_detect, pattern = patterns))
                } else {
                    dplyr::filter(., get(opt$column) %in% keywords)
                }
            } %>%
            rownames() -> cells
        scrna <- subset(scrna, cells = cells)

        return(scrna)
    }
}



function_subset_by_idents(scrna, opt) -> scrna

head(scrna)

saveRDS(scrna, file = opt$output.rds)

R tidyverse Seurat From line 4 of scripts/scrna-subset_final_rds.R

option_list <- list(
  optparse::make_option(c("--rds"),
    type = "character", default = NULL,
    help = "Processed rds file of a Seurat object", metavar = "character"
  ),
  optparse::make_option(c("--sampleid"),
    type = "character", default = NULL,
    help = "Sample ID", metavar = "character"
  ),
  optparse::make_option(c("--fplot"),
    type = "character", default = NULL,
    help = "nFeature plot", metavar = "character"
  ),
  optparse::make_option(c("--cplot"),
    type = "character", default = NULL,
    help = "nCount plot", metavar = "character"
  ),
  optparse::make_option(c("--mtplot"),
    type = "character", default = NULL,
    help = "Percent MT plot", metavar = "character"
  ),
  optparse::make_option(c("--rpplot"),
    type = "character", default = NULL,
    help = "Ribo plot", metavar = "character"
  )
)

opt_parser <- optparse::OptionParser(option_list = option_list)
opt <- optparse::parse_args(opt_parser)

if (is.null(opt$rds)) {
  optparse::print_help(opt_parser)
  stop("At least one argument must be supplied (rds file and sampleid)", call. = FALSE)
}

require(Seurat)
require(tidyverse)
# try({source("workflow/scripts/scrna-functions.R")})
# try({source(paste0(system("python -c 'import os; import cellsnake; print(os.path.dirname(cellsnake.__file__))'", intern = TRUE),"/scrna/workflow/scripts/scrna-functions.R"))})



scrna <- readRDS(file = opt$rds)


FeaturePlot(scrna, features = "nFeature_RNA", pt.size = 0.1, raster = FALSE)

ggsave(opt$fplot, width = 7, height = 5)



FeaturePlot(scrna, features = "nCount_RNA", pt.size = 0.1, raster = FALSE)

ggsave(opt$cplot, width = 7, height = 5)


FeaturePlot(scrna, features = "percent.mt", pt.size = 0.1, raster = FALSE)

ggsave(opt$mtplot, width = 7, height = 5)

FeaturePlot(scrna, features = "percent.rp", pt.size = 0.1, raster = FALSE)

ggsave(opt$rpplot, width = 7, height = 5)

R tidyverse Seurat From line 2 of scripts/scrna-technicals.R

option_list <- list(
    optparse::make_option(c("--xlsx"),
        type = "character", default = NULL,
        help = "Excel table of markers for input", metavar = "character"
    ),
    optparse::make_option(c("--output.plot"),
        type = "character", default = "output.pdf",
        help = "Output plot directory", metavar = "character"
    )
)



opt_parser <- optparse::OptionParser(option_list = option_list)
opt <- optparse::parse_args(opt_parser)

# try({source("workflow/scripts/scrna-functions.R")},silent=TRUE)
# try({source(paste0(system("python -c 'import os; import cellsnake; print(os.path.dirname(cellsnake.__file__))'", intern = TRUE),"/scrna/workflow/scripts/scrna-functions.R"))},silent=TRUE)

if (is.null(opt$xlsx)) {
    optparse::print_help(opt_parser)
    stop("At least one argument must be supplied (rds file)", call. = FALSE)
}

require(tidyverse)



Positive_Features <- openxlsx::read.xlsx(opt$xlsx)


Positive_Features %>%
    group_by(cluster) %>%
    slice_max(n = 20, order_by = avg_log2FC) -> df


Positive_Features %>%
    distinct(cluster) %>%
    pull() -> clusters



pdf(opt$output.plot, width = 6, height = 6)
for (i in clusters) {
    df %>% dplyr::filter(cluster %in% i) -> df2
    maxFC <- (df2 %>% pull(avg_log2FC) %>% max()) + 1

    try({
        df2 %>%
            dplyr::mutate(n = dense_rank(dplyr::desc(avg_log2FC))) %>%
            ggplot(aes(x = n, y = avg_log2FC, label = gene)) +
            geom_text(angle = 75, size = 4) +
            ggthemes::theme_few() +
            ylim(c(0, maxFC)) +
            coord_cartesian(clip = "off", expand = TRUE) +
            ggtitle("Top markers", subtitle = paste(i, "vs all")) -> p1
        print(p1)
    })
}
dev.off()

R ggplot2 tidyverse From line 2 of scripts/scrna-top-marker-plot.R

option_list <- list(
    optparse::make_option(c("--rds"),
        type = "character", default = NULL,
        help = "RDS file of marker data frame", metavar = "character"
    ),
    optparse::make_option(c("--vplot"),
        type = "character", default = "vplot.pdf",
        help = "Output volcano plot name", metavar = "character"
    )
)



opt_parser <- optparse::OptionParser(option_list = option_list)
opt <- optparse::parse_args(opt_parser)

if (is.null(opt$rds)) {
    optparse::print_help(opt_parser)
    stop("At least one argument must be supplied (rds file)", call. = FALSE)
}

require(EnhancedVolcano)
require(tidyverse)
# try({source("workflow/scripts/scrna-functions.R")})
# try({source(paste0(system("python -c 'import os; import cellsnake; print(os.path.dirname(cellsnake.__file__))'", intern = TRUE),"/scrna/workflow/scripts/scrna-functions.R"))})




all_markers <- readRDS(file = opt$rds)

pdf(opt$vplot, width = 7, height = 7)
for (i in unique(all_markers$condition)) {
    markers <- all_markers %>% filter(condition == i)

    print(EnhancedVolcano(markers, x = "avg_log2FC", y = "p_val_adj", lab = markers %>% pull(gene), FCcutoff = 0.75, subtitle = i))
}
dev.off()