Workflow Steps and Code Snippets
5 tagged steps and code snippets that match keyword GO.db
The evaluation of MERFISHtools and the underyling model as published in the corresponding article. (v1.0.0)
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 | library("biomaRt") ensembl = useMart("ensembl")#,dataset="hsapiens_gene_ensembl", verbose=TRUE) ensembl = useDataset("hsapiens_gene_ensembl", mart=ensembl) library("GOstats") library("GO.db") library("org.Hs.eg.db") library("mutoss") est <- read.table(snakemake@input[[1]], header = TRUE, stringsAsFactors = FALSE) rownames(est) <- est$feat # translate gene names to entrez ids ids <- getBM(attributes = c("hgnc_symbol", "entrezgene"), filters = "hgnc_symbol", values = est$feat, mart = ensembl) # take the first entrez id in case of ambiguity ids <- ids[!duplicated(ids$hgnc_symbol), ] rownames(ids) <- ids$hgnc_symbol # lookup entrez ids est$entrez <- ids[est$feat, "entrezgene"] # define foreground genes significant <- est$diff_fdr <= 0.05 foreground <- est[significant, ] if(nrow(foreground) > 0) { # define test parameters params <- new("GOHyperGParams", geneIds = foreground$entrez, universeGeneIds = est$entrez, ontology = "BP", annotation = "org.Hs.eg", pvalueCutoff = 0.05, conditional = TRUE, testDirection = "over") results <- hyperGTest(params) print(results) goterms <- summary(results) # correct for multiple testing. We use Benjamini-Yekuteli here, because the performed tests are strongly dependent #by <- BY(goterms$Pvalue, 0.05) #goterms$adjPvalue <- by[["adjPValues"]] write.table(goterms, file = snakemake@output[["terms"]], row.names = FALSE, quote = FALSE, sep = "\t") # associate genes with go terms ids <- ids[!is.na(ids$entrezgene), ] rownames(ids) <- ids$entrezgene genes <- stack(geneIdUniverse(results)) colnames(genes) <- c("entrezid", "goterm") genes <- genes[, c("goterm", "entrezid")] genes$gene <- ids[genes$entrezid, "hgnc_symbol"] genes$entrezid <- NULL genes$cv <- est[genes$gene, "cv_map"] genes$fdr <- est[genes$gene, "diff_fdr"] write.table(genes, file = snakemake@output[["genes"]], row.names = FALSE, quote = FALSE, sep = "\t") } else { file.create(snakemake@output[["terms"]]) file.create(snakemake@output[["genes"]]) } |
Basic RNA-seq analysis for single-end data.
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 | degFile = snakemake@input[['degFile']] assembly <- snakemake@params[['assembly']] FC <- snakemake@params[['FC']] adjp <- snakemake@params[['adjp']] printTree <- snakemake@params[['printTree']] library(GO.db) library(topGO) library(ggplot2) library(RColorBrewer) library(biomaRt) library(GenomicFeatures) library(Rgraphviz) ##----------load differentially expressed genes --------# print("Loading differential expressed gene table") print(degFile) if(grepl('txt$|tsv$',degFile)){ deg <- read.delim(file=degFile,header=TRUE,sep="\t") } ##---------load correct Biomart------------------------# print(getwd()) if (assembly == "hg19") { organismStr <- "hsapiens" geneID2GO <- get(load("./anno/biomaRt/hg19.Ens_75.biomaRt.GO.external.geneID2GO.RData")) xx <- get(load("./anno/biomaRt/GO.db.Term.list.rda")) } if (assembly == "hg38.89") { organismStr <- "hsapiens" ### to get to hg38 mappings ensembl 89! geneID2GO <- get(load("./anno/biomaRt/hg38.Ens_89.biomaRt.GO.external.geneID2GO.RData")) xx <- get(load("./anno/biomaRt/GO.db.Term.list.rda")) } if (assembly == "hg38.90") { organismStr <- "hsapiens" ### to get to hg38 mappings ensembl 90! geneID2GO <- get(load("./anno/biomaRt/hg38.Ens_90.biomaRt.GO.external.geneID2GO.RData")) xx <- get(load("./anno/biomaRt/GO.db.Term.list.rda")) } if (assembly == "mm10.78") { organismStr <- "mmusculus" ### to get to hg38 mappings ensembl 90! geneID2GO <- get(load("./anno/biomaRt/mm10.Ens_78.biomaRt.GO.external.geneID2GO.RData")) xx <- get(load("./anno/biomaRt/GO.db.Term.list.rda")) } if (assembly == "mm10.96") { organismStr <- "mmusculus" ### to get to hg38 mappings ensembl 90! geneID2GO <- get(load("./anno/biomaRt/mm10.Ens_96.biomaRt.GO.external.geneID2GO.RData")) xx <- get(load("./anno/biomaRt/GO.db.Term.list.rda")) } ##-----------------------------------Functions--------------------------------------# runGO <- function(geneList,xx=xx,otype,setName){ setLength <- sum(as.numeric(levels(geneList))[geneList]) fname <- paste(Dir, paste(setName, otype, "GO.txt", sep="_"), sep="/") GOData <- new("topGOdata", ontology=otype, allGenes=geneList, annot = annFUN.gene2GO, gene2GO = geneID2GO) resultFisher <- runTest(GOData, algorithm = "classic", statistic = "fisher")## statistical test for topGO x <- GenTable(GOData, classicFisher=resultFisher, topNodes=length(names(resultFisher@score)))## make go table for all terms x <- data.frame(x) pVal <- data.frame(pval=signif(resultFisher@score, 6)) ## get unrounded pvalue x$enrich <- x$Significant/x$Expected ## calculate enrichment based on what you expect by chance x$p.unround <- pVal[x$GO.ID,"pval"]## put unrounded pvalue in the table x$p.adj <- signif(p.adjust(x$p.unround, method="BH"), 6)## calculate the adjusted pvalue with Benjamini & Hochberg correction x$log.p.adj <- -log10(x$p.adj) ## convert adjusted p value to -log10 for plot magnitude #x$Term.full <- sapply(x$GO.ID, FUN=function(n){Term(xx[[n]])}) ## get the full term name x <- x[order(x$GO.ID),] write.table(x, file=fname, sep="\t", col.names=TRUE, quote=FALSE, row.names=FALSE) ## save the table ## you can print the tree if you want, but since I keep the list of all of them skip if(printTree>0){ printGraph(GOData,## make the tree for the go data resultFisher, firstSigNodes = 5, fn.prefix = sub("_GO.txt$", "", fname), useInfo = "all", pdfSW = TRUE ) } return(x) } ## function to make barplot of -log10 adjusted pvalues colored by enrichment drawBarplot <- function(go, ontology, setName){ go <- go[!go$p.adj > 0.01,] if(nrow(go)>1){ #go$Term.full <- make.unique(paste(sapply(strsplit(as.character(substring(go$Term.full,1,50)), "\\,"), `[`, 1))) go$Term <- make.unique(paste(sapply(strsplit(as.character(substring(go$Term,1,50)), "\\,"), `[`, 1))) print(setName) go <- go[with(go, order(p.adj, -enrich)),] ## Currently there is a discrepency between xx and x, so we only use Term right now, not Term.full #go$Term.full <-factor(paste(go$Term.full), levels=rev(paste(go$Term.full))) ## sort table by adjusted p-value go$Term <-factor(paste(go$Term), levels=rev(paste(go$Term))) ## sort table by adjusted p-value ptitle <- paste(ontology, setName) ## plot title ptitle <- gsub("^.*/","",ptitle) pfname <- paste(setName,ontology,"pdf",sep=".")## name of png file if(nrow(go) < 20 ){ toprange <- 1:nrow(go) }else{ toprange <- 1:20 } top <- go[toprange,] col <- colorRampPalette(c("white","navy"))(16) pdf(file=paste(Dir, pfname, sep="/"),height=5,width=7) print({ p <- ggplot(top, aes(y=log.p.adj, x=Term, fill=enrich)) + ## ggplot barplot function geom_bar(stat="identity",colour="black") + ggtitle(ptitle) + xlab("") + ylab("-log10(fdr)") + scale_fill_gradient(low=col[2], high=col[15], name="enrichment", limits=c(0,ceiling(max(top$enrich))))+ coord_flip()+ theme(panel.grid.major = element_line(colour = "grey"), panel.grid.minor = element_blank(), panel.background = element_blank(), axis.line = element_line(colour = "black"))+ theme(text = element_text(size=8), axis.text.x = element_text(vjust=1,color="black",size=8), axis.text.y = element_text(color="black",size=8), plot.title=element_text(size=10)) }) dev.off() } } print("get up genes and make geneList") up <- deg$padj < adjp & deg$log2FoldChange >= log2(FC) up <- unique(rownames(deg[up,])) up <- toupper(up) all <-unique(names(geneID2GO)) up.geneList <- factor(as.integer(all %in% up)) names(up.geneList) <- all up.setsize <- sum(as.numeric(levels(up.geneList))[up.geneList]) print("setsize for significant genes") up.setsize adjplabel <- gsub("^0\\.","",adjp) comparison <- gsub("\\.tsv$|\\.txt$|\\.rda$|\\.RData$","",degFile) Dir <- sub("$", "/GOterms", dirname(comparison)) if(!(file.exists(Dir))) { dir.create(Dir,FALSE,TRUE) } if (up.setsize >= 2){ print("make GO table for the up genes") go.UP.BP <- runGO(geneList=up.geneList,xx=xx,otype="BP",setName=paste(basename(comparison),"upFC",FC, "adjp", adjp, sep=".")) drawBarplot(go=go.UP.BP,ontology="BP",setName=paste(basename(comparison),"upFC",FC, "adjp", adjp, sep=".")) }else{ up_out = snakemake@output[[1]] write.table('No Significant Genes', file=up_out) } print("get down genes and make geneList") dn <- deg$padj < adjp & deg$log2FoldChange <= -log2(FC) dn <- unique(rownames(deg[dn,])) dn <- toupper(dn) all <-unique(names(geneID2GO)) dn.geneList <- factor(as.integer(all %in% dn)) names(dn.geneList) <- all dn.setsize <- sum(as.numeric(levels(dn.geneList))[dn.geneList]) print("setsize for significant genes") dn.setsize if(dn.setsize >= 2){ print("make GO table for down genes") go.DN.BP <- runGO(geneList=dn.geneList,xx=xx,otype="BP",setName=paste(basename(comparison),"downFC",FC, "adjp", adjp, sep=".")) print("make barplot for down genes") drawBarplot(go=go.DN.BP,ontology="BP",setName=paste(basename(comparison),"downFC",FC, "adjp", adjp, sep=".")) }else{ down_out = snakemake@output[[2]] write.table('No Significant Genes', file=down_out) } |
This pipeline contains the data processing steps used for Thuy Ngo's 2019 cell-free RNA manuscript
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 | degFile = snakemake@input[['degFile']] assembly <- snakemake@params[['assembly']] FC <- snakemake@params[['FC']] adjp <- snakemake@params[['adjp']] printTree <- snakemake@params[['printTree']] library(GO.db) library(topGO) library(ggplot2) library(RColorBrewer) library(biomaRt) library(GenomicFeatures) library(Rgraphviz) ##----------load differentially expressed genes --------# print("Loading differential expressed gene table") print(degFile) deg <- read.delim(file=degFile,header=TRUE,sep="\t") rownames(deg) <- sub("\\.[0-9]*","",rownames(deg)) ##---------load correct Biomart------------------------# if (assembly == "hg19") { organismStr <- "hsapiens" geneID2GO <- get(load("/home/groups/CEDAR/anno/biomaRt/hg19.Ens_75.biomaRt.GO.geneID2GO.RData")) xx <- get(load("/home/groups/CEDAR/anno/biomaRt/GO.db.Term.list.rda")) } if (assembly == "hg38.89") { organismStr <- "hsapiens" ### to get to hg38 mappings ensembl 89! geneID2GO <- get(load("/home/groups/CEDAR/anno/biomaRt/hg38.Ens_89.biomaRt.GO.geneID2GO.RData")) xx <- get(load("/home/groups/CEDAR/anno/biomaRt/GO.db.Term.list.rda")) } if (assembly == "hg38.90") { organismStr <- "hsapiens" ### to get to hg38 mappings ensembl 90! geneID2GO <- get(load("/home/groups/CEDAR/anno/biomaRt/hg38.Ens_90.biomaRt.GO.geneID2GO.RData")) xx <- get(load("/home/groups/CEDAR/anno/biomaRt/GO.db.Term.list.rda")) } if (assembly == "mm10") { organismStr <- "mmusculus" ### to get to hg38 mappings ensembl 90! geneID2GO <- get(load("./anno/biomaRt/hg38.Ens_90.biomaRt.GO.external.geneID2GO.RData")) xx <- get(load("./anno/biomaRt/GO.db.Term.list.rda")) } ##-----------------------------------Functions--------------------------------------# runGO <- function(geneList,xx=xx,otype,setName){ setLength <- sum(as.numeric(levels(geneList))[geneList]) fname <- paste(Dir, paste(setName, otype, "GO.txt", sep="_"), sep="/") GOData <- new("topGOdata", ontology=otype, allGenes=geneList, annot = annFUN.gene2GO, gene2GO = geneID2GO) resultFisher <- runTest(GOData, algorithm = "classic", statistic = "fisher")## statistical test for topGO x <- GenTable(GOData, classicFisher=resultFisher, topNodes=length(names(resultFisher@score)))## make go table for all terms x <- data.frame(x) pVal <- data.frame(pval=signif(resultFisher@score, 6)) ## get unrounded pvalue x$enrich <- x$Significant/x$Expected ## calculate enrichment based on what you expect by chance x$p.unround <- pVal[x$GO.ID,"pval"]## put unrounded pvalue in the table x$p.adj <- signif(p.adjust(x$p.unround, method="BH"), 6)## calculate the adjusted pvalue with Benjamini & Hochberg correction x$log.p.adj <- -log10(x$p.adj) ## convert adjusted p value to -log10 for plot magnitude #x$Term.full <- sapply(x$GO.ID, FUN=function(n){Term(xx[[n]])}) ## get the full term name x <- x[order(x$GO.ID),] write.table(x, file=fname, sep="\t", col.names=TRUE, quote=FALSE, row.names=FALSE) ## save the table ## you can print the tree if you want, but since I keep the list of all of them skip if(printTree>0){ printGraph(GOData,## make the tree for the go data resultFisher, firstSigNodes = 5, fn.prefix = sub("_GO.txt$", "", fname), useInfo = "all", pdfSW = TRUE ) } return(x) } ## function to make barplot of -log10 adjusted pvalues colored by enrichment drawBarplot <- function(go, ontology, setName){ go <- go[!go$p.adj > 0.01,] if(nrow(go)>1){ #go$Term.full <- make.unique(paste(sapply(strsplit(as.character(substring(go$Term.full,1,50)), "\\,"), `[`, 1))) go$Term <- make.unique(paste(sapply(strsplit(as.character(substring(go$Term,1,50)), "\\,"), `[`, 1))) print(setName) go <- go[with(go, order(p.adj, -enrich)),] ## Currently there is a discrepency between xx and x, so we only use Term right now, not Term.full #go$Term.full <-factor(paste(go$Term.full), levels=rev(paste(go$Term.full))) ## sort table by adjusted p-value go$Term <-factor(paste(go$Term), levels=rev(paste(go$Term))) ## sort table by adjusted p-value ptitle <- paste(ontology, setName) ## plot title ptitle <- gsub("^.*/","",ptitle) pfname <- paste(setName,ontology,"pdf",sep=".")## name of png file if(nrow(go) < 20 ){ toprange <- 1:nrow(go) }else{ toprange <- 1:20 } top <- go[toprange,] col <- colorRampPalette(c("white","navy"))(16) pdf(file=paste(Dir, pfname, sep="/"),height=5,width=7) print({ p <- ggplot(top, aes(y=log.p.adj, x=Term, fill=enrich)) + ## ggplot barplot function geom_bar(stat="identity",colour="black") + ggtitle(ptitle) + xlab("") + ylab("-log10(fdr)") + scale_fill_gradient(low=col[2], high=col[15], name="enrichment", limits=c(0,ceiling(max(top$enrich))))+ coord_flip()+ theme(panel.grid.major = element_line(colour = "grey"), panel.grid.minor = element_blank(), panel.background = element_blank(), axis.line = element_line(colour = "black"))+ theme(text = element_text(size=8), axis.text.x = element_text(vjust=1,color="black",size=8), axis.text.y = element_text(color="black",size=8), plot.title=element_text(size=10)) }) dev.off() } } print("get up genes and make geneList") up <- deg$padj < adjp & deg$log2FoldChange >= log2(FC) up <- unique(rownames(deg[up,])) all <-unique(names(geneID2GO)) up.geneList <- factor(as.integer(all %in% up)) names(up.geneList) <- all up.setsize <- sum(as.numeric(levels(up.geneList))[up.geneList]) print("setsize for significant genes") up.setsize adjplabel <- gsub("^0\\.","",adjp) comparison <- gsub("\\.tsv$|\\.txt$|\\.rda$|\\.RData$","",degFile) Dir <- sub("$", "/GOterms", dirname(comparison)) if(!(file.exists(Dir))) { dir.create(Dir,FALSE,TRUE) } if (up.setsize >=2) { print("make GO table for the up genes") ################################# go.UP.BP <- runGO(geneList=up.geneList,xx=xx,otype="BP",setName=paste(basename(comparison),"upFC",FC, "adjp", adjp, sep=".")) #go.UP.MF <- runGO(geneList=up.geneList,xx=xx,otype="MF",setName=paste(comparison,"up",sep=".")) print("make the png for the up genes") drawBarplot(go=go.UP.BP,ontology="BP",setName=paste(basename(comparison),"upFC",FC, "adjp", adjp, sep=".")) } else { up_out = snakemake@output[[1]] write.table('No Significant Genes', file=up_out) } print("get down genes and make geneList") dn <- deg$padj < adjp & deg$log2FoldChange <= -log2(FC) dn <- unique(rownames(deg[dn,])) all <-unique(names(geneID2GO)) dn.geneList <- factor(as.integer(all %in% dn)) names(dn.geneList) <- all dn.setsize <- sum(as.numeric(levels(dn.geneList))[dn.geneList]) print("setsize for significant genes") dn.setsize if(dn.setsize >= 2){ print("make GO table for down genes") go.DN.BP <- runGO(geneList=dn.geneList,xx=xx,otype="BP",setName=paste(basename(comparison),"downFC",FC, "adjp", adjp, sep=".")) print("make barplot for down genes") drawBarplot(go=go.DN.BP,ontology="BP",setName=paste(basename(comparison),"downFC",FC, "adjp", adjp, sep=".")) }else{ down_out = snakemake@output[[2]] write.table('No Significant Genes', file=down_out) } |
Differential Gene Expression using RNA-Seq
7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 | library(knitr) knitr::opts_chunk$set(echo = TRUE) library(DESeq2) library(ggplot2) library(knitr) library(clusterProfiler) library(biomaRt) library(ReactomePA) library(DOSE) library(KEGG.db) library(org.Mm.eg.db) library(org.Hs.eg.db) library(pheatmap) library(genefilter) library(RColorBrewer) library(GO.db) library(topGO) library(dplyr) library(gage) library(ggsci) |
472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 | source("https://bioconductor.org/biocLite.R") biocLite("DESeq2") ; library(DESeq2) biocLite("ggplot2") ; library(ggplot2) biocLite("clusterProfiler") ; library(clusterProfiler) biocLite("biomaRt") ; library(biomaRt) biocLite("ReactomePA") ; library(ReactomePA) biocLite("DOSE") ; library(DOSE) biocLite("KEGG.db") ; library(KEGG.db) biocLite("org.Mm.eg.db") ; library(org.Mm.eg.db) biocLite("org.Hs.eg.db") ; library(org.Hs.eg.db) biocLite("pheatmap") ; library(pheatmap) biocLite("genefilter") ; library(genefilter) biocLite("RColorBrewer") ; library(RColorBrewer) biocLite("GO.db") ; library(GO.db) biocLite("topGO") ; library(topGO) biocLite("dplyr") ; library(dplyr) biocLite("gage") ; library(gage) biocLite("ggsci") ; library(ggsci) |
data / bioconductor
GO.db
A set of annotation maps describing the entire Gene Ontology: A set of annotation maps describing the entire Gene Ontology assembled using data from GO