Workflow Steps and Code Snippets

2 tagged steps and code snippets that match keyword Homo.sapiens

Code used for the manuscript 'Network reconstruction for trans acting genetic loci using multi-omics data and prior information' by Hawe et al., 2022 in Genome Medicine

 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
log <- file(snakemake@log[[1]], open="wt")
sink(log)
sink(log, type="message")

# ------------------------------------------------------------------------------
print("Load libraries and source scripts.")
# ------------------------------------------------------------------------------
library(qvalue)
library(data.table)
library(graph)
library(fdrtool)
library(Homo.sapiens)
library(pheatmap)

source("scripts/lib.R")
source("scripts/priors.R")

# ------------------------------------------------------------------------------
print("Get snakemake params.")
# ------------------------------------------------------------------------------
# input
fgene_priors <- snakemake@input[["gg_priors"]]
feqtlgen_eqtl_priors <- snakemake@input[["eqtlgen_eqtl_priors"]]
fgtex_eqtl_priors <- snakemake@input[["gtex_eqtl_priors"]]
fcpgcontext <- snakemake@input[["cpg_context"]]
ftsscontext <- snakemake@input[["tss_context"]]
fppi <- snakemake@input[["ppi"]]
franges <- snakemake@input[["ranges"]]
fcpg_annot <- snakemake@input[["cpg_annot"]]

# params
sentinel <- snakemake@wildcards$sentinel
eqtl_prior_type <- snakemake@params$eqtl_prior_type

# output
fout <- snakemake@output[[1]]
fplot <- snakemake@output[[2]]

# ------------------------------------------------------------------------------
print("Loading data.")
# ------------------------------------------------------------------------------

print("Loading PPI db.")
ppi_db <- readRDS(fppi)

# load ranges object
ranges <- readRDS(franges)

# get all entities as single vector
nodes <- c(sentinel, ranges$snp_genes$SYMBOL,
           ranges$tfs$SYMBOL, ranges$spath$SYMBOL)

if(ranges$seed == "meqtl") {
  nodes <- c(nodes, with(ranges,
                         c(cpg_genes$SYMBOL, names(cpgs))))
} else {
  nodes <- c(nodes, ranges$trans_genes$SYMBOL)
}
nodes <- unique(nodes)

# ------------------------------------------------------------------------------
print("Load gtex priors, extract link priors.")
# ------------------------------------------------------------------------------
feqtl <- ifelse("eqtlgen" %in% eqtl_prior_type, 
                feqtlgen_eqtl_priors, 
                fgtex_eqtl_priors)
load_eqtl_priors(sentinel, feqtl, eqtl_prior_type)
load_genegene_priors(fgene_priors)

# set appropriate context
if(ranges$seed == "meqtl") {
  fcontext <- fcpgcontext
} else {
  fcontext <- ftsscontext
}

print("Annotation context is:")
print(fcontext)

print("Retrieving link priors.")
pr <- get_link_priors(ranges, nodes, ppi_db, fcontext, 
                      fcpg_annot)

# create a heatmap to be able to look at the priors
pheatmap(filename = fplot, pr, cex=0.7)

# ------------------------------------------------------------------------------
print("Saving data.")
# ------------------------------------------------------------------------------
saveRDS(pr, file=fout)
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
log <- file(snakemake@log[[1]], open = "wt")
sink(log)
sink(log, type = "message")

# ------------------------------------------------------------------------------
# Load libraries and source scripts
# ------------------------------------------------------------------------------
suppressPackageStartupMessages(library(qvalue))
suppressPackageStartupMessages(library(data.table))
suppressPackageStartupMessages(library(graph))
suppressPackageStartupMessages(library(parallel))
suppressPackageStartupMessages(library(fdrtool))
suppressPackageStartupMessages(library(Homo.sapiens))
suppressPackageStartupMessages(library(rtracklayer))
suppressPackageStartupMessages(library(FDb.InfiniumMethylation.hg19))
source("scripts/lib.R")
source("scripts/priors.R")

# ------------------------------------------------------------------------------
# Get snakemake params
# ------------------------------------------------------------------------------

# inputs
feqtl <- snakemake@input[["eqtl"]]
fsnpinfo <- snakemake@input[["snpinfo"]]
fexpr <- snakemake@input[["expr"]]
fsampleinfo <- snakemake@input[["sampleinfo"]]
fpheno <- snakemake@input[["pheno"]]
fppi <- snakemake@input[["ppi"]]
dplots <- snakemake@params$plot_dir

# outputs
fout_gene_priors <- snakemake@output$gene_priors
fout_eqtl_priors <- snakemake@output$eqtl_priors

# ------------------------------------------------------------------------------
# Start processing
# ------------------------------------------------------------------------------

print("Loading PPI db.")
ppi_db <- readRDS(fppi)

# simply delegate
create_priors(
  feqtl,
  fsnpinfo,
  fexpr,
  fsampleinfo,
  fpheno,
  dplots,
  ppi_db,
  fout_gene_priors,
  fout_eqtl_priors
)

if (FALSE) {
  # ------------------------------------------------------------------------------
  print("Prepare the Banovich based priors, i.e. TF-CpG priors.")
  # ------------------------------------------------------------------------------
  # methylation data
  meth <-
    fread("data/current/banovich-2017/methylation/full_matrix.txt",
          data.table = F)
  rownames(meth) <- meth$V1
  meth$V1 <- NULL
  cpgs <- features(FDb.InfiniumMethylation.hg19)
  cpgs <- cpgs[rownames(meth)]

  # expression data
  expr <-
    read.table(
      "data/current/banovich-2017/xun_lan/allTFexp.withHeader",
      header = T,
      sep = "\t",
      stringsAsFactors = F
    )

  # apparently the table contains duplicated entries, remove them
  expr <- expr[!duplicated(expr), ]
  rownames(expr) <- unique(expr[, 1])

  samples <- intersect(colnames(expr), colnames(meth))

  expr <- t(expr[, samples])
  meth <- t(meth[, samples])

  # ------------------------------------------------------------------------------
  print("Get (our) chip-seq context for the cpgs.")
  # ------------------------------------------------------------------------------
  tfbs_ann <- get_tfbs_context(names(cpgs), fcpgcontext)

  # ------------------------------------------------------------------------------
  print("For each TF, get the correlation to each of the CpGs it is bound nearby")
  # ------------------------------------------------------------------------------
  pairs <- lapply(colnames(expr), function(tf) {
    # get columns for tf
    sub <-
      tfbs_ann[, grepl(tf, colnames(tfbs_ann), ignore.case = T), drop = F]
    rs <- rowSums(sub)
    bound_cpgs <- names(rs[rs > 0])

    assoc <- unlist(mclapply(bound_cpgs, function(c) {
      cor.test(expr[, tf],
               meth[, c],
               method = "pearson")$p.value
    }, mc.cores = threads))

    cbind.data.frame(
      TF = rep(tf, length(assoc)),
      CpG = bound_cpgs,
      rho = assoc,
      stringsAsFactors = F
    )
  })

  # ------------------------------------------------------------------------------
  print("Collect and finalize results.")
  # ------------------------------------------------------------------------------
  tab <- do.call(rbind, pairs)
  colnames(tab) <- c("TF", "CpG", "pval")
  tab$qval <- qvalue(tab$pval)$lfdr
  tab$prior <- 1 - tab$qval
  head(tab)
  write.table(
    file = "results/current/tf-cpg-prior.txt",
    sep = "\t",
    col.names = NA,
    row.names = T,
    quote = F,
    tab
  )
}
# ------------------------------------------------------------------------------
print("Session info:")
# ------------------------------------------------------------------------------
sessionInfo()
data / bioconductor

Homo.sapiens

Annotation package for the Homo.sapiens object: Contains the Homo.sapiens object to access data from several related annotation packages.