Advantages of KCCA-Based GRN Construction in Propanet


Propanet with KCCA GRN

Advantages of KCCA-based GRN construction

  • Complex relationships in which multiple transcription factors (TFs) regulate multiple target genes (TGs) are modeled

  • Different combinations of co-working TFs are captured; a minimal sketch of the idea is given below
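To make this concrete, here is a minimal, hypothetical sketch: canonical correlation analysis finds one weight vector over a TF expression matrix and one over a TG expression matrix such that the two projected profiles are maximally correlated, so a group of TFs is linked to a group of TGs jointly rather than edge by edge. The example uses scikit-learn's plain linear CCA as a stand-in for the weighted kernel CCA implemented in src/2_GRN_inference.py; the gene names, matrix sizes, and the outer-product scoring at the end are illustrative assumptions (2_GRN_inference.py ranks TF-TG pairs by the dot product of their canonical weight vectors).

import numpy as np
import pandas as pd
from sklearn.cross_decomposition import CCA

rng = np.random.default_rng(0)

# toy data: 20 samples, 3 TFs and 5 TGs (all hypothetical)
tf_exp = pd.DataFrame(rng.normal(size=(20, 3)), columns=["TF1", "TF2", "TF3"])
tg_exp = pd.DataFrame(rng.normal(size=(20, 5)), columns=[f"TG{i}" for i in range(1, 6)])
# make two TGs depend on a combination of TF1 and TF2 so CCA has signal to find
tg_exp["TG1"] += 0.8 * tf_exp["TF1"] + 0.5 * tf_exp["TF2"]
tg_exp["TG2"] += 0.6 * tf_exp["TF1"] - 0.4 * tf_exp["TF2"]

cca = CCA(n_components=1)
cca.fit(tf_exp, tg_exp)

# canonical weights: one embedding value per TF and per TG
tf_embed = pd.Series(cca.x_weights_[:, 0], index=tf_exp.columns)
tg_embed = pd.Series(cca.y_weights_[:, 0], index=tg_exp.columns)

# score every TF-TG pair by the product of its embeddings
scores = pd.DataFrame(np.outer(tf_embed, tg_embed), index=tf_exp.columns, columns=tg_exp.columns)
print(scores.round(2))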

Code Snippets

shell:
    "python src/6_make_result.py {params.result_dir} --kegg {params.kegg}" 
SnakeMake From line 24 of main/Snakefile
shell:
    "python src/5_extract_target_genes.py --nwk {input.nwk} --deg {input.DEGli} --tf {params.tf_li_file} \
    --tfrank {input.tfranktrim} --out {params.output_dir}/subnetwork.{wildcards.i}"
SnakeMake From line 40 of main/Snakefile
shell:
    "python src/4_main_propanet.py -TFliFile {params.tf_li_file} \
    -nwkFile {params.temp}/{params.prefix}.tp -expFile {params.exp_file} \
    -binFile {params.seed_file} -N {params.n_sample} -p {params.n_threads} \
    -cond {params.prefix} -outD {params.temp}"
SnakeMake From line 66 of main/Snakefile
shell:
    "python src/3_network_weight.py -nwk {input} -exp {params.exp_file} -o {output} -p {params.n_threads}"
SnakeMake From line 86 of main/Snakefile
run:
    import pickle as pkl

    # snakemake's input/output are Namedlist objects, so index the single file
    with open(input[0], "rb") as f:
        data = pkl.load(f)

    with open(output[0], "w") as f:
        for key, val in data.items():
            for start, end in val:
                f.write(f"{start}\t{end}\n")
SnakeMake From line 96 of main/Snakefile
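This rule appears to flatten the dictionary pickled by src/2_GRN_inference.py (shown in full further down the page), which maps a (TF community, TG community) pair to a list of inferred (TF, TG) edges. A minimal, hypothetical illustration of that structure and the resulting TSV:

# hypothetical contents of the pickled dictionary
data = {
    (0, 3): [("TF1", "TG7"), ("TF2", "TG7")],
    (1, 3): [("TF5", "TG9")],
}

with open("grn_edges.tsv", "w") as f:   # stand-in output name
    for key, val in data.items():
        for start, end in val:
            f.write(f"{start}\t{end}\n")
# grn_edges.tsv now holds one "TF<TAB>TG" line per inferred edge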
shell:
    "python src/2_GRN_inference.py {input.template_network} \
    {input.gene_exp_profile} {input.deg_profile} \
    {output} -TFli {input.tf_list} -TFmodule {input.dict_cluster_TF} \
    -TGmodule {input.dict_cluster_TG} -nComp {params.n_comp} -thr {params.edge_thr} \
    -reg {params.reg} -nThreads {params.nThreads} {params.norm}"
SnakeMake From line 127 of main/Snakefile
run:
    import pandas as pd
    import pickle

    df_TG_community_groupBy = pd.read_csv(input.tsv, sep='\t')
    # group genes by community ID and pickle the resulting dict (output is a Namedlist, hence output[0])
    with open(output[0], 'wb') as f:
        pickle.dump(df_TG_community_groupBy.groupby('community')['gene'].apply(list).to_dict(), f)
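This run block groups the gene/community table written by src/1_clustering.R (invoked in the next snippet) into a {community: [genes]} dictionary. A tiny, hypothetical example of the transformation:

import pandas as pd

# hypothetical two-column clustering output (columns as used above)
df = pd.DataFrame({"gene": ["g1", "g2", "g3"], "community": [1, 1, 2]})

print(df.groupby("community")["gene"].apply(list).to_dict())
# -> {1: ['g1', 'g2'], 2: ['g3']}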
shell:
    "Rscript src/1_clustering.R {input.inst_nwk_TF} {output.cluster} {output.edgelist}"
SnakeMake From line 152 of main/Snakefile
shell:
    "python src/0_instantiate_nwk.py {input.template_nwk} \
    {input.gene_exp} -corrCut {params.corrCut} -o {output.inst_nwk} -nThreads {params.n_threads}"  
SnakeMake From line 164 of main/Snakefile
run:
    import pandas as pd
    import numpy as np

    df_TF = pd.read_csv(input.TF_li,sep='\t')
    df_nwk = pd.read_csv(input.PPI,sep='\t') 

    df_nwk_TF = df_nwk.loc[lambda x:np.logical_and(x.protein1.isin(df_TF.Name),x.protein2.isin(df_TF.Name))]
    df_nwk_TF.to_csv(output.PPI_TF,sep='\t',index=False)

    df_nwk_TG = df_nwk.loc[lambda x:np.logical_and(~x.protein1.isin(df_TF.Name),~x.protein2.isin(df_TF.Name))]
    df_nwk_TG.to_csv(output.PPI_TG,sep='\t',index=False)
run:
    import pandas as pd
    from os.path import join

    df = pd.read_csv(input.deg_binary, sep = "\t", index_col = 0)

    # n_timepoints and intermediate_results_dir are assumed to be defined at the top
    # of the Snakefile; the sub-path must be relative, since join() drops earlier
    # components when a later one starts with "/"
    for i in range(0, n_timepoints):
        df[df.columns[i]].to_csv(join(intermediate_results_dir, "timepoints", "deg_profile.tp" + str(i+1)), sep = "\t")
run:
    import pandas as pd

    df = pd.read_csv(input.exp_file, sep = "\t", index_col = 0)
    for i, sample in enumerate(range(params.n_replicates - 1, params.n_timepoints, params.n_replicates)):
import pandas as pd
import argparse
import numpy as np
import time
import sys
from multiprocessing import Pool
from scipy.stats import pearsonr

def isNum(x):
    try:
        float(x)
        return True
    except:
        return False

def corrCut(nwk,cutoff=None):
    '''correlation cutoff, positive sorting'''
    nwk.dropna(subset=['correlation'],inplace=True)
    nwk.sort_values(by=['correlation'],inplace=True,ascending=False)
    if cutoff!=None: 
        return nwk.loc[lambda x:x.correlation>=cutoff]
    else:
        return nwk

def setMinExp(nwk,exp,expCut):
    '''remove gene whose expression is lower than expCut'''
    filtered_gene = exp[exp.max(axis=1)>1]['Hybridization REF']
    boolean_mask = np.logical_and(nwk['protein1'].isin(filtered_gene),nwk['protein2'].isin(filtered_gene))
    return nwk[boolean_mask]

def expCut(nwk,exp,sample_list,expCut):
    '''remove gene whose mean of group(mutated/not-mutated) expression is lower than expCut'''
    with open(sample_list) as f:
        mutSamples=f.read().strip().split()
    exp['no_mut']=exp.loc[:,~exp.columns.isin(mutSamples)].mean(axis=1)
    exp['mut']=exp.loc[:,exp.columns.isin(mutSamples)].mean(axis=1)
    boolean_mask = np.logical_or(exp['no_mut']>=1,exp['mut']>=1)
    gene_selected = exp[boolean_mask]['Hybridization REF']
    boolean_mask2 = np.logical_and(nwk['protein1'].isin(gene_selected),nwk['protein2'].isin(gene_selected))
    return nwk[boolean_mask2]

def FCcut(nwk,FC_df,FCcut):
    keys = FC_df.iloc[:,0]
    values = FC_df.iloc[:,1]
    dictionary = dict(zip(keys, values))
    dictionary.pop('?','Not Found')
    first_col = np.array([dictionary[i] for i in nwk.iloc[:,0]])
    second_col = np.array([dictionary[i] for i in nwk.iloc[:,1]])
    boolean_mask = np.logical_and(abs(first_col) >= FCcut, abs(second_col) >= FCcut)
    boolean_mask2 = nwk['protein1'].apply(lambda x: dictionary[x])*nwk['protein2'].apply(lambda x: dictionary[x])>0
    bigFC_nwk = nwk[boolean_mask & boolean_mask2]
    return bigFC_nwk

if __name__=="__main__":
    parser = argparse.ArgumentParser(description='python 0_instantiate_nwk.py nwkFile exp -corrCut [] -nThreads [] -o []')
    parser.add_argument('nwk',help='network')
    parser.add_argument('exp',help='exp File')
    parser.add_argument('-corrCut',type=float, required=False,help='correlation cutoff')
    parser.add_argument('-nThreads',type=int, required=False,default=1)
    parser.add_argument('-o',required=True,help='output')
    args=parser.parse_args()

    ####correlation score combined with string score
    start=time.time()
    exp=pd.read_csv(args.exp,sep='\t',header=0,index_col=0)

    #remove duplicates
    data=[]
    with open(args.nwk) as f:
        for line in f.readlines():
            tmp=line.strip().split('\t')
            data.append(sorted(tmp))

    df_nwk=pd.DataFrame(data[1:],columns=['Gene_A','Gene_B'],dtype=float)

    df_nwk.drop_duplicates(subset=['Gene_A','Gene_B'],inplace=True)
    df_nwk_filt = df_nwk.loc[lambda x:np.logical_and(x.Gene_A.isin(exp.index),x.Gene_B.isin(exp.index))].loc[lambda x:x.Gene_A != x.Gene_B]

    #make exp dictionary to calculate correlation
    lst_exps=dict() 
    with open(args.exp) as f:
        lines=f.readlines()
    for line in lines:
        s=line.strip().split('\t')
        if not isNum(s[1]):
            continue
        else:
            gene, exps = s[0], list(map(float,s[1:]))
            lst_exps[gene]=exps
    lst_pairs2=zip(df_nwk_filt['Gene_A'],df_nwk_filt['Gene_B'])

    def myCorr(x):
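        # Pearson correlation for one candidate edge; genes with constant
        # expression get correlation 0 to avoid a NaN from pearsonr.
        # lst_exps is defined at module scope, so it is shared with the worker
        # processes created by multiprocessing.Pool (via fork on Linux).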
        g1,g2=sorted(x)
        if np.all(np.array(lst_exps[g1])==lst_exps[g1][0]) or np.all(np.array(lst_exps[g2])==lst_exps[g2][0]):
            val,pval=(0,1)
        else:
            val, pval = pearsonr(lst_exps[g1],lst_exps[g2])
        return (g1,g2,val)

    p = Pool(args.nThreads)
    res2=p.imap_unordered(myCorr, lst_pairs2)
    p.close()
    p.join()

    corr_res2=[]
    for g1,g2,val in res2:
        if g1==g2:
            continue
        corr_res2.append([g1,g2,val])

    df_nwk_corr=pd.DataFrame(corr_res2,columns=['Gene_A','Gene_B','correlation'])
    df_nwk_corrCut=corrCut(df_nwk_corr,args.corrCut)

    end=time.time()
    time_elapsed=end-start
    df_nwk_corrCut.to_csv(args.o,sep='\t',header=True,index=False)
    print(args.o, 'time_elapsed', time_elapsed)
library(igraph)
library(dplyr)

args = commandArgs(trailingOnly=TRUE)
file_name = args[1] 
out_file = args[2] 
out_file2 = args[3]
## Data import and cleaning
print('START')
parsed_genes = read.csv(file_name, sep='\t',header=TRUE,stringsAsFactors = FALSE)

## Creating igraph object

edgelist = as.matrix(parsed_genes[,1:2])
g = graph.edgelist(edgelist, directed=FALSE)

## Community detection : Multilevel algorithm
mlc <- multilevel.community(g)

## Writing the results to a file

mlc_community_list <- as.data.frame(as.matrix(membership(mlc)))
mlc_community_list$gene <- rownames(mlc_community_list)
mlc_community_member <- arrange(mlc_community_list, V1) %>%
  select(gene, V1)
colnames(mlc_community_member)[2] <- 'community'

community_info = mlc_community_member

# Lookup Table
vals <- community_info[,2]
keys <- community_info[,1]
lookup <- setNames(vals, keys)

# keep only intra-community edges (endpoints assigned to the same community)
index = which(lookup[parsed_genes[,1]] != lookup[parsed_genes[,2]])
filtered_edgelist <- if (length(index) > 0) parsed_genes[-index,] else parsed_genes

write.table(mlc_community_member, row.names=FALSE, col.names=TRUE, file=out_file, sep='\t', quote=FALSE)
write.table(filtered_edgelist, row.names=FALSE, col.names=TRUE, file=out_file2, sep='\t', quote=FALSE)
import itertools
import pickle
import argparse
import operator

from multiprocessing import Pool

import networkx as nx
import numpy as np
import pandas as pd
from scipy.linalg import norm, eigh
from scipy.stats import fisher_exact
from sklearn.preprocessing import MinMaxScaler

from utils import rcca

def weighted_kcca(data, reg=0.5, numCC=None, kernelcca=True, ktype='linear', gausigma=1.0, degree=2):
    """Set up and solve the kernel CCA eigenproblem"""
    if kernelcca:
        kernel = [rcca._make_kernel(d, ktype=ktype, gausigma=gausigma, degree=degree) for d in data]
    else:
        kernel = [d.T for d in data]

    nDs = len(kernel)
    nFs = [k.shape[0] for k in kernel]
    numCC = min([k.shape[1] for k in kernel]) if numCC is None else numCC

    # Get the auto- and cross-covariance matrices
    crosscovs = [np.dot(ki, kj.T) for ki in kernel for kj in kernel]

    # Allocate left-hand side (LH) and right-hand side (RH):
    LH = np.zeros((sum(nFs), sum(nFs)))
    RH = np.zeros((sum(nFs), sum(nFs)))

    # Fill the left and right sides of the eigenvalue problem
    for i in range(nDs):
        RH[sum(nFs[:i]) : sum(nFs[:i+1]),
           sum(nFs[:i]) : sum(nFs[:i+1])] = (crosscovs[i * (nDs + 1)]
                                             + reg * np.eye(nFs[i]))

        for j in range(nDs):
            if i != j:
                LH[sum(nFs[:j]) : sum(nFs[:j+1]),
                   sum(nFs[:i]) : sum(nFs[:i+1])] = crosscovs[nDs * j + i]

    LH = (LH + LH.T) / 2.
    RH = (RH + RH.T) / 2.

    maxCC = LH.shape[0]
    # subset_by_index is the non-deprecated spelling of the former 'eigvals' keyword
    r, Vs = eigh(LH, RH, subset_by_index=[maxCC - numCC, maxCC - 1])
    r[np.isnan(r)] = 0
    rindex = np.argsort(r)[::-1]
    comp = []
    Vs = Vs[:, rindex]
    for i in range(nDs):
        comp.append(Vs[sum(nFs[:i]):sum(nFs[:i + 1]), :numCC])
    return np.sort(r)[::-1], comp
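# In matrix terms, weighted_kcca builds the regularized kernel CCA generalized
# eigenproblem  LH v = r RH v : the off-diagonal blocks of LH hold the
# cross-covariances K_i K_j^T between views, the diagonal blocks of RH hold the
# auto-covariances K_i K_i^T + reg * I, and the eigenvectors belonging to the
# top numCC eigenvalues are split back into one weight block per view.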

class _CCABase(rcca._CCABase):
    def __init__(self, numCV=None, reg=None, regs=None, numCC=None,
                 numCCs=None, kernelcca=True, ktype=None, verbose=False,
                 select=0.2, cutoff=1e-15, gausigma=1.0, degree=2):
        self.numCV = numCV
        self.reg = reg
        self.regs = regs
        self.numCC = numCC
        self.numCCs = numCCs
        self.kernelcca = kernelcca
        self.ktype = ktype
        self.cutoff = cutoff
        self.select = select
        self.gausigma = gausigma
        self.degree = degree
        if self.kernelcca and self.ktype == None:
            self.ktype = 'linear'
        self.verbose = verbose

    def train(self, data):
        nT = data[0].shape[0]
        if self.verbose:
            print('Training CCA, kernel = %s, regularization = %0.4f, '
                  '%d components' % (self.ktype, self.reg, self.numCC))

        eigenV, comps = weighted_kcca(data, self.reg, self.numCC, kernelcca=self.kernelcca, ktype=self.ktype, gausigma=self.gausigma, degree=self.degree)
        ###weighted sample weight, weighted by eigenvalue
        comps = [np.array([eigenV]*nT)*comps[i] for i in range(len(data))]

        self.cancorrs, self.ws, self.comps = rcca.recon(data, comps, kernelcca=self.kernelcca)
        if len(data) == 2:
            self.cancorrs = self.cancorrs[np.nonzero(self.cancorrs)]
        return self

class weighted_CCA(_CCABase):
    """Attributes:
        reg (float): regularization parameter. Default is 0.1.
        numCC (int): number of canonical dimensions to keep. Default is 10.
        kernelcca (bool): kernel or non-kernel CCA. Default is True.
        ktype (string): type of kernel used if kernelcca is True.
                        Value can be 'linear' (default) or 'gaussian'.
        verbose (bool): default is True.
    Returns:
        ws (list): canonical weights
        comps (list): canonical components
        cancorrs (list): correlations of the canonical components
                         on the training dataset
        corrs (list): correlations on the validation dataset
        preds (list): predictions on the validation dataset
        ev (list): explained variance for each canonical dimension
    """
    def __init__(self, reg=0.5, numCC=10, kernelcca=True, ktype=None,
                 verbose=False, cutoff=1e-15):
        super(weighted_CCA, self).__init__(reg=reg, numCC=numCC, kernelcca=kernelcca,
                                  ktype=ktype, verbose=verbose, cutoff=cutoff)

    def train(self, data):
        return super(weighted_CCA, self).train(data)

def kcca_embedding(TF_exp, TG_exp, normalize, n_comp=1, kernel='gaussian', reg = 0.5):
    kcca = weighted_CCA(kernelcca=True, ktype=kernel, reg = reg, numCC = n_comp) 
    kcca.train([TF_exp.to_numpy(), TG_exp.to_numpy()])

    #print('x_weight_dim', kcca.ws[0].shape)
    #print('y_weight_dim', kcca.ws[1].shape)

    TF_embed = pd.DataFrame(kcca.ws[0], index=TF_exp.columns)
    TG_embed = pd.DataFrame(kcca.ws[1], index=TG_exp.columns)

    if normalize == True:
        TF_embed_norm = TF_embed.apply(lambda x:x/norm(x), axis=1).fillna(0) #L2 norm
        TG_embed_norm = TG_embed.apply(lambda x:x/norm(x), axis=1).fillna(0) #L2 norm
        return (kcca, TF_embed_norm, TG_embed_norm)
    else:
        return (kcca, TF_embed, TG_embed)

def dotProduct(instance1, instance2):
    res = 0
    for x in range(len(instance1)):
        res += instance1[x] * instance2[x]
    return res


def getNeighbors(instance):
    '''Rank all rows of trainingSet by dot-product similarity to one row of test
    ===========================================================================
    '''
    test, idx1, trainingSet = instance
    testInstance = test.loc[idx1,:]
    li_distances = []
    length = len(testInstance)-1
    for i in range(len(trainingSet)):
        #dist = euclideanDistance(testInstance, trainingSet.iloc[i])
        #dist = correlation(testInstance, trainingSet.iloc[i])
        dist = dotProduct(testInstance, trainingSet.iloc[i])
        li_distances.append((trainingSet.index[i], dist))
    #sort list by the second item in each element from list
    #e.g.) li_distances = [(tg1,dist), (tg2,dist)]
    li_distances.sort(key = operator.itemgetter(1),reverse=True)
    dict_res = dict()
    dict_res[idx1] = li_distances
    #e.g.) dict_res = {tf1:[(tg1,dist),(tg2,dist),(tg3,dist)]}
    return dict_res

def inp_pair(df1,df2):
    li1 = set(df1.index)
    for tf in li1:
        yield (df1,tf,df2)

def TFTG_nwk(df_TF, df_TG):
    outs = {}
    for tf in set(df_TF.index):
        inst = (df_TF,tf,df_TG) 
        outs.update(getNeighbors(inst))
    return outs

def inp_pair_modularized_TFTG_nwk(nx_graph, dict_tf_comm, dict_tg_comm, edge_cutoff, df_exp, n_comp, reg, normalize):
    for comID_tf,comID_tg in itertools.product(dict_tf_comm.keys(),dict_tg_comm.keys()):
        yield (nx_graph, dict_tf_comm, dict_tg_comm, comID_tf, comID_tg, edge_cutoff, df_exp, n_comp, reg, normalize)

def mergeDicts(iter_dicts):
    dict_all=dict()
    for each_dict in iter_dicts:
        dict_all.update(each_dict)
    return dict_all

def modularized_TFTG_nwk(instance):
    def all_tftg_candidates(nx_obj, tfs, tgs):
        tfs_common, tgs_common = set(tfs).intersection(nx_obj.nodes()), set(tgs).intersection(nx_obj.nodes())
        dict_cent_allnodes=nx.betweenness_centrality_subset(nx_obj,tfs_common,tgs_common)
        set_nodes = set(pd.DataFrame.from_dict(dict_cent_allnodes,orient='index',columns=['paths']).loc[lambda x:x.paths !=0].index.to_list()) ##########
        nx_obj = nx_obj.subgraph(set_nodes.union(tfs).union(tgs)) ###########
        dict_tftgs = nx.to_dict_of_lists(nx_obj)
        dict_tftgs_filtered ={}
        for key in dict_tftgs:
            if len(dict_tftgs[key])==0:
                continue
            else:
                dict_tftgs_filtered[key] = dict_tftgs[key]
        return nx_obj, dict_tftgs_filtered

    def addEdges(dict_pair):
        li_res = []
        for tf in dict_pair.keys():
            for tg,dist in dict_pair[tf]:
                li_res.append((tf,tg,dist))
        return li_res

    def filterEdges(dict_pair,cutoff):
        dict_res = {}
        for tf in dict_pair.keys():
            dict_res[tf] = [(dist) for tg,dist in dict_pair[tf] if dist > cutoff]
        return dict_res

    def list2dict(li_pair):
        dict_res = {}
        for tf,tg in li_pair:
            if tf in dict_res:
                dict_res[tf].append(tg)
            else:
                dict_res[tf]=[tg]
        return dict_res

    def rearrange_dict(dict1):
        dict2 = {}
        for i1 in dict1:
            for i2,i3 in dict1[i1]:
                dict2[(i1,i2)]=i3
        return dict2

    (nx_graph, dict_tf_comm, dict_tg_comm, comID_tf, comID_tg,
     edge_cutoff, df_exp, n_comp, reg, normalize) = instance
    res = {}
    # print('TF_cluster: {}, \t TG_cluster:{}'.format(comID_tf,comID_tg))
    nx_graph_tmp, dict_tftg_candidates_tmp = all_tftg_candidates(nx_graph,dict_tf_comm[comID_tf],dict_tg_comm[comID_tg])
    if len(nx_graph_tmp.edges)==0:
        res[(comID_tf,comID_tg)] = []
        return res   
    else:    
        df_graph_tmp = pd.DataFrame(nx_graph_tmp.edges,columns=['TF','TG'])
    ### detecting valid TFTG pair with kCCA
    set_allNodes = set(nx_graph_tmp.nodes())
    set_outDegreeNodes = set(df_graph_tmp.loc[:,'TF'].to_list())
    set_visited = set()
    TFs = set(dict_tf_comm[comID_tf]).intersection(set_outDegreeNodes)
    set_visited.update(TFs)
    TGs = []
    for TF in TFs:
        if TF in dict_tftg_candidates_tmp:
            TGs.extend(dict_tftg_candidates_tmp[TF])
    TGs = set(TGs).intersection(set_allNodes)        
    if len(TGs) == 0:
        res[(comID_tf,comID_tg)] = []
        return res
    kcca, TF_embed, TG_embed = kcca_embedding(df_exp.loc[:,df_exp.columns.isin(TFs)],df_exp.loc[:,df_exp.columns.isin(TGs)],normalize=normalize,n_comp=n_comp,reg=reg)
    df_TFTG_pair_final = pd.DataFrame()
    dict_TFTG_pair = rearrange_dict(TFTG_nwk(TF_embed,TG_embed))
    if len(dict_TFTG_pair)==0:
        res[(comID_tf,comID_tg)] = []
        return res
    else:
        df_TFTG_pair = pd.DataFrame.from_dict(dict_TFTG_pair,orient='index',columns=['weight_kCCA'])
    li_TFTG_pair = list(set(nx_graph_tmp.edges()).intersection(set(df_TFTG_pair.index.to_list()))) ######
    df_TFTG_pair = df_TFTG_pair.loc[li_TFTG_pair]
    df_TFTG_pair_final = df_TFTG_pair
    df_TFTG_pair_2nd = df_TFTG_pair
    while True:
        TFs_2nd = set([tg for tf,tg in df_TFTG_pair_2nd.index.to_list()]).intersection(set_outDegreeNodes)
        set_visited.update(TFs_2nd)
        TGs_2nd = set(sum([dict_tftg_candidates_tmp[TF] for TF in TFs_2nd if TF in dict_tftg_candidates_tmp.keys()],[]))-set_visited
        if len(TGs_2nd) == 0:
            break
        else:
            kcca_2nd, TF_embed_2nd, TG_embed_2nd = kcca_embedding(df_exp.loc[:,df_exp.columns.isin(TFs_2nd)],df_exp.loc[:,df_exp.columns.isin(TGs_2nd)],normalize=normalize,n_comp=n_comp,reg=reg)
            dict_TFTG_pair_2nd = rearrange_dict(TFTG_nwk(TF_embed_2nd,TG_embed_2nd))
            if len(dict_TFTG_pair_2nd)==0:
                break 
            else:
                df_TFTG_pair_2nd = pd.DataFrame.from_dict(dict_TFTG_pair_2nd,orient='index',columns=['weight_kCCA'])
            li_TFTG_pair_2nd = list(set(nx_graph_tmp.edges()).intersection(set(df_TFTG_pair_2nd.index.to_list()))) 
            if len(li_TFTG_pair_2nd) == 0:
                break
            df_TFTG_pair_2nd = df_TFTG_pair_2nd.loc[li_TFTG_pair_2nd]
            df_TFTG_pair_final = pd.concat([df_TFTG_pair_final,df_TFTG_pair_2nd],axis=0)
            li_TFTG_pair += li_TFTG_pair_2nd

#####################################################################################################################################

    if df_TFTG_pair_final.shape[0]!=0:
        scaler=MinMaxScaler()
        df_TFTG_pair_final['weight_kCCA_rescaled'] = scaler.fit_transform(df_TFTG_pair_final['weight_kCCA'].values.reshape(-1,1))
        #######edge_cutoff with rescaled edge weight 
        li_TFTG_pair_final = df_TFTG_pair_final.loc[lambda x:x['weight_kCCA_rescaled'] > edge_cutoff].index.to_list() 
        res[(comID_tf,comID_tg)] = li_TFTG_pair_final

#####################################################################################################################################
    else:
        res[(comID_tf,comID_tg)] = []

    with open('./report.pkl', 'wb') as f:
        pickle.dump(res, f) 

    return res 

def all_tftg_candidates(instance):
    nx_obj, dict_tf_comm, dict_tg_comm, tf_id, tg_id = instance
    tfs, tgs = dict_tf_comm[tf_id], dict_tg_comm[tg_id]
    tfs_common, tgs_common = set(tfs).intersection(nx_obj.nodes()), set(tgs).intersection(nx_obj.nodes())
    dict_cent_allnodes=nx.betweenness_centrality_subset(nx_obj,tfs_common,tgs_common)
    set_nodes = set(pd.DataFrame.from_dict(dict_cent_allnodes,orient='index',columns=['paths']).loc[lambda x:x.paths !=0].index.to_list())
    nx_obj = nx_obj.subgraph(set_nodes.union(tfs).union(tgs)) ###########
    dict_res = {}
    li_li_edges = pd.DataFrame(nx_obj.edges).values.tolist()
    set_tup_edges = [(i1,i2) for i1,i2 in li_li_edges]
    dict_res[(tf_id,tg_id)] = set_tup_edges
    return dict_res

def inp_pair_all_tftg_candidates(nx_GRN,dict_cluster_1,dict_cluster_2):
    for tf_id,tg_id in itertools.product(dict_cluster_1.keys(),dict_cluster_2.keys()):
        yield (nx_GRN,dict_cluster_1,dict_cluster_2,tf_id,tg_id)

def edgelist2nodeset(dict_edges):
    dict_nodeSet = {}
    for key in dict_edges:
        dict_nodeSet[key] = set(np.array(dict_edges[key]).flatten())
    return dict_nodeSet

def fisher_exact_test(dict_nodeSet, set_ref, nodeSet):
    dict_jaccard = {}
    set_ref = set_ref.intersection(nodeSet)
    for key in dict_nodeSet:
        if len(set(dict_nodeSet[key])) ==0:
            continue
        numDEGs = len(set(dict_nodeSet[key]).intersection(set_ref))
        coeff, pval= fisher_exact([[numDEGs,len(set_ref)-numDEGs],[len(dict_nodeSet[key])-numDEGs, len(nodeSet)-len(set_ref)+numDEGs]])
        dict_jaccard[key] = pval
    return dict_jaccard    


def GRN_inference(nx_GRN, df_exp, li_deg, tfModule, tgModule, nComp, thr, reg, normalize, nThreads=20):
    df_exp_filt=df_exp.loc[:,set(df_exp.columns.to_list()).intersection(set(li_deg))]

    set_nodes_tmp = set()
    for node in df_exp_filt.columns.to_list():
        if node in nx_GRN.nodes:
            # add the node itself; union(node) on a string would add its characters
            set_nodes_tmp.add(node)
            set_nodes_tmp.update(nx_GRN.neighbors(node))
    numDEGs = len(set_nodes_tmp.intersection(set(li_deg)))
    numTotal = len(set_nodes_tmp)
    nx_GRN_tmp = nx_GRN.subgraph(set_nodes_tmp).copy()
    # use the TF/TG module dicts passed in as arguments rather than the
    # module-level globals of the same content
    tf_filt_keys = [key for key in tfModule if len(set(nx_GRN_tmp.nodes()).intersection(set(tfModule[key])))>1]
    tg_filt_keys = [key for key in tgModule if len(set(nx_GRN_tmp.nodes()).intersection(set(tgModule[key])))>1]
    dict_tf_community_filt = {}
    dict_tg_community_filt = {}

    for key in tf_filt_keys:
        dict_tf_community_filt[key] = tfModule[key]
    for key in tg_filt_keys:
        dict_tg_community_filt[key] = tgModule[key]

    inps = inp_pair_modularized_TFTG_nwk(nx_GRN_tmp, dict_tf_community_filt, dict_tg_community_filt, thr, df_exp, nComp, reg, normalize)

    with Pool(nThreads) as p:
        outs = p.map(modularized_TFTG_nwk,inps)


    return mergeDicts(outs)

if __name__ == "__main__":
    parser = argparse.ArgumentParser(description='python 2_GRN_inference.py nwk_GRN exp deg out -TFli [TF catalog] -TFmodule [] -TGmodule [] -nComp [# components] -thr [thr] -nThreads [] -normalize []')
    parser.add_argument('nwk_GRN',help="GRN to be utilized as a guide TF-TG relation")
    parser.add_argument('exp',help='exp profile of genes')
    parser.add_argument('deg',help='deg list of genes')
    parser.add_argument('out',help='out file name')
    parser.add_argument('-TFli',required=True, help="TF catalog")
    parser.add_argument('-TFmodule',required=True)
    parser.add_argument('-TGmodule',required=True)
    parser.add_argument('-nComp',required=True, type=int, help='# components')
    parser.add_argument('-thr', required=True, type=float, help='')
    parser.add_argument('-reg',type=float)
    parser.add_argument('-nThreads',required=False, type=int)
    parser.add_argument('-normalize',required=False, default=True, action='store_true')
    args = parser.parse_args()

    df_tfs = pd.read_csv(args.TFli,sep='\t')
    li_TFs = df_tfs.iloc[:,0].to_list()
    with open(args.TFmodule,'rb') as f:
        dict_tf_community =pickle.load(f)

    with open(args.TGmodule,'rb') as f:
        dict_tg_community =pickle.load(f)

    GRN = pd.read_csv(args.nwk_GRN,sep='\t')
    GRN = nx.from_pandas_edgelist(GRN,'TF','TG',create_using=nx.DiGraph())

    exp = pd.read_csv(args.exp,sep='\t',index_col=0 ).T 

    deg = pd.read_csv(args.deg,sep='\t',names=['gene']).iloc[:,0].to_list()
    dict_res = GRN_inference(GRN,exp,deg,dict_tf_community,dict_tg_community,args.nComp,args.thr,args.reg,args.normalize,args.nThreads)

    with open(args.out,'wb') as f:
        pickle.dump(dict_res, f)        
import argparse
import sys
import os

import pandas as pd
import numpy as np

from tqdm import tqdm

from scipy.stats import pearsonr
from multiprocessing import Pool, Manager

def calculate_corr(x):
    g1, g2 = x

    try:
        exp1, exp2 = gene_exp.loc[g1, :], gene_exp.loc[g2, :]
    except KeyError:
        # one of the genes is missing from the expression matrix; skip this edge
        print(g1, g2)
        return

    corr, _ = corr_function(exp1, exp2)

    q.put((g1, g2, corr))
    return

if __name__ == "__main__":
    parser = argparse.ArgumentParser(
        description='python %(prog)s -nwk nwkFile -exp expFile -o out')
    parser.add_argument('-nwk', required=True, help='Network file')
    parser.add_argument('-exp', required=True, help='gene expression File')
    parser.add_argument('-o', required=True, help = "output file")
    parser.add_argument('-p', required=True, help="number of threads")
    args = parser.parse_args()

    with open(args.nwk) as f:
        edge_list = list(map(lambda x: x.strip().split(), f.readlines()))

    m = Manager()
    q = m.Queue()

    gene_exp = pd.read_csv(args.exp, sep = "\t", index_col=0)
    corr_function = pearsonr

    print("calculate correlation coefficent...")
    print(len(edge_list))

    with Pool(processes=int(args.p)) as pool:
        with tqdm(total=len(edge_list)) as pbar:
            for _ in pool.imap_unordered(calculate_corr, edge_list):
                pbar.update()

    with open(args.o, 'w') as fw:
        while not q.empty():
            start, end, corr = q.get()
            fw.write('\t'.join([start, end, str(corr)])+'\n')

    print('end...')
import pandas as pd
import numpy as np
import os
import argparse
import networkx as nx
import time
from tqdm import tqdm
import utils.propagation as NP
import utils.target_gene as TG 

def infByNode(TF, g, TFset) :
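    # Iterative DFS from a seed TF over the directed network: 'profit' sums the
    # absolute node weights of the reachable non-TF nodes; the function returns
    # (profit, number of reached nodes excluding the seed, number of reached TGs).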
    visited = set()
    profit = 0
    stack = []
    stack.append(TF)
    while len(stack)>0:
        v = stack.pop()
        if v not in visited :
            visited.add(v)
            if v not in TFset: profit += abs(g.nodes[v]['weight'])
            for s in g.successors(v) :
                if s not in visited:
                    stack.append(s)
    visitedTGs = visited - TFset
    return profit, len(visited)-1, len(visitedTGs) 

def IM(nwk,TFset,repeat) :
    '''
    Influence Maximization
    ==============================
    Input 
        nwk (nx.object)
        TFset (set)
        repeat (int) 
    Output
        infNo (dict): reachability for each TF
        TFRank (list): sorted TF list
    '''

    infNo = {}
    for n in TFset :
        infNo[n]=0.0
    for _ in tqdm(range(repeat)):
        # Produce G'
        g = nwk.copy()
        #for (a,b) in network.edges() :
        #    if np.random.randint(1,1000)>abs(g[a][b]['weight']*1000) :
        #        g.remove_edge(a, b)
                #Calculate influcence (profit)
        for TF in TFset :
            profit, lenInf, _ = infByNode(TF, g, TFset)
            #print TF,profit,lenInf,lenInfTG
            if lenInf>0 and not np.isnan(profit/float(lenInf)) : infNo[TF] += profit/float(lenInf)
    for n in TFset:
        infNo[n]=infNo[n]/float(repeat)
    TFRank = sorted(infNo.keys(), key=lambda x: infNo[x], reverse=True)
    for key in infNo.keys() :
        if (infNo[key]==0.0) : TFRank.remove(key)
    return TFRank, infNo

def TF_adding_NP(DEGli, geneSet, TFli, TFrankFile, DEGnetFile, seed, coverNo=200, coverage=None):
    '''
    Trim TF list with improving correlation using Network propagation    
    =================================================================
    Input
        DEGli (li)
        geneSet (set)
        TFli (li) 
        TFrankFile (str)
        DEGnetFile (str)
        seed (pd.DataFrame)
        coverNo (int)
        coverage (float)           
    Output
        spearmanLi (li)
        cover (li)
        TF_trimmed (li)
        lst_node_weight (li)
    '''
    DEGnet = nx.read_edgelist(DEGnetFile,data=(('weight',float),),create_using=nx.DiGraph())
    TFset=set(TFli)
    nodeCnt=len(set(DEGnet.nodes())-TFset)
    TFli_rank=pd.read_csv(TFrankFile,sep='\t',header=None)[0].tolist()
    corr=[]
    cover=[]
    TF_trimmed=[]
    TG_set=set()
    prevCor = 0
    currTol = 0
    spearmanLi = []
    for iTF,TF in tqdm(enumerate(TFli_rank)):
        #seedFile: only selected TF( TFli[:iTF] ) is marked, otherwise 0
        TF_trimmed.append(TFli_rank[iTF])
        seed2weight = seed.loc[TF_trimmed,:].T.to_dict('records')[0]
        wk = NP.Walker(DEGnetFile, absWeight=True)
        spearman, lst_node_weight = wk.run_exp(seed2weight, TFset, 0.1, normalize=False)
        spearmanLi.append(spearman)
        corTmp=0
        corTmp=spearman[0]
        corr.append(corTmp)
        #TG_set |= TG.Target_genes(TF,DEGnet,DEGli,TFset)
        edges, TGalltmp, TGtmp = TG.Target_genes(TF,DEGnet,DEGli,TFset,geneSet)
        if TG_set == (TG_set | TGtmp): 
            TF_trimmed.pop()
            continue
        TG_set |= TGtmp

        c=float(len(TG_set))/nodeCnt
        cover.append(c)

        if prevCor > corTmp : currTol +=1
        else : currTol = 0
        if coverage != None and (cover[-1] > coverage or len(TG_set)>coverNo):
            TF_trimmed.pop()
            break
    seed2weight = seed.loc[TF_trimmed,:].T.to_dict('records')[0]
    _, lst_node_weight = wk.run_exp(seed2weight, TFset, 0.1, normalize=False)
    return spearmanLi, cover, TF_trimmed, lst_node_weight

if __name__ == "__main__":    
    parser = argparse.ArgumentParser()
    parser.add_argument('--tf',help='TF list File')
    parser.add_argument('--nwk',help='Network file')
    parser.add_argument('--lvl',help='DE level of genes')
    parser.add_argument('--deg',help='deg list file')
    parser.add_argument('--out',help='project (output directory) name')
    parser.add_argument('--imround', help="influence maximization round", default=1000)
    parser.add_argument('-p',default='10',type=int,help='# process for multiprocessing')
    parser.add_argument('-c',default=0.5,type=float,help='coverage threshold (a float between 0 and 1)')
    parser.add_argument('-coverNo',default='200',type=float)
    args=parser.parse_args()

    if not os.path.exists(os.path.dirname(args.out)): 
        os.makedirs(os.path.dirname(args.out))

    with open(args.tf) as f:
        tfli = f.read().strip().split()

    with open(args.deg) as f:
        degli = set(f.read().strip().split())

    #networkFile --> networkx object, expFile --> pd.DataFrame
    print('Number of transcription factors', len(set(tfli)))
    print('Number of differentially expressed genes', len(set(degli)))

    network = nx.read_edgelist(args.nwk, data=(('weight',float),),create_using=nx.DiGraph())

    #IM, NP for each timepoint
    print(network)

    #step0: parsing exp data, network for each timepoint
    start=time.time() 

    weight_tp = pd.read_csv(args.lvl, sep = "\t")
    DEG_tp = weight_tp.where(weight_tp["gene"].isin(set(degli)), other = 0)

    DEGliFile = args.deg

    ##Make nwk with DEGs in specific timepoint
    network = network.subgraph(set(degli)|set(tfli))
    DEGnetFile = args.out + ".nwk" 
    nx.write_edgelist(network,DEGnetFile,data=['weight'],delimiter='\t')

    ##Assign new weight to timepoint-specific network
    weight_DEG_nwk = weight_tp[weight_tp["gene"].isin(network.nodes())].set_index('gene')
    weight_dict = weight_DEG_nwk.T.to_dict('records')[0]
    nx.set_node_attributes(network,weight_dict,'weight')

    #step1: Influence maximization
    start=time.time() 
    ##preprocess
    nodes=pd.Series(list(set([x for x in network.nodes()])))
    TFset=set(nodes[nodes.isin(tfli)].tolist())
    ##influence maximization
    TFrank, infNo = IM(network,TFset, int(args.imround))
    ##Output into file 
    TFrankFile = args.out + '.tf.rank'

    with open(TFrankFile,'w') as TFrankF:
        for TF in TFrank:
            TFrankF.write(TF+'\n')#IM result--> cold.D2.TF_rank.1

    print('TF ranking done', time.strftime("%H:%M:%S",time.gmtime(time.time()-start)), '\n')

    #step2: NP based on TF rank
    start=time.time() 
    ##TF adding NP
    seed = weight_tp.set_index('gene')
    spearmanLi,coverage,TF_trimmed,lst_node_weight = TF_adding_NP(degli, degli, tfli, TFrankFile, DEGnetFile, seed, args.coverNo, coverage=args.c) 

    ##Save Output as file 
    TFtrimF = TFrankFile + '.trim'

    with open(TFtrimF,'w') as f:
        f.write('\n'.join(TF_trimmed))

    print(DEGliFile, TFrankFile, TFtrimF)
    print("main process end")
import os
import argparse

import networkx as nx
import numpy as np

from utils.target_gene import Target_genes

def makeGeneSet(DEGFile,geneSetF):
    ##-control
    DEGSet = set(np.genfromtxt(DEGFile, dtype=str))
    if geneSetF is None: 
        geneSet = DEGSet

    else: 
        geneSet = set(np.genfromtxt(geneSetF, dtype=str))

    return set(DEGSet)&geneSet

if __name__=="__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument('--nwk',help='network file')
    parser.add_argument('--deg')
    parser.add_argument('--tf')
    parser.add_argument('--tfrank')
    parser.add_argument('--geneSetFile')
    parser.add_argument('--out')
    args=parser.parse_args()

    if not os.path.exists(os.path.dirname(args.out)): 
        os.mkdir(os.path.dirname(args.out))

    geneSet = makeGeneSet(args.deg, args.geneSetFile)
    network = nx.read_edgelist(args.nwk,data=(('weight',float),),create_using=nx.DiGraph())
    TFSet = set(np.genfromtxt(args.tf, dtype=str, delimiter='\t'))

    with open(args.deg) as f1, open(args.tf) as f2:
        DEGli=f1.read().strip().split()
        TFli=f2.read().strip().split()
        edgeSet = set()

    for idx,TF in enumerate(TFli):
        edges, TGenes, TGs = Target_genes(TF,network,DEGli,TFSet,geneSet)
        edgeSet |= edges

    # write the accumulated edge set once, after all TFs have been processed
    with open(args.out,'w') as f4:
        for (a, b, c) in edgeSet: 
            f4.write(a+'\t'+b+'\t'+str(c)+'\n')
import pandas as pd
import argparse
import sys
import os

def findDETG(resPath, cond) : # cond =='cold'
  bins = pd.read_csv('exp/exp.'+cond+'.DEG.binary', sep='\t')
  for f in os.listdir(resPath) :
    if f[-4:]!='trim': continue
    tp = f.split('.')[5]
    TFSet = set(open(resPath+'/'+f, 'r').read().strip().split()[2::2])
    DEGSet = set(bins['gene'][bins.iloc[:,44+int(tp)]!=0])
    print(tp, len(DEGSet&TFSet), DEGSet&TFSet)
  return   

def makeDict(keggF) :
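  # Each line of the KEGG gene file is expected to look like
  # "<db>:<gene_id>\t<symbol>, <aliases>; <description>" (inferred from the parsing
  # below); the function returns {gene_id: symbol} and {gene_id: description}.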
  symDict = {}
  descDict = {}
  for line in open(keggF, 'r').readlines():
    tp = line.split('\t')
    kid = tp[0].split(':')[1]
    sd = tp[1].split(';')
    sym = '' if len(sd)==1 else sd[0].split(', ')[0]
    desc = sd[0] if len(sd)==1 else sd[1]
    symDict[kid]=sym.rstrip()
    descDict[kid]=desc.rstrip()
  return symDict, descDict

def main_TG():
  parser = argparse.ArgumentParser(usage='python %(prog)s resPath')
  parser.add_argument('resPath')
  parser.add_argument('--kegg', required= True)
  args=parser.parse_args()

  symDict, descDict = makeDict(args.kegg)
  outF = open(args.resPath+'/TGdesc.txt', 'w')
  for f in os.listdir(args.resPath) :
    if os.path.isdir(args.resPath+'/'+f) or f[-2:]!='TG': continue
    tp = f.split('.')[2].upper()
    TF = f.split('.')[4]
    for line in open(args.resPath+'/'+f, 'r').readlines():
      TG = line.rstrip()
      TFsym = '' if TF not in symDict else symDict[TF]
      TFdesc = '' if TF not in descDict else descDict[TF]
      if TG not in symDict : outF.write('{}\t{}\t{}\t{}\t{}\t\t\n'.format(tp, TF, TFsym, TFdesc, TG))
      else : outF.write('{}\t{}\t{}\t{}\t{}\t{}\t{}\n'.format(tp, TF, TFsym, TFdesc, TG, symDict[TG], descDict[TG]))

def main():
  parser = argparse.ArgumentParser(usage='python %(prog)s resPath')
  parser.add_argument('resPath')
  parser.add_argument('--kegg', default='data/kegg_ath_gene.txt')  # accept the --kegg path passed by the Snakefile rule
  args=parser.parse_args()
  symDict, descDict = makeDict(args.kegg)
  outF = open(args.resPath+'/timeseries_network.txt', 'w')
  for f in os.listdir(args.resPath+'/TG') :
    if os.path.isdir(args.resPath+'/TG/'+f) or f[-2:]=='TG' or f[-3:]=='txt': continue
    print(f)
    tp = 'T'+f.split('.')[1]
    for line in open(args.resPath+'/TG/'+f, 'r').readlines():
      temp = line.split('\t')
      source = temp[0]
      target = temp[1]
      TFsym = '' if source not in symDict else symDict[source]
      TFdesc = '' if source not in descDict else descDict[source]
      if target not in symDict : outF.write('{}\t{}\t{}\t{}\t{}\t\t\n'.format(tp, source, TFsym, TFdesc, target))
      else : outF.write('{}\t{}\t{}\t{}\t{}\t{}\t{}\n'.format(tp, source, TFsym, TFdesc, target, symDict[target], descDict[target]))

if __name__=='__main__':
  main()
URL: https://github.com/Junhyeong02/propanet
Name: propanet