Skip to content
Permalink
master
Switch branches/tags

Name already in use

A tag already exists with the provided branch name. Many Git commands accept both tag and branch names, so creating this branch may cause unexpected behavior. Are you sure you want to create this branch?
Go to file
 
 
Cannot retrieve contributors at this time
#!/usr/bin/env python
import dnn.loaddata as Loader
import sys
import os
import time
import numpy as np
import dnn.evaluation as ev
import dnn.metaData as metadata
import sklearn.metrics as metric
from sklearn.linear_model import SGDClassifier as lr
# Module-level batch-size constant. It is not referenced anywhere in the
# visible part of this file.
nbatchsize=5000
def loadData(ibatch, threshold, ptrain=0.9):
    """Load one batch and split it randomly into training and test parts.

    The DHS, ChIP and DNA arrays of batch ``ibatch`` are fetched through
    ``Loader`` and partitioned along their first axis with one shared random
    permutation, so row ``i`` of every returned training (or test) array
    refers to the same sample.  Each array is indexed with three subscripts
    and therefore has to be 3-dimensional.

    Args:
        ibatch: Batch identifier forwarded to the ``Loader`` functions.
        threshold: Forwarded unchanged to ``Loader.loadChip``.
        ptrain: Fraction of the samples assigned to the training part; the
            training size is ``int(ptrain * n_samples)``.

    Returns:
        The tuple ``(train_dhs, test_dhs, train_chip, test_chip,
        train_dna, test_dna)``.
    """
    dhs = Loader.loadDhs(ibatch)

    # A single permutation drives all three splits to keep samples aligned.
    n_samples = dhs.shape[0]
    order = np.random.permutation(n_samples)
    n_train = int(ptrain * n_samples)
    train_idx, test_idx = order[:n_train], order[n_train:]

    def split(data):
        # Partition ``data`` along its first axis into (train, test).
        return data[train_idx, :, :], data[test_idx, :, :]

    train_dhs, test_dhs = split(dhs)
    train_chip, test_chip = split(Loader.loadChip(ibatch, threshold))
    train_dna, test_dna = split(Loader.loadDna(ibatch))

    return (train_dhs, test_dhs, train_chip, test_chip,
            train_dna, test_dna)
def main(batchid, threshold, num_epochs=500):
    """Fit one logistic-regression baseline per TF on DHS features.

    For every transcription factor listed in the annotation map, the DHS
    rows of all cell types that carry a label for that TF are stacked into
    one training set and one test set.  An ``SGDClassifier`` with logistic
    loss is fitted and the test-set average precision (auPRC) is printed,
    followed by the mean over all evaluated TFs.

    Args:
        batchid: Batch identifier passed to ``loadData``.
        threshold: Label threshold name passed to ``loadData``.
        num_epochs: Accepted so existing callers keep working; this
            function does not use it.
    """
    print("Load annotation ...")
    tf2i = metadata.loadMetaDataMap(metadata.datadir + "annotations/tf_names.txt")
    cell2i = metadata.loadMetaDataMap(metadata.datadir + "annotations/cells.txt")

    print("Loading dataset ...")
    # The DNA arrays returned by loadData are not needed for this baseline.
    train_dhs, test_dhs, train_chip, test_chip, _, _ = loadData(batchid, threshold)

    auprc_all = {}
    print("Logistic Regression on DHS auPRC: ")
    for ktf in tf2i:
        itf = tf2i[ktf]

        # A cell type contributes only if its first training label for this
        # TF is non-negative.
        # NOTE(review): presumably negative values mark "no ChIP data for
        # this (cell, TF) pair" -- confirm against Loader.loadChip.
        cells = [cell2i[kc] for kc in cell2i
                 if train_chip[0, cell2i[kc], itf] >= 0]
        if not cells:
            # Fitting on an empty design matrix would abort the whole run.
            print(ktf + ": skipped (no labelled cell type)")
            continue

        # Concatenate once per TF instead of re-copying the growing arrays
        # for every additional cell type.
        X = np.concatenate(
            [train_dhs[:, ic, :].astype("float") for ic in cells])
        y = np.concatenate(
            [train_chip[:, ic, itf].astype("float") for ic in cells])
        Xtest = np.concatenate(
            [test_dhs[:, ic, :].astype("float") for ic in cells])
        ytest = np.concatenate(
            [test_chip[:, ic, itf].astype("float") for ic in cells])

        # NOTE(review): newer scikit-learn releases call this loss
        # 'log_loss' and no longer accept 'log' -- confirm against the
        # scikit-learn version this project pins before changing it.
        clf = lr(loss='log')
        clf.fit(X, y)
        pr = clf.decision_function(Xtest)
        auprc = metric.average_precision_score(ytest, pr)
        print(ktf + ": " + str(auprc))
        auprc_all[ktf] = auprc

    if auprc_all:
        # dict.values() is a view on Python 3; np.mean needs a real sequence.
        mean_auprc = np.mean(list(auprc_all.values()))
    else:
        mean_auprc = float("nan")
    print("Mean AUPRC: " + str(mean_auprc))
def parseArgs(argv):
    """Translate the command line into keyword arguments for ``main``.

    Two layouts are accepted:

    * ``prog batchid threshold [epochs]`` -- the documented usage, where
      ``threshold`` is ``'conservative'`` or ``'relaxed'``;
    * ``prog batchid [epochs]`` -- the layout the script parsed before,
      which keeps the ``'conservative'`` threshold.

    Args:
        argv: Full argument vector including the program name at index 0;
            it must contain at least the batch id.

    Returns:
        A dict with ``batchid`` and ``threshold`` and, when an epoch count
        was given, ``num_epochs``.

    Raises:
        ValueError: If the batch id or the epoch count is not an integer.
    """
    kwargs = {'batchid': int(argv[1]), 'threshold': "conservative"}
    rest = list(argv[2:])
    # Only consume the second argument as a threshold when it names one, so
    # the older "batchid epochs" invocation keeps working.
    if rest and rest[0] in ("conservative", "relaxed"):
        kwargs['threshold'] = rest.pop(0)
    if rest:
        kwargs['num_epochs'] = int(rest[0])
    return kwargs


def printUsage(prog):
    """Print the command-line help, using ``prog`` as the program name."""
    print("Fits per-TF logistic regression on DHS features of the DREAM dataset and reports auPRC.")
    print("Usage: %s batchid threshold [epochs]" % prog)
    print("")
    print("batchid: Use one of the batches which are number")
    print(" from 0 to 19")
    print("threshold: You must either specify 'conservative' or 'relaxed'")
    print("EPOCHS: number of training epochs to perform (default: 500)")
    print("")
    print("Example usage:")
    print("")
    print("train_model.py 0 relaxed 10")


if __name__ == '__main__':
    if ('--help' in sys.argv) or ('-h' in sys.argv) or (len(sys.argv) <= 1):
        printUsage(sys.argv[0])
    else:
        main(**parseArgs(sys.argv))