Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Browse files
Browse the repository at this point in the history
Implemented gene set enrichment
- Loading branch information
Showing
7 changed files
with
670 additions
and
4 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1 +1,2 @@ | ||
test/* | ||
src/*.pyc |
Empty file.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,232 @@ | ||
''' | ||
@author: jbayer | ||
Module with different functions for creating different figures. | ||
"heatmap" is specialised for plotting MTI-Set overlap ratios within a heatmap built | ||
with pyplot from matplotlib. | ||
INPUT: | ||
mti_d - {mir:[uniProt Accessions]} dictionary | ||
outPath - Path to save heatmap plot | ||
le - True if function compares MTI-sets that have just leading edge genes in it | ||
(-> changes in title) | ||
OUTPUT: | ||
Heatmap pdf file | ||
"bargraph" creates two graphs on one page with the number of miRNAs/MTIs per | ||
analysis step. | ||
INPUT: | ||
db_num_l - Step 1: number of identified items in MTI DBs | ||
occ_num_l - Step 2: number of items occurring as often as defined over MTI DBs | ||
uni_num_l - Step 3: number of items mapped to UniProt Accessions | ||
annot_num_l - Step 4: number of items overlapping with annotation file | ||
out_path - Path to save bar plot | ||
sel - True if MTIs were just selected (-> no step 4) | ||
OUTPUT: | ||
Bar plot pdf file | ||
"venn" is specialised to create a venn diagram for maximum four MTI DBs to show the | ||
numerical intersections of their MTIs or miRNAs. | ||
INPUT: | ||
outpath - Path to save venn diagram | ||
base_list - List of used MTI DBs as numerical abbreviation (1-4) | ||
baseDict_list - List with the MTI dictionaries of the MTI DBs | ||
baseName_list - List with the names of the used MTI DBs | ||
mti - True if diagram shows the MTIs (-> changes in title) | ||
OUTPUT: | ||
Venn diagram pdf file | ||
''' | ||
import tempfile
import os
import matplotlib
import natural_sort as ns

# The backend has to be chosen before pyplot is imported. 'Agg' is a
# non-interactive backend, so figures are only written to files.
try:
    matplotlib.use('Agg', warn=False)
except TypeError:
    # Newer matplotlib releases no longer accept the 'warn' keyword.
    matplotlib.use('Agg')

import matplotlib.pyplot as plt
import numpy as np
|
||
def heatmap(mti_d, outPath, le = False):
    ''' CREATES
    a heatmap of the pairwise overlap ratio between all non-empty MTI sets.

    For every pair of miRNAs the ratio is
    len(intersection of both target sets) / len(union of both target sets).

    INPUT:
    mti_d   - {mir:[uniProt Accessions]} dictionary; entries with an empty
              target collection are left out of the plot
    outPath - Path to save heatmap plot
    le      - True if the sets only contain leading edge genes
              (-> changes the title)
    OUTPUT:
    Heatmap pdf file written to outPath

    NOTE: raises ZeroDivisionError if mti_d has no non-empty set, because the
    font size and line width are derived from the number of labels.
    NOTE: plt.rc changes the global 'axes' line width for later plots, too.
    '''
    # only miRNAs that actually have targets get a row/column
    label_l = [mir for mir in mti_d if mti_d[mir]]
    label_l.sort(key=ns.natural_keys)
    num_labels = len(label_l)

    # build every target set once instead of once per compared pair
    target_sets = {mir: set(mti_d[mir]) for mir in label_l}

    # calculate ratio of all sets #
    matrix = []  # matrix with ratios
    for mir1 in label_l:
        set1 = target_sets[mir1]
        row_l = []
        for mir2 in label_l:
            set2 = target_sets[mir2]
            union = len(set1 | set2)
            overlap = float(len(set1 & set2))
            # union cannot be 0 for non-empty sets; the guard replaces the
            # former catch-all 'except' that hid every kind of error
            row_l.append(overlap / union if union else 0)
        matrix.append(row_l)

    # create heatmap #
    # shrink font and grid lines with a growing number of labels
    font_s = min(280. / num_labels, 12)
    greyline = 24. / num_labels
    plt.rc('axes', linewidth=greyline)

    # the colormap is passed by name, so no colormap lookup call is needed
    hm = plt.pcolormesh(np.array(matrix), cmap="GnBu",
                        edgecolors='lightgrey', linewidth=greyline)
    tick_pos = np.arange(num_labels) + 0.5  # centre of each cell
    plt.xticks(tick_pos, label_l, rotation=90, fontsize=font_s)  # mir labels x-axis
    plt.yticks(tick_pos, label_l, fontsize=font_s)  # mir labels y-axis
    # booleans instead of 'off': the string is no longer read as "disabled"
    plt.tick_params(axis='both', bottom=False, left=False, right=False, top=False)
    plt.xlim(xmax=num_labels)  # x-axis length = number of mirs
    plt.ylim(ymax=num_labels)  # y-axis length = number of mirs
    plt.colorbar(hm)
    if le:
        plt.title("Leading Edge Geneset Overlap Ratio")
    else:
        plt.title("Geneset Overlap Ratio")
    plt.savefig(outPath, format='pdf', bbox_inches='tight')
    plt.close()
|
||
def _count_subplot(position, counts, scale_max, title, xtick_name_l):
    ''' draws one bar chart of the per-step counts into the given subplot slot '''
    width = 0.8  # bar width
    loc = np.arange(len(counts))  # one bar per analysis step

    plt.subplot(position)
    bars = plt.bar(loc, counts, width)
    plt.title(title)
    plt.xticks(loc, xtick_name_l, rotation=50)
    # no ticks, no labels (height) on yaxis; booleans instead of 'off'
    # because the string is no longer read as "disabled"
    plt.tick_params(axis='both', bottom=False, top=False, left=False,
                    right=False, labelleft=False)
    # numbers above bars (set for each) #
    for bar in bars:
        height = bar.get_height()
        num_height = set_numHeight(height, scale_max)
        plt.text(bar.get_x() + bar.get_width() / 2., num_height,
                 '%d' % int(height), ha='center', va='bottom')
    # leave some head room on the y-axis for the numbers above the bars #
    if not counts[0] == 0:
        plt.axis([-0.3, 4, 0, scale_max / 6. + scale_max])
    else:
        plt.axis([0, 4, -4, 4])


def bargraph(db_num_l, occ_num_l, uni_num_l, annot_num_l, out_path, sel):
    ''' CREATES
    a Multiplot of two Bar-Graphs in one document.

    The upper graph shows the number of miRNAs, the lower graph the number of
    miRNA-target interactions per analysis step.

    INPUT:
    db_num_l    - Step 1: counts for the DB search
    occ_num_l   - Step 2: counts for the DB occurrence filter
    uni_num_l   - Step 3: counts for the UniProt mapping
    annot_num_l - Step 4: counts for the annotation mapping
                  (each list: index 0 = miRNAs, index 1 = MTIs)
    out_path    - Path to save bar plot
    sel         - True if MTIs were just selected (-> step 4 is not plotted)
    OUTPUT:
    Bar plot pdf file written to out_path
    '''
    all_num_l = [db_num_l, occ_num_l, uni_num_l, annot_num_l]
    xtick_name_l = ['DB search', 'DB occurrence', 'UniProt mapping', 'Annotation mapping']
    if sel:
        # solely selecting MTIs: uni = annot number, so drop the last step
        all_num_l = all_num_l[:-1]
        xtick_name_l = xtick_name_l[:-1]

    mir_num_tuple = tuple(num_l[0] for num_l in all_num_l)  # number of mirnas per tool step
    tar_num_tuple = tuple(num_l[1] for num_l in all_num_l)  # number of mir-tar-ias per tool step

    plt.figure(figsize=(6, 9), dpi=80, facecolor='w')

    ## Subplot miRNAs ##
    _count_subplot(211, mir_num_tuple, mir_num_tuple[0],
                   'Number of miRNAs\nover analysis steps', xtick_name_l)

    ## Subplot miRNA-target interactions ##
    # the y-axis is scaled by the larger of step 1 and step 3; computed once
    # here instead of once per bar
    max_h = max(tar_num_tuple[0], tar_num_tuple[2])
    _count_subplot(212, tar_num_tuple, max_h,
                   'Number of miRNA-target interactions\nover analysis steps',
                   xtick_name_l)

    # adjust distance between subplots #
    plt.subplots_adjust(hspace=1)
    # save figure as pdf, remove white space #
    plt.savefig(out_path, format='pdf', bbox_inches='tight')
    plt.close()
|
||
|
||
def venn(outpath, base_list, baseDict_list, baseName_list, mti):
    ''' CREATES
    a venn diagram with the number of miRNAs/ MTIs

    An R script using the VennDiagram package is assembled as text, written
    to a temporary file and run through the R command line.

    INPUT:
    outpath       - Path prefix; 'Venn_miRs.pdf' or 'Venn_MTIs.pdf' is appended
    base_list     - List of the used MTI DBs; each entry is used as R variable
                    name and is looked up in baseName_list
    baseDict_list - List with the MTI dictionaries of the MTI DBs
    baseName_list - List with the names of the used MTI DBs
    mti           - True if diagram shows the MTIs (-> changes title/file name)
    OUTPUT:
    Venn diagram pdf file; nothing is done if base_list is empty
    '''
    # nothing to draw; also keeps the names below from being unbound
    if not base_list:
        return

    color_list = ["#4F81BD", "#9BBB59", "#8064A2", "#C0504D"]

    # title and file suffix only depend on the mode, not on the DB #
    if mti:
        add = 'MTIs.pdf'
        main = 'miRNA-target interactions'
    else:
        add = 'miRs.pdf'
        main = 'miRNAs'

    # R - string #
    r_str = 'suppressPackageStartupMessages(library(VennDiagram))\n'

    # create lists of miRFams and colors for each DB in R #
    colors = []
    for pos, base in enumerate(base_list):
        b_dict = getBaseDict(base, baseDict_list, baseName_list)
        miRFam_li = b_dict if mti else list(b_dict.keys())
        # Python list repr without brackets doubles as the R c(...) arguments
        miRFam_str = str(miRFam_li).replace("[", "").replace("]", "")
        if miRFam_str:
            r_str += base + ' <- c(' + miRFam_str + ')\n'
            colors.append(color_list[pos])
        # NOTE(review): a DB without entries gets no R vector here but is
        # still listed in 'bases' below - confirm that callers never pass one

    color_str = str(colors)[1:-1]
    bases = str(base_list)[1:-1]

    # set different circle and label sizes for diff. numbers of sets #
    if len(base_list) in (3, 4):
        catCex = '1.3'  # label size
        labelPos = ')'  # label position (here: default)
    else:
        catCex = '2'
        labelPos = ',cat.pos=0)'  # position (here: top - 0 degrees )

    # Send lists for each DB to R #
    r_str += ('base_data <- list(' + bases.replace("'", '') + ') # list of miRNAs per DB\n'
              + 'names(base_data) <- c(' + bases.replace("'", '"') + ') # DB names for miRNA lists\n'
              + 'pdf(file="' + outpath + 'Venn_' + add + '",7,7)\n')
    # Create Venn-Diagrams #
    r_str += ('grid.draw(venn.diagram(base_data, filename = NULL, margin=0.1, main="' + main + '",\n'
              + 'main.fontfamily="sans", main.cex=' + catCex + ',main.pos=c(0.5,1),\n'
              + 'scaled=FALSE,euler.d=FALSE, cat.fontfamily=rep("sans",' + str(len(base_list)) + '),\n'
              + 'col=c(' + color_str + '),cex=2,fontfamily="sans",cat.cex=' + catCex + labelPos + ')\n')
    r_str += "dev.off()"

    # create temporary file and open it with R (command line) #
    # text mode: the default binary mode rejects str on Python 3
    with tempfile.NamedTemporaryFile(mode='w', suffix='.R') as tmp:
        tmp.write(r_str)
        tmp.flush()  # R reads the file by name, so the content must be on disk
        os.system("R --vanilla < {tmp}".format(tmp=tmp.name) + " >/dev/null")
|
||
|
||
def getBaseDict(base, baseDict_list, baseName_list):
    ''' looks up the position of the given target base in baseName_list and
    returns the entry of baseDict_list stored at that same position '''
    position = baseName_list.index(base)
    return baseDict_list[position]
|
||
def set_numHeight(height,maxi):
    ''' returns the y position at which the number of a bar is written;
    the smaller the bar is compared to maxi, the larger the scaling factor '''
    # (divisor of maxi, scaling factor), checked from smallest bar upwards
    steps = ((6, 1.5), (4, 1.25), (2, 1.1))
    for divisor, factor in steps:
        if height < maxi/divisor:
            return factor*height
    # bars of at least half of maxi
    return 1.05*height
|
||
|
Oops, something went wrong.