Skip to content
Permalink
aa53535785
Switch branches/tags

Name already in use

A tag already exists with the provided branch name. Many Git commands accept both tag and branch names, so creating this branch may cause unexpected behavior. Are you sure you want to create this branch?
Go to file
 
 
Cannot retrieve contributors at this time
executable file 232 lines (204 sloc) 8.96 KB
'''
@author: jbayer
Module with different functions for creating different figures.
"heatmap" is specialised for plotting MTI-set overlap ratios in a heatmap built
with pyplot from matplotlib.
INPUT:
mti_d - {mir:[uniProt Accessions]} dictionary
outPath - Path to save heatmap plot
le - True if the function compares MTI-sets that contain only leading edge genes
(-> changes the title)
OUTPUT:
Heatmap pdf file
"bargraph" creates two graphs on one page with the number of miRNAs/MTIs per
analysis step.
INPUT:
db_num_l - Step 1: number of identified items in MTI DBs
occ_num_l - Step 2: number of items occurring as often as defined over MTI DBs
uni_num_l - Step 3: number of items mapped to UniProt Accessions
annot_num_l - Step 4: number of items overlapping with annotation file
out_path - Path to save bar plot
sel - True if MTIs were just selected (-> no step 4)
OUTPUT:
Bar plot pdf file
"venn" is specialised to create a venn diagram for at most four MTI DBs to show the
numerical intersections of their MTIs or miRNAs.
INPUT:
outpath - Path to save venn diagram
base_list - List of used MTI DBs as numerical abbreviation (1-4)
baseDict_list - List with the MTI dictionaries of the MTI DBs
baseName_list - List with the names of the used MTI DBs
mti - True if diagram shows the MTIs (-> changes in title)
OUTPUT:
Venn diagram pdf file
'''
import tempfile, os, matplotlib, natural_sort as ns
matplotlib.use('Agg',warn=False)
import matplotlib.pyplot as plt, numpy as np
def heatmap(mti_d, outPath, le = False):
    ''' CREATES
    a heatmap with the pairwise overlap ratios of all non-empty MTI-sets.

    The ratio of two sets is the size of their intersection divided by the
    size of their union (Jaccard index), so the diagonal is always 1.

    INPUT:
    mti_d - {mir:[uniProt Accessions]} dictionary; miRNAs with an empty
    target collection are left out of the plot
    outPath - Path to save heatmap plot
    le - True if the compared sets contain leading edge genes only
    (-> changes the title)
    OUTPUT:
    Heatmap pdf file. Nothing is plotted (and no file is written) when no
    miRNA has any target, because there is nothing to compare.
    '''
    # keep only miRNAs that actually have targets #
    label_l = [mir for mir in mti_d if mti_d[mir]]
    if not label_l:
        # the size calculations below divide by the number of labels #
        return
    label_l.sort(key=ns.natural_keys)
    num = len(label_l)

    # calculate ratio of all sets #
    # build every target set once instead of once per matrix cell #
    set_d = dict((mir, set(mti_d[mir])) for mir in label_l)
    matrix = [] # matrix with ratios
    for mir1 in label_l:
        row_l = []
        for mir2 in label_l:
            union = len(set_d[mir1] | set_d[mir2])
            overlap = float(len(set_d[mir1] & set_d[mir2]))
            # union cannot be 0 for non-empty sets; guard kept for safety #
            row_l.append(overlap/union if union else 0)
        matrix.append(row_l)

    # create heatmap #
    # shrink labels and grid lines with a growing number of miRNAs #
    font_s = min(280./num, 12)
    greyline = 24./num
    # NOTE: plt.rc changes the global matplotlib setting, not just this figure #
    plt.rc('axes',linewidth = greyline)
    try:
        hm = plt.pcolormesh(np.array(matrix), cmap="GnBu", edgecolors='lightgrey',
                            linewidth=greyline)
        tick_pos = np.arange(num)+0.5 # centre of each cell
        plt.xticks(tick_pos,label_l, rotation = 90, fontsize=font_s) # mir labels x-axis
        plt.yticks(tick_pos,label_l, fontsize=font_s) # mir labels y-axis
        # booleans instead of 'off': strings are truthy in current matplotlib #
        plt.tick_params(axis='both',bottom=False,left=False,right=False,top=False)
        plt.xlim(right=num) # x-axis length = number of mirs
        plt.ylim(top=num) # y-axis length = number of mirs
        plt.colorbar(hm)
        if le:
            plt.title("Leading Edge Geneset Overlap Ratio")
        else:
            plt.title("Geneset Overlap Ratio")
        plt.savefig(outPath,format='pdf',bbox_inches='tight')
    finally:
        # release the figure even if plotting or saving failed #
        plt.close()
def _draw_step_bars(subplot_id, num_tuple, xtick_name_l, title, max_h):
    ''' draws one bar subplot with the count written above each bar '''
    width = 0.8 # bar width
    loc = np.arange(len(num_tuple)) # one bar per analysis step
    plt.subplot(subplot_id)
    bars = plt.bar(loc,num_tuple,width)
    plt.title(title)
    plt.xticks(loc,xtick_name_l, rotation=50)
    # no ticks, no labels (height) on yaxis #
    # booleans instead of 'off': strings are truthy in current matplotlib #
    plt.tick_params(axis='both',bottom=False,top=False,left=False,right=False,
                    labelleft=False)
    # numbers above bars (set for each) #
    for bar in bars:
        height = bar.get_height()
        num_height = set_numHeight(height, max_h)
        plt.text(bar.get_x()+bar.get_width()/2.,num_height, '%d'%int(height),
                 ha='center',va='bottom')
    # set height of the y-axis a bit higher for numbers above #
    if max_h != 0:
        plt.axis([-0.3, 4, 0, max_h/6.+max_h])
    else:
        plt.axis([0, 4, -4, 4])


def bargraph(db_num_l, occ_num_l, uni_num_l, annot_num_l, out_path, sel):
    ''' CREATES
    a Multiplot of two Bar-Graphs in one document.

    The upper graph shows the number of miRNAs, the lower one the number of
    miRNA-target interactions, each with one bar per analysis step.

    INPUT:
    db_num_l, occ_num_l, uni_num_l, annot_num_l - one sequence per analysis
    step with the number of miRNAs at index 0 and the number of MTIs at index 1
    out_path - Path to save bar plot
    sel - True if MTIs were just selected (-> annot_num_l is ignored, three bars)
    OUTPUT:
    Bar plot pdf file
    '''
    all_num_l = [db_num_l, occ_num_l, uni_num_l, annot_num_l]
    xtick_name_l = ['DB search','DB occurrence','UniProt mapping', 'Annotation mapping']
    if sel:
        # solely selecting MTIs: uni = annot number #
        all_num_l = all_num_l[:-1]
        xtick_name_l = xtick_name_l[:-1]
    mir_num_tuple = tuple(num_l[0] for num_l in all_num_l) # miRNAs per tool step
    tar_num_tuple = tuple(num_l[1] for num_l in all_num_l) # mir-tar-ias per tool step
    # tallest MTI bar the axis has to fit: step 1 or step 3 #
    max_h = max(tar_num_tuple[0],tar_num_tuple[2])

    plt.figure(figsize=(6,9), dpi=80,facecolor='w')
    try:
        ## Subplot miRNAs ##
        _draw_step_bars(211, mir_num_tuple, xtick_name_l,
                        'Number of miRNAs\nover analysis steps', mir_num_tuple[0])
        ## Subplot miRNA-target interactions ##
        _draw_step_bars(212, tar_num_tuple, xtick_name_l,
                        'Number of miRNA-target interactions\nover analysis steps', max_h)
        # adjust distance between subplots #
        plt.subplots_adjust(hspace=1)
        # save figure as pdf, remove white space #
        plt.savefig(out_path,format='pdf',bbox_inches='tight')
    finally:
        # release the figure even if plotting or saving failed #
        plt.close()
def venn(outpath, base_list, baseDict_list, baseName_list, mti):
    ''' CREATES
    a venn diagram with the number of miRNAs/ MTIs

    An R script using the VennDiagram package is assembled, written to a
    temporary file and piped into the "R" command line tool. R writes the
    diagram to outpath + 'Venn_miRs.pdf' or outpath + 'Venn_MTIs.pdf'.

    INPUT:
    outpath - path prefix of the pdf file
    base_list - names of the MTI DBs to draw (used as R variable names)
    baseDict_list - MTI data of the DBs, parallel to baseName_list
    baseName_list - names of the MTI DBs
    mti - True if diagram shows the MTIs (-> changes title and file name)
    OUTPUT:
    Venn diagram pdf file. DBs without any entry are left out of the
    diagram; nothing is drawn if no DB has entries.
    '''
    color_list = ["#4F81BD","#9BBB59","#8064A2","#C0504D"]
    if mti:
        add = 'MTIs.pdf'
        main = 'miRNA-target interactions'
    else:
        add = 'miRs.pdf'
        main = 'miRNAs'

    # R - script, one entry per line #
    script_l = ['suppressPackageStartupMessages(library(VennDiagram))']
    # create lists of miRFams and colors for each DB in R #
    used_l = [] # DBs that really got an R variable
    colors = []
    for base in base_list:
        b_dict = getBaseDict(base, baseDict_list, baseName_list)
        if mti:
            miRFam_li = b_dict
        else:
            miRFam_li = list(b_dict.keys())
        # the list repr without brackets is a valid argument list for R's c() #
        miRFam_str = str(miRFam_li).replace("[", "").replace("]","")
        if miRFam_str:
            script_l.append(base+' <- c('+miRFam_str+')')
            used_l.append(base)
            # colour stays tied to the position of the DB in base_list #
            colors.append(color_list[base_list.index(base)])
    if not used_l:
        # no DB with entries: R would only fail on undefined variables #
        return

    # set different circle and label sizes for diff. numbers of sets #
    if len(used_l) in (3, 4):
        catCex = '1.3' # label size
        labelPos = ')' # label position (here: default)
    else:
        catCex = '2'
        labelPos = ',cat.pos=0)' # position (here: top - 0 degrees )
    color_str = ', '.join("'"+color+"'" for color in colors)
    var_str = ', '.join(used_l)
    name_str = ', '.join('"'+base+'"' for base in used_l)

    # Send lists for each DB to R #
    script_l.append('base_data <- list('+var_str+') # list of miRNAs per DB')
    script_l.append('names(base_data) <- c('+name_str+') # DB names for miRNA lists')
    script_l.append('pdf(file="'+outpath+'Venn_'+add+'",7,7)')
    # Create Venn-Diagrams #
    script_l.append('grid.draw(venn.diagram(base_data, filename = NULL, margin=0.1, '
                    'main="'+main+'",')
    script_l.append('main.fontfamily="sans", main.cex='+catCex+',main.pos=c(0.5,1),')
    script_l.append('scaled=FALSE,euler.d=FALSE, cat.fontfamily=rep("sans",'
                    +str(len(used_l))+'),')
    script_l.append('col=c('+color_str+'),cex=2,fontfamily="sans",cat.cex='
                    +catCex+labelPos+')')
    script_l.append('dev.off()')
    r_str = '\n'.join(script_l)

    # create temporary file and open it with R (command line) #
    # text mode: the script is a str, which a binary file rejects on Python 3 #
    with tempfile.NamedTemporaryFile(mode='w', suffix='.R') as tmp:
        tmp.write(r_str)
        tmp.flush() # R reads the file by name, so the content must be on disk
        os.system("R --vanilla < {tmp}".format(tmp=tmp.name)+" >/dev/null")
def getBaseDict(base, baseDict_list, baseName_list):
    ''' returns the entry of baseDict_list that sits at the position
    of base within baseName_list (both lists are parallel) '''
    position = baseName_list.index(base)
    return baseDict_list[position]
def set_numHeight(height,maxi):
    ''' returns the height to write the number

    The lower a bar is compared to the highest one (maxi), the larger the
    factor its height is multiplied with, so that the number stays clearly
    above small bars.

    height - height of the bar
    maxi - height of the highest bar
    '''
    # float division: integer counts would be floored on Python 2 #
    maxi = float(maxi)
    if height < maxi/6:
        factor = 1.5
    elif height < maxi/4:
        factor = 1.25
    elif height < maxi/2:
        factor = 1.1
    else:
        factor = 1.05
    return factor*height