diff --git a/visualize.py b/visualize.py
new file mode 100644
index 0000000..59f9ad9
--- /dev/null
+++ b/visualize.py
@@ -0,0 +1,179 @@
+
+"""
+This visualises the information found by find_exons.py
+@author: Anastasiia Petrova
+@contact: anastasiia.petrova(at)mpi-bn.mpg.de
+"""
+
+import argparse
+import sys
+import os
+import re
+import time
+import multiprocessing
+import logging
+import subprocess
+from Bio import SeqIO
+import Bio.SeqIO.FastaIO as bio
+import numpy as np
+from collections import defaultdict
+from scipy import stats
+import pyBigWig
+from statsmodels.sandbox.stats.multicomp import multipletests #for bonfferoni
+import matplotlib.pyplot as plt
+import random
+import textwrap
+import seaborn as sns
+
+#from gtfparse import read_gtf #makes the logger to look different o_o
+
+logger = logging.getLogger('visualise')
+logger.setLevel(logging.INFO)
+
+formatter = logging.Formatter("%(asctime)s : %(message)s", "%Y-%m-%d %H:%M")
+
+def parse_args():
+	
+	parser = argparse.ArgumentParser(prog = '', description = textwrap.dedent('''                           
+
+		This script produces plots and a tsv-file to show the number of exons for the list of genes of interest.
+		'''), epilog='That is what you need to make this script work for you. Enjoy it')
+	
+	required_arguments = parser.add_argument_group('required arguments')
+	required_arguments.add_argument('--correlation', help='a .txt file with correlation between genes length and the length of exons', required=True)
+	
+	#all other arguments are optional
+	#parser.add_argument('--genes_file', help='names of genes of interest')
+
+	args = parser.parse_args()
+
+	return args
+
+def check_existing_input_files(args):
+
+	print('checking the input file')
+
+	if not os.path.isfile(args.correlation):
+		print('please make sure the .txt file with genes-exons correlation exists')
+		sys.exit()
+
+def make_box_plot(group1, group2, gene_names, scores, output_directory, figure_name, color_name):
+	#first make two groups
+	group_one = []
+	group_two = []
+
+	for name in group1:
+		group_one.append(scores[gene_names.index(name)])
+
+	for name in group2:
+		group_two.append(scores[gene_names.index(name)])
+
+	plt.figure()
+	plt.boxplot([group_one, group_two])
+
+	plt.show()
+
+
+
+def make_plot(x_array, y_array, gene_names, output_directory, figure_name, color_name):
+
+	the_max = max(max(x_array), max(y_array))
+		
+	plt.xlim(0, the_max)
+	plt.ylim(0, the_max)
+
+	fig, ax = plt.subplots()
+	plt.plot([0, the_max], [0, the_max], '--', color = "black", linewidth = 0.5)
+	plt.plot(x_array, y_array, 'o', color = color_name)
+
+	plt.xlabel("whole gene length")
+	plt.ylabel("sum length of exons")
+
+	for i,j,name in zip(x_array, y_array, gene_names):
+		ax.annotate('%s' %name, xy=(i,j), xytext=(1,0), textcoords='offset points', size='xx-small')
+
+	fig.savefig(os.path.join(output_directory, figure_name))
+	
+	#plt.show()
+
+def make_reg_plot(x_array, y_array, gene_names, output_directory, figure_name, color_name):
+
+	the_max = max(max(x_array), max(y_array))
+		
+	#plt.xlim(0, the_max)
+	#plt.ylim(0, the_max)
+
+	#fig, ax = plt.subplots()
+	#plt.plot([0, the_max], [0, the_max], '--', color = "black", linewidth = 0.5)
+	#plt.plot(x_array, y_array, 'o', color = color_name)
+	sns.set()
+	sns.regplot(np.array(x_array), np.array(y_array))
+	sns.plt.show()
+
+	#plt.xlabel("whole gene length")
+	#plt.ylabel("sum length of exons")
+
+	#for i,j,name in zip(x_array, y_array, gene_names):
+	#	ax.annotate('%s' %name, xy=(i,j), xytext=(1,0), textcoords='offset points', size='xx-small')
+
+	#fig.savefig(os.path.join(output_directory, figure_name))
+	
+	#plt.show()
+
+def read_correlation(correlation_file):
+	gene_names = []
+	scores = []
+	gene_lengths = []
+	exons_lengths = []
+	with open(correlation_file) as read_cor_file:
+		for cor_line in read_cor_file:
+			if not cor_line.startswith('#'):
+				cor_line_array = re.split(r'\t', cor_line.rstrip('\n'))
+				gene_names.append(cor_line_array[0])
+				scores.append(float(cor_line_array[1]))
+				gene_lengths.append(int(cor_line_array[2]))
+				exons_lengths.append(int(cor_line_array[3]))
+			
+		read_cor_file.close()
+
+	return gene_names, scores, gene_lengths, exons_lengths
+
+def main():
+
+	start = time.time()
+
+	args = parse_args()
+
+	check_existing_input_files(args)
+
+	#check if there is an existing directory that user gave as input, otherwise create this directory from the path provided from the user
+	#check_directory(args.output_directory)
+
+	fh = logging.FileHandler("visualize_log.txt")
+	fh.setLevel(logging.INFO)
+	fh.setFormatter(formatter)
+	logger.addHandler(fh)
+
+	ch = logging.StreamHandler()
+	ch.setLevel(logging.INFO)
+	ch.setFormatter(formatter)
+	logger.addHandler(ch)
+
+	gene_names, scores, gene_lengths, exons_lengths = read_correlation(args.correlation)
+
+	logger.info("making a plot")
+	#make_plot(gene_lengths, exons_lengths, gene_names, "./output", "test1.png", 'gold')
+
+	group1 = ["OTX1", "OTX2", "PITX1", "GSC2", "NFYA", "NFYB", "DUX4"] #important genes in early development
+	group2 = ["MYCL", "LTBP3", "ERGIC3", "GCM2", "LHX8", "HIF1A", "FLI1"] #not so significant for early dev
+
+	make_box_plot(group1, group2, gene_names, scores, "./output", "test2.png", 'gold')
+
+	logger.info("find_exons needed %s seconds to generate the output" % (round(time.time() - start, 2)))
+	
+	for handler in logger.handlers:
+		handler.close()
+		logger.removeFilter(handler)
+	
+if __name__ == "__main__":
+	main()
\ No newline at end of file