diff --git a/README.md b/README.md
index 88c6a01..91dbc8f 100644
--- a/README.md
+++ b/README.md
@@ -1 +1,3 @@
-# IsoTV
\ No newline at end of file
+# IsoTV
+
+Detailed documentation avaliable at https://isotv.readthedocs.io/.
diff --git a/Snakefile b/Snakefile
new file mode 100644
index 0000000..9d7eeb1
--- /dev/null
+++ b/Snakefile
@@ -0,0 +1,707 @@
+# Siddharth Annaldasula
+# required packages: Bio (SeqIO), os, pandas, numpy,  subprocess, string, argparse, seaborn, matplotlib
+
+import os
+import pandas as pd
+import shutil
+from matplotlib.backends.backend_pdf import PdfPages
+
+##### Logistics
+configfile: "config_goeke.yaml"
+workdir: config["outdir"]
+
+ReferenceFasta = config["genome_fasta"]
+GenomeGFF  = config["genome_annot"]
+SAMPLES = config["samples"]
+
+
+### All
+rule all:
+  input:
+    config["output_plots"]
+
+# Package versions
+rule dump_versions:
+    output:
+        ver = "versions.txt"
+    conda:
+        "environment.yaml"
+    shell:
+        "conda list > {output.ver}"
+
+###### Raw ONT Reads Processing
+
+# Basecalling
+rule Basecalling:
+    input:
+        "RawData/{sample}/{id}.fast5"
+    output:
+        "RawData/QC/{sample}/{id}.txt",
+        "RawData/Fast5/{sample}/{id}/{id}.fast5",
+        fastq = "RawData/Basecalled/{sample}/{id}.fastq"
+    resources:
+        memory = 10,
+        time = 5,
+        tmpdir = 4
+    params:
+        guppy = config["guppy"],
+        flowcell = config["flowcell"],
+        kit = config["kit"],
+        work = config["outdir"]
+    priority: 10
+    group: "basecalling"
+    threads: 8
+	shell: """
+        mkdir -p {params.work}/RawData/Fast5/{wildcards.sample}/{wildcards.id}
+
+		ln -sf {params.work}/{input} {params.work}/RawData/Fast5/{wildcards.sample}/{wildcards.id}/{wildcards.id}.fast5
+
+		{params.guppy} -i {params.work}/RawData/Fast5/{wildcards.sample}/{wildcards.id}/ -s {params.work}/RawData/Basecalled/{wildcards.sample}/{wildcards.id}/ \
+		--flowcell {params.flowcell} --kit {params.kit} --num_callers 1 --cpu_threads_per_caller {threads} --fast5_out
+
+		mv -f {params.work}/RawData/Basecalled/{wildcards.sample}/{wildcards.id}/workspace/{wildcards.id}.fast5 {params.work}/RawData/Fast5/{wildcards.sample}/{wildcards.id}/{wildcards.id}.fast5
+
+		mv {params.work}/RawData/Basecalled/{wildcards.sample}/{wildcards.id}/sequencing_summary.txt {params.work}/RawData/QC/{wildcards.sample}/{wildcards.id}.txt
+
+		find {params.work}/RawData/Basecalled/{wildcards.sample}/{wildcards.id} -name '*.fastq' -exec mv {{}} {params.work}/RawData/Basecalled/{wildcards.sample}/{wildcards.id}.fastq \;"""
+
+rule aggregate_fast5:
+    input:
+        f5 = lambda wildcards: expand("RawData/Basecalled/{{sample}}/{id}.fastq", id = glob_wildcards("RawData/"+wildcards.sample+"/{id}.fast5")[0]),
+    output:
+        fq = "RawData/Fastq/{sample}"+".fastq"
+    resources:
+        memory = 50,
+        time = 4,
+        tmpdir = 4
+    priority: 9
+    run:
+        with open(output.fq,'w') as fout:
+            for fn in input.f5:
+                with open(fn,'r') as fin:
+                    shutil.copyfileobj(fin, fout)
+
+		#f5 = lambda wildcards: expand("RawData/Basecalled/{sample}/{id}.fastq"),
+
+# Build minimap2 index to reference genome
+rule Minimap2Index:
+    input:
+        genome = ReferenceFasta
+    output:
+        index = "Results/Minimap2/"+ReferenceFasta.split("/")[-1]+".mmi"
+    resources:
+        memory = 32,
+        time = 1
+    params:
+        opts = config["minimap2_index_opts"],
+    threads:
+        config["threads"]
+    shell:
+      "minimap2 -t {threads} {params.opts} -I 1000G -d {output.index} {input.genome}"
+
+# Filter basecalled raw ONT reads
+rule FilterReads:
+    input:
+        lambda wildcards: "RawData/Fastq/"+SAMPLES[wildcards.sample]+".fastq"
+    output:
+        "FilteredData/{sample}.fastq",
+    resources:
+        memory = 32,
+        time = 1
+    params:
+        config["min_mean_q"]
+    shell:
+        "filtlong --min_mean_q {params} {input} > {output}"
+
+# Pychopper
+rule Pychopper:
+    input:
+        "FilteredData/{sample}.fastq",
+    output:
+        pdf = "Results/Pychopper/{sample}.pychopper_report.pdf",
+        fastq = "Results/Pychopper/{sample}.pychop.fastq",
+        stats = "Results/Pychopper/{sample}.pychop.stats",
+        scores = "Results/Pychopper/{sample}.pychop.scores",
+        unclass = "Results/Pychopper/{sample}.unclassified.fastq"
+    resources:
+        memory=32,
+        time = 4
+    params:
+        opts = config["porechop_heu_stringency"]
+    run:
+        shell("cdna_classifier.py -b ReferenceData/cdna_barcodes.fas -r {output.pdf} -S {output.stats} -A {output.scores} -u {output.unclass} {input} {output.fastq}")
+
+# Build known splice junction bed file
+rule SpliceJunctionIndex:
+    input:
+        GenomeGFF
+    output:
+        junc_bed = "ReferenceData/junctions.bed"
+    shell:
+        "paftools.js gff2bed {input} > {output.junc_bed}"
+
+# Map reads using minimap2
+rule Minimap2Pinfish:
+    input:
+       index = rules.Minimap2Index.output.index,
+       fastq = expand("Results/Pychopper/{sample}.pychop.fastq", sample=SAMPLES) if (config["pychopper"]==True) else expand("FilteredData/{sample}.fastq", sample=SAMPLES),
+       use_junc = rules.SpliceJunctionIndex.output.junc_bed if config["minimap2_opts_junction"] else ""
+    output:
+       bam = "Results/Minimap2/merged.mapping.bam"
+    resources:
+        memory = 32,
+        time = 4
+    params:
+        opts = config["minimap2_opts"],
+        min_mq = config["minimum_mapping_quality"],
+    threads: config["threads"]
+    shell:"""
+    minimap2 -t {threads} -ax splice {params.opts} --junc-bed {input.use_junc} {input.index} {input.fastq}\
+    | samtools view -q {params.min_mq} -F 2304 -Sb | samtools sort -@ {threads} - -o {output.bam};
+    samtools index {output.bam}
+    """
+
+# Convert BAM to GFF
+rule PinfishRawBAM2GFF:
+    input:
+        bam = rules.Minimap2Pinfish.output.bam
+    output:
+        raw_gff = "Results/Pinfish/raw_transcripts.gff"
+    resources:
+        memory = 8,
+        time = 1
+    params:
+        opts = config["spliced_bam2gff_opts"]
+    threads: config["threads"]
+    shell:
+        "spliced_bam2gff {params.opts} -t {threads} -M {input.bam} > {output.raw_gff}"
+
+# Cluster transcripts in GFF
+rule PinfishClusterGFF:
+    input:
+        raw_gff = rules.PinfishRawBAM2GFF.output.raw_gff
+    output:
+        cls_gff = "Results/Pinfish/clustered_pol_transcripts.gff",
+        cls_tab = "Results/Pinfish/cluster_memberships.tsv",
+    resources:
+        memory = 8,
+        time = 1
+    params:
+        c = config["minimum_cluster_size"],
+        d = config["exon_boundary_tolerance"],
+        e = config["terminal_exon_boundary_tolerance"],
+        min_iso_frac = config["minimum_isoform_percent"],
+    threads: config["threads"]
+    shell:
+        "cluster_gff -p {params.min_iso_frac} -t {threads} -c {params.c} -d {params.d} -e {params.e} -a {output.cls_tab} {input.raw_gff} > {output.cls_gff}"
+
+# Collapse clustered read artifacts
+rule PinfishCollapseRawPartials:
+    input:
+        cls_gff = rules.PinfishClusterGFF.output.cls_gff
+    output:
+        cls_gff_col = "Results/Pinfish/clustered_transcripts_collapsed.gff"
+    resources:
+        memory = 8,
+        time = 1
+    params:
+        d = config["collapse_internal_tol"],
+        e = config["collapse_three_tol"],
+        f = config["collapse_five_tol"],
+    shell:
+       "collapse_partials -d {params.d} -e {params.e} -f {params.f} {input.cls_gff} > {output.cls_gff_col}"
+
+# Polish read clusters
+rule PinfishPolishClusters:
+    input:
+        cls_gff = rules.PinfishClusterGFF.output.cls_gff,
+        cls_tab = rules.PinfishClusterGFF.output.cls_tab,
+        bam = rules.Minimap2Pinfish.output.bam
+    output:
+        pol_trs = "Results/Pinfish/polished_transcripts.fas"
+    conda:
+        "/project/owlmayerTemporary/Sid/nanopore-analysis/environment.yaml"
+    resources:
+        memory = 8,
+        time = 1
+    params:
+        c = config["minimum_cluster_size"]
+    threads: config["threads"]
+    shell:
+        "polish_clusters -t {threads} -a {input.cls_tab} -c {params.c} -o {output.pol_trs} {input.bam}"
+
+# Map polished transcripts to genome
+rule MinimapPolishedClusters:
+    input:
+       index = rules.Minimap2Index.output.index,
+       fasta = rules.PinfishPolishClusters.output.pol_trs,
+    output:
+       pol_bam = "Results/Minimap2/polished_reads_aln_sorted.bam"
+    resources:
+        memory = 8,
+        time = 1
+    params:
+        extra = config["minimap2_opts_polished"],
+    threads: config["threads"]
+    shell:"""
+    minimap2 -t {threads} {params.extra} -ax splice {input.index} {input.fasta}\
+    | samtools view -Sb -F 2304 | samtools sort -@ {threads} - -o {output.pol_bam};
+    samtools index {output.pol_bam}
+    """
+
+# Convert BAM of polished transcripts to GFF
+rule PinfishPolishedBAM2GFF:
+    input:
+        bam = rules.MinimapPolishedClusters.output.pol_bam
+    output:
+        pol_gff = "Results/Pinfish/polished_transcripts.gff"
+    resources:
+        memory = 8,
+        time = 1
+    params:
+        extra = config["spliced_bam2gff_opts_pol"]
+    threads: config["threads"]
+    shell:
+        "spliced_bam2gff {params.extra} -t {threads} -M {input.bam} > {output.pol_gff}"
+
+# Collapse polished read artifacts
+rule PinfishCollapsePolishedPartials:
+    input:
+        pol_gff = rules.PinfishPolishedBAM2GFF.output.pol_gff
+    output:
+        pol_gff_col = "Results/Pinfish/polished_transcripts_collapsed.gff"
+    resources:
+        memory = 8,
+        time = 1
+    params:
+        d = config["collapse_internal_tol"],
+        e = config["collapse_three_tol"],
+        f = config["collapse_five_tol"],
+    shell:
+        "collapse_partials -d {params.d} -e {params.e} -f {params.f} {input.pol_gff} > {output.pol_gff_col}"
+
+rule GffCompare:
+    input:
+        reference = GenomeGFF,
+        exptgff = rules.PinfishCollapsePolishedPartials.output.pol_gff_col
+    output:
+        nanopore_gtf = "Results/GffCompare/nanopore.combined.gtf"
+    resources:
+        memory = 16,
+        time = 1
+    shell:
+        "gffcompare -r {input.reference} -R -A -K -o " + "Results/GffCompare/nanopore {input.exptgff}"
+
+# Generate corrected transcriptome
+rule PrepareCorrectedTranscriptomeFasta:
+    input:
+        genome = ReferenceFasta,
+        gff = rules.PinfishCollapsePolishedPartials.output.pol_gff_col,
+    output:
+        fasta = "Results/Pinfish/corrected_transcriptome_polished_collapsed.fas",
+    resources:
+        memory = 8,
+        time = 1
+    shell:
+        "gffread -g {input.genome} -w {output.fasta} {input.gff}"
+
+rule PrepareCorrectedTranscriptomeFastaNonred:
+    input:
+        rules.PrepareCorrectedTranscriptomeFasta.output.fasta,
+        rules.GffCompare.output.nanopore_gtf
+    output:
+        fasta = "Results/Pinfish/corrected_transcriptome_polished_collapsed_nonred.fas",
+    resources:
+        memory = 8,
+        time = 1
+    script:
+        "scripts/nonredundant.py"
+
+rule reads_to_transcripts:
+    input:
+         rules.PrepareCorrectedTranscriptomeFastaNonred.output.fasta,
+         rules.GffCompare.output.nanopore_gtf
+    output:
+        "Results/Pinfish/corrected_transcriptome_polished_collapsed_nonredundant_tcons.fas"
+    resources:
+        memory = 10,
+        time = 1,
+        tmpdir = 0
+    priority: 8
+    threads: 1
+    group: "preprocess_sequences"
+    script:
+        "scripts/reads_to_transcripts.py"
+
+# Build minimap2 index for transcriptome
+rule Transcriptome2Index:
+    input:
+        genome = rules.PrepareCorrectedTranscriptomeFastaNonred.output.fasta,
+    output:
+        index = "Results/Minimap2/Transcriptome.mmi"
+    threads: config["threads"]
+    resources:
+        memory = 16,
+        time = 2
+    shell:
+      "minimap2 -t {threads} -I 1000G -d {output.index} {input.genome}"
+
+# Map reads using minimap2
+rule Minimap2Genome:
+    input:
+       index = rules.Minimap2Index.output.index,
+       fastq = "FilteredData/{sample}.fastq",
+       use_junc = rules.SpliceJunctionIndex.output.junc_bed if config["minimap2_opts_junction"] else ""
+    output:
+       bam = "IGV/{sample}.genome.bam"
+    resources:
+        memory = 32,
+        time = 4
+    params:
+        opts = config["minimap2_opts"]
+    threads: config["threads"]
+    shell:"""
+    minimap2 -t {threads} -ax splice {params.opts} --junc-bed {input.use_junc} {input.index} {input.fastq}\
+    | samtools view -F 260 -Sb | samtools sort -@ {threads} - -o {output.bam};
+    samtools index {output.bam}
+    """
+
+# Map to transcriptome
+rule Map2Transcriptome:
+    input:
+       index = rules.Transcriptome2Index.output.index,
+       fastq = "FilteredData/{sample}.fastq"
+    output:
+       bam = "Results/Quantification/{sample}.bam",
+       sbam = "Results/Quantification/{sample}.sorted.bam"
+    threads: config["threads"]
+    resources:
+        memory = 32,
+        time = 4
+    params:
+        msec = config["maximum_secondary"],
+        psec = config["secondary_score_ratio"],
+    priority: 10
+    shell:"""
+        minimap2 -ax map-ont -t {threads} -p {params.psec} -N {params.msec} {input.index} {input.fastq} | samtools view -Sb > {output.bam};
+        samtools sort -@ {threads} {output.bam} -o {output.sbam};
+        samtools index {output.sbam};
+        """
+
+rule ExtractPrimaryMapping:
+    input:
+        transcriptome = rules.Map2Transcriptome.output.sbam,
+        genome = rules.Minimap2Genome.output.bam
+    resources:
+        memory = 4,
+        time = 1
+    output:
+        "IGV/{sample}.transcriptome.bam"
+    shell:"""
+        samtools view -b -F 260 {input.transcriptome} > {output};
+        samtools index {output}
+        """
+
+# Quantify transcripts
+rule CountTranscripts:
+    input:
+        bam = rules.ExtractPrimaryMapping.output
+    resources:
+        memory = 4,
+        time = 1
+    output:
+        quant = "Results/Quantification/{sample}.counts"
+    shell:"""
+        echo -e "counts\ttranscript" > {output.quant};
+        samtools view {input.bam} | cut -f3 | uniq -c | grep -v "*" | sed -e 's/^[ \t]*//' | sed 's/ /\t/' >> {output.quant}
+        """
+
+# Concatenate transcript counts
+rule ConcatenateCounts:
+    input:
+        annotation = GenomeGFF,
+        transcriptome = rules.GffCompare.output.nanopore_gtf,
+        counts = expand("Results/Quantification/{sample}.counts", sample=SAMPLES),
+    resources:
+        memory = 4,
+        time = 1
+    output:
+        counts = "Results/Quantification/counts.txt"
+    script:
+        "scripts/concatenate.py"
+
+# Normalize isoform levels
+rule CalculateDeSeq2Norm:
+    input:
+        counts = rules.ConcatenateCounts.output
+    resources:
+        memory = 4,
+        time = 1
+    output:
+        norm_counts = "Results/Quantification/counts_deseq2norm.txt"
+    script:
+        "scripts/deseq2norm.R"
+
+###### Isoform Transcript Processing
+
+# Use correct file paths
+CountsData =  rules.CalculateDeSeq2Norm.output if config["preprocess"] else config["counts_data"]
+Transcripts =  rules.reads_to_transcripts.output if config["preprocess"] else config["polished_reads"]
+NanoporeGTF = rules.GffCompare.output.nanopore_gtf if config["preprocess"] else config["nanopore_gtf"]
+
+# Filter genes that are not found in the analysis, or if their analysis has already been done
+checkpoint gene_filter:
+    input:
+        NanoporeGTF if config["annotation"] else Transcripts
+    output:
+        directory("Results/Genes_temp")
+    params:
+        genes_final_dir = "Results/Genes"
+    script:
+        "scripts/filter_genes.py"
+
+# Extract transcripts for each gene
+rule gene_to_transcripts:
+    input:
+        NanoporeGTF,
+        Transcripts,
+        "Results/Genes_temp/{gene}"
+    output:
+        "Results/Genes_temp/{gene}/{gene}_seq.fa"
+    params:
+        gene = "{gene}"
+    resources:
+        memory = 10,
+        time = 1,
+        tmpdir = 0
+    priority: 8
+    threads: 1
+    group: "preprocess_sequences"
+    script:
+        "scripts/gene_to_transcripts.py"
+
+# Transcript isoform translation
+checkpoint translation_to_protein:
+    input:
+        seq = rules.gene_to_transcripts.output
+    output:
+        transcripts = directory("Results/Genes_temp/{gene}/transcripts"),
+        stats = "Results/Genes_temp/{gene}/{gene}_transcripts_stats.txt"
+    params:
+        gene = "{gene}"
+    resources:
+        memory = 10,
+        time = 1,
+        tmpdir = 0
+    priority: 10
+    threads: 1
+    #group: "preprocess_sequences"
+    script:
+        "scripts/translation_to_protein.py"
+
+###### Translated Isoform Feature Analysis
+
+# Disorder region analysis
+rule iupred2a_analysis:
+    input:
+        protein = "Results/Genes_temp/{gene}/transcripts/{transcript}_protein.fa",
+        iupred2a = config["iupred2a_path"]
+    output:
+        seq = "Results/Genes_temp/{gene}/transcripts/{transcript}_sequence.txt",
+        idr = "Results/Genes_temp/{gene}/transcripts/{transcript}_func_iupred2a.txt"
+    resources:
+        memory = 10,
+        time = 1,
+        tmpdir = 0
+    priority: 8
+    threads: 1
+    group: "sequence_analysis"
+    shell:
+        "echo {input.protein} >> {output.seq}; {input.iupred2a} -a {output.seq} long >> {output.idr}"
+
+# Domain analysis
+rule interpro_scan:
+    input:
+        translation = "Results/Genes_temp/{gene}/transcripts/{transcript}_protein.fa"
+    output:
+        "Results/Genes_temp/{gene}/transcripts/{transcript}_func_domains.gff3"
+    params:
+        db = "Pfam",
+        java = config["java"],
+        pfam = config["interproScan_path"],
+    resources:
+        memory = 16,
+        time = 1,
+        tmpdir = 0
+    priority: 8
+    threads: 1
+    group: "sequence_analysis"
+    shell:
+        "source {params.java} ; sh  {params.pfam} -i {input.translation} -o {output} -f GFF3 -appl {params.db} -dra"
+
+# Secondary structure analysis
+rule porter_analysis:
+    input:
+        "Results/Genes_temp/{gene}/transcripts/{transcript}_protein.fa"
+    output:
+        "Results/Genes_temp/{gene}/transcripts/{transcript}_protein.fa.ss3"
+    params:
+        brewery = config["brewery_path"],
+        lock1 = config["lock1"],
+        lock2 = config["lock2"],
+    resources:
+        memory = 72,
+        time = 1,
+        tmpdir = 16
+        #memory = 16,
+        #time = 1,
+        #tmpdir = 0
+    priority: 10
+    threads: 16
+    #group: "sequence_analysis"
+    shell:
+        "python3 {params.brewery} -i {input} --cpu 16 --noTA --noSA --noCD --fast"
+#        'set -ve; export ORIGDIR="$(/bin/pwd)"; flock {params.lock1} bash -ve -c "scp {input} $MXQ_JOB_TMPDIR/"; cd $MXQ_JOB_TMPDIR/; python3 {params.brewery} -i *.fa --cpu 16 --noTA --noSA --noCD --fast; flock {params.lock2} bash -ve -c "cp *.fa.ss3 $ORIGDIR/{output}"'
+#        "echo {params.brewery} > {output}"
+
+# PTM analysis
+rule functional_site_analysis:
+    input:
+        "Results/Genes_temp/{gene}/transcripts/{transcript}_protein.fa"
+    output:
+        "Results/Genes_temp/{gene}/transcripts/{transcript}_func_sites.txt"
+    params:
+        ps_scan = config["prositeScan_path"],
+        prosite_dat = config["prositeDat_path"],
+        pf_scan = config["pfScan_path"],
+    resources:
+        memory = 10,
+        time = 1,
+        tmpdir = 0
+    priority: 8
+    threads: 1
+    group: "sequence_analysis"
+    shell:
+        "perl {params.ps_scan} -d {params.prosite_dat} --pfscan {params.pf_scan} {input} > {output}"
+
+# Prion analysis
+rule prion_analysis:
+    input:
+        "Results/Genes_temp/{gene}/transcripts/{transcript}_protein.fa"
+    output:
+        "Results/Genes_temp/{gene}/transcripts/{transcript}_func_prion.txt"
+    params:
+        plaac = config["plaac_path"],
+        gene = "{gene}"
+    resources:
+        memory = 10,
+        time = 1,
+        tmpdir = 0
+    priority: 8
+    threads: 1
+    group: "sequence_analysis"
+    shell:
+        "java -jar {params.plaac} -i {input} -p all > {output}"
+
+# Combine functional feature analysis on individual isoforms
+def functional_files():
+    files = []
+    if (config["aa"]): files.append("Results/Genes_temp/{gene}/transcripts/{transcript}_protein.fa")
+    if (config["iupred2a"]): files.append(rules.iupred2a_analysis.output.idr)
+    if (config["pfam"]): files.append(rules.interpro_scan.output)
+    if (config["porter"]): files.append(rules.porter_analysis.output)
+    if (config["pfScan"]): files.append(rules.functional_site_analysis.output)
+    if (config["prion"]): files.append(rules.prion_analysis.output)
+    return files
+rule individual_transcript_analysis:
+    input:
+        functional_files()
+    output:
+        "Results/Genes_temp/{gene}/transcripts/{transcript}_analysis.txt"
+    params:
+        gene = "{gene}"
+    resources:
+        memory = 10,
+        time = 1,
+        tmpdir = 0
+    priority: 5
+    threads: 1
+    script:
+        "scripts/individual_transcript_analysis.py"
+
+def aggregate_transcript_analysis_func(wildcards):
+    checkpoint_output_gene = checkpoints.gene_filter.get(**wildcards).output[0]
+    checkpoint_output = checkpoints.translation_to_protein.get(**wildcards).output[0]
+    return expand("Results/Genes_temp/{gene}/transcripts/{transcript}_analysis.txt",
+        gene = wildcards.gene,
+        transcript=glob_wildcards(os.path.join(checkpoint_output,"{transcript}_protein.fa")).transcript)
+
+rule aggregate_transcript_analysis:
+    input:
+        aggregate_transcript_analysis_func
+    output:
+        "Results/Genes_temp/{gene}/{gene}_transcripts_filtered_analysis.txt"
+    params:
+        gene = "{gene}"
+    resources:
+        memory = 4,
+        time = 1,
+        tmpdir = 0
+    priority: 5
+    threads: 1
+    shell:
+        "cat {input} > {output}"
+
+rule protein_coding_potential_analysis:
+    input:
+        rules.aggregate_transcript_analysis.output,
+        rules.translation_to_protein.output.stats,
+        CountsData,
+        NanoporeGTF,
+    output:
+        "Results/Genes_temp/{gene}/{gene}_functional_analysis.pdf"
+    params:
+        gene = "{gene}"
+    resources:
+        memory = 16,
+        time = 1,
+        tmpdir = 0
+    priority: 8
+    threads: 1
+    script:
+        "scripts/visualization.py"
+
+rule copy_genes_temp:
+    input:
+        gene_plot = "Results/Genes_temp/{gene}/{gene}_functional_analysis.pdf"
+    output:
+        gene_folder = directory("Results/Genes/{gene}"),
+        plot = "Results/Plots/{gene}_functional_analysis.pdf"
+    resources:
+        memory = 16,
+        time = 1,
+        tmpdir = 0
+    priority: 8
+    threads: 1
+    shell:
+        "mkdir {output.gene_folder}; cp {input.gene_plot} {output.plot}; cp -r {input} {output.gene_folder};"
+
+def aggregate_protein_coding_potential_analysis(wildcards):
+    checkpoint_output = checkpoints.gene_filter.get(**wildcards).output[0]
+    return expand("Results/Plots/{gene}_functional_analysis.pdf",
+           gene=glob_wildcards(os.path.join(checkpoint_output,"{gene}.txt")).gene)
+
+rule output_combine_files:
+    input:
+        aggregate = aggregate_protein_coding_potential_analysis,
+        gene_temp = directory("Results/Genes_temp"),
+    output:
+        plot = "Results/Output/" + config["output_plots"]
+    resources:
+        memory = 4,
+        time = 1,
+        tmpdir = 0
+    priority: 5
+    threads: 1
+    shell:
+        "rm -rf {input.gene_temp}; pdfunite {input.aggregate} {output.plot};"
diff --git a/config.yaml b/config.yaml
new file mode 100644
index 0000000..cf1f3bd
--- /dev/null
+++ b/config.yaml
@@ -0,0 +1,155 @@
+##### IsoTV Config File
+pipeline: "IsoTV"
+repo: "https://github.molgen.mpg.de/MayerGroup/IsoTV"
+
+### Ouput directory
+
+outdir: "/path/to/output"
+
+### General pipeline parameters:
+
+basecalling: FALSE
+preprocess: TRUE
+annotation: TRUE
+quantification: TRUE
+
+###### ONT long read processing config
+### Basecalling pipeline parameters
+
+guppy: "/path/to/Guppy324/bin/guppy_basecaller"
+flowcell: FLO-MIN106
+kit: SQK-DCS109
+
+### Reference Files
+genome_fasta: "/path/to/GRCh38.p12.primary_assembly.genome.fa"
+genome_annot: "/path/to/gencode.v32.primary_assembly.annotation.gtf"
+
+### Samples
+# samples with .bottom extension must be placed in the RawData folder
+# condition_replicate
+samples:
+    A549_1: "A549_r1_r3"
+    A549_2: "A549_r2_r1"
+    #A549_3: "A549_r3_r2"
+    A549_5: "A549_r5_r3"
+    HCT116_1: "HCT116_r1_r4"
+    HCT116_3: "HCT116_r3_r2"
+    HCT116_4: "HCT116_r4_r1"
+    HCT116_5: "HCT116_r5_r1"
+    HEPG2_1: "HEPG2_r1_r1"
+    HEPG2_4: "HEPG2_r4_r2"
+    HEPG2_5: "HEPG2_r5_r3"
+    K562_1: "K562_r1_r2"
+    K562_2: "K562_r2_r1"
+    K562_3: "K562_r3_r1"
+    K562_4: "K562_r4_r2"
+    MCF7_1: "MCF7_r1_r2"
+    MCF7_3: "MCF7_r3_r3"
+    MCF7_4: "MCF7_r4_r2"
+
+threads: 16
+
+# Use pychopper results
+pychopper: TRUE
+
+# Use annotation to improve splice junction mapping (minimap2 --junc_bed parameter)
+minimap2_opts_junction: TRUE
+
+# Minimum read quality to keep:
+min_mean_q: 5
+
+# Stringency of porechop heuristic:
+porechop_heu_stringency: 0.25
+
+# Options passed to minimap2 during indexing:
+minimap2_index_opts: "-k14"
+
+# Extra options passed to minimap2:
+minimap2_opts: "-uf"  # required for stranded data e.g. when pychopper filtered
+
+# Minmum mapping quality:
+minimum_mapping_quality: 5
+
+# Options passed to spliced_bam2gff:
+spliced_bam2gff_opts: "-s"  # required for stranded data e.g. when pychopper filtered
+
+# -c parameter:
+minimum_cluster_size: 3
+
+# -p parameter:
+minimum_isoform_percent: 1
+
+# -d parameter:
+exon_boundary_tolerance: 10
+
+# -e parameter:
+terminal_exon_boundary_tolerance: 50
+
+# Extra options passed to minimap2 when mapping polished reads:
+minimap2_opts_polished: "-uf"   # required for stranded data e.g. when pychopper filtered
+
+# Options passed to spliced_bam2gff when converting alignments of polished reads:
+spliced_bam2gff_opts_pol: "-s"  # required for stranded data e.g. when pychopper filtered
+
+# Options passed to collapse_partials when collapsing fragmentation artifacts
+# Internal exon boundary tolerance:
+collapse_internal_tol: 5
+
+# Five prime boundary tolerance:
+collapse_five_tol: 500
+
+# Three prime boundary tolerance:
+collapse_three_tol: 50
+
+maximum_secondary: 200
+secondary_score_ratio: 1
+
+##### Feature Analysis Config
+### Input genes
+
+gene_file: "/path/to/genes.tab"
+
+### Output file and folder
+
+output_plots: "loveisgone.pdf"
+
+### Processed file paths - required if not using ONT long read processing workflow
+
+nanopore_gtf: "/path/to/Results/GffCompare/nanopore.combined_filt.gtf"
+polished_reads: "/path/to/Results/Pinfish/corrected_transcriptome_polished_collapsed_tcons_nonredundant.fas"
+counts_data: "/path/to/Results/Quantification_nonred/all_counts_deseq2norm_all.txt"
+
+# Is data continuous
+continuous: FALSE
+
+### Tools paths and functional analysis
+
+aa: TRUE
+
+iupred2a_path: "/path/to/iupred2a/iupred2a.py"
+iupred2a: TRUE
+
+brewery_path: "/path/to/Brewery/Brewery.py"
+porter: TRUE
+
+interproScan_path: "/path/to/my_interproscan/interproscan-5.38-76.0/interproscan.sh"
+pfam: TRUE
+
+prositeScan_path: "/path/to/ps_scan/ps_scan.pl"
+pfScan_path: "/path/to/ps_scan/pfscan"
+prositeDat_path: "/path/to/prosite.dat"
+pfScan: TRUE
+
+#plaac_path: "/path/to/plaac.jar"
+#prion: FALSE
+
+minIsoTPM: 1
+maxIsoNum: 8
+minIsoPct: 10
+
+### Misc paths
+
+java : "/pkg/openjdk-11.0.3.2-0/profile"
+
+lock1: "/.../lock.txt"
+lock2: "/.../lock2.txt"
diff --git a/environment.yaml b/environment.yaml
new file mode 100644
index 0000000..149c2e7
--- /dev/null
+++ b/environment.yaml
@@ -0,0 +1,205 @@
+name: isotv_env
+channels:
+  - sagrudd
+  - bioconda
+  - defaults
+  - conda-forge
+dependencies:
+  - _libgcc_mutex=0.1=main
+  - _r-mutex=1.0.0=anacondar_1
+  - binutils_impl_linux-64=2.33.1=he6710b0_7
+  - binutils_linux-64=2.33.1=h9595d00_15
+  - bioconductor-annotate=1.64.0=r36_0
+  - bioconductor-annotationdbi=1.48.0=r36_0
+  - bioconductor-biobase=2.46.0=r36h516909a_0
+  - bioconductor-biocgenerics=0.32.0=r36_0
+  - bioconductor-biocparallel=1.20.0=r36he1b5a44_0
+  - bioconductor-delayedarray=0.12.0=r36h516909a_0
+  - bioconductor-deseq2=1.26.0=r36he1b5a44_0
+  - bioconductor-genefilter=1.68.0=r36hc99cbb1_0
+  - bioconductor-geneplotter=1.64.0=r36_0
+  - bioconductor-genomeinfodb=1.22.0=r36_0
+  - bioconductor-genomeinfodbdata=1.2.2=r36_0
+  - bioconductor-genomicranges=1.38.0=r36h516909a_0
+  - bioconductor-iranges=2.20.0=r36h516909a_0
+  - bioconductor-s4vectors=0.24.0=r36h516909a_0
+  - bioconductor-summarizedexperiment=1.16.0=r36_0
+  - bioconductor-xvector=0.26.0=r36h516909a_0
+  - bioconductor-zlibbioc=1.32.0=r36h516909a_0
+  - blas=1.0=mkl
+  - bwidget=1.9.11=1
+  - bzip2=1.0.8=h7b6447c_0
+  - ca-certificates=2020.10.14=0
+  - cairo=1.14.12=h8948797_3
+  - certifi=2020.6.20=py36_0
+  - curl=7.69.1=hbc83047_0
+  - cycler=0.10.0=py36_0
+  - dbus=1.13.18=hb2f20db_0
+  - expat=2.2.10=he6710b0_2
+  - filtlong=0.2.0=he513fc3_3
+  - fontconfig=2.13.0=h9420a91_0
+  - freetype=2.10.4=h5ab3b9f_0
+  - fribidi=1.0.10=h7b6447c_0
+  - gcc_impl_linux-64=7.3.0=habb00fd_1
+  - gcc_linux-64=7.3.0=h553295d_15
+  - gffcompare=0.11.2=hc9558a2_1
+  - gfortran_impl_linux-64=7.3.0=hdf63c60_1
+  - gfortran_linux-64=7.3.0=h553295d_15
+  - glib=2.66.1=h92f7085_0
+  - graphite2=1.3.14=h23475e2_0
+  - gsl=2.4=h14c3975_4
+  - gst-plugins-base=1.14.0=hbbd80ab_1
+  - gstreamer=1.14.0=hb31296c_0
+  - gxx_impl_linux-64=7.3.0=hdf63c60_1
+  - gxx_linux-64=7.3.0=h553295d_15
+  - harfbuzz=2.4.0=hca77d97_1
+  - hmmer=3.3.1=he1b5a44_0
+  - icu=58.2=he6710b0_3
+  - intel-openmp=2020.2=254
+  - jpeg=9b=h024ee3a_2
+  - k8=0.2.5=he513fc3_0
+  - kiwisolver=1.2.0=py36hfd86e86_0
+  - krb5=1.17.1=h173b8e3_0
+  - lcms2=2.11=h396b838_0
+  - ld_impl_linux-64=2.33.1=h53a641e_7
+  - libcurl=7.69.1=h20c2e04_0
+  - libedit=3.1.20191231=h14c3975_1
+  - libffi=3.3=he6710b0_2
+  - libgcc-ng=9.1.0=hdf63c60_0
+  - libgfortran-ng=7.3.0=hdf63c60_0
+  - libpng=1.6.37=hbc83047_0
+  - libssh2=1.9.0=h1ba5d50_1
+  - libstdcxx-ng=9.1.0=hdf63c60_0
+  - libtiff=4.1.0=h2733197_1
+  - libuuid=1.0.3=h1bed415_2
+  - libxcb=1.14=h7b6447c_0
+  - libxml2=2.9.10=hb55368b_3
+  - lz4-c=1.9.2=heb0550a_3
+  - make=4.2.1=h1bed415_1
+  - matplotlib=3.3.2=0
+  - matplotlib-base=3.3.2=py36h817c723_0
+  - minimap2=2.17=hed695b0_3
+  - mkl=2020.2=256
+  - mkl-service=2.3.0=py36he904b0f_0
+  - mkl_fft=1.2.0=py36h23d657b_0
+  - mkl_random=1.1.1=py36h0573a6f_0
+  - ncurses=6.2=he6710b0_1
+  - numpy=1.19.2=py36h54aff64_0
+  - numpy-base=1.19.2=py36hfa32c7d_0
+  - olefile=0.46=py36_0
+  - openssl=1.1.1h=h7b6447c_0
+  - pandas=1.1.3=py36he6710b0_0
+  - pango=1.45.3=hd140c19_0
+  - parasail-python=1.2=py36h8b12597_0
+  - pcre=8.44=he6710b0_0
+  - pillow=8.0.0=py36h9a89aac_0
+  - pinfish=200316=2bc39d4_1
+  - pip=20.2.4=py36_0
+  - pixman=0.40.0=h7b6447c_0
+  - pychopper=2.5.0=py_0
+  - pyparsing=2.4.7=py_0
+  - pyqt=5.9.2=py36h05f1152_2
+  - python=3.6.12=hcff3b4d_2
+  - python-dateutil=2.8.1=py_0
+  - python-edlib=1.3.8.post1=py36hc9558a2_0
+  - pytz=2020.1=py_0
+  - qt=5.9.7=h5867ecd_1
+  - r-acepack=1.4.1=r36ha65eedd_0
+  - r-assertthat=0.2.1=r36h6115d3f_0
+  - r-backports=1.1.4=r36h96ca727_0
+  - r-base=3.6.1=haffb61f_2
+  - r-base64enc=0.1_3=r36h96ca727_4
+  - r-bh=1.69.0_1=r36h6115d3f_0
+  - r-bit=1.1_14=r36h96ca727_0
+  - r-bit64=0.9_7=r36h96ca727_0
+  - r-bitops=1.0_6=r36h96ca727_4
+  - r-blob=1.1.1=r36h6115d3f_0
+  - r-checkmate=1.9.1=r36h96ca727_0
+  - r-cli=1.1.0=r36h6115d3f_0
+  - r-cluster=2.0.8=r36ha65eedd_0
+  - r-colorspace=1.4_1=r36h96ca727_0
+  - r-crayon=1.3.4=r36h6115d3f_0
+  - r-data.table=1.12.2=r36h96ca727_0
+  - r-dbi=1.0.0=r36h6115d3f_0
+  - r-dichromat=2.0_0=r36h6115d3f_4
+  - r-digest=0.6.18=r36h96ca727_0
+  - r-evaluate=0.13=r36h6115d3f_0
+  - r-fansi=0.4.0=r36h96ca727_0
+  - r-foreign=0.8_71=r36h96ca727_0
+  - r-formatr=1.6=r36h6115d3f_0
+  - r-formula=1.2_3=r36h6115d3f_0
+  - r-futile.logger=1.4.3=r36h6115d3f_1003
+  - r-futile.options=1.0.1=r36h6115d3f_0
+  - r-ggplot2=3.1.1=r36h6115d3f_0
+  - r-glue=1.3.1=r36h96ca727_0
+  - r-gridextra=2.3=r36h6115d3f_0
+  - r-gtable=0.3.0=r36h6115d3f_0
+  - r-highr=0.8=r36h6115d3f_0
+  - r-hmisc=4.2_0=r36ha65eedd_0
+  - r-htmltable=1.13.1=r36h6115d3f_0
+  - r-htmltools=0.3.6=r36h29659fb_0
+  - r-htmlwidgets=1.3=r36h6115d3f_0
+  - r-jsonlite=1.6=r36h96ca727_0
+  - r-knitr=1.22=r36h6115d3f_0
+  - r-labeling=0.3=r36h6115d3f_4
+  - r-lambda.r=1.2.3=r36h6115d3f_0
+  - r-lattice=0.20_38=r36h96ca727_0
+  - r-latticeextra=0.6_28=r36h6115d3f_0
+  - r-lazyeval=0.2.2=r36h96ca727_0
+  - r-locfit=1.5_9.4=r36hcdcec82_1
+  - r-magrittr=1.5=r36h6115d3f_4
+  - r-markdown=0.9=r36h96ca727_0
+  - r-mass=7.3_51.3=r36h96ca727_0
+  - r-matrix=1.2_17=r36h96ca727_0
+  - r-matrixstats=0.54.0=r36h96ca727_0
+  - r-memoise=1.1.0=r36h6115d3f_0
+  - r-mgcv=1.8_28=r36h96ca727_0
+  - r-mime=0.6=r36h96ca727_0
+  - r-munsell=0.5.0=r36h6115d3f_0
+  - r-nlme=3.1_139=r36ha65eedd_0
+  - r-nnet=7.3_12=r36h96ca727_0
+  - r-pillar=1.3.1=r36h6115d3f_0
+  - r-pkgconfig=2.0.2=r36h6115d3f_0
+  - r-plogr=0.2.0=r36h6115d3f_0
+  - r-plyr=1.8.4=r36h29659fb_0
+  - r-prettyunits=1.0.2=r36h6115d3f_0
+  - r-r6=2.4.0=r36h6115d3f_0
+  - r-rcolorbrewer=1.1_2=r36h6115d3f_0
+  - r-rcpp=1.0.1=r36h29659fb_0
+  - r-rcpparmadillo=0.9.300.2.0=r36h29659fb_0
+  - r-rcurl=1.95_4.12=r36h96ca727_0
+  - r-reshape2=1.4.3=r36h29659fb_0
+  - r-rlang=0.3.4=r36h96ca727_0
+  - r-rpart=4.1_15=r36h96ca727_0
+  - r-rsqlite=2.1.1=r36h29659fb_0
+  - r-rstudioapi=0.10=r36h6115d3f_0
+  - r-scales=1.0.0=r36h29659fb_0
+  - r-snow=0.4_3=r36h6115d3f_0
+  - r-stringi=1.4.3=r36h29659fb_0
+  - r-stringr=1.4.0=r36h6115d3f_0
+  - r-survival=2.44_1.1=r36h96ca727_0
+  - r-tibble=2.1.1=r36h96ca727_0
+  - r-utf8=1.1.4=r36h96ca727_0
+  - r-viridis=0.5.1=r36h6115d3f_0
+  - r-viridislite=0.3.0=r36h6115d3f_0
+  - r-withr=2.1.2=r36h6115d3f_0
+  - r-xfun=0.6=r36h6115d3f_0
+  - r-xml=3.98_1.19=r36h96ca727_0
+  - r-xtable=1.8_4=r36h6115d3f_0
+  - r-yaml=2.2.0=r36h96ca727_0
+  - racon=1.4.13=he513fc3_0
+  - readline=8.0=h7b6447c_0
+  - scipy=1.5.2=py36h0b6359f_0
+  - seaborn=0.11.0=py_0
+  - setuptools=50.3.0=py36hb0f4dca_1
+  - sip=4.19.8=py36hf484d3e_0
+  - six=1.15.0=py_0
+  - sqlite=3.33.0=h62c20be_0
+  - tk=8.6.10=hbc83047_0
+  - tktable=2.10=h14c3975_0
+  - tornado=6.0.4=py36h7b6447c_1
+  - tqdm=4.7.2=py36_0
+  - wheel=0.35.1=py_0
+  - xz=5.2.5=h7b6447c_0
+  - zlib=1.2.11=h7b6447c_3
+  - zstd=1.4.5=h9ceee32_0
diff --git a/scripts/concatenate.py b/scripts/concatenate.py
new file mode 100644
index 0000000..63d3aaa
--- /dev/null
+++ b/scripts/concatenate.py
@@ -0,0 +1,48 @@
+#!/usr/bin/env python3
+
+# Concatentate and annotate isoforms and their expression levels 
+
+import pandas as pd
+
+# Concatenate datasets
+l=[]
+for fn in snakemake.input[2:]:
+    df=pd.read_csv(fn,delimiter='\t',usecols=[0,1],index_col=[1])
+    df=df.rename(columns={'counts':fn.split('.counts')[0].split('/')[-1]})
+    l.append(df)
+df=l[0]
+for i in range(1,len(l)):
+    df=df.join(l[i],how='outer')
+df=df.fillna(0)
+
+# Get connection to reference
+ds=pd.read_csv(snakemake.input[1],delimiter='\t',usecols=[2,8],header=None, names=['type','info'])
+ds=ds[ds["type"]=='transcript']
+ds['transcript']=ds.apply(lambda x: x['info'].split('oId "')[1].split('";')[0],1)
+ds['transcript_id']=ds.apply(lambda x: x['info'].split('transcript_id "')[1].split('";')[0],1)
+ds['class_code']=ds.apply(lambda x: x['info'].split('class_code "')[1].split('";')[0],1)
+ds['gene_name']=ds.apply(lambda x: '' if x['class_code']=='u' else x['info'].split('gene_name "')[1].split('";')[0],1)
+ds['ref_transcript']=ds.apply(lambda x: '' if x['class_code']=='u' else x['info'].split('cmp_ref "')[1].split('";')[0],1)
+ds=ds.set_index('transcript')
+df=df.join(ds,how='left')
+
+# Get gene names
+d=[]
+with open(snakemake.input[0],'r') as af:
+    for linia in af:
+        if linia[0]!='#':
+            ln=linia.strip().split()
+            if ln[2]=='gene':
+                gene_name=linia.split('gene_name "')[1].split('"')[0]
+                gene_id=linia.split('gene_id "')[1].split('"')[0]
+                gene_type=linia.split('gene_type "')[1].split('"')[0]
+                d.append([gene_name,gene_id,gene_type])
+d=pd.DataFrame(data=d,columns=['gene_name','gene_id','gene_type'])
+
+df=df.merge(d,how='left',on='gene_name')
+df=df.drop(columns=['info','type'])
+cols = list(df.columns)
+cols_new = ["class_code"] + cols[:cols.index("class_code")] + cols[cols.index("class_code")+ 1:]
+df=df.reindex(columns = cols_new)
+df.index = df["transcript_id"]
+df.sort_index().to_csv(snakemake.output[0],index=False)
diff --git a/scripts/deseq2norm.R b/scripts/deseq2norm.R
new file mode 100644
index 0000000..5a05568
--- /dev/null
+++ b/scripts/deseq2norm.R
@@ -0,0 +1,27 @@
+# Normalize isoform expression across all conditions
+
+library("DESeq2")
+
+samples <- snakemake@config["samples"]$samples
+data <- read.csv(file=snakemake@input[["counts"]], header=TRUE, sep=",")
+countData <- data[colnames(data)[2:(length(samples) + 1)]]
+countData <- as.matrix(countData)
+
+name <- colnames(data)[2:(length(samples) + 1)]
+condition <- sapply(strsplit(name,"_"), `[`, 1)
+colData <- cbind(name,condition)
+
+dds <- DESeqDataSetFromMatrix(countData,colData,design=~condition)
+dds <- estimateSizeFactors(dds)
+Pcounts <- counts(dds, normalized=TRUE)
+
+tpm <- function(x) {
+  x * 1000000/sum(x)
+}
+
+Pcounts <- apply(Pcounts,2,tpm)
+
+data[colnames(data)[2:(length(samples) + 1)]] <- Pcounts
+
+data <- data[order(data$transcript_id),]
+write.table(data,file=snakemake@output[["norm_counts"]],row.names = FALSE,quote=FALSE,sep=",")
diff --git a/scripts/filter_genes.py b/scripts/filter_genes.py
new file mode 100644
index 0000000..318f153
--- /dev/null
+++ b/scripts/filter_genes.py
@@ -0,0 +1,47 @@
+# Determine which genes to analyze in the input
+
+import pandas as pd
+import os
+from Bio import SeqIO
+
+
+# Determines if a gene already exists, and create a dummy file and folder if it does
+def gene_exists_file_func(gene):
+    gene_temp_dir = os.path.join(snakemake.output[0],gene)
+    gene_final_dir = os.path.join(snakemake.params["genes_final_dir"],gene)
+
+    gene_exists_file = os.path.join(snakemake.output[0],"%s.txt" %(gene))
+    if (not os.path.isfile(gene_exists_file)):
+        f = open(gene_exists_file, "w+")
+        f.close()
+
+    if ((not os.path.isdir(gene_final_dir)) and (not os.path.isdir(gene_temp_dir))):
+        os.makedirs(gene_temp_dir)
+
+
+gene_file = snakemake.config["gene_file"]
+genes = list(set(pd.read_csv(gene_file, sep = "\t", names = ["gene_symbol"])["gene_symbol"]))
+os.makedirs(snakemake.output[0])
+
+# If the annotation file is given
+if (snakemake.config["annotation"]):
+    annotate_df = pd.read_csv(snakemake.input[0], sep = "\t", header = None)
+    annotate_df = annotate_df[annotate_df[2]  != "exon"]
+    annotate_lines = list(annotate_df[8])
+
+    for ann in range(len(annotate_lines)):
+        if ("gene_name" in annotate_lines[ann]):
+            gene = annotate_lines[ann].split('gene_name "')[1].split('"')[0]
+            if (gene in genes):
+                gene_exists_file_func(gene)
+else: # If only the transcriptome file is given
+    transcripts_filename = snakemake.input[0]
+    transcripts = SeqIO.index(transcripts_filename, "fasta")
+    try:
+        for transcript in transcripts:
+            gene = transcipts[transcript].id.split("|")[1]
+            if (gene in genes):
+                gene_exists_file_func(gene)
+    except:
+        for gene in genes:
+            gene_exists_file_func(gene)
diff --git a/scripts/gene_to_transcripts.py b/scripts/gene_to_transcripts.py
new file mode 100644
index 0000000..ecd0f1c
--- /dev/null
+++ b/scripts/gene_to_transcripts.py
@@ -0,0 +1,68 @@
+#!/usr/bin/env python3
+
+# Extract the transcripts that are linked to their respective gene
+
+import pandas as pd
+import numpy as np
+from Bio import SeqIO
+
+# Import transcript isoform sequences
+transcripts_filename = snakemake.input[1]
+transcripts = SeqIO.index(transcripts_filename, "fasta")
+
+output = []
+gene_name = snakemake.params[0]
+
+# If the annotation file is given
+if (snakemake.config["annotation"]):
+    # Import the nanopore annotation file
+    annotation_filename = snakemake.input[0]
+    annotate_df = pd.read_csv(annotation_filename,sep = "\t", header = None)
+    #annotation = pd.read_csv(sep = "\t", skiprows = 5, names = ["chr","type","info"],usecols = [0,2,8])
+    #annotate_df = pd.read_csv(annotation_filename,sep = "\t", skiprows = 5, names = ["chr","type","info"],usecols = [0,2,8], header = None)
+
+    annotate_df = annotate_df[annotate_df[2]  != "exon"]
+    annotate_lines = list(annotate_df[8])
+
+    # Mapping gene name to transcriptID
+    gene_tID = dict()
+
+    for ann in range(len(annotate_lines)):
+        if ("gene_name" in annotate_lines[ann]):
+            line = annotate_lines[ann].split(";")
+            #tID = line[0].split(" ")[-1][1:-1]
+            tID = annotate_lines[ann].split('transcript_id "')[1].split('"')[0]
+            #gene = line[2].split(" ")[-1][1:-1]
+            gene = annotate_lines[ann].split('gene_name "')[1].split('"')[0]
+            if (gene not in gene_tID): gene_tID[gene] = [tID]
+            else: gene_tID[gene].append(tID)
+
+    # Extracting isoforms from related genes
+    try:
+        gene_tID[gene_name]
+    except KeyError as error:
+        print("The gene %s was not found in the annotation file." %error)
+        raise
+
+    for tID in gene_tID[gene_name]:
+        output.append(">" + tID)
+        output.append(str(transcripts[tID].seq))
+# If only the transcriptome file is given
+else:
+    for transcript in transcripts:
+        try:
+            if (gene_name == transcipts[transcript].id.split("|")[1]):
+                output.append(">" + transcipts[transcript].id.split("|")[0])
+                output.append(str(transcipts[transcript].seq))
+        except:
+            output.append(">" + transcipts[transcript].id.split("|")[0])
+            output.append(str(transcipts[transcript].seq))
+
+    if (output == []):
+        print("The gene %s was not found in the transcriptome file.")
+        raise
+
+output_filename = snakemake.output[0]
+output_file = open(output_filename,"w+")
+output_file.write("\n".join(output))
+output_file.close()
diff --git a/scripts/individual_transcript_analysis.py b/scripts/individual_transcript_analysis.py
new file mode 100644
index 0000000..0c5a1ac
--- /dev/null
+++ b/scripts/individual_transcript_analysis.py
@@ -0,0 +1,67 @@
+#!/usr/bin/env python3
+
+# Individual transcript analysis
+
+count = 0
+output = [">" + snakemake.wildcards["transcript"]]
+
+for filename in snakemake.input:
+    file = open(filename,"r")
+    lines = file.readlines()
+    file.close()
+
+    if("ss" in filename):
+        output.append("#####SS Analysis")
+        for position in lines:
+            output.append(position.strip())
+
+    elif ("iupred2a" in filename):
+        output.append("#####IUPred2A Analysis")
+        lines = lines[7:]
+        for position in lines:
+            output.append(position.strip())
+
+    elif("domains" in filename):
+        output.append("#####Domain Analysis")
+        lines = lines[5:]
+        for region in lines:
+            if (not region.startswith("#")):
+                region_split = region.strip().split("\t")
+                if (region_split[1] == "Pfam"):
+                    region_short = "\t".join([region_split[3],region_split[4],region_split[8].split(";")[3].split("=")[-1]])
+                    output.append(region_short.strip())
+            else:
+                break
+
+    elif("prion" in filename):
+        output.append("#####Prion Analysis")
+        for prionsite in lines:
+            if (not prionsite.startswith("#")):
+                prionsite_split = prionsite.strip().split("\t")
+                prionsite_short = "\t".join([prionsite_split[2],prionsite_split[15]])
+                output.append(prionsite_short)
+
+    elif("sites" in filename):
+        output.append("#####Prosite Analysis")
+        modification = ""
+        site_only = False
+        for site in lines:
+            if (site.startswith(">")):
+                modification = site.strip().split(" ")[3]
+                site_only = "site" in site
+            else:
+                site_split = site.strip().replace("  "," ").split(" ")
+                if ((len(site_split) > 2) and site_only):
+                    site_short = "\t".join([site_split[0],site_split[2],modification])
+                    output.append(site_short)
+
+    elif ("protein" in filename):
+        output.append("#####AA Sequence")
+        lines = lines[1:]
+        output.append(lines[0].strip())
+
+output.append("")
+
+output_file = open(snakemake.output[0],"w+")
+output_file.write("\n".join(output))
+output_file.close()
diff --git a/scripts/nonredundant.py b/scripts/nonredundant.py
new file mode 100644
index 0000000..fbe2434
--- /dev/null
+++ b/scripts/nonredundant.py
@@ -0,0 +1,23 @@
+#!/usr/bin/env python3
+
+# Select only non-redundant transcripts
+
+import pandas as pd
+from Bio import SeqIO
+
+transcripts_filename = snakemake.input[0]
+transcripts = SeqIO.index(transcripts_filename, "fasta")
+
+ds=pd.read_csv(snakemake.input[1],delimiter='\t',usecols=[2,8],header=None, names=['type','info'])
+ds=ds[ds["type"]=='transcript']
+ds['transcript_id']=ds.apply(lambda x: x['info'].split('oId "')[1].split('";')[0],1)
+non_redundant_tid = set(ds["transcript_id"])
+output = []
+for transcript in transcripts:
+    if (transcript in non_redundant_tid):
+        output.append(">" + transcript.strip())
+        output.append(str(transcripts[transcript].seq).strip())
+output_filename = snakemake.output[0]
+output_file = open(output_filename, "w+")
+output_file.writelines("\n".join(output))
+output_file.close()
diff --git a/scripts/reads_to_transcripts.py b/scripts/reads_to_transcripts.py
new file mode 100644
index 0000000..2415780
--- /dev/null
+++ b/scripts/reads_to_transcripts.py
@@ -0,0 +1,35 @@
+#!/usr/bin/env python3
+
+# Converts transcriptome file from reads (oID names) to transcripts name (TCONS)
+
+from Bio import SeqIO
+import pandas as pd
+
+transcripts_filename = snakemake.input[0]
+transcripts = SeqIO.index(transcripts_filename, "fasta")
+
+annotation_filename = snakemake.input[1]
+
+ds=pd.read_csv(annotation_filename,delimiter='\t',usecols=[2,8],header=None, names=['type','info'])
+ds=ds[ds["type"]=='transcript']
+ds['transcript']=ds.apply(lambda x: x['info'].split('oId "')[1].split('";')[0],1)
+ds['transcript_id']=ds.apply(lambda x: x['info'].split('transcript_id "')[1].split('";')[0],1)
+test = dict()
+index = list(ds['transcript'])
+refs = list(ds["transcript_id"])
+for i in range(len(index)):
+    test[index[i]] = refs[i]
+
+
+output = []
+for transcript in transcripts:
+    try:
+        output.append(">" + str(test[transcripts[transcript].id]))
+        output.append(str(transcripts[transcript].seq))
+    except:
+        pass
+
+output_filename = snakemake.output[0]
+output_file = open(output_filename,"w+")
+output_file.write("\n".join(output))
+output_file.close()
diff --git a/scripts/translation_to_protein.py b/scripts/translation_to_protein.py
new file mode 100644
index 0000000..8d47e42
--- /dev/null
+++ b/scripts/translation_to_protein.py
@@ -0,0 +1,134 @@
+# Translation of transcript sequences to amino acid sequences
+
+from Bio import SeqIO
+import os
+import pandas as pd
+
+codon_table = {
+        'ATA':'I', 'ATC':'I', 'ATT':'I', 'ATG':'M',
+        'ACA':'T', 'ACC':'T', 'ACG':'T', 'ACT':'T',
+        'AAC':'N', 'AAT':'N', 'AAA':'K', 'AAG':'K',
+        'AGC':'S', 'AGT':'S', 'AGA':'R', 'AGG':'R',
+        'CTA':'L', 'CTC':'L', 'CTG':'L', 'CTT':'L',
+        'CCA':'P', 'CCC':'P', 'CCG':'P', 'CCT':'P',
+        'CAC':'H', 'CAT':'H', 'CAA':'Q', 'CAG':'Q',
+        'CGA':'R', 'CGC':'R', 'CGG':'R', 'CGT':'R',
+        'GTA':'V', 'GTC':'V', 'GTG':'V', 'GTT':'V',
+        'GCA':'A', 'GCC':'A', 'GCG':'A', 'GCT':'A',
+        'GAC':'D', 'GAT':'D', 'GAA':'E', 'GAG':'E',
+        'GGA':'G', 'GGC':'G', 'GGG':'G', 'GGT':'G',
+        'TCA':'S', 'TCC':'S', 'TCG':'S', 'TCT':'S',
+        'TTC':'F', 'TTT':'F', 'TTA':'L', 'TTG':'L',
+        'TAC':'Y', 'TAT':'Y', 'TAA':'_', 'TAG':'_',
+        'TGC':'C', 'TGT':'C', 'TGA':'_', 'TGG':'W',
+        }
+
+# Kozak scoring
+def score(seq,start):
+    kozak = {
+        "A":[0.25,0.61,0.27,0.15,1.00,0.00,0.00,0.23],
+        "C":[0.53,0.02,0.49,0.55,0.00,0.00,0.00,0.16],
+        "G":[0.15,0.36,0.13,0.21,0.00,0.00,1.00,0.46],
+        "T":[0.07,0.01,0.11,0.09,0.00,1.00,0.00,0.15]
+    }
+
+    score = 1.0
+    for i in range(start,len(seq)):
+        score *= kozak[seq[i]][i]
+    return score
+
+# Translation
+def translate(seq, i):
+    translating = True
+    aa = ""
+
+    while(translating):
+        if (len(seq) < 3):
+            translating = False
+            aa += "*"
+        else:
+            codon = seq[0:3]
+            if (codon_table[codon] == "_"):
+                translating = False
+            else:
+                aa += codon_table[codon]
+            seq = seq[3:]
+            i += 3
+    return aa,i
+
+# Translation scoring function
+def translate_aa_seq_length(seq):
+    i = 0
+    uorf_seq = "M"
+    uorf_sc = 0
+    maybe = False
+    uorf_st = 0
+    uorf_end = 0
+    translating = True
+    uorfs_seq_frames = {0:"M",1:"M",2:"M"}
+
+    while (translating):
+        if (i > len(seq)):
+            break
+        elif (seq[i:i+3] == "ATG"):
+            aa,end = translate(seq[i:], i)
+            sc = score(seq[i-4:i+4],0)
+
+            if (aa in uorfs_seq_frames[i % 3]):
+                pass
+            elif (len(aa) < 15 * 3):
+                pass
+            elif (len(aa) > 15 * 10):
+                if (sc > uorf_sc):
+                    return aa,i,end
+                if (len(aa) > 15 * 20):
+                    return aa,i,end
+                if (maybe):
+                    return uorf_seq,uorf_st,uorf_end
+                else:
+                    return aa,i,end
+            elif (sc > uorf_sc):
+                uorf_seq = aa
+                uorf_sc = sc
+                uorf_st = i
+                uorf_end = end
+                maybe = True
+
+            if (aa not in uorfs_seq_frames[i % 3]):
+                uorfs_seq_frames[i % 3] = aa
+
+        i += 1
+    return uorf_seq,uorf_st,uorf_end
+
+def find_all_aa_seqs(seq):
+    longest_aa_seq,start,end = translate_aa_seq_length(seq)
+    return longest_aa_seq,start,end
+
+
+transcripts_filename = snakemake.input[0]
+transcripts = SeqIO.index(transcripts_filename, "fasta")
+output = []
+
+gene = snakemake.params[0]
+if(not os.path.isdir(snakemake.output[0])):
+    os.mkdir(snakemake.output[0])
+
+# Translate each transcript seperatly and place in their own fasta file
+transcript_stats = []
+for transcript in transcripts:
+    seq = str(transcripts[transcript].seq).strip()
+    enst = str(transcripts[transcript].id).split("|")[-1].strip()
+    protein,start,end = find_all_aa_seqs(seq)
+
+    transcript_name =  str(transcripts[transcript].id)
+    transcript_filename = transcript_name.split("|")[0]
+    transcript_filename = transcript_filename.replace("|","_")
+    transcript_filename = transcript_filename.replace("_","")
+    transcript_filename_path = str(snakemake.output[0] + "/%s_protein.fa" %(transcript_filename))
+    transcript_file = open(transcript_filename_path, "w+")
+    transcript_file.write(">" + transcript_name + "\n" + protein + "\n")
+    transcript_file.close()
+
+    transcript_stats.append([transcript_name.split("|")[0],start,end])
+transcript_stats_df = pd.DataFrame(transcript_stats, columns = ["transcript_id","start","end"])
+transcript_stats_df.to_csv(snakemake.output[1], index = False)
diff --git a/scripts/visualization.py b/scripts/visualization.py
new file mode 100644
index 0000000..ede3ea6
--- /dev/null
+++ b/scripts/visualization.py
@@ -0,0 +1,572 @@
+import copy
+import string
+import argparse
+import pandas as pd
+import numpy as np
+import seaborn as sns
+import matplotlib
+import matplotlib.pyplot as plt
+from matplotlib.patches import Rectangle
+from matplotlib.patches import Ellipse
+from matplotlib.backends.backend_pdf import PdfPages
+from matplotlib.transforms import Bbox
+from matplotlib.collections import PatchCollection
+import matplotlib.gridspec as gridspec
+from visualization_helpers import *
+
+
+gene = snakemake.params[0]
+alph = string.ascii_uppercase
+alph += "0123456789!@#$%^&*?;'{}-=+,.~_[]:<>`qwertyuiopasdfghjklzxcvbnm1234567890qwertyuiopasdfghjklzxcvbnmqwertyuiopasdfghjklzxcvbnm"
+
+#grey, orange, turquoise blue, blue green, yellow, royal blue, vermillion, red purple,
+#pastel yellow, brown orange, burgundy, cerulean blue, dark purple, light purple, dark blue green
+#pastel green, harvest brown, brown, sky blue, pink, dark pink
+colors=["#999999", "#E69F00", "#56B4E9", "#009E73", "#0072B2", "#D55E00", "#CC79A7","#F0D648",
+        '#db6d00','#920000', '#6db6ff', '#b66dff', '#490092', '#ffff6d', '#b66dff', '#004949',
+        '#24C324', '#924900', '#9a6324', 'ff6db6', 'CC79A7']
+colors_ptm = ["#DB6D00","#920000","#B66DFF","#0072B2","#24C324"]
+colors_ss3 = ['#FFC20A','#0C7BDC',"white"]
+ss3_abbvs = ["H","E","C"]
+colors_aa = ['#99BFFF', '#F56345', '#E0C830', '#8700F5', 'white']
+aa_abbvs = {"A":"H","C":"S","D":"N","E":"N","F":"H","G":"S","H":"P","I":"H","K":"P","L":"H","M":"H","N":"L","P":"H","Q":"L","R":"P","S":"L","T":"L","V":"H","W":"H","Y":"H"}
+aa_groups = ["P","N","L","S","H"]
+
+# VISUALIZATION FUNCTIONS
+
+# change font
+matplotlib.rcParams['font.sans-serif'] = "Arial"
+matplotlib.rcParams['font.family'] = "sans-serif"
+plt.rc('font', size=14)
+
+def plotAnnotation(annotation, transcripts_plot, colors, longest_length_protein, fig, gs, start_row_panel):
+    curr_row_panel = start_row_panel
+
+    exon_dict = dict()
+    for tcons_curr in transcripts_plot:
+        transcript_annotation = annotation[annotation['transcript_id'] == tcons_curr]
+
+        transcript_exon_start = 0
+        transcript_exon_end = -1
+        if (transcript_annotation.shape[0] > 0):
+            if (transcript_annotation.iloc[0]["strand"] == "-"):
+                transcript_annotation = transcript_annotation.iloc[::-1]
+                plot_start = copy.deepcopy(transcript_annotation["plot_start"])
+                transcript_annotation["plot_start"] = transcript_annotation["plot_stop"]
+                transcript_annotation["plot_stop"] = plot_start
+            for idx,row in transcript_annotation.iterrows():
+                if (row['type'] != 'transcript'):
+                    exon_old_id = row["exon_number"]
+                    start,stop = row['plot_start'],row['plot_stop']
+                    transcript_exon_start = transcript_exon_end + 1
+                    transcript_exon_end += abs(start - stop) + 1
+                    exon = Exon(tcons_curr, exon_old_id, start, stop)
+
+                    if (start in exon_dict):
+                        exon_dict[start].append(exon)
+                    else:
+                        exon_dict[start] = [exon]
+
+                    if (stop in exon_dict):
+                        exon_dict[stop].append(exon)
+                    else:
+                        exon_dict[stop] = [exon]
+
+    all_exons = []
+    curr_exons = []
+    count = 0
+    master = dict()
+    for key,value in sorted(exon_dict.items()):
+        end_exons = []
+        for exon in value:
+            curr_exons.append(exon) if (exon not in curr_exons) else end_exons.append(exon)
+        end_exons.sort()
+        prev_start,prev_end = -1,-1
+        for exon in end_exons:
+            if (prev_start != exon.start or prev_end != exon.stop):
+                count += 1
+                exon.add_new_id(count)
+                all_exons.append(exon)
+            else:
+                exon.add_new_id(count)
+            prev_start,prev_end = exon.start,exon.stop
+            if (exon.tcons not in master):
+                master[exon.tcons] = dict()
+            master[exon.tcons][exon.old_id] = exon.new_id
+
+    count = -1
+    previous_start,previous_stop = -1,-1
+    exon_group = []
+    exon_distances = []
+    for exon in all_exons:
+        if (exon.start > previous_stop):
+            exon_group.append([exon])
+            exon_distances.append([exon.start,exon.stop])
+            previous_start,previous_stop = exon.start,exon.stop
+            count += 1
+        else:
+            exon_group[count].append(exon)
+            if (exon.stop > previous_stop):
+                previous_stop = exon.stop
+                exon_distances[count][1] = exon.stop
+            if (exon.start < previous_start):
+                previous_start = exon.start
+                exon_distances[count][0] = exon.start
+
+    exon_distances_original = copy.deepcopy(exon_distances)
+    previous_stop = -1
+    for exon_dist in exon_distances:
+        start,stop = exon_dist[0],exon_dist[1]
+        if ((start - previous_stop) > 200):
+            exon_dist[0] = previous_stop + 200
+            exon_dist[1] = exon_dist[0] + stop - start
+        previous_stop = exon_dist[1]
+
+    for exon_similar_group in range(len(exon_group)):
+        for exon in exon_group[exon_similar_group]:
+            start,stop = exon.start,exon.stop
+            if (exon.start != exon_distances[exon_similar_group][0]):
+                exon.start = exon_distances[exon_similar_group][0] + (exon.start - exon_distances_original[exon_similar_group][0])
+                exon.stop = exon.start + (stop - start)
+
+    for tcons_curr in transcripts_plot:
+        transcript_annotation = annotation[annotation['transcript_id'] == tcons_curr]
+        transcript_annotation["exon_number"] = transcript_annotation.apply(lambda x: master[tcons_curr][str(x["exon_number"])] if (x["type"] != "transcript") else 0, axis = 1)
+        transcript_exons = transcript_annotation.iloc[1:]
+        start = all_exons[transcript_exons.loc[transcript_exons['exon_number'].idxmin()]["exon_number"] - 1].start
+        stop = all_exons[transcript_exons.loc[transcript_exons['exon_number'].idxmax()]["exon_number"] - 1].stop
+        transcript_annotation["plot_start"] = transcript_annotation.apply(lambda x: all_exons[x["exon_number"] - 1].start if (x["type"] != "transcript") else start, axis = 1)
+        transcript_annotation["plot_stop"] = transcript_annotation.apply(lambda x: all_exons[x["exon_number"] - 1].stop if (x["type"] != "transcript") else stop, axis = 1)
+        annotation[annotation['transcript_id'] == tcons_curr] = transcript_annotation
+
+    # Plot Transcript
+    longest = max(annotation.loc[annotation['plot_start'].idxmax()]["plot_start"],annotation.loc[annotation['plot_stop'].idxmax()]["plot_stop"])
+    for tcons_curr in transcripts_plot:
+        transcript_color = colors[curr_row_panel - start_row_panel]
+        ax = fig.add_subplot(gs[curr_row_panel, :])
+        ax.set_xlim((-10, longest+1))
+        ax.set_ylim((0, 1.5))
+        ax.text(0,1,tcons_curr,horizontalalignment = "left",verticalalignment = "top", fontsize = 14, color = "black")
+
+        transcript_annotation = annotation[annotation['transcript_id'] == tcons_curr]
+
+        transcript_info = transcript_annotation.iloc[0]
+        plt.plot([transcript_info["plot_start"],transcript_info["plot_stop"]],[0.5,0.5],color=transcript_color,linewidth=2)
+
+        transcript_annotation = transcript_annotation.iloc[1:]
+        if (transcript_annotation.iloc[0]["strand"] == "-"):
+            transcript_annotation = transcript_annotation.iloc[::-1]
+        prev_intron_start = -1
+        for idx,row in transcript_annotation.iterrows():
+            exon_start,exon_stop = row['plot_start'],row['plot_stop']
+            exon = alph[int(row["exon_number"]) - 1]
+            plt.plot([exon_start,exon_stop],[0.5,0.5],color = transcript_color,linewidth = 15)
+            ax.annotate(exon,(exon_start/2 + exon_stop/2,0.49), fontsize = 14, color = "white", weight='bold', ha = "center", va = "center")
+
+            if (prev_intron_start > 0 and exon_start - prev_intron_start >= 200):
+                intron_shortening = [exon_distances[group - 1][1]/2 + exon_distances[group][0]/2 for group in range(len(exon_distances)) if (prev_intron_start <= exon_distances[group][0] and exon_start >= exon_distances[group][0])]
+                for intron in intron_shortening:
+                    plt.plot([intron - 10,intron + 10],[0.5,0.5],color = "white",linewidth = 3)
+                    plt.plot([intron - 20,intron - 10],[0.3,0.7],color = transcript_color,linewidth = 2)
+                    plt.plot([intron + 10,intron + 20],[0.3,0.7],color = transcript_color,linewidth = 2)
+            prev_intron_start = exon_stop
+
+        ax.spines['top'].set_visible(False)
+        ax.spines['right'].set_visible(False)
+        ax.spines['left'].set_visible(False)
+        ax.spines['bottom'].set_visible(False)
+        ax.set_yticks([], [])
+        ax.set_xticks([], [])
+
+        curr_row_panel += 1
+
+def plotExonLines(protein_exon_start,protein_exon_end,bottom,buffer_exon,exon_draw):
+    for feature_start,feature_stop in exon_draw:
+        plt.plot((protein_exon_start - 1,protein_exon_start - 1),(feature_start,feature_stop),color = "grey", linewidth = 2)
+        plt.plot((protein_exon_end  - 1,protein_exon_end - 1),(feature_start,feature_stop),color = "grey", linewidth = 2)
+    plt.plot((protein_exon_start - 1,protein_exon_start - 1),(bottom,bottom + buffer_exon),color = "grey", linewidth = 2)
+    plt.plot((protein_exon_end - 1,protein_exon_end - 1),(bottom,bottom + buffer_exon),color = "grey", linewidth = 2)
+
+def plotSequenceAnalysis(transcripts_plot, colors, longest_length_protein, fig, gs, start_row_panel, annotation = None):
+    curr_row_panel = start_row_panel
+
+    for isoform in range(len(transcripts_plot)):
+        tcons_curr = transcripts[transcripts_plot[isoform]]
+
+        transcript_color = colors[isoform]
+
+        buffer_prion = 1.0
+        buffer_idr = 1.0
+        buffer_domain = 0.4
+        buffer_exon = 0.25
+        buffer_aa = 0.3
+        buffer_ss = 0.25
+        buffer_phos = 0.075
+        buffer_den = 0.5
+
+        lg = fig.add_subplot(gs[curr_row_panel:curr_row_panel+5, 0])
+        lg.set_xlim((-0.11,0.11))
+
+        ax = fig.add_subplot(gs[curr_row_panel:curr_row_panel+5, 1:])
+        ax.set_xlim((-0.5,longest_length_protein + 1))
+        ax.patch.set_facecolor(transcript_color)
+        ax.patch.set_alpha(0.15)
+
+        lg.set_xlabel('AA Position', fontsize = 14)
+        plt.plot((0,len(tcons_curr.aa)), (0,0),color = "black")
+        bottom = 0
+        exon_draw = []
+
+        # Prion score
+        if (snakemake.config["prion"]):
+            plc_df = pd.DataFrame(tcons_curr.plc[1:], columns = tcons_curr.plc[0])
+            plc_df = plc_df.astype({'AANUM': 'int32',"HMM.PrD-like":"float"})
+            plc_df["AANUM"] = plc_df["AANUM"] - 0.5
+            plt.plot(plc_df["AANUM"], bottom + plc_df["HMM.PrD-like"], color = transcript_color)
+            lg.annotate("Prion-Like", (0,bottom + buffer_prion/2), fontsize = 14, color = "black", ha = "center", va = "center")
+            plt.plot((0,len(tcons_curr.aa)), (bottom + 1,bottom + 1),color = "black")
+            bottom += 1.05
+            exon_draw.append([bottom - 1.05,bottom])
+
+        # IDR region
+        if (snakemake.config["iupred2a"]):
+            idr_df = pd.DataFrame(tcons_curr.idr[1:], columns = tcons_curr.idr[0])
+            idr_df = idr_df.astype({'# POS': 'int32',"IUPRED2":"float"})
+            idr_df["# POS"] = idr_df["# POS"] - 0.5
+            plt.plot(idr_df["# POS"], bottom + idr_df["IUPRED2"], color = transcript_color)
+            lg.annotate("Disordered",(0,bottom + buffer_idr * 0.65), fontsize = 14, color = "black", ha = "center", va = "center", weight='bold')
+            lg.annotate("Region",(0,bottom + buffer_idr * 0.35), fontsize = 14, color = "black", ha = "center", va = "center", weight='bold')
+            lg.plot((-0.1,0.1), (bottom + 1,bottom + 1),color = "black")
+            bottom += 1.05
+            exon_draw.append([bottom - 1.05,bottom])
+
+        # Protein domain
+        r = fig.canvas.get_renderer()
+        if (snakemake.config["pfam"]):
+            for i in tcons_curr.dom:
+                start,stop,name = (int(i[0]) - 1),(int(i[1]) - 1),i[2].strip()
+                name = name.replace(" domain","")
+                plt.gca().add_patch(Rectangle((start, bottom), stop - start, buffer_domain, edgecolor = transcript_color, facecolor = transcript_color, alpha = 0.5))
+                text = ax.annotate(name, (start/2.0 + stop/2.0, bottom + buffer_domain/2), fontsize = 14, color = "black", weight='bold', ha = "center", va = "center")
+                bbtext =  Bbox(ax.transData.inverted().transform(text.get_window_extent(renderer = r)))
+                if (bbtext.x0 < start and bbtext.x1 > stop):
+                    text.set_visible(False)
+                    name_split = name.split(" ")
+                    name_top = " ".join(name_split[:int((len(name_split)/2))])
+                    name_bottom = " ".join(name_split[int((len(name_split)/2)):])
+                    ax.annotate(name_top, (start/2.0 + stop/2.0, bottom + buffer_domain * 3/4), fontsize = 14, color = "black", weight='bold', ha = "center", va = "center")
+                    ax.annotate(name_bottom, (start/2.0 + stop/2.0, bottom + buffer_domain * 1/4), fontsize = 14, color = "black", weight='bold', ha = "center", va = "center")
+            lg.annotate("Domain", (0, bottom + buffer_domain/2), fontsize = 14, color = "black", ha = "center", va = "center", weight='bold')
+            bottom += 0.5
+
+        # Secondary structure prediction
+        if (snakemake.config["porter"]):
+            ss3_df = pd.DataFrame(tcons_curr.ss3[1:], columns = tcons_curr.ss3[0])
+            ss = list(ss3_df["SS"])
+            for i in range(len(tcons_curr.aa)):
+                plt.gca().add_patch(Rectangle((i,bottom),1,buffer_ss,edgecolor=colors_ss3[ss3_abbvs.index(ss[i])],facecolor=colors_ss3[ss3_abbvs.index(ss[i])]))
+            lg.annotate("Structure",(0,bottom + buffer_ss/2), fontsize = 14, color = "black", ha = "center", va = "center", weight='bold')
+            bottom += 0.4
+
+        # Amino acid sequence
+        if (snakemake.config["aa"]):
+            for i in range(len(tcons_curr.aa)):
+                plt.gca().add_patch(Rectangle((i,bottom),1,buffer_aa, edgecolor=colors_aa[aa_groups.index(aa_abbvs[tcons_curr.aa[i]])],facecolor=colors_aa[aa_groups.index(aa_abbvs[tcons_curr.aa[i]])]))
+            lg.annotate("Amino Acid",(0,bottom + buffer_aa/2), fontsize = 14, color = "black", ha = "center", va = "center", weight='bold')
+            bottom += 0.4
+
+        # Phosphorlyation Site
+        if (snakemake.config["pfScan"]):
+            row_phos = 0
+            sites_focus = ["AMIDATION","MYRISTYL","GLYCOSYLATION","HYDROXYL","PHOSPHO_SITE"]
+
+            plt.plot([0,len(tcons_curr.aa)],[bottom + 0.1,bottom + 0.1], linewidth = 4, color = "#919191")
+            for site_type in sites_focus:
+                if (site_type in tcons_curr.pss):
+                    for i in tcons_curr.pss[site_type]:
+                        start,stop = (int(i[0]) - 1),(int(i[1]) - 1)
+                        mid = start/2 + stop/2
+                        radius_x = longest_length_protein/150 + 1
+                        radius_y = -longest_length_protein/8000 + 0.22
+                        patches = [Rectangle((mid - 0.5,bottom + 0.1),1,0.2,facecolor=colors_ptm[row_phos])]
+                        patches.append(Ellipse((mid,bottom + 0.3),radius_x,radius_y,facecolor=colors_ptm[row_phos]))
+                        p = PatchCollection(patches)
+                        p.set_color(colors_ptm[row_phos])
+                        ax.add_collection(p)
+                row_phos += 1
+
+            lg.annotate("PTM",(0,bottom + row_phos * buffer_phos/2), fontsize = 14, color = "black", ha = "center", va = "center", weight='bold')
+            bottom += 0.45
+
+            # Phosphorlyation Site Density
+            density = [0.0] * len(tcons_curr.aa)
+            for site_type in tcons_curr.pss:
+                for i in tcons_curr.pss[site_type]:
+                    start,stop = (int(i[0]) - 1),(int(i[1]) - 1)
+                    hamming_values = list(np.hamming(stop - start + 1 + 10))
+                    for j in range(max(0,start - 5),min(stop + 1 + 5,len(tcons_curr.aa))):
+                        density[j] += hamming_values[j - max(0,start - 5)]
+            density = pd.DataFrame(density)
+            density = (density/density.max()) * 0.5
+            density_x = [i + 0.5 for i in range(len(tcons_curr.aa))]
+            plt.plot(density_x, density + bottom, color =  transcript_color)
+            lg.annotate("PTM Density",(0,bottom + buffer_den/2), fontsize = 13, color = "black", ha = "center", va = "center", weight='bold')
+            bottom += 0.55
+            exon_draw.append([bottom - 0.6,bottom])
+
+        # Exons
+        if (snakemake.config["annotation"]):
+            transcript_annotation = annotation[annotation['transcript_id'] == transcripts_plot[isoform]]
+            transcript_stats = transcript_stats_df[transcript_stats_df["transcript_id"] == transcripts_plot[isoform]]
+            translate = False
+            protein_start,protein_end = int(transcript_stats["start"]),int(transcript_stats["end"])
+            transcript_exon_start = 0
+            transcript_exon_end = -1
+            protein_exon_start = 0
+            protein_exon_end = 0
+            if (transcript_annotation.shape[0] > 0):
+                if (transcript_annotation.iloc[0]["strand"] == "-"):
+                    transcript_annotation = transcript_annotation.iloc[::-1]
+                for idx,row in transcript_annotation.iterrows():
+                    if (row['type'] != 'transcript'):
+                        start = row['plot_start']
+                        stop = row['plot_stop']
+                        transcript_exon_start = transcript_exon_end + 1
+                        transcript_exon_end += abs(start - stop) + 1
+                        exon = alph[int(row["exon_number"]) - 1]
+
+                        if ((transcript_exon_start <= protein_end) and (protein_end <= transcript_exon_end)):
+                            translate = False
+                            protein_exon_start = protein_exon_end
+                            protein_exon_end = (protein_end - protein_start)/3
+                            plotExonLines(protein_exon_start,protein_exon_end,bottom,buffer_exon,exon_draw)
+                            ax.annotate(exon,(protein_exon_start/2 + protein_exon_end/2 - 1,bottom + buffer_exon/2), fontsize = 14, color = "black", weight='bold', ha = "center", va = "center")
+                            break
+
+                        if (translate):
+                            protein_exon_start = protein_exon_end
+                            protein_exon_end += abs(start - stop)/3
+                            plotExonLines(protein_exon_start,protein_exon_end,bottom,buffer_exon,exon_draw)
+                            ax.annotate(exon,(protein_exon_start/2 + protein_exon_end/2 - 1,bottom + buffer_exon/2), fontsize = 14, color = "black", weight='bold', ha = "center", va = "center")
+                            protein_exon_start = protein_exon_end
+
+                        if ((transcript_exon_start <= protein_start) and (protein_start <= transcript_exon_end)):
+                            translate = True
+                            protein_exon_start = 0
+                            protein_exon_end = abs(transcript_exon_end - protein_start)/3
+                            plotExonLines(protein_exon_start,protein_exon_end,bottom,buffer_exon,exon_draw)
+                            ax.annotate(exon,(protein_exon_start/2 + protein_exon_end/2 - 1,bottom + buffer_exon/2), fontsize = 14, color = "black", weight='bold', ha = "center", va = "center")
+            lg.annotate("Exon",(0,bottom + buffer_exon/2), fontsize = 14, color = "black", ha = "center", va = "center", weight='bold')
+            bottom += 0.25
+
+        plt.gca().add_patch(Rectangle((len(tcons_curr.aa), 0), longest_length_protein - len(tcons_curr.aa) + 1, bottom, edgecolor = "white", facecolor = "white"))
+
+        plt.plot((0,len(tcons_curr.aa)), (bottom,bottom),color = "black")
+        ax.set_title(tcons_curr.tcons + str("***" if tcons_curr.ns_decay else ""), loc = "left", fontsize = 14)
+        # Finish Formatting
+        lg.set_ylim((0, bottom))
+        lg.spines['top'].set_visible(False)
+        lg.spines['right'].set_visible(False)
+        lg.spines['left'].set_visible(False)
+        #lg.spines['bottom'].set_visible(False)
+        lg.set_xticks([], [])
+        lg.set_yticks([], [])
+
+        ax.set_ylim((0, bottom))
+        plt.plot((0,0),(0,bottom),color = "black")
+        plt.plot((len(tcons_curr.aa),len(tcons_curr.aa)),(0,bottom),color = "black")
+        ax.spines['top'].set_visible(False)
+        ax.spines['right'].set_visible(False)
+        ax.spines['left'].set_visible(False)
+        #ax.spines['bottom'].set_visible(False)
+        ax.set_yticks([], [])
+        curr_row_panel += total_features
+
+def plotLegend(plt,fig,gs,total_gs):
+    ax = fig.add_subplot(gs[total_gs-2:total_gs, 0:5])
+    ax.set_xlim((0, 13))
+    ax.set_ylim((0, 2))
+    mods_abbvs = ["Amidation","Myristoylation","Glycoslyation","Hydroxylation","Phosphorlyation"]
+    for i in range(0,len(mods_abbvs)):
+        plt.gca().add_patch(Rectangle((3,float(i*2)/len(mods_abbvs)),3,1.0/len(mods_abbvs),edgecolor=colors_ptm[i],facecolor=colors_ptm[i]))
+        ax.text(7,float(i*2)/len(mods_abbvs) + 0.2,mods_abbvs[i],horizontalalignment = "left",verticalalignment = "top")
+    ax.annotate("Post-translation Modification (PTM)",(6.75,2), fontsize = 14, color = "black", ha = "center", va = "center", weight='bold')
+    ax.axis("off")
+
+    ax = fig.add_subplot(gs[total_gs-2:total_gs, 5:10])
+    ax.set_xlim((0, 13))
+    ax.set_ylim((0, 2))
+    ss_abbvs = ["Alpha Helix","Beta Sheet","Other"]
+    for i in range(0,len(ss_abbvs)):
+        if (ss_abbvs[i] == "Other"):
+            plt.gca().add_patch(Rectangle((3,float(i*2)/5),3,1.0/5,edgecolor="black",facecolor=colors_ss3[i]))
+        else:
+            plt.gca().add_patch(Rectangle((3,float(i*2)/5),3,1.0/5,edgecolor=colors_ss3[i],facecolor=colors_ss3[i]))
+        ax.text(7,float(i*2)/5 + 0.25,ss_abbvs[i],horizontalalignment = "left",verticalalignment = "top")
+    ax.annotate("Secondary Structure",(6.75,2), fontsize = 14, color = "black", ha = "center", va = "center", weight='bold')
+    ax.axis("off")
+
+    ax = fig.add_subplot(gs[total_gs-2:total_gs, 10:15])
+    ax.set_xlim((0, 13))
+    ax.set_ylim((0, 2))
+    aa_abbvs = ["Positively Charged","Negatively Charged","Polar","Special","Hydrophobic"]
+    for i in range(0,len(aa_abbvs)):
+        if (aa_abbvs[i] == "Hydrophobic"):
+            plt.gca().add_patch(Rectangle((3,float(i*2)/len(aa_abbvs)),3,1.0/len(aa_abbvs),edgecolor="black",facecolor=colors_aa[i]))
+        else:
+            plt.gca().add_patch(Rectangle((3,float(i*2)/len(aa_abbvs)),3,1.0/len(aa_abbvs),edgecolor=colors_aa[i],facecolor=colors_aa[i]))
+        ax.text(7,float(i*2)/len(aa_abbvs) + 0.2,aa_abbvs[i],horizontalalignment = "left",verticalalignment = "top")
+    ax.annotate("Amino Acid",(6.75,2), fontsize = 14, color = "black", ha = "center", va = "center", weight='bold')
+    ax.axis("off")
+
+######## Actual Code
+
+gene_functional_analysis_file = open(snakemake.input[0], "r")
+lines = gene_functional_analysis_file.readlines()
+gene_functional_analysis_file.close()
+transcript_stats_df = pd.read_csv(snakemake.input[1])
+transcript_stats_df["transcript_id"] = transcript_stats_df["transcript_id"].apply(lambda x: x.replace("_",""))
+
+transcripts = dict()
+analysis = {"aas":False,"idr":False,"dom":False,"ss3":False,"plc":False,"pss":False}
+aa_lines = ""
+
+for line in lines:
+    if (line.startswith(">")):
+        if (len(aa_lines) > 0):
+            transcripts[tcons] = transcript(tcons, ns_decay, aa_lines, idr_lines, dom_lines, ss3_lines, plc_lines, pss_lines)
+        tcons = line.strip()[1:].replace("_","")
+        aa_lines = []
+        idr_lines = []
+        dom_lines = []
+        ss3_lines = []
+        plc_lines = []
+        ns_decay = False
+        pss_lines = {"PHOSPHO_SITE":[],"GLYCOSYLATION":[],"MYRISTYL":[],"HYDROXYL":[],"AMIDATION":[],"OTHER":[]}
+        analysis = {"aas":False,"idr":False,"dom":False,"ss3":False,"plc":False,"pss":False}
+
+    elif(line.startswith("#####Prion Analysis")):
+        analysis["plc"] = True
+    elif(line.startswith("#####Prosite Analysis")):
+        analysis["pss"] = True
+    elif(line.startswith("#####SS Analysis")):
+        analysis["ss3"] = True
+    elif(line.startswith("#####Domain Analysis")):
+        analysis["dom"] = True
+    elif(line.startswith("#####IUPred2A Analysis")):
+        analysis["idr"] = True
+    elif(line.startswith("#####AA Sequence")):
+        analysis["aas"] = True
+
+    elif(analysis["plc"]):
+        plc_lines.append(line.strip().split("\t"))
+    elif(analysis["pss"]):
+        pss_id = line.strip().split("\t")[2]
+        if ("GLYCOSYLATION" in pss_id): pss_lines["GLYCOSYLATION"].append(line.strip().split("\t"))
+        elif ("PHOSPHO_SITE" in pss_id): pss_lines["PHOSPHO_SITE"].append(line.strip().split("\t"))
+        elif ("MYRISTYL" in pss_id): pss_lines["MYRISTYL"].append(line.strip().split("\t"))
+        elif ("HYDROXYL" in pss_id): pss_lines["HYDROXYL"].append(line.strip().split("\t"))
+        elif ("AMIDATION" in pss_id): pss_lines["AMIDATION"].append(line.strip().split("\t"))
+        else: pss_lines["OTHER"].append(line.strip().split("\t"))
+    elif(analysis["ss3"]):
+        ss3_lines.append(line.strip().split("\t"))
+    elif(analysis["dom"]):
+        dom_lines.append(line.strip().split("\t"))
+    elif(analysis["idr"]):
+        idr_lines.append(line.strip().split("\t"))
+    elif(analysis["aas"]):
+        aa_lines = line.strip()
+        if (aa_lines[-1] == "*"):
+            ns_decay = True
+            aa_lines = aa_lines[:-1]
+
+transcripts[tcons] = transcript(tcons, ns_decay, aa_lines, idr_lines, dom_lines, ss3_lines, plc_lines, pss_lines)
+total_features = 0
+for feature in analysis:
+    if analysis[feature]:
+        total_features += 1
+total_features += 1
+
+longest_length = 0
+longest_tcon = ""
+for ids in transcripts:
+    if (len(transcripts[ids].aa) > longest_length):
+        longest_length = len(transcripts[ids].aa)
+        longest_tcon = ids
+
+counts_data_filename = snakemake.input[2] if snakemake.config["quantification"] else None
+annotation_filename = snakemake.input[3] if snakemake.config["annotation"] else None
+args = Parser(counts_data_filename,annotation_filename,snakemake.config["samples"])
+continuous = snakemake.config["continuous"]
+minimumTPM = snakemake.config["minIsoTPM"]
+maximumIso = snakemake.config["maxIsoNum"]
+minimumPct = snakemake.config["minIsoPct"]
+(annotation, data, samples, conditions, number_replicates) = preprocessArguments(args)
+
+
+total_gs = 1
+transcripts_plot = sorted(list(transcripts.keys()))
+if (snakemake.config["quantification"]):
+    df_temp = data[data["gene_name"] == gene]
+    data_gene = df_temp[samples].sum().to_frame().transpose()
+    data_gene = calculateStatistics(data_gene,conditions,number_replicates)
+    df_temp = calculateStatistics(df_temp,conditions,number_replicates)
+    df_temp = (df_temp.filter(like='mean').div(data_gene.filter(like='mean').values[0],1)*100).add_prefix('Pct_').join(df_temp)
+    df_temp = chooseIsoforms2Plot(df_temp,minimumTPM,minimumPct,maximumIso)
+    df_temp["transcript_id"] = df_temp["transcript_id"].apply(lambda x: x.replace("_",""))
+    transcripts_plot = list(pd.unique(df_temp["transcript_id"]))
+    total_gs += 4
+
+if (snakemake.config["annotation"]):
+    transcripts_delete = []
+    for transcript_id in transcripts_plot:
+        transcript_annotation = annotation[annotation['transcript_id']==transcript_id]
+        if ((transcript_annotation.shape[0] < 3)):
+            transcripts_delete.append(transcript_id.replace("_",""))
+    for transcript_id in transcripts_delete:
+        transcripts_plot.remove(transcript_id)
+    total_gs += len(transcripts_plot) + 1
+total_gs += total_features * len(transcripts_plot) + 2
+fig = plt.figure(figsize = (21,total_gs))
+gs = fig.add_gridspec(total_gs, 15)
+
+fig.tight_layout(pad = 5.0)
+fig.subplots_adjust(top = 0.95,bottom = 0.05,left = 0.05, right = 0.95)
+fig.suptitle(gene,fontsize=30)
+
+total_gs = 1
+if (snakemake.config["quantification"]):
+    #plot total expression
+    axg = fig.add_subplot(gs[0:4, 0:4])
+    plotTotal(conditions, data_gene, axg, colors, number_replicates, continuous)
+    total_gs += 4
+
+if (len(transcripts_plot) > 0):
+    if (snakemake.config["quantification"]):
+        df_temp = df_temp[df_temp["transcript_id"].isin(transcripts_plot)]
+
+        #plot isoform expression
+        axg = fig.add_subplot(gs[0:4, 5:10])
+        plotIsoformProfiles(conditions, df_temp, axg, colors, continuous)
+
+        #plot isoform expression percentage
+        axg = fig.add_subplot(gs[0:4, 11:15])
+        plotIsoformStacked(conditions, df_temp, axg, colors, continuous)
+
+    if (snakemake.config["annotation"]):
+        #prepare annotation
+        annotation_cut = prepareAnnotation(annotation,transcripts_plot)
+
+        #plot annotation
+        plotAnnotation(annotation_cut, transcripts_plot, colors, longest_length, fig, gs, total_gs)
+        total_gs += len(transcripts_plot) + 1
+
+        plotSequenceAnalysis(transcripts_plot, colors, longest_length, fig, gs, total_gs, annotation_cut)
+    else:
+        plotSequenceAnalysis(transcripts_plot, colors, longest_length, fig, gs, total_gs)
+
+    total_gs += total_features * len(transcripts_plot) + 2
+    plotLegend(plt,fig,gs,total_gs)
+
+plt.subplots_adjust(wspace=0.05, hspace=0.05)
+fig.savefig(snakemake.output[0])
diff --git a/scripts/visualization_helpers.py b/scripts/visualization_helpers.py
new file mode 100644
index 0000000..d496493
--- /dev/null
+++ b/scripts/visualization_helpers.py
@@ -0,0 +1,195 @@
+#!/usr/bin/env python3
+
+import string
+import pandas as pd
+import numpy as np
+import matplotlib
+import matplotlib.pyplot as plt
+
+# change font
+matplotlib.rcParams['font.sans-serif'] = "Arial"
+matplotlib.rcParams['font.family'] = "sans-serif"
+plt.rc('font', size=14)
+
+alph = string.ascii_lowercase
+alph += "0123456789!@#$%^&*?;'{}-=+,.~_[]:<>`qwertyuiopasdfghjklzxcvbnm1234567890qwertyuiopasdfghjklzxcvbnmqwertyuiopasdfghjklzxcvbnm"
+
+class transcript:
+    def __init__(self, tcons, ns_decay, aa, idr = [], dom = [], ss3 = [], plc = [], pss = []):
+        self.tcons = tcons
+        self.ns_decay = ns_decay
+        self.aa = aa
+        self.idr = idr
+        self.dom = dom
+        self.ss3 = ss3
+        self.plc = plc
+        self.pss = pss
+
+class Exon:
+    def __init__(self, tcons, old_id, start, stop, new_id = ""):
+        self.tcons = tcons
+        self.old_id = old_id
+        self.new_id = new_id
+        self.start = start
+        self.stop = stop
+
+    def __lt__(self, other):
+         return self.start < other.start
+
+    def add_new_id(self, id):
+        self.new_id = id
+
+    def __str__(self):
+        return str(self.new_id) + " " + str(self.start) + " " + str(self.stop)
+
+class Parser(object):
+    def __init__(self, csv, gtf, samples):
+        self.csv = csv
+        self.gtf = gtf
+        self.samples = samples
+
+def preprocessArguments(args):
+    if (args.gtf == None):
+        annotation = None
+    else:
+        annotation = pd.read_csv(args.gtf, delimiter='\t', header=None, usecols=[0,2,3,4,6,8],names=['chrm','type','start','stop','strand','more'])
+        annotation['transcript_id'] = annotation.apply(lambda x: x['more'].split('transcript_id "')[1].split('"')[0],1)
+        annotation["transcript_id"] = annotation["transcript_id"].apply(lambda x: x.replace("_",""))
+        annotation["exon_number"] = annotation.apply(lambda x: x["more"].split('exon_number "')[1].split('"')[0] if "exon_number" in x["more"] else 0,1)
+        annotation = annotation.drop(columns='more')
+
+    if (args.csv == None):
+        data = None
+    else:
+        data = pd.read_csv(args.csv)
+
+    samples = [sample for sample in args.samples]
+    conditions = sorted(list(set([sample.split("_")[0] for sample in samples])))
+    number_replicates={}
+    for cond in conditions:
+        number_replicates[cond]  = len([sample for sample in samples if sample.startswith(cond)])
+    return annotation, data, samples, conditions, number_replicates
+
+def calculateStatistics(df,conds,nreps):
+    for cond in conds:
+        df['mean'+cond]=df.filter(like=cond+'_').mean(1)
+        df['stdn'+cond]=df.filter(like=cond+'_').std(1)/np.sqrt(nreps[cond])
+    df=df.sort_index()
+    return df
+
+def chooseIsoforms2Plot(df,minTPM,minPct,maxIso):
+    df['minimum']=df.filter(regex='^mean').min(axis=1)
+    df['maximumPct']=df.filter(regex='^Pct').max(axis=1)
+    df=df[df['maximumPct']>minPct]
+    df['maximum']=df.filter(regex='^mean').max(axis=1)
+    df=df[df['maximum']>minTPM]
+    df['avg'] = df.filter(regex='^Pct').mean(axis=1)
+    df=df.sort_values('avg',ascending=False)
+    df=df.head(maxIso)
+    return df
+
+def plotTotal(conditions, df_gene, ax, colors, number_replicates, continuous):
+    conditions_x = [i + 1 for i in range(len(conditions))]
+    if (continuous):
+        plt.errorbar(conditions_x, df_gene.filter(like = 'mean').iloc[0], yerr = df_gene.filter(like='stdn').iloc[0], color = '#FA3D36', linewidth = 3, label = "Total Expression", markersize=6)
+    else:
+        x = []
+        boxes = []
+        count = 0
+        for i in range(len(conditions)):
+            x += [i + 1] * number_replicates[conditions[i]]
+            boxes.append(df_gene.filter(like = '_').values[0][count:count+number_replicates[conditions[i]]])
+            count += number_replicates[conditions[i]]
+        bp = plt.boxplot(boxes, patch_artist=True, showfliers=False, zorder=1)
+        for patch in bp['boxes']:
+            patch.set_facecolor("#F7F7F7")
+        for median in bp['medians']:
+            median.set(color ='black',linewidth = 1)
+
+        plt.scatter(x + np.random.normal(0, 0.075, len(x)), df_gene.filter(like = '_'), color = "black", marker = '.', s = 200, alpha = 0.5, linewidths = 0, zorder=2)
+
+    ax.set_ylim(bottom = -0.5)
+    ax.set_xticks(conditions_x)
+    ax.set_xticklabels(conditions)
+    ax.spines['top'].set_visible(False)
+    ax.spines['right'].set_visible(False)
+    #ax.spines['left'].set_bounds(0,ax.get_yticks()[-2])
+    ax.spines['bottom'].set_bounds(min(conditions_x),max(conditions_x))
+    ax.tick_params(axis = "both", which = "major", labelsize = 14)
+    plt.xlabel("Condition")
+    plt.ylabel("Expression [TPM]")
+    #plt.legend(loc = "upper center", bbox_to_anchor=(0.5,1), frameon=False)
+    ax.set_title("Total Gene Expression", loc = "center", fontsize = 14, weight='bold')
+
+def plotIsoformProfiles(conditions, df, ax, colors, continuous):
+    x = [i + 1 for i in range(len(conditions))]
+    last_bottom = [0] * len(conditions)
+    for j in range(df.shape[0]):
+        row = df.iloc[j]
+        if (continuous):
+            plt.errorbar(x + np.random.normal(0, 0.03, len(x)), row.filter(regex='^mean'),yerr=row.filter(like='stdn'),color=colors[j],linewidth=2, label = "")
+        else:
+            mean = row.filter(regex = '^mean')
+            err = row.filter(like='stdn')
+            jitter = 0.5 * (j + 1)/(df.shape[0] + 1) - 0.25
+            plt.bar(x, mean, width = 0.5, bottom = last_bottom, color = colors[j])
+            for i in x:
+                plt.plot([i + jitter,i + jitter],[last_bottom[i - 1] + mean.iloc[i - 1] - err.iloc[i - 1],last_bottom[i - 1] + mean.iloc[i - 1] + err.iloc[i - 1]],linewidth = 2,color = "black")
+            last_bottom += mean
+
+    ax.set_ylim(bottom=-0.5)
+    ax.set_xticks(x)
+    ax.set_xticklabels(conditions)
+    ax.tick_params(axis = "both", which = "major", labelsize = 14)
+    ax.spines['top'].set_visible(False)
+    ax.spines['right'].set_visible(False)
+    #ax.spines['left'].set_bounds(0,ax.get_yticks()[-2])
+    ax.spines['bottom'].set_bounds(min(x),max(x))
+    plt.xlabel("Condition", fontsize = 14)
+    plt.ylabel("Expression [TPM]", fontsize = 14)
+    plt.legend(loc = "upper center", bbox_to_anchor=(0.5,1), frameon=False)
+    ax.set_title("Individual Isoform Expression", loc = "center", fontsize = 14, weight='bold')
+
+def plotIsoformStacked(conditions, df, ax, colors, continuous):
+    x = [i + 1 for i in range(len(conditions))]
+    if (continuous):
+        plt.stackplot(x,df.filter(regex='^Pct').values,colors=colors[:df.shape[0]])
+    else:
+        x = [i + 1 for i in range(len(conditions))]
+        last_bottom = [0] * len(conditions)
+        for j in range(df.shape[0]):
+            row = df.iloc[j]
+            pct = row.filter(regex = '^Pct')
+            #err = row.filter(like='stdn')
+            err = np.divide(np.multiply(row.filter(like='stdn').values,row.filter(regex = '^Pct').values),row.filter(regex = '^mean').values)
+            jitter = (j - df.shape[0]//2)/10
+            plt.bar(x, pct, width = 0.5, bottom = last_bottom, color=colors[j])
+            #for i in x:
+            #    plt.plot([i + jitter,i + jitter],[last_bottom[i - 1] + pct.iloc[i - 1] - err[i - 1],last_bottom[i - 1] + pct.iloc[i - 1] + err[i - 1]],linewidth = 2,color = "black")
+            last_bottom += pct
+
+    ax.set_xticks(x)
+    ax.set_xticklabels(conditions)
+    ax.spines['top'].set_visible(False)
+    ax.spines['right'].set_visible(False)
+    ax.spines['left'].set_bounds(0,100)
+    ax.spines['bottom'].set_bounds(min(x),max(x))
+    ax.axis(top = [-0.5,100])
+    ax.set_yticks([i for i in range(0,101,20)])
+    ax.tick_params(axis = "both", which = "major", labelsize = 14)
+    plt.xlabel("Condition")
+    plt.ylabel("Expression [%]")
+    ax.set_title("Individual Isoform Usage", loc = "center", fontsize = 14, weight='bold')
+
+def prepareAnnotation(annotation,transcripts_plot):
+    cut = annotation[annotation['transcript_id'].isin(transcripts_plot)]
+    strand = cut.iloc[0]['strand']
+    if (strand == '+'):
+        start = cut['start'].min()
+        cut['plot_start'] = cut['start'] - start
+        cut['plot_stop'] = cut['stop'] - start
+    else:
+        start = cut['stop'].max()
+        cut['plot_start'] = (cut['start'] - start) * (-1)
+        cut['plot_stop'] = (cut['stop'] - start) * (-1)
+    return cut