import os
import gzip
import csv


class GTFGen:
    """Generate Ensembl GTF-data with activity.

    The gzipped GFF file of one organism/release is read once when the
    object is created.  ``get_gtf`` then turns every stored line into a
    nine-column GTF-style row whose last column carries the region ID and
    the activity label of each celltype category.
    """

    def __init__(self, organism, release, wd, data_dir):
        """Load the raw GFF lines and set up the activity label mapping.

        Args:
            organism: Name of the organism sub-directory.
            release: Name of the release sub-directory.
            wd: Working directory; ``<wd>/data/EnsemblData`` is searched
                when ``data_dir`` is falsy.
            data_dir: Data directory; ``<data_dir>/EnsemblData`` is
                searched when given.

        Raises:
            FileNotFoundError: If the directory contains no ``gff.gz`` file.
        """
        self.gff_lines = self.get_organism_as_gff(organism, release, wd, data_dir)
        # Maps the numeric activity codes found in the activity lists to labels.
        self.value_map = {0: "ACTIVE", 1: "POISED", 2: "REPRESSED", 3: "INACTIVE", 4: "NA"}

    def get_organism_as_gff(self, organism, release, wd, data_dir):
        """Read the original gzipped GFF file for ``organism``.

        Args:
            organism: Name of the organism sub-directory.
            release: Name of the release sub-directory.
            wd: Working directory, used when ``data_dir`` is falsy.
            data_dir: Data directory, takes precedence over ``wd``.

        Returns:
            list[bytes]: All lines of the file, undecoded and including
            their line terminators.

        Raises:
            FileNotFoundError: If no file ending in ``gff.gz`` exists in
                the organism directory.
        """
        if data_dir:
            directory = os.path.join(data_dir + "/EnsemblData/", release, organism)
        else:
            directory = os.path.join(wd + "/data/EnsemblData/", release, organism)

        # os.listdir returns entries in arbitrary order; sorting makes the
        # choice reproducible when more than one archive is present.
        candidates = sorted(
            name for name in os.listdir(directory) if name.endswith("gff.gz")
        )
        if not candidates:
            # Fail with the searched directory instead of letting
            # gzip.open("") complain about an empty file name.
            raise FileNotFoundError(
                "No 'gff.gz' file found in directory: {}".format(directory)
            )

        inputfile = os.path.join(directory, candidates[-1])
        with gzip.open(inputfile) as original_file:
            return original_file.readlines()

    def reformat_to_gff(self, activity, release):
        """Reformat the stored GFF lines to GTF rows with activity data.

        Despite its name, this method produces the GTF-style output.

        Args:
            activity: Mapping of category name to a sequence of activity
                codes.  The sequences are indexed by the position of the
                line in ``self.gff_lines``; each code must be a key of
                ``self.value_map``.
            release: Release string appended to the ``RegBuild_`` source
                column.

        Returns:
            list[list[str]]: One nine-element row per stored line:
            chromosome, source, feature, start, end, score, strand, frame
            and the attribute string.
        """
        gtf_return = []

        for index, line in enumerate(self.gff_lines):
            columns = line.decode("UTF-8").split("\t")
            # The last column holds the ";"-separated attributes.
            attributes = columns[-1].strip().split(";")

            row = [
                # Chromosome name in "chr<name>" format.
                "chr" + columns[0],
                "RegBuild_" + release,
                # Feature column: value of the fifth attribute (taken by
                # position, not by key), lower-cased.
                attributes[4].split("=")[1].lower(),
            ]
            # Start / end are copied from the original line.
            row.extend(columns[3:5])
            # Score, strand and frame are fixed placeholders.
            row.extend([".", "+", "."])
            # Attribute column: region ID (first attribute) plus activity.
            row.append(
                self.generate_additional_information(
                    attributes[0], self.generate_activity_list(activity, index)
                )
            )

            gtf_return.append(row)

        return gtf_return

    @staticmethod
    def generate_additional_information(gene_id, activity):
        """Build the attribute column from the ID and the activity labels.

        Args:
            gene_id: First attribute of the GFF line.  A value starting
                with ``ID=regulatory_region:`` is rewritten to
                ``ID "<identifier>"``; anything else is used unchanged.
            activity: Iterable of ``"<category>><label>"`` strings.

        Returns:
            str: ``<id>; activity "<label>, <label>, ..."``.
        """
        if gene_id.startswith("ID=regulatory_region:"):
            gene_id = 'ID "' + gene_id.split(':')[1] + '"'

        activity_string = 'activity "' + ', '.join(activity) + '"'

        return gene_id + '; ' + activity_string

    def generate_activity_list(self, activity, index):
        """Return the ``"<category>><label>"`` entries for one line.

        Args:
            activity: Mapping of category name to a sequence of activity
                codes.
            index: Position of the line whose codes should be looked up.

        Returns:
            list[str]: One entry per category, in the mapping's iteration
            order.

        Raises:
            KeyError: If a code is not present in ``self.value_map``.
        """
        return [
            key + ">" + self.value_map[value[index]]
            for key, value in activity.items()
        ]

    def get_gtf(self, release, activity):
        """Return the resulting GTF-formatted list.

        Thin wrapper around ``reformat_to_gff``; note the swapped argument
        order (``release`` first).
        """
        return self.reformat_to_gff(activity, release)