from Modules.Ensembl.ActivityTable import ActivityTable
from Modules.Ensembl.FTPHandling.VersionChecker import (
    EnsemblRegulationFTPRetriever as FTPRetriever,
)
from Modules.Ensembl.ActivityCategorizer import ActivityCategorizer
from Modules.Ensembl.GTFGen import GTFGen


class Ensembl:
    """
    Entry point for working with Ensembl Regulatory data.

    Looks for the required files locally and downloads them when they are
    missing.

    @author: Sebastian Beyvers
    @contact: sebastian.beyvers@med.uni-giessen.de
    """

    def __init__(self, organism, wd, data_dir):
        """
        Constructor and main routine of the Ensembl GTF creation.

        :param organism: input organism
        :param wd: working directory
        :param data_dir: data directory; used if specified
        """
        print("Starting Ensembl")

        # Step 1: check the local Ensembl release data, update it if needed,
        # and remember which release we are working with.
        retriever = FTPRetriever(organism, wd, data_dir)
        self.updater = retriever
        release = retriever.get_release()
        self.release = release

        # Step 2: make sure the activity tables (table.bin binary files)
        # exist; they are generated when not already present.
        activity_table = ActivityTable(organism, release, wd, data_dir)
        self.acttable = activity_table
        activity_table.check_and_generate_activity_table()

        # Step 3: categorize the activity table by the categories defined in
        # the config (config: ./config/celltypes_organism.json).
        self.categorizer = ActivityCategorizer(release, organism, wd, data_dir)

        print("Generating GTF")

        # Step 4: instantiate the GTF generator; the entries themselves are
        # only requested later through get_gtf().
        self.gtf_generator = GTFGen(organism, release, wd, data_dir)

        print("Ensembl Finished !")

    def get_gtf(self):
        """
        Getter for the resulting GTF entries.

        :return: list of GTF entries
        """
        # Resolve the pieces in the same order a single nested call would:
        # generator method first, then release, then the categorization.
        generate = self.gtf_generator.get_gtf
        release = self.release
        categorization = self.categorizer.get_categorization()
        return generate(release, categorization)


#e = Ensembl("homo_sapiens")
#print(len(e.categorizer.categorization))