import json
import os


class ActivityCategorizer:
    """Load per-folder activity tables and merge them into one table per category.

    The cell type configuration (``<wd>/config/celltypes_<organism>.json``)
    maps each category ("type") to a list of folder names
    ("alias_ensembl"). Every folder contributes one binary ``table.bin``
    holding one activity code per byte. Categories backed by several
    folders get a merged table, categories without any folder get a table
    filled with the NA code.

    Attributes:
        folderlist: Folder names in configuration order. A folder used by
            several categories is listed once per use.
        c_dict: Mapping of category name to its list of folder names.
        activity: Mapping of folder name to the raw table as ``bytearray``.
        categorization: Mapping of category name to its ``bytearray`` table.
    """

    # Activity code meaning "not available".
    _NA_CODE = 4
    # Known activity codes, highest priority first: when several tables
    # disagree at a position, the first code in this tuple that occurs wins.
    _CODE_PRIORITY = (0, 1, 2, 3, 4)

    def __init__(self, release, organism, wd, data_dir):
        """Read the configuration, load all tables and categorize them.

        Args:
            release: Release name, used as a directory component
                of the table paths.
            organism: Organism name, used in the configuration file name and
                as a directory component of the table paths.
            wd: Working directory containing ``config/`` and, when
                ``data_dir`` is falsy, ``data/EnsemblData/``.
            data_dir: Optional directory containing ``EnsemblData/``; falls
                back to ``<wd>/data`` when falsy.
        """
        # Must exist before read_config() runs, which appends to it.
        self.folderlist = []

        # Category -> [folder names], taken from celltypes_<organism>.json.
        self.c_dict = self.read_config(organism, wd)

        # Folder name -> raw activity table.
        self.activity = {}
        self.get_activity_data(release, organism, wd, data_dir)

        # Category -> activity table derived from the configuration.
        print("Categorization: This may take a while")
        self.categorization = self.generate_categorized_activity()
        print("Categorization finished !")

    def get_categorization(self):
        """Return the mapping of category name to its activity table."""
        return self.categorization

    def read_config(self, organism, wd):
        """Parse the cell type configuration of ``organism``.

        Side effect: every folder name found is appended to
        ``self.folderlist`` (duplicates across categories are kept).

        Args:
            organism: Organism name used in the configuration file name.
            wd: Working directory containing the ``config`` directory.

        Returns:
            dict mapping each entry's ``"type"`` to its ``"alias_ensembl"``
            list.

        Raises:
            OSError: If the configuration file cannot be opened.
            KeyError: If an entry lacks ``"type"`` or ``"alias_ensembl"``.
        """
        # Joining real path components (instead of gluing "/config/..." onto
        # wd) keeps an empty wd relative to the current directory rather
        # than silently pointing at the filesystem root.
        path_to_config = os.path.join(
            wd, "config", "celltypes_" + organism + ".json"
        )
        # JSON is UTF-8 by specification; do not depend on the locale.
        with open(path_to_config, encoding="utf-8") as input_file:
            data = json.load(input_file)

        c_dict = {}
        for entry in data:
            aliases = entry["alias_ensembl"]
            c_dict[entry["type"]] = aliases
            self.folderlist.extend(aliases)
        return c_dict

    def get_activity_data(self, release, organism, wd, data_dir):
        """Load ``table.bin`` of every folder in ``self.folderlist``.

        The tables are stored in ``self.activity`` keyed by folder name.
        A folder listed several times is read from disk only once per call.

        Args:
            release: Release directory name.
            organism: Organism directory name.
            wd: Working directory; tables are searched below
                ``<wd>/data/EnsemblData`` when ``data_dir`` is falsy.
            data_dir: Optional directory; tables are searched below
                ``<data_dir>/EnsemblData`` when given.

        Raises:
            OSError: If a table file cannot be opened.
        """
        if data_dir:
            ensembl_root = os.path.join(data_dir, "EnsemblData")
        else:
            ensembl_root = os.path.join(wd, "data", "EnsemblData")
        activity_root = os.path.join(
            ensembl_root, release, organism, "activity"
        )

        already_read = set()
        for folder in self.folderlist:
            if folder in already_read:
                continue
            already_read.add(folder)
            table_path = os.path.join(activity_root, folder, "table.bin")
            with open(table_path, "rb") as tables:
                self.activity[folder] = bytearray(tables.read())

    def generate_categorized_activity(self):
        """Build one activity table per configured category.

        * No alias: a table filled with the NA code, as long as the table of
          the first folder in ``self.folderlist``.
        * One alias: that folder's table itself (the same ``bytearray``
          object that is stored in ``self.activity``, not a copy).
        * Several aliases: the tables merged by ``activity_comparator``.

        Returns:
            dict mapping category name to its ``bytearray`` table.

        Raises:
            IndexError: If a category has no alias and ``self.folderlist``
                is empty, because no table length is available then.
            KeyError: If an alias has no loaded table in ``self.activity``.
        """
        category_activity = {}
        for category, aliases in self.c_dict.items():
            if not aliases:
                reference_table = self.activity[self.folderlist[0]]
                category_activity[category] = (
                    bytearray([self._NA_CODE]) * len(reference_table)
                )
            elif len(aliases) == 1:
                category_activity[category] = self.activity[aliases[0]]
            else:
                category_activity[category] = self.activity_comparator(aliases)
        return category_activity

    def activity_comparator(self, aliaslist):
        """Merge the tables of several folders position by position.

        For every position the result holds the first code of
        ``0, 1, 2, 3, 4`` that occurs in any of the tables at that position.
        If none of the tables holds a known code there, the position is set
        to the NA code, so the result always has the length of the first
        table and positions never shift.

        Args:
            aliaslist: Non-empty list of folder names present in
                ``self.activity``.

        Returns:
            A new ``bytearray`` as long as the table of ``aliaslist[0]``.

        Raises:
            IndexError: If ``aliaslist`` is empty, or if a shorter table has
                to be inspected beyond its end.
            KeyError: If a folder name is missing from ``self.activity``.
        """
        length = len(self.activity[aliaslist[0]])
        tables = [self.activity[alias] for alias in aliaslist]

        # Start from "all NA"; positions with a known code are overwritten.
        merged = bytearray([self._NA_CODE]) * length
        for position in range(length):
            for code in self._CODE_PRIORITY:
                if any(table[position] == code for table in tables):
                    merged[position] = code
                    break
        return merged


# Debugging
# NOTE: the call below predates the current constructor signature
# (release, organism, wd, data_dir) and has to be adapted before use.
# e = ActivityCategorizer("../../config/celltypes_human.json", "release-94", "homo_sapiens")
# print(len(e.categorization))
# for x in e.categorization.values():
#     print(len(x))