Skip to content
Permalink
ab6f883dd1
Switch branches/tags

Name already in use

A tag already exists with the provided branch name. Many Git commands accept both tag and branch names, so creating this branch may cause unexpected behavior. Are you sure you want to create this branch?
Go to file
 
 
Cannot retrieve contributors at this time
106 lines (71 sloc) 3.31 KB
import json
import os
class ActivityCategorizer:
    """Load per-folder activity tables and merge them into per-category tables.

    The cell type configuration (``<wd>/config/celltypes_<organism>.json``)
    maps each category ("type") to a list of folder aliases
    ("alias_ensembl").  Every folder holds a binary ``table.bin`` with one
    state byte per position.  For each category the tables of its aliases
    are merged into a single ``bytearray``; the result is available as
    ``self.categorization`` (or via ``get_categorization()``).
    """

    # Byte value meaning "NA"; used to fill categories without any alias and
    # positions whose bytes are outside the known 0..4 range.
    _NA = 4

    def __init__(self, release, organism, wd, data_dir):
        """Read the configuration, load all tables and categorize them.

        Args:
            release: Release directory name below ``EnsemblData``.
            organism: Organism name; selects the config file and data folder.
            wd: Working directory containing ``config/`` (and ``data/`` when
                ``data_dir`` is not given).
            data_dir: Optional alternative directory containing
                ``EnsemblData``; any falsy value falls back to ``<wd>/data``.

        Raises:
            OSError: If the config file or a ``table.bin`` cannot be opened.
            KeyError: If a config entry lacks "type" or "alias_ensembl".
        """
        # List of all folders with activity tables.  It has to exist before
        # read_config() runs, because read_config() appends to it.
        self.folderlist = []
        # Dictionary from celltypes_<organism>.json with
        # key = category and value = [folders].
        self.c_dict = self.read_config(organism, wd)
        # Activity tables of all folders: folder name -> bytearray.
        self.activity = {}
        self.get_activity_data(release, organism, wd, data_dir)
        # Categorized activity derived from the JSON config.
        print("Categorization: This may take a while")
        self.categorization = self.generate_categorized_activity()
        print("Categorization finished !")

    def get_categorization(self):
        """Return the mapping category -> merged activity ``bytearray``."""
        return self.categorization

    def read_config(self, organism, wd):
        """Parse the cell type config and collect all referenced folders.

        Side effect: every alias found is appended to ``self.folderlist``.

        Returns:
            dict mapping each entry's "type" to its "alias_ensembl" list.
        """
        # Join real path components so a trailing separator on (or an empty)
        # ``wd`` still yields the intended location.
        path_to_config = os.path.join(
            wd, "config", "celltypes_" + organism + ".json"
        )
        with open(path_to_config, encoding="utf-8") as input_file:
            data = json.load(input_file)

        c_dict = {}
        for entry in data:
            aliases = entry["alias_ensembl"]
            c_dict[entry["type"]] = aliases
            self.folderlist.extend(aliases)
        return c_dict

    def get_activity_data(self, release, organism, wd, data_dir):
        """Read ``table.bin`` of every folder in ``self.folderlist``.

        The tables are stored in ``self.activity`` keyed by folder name.
        """
        base_dir = data_dir if data_dir else os.path.join(wd, "data")
        # A folder may be listed under several categories; dict.fromkeys
        # drops the duplicates so each file is read only once per call.
        for folder in dict.fromkeys(self.folderlist):
            table_path = os.path.join(
                base_dir, "EnsemblData", release, organism,
                "activity", folder, "table.bin",
            )
            with open(table_path, "rb") as table_file:
                self.activity[folder] = bytearray(table_file.read())

    def generate_categorized_activity(self):
        """Build the activity table of every category in ``self.c_dict``.

        * no alias        -> table filled with NA (4)
        * exactly one     -> that alias' table (the same object, not a copy)
        * several aliases -> position-wise merge via activity_comparator()
        """
        category_activity = {}
        for category, aliases in self.c_dict.items():
            if not aliases:
                category_activity[category] = self._na_table()
            elif len(aliases) == 1:
                category_activity[category] = self.activity[aliases[0]]
            else:
                category_activity[category] = self.activity_comparator(aliases)
        return category_activity

    def _na_table(self):
        """Return a fresh all-NA table as long as the first loaded table."""
        # Without any folder there is no reference length; return an empty
        # table instead of failing on self.folderlist[0].
        if not self.folderlist:
            return bytearray()
        length = len(self.activity[self.folderlist[0]])
        return bytearray([self._NA]) * length

    def activity_comparator(self, aliaslist):
        """Merge the tables of several aliases position by position.

        At each position the lowest state byte among the aliases wins
        (0 before 1 before 2 before 3 before 4).  If no alias has a byte in
        0..4 at a position, NA (4) is written so the result always has the
        same length as the first alias' table and positions stay aligned.

        Args:
            aliaslist: Non-empty list of folder names present in
                ``self.activity``.

        Raises:
            IndexError: If a table is shorter than the first alias' table.
        """
        tables = [self.activity[alias] for alias in aliaslist]
        length = len(tables[0])
        for alias, table in zip(aliaslist, tables):
            if len(table) < length:
                raise IndexError(
                    "activity table {!r} is shorter than {!r}".format(
                        alias, aliaslist[0]
                    )
                )
        na = self._NA
        # The first table has exactly ``length`` entries and no table is
        # shorter, so zip() stops after ``length`` columns.  Clamping the
        # column minimum to NA maps out-of-range bytes to NA.
        return bytearray(min(min(column), na) for column in zip(*tables))
# Debugging (arguments are: release, organism, wd, data_dir)
# e = ActivityCategorizer("release-94", "homo_sapiens", "../..", None)
# print(len(e.categorization))
# for x in e.categorization.values():
#     print(len(x))