Permalink
Cannot retrieve contributors at this time
Name already in use
A tag already exists with the provided branch name. Many Git commands accept both tag and branch names, so creating this branch may cause unexpected behavior. Are you sure you want to create this branch?
master_project_JLU2018/bin/3.1_create_gtf/Modules/Ensembl/ActivityTable.py
Go to fileThis commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
81 lines (58 sloc)
2.93 KB
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import os.path | |
from Modules.Ensembl.ActivityTableGenerator import ATGenerator | |
class ActivityTable:
    """
    Check for per-celltype activity tables (table.bin) and generate missing ones.

    activityTable = byte representation of activity status
    corresponding to the generator schema default:
    0, "activity=ACTIVE",
    1, "activity=POISED",
    2, "activity=REPRESSED",
    3, "activity=INACTIVE",
    4, "activity=NA"

    @author: Sebastian Beyvers
    @contact: sebastian.beyvers@med.uni-giessen.de
    """

    def __init__(self, organism, current_release, wd, data_dir):
        """
        Locate the activity folder and list its celltype subfolders.

        :param organism: input organism (used as a path component)
        :param current_release: currently used Ensembl release (path component)
        :param wd: working directory; "<wd>/data/EnsemblData/" is used as the
                   base when data_dir is empty/None
        :param data_dir: data directory; "<data_dir>/EnsemblData/" is used as
                         the base when it is given
        :raises StopIteration: if the resolved activity directory does not
                               exist (os.walk yields nothing for it)
        """
        # The base path is built by plain concatenation on purpose: self.link
        # is a public attribute and its exact string form is kept unchanged.
        if data_dir:
            base_dir = data_dir + "/EnsemblData/"
        else:
            base_dir = wd + "/data/EnsemblData/"
        self.link = os.path.join(base_dir, current_release, organism, "activity")

        # First os.walk() entry is (root, dirs, files); [1] are the direct
        # subfolders of the activity directory (one is expected per celltype).
        self.folders = next(os.walk(self.link))[1]

        # List index == activity status code handed to the ATGenerator class.
        self.generator = ATGenerator(["activity=ACTIVE",
                                      "activity=POISED",
                                      "activity=REPRESSED",
                                      "activity=INACTIVE",
                                      "activity=NA"])

    def check_and_generate_activity_table(self):
        """
        Generate a table.bin for every celltype subfolder that lacks one.

        Only the presence of "<subfolder>/table.bin" is checked; existing
        files are left untouched.
        """
        for subfolder in self.folders:
            folder_link = os.path.join(self.link, subfolder)
            table_path = os.path.join(folder_link, "table.bin")
            if os.path.isfile(table_path):
                # Table already present: nothing to do for this celltype.
                continue
            print("No ActivityTable for "+subfolder+" found, generating new one.")
            self.generate_table(folder_link)
        print("All ActivityTables found, proceeding")

    def generate_table(self, link):
        """
        Generate table.bin next to every *.gff.gz file found below link.

        :param link: path to the Ensembl activity folder of one celltype;
                     the folder is walked recursively
        """
        for root, _dirs, files in os.walk(link):
            for file in files:
                if not file.endswith(".gff.gz"):
                    continue
                originpath = os.path.join(root, file)
                file_path = os.path.join(root, "table.bin")
                # Build the payload BEFORE opening the output file. Opening
                # with "wb" creates/truncates table.bin immediately, so a
                # failing read_table() would otherwise leave an empty file
                # that the isfile() check treats as a finished table.
                # NOTE(review): read_table() is assumed to return a
                # bytes-like object, since it is written in binary mode.
                table_data = self.generator.read_table(originpath)
                with open(file_path, "wb") as f:
                    f.write(table_data)
                print("New ActivityTable generated in: " + root)
# Debug (the constructor needs all four arguments: organism, release, wd, data_dir)
# e = ActivityTable("homo_sapiens", "release-94", ".", None)
# e.check_and_generate_activity_table()