Skip to content
Permalink
9b258c6d1f
Switch branches/tags

Name already in use

A tag already exists with the provided branch name. Many Git commands accept both tag and branch names, so creating this branch may cause unexpected behavior. Are you sure you want to create this branch?
Go to file
 
 
Cannot retrieve contributors at this time
71 lines (50 sloc) 2.29 KB
import os
import gzip
import csv
class GTFGen:
    """Turn a gzipped GFF file into GTF-style rows annotated with activity.

    On construction the GFF file for one organism/release is read fully into
    memory (as undecoded byte lines). ``get_gtf`` then converts every line
    into a nine-element list and appends per-line activity labels.
    """

    def __init__(self, organism, release, wd):
        """Load the GFF lines for ``organism``/``release`` below ``wd``.

        Args:
            organism: Name of the organism sub-directory.
            release: Name of the release sub-directory.
            wd: Working directory that contains ``EnsemblData``.

        Raises:
            FileNotFoundError: If the data directory does not exist or holds
                no file whose name ends in ``gff.gz``.
        """
        self.gff_lines = self.get_organism_as_gff(organism, release, wd)
        # Maps the integer codes found in the activity sequences to labels.
        self.value_map = {0: "ACTIVE", 1: "POISED", 2: "REPRESSED", 3: "INACTIVE", 4: "NA"}

    def get_organism_as_gff(self, organism, release, wd):
        """Return all lines of the ``*gff.gz`` file as a list of ``bytes``.

        The file is looked up in ``<wd>/EnsemblData/<release>/<organism>``.
        When several file names end in ``gff.gz`` the alphabetically last one
        is used, so the choice does not depend on the arbitrary order in
        which ``os.listdir`` reports directory entries.

        Raises:
            FileNotFoundError: If the directory is missing or contains no
                file whose name ends in ``gff.gz``.
        """
        directory = os.path.join(wd + "/EnsemblData/", release, organism)
        candidates = sorted(
            name for name in os.listdir(directory) if name.endswith("gff.gz")
        )
        if not candidates:
            # Fail with a message that names the directory instead of letting
            # gzip.open("") raise an uninformative error about an empty path.
            raise FileNotFoundError(
                "No file ending in 'gff.gz' found in {!r}".format(directory)
            )
        inputfile = os.path.join(directory, candidates[-1])
        with gzip.open(inputfile) as original_file:
            return original_file.readlines()

    def reformat_to_gff(self, activity, release):
        """Convert every loaded GFF line into a GTF-style row.

        Args:
            activity: Mapping from a label (``str``) to a sequence of integer
                codes. The sequence is indexed by the position of the line in
                the loaded file, and each code must be a key of
                ``self.value_map``.
            release: Release name, used to build the source column.

        Returns:
            A list with one nine-element list of strings per input line:
            ``chr`` + first column, ``RegBuild_`` + release, the value of the
            fifth ``;``-separated attribute, start, end, ``"."``, ``"+"``,
            ``"."`` and the combined id/activity attribute string.
        """
        gtf_return = []
        for index, line in enumerate(self.gff_lines):
            columns = line.decode("UTF-8").split("\t")
            # The last tab-separated column holds ';'-separated key=value pairs.
            attributes = columns[-1].strip().split(";")
            # NOTE(review): the feature column is taken from the fifth
            # attribute by position; this presumably relies on a fixed
            # attribute order in the input files -- confirm against the data.
            feature = attributes[4].split("=")[1]
            additional = self.generate_additional_information(
                attributes[0], self.generate_activity_list(activity, index)
            )
            gtf_return.append(
                [
                    "chr" + columns[0],
                    "RegBuild_" + release,
                    feature,
                    *columns[3:5],  # start and end, copied unchanged
                    ".",  # score
                    "+",  # strand
                    ".",  # frame
                    additional,
                ]
            )
        return gtf_return

    @staticmethod
    def generate_additional_information(id, activity):
        """Build the attribute column ``"<id>; activity=<a>, <b>, ..."``.

        Args:
            id: Identifier string placed first. The name shadows the builtin
                but is kept so keyword callers keep working.
            activity: Iterable of strings joined with ``", "``.
        """
        return "; ".join([id, "activity=" + ", ".join(activity)])

    def generate_activity_list(self, activity, index):
        """Return ``"<label>><STATE>"`` for every entry of ``activity``.

        ``<STATE>`` is ``self.value_map`` applied to the code stored at
        position ``index`` of the entry's sequence.
        """
        return [
            key + ">" + self.value_map[value[index]]
            for key, value in activity.items()
        ]

    def get_gtf(self, release, activity):
        """Return the converted rows; see ``reformat_to_gff`` for the layout."""
        return self.reformat_to_gff(activity, release)