Skip to content
Permalink
3877157589
Switch branches/tags

Name already in use

A tag already exists with the provided branch name. Many Git commands accept both tag and branch names, so creating this branch may cause unexpected behavior. Are you sure you want to create this branch?
Go to file
 
 
Cannot retrieve contributors at this time
63 lines (44 sloc) 2.2 KB
class UniqueFilter:
    """
    Merge Ensembl and UCSC GTF entries without duplicates and optionally
    filter the merged list by cell-/tissue types.

    @author: Sebastian Beyvers
    @contact: sebastian.beyvers@med.uni-giessen.de
    """

    def __init__(self, ense, ucsc, org_filter=None):
        """
        Compute the merged (and optionally filtered) result list once.

        :param ense: list of gtf-formatted entries from Ensembl data
        :param ucsc: list of gtf-formatted entries from UCSC data
        :param org_filter: iterable of cell-/tissue type names to keep;
            None or empty disables filtering
        """
        self.results = self.get_filtered_results(org_filter, ense, ucsc)

    def get_results(self):
        """Return the result list computed by the constructor."""
        return self.results

    def get_filtered_results(self, org_filter, ense, ucsc):
        """
        Concatenate Ensembl and UCSC entries without duplicates and filter
        them by the given cell-/tissue types.

        :param org_filter: iterable of cell-/tissue type names; an entry is
            kept when its last element contains "<name>>ACTIVE" for at least
            one of the names. None or empty returns the unfiltered merge.
        :param ense: list of gtf-formatted entries from Ensembl data
        :param ucsc: list of gtf-formatted entries from UCSC data
        :return: list of unique (filtered) entries
        """
        # First: concat ucsc and ensembl data.
        unfiltered_results = self.concat_without_duplicates(ense, ucsc)

        # Second: apply the filter only if one was specified.
        if not org_filter:
            return unfiltered_results

        filterstrings = [x + ">ACTIVE" for x in org_filter]
        return [
            element
            for element in unfiltered_results
            if any(tissue in element[-1] for tissue in filterstrings)
        ]

    @staticmethod
    def concat_without_duplicates(ense, ucsc):
        """
        Concatenate Ensembl and UCSC entries, dropping UCSC duplicates.

        A UCSC entry counts as a duplicate when some Ensembl entry has the
        same element 0 (chromosome) and the same element 3 (presumably the
        start coordinate, if the entries follow the standard GTF column
        order -- verify against the caller). Ensembl entries are always kept.

        Neither input list is modified.

        :param ense: list of gtf-formatted entries from Ensembl data
        :param ucsc: list of gtf-formatted entries from UCSC data
        :return: new list with all Ensembl entries followed by the
            non-duplicate UCSC entries, in their original order
        """
        # Index the Ensembl entries once so each UCSC entry needs only a
        # single set lookup instead of a scan over all Ensembl entries.
        ensembl_keys = {(ens[0], ens[3]) for ens in ense}

        results = list(ense)
        results.extend(
            uc for uc in ucsc if (uc[0], uc[3]) not in ensembl_keys
        )
        return results