Permalink
Cannot retrieve contributors at this time
Name already in use
A tag already exists with the provided branch name. Many Git commands accept both tag and branch names, so creating this branch may cause unexpected behavior. Are you sure you want to create this branch?
master_project_JLU2018/bin/3.1_create_gtf/Modules/Uniquifier.py
Go to fileThis commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
63 lines (44 sloc)
2.2 KB
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
class UniqueFilter:
    """
    Class to get unique GTF-results, filtered by specified cell-/tissuetypes.

    The Ensembl and UCSC entry lists are merged (UCSC entries that duplicate
    an Ensembl entry are dropped) and the merged list is optionally reduced
    to the entries that are marked ``<type>>ACTIVE`` in their last field.

    @author: Sebastian Beyvers
    @contact: sebastian.beyvers@med.uni-giessen.de
    """

    def __init__(self, ense, ucsc, org_filter=None):
        """
        Build the merged (and optionally filtered) result list.

        :param ense: list of gtf-formatted entries from Ensembl data
        :param ucsc: list of gtf-formatted entries from UCSC data
        :param org_filter: iterable of cell-/tissuetype names to keep;
                           None (or empty) disables filtering
        """
        self.results = self.get_filtered_results(org_filter, ense, ucsc)

    def get_results(self):
        """Return the list computed by the constructor."""
        return self.results

    def get_filtered_results(self, org_filter, ense, ucsc):
        """
        Concat UCSC and Ensembl data without duplicates and filter by activity.

        :param org_filter: iterable of cell-/tissuetype names; an entry is
                           kept when its last field contains
                           ``<name>>ACTIVE`` for at least one of the names.
                           A falsy value returns the unfiltered merge.
        :param ense: list of gtf-formatted entries from Ensembl data
        :param ucsc: list of gtf-formatted entries from UCSC data
        :return: list of unique (filtered) entries
        """
        # First: concat ucsc and ensembl data.
        merged = self.concat_without_duplicates(ense, ucsc)

        # Second: apply the filter only if one was specified.
        if not org_filter:
            return merged

        # NOTE(review): this is a plain substring test, so a filter name that
        # is a suffix of another name (e.g. "cell" vs "Tcell") matches both.
        active_markers = [name + ">ACTIVE" for name in org_filter]
        return [
            entry
            for entry in merged
            if any(marker in entry[-1] for marker in active_markers)
        ]

    @staticmethod
    def concat_without_duplicates(ense, ucsc):
        """
        Concat UCSC and Ensembl data without duplicates.

        A UCSC entry counts as a duplicate when an Ensembl entry has the same
        value in field 0 and in field 3 (presumably chromosome and start
        position of a GTF record -- confirm against the producers of these
        lists). Ensembl entries are always kept; neither input is modified.

        :param ense: ensembl-gtf-data (list of indexable entries)
        :param ucsc: ucsc-gtf-data (list of indexable entries)
        :return: new list with all Ensembl entries followed by the
                 non-duplicate UCSC entries, original order preserved
        """
        # Collect the identifying key of every Ensembl entry once, so each
        # UCSC entry needs a single set lookup instead of a scan over `ense`.
        # Assumes fields 0 and 3 are hashable (str/int) -- TODO confirm.
        ensembl_keys = {(ens[0], ens[3]) for ens in ense}

        results = list(ense)
        results.extend(
            uc for uc in ucsc if (uc[0], uc[3]) not in ensembl_keys
        )
        return results