Permalink
Cannot retrieve contributors at this time
Name already in use
A tag already exists with the provided branch name. Many Git commands accept both tag and branch names, so creating this branch may cause unexpected behavior. Are you sure you want to create this branch?
master_project_JLU2018/bin/3.1_create_gtf/Modules/Uniquifier.py
Go to fileThis commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
46 lines (31 sloc)
1.2 KB
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
class UniqueFilter:
    """
    Class to get unique GTF-results, filtered by specified cell-/tissuetypes.

    The Ensembl and UCSC record lists are merged (dropping UCSC records that
    duplicate an Ensembl record) and, optionally, restricted to records whose
    last field marks one of the requested cell-/tissuetypes as active.
    """

    def __init__(self, ense, ucsc, org_filter=None):
        """
        Merge and filter the given records; the outcome is kept in ``results``.

        :param ense: list of Ensembl records (each an indexable sequence).
        :param ucsc: list of UCSC records (each an indexable sequence).
        :param org_filter: optional iterable of cell-/tissuetype names; when
            empty or None, no filtering is applied.
        """
        self.results = self.get_filtered_results(org_filter, ense, ucsc)

    def get_results(self):
        """Return the merged (and possibly filtered) list of records."""
        return self.results

    def get_filtered_results(self, org_filter, ense, ucsc):
        """
        Merge both sources and apply the cell-/tissuetype filter.

        A record is kept when its last field contains ``"<name>>ACTIVE"``
        for at least one ``<name>`` in ``org_filter``.

        :param org_filter: iterable of cell-/tissuetype names, or a falsy
            value to skip filtering.
        :param ense: list of Ensembl records.
        :param ucsc: list of UCSC records.
        :return: list of records, Ensembl records first.
        """
        unfiltered_results = self.concat_without_duplicates(ense, ucsc)
        if not org_filter:
            return unfiltered_results

        # Apply Filter
        filterstrings = [x + ">ACTIVE" for x in org_filter]
        return [
            element
            for element in unfiltered_results
            if any(tissue in element[-1] for tissue in filterstrings)
        ]

    @staticmethod
    def concat_without_duplicates(ense, ucsc):
        """
        Concat UCSC and Ensembl data without duplicates.

        A UCSC record counts as a duplicate when some Ensembl record has the
        same field 0 and the same field 3. Field 0 is the chromosome; field 3
        is presumably the start position, as in the GTF column layout --
        TODO confirm against the code that produces these records.

        The inputs are not modified.

        :param ense: list of Ensembl records.
        :param ucsc: list of UCSC records.
        :return: new list with all Ensembl records followed by the UCSC
            records that do not duplicate an Ensembl record, in input order.
        """
        # NOTE(review): assumes fields 0 and 3 are hashable (str/int) --
        # confirm with the parsers that build the records.
        ensembl_keys = {(ens[0], ens[3]) for ens in ense}
        unique_ucsc = [uc for uc in ucsc if (uc[0], uc[3]) not in ensembl_keys]
        return list(ense) + unique_ucsc