Permalink
Cannot retrieve contributors at this time
Name already in use
A tag already exists with the provided branch name. Many Git commands accept both tag and branch names, so creating this branch may cause unexpected behavior. Are you sure you want to create this branch?
master_project_JLU2018/bin/RegGTFExtractor.py
Go to file. This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
68 lines (51 sloc)
2.17 KB
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python3 | |
import argparse | |
from Modules.Ensembl.Ensembl import Ensembl | |
from Modules.ucsc.ucsc import UcscGtf | |
from Modules.Uniquifier import UniqueFilter | |
from Modules.SaveResults import ResultSaver | |
import os | |
import json | |
def check_for_local_folder(wd):
    """Ensure the local data folders exist inside the working directory.

    Creates ``<wd>/EnsemblData`` and ``<wd>/UCSCData`` when they are missing.
    Folders that already exist are left untouched. Missing parent
    directories (including ``wd`` itself) are created as well.

    Args:
        wd: Path of the working directory.

    Raises:
        FileExistsError: If one of the target paths exists but is not a
            directory.
    """
    for folder in ("EnsemblData", "UCSCData"):
        # Join real path components so an empty wd stays relative instead of
        # turning into "/<folder>"; exist_ok avoids the race between a
        # separate isdir() check and the directory creation.
        os.makedirs(os.path.join(wd, folder), exist_ok=True)
def check_filter(tissue_cmd, org, wd):
    """Tell whether a requested tissue/cell type is known for the organism.

    The known types are read from ``<wd>/config/celltypes_<org>.json``. The
    file must contain a JSON array of objects that each provide a ``"type"``
    key.

    Args:
        tissue_cmd: Requested tissue/cell type names; may be None or empty.
        org: Organism name, used to select the config file.
        wd: Working directory that contains the ``config`` folder.

    Returns:
        True if at least one entry of ``tissue_cmd`` appears among the
        configured types. False otherwise, including when ``tissue_cmd`` is
        None or empty (the config file is not read in that case).

    Raises:
        OSError: If the config file cannot be opened.
        json.JSONDecodeError: If the config file is not valid JSON.
        KeyError: If a config entry has no ``"type"`` key.
    """
    if not tissue_cmd:
        return False

    # Join real path components so an empty wd stays relative instead of
    # resolving to "/config/...".
    path_to_config = os.path.join(wd, "config", "celltypes_" + org + ".json")

    # JSON files are UTF-8 by specification; do not rely on the locale default.
    with open(path_to_config, encoding="utf-8") as input_file:
        known_types = [entry["type"] for entry in json.load(input_file)]

    return any(tissue in known_types for tissue in tissue_cmd)
def main_script(org, wd, tissuetype=None):
    """Run the extraction pipeline for one organism.

    Steps, in order:
      1. Make sure the local data folders exist in ``wd``.
      2. Decide whether the tissue filter applies: if ``check_filter``
         accepts ``tissuetype``, the whole list is passed on unchanged,
         otherwise no filter (None) is used.
      3. Build the ``UcscGtf`` and ``Ensembl`` objects for ``org`` / ``wd``.
      4. Feed both ``get_gtf()`` results and the tissues into
         ``UniqueFilter`` and hand its results to ``ResultSaver``.

    Args:
        org: Source organism name.
        wd: Working directory.
        tissuetype: Optional list of tissue/cell type names to filter by.
    """
    check_for_local_folder(wd)

    filter_active = check_filter(tissuetype, org, wd)
    tissues = tissuetype if filter_active else None
    print("Filter detected !" if filter_active else "Filter not detected !")

    # Keep the construction order: UCSC first, then Ensembl.
    ucsc = UcscGtf(org, wd)
    ense = Ensembl(org, wd)

    print("Getting Unique Results")
    # The Ensembl data is fetched before the UCSC data.
    ensembl_gtf = ense.get_gtf()
    ucsc_gtf = ucsc.get_gtf()
    unique_filter = UniqueFilter(ensembl_gtf, ucsc_gtf, tissues)
    results = unique_filter.get_results()

    # ResultSaver is instantiated only for its effect; the object is not kept.
    ResultSaver(results, org, tissues)
if __name__ == '__main__':
    # Command-line entry point: parse the arguments and start the pipeline.
    parser = argparse.ArgumentParser(
        description='GTF-Generator from UCSC Table Browser and Ensembl Regulatory Build')
    # The organism is an optional positional (nargs='?'), so a missing value
    # is reported by the message below instead of an argparse usage error.
    parser.add_argument('organism', help='Source organism [ homo_sapiens or mus_musculus ]',
                        action='store', nargs='?', type=str)
    parser.add_argument('--tissue', help='Tissue- or Celltype(s)',
                        action='store', nargs='*', type=str)
    parser.add_argument('--wd', help='Working directory. default: "."',
                        action='store', default='.', type=str)
    args = vars(parser.parse_args())

    # Report the working directory exactly once, whether or not an organism
    # was supplied.
    print("Working Dir: " + args["wd"])
    if args["organism"]:
        main_script(args["organism"], args["wd"], args["tissue"])
    else:
        print("No Arguments found -> See python3 ./RegGTFExtractor.py -h for help.")