Skip to content
Navigation Menu
Toggle navigation
Sign in
In this repository
All GitHub Enterprise
↵
Jump to
↵
No suggested jump to results
In this repository
All GitHub Enterprise
↵
Jump to
↵
In this organization
All GitHub Enterprise
↵
Jump to
↵
In this repository
All GitHub Enterprise
↵
Jump to
↵
Sign in
Reseting focus
You signed in with another tab or window.
Reload
to refresh your session.
You signed out in another tab or window.
Reload
to refresh your session.
You switched accounts on another tab or window.
Reload
to refresh your session.
Dismiss alert
{{ message }}
loosolab
/
master_project_JLU2018
Public
Notifications
You must be signed in to change notification settings
Fork
0
Star
0
Code
Issues
7
Pull requests
1
Actions
Projects
0
Wiki
Security
Insights
Additional navigation options
Code
Issues
Pull requests
Actions
Projects
Wiki
Security
Insights
Files
c0c36bc
bin
1.1_footprint_extraction
1.2_filter_motifs
2.1_clustering
2.2_motif_estimation
3.1_create_gtf
Modules
config
data
RegGTFExtractor.py
config
README.md
masterenv.yml
nextflow.config
pipeline.nf
Breadcrumbs
master_project_JLU2018
/
bin
/
3.1_create_gtf
/
RegGTFExtractor.py
Blame
Blame
Latest commit
History
History
156 lines (114 loc) · 5.3 KB
Breadcrumbs
master_project_JLU2018
/
bin
/
3.1_create_gtf
/
RegGTFExtractor.py
Top
File metadata and controls
Code
Blame
156 lines (114 loc) · 5.3 KB
Raw
""" RegGTFExtractor.py extracts regulatory-data from Ensembl and UCSC databases and converts the output to a GTF-formatted file. @author: Sebastian Beyvers @contact: sebastian.beyvers@med.uni-giessen.de """ import argparse from Modules.Ensembl.Ensembl import Ensembl from Modules.ucsc.ucsc import UcscGtf from Modules.Uniquifier import UniqueFilter from Modules.SaveResults import ResultSaver from Modules.CrossMapper import CrossMapper import os import json def check_for_local_folder(wd): # Check if local folder exists and create if missing when no data_dir is specified # input_parameter: wd = working directory # return_value: None if not os.path.isdir(os.path.join(wd+"/data/")): os.mkdir(os.path.join(wd+"/data/")) if not os.path.isdir(os.path.join(wd+"/data/EnsemblData")): os.mkdir(os.path.join(wd+"/data/EnsemblData")) if not os.path.isdir(os.path.join(wd+"/data/UCSCData")): os.mkdir(os.path.join(wd+"/data/UCSCData")) if not os.path.isdir(os.path.join(wd+"/data/temp")): os.mkdir(os.path.join(wd+"/data/temp")) def check_for_data_dir(data_dir): # Check if local folder exists and create if missing when data_dir as parameter is specified # input_parameter: data_dir = data directory # return_value: None if not os.path.isdir(os.path.join(data_dir)): os.mkdir(os.path.join(data_dir)) if not os.path.isdir(os.path.join(data_dir+"/EnsemblData")): os.mkdir(os.path.join(data_dir+"/EnsemblData")) if not os.path.isdir(os.path.join(data_dir+"/UCSCData")): os.mkdir(os.path.join(data_dir + "/UCSCData")) if not os.path.isdir(os.path.join(data_dir+"/temp")): os.mkdir(os.path.join(data_dir+"/temp")) def check_filter(tissue_cmd, org, wd): # Checks if filter-celltype is in Json types for organism # input_parameter: tissue_cmd: Filtered tissuetypes; org = organism; wd = working directory # return_value: boolean if selected filter is in config path_to_config = os.path.join(wd + "/config/celltypes_" + org + ".json") tissues_config = [] if not tissue_cmd: return False with open(path_to_config) as input_file: data = json.loads(input_file.read()) for x in data: tissues_config.append(x["type"]) if any(tissue in tissues_config for tissue in tissue_cmd): return True else: return False def check_organism(org): # Checks the organism input and decides if chrossmapping is necessary # input_parameter: org = input organism (parameter) # return_value: tuple with values = (organism_alias (string), boolean if chrossmapping is needed) if org == "hg38": return "homo_sapiens", False if org == "hg19": print("Older assembly Version detected: hg19 -> Crossmapping result from hg38") return "homo_sapiens", True elif org == "mm10": return "mus_musculus", False elif org == "mm9": print("Older assembly Version detected: mm9 -> Crossmapping result from mm10") return "mus_musculus", True def main_script(organism, wd, data_dir, out, tissuetype=None): # main function # input_parameter: all parameters from argparse (org, x_mappable) = check_organism(organism) if not data_dir: check_for_local_folder(wd) else: check_for_data_dir(data_dir) if check_filter(tissuetype, org, wd): tissues = tissuetype print("Filter detected !") else: tissues = None print("Filter not detected !") # Get UCSC Data ucsc = UcscGtf(org, wd, data_dir) # Gen Ensembl Data ense = Ensembl(org, wd, data_dir) print("Getting Unique Results") unique_filter = UniqueFilter(ense.get_gtf(), ucsc.get_gtf(), tissues) if data_dir: ResultSaver(unique_filter.get_results(), organism, data_dir, x_mappable, True, tissues, out) if x_mappable: CrossMapper(organism, data_dir, out, True) else: ResultSaver(unique_filter.get_results(), organism, wd, x_mappable, False, tissues, out) if x_mappable: CrossMapper(organism, wd, out, False) if __name__ == '__main__': # argument parser parser = argparse.ArgumentParser(description='GTF-Generator from UCSC Table Browser and Ensembl Regulatory Build' ) parser.add_argument('organism', help='Source organism [ hg19 | hg38 or mm9 | mm10 ]', action='store', nargs='?', type=str) parser.add_argument('--tissue', help='Tissue- or Celltype(s)', action='store', nargs='*', type=str) parser.add_argument('--wd', help='Working directory. default: "."', action='store', default=os.getcwd(), type=str) parser.add_argument('--dir', help='Data directory. default: "working_directory"', action='store', default="", type=str) parser.add_argument('--out', help='Output directory: default: "."', action='store', default=".", type=str) args = vars(parser.parse_args()) # Check if organism exists if args["organism"]: if args["organism"] in ["hg19", "hg38", "mm9", "mm10"]: print("Working Dir: " + args["wd"]) main_script(args["organism"], args["wd"], args["dir"], args["out"], args["tissue"]) else: print("Invalid Organism: " + args["organism"] + " see -h for help") else: print("No Arguments found -> See python3 ./RegGTFExtractor.py -h for help.")
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
You can’t perform that action at this time.