diff --git a/.gitignore b/.gitignore index dc6d207..7cf34e7 100644 --- a/.gitignore +++ b/.gitignore @@ -203,6 +203,4 @@ venv.bak/ # mypy .mypy_cache/ - -Würde bin/3.1_create_gtf/data/ löschen -Würde data/ löschen +/bin/3.1_create_gtf/data/ diff --git a/bin/3.1_create_gtf/Modules/ucsc/ucsc.py b/bin/3.1_create_gtf/Modules/ucsc/ucsc.py index f1663b1..23aaf91 100644 --- a/bin/3.1_create_gtf/Modules/ucsc/ucsc.py +++ b/bin/3.1_create_gtf/Modules/ucsc/ucsc.py @@ -58,17 +58,21 @@ def read_gff_to_gtf(self): with open(self.output, 'r') as csvfile: tsvreader = csv.reader(csvfile, delimiter='\t') for row in tsvreader: - sequence = [] - sequence.append(row[0]) - sequence.append("UCSC") - sequence.append(row[3].lower().replace(' ', '_')) - sequence.append(row[1]) - sequence.append(row[2]) - sequence.append(".") - sequence.append(row[5]) - sequence.append(".") - sequence.append('; '.join([self.find_ID(''.join(row[11:])), 'activity \"'+", ".join(self.get_activity(''.join(row[11:]))) + '"'])+";") - gtf_lines.append(sequence) + if row[9] not in ["region", "sequence_feature", + "CAAT_signal", "stem_loop", + "sequence_secondary_structure"]: + + sequence = [] + sequence.append(row[0]) + sequence.append("UCSC") + sequence.append(row[9].lower().replace(' ', '_')) + sequence.append(row[1]) + sequence.append(row[2]) + sequence.append(".") + sequence.append(row[5]) + sequence.append(".") + sequence.append('; '.join([self.find_ID(''.join(row[11:])), 'activity \"'+", ".join(self.get_activity(''.join(row[11:]))) + '"'])+";") + gtf_lines.append(sequence) return gtf_lines diff --git a/bin/3.1_create_gtf/RegGTFExtractor.py b/bin/3.1_create_gtf/RegGTFExtractor.py index 9000341..6bdd251 100644 --- a/bin/3.1_create_gtf/RegGTFExtractor.py +++ b/bin/3.1_create_gtf/RegGTFExtractor.py @@ -1,3 +1,5 @@ +#!/usr/bin/env python + """ RegGTFExtractor.py extracts regulatory-data from Ensembl and UCSC databases