From f6ddce35872a10e8f8cd7822805df6e8d014949e Mon Sep 17 00:00:00 2001 From: basti Date: Wed, 9 Jan 2019 13:21:21 +0100 Subject: [PATCH 1/2] Fixed naming scheme for 3rd column --- .gitignore | 4 +--- bin/3.1_create_gtf/Modules/ucsc/ucsc.py | 26 ++++++++++++++----------- 2 files changed, 16 insertions(+), 14 deletions(-) diff --git a/.gitignore b/.gitignore index dc6d207..7cf34e7 100644 --- a/.gitignore +++ b/.gitignore @@ -203,6 +203,4 @@ venv.bak/ # mypy .mypy_cache/ - -Würde bin/3.1_create_gtf/data/ löschen -Würde data/ löschen +/bin/3.1_create_gtf/data/ diff --git a/bin/3.1_create_gtf/Modules/ucsc/ucsc.py b/bin/3.1_create_gtf/Modules/ucsc/ucsc.py index f1663b1..23aaf91 100644 --- a/bin/3.1_create_gtf/Modules/ucsc/ucsc.py +++ b/bin/3.1_create_gtf/Modules/ucsc/ucsc.py @@ -58,17 +58,21 @@ def read_gff_to_gtf(self): with open(self.output, 'r') as csvfile: tsvreader = csv.reader(csvfile, delimiter='\t') for row in tsvreader: - sequence = [] - sequence.append(row[0]) - sequence.append("UCSC") - sequence.append(row[3].lower().replace(' ', '_')) - sequence.append(row[1]) - sequence.append(row[2]) - sequence.append(".") - sequence.append(row[5]) - sequence.append(".") - sequence.append('; '.join([self.find_ID(''.join(row[11:])), 'activity \"'+", ".join(self.get_activity(''.join(row[11:]))) + '"'])+";") - gtf_lines.append(sequence) + if row[9] not in ["region", "sequence_feature", + "CAAT_signal", "stem_loop", + "sequence_secondary_structure"]: + + sequence = [] + sequence.append(row[0]) + sequence.append("UCSC") + sequence.append(row[9].lower().replace(' ', '_')) + sequence.append(row[1]) + sequence.append(row[2]) + sequence.append(".") + sequence.append(row[5]) + sequence.append(".") + sequence.append('; '.join([self.find_ID(''.join(row[11:])), 'activity \"'+", ".join(self.get_activity(''.join(row[11:]))) + '"'])+";") + gtf_lines.append(sequence) return gtf_lines From 8d1ff19636fd81b8410f274d8c1f404dc353fa41 Mon Sep 17 00:00:00 2001 From: basti Date: Wed, 9 Jan 2019 18:31:29 +0100 Subject: [PATCH 2/2] Added Shebang in response to #44 --- bin/3.1_create_gtf/RegGTFExtractor.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/bin/3.1_create_gtf/RegGTFExtractor.py b/bin/3.1_create_gtf/RegGTFExtractor.py index 9000341..6bdd251 100644 --- a/bin/3.1_create_gtf/RegGTFExtractor.py +++ b/bin/3.1_create_gtf/RegGTFExtractor.py @@ -1,3 +1,5 @@ +#!/usr/bin/env python + """ RegGTFExtractor.py extracts regulatory-data from Ensembl and UCSC databases