Skip to content

Commit

Permalink
Browse files Browse the repository at this point in the history
Updated parse_gff.py: it now also works when the ID attribute is missing
for the lowest-level features (CDS, exons). An ID will be added.
  • Loading branch information
proost committed May 11, 2017
1 parent 9dc8cf5 commit 8de768d
Showing 1 changed file with 11 additions and 1 deletion.
12 changes: 11 additions & 1 deletion helper/parse_gff.py
@@ -1,7 +1,7 @@
import argparse
import sys
import json
from collections import OrderedDict
from collections import OrderedDict, defaultdict

LOCUS_FEATURES = ['gene']
TRANSCRIPT_FEATURES = ['mRNA', 'transcript']
Expand Down Expand Up @@ -65,12 +65,22 @@ def parse_gff3(filename):
genes = OrderedDict()
transcript_to_locus = {}

count_per_transcript = defaultdict(lambda: 1)

with open(filename) as gff_in:
for line in gff_in:
# Skip comments
if not line.strip()[0] == '#':
line_data = parse_line(line)

# Parts (e.g. CDS or Exon) might not have an ID. One will be added here
if ID_ATTRIBUTE not in line_data['attributes'].keys() and line_data['feature'] in PARTS_FEATURES:
if PARENT_ATTRIBUTE in line_data['attributes'].keys():
counter_id = line_data['attributes'][PARENT_ATTRIBUTE] + '.' + line_data['feature'] + '.'
new_id = counter_id + str(count_per_transcript[counter_id])
count_per_transcript[counter_id] += 1
line_data['attributes'][ID_ATTRIBUTE] = new_id

# Every line needs a valid ID
if ID_ATTRIBUTE in line_data['attributes'].keys():

Expand Down

0 comments on commit 8de768d

Please sign in to comment.