Skip to content

Commit

Permalink
Browse files Browse the repository at this point in the history
removed some obsolete parsers modified gene family models to support …
…deletes...
  • Loading branch information
proost committed Dec 1, 2016
1 parent ad7c3cc commit e5f61e7
Show file tree
Hide file tree
Showing 7 changed files with 34 additions and 123 deletions.
3 changes: 2 additions & 1 deletion master_build.template.py
Expand Up @@ -273,7 +273,8 @@

print("Adding Families")
print("===============")
families_id = GeneFamily.add_families_from_plaza("data/genefamily_data.hom.csv", "PLAZA 2.5 Homologous gene families")
# TODO OBSOLETE
# families_id = GeneFamily.add_families_from_plaza("data/genefamily_data.hom.csv", "PLAZA 2.5 Homologous gene families")

print("Adding Expression Plots")
print("=======================")
Expand Down
8 changes: 4 additions & 4 deletions planet/controllers/admin/controls.py
Expand Up @@ -381,12 +381,12 @@ def add_family():
fd, temp_path = mkstemp()
open(temp_path, 'wb').write(family_data)

if source == 'plaza':
GeneFamily.add_families_from_plaza(temp_path, method_description)
flash('Added Gene families from file %s' % form.file.name, 'success')
elif source == 'mcl':
if source == 'mcl':
GeneFamily.add_families_from_mcl(temp_path, method_description)
flash('Added Gene families from file %s' % form.file.name, 'success')
elif source == 'orthofinder':
GeneFamily.add_families_from_orthofinder(temp_path, method_description)
flash('Added Gene families from file %s' % form.file.name, 'success')
else:
flash('Method not implemented yet', 'danger')
os.close(fd)
Expand Down
2 changes: 1 addition & 1 deletion planet/forms/admin/add_family.py
Expand Up @@ -6,6 +6,6 @@

class AddFamiliesForm(FlaskForm):
method_description = StringField('Description', [InputRequired])
source = SelectField('Source', choices=[('plaza', 'PLAZA csv'), ('mcl', 'MCL'), ('orthofinder', 'OrthoFinder')])
source = SelectField('Source', choices=[('mcl', 'MCL'), ('orthofinder', 'OrthoFinder')])
file = FileField()

105 changes: 25 additions & 80 deletions planet/models/gene_families.py
Expand Up @@ -2,8 +2,6 @@
from planet.models.relationships import sequence_family, family_xref, SequenceSequenceECCAssociation
from planet.models.sequences import Sequence

from utils.parser.plaza.families import Parser as FamilyParser

import csv
import re

Expand All @@ -19,7 +17,9 @@ class GeneFamilyMethod(db.Model):
method = db.Column(db.Text)
family_count = db.Column(db.Integer)

families = db.relationship('GeneFamily', backref=db.backref('method', lazy='joined'), lazy='dynamic')
families = db.relationship('GeneFamily', backref=db.backref('method', lazy='joined'),
lazy='dynamic',
cascade='all, delete-orphan')

def __init__(self, method):
self.method = method
Expand Down Expand Up @@ -161,25 +161,17 @@ def add_families_from_mcl(filename, description, handle_isoforms=True, prefix='m
return method.id

@staticmethod
def add_families_from_tab(filename, description, handle_isoforms=True):
def add_families_from_orthofinder(filename, description, handle_isoforms=True):
"""
DEPRECATED IMPORT FROM MCL
Add gene families directly from MCL output (one line with all genes from one family)
:param filename:
:param description:
:param handle_isoforms:
:return:
:param filename: The file to load
:param description: Description of the method to store in the database
:param handle_isoforms: should isoforms (indicated by .1 at the end) be handled
:return: the new method's internal ID
"""

# Create new method for these families
method = GeneFamilyMethod(description)

try:
db.session.add(method)
db.session.commit()
except Exception as e:
db.session.rollback()
quit()
method = GeneFamilyMethod.add(description)

gene_hash = {}
all_sequences = Sequence.query.all()
Expand All @@ -191,71 +183,24 @@ def add_families_from_tab(filename, description, handle_isoforms=True):
gene_id = re.sub('\.\d+$', '', sequence.name.lower())
gene_hash[gene_id] = sequence

family_hash = {}

families = {}
genes = []

with open(filename) as csvfile:
reader = csv.DictReader(csvfile, delimiter='\t')
for row in reader:
family = row['family']
gene = row['gene']

genes.append(gene)

if family not in families.keys():
families[family] = []
family_hash[family] = GeneFamily(family)
family_hash[family].method_id = method.id

families[family].append(gene)

for name, f in family_hash.items():
db.session.add(f)

for name, f in family_hash.items():
for gene in families[name]:
if gene.lower() in gene_hash.keys():
gene_hash[gene.lower()].families.append(family_hash[name])

try:
db.session.commit()
except Exception as e:
db.session.rollback()
print(e)

return method.id

@staticmethod
def add_families_from_plaza(filename, description):
family_parser = FamilyParser()
family_parser.read(filename)

method = GeneFamilyMethod(description)

db.session.add(method)

gene_hash = {}
all_sequences = Sequence.query.all()

for sequence in all_sequences:
gene_hash[sequence.name] = sequence
with open(filename, "r") as f_in:
for line in f_in:
orthofinder_id, *parts = line.strip().split()

for family, genes in family_parser.families.items():
new_family = GeneFamily(family)
new_family.method_id = method.id
orthofinder_id = orthofinder_id.rstrip(':')

db.session.add(new_family)
new_family = GeneFamily(orthofinder_id.replace('OG', 'OG_%02d_' % method.id))
new_family.method_id = method.id

for gene in genes:
if gene in gene_hash:
gene_hash[gene].families.append(new_family)
for p in parts:
if p.lower() in gene_hash.keys():
new_family.sequences.append(gene_hash[p.lower()])

try:
db.session.commit()
except Exception as e:
db.session.rollback()
print(e)
try:
db.session.add(new_family)
db.session.commit()
except Exception as e:
db.session.rollback()
quit()

return method.id
2 changes: 2 additions & 0 deletions tests/data/comparative_data/mamut.families.orthofinder.txt
@@ -0,0 +1,2 @@
OG00001: Gene01 Gene02
OG00002: Gene03
16 changes: 0 additions & 16 deletions utils/parser/plaza/families.py

This file was deleted.

21 changes: 0 additions & 21 deletions utils/parser/plaza/interpro.py

This file was deleted.

0 comments on commit e5f61e7

Please sign in to comment.