Skip to content

Commit

Permalink
Browse files Browse the repository at this point in the history
further work on QC
  • Loading branch information
proost committed Apr 25, 2016
1 parent 86569cd commit 3228d41
Show file tree
Hide file tree
Showing 2 changed files with 47 additions and 7 deletions.
39 changes: 39 additions & 0 deletions pipeline/check/quality.py
@@ -0,0 +1,39 @@
import sys

__bad_fields = ['no_feature', 'ambiguous', 'too_low_aQual', 'not_aligned', 'alignment_not_unique']


def htseq_count_quality(filename, cutoff):
values = []
total_count = 0

with open(filename, "r") as fin:
for l in fin:
gene, count = l.strip().split('\t')
if all([gene not in bf for bf in __bad_fields]):
values.append(int(count))
total_count += int(count)

if total_count == 0:
print("N50 check for", filename, "failed. No reads found")
return False

values.sort(reverse=True)

current_count = 0
for e, v in enumerate(values, start=1):
current_count += v
if current_count > total_count/2:
if e >= cutoff:
return True

break

print("N50 check for", filename, "failed.", e, cutoff)
return False






15 changes: 8 additions & 7 deletions pipeline/transcriptome.py
Expand Up @@ -4,6 +4,7 @@
from cluster import wait_for_job
from utils.matrix import read_matrix, write_matrix, normalize_matrix_counts, normalize_matrix_length
from pipeline.base import PipelineBase
from pipeline.check.quality import htseq_count_quality


class TranscriptomePipeline(PipelineBase):
Expand Down Expand Up @@ -267,15 +268,15 @@ def htseq_to_matrix(self):

for file in htseq_files:
full_path = os.path.join(htseq_output, file)
if htseq_count_quality(full_path, 1):
with open(full_path, "r") as f:
for row in f:
gene_id, count = row.strip().split('\t')

with open(full_path, "r") as f:
for row in f:
gene_id, count = row.strip().split('\t')
if gene_id not in counts.keys():
counts[gene_id] = {}

if gene_id not in counts.keys():
counts[gene_id] = {}

counts[gene_id][file] = count
counts[gene_id][file] = count

output_file = self.dp[g]['exp_matrix_output']
with open(output_file, "w") as f_out:
Expand Down

0 comments on commit 3228d41

Please sign in to comment.