Skip to content

Commit

Permalink
added helper file to generate a quality summary for HISAT2 output
Browse files Browse the repository at this point in the history
  • Loading branch information
proost committed Jul 26, 2017
1 parent 375fc42 commit c199951
Showing 1 changed file with 33 additions and 0 deletions.
33 changes: 33 additions & 0 deletions helper/hisat2_stats.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
#!/usr/bin/env python3
"""
Script to iterate over HISAT2 output and grab quality statistics
"""
import os
import re

from sys import argv
from collections import defaultdict

# Matches the closing line of a HISAT2 alignment summary, e.g.
# "96.70% overall alignment rate", and captures the numeric percentage.
# Anchored per line (MULTILINE) so it works wherever the line sits in the
# file, including the very first line.
re_mapped = re.compile(r'^\s*(\d+(?:\.\d+)?)% overall alignment rate',
                       re.MULTILINE)


def parse_mapped_percentage(text):
    """Extract the overall alignment rate from HISAT2 summary text.

    :param text: full contents of a HISAT2 summary (.stats) file
    :return: the percentage as a float, or None when the text contains no
             "overall alignment rate" line
    """
    hit = re_mapped.search(text)
    return float(hit.group(1)) if hit else None


def collect_mapped_percentages(stats_dir):
    """Gather the overall alignment rate of every .stats file in a directory.

    Only regular files whose name ends in '.stats' are read; files without
    an "overall alignment rate" line are skipped silently. Entries are
    visited in sorted name order because os.listdir() returns them in
    arbitrary order, which would make the report non-reproducible.

    :param stats_dir: directory holding the HISAT2 summary files
    :return: list of (file name, mapped percentage) tuples, sorted by name
    """
    results = []
    for name in sorted(os.listdir(stats_dir)):
        # Only consider .stats files
        if not name.endswith('.stats'):
            continue
        summary = os.path.join(stats_dir, name)
        if not os.path.isfile(summary):
            continue
        # process summary file
        with open(summary) as f:
            percentage = parse_mapped_percentage(f.read())
        if percentage is not None:
            results.append((name, percentage))
    return results


def main(args):
    """Print a tab-separated table of sample name and mapped percentage.

    :param args: command line arguments; args[1] is the directory to scan
    :raises SystemExit: with a usage message when the directory is missing
    """
    if len(args) < 2:
        raise SystemExit('usage: {} <hisat2_output_dir>'.format(args[0] if args else 'hisat2_stats.py'))

    # Collect before printing so a bad path fails without a dangling header
    rows = collect_mapped_percentages(args[1])

    print('sample', 'mapped_percentage', sep='\t')
    for sample, percentage in rows:
        print(sample, percentage, sep='\t')


if __name__ == '__main__':
    main(argv)

0 comments on commit c199951

Please sign in to comment.