From a0db8252b9590a58d6583cda12305dc6bf68001e Mon Sep 17 00:00:00 2001 From: Georgi Tushev Date: Thu, 8 Sep 2016 00:58:10 +0200 Subject: [PATCH] add q-alignment length --- htsutils.c | 58 +++++++++++++++++++++++++++++++++++++++++------------- htsutils.h | 3 ++- 2 files changed, 46 insertions(+), 15 deletions(-) diff --git a/htsutils.c b/htsutils.c index 4bdede6..fc055a0 100644 --- a/htsutils.c +++ b/htsutils.c @@ -14,11 +14,12 @@ int parseBAM(Settings *props) bam_hdr_t *header = sam_hdr_read(fh_in); bam1_t *bam = bam_init1(); - int lines = 0; - int polyTailA = 0; - int polyHeadT = 0; + uint32_t lines = 0; + uint32_t poly = 0; + uint32_t polyTailSize = 0; uint32_t polyBasePosition = 0; + uint32_t polyReadSize = 0; while (sam_read1(fh_in, header, bam) >= 0) { @@ -29,24 +30,31 @@ int parseBAM(Settings *props) // define strand specific base if (bam->core.flag & BAM_FREVERSE) { - polyHeadT = findPoly(bam, 'T', POLY_HEAD); + polyTailSize = findPoly(bam, 'T', POLY_HEAD); polyBasePosition = bam->core.pos; } else { - polyTailA = findPoly(bam, 'A', POLY_TAIL); + polyTailSize = findPoly(bam, 'A', POLY_TAIL); polyBasePosition = bam_calend(&bam->core, bam_get_cigar(bam)); } - lines++; + // read alignment length + polyReadSize = bam_calqlen(&bam->core, bam_get_cigar(bam)); + // check for poly condition + if ((polyTailSize >= props->sizepoly) && (polyReadSize <= (bam->core.l_qseq - props->sizepoly))) + { + poly++; + } //printf("Tail: %d|%d\tHead: %d|%d\tSeq:: ", polyTailA,polyTailT,polyHeadA,polyHeadT); //dumpBamSeq(bam); //printf("\n"); - //if (lines == 20) + lines++; + //if (lines == 10) // break; } @@ -55,7 +63,7 @@ int parseBAM(Settings *props) bam_hdr_destroy(header); sam_close(fh_in); - //printf("BAM Lines: %d / %d\n", poly, lines); + printf("BAM Lines: %d / %d\n", poly, lines); return 0; } @@ -79,7 +87,7 @@ void dumpBamSeq(bam1_t *bam) * https://github.com/jstjohn/KentLib/blob/master/lib/dnautil.c * Kent et.al., 2002 */ -int findPoly(bam1_t *bam, int base, int side) +uint32_t findPoly(bam1_t *bam, int base, int side) { int i = 0; @@ -89,7 +97,7 @@ int findPoly(bam1_t *bam, int base, int side) int currPosition = 0; int bestPosition = -1; - int polySize = 0; + uint32_t polySize = 0; int smallBase = tolower(base); int bigBase = toupper(base); @@ -147,21 +155,43 @@ int findPoly(bam1_t *bam, int base, int side) if (side == POLY_TAIL) polySize = bam->core.l_qseq - bestPosition; - if (polySize < 0) - polySize = 0; + //if (polySize < 0) + // polySize = 0; } return polySize; } +/* bam_calqlen + * Calculate query alignment length + */ +static inline uint32_t bam_calqlen(const bam1_core_t *core, const uint32_t *cigar) +{ + int i; + uint32_t qlen = 0; + + for (i = 0; i < core->n_cigar; ++i) + { + int span = cigar[i] >> 4; + int op = cigar[i] & 0xf; + + if ( (op == BAM_CMATCH) || (op == BAM_CDEL) || (op == BAM_CREF_SKIP)) + { + qlen += span; + } + } + + return qlen; +} + /* bam_calend * Calculate the rightmost coordinate of an alignment on the reference genome. * bam.h https://github.com/samtools/samtools/search?utf8=%E2%9C%93&q=bam_calend */ -static inline uint32_t bam_calend(const bam1_core_t *c, const uint32_t *cigar) +static inline uint32_t bam_calend(const bam1_core_t *core, const uint32_t *cigar) { - return c->pos + (c->n_cigar ? bam_cigar2rlen(c->n_cigar, cigar) : 1); + return core->pos + (core->n_cigar ? bam_cigar2rlen(core->n_cigar, cigar) : 1); } diff --git a/htsutils.h b/htsutils.h index ecd99a9..9dd989d 100644 --- a/htsutils.h +++ b/htsutils.h @@ -19,9 +19,10 @@ #define POLY_TAIL 1 int parseBAM(Settings *); -int findPoly(bam1_t *, int, int); void dumpBamSeq(bam1_t *); +static inline uint32_t findPoly(bam1_t *, int, int); +static inline uint32_t bam_calqlen(const bam1_core_t *, const uint32_t *); static inline uint32_t bam_calend(const bam1_core_t *, const uint32_t *); #endif /* htsutils_h */