Skip to content

Commit

Permalink
add q-alignment length
Browse files Browse the repository at this point in the history
  • Loading branch information
MPIBR-tushevg committed Sep 7, 2016
1 parent 33eaaee commit a0db825
Show file tree
Hide file tree
Showing 2 changed files with 46 additions and 15 deletions.
58 changes: 44 additions & 14 deletions htsutils.c
Original file line number Diff line number Diff line change
Expand Up @@ -14,11 +14,12 @@ int parseBAM(Settings *props)
bam_hdr_t *header = sam_hdr_read(fh_in);
bam1_t *bam = bam_init1();

int lines = 0;
int polyTailA = 0;
int polyHeadT = 0;
uint32_t lines = 0;
uint32_t poly = 0;

uint32_t polyTailSize = 0;
uint32_t polyBasePosition = 0;
uint32_t polyReadSize = 0;

while (sam_read1(fh_in, header, bam) >= 0)
{
Expand All @@ -29,24 +30,31 @@ int parseBAM(Settings *props)
// define strand specific base
if (bam->core.flag & BAM_FREVERSE)
{
polyHeadT = findPoly(bam, 'T', POLY_HEAD);
polyTailSize = findPoly(bam, 'T', POLY_HEAD);
polyBasePosition = bam->core.pos;
}
else
{
polyTailA = findPoly(bam, 'A', POLY_TAIL);
polyTailSize = findPoly(bam, 'A', POLY_TAIL);
polyBasePosition = bam_calend(&bam->core, bam_get_cigar(bam));
}

lines++;
// read alignment length
polyReadSize = bam_calqlen(&bam->core, bam_get_cigar(bam));


// check for poly condition
if ((polyTailSize >= props->sizepoly) && (polyReadSize <= (bam->core.l_qseq - props->sizepoly)))
{
poly++;
}

//printf("Tail: %d|%d\tHead: %d|%d\tSeq:: ", polyTailA,polyTailT,polyHeadA,polyHeadT);
//dumpBamSeq(bam);
//printf("\n");

//if (lines == 20)
lines++;
//if (lines == 10)
// break;

}
Expand All @@ -55,7 +63,7 @@ int parseBAM(Settings *props)
bam_hdr_destroy(header);
sam_close(fh_in);

//printf("BAM Lines: %d / %d\n", poly, lines);
printf("BAM Lines: %d / %d\n", poly, lines);

return 0;
}
Expand All @@ -79,7 +87,7 @@ void dumpBamSeq(bam1_t *bam)
* https://github.com/jstjohn/KentLib/blob/master/lib/dnautil.c
* Kent et.al., 2002
*/
int findPoly(bam1_t *bam, int base, int side)
uint32_t findPoly(bam1_t *bam, int base, int side)
{
int i = 0;

Expand All @@ -89,7 +97,7 @@ int findPoly(bam1_t *bam, int base, int side)
int currPosition = 0;
int bestPosition = -1;

int polySize = 0;
uint32_t polySize = 0;

int smallBase = tolower(base);
int bigBase = toupper(base);
Expand Down Expand Up @@ -147,21 +155,43 @@ int findPoly(bam1_t *bam, int base, int side)
if (side == POLY_TAIL)
polySize = bam->core.l_qseq - bestPosition;

if (polySize < 0)
polySize = 0;
//if (polySize < 0)
// polySize = 0;
}

return polySize;
}

/* bam_calqlen
* Calculate query alignment length
*/
static inline uint32_t bam_calqlen(const bam1_core_t *core, const uint32_t *cigar)
{
int i;
uint32_t qlen = 0;

for (i = 0; i < core->n_cigar; ++i)
{
int span = cigar[i] >> 4;
int op = cigar[i] & 0xf;

if ( (op == BAM_CMATCH) || (op == BAM_CDEL) || (op == BAM_CREF_SKIP))
{
qlen += span;
}
}

return qlen;
}


/* bam_calend
* Calculate the rightmost coordinate of an alignment on the reference genome.
* bam.h https://github.com/samtools/samtools/search?utf8=%E2%9C%93&q=bam_calend
*/

static inline uint32_t bam_calend(const bam1_core_t *c, const uint32_t *cigar)
static inline uint32_t bam_calend(const bam1_core_t *core, const uint32_t *cigar)
{
return c->pos + (c->n_cigar ? bam_cigar2rlen(c->n_cigar, cigar) : 1);
return core->pos + (core->n_cigar ? bam_cigar2rlen(core->n_cigar, cigar) : 1);
}

3 changes: 2 additions & 1 deletion htsutils.h
Original file line number Diff line number Diff line change
Expand Up @@ -19,9 +19,10 @@
#define POLY_TAIL 1

int parseBAM(Settings *);
int findPoly(bam1_t *, int, int);
void dumpBamSeq(bam1_t *);

static inline uint32_t findPoly(bam1_t *, int, int);
static inline uint32_t bam_calqlen(const bam1_core_t *, const uint32_t *);
static inline uint32_t bam_calend(const bam1_core_t *, const uint32_t *);

#endif /* htsutils_h */

0 comments on commit a0db825

Please sign in to comment.