diff --git a/htsutils.c b/htsutils.c index 99460fe..4bdede6 100644 --- a/htsutils.c +++ b/htsutils.c @@ -8,35 +8,45 @@ #include "htsutils.h" -int parseBAM(char *file_name) +int parseBAM(Settings *props) { - samFile *fh_in = sam_open(file_name, "r"); + samFile *fh_in = sam_open(props->input, "r"); bam_hdr_t *header = sam_hdr_read(fh_in); bam1_t *bam = bam_init1(); int lines = 0; - int poly = 0; int polyTailA = 0; - int polyTailT = 0; - int polyHeadA = 0; int polyHeadT = 0; - //int polyHead = 0; + + uint32_t polyBasePosition = 0; while (sam_read1(fh_in, header, bam) >= 0) { + // filter based on MAPQ + if (props->mapq > bam->core.qual) + continue; + + // define strand specific base + if (bam->core.flag & BAM_FREVERSE) + { + polyHeadT = findPoly(bam, 'T', POLY_HEAD); + polyBasePosition = bam->core.pos; + } + else + { + polyTailA = findPoly(bam, 'A', POLY_TAIL); + polyBasePosition = bam_calend(&bam->core, bam_get_cigar(bam)); + } + lines++; - polyTailA = findPoly(bam, 'A', POLY_TAIL); - polyTailT = findPoly(bam, 'T', POLY_TAIL); - polyHeadA = findPoly(bam, 'A', POLY_HEAD); - polyHeadT = findPoly(bam, 'T', POLY_HEAD); //printf("Tail: %d|%d\tHead: %d|%d\tSeq:: ", polyTailA,polyTailT,polyHeadA,polyHeadT); //dumpBamSeq(bam); //printf("\n"); - //if (lines == 10) + //if (lines == 20) // break; } @@ -144,3 +154,14 @@ int findPoly(bam1_t *bam, int base, int side) return polySize; } + +/* bam_calend + * Calculate the rightmost coordinate of an alignment on the reference genome. + * bam.h https://github.com/samtools/samtools/search?utf8=%E2%9C%93&q=bam_calend + */ + +static inline uint32_t bam_calend(const bam1_core_t *c, const uint32_t *cigar) +{ + return c->pos + (c->n_cigar ? bam_cigar2rlen(c->n_cigar, cigar) : 1); +} + diff --git a/htsutils.h b/htsutils.h index ed85c36..ecd99a9 100644 --- a/htsutils.h +++ b/htsutils.h @@ -13,12 +13,15 @@ #include #include #include +#include "config.h" #define POLY_HEAD 0 #define POLY_TAIL 1 -int parseBAM(char *); +int parseBAM(Settings *); int findPoly(bam1_t *, int, int); void dumpBamSeq(bam1_t *); +static inline uint32_t bam_calend(const bam1_core_t *, const uint32_t *); + #endif /* htsutils_h */