Skip to content

Commit

Permalink
add HTSLib support
Browse files Browse the repository at this point in the history
  • Loading branch information
MPIBR-tushevg committed Sep 7, 2016
1 parent 6f558c8 commit dacb22d
Show file tree
Hide file tree
Showing 2 changed files with 170 additions and 0 deletions.
146 changes: 146 additions & 0 deletions htsutils.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,146 @@
//
// htsutils.c
// PASSFinder
//
// Created by Georgi Tushev on 07/09/16.
// Copyright © 2016 Scientific Computing. All rights reserved.
//

#include "htsutils.h"

int parseBAM(char *file_name)
{
samFile *fh_in = sam_open(file_name, "r");
bam_hdr_t *header = sam_hdr_read(fh_in);
bam1_t *bam = bam_init1();

int lines = 0;
int poly = 0;
int polyTailA = 0;
int polyTailT = 0;
int polyHeadA = 0;
int polyHeadT = 0;
//int polyHead = 0;

while (sam_read1(fh_in, header, bam) >= 0)
{
lines++;


polyTailA = findPoly(bam, 'A', POLY_TAIL);
polyTailT = findPoly(bam, 'T', POLY_TAIL);
polyHeadA = findPoly(bam, 'A', POLY_HEAD);
polyHeadT = findPoly(bam, 'T', POLY_HEAD);

//printf("Tail: %d|%d\tHead: %d|%d\tSeq:: ", polyTailA,polyTailT,polyHeadA,polyHeadT);
//dumpBamSeq(bam);
//printf("\n");

//if (lines == 10)
// break;

}

bam_destroy1(bam);
bam_hdr_destroy(header);
sam_close(fh_in);

//printf("BAM Lines: %d / %d\n", poly, lines);

return 0;
}

/* dumpSeq
* return pointer to sequence in BAM file
*/
void dumpBamSeq(bam1_t *bam)
{
int i;

for (i = 0; i < bam->core.l_qseq; ++i)
{
printf("%c", seq_nt16_str[bam_seqi(bam_get_seq(bam),i)]);
}
}


/* findPoly
* modified findTailPolyAMaybeMask
* https://github.com/jstjohn/KentLib/blob/master/lib/dnautil.c
* Kent et.al., 2002
*/
int findPoly(bam1_t *bam, int base, int side)
{
int i = 0;

int currScore = 10;
int bestScore = 10;

int currPosition = 0;
int bestPosition = -1;

int polySize = 0;

int smallBase = tolower(base);
int bigBase = toupper(base);

char currBase = 'X';

for (i = 0; i < bam->core.l_qseq; ++i)
{
// define current position based on sequence side
currPosition = i;
if (side == POLY_TAIL)
currPosition = bam->core.l_qseq - i - 1;

// get current base from BAM
currBase = seq_nt16_str[bam_seqi(bam_get_seq(bam), currPosition)];

// skip N match
if (currBase == 'N' || currBase == 'n')
continue;

// compare to query base
if (currBase == bigBase || currBase == smallBase)
{
// add match score
currScore += 1;

// update best score
if (currScore >= (bestScore - 6))
{
bestScore = currScore;
bestPosition = currPosition;
}
}
else
{
// add mismatch score
currScore -= 8;
}

// break if score is negative
if (currScore < 0)
break;

// cap if score is too big
if (currScore > 20)
currScore = 20;

}

/* assign result */
if (bestPosition >= 0)
{
polySize = bestPosition + 1;

if (side == POLY_TAIL)
polySize = bam->core.l_qseq - bestPosition;

if (polySize < 0)
polySize = 0;
}

return polySize;
}

24 changes: 24 additions & 0 deletions htsutils.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
//
// htsutils.h
// PASSFinder
//
// Created by Georgi Tushev on 07/09/16.
// Copyright © 2016 Scientific Computing. All rights reserved.
//

#ifndef htsutils_h
#define htsutils_h

#include <stdio.h>
#include <stdlib.h>
#include <ctype.h>
#include <htslib/sam.h>

#define POLY_HEAD 0
#define POLY_TAIL 1

int parseBAM(char *);
int findPoly(bam1_t *, int, int);
void dumpBamSeq(bam1_t *);

#endif /* htsutils_h */

0 comments on commit dacb22d

Please sign in to comment.