-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
1 parent
6f558c8
commit dacb22d
Showing
2 changed files
with
170 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,146 @@ | ||
// | ||
// htsutils.c | ||
// PASSFinder | ||
// | ||
// Created by Georgi Tushev on 07/09/16. | ||
// Copyright © 2016 Scientific Computing. All rights reserved. | ||
// | ||
|
||
#include "htsutils.h" | ||
|
||
int parseBAM(char *file_name) | ||
{ | ||
samFile *fh_in = sam_open(file_name, "r"); | ||
bam_hdr_t *header = sam_hdr_read(fh_in); | ||
bam1_t *bam = bam_init1(); | ||
|
||
int lines = 0; | ||
int poly = 0; | ||
int polyTailA = 0; | ||
int polyTailT = 0; | ||
int polyHeadA = 0; | ||
int polyHeadT = 0; | ||
//int polyHead = 0; | ||
|
||
while (sam_read1(fh_in, header, bam) >= 0) | ||
{ | ||
lines++; | ||
|
||
|
||
polyTailA = findPoly(bam, 'A', POLY_TAIL); | ||
polyTailT = findPoly(bam, 'T', POLY_TAIL); | ||
polyHeadA = findPoly(bam, 'A', POLY_HEAD); | ||
polyHeadT = findPoly(bam, 'T', POLY_HEAD); | ||
|
||
//printf("Tail: %d|%d\tHead: %d|%d\tSeq:: ", polyTailA,polyTailT,polyHeadA,polyHeadT); | ||
//dumpBamSeq(bam); | ||
//printf("\n"); | ||
|
||
//if (lines == 10) | ||
// break; | ||
|
||
} | ||
|
||
bam_destroy1(bam); | ||
bam_hdr_destroy(header); | ||
sam_close(fh_in); | ||
|
||
//printf("BAM Lines: %d / %d\n", poly, lines); | ||
|
||
return 0; | ||
} | ||
|
||
/* dumpSeq | ||
* return pointer to sequence in BAM file | ||
*/ | ||
void dumpBamSeq(bam1_t *bam) | ||
{ | ||
int i; | ||
|
||
for (i = 0; i < bam->core.l_qseq; ++i) | ||
{ | ||
printf("%c", seq_nt16_str[bam_seqi(bam_get_seq(bam),i)]); | ||
} | ||
} | ||
|
||
|
||
/* findPoly | ||
* modified findTailPolyAMaybeMask | ||
* https://github.com/jstjohn/KentLib/blob/master/lib/dnautil.c | ||
* Kent et.al., 2002 | ||
*/ | ||
int findPoly(bam1_t *bam, int base, int side) | ||
{ | ||
int i = 0; | ||
|
||
int currScore = 10; | ||
int bestScore = 10; | ||
|
||
int currPosition = 0; | ||
int bestPosition = -1; | ||
|
||
int polySize = 0; | ||
|
||
int smallBase = tolower(base); | ||
int bigBase = toupper(base); | ||
|
||
char currBase = 'X'; | ||
|
||
for (i = 0; i < bam->core.l_qseq; ++i) | ||
{ | ||
// define current position based on sequence side | ||
currPosition = i; | ||
if (side == POLY_TAIL) | ||
currPosition = bam->core.l_qseq - i - 1; | ||
|
||
// get current base from BAM | ||
currBase = seq_nt16_str[bam_seqi(bam_get_seq(bam), currPosition)]; | ||
|
||
// skip N match | ||
if (currBase == 'N' || currBase == 'n') | ||
continue; | ||
|
||
// compare to query base | ||
if (currBase == bigBase || currBase == smallBase) | ||
{ | ||
// add match score | ||
currScore += 1; | ||
|
||
// update best score | ||
if (currScore >= (bestScore - 6)) | ||
{ | ||
bestScore = currScore; | ||
bestPosition = currPosition; | ||
} | ||
} | ||
else | ||
{ | ||
// add mismatch score | ||
currScore -= 8; | ||
} | ||
|
||
// break if score is negative | ||
if (currScore < 0) | ||
break; | ||
|
||
// cap if score is too big | ||
if (currScore > 20) | ||
currScore = 20; | ||
|
||
} | ||
|
||
/* assign result */ | ||
if (bestPosition >= 0) | ||
{ | ||
polySize = bestPosition + 1; | ||
|
||
if (side == POLY_TAIL) | ||
polySize = bam->core.l_qseq - bestPosition; | ||
|
||
if (polySize < 0) | ||
polySize = 0; | ||
} | ||
|
||
return polySize; | ||
} | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,24 @@ | ||
// | ||
// htsutils.h | ||
// PASSFinder | ||
// | ||
// Created by Georgi Tushev on 07/09/16. | ||
// Copyright © 2016 Scientific Computing. All rights reserved. | ||
// | ||
|
||
#ifndef htsutils_h | ||
#define htsutils_h | ||
|
||
#include <stdio.h> | ||
#include <stdlib.h> | ||
#include <ctype.h> | ||
#include <htslib/sam.h> | ||
|
||
#define POLY_HEAD 0 | ||
#define POLY_TAIL 1 | ||
|
||
int parseBAM(char *); | ||
int findPoly(bam1_t *, int, int); | ||
void dumpBamSeq(bam1_t *); | ||
|
||
#endif /* htsutils_h */ |