Skip to content
Permalink
master
Switch branches/tags

Name already in use

A tag already exists with the provided branch name. Many Git commands accept both tag and branch names, so creating this branch may cause unexpected behavior. Are you sure you want to create this branch?
Go to file
 
 
Cannot retrieve contributors at this time
#include<iostream>
#include<algorithm>
#include<string.h>
#include "dbseq.h"
using namespace std;
extern Param param;
extern bit8_t alphabet[];
extern bit8_t rev_alphabet[];
/************/
/// Creates an empty reference container. The k-mer table size starts at
/// zero; InitialIndex() computes the real value later.
RefSeq::RefSeq()
{
    this->total_kmers = 0;
}
/**
 * Reads the next FASTA record from an uncompressed reference stream.
 *
 * The record name (first whitespace-delimited word after the leading marker
 * character) is stored in _name, the residues are written to the front of
 * _seq, and _length receives the number of residues read. _seq (and
 * param.max_dbseq_size) grow in steps of param.append_dbseq_size as needed.
 *
 * @param fin  plain-text reference stream positioned at a record header.
 * @return     number of residues read; 0 when param.gz_ref is set (the
 *             igzstream overload is responsible then), when the stream is
 *             exhausted or unreadable, or when the record is empty.
 */
ref_loc_t RefSeq::LoadNextSeq(ifstream &fin) {
    if(param.gz_ref) return 0;
    char c;
    // Consume the record marker. Testing the stream state (not only eof) also
    // stops cleanly on a stream that failed to open.
    if(!(fin>>c)) return 0;
    string::iterator z=_seq.begin();
    _length=0;
    string rest, s;
    fin>>_name;
    // Drop the remainder of the header line regardless of its length; a
    // fixed-size buffer would set failbit on long headers.
    getline(fin, rest);
    while(fin>>c) {
        fin.unget();
        if(c=='>') break;               // start of the next record
        if(!(fin>>s)) break;
        if(_length+s.size()>=param.max_dbseq_size) {
            // Keep growing until the whole token fits; one step is not
            // enough when a single line exceeds append_dbseq_size.
            do param.max_dbseq_size+=param.append_dbseq_size;
            while(param.append_dbseq_size && _length+s.size()>=param.max_dbseq_size);
            _seq.resize(param.max_dbseq_size);
            z=_seq.begin()+_length;     // resize may have invalidated z
        }
        copy(s.begin(), s.end(), z);
        z+=s.size(); _length+=s.size();
    }
    return _length;
}
/**
 * Reads the next FASTA record from a gzip-compressed reference stream.
 *
 * The record name (first whitespace-delimited word after the leading marker
 * character) is stored in _name, the residues are written to the front of
 * _seq, and _length receives the number of residues read. _seq (and
 * param.max_dbseq_size) grow in steps of param.append_dbseq_size as needed.
 *
 * @param fin  gzip reference stream positioned at a record header.
 * @return     number of residues read; 0 when param.gz_ref is not set (the
 *             ifstream overload is responsible then), when the stream is
 *             exhausted or unreadable, or when the record is empty.
 */
ref_loc_t RefSeq::LoadNextSeq(igzstream &fin) {
    if(!param.gz_ref) return 0;
    char c;
    // Consume the record marker. Testing the stream state (not only eof) also
    // stops cleanly on a stream that failed to open.
    if(!(fin>>c)) return 0;
    string::iterator z=_seq.begin();
    _length=0;
    string rest, s;
    fin>>_name;
    // Drop the remainder of the header line regardless of its length; a
    // fixed-size buffer would set failbit on long headers.
    getline(fin, rest);
    while(fin>>c) {
        fin.unget();
        if(c=='>') break;               // start of the next record
        if(!(fin>>s)) break;
        if(_length+s.size()>=param.max_dbseq_size) {
            // Keep growing until the whole token fits; one step is not
            // enough when a single line exceeds append_dbseq_size.
            do param.max_dbseq_size+=param.append_dbseq_size;
            while(param.append_dbseq_size && _length+s.size()>=param.max_dbseq_size);
            _seq.resize(param.max_dbseq_size);
            z=_seq.begin()+_length;     // resize may have invalidated z
        }
        copy(s.begin(), s.end(), z);
        z+=s.size(); _length+=s.size();
    }
    return _length;
}
/**
 * Packs the current sequence (_seq[0.._length)) into 2-bit codes, SEGLEN
 * residues per 64-bit word, most significant residue first.
 *
 * a.n is ceil(_length/SEGLEN) plus BINSEQPAD spare words at the 3' end to
 * avoid overflow; the tail of _seq up to a.n*SEGLEN is overwritten with 'N'
 * before packing. a.s is allocated with new[] and ownership passes to the
 * caller.
 */
void RefSeq::BinSeq(OneBfa &a) {
    a.n=(_length+(SEGLEN-1))/SEGLEN+BINSEQPAD;

    // Pad the text so every packed word is backed by SEGLEN characters.
    const bit32_t pad=a.n*SEGLEN-_length;
    if(pad) {
        if(_seq.size()<_length+pad) _seq.resize(_length+pad);
        fill(_seq.begin()+_length, _seq.begin()+_length+pad, 'N');
    }

    a.s = new bit64_t[a.n];
    for(bit32_t w=0; w<a.n; ++w) {
        const bit32_t first=w*SEGLEN;   // text offset of this word's first residue
        a.s[w]=0;
        for(bit32_t k=0; k<SEGLEN; ++k) {
            a.s[w]<<=2;
            a.s[w]|=alphabet[(unsigned char)_seq[first+k]];
        }
    }
}
/**
 * Packs the reverse strand of the current sequence into 2-bit codes.
 *
 * The padded text (a.n*SEGLEN characters) is read from its last character
 * back to its first and every character is translated through rev_alphabet.
 * No padding is done here: _seq must already hold at least a.n*SEGLEN
 * characters, which BinSeq() arranges when it is called first for the same
 * sequence. a.s is allocated with new[] and ownership passes to the caller.
 */
void RefSeq::cBinSeq(OneBfa &a) {
    a.n=(_length+(SEGLEN-1))/SEGLEN+BINSEQPAD;
    a.s = new bit64_t[a.n];
    for(bit32_t w=0; w<a.n; ++w) {
        // Word w covers the w-th SEGLEN-sized chunk counted from the end.
        const bit32_t top=(a.n-w)*SEGLEN-1;
        a.s[w]=0;
        for(bit32_t k=0; k<SEGLEN; ++k) {
            a.s[w]<<=2;
            a.s[w]|=rev_alphabet[(unsigned char)_seq[top-k]];
        }
    }
}
void RefSeq::UnmaskRegion() {
Block b, cb;
b.id=_count;
cb.id=_count+1;
int total_len=((_length+(SEGLEN-1))/SEGLEN+BINSEQPAD)*SEGLEN;
b.begin=b.end=0;
// bit32_t total_size=0;
while(b.end<_length) {
b.begin=_seq.find_first_of(param.useful_nt, b.end);
if(b.begin > _length) break;
b.end=_seq.find_first_of(param.nx_nt, b.begin);
b.end = (b.end<=_length? b.end : _length);
if(b.end-b.begin <30) continue;
if((!_blocks.empty()) && (b.id==_blocks[_blocks.size()-1].id)
&& (b.begin - _blocks[_blocks.size()-1].end <5))
_blocks[_blocks.size()-1].end=b.end;
else {
_blocks.push_back(b);
//added by yxi
cb.begin=total_len-b.end;
cb.end=total_len-b.begin;
_blocks.push_back(cb);
//cout <<"bid "<<b.id<<" "<<b.begin<<" "<<b.end<<endl;
//cout <<"cbid "<<cb.id<<" "<<cb.begin<<" "<<cb.end<<endl;
}
}
}
/**
 * Locates every occurrence of the recognition sequences in param.digest_site
 * within the current sequence and derives the seed start positions to index
 * for both strands.
 *
 * Effects:
 *  - upper-cases _seq in place;
 *  - appends the sorted (site position, span) list for this sequence to
 *    CCGG_sites and adds the number of sites to n_CCGG;
 *  - for every seed segment i < param.max_seedseg_num, appends two position
 *    lists to CCGG_index[i]: forward-strand seeds first, then reverse-strand
 *    seeds expressed in reverse-strand coordinates.
 *
 * Only sites that have a partner site at a distance within
 * [param.min_insert, param.max_insert] contribute seeds.
 *
 * Reads title[_count-1], so it must run after the title entries of the
 * current sequence have been pushed and _count has been advanced.
 */
void RefSeq::find_CCGG() {
    bit32_t tmp_offset,tmp_max, seedloc, right=0xffffffffu, seglen; int i,j;
    vector<ref_loc_t> tmpset_index_BSW[50], tmpset_index_BSC[50];
    vector<pair<ref_loc_t, bit32_t> > tmp_CCGG_sites;
    std::transform(_seq.begin(),_seq.end(),_seq.begin(),static_cast < int(*)(int) > (toupper));
    tmp_offset=title[_count-1].rc_offset-param.seed_size;   // mirror point for reverse-strand seeds
    tmp_max=title[_count-1].size-param.seed_size;           // last forward start where a seed fits

    // Collect all sites of every recognition sequence.
    for(j=0;j<param.digest_site.size();j++) {
        bit32_t min_offset=min(param.digest_pos[j], (bit32_t) param.digest_site[j].size()-param.digest_pos[j]);
        bit32_t rev_offset=param.digest_site[j].size()-2*min_offset;
        right=_seq.find(param.digest_site[j],1);
        while(right<_length) {                               // npos truncates to a value >= _length
            n_CCGG++;
            tmp_CCGG_sites.push_back(make_pair(right+min_offset,rev_offset));
            right=_seq.find(param.digest_site[j],right+1);
        }
    }
    sort(tmp_CCGG_sites.begin(), tmp_CCGG_sites.end());
    CCGG_sites.push_back(tmp_CCGG_sites);
    for(i=0;i<param.max_seedseg_num;i++) {
        tmpset_index_BSW[i].reserve(n_CCGG);
        tmpset_index_BSC[i].reserve(n_CCGG);
    }

    // Forward strand: seeds run rightwards from each site that has a
    // downstream partner at an acceptable distance.
    for(j=0;j+1<tmp_CCGG_sites.size();++j) {
        for(i=j+1,seglen=0;i<tmp_CCGG_sites.size();++i)
            if((seglen=tmp_CCGG_sites[i].first+tmp_CCGG_sites[i].second-tmp_CCGG_sites[j].first)>=param.min_insert) break;
        if(seglen>param.max_insert||seglen<param.min_insert) continue;
        for(i=0,seedloc=tmp_CCGG_sites[j].first;i<param.max_seedseg_num&&seedloc<=tmp_max;i++,seedloc+=param.seed_size)
            tmpset_index_BSW[i].push_back(seedloc);
    }

    // Reverse strand: seeds run leftwards from the end of each site that has
    // an upstream partner at an acceptable distance.
    for(j=1;j<tmp_CCGG_sites.size();++j) {
        for(i=j-1,seglen=0;i>=0;--i)
            if((seglen=tmp_CCGG_sites[j].first+tmp_CCGG_sites[j].second-tmp_CCGG_sites[i].first)>=param.min_insert) break;
        if(seglen>param.max_insert||seglen<param.min_insert) continue;
        // The walk must stop once the start would fall before position 0. A
        // signed counter is required: with an unsigned one ">= 0" is always
        // true and wrapped, out-of-range positions would be recorded.
        long long rseed=(long long)tmp_CCGG_sites[j].first+tmp_CCGG_sites[j].second-param.seed_size;
        for(i=0;i<param.max_seedseg_num&&rseed>=0;i++,rseed-=param.seed_size)
            tmpset_index_BSC[i].push_back(tmp_offset-(bit32_t)rseed);
    }

    for(i=0;i<param.max_seedseg_num;i++) {
        CCGG_index[i].push_back(tmpset_index_BSW[i]);
        CCGG_index[i].push_back(tmpset_index_BSC[i]);
    }
}
/// Strict ordering for blocks: ascending id first, then ascending begin.
bool BlockComp(Block a, Block b) {
    if(a.id<b.id) return true;
    if(a.id==b.id) return a.begin<b.begin;
    return false;
}
/**
 * Loads every reference record from fin or gzfin and converts it to packed
 * form.
 *
 * Per record: a title entry and a packed forward copy are stored, usable
 * regions are collected by UnmaskRegion(), then a second title entry and a
 * packed reverse copy are stored. _count therefore advances by two per record
 * (even = forward, odd = reverse) while total_num counts records. When
 * param.RRBS_flag is set, find_CCGG() runs for each record.
 *
 * Afterwards all forward words are concatenated into refcat and all reverse
 * words into crefcat, each preceded and followed by REF_MARGIN words, and
 * every bfa[i].s is redirected into the concatenated buffer. ref_anchor[k]
 * holds the base offset of record k inside the concatenation. Finally the
 * text buffer _seq is cleared.
 *
 * Loading stops at the first record for which both LoadNextSeq overloads
 * return 0.
 */
void RefSeq::Run_ConvertBinseq(ifstream &fin, igzstream &gzfin) {
    param.max_seedseg_num=(FIXELEMENT-1)*SEGLEN/param.seed_size;
    _seq.resize(param.max_dbseq_size);
    RefTitle r;
    _count=0;
    total_num=sum_length=0;
    while(LoadNextSeq(fin)||LoadNextSeq(gzfin)) {
        r.name=_name;
        r.size=_length;
        r.rc_offset=((_length+(SEGLEN-1))/SEGLEN+BINSEQPAD)*SEGLEN;   // padded length in bases

        // forward strand
        title.push_back(r);
        OneBfa a;
        BinSeq(a);
        bfa.push_back(a);
        UnmaskRegion();            // must see _count == forward id
        _count++;
        total_num++;
        sum_length+=_length;

        // reverse strand of the same record
        title.push_back(r);
        OneBfa ca;
        cBinSeq(ca);               // relies on the padding written by BinSeq()
        bfa.push_back(ca);
        _count++;

        if(param.RRBS_flag) find_CCGG();
    }
    sort(_blocks.begin(),_blocks.end(),BlockComp);
    param.total_ref_seq=total_num;

    bit32_t i, s=0; bit64_t *ptr, *cptr;
    ref_anchor.push_back(REF_MARGIN*SEGLEN);
    for(i=0;i<total_num;i++) {
        s+=bfa[i*2].n; ref_anchor.push_back((s+REF_MARGIN)*SEGLEN);
    }

    // Value-initialise the buffers: the margin words are never written, but
    // seed extraction with look-ahead reads beyond the last sequence, so they
    // must not hold indeterminate values.
    refcat=new bit64_t[s+REF_MARGIN*2](); crefcat=new bit64_t[s+REF_MARGIN*2]();
    xref[0]=refcat; xref[1]=crefcat;
    ptr=refcat+REF_MARGIN; cptr=crefcat+REF_MARGIN;
    for(i=0;i<_count;i++) {
        if(i%2==0) {
            ptr=copy(bfa[i].s, bfa[i].s+bfa[i].n, ptr);
            delete [] bfa[i].s;
            bfa[i].s=ptr-bfa[i].n;     // now a view into refcat
        }
        else {
            cptr=copy(bfa[i].s, bfa[i].s+bfa[i].n, cptr);
            delete [] bfa[i].s;
            bfa[i].s=cptr-bfa[i].n;    // now a view into crefcat
        }
    }
    _seq.clear(); //free ram
}
/**
 * Extracts the seed that starts _a residues into the packed word _m[0].
 *
 * The 64-bit window is assembled from _m[0] shifted left by 2*_a bits and
 * the top 2*_a bits of _m[1]; it is then shifted right by
 * param.seed_bits_lz and passed through param.XT.
 * _m[1] is always read, so one word past the addressed word must be
 * readable.
 */
bit32_t RefSeq::s_MakeSeed_1(bit64_t *_m, int _a) {
    // The right shift is split as (>>1) then (>>(63-2*_a)) so that _a==0
    // never produces a shift by 64.
    return param.XT(
        ( (_m[0]<<(_a*2)) | ((_m[1]>>1)>>(63-_a*2)) )
        >>param.seed_bits_lz
    );
}
void RefSeq::InitialIndex() {
bit32_t i; total_kmers=1;
KmerLoc *p; KmerLoc2 *p2;
for(i=0;i<param.seed_size;i++) total_kmers*=3;
if(param.RRBS_flag) {
index= new KmerLoc[total_kmers];
for(i=0,p=index; i<total_kmers; p++,i++) p->n1=0;
}
else {
index2=new KmerLoc2[total_kmers];
for(i=0,p2=index2;i<total_kmers;i++,p2++) p2->n[0]=p2->n[1]=0;
}
}
void RefSeq::CalKmerFreq() {
bit32_t chr, chr1, i, j, tmp_offset; bit64_t *_m;
vector<ref_loc_t>::iterator it;
if(param.RRBS_flag){
for(j=0;j<param.max_seedseg_num;j++){
for(chr=0;chr<_count;chr++){
//cout<<"chr:"<<chr<<" "<<title[chr].name<<" size:"<<title[chr].size<<" loc:"<<CCGG_index[j][chr].size()<<endl;
_m=bfa[chr].s;
for(it=CCGG_index[j][chr].begin(),i=0;it!=CCGG_index[j][chr].end();++it,++i){
index[s_MakeSeed_1(_m+(*it)/SEGLEN,(*it)%SEGLEN)].n1++;
}
if(param.pairend||param.chains){
chr1=chr^1; tmp_offset=title[chr].rc_offset-param.seed_size;
for(it=CCGG_index[j][chr1].begin();it!=CCGG_index[j][chr1].end();++it){
if(tmp_offset<*it) continue;
index[s_MakeSeed_1(_m+(int)(tmp_offset-*it)/SEGLEN,(tmp_offset-*it)%SEGLEN)].n1++;
}
}
}
}
}
else{
t_CalKmerFreq(0);
t_CalKmerFreq(1);
}
}
// Counts seeds for one chain (non-RRBS mode).
// For every block whose id has parity ref_chain, each start position that is
// a multiple of param.index_interval between the block's begin and
// (end - seed_size) increments index2[seed].n[ref_chain].
//
// Seeds are computed PREFETCH_CAL_UNIT steps ahead of their use and kept in
// the ring buffer dbs[], so the table entry of an upcoming seed can be
// prefetched before it is incremented.
void RefSeq::t_CalKmerFreq(bit32_t ref_chain) {
// ptr is the running ring-buffer cursor; it is NOT reset between blocks.
bit64_t *_m; bit32_t i,j,i2, prefetch, ptr=0;
bit32_t dbs[PREFETCH_CAL_UNIT];
// look-ahead distance expressed in bases
prefetch=PREFETCH_CAL_UNIT*param.index_interval;
for(vector<Block>::iterator p=_blocks.begin(); p!=_blocks.end(); p++) {
if(p->id%2!=ref_chain) continue;
//cout<<"seg:"<<p->begin<<" "<<p->end<<endl;
// i2: last interval-aligned start at which a whole seed still ends inside the block
_m=bfa[p->id].s; i2=((p->end-param.seed_size)/param.index_interval)*param.index_interval;
// Prime the ring with the first PREFETCH_CAL_UNIT seeds of the block. ptr advances by exactly
// PREFETCH_CAL_UNIT, so ptr%PREFETCH_CAL_UNIT is the same before and after this loop.
// NOTE(review): these reads are not bounded by the block end — presumably covered by padding/margins; confirm.
for(j=0,i=(p->begin/param.index_interval)*param.index_interval; j<PREFETCH_CAL_UNIT; i+=param.index_interval,++j,++ptr) {
//cout<<"ptr="<<ptr<<" ptr_mod="<<(ptr%PREFETCH_CAL_UNIT)<<" db:"<<s_MakeSeed_1(_m+i/SEGLEN,i%SEGLEN)<<endl;
dbs[ptr%PREFETCH_CAL_UNIT]=s_MakeSeed_1(_m+i/SEGLEN,i%SEGLEN);
}
//cout<<"chain="<<ref_chain; for(jj=0;jj<PREFETCH_CAL_UNIT;jj++) cout<<" db["<<jj<<"]="<<dbs[jj]; cout<<endl;
for(i=(p->begin/param.index_interval)*param.index_interval; i<=i2; i+=param.index_interval,++ptr) {
// the slot holds the seed for position i, computed PREFETCH_CAL_UNIT steps earlier
index2[dbs[ptr%PREFETCH_CAL_UNIT]].n[ref_chain]++;
// refill the slot with the seed for position i+prefetch and prefetch its table entry for writing
j=s_MakeSeed_1(_m+(i+prefetch)/SEGLEN,(i+prefetch)%SEGLEN);
dbs[ptr%PREFETCH_CAL_UNIT]=j;
__builtin_prefetch(index2+j,1,0);
//cout<<"i="<<i<<" ptr="<<ptr<<" ptr_mod="<<(ptr%PREFETCH_CAL_UNIT)<<" current="<<dbs[ptr%PREFETCH_CAL_UNIT]<<" prefetch="<<j<<endl;
//for(jj=-PREFETCH_CAL_UNIT;jj<PREFETCH_CAL_UNIT*4;jj++) cout<<" "<<jj<<":"<<dbs[jj]; cout<<endl;
}
}
}
void RefSeq::AllocIndex() {
KmerLoc *v; KmerLoc2 *u;
bit32_t i, j, t, *ptr, block_size=1<<22, ptr_count=0;
if(param.RRBS_flag){
for(v=index,i=0; i<total_kmers; v++,i++) {
if(v->n1>0) {
v->loc1= new Hit[v->n1];
v->n1=0;
}
}
}
else {
bit32_t *kmer_count=new bit32_t[total_kmers];
ptr = new bit32_t[block_size]; // 64MB
mem_pool.push_back(ptr);
for(i=0,u=index2; i<total_kmers; i++,u++) {
t=u->n[1]+u->n[0]; kmer_count[i]=t;
if(t==0) continue;
if(ptr_count+t>=block_size-PREFETCH_LOOP) {
if(t>=block_size-PREFETCH_LOOP) {
u->loc1= new bit32_t[t+PREFETCH_LOOP];
mem_pool.push_back(u->loc1);
for(j=0;j<PREFETCH_LOOP;j++) (u->loc1)[t+j]=0;
u->n[1]=0;
continue;
}
for(j=0;j<PREFETCH_LOOP;j++) ptr[ptr_count+j]=0;
ptr = new bit32_t[block_size];
mem_pool.push_back(ptr);
ptr_count=0;
}
u->loc1=ptr+ptr_count; ptr_count+=t;
u->n[1]=0;
}
for(j=0;j<PREFETCH_LOOP;j++) ptr[ptr_count+j]=0;
sort(kmer_count, kmer_count+total_kmers-1);
param.max_kmer_num=kmer_count[(bit32_t)(total_kmers*(1-param.max_kmer_ratio))-1];
//cout<<"kmer_threshold:"<<param.max_kmer_num<<" kmer_ratio:"<<param.max_kmer_ratio<<endl;
delete [] kmer_count;
}
}
void RefSeq::ReleaseIndex() {
bit32_t i;
KmerLoc *v; KmerLoc2 *u;
if(param.RRBS_flag){
for(i=0,v=index; i<total_kmers; i++,v++) {
if(v->n1) delete [] v->loc1;
}
delete [] index;
}
else {
while(!mem_pool.empty()) {
delete [] mem_pool.back();
mem_pool.pop_back();
}
delete [] index2;
}
}
void RefSeq::FillIndex() {
bit64_t *_m; bit32_t chr, chr1, j,tmp_offset;
KmerLoc *z; Hit tmphit;
vector<ref_loc_t>::iterator it;
if(param.RRBS_flag) {
for(j=0;j<param.max_seedseg_num;j++) {
for(chr=0;chr<_count;chr++) {
tmphit.chr=chr|(j<<16); _m=bfa[chr].s;
for(it=CCGG_index[j][chr].begin();it!=CCGG_index[j][chr].end();++it) {
z=index+s_MakeSeed_1(_m+(*it)/SEGLEN,(*it)%SEGLEN);
tmphit.loc=(*it);
z->loc1[z->n1++]=tmphit;
}
if(param.pairend||param.chains) {
chr1=chr^1; tmp_offset=title[chr].rc_offset-param.seed_size;
tmphit.chr=chr|(j<<16)|0x1000000;
for(it=CCGG_index[j][chr1].begin();it!=CCGG_index[j][chr1].end();++it) {
if(tmp_offset<*it) continue;
z=index+s_MakeSeed_1(_m+(int)(tmp_offset-*it)/SEGLEN,(tmp_offset-*it)%SEGLEN);
tmphit.loc=(tmp_offset-*it);
z->loc1[z->n1++]=tmphit;
}
}
}
}
}
else {
t_FillIndex(0);
t_FillIndex(1);
}
//_a=0; _e=0; for(z=index,i=0; i<total_kmers; z++,i++) if(z->n1) {_a+=z->n1; _e++;} cout<<"index total:"<<_a<<" keys:"<<_e<<endl;
//_a=0; _e=0; for(z=cindex, i=0; i<total_kmers; z++,i++) if(z->n1) {_a+=z->n1; _e++;} cout<<"cindex total:"<<_a<<" keys:"<<_e<<endl;
/*
for(ref_id_t chr=0;chr<_count;chr++){
cout<<"chr"<<(int)chr<<endl;
for(set<ref_loc_t>::iterator it=CCGG_index[chr].begin();it!=CCGG_index[chr].end();it++) cout<<" "<<*it; cout<<endl;
for(set<ref_loc_t>::iterator it=CCGG_cindex[chr].begin();it!=CCGG_cindex[chr].end();it++) cout<<" "<<*it; cout<<endl;
}
*/
}
// Stores seed locations for one chain (non-RRBS mode).
// Visits the same blocks and start positions as t_CalKmerFreq(ref_chain) and
// appends hit2int(position) to the location array of each seed.
//
// The insertion cursor for chain c is the counter of the OTHER chain,
// n[1-c]. AllocIndex() resets n[1] to 0 and leaves n[0] (the chain-0 count)
// in place, and FillIndex() runs chain 0 before chain 1; chain 0 therefore
// fills each array from slot 0 via n[1], and chain 1 continues from slot
// n[0] via n[0].
//
// Seeds are computed PREFETCH_CRT_UNIT steps ahead and kept in the ring
// buffer dbs[] so the table entry of an upcoming seed can be prefetched.
void RefSeq::t_FillIndex(bit32_t ref_chain) {
KmerLoc2 *z2; Hit tmphit;
// tmp selects the cursor counter (see above); ptr is the ring cursor and is NOT reset between blocks.
bit64_t *_m; bit32_t i2,j,i,prefetch, tmp=1-ref_chain, ptr=0;
bit32_t dbs[PREFETCH_CRT_UNIT];
// look-ahead distance expressed in bases
prefetch=PREFETCH_CRT_UNIT*param.index_interval;
for(vector<Block>::iterator p=_blocks.begin(); p!=_blocks.end(); p++) {
if(p->id%2!=ref_chain) continue;
tmphit.chr=p->id; _m=bfa[tmphit.chr].s;
// i2: last interval-aligned start at which a whole seed still ends inside the block
i2=((p->end-param.seed_size)/param.index_interval)*param.index_interval;
// Prime the ring with the first PREFETCH_CRT_UNIT seeds; ptr advances by exactly PREFETCH_CRT_UNIT,
// so ptr%PREFETCH_CRT_UNIT is unchanged afterwards.
// NOTE(review): these reads are not bounded by the block end — presumably covered by padding/margins; confirm.
for(j=0,i=(p->begin/param.index_interval)*param.index_interval; j<PREFETCH_CRT_UNIT; i+=param.index_interval,++j,++ptr) {
dbs[ptr%PREFETCH_CRT_UNIT]=s_MakeSeed_1(_m+i/SEGLEN,i%SEGLEN);
}
for(tmphit.loc=(p->begin/param.index_interval)*param.index_interval; tmphit.loc<=i2; tmphit.loc+=param.index_interval,++ptr) {
// the slot holds the seed for position tmphit.loc, computed PREFETCH_CRT_UNIT steps earlier
z2=index2+dbs[ptr%PREFETCH_CRT_UNIT];
z2->loc1[z2->n[tmp]++]=hit2int(tmphit);
// refill the slot with the seed for position loc+prefetch and prefetch its table entry for writing
j=s_MakeSeed_1(_m+(tmphit.loc+prefetch)/SEGLEN,(tmphit.loc+prefetch)%SEGLEN);
dbs[ptr%PREFETCH_CRT_UNIT]=j;
__builtin_prefetch(index2+j,1,0);
}
}
}
/// Builds the seed index by running the five stages in their required order.
void RefSeq::CreateIndex()
{
    InitialIndex();   // allocate the table, zero the counters
    CalKmerFreq();    // count locations per seed
    AllocIndex();     // reserve location storage from the counts
    FillIndex();      // write the locations
    FinishIndex();    // drop temporary position lists
}
void RefSeq::FinishIndex() {
bit32_t i;
if(param.RRBS_flag)
for(i=0;i<param.max_seedseg_num;i++) CCGG_index[i].clear();
/*
for(i=0;i<total_kmers;i++) {
cout<<"seed:"<<i<<" total:"<<index2[i][0]<<endl;
for(j=1; j<=index2[i][0]; j++) cout<<index2[i][j]<<" "; cout<<endl;
//for(k=0;k<4;k++) {
while(true){
cout<<"c"<<(index2[i][j]>>24)<<":"<<(index2[i][j]&0xffffff)<<" ";
if((index2[i][j]&0xffffff)==index2[i][0]) break;
j++;
}
cout<<endl;
}
*/
}
/**
 * Finds the fragment, delimited by recorded sites, that contains a read.
 *
 * The site list of sequence chr/2 is binary-searched for the pair of
 * neighbouring sites around pos. The right boundary is then moved to later
 * sites until the fragment end (site position + site span) reaches
 * pos+readlen or no further site exists.
 *
 * Side effect: CCGG_sites_chr is pointed at the site list that was searched.
 *
 * @param chr      entry id; forward and reverse entries of a sequence share
 *                 the site list chr/2.
 * @param pos      read start position.
 * @param readlen  read length.
 * @return (fragment start + 1, fragment length); (0, 0) when the sequence
 *         has no recorded site.
 */
pair<ref_loc_t,bit32_t> RefSeq::CCGG_seglen(ref_id_t chr, ref_loc_t pos, int readlen) {
    ref_id_t chr2=chr/2;
    bit32_t left, right, mid;
    ref_loc_t midval,seg_start, seg_end;
    CCGG_sites_chr=&CCGG_sites[chr2];
    // Without any site there is no fragment; also keeps size()-1 from wrapping.
    if(CCGG_sites_chr->empty()) return pair<ref_loc_t,bit32_t>(0, 0);

    left=0;
    right=CCGG_sites_chr->size()-1;
    // "left+1<right" rather than "left<right-1": the latter wraps when
    // right==0 (single site) and never terminates.
    while(left+1<right) {
        mid=(left+right)/2;
        if((midval=(*CCGG_sites_chr)[mid].first)==pos) {
            left=mid;right=mid+1;break;
        }
        else if(midval<pos) left=mid;
        else right=mid;
    }

    seg_start=(*CCGG_sites_chr)[left].first;
    seg_end=(*CCGG_sites_chr)[right].first+(*CCGG_sites_chr)[right].second;
    // Extend to later sites while the read is not covered. The bound is
    // checked before the element is read, so the list is never indexed
    // past its last entry.
    while(seg_end<pos+readlen && right+1<CCGG_sites_chr->size()) {
        right++;
        seg_end=(*CCGG_sites_chr)[right].first+(*CCGG_sites_chr)[right].second;
    }
    return pair<ref_loc_t,bit32_t>(seg_start+1, seg_end-seg_start);
}
/// Converts a hit to a single offset: the anchor of sequence h.chr/2 in
/// ref_anchor plus the position within that sequence.
ref_loc_t RefSeq::hit2int(Hit h)
{
    return h.loc + ref_anchor[h.chr / 2];
}