65 vector<Position> vseeds;
67 static int expSeedCount;
68 static double maxCoverage;
74 static double blockP(
const PP::Block& blk,
string s,
int from) {
76 for (
int i=0; i<s.length(); i++)
77 result *= blk[from+i][GeneticCode::get_aa_from_symbol(s[i])];
80 static double blockQ(
const Block& blk,
string s,
int from) {
82 for (
int i=0; i<s.length(); i++)
83 result *= blk[from+i].Q(GeneticCode::get_aa_from_symbol(s[i])).doubleValue();
88 static double blockP(
const PP::Block& blk,
int aas,
int from) {
91 for (
int i=from+2; i>=from; i--) {
92 result *= blk[i][aas % 20];
97 static double blockQ(
const Block& blk,
int aas,
int from) {
99 for (
int i=from+2; i>=from; i--) {
100 result *= blk[i].Q(aas % 20).doubleValue();
109 vindex(1,0), current(0), currend(0)
111 multimap<int, Position> seeds;
112 multimap<int, Position>::iterator it;
115 for (
int b = 0; b<prfl.blockCount(); b++) {
116 const Block& blk = prfl[b];
118 cerr <<
"Determining seeds for block " << b <<
" (" << blk.id <<
", " << blk.size() <<
" columns).\n";
120 int maxcount = expSeedCount / blk.size();
122 for (
int i=0; i<=blk.size()-3; i++) {
123 multimap<double,int> currtriples;
126 for (
int t1 = 0; t1<NUM_AA; t1++) {
127 s[0] = GeneticCode::aa_symbols[t1];
128 for (
int t2 = 0; t2<NUM_AA; t2++) {
129 s[1] = GeneticCode::aa_symbols[t2];
130 for (
int t3 = 0; t3<NUM_AA; t3++) {
131 s[2] = GeneticCode::aa_symbols[t3];
132 double q_val = blockQ(blk, val, i);
133 currtriples.insert(make_pair(q_val, val));
140 map<double,int>::reverse_iterator it = currtriples.rbegin();
141 for (count = 0; count < maxcount && p < maxCoverage; count++) {
142 p += blockP(blk, it->second, i);
143 seeds.insert(make_pair(it->second,
Position(b,i)));
147 cerr <<
"[" << i <<
".." << (i+2) <<
"]: coverage=" << p <<
" minBlockQ=" << (--it)->first <<
" count=" << count << endl;
151 vseeds.reserve(seeds.size());
152 vindex.reserve(8001);
153 for (it=seeds.begin(); it != seeds.end(); ++it) {
154 while (vindex.size() <= it->first)
155 vindex.push_back(vseeds.size());
156 vseeds.push_back(it->second);
158 while (vindex.size()<=8000)
159 vindex.push_back(vseeds.size());
164 void setFirst(
int patt) {
165 current = vindex[patt];
166 currend = vindex[patt+1];
169 return current < currend;
173 return vseeds[current++];
177 return vseeds.size();
298 typedef deque<ScoringCandidate> SeedCountsQueue;
303 vector<SeedCountsQueue> theCandidates[2];
307 vector<int> maxseedcount;
308 vector<int> seedhitsizes;
309 vector<int> candcounts;
310 vector<int> realhitcounts;
313 deque<twomer> patterns;
319 maxseedcount(p.blockCount(), 0),
320 seedhitsizes(p.blockCount(), 0),
321 candcounts(p.blockCount(), 0),
322 realhitcounts(p.blockCount(), 0),
326 for (
int b=0; b<p.blockCount(); b++)
327 theCandidates[0].push_back(SeedCountsQueue(p.blockSize(b) * 3 -8,
329 theCandidates[1] = theCandidates[0];
334 void putSeedsToEntries(
int currpat,
bool reverse) {
335 seedColl.setFirst(currpat);
336 while (seedColl.hasNext()) {
338 int index = (reverse ? prfl.blockSize(pos.b) - pos.i - 3 : pos.i) * 3;
342 if (pf.lastOffset >= 0 && (pf.lastOffset == pos.i || reverse == (pf.lastOffset < pos.i)))
343 cerr <<
"ERROR: last offset (" << pf.lastOffset <<
") " << (reverse ?
"<" :
">")
344 <<
" current offset (" << pos.i <<
") !" << endl;
346 int range = abs(pf.lastOffset-pos.i);
347 pf.inSeedCount += range < 3 ? range : 3;
348 pf.lastOffset = pos.i;
355 void pushSeeds(
int t) {
357 twomer nextPattern = patterns.back();
359 twomer revPattern = patterns.back();
362 aa = GeneticCode::map[
Seq2Int(3)(PP::DNA::sequence + t-2)];
363 aab = GeneticCode::map[
Seq2Int(3).rc(PP::DNA::sequence + t-2)];
364 int forval = nextPattern.val()*20;
365 int revval = revPattern.val();
366 if (aa >= 0 && forval >= 0)
367 putSeedsToEntries(forval + aa,
false);
368 if (aab >= 0 && revval >= 0)
369 putSeedsToEntries(revval + 400*aab,
true);
373 nextPattern[0] = nextPattern[1];
375 patterns.push_front(nextPattern);
376 revPattern[1] = revPattern[0];
378 patterns.push_front(revPattern);
387 for (
int b=0; b < prfl.blockCount(); b++) {
389 int hc = theCandidates[reverse][b].back().inSeedCount;
390 theCandidates[reverse][b].pop_back();
391 seedhitsizes[b] += hc;
393 cerr <<
"ERROR: seedhitcount < 0!\n";
394 if (maxseedcount[b] < hc)
395 maxseedcount[b] = hc;
396 if (hc > 4 + prfl.blockSize(b)/4) {
397 int blstart = t - prfl.blockSize(b) * 3;
402 prfl[b].bestPartialLogScore(reverse, blstart, pt);
403 int width = pt.to - pt.from;
405 width >= MIN_BLOCKSIZE &&
406 width >= 0.3*prfl.blockSize(b)) {
408 target.newHit(newHit);
418 takeCandidates(t, target,
false);
419 takeCandidates(t, target,
true);
423 void debugging_output() {
424 for (
int b=0; b<prfl.blockCount(); b++) {
425 cerr <<
"Block " << b <<
" (" << prfl[b].id <<
")" << endl;
426 cerr <<
" len: " << prfl.blockSize(b) << endl;
427 cerr <<
" av c: " << double(seedhitsizes[b])/DNA::len;
428 cerr <<
" max c: " << maxseedcount[b] << endl;
429 cerr <<
" % c>t: " << fixed << setw(4) << setprecision(2) << double(candcounts[b]*100)/DNA::len << endl;