65    vector<Position> vseeds;
 
   67    static int expSeedCount;   
 
   68    static double maxCoverage; 
 
   74    static double blockP(
const PP::Block& blk, 
string s, 
int from) {
 
   76        for (
int i=0;  i<s.length(); i++) 
 
   77        result *= blk[from+i][GeneticCode::get_aa_from_symbol(s[i])];
 
   80    static double blockQ(
const Block& blk, 
string s, 
int from) {
 
   82        for (
int i=0;  i<s.length(); i++) 
 
   83        result *= blk[from+i].Q(GeneticCode::get_aa_from_symbol(s[i])).doubleValue();
 
   88    static double blockP(
const PP::Block& blk, 
int aas, 
int from) {
 
   91        for (
int i=from+2;  i>=from; i--) {
 
   92        result *= blk[i][aas % 20];
 
   97    static double blockQ(
const Block& blk, 
int aas, 
int from) {
 
   99        for (
int i=from+2;  i>=from; i--) {
 
  100        result *= blk[i].Q(aas % 20).doubleValue();
 
  109        vindex(1,0), current(0), currend(0) 
 
  111        multimap<int, Position> seeds;
 
  112        multimap<int, Position>::iterator it;
 
  115        for (
int b = 0; b<prfl.blockCount(); b++) {
 
  116        const Block& blk = prfl[b];
 
  118        cerr << 
"Determining seeds for block " << b << 
" (" << blk.id << 
", " << blk.size() << 
" columns).\n";
 
  120        int maxcount = expSeedCount / blk.size();
 
  122        for (
int i=0; i<=blk.size()-3; i++) {
 
  123            multimap<double,int> currtriples;
 
  126            for (
int t1 = 0; t1<NUM_AA; t1++) {
 
  127            s[0] = GeneticCode::aa_symbols[t1];
 
  128            for (
int t2 = 0; t2<NUM_AA; t2++) {
 
  129                s[1] = GeneticCode::aa_symbols[t2];
 
  130                for (
int t3 = 0; t3<NUM_AA; t3++) {
 
  131                s[2] = GeneticCode::aa_symbols[t3];
 
  132                double q_val = blockQ(blk, val, i);
 
  133                currtriples.insert(make_pair(q_val, val));
 
  140            map<double,int>::reverse_iterator it = currtriples.rbegin();
 
  141            for (count = 0; count < maxcount && p < maxCoverage; count++) {
 
  142            p += blockP(blk, it->second, i);
 
  143            seeds.insert(make_pair(it->second, 
Position(b,i)));
 
  147            cerr << 
"[" << i << 
".." << (i+2) << 
"]: coverage=" << p << 
"   minBlockQ=" << (--it)->first << 
" count=" << count << endl;
 
  151        vseeds.reserve(seeds.size());
 
  152        vindex.reserve(8001);
 
  153        for (it=seeds.begin(); it != seeds.end(); ++it) {
 
  154        while (vindex.size() <= it->first) 
 
  155            vindex.push_back(vseeds.size());
 
  156        vseeds.push_back(it->second);
 
  158        while (vindex.size()<=8000) 
 
  159        vindex.push_back(vseeds.size());
 
  164    void setFirst(
int patt) {
 
  165        current = vindex[patt];
 
  166        currend = vindex[patt+1];
 
  169        return current < currend;
 
  173        return vseeds[current++];
 
  177        return vseeds.size();
 
 
  298    typedef deque<ScoringCandidate> SeedCountsQueue;
 
  303    vector<SeedCountsQueue> theCandidates[2];
 
  307    vector<int> maxseedcount; 
 
  308    vector<int> seedhitsizes; 
 
  309    vector<int> candcounts;   
 
  310    vector<int> realhitcounts; 
 
  313    deque<twomer> patterns;
 
  319        maxseedcount(p.blockCount(), 0),
 
  320        seedhitsizes(p.blockCount(), 0),
 
  321        candcounts(p.blockCount(), 0),
 
  322        realhitcounts(p.blockCount(), 0),
 
  326        for (
int b=0; b<p.blockCount(); b++) 
 
  327        theCandidates[0].push_back(SeedCountsQueue(p.blockSize(b) * 3 -8, 
 
  329        theCandidates[1] = theCandidates[0];
 
  334    void putSeedsToEntries(
int currpat, 
bool reverse) {
 
  335        seedColl.setFirst(currpat);
 
  336        while (seedColl.hasNext()) {
 
  338        int index = (reverse ? prfl.blockSize(pos.b) - pos.i - 3 : pos.i) * 3;
 
  342        if (pf.lastOffset >= 0 && (pf.lastOffset == pos.i || reverse == (pf.lastOffset < pos.i)))
 
  343            cerr << 
"ERROR: last offset (" << pf.lastOffset << 
") " << (reverse ? 
"<" : 
">") 
 
  344             << 
" current offset (" << pos.i << 
") !" << endl;
 
  346        int range = abs(pf.lastOffset-pos.i);
 
  347        pf.inSeedCount += range < 3 ? range : 3;
 
  348        pf.lastOffset = pos.i;
 
  355    void pushSeeds(
int t) {
 
  357        twomer nextPattern = patterns.back();
 
  359        twomer revPattern = patterns.back();
 
  362        aa = GeneticCode::map[
Seq2Int(3)(PP::DNA::sequence + t-2)];
 
  363        aab = GeneticCode::map[
Seq2Int(3).rc(PP::DNA::sequence + t-2)];
 
  364        int forval = nextPattern.val()*20;
 
  365        int revval = revPattern.val();
 
  366        if (aa >= 0 && forval >= 0) 
 
  367            putSeedsToEntries(forval + aa, 
false);
 
  368        if (aab >= 0 && revval >= 0)
 
  369            putSeedsToEntries(revval + 400*aab, 
true);
 
  373        nextPattern[0] = nextPattern[1];
 
  375        patterns.push_front(nextPattern);
 
  376        revPattern[1] = revPattern[0];
 
  378        patterns.push_front(revPattern);
 
  387        for (
int b=0; b < prfl.blockCount(); b++) {
 
  389        int hc = theCandidates[reverse][b].back().inSeedCount;
 
  390        theCandidates[reverse][b].pop_back();
 
  391        seedhitsizes[b] += hc;
 
  393            cerr << 
"ERROR: seedhitcount < 0!\n";
 
  394        if (maxseedcount[b] < hc)
 
  395            maxseedcount[b] = hc;
 
  396        if (hc > 4 + prfl.blockSize(b)/4) {
 
  397            int blstart = t - prfl.blockSize(b) * 3;
 
  402            prfl[b].bestPartialLogScore(reverse, blstart, pt);
 
  403            int width = pt.to - pt.from;
 
  405            width >= MIN_BLOCKSIZE && 
 
  406            width >= 0.3*prfl.blockSize(b)) {
 
  408            target.newHit(newHit);
 
  418        takeCandidates(t, target, 
false);
 
  419        takeCandidates(t, target, 
true);
 
  423    void debugging_output() {
 
  424        for (
int b=0; b<prfl.blockCount(); b++) {
 
  425        cerr << 
"Block " << b << 
" (" << prfl[b].id << 
")" << endl;
 
  426        cerr << 
"    len: " << prfl.blockSize(b) << endl;
 
  427        cerr << 
"   av c: " << double(seedhitsizes[b])/DNA::len;
 
  428        cerr << 
"  max c: " << maxseedcount[b] << endl;
 
  429        cerr << 
"  % c>t: " << fixed << setw(4) << setprecision(2) << double(candcounts[b]*100)/DNA::len << endl;