139 for (
int strand=0; strand<2; strand++) {
140 blockScores[strand].clear();
141 for (
int b=0; b<blockCount(); b++)
142 blockScores[strand].push_back(
BlockScoreType( getBlock(b,strand) ));
145 void initHitSequences() {
147 for (
int f=0; f<3; f++) {
148 hitSeqColl[0][f].reset(blockCount());
149 hitSeqColl[1][f].reset(blockCount());
152 void advanceScores(
int base);
156 vit.foreachSubstate(
this, &SubstateModel::clearLowScoring);
158 double getIntronBonus(
int b,
int i,
int frame,
bool comp) {
159 const Block& blk = getBlock(b, comp);
165 freq = blk.getIntronFreq(blk.size()-i-1, 3-frame);
167 freq = blk.getIntronFreq(blk.size()-i, 0);
169 freq = blk.getIntronFreq(i, frame);
170 freq *= fullIntronBonus;
171 return freq > minIntronFactor ? freq : minIntronFactor;
175 vector<BlockScoreType> blockScores[2];
177 list<Match> currentResult;
178 bool allow_truncated;
179 bool exhaustive_substates;
214 void checkInRange(
Position pos,
bool complement) {
215 if (pos.b > blockCount() || pos.b < 0 ||
216 (pos.i>0 && pos.i > blockSize(pos.b, complement)))
220 int blockCount()
const {
221 return prfl.blockCount();
223 int blockNoOfB(
int b,
bool comp)
const {
224 if (b == blockCount())
225 throw ProjectError(
"Invalid block no. in SubstateModel::blockNoOfB");
226 return comp ? blockCount() - 1 - b : b;
228 int iBlockOfB(
int b,
bool comp)
const {
229 return comp ? blockCount() - b : b;
232 return prfl.interBlockDist(iBlockOfB(b, comp));
234 int blockSize(
int b,
bool comp)
const {
235 return b == blockCount() ? 0 : prfl.blockSize(blockNoOfB(b, comp));
237 const Block& getBlock(
int b,
bool comp)
const {
238 return prfl[blockNoOfB(b, comp)];
241 void addHitSeqMatch(
const HitSequence* hs,
bool comp) {
243 for (
int b = hs->last(); b >= hs->first(); b--) {
244 int blockno = blockNoOfB(b, comp);
245 int firstBase = (*hs)[b];
246 int lastBase = firstBase + prfl.blockSize(blockno)*3 - 1;
247 Double score = blockScores[comp][b].savedScore(firstBase).getRoot(prfl.blockSize(blockno));
248 currentResult.push_front(
Match(prfl[blockno].
id, blockno, firstBase, lastBase, score, 0, comp));
252 int endOfLastCodon,
int endOfBioExon,
254 int blockno = blockNoOfB(ppos.b, comp);
256 checkInRange(ppos, comp);
258 int startOfMatch = endOfLastCodon - 3*ppos.i + 1;
259 if (startOfMatch <= endOfBioExon) {
260 Double score = ppos.i==0 ? 1 :
261 prfl[blockno].scoreFromScratch(comp, startOfMatch, 0, ppos.i).getRoot(ppos.i);
263 3*(prfl.blockSize(blockno) - ppos.i)-endOfBioExon + endOfLastCodon : 0;
264 currentResult.push_front(
Match(prfl[blockno].
id, blockno,
265 startOfMatch, endOfBioExon,
266 score, offset, comp));
269 else throw ProjectError(
"Called addPrefixMatch with invalid value!");
274 int beginOfFirstCodon,
int beginOfBioExon,
276 int blockno = blockNoOfB(ppos.b, comp);
278 checkInRange(ppos, comp);
280 int len = prfl.blockSize(blockno) - ppos.i;
281 int endOfMatch = beginOfFirstCodon + 3*len - 1;
282 if (beginOfBioExon <= endOfMatch) {
283 Double score = len==0 ? 1 :
284 prfl[blockno].scoreFromScratch(comp, beginOfFirstCodon, ppos.i).getRoot(len);
285 int offset = comp ? 0 :
286 3*ppos.i-(beginOfFirstCodon-beginOfBioExon);
287 currentResult.push_front(
Match(prfl[blockno].
id, blockno,
288 beginOfBioExon, endOfMatch,
289 score, offset, comp));
292 else throw ProjectError(
"Called addSuffixMatch with invalid value!");
297 int beginOfBioExon,
int endOfLastCodon,
int endOfBioExon,
299 int blockno = blockNoOfB(ppos.b, comp);
300 int len = (endOfLastCodon - beginOfBioExon)/3;
301 Double score = len == 0 ? 1 :
302 prfl[blockno].scoreFromScratch(comp, endOfLastCodon-3*len+1, ppos.i-len, len).getRoot(len);
304 3*(prfl.blockSize(blockno) - ppos.i)-endOfBioExon + endOfLastCodon :
305 3*ppos.i - endOfLastCodon + beginOfBioExon;
306 currentResult.push_front(
Match(prfl[blockno].
id, blockno,
307 beginOfBioExon, endOfBioExon,
308 score, offset, comp));
311 void appendMatchesTo(list<Match>& target) {
312 target.splice(target.end(), currentResult);
316 void outputFullBlocks(
int offset=0) {
317 multimap< int, string > output;
318 for (
int b=0; b<prfl.blockCount(); b++)
319 for (
int f=0; f<6; f++) {
320 bool complement = f%2;
321 const map<int, Double>& hits = blockScores[complement][b].savedHits(f%3);
322 int blockno = blockNoOfB(b, complement);
323 for (map<int, Double>::const_iterator it = hits.begin();
324 it != hits.end(); ++it) {
326 strm << it->first + offset <<
"\t"
327 << it->first + offset + 3*prfl.blockSize(blockno) <<
"\t"
328 << it->second <<
"\t"
329 << (complement ?
'-' :
'+') <<
"\t0\t"
330 << prfl[blockno].
id <<
"\n";
331 output.insert(make_pair(it->first, strm.str()));
334 multimap<int, string>::iterator it;
335 for (it = output.begin(); it != output.end(); ++it)
336 cerr <<
"\tAUGUSTUS\tblock_hit\t" << it->second ;
338 void printGlobalThresh(
Double p) {
339 for (
int b=0; b<blockCount(); b++) {
342 cerr <<
"(thresh " << b <<
") (+):" << p * p1 <<
" (-):" << p * p2 <<
"\n";
344 cerr <<
"( backward strand states are:";
345 for (
int i=0; i<stateStrands.size(); i++)
350 static void setStateStrands(
const vector<StateType>& stateMap) {
351 stateStrands.resize(stateMap.size());
352 for (
int i=0; i<stateMap.size(); i++)
353 stateStrands[i] = !isOnFStrand(stateMap[i]);
358 static vector<bool> stateStrands;
359 static double fullIntronBonus;
360 static double minIntronFactor;
370 delete currentHSColl;
373 void newFirstCodon(
int beginOfBioExon) {
374 beginOfFirstCodon = beginOfBioExon + mod3(endOfLastCodon - beginOfBioExon +1);
375 aa_count = (endOfLastCodon - beginOfFirstCodon + 1)/3;
383 (this->*scoreFunPtr)(col, predState, transEmiProb);
385 virtual void postProcessing(
const Double& maxProb) = 0;
390 virtual bool hasNewMax() {
393 virtual void addMatches(
int,
int) {}
398 virtual Double getMaxProb() {
406 complement(!isOnFStrand(type)),
408 blockScores(model.blockScores[complement]),
409 beginOfFirstCodon(-1),
410 endOfLastCodon(isOnFStrand(type) ?
411 endOfBioExon - stateReadingFrames[type] :
412 endOfBioExon - (2 - stateReadingFrames[type])),
413 currentHSColl(&model.hitSeqColl[complement][mod3(endOfLastCodon+1)]),
416 is_first(isFirstExon(type)),
417 is_last(isLastExon(type)),
421 if (isFirstExon(type))
422 scoreFunPtr = &ExonScorer::scoreFirst;
424 scoreFunPtr = &ExonScorer::scoreInternal;
427 case terminal:
case singleG:
428 endStateOffset=-1;
break;
429 case initial1:
case initial2:
430 case internal1:
case internal2:
431 case rinternal0:
case rinternal1:
432 case rterminal1:
case rterminal0:
433 endStateOffset=1;
break;
434 case initial0:
case internal0:
435 case rinternal2:
case rinitial:
436 case rterminal2:
case rsingleG:
437 endStateOffset=0;
break;
439 throw ProjectError(
"Internal error: State type " + itoa(type) +
" unsuitable for exon scorer.");
445 int blockCount()
const {
446 return mdl.blockCount();
448 int blockSize(
int b)
const {
449 return mdl.blockSize(b,complement);
451 const Block& getBlock(
int b)
const {
452 return mdl.getBlock(b, complement);
454 bool exceedsBlock(
Position ppos)
const {
455 return ppos.i > blockSize(ppos.b);
458 return mdl.interBlockDist(b,complement);
462 return interBlockDist(b);
464 return interBlockDist(b) +
Range(blockSize(b-1));
466 Range interBlockRange(
int b)
const {
467 return interBlockDist(b).r;
469 Range interBlockFullRange(
int b)
const {
470 return interBlockFullDist(b).r;
473 ppos.i -= blockSize(ppos.b);
474 ppos.i -= interBlockRange(++ppos.b).max;
484 vector<BlockScoreType>& blockScores;
495 int aa_count, beginOfFirstCodon, endOfLastCodon;
502 bool is_first, is_last;
517 int endofBioExon,
int ORFleft);
518 void calcAllowedDist(
bool is_last) {
519 allowedDist.has_max = interBlockDist(rightPos.b).has_max || rightPos.i<0;
521 (rightPos.i > 0 || is_last) ?
522 interBlockRange(rightPos.b).alignRight(rightPos.i) : rightPos.i;
524 void createHitSequences(
int ORFleft);
532 virtual void postProcessing(
const Double& mainProb);
537 virtual bool hasNewMax() {
540 virtual void addMatches(
int beginOfBioExon,
int endOfBioExon) {
541 newFirstCodon(beginOfBioExon);
542 if (rightPos.i - endStateOffset > aa_count) {
543 mdl.addInternMatch(rightPos - endStateOffset, beginOfBioExon, endOfLastCodon, endOfBioExon, complement);
547 mdl.addPrefixMatch(rightPos - endStateOffset, endOfLastCodon, endOfBioExon, complement);
548 mdl.addHitSeqMatch(bestHitSeq, complement);
549 if (predSubstate.slot>=0) {
552 mdl.addSuffixMatch(ppos, beginOfFirstCodon, beginOfBioExon, complement);
560 Double getPrefixFactor()
const {
563 virtual Double getMaxProb() {
571 Double bestFittingBonusFactor(
int b,
int maxBlockStart,
const HitSequence*& argmax);