Augustus 3.4.0
Loading...
Searching...
No Matches
pp_profile.hh
1/*
2 * pp_profile.hh
3 *
4 * License: Artistic License, see file LICENSE.TXT or
5 * https://opensource.org/licenses/artistic-license-1.0
6 *
7 * Project : Gene Prediction with Protein Family Patterns
8 * Description: Data structures containing the ProteinProfile / IntronProfile
9 *
10 */
11
12#ifndef __PP_PROFILE_HH
13#define __PP_PROFILE_HH
14
15// project includes
16#include "geneticcode.hh" // needed to score codons
17#include "vitmatrix.hh" // for SubstateId's
18
19// standard C/C++ includes
20#include <deque>
21#include <cmath> // for sqrt
22#include <map>
23#include <vector>
24
25using namespace std;
26
27
28// constants used in profile classes
29// how many bits are available for i when coding PP::Position as int
30#define INTERBLOCKBITS 15
31#define MAXINTERBLOCKDIST ((1<<INTERBLOCKBITS)-1)
32// this might well be reduced in the future to enable more profile-related
33// substates
34#define MAX_BLOCKCOUNT 64
35
36
37// fraction by which the distance range is relaxed
38// PP::DistanceType::makeTolerant allows for a range of (1-x)min,(1+x)max
39#define RELAXATION 0.05
40
41// minimum size of a partial block that can be discarded
42#define MIN_CHECKCOUNT 3
43#define MIN_BLOCKSIZE 6
44
45// default values for quantiles (in units of stddev)
46// that are transformed into scoring thresholds
47#define MIN_SPEC 4.0
48#define MIN_SENS 0.4
49#define MIN_ANCHOR_SPEC 4.0
50#define MIN_ANCHOR_COUNT 1
51#define PARTIAL_SPEC 4.5
52#define PARTIAL_SENS 2.0
53#define GLOBAL_THRESH 2.5
54
55
56struct Dist {
57 double mu;
58 double var;
59 Dist(double m=0, double v=0) : mu(m), var(v) {}
60 Dist& add(double m=0, double v=0) {
61 mu+=m; var+=v; return *this;
62 }
63 Dist& operator+=(const Dist& other) {
64 return add(other.mu, other.var);
65 }
66 Dist& operator-=(const Dist& other) {
67 return add(-other.mu, -other.var);
68 }
69 Dist operator+(const Dist& other) const {
70 return Dist(*this) += other;
71 }
72 Dist operator-(const Dist& other) const {
73 return Dist(*this) -= other;
74 }
75
76 double stddev() const {
77 return sqrt(var>=0?var:-var);
78 }
79
80 double normed(double abs) const {
81 return (abs - mu)/stddev();
82 }
83 double abs(double normed) const {
84 return normed * stddev() + mu;
85 }
86
87};
88
89inline Dist operator-(const Dist& d) {
90 return Dist(-d.mu, -d.var);
91}
92
93/*
94 * A data structure representing the intron profile of a block profile
95 *
96 */
97struct ProfileMap : private map < pair<int, int>, double > {
98 void add(int n, int f, double p) {
99 insert(make_pair(make_pair(n,f), p));
100 }
101 double get(int n, int f) const {
102 const_iterator it = find(make_pair(n,f));
103 return (it == end()) ? 0 : it->second;
104 }
105}; // class ProfileMap
106
107namespace PP {
108
109 /*
110 * Exception class thrown from PP:: classes
111 */
113 ProfileNotFoundError(string filename) :
114 ProjectError("PP::Profile: Could not open file \"" + filename + "\".")
115 {}
116 };
118 ProfileReadError(string filename, int lineno) :
119 ProjectError("PP::Profile: Error parsing pattern file\"" + filename +
120 "\", line " + itoa(lineno) + ".")
121 {}
122 };
123
125 int lineno;
126 ProfileParseError(int n) : lineno(n) {}
127 };
128
130 int offset;
131 PartParseError(int n) : offset(n) {}
132 };
133
135 ProfileInsigError(string message) :
136 ProjectError("PP::Profile: " + message) {}
137 };
138
144 private:
145 std::map < pair < unsigned, unsigned >, unsigned > mapIntraBlock;
146 std::map < unsigned, unsigned > mapInterBlock;
147 int numSeq = 0;
148 public:
149 IntronProfile () {}
155 IntronProfile (const vector<string>& lines);
162 int getIntraFreq (unsigned c, unsigned f) const {
163 map < pair < unsigned, unsigned >, unsigned >::const_iterator it = \
164 mapIntraBlock.find(make_pair(c,f));
165 return (it == mapIntraBlock.end()) ? 0 : (it->second);
166 }
172 int getInterFreq (unsigned n) const {
173 if (mapInterBlock.empty()) {
174 return -1;
175 }
176 map < unsigned, unsigned >::const_iterator it = mapInterBlock.find(n);
177 return (it == mapInterBlock.end()) ? 0 : (it->second);
178 }
182 int get_numSeq () const {
183 return numSeq;
184 }
185 };
186
187 /*
188 * A data structure representing an integer interval
189 */
190 struct Range {
191 Range(int x=0) : min(x), max(x) {}
192 Range(int l, int h) : min(l), max(h) {
193 if (min>max) { min = h; max = l; }
194 }
195 Range operator+= (Range other) {
196 min+=other.min;
197 max+=other.max;
198 return *this;
199 }
200 Range operator+ (Range other) const {
201 return Range(min,max)+=other;
202 }
203 bool has(int elem) const {
204 return min <= elem && elem <= max;
205 }
206 int size() const { // <= 0 is equivalent to
207 return max-min+1;
208 }
209
210 // move interval such that max equals newmax
211 Range alignRight(int newmax) const {
212 return Range(newmax-max+min, newmax);
213 }
214 // move interval such that min equals newmin
215 Range alignLeft(int newmin) const {
216 return Range(newmin, newmin+max-min);
217 }
218
219 // data fields
220 int min, max;
221 }; // class PP::Range
222
223 inline Range operator- (Range r) {
224 return Range(-r.max, -r.min);
225 }
226
228 bool has_max;
229 Range r;
230 DistanceType() : has_max(true), r() {}
231 DistanceType(Range other) : has_max(true), r(other) {}
232 friend ostream& operator<< (ostream& strm, DistanceType d) {
233 if (d.has_max)
234 return strm << d.r.min << "\t" << d.r.max;
235 else
236 return strm << d.r.min << "\t*";
237 }
238 friend istream& operator>> (istream& strm, DistanceType& d) {
239 d.has_max = true;
240 strm >> d.r.min >> ws;
241 if (strm.peek() == '*') {
242 strm.get();
243 d.has_max = false;
244 d.r.max = d.r.min;
245 return strm;
246 }
247 return strm >> d.r.max;
248 }
249 DistanceType& operator+= (DistanceType other) {
250 r += other.r;
251 has_max = has_max && other.has_max;
252 return *this;
253 }
254 DistanceType operator+ (DistanceType other) const {
255 return DistanceType(*this) += other;
256 }
257 void setInfMax () {
258 r.max = r.min;
259 has_max = false;
260 }
261 void makeTolerant() {
262 r.min = int(r.min * (1-RELAXATION) + 0.5);
263 if (has_max) {
264 r.max = int(r.max * (1+RELAXATION) + 0.5);
265 if (r.max >= MAXINTERBLOCKDIST)
266 setInfMax();
267 } else
268 r.max = r.min;
269 }
270 bool has(int elem) const {
271 return has_max ? r.has(elem) : r.min <= elem;
272 }
273 };
274
275 /*
276 * A data structure representing a distribution of amino acids
277 */
278 class Column {
279 public:
280 Column() { values[0]=1.0; }
281 Column(const double* val) {
282 *this = val;
283 }
284 Column& operator= (const double*);
285
286 // initialize odds-ratios factors
287 void initRatios();
288
289 // return expectation and variance of odds-ratios
290 Dist getDist(const Column& model) const;
291 Dist getOwnDist() const {
292 return getDist(*this);
293 }
294 Dist getBackDist() const {
295 return getDist(background);
296 }
297
298 // accessing the Column entries
299 // by value
300 double operator[](int n) const {
301 return (0 <= n && n<NUM_AA) ? values[n] : -1.0;
302 }
303 // by oddRatio
304 Double Q(int n) const {
305 return (0 <= n && n<NUM_AA) ? oddRatios[n] : stopCodonScore;
306 }
307 Double Q(char c) const {
308 return Q(GeneticCode::get_aa_from_symbol(c));
309 }
310 // by LogOddRatio
311 double L(int n) const {
312 return Q(n).log();
313 }
314 double max_val() const {
315 return *std::max_element(values, values + NUM_AA);
316 };
317 //returns character of the amino acid with the highest value
318 char argmax() const {
319 return std::string(GeneticCode::aa_symbols)[std::max_element(values, values + NUM_AA) - values];
320 }
321 // stream operations
322 friend ostream& operator<<(ostream&, const Column&);
323 friend istream& operator>>(istream&, Column& c);
324
325 static const Double stopCodonScore;
326 static double invalidScore;
327 static double weight;
328 private:
329 // internal data fields
330 double values[NUM_AA]; // these numbers will be positive and
331 // add up to one; class takes care of the normalisation
332 double oddRatios[NUM_AA]; // the odds against the background distribution
333
334 static const Column background;
335 static const double minFreq; // a frequency of 0.0001 is always ensured by the class;
336 // but we don't deal here with pseudocounts
337 }; // class PP::Column
338
339 ostream& operator<<(ostream&, const Column&);
340 istream& operator>>(istream&, Column& c);
341 /* Without above two lines we get a compiler warning since C++14.
342 * Reason found on http://en.cppreference.com/w/cpp/language/namespace:
343 * "Names introduced by friend declarations [...] do not become visible to lookup [...] unless a
344 * matching declaration is provided at namespace scope, either before or after the class definition."
345 */
346 struct Position {
347 Position() : b(-1), i(0) {}
348
349 Position(int blockno, int columnNr) :
350 b(blockno), i(columnNr) {}
351 Position(const SubstateId& id) :
352 b(id.slot % MAX_BLOCKCOUNT),
353 i(id.value) {}
354 Position operator+ (int l) {
355 return Position(b, i + l);
356 }
357 Position operator- (int l) {
358 return operator+(-l);
359 }
360 Position operator++(int) {
361 return Position(b,i++);
362 }
363 bool operator< (Position other) const {
364 return (b < other.b) || (b == other.b && i < other.i);
365 }
366 bool operator== (Position other) const {
367 return i==other.i && b==other.b;
368 }
369 bool operator!= (Position other) const {
370 return !(other == (*this));
371 }
372 bool operator<= (Position other) const {
373 return !(other < (*this));
374 }
375 void nextB() { b++; i=0; }
376
377 // static int getB(int id) {
378 // return id >> (INTERBLOCKBITS+1);
379 // }
380 // static int getI(int id) {
381 // static const int mask = (-1) << (INTERBLOCKBITS+1);
382 // static const int offset = 1 << INTERBLOCKBITS;
383 // return (id | mask) + offset;
384 // }
385 // static Position fromId(int id) {
386 // if (id < 0) return Position();
387 // return Position(getB(id), getI(id));
388 // }
389
390 Position operator= (const SubstateId& id) {
391 b = id.slot % MAX_BLOCKCOUNT;
392 i = id.value;
393 return *this;
394 }
395 SubstateId id() const {
396 return b<0 ? SubstateId() : SubstateId(b,i);
397 }
398 SubstateId copyId() const {
399 return b<0 ? SubstateId() : SubstateId(b + MAX_BLOCKCOUNT, i);
400 }
401
402 short b, i;
403 }; // class PP::Position
404
405 class BlockScoreType;
406
408 int from, to;
409 double score;
410 };
411
412 class Block {
413// friend class BlockScoreType;
414 public:
415 Block(DistanceType, const vector<string>&, string);
416 ~Block() {
417 delete iP;
418 }
419 // score(x,b,l) is the emission probability factor for the sequence starting
420 // at dna position x, from the partial block of length l (codons), starting at
421 // offset b
422 // BlockScoreType score;
423 void initDistributions();
424 bool initThresholds();
425 void setIntronProfile(const vector<string>&);
426 int size() const { return columns.size(); }
427 const Column& operator[] (int columnNr) const { return columns[columnNr]; }
428 // bool addBlocksUntil(int newbase, map<int,Double>* result=NULL);
429 // void removeBlocksUntil(int newbase) {
430 // newbase -= 3 * size();
431 // while (score.begin() < newbase)
432 // score.pop_front();
433 // }
434// Double saveNewScore();
435 // Double savedScore(int offset) {
436 // map<int,Double>::iterator it = hits[offset%3].find(offset);
437 // return it == hits[offset%3].end() ? 0 : it->second;
438 // }
439 Double scoreFromScratch(bool complement, int dna_offset, int block_offset, int len) const;
440 Double scoreFromScratch(bool complement, int dna_offset, int block_offset) const {
441 return scoreFromScratch(complement, dna_offset, block_offset, size()-block_offset);
442 }
443 Double scoreFromScratch(bool complement, int dna_offset) const {
444 return scoreFromScratch(complement, dna_offset, 0);
445 }
446 Double checkedSuffixScore(bool complement, int dna_offset, int block_offset) const;
447 void bestPartialLogScore(bool complement, int dna_offset, PartScoreType&) const;
448
449 // Double checkedPrefixScore(bool complement, int dna_offset, int len) const {
450 // Double result = scoreFromScratch(complement, dna_offset, 0,len);
451 // return result > getPrefixThresh(complement, len) ? result : 0;
452 // }
453
454 // const map<int,Double>& savedHits(int f) const {
455 // return hits[f];
456 // }
457#ifdef DEBUG
458 void show_score(int n);
459#endif
460
461 Block reverse(DistanceType dist) {
462 Block result;
463 result.distance = dist;
464 for (int i=size()-1; i>=0; i--)
465 result.columns.push_back(columns[i]);
466 return result;
467 }
468
469 friend void initConstants();
470
471 Double getPrefixThresh(bool complement, int i) const;
472 Double getSuffixThresh(bool complement, int i) const;
473 Double getPartialThresh(bool complement, int from, int to) const {
474 return complement ?
475 thresholdMatrix[size()-from][size()-to] :
476 thresholdMatrix[to][from];
477 }
478 Double getPartialThresh(bool complement, int from) const {
479 return getPartialThresh(complement, from, size());
480 }
481
482 Double getThreshold() const {
483 return threshold;
484 }
485
486 bool hasIP() const {
487 return iP != 0;
488 }
489 double getIntronFreq(int i, int frame) const {
490 return iP->get(i,frame);
491 }
492 bool isAnchor() {
493 return threshold > getSpecThresh(min_anchor_spec); //getBackDist().abs(min_anchor_spec);
494 }
495 // Dist getBlockscoreBack() {
496 // return backDists[0];
497 // }
498 // the following methods use logscores
499 Dist getOwnDist(int from, int to, bool complement = false) const {
500 return complement ?
501 ownDists[size()-to] - ownDists[size()-from] :
502 ownDists[from] - ownDists[to];
503 }
504 Dist getOwnDist(int from=0) const {
505 return ownDists[from];
506 }
507 Dist getBackDist(int from, int to, bool complement = false ) const {
508 return complement ?
509 backDists[size()-to] - ownDists[size()-from]:
510 backDists[from] - backDists[to];
511 }
512 Dist getBackDist(int from=0) const {
513 return backDists[from];
514 }
515 double ownNormalize(double logscore, int from, int to, bool complement = false) const {
516 return getOwnDist(complement, from, to).normed(logscore);
517 }
518 double backNormalize(double logscore, int from, int to, bool complement = false) const {
519 return getBackDist(complement, from, to).normed(logscore);
520 }
521 double getSensThresh(double stddev, int from, int to) const {
522 return getOwnDist(from, to).abs(-stddev);
523 }
524 double getSensThresh(double stddev, int from=0) const {
525 return getOwnDist(from).abs(-stddev);
526 }
527 double getSpecThresh(double stddev, int from, int to) const {
528 return getBackDist(from, to).abs(stddev);
529 }
530 double getSpecThresh(double stddev, int from=0) const {
531 return getBackDist(from).abs(stddev);
532 }
533 const vector<Dist>& getAllOwnDists() const {
534 return ownDists;
535 }
536
537 string id;
538 int blockNumbInFile;
539 DistanceType distance; // distance range to previous block
540
541 static const Double almostZero;
542 static const double invalidScore;
543
544 private:
545 Block() : iP(0) {}
546
547 vector<Column> columns;
548 ProfileMap* iP;
549// map<int,Double> hits[3]; // will be updated by addBlocksUntil
550
551#ifdef DEBUG
552 vector<Double> prefixThresh, suffixThresh;
553#endif
554 vector< vector<Double> > thresholdMatrix;
555 vector<Dist> backDists, ownDists;
556 // Dist blockscoreBack;
557 Double threshold; // threshold for full block scores to be inserted in the hitlist
558 static double min_spec;
559 static double min_sens;
560 static double min_anchor_spec;
561 static double partial_spec;
562 static double partial_sens;
563 }; // class PP::Block
564
565
567 public:
568 typedef vector<Double> aligned_type; // partial (prefix) scores for the block at one dna position
569 // back() is the full block score
570
571 typedef deque<aligned_type> array_type; // the aligned_types for all dna positions
572
573 BlockScoreType(const Block& block) :
574 offset(0), partner(&block) {}
575 bool addBlocksUntil(bool complement, int newbase, map<int,Double>* result=NULL);
576 void removeBlocksUntil(int newbase) {
577 newbase -= 3 * size();
578 while (begin() < newbase)
579 pop_front();
580 }
581 Double savedScore(int offset) {
582 map<int,Double>::iterator it = hits[offset%3].find(offset);
583 return it == hits[offset%3].end() ? 0 : it->second;
584 }
585 const map<int,Double>& savedHits(int f) const {
586 return hits[f];
587 }
588 void init(int newoffset = 0) {
589 offset = newoffset;
590 values.clear();
591 for (int f=0; f<3; f++) hits[f].clear();
592 }
593 void pop_front() { // removes the first block from the array and returns its score
594 values.pop_front();
595 ++offset;
596 }
597 Double front() {
598 return values.front().back();
599 }
600// void push_back(const aligned_type& new_vals) {
601// values.push_back(new_vals);
602// }
603// void push_front(const aligned_type& new_vals) {
604// values.push_front(new_vals);
605// offset--;
606// }
607 const aligned_type& getAlignedScores(int x) const {
608 return values.at(x-offset);
609 }
610 Double get(int dna_offset, int block_offset, int len) const {
611 if (len==0)
612 return 1;
613 if (block_offset == 0)
614 return getAlignedScores(dna_offset).at(len-1);
615 const aligned_type& alignedScores = getAlignedScores(dna_offset - 3*block_offset);
616 return alignedScores.at(len+block_offset-1)/alignedScores.at(block_offset-1);
617 }
618 Double get(int dna_offset, int block_offset) const {
619 if (block_offset == 0)
620 return get(dna_offset);
621 const aligned_type& alignedScores = getAlignedScores(dna_offset - 3*block_offset);
622 return alignedScores.back()/alignedScores.at(block_offset-1);
623 }
624 Double get(int dna_offset) const {
625 return getAlignedScores(dna_offset).back();
626 }
627 const int& begin() const {
628 return offset;
629 }
630 int count() const {
631 return values.size();
632 }
633 int end() const {
634 return offset + count();
635 }
636 int size() const {
637 return partner->size();
638 }
639 // Double threshold() const {
640 // return partner->threshold;
641 // }
642 Double prefixThresh(bool complement, int i) const {
643 return partner->getPrefixThresh(complement, i);
644 }
645 Double suffixThresh(bool complement, int i) const {
646 return partner->getSuffixThresh(complement, i);
647 }
648 private:
649 aligned_type& new_back() {
650 values.push_back(aligned_type());
651 return values.back();
652 }
653
654
655 // internal data fields
656 array_type values;
657 int offset; // dna pos of first base of first evaluated block
658 const Block* partner;
659 map<int,Double> hits[3]; // will be updated by addBlocksUntil
660 }; // class PP::BlockScoreType
661
662
663
664
665 class Profile {
666 static Column background;
667 static double lowest_odds, exp_lowest_odds;
668
669 public:
670 // methods of class ProteinProfile
671 // initialize pattern from file <filename>
672 Profile(string filename);
673
681 int getIntronIntraFreq (unsigned b, unsigned c, unsigned f) {
682 if (!has_iP(b))
683 return 0;
684 return iP[b].getIntraFreq(c, f);
685 }
686
692 int getIntronIntraFreqAtCol (unsigned b, unsigned c) {
693 if (!has_iP(b))
694 return 0;
695 int freq = 0;
696 for (int frame = 0; frame < 3; frame++) {
697 freq += iP[b].getIntraFreq(c, frame);
698 }
699 return freq;
700 }
701
708 int getIntronInterFreq (unsigned b, unsigned n) {
709 if (!has_iP(b))
710 return -1;
711 return iP[b].getInterFreq(n);
712 }
713
717 unsigned getIntronNumSeq () {
718 for (unsigned i = 0; i < iP.size(); i++) {
719 if (iP[i].get_numSeq() > 0) {
720 return iP[i].get_numSeq();
721 }
722 }
723 return 0;
724
725 }
726
727 //int getIntronTotalFreq () {
728 // return iP[block].getTotalFreq();
729 //}
730
731
732// static bool idInBlock(int id) {
733// return ((id >> INTERBLOCKBITS)) % 2 > 0;
734// }
735
736// Position getPosOfId(int id) const {
737// if (id < 0) return Position();
738// Position result(getB(id), getI(id));
739// #ifdef DEBUG
740// if (result.b > blockCount()) {
741// cout << "getB() > #blocks\n";
742// } else if (result.i > blockSize(result.b)) {
743// cout << "getI() > blocksize\n";
744// }
745// #endif
746// if (result.i < 0) result.i <<= skipBits[result.b];
747// return result;
748// }
749// Position getPosOfCopyId(int id, bool comp=false) const {
750// return getPosOfId(id + copyOffset);
751// }
752 Position firstInRange(int blockno=0) const {
753 return Position(blockno, -interBlockRange(blockno).max);
754 }
755
756 int blockCount() const {
757 return blocks.size();
758 }
759 int blockSize(int blockno) const {
760#ifdef DEBUG
761 if (blockno < 0 || blockno >= blocks.size())
762 throw ProjectError("prfl.blockSize() called with index out of range");
763#endif
764 return blocks[blockno].size();
765 }
766 DistanceType interBlockDist(int blockno) const {
767 return blockno == blockCount() ? finalDist : blocks[blockno].distance;
768 }
769 Range interBlockRange(int blockno) const {
770 return interBlockDist(blockno).r;
771 }
772 // bool has_max(int blockno) const {
773 // return interBlockDist(blockno).has_max;
774 // }
775 // DistanceType interBlockFullDist(int blockno) const {
776 // if (blockno == 0)
777 // return blocks[0].distance;
778 // DistanceType result = interBlockDist(blockno);
779 // result.r += blocks[blockno-1].size();
780 // return result;
781 // }
782// int interBlockMaxDist(int blockno) const {
783// return interBlockFullRange(blockno).max;
784// }
785// bool isInRange(int blockno, int len) const {
786// return interBlockRange(blockno).has(len);
787// }
788 Column getColumn(Position pos) const {
789 return blocks[pos.b][pos.i];
790 }
791 string getName() const {
792 return name;
793 }
794 string getInfoLine() const {
795 ostringstream result;
796 int blockno=0;
797 while (true) {
798 result << "--";
799 DistanceType d = interBlockDist(blockno);
800 if (d.has_max)
801 result << "[" << d.r.min << ".." << d.r.max << "]";
802 else if (d.r.min>0)
803 result << "[" << d.r.min << ",...]";
804 result << "--";
805 if (blockno == blockCount())
806 return result.str();
807 result << "> " << blocks[blockno].id << " (" << blockSize(blockno) << ") <";
808 blockno++;
809 }
810 }
811 ostream& write(ostream&) const;
812
813 friend ostream& operator<< (ostream& strm, const Profile& P) {
814 return P.write(strm);
815 }
816
817 // Position finalPos() const {
818 // return Position(blockCount(),0);
819 // }
820 // SubstateId getTerminalId() const {
821 // return Position(blockCount(),0).id();
822 // }
823
824 Block& operator[] (int blockno) {
825 return blocks[blockno];
826 }
827 const Block& operator[] (int blockno) const {
828 return blocks[blockno];
829 }
830 Double getGlobalThresh(bool complement, Position ppos) const {
831 return ppos.b == blockCount() ? 1 :
832 globalThresh[complement][ppos.b][ppos.i >= 0 ? ppos.i : 0];
833 }
834
835 friend void initConstants();
836
837#ifdef DEBUG
838 static bool vv_equal(const vector<Double>& v1, const vector<Double>& v2) {
839 return equal(v1.begin(), v1.end(), v2.begin(), almost_equal);
840 }
841#endif
842
843#ifdef DEBUG
844 Profile reverse() {
845 Profile result;
846 vector< vector<Dist> > ownDists;
847 result.name = name + "(rev.)";
848 for (int b=blockCount(); b>0; b--) {
849 result.blocks.push_back(blocks[b-1].reverse(interBlockDist(b)));
850 result.blocks.back().initThresholds();
851 ownDists.push_back(result.blocks.back().getAllOwnDists());
852 }
853 result.calcGlobalThresh(ownDists);
854 if (!equal(result.globalThresh[0].begin(),
855 result.globalThresh[0].end(),
856 globalThresh[1].begin(),
857 vv_equal) ||
858 !equal(result.globalThresh[1].begin(),
859 result.globalThresh[1].end(),
860 globalThresh[0].begin(),
861 vv_equal) )
862 throw ProjectError("globalThreshs not equal!");
863 for (int b=0; b<blockCount(); b++)
864 for (int i=1; i<result.blockSize(b); i++)
865 if (!almost_equal(result[b].getPrefixThresh(false, i),
866 blocks[blockCount()-b-1].getPrefixThresh(true,i)) ||
867 !almost_equal(result[b].getPrefixThresh(true, i),
868 blocks[blockCount()-b-1].getPrefixThresh(false,i)))
869 throw ProjectError("prefixThreshs not equal!");
870 else if (!almost_equal(result[b].getSuffixThresh(false, i),
871 blocks[blockCount()-b-1].getSuffixThresh(true,i)) ||
872 !almost_equal(result[b].getSuffixThresh(true, i),
873 blocks[blockCount()-b-1].getSuffixThresh(false,i)))
874 throw ProjectError("suffixThreshs not equal!");
875 result.finalDist = interBlockDist(0);
876 return result;
877 }
878#endif
879
880 private:
881 Profile() {}
882 void calcGlobalThresh(const vector< vector<Dist> >&);
883 void parse_stream(istream&);
884
885 // data fields
886 string name;
887 vector<Block> blocks; // the blocks
888 vector< vector<Double> > globalThresh[2];
889 DistanceType finalDist; // final non-block range
890 //ProfileMap iP; // not implemented
891
892 //data structure that holds all intron profiles
893 //Author Lars Gabriel
894 std::map<int, IntronProfile> iP;
895
896 bool has_iP(int b) {
897 map < int, IntronProfile >::const_iterator it = iP.find(b);
898 return !(it == iP.end());
899 };
900 static int min_anchor_count;
901 static double global_thresh;
902 static double absolute_malus_threshold;
903
904// // moved to SubstateModel
905// void addSkipBits(Range r) {
906// int n=0;
907// while (r.max > (1<<INTERBLOCKBITS)) {
908// r.max >>= 1;
909// n++;
910// }
911// skipBits[0].push_back(n);
912// }
913// int copyOffset;
914
915 }; // class PP::Profile
916
917 struct DNA {
918 static const char* sequence;
919 static int len;
920 static void initSeq(const char* dna, int seqlen) {
921 sequence = dna;
922 len = seqlen;
923 }
924 static void initSeq(string s) {
925 // Make a copy of the actual character array, as the
926 // string can be destroyed when the sequence is still needed.
927 len = s.length();
928 char *dna = new char[len + 1];
929 std::strcpy (dna, s.c_str());
930 sequence = dna;
931 }
932 };
933
934 void initConstants();
935} // namespace PP
936
937#ifdef DEBUG
938namespace std {
939inline ostream& operator<< (ostream& strm, PP::Position ppos) {
940 return strm << ppos.b << "/" << ppos.i;
941}
942}
943#endif
944
945inline Double PP::Block::getPrefixThresh(bool complement, int i) const {
946#ifdef DEBUG
947 if (i<=0 || i>=size())
948 throw ProjectError("Out of range!");
949 Double result = complement ?
950 suffixThresh[size() - i] :
951 prefixThresh[i];
952 if (!almost_equal(result,getPartialThresh(complement, 0, i)))
953 throw ProjectError("partialThresh bug");
954#endif
955 return getPartialThresh(complement, 0, i);
956}
957
958inline Double PP::Block::getSuffixThresh(bool complement, int i) const {
959#ifdef DEBUG
960 if (i<=0 || i>=size())
961 throw ProjectError("Out of range!");
962 Double result=complement ?
963 prefixThresh[size() - i] :
964 suffixThresh[i];
965 if (!almost_equal(result,getPartialThresh(complement, i)))
966 throw ProjectError("partialThresh bug");
967#endif
968 return getPartialThresh(complement, i);
969}
970#endif
This class implements a double object with a very large range.
Definition lldouble.hh:31
Definition pp_profile.hh:566
Definition pp_profile.hh:412
Definition pp_profile.hh:278
intron profile for a block and the inter-block section before the block
Definition pp_profile.hh:143
int getIntraFreq(unsigned c, unsigned f) const
Definition pp_profile.hh:162
int getInterFreq(unsigned n) const
Definition pp_profile.hh:172
int get_numSeq() const
Definition pp_profile.hh:182
Definition pp_profile.hh:665
int getIntronIntraFreq(unsigned b, unsigned c, unsigned f)
Definition pp_profile.hh:681
int getIntronIntraFreqAtCol(unsigned b, unsigned c)
Definition pp_profile.hh:692
int getIntronInterFreq(unsigned b, unsigned n)
Definition pp_profile.hh:708
unsigned getIntronNumSeq()
Definition pp_profile.hh:717
Definition types.hh:449
ProjectError()
Definition types.hh:460
Definition pp_profile.hh:56
Definition pp_profile.hh:917
Definition pp_profile.hh:227
Definition pp_profile.hh:129
Definition pp_profile.hh:407
Definition pp_profile.hh:346
Definition pp_profile.hh:134
Definition pp_profile.hh:112
Definition pp_profile.hh:124
Definition pp_profile.hh:117
Definition pp_profile.hh:190
Definition pp_profile.hh:97
Definition vitmatrix.hh:33