12#ifndef __PP_PROFILE_HH
13#define __PP_PROFILE_HH
16#include "geneticcode.hh"
17#include "vitmatrix.hh"
30#define INTERBLOCKBITS 15
31#define MAXINTERBLOCKDIST ((1<<INTERBLOCKBITS)-1)
34#define MAX_BLOCKCOUNT 64
39#define RELAXATION 0.05
42#define MIN_CHECKCOUNT 3
43#define MIN_BLOCKSIZE 6
49#define MIN_ANCHOR_SPEC 4.0
50#define MIN_ANCHOR_COUNT 1
51#define PARTIAL_SPEC 4.5
52#define PARTIAL_SENS 2.0
53#define GLOBAL_THRESH 2.5
59 Dist(
double m=0,
double v=0) : mu(m), var(v) {}
60 Dist& add(
double m=0,
double v=0) {
61 mu+=m; var+=v;
return *
this;
63 Dist& operator+=(
const Dist& other) {
64 return add(other.mu, other.var);
66 Dist& operator-=(
const Dist& other) {
67 return add(-other.mu, -other.var);
69 Dist operator+(
const Dist& other)
const {
70 return Dist(*
this) += other;
72 Dist operator-(
const Dist& other)
const {
73 return Dist(*
this) -= other;
76 double stddev()
const {
77 return sqrt(var>=0?var:-var);
80 double normed(
double abs)
const {
81 return (abs - mu)/stddev();
83 double abs(
double normed)
const {
84 return normed * stddev() + mu;
89inline Dist operator-(
const Dist& d) {
90 return Dist(-d.mu, -d.var);
97struct ProfileMap :
private map < pair<int, int>, double > {
98 void add(
int n,
int f,
double p) {
99 insert(make_pair(make_pair(n,f), p));
101 double get(
int n,
int f)
const {
102 const_iterator it = find(make_pair(n,f));
103 return (it == end()) ? 0 : it->second;
114 ProjectError(
"PP::Profile: Could not open file \"" + filename +
"\".")
119 ProjectError(
"PP::Profile: Error parsing pattern file\"" + filename +
120 "\", line " + itoa(lineno) +
".")
145 std::map < pair < unsigned, unsigned >,
unsigned > mapIntraBlock;
146 std::map < unsigned, unsigned > mapInterBlock;
163 map < pair < unsigned, unsigned >,
unsigned >::const_iterator it = \
164 mapIntraBlock.find(make_pair(c,f));
165 return (it == mapIntraBlock.end()) ? 0 : (it->second);
173 if (mapInterBlock.empty()) {
176 map < unsigned, unsigned >::const_iterator it = mapInterBlock.find(n);
177 return (it == mapInterBlock.end()) ? 0 : (it->second);
191 Range(
int x=0) : min(x), max(x) {}
192 Range(
int l,
int h) : min(l), max(h) {
193 if (min>max) { min = h; max = l; }
201 return Range(min,max)+=other;
203 bool has(
int elem)
const {
204 return min <= elem && elem <= max;
211 Range alignRight(
int newmax)
const {
212 return Range(newmax-max+min, newmax);
215 Range alignLeft(
int newmin)
const {
216 return Range(newmin, newmin+max-min);
224 return Range(-r.max, -r.min);
232 friend ostream& operator<< (ostream& strm,
DistanceType d) {
234 return strm << d.r.min <<
"\t" << d.r.max;
236 return strm << d.r.min <<
"\t*";
238 friend istream& operator>> (istream& strm,
DistanceType& d) {
240 strm >> d.r.min >> ws;
241 if (strm.peek() ==
'*') {
247 return strm >> d.r.max;
251 has_max = has_max && other.has_max;
261 void makeTolerant() {
262 r.min = int(r.min * (1-RELAXATION) + 0.5);
264 r.max = int(r.max * (1+RELAXATION) + 0.5);
265 if (r.max >= MAXINTERBLOCKDIST)
270 bool has(
int elem)
const {
271 return has_max ? r.has(elem) : r.min <= elem;
280 Column() { values[0]=1.0; }
281 Column(
const double* val) {
284 Column& operator= (
const double*);
291 Dist getOwnDist()
const {
292 return getDist(*
this);
294 Dist getBackDist()
const {
295 return getDist(background);
300 double operator[](
int n)
const {
301 return (0 <= n && n<NUM_AA) ? values[n] : -1.0;
305 return (0 <= n && n<NUM_AA) ? oddRatios[n] : stopCodonScore;
308 return Q(GeneticCode::get_aa_from_symbol(c));
311 double L(
int n)
const {
314 double max_val()
const {
315 return *std::max_element(values, values + NUM_AA);
318 char argmax()
const {
319 return std::string(GeneticCode::aa_symbols)[std::max_element(values, values + NUM_AA) - values];
322 friend ostream& operator<<(ostream&,
const Column&);
323 friend istream& operator>>(istream&,
Column& c);
325 static const Double stopCodonScore;
326 static double invalidScore;
327 static double weight;
330 double values[NUM_AA];
332 double oddRatios[NUM_AA];
334 static const Column background;
335 static const double minFreq;
339 ostream& operator<<(ostream&,
const Column&);
340 istream& operator>>(istream&,
Column& c);
349 Position(
int blockno,
int columnNr) :
350 b(blockno), i(columnNr) {}
352 b(
id.slot % MAX_BLOCKCOUNT),
358 return operator+(-l);
363 bool operator< (
Position other)
const {
364 return (b < other.b) || (b == other.b && i < other.i);
366 bool operator== (
Position other)
const {
367 return i==other.i && b==other.b;
369 bool operator!= (
Position other)
const {
370 return !(other == (*this));
372 bool operator<= (
Position other)
const {
373 return !(other < (*this));
375 void nextB() { b++; i=0; }
391 b =
id.slot % MAX_BLOCKCOUNT;
423 void initDistributions();
424 bool initThresholds();
425 void setIntronProfile(
const vector<string>&);
426 int size()
const {
return columns.size(); }
427 const Column& operator[] (
int columnNr)
const {
return columns[columnNr]; }
439 Double scoreFromScratch(
bool complement,
int dna_offset,
int block_offset,
int len)
const;
440 Double scoreFromScratch(
bool complement,
int dna_offset,
int block_offset)
const {
441 return scoreFromScratch(complement, dna_offset, block_offset, size()-block_offset);
443 Double scoreFromScratch(
bool complement,
int dna_offset)
const {
444 return scoreFromScratch(complement, dna_offset, 0);
446 Double checkedSuffixScore(
bool complement,
int dna_offset,
int block_offset)
const;
447 void bestPartialLogScore(
bool complement,
int dna_offset,
PartScoreType&)
const;
458 void show_score(
int n);
463 result.distance = dist;
464 for (
int i=size()-1; i>=0; i--)
465 result.columns.push_back(columns[i]);
469 friend void initConstants();
471 Double getPrefixThresh(
bool complement,
int i)
const;
472 Double getSuffixThresh(
bool complement,
int i)
const;
473 Double getPartialThresh(
bool complement,
int from,
int to)
const {
475 thresholdMatrix[size()-from][size()-to] :
476 thresholdMatrix[to][from];
478 Double getPartialThresh(
bool complement,
int from)
const {
479 return getPartialThresh(complement, from, size());
482 Double getThreshold()
const {
489 double getIntronFreq(
int i,
int frame)
const {
490 return iP->get(i,frame);
493 return threshold > getSpecThresh(min_anchor_spec);
499 Dist getOwnDist(
int from,
int to,
bool complement =
false)
const {
501 ownDists[size()-to] - ownDists[size()-from] :
502 ownDists[from] - ownDists[to];
504 Dist getOwnDist(
int from=0)
const {
505 return ownDists[from];
507 Dist getBackDist(
int from,
int to,
bool complement =
false )
const {
509 backDists[size()-to] - ownDists[size()-from]:
510 backDists[from] - backDists[to];
512 Dist getBackDist(
int from=0)
const {
513 return backDists[from];
515 double ownNormalize(
double logscore,
int from,
int to,
bool complement =
false)
const {
516 return getOwnDist(complement, from, to).normed(logscore);
518 double backNormalize(
double logscore,
int from,
int to,
bool complement =
false)
const {
519 return getBackDist(complement, from, to).normed(logscore);
521 double getSensThresh(
double stddev,
int from,
int to)
const {
522 return getOwnDist(from, to).abs(-stddev);
524 double getSensThresh(
double stddev,
int from=0)
const {
525 return getOwnDist(from).abs(-stddev);
527 double getSpecThresh(
double stddev,
int from,
int to)
const {
528 return getBackDist(from, to).abs(stddev);
530 double getSpecThresh(
double stddev,
int from=0)
const {
531 return getBackDist(from).abs(stddev);
533 const vector<Dist>& getAllOwnDists()
const {
541 static const Double almostZero;
542 static const double invalidScore;
547 vector<Column> columns;
552 vector<Double> prefixThresh, suffixThresh;
554 vector< vector<Double> > thresholdMatrix;
555 vector<Dist> backDists, ownDists;
558 static double min_spec;
559 static double min_sens;
560 static double min_anchor_spec;
561 static double partial_spec;
562 static double partial_sens;
568 typedef vector<Double> aligned_type;
571 typedef deque<aligned_type> array_type;
574 offset(0), partner(&block) {}
575 bool addBlocksUntil(
bool complement,
int newbase, map<int,Double>* result=NULL);
576 void removeBlocksUntil(
int newbase) {
577 newbase -= 3 * size();
578 while (begin() < newbase)
581 Double savedScore(
int offset) {
582 map<int,Double>::iterator it = hits[offset%3].find(offset);
583 return it == hits[offset%3].end() ? 0 : it->second;
585 const map<int,Double>& savedHits(
int f)
const {
588 void init(
int newoffset = 0) {
591 for (
int f=0; f<3; f++) hits[f].clear();
598 return values.front().back();
607 const aligned_type& getAlignedScores(
int x)
const {
608 return values.at(x-offset);
610 Double get(
int dna_offset,
int block_offset,
int len)
const {
613 if (block_offset == 0)
614 return getAlignedScores(dna_offset).at(len-1);
615 const aligned_type& alignedScores = getAlignedScores(dna_offset - 3*block_offset);
616 return alignedScores.at(len+block_offset-1)/alignedScores.at(block_offset-1);
618 Double get(
int dna_offset,
int block_offset)
const {
619 if (block_offset == 0)
620 return get(dna_offset);
621 const aligned_type& alignedScores = getAlignedScores(dna_offset - 3*block_offset);
622 return alignedScores.back()/alignedScores.at(block_offset-1);
624 Double get(
int dna_offset)
const {
625 return getAlignedScores(dna_offset).back();
627 const int& begin()
const {
631 return values.size();
634 return offset + count();
637 return partner->size();
642 Double prefixThresh(
bool complement,
int i)
const {
643 return partner->getPrefixThresh(complement, i);
645 Double suffixThresh(
bool complement,
int i)
const {
646 return partner->getSuffixThresh(complement, i);
649 aligned_type& new_back() {
650 values.push_back(aligned_type());
651 return values.back();
658 const Block* partner;
659 map<int,Double> hits[3];
667 static double lowest_odds, exp_lowest_odds;
684 return iP[b].getIntraFreq(c, f);
696 for (
int frame = 0; frame < 3; frame++) {
697 freq += iP[b].getIntraFreq(c, frame);
711 return iP[b].getInterFreq(n);
718 for (
unsigned i = 0; i < iP.size(); i++) {
719 if (iP[i].get_numSeq() > 0) {
720 return iP[i].get_numSeq();
752 Position firstInRange(
int blockno=0)
const {
753 return Position(blockno, -interBlockRange(blockno).max);
756 int blockCount()
const {
757 return blocks.size();
759 int blockSize(
int blockno)
const {
761 if (blockno < 0 || blockno >= blocks.size())
762 throw ProjectError(
"prfl.blockSize() called with index out of range");
764 return blocks[blockno].size();
766 DistanceType interBlockDist(
int blockno)
const {
767 return blockno == blockCount() ? finalDist : blocks[blockno].distance;
769 Range interBlockRange(
int blockno)
const {
770 return interBlockDist(blockno).r;
788 Column getColumn(Position pos)
const {
789 return blocks[pos.b][pos.i];
791 string getName()
const {
794 string getInfoLine()
const {
795 ostringstream result;
799 DistanceType d = interBlockDist(blockno);
801 result <<
"[" << d.r.min <<
".." << d.r.max <<
"]";
803 result <<
"[" << d.r.min <<
",...]";
805 if (blockno == blockCount())
807 result <<
"> " << blocks[blockno].id <<
" (" << blockSize(blockno) <<
") <";
811 ostream& write(ostream&)
const;
813 friend ostream& operator<< (ostream& strm,
const Profile& P) {
814 return P.write(strm);
824 Block& operator[] (
int blockno) {
825 return blocks[blockno];
827 const Block& operator[] (
int blockno)
const {
828 return blocks[blockno];
830 Double getGlobalThresh(
bool complement, Position ppos)
const {
831 return ppos.b == blockCount() ? 1 :
832 globalThresh[complement][ppos.b][ppos.i >= 0 ? ppos.i : 0];
835 friend void initConstants();
838 static bool vv_equal(
const vector<Double>& v1,
const vector<Double>& v2) {
839 return equal(v1.begin(), v1.end(), v2.begin(), almost_equal);
846 vector< vector<Dist> > ownDists;
847 result.name = name +
"(rev.)";
848 for (
int b=blockCount(); b>0; b--) {
849 result.blocks.push_back(blocks[b-1].reverse(interBlockDist(b)));
850 result.blocks.back().initThresholds();
851 ownDists.push_back(result.blocks.back().getAllOwnDists());
853 result.calcGlobalThresh(ownDists);
854 if (!equal(result.globalThresh[0].begin(),
855 result.globalThresh[0].end(),
856 globalThresh[1].begin(),
858 !equal(result.globalThresh[1].begin(),
859 result.globalThresh[1].end(),
860 globalThresh[0].begin(),
863 for (
int b=0; b<blockCount(); b++)
864 for (
int i=1; i<result.blockSize(b); i++)
865 if (!almost_equal(result[b].getPrefixThresh(
false, i),
866 blocks[blockCount()-b-1].getPrefixThresh(
true,i)) ||
867 !almost_equal(result[b].getPrefixThresh(
true, i),
868 blocks[blockCount()-b-1].getPrefixThresh(
false,i)))
870 else if (!almost_equal(result[b].getSuffixThresh(
false, i),
871 blocks[blockCount()-b-1].getSuffixThresh(
true,i)) ||
872 !almost_equal(result[b].getSuffixThresh(
true, i),
873 blocks[blockCount()-b-1].getSuffixThresh(
false,i)))
875 result.finalDist = interBlockDist(0);
882 void calcGlobalThresh(
const vector< vector<Dist> >&);
883 void parse_stream(istream&);
887 vector<Block> blocks;
888 vector< vector<Double> > globalThresh[2];
889 DistanceType finalDist;
894 std::map<int, IntronProfile> iP;
897 map < int, IntronProfile >::const_iterator it = iP.find(b);
898 return !(it == iP.end());
900 static int min_anchor_count;
901 static double global_thresh;
902 static double absolute_malus_threshold;
918 static const char* sequence;
920 static void initSeq(
const char* dna,
int seqlen) {
924 static void initSeq(
string s) {
928 char *dna =
new char[len + 1];
929 std::strcpy (dna, s.c_str());
934 void initConstants();
939inline ostream& operator<< (ostream& strm,
PP::Position ppos) {
940 return strm << ppos.b <<
"/" << ppos.i;
945inline Double PP::Block::getPrefixThresh(
bool complement,
int i)
const {
947 if (i<=0 || i>=size())
949 Double result = complement ?
950 suffixThresh[size() - i] :
952 if (!almost_equal(result,getPartialThresh(complement, 0, i)))
955 return getPartialThresh(complement, 0, i);
958inline Double PP::Block::getSuffixThresh(
bool complement,
int i)
const {
960 if (i<=0 || i>=size())
962 Double result=complement ?
963 prefixThresh[size() - i] :
965 if (!almost_equal(result,getPartialThresh(complement, i)))
968 return getPartialThresh(complement, i);
This class implements a double object with a very large range.
Definition lldouble.hh:31
Definition pp_profile.hh:566
Definition pp_profile.hh:412
Definition pp_profile.hh:278
intron profile for a block and the inter-block section before the block
Definition pp_profile.hh:143
int getIntraFreq(unsigned c, unsigned f) const
Definition pp_profile.hh:162
int getInterFreq(unsigned n) const
Definition pp_profile.hh:172
int get_numSeq() const
Definition pp_profile.hh:182
Definition pp_profile.hh:665
int getIntronIntraFreq(unsigned b, unsigned c, unsigned f)
Definition pp_profile.hh:681
int getIntronIntraFreqAtCol(unsigned b, unsigned c)
Definition pp_profile.hh:692
int getIntronInterFreq(unsigned b, unsigned n)
Definition pp_profile.hh:708
unsigned getIntronNumSeq()
Definition pp_profile.hh:717
ProjectError()
Definition types.hh:460
Definition pp_profile.hh:56
Definition pp_profile.hh:917
Definition pp_profile.hh:227
Definition pp_profile.hh:129
Definition pp_profile.hh:407
Definition pp_profile.hh:346
Definition pp_profile.hh:134
Definition pp_profile.hh:112
Definition pp_profile.hh:124
Definition pp_profile.hh:117
Definition pp_profile.hh:190
Definition pp_profile.hh:97
Definition vitmatrix.hh:33