Augustus 3.4.0
Loading...
Searching...
No Matches
vitmatrix.hh
1/*
2 * vitmatrix.hh
3 *
4 * License: Artistic License, see file LICENSE.TXT or
5 * https://opensource.org/licenses/artistic-license-1.0
6 */
7
8#ifndef _VITMATRIX_HH
9#define _VITMATRIX_HH
10
11// project includes
12#include "types.hh" // for ProjectError
13
14// standard C/C++ includes
15#include <vector>
16#include <map>
17#include <list>
18#include <limits>
19#include <climits>
20
21using namespace std;
22
23
24// constants for Viterbi*Types
25#define VIT_MIN_CAPACITY 7
26#define MAX_LINKCOUNT 300
27#define SUBSTATEVALUE_BITS (8*sizeof(short))
28
33struct SubstateId {
34 explicit SubstateId(signed char s=-1, short v=0) : slot(s), value(v) {}
35
36 bool operator< (const SubstateId& other) const {
37 return
38 slot < other.slot || (slot == other.slot &&
39 value < other.value);
40 }
41 bool operator== (const SubstateId& other) const {
42 return slot == other.slot && value == other.value;
43 }
44 bool operator> (const SubstateId& other) const {
45 return (other < *this);
46 }
47 bool operator!= (const SubstateId& other) const {
48 return !operator==(other);
49 }
50 bool operator<= (const SubstateId& other) const {
51 return !operator>(other);
52 }
53
54 bool undef() {
55 return slot==-1 && value==0;
56 }
57
58
59
60 int fullId() const {
61 // implements a monotonous mapping
62 // with SubstateId() -> 0
63 return ((slot+1) << SUBSTATEVALUE_BITS) + value;
64 }
65 void set(int fromId) {
66 // reverse mapping int->SubstateId()
67 value = fromId;
68 slot = ((fromId-value) >> SUBSTATEVALUE_BITS) -1;
69 }
70 static SubstateId min(int slot) {
71 return SubstateId(slot, numeric_limits<short>::min());
72 }
73 static SubstateId max(int slot) {
74 return SubstateId(slot, numeric_limits<short>::max());
75 }
76
77 signed char slot;
78 short value;
79};
80
81#ifdef DEBUG
82inline string itoa(SubstateId d) {
83 return "(" + itoa(d.slot) + "/" + itoa(d.value) + ")";
84}
85#endif
86
87typedef map<SubstateId, SubstateId> SubstateIdMap;
88
89#ifdef DEBUG
90#define MEMUSEDBLCOUNT 10
91struct GenericDataType {
92 GenericDataType() {
93 for (int i=0; i<MEMUSEDBLCOUNT; i++)
94 vals[i]=0;
95 }
96 void operator+=(const GenericDataType& other) {
97 for (int i=0; i<MEMUSEDBLCOUNT; i++)
98 vals[i]+=other.vals[i];
99 }
100 double& operator[] (int i) { return vals[i]; }
101 double vals[MEMUSEDBLCOUNT];
102};
103#endif
104
113 ProjectError("Internal error: ViterbiSubmapType has no map!") {}
114};
115
122 ProjectError("Internal error: ViterbiSubmapType is empty!") {}
123};
124
132enum AlgorithmVariant { doViterbiOnly, doViterbiAndForward, doSampling, doBacktracking };
133/* any changes made to the order above needs to be reflected in the following
134 functions */
135inline bool doViterbi(AlgorithmVariant algovar) {
136 return algovar <= doViterbiAndForward;
137}
138inline bool needForwardTable(AlgorithmVariant algovar) {
139 return algovar == doViterbiAndForward || algovar == doSampling;
140}
141inline AlgorithmVariant doViterbi(bool needForwardTable) {
142 return needForwardTable ? doViterbiAndForward : doViterbiOnly;
143}
144
154
160 ViterbiSubmapEntry(const Double& dbl=0, ViterbiSubmapType* pred=0) :
161 p(dbl), predMap(pred), succCount(0) {}
162 bool operator<(const ViterbiSubmapEntry& other) const {
163 return p<other.p;
164 }
165 bool operator<=(const ViterbiSubmapEntry& other) const {
166 return !(other < *this);
167 }
168 bool operator>=(const ViterbiSubmapEntry& other) const {
169 return !operator<(other);
170 }
171 bool operator>(const ViterbiSubmapEntry& other) const {
172 return other < *this;
173 }
174 void set(const Double& dbl, ViterbiSubmapType* pred) {
175 p =dbl;
176 predMap = pred;
177 }
178
179 Double p;
180 ViterbiSubmapType* predMap;
181 short succCount;
182};
183
188struct ViterbiSubmapBasetype : public map<SubstateId, ViterbiSubmapEntry > {
189 ViterbiSubmapBasetype(int ln) :
190 linkcount(ln),
191 predSubstates(0) {}
192 ViterbiSubmapBasetype(const ViterbiSubmapBasetype& other, int ln) :
193 map<SubstateId,ViterbiSubmapEntry>(other),
194 linkcount(ln),
195 predSubstates(0) {}
197 delete predSubstates;
198 }
199
200 // multiple instances of ViterbiSubmapType can share the same values;
201 // this value tells how many of them exist, so ViterbiSubmapType
202 // knows when to delete the pointed to
203 int linkcount;
204
205 // this map stores the predecessor substates, if needed
206 SubstateIdMap* predSubstates;
207}; // ViterbiSubmapBasetype
208
210 friend class ViterbiColumnType;
211 // ``const'' in this class refers not to the contents of the map
212 // but just to the pointer (we want to be able to change
213 // substates of otherwise ``constant'' Viterbi columns)
214
215public:
216 // these are aliases for a map iterator
217 typedef ViterbiSubmapBasetype::iterator iterator;
218 typedef ViterbiSubmapBasetype::const_iterator const_iterator;
219
220 // constructors
221 explicit ViterbiSubmapType(signed char st) :
222 factor(1), state(st), theMap(0) {}
223 // copy constructor: does not copy the map pointed to but
224 // just adds a link to it
226 factor(other.factor), state(other.state)
227 {
228 link(other);
229 }
230 // destructor
232 unlink();
233 }
234 // assignment: the same behaviour as the copy constructor
235 void operator= (const ViterbiSubmapType& other) {
236 if (&other == this)
237 return;
238 factor = other.factor;
239 state = other.state;
240 unlink();
241 link(other);
242 }
243 // share another submap's map
244 void link (const ViterbiSubmapType& other, Double mult) {
245 factor = other.factor * mult;
246 unlink();
247 link(other);
248 }
249 // remove the map
250 void deactivate() {
251 unlink();
252 theMap=0;
253 }
254 int getMainState() const {
255 return state;
256 }
257
258
259 // the following methods can be used also when the map is NULL
260 bool inactive() const { // is the map NULL?
261 return (theMap==0);
262 }
263 bool empty() const {
264 return inactive() || theMap->empty();
265 }
266 bool emptypart(int part) {
267 return inactive() || begin(part) == end(part);
268 }
269 int size() const {
270 return inactive() ? 0 : theMap->size();
271 }
272 void activate() {
273 if (!theMap)
274 theMap = new ViterbiSubmapBasetype(1);
275 }
276 int linkcount() const {
277 return inactive() ? 0 : theMap->linkcount;
278 }
279 bool is_linked() {
280 return theMap && theMap->linkcount > 1;
281 }
282 // access predecessor substate map
283 SubstateIdMap* getPredSubstateMap() const;
284 SubstateId popPredSubstate(SubstateId substate) const;
285 void clearEmptySubstateMap() const;
286
287 void swapMaps(ViterbiSubmapType&); // exchange values with another instance
288
289 void merge(ViterbiSubmapType&, Double); // merge two maps and take the maximum entries
290 int eraseAllUnneededSubstates() const; // erase substates with succCount==0
291
292 // the following methods do not check if theMap is non-NULL
293 iterator begin() const {
294 return theMap->begin();
295 }
296 iterator begin(int slot) const {
297 return theMap->lower_bound(SubstateId::min(slot));
298 }
299 iterator end() const {
300 return theMap->end();
301 }
302 iterator end(int slot) const {
303 return theMap->upper_bound(SubstateId::max(slot));
304 }
305 iterator bound(SubstateId substate) const {
306 return theMap->lower_bound(substate);
307 }
308 iterator bound(signed char slot, short value) const {
309 return theMap->lower_bound(SubstateId(slot,value));
310 }
311 Double get(SubstateId substate) const {
312 const_iterator it = theMap->find(substate);
313 return it==end() ? 0 : it->second.p * factor;
314 }
315 iterator insert(iterator where, pair<SubstateId, ViterbiSubmapEntry> what) const {
316 // insert the value as is (without dividing it by factor)
317 return theMap->insert(where, what);
318 }
319 iterator insert(iterator where, SubstateId substate, const Double& value,
320 ViterbiSubmapType* pred) const {
321 // insert the value as is (without dividing it by factor)
322 return insert(where, make_pair(substate, ViterbiSubmapEntry(value,pred)));
323 }
324 void push_back(SubstateId substate, const Double& value, ViterbiSubmapType* pred) const {
325 // insert at end of map
326 insert(end(), substate, value, pred);
327 }
328 void erase(iterator start, iterator end) const {
329 theMap->erase(start, end);
330 }
331 void erase(SubstateId substate) const {
332 theMap->erase(substate);
333 }
334 short& succCount(SubstateId substate) {
335 return (*theMap)[substate].succCount;
336 }
337 short& succCount(iterator it) {
338 return it->second.succCount;
339 }
340 const short& succCount(iterator it) const {
341#ifdef DEBUG
342 if (it == end())
343 throw SubmapEmptyError();
344#endif
345 return it->second.succCount;
346 }
347 short& succCount() {
348 return succCount(begin());
349 }
350 void eraseAndDecCounts(iterator it) const;
351 bool setMax(SubstateId substate, Double value, ViterbiSubmapType* pred) const;
352 void cloneMap(ViterbiSubmapType* newPredMap, int maxlinkcount=1);
353 void cloneMapIfLinkcountExceeded(ViterbiSubmapType* newPredMap) {
354 cloneMap(newPredMap, MAX_LINKCOUNT);
355 }
356 void getMaxSubstate(SubstateId& substate, Double& value) const;
357 int eraseUnneededSubstate(SubstateId substate);
358 void registerPredecessors();
359 void useMaximum(Double& mainProb);
360
361#ifdef DEBUG
362 // functions used for debugging only
363 GenericDataType memoryUsage();
364 void dieOnZero() const;
365 void checkAfterWindow() const;
366 SubstateId getPredecessorSubstate(SubstateId substate) const;
367 void checkCounts() const;
368#endif
369
370 Double factor; // multiply this to the entries of the map to get the actual value
371
372private:
373 // common to copy constructor and assignment operator:
374 void link(const ViterbiSubmapType& other) {
375 theMap = other.theMap;
376 if (theMap) theMap->linkcount++;
377 }
378 // common to destructor and assignment operator
379 void unlink() {
380 if (theMap && --(theMap->linkcount)==0)
381 delete theMap;
382 }
383
384 signed char state; // the main state
385 ViterbiSubmapBasetype* theMap; // the entries (probabilities/factor) for each substate
386}; // ViterbiSubmapType
387
388// inline definitions of ViterbiSubmapType member functions
389
390// returns the predecessor substate map, creates one if none exists
391inline SubstateIdMap* ViterbiSubmapType::getPredSubstateMap() const {
392 if (!theMap)
393 return 0;
394 SubstateIdMap*& result = theMap->predSubstates;
395 if (!result)
396 result = new SubstateIdMap;
397 return result;
398}
399
400inline void ViterbiSubmapType::clearEmptySubstateMap() const {
401 if (theMap && theMap->predSubstates &&
402 theMap->predSubstates->empty()) {
403 delete theMap->predSubstates;
404 theMap->predSubstates = 0;
405 }
406}
407
408inline void ViterbiSubmapType::swapMaps(ViterbiSubmapType& other) {
409 ViterbiSubmapBasetype* temp = theMap;
410 theMap = other.theMap;
411 other.theMap = temp;
412}
413
414inline void ViterbiSubmapType::eraseAndDecCounts(iterator it) const {
415 ViterbiSubmapType* pred = it->second.predMap;
416 if (pred) {
417 SubstateId substate = popPredSubstate(it->first);
418 pred->succCount(substate)--;
419#ifdef DEBUG
420 if (pred->succCount(substate) < 0)
421 throw ProjectError("Have negative succCounts");
422#endif
423 }
424 theMap->erase(it);
425}
426
427inline bool ViterbiSubmapType::setMax(SubstateId substate, Double value, ViterbiSubmapType* pred) const {
428 ViterbiSubmapEntry& target = (*theMap)[substate];
429 if (target.p * factor < value) {
430 target.set(value / factor, pred);
431 return true;
432 }
433 return false;
434}
435
436inline void ViterbiSubmapType::cloneMap(ViterbiSubmapType* newPredMap, int maxlinkcount) {
437#ifdef DEBUG
438 if (newPredMap == 0)
439 throw ProjectError("SubstateModel::cloneMap: predMap==0!");
440#endif
441 if (theMap->linkcount > maxlinkcount) {
442 theMap->linkcount--;
443 theMap = new ViterbiSubmapBasetype(*theMap,1);
444 for (iterator it=begin(); it != end(); ++it) {
445 it->second.predMap = newPredMap;
446 it->second.succCount = 0;
447 }
448 }
449}
450
451inline void ViterbiSubmapType::getMaxSubstate(SubstateId& substate, Double& value) const {
452 value = 0; substate.set(0);
453 if (empty())
454 return;
455 for (iterator it = begin(); it != end(); ++it)
456 if (it->second > value) {
457 substate = it->first;
458 value = it->second.p;
459 }
460 value *= factor;
461}
462
463
471 ViterbiEntryType(signed char st, Double v, signed char i=-1) :
472 state(st), value(v), substate_idx(i) {}
473 signed char state;
474 Double value;
475 signed char substate_idx;
476};
477
478
479/*
480 * ViterbiColumnType:
481 * The entries of the Viterbi matrix belonging to the same base position
482 * Only nonzero values are actually stored
483 *
484 */
485typedef vector<ViterbiEntryType> ViterbiColumnBasetype;
486
491class ViterbiColumnType : public ViterbiColumnBasetype {
492public:
493
494 // constructor and destructor
496 ViterbiColumnBasetype(),
497 subProbs(), idx(0), maxsize(0) {}
499 delete[] idx;
500 }
501
502 void reset(int n); // delete the column and reserve space for n states
503 void erase(int state); // erase the value for a particular state (including substates)
504
505 // return a value without autovivification
506 Double get(int state) const {
507 return has(state) ? elem(state).value : 0;
508 }
509 Double get(int state, SubstateId substate) const {
510 if (substate.undef()) return get(state);
511 try {
512 return getSubstates(state).get(substate);
513 } catch (NoSubmapFoundError &) {
514 return 0;
515 }
516 }
517 // index operator; uses autovivication in non-const contexts
518 Double operator[](int state) const { // no autovivification
519 return get(state);
520 }
521 Double& operator[](int state) { // autovivification if needed
522 if (has(state))
523 return elem(state).value;
524 addState(state);
525 return back().value;
526 }
527
528 bool has(int state) const {
529#ifdef DEBUG
530 if (state < 0 || state >= maxsize)
531 throw ProjectError("ViterbiColumnType: state out of range");
532#endif
533 return idx[state] >= 0;
534 }
535 int size() { // number of elements !=0
536 return ViterbiColumnBasetype::size();
537 }
538 int max_capacity() { // this equals statecount
539 return maxsize;
540 }
541
542 // access substates
543 bool hasSubstates(int state) const {
544 int idx = getSubstateIdx(state);
545 return idx >= 0 && idx < (int) subProbs.size() && !subProbs[idx].inactive();
546 }
547 const ViterbiSubmapType& getSubstates(int state) const;
548 ViterbiSubmapType& getSubstates(int state);
549 void getMaxSubstate(int state, SubstateId& substate, Double& value) const;
550 ViterbiSubmapType& addSubstates(const ViterbiSubmapType& newmap);
551
552 // calls a member function of class Tp with a parameter of
553 // type ViterbiSubmapType for each submap in the column
554 template <class Tp, class FncPtr>
555 void foreachSubstate(Tp p, FncPtr f) {
556 for (int i=0; i<subProbs.size(); i++)
557 (p->*f)(subProbs[i]);
558 }
559 void eraseSubstates(int state); // erase all the substates for a particular state
560 int eraseUnneededSubstates(); // calls eraseUnneededSubstates for each submap
561 int removeEmptySubmaps();
562
563#ifdef DEBUG
564 // functions used for debugging only
565 int substateCount(int state) const {
566 try {
567 return getSubstates(state).size();
568 } catch (...) {
569 return 0;
570 }
571 }
572 int totalSubstateCount() const {
573 int result=0;
574 for (int i=0; i<subProbs.size(); i++) result += subProbs[i].size();
575 return result;
576 }
577 map<int,Double> substateCounts() const {
578 map<int,Double> result;
579 for (int i=0; i<subProbs.size(); i++)
580 result[subProbs[i].state] = subProbs[i].size();
581 return result;
582 }
583 void testIntegrity() const;
584 void checkAfterWindow() const {
585 for (int i=0; i < subProbs.size(); i++)
586 subProbs[i].checkAfterWindow();
587 }
588 void showSubstateInfo(ostream& strm) const;
589 void write(ostream& strm, const vector<string>&);
590 void write(const vector<string>&);
591
592 int getSubmapCapacity() {
593 return subProbs.capacity() * sizeof(ViterbiSubmapType);
594 }
595 GenericDataType getSubstateMemoryUsage();
596 void countSubmaps(map<ViterbiSubmapBasetype*,int>& linkcounts);
597#endif
598
599private:
600 const ViterbiEntryType& elem(int state) const {
601 return ViterbiColumnBasetype::operator[](idx[state]);
602 }
603 ViterbiEntryType& elem(int state) {
604 return ViterbiColumnBasetype::operator[](idx[state]);
605 }
606 void addState(int state, signed char sub_idx=-1);
607 signed char getSubstateIdx(int state) const {
608 return has(state) ? elem(state).substate_idx : -1;
609 }
610 void eraseSubProbs(int subidx);
611
612 vector<ViterbiSubmapType> subProbs; // the substate probabilities
613 signed char* idx; // for each state, the vector index
614 // or -1 if entry is zero
615 int maxsize; // number of states
616}; // ViterbiColumnType
617
618// inline definitions of ViterbiColumnType member functions
619inline const ViterbiSubmapType& ViterbiColumnType::getSubstates(int state) const {
620 int idx = getSubstateIdx(state);
621 if (idx >= 0 && idx < (int) subProbs.size()) {
622 const ViterbiSubmapType& result = subProbs[idx];
623 if (!result.inactive())
624 return result;
625 }
626 throw NoSubmapFoundError();
627}
628
629inline ViterbiSubmapType& ViterbiColumnType::getSubstates(int state) {
630 int idx = getSubstateIdx(state);
631 if (idx >= 0 && idx < (int) subProbs.size()) {
632 ViterbiSubmapType& result = subProbs[idx];
633 if (!result.inactive())
634 return result;
635 }
636 throw NoSubmapFoundError();
637}
638
639inline void ViterbiColumnType::getMaxSubstate(int state, SubstateId& substate, Double& value) const {
640 Double mainval = get(state);
641 try {
642 const ViterbiSubmapType& submap = getSubstates(state);
643 submap.getMaxSubstate(substate, value);
644 if (value > mainval)
645 return;
646 } catch (NoSubmapFoundError &) {}
647 substate.set(0); value = mainval;
648}
649
650inline int ViterbiColumnType::eraseUnneededSubstates() {
651 int result = 0;
652 for (unsigned i=0; i<subProbs.size(); i++)
653 result += subProbs[i].eraseAllUnneededSubstates();
654 return result;
655}
656
657inline int ViterbiColumnType::removeEmptySubmaps() {
658 int result = 0;
659 for (unsigned i=0; i<subProbs.size(); i++)
660 if (subProbs[i].empty() && !subProbs[i].inactive()) {
661 subProbs[i].deactivate();
662 result++;
663 }
664 return result;
665}
666
667inline void ViterbiColumnType::eraseSubProbs(int subidx) {
668 if (subidx >= 0) {
669 // overwrite entry at position subidx
670 // with the (previously) last entry
671 if (subidx < (int) subProbs.size() - 1) {
672 elem(subProbs.back().state).substate_idx = subidx;
673 subProbs[subidx] = subProbs.back();
674 }
675 // delete last entry in vector
676 subProbs.pop_back();
677 }
678}
679
680
688public:
689 ViterbiMatrixType () : data(0), count(0) {}
691 delete[] data;
692 }
693 void assign(int colcount, int colsize) {
694 count = colcount;
695 delete[] data;
696 data = new ViterbiColumnType[count];
697 for (int i=0; i<count; i++) {
698 data[i].reset(colsize);
699 }
700 }
701 ViterbiColumnType& operator[] (int i) {
702 return data[i];
703 }
704 const ViterbiColumnType& operator[] (int i) const {
705 return data[i];
706 }
707 int size() const {
708 return count;
709 }
710
711#ifdef DEBUG
712 // functions used for debugging only
713 double load() {
714 double result=0;
715 for (int i=0; i<count; i++)
716 result += data[i].capacity();
717 return result / count;
718 }
719 double load(int i) {
720 return data[i].capacity();
721 }
722 double used_load() {
723 double result=0;
724 for (int i=0; i<count; i++)
725 result += data[i].size();
726 return result / count;
727 }
728 void write(ostream& strm, const vector<string>& stateNames = vector<string>()) {
729 for (int i=0; i<count; i++)
730 data[i].write(strm, stateNames);
731 }
732 int compare(istream&, const vector<string>& stateNames = vector<string>());
733 void showSubstateMemoryUsage(int from = 0, int to = -1);
734#endif
735
736private:
737 ViterbiColumnType* data;
738 int count;
739}; // ViterbiMatrixType
740
741
749public:
750 OptionListItem(){reset();};
751 OptionListItem(int st, int bs, Double p){
752 state = st;
753 base = predEnd = bs;
754 probability = p;
755 }
756 OptionListItem(int st, int bs, int pe, Double p){
757 state = st;
758 base = bs;
759 predEnd = pe;
760 probability = p;
761 }
762 void reset(){state = TYPE_UNKNOWN; base = predEnd = -INT_MAX;}
763 void resetPredEnd() {predEnd = -INT_MAX;}
764 int state;
765 int base;
766 int predEnd; // base at which predecessor state ends, usually identical to base unless states (genes) overlap
767 Double probability;
768};
769
770inline bool operator< (const OptionListItem& first, const OptionListItem& second) {
771 // the largest probability comes first
772 return (first.probability > second.probability);
773}
774
780public:
781 OptionsList(){
782 cumprob = 0.0;
783 }
784 ~OptionsList(){
785 while (!options.empty())
786 options.pop_front();
787 }
788 void add(int state, int base, Double probability, int predEnd = -INT_MAX) {
789 options.push_back(OptionListItem(state, base,
790 (predEnd == -INT_MAX)? base : predEnd,
791 probability));
792 cumprob += probability;
793 }
794 void prepareSampling() {
795 options.sort();
796 }
797 int size(){
798 return options.size();
799 }
800 void print(ostream& out) {
801 out << "options: " << cumprob << " | ";
802 for (list<OptionListItem>::iterator it = options.begin(); it != options.end(); ++it){
803 out << it->state << " " << it->base << " " << (it->probability/cumprob) << ", ";
804 }
805 out << endl;
806 }
807 OptionListItem sample();
808
809private:
810 list<OptionListItem> options;
811 Double cumprob;
812};
813
814
815#endif // _VITMATRIX_HH
This class implements a double object with a very large range.
Definition lldouble.hh:31
Options lists are used for sampling; items also in backtracking.
Definition vitmatrix.hh:748
Definition vitmatrix.hh:779
Definition types.hh:449
ProjectError()
Definition types.hh:460
Definition vitmatrix.hh:491
An array of Viterbi columns.
Definition vitmatrix.hh:687
Definition vitmatrix.hh:209
exceptions thrown by Viterbi
Definition vitmatrix.hh:111
Definition vitmatrix.hh:120
Definition vitmatrix.hh:33
one entry in the viterbi matrix, including the index for the substate map
Definition vitmatrix.hh:470
Definition vitmatrix.hh:188
Definition vitmatrix.hh:159