Augustus 3.4.0
Loading...
Searching...
No Matches
hints.hh
1/*
2 * hints.hh
3 *
4 * License: Artistic License, see file LICENSE.TXT or
5 * https://opensource.org/licenses/artistic-license-1.0
6 */
7
8#ifndef __HINTS_HH
9#define __HINTS_HH
10
11// project includes
12#include "types.hh"
13
14// standard C/C++ includes
15#include <cmath> // for pow
16#include <list>
17
18#define NUM_FEATURE_TYPES 17
19#define BONUS_FACTOR 1
20#define QUOT_PSEUDOCOUNT 1
21#define BLOCKSIZE 1000
22
23using namespace std;
24
// Kinds of hints. The enumerators are numbered consecutively from 0
// (UNKNOWN_FEATURE is -1), so there are NUM_FEATURE_TYPES valid values.
enum FeatureType {
    UNKNOWN_FEATURE = -1,
    startF,        // begin and end position of a start codon
    stopF,         // begin and end position of a stop codon
    assF,          // begin=end=the first intron base upstream of an exon
    dssF,          // begin=end=the first intron base downstream of an exon
    tssF,          // short region that contains the transcription initiation site
    ttsF,          // short region that contains the transcription termination site
    exonpartF,     // interval contained in a coding exon
    exonF,         // exact coding exon
    intronpartF,   // interval contained in an intron
    intronF,       // exact intron
    irpartF,       // interval that is part of an intergenic region
    CDSF,          // exact coding sequence
    CDSpartF,      // interval contained in a coding sequence
    UTRF,          // exact utr exon (only the untranslated part of the exon)
    UTRpartF,      // part of utr exon
    nonexonpartF,  // part of intergenic region or part of intron
    nonirpartF     // part of genic region
};
32extern const char* featureTypeNames[NUM_FEATURE_TYPES];
33
34// start: begin and end position of a start codon
35// stop: begin and end position of a stop codon
36// ass: begin=end=the first intron base upstream of an exon
37// dss: begin=end=the first intron base downstream of an exon
38// exonpart: interval contained in a coding exon
39// exon: exact coding exon
40// intronpart: interval contained in an intron
41// intron: exact intron
42// tss: short region that contains the transcription initiation site
43// tts: short region that contains the transcription termination site
44// irpart: interval that is part of an intergenic region
45// CDS: exact coding sequence
46// CDSpart: interval contained in a coding sequence
47// UTR: exact utr exon (only the untranslated part of the exon)
48// UTRpart: part of utr exon
49// nonexonpart: part of intergenic region or part of intron
50// nonirpart: part of genic region
51
52bool isSignalType(FeatureType type);
53bool isGFF(ifstream &istrm);
54
60class Feature {
61public:
62 Feature(){
63 bonus = malus = 1.0;
64 esource = '?';
65 active = true;
66 discard = false;
67 numContradicting = 0.0;
68 numSupporting = 0;
69 mult = 1;
70 }
71
72 Feature(int anfang, int ende, FeatureType typ, Strand strang, int leserahmen, string equelle) {
73 start = anfang;
74 end = ende;
75 type = typ;
76 strand = strang;
77 frame = leserahmen;
78 esource = equelle;
79 score = 0.0;
80 active = true;
81 discard = false;
82 numContradicting = 0.0;
83 numSupporting = 0;
84 mult = 1;
85 }
86
87 ~Feature(){
88 }
89
90 static FeatureType getFeatureType(string typestring);
91 static FeatureType getFeatureType(int typeint);
92 double exonpartMalus(int len){
93 return pow(malus, len);
94 }
95 double distance_faded_bonus(int pos);
96 bool compatibleWith(Feature &other);
97 bool weakerThan(Feature &other, bool &strictly);
98 double conformance();
99 int length() {return end - start + 1;}
100 void shiftCoordinates(int start,int end,bool rc = false);
101 void setFrame(string f);
102 void setStrand(string s);
103 // fields of the GFF-format
104 string seqname;
105 string source;
106 string feature;
107 string groupname;
108 int priority; // >=0 higher priority -> more important. -1 reserved for not specified
109 long start, end;
110 double score;
111 double bonus;
112 double malus;
113 Strand strand;
114 /* frame definition gff: One of '0', '1', '2' or '.'. '0' indicates that the specified region is in frame,
115 i.e. that its first base corresponds to the first base of a codon. '1' indicates
116 that there is one extra base, i.e. that the second base of the region corresponds
117 to the first base of a codon, and '2' means that the third base of the region is
118 the first base of a codon. If the strand is '-', then the first base of the region
119 is value of <end>, because the corresponding coding region will run from <end> to
120 <start> on the reverse strand. */
121 int frame;
122 string attributes;
123 int gradeclass;
124 string esource; // 'annotrain' is reserved for annotation in the training
125 FeatureType type;
126 bool active;
127 bool discard;
128 Feature *next; // used for making a partial list in SequenceFeatureCollections
129 float numContradicting; // fractional number of other hints that contradict this one
130 int numSupporting;
131 static long offset;
132 int mult; // multiplicity for summarizing several identical hints
133};
134
135ostream& operator<<(ostream&out, Feature& feature);
136istream& operator>>( istream& in, Feature& feature );
137
138bool operator<(const Feature& f1, const Feature& f2);
139bool operator==(const Feature& f1, const Feature& f2);
140
141
150public:
151 HintGroup(){
152 hints = NULL;
153 name = "";
154 incompGroups = strongerGroups = NULL;
155 begin = end = -1;
156 geneBegin = geneEnd = -1;
157 priority = -1;
158 copynumber = 1;
159 trashy = false;
160 }
161 ~HintGroup(){
162 if (hints)
163 delete hints;
164 if (incompGroups)
165 delete incompGroups;
166 if (strongerGroups)
167 delete strongerGroups;
168 }
169 friend bool operator<(const HintGroup& g1, const HintGroup& g2);
170 friend bool operator==(const HintGroup& g1, const HintGroup& g2);
171 string getName() const {return name;}
172 int getPriority() const {return priority;}
173 int getBegin() const {return begin;}
174 int getEnd() const {return end;}
175 int getGeneBegin() const {return geneBegin;}
176 int getGeneEnd() const {return geneEnd;}
177 int getCopyNumber() const {return copynumber;}
178 void addCopyNumber(int n) {copynumber += n;}
179 int getSize() const {if (hints) return hints->size(); else return 0;}
180 string getSource() const {if (hints){ return hints->front()->esource;} else return "";}
181 list<HintGroup*> *getIncompGroups(){return incompGroups;}
182 list<HintGroup*> *getStrongerGroups(){return strongerGroups;}
183 list<Feature*> *getHints() {return hints;}
184 void print(ostream& out, bool withHints=false);
185 void sortFeatures();
186 void addFeature(Feature *hint);
187 bool compatibleWith(HintGroup &other, Feature *&rascal1, Feature *&rascal2, bool &weakerThan);
188 void updateFeatureConformance(HintGroup &other);
189 bool nestedGenePossible(HintGroup &other);
190 bool isTrashy();
191 bool canCauseAltSplice();
192 void setActiveFlag(bool active);
193 void setDiscardFlag(bool discard);
194 void addIncompGroup(HintGroup *otherGroup);
195 void addStrongerGroup(HintGroup *otherGroup);
196 void sortIncompGroup(){if (incompGroups) {incompGroups->sort();}}
197private:
198 list<Feature*> *hints;
199 list<HintGroup*> *incompGroups; // incompatible HintGroups
200 list<HintGroup*> *strongerGroups; // groups that are properly stronger
201 string name;
202 int priority;
203 long begin;
204 long end;
205 long geneBegin;
206 long geneEnd;
207 int copynumber;
208 bool trashy;
209};
210
211void printSrcGroupEvidence(list<HintGroup*> *groupList);
212
213#endif //__HINTS_HH
Hints on the gene structure.
Definition hints.hh:60
HintGroup.
Definition hints.hh:149