89 this->collection = collection;
90 featureLists =
new list<Feature>[NUM_FEATURE_TYPES];
93 predictionScheme = NULL;
97 hasLocalSSmalus = NULL;
// Cleanup fragment; the enclosing signature is not visible in this view
// (presumably the destructor -- confirm).
// Frees the featureLists array and, when non-null, the predictionScheme object.
106 delete[] featureLists;
107 if (predictionScheme)
108 delete predictionScheme;
// Release the hintedSites and hasLocalSSmalus arrays.
114 delete [] hintedSites;
116 delete [] hasLocalSSmalus;
// Free the array stored in each per-type slot of firstEnd.
118 for (
int type=0; type < NUM_FEATURE_TYPES; type++)
119 delete [] firstEnd[type];
// Free the array stored in each per-type slot of lastStart.
123 for (
int type=0; type < NUM_FEATURE_TYPES; type++)
124 delete [] lastStart[type];
// Member declarations; no bodies are visible in this view.
// printFeatures receives the output stream by reference.
131 void printFeatures(ostream& out);
132 void sortFeatureLists();
// Static helper that takes the list by non-const reference, so it can modify
// the caller's list. NOTE(review): presumably removes duplicate entries -- confirm in the definition.
135 static void deleteEqualElements(list<Feature> &list);
// Returns the feature list stored for the given type; the type is cast to int
// and used as the index into featureLists.
// The return type is a list by value, so every call copies the whole list.
136 list<Feature> getFeatureList(FeatureType type) {
137 return featureLists[(int) type];
// Feature lookup declarations (no bodies visible here). Every function in this
// group returns a Feature pointer; what is returned when nothing matches, and
// who owns the pointee, cannot be told from this view.
// Lookup by type, end position and strand.
139 Feature *getFeatureAt(FeatureType type,
int endPosition, Strand strand);
140 Feature *getFeatureListAt(FeatureType type,
int endPosition, Strand strand);
// Lookup by type only.
141 Feature *getAllActiveFeatures(FeatureType type);
// Range lookups by type; seqRelFrame defaults to -1
// (presumably meaning "no frame restriction" -- TODO confirm).
142 Feature *getFeatureListInRange(FeatureType type,
int startPosition,
int endPosition,
143 Strand strand,
int seqRelFrame=-1);
144 Feature *getFeatureListBeginningInRange(FeatureType type,
int startPosition,
int endPosition,
145 Strand strand,
int seqRelFrame=-1);
// Two overloads: the first takes a single FeatureType, the second a Bitmask of feature types.
146 Feature *getFeatureListOvlpingRange(FeatureType type,
int startPosition,
int endPosition, Strand strand);
147 Feature *getFeatureListOvlpingRange(
Bitmask featuretypes,
int startPosition,
int endPosition, Strand strand);
// Lookup by a Bitmask of feature types at a single position.
148 Feature *getFeatureListContaining(
Bitmask featuretypes,
int position, Strand strand);
// Exon-specific range lookups; they take no type argument and default seqRelFrame to -1.
149 Feature *getExonListInRange(
int startPosition,
int endPosition, Strand strand,
int seqRelFrame=-1);
150 Feature *getExonListOvlpingRange(
int startPosition,
int endPosition, Strand strand,
int seqRelFrame=-1);
// Declarations only; no bodies are visible in this view.
// localSSMalus returns a double for a feature type, position and strand.
151 double localSSMalus(FeatureType type,
int pos, Strand strand);
// shift takes an integer offset.
// NOTE(review): presumably shifts stored coordinates by that offset -- confirm in the definition.
152 void shift(
int offset);
// computeHintedSites takes the DNA sequence as a read-only C string.
153 void computeHintedSites(
const char* dna);
// Returns the validDSS entry at the index that Seq2Int(2) computes from dna.
// Const member: does not modify the object.
// NOTE(review): presumably dna must point to at least 2 readable characters -- confirm against Seq2Int.
154 bool validDSSPattern(
const char* dna)
const {
156 return validDSS[
Seq2Int(2)(dna)];
// Like validDSSPattern, but the index into validDSS is computed with
// Seq2Int(2).rc(dna) instead of Seq2Int(2)(dna).
// NOTE(review): rc presumably encodes the reverse complement -- confirm against Seq2Int.
161 bool validRDSSPattern(
const char* dna)
const {
163 return validDSS[
Seq2Int(2).rc(dna)];
// Returns the validASS entry at the index that Seq2Int(2) computes from dna.
// Const member: does not modify the object.
// NOTE(review): presumably dna must point to at least 2 readable characters -- confirm against Seq2Int.
168 bool validASSPattern(
const char* dna)
const {
170 return validASS[
Seq2Int(2)(dna)];
// Like validASSPattern, but the index into validASS is computed with
// Seq2Int(2).rc(dna) instead of Seq2Int(2)(dna).
// NOTE(review): rc presumably encodes the reverse complement -- confirm against Seq2Int.
175 bool validRASSPattern(
const char* dna)
const {
177 return validASS[
Seq2Int(2).rc(dna)];
// Returns true exactly when s is an element of the validHintedSites set.
// s is taken by value, so the argument string is copied on each call.
182 bool validSplicePattern(
string s)
const {
183 return validHintedSites.count(s) > 0;
// Builds a 4-character string via putReverseComplement from s and returns
// whether validSplicePattern accepts that string.
185 bool validRSplicePattern(
string s)
const {
// Destination buffer: 4 characters, each initialised to the value 0.
186 string reverseComplement(4,0);
// Writes 4 characters into the buffer, reading from s.
// NOTE(review): assumes s holds at least 4 characters; no length check is visible here -- TODO confirm callers.
187 putReverseComplement(reverseComplement.begin(), s.c_str(), 4);
188 return validSplicePattern(reverseComplement);
// Returns false when hintedSites is null. Otherwise returns the hintedSites
// entry at pos, using column forwDSS when strand equals plusstrand and revDSS
// for any other strand value.
// No bounds check on pos is visible here.
190 bool isHintedDSS(
int pos, Strand strand)
const {
191 return hintedSites &&
192 hintedSites[pos][strand == plusstrand ? forwDSS : revDSS];
// Returns false when hintedSites is null. Otherwise returns the hintedSites
// entry at pos, using column forwASS when strand equals plusstrand and revASS
// for any other strand value.
// No bounds check on pos is visible here.
194 bool isHintedASS(
int pos, Strand strand)
const {
195 return hintedSites &&
196 hintedSites[pos][strand == plusstrand ? forwASS : revASS];
// Declaration only: takes a feature type, an end position and a strand.
198 void deleteFeatureAt(FeatureType type,
int endPosition, Strand strand);
// Declaration only; no body visible in this view.
199 void cleanRedundantFeatures();
// Inline setter: stores len in the seqlen member.
200 void setSeqLen(
int len){seqlen = len;}
// Member function declarations; none of the bodies are visible in this view,
// so the notes below describe signatures only.
203 void findGenicGaps();
204 void findGroupGaps();
205 void determineInterGroupRelations();
206 void resetConformance();
208 void computeIndices();
// Both return an iterator into a list<Feature>; type is a plain int here, not a FeatureType.
209 list<Feature>::iterator getPosFirstEndAtOrAfter(
int type,
int e);
210 list<Feature>::iterator getPosStartAfter(
int type,
int s);
211 void rescaleBoniByConformance();
// Takes a pointer to a list of AltGene.
212 void createPredictionScheme(list<AltGene> *genes);
// Takes a pointer to a list of HintGroup pointers plus the flag value.
213 void setActiveFlag(list<HintGroup*> *groups,
bool flag);
// Returns a pointer to a list of AltGene.
// NOTE(review): ownership of the returned list is not visible here -- confirm in the definition.
214 list<AltGene> *joinGenesFromPredRuns(list<list<AltGene> *> *genesOfRuns,
int maxtracks,
bool uniqueCDS);
215 void sortIncompGroupsOfGroups();
// withEvidence defaults to true.
217 void prepare(
AnnoSequence *annoseq,
bool print,
bool withEvidence=
true);
218 void prepareLocalMalus(
const char* dna);
// type is a plain int here, not a FeatureType.
219 int numZeroCov(
int start,
int end,
int type, Strand strand);
// Static; takes the splice-site list as a string by value.
220 static void initHintedSplicesites(
string ssList);
// Static set of strings; validSplicePattern tests membership in it.
225 static set<string> validHintedSites;
// Array of feature lists, indexed by the integer value of a FeatureType (see getFeatureList).
227 list<Feature> *featureLists;
228 list<HintGroup> *groupList;
229 list<Feature> *groupGaps;
// Pointer-to-pointer of list<Feature> iterators; the cleanup code frees one
// array per feature type from each of these two members.
239 list<Feature>::iterator **firstEnd;
242 list<Feature>::iterator **lastStart;
// Integer vectors, one per strand for UTRpart, CDSpart and exonpart.
// NOTE(review): presumably cumulative coverage counts, going by the names -- confirm.
243 vector<int> cumCovUTRpartPlus;
244 vector<int> cumCovUTRpartMinus;
245 vector<int> cumCovCDSpartPlus;
246 vector<int> cumCovCDSpartMinus;
247 vector<int> cumCovExonpartPlus;
248 vector<int> cumCovExonpartMinus;
// Declaration only: cov is taken by non-const reference (so it can be modified),
// flist by const reference.
251 void addCumCov(vector<bool> &cov,
const list<Feature>& flist, Strand strand);
// Constructor; every parameter has a default (numSources=1, b=-1.0, m=1.0, lm=1.0).
// Only part of the initialiser list is visible in this view.
260 FeatureTypeInfo(
int numSources=1,
double b=-1.0,
double m=1.0,
double lm=1.0) :
// One empty bounds vector per source.
264 gradeclassbounds(numSources, vector<double>()),
// One quotient vector per source, each starting with the single value 1.0.
265 gradequots(numSources, vector<double>(1, 1.0))
// Takes a source index and a score and returns an int.
// The initialisation of klasse, the loop body and the return statement are not visible in this view.
271 int gradeclass(
int source,
double score) {
// The loop continues while klasse is below the last class index for this
// source and score is at or above the bound stored at index klasse.
273 while (klasse < gradeclassnums(source)-1
274 && score >= gradeclassbounds[source][klasse])
// Returns the number of entries in gradequots[source]; the container size is
// converted to int by the return type.
279 int gradeclassnums(
int source) {
280 return gradequots[source].size();
// Reads the class bounds and quotients for one source from the stream datei.
// The statement that obtains numclasses is not visible in this view.
282 void read(istream& datei,
int source) {
// Reject class counts outside 0..10: report on cerr, then throw ProjectError.
285 if (numclasses < 0 || numclasses > 10){
286 cerr <<
"Error: number of classes=" << numclasses << endl;
287 throw ProjectError(
"Error: number of classes out of range.");
// numclasses classes need numclasses-1 bounds and numclasses quotients.
// NOTE(review): numclasses == 0 passes the check above, so resize() would
// receive -1, which converts to a huge unsigned size -- confirm whether 0 can occur.
289 gradeclassbounds[source].resize(numclasses-1);
290 gradequots[source].resize(numclasses);
// Read the bounds; a bound smaller than its predecessor triggers the error
// message below (equal consecutive bounds do not).
291 for (
int i=0; i<numclasses-1; i++) {
292 datei >> gradeclassbounds[source][i];
293 if (i>0 && gradeclassbounds[source][i] < gradeclassbounds[source][i-1]) {
294 cerr <<
"Error: class bounds not increasing!" << endl;
// Read one quotient per class.
298 for (
int i=0; i < numclasses; i++)
299 datei >> gradequots[source][i];
// Per-source class bounds: outer index is the source, inner vector holds the
// bounds (read() sizes it to numclasses-1).
305 vector<vector<double> > gradeclassbounds;
// Per-source quotients: outer index is the source, inner vector holds one
// value per class (read() sizes it to numclasses).
311 vector<vector<double> > gradequots;
// Constructor fragment; the signature and parts of the body are not visible in
// this view (class name presumed -- confirm).
// Member initialiser: localmalustable starts out as NULL.
321 localmalustable(NULL)
// offset is the integer property "predictionStart" minus 1.
324 offset = Properties::getIntProperty(
"predictionStart" ) - 1;
// The splice-site list always starts with "gtag,gcag,"; the value of the
// "allow_hinted_splicesites" property is appended to it.
328 string ssList =
"gtag,gcag,";
330 ssList += Properties::getProperty(
"allow_hinted_splicesites");
// Hand the combined list to the static initialiser of SequenceFeatureCollection.
333 SequenceFeatureCollection::initHintedSplicesites(ssList);
// Publish the computed offset through the static Feature::offset.
337 Feature::offset = offset;
// Cleanup fragment; the enclosing signature is not visible in this view
// (presumably the destructor -- confirm).
// Iterates over every entry of collections; the increment and the loop body are not visible here.
345 for (map<string, SequenceFeatureCollection*>::iterator it = collections.begin();
346 it != collections.end();
// Free each non-null per-type row of malustable, then the outer array.
352 for (
int type=0; type < NUM_FEATURE_TYPES; type++)
353 if (malustable[type])
354 delete [] malustable[type];
355 delete [] malustable;
// localmalustable may be null, so its rows and outer array are freed only when it is set.
357 if (localmalustable){
358 for (
int type=0; type < NUM_FEATURE_TYPES; type++)
359 if (localmalustable[type])
360 delete [] localmalustable[type];
361 delete [] localmalustable;
// Tail of a preceding accessor whose signature is not visible in this view.
// Note: operator[] on a map inserts a value-initialised entry when seqname is absent.
379 return collections[seqname];
// Returns true when collections holds an entry for seqname. Uses count(), so
// unlike operator[] it never inserts.
381 bool isInCollections(
string seqname){
return collections.count(seqname)>0;}
// Member declarations; bodies are not visible in this view unless written inline.
// Takes the file name as a read-only C string.
382 void readGFFFile(
const char *filename);
// Takes the feature by non-const reference, so it can modify it.
383 void setBonusMalus(
Feature& f);
384 void readExtrinsicCFGFile();
// Both take the input stream by reference.
386 void readSourceRelatedCFG(istream& datei);
387 void readTypeInfo(istream& datei);
// Inline getter for numSeqsWithInfo.
388 int getNumSeqsWithInfo() {
return numSeqsWithInfo;}
// cleanRedundancies defaults to true in both variants.
390 void printAccuracyForSequenceSet(
const AnnoSequence* annoseqs,
bool cleanRedundancies=
true);
391 void printAccuracyForSequenceSetOld(
const AnnoSequence* annoseqs,
bool cleanRedundancies=
true);
// Both take the source key string by value.
392 bool skeyExists(
string skey);
// Returns an int; it is used as an array index by getIndividualLiability and get1group1gene.
393 int esource(
string skey);
// Returns the individual_liability entry at the index esource(skey).
// NOTE(review): no range check on the index is visible here; what esource returns
// for an unknown key is not shown -- confirm.
394 bool getIndividualLiability(
string skey){
return individual_liability[esource(skey)];}
// Returns the oneGroupOneGene entry at the index esource(skey); the same caveat applies.
395 bool get1group1gene(
string skey){
return oneGroupOneGene[esource(skey)];}
// Returns the malus field of the typeInfo entry for the given feature type.
396 double malus(FeatureType type){
397 return typeInfo[type].malus;
// Returns the localMalus field of the typeInfo entry for the given feature type.
399 double localMalus(FeatureType type){
400 return typeInfo[type].localMalus;
// Declarations only; no bodies are visible in this view.
// partMalus returns a double for a feature type and a length.
403 double partMalus(FeatureType type,
int len);
// localPartMalus additionally takes a bonus of the project type Double
// (capital D) and an int nindep.
404 double localPartMalus(FeatureType type,
int len,
Double bonus,
int nindep);
// Boolean arrays, both indexed by the value of esource(skey) in the accessors above.
413 bool *individual_liability;
414 bool *oneGroupOneGene;
// Maps a sequence name to a pointer to its SequenceFeatureCollection.
416 map<string, SequenceFeatureCollection*> collections;
// Array of per-type rows; the constructor initialises it to NULL, and the
// cleanup code frees each non-null row with delete[] before freeing the outer array.
418 double** localmalustable;