13#include "pp_scoring.hh"
// Compares two pointers by the objects they point to: yields true when
// *a is less than *b according to T's operator<.
52 bool operator()(T* a, T* b) {
    T& lhs = *a;
    T& rhs = *b;
    return lhs < rhs;
}
62 this->srcname = srcname;
67 list<string> groupnames;
86 this->withNames = withNames;
// Overload taking a source string and a name string, both by value.
// Declaration only; the definition is not part of this excerpt.
90 void add(
string source,
string name);
// Overload taking only a source string. Declaration only.
91 void add(
string source);
93 list<SrcEvidence> sourceEvidence;
124 State(
long first,
long last, StateType t) :
153 State *cloneStateSequence(){
156 erg->
next =
next->cloneStateSequence();
168 hasScore(other.hasScore),
169 apostprob(other.apostprob),
170 sampleCount(other.sampleCount),
172 truncated(other.truncated),
173 framemod(other.framemod)
176 evidence =
new Evidence(*other.evidence);
// Takes a read-only Feature pointer (a hint) and returns a bool.
// NOTE(review): presumably reports whether this state's reading frame agrees
// with the hint -- confirm against the definition, which is not in this excerpt.
179 bool frame_compatible(
const Feature *hint);
// Records srcname as a source of evidence. If no Evidence object exists yet,
// one is allocated first (constructed with `false`), then srcname is added to it.
180 void addEvidence(
string srcname) {
    if (evidence == NULL)
        evidence = new Evidence(false);
    evidence->add(srcname);
}
// Ordering comparison against another State. Const member: does not modify
// this object. The comparison criteria live in the definition (not shown here).
181 bool operator< (
const State &other)
const;
// Equality comparison against another State. Const member; defined elsewhere.
182 bool operator== (
const State &other)
const;
// Strand of this state: plusstrand when isOnFStrand(type) holds,
// minusstrand otherwise.
186 Strand strand() {
    if (isOnFStrand(type))
        return plusstrand;
    return minusstrand;
}
// Returns a State pointer. Declaration only.
// NOTE(review): ownership of the returned State is not visible here -- confirm
// against the definition before deleting or storing the result.
187 State *getBiologicalState();
// Takes three coordinates/lengths (end, predEnd, dnalen). Declaration only.
// NOTE(review): presumably updates the `truncated` member -- confirm.
188 void setTruncFlag(
long end,
long predEnd,
long dnalen);
// No parameters, no return value. Declaration only.
// NOTE(review): by its name this folds `framemod` into `type` -- confirm.
189 void includeFrameModIntoType();
216 void push(
State *st){
223 for (
State* st = first; st != NULL; st = st->
next) {
// Returns a Transcript pointer; takes a C string of gene names.
// Declaration only; the definition is not part of this excerpt.
231 Transcript* projectOntoGeneSequence(
const char *genenames);
// Static factory-style function: takes a Transcript list head and the DNA
// length and returns a StatePath pointer. printErrors defaults to true.
// NOTE(review): ownership of the returned StatePath is not visible here.
232 static StatePath* getInducedStatePath(
Transcript *genelist,
long dnalen,
bool printErrors=
true);
// Equality comparison against another StatePath. Const member.
234 bool operator== (
const StatePath &other)
const;
// Ordering comparison against another StatePath. Const member.
235 bool operator< (
const StatePath &other)
const;
// Takes begin/end coordinates, a frame and a strand flag. Declaration only.
// NOTE(review): presumably pushes an intron state onto this path -- confirm.
237 void pushIntron(
long begin,
long end,
int frame,
bool onFStrand);
244 list<PP::Match> proteinMatches;
// Free function taking the head of a State list and returning an int.
// NOTE(review): presumably the number of states reachable via `next` -- confirm.
248int lenStateList(
State *head);
253 exons = introns = (
State*) NULL;
255 transstart = transend = -1;
256 seqname =
id = source =
"";
268 list<State*> sl = getExInInHeads();
269 for (list<State*>::iterator it = sl.begin(); it != sl.end(); ++it){
282 res->next = next->cloneGeneSequence();
287 static void destroyGeneSequence(
Transcript *head) {
291 nextHead = nextHead->next;
// The four declarations below come in add/set pairs for a float probability p
// and an int count k. Definitions are not part of this excerpt.
// NOTE(review): presumably they apply to every state of this transcript
// (apostprob / sampleCount) -- confirm.
295 void addStatePostProbs(
float p);
296 void setStatePostProbs(
float p);
297 void addSampleCount(
int k);
298 void setSampleCount(
int k);
// Begin coordinate of this transcript: the stored transstart value.
// Virtual so that derived classes can supply their own rule.
299 virtual long geneBegin() const
{
    return transstart;
}
// End coordinate of this transcript: the stored transend value.
// Virtual so that derived classes can supply their own rule.
300 virtual long geneEnd() const
{
    return transend;
}
// Base-class answer: always false. Virtual, so derived classes may override.
301 virtual bool isCoding() const
{
    return false;
}
// Ordering comparison against another Transcript. Const member; defined elsewhere.
302 bool operator< (
const Transcript &other)
const;
// Equality comparison against another Transcript. Const member; defined elsewhere.
303 bool operator== (
const Transcript &other)
const;
// Takes a float n. Declaration only.
// NOTE(review): presumably normalises posterior probabilities by n -- confirm.
304 void normPostProb(
float n);
306 virtual list<State*> getExInHeads()
const {
309 L.push_back(introns);
// Base-class version: returns exactly the list produced by getExInHeads().
// Virtual, so derived classes may return additional list heads.
312 virtual list<State*> getExInInHeads() const
{
    list<State*> heads = getExInHeads();
    return heads;
}
// Returns a double; takes no arguments. Declaration only.
// NOTE(review): presumably the mean posterior probability over this
// transcript's states -- confirm against the definition.
313 double meanStateProb();
315 bool noOffset =
false)
const;
// Virtual; takes an offset d. Declaration only.
// NOTE(review): presumably adds d to all stored coordinates -- confirm.
316 virtual void shiftCoordinates(
long d);
// Virtual; compares this transcript with another one and returns a bool.
// The similarity criterion is in the definition (not shown here).
317 virtual bool almostIdenticalTo(
Transcript *other);
// Default implementation does nothing; annoseq is not used here.
// Virtual, so derived classes may provide real output.
318 virtual void printCodingSeq(AnnoSequence *annoseq) const
{
}
// Default implementation does nothing; annoseq is not used here.
// Virtual, so derived classes may provide real output.
319 virtual void printProteinSeq(AnnoSequence *annoseq) const
{
}
// Default implementation does nothing; annoseq is not used here.
// Virtual, so derived classes may provide real output.
320 virtual void printBlockSequences(AnnoSequence *annoseq) const
{
}
// Virtual const member with no parameters and no return value.
// Declaration only; the output it produces is determined by the definition.
321 virtual void printGFF()
const;
// Default implementation does nothing. Virtual, so derived classes may
// print their own evidence.
322 virtual void printEvidence() const
{
}
// Takes a bool flag. Declaration only.
// NOTE(review): presumably sets `hasScore` on this transcript's states -- confirm.
323 void setStateHasScore(
bool has);
// Static; receives the transcript list by non-const reference, so the list
// itself may be modified. maxTracks is an int limit.
// NOTE(review): the exact filtering rule is in the definition -- confirm.
325 static void filterTranscriptsByMaxTracks(list<Transcript*> &gl,
int maxTracks);
// Base-class default: always 0.0, regardless of the hint group passed in.
// Virtual, so derived classes may compute a real value.
326 virtual double supportingFraction(HintGroup *group)
{
    return 0.0;
}
330 long transstart, transend;
344 static bool print_tss;
345 static bool print_tts;
// Free function. Declaration only; the definition is not part of this excerpt.
//   gl, filteredTranscripts  - transcript lists passed by non-const reference
//   seq                      - read-only C string
//   strand                   - Strand value
//   noInFrameStop            - bool option
//   hasInFrameStop           - bool passed by reference, so it can be written by the callee
//   minmeanexonintronprob    - defaults to 0.0
//   minexonintronprob        - defaults to 0.0
// NOTE(review): presumably moves transcripts passing the filters from gl into
// filteredTranscripts -- confirm against the definition.
349void filterGenePrediction(list<Transcript*> &gl, list<Transcript*> &filteredTranscripts,
const char *seq, Strand strand,
bool noInFrameStop,
bool &hasInFrameStop,
double minmeanexonintronprob=0.0,
double minexonintronprob=0.0);
359 complete5utr = complete3utr =
true;
360 codingstart = codingend = -1;
361 supportingEvidence = incompatibleEvidence = CDSexonEvidence = CDSintronEvidence = UTR5stateEvidence = UTR3stateEvidence = NULL;
// Returns a heap-allocated copy of this gene, built with Gene's copy constructor.
// The copy is created with `new`; nothing in this function releases it.
364 virtual Gene* clone() {
    Gene *copy = new Gene(*this);
    return copy;
}
366 if (supportingEvidence)
367 delete supportingEvidence;
368 if (incompatibleEvidence)
369 delete incompatibleEvidence;
370 if (CDSintronEvidence)
371 delete CDSintronEvidence;
373 delete CDSexonEvidence;
374 if (UTR5stateEvidence)
375 delete UTR5stateEvidence;
376 if (UTR3stateEvidence)
377 delete UTR3stateEvidence;
// Collects the heads of this gene's state lists, in this order:
// exons, introns, utr5exons, utr3exons.
379 list<State*> getExInHeads() const {
    list<State*> heads;
    heads.push_back(exons);
    heads.push_back(introns);
    heads.push_back(utr5exons);
    heads.push_back(utr3exons);
    return heads;
}
// Same heads as getExInHeads(), followed by utr5introns and utr3introns.
380 list<State*> getExInInHeads() const {
    list<State*> heads = getExInHeads();
    heads.push_back(utr5introns);
    heads.push_back(utr3introns);
    return heads;
}
// Const member returning an int. Declaration only.
383 int numExons()
const;
// Const member returning a State pointer. Declaration only.
384 State *lastExon()
const;
// Compares this gene with another Gene and returns a bool. Declaration only;
// the comparison criterion is in the definition.
385 bool identicalCDS(
Gene *other);
// Keeps the Transcript* overload of almostIdenticalTo visible next to the
// Gene* overload declared below (which would otherwise hide it).
386 using Transcript::almostIdenticalTo;
387 virtual bool almostIdenticalTo(
Gene *other);
// Takes an offset d. Declaration only.
// NOTE(review): presumably adds d to all stored coordinates -- confirm.
388 void shiftCoordinates(
long d);
// Begin of the gene: transstart when it is non-negative, otherwise codingstart.
389 long geneBegin() const {
    if (transstart >= 0)
        return transstart;
    return codingstart;
}
// End of the gene: transend when it is non-negative, otherwise codingend.
390 long geneEnd() const {
    if (transend >= 0)
        return transend;
    return codingend;
}
// A Gene always reports itself as coding.
391 virtual bool isCoding() const
{
    return true;
}
// Takes a State list of mRNA ranges plus two completeness flags, both
// defaulting to true. Declaration only.
// NOTE(review): complete_l / complete_r presumably refer to the left and right
// ends of the transcript -- confirm against the definition.
392 void addUTR(
State *mrnaRanges,
bool complete_l=
true,
bool complete_r=
true);
// Takes a pointer to a list of hint groups. Declaration only.
393 void compileExtrinsicEvidence(list<HintGroup> *groupList);
// Takes one hint group and returns a double. Declaration only.
394 double supportingFraction(
HintGroup *group);
// Takes one hint group. Declaration only.
395 void addSupportedStates(
HintGroup *group);
// Const member returning a double. Declaration only.
396 double getPercentSupported()
const;
// Const member mapping a location `loc` to a long, with a bool option `comp`.
// NOTE(review): `comp` presumably selects reverse-complement coordinates -- confirm.
397 long getCDSCoord(
long loc,
bool comp)
const;
// Const member returning a bool. Declaration only.
398 bool completeCDS()
const;
// Const member, no parameters, no return value. Declaration only.
400 void printGFF()
const;
// Virtual const member, no parameters, no return value. Declaration only.
404 virtual void printEvidence()
const;
414 long codingstart, codingend;
433 list<PP::Match> proteinMatches;
437 static bool print_stop;
438 static bool print_introns;
439 static bool print_cds;
440 static bool print_exonnames;
441 static bool stopCodonExcludedFromCDS;
442 static bool print_utr;
443 static bool print_blocks;
449 list<Transcript*> transcripts;
459 mincodstart = maxcodend = -1;
// Ordering comparison against another AltGene. Const member; defined elsewhere.
464 bool operator< (
const AltGene &other)
const;
// Takes an offset d. Declaration only.
467 void shiftCoordinates(
long d);
// numkeep defaults to -1. Declaration only.
// NOTE(review): -1 presumably means "keep all transcripts" -- confirm.
468 void sortTranscripts(
int numkeep=-1);
// Takes a bool option uniqueCDS. Declaration only.
469 void deleteSuboptimalTranscripts(
bool uniqueCDS);
// Returns a long; takes no arguments. Declaration only.
470 long minTransBegin();
// Reports whether this AltGene is coding: true when it holds no transcripts,
// or when its first transcript is a Gene (checked with dynamic_cast).
// Const-qualified to match Transcript::isCoding() and Gene::isCoding(), so it
// can also be called on a const AltGene; the function only reads `transcripts`.
472 bool isCoding() const {
    if (transcripts.empty())
        return true;
    return dynamic_cast<Gene*>(transcripts.front()) != NULL;
}
// Free-function declarations; none of the definitions are part of this excerpt.

// Takes a list of AltGene, an annotated sequence and three bool output options.
477void printGeneList(list<AltGene> *genelist,
AnnoSequence *annoseq,
bool withCS,
bool withAA,
bool withEvidence);
// Takes a Transcript list head. annoseq defaults to NULL, withCS to false and
// withAA to true.
479void printGeneSequence(
Transcript* seq,
AnnoSequence *annoseq = NULL,
bool withCS=
false,
bool withAA=
true);
// Takes the list of Gene pointers by value and returns a pointer to a list.
// NOTE(review): ownership of the returned list is not visible here -- confirm.
480list<Gene*>* sortGenePtrList(list<Gene*>);
// Takes a list of AltGene and an end position; returns a pointer to a list.
// NOTE(review): whether the result is a new list or the input is not visible here.
481list<AltGene> *reverseGeneList(list<AltGene> *altGeneList,
long endpos);
// Takes the transcript list by non-const reference; returns a pointer to a
// list of AltGene.
482list<AltGene>* groupTranscriptsToGenes(list<Transcript*> &transcripts);
// Takes the list head pointer by reference, so the head itself may be replaced.
484void reverseGeneSequence(
Transcript* &seq,
long endpos);
// Takes a list of AltGene and an annotated sequence.
485void postProcessGenes(list<AltGene> *genes,
AnnoSequence *annoseq);
494 genes = lastGene = (
Gene*) 0;
495 forwardGenes = backwardGenes = lastForwardGene = lastBackwardGene = (
Gene*) 0;
496 forwardPath = condensedForwardPath = backwardPath = condensedBackwardPath = (
StatePath*) 0;
499 forwardEmiProb = 1.0;
500 backwardEmiProb = 1.0;
503 Transcript::destroyGeneSequence(genes);
507 delete condensedPath;
509 Transcript::destroyGeneSequence(forwardGenes);
510 Transcript::destroyGeneSequence(backwardGenes);
515 if (condensedForwardPath)
516 delete condensedForwardPath;
517 if (condensedBackwardPath)
518 delete condensedBackwardPath;
// Const member with no parameters. Declaration only.
// NOTE(review): the `const` on the `void` return type has no effect and can
// trigger -Wignored-qualifiers; removing it would also require changing the
// out-of-line definition, which is not part of this excerpt.
524 const void printGFF()
const;
544 Transcript *lastForwardGene, *lastBackwardGene;
571 nextHead = nextHead->next;
575 void setWeight(
int w) {
580 for (
Transcript* t = anno->genes; t != NULL; t = t->next){
621 annoseq = annoseqHead;
622 while (annoseq && !(annoseq->anno && annoseq->anno->genes))
623 annoseq = annoseq->next;
625 gene = annoseq->anno->genes;
631 bool hasMoreElements(){
632 return (annoseq != NULL && gene != NULL);
646 return (first->pathemiProb > second->pathemiProb);
660 while (!pathlist.empty()){
661 p = pathlist.front();
662 pathlist.pop_front();
667 if (!containsPath(p))
668 pathlist.push_back(p);
672 return pathlist.size();
678 void printAPosterioriProbs(ostream& out) {
679 for(list<StatePath*>::iterator it=pathlist.begin(); it!=pathlist.end(); it++)
680 out << (*it)->pathemiProb <<
" , ";
687 list<StatePath*> pathlist;
A summary of extrinsic evidence by source of evidence.
Definition gene.hh:82
Hints on the gene structure.
Definition hints.hh:60
int length
The length of the span of the coding part (with introns)
Definition gene.hh:419
State * utr5exons
Members for the untranslated regions (UTRs)
Definition gene.hh:410
int clength
The coding length of the gene.
Definition gene.hh:421
int frame
The reading frame position of the first base (usually 0)
Definition gene.hh:423
static bool print_start
output options
Definition gene.hh:436
HintGroup.
Definition hints.hh:149
This class implements a double object with a very large range.
Definition lldouble.hh:31
A path through the Hidden-Markov-Model states.
Definition gene.hh:199
void print()
Definition gene.cc:344
long begin
Definition gene.hh:106
State * next
Definition gene.hh:114
long end
Definition gene.hh:110