Augustus 3.4.0
Loading...
Searching...
No Matches
Public Member Functions | Static Public Member Functions | Static Public Attributes | List of all members
GeneMSA Class Reference

multiple sequence alignment of genomes for comparative gene prediction More...

#include <geneMSA.hh>

Public Member Functions

 GeneMSA (RandSeqAccess *rsa, Alignment *a)
 
string getSeqID (int speciesIdx)
 
Strand getStrand (int speciesIdx)
 
int getStart (int speciesIdx)
 
int getEnd (int speciesIdx)
 
list< ExonCandidate * > * getExonCands (int speciesIdx)
 
Alignment * getAlignment ()
 
vector< int > getOffsets ()
 
map< string, ExonCandidate * > * getECHash (list< ExonCandidate * > *ec)
 
void createExonCands (int s, const char *dna, map< int_fast64_t, ExonCandidate * > &ecs, map< int_fast64_t, ExonCandidate * > &addECs)
 
void setExonCands (vector< map< int_fast64_t, ExonCandidate * > > &ecs)
 
void createOrthoExons (list< OrthoExon > &orthoExonsList, map< int_fast64_t, list< pair< int, ExonCandidate * > > > &alignedECs, Evo *evo, float consThres=0.0, int minAvLen=0)
 
void printStats ()
 
void printGeneRanges ()
 
void printExonCands ()
 
void printOrthoExons (list< OrthoExon > &orthoExonsList)
 
void computeOmegas (list< OrthoExon > &orthoExonsList, vector< AnnoSequence * > const &seqRanges, PhyloTree *ctree)
 
void computeOmegasEff (list< OrthoExon > &orthoExonsList, vector< AnnoSequence * > const &seqRanges, PhyloTree *ctree, ofstream *codonAli)
 
void computeClamsaEff (list< OrthoExon > &orthoExonsList, vector< AnnoSequence * > const &seqRanges, PhyloTree *ctree, ofstream *codonAli)
 
void computeClamsa (list< OrthoExon > &orthoExonsList, vector< AnnoSequence * > const &seqRanges, PhyloTree *ctree, ofstream *codonAli)
 
vector< string > pruneToBV (vector< string > *cs, bit_vector bv)
 
vector< int > pruneToBV (vector< int > *rfc, bit_vector bv)
 
double omegaForCodonTuple (vector< double > *loglik)
 
void printOmegaForCodon (string outdir)
 
void printCumOmega ()
 
void comparativeSignalScoring (list< OrthoExon > &orthoExonsList)
 
LocusTree * constructTree ()
 
void calcConsScore (list< OrthoExon > &orthoExonsList, vector< AnnoSequence * > const &seqRanges, string outdir)
 
double calcColumnScore (int a, int c, int t, int g)
 
void consToWig (vector< double > &consScore, string outdir)
 
void printSingleOrthoExon (OrthoExon const &oe, bool files=true)
 
void collect_features (int species, list< OrthoExon > *hects, SpeciesGraph *speciesgraph)
 
void getAllOEMsas (int species, list< OrthoExon > *hects, unordered_map< string, int > *ref_class, vector< AnnoSequence * > const &seqRanges)
 
StringAlignment getMsa (OrthoExon const &oe, vector< AnnoSequence * > const &seqRanges, size_t flanking=0)
 

Static Public Member Functions

static void setTree (PhyloTree *t)
 
static void setCodonEvo (CodonEvo *c)
 
static void setCodonEvoDiscr (CodonEvoDiscr *c)
 
static int numSpecies ()
 
static void openOutputFiles (string outdir)
 
static void closeOutputFiles ()
 

Static Public Attributes

static int padding = 1000
 
static int orthoExonID = 1
 
static int geneRangeID = 1
 
static vector< int > exonCandID
 
static unordered_map< bit_vector, PhyloTree *, boost::hash< bit_vector > > topologies
 
static vector< ofstream * > exonCands_outfiles
 
static vector< ofstream * > orthoExons_outfiles
 
static vector< ofstream * > geneRanges_outfiles_bed
 
static vector< ofstream * > geneRanges_outfiles_gff
 
static vector< ofstream * > omega_outfiles
 
static map< vector< string >, pair< vector< double >, int > > computedCumValues
 

Detailed Description

multiple sequence alignment of genomes for comparative gene prediction

Author
Mario Stanke
Alexander Gebauer

Constructor & Destructor Documentation

◆ GeneMSA()

GeneMSA::GeneMSA ( RandSeqAccess *  rsa,
Alignment *  a 
)

construct the gene ranges now: simple copy. TODO: extend region when apparently part of the alignment is missing

    human   ***********---*******************
    mouse   *******-------******************-
    chicken ***********----------------------
                       ^
                       |
                       extend range here

Member Function Documentation

◆ computeClamsaEff()

void GeneMSA::computeClamsaEff ( list< OrthoExon > &  orthoExonsList,
vector< AnnoSequence * > const &  seqRanges,
PhyloTree *  ctree,
ofstream *  codonAli 
)

computeClamsaEff

Parameters
[in] orthoExonsList: all orthoExons in gene range
[in] seqRanges: the sequences that were aligned
[in] ctree: tree scaled for units of codon substitutions
See also
computeOmegasEff

◆ createOrthoExons()

void GeneMSA::createOrthoExons ( list< OrthoExon > &  orthoExonsList,
map< int_fast64_t, list< pair< int, ExonCandidate * > > > &  alignedECs,
Evo *  evo,
float  consThres = 0.0,
int  minAvLen = 0 
)

Find all ortholog exon candidates that are present in at least max(2, consThres * m) species, where m <= numSpecies is the number of species that are present in the alignment. Only report OrthoExons oe with at least 'minAvLen' as average length of the exon candidates in oe. Ortholog exon candidates are those for which:

  • both splice sites align exactly
  • the exon candidate types agree (single, rsingle, internal0, ...)
  • the phases at both boundaries agree (i.e. exon candidate types and length modulo 3)

EC coordinates are region-based, as they are used in the OrthoGraph.

createOrthoExons

◆ getAllOEMsas()

void GeneMSA::getAllOEMsas ( int  species,
list< OrthoExon > *  hects,
unordered_map< string, int > *  ref_class,
vector< AnnoSequence * > const &  seqRanges 
)

getAllOEMsas obtains and prints multiple sequence alignments (MSAs) together with their labels y = 0 or 1, indicating whether each alignment constitutes a real CDS in the reference species or not.

Parameters
[in] species: is the reference species index
[in] hects: ortho exons to get alignments from
[in] ref_class: assignment of CDS keys as 1 (= in reference anno) or 0.
[in] seqRanges: the aligned sequences

◆ getMsa()

StringAlignment GeneMSA::getMsa ( OrthoExon const &  oe,
vector< AnnoSequence * > const &  seqRanges,
size_t  flanking = 0 
)

getMsa obtains the alignment in string format, including unaligned insertions that are not part of any fragment.

Returns
msa vector of alignment rows
Parameters
[in] oe: the OrthoExon whose alignment is sought
[in] seqRanges: contains the alignment of the larger region tuple
[in] flanking: allows adding padding on both sides, e.g. for sequence signals

The documentation for this class was generated from the following files: