Augustus 3.4.0
Loading...
Searching...
No Matches
motif.hh
1/*
2 * motif.hh
3 *
4 * License: Artistic License, see file LICENSE.TXT or
5 * https://opensource.org/licenses/artistic-license-1.0
6 */
7
8#ifndef _MOTIF_HH
9#define _MOTIF_HH
10
11// project includes
12#include "matrix.hh"
13#include "geneticcode.hh" // for Seq2Int
14
28enum WeighingType { equalWeights, gcContentClasses, multiNormalKernel}; // weighing schemes; type of the static BaseCount::weithType — presumably selects which BaseCount weight function is used, TODO confirm
29
34public:
35 int a;
36 int c;
37 int g;
38 int t;
39 static WeighingType weithType;
40 static Matrix<double> weighingMatrix;
41
42 double ra, rc, rg, rt;
43 BaseCount();
44 BaseCount(int a, int c, int g, int t);
45 BaseCount(const char *sequence, int len=-1);
46 ~BaseCount() {};
47 static void init();
48 void normalize();
49 void addSequence(const char *sequence, int len);
50 void addCharacter(char nucleotide, bool subtract=false);
51 void reverse();
52 static int weight( BaseCount bc1, BaseCount bc2);
53 static double doubleWeight( BaseCount bc1, BaseCount bc2);
54 static int gcContentWeight(BaseCount bc1, BaseCount bc2);
55 static int gcContentClass (double gcContent);
56 static int gcContentClassWeight(BaseCount bc1, BaseCount bc2);
57 static double multiNormalKernelWeight(BaseCount bc1, BaseCount bc2);
58 static void setWeightMatrix(string matrixFileName);
59 static double phi(double x, double sigma);
60
61};
62
63ostream& operator<<( ostream& out, const BaseCount& bc );
64
69 double a;
70 double c;
71 double g;
72 double t;
73
74 Composition() {
75 a = 0.0;
76 c = 0.0;
77 g = 0.0;
78 t = 0.0;
79 }
81 double sum = bc.a + bc.c + bc.g + bc.t;
82 a = bc.a / sum;
83 c = bc.c / sum;
84 g = bc.g / sum;
85 t = bc.t / sum;
86 }
87};
88
92class Motif { // motif model: counts are collected with addSequence(), see makeProbs()/seqProb() for the probability side
93public:
94 int n; // motif length, cf. seq[0] ... seq[n-1] in the addSequence note (presumably the ctor's 'length' — confirm in motif.cc)
95 int k; // number of extra positions that must be accessible next to the motif, cf. seq[-k] (presumably the ctor's 'memory' — confirm)
96 int numSeqs; // number of unweighed sequences
97 int neighbors;
98 int pseudocount;
99
100 Motif() :
101 n(0), k(0), numSeqs(0), neighbors(0), // numSeqs is now zero-initialized; it was previously left indeterminate by this constructor
102 pseudocount(1),
103 windowProbs(NULL),
104 windowCounts(NULL),
105 s2i(0) {}
106 Motif & operator = (const Motif & other); // NOTE(review): destructor and copy assignment are declared but no copy constructor; if the pointers below are owned, copy-construction would share them — confirm
107
108 Motif(int length, int memory=0, int pseudocount = 1, int neighbors = 0);
109 ~Motif();
110 /*
111 * add one sequence to the training set of the motif
112 * seq is the beginning of the motif, but
113 * seq[-k] ... seq[n-1] or seq[0] ... seq[n+k-1] (reverse case) must be accessible!
114 */
115 void addSequence(const char* seq, int weight = 1, bool reverse=false);
116 void makeProbs();
117 void printProbs();
118 Double seqProb(const char* seq, bool reverse=false, bool complement=false);
119 void write(ofstream &out);
120 void read(ifstream &in);
121 void clearCounts();
122 char* getSampleDNA(); // NOTE(review): returns a raw char*; ownership of the buffer is not visible in this header — confirm who frees it
123 Double getProbThreshold (double q, int numSamples = 10000);
124private:
125 vector<Double> *windowProbs; // NULL in a default-constructed Motif
126 vector<int> *windowCounts; // NULL in a default-constructed Motif
127 Seq2Int s2i;
128};
129
134public:
135 int n;
136 BaseCount *zus;
137 ContentDecomposition() : n(0), zus(0) {
138 setProperties();
139 }
140 BaseCount getBaseCount(int i);
141 int getNearestBaseCountIndex(BaseCount bc);
142private:
143 void setProperties();
144 void makeDecomposition();
145};
146
153public:
156 void computeStairs(const char* dna);
157 int getNextStep(int from);
158 int *idx; // GC content class index for each position of dna
159 const char *dna; // just a pointer, to check whether update is necessary
160 int n; // dna length
161 map<int,int> nextStep;
162private:
163 int GCwinsize;
164};
165
166#endif
Definition motif.hh:33
Definition motif.hh:133
holds the stepwise constant function of GC content class indices
Definition motif.hh:152
This class implements a double object with a very large range.
Definition lldouble.hh:31
A simple matrix class. Base class for all mathematical matrix objects.
Definition matrix.hh:27
Definition motif.hh:92
a class for converting sequence into integer replacing Base4Int
Definition geneticcode.hh:163
Definition motif.hh:68