Augustus
3.4.0
Loading...
Searching...
No Matches
include
motif.hh
1
/*
2
* motif.hh
3
*
4
* License: Artistic License, see file LICENSE.TXT or
5
* https://opensource.org/licenses/artistic-license-1.0
6
*/
7
8
#ifndef _MOTIF_HH
9
#define _MOTIF_HH
10
11
// project includes
12
#include "matrix.hh"
13
#include "geneticcode.hh"
// for Seq2Int
14
28
/*
 * The three values that BaseCount::weithType can take.
 * The enumerators keep their original order and therefore their implicit
 * values 0, 1 and 2.
 */
enum WeighingType {
    equalWeights,
    gcContentClasses,
    multiNormalKernel
};
29
33
class
BaseCount
{
34
public
:
35
int
a;
36
int
c;
37
int
g;
38
int
t;
39
static
WeighingType weithType;
40
static
Matrix<double>
weighingMatrix;
41
42
double
ra, rc, rg, rt;
43
BaseCount
();
44
BaseCount
(
int
a,
int
c,
int
g,
int
t);
45
BaseCount
(
const
char
*sequence,
int
len=-1);
46
~BaseCount
() {};
47
static
void
init();
48
void
normalize();
49
void
addSequence(
const
char
*sequence,
int
len);
50
void
addCharacter(
char
nucleotide,
bool
subtract=
false
);
51
void
reverse();
52
static
int
weight(
BaseCount
bc1,
BaseCount
bc2);
53
static
double
doubleWeight(
BaseCount
bc1,
BaseCount
bc2);
54
static
int
gcContentWeight(
BaseCount
bc1,
BaseCount
bc2);
55
static
int
gcContentClass (
double
gcContent);
56
static
int
gcContentClassWeight(
BaseCount
bc1,
BaseCount
bc2);
57
static
double
multiNormalKernelWeight(
BaseCount
bc1,
BaseCount
bc2);
58
static
void
setWeightMatrix(
string
matrixFileName);
59
static
double
phi(
double
x,
double
sigma);
60
61
};
62
63
// Stream insertion for a BaseCount. Declared here only; the definition is
// not part of this header.
ostream& operator<<(ostream& out, const BaseCount& bc);
64
68
struct
Composition
{
69
double
a;
70
double
c;
71
double
g;
72
double
t;
73
74
Composition
() {
75
a = 0.0;
76
c = 0.0;
77
g = 0.0;
78
t = 0.0;
79
}
80
Composition
(
BaseCount
bc) {
81
double
sum = bc.a + bc.c + bc.g + bc.t;
82
a = bc.a / sum;
83
c = bc.c / sum;
84
g = bc.g / sum;
85
t = bc.t / sum;
86
}
87
};
88
92
class
Motif
{
93
public
:
94
int
n;
95
int
k;
96
int
numSeqs;
// number of unweighed sequences
97
int
neighbors;
98
int
pseudocount;
99
100
Motif
() :
101
n(0), k(0), neighbors(0),
102
pseudocount(1),
103
windowProbs(NULL),
104
windowCounts(NULL),
105
s2i(0) {}
106
Motif
& operator = (
const
Motif
& other);
107
108
Motif
(
int
length,
int
memory=0,
int
pseudocount = 1,
int
neighbors = 0);
109
~Motif
();
110
/*
111
* add one sequence to the training set of the motif
112
* seq is the beginning of the motiv, but
113
* seq[-k] ... seq[n-1] or seq[0] ... seq[n+k-1] (reverse case) must be accessible!
114
*/
115
void
addSequence(
const
char
* seq,
int
weight = 1,
bool
reverse=
false
);
116
void
makeProbs();
117
void
printProbs();
118
Double
seqProb(
const
char
* seq,
bool
reverse=
false
,
bool
complement=
false
);
119
void
write(ofstream &out);
120
void
read(ifstream &in);
121
void
clearCounts();
122
char
* getSampleDNA();
123
Double
getProbThreshold (
double
q,
int
numSamples = 10000);
124
private
:
125
vector<Double> *windowProbs;
126
vector<int> *windowCounts;
127
Seq2Int
s2i;
128
};
129
133
class
ContentDecomposition
{
134
public
:
135
int
n;
136
BaseCount
*zus;
137
ContentDecomposition
() : n(0), zus(0) {
138
setProperties();
139
}
140
BaseCount
getBaseCount(
int
i);
141
int
getNearestBaseCountIndex(
BaseCount
bc);
142
private
:
143
void
setProperties();
144
void
makeDecomposition();
145
};
146
152
class
ContentStairs
{
153
public
:
154
ContentStairs
();
155
~ContentStairs
();
156
void
computeStairs(
const
char
* dna);
157
int
getNextStep(
int
from);
158
int
*idx;
// GC content class index for each position of dna
159
const
char
*dna;
// just a pointer, to check whether update is necessary
160
int
n;
// dna length
161
map<int,int> nextStep;
162
private
:
163
int
GCwinsize;
164
};
165
166
#endif
BaseCount
Definition
motif.hh:33
ContentDecomposition
Definition
motif.hh:133
ContentStairs
holds the stepwise constant function of GC content class indices
Definition
motif.hh:152
LLDouble
This class implements a double object with a very large range.
Definition
lldouble.hh:31
Matrix
A simple matrix class. Base class for all mathematical matrix objects.
Definition
matrix.hh:27
Motif
Definition
motif.hh:92
Seq2Int
a class for converting sequence into integer replacing Base4Int
Definition
geneticcode.hh:163
Composition
Definition
motif.hh:68
Generated by
1.9.8