8#ifndef _GENETIC_CODE_HH
9#define _GENETIC_CODE_HH
13#include "projectio.hh"
25#define NUM_TRANSTABS 24
/**
 * Define an inline predicate NAME(const char* dna) that is true when the
 * first COUNT characters of dna match PATTERN (strncmp semantics, so a
 * sequence shorter than COUNT never matches a longer pattern).
 */
#define DECLARE_ON(NAME, PATTERN, COUNT)            \
    inline bool NAME(const char* dna) {             \
        return strncmp(dna, PATTERN, COUNT) == 0;   \
    }

// DSS pattern and the pattern tested by the R-variant
// ("ac" is the reverse complement of "gt")
#define DSS_SEQUENCE "gt"
#define RDSS_SEQUENCE "ac"
DECLARE_ON(onDSS, DSS_SEQUENCE, 2)
DECLARE_ON(onRDSS, RDSS_SEQUENCE, 2)

// alternative DSS pattern; "gc" is its own reverse complement, so both
// variants test the same two characters
#define ALT_DSS_SEQUENCE "gc"
#define ALT_RDSS_SEQUENCE "gc"
DECLARE_ON(onAltDSS, ALT_DSS_SEQUENCE, 2)
DECLARE_ON(onAltRDSS, ALT_RDSS_SEQUENCE, 2)
47inline
bool onGenDSS(const
char* dna) {
49 onDSS(dna) || (Constant::dss_gc_allowed && onAltDSS(dna));
51inline bool onGenRDSS(
const char* dna) {
53 onRDSS(dna) || (Constant::dss_gc_allowed && onAltRDSS(dna));
57#define ASS_SEQUENCE "ag"
58#define RASS_SEQUENCE "ct"
59DECLARE_ON(onASS, ASS_SEQUENCE, 2)
60DECLARE_ON(onRASS, RASS_SEQUENCE, 2)
63#define STARTCODON "atg"
64#define RCSTARTCODON "cat"
65DECLARE_ON(onStart, STARTCODON, 3)
66DECLARE_ON(onRStart, RCSTARTCODON, 3)
69DECLARE_ON(onOchre, OCHRECODON, 3)
70DECLARE_ON(onAmber, AMBERCODON, 3)
71DECLARE_ON(onOpal, OPALCODON, 3)
72DECLARE_ON(onROchre, RCOCHRECODON, 3)
73DECLARE_ON(onRAmber, RCAMBERCODON, 3)
74DECLARE_ON(onROpal, RCOPALCODON, 3)
83inline
char wcComplement(
char c) {
107template <
class Iterator>
108inline void putReverseComplement(Iterator result,
const char* dna,
int len) {
109 const char* s = dna + len;
112 *t++ = wcComplement(*s);
122inline char* reverseComplement(
const char* dna) {
125 int len = strlen(dna);
126 char* result =
new char[len+1];
127 putReverseComplement(result, dna, len);
132inline void reverseComplementString(
string &text) {
133 int n = text.length();
134 for (
int i=0; i < n/2; i++) {
137 text[i] = wcComplement(text[n-i-1]);
138 text[n-i-1] = wcComplement(c);
141 text[n/2] = wcComplement(text[n/2]);
/**
 * Reverse text in place by swapping positions i and n-i-1.
 * Empty and one-character strings are left unchanged.
 */
inline void reverseString(std::string &text) {
    const int n = text.length();
    for (int i = 0; i < n/2; i++) {
        const char c = text[i];
        text[i] = text[n-i-1];
        text[n-i-1] = c;
    }
}
166 int operator() (
const char* s)
const {
168 for(
int i = 0; i < size; i++ ){
170 erg |= base2int(s[i]);
174 int rc(
const char* s)
const {
176 for (
int i=0; i<size; i++)
177 erg |= base2int(wcComplement(s[i])) << (2*i);
180 int rev(
const char* s)
const {
182 for (
int i=0; i<size; i++)
183 erg |= base2int(s[i]) << (2*i);
186 string inv(
int pn)
const {
187 string result(size,
'n');
188 for(
int i = size-1; i>=0; i--) {
189 result[i] = int2base(pn%4);
194 string INV(
int pn)
const {
195 string result(size,
'N');
196 for(
int i = size-1; i>=0; i--) {
197 result[i] = int2BASE(pn%4);
203 int read(istream& strm)
const {
204 char* buf =
new char[size];
209 for(
int i = 0; i < size; i++ ){
213 if( c ==
'A' || c ==
'C' || c ==
'G' || c ==
'T' )
216 int result = (*this)(buf);
222 static int base2int(
char c);
223 static char int2base(
int i);
224 static char int2BASE(
int i);
228inline int Seq2Int::base2int(
char c) {
243inline char Seq2Int::int2base(
int i) {
254 throw ProjectError(
"Seq2Int::int2base: internal error: i=" + itoa(i));
258inline char Seq2Int::int2BASE(
int i) {
269 throw ProjectError(
"Seq2Int::int2base: internal error: i=" + itoa(i));
278 ORF() { start = end = -1; complete5prime = complete3prime = 0; strand = plusstrand; }
279 int len() {
return abs(end - start) + 1;}
295 chooseTranslationTable(1);
297 static void printReverseGeneticMap();
301 static void reverseMap();
302 static const char aa_symbols_with_stop[];
303 static bool start_codons[];
304 static Double start_codon_probs[];
305 static const char *
const TranslationTables[];
306 static const char *
const StartCodons[];
308 static int translationtable;
309 static int numStartCodons;
311 static void chooseTranslationTable(
int );
312 static const char*
const aa_symbols;
313 static const char*
const aa_names[];
315 static int **syncodons;
316 static int *codonsOfAA;
317 static int get_aa_from_symbol(
char c) {
320 return string(GeneticCode::aa_symbols_with_stop).find(c)-1;
322 static char translate(
int n) {
323 return aa_symbols_with_stop[map[n]+1];
325 static char translate(
const char* t);
326 static char revtranslate(
const char* t);
327 static bool isStopcodon(
const char* t) {
328 return translate(t)==
'*';
330 static bool isStartcodon(
const char* t,
bool rc=
false){
333 return start_codons[codon.rc(t)];
335 return start_codons[codon(t)];
339 static bool isStartcodon(
int pn){
340 return start_codons[pn];
342 static Double startCodonProb(
const char* t,
bool rc=
false){
344 int pn = rc? codon.rc(t) : codon(t);
345 if (start_codons[pn])
346 return start_codon_probs[pn];
350 static Double startCodonProb(
int pn){
351 return start_codon_probs[pn];
353 static bool isRCStopcodon(
const char* t) {
354 return revtranslate(t)==
'*';
356 static bool containsInFrameStopcodon(
const char*,
int,
int,
bool,
int);
357 static void printStartCodons();
358 static void trainStartCodonProbs(
int startcounts[]);
359 static void writeStart(ofstream &out);
360 static void readStart(ifstream &in);
/**
 * True when b is 0 or 2.
 * NOTE(review): presumably these are the integer codes of the purines a
 * and g; confirm against Seq2Int::base2int.
 */
static bool is_purine(int b) {
    return (b == 0 || b == 2);
}
364 static ORF longestORF(
const char* dna);
368char *getSampledCDS(vector<Double> *emiprobs,
int k,
int numCodons);
The genetic code maps codons to amino acids.
Definition geneticcode.hh:292
This class implements a double object with a very large range.
Definition lldouble.hh:31
Definition geneticcode.hh:276
A class for converting a sequence into an integer; it replaces Base4Int.
Definition geneticcode.hh:163