Augustus 3.4.0
Loading...
Searching...
No Matches
genbank.hh
1/*
2 * genbank.hh
3 *
4 * License: Artistic License, see file LICENSE.TXT or
5 * https://opensource.org/licenses/artistic-license-1.0
6 */
7
8#ifndef _GENBANK_HH
9#define _GENBANK_HH
10
11// project includes
12#include "types.hh"
13#include "gene.hh"
14
15// standard C/C++ includes
16#include <list>
17#include <fstream>
18
19#ifdef ZIPINPUT
20#include <boost/iostreams/filtering_stream.hpp>
21#include <boost/iostreams/filter/gzip.hpp>
22#include <boost/iostreams/copy.hpp>
23#endif
24
25#define GBMAXLINELEN 40000
26
// Input file type tag. Stored in GBSplitter::ftype and handed out by
// GBProcessor::fileType(). Enumerator order (and thus the implicit
// values 0, 1, 2) is kept exactly as before.
enum FileType {
    unknown,
    genbank,
    fasta
};
28
34class GBError : public ProjectError{
35public:
41 GBError( string msg ) : ProjectError( msg ) { }
42};
43
50public:
51 State *ranges;
52 int begin, end;
53 string geneid;
54 string fkey;
55 Strand strand;
56 bool complete_l, complete_r; // complete at the left end, right end?
57 GBFeature(){
58 ranges = NULL;
59 begin=end = -1;
60 geneid = "";
61 strand = plusstrand;
62 complete_l = complete_r=true;
63 }
64 GBFeature(const char *);
65 bool checkRange(int len);
66 bool operator<(const GBFeature &other) const{
67 return (begin<other.begin || (begin==other.begin && end<other.end));
68 }
69 bool matches(GBFeature &other);
70};
71
72
73//========================================================================
74
/// The entire information in GenBank format.
85 char* buffer;
/// Pointer into 'buffer' at the "ORIGIN" position.
87 char* seqbegin;
/// The 'buffer' length.
89 int length;
90 int seqlength;
/// A list of pointers into 'buffer' at the "CDS" positions.
92 list<char*> CDSentry;
/// A list of pointers into 'buffer' at the "mRNA" positions.
94 list<char*> mRNAentry;
95 list<GBFeature> CDS;
96 list<GBFeature> mRNA;
97};
98
99//========================================================================
100
101
102
103
104//========================================================================
105
113public:
114 GBSplitter( string fname );
115 ~GBSplitter( );
116 void determineFileType();
117 GBPositions* nextData( );
118 AnnoSequence *getNextFASTASequence( );
119 void clear() {sin.clear(); sin.str(""); ifstrm.close();}
120 FileType ftype;
121private:
122 Boolean findPositions( GBPositions& pos );
123 Boolean gotoEnd( );
124private:
125 ifstream ifstrm;
126 std::stringstream sin;
127};
128
129//========================================================================
130
138public:
142 GBProcessor(string filename);
143
144 FileType fileType() {
145 return gbs.ftype;
146 }
147
151 GBPositions* nextPosition();
160 AnnoSequence* getAnnoSequence( GBPositions* pos );
161 //Gene* getGeneList();
162 AnnoSequence* getAnnoSequenceList();
163 AnnoSequence* getSequenceList();
164private:
165 char* getSequence( GBPositions& pos );
166 char* getJoin( const char* pos, Strand &strand, char *& genename );
167private:
169 GBSplitter gbs;
170 int gbVerbosity;
171};
172
173#endif // _GENBANK_HH
174
Definition gene.hh:548
Exception class for the GenBank classes.
Definition genbank.hh:34
GBError(string msg)
Definition genbank.hh:41
A GenBank feature entry (e.g. CDS, mRNA, TATA_signal).
Definition genbank.hh:49
A GenBank data processor.
Definition genbank.hh:137
Gene * getGene(GBPositions *pos)
A GenBank database splitter.
Definition genbank.hh:112
Definition gene.hh:351
Definition types.hh:449
Definition gene.hh:101
A GenBank data structure with the information about a gene.
Definition genbank.hh:83
list< char * > mRNAentry
A list of pointers into 'buffer' at the "mRNA" positions.
Definition genbank.hh:94
int length
The 'buffer' length.
Definition genbank.hh:89
char * seqbegin
Pointer into 'buffer' at the "ORIGIN" position.
Definition genbank.hh:87
char * buffer
The entire information in GenBank format.
Definition genbank.hh:85
list< char * > CDSentry
A list of pointers into 'buffer' at the "CDS" positions.
Definition genbank.hh:92