Augustus 3.4.0
Loading...
Searching...
No Matches
randseqaccess.hh
1/*
2 * randseqaccess.hh
3 *
4 * License: Artistic License, see file LICENSE.TXT or
5 * https://opensource.org/licenses/artistic-license-1.0
6 */
7
8#ifndef _RANDSEQACCESS
9#define _RANDSEQACCESS
10
11// project includes
12#include "gene.hh"
13#include "types.hh"
14#include "extrinsicinfo.hh"
15
16#include <map>
17#include <vector>
18#include <cstring>
19
20#ifdef M_MYSQL
21#include <mysql++/mysql++.h>
22#endif
23
24#ifdef M_SQLITE
25#include "sqliteDB.hh"
26#endif
27
40public:
41 FeatureCollection* getFeatureCollection(string speciesname);
42 int getGroupID(string speciesname);
43 void addSpeciesToGroup(string skey, int groupID);
// True when getGroupID(speciesname) returns a positive group number, false otherwise.
44 bool withEvidence(string speciesname){return getGroupID(speciesname)>0;}
45 // reading in the extrinsicCfgFile and hintsFile
46 void readGFFFile(const char* filename);
47 void readExtrinsicCFGFile(vector<string> &speciesNames);
48private:
49 map<int,FeatureCollection> speciesColl; // maps the group number to a FeatureCollection
50 map<string,int> groupIDs; // maps the speciesname to the group number
51 FeatureCollection defaultColl; // default FeatureCollection
52 static int groupCount; // number of groups
53};
54
63public:
// Returns the current value of the numSpecies member.
64 int getNumSpecies() {return numSpecies;}
65 void setLength(int idx, string chrName, int len);
66 int getChrLen(int idx, string chrName);
67 void setSpeciesNames(vector<string> speciesNames);
// Returns a copy of the species name stored at position idx of speciesNames.
// idx is passed straight to operator[] without a range check, so the caller must supply a valid index.
68 string getSname(size_t idx) {return speciesNames[idx];}
69 int getMaxSnameLen(); // for neat indentation into right column
70 int getIdx(string speciesname);
71 void printStats();
// Delegates to extrinsicFeatures.withEvidence() for the given species name.
72 bool withEvidence(string speciesname) {return extrinsicFeatures.withEvidence(speciesname);}
73 virtual AnnoSequence* getSeq(string speciesname, string chrName, int start, int end, Strand strand) = 0;
// Index-based convenience overload: looks up the species name with getSname(speciesIdx)
// and forwards all other arguments unchanged to the name-based getSeq().
// NOTE(review): a derived class that declares its own getSeq hides this overload for calls
// made through the derived type (C++ name hiding) - confirm callers go through RandSeqAccess.
74 AnnoSequence* getSeq(size_t speciesIdx, string chrName, int start, int end, Strand strand) {
75 return getSeq(getSname(speciesIdx), chrName, start, end, strand);
76 }
77 virtual SequenceFeatureCollection* getFeatures(string speciesname, string chrName, int start, int end, Strand strand) = 0;
78 virtual ~RandSeqAccess() {}
79protected:
// Protected default constructor with an empty body.
// NOTE(review): numSpecies has no initializer here or at its declaration, so its value is
// indeterminate until something assigns it - confirm that derived classes always set it.
80 RandSeqAccess() {};
81 int numSpecies;
82 vector<map<string,int> > chrLen;
83 vector<string> speciesNames;
84 map<string, size_t> speciesIndex; // to quickly access the index for a given species name
85 SpeciesCollection extrinsicFeatures; // all hints
86};
87
95public:
96 MemSeqAccess(vector<string> s);
// Empty destructor: the char* values held in the 'sequences' map are not released here.
// NOTE(review): whether this class owns those buffers (and how they were allocated) is not
// visible in this header - confirm before implementing the TODO below.
97 ~MemSeqAccess(){} // TODO: delete DNA sequences from 'sequences' map
98 AnnoSequence* getSeq(string speciesname, string chrName, int start, int end, Strand strand);
99 SequenceFeatureCollection* getFeatures(string speciesname, string chrName, int start, int end, Strand strand);
// Empty body: calling open() on this class does nothing.
100 void open(){}
101private:
102 map<string,string> filenames;
103 map<string,char*> sequences; //keys: speciesname:chrName values: dna sequence
104};
105
106
107/*
108 * Read an input file of the format:
109 * human <TAB> /dir/to/genome/genome.fa
110 * Mus musculus <TAB> /dir/to/genome/mouse.fa
111 * into a map (presumably species name -> genome file path; the definition is not shown here).
112 */
113map<string,string> getFileNames (string listfile);
114
122public:
123 virtual AnnoSequence* getSeq(string speciesname, string chrName, int start, int end, Strand strand)=0;
124 virtual SequenceFeatureCollection* getFeatures(string speciesname, string chrName, int start, int end, Strand strand)=0;
125 virtual ~DbSeqAccess() {}
126
127protected:
128 DbSeqAccess(vector<string> s = vector<string>());
129 string dbaccess;
130
131};
132
133#ifdef M_MYSQL
// DbSeqAccess implementation that holds a mysql++ connection (member 'con').
// The class is only compiled when M_MYSQL is defined.
134class MysqlAccess : public DbSeqAccess {
135public:
// Forwards s to the DbSeqAccess constructor and then calls open() immediately.
136 MysqlAccess(vector<string> s = vector<string>()) : DbSeqAccess(s){
137 open();
138 }
// Empty destructor body; members are destroyed by their own destructors.
139 ~MysqlAccess() {}
// Implements the pure virtual getSeq() declared in DbSeqAccess.
140 AnnoSequence* getSeq(string speciesname, string chrName, int start, int end, Strand strand);
141 // the following function is for the BGI-style database
142 AnnoSequence* getSeq2(string speciesname, string chrName, int start, int end, Strand strand);
// Implements the pure virtual getFeatures() declared in DbSeqAccess.
143 SequenceFeatureCollection* getFeatures(string speciesname, string chrName, int start, int end, Strand strand);
// Called from the constructor; defined outside this header.
144 void open();
// NOTE(review): presumably parses the inherited 'dbaccess' string into 'db_information' - confirm in the definition.
145 int split_dbaccess();
// 'out' defaults to cout when the caller passes no stream.
146 void connect_db(ostream& out=cout);
// Member templates below are only declared here; their definitions are not part of this listing.
147 template<class T>
148 AnnoSequence* getNextDBSequence(string charName, int start, int end, vector<T>& asm_query_region);
149 // template<class T>
150 // AnnoSequence* getDBSequenceList(string charName,int start,int end,vector<T>& asm_query_region);
151 template<class T>
152 int get_region_coord(int seq_region_id, int start, int end, vector<T>& asm_query_region);
153
154private:
// mysql++ connection object used by this class.
155 mysqlpp::Connection con;
// NOTE(review): content and layout are not visible here; see split_dbaccess()/connect_db() definitions.
156 vector<string> db_information;
157};
158#endif // M_MYSQL
159
160#ifdef M_SQLITE
161
// DbSeqAccess implementation that holds an SQLiteDB member ('db').
// The class is only compiled when M_SQLITE is defined.
168class SQLiteAccess : public DbSeqAccess {
169public:
// Forwards s to the DbSeqAccess constructor, constructs 'db' from f, and fills 'filenames'
// with the result of getFileNames(Constant::speciesfilenames).
170 SQLiteAccess(const char* f, vector<string> s = vector<string>()) : DbSeqAccess(s), db(f) {
171 filenames = getFileNames (Constant::speciesfilenames);
172 }
// Empty destructor body; 'db' and 'filenames' are destroyed by their own destructors.
173 ~SQLiteAccess() {}
// Implements the pure virtual getSeq() declared in DbSeqAccess.
174 AnnoSequence* getSeq(string speciesname, string chrName, int start, int end, Strand strand);
// Implements the pure virtual getFeatures() declared in DbSeqAccess.
175 SequenceFeatureCollection* getFeatures(string speciesname, string chrName, int start, int end, Strand strand);
176private:
// Database handle constructed from the constructor argument f.
177 SQLiteDB db;
// Result of getFileNames(); presumably species name -> genome file path - TODO confirm.
178 map<string,string> filenames;
179};
180#endif // M_SQLITE
181
182#endif // _RANDSEQACCESS
Definition gene.hh:548
Random access to sequence segments through a database.
Definition randseqaccess.hh:121
Definition extrinsicinfo.hh:314
Achieve random access by simply storing all genomes in memory and then retrieving the required substr...
Definition randseqaccess.hh:94
abstract class for quick access to an arbitrary sequence segment in genomes needed for comparative ge...
Definition randseqaccess.hh:62
Definition sqliteDB.hh:42
holds all extrinsic feature information for one sequence
Definition extrinsicinfo.hh:86
SpeciesCollection holds all extrinsic evidence given for the species.
Definition randseqaccess.hh:39