00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019 #ifndef OB_FINGERPRINT_H
00020 #define OB_FINGERPRINT_H
00021
00022 #include <list>
00023 #include <map>
00024 #include <set>
00025 #include <vector>
00026 #include <string>
00027
00028 #include <openbabel/pluginiter.h>
00029
00030 #ifndef OBFPRT
00031 #define OBFPRT
00032 #endif
00033
00034 namespace OpenBabel
00035 {
00036 class OBBase;
00037
00039 class OBFPRT OBFingerprint
00040 {
00041
00042
00043 MAKE_PLUGIN(OBFingerprint);
00044
00045 public:
00046
00047 virtual ~OBFingerprint(){}
00048
00050 void SetBit(std::vector<unsigned int>& vec, unsigned int n);
00051
00053 void Fold(std::vector<unsigned int>& vec, unsigned int nbits);
00054
00056 virtual bool GetFingerprint(OBBase* pOb, std::vector<unsigned int>& fp, int nbits=0)=0;
00057
00059 virtual std::string Description()=0;
00060
00062 enum FptFlag{FPT_UNIQUEBITS=1};
00063 virtual unsigned int Flags() { return 0;};
00064
00065
00066
00067
00068
00070 static double Tanimoto(const std::vector<unsigned int>& vec1, const std::vector<unsigned int>& vec2);
00071
00073 static double Tanimoto(const std::vector<unsigned int>& vec1, const unsigned int* p2)
00074 {
00076 int andbits=0, orbits=0;
00077 unsigned int i;
00078 for (i=0;i<vec1.size();++i)
00079 {
00080 int andfp = vec1[i] & p2[i];
00081 int orfp = vec1[i] | p2[i];
00082
00083 for(;andfp;andfp=andfp<<1)
00084 if(andfp<0) ++andbits;
00085 for(;orfp;orfp=orfp<<1)
00086 if(orfp<0) ++orbits;
00087 }
00088 return((double)andbits/(double)orbits);
00089 };
00090
00091 static unsigned int Getbitsperint(){ return bitsperint; }
00092
00093 private:
00095 struct bit_or
00096 {
00097 unsigned int operator()(const unsigned int a, const unsigned int b)
00098 {
00099 return a | b;
00100 }
00101 };
00102
00103
00104 public:
00107 static OBFingerprint* FindFingerprint(const std::string& ID){ return Iter().FindType(ID);}
00108
00109 private:
00110 static const unsigned int bitsperint;
00111 };
00112
00113
00114
00117 struct OBFPRT FptIndexHeader
00118 {
00119 unsigned int headerlength;
00120 unsigned int nEntries;
00121 unsigned int words;
00122 char fpid[16];
00123 char datafilename[256];
00124 };
00125
00128 struct OBFPRT FptIndex
00129 {
00130 FptIndexHeader header;
00131 std::vector<unsigned int> fptdata;
00132 std::vector<unsigned int> seekdata;
00133 bool Read(std::istream* pIndexstream);
00135 OBFingerprint* CheckFP();
00136 };
00137
00140 class OBFPRT FastSearch
00141 {
00142
00143 public:
00145 std::string ReadIndexFile(std::string IndexFilename);
00146 std::string ReadIndex(std::istream* pIndexstream);
00147
00148 virtual ~FastSearch(){};
00149
00151 bool Find(OBBase* pOb, std::vector<unsigned int>& SeekPositions, unsigned int MaxCandidates);
00152
00155 bool FindMatch(OBBase* pOb, std::vector<unsigned int>& SeekPositions,
00156 unsigned int MaxCandidates);
00157
00160 bool FindSimilar(OBBase* pOb, std::multimap<double, unsigned int>& SeekposMap,
00161 double MinTani);
00162
00165 bool FindSimilar(OBBase* pOb, std::multimap<double, unsigned int>& SeekposMap,
00166 int nCandidates=0);
00167
00169 OBFingerprint* GetFingerprint() const{ return _pFP;};
00170
00172 const FptIndexHeader& GetIndexHeader() const{ return _index.header;};
00173
00174 private:
00175 FptIndex _index;
00176 OBFingerprint* _pFP;
00177 };
00178
00179
00182 class OBFPRT FastSearchIndexer
00183 {
00184
00185 public:
00187 FastSearchIndexer(std::string& datafilename, std::ostream* os, std::string& fpid,
00188 int FptBits=0);
00189
00191 FastSearchIndexer(FptIndex* pindex, std::ostream* os);
00192
00193 ~FastSearchIndexer();
00194
00196 bool Add(OBBase* pOb, std::streampos seekpos);
00197
00198 private:
00199 std::ostream* _indexstream;
00200 FptIndex* _pindex;
00201 OBFingerprint* _pFP;
00202 int _nbits;
00203 };
00204
00205 }
00206 #endif
00207