00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019 #ifndef OB_FINGERPRINT_H
00020 #define OB_FINGERPRINT_H
00021
00022 #include <list>
00023 #include <map>
00024 #include <set>
00025 #include <vector>
00026 #include <string>
00027
00028 #include <openbabel/plugin.h>
00029
00030 #ifndef OBFPRT
00031 #define OBFPRT
00032 #endif
00033
00034 namespace OpenBabel
00035 {
00036 class OBBase;
00037
00039 class OBFPRT OBFingerprint : public OBPlugin
00040 {
00041
00042
00043 MAKE_PLUGIN(OBFingerprint)
00044
00045 const char* TypeID()
00046 {
00047 return "fingerprints";
00048 }
00049
00050
00051 public:
00052
00053 virtual ~OBFingerprint(){}
00054
00056 void SetBit(std::vector<unsigned int>& vec, const unsigned int n);
00057
00059 bool GetBit(const std::vector<unsigned int>& vec, const unsigned int n);
00060
00062 void Fold(std::vector<unsigned int>& vec, unsigned int nbits);
00063
00065 virtual bool GetFingerprint(OBBase* pOb, std::vector<unsigned int>& fp, int nbits=0)=0;
00066
00068 enum FptFlag{FPT_UNIQUEBITS=1};
00069 virtual unsigned int Flags() { return 0;};
00070
00073 virtual std::string DescribeBits(const std:: vector<unsigned int> fp, bool bSet=true)
00074 {
00075 std::string txt("");
00076 return txt;
00077 }
00078
00080 static double Tanimoto(const std::vector<unsigned int>& vec1, const std::vector<unsigned int>& vec2);
00081
00083 static double Tanimoto(const std::vector<unsigned int>& vec1, const unsigned int* p2)
00084 {
00086 int andbits=0, orbits=0;
00087 unsigned int i;
00088 for (i=0;i<vec1.size();++i)
00089 {
00090 int andfp = vec1[i] & p2[i];
00091 int orfp = vec1[i] | p2[i];
00092
00093 #ifdef __GNUC__
00094 andbits += __builtin_popcount(andfp);
00095 orbits += __builtin_popcount(orfp);
00096 #else
00097 for(;andfp;andfp=andfp<<1)
00098 if(andfp<0) ++andbits;
00099 for(;orfp;orfp=orfp<<1)
00100 if(orfp<0) ++orbits;
00101 #endif
00102 }
00103 return((double)andbits/(double)orbits);
00104 };
00105
00106 static unsigned int Getbitsperint(){ return bitsperint; }
00107
00108 private:
00110 struct bit_or
00111 {
00112 unsigned int operator()(const unsigned int a, const unsigned int b)
00113 {
00114 return a | b;
00115 }
00116 };
00117
00118
00119 public:
00122 static OBFingerprint* FindFingerprint(const char* ID){ return FindType(ID);}
00123
00124 private:
00125 static const unsigned int bitsperint;
00126 };
00127
00128
00131 struct OBFPRT FptIndexHeader
00132 {
00133 unsigned int headerlength;
00134 unsigned int nEntries;
00135 unsigned int words;
00136 char fpid[16];
00137 char datafilename[256];
00138 };
00139
00142 struct OBFPRT FptIndex
00143 {
00144 FptIndexHeader header;
00145 std::vector<unsigned int> fptdata;
00146 std::vector<unsigned int> seekdata;
00147 bool Read(std::istream* pIndexstream);
00149 OBFingerprint* CheckFP();
00150 };
00151
00154 class OBFPRT FastSearch
00155 {
00156
00157 public:
00159 std::string ReadIndexFile(std::string IndexFilename);
00160 std::string ReadIndex(std::istream* pIndexstream);
00161
00162 virtual ~FastSearch(){};
00163
00165 bool Find(OBBase* pOb, std::vector<unsigned int>& SeekPositions, unsigned int MaxCandidates);
00166
00169 bool FindMatch(OBBase* pOb, std::vector<unsigned int>& SeekPositions,
00170 unsigned int MaxCandidates);
00171
00174 bool FindSimilar(OBBase* pOb, std::multimap<double, unsigned int>& SeekposMap,
00175 double MinTani);
00176
00179 bool FindSimilar(OBBase* pOb, std::multimap<double, unsigned int>& SeekposMap,
00180 int nCandidates=0);
00181
00183 OBFingerprint* GetFingerprint() const{ return _pFP;};
00184
00186 const FptIndexHeader& GetIndexHeader() const{ return _index.header;};
00187
00188 private:
00189 FptIndex _index;
00190 OBFingerprint* _pFP;
00191 };
00192
00195 class OBFPRT FastSearchIndexer
00196 {
00197
00198 public:
00200 FastSearchIndexer(std::string& datafilename, std::ostream* os, std::string& fpid,
00201 int FptBits=0);
00202
00204 FastSearchIndexer(FptIndex* pindex, std::ostream* os);
00205
00206 ~FastSearchIndexer();
00207
00209 bool Add(OBBase* pOb, std::streampos seekpos);
00210
00211 private:
00212 std::ostream* _indexstream;
00213 FptIndex* _pindex;
00214 OBFingerprint* _pFP;
00215 int _nbits;
00216 };
00217
00218 }
00219 #endif
00220