00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019 #ifndef OB_FINGERPRINT_H
00020 #define OB_FINGERPRINT_H
00021
00022 #include <list>
00023 #include <map>
00024 #include <set>
00025 #include <vector>
00026 #include <string>
00027
00028 namespace OpenBabel
00029 {
00030 class OBBase;
00031
00033 class OBAPI OBFingerprint
00034 {
00035
00036 public:
00038 void SetBit(std::vector<unsigned int>& vec, unsigned int n);
00039
00041 void Fold(std::vector<unsigned int>& vec, unsigned int nbits);
00042
00044 virtual bool GetFingerprint(OBBase* pOb, std::vector<unsigned int>& fp, int nbits=0)=0;
00045
00047 virtual std::string Description()=0;
00048
00050 enum FptFlag{FPT_UNIQUEBITS=1};
00051 virtual unsigned int Flags() { return 0;};
00052
00054 static bool GetNextFPrt(std::string& id, OBFingerprint*& pFPrt);
00055
00057 static OBFingerprint* FindFingerprint(std::string& ID);
00058
00060 static double Tanimoto(const std::vector<unsigned int>& vec1, const std::vector<unsigned int>& vec2);
00061
00063 static double Tanimoto(const std::vector<unsigned int>& vec1, const unsigned int* p2)
00064 {
00066 int andbits=0, orbits=0;
00067 unsigned int i;
00068 for (i=0;i<vec1.size();++i)
00069 {
00070 int andfp = vec1[i] & p2[i];
00071 int orfp = vec1[i] | p2[i];
00072
00073 for(;andfp;andfp=andfp<<1)
00074 if(andfp<0) ++andbits;
00075 for(;orfp;orfp=orfp<<1)
00076 if(orfp<0) ++orbits;
00077 }
00078 return((double)andbits/(double)orbits);
00079 };
00080
00081 static unsigned int Getbitsperint(){ return bitsperint; }
00082
00083 private:
00085 struct bit_or
00086 {
00087 unsigned int operator()(const unsigned int a, const unsigned int b)
00088 {
00089 return a | b;
00090 }
00091 };
00092
00093 typedef std::map<std::string, OBFingerprint*> FPMapType;
00094 typedef FPMapType::iterator Fptpos;
00095
00096 protected:
00101 static FPMapType& FPtsMap()
00102 {
00103 static FPMapType* fptm = NULL;
00104 if (!fptm)
00105 fptm = new FPMapType;
00106 return *fptm;
00107 };
00108
00109 OBFingerprint(std::string ID, bool IsDefault=false)
00110 {
00111 FPtsMap()[ID] = this;
00112 if(IsDefault || FPtsMap().empty())
00113 _pDefault=this;
00114 };
00115
00116 private:
00117 static OBFingerprint* _pDefault;
00118 static const unsigned int bitsperint;
00119 static int rubbish;
00120 };
00121
00122
00123
00124
00125
00126
00128 struct OBAPI FptIndexHeader
00129 {
00130 unsigned int headerlength;
00131 unsigned int nEntries;
00132 unsigned int words;
00133 char fpid[16];
00134 char datafilename[256];
00135 };
00137 struct OBAPI FptIndex
00138 {
00139 FptIndexHeader header;
00140 std::vector<unsigned int> fptdata;
00141 std::vector<unsigned int> seekdata;
00142 bool Read(std::istream* pIndexstream);
00144 OBFingerprint* CheckFP();
00145 };
00146
00148 class OBAPI FastSearch
00149 {
00150
00151 public:
00152 std::string ReadIndex(std::istream* pIndexstream);
00153 virtual ~FastSearch(){};
00154
00156 bool Find(OBBase* pOb, std::vector<unsigned int>& SeekPositions, unsigned int MaxCandidates);
00157
00160 bool FindSimilar(OBBase* pOb, std::multimap<double, unsigned int>& SeekposMap,
00161 double MinTani);
00162
00165 bool FindSimilar(OBBase* pOb, std::multimap<double, unsigned int>& SeekposMap,
00166 int nCandidates=0);
00167
00169 OBFingerprint* GetFingerprint() const{ return _pFP;};
00170
00171 private:
00172 FptIndex _index;
00173 OBFingerprint* _pFP;
00174 };
00175
00176
00178 class OBAPI FastSearchIndexer
00179 {
00180
00181 public:
00183 FastSearchIndexer(std::string& datafilename, std::ostream* os, std::string& fpid,
00184 int FptBits=0);
00185
00187 FastSearchIndexer(FptIndex* pindex, std::ostream* os);
00188
00189 ~FastSearchIndexer();
00190
00192 bool Add(OBBase* pOb, std::streampos seekpos);
00193
00194 private:
00195 std::ostream* _indexstream;
00196 FptIndex* _pindex;
00197 OBFingerprint* _pFP;
00198 int _nbits;
00199 };
00200
00201 }
00202 #endif
00203