00001 /********************************************************************** 00002 fingerprint.h - Base class for fingerprints and fast searching 00003 00004 Copyright (C) 2005 by Chris Morley 00005 00006 This file is part of the Open Babel project. 00007 For more information, see <http://openbabel.sourceforge.net/> 00008 00009 This program is free software; you can redistribute it and/or modify 00010 it under the terms of the GNU General Public License as published by 00011 the Free Software Foundation version 2 of the License. 00012 00013 This program is distributed in the hope that it will be useful, 00014 but WITHOUT ANY WARRANTY; without even the implied warranty of 00015 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 00016 GNU General Public License for more details. 00017 ***********************************************************************/ 00018 00019 #ifndef OB_FINGERPRINT_H 00020 #define OB_FINGERPRINT_H 00021 00022 #include <list> 00023 #include <map> 00024 #include <set> 00025 #include <vector> 00026 #include <string> 00027 00028 #include <openbabel/plugin.h> 00029 00030 #ifndef OBFPRT 00031 #define OBFPRT 00032 #endif 00033 00034 namespace OpenBabel 00035 { 00036 class OBBase; //Forward declaration; used only as pointer. 00037 00039 class OBFPRT OBFingerprint : public OBPlugin 00040 { 00041 //see end of cpp file for detailed documentation 00042 00043 MAKE_PLUGIN(OBFingerprint) 00044 00045 const char* TypeID() 00046 { 00047 return "fingerprints"; 00048 } 00049 00050 //Rest of OBFingerprints declarations 00051 public: 00052 00053 virtual ~OBFingerprint(){} 00054 00056 void SetBit(std::vector<unsigned int>& vec, const unsigned int n); 00057 00059 bool GetBit(const std::vector<unsigned int>& vec, const unsigned int n); 00060 00062 void Fold(std::vector<unsigned int>& vec, unsigned int nbits); 00063 00065 virtual bool GetFingerprint(OBBase* pOb, std::vector<unsigned int>& fp, int nbits=0)=0; 00066 00068 enum FptFlag{FPT_UNIQUEBITS=1}; 00069 virtual unsigned int Flags() { return 0;}; 00070 00073 virtual std::string DescribeBits(const std:: vector<unsigned int> fp, bool bSet=true) 00074 { 00075 std::string txt(""); 00076 return txt; 00077 } 00078 00080 static double Tanimoto(const std::vector<unsigned int>& vec1, const std::vector<unsigned int>& vec2); 00081 00083 static double Tanimoto(const std::vector<unsigned int>& vec1, const unsigned int* p2) 00084 { 00086 int andbits=0, orbits=0; 00087 unsigned int i; 00088 for (i=0;i<vec1.size();++i) 00089 { 00090 int andfp = vec1[i] & p2[i]; 00091 int orfp = vec1[i] | p2[i]; 00092 // Count bits 00093 #ifdef __GNUC__ 00094 andbits += __builtin_popcount(andfp); 00095 orbits += __builtin_popcount(orfp); 00096 #else 00097 for(;andfp;andfp=andfp<<1) 00098 if(andfp<0) ++andbits; 00099 for(;orfp;orfp=orfp<<1) 00100 if(orfp<0) ++orbits; 00101 #endif 00102 } 00103 return((double)andbits/(double)orbits); 00104 }; 00105 00106 static unsigned int Getbitsperint(){ return bitsperint; } 00107 00108 private: 00110 struct bit_or 00111 { 00112 unsigned int operator()(const unsigned int a, const unsigned int b) 00113 { 00114 return a | b; 00115 } 00116 }; 00117 00118 00119 public: 00122 static OBFingerprint* FindFingerprint(const char* ID){ return FindType(ID);} 00123 00124 private: 00125 static const unsigned int bitsperint;// = 8 * sizeof(unsigned int); 00126 }; 00127 00128 //Fast search routines 00131 struct OBFPRT FptIndexHeader 00132 { 00133 unsigned int headerlength; 00134 unsigned int nEntries; 00135 unsigned int words; 00136 char fpid[16]; 00137 char datafilename[256]; 00138 }; 00139 00142 struct OBFPRT FptIndex 00143 { 00144 FptIndexHeader header; 00145 std::vector<unsigned int> fptdata; 00146 std::vector<unsigned int> seekdata; 00147 bool Read(std::istream* pIndexstream); 00149 OBFingerprint* CheckFP(); 00150 }; 00151 00154 class OBFPRT FastSearch 00155 { 00156 //see end of cpp file for detailed documentation 00157 public: 00159 std::string ReadIndexFile(std::string IndexFilename); 00160 std::string ReadIndex(std::istream* pIndexstream); 00161 00162 virtual ~FastSearch(){}; 00163 00165 bool Find(OBBase* pOb, std::vector<unsigned int>& SeekPositions, unsigned int MaxCandidates); 00166 00169 bool FindMatch(OBBase* pOb, std::vector<unsigned int>& SeekPositions, 00170 unsigned int MaxCandidates); 00171 00174 bool FindSimilar(OBBase* pOb, std::multimap<double, unsigned int>& SeekposMap, 00175 double MinTani); 00176 00179 bool FindSimilar(OBBase* pOb, std::multimap<double, unsigned int>& SeekposMap, 00180 int nCandidates=0); 00181 00183 OBFingerprint* GetFingerprint() const{ return _pFP;}; 00184 00186 const FptIndexHeader& GetIndexHeader() const{ return _index.header;}; 00187 00188 private: 00189 FptIndex _index; 00190 OBFingerprint* _pFP; 00191 }; 00192 00195 class OBFPRT FastSearchIndexer 00196 { 00197 //see end of cpp file for detailed documentation 00198 public: 00200 FastSearchIndexer(std::string& datafilename, std::ostream* os, std::string& fpid, 00201 int FptBits=0); 00202 00204 FastSearchIndexer(FptIndex* pindex, std::ostream* os); 00205 00206 ~FastSearchIndexer(); 00207 00209 bool Add(OBBase* pOb, std::streampos seekpos); 00210 00211 private: 00212 std::ostream* _indexstream; 00213 FptIndex* _pindex; 00214 OBFingerprint* _pFP; 00215 int _nbits; 00216 }; 00217 00218 } //namespace OpenBabel 00219 #endif 00220
This file is part of the documentation for Open Babel, version 2.2.0.