00001 /********************************************************************** 00002 fingerprint.h - Base class for fingerprints and fast searching 00003 00004 Copyright (C) 2005 by Chris Morley 00005 00006 This file is part of the Open Babel project. 00007 For more information, see <http://openbabel.org/> 00008 00009 This program is free software; you can redistribute it and/or modify 00010 it under the terms of the GNU General Public License as published by 00011 the Free Software Foundation version 2 of the License. 00012 00013 This program is distributed in the hope that it will be useful, 00014 but WITHOUT ANY WARRANTY; without even the implied warranty of 00015 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 00016 GNU General Public License for more details. 00017 ***********************************************************************/ 00018 00019 #ifndef OB_FINGERPRINT_H 00020 #define OB_FINGERPRINT_H 00021 00022 #include <list> 00023 #include <map> 00024 #include <set> 00025 #include <vector> 00026 #include <string> 00027 00028 #include <openbabel/plugin.h> 00029 00030 #ifndef OBFPRT 00031 #define OBFPRT 00032 #endif 00033 00034 namespace OpenBabel 00035 { 00036 class OBBase; //Forward declaration; used only as pointer. 00037 00039 class OBFPRT OBFingerprint : public OBPlugin 00040 { 00041 //see end of cpp file for detailed documentation 00042 00043 MAKE_PLUGIN(OBFingerprint) 00044 00045 const char* TypeID() 00046 { 00047 return "fingerprints"; 00048 } 00049 00050 //Rest of OBFingerprints declarations 00051 public: 00052 00053 virtual ~OBFingerprint(){} 00054 00056 void SetBit(std::vector<unsigned int>& vec, const unsigned int n); 00057 00059 bool GetBit(const std::vector<unsigned int>& vec, const unsigned int n); 00060 00062 void Fold(std::vector<unsigned int>& vec, unsigned int nbits); 00063 00065 virtual bool GetFingerprint(OBBase* pOb, std::vector<unsigned int>& fp, int nbits=0)=0; 00066 00068 enum FptFlag{FPT_UNIQUEBITS=1, FPT_NOINFO=2}; 00069 virtual unsigned int Flags() { return 0;}; 00071 virtual void SetFlags(unsigned int){} 00072 00075 virtual std::string DescribeBits(const std::vector<unsigned int> /* fp */, 00076 bool /* bSet */ =true) 00077 { 00078 std::string txt(""); 00079 return txt; 00080 } 00081 00083 static double Tanimoto(const std::vector<unsigned int>& vec1, const std::vector<unsigned int>& vec2); 00084 00086 static double Tanimoto(const std::vector<unsigned int>& vec1, const unsigned int* p2) 00087 { 00089 int andbits=0, orbits=0; 00090 unsigned int i; 00091 for (i=0;i<vec1.size();++i) 00092 { 00093 int andfp = vec1[i] & p2[i]; 00094 int orfp = vec1[i] | p2[i]; 00095 // Count bits 00096 /* GCC 3.4 supports a "population count" builtin, which on many targets is 00097 implemented with a single instruction. There is a fallback definition 00098 in libgcc in case a target does not have one, which should be just as 00099 good as the static function below. */ 00100 #if __GNUC__ > 3 || (__GNUC__ == 3 && __GNUC_MINOR__ >= 4) 00101 andbits += __builtin_popcount(andfp); 00102 orbits += __builtin_popcount(orfp); 00103 #else 00104 for(;andfp;andfp=andfp<<1) 00105 if(andfp<0) ++andbits; 00106 for(;orfp;orfp=orfp<<1) 00107 if(orfp<0) ++orbits; 00108 #endif 00109 } 00110 return((double)andbits/(double)orbits); 00111 }; 00112 00113 static unsigned int Getbitsperint(){ return bitsperint; } 00114 00115 private: 00117 struct bit_or 00118 { 00119 unsigned int operator()(const unsigned int a, const unsigned int b) 00120 { 00121 return a | b; 00122 } 00123 }; 00124 00125 00126 public: 00129 static OBFingerprint* FindFingerprint(const char* ID){ return FindType(ID);} 00130 00131 private: 00132 static const unsigned int bitsperint;// = 8 * sizeof(unsigned int); 00133 }; 00134 00135 //Fast search routines 00138 struct OBFPRT FptIndexHeader 00139 { 00140 unsigned int headerlength; 00141 unsigned int nEntries; 00142 unsigned int words; 00143 char fpid[16]; 00144 char datafilename[256]; 00145 }; 00146 00149 struct OBFPRT FptIndex 00150 { 00151 FptIndexHeader header; 00152 std::vector<unsigned int> fptdata; 00153 std::vector<unsigned int> seekdata; 00154 bool Read(std::istream* pIndexstream); 00155 bool ReadIndex(std::istream* pIndexstream); 00156 bool ReadHeader(std::istream* pIndexstream); 00157 00159 OBFingerprint* CheckFP(); 00160 }; 00161 00164 class OBFPRT FastSearch 00165 { 00166 //see end of cpp file for detailed documentation 00167 public: 00169 std::string ReadIndexFile(std::string IndexFilename); 00170 std::string ReadIndex(std::istream* pIndexstream); 00171 00172 virtual ~FastSearch(){}; 00173 00175 bool Find(OBBase* pOb, std::vector<unsigned int>& SeekPositions, unsigned int MaxCandidates); 00176 00179 bool FindMatch(OBBase* pOb, std::vector<unsigned int>& SeekPositions, 00180 unsigned int MaxCandidates); 00181 00184 bool FindSimilar(OBBase* pOb, std::multimap<double, unsigned int>& SeekposMap, 00185 double MinTani, double MaxTani = 1.1 ); 00186 00189 bool FindSimilar(OBBase* pOb, std::multimap<double, unsigned int>& SeekposMap, 00190 int nCandidates=0); 00191 00193 OBFingerprint* GetFingerprint() const{ return _pFP;}; 00194 00196 const FptIndexHeader& GetIndexHeader() const{ return _index.header;}; 00197 00198 private: 00199 FptIndex _index; 00200 OBFingerprint* _pFP; 00201 }; 00202 00205 class OBFPRT FastSearchIndexer 00206 { 00207 //see end of cpp file for detailed documentation 00208 public: 00210 FastSearchIndexer(std::string& datafilename, std::ostream* os, std::string& fpid, 00211 int FptBits=0, int nmols=0); 00212 00214 FastSearchIndexer(FptIndex* pindex, std::ostream* os, int nmols=0); 00215 00216 ~FastSearchIndexer(); 00217 00219 bool Add(OBBase* pOb, std::streampos seekpos); 00220 00221 private: 00222 std::ostream* _indexstream; 00223 FptIndex* _pindex; 00224 OBFingerprint* _pFP; 00225 int _nbits; 00226 }; 00227 00228 } //namespace OpenBabel 00229 #endif 00230
This file is part of the documentation for Open Babel, version 2.3.