fingerprint.h
Go to the documentation of this file.00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019 #ifndef OB_FINGERPRINT_H
00020 #define OB_FINGERPRINT_H
00021
00022 #include <list>
00023 #include <map>
00024 #include <set>
00025 #include <vector>
00026 #include <string>
00027
00028 #include <openbabel/plugin.h>
00029
00030 #ifndef OBFPRT
00031 #define OBFPRT
00032 #endif
00033
00034 namespace OpenBabel
00035 {
00036 class OBBase;
00037
00039 class OBFPRT OBFingerprint : public OBPlugin
00040 {
00041
00042
00043 MAKE_PLUGIN(OBFingerprint)
00044
00045 const char* TypeID()
00046 {
00047 return "fingerprints";
00048 }
00049
00050
00051 public:
00052
00053 virtual ~OBFingerprint(){}
00054
00056 void SetBit(std::vector<unsigned int>& vec, const unsigned int n);
00057
00059 bool GetBit(const std::vector<unsigned int>& vec, const unsigned int n);
00060
00062 void Fold(std::vector<unsigned int>& vec, unsigned int nbits);
00063
00065 virtual bool GetFingerprint(OBBase* pOb, std::vector<unsigned int>& fp, int nbits=0)=0;
00066
00068 enum FptFlag{FPT_UNIQUEBITS=1};
00069 virtual unsigned int Flags() { return 0;};
00070
00073 virtual std::string DescribeBits(const std:: vector<unsigned int> fp, bool bSet=true)
00074 {
00075 std::string txt("");
00076 return txt;
00077 }
00078
00080 static double Tanimoto(const std::vector<unsigned int>& vec1, const std::vector<unsigned int>& vec2);
00081
00083 static double Tanimoto(const std::vector<unsigned int>& vec1, const unsigned int* p2)
00084 {
00086 int andbits=0, orbits=0;
00087 unsigned int i;
00088 for (i=0;i<vec1.size();++i)
00089 {
00090 int andfp = vec1[i] & p2[i];
00091 int orfp = vec1[i] | p2[i];
00092
00093
00094
00095
00096
00097 #if __GNUC__ > 3 || (__GNUC__ == 3 && __GNUC_MINOR__ >= 4)
00098 andbits += __builtin_popcount(andfp);
00099 orbits += __builtin_popcount(orfp);
00100 #else
00101 for(;andfp;andfp=andfp<<1)
00102 if(andfp<0) ++andbits;
00103 for(;orfp;orfp=orfp<<1)
00104 if(orfp<0) ++orbits;
00105 #endif
00106 }
00107 return((double)andbits/(double)orbits);
00108 };
00109
00110 static unsigned int Getbitsperint(){ return bitsperint; }
00111
00112 private:
00114 struct bit_or
00115 {
00116 unsigned int operator()(const unsigned int a, const unsigned int b)
00117 {
00118 return a | b;
00119 }
00120 };
00121
00122
00123 public:
00126 static OBFingerprint* FindFingerprint(const char* ID){ return FindType(ID);}
00127
00128 private:
00129 static const unsigned int bitsperint;
00130 };
00131
00132
00135 struct OBFPRT FptIndexHeader
00136 {
00137 unsigned int headerlength;
00138 unsigned int nEntries;
00139 unsigned int words;
00140 char fpid[16];
00141 char datafilename[256];
00142 };
00143
00146 struct OBFPRT FptIndex
00147 {
00148 FptIndexHeader header;
00149 std::vector<unsigned int> fptdata;
00150 std::vector<unsigned int> seekdata;
00151 bool Read(std::istream* pIndexstream);
00152 bool ReadIndex(std::istream* pIndexstream);
00153 bool ReadHeader(std::istream* pIndexstream);
00154
00156 OBFingerprint* CheckFP();
00157 };
00158
00161 class OBFPRT FastSearch
00162 {
00163
00164 public:
00166 std::string ReadIndexFile(std::string IndexFilename);
00167 std::string ReadIndex(std::istream* pIndexstream);
00168
00169 virtual ~FastSearch(){};
00170
00172 bool Find(OBBase* pOb, std::vector<unsigned int>& SeekPositions, unsigned int MaxCandidates);
00173
00176 bool FindMatch(OBBase* pOb, std::vector<unsigned int>& SeekPositions,
00177 unsigned int MaxCandidates);
00178
00181 bool FindSimilar(OBBase* pOb, std::multimap<double, unsigned int>& SeekposMap,
00182 double MinTani, double MaxTani = 1.1 );
00183
00186 bool FindSimilar(OBBase* pOb, std::multimap<double, unsigned int>& SeekposMap,
00187 int nCandidates=0);
00188
00190 OBFingerprint* GetFingerprint() const{ return _pFP;};
00191
00193 const FptIndexHeader& GetIndexHeader() const{ return _index.header;};
00194
00195 private:
00196 FptIndex _index;
00197 OBFingerprint* _pFP;
00198 };
00199
00202 class OBFPRT FastSearchIndexer
00203 {
00204
00205 public:
00207 FastSearchIndexer(std::string& datafilename, std::ostream* os, std::string& fpid,
00208 int FptBits=0, int nmols=0);
00209
00211 FastSearchIndexer(FptIndex* pindex, std::ostream* os, int nmols=0);
00212
00213 ~FastSearchIndexer();
00214
00216 bool Add(OBBase* pOb, std::streampos seekpos);
00217
00218 private:
00219 std::ostream* _indexstream;
00220 FptIndex* _pindex;
00221 OBFingerprint* _pFP;
00222 int _nbits;
00223 };
00224
00225 }
00226 #endif
00227