Open Babel  3.0
fingerprint.h
Go to the documentation of this file.
1 /**********************************************************************
2 fingerprint.h - Base class for fingerprints and fast searching
3 
4 Copyright (C) 2005 by Chris Morley
5 
6 This file is part of the Open Babel project.
7 For more information, see <http://openbabel.org/>
8 
9 This program is free software; you can redistribute it and/or modify
10 it under the terms of the GNU General Public License as published by
11 the Free Software Foundation version 2 of the License.
12 
13 This program is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 GNU General Public License for more details.
17 ***********************************************************************/
18 
19 #ifndef OB_FINGERPRINT_H
20 #define OB_FINGERPRINT_H
21 
22 #include <list>
23 #include <map>
24 #include <set>
25 #include <vector>
26 #include <string>
27 
28 #include <openbabel/plugin.h>
29 
30 #ifndef OBFPRT
31 #define OBFPRT
32 #endif
33 
34 namespace OpenBabel
35 {
36  class OBBase; //Forward declaration; used only as pointer.
37 
39 class OBFPRT OBFingerprint : public OBPlugin
40 {
41 //see end of cpp file for detailed documentation
42 
44 
45 const char* TypeID()
46  {
47  return "fingerprints";
48  }
49 
50  //Rest of OBFingerprints declarations
51 public:
52 
53  virtual ~OBFingerprint(){}
54 
56  void SetBit(std::vector<unsigned int>& vec, const unsigned int n);
57 
59  bool GetBit(const std::vector<unsigned int>& vec, const unsigned int n);
60 
62  void Fold(std::vector<unsigned int>& vec, unsigned int nbits);
63 
65  virtual bool GetFingerprint(OBBase* pOb, std::vector<unsigned int>& fp, int nbits=0)=0;
66 
68  enum FptFlag{FPT_UNIQUEBITS=1, FPT_NOINFO=2};
69  virtual unsigned int Flags() { return 0;};
71  virtual void SetFlags(unsigned int){}
72 
75  virtual std::string DescribeBits(const std::vector<unsigned int> /* fp */,
76  bool /* bSet */ =true)
77  {
78  std::string txt("");
79  return txt;
80  }
81 
83  static double Tanimoto(const std::vector<unsigned int>& vec1, const std::vector<unsigned int>& vec2);
84 
86  static double Tanimoto(const std::vector<unsigned int>& vec1, const unsigned int* p2)
87  {
89  int andbits=0, orbits=0;
90  unsigned int i;
91  for (i=0;i<vec1.size();++i)
92  {
93  int andfp = vec1[i] & p2[i];
94  int orfp = vec1[i] | p2[i];
95  // Count bits
96  /* GCC 3.4 supports a "population count" builtin, which on many targets is
97  implemented with a single instruction. There is a fallback definition
98  in libgcc in case a target does not have one, which should be just as
99  good as the static function below. */
100 #if __GNUC__ > 3 || (__GNUC__ == 3 && __GNUC_MINOR__ >= 4)
101  andbits += __builtin_popcount(andfp);
102  orbits += __builtin_popcount(orfp);
103 #else
104  for(;andfp;andfp=andfp<<1)
105  if(andfp<0) ++andbits;
106  for(;orfp;orfp=orfp<<1)
107  if(orfp<0) ++orbits;
108 #endif
109  }
110  return((double)andbits/(double)orbits);
111  };
112 
113  static unsigned int Getbitsperint(){ return bitsperint; }
114 
115 private:
117  struct bit_or
118  {
119  unsigned int operator()(const unsigned int a, const unsigned int b)
120  {
121  return a | b;
122  }
123  };
124 
125 
126 public:
129 static OBFingerprint* FindFingerprint(const char* ID){ return FindType(ID);}
130 
131 private:
132  static const unsigned int bitsperint;// = 8 * sizeof(unsigned int);
133 };
134 
135 //Fast search routines
138 struct OBFPRT FptIndexHeader
139 {
140  unsigned int headerlength;
141  unsigned int nEntries;
142  unsigned int words;
143  char fpid[15];
144  char seek64; //if true, seek data consists of 64bit long values (only zero in legacy indices)
145  char datafilename[256];
146 };
147 
150 struct OBFPRT FptIndex
151 {
153  std::vector<unsigned int> fptdata;
154  std::vector<unsigned long> seekdata;
155  bool Read(std::istream* pIndexstream);
156  bool ReadIndex(std::istream* pIndexstream);
157  bool ReadHeader(std::istream* pIndexstream);
158 
160  OBFingerprint* CheckFP();
161 };
162 
165 class OBFPRT FastSearch
166 {
167 //see end of cpp file for detailed documentation
168 public:
170  std::string ReadIndexFile(std::string IndexFilename);
171  std::string ReadIndex(std::istream* pIndexstream);
172 
173  virtual ~FastSearch(){};
174 
176  bool Find(OBBase* pOb, std::vector<unsigned long>& SeekPositions, unsigned int MaxCandidates);
177 
180  bool FindMatch(OBBase* pOb, std::vector<unsigned long>& SeekPositions,
181  unsigned int MaxCandidates);
182 
185  bool FindSimilar(OBBase* pOb, std::multimap<double, unsigned long>& SeekposMap,
186  double MinTani, double MaxTani = 1.1 );
187 
190  bool FindSimilar(OBBase* pOb, std::multimap<double, unsigned long>& SeekposMap,
191  int nCandidates=0);
192 
194  OBFingerprint* GetFingerprint() const{ return _pFP;};
195 
197  const FptIndexHeader& GetIndexHeader() const{ return _index.header;};
198 
199 private:
200  FptIndex _index;
201  OBFingerprint* _pFP;
202 };
203 
206 class OBFPRT FastSearchIndexer
207 {
208 //see end of cpp file for detailed documentation
209 public:
211  FastSearchIndexer(std::string& datafilename, std::ostream* os, std::string& fpid,
212  int FptBits=0, int nmols=0);
213 
215  FastSearchIndexer(FptIndex* pindex, std::ostream* os, int nmols=0);
216 
218 
220  bool Add(OBBase* pOb, std::streampos seekpos);
221 
222 private:
223  std::ostream* _indexstream;
224  FptIndex* _pindex;
225  OBFingerprint* _pFP;
226  int _nbits;
227 };
228 
229 } //namespace OpenBabel
230 #endif
231 
Header for fastsearch index file.
Definition: fingerprint.h:138
virtual ~OBFingerprint()
Definition: fingerprint.h:53
virtual std::string DescribeBits(const std::vector< unsigned int >, bool=true)
Definition: fingerprint.h:75
unsigned int words
number of 32-bit words per fingerprint
Definition: fingerprint.h:142
Base class for all types of dynamic classes discovered at runtime.
Definition: plugin.h:52
OBFingerprint * GetFingerprint() const
Definition: fingerprint.h:194
std::vector< unsigned long > seekdata
Definition: fingerprint.h:154
Class to search fingerprint index files.
Definition: fingerprint.h:165
static double Tanimoto(const std::vector< unsigned int > &vec1, const unsigned int *p2)
Inline version of Tanimoto() taking a pointer for the second vector.
Definition: fingerprint.h:86
virtual unsigned int Flags()
Definition: fingerprint.h:69
Class to prepare fingerprint index files. See FastSearch class for details.
Definition: fingerprint.h:206
FptIndexHeader header
Definition: fingerprint.h:152
static unsigned int Getbitsperint()
Definition: fingerprint.h:113
#define MAKE_PLUGIN(BaseClass)
Definition: plugin.h:195
Simplify 'plugin' classes to be discovered and/or loaded at runtime.
char seek64
Definition: fingerprint.h:144
unsigned int nEntries
number of fingerprints
Definition: fingerprint.h:141
double Tanimoto(const OBBitVec &bv1, const OBBitVec &bv2)
The Tanimoto coefficient, which may be regarded as the proportion of the "on-bits" which are shared...
Definition: bitvec.cpp:660
virtual ~FastSearch()
Definition: fingerprint.h:173
unsigned int headerlength
offset to data: sizeof(FptIndexHeader)
Definition: fingerprint.h:140
const FptIndexHeader & GetIndexHeader() const
Definition: fingerprint.h:197
virtual void SetFlags(unsigned int)
Definition: fingerprint.h:71
Structure of fastsearch index files.
Definition: fingerprint.h:150
static OBFingerprint * FindFingerprint(const char *ID)
For backward compatibility; a synonym of OBFingerprint::FindType.
Definition: fingerprint.h:129
The base class for fingerprints.
Definition: fingerprint.h:39
Base Class.
Definition: base.h:239
std::vector< unsigned int > fptdata
Definition: fingerprint.h:153
FptFlag
Optional flags.
Definition: fingerprint.h:68
Global namespace for all Open Babel code.
Definition: alias.h:22