fingerprint.h

Go to the documentation of this file.
00001 /**********************************************************************
00002 fingerprint.h - Base class for fingerprints and fast searching 
00003  
00004 Copyright (C) 2005 by Chris Morley
00005  
00006 This file is part of the Open Babel project.
00007 For more information, see <http://openbabel.sourceforge.net/>
00008  
00009 This program is free software; you can redistribute it and/or modify
00010 it under the terms of the GNU General Public License as published by
00011 the Free Software Foundation version 2 of the License.
00012  
00013 This program is distributed in the hope that it will be useful,
00014 but WITHOUT ANY WARRANTY; without even the implied warranty of
00015 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00016 GNU General Public License for more details.
00017 ***********************************************************************/
00018 
00019 #ifndef OB_FINGERPRINT_H
00020 #define OB_FINGERPRINT_H
00021 
00022 #include <list>
00023 #include <map>
00024 #include <set>
00025 #include <vector>
00026 #include <string>
00027 
00028 #include <openbabel/plugin.h>
00029 
00030 #ifndef OBFPRT
00031 #define OBFPRT
00032 #endif
00033 
00034 namespace OpenBabel
00035 {
00036   class OBBase; //Forward declaration; used only as pointer.
00037 
00039 class OBFPRT OBFingerprint : public OBPlugin
00040 {
00041 //see end of cpp file for detailed documentation
00042 
00043 MAKE_PLUGIN(OBFingerprint)
00044 
00045 const char* TypeID()
00046         {
00047                 return "fingerprints";
00048         }
00049         
00050         //Rest of OBFingerprints declarations
00051 public:
00052 
00053   virtual ~OBFingerprint(){}
00054 
00056   void SetBit(std::vector<unsigned int>& vec, const unsigned int n);
00057 
00059   bool GetBit(const std::vector<unsigned int>& vec, const unsigned int n);
00060 
00062   void Fold(std::vector<unsigned int>& vec, unsigned int nbits); 
00063 
00065   virtual bool GetFingerprint(OBBase* pOb, std::vector<unsigned int>& fp, int nbits=0)=0;
00066 
00068   enum FptFlag{FPT_UNIQUEBITS=1};
00069   virtual unsigned int Flags() { return 0;}; 
00070 
00073   virtual std::string DescribeBits(const std::  vector<unsigned int> fp, bool bSet=true)
00074   {
00075     std::string txt("");
00076     return txt;
00077   }
00078 
00080   static double Tanimoto(const std::vector<unsigned int>& vec1, const std::vector<unsigned int>& vec2);
00081   
00083   static double Tanimoto(const std::vector<unsigned int>& vec1, const unsigned int* p2) 
00084   {
00086     int andbits=0, orbits=0;
00087     unsigned int i;
00088     for (i=0;i<vec1.size();++i)
00089     {
00090       int andfp = vec1[i] & p2[i];
00091       int orfp = vec1[i] | p2[i];
00092       // Count bits
00093       /* GCC 3.4 supports a "population count" builtin, which on many targets is
00094          implemented with a single instruction.  There is a fallback definition
00095          in libgcc in case a target does not have one, which should be just as
00096          good as the static function below.  */
00097 #if __GNUC__ > 3 || (__GNUC__ == 3 && __GNUC_MINOR__ >= 4)
00098       andbits += __builtin_popcount(andfp);
00099       orbits += __builtin_popcount(orfp);
00100 #else
00101       for(;andfp;andfp=andfp<<1)
00102         if(andfp<0) ++andbits;
00103       for(;orfp;orfp=orfp<<1)
00104         if(orfp<0) ++orbits;
00105 #endif
00106     }
00107       return((double)andbits/(double)orbits);
00108   };
00109   
00110   static unsigned int Getbitsperint(){ return bitsperint; }
00111 
00112 private:
00114   struct bit_or
00115   {
00116     unsigned int operator()(const unsigned int a, const unsigned int b)
00117     {
00118       return a | b;     
00119     }
00120   };
00121   
00122 
00123 public:
00126 static OBFingerprint* FindFingerprint(const char* ID){ return FindType(ID);}
00127 
00128 private:
00129   static const unsigned int bitsperint;// = 8 * sizeof(unsigned int);
00130 };
00131 
00132 //Fast search routines
00135 struct OBFPRT FptIndexHeader
00136 {
00137   unsigned int headerlength;
00138   unsigned int nEntries;    
00139   unsigned int words;                           
00140   char fpid[16];            
00141   char datafilename[256];   
00142 };
00143 
00146 struct OBFPRT FptIndex
00147 {
00148   FptIndexHeader header;
00149   std::vector<unsigned int> fptdata;
00150   std::vector<unsigned int> seekdata;
00151   bool Read(std::istream* pIndexstream);
00152   bool ReadIndex(std::istream* pIndexstream);
00153   bool ReadHeader(std::istream* pIndexstream);
00154     
00156   OBFingerprint* CheckFP();
00157 };
00158 
00161 class OBFPRT FastSearch
00162 {
00163 //see end of cpp file for detailed documentation
00164 public:
00166   std::string ReadIndexFile(std::string IndexFilename);
00167   std::string ReadIndex(std::istream* pIndexstream);
00168 
00169   virtual ~FastSearch(){};
00170 
00172   bool    Find(OBBase* pOb, std::vector<unsigned int>& SeekPositions, unsigned int MaxCandidates);
00173 
00176   bool    FindMatch(OBBase* pOb, std::vector<unsigned int>& SeekPositions,
00177                             unsigned int MaxCandidates);
00178 
00181   bool    FindSimilar(OBBase* pOb, std::multimap<double, unsigned int>& SeekposMap,
00182     double MinTani, double MaxTani = 1.1 );
00183 
00186   bool    FindSimilar(OBBase* pOb, std::multimap<double, unsigned int>& SeekposMap,
00187     int nCandidates=0);
00188 
00190   OBFingerprint* GetFingerprint() const{ return _pFP;};
00191 
00193   const FptIndexHeader& GetIndexHeader() const{ return _index.header;};
00194 
00195 private:
00196   FptIndex   _index;
00197   OBFingerprint* _pFP;
00198 };
00199 
00202 class OBFPRT FastSearchIndexer
00203 {
00204 //see end of cpp file for detailed documentation
00205 public:
00207   FastSearchIndexer(std::string& datafilename, std::ostream* os, std::string& fpid,
00208       int FptBits=0, int nmols=0);
00209 
00211   FastSearchIndexer(FptIndex* pindex, std::ostream* os, int nmols=0);
00212   
00213   ~FastSearchIndexer();
00214 
00216   bool Add(OBBase* pOb, std::streampos seekpos);
00217 
00218 private:
00219   std::ostream* _indexstream;
00220   FptIndex*             _pindex;
00221   OBFingerprint* _pFP;
00222   int _nbits;
00223 };
00224 
00225 } //namespace OpenBabel
00226 #endif
00227