fingerprint.h

Go to the documentation of this file.
00001 /**********************************************************************
00002 fingerprint.h - Base class for fingerprints and fast searching 
00003  
00004 Copyright (C) 2005 by Chris Morley
00005  
00006 This file is part of the Open Babel project.
00007 For more information, see <http://openbabel.sourceforge.net/>
00008  
00009 This program is free software; you can redistribute it and/or modify
00010 it under the terms of the GNU General Public License as published by
00011 the Free Software Foundation version 2 of the License.
00012  
00013 This program is distributed in the hope that it will be useful,
00014 but WITHOUT ANY WARRANTY; without even the implied warranty of
00015 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00016 GNU General Public License for more details.
00017 ***********************************************************************/
00018 
00019 #ifndef OB_FINGERPRINT_H
00020 #define OB_FINGERPRINT_H
00021 
00022 #include <list>
00023 #include <map>
00024 #include <set>
00025 #include <vector>
00026 #include <string>
00027 
00028 #include <openbabel/pluginiter.h>
00029 
00030 #ifndef OBFPRT
00031 #define OBFPRT
00032 #endif
00033 
00034 namespace OpenBabel
00035 {
00036   class OBBase; //Forward declaration; used only as pointer.
00037 
00039 class OBFPRT OBFingerprint
00040 {
00041 //see end of cpp file for detailed documentation
00042 
00043 MAKE_PLUGIN(OBFingerprint);
00044 
00045 public:
00046 
00047   virtual ~OBFingerprint(){}
00048 
00050   void SetBit(std::vector<unsigned int>& vec, unsigned int n);  
00051 
00053   void Fold(std::vector<unsigned int>& vec, unsigned int nbits); 
00054 
00056   virtual bool GetFingerprint(OBBase* pOb, std::vector<unsigned int>& fp, int nbits=0)=0;
00057 
00059   virtual std::string Description()=0;
00060 
00062   enum FptFlag{FPT_UNIQUEBITS=1};
00063   virtual unsigned int Flags() { return 0;}; 
00064 
00065   // Obtain info on available fingerprints
00066   // Replaced by FOR_EACH(OBFingerprint)
00067 //  static bool GetNextFPrt(std::string& id, OBFingerprint*& pFPrt);
00068 
00070   static double Tanimoto(const std::vector<unsigned int>& vec1, const std::vector<unsigned int>& vec2);
00071   
00073   static double Tanimoto(const std::vector<unsigned int>& vec1, const unsigned int* p2) 
00074   {
00076     int andbits=0, orbits=0;
00077     unsigned int i;
00078     for (i=0;i<vec1.size();++i)
00079     {
00080       int andfp = vec1[i] & p2[i];
00081       int orfp = vec1[i] | p2[i];
00082       //Count bits
00083       for(;andfp;andfp=andfp<<1)
00084         if(andfp<0) ++andbits;
00085       for(;orfp;orfp=orfp<<1)
00086         if(orfp<0) ++orbits;
00087     }
00088       return((double)andbits/(double)orbits);
00089   };
00090   
00091   static unsigned int Getbitsperint(){ return bitsperint; }
00092 
00093 private:
00095   struct bit_or
00096   {
00097     unsigned int operator()(const unsigned int a, const unsigned int b)
00098     {
00099       return a | b;     
00100     }
00101   };
00102   
00103 
00104 public:
00107 static OBFingerprint* FindFingerprint(const std::string& ID){ return Iter().FindType(ID);}
00108 
00109 private:
00110   static const unsigned int bitsperint;// = 8 * sizeof(unsigned int);
00111 };
00112 
00113 //*************************************************************
00114 //Fast search routines
00117 struct OBFPRT FptIndexHeader
00118 {
00119   unsigned int headerlength;
00120   unsigned int nEntries;    
00121   unsigned int words;                           
00122   char fpid[16];            
00123   char datafilename[256];   
00124 };
00125 
00128 struct OBFPRT FptIndex
00129 {
00130   FptIndexHeader header;
00131   std::vector<unsigned int> fptdata;
00132   std::vector<unsigned int> seekdata;
00133   bool Read(std::istream* pIndexstream);
00135   OBFingerprint* CheckFP();
00136 };
00137 
00140 class OBFPRT FastSearch
00141 {
00142 //see end of cpp file for detailed documentation
00143 public:
00145   std::string ReadIndexFile(std::string IndexFilename);
00146   std::string ReadIndex(std::istream* pIndexstream);
00147 
00148   virtual ~FastSearch(){};
00149 
00151   bool    Find(OBBase* pOb, std::vector<unsigned int>& SeekPositions, unsigned int MaxCandidates);
00152 
00155   bool    FindMatch(OBBase* pOb, std::vector<unsigned int>& SeekPositions,
00156                             unsigned int MaxCandidates);
00157 
00160   bool    FindSimilar(OBBase* pOb, std::multimap<double, unsigned int>& SeekposMap,
00161     double MinTani);
00162 
00165   bool    FindSimilar(OBBase* pOb, std::multimap<double, unsigned int>& SeekposMap,
00166     int nCandidates=0);
00167 
00169   OBFingerprint* GetFingerprint() const{ return _pFP;};
00170 
00172   const FptIndexHeader& GetIndexHeader() const{ return _index.header;};
00173 
00174 private:
00175   FptIndex   _index;
00176   OBFingerprint* _pFP;
00177 };
00178 
00179 //**********************************************
00182 class OBFPRT FastSearchIndexer
00183 {
00184 //see end of cpp file for detailed documentation
00185 public:
00187   FastSearchIndexer(std::string& datafilename, std::ostream* os, std::string& fpid,
00188       int FptBits=0);
00189 
00191   FastSearchIndexer(FptIndex* pindex, std::ostream* os);
00192   
00193   ~FastSearchIndexer();
00194 
00196   bool Add(OBBase* pOb, std::streampos seekpos);
00197 
00198 private:
00199   std::ostream* _indexstream;
00200   FptIndex*             _pindex;
00201   OBFingerprint* _pFP;
00202   int _nbits;
00203 };
00204 
00205 } //namespace OpenBabel
00206 #endif
00207