• Main Page
  • Related Pages
  • Namespaces
  • Classes
  • Files

fingerprint.h

Go to the documentation of this file.
00001 /**********************************************************************
00002 fingerprint.h - Base class for fingerprints and fast searching 
00003  
00004 Copyright (C) 2005 by Chris Morley
00005  
00006 This file is part of the Open Babel project.
00007 For more information, see <http://openbabel.sourceforge.net/>
00008  
00009 This program is free software; you can redistribute it and/or modify
00010 it under the terms of the GNU General Public License as published by
00011 the Free Software Foundation version 2 of the License.
00012  
00013 This program is distributed in the hope that it will be useful,
00014 but WITHOUT ANY WARRANTY; without even the implied warranty of
00015 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00016 GNU General Public License for more details.
00017 ***********************************************************************/
00018 
00019 #ifndef OB_FINGERPRINT_H
00020 #define OB_FINGERPRINT_H
00021 
00022 #include <list>
00023 #include <map>
00024 #include <set>
00025 #include <vector>
00026 #include <string>
00027 
00028 #include <openbabel/plugin.h>
00029 
00030 #ifndef OBFPRT
00031 #define OBFPRT
00032 #endif
00033 
00034 namespace OpenBabel
00035 {
00036   class OBBase; //Forward declaration; used only as pointer.
00037 
00039 class OBFPRT OBFingerprint : public OBPlugin
00040 {
00041 //see end of cpp file for detailed documentation
00042 
00043 MAKE_PLUGIN(OBFingerprint)
00044 
00045 const char* TypeID()
00046         {
00047                 return "fingerprints";
00048         }
00049         
00050         //Rest of OBFingerprints declarations
00051 public:
00052 
00053   virtual ~OBFingerprint(){}
00054 
00056   void SetBit(std::vector<unsigned int>& vec, const unsigned int n);
00057 
00059   bool GetBit(const std::vector<unsigned int>& vec, const unsigned int n);
00060 
00062   void Fold(std::vector<unsigned int>& vec, unsigned int nbits); 
00063 
00065   virtual bool GetFingerprint(OBBase* pOb, std::vector<unsigned int>& fp, int nbits=0)=0;
00066 
00068   enum FptFlag{FPT_UNIQUEBITS=1};
00069   virtual unsigned int Flags() { return 0;}; 
00070 
00073   virtual std::string DescribeBits(const std::  vector<unsigned int> fp, bool bSet=true)
00074   {
00075     std::string txt("");
00076     return txt;
00077   }
00078 
00080   static double Tanimoto(const std::vector<unsigned int>& vec1, const std::vector<unsigned int>& vec2);
00081   
00083   static double Tanimoto(const std::vector<unsigned int>& vec1, const unsigned int* p2) 
00084   {
00086     int andbits=0, orbits=0;
00087     unsigned int i;
00088     for (i=0;i<vec1.size();++i)
00089     {
00090       int andfp = vec1[i] & p2[i];
00091       int orfp = vec1[i] | p2[i];
00092       // Count bits
00093 #ifdef __GNUC__
00094       andbits += __builtin_popcount(andfp);
00095       orbits += __builtin_popcount(orfp);
00096 #else
00097       for(;andfp;andfp=andfp<<1)
00098         if(andfp<0) ++andbits;
00099       for(;orfp;orfp=orfp<<1)
00100         if(orfp<0) ++orbits;
00101 #endif
00102     }
00103       return((double)andbits/(double)orbits);
00104   };
00105   
00106   static unsigned int Getbitsperint(){ return bitsperint; }
00107 
00108 private:
00110   struct bit_or
00111   {
00112     unsigned int operator()(const unsigned int a, const unsigned int b)
00113     {
00114       return a | b;     
00115     }
00116   };
00117   
00118 
00119 public:
00122 static OBFingerprint* FindFingerprint(const char* ID){ return FindType(ID);}
00123 
00124 private:
00125   static const unsigned int bitsperint;// = 8 * sizeof(unsigned int);
00126 };
00127 
00128 //Fast search routines
00131 struct OBFPRT FptIndexHeader
00132 {
00133   unsigned int headerlength;
00134   unsigned int nEntries;    
00135   unsigned int words;                           
00136   char fpid[16];            
00137   char datafilename[256];   
00138 };
00139 
00142 struct OBFPRT FptIndex
00143 {
00144   FptIndexHeader header;
00145   std::vector<unsigned int> fptdata;
00146   std::vector<unsigned int> seekdata;
00147   bool Read(std::istream* pIndexstream);
00149   OBFingerprint* CheckFP();
00150 };
00151 
00154 class OBFPRT FastSearch
00155 {
00156 //see end of cpp file for detailed documentation
00157 public:
00159   std::string ReadIndexFile(std::string IndexFilename);
00160   std::string ReadIndex(std::istream* pIndexstream);
00161 
00162   virtual ~FastSearch(){};
00163 
00165   bool    Find(OBBase* pOb, std::vector<unsigned int>& SeekPositions, unsigned int MaxCandidates);
00166 
00169   bool    FindMatch(OBBase* pOb, std::vector<unsigned int>& SeekPositions,
00170                             unsigned int MaxCandidates);
00171 
00174   bool    FindSimilar(OBBase* pOb, std::multimap<double, unsigned int>& SeekposMap,
00175     double MinTani);
00176 
00179   bool    FindSimilar(OBBase* pOb, std::multimap<double, unsigned int>& SeekposMap,
00180     int nCandidates=0);
00181 
00183   OBFingerprint* GetFingerprint() const{ return _pFP;};
00184 
00186   const FptIndexHeader& GetIndexHeader() const{ return _index.header;};
00187 
00188 private:
00189   FptIndex   _index;
00190   OBFingerprint* _pFP;
00191 };
00192 
00195 class OBFPRT FastSearchIndexer
00196 {
00197 //see end of cpp file for detailed documentation
00198 public:
00200   FastSearchIndexer(std::string& datafilename, std::ostream* os, std::string& fpid,
00201       int FptBits=0);
00202 
00204   FastSearchIndexer(FptIndex* pindex, std::ostream* os);
00205   
00206   ~FastSearchIndexer();
00207 
00209   bool Add(OBBase* pOb, std::streampos seekpos);
00210 
00211 private:
00212   std::ostream* _indexstream;
00213   FptIndex*             _pindex;
00214   OBFingerprint* _pFP;
00215   int _nbits;
00216 };
00217 
00218 } //namespace OpenBabel
00219 #endif
00220 

This file is part of the documentation for Open Babel, version 2.2.0.

Documentation copyright © 1998-2007, the Open Babel Developers.
Open Babel is hosted by SourceForge.
Generated on Thu Jul 3 14:30:33 2008 by doxygen 1.5.6.