fingerprint.h

Go to the documentation of this file.
00001 /**********************************************************************
00002 fingerprint.h - Base class for fingerprints and fast searching 
00003  
00004 Copyright (C) 2005 by Chris Morley
00005  
00006 This file is part of the Open Babel project.
00007 For more information, see <http://openbabel.sourceforge.net/>
00008  
00009 This program is free software; you can redistribute it and/or modify
00010 it under the terms of the GNU General Public License as published by
00011 the Free Software Foundation version 2 of the License.
00012  
00013 This program is distributed in the hope that it will be useful,
00014 but WITHOUT ANY WARRANTY; without even the implied warranty of
00015 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00016 GNU General Public License for more details.
00017 ***********************************************************************/
00018 
00019 #ifndef OB_FINGERPRINT_H
00020 #define OB_FINGERPRINT_H
00021 
00022 #include <list>
00023 #include <map>
00024 #include <set>
00025 #include <vector>
00026 #include <string>
00027 
00028 namespace OpenBabel
00029 {
00030         class OBBase; //Forward declaration; used only as pointer.
00031 
00033 class OBAPI OBFingerprint
00034 {
00035 //see end of cpp file for detailed documentation
00036 public:
00038         void SetBit(std::vector<unsigned int>& vec, unsigned int n);    
00039 
00041         void Fold(std::vector<unsigned int>& vec, unsigned int nbits); 
00042 
00044         virtual bool GetFingerprint(OBBase* pOb, std::vector<unsigned int>& fp, int nbits=0)=0;
00045 
00047         virtual std::string Description()=0;
00048 
00050         enum FptFlag{FPT_UNIQUEBITS=1};
00051         virtual unsigned int Flags() { return 0;}; 
00052 
00054         static bool GetNextFPrt(std::string& id, OBFingerprint*& pFPrt);
00055 
00057         static OBFingerprint* FindFingerprint(std::string& ID);
00058 
00060         static double Tanimoto(const std::vector<unsigned int>& vec1, const std::vector<unsigned int>& vec2);
00061         
00063         static double Tanimoto(const std::vector<unsigned int>& vec1, const unsigned int* p2) 
00064         {
00066                 int andbits=0, orbits=0;
00067                 unsigned int i;
00068                 for (i=0;i<vec1.size();++i)
00069                 {
00070                         int andfp = vec1[i] & p2[i];
00071                         int orfp = vec1[i] | p2[i];
00072                         //Count bits
00073                         for(;andfp;andfp=andfp<<1)
00074                                 if(andfp<0) ++andbits;
00075                         for(;orfp;orfp=orfp<<1)
00076                                 if(orfp<0) ++orbits;
00077                 }
00078                         return((double)andbits/(double)orbits);
00079         };
00080         
00081         static unsigned int Getbitsperint(){ return bitsperint; }
00082 
00083 private:
00085         struct bit_or
00086         {
00087                 unsigned int operator()(const unsigned int a, const unsigned int b)
00088                 {
00089                         return a | b;   
00090                 }
00091         };
00092         
00093         typedef std::map<std::string, OBFingerprint*> FPMapType;
00094         typedef FPMapType::iterator Fptpos;
00095 
00096 protected:
00101         static FPMapType& FPtsMap()
00102         {
00103                 static FPMapType* fptm = NULL;
00104                 if (!fptm)
00105                   fptm = new FPMapType;
00106                 return *fptm;
00107         };
00108 
00109         OBFingerprint(std::string ID, bool IsDefault=false)
00110         {
00111                 FPtsMap()[ID] = this; //registers the derived fingerprint class
00112                 if(IsDefault || FPtsMap().empty())
00113                         _pDefault=this;
00114         };
00115         
00116 private:
00117         static OBFingerprint* _pDefault;
00118         static const unsigned int bitsperint;// = 8 * sizeof(unsigned int);
00119         static int rubbish;
00120 };
00121 
00122 
00123 
00124 
00125 //*************************************************************
00126 //Fast search routines
00128 struct OBAPI FptIndexHeader
00129 {
00130         unsigned int headerlength;
00131         unsigned int nEntries;    
00132         unsigned int words;                             
00133         char fpid[16];            
00134         char datafilename[256];   
00135 };
00137 struct OBAPI FptIndex
00138 {
00139         FptIndexHeader header;
00140         std::vector<unsigned int> fptdata;
00141         std::vector<unsigned int> seekdata;
00142         bool Read(std::istream* pIndexstream);
00144         OBFingerprint* CheckFP();
00145 };
00146 
00148 class OBAPI FastSearch
00149 {
00150 //see end of cpp file for detailed documentation
00151 public:
00152   std::string ReadIndex(std::istream* pIndexstream);
00153         virtual ~FastSearch(){};
00154 
00156         bool    Find(OBBase* pOb, std::vector<unsigned int>& SeekPositions, unsigned int MaxCandidates);
00157 
00160         bool    FindSimilar(OBBase* pOb, std::multimap<double, unsigned int>& SeekposMap,
00161                 double MinTani);
00162 
00165         bool    FindSimilar(OBBase* pOb, std::multimap<double, unsigned int>& SeekposMap,
00166                 int nCandidates=0);
00167 
00169         OBFingerprint* GetFingerprint() const{ return _pFP;};
00170 
00171 private:
00172         FptIndex   _index;
00173         OBFingerprint* _pFP;
00174 };
00175 
00176 //**********************************************
00178 class OBAPI FastSearchIndexer
00179 {
00180 //see end of cpp file for detailed documentation
00181 public:
00183         FastSearchIndexer(std::string& datafilename, std::ostream* os, std::string& fpid,
00184                         int FptBits=0);
00185 
00187         FastSearchIndexer(FptIndex* pindex, std::ostream* os);
00188         
00189         ~FastSearchIndexer();
00190 
00192         bool Add(OBBase* pOb, std::streampos seekpos);
00193 
00194 private:
00195         std::ostream* _indexstream;
00196         FptIndex*               _pindex;
00197         OBFingerprint* _pFP;
00198         int _nbits;
00199 };
00200 
00201 } //namespace OpenBabel
00202 #endif
00203