parsmart.h

Go to the documentation of this file.
00001 /**********************************************************************
00002 parsmart.h - Daylight SMARTS parser.
00003  
00004 Copyright (C) 1998-2001 by OpenEye Scientific Software, Inc.
00005 Some portions Copyright (C) 2001-2005 by Geoffrey R. Hutchison
00006  
00007 This file is part of the Open Babel project.
00008 For more information, see <http://openbabel.sourceforge.net/>
00009  
00010 This program is free software; you can redistribute it and/or modify
00011 it under the terms of the GNU General Public License as published by
00012 the Free Software Foundation version 2 of the License.
00013  
00014 This program is distributed in the hope that it will be useful,
00015 but WITHOUT ANY WARRANTY; without even the implied warranty of
00016 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00017 GNU General Public License for more details.
00018 ***********************************************************************/
00019 
00020 #ifndef OB_PARSMART_H
00021 #define OB_PARSMART_H
00022 
00023 #include <string>
00024 #include <vector>
00025 
00026 #include <openbabel/babelconfig.h>
00027 #include <openbabel/mol.h>
00028 
00029 /*==========================*/
00030 /*  SMARTS Data Structures  */
00031 /*==========================*/
00032 
00033 namespace OpenBabel
00034 {
00035 
00036   // mark this so that SWIG will not attempt to wrap for scripting languages
00037 
00038 #ifndef SWIG
00039 
//! \union _AtomExpr parsmart.h <openbabel/parsmart.h>
//! \brief One node of an atom expression tree.
//!
//! Every alternative begins with an int \c type field, so that field
//! overlays the top-level \c type member regardless of which alternative
//! was written.
//! NOTE(review): \c type is presumably the tag selecting the active
//! alternative -- confirm against the parser in parsmart.cpp.
typedef union _AtomExpr {
  int type;

  //! Node carrying a property/value pair and no child pointers.
  struct {
    int type;
    int prop;
    int value;
  } leaf;

  //! Node carrying an untyped pointer.
  //! NOTE(review): presumably a nested (recursive) pattern -- confirm.
  struct {
    int type;
    void *recur;
  } recur;

  //! Node with a single child expression.
  struct {
    int type;
    _AtomExpr *arg;
  } mon;

  //! Node with a left and a right child expression.
  struct {
    int type;
    _AtomExpr *lft;
    _AtomExpr *rgt;
  } bin;
} AtomExpr;
00071 
// NOTE(review): the BE_/BL_/BT_ prefixes are not explained in this header;
// they presumably tag BondExpr node kinds, BondExpr leaf properties and
// bond types respectively -- confirm against parsmart.cpp.

// BE_*: five distinct codes, 1..5.
#define BE_LEAF       0x01
#define BE_ANDHI      0x02
#define BE_ANDLO      0x03
#define BE_NOT        0x04
#define BE_OR         0x05

// BL_*: two distinct codes, 1..2.
#define BL_CONST      0x01
#define BL_TYPE       0x02

// BT_*: ten distinct codes, 1..10.
#define BT_SINGLE     0x01
#define BT_DOUBLE     0x02
#define BT_TRIPLE     0x03
#define BT_AROM       0x04
#define BT_UP         0x05
#define BT_DOWN       0x06
#define BT_UPUNSPEC   0x07
#define BT_DOWNUNSPEC 0x08
#define BT_RING       0x09
#define BT_QUAD       0x0A // quadruple bond $
00091 
//! \union _BondExpr parsmart.h <openbabel/parsmart.h>
//! \brief One node of a bond expression tree.
//!
//! Every alternative begins with an int \c type field, so that field
//! overlays the top-level \c type member regardless of which alternative
//! was written.
//! NOTE(review): \c type presumably holds one of the BE_* codes defined in
//! this header -- confirm against parsmart.cpp.
typedef union _BondExpr {
  int type;

  //! Node carrying a property/value pair and no child pointers.
  struct {
    int type;
    int prop;
    int value;
  } leaf;

  //! Node with a single child expression.
  struct {
    int type;
    _BondExpr *arg;
  } mon;

  //! Node with a left and a right child expression.
  struct {
    int type;
    _BondExpr *lft;
    _BondExpr *rgt;
  } bin;
} BondExpr;
00117 
00120   typedef struct
00121   {
00122     BondExpr *expr;
00123     int src,dst;
00124     int visit;
00125     bool grow;
00126   }
00127   BondSpec;
00128 
00131   typedef struct
00132   {
00133     AtomExpr *expr;
00134     int visit;
00135     int part;
00136     int chiral_flag;
00137     int vb;
00138   }
00139   AtomSpec;
00140 
00143   typedef struct
00144   {
00145     int aalloc,acount;
00146     int balloc,bcount;
00147     bool ischiral;
00148     AtomSpec *atom;
00149     BondSpec *bond;
00150     int parts;
00151     bool hasExplicitH;
00152     std::vector<int> bond_parse_order; // Used to recover the order in which bonds were parsed
00153   }
00154   Pattern;
00155 #else
00156   // for SWIG, just forward declare that we have some Pattern struct
00157   // (but this is private and not wrapped for scripting languages)
00158   struct Pattern;
00159 #endif
00160 
00161   // class introduction in parsmart.cpp
00163   class OBAPI OBSmartsPattern
00164   {
00165   protected:
00166     std::vector<bool>                     _growbond; 
00167     std::vector<std::vector<int> >      _mlist;    
00168     Pattern                        *_pat;      
00169     std::string                                       _str;      
00170 
00171   public:
00172     OBSmartsPattern() : _pat(NULL) { }
00173     virtual ~OBSmartsPattern();
00174 
00175   OBSmartsPattern(const OBSmartsPattern& cp): _pat(NULL)
00176       {
00177         *this = cp;
00178       }
00179     OBSmartsPattern& operator=(const OBSmartsPattern& cp)
00180       {
00181         if (_pat)
00182           delete [] _pat;
00183         _pat = NULL;
00184         std::string s = cp._str;
00185         Init(s);
00186         return (*this);
00187       }
00188     
00190 
00191 
00192 
00193     bool         Init(const char* pattern);
00196     bool         Init(const std::string& pattern);
00198 
00200 
00201 
00202     const std::string &GetSMARTS() const    {      return _str;    }
00204     std::string  &GetSMARTS()               {      return _str;    }
00205 
00207     bool         Empty() const     {      return(_pat == NULL);    }
00209     bool         IsValid() const   {      return(_pat != NULL);    }
00210 
00212     unsigned int NumAtoms()   const
00213     {
00214       return _pat ? _pat->acount : 0;
00215     }
00217     unsigned int NumBonds()   const
00218     {
00219       return _pat ? _pat->bcount : 0;
00220     }
00221 
00227     void         GetBond(int& src,int& dst,int& ord,int idx);
00229     int          GetAtomicNum(int idx);
00231     int          GetCharge(int idx);
00232 
00234     int          GetVectorBinding(int idx) const
00235     {
00236       return(_pat->atom[idx].vb);
00237     }
00239 
00241 
00242 
00243 
00244 
00245 
00246     bool Match(OBMol &mol, bool single=false);
00247 
00248     bool RestrictedMatch(OBMol &mol, std::vector<std::pair<int,int> > &pairs, bool single=false);
00249 
00250     bool RestrictedMatch(OBMol &mol, OBBitVec &bv, bool single=false);
00253     unsigned int NumMatches() const
00254     {
00255       return static_cast<unsigned int>(_mlist.size());
00256     }
00257 
00260     std::vector<std::vector<int> > &GetMapList()
00261       {
00262         return(_mlist);
00263       }
00265     std::vector<std::vector<int> >::iterator BeginMList()
00266       {
00267         return(_mlist.begin());
00268       }
00270     std::vector<std::vector<int> >::iterator EndMList()
00271       {
00272         return(_mlist.end());
00273       }
00274 
00276 
00286     std::vector<std::vector<int> > &GetUMapList();
00288 
00290     void         WriteMapList(std::ostream&);
00291   };
00292 
00296   class OBAPI OBSSMatch
00297   {
00298   protected:
00299     bool        *_uatoms;
00300     OBMol       *_mol;
00301     Pattern     *_pat;
00302     std::vector<int>  _map;
00303 
00304   public:
00305     OBSSMatch(OBMol&,Pattern*);
00306     ~OBSSMatch();
00307     void Match(std::vector<std::vector<int> > &v, int bidx=-1);
00308   };
00309 
00310   OBAPI void SmartsLexReplace(std::string &,
00311                               std::vector<std::pair<std::string,std::string> > &);
00312 
00313 } // end namespace OpenBabel
00314 
00315 #endif // OB_PARSMART_H
00316