parsmart.h

Go to the documentation of this file.
00001 /**********************************************************************
00002 parsmart.h - Daylight SMARTS parser.
00003  
00004 Copyright (C) 1998-2001 by OpenEye Scientific Software, Inc.
00005 Some portions Copyright (C) 2001-2005 by Geoffrey R. Hutchison
00006  
00007 This file is part of the Open Babel project.
00008 For more information, see <http://openbabel.sourceforge.net/>
00009  
00010 This program is free software; you can redistribute it and/or modify
00011 it under the terms of the GNU General Public License as published by
00012 the Free Software Foundation version 2 of the License.
00013  
00014 This program is distributed in the hope that it will be useful,
00015 but WITHOUT ANY WARRANTY; without even the implied warranty of
00016 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00017 GNU General Public License for more details.
00018 ***********************************************************************/
00019 
00020 #ifndef OB_PARSMART_H
00021 #define OB_PARSMART_H
00022 
00023 #include <string>
00024 #include <vector>
00025 
00026 #include "mol.h"
00027 
00028 /*==========================*/
00029 /*  SMARTS Data Structures  */
00030 /*==========================*/
00031 
/* Atom-expression node tags.
 * Presumably stored in AtomExpr::type to tell which union member is
 * active (leaf / recur / mon / bin) -- TODO confirm against parsmart.cpp. */
#define AE_LEAF   0x01  /* leaf node                                   */
#define AE_RECUR  0x02  /* recursive node                              */
#define AE_NOT    0x03  /* negation (single operand)                   */
#define AE_ANDHI  0x04  /* AND, presumably the high-precedence form    */
#define AE_OR     0x05  /* OR                                          */
#define AE_ANDLO  0x06  /* AND, presumably the low-precedence form     */
00038 
/* Atom-leaf property codes.
 * Presumably the values held in AtomExpr::leaf.prop -- TODO confirm. */
#define AL_CONST          0x01
#define AL_MASS           0x02
#define AL_AROM           0x03
#define AL_ELEM           0x04
#define AL_HCOUNT         0x05
#define AL_NEGATIVE       0x06
#define AL_POSITIVE       0x07
#define AL_CONNECT        0x08
#define AL_DEGREE         0x09
#define AL_IMPLICIT       0x0a
#define AL_RINGS          0x0b
#define AL_SIZE           0x0c
#define AL_VALENCE        0x0d
#define AL_CHIRAL         0x0e
#define AL_HYB            0x0f

/* NOTE(review): these two share numeric values with AL_CONST / AL_MASS,
 * so they cannot be property codes in the same field; presumably they are
 * the values carried by an AL_CHIRAL leaf -- confirm against the parser. */
#define AL_CLOCKWISE      1
#define AL_ANTICLOCKWISE  2
00056 
00057 namespace OpenBabel
00058 {
00059 
00060   // mark this so that SWIG will not attempt to wrap for scripting languages
00061 
00062 #ifndef SWIG
00063 
/// Node of an atom-expression tree.
///
/// Every alternative begins with an int `type` field, so the tag can be read
/// through the bare `type` member whichever alternative was written.
/// NOTE(review): the tag name `_AtomExpr` (underscore + capital letter) is an
/// identifier reserved to the implementation; it is kept unchanged here
/// because other translation units may refer to `union _AtomExpr`.
union _AtomExpr
{
    int type;                                   // tag only

    struct { int type; int prop; int value; }   leaf;   // property/value pair

    struct { int type; void *recur; }           recur;  // opaque pointer payload

    struct { int type; union _AtomExpr *arg; }  mon;    // one child node

    struct
    {
        int type;
        union _AtomExpr *lft;                   // left child
        union _AtomExpr *rgt;                   // right child
    }                                           bin;    // two child nodes
};
typedef union _AtomExpr AtomExpr;
00094 
/* Bond-expression node tags.
 * Presumably stored in BondExpr::type -- TODO confirm.
 * Note the numbering differs from the AE_* tags (BE_NOT is 0x04, AE_NOT is 0x03). */
#define BE_LEAF   0x01
#define BE_ANDHI  0x02
#define BE_ANDLO  0x03
#define BE_NOT    0x04
#define BE_OR     0x05
00100 
/* Bond-leaf property codes (presumably the values of BondExpr::leaf.prop -- TODO confirm). */
#define BL_CONST  0x01
#define BL_TYPE   0x02
00103 
/* Bond-type values (presumably the value carried by a BL_TYPE leaf -- TODO confirm). */
#define BT_SINGLE      0x01
#define BT_DOUBLE      0x02
#define BT_TRIPLE      0x03
#define BT_AROM        0x04
#define BT_UP          0x05
#define BT_DOWN        0x06
#define BT_UPUNSPEC    0x07
#define BT_DOWNUNSPEC  0x08
#define BT_RING        0x09
00112 #define BT_RING       0x09
00113 
/// Node of a bond-expression tree.
///
/// Same shape as the atom-expression node minus the recursive alternative:
/// every alternative begins with an int `type` field, readable through the
/// bare `type` member.
/// NOTE(review): the tag name `_BondExpr` is an implementation-reserved
/// identifier; kept as-is because code outside this header may use it.
union _BondExpr
{
    int type;                                   // tag only

    struct { int type; int prop; int value; }   leaf;   // property/value pair

    struct { int type; union _BondExpr *arg; }  mon;    // one child node

    struct
    {
        int type;
        union _BondExpr *lft;                   // left child
        union _BondExpr *rgt;                   // right child
    }                                           bin;    // two child nodes
};
typedef union _BondExpr BondExpr;
00138 
00140 typedef struct
00141 {
00142     BondExpr *expr;
00143     int src,dst;
00144     int visit;
00145     bool grow;
00146 }
00147 BondSpec;
00148 
00150 typedef struct
00151 {
00152     AtomExpr *expr;
00153     int visit;
00154     int part;
00155     int chiral_flag;
00156     int vb;
00157 }
00158 AtomSpec;
00159 
00161 typedef struct
00162 {
00163     int aalloc,acount;
00164     int balloc,bcount;
00165     bool ischiral;
00166     AtomSpec *atom;
00167     BondSpec *bond;
00168     int parts;
00169 }
00170 Pattern;
00171 #else
00172 // for SWIG, just forward declare that we have some Pattern struct
00173 // (but this is private and not wrapped for scripting languages)
00174 struct Pattern;
00175 #endif
00176 
00177 // class introduction in parsmart.cpp
00179 class OBAPI OBSmartsPattern
00180 {
00181 protected:
00182     std::vector<bool>                   _growbond;
00183     std::vector<std::vector<int> >      _mlist;
00184     Pattern                             *_pat;
00185     std::string                         _str;
00186 
00187 public:
00188     OBSmartsPattern()
00189     {
00190         _pat=NULL;
00191     }
00192     virtual ~OBSmartsPattern();
00193 
00194     OBSmartsPattern(const OBSmartsPattern& cp)
00195     {
00196         _pat = NULL;
00197         *this = cp;
00198     }
00199     OBSmartsPattern& operator=(const OBSmartsPattern& cp)
00200     {
00201         if (_pat)
00202             delete [] _pat;
00203         _pat = NULL;
00204         std::string s = cp._str;
00205         Init(s);
00206         return (*this);
00207     }
00208 
00209     unsigned int NumMatches() const
00210     {
00211         return (unsigned int)_mlist.size();
00212     }
00213     unsigned int NumAtoms()   const
00214     {
00215         return _pat ? _pat->acount : 0;
00216     }
00217     unsigned int NumBonds()   const
00218     {
00219         return _pat ? _pat->bcount : 0;
00220     }
00221 
00222     int          GetAtomicNum(int);
00223     void         GetBond(int&,int&,int&,int);
00224     int          GetCharge(int);
00225     const std::string &GetSMARTS() const
00226     {
00227         return _str;
00228     }
00229     std::string  &GetSMARTS()
00230     {
00231         return _str;
00232     }
00233     int          GetVectorBinding(int idx) const
00234     {
00235         return(_pat->atom[idx].vb);
00236     }
00237     bool         Empty()                   const
00238     {
00239         return(_pat == NULL);
00240     }
00241     bool         IsValid()                 const
00242     {
00243         return(_pat != NULL);
00244     }
00245     bool         Init(const char*);
00246     bool         Init(const std::string&);
00247     void         WriteMapList(std::ostream&);
00248 
00249     bool Match(OBMol &mol, bool single=false);
00250     bool RestrictedMatch(OBMol &mol, std::vector<std::pair<int,int> > &pairs, bool single=false);
00251     bool RestrictedMatch(OBMol &mol, OBBitVec &bv, bool single=false);
00252 
00253     std::vector<std::vector<int> > &GetMapList()
00254     {
00255         return(_mlist);
00256     }
00257     std::vector<std::vector<int> > &GetUMapList();
00258     std::vector<std::vector<int> >::iterator BeginMList()
00259     {
00260         return(_mlist.begin());
00261     }
00262     std::vector<std::vector<int> >::iterator EndMList()
00263     {
00264         return(_mlist.end());
00265     }
00266 };
00267 
00269 class OBAPI OBSSMatch //used for fast exhaustive matching
00270 {
00271 protected:
00272     bool        *_uatoms;
00273     OBMol       *_mol;
00274     Pattern     *_pat;
00275     std::vector<int>  _map;
00276 
00277 public:
00278     OBSSMatch(OBMol&,Pattern*);
00279     ~OBSSMatch();
00280     void Match(std::vector<std::vector<int> > &v, int bidx=-1);
00281 };
00282 
/// Declared here, defined elsewhere. Both arguments are non-const references,
/// so the callee may modify them.
/// Presumably rewrites `s` using the (name, replacement) pairs in `vlex` --
/// TODO confirm against the definition.
void SmartsLexReplace(std::string &s,
                      std::vector<std::pair<std::string, std::string> > &vlex);
00285 
00286 } // end namespace OpenBabel
00287 
00288 #endif // OB_PARSMART_H
00289