00001 /********************************************************************** 00002 parsmart.h - Daylight SMARTS parser. 00003 00004 Copyright (C) 1998-2001 by OpenEye Scientific Software, Inc. 00005 Some portions Copyright (C) 2001-2005 by Geoffrey R. Hutchison 00006 00007 This file is part of the Open Babel project. 00008 For more information, see <http://openbabel.org/> 00009 00010 This program is free software; you can redistribute it and/or modify 00011 it under the terms of the GNU General Public License as published by 00012 the Free Software Foundation version 2 of the License. 00013 00014 This program is distributed in the hope that it will be useful, 00015 but WITHOUT ANY WARRANTY; without even the implied warranty of 00016 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 00017 GNU General Public License for more details. 00018 ***********************************************************************/ 00019 00020 #ifndef OB_PARSMART_H 00021 #define OB_PARSMART_H 00022 00023 #include <string> 00024 #include <vector> 00025 00026 #include <openbabel/babelconfig.h> 00027 #include <openbabel/mol.h> 00028 00029 /*==========================*/ 00030 /* SMARTS Data Structures */ 00031 /*==========================*/ 00032 00033 namespace OpenBabel 00034 { 00035 00036 // mark this so that SWIG will not attempt to wrap for scripting languages 00037 00038 #ifndef SWIG 00039 00042 typedef union _AtomExpr { 00043 int type; 00044 struct 00045 { 00046 int type; 00047 int prop; 00048 int value; 00049 } 00050 leaf; 00051 struct 00052 { 00053 int type; 00054 void *recur; 00055 } 00056 recur; 00057 struct 00058 { 00059 int type; 00060 union _AtomExpr *arg; 00061 } 00062 mon; 00063 struct 00064 { 00065 int type; 00066 union _AtomExpr *lft; 00067 union _AtomExpr *rgt; 00068 } 00069 bin; 00070 } AtomExpr; 00071 00072 #define BE_LEAF 0x01 00073 #define BE_ANDHI 0x02 00074 #define BE_ANDLO 0x03 00075 #define BE_NOT 0x04 00076 #define BE_OR 0x05 00077 00078 #define BL_CONST 0x01 00079 #define BL_TYPE 0x02 00080 00081 #define BT_SINGLE 0x01 00082 #define BT_DOUBLE 0x02 00083 #define BT_TRIPLE 0x03 00084 #define BT_AROM 0x04 00085 #define BT_UP 0x05 00086 #define BT_DOWN 0x06 00087 #define BT_UPUNSPEC 0x07 00088 #define BT_DOWNUNSPEC 0x08 00089 #define BT_RING 0x09 00090 #define BT_QUAD 0x0A //quadruple bond $ 00091 00094 typedef union _BondExpr { 00095 int type; 00096 struct 00097 { 00098 int type; 00099 int prop; 00100 int value; 00101 } 00102 leaf; 00103 struct 00104 { 00105 int type; 00106 union _BondExpr *arg; 00107 } 00108 mon; 00109 struct 00110 { 00111 int type; 00112 union _BondExpr *lft; 00113 union _BondExpr *rgt; 00114 } 00115 bin; 00116 } BondExpr; 00117 00120 typedef struct 00121 { 00122 BondExpr *expr; 00123 int src,dst; 00124 int visit; 00125 bool grow; 00126 } 00127 BondSpec; 00128 00131 typedef struct 00132 { 00133 AtomExpr *expr; 00134 int visit; 00135 int part; 00136 int chiral_flag; 00137 int vb; 00138 } 00139 AtomSpec; 00140 00143 typedef struct 00144 { 00145 int aalloc,acount; 00146 int balloc,bcount; 00147 bool ischiral; 00148 AtomSpec *atom; 00149 BondSpec *bond; 00150 int parts; 00151 bool hasExplicitH; 00152 std::vector<int> bond_parse_order; // Used to recover the order in which bonds were parsed 00153 } 00154 Pattern; 00155 00158 typedef struct 00159 { 00160 BondExpr *closord[100]; 00161 int closure[100]; 00162 int closindex; 00163 } ParseState; 00164 00165 #else 00166 // for SWIG, just forward declare that we have some Pattern struct 00167 // (but this is private and not wrapped for scripting languages) 00168 struct Pattern; 00169 #endif 00170 00172 class OBSmartsPrivate; 00173 00176 00177 // class introduction in parsmart.cpp 00179 class OBAPI OBSmartsPattern 00180 { 00181 protected: 00182 OBSmartsPrivate *_d; 00183 std::vector<bool> _growbond; 00184 std::vector<std::vector<int> > _mlist; 00185 Pattern *_pat; 00186 std::string _str; 00187 00188 char *_buffer; 00189 char *LexPtr; 00190 char *MainPtr; 00191 00192 Pattern *ParseSMARTSPattern( void ); 00193 Pattern *ParseSMARTSPart( Pattern*, int ); 00194 Pattern *SMARTSError( Pattern *pat ); 00195 Pattern *ParseSMARTSError( Pattern *pat, BondExpr *expr ); 00196 AtomExpr *ParseSimpleAtomPrimitive( void ); 00197 AtomExpr *ParseComplexAtomPrimitive( void ); 00198 AtomExpr *ParseAtomExpr( int level ); 00199 BondExpr *ParseBondPrimitive( void ); 00200 BondExpr *ParseBondExpr( int level ); 00201 Pattern *ParseSMARTSString( char *ptr ); 00202 Pattern *ParseSMARTSRecord( char *ptr ); 00203 int GetVectorBinding(); 00204 Pattern *SMARTSParser( Pattern *pat, ParseState *stat, 00205 int prev, int part ); 00206 00207 public: 00208 OBSmartsPattern() : _pat(NULL), _buffer(NULL), LexPtr(NULL), MainPtr(NULL) { } 00209 virtual ~OBSmartsPattern(); 00210 00211 OBSmartsPattern(const OBSmartsPattern& cp): _pat(NULL), _buffer(NULL), LexPtr(NULL), MainPtr(NULL) 00212 { 00213 *this = cp; 00214 } 00215 00216 OBSmartsPattern& operator=(const OBSmartsPattern& cp) 00217 { 00218 if (this == &cp) 00219 return *this; 00220 00221 if (_pat) 00222 delete[] _pat; 00223 if (_buffer) 00224 delete[] _buffer; 00225 _buffer = NULL; 00226 _pat = NULL; 00227 std::string s = cp._str; 00228 Init(s); 00229 return (*this); 00230 } 00231 00232 00234 00235 00236 00237 bool Init(const char* pattern); 00240 bool Init(const std::string& pattern); 00242 00244 00245 00246 const std::string &GetSMARTS() const { return _str; } 00248 std::string &GetSMARTS() { return _str; } 00249 00251 bool Empty() const { return(_pat == NULL); } 00253 bool IsValid() const { return(_pat != NULL); } 00254 00256 unsigned int NumAtoms() const 00257 { 00258 return _pat ? _pat->acount : 0; 00259 } 00261 unsigned int NumBonds() const 00262 { 00263 return _pat ? _pat->bcount : 0; 00264 } 00265 00271 void GetBond(int& src,int& dst,int& ord,int idx); 00273 int GetAtomicNum(int idx); 00275 int GetCharge(int idx); 00276 00278 int GetVectorBinding(int idx) const 00279 { 00280 return(_pat->atom[idx].vb); 00281 } 00283 00284 // number and kind of matches to return 00285 enum MatchType {All, Single, AllUnique}; 00286 00288 00289 00290 00291 00292 00293 bool Match(OBMol &mol, bool single=false); 00294 00296 00297 00298 00299 00300 00301 00302 00303 bool Match(OBMol &mol, std::vector<std::vector<int> > & mlist, MatchType mtype = All) const; 00304 00306 00307 00308 00309 00310 bool HasMatch(OBMol &mol) const; 00311 00312 bool RestrictedMatch(OBMol &mol, std::vector<std::pair<int,int> > &pairs, bool single=false); 00313 00314 bool RestrictedMatch(OBMol &mol, OBBitVec &bv, bool single=false); 00317 unsigned int NumMatches() const 00318 { 00319 return static_cast<unsigned int>(_mlist.size()); 00320 } 00321 00324 std::vector<std::vector<int> > &GetMapList() 00325 { 00326 return(_mlist); 00327 } 00329 std::vector<std::vector<int> >::iterator BeginMList() 00330 { 00331 return(_mlist.begin()); 00332 } 00334 std::vector<std::vector<int> >::iterator EndMList() 00335 { 00336 return(_mlist.end()); 00337 } 00338 00340 00350 std::vector<std::vector<int> > &GetUMapList(); 00352 00354 void WriteMapList(std::ostream&); 00355 }; 00356 00358 00362 class OBAPI OBSmartsMatcher 00363 { 00364 protected: 00365 //recursive smarts cache 00366 std::vector<std::pair<const Pattern*,std::vector<bool> > > RSCACHE; 00367 // list of fragment patterns (e.g., (*).(*) 00368 std::vector<const Pattern*> Fragments; 00369 /* 00370 bool EvalAtomExpr(AtomExpr *expr,OBAtom *atom); 00371 bool EvalBondExpr(BondExpr *expr,OBBond *bond); 00372 int GetVectorBinding(); 00373 int CreateAtom(Pattern*,AtomExpr*,int,int vb=0); 00374 */ 00375 bool EvalAtomExpr(AtomExpr *expr,OBAtom *atom); 00376 bool EvalBondExpr(BondExpr *expr,OBBond *bond); 00377 void SetupAtomMatchTable(std::vector<std::vector<bool> > &ttab, 00378 const Pattern *pat, OBMol &mol); 00379 void FastSingleMatch(OBMol &mol,const Pattern *pat, 00380 std::vector<std::vector<int> > &mlist); 00381 00382 friend class OBSSMatch; 00383 public: 00384 OBSmartsMatcher() {} 00385 virtual ~OBSmartsMatcher() {} 00386 00387 bool match(OBMol &mol, const Pattern *pat,std::vector<std::vector<int> > &mlist,bool single=false); 00388 00389 }; 00390 00394 class OBAPI OBSSMatch 00395 { 00396 protected: 00397 bool *_uatoms; 00398 OBMol *_mol; 00399 const Pattern *_pat; 00400 std::vector<int> _map; 00401 00402 public: 00403 OBSSMatch(OBMol&,const Pattern*); 00404 ~OBSSMatch(); 00405 void Match(std::vector<std::vector<int> > &v, int bidx=-1); 00406 }; 00407 00408 OBAPI void SmartsLexReplace(std::string &, 00409 std::vector<std::pair<std::string,std::string> > &); 00410 00411 } // end namespace OpenBabel 00412 00413 #endif // OB_PARSMART_H 00414
This file is part of the documentation for Open Babel, version 2.3.