parsmart.h
Go to the documentation of this file.
00001 /********************************************************************** 00002 parsmart.h - Daylight SMARTS parser. 00003 00004 Copyright (C) 1998-2001 by OpenEye Scientific Software, Inc. 00005 Some portions Copyright (C) 2001-2005 by Geoffrey R. Hutchison 00006 00007 This file is part of the Open Babel project. 00008 For more information, see <http://openbabel.org/> 00009 00010 This program is free software; you can redistribute it and/or modify 00011 it under the terms of the GNU General Public License as published by 00012 the Free Software Foundation version 2 of the License. 00013 00014 This program is distributed in the hope that it will be useful, 00015 but WITHOUT ANY WARRANTY; without even the implied warranty of 00016 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 00017 GNU General Public License for more details. 00018 ***********************************************************************/ 00019 00020 #ifndef OB_PARSMART_H 00021 #define OB_PARSMART_H 00022 00023 #include <string> 00024 #include <vector> 00025 00026 #include <openbabel/babelconfig.h> 00027 #include <openbabel/mol.h> 00028 00029 /*==========================*/ 00030 /* SMARTS Data Structures */ 00031 /*==========================*/ 00032 00033 namespace OpenBabel 00034 { 00035 00036 // mark this so that SWIG will not attempt to wrap for scripting languages 00037 00038 #ifndef SWIG 00039 00042 typedef union _AtomExpr { 00043 int type; 00044 struct 00045 { 00046 int type; 00047 int value; 00048 } 00049 leaf; 00050 struct 00051 { 00052 int type; 00053 void *recur; 00054 } 00055 recur; 00056 struct 00057 { 00058 int type; 00059 union _AtomExpr *arg; 00060 } 00061 mon; 00062 struct 00063 { 00064 int type; 00065 union _AtomExpr *lft; 00066 union _AtomExpr *rgt; 00067 } 00068 bin; 00069 } AtomExpr; 00070 00073 typedef union _BondExpr { 00074 int type; 00075 struct 00076 { 00077 int type; 00078 union _BondExpr *arg; 00079 } 00080 mon; 00081 struct 00082 { 00083 int type; 00084 union _BondExpr *lft; 00085 union _BondExpr *rgt; 00086 } 00087 bin; 00088 } BondExpr; 00089 00092 typedef struct 00093 { 00094 BondExpr *expr; 00095 int src,dst; 00096 int visit; 00097 bool grow; 00098 } 00099 BondSpec; 00100 00103 typedef struct 00104 { 00105 AtomExpr *expr; 00106 int visit; 00107 int part; 00108 int chiral_flag; 00109 int vb; 00110 std::vector<int> nbrs; 00111 } 00112 AtomSpec; 00113 00116 typedef struct 00117 { 00118 int aalloc,acount; 00119 int balloc,bcount; 00120 bool ischiral; 00121 AtomSpec *atom; 00122 BondSpec *bond; 00123 int parts; 00124 bool hasExplicitH; 00125 } 00126 Pattern; 00127 00130 typedef struct 00131 { 00132 BondExpr *closord[100]; 00133 int closure[100]; 00134 int closindex; 00135 } ParseState; 00136 00137 #else 00138 // for SWIG, just forward declare that we have some Pattern struct 00139 // (but this is private and not wrapped for scripting languages) 00140 struct Pattern; 00141 #endif 00142 00144 class OBSmartsPrivate; 00145 00148 00149 // class introduction in parsmart.cpp 00151 class OBAPI OBSmartsPattern 00152 { 00153 protected: 00154 OBSmartsPrivate *_d; 00155 std::vector<bool> _growbond; 00156 std::vector<std::vector<int> > _mlist; 00157 Pattern *_pat; 00158 std::string _str; 00159 00160 char *_buffer; 00161 char *LexPtr; 00162 char *MainPtr; 00163 00164 Pattern *ParseSMARTSPattern( void ); 00165 Pattern *ParseSMARTSPart( Pattern*, int ); 00166 Pattern *SMARTSError( Pattern *pat ); 00167 Pattern *ParseSMARTSError( Pattern *pat, BondExpr *expr ); 00168 AtomExpr *ParseSimpleAtomPrimitive( void ); 00169 AtomExpr *ParseComplexAtomPrimitive( void ); 00170 AtomExpr *ParseAtomExpr( int level ); 00171 BondExpr *ParseBondPrimitive( void ); 00172 BondExpr *ParseBondExpr( int level ); 00173 Pattern *ParseSMARTSString( char *ptr ); 00174 Pattern *ParseSMARTSRecord( char *ptr ); 00175 int GetVectorBinding(); 00176 Pattern *SMARTSParser( Pattern *pat, ParseState *stat, 00177 int prev, int part ); 00178 00179 public: 00180 OBSmartsPattern() : _pat(NULL), _buffer(NULL), LexPtr(NULL), MainPtr(NULL) { } 00181 virtual ~OBSmartsPattern(); 00182 00183 OBSmartsPattern(const OBSmartsPattern& cp): _pat(NULL), _buffer(NULL), LexPtr(NULL), MainPtr(NULL) 00184 { 00185 *this = cp; 00186 } 00187 00188 OBSmartsPattern& operator=(const OBSmartsPattern& cp) 00189 { 00190 if (this == &cp) 00191 return *this; 00192 00193 if (_pat) 00194 delete[] _pat; 00195 if (_buffer) 00196 delete[] _buffer; 00197 _buffer = NULL; 00198 _pat = NULL; 00199 std::string s = cp._str; 00200 Init(s); 00201 return (*this); 00202 } 00203 00204 00206 00207 00208 00209 bool Init(const char* pattern); 00212 bool Init(const std::string& pattern); 00214 00216 00217 00218 const std::string &GetSMARTS() const { return _str; } 00220 #ifndef SWIG 00221 std::string &GetSMARTS() { return _str; } 00222 #endif 00223 00224 bool Empty() const { return(_pat == NULL); } 00226 bool IsValid() const { return(_pat != NULL); } 00227 00229 unsigned int NumAtoms() const 00230 { 00231 return _pat ? _pat->acount : 0; 00232 } 00234 unsigned int NumBonds() const 00235 { 00236 return _pat ? _pat->bcount : 0; 00237 } 00238 00244 void GetBond(int& src,int& dst,int& ord,int idx); 00246 int GetAtomicNum(int idx); 00248 int GetCharge(int idx); 00249 00251 int GetVectorBinding(int idx) const 00252 { 00253 return(_pat->atom[idx].vb); 00254 } 00256 00257 // number and kind of matches to return 00258 enum MatchType {All, Single, AllUnique}; 00259 00261 00262 00263 00264 00265 00266 bool Match(OBMol &mol, bool single=false); 00267 00269 00270 00271 00272 00273 00274 00275 00276 bool Match(OBMol &mol, std::vector<std::vector<int> > & mlist, MatchType mtype = All) const; 00277 00279 00280 00281 00282 00283 bool HasMatch(OBMol &mol) const; 00284 00285 bool RestrictedMatch(OBMol &mol, std::vector<std::pair<int,int> > &pairs, bool single=false); 00286 00287 bool RestrictedMatch(OBMol &mol, OBBitVec &bv, bool single=false); 00290 unsigned int NumMatches() const 00291 { 00292 return static_cast<unsigned int>(_mlist.size()); 00293 } 00294 00297 std::vector<std::vector<int> > &GetMapList() 00298 { 00299 return(_mlist); 00300 } 00302 std::vector<std::vector<int> >::iterator BeginMList() 00303 { 00304 return(_mlist.begin()); 00305 } 00307 std::vector<std::vector<int> >::iterator EndMList() 00308 { 00309 return(_mlist.end()); 00310 } 00311 00313 00323 std::vector<std::vector<int> > &GetUMapList(); 00325 00327 void WriteMapList(std::ostream&); 00328 }; 00329 00331 00335 class OBAPI OBSmartsMatcher 00336 { 00337 protected: 00338 //recursive smarts cache 00339 std::vector<std::pair<const Pattern*,std::vector<bool> > > RSCACHE; 00340 // list of fragment patterns (e.g., (*).(*) 00341 std::vector<const Pattern*> Fragments; 00342 /* 00343 bool EvalAtomExpr(AtomExpr *expr,OBAtom *atom); 00344 bool EvalBondExpr(BondExpr *expr,OBBond *bond); 00345 int GetVectorBinding(); 00346 int CreateAtom(Pattern*,AtomExpr*,int,int vb=0); 00347 */ 00348 bool EvalAtomExpr(AtomExpr *expr,OBAtom *atom); 00349 bool EvalBondExpr(BondExpr *expr,OBBond *bond); 00350 void SetupAtomMatchTable(std::vector<std::vector<bool> > &ttab, 00351 const Pattern *pat, OBMol &mol); 00352 void FastSingleMatch(OBMol &mol,const Pattern *pat, 00353 std::vector<std::vector<int> > &mlist); 00354 00355 friend class OBSSMatch; 00356 public: 00357 OBSmartsMatcher() {} 00358 virtual ~OBSmartsMatcher() {} 00359 00360 bool match(OBMol &mol, const Pattern *pat,std::vector<std::vector<int> > &mlist,bool single=false); 00361 00362 }; 00363 00367 class OBAPI OBSSMatch 00368 { 00369 protected: 00370 bool *_uatoms; 00371 OBMol *_mol; 00372 const Pattern *_pat; 00373 std::vector<int> _map; 00374 00375 public: 00376 OBSSMatch(OBMol&,const Pattern*); 00377 ~OBSSMatch(); 00378 void Match(std::vector<std::vector<int> > &v, int bidx=-1); 00379 }; 00380 00381 OBAPI void SmartsLexReplace(std::string &, 00382 std::vector<std::pair<std::string,std::string> > &); 00383 00384 } // end namespace OpenBabel 00385 00386 #endif // OB_PARSMART_H 00387


