Open Babel  3.0
parsmart.h
Go to the documentation of this file.
1 /**********************************************************************
2 parsmart.h - Daylight SMARTS parser.
3 
4 Copyright (C) 1998-2001 by OpenEye Scientific Software, Inc.
5 Some portions Copyright (C) 2001-2005 by Geoffrey R. Hutchison
6 
7 This file is part of the Open Babel project.
8 For more information, see <http://openbabel.org/>
9 
10 This program is free software; you can redistribute it and/or modify
11 it under the terms of the GNU General Public License as published by
12 the Free Software Foundation version 2 of the License.
13 
14 This program is distributed in the hope that it will be useful,
15 but WITHOUT ANY WARRANTY; without even the implied warranty of
16 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 GNU General Public License for more details.
18 ***********************************************************************/
19 
20 #ifndef OB_PARSMART_H
21 #define OB_PARSMART_H
22 
23 #include <string>
24 #include <vector>
25 
26 #include <openbabel/babelconfig.h>
27 
28 /*==========================*/
29 /* SMARTS Data Structures */
30 /*==========================*/
31 
32 namespace OpenBabel
33 {
34  class OBMol;
35  class OBAtom;
36  class OBBond;
37  class OBBitVec;
38 
39  // mark this so that SWIG will not attempt to wrap for scripting languages
40 
41 #ifndef SWIG
42 
// An internal (SMARTS parser) atomic expression.
// A union of node layouts; every layout starts with an `int type` member,
// so `type` can be read through any of them.
// NOTE(review): presumably `type` selects which layout is active -- the
// values it takes are not visible in this header; confirm in parsmart.cpp.
45  typedef union _AtomExpr {
46  int type;
    // leaf: a node carrying a single integer value.
47  struct
48  {
49  int type;
50  int value;
51  }
52  leaf;
    // recur: a node carrying an untyped pointer.
    // NOTE(review): presumably points at a nested (recursive SMARTS) pattern -- confirm.
53  struct
54  {
55  int type;
56  void *recur;
57  }
58  recur;
    // mon: a node with one child expression.
59  struct
60  {
61  int type;
62  union _AtomExpr *arg;
63  }
64  mon;
    // bin: a node with a left and a right child expression.
65  struct
66  {
67  int type;
68  union _AtomExpr *lft;
69  union _AtomExpr *rgt;
70  }
71  bin;
72  } AtomExpr;
73 
// An internal (SMARTS parser) bond expression.
// A union of node layouts; every layout starts with an `int type` member.
// Unlike AtomExpr there is no leaf or recursive layout here, only `mon` and `bin`.
// NOTE(review): the meaning of the `type` values is not visible in this header.
76  typedef union _BondExpr {
77  int type;
    // mon: a node with one child expression.
78  struct
79  {
80  int type;
81  union _BondExpr *arg;
82  }
83  mon;
    // bin: a node with a left and a right child expression.
84  struct
85  {
86  int type;
87  union _BondExpr *lft;
88  union _BondExpr *rgt;
89  }
90  bin;
91  } BondExpr;
92 
// An internal (SMARTS parser) bond specification.
// NOTE(review): this listing skips source line 97; the cross-reference index
// at the end of the page lists `BondExpr *expr` at parsmart.h:97, so the real
// struct most likely has that member as well -- confirm against the header.
95  typedef struct
96  {
    // NOTE(review): src/dst are presumably indices of the two pattern atoms
    // joined by this bond -- confirm in parsmart.cpp.
98  int src,dst;
99  int visit;
100  bool grow;
101  }
102  BondSpec;
103 
// An internal (SMARTS parser) atom specification.
// NOTE(review): this listing skips source lines 108 and 111; the
// cross-reference index at the end of the page lists `AtomExpr *expr`
// (parsmart.h:108) and `int chiral_flag` (parsmart.h:111), so the real struct
// most likely has those members as well -- confirm against the header.
106  typedef struct
107  {
109  int visit;
110  int part;
    // vb is the value OBSmartsPattern::GetVectorBinding(int idx) returns for this atom.
112  int vb;
113  std::vector<int> nbrs;
114  }
115  AtomSpec;
116 
// A SMARTS parser internal pattern.
// NOTE(review): this listing skips source lines 124, 125 and 127; the
// cross-reference index at the end of the page lists `AtomSpec *atom`
// (parsmart.h:124), `BondSpec *bond` (parsmart.h:125) and `bool hasExplicitH`
// (parsmart.h:127). OBSmartsPattern::GetVectorBinding(int) reads
// `_pat->atom[idx]`, so at least `atom` must exist in the real struct.
119  typedef struct
120  {
    // acount is what OBSmartsPattern::NumAtoms() reports.
    // NOTE(review): aalloc is presumably the allocated atom capacity -- confirm.
121  int aalloc,acount;
    // bcount is what OBSmartsPattern::NumBonds() reports.
    // NOTE(review): balloc is presumably the allocated bond capacity -- confirm.
122  int balloc,bcount;
123  bool ischiral;
126  int parts;
128  }
129  Pattern;
130 
// A SMARTS parser internal state.
// Holds two fixed-size, 100-entry tables: bond-expression pointers and ints.
// NOTE(review): presumably ring-closure bookkeeping (closure digit -> atom and
// bond expression) -- confirm in parsmart.cpp.
// NOTE(review): this listing skips source line 137; the cross-reference index
// at the end of the page lists `int closindex` at parsmart.h:137.
133  typedef struct
134  {
135  BondExpr *closord[100];
136  int closure[100];
138  } ParseState;
139 
140 #else
141  // for SWIG, just forward declare that we have some Pattern struct
142  // (but this is private and not wrapped for scripting languages)
143  struct Pattern;
144 #endif
145 
147  class OBSmartsPrivate;
148 
151 
152  // class introduction in parsmart.cpp
// SMARTS (SMiles ARbitrary Target Specification) substructure searching.
//
// Holds a SMARTS string (_str), its parsed form (_pat) and the list of
// matches (_mlist). Most member functions are only declared here.
//
// NOTE(review): this listing has dropped some source lines. The
// cross-reference index at the end of the page lists `Pattern *_pat`
// ("The parsed SMARTS pattern", parsmart.h:160) and
// `OBSmartsPattern &operator=(const OBSmartsPattern &cp)` (parsmart.h:191);
// both are used below but their declaration lines are missing here.
154  class OBAPI OBSmartsPattern
155  {
156  protected:
    // Internal data storage for future expansion.
157  OBSmartsPrivate *_d;
158  std::vector<bool> _growbond;
    // The list of matches.
159  std::vector<std::vector<int> > _mlist;
    // (source line 160, the `_pat` member, is missing from this listing)
    // The string of the SMARTS expression.
161  std::string _str;
162 
    // Raw character pointers, all set to NULL by both constructors.
    // NOTE(review): _buffer is released with delete[] in operator=, so it is
    // presumably a new[]-allocated copy of the SMARTS text; LexPtr and MainPtr
    // presumably point into it during parsing -- confirm in parsmart.cpp.
163  char *_buffer;
164  char *LexPtr;
165  char *MainPtr;
166 
    // Parser entry points and helpers (declarations only).
167  Pattern *ParseSMARTSPattern( void );
168  Pattern *ParseSMARTSPart( Pattern*, int );
169  Pattern *SMARTSError( Pattern *pat );
170  Pattern *ParseSMARTSError( Pattern *pat, BondExpr *expr );
171  AtomExpr *ParseSimpleAtomPrimitive( void );
172  AtomExpr *ParseComplexAtomPrimitive( void );
173  AtomExpr *ParseAtomExpr( int level );
174  BondExpr *ParseBondPrimitive( void );
175  BondExpr *ParseBondExpr( int level );
176  Pattern *ParseSMARTSString( char *ptr );
177  Pattern *ParseSMARTSRecord( char *ptr );
178  int GetVectorBinding();
179  Pattern *SMARTSParser( Pattern *pat, ParseState *stat,
180  int prev, int part );
181 
182  public:
    // Default constructor: no pattern, no buffer; Empty() is true afterwards.
    // Note that _d is not initialised here.
183  OBSmartsPattern() : _pat(NULL), _buffer(NULL), LexPtr(NULL), MainPtr(NULL) { }
184  virtual ~OBSmartsPattern();
185 
    // Copy constructor: starts from the empty state, then delegates to
    // operator=, which rebuilds the pattern from cp's SMARTS string.
186  OBSmartsPattern(const OBSmartsPattern& cp): _pat(NULL), _buffer(NULL), LexPtr(NULL), MainPtr(NULL)
187  {
188  *this = cp;
189  }
190 
    // Body of the copy-assignment operator (signature on missing source line 191).
    // Releases the current pattern and buffer, then calls Init() on a copy of
    // cp's SMARTS string instead of copying the parsed structure.
    // The return value of Init() is ignored.
192  {
    // Self-assignment: nothing to do.
193  if (this == &cp)
194  return *this;
195 
    // NOTE(review): _pat is released with array delete (delete[]). Confirm this
    // matches how the pattern is allocated in parsmart.cpp; a mismatch between
    // the allocation and deallocation forms is undefined behaviour.
196  if (_pat)
197  delete[] _pat;
198  if (_buffer)
199  delete[] _buffer;
200  _buffer = NULL;
201  _pat = NULL;
202  std::string s = cp._str;
203  Init(s);
204  return (*this);
205  }
206 
207 
209 
    // Initialise from a SMARTS string (declarations only).
    // NOTE(review): presumably returns whether parsing succeeded -- confirm.
210  bool Init(const char* pattern);
215  bool Init(const std::string& pattern);
217 
219 
    // Read-only access to the stored SMARTS string.
220  const std::string &GetSMARTS() const { return _str; }
    // Mutable access to the stored SMARTS string; hidden from SWIG.
    // Writing through this reference changes _str only; nothing here re-parses it.
223 #ifndef SWIG
224  std::string &GetSMARTS() { return _str; }
225 #endif
    // True when there is no parsed pattern (_pat is NULL).
226  bool Empty() const { return(_pat == NULL); }
    // True when there is a parsed pattern (_pat is not NULL).
229  bool IsValid() const { return(_pat != NULL); }
230 
    // Atom count of the parsed pattern, or 0 when there is no pattern.
232  unsigned int NumAtoms() const
233  {
234  return _pat ? _pat->acount : 0;
235  }
    // Bond count of the parsed pattern, or 0 when there is no pattern.
237  unsigned int NumBonds() const
238  {
239  return _pat ? _pat->bcount : 0;
240  }
241 
    // Per-bond and per-atom accessors (declarations only).
247  void GetBond(int& src,int& dst,int& ord,int idx);
249  int GetAtomicNum(int idx);
251  int GetCharge(int idx);
252 
    // Returns the `vb` field of pattern atom `idx`.
    // NOTE(review): unlike NumAtoms()/NumBonds(), this dereferences _pat with
    // no NULL check and does not range-check idx; callers must ensure
    // IsValid() and a valid index.
254  int GetVectorBinding(int idx) const
255  {
256  return(_pat->atom[idx].vb);
257  }
259 
260  // number and kind of matches to return
261  enum MatchType {All, Single, AllUnique};
262 
264 
    // Non-const overload (declaration only).
    // NOTE(review): presumably stores its results in _mlist -- confirm.
265  bool Match(OBMol &mol, bool single=false);
270 
272 
    // Const overload that writes matches into the caller-supplied mlist
    // (declaration only); mtype defaults to All.
273  bool Match(OBMol &mol, std::vector<std::vector<int> > & mlist, MatchType mtype = All) const;
280 
282 
283  bool HasMatch(OBMol &mol) const;
287 
288  bool RestrictedMatch(OBMol &mol, std::vector<std::pair<int,int> > &pairs, bool single=false);
289 
290  bool RestrictedMatch(OBMol &mol, OBBitVec &bv, bool single=false);
    // Number of entries currently in the match list.
293  unsigned int NumMatches() const
294  {
295  return static_cast<unsigned int>(_mlist.size());
296  }
297 
    // Direct, mutable access to the match list.
300  std::vector<std::vector<int> > &GetMapList()
301  {
302  return(_mlist);
303  }
    // Iterator to the first entry of the match list.
305  std::vector<std::vector<int> >::iterator BeginMList()
306  {
307  return(_mlist.begin());
308  }
    // Iterator one past the last entry of the match list.
310  std::vector<std::vector<int> >::iterator EndMList()
311  {
312  return(_mlist.end());
313  }
314 
316 
    // NOTE(review): presumably returns the match list reduced to unique
    // matches -- confirm in parsmart.cpp.
326  std::vector<std::vector<int> > &GetUMapList();
328 
    // NOTE(review): std::ostream is used here, but this header's visible
    // includes are only <string>, <vector> and babelconfig.h; it relies on one
    // of them declaring std::ostream.
330  void WriteMapList(std::ostream&);
331  };
332 
334 
// Internal class: performs matching; a wrapper around previous C matching
// code to make it thread safe.
// NOTE(review): this listing skips source line 360; the cross-reference index
// at the end of the page lists a constructor `OBSmartsMatcher()` there.
338  class OBAPI OBSmartsMatcher
339  {
340  protected:
341  //recursive smarts cache
342  std::vector<std::pair<const Pattern*,std::vector<bool> > > RSCACHE;
343  // list of fragment patterns (e.g., (*).(*))
344  std::vector<const Pattern*> Fragments;
345  /*
346  bool EvalAtomExpr(AtomExpr *expr,OBAtom *atom);
347  bool EvalBondExpr(BondExpr *expr,OBBond *bond);
348  int GetVectorBinding();
349  int CreateAtom(Pattern*,AtomExpr*,int,int vb=0);
350  */
    // Expression evaluators and match helpers (declarations only).
351  bool EvalAtomExpr(AtomExpr *expr,OBAtom *atom);
352  bool EvalBondExpr(BondExpr *expr,OBBond *bond);
353  void SetupAtomMatchTable(std::vector<std::vector<bool> > &ttab,
354  const Pattern *pat, OBMol &mol);
355  void FastSingleMatch(OBMol &mol,const Pattern *pat,
356  std::vector<std::vector<int> > &mlist);
357 
    // OBSSMatch is granted access to the protected members above.
358  friend class OBSSMatch;
359  public:
361  virtual ~OBSmartsMatcher() {}
362 
    // Matches pat against mol and writes results into mlist (declaration only).
    // NOTE(review): `single` presumably stops after the first match -- confirm.
363  bool match(OBMol &mol, const Pattern *pat,std::vector<std::vector<int> > &mlist,bool single=false);
364 
365  };
366 
// Internal class: performs fast, exhaustive matching used to find just a
// single match in match().
// NOTE(review): this listing skips source line 374; the cross-reference index
// at the end of the page lists `OBMol *_mol` there.
370  class OBAPI OBSSMatch
371  {
372  protected:
    // NOTE(review): raw bool array, presumably one "used" flag per molecule
    // atom, allocated in the constructor and freed in the destructor -- confirm.
    // No copy constructor or copy assignment is declared in the visible lines.
373  bool *_uatoms;
375  const Pattern *_pat;
376  std::vector<int> _map;
377 
378  public:
379  OBSSMatch(OBMol&,const Pattern*);
380  ~OBSSMatch();
    // Appends/writes matches into v (declaration only); bidx defaults to -1.
    // NOTE(review): bidx is presumably the index of the pattern bond to extend
    // from, with -1 meaning "start" -- confirm in parsmart.cpp.
381  void Match(std::vector<std::vector<int> > &v, int bidx=-1);
382  };
383 
// Free function taking a string and a vector of string pairs, both by
// non-const reference (declaration only).
// NOTE(review): presumably rewrites the SMARTS string in place, substituting
// each pair's first element with its second -- confirm in parsmart.cpp.
384  OBAPI void SmartsLexReplace(std::string &,
385  std::vector<std::pair<std::string,std::string> > &);
386 
387 } // end namespace OpenBabel
388 
389 #endif // OB_PARSMART_H
390 
void SmartsLexReplace(std::string &, std::vector< std::pair< std::string, std::string > > &)
unsigned int NumMatches() const
Definition: parsmart.h:293
union _AtomExpr * arg
Definition: parsmart.h:62
MatchType
Definition: parsmart.h:261
bool hasExplicitH
Definition: parsmart.h:127
unsigned int GetAtomicNum(const char *ptr)
Definition: elements.cpp:174
union _BondExpr * rgt
Definition: parsmart.h:88
OBSmartsMatcher()
Definition: parsmart.h:360
char * LexPtr
Definition: parsmart.h:164
int part
Definition: parsmart.h:110
bool * _uatoms
Definition: parsmart.h:373
OBSmartsPrivate * _d
Internal data storage for future expansion.
Definition: parsmart.h:157
A SMARTS parser internal pattern.
Definition: parsmart.h:119
bool IsValid() const
Definition: parsmart.h:229
unsigned int NumAtoms() const
Definition: parsmart.h:232
unsigned int NumBonds() const
Definition: parsmart.h:237
An internal (SMARTS parser) atom specification.
Definition: parsmart.h:106
Pattern * _pat
The parsed SMARTS pattern.
Definition: parsmart.h:160
const Pattern * _pat
Definition: parsmart.h:375
std::string _str
The string of the SMARTS expression.
Definition: parsmart.h:161
union OpenBabel::_BondExpr BondExpr
std::vector< int > _map
Definition: parsmart.h:376
OBSmartsPattern()
Definition: parsmart.h:183
Bond class.
Definition: bond.h:58
struct OpenBabel::_AtomExpr::@5 bin
union _AtomExpr * rgt
Definition: parsmart.h:69
struct OpenBabel::_AtomExpr::@2 leaf
Molecule Class.
Definition: mol.h:118
char * MainPtr
Definition: parsmart.h:165
int type
Definition: parsmart.h:77
AtomExpr * expr
Definition: parsmart.h:108
bool ischiral
Definition: parsmart.h:123
void * recur
Definition: parsmart.h:56
An internal (SMARTS parser) bond expression.
Definition: parsmart.h:76
An internal (SMARTS parser) atomic expression.
Definition: parsmart.h:45
int closindex
Definition: parsmart.h:137
Internal class: performs fast, exhaustive matching used to find just a single match in match() using ...
Definition: parsmart.h:370
A speed-optimized vector of bits.
Definition: bitvec.h:57
virtual ~OBSmartsMatcher()
Definition: parsmart.h:361
std::vector< std::pair< const Pattern *, std::vector< bool > > > RSCACHE
Definition: parsmart.h:342
bool grow
Definition: parsmart.h:100
int type
Definition: parsmart.h:46
SMARTS (SMiles ARbitrary Target Specification) substructure searching.
Definition: parsmart.h:154
int GetVectorBinding(int idx) const
Definition: parsmart.h:254
OBTypeTable ttab
Definition: data.h:225
union _AtomExpr * lft
Definition: parsmart.h:68
std::vector< int > nbrs
Definition: parsmart.h:113
BondSpec * bond
Definition: parsmart.h:125
int visit
Definition: parsmart.h:99
std::string & GetSMARTS()
Definition: parsmart.h:224
std::vector< std::vector< int > > _mlist
The list of matches.
Definition: parsmart.h:159
std::vector< std::vector< int > > & GetMapList()
Definition: parsmart.h:300
union _BondExpr * lft
Definition: parsmart.h:87
std::vector< std::vector< int > >::iterator EndMList()
Definition: parsmart.h:310
int value
Definition: parsmart.h:50
int src
Definition: parsmart.h:98
struct OpenBabel::_AtomExpr::@4 mon
AtomSpec * atom
Definition: parsmart.h:124
OBSmartsPattern & operator=(const OBSmartsPattern &cp)
Definition: parsmart.h:191
OBMol * _mol
Definition: parsmart.h:374
An internal (SMARTS parser) bond specification.
Definition: parsmart.h:95
Definition: parsmart.h:261
std::vector< std::vector< int > >::iterator BeginMList()
Definition: parsmart.h:305
int bcount
Definition: parsmart.h:122
char * _buffer
Definition: parsmart.h:163
OBSmartsPattern(const OBSmartsPattern &cp)
Definition: parsmart.h:186
union OpenBabel::_AtomExpr AtomExpr
A SMARTS parser internal state.
Definition: parsmart.h:133
std::vector< bool > _growbond
Definition: parsmart.h:158
BondExpr * expr
Definition: parsmart.h:97
std::vector< const Pattern * > Fragments
Definition: parsmart.h:344
int parts
Definition: parsmart.h:126
int acount
Definition: parsmart.h:121
int visit
Definition: parsmart.h:109
union _BondExpr * arg
Definition: parsmart.h:81
Internal class: performs matching; a wrapper around previous C matching code to make it thread safe...
Definition: parsmart.h:338
int chiral_flag
Definition: parsmart.h:111
int vb
Definition: parsmart.h:112
Global namespace for all Open Babel code.
Definition: alias.h:22
Atom class.
Definition: atom.h:71