Difference between revisions of "Article:kdr06"

From Open Babel
Jump to: navigation, search
 
Line 7: Line 7:
 
   pages = {2432--2444},
 
   pages = {2432--2444},
 
   abstract = {Most approaches to structure-activity-relationship (SAR) prediction proceed in two steps.
 
   abstract = {Most approaches to structure-activity-relationship (SAR) prediction proceed in two steps.
In the first step, a typically large set of fingerprints, or  
+
In the first step, a typically large set of fingerprints, or  
fragments of interest, is constructed (either by hand or  
+
fragments of interest, is constructed (either by hand or  
by some recent data mining techniques). In the second step,  
+
by some recent data mining techniques). In the second step,  
machine learning techniques are applied to obtain a  
+
machine learning techniques are applied to obtain a  
predictive model. The result is often not only a highly accurate
+
predictive model. The result is often not only a highly accurate
 
  but also hard to interpret model. In this paper, we demonstrate
 
  but also hard to interpret model. In this paper, we demonstrate
 
  the capabilities of a novel SAR algorithm, SMIREP, which tightly
 
  the capabilities of a novel SAR algorithm, SMIREP, which tightly
Line 18: Line 18:
 
  These rules contain SMILES fragments, which are easy to understand
 
  These rules contain SMILES fragments, which are easy to understand
 
  to the computational chemist. SMIREP combines ideas from the  
 
  to the computational chemist. SMIREP combines ideas from the  
well-known IREP rule learner with a novel fragmentation algorithm
+
well-known IREP rule learner with a novel fragmentation algorithm
 
  for SMILES strings. SMIREP has been evaluated on three problems:
 
  for SMILES strings. SMIREP has been evaluated on three problems:
 
  the prediction of binding activities for the estrogen receptor
 
  the prediction of binding activities for the estrogen receptor

Latest revision as of 20:07, 28 December 2006

@comment{
  kdr06: journal article introducing the SMIREP rule learner for SAR prediction.
  Authors are entered in "Last, First" form so the compound surname "De Raedt"
  is kept intact, and the acronyms in the title are brace-protected so that
  sentence-casing bibliography styles do not lowercase them.
}
@article{kdr06,
  author   = {Karwath, A. and De Raedt, L.},
  title    = {{SMIREP}: Predicting Chemical Activity from {SMILES}},
  journal  = {Journal of Chemical Information and Modeling},
  year     = {2006},
  volume   = {46},
  pages    = {2432--2444},
  abstract = {Most approaches to structure-activity-relationship (SAR) prediction proceed in two steps.
In the first step, a typically large set of fingerprints, or
fragments of interest, is constructed (either by hand or
by some recent data mining techniques). In the second step,
machine learning techniques are applied to obtain a
predictive model. The result is often not only a highly accurate
but also hard to interpret model. In this paper, we demonstrate
the capabilities of a novel SAR algorithm, SMIREP, which tightly
integrates the fragment and model generation steps and which
yields simple models in the form of a small set of IF-THEN rules.
These rules contain SMILES fragments, which are easy to understand
to the computational chemist. SMIREP combines ideas from the
well-known IREP rule learner with a novel fragmentation algorithm
for SMILES strings. SMIREP has been evaluated on three problems:
the prediction of binding activities for the estrogen receptor
(Environmental Protection Agency's (EPA's) Distributed Structure-Searchable
Toxicity (DSSTox) National Center for Toxicological Research estrogen
receptor (NCTRER) Database), the prediction of mutagenicity using
the carcinogenic potency database (CPDB), and the prediction of
biodegradability on a subset of the Environmental Fate Database (EFDB).
In these applications, SMIREP has the advantage of producing easily
interpretable rules while having predictive accuracies that are
comparable to those of alternative state-of-the-art techniques.},
  doi      = {10.1021/ci060159g},
}