inchiformat.h
Go to the documentation of this file.
00001 /********************************************************************** 00002 Copyright (C) 2005,2006,2007 Chris Morley 00003 00004 Based on the IUPAC InChI reference software, which is distributed 00005 under the GNU LGPL: 00006 Copyright (C) 2005 The International Union of Pure and Applied Chemistry 00007 IUPAC International Chemical Identifier (InChI) (contact:secretariat@iupac.org) 00008 00009 This program is free software; you can redistribute it and/or modify 00010 it under the terms of the GNU General Public License as published by 00011 the Free Software Foundation version 2 of the License. 00012 00013 This program is distributed in the hope that it will be useful, 00014 but WITHOUT ANY WARRANTY; without even the implied warranty of 00015 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 00016 GNU General Public License for more details. 00017 ***********************************************************************/ 00018 #include <openbabel/babelconfig.h> 00019 #include <openbabel/mol.h> 00020 #include <openbabel/obconversion.h> 00021 #include <openbabel/obmolecformat.h> 00022 00023 #include "inchi_api.h" 00024 #ifdef HAVE_SSTREAM 00025 #include <sstream> 00026 #else 00027 #include <strstream> 00028 #endif 00029 #include <set> 00030 #include <vector> 00031 #include "openbabel/chiral.h" 00032 00033 using namespace std; 00034 namespace OpenBabel 00035 { 00036 extern string GetInChI(istream& is); 00037 00038 class InChIFormat : public OBMoleculeFormat 00039 { 00040 public: 00041 InChIFormat() 00042 { 00043 OBConversion::RegisterFormat("inchi",this); 00044 OBConversion::RegisterOptionParam("n", this, 0, OBConversion::INOPTIONS); 00045 OBConversion::RegisterOptionParam("t", this); 00046 OBConversion::RegisterOptionParam("l", this); 00047 OBConversion::RegisterOptionParam("X", this, 1, OBConversion::OUTOPTIONS); 00048 OBConversion::RegisterOptionParam("K", this, 0, OBConversion::OUTOPTIONS); 00049 OBConversion::RegisterOptionParam("F", this, 0, OBConversion::OUTOPTIONS); 00050 OBConversion::RegisterOptionParam("X", this, 1, OBConversion::INOPTIONS); 00051 OBConversion::RegisterOptionParam("T", this, 1, OBConversion::OUTOPTIONS); 00052 } 00053 00054 virtual const char* Description() 00055 { 00056 return 00057 "InChI format\n" 00058 "IUPAC/NIST molecular identifier\n\n" 00059 00060 "Write Options, e.g. -xat\n" 00061 " Standard InChI is written unless certain InChI options are used\n" 00062 " K output InChIKey only\n" 00063 " t add molecule name after InChI\n" 00064 " w ignore less important warnings\n" 00065 " These are:\n" 00066 " \'Omitted undefined stereo\'\n" 00067 " \'Charges were rearranged\'\n" 00068 " \'Proton(s) added/removed\'\n" 00069 " \'Metal was disconnected\'\n" 00070 " a output auxilliary information\n" 00071 " l display InChI log\n" 00072 " s recalculate wedge and hash bonds(2D structures only)\n" 00073 " **Uniqueness options** (see also ``--unique`` and ``--sort`` which are more versatile)\n" 00074 " u output only unique molecules\n" 00075 " U output only unique molecules and sort them\n" 00076 " e compare first molecule to others\n" 00077 " This can also be done with :ref:`InChICompare format <Compare_molecules_using_InChI>`::\n\n" 00078 " babel first.smi second.mol third.cml -ok\n\n" 00079 " T <param> truncate InChI according to various parameters\n" 00080 " See below for possible truncation parameters.\n" 00081 " These can be combined, e.g. ``/nochg/noiso``\n" 00082 " X <Option string> Additional InChI options\n" 00083 " See InChI documentation.\n" 00084 " These options should be space delimited in a single quoted string.\n\n" 00085 " - Structure perception (compatible with stdInChI): ``NEWPSOFF``, ``DoNotAddH``, ``SNon``\n" 00086 " - Stereo interpretation (produces non-standard InChI): ``SRel``, ``SRac``,\n" 00087 " ``SUCF``, ``ChiralFlagON``, ``ChiralFlagOFF``\n" 00088 " - InChI creation options (produces non-standard InChI): ``SUU``, ``SLUUD``,\n" 00089 " ``FixedH``, ``RecMet``, ``KET``, ``15T``\n" 00090 " The following options are for convenience, e.g. ``-xF``\n" 00091 " but produce non-standard InChI.\n" 00092 " F include fixed hydrogen layer\n" 00093 " M include bonds to metal\n\n" 00094 00095 "Read Options, e.g. -an\n" 00096 " X <Option string> List of InChI options\n" 00097 " n molecule name follows InChI on same line\n" 00098 " a add InChI string to molecule name\n\n" 00099 00100 "Truncation parameters used with ``-xT``:\n\n" 00101 "/formula formula only\n" 00102 "/connect formula and connectivity only\n" 00103 "/nostereo ignore E/Z and sp3 stereochemistry\n" 00104 "/sp3 ignore sp3 stereochemistry\n" 00105 "/noEZ ignore E/Z steroeochemistry\n" 00106 "/nochg ignore charge and protonation\n" 00107 "/noiso ignore isotopes\n\n" 00108 ; 00109 }; 00110 00111 virtual const char* SpecificationURL() 00112 { return "http://www.iupac.org/inchi/";}; 00113 00114 virtual bool ReadMolecule(OBBase* pOb, OBConversion* pConv); 00115 virtual bool WriteMolecule(OBBase* pOb, OBConversion* pConv); 00116 virtual int SkipObjects(int n, OBConversion* pConv); 00117 00118 static char CompareInchi(const string& Inchi1, const string& Inchi2); 00119 static string InChIErrorMessage(const char ch); 00120 00124 static bool EditInchi(std::string& inchi, std::string& spec); 00125 00127 // "a6b" (or "a06b") is less than "a15b" 00128 // and "CH4" is less than "C2H6" 00129 // and "CH4" is less than "ClH" (hydrogen chloride) 00130 struct InchiLess 00131 : public binary_function<const string&, const string&, bool> 00132 { 00133 bool operator()(const string& s1, const string& s2) const 00134 { 00135 //stop at the first space or the end of the strings 00136 string::const_iterator p1=s1.begin(), p2=s2.begin(), 00137 p1end=find(s1.begin(), s1.end(), ' '), p2end=find(s2.begin(), s2.end(), ' '); 00138 00139 while( p1<p1end && p2<p2end) 00140 { 00141 int n1=-1,n2=-1; 00142 if(isdigit(*p1)) 00143 { 00144 n1 = atoi(&*p1); 00145 //skip over number 00146 while(p1!=s1.end() && isdigit(*p1++)); --p1; 00147 } 00148 if(isdigit(*p2)) 00149 { 00150 n2 = atoi(&*p2); 00151 while(p2!=s2.end() && isdigit(*p2++)); --p2; 00152 } 00153 if(n1<0 && n2 < 0) 00154 { 00155 //neither numbers 00156 if(*p1 != *p2) 00157 return *p1 < *p2; 00158 } 00159 else if(n1>=0 && n2>0) 00160 { 00161 //both numbers 00162 if(n1!=n2) 00163 return n1 < n2; 00164 } 00165 else if(n1>0) 00166 return islower(*p2)!=0; 00167 else if(n2>0) 00168 return !islower(*p1); 00169 00170 ++p1; ++p2; // iterate 00171 } // while loop 00172 return false; //identical 00173 } 00174 }; 00175 00176 private: 00178 static void RemoveLayer (std::string& inchi, const std::string& str, bool all=false); 00179 00180 private: 00181 OBAtom* GetCommonAtom(OBBond* pb1, OBBond* pb2); 00182 char* GetInChIOptions(OBConversion* pConv, bool Reading); 00183 00184 typedef set<string, InchiLess> nSet; 00185 nSet allInchi; 00186 string firstInchi; 00187 string firstID; 00188 }; 00189 00190 //***************************************************** 00191 class InChICompareFormat : public OBMoleculeFormat 00192 { 00193 public: 00194 InChICompareFormat() 00195 { 00196 OBConversion::RegisterFormat("k",this); 00197 } 00198 virtual const char* Description() //required 00199 { 00200 return 00201 "Compare molecules using InChI\n" 00202 "A utility format that allows you to compare molecules using their InChIs\n" 00203 "The first molecule is compared with the rest, e.g.::\n\n" 00204 00205 " babel first.smi second.mol third.cml -ok\n\n" 00206 00207 "This is the same as using ``-oinchi -xet`` and can take the same options as InChI format\n" 00208 "(see :ref:`InChI_format`).\n"; 00209 } 00210 virtual bool WriteMolecule(OBBase* pOb, OBConversion* pConv); 00211 virtual unsigned int Flags() { return NOTREADABLE;}; 00212 }; 00213 00214 }//namespace OpenBabel


