#include <openbabel/xml.h>
Inheritance diagram for XMLConversion:
Option handling | |
enum | Option_type { INOPTIONS, OUTOPTIONS, GENOPTIONS } |
const char * | IsOption (const char *opt, Option_type opttyp=OUTOPTIONS) |
const std::map< std::string, std::string > * | GetOptions (Option_type opttyp) |
void | AddOption (const char *opt, Option_type opttyp, const char *txt=NULL) |
bool | RemoveOption (const char *opt, Option_type optype) |
void | SetOptions (const char *options, Option_type opttyp) |
static void | RegisterOptionParam (std::string name, OBFormat *pFormat, int numberParams=0, Option_type typ=OUTOPTIONS) |
static int | GetOptionParams (std::string name, Option_type typ) |
Convenience functions | |
bool | Write (OBBase *pOb, std::ostream *pout=NULL) |
std::string | WriteString (OBBase *pOb, bool trimWhitespace=false) |
bool | WriteFile (OBBase *pOb, std::string filePath) |
void | CloseOutFile () |
bool | Read (OBBase *pOb, std::istream *pin=NULL) |
bool | ReadString (OBBase *pOb, std::string input) |
bool | ReadFile (OBBase *pOb, std::string filePath) |
static OBFormat * | GetDefaultFormat () |
static std::string | BatchFileName (std::string &BaseName, std::string &InFile) |
static std::string | IncrementedFileName (std::string &BaseName, const int Count) |
static bool | CheckForUnintendedBatch (const std::string &infile, const std::string &outfile) |
Public Types | |
typedef std::map< std::string, XMLBaseFormat * > | NsMapType |
Public Member Functions | |
XMLConversion (OBConversion *pConv) | |
~XMLConversion () | |
bool | SetupReader () |
bool | SetupWriter () |
bool | ReadXML (XMLBaseFormat *pFormat, OBBase *pOb) |
int | SkipXML (const char *ctag) |
bool | IsLast () |
int | GetOutputIndex () |
xmlTextReaderPtr | GetReader () const |
xmlTextWriterPtr | GetWriter () const |
void | OutputToStream () |
void | LookForNamespace () |
std::string | GetAttribute (const char *attrname) |
std::string | GetContent () |
bool | GetContentInt (int &value) |
bool | GetContentDouble (double &value) |
Parameter get and set | |
std::istream * | GetInStream () const |
std::ostream * | GetOutStream () const |
void | SetInStream (std::istream *pIn) |
void | SetOutStream (std::ostream *pOut) |
bool | SetInAndOutFormats (const char *inID, const char *outID) |
bool | SetInAndOutFormats (OBFormat *pIn, OBFormat *pOut) |
bool | SetInFormat (const char *inID) |
bool | SetInFormat (OBFormat *pIn) |
bool | SetOutFormat (const char *outID) |
bool | SetOutFormat (OBFormat *pOut) |
OBFormat * | GetInFormat () const |
OBFormat * | GetOutFormat () const |
std::string | GetInFilename () const |
std::streampos | GetInPos () const |
size_t | GetInLen () const |
const char * | GetTitle () const |
OBConversion * | GetAuxConv () const |
void | SetAuxConv (OBConversion *pConv) |
Supported file format | |
std::vector< std::string > | GetSupportedInputFormat () |
std::vector< std::string > | GetSupportedOutputFormat () |
Conversion | |
int | Convert (std::istream *is, std::ostream *os) |
int | Convert () |
int | FullConvert (std::vector< std::string > &FileList, std::string &OutputFileName, std::vector< std::string > &OutputFileList) |
Conversion loop control | |
bool | AddChemObject (OBBase *pOb) |
OBBase * | GetChemObject () |
bool | IsFirstInput () |
int | GetOutputIndex () const |
void | SetOutputIndex (int indx) |
void | SetMoreFilesToCome () |
void | SetOneObjectOnly (bool b=true) |
void | SetLast (bool b) |
Static Public Member Functions | |
static NsMapType & | Namespaces () |
static void | RegisterXMLFormat (XMLBaseFormat *pFormat, bool IsDefault=false, const char *uri=NULL) |
static XMLConversion * | GetDerived (OBConversion *pConv, bool ForReading=true) |
static XMLBaseFormat * | GetDefaultXMLClass () |
static int | ReadStream (void *context, char *buffer, int len) |
static int | WriteStream (void *context, const char *buffer, int len) |
Collection of formats | |
static int | RegisterFormat (const char *ID, OBFormat *pFormat, const char *MIME=NULL) |
static OBFormat * | FindFormat (const char *ID) |
static OBFormat * | FormatFromExt (const char *filename) |
static OBFormat * | FormatFromMIME (const char *MIME) |
static bool | GetNextFormat (Formatpos &itr, const char *&str, OBFormat *&pFormat) |
Information | |
static const char * | Description () |
Public Attributes | |
bool | _SkipNextRead |
Protected Types | |
typedef std::map< std::string, int > | OPAMapType |
Protected Member Functions | |
bool | SetStartAndEnd () |
bool | OpenAndSetFormat (bool SetFormat, std::ifstream *is) |
Static Protected Member Functions | |
static FMapType & | FormatsMap () |
static FMapType & | FormatsMIMEMap () |
static OPAMapType & | OptionParamArray (Option_type typ) |
static int | LoadFormatFiles () |
Protected Attributes | |
std::string | InFilename |
std::istream * | pInStream |
std::ostream * | pOutStream |
OBFormat * | pInFormat |
OBFormat * | pOutFormat |
std::map< std::string, std::string > | OptionsArray [3] |
int | Index |
unsigned int | StartNumber |
unsigned int | EndNumber |
int | Count |
bool | m_IsFirstInput |
bool | m_IsLast |
bool | MoreFilesToCome |
bool | OneObjectOnly |
bool | ReadyToInput |
bool | CheckedForGzip |
bool | NeedToFreeInStream |
bool | NeedToFreeOutStream |
OBBase * | pOb1 |
std::streampos | wInpos |
std::streampos | rInpos |
size_t | wInlen |
size_t | rInlen |
OBConversion * | pAuxConv |
std::vector< std::string > | SupportedInputFormat |
std::vector< std::string > | SupportedOutputFormat |
Static Protected Attributes | |
static OBFormat * | pDefaultFormat = NULL |
static int | FormatFilesLoaded = 0 |
An extended OBConversion class which includes a libxml2 reader for use with XML formats. Copies an OBConversion and then extends it with an XML parser. Instances made on the heap are deleted when the original OBConversion object is.
This class is not intended to be used externally -- instead use OBConversion which will find both XML and non-XML OBFormats.
In addition, this subclass has support for handling specific needs in XML formats. For example, an XML file may include multiple namespaces, and the conversion should call appropriate XMLBaseFormat formats as needed.
typedef std::map<std::string, XMLBaseFormat*> NsMapType |
typedef std::map<std::string,int> OPAMapType [protected, inherited] |
enum Option_type [inherited] |
XMLConversion | ( | OBConversion * | pConv | ) |
Existing OBConversion instance copied.
~XMLConversion | ( | ) |
Frees reader and writer if necessary.
bool SetupReader | ( | ) |
opens libxml2 reader
bool SetupWriter | ( | ) |
opens libxml2 writer
bool ReadXML | ( | XMLBaseFormat * | pFormat, | |
OBBase * | pOb | |||
) |
Parses the input xml stream and sends each element to the format's callback routines.
int SkipXML | ( | const char * | ctag | ) |
Read and discard XML text up to the next occurrence of the tag, e.g. "/molecule>". This is left as the current node. Returns 1 on success, 0 if not found, -1 if failed.
static NsMapType& Namespaces | ( | ) | [inline, static] |
This static function returns a reference to the map. Avoids the "static initialization order fiasco".
static void RegisterXMLFormat | ( | XMLBaseFormat * | pFormat, | |
bool | IsDefault = false , |
|||
const char * | uri = NULL | |||
) | [static] |
static XMLConversion* GetDerived | ( | OBConversion * | pConv, | |
bool | ForReading = true | |||
) | [static] |
Returns the extended OBConversion class, making it if necessary.
bool IsLast | ( | ) | [inline] |
Because OBConversion::Convert is still using the unextended OBConversion object, we need to obtain the conversion parameters from it when requested.
Reimplemented from OBConversion.
int GetOutputIndex | ( | ) | [inline] |
xmlTextReaderPtr GetReader | ( | ) | const [inline] |
xmlTextWriterPtr GetWriter | ( | ) | const [inline] |
void OutputToStream | ( | ) | [inline] |
static XMLBaseFormat* GetDefaultXMLClass | ( | ) | [inline, static] |
void LookForNamespace | ( | ) | [inline] |
static int ReadStream | ( | void * | context, | |
char * | buffer, | |||
int | len | |||
) | [static] |
Static callback functions for xmlReaderForIO().
static int WriteStream | ( | void * | context, | |
const char * | buffer, | |||
int | len | |||
) | [static] |
std::string GetAttribute | ( | const char * | attrname | ) |
std::string GetContent | ( | ) |
Sets value to element content. Returns false if there is no content.
bool GetContentInt | ( | int & | value | ) |
Sets value to element content as an integer. Returns false if there is no content.
bool GetContentDouble | ( | double & | value | ) |
Sets value to element content as a double. Returns false if there is no content.
int RegisterFormat | ( | const char * | ID, | |
OBFormat * | pFormat, | |||
const char * | MIME = NULL | |||
) | [static, inherited] |
Called once by each format class.
Class information on formats is collected by making an instance of the class derived from OBFormat(only one is usually required). RegisterFormat() is called from its constructor.
If the compiled format is stored separately, like in a DLL or shared library, the initialization code makes an instance of the imported OBFormat class.
OBFormat * FindFormat | ( | const char * | ID | ) | [static, inherited] |
Searches registered formats.
OBFormat * FormatFromExt | ( | const char * | filename | ) | [static, inherited] |
Searches registered formats for an ID the same as the file extension.
OBFormat * FormatFromMIME | ( | const char * | MIME | ) | [static, inherited] |
Searches registered formats for a MIME the same as the chemical MIME type passed.
Repeatedly called to recover available Formats.
Returns the ID + the first line of the description in str and a pointer to the format in pFormat. If called with str==NULL the first format is returned; subsequent formats are returned by calling with str!=NULL and the previous value of itr. Returns false, with str and pFormat set to NULL, when there are no more formats. Use like:
const char* str=NULL;
Formatpos pos;
OBConversion conv; // dummy to make sure static data is available
while(OBConversion::GetNextFormat(pos,str,pFormat))
{
  // use str and pFormat
}
NOTE: Because of dynamic loading problems, it is usually necessary to declare a "dummy" OBConversion object to access this static method. (Not elegant, but will hopefully be fixed in the future.)
const char * Description | ( | ) | [static, inherited] |
std::istream* GetInStream | ( | ) | const [inline, inherited] |
std::ostream* GetOutStream | ( | ) | const [inline, inherited] |
void SetInStream | ( | std::istream * | pIn | ) | [inline, inherited] |
void SetOutStream | ( | std::ostream * | pOut | ) | [inline, inherited] |
bool SetInAndOutFormats | ( | const char * | inID, | |
const char * | outID | |||
) | [inherited] |
Sets the formats from their ids, e.g. CML.
Sets the formats from their ids, e.g. CML. If inID is NULL, the input format is left unchanged. Similarly for outID. Returns true if both formats have been successfully set at some time.
bool SetInFormat | ( | const char * | inID | ) | [inherited] |
Sets the input format from an id e.g. CML.
bool SetInFormat | ( | OBFormat * | pIn | ) | [inherited] |
bool SetOutFormat | ( | const char * | outID | ) | [inherited] |
Sets the output format from an id e.g. CML.
bool SetOutFormat | ( | OBFormat * | pOut | ) | [inherited] |
OBFormat* GetInFormat | ( | ) | const [inline, inherited] |
OBFormat* GetOutFormat | ( | ) | const [inline, inherited] |
std::string GetInFilename | ( | ) | const [inline, inherited] |
std::streampos GetInPos | ( | ) | const [inline, inherited] |
Get the position in the input stream of the object being read.
size_t GetInLen | ( | ) | const [inline, inherited] |
Get the length in the input stream of the object being read.
const char * GetTitle | ( | ) | const [inherited] |
OBConversion* GetAuxConv | ( | ) | const [inline, inherited] |
Extension method: deleted in ~OBConversion().
void SetAuxConv | ( | OBConversion * | pConv | ) | [inline, inherited] |
const char * IsOption | ( | const char * | opt, | |
Option_type | opttyp = OUTOPTIONS | |||
) | [inherited] |
Determine whether an option is set.
const std::map<std::string,std::string>* GetOptions | ( | Option_type | opttyp | ) | [inline, inherited] |
Access the map with option name as key and any associated text as value.
void AddOption | ( | const char * | opt, | |
Option_type | opttyp, | |||
const char * | txt = NULL | |||
) | [inherited] |
Set an option of specified type, with optional text.
bool RemoveOption | ( | const char * | opt, | |
Option_type | optype | |||
) | [inherited] |
void SetOptions | ( | const char * | options, | |
Option_type | opttyp | |||
) | [inherited] |
Set several single character options of specified type from string like ab"btext"c"ctext".
void RegisterOptionParam | ( | std::string | name, | |
OBFormat * | pFormat, | |||
int | numberParams = 0 , |
|||
Option_type | typ = OUTOPTIONS | |||
) | [static, inherited] |
For example -h takes 0 parameters; -f takes 1. Call in a format constructor.
int GetOptionParams | ( | std::string | name, | |
Option_type | typ | |||
) | [static, inherited] |
std::vector< std::string > GetSupportedInputFormat | ( | ) | [inherited] |
Returns the list of supported input formats.
std::vector< std::string > GetSupportedOutputFormat | ( | ) | [inherited] |
Returns the list of supported output formats.
int Convert | ( | std::istream * | is, | |
std::ostream * | os | |||
) | [inherited] |
Conversion for single input and output stream.
int Convert | ( | ) | [inherited] |
Conversion with existing streams.
Actions the "convert" interface. Calls the OBFormat class's ReadMolecule() which
AddChemObject does not save the object passed to it if it is NULL (as a result of a DoTransformation()) or if the number of the object is outside the range defined by StartNumber and EndNumber. This means the start and end counts apply to all chemical objects found, whether or not they are output.
If ReadMolecule returns false the input conversion loop is exited.
int FullConvert | ( | std::vector< std::string > & | FileList, | |
std::string & | OutputFileName, | |||
std::vector< std::string > & | OutputFileList | |||
) | [inherited] |
Conversion with multiple input/output files: makes input and output streams, and carries out normal, batch, aggregation, and splitting conversion.
Makes input and output streams, and carries out normal, batch, aggregation, and splitting conversion.
Normal Done if FileList contains a single file name and OutputFileName does not contain a *.
Aggregation Done if FileList has more than one file name and OutputFileName does not contain * . All the chemical objects are converted and sent to the single output file.
Splitting: Done if FileList contains a single file name and OutputFileName contains a * . Each chemical object in the input file is converted and sent to a separate file whose name is OutputFileName with the * replaced by 1, 2, 3, etc. OutputFileName must have at least one character other than the * before the extension. For example, if OutputFileName is NEW*.smi then the output files are NEW1.smi, NEW2.smi, etc.
Batch Conversion: Done if FileList has more than one file name and OutputFileName contains a * . Each input file is converted to an output file whose name is OutputFileName with the * replaced by the input file name without its path and extension. So if the input files were inpath/First.cml, inpath/Second.cml and OutputFileName was NEW*.mol, the output files would be NEWFirst.mol, NEWSecond.mol.
If FileList is empty, the input stream that has already been set (usually in the constructor) is used. If OutputFileName is empty, the output stream already set is used.
On exit, OutputFileList contains the names of the output files.
Returns the number of Chemical objects converted.
bool AddChemObject | ( | OBBase * | pOb | ) | [inherited] |
Adds to internal array during input.
Called by ReadMolecule() to deliver an object it has read from an input stream. Used in two modes:
OBBase * GetChemObject | ( | ) | [inherited] |
Retrieve from internal array during output.
Retrieves an object stored by AddChemObject() during output
bool IsFirstInput | ( | ) | [inherited] |
True if the first input object is being processed.
int GetOutputIndex | ( | ) | const [inherited] |
Retrieves number of ChemObjects that have been actually output.
Returns the number of objects which have been output or are currently being output. The outputindex is incremented when an object for output is fetched by GetChemObject(). So the function will return 1 if called from WriteMolecule() during output of the first object.
void SetOutputIndex | ( | int | indx | ) | [inherited] |
Sets output index (maybe to control whether seen as first object).
void SetMoreFilesToCome | ( | ) | [inherited] |
Used with multiple input files. Off by default.
void SetOneObjectOnly | ( | bool | b = true |
) | [inherited] |
Used with multiple input files. Off by default.
void SetLast | ( | bool | b | ) | [inline, inherited] |
static OBFormat* GetDefaultFormat | ( | ) | [inline, static, inherited] |
bool Write | ( | OBBase * | pOb, | |
std::ostream * | pout = NULL | |||
) | [inherited] |
Outputs an object of a class derived from OBBase.
Part of "API" interface. The output stream can be specified and the change is retained in the OBConversion instance
std::string WriteString | ( | OBBase * | pOb, | |
bool | trimWhitespace = false | |||
) | [inherited] |
Outputs an object of a class derived from OBBase as a string.
Part of "API" interface. The output stream is temporarily changed to the string and then restored. This method is primarily intended for scripting languages without "stream" classes. The optional "trimWhitespace" parameter allows trailing whitespace to be removed (e.g., in a SMILES string or InChI, etc.).
bool WriteFile | ( | OBBase * | pOb, | |
std::string | filePath | |||
) | [inherited] |
Outputs an object of a class derived from OBBase as a file (with the supplied path).
Part of "API" interface. The output stream is changed to the supplied file and the change is retained in the OBConversion instance. This method is primarily intended for scripting languages without "stream" classes
void CloseOutFile | ( | ) | [inherited] |
Manually closes and deletes the output stream. The file is closed anyway in the OBConversion destructor or when WriteFile is called again.
bool Read | ( | OBBase * | pOb, | |
std::istream * | pin = NULL | |||
) | [inherited] |
Reads an object of a class derived from OBBase into pOb.
Part of "API" interface. The input stream can be specified and the change is retained in the OBConversion instance
bool ReadString | ( | OBBase * | pOb, | |
std::string | input | |||
) | [inherited] |
Reads an object of a class derived from OBBase into pOb from the supplied string.
Part of "API" interface.
bool ReadFile | ( | OBBase * | pOb, | |
std::string | filePath | |||
) | [inherited] |
Reads an object of a class derived from OBBase into pOb from the file specified.
Part of "API" interface. The input stream is changed to the supplied file and the change is retained in the OBConversion instance.
string BatchFileName | ( | std::string & | BaseName, | |
std::string & | InFile | |||
) | [static, protected, inherited] |
Replaces * in BaseName by InFile without extension and path.
string IncrementedFileName | ( | std::string & | BaseName, | |
const int | Count | |||
) | [static, protected, inherited] |
Replaces * in BaseName by Count.
bool CheckForUnintendedBatch | ( | const std::string & | infile, | |
const std::string & | outfile | |||
) | [static, protected, inherited] |
Checks for misunderstandings when using the -m option.
bool SetStartAndEnd | ( | ) | [protected, inherited] |
FMapType & FormatsMap | ( | ) | [static, protected, inherited] |
contains ID and pointer to all OBFormat classes
This static function returns a reference to the FormatsMap which, because it is a static local variable, is constructed only once. This fiddle is to avoid the "static initialization order fiasco". See Marshall Cline's C++ FAQ Lite document, www.parashift.com/c++-faq-lite/.
FMapType & FormatsMIMEMap | ( | ) | [static, protected, inherited] |
contains MIME and pointer to all OBFormat classes
This static function returns a reference to the FormatsMIMEMap which, because it is a static local variable, is constructed only once. This fiddle is to avoid the "static initialization order fiasco". See Marshall Cline's C++ FAQ Lite document, www.parashift.com/c++-faq-lite/.
OBConversion::OPAMapType & OptionParamArray | ( | Option_type | typ | ) | [static, protected, inherited] |
int LoadFormatFiles | ( | ) | [static, protected, inherited] |
bool OpenAndSetFormat | ( | bool | SetFormat, | |
std::ifstream * | is | |||
) | [protected, inherited] |
bool _SkipNextRead |
std::string InFilename [protected, inherited] |
std::istream* pInStream [protected, inherited] |
std::ostream* pOutStream [protected, inherited] |
OBFormat * pDefaultFormat = NULL [static, protected, inherited] |
OBFormat* pOutFormat [protected, inherited] |
std::map<std::string,std::string> OptionsArray[3] [protected, inherited] |
int Index [protected, inherited] |
unsigned int StartNumber [protected, inherited] |
unsigned int EndNumber [protected, inherited] |
int Count [protected, inherited] |
bool m_IsFirstInput [protected, inherited] |
bool m_IsLast [protected, inherited] |
bool MoreFilesToCome [protected, inherited] |
bool OneObjectOnly [protected, inherited] |
bool ReadyToInput [protected, inherited] |
bool CheckedForGzip [protected, inherited] |
input stream is gzip-encoded
bool NeedToFreeInStream [protected, inherited] |
bool NeedToFreeOutStream [protected, inherited] |
int FormatFilesLoaded = 0 [static, protected, inherited] |
std::streampos wInpos [protected, inherited] |
position in the input stream of the object being written
std::streampos rInpos [protected, inherited] |
position in the input stream of the object being read
size_t wInlen [protected, inherited] |
length in the input stream of the object being written
size_t rInlen [protected, inherited] |
length in the input stream of the object being read
OBConversion* pAuxConv [protected, inherited] |
Way to extend OBConversion.
std::vector<std::string> SupportedInputFormat [protected, inherited] |
list of supported input formats
std::vector<std::string> SupportedOutputFormat [protected, inherited] |
list of supported output formats