#include <openbabel/obconversion.h>
Option handling | |
enum | Option_type { INOPTIONS, OUTOPTIONS, GENOPTIONS, ALL } |
const char * | IsOption (const char *opt, Option_type opttyp=OUTOPTIONS) |
const std::map< std::string, std::string > * | GetOptions (Option_type opttyp) |
void | AddOption (const char *opt, Option_type opttyp=OUTOPTIONS, const char *txt=NULL) |
bool | RemoveOption (const char *opt, Option_type optype) |
void | SetOptions (const char *options, Option_type opttyp) |
static void | RegisterOptionParam (std::string name, OBFormat *pFormat, int numberParams=0, Option_type typ=OUTOPTIONS) |
static int | GetOptionParams (std::string name, Option_type typ) |
Convenience functions | |
bool | Write (OBBase *pOb, std::ostream *pout=NULL) |
std::string | WriteString (OBBase *pOb, bool trimWhitespace=false) |
bool | WriteFile (OBBase *pOb, std::string filePath) |
void | CloseOutFile () |
bool | Read (OBBase *pOb, std::istream *pin=NULL) |
bool | ReadString (OBBase *pOb, std::string input) |
bool | ReadFile (OBBase *pOb, std::string filePath) |
bool | OpenInAndOutFiles (std::string infilepath, std::string outfilepath) |
void | ReportNumberConverted (int count, OBFormat *pFormat=NULL) |
static OBFormat * | GetDefaultFormat () |
static std::string | BatchFileName (std::string &BaseName, std::string &InFile) |
static std::string | IncrementedFileName (std::string &BaseName, const int Count) |
static bool | CheckForUnintendedBatch (const std::string &infile, const std::string &outfile) |
void | InstallStreamFilter () |
Public Member Functions | |
void | CopyOptions (OBConversion *pSourceConv, Option_type typ=ALL) |
Construction | |
OBConversion (std::istream *is=NULL, std::ostream *os=NULL) | |
OBConversion (const OBConversion &o) | |
virtual | ~OBConversion () |
Parameter get and set | |
std::istream * | GetInStream () const |
std::ostream * | GetOutStream () const |
void | SetInStream (std::istream *pIn) |
void | SetOutStream (std::ostream *pOut) |
bool | SetInAndOutFormats (const char *inID, const char *outID) |
bool | SetInAndOutFormats (OBFormat *pIn, OBFormat *pOut) |
bool | SetInFormat (const char *inID) |
bool | SetInFormat (OBFormat *pIn) |
bool | SetOutFormat (const char *outID) |
bool | SetOutFormat (OBFormat *pOut) |
OBFormat * | GetInFormat () const |
OBFormat * | GetOutFormat () const |
std::string | GetInFilename () const |
std::streampos | GetInPos () const |
size_t | GetInLen () const |
const char * | GetTitle () const |
OBConversion * | GetAuxConv () const |
void | SetAuxConv (OBConversion *pConv) |
Supported file format | |
std::vector< std::string > | GetSupportedInputFormat () |
std::vector< std::string > | GetSupportedOutputFormat () |
Conversion | |
int | Convert (std::istream *is, std::ostream *os) |
int | Convert () |
int | FullConvert (std::vector< std::string > &FileList, std::string &OutputFileName, std::vector< std::string > &OutputFileList) |
Conversion loop control | |
int | AddChemObject (OBBase *pOb) |
OBBase * | GetChemObject () |
bool | IsLast () |
bool | IsFirstInput () |
void | SetFirstInput (bool b=true) |
int | GetOutputIndex () const |
void | SetOutputIndex (int indx) |
void | SetMoreFilesToCome () |
void | SetOneObjectOnly (bool b=true) |
void | SetLast (bool b) |
bool | IsLastFile () |
Static Public Member Functions | |
Collection of formats | |
static int | RegisterFormat (const char *ID, OBFormat *pFormat, const char *MIME=NULL) |
static OBFormat * | FindFormat (const char *ID) |
static OBFormat * | FormatFromExt (const char *filename) |
static OBFormat * | FormatFromMIME (const char *MIME) |
static bool | GetNextFormat (Formatpos &itr, const char *&str, OBFormat *&pFormat) |
Information | |
static const char * | Description () |
Protected Types | |
typedef std::map< std::string, int > | OPAMapType |
typedef FilteringInputStreambuf < LineEndingExtractor > | LErdbuf |
Protected Member Functions | |
bool | SetStartAndEnd () |
bool | OpenAndSetFormat (bool SetFormat, std::ifstream *is) |
Static Protected Member Functions | |
static OPAMapType & | OptionParamArray (Option_type typ) |
static int | LoadFormatFiles () |
Protected Attributes | |
std::string | InFilename |
std::istream * | pInStream |
std::ostream * | pOutStream |
OBFormat * | pInFormat |
OBFormat * | pOutFormat |
std::map< std::string, std::string > | OptionsArray [3] |
int | Index |
unsigned int | StartNumber |
unsigned int | EndNumber |
int | Count |
bool | m_IsFirstInput |
bool | m_IsLast |
bool | MoreFilesToCome |
bool | OneObjectOnly |
bool | ReadyToInput |
bool | CheckedForGzip |
bool | NeedToFreeInStream |
bool | NeedToFreeOutStream |
LErdbuf * | pLineEndBuf |
OBBase * | pOb1 |
std::streampos | wInpos |
std::streampos | rInpos |
size_t | wInlen |
size_t | rInlen |
OBConversion * | pAuxConv |
std::vector< std::string > | SupportedInputFormat |
std::vector< std::string > | SupportedOutputFormat |
Static Protected Attributes | |
static OBFormat * | pDefaultFormat |
static int | FormatFilesLoaded = 0 |
OBConversion maintains a list of the available formats, provides information on them, and controls the conversion process.
A conversion is carried out by the calling routine, usually in a user interface or an application program, making an instance of OBConversion. It is loaded with the in and out formats, any options and (usually) the default streams for input and output. Then either the Convert() function is called, which allows a single input file to be converted, or the extended functionality of FullConvert() is used. This allows multiple input and output files, allowing aggregation, splitting, and batch conversion (each described under FullConvert()).
These procedures constitute the "Convert" interface. OBConversion and the user interface or application program do not need to be aware of any other part of OpenBabel - mol.h is not #included. This allows any chemical object derived from OBBase to be converted; the type of object is decided by the input format class. However, currently, almost all the conversions are for molecules of class OBMol. OBConversion can also be used with an "API" interface called from programs which manipulate chemical objects. Input/output is done with the Read() and Write() functions which work with any chemical object, but need to have its type specified. (The ReadMolecule() and WriteMolecule() functions of the format classes can also be used directly.)
Example code using OBConversion
To read in a molecule, manipulate it and write it out.
Set up an istream and an ostream, to and from files or elsewhere. (cin and cout are used in the example). Specify the file formats.
OBConversion conv(&cin,&cout); if(conv.SetInAndOutFormats("SMI","MOL")) { OBMol mol; if(conv.Read(&mol)) // ...manipulate molecule conv.Write(&mol); }
A two stage construction is used to allow error handling if the format ID is not recognized. This is necessary now that the formats are dynamic and errors are not caught at compile time. OBConversion::Read() is a templated function so that objects derived from OBBase can also be handled, in addition to OBMol, if the format routines are written appropriately.
To make a molecule from a SMILES string.
std::string SmilesString; OBMol mol; stringstream ss(SmilesString); OBConversion conv(&ss); if(conv.SetInFormat("smi") && conv.Read(&mol)) // ...
To do a file conversion without manipulating the molecule.
#include <openbabel/obconversion.h> //mol.h is not needed ...set up an istream is and an ostream os OBConversion conv(&is,&os); if(conv.SetInAndOutFormats("SMI","MOL")) { conv.AddOption("h",OBConversion::GENOPTIONS); //Optional; (h adds explicit hydrogens) conv.Convert(); }
To add automatic format conversion to an existing program.
The existing program inputs from the file identified by the const char* filename into the istream is. The file is assumed to have a format ORIG, but other formats, identified by their file extensions, can now be used.
ifstream ifs(filename); //Original code OBConversion conv; OBFormat* inFormat = conv.FormatFromExt(filename); OBFormat* outFormat = conv.FindFormat("ORIG"); istream* pIn = &ifs; stringstream newstream; if(inFormat && outFormat) { conv.SetInAndOutFormats(inFormat,outFormat); conv.Convert(pIn,&newstream); pIn=&newstream; } //else error; new features not available; fallback to original functionality ...Carry on with original code using pIn
In certain Windows builds, a degree of independence from OpenBabel can be achieved using DLLs. This code would be linked with obconv.lib. At runtime the following DLLs would be in the executable directory: obconv.dll, obdll.dll, one or more *.obf format files.
typedef std::map<std::string,int> OPAMapType [protected] |
typedef FilteringInputStreambuf< LineEndingExtractor > LErdbuf [protected] |
enum Option_type |
OBConversion | ( | std::istream * | is = NULL , |
|
std::ostream * | os = NULL | |||
) |
OBConversion | ( | const OBConversion & | o | ) |
Copy constructor.
~OBConversion | ( | ) | [virtual] |
int RegisterFormat | ( | const char * | ID, | |
OBFormat * | pFormat, | |||
const char * | MIME = NULL | |||
) | [static] |
Called once by each format class.
Class information on formats is collected by making an instance of the class derived from OBFormat(only one is usually required). RegisterFormat() is called from its constructor.
If the compiled format is stored separately, like in a DLL or shared library, the initialization code makes an instance of the imported OBFormat class.
OBFormat * FindFormat | ( | const char * | ID | ) | [static] |
Searches registered formats.
Referenced by OBConversion::FormatFromExt(), OBConversion::SetInFormat(), and OBConversion::SetOutFormat().
OBFormat * FormatFromExt | ( | const char * | filename | ) | [static] |
Searches registered formats for an ID the same as the file extension.
Referenced by OBConversion::OpenAndSetFormat().
OBFormat * FormatFromMIME | ( | const char * | MIME | ) | [static] |
Searches registered formats for a MIME the same as the chemical MIME type passed.
Deprecated! Repeatedly called to recover available Formats.
const char * Description | ( | ) | [static] |
std::istream* GetInStream | ( | ) | const [inline] |
Referenced by OBMoleculeFormat::DeferMolOutput(), and OBMoleculeFormat::ReadChemObjectImpl().
std::ostream* GetOutStream | ( | ) | const [inline] |
void SetInStream | ( | std::istream * | pIn | ) | [inline] |
Referenced by OBConversion::FullConvert().
void SetOutStream | ( | std::ostream * | pOut | ) | [inline] |
Referenced by OBConversion::FullConvert().
bool SetInAndOutFormats | ( | const char * | inID, | |
const char * | outID | |||
) |
Sets the formats from their ids, e.g. CML.
Sets the formats from their ids, e.g. CML. If inID is NULL, the input format is left unchanged; similarly for outID. Returns true if both formats have been successfully set at some time.
bool SetInFormat | ( | const char * | inID | ) |
Sets the input format from an id e.g. CML.
Referenced by OBMoleculeFormat::ReadNameIndex(), and OBConversion::SetInAndOutFormats().
bool SetInFormat | ( | OBFormat * | pIn | ) |
bool SetOutFormat | ( | const char * | outID | ) |
bool SetOutFormat | ( | OBFormat * | pOut | ) |
OBFormat* GetInFormat | ( | ) | const [inline] |
OBFormat* GetOutFormat | ( | ) | const [inline] |
Referenced by OBMoleculeFormat::OutputDeferredMols().
std::string GetInFilename | ( | ) | const [inline] |
std::streampos GetInPos | ( | ) | const [inline] |
Get the position in the input stream of the object being read.
size_t GetInLen | ( | ) | const [inline] |
Get the length in the input stream of the object being read.
const char * GetTitle | ( | ) | const |
OBConversion* GetAuxConv | ( | ) | const [inline] |
Extension method: deleted in ~OBConversion().
void SetAuxConv | ( | OBConversion * | pConv | ) | [inline] |
const char * IsOption | ( | const char * | opt, | |
Option_type | opttyp = OUTOPTIONS | |||
) |
Determine whether an option is set.
Referenced by OBConversion::Convert(), OBConversion::FullConvert(), OBMoleculeFormat::ReadChemObjectImpl(), OBConversion::SetStartAndEnd(), OBConversion::Write(), and OBMoleculeFormat::WriteChemObjectImpl().
const std::map<std::string,std::string>* GetOptions | ( | Option_type | opttyp | ) | [inline] |
Access the map with option name as key and any associated text as value.
Referenced by OBMoleculeFormat::OutputDeferredMols(), and OBMoleculeFormat::ReadChemObjectImpl().
void AddOption | ( | const char * | opt, | |
Option_type | opttyp = OUTOPTIONS , |
|||
const char * | txt = NULL | |||
) |
Set an option of specified type, with optional text.
bool RemoveOption | ( | const char * | opt, | |
Option_type | optype | |||
) |
void SetOptions | ( | const char * | options, | |
Option_type | opttyp | |||
) |
Set several single character options of specified type from string like ab"btext"c"ctext".
void RegisterOptionParam | ( | std::string | name, | |
OBFormat * | pFormat, | |||
int | numberParams = 0 , |
|||
Option_type | typ = OUTOPTIONS | |||
) | [static] |
For example -h takes 0 parameters; -f takes 1. Call in a format constructor.
Referenced by OBConversion::OBConversion(), and OBMoleculeFormat::OBMoleculeFormat().
int GetOptionParams | ( | std::string | name, | |
Option_type | typ | |||
) | [static] |
void CopyOptions | ( | OBConversion * | pSourceConv, | |
Option_type | typ = ALL | |||
) |
Copies the options (by default of all types) from one OBConversion Object to another.
std::vector< std::string > GetSupportedInputFormat | ( | ) |
Returns the list of supported input format
std::vector< std::string > GetSupportedOutputFormat | ( | ) |
Returns the list of supported output format
int Convert | ( | std::istream * | is, | |
std::ostream * | os | |||
) |
Conversion for single input and output stream.
int Convert | ( | ) |
Conversion with existing streams.
Actions the "convert" interface. Calls the OBFormat class's ReadMolecule() which
AddChemObject does not save the object passed to it if it is NULL (as a result of a DoTransformation()) or if the number of the object is outside the range defined by StartNumber and EndNumber. This means the start and end counts apply to all chemical objects found whether or not they are output.
If ReadMolecule returns false the input conversion loop is exited.
Referenced by OBConversion::Convert(), and OBConversion::FullConvert().
int FullConvert | ( | std::vector< std::string > & | FileList, | |
std::string & | OutputFileName, | |||
std::vector< std::string > & | OutputFileList | |||
) |
Conversion with multiple input/output files: makes input and output streams, and carries out normal, batch, aggregation, and splitting conversion.
Makes input and output streams, and carries out normal, batch, aggregation, and splitting conversion.
Normal Done if FileList contains a single file name and OutputFileName does not contain a *.
Aggregation Done if FileList has more than one file name and OutputFileName does not contain * . All the chemical objects are converted and sent to the single output file.
Splitting Done if FileList contains a single file name and OutputFileName contains a * . Each chemical object in the input file is converted and sent to a separate file whose name is OutputFileName with the * replaced by 1, 2, 3, etc. OutputFileName must have at least one character other than the * before the extension. For example, if OutputFileName is NEW*.smi then the output files are NEW1.smi, NEW2.smi, etc.
Batch Conversion Done if FileList has more than one file name and OutputFileName contains a * . Each input file is converted to an output file whose name is OutputFileName with the * replaced by the input file name without its path and extension. So if the input files were inpath/First.cml, inpath/Second.cml and OutputFileName was NEW*.mol, the output files would be NEWFirst.mol, NEWSecond.mol.
If FileList is empty, the input stream that has already been set (usually in the constructor) is used. If OutputFileName is empty, the output stream already set is used.
On exit, OutputFileList contains the names of the output files.
Returns the number of Chemical objects converted.
int AddChemObject | ( | OBBase * | pOb | ) |
Adds to internal array during input.
Called by ReadMolecule() to deliver an object it has read from an input stream. Used in two modes:
Referenced by OBMoleculeFormat::ReadChemObjectImpl().
OBBase * GetChemObject | ( | ) |
Retrieve from internal array during output.
Retrieves an object stored by AddChemObject() during output
Referenced by OBMoleculeFormat::WriteChemObjectImpl().
bool IsLast | ( | ) |
True if no more objects to be output.
Reimplemented in XMLConversion.
Referenced by XMLConversion::IsLast(), and OBMoleculeFormat::WriteChemObjectImpl().
bool IsFirstInput | ( | ) |
True if the first input object is being processed.
Referenced by OBMoleculeFormat::DeferMolOutput(), and OBMoleculeFormat::ReadChemObjectImpl().
void SetFirstInput | ( | bool | b = true |
) |
Sets whether or not this is the first input.
Referenced by OBConversion::Convert(), and OBConversion::FullConvert().
int GetOutputIndex | ( | ) | const |
Retrieves number of ChemObjects that have been actually output.
Returns the number of objects which have been output or are currently being output. The outputindex is incremented when an object for output is fetched by GetChemObject(). So the function will return 1 if called from WriteMolecule() during output of the first object.
Referenced by XMLConversion::GetOutputIndex().
void SetOutputIndex | ( | int | indx | ) |
Sets output index (maybe to control whether seen as first object).
Referenced by OBConversion::FullConvert(), OBMoleculeFormat::OutputDeferredMols(), and OBMoleculeFormat::WriteChemObjectImpl().
void SetMoreFilesToCome | ( | ) |
void SetOneObjectOnly | ( | bool | b = true |
) |
Used with multiple input files. Off by default.
Referenced by OBConversion::FullConvert(), OBMoleculeFormat::OutputDeferredMols(), and OBConversion::Write().
void SetLast | ( | bool | b | ) | [inline] |
Synonym for SetOneObjectOnly().
bool IsLastFile | ( | ) | [inline] |
True if no more files to be read.
static OBFormat* GetDefaultFormat | ( | ) | [inline, static] |
bool Write | ( | OBBase * | pOb, | |
std::ostream * | pout = NULL | |||
) |
Outputs an object of a class derived from OBBase.
Part of "API" interface. The output stream can be specified and the change is retained in the OBConversion instance
Writes the object pOb but does not delete it afterwards. The output stream is lastingly changed if pout is not NULL. Returns true if successful.
Referenced by OBConversion::WriteFile(), and OBConversion::WriteString().
std::string WriteString | ( | OBBase * | pOb, | |
bool | trimWhitespace = false | |||
) |
Outputs an object of a class derived from OBBase as a string.
Part of "API" interface. The output stream is temporarily changed to the string and then restored. This method is primarily intended for scripting languages without "stream" classes. The optional "trimWhitespace" parameter allows trailing whitespace to be removed (e.g., in a SMILES string or InChI, etc.)
Writes the object pOb but does not delete it afterwards. The output stream is not changed (since we cannot write to this string later). Returns true if successful.
bool WriteFile | ( | OBBase * | pOb, | |
std::string | filePath | |||
) |
Outputs an object of a class derived from OBBase as a file (with the supplied path).
Part of "API" interface. The output stream is changed to the supplied file and the change is retained in the OBConversion instance. This method is primarily intended for scripting languages without "stream" classes
Writes the object pOb but does not delete it afterwards. The output stream is lastingly changed to point to the file. Returns true if successful.
void CloseOutFile | ( | ) |
Manually closes and deletes the output stream. The file is closed anyway in the OBConversion destructor or when WriteFile is called again.
bool Read | ( | OBBase * | pOb, | |
std::istream * | pin = NULL | |||
) |
Reads an object of a class derived from OBBase into pOb.
Part of "API" interface. The input stream can be specified and the change is retained in the OBConversion instance
Referenced by OBConversion::ReadFile(), OBMoleculeFormat::ReadNameIndex(), and OBConversion::ReadString().
bool ReadString | ( | OBBase * | pOb, | |
std::string | input | |||
) |
Reads an object of a class derived from OBBase into pOb from the supplied string.
Part of "API" interface. The input stream can be specified and the change is retained in the OBConversion instance
Returns false and pOb=NULL on error. This method is primarily intended for scripting languages without "stream" classes.
bool ReadFile | ( | OBBase * | pOb, | |
std::string | filePath | |||
) |
Reads an object of a class derived from OBBase into pOb from the file specified.
Part of "API" interface. The input stream is changed to the supplied file and the change is retained in the OBConversion instance.
bool OpenInAndOutFiles | ( | std::string | infilepath, | |
std::string | outfilepath | |||
) |
Part of the "Convert" interface. Opens the files and updates the streams in the OBConversion object. This method is primarily intended for scripting languages without "stream" classes and will usually be followed by a call to Convert().
void ReportNumberConverted | ( | int | count, | |
OBFormat * | pFormat = NULL | |||
) |
Sends a message like "2 molecules converted" to clog. The type of object is taken from the TargetClassDescription of the specified class (or the output format if not specified) and is appropriately singular or plural.
string BatchFileName | ( | std::string & | BaseName, | |
std::string & | InFile | |||
) | [static, protected] |
Replaces * in BaseName by InFile without extension and path.
Referenced by OBConversion::FullConvert().
string IncrementedFileName | ( | std::string & | BaseName, | |
const int | Count | |||
) | [static, protected] |
bool CheckForUnintendedBatch | ( | const std::string & | infile, | |
const std::string & | outfile | |||
) | [static, protected] |
void InstallStreamFilter | ( | ) | [protected] |
Adds a filtering rdbuffer to handle line endings if not already installed and not a binary or xml format.
Referenced by OBConversion::Convert(), and OBConversion::Read().
bool SetStartAndEnd | ( | ) | [protected] |
Referenced by OBConversion::Convert().
OBConversion::OPAMapType & OptionParamArray | ( | Option_type | typ | ) | [static, protected] |
Referenced by OBConversion::GetOptionParams(), and OBConversion::RegisterOptionParam().
int LoadFormatFiles | ( | ) | [static, protected] |
Referenced by OBConversion::OBConversion().
bool OpenAndSetFormat | ( | bool | SetFormat, | |
std::ifstream * | is | |||
) | [protected] |
Referenced by OBConversion::FullConvert().
std::string InFilename [protected] |
std::istream* pInStream [protected] |
std::ostream* pOutStream [protected] |
OBFormat* pDefaultFormat [static, protected] |
OBFormat* pOutFormat [protected] |
std::map<std::string,std::string> OptionsArray[3] [protected] |
int Index [protected] |
unsigned int StartNumber [protected] |
unsigned int EndNumber [protected] |
int Count [protected] |
bool m_IsFirstInput [protected] |
Referenced by OBConversion::IsFirstInput(), OBConversion::OBConversion(), and OBConversion::SetFirstInput().
bool m_IsLast [protected] |
bool MoreFilesToCome [protected] |
Referenced by OBConversion::Convert(), OBConversion::OBConversion(), and OBConversion::SetMoreFilesToCome().
bool OneObjectOnly [protected] |
Referenced by OBConversion::Convert(), OBConversion::OBConversion(), and OBConversion::SetOneObjectOnly().
bool ReadyToInput [protected] |
Referenced by OBConversion::AddChemObject(), OBConversion::Convert(), and OBConversion::OBConversion().
bool CheckedForGzip [protected] |
input stream is gzip-encoded
Referenced by OBConversion::Convert(), OBConversion::FullConvert(), OBConversion::OBConversion(), and OBConversion::Read().
bool NeedToFreeInStream [protected] |
bool NeedToFreeOutStream [protected] |
LErdbuf* pLineEndBuf [protected] |
Referenced by OBConversion::InstallStreamFilter(), and OBConversion::OBConversion().
int FormatFilesLoaded = 0 [static, protected] |
Referenced by OBConversion::OBConversion().
std::streampos wInpos [protected] |
position in the input stream of the object being written
Referenced by OBConversion::AddChemObject(), and OBConversion::OBConversion().
std::streampos rInpos [protected] |
position in the input stream of the object being read
Referenced by OBConversion::AddChemObject(), OBConversion::Convert(), and OBConversion::OBConversion().
size_t wInlen [protected] |
length in the input stream of the object being written
Referenced by OBConversion::AddChemObject(), OBConversion::Convert(), and OBConversion::OBConversion().
size_t rInlen [protected] |
length in the input stream of the object being read
Referenced by OBConversion::AddChemObject(), and OBConversion::OBConversion().
OBConversion* pAuxConv [protected] |
Way to extend OBConversion.
Referenced by OBConversion::OBConversion(), and OBConversion::~OBConversion().
std::vector<std::string> SupportedInputFormat [protected] |
list of supported input format
std::vector<std::string> SupportedOutputFormat [protected] |
list of supported output format