XMLNodeIterator.cc

Go to the documentation of this file.
00001 /*---------------------------------------------------------------------\
00002 |                          ____ _   __ __ ___                          |
00003 |                         |__  / \ / / . \ . \                         |
00004 |                           / / \ V /|  _/  _/                         |
00005 |                          / /__ | | | | | |                           |
00006 |                         /_____||_| |_| |_|                           |
00007 |                                                                      |
00008 \---------------------------------------------------------------------*/
00013 #include <sys/types.h>
00014 #include <sys/stat.h>
00015 #include <fcntl.h>
00016 #include <stdio.h>
00017 #include <stdlib.h>
00018 #include <string.h>
00019 #include <unistd.h>
00020 #include <sstream>
00021 #include <zypp/parser/XMLNodeIterator.h>
00022 #include <zypp/base/Logger.h>
00023 #include <libxml2/libxml/xmlreader.h>
00024 #include <libxml2/libxml/xmlerror.h>
00025 
00026 namespace zypp {
00027 
00028   namespace parser {
00029 
00030     using namespace std;
00031 
00032     namespace{
00041       int ioread(void *context,
00042                 char *buffer,
00043                 int bufferLen)
00044       {
00045         xml_assert(buffer);
00046         std::istream *streamPtr = (std::istream *) context;
00047         xml_assert(streamPtr);
00048         streamPtr->read(buffer,bufferLen);
00049         return streamPtr->gcount();
00050       }
00051 
/**
 * Close callback handed to libxml2 together with ioread().
 *
 * Intentionally does nothing: the input is not closed from here, a
 * destructor is expected to take care of it.
 *
 * @param context ignored
 * @return always 0
 */
int ioclose(void * context)
{
  static_cast<void>(context);   // deliberately unused
  return 0;
}
00065     }
00066 
00067     XMLParserError::XMLParserError(const char *msg,
00068                                        int severity,
00069                                        xmlTextReaderLocatorPtr locator,
00070                                        int docLine,
00071                                        int docColumn)
00072     throw()
00073     : _msg(msg), _severity(severity), _locator(locator),
00074     _docLine(docLine), _docColumn(docColumn)
00075     { }
00076 
00077     XMLParserError::~XMLParserError() throw()
00078     { }
00079 
00080     std::string XMLParserError::msg() const throw()
00081     {
00082       return _msg;
00083     }
00084 
00085     int XMLParserError::severity() const throw()
00086     {
00087       return _severity;
00088     }
00089 
00090     xmlTextReaderLocatorPtr XMLParserError::locator() const throw()
00091     {
00092       return _locator;
00093     }
00094 
00095     int XMLParserError::docLine() const throw()
00096     {
00097       return _docLine;
00098     }
00099 
00100     int XMLParserError::docColumn() const throw()
00101     {
00102       return _docColumn;
00103     }
00104 
00105     std::string XMLParserError::position() const throw()
00106     {
00107       if (_docLine!=-1 && _docLine!=-1) {
00108         std::stringstream strm;
00109         strm << "at line " << _docLine
00110           <<", column " << _docColumn;
00111         return strm.str();
00112       }
00113       else
00114         return "";
00115     }
00116 
00117     std::ostream& operator<<(std::ostream &out, const XMLParserError& error)
00118     {
00119       const char *errOrWarn = (error.severity() & XML_PARSER_SEVERITY_ERROR) ? "error" : "warning";
00120     out << "XML syntax " << errOrWarn << ": " << error.msg();
00121       if (error.docLine()!=-1) {
00122         out  << "at line " << error.docLine()
00123           << ", column " << error.docColumn();
00124       }
00125       out << std::endl;
00126       return out;
00127     }
00128 
00129     XMLNodeIteratorBase::XMLNodeIteratorBase( const Pathname xml_file_path,
00130                                              const std::string &baseUrl,
00131                                              const char *validationPath, parser::ParserProgress::Ptr progress)
00132       : _error(0), _file(0), _baseUrl(baseUrl), _progress(progress), _stream_size(0), _bytes_consumed(0)
00133     {
00134      
00135       int fd = open( xml_file_path.asString().c_str(), O_RDONLY );
00136       if ( fd < 0 )
00137         ZYPP_THROW(Exception("Cant't open " + xml_file_path.asString()));
00138       
00139       _reader = xmlReaderForFd( fd, baseUrl.c_str(), "utf-8", XML_PARSE_PEDANTIC)
00140           ;
00141       xmlTextReaderSetErrorHandler(_reader, (xmlTextReaderErrorFunc) errorHandler, this);
00142       if (_reader )
00143       {
00144         if ( validationPath )
00145         {
00146           if (xmlTextReaderRelaxNGValidate(_reader,validationPath)==-1)
00147             WAR << "Could not enable validation of document using " << validationPath << std::endl;
00148         }
00149         // otherwise validation is disabled.
00150       }
00151         /* Derived classes must call fetchNext() in their constructors themselves,
00152       XMLNodeIterator has no access to their virtual functions during
00153       construction */
00154     }
00155 
00156     XMLNodeIteratorBase::XMLNodeIteratorBase(std::istream &input,
00157                                              const std::string &baseUrl,
00158                                              const char *validationPath, parser::ParserProgress::Ptr progress)
00159       : _error(0),
00160       _input(& input),
00161       _reader(xmlReaderForIO(ioread, ioclose, _input, baseUrl.c_str(), "utf-8",
00162                              XML_PARSE_PEDANTIC)),
00163       _baseUrl(baseUrl), _progress(progress), _stream_size(0), _bytes_consumed(0)
00164     {
00165       xmlTextReaderSetErrorHandler(_reader, (xmlTextReaderErrorFunc) errorHandler, this);
00166       // xmlTextReaderSetStructuredErrorHandler(_reader, structuredErrorHandler, this);
00167       if (_reader )
00168       {
00169         if ( validationPath )
00170         {
00171             if (xmlTextReaderRelaxNGValidate(_reader,validationPath)==-1)
00172               WAR << "Could not enable validation of document using " << validationPath << std::endl;
00173         }
00174         // otherwise validation is disabled.
00175       }
00176         /* Derived classes must call fetchNext() in their constructors themselves,
00177            XMLNodeIterator has no access to their virtual functions during
00178            construction */
00179     }
00180 
00181     XMLNodeIteratorBase::XMLNodeIteratorBase()
00182       : _error(0), _input(0), _reader(0)
00183     { }
00184 
00185 
00186 
00187     XMLNodeIteratorBase::~XMLNodeIteratorBase()
00188     {
00189       if (_reader != 0)
00190         xmlFreeTextReader(_reader);
00191     }
00192 
00193 
00194     bool
00195     XMLNodeIteratorBase::atEnd() const
00196     {
00197       if ( _error.get() != 0 || getCurrent() == 0 )
00198       {
00199         if ( _progress )
00200           _progress->finish();
00201         return true;
00202       }
00203       return false;
00204     }
00205 
00206 
00207     bool
00208     XMLNodeIteratorBase::operator==(const XMLNodeIteratorBase &other) const
00209     {
00210       if (atEnd())
00211         return other.atEnd();
00212       else
00213         return this == & other;
00214     }
00215 
00216 
00217     const XMLParserError *
00218     XMLNodeIteratorBase::errorStatus() const
00219     {
00220       return _error.get();
00221     }
00222 
00223 
00224     void XMLNodeIteratorBase::fetchNext()
00225     {
00226       xml_assert(_reader);
00227       
00228       if ( _progress )
00229       {
00230         long int consumed = xmlTextReaderByteConsumed (_reader);
00231         //MIL <<  consumed << " bytes consumed." << endl;
00232         // only report every 4k or more
00233         if ( ( consumed - _bytes_consumed > 4096 ) )
00234         {
00235           _progress->progress(consumed);
00236           _bytes_consumed = consumed;
00237         }
00238       }
00239       
00240       int status;
00241       /* throw away the old entry */
00242       setCurrent(0);
00243 
00244       if (_reader == 0) {
00245           /* this is a trivial iterator over (max) only one element,
00246              and we reach the end now. */
00247         ;
00248       }
00249       else {
00250           /* repeat as long as we successfully read nodes
00251              breaks out when an interesting node has been found */
00252         while ((status = xmlTextReaderRead(_reader))==1)
00253         { 
00254           xmlNodePtr node = xmlTextReaderCurrentNode(_reader);
00255           
00256           if (isInterested(node))
00257           {
00258               // xmlDebugDumpNode(stdout,node,5);
00259             _process(_reader);
00260               // _currentDataPtr.reset(new ENTRYTYPE(process(_reader)));
00261             status = xmlTextReaderNext(_reader);
00262             break;
00263           }
00264         }
00265         if (status == -1) {  // error occured
00266           if (_error.get() == 0) {
00267             errorHandler(this, "Unknown error while parsing xml file\n",
00268                          XML_PARSER_SEVERITY_ERROR, 0);
00269           }
00270         }
00271       }
00272     }
00273 
00274 
00275     void
00276     XMLNodeIteratorBase::errorHandler(void * arg,
00277                                       const char * msg,
00278                                       int severity,
00279                                       xmlTextReaderLocatorPtr locator)
00280     {
00281       XMLNodeIteratorBase *obj;
00282       obj = (XMLNodeIteratorBase*) arg;
00283       xml_assert(obj);
00284       xmlTextReaderPtr reader = obj->_reader;
00285       if (strcmp("%s",msg) == 0) {
00286           /* This works around a buglet in libxml2, you often get "%s" as message
00287              and the message is in "severity". Does this work for other
00288              architectures??? FIXME */
00289         WAR << "libxml2 error reporting defect, got '%s' as message" << endl;
00290         msg = (char *) severity;
00291         severity = XML_PARSER_SEVERITY_WARNING;
00292       }
00293       const char *errOrWarn = (severity & XML_PARSER_SEVERITY_ERROR) ? "error" : "warning";
00294 
00295 #if 0
00296       std::ostream& out = (severity & XML_PARSER_SEVERITY_ERROR) ? ERR : WAR;
00297 
00298         /* Log it */
00299     out << "XML syntax " << errOrWarn << ": " << msg;
00300       if (obj->_error.get()) {
00301         out << "(encountered during error recovery!)" << std::endl;
00302       }
00303       if (reader && msg[0] != 0) {
00304         out  << "at ";
00305         if (! obj->_baseUrl.empty())
00306           out << obj->_baseUrl << ", ";
00307         out << "line " << xmlTextReaderGetParserLineNumber(reader)
00308         << ", column " << xmlTextReaderGetParserColumnNumber(reader);
00309       }
00310       out << std::endl;
00311 #endif
00312         /* save it */
00313       if ((severity & XML_PARSER_SEVERITY_ERROR)
00314           && ! obj->_error.get()) {
00315             if (reader)
00316               obj->_error.reset(new XMLParserError
00317                                 (msg, severity,locator,
00318                                  xmlTextReaderLocatorLineNumber(locator),
00319                                  xmlTextReaderGetParserColumnNumber(reader)));
00320             else
00321               obj->_error.reset(new XMLParserError
00322                                 (msg, severity, locator,
00323                                  -1, -1));
00324           }
00325     }
00326 
00327   }
00328 }

Generated on Tue Sep 25 19:23:03 2007 for libzypp by  doxygen 1.5.3