TagFileParser.cc

Go to the documentation of this file.
00001 /*---------------------------------------------------------------------\
00002 |                          ____ _   __ __ ___                          |
00003 |                         |__  / \ / / . \ . \                         |
00004 |                           / / \ V /|  _/  _/                         |
00005 |                          / /__ | | | | | |                           |
00006 |                         /_____||_| |_| |_|                           |
00007 |                                                                      |
00008 \---------------------------------------------------------------------*/
00012 #include <iostream>
00013 #include <fstream>
00014 #include <sstream>
00015 
00016 #include <boost/tokenizer.hpp>
00017 #include <boost/algorithm/string.hpp>
00018 
00019 #include "zypp/base/Logger.h"
00020 #include "zypp/base/PtrTypes.h"
00021 #include "zypp/base/String.h"
00022 #include "zypp/PathInfo.h"
00023 
00024 #include "zypp/parser/tagfile/TagFileParser.h"
00025 #include "zypp/parser/tagfile/ParseException.h"
00026 
00027 
00028 #undef ZYPP_BASE_LOGGER_LOGGROUP
00029 #define ZYPP_BASE_LOGGER_LOGGROUP "TagFileParser"
00030 
00031 using namespace std;
00032 using namespace boost;
00033 
00035 namespace zypp
00036 { 
00037 
00038   namespace parser
00039   { 
00040 
00041     namespace tagfile
00042     { 
00043 
00044       void dumpRegexpResults( const boost::smatch &what )
00045       {
00046         for ( unsigned int k=0; k < what.size(); k++)
00047         {
00048           XXX << "[match "<< k << "] [" << what[k] << "]" << std::endl;
00049         }
00050       }
00051 
00052       void dumpRegexpResults2( const boost::smatch &what )
00053       {
00054         for ( unsigned int k=0; k < what.size(); k++)
00055         {
00056           DBG << "[match "<< k << "] [" << what[k] << "]" << std::endl;
00057         }
00058       }
00059 
00060       TagFileParser::TagFileParser( ParserProgress::Ptr progress ) : _progress(progress)
00061       {
00062       }
00063 
00064       void TagFileParser::beginParse()
00065       {
00066       }
00067 
00068       void TagFileParser::endParse()
00069       {
00070       }
00071 
00072       void TagFileParser::consume( const SingleTag &tag )
00073       {
00074       }
00075 
00076       void TagFileParser::consume( const MultiTag &tag )
00077       {
00078       }
00079 
00081       //
00082       //        METHOD NAME : TagFileParser::parse
00083       //        METHOD TYPE : void
00084       //
00085       void TagFileParser::parse( const Pathname & file_r)
00086       {
00087         // save parsed filename for debug
00088         int previous_progress = 0;
00089         int new_progress = 0;
00090         _file_r = file_r;
00091         _file_size = 0;
00092         _line_number = 0;
00093         _file_size = PathInfo(file_r).size();
00094         std::ifstream file(file_r.asString().c_str());
00095         int readed = 0;
00096         
00097         boost::regex rxComment("^[[:space:]]*#(.*)$");
00098         boost::regex rxMStart("^\\+([^[:space:]^\\.]+)(\\.([^[:space:]]+))?:$");
00099         boost::regex rxMEnd("^\\-([^[:space:]^\\.]+)(\\.([^[:space:]]+))?:$");
00100         boost::regex rxSStart("^=([^[:space:]^\\.]+)(\\.([^[:space:]]+))?:[[:space:]]*(.*)$");
00101         boost::regex rxEmpty("^([[:space:]]*)$");
00102 
00103              if (!file) {
00104                ZYPP_THROW (ParseException( "Can't open " + file_r.asString() ) );
00105        }
00106 
00107         std::string buffer;
00108         // read vendor
00109         MIL << "Started parsing " << file_r << std::endl;
00110         beginParse();
00111         while(file && !file.eof())
00112         {
00113           getline(file, buffer);
00114           _line_number++;
00115           readed +=  buffer.size();
00116           
00117           boost::smatch what;
00118           if(boost::regex_match(buffer, what, rxComment, boost::match_extra))
00119           {
00120             XXX << "comment" << std::endl;
00121             // comment # something
00122             // str::strtonum(buffer, entry_r.count);
00123             dumpRegexpResults(what);
00124           }
00125           else if(boost::regex_match(buffer, what, rxMStart, boost::match_extra))
00126           {
00127             MultiTag tag;
00128             tag.name = what[1];
00129             tag.modifier = what[3];
00130 
00131             XXX << "start list" << std::endl;
00132             dumpRegexpResults(what);
00133             // start of list +Something.lang:
00134             // lang is optional
00135             // str::strtonum(buffer, entry_r.count);
00136             std::string element;
00137             boost::smatch element_what;
00138             getline(file, element);
00139             _line_number++;
00140             readed +=  element.size();
00141             // while we dont find the list terminator
00142             while(!file.eof())
00143             {
00144               // avoid regexping in most cases.
00145               if ( element[0] == '-' )
00146               {
00147                 if ( boost::regex_match(element, element_what, rxMEnd, boost::match_extra) )
00148                 {
00149                   // end list element? we check that it is the same as the opening tag, otherwise it is all broken!
00150                   if ( tag.name != element_what[1] )
00151                     ZYPP_THROW(ParseException("Expecting tag -" + tag.name + " for closing. Found -" + element_what[1]));
00152                   
00153                   // no problem, is a real close list tag
00154                   break;
00155                 }
00156               }
00157               
00158               // if we are in a multi tag (list), we cannot start a list inside a list, so if we find a
00159               // + sign, we check it. We dont just regexp every entry because it is very expensive
00160               if ( element[0] == '+' )
00161               {
00162                 if ( boost::regex_match(element, element_what, rxMStart, boost::match_extra) )
00163                 {
00164                   if ( tag.name != element_what[1] )
00165                     ZYPP_THROW(ParseException("MultiTag +" + element_what[1] + " started before closing +" + tag.name));
00166                   else
00167                     ZYPP_THROW(ParseException("MultiTag +" + tag.name + " duplicate opening tag"));
00168                 }
00169               }
00170               
00171               tag.values.push_back(element);
00172               
00173               XXX << element << std::endl;
00174               getline(file, element);
00175               _line_number++;
00176               readed +=  element.size();
00177               //dumpRegexpResults(element_what);
00178             }
00179             XXX << "end list" << std::endl;
00180             consume(tag);
00181             // end of list
00182           }
00183           else if(boost::regex_match(buffer, what, rxSStart, boost::match_extra))
00184           {
00185             SingleTag tag;
00186             tag.name = what[1];
00187             tag.modifier = what[3];
00188             tag.value = what[4];
00189             XXX << "assign" << std::endl;
00190             // start of list
00191             // str::strtonum(buffer, entry_r.count);
00192             dumpRegexpResults(what);
00193             consume(tag);
00194           }
00195           else if(boost::regex_match(buffer, what, rxEmpty, boost::match_extra))
00196           {
00197             XXX << "empty line" << std::endl;
00198           }
00199           else
00200           {
00201             // https://bugzilla.novell.com/show_bug.cgi?id=160607
00202             // before we used to throw a parse error exception if we dont find
00203             // a key value line. But package descriptions usually are broken
00204             // and contain multiple lines for single line tags, etc.
00205             stringstream ss;
00206             ss << "Parse error, unrecognized line [" << buffer << "]. Be sure " << _file_r << " line " << _line_number << " misses a tag or comment.";
00207             ZYPP_THROW( ParseException( ss.str() ) );
00208           }
00209           
00210           new_progress = (int)((((float)readed)/((float)_file_size))*100);
00211           if ( new_progress != previous_progress )
00212             _progress->progress( new_progress );
00213           previous_progress = new_progress;
00214           
00215         }
00216         endParse();
00217         MIL << "Done parsing " << file_r << std::endl;
00218       }
00219 
00221     } // namespace tagfile
00224   } // namespace parser
00227 } // namespace zypp

Generated on Tue Nov 28 16:49:30 2006 for zypp by  doxygen 1.5.0