Main Page | Namespace List | Class Hierarchy | Alphabetical List | Class List | Directories | File List | Namespace Members | Class Members | File Members

PosixRegEx.cpp

Go to the documentation of this file.
00001 /*******************************************************************************
00002 * Copyright (C) 2005 Novell, Inc. All rights reserved.
00003 *
00004 * Redistribution and use in source and binary forms, with or without
00005 * modification, are permitted provided that the following conditions are met:
00006 *
00007 *  - Redistributions of source code must retain the above copyright notice,
00008 *    this list of conditions and the following disclaimer.
00009 *
00010 *  - Redistributions in binary form must reproduce the above copyright notice,
00011 *    this list of conditions and the following disclaimer in the documentation
00012 *    and/or other materials provided with the distribution.
00013 *
00014 *  - Neither the name of Vintela, Inc., Novell, Inc., nor the names of its
00015 *    contributors may be used to endorse or promote products derived from this
00016 *    software without specific prior written permission.
00017 *
00018 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS ``AS IS''
00019 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
00020 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
00021 * ARE DISCLAIMED. IN NO EVENT SHALL Vintela, Inc., Novell, Inc., OR THE 
00022 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 
00023 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 
00024 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; 
00025 * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, 
00026 * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR 
00027 * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF 
00028 * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
00029 *******************************************************************************/
00034 #include "blocxx/PosixRegEx.hpp"
00035 #ifdef BLOCXX_HAVE_REGEX
00036 #ifdef BLOCXX_HAVE_REGEX_H
00037 
#include "blocxx/ExceptionIds.hpp"
#include "blocxx/Assertion.hpp"
#include "blocxx/Format.hpp"

#include <vector>
00041 
00042 
00043 namespace BLOCXX_NAMESPACE
00044 {
00045 
00046 
00047 // -------------------------------------------------------------------
00048 static String
00049 substitute_caps(const PosixRegEx::MatchArray &sub,
00050                 const String &str, const String &rep)
00051 {
00052    static const char *cap_refs[] = {
00053       NULL,  "\\1", "\\2", "\\3", "\\4",
00054       "\\5", "\\6", "\\7", "\\8", "\\9", NULL
00055    };
00056 
00057    String res( rep);
00058    size_t pos;
00059 
00060    for(size_t i=1; cap_refs[i] != NULL; i++)
00061    {
00062       String cap;
00063 
00064       if( i < sub.size() && sub[i].rm_so >= 0 && sub[i].rm_eo >= 0)
00065       {
00066          cap = str.substring(sub[i].rm_so, sub[i].rm_eo
00067                                          - sub[i].rm_so);
00068       }
00069 
00070       pos = res.indexOf(cap_refs[i]);
00071       while( pos != String::npos)
00072       {
00073          size_t quotes = 0;
00074          size_t at = pos;
00075 
00076          while( at > 0 && res.charAt(--at) == '\\')
00077             quotes++;
00078 
00079          if( quotes % 2)
00080          {
00081             quotes = (quotes + 1) / 2;
00082 
00083             res = res.erase(pos - quotes, quotes);
00084 
00085             pos = res.indexOf(cap_refs[i],
00086                               pos + 2 - quotes);
00087          }
00088          else
00089          {
00090             quotes = quotes / 2;
00091 
00092             res = res.substring(0, pos - quotes) +
00093                   cap +
00094                   res.substring(pos + 2);
00095 
00096             pos = res.indexOf(cap_refs[i],
00097                               pos + cap.length() - quotes);
00098          }
00099       }
00100    }
00101    return res;
00102 }
00103 
00104 
00105 // -------------------------------------------------------------------
00106 static inline String
00107 getError(const regex_t *preg, const int code)
00108 {
00109    char err[256] = { '\0'};
00110    ::regerror(code, preg, err, sizeof(err));
00111    return String(err);
00112 }
00113 
00114 
00115 // -------------------------------------------------------------------
00116 PosixRegEx::PosixRegEx()
00117    : compiled(false)
00118    , m_flags(0)
00119    , m_ecode(REG_NOERROR)
00120 {
00121 }
00122 
00123 
00124 // -------------------------------------------------------------------
00125 PosixRegEx::PosixRegEx(const String &regex, int cflags)
00126    : compiled(false)
00127    , m_flags(0)
00128    , m_ecode(REG_NOERROR)
00129 {
00130    if( !compile(regex, cflags))
00131    {
00132       BLOCXX_THROW_ERR(RegExCompileException,
00133          errorString().c_str(), m_ecode);
00134    }
00135 }
00136 
00137 
00138 // -------------------------------------------------------------------
00139 PosixRegEx::PosixRegEx(const PosixRegEx &ref)
00140    : compiled(false)
00141    , m_flags(ref.m_flags)
00142    , m_ecode(REG_NOERROR)
00143    , m_rxstr(ref.m_rxstr)
00144 {
00145    if( ref.compiled && !compile(ref.m_rxstr, ref.m_flags))
00146    {
00147       BLOCXX_THROW_ERR(RegExCompileException,
00148          errorString().c_str(), m_ecode);
00149    }
00150 }
00151 
00152 
00153 // -------------------------------------------------------------------
00154 PosixRegEx::~PosixRegEx()
00155 {
00156    if( compiled)
00157    {
00158       regfree(&m_regex);
00159    }
00160 }
00161 
00162 
00163 // -------------------------------------------------------------------
00164 PosixRegEx &
00165 PosixRegEx::operator = (const PosixRegEx &ref)
00166 {
00167    if( !ref.compiled)
00168    {
00169       m_ecode = REG_NOERROR;
00170       m_error.erase();
00171       m_flags = ref.m_flags;
00172       m_rxstr = ref.m_rxstr;
00173       if( compiled)
00174       {
00175          regfree(&m_regex);
00176          compiled = false;
00177       }
00178    }
00179    else if( !compile(ref.m_rxstr, ref.m_flags))
00180    {
00181       BLOCXX_THROW_ERR(RegExCompileException,
00182          errorString().c_str(), m_ecode);
00183    }
00184    return *this;
00185 }
00186 
00187 
00188 // -------------------------------------------------------------------
00189 bool
00190 PosixRegEx::compile(const String &regex, int cflags)
00191 {
00192    if( compiled)
00193    {
00194       regfree(&m_regex);
00195       compiled = false;
00196    }
00197 
00198    m_rxstr = regex;
00199    m_flags = cflags;
00200    m_ecode = ::regcomp(&m_regex, regex.c_str(), cflags);
00201    if( m_ecode == REG_NOERROR)
00202    {
00203       compiled = true;
00204       m_error.erase();
00205       return true;
00206    }
00207    else
00208    {
00209       m_error = getError(&m_regex, m_ecode);
00210       return false;
00211    }
00212 }
00213 
00214 
00215 // -------------------------------------------------------------------
00216 int
00217 PosixRegEx::errorCode()
00218 {
00219    return m_ecode;
00220 }
00221 
00222 
00223 // -------------------------------------------------------------------
00224 String
00225 PosixRegEx::errorString() const
00226 {
00227    return m_error;
00228 }
00229 
00230 
00231 // -------------------------------------------------------------------
00232 String
00233 PosixRegEx::patternString() const
00234 {
00235    return m_rxstr;
00236 }
00237 
00238 
00239 // -------------------------------------------------------------------
00240 int
00241 PosixRegEx::compileFlags() const
00242 {
00243    return m_flags;
00244 }
00245 
00246 
00247 // -------------------------------------------------------------------
00248 bool
00249 PosixRegEx::isCompiled() const
00250 {
00251    return compiled;
00252 }
00253 
00254 
00255 // -------------------------------------------------------------------
00256 bool
00257 PosixRegEx::execute(MatchArray &sub, const String &str,
00258                size_t index, size_t count, int eflags)
00259 {
00260    if( !compiled)
00261    {
00262       BLOCXX_THROW(RegExCompileException,
00263          "Regular expression is not compiled");
00264    }
00265 
00266    if( index > str.length())
00267    {
00268       BLOCXX_THROW(OutOfBoundsException,
00269          Format("String index out of bounds ("
00270                 "length = %1, index = %2).",
00271                 str.length(), index
00272          ).c_str());
00273    }
00274 
00275    if( count == 0)
00276    {
00277       count = m_regex.re_nsub + 1;
00278    }
00279    regmatch_t rsub[count];
00280    rsub[0].rm_so = -1;
00281    rsub[0].rm_eo = -1;
00282 
00283    sub.clear();
00284    m_ecode = ::regexec(&m_regex, str.c_str() + index,
00285                        count, rsub, eflags);
00286    if( m_ecode == REG_NOERROR)
00287    {
00288       m_error.erase();
00289       if( m_flags & REG_NOSUB)
00290       {
00291          return true;
00292       }
00293 
00294       sub.resize(count);
00295       for(size_t n = 0; n < count; n++)
00296       {
00297          if( rsub[n].rm_so < 0 || rsub[n].rm_eo < 0)
00298          {
00299             sub[n] = rsub[n];
00300          }
00301          else
00302          {
00303             rsub[n].rm_so += index;
00304             rsub[n].rm_eo += index;
00305             sub[n] = rsub[n];
00306          }
00307       }
00308       return true;
00309    }
00310    else
00311    {
00312       m_error = getError(&m_regex, m_ecode);
00313       return false;
00314    }
00315 }
00316 
00317 
00318 // -------------------------------------------------------------------
00319 StringArray
00320 PosixRegEx::capture(const String &str, size_t index, size_t count, int eflags)
00321 {
00322    if( !compiled)
00323    {
00324       BLOCXX_THROW(RegExCompileException,
00325          "Regular expression is not compiled");
00326    }
00327 
00328    MatchArray  rsub;
00329    StringArray ssub;
00330 
00331    bool match = execute(rsub, str, index, count, eflags);
00332    if( match)
00333    {
00334       if( rsub.empty())
00335       {
00336          BLOCXX_THROW(RegExCompileException,
00337             "Non-capturing regular expression");
00338       }
00339 
00340       MatchArray::const_iterator i=rsub.begin();
00341       for( ; i != rsub.end(); ++i)
00342       {
00343          if( i->rm_so >= 0 && i->rm_eo >= 0)
00344          {
00345             ssub.push_back(str.substring(i->rm_so,
00346                                 i->rm_eo - i->rm_so));
00347          }
00348          else
00349          {
00350             ssub.push_back(String(""));
00351          }
00352       }
00353    }
00354    else if(m_ecode != REG_NOMATCH)
00355    {
00356       BLOCXX_THROW_ERR(RegExExecuteException,
00357          errorString().c_str(), m_ecode);
00358    }
00359    return ssub;
00360 }
00361 
00362 
00363 // -------------------------------------------------------------------
00364 blocxx::String
00365 PosixRegEx::replace(const String &str, const String &rep,
00366                     bool global, int eflags)
00367 {
00368    if( !compiled)
00369    {
00370       BLOCXX_THROW(RegExCompileException,
00371          "Regular expression is not compiled");
00372    }
00373 
00374    MatchArray  rsub;
00375    bool        match;
00376    size_t      off = 0;
00377    String      out = str;
00378 
00379    do
00380    {
00381       match = execute(rsub, out, off, 0, eflags);
00382       if( match)
00383       {
00384          if( rsub.empty()      ||
00385              rsub[0].rm_so < 0 ||
00386              rsub[0].rm_eo < 0)
00387          {
00388             // only if empty (missused as guard).
00389             BLOCXX_THROW(RegExCompileException,
00390                "Non-capturing regular expression");
00391          }
00392 
00393          String res = substitute_caps(rsub, out, rep);
00394 
00395          out = out.substring(0, rsub[0].rm_so) +
00396                res + out.substring(rsub[0].rm_eo);
00397 
00398          off = rsub[0].rm_so + res.length();
00399       }
00400       else if(m_ecode == REG_NOMATCH)
00401       {
00402          m_ecode = REG_NOERROR;
00403          m_error.erase();
00404       }
00405       else
00406       {
00407          BLOCXX_THROW_ERR(RegExExecuteException,
00408             errorString().c_str(), m_ecode);
00409       }
00410    } while(global && match && out.length() > off);
00411 
00412    return out;
00413 }
00414 
00415 // -------------------------------------------------------------------
00416 StringArray
00417 PosixRegEx::split(const String &str, bool empty, int eflags)
00418 {
00419    if( !compiled)
00420    {
00421       BLOCXX_THROW(RegExCompileException,
00422          "Regular expression is not compiled");
00423    }
00424 
00425    MatchArray  rsub;
00426    StringArray ssub;
00427    bool        match;
00428    size_t      off = 0;
00429    size_t      len = str.length();
00430 
00431    do
00432    {
00433       match = execute(rsub, str, off, 1, eflags);
00434       if( match)
00435       {
00436          if( rsub.empty()      ||
00437              rsub[0].rm_so < 0 ||
00438              rsub[0].rm_eo < 0)
00439          {
00440             BLOCXX_THROW(RegExCompileException,
00441                "Non-capturing regular expression");
00442          }
00443 
00444          if( empty || ((size_t)rsub[0].rm_so > off))
00445          {
00446             ssub.push_back(str.substring(off,
00447                                rsub[0].rm_so - off));
00448          }
00449          off = rsub[0].rm_eo;
00450       }
00451       else if(m_ecode == REG_NOMATCH)
00452       {
00453          String tmp = str.substring(off);
00454          if( empty || !tmp.empty())
00455          {
00456             ssub.push_back(tmp);
00457          }
00458          m_ecode = REG_NOERROR;
00459          m_error.erase();
00460       }
00461       else
00462       {
00463          BLOCXX_THROW_ERR(RegExExecuteException,
00464             errorString().c_str(), m_ecode);
00465       }
00466    } while(match && len > off);
00467 
00468    return ssub;
00469 }
00470 
00471 
00472 // -------------------------------------------------------------------
00473 StringArray
00474 PosixRegEx::grep(const StringArray &src, int eflags)
00475 {
00476    if( !compiled)
00477    {
00478       BLOCXX_THROW(RegExCompileException,
00479          "Regular expression is not compiled");
00480    }
00481 
00482    m_ecode = REG_NOERROR;
00483    m_error.erase();
00484 
00485    StringArray out;
00486    if( !src.empty())
00487    {
00488       StringArray::const_iterator i=src.begin();
00489       for( ; i != src.end(); ++i)
00490       {
00491          int ret = ::regexec(&m_regex, i->c_str(),
00492                              0, NULL, eflags);
00493          if( ret == REG_NOERROR)
00494          {
00495             out.push_back(*i);
00496          }
00497          else if(ret != REG_NOMATCH)
00498          {
00499             m_ecode = ret;
00500             m_error = getError(&m_regex, m_ecode);
00501             BLOCXX_THROW_ERR(RegExExecuteException,
00502                errorString().c_str(), m_ecode);
00503          }
00504       }
00505    }
00506 
00507    return out;
00508 }
00509 
00510 
00511 // -------------------------------------------------------------------
00512 bool
00513 PosixRegEx::match(const String &str, size_t index, int eflags) const
00514 {
00515    if( !compiled)
00516    {
00517       BLOCXX_THROW(RegExCompileException,
00518          "Regular expression is not compiled");
00519    }
00520 
00521    if( index > str.length())
00522    {
00523       BLOCXX_THROW(OutOfBoundsException,
00524          Format("String index out of bounds ("
00525                 "length = %1, index = %2).",
00526                 str.length(), index
00527          ).c_str());
00528    }
00529 
00530    m_ecode = ::regexec(&m_regex, str.c_str() + index,
00531                        0, NULL, eflags);
00532 
00533    if( m_ecode == REG_NOERROR)
00534    {
00535       m_error.erase();
00536       return true;
00537    }
00538    else if(m_ecode == REG_NOMATCH)
00539    {
00540       m_error = getError(&m_regex, m_ecode);
00541       return false;
00542    }
00543    else
00544    {
00545       m_error = getError(&m_regex, m_ecode);
00546       BLOCXX_THROW_ERR(RegExExecuteException,
00547          errorString().c_str(), m_ecode);
00548    }
00549 }
00550 
00551 
00552 // -------------------------------------------------------------------
00553 } // namespace BLOCXX_NAMESPACE
00554 
00555 #endif // BLOCXX_HAVE_REGEX_H
00556 #endif // BLOCXX_HAVE_REGEX
00557 
00558 /* vim: set ts=8 sts=8 sw=8 ai noet: */
00559 

Generated on Mon Sep 12 23:56:36 2005 for blocxx by  doxygen 1.4.4