00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029
00034 #include "blocxx/PosixRegEx.hpp"
00035 #ifdef BLOCXX_HAVE_REGEX
00036 #ifdef BLOCXX_HAVE_REGEX_H
00037
#include "blocxx/ExceptionIds.hpp"
#include "blocxx/Assertion.hpp"
#include "blocxx/Format.hpp"

#include <vector>
00041
00042
00043 namespace BLOCXX_NAMESPACE
00044 {
00045
00046
00047
00048 static String
00049 substitute_caps(const PosixRegEx::MatchArray &sub,
00050 const String &str, const String &rep)
00051 {
00052 static const char *cap_refs[] = {
00053 NULL, "\\1", "\\2", "\\3", "\\4",
00054 "\\5", "\\6", "\\7", "\\8", "\\9", NULL
00055 };
00056
00057 String res( rep);
00058 size_t pos;
00059
00060 for(size_t i=1; cap_refs[i] != NULL; i++)
00061 {
00062 String cap;
00063
00064 if( i < sub.size() && sub[i].rm_so >= 0 && sub[i].rm_eo >= 0)
00065 {
00066 cap = str.substring(sub[i].rm_so, sub[i].rm_eo
00067 - sub[i].rm_so);
00068 }
00069
00070 pos = res.indexOf(cap_refs[i]);
00071 while( pos != String::npos)
00072 {
00073 size_t quotes = 0;
00074 size_t at = pos;
00075
00076 while( at > 0 && res.charAt(--at) == '\\')
00077 quotes++;
00078
00079 if( quotes % 2)
00080 {
00081 quotes = (quotes + 1) / 2;
00082
00083 res = res.erase(pos - quotes, quotes);
00084
00085 pos = res.indexOf(cap_refs[i],
00086 pos + 2 - quotes);
00087 }
00088 else
00089 {
00090 quotes = quotes / 2;
00091
00092 res = res.substring(0, pos - quotes) +
00093 cap +
00094 res.substring(pos + 2);
00095
00096 pos = res.indexOf(cap_refs[i],
00097 pos + cap.length() - quotes);
00098 }
00099 }
00100 }
00101 return res;
00102 }
00103
00104
00105
00106 static inline String
00107 getError(const regex_t *preg, const int code)
00108 {
00109 char err[256] = { '\0'};
00110 ::regerror(code, preg, err, sizeof(err));
00111 return String(err);
00112 }
00113
00114
00115
00116 PosixRegEx::PosixRegEx()
00117 : compiled(false)
00118 , m_flags(0)
00119 , m_ecode(REG_NOERROR)
00120 {
00121 }
00122
00123
00124
00125 PosixRegEx::PosixRegEx(const String ®ex, int cflags)
00126 : compiled(false)
00127 , m_flags(0)
00128 , m_ecode(REG_NOERROR)
00129 {
00130 if( !compile(regex, cflags))
00131 {
00132 BLOCXX_THROW_ERR(RegExCompileException,
00133 errorString().c_str(), m_ecode);
00134 }
00135 }
00136
00137
00138
00139 PosixRegEx::PosixRegEx(const PosixRegEx &ref)
00140 : compiled(false)
00141 , m_flags(ref.m_flags)
00142 , m_ecode(REG_NOERROR)
00143 , m_rxstr(ref.m_rxstr)
00144 {
00145 if( ref.compiled && !compile(ref.m_rxstr, ref.m_flags))
00146 {
00147 BLOCXX_THROW_ERR(RegExCompileException,
00148 errorString().c_str(), m_ecode);
00149 }
00150 }
00151
00152
00153
00154 PosixRegEx::~PosixRegEx()
00155 {
00156 if( compiled)
00157 {
00158 regfree(&m_regex);
00159 }
00160 }
00161
00162
00163
00164 PosixRegEx &
00165 PosixRegEx::operator = (const PosixRegEx &ref)
00166 {
00167 if( !ref.compiled)
00168 {
00169 m_ecode = REG_NOERROR;
00170 m_error.erase();
00171 m_flags = ref.m_flags;
00172 m_rxstr = ref.m_rxstr;
00173 if( compiled)
00174 {
00175 regfree(&m_regex);
00176 compiled = false;
00177 }
00178 }
00179 else if( !compile(ref.m_rxstr, ref.m_flags))
00180 {
00181 BLOCXX_THROW_ERR(RegExCompileException,
00182 errorString().c_str(), m_ecode);
00183 }
00184 return *this;
00185 }
00186
00187
00188
00189 bool
00190 PosixRegEx::compile(const String ®ex, int cflags)
00191 {
00192 if( compiled)
00193 {
00194 regfree(&m_regex);
00195 compiled = false;
00196 }
00197
00198 m_rxstr = regex;
00199 m_flags = cflags;
00200 m_ecode = ::regcomp(&m_regex, regex.c_str(), cflags);
00201 if( m_ecode == REG_NOERROR)
00202 {
00203 compiled = true;
00204 m_error.erase();
00205 return true;
00206 }
00207 else
00208 {
00209 m_error = getError(&m_regex, m_ecode);
00210 return false;
00211 }
00212 }
00213
00214
00215
00216 int
00217 PosixRegEx::errorCode()
00218 {
00219 return m_ecode;
00220 }
00221
00222
00223
00224 String
00225 PosixRegEx::errorString() const
00226 {
00227 return m_error;
00228 }
00229
00230
00231
00232 String
00233 PosixRegEx::patternString() const
00234 {
00235 return m_rxstr;
00236 }
00237
00238
00239
00240 int
00241 PosixRegEx::compileFlags() const
00242 {
00243 return m_flags;
00244 }
00245
00246
00247
00248 bool
00249 PosixRegEx::isCompiled() const
00250 {
00251 return compiled;
00252 }
00253
00254
00255
00256 bool
00257 PosixRegEx::execute(MatchArray &sub, const String &str,
00258 size_t index, size_t count, int eflags)
00259 {
00260 if( !compiled)
00261 {
00262 BLOCXX_THROW(RegExCompileException,
00263 "Regular expression is not compiled");
00264 }
00265
00266 if( index > str.length())
00267 {
00268 BLOCXX_THROW(OutOfBoundsException,
00269 Format("String index out of bounds ("
00270 "length = %1, index = %2).",
00271 str.length(), index
00272 ).c_str());
00273 }
00274
00275 if( count == 0)
00276 {
00277 count = m_regex.re_nsub + 1;
00278 }
00279 regmatch_t rsub[count];
00280 rsub[0].rm_so = -1;
00281 rsub[0].rm_eo = -1;
00282
00283 sub.clear();
00284 m_ecode = ::regexec(&m_regex, str.c_str() + index,
00285 count, rsub, eflags);
00286 if( m_ecode == REG_NOERROR)
00287 {
00288 m_error.erase();
00289 if( m_flags & REG_NOSUB)
00290 {
00291 return true;
00292 }
00293
00294 sub.resize(count);
00295 for(size_t n = 0; n < count; n++)
00296 {
00297 if( rsub[n].rm_so < 0 || rsub[n].rm_eo < 0)
00298 {
00299 sub[n] = rsub[n];
00300 }
00301 else
00302 {
00303 rsub[n].rm_so += index;
00304 rsub[n].rm_eo += index;
00305 sub[n] = rsub[n];
00306 }
00307 }
00308 return true;
00309 }
00310 else
00311 {
00312 m_error = getError(&m_regex, m_ecode);
00313 return false;
00314 }
00315 }
00316
00317
00318
00319 StringArray
00320 PosixRegEx::capture(const String &str, size_t index, size_t count, int eflags)
00321 {
00322 if( !compiled)
00323 {
00324 BLOCXX_THROW(RegExCompileException,
00325 "Regular expression is not compiled");
00326 }
00327
00328 MatchArray rsub;
00329 StringArray ssub;
00330
00331 bool match = execute(rsub, str, index, count, eflags);
00332 if( match)
00333 {
00334 if( rsub.empty())
00335 {
00336 BLOCXX_THROW(RegExCompileException,
00337 "Non-capturing regular expression");
00338 }
00339
00340 MatchArray::const_iterator i=rsub.begin();
00341 for( ; i != rsub.end(); ++i)
00342 {
00343 if( i->rm_so >= 0 && i->rm_eo >= 0)
00344 {
00345 ssub.push_back(str.substring(i->rm_so,
00346 i->rm_eo - i->rm_so));
00347 }
00348 else
00349 {
00350 ssub.push_back(String(""));
00351 }
00352 }
00353 }
00354 else if(m_ecode != REG_NOMATCH)
00355 {
00356 BLOCXX_THROW_ERR(RegExExecuteException,
00357 errorString().c_str(), m_ecode);
00358 }
00359 return ssub;
00360 }
00361
00362
00363
00364 blocxx::String
00365 PosixRegEx::replace(const String &str, const String &rep,
00366 bool global, int eflags)
00367 {
00368 if( !compiled)
00369 {
00370 BLOCXX_THROW(RegExCompileException,
00371 "Regular expression is not compiled");
00372 }
00373
00374 MatchArray rsub;
00375 bool match;
00376 size_t off = 0;
00377 String out = str;
00378
00379 do
00380 {
00381 match = execute(rsub, out, off, 0, eflags);
00382 if( match)
00383 {
00384 if( rsub.empty() ||
00385 rsub[0].rm_so < 0 ||
00386 rsub[0].rm_eo < 0)
00387 {
00388
00389 BLOCXX_THROW(RegExCompileException,
00390 "Non-capturing regular expression");
00391 }
00392
00393 String res = substitute_caps(rsub, out, rep);
00394
00395 out = out.substring(0, rsub[0].rm_so) +
00396 res + out.substring(rsub[0].rm_eo);
00397
00398 off = rsub[0].rm_so + res.length();
00399 }
00400 else if(m_ecode == REG_NOMATCH)
00401 {
00402 m_ecode = REG_NOERROR;
00403 m_error.erase();
00404 }
00405 else
00406 {
00407 BLOCXX_THROW_ERR(RegExExecuteException,
00408 errorString().c_str(), m_ecode);
00409 }
00410 } while(global && match && out.length() > off);
00411
00412 return out;
00413 }
00414
00415
00416 StringArray
00417 PosixRegEx::split(const String &str, bool empty, int eflags)
00418 {
00419 if( !compiled)
00420 {
00421 BLOCXX_THROW(RegExCompileException,
00422 "Regular expression is not compiled");
00423 }
00424
00425 MatchArray rsub;
00426 StringArray ssub;
00427 bool match;
00428 size_t off = 0;
00429 size_t len = str.length();
00430
00431 do
00432 {
00433 match = execute(rsub, str, off, 1, eflags);
00434 if( match)
00435 {
00436 if( rsub.empty() ||
00437 rsub[0].rm_so < 0 ||
00438 rsub[0].rm_eo < 0)
00439 {
00440 BLOCXX_THROW(RegExCompileException,
00441 "Non-capturing regular expression");
00442 }
00443
00444 if( empty || ((size_t)rsub[0].rm_so > off))
00445 {
00446 ssub.push_back(str.substring(off,
00447 rsub[0].rm_so - off));
00448 }
00449 off = rsub[0].rm_eo;
00450 }
00451 else if(m_ecode == REG_NOMATCH)
00452 {
00453 String tmp = str.substring(off);
00454 if( empty || !tmp.empty())
00455 {
00456 ssub.push_back(tmp);
00457 }
00458 m_ecode = REG_NOERROR;
00459 m_error.erase();
00460 }
00461 else
00462 {
00463 BLOCXX_THROW_ERR(RegExExecuteException,
00464 errorString().c_str(), m_ecode);
00465 }
00466 } while(match && len > off);
00467
00468 return ssub;
00469 }
00470
00471
00472
00473 StringArray
00474 PosixRegEx::grep(const StringArray &src, int eflags)
00475 {
00476 if( !compiled)
00477 {
00478 BLOCXX_THROW(RegExCompileException,
00479 "Regular expression is not compiled");
00480 }
00481
00482 m_ecode = REG_NOERROR;
00483 m_error.erase();
00484
00485 StringArray out;
00486 if( !src.empty())
00487 {
00488 StringArray::const_iterator i=src.begin();
00489 for( ; i != src.end(); ++i)
00490 {
00491 int ret = ::regexec(&m_regex, i->c_str(),
00492 0, NULL, eflags);
00493 if( ret == REG_NOERROR)
00494 {
00495 out.push_back(*i);
00496 }
00497 else if(ret != REG_NOMATCH)
00498 {
00499 m_ecode = ret;
00500 m_error = getError(&m_regex, m_ecode);
00501 BLOCXX_THROW_ERR(RegExExecuteException,
00502 errorString().c_str(), m_ecode);
00503 }
00504 }
00505 }
00506
00507 return out;
00508 }
00509
00510
00511
00512 bool
00513 PosixRegEx::match(const String &str, size_t index, int eflags) const
00514 {
00515 if( !compiled)
00516 {
00517 BLOCXX_THROW(RegExCompileException,
00518 "Regular expression is not compiled");
00519 }
00520
00521 if( index > str.length())
00522 {
00523 BLOCXX_THROW(OutOfBoundsException,
00524 Format("String index out of bounds ("
00525 "length = %1, index = %2).",
00526 str.length(), index
00527 ).c_str());
00528 }
00529
00530 m_ecode = ::regexec(&m_regex, str.c_str() + index,
00531 0, NULL, eflags);
00532
00533 if( m_ecode == REG_NOERROR)
00534 {
00535 m_error.erase();
00536 return true;
00537 }
00538 else if(m_ecode == REG_NOMATCH)
00539 {
00540 m_error = getError(&m_regex, m_ecode);
00541 return false;
00542 }
00543 else
00544 {
00545 m_error = getError(&m_regex, m_ecode);
00546 BLOCXX_THROW_ERR(RegExExecuteException,
00547 errorString().c_str(), m_ecode);
00548 }
00549 }
00550
00551
00552
00553 }
00554
00555 #endif // BLOCXX_HAVE_REGEX_H
00556 #endif // BLOCXX_HAVE_REGEX
00557
00558
00559