📄 pcrecpp.cc

📁 php-4.4.7学习linux时下载的源代码
💻 CC
📖 第 1 页 / 共 2 页
字号:
上一页 12
  return Rewrite(out, rewrite, text, vec, matches);}/*static*/ string RE::QuoteMeta(const StringPiece& unquoted) {  string result;  // Escape any ascii character not in [A-Za-z_0-9].  //  // Note that it's legal to escape a character even if it has no  // special meaning in a regular expression -- so this function does  // that.  (This also makes it identical to the perl function of the  // same name; see `perldoc -f quotemeta`.)  for (int ii = 0; ii < unquoted.size(); ++ii) {    // Note that using 'isalnum' here raises the benchmark time from    // 32ns to 58ns:    if ((unquoted[ii] < 'a' || unquoted[ii] > 'z') &&        (unquoted[ii] < 'A' || unquoted[ii] > 'Z') &&        (unquoted[ii] < '0' || unquoted[ii] > '9') &&        unquoted[ii] != '_' &&        // If this is the part of a UTF8 or Latin1 character, we need        // to copy this byte without escaping.  Experimentally this is        // what works correctly with the regexp library.        !(unquoted[ii] & 128)) {      result += '\\';    }    result += unquoted[ii];  }  return result;}/***** Actual matching and rewriting code *****/int RE::TryMatch(const StringPiece& text,                 int startpos,                 Anchor anchor,                 int *vec,                 int vecsize) const {  pcre* re = (anchor == ANCHOR_BOTH) ? re_full_ : re_partial_;  if (re == NULL) {    //fprintf(stderr, "Matching against invalid re: %s\n", error_->c_str());    return 0;  }  pcre_extra extra = { 0 };  if (options_.match_limit() > 0) {    extra.flags |= PCRE_EXTRA_MATCH_LIMIT;    extra.match_limit = options_.match_limit();  }  if (options_.match_limit_recursion() > 0) {    extra.flags |= PCRE_EXTRA_MATCH_LIMIT_RECURSION;    extra.match_limit_recursion = options_.match_limit_recursion();  }  int rc = pcre_exec(re,              // The regular expression object                     &extra,                     (text.data() == NULL) ? "" : text.data(),                     text.size(),                     startpos,                     (anchor == UNANCHORED) ? 0 : PCRE_ANCHORED,                     vec,                     vecsize);  // Handle errors  if (rc == PCRE_ERROR_NOMATCH) {    return 0;  } else if (rc < 0) {    //fprintf(stderr, "Unexpected return code: %d when matching '%s'\n",    //        re, pattern_.c_str());    return 0;  } else if (rc == 0) {    // pcre_exec() returns 0 as a special case when the number of    // capturing subpatterns exceeds the size of the vector.    // When this happens, there is a match and the output vector    // is filled, but we miss out on the positions of the extra subpatterns.    rc = vecsize / 2;  }  if ((anchor == ANCHOR_BOTH) && (re_full_ == re_partial_)) {    // We need an extra check to make sure that the match extended    // to the end of the input string    assert(vec[0] == 0);                 // PCRE_ANCHORED forces starting match    if (vec[1] != text.size()) return 0; // Did not get ending match  }  return rc;}bool RE::DoMatchImpl(const StringPiece& text,                     Anchor anchor,                     int* consumed,                     const Arg* const* args,                     int n,                     int* vec,                     int vecsize) const {  assert((1 + n) * 3 <= vecsize);  // results + PCRE workspace  int matches = TryMatch(text, 0, anchor, vec, vecsize);  assert(matches >= 0);  // TryMatch never returns negatives  if (matches == 0)    return false;  *consumed = vec[1];  if (n == 0 || args == NULL) {    // We are not interested in results    return true;  }  if (NumberOfCapturingGroups() < n) {    // RE has fewer capturing groups than number of arg pointers passed in    return false;  }  // If we got here, we must have matched the whole pattern.  // We do not need (can not do) any more checks on the value of 'matches' here  // -- see the comment for TryMatch.  for (int i = 0; i < n; i++) {    const int start = vec[2*(i+1)];    const int limit = vec[2*(i+1)+1];    if (!args[i]->Parse(text.data() + start, limit-start)) {      // TODO: Should we indicate what the error was?      return false;    }  }  return true;}bool RE::DoMatch(const StringPiece& text,                 Anchor anchor,                 int* consumed,                 const Arg* const args[],                 int n) const {  assert(n >= 0);  size_t const vecsize = (1 + n) * 3;  // results + PCRE workspace                                       // (as for kVecSize)  int space[21];   // use stack allocation for small vecsize (common case)  int* vec = vecsize <= 21 ? space : new int[vecsize];  bool retval = DoMatchImpl(text, anchor, consumed, args, n, vec, vecsize);  if (vec != space) delete [] vec;  return retval;}bool RE::Rewrite(string *out, const StringPiece &rewrite,                 const StringPiece &text, int *vec, int veclen) const {  for (const char *s = rewrite.data(), *end = s + rewrite.size();       s < end; s++) {    int c = *s;    if (c == '\\') {      c = *++s;      if (isdigit(c)) {        int n = (c - '0');        if (n >= veclen) {          //fprintf(stderr, requested group %d in regexp %.*s\n",          //        n, rewrite.size(), rewrite.data());          return false;        }        int start = vec[2 * n];        if (start >= 0)          out->append(text.data() + start, vec[2 * n + 1] - start);      } else if (c == '\\') {        out->push_back('\\');      } else {        //fprintf(stderr, "invalid rewrite pattern: %.*s\n",        //        rewrite.size(), rewrite.data());        return false;      }    } else {      out->push_back(c);    }  }  return true;}// Return the number of capturing subpatterns, or -1 if the// regexp wasn't valid on construction.int RE::NumberOfCapturingGroups() const {  if (re_partial_ == NULL) return -1;  int result;  int pcre_retval = pcre_fullinfo(re_partial_,  // The regular expression object                                  NULL,         // We did not study the pattern                                  PCRE_INFO_CAPTURECOUNT,                                  &result);  assert(pcre_retval == 0);  return result;}/***** Parsers for various types *****/bool Arg::parse_null(const char* str, int n, void* dest) {  // We fail if somebody asked us to store into a non-NULL void* pointer  return (dest == NULL);}bool Arg::parse_string(const char* str, int n, void* dest) {  reinterpret_cast<string*>(dest)->assign(str, n);  return true;}bool Arg::parse_stringpiece(const char* str, int n, void* dest) {  reinterpret_cast<StringPiece*>(dest)->set(str, n);  return true;}bool Arg::parse_char(const char* str, int n, void* dest) {  if (n != 1) return false;  *(reinterpret_cast<char*>(dest)) = str[0];  return true;}bool Arg::parse_uchar(const char* str, int n, void* dest) {  if (n != 1) return false;  *(reinterpret_cast<unsigned char*>(dest)) = str[0];  return true;}// Largest number spec that we are willing to parsestatic const int kMaxNumberLength = 32;// REQUIRES "buf" must have length at least kMaxNumberLength+1// REQUIRES "n > 0"// Copies "str" into "buf" and null-terminates if necessary.// Returns one of://      a. "str" if no termination is needed//      b. "buf" if the string was copied and null-terminated//      c. "" if the input was invalid and has no hope of being parsedstatic const char* TerminateNumber(char* buf, const char* str, int n) {  if ((n > 0) && isspace(*str)) {    // We are less forgiving than the strtoxxx() routines and do not    // allow leading spaces.    return "";  }  // See if the character right after the input text may potentially  // look like a digit.  if (isdigit(str[n]) ||      ((str[n] >= 'a') && (str[n] <= 'f')) ||      ((str[n] >= 'A') && (str[n] <= 'F'))) {    if (n > kMaxNumberLength) return ""; // Input too big to be a valid number    memcpy(buf, str, n);    buf[n] = '\0';    return buf;  } else {    // We can parse right out of the supplied string, so return it.    return str;  }}bool Arg::parse_long_radix(const char* str,                           int n,                           void* dest,                           int radix) {  if (n == 0) return false;  char buf[kMaxNumberLength+1];  str = TerminateNumber(buf, str, n);  char* end;  errno = 0;  long r = strtol(str, &end, radix);  if (end != str + n) return false;   // Leftover junk  if (errno) return false;  *(reinterpret_cast<long*>(dest)) = r;  return true;}bool Arg::parse_ulong_radix(const char* str,                            int n,                            void* dest,                            int radix) {  if (n == 0) return false;  char buf[kMaxNumberLength+1];  str = TerminateNumber(buf, str, n);  if (str[0] == '-') return false;    // strtoul() on a negative number?!  char* end;  errno = 0;  unsigned long r = strtoul(str, &end, radix);  if (end != str + n) return false;   // Leftover junk  if (errno) return false;  *(reinterpret_cast<unsigned long*>(dest)) = r;  return true;}bool Arg::parse_short_radix(const char* str,                            int n,                            void* dest,                            int radix) {  long r;  if (!parse_long_radix(str, n, &r, radix)) return false; // Could not parse  if (r < SHRT_MIN || r > SHRT_MAX) return false;       // Out of range  *(reinterpret_cast<short*>(dest)) = r;  return true;}bool Arg::parse_ushort_radix(const char* str,                             int n,                             void* dest,                             int radix) {  unsigned long r;  if (!parse_ulong_radix(str, n, &r, radix)) return false; // Could not parse  if (r > USHRT_MAX) return false;                      // Out of range  *(reinterpret_cast<unsigned short*>(dest)) = r;  return true;}bool Arg::parse_int_radix(const char* str,                          int n,                          void* dest,                          int radix) {  long r;  if (!parse_long_radix(str, n, &r, radix)) return false; // Could not parse  if (r < INT_MIN || r > INT_MAX) return false;         // Out of range  *(reinterpret_cast<int*>(dest)) = r;  return true;}bool Arg::parse_uint_radix(const char* str,                           int n,                           void* dest,                           int radix) {  unsigned long r;  if (!parse_ulong_radix(str, n, &r, radix)) return false; // Could not parse  if (r > UINT_MAX) return false;                       // Out of range  *(reinterpret_cast<unsigned int*>(dest)) = r;  return true;}bool Arg::parse_longlong_radix(const char* str,                               int n,                               void* dest,                               int radix) {#ifndef HAVE_LONG_LONG  return false;#else  if (n == 0) return false;  char buf[kMaxNumberLength+1];  str = TerminateNumber(buf, str, n);  char* end;  errno = 0;#if defined HAVE_STRTOQ  long long r = strtoq(str, &end, radix);#elif defined HAVE_STRTOLL  long long r = strtoll(str, &end, radix);#else#error parse_longlong_radix: cannot convert input to a long-long#endif  if (end != str + n) return false;   // Leftover junk  if (errno) return false;  *(reinterpret_cast<long long*>(dest)) = r;  return true;#endif   /* HAVE_LONG_LONG */}bool Arg::parse_ulonglong_radix(const char* str,                                int n,                                void* dest,                                int radix) {#ifndef HAVE_UNSIGNED_LONG_LONG  return false;#else  if (n == 0) return false;  char buf[kMaxNumberLength+1];  str = TerminateNumber(buf, str, n);  if (str[0] == '-') return false;    // strtoull() on a negative number?!  char* end;  errno = 0;#if defined HAVE_STRTOQ  unsigned long long r = strtouq(str, &end, radix);#elif defined HAVE_STRTOLL  unsigned long long r = strtoull(str, &end, radix);#else#error parse_ulonglong_radix: cannot convert input to a long-long#endif  if (end != str + n) return false;   // Leftover junk  if (errno) return false;  *(reinterpret_cast<unsigned long long*>(dest)) = r;  return true;#endif   /* HAVE_UNSIGNED_LONG_LONG */}bool Arg::parse_double(const char* str, int n, void* dest) {  if (n == 0) return false;  static const int kMaxLength = 200;  char buf[kMaxLength];  if (n >= kMaxLength) return false;  memcpy(buf, str, n);  buf[n] = '\0';  errno = 0;  char* end;  double r = strtod(buf, &end);  if (end != buf + n) return false;   // Leftover junk  if (errno) return false;  *(reinterpret_cast<double*>(dest)) = r;  return true;}bool Arg::parse_float(const char* str, int n, void* dest) {  double r;  if (!parse_double(str, n, &r)) return false;  *(reinterpret_cast<float*>(dest)) = static_cast<float>(r);  return true;}#define DEFINE_INTEGER_PARSERS(name)                                    \  bool Arg::parse_##name(const char* str, int n, void* dest) {          \    return parse_##name##_radix(str, n, dest, 10);                      \  }                                                                     \  bool Arg::parse_##name##_hex(const char* str, int n, void* dest) {    \    return parse_##name##_radix(str, n, dest, 16);                      \  }                                                                     \  bool Arg::parse_##name##_octal(const char* str, int n, void* dest) {  \    return parse_##name##_radix(str, n, dest, 8);                       \  }                                                                     \  bool Arg::parse_##name##_cradix(const char* str, int n, void* dest) { \    return parse_##name##_radix(str, n, dest, 0);                       \  }DEFINE_INTEGER_PARSERS(short)      /*                                   */DEFINE_INTEGER_PARSERS(ushort)     /*                                   */DEFINE_INTEGER_PARSERS(int)        /* Don't use semicolons after these  */DEFINE_INTEGER_PARSERS(uint)       /* statements because they can cause */DEFINE_INTEGER_PARSERS(long)       /* compiler warnings if the checking */DEFINE_INTEGER_PARSERS(ulong)      /* level is turned up high enough.   */DEFINE_INTEGER_PARSERS(longlong)   /*                                   */DEFINE_INTEGER_PARSERS(ulonglong)  /*                                   */#undef DEFINE_INTEGER_PARSERS}   // namespace pcrecpp
上一页 12
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -