regexp.cpp
来自「konqueror3 embedded版本, KDE环境下的当家浏览器的嵌入式版」· C++ 代码 · 共 256 行
CPP
256 行
// -*- c-basic-offset: 2 -*-/* * This file is part of the KDE libraries * Copyright (C) 1999-2001 Harri Porten (porten@kde.org) * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2 of the License, or (at your option) any later version. * * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA * */#include "regexp.h"#include "lexer.h"#include <stdio.h>#include <stdlib.h>#include <string.h>using namespace KJS;RegExp::RegExp(const UString &p, int f) : pat(p), flgs(f), m_notEmpty(false), valid(true){ nrSubPatterns = 0; // determined in match() with POSIX regex. // JS regexps can contain Unicode escape sequences (\uxxxx) which // are rather uncommon elsewhere. As our regexp libs don't understand // them we do the unescaping ourselves internally. UString intern; if (p.find('\\') >= 0) { bool escape = false; for (int i = 0; i < p.size(); ++i) { UChar c = p[i]; if (escape) { escape = false; // we only care about \uxxxx if (c == 'u' && i + 4 < p.size()) { int c0 = p[i+1].unicode(); int c1 = p[i+2].unicode(); int c2 = p[i+3].unicode(); int c3 = p[i+4].unicode(); if (Lexer::isHexDigit(c0) && Lexer::isHexDigit(c1) && Lexer::isHexDigit(c2) && Lexer::isHexDigit(c3)) { c = Lexer::convertUnicode(c0, c1, c2, c3); intern += UString(&c, 1); i += 4; continue; } } intern += UString('\\'); intern += UString(&c, 1); } else { if (c == '\\') escape = true; else intern += UString(&c, 1); } } } else { intern = p; }#ifdef HAVE_PCREPOSIX int pcreflags = 0; const char *perrormsg; int errorOffset; if (flgs & IgnoreCase) pcreflags |= PCRE_CASELESS; if (flgs & Multiline) pcreflags |= PCRE_MULTILINE; pcregex = pcre_compile(intern.ascii(), pcreflags, &perrormsg, &errorOffset, NULL); if (!pcregex) {#ifndef NDEBUG fprintf(stderr, "KJS: pcre_compile() failed with '%s'\n", perrormsg);#endif valid = false; return; }#ifdef PCRE_INFO_CAPTURECOUNT // Get number of subpatterns that will be returned int rc = pcre_fullinfo( pcregex, NULL, PCRE_INFO_CAPTURECOUNT, &nrSubPatterns); if (rc != 0)#endif nrSubPatterns = 0; // fallback. We always need the first pair of offsets.#else /* HAVE_PCREPOSIX */ int regflags = 0;#ifdef REG_EXTENDED regflags |= REG_EXTENDED;#endif#ifdef REG_ICASE if ( f & IgnoreCase ) regflags |= REG_ICASE;#endif //NOTE: Multiline is not feasible with POSIX regex. //if ( f & Multiline ) // ; // Note: the Global flag is already handled by RegExpProtoFunc::execute int errorCode = regcomp(&preg, intern.ascii(), regflags); if (errorCode != 0) {#ifndef NDEBUG char errorMessage[80]; regerror(errorCode, &preg, errorMessage, sizeof errorMessage); fprintf(stderr, "KJS: regcomp failed with '%s'", errorMessage);#endif valid = false; }#endif}RegExp::~RegExp(){#ifdef HAVE_PCREPOSIX if (pcregex) pcre_free(pcregex);#else /* TODO: is this really okay after an error ? */ regfree(&preg);#endif}UString RegExp::match(const UString &s, int i, int *pos, int **ovector){ if (i < 0) i = 0; if (ovector) *ovector = 0L; int dummyPos; if (!pos) pos = &dummyPos; *pos = -1; if (i > s.size() || s.isNull()) return UString::null;#ifdef HAVE_PCREPOSIX CString buffer(s.cstring()); int bufferSize = buffer.size(); int ovecsize = (nrSubPatterns+1)*3; // see pcre docu if (ovector) *ovector = new int[ovecsize]; if (!pcregex) return UString::null; if (pcre_exec(pcregex, NULL, buffer.c_str(), bufferSize, i, m_notEmpty ? (PCRE_NOTEMPTY | PCRE_ANCHORED) : 0, // see man pcretest ovector ? *ovector : 0L, ovecsize) == PCRE_ERROR_NOMATCH) { // Failed to match. if ((flgs & Global) && m_notEmpty && ovector) { // We set m_notEmpty ourselves, to look for a non-empty match // (see man pcretest or pcretest.c for details). // So we don't stop here, we want to try again at i+1.#ifdef KJS_VERBOSE fprintf(stderr, "No match after m_notEmpty. +1 and keep going.\n");#endif m_notEmpty = 0; if (pcre_exec(pcregex, NULL, buffer.c_str(), bufferSize, i+1, 0, ovector ? *ovector : 0L, ovecsize) == PCRE_ERROR_NOMATCH) return UString::null; } else // done return UString::null; } // Got a match, proceed with it. if (!ovector) return UString::null; // don't rely on the return value if you pass ovector==0#else const uint maxMatch = 10; regmatch_t rmatch[maxMatch]; char *str = strdup(s.ascii()); // TODO: why ??? if (regexec(&preg, str + i, maxMatch, rmatch, 0)) { free(str); return UString::null; } free(str); if (!ovector) { *pos = rmatch[0].rm_so + i; return s.substr(rmatch[0].rm_so + i, rmatch[0].rm_eo - rmatch[0].rm_so); } // map rmatch array to ovector used in PCRE case nrSubPatterns = 0; for (uint j = 0; j < maxMatch && rmatch[j].rm_so >= 0 ; j++) { nrSubPatterns++; // if the nonEmpty flag is set, return a failed match if any of the // subMatches happens to be an empty string. if (m_notEmpty && rmatch[j].rm_so == rmatch[j].rm_eo) return UString::null; } // Allow an ovector slot to return the (failed) match result. if (nrSubPatterns == 0) nrSubPatterns = 1; int ovecsize = (nrSubPatterns)*3; // see above *ovector = new int[ovecsize]; for (uint j = 0; j < nrSubPatterns; j++) { (*ovector)[2*j] = rmatch[j].rm_so + i; (*ovector)[2*j+1] = rmatch[j].rm_eo + i; }#endif *pos = (*ovector)[0]; if ( *pos == (*ovector)[1] && (flgs & Global) ) { // empty match, next try will be with m_notEmpty=true m_notEmpty=true; } return s.substr((*ovector)[0], (*ovector)[1] - (*ovector)[0]);}#if 0 // unusedbool RegExp::test(const UString &s, int){#ifdef HAVE_PCREPOSIX int ovector[300]; CString buffer(s.cstring()); if (s.isNull() || pcre_exec(pcregex, NULL, buffer.c_str(), buffer.size(), 0, 0, ovector, 300) == PCRE_ERROR_NOMATCH) return false; else return true;#else char *str = strdup(s.ascii()); int r = regexec(&preg, str, 0, 0, 0); free(str); return r == 0;#endif}#endif
⌨️ 快捷键说明
复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?