📄 pcrecpp_unittest.cc
字号:
// Get one string that passes (sometimes), one that never does. string text_good("abcdefghijk"); string text_bad("acdefghijkl"); // According to pcretest, matching text_good against (\w+)*b // requires match_limit of at least 8192, and match_recursion_limit // of at least 37. RE_Options options_ml; options_ml.set_match_limit(8192); RE re("(\\w+)*b", options_ml); CHECK(re.PartialMatch(text_good) == true); CHECK(re.PartialMatch(text_bad) == false); CHECK(re.FullMatch(text_good) == false); CHECK(re.FullMatch(text_bad) == false); options_ml.set_match_limit(1024); RE re2("(\\w+)*b", options_ml); CHECK(re2.PartialMatch(text_good) == false); // because of match_limit CHECK(re2.PartialMatch(text_bad) == false); CHECK(re2.FullMatch(text_good) == false); CHECK(re2.FullMatch(text_bad) == false); RE_Options options_mlr; options_mlr.set_match_limit_recursion(50); RE re3("(\\w+)*b", options_mlr); CHECK(re3.PartialMatch(text_good) == true); CHECK(re3.PartialMatch(text_bad) == false); CHECK(re3.FullMatch(text_good) == false); CHECK(re3.FullMatch(text_bad) == false); options_mlr.set_match_limit_recursion(10); RE re4("(\\w+)*b", options_mlr); CHECK(re4.PartialMatch(text_good) == false); CHECK(re4.PartialMatch(text_bad) == false); CHECK(re4.FullMatch(text_good) == false); CHECK(re4.FullMatch(text_bad) == false);}// A meta-quoted string, interpreted as a pattern, should always match// the original unquoted string.static void TestQuoteMeta(string unquoted, RE_Options options = RE_Options()) { string quoted = RE::QuoteMeta(unquoted); RE re(quoted, options); CHECK(re.FullMatch(unquoted));}// A string containing meaningful regexp characters, which is then meta-// quoted, should not generally match a string the unquoted string does.static void NegativeTestQuoteMeta(string unquoted, string should_not_match, RE_Options options = RE_Options()) { string quoted = RE::QuoteMeta(unquoted); RE re(quoted, options); CHECK(!re.FullMatch(should_not_match));}// Tests that quoted meta characters match their original strings,// and that a few things that shouldn't match indeed do not.static void TestQuotaMetaSimple() { TestQuoteMeta("foo"); TestQuoteMeta("foo.bar"); TestQuoteMeta("foo\\.bar"); TestQuoteMeta("[1-9]"); TestQuoteMeta("1.5-2.0?"); TestQuoteMeta("\\d"); TestQuoteMeta("Who doesn't like ice cream?"); TestQuoteMeta("((a|b)c?d*e+[f-h]i)"); TestQuoteMeta("((?!)xxx).*yyy"); TestQuoteMeta("([");}static void TestQuoteMetaSimpleNegative() { NegativeTestQuoteMeta("foo", "bar"); NegativeTestQuoteMeta("...", "bar"); NegativeTestQuoteMeta("\\.", "."); NegativeTestQuoteMeta("\\.", ".."); NegativeTestQuoteMeta("(a)", "a"); NegativeTestQuoteMeta("(a|b)", "a"); NegativeTestQuoteMeta("(a|b)", "(a)"); NegativeTestQuoteMeta("(a|b)", "a|b"); NegativeTestQuoteMeta("[0-9]", "0"); NegativeTestQuoteMeta("[0-9]", "0-9"); NegativeTestQuoteMeta("[0-9]", "[9]"); NegativeTestQuoteMeta("((?!)xxx)", "xxx");}static void TestQuoteMetaLatin1() { TestQuoteMeta("3\xb2 = 9");}static void TestQuoteMetaUtf8() {#ifdef SUPPORT_UTF8 TestQuoteMeta("Pl\xc3\xa1\x63ido Domingo", pcrecpp::UTF8()); TestQuoteMeta("xyz", pcrecpp::UTF8()); // No fancy utf8 TestQuoteMeta("\xc2\xb0", pcrecpp::UTF8()); // 2-byte utf8 (degree symbol) TestQuoteMeta("27\xc2\xb0 degrees", pcrecpp::UTF8()); // As a middle character TestQuoteMeta("\xe2\x80\xb3", pcrecpp::UTF8()); // 3-byte utf8 (double prime) TestQuoteMeta("\xf0\x9d\x85\x9f", pcrecpp::UTF8()); // 4-byte utf8 (music note) TestQuoteMeta("27\xc2\xb0"); // Interpreted as Latin-1, but should still work NegativeTestQuoteMeta("27\xc2\xb0", // 2-byte utf (degree symbol) "27\\\xc2\\\xb0", pcrecpp::UTF8());#endif}static void TestQuoteMetaAll() { printf("Testing QuoteMeta\n"); TestQuotaMetaSimple(); TestQuoteMetaSimpleNegative(); TestQuoteMetaLatin1(); TestQuoteMetaUtf8();}//// Options tests contributed by// Giuseppe Maxia, CTO, Stardata s.r.l.// July 2005//static void GetOneOptionResult( const char *option_name, const char *regex, const char *str, RE_Options options, bool full, string expected) { printf("Testing Option <%s>\n", option_name); if(VERBOSE_TEST) printf("/%s/ finds \"%s\" within \"%s\" \n", regex, expected.c_str(), str); string captured(""); if (full) RE(regex,options).FullMatch(str, &captured); else RE(regex,options).PartialMatch(str, &captured); CHECK_EQ(captured, expected);}static void TestOneOption( const char *option_name, const char *regex, const char *str, RE_Options options, bool full, bool assertive = true) { printf("Testing Option <%s>\n", option_name); if (VERBOSE_TEST) printf("'%s' %s /%s/ \n", str, (assertive? "matches" : "doesn't match"), regex); if (assertive) { if (full) CHECK(RE(regex,options).FullMatch(str)); else CHECK(RE(regex,options).PartialMatch(str)); } else { if (full) CHECK(!RE(regex,options).FullMatch(str)); else CHECK(!RE(regex,options).PartialMatch(str)); }}static void Test_CASELESS() { RE_Options options; RE_Options options2; options.set_caseless(true); TestOneOption("CASELESS (class)", "HELLO", "hello", options, false); TestOneOption("CASELESS (class2)", "HELLO", "hello", options2.set_caseless(true), false); TestOneOption("CASELESS (class)", "^[A-Z]+$", "Hello", options, false); TestOneOption("CASELESS (function)", "HELLO", "hello", pcrecpp::CASELESS(), false); TestOneOption("CASELESS (function)", "^[A-Z]+$", "Hello", pcrecpp::CASELESS(), false); options.set_caseless(false); TestOneOption("no CASELESS", "HELLO", "hello", options, false, false);}static void Test_MULTILINE() { RE_Options options; RE_Options options2; const char *str = "HELLO\n" "cruel\n" "world\n"; options.set_multiline(true); TestOneOption("MULTILINE (class)", "^cruel$", str, options, false); TestOneOption("MULTILINE (class2)", "^cruel$", str, options2.set_multiline(true), false); TestOneOption("MULTILINE (function)", "^cruel$", str, pcrecpp::MULTILINE(), false); options.set_multiline(false); TestOneOption("no MULTILINE", "^cruel$", str, options, false, false);}static void Test_DOTALL() { RE_Options options; RE_Options options2; const char *str = "HELLO\n" "cruel\n" "world"; options.set_dotall(true); TestOneOption("DOTALL (class)", "HELLO.*world", str, options, true); TestOneOption("DOTALL (class2)", "HELLO.*world", str, options2.set_dotall(true), true); TestOneOption("DOTALL (function)", "HELLO.*world", str, pcrecpp::DOTALL(), true); options.set_dotall(false); TestOneOption("no DOTALL", "HELLO.*world", str, options, true, false);}static void Test_DOLLAR_ENDONLY() { RE_Options options; RE_Options options2; const char *str = "HELLO world\n"; TestOneOption("no DOLLAR_ENDONLY", "world$", str, options, false); options.set_dollar_endonly(true); TestOneOption("DOLLAR_ENDONLY 1", "world$", str, options, false, false); TestOneOption("DOLLAR_ENDONLY 2", "world$", str, options2.set_dollar_endonly(true), false, false);}static void Test_EXTRA() { RE_Options options; const char *str = "HELLO"; options.set_extra(true); TestOneOption("EXTRA 1", "\\HELL\\O", str, options, true, false ); TestOneOption("EXTRA 2", "\\HELL\\O", str, RE_Options().set_extra(true), true, false ); options.set_extra(false); TestOneOption("no EXTRA", "\\HELL\\O", str, options, true );}static void Test_EXTENDED() { RE_Options options; RE_Options options2; const char *str = "HELLO world"; options.set_extended(true); TestOneOption("EXTENDED (class)", "HELLO world", str, options, false, false); TestOneOption("EXTENDED (class2)", "HELLO world", str, options2.set_extended(true), false, false); TestOneOption("EXTENDED (class)", "^ HE L{2} O " "\\s+ " "\\w+ $ ", str, options, false); TestOneOption("EXTENDED (function)", "HELLO world", str, pcrecpp::EXTENDED(), false, false); TestOneOption("EXTENDED (function)", "^ HE L{2} O " "\\s+ " "\\w+ $ ", str, pcrecpp::EXTENDED(), false); options.set_extended(false); TestOneOption("no EXTENDED", "HELLO world", str, options, false);}static void Test_NO_AUTO_CAPTURE() { RE_Options options; const char *str = "HELLO world"; string captured; printf("Testing Option <no NO_AUTO_CAPTURE>\n"); if (VERBOSE_TEST) printf("parentheses capture text\n"); RE re("(world|universe)$", options); CHECK(re.Extract("\\1", str , &captured)); CHECK_EQ(captured, "world"); options.set_no_auto_capture(true); printf("testing Option <NO_AUTO_CAPTURE>\n"); if (VERBOSE_TEST) printf("parentheses do not capture text\n"); re.Extract("\\1",str, &captured ); CHECK_EQ(captured, "world");}static void Test_UNGREEDY() { RE_Options options; const char *str = "HELLO, 'this' is the 'world'"; options.set_ungreedy(true); GetOneOptionResult("UNGREEDY 1", "('.*')", str, options, false, "'this'" ); GetOneOptionResult("UNGREEDY 2", "('.*')", str, RE_Options().set_ungreedy(true), false, "'this'" ); GetOneOptionResult("UNGREEDY", "('.*?')", str, options, false, "'this' is the 'world'" ); options.set_ungreedy(false); GetOneOptionResult("no UNGREEDY", "('.*')", str, options, false, "'this' is the 'world'" ); GetOneOptionResult("no UNGREEDY", "('.*?')", str, options, false, "'this'" );}static void Test_all_options() { const char *str = "HELLO\n" "cruel\n" "world"; RE_Options options; options.set_all_options(PCRE_CASELESS | PCRE_DOTALL); TestOneOption("all_options (CASELESS|DOTALL)", "^hello.*WORLD", str , options, false); options.set_all_options(0); TestOneOption("all_options (0)", "^hello.*WORLD", str , options, false, false); options.set_all_options(PCRE_MULTILINE | PCRE_EXTENDED); TestOneOption("all_options (MULTILINE|EXTENDED)", " ^ c r u e l $ ", str, options, false); TestOneOption("all_options (MULTILINE|EXTENDED) with constructor", " ^ c r u e l $ ", str, RE_Options(PCRE_MULTILINE | PCRE_EXTENDED), false); TestOneOption("all_options (MULTILINE|EXTENDED) with concatenation", " ^ c r u e l $ ", str, RE_Options() .set_multiline(true) .set_extended(true), false); options.set_all_options(0); TestOneOption("all_options (0)", "^ c r u e l $", str, options, false, false);}static void TestOptions() { printf("Testing Options\n"); Test_CASELESS(); Test_MULTILINE(); Test_DOTALL(); Test_DOLLAR_ENDONLY(); Test_EXTENDED(); Test_NO_AUTO_CAPTURE(); Test_UNGREEDY(); Test_EXTRA(); Test_all_options();}static void TestConstructors() { printf("Testing constructors\n"); RE_Options options; options.set_dotall(true); const char *str = "HELLO\n" "cruel\n" "world"; RE orig("HELLO.*world", options); CHECK(orig.FullMatch(str)); RE copy1(orig); CHECK(copy1.FullMatch(str)); RE copy2("not a match"); CHECK(!copy2.FullMatch(str)); copy2 = copy1; CHECK(copy2.FullMatch(str)); copy2 = orig; CHECK(copy2.FullMatch(str)); // Make sure when we assign to ourselves, nothing bad happens orig = orig; copy1 = copy1; copy2 = copy2; CHECK(orig.FullMatch(str)); CHECK(copy1.FullMatch(str)); CHECK(copy2.FullMatch(str));}int main(int argc, char** argv) { // Treat any flag as --help if (argc > 1 && argv[1][0] == '-') { printf("Usage: %s [timing1|timing2|timing3 num-iters]\n" " If 'timingX ###' is specified, run the given timing test\n" " with the given number of iterations, rather than running\n" " the default corectness test.\n", argv[0]); return 0; } if (argc > 1) { if ( argc == 2 || atoi(argv[2]) == 0) { printf("timing mode needs a num-iters argument\n"); return 1; } if (!strcmp(argv[1], "timing1")) Timing1(atoi(argv[2])); else if (!strcmp(argv[1], "timing2")) Timing2(atoi(argv[2])); else if (!strcmp(argv[1], "timing3")) Timing3(atoi(argv[2])); else printf("Unknown argument '%s'\n", argv[1]); return 0; } printf("Testing FullMatch\n"); int i; string s; /***** FullMatch with no args *****/ CHECK(RE("h.*o").FullMatch("hello")); CHECK(!RE("h.*o").FullMatch("othello")); CHECK(!RE("h.*o").FullMatch("hello!")); /***** FullMatch with args *****/ // Zero-arg CHECK(RE("\\d+").FullMatch("1001")); // Single-arg CHECK(RE("(\\d+)").FullMatch("1001", &i)); CHECK_EQ(i, 1001); CHECK(RE("(-?\\d+)").FullMatch("-123", &i));
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -