📄 pcretest.c
字号:
/************************************************** PCRE testing program **************************************************/#include <ctype.h>#include <stdio.h>#include <string.h>#include <stdlib.h>#include <time.h>#include <locale.h>/* Use the internal info for displaying the results of pcre_study(). */#include "internal.h"/* It is possible to compile this test program without including support fortesting the POSIX interface, though this is not available via the standardMakefile. */#if !defined NOPOSIX#include "pcreposix.h"#endif#ifndef CLOCKS_PER_SEC#ifdef CLK_TCK#define CLOCKS_PER_SEC CLK_TCK#else#define CLOCKS_PER_SEC 100#endif#endif#define LOOPREPEAT 20000static FILE *outfile;static int log_store = 0;static size_t gotten_store;static int utf8_table1[] = { 0x0000007f, 0x000007ff, 0x0000ffff, 0x001fffff, 0x03ffffff, 0x7fffffff};static int utf8_table2[] = { 0, 0xc0, 0xe0, 0xf0, 0xf8, 0xfc};static int utf8_table3[] = { 0xff, 0x1f, 0x0f, 0x07, 0x03, 0x01};/************************************************** Convert character value to UTF-8 **************************************************//* This function takes an integer value in the range 0 - 0x7fffffffand encodes it as a UTF-8 character in 0 to 6 bytes.Arguments: cvalue the character value buffer pointer to buffer for result - at least 6 bytes longReturns: number of characters placed in the buffer -1 if input character is negative 0 if input character is positive but too big (only when int is longer than 32 bits)*/static intord2utf8(int cvalue, unsigned char *buffer){register int i, j;for (i = 0; i < sizeof(utf8_table1)/sizeof(int); i++) if (cvalue <= utf8_table1[i]) break;if (i >= sizeof(utf8_table1)/sizeof(int)) return 0;if (cvalue < 0) return -1;buffer += i;for (j = i; j > 0; j--) { *buffer-- = 0x80 | (cvalue & 0x3f); cvalue >>= 6; }*buffer = utf8_table2[i] | cvalue;return i + 1;}/************************************************** Convert UTF-8 string to value **************************************************//* This function takes one or more bytes that represents a UTF-8 character,and returns the value of the character.Argument: buffer a pointer to the byte vector vptr a pointer to an int to receive the valueReturns: > 0 => the number of bytes consumed -6 to 0 => malformed UTF-8 character at offset = (-return)*/intutf82ord(unsigned char *buffer, int *vptr){int c = *buffer++;int d = c;int i, j, s;for (i = -1; i < 6; i++) /* i is number of additional bytes */ { if ((d & 0x80) == 0) break; d <<= 1; }if (i == -1) { *vptr = c; return 1; } /* ascii character */if (i == 0 || i == 6) return 0; /* invalid UTF-8 *//* i now has a value in the range 1-5 */s = 6*i;d = (c & utf8_table3[i]) << s;for (j = 0; j < i; j++) { c = *buffer++; if ((c & 0xc0) != 0x80) return -(j+1); s -= 6; d |= (c & 0x3f) << s; }/* Check that encoding was the correct unique one */for (j = 0; j < sizeof(utf8_table1)/sizeof(int); j++) if (d <= utf8_table1[j]) break;if (j != i) return -(i+1);/* Valid value */*vptr = d;return i+1;}/* Debugging function to print the internal form of the regex. This is the samecode as contained in pcre.c under the DEBUG macro. */static const char *OP_names[] = { "End", "\\A", "\\B", "\\b", "\\D", "\\d", "\\S", "\\s", "\\W", "\\w", "\\Z", "\\z", "Opt", "^", "$", "Any", "chars", "not", "*", "*?", "+", "+?", "?", "??", "{", "{", "{", "*", "*?", "+", "+?", "?", "??", "{", "{", "{", "*", "*?", "+", "+?", "?", "??", "{", "{", "{", "*", "*?", "+", "+?", "?", "??", "{", "{", "class", "Ref", "Recurse", "Alt", "Ket", "KetRmax", "KetRmin", "Assert", "Assert not", "AssertB", "AssertB not", "Reverse", "Once", "Cond", "Cref", "Brazero", "Braminzero", "Branumber", "Bra"};static void print_internals(pcre *re){unsigned char *code = ((real_pcre *)re)->code;fprintf(outfile, "------------------------------------------------------------------\n");for(;;) { int c; int charlength; fprintf(outfile, "%3d ", (int)(code - ((real_pcre *)re)->code)); if (*code >= OP_BRA) { if (*code - OP_BRA > EXTRACT_BASIC_MAX) fprintf(outfile, "%3d Bra extra", (code[1] << 8) + code[2]); else fprintf(outfile, "%3d Bra %d", (code[1] << 8) + code[2], *code - OP_BRA); code += 2; } else switch(*code) { case OP_END: fprintf(outfile, " %s\n", OP_names[*code]); fprintf(outfile, "------------------------------------------------------------------\n"); return; case OP_OPT: fprintf(outfile, " %.2x %s", code[1], OP_names[*code]); code++; break; case OP_CHARS: charlength = *(++code); fprintf(outfile, "%3d ", charlength); while (charlength-- > 0) if (isprint(c = *(++code))) fprintf(outfile, "%c", c); else fprintf(outfile, "\\x%02x", c); break; case OP_KETRMAX: case OP_KETRMIN: case OP_ALT: case OP_KET: case OP_ASSERT: case OP_ASSERT_NOT: case OP_ASSERTBACK: case OP_ASSERTBACK_NOT: case OP_ONCE: case OP_COND: case OP_BRANUMBER: case OP_REVERSE: case OP_CREF: fprintf(outfile, "%3d %s", (code[1] << 8) + code[2], OP_names[*code]); code += 2; break; case OP_STAR: case OP_MINSTAR: case OP_PLUS: case OP_MINPLUS: case OP_QUERY: case OP_MINQUERY: case OP_TYPESTAR: case OP_TYPEMINSTAR: case OP_TYPEPLUS: case OP_TYPEMINPLUS: case OP_TYPEQUERY: case OP_TYPEMINQUERY: if (*code >= OP_TYPESTAR) fprintf(outfile, " %s", OP_names[code[1]]); else if (isprint(c = code[1])) fprintf(outfile, " %c", c); else fprintf(outfile, " \\x%02x", c); fprintf(outfile, "%s", OP_names[*code++]); break; case OP_EXACT: case OP_UPTO: case OP_MINUPTO: if (isprint(c = code[3])) fprintf(outfile, " %c{", c); else fprintf(outfile, " \\x%02x{", c); if (*code != OP_EXACT) fprintf(outfile, ","); fprintf(outfile, "%d}", (code[1] << 8) + code[2]); if (*code == OP_MINUPTO) fprintf(outfile, "?"); code += 3; break; case OP_TYPEEXACT: case OP_TYPEUPTO: case OP_TYPEMINUPTO: fprintf(outfile, " %s{", OP_names[code[3]]); if (*code != OP_TYPEEXACT) fprintf(outfile, "0,"); fprintf(outfile, "%d}", (code[1] << 8) + code[2]); if (*code == OP_TYPEMINUPTO) fprintf(outfile, "?"); code += 3; break; case OP_NOT: if (isprint(c = *(++code))) fprintf(outfile, " [^%c]", c); else fprintf(outfile, " [^\\x%02x]", c); break; case OP_NOTSTAR: case OP_NOTMINSTAR: case OP_NOTPLUS: case OP_NOTMINPLUS: case OP_NOTQUERY: case OP_NOTMINQUERY: if (isprint(c = code[1])) fprintf(outfile, " [^%c]", c); else fprintf(outfile, " [^\\x%02x]", c); fprintf(outfile, "%s", OP_names[*code++]); break; case OP_NOTEXACT: case OP_NOTUPTO: case OP_NOTMINUPTO: if (isprint(c = code[3])) fprintf(outfile, " [^%c]{", c); else fprintf(outfile, " [^\\x%02x]{", c); if (*code != OP_NOTEXACT) fprintf(outfile, ","); fprintf(outfile, "%d}", (code[1] << 8) + code[2]); if (*code == OP_NOTMINUPTO) fprintf(outfile, "?"); code += 3; break; case OP_REF: fprintf(outfile, " \\%d", (code[1] << 8) | code[2]); code += 3; goto CLASS_REF_REPEAT; case OP_CLASS: { int i, min, max; code++; fprintf(outfile, " ["); for (i = 0; i < 256; i++) { if ((code[i/8] & (1 << (i&7))) != 0) { int j; for (j = i+1; j < 256; j++) if ((code[j/8] & (1 << (j&7))) == 0) break; if (i == '-' || i == ']') fprintf(outfile, "\\"); if (isprint(i)) fprintf(outfile, "%c", i); else fprintf(outfile, "\\x%02x", i); if (--j > i) { fprintf(outfile, "-"); if (j == '-' || j == ']') fprintf(outfile, "\\"); if (isprint(j)) fprintf(outfile, "%c", j); else fprintf(outfile, "\\x%02x", j); } i = j; } } fprintf(outfile, "]"); code += 32; CLASS_REF_REPEAT: switch(*code) { case OP_CRSTAR: case OP_CRMINSTAR: case OP_CRPLUS: case OP_CRMINPLUS: case OP_CRQUERY: case OP_CRMINQUERY: fprintf(outfile, "%s", OP_names[*code]); break; case OP_CRRANGE: case OP_CRMINRANGE: min = (code[1] << 8) + code[2]; max = (code[3] << 8) + code[4]; if (max == 0) fprintf(outfile, "{%d,}", min); else fprintf(outfile, "{%d,%d}", min, max); if (*code == OP_CRMINRANGE) fprintf(outfile, "?"); code += 4; break; default: code--; } } break; /* Anything else is just a one-node item */ default: fprintf(outfile, " %s", OP_names[*code]); break; } code++; fprintf(outfile, "\n"); }}/* Character string printing function. A "normal" and a UTF-8 version. */static void pchars(unsigned char *p, int length, int utf8){int c;while (length-- > 0) { if (utf8) { int rc = utf82ord(p, &c); if (rc > 0) { length -= rc - 1; p += rc; if (c < 256 && isprint(c)) fprintf(outfile, "%c", c); else fprintf(outfile, "\\x{%02x}", c); continue; } } /* Not UTF-8, or malformed UTF-8 */ if (isprint(c = *(p++))) fprintf(outfile, "%c", c); else fprintf(outfile, "\\x%02x", c); }}/* Alternative malloc function, to test functionality and show the size of thecompiled re. */static void *new_malloc(size_t size){gotten_store = size;if (log_store) fprintf(outfile, "Memory allocation (code space): %d\n", (int)((int)size - offsetof(real_pcre, code[0])));return malloc(size);}/* Get one piece of information from the pcre_fullinfo() function */static void new_info(pcre *re, pcre_extra *study, int option, void *ptr){int rc;if ((rc = pcre_fullinfo(re, study, option, ptr)) < 0) fprintf(outfile, "Error %d from pcre_fullinfo(%d)\n", rc, option);}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -