📄 regularexp.c
字号:
* I~ B~ N~ B~ (...)~ C~ T~x T~m K~ K~ N~ * \_____\__\_| \ \__4__| | * 10 \ \ \_7_________| * \ \_________6_____________| * \_______5_________________| */ tail (ret_val, emit_special (INC_COUNT, 0UL, Num_Braces)); /* 1 */ next = emit_special (TEST_COUNT, min_max [1], Num_Braces); /* 2,7 */ tail (ret_val, next); /* 2 */ next = emit_special (TEST_COUNT, min_max [0], Num_Braces); /* 4 */ tail (emit_node (BACK), ret_val); /* 3 */ tail (next, emit_node (BACK)); /* 4 */ (void) insert (BRANCH, ret_val, 0UL, 0UL, 0); /* 6,8 */ (void) insert (NOTHING, ret_val, 0UL, 0UL, 0); /* 5 */ (void) insert (BRANCH, ret_val, 0UL, 0UL, 0); /* 8,9 */ next = emit_node (NOTHING); /* 5,6,7 */ offset_tail (ret_val, NODE_SIZE, next); /* 5 */ offset_tail (ret_val, 2 * NODE_SIZE, next); /* 6 */ offset_tail (ret_val, 3 * NODE_SIZE, next); /* 7 */ tail (ret_val, ret_val + (2 * NODE_SIZE)); /* 8 */ offset_tail (next, -NODE_SIZE, ret_val); /* 9 */ insert (INIT_COUNT, ret_val, 0UL, 0UL, Num_Braces); /* 10 */ tail (ret_val, ret_val + INDEX_SIZE + (4 * NODE_SIZE)); /* 10 */ } Num_Braces++; } else if (op_code == '{') { if (min_max [0] == REG_ZERO && min_max [1] != REG_INFINITY) { /* Node structure for (x){0,n} or (x){,n} construct. * * ___3____________ * | 1_ _2 \ 5_ * | / |/ | \ / | * I~ B~ (...)~ C~ T~x K~ B~ N~ * \_|\ \_6___|__| * 7 \________4________| */ tail (ret_val, emit_special (INC_COUNT, 0UL, Num_Braces)); /* 1 */ next = emit_special (TEST_COUNT, min_max [1], Num_Braces); /* 2,6 */ tail (ret_val, next); /* 2 */ (void) insert (BRANCH, ret_val, 0UL, 0UL, 0); /* 3,4,7 */ tail (emit_node (BACK), ret_val); /* 3 */ next = emit_node (BRANCH); /* 4,5 */ tail (ret_val, next); /* 4 */ tail (next, emit_node (NOTHING)); /* 5,6 */ offset_tail (ret_val, NODE_SIZE, next); /* 6 */ next = insert (INIT_COUNT, ret_val, 0UL, 0UL, Num_Braces); /* 7 */ tail (ret_val, next); /* 7 */ } else if (min_max [0] > REG_ZERO && min_max [1] == REG_INFINITY) { /* Node structure for (x){m,} construct. * __________4________ * | __3__________ \ * _|___| 1_ _2 \ \ _7 * / | 8 | / |/ | \ \ / | * I~ B~ (...)~ C~ T~m K~ K~ B~ N~ * \ \_5___| | * \__________6__________| */ tail (ret_val, emit_special (INC_COUNT, 0UL, Num_Braces)); /* 1 */ next = emit_special (TEST_COUNT, min_max [0], Num_Braces); /* 2 */ tail (ret_val, next); /* 2 */ tail (emit_node (BACK), ret_val); /* 3 */ (void) insert (BRANCH, ret_val, 0UL, 0UL, 0); /* 4,6 */ next = emit_node (BACK); /* 4 */ tail (next, ret_val); /* 4 */ offset_tail (ret_val, NODE_SIZE, next); /* 5 */ tail (ret_val, emit_node (BRANCH)); /* 6 */ tail (ret_val, emit_node (NOTHING)); /* 7 */ insert (INIT_COUNT, ret_val, 0UL, 0UL, Num_Braces); /* 8 */ tail (ret_val, ret_val + INDEX_SIZE + (2 * NODE_SIZE)); /* 8 */ } else { /* Node structure for (x){m,n} construct. * _____6________________ * | _____________3___ \ * 9_|__| 1_ _2 \ \ _8 * / | | / |/ | \ \ / | * I~ B~ (...)~ C~ T~x T~m K~ K~ B~ N~ * \ \ \__4__| | | * \ \_7_________|__| * \_________5_____________| */ tail (ret_val, emit_special (INC_COUNT, 0UL, Num_Braces)); /* 1 */ next = emit_special (TEST_COUNT, min_max [1], Num_Braces); /* 2,4 */ tail (ret_val, next); /* 2 */ next = emit_special (TEST_COUNT, min_max [0], Num_Braces); /* 4 */ tail (emit_node (BACK), ret_val); /* 3 */ tail (next, emit_node (BACK)); /* 4 */ (void) insert (BRANCH, ret_val, 0UL, 0UL, 0); /* 5,6 */ next = emit_node (BRANCH); /* 5,8 */ tail (ret_val, next); /* 5 */ offset_tail (next, -NODE_SIZE, ret_val); /* 6 */ next = emit_node (NOTHING); /* 7,8 */ offset_tail (ret_val, NODE_SIZE, next); /* 7 */ offset_tail (next, -NODE_SIZE, next); /* 8 */ (void) insert (INIT_COUNT, ret_val, 0UL, 0UL, Num_Braces); /* 9 */ tail (ret_val, ret_val + INDEX_SIZE + (2 * NODE_SIZE)); /* 9 */ } Num_Braces++; } else { /* We get here if the IS_QUANTIFIER macro is not coordinated properly with this function. */ REG_FAIL ("internal error #2, `piece\'"); } if (IS_QUANTIFIER (*Reg_Parse)) { if (op_code == '{') { sprintf (Error_Text, "nested quantifiers, {m,n}%c", *Reg_Parse); } else { sprintf (Error_Text, "nested quantifiers, %c%c", op_code, *Reg_Parse); } REG_FAIL (Error_Text); } return (ret_val);}/*----------------------------------------------------------------------* * atom * * Process one regex item at the lowest level * * OPTIMIZATION: Lumps a continuous sequence of ordinary characters * together so that it can turn them into a single EXACTLY node, which * is smaller to store and faster to run. *----------------------------------------------------------------------*/static unsigned char * atom (int *flag_param, len_range *range_param) { register unsigned char *ret_val; unsigned char test; int flags_local; len_range range_local; *flag_param = WORST; /* Tentatively. */ range_param->lower = 0; /* Idem */ range_param->upper = 0; /* Process any regex comments, e.g. `(?# match next token->)'. The terminating right parenthesis can not be escaped. The comment stops at the first right parenthesis encountered (or the end of the regex string)... period. Handles multiple sequential comments, e.g. `(?# one)(?# two)...' */ while (*Reg_Parse == '(' && *(Reg_Parse + 1) == '?' && *(Reg_Parse + 2) == '#') { Reg_Parse += 3; while (*Reg_Parse != ')' && *Reg_Parse != '\0') { Reg_Parse++; } if (*Reg_Parse == ')') { Reg_Parse++; } if (*Reg_Parse == ')' || *Reg_Parse == '|' || *Reg_Parse == '\0') { /* Hit end of regex string or end of parenthesized regex; have to return "something" (i.e. a NOTHING node) to avoid generating an error. */ ret_val = emit_node (NOTHING); return (ret_val); } } switch (*Reg_Parse++) { case '^': ret_val = emit_node (BOL); break; case '$': ret_val = emit_node (EOL); break; case '<': ret_val = emit_node (BOWORD); break; case '>': ret_val = emit_node (EOWORD); break; case '.': if (Match_Newline) { ret_val = emit_node (EVERY); } else { ret_val = emit_node (ANY); } *flag_param |= (HAS_WIDTH | SIMPLE); range_param->lower = 1; range_param->upper = 1; break; case '(': if (*Reg_Parse == '?') { /* Special parenthetical expression */ Reg_Parse++; range_local.lower = 0; /* Make sure it is always used */ range_local.upper = 0; if (*Reg_Parse == ':') { Reg_Parse++; ret_val = chunk (NO_CAPTURE, &flags_local, &range_local); } else if (*Reg_Parse == '=') { Reg_Parse++; ret_val = chunk (POS_AHEAD_OPEN, &flags_local, &range_local); } else if (*Reg_Parse == '!') { Reg_Parse++; ret_val = chunk (NEG_AHEAD_OPEN, &flags_local, &range_local); } else if (*Reg_Parse == 'i') { Reg_Parse++; ret_val = chunk (INSENSITIVE, &flags_local, &range_local); } else if (*Reg_Parse == 'I') { Reg_Parse++; ret_val = chunk (SENSITIVE, &flags_local, &range_local); } else if (*Reg_Parse == 'n') { Reg_Parse++; ret_val = chunk (NEWLINE, &flags_local, &range_local); } else if (*Reg_Parse == 'N') { Reg_Parse++; ret_val = chunk (NO_NEWLINE, &flags_local, &range_local); } else if (*Reg_Parse == '<') { Reg_Parse++; if (*Reg_Parse == '=') { Reg_Parse++; ret_val = chunk (POS_BEHIND_OPEN, &flags_local, &range_local); } else if (*Reg_Parse == '!') { Reg_Parse++; ret_val = chunk (NEG_BEHIND_OPEN, &flags_local, &range_local); } else { sprintf (Error_Text, "invalid look-behind syntax, \"(?<%c...)\"", *Reg_Parse); REG_FAIL (Error_Text); } } else { sprintf (Error_Text, "invalid grouping syntax, \"(?%c...)\"", *Reg_Parse); REG_FAIL (Error_Text); } } else { /* Normal capturing parentheses */ ret_val = chunk (PAREN, &flags_local, &range_local); } if (ret_val == NULL) return (NULL); /* Something went wrong. */ /* Add HAS_WIDTH flag if it was set by call to chunk. */ *flag_param |= flags_local & HAS_WIDTH; *range_param = range_local; break; case '\0': case '|': case ')': REG_FAIL ("internal error #3, `atom\'"); /* Supposed to be */ /* caught earlier. */ case '?': case '+': case '*': sprintf (Error_Text, "%c follows nothing", *(Reg_Parse - 1)); REG_FAIL (Error_Text); case '{': if (Enable_Counting_Quantifier) { REG_FAIL ("{m,n} follows nothing"); } else { ret_val = emit_node (EXACTLY); /* Treat braces as literals. */ emit_byte ('{'); emit_byte ('\0'); range_param->lower = 1; range_param->upper = 1; } break; case '[': { register unsigned int second_value; register unsigned int last_value; unsigned char
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -