📄 regularexp.c
字号:
min_max [0], *Reg_Parse); } else { sprintf (Error_Text, "max operand of {%lu,%lu%c} > 65535", min_max [0], min_max [1], *Reg_Parse); } REG_FAIL (Error_Text); } } if (!comma_present && *Reg_Parse == ',') { comma_present++; Reg_Parse++; } } /* A max of zero can not be specified directly in the regex since it would signal a max of infinity. This code specifically disallows `{0,0}', `{,0}', and `{0}' which really means nothing to humans but would be interpreted as `{0,infinity}' or `*' if we didn't make this check. */ if (digit_present [0] && (min_max [0] == REG_ZERO) && !comma_present) { REG_FAIL ("{0} is an invalid range"); } else if (digit_present [0] && (min_max [0] == REG_ZERO) && digit_present [1] && (min_max [1] == REG_ZERO)) { REG_FAIL ("{0,0} is an invalid range"); } else if (digit_present [1] && (min_max [1] == REG_ZERO)) { if (digit_present [0]) { sprintf (Error_Text, "{%lu,0} is an invalid range", min_max [0]); REG_FAIL (Error_Text); } else { REG_FAIL ("{,0} is an invalid range"); } } if (!comma_present) min_max [1] = min_max [0]; /* {x} means {x,x} */ if (*Reg_Parse != '}') { REG_FAIL ("{m,n} specification missing right \'}\'"); } else if (min_max [1] != REG_INFINITY && min_max [0] > min_max [1]) { /* Disallow a backward range. */ sprintf (Error_Text, "{%lu,%lu} is an invalid range", min_max [0], min_max [1]); REG_FAIL (Error_Text); } } Reg_Parse++; /* Check for a minimal matching (non-greedy or "lazy") specification. */ if (*Reg_Parse == '?') { lazy = 1; Reg_Parse++; } /* Avoid overhead of counting if possible */ if (op_code == '{') { if (min_max [0] == REG_ZERO && min_max [1] == REG_INFINITY) { op_code = '*'; } else if (min_max [0] == REG_ONE && min_max [1] == REG_INFINITY) { op_code = '+'; } else if (min_max [0] == REG_ZERO && min_max [1] == REG_ONE) { op_code = '?'; } else if (min_max [0] == REG_ONE && min_max [1] == REG_ONE) { /* "x{1,1}" is the same as "x". No need to pollute the compiled regex with such nonsense. */ *flag_param = flags_local; *range_param = range_local; return (ret_val); } else if (Num_Braces > (int)UCHAR_MAX) { sprintf (Error_Text, "number of {m,n} constructs > %d", UCHAR_MAX); REG_FAIL (Error_Text); } } if (op_code == '+') min_max [0] = REG_ONE; if (op_code == '?') min_max [1] = REG_ONE; /* It is dangerous to apply certain quantifiers to a possibly zero width item. */ if (!(flags_local & HAS_WIDTH)) { if (brace_present) { sprintf (Error_Text, "{%lu,%lu} operand could be empty", min_max [0], min_max [1]); } else { sprintf (Error_Text, "%c operand could be empty", op_code); } REG_FAIL (Error_Text); } *flag_param = (min_max [0] > REG_ZERO) ? (WORST | HAS_WIDTH) : WORST; if (range_local.lower >= 0) { if (min_max[1] != REG_INFINITY) { range_param->lower = range_local.lower * min_max[0]; range_param->upper = range_local.upper * min_max[1]; } else { range_param->lower = -1; /* Not a fixed-size length */ range_param->upper = -1; } } else { range_param->lower = -1; /* Not a fixed-size length */ range_param->upper = -1; } /*---------------------------------------------------------------------* * Symbol Legend For Node Structure Diagrams *---------------------------------------------------------------------* * (...) = general grouped thing * B = (B)ranch, K = bac(K), N = (N)othing * I = (I)nitialize count, C = Increment (C)ount * T~m = (T)est against mini(m)um- go to NEXT pointer if >= operand * T~x = (T)est against ma(x)imum- go to NEXT pointer if >= operand * '~' = NEXT pointer, \___| = forward pointer, |___/ = Backward pointer *---------------------------------------------------------------------*/ if (op_code == '*' && (flags_local & SIMPLE)) { insert ((lazy ? LAZY_STAR : STAR), ret_val, 0UL, 0UL, 0); } else if (op_code == '+' && (flags_local & SIMPLE)) { insert (lazy ? LAZY_PLUS : PLUS, ret_val, 0UL, 0UL, 0); } else if (op_code == '?' && (flags_local & SIMPLE)) { insert (lazy ? LAZY_QUESTION : QUESTION, ret_val, 0UL, 0UL, 0); } else if (op_code == '{' && (flags_local & SIMPLE)) { insert (lazy ? LAZY_BRACE : BRACE, ret_val, min_max [0], min_max [1], 0); } else if ((op_code == '*' || op_code == '+') && lazy) { /* Node structure for (x)*? Node structure for (x)+? construct. * construct. (Same as (x)*? except for initial * forward jump into parenthesis.) * * ___6____ * _______5_______ /________|______ * | _4__ 1_\ /| ____ | _\ * |/ | / |\ / |/ | | / |\ * B~ N~ B~ (...)~ K~ N~ N~ B~ N~ B~ (...)~ K~ N~ * \ \___2_______| \ \___________| * \_____3_______| \_____________| * */ tail (ret_val, emit_node (BACK)); /* 1 */ (void) insert (BRANCH, ret_val, 0UL, 0UL, 0); /* 2,4 */ (void) insert (NOTHING, ret_val, 0UL, 0UL, 0); /* 3 */ next = emit_node (NOTHING); /* 2,3 */ offset_tail (ret_val, NODE_SIZE, next); /* 2 */ tail (ret_val, next); /* 3 */ insert (BRANCH, ret_val, 0UL, 0UL, 0); /* 4,5 */ tail (ret_val, ret_val + (2 * NODE_SIZE)); /* 4 */ offset_tail (ret_val, 3 * NODE_SIZE, ret_val); /* 5 */ if (op_code == '+') { insert (NOTHING, ret_val, 0UL, 0UL, 0); /* 6 */ tail (ret_val, ret_val + (4 * NODE_SIZE)); /* 6 */ } } else if (op_code == '*') { /* Node structure for (x)* construct. * ____1_____ * | \ * B~ (...)~ K~ B~ N~ * \ \_|2 |\_| * \__3_______| 4 */ insert (BRANCH, ret_val, 0UL, 0UL, 0); /* 1,3 */ offset_tail (ret_val, NODE_SIZE, emit_node (BACK)); /* 2 */ offset_tail (ret_val, NODE_SIZE, ret_val); /* 1 */ tail (ret_val, emit_node (BRANCH)); /* 3 */ tail (ret_val, emit_node (NOTHING)); /* 4 */ } else if (op_code == '+') { /* Node structure for (x)+ construct. * * ____2_____ * | \ * (...)~ B~ K~ B~ N~ * \_|\____|\_| * 1 3 4 */ next = emit_node (BRANCH); /* 1 */ tail (ret_val, next); /* 1 */ tail (emit_node (BACK), ret_val); /* 2 */ tail (next, emit_node (BRANCH)); /* 3 */ tail (ret_val, emit_node (NOTHING)); /* 4 */ } else if (op_code == '?' && lazy) { /* Node structure for (x)?? construct. * _4__ 1_ * / | / | * B~ N~ B~ (...)~ N~ * \ \___2____| * \_____3____| */ (void) insert (BRANCH, ret_val, 0UL, 0UL, 0); /* 2,4 */ (void) insert (NOTHING, ret_val, 0UL, 0UL, 0); /* 3 */ next = emit_node (NOTHING); /* 1,2,3 */ offset_tail (ret_val, 2 * NODE_SIZE, next); /* 1 */ offset_tail (ret_val, NODE_SIZE, next); /* 2 */ tail (ret_val, next); /* 3 */ insert (BRANCH, ret_val, 0UL, 0UL, 0); /* 4 */ tail (ret_val, (ret_val + (2 * NODE_SIZE))); /* 4 */ } else if (op_code == '?') { /* Node structure for (x)? construct. * ___1____ _2 * / |/ | * B~ (...)~ B~ N~ * \__3_| */ insert (BRANCH, ret_val, 0UL, 0UL, 0); /* 1 */ tail (ret_val, emit_node (BRANCH)); /* 1 */ next = emit_node (NOTHING); /* 2,3 */ tail (ret_val, next); /* 2 */ offset_tail (ret_val, NODE_SIZE, next); /* 3 */ } else if (op_code == '{' && min_max [0] == min_max [1]) { /* Node structure for (x){m}, (x){m}?, (x){m,m}, or (x){m,m}? constructs. * Note that minimal and maximal matching mean the same thing when we * specify the minimum and maximum to be the same value. * _______3_____ * | 1_ _2 \ * | / |/ | \ * I~ (...)~ C~ T~m K~ N~ * \_| \_____| * 5 4 */ tail (ret_val, emit_special (INC_COUNT, 0UL, Num_Braces)); /* 1 */ tail (ret_val, emit_special (TEST_COUNT, min_max [0], Num_Braces));/* 2 */ tail (emit_node (BACK), ret_val); /* 3 */ tail (ret_val, emit_node (NOTHING)); /* 4 */ next = insert (INIT_COUNT, ret_val, 0UL, 0UL, Num_Braces); /* 5 */ tail (ret_val, next); /* 5 */ Num_Braces++; } else if (op_code == '{' && lazy) { if (min_max [0] == REG_ZERO && min_max [1] != REG_INFINITY) { /* Node structure for (x){0,n}? or {,n}? construct. * _________3____________ * 8_| _4__ 1_ _2 \ * / |/ | / |/ | \ * I~ B~ N~ B~ (...)~ C~ T~x K~ N~ * \ \ \__7__| * \ \_________6_______| * \______5____________| */ tail (ret_val, emit_special (INC_COUNT, 0UL, Num_Braces)); /* 1 */ next = emit_special (TEST_COUNT, min_max [0], Num_Braces); /* 2,7 */ tail (ret_val, next); /* 2 */ (void) insert (BRANCH, ret_val, 0UL, 0UL, Num_Braces); /* 4,6 */ (void) insert (NOTHING, ret_val, 0UL, 0UL, Num_Braces); /* 5 */ (void) insert (BRANCH, ret_val, 0UL, 0UL, Num_Braces); /* 3,4,8 */ tail (emit_node (BACK), ret_val); /* 3 */ tail (ret_val, ret_val + (2 * NODE_SIZE)); /* 4 */ next = emit_node (NOTHING); /* 5,6,7 */ offset_tail (ret_val, NODE_SIZE, next); /* 5 */ offset_tail (ret_val, 2 * NODE_SIZE, next); /* 6 */ offset_tail (ret_val, 3 * NODE_SIZE, next); /* 7 */ next = insert (INIT_COUNT, ret_val, 0UL, 0UL, Num_Braces); /* 8 */ tail (ret_val, next); /* 8 */ } else if (min_max [0] > REG_ZERO && min_max [1] == REG_INFINITY) { /* Node structure for (x){m,}? construct. * ______8_________________ * | _______3_____ \ * | _7__ | 1_ _2 \ \ * |/ | | / |/ | \ \ * I~ B~ N~ B~ (...)~ C~ T~m K~ K~ N~ * \_____\__\_| \_4___| | * 9 \ \_________5__________| * \_______6______________| */ tail (ret_val, emit_special (INC_COUNT, 0UL, Num_Braces)); /* 1 */ next = emit_special (TEST_COUNT, min_max [0], Num_Braces); /* 2,4 */ tail (ret_val, next); /* 2 */ tail (emit_node (BACK), ret_val); /* 3 */ tail (ret_val, emit_node (BACK)); /* 4 */ (void) insert (BRANCH, ret_val, 0UL, 0UL, 0); /* 5,7 */ (void) insert (NOTHING, ret_val, 0UL, 0UL, 0); /* 6 */ next = emit_node (NOTHING); /* 5,6 */ offset_tail (ret_val, NODE_SIZE, next); /* 5 */ tail (ret_val, next); /* 6 */ (void) insert (BRANCH, ret_val, 0UL, 0UL, 0); /* 7,8 */ tail (ret_val, ret_val + (2 * NODE_SIZE)); /* 7 */ offset_tail (ret_val, 3 * NODE_SIZE, ret_val); /* 8 */ (void) insert (INIT_COUNT, ret_val, 0UL, 0UL, Num_Braces); /* 9 */ tail (ret_val, ret_val + INDEX_SIZE + (4 * NODE_SIZE)); /* 9 */ } else { /* Node structure for (x){m,n}? construct. * ______9_____________________ * | _____________3___ \ * | __8_ | 1_ _2 \ \ * |/ | | / |/ | \ \
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -