⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 regex.c

📁 正则表达式库
💻 C
📖 第 1 页 / 共 5 页
字号:
              fixup_alt_jump = 0;              laststart = 0;              begalt = b;	      /* If we've reached MAX_REGNUM groups, then this open		 won't actually generate any code, so we'll have to		 clear pending_exact explicitly.  */	      pending_exact = 0;              break;            case ')':              if (syntax & RE_NO_BK_PARENS) goto normal_backslash;              if (COMPILE_STACK_EMPTY)                if (syntax & RE_UNMATCHED_RIGHT_PAREN_ORD)                  goto normal_backslash;                else                  return REG_ERPAREN;            handle_close:              if (fixup_alt_jump)                { /* Push a dummy failure point at the end of the                     alternative for a possible future                     `pop_failure_jump' to pop.  See comments at                     `push_dummy_failure' in `re_match_2'.  */                  BUF_PUSH (push_dummy_failure);                                    /* We allocated space for this jump when we assigned                     to `fixup_alt_jump', in the `handle_alt' case below.  */                  STORE_JUMP (jump_past_alt, fixup_alt_jump, b - 1);                }              /* See similar code for backslashed left paren above.  */              if (COMPILE_STACK_EMPTY)                if (syntax & RE_UNMATCHED_RIGHT_PAREN_ORD)                  goto normal_char;                else                  return REG_ERPAREN;              /* Since we just checked for an empty stack above, this                 ``can't happen''.  */              assert (compile_stack.avail != 0);              {                /* We don't just want to restore into `regnum', because                   later groups should continue to be numbered higher,                   as in `(ab)c(de)' -- the second group is #2.  */                regnum_t this_group_regnum;                compile_stack.avail--;		                begalt = bufp->buffer + COMPILE_STACK_TOP.begalt_offset;                fixup_alt_jump                  = COMPILE_STACK_TOP.fixup_alt_jump                    ? bufp->buffer + COMPILE_STACK_TOP.fixup_alt_jump - 1                     : 0;                laststart = bufp->buffer + COMPILE_STACK_TOP.laststart_offset;                this_group_regnum = COMPILE_STACK_TOP.regnum;		/* If we've reached MAX_REGNUM groups, then this open		   won't actually generate any code, so we'll have to		   clear pending_exact explicitly.  */		pending_exact = 0;                /* We're at the end of the group, so now we know how many                   groups were inside this one.  */                if (this_group_regnum <= MAX_REGNUM)                  {                    unsigned char *inner_group_loc                      = bufp->buffer + COMPILE_STACK_TOP.inner_group_offset;                                        *inner_group_loc = regnum - this_group_regnum;                    BUF_PUSH_3 (stop_memory, this_group_regnum,                                regnum - this_group_regnum);                  }              }              break;            case '|':					/* `\|'.  */              if (syntax & RE_LIMITED_OPS || syntax & RE_NO_BK_VBAR)                goto normal_backslash;            handle_alt:              if (syntax & RE_LIMITED_OPS)                goto normal_char;              /* Insert before the previous alternative a jump which                 jumps to this alternative if the former fails.  */              GET_BUFFER_SPACE (3);              INSERT_JUMP (on_failure_jump, begalt, b + 6);              pending_exact = 0;              b += 3;              /* The alternative before this one has a jump after it                 which gets executed if it gets matched.  Adjust that                 jump so it will jump to this alternative's analogous                 jump (put in below, which in turn will jump to the next                 (if any) alternative's such jump, etc.).  The last such                 jump jumps to the correct final destination.  A picture:                          _____ _____                           |   | |   |                             |   v |   v                          a | b   | c                    If we are at `b', then fixup_alt_jump right now points to a                 three-byte space after `a'.  We'll put in the jump, set                 fixup_alt_jump to right after `b', and leave behind three                 bytes which we'll fill in when we get to after `c'.  */              if (fixup_alt_jump)                STORE_JUMP (jump_past_alt, fixup_alt_jump, b);              /* Mark and leave space for a jump after this alternative,                 to be filled in later either by next alternative or                 when know we're at the end of a series of alternatives.  */              fixup_alt_jump = b;              GET_BUFFER_SPACE (3);              b += 3;              laststart = 0;              begalt = b;              break;            case '{':               /* If \{ is a literal.  */              if (!(syntax & RE_INTERVALS)                     /* If we're at `\{' and it's not the open-interval                         operator.  */                  || ((syntax & RE_INTERVALS) && (syntax & RE_NO_BK_BRACES))                  || (p - 2 == pattern  &&  p == pend))                goto normal_backslash;            handle_interval:              {                /* If got here, then the syntax allows intervals.  */                /* At least (most) this many matches must be made.  */                int lower_bound = -1, upper_bound = -1;                beg_interval = p - 1;                if (p == pend)                  {                    if (syntax & RE_NO_BK_BRACES)                      goto unfetch_interval;                    else                      return REG_EBRACE;                  }                GET_UNSIGNED_NUMBER (lower_bound);                if (c == ',')                  {                    GET_UNSIGNED_NUMBER (upper_bound);                    if (upper_bound < 0) upper_bound = RE_DUP_MAX;                  }                else                  /* Interval such as `{1}' => match exactly once. */                  upper_bound = lower_bound;                if (lower_bound < 0 || upper_bound > RE_DUP_MAX                    || lower_bound > upper_bound)                  {                    if (syntax & RE_NO_BK_BRACES)                      goto unfetch_interval;                    else                       return REG_BADBR;                  }                if (!(syntax & RE_NO_BK_BRACES))                   {                    if (c != '\\') return REG_EBRACE;                    PATFETCH (c);                  }                if (c != '}')                  {                    if (syntax & RE_NO_BK_BRACES)                      goto unfetch_interval;                    else                       return REG_BADBR;                  }                /* We just parsed a valid interval.  */                /* If it's invalid to have no preceding re.  */                if (!laststart)                  {                    if (syntax & RE_CONTEXT_INVALID_OPS)                      return REG_BADRPT;                    else if (syntax & RE_CONTEXT_INDEP_OPS)                      laststart = b;                    else                      goto unfetch_interval;                  }                /* If the upper bound is zero, don't want to succeed at                   all; jump from `laststart' to `b + 3', which will be                   the end of the buffer after we insert the jump.  */                 if (upper_bound == 0)                   {                     GET_BUFFER_SPACE (3);                     INSERT_JUMP (jump, laststart, b + 3);                     b += 3;                   }                 /* Otherwise, we have a nontrivial interval.  When                    we're all done, the pattern will look like:                      set_number_at <jump count> <upper bound>                      set_number_at <succeed_n count> <lower bound>                      succeed_n <after jump addr> <succed_n count>                      <body of loop>                      jump_n <succeed_n addr> <jump count>                    (The upper bound and `jump_n' are omitted if                    `upper_bound' is 1, though.)  */                 else                    { /* If the upper bound is > 1, we need to insert                        more at the end of the loop.  */                     unsigned nbytes = 10 + (upper_bound > 1) * 10;                     GET_BUFFER_SPACE (nbytes);                     /* Initialize lower bound of the `succeed_n', even                        though it will be set during matching by its                        attendant `set_number_at' (inserted next),                        because `re_compile_fastmap' needs to know.                        Jump to the `jump_n' we might insert below.  */                     INSERT_JUMP2 (succeed_n, laststart,                                   b + 5 + (upper_bound > 1) * 5,                                   lower_bound);                     b += 5;                     /* Code to initialize the lower bound.  Insert                         before the `succeed_n'.  The `5' is the last two                        bytes of this `set_number_at', plus 3 bytes of                        the following `succeed_n'.  */                     insert_op2 (set_number_at, laststart, 5, lower_bound, b);                     b += 5;                     if (upper_bound > 1)                       { /* More than one repetition is allowed, so                            append a backward jump to the `succeed_n'                            that starts this interval.                                                        When we've reached this during matching,                            we'll have matched the interval once, so                            jump back only `upper_bound - 1' times.  */                         STORE_JUMP2 (jump_n, b, laststart + 5,                                      upper_bound - 1);                         b += 5;                         /* The location we want to set is the second                            parameter of the `jump_n'; that is `b-2' as                            an absolute address.  `laststart' will be                            the `set_number_at' we're about to insert;                            `laststart+3' the number to set, the source                            for the relative address.  But we are                            inserting into the middle of the pattern --                            so everything is getting moved up by 5.                            Conclusion: (b - 2) - (laststart + 3) + 5,                            i.e., b - laststart.                                                        We insert this at the beginning of the loop                            so that if we fail during matching, we'll                            reinitialize the bounds.  */                         insert_op2 (set_number_at, laststart, b - laststart,                                     upper_bound - 1, b);                         b += 5;                       }                   }                pending_exact = 0;                beg_interval = NULL;              }              break;            unfetch_interval:              /* If an invalid interval, match the characters as literals.  */               assert (beg_interval);               p = beg_interval;               beg_interval = NULL;               /* normal_char and normal_backslash need `c'.  */               PATFETCH (c);	               if (!(syntax & RE_NO_BK_BRACES))                 {                   if (p > pattern  &&  p[-1] == '\\')                     goto normal_backslash;                 }               goto normal_char;#ifdef emacs            /* There is no way to specify the before_dot and after_dot               operators.  rms says this is ok.  --karl  */            case '=':              BUF_PUSH (at_dot);              break;            case 's':	              laststart = b;              PATFETCH (c);              BUF_PUSH_2 (syntaxspec, syntax_spec_code[c]);              break;            case 'S':              laststart = b;              PATFETCH (c);              BUF_PUSH_2 (notsyntaxspec, syntax_spec_code[c]);              break;#endif /* emacs */            case 'w':              laststart = b;              BUF_PUSH (wordchar);              break;            case 'W':              laststart = b;              BUF_PUSH (notwordchar);              break;            case '<':              BUF_PUSH (wordbeg);              break;            case '>':              BUF_PUSH (wordend);              break;            case 'b':              BUF_PUSH (wordbound);              break;            case 'B':              BUF_PUSH (notwordbound);              break;            case '`':              BUF_PUSH (begbuf);              break;            case '\'':              BUF_PUSH (endbuf);              break;            case '1': case '2': case '3': case '4': case '5':            case '6': case '7': case '8': case '9':              if (syntax & RE_NO_BK_REFS)                goto normal_char;              c1 = c - '0';              if (c1 > regnum)                return REG_ESUBREG;              /* Can't back reference to a subexpression if inside of it.  */              if (group_in_compile_stack (compile_stack, c1))                goto normal_char;              laststart = b;              BUF_PUSH_2 (duplicate, c1);              break;            case '+':            case '?':              if (syntax & RE_BK_PLUS_QM)                goto handle_plus;              else                goto normal_backslash;            default:            normal_backslash:              /* You might think it would be useful for \ to mean                 not to translate; but if we don't translate it                 it will never match anything.  */              c = TRANSLATE (c);              goto normal_char;            }          break;	default:        /* Expects the character in `c'.  */	normal_char:	      /* If no exactn currently being built.  */          if (!pending_exact               /* If last exactn not at current position.  */              || pending_exact + *pending_exact + 1 != b                            /* We have only one byte following the exactn for the count.  */	      || *pending_exact == (1 << BYTEWIDTH) - 1              /* If followed by a repetition operator.  */              || *p == '*' || *p == '^'	      || ((syntax & RE_BK_PLUS_QM)		  ? *p == '\\' && (p[1] == '+' || p[1] == '?')		  : (*p == '+' || *p == '?'))	      || ((syntax & RE_INTERVALS)                  &

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -