⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 regex.c

📁 正则表达式库
💻 C
📖 第 1 页 / 共 5 页
字号:
	case '+':        case '?':          if ((syntax & RE_BK_PLUS_QM)              || (syntax & RE_LIMITED_OPS))            goto normal_char;        handle_plus:        case '*':          /* If there is no previous pattern... */          if (!laststart)            {              if (syntax & RE_CONTEXT_INVALID_OPS)                return REG_BADRPT;              else if (!(syntax & RE_CONTEXT_INDEP_OPS))                goto normal_char;            }          {            /* Are we optimizing this jump?  */            boolean keep_string_p = false;                        /* 1 means zero (many) matches is allowed.  */            char zero_times_ok = 0, many_times_ok = 0;            /* If there is a sequence of repetition chars, collapse it               down to just one (the right one).  We can't combine               interval operators with these because of, e.g., `a{2}*',               which should only match an even number of `a's.  */            for (;;)              {                zero_times_ok |= c != '+';                many_times_ok |= c != '?';                if (p == pend)                  break;                PATFETCH (c);                if (c == '*'                    || (!(syntax & RE_BK_PLUS_QM) && (c == '+' || c == '?')))                  ;                else if (syntax & RE_BK_PLUS_QM  &&  c == '\\')                  {                    if (p == pend) return REG_EESCAPE;                    PATFETCH (c1);                    if (!(c1 == '+' || c1 == '?'))                      {                        PATUNFETCH;                        PATUNFETCH;                        break;                      }                    c = c1;                  }                else                  {                    PATUNFETCH;                    break;                  }                /* If we get here, we found another repeat character.  */               }            /* Star, etc. applied to an empty pattern is equivalent               to an empty pattern.  */            if (!laststart)                break;            /* Now we know whether or not zero matches is allowed               and also whether or not two or more matches is allowed.  */            if (many_times_ok)              { /* More than one repetition is allowed, so put in at the                   end a backward relative jump from `b' to before the next                   jump we're going to put in below (which jumps from                   laststart to after this jump).                     But if we are at the `*' in the exact sequence `.*\n',                   insert an unconditional jump backwards to the .,                   instead of the beginning of the loop.  This way we only                   push a failure point once, instead of every time                   through the loop.  */                assert (p - 1 > pattern);                /* Allocate the space for the jump.  */                GET_BUFFER_SPACE (3);                /* We know we are not at the first character of the pattern,                   because laststart was nonzero.  And we've already                   incremented `p', by the way, to be the character after                   the `*'.  Do we have to do something analogous here                   for null bytes, because of RE_DOT_NOT_NULL?  */                if (TRANSLATE (*(p - 2)) == TRANSLATE ('.')		    && zero_times_ok                    && p < pend && TRANSLATE (*p) == TRANSLATE ('\n')                    && !(syntax & RE_DOT_NEWLINE))                  { /* We have .*\n.  */                    STORE_JUMP (jump, b, laststart);                    keep_string_p = true;                  }                else                  /* Anything else.  */                  STORE_JUMP (maybe_pop_jump, b, laststart - 3);                /* We've added more stuff to the buffer.  */                b += 3;              }            /* On failure, jump from laststart to b + 3, which will be the               end of the buffer after this jump is inserted.  */            GET_BUFFER_SPACE (3);            INSERT_JUMP (keep_string_p ? on_failure_keep_string_jump                                       : on_failure_jump,                         laststart, b + 3);            pending_exact = 0;            b += 3;            if (!zero_times_ok)              {                /* At least one repetition is required, so insert a                   `dummy_failure_jump' before the initial                   `on_failure_jump' instruction of the loop. This                   effects a skip over that instruction the first time                   we hit that loop.  */                GET_BUFFER_SPACE (3);                INSERT_JUMP (dummy_failure_jump, laststart, laststart + 6);                b += 3;              }            }	  break;	case '.':          laststart = b;          BUF_PUSH (anychar);          break;        case '[':          {            boolean had_char_class = false;            if (p == pend) return REG_EBRACK;            /* Ensure that we have enough space to push a charset: the               opcode, the length count, and the bitset; 34 bytes in all.  */	    GET_BUFFER_SPACE (34);            laststart = b;            /* We test `*p == '^' twice, instead of using an if               statement, so we only need one BUF_PUSH.  */            BUF_PUSH (*p == '^' ? charset_not : charset);             if (*p == '^')              p++;            /* Remember the first position in the bracket expression.  */            p1 = p;            /* Push the number of bytes in the bitmap.  */            BUF_PUSH ((1 << BYTEWIDTH) / BYTEWIDTH);            /* Clear the whole map.  */            bzero (b, (1 << BYTEWIDTH) / BYTEWIDTH);            /* charset_not matches newline according to a syntax bit.  */            if ((re_opcode_t) b[-2] == charset_not                && (syntax & RE_HAT_LISTS_NOT_NEWLINE))              SET_LIST_BIT ('\n');            /* Read in characters and ranges, setting map bits.  */            for (;;)              {                if (p == pend) return REG_EBRACK;                PATFETCH (c);                /* \ might escape characters inside [...] and [^...].  */                if ((syntax & RE_BACKSLASH_ESCAPE_IN_LISTS) && c == '\\')                  {                    if (p == pend) return REG_EESCAPE;                    PATFETCH (c1);                    SET_LIST_BIT (c1);                    continue;                  }                /* Could be the end of the bracket expression.  If it's                   not (i.e., when the bracket expression is `[]' so                   far), the ']' character bit gets set way below.  */                if (c == ']' && p != p1 + 1)                  break;                /* Look ahead to see if it's a range when the last thing                   was a character class.  */                if (had_char_class && c == '-' && *p != ']')                  return REG_ERANGE;                /* Look ahead to see if it's a range when the last thing                   was a character: if this is a hyphen not at the                   beginning or the end of a list, then it's the range                   operator.  */                if (c == '-'                     && !(p - 2 >= pattern && p[-2] == '[')                     && !(p - 3 >= pattern && p[-3] == '[' && p[-2] == '^')                    && *p != ']')                  {                    reg_errcode_t ret                      = compile_range (&p, pend, translate, syntax, b);                    if (ret != REG_NOERROR) return ret;                  }                else if (p[0] == '-' && p[1] != ']')                  { /* This handles ranges made up of characters only.  */                    reg_errcode_t ret;		    /* Move past the `-'.  */                    PATFETCH (c1);                                        ret = compile_range (&p, pend, translate, syntax, b);                    if (ret != REG_NOERROR) return ret;                  }                /* See if we're at the beginning of a possible character                   class.  */                else if (syntax & RE_CHAR_CLASSES && c == '[' && *p == ':')                  { /* Leave room for the null.  */                    char str[CHAR_CLASS_MAX_LENGTH + 1];                    PATFETCH (c);                    c1 = 0;                    /* If pattern is `[[:'.  */                    if (p == pend) return REG_EBRACK;                    for (;;)                      {                        PATFETCH (c);                        if (c == ':' || c == ']' || p == pend                            || c1 == CHAR_CLASS_MAX_LENGTH)                          break;                        str[c1++] = c;                      }                    str[c1] = '\0';                    /* If isn't a word bracketed by `[:' and:`]':                       undo the ending character, the letters, and leave                        the leading `:' and `[' (but set bits for them).  */                    if (c == ':' && *p == ']')                      {                        int ch;                        boolean is_alnum = STREQ (str, "alnum");                        boolean is_alpha = STREQ (str, "alpha");                        boolean is_blank = STREQ (str, "blank");                        boolean is_cntrl = STREQ (str, "cntrl");                        boolean is_digit = STREQ (str, "digit");                        boolean is_graph = STREQ (str, "graph");                        boolean is_lower = STREQ (str, "lower");                        boolean is_print = STREQ (str, "print");                        boolean is_punct = STREQ (str, "punct");                        boolean is_space = STREQ (str, "space");                        boolean is_upper = STREQ (str, "upper");                        boolean is_xdigit = STREQ (str, "xdigit");                                                if (!IS_CHAR_CLASS (str)) return REG_ECTYPE;                        /* Throw away the ] at the end of the character                           class.  */                        PATFETCH (c);					                        if (p == pend) return REG_EBRACK;                        for (ch = 0; ch < 1 << BYTEWIDTH; ch++)                          {                            if (   (is_alnum  && ISALNUM (ch))                                || (is_alpha  && ISALPHA (ch))                                || (is_blank  && ISBLANK (ch))                                || (is_cntrl  && ISCNTRL (ch))                                || (is_digit  && ISDIGIT (ch))                                || (is_graph  && ISGRAPH (ch))                                || (is_lower  && ISLOWER (ch))                                || (is_print  && ISPRINT (ch))                                || (is_punct  && ISPUNCT (ch))                                || (is_space  && ISSPACE (ch))                                || (is_upper  && ISUPPER (ch))                                || (is_xdigit && ISXDIGIT (ch)))                            SET_LIST_BIT (ch);                          }                        had_char_class = true;                      }                    else                      {                        c1++;                        while (c1--)                              PATUNFETCH;                        SET_LIST_BIT ('[');                        SET_LIST_BIT (':');                        had_char_class = false;                      }                  }                else                  {                    had_char_class = false;                    SET_LIST_BIT (c);                  }              }            /* Discard any (non)matching list bytes that are all 0 at the               end of the map.  Decrease the map-length byte too.  */            while ((int) b[-1] > 0 && b[b[-1] - 1] == 0)               b[-1]--;             b += b[-1];          }          break;	case '(':          if (syntax & RE_NO_BK_PARENS)            goto handle_open;          else            goto normal_char;        case ')':          if (syntax & RE_NO_BK_PARENS)            goto handle_close;          else            goto normal_char;        case '\n':          if (syntax & RE_NEWLINE_ALT)            goto handle_alt;          else            goto normal_char;	case '|':          if (syntax & RE_NO_BK_VBAR)            goto handle_alt;          else            goto normal_char;        case '{':           if (syntax & RE_INTERVALS && syntax & RE_NO_BK_BRACES)             goto handle_interval;           else             goto normal_char;        case '\\':          if (p == pend) return REG_EESCAPE;          /* Do not translate the character after the \, so that we can             distinguish, e.g., \B from \b, even if we normally would             translate, e.g., B to b.  */          PATFETCH_RAW (c);          switch (c)            {            case '(':              if (syntax & RE_NO_BK_PARENS)                goto normal_backslash;            handle_open:              bufp->re_nsub++;              regnum++;              if (COMPILE_STACK_FULL)                {                   RETALLOC (compile_stack.stack, compile_stack.size << 1,                            compile_stack_elt_t);                  if (compile_stack.stack == NULL) return REG_ESPACE;                  compile_stack.size <<= 1;                }              /* These are the values to restore when we hit end of this                 group.  They are all relative offsets, so that if the                 whole pattern moves because of realloc, they will still                 be valid.  */              COMPILE_STACK_TOP.begalt_offset = begalt - bufp->buffer;              COMPILE_STACK_TOP.fixup_alt_jump                 = fixup_alt_jump ? fixup_alt_jump - bufp->buffer + 1 : 0;              COMPILE_STACK_TOP.laststart_offset = b - bufp->buffer;              COMPILE_STACK_TOP.regnum = regnum;              /* We will eventually replace the 0 with the number of                 groups inner to this one.  But do not push a                 start_memory for groups beyond the last one we can                 represent in the compiled pattern.  */              if (regnum <= MAX_REGNUM)                {                  COMPILE_STACK_TOP.inner_group_offset = b - bufp->buffer + 2;                  BUF_PUSH_3 (start_memory, regnum, 0);                }                              compile_stack.avail++;

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -