📄 regex.cxx

📁 mgcp协议源代码。支持多种编码：g711
💻 CXX
📖 第 1 页 / 共 5 页
字号:
        case '^':          {            if (   /* If at start of pattern, it's an operator.  */                   p == pattern + 1                   /* If context independent, it's an operator.  */                || syntax & RE_CONTEXT_INDEP_ANCHORS                   /* Otherwise, depends on what's come before.  */                || at_begline_loc_p (pattern, p, syntax))              BUF_PUSH (begline);            else              goto normal_char;          }          break;        case '$':          {            if (   /* If at end of pattern, it's an operator.  */                   p == pend                    /* If context independent, it's an operator.  */                || syntax & RE_CONTEXT_INDEP_ANCHORS                   /* Otherwise, depends on what's next.  */                || at_endline_loc_p (p, pend, syntax))               BUF_PUSH (endline);             else               goto normal_char;           }           break;	case '+':        case '?':          if ((syntax & RE_BK_PLUS_QM)              || (syntax & RE_LIMITED_OPS))            goto normal_char;        handle_plus:        case '*':          /* If there is no previous pattern... */          if (!laststart)            {              if (syntax & RE_CONTEXT_INVALID_OPS)                return REG_BADRPT;              else if (!(syntax & RE_CONTEXT_INDEP_OPS))                goto normal_char;            }          {            /* Are we optimizing this jump?  */            boolean keep_string_p = false;                        /* 1 means zero (many) matches is allowed.  */            char zero_times_ok = 0, many_times_ok = 0;            /* If there is a sequence of repetition chars, collapse it               down to just one (the right one).  We can't combine               interval operators with these because of, e.g., `a{2}*',               which should only match an even number of `a's.  */            for (;;)              {                zero_times_ok |= c != '+';                many_times_ok |= c != '?';                if (p == pend)                  break;                PATFETCH (c);                if (c == '*'                    || (!(syntax & RE_BK_PLUS_QM) && (c == '+' || c == '?')))                  ;                else if (syntax & RE_BK_PLUS_QM  &&  c == '\\')                  {                    if (p == pend) return REG_EESCAPE;                    PATFETCH (c1);                    if (!(c1 == '+' || c1 == '?'))                      {                        PATUNFETCH;                        PATUNFETCH;                        break;                      }                    c = c1;                  }                else                  {                    PATUNFETCH;                    break;                  }                /* If we get here, we found another repeat character.  */               }            /* Star, etc. applied to an empty pattern is equivalent               to an empty pattern.  */            if (!laststart)                break;            /* Now we know whether or not zero matches is allowed               and also whether or not two or more matches is allowed.  */            if (many_times_ok)              { /* More than one repetition is allowed, so put in at the                   end a backward relative jump from `b' to before the next                   jump we're going to put in below (which jumps from                   laststart to after this jump).                     But if we are at the `*' in the exact sequence `.*\n',                   insert an unconditional jump backwards to the .,                   instead of the beginning of the loop.  This way we only                   push a failure point once, instead of every time                   through the loop.  */                assert (p - 1 > pattern);                /* Allocate the space for the jump.  */                GET_BUFFER_SPACE (3);                /* We know we are not at the first character of the pattern,                   because laststart was nonzero.  And we've already                   incremented `p', by the way, to be the character after                   the `*'.  Do we have to do something analogous here                   for null bytes, because of RE_DOT_NOT_NULL?  */                if (TRANSLATE (*(p - 2)) == TRANSLATE ('.')		    && zero_times_ok                    && p < pend && TRANSLATE (*p) == TRANSLATE ('\n')                    && !(syntax & RE_DOT_NEWLINE))                  { /* We have .*\n.  */                    STORE_JUMP (jump, b, laststart);                    keep_string_p = true;                  }                else                  /* Anything else.  */                  STORE_JUMP (maybe_pop_jump, b, laststart - 3);                /* We've added more stuff to the buffer.  */                b += 3;              }            /* On failure, jump from laststart to b + 3, which will be the               end of the buffer after this jump is inserted.  */            GET_BUFFER_SPACE (3);            INSERT_JUMP (keep_string_p ? on_failure_keep_string_jump                                       : on_failure_jump,                         laststart, b + 3);            pending_exact = 0;            b += 3;            if (!zero_times_ok)              {                /* At least one repetition is required, so insert a                   `dummy_failure_jump' before the initial                   `on_failure_jump' instruction of the loop. This                   effects a skip over that instruction the first time                   we hit that loop.  */                GET_BUFFER_SPACE (3);                INSERT_JUMP (dummy_failure_jump, laststart, laststart + 6);                b += 3;              }            }	  break;	case '.':          laststart = b;          BUF_PUSH (anychar);          break;        case '[':          {            boolean had_char_class = false;            if (p == pend) return REG_EBRACK;            /* Ensure that we have enough space to push a charset: the               opcode, the length count, and the bitset; 34 bytes in all.  */	    GET_BUFFER_SPACE (34);            laststart = b;            /* We test `*p == '^' twice, instead of using an if               statement, so we only need one BUF_PUSH.  */            BUF_PUSH (*p == '^' ? charset_not : charset);             if (*p == '^')              p++;            /* Remember the first position in the bracket expression.  */            p1 = p;            /* Push the number of bytes in the bitmap.  */            BUF_PUSH ((1 << BYTEWIDTH) / BYTEWIDTH);            /* Clear the whole map.  */            bzero (b, (1 << BYTEWIDTH) / BYTEWIDTH);            /* charset_not matches newline according to a syntax bit.  */            if ((re_opcode_t) b[-2] == charset_not                && (syntax & RE_HAT_LISTS_NOT_NEWLINE))              SET_LIST_BIT ('\n');            /* Read in characters and ranges, setting map bits.  */            for (;;)              {                if (p == pend) return REG_EBRACK;                PATFETCH (c);                /* \ might escape characters inside [...] and [^...].  */                if ((syntax & RE_BACKSLASH_ESCAPE_IN_LISTS) && c == '\\')                  {                    if (p == pend) return REG_EESCAPE;                    PATFETCH (c1);                    SET_LIST_BIT (c1);                    continue;                  }                /* Could be the end of the bracket expression.  If it's                   not (i.e., when the bracket expression is `[]' so                   far), the ']' character bit gets set way below.  */                if (c == ']' && p != p1 + 1)                  break;                /* Look ahead to see if it's a range when the last thing                   was a character class.  */                if (had_char_class && c == '-' && *p != ']')                  return REG_ERANGE;                /* Look ahead to see if it's a range when the last thing                   was a character: if this is a hyphen not at the                   beginning or the end of a list, then it's the range                   operator.  */                if (c == '-'                     && !(p - 2 >= pattern && p[-2] == '[')                     && !(p - 3 >= pattern && p[-3] == '[' && p[-2] == '^')                    && *p != ']')                  {                    reg_errcode_t ret                      = compile_range (&p, pend, translate, syntax, b);                    if (ret != REG_NOERROR) return ret;                  }                else if (p[0] == '-' && p[1] != ']')                  { /* This handles ranges made up of characters only.  */                    reg_errcode_t ret;		    /* Move past the `-'.  */                    PATFETCH (c1);                                        ret = compile_range (&p, pend, translate, syntax, b);                    if (ret != REG_NOERROR) return ret;                  }                /* See if we're at the beginning of a possible character                   class.  */                else if (syntax & RE_CHAR_CLASSES && c == '[' && *p == ':')                  { /* Leave room for the null.  */                    char str[CHAR_CLASS_MAX_LENGTH + 1];                    PATFETCH (c);                    c1 = 0;                    /* If pattern is `[[:'.  */                    if (p == pend) return REG_EBRACK;                    for (;;)                      {                        PATFETCH (c);                        if (c == ':' || c == ']' || p == pend                            || c1 == CHAR_CLASS_MAX_LENGTH)                          break;                        str[c1++] = c;                      }                    str[c1] = '\0';                    /* If isn't a word bracketed by `[:' and:`]':                       undo the ending character, the letters, and leave                        the leading `:' and `[' (but set bits for them).  */                    if (c == ':' && *p == ']')                      {                        int ch;                        boolean is_alnum = STREQ (str, "alnum");                        boolean is_alpha = STREQ (str, "alpha");                        boolean is_blank = STREQ (str, "blank");                        boolean is_cntrl = STREQ (str, "cntrl");                        boolean is_digit = STREQ (str, "digit");                        boolean is_graph = STREQ (str, "graph");                        boolean is_lower = STREQ (str, "lower");                        boolean is_print = STREQ (str, "print");                        boolean is_punct = STREQ (str, "punct");                        boolean is_space = STREQ (str, "space");                        boolean is_upper = STREQ (str, "upper");                        boolean is_xdigit = STREQ (str, "xdigit");                                                if (!IS_CHAR_CLASS (str)) return REG_ECTYPE;                        /* Throw away the ] at the end of the character                           class.  */                        PATFETCH (c);					                        if (p == pend) return REG_EBRACK;                        for (ch = 0; ch < 1 << BYTEWIDTH; ch++)                          {                            if (   (is_alnum  && ISALNUM (ch))                                || (is_alpha  && ISALPHA (ch))                                || (is_blank  && ISBLANK (ch))                                || (is_cntrl  && ISCNTRL (ch))                                || (is_digit  && ISDIGIT (ch))                                || (is_graph  && ISGRAPH (ch))                                || (is_lower  && ISLOWER (ch))                                || (is_print  && ISPRINT (ch))                                || (is_punct  && ISPUNCT (ch))                                || (is_space  && ISSPACE (ch))                                || (is_upper  && ISUPPER (ch))                                || (is_xdigit && ISXDIGIT (ch)))                            SET_LIST_BIT (ch);                          }                        had_char_class = true;                      }                    else                      {                        c1++;                        while (c1--)                              PATUNFETCH;                        SET_LIST_BIT ('[');                        SET_LIST_BIT (':');                        had_char_class = false;                      }                  }                else                  {                    had_char_class = false;                    SET_LIST_BIT (c);                  }              }            /* Discard any (non)matching list bytes that are all 0 at the               end of the map.  Decrease the map-length byte too.  */            while ((int) b[-1] > 0 && b[b[-1] - 1] == 0)               b[-1]--;             b += b[-1];          }          break;	case '(':          if (syntax & RE_NO_BK_PARENS)            goto handle_open;          else            goto normal_char;        case ')':          if (syntax & RE_NO_BK_PARENS)            goto handle_close;          else            goto normal_char;        case '\n':          if (syntax & RE_NEWLINE_ALT)            goto handle_alt;          else            goto normal_char;	case '|':          if (syntax & RE_NO_BK_VBAR)            goto handle_alt;          else            goto normal_char;        case '{':           if (syntax & RE_INTERVALS && syntax & RE_NO_BK_BRACES)             goto handle_interval;           else             goto normal_char;        case '\\':          if (p == pend) return REG_EESCAPE;          /* Do not translate the character after the \, so that we can             distinguish, e.g., \B from \b, even if we normally would             translate, e.g., B to b.  */          PATFETCH_RAW (c);          switch (c)            {            case '(':              if (syntax & RE_NO_BK_PARENS)                goto normal_backslash;            handle_open:              bufp->re_nsub++;              regnum++;              if (COMPILE_STACK_FULL)                {                   RETALLOC (compile_stack.stack, compile_stack.size << 1,                            compile_stack_elt_t);
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -