⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 mbregex.c

📁 php-4.4.7学习linux时下载的源代码
💻 C
📖 第 1 页 / 共 5 页
字号:
    case '+':    case '?':    case '*':      /* If there is no previous pattern, char not special. */      if (!laststart) {	snprintf(error_msg, ERROR_MSG_MAX_SIZE, 		 "invalid regular expression; there's no previous pattern, to which '%c' would define cardinality at %d", 		 c, p-pattern);	if (bufp->buffer) {		xfree(bufp->buffer);	}		FREE_AND_RETURN(stackb, error_msg);      }      /* If there is a sequence of repetition chars,	 collapse it down to just one.  */      zero_times_ok = c != '+';      many_times_ok = c != '?';      greedy = 1;      if (p != pend) {	PATFETCH(c);	switch (c) {	case '?':	  greedy = 0;	  break;	case '*':	case '+':	  goto nested_meta;	default:	  PATUNFETCH;	  break;	}      }    repeat:      /* Star, etc. applied to an empty pattern is equivalent	 to an empty pattern.  */      if (!laststart)  	break;      if (greedy && many_times_ok && *laststart == anychar && b - laststart <= 2) {	if (b[-1] == stop_paren)	  b--;	if (zero_times_ok)	  *laststart = anychar_repeat;	else {	  BUFPUSH(anychar_repeat);	}	break;      }      /* Now we know whether or not zero matches is allowed	 and also whether or not two or more matches is allowed.  */      if (many_times_ok) {	/* If more than one repetition is allowed, put in at the	   end a backward relative jump from b to before the next	   jump we're going to put in below (which jumps from	   laststart to after this jump).  */	GET_BUFFER_SPACE(3);	store_jump(b,greedy?maybe_finalize_jump:finalize_push,laststart-3);	b += 3;  	/* Because store_jump put stuff here.  */      }      /* On failure, jump from laststart to next pattern, which will be the	 end of the buffer after this jump is inserted.  */      GET_BUFFER_SPACE(3);      insert_jump(on_failure_jump, laststart, b + 3, b);      b += 3;      if (zero_times_ok) {	if (greedy == 0) {	  GET_BUFFER_SPACE(3);	  insert_jump(try_next, laststart, b + 3, b);	  b += 3;	}      }      else {	/* At least one repetition is required, so insert a	   `dummy_failure_jump' before the initial	   `on_failure_jump' instruction of the loop. This	   effects a skip over that instruction the first time	   we hit that loop.  */	GET_BUFFER_SPACE(3);	insert_jump(dummy_failure_jump, laststart, laststart + 6, b);	b += 3;      }      break;    case '.':      laststart = b;      BUFPUSH(anychar);      break;    case '[':      if (p == pend)	FREE_AND_RETURN(stackb, "invalid regular expression; '[' can't be the last character ie. can't start range at the end of pattern");      while ((b - bufp->buffer + 9 + (1 << MBRE_BYTEWIDTH) / MBRE_BYTEWIDTH)	     > bufp->allocated)	EXTEND_BUFFER;      laststart = b;      if (*p == '^') {	BUFPUSH(charset_not); 	p++;      }      else	BUFPUSH(charset);      p0 = p;      BUFPUSH((1 << MBRE_BYTEWIDTH) / MBRE_BYTEWIDTH);      /* Clear the whole map */      memset(b, 0, (1 << MBRE_BYTEWIDTH) / MBRE_BYTEWIDTH + 2);      had_mbchar = 0;      had_num_literal = 0;      had_char_class = 0;      /* Read in characters and ranges, setting map bits.  */      for (;;) {	int size;	unsigned last = (unsigned)-1;	if ((size = EXTRACT_UNSIGNED(&b[(1 << MBRE_BYTEWIDTH) / MBRE_BYTEWIDTH]))	    || current_mbctype) {	  /* Ensure the space is enough to hold another interval	     of multi-byte chars in charset(_not)?.  */	  size = (1 << MBRE_BYTEWIDTH) / MBRE_BYTEWIDTH + 2 + size*8 + 8;	  while (b + size + 1 > bufp->buffer + bufp->allocated)	    EXTEND_BUFFER;	}      range_retry:	if (range && had_char_class) {	  FREE_AND_RETURN(stackb, "invalid regular expression; can't use character class as an end value of range");	}	PATFETCH(c);	if (c == ']') {	  if (p == p0 + 1) {	    if (p == pend)	      FREE_AND_RETURN(stackb, "invalid regular expression; empty character class");	  }	  else 	    /* Stop if this isn't merely a ] inside a bracket	       expression, but rather the end of a bracket	       expression.  */	    break;	}	/* Look ahead to see if it's a range when the last thing	   was a character class.  */	if (had_char_class && c == '-' && *p != ']')	  FREE_AND_RETURN(stackb, "invalid regular expression; can't use character class as a start value of range");	if (ismbchar(c)) {	  PATFETCH_MBC(c);	  had_mbchar++;	}	had_char_class = 0;	/* \ escapes characters when inside [...].  */	if (c == '\\') {	  PATFETCH_RAW(c);	  switch (c) {	  case 'w':	    for (c = 0; c < (1 << MBRE_BYTEWIDTH); c++) {	      if (SYNTAX(c) == Sword ||		  (!current_mbctype && SYNTAX(c) == Sword2))		SET_LIST_BIT(c);	    }	    if (current_mbctype) {	      set_list_bits(0x80, 0xffffffff, b);	    }	    had_char_class = 1;	    last = -1;	    continue;	  case 'W':	    for (c = 0; c < (1 << MBRE_BYTEWIDTH); c++) {	      if (SYNTAX(c) != Sword &&		  ((current_mbctype && !re_mbctab[c]) ||		  (!current_mbctype && SYNTAX(c) != Sword2)))		SET_LIST_BIT(c);	    }	    had_char_class = 1;	    last = -1;	    continue;	  case 's':	    for (c = 0; c < 256; c++)	      if (ISSPACE(c))		SET_LIST_BIT(c);	    had_char_class = 1;	    last = -1;	    continue;	  case 'S':	    for (c = 0; c < 256; c++)	      if (!ISSPACE(c))		SET_LIST_BIT(c);	    if (current_mbctype)	      set_list_bits(0x80, 0xffffffff, b);	    had_char_class = 1;	    last = -1;	    continue;	  case 'd':	    for (c = '0'; c <= '9'; c++)	      SET_LIST_BIT(c);	    had_char_class = 1;	    last = -1;	    continue;	  case 'D':	    for (c = 0; c < 256; c++)	      if (!ISDIGIT(c))		SET_LIST_BIT(c);	    if (current_mbctype)	      set_list_bits(0x80, 0xffffffff, b);	    had_char_class = 1;	    last = -1;	    continue;	  case 'x':	    c = scan_hex(p, 2, &numlen);	    p += numlen;	    had_num_literal = 1;	    break;	  case '0': case '1': case '2': case '3': case '4':	  case '5': case '6': case '7': case '8': case '9':	    PATUNFETCH;	    c = scan_oct(p, 3, &numlen);	    p += numlen;	    had_num_literal = 1;	    break;	  case 'M':	  case 'C':	  case 'c':	    p0 = --p;	    c = read_special(p, pend, &p0);	    if (c > 255) goto invalid_escape;	    p = p0;	    had_num_literal = 1;	    break;	  default:	    c = read_backslash(c);	    if (ismbchar(c)) {	      PATFETCH_MBC(c);	      had_mbchar++;	    }	    break;	  }	}	/* Get a range.  */	if (range) {	  if (last > c)	    goto invalid_pattern;	  range = 0;	  if (had_mbchar == 0) {	    for (;last<=c;last++)	      SET_LIST_BIT(last);	  }	  else if (had_mbchar == 2) {	    set_list_bits(last, c, b);	  }	  else {	    /* restriction: range between sbc and mbc */	    goto invalid_pattern;	  }	}	else if (p[0] == '-' && p[1] != ']') {	  last = c;	  PATFETCH(c1);	  range = 1;	  goto range_retry;	}	else if (c == '[' && *p == ':') {	  /* Leave room for the null.  */	  char str[CHAR_CLASS_MAX_LENGTH + 1];	  PATFETCH_RAW(c);	  c1 = 0;	  /* If pattern is `[[:'.  */	  if (p == pend) 	    FREE_AND_RETURN(stackb, "invalid regular expression; re can't end '[[:'");	  for (;;) {	    PATFETCH (c);	    if (c == ':' || c == ']' || p == pend		|| c1 == CHAR_CLASS_MAX_LENGTH)	      break;	    str[c1++] = c;	  }	  str[c1] = '\0';	  /* If isn't a word bracketed by `[:' and:`]':	     undo the ending character, the letters, and leave 	     the leading `:' and `[' (but set bits for them).  */	  if (c == ':' && *p == ']') {	    int ch;	    char is_alnum = STREQ(str, "alnum");	    char is_alpha = STREQ(str, "alpha");	    char is_blank = STREQ(str, "blank");	    char is_cntrl = STREQ(str, "cntrl");	    char is_digit = STREQ(str, "digit");	    char is_graph = STREQ(str, "graph");	    char is_lower = STREQ(str, "lower");	    char is_print = STREQ(str, "print");	    char is_punct = STREQ(str, "punct");	    char is_space = STREQ(str, "space");	    char is_upper = STREQ(str, "upper");	    char is_xdigit = STREQ(str, "xdigit");	    if (!IS_CHAR_CLASS(str)){	      snprintf(error_msg, ERROR_MSG_MAX_SIZE, 		       "invalid regular expression; [:%s:] is not a character class", str);	      FREE_AND_RETURN(stackb, error_msg);	    }	    /* Throw away the ] at the end of the character class.  */	    PATFETCH(c);	    if (p == pend) 	      FREE_AND_RETURN(stackb, "invalid regular expression; range doesn't have ending ']' after a character class");	    for (ch = 0; ch < 1 << MBRE_BYTEWIDTH; ch++) {	      if (   (is_alnum  && ISALNUM(ch))		  || (is_alpha  && ISALPHA(ch))		  || (is_blank  && ISBLANK(ch))		  || (is_cntrl  && ISCNTRL(ch))		  || (is_digit  && ISDIGIT(ch))		  || (is_graph  && ISGRAPH(ch))		  || (is_lower  && ISLOWER(ch))		  || (is_print  && ISPRINT(ch))		  || (is_punct  && ISPUNCT(ch))		  || (is_space  && ISSPACE(ch))		  || (is_upper  && ISUPPER(ch))		  || (is_xdigit && ISXDIGIT(ch)))		SET_LIST_BIT(ch);	    }	    had_char_class = 1;	  }	  else {	    c1++;	    while (c1--)    	      PATUNFETCH;	    SET_LIST_BIT(TRANSLATE_P()?translate['[']:'[');	    SET_LIST_BIT(TRANSLATE_P()?translate[':']:':');	    had_char_class = 0;	    last = ':';	  }	}	else if (had_mbchar == 0 && (!current_mbctype || !had_num_literal)) {	  SET_LIST_BIT(c);	  had_num_literal = 0;	}	else	  set_list_bits(c, c, b);	had_mbchar = 0;      }      /* Discard any character set/class bitmap bytes that are all	 0 at the end of the map. Decrement the map-length byte too.  */      while ((int)b[-1] > 0 && b[(int)b[-1] - 1] == 0) 	b[-1]--;       if (b[-1] != (1 << MBRE_BYTEWIDTH) / MBRE_BYTEWIDTH)	memmove(&b[(int)b[-1]], &b[(1 << MBRE_BYTEWIDTH) / MBRE_BYTEWIDTH],		2 + EXTRACT_UNSIGNED(&b[(1 << MBRE_BYTEWIDTH) / MBRE_BYTEWIDTH])*8);      b += b[-1] + 2 + EXTRACT_UNSIGNED(&b[(int)b[-1]])*8;      break;    case '(':      {	int old_options = options;	int push_option = 0;	int casefold = 0;      PATFETCH(c);      if (c == '?') {	int negative = 0;	PATFETCH_RAW(c);	switch (c) {	case 'x': case 'p': case 'm': case 'i': case '-':	  for (;;) {	    switch (c) {	    case '-':	      negative = 1;	      break;	    case ':':	    case ')':	      break;	    case 'x':	      if (negative)		options &= ~MBRE_OPTION_EXTENDED;	      else		options |= MBRE_OPTION_EXTENDED;	      break;	    case 'p':	      if (negative) {		if ((options&MBRE_OPTION_POSIXLINE) == MBRE_OPTION_POSIXLINE) {		  options &= ~MBRE_OPTION_POSIXLINE;		}	      }	      else if ((options&MBRE_OPTION_POSIXLINE) != MBRE_OPTION_POSIXLINE) {		options |= MBRE_OPTION_POSIXLINE;	      }	      push_option = 1;	      break;	    case 'm':	      if (negative) {		if (options&MBRE_OPTION_MULTILINE) {		  options &= ~MBRE_OPTION_MULTILINE;		}	      }	      else if (!(options&MBRE_OPTION_MULTILINE)) {		options |= MBRE_OPTION_MULTILINE;	      }	      push_option = 1;	      break;	    case 'i':	      if (negative) {		if (options&MBRE_OPTION_IGNORECASE) {		  options &= ~MBRE_OPTION_IGNORECASE;		}	      }	      else if (!(options&MBRE_OPTION_IGNORECASE)) {		options |= MBRE_OPTION_IGNORECASE;	      }		casefold = 1;	      break;	    default:	      FREE_AND_RETURN(stackb, "undefined (?...) inline option");	    }	    if (c == ')') {	      c = '#';	/* read whole in-line options */	      break;	    }	    if (c == ':') break;	    PATFETCH_RAW(c);	  }	  break;	case '#':	  for (;;) {	    PATFETCH(c);	    if (c == ')') break;	  }	  c = '#';	  break;	case ':':	case '=':	case '!':	case '>':	  break;	default:	  FREE_AND_RETURN(stackb, "undefined (?...) sequence");	}	}	else {	  PATUNFETCH;	  c = '(';	}	if (c == '#') {	if (push_option) {	  BUFPUSH(option_set);	  BUFPUSH(options);	}	  if (casefold) {	    if (options & MBRE_OPTION_IGNORECASE)	      BUFPUSH(casefold_on);

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -