⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 regparse.c

📁 PHP v6.0 For Linux 运行环境:Win9X/ WinME/ WinNT/ Win2K/ WinXP
💻 C
📖 第 1 页 / 共 5 页
字号:
      cc->mbuf = tbuf;    }    CCLASS_CLEAR_NOT(cc);  }  return 0;}static intand_cclass(CClassNode* dest, CClassNode* cc, OnigEncoding enc){  int r, not1, not2;  BBuf *buf1, *buf2, *pbuf;  BitSetRef bsr1, bsr2;  BitSet bs1, bs2;  not1 = IS_CCLASS_NOT(dest);  bsr1 = dest->bs;  buf1 = dest->mbuf;  not2 = IS_CCLASS_NOT(cc);  bsr2 = cc->bs;  buf2 = cc->mbuf;  if (not1 != 0) {    bitset_invert_to(bsr1, bs1);    bsr1 = bs1;  }  if (not2 != 0) {    bitset_invert_to(bsr2, bs2);    bsr2 = bs2;  }  bitset_and(bsr1, bsr2);  if (bsr1 != dest->bs) {    bitset_copy(dest->bs, bsr1);    bsr1 = dest->bs;  }  if (not1 != 0) {    bitset_invert(dest->bs);  }  if (! ONIGENC_IS_SINGLEBYTE(enc)) {    if (not1 != 0 && not2 != 0) {      r = or_code_range_buf(enc, buf1, 0, buf2, 0, &pbuf);    }    else {      r = and_code_range_buf(buf1, not1, buf2, not2, &pbuf);      if (r == 0 && not1 != 0) {	BBuf *tbuf;	r = not_code_range_buf(enc, pbuf, &tbuf);	if (r != 0) {	  bbuf_free(pbuf);	  return r;	}	bbuf_free(pbuf);	pbuf = tbuf;      }    }    if (r != 0) return r;    dest->mbuf = pbuf;    bbuf_free(buf1);    return r;  }  return 0;}static intor_cclass(CClassNode* dest, CClassNode* cc, OnigEncoding enc){  int r, not1, not2;  BBuf *buf1, *buf2, *pbuf;  BitSetRef bsr1, bsr2;  BitSet bs1, bs2;  not1 = IS_CCLASS_NOT(dest);  bsr1 = dest->bs;  buf1 = dest->mbuf;  not2 = IS_CCLASS_NOT(cc);  bsr2 = cc->bs;  buf2 = cc->mbuf;  if (not1 != 0) {    bitset_invert_to(bsr1, bs1);    bsr1 = bs1;  }  if (not2 != 0) {    bitset_invert_to(bsr2, bs2);    bsr2 = bs2;  }  bitset_or(bsr1, bsr2);  if (bsr1 != dest->bs) {    bitset_copy(dest->bs, bsr1);    bsr1 = dest->bs;  }  if (not1 != 0) {    bitset_invert(dest->bs);  }  if (! ONIGENC_IS_SINGLEBYTE(enc)) {    if (not1 != 0 && not2 != 0) {      r = and_code_range_buf(buf1, 0, buf2, 0, &pbuf);    }    else {      r = or_code_range_buf(enc, buf1, not1, buf2, not2, &pbuf);      if (r == 0 && not1 != 0) {	BBuf *tbuf;	r = not_code_range_buf(enc, pbuf, &tbuf);	if (r != 0) {	  bbuf_free(pbuf);	  return r;	}	bbuf_free(pbuf);	pbuf = tbuf;      }    }    if (r != 0) return r;    dest->mbuf = pbuf;    bbuf_free(buf1);    return r;  }  else    return 0;}static intconv_backslash_value(int c, ScanEnv* env){  if (IS_SYNTAX_OP(env->syntax, ONIG_SYN_OP_ESC_CONTROL_CHARS)) {    switch (c) {    case 'n':  return '\n';    case 't':  return '\t';    case 'r':  return '\r';    case 'f':  return '\f';    case 'a':  return '\007';    case 'b':  return '\010';    case 'e':  return '\033';    case 'v':      if (IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_ESC_V_VTAB))	return '\v';      break;    default:      break;    }  }  return c;}static intis_invalid_qualifier_target(Node* node){  switch (NTYPE(node)) {  case N_ANCHOR:    return 1;    break;  case N_EFFECT:    if (NEFFECT(node).type == EFFECT_OPTION)      return is_invalid_qualifier_target(NEFFECT(node).target);    break;  case N_LIST: /* ex. (?:\G\A)* */    do {      if (! is_invalid_qualifier_target(NCONS(node).left)) return 0;    } while (IS_NOT_NULL(node = NCONS(node).right));    return 0;    break;  case N_ALT:  /* ex. (?:abc|\A)* */    do {      if (is_invalid_qualifier_target(NCONS(node).left)) return 1;    } while (IS_NOT_NULL(node = NCONS(node).right));    break;  default:    break;  }  return 0;}/* ?:0, *:1, +:2, ??:3, *?:4, +?:5 */static intpopular_qualifier_num(QualifierNode* qf){  if (qf->greedy) {    if (qf->lower == 0) {      if (qf->upper == 1) return 0;      else if (IS_REPEAT_INFINITE(qf->upper)) return 1;    }    else if (qf->lower == 1) {      if (IS_REPEAT_INFINITE(qf->upper)) return 2;    }  }  else {    if (qf->lower == 0) {      if (qf->upper == 1) return 3;      else if (IS_REPEAT_INFINITE(qf->upper)) return 4;    }    else if (qf->lower == 1) {      if (IS_REPEAT_INFINITE(qf->upper)) return 5;    }  }  return -1;}enum ReduceType {  RQ_ASIS = 0, /* as is */  RQ_DEL  = 1, /* delete parent */  RQ_A,        /* to '*'    */  RQ_AQ,       /* to '*?'   */  RQ_QQ,       /* to '??'   */  RQ_P_QQ,     /* to '+)??' */  RQ_PQ_Q,     /* to '+?)?' */};static enum ReduceType ReduceTypeTable[6][6] = {  {RQ_DEL,  RQ_A,    RQ_A,   RQ_QQ,   RQ_AQ,   RQ_ASIS}, /* '?'  */  {RQ_DEL,  RQ_DEL,  RQ_DEL, RQ_P_QQ, RQ_P_QQ, RQ_DEL},  /* '*'  */  {RQ_A,    RQ_A,    RQ_DEL, RQ_ASIS, RQ_P_QQ, RQ_DEL},  /* '+'  */  {RQ_DEL,  RQ_AQ,   RQ_AQ,  RQ_DEL,  RQ_AQ,   RQ_AQ},   /* '??' */  {RQ_DEL,  RQ_DEL,  RQ_DEL, RQ_DEL,  RQ_DEL,  RQ_DEL},  /* '*?' */  {RQ_ASIS, RQ_PQ_Q, RQ_DEL, RQ_AQ,   RQ_AQ,   RQ_DEL}   /* '+?' */};extern voidonig_reduce_nested_qualifier(Node* pnode, Node* cnode){  int pnum, cnum;  QualifierNode *p, *c;  p = &(NQUALIFIER(pnode));  c = &(NQUALIFIER(cnode));  pnum = popular_qualifier_num(p);  cnum = popular_qualifier_num(c);  switch(ReduceTypeTable[cnum][pnum]) {  case RQ_DEL:    *p = *c;    break;  case RQ_A:    p->target = c->target;    p->lower  = 0;  p->upper = REPEAT_INFINITE;  p->greedy = 1;    break;  case RQ_AQ:    p->target = c->target;    p->lower  = 0;  p->upper = REPEAT_INFINITE;  p->greedy = 0;    break;  case RQ_QQ:    p->target = c->target;    p->lower  = 0;  p->upper = 1;  p->greedy = 0;    break;  case RQ_P_QQ:    p->target = cnode;    p->lower  = 0;  p->upper = 1;  p->greedy = 0;    c->lower  = 1;  c->upper = REPEAT_INFINITE;  c->greedy = 1;    return ;    break;  case RQ_PQ_Q:    p->target = cnode;    p->lower  = 0;  p->upper = 1;  p->greedy = 1;    c->lower  = 1;  c->upper = REPEAT_INFINITE;  c->greedy = 0;    return ;    break;  case RQ_ASIS:    p->target = cnode;    return ;    break;  }  c->target = NULL_NODE;  onig_node_free(cnode);}enum TokenSyms {  TK_EOT      = 0,   /* end of token */  TK_RAW_BYTE = 1,  TK_CHAR,  TK_STRING,  TK_CODE_POINT,  TK_ANYCHAR,  TK_CHAR_TYPE,  TK_BACKREF,  TK_CALL,  TK_ANCHOR,  TK_OP_REPEAT,  TK_INTERVAL,  TK_ANYCHAR_ANYTIME,  /* SQL '%' == .* */  TK_ALT,  TK_SUBEXP_OPEN,  TK_SUBEXP_CLOSE,  TK_CC_OPEN,  TK_QUOTE_OPEN,  TK_CHAR_PROPERTY,    /* \p{...}, \P{...} */  /* in cc */  TK_CC_CLOSE,  TK_CC_RANGE,  TK_POSIX_BRACKET_OPEN,  TK_CC_AND,             /* && */  TK_CC_CC_OPEN          /* [ */};typedef struct {  enum TokenSyms type;  int escaped;  int base;   /* is number: 8, 16 (used in [....]) */  UChar* backp;  union {    UChar* s;    int   c;    OnigCodePoint code;    int   anchor;    int   subtype;    struct {      int lower;      int upper;      int greedy;      int possessive;    } repeat;    struct {      int  num;      int  ref1;      int* refs;      int  by_name;    } backref;    struct {      UChar* name;      UChar* name_end;    } call;    struct {      int not;    } prop;  } u;} OnigToken;static intfetch_range_qualifier(UChar** src, UChar* end, OnigToken* tok, ScanEnv* env){  int low, up, syn_allow, non_low = 0;  int r = 0;  OnigCodePoint c;  OnigEncoding enc = env->enc;  UChar* p = *src;  PFETCH_READY;  syn_allow = IS_SYNTAX_BV(env->syntax, ONIG_SYN_ALLOW_INVALID_INTERVAL);  if (PEND) {    if (syn_allow)      return 1;  /* "....{" : OK! */    else      return ONIGERR_END_PATTERN_AT_LEFT_BRACE;  /* "....{" syntax error */  }  if (! syn_allow) {    c = PPEEK;    if (c == ')' || c == '(' || c == '|') {      return ONIGERR_END_PATTERN_AT_LEFT_BRACE;    }  }  low = onig_scan_unsigned_number(&p, end, env->enc);  if (low < 0) return ONIGERR_TOO_BIG_NUMBER_FOR_REPEAT_RANGE;  if (low > ONIG_MAX_REPEAT_NUM)    return ONIGERR_TOO_BIG_NUMBER_FOR_REPEAT_RANGE;  if (p == *src) { /* can't read low */    if (IS_SYNTAX_BV(env->syntax, ONIG_SYN_ALLOW_INTERVAL_LOW_ABBREV)) {      /* allow {,n} as {0,n} */      low = 0;      non_low = 1;    }    else      goto invalid;  }  if (PEND) goto invalid;  PFETCH(c);  if (c == ',') {    UChar* prev = p;    up = onig_scan_unsigned_number(&p, end, env->enc);    if (up < 0) return ONIGERR_TOO_BIG_NUMBER_FOR_REPEAT_RANGE;    if (up > ONIG_MAX_REPEAT_NUM)      return ONIGERR_TOO_BIG_NUMBER_FOR_REPEAT_RANGE;    if (p == prev) {      if (non_low != 0)	goto invalid;      up = REPEAT_INFINITE;  /* {n,} : {n,infinite} */    }  }  else {    if (non_low != 0)      goto invalid;    PUNFETCH;    up = low;  /* {n} : exact n times */    r = 2;     /* fixed */  }  if (PEND) goto invalid;  PFETCH(c);  if (IS_SYNTAX_OP(env->syntax, ONIG_SYN_OP_ESC_BRACE_INTERVAL)) {    if (c != MC_ESC(enc)) goto invalid;    PFETCH(c);  }  if (c != '}') goto invalid;  if (!IS_REPEAT_INFINITE(up) && low > up) {    return ONIGERR_UPPER_SMALLER_THAN_LOWER_IN_REPEAT_RANGE;  }  tok->type = TK_INTERVAL;  tok->u.repeat.lower = low;  tok->u.repeat.upper = up;  *src = p;  return r; /* 0: normal {n,m}, 2: fixed {n} */ invalid:  if (syn_allow)    return 1;  /* OK */  else    return ONIGERR_INVALID_REPEAT_RANGE_PATTERN;}/* \M-, \C-, \c, or \... */static intfetch_escaped_value(UChar** src, UChar* end, ScanEnv* env){  int v;  OnigCodePoint c;  OnigEncoding enc = env->enc;  UChar* p = *src;  PFETCH_READY;  if (PEND) return ONIGERR_END_PATTERN_AT_ESCAPE;  PFETCH(c);  switch (c) {  case 'M':    if (IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_ESC_CAPITAL_M_BAR_META)) {      if (PEND) return ONIGERR_END_PATTERN_AT_META;      PFETCH(c);      if (c != '-') return ONIGERR_META_CODE_SYNTAX;      if (PEND) return ONIGERR_END_PATTERN_AT_META;      PFETCH(c);      if (c == MC_ESC(enc)) {	v = fetch_escaped_value(&p, end, env);	if (v < 0) return v;        c = (OnigCodePoint )v;      }      c = ((c & 0xff) | 0x80);    }    else      goto backslash;    break;  case 'C':    if (IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_ESC_CAPITAL_C_BAR_CONTROL)) {      if (PEND) return ONIGERR_END_PATTERN_AT_CONTROL;      PFETCH(c);      if (c != '-') return ONIGERR_CONTROL_CODE_SYNTAX;      goto control;    }    else      goto backslash;  case 'c':    if (IS_SYNTAX_OP(env->syntax, ONIG_SYN_OP_ESC_C_CONTROL)) {    control:      if (PEND) return ONIGERR_END_PATTERN_AT_CONTROL;      PFETCH(c);      if (c == MC_ESC(enc)) {	v = fetch_escaped_value(&p, end, env);	if (v < 0) return v;        c = (OnigCodePoint )v;      }      else if (c == '?')	c = 0177;      else	c &= 0x9f;      break;    }    /* fall through */  default:    {    backslash:      c = conv_backslash_value(c, env);    }    break;  }  *src = p;  return c;}static int fetch_token(OnigToken* tok, UChar** src, UChar* end, ScanEnv* env);#ifdef USE_NAMED_GROUP/*  def: 0 -> define name    (don't allow number name)       1 -> reference name (allow number name)*/static intfetch_name(UChar** src, UChar* end, UChar** rname_end, ScanEnv* env, int ref){  int r, is_num;  OnigCodePoint c = 0;  OnigCodePoint first_code;  OnigEncoding enc = env->enc;  UChar *name_end;  UChar *p = *src;  PFETCH_READY;  name_end = end;  r = 0;  is_num = 0;  if (PEND) {    return ONIGERR_EMPTY_GROUP_NAME;  }  else {    PFETCH(c);    first_code = c;    if (c == '>')      return ONIGERR_EMPTY_GROUP_NAME;    if (ONIGENC_IS_CODE_DIGIT(enc, c)) {      if (ref == 1)	is_num = 1;      else {	r = ONIGERR_INVALID_GROUP_NAME;      }    }    else if (!ONIGENC_IS_CODE_WORD(enc, c)) {      r = ONIGERR_INVALID_CHAR_IN_GROUP_NAME;    }  }  while (!PEND) {    name_end = p;    PFETCH(c);    if (c == '>' || c == ')') break;    if (is_num == 1) {      if (! ONIGENC_IS_CODE_DIGIT(enc, c)) {	if (!ONIGENC_IS_CODE_WORD(enc, c))	  r = ONIGERR_INVALID_CHAR_IN_GROUP_NAME;	else	  r = ONIGERR_INVALID_GROUP_NAME;      }    }    else {      if (!ONIGENC_IS_CODE_WORD(enc, c)) {        r = ONIGERR_INVALID_CHAR_IN_GROUP_NAME;      }    }  }  if (c != '>') {    r = ONIGERR_INVALID_GROUP_NAME;    name_end = end;  }  else {    if (ONIGENC_IS_CODE_ASCII(first_code) &&        ONIGENC_IS_CODE_UPPER(enc, first_code))      r = ONIGERR_INVALID_GROUP_NAME;  }  if (r == 0) {    *rname_end = name_end;    *src = p;    return 0;  }  else {    onig_scan_env_set_error_string(env, r, *src, name_end);    return r;  }}#elsestatic intfetch_name(UChar** src, UChar* end, UChar** rname_end, ScanEnv* env, int ref){  int r, len;  OnigCodePoint c = 0;  UChar *name_end;  OnigEncoding enc = env->enc;  UChar *p = *src;  PFETCH_READY;  r = 0;  while (!PEND) {    name_end = p;    if (enc_len(enc, p) > 1)      r = ONIGERR_INVALID_CHAR_IN_GROUP_NAME;

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -