⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 regcomp.c

📁 PHP v6.0 For Linux 运行环境:Win9X/ WinME/ WinNT/ Win2K/ WinXP
💻 C
📖 第 1 页 / 共 5 页
字号:
/**********************************************************************
  regcomp.c -  Oniguruma (regular expression library)
**********************************************************************/
/*-
 * Copyright (c) 2002-2005  K.Kosako  <sndgk393 AT ybb DOT ne DOT jp>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include "regparse.h"

/* Process-wide default ambiguity flags; read and written by the two
   accessors below. */
OnigAmbigType OnigDefaultAmbigFlag =
  (ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE |
   ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE);

/* Return the current value of OnigDefaultAmbigFlag. */
extern OnigAmbigType
onig_get_default_ambig_flag()
{
  return OnigDefaultAmbigFlag;
}

/* Replace OnigDefaultAmbigFlag with ambig_flag.  Always returns 0. */
extern int
onig_set_default_ambig_flag(OnigAmbigType ambig_flag)
{
  OnigDefaultAmbigFlag = ambig_flag;
  return 0;
}

#ifndef PLATFORM_UNALIGNED_WORD_ACCESS
/* Source of padding bytes emitted by add_multi_byte_cclass(). */
static unsigned char PadBuf[WORD_ALIGNMENT_SIZE];
#endif

/*
  Copy the bytes in [s, end) into a freshly xmalloc'ed buffer and append
  a terminating 0 byte.  Returns NULL when the range is empty.
  NOTE(review): CHECK_NULL_RETURN is defined elsewhere; presumably it
  returns NULL on allocation failure -- confirm.
*/
static UChar*
k_strdup(UChar* s, UChar* end)
{
  int len = end - s;

  if (len > 0) {
    UChar* r = (UChar* )xmalloc(len + 1);
    CHECK_NULL_RETURN(r);
    xmemcpy(r, s, len);
    r[len] = (UChar )0;
    return r;
  }
  else return NULL;
}

/*
  Exchange the contents of two nodes by value.

  Caution: node should not be a string node.
           (s and end member address break)
*/
static void
swap_node(Node* a, Node* b)
{
  Node c;
  c = *a; *a = *b; *b = c;
}

/* Add two distances, saturating at ONIG_INFINITE_DISTANCE.  An infinite
   operand yields an infinite result. */
static OnigDistance
distance_add(OnigDistance d1, OnigDistance d2)
{
  if (d1 == ONIG_INFINITE_DISTANCE || d2 == ONIG_INFINITE_DISTANCE)
    return ONIG_INFINITE_DISTANCE;
  else {
    /* compare before adding so the sum itself can never wrap */
    if (d1 <= ONIG_INFINITE_DISTANCE - d2) return d1 + d2;
    else return ONIG_INFINITE_DISTANCE;
  }
}

/* Multiply distance d by m.  Returns 0 when m is 0, and
   ONIG_INFINITE_DISTANCE unless d is below ONIG_INFINITE_DISTANCE / m. */
static OnigDistance
distance_multiply(OnigDistance d, int m)
{
  if (m == 0) return 0;

  if (d < ONIG_INFINITE_DISTANCE / m)
    return d * m;
  else
    return ONIG_INFINITE_DISTANCE;
}

/* Return 1 when all BITSET_SIZE elements of bs are zero, otherwise 0. */
static int
bitset_is_empty(BitSetRef bs)
{
  int i;

  for (i = 0; i < BITSET_SIZE; i++) {
    if (bs[i] != 0) return 0;
  }
  return 1;
}

#ifdef ONIG_DEBUG
/* Count the positions in [0, SINGLE_BYTE_SIZE) for which BITSET_AT is
   non-zero. */
static int
bitset_on_num(BitSetRef bs)
{
  int i, n;

  n = 0;
  for (i = 0; i < SINGLE_BYTE_SIZE; i++) {
    if (BITSET_AT(bs, i)) n++;
  }
  return n;
}
#endif

/* Allocate size bytes for buf and mark it empty.  Returns 0 on success
   or ONIGERR_MEMORY when the allocation fails. */
extern int
onig_bbuf_init(BBuf* buf, int size)
{
  buf->p = (UChar* )xmalloc(size);
  if (IS_NULL(buf->p)) return(ONIGERR_MEMORY);

  buf->alloc = size;
  buf->used  = 0;
  return 0;
}


#ifdef USE_SUBEXP_CALL

/* Allocate room for size entries in uslist and reset its count.
   Returns 0, or ONIGERR_MEMORY via CHECK_NULL_RETURN_VAL. */
static int
unset_addr_list_init(UnsetAddrList* uslist, int size)
{
  UnsetAddr* p;

  p = (UnsetAddr* )xmalloc(sizeof(UnsetAddr)* size);
  CHECK_NULL_RETURN_VAL(p, ONIGERR_MEMORY);
  uslist->num   = 0;
  uslist->alloc = size;
  uslist->us    = p;
  return 0;
}

/* Free the entry array of uslist, if one was allocated. */
static void
unset_addr_list_end(UnsetAddrList* uslist)
{
  if (IS_NOT_NULL(uslist->us))
    xfree(uslist->us);
}

/* Append an (offset, node) pair to uslist, doubling its capacity when
   it is full.  Returns 0, or ONIGERR_MEMORY via CHECK_NULL_RETURN_VAL. */
static int
unset_addr_list_add(UnsetAddrList* uslist, int offset, struct _Node* node)
{
  UnsetAddr* p;
  int size;

  if (uslist->num >= uslist->alloc) {
    size = uslist->alloc * 2;
    /* keep the old pointer until xrealloc is known to have succeeded */
    p = (UnsetAddr* )xrealloc(uslist->us, sizeof(UnsetAddr) * size);
    CHECK_NULL_RETURN_VAL(p, ONIGERR_MEMORY);
    uslist->alloc = size;
    uslist->us    = p;
  }

  uslist->us[uslist->num].offset = offset;
  uslist->us[uslist->num].target = node;
  uslist->num++;
  return 0;
}
#endif /* USE_SUBEXP_CALL */


/*
  The add_*() helpers below append one item to the compiled buffer of
  reg through the BBUF_ADD / BBUF_ADD1 macros and then return 0.
  NOTE(review): the macros are defined elsewhere; callers in this file
  treat a non-zero result as an error, so the macros presumably return
  early with an error code when the buffer cannot grow -- confirm.
*/

/* Append a single opcode. */
static int
add_opcode(regex_t* reg, int opcode)
{
  BBUF_ADD1(reg, opcode);
  return 0;
}

/* Append addr as a RelAddrType (SIZE_RELADDR bytes). */
static int
add_rel_addr(regex_t* reg, int addr)
{
  RelAddrType ra = (RelAddrType )addr;

  BBUF_ADD(reg, &ra, SIZE_RELADDR);
  return 0;
}

/* Append addr as an AbsAddrType (SIZE_ABSADDR bytes). */
static int
add_abs_addr(regex_t* reg, int addr)
{
  AbsAddrType ra = (AbsAddrType )addr;

  BBUF_ADD(reg, &ra, SIZE_ABSADDR);
  return 0;
}

/* Append len as a LengthType (SIZE_LENGTH bytes). */
static int
add_length(regex_t* reg, int len)
{
  LengthType l = (LengthType )len;

  BBUF_ADD(reg, &l, SIZE_LENGTH);
  return 0;
}

/* Append num as a MemNumType (SIZE_MEMNUM bytes). */
static int
add_mem_num(regex_t* reg, int num)
{
  MemNumType n = (MemNumType )num;

  BBUF_ADD(reg, &n, SIZE_MEMNUM);
  return 0;
}

/* Append addr as a PointerType (SIZE_POINTER bytes). */
static int
add_pointer(regex_t* reg, void* addr)
{
  PointerType ptr = (PointerType )addr;

  BBUF_ADD(reg, &ptr, SIZE_POINTER);
  return 0;
}

/* Append option (SIZE_OPTION bytes). */
static int
add_option(regex_t* reg, OnigOptionType option)
{
  BBUF_ADD(reg, &option, SIZE_OPTION);
  return 0;
}

/* Append opcode followed by a relative address; stops at the first
   non-zero result and returns it. */
static int
add_opcode_rel_addr(regex_t* reg, int opcode, int addr)
{
  int r;

  r = add_opcode(reg, opcode);
  if (r) return r;
  r = add_rel_addr(reg, addr);
  return r;
}

/* Append len raw bytes starting at bytes. */
static int
add_bytes(regex_t* reg, UChar* bytes, int len)
{
  BBUF_ADD(reg, bytes, len);
  return 0;
}

/* Append the bit set bs (SIZE_BITSET bytes). */
static int
add_bitset(regex_t* reg, BitSetRef bs)
{
  BBUF_ADD(reg, bs, SIZE_BITSET);
  return 0;
}

/* Append opcode followed by option; stops at the first non-zero result
   and returns it. */
static int
add_opcode_option(regex_t* reg, int opcode, OnigOptionType option)
{
  int r;

  r = add_opcode(reg, opcode);
  if (r) return r;
  r = add_option(reg, option);
  return r;
}

static int compile_length_tree(Node* node, regex_t* reg);
static int compile_tree(Node* node, regex_t* reg);


/* True for the exact-string opcodes that are followed by an explicit
   length operand (see add_compile_string()). */
#define IS_NEED_STR_LEN_OP_EXACT(op) \
   ((op) == OP_EXACTN    || (op) == OP_EXACTMB2N ||\
    (op) == OP_EXACTMB3N || (op) == OP_EXACTMBN  || (op) == OP_EXACTN_IC)

/*
  Choose the exact-match opcode for a run of str_len characters that are
  each mb_len bytes long.  With ignore_case set, only the run length is
  considered (OP_EXACT1_IC or OP_EXACTN_IC); otherwise the opcode depends
  on both the character width and the run length.
*/
static int
select_str_opcode(int mb_len, int str_len, int ignore_case)
{
  int op;

  if (ignore_case) {
    switch (str_len) {
    case 1:  op = OP_EXACT1_IC; break;
    default: op = OP_EXACTN_IC; break;
    }
  }
  else {
    switch (mb_len) {
    case 1:
      switch (str_len) {
      case 1:  op = OP_EXACT1; break;
      case 2:  op = OP_EXACT2; break;
      case 3:  op = OP_EXACT3; break;
      case 4:  op = OP_EXACT4; break;
      case 5:  op = OP_EXACT5; break;
      default: op = OP_EXACTN; break;
      }
      break;

    case 2:
      switch (str_len) {
      case 1:  op = OP_EXACTMB2N1; break;
      case 2:  op = OP_EXACTMB2N2; break;
      case 3:  op = OP_EXACTMB2N3; break;
      default: op = OP_EXACTMB2N;  break;
      }
      break;

    case 3:
      op = OP_EXACTMB3N;
      break;

    default:
      op = OP_EXACTMBN;
      break;
    }
  }
  return op;
}

/*
  Compile node; when empty_info is non-zero, bracket the compiled code
  with OP_NULL_CHECK_START and the OP_NULL_CHECK_END* variant selected by
  empty_info.  Both ends carry the same null-check id, taken from
  reg->num_null_check before it is incremented.
  Returns 0 or the first non-zero result of an emit/compile step.
*/
static int
compile_tree_empty_check(Node* node, regex_t* reg, int empty_info)
{
  int r;
  int saved_num_null_check = reg->num_null_check;

  if (empty_info != 0) {
    r = add_opcode(reg, OP_NULL_CHECK_START);
    if (r) return r;
    r = add_mem_num(reg, reg->num_null_check); /* NULL CHECK ID */
    if (r) return r;
    reg->num_null_check++;
  }

  r = compile_tree(node, reg);
  if (r) return r;

  if (empty_info != 0) {
    if (empty_info == NQ_TARGET_IS_EMPTY)
      r = add_opcode(reg, OP_NULL_CHECK_END);
    else if (empty_info == NQ_TARGET_IS_EMPTY_MEM)
      r = add_opcode(reg, OP_NULL_CHECK_END_MEMST);
    else if (empty_info == NQ_TARGET_IS_EMPTY_REC)
      r = add_opcode(reg, OP_NULL_CHECK_END_MEMST_PUSH);

    if (r) return r;
    r = add_mem_num(reg, saved_num_null_check); /* NULL CHECK ID */
  }
  return r;
}

#ifdef USE_SUBEXP_CALL
/*
  Emit OP_CALL with a placeholder absolute address.  The buffer offset of
  the placeholder and the call target are recorded in
  node->unset_addr_list before the placeholder is written.
*/
static int
compile_call(CallNode* node, regex_t* reg)
{
  int r;

  r = add_opcode(reg, OP_CALL);
  if (r) return r;
  r = unset_addr_list_add(node->unset_addr_list, BBUF_GET_OFFSET_POS(reg),
                          node->target);
  if (r) return r;
  r = add_abs_addr(reg, 0 /*dummy addr.*/);
  return r;
}
#endif

/* Compile node n times in a row; returns the first non-zero result of
   compile_tree(), or 0. */
static int
compile_tree_n_times(Node* node, int n, regex_t* reg)
{
  int i, r;

  for (i = 0; i < n; i++) {
    r = compile_tree(node, reg);
    if (r) return r;
  }
  return 0;
}

/*
  Return the number of bytes add_compile_string() emits for the same
  mb_len / str_len / ignore_case arguments: the opcode, the optional
  length operands, and mb_len * str_len string bytes.  s and reg are not
  used.
*/
static int
add_compile_string_length(UChar* s, int mb_len, int str_len,
                          regex_t* reg, int ignore_case)
{
  int len;
  int op = select_str_opcode(mb_len, str_len, ignore_case);

  len = SIZE_OPCODE;

  if (op == OP_EXACTMBN)  len += SIZE_LENGTH;
  if (IS_NEED_STR_LEN_OP_EXACT(op))
    len += SIZE_LENGTH;

  len += mb_len * str_len;
  return len;
}

/*
  Emit the exact-match instruction for str_len characters of mb_len bytes
  each, starting at s: the opcode, then mb_len for OP_EXACTMBN, then a
  length operand for the opcodes matched by IS_NEED_STR_LEN_OP_EXACT
  (a byte count for OP_EXACTN_IC, a character count otherwise), then the
  string bytes.

  Returns 0, or the first non-zero result of an emit step.  Each step is
  checked so that a failure is reported to the caller instead of being
  dropped, matching the other emitters in this file.
*/
static int
add_compile_string(UChar* s, int mb_len, int str_len,
                   regex_t* reg, int ignore_case)
{
  int r;
  int op = select_str_opcode(mb_len, str_len, ignore_case);

  r = add_opcode(reg, op);
  if (r) return r;

  if (op == OP_EXACTMBN) {
    r = add_length(reg, mb_len);
    if (r) return r;
  }

  if (IS_NEED_STR_LEN_OP_EXACT(op)) {
    if (op == OP_EXACTN_IC)
      r = add_length(reg, mb_len * str_len);
    else
      r = add_length(reg, str_len);
    if (r) return r;
  }

  return add_bytes(reg, s, mb_len * str_len);
}


/*
  Return the compiled size of a string node.  The string is split into
  runs of consecutive characters with the same encoded length, and the
  size of each run is summed.  An empty string contributes 0.
*/
static int
compile_length_string_node(Node* node, regex_t* reg)
{
  int rlen, r, len, prev_len, slen, ambig;
  OnigEncoding enc = reg->enc;
  UChar *p, *prev;
  StrNode* sn;

  sn = &(NSTRING(node));
  if (sn->end <= sn->s)
    return 0;

  ambig = NSTRING_IS_AMBIG(node);

  p = prev = sn->s;
  prev_len = enc_len(enc, p);
  p += prev_len;
  slen = 1;
  rlen = 0;

  for (; p < sn->end; ) {
    len = enc_len(enc, p);
    if (len == prev_len) {
      slen++;
    }
    else {
      /* character width changed: account for the run that just ended */
      r = add_compile_string_length(prev, prev_len, slen, reg, ambig);
      rlen += r;
      prev = p;
      slen = 1;
      prev_len = len;
    }
    p += len;
  }
  r = add_compile_string_length(prev, prev_len, slen, reg, ambig);
  rlen += r;
  return rlen;
}

/* Return the compiled size of a raw string node, treated as one run of
   single-byte characters without case folding; 0 when it is empty. */
static int
compile_length_string_raw_node(StrNode* sn, regex_t* reg)
{
  if (sn->end <= sn->s)
    return 0;

  return add_compile_string_length(sn->s, 1 /* sb */, sn->end - sn->s, reg, 0);
}

/*
  Emit the instructions for a string node, one exact-match instruction
  per run of consecutive characters with the same encoded length.
  Returns 0 (also for an empty string) or the first non-zero result of
  add_compile_string().
*/
static int
compile_string_node(Node* node, regex_t* reg)
{
  int r, len, prev_len, slen, ambig;
  OnigEncoding enc = reg->enc;
  UChar *p, *prev, *end;
  StrNode* sn;

  sn = &(NSTRING(node));
  if (sn->end <= sn->s)
    return 0;

  end = sn->end;
  ambig = NSTRING_IS_AMBIG(node);

  p = prev = sn->s;
  prev_len = enc_len(enc, p);
  p += prev_len;
  slen = 1;

  for (; p < end; ) {
    len = enc_len(enc, p);
    if (len == prev_len) {
      slen++;
    }
    else {
      /* character width changed: flush the run that just ended */
      r = add_compile_string(prev, prev_len, slen, reg, ambig);
      if (r) return r;

      prev  = p;
      slen  = 1;
      prev_len = len;
    }

    p += len;
  }
  return add_compile_string(prev, prev_len, slen, reg, ambig);
}

/* Emit a raw string node as one run of single-byte characters without
   case folding; emits nothing and returns 0 when it is empty. */
static int
compile_string_raw_node(StrNode* sn, regex_t* reg)
{
  if (sn->end <= sn->s)
    return 0;

  return add_compile_string(sn->s, 1 /* sb */, sn->end - sn->s, reg, 0);
}

/*
  Emit the multi-byte part of a character class: a length operand
  followed by the mbuf->used bytes at mbuf->p.

  Without PLATFORM_UNALIGNED_WORD_ACCESS the data is preceded by the pad
  computed by GET_ALIGNMENT_PAD_SIZE and followed by the remainder of
  (WORD_ALIGNMENT_SIZE - 1) bytes, so the emitted size is the same for
  every pad size.

  Returns 0, or the first non-zero result of an emit step.
*/
static int
add_multi_byte_cclass(BBuf* mbuf, regex_t* reg)
{
#ifdef PLATFORM_UNALIGNED_WORD_ACCESS
  int r;

  r = add_length(reg, mbuf->used);
  if (r) return r;
  return add_bytes(reg, mbuf->p, mbuf->used);
#else
  int r, pad_size;
  UChar* p = BBUF_GET_ADD_ADDRESS(reg) + SIZE_LENGTH;

  GET_ALIGNMENT_PAD_SIZE(p, pad_size);
  r = add_length(reg, mbuf->used + (WORD_ALIGNMENT_SIZE - 1));
  if (r) return r;
  if (pad_size != 0) {
    r = add_bytes(reg, PadBuf, pad_size);
    if (r) return r;
  }

  r = add_bytes(reg, mbuf->p, mbuf->used);
  if (r) return r;

  /* Pad the tail so the total matches the fixed size reported by
     compile_length_cclass_node(). */
  pad_size = (WORD_ALIGNMENT_SIZE - 1) - pad_size;
  if (pad_size != 0) r = add_bytes(reg, PadBuf, pad_size);
  return r;
#endif
}

/*
  Return the compiled size of a character class node.  A shared class is
  an opcode plus a pointer.  A class without mbuf is an opcode plus a bit
  set.  A class with mbuf is an opcode, an optional bit set (omitted when
  the encoding's minimum character length exceeds 1 or the bit set is
  empty), a length operand and the mbuf bytes, plus
  (WORD_ALIGNMENT_SIZE - 1) padding bytes when
  PLATFORM_UNALIGNED_WORD_ACCESS is not defined.
*/
static int
compile_length_cclass_node(CClassNode* cc, regex_t* reg)
{
  int len;

  if (IS_CCLASS_SHARE(cc)) {
    len = SIZE_OPCODE + SIZE_POINTER;
    return len;
  }

  if (IS_NULL(cc->mbuf)) {
    len = SIZE_OPCODE + SIZE_BITSET;
  }
  else {
    if (ONIGENC_MBC_MINLEN(reg->enc) > 1 || bitset_is_empty(cc->bs)) {
      len = SIZE_OPCODE;
    }
    else {
      len = SIZE_OPCODE + SIZE_BITSET;
    }
#ifdef PLATFORM_UNALIGNED_WORD_ACCESS
    len += SIZE_LENGTH + cc->mbuf->used;
#else
    len += SIZE_LENGTH + cc->mbuf->used + (WORD_ALIGNMENT_SIZE - 1);
#endif
  }

  return len;
}

/* NOTE(review): the listing is cut off inside this function; the visible
   part is reproduced unchanged. */
static int
compile_cclass_node(CClassNode* cc, regex_t* reg)
{
  int r;

  if (IS_CCLASS_SHARE(cc)) {
    add_opcode(reg, OP_CCLASS_NODE);
    r = add_pointer(reg, cc);
    return r;
  }

  if (IS_NULL(cc->mbuf)) {
    if (IS_CCLASS_NOT(cc))
      add_opcode(reg, OP_CCLASS_NOT);
    else

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -