📄 regcomp.c
字号:
/**********************************************************************
  regcomp.c - Oniguruma (regular expression library)
**********************************************************************/
/*-
 * Copyright (c) 2002-2005 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include "regparse.h"

/* Process-wide default ambiguity (case-folding) flag set. */
OnigAmbigType OnigDefaultAmbigFlag =
  (ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE |
   ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE);

/* Return the current value of OnigDefaultAmbigFlag. */
extern OnigAmbigType
onig_get_default_ambig_flag(void)
{
  return OnigDefaultAmbigFlag;
}

/* Replace OnigDefaultAmbigFlag with ambig_flag.  Always returns 0. */
extern int
onig_set_default_ambig_flag(OnigAmbigType ambig_flag)
{
  OnigDefaultAmbigFlag = ambig_flag;
  return 0;
}

#ifndef PLATFORM_UNALIGNED_WORD_ACCESS
/* Source of the padding bytes emitted by add_multi_byte_cclass(). */
static unsigned char PadBuf[WORD_ALIGNMENT_SIZE];
#endif

/*
 * Copy the bytes in [s, end) into a freshly xmalloc'ed buffer and append
 * a terminating 0 byte.
 * Returns the new buffer, or NULL when the range is empty (end <= s).
 * NOTE(review): on allocation failure CHECK_NULL_RETURN (defined elsewhere)
 * presumably returns NULL as well -- confirm against its definition.
 */
static UChar*
k_strdup(UChar* s, UChar* end)
{
  int len = end - s;

  if (len > 0) {
    UChar* r = (UChar* )xmalloc(len + 1);
    CHECK_NULL_RETURN(r);
    xmemcpy(r, s, len);
    r[len] = (UChar )0;
    return r;
  }
  else return NULL;
}

/*
 * Exchange the contents of *a and *b by value.
 * Caution: node should not be a string node.
 *          (s and end member address break)
 */
static void
swap_node(Node* a, Node* b)
{
  Node c;
  c = *a; *a = *b; *b = c;
}

/*
 * Saturating addition of two distances: the result is
 * ONIG_INFINITE_DISTANCE when either operand already is, or when the sum
 * would exceed it.
 */
static OnigDistance
distance_add(OnigDistance d1, OnigDistance d2)
{
  if (d1 == ONIG_INFINITE_DISTANCE || d2 == ONIG_INFINITE_DISTANCE)
    return ONIG_INFINITE_DISTANCE;
  else {
    if (d1 <= ONIG_INFINITE_DISTANCE - d2) return d1 + d2;
    else return ONIG_INFINITE_DISTANCE;
  }
}

/*
 * Multiply distance d by m.  Returns 0 when m is 0, d * m when
 * d < ONIG_INFINITE_DISTANCE / m, and ONIG_INFINITE_DISTANCE otherwise.
 */
static OnigDistance
distance_multiply(OnigDistance d, int m)
{
  if (m == 0) return 0;

  if (d < ONIG_INFINITE_DISTANCE / m)
    return d * m;
  else
    return ONIG_INFINITE_DISTANCE;
}

/* Return 1 when all BITSET_SIZE elements of bs are zero, otherwise 0. */
static int
bitset_is_empty(BitSetRef bs)
{
  int i;
  for (i = 0; i < BITSET_SIZE; i++) {
    if (bs[i] != 0) return 0;
  }
  return 1;
}

#ifdef ONIG_DEBUG
/*
 * Count the positions i in [0, SINGLE_BYTE_SIZE) for which
 * BITSET_AT(bs, i) is non-zero.
 */
static int
bitset_on_num(BitSetRef bs)
{
  int i, n;

  n = 0;
  for (i = 0; i < SINGLE_BYTE_SIZE; i++) {
    if (BITSET_AT(bs, i)) n++;
  }
  return n;
}
#endif

/*
 * Allocate size bytes for buf and mark the buffer as empty.
 * Returns 0 on success or ONIGERR_MEMORY when the allocation fails.
 */
extern int
onig_bbuf_init(BBuf* buf, int size)
{
  buf->p = (UChar* )xmalloc(size);
  if (IS_NULL(buf->p)) return(ONIGERR_MEMORY);

  buf->alloc = size;
  buf->used = 0;
  return 0;
}

#ifdef USE_SUBEXP_CALL

/*
 * Allocate room for size UnsetAddr entries and reset the entry count.
 * NOTE(review): CHECK_NULL_RETURN_VAL (defined elsewhere) presumably
 * returns ONIGERR_MEMORY from this function when the allocation fails.
 */
static int
unset_addr_list_init(UnsetAddrList* uslist, int size)
{
  UnsetAddr* p;

  p = (UnsetAddr* )xmalloc(sizeof(UnsetAddr)* size);
  CHECK_NULL_RETURN_VAL(p, ONIGERR_MEMORY);
  uslist->num = 0;
  uslist->alloc = size;
  uslist->us = p;
  return 0;
}

/* Release the entry array of uslist, if one was allocated. */
static void
unset_addr_list_end(UnsetAddrList* uslist)
{
  if (IS_NOT_NULL(uslist->us))
    xfree(uslist->us);
}

/*
 * Append an (offset, node) entry to uslist, doubling the array capacity
 * first when it is full.  The reallocated pointer is stored only after
 * the NULL check, so the old array stays reachable on failure.
 * Returns 0 on success.
 */
static int
unset_addr_list_add(UnsetAddrList* uslist, int offset, struct _Node* node)
{
  UnsetAddr* p;
  int size;

  if (uslist->num >= uslist->alloc) {
    size = uslist->alloc * 2;
    p = (UnsetAddr* )xrealloc(uslist->us, sizeof(UnsetAddr) * size);
    CHECK_NULL_RETURN_VAL(p, ONIGERR_MEMORY);
    uslist->alloc = size;
    uslist->us = p;
  }

  uslist->us[uslist->num].offset = offset;
  uslist->us[uslist->num].target = node;
  uslist->num++;
  return 0;
}
#endif /* USE_SUBEXP_CALL */

/*
 * Low-level emitters.  Each add_* function appends one item to reg through
 * BBUF_ADD / BBUF_ADD1 and returns 0 on the path visible here.
 * NOTE(review): BBUF_ADD / BBUF_ADD1 are macros defined elsewhere and
 * presumably return an error code from the enclosing function when the
 * buffer cannot grow; callers in this file test the result for non-zero.
 */

/* Append a single opcode. */
static int
add_opcode(regex_t* reg, int opcode)
{
  BBUF_ADD1(reg, opcode);
  return 0;
}

/* Append addr as a RelAddrType (SIZE_RELADDR bytes). */
static int
add_rel_addr(regex_t* reg, int addr)
{
  RelAddrType ra = (RelAddrType )addr;

  BBUF_ADD(reg, &ra, SIZE_RELADDR);
  return 0;
}

/* Append addr as an AbsAddrType (SIZE_ABSADDR bytes). */
static int
add_abs_addr(regex_t* reg, int addr)
{
  AbsAddrType ra = (AbsAddrType )addr;

  BBUF_ADD(reg, &ra, SIZE_ABSADDR);
  return 0;
}

/* Append len as a LengthType (SIZE_LENGTH bytes). */
static int
add_length(regex_t* reg, int len)
{
  LengthType l = (LengthType )len;

  BBUF_ADD(reg, &l, SIZE_LENGTH);
  return 0;
}

/* Append num as a MemNumType (SIZE_MEMNUM bytes). */
static int
add_mem_num(regex_t* reg, int num)
{
  MemNumType n = (MemNumType )num;

  BBUF_ADD(reg, &n, SIZE_MEMNUM);
  return 0;
}

/* Append addr as a PointerType (SIZE_POINTER bytes). */
static int
add_pointer(regex_t* reg, void* addr)
{
  PointerType ptr = (PointerType )addr;

  BBUF_ADD(reg, &ptr, SIZE_POINTER);
  return 0;
}

/* Append option (SIZE_OPTION bytes). */
static int
add_option(regex_t* reg, OnigOptionType option)
{
  BBUF_ADD(reg, &option, SIZE_OPTION);
  return 0;
}

/* Append opcode followed by a relative address; stops at the first error. */
static int
add_opcode_rel_addr(regex_t* reg, int opcode, int addr)
{
  int r;

  r = add_opcode(reg, opcode);
  if (r) return r;
  r = add_rel_addr(reg, addr);
  return r;
}

/* Append len raw bytes starting at bytes. */
static int
add_bytes(regex_t* reg, UChar* bytes, int len)
{
  BBUF_ADD(reg, bytes, len);
  return 0;
}

/* Append the SIZE_BITSET bytes of bs. */
static int
add_bitset(regex_t* reg, BitSetRef bs)
{
  BBUF_ADD(reg, bs, SIZE_BITSET);
  return 0;
}

/* Append opcode followed by an option value; stops at the first error. */
static int
add_opcode_option(regex_t* reg, int opcode, OnigOptionType option)
{
  int r;

  r = add_opcode(reg, opcode);
  if (r) return r;
  r = add_option(reg, option);
  return r;
}

static int compile_length_tree(Node* node, regex_t* reg);
static int compile_tree(Node* node, regex_t* reg);


/* True for the exact-string opcodes that carry an explicit length operand. */
#define IS_NEED_STR_LEN_OP_EXACT(op) \
   ((op) == OP_EXACTN    || (op) == OP_EXACTMB2N ||\
    (op) == OP_EXACTMB3N || (op) == OP_EXACTMBN  || (op) == OP_EXACTN_IC)

/*
 * Pick the exact-match opcode for a run of str_len characters that are
 * each mb_len bytes long.  With ignore_case set, only str_len matters
 * (OP_EXACT1_IC for one character, OP_EXACTN_IC otherwise).
 */
static int
select_str_opcode(int mb_len, int str_len, int ignore_case)
{
  int op;

  if (ignore_case) {
    switch (str_len) {
    case 1:  op = OP_EXACT1_IC; break;
    default: op = OP_EXACTN_IC; break;
    }
  }
  else {
    switch (mb_len) {
    case 1:
      switch (str_len) {
      case 1:  op = OP_EXACT1; break;
      case 2:  op = OP_EXACT2; break;
      case 3:  op = OP_EXACT3; break;
      case 4:  op = OP_EXACT4; break;
      case 5:  op = OP_EXACT5; break;
      default: op = OP_EXACTN; break;
      }
      break;

    case 2:
      switch (str_len) {
      case 1:  op = OP_EXACTMB2N1; break;
      case 2:  op = OP_EXACTMB2N2; break;
      case 3:  op = OP_EXACTMB2N3; break;
      default: op = OP_EXACTMB2N;  break;
      }
      break;

    case 3:
      op = OP_EXACTMB3N;
      break;

    default:
      op = OP_EXACTMBN;
      break;
    }
  }
  return op;
}

/*
 * Compile node; when empty_info is non-zero, bracket the compiled code
 * with OP_NULL_CHECK_START and the OP_NULL_CHECK_END* variant selected by
 * empty_info.  Both brackets carry the same null-check id: the value of
 * reg->num_null_check on entry, which is then incremented.
 * Returns 0 on success or the first non-zero result of an emitter.
 */
static int
compile_tree_empty_check(Node* node, regex_t* reg, int empty_info)
{
  int r;
  int saved_num_null_check = reg->num_null_check;

  if (empty_info != 0) {
    r = add_opcode(reg, OP_NULL_CHECK_START);
    if (r) return r;
    r = add_mem_num(reg, reg->num_null_check); /* NULL CHECK ID */
    if (r) return r;
    reg->num_null_check++;
  }

  r = compile_tree(node, reg);
  if (r) return r;

  if (empty_info != 0) {
    if (empty_info == NQ_TARGET_IS_EMPTY)
      r = add_opcode(reg, OP_NULL_CHECK_END);
    else if (empty_info == NQ_TARGET_IS_EMPTY_MEM)
      r = add_opcode(reg, OP_NULL_CHECK_END_MEMST);
    else if (empty_info == NQ_TARGET_IS_EMPTY_REC)
      r = add_opcode(reg, OP_NULL_CHECK_END_MEMST_PUSH);

    if (r) return r;
    r = add_mem_num(reg, saved_num_null_check); /* NULL CHECK ID */
  }
  return r;
}

#ifdef USE_SUBEXP_CALL
/*
 * Emit OP_CALL with a placeholder absolute address (0).  The position of
 * the placeholder and the call target are recorded in
 * node->unset_addr_list beforehand.
 */
static int
compile_call(CallNode* node, regex_t* reg)
{
  int r;

  r = add_opcode(reg, OP_CALL);
  if (r) return r;
  r = unset_addr_list_add(node->unset_addr_list, BBUF_GET_OFFSET_POS(reg),
                          node->target);
  if (r) return r;
  r = add_abs_addr(reg, 0 /*dummy addr.*/);
  return r;
}
#endif

/* Compile node n times in a row; stops at the first non-zero result. */
static int
compile_tree_n_times(Node* node, int n, regex_t* reg)
{
  int i, r;

  for (i = 0; i < n; i++) {
    r = compile_tree(node, reg);
    if (r) return r;
  }
  return 0;
}

/*
 * Number of bytes add_compile_string() emits for the same arguments:
 * the opcode, one length operand for OP_EXACTMBN, one more for every
 * opcode matched by IS_NEED_STR_LEN_OP_EXACT, and the string bytes.
 * s and reg are not used here.
 */
static int
add_compile_string_length(UChar* s, int mb_len, int str_len,
                          regex_t* reg, int ignore_case)
{
  int len;
  int op = select_str_opcode(mb_len, str_len, ignore_case);

  len = SIZE_OPCODE;

  if (op == OP_EXACTMBN)  len += SIZE_LENGTH;
  if (IS_NEED_STR_LEN_OP_EXACT(op))
    len += SIZE_LENGTH;

  len += mb_len * str_len;
  return len;
}

/*
 * Emit the exact-match instruction for str_len characters of mb_len
 * bytes each, starting at s: opcode, optional operands, then the bytes.
 * Returns 0 on success, or the first non-zero result reported by an
 * emitter, so a failure is not silently dropped.
 */
static int
add_compile_string(UChar* s, int mb_len, int str_len,
                   regex_t* reg, int ignore_case)
{
  int r;
  int op = select_str_opcode(mb_len, str_len, ignore_case);

  r = add_opcode(reg, op);
  if (r) return r;

  if (op == OP_EXACTMBN) {
    r = add_length(reg, mb_len);
    if (r) return r;
  }

  if (IS_NEED_STR_LEN_OP_EXACT(op)) {
    /* OP_EXACTN_IC takes a byte count; the others take a character count. */
    if (op == OP_EXACTN_IC)
      r = add_length(reg, mb_len * str_len);
    else
      r = add_length(reg, str_len);
    if (r) return r;
  }

  return add_bytes(reg, s, mb_len * str_len);
}

/*
 * Compute the compiled size of a string node.  The string is split into
 * runs of characters with equal encoded length (enc_len); each run is
 * sized with add_compile_string_length().  Returns 0 for an empty string.
 */
static int
compile_length_string_node(Node* node, regex_t* reg)
{
  int rlen, r, len, prev_len, slen, ambig;
  OnigEncoding enc = reg->enc;
  UChar *p, *prev;
  StrNode* sn;

  sn = &(NSTRING(node));
  if (sn->end <= sn->s)
    return 0;

  ambig = NSTRING_IS_AMBIG(node);

  p = prev = sn->s;
  prev_len = enc_len(enc, p);
  p += prev_len;
  slen = 1;
  rlen = 0;

  for (; p < sn->end; ) {
    len = enc_len(enc, p);
    if (len == prev_len) {
      slen++;
    }
    else {
      /* character width changed: close the current run */
      r = add_compile_string_length(prev, prev_len, slen, reg, ambig);
      rlen += r;
      prev = p;
      slen = 1;
      prev_len = len;
    }

    p += len;
  }
  r = add_compile_string_length(prev, prev_len, slen, reg, ambig);
  rlen += r;
  return rlen;
}

/*
 * Compiled size of a raw string node: the bytes are sized as one run of
 * single-byte characters without case folding.  Returns 0 when empty.
 */
static int
compile_length_string_raw_node(StrNode* sn, regex_t* reg)
{
  if (sn->end <= sn->s)
    return 0;

  return add_compile_string_length(sn->s, 1 /* sb */, sn->end - sn->s, reg, 0);
}

/*
 * Emit code for a string node, one exact-match instruction per run of
 * characters with equal encoded length (same splitting as
 * compile_length_string_node()).  Returns 0 for an empty string,
 * otherwise the result of add_compile_string().
 */
static int
compile_string_node(Node* node, regex_t* reg)
{
  int r, len, prev_len, slen, ambig;
  OnigEncoding enc = reg->enc;
  UChar *p, *prev, *end;
  StrNode* sn;

  sn = &(NSTRING(node));
  if (sn->end <= sn->s)
    return 0;

  end = sn->end;
  ambig = NSTRING_IS_AMBIG(node);

  p = prev = sn->s;
  prev_len = enc_len(enc, p);
  p += prev_len;
  slen = 1;

  for (; p < end; ) {
    len = enc_len(enc, p);
    if (len == prev_len) {
      slen++;
    }
    else {
      /* character width changed: emit the current run */
      r = add_compile_string(prev, prev_len, slen, reg, ambig);
      if (r) return r;

      prev = p;
      slen = 1;
      prev_len = len;
    }

    p += len;
  }
  return add_compile_string(prev, prev_len, slen, reg, ambig);
}

/*
 * Emit code for a raw string node as one run of single-byte characters
 * without case folding.  Returns 0 when the string is empty.
 */
static int
compile_string_raw_node(StrNode* sn, regex_t* reg)
{
  if (sn->end <= sn->s)
    return 0;

  return add_compile_string(sn->s, 1 /* sb */, sn->end - sn->s, reg, 0);
}

/*
 * Emit the multi-byte range data of a character class: a length operand
 * followed by the mbuf->used bytes of mbuf->p.  Without
 * PLATFORM_UNALIGNED_WORD_ACCESS, pad bytes from PadBuf are inserted
 * before and after the data; the two pads always total
 * WORD_ALIGNMENT_SIZE - 1 bytes.
 * Returns 0 on success, or the first non-zero result reported by an
 * emitter.
 */
static int
add_multi_byte_cclass(BBuf* mbuf, regex_t* reg)
{
#ifdef PLATFORM_UNALIGNED_WORD_ACCESS
  int r;

  r = add_length(reg, mbuf->used);
  if (r) return r;
  return add_bytes(reg, mbuf->p, mbuf->used);
#else
  int r, pad_size;
  UChar* p = BBUF_GET_ADD_ADDRESS(reg) + SIZE_LENGTH;

  GET_ALIGNMENT_PAD_SIZE(p, pad_size);
  r = add_length(reg, mbuf->used + (WORD_ALIGNMENT_SIZE - 1));
  if (r) return r;
  if (pad_size != 0) {
    r = add_bytes(reg, PadBuf, pad_size);
    if (r) return r;
  }

  r = add_bytes(reg, mbuf->p, mbuf->used);
  if (r) return r;

  /* Trailing padding, so that the emitted size always equals the value
     returned by compile_length_cclass_node(). */
  pad_size = (WORD_ALIGNMENT_SIZE - 1) - pad_size;
  if (pad_size != 0) r = add_bytes(reg, PadBuf, pad_size);
  return r;
#endif
}

/*
 * Compiled size of a character class node:
 *  - shared class: opcode + pointer;
 *  - no multi-byte buffer: opcode + bitset;
 *  - otherwise: opcode, the bitset unless the encoding's minimum
 *    character length exceeds 1 or the bitset is empty, then the length
 *    operand and multi-byte data (plus WORD_ALIGNMENT_SIZE - 1 bytes of
 *    padding without PLATFORM_UNALIGNED_WORD_ACCESS).
 */
static int
compile_length_cclass_node(CClassNode* cc, regex_t* reg)
{
  int len;

  if (IS_CCLASS_SHARE(cc)) {
    len = SIZE_OPCODE + SIZE_POINTER;
    return len;
  }

  if (IS_NULL(cc->mbuf)) {
    len = SIZE_OPCODE + SIZE_BITSET;
  }
  else {
    if (ONIGENC_MBC_MINLEN(reg->enc) > 1 || bitset_is_empty(cc->bs)) {
      len = SIZE_OPCODE;
    }
    else {
      len = SIZE_OPCODE + SIZE_BITSET;
    }
#ifdef PLATFORM_UNALIGNED_WORD_ACCESS
    len += SIZE_LENGTH + cc->mbuf->used;
#else
    len += SIZE_LENGTH + cc->mbuf->used + (WORD_ALIGNMENT_SIZE - 1);
#endif
  }

  return len;
}

static int
compile_cclass_node(CClassNode* cc, regex_t* reg)
{
  int r;

  if (IS_CCLASS_SHARE(cc)) {
    add_opcode(reg, OP_CCLASS_NODE);
    r = add_pointer(reg, cc);
    return r;
  }

  if (IS_NULL(cc->mbuf)) {
    if (IS_CCLASS_NOT(cc))
      add_opcode(reg, OP_CCLASS_NOT);
    else
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -