📄 regparse.c
字号:
/********************************************************************** regparse.c - Oniguruma (regular expression library)**********************************************************************//*- * Copyright (c) 2002-2005 K.Kosako <sndgk393 AT ybb DOT ne DOT jp> * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */#include "regparse.h"#define WARN_BUFSIZE 256OnigSyntaxType OnigSyntaxRuby = { (( SYN_GNU_REGEX_OP | ONIG_SYN_OP_QMARK_NON_GREEDY | ONIG_SYN_OP_ESC_OCTAL3 | ONIG_SYN_OP_ESC_X_HEX2 | ONIG_SYN_OP_ESC_X_BRACE_HEX8 | ONIG_SYN_OP_ESC_CONTROL_CHARS | ONIG_SYN_OP_ESC_C_CONTROL ) & ~ONIG_SYN_OP_ESC_LTGT_WORD_BEGIN_END ) , ( ONIG_SYN_OP2_QMARK_GROUP_EFFECT | ONIG_SYN_OP2_OPTION_RUBY | ONIG_SYN_OP2_QMARK_LT_NAMED_GROUP | ONIG_SYN_OP2_ESC_K_NAMED_BACKREF | ONIG_SYN_OP2_ESC_G_SUBEXP_CALL | ONIG_SYN_OP2_PLUS_POSSESSIVE_REPEAT | ONIG_SYN_OP2_CCLASS_SET_OP | ONIG_SYN_OP2_ESC_CAPITAL_C_BAR_CONTROL | ONIG_SYN_OP2_ESC_CAPITAL_M_BAR_META | ONIG_SYN_OP2_ESC_V_VTAB | ONIG_SYN_OP2_ESC_H_XDIGIT ) , ( SYN_GNU_REGEX_BV | ONIG_SYN_ALLOW_INTERVAL_LOW_ABBREV | ONIG_SYN_DIFFERENT_LEN_ALT_LOOK_BEHIND | ONIG_SYN_CAPTURE_ONLY_NAMED_GROUP | ONIG_SYN_ALLOW_MULTIPLEX_DEFINITION_NAME | ONIG_SYN_FIXED_INTERVAL_IS_GREEDY_ONLY | ONIG_SYN_WARN_CC_OP_NOT_ESCAPED | ONIG_SYN_WARN_REDUNDANT_NESTED_REPEAT ) , ONIG_OPTION_NONE};OnigSyntaxType* OnigDefaultSyntax = ONIG_SYNTAX_RUBY;extern void onig_null_warn(const char* s) { }#ifdef DEFAULT_WARN_FUNCTIONstatic OnigWarnFunc onig_warn = (OnigWarnFunc )DEFAULT_WARN_FUNCTION;#elsestatic OnigWarnFunc onig_warn = onig_null_warn;#endif#ifdef DEFAULT_VERB_WARN_FUNCTIONstatic OnigWarnFunc onig_verb_warn = (OnigWarnFunc )DEFAULT_VERB_WARN_FUNCTION;#elsestatic OnigWarnFunc onig_verb_warn = onig_null_warn;#endifextern void onig_set_warn_func(OnigWarnFunc f){ onig_warn = f;}extern void onig_set_verb_warn_func(OnigWarnFunc f){ onig_verb_warn = f;}static voidbbuf_free(BBuf* bbuf){ if (IS_NOT_NULL(bbuf)) { if (IS_NOT_NULL(bbuf->p)) xfree(bbuf->p); xfree(bbuf); }}static intbbuf_clone(BBuf** rto, BBuf* from){ int r; BBuf *to; *rto = to = (BBuf* )xmalloc(sizeof(BBuf)); CHECK_NULL_RETURN_VAL(to, ONIGERR_MEMORY); r = BBUF_INIT(to, from->alloc); if (r != 0) return r; to->used = from->used; xmemcpy(to->p, from->p, from->used); return 0;}#define ONOFF(v,f,negative) (negative) ? ((v) &= ~(f)) : ((v) |= (f))#define MBCODE_START_POS(enc) \ (OnigCodePoint )(ONIGENC_MBC_MINLEN(enc) > 1 ? 0 : 0x80)#define SET_ALL_MULTI_BYTE_RANGE(enc, pbuf) \ add_code_range_to_buf(pbuf, MBCODE_START_POS(enc), ~((OnigCodePoint )0))#define ADD_ALL_MULTI_BYTE_RANGE(enc, mbuf) do {\ if (! ONIGENC_IS_SINGLEBYTE(enc)) {\ r = SET_ALL_MULTI_BYTE_RANGE(enc, &(mbuf));\ if (r) return r;\ }\} while (0)#define BITSET_IS_EMPTY(bs,empty) do {\ int i;\ empty = 1;\ for (i = 0; i < BITSET_SIZE; i++) {\ if ((bs)[i] != 0) {\ empty = 0; break;\ }\ }\} while (0)static voidbitset_set_range(BitSetRef bs, int from, int to){ int i; for (i = from; i <= to && i < SINGLE_BYTE_SIZE; i++) { BITSET_SET_BIT(bs, i); }}#if 0static voidbitset_set_all(BitSetRef bs){ int i; for (i = 0; i < BITSET_SIZE; i++) { bs[i] = ~((Bits )0); }}#endifstatic voidbitset_invert(BitSetRef bs){ int i; for (i = 0; i < BITSET_SIZE; i++) { bs[i] = ~(bs[i]); }}static voidbitset_invert_to(BitSetRef from, BitSetRef to){ int i; for (i = 0; i < BITSET_SIZE; i++) { to[i] = ~(from[i]); }}static voidbitset_and(BitSetRef dest, BitSetRef bs){ int i; for (i = 0; i < BITSET_SIZE; i++) { dest[i] &= bs[i]; }}static voidbitset_or(BitSetRef dest, BitSetRef bs){ int i; for (i = 0; i < BITSET_SIZE; i++) { dest[i] |= bs[i]; }}static voidbitset_copy(BitSetRef dest, BitSetRef bs){ int i; for (i = 0; i < BITSET_SIZE; i++) { dest[i] = bs[i]; }}extern intonig_strncmp(const UChar* s1, const UChar* s2, int n){ int x; while (n-- > 0) { x = *s2++ - *s1++; if (x) return x; } return 0;}static voidk_strcpy(UChar* dest, const UChar* src, const UChar* end){ int len = end - src; if (len > 0) { xmemcpy(dest, src, len); dest[len] = (UChar )0; }}static UChar*strdup_with_null(OnigEncoding enc, UChar* s, UChar* end){ int slen, term_len, i; UChar *r; slen = end - s; term_len = ONIGENC_MBC_MINLEN(enc); r = (UChar* )xmalloc(slen + term_len); CHECK_NULL_RETURN(r); xmemcpy(r, s, slen); for (i = 0; i < term_len; i++) r[slen + i] = (UChar )0; return r;}/* scan pattern methods */#define PEND_VALUE 0#define PFETCH_READY UChar* pfetch_prev#define PEND (p < end ? 0 : 1)#define PUNFETCH p = pfetch_prev#define PINC do { \ pfetch_prev = p; \ p += ONIGENC_MBC_ENC_LEN(enc, p); \} while (0)#define PFETCH(c) do { \ c = ONIGENC_MBC_TO_CODE(enc, p, end); \ pfetch_prev = p; \ p += ONIGENC_MBC_ENC_LEN(enc, p); \} while (0)#define PPEEK (p < end ? ONIGENC_MBC_TO_CODE(enc, p, end) : PEND_VALUE)#define PPEEK_IS(c) (PPEEK == (OnigCodePoint )c)static UChar*k_strcat_capa(UChar* dest, UChar* dest_end, const UChar* src, const UChar* src_end, int capa){ UChar* r; if (dest) r = (UChar* )xrealloc(dest, capa + 1); else r = (UChar* )xmalloc(capa + 1); CHECK_NULL_RETURN(r); k_strcpy(r + (dest_end - dest), src, src_end); return r;}/* dest on static area */static UChar*strcat_capa_from_static(UChar* dest, UChar* dest_end, const UChar* src, const UChar* src_end, int capa){ UChar* r; r = (UChar* )xmalloc(capa + 1); CHECK_NULL_RETURN(r); k_strcpy(r, dest, dest_end); k_strcpy(r + (dest_end - dest), src, src_end); return r;}#ifdef USE_NAMED_GROUP#define INIT_NAME_BACKREFS_ALLOC_NUM 8typedef struct { UChar* name; int name_len; /* byte length */ int back_num; /* number of backrefs */ int back_alloc; int back_ref1; int* back_refs;} NameEntry;#ifdef USE_ST_HASH_TABLE#include "st.h"typedef st_table NameTable;typedef st_data_t HashDataType; /* 1.6 st.h doesn't define st_data_t type */#define NAMEBUF_SIZE 24#define NAMEBUF_SIZE_1 25#ifdef ONIG_DEBUGstatic inti_print_name_entry(UChar* key, NameEntry* e, void* arg){ int i; FILE* fp = (FILE* )arg; fprintf(fp, "%s: ", e->name); if (e->back_num == 0) fputs("-", fp); else if (e->back_num == 1) fprintf(fp, "%d", e->back_ref1); else { for (i = 0; i < e->back_num; i++) { if (i > 0) fprintf(fp, ", "); fprintf(fp, "%d", e->back_refs[i]); } } fputs("\n", fp); return ST_CONTINUE;}extern intonig_print_names(FILE* fp, regex_t* reg){ NameTable* t = (NameTable* )reg->name_table; if (IS_NOT_NULL(t)) { fprintf(fp, "name table\n"); onig_st_foreach(t, i_print_name_entry, (HashDataType )fp); fputs("\n", fp); } return 0;}#endifstatic inti_free_name_entry(UChar* key, NameEntry* e, void* arg){ xfree(e->name); /* == key */ if (IS_NOT_NULL(e->back_refs)) xfree(e->back_refs); return ST_DELETE;}static intnames_clear(regex_t* reg){ NameTable* t = (NameTable* )reg->name_table; if (IS_NOT_NULL(t)) { onig_st_foreach(t, i_free_name_entry, 0); } return 0;}extern intonig_names_free(regex_t* reg){ int r; NameTable* t; r = names_clear(reg); if (r) return r; t = (NameTable* )reg->name_table; if (IS_NOT_NULL(t)) onig_st_free_table(t); reg->name_table = (void* )NULL; return 0;}static NameEntry*name_find(regex_t* reg, const UChar* name, const UChar* name_end){ NameEntry* e; NameTable* t = (NameTable* )reg->name_table; e = (NameEntry* )NULL; if (IS_NOT_NULL(t)) { onig_st_lookup_strend(t, name, name_end, (HashDataType* )((void* )(&e))); } return e;}typedef struct { int (*func)(const UChar*, const UChar*,int,int*,regex_t*,void*); regex_t* reg; void* arg; int ret; OnigEncoding enc;} INamesArg;static inti_names(UChar* key, NameEntry* e, INamesArg* arg){ int r = (*(arg->func))(e->name, /*e->name + onigenc_str_bytelen_null(arg->enc, e->name), */ e->name + e->name_len, e->back_num, (e->back_num > 1 ? e->back_refs : &(e->back_ref1)), arg->reg, arg->arg); if (r != 0) { arg->ret = r; return ST_STOP; } return ST_CONTINUE;}extern intonig_foreach_name(regex_t* reg, int (*func)(const UChar*, const UChar*,int,int*,regex_t*,void*), void* arg){ INamesArg narg; NameTable* t = (NameTable* )reg->name_table; narg.ret = 0; if (IS_NOT_NULL(t)) { narg.func = func; narg.reg = reg; narg.arg = arg; narg.enc = reg->enc; /* should be pattern encoding. */ onig_st_foreach(t, i_names, (HashDataType )&narg); } return narg.ret;}static inti_renumber_name(UChar* key, NameEntry* e, GroupNumRemap* map){ int i; if (e->back_num > 1) { for (i = 0; i < e->back_num; i++) { e->back_refs[i] = map[e->back_refs[i]].new_val; } } else if (e->back_num == 1) { e->back_ref1 = map[e->back_ref1].new_val; } return ST_CONTINUE;}extern intonig_renumber_name_table(regex_t* reg, GroupNumRemap* map){ NameTable* t = (NameTable* )reg->name_table; if (IS_NOT_NULL(t)) { onig_st_foreach(t, i_renumber_name, (HashDataType )map); } return 0;}extern intonig_number_of_names(regex_t* reg){ NameTable* t = (NameTable* )reg->name_table; if (IS_NOT_NULL(t)) return t->num_entries; else return 0;}#else /* USE_ST_HASH_TABLE */#define INIT_NAMES_ALLOC_NUM 8typedef struct { NameEntry* e; int num; int alloc;} NameTable;#ifdef ONIG_DEBUGextern intonig_print_names(FILE* fp, regex_t* reg){ int i, j; NameEntry* e; NameTable* t = (NameTable* )reg->name_table; if (IS_NOT_NULL(t) && t->num > 0) { fprintf(fp, "name table\n"); for (i = 0; i < t->num; i++) { e = &(t->e[i]); fprintf(fp, "%s: ", e->name); if (e->back_num == 0) { fputs("-", fp); } else if (e->back_num == 1) { fprintf(fp, "%d", e->back_ref1); } else { for (j = 0; j < e->back_num; j++) { if (j > 0) fprintf(fp, ", "); fprintf(fp, "%d", e->back_refs[j]); } } fputs("\n", fp); } fputs("\n", fp); } return 0;}#endifstatic intnames_clear(regex_t* reg){ int i; NameEntry* e; NameTable* t = (NameTable* )reg->name_table; if (IS_NOT_NULL(t)) { for (i = 0; i < t->num; i++) { e = &(t->e[i]); if (IS_NOT_NULL(e->name)) { xfree(e->name); e->name = NULL; e->name_len = 0; e->back_num = 0; e->back_alloc = 0; if (IS_NOT_NULL(e->back_refs)) xfree(e->back_refs); e->back_refs = (int* )NULL; } } if (IS_NOT_NULL(t->e)) { xfree(t->e); t->e = NULL; } t->num = 0; } return 0;}extern intonig_names_free(regex_t* reg){ int r; NameTable* t; r = names_clear(reg); if (r) return r; t = (NameTable* )reg->name_table; if (IS_NOT_NULL(t)) xfree(t); reg->name_table = NULL; return 0;}static NameEntry*name_find(regex_t* reg, UChar* name, UChar* name_end){ int i, len; NameEntry* e; NameTable* t = (NameTable* )reg->name_table; if (IS_NOT_NULL(t)) { len = name_end - name; for (i = 0; i < t->num; i++) { e = &(t->e[i]); if (len == e->name_len && onig_strncmp(name, e->name, len) == 0) return e; } } return (NameEntry* )NULL;}extern intonig_foreach_name(regex_t* reg, int (*func)(const UChar*, const UChar*,int,int*,regex_t*,void*), void* arg){ int i, r; NameEntry* e; NameTable* t = (NameTable* )reg->name_table; if (IS_NOT_NULL(t)) { for (i = 0; i < t->num; i++) { e = &(t->e[i]); r = (*func)(e->name, e->name + e->name_len, e->back_num, (e->back_num > 1 ? e->back_refs : &(e->back_ref1)), reg, arg);
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -