📄 regex_internal.h
字号:
/* -*- buffer-read-only: t -*- vi: set ro: *//* DO NOT EDIT! GENERATED AUTOMATICALLY! *//* Extended regular expression matching and search library. Copyright (C) 2002, 2003, 2004, 2005, 2006, 2007 Free Software Foundation, Inc. This file is part of the GNU C Library. Contributed by Isamu Hasegawa <isamu@yamato.ibm.com>. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 3, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */#ifndef _REGEX_INTERNAL_H#define _REGEX_INTERNAL_H 1#include <assert.h>#include <ctype.h>#include <stdbool.h>#include <stdio.h>#include <stdlib.h>#include <string.h>#ifdef _LIBC# include <langinfo.h>#else# include "localcharset.h"#endif#if defined HAVE_LOCALE_H || defined _LIBC# include <locale.h>#endif#include <wchar.h>#include <wctype.h>#include <stdint.h>#if defined _LIBC# include <bits/libc-lock.h>#else# define __libc_lock_init(NAME) do { } while (0)# define __libc_lock_lock(NAME) do { } while (0)# define __libc_lock_unlock(NAME) do { } while (0)#endif/* In case that the system doesn't have isblank(). */#if !defined _LIBC && ! (defined isblank || (HAVE_ISBLANK && HAVE_DECL_ISBLANK))# define isblank(ch) ((ch) == ' ' || (ch) == '\t')#endif#ifdef _LIBC# ifndef _RE_DEFINE_LOCALE_FUNCTIONS# define _RE_DEFINE_LOCALE_FUNCTIONS 1# include <locale/localeinfo.h># include <locale/elem-hash.h># include <locale/coll-lookup.h># endif#endif/* This is for other GNU distributions with internationalized messages. */#if (HAVE_LIBINTL_H && ENABLE_NLS) || defined _LIBC# include <libintl.h># ifdef _LIBC# undef gettext# define gettext(msgid) \ INTUSE(__dcgettext) (_libc_intl_domainname, msgid, LC_MESSAGES)# endif#else# define gettext(msgid) (msgid)#endif#ifndef gettext_noop/* This define is so xgettext can find the internationalizable strings. */# define gettext_noop(String) String#endif/* For loser systems without the definition. */#ifndef SIZE_MAX# define SIZE_MAX ((size_t) -1)#endif#if (defined MB_CUR_MAX && HAVE_LOCALE_H && HAVE_WCTYPE_H && HAVE_ISWCTYPE && HAVE_WCRTOMB && HAVE_MBRTOWC && HAVE_WCSCOLL) || _LIBC# define RE_ENABLE_I18N#endif#if __GNUC__ >= 3# define BE(expr, val) __builtin_expect (expr, val)#else# define BE(expr, val) (expr)# ifdef _LIBC# define inline# endif#endif/* Number of ASCII characters. */#define ASCII_CHARS 0x80/* Number of single byte characters. */#define SBC_MAX (UCHAR_MAX + 1)#define COLL_ELEM_LEN_MAX 8/* The character which represents newline. */#define NEWLINE_CHAR '\n'#define WIDE_NEWLINE_CHAR L'\n'/* Rename to standard API for using out of glibc. */#ifndef _LIBC# define __wctype wctype# define __iswctype iswctype# define __btowc btowc# define __wcrtomb wcrtomb# define __regfree regfree# define attribute_hidden#endif /* not _LIBC */#if __GNUC__ >= 4 || (__GNUC__ == 3 && __GNUC_MINOR__ >= 1)# define __attribute(arg) __attribute__ (arg)#else# define __attribute(arg)#endiftypedef __re_idx_t Idx;/* Special return value for failure to match. */#define REG_MISSING ((Idx) -1)/* Special return value for internal error. */#define REG_ERROR ((Idx) -2)/* Test whether N is a valid index, and is not one of the above. */#ifdef _REGEX_LARGE_OFFSETS# define REG_VALID_INDEX(n) ((Idx) (n) < REG_ERROR)#else# define REG_VALID_INDEX(n) (0 <= (n))#endif/* Test whether N is a valid nonzero index. */#ifdef _REGEX_LARGE_OFFSETS# define REG_VALID_NONZERO_INDEX(n) ((Idx) ((n) - 1) < (Idx) (REG_ERROR - 1))#else# define REG_VALID_NONZERO_INDEX(n) (0 < (n))#endif/* A hash value, suitable for computing hash tables. */typedef __re_size_t re_hashval_t;/* An integer used to represent a set of bits. It must be unsigned, and must be at least as wide as unsigned int. */typedef unsigned long int bitset_word_t;/* All bits set in a bitset_word_t. */#define BITSET_WORD_MAX ULONG_MAX/* Number of bits in a bitset_word_t. For portability to hosts with padding bits, do not use '(sizeof (bitset_word_t) * CHAR_BIT)'; instead, deduce it directly from BITSET_WORD_MAX. Avoid greater-than-32-bit integers and unconditional shifts by more than 31 bits, as they're not portable. */#if BITSET_WORD_MAX == 0xffffffff# define BITSET_WORD_BITS 32#elif BITSET_WORD_MAX >> 31 >> 5 == 1# define BITSET_WORD_BITS 36#elif BITSET_WORD_MAX >> 31 >> 16 == 1# define BITSET_WORD_BITS 48#elif BITSET_WORD_MAX >> 31 >> 28 == 1# define BITSET_WORD_BITS 60#elif BITSET_WORD_MAX >> 31 >> 31 >> 1 == 1# define BITSET_WORD_BITS 64#elif BITSET_WORD_MAX >> 31 >> 31 >> 9 == 1# define BITSET_WORD_BITS 72#elif BITSET_WORD_MAX >> 31 >> 31 >> 31 >> 31 >> 3 == 1# define BITSET_WORD_BITS 128#elif BITSET_WORD_MAX >> 31 >> 31 >> 31 >> 31 >> 31 >> 31 >> 31 >> 31 >> 7 == 1# define BITSET_WORD_BITS 256#elif BITSET_WORD_MAX >> 31 >> 31 >> 31 >> 31 >> 31 >> 31 >> 31 >> 31 >> 7 > 1# define BITSET_WORD_BITS 257 /* any value > SBC_MAX will do here */# if BITSET_WORD_BITS <= SBC_MAX# error "Invalid SBC_MAX"# endif#elif BITSET_WORD_MAX == (0xffffffff + 2) * 0xffffffff/* Work around a bug in 64-bit PGC (before version 6.1-2), where the preprocessor mishandles large unsigned values as if they were signed. */# define BITSET_WORD_BITS 64#else# error "Add case for new bitset_word_t size"#endif/* Number of bitset_word_t values in a bitset_t. */#define BITSET_WORDS ((SBC_MAX + BITSET_WORD_BITS - 1) / BITSET_WORD_BITS)typedef bitset_word_t bitset_t[BITSET_WORDS];typedef bitset_word_t *re_bitset_ptr_t;typedef const bitset_word_t *re_const_bitset_ptr_t;#define PREV_WORD_CONSTRAINT 0x0001#define PREV_NOTWORD_CONSTRAINT 0x0002#define NEXT_WORD_CONSTRAINT 0x0004#define NEXT_NOTWORD_CONSTRAINT 0x0008#define PREV_NEWLINE_CONSTRAINT 0x0010#define NEXT_NEWLINE_CONSTRAINT 0x0020#define PREV_BEGBUF_CONSTRAINT 0x0040#define NEXT_ENDBUF_CONSTRAINT 0x0080#define WORD_DELIM_CONSTRAINT 0x0100#define NOT_WORD_DELIM_CONSTRAINT 0x0200typedef enum{ INSIDE_WORD = PREV_WORD_CONSTRAINT | NEXT_WORD_CONSTRAINT, WORD_FIRST = PREV_NOTWORD_CONSTRAINT | NEXT_WORD_CONSTRAINT, WORD_LAST = PREV_WORD_CONSTRAINT | NEXT_NOTWORD_CONSTRAINT, INSIDE_NOTWORD = PREV_NOTWORD_CONSTRAINT | NEXT_NOTWORD_CONSTRAINT, LINE_FIRST = PREV_NEWLINE_CONSTRAINT, LINE_LAST = NEXT_NEWLINE_CONSTRAINT, BUF_FIRST = PREV_BEGBUF_CONSTRAINT, BUF_LAST = NEXT_ENDBUF_CONSTRAINT, WORD_DELIM = WORD_DELIM_CONSTRAINT, NOT_WORD_DELIM = NOT_WORD_DELIM_CONSTRAINT} re_context_type;typedef struct{ Idx alloc; Idx nelem; Idx *elems;} re_node_set;typedef enum{ NON_TYPE = 0, /* Node type, These are used by token, node, tree. */ CHARACTER = 1, END_OF_RE = 2, SIMPLE_BRACKET = 3, OP_BACK_REF = 4, OP_PERIOD = 5,#ifdef RE_ENABLE_I18N COMPLEX_BRACKET = 6, OP_UTF8_PERIOD = 7,#endif /* RE_ENABLE_I18N */ /* We define EPSILON_BIT as a macro so that OP_OPEN_SUBEXP is used when the debugger shows values of this enum type. */#define EPSILON_BIT 8 OP_OPEN_SUBEXP = EPSILON_BIT | 0, OP_CLOSE_SUBEXP = EPSILON_BIT | 1, OP_ALT = EPSILON_BIT | 2, OP_DUP_ASTERISK = EPSILON_BIT | 3, ANCHOR = EPSILON_BIT | 4, /* Tree type, these are used only by tree. */ CONCAT = 16, SUBEXP = 17, /* Token type, these are used only by token. */ OP_DUP_PLUS = 18, OP_DUP_QUESTION, OP_OPEN_BRACKET, OP_CLOSE_BRACKET, OP_CHARSET_RANGE, OP_OPEN_DUP_NUM, OP_CLOSE_DUP_NUM, OP_NON_MATCH_LIST, OP_OPEN_COLL_ELEM, OP_CLOSE_COLL_ELEM, OP_OPEN_EQUIV_CLASS, OP_CLOSE_EQUIV_CLASS, OP_OPEN_CHAR_CLASS, OP_CLOSE_CHAR_CLASS, OP_WORD, OP_NOTWORD, OP_SPACE, OP_NOTSPACE, BACK_SLASH} re_token_type_t;#ifdef RE_ENABLE_I18Ntypedef struct{ /* Multibyte characters. */ wchar_t *mbchars; /* Collating symbols. */# ifdef _LIBC int32_t *coll_syms;# endif /* Equivalence classes. */# ifdef _LIBC int32_t *equiv_classes;# endif /* Range expressions. */# ifdef _LIBC uint32_t *range_starts; uint32_t *range_ends;# else /* not _LIBC */ wchar_t *range_starts; wchar_t *range_ends;# endif /* not _LIBC */ /* Character classes. */ wctype_t *char_classes; /* If this character set is the non-matching list. */ unsigned int non_match : 1; /* # of multibyte characters. */ Idx nmbchars; /* # of collating symbols. */ Idx ncoll_syms; /* # of equivalence classes. */ Idx nequiv_classes; /* # of range expressions. */ Idx nranges; /* # of character classes. */ Idx nchar_classes;} re_charset_t;#endif /* RE_ENABLE_I18N */typedef struct{ union { unsigned char c; /* for CHARACTER */ re_bitset_ptr_t sbcset; /* for SIMPLE_BRACKET */#ifdef RE_ENABLE_I18N re_charset_t *mbcset; /* for COMPLEX_BRACKET */#endif /* RE_ENABLE_I18N */ Idx idx; /* for BACK_REF */ re_context_type ctx_type; /* for ANCHOR */ } opr;#if __GNUC__ >= 2 && !__STRICT_ANSI__ re_token_type_t type : 8;#else re_token_type_t type;#endif unsigned int constraint : 10; /* context constraint */ unsigned int duplicated : 1; unsigned int opt_subexp : 1;#ifdef RE_ENABLE_I18N unsigned int accept_mb : 1; /* These 2 bits can be moved into the union if needed (e.g. if running out of bits; move opr.c to opr.c.c and move the flags to opr.c.flags). */ unsigned int mb_partial : 1;#endif unsigned int word_char : 1;} re_token_t;#define IS_EPSILON_NODE(type) ((type) & EPSILON_BIT)struct re_string_t{ /* Indicate the raw buffer which is the original string passed as an argument of regexec(), re_search(), etc.. */ const unsigned char *raw_mbs; /* Store the multibyte string. In case of "case insensitive mode" like REG_ICASE, upper cases of the string are stored, otherwise MBS points the same address that RAW_MBS points. */ unsigned char *mbs;#ifdef RE_ENABLE_I18N /* Store the wide character string which is corresponding to MBS. */ wint_t *wcs; Idx *offsets; mbstate_t cur_state;#endif /* Index in RAW_MBS. Each character mbs[i] corresponds to raw_mbs[raw_mbs_idx + i]. */ Idx raw_mbs_idx; /* The length of the valid characters in the buffers. */ Idx valid_len; /* The corresponding number of bytes in raw_mbs array. */ Idx valid_raw_len; /* The length of the buffers MBS and WCS. */ Idx bufs_len; /* The index in MBS, which is updated by re_string_fetch_byte. */ Idx cur_idx; /* length of RAW_MBS array. */ Idx raw_len; /* This is RAW_LEN - RAW_MBS_IDX + VALID_LEN - VALID_RAW_LEN. */ Idx len; /* End of the buffer may be shorter than its length in the cases such as re_match_2, re_search_2. Then, we use STOP for end of the buffer instead of LEN. */ Idx raw_stop; /* This is RAW_STOP - RAW_MBS_IDX adjusted through OFFSETS. */ Idx stop; /* The context of mbs[0]. We store the context independently, since the context of mbs[0] may be different from raw_mbs[0], which is the beginning of the input string. */ unsigned int tip_context; /* The translation passed as a part of an argument of re_compile_pattern. */ RE_TRANSLATE_TYPE trans; /* Copy of re_dfa_t's word_char. */ re_const_bitset_ptr_t word_char; /* true if REG_ICASE. */ unsigned char icase; unsigned char is_utf8; unsigned char map_notascii; unsigned char mbs_allocated; unsigned char offsets_needed; unsigned char newline_anchor; unsigned char word_ops_used; int mb_cur_max;};typedef struct re_string_t re_string_t;struct re_dfa_t;typedef struct re_dfa_t re_dfa_t;#ifndef _LIBC# ifdef __i386__# define internal_function __attribute ((regparm (3), stdcall))# else# define internal_function# endif
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -