⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 regex_internal.h

📁 硬盘各项性能的测试,如温度容量版本健康度型号
💻 H
📖 第 1 页 / 共 2 页
字号:
/* Extended regular expression matching and search library.   Copyright (C) 2002, 2003 Free Software Foundation, Inc.   This file is part of the GNU C Library.   Contributed by Isamu Hasegawa <isamu@yamato.ibm.com>.   The GNU C Library is free software; you can redistribute it and/or   modify it under the terms of the GNU Lesser General Public   License as published by the Free Software Foundation; either   version 2.1 of the License, or (at your option) any later version.   The GNU C Library is distributed in the hope that it will be useful,   but WITHOUT ANY WARRANTY; without even the implied warranty of   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU   Lesser General Public License for more details.   You should have received a copy of the GNU Lesser General Public   License along with the GNU C Library; if not, write to the Free   Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA   02111-1307 USA.  */#ifndef _REGEX_INTERNAL_H#define _REGEX_INTERNAL_H 1#ifdef HAVE_CONFIG_H#include "config.h"#endif#include <assert.h>#include <ctype.h>#include <limits.h>#include <stdio.h>#include <stdlib.h>#include <string.h>#if defined HAVE_LOCALE_H || defined _LIBC# include <locale.h>#endif#if defined HAVE_WCHAR_H || defined _LIBC# include <wchar.h>#endif /* HAVE_WCHAR_H || _LIBC */#if defined HAVE_WCTYPE_H || defined _LIBC# include <wctype.h>#endif /* HAVE_WCTYPE_H || _LIBC *//* In case that the system doesn't have isblank().  */#if !defined _LIBC && !defined HAVE_ISBLANK && !defined isblank# define isblank(ch) ((ch) == ' ' || (ch) == '\t')#endif#ifdef _LIBC# ifndef _RE_DEFINE_LOCALE_FUNCTIONS#  define _RE_DEFINE_LOCALE_FUNCTIONS 1#   include <locale/localeinfo.h>#   include <locale/elem-hash.h>#   include <locale/coll-lookup.h># endif#endif/* This is for other GNU distributions with internationalized messages.  */#if HAVE_LIBINTL_H || defined _LIBC# include <libintl.h># ifdef _LIBC#  undef gettext#  define gettext(msgid) \  INTUSE(__dcgettext) (INTUSE(_libc_intl_domainname), msgid, LC_MESSAGES)# endif#else# define gettext(msgid) (msgid)#endif#ifndef gettext_noop/* This define is so xgettext can find the internationalizable   strings.  */# define gettext_noop(String) String#endif#if (defined MB_CUR_MAX && HAVE_LOCALE_H && HAVE_WCTYPE_H && HAVE_WCHAR_H && HAVE_WCRTOMB && HAVE_MBRTOWC && HAVE_WCSCOLL) || _LIBC# define RE_ENABLE_I18N#endif#if __GNUC__ >= 3# define BE(expr, val) __builtin_expect (expr, val)#else# define BE(expr, val) (expr)# define inline#endif/* Number of bits in a byte.  */#define BYTE_BITS 8/* Number of single byte character.  */#define SBC_MAX 256#define COLL_ELEM_LEN_MAX 8/* The character which represents newline.  */#define NEWLINE_CHAR '\n'#define WIDE_NEWLINE_CHAR L'\n'/* Rename to standard API for using out of glibc.  */#ifndef _LIBC# define __wctype wctype# define __iswctype iswctype# define __btowc btowc# define __mempcpy mempcpy# define __wcrtomb wcrtomb# define attribute_hidden#endif /* not _LIBC */extern const char __re_error_msgid[] attribute_hidden;extern const size_t __re_error_msgid_idx[] attribute_hidden;/* Number of bits in an unsinged int.  */#define UINT_BITS (sizeof (unsigned int) * BYTE_BITS)/* Number of unsigned int in an bit_set.  */#define BITSET_UINTS ((SBC_MAX + UINT_BITS - 1) / UINT_BITS)typedef unsigned int bitset[BITSET_UINTS];typedef unsigned int *re_bitset_ptr_t;#define bitset_set(set,i) (set[i / UINT_BITS] |= 1 << i % UINT_BITS)#define bitset_clear(set,i) (set[i / UINT_BITS] &= ~(1 << i % UINT_BITS))#define bitset_contain(set,i) (set[i / UINT_BITS] & (1 << i % UINT_BITS))#define bitset_empty(set) memset (set, 0, sizeof (unsigned int) * BITSET_UINTS)#define bitset_set_all(set) \  memset (set, 255, sizeof (unsigned int) * BITSET_UINTS)#define bitset_copy(dest,src) \  memcpy (dest, src, sizeof (unsigned int) * BITSET_UINTS)static inline void bitset_not (bitset set);static inline void bitset_merge (bitset dest, const bitset src);static inline void bitset_not_merge (bitset dest, const bitset src);#define PREV_WORD_CONSTRAINT 0x0001#define PREV_NOTWORD_CONSTRAINT 0x0002#define NEXT_WORD_CONSTRAINT 0x0004#define NEXT_NOTWORD_CONSTRAINT 0x0008#define PREV_NEWLINE_CONSTRAINT 0x0010#define NEXT_NEWLINE_CONSTRAINT 0x0020#define PREV_BEGBUF_CONSTRAINT 0x0040#define NEXT_ENDBUF_CONSTRAINT 0x0080#define DUMMY_CONSTRAINT 0x0100typedef enum{  INSIDE_WORD = PREV_WORD_CONSTRAINT | NEXT_WORD_CONSTRAINT,  WORD_FIRST = PREV_NOTWORD_CONSTRAINT | NEXT_WORD_CONSTRAINT,  WORD_LAST = PREV_WORD_CONSTRAINT | NEXT_NOTWORD_CONSTRAINT,  LINE_FIRST = PREV_NEWLINE_CONSTRAINT,  LINE_LAST = NEXT_NEWLINE_CONSTRAINT,  BUF_FIRST = PREV_BEGBUF_CONSTRAINT,  BUF_LAST = NEXT_ENDBUF_CONSTRAINT,  WORD_DELIM = DUMMY_CONSTRAINT} re_context_type;typedef struct{  int alloc;  int nelem;  int *elems;} re_node_set;typedef enum{  NON_TYPE = 0,  /* Token type, these are used only by token.  */  OP_OPEN_BRACKET,  OP_CLOSE_BRACKET,  OP_CHARSET_RANGE,  OP_OPEN_DUP_NUM,  OP_CLOSE_DUP_NUM,  OP_NON_MATCH_LIST,  OP_OPEN_COLL_ELEM,  OP_CLOSE_COLL_ELEM,  OP_OPEN_EQUIV_CLASS,  OP_CLOSE_EQUIV_CLASS,  OP_OPEN_CHAR_CLASS,  OP_CLOSE_CHAR_CLASS,  OP_WORD,  OP_NOTWORD,  BACK_SLASH,  /* Tree type, these are used only by tree. */  CONCAT,  ALT,  SUBEXP,  SIMPLE_BRACKET,#ifdef RE_ENABLE_I18N  COMPLEX_BRACKET,#endif /* RE_ENABLE_I18N */  /* Node type, These are used by token, node, tree.  */  OP_OPEN_SUBEXP,  OP_CLOSE_SUBEXP,  OP_PERIOD,  CHARACTER,  END_OF_RE,  OP_ALT,  OP_DUP_ASTERISK,  OP_DUP_PLUS,  OP_DUP_QUESTION,  OP_BACK_REF,  ANCHOR,  /* Dummy marker.  */  END_OF_RE_TOKEN_T} re_token_type_t;#ifdef RE_ENABLE_I18Ntypedef struct{  /* Multibyte characters.  */  wchar_t *mbchars;  /* Collating symbols.  */# ifdef _LIBC  int32_t *coll_syms;# endif  /* Equivalence classes. */# ifdef _LIBC  int32_t *equiv_classes;# endif  /* Range expressions. */# ifdef _LIBC  uint32_t *range_starts;  uint32_t *range_ends;# else /* not _LIBC */  wchar_t *range_starts;  wchar_t *range_ends;# endif /* not _LIBC */  /* Character classes. */  wctype_t *char_classes;  /* If this character set is the non-matching list.  */  unsigned int non_match : 1;  /* # of multibyte characters.  */  int nmbchars;  /* # of collating symbols.  */  int ncoll_syms;  /* # of equivalence classes. */  int nequiv_classes;  /* # of range expressions. */  int nranges;  /* # of character classes. */  int nchar_classes;} re_charset_t;#endif /* RE_ENABLE_I18N */typedef struct{  union  {    unsigned char c;		/* for CHARACTER */    re_bitset_ptr_t sbcset;	/* for SIMPLE_BRACKET */#ifdef RE_ENABLE_I18N    re_charset_t *mbcset;	/* for COMPLEX_BRACKET */#endif /* RE_ENABLE_I18N */    int idx;			/* for BACK_REF */    re_context_type ctx_type;	/* for ANCHOR */  } opr;#if __GNUC__ >= 2  re_token_type_t type : 8;#else  re_token_type_t type;#endif  unsigned int constraint : 10;	/* context constraint */  unsigned int duplicated : 1;#ifdef RE_ENABLE_I18N  unsigned int mb_partial : 1;#endif} re_token_t;#define IS_EPSILON_NODE(type) \  ((type) == OP_ALT || (type) == OP_DUP_ASTERISK || (type) == OP_DUP_PLUS \   || (type) == OP_DUP_QUESTION || (type) == ANCHOR \   || (type) == OP_OPEN_SUBEXP || (type) == OP_CLOSE_SUBEXP)#define ACCEPT_MB_NODE(type) \  ((type) == COMPLEX_BRACKET || (type) == OP_PERIOD)struct re_string_t{  /* Indicate the raw buffer which is the original string passed as an     argument of regexec(), re_search(), etc..  */  const unsigned char *raw_mbs;  /* Store the multibyte string.  In case of "case insensitive mode" like     REG_ICASE, upper cases of the string are stored, otherwise MBS points     the same address that RAW_MBS points.  */  unsigned char *mbs;  /* Store the case sensitive multibyte string.  In case of     "case insensitive mode", the original string are stored,     otherwise MBS_CASE points the same address that MBS points.  */  unsigned char *mbs_case;#ifdef RE_ENABLE_I18N  /* Store the wide character string which is corresponding to MBS.  */  wint_t *wcs;  mbstate_t cur_state;#endif  /* Index in RAW_MBS.  Each character mbs[i] corresponds to     raw_mbs[raw_mbs_idx + i].  */  int raw_mbs_idx;  /* The length of the valid characters in the buffers.  */  int valid_len;  /* The length of the buffers MBS, MBS_CASE, and WCS.  */  int bufs_len;  /* The index in MBS, which is updated by re_string_fetch_byte.  */  int cur_idx;  /* This is length_of_RAW_MBS - RAW_MBS_IDX.  */  int len;  /* End of the buffer may be shorter than its length in the cases such     as re_match_2, re_search_2.  Then, we use STOP for end of the buffer     instead of LEN.  */  int stop;  /* The context of mbs[0].  We store the context independently, since     the context of mbs[0] may be different from raw_mbs[0], which is     the beginning of the input string.  */  unsigned int tip_context;  /* The translation passed as a part of an argument of re_compile_pattern.  */  RE_TRANSLATE_TYPE trans;  /* 1 if REG_ICASE.  */  unsigned int icase : 1;};typedef struct re_string_t re_string_t;/* In case of REG_ICASE, we allocate the buffer dynamically for mbs.  */#define MBS_ALLOCATED(pstr) (pstr->icase)/* In case that we need translation, we allocate the buffer dynamically   for mbs_case.  Note that mbs == mbs_case if not REG_ICASE.  */#define MBS_CASE_ALLOCATED(pstr) (pstr->trans != NULL)static reg_errcode_t re_string_allocate (re_string_t *pstr, const char *str,					 int len, int init_len,					 RE_TRANSLATE_TYPE trans, int icase);static reg_errcode_t re_string_construct (re_string_t *pstr, const char *str,					  int len, RE_TRANSLATE_TYPE trans,					  int icase);static reg_errcode_t re_string_reconstruct (re_string_t *pstr, int idx,					    int eflags, int newline);static reg_errcode_t re_string_realloc_buffers (re_string_t *pstr,						int new_buf_len);#ifdef RE_ENABLE_I18Nstatic void build_wcs_buffer (re_string_t *pstr);static void build_wcs_upper_buffer (re_string_t *pstr);#endif /* RE_ENABLE_I18N */static void build_upper_buffer (re_string_t *pstr);static void re_string_translate_buffer (re_string_t *pstr);static void re_string_destruct (re_string_t *pstr);#ifdef RE_ENABLE_I18Nstatic int re_string_elem_size_at (const re_string_t *pstr, int idx);static inline int re_string_char_size_at (const re_string_t *pstr, int idx);static inline wint_t re_string_wchar_at (const re_string_t *pstr, int idx);#endif /* RE_ENABLE_I18N */static unsigned int re_string_context_at (const re_string_t *input, int idx,					  int eflags, int newline_anchor);#define re_string_peek_byte(pstr, offset) \  ((pstr)->mbs[(pstr)->cur_idx + offset])#define re_string_peek_byte_case(pstr, offset) \  ((pstr)->mbs_case[(pstr)->cur_idx + offset])#define re_string_fetch_byte(pstr) \  ((pstr)->mbs[(pstr)->cur_idx++])#define re_string_fetch_byte_case(pstr) \  ((pstr)->mbs_case[(pstr)->cur_idx++])#define re_string_first_byte(pstr, idx) \  ((idx) == (pstr)->len || (pstr)->wcs[idx] != WEOF)#define re_string_is_single_byte_char(pstr, idx) \

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -