⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 pcre.c

📁 ncbi源码
💻 C
📖 第 1 页 / 共 5 页
字号:
/* * =========================================================================== * PRODUCTION $Log: pcre.c,v $ * PRODUCTION Revision 1000.0  2003/10/29 15:55:43  gouriano * PRODUCTION PRODUCTION: IMPORTED [ORIGINAL] Dev-tree R1.2 * PRODUCTION * =========================================================================== *//**************************************************      Perl-Compatible Regular Expressions       **************************************************//*This is a library of functions to support regular expressions whose syntaxand semantics are as close as possible to those of the Perl 5 language. Seethe file Tech.Notes for some information on the internals.Written by: Philip Hazel <ph10@cam.ac.uk>           Copyright (c) 1997-2001 University of Cambridge-----------------------------------------------------------------------------Permission is granted to anyone to use this software for any purpose on anycomputer system, and to redistribute it freely, subject to the followingrestrictions:1. This software is distributed in the hope that it will be useful,   but WITHOUT ANY WARRANTY; without even the implied warranty of   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.2. The origin of this software must not be misrepresented, either by   explicit claim or by omission.3. Altered versions must be plainly marked as such, and must not be   misrepresented as being the original software.4. If PCRE is embedded in any software that is released under the GNU   General Purpose Licence (GPL), then the terms of that licence shall   supersede any condition above with which it is incompatible.-----------------------------------------------------------------------------*//* Use a macro for debugging printing */#if defined(PCRE_DEBUG)#  define DPRINTF(p) printf p#else#  define DPRINTF(p) /*nothing*/#endif/* Include the internals header, which itself includes Standard C headers plusthe external pcre header. */#include "pcre_internal.h"/* Allow compilation as C++ source code, should anybody want to do that. */#ifdef __cplusplus#define class pcre_class#endif/* Maximum number of items on the nested bracket stacks at compile time. Thisapplies to the nesting of all kinds of parentheses. It does not limitun-nested, non-capturing parentheses. This number can be made bigger ifnecessary - it is used to dimension one int and one unsigned char vector atcompile time. */#define BRASTACK_SIZE 200/* The number of bytes in a literal character string above which we can't addany more is different when UTF-8 characters may be encountered. */#ifdef SUPPORT_UTF8#define MAXLIT 250#else#define MAXLIT 255#endif/* Min and max values for the common repeats; for the maxima, 0 => infinity */static const char rep_min[] = { 0, 0, 1, 1, 0, 0 };static const char rep_max[] = { 0, 0, 0, 0, 1, 1 };/* Text forms of OP_ values and things, for debugging (not all used) */#ifdef DEBUGstatic const char *OP_names[] = {  "End", "\\A", "\\B", "\\b", "\\D", "\\d",  "\\S", "\\s", "\\W", "\\w", "\\Z", "\\z",  "Opt", "^", "$", "Any", "chars", "not",  "*", "*?", "+", "+?", "?", "??", "{", "{", "{",  "*", "*?", "+", "+?", "?", "??", "{", "{", "{",  "*", "*?", "+", "+?", "?", "??", "{", "{", "{",  "*", "*?", "+", "+?", "?", "??", "{", "{",  "class", "Ref", "Recurse",  "Alt", "Ket", "KetRmax", "KetRmin", "Assert", "Assert not",  "AssertB", "AssertB not", "Reverse", "Once", "Cond", "Cref",  "Brazero", "Braminzero", "Branumber", "Bra"};#endif/* Table for handling escaped characters in the range '0'-'z'. Positive returnsare simple data values; negative values are for special things like \d and soon. Zero means further processing is needed (for things like \x), or the escapeis invalid. */static const short int escapes[] = {    0,      0,      0,      0,      0,      0,      0,      0,   /* 0 - 7 */    0,      0,    ':',    ';',    '<',    '=',    '>',    '?',   /* 8 - ? */  '@', -ESC_A, -ESC_B,      0, -ESC_D,      0,      0,      0,   /* @ - G */    0,      0,      0,      0,      0,      0,      0,      0,   /* H - O */    0,      0,      0, -ESC_S,      0,      0,      0, -ESC_W,   /* P - W */    0,      0, -ESC_Z,    '[',   '\\',    ']',    '^',    '_',   /* X - _ */  '`',      7, -ESC_b,      0, -ESC_d,  ESC_E,  ESC_F,      0,   /* ` - g */    0,      0,      0,      0,      0,      0,  ESC_N,      0,   /* h - o */    0,      0,  ESC_R, -ESC_s,  ESC_T,      0,      0, -ESC_w,   /* p - w */    0,      0, -ESC_z                                            /* x - z */};/* Tables of names of POSIX character classes and their lengths. The list isterminated by a zero length entry. The first three must be alpha, upper, lower,as this is assumed for handling case independence. */static const char *posix_names[] = {  "alpha", "lower", "upper",  "alnum", "ascii", "cntrl", "digit", "graph",  "print", "punct", "space", "word",  "xdigit" };static const uschar posix_name_lengths[] = {  5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 4, 6, 0 };/* Table of class bit maps for each POSIX class; up to three may be combinedto form the class. */static const int posix_class_maps[] = {  cbit_lower, cbit_upper, -1,             /* alpha */  cbit_lower, -1,         -1,             /* lower */  cbit_upper, -1,         -1,             /* upper */  cbit_digit, cbit_lower, cbit_upper,     /* alnum */  cbit_print, cbit_cntrl, -1,             /* ascii */  cbit_cntrl, -1,         -1,             /* cntrl */  cbit_digit, -1,         -1,             /* digit */  cbit_graph, -1,         -1,             /* graph */  cbit_print, -1,         -1,             /* print */  cbit_punct, -1,         -1,             /* punct */  cbit_space, -1,         -1,             /* space */  cbit_word,  -1,         -1,             /* word */  cbit_xdigit,-1,         -1              /* xdigit */};/* Definition to allow mutual recursion */static BOOL  compile_regex(int, int, int *, uschar **, const uschar **, const char **,    BOOL, int, int *, int *, compile_data *);/* Structure for building a chain of data that actually lives on thestack, for holding the values of the subject pointer at the start of eachsubpattern, so as to detect when an empty string has been matched by asubpattern - to break infinite loops. */typedef struct eptrblock {  struct eptrblock *prev;  const uschar *saved_eptr;} eptrblock;/* Flag bits for the match() function */#define match_condassert   0x01    /* Called to check a condition assertion */#define match_isgroup      0x02    /* Set if start of bracketed group *//**************************************************               Global variables                 **************************************************//* PCRE is thread-clean and doesn't use any global variables in the normalsense. However, it calls memory allocation and free functions via the twoindirections below, which are can be changed by the caller, but are sharedbetween all threads. */void *(*pcre_malloc)(size_t) = malloc;void  (*pcre_free)(void *) = free;/**************************************************    Macros and tables for character handling    **************************************************//* When UTF-8 encoding is being used, a character is no longer just a singlebyte. The macros for character handling generate simple sequences when used inbyte-mode, and more complicated ones for UTF-8 characters. */#ifndef SUPPORT_UTF8#define GETCHARINC(c, eptr) c = *eptr++;#define GETCHARLEN(c, eptr, len) c = *eptr;#define BACKCHAR(eptr)#else   /* SUPPORT_UTF8 *//* Get the next UTF-8 character, advancing the pointer */#define GETCHARINC(c, eptr) \  c = *eptr++; \  if (md->utf8 && (c & 0xc0) == 0xc0) \    { \    int a = utf8_table4[c & 0x3f];  /* Number of additional bytes */ \    int s = 6*a; \    c = (c & utf8_table3[a]) << s; \    while (a-- > 0) \      { \      s -= 6; \      c |= (*eptr++ & 0x3f) << s; \      } \    }/* Get the next UTF-8 character, not advancing the pointer, setting length */#define GETCHARLEN(c, eptr, len) \  c = *eptr; \  len = 1; \  if (md->utf8 && (c & 0xc0) == 0xc0) \    { \    int i; \    int a = utf8_table4[c & 0x3f];  /* Number of additional bytes */ \    int s = 6*a; \    c = (c & utf8_table3[a]) << s; \    for (i = 1; i <= a; i++) \      { \      s -= 6; \      c |= (eptr[i] & 0x3f) << s; \      } \    len += a; \    }/* If the pointer is not at the start of a character, move it back untilit is. */#define BACKCHAR(eptr) while((*eptr & 0xc0) == 0x80) eptr--;#endif/**************************************************             Default character tables           **************************************************//* A default set of character tables is included in the PCRE binary. Its sourceis built by the maketables auxiliary program, which uses the default C ctypesfunctions, and put in the file chartables.c. These tables are used by PCREwhenever the caller of pcre_compile() does not provide an alternate set oftables. */#include "chartables.c"#ifdef SUPPORT_UTF8/**************************************************           Tables for UTF-8 support             **************************************************//* These are the breakpoints for different numbers of bytes in a UTF-8character. */static int utf8_table1[] = { 0x7f, 0x7ff, 0xffff, 0x1fffff, 0x3ffffff, 0x7fffffff};/* These are the indicator bits and the mask for the data bits to set in thefirst byte of a character, indexed by the number of additional bytes. */static int utf8_table2[] = { 0,    0xc0, 0xe0, 0xf0, 0xf8, 0xfc};static int utf8_table3[] = { 0xff, 0x1f, 0x0f, 0x07, 0x03, 0x01};/* Table of the number of extra characters, indexed by the first charactermasked with 0x3f. The highest number for a valid UTF-8 character is in fact0x3d. */static uschar utf8_table4[] = {  1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,  1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,  2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,  3,3,3,3,3,3,3,3,4,4,4,4,5,5,5,5 };/**************************************************       Convert character value to UTF-8         **************************************************//* This function takes an integer value in the range 0 - 0x7fffffffand encodes it as a UTF-8 character in 0 to 6 bytes.Arguments:  cvalue     the character value  buffer     pointer to buffer for result - at least 6 bytes longReturns:     number of characters placed in the buffer*/static intord2utf8(int cvalue, uschar *buffer){register int i, j;for (i = 0; i < sizeof(utf8_table1)/sizeof(int); i++)  if (cvalue <= utf8_table1[i]) break;buffer += i;for (j = i; j > 0; j--) { *buffer-- = 0x80 | (cvalue & 0x3f); cvalue >>= 6; }*buffer = utf8_table2[i] | cvalue;return i + 1;}#endif/**************************************************          Return version string                 **************************************************/#define STRING(a)  # a#define XSTRING(s) STRING(s)const char *pcre_version(void){return XSTRING(PCRE_MAJOR) "." XSTRING(PCRE_MINOR) " " XSTRING(PCRE_DATE);}/************************************************** (Obsolete) Return info about compiled pattern  **************************************************//* This is the original "info" function. It picks potentially useful data outof the private structure, but its interface was too rigid. It remains forbackwards compatibility. The public options are passed back in an int - thoughthe re->options field has been expanded to a long int, all the public optionsat the low end of it, and so even on 16-bit systems this will still be OK.Therefore, I haven't changed the API for pcre_info().Arguments:  external_re   points to compiled code  optptr        where to pass back the options  first_char    where to pass back the first character,                or -1 if multiline and all branches start ^,                or -2 otherwiseReturns:        number of capturing subpatterns                or negative values on error*/intpcre_info(const pcre *external_re, int *optptr, int *first_char){const real_pcre *re = (const real_pcre *)external_re;if (re == NULL) return PCRE_ERROR_NULL;if (re->magic_number != MAGIC_NUMBER) return PCRE_ERROR_BADMAGIC;if (optptr != NULL) *optptr = (int)(re->options & PUBLIC_OPTIONS);if (first_char != NULL)  *first_char = ((re->options & PCRE_FIRSTSET) != 0)? re->first_char :     ((re->options & PCRE_STARTLINE) != 0)? -1 : -2;return re->top_bracket;}/**************************************************        Return info about compiled pattern      **************************************************//* This is a newer "info" function which has an extensible interface sothat additional items can be added compatibly.Arguments:  external_re      points to compiled code  external_study   points to study data, or NULL  what             what information is required  where            where to put the informationReturns:           0 if data returned, negative on error*/intpcre_fullinfo(const pcre *external_re, const pcre_extra *study_data, int what,  void *where){const real_pcre *re = (const real_pcre *)external_re;const real_pcre_extra *study = (const real_pcre_extra *)study_data;if (re == NULL || where == NULL) return PCRE_ERROR_NULL;if (re->magic_number != MAGIC_NUMBER) return PCRE_ERROR_BADMAGIC;switch (what)  {  case PCRE_INFO_OPTIONS:  *((unsigned long int *)where) = re->options & PUBLIC_OPTIONS;  break;  case PCRE_INFO_SIZE:  *((size_t *)where) = re->size;  break;  case PCRE_INFO_CAPTURECOUNT:  *((int *)where) = re->top_bracket;  break;  case PCRE_INFO_BACKREFMAX:  *((int *)where) = re->top_backref;  break;  case PCRE_INFO_FIRSTCHAR:  *((int *)where) =    ((re->options & PCRE_FIRSTSET) != 0)? re->first_char :    ((re->options & PCRE_STARTLINE) != 0)? -1 : -2;  break;  case PCRE_INFO_FIRSTTABLE:  *((const uschar **)where) =    (study != NULL && (study->options & PCRE_STUDY_MAPPED) != 0)?      study->start_bits : NULL;  break;  case PCRE_INFO_LASTLITERAL:  *((int *)where) =    ((re->options & PCRE_REQCHSET) != 0)? re->req_char : -1;  break;  default: return PCRE_ERROR_BADOPTION;  }return 0;}

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -