📄 pcre_internal.h
字号:
/*************************************************
*      Perl-Compatible Regular Expressions       *
*************************************************/


/* PCRE is a library of functions to support regular expressions whose syntax
and semantics are as close as possible to those of the Perl 5 language.

                       Written by Philip Hazel
           Copyright (c) 1997-2008 University of Cambridge

-----------------------------------------------------------------------------
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:

    * Redistributions of source code must retain the above copyright notice,
      this list of conditions and the following disclaimer.

    * Redistributions in binary form must reproduce the above copyright
      notice, this list of conditions and the following disclaimer in the
      documentation and/or other materials provided with the distribution.

    * Neither the name of the University of Cambridge nor the names of its
      contributors may be used to endorse or promote products derived from
      this software without specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED.
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BELIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, ORCONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OFSUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESSINTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER INCONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THEPOSSIBILITY OF SUCH DAMAGE.-----------------------------------------------------------------------------*//* This header contains definitions that are shared between the differentmodules, but which are not relevant to the exported API. This includes somefunctions whose names all begin with "_pcre_". */#ifndef PCRE_INTERNAL_H#define PCRE_INTERNAL_H/* Define DEBUG to get debugging output on stdout. */#if 0#define DEBUG#endif/* Use a macro for debugging printing, 'cause that eliminates the use of #ifdefinline, and there are *still* stupid compilers about that don't like indentedpre-processor statements, or at least there were when I first wrote this. Afterall, it had only been about 10 years then...It turns out that the Mac Debugging.h header also defines the macro DPRINTF, sobe absolutely sure we get our version. */#undef DPRINTF#ifdef DEBUG#define DPRINTF(p) printf p#else#define DPRINTF(p) /* Nothing */#endif/* Standard C headers plus the external interface definition. The only timesetjmp and stdarg are used is when NO_RECURSE is set. */#include <ctype.h>#include <limits.h>#include <setjmp.h>#include <stdarg.h>#include <stddef.h>#include <stdio.h>#include <stdlib.h>#include <string.h>/* When compiling a DLL for Windows, the exported symbols have to be declaredusing some MS magic. I found some useful information on this web page:http://msdn2.microsoft.com/en-us/library/y4h7bcy6(VS.80).aspx. 
According to theinformation there, using __declspec(dllexport) without "extern" we have adefinition; with "extern" we have a declaration. The settings here override thesetting in pcre.h (which is included below); it defines only PCRE_EXP_DECL,which is all that is needed for applications (they just import the symbols). Weuse: PCRE_EXP_DECL for declarations PCRE_EXP_DEFN for definitions of exported functions PCRE_EXP_DATA_DEFN for definitions of exported variablesThe reason for the two DEFN macros is that in non-Windows environments, onedoes not want to have "extern" before variable definitions because it leads tocompiler warnings. So we distinguish between functions and variables. InWindows, the two should always be the same.The reason for wrapping this in #ifndef PCRE_EXP_DECL is so that pcretest,which is an application, but needs to import this file in order to "peek" atinternals, can #include pcre.h first to get an application's-eye view.In principle, people compiling for non-Windows, non-Unix-like (i.e. uncommon,special-purpose environments) might want to stick other stuff in front ofexported symbols. That's why, in the non-Windows case, we set PCRE_EXP_DEFN andPCRE_EXP_DATA_DEFN only if they are not already set. */#ifndef PCRE_EXP_DECL# ifdef _WIN32# ifndef PCRE_STATIC# define PCRE_EXP_DECL extern __declspec(dllexport)# define PCRE_EXP_DEFN __declspec(dllexport)# define PCRE_EXP_DATA_DEFN __declspec(dllexport)# else# define PCRE_EXP_DECL extern# define PCRE_EXP_DEFN# define PCRE_EXP_DATA_DEFN# endif# else# ifdef __cplusplus# define PCRE_EXP_DECL extern "C"# else# define PCRE_EXP_DECL extern# endif# ifndef PCRE_EXP_DEFN# define PCRE_EXP_DEFN PCRE_EXP_DECL# endif# ifndef PCRE_EXP_DATA_DEFN# define PCRE_EXP_DATA_DEFN# endif# endif#endif/* We need to have types that specify unsigned 16-bit and 32-bit integers. Wecannot determine these outside the compilation (e.g. 
by running a program aspart of "configure") because PCRE is often cross-compiled for use on othersystems. Instead we make use of the maximum sizes that are available atpreprocessor time in standard C environments. */#if USHRT_MAX == 65535 typedef unsigned short pcre_uint16;#elif UINT_MAX == 65535 typedef unsigned int pcre_uint16;#else #error Cannot determine a type for 16-bit unsigned integers#endif#if UINT_MAX == 4294967295 typedef unsigned int pcre_uint32;#elif ULONG_MAX == 4294967295 typedef unsigned long int pcre_uint32;#else #error Cannot determine a type for 32-bit unsigned integers#endif/* All character handling must be done as unsigned characters. Otherwise thereare problems with top-bit-set characters and functions such as isspace().However, we leave the interface to the outside world as char *, because thatshould make things easier for callers. We define a short type for unsigned charto save lots of typing. I tried "uchar", but it causes problems on DigitalUnix, where it is defined in sys/types, so use "uschar" instead. */typedef unsigned char uschar;/* This is an unsigned int value that no character can ever have. UTF-8characters only go up to 0x7fffffff (though Unicode doesn't go beyond0x0010ffff). */#define NOTACHAR 0xffffffff/* PCRE is able to support several different kinds of newline (CR, LF, CRLF,"any" and "anycrlf" at present). The following macros are used to package uptesting for newlines. NLBLOCK, PSSTART, and PSEND are defined in the variousmodules to indicate in which datablock the parameters exist, and what thestart/end of string field names are. */#define NLTYPE_FIXED 0 /* Newline is a fixed length string */#define NLTYPE_ANY 1 /* Newline is any Unicode line ending */#define NLTYPE_ANYCRLF 2 /* Newline is CR, LF, or CRLF *//* This macro checks for a newline at the given position */#define IS_NEWLINE(p) \ ((NLBLOCK->nltype != NLTYPE_FIXED)? 
\ ((p) < NLBLOCK->PSEND && \ _pcre_is_newline((p), NLBLOCK->nltype, NLBLOCK->PSEND, &(NLBLOCK->nllen),\ utf8)) \ : \ ((p) <= NLBLOCK->PSEND - NLBLOCK->nllen && \ (p)[0] == NLBLOCK->nl[0] && \ (NLBLOCK->nllen == 1 || (p)[1] == NLBLOCK->nl[1]) \ ) \ )/* This macro checks for a newline immediately preceding the given position */#define WAS_NEWLINE(p) \ ((NLBLOCK->nltype != NLTYPE_FIXED)? \ ((p) > NLBLOCK->PSSTART && \ _pcre_was_newline((p), NLBLOCK->nltype, NLBLOCK->PSSTART, \ &(NLBLOCK->nllen), utf8)) \ : \ ((p) >= NLBLOCK->PSSTART + NLBLOCK->nllen && \ (p)[-NLBLOCK->nllen] == NLBLOCK->nl[0] && \ (NLBLOCK->nllen == 1 || (p)[-NLBLOCK->nllen+1] == NLBLOCK->nl[1]) \ ) \ )/* When PCRE is compiled as a C++ library, the subject pointer can be replacedwith a custom type. This makes it possible, for example, to allow pcre_exec()to process subject strings that are discontinuous by using a smart pointerclass. It must always be possible to inspect all of the subject string inpcre_exec() because of the way it backtracks. Two macros are required in thenormal case, for sign-unspecified and unsigned char pointers. The former isused for the external interface and appears in pcre.h, which is why its namemust begin with PCRE_. */#ifdef CUSTOM_SUBJECT_PTR#define PCRE_SPTR CUSTOM_SUBJECT_PTR#define USPTR CUSTOM_SUBJECT_PTR#else#define PCRE_SPTR const char *#define USPTR const unsigned char *#endif/* Include the public PCRE header and the definitions of UCP character propertyvalues. */#include "pcre.h"#include "ucp.h"/* When compiling for use with the Virtual Pascal compiler, these functionsneed to have their names changed. PCRE must be compiled with the -DVPCOMPAToption on the command line. 
*/#ifdef VPCOMPAT#define strlen(s) _strlen(s)#define strncmp(s1,s2,m) _strncmp(s1,s2,m)#define memcmp(s,c,n) _memcmp(s,c,n)#define memcpy(d,s,n) _memcpy(d,s,n)#define memmove(d,s,n) _memmove(d,s,n)#define memset(s,c,n) _memset(s,c,n)#else /* VPCOMPAT *//* To cope with SunOS4 and other systems that lack memmove() but have bcopy(),define a macro for memmove() if HAVE_MEMMOVE is false, provided that HAVE_BCOPYis set. Otherwise, include an emulating function for those systems that haveneither (there some non-Unix environments where this is the case). */#ifndef HAVE_MEMMOVE#undef memmove /* some systems may have a macro */#ifdef HAVE_BCOPY#define memmove(a, b, c) bcopy(b, a, c)#else /* HAVE_BCOPY */static void *pcre_memmove(void *d, const void *s, size_t n){size_t i;unsigned char *dest = (unsigned char *)d;const unsigned char *src = (const unsigned char *)s;if (dest > src) { dest += n; src += n; for (i = 0; i < n; ++i) *(--dest) = *(--src); return (void *)dest; }else { for (i = 0; i < n; ++i) *dest++ = *src++; return (void *)(dest - n); }}#define memmove(a, b, c) pcre_memmove(a, b, c)#endif /* not HAVE_BCOPY */#endif /* not HAVE_MEMMOVE */#endif /* not VPCOMPAT *//* PCRE keeps offsets in its compiled code as 2-byte quantities (always storedin big-endian order) by default. These are used, for example, to link from thestart of a subpattern to its alternatives and its end. The use of 2 bytes per
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -