⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 pcre_exec.c

📁 SDL文件。SDL_ERROwenjian.....
💻 C
📖 第 1 页 / 共 5 页
字号:
/**************************************************      Perl-Compatible Regular Expressions       **************************************************//* PCRE is a library of functions to support regular expressions whose syntaxand semantics are as close as possible to those of the Perl 5 language.                       Written by Philip Hazel           Copyright (c) 1997-2007 University of Cambridge-----------------------------------------------------------------------------Redistribution and use in source and binary forms, with or withoutmodification, are permitted provided that the following conditions are met:    * Redistributions of source code must retain the above copyright notice,      this list of conditions and the following disclaimer.    * Redistributions in binary form must reproduce the above copyright      notice, this list of conditions and the following disclaimer in the      documentation and/or other materials provided with the distribution.    * Neither the name of the University of Cambridge nor the names of its      contributors may be used to endorse or promote products derived from      this software without specific prior written permission.THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THEIMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSEARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BELIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, ORCONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OFSUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESSINTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER INCONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THEPOSSIBILITY OF SUCH DAMAGE.-----------------------------------------------------------------------------*//* This module contains pcre_exec(), the externally visible function that doespattern matching using an NFA algorithm, trying to mimic Perl as closely aspossible. There are also some static supporting functions. */#ifdef HAVE_CONFIG_H#include "config.h"#endif#define NLBLOCK md             /* Block containing newline information */#define PSSTART start_subject  /* Field containing processed string start */#define PSEND   end_subject    /* Field containing processed string end */#include "pcre_internal.h"/* Undefine some potentially clashing cpp symbols */#undef min#undef max/* Flag bits for the match() function */#define match_condassert     0x01  /* Called to check a condition assertion */#define match_cbegroup       0x02  /* Could-be-empty unlimited repeat group *//* Non-error returns from the match() function. Error returns are externallydefined PCRE_ERROR_xxx codes, which are all negative. */#define MATCH_MATCH        1#define MATCH_NOMATCH      0/* Special internal returns from the match() function. Make them sufficientlynegative to avoid the external error codes. */#define MATCH_COMMIT       (-999)#define MATCH_PRUNE        (-998)#define MATCH_SKIP         (-997)#define MATCH_THEN         (-996)/* Maximum number of ints of offset to save on the stack for recursive calls.If the offset vector is bigger, malloc is used. This should be a multiple of 3,because the offset vector is always a multiple of 3 long. */#define REC_STACK_SAVE_MAX 30/* Min and max values for the common repeats; for the maxima, 0 => infinity */static const char rep_min[] = { 0, 0, 1, 1, 0, 0 };static const char rep_max[] = { 0, 0, 0, 0, 1, 1 };#ifdef DEBUG/**************************************************        Debugging function to print chars       **************************************************//* Print a sequence of chars in printable format, stopping at the end of thesubject if the requested.Arguments:  p           points to characters  length      number to print  is_subject  TRUE if printing from within md->start_subject  md          pointer to matching data block, if is_subject is TRUEReturns:     nothing*/static voidpchars(const uschar *p, int length, BOOL is_subject, match_data *md){unsigned int c;if (is_subject && length > md->end_subject - p) length = md->end_subject - p;while (length-- > 0)  if (isprint(c = *(p++))) printf("%c", c); else printf("\\x%02x", c);}#endif/**************************************************          Match a back-reference                **************************************************//* If a back reference hasn't been set, the length that is passed is greaterthan the number of characters left in the string, so the match fails.Arguments:  offset      index into the offset vector  eptr        points into the subject  length      length to be matched  md          points to match data block  ims         the ims flagsReturns:      TRUE if matched*/static BOOLmatch_ref(int offset, register USPTR eptr, int length, match_data *md,  unsigned long int ims){USPTR p = md->start_subject + md->offset_vector[offset];#ifdef DEBUGif (eptr >= md->end_subject)  printf("matching subject <null>");else  {  printf("matching subject ");  pchars(eptr, length, TRUE, md);  }printf(" against backref ");pchars(p, length, FALSE, md);printf("\n");#endif/* Always fail if not enough characters left */if (length > md->end_subject - eptr) return FALSE;/* Separate the caselesss case for speed */if ((ims & PCRE_CASELESS) != 0)  {  while (length-- > 0)    if (md->lcc[*p++] != md->lcc[*eptr++]) return FALSE;  }else  { while (length-- > 0) if (*p++ != *eptr++) return FALSE; }return TRUE;}/*******************************************************************************************************************************************************                   RECURSION IN THE match() FUNCTIONThe match() function is highly recursive, though not every recursive callincreases the recursive depth. Nevertheless, some regular expressions can causeit to recurse to a great depth. I was writing for Unix, so I just let it callitself recursively. This uses the stack for saving everything that has to besaved for a recursive call. On Unix, the stack can be large, and this worksfine.It turns out that on some non-Unix-like systems there are problems withprograms that use a lot of stack. (This despite the fact that every last chiphas oodles of memory these days, and techniques for extending the stack havebeen known for decades.) So....There is a fudge, triggered by defining NO_RECURSE, which avoids recursivecalls by keeping local variables that need to be preserved in blocks of memoryobtained from malloc() instead instead of on the stack. Macros are used toachieve this so that the actual code doesn't look very different to what italways used to.The original heap-recursive code used longjmp(). However, it seems that thiscan be very slow on some operating systems. Following a suggestion from StanSwitzer, the use of longjmp() has been abolished, at the cost of having toprovide a unique number for each call to RMATCH. There is no way of generatinga sequence of numbers at compile time in C. I have given them names, to makethem stand out more clearly.Crude tests on x86 Linux show a small speedup of around 5-8%. However, onFreeBSD, avoiding longjmp() more than halves the time taken to run the standardtests. Furthermore, not using longjmp() means that local dynamic variablesdon't have indeterminate values; this has meant that the frame size can bereduced because the result can be "passed back" by straight setting of thevariable instead of being passed in the frame.*******************************************************************************************************************************************************//* Numbers for RMATCH calls. When this list is changed, the code at HEAP_RETURNbelow must be updated in sync.  */enum { RM1=1, RM2,  RM3,  RM4,  RM5,  RM6,  RM7,  RM8,  RM9,  RM10,       RM11,  RM12, RM13, RM14, RM15, RM16, RM17, RM18, RM19, RM20,       RM21,  RM22, RM23, RM24, RM25, RM26, RM27, RM28, RM29, RM30,       RM31,  RM32, RM33, RM34, RM35, RM36, RM37, RM38, RM39, RM40,       RM41,  RM42, RM43, RM44, RM45, RM46, RM47, RM48, RM49, RM50,       RM51,  RM52, RM53, RM54 };/* These versions of the macros use the stack, as normal. There are debuggingversions and production versions. Note that the "rw" argument of RMATCH isn'tactuall used in this definition. */#ifndef NO_RECURSE#define REGISTER register#ifdef DEBUG#define RMATCH(ra,rb,rc,rd,re,rf,rg,rw) \  { \  printf("match() called in line %d\n", __LINE__); \  rrc = match(ra,rb,mstart,rc,rd,re,rf,rg,rdepth+1); \  printf("to line %d\n", __LINE__); \  }#define RRETURN(ra) \  { \  printf("match() returned %d from line %d ", ra, __LINE__); \  return ra; \  }#else#define RMATCH(ra,rb,rc,rd,re,rf,rg,rw) \  rrc = match(ra,rb,mstart,rc,rd,re,rf,rg,rdepth+1)#define RRETURN(ra) return ra#endif#else/* These versions of the macros manage a private stack on the heap. Note thatthe "rd" argument of RMATCH isn't actually used in this definition. It's the mdargument of match(), which never changes. */#define REGISTER#define RMATCH(ra,rb,rc,rd,re,rf,rg,rw)\  {\  heapframe *newframe = (pcre_stack_malloc)(sizeof(heapframe));\  frame->Xwhere = rw; \  newframe->Xeptr = ra;\  newframe->Xecode = rb;\  newframe->Xmstart = mstart;\  newframe->Xoffset_top = rc;\  newframe->Xims = re;\  newframe->Xeptrb = rf;\  newframe->Xflags = rg;\  newframe->Xrdepth = frame->Xrdepth + 1;\  newframe->Xprevframe = frame;\  frame = newframe;\  DPRINTF(("restarting from line %d\n", __LINE__));\  goto HEAP_RECURSE;\  L_##rw:\  DPRINTF(("jumped back to line %d\n", __LINE__));\  }#define RRETURN(ra)\  {\  heapframe *newframe = frame;\  frame = newframe->Xprevframe;\  (pcre_stack_free)(newframe);\  if (frame != NULL)\    {\    rrc = ra;\    goto HEAP_RETURN;\    }\  return ra;\  }/* Structure for remembering the local variables in a private frame */typedef struct heapframe {  struct heapframe *Xprevframe;  /* Function arguments that may change */  const uschar *Xeptr;  const uschar *Xecode;  const uschar *Xmstart;  int Xoffset_top;  long int Xims;  eptrblock *Xeptrb;  int Xflags;  unsigned int Xrdepth;  /* Function local variables */  const uschar *Xcallpat;  const uschar *Xcharptr;  const uschar *Xdata;  const uschar *Xnext;  const uschar *Xpp;  const uschar *Xprev;  const uschar *Xsaved_eptr;  recursion_info Xnew_recursive;  BOOL Xcur_is_word;  BOOL Xcondition;  BOOL Xprev_is_word;  unsigned long int Xoriginal_ims;#ifdef SUPPORT_UCP  int Xprop_type;  int Xprop_value;  int Xprop_fail_result;  int Xprop_category;  int Xprop_chartype;  int Xprop_script;  int Xoclength;  uschar Xocchars[8];#endif  int Xctype;  unsigned int Xfc;  int Xfi;  int Xlength;  int Xmax;  int Xmin;  int Xnumber;  int Xoffset;  int Xop;  int Xsave_capture_last;  int Xsave_offset1, Xsave_offset2, Xsave_offset3;  int Xstacksave[REC_STACK_SAVE_MAX];  eptrblock Xnewptrb;  /* Where to jump back to */  int Xwhere;} heapframe;#endif/******************************************************************************************************************************************************//**************************************************         Match from current position            **************************************************//* This function is called recursively in many circumstances. Whenever itreturns a negative (error) response, the outer incarnation must also return thesame response.Performance note: It might be tempting to extract commonly used fields from themd structure (e.g. utf8, end_subject) into individual variables to improveperformance. Tests using gcc on a SPARC disproved this; in the first case, itmade performance worse.Arguments:   eptr        pointer to current character in subject   ecode       pointer to current position in compiled code   mstart      pointer to the current match start position (can be modified                 by encountering \K)   offset_top  current top pointer   md          pointer to "static" info for the match   ims         current /i, /m, and /s options   eptrb       pointer to chain of blocks containing eptr at start of                 brackets - for testing for empty matches   flags       can contain                 match_condassert - this is an assertion condition                 match_cbegroup - this is the start of an unlimited repeat                   group that can match an empty string   rdepth      the recursion depthReturns:       MATCH_MATCH if matched            )  these values are >= 0               MATCH_NOMATCH if failed to match  )               a negative PCRE_ERROR_xxx value if aborted by an error condition                 (e.g. stopped by repeated call or recursion limit)*/static intmatch(REGISTER USPTR eptr, REGISTER const uschar *ecode, const uschar *mstart,  int offset_top, match_data *md, unsigned long int ims, eptrblock *eptrb,  int flags, unsigned int rdepth){/* These variables do not need to be preserved over recursion in this function,so they can be ordinary variables in all cases. Mark some of them with"register" because they are used a lot in loops. */register int  rrc;         /* Returns from recursive calls */register int  i;           /* Used for loops not involving calls to RMATCH() */

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -