首页 › 资源下载 › 生物技术 › ncbi源码 › 源码查看
pcre.c

来自「ncbi源码」· C语言代码 · 共 2,222 行 · 第 1/5 页
2,222 行
#ifdef DEBUG/**************************************************        Debugging function to print chars       **************************************************//* Print a sequence of chars in printable format, stopping at the end of thesubject if the requested.Arguments:  p           points to characters  length      number to print  is_subject  TRUE if printing from within md->start_subject  md          pointer to matching data block, if is_subject is TRUEReturns:     nothing*/static voidpchars(const uschar *p, int length, BOOL is_subject, match_data *md){int c;if (is_subject && length > md->end_subject - p) length = md->end_subject - p;while (length-- > 0)  if (isprint(c = *(p++))) printf("%c", c); else printf("\\x%02x", c);}#endif/**************************************************            Handle escapes                      **************************************************//* This function is called when a \ has been encountered. It either returns apositive value for a simple escape such as \n, or a negative value whichencodes one of the more complicated things such as \d. When UTF-8 is enabled,a positive value greater than 255 may be returned. On entry, ptr is pointing atthe \. On exit, it is on the final character of the escape sequence.Arguments:  ptrptr     points to the pattern position pointer  errorptr   points to the pointer to the error message  bracount   number of previous extracting brackets  options    the options bits  isclass    TRUE if inside a character class  cd         pointer to char tables blockReturns:     zero or positive => a data character             negative => a special escape sequence             on error, errorptr is set*/static intcheck_escape(const uschar **ptrptr, const char **errorptr, int bracount,  int options, BOOL isclass, compile_data *cd){const uschar *ptr = *ptrptr;int c, i;/* If backslash is at the end of the pattern, it's an error. */c = *(++ptr);if (c == 0) *errorptr = ERR1;/* Digits or letters may have special meaning; all others are literals. */else if (c < '0' || c > 'z') {}/* Do an initial lookup in a table. A non-zero result is something that can bereturned immediately. Otherwise further processing may be required. */else if ((i = escapes[c - '0']) != 0) c = i;/* Escapes that need further processing, or are illegal. */else  {  const uschar *oldptr;  switch (c)    {    /* The handling of escape sequences consisting of a string of digits    starting with one that is not zero is not straightforward. By experiment,    the way Perl works seems to be as follows:    Outside a character class, the digits are read as a decimal number. If the    number is less than 10, or if there are that many previous extracting    left brackets, then it is a back reference. Otherwise, up to three octal    digits are read to form an escaped byte. Thus \123 is likely to be octal    123 (cf \0123, which is octal 012 followed by the literal 3). If the octal    value is greater than 377, the least significant 8 bits are taken. Inside a    character class, \ followed by a digit is always an octal number. */    case '1': case '2': case '3': case '4': case '5':    case '6': case '7': case '8': case '9':    if (!isclass)      {      oldptr = ptr;      c -= '0';      while ((cd->ctypes[ptr[1]] & ctype_digit) != 0)        c = c * 10 + *(++ptr) - '0';      if (c < 10 || c <= bracount)        {        c = -(ESC_REF + c);        break;        }      ptr = oldptr;      /* Put the pointer back and fall through */      }    /* Handle an octal number following \. If the first digit is 8 or 9, Perl    generates a binary zero byte and treats the digit as a following literal.    Thus we have to pull back the pointer by one. */    if ((c = *ptr) >= '8')      {      ptr--;      c = 0;      break;      }    /* \0 always starts an octal number, but we may drop through to here with a    larger first octal digit. */    case '0':    c -= '0';    while(i++ < 2 && (cd->ctypes[ptr[1]] & ctype_digit) != 0 &&      ptr[1] != '8' && ptr[1] != '9')        c = c * 8 + *(++ptr) - '0';    c &= 255;     /* Take least significant 8 bits */    break;    /* \x is complicated when UTF-8 is enabled. \x{ddd} is a character number    which can be greater than 0xff, but only if the ddd are hex digits. */    case 'x':#ifdef SUPPORT_UTF8    if (ptr[1] == '{' && (options & PCRE_UTF8) != 0)      {      const uschar *pt = ptr + 2;      register int count = 0;      c = 0;      while ((cd->ctypes[*pt] & ctype_xdigit) != 0)        {        count++;        c = c * 16 + cd->lcc[*pt] -          (((cd->ctypes[*pt] & ctype_digit) != 0)? '0' : 'W');        pt++;        }      if (*pt == '}')        {        if (c < 0 || count > 8) *errorptr = ERR34;        ptr = pt;        break;        }      /* If the sequence of hex digits does not end with '}', then we don't      recognize this construct; fall through to the normal \x handling. */      }#endif    /* Read just a single hex char */    c = 0;    while (i++ < 2 && (cd->ctypes[ptr[1]] & ctype_xdigit) != 0)      {      ptr++;      c = c * 16 + cd->lcc[*ptr] -        (((cd->ctypes[*ptr] & ctype_digit) != 0)? '0' : 'W');      }    break;    /* Other special escapes not starting with a digit are straightforward */    case 'c':    c = *(++ptr);    if (c == 0)      {      *errorptr = ERR2;      return 0;      }    /* A letter is upper-cased; then the 0x40 bit is flipped */    if (c >= 'a' && c <= 'z') c = cd->fcc[c];    c ^= 0x40;    break;    /* PCRE_EXTRA enables extensions to Perl in the matter of escapes. Any    other alphameric following \ is an error if PCRE_EXTRA was set; otherwise,    for Perl compatibility, it is a literal. This code looks a bit odd, but    there used to be some cases other than the default, and there may be again    in future, so I haven't "optimized" it. */    default:    if ((options & PCRE_EXTRA) != 0) switch(c)      {      default:      *errorptr = ERR3;      break;      }    break;    }  }*ptrptr = ptr;return c;}/**************************************************            Check for counted repeat            **************************************************//* This function is called when a '{' is encountered in a place where it mightstart a quantifier. It looks ahead to see if it really is a quantifier or not.It is only a quantifier if it is one of the forms {ddd} {ddd,} or {ddd,ddd}where the ddds are digits.Arguments:  p         pointer to the first char after '{'  cd        pointer to char tables blockReturns:    TRUE or FALSE*/static BOOLis_counted_repeat(const uschar *p, compile_data *cd){if ((cd->ctypes[*p++] & ctype_digit) == 0) return FALSE;while ((cd->ctypes[*p] & ctype_digit) != 0) p++;if (*p == '}') return TRUE;if (*p++ != ',') return FALSE;if (*p == '}') return TRUE;if ((cd->ctypes[*p++] & ctype_digit) == 0) return FALSE;while ((cd->ctypes[*p] & ctype_digit) != 0) p++;return (*p == '}');}/**************************************************         Read repeat counts                     **************************************************//* Read an item of the form {n,m} and return the values. This is called onlyafter is_counted_repeat() has confirmed that a repeat-count quantifier exists,so the syntax is guaranteed to be correct, but we need to check the values.Arguments:  p          pointer to first char after '{'  minp       pointer to int for min  maxp       pointer to int for max             returned as -1 if no max  errorptr   points to pointer to error message  cd         pointer to character tables clockReturns:     pointer to '}' on success;             current ptr on error, with errorptr set*/static const uschar *read_repeat_counts(const uschar *p, int *minp, int *maxp,  const char **errorptr, compile_data *cd){int min = 0;int max = -1;while ((cd->ctypes[*p] & ctype_digit) != 0) min = min * 10 + *p++ - '0';if (*p == '}') max = min; else  {  if (*(++p) != '}')    {    max = 0;    while((cd->ctypes[*p] & ctype_digit) != 0) max = max * 10 + *p++ - '0';    if (max < min)      {      *errorptr = ERR4;      return p;      }    }  }/* Do paranoid checks, then fill in the required variables, and pass back thepointer to the terminating '}'. */if (min > 65535 || max > 65535)  *errorptr = ERR5;else  {  *minp = min;  *maxp = max;  }return p;}/**************************************************        Find the fixed length of a pattern      **************************************************//* Scan a pattern and compute the fixed length of subject that will match it,if the length is fixed. This is needed for dealing with backward assertions.Arguments:  code     points to the start of the pattern (the bracket)  options  the compiling optionsReturns:   the fixed length, or -1 if there is no fixed length*/static intfind_fixedlength(uschar *code, int options){int length = -1;register int branchlength = 0;register uschar *cc = code + 3;/* Scan along the opcodes for this branch. If we get to the end of thebranch, check the length against that of the other branches. */for (;;)  {  int d;  register int op = *cc;  if (op >= OP_BRA) op = OP_BRA;  switch (op)    {    case OP_BRA:    case OP_ONCE:    case OP_COND:    d = find_fixedlength(cc, options);    if (d < 0) return -1;    branchlength += d;    do cc += (cc[1] << 8) + cc[2]; while (*cc == OP_ALT);    cc += 3;    break;    /* Reached end of a branch; if it's a ket it is the end of a nested    call. If it's ALT it is an alternation in a nested call. If it is    END it's the end of the outer call. All can be handled by the same code. */    case OP_ALT:    case OP_KET:    case OP_KETRMAX:    case OP_KETRMIN:    case OP_END:    if (length < 0) length = branchlength;      else if (length != branchlength) return -1;    if (*cc != OP_ALT) return length;    cc += 3;    branchlength = 0;    break;    /* Skip over assertive subpatterns */    case OP_ASSERT:    case OP_ASSERT_NOT:    case OP_ASSERTBACK:    case OP_ASSERTBACK_NOT:    do cc += (cc[1] << 8) + cc[2]; while (*cc == OP_ALT);    cc += 3;    break;    /* Skip over things that don't match chars */    case OP_REVERSE:    case OP_BRANUMBER:    case OP_CREF:    cc++;    /* Fall through */    case OP_OPT:    cc++;    /* Fall through */    case OP_SOD:    case OP_EOD:    case OP_EODN:    case OP_CIRC:    case OP_DOLL:    case OP_NOT_WORD_BOUNDARY:    case OP_WORD_BOUNDARY:    cc++;    break;    /* Handle char strings. In UTF-8 mode we must count characters, not bytes.    This requires a scan of the string, unfortunately. We assume valid UTF-8    strings, so all we do is reduce the length by one for byte whose bits are    10xxxxxx. */    case OP_CHARS:    branchlength += *(++cc);#ifdef SUPPORT_UTF8    for (d = 1; d <= *cc; d++)      if ((cc[d] & 0xc0) == 0x80) branchlength--;#endif    cc += *cc + 1;    break;    /* Handle exact repetitions */    case OP_EXACT:    case OP_TYPEEXACT:    branchlength += (cc[1] << 8) + cc[2];    cc += 4;    break;    /* Handle single-char matchers */    case OP_NOT_DIGIT:    case OP_DIGIT:    case OP_NOT_WHITESPACE:    case OP_WHITESPACE:    case OP_NOT_WORDCHAR:    case OP_WORDCHAR:    case OP_ANY:    branchlength++;    cc++;    break;    /* Check a class for variable quantification */    case OP_CLASS:    cc += 33;    switch (*cc)      {      case OP_CRSTAR:      case OP_CRMINSTAR:      case OP_CRQUERY:      case OP_CRMINQUERY:      return -1;      case OP_CRRANGE:      case OP_CRMINRANGE:
pcre.c - 源码说明

本页面展示了「ncbi源码」中的 pcre.c 源码文件，采用 C语言编程语言编写，共 2,222 行代码。您可以在线阅读完整代码内容，也可以返回资源详情页下载完整源码包进行本地学习和开发。
虫虫下载站收录了大量与ncbi相关的技术资源，包括源代码、技术文档、电路图等，是电子工程师和嵌入式开发者的专业学习平台。
⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?