⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 gnuregex.c

📁 -
💻 C
📖 第 1 页 / 共 5 页
字号:
/* * $Id: GNUregex.c,v 1.11 1998/09/23 17:14:20 wessels Exp $ *//* Extended regular expression matching and search library, * version 0.12. * (Implements POSIX draft P10003.2/D11.2, except for * internationalization features.) *  * Copyright (C) 1993 Free Software Foundation, Inc. *  * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2, or (at your option) * any later version. *  * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the * GNU General Public License for more details. *  * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111, USA.  *//* AIX requires this to be the first thing in the file. */#if defined (_AIX) && !defined (REGEX_MALLOC)#pragma alloca#endif#ifndef _GNU_SOURCE#define _GNU_SOURCE 1#endif#include "config.h"#if !HAVE_ALLOCA#define REGEX_MALLOC 1#endif/* The `emacs' switch turns on certain matching commands * that make sense only in Emacs. */#ifdef emacs#include "lisp.h"#include "buffer.h"#include "syntax.h"/* Emacs uses `NULL' as a predicate.  */#undef NULL#else /* not emacs *//* We used to test for `BSTRING' here, but only GCC and Emacs define * `BSTRING', as far as I know, and neither of them use this code.  */#if HAVE_STRING_H || STDC_HEADERS#include <string.h>#else#include <strings.h>#endif#ifdef STDC_HEADERS#include <stdlib.h>#elsechar *malloc();char *realloc();#endif/* Define the syntax stuff for \<, \>, etc.  *//* This must be nonzero for the wordchar and notwordchar pattern * commands in re_match_2.  */#ifndef Sword#define Sword 1#endif#ifdef SYNTAX_TABLEextern char *re_syntax_table;#else /* not SYNTAX_TABLE *//* How many characters in the character set.  */#define CHAR_SET_SIZE 256static char re_syntax_table[CHAR_SET_SIZE];static voidinit_syntax_once(){    register int c;    static int done = 0;    if (done)	return;    memset(re_syntax_table, 0, sizeof re_syntax_table);    for (c = 'a'; c <= 'z'; c++)	re_syntax_table[c] = Sword;    for (c = 'A'; c <= 'Z'; c++)	re_syntax_table[c] = Sword;    for (c = '0'; c <= '9'; c++)	re_syntax_table[c] = Sword;    re_syntax_table['_'] = Sword;    done = 1;}#endif /* not SYNTAX_TABLE */#define SYNTAX(c) re_syntax_table[c]#endif /* not emacs *//* Get the interface, including the syntax bits.  */#include "GNUregex.h"/* isalpha etc. are used for the character classes.  */#include <ctype.h>#ifndef isascii#define isascii(c) 1#endif#ifdef isblank#define ISBLANK(c) (isascii (c) && isblank (c))#else#define ISBLANK(c) ((c) == ' ' || (c) == '\t')#endif#ifdef isgraph#define ISGRAPH(c) (isascii (c) && isgraph (c))#else#define ISGRAPH(c) (isascii (c) && isprint (c) && !isspace (c))#endif#define ISPRINT(c) (isascii (c) && isprint (c))#define ISDIGIT(c) (isascii (c) && isdigit (c))#define ISALNUM(c) (isascii (c) && isalnum (c))#define ISALPHA(c) (isascii (c) && isalpha (c))#define ISCNTRL(c) (isascii (c) && iscntrl (c))#define ISLOWER(c) (isascii (c) && islower (c))#define ISPUNCT(c) (isascii (c) && ispunct (c))#define ISSPACE(c) (isascii (c) && isspace (c))#define ISUPPER(c) (isascii (c) && isupper (c))#define ISXDIGIT(c) (isascii (c) && isxdigit (c))#ifndef NULL#define NULL 0#endif/* We remove any previous definition of `SIGN_EXTEND_CHAR', * since ours (we hope) works properly with all combinations of * machines, compilers, `char' and `unsigned char' argument types. * (Per Bothner suggested the basic approach.)  */#undef SIGN_EXTEND_CHAR#ifdef __STDC__#define SIGN_EXTEND_CHAR(c) ((signed char) (c))#else /* not __STDC__ *//* As in Harbison and Steele.  */#define SIGN_EXTEND_CHAR(c) ((((unsigned char) (c)) ^ 128) - 128)#endif/* Should we use malloc or alloca?  If REGEX_MALLOC is not defined, we * use `alloca' instead of `malloc'.  This is because using malloc in * re_search* or re_match* could cause memory leaks when C-g is used in * Emacs; also, malloc is slower and causes storage fragmentation.  On * the other hand, malloc is more portable, and easier to debug.   *  * Because we sometimes use alloca, some routines have to be macros, * not functions -- `alloca'-allocated space disappears at the end of the * function it is called in.  */#ifdef REGEX_MALLOC#define REGEX_ALLOCATE malloc#define REGEX_REALLOCATE(source, osize, nsize) realloc (source, nsize)#else /* not REGEX_MALLOC  *//* Emacs already defines alloca, sometimes.  */#ifndef alloca/* Make alloca work the best possible way.  */#ifdef __GNUC__#define alloca __builtin_alloca#else /* not __GNUC__ */#if HAVE_ALLOCA_H#include <alloca.h>#else /* not __GNUC__ or HAVE_ALLOCA_H */#ifndef _AIX			/* Already did AIX, up at the top.  */char *alloca();#endif /* not _AIX */#endif /* not HAVE_ALLOCA_H */#endif /* not __GNUC__ */#endif /* not alloca */#define REGEX_ALLOCATE alloca/* Assumes a `char *destination' variable.  */#define REGEX_REALLOCATE(source, osize, nsize)				\  (destination = (char *) alloca (nsize),				\   xmemcpy (destination, source, osize),				\   destination)#endif /* not REGEX_MALLOC *//* True if `size1' is non-NULL and PTR is pointing anywhere inside * `string1' or just past its end.  This works if PTR is NULL, which is * a good thing.  */#define FIRST_STRING_P(ptr) 					\  (size1 && string1 <= (ptr) && (ptr) <= string1 + size1)/* (Re)Allocate N items of type T using malloc, or fail.  */#define TALLOC(n, t) ((t *) malloc ((n) * sizeof (t)))#define RETALLOC(addr, n, t) ((addr) = (t *) realloc (addr, (n) * sizeof (t)))#define REGEX_TALLOC(n, t) ((t *) REGEX_ALLOCATE ((n) * sizeof (t)))#define BYTEWIDTH 8		/* In bits.  */#define STREQ(s1, s2) ((strcmp (s1, s2) == 0))#define MAX(a, b) ((a) > (b) ? (a) : (b))#define MIN(a, b) ((a) < (b) ? (a) : (b))typedef char boolean;#define false 0#define true 1/* These are the command codes that appear in compiled regular * expressions.  Some opcodes are followed by argument bytes.  A * command code can specify any interpretation whatsoever for its * arguments.  Zero bytes may appear in the compiled regular expression. *  * The value of `exactn' is needed in search.c (search_buffer) in Emacs. * So regex.h defines a symbol `RE_EXACTN_VALUE' to be 1; the value of * `exactn' we use here must also be 1.  */typedef enum {    no_op = 0,    /* Followed by one byte giving n, then by n literal bytes.  */    exactn = 1,    /* Matches any (more or less) character.  */    anychar,    /* Matches any one char belonging to specified set.  First     * following byte is number of bitmap bytes.  Then come bytes     * for a bitmap saying which chars are in.  Bits in each byte     * are ordered low-bit-first.  A character is in the set if its     * bit is 1.  A character too large to have a bit in the map is     * automatically not in the set.  */    charset,    /* Same parameters as charset, but match any character that is     * not one of those specified.  */    charset_not,    /* Start remembering the text that is matched, for storing in a     * register.  Followed by one byte with the register number, in     * the range 0 to one less than the pattern buffer's re_nsub     * field.  Then followed by one byte with the number of groups     * inner to this one.  (This last has to be part of the     * start_memory only because we need it in the on_failure_jump     * of re_match_2.)  */    start_memory,    /* Stop remembering the text that is matched and store it in a     * memory register.  Followed by one byte with the register     * number, in the range 0 to one less than `re_nsub' in the     * pattern buffer, and one byte with the number of inner groups,     * just like `start_memory'.  (We need the number of inner     * groups here because we don't have any easy way of finding the     * corresponding start_memory when we're at a stop_memory.)  */    stop_memory,    /* Match a duplicate of something remembered. Followed by one     * byte containing the register number.  */    duplicate,    /* Fail unless at beginning of line.  */    begline,    /* Fail unless at end of line.  */    endline,    /* Succeeds if at beginning of buffer (if emacs) or at beginning     * of string to be matched (if not).  */    begbuf,    /* Analogously, for end of buffer/string.  */    endbuf,    /* Followed by two byte relative address to which to jump.  */    jump,    /* Same as jump, but marks the end of an alternative.  */    jump_past_alt,    /* Followed by two-byte relative address of place to resume at     * in case of failure.  */    on_failure_jump,    /* Like on_failure_jump, but pushes a placeholder instead of the     * current string position when executed.  */    on_failure_keep_string_jump,    /* Throw away latest failure point and then jump to following     * two-byte relative address.  */    pop_failure_jump,    /* Change to pop_failure_jump if know won't have to backtrack to     * match; otherwise change to jump.  This is used to jump     * back to the beginning of a repeat.  If what follows this jump     * clearly won't match what the repeat does, such that we can be     * sure that there is no use backtracking out of repetitions     * already matched, then we change it to a pop_failure_jump.     * Followed by two-byte address.  */    maybe_pop_jump,    /* Jump to following two-byte address, and push a dummy failure     * point. This failure point will be thrown away if an attempt     * is made to use it for a failure.  A `+' construct makes this     * before the first repeat.  Also used as an intermediary kind     * of jump when compiling an alternative.  */    dummy_failure_jump,    /* Push a dummy failure point and continue.  Used at the end of     * alternatives.  */    push_dummy_failure,    /* Followed by two-byte relative address and two-byte number n.     * After matching N times, jump to the address upon failure.  */    succeed_n,    /* Followed by two-byte relative address, and two-byte number n.     * Jump to the address N times, then fail.  */    jump_n,    /* Set the following two-byte relative address to the     * subsequent two-byte number.  The address *includes* the two     * bytes of number.  */    set_number_at,    wordchar,			/* Matches any word-constituent character.  */    notwordchar,		/* Matches any char that is not a word-constituent.  */    wordbeg,			/* Succeeds if at word beginning.  */    wordend,			/* Succeeds if at word end.  */    wordbound,			/* Succeeds if at a word boundary.  */    notwordbound		/* Succeeds if not at a word boundary.  */#ifdef emacs    ,before_dot,		/* Succeeds if before point.  */    at_dot,			/* Succeeds if at point.  */    after_dot,			/* Succeeds if after point.  */    /* Matches any character whose syntax is specified.  Followed by     * a byte which contains a syntax code, e.g., Sword.  */    syntaxspec,    /* Matches any character whose syntax is not that specified.  */    notsyntaxspec#endif				/* emacs */} re_opcode_t;/* Common operations on the compiled pattern.  *//* Store NUMBER in two contiguous bytes starting at DESTINATION.  */#define STORE_NUMBER(destination, number)				\  do {									\    (destination)[0] = (number) & 0377;					\    (destination)[1] = (number) >> 8;					\  } while (0)/* Same as STORE_NUMBER, except increment DESTINATION to * the byte after where the number is stored.  Therefore, DESTINATION * must be an lvalue.  */#define STORE_NUMBER_AND_INCR(destination, number)			\  do {									\    STORE_NUMBER (destination, number);					\    (destination) += 2;							\  } while (0)/* Put into DESTINATION a number stored in two contiguous bytes starting * at SOURCE.  */#define EXTRACT_NUMBER(destination, source)				\  do {									\    (destination) = *(source) & 0377;					\    (destination) += SIGN_EXTEND_CHAR (*((source) + 1)) << 8;		\  } while (0)#ifdef DEBUGstatic voidextract_number(dest, source)     int *dest;     unsigned char *source;{    int temp = SIGN_EXTEND_CHAR(*(source + 1));    *dest = *source & 0377;    *dest += temp << 8;}#ifndef EXTRACT_MACROS		/* To debug the macros.  */#undef EXTRACT_NUMBER#define EXTRACT_NUMBER(dest, src) extract_number (&dest, src)#endif /* not EXTRACT_MACROS */#endif /* DEBUG *//* Same as EXTRACT_NUMBER, except increment SOURCE to after the number. * SOURCE must be an lvalue.  */#define EXTRACT_NUMBER_AND_INCR(destination, source)			\  do {									\    EXTRACT_NUMBER (destination, source);				\    (source) += 2; 							\  } while (0)#ifdef DEBUGstatic voidextract_number_and_incr(destination, source)     int *destination;     unsigned char **source;{    extract_number(destination, *source);    *source += 2;}#ifndef EXTRACT_MACROS#undef EXTRACT_NUMBER_AND_INCR#define EXTRACT_NUMBER_AND_INCR(dest, src) \  extract_number_and_incr (&dest, &src)#endif /* not EXTRACT_MACROS */#endif /* DEBUG *//* If DEBUG is defined, Regex prints many voluminous messages about what * it is doing (if the variable `debug' is nonzero).  If linked with the * main program in `iregex.c', you can enter patterns and strings * interactively.  And if linked with the main program in `main.c' and * the other test files, you can run the already-written tests.  */#ifdef DEBUG/* We use standard I/O for debugging.  */#include <stdio.h>

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -