📄 gnuregex.c
字号:
/* * $Id: GNUregex.c,v 1.11 1998/09/23 17:14:20 wessels Exp $ *//* Extended regular expression matching and search library, * version 0.12. * (Implements POSIX draft P10003.2/D11.2, except for * internationalization features.) * * Copyright (C) 1993 Free Software Foundation, Inc. * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2, or (at your option) * any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111, USA. *//* AIX requires this to be the first thing in the file. */#if defined (_AIX) && !defined (REGEX_MALLOC)#pragma alloca#endif#ifndef _GNU_SOURCE#define _GNU_SOURCE 1#endif#include "config.h"#if !HAVE_ALLOCA#define REGEX_MALLOC 1#endif/* The `emacs' switch turns on certain matching commands * that make sense only in Emacs. */#ifdef emacs#include "lisp.h"#include "buffer.h"#include "syntax.h"/* Emacs uses `NULL' as a predicate. */#undef NULL#else /* not emacs *//* We used to test for `BSTRING' here, but only GCC and Emacs define * `BSTRING', as far as I know, and neither of them use this code. */#if HAVE_STRING_H || STDC_HEADERS#include <string.h>#else#include <strings.h>#endif#ifdef STDC_HEADERS#include <stdlib.h>#elsechar *malloc();char *realloc();#endif/* Define the syntax stuff for \<, \>, etc. *//* This must be nonzero for the wordchar and notwordchar pattern * commands in re_match_2. */#ifndef Sword#define Sword 1#endif#ifdef SYNTAX_TABLEextern char *re_syntax_table;#else /* not SYNTAX_TABLE *//* How many characters in the character set. */#define CHAR_SET_SIZE 256static char re_syntax_table[CHAR_SET_SIZE];static voidinit_syntax_once(){ register int c; static int done = 0; if (done) return; memset(re_syntax_table, 0, sizeof re_syntax_table); for (c = 'a'; c <= 'z'; c++) re_syntax_table[c] = Sword; for (c = 'A'; c <= 'Z'; c++) re_syntax_table[c] = Sword; for (c = '0'; c <= '9'; c++) re_syntax_table[c] = Sword; re_syntax_table['_'] = Sword; done = 1;}#endif /* not SYNTAX_TABLE */#define SYNTAX(c) re_syntax_table[c]#endif /* not emacs *//* Get the interface, including the syntax bits. */#include "GNUregex.h"/* isalpha etc. are used for the character classes. */#include <ctype.h>#ifndef isascii#define isascii(c) 1#endif#ifdef isblank#define ISBLANK(c) (isascii (c) && isblank (c))#else#define ISBLANK(c) ((c) == ' ' || (c) == '\t')#endif#ifdef isgraph#define ISGRAPH(c) (isascii (c) && isgraph (c))#else#define ISGRAPH(c) (isascii (c) && isprint (c) && !isspace (c))#endif#define ISPRINT(c) (isascii (c) && isprint (c))#define ISDIGIT(c) (isascii (c) && isdigit (c))#define ISALNUM(c) (isascii (c) && isalnum (c))#define ISALPHA(c) (isascii (c) && isalpha (c))#define ISCNTRL(c) (isascii (c) && iscntrl (c))#define ISLOWER(c) (isascii (c) && islower (c))#define ISPUNCT(c) (isascii (c) && ispunct (c))#define ISSPACE(c) (isascii (c) && isspace (c))#define ISUPPER(c) (isascii (c) && isupper (c))#define ISXDIGIT(c) (isascii (c) && isxdigit (c))#ifndef NULL#define NULL 0#endif/* We remove any previous definition of `SIGN_EXTEND_CHAR', * since ours (we hope) works properly with all combinations of * machines, compilers, `char' and `unsigned char' argument types. * (Per Bothner suggested the basic approach.) */#undef SIGN_EXTEND_CHAR#ifdef __STDC__#define SIGN_EXTEND_CHAR(c) ((signed char) (c))#else /* not __STDC__ *//* As in Harbison and Steele. */#define SIGN_EXTEND_CHAR(c) ((((unsigned char) (c)) ^ 128) - 128)#endif/* Should we use malloc or alloca? If REGEX_MALLOC is not defined, we * use `alloca' instead of `malloc'. This is because using malloc in * re_search* or re_match* could cause memory leaks when C-g is used in * Emacs; also, malloc is slower and causes storage fragmentation. On * the other hand, malloc is more portable, and easier to debug. * * Because we sometimes use alloca, some routines have to be macros, * not functions -- `alloca'-allocated space disappears at the end of the * function it is called in. */#ifdef REGEX_MALLOC#define REGEX_ALLOCATE malloc#define REGEX_REALLOCATE(source, osize, nsize) realloc (source, nsize)#else /* not REGEX_MALLOC *//* Emacs already defines alloca, sometimes. */#ifndef alloca/* Make alloca work the best possible way. */#ifdef __GNUC__#define alloca __builtin_alloca#else /* not __GNUC__ */#if HAVE_ALLOCA_H#include <alloca.h>#else /* not __GNUC__ or HAVE_ALLOCA_H */#ifndef _AIX /* Already did AIX, up at the top. */char *alloca();#endif /* not _AIX */#endif /* not HAVE_ALLOCA_H */#endif /* not __GNUC__ */#endif /* not alloca */#define REGEX_ALLOCATE alloca/* Assumes a `char *destination' variable. */#define REGEX_REALLOCATE(source, osize, nsize) \ (destination = (char *) alloca (nsize), \ xmemcpy (destination, source, osize), \ destination)#endif /* not REGEX_MALLOC *//* True if `size1' is non-NULL and PTR is pointing anywhere inside * `string1' or just past its end. This works if PTR is NULL, which is * a good thing. */#define FIRST_STRING_P(ptr) \ (size1 && string1 <= (ptr) && (ptr) <= string1 + size1)/* (Re)Allocate N items of type T using malloc, or fail. */#define TALLOC(n, t) ((t *) malloc ((n) * sizeof (t)))#define RETALLOC(addr, n, t) ((addr) = (t *) realloc (addr, (n) * sizeof (t)))#define REGEX_TALLOC(n, t) ((t *) REGEX_ALLOCATE ((n) * sizeof (t)))#define BYTEWIDTH 8 /* In bits. */#define STREQ(s1, s2) ((strcmp (s1, s2) == 0))#define MAX(a, b) ((a) > (b) ? (a) : (b))#define MIN(a, b) ((a) < (b) ? (a) : (b))typedef char boolean;#define false 0#define true 1/* These are the command codes that appear in compiled regular * expressions. Some opcodes are followed by argument bytes. A * command code can specify any interpretation whatsoever for its * arguments. Zero bytes may appear in the compiled regular expression. * * The value of `exactn' is needed in search.c (search_buffer) in Emacs. * So regex.h defines a symbol `RE_EXACTN_VALUE' to be 1; the value of * `exactn' we use here must also be 1. */typedef enum { no_op = 0, /* Followed by one byte giving n, then by n literal bytes. */ exactn = 1, /* Matches any (more or less) character. */ anychar, /* Matches any one char belonging to specified set. First * following byte is number of bitmap bytes. Then come bytes * for a bitmap saying which chars are in. Bits in each byte * are ordered low-bit-first. A character is in the set if its * bit is 1. A character too large to have a bit in the map is * automatically not in the set. */ charset, /* Same parameters as charset, but match any character that is * not one of those specified. */ charset_not, /* Start remembering the text that is matched, for storing in a * register. Followed by one byte with the register number, in * the range 0 to one less than the pattern buffer's re_nsub * field. Then followed by one byte with the number of groups * inner to this one. (This last has to be part of the * start_memory only because we need it in the on_failure_jump * of re_match_2.) */ start_memory, /* Stop remembering the text that is matched and store it in a * memory register. Followed by one byte with the register * number, in the range 0 to one less than `re_nsub' in the * pattern buffer, and one byte with the number of inner groups, * just like `start_memory'. (We need the number of inner * groups here because we don't have any easy way of finding the * corresponding start_memory when we're at a stop_memory.) */ stop_memory, /* Match a duplicate of something remembered. Followed by one * byte containing the register number. */ duplicate, /* Fail unless at beginning of line. */ begline, /* Fail unless at end of line. */ endline, /* Succeeds if at beginning of buffer (if emacs) or at beginning * of string to be matched (if not). */ begbuf, /* Analogously, for end of buffer/string. */ endbuf, /* Followed by two byte relative address to which to jump. */ jump, /* Same as jump, but marks the end of an alternative. */ jump_past_alt, /* Followed by two-byte relative address of place to resume at * in case of failure. */ on_failure_jump, /* Like on_failure_jump, but pushes a placeholder instead of the * current string position when executed. */ on_failure_keep_string_jump, /* Throw away latest failure point and then jump to following * two-byte relative address. */ pop_failure_jump, /* Change to pop_failure_jump if know won't have to backtrack to * match; otherwise change to jump. This is used to jump * back to the beginning of a repeat. If what follows this jump * clearly won't match what the repeat does, such that we can be * sure that there is no use backtracking out of repetitions * already matched, then we change it to a pop_failure_jump. * Followed by two-byte address. */ maybe_pop_jump, /* Jump to following two-byte address, and push a dummy failure * point. This failure point will be thrown away if an attempt * is made to use it for a failure. A `+' construct makes this * before the first repeat. Also used as an intermediary kind * of jump when compiling an alternative. */ dummy_failure_jump, /* Push a dummy failure point and continue. Used at the end of * alternatives. */ push_dummy_failure, /* Followed by two-byte relative address and two-byte number n. * After matching N times, jump to the address upon failure. */ succeed_n, /* Followed by two-byte relative address, and two-byte number n. * Jump to the address N times, then fail. */ jump_n, /* Set the following two-byte relative address to the * subsequent two-byte number. The address *includes* the two * bytes of number. */ set_number_at, wordchar, /* Matches any word-constituent character. */ notwordchar, /* Matches any char that is not a word-constituent. */ wordbeg, /* Succeeds if at word beginning. */ wordend, /* Succeeds if at word end. */ wordbound, /* Succeeds if at a word boundary. */ notwordbound /* Succeeds if not at a word boundary. */#ifdef emacs ,before_dot, /* Succeeds if before point. */ at_dot, /* Succeeds if at point. */ after_dot, /* Succeeds if after point. */ /* Matches any character whose syntax is specified. Followed by * a byte which contains a syntax code, e.g., Sword. */ syntaxspec, /* Matches any character whose syntax is not that specified. */ notsyntaxspec#endif /* emacs */} re_opcode_t;/* Common operations on the compiled pattern. *//* Store NUMBER in two contiguous bytes starting at DESTINATION. */#define STORE_NUMBER(destination, number) \ do { \ (destination)[0] = (number) & 0377; \ (destination)[1] = (number) >> 8; \ } while (0)/* Same as STORE_NUMBER, except increment DESTINATION to * the byte after where the number is stored. Therefore, DESTINATION * must be an lvalue. */#define STORE_NUMBER_AND_INCR(destination, number) \ do { \ STORE_NUMBER (destination, number); \ (destination) += 2; \ } while (0)/* Put into DESTINATION a number stored in two contiguous bytes starting * at SOURCE. */#define EXTRACT_NUMBER(destination, source) \ do { \ (destination) = *(source) & 0377; \ (destination) += SIGN_EXTEND_CHAR (*((source) + 1)) << 8; \ } while (0)#ifdef DEBUGstatic voidextract_number(dest, source) int *dest; unsigned char *source;{ int temp = SIGN_EXTEND_CHAR(*(source + 1)); *dest = *source & 0377; *dest += temp << 8;}#ifndef EXTRACT_MACROS /* To debug the macros. */#undef EXTRACT_NUMBER#define EXTRACT_NUMBER(dest, src) extract_number (&dest, src)#endif /* not EXTRACT_MACROS */#endif /* DEBUG *//* Same as EXTRACT_NUMBER, except increment SOURCE to after the number. * SOURCE must be an lvalue. */#define EXTRACT_NUMBER_AND_INCR(destination, source) \ do { \ EXTRACT_NUMBER (destination, source); \ (source) += 2; \ } while (0)#ifdef DEBUGstatic voidextract_number_and_incr(destination, source) int *destination; unsigned char **source;{ extract_number(destination, *source); *source += 2;}#ifndef EXTRACT_MACROS#undef EXTRACT_NUMBER_AND_INCR#define EXTRACT_NUMBER_AND_INCR(dest, src) \ extract_number_and_incr (&dest, &src)#endif /* not EXTRACT_MACROS */#endif /* DEBUG *//* If DEBUG is defined, Regex prints many voluminous messages about what * it is doing (if the variable `debug' is nonzero). If linked with the * main program in `iregex.c', you can enter patterns and strings * interactively. And if linked with the main program in `main.c' and * the other test files, you can run the already-written tests. */#ifdef DEBUG/* We use standard I/O for debugging. */#include <stdio.h>
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -