📄 regcomp.c
字号:
/* regcomp.c - regular expression handling *//*modification history--------------------01e,13apr98,wmd changed name of regcomp to wtxRegComp for HOSTs.01e,23mar98,fle warnings eradication01d,30sep96,elp put in share, adapted to be compiled on target side (SPR# 6775).01c,10jul96,pad undefined redefinition of malloc (AIX specific).01b,20mar95,p_m moved #include "host.h" on top of includes list, this is necessary on Windows platforms. changed #include <regex.h> to #include "regex.h".01a,10jan95,jcf created.*//*DESCRIPTIONThis library is *not* the original BSD distribution. Though the changeswere completely cosmetic (file naming, and such), the user of this libraryis forewarned.AUTHOR: Henry Spencer*//*- * Copyright (c) 1992, 1993, 1994 Henry Spencer. * Copyright (c) 1992, 1993, 1994 * The Regents of the University of California. All rights reserved. * * This code is derived from software contributed to Berkeley by * Henry Spencer. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)regcomp.c 8.5 (Berkeley) 3/20/94 */#if defined(LIBC_SCCS) && !defined(lint)static char sccsid[] = "@(#)regcomp.c 8.5 (Berkeley) 3/20/94";#endif /* LIBC_SCCS and not lint */#ifdef HOST#include "host.h"#if defined(RS6000_AIX4) || defined (RS6000_AIX3)#undef malloc#endif#include <sys/types.h>#include <stdio.h>#include <string.h>#include <ctype.h>#include <limits.h>#include <stdlib.h>#else#include "vxWorks.h"#include "stdio.h"#include "string.h"#include "ctype.h"#include "limits.h"#include "stdlib.h"#endif /* HOST */#include "regex.h"#include "regex2.h"/* character-class table */static struct cclass { char *name; char *chars; char *multis;} cclasses[] = { {"alnum", "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz\0123456789", ""}, {"alpha", "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz", ""}, {"blank", " \t", ""}, {"cntrl", "\007\b\t\n\v\f\r\1\2\3\4\5\6\16\17\20\21\22\23\24\\25\26\27\30\31\32\33\34\35\36\37\177", ""}, {"digit", "0123456789", ""}, {"graph", "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz\0123456789!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~", ""}, {"lower", "abcdefghijklmnopqrstuvwxyz", ""}, {"print", "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz\0123456789!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~ ", ""}, {"punct", "!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~", ""}, {"space", "\t\n\v\f\r ", ""}, {"upper", "ABCDEFGHIJKLMNOPQRSTUVWXYZ", ""}, {"xdigit", "0123456789ABCDEFabcdef", ""}, {NULL, 0, ""}};/* character-name table */static struct cname { char *name; char code;} cnames[] = { {"NUL", '\0'}, {"SOH", '\001'}, {"STX", '\002'}, {"ETX", '\003'}, {"EOT", '\004'}, {"ENQ", '\005'}, {"ACK", '\006'}, {"BEL", '\007'}, {"alert", '\007'}, {"BS", '\010'}, {"backspace", '\b'}, {"HT", '\011'}, {"tab", '\t'}, {"LF", '\012'}, {"newline", '\n'}, {"VT", '\013'}, {"vertical-tab", '\v'}, {"FF", '\014'}, {"form-feed", '\f'}, {"CR", '\015'}, {"carriage-return", '\r'}, {"SO", '\016'}, {"SI", '\017'}, {"DLE", '\020'}, {"DC1", '\021'}, {"DC2", '\022'}, {"DC3", '\023'}, {"DC4", '\024'}, {"NAK", '\025'}, {"SYN", '\026'}, {"ETB", '\027'}, {"CAN", '\030'}, {"EM", '\031'}, {"SUB", '\032'}, {"ESC", '\033'}, {"IS4", '\034'}, {"FS", '\034'}, {"IS3", '\035'}, {"GS", '\035'}, {"IS2", '\036'}, {"RS", '\036'}, {"IS1", '\037'}, {"US", '\037'}, {"space", ' '}, {"exclamation-mark", '!'}, {"quotation-mark", '"'}, {"number-sign", '#'}, {"dollar-sign", '$'}, {"percent-sign", '%'}, {"ampersand", '&'}, {"apostrophe", '\''}, {"left-parenthesis", '('}, {"right-parenthesis", ')'}, {"asterisk", '*'}, {"plus-sign", '+'}, {"comma", ','}, {"hyphen", '-'}, {"hyphen-minus", '-'}, {"period", '.'}, {"full-stop", '.'}, {"slash", '/'}, {"solidus", '/'}, {"zero", '0'}, {"one", '1'}, {"two", '2'}, {"three", '3'}, {"four", '4'}, {"five", '5'}, {"six", '6'}, {"seven", '7'}, {"eight", '8'}, {"nine", '9'}, {"colon", ':'}, {"semicolon", ';'}, {"less-than-sign", '<'}, {"equals-sign", '='}, {"greater-than-sign", '>'}, {"question-mark", '?'}, {"commercial-at", '@'}, {"left-square-bracket", '['}, {"backslash", '\\'}, {"reverse-solidus", '\\'}, {"right-square-bracket", ']'}, {"circumflex", '^'}, {"circumflex-accent", '^'}, {"underscore", '_'}, {"low-line", '_'}, {"grave-accent", '`'}, {"left-brace", '{'}, {"left-curly-bracket", '{'}, {"vertical-line", '|'}, {"right-brace", '}'}, {"right-curly-bracket", '}'}, {"tilde", '~'}, {"DEL", '\177'}, {NULL, 0}};/* * parse structure, passed up and down to avoid global variables and * other clumsinesses */struct parse { char *next; /* next character in RE */ char *end; /* end of string (-> NUL normally) */ int error; /* has an error been seen? */ sop *strip; /* malloced strip */ sopno ssize; /* malloced strip size (allocated) */ sopno slen; /* malloced strip length (used) */ int ncsalloc; /* number of csets allocated */ struct re_guts *g;# define NPAREN 10 /* we need to remember () 1-9 for back refs */ sopno pbegin[NPAREN]; /* -> ( ([0] unused) */ sopno pend[NPAREN]; /* -> ) ([0] unused) */};/* ========= begin header generated by ./mkh ========= */#ifdef __cplusplusextern "C" {#endif/* === regcomp.c === */static void p_ere __P((struct parse *p, int stop));static void p_ere_exp __P((struct parse *p));static void p_str __P((struct parse *p));static void p_bre __P((struct parse *p, int end1, int end2));static int p_simp_re __P((struct parse *p, int starordinary));static int p_count __P((struct parse *p));static void p_bracket __P((struct parse *p));static void p_b_term __P((struct parse *p, cset *cs));static void p_b_cclass __P((struct parse *p, cset *cs));static void p_b_eclass __P((struct parse *p, cset *cs));static char p_b_symbol __P((struct parse *p));static char p_b_coll_elem __P((struct parse *p, int endc));static char othercase __P((int ch));static void bothcases __P((struct parse *p, int ch));static void ordinary __P((struct parse *p, int ch));static void nonnewline __P((struct parse *p));static void repeat __P((struct parse *p, sopno start, int from, int to));static int seterr __P((struct parse *p, int e));static cset *allocset __P((struct parse *p));static void freeset __P((struct parse *p, cset *cs));static int freezeset __P((struct parse *p, cset *cs));static int firstch __P((struct parse *p, cset *cs));static int nch __P((struct parse *p, cset *cs));static void mcadd __P((struct parse *p, cset *cs, char *cp));#if 0 /* XXX jcf: not used! */static void mcsub __P((cset *cs, char *cp));static int mcin __P((cset *cs, char *cp));static char *mcfind __P((cset *cs, char *cp));#endif /* XXX jcf: not used! */static void mcinvert __P((struct parse *p, cset *cs));static void mccase __P((struct parse *p, cset *cs));static int isinsets __P((struct re_guts *g, int c));static int samesets __P((struct re_guts *g, int c1, int c2));static void categorize __P((struct parse *p, struct re_guts *g));static sopno dupl __P((struct parse *p, sopno start, sopno finish));static void doemit __P((struct parse *p, sop op, size_t opnd));static void doinsert __P((struct parse *p, sop op, size_t opnd, sopno pos));static void dofwd __P((struct parse *p, sopno pos, sop value));static void enlarge __P((struct parse *p, sopno size));static void stripsnug __P((struct parse *p, struct re_guts *g));static void findmust __P((struct parse *p, struct re_guts *g));static sopno pluscount __P((struct parse *p, struct re_guts *g));#ifdef __cplusplus}#endif/* ========= end header generated by ./mkh ========= */static char nuls[10]; /* place to point scanner in event of error *//* * macros for use with parse structure * BEWARE: these know that the parse structure is named `p' !!! */#define PEEK() (*p->next)#define PEEK2() (*(p->next+1))#define MORE() (p->next < p->end)#define MORE2() (p->next+1 < p->end)#define SEE(c) (MORE() && PEEK() == (c))#define SEETWO(a, b) (MORE() && MORE2() && PEEK() == (a) && PEEK2() == (b))#define EAT(c) ((SEE(c)) ? (NEXT(), 1) : 0)#define EATTWO(a, b) ((SEETWO(a, b)) ? (NEXT2(), 1) : 0)#define NEXT() (p->next++)#define NEXT2() (p->next += 2)#define NEXTn(n) (p->next += (n))#define GETNEXT() (*p->next++)#define SETERROR(e) seterr(p, (e))#define REQUIRE(co, e) (void)((co) || SETERROR(e))#define MUSTSEE(c, e) (REQUIRE(MORE() && PEEK() == (c), e))#define MUSTEAT(c, e) (REQUIRE(MORE() && GETNEXT() == (c), e))#define MUSTNOTSEE(c, e) (REQUIRE(!MORE() || PEEK() != (c), e))#define EMIT(op, sopnd) doemit(p, (sop)(op), (size_t)(sopnd))#define INSERT(op, pos) doinsert(p, (sop)(op), HERE()-(pos)+1, pos)#define AHEAD(pos) dofwd(p, pos, HERE()-(pos))#define ASTERN(sop, pos) EMIT(sop, HERE()-pos)#define HERE() (p->slen)#define THERE() (p->slen - 1)#define THERETHERE() (p->slen - 2)#define DROP(n) (p->slen -= (n))#ifndef NDEBUGstatic int never = 0; /* for use in asserts; shuts lint up */#else#define never 0 /* some <assert.h>s have bugs too */#endif/* - regcomp - interface for parser and compilation = extern int regcomp(regex_t *, const char *, int); = #define REG_BASIC 0000 = #define REG_EXTENDED 0001 = #define REG_ICASE 0002 = #define REG_NOSUB 0004 = #define REG_NEWLINE 0010 = #define REG_NOSPEC 0020 = #define REG_PEND 0040 = #define REG_DUMP 0200 */int /* 0 success, otherwise REG_something */#ifdef HOSTwtxRegComp(preg, pattern, cflags)#elseregcomp(preg, pattern, cflags)#endifregex_t *preg;const char *pattern;int cflags;{ struct parse pa; register struct re_guts *g; register struct parse *p = &pa; register int i; register size_t len;#ifdef REDEBUG# define GOODFLAGS(f) (f)#else# define GOODFLAGS(f) ((f)&~REG_DUMP)#endif cflags = GOODFLAGS(cflags); if ((cflags®_EXTENDED) && (cflags®_NOSPEC)) return(REG_INVARG); if (cflags®_PEND) { if (preg->re_endp < pattern) return(REG_INVARG); len = preg->re_endp - pattern; } else len = strlen((char *)pattern); /* do the mallocs early so failure handling is easy */ g = (struct re_guts *)malloc(sizeof(struct re_guts) + (NC-1)*sizeof(cat_t)); if (g == NULL) return(REG_ESPACE); p->ssize = len/(size_t)2*(size_t)3 + (size_t)1; /* ugh */ p->strip = (sop *)malloc(p->ssize * sizeof(sop)); p->slen = 0; if (p->strip == NULL) { free((char *)g); return(REG_ESPACE); } /* set things up */ p->g = g; p->next = (char *)pattern; /* convenience; we do not modify it */ p->end = p->next + len; p->error = 0; p->ncsalloc = 0; for (i = 0; i < NPAREN; i++) { p->pbegin[i] = 0; p->pend[i] = 0; } g->csetsize = NC; g->sets = NULL; g->setbits = NULL; g->ncsets = 0; g->cflags = cflags; g->iflags = 0; g->nbol = 0; g->neol = 0; g->must = NULL; g->mlen = 0; g->nsub = 0; g->ncategories = 1; /* category 0 is "everything else" */ g->categories = &g->catspace[-(CHAR_MIN)]; (void) memset((char *)g->catspace, 0, NC*sizeof(cat_t)); g->backrefs = 0; /* do it */ EMIT(OEND, 0); g->firststate = THERE(); if (cflags®_EXTENDED) p_ere(p, OUT); else if (cflags®_NOSPEC) p_str(p); else p_bre(p, OUT, OUT); EMIT(OEND, 0); g->laststate = THERE(); /* tidy up loose ends and fill things in */ categorize(p, g); stripsnug(p, g); findmust(p, g); g->nplus = pluscount(p, g); g->magic = MAGIC2; preg->re_nsub = g->nsub; preg->re_g = g; preg->re_magic = MAGIC1;#ifndef REDEBUG /* not debugging, so can't rely on the assert() in regexec() */ if (g->iflags&BAD) SETERROR(REG_ASSERT);#endif /* win or lose, we're done */ if (p->error != 0) /* lose */#ifdef HOST wtxRegFree (preg);#else regfree(preg);#endif /* HOST */ return(p->error);}/* - p_ere - ERE parser top level, concatenation and alternation == static void p_ere(register struct parse *p, int stop); */static voidp_ere(p, stop)register struct parse *p;int stop; /* character this ERE should end at */{ register char c; register sopno prevback = 0; register sopno prevfwd = 0; register sopno conc; register int first = 1; /* is this the first alternative? */ for (;;) {
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -