📄 regxread.c
字号:
/* $Id: regxread.c,v 1.49 2003/09/16 13:56:52 adam Exp $ Copyright (C) 1995,1996,1997,1998,1999,2000,2001,2002,2003 Index Data ApsThis file is part of the Zebra server.Zebra is free software; you can redistribute it and/or modify it underthe terms of the GNU General Public License as published by the FreeSoftware Foundation; either version 2, or (at your option) any laterversion.Zebra is distributed in the hope that it will be useful, but WITHOUT ANYWARRANTY; without even the implied warranty of MERCHANTABILITY orFITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public Licensefor more details.You should have received a copy of the GNU General Public Licensealong with Zebra; see the file LICENSE.zebra. If not, write to theFree Software Foundation, 59 Temple Place - Suite 330, Boston, MA02111-1307, USA.*/#include <stdio.h>#include <assert.h>#include <string.h>#include <ctype.h>#include <yaz/tpath.h>#include <zebrautl.h>#include <dfa.h>#include "grsread.h"#if HAVE_TCL_H#include <tcl.h>#if MAJOR_VERSION >= 8#define HAVE_TCL_OBJECTS#endif#endif#define REGX_DEBUG 0#define F_WIN_EOF 2000000000#define F_WIN_READ 1#define REGX_EOF 0#define REGX_PATTERN 1#define REGX_BODY 2#define REGX_BEGIN 3#define REGX_END 4#define REGX_CODE 5#define REGX_CONTEXT 6#define REGX_INIT 7struct regxCode { char *str;#if HAVE_TCL_OBJECTS Tcl_Obj *tcl_obj;#endif};struct lexRuleAction { int which; union { struct { struct DFA *dfa; /* REGX_PATTERN */ int body; } pattern; struct regxCode *code; /* REGX_CODE */ } u; struct lexRuleAction *next;};struct lexRuleInfo { int no; struct lexRuleAction *actionList;};struct lexRule { struct lexRuleInfo info; struct lexRule *next;};struct lexContext { char *name; struct DFA *dfa; struct lexRule *rules; struct lexRuleInfo **fastRule; int ruleNo; int initFlag; struct lexRuleAction *beginActionList; struct lexRuleAction *endActionList; struct lexRuleAction *initActionList; struct lexContext *next;};struct lexConcatBuf { int max; char *buf;};struct lexSpec { char *name; struct lexContext *context; struct lexContext **context_stack; int context_stack_size; int context_stack_top; int lineNo; NMEM m; data1_handle dh;#if HAVE_TCL_H Tcl_Interp *tcl_interp;#endif void *f_win_fh; void (*f_win_ef)(void *, off_t); int f_win_start; /* first byte of buffer is this file offset */ int f_win_end; /* last byte of buffer is this offset - 1 */ int f_win_size; /* size of buffer */ char *f_win_buf; /* buffer itself */ int (*f_win_rf)(void *, char *, size_t); off_t (*f_win_sf)(void *, off_t); struct lexConcatBuf *concatBuf; int maxLevel; data1_node **d1_stack; int d1_level; int stop_flag; int *arg_start; int *arg_end; int arg_no; int ptr;};struct lexSpecs { struct lexSpec *spec;};static char *f_win_get (struct lexSpec *spec, off_t start_pos, off_t end_pos, int *size){ int i, r, off = start_pos - spec->f_win_start; if (off >= 0 && end_pos <= spec->f_win_end) { *size = end_pos - start_pos; return spec->f_win_buf + off; } if (off < 0 || start_pos >= spec->f_win_end) { (*spec->f_win_sf)(spec->f_win_fh, start_pos); spec->f_win_start = start_pos; if (!spec->f_win_buf) spec->f_win_buf = (char *) xmalloc (spec->f_win_size); *size = (*spec->f_win_rf)(spec->f_win_fh, spec->f_win_buf, spec->f_win_size); spec->f_win_end = spec->f_win_start + *size; if (*size > end_pos - start_pos) *size = end_pos - start_pos; return spec->f_win_buf; } for (i = 0; i<spec->f_win_end - start_pos; i++) spec->f_win_buf[i] = spec->f_win_buf[i + off]; r = (*spec->f_win_rf)(spec->f_win_fh, spec->f_win_buf + i, spec->f_win_size - i); spec->f_win_start = start_pos; spec->f_win_end += r; *size = i + r; if (*size > end_pos - start_pos) *size = end_pos - start_pos; return spec->f_win_buf;}static int f_win_advance (struct lexSpec *spec, int *pos){ int size; char *buf; if (*pos >= spec->f_win_start && *pos < spec->f_win_end) return spec->f_win_buf[(*pos)++ - spec->f_win_start]; if (*pos == F_WIN_EOF) return 0; buf = f_win_get (spec, *pos, *pos+1, &size); if (size == 1) { (*pos)++; return *buf; } *pos = F_WIN_EOF; return 0;}static void regxCodeDel (struct regxCode **pp){ struct regxCode *p = *pp; if (p) {#if HAVE_TCL_OBJECTS if (p->tcl_obj) Tcl_DecrRefCount (p->tcl_obj);#endif xfree (p->str); xfree (p); *pp = NULL; }}static void regxCodeMk (struct regxCode **pp, const char *buf, int len){ struct regxCode *p; p = (struct regxCode *) xmalloc (sizeof(*p)); p->str = (char *) xmalloc (len+1); memcpy (p->str, buf, len); p->str[len] = '\0';#if HAVE_TCL_OBJECTS p->tcl_obj = Tcl_NewStringObj ((char *) buf, len); if (p->tcl_obj) Tcl_IncrRefCount (p->tcl_obj);#endif *pp = p;}static struct DFA *lexSpecDFA (void){ struct DFA *dfa; dfa = dfa_init (); dfa_parse_cmap_del (dfa, ' '); dfa_parse_cmap_del (dfa, '\t'); dfa_parse_cmap_add (dfa, '/', 0); return dfa;}static void actionListDel (struct lexRuleAction **rap){ struct lexRuleAction *ra1, *ra; for (ra = *rap; ra; ra = ra1) { ra1 = ra->next; switch (ra->which) { case REGX_PATTERN: dfa_delete (&ra->u.pattern.dfa); break; case REGX_CODE: regxCodeDel (&ra->u.code); break; } xfree (ra); } *rap = NULL;}static struct lexContext *lexContextCreate (const char *name){ struct lexContext *p = (struct lexContext *) xmalloc (sizeof(*p)); p->name = xstrdup (name); p->ruleNo = 1; p->initFlag = 0; p->dfa = lexSpecDFA (); p->rules = NULL; p->fastRule = NULL; p->beginActionList = NULL; p->endActionList = NULL; p->initActionList = NULL; p->next = NULL; return p;}static void lexContextDestroy (struct lexContext *p){ struct lexRule *rp, *rp1; dfa_delete (&p->dfa); xfree (p->fastRule); for (rp = p->rules; rp; rp = rp1) { rp1 = rp->next; actionListDel (&rp->info.actionList); xfree (rp); } actionListDel (&p->beginActionList); actionListDel (&p->endActionList); actionListDel (&p->initActionList); xfree (p->name); xfree (p);}static struct lexSpec *lexSpecCreate (const char *name, data1_handle dh){ struct lexSpec *p; int i; p = (struct lexSpec *) xmalloc (sizeof(*p)); p->name = (char *) xmalloc (strlen(name)+1); strcpy (p->name, name);#if HAVE_TCL_H p->tcl_interp = 0;#endif p->dh = dh; p->context = NULL; p->context_stack_size = 100; p->context_stack = (struct lexContext **) xmalloc (sizeof(*p->context_stack) * p->context_stack_size); p->f_win_buf = NULL; p->maxLevel = 128; p->concatBuf = (struct lexConcatBuf *) xmalloc (sizeof(*p->concatBuf) * p->maxLevel); for (i = 0; i < p->maxLevel; i++) { p->concatBuf[i].max = 0; p->concatBuf[i].buf = 0; } p->d1_stack = (data1_node **) xmalloc (sizeof(*p->d1_stack) * p->maxLevel); p->d1_level = 0; return p;}static void lexSpecDestroy (struct lexSpec **pp){ struct lexSpec *p; struct lexContext *lt; int i; assert (pp); p = *pp; if (!p) return ; for (i = 0; i < p->maxLevel; i++) xfree (p->concatBuf[i].buf); xfree (p->concatBuf); lt = p->context; while (lt) { struct lexContext *lt_next = lt->next; lexContextDestroy (lt); lt = lt_next; }#if HAVE_TCL_OBJECTS if (p->tcl_interp) Tcl_DeleteInterp (p->tcl_interp);#endif xfree (p->name); xfree (p->f_win_buf); xfree (p->context_stack); xfree (p->d1_stack); xfree (p); *pp = NULL;}static int readParseToken (const char **cpp, int *len){ const char *cp = *cpp; char cmd[32]; int i, level; while (*cp == ' ' || *cp == '\t' || *cp == '\n' || *cp == '\r') cp++; switch (*cp) { case '\0': return 0; case '/': *cpp = cp+1; return REGX_PATTERN; case '{': *cpp = cp+1; level = 1; while (*++cp) { if (*cp == '{') level++; else if (*cp == '}') { level--; if (level == 0) break; } } *len = cp - *cpp; return REGX_CODE; default: i = 0; while (1) { if (*cp >= 'a' && *cp <= 'z') cmd[i] = *cp; else if (*cp >= 'A' && *cp <= 'Z') cmd[i] = *cp + 'a' - 'A'; else break; if (i < (int) sizeof(cmd)-2) i++; cp++; } cmd[i] = '\0'; if (i == 0) { logf (LOG_WARN, "bad character %d %c", *cp, *cp); cp++; while (*cp && *cp != ' ' && *cp != '\t' && *cp != '\n' && *cp != '\r') cp++; *cpp = cp; return 0; } *cpp = cp; if (!strcmp (cmd, "begin")) return REGX_BEGIN; else if (!strcmp (cmd, "end")) return REGX_END; else if (!strcmp (cmd, "body")) return REGX_BODY; else if (!strcmp (cmd, "context")) return REGX_CONTEXT; else if (!strcmp (cmd, "init")) return REGX_INIT; else { logf (LOG_WARN, "bad command %s", cmd); return 0; } }}static int actionListMk (struct lexSpec *spec, const char *s, struct lexRuleAction **ap){ int r, tok, len; int bodyMark = 0; const char *s0; while ((tok = readParseToken (&s, &len))) { switch (tok) { case REGX_BODY: bodyMark = 1; continue; case REGX_CODE: *ap = (struct lexRuleAction *) xmalloc (sizeof(**ap)); (*ap)->which = tok; regxCodeMk (&(*ap)->u.code, s, len); s += len+1; break; case REGX_PATTERN: *ap = (struct lexRuleAction *) xmalloc (sizeof(**ap)); (*ap)->which = tok; (*ap)->u.pattern.body = bodyMark; bodyMark = 0; (*ap)->u.pattern.dfa = lexSpecDFA (); s0 = s; r = dfa_parse ((*ap)->u.pattern.dfa, &s); if (r || *s != '/') { xfree (*ap); *ap = NULL; logf (LOG_WARN, "regular expression error '%.*s'", s-s0, s0); return -1; } if (debug_dfa_tran)
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -