📄 regxread.c
字号:
else logf (LOG_WARN, "bad keyword '%s' after end", p); } else if (!strcmp (p, "data")) { int textFlag = 0; int element_len; const char *element_str = NULL; while ((r = execTok (spec, &s, &cmd_str, &cmd_len)) == 3) { if (cmd_len==5 && !memcmp ("-text", cmd_str, cmd_len)) textFlag = 1; else if (cmd_len==8 && !memcmp ("-element", cmd_str, cmd_len)) { r = execTok (spec, &s, &element_str, &element_len); if (r < 2) break; } else logf (LOG_WARN, "bad data option: %.*s", cmd_len, cmd_str); } if (r != 2) { logf (LOG_WARN, "missing data item after data"); continue; } if (element_str) tagBegin (spec, element_str, element_len); do { execData (spec, cmd_str, cmd_len,textFlag); r = execTok (spec, &s, &cmd_str, &cmd_len); } while (r > 1); if (element_str) tagEnd (spec, 1, NULL, 0); } else if (!strcmp (p, "unread")) { int no, offset; r = execTok (spec, &s, &cmd_str, &cmd_len); if (r==3 && cmd_len == 7 && !memcmp ("-offset", cmd_str, cmd_len)) { r = execTok (spec, &s, &cmd_str, &cmd_len); if (r < 2) { logf (LOG_WARN, "missing number after -offset"); continue; } p = regxStrz (cmd_str, cmd_len, ptmp); offset = atoi (p); r = execTok (spec, &s, &cmd_str, &cmd_len); } else offset = 0; if (r < 2) { logf (LOG_WARN, "missing index after unread command"); continue; } if (cmd_len != 1 || *cmd_str < '0' || *cmd_str > '9') { logf (LOG_WARN, "bad index after unread command"); continue; } else { no = *cmd_str - '0'; if (no >= spec->arg_no) no = spec->arg_no - 1; spec->ptr = spec->arg_start[no] + offset; } r = execTok (spec, &s, &cmd_str, &cmd_len); } else if (!strcmp (p, "context")) { if (r > 1) { struct lexContext *lc = spec->context; r = execTok (spec, &s, &cmd_str, &cmd_len); p = regxStrz (cmd_str, cmd_len, ptmp); while (lc && strcmp (p, lc->name)) lc = lc->next; if (lc) spec->context_stack[spec->context_stack_top] = lc; else logf (LOG_WARN, "unknown context %s", p); } r = execTok (spec, &s, &cmd_str, &cmd_len); } else { logf (LOG_WARN, "unknown code command '%.*s'", cmd_len, cmd_str); r = execTok (spec, &s, &cmd_str, &cmd_len); continue; } if (r > 1) { logf (LOG_WARN, "ignoring token %.*s", cmd_len, cmd_str); do { r = execTok (spec, &s, &cmd_str, &cmd_len); } while (r > 1); } }}static int execAction (struct lexSpec *spec, struct lexRuleAction *ap, int start_ptr, int *pptr){ int sptr; int arg_start[20]; int arg_end[20]; int arg_no = 1; if (!ap) return 1; arg_start[0] = start_ptr; arg_end[0] = *pptr; spec->arg_start = arg_start; spec->arg_end = arg_end; while (ap) { switch (ap->which) { case REGX_PATTERN: if (ap->u.pattern.body) { arg_start[arg_no] = *pptr; if (!tryMatch (spec, pptr, &sptr, ap->u.pattern.dfa, 0)) { arg_end[arg_no] = F_WIN_EOF; arg_no++; arg_start[arg_no] = F_WIN_EOF; arg_end[arg_no] = F_WIN_EOF; yaz_log(LOG_DEBUG, "Pattern match rest of record"); *pptr = F_WIN_EOF; } else { arg_end[arg_no] = sptr; arg_no++; arg_start[arg_no] = sptr; arg_end[arg_no] = *pptr; } } else { arg_start[arg_no] = *pptr; if (!tryMatch (spec, pptr, &sptr, ap->u.pattern.dfa, 1)) return 1; if (sptr != arg_start[arg_no]) return 1; arg_end[arg_no] = *pptr; } arg_no++; break; case REGX_CODE: spec->arg_no = arg_no; spec->ptr = *pptr;#if HAVE_TCL_H if (spec->tcl_interp) execTcl(spec, ap->u.code); else execCode (spec, ap->u.code);#else execCode (spec, ap->u.code);#endif *pptr = spec->ptr; if (spec->stop_flag) return 0; break; case REGX_END: arg_start[arg_no] = *pptr; arg_end[arg_no] = F_WIN_EOF; arg_no++; *pptr = F_WIN_EOF; } ap = ap->next; } return 1;}static int execRule (struct lexSpec *spec, struct lexContext *context, int ruleNo, int start_ptr, int *pptr){#if REGX_DEBUG logf (LOG_LOG, "exec rule %d", ruleNo);#endif return execAction (spec, context->fastRule[ruleNo]->actionList, start_ptr, pptr);}data1_node *lexNode (struct lexSpec *spec, int *ptr){ struct lexContext *context = spec->context_stack[spec->context_stack_top]; struct DFA_state *state = context->dfa->states[0]; struct DFA_tran *t; unsigned char c; unsigned char c_prev = '\n'; int i; int last_rule = 0; /* rule number of current match */ int last_ptr = *ptr; /* last char of match */ int start_ptr = *ptr; /* first char of match */ int skip_ptr = *ptr; /* first char of run */ while (1) { c = f_win_advance (spec, ptr); if (*ptr == F_WIN_EOF) { /* end of file met */ if (last_rule) { /* there was a match */ if (skip_ptr < start_ptr) { /* deal with chars that didn't match */ int size; char *buf; buf = f_win_get (spec, skip_ptr, start_ptr, &size); execDataP (spec, buf, size, 0); } /* restore pointer */ *ptr = last_ptr; /* execute rule */ if (!execRule (spec, context, last_rule, start_ptr, ptr)) break; /* restore skip pointer */ skip_ptr = *ptr; last_rule = 0; } else if (skip_ptr < *ptr) { /* deal with chars that didn't match */ int size; char *buf; buf = f_win_get (spec, skip_ptr, *ptr, &size); execDataP (spec, buf, size, 0); } if (*ptr == F_WIN_EOF) break; } t = state->trans; i = state->tran_no; while (1) if (--i < 0) { /* no transition for character c ... */ if (last_rule) { if (skip_ptr < start_ptr) { /* deal with chars that didn't match */ int size; char *buf; buf = f_win_get (spec, skip_ptr, start_ptr, &size); execDataP (spec, buf, size, 0); } /* restore pointer */ *ptr = last_ptr; if (!execRule (spec, context, last_rule, start_ptr, ptr)) { if (spec->f_win_ef && *ptr != F_WIN_EOF) {#if REGX_DEBUG logf (LOG_LOG, "regx: endf ptr=%d", *ptr);#endif (*spec->f_win_ef)(spec->f_win_fh, *ptr); } return NULL; } context = spec->context_stack[spec->context_stack_top]; skip_ptr = *ptr; last_rule = 0; last_ptr = start_ptr = *ptr; if (start_ptr > 0) { --start_ptr; c_prev = f_win_advance (spec, &start_ptr); } } else { c_prev = f_win_advance (spec, &start_ptr); *ptr = start_ptr; } state = context->dfa->states[0]; break; } else if (c >= t->ch[0] && c <= t->ch[1]) { /* transition ... */ state = context->dfa->states[t->to]; if (state->rule_no) { if (c_prev == '\n') { last_rule = state->rule_no; last_ptr = *ptr; } else if (state->rule_nno) { last_rule = state->rule_nno; last_ptr = *ptr; } } break; } else t++; } return NULL;}static data1_node *lexRoot (struct lexSpec *spec, off_t offset, const char *context_name){ struct lexContext *lt = spec->context; int ptr = offset; spec->stop_flag = 0; spec->d1_level = 0; spec->context_stack_top = 0; while (lt) { if (!strcmp (lt->name, context_name)) break; lt = lt->next; } if (!lt) { logf (LOG_WARN, "cannot find context %s", context_name); return NULL; } spec->context_stack[spec->context_stack_top] = lt; spec->d1_stack[spec->d1_level] = NULL;#if 1 if (!lt->initFlag) { lt->initFlag = 1; execAction (spec, lt->initActionList, ptr, &ptr); }#endif execAction (spec, lt->beginActionList, ptr, &ptr); lexNode (spec, &ptr); while (spec->d1_level) { tagDataRelease (spec); (spec->d1_level)--; } execAction (spec, lt->endActionList, ptr, &ptr); return spec->d1_stack[0];}void grs_destroy(void *clientData){ struct lexSpecs *specs = (struct lexSpecs *) clientData; if (specs->spec) { lexSpecDestroy(&specs->spec); } xfree (specs);}void *grs_init(void){ struct lexSpecs *specs = (struct lexSpecs *) xmalloc (sizeof(*specs)); specs->spec = 0; return specs;}data1_node *grs_read_regx (struct grs_read_info *p){ int res; struct lexSpecs *specs = (struct lexSpecs *) p->clientData; struct lexSpec **curLexSpec = &specs->spec;#if REGX_DEBUG logf (LOG_LOG, "grs_read_regx");#endif if (!*curLexSpec || strcmp ((*curLexSpec)->name, p->type)) { if (*curLexSpec) lexSpecDestroy (curLexSpec); *curLexSpec = lexSpecCreate (p->type, p->dh); res = readFileSpec (*curLexSpec); if (res) { lexSpecDestroy (curLexSpec); return NULL; } } (*curLexSpec)->dh = p->dh; if (!p->offset) { (*curLexSpec)->f_win_start = 0; (*curLexSpec)->f_win_end = 0; (*curLexSpec)->f_win_rf = p->readf; (*curLexSpec)->f_win_sf = p->seekf; (*curLexSpec)->f_win_fh = p->fh; (*curLexSpec)->f_win_ef = p->endf; (*curLexSpec)->f_win_size = 500000; } (*curLexSpec)->m = p->mem; return lexRoot (*curLexSpec, p->offset, "main");}static struct recTypeGrs regx_type = { "regx", grs_init, grs_destroy, grs_read_regx};RecTypeGrs recTypeGrs_regx = ®x_type;#if HAVE_TCL_Hdata1_node *grs_read_tcl (struct grs_read_info *p){ int res; struct lexSpecs *specs = (struct lexSpecs *) p->clientData; struct lexSpec **curLexSpec = &specs->spec;#if REGX_DEBUG logf (LOG_LOG, "grs_read_tcl");#endif if (!*curLexSpec || strcmp ((*curLexSpec)->name, p->type)) { Tcl_Interp *tcl_interp; if (*curLexSpec) lexSpecDestroy (curLexSpec); *curLexSpec = lexSpecCreate (p->type, p->dh); Tcl_FindExecutable(""); tcl_interp = (*curLexSpec)->tcl_interp = Tcl_CreateInterp(); Tcl_Init(tcl_interp); Tcl_CreateCommand (tcl_interp, "begin", cmd_tcl_begin, *curLexSpec, 0); Tcl_CreateCommand (tcl_interp, "end", cmd_tcl_end, *curLexSpec, 0); Tcl_CreateCommand (tcl_interp, "data", cmd_tcl_data, *curLexSpec, 0); Tcl_CreateCommand (tcl_interp, "unread", cmd_tcl_unread, *curLexSpec, 0); res = readFileSpec (*curLexSpec); if (res) { lexSpecDestroy (curLexSpec); return NULL; } } (*curLexSpec)->dh = p->dh; if (!p->offset) { (*curLexSpec)->f_win_start = 0; (*curLexSpec)->f_win_end = 0; (*curLexSpec)->f_win_rf = p->readf; (*curLexSpec)->f_win_sf = p->seekf; (*curLexSpec)->f_win_fh = p->fh; (*curLexSpec)->f_win_ef = p->endf; (*curLexSpec)->f_win_size = 500000; } (*curLexSpec)->m = p->mem; return lexRoot (*curLexSpec, p->offset, "main");}static struct recTypeGrs tcl_type = { "tcl", grs_init, grs_destroy, grs_read_tcl};RecTypeGrs recTypeGrs_tcl = &tcl_type;#endif
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -