📄 zrpn.c
字号:
/* $Id: zrpn.c,v 1.134 2003/09/05 10:51:17 adam Exp $ Copyright (C) 1995,1996,1997,1998,1999,2000,2001,2002,2003 Index Data ApsThis file is part of the Zebra server.Zebra is free software; you can redistribute it and/or modify it underthe terms of the GNU General Public License as published by the FreeSoftware Foundation; either version 2, or (at your option) any laterversion.Zebra is distributed in the hope that it will be useful, but WITHOUT ANYWARRANTY; without even the implied warranty of MERCHANTABILITY orFITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public Licensefor more details.You should have received a copy of the GNU General Public Licensealong with Zebra; see the file LICENSE.zebra. If not, write to theFree Software Foundation, 59 Temple Place - Suite 330, Boston, MA02111-1307, USA.*/#include <stdio.h>#include <assert.h>#ifdef WIN32#include <io.h>#else#include <unistd.h>#endif#include <ctype.h>#include "index.h"#include <zebra_xpath.h>#include <charmap.h>#include <rstemp.h>#include <rsnull.h>#include <rsbool.h>#include <rsbetween.h>struct rpn_char_map_info { ZebraMaps zm; int reg_type;};typedef struct { int type; int major; int minor; Z_AttributesPlusTerm *zapt;} AttrType;static const char **rpn_char_map_handler (void *vp, const char **from, int len){ struct rpn_char_map_info *p = (struct rpn_char_map_info *) vp; const char **out = zebra_maps_input (p->zm, p->reg_type, from, len);#if 0 if (out && *out) { const char *outp = *out; yaz_log (LOG_LOG, "---"); while (*outp) { yaz_log (LOG_LOG, "%02X", *outp); outp++; } }#endif return out;}static void rpn_char_map_prepare (struct zebra_register *reg, int reg_type, struct rpn_char_map_info *map_info){ map_info->zm = reg->zebra_maps; map_info->reg_type = reg_type; dict_grep_cmap (reg->dict, map_info, rpn_char_map_handler);}static int attr_find_ex (AttrType *src, oid_value *attributeSetP, const char **string_value){ int num_attributes; num_attributes = src->zapt->attributes->num_attributes; while (src->major < num_attributes) { Z_AttributeElement *element; element = src->zapt->attributes->attributes[src->major]; if (src->type == *element->attributeType) { switch (element->which) { case Z_AttributeValue_numeric: ++(src->major); if (element->attributeSet && attributeSetP) { oident *attrset; attrset = oid_getentbyoid (element->attributeSet); *attributeSetP = attrset->value; } return *element->value.numeric; break; case Z_AttributeValue_complex: if (src->minor >= element->value.complex->num_list) break; if (element->attributeSet && attributeSetP) { oident *attrset; attrset = oid_getentbyoid (element->attributeSet); *attributeSetP = attrset->value; } if (element->value.complex->list[src->minor]->which == Z_StringOrNumeric_numeric) { ++(src->minor); return *element->value.complex->list[src->minor-1]->u.numeric; } else if (element->value.complex->list[src->minor]->which == Z_StringOrNumeric_string) { if (!string_value) break; ++(src->minor); *string_value = element->value.complex->list[src->minor-1]->u.string; return -2; } else break; default: assert (0); } } ++(src->major); } return -1;}static int attr_find (AttrType *src, oid_value *attributeSetP){ return attr_find_ex (src, attributeSetP, 0);}static void attr_init (AttrType *src, Z_AttributesPlusTerm *zapt, int type){ src->zapt = zapt; src->type = type; src->major = 0; src->minor = 0;}#define TERM_COUNT struct grep_info { #ifdef TERM_COUNT int *term_no; #endif ISAMS_P *isam_p_buf; int isam_p_size; int isam_p_indx; ZebraHandle zh; int reg_type; ZebraSet termset;}; static void term_untrans (ZebraHandle zh, int reg_type, char *dst, const char *src){ int len = 0; while (*src) { const char *cp = zebra_maps_output (zh->reg->zebra_maps, reg_type, &src); if (!cp && len < IT_MAX_WORD-1) dst[len++] = *src++; else while (*cp && len < IT_MAX_WORD-1) dst[len++] = *cp++; } dst[len] = '\0';}static void add_isam_p (const char *name, const char *info, struct grep_info *p){ if (p->isam_p_indx == p->isam_p_size) { ISAMS_P *new_isam_p_buf;#ifdef TERM_COUNT int *new_term_no; #endif p->isam_p_size = 2*p->isam_p_size + 100; new_isam_p_buf = (ISAMS_P *) xmalloc (sizeof(*new_isam_p_buf) * p->isam_p_size); if (p->isam_p_buf) { memcpy (new_isam_p_buf, p->isam_p_buf, p->isam_p_indx * sizeof(*p->isam_p_buf)); xfree (p->isam_p_buf); } p->isam_p_buf = new_isam_p_buf;#ifdef TERM_COUNT new_term_no = (int *) xmalloc (sizeof(*new_term_no) * p->isam_p_size); if (p->term_no) { memcpy (new_term_no, p->isam_p_buf, p->isam_p_indx * sizeof(*p->term_no)); xfree (p->term_no); } p->term_no = new_term_no;#endif } assert (*info == sizeof(*p->isam_p_buf)); memcpy (p->isam_p_buf + p->isam_p_indx, info+1, sizeof(*p->isam_p_buf));#if 1 if (p->termset) { const char *db; int set, use; char term_tmp[IT_MAX_WORD]; int su_code = 0; int len = key_SU_decode (&su_code, name); term_untrans (p->zh, p->reg_type, term_tmp, name+len+1); logf (LOG_LOG, "grep: %d %c %s", su_code, name[len], term_tmp); zebraExplain_lookup_ord (p->zh->reg->zei, su_code, &db, &set, &use); logf (LOG_LOG, "grep: set=%d use=%d db=%s", set, use, db); resultSetAddTerm (p->zh, p->termset, name[len], db, set, use, term_tmp); }#endif (p->isam_p_indx)++;}static int grep_handle (char *name, const char *info, void *p){ add_isam_p (name, info, (struct grep_info *) p); return 0;}static int term_pre (ZebraMaps zebra_maps, int reg_type, const char **src, const char *ct1, const char *ct2){ const char *s1, *s0 = *src; const char **map; /* skip white space */ while (*s0) { if (ct1 && strchr (ct1, *s0)) break; if (ct2 && strchr (ct2, *s0)) break; s1 = s0; map = zebra_maps_input (zebra_maps, reg_type, &s1, strlen(s1)); if (**map != *CHR_SPACE) break; s0 = s1; } *src = s0; return *s0;}#define REGEX_CHARS " []()|.*+?!"/* term_100: handle term, where trunc=none (no operators at all) */static int term_100 (ZebraMaps zebra_maps, int reg_type, const char **src, char *dst, int space_split, char *dst_term){ const char *s0, *s1; const char **map; int i = 0; int j = 0; const char *space_start = 0; const char *space_end = 0; if (!term_pre (zebra_maps, reg_type, src, NULL, NULL)) return 0; s0 = *src; while (*s0) { s1 = s0; map = zebra_maps_input (zebra_maps, reg_type, &s0, strlen(s0)); if (space_split) { if (**map == *CHR_SPACE) break; } else /* complete subfield only. */ { if (**map == *CHR_SPACE) { /* save space mapping for later .. */ space_start = s1; space_end = s0; continue; } else if (space_start) { /* reload last space */ while (space_start < space_end) { if (strchr (REGEX_CHARS, *space_start)) dst[i++] = '\\'; dst_term[j++] = *space_start; dst[i++] = *space_start++; } /* and reset */ space_start = space_end = 0; } } /* add non-space char */ while (s1 < s0) { if (strchr(REGEX_CHARS, *s1)) dst[i++] = '\\'; dst_term[j++] = *s1; dst[i++] = *s1++; } } dst[i] = '\0'; dst_term[j] = '\0'; *src = s0; return i;}/* term_101: handle term, where trunc=Process # */static int term_101 (ZebraMaps zebra_maps, int reg_type, const char **src, char *dst, int space_split, char *dst_term){ const char *s0, *s1; const char **map; int i = 0; int j = 0; if (!term_pre (zebra_maps, reg_type, src, "#", "#")) return 0; s0 = *src; while (*s0) { if (*s0 == '#') { dst[i++] = '.'; dst[i++] = '*'; dst_term[j++] = *s0++; } else { s1 = s0; map = zebra_maps_input (zebra_maps, reg_type, &s0, strlen(s0)); if (space_split && **map == *CHR_SPACE) break; while (s1 < s0) { if (strchr(REGEX_CHARS, *s1)) dst[i++] = '\\'; dst_term[j++] = *s1; dst[i++] = *s1++; } } } dst[i] = '\0'; dst_term[j++] = '\0'; *src = s0; return i;}/* term_103: handle term, where trunc=re-2 (regular expressions) */static int term_103 (ZebraMaps zebra_maps, int reg_type, const char **src, char *dst, int *errors, int space_split, char *dst_term){ int i = 0; int j = 0; const char *s0, *s1; const char **map; if (!term_pre (zebra_maps, reg_type, src, "^\\()[].*+?|", "(")) return 0; s0 = *src; if (errors && *s0 == '+' && s0[1] && s0[2] == '+' && s0[3] && isdigit (s0[1])) { *errors = s0[1] - '0'; s0 += 3; if (*errors > 3) *errors = 3; } while (*s0) { if (strchr ("^\\()[].*+?|-", *s0)) { dst_term[j++] = *s0; dst[i++] = *s0++; } else { s1 = s0; map = zebra_maps_input (zebra_maps, reg_type, &s0, strlen(s0)); if (**map == *CHR_SPACE) break; while (s1 < s0) { if (strchr(REGEX_CHARS, *s1)) dst[i++] = '\\'; dst_term[j++] = *s1; dst[i++] = *s1++; } } } dst[i] = '\0'; dst_term[j] = '\0'; *src = s0; return i;}/* term_103: handle term, where trunc=re-1 (regular expressions) */static int term_102 (ZebraMaps zebra_maps, int reg_type, const char **src, char *dst, int space_split, char *dst_term){ return term_103 (zebra_maps, reg_type, src, dst, NULL, space_split, dst_term);}/* term_104: handle term, where trunc=Process # and ! */static int term_104 (ZebraMaps zebra_maps, int reg_type, const char **src, char *dst, int space_split, char *dst_term){ const char *s0, *s1; const char **map; int i = 0; int j = 0; if (!term_pre (zebra_maps, reg_type, src, "?*#", "?*#")) return 0; s0 = *src; while (*s0) { if (*s0 == '?') { dst_term[j++] = *s0++; if (*s0 >= '0' && *s0 <= '9') { int limit = 0; while (*s0 >= '0' && *s0 <= '9') { limit = limit * 10 + (*s0 - '0'); dst_term[j++] = *s0++; }
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -