📄 zrpn.c
字号:
attent attp; data1_local_attribute id_xpath_attr; data1_local_attribute *local_attr; int max_pos, prefix_len = 0; termp = *term_sub; if (zebraExplain_curDatabase (zh->reg->zei, basenames[base_no])) { zh->errCode = 109; /* Database unavailable */ zh->errString = basenames[base_no]; return -1; } if (use_value == -2) /* string attribute (assume IDXPATH/any) */ { use_value = xpath_use; attp.local_attributes = &id_xpath_attr; attp.attset_ordinal = VAL_IDXPATH; id_xpath_attr.next = 0; id_xpath_attr.local = use_value; } else if (curAttributeSet == VAL_IDXPATH) { attp.local_attributes = &id_xpath_attr; attp.attset_ordinal = VAL_IDXPATH; id_xpath_attr.next = 0; id_xpath_attr.local = use_value; } else { if ((r=att_getentbyatt (zh, &attp, curAttributeSet, use_value))) { logf (LOG_DEBUG, "att_getentbyatt fail. set=%d use=%d r=%d", curAttributeSet, use_value, r); if (r == -1) { /* set was found, but value wasn't defined */ char val_str[32]; sprintf (val_str, "%d", use_value); errCode = 114; errString = nmem_strdup (stream, val_str); } else { int oid[OID_SIZE]; struct oident oident; oident.proto = PROTO_Z3950; oident.oclass = CLASS_ATTSET; oident.value = curAttributeSet; oid_ent_to_oid (&oident, oid); errCode = 121; errString = nmem_strdup (stream, oident.desc); } continue; } } for (local_attr = attp.local_attributes; local_attr; local_attr = local_attr->next) { int ord; char ord_buf[32]; int i, ord_len; ord = zebraExplain_lookupSU (zh->reg->zei, attp.attset_ordinal, local_attr->local); if (ord < 0) continue; if (prefix_len) term_dict[prefix_len++] = '|'; else term_dict[prefix_len++] = '('; ord_len = key_SU_encode (ord, ord_buf); for (i = 0; i<ord_len; i++) { term_dict[prefix_len++] = 1; term_dict[prefix_len++] = ord_buf[i]; } } if (!prefix_len) { char val_str[32]; sprintf (val_str, "%d", use_value); errCode = 114; errString = nmem_strdup (stream, val_str); continue; } bases_ok++; /* this has OK attributes */ term_dict[prefix_len++] = ')'; term_dict[prefix_len++] = 1; term_dict[prefix_len++] = reg_type; logf (LOG_DEBUG, "reg_type = %d", term_dict[prefix_len-1]); term_dict[prefix_len] = '\0'; j = prefix_len; switch (truncation_value) { case -1: /* not specified */ case 100: /* do not truncate */ if (!string_relation (zh, zapt, &termp, term_dict, attributeSet, reg_type, space_split, term_dst)) return 0; logf (LOG_LOG, "dict_lookup_grep: %s", term_dict+prefix_len); r = dict_lookup_grep (zh->reg->dict, term_dict, 0, grep_info, &max_pos, 0, grep_handle); if (r) logf (LOG_WARN, "dict_lookup_grep fail %d", r); break; case 1: /* right truncation */ term_dict[j++] = '('; if (!term_100 (zh->reg->zebra_maps, reg_type, &termp, term_dict + j, space_split, term_dst)) return 0; strcat (term_dict, ".*)"); dict_lookup_grep (zh->reg->dict, term_dict, 0, grep_info, &max_pos, 0, grep_handle); break; case 2: /* keft truncation */ term_dict[j++] = '('; term_dict[j++] = '.'; term_dict[j++] = '*'; if (!term_100 (zh->reg->zebra_maps, reg_type, &termp, term_dict + j, space_split, term_dst)) return 0; strcat (term_dict, ")"); dict_lookup_grep (zh->reg->dict, term_dict, 0, grep_info, &max_pos, 0, grep_handle); break; case 3: /* left&right truncation */ term_dict[j++] = '('; term_dict[j++] = '.'; term_dict[j++] = '*'; if (!term_100 (zh->reg->zebra_maps, reg_type, &termp, term_dict + j, space_split, term_dst)) return 0; strcat (term_dict, ".*)"); dict_lookup_grep (zh->reg->dict, term_dict, 0, grep_info, &max_pos, 0, grep_handle); break; zh->errCode = 120; return -1; case 101: /* process # in term */ term_dict[j++] = '('; if (!term_101 (zh->reg->zebra_maps, reg_type, &termp, term_dict + j, space_split, term_dst)) return 0; strcat (term_dict, ")"); r = dict_lookup_grep (zh->reg->dict, term_dict, 0, grep_info, &max_pos, 0, grep_handle); if (r) logf (LOG_WARN, "dict_lookup_grep err, trunc=#: %d", r); break; case 102: /* Regexp-1 */ term_dict[j++] = '('; if (!term_102 (zh->reg->zebra_maps, reg_type, &termp, term_dict + j, space_split, term_dst)) return 0; strcat (term_dict, ")"); logf (LOG_DEBUG, "Regexp-1 tolerance=%d", r); r = dict_lookup_grep (zh->reg->dict, term_dict, 0, grep_info, &max_pos, 0, grep_handle); if (r) logf (LOG_WARN, "dict_lookup_grep err, trunc=regular: %d", r); break; case 103: /* Regexp-2 */ r = 1; term_dict[j++] = '('; if (!term_103 (zh->reg->zebra_maps, reg_type, &termp, term_dict + j, &r, space_split, term_dst)) return 0; strcat (term_dict, ")"); logf (LOG_DEBUG, "Regexp-2 tolerance=%d", r); r = dict_lookup_grep (zh->reg->dict, term_dict, r, grep_info, &max_pos, 2, grep_handle); if (r) logf (LOG_WARN, "dict_lookup_grep err, trunc=eregular: %d", r); break; case 104: /* process # and ! in term */ term_dict[j++] = '('; if (!term_104 (zh->reg->zebra_maps, reg_type, &termp, term_dict + j, space_split, term_dst)) return 0; strcat (term_dict, ")"); r = dict_lookup_grep (zh->reg->dict, term_dict, 0, grep_info, &max_pos, 0, grep_handle); if (r) logf (LOG_WARN, "dict_lookup_grep err, trunc=#/!: %d", r); break; case 105: /* process * and ! in term */ term_dict[j++] = '('; if (!term_105 (zh->reg->zebra_maps, reg_type, &termp, term_dict + j, space_split, term_dst, 1)) return 0; strcat (term_dict, ")"); r = dict_lookup_grep (zh->reg->dict, term_dict, 0, grep_info, &max_pos, 0, grep_handle); if (r) logf (LOG_WARN, "dict_lookup_grep err, trunc=*/!: %d", r); break; case 106: /* process * and ! in term */ term_dict[j++] = '('; if (!term_105 (zh->reg->zebra_maps, reg_type, &termp, term_dict + j, space_split, term_dst, 0)) return 0; strcat (term_dict, ")"); r = dict_lookup_grep (zh->reg->dict, term_dict, 0, grep_info, &max_pos, 0, grep_handle); if (r) logf (LOG_WARN, "dict_lookup_grep err, trunc=*/!: %d", r); break; } } if (!bases_ok) { zh->errCode = errCode; zh->errString = errString; return -1; } *term_sub = termp; logf (LOG_DEBUG, "%d positions", grep_info->isam_p_indx); return 1;}/* convert APT search term to UTF8 */static int zapt_term_to_utf8 (ZebraHandle zh, Z_AttributesPlusTerm *zapt, char *termz){ size_t sizez; Z_Term *term = zapt->term; switch (term->which) { case Z_Term_general: if (zh->iconv_to_utf8 != 0) { char *inbuf = term->u.general->buf; size_t inleft = term->u.general->len; char *outbuf = termz; size_t outleft = IT_MAX_WORD-1; size_t ret; ret = yaz_iconv(zh->iconv_to_utf8, &inbuf, &inleft, &outbuf, &outleft); if (ret == (size_t)(-1)) { ret = yaz_iconv(zh->iconv_to_utf8, 0, 0, 0, 0); zh->errCode = 125; return -1; } *outbuf = 0; } else { sizez = term->u.general->len; if (sizez > IT_MAX_WORD-1) sizez = IT_MAX_WORD-1; memcpy (termz, term->u.general->buf, sizez); termz[sizez] = '\0'; } break; case Z_Term_characterString: sizez = strlen(term->u.characterString); if (sizez > IT_MAX_WORD-1) sizez = IT_MAX_WORD-1; memcpy (termz, term->u.characterString, sizez); termz[sizez] = '\0'; break; default: zh->errCode = 124; return -1; } return 0;}/* convert APT SCAN term to internal cmap */static int trans_scan_term (ZebraHandle zh, Z_AttributesPlusTerm *zapt, char *termz, int reg_type){ char termz0[IT_MAX_WORD]; if (zapt_term_to_utf8(zh, zapt, termz0)) return -1; /* error */ else { const char **map; const char *cp = (const char *) termz0; const char *cp_end = cp + strlen(cp); const char *src; int i = 0; const char *space_map = NULL; int len; while ((len = (cp_end - cp)) > 0) { map = zebra_maps_input (zh->reg->zebra_maps, reg_type, &cp, len); if (**map == *CHR_SPACE) space_map = *map; else { if (i && space_map) for (src = space_map; *src; src++) termz[i++] = *src; space_map = NULL; for (src = *map; *src; src++) termz[i++] = *src; } } termz[i] = '\0'; } return 0;}static RSET rpn_prox (ZebraHandle zh, RSET *rset, int rset_no, int ordered, int exclusion, int relation, int distance){ int i; RSFD *rsfd; int *more; struct it_key **buf; RSET result; char prox_term[1024]; int length_prox_term = 0; int min_nn = 10000000; int term_index; int term_type = Z_Term_characterString; const char *flags = NULL; rsfd = (RSFD *) xmalloc (sizeof(*rsfd)*rset_no); more = (int *) xmalloc (sizeof(*more)*rset_no); buf = (struct it_key **) xmalloc (sizeof(*buf)*rset_no); *prox_term = '\0'; for (i = 0; i<rset_no; i++) { int j; for (j = 0; j<rset[i]->no_rset_terms; j++) { const char *nflags = rset[i]->rset_terms[j]->flags; char *term = rset[i]->rset_terms[j]->name; int lterm = strlen(term); if (lterm + length_prox_term < sizeof(prox_term)-1) { if (length_prox_term) prox_term[length_prox_term++] = ' '; strcpy (prox_term + length_prox_term, term); length_prox_term += lterm; } if (min_nn > rset[i]->rset_terms[j]->nn) min_nn = rset[i]->rset_terms[j]->nn; flags = nflags; term_type = rset[i]->rset_terms[j]->type; /* only if all term types are of type characterString .. */ /* the resulting term is of that type */ if (term_type != Z_Term_characterString) term_type = Z_Term_general; } } for (i = 0; i<rset_no; i++) { buf[i] = 0; rsfd[i] = 0; } for (i = 0; i<rset_no; i++) { buf[i] = (struct it_key *) xmalloc (sizeof(**buf)); rsfd[i] = rset_open (rset[i], RSETF_READ); if (!(more[i] = rset_read (rset[i], rsfd[i], buf[i], &term_index))) break; } if (i != rset_no) { /* at least one is empty ... return null set */ rset_null_parms parms; parms.rset_term = rset_term_create (prox_term, length_prox_term, flags, term_type); parms.rset_term->nn = 0; result = rset_create (rset_kind_null, &parms); } else if (ordered && relation == 3 && exclusion == 0 && distance == 1) { /* special proximity case = phrase search ... */ rset_temp_parms parms; RSFD rsfd_result; parms.rset_term = rset_term_create (prox_term, length_prox_term, flags, term_type); parms.rset_term->nn = min_nn; parms.cmp = key_compare_it; parms.key_size = sizeof (struct it_key); parms.temp_path = res_get (zh->res, "setTmpDir"); result = rset_create (rset_kind_temp, &parms); rsfd_result = rset_open (result, RSETF_WRITE); while (*more) { for (i = 1; i<rset_no; i++) { int cmp; if (!more[i]) { *more = 0; break; } cmp = key_compare_it (buf[i], buf[i-1]); if (cmp > 1) { more[i-1] = rset_read (rset[i-1], rsfd[i-1], buf[i-1], &term_index); break; } else if (cmp == 1) { if (buf[i-1]->seqno+1 != buf[i]->seqno) { more[i-1] = rset_read (rset[i-1], rsfd[i-1], buf[i-1], &term_index); break; } } else { more[i] = rset_read (rset[i], rsfd[i], buf[i], &term_index); break; } } if (i == rset_no) { rset_write (result, rsfd_result, buf[0]); more[0] = rset_read (*rset, *rsfd, *buf, &term_index); } } rset_close (result, rsfd_result); } else if (rset_no == 2) { /* generic proximity case (two input sets only) ... */ rset_temp_parms parms; RSFD rsfd_result; yaz_log (LOG_LOG, "generic prox, dist=%d, relation=%d, ordered=%d" ", exclusion=%d", distance, relation, ordered, exclusion); parms.rset_term = rset_term_create (prox_term, length_prox_term, flags, term_type); parms.rset_term->nn = min_nn; parms.cmp = key_compare_it; parms.key_size = sizeof (struct it_key); parms.temp_path = res_get (zh->res, "setTmpDir"); result = rset_create (rset_kind_temp, &parms); rsfd_result = rset_open (result, RSETF_WRITE); while (more[0] && more[1]) { int cmp = key_compare_it (buf[0], buf[1]); if (cmp < -1) more[0] = rset_read (rset[0], rsfd[0], buf[0], &term_index); else if (cmp > 1) more[1] = rset_read (rset[1], rsfd[1], buf[1], &term_index); else
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -