📄 extract.c
字号:
qsort (key_buf + ptr_top-ptr_i, ptr_i, sizeof(char*), key_x_compare); extract_get_fname_tmp (out_fname, key_file_no); if (!(outf = fopen (out_fname, "wb"))) { logf (LOG_FATAL|LOG_ERRNO, "fopen %s", out_fname); exit (1); } logf (LOG_LOG, "writing section %d", key_file_no); i = ptr_i; prevcp = key_buf[ptr_top-i]; while (1) if (!--i || strcmp (prevcp, key_buf[ptr_top-i])) { key_y_len = strlen(prevcp)+1;#if 0 logf (LOG_LOG, "key_y_len: %2d %02x %02x %s", key_y_len, prevcp[0], prevcp[1], 2+prevcp);#endif qsort (key_buf + ptr_top-ptr_i, ptr_i - i, sizeof(char*), key_y_compare); cp = key_buf[ptr_top-ptr_i]; --key_y_len; encode_key_init (&encode_info); encode_key_write (cp, &encode_info, outf); while (--ptr_i > i) { cp = key_buf[ptr_top-ptr_i]; encode_key_write (cp+key_y_len, &encode_info, outf); } encode_key_flush ( &encode_info, outf); if (!i) break; prevcp = key_buf[ptr_top-ptr_i]; }#endif if (fclose (outf)) { logf (LOG_FATAL|LOG_ERRNO, "fclose %s", out_fname); exit (1); } logf (LOG_LOG, "finished section %d", zh->reg->key_file_no); zh->reg->ptr_i = 0; zh->reg->key_buf_used = 0;}void extract_add_index_string (RecWord *p, const char *string, int length){ char *dst; unsigned char attrSet; unsigned short attrUse; int lead = 0; int diff = 0; int *pseqno = &p->seqno; ZebraHandle zh = p->extractCtrl->handle; ZebraExplainInfo zei = zh->reg->zei; struct recKeys *keys = &zh->reg->keys; if (keys->buf_used+1024 > keys->buf_max) { char *b; b = (char *) xmalloc (keys->buf_max += 128000); if (keys->buf_used > 0) memcpy (b, keys->buf, keys->buf_used); xfree (keys->buf); keys->buf = b; } dst = keys->buf + keys->buf_used; attrSet = p->attrSet; if (keys->buf_used > 0 && keys->prevAttrSet == attrSet) lead |= 1; else keys->prevAttrSet = attrSet; attrUse = p->attrUse; if (keys->buf_used > 0 && keys->prevAttrUse == attrUse) lead |= 2; else keys->prevAttrUse = attrUse;#if 1 diff = 1 + *pseqno - keys->prevSeqNo; if (diff >= 1 && diff <= 15) lead |= (diff << 2); else diff = 0;#endif keys->prevSeqNo = *pseqno; *dst++ = lead;#if SU_SCHEME if ((lead & 3) < 3) { int ch = zebraExplain_lookupSU (zei, attrSet, attrUse); if (ch < 0) { ch = zebraExplain_addSU (zei, attrSet, attrUse); yaz_log (LOG_DEBUG, "addSU set=%d use=%d SU=%d", attrSet, attrUse, ch); } assert (ch > 0); memcpy (dst, &ch, sizeof(ch)); dst += sizeof(ch); }#else if (!(lead & 1)) { memcpy (dst, &attrSet, sizeof(attrSet)); dst += sizeof(attrSet); } if (!(lead & 2)) { memcpy (dst, &attrUse, sizeof(attrUse)); dst += sizeof(attrUse); }#endif *dst++ = p->reg_type; memcpy (dst, string, length); dst += length; *dst++ = '\0'; if (!diff) { memcpy (dst, pseqno, sizeof(*pseqno)); dst += sizeof(*pseqno); } keys->buf_used = dst - keys->buf;}static void extract_add_sort_string (RecWord *p, const char *string, int length){ ZebraHandle zh = p->extractCtrl->handle; struct sortKeys *sk = &zh->reg->sortKeys; int off = 0; while (off < sk->buf_used) { int set, use, slen; off += key_SU_decode(&set, sk->buf + off); off += key_SU_decode(&use, sk->buf + off); off += key_SU_decode(&slen, sk->buf + off); off += slen; if (p->attrSet == set && p->attrUse == use) return; } assert (off == sk->buf_used); if (sk->buf_used + IT_MAX_WORD > sk->buf_max) { char *b; b = (char *) xmalloc (sk->buf_max += 128000); if (sk->buf_used > 0) memcpy (b, sk->buf, sk->buf_used); xfree (sk->buf); sk->buf = b; } off += key_SU_encode(p->attrSet, sk->buf + off); off += key_SU_encode(p->attrUse, sk->buf + off); off += key_SU_encode(length, sk->buf + off); memcpy (sk->buf + off, string, length); sk->buf_used = off + length;}void extract_add_string (RecWord *p, const char *string, int length){ assert (length > 0); if (zebra_maps_is_sort (p->zebra_maps, p->reg_type)) extract_add_sort_string (p, string, length); else extract_add_index_string (p, string, length);}static void extract_add_incomplete_field (RecWord *p){ const char *b = p->string; int remain = p->length; const char **map = 0; if (remain > 0) map = zebra_maps_input(p->zebra_maps, p->reg_type, &b, remain); while (map) { char buf[IT_MAX_WORD+1]; int i, remain; /* Skip spaces */ while (map && *map && **map == *CHR_SPACE) { remain = p->length - (b - p->string); if (remain > 0) map = zebra_maps_input(p->zebra_maps, p->reg_type, &b, remain); else map = 0; } if (!map) break; i = 0; while (map && *map && **map != *CHR_SPACE) { const char *cp = *map; while (i < IT_MAX_WORD && *cp) buf[i++] = *(cp++); remain = p->length - (b - p->string); if (remain > 0) map = zebra_maps_input(p->zebra_maps, p->reg_type, &b, remain); else map = 0; } if (!i) return; extract_add_string (p, buf, i); p->seqno++; }}static void extract_add_complete_field (RecWord *p){ const char *b = p->string; char buf[IT_MAX_WORD+1]; const char **map = 0; int i = 0, remain = p->length; if (remain > 0) map = zebra_maps_input (p->zebra_maps, p->reg_type, &b, remain); while (remain > 0 && i < IT_MAX_WORD) { while (map && *map && **map == *CHR_SPACE) { remain = p->length - (b - p->string); if (remain > 0) map = zebra_maps_input(p->zebra_maps, p->reg_type, &b, remain); else map = 0; } if (!map) break; if (i && i < IT_MAX_WORD) buf[i++] = *CHR_SPACE; while (map && *map && **map != *CHR_SPACE) { const char *cp = *map; if (i >= IT_MAX_WORD) break; while (i < IT_MAX_WORD && *cp) buf[i++] = *(cp++); remain = p->length - (b - p->string); if (remain > 0) map = zebra_maps_input (p->zebra_maps, p->reg_type, &b, remain); else map = 0; } } if (!i) return; extract_add_string (p, buf, i);}void extract_token_add (RecWord *p){ WRBUF wrbuf;#if 0 yaz_log (LOG_LOG, "token_add " "reg_type=%c attrSet=%d attrUse=%d seqno=%d s=%.*s", p->reg_type, p->attrSet, p->attrUse, p->seqno, p->length, p->string);#endif if ((wrbuf = zebra_replace(p->zebra_maps, p->reg_type, 0, p->string, p->length))) { p->string = wrbuf_buf(wrbuf); p->length = wrbuf_len(wrbuf); } if (zebra_maps_is_complete (p->zebra_maps, p->reg_type)) extract_add_complete_field (p); else extract_add_incomplete_field(p);}void extract_schema_add (struct recExtractCtrl *p, Odr_oid *oid){ ZebraHandle zh = (ZebraHandle) (p->handle); zebraExplain_addSchema (zh->reg->zei, oid);}void extract_flushSortKeys (ZebraHandle zh, SYSNO sysno, int cmd, struct sortKeys *sk){ SortIdx sortIdx = zh->reg->sortIdx; int off = 0; sortIdx_sysno (sortIdx, sysno); while (off < sk->buf_used) { int set, use, slen; off += key_SU_decode(&set, sk->buf + off); off += key_SU_decode(&use, sk->buf + off); off += key_SU_decode(&slen, sk->buf + off); sortIdx_type(sortIdx, use); if (cmd == 1) sortIdx_add(sortIdx, sk->buf + off, slen); else sortIdx_add(sortIdx, "", 1); off += slen; }}void encode_key_init (struct encode_info *i){ i->sysno = 0; i->seqno = 0; i->cmd = -1; i->prevsys=0; i->prevseq=0; i->prevcmd=-1; i->keylen=0;}char *encode_key_int (int d, char *bp){ if (d <= 63) *bp++ = d; else if (d <= 16383) { *bp++ = 64 + (d>>8); *bp++ = d & 255; } else if (d <= 4194303) { *bp++ = 128 + (d>>16); *bp++ = (d>>8) & 255; *bp++ = d & 255; } else { *bp++ = 192 + (d>>24); *bp++ = (d>>16) & 255; *bp++ = (d>>8) & 255; *bp++ = d & 255; } return bp;}#define OLDENCODE 1#ifdef OLDENCODE/* this is the old encode_key_write * may be deleted once we are confident that the new works * HL 15-oct-2002 */void encode_key_write (char *k, struct encode_info *i, FILE *outf){ struct it_key key; char *bp = i->buf; while ((*bp++ = *k++)) ; memcpy (&key, k+1, sizeof(struct it_key)); bp = encode_key_int ( (key.sysno - i->sysno) * 2 + *k, bp); if (i->sysno != key.sysno) { i->sysno = key.sysno; i->seqno = 0; } else if (!i->seqno && !key.seqno && i->cmd == *k) return; bp = encode_key_int (key.seqno - i->seqno, bp); i->seqno = key.seqno; i->cmd = *k; if (fwrite (i->buf, bp - i->buf, 1, outf) != 1) { logf (LOG_FATAL|LOG_ERRNO, "fwrite"); exit (1); }}void encode_key_flush (struct encode_info *i, FILE *outf){ /* dummy routine */}#else/* new encode_key_write * The idea is to buffer one more key, and compare them * If we are going to delete and insert the same key, * we may as well not bother. Should make a difference in * updates with small modifications (appending to a mbox) */void encode_key_write (char *k, struct encode_info *i, FILE *outf){ struct it_key key; char *bp; if (*k) /* first time for new key */ { bp = i->buf; while ((*bp++ = *k++)) ; i->keylen= bp - i->buf -1; assert(i->keylen+1+sizeof(struct it_key) < ENCODE_BUFLEN); } else { bp=i->buf + i->keylen; *bp++=0; k++; } memcpy (&key, k+1, sizeof(struct it_key)); if (0==i->prevsys) /* no previous filter, fill up */ { i->prevsys=key.sysno; i->prevseq=key.seqno; i->prevcmd=*k; } else if ( (i->prevsys==key.sysno) && (i->prevseq==key.seqno) && (i->prevcmd!=*k) ) { /* same numbers, diff cmd, they cancel out */ i->prevsys=0; } else { /* different stuff, write previous, move buf */ bp = encode_key_int ( (i->prevsys - i->sysno) * 2 + i->prevcmd, bp); if (i->sysno != i->prevsys) { i->sysno = i->prevsys; i->seqno = 0; } else if (!i->seqno && !i->prevseq && i->cmd == i->prevcmd) { return; /* ??? Filters some sort of duplicates away */ /* ??? Can this ever happen -H 15oct02 */ } bp = encode_key_int (i->prevseq - i->seqno, bp); i->seqno = i->prevseq; i->cmd = i->prevcmd; if (fwrite (i->buf, bp - i->buf, 1, outf) != 1) { logf (LOG_FATAL|LOG_ERRNO, "fwrite"); exit (1); } i->keylen=0; /* ok, it's written, forget it */ i->prevsys=key.sysno; i->prevseq=key.seqno; i->prevcmd=*k; }}void encode_key_flush (struct encode_info *i, FILE *outf){ /* flush the last key from i */ char *bp =i->buf + i->keylen; if (0==i->prevsys) { return; /* nothing to flush */ } *bp++=0; bp = encode_key_int ( (i->prevsys - i->sysno) * 2 + i->prevcmd, bp); if (i->sysno != i->prevsys) { i->sysno = i->prevsys; i->seqno = 0; } else if (!i->seqno && !i->prevseq && i->cmd == i->prevcmd) { return; /* ??? Filters some sort of duplicates away */ /* ??? Can this ever happen -H 15oct02 */ } bp = encode_key_int (i->prevseq - i->seqno, bp); i->seqno = i->prevseq; i->cmd = i->prevcmd; if (fwrite (i->buf, bp - i->buf, 1, outf) != 1) { logf (LOG_FATAL|LOG_ERRNO, "fwrite"); exit (1); } i->keylen=0; /* ok, it's written, forget it */ i->prevsys=0; /* forget the values too */ i->prevseq=0;}#endif
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -