📄 kinput.c
字号:
printf ("sub1: "); pkey(*dst, *insertMode);#endif (*dst) += sizeof(struct it_key); p->first_in_list = 0; return 1; } strcpy (p->prev_name, p->cur_name); if (!(p->more = heap_read_one (hi, p->cur_name, p->key))) return 0; if (*p->cur_name && strcmp (p->cur_name, p->prev_name)) { p->first_in_list = 1; return 0; } *insertMode = p->key[0]; memcpy (*dst, p->key+1, sizeof(struct it_key));#if PR_KEY printf ("sub2: "); pkey(*dst, *insertMode);#endif (*dst) += sizeof(struct it_key); return 1;}int heap_inpc (struct heap_info *hi){ struct heap_cread_info hci; ISAMC_I *isamc_i = (ISAMC_I *) xmalloc (sizeof(*isamc_i)); hci.key = (char *) xmalloc (KEY_SIZE); hci.key_1 = (char *) xmalloc (KEY_SIZE); hci.key_2 = (char *) xmalloc (KEY_SIZE); hci.ret = -1; hci.first_in_list = 1; hci.hi = hi; hci.more = heap_read_one (hi, hci.cur_name, hci.key); isamc_i->clientData = &hci; isamc_i->read_item = heap_cread_item2; while (hci.more) { char this_name[INP_NAME_MAX]; ISAMC_P isamc_p, isamc_p2; char *dict_info; strcpy (this_name, hci.cur_name); assert (hci.cur_name[1]); hi->no_diffs++; if ((dict_info = dict_lookup (hi->reg->dict, hci.cur_name))) { memcpy (&isamc_p, dict_info+1, sizeof(ISAMC_P)); isamc_p2 = isc_merge (hi->reg->isamc, isamc_p, isamc_i); if (!isamc_p2) { hi->no_deletions++; if (!dict_delete (hi->reg->dict, this_name)) abort(); } else { hi->no_updates++; if (isamc_p2 != isamc_p) dict_insert (hi->reg->dict, this_name, sizeof(ISAMC_P), &isamc_p2); } } else { isamc_p = isc_merge (hi->reg->isamc, 0, isamc_i); hi->no_insertions++; dict_insert (hi->reg->dict, this_name, sizeof(ISAMC_P), &isamc_p); } } xfree (isamc_i); xfree (hci.key); xfree (hci.key_1); xfree (hci.key_2); return 0;} #if 0/* for debugging only */static void print_dict_item (ZebraMaps zm, const char *s){ int reg_type = s[1]; char keybuf[IT_MAX_WORD+1]; char *to = keybuf; const char *from = s + 2; while (*from) { const char *res = zebra_maps_output (zm, reg_type, &from); if (!res) *to++ = *from++; else while (*res) *to++ = *res++; } *to = '\0'; yaz_log (LOG_LOG, "%s", keybuf);}#endifint heap_inpb (struct heap_info *hi){ struct heap_cread_info hci; ISAMC_I *isamc_i = (ISAMC_I *) xmalloc (sizeof(*isamc_i)); hci.key = (char *) xmalloc (KEY_SIZE); hci.key_1 = (char *) xmalloc (KEY_SIZE); hci.key_2 = (char *) xmalloc (KEY_SIZE); hci.ret = -1; hci.first_in_list = 1; hci.hi = hi; hci.more = heap_read_one (hi, hci.cur_name, hci.key); isamc_i->clientData = &hci; isamc_i->read_item = heap_cread_item2; while (hci.more) { char this_name[INP_NAME_MAX]; ISAMC_P isamc_p, isamc_p2; char *dict_info; strcpy (this_name, hci.cur_name); assert (hci.cur_name[1]); hi->no_diffs++;#if 0 print_dict_item (hi->reg->zebra_maps, hci.cur_name);#endif if ((dict_info = dict_lookup (hi->reg->dict, hci.cur_name))) { memcpy (&isamc_p, dict_info+1, sizeof(ISAMC_P)); isamc_p2 = isamb_merge (hi->reg->isamb, isamc_p, isamc_i); if (!isamc_p2) { hi->no_deletions++; if (!dict_delete (hi->reg->dict, this_name)) abort(); } else { hi->no_updates++; if (isamc_p2 != isamc_p) dict_insert (hi->reg->dict, this_name, sizeof(ISAMC_P), &isamc_p2); } } else { isamc_p = isamb_merge (hi->reg->isamb, 0, isamc_i); hi->no_insertions++; dict_insert (hi->reg->dict, this_name, sizeof(ISAMC_P), &isamc_p); } } xfree (isamc_i); xfree (hci.key); xfree (hci.key_1); xfree (hci.key_2); return 0;} int heap_inpd (struct heap_info *hi){ struct heap_cread_info hci; ISAMD_I isamd_i = (ISAMD_I) xmalloc (sizeof(*isamd_i)); hci.key = (char *) xmalloc (KEY_SIZE); hci.key_1 = (char *) xmalloc (KEY_SIZE); hci.key_2 = (char *) xmalloc (KEY_SIZE); hci.ret = -1; hci.first_in_list = 1; hci.hi = hi; hci.more = heap_read_one (hi, hci.cur_name, hci.key); isamd_i->clientData = &hci; isamd_i->read_item = heap_cread_item; while (hci.more) { char this_name[INP_NAME_MAX]; char *dict_info; char dictentry[ISAMD_MAX_DICT_LEN+1]; char dictlen; strcpy (this_name, hci.cur_name); /* print_dict_item (hi->reg->zebra_maps, hci.cur_name); */ /*!*/ /* FIXME: depend on isamd-debug */ assert (hci.cur_name[1]); hi->no_diffs++; if ((dict_info = dict_lookup (hi->reg->dict, hci.cur_name))) { dictlen=dict_info[0]; memcpy (dictentry, dict_info+1, dictlen );#ifdef SKIPTHIS logf(LOG_LOG,"dictentry before. len=%d: %d %d %d %d %d %d %d %d %d", dictlen,dictentry[0], dictentry[1], dictentry[2], dictentry[3], dictentry[4], dictentry[5], dictentry[6], dictentry[7], dictentry[8]); /*!*/#endif dictlen= isamd_append(hi->reg->isamd, dictentry, dictlen, isamd_i); /* logf dictentry after */ if (dictlen) { hi->no_updates++; if ( (dictlen!=dict_info[0]) || (0!=memcmp(dictentry, dict_info+1, dictlen)) ) { dict_insert(hi->reg->dict, this_name, dictlen,dictentry); } } else { hi->no_deletions++; if (!dict_delete (hi->reg->dict, this_name)) { logf (LOG_FATAL, "dict_delete failed"); abort(); } } } else { dictlen=0; memset (dictentry, '\0', ISAMD_MAX_DICT_LEN); dictlen= isamd_append(hi->reg->isamd, dictentry, dictlen, isamd_i); /* logf dictentry first */ hi->no_insertions++; if (dictlen) dict_insert(hi->reg->dict, this_name, dictlen,dictentry); } } xfree (isamd_i); xfree (hci.key); xfree (hci.key_1); xfree (hci.key_2); return 0;} int heap_inp (struct heap_info *hi){ char *info; char next_name[INP_NAME_MAX]; char cur_name[INP_NAME_MAX]; int key_buf_size = INP_BUF_START; int key_buf_ptr; char *next_key; char *key_buf; int more; next_key = (char *) xmalloc (KEY_SIZE); key_buf = (char *) xmalloc (key_buf_size); more = heap_read_one (hi, cur_name, key_buf); while (more) /* EOF ? */ { int nmemb; key_buf_ptr = KEY_SIZE; while (1) { if (!(more = heap_read_one (hi, next_name, next_key))) break; if (*next_name && strcmp (next_name, cur_name)) break; memcpy (key_buf + key_buf_ptr, next_key, KEY_SIZE); key_buf_ptr += KEY_SIZE; if (key_buf_ptr+(int) KEY_SIZE >= key_buf_size) { char *new_key_buf; new_key_buf = (char *) xmalloc (key_buf_size + INP_BUF_ADD); memcpy (new_key_buf, key_buf, key_buf_size); key_buf_size += INP_BUF_ADD; xfree (key_buf); key_buf = new_key_buf; } } hi->no_diffs++; nmemb = key_buf_ptr / KEY_SIZE; assert (nmemb * (int) KEY_SIZE == key_buf_ptr); if ((info = dict_lookup (hi->reg->dict, cur_name))) { ISAM_P isam_p, isam_p2; memcpy (&isam_p, info+1, sizeof(ISAM_P)); isam_p2 = is_merge (hi->reg->isam, isam_p, nmemb, key_buf); if (!isam_p2) { hi->no_deletions++; if (!dict_delete (hi->reg->dict, cur_name)) abort (); } else { hi->no_updates++; if (isam_p2 != isam_p) dict_insert (hi->reg->dict, cur_name, sizeof(ISAM_P), &isam_p2); } } else { ISAM_P isam_p; hi->no_insertions++; isam_p = is_merge (hi->reg->isam, 0, nmemb, key_buf); dict_insert (hi->reg->dict, cur_name, sizeof(ISAM_P), &isam_p); } memcpy (key_buf, next_key, KEY_SIZE); strcpy (cur_name, next_name); } return 0;}int heap_inps (struct heap_info *hi){ struct heap_cread_info hci; ISAMS_I isams_i = (ISAMS_I) xmalloc (sizeof(*isams_i)); hci.key = (char *) xmalloc (KEY_SIZE); hci.key_1 = (char *) xmalloc (KEY_SIZE); hci.key_2 = (char *) xmalloc (KEY_SIZE); hci.first_in_list = 1; hci.ret = -1; hci.hi = hi; hci.more = heap_read_one (hi, hci.cur_name, hci.key); isams_i->clientData = &hci; isams_i->read_item = heap_cread_item; while (hci.more) { char this_name[INP_NAME_MAX]; ISAMS_P isams_p; char *dict_info; strcpy (this_name, hci.cur_name); assert (hci.cur_name[1]); hi->no_diffs++; if (!(dict_info = dict_lookup (hi->reg->dict, hci.cur_name))) { isams_p = isams_merge (hi->reg->isams, isams_i); hi->no_insertions++; dict_insert (hi->reg->dict, this_name, sizeof(ISAMS_P), &isams_p); } else { logf (LOG_FATAL, "isams doesn't support this kind of update"); break; } } xfree (isams_i); return 0;} struct progressInfo { time_t startTime; time_t lastTime; off_t totalBytes; off_t totalOffset;};void progressFunc (struct key_file *keyp, void *info){ struct progressInfo *p = (struct progressInfo *) info; time_t now, remaining; if (keyp->buf_size <= 0 || p->totalBytes <= 0) return ; time (&now); if (now >= p->lastTime+10) { p->lastTime = now; remaining = (time_t) ((now - p->startTime)* ((double) p->totalBytes/p->totalOffset - 1.0)); if (remaining <= 130) logf (LOG_LOG, "Merge %2.1f%% completed; %ld seconds remaining", (100.0*p->totalOffset) / p->totalBytes, (long) remaining); else logf (LOG_LOG, "Merge %2.1f%% completed; %ld minutes remaining", (100.0*p->totalOffset) / p->totalBytes, (long) remaining/60); } p->totalOffset += keyp->buf_size;}#ifndef R_OK#define R_OK 4#endifvoid zebra_index_merge (ZebraHandle zh){ struct key_file **kf; char rbuf[1024]; int i, r; struct heap_info *hi; struct progressInfo progressInfo; int nkeys = zh->reg->key_file_no; if (nkeys < 0) { char fname[1024]; nkeys = 0; while (1) { extract_get_fname_tmp (zh, fname, nkeys+1); if (access (fname, R_OK) == -1) break; nkeys++; } if (!nkeys) return ; } kf = (struct key_file **) xmalloc ((1+nkeys) * sizeof(*kf)); progressInfo.totalBytes = 0; progressInfo.totalOffset = 0; time (&progressInfo.startTime); time (&progressInfo.lastTime); for (i = 1; i<=nkeys; i++) { kf[i] = key_file_init (i, 8192, zh->res); kf[i]->readHandler = progressFunc; kf[i]->readInfo = &progressInfo; progressInfo.totalBytes += kf[i]->length; progressInfo.totalOffset += kf[i]->buf_size; } hi = key_heap_init (nkeys, key_qsort_compare); hi->reg = zh->reg; for (i = 1; i<=nkeys; i++) if ((r = key_file_read (kf[i], rbuf))) key_heap_insert (hi, rbuf, r, kf[i]); if (zh->reg->isams) heap_inps (hi); if (zh->reg->isamc) heap_inpc (hi); if (zh->reg->isam) heap_inp (hi); if (zh->reg->isamd) heap_inpd (hi); if (zh->reg->isamb) heap_inpb (hi); for (i = 1; i<=nkeys; i++) { extract_get_fname_tmp (zh, rbuf, i); unlink (rbuf); } logf (LOG_LOG, "Iterations . . .%7d", hi->no_iterations); logf (LOG_LOG, "Distinct words .%7d", hi->no_diffs); logf (LOG_LOG, "Updates. . . . .%7d", hi->no_updates); logf (LOG_LOG, "Deletions. . . .%7d", hi->no_deletions); logf (LOG_LOG, "Insertions . . .%7d", hi->no_insertions); zh->reg->key_file_no = 0; key_heap_destroy (hi, nkeys); for (i = 1; i<=nkeys; i++) key_file_destroy (kf[i]); xfree (kf);}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -