⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 kinput.c

📁 harvest是一个下载html网页的机器人
💻 C
📖 第 1 页 / 共 2 页
字号:
        printf ("sub1: ");        pkey(*dst, *insertMode);#endif        (*dst) += sizeof(struct it_key);        p->first_in_list = 0;        return 1;    }    strcpy (p->prev_name, p->cur_name);    if (!(p->more = heap_read_one (hi, p->cur_name, p->key)))        return 0;    if (*p->cur_name && strcmp (p->cur_name, p->prev_name))    {        p->first_in_list = 1;        return 0;    }    *insertMode = p->key[0];    memcpy (*dst, p->key+1, sizeof(struct it_key));#if PR_KEY    printf ("sub2: ");    pkey(*dst, *insertMode);#endif    (*dst) += sizeof(struct it_key);    return 1;}int heap_inpc (struct heap_info *hi){    struct heap_cread_info hci;    ISAMC_I *isamc_i = (ISAMC_I *) xmalloc (sizeof(*isamc_i));    hci.key = (char *) xmalloc (KEY_SIZE);    hci.key_1 = (char *) xmalloc (KEY_SIZE);    hci.key_2 = (char *) xmalloc (KEY_SIZE);    hci.ret = -1;    hci.first_in_list = 1;    hci.hi = hi;    hci.more = heap_read_one (hi, hci.cur_name, hci.key);    isamc_i->clientData = &hci;    isamc_i->read_item = heap_cread_item2;    while (hci.more)    {        char this_name[INP_NAME_MAX];        ISAMC_P isamc_p, isamc_p2;        char *dict_info;        strcpy (this_name, hci.cur_name);	assert (hci.cur_name[1]);        hi->no_diffs++;        if ((dict_info = dict_lookup (hi->reg->dict, hci.cur_name)))        {            memcpy (&isamc_p, dict_info+1, sizeof(ISAMC_P));            isamc_p2 = isc_merge (hi->reg->isamc, isamc_p, isamc_i);            if (!isamc_p2)            {                hi->no_deletions++;                if (!dict_delete (hi->reg->dict, this_name))                    abort();            }            else             {                hi->no_updates++;                if (isamc_p2 != isamc_p)                    dict_insert (hi->reg->dict, this_name,                                 sizeof(ISAMC_P), &isamc_p2);            }        }         else        {            isamc_p = isc_merge (hi->reg->isamc, 0, isamc_i);            hi->no_insertions++;            
dict_insert (hi->reg->dict, this_name, sizeof(ISAMC_P), &isamc_p);        }    }    xfree (isamc_i);    xfree (hci.key);    xfree (hci.key_1);    xfree (hci.key_2);    return 0;} #if 0/* for debugging only */static void print_dict_item (ZebraMaps zm, const char *s){    int reg_type = s[1];    char keybuf[IT_MAX_WORD+1];    char *to = keybuf;    const char *from = s + 2;    while (*from)    {        const char *res = zebra_maps_output (zm, reg_type, &from);        if (!res)            *to++ = *from++;        else            while (*res)                *to++ = *res++;    }    *to = '\0';    yaz_log (LOG_LOG, "%s", keybuf);}#endifint heap_inpb (struct heap_info *hi){    struct heap_cread_info hci;    ISAMC_I *isamc_i = (ISAMC_I *) xmalloc (sizeof(*isamc_i));    hci.key = (char *) xmalloc (KEY_SIZE);    hci.key_1 = (char *) xmalloc (KEY_SIZE);    hci.key_2 = (char *) xmalloc (KEY_SIZE);    hci.ret = -1;    hci.first_in_list = 1;    hci.hi = hi;    hci.more = heap_read_one (hi, hci.cur_name, hci.key);    isamc_i->clientData = &hci;    isamc_i->read_item = heap_cread_item2;    while (hci.more)    {        char this_name[INP_NAME_MAX];        ISAMC_P isamc_p, isamc_p2;        char *dict_info;        strcpy (this_name, hci.cur_name);	assert (hci.cur_name[1]);        hi->no_diffs++;#if 0        print_dict_item (hi->reg->zebra_maps, hci.cur_name);#endif        if ((dict_info = dict_lookup (hi->reg->dict, hci.cur_name)))        {            memcpy (&isamc_p, dict_info+1, sizeof(ISAMC_P));            isamc_p2 = isamb_merge (hi->reg->isamb, isamc_p, isamc_i);            if (!isamc_p2)            {                hi->no_deletions++;                if (!dict_delete (hi->reg->dict, this_name))                    abort();            }            else             {                hi->no_updates++;                if (isamc_p2 != isamc_p)                    dict_insert (hi->reg->dict, this_name,                                 sizeof(ISAMC_P), &isamc_p2);            }        }         
else        {            isamc_p = isamb_merge (hi->reg->isamb, 0, isamc_i);            hi->no_insertions++;            dict_insert (hi->reg->dict, this_name, sizeof(ISAMC_P), &isamc_p);        }    }    xfree (isamc_i);    xfree (hci.key);    xfree (hci.key_1);    xfree (hci.key_2);    return 0;} int heap_inpd (struct heap_info *hi){    struct heap_cread_info hci;    ISAMD_I isamd_i = (ISAMD_I) xmalloc (sizeof(*isamd_i));    hci.key = (char *) xmalloc (KEY_SIZE);    hci.key_1 = (char *) xmalloc (KEY_SIZE);    hci.key_2 = (char *) xmalloc (KEY_SIZE);    hci.ret = -1;    hci.first_in_list = 1;    hci.hi = hi;    hci.more = heap_read_one (hi, hci.cur_name, hci.key);    isamd_i->clientData = &hci;    isamd_i->read_item = heap_cread_item;    while (hci.more)    {        char this_name[INP_NAME_MAX];        char *dict_info;        char dictentry[ISAMD_MAX_DICT_LEN+1];        char dictlen;        strcpy (this_name, hci.cur_name);                /* print_dict_item (hi->reg->zebra_maps, hci.cur_name); */        /*!*/ /* FIXME: depend on isamd-debug */	assert (hci.cur_name[1]);        hi->no_diffs++;        if ((dict_info = dict_lookup (hi->reg->dict, hci.cur_name)))        {            dictlen=dict_info[0];            memcpy (dictentry, dict_info+1, dictlen );#ifdef SKIPTHIS            logf(LOG_LOG,"dictentry before. 
len=%d: %d %d %d %d %d %d %d %d %d",               dictlen,dictentry[0], dictentry[1], dictentry[2],                       dictentry[3], dictentry[4], dictentry[5],                       dictentry[6], dictentry[7], dictentry[8]); /*!*/#endif            dictlen= isamd_append(hi->reg->isamd, dictentry, dictlen, isamd_i);             /* logf dictentry after */            if (dictlen)            {                hi->no_updates++;                if ( (dictlen!=dict_info[0]) ||                     (0!=memcmp(dictentry, dict_info+1, dictlen)) )                {                    dict_insert(hi->reg->dict, this_name,                                dictlen,dictentry);                }            }            else            {                hi->no_deletions++;                if (!dict_delete (hi->reg->dict, this_name))                 {	            logf (LOG_FATAL, "dict_delete failed");                    abort();                }            }        }         else        {            dictlen=0;            memset (dictentry, '\0', ISAMD_MAX_DICT_LEN);            dictlen= isamd_append(hi->reg->isamd, dictentry, dictlen, isamd_i);             /* logf dictentry first */            hi->no_insertions++;            if (dictlen)                dict_insert(hi->reg->dict, this_name,                                dictlen,dictentry);        }    }    xfree (isamd_i);    xfree (hci.key);    xfree (hci.key_1);    xfree (hci.key_2);    return 0;} int heap_inp (struct heap_info *hi){    char *info;    char next_name[INP_NAME_MAX];    char cur_name[INP_NAME_MAX];    int key_buf_size = INP_BUF_START;    int key_buf_ptr;    char *next_key;    char *key_buf;    int more;        next_key = (char *) xmalloc (KEY_SIZE);    key_buf = (char *) xmalloc (key_buf_size);    more = heap_read_one (hi, cur_name, key_buf);    while (more)                   /* EOF ? 
*/    {        int nmemb;        key_buf_ptr = KEY_SIZE;        while (1)        {            if (!(more = heap_read_one (hi, next_name, next_key)))                break;            if (*next_name && strcmp (next_name, cur_name))                break;            memcpy (key_buf + key_buf_ptr, next_key, KEY_SIZE);            key_buf_ptr += KEY_SIZE;            if (key_buf_ptr+(int) KEY_SIZE >= key_buf_size)            {                char *new_key_buf;                new_key_buf = (char *) xmalloc (key_buf_size + INP_BUF_ADD);                memcpy (new_key_buf, key_buf, key_buf_size);                key_buf_size += INP_BUF_ADD;                xfree (key_buf);                key_buf = new_key_buf;            }        }        hi->no_diffs++;        nmemb = key_buf_ptr / KEY_SIZE;        assert (nmemb * (int) KEY_SIZE == key_buf_ptr);        if ((info = dict_lookup (hi->reg->dict, cur_name)))        {            ISAM_P isam_p, isam_p2;            memcpy (&isam_p, info+1, sizeof(ISAM_P));            isam_p2 = is_merge (hi->reg->isam, isam_p, nmemb, key_buf);            if (!isam_p2)            {                hi->no_deletions++;                if (!dict_delete (hi->reg->dict, cur_name))                    abort ();            }            else             {                hi->no_updates++;                if (isam_p2 != isam_p)                    dict_insert (hi->reg->dict, cur_name,                                 sizeof(ISAM_P), &isam_p2);            }        }        else        {            ISAM_P isam_p;            hi->no_insertions++;            isam_p = is_merge (hi->reg->isam, 0, nmemb, key_buf);            dict_insert (hi->reg->dict, cur_name, sizeof(ISAM_P), &isam_p);        }        memcpy (key_buf, next_key, KEY_SIZE);        strcpy (cur_name, next_name);    }    return 0;}int heap_inps (struct heap_info *hi){    struct heap_cread_info hci;    ISAMS_I isams_i = (ISAMS_I) xmalloc (sizeof(*isams_i));    hci.key = (char *) xmalloc (KEY_SIZE);    hci.key_1 = 
(char *) xmalloc (KEY_SIZE);    hci.key_2 = (char *) xmalloc (KEY_SIZE);    hci.first_in_list = 1;    hci.ret = -1;    hci.hi = hi;    hci.more = heap_read_one (hi, hci.cur_name, hci.key);    isams_i->clientData = &hci;    isams_i->read_item = heap_cread_item;    while (hci.more)    {        char this_name[INP_NAME_MAX];        ISAMS_P isams_p;        char *dict_info;        strcpy (this_name, hci.cur_name);	assert (hci.cur_name[1]);        hi->no_diffs++;        if (!(dict_info = dict_lookup (hi->reg->dict, hci.cur_name)))        {            isams_p = isams_merge (hi->reg->isams, isams_i);            hi->no_insertions++;            dict_insert (hi->reg->dict, this_name, sizeof(ISAMS_P), &isams_p);        }	else	{	    logf (LOG_FATAL, "isams doesn't support this kind of update");	    break;	}    }    xfree (isams_i);    return 0;} struct progressInfo {    time_t   startTime;    time_t   lastTime;    off_t    totalBytes;    off_t    totalOffset;};void progressFunc (struct key_file *keyp, void *info){    struct progressInfo *p = (struct progressInfo *) info;    time_t now, remaining;    if (keyp->buf_size <= 0 || p->totalBytes <= 0)        return ;    time (&now);    if (now >= p->lastTime+10)    {        p->lastTime = now;        remaining = (time_t) ((now - p->startTime)*            ((double) p->totalBytes/p->totalOffset - 1.0));        if (remaining <= 130)            logf (LOG_LOG, "Merge %2.1f%% completed; %ld seconds remaining",                 (100.0*p->totalOffset) / p->totalBytes, (long) remaining);        else            logf (LOG_LOG, "Merge %2.1f%% completed; %ld minutes remaining",	         (100.0*p->totalOffset) / p->totalBytes, (long) remaining/60);    }    p->totalOffset += keyp->buf_size;}#ifndef R_OK#define R_OK 4#endifvoid zebra_index_merge (ZebraHandle zh){    struct key_file **kf;    char rbuf[1024];    int i, r;    struct heap_info *hi;    struct progressInfo progressInfo;    int nkeys = zh->reg->key_file_no;        if (nkeys < 0)    {        
char fname[1024];        nkeys = 0;        while (1)        {            extract_get_fname_tmp  (zh, fname, nkeys+1);            if (access (fname, R_OK) == -1)                break;            nkeys++;        }        if (!nkeys)            return ;    }    kf = (struct key_file **) xmalloc ((1+nkeys) * sizeof(*kf));    progressInfo.totalBytes = 0;    progressInfo.totalOffset = 0;    time (&progressInfo.startTime);    time (&progressInfo.lastTime);    for (i = 1; i<=nkeys; i++)    {        kf[i] = key_file_init (i, 8192, zh->res);        kf[i]->readHandler = progressFunc;        kf[i]->readInfo = &progressInfo;        progressInfo.totalBytes += kf[i]->length;        progressInfo.totalOffset += kf[i]->buf_size;    }    hi = key_heap_init (nkeys, key_qsort_compare);    hi->reg = zh->reg;        for (i = 1; i<=nkeys; i++)        if ((r = key_file_read (kf[i], rbuf)))            key_heap_insert (hi, rbuf, r, kf[i]);    if (zh->reg->isams)	heap_inps (hi);    if (zh->reg->isamc)        heap_inpc (hi);    if (zh->reg->isam)	heap_inp (hi);    if (zh->reg->isamd)	heap_inpd (hi);    if (zh->reg->isamb)	heap_inpb (hi);	    for (i = 1; i<=nkeys; i++)    {        extract_get_fname_tmp  (zh, rbuf, i);        unlink (rbuf);    }    logf (LOG_LOG, "Iterations . . .%7d", hi->no_iterations);    logf (LOG_LOG, "Distinct words .%7d", hi->no_diffs);    logf (LOG_LOG, "Updates. . . . .%7d", hi->no_updates);    logf (LOG_LOG, "Deletions. . . .%7d", hi->no_deletions);    logf (LOG_LOG, "Insertions . . .%7d", hi->no_insertions);    zh->reg->key_file_no = 0;    key_heap_destroy (hi, nkeys);    for (i = 1; i<=nkeys; i++)        key_file_destroy (kf[i]);    xfree (kf);}

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -