⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 trunc.c

📁 harvest是一个下载html网页的机器人
💻 C
📖 第 1 页 / 共 2 页
字号:
/* $Id: trunc.c,v 1.28 2003/03/26 16:41:48 adam Exp $   Copyright (C) 1995,1996,1997,1998,1999,2000,2001,2002   Index Data ApsThis file is part of the Zebra server.Zebra is free software; you can redistribute it and/or modify it underthe terms of the GNU General Public License as published by the FreeSoftware Foundation; either version 2, or (at your option) any laterversion.Zebra is distributed in the hope that it will be useful, but WITHOUT ANYWARRANTY; without even the implied warranty of MERCHANTABILITY orFITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public Licensefor more details.You should have received a copy of the GNU General Public Licensealong with Zebra; see the file LICENSE.zebra.  If not, write to theFree Software Foundation, 59 Temple Place - Suite 330, Boston, MA02111-1307, USA.*/#include <stdio.h>#include <assert.h>#define NEW_TRUNC 1#include "index.h"#include <rstemp.h>#include <rsnull.h>#include <rsisams.h>#include <rsisam.h>#include <rsisamc.h>#include <rsisamd.h>#include <rsisamb.h>#if NEW_TRUNC#include <rsm_or.h>#endifstruct trunc_info {    int  *ptr;    int  *indx;    char **heap;    int  heapnum;    int  (*cmp)(const void *p1, const void *p2);    int  keysize;    char *swapbuf;    char *tmpbuf;    char *buf;};static void heap_swap (struct trunc_info *ti, int i1, int i2){    int swap;    swap = ti->ptr[i1];    ti->ptr[i1] = ti->ptr[i2];    ti->ptr[i2] = swap;}static void heap_delete (struct trunc_info *ti){    int cur = 1, child = 2;    heap_swap (ti, 1, ti->heapnum--);    while (child <= ti->heapnum) {        if (child < ti->heapnum &&            (*ti->cmp)(ti->heap[ti->ptr[child]],                       ti->heap[ti->ptr[1+child]]) > 0)            child++;        if ((*ti->cmp)(ti->heap[ti->ptr[cur]],                       ti->heap[ti->ptr[child]]) > 0)        {            heap_swap (ti, cur, child);            cur = child;            child = 2*cur;        }        else            break;    }}static void heap_insert (struct 
trunc_info *ti, const char *buf, int indx){    int cur, parent;    cur = ++(ti->heapnum);    memcpy (ti->heap[ti->ptr[cur]], buf, ti->keysize);    ti->indx[ti->ptr[cur]] = indx;    parent = cur/2;    while (parent && (*ti->cmp)(ti->heap[ti->ptr[parent]],                                ti->heap[ti->ptr[cur]]) > 0)    {        heap_swap (ti, cur, parent);        cur = parent;        parent = cur/2;    }}static struct trunc_info *heap_init (int size, int key_size,				     int (*cmp)(const void *p1,						const void *p2)){    struct trunc_info *ti = (struct trunc_info *) xmalloc (sizeof(*ti));    int i;    ++size;    ti->heapnum = 0;    ti->keysize = key_size;    ti->cmp = cmp;    ti->indx = (int *) xmalloc (size * sizeof(*ti->indx));    ti->heap = (char **) xmalloc (size * sizeof(*ti->heap));    ti->ptr = (int *) xmalloc (size * sizeof(*ti->ptr));    ti->swapbuf = (char *) xmalloc (ti->keysize);    ti->tmpbuf = (char *) xmalloc (ti->keysize);    ti->buf = (char *) xmalloc (size * ti->keysize);    for (i = size; --i >= 0; )    {        ti->ptr[i] = i;        ti->heap[i] = ti->buf + ti->keysize * i;    }    return ti;}static void heap_close (struct trunc_info *ti){    xfree (ti->ptr);    xfree (ti->indx);    xfree (ti->heap);    xfree (ti->swapbuf);    xfree (ti->tmpbuf);    xfree (ti->buf);    xfree (ti);}static RSET rset_trunc_r (ZebraHandle zi, const char *term, int length,                          const char *flags, ISAMS_P *isam_p, int from, int to,                          int merge_chunk, int preserve_position,                          int term_type){    RSET result;     RSFD result_rsfd;    rset_temp_parms parms;    int nn = 0;    parms.cmp = key_compare_it;    parms.key_size = sizeof(struct it_key);    parms.temp_path = res_get (zi->res, "setTmpDir");    parms.rset_term = rset_term_create (term, length, flags, term_type);    result = rset_create (rset_kind_temp, &parms);    result_rsfd = rset_open (result, RSETF_WRITE);    if (to - from > merge_chunk)    {       
 RSFD *rsfd;        RSET *rset;	int term_index;        int i, i_add = (to-from)/merge_chunk + 1;        struct trunc_info *ti;        int rscur = 0;        int rsmax = (to-from)/i_add + 1;                rset = (RSET *) xmalloc (sizeof(*rset) * rsmax);        rsfd = (RSFD *) xmalloc (sizeof(*rsfd) * rsmax);                for (i = from; i < to; i += i_add)        {            if (i_add <= to - i)                rset[rscur] = rset_trunc_r (zi, term, length, flags,				            isam_p, i, i+i_add,                                            merge_chunk, preserve_position,                                            term_type);            else                rset[rscur] = rset_trunc_r (zi, term, length, flags,                                            isam_p, i, to,                                            merge_chunk, preserve_position,                                            term_type);            rscur++;        }        ti = heap_init (rscur, sizeof(struct it_key), key_compare_it);        for (i = rscur; --i >= 0; )        {            rsfd[i] = rset_open (rset[i], RSETF_READ);            if (rset_read (rset[i], rsfd[i], ti->tmpbuf, &term_index))                heap_insert (ti, ti->tmpbuf, i);            else            {                rset_close (rset[i], rsfd[i]);                rset_delete (rset[i]);            }        }        while (ti->heapnum)        {            int n = ti->indx[ti->ptr[1]];            rset_write (result, result_rsfd, ti->heap[ti->ptr[1]]);            nn++;            while (1)            {                if (!rset_read (rset[n], rsfd[n], ti->tmpbuf, &term_index))                {                    heap_delete (ti);                    rset_close (rset[n], rsfd[n]);                    rset_delete (rset[n]);                    break;                }                if ((*ti->cmp)(ti->tmpbuf, ti->heap[ti->ptr[1]]) > 1)                {                    heap_delete (ti);                    heap_insert (ti, ti->tmpbuf, n);            
        break;                }            }        }        xfree (rset);        xfree (rsfd);        heap_close (ti);    }    else if (zi->reg->isam)    {        ISPT *ispt;        int i;        struct trunc_info *ti;        ispt = (ISPT *) xmalloc (sizeof(*ispt) * (to-from));        ti = heap_init (to-from, sizeof(struct it_key),                        key_compare_it);        for (i = to-from; --i >= 0; )        {            ispt[i] = is_position (zi->reg->isam, isam_p[from+i]);            if (is_readkey (ispt[i], ti->tmpbuf))                heap_insert (ti, ti->tmpbuf, i);            else                is_pt_free (ispt[i]);        }        while (ti->heapnum)        {            int n = ti->indx[ti->ptr[1]];            rset_write (result, result_rsfd, ti->heap[ti->ptr[1]]);            nn++;            if (preserve_position)            {/* section that preserve all keys */                heap_delete (ti);                if (is_readkey (ispt[n], ti->tmpbuf))                    heap_insert (ti, ti->tmpbuf, n);                else                    is_pt_free (ispt[n]);            }            else            {/* section that preserve all keys with unique sysnos */                while (1)                {                    if (!is_readkey (ispt[n], ti->tmpbuf))                    {                        heap_delete (ti);                        is_pt_free (ispt[n]);                        break;                    }                    if ((*ti->cmp)(ti->tmpbuf, ti->heap[ti->ptr[1]]) > 1)                    {                        heap_delete (ti);                        heap_insert (ti, ti->tmpbuf, n);                        break;                    }                }            }        }        heap_close (ti);        xfree (ispt);    }    else if (zi->reg->isamc)    {        ISAMC_PP *ispt;        int i;        struct trunc_info *ti;        ispt = (ISAMC_PP *) xmalloc (sizeof(*ispt) * (to-from));        ti = heap_init (to-from, sizeof(struct it_key),       
                 key_compare_it);        for (i = to-from; --i >= 0; )        {            ispt[i] = isc_pp_open (zi->reg->isamc, isam_p[from+i]);            if (isc_pp_read (ispt[i], ti->tmpbuf))                heap_insert (ti, ti->tmpbuf, i);            else                isc_pp_close (ispt[i]);        }        while (ti->heapnum)        {            int n = ti->indx[ti->ptr[1]];            rset_write (result, result_rsfd, ti->heap[ti->ptr[1]]);            nn++;            if (preserve_position)            {                heap_delete (ti);                if (isc_pp_read (ispt[n], ti->tmpbuf))                    heap_insert (ti, ti->tmpbuf, n);                else                    isc_pp_close (ispt[n]);            }            else            {                while (1)                {                    if (!isc_pp_read (ispt[n], ti->tmpbuf))                    {                        heap_delete (ti);                        isc_pp_close (ispt[n]);                        break;                    }                    if ((*ti->cmp)(ti->tmpbuf, ti->heap[ti->ptr[1]]) > 1)                    {                        heap_delete (ti);                        heap_insert (ti, ti->tmpbuf, n);                        break;                    }                }            }        }        heap_close (ti);        xfree (ispt);    }    else if (zi->reg->isamd)    {

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -