⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 merge.c

📁 harvest是一个下载html网页的机器人
💻 C
📖 第 1 页 / 共 2 页
字号:
/* $Id: merge.c,v 1.23 2003/06/23 15:36:11 adam Exp $   Copyright (C) 1995,1996,1997,1998,1999,2000,2001,2002   Index Data ApsThis file is part of the Zebra server.Zebra is free software; you can redistribute it and/or modify it underthe terms of the GNU General Public License as published by the FreeSoftware Foundation; either version 2, or (at your option) any laterversion.Zebra is distributed in the hope that it will be useful, but WITHOUT ANYWARRANTY; without even the implied warranty of MERCHANTABILITY orFITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public Licensefor more details.You should have received a copy of the GNU General Public Licensealong with Zebra; see the file LICENSE.zebra.  If not, write to theFree Software Foundation, 59 Temple Place - Suite 330, Boston, MA02111-1307, USA.*/#include <stdlib.h>#include <assert.h>#include <string.h>#include <stdio.h>#include <yaz/log.h>#include "isamc-p.h"struct isc_merge_block {    int offset;       /* offset in r_buf */    int block;        /* block number of file (0 if none) */    int dirty;        /* block is different from that on file */};#if 0static void opt_blocks (ISAMC is, struct isc_merge_block *mb, int ptr,			int last){    int i, no_dirty = 0;    for (i = 0; i<ptr; i++)	if (mb[i].dirty)	    no_dirty++;    if (no_dirty*4 < ptr*3)	return;    /* bubble-sort it */    for (i = 0; i<ptr; i++)    {	int tmp, j, j_min = -1;	for (j = i; j<ptr; j++)	{	    if (j_min < 0 || mb[j_min].block > mb[j].block)		j_min = j;	}	assert (j_min >= 0);	tmp = mb[j_min].block;	mb[j_min].block = mb[i].block;	mb[i].block = tmp;	mb[i].dirty = 1;    }    if (!last)	mb[i].dirty = 1;}#endifstatic void flush_blocks (ISAMC is, struct isc_merge_block *mb, int ptr,                          char *r_buf, int *firstpos, int cat, int last,                          int *numkeys){    int i;    for (i = 0; i<ptr; i++)    {        /* consider this block number */        if (!mb[i].block)         {            mb[i].block = isc_alloc_block 
(is, cat);            mb[i].dirty = 1;        }        /* consider next block pointer */        if (last && i == ptr-1)            mb[i+1].block = 0;        else if (!mb[i+1].block)               {            mb[i+1].block = isc_alloc_block (is, cat);            mb[i+1].dirty = 1;            mb[i].dirty = 1;        }    }    for (i = 0; i<ptr; i++)    {        char *src;        ISAMC_BLOCK_SIZE ssize = mb[i+1].offset - mb[i].offset;        assert (ssize);        /* skip rest if not dirty */        if (!mb[i].dirty)        {            assert (mb[i].block);            if (!*firstpos)                *firstpos = mb[i].block;            if (is->method->debug > 2)                logf (LOG_LOG, "isc: skip ptr=%d size=%d %d %d",                     i, ssize, cat, mb[i].block);            ++(is->files[cat].no_skip_writes);            continue;        }        /* write block */        if (!*firstpos)        {            *firstpos = mb[i].block;            src = r_buf + mb[i].offset - ISAMC_BLOCK_OFFSET_1;            ssize += ISAMC_BLOCK_OFFSET_1;            memcpy (src+sizeof(int)+sizeof(ssize), numkeys,                    sizeof(*numkeys));            if (is->method->debug > 2)                logf (LOG_LOG, "isc: flush ptr=%d numk=%d size=%d nextpos=%d",                     i, *numkeys, (int) ssize, mb[i+1].block);        }        else        {            src = r_buf + mb[i].offset - ISAMC_BLOCK_OFFSET_N;            ssize += ISAMC_BLOCK_OFFSET_N;            if (is->method->debug > 2)                logf (LOG_LOG, "isc: flush ptr=%d size=%d nextpos=%d",                     i, (int) ssize, mb[i+1].block);        }        memcpy (src, &mb[i+1].block, sizeof(int));        memcpy (src+sizeof(int), &ssize, sizeof(ssize));        isc_write_block (is, cat, mb[i].block, src);    }}static int get_border (ISAMC is, struct isc_merge_block *mb, int ptr,                       int cat, int firstpos){   /* Border set to initial fill or block size depending on      whether we are creating 
a new one or updating and old one.    */        int fill = mb[ptr].block ? is->method->filecat[cat].bsize :                               is->method->filecat[cat].ifill;    int off = (ptr||firstpos) ? ISAMC_BLOCK_OFFSET_N : ISAMC_BLOCK_OFFSET_1;        assert (ptr < 199);    return mb[ptr].offset + fill - off;}ISAMC_P isc_merge (ISAMC is, ISAMC_P ipos, ISAMC_I *data){    char i_item[128], *i_item_ptr;    int i_more, i_mode, i;    ISAMC_PP pp;     char f_item[128], *f_item_ptr;    int f_more;    int last_dirty = 0;    int debug = is->method->debug;     struct isc_merge_block mb[200];    int firstpos = 0;    int cat = 0;    char r_item_buf[128]; /* temporary result output */    char *r_buf;          /* block with resulting data */    int r_offset = 0;     /* current offset in r_buf */    int ptr = 0;          /* pointer */    void *r_clientData;   /* encode client data */    int border;    int numKeys = 0;    r_clientData = (*is->method->code_start)(ISAMC_ENCODE);    r_buf = is->merge_buf + 128;    pp = isc_pp_open (is, ipos);    /* read first item from file. make sure f_more indicates no boundary */    f_item_ptr = f_item;    f_more = isc_read_item (pp, &f_item_ptr);    if (f_more > 0)        f_more = 1;    cat = pp->cat;    if (debug > 1)        logf (LOG_LOG, "isc: isc_merge begin %d %d", cat, pp->pos);    /* read first item from i */    i_item_ptr = i_item;    i_more = (*data->read_item)(data->clientData, &i_item_ptr, &i_mode);    mb[ptr].block = pp->pos;     /* is zero if no block on disk */    mb[ptr].dirty = 0;    mb[ptr].offset = 0;    border = get_border (is, mb, ptr, cat, firstpos);    while (i_more || f_more)    {        char *r_item = r_item_buf;        int cmp;        if (f_more > 1)        {            /* block to block boundary in the original file. 
*/            f_more = 1;            if (cat == pp->cat)             {                /* the resulting output is of the same category as the                   the original 		*/                if (r_offset <= mb[ptr].offset +is->method->filecat[cat].mfill)                {                    /* the resulting output block is too small/empty. Delete                       the original (if any)		    */                    if (debug > 3)                        logf (LOG_LOG, "isc: release A");                    if (mb[ptr].block)                        isc_release_block (is, pp->cat, mb[ptr].block);                    mb[ptr].block = pp->pos;		    if (!mb[ptr].dirty)			mb[ptr].dirty = 1;                    if (ptr > 0)                        mb[ptr-1].dirty = 1;                }                else                {                    /* indicate new boundary based on the original file */                    mb[++ptr].block = pp->pos;                    mb[ptr].dirty = last_dirty;                    mb[ptr].offset = r_offset;                    if (debug > 3)                        logf (LOG_LOG, "isc: bound ptr=%d,offset=%d",                            ptr, r_offset);                    if (cat==is->max_cat && ptr >= is->method->max_blocks_mem)                    {                        /* We are dealing with block(s) of max size. Block(s)                           except 1 will be flushed.                         
*/                        if (debug > 2)                            logf (LOG_LOG, "isc: flush A %d sections", ptr);                        flush_blocks (is, mb, ptr-1, r_buf, &firstpos, cat,                                      0, &pp->numKeys);                        mb[0].block = mb[ptr-1].block;                        mb[0].dirty = mb[ptr-1].dirty;                        memcpy (r_buf, r_buf + mb[ptr-1].offset,                                mb[ptr].offset - mb[ptr-1].offset);                        mb[0].offset = 0;                        mb[1].block = mb[ptr].block;                        mb[1].dirty = mb[ptr].dirty;                        mb[1].offset = mb[ptr].offset - mb[ptr-1].offset;                        ptr = 1;                        r_offset = mb[ptr].offset;                    }                }            }            border = get_border (is, mb, ptr, cat, firstpos);        }	last_dirty = 0;        if (!f_more)            cmp = -1;        else if (!i_more)            cmp = 1;        else            cmp = (*is->method->compare_item)(i_item, f_item);        if (cmp == 0)                   /* insert i=f */        {            if (!i_mode)   /* delete item? */            {                /* move i */                i_item_ptr = i_item;                i_more = (*data->read_item)(data->clientData, &i_item_ptr,

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -