⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 isamc.c

📁 harvest是一个下载HTML网页的机器人
💻 C
📖 第 1 页 / 共 2 页
字号:
/* $Id: isamc.c,v 1.23 2003/06/23 15:36:11 adam Exp $   Copyright (C) 1995,1996,1997,1998,1999,2000,2001,2002,2003   Index Data ApsThis file is part of the Zebra server.Zebra is free software; you can redistribute it and/or modify it underthe terms of the GNU General Public License as published by the FreeSoftware Foundation; either version 2, or (at your option) any laterversion.Zebra is distributed in the hope that it will be useful, but WITHOUT ANYWARRANTY; without even the implied warranty of MERCHANTABILITY orFITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public Licensefor more details.You should have received a copy of the GNU General Public Licensealong with Zebra; see the file LICENSE.zebra.  If not, write to theFree Software Foundation, 59 Temple Place - Suite 330, Boston, MA02111-1307, USA.*//*  * TODO: *   Reduction to lower categories in isc_merge */#include <stdlib.h>#include <assert.h>#include <string.h>#include <stdio.h>#include <yaz/log.h>#include "isamc-p.h"static void flush_block (ISAMC is, int cat);static void release_fc (ISAMC is, int cat);static void init_fc (ISAMC is, int cat);#define ISAMC_FREELIST_CHUNK 1#define SMALL_TEST 0void isc_getmethod (ISAMC_M *m){    static struct ISAMC_filecat_s def_cat[] = {#if SMALL_TEST        {    32,     28,      0,  3 },	{    64,     54,     30,  0 },#else        {    32,     26,     20,  10 },	{   128,    120,    100,  10 },        {   512,    490,    350,  10 },        {  2048,   1900,   1700,  10 },        {  8192,   8000,   7900,  10 },        { 32768,  32000,  31000,  0 },#endif    };    m->filecat = def_cat;    m->code_start = NULL;    m->code_item = NULL;    m->code_stop = NULL;    m->code_reset = NULL;    m->compare_item = NULL;    m->debug = 1;    m->max_blocks_mem = 10;}ISAMC isc_open (BFiles bfs, const char *name, int writeflag, ISAMC_M *method){    ISAMC is;    ISAMC_filecat filecat;    int i = 0;    int max_buf_size = 0;    is = (ISAMC) xmalloc (sizeof(*is));    is->method = (ISAMC_M *) 
xmalloc (sizeof(*is->method));    memcpy (is->method, method, sizeof(*method));    filecat = is->method->filecat;    assert (filecat);    /* determine number of block categories */    if (is->method->debug)        logf (LOG_LOG, "isc: bsize  ifill  mfill mblocks");    do    {        if (is->method->debug)            logf (LOG_LOG, "isc:%6d %6d %6d %6d",                  filecat[i].bsize, filecat[i].ifill,                   filecat[i].mfill, filecat[i].mblocks);        if (max_buf_size < filecat[i].mblocks * filecat[i].bsize)            max_buf_size = filecat[i].mblocks * filecat[i].bsize;    } while (filecat[i++].mblocks);    is->no_files = i;    is->max_cat = --i;    /* max_buf_size is the larget buffer to be used during merge */    max_buf_size = (1 + max_buf_size / filecat[i].bsize) * filecat[i].bsize;    if (max_buf_size < (1+is->method->max_blocks_mem) * filecat[i].bsize)        max_buf_size = (1+is->method->max_blocks_mem) * filecat[i].bsize;    if (is->method->debug)        logf (LOG_LOG, "isc: max_buf_size %d", max_buf_size);        assert (is->no_files > 0);    is->files = (ISAMC_file) xmalloc (sizeof(*is->files)*is->no_files);    if (writeflag)    {        is->merge_buf = (char *) xmalloc (max_buf_size+256);	memset (is->merge_buf, 0, max_buf_size+256);    }    else        is->merge_buf = NULL;    for (i = 0; i<is->no_files; i++)    {        char fname[512];        sprintf (fname, "%s%c", name, i+'A');        is->files[i].bf = bf_open (bfs, fname, is->method->filecat[i].bsize,                                   writeflag);        is->files[i].head_is_dirty = 0;        if (!bf_read (is->files[i].bf, 0, 0, sizeof(ISAMC_head),                     &is->files[i].head))        {            is->files[i].head.lastblock = 1;            is->files[i].head.freelist = 0;        }	is->files[i].alloc_entries_num = 0;	is->files[i].alloc_entries_max =	    is->method->filecat[i].bsize / sizeof(int) - 1;	is->files[i].alloc_buf = (char *)	    xmalloc 
(is->method->filecat[i].bsize);        is->files[i].no_writes = 0;        is->files[i].no_reads = 0;        is->files[i].no_skip_writes = 0;        is->files[i].no_allocated = 0;        is->files[i].no_released = 0;        is->files[i].no_remap = 0;	is->files[i].no_forward = 0;	is->files[i].no_backward = 0;	is->files[i].sum_forward = 0;	is->files[i].sum_backward = 0;	is->files[i].no_next = 0;	is->files[i].no_prev = 0;        init_fc (is, i);    }    return is;}int isc_block_used (ISAMC is, int type){    if (type < 0 || type >= is->no_files)	return -1;    return is->files[type].head.lastblock-1;}int isc_block_size (ISAMC is, int type){    ISAMC_filecat filecat = is->method->filecat;    if (type < 0 || type >= is->no_files)	return -1;    return filecat[type].bsize;}int isc_close (ISAMC is){    int i;    if (is->method->debug)    {	logf (LOG_LOG, "isc:    next    forw   mid-f    prev   backw   mid-b");	for (i = 0; i<is->no_files; i++)	    logf (LOG_LOG, "isc:%8d%8d%8.1f%8d%8d%8.1f",		  is->files[i].no_next,		  is->files[i].no_forward,		  is->files[i].no_forward ?		  (double) is->files[i].sum_forward/is->files[i].no_forward		  : 0.0,		  is->files[i].no_prev,		  is->files[i].no_backward,		  is->files[i].no_backward ?		  
(double) is->files[i].sum_backward/is->files[i].no_backward		  : 0.0);    }    if (is->method->debug)        logf (LOG_LOG, "isc:  writes   reads skipped   alloc released  remap");    for (i = 0; i<is->no_files; i++)    {        release_fc (is, i);        assert (is->files[i].bf);        if (is->files[i].head_is_dirty)            bf_write (is->files[i].bf, 0, 0, sizeof(ISAMC_head),                 &is->files[i].head);        if (is->method->debug)            logf (LOG_LOG, "isc:%8d%8d%8d%8d%8d%8d",                  is->files[i].no_writes,                  is->files[i].no_reads,                  is->files[i].no_skip_writes,                  is->files[i].no_allocated,                  is->files[i].no_released,                  is->files[i].no_remap);        xfree (is->files[i].fc_list);	flush_block (is, i);        bf_close (is->files[i].bf);    }    xfree (is->files);    xfree (is->merge_buf);    xfree (is->method);    xfree (is);    return 0;}int isc_read_block (ISAMC is, int cat, int pos, char *dst){    ++(is->files[cat].no_reads);    return bf_read (is->files[cat].bf, pos, 0, 0, dst);}int isc_write_block (ISAMC is, int cat, int pos, char *src){    ++(is->files[cat].no_writes);    if (is->method->debug > 2)        logf (LOG_LOG, "isc: write_block %d %d", cat, pos);    return bf_write (is->files[cat].bf, pos, 0, 0, src);}int isc_write_dblock (ISAMC is, int cat, int pos, char *src,                      int nextpos, int offset){    ISAMC_BLOCK_SIZE size = offset + ISAMC_BLOCK_OFFSET_N;    if (is->method->debug > 2)        logf (LOG_LOG, "isc: write_dblock. 
size=%d nextpos=%d",              (int) size, nextpos);    src -= ISAMC_BLOCK_OFFSET_N;    memcpy (src, &nextpos, sizeof(int));    memcpy (src + sizeof(int), &size, sizeof(size));    return isc_write_block (is, cat, pos, src);}#if ISAMC_FREELIST_CHUNKstatic void flush_block (ISAMC is, int cat){    char *abuf = is->files[cat].alloc_buf;    int block = is->files[cat].head.freelist;    if (block && is->files[cat].alloc_entries_num)    {	memcpy (abuf, &is->files[cat].alloc_entries_num, sizeof(int));	bf_write (is->files[cat].bf, block, 0, 0, abuf);	is->files[cat].alloc_entries_num = 0;    }    xfree (abuf);}static int alloc_block (ISAMC is, int cat){    int block = is->files[cat].head.freelist;    char *abuf = is->files[cat].alloc_buf;    (is->files[cat].no_allocated)++;    if (!block)    {        block = (is->files[cat].head.lastblock)++;   /* no free list */	is->files[cat].head_is_dirty = 1;    }    else    {	if (!is->files[cat].alloc_entries_num) /* read first time */	{	    bf_read (is->files[cat].bf, block, 0, 0, abuf);	    memcpy (&is->files[cat].alloc_entries_num, abuf,		    sizeof(is->files[cat].alloc_entries_num));	    assert (is->files[cat].alloc_entries_num > 0);	}	/* have some free blocks now */

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -