⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 isamd.c

📁 harvest是一个下载html网页的机器人
💻 C
📖 第 1 页 / 共 2 页
字号:
/* $Id: isamd.c,v 1.26 2003/06/23 15:36:11 adam Exp $   Copyright (C) 1995,1996,1997,1998,1999,2000,2001,2002,2003   Index Data ApsThis file is part of the Zebra server.Zebra is free software; you can redistribute it and/or modify it underthe terms of the GNU General Public License as published by the FreeSoftware Foundation; either version 2, or (at your option) any laterversion.Zebra is distributed in the hope that it will be useful, but WITHOUT ANYWARRANTY; without even the implied warranty of MERCHANTABILITY orFITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public Licensefor more details.You should have received a copy of the GNU General Public Licensealong with Zebra; see the file LICENSE.zebra.  If not, write to theFree Software Foundation, 59 Temple Place - Suite 330, Boston, MA02111-1307, USA.*/#include <stdlib.h>#include <assert.h>#include <string.h>#include <stdio.h>#include <yaz/log.h>#include "../index/index.h"  /* isamd uses the internal structure of it_key */#include "isamd-p.h"static void flush_block (ISAMD is, int cat);static void release_fc (ISAMD is, int cat);static void init_fc (ISAMD is, int cat);#define ISAMD_FREELIST_CHUNK 1#define SMALL_TEST 0ISAMD_M *isamd_getmethod (ISAMD_M *me){    static struct ISAMD_filecat_s def_cat[] = {#if SMALL_TEST/*        blocksz,   max. Unused time being */        {    32,   40 },  /* 24 is the smallest unreasonable size! 
*/	{    64,    0 },#else        {    32,    1 },	{   128,    1 },	{   256,    1 },	{   512,    1 },        {  1024,    1 },        {  2048,    1 },        {  4096,    1 },        {  8192,    0 },#endif#ifdef SKIPTHIS        {    32,    1 },        {   128,    1 },        {   512,    1 },        {  2048,    1 },        {  8192,    1 },        { 32768,    1 },        {131072,    0 },        {    24,    1 }, /* Experimental sizes */        {    32,    1 },        {    64,    1 },        {   128,    1 },        {   256,    1 },        {   512,    1 },        {  1024,    1 },        {  2048,    0 },#endif     };    ISAMD_M *m = (ISAMD_M *) xmalloc (sizeof(*m));  /* never released! */    m->filecat = def_cat;                        /* ok, only alloc'd once */    m->code_start = NULL;    m->code_item = NULL;    m->code_stop = NULL;    m->code_reset = NULL;    m->compare_item = NULL;    m->debug = 0; /* default to no debug */    m->max_blocks_mem = 10;    return m;}ISAMD isamd_open (BFiles bfs, const char *name, int writeflag, ISAMD_M *method){    ISAMD is;    ISAMD_filecat filecat;    int i = 0;    is = (ISAMD) xmalloc (sizeof(*is));    is->method = (ISAMD_M *) xmalloc (sizeof(*is->method));    memcpy (is->method, method, sizeof(*method));    filecat = is->method->filecat;    assert (filecat);    /* determine number of block categories */    if (is->method->debug>0)        logf (LOG_LOG, "isamd: bsize  maxkeys");    do    {        if (is->method->debug>0)            logf (LOG_LOG, "isamd:%6d %6d",                  filecat[i].bsize, filecat[i].mblocks);    } while (filecat[i++].mblocks);    is->no_files = i;    is->max_cat = --i;     assert (is->no_files > 0);    assert (is->max_cat <=8 ); /* we have only 3 bits for it */        is->files = (ISAMD_file) xmalloc (sizeof(*is->files)*is->no_files);    for (i = 0; i<is->no_files; i++)    {        char fname[512];        sprintf (fname, "%s%c", name, i+'A');        is->files[i].bf = bf_open (bfs, fname, 
is->method->filecat[i].bsize,                                   writeflag);        is->files[i].head_is_dirty = 0;        if (!bf_read (is->files[i].bf, 0, 0, sizeof(ISAMD_head),                     &is->files[i].head))        {            is->files[i].head.lastblock = 1;            is->files[i].head.freelist = 0;        }	is->files[i].alloc_entries_num = 0;	is->files[i].alloc_entries_max =	    is->method->filecat[i].bsize / sizeof(int) - 1;	is->files[i].alloc_buf = (char *)	    xmalloc (is->method->filecat[i].bsize);        is->files[i].no_writes = 0; /* clear statistics */        is->files[i].no_reads = 0;        is->files[i].no_skip_writes = 0;        is->files[i].no_allocated = 0;        is->files[i].no_released = 0;        is->files[i].no_remap = 0;	is->files[i].no_forward = 0;	is->files[i].no_backward = 0;	is->files[i].sum_forward = 0;	is->files[i].sum_backward = 0;	is->files[i].no_next = 0;	is->files[i].no_prev = 0;        is->files[i].no_op_diffonly=0;        is->files[i].no_op_main=0;        init_fc (is, i);    }    is->last_pos=0;    is->last_cat=0;       is->no_read=0;        is->no_read_main=0;    is->no_write=0;       is->no_op_single=0;    is->no_op_new=0;    is->no_read_keys=0;    is->no_read_eof=0;    is->no_seek_nxt=0;    is->no_seek_sam=0;    is->no_seek_fwd=0;    is->no_seek_prv=0;    is->no_seek_bak=0;    is->no_seek_cat=0;    is->no_fbuilds=0;    is->no_appds=0;    is->no_merges=0;    is->no_non=0;    is->no_singles=0;    return is;}int isamd_block_used (ISAMD is, int type){    if ( type==-1) /* singleton */      return 0;     if (type < 0 || type >= is->no_files)	return -1;    return is->files[type].head.lastblock-1;}int isamd_block_size (ISAMD is, int type){    ISAMD_filecat filecat = is->method->filecat;    if ( type==-1) /* singleton */      return 0; /* no bytes used */     if (type < 0 || type >= is->no_files)	return -1;    return filecat[type].bsize;}int isamd_close (ISAMD is){    int i;    int s;    if (is->method->debug>0)    {        
logf (LOG_LOG, "isamd statistics");	logf (LOG_LOG, "f    nxt   forw  mid-f   prev  backw  mid-b");	for (i = 0; i<is->no_files; i++)	    logf (LOG_LOG, "%d%7d%7d%7.1f%7d%7d%7.1f",i,		  is->files[i].no_next,		  is->files[i].no_forward,		  is->files[i].no_forward ?		    (double) is->files[i].sum_forward/is->files[i].no_forward		    : 0.0,		  is->files[i].no_prev,		  is->files[i].no_backward,		  is->files[i].no_backward ?		    (double) is->files[i].sum_backward/is->files[i].no_backward		    : 0.0);    }    if (is->method->debug>0)        logf (LOG_LOG, "f  writes   reads skipped   alloc released ");    for (i = 0; i<is->no_files; i++)    {        release_fc (is, i);        assert (is->files[i].bf);        if (is->files[i].head_is_dirty)            bf_write (is->files[i].bf, 0, 0, sizeof(ISAMD_head),                 &is->files[i].head);        if (is->method->debug>0)            logf (LOG_LOG, "%d%8d%8d%8d%8d%8d",i,                  is->files[i].no_writes,                  is->files[i].no_reads,                  is->files[i].no_skip_writes,                  is->files[i].no_allocated,                  is->files[i].no_released);        xfree (is->files[i].fc_list);	flush_block (is, i);        bf_close (is->files[i].bf);    }        if (is->method->debug>0)     {        logf (LOG_LOG, "f   opens    main  diffonly");        for (i = 0; i<is->no_files; i++)        {            logf (LOG_LOG, "%d%8d%8d%8d",i,                  is->files[i].no_op_main+                  is->files[i].no_op_diffonly,                  is->files[i].no_op_main,                  is->files[i].no_op_diffonly);        }        logf(LOG_LOG,"open single  %8d", is->no_op_single);        logf(LOG_LOG,"open new     %8d", is->no_op_new);        logf(LOG_LOG, "new build   %8d", is->no_fbuilds);        logf(LOG_LOG, "append      %8d", is->no_appds);        logf(LOG_LOG, "  merges    %8d", is->no_merges);        logf(LOG_LOG, "  singles   %8d", is->no_singles);        logf(LOG_LOG, "  no-ops    %8d", 
is->no_non);        logf(LOG_LOG, "read blocks %8d", is->no_read);        logf(LOG_LOG, "read keys:  %8d %8.1f k/bl",                   is->no_read_keys,                   1.0*(is->no_read_keys+1)/(is->no_read+1) );        logf(LOG_LOG, "read main-k %8d %8.1f %% of keys",                  is->no_read_main,                  100.0*(is->no_read_main+1)/(is->no_read_keys+1) );        logf(LOG_LOG, "read ends:  %8d %8.1f k/e",                  is->no_read_eof,                  1.0*(is->no_read_keys+1)/(is->no_read_eof+1) );        s= is->no_seek_nxt+ is->no_seek_sam+ is->no_seek_fwd +           is->no_seek_prv+ is->no_seek_bak+ is->no_seek_cat;        if (s==0)           s++;        logf(LOG_LOG, "seek same   %8d %8.1f%%",            is->no_seek_sam, 100.0*is->no_seek_sam/s );        logf(LOG_LOG, "seek next   %8d %8.1f%%",            is->no_seek_nxt, 100.0*is->no_seek_nxt/s );        logf(LOG_LOG, "seek prev   %8d %8.1f%%",            is->no_seek_prv, 100.0*is->no_seek_prv/s );        logf(LOG_LOG, "seek forw   %8d %8.1f%%",            is->no_seek_fwd, 100.0*is->no_seek_fwd/s );        logf(LOG_LOG, "seek back   %8d %8.1f%%",            is->no_seek_bak, 100.0*is->no_seek_bak/s );        logf(LOG_LOG, "seek cat    %8d %8.1f%%",            is->no_seek_cat, 100.0*is->no_seek_cat/s );    }    xfree (is->files);    xfree (is->method);    xfree (is);    return 0;}static void isamd_seek_stat(ISAMD is, int cat, int pos){  if (cat != is->last_cat)     is->no_seek_cat++;  else if ( pos == is->last_pos)     is->no_seek_sam++;  else if ( pos == is->last_pos+1)     is->no_seek_nxt++;  else if ( pos == is->last_pos-1)     is->no_seek_prv++;  else if ( pos > is->last_pos)     is->no_seek_fwd++;  else if ( pos < is->last_pos)     is->no_seek_bak++;  is->last_cat = cat;  is->last_pos = pos;} /* seek_stat */int isamd_read_block (ISAMD is, int cat, int pos, char *dst){    isamd_seek_stat(is,cat,pos);    ++(is->files[cat].no_reads);    ++(is->no_read);    if (is->method->debug > 6)        
logf (LOG_LOG, "isamd: read_block %d:%d",cat, pos);    return bf_read (is->files[cat].bf, pos, 0, 0, dst);}int isamd_write_block (ISAMD is, int cat, int pos, char *src){    isamd_seek_stat(is,cat,pos);    ++(is->files[cat].no_writes);    ++(is->no_write);    if (is->method->debug > 6)        logf (LOG_LOG, "isamd: write_block %d:%d", cat, pos);    return bf_write (is->files[cat].bf, pos, 0, 0, src);}int isamd_write_dblock (ISAMD is, int cat, int pos, char *src,                      int nextpos, int offset){    ISAMD_BLOCK_SIZE size = offset + ISAMD_BLOCK_OFFSET_N;    if (is->method->debug > 4)        logf (LOG_LOG, "isamd: write_dblock. size=%d nextpos=%d",              (int) size, nextpos);    src -= ISAMD_BLOCK_OFFSET_N;    assert( ISAMD_BLOCK_OFFSET_N == sizeof(int)+sizeof(int) );    memcpy (src, &nextpos, sizeof(int));    memcpy (src + sizeof(int), &size, sizeof(size));    return isamd_write_block (is, cat, pos, src);}#if ISAMD_FREELIST_CHUNKstatic void flush_block (ISAMD is, int cat){    char *abuf = is->files[cat].alloc_buf;    int block = is->files[cat].head.freelist;    if (block && is->files[cat].alloc_entries_num)    {	memcpy (abuf, &is->files[cat].alloc_entries_num, sizeof(int));	bf_write (is->files[cat].bf, block, 0, 0, abuf);	is->files[cat].alloc_entries_num = 0;    }    xfree (abuf);}static int alloc_block (ISAMD is, int cat){    int block = is->files[cat].head.freelist;    char *abuf = is->files[cat].alloc_buf;    (is->files[cat].no_allocated)++;    if (!block)    {        block = (is->files[cat].head.lastblock)++;   /* no free list */	is->files[cat].head_is_dirty = 1;    }    else    {	if (!is->files[cat].alloc_entries_num) /* read first time */	{	    bf_read (is->files[cat].bf, block, 0, 0, abuf);	    memcpy (&is->files[cat].alloc_entries_num, abuf,		    sizeof(is->files[cat].alloc_entries_num));	    assert (is->files[cat].alloc_entries_num > 0);	}	/* have some free blocks now */	assert (is->files[cat].alloc_entries_num > 0);	
is->files[cat].alloc_entries_num--;	if (!is->files[cat].alloc_entries_num)  /* last one in block? */	{	    memcpy (&is->files[cat].head.freelist, abuf + sizeof(int),		    sizeof(int));	    is->files[cat].head_is_dirty = 1;	    if (is->files[cat].head.freelist)	    {		bf_read (is->files[cat].bf, is->files[cat].head.freelist,			 0, 0, abuf);		memcpy (&is->files[cat].alloc_entries_num, abuf,			sizeof(is->files[cat].alloc_entries_num));		assert (is->files[cat].alloc_entries_num);	    }	}	else	    memcpy (&block, abuf + sizeof(int) + sizeof(int) *		    is->files[cat].alloc_entries_num, sizeof(int));    }    return block;}static void release_block (ISAMD is, int cat, int pos){    char *abuf = is->files[cat].alloc_buf;    int block = is->files[cat].head.freelist;    (is->files[cat].no_released)++;    if (block && !is->files[cat].alloc_entries_num) /* must read block */    {	bf_read (is->files[cat].bf, block, 0, 0, abuf);	memcpy (&is->files[cat].alloc_entries_num, abuf,		sizeof(is->files[cat].alloc_entries_num));	assert (is->files[cat].alloc_entries_num > 0);    }    assert (is->files[cat].alloc_entries_num <= is->files[cat].alloc_entries_max);    if (is->files[cat].alloc_entries_num == is->files[cat].alloc_entries_max)    {	assert (block);	memcpy (abuf, &is->files[cat].alloc_entries_num, sizeof(int));	bf_write (is->files[cat].bf, block, 0, 0, abuf);	is->files[cat].alloc_entries_num = 0;    }    if (!is->files[cat].alloc_entries_num) /* make new buffer? */    {

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -