⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 isamb.c

📁 harvest是一个下载html网页的机器人
💻 C
📖 第 1 页 / 共 2 页
字号:
/* $Id: isamb.c,v 1.27 2003/06/23 15:36:11 adam Exp $   Copyright (C) 1995,1996,1997,1998,1999,2000,2001,2002,2003   Index Data ApsThis file is part of the Zebra server.Zebra is free software; you can redistribute it and/or modify it underthe terms of the GNU General Public License as published by the FreeSoftware Foundation; either version 2, or (at your option) any laterversion.Zebra is distributed in the hope that it will be useful, but WITHOUT ANYWARRANTY; without even the implied warranty of MERCHANTABILITY orFITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public Licensefor more details.You should have received a copy of the GNU General Public Licensealong with Zebra; see the file LICENSE.zebra.  If not, write to theFree Software Foundation, 59 Temple Place - Suite 330, Boston, MA02111-1307, USA.*/#include <string.h>#include <yaz/xmalloc.h>#include <yaz/log.h>#include <isamb.h>#include <assert.h>struct ISAMB_head {    int first_block;    int last_block;    int block_size;    int block_max;    int free_list;};#define ISAMB_DATA_OFFSET 3#define DST_ITEM_MAX 256/* approx 2*4 K + max size of item */#define DST_BUF_SIZE 8448#define ISAMB_CACHE_ENTRY_SIZE 4096struct ISAMB_cache_entry {    ISAMB_P pos;    unsigned char *buf;    int dirty;    int hits;    struct ISAMB_cache_entry *next;};struct ISAMB_file {    BFile bf;    int head_dirty;    struct ISAMB_head head;    struct ISAMB_cache_entry *cache_entries;};struct ISAMB_s {    BFiles bfs;    ISAMC_M *method;    struct ISAMB_file *file;    int no_cat;    int cache; /* 0=no cache, 1=use cache, -1=dummy isam (for testing only) */    int log_io;        /* log level for bf_read/bf_write calls */    int log_freelist;  /* log level for freelist handling */};struct ISAMB_block {    ISAMB_P pos;    int cat;    int size;    int leaf;    int dirty;    int deleted;    int offset;    char *bytes;    unsigned char *buf;    void *decodeClientData;    int log_rw;};struct ISAMB_PP_s {    ISAMB isamb;    ISAMB_P pos;    int 
level;    int total_size;    int no_blocks;    struct ISAMB_block **block;};void encode_ptr (char **dst, int pos){    memcpy (*dst, &pos, sizeof(pos));    (*dst) += sizeof(pos);}void decode_ptr (char **src, int *pos){    memcpy (pos, *src, sizeof(*pos));    (*src) += sizeof(*pos);}ISAMB isamb_open (BFiles bfs, const char *name, int writeflag, ISAMC_M *method,                  int cache){    ISAMB isamb = xmalloc (sizeof(*isamb));    int i, b_size = 32;    isamb->bfs = bfs;    isamb->method = (ISAMC_M *) xmalloc (sizeof(*method));    memcpy (isamb->method, method, sizeof(*method));    isamb->no_cat = 4;    isamb->log_io = 0;    isamb->log_freelist = 0;    isamb->cache = cache;    assert (cache == 0);    isamb->file = xmalloc (sizeof(*isamb->file) * isamb->no_cat);    for (i = 0; i<isamb->no_cat; i++)    {        char fname[DST_BUF_SIZE];        isamb->file[i].cache_entries = 0;        isamb->file[i].head_dirty = 0;        sprintf (fname, "%s%c", name, i+'A');        if (cache)            isamb->file[i].bf = bf_open (bfs, fname, ISAMB_CACHE_ENTRY_SIZE,                                         writeflag);        else            isamb->file[i].bf = bf_open (bfs, fname, b_size, writeflag);                if (!bf_read (isamb->file[i].bf, 0, 0, sizeof(struct ISAMB_head),                      &isamb->file[i].head))	{            isamb->file[i].head.first_block = ISAMB_CACHE_ENTRY_SIZE/b_size+1;            isamb->file[i].head.last_block = isamb->file[i].head.first_block;            isamb->file[i].head.block_size = b_size;            isamb->file[i].head.block_max = b_size - ISAMB_DATA_OFFSET;            isamb->file[i].head.free_list = 0;	}        assert (isamb->file[i].head.block_size >= ISAMB_DATA_OFFSET);        isamb->file[i].head_dirty = 0;        assert(isamb->file[i].head.block_size == b_size);        b_size = b_size * 4;    }    return isamb;}static void flush_blocks (ISAMB b, int cat){    while (b->file[cat].cache_entries)    {        struct ISAMB_cache_entry *ce_this 
= b->file[cat].cache_entries;        b->file[cat].cache_entries = ce_this->next;        if (ce_this->dirty)        {            yaz_log (b->log_io, "bf_write: flush_blocks");            bf_write (b->file[cat].bf, ce_this->pos, 0, 0, ce_this->buf);        }        xfree (ce_this->buf);        xfree (ce_this);    }}static int get_block (ISAMB b, ISAMC_P pos, char *userbuf, int wr){    int cat = pos&3;    int off = ((pos/4) &                (ISAMB_CACHE_ENTRY_SIZE / b->file[cat].head.block_size - 1))        * b->file[cat].head.block_size;    int norm = pos / (4*ISAMB_CACHE_ENTRY_SIZE / b->file[cat].head.block_size);    int no = 0;    struct ISAMB_cache_entry **ce, *ce_this = 0, **ce_last = 0;    if (!b->cache)        return 0;    assert (ISAMB_CACHE_ENTRY_SIZE >= b->file[cat].head.block_size);    for (ce = &b->file[cat].cache_entries; *ce; ce = &(*ce)->next, no++)    {        ce_last = ce;        if ((*ce)->pos == norm)        {            ce_this = *ce;            *ce = (*ce)->next;   /* remove from list */                        ce_this->next = b->file[cat].cache_entries;  /* move to front */            b->file[cat].cache_entries = ce_this;                        if (wr)            {                memcpy (ce_this->buf + off, userbuf,                         b->file[cat].head.block_size);                ce_this->dirty = 1;            }            else                memcpy (userbuf, ce_this->buf + off,                        b->file[cat].head.block_size);            return 1;        }    }    if (no >= 40)    {        assert (no == 40);        assert (ce_last && *ce_last);        ce_this = *ce_last;        *ce_last = 0;  /* remove the last entry from list */        if (ce_this->dirty)        {            yaz_log (b->log_io, "bf_write: get_block");            bf_write (b->file[cat].bf, ce_this->pos, 0, 0, ce_this->buf);        }        xfree (ce_this->buf);        xfree (ce_this);    }    ce_this = xmalloc (sizeof(*ce_this));    ce_this->next = 
b->file[cat].cache_entries;    b->file[cat].cache_entries = ce_this;    ce_this->buf = xmalloc (ISAMB_CACHE_ENTRY_SIZE);    ce_this->pos = norm;    yaz_log (b->log_io, "bf_read: get_block");    if (!bf_read (b->file[cat].bf, norm, 0, 0, ce_this->buf))        memset (ce_this->buf, 0, ISAMB_CACHE_ENTRY_SIZE);    if (wr)    {        memcpy (ce_this->buf + off, userbuf, b->file[cat].head.block_size);        ce_this->dirty = 1;    }    else    {        ce_this->dirty = 0;        memcpy (userbuf, ce_this->buf + off, b->file[cat].head.block_size);    }    return 1;}void isamb_close (ISAMB isamb){    int i;    for (i = 0; i<isamb->no_cat; i++)    {        flush_blocks (isamb, i);        if (isamb->file[i].head_dirty)            bf_write (isamb->file[i].bf, 0, 0,                      sizeof(struct ISAMB_head), &isamb->file[i].head);                bf_close (isamb->file[i].bf);    }    xfree (isamb->file);    xfree (isamb->method);    xfree (isamb);}struct ISAMB_block *open_block (ISAMB b, ISAMC_P pos){    int cat = pos&3;    struct ISAMB_block *p;    if (!pos)        return 0;    p = xmalloc (sizeof(*p));    p->pos = pos;    p->cat = pos & 3;    p->buf = xmalloc (b->file[cat].head.block_size);    if (!get_block (b, pos, p->buf, 0))    {        yaz_log (b->log_io, "bf_read: open_block");        if (!bf_read (b->file[cat].bf, pos/4, 0, 0, p->buf))        {            yaz_log (LOG_FATAL, "isamb: read fail for pos=%ld block=%ld",                     (long) pos, (long) pos/4);            abort();        }    }    p->bytes = p->buf + ISAMB_DATA_OFFSET;    p->leaf = p->buf[0];    p->size = (p->buf[1] + 256 * p->buf[2]) - ISAMB_DATA_OFFSET;    if (p->size < 0)    {        fprintf (stderr, "pos=%d\n", pos);    }    assert (p->size >= 0);    p->offset = 0;    p->dirty = 0;    p->deleted = 0;    p->decodeClientData = (*b->method->code_start)(ISAMC_DECODE);    return p;}struct ISAMB_block *new_block (ISAMB b, int leaf, int cat){    struct ISAMB_block *p;    p = xmalloc (sizeof(*p));    
p->buf = xmalloc (b->file[cat].head.block_size);    if (!b->file[cat].head.free_list)    {        int block_no;        block_no = b->file[cat].head.last_block++;        p->pos = block_no * 4 + cat;    }    else    {        p->pos = b->file[cat].head.free_list;        assert((p->pos & 3) == cat);        if (!get_block (b, p->pos, p->buf, 0))        {            yaz_log (b->log_io, "bf_read: new_block");            if (!bf_read (b->file[cat].bf, p->pos/4, 0, 0, p->buf))            {                yaz_log (LOG_FATAL, "isamb: read fail for pos=%ld block=%ld",                         (long) p->pos/4, (long) p->pos/4);                abort ();            }        }        yaz_log (b->log_freelist, "got block %d from freelist %d:%d", p->pos,                 cat, p->pos/4);        memcpy (&b->file[cat].head.free_list, p->buf, sizeof(int));    }    p->cat = cat;    b->file[cat].head_dirty = 1;    memset (p->buf, 0, b->file[cat].head.block_size);    p->bytes = p->buf + ISAMB_DATA_OFFSET;    p->leaf = leaf;    p->size = 0;    p->dirty = 1;    p->deleted = 0;    p->offset = 0;    p->decodeClientData = (*b->method->code_start)(ISAMC_DECODE);    return p;}struct ISAMB_block *new_leaf (ISAMB b, int cat){    return new_block (b, 1, cat);}struct ISAMB_block *new_int (ISAMB b, int cat){    return new_block (b, 0, cat);}static void check_block (ISAMB b, struct ISAMB_block *p){    if (p->leaf)    {        ;    }    else    {        /* sanity check */        char *startp = p->bytes;        char *src = startp;        char *endp = p->bytes + p->size;        int pos;                    decode_ptr (&src, &pos);        assert ((pos&3) == p->cat);        while (src != endp)        {            int item_len;            decode_ptr (&src, &item_len);            assert (item_len > 0 && item_len < 30);            src += item_len;            decode_ptr (&src, &pos);            assert ((pos&3) == p->cat);        }    }}void close_block (ISAMB b, struct ISAMB_block *p){    if (!p)        return;    
if (p->deleted)    {        yaz_log (b->log_freelist, "release block %d from freelist %d:%d",                 p->pos, p->cat, p->pos/4);        memcpy (p->buf, &b->file[p->cat].head.free_list, sizeof(int));        b->file[p->cat].head.free_list = p->pos;        if (!get_block (b, p->pos, p->buf, 1))        {            yaz_log (b->log_io, "bf_write: close_block (deleted)");            bf_write (b->file[p->cat].bf, p->pos/4, 0, 0, p->buf);        }    }    else if (p->dirty)    {        int size = p->size + ISAMB_DATA_OFFSET;        assert (p->size >= 0);        p->buf[0] = p->leaf;        p->buf[1] = size & 255;        p->buf[2] = size >> 8;        check_block(b, p);        if (!get_block (b, p->pos, p->buf, 1))        {            yaz_log (b->log_io, "bf_write: close_block");            bf_write (b->file[p->cat].bf, p->pos/4, 0, 0, p->buf);        }    }    (*b->method->code_stop)(ISAMC_DECODE, p->decodeClientData);    xfree (p->buf);    xfree (p);}int insert_sub (ISAMB b, struct ISAMB_block **p,                void *new_item, int *mode,                ISAMC_I *stream,                struct ISAMB_block **sp,                void *sub_item, int *sub_size,                void *max_item);int insert_int (ISAMB b, struct ISAMB_block *p, void *lookahead_item,                int *mode,                ISAMC_I *stream, struct ISAMB_block **sp,                void *split_item, int *split_size, void *last_max_item){    char *startp = p->bytes;    char *src = startp;    char *endp = p->bytes + p->size;    int pos;    struct ISAMB_block *sub_p1 = 0, *sub_p2 = 0;    char sub_item[DST_ITEM_MAX];    int sub_size;    int more;    *sp = 0;    assert(p->size >= 0);    decode_ptr (&src, &pos);    while (src != endp)    {        int item_len;        int d;        char *src0 = src;        decode_ptr (&src, &item_len);        d = (*b->method->compare_item)(src, lookahead_item);        if (d > 0)        {            sub_p1 = open_block (b, pos);            assert (sub_p1);            more 
= insert_sub (b, &sub_p1, lookahead_item, mode,                               stream, &sub_p2,                                sub_item, &sub_size, src);            src = src0;            break;        }        src += item_len;        decode_ptr (&src, &pos);    }    if (!sub_p1)    {        sub_p1 = open_block (b, pos);        assert (sub_p1);        more = insert_sub (b, &sub_p1, lookahead_item, mode, stream, &sub_p2,                            sub_item, &sub_size, last_max_item);    }    if (sub_p2)    {        /* there was a split - must insert pointer in this one */        char dst_buf[DST_BUF_SIZE];        char *dst = dst_buf;        assert (sub_size < 30 && sub_size > 1);        memcpy (dst, startp, src - startp);                        dst += src - startp;        encode_ptr (&dst, sub_size);      /* sub length and item */        memcpy (dst, sub_item, sub_size);        dst += sub_size;        encode_ptr (&dst, sub_p2->pos);   /* pos */        if (endp - src)                   /* remaining data */        {            memcpy (dst, src, endp - src);            dst += endp - src;        }        p->size = dst - dst_buf;

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -