cfile.c

来自「harvest是一个下载html网页得机器人」· C语言代码 · 共 518 行
518 行
/* $Id: cfile.c,v 1.27 2002/08/02 19:26:55 adam Exp $   Copyright (C) 1995,1996,1997,1998,1999,2000,2001,2002   Index Data ApsThis file is part of the Zebra server.Zebra is free software; you can redistribute it and/or modify it underthe terms of the GNU General Public License as published by the FreeSoftware Foundation; either version 2, or (at your option) any laterversion.Zebra is distributed in the hope that it will be useful, but WITHOUT ANYWARRANTY; without even the implied warranty of MERCHANTABILITY orFITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public Licensefor more details.You should have received a copy of the GNU General Public Licensealong with Zebra; see the file LICENSE.zebra.  If not, write to theFree Software Foundation, 59 Temple Place - Suite 330, Boston, MA02111-1307, USA.*/#include <assert.h>#include <stdlib.h>#include <string.h>#include <zebrautl.h>#include <mfile.h>#include "cfile.h"static int write_head (CFile cf){    int left = cf->head.hash_size * sizeof(int);    int bno = 1;    const char *tab = (char*) cf->array;    if (!tab)        return 0;    while (left >= (int) HASH_BSIZE)    {        mf_write (cf->hash_mf, bno++, 0, 0, tab);        tab += HASH_BSIZE;        left -= HASH_BSIZE;    }    if (left > 0)        mf_write (cf->hash_mf, bno, 0, left, tab);    return 0;}static int read_head (CFile cf){    int left = cf->head.hash_size * sizeof(int);    int bno = 1;    char *tab = (char*) cf->array;    if (!tab)        return 0;    while (left >= (int) HASH_BSIZE)    {        mf_read (cf->hash_mf, bno++, 0, 0, tab);        tab += HASH_BSIZE;        left -= HASH_BSIZE;    }    if (left > 0)        mf_read (cf->hash_mf, bno, 0, left, tab);    return 0;}CFile cf_open (MFile mf, MFile_area area, const char *fname,               int block_size, int wflag, int *firstp){    char path[1024];    int i;    CFile cf = (CFile) xmalloc (sizeof(*cf));    int hash_bytes;       cf->rmf = mf;     logf (LOG_DEBUG, "cf: open %s %s", cf->rmf->name, wflag ? "rdwr" : "rd");    sprintf (path, "%s-b", fname);    if (!(cf->block_mf = mf_open (area, path, block_size, wflag)))    {        logf (LOG_FATAL|LOG_ERRNO, "Failed to open %s", path);        exit (1);    }    sprintf (path, "%s-i", fname);    if (!(cf->hash_mf = mf_open (area, path, HASH_BSIZE, wflag)))    {        logf (LOG_FATAL|LOG_ERRNO, "Failed to open %s", path);        exit (1);    }    assert (firstp);    if (!mf_read (cf->hash_mf, 0, 0, sizeof(cf->head), &cf->head) ||        !cf->head.state)    {        *firstp = 1;        cf->head.state = 1;        cf->head.block_size = block_size;        cf->head.hash_size = 199;        hash_bytes = cf->head.hash_size * sizeof(int);        cf->head.flat_bucket = cf->head.next_bucket = cf->head.first_bucket =             (hash_bytes+sizeof(cf->head))/HASH_BSIZE + 2;        cf->head.next_block = 1;        if (wflag)            mf_write (cf->hash_mf, 0, 0, sizeof(cf->head), &cf->head);        cf->array = (int *) xmalloc (hash_bytes);        for (i = 0; i<cf->head.hash_size; i++)            cf->array[i] = 0;        if (wflag)            write_head (cf);    }    else    {        *firstp = 0;        assert (cf->head.block_size == block_size);        assert (cf->head.hash_size > 2);        hash_bytes = cf->head.hash_size * sizeof(int);        assert (cf->head.next_bucket > 0);        assert (cf->head.next_block > 0);        if (cf->head.state == 1)            cf->array = (int *) xmalloc (hash_bytes);        else            cf->array = NULL;        read_head (cf);    }    if (cf->head.state == 1)    {        cf->parray = (struct CFile_hash_bucket **)	    xmalloc (cf->head.hash_size * sizeof(*cf->parray));        for (i = 0; i<cf->head.hash_size; i++)            cf->parray[i] = NULL;    }    else        cf->parray = NULL;    cf->bucket_lru_front = cf->bucket_lru_back = NULL;    cf->bucket_in_memory = 0;    cf->max_bucket_in_memory = 100;    cf->dirty = 0;    cf->iobuf = (char *) xmalloc (cf->head.block_size);    memset (cf->iobuf, 0, cf->head.block_size);    cf->no_hits = 0;    cf->no_miss = 0;    zebra_mutex_init (&cf->mutex);    return cf;}static int cf_hash (CFile cf, int no){    return (no>>3) % cf->head.hash_size;}static void release_bucket (CFile cf, struct CFile_hash_bucket *p){    if (p->lru_prev)        p->lru_prev->lru_next = p->lru_next;    else        cf->bucket_lru_back = p->lru_next;    if (p->lru_next)        p->lru_next->lru_prev = p->lru_prev;    else        cf->bucket_lru_front = p->lru_prev;    *p->h_prev = p->h_next;    if (p->h_next)        p->h_next->h_prev = p->h_prev;        --(cf->bucket_in_memory);    xfree (p);}static void flush_bucket (CFile cf, int no_to_flush){    int i;    struct CFile_hash_bucket *p;    for (i = 0; i != no_to_flush; i++)    {        p = cf->bucket_lru_back;        if (!p)            break;        if (p->dirty)        {            mf_write (cf->hash_mf, p->ph.this_bucket, 0, 0, &p->ph);            cf->dirty = 1;        }        release_bucket (cf, p);    }}static struct CFile_hash_bucket *alloc_bucket (CFile cf, int block_no, int hno){    struct CFile_hash_bucket *p, **pp;    if (cf->bucket_in_memory == cf->max_bucket_in_memory)        flush_bucket (cf, 1);    assert (cf->bucket_in_memory < cf->max_bucket_in_memory);    ++(cf->bucket_in_memory);    p = (struct CFile_hash_bucket *) xmalloc (sizeof(*p));    p->lru_next = NULL;    p->lru_prev = cf->bucket_lru_front;    if (cf->bucket_lru_front)        cf->bucket_lru_front->lru_next = p;    else        cf->bucket_lru_back = p;    cf->bucket_lru_front = p;     pp = cf->parray + hno;    p->h_next = *pp;    p->h_prev = pp;    if (*pp)        (*pp)->h_prev = &p->h_next;    *pp = p;    return p;}static struct CFile_hash_bucket *get_bucket (CFile cf, int block_no, int hno){    struct CFile_hash_bucket *p;    p = alloc_bucket (cf, block_no, hno);    if (!mf_read (cf->hash_mf, block_no, 0, 0, &p->ph))    {        logf (LOG_FATAL|LOG_ERRNO, "read get_bucket");        exit (1);    }    assert (p->ph.this_bucket == block_no);    p->dirty = 0;    return p;}static struct CFile_hash_bucket *new_bucket (CFile cf, int *block_nop, int hno){    struct CFile_hash_bucket *p;    int i, block_no;    block_no = *block_nop = cf->head.next_bucket++;    p = alloc_bucket (cf, block_no, hno);    for (i = 0; i<HASH_BUCKET; i++)    {        p->ph.vno[i] = 0;        p->ph.no[i] = 0;    }    p->ph.next_bucket = 0;    p->ph.this_bucket = block_no;    p->dirty = 1;    return p;}static int cf_lookup_flat (CFile cf, int no){    int hno = (no*sizeof(int))/HASH_BSIZE;    int off = (no*sizeof(int)) - hno*HASH_BSIZE;    int vno = 0;    mf_read (cf->hash_mf, hno+cf->head.next_bucket, off, sizeof(int), &vno);    return vno;}static int cf_lookup_hash (CFile cf, int no){    int hno = cf_hash (cf, no);    struct CFile_hash_bucket *hb;    int block_no, i;    for (hb = cf->parray[hno]; hb; hb = hb->h_next)    {        for (i = 0; i<HASH_BUCKET && hb->ph.vno[i]; i++)            if (hb->ph.no[i] == no)            {                (cf->no_hits)++;                return hb->ph.vno[i];            }    }    for (block_no = cf->array[hno]; block_no; block_no = hb->ph.next_bucket)    {        for (hb = cf->parray[hno]; hb; hb = hb->h_next)        {            if (hb->ph.this_bucket == block_no)                break;        }        if (hb)            continue;#if 0        /* extra check ... */        for (hb = cf->bucket_lru_back; hb; hb = hb->lru_next)        {            if (hb->ph.this_bucket == block_no)            {                logf (LOG_FATAL, "Found hash bucket on other chain (1)");                abort ();            }            for (i = 0; i<HASH_BUCKET && hb->ph.vno[i]; i++)                if (hb->ph.no[i] == no)                {                    logf (LOG_FATAL, "Found hash bucket on other chain (2)");                    abort ();                }        }#endif        (cf->no_miss)++;        hb = get_bucket (cf, block_no, hno);        for (i = 0; i<HASH_BUCKET && hb->ph.vno[i]; i++)            if (hb->ph.no[i] == no)                return hb->ph.vno[i];    }    return 0;}static void cf_write_flat (CFile cf, int no, int vno){    int hno = (no*sizeof(int))/HASH_BSIZE;    int off = (no*sizeof(int)) - hno*HASH_BSIZE;    hno += cf->head.next_bucket;    if (hno >= cf->head.flat_bucket)        cf->head.flat_bucket = hno+1;    cf->dirty = 1;    mf_write (cf->hash_mf, hno, off, sizeof(int), &vno);}static void cf_moveto_flat (CFile cf){    struct CFile_hash_bucket *p;    int i, j;    logf (LOG_DEBUG, "cf: Moving to flat shadow: %s", cf->rmf->name);    logf (LOG_DEBUG, "cf: hits=%d miss=%d bucket_in_memory=%d total=%d",	cf->no_hits, cf->no_miss, cf->bucket_in_memory,         cf->head.next_bucket - cf->head.first_bucket);    assert (cf->head.state == 1);    flush_bucket (cf, -1);    assert (cf->bucket_in_memory == 0);    p = (struct CFile_hash_bucket *) xmalloc (sizeof(*p));    for (i = cf->head.first_bucket; i < cf->head.next_bucket; i++)    {        if (!mf_read (cf->hash_mf, i, 0, 0, &p->ph))        {            logf (LOG_FATAL|LOG_ERRNO, "read bucket moveto flat");            exit (1);        }        for (j = 0; j < HASH_BUCKET && p->ph.vno[j]; j++)            cf_write_flat (cf, p->ph.no[j], p->ph.vno[j]);    }    xfree (p);    xfree (cf->array);    cf->array = NULL;    xfree (cf->parray);    cf->parray = NULL;    cf->head.state = 2;    cf->dirty = 1;}static int cf_lookup (CFile cf, int no){    if (cf->head.state > 1)        return cf_lookup_flat (cf, no);    return cf_lookup_hash (cf, no);}static int cf_new_flat (CFile cf, int no){    int vno = (cf->head.next_block)++;    cf_write_flat (cf, no, vno);    return vno;}static int cf_new_hash (CFile cf, int no){    int hno = cf_hash (cf, no);    struct CFile_hash_bucket *hbprev = NULL, *hb = cf->parray[hno];    int *bucketpp = &cf->array[hno];     int i, vno = (cf->head.next_block)++;      for (hb = cf->parray[hno]; hb; hb = hb->h_next)        if (!hb->ph.vno[HASH_BUCKET-1])            for (i = 0; i<HASH_BUCKET; i++)                if (!hb->ph.vno[i])                {                    (cf->no_hits)++;                    hb->ph.no[i] = no;                    hb->ph.vno[i] = vno;                    hb->dirty = 1;                    return vno;                }    while (*bucketpp)    {        for (hb = cf->parray[hno]; hb; hb = hb->h_next)            if (hb->ph.this_bucket == *bucketpp)            {                bucketpp = &hb->ph.next_bucket;                hbprev = hb;                break;            }        if (hb)            continue;#if 0        /* extra check ... */        for (hb = cf->bucket_lru_back; hb; hb = hb->lru_next)        {            if (hb->ph.this_bucket == *bucketpp)            {                logf (LOG_FATAL, "Found hash bucket on other chain");                abort ();            }        }#endif        (cf->no_miss)++;        hb = get_bucket (cf, *bucketpp, hno);        assert (hb);        for (i = 0; i<HASH_BUCKET; i++)            if (!hb->ph.vno[i])            {                hb->ph.no[i] = no;                hb->ph.vno[i] = vno;                hb->dirty = 1;                return vno;            }        bucketpp = &hb->ph.next_bucket;        hbprev = hb;    }    if (hbprev)        hbprev->dirty = 1;    hb = new_bucket (cf, bucketpp, hno);    hb->ph.no[0] = no;    hb->ph.vno[0] = vno;    return vno;}int cf_new (CFile cf, int no){    if (cf->head.state > 1)        return cf_new_flat (cf, no);    if (cf->no_miss*2 > cf->no_hits)    {        cf_moveto_flat (cf);        assert (cf->head.state > 1);        return cf_new_flat (cf, no);    }    return cf_new_hash (cf, no);}int cf_read (CFile cf, int no, int offset, int nbytes, void *buf){    int block;        assert (cf);    zebra_mutex_lock (&cf->mutex);    if (!(block = cf_lookup (cf, no)))    {	zebra_mutex_unlock (&cf->mutex);        return -1;    }    zebra_mutex_unlock (&cf->mutex);    if (!mf_read (cf->block_mf, block, offset, nbytes, buf))    {        logf (LOG_FATAL|LOG_ERRNO, "cf_read no=%d, block=%d", no, block);        exit (1);    }    return 1;}int cf_write (CFile cf, int no, int offset, int nbytes, const void *buf){    int block;    assert (cf);    zebra_mutex_lock (&cf->mutex);    if (!(block = cf_lookup (cf, no)))    {        block = cf_new (cf, no);        if (offset || nbytes)        {            mf_read (cf->rmf, no, 0, 0, cf->iobuf);            memcpy (cf->iobuf + offset, buf, nbytes);            buf = cf->iobuf;            offset = 0;            nbytes = 0;        }    }    zebra_mutex_unlock (&cf->mutex);    if (mf_write (cf->block_mf, block, offset, nbytes, buf))    {        logf (LOG_FATAL|LOG_ERRNO, "cf_write no=%d, block=%d", no, block);        exit (1);    }    return 0;}int cf_close (CFile cf){    logf (LOG_DEBUG, "cf: close hits=%d miss=%d bucket_in_memory=%d total=%d",          cf->no_hits, cf->no_miss, cf->bucket_in_memory,          cf->head.next_bucket - cf->head.first_bucket);    flush_bucket (cf, -1);    if (cf->dirty)    {        mf_write (cf->hash_mf, 0, 0, sizeof(cf->head), &cf->head);        write_head (cf);    }    mf_close (cf->hash_mf);    mf_close (cf->block_mf);    xfree (cf->array);    xfree (cf->parray);    xfree (cf->iobuf);    zebra_mutex_destroy (&cf->mutex);    xfree (cf);    return 0;}
cfile.c - 源码说明

本页面展示了「harvest是一个下载html网页得机器人」中的 cfile.c 源码文件，采用 C语言编程语言编写，共 518 行代码。您可以在线阅读完整代码内容，也可以返回资源详情页下载完整源码包进行本地学习和开发。
虫虫下载站收录了大量与harvest相关的技术资源，包括源代码、技术文档、电路图等，是电子工程师和嵌入式开发者的专业学习平台。
⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?