⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 recindex.c

📁 harvest是一个下载html网页的机器人
💻 C
📖 第 1 页 / 共 2 页
字号:
    *sysnop = -1;    if (ref_count)    {	int csize = 0;  /* indicate compression "not performed yet" */	compression_method = p->compression_method;	switch (compression_method)	{	case REC_COMPRESS_BZIP2:#if HAVE_BZLIB_H		    csize = out_offset + (out_offset >> 6) + 620;	    rec_tmp_expand (p, csize);#ifdef BZ_CONFIG_ERROR	    i = BZ2_bzBuffToBuffCompress #else	    i = bzBuffToBuffCompress #endif			 	     (p->tmp_buf+sizeof(int)+sizeof(short)+				      sizeof(char),				      &csize, out_buf, out_offset, 1, 0, 30);	    if (i != BZ_OK)	    {		logf (LOG_WARN, "bzBuffToBuffCompress error code=%d", i);		csize = 0;	    }	    logf (LOG_LOG, "compress %4d %5d %5d", ref_count, out_offset,		  csize);#endif	    break;	case REC_COMPRESS_NONE:	    break;	}	if (!csize)  	{	    /* either no compression or compression not supported ... */	    csize = out_offset;	    rec_tmp_expand (p, csize);	    memcpy (p->tmp_buf + sizeof(int) + sizeof(short) + sizeof(char),		    out_buf, out_offset);	    csize = out_offset;	    compression_method = REC_COMPRESS_NONE;	}	memcpy (p->tmp_buf + sizeof(int), &ref_count, sizeof(ref_count));	memcpy (p->tmp_buf + sizeof(int)+sizeof(short),		&compression_method, sizeof(compression_method));			/* -------- compression */	rec_write_tmp_buf (p, csize + sizeof(short) + sizeof(char), sysnos);    }    xfree (out_buf);    xfree (sysnos);}static void rec_cache_flush (Records p, int saveCount){    int i, j;    if (saveCount >= p->cache_cur)        saveCount = 0;    rec_write_multiple (p, saveCount);    for (i = 0; i<p->cache_cur - saveCount; i++)    {        struct record_cache_entry *e = p->record_cache + i;        rec_rm (&e->rec);    }     /* i still being used ... 
*/    for (j = 0; j<saveCount; j++, i++)        memcpy (p->record_cache+j, p->record_cache+i,                sizeof(*p->record_cache));    p->cache_cur = saveCount;}static Record *rec_cache_lookup (Records p, int sysno,                                 enum recordCacheFlag flag){    int i;    for (i = 0; i<p->cache_cur; i++)    {        struct record_cache_entry *e = p->record_cache + i;        if (e->rec->sysno == sysno)        {            if (flag != recordFlagNop && e->flag == recordFlagNop)                e->flag = flag;            return &e->rec;        }    }    return NULL;}static void rec_cache_insert (Records p, Record rec, enum recordCacheFlag flag){    struct record_cache_entry *e;    if (p->cache_cur == p->cache_max)        rec_cache_flush (p, 1);    else if (p->cache_cur > 0)    {        int i, j;        int used = 0;        for (i = 0; i<p->cache_cur; i++)        {            Record r = (p->record_cache + i)->rec;            for (j = 0; j<REC_NO_INFO; j++)                used += r->size[j];        }        if (used > 90000)            rec_cache_flush (p, 1);    }    assert (p->cache_cur < p->cache_max);    e = p->record_cache + (p->cache_cur)++;    e->flag = flag;    e->rec = rec_cp (rec);}void rec_close (Records *pp){    Records p = *pp;    int i;    assert (p);    zebra_mutex_destroy (&p->mutex);    rec_cache_flush (p, 0);    xfree (p->record_cache);    if (p->rw)        rec_write_head (p);    if (p->index_BFile)        bf_close (p->index_BFile);    for (i = 0; i<REC_BLOCK_TYPES; i++)    {        if (p->data_BFile[i])            bf_close (p->data_BFile[i]);        xfree (p->data_fname[i]);    }    xfree (p->tmp_buf);    xfree (p);    *pp = NULL;}static Record rec_get_int (Records p, int sysno){    int i, in_size, r;    Record rec, *recp;    struct record_index_entry entry;    int freeblock, dst_type;    char *nptr, *cptr;    char *in_buf = 0;    char *bz_buf = 0;#if HAVE_BZLIB_H    int bz_size;#endif    char compression_method;    assert (sysno > 
0);    assert (p);    if ((recp = rec_cache_lookup (p, sysno, recordFlagNop)))        return rec_cp (*recp);    if (read_indx (p, sysno, &entry, sizeof(entry), 1) < 1)        return NULL;       /* record is not there! */    if (!entry.size)        return NULL;       /* record is deleted */    dst_type = entry.next & 7;    assert (dst_type < REC_BLOCK_TYPES);    freeblock = entry.next / 8;    assert (freeblock > 0);        rec_tmp_expand (p, entry.size);    cptr = p->tmp_buf;    r = bf_read (p->data_BFile[dst_type], freeblock, 0, 0, cptr);    if (r < 0)	return 0;    memcpy (&freeblock, cptr, sizeof(freeblock));    while (freeblock)    {        int tmp;        cptr += p->head.block_size[dst_type] - sizeof(freeblock);                memcpy (&tmp, cptr, sizeof(tmp));        r = bf_read (p->data_BFile[dst_type], freeblock, 0, 0, cptr);	if (r < 0)	    return 0;        memcpy (&freeblock, cptr, sizeof(freeblock));        memcpy (cptr, &tmp, sizeof(tmp));    }    rec = (Record) xmalloc (sizeof(*rec));    rec->sysno = sysno;    memcpy (&compression_method, p->tmp_buf + sizeof(int) + sizeof(short),	    sizeof(compression_method));    in_buf = p->tmp_buf + sizeof(int) + sizeof(short) + sizeof(char);    in_size = entry.size - sizeof(short) - sizeof(char);    switch (compression_method)    {    case REC_COMPRESS_BZIP2:#if HAVE_BZLIB_H	bz_size = entry.size * 20 + 100;	while (1)	{	    bz_buf = (char *) xmalloc (bz_size);#ifdef BZ_CONFIG_ERROR	    i = BZ2_bzBuffToBuffDecompress#else	    i = bzBuffToBuffDecompress#endif                 (bz_buf, &bz_size, in_buf, in_size, 0, 0);	    logf (LOG_LOG, "decompress %5d %5d", in_size, bz_size);	    if (i == BZ_OK)		break;	    logf (LOG_LOG, "failed");	    xfree (bz_buf);            bz_size *= 2;	}	in_buf = bz_buf;	in_size = bz_size;#else	logf (LOG_FATAL, "cannot decompress record(s) in BZIP2 format");	exit (1);#endif	break;    case REC_COMPRESS_NONE:	break;    }    for (i = 0; i<REC_NO_INFO; i++)	rec->info[i] = 0;    nptr = in_buf;         
       /* skip ref count */    while (nptr < in_buf + in_size)    {	int this_sysno;	int len;	rec_decode_unsigned (&this_sysno, nptr, &len);	nptr += len;	for (i = 0; i < REC_NO_INFO; i++)	{	    int this_size;	    rec_decode_unsigned (&this_size, nptr, &len);	    nptr += len;	    if (this_size == 0)		continue;	    rec->size[i] = this_size-1;	    if (rec->size[i])	    {		rec->info[i] = nptr;		nptr += rec->size[i];	    }	    else		rec->info[i] = NULL;	}	if (this_sysno == sysno)	    break;    }    for (i = 0; i<REC_NO_INFO; i++)    {	if (rec->info[i] && rec->size[i])	{	    char *np = xmalloc (rec->size[i]+1);	    memcpy (np, rec->info[i], rec->size[i]);            np[rec->size[i]] = '\0';	    rec->info[i] = np;	}	else	{	    assert (rec->info[i] == 0);	    assert (rec->size[i] == 0);	}    }    xfree (bz_buf);    rec_cache_insert (p, rec, recordFlagNop);    return rec;}Record rec_get (Records p, int sysno){    Record rec;    zebra_mutex_lock (&p->mutex);    rec = rec_get_int (p, sysno);    zebra_mutex_unlock (&p->mutex);    return rec;}static Record rec_new_int (Records p){    int sysno, i;    Record rec;    assert (p);    rec = (Record) xmalloc (sizeof(*rec));    if (1 || p->head.index_free == 0)        sysno = (p->head.index_last)++;    else    {        struct record_index_entry entry;        read_indx (p, p->head.index_free, &entry, sizeof(entry), 0);        sysno = p->head.index_free;        p->head.index_free = entry.next;    }    (p->head.no_records)++;    rec->sysno = sysno;    for (i = 0; i < REC_NO_INFO; i++)    {        rec->info[i] = NULL;        rec->size[i] = 0;    }    rec_cache_insert (p, rec, recordFlagNew);    return rec;}Record rec_new (Records p){    Record rec;    zebra_mutex_lock (&p->mutex);    rec = rec_new_int (p);    zebra_mutex_unlock (&p->mutex);    return rec;}void rec_del (Records p, Record *recpp){    Record *recp;    zebra_mutex_lock (&p->mutex);    (p->head.no_records)--;    if ((recp = rec_cache_lookup (p, (*recpp)->sysno, 
recordFlagDelete)))    {        rec_rm (recp);        *recp = *recpp;    }    else    {        rec_cache_insert (p, *recpp, recordFlagDelete);        rec_rm (recpp);    }    zebra_mutex_unlock (&p->mutex);    *recpp = NULL;}void rec_put (Records p, Record *recpp){    Record *recp;    zebra_mutex_lock (&p->mutex);    if ((recp = rec_cache_lookup (p, (*recpp)->sysno, recordFlagWrite)))    {        rec_rm (recp);        *recp = *recpp;    }    else    {        rec_cache_insert (p, *recpp, recordFlagWrite);        rec_rm (recpp);    }    zebra_mutex_unlock (&p->mutex);    *recpp = NULL;}void rec_rm (Record *recpp){    int i;    if (!*recpp)        return ;    for (i = 0; i < REC_NO_INFO; i++)        xfree ((*recpp)->info[i]);    xfree (*recpp);    *recpp = NULL;}Record rec_cp (Record rec){    Record n;    int i;    n = (Record) xmalloc (sizeof(*n));    n->sysno = rec->sysno;    for (i = 0; i < REC_NO_INFO; i++)        if (!rec->info[i])        {            n->info[i] = NULL;            n->size[i] = 0;        }        else        {            n->size[i] = rec->size[i];            n->info[i] = (char *) xmalloc (rec->size[i]);            memcpy (n->info[i], rec->info[i], rec->size[i]);        }    return n;}char *rec_strdup (const char *s, size_t *len){    char *p;    if (!s)    {        *len = 0;        return NULL;    }    *len = strlen(s)+1;    p = (char *) xmalloc (*len);    strcpy (p, s);    return p;}

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -