⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 kinput.c

📁 harvest是一个下载html网页的机器人
💻 C
📖 第 1 页 / 共 2 页
字号:
/* $Id: kinput.c,v 1.56 2003/06/23 15:35:25 adam Exp $   Copyright (C) 1995,1996,1997,1998,1999,2000,2001,2002   Index Data ApsThis file is part of the Zebra server.Zebra is free software; you can redistribute it and/or modify it underthe terms of the GNU General Public License as published by the FreeSoftware Foundation; either version 2, or (at your option) any laterversion.Zebra is distributed in the hope that it will be useful, but WITHOUT ANYWARRANTY; without even the implied warranty of MERCHANTABILITY orFITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public Licensefor more details.You should have received a copy of the GNU General Public Licensealong with Zebra; see the file LICENSE.zebra.  If not, write to theFree Software Foundation, 59 Temple Place - Suite 330, Boston, MA02111-1307, USA.*/ #include <fcntl.h>#ifdef WIN32#include <io.h>#else#include <unistd.h>#endif#include <stdlib.h>#include <string.h>#include <stdio.h>#include <assert.h>#include "index.h"#define KEY_SIZE (1+sizeof(struct it_key))#define INP_NAME_MAX 768#define INP_BUF_START 60000#define INP_BUF_ADD  400000struct key_file {    int   no;            /* file no */    off_t offset;        /* file offset */    unsigned char *buf;  /* buffer block */    size_t buf_size;     /* number of read bytes in block */    size_t chunk;        /* number of bytes allocated */    size_t buf_ptr;      /* current position in buffer */    char *prev_name;     /* last word read */    int   sysno;         /* last sysno */    int   seqno;         /* last seqno */    off_t length;        /* length of file */                         /* handler invoked in each read */    void (*readHandler)(struct key_file *keyp, void *rinfo);    void *readInfo;    Res res;};void getFnameTmp (Res res, char *fname, int no){    const char *pre;        pre = res_get_def (res, "keyTmpDir", ".");    sprintf (fname, "%s/key%d.tmp", pre, no);}void extract_get_fname_tmp (ZebraHandle zh, char *fname, int no){    const char *pre;        
pre = res_get_def (zh->res, "keyTmpDir", ".");    sprintf (fname, "%s/key%d.tmp", pre, no);}void key_file_chunk_read (struct key_file *f){    int nr = 0, r = 0, fd;    char fname[1024];    getFnameTmp (f->res, fname, f->no);    fd = open (fname, O_BINARY|O_RDONLY);    f->buf_ptr = 0;    f->buf_size = 0;    if (fd == -1)    {        logf (LOG_WARN|LOG_ERRNO, "cannot open %s", fname);	return ;    }    if (!f->length)    {        if ((f->length = lseek (fd, 0L, SEEK_END)) == (off_t) -1)        {            logf (LOG_WARN|LOG_ERRNO, "cannot seek %s", fname);	    close (fd);	    return ;        }    }    if (lseek (fd, f->offset, SEEK_SET) == -1)    {        logf (LOG_WARN|LOG_ERRNO, "cannot seek %s", fname);	close(fd);	return ;    }    while (f->chunk - nr > 0)    {        r = read (fd, f->buf + nr, f->chunk - nr);        if (r <= 0)            break;        nr += r;    }    if (r == -1)    {        logf (LOG_WARN|LOG_ERRNO, "read of %s", fname);	close (fd);	return;    }    f->buf_size = nr;    if (f->readHandler)        (*f->readHandler)(f, f->readInfo);    close (fd);}void key_file_destroy (struct key_file *f){    xfree (f->buf);    xfree (f->prev_name);    xfree (f);}struct key_file *key_file_init (int no, int chunk, Res res){    struct key_file *f;    f = (struct key_file *) xmalloc (sizeof(*f));    f->res = res;    f->sysno = 0;    f->seqno = 0;    f->no = no;    f->chunk = chunk;    f->offset = 0;    f->length = 0;    f->readHandler = NULL;    f->buf = (unsigned char *) xmalloc (f->chunk);    f->prev_name = (char *) xmalloc (INP_NAME_MAX);    *f->prev_name = '\0';    key_file_chunk_read (f);    return f;}int key_file_getc (struct key_file *f){    if (f->buf_ptr < f->buf_size)        return f->buf[(f->buf_ptr)++];    if (f->buf_size < f->chunk)        return EOF;    f->offset += f->buf_size;    key_file_chunk_read (f);    if (f->buf_ptr < f->buf_size)        return f->buf[(f->buf_ptr)++];    else        return EOF;}int key_file_decode (struct key_file *f){    int 
c, d;    c = key_file_getc (f);    switch (c & 192)     {    case 0:        d = c;        break;    case 64:        d = ((c&63) << 8) + (key_file_getc (f) & 0xff);        break;    case 128:        d = ((c&63) << 8) + (key_file_getc (f) & 0xff);        d = (d << 8) + (key_file_getc (f) & 0xff);        break;    case 192:        d = ((c&63) << 8) + (key_file_getc (f) & 0xff);        d = (d << 8) + (key_file_getc (f) & 0xff);        d = (d << 8) + (key_file_getc (f) & 0xff);        break;    }    return d;}int key_file_read (struct key_file *f, char *key){    int i, d, c;    struct it_key itkey;    c = key_file_getc (f);    if (c == 0)    {        strcpy (key, f->prev_name);        i = 1+strlen (key);    }    else if (c == EOF)        return 0;    else    {        i = 0;        key[i++] = c;        while ((key[i++] = key_file_getc (f)))            ;        strcpy (f->prev_name, key);        f->sysno = 0;    }    d = key_file_decode (f);    key[i++] = d & 1;    d = d >> 1;    itkey.sysno = d + f->sysno;    if (d)     {        f->sysno = itkey.sysno;        f->seqno = 0;    }    d = key_file_decode (f);    itkey.seqno = d + f->seqno;    f->seqno = itkey.seqno;    memcpy (key + i, &itkey, sizeof(struct it_key));    return i + sizeof (struct it_key);}struct heap_info {    struct {        struct key_file **file;        char   **buf;    } info;    int    heapnum;    int    *ptr;    int    (*cmp)(const void *p1, const void *p2);    struct zebra_register *reg;    int no_diffs;    int no_updates;    int no_deletions;    int no_insertions;    int no_iterations;};struct heap_info *key_heap_init (int nkeys,                                 int (*cmp)(const void *p1, const void *p2)){    struct heap_info *hi;    int i;    hi = (struct heap_info *) xmalloc (sizeof(*hi));    hi->info.file = (struct key_file **)	xmalloc (sizeof(*hi->info.file) * (1+nkeys));    hi->info.buf = (char **) xmalloc (sizeof(*hi->info.buf) * (1+nkeys));    hi->heapnum = 0;    hi->ptr = (int *) xmalloc 
(sizeof(*hi->ptr) * (1+nkeys));    hi->cmp = cmp;    for (i = 0; i<= nkeys; i++)    {        hi->ptr[i] = i;        hi->info.buf[i] = (char *) xmalloc (INP_NAME_MAX);    }    hi->no_diffs = 0;    hi->no_diffs = 0;    hi->no_updates = 0;    hi->no_deletions = 0;    hi->no_insertions = 0;    hi->no_iterations = 0;    return hi;}void key_heap_destroy (struct heap_info *hi, int nkeys){    int i;    yaz_log (LOG_DEBUG, "key_heap_destroy");    for (i = 0; i<=nkeys; i++)        xfree (hi->info.buf[i]);        xfree (hi->info.buf);    xfree (hi->ptr);    xfree (hi->info.file);    xfree (hi);}static void key_heap_swap (struct heap_info *hi, int i1, int i2){    int swap;    swap = hi->ptr[i1];    hi->ptr[i1] = hi->ptr[i2];    hi->ptr[i2] = swap;}static void key_heap_delete (struct heap_info *hi){    int cur = 1, child = 2;    assert (hi->heapnum > 0);    key_heap_swap (hi, 1, hi->heapnum);    hi->heapnum--;    while (child <= hi->heapnum) {        if (child < hi->heapnum &&            (*hi->cmp)(&hi->info.buf[hi->ptr[child]],                       &hi->info.buf[hi->ptr[child+1]]) > 0)            child++;        if ((*hi->cmp)(&hi->info.buf[hi->ptr[cur]],                       &hi->info.buf[hi->ptr[child]]) > 0)        {                        key_heap_swap (hi, cur, child);            cur = child;            child = 2*cur;        }        else            break;    }}static void key_heap_insert (struct heap_info *hi, const char *buf, int nbytes,                             struct key_file *kf){    int cur, parent;    cur = ++(hi->heapnum);    memcpy (hi->info.buf[hi->ptr[cur]], buf, nbytes);    hi->info.file[hi->ptr[cur]] = kf;    parent = cur/2;    while (parent && (*hi->cmp)(&hi->info.buf[hi->ptr[parent]],                                &hi->info.buf[hi->ptr[cur]]) > 0)    {        key_heap_swap (hi, cur, parent);        cur = parent;        parent = cur/2;    }}static int heap_read_one (struct heap_info *hi, char *name, char *key){    int n, r;    char rbuf[INP_NAME_MAX];  
  struct key_file *kf;    if (!hi->heapnum)        return 0;    n = hi->ptr[1];    strcpy (name, hi->info.buf[n]);    kf = hi->info.file[n];    r = strlen(name);    memcpy (key, hi->info.buf[n] + r+1, KEY_SIZE);    key_heap_delete (hi);    if ((r = key_file_read (kf, rbuf)))        key_heap_insert (hi, rbuf, r, kf);    hi->no_iterations++;    return 1;}#define PR_KEY 0#if PR_KEYstatic void pkey(const char *b, int mode){    struct it_key *key = (struct it_key *) b;    printf ("%c %d:%d\n", mode + 48, key->sysno, key->seqno);}#endifstruct heap_cread_info {    char prev_name[INP_NAME_MAX];    char cur_name[INP_NAME_MAX];    char *key;    char *key_1, *key_2;    int mode_1, mode_2;    int sz_1, sz_2;    struct heap_info *hi;    int first_in_list;    int more;    int ret;};static int heap_cread_item (void *vp, char **dst, int *insertMode);int heap_cread_item2 (void *vp, char **dst, int *insertMode){    struct heap_cread_info *p = (struct heap_cread_info *) vp;    int level = 0;    if (p->ret == 0)    /* lookahead was 0?. Return that in read next round */    {        p->ret = -1;        return 0;    }    else if (p->ret == -1) /* Must read new item ? */    {        char *dst_1 = p->key_1;        p->ret = heap_cread_item(vp, &dst_1, &p->mode_1);        p->sz_1 = dst_1 - p->key_1;    }    else    {        /* lookahead in 2 . Now in 1. 
*/        p->sz_1 = p->sz_2;        p->mode_1 = p->mode_2;        memcpy (p->key_1, p->key_2, p->sz_2);    }    if (p->mode_1)        level = 1;     /* insert */    else        level = -1;    /* delete */    while(1)    {        char *dst_2 = p->key_2;        p->ret = heap_cread_item(vp, &dst_2, &p->mode_2);        if (!p->ret)        {            if (level)                break;            p->ret = -1;            return 0;        }        p->sz_2 = dst_2 - p->key_2;        if (p->sz_1 == p->sz_2 && memcmp(p->key_1, p->key_2, p->sz_1) == 0)        {            if (p->mode_2) /* adjust level according to deletes/inserts */                level++;            else                level--;        }        else        {            if (level)                break;            /* all the same. new round .. */            p->sz_1 = p->sz_2;            p->mode_1 = p->mode_2;            memcpy (p->key_1, p->key_2, p->sz_1);            if (p->mode_1)                level = 1;     /* insert */            else                level = -1;    /* delete */        }    }    /* outcome is insert (1) or delete (0) depending on final level */    if (level > 0)        *insertMode = 1;    else        *insertMode = 0;    memcpy (*dst, p->key_1, p->sz_1);#if PR_KEY    printf ("top: ");    pkey(*dst, *insertMode); fflush(stdout);#endif    (*dst) += p->sz_1;    return 1;}      int heap_cread_item (void *vp, char **dst, int *insertMode){    struct heap_cread_info *p = (struct heap_cread_info *) vp;    struct heap_info *hi = p->hi;    if (p->first_in_list)    {        *insertMode = p->key[0];        memcpy (*dst, p->key+1, sizeof(struct it_key));#if PR_KEY

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -