⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 zsets.c

📁 harvest是一个下载html网页的机器人
💻 C
📖 第 1 页 / 共 2 页
字号:
/* $Id: zsets.c,v 1.46 2003/06/18 11:46:34 adam Exp $   Copyright (C) 1995,1996,1997,1998,1999,2000,2001,2002   Index Data ApsThis file is part of the Zebra server.Zebra is free software; you can redistribute it and/or modify it underthe terms of the GNU General Public License as published by the FreeSoftware Foundation; either version 2, or (at your option) any laterversion.Zebra is distributed in the hope that it will be useful, but WITHOUT ANYWARRANTY; without even the implied warranty of MERCHANTABILITY orFITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public Licensefor more details.You should have received a copy of the GNU General Public Licensealong with Zebra; see the file LICENSE.zebra.  If not, write to theFree Software Foundation, 59 Temple Place - Suite 330, Boston, MA02111-1307, USA.*/#include <stdio.h>#include <assert.h>#ifdef WIN32#include <io.h>#else#include <unistd.h>#endif#include "index.h"#include <rstemp.h>#define SORT_IDX_ENTRYSIZE 64#define ZSET_SORT_MAX_LEVEL 3struct zebra_set_term_entry {    int reg_type;    char *db;    int set;    int use;    char *term;};struct zebra_set {    char *name;    RSET rset;    NMEM nmem;    int hits;    int num_bases;    char **basenames;    Z_RPNQuery *rpn;    struct zset_sort_info *sort_info;    struct zebra_set_term_entry *term_entries;    int term_entries_max;    struct zebra_set *next;    int locked;};struct zset_sort_entry {    int sysno;    int score;    char buf[ZSET_SORT_MAX_LEVEL][SORT_IDX_ENTRYSIZE];};struct zset_sort_info {    int max_entries;    int num_entries;    struct zset_sort_entry *all_entries;    struct zset_sort_entry **entries;};ZebraSet resultSetAddRPN (ZebraHandle zh, NMEM m,			  Z_RPNQuery *rpn, int num_bases,                          char **basenames, 			  const char *setname){    ZebraSet zebraSet;    int i;    zh->errCode = 0;    zh->errString = NULL;    zh->hits = 0;    zebraSet = resultSetAdd (zh, setname, 1);    if (!zebraSet)	return 0;    zebraSet->locked = 1;    
zebraSet->rpn = 0;    zebraSet->nmem = m;    zebraSet->num_bases = num_bases;    zebraSet->basenames =         nmem_malloc (zebraSet->nmem, num_bases * sizeof(*zebraSet->basenames));    for (i = 0; i<num_bases; i++)        zebraSet->basenames[i] = nmem_strdup (zebraSet->nmem, basenames[i]);    zebraSet->rset = rpn_search (zh, zebraSet->nmem, rpn,                                 zebraSet->num_bases,		                 zebraSet->basenames, zebraSet->name,				 zebraSet);    zh->hits = zebraSet->hits;    if (zebraSet->rset)        zebraSet->rpn = rpn;    zebraSet->locked = 0;    return zebraSet;}void resultSetAddTerm (ZebraHandle zh, ZebraSet s, int reg_type,		       const char *db, int set,		       int use, const char *term){    if (!s->nmem)	s->nmem = nmem_create ();    if (!s->term_entries)    {	int i;	s->term_entries_max = 1000;	s->term_entries =	    nmem_malloc (s->nmem, s->term_entries_max * 			 sizeof(*s->term_entries));	for (i = 0; i < s->term_entries_max; i++)	    s->term_entries[i].term = 0;    }    if (s->hits < s->term_entries_max)    {	s->term_entries[s->hits].reg_type = reg_type;	s->term_entries[s->hits].db = nmem_strdup (s->nmem, db);	s->term_entries[s->hits].set = set;	s->term_entries[s->hits].use = use;	s->term_entries[s->hits].term = nmem_strdup (s->nmem, term);    }    (s->hits)++;}int zebra_resultSetTerms (ZebraHandle zh, const char *setname,                           int no, int *count,                           int *type, char *out, size_t *len){    ZebraSet s = resultSetGet (zh, setname);    int no_max = 0;    if (count)        *count = 0;    if (!s || !s->rset)        return 0;    no_max = s->rset->no_rset_terms;    if (no < 0 || no >= no_max)        return 0;    if (count)        *count = s->rset->rset_terms[no]->count;    if (type)        *type = s->rset->rset_terms[no]->type;        if (out)    {        char *inbuf = s->rset->rset_terms[no]->name;        size_t inleft = strlen(inbuf);        size_t outleft = *len - 1;	int converted = 0;        
if (zh->iconv_from_utf8 != 0)        {            char *outbuf = out;            size_t ret;                        ret = yaz_iconv(zh->iconv_from_utf8, &inbuf, &inleft,                        &outbuf, &outleft);            if (ret == (size_t)(-1))                *len = 0;            else                *len = outbuf - out;	    converted = 1;        }        if (!converted)        {            if (inleft > outleft)                inleft = outleft;            *len = inleft;            memcpy (out, inbuf, *len);        }        out[*len] = 0;    }    return no_max;}ZebraSet resultSetAdd (ZebraHandle zh, const char *name, int ov){    ZebraSet s;    int i;    for (s = zh->sets; s; s = s->next)        if (!strcmp (s->name, name))	    break;    if (s)    {	yaz_log (LOG_DEBUG, "updating result set %s", name);	if (!ov || s->locked)	    return NULL;	if (s->rset)	    rset_delete (s->rset);	if (s->nmem)	    nmem_destroy (s->nmem);    }    else    {	yaz_log (LOG_DEBUG, "adding result set %s", name);	s = (ZebraSet) xmalloc (sizeof(*s));	s->next = zh->sets;	zh->sets = s;	s->name = (char *) xmalloc (strlen(name)+1);	strcpy (s->name, name);	s->sort_info = (struct zset_sort_info *)	    xmalloc (sizeof(*s->sort_info));	s->sort_info->max_entries = 1000;	s->sort_info->entries = (struct zset_sort_entry **)	    xmalloc (sizeof(*s->sort_info->entries) *		     s->sort_info->max_entries);	s->sort_info->all_entries = (struct zset_sort_entry *)	    xmalloc (sizeof(*s->sort_info->all_entries) *		     s->sort_info->max_entries);	for (i = 0; i < s->sort_info->max_entries; i++)	    s->sort_info->entries[i] = s->sort_info->all_entries + i;    }    s->locked = 0;    s->term_entries = 0;    s->hits = 0;    s->rset = 0;    s->nmem = 0;    s->rpn = 0;    return s;}ZebraSet resultSetGet (ZebraHandle zh, const char *name){    ZebraSet s;    for (s = zh->sets; s; s = s->next)        if (!strcmp (s->name, name))        {            if (!s->term_entries && !s->rset && s->rpn)            {                
NMEM nmem = nmem_create ();                yaz_log (LOG_LOG, "research %s", name);                s->rset =                    rpn_search (zh, nmem, s->rpn, s->num_bases,				s->basenames, s->name, s);                nmem_destroy (nmem);            }            return s;        }    return NULL;}void resultSetInvalidate (ZebraHandle zh){    ZebraSet s = zh->sets;        for (; s; s = s->next)    {        if (s->rset)            rset_delete (s->rset);        s->rset = 0;    }}void resultSetDestroy (ZebraHandle zh, int num, char **names,int *statuses){    ZebraSet * ss = &zh->sets;    int i;        if (statuses)	for (i = 0; i<num; i++)	    statuses[i] = Z_DeleteStatus_resultSetDidNotExist;    zh->errCode = 0;    zh->errString = NULL;    while (*ss)    {	int i = -1;	ZebraSet s = *ss;	if (num >= 0)	{	    for (i = 0; i<num; i++)		if (!strcmp (s->name, names[i]))		{		    if (statuses)			statuses[i] = Z_DeleteStatus_success;		    i = -1;		    break;		}	}	if (i < 0)	{	    *ss = s->next;	    	    xfree (s->sort_info->all_entries);	    xfree (s->sort_info->entries);	    xfree (s->sort_info);	    	    if (s->nmem)		nmem_destroy (s->nmem);	    if (s->rset)		rset_delete (s->rset);	    xfree (s->name);	    xfree (s);	}	else	    ss = &s->next;    }}ZebraPosSet zebraPosSetCreate (ZebraHandle zh, const char *name, 			       int num, int *positions){    ZebraSet sset;    ZebraPosSet sr = 0;    RSET rset;    int i;    struct zset_sort_info *sort_info;    if (!(sset = resultSetGet (zh, name)))        return NULL;    if (!(rset = sset->rset))    {	if (!sset->term_entries)	    return 0;	sr = (ZebraPosSet) xmalloc (sizeof(*sr) * num);	for (i = 0; i<num; i++)	{	    sr[i].sysno = 0;	    sr[i].score = -1;	    sr[i].term = 0;	    sr[i].db = 0;	    if (positions[i] <= sset->term_entries_max)	    {		sr[i].term = sset->term_entries[positions[i]-1].term;		sr[i].db = sset->term_entries[positions[i]-1].db;	    }	}    }    else    {	sr = (ZebraPosSet) xmalloc (sizeof(*sr) * num);	for (i = 0; i<num; 
i++)	{	    sr[i].sysno = 0;	    sr[i].score = -1;	    sr[i].term = 0;	    sr[i].db = 0;	}	sort_info = sset->sort_info;	if (sort_info)	{	    int position;	    	    for (i = 0; i<num; i++)	    {		position = positions[i];		if (position > 0 && position <= sort_info->num_entries)		{		    yaz_log (LOG_DEBUG, "got pos=%d (sorted)", position);		    sr[i].sysno = sort_info->entries[position-1]->sysno;		    sr[i].score = sort_info->entries[position-1]->score;		}	    }	}	/* did we really get all entries using sort ? */	for (i = 0; i<num; i++)	{	    if (!sr[i].sysno)		break;	}	if (i < num) /* nope, get the rest, unsorted - sorry */	{	    int position = 0;	    int num_i = 0;	    int psysno = 0;	    int term_index;	    RSFD rfd;	    struct it_key key;	    	    if (sort_info)		position = sort_info->num_entries;	    while (num_i < num && positions[num_i] < position)		num_i++;	    rfd = rset_open (rset, RSETF_READ);	    while (num_i < num && rset_read (rset, rfd, &key, &term_index))	    {		if (key.sysno != psysno)		{		    psysno = key.sysno;		    if (sort_info)		    {			/* determine we alreay have this in our set */			for (i = sort_info->num_entries; --i >= 0; )

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -