⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 cclfind.c

📁 harvest是一个下载html网页得机器人
💻 C
📖 第 1 页 / 共 3 页
字号:
/* * Copyright (c) 1995, the EUROPAGATE consortium (see below). * * The EUROPAGATE consortium members are: * *    University College Dublin *    Danmarks Teknologiske Videnscenter *    An Chomhairle Leabharlanna *    Consejo Superior de Investigaciones Cientificas * * Permission to use, copy, modify, distribute, and sell this software and * its documentation, in whole or in part, for any purpose, is hereby granted, * provided that: * * 1. This copyright and permission notice appear in all copies of the * software and its documentation. Notices of copyright or attribution * which appear at the beginning of any file must remain unchanged. * * 2. The names of EUROPAGATE or the project partners may not be used to * endorse or promote products derived from this software without specific * prior written permission. * * 3. Users of this software (implementors and gateway operators) agree to * inform the EUROPAGATE consortium of their use of the software. This * information will be used to evaluate the EUROPAGATE project and the * software, and to plan further developments. The consortium may use * the information in later publications. *  * 4. Users of this software agree to make their best efforts, when * documenting their use of the software, to acknowledge the EUROPAGATE * consortium, and the role played by the software in their work. * * THIS SOFTWARE IS PROVIDED "AS IS" AND WITHOUT WARRANTY OF ANY KIND, * EXPRESS, IMPLIED, OR OTHERWISE, INCLUDING WITHOUT LIMITATION, ANY * WARRANTY OF MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE. * IN NO EVENT SHALL THE EUROPAGATE CONSORTIUM OR ITS MEMBERS BE LIABLE * FOR ANY SPECIAL, INCIDENTAL, INDIRECT OR CONSEQUENTIAL DAMAGES OF * ANY KIND, OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA * OR PROFITS, WHETHER OR NOT ADVISED OF THE POSSIBILITY OF DAMAGE, AND * ON ANY THEORY OF LIABILITY, ARISING OUT OF OR IN CONNECTION WITH THE * USE OR PERFORMANCE OF THIS SOFTWARE. 
* *//* CCL find (to rpn conversion) * Europagate, 1995 * * $Id: cclfind.c,v 1.34 2003/06/23 10:22:21 adam Exp $ * * Old Europagate log: * * Revision 1.16  1996/01/08  08:41:13  adam * Removed unused function. * * Revision 1.15  1995/07/20  08:14:34  adam * Qualifiers were observed too often. Instead tokens are treated as * qualifiers only when separated by comma. * * Revision 1.14  1995/05/16  09:39:26  adam * LICENSE. * * Revision 1.13  1995/04/17  09:31:42  adam * Improved handling of qualifiers. Aliases or reserved words. * * Revision 1.12  1995/03/20  15:27:43  adam * Minor changes. * * Revision 1.11  1995/02/23  08:31:59  adam * Changed header. * * Revision 1.9  1995/02/16  13:20:06  adam * Spell fix. * * Revision 1.8  1995/02/14  19:59:42  adam * Removed a syntax error. * * Revision 1.7  1995/02/14  19:55:10  adam * Header files ccl.h/cclp.h are gone! They have been merged an * moved to ../include/ccl.h. * Node kind(s) in ccl_rpn_node have changed names. * * Revision 1.6  1995/02/14  16:20:55  adam * Qualifiers are read from a file now. * * Revision 1.5  1995/02/14  14:12:41  adam * Ranges for ordered qualfiers implemented (e.g. pd=1980-1990). * * Revision 1.4  1995/02/14  13:16:29  adam * Left and/or right truncation implemented. * * Revision 1.3  1995/02/14  10:25:56  adam * The constructions 'qualifier rel term ...' implemented. * * Revision 1.2  1995/02/13  15:15:07  adam * Added handling of qualifiers. Not finished yet. * * Revision 1.1  1995/02/13  12:35:20  adam * First version of CCL. Qualifiers aren't handled yet. * */#include <stdlib.h>#include <string.h>#include <yaz/ccl.h>/* returns type of current lookahead */#define KIND (cclp->look_token->kind)/* move one token forward */#define ADVANCE cclp->look_token = cclp->look_token->next/*  * qual_val_type: test for existance of attribute type/value pair. * qa:     Attribute array * type:   Type of attribute to search for * value:  Value of attribute to seach for * return: 1 if found; 0 otherwise. 
*/static int qual_val_type (struct ccl_rpn_attr **qa, int type, int value,                           char **attset){    int i;    struct ccl_rpn_attr *q;    if (!qa)        return 0;    for (i = 0;  (q=qa[i]); i++)        while (q)        {            if (q->type == type && q->kind == CCL_RPN_ATTR_NUMERIC &&		q->value.numeric == value)            {                if (attset)                    *attset = q->set;                return 1;            }            q = q->next;        }    return 0;}/* * strxcat: concatenate strings. * n:      Null-terminated Destination string  * src:    Source string to be appended (not null-terminated) * len:    Length of source string. */static void strxcat (char *n, const char *src, int len){    while (*n)        n++;    while (--len >= 0)        *n++ = *src++;    *n = '\0';}/* * copy_token_name: Return copy of CCL token name * tp:      Pointer to token info. * return:  malloc(3) allocated copy of token name. */static char *copy_token_name (struct ccl_token *tp){    char *str = (char *)xmalloc (tp->len + 1);    ccl_assert (str);    memcpy (str, tp->name, tp->len);    str[tp->len] = '\0';    return str;}/* * mk_node: Create RPN node. * kind:   Type of node. * return: pointer to allocated node. */static struct ccl_rpn_node *mk_node (int kind){    struct ccl_rpn_node *p;    p = (struct ccl_rpn_node *)xmalloc (sizeof(*p));    ccl_assert (p);    p->kind = kind;    return p;}/* * ccl_rpn_delete: Delete RPN tree. * rpn:   Pointer to tree. 
*/void ccl_rpn_delete (struct ccl_rpn_node *rpn){    struct ccl_rpn_attr *attr, *attr1;    if (!rpn)        return;    switch (rpn->kind)    {    case CCL_RPN_AND:    case CCL_RPN_OR:    case CCL_RPN_NOT:        ccl_rpn_delete (rpn->u.p[0]);        ccl_rpn_delete (rpn->u.p[1]);        break;    case CCL_RPN_TERM:        xfree (rpn->u.t.term);        for (attr = rpn->u.t.attr_list; attr; attr = attr1)        {            attr1 = attr->next;	    if (attr->kind == CCL_RPN_ATTR_STRING)		xfree(attr->value.str);            if (attr->set)                xfree (attr->set);            xfree (attr);        }        break;    case CCL_RPN_SET:        xfree (rpn->u.setname);        break;    case CCL_RPN_PROX:        ccl_rpn_delete (rpn->u.p[0]);        ccl_rpn_delete (rpn->u.p[1]);        break;    }    xfree (rpn);}static struct ccl_rpn_node *find_spec (CCL_parser cclp,                                       struct ccl_rpn_attr **qa);static int is_term_ok (int look, int *list){    for (;*list >= 0; list++)        if (look == *list)            return 1;    return 0;}static struct ccl_rpn_node *search_terms (CCL_parser cclp,                                          struct ccl_rpn_attr **qa);static struct ccl_rpn_attr *add_attr_node (struct ccl_rpn_node *p,					   const char *set, int type){    struct ccl_rpn_attr *n;        n = (struct ccl_rpn_attr *)xmalloc (sizeof(*n));    ccl_assert (n);    if (set)    {        n->set = (char*) xmalloc (strlen(set)+1);        strcpy (n->set, set);    }    else        n->set = 0;    n->type = type;    n->next = p->u.t.attr_list;    p->u.t.attr_list = n;        n->kind = CCL_RPN_ATTR_NUMERIC;    n->value.numeric = 0;    return n;}/* * add_attr_numeric: Add attribute (type/value) to RPN term node. * p:     RPN node of type term. 
* type:  Type of attribute
* value: Value of attribute
* set: Attribute set name
*/
static void add_attr_numeric (struct ccl_rpn_node *p, const char *set,
                              int type, int value)
{
    struct ccl_rpn_attr *n;

    /* add_attr_node allocates the attribute and links it onto p */
    n = add_attr_node(p, set, type);
    n->kind = CCL_RPN_ATTR_NUMERIC;
    n->value.numeric = value;
}

/*
 * add_attr_string: Add attribute with a string value to RPN term node.
 * p:     RPN node of type term.
 * set:   Attribute set name
 * type:  Type of attribute
 * value: String value; the attribute stores a copy made with xstrdup
 */
static void add_attr_string (struct ccl_rpn_node *p, const char *set,
                             int type, char *value)
{
    struct ccl_rpn_attr *n;

    n = add_attr_node(p, set, type);
    /* add_attr_node presets a numeric attribute; switch it to string */
    n->kind = CCL_RPN_ATTR_STRING;
    n->value.str = xstrdup(value);
}

/*
 * search_term_x: Parse CCL search term.
 * cclp:   CCL Parser
 * qa:     Qualifier attributes already applied.
 * term_list: tokens we accept as terms in context
 * multi:  whether we accept "multiple" tokens
 * return: pointer to node(s); NULL on error.
 */
static struct ccl_rpn_node *search_term_x (CCL_parser cclp,
                                           struct ccl_rpn_attr **qa,
                                           int *term_list, int multi)
{
    struct ccl_rpn_node *p_top = 0;
    /* private scan position, starting at the parser's current token */
    struct ccl_token *lookahead = cclp->look_token;
    int and_list = 0;
    int or_list = 0;
    char *attset;
    const char *truncation_aliases;

    /* characters that act as truncation markers; "?" is used when no
       "truncation" special is returned for the bibset */
    truncation_aliases =
        ccl_qual_search_special(cclp->bibset, "truncation");
    if (!truncation_aliases)
        truncation_aliases = "?";

    /* a CCL_BIB1_STR attribute with the and-list / or-list value in qa
       switches the corresponding list mode on */
    if (qual_val_type (qa, CCL_BIB1_STR, CCL_BIB1_STR_AND_LIST, 0))
        and_list = 1;
    if (qual_val_type (qa, CCL_BIB1_STR, CCL_BIB1_STR_OR_LIST, 0))
        or_list = 1;
    while (1)
    {
        struct ccl_rpn_node *p;
        size_t no, i;
        int no_spaces = 0;
        int left_trunc = 0;
        int right_trunc = 0;
        int mid_trunc = 0;
        int relation_value = -1;
        int position_value = -1;
        int structure_value = -1;
        int truncation_value = -1;
        int completeness_value = -1;
        int len = 0;
        /* max bounds the number of tokens gathered into one term by
           the loop that follows */
        size_t max = 200;
        /* in list mode, or when multi is zero, take one token at a time */
        if (and_list || or_list || !multi)
            max = 1;

        /* ignore commas when dealing with and-lists .. 
*/        if (and_list && lookahead && lookahead->kind == CCL_TOK_COMMA)        {	    lookahead = lookahead->next;            ADVANCE;	    continue;        }        /* go through each TERM token. If no truncation attribute is yet           met, then look for left/right truncation markers (?) and           set left_trunc/right_trunc/mid_trunc accordingly */        for (no = 0; no < max && is_term_ok(lookahead->kind, term_list); no++)        {            for (i = 0; i<lookahead->len; i++)                if (lookahead->name[i] == ' ')		    no_spaces++;		else if (strchr(truncation_aliases, lookahead->name[i]))                {                    if (no == 0 && i == 0 && lookahead->len >= 1)                        left_trunc = 1;                    else if (!is_term_ok(lookahead->next->kind, term_list) &&                             i == lookahead->len-1 && i >= 1)                        right_trunc = 1;                    else                        mid_trunc = 1;                }            len += 1+lookahead->len;            lookahead = lookahead->next;        }        if (len == 0)            break;      /* no more terms . stop . */        if (p_top)        {            if (or_list)                p = mk_node (CCL_RPN_OR);

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -