📄 ccltoken.c

📁 harvest 是一个下载 HTML 网页的机器人
💻 C
字号:
/* * Copyright (c) 1995, the EUROPAGATE consortium (see below). * * The EUROPAGATE consortium members are: * *    University College Dublin *    Danmarks Teknologiske Videnscenter *    An Chomhairle Leabharlanna *    Consejo Superior de Investigaciones Cientificas * * Permission to use, copy, modify, distribute, and sell this software and * its documentation, in whole or in part, for any purpose, is hereby granted, * provided that: * * 1. This copyright and permission notice appear in all copies of the * software and its documentation. Notices of copyright or attribution * which appear at the beginning of any file must remain unchanged. * * 2. The names of EUROPAGATE or the project partners may not be used to * endorse or promote products derived from this software without specific * prior written permission. * * 3. Users of this software (implementors and gateway operators) agree to * inform the EUROPAGATE consortium of their use of the software. This * information will be used to evaluate the EUROPAGATE project and the * software, and to plan further developments. The consortium may use * the information in later publications. *  * 4. Users of this software agree to make their best efforts, when * documenting their use of the software, to acknowledge the EUROPAGATE * consortium, and the role played by the software in their work. * * THIS SOFTWARE IS PROVIDED "AS IS" AND WITHOUT WARRANTY OF ANY KIND, * EXPRESS, IMPLIED, OR OTHERWISE, INCLUDING WITHOUT LIMITATION, ANY * WARRANTY OF MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE. * IN NO EVENT SHALL THE EUROPAGATE CONSORTIUM OR ITS MEMBERS BE LIABLE * FOR ANY SPECIAL, INCIDENTAL, INDIRECT OR CONSEQUENTIAL DAMAGES OF * ANY KIND, OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA * OR PROFITS, WHETHER OR NOT ADVISED OF THE POSSIBILITY OF DAMAGE, AND * ON ANY THEORY OF LIABILITY, ARISING OUT OF OR IN CONNECTION WITH THE * USE OR PERFORMANCE OF THIS SOFTWARE. 
* *//* CCL - lexical analysis * Europagate, 1995 * * $Id: ccltoken.c,v 1.22 2003/02/14 18:49:23 adam Exp $ * * Old Europagate Log: * * Revision 1.10  1995/07/11  12:28:31  adam * New function: ccl_token_simple (split into simple tokens) and *  ccl_token_del (delete tokens). * * Revision 1.9  1995/05/16  09:39:28  adam * LICENSE. * * Revision 1.8  1995/05/11  14:03:57  adam * Changes in the reading of qualifier(s). New function: ccl_qual_fitem. * New variable ccl_case_sensitive, which controls whether reserved * words and field names are case sensitive or not. * * Revision 1.7  1995/04/19  12:11:24  adam * Minor change. * * Revision 1.6  1995/04/17  09:31:48  adam * Improved handling of qualifiers. Aliases or reserved words. * * Revision 1.5  1995/02/23  08:32:00  adam * Changed header. * * Revision 1.3  1995/02/15  17:42:16  adam * Minor changes of the api of this module. FILE* argument added * to ccl_pr_tree. * * Revision 1.2  1995/02/14  19:55:13  adam * Header files ccl.h/cclp.h are gone! They have been merged an * moved to ../include/ccl.h. * Node kind(s) in ccl_rpn_node have changed names. * * Revision 1.1  1995/02/13  12:35:21  adam * First version of CCL. Qualifiers aren't handled yet. * */#include <string.h>#include <stdlib.h>#include <ctype.h>#include <yaz/ccl.h>/* * token_cmp: Compare token with keyword(s) * kw:     Keyword list. Each keyword is separated by space. * token:  CCL token. * return: 1 if token string matches one of the keywords in list; *         0 otherwise. 
*/static int token_cmp (CCL_parser cclp, const char *kw, struct ccl_token *token){    const char *cp1 = kw;    const char *cp2;    const char *aliases;    int case_sensitive = cclp->ccl_case_sensitive;    aliases = ccl_qual_search_special(cclp->bibset, "case");    if (aliases)	case_sensitive = atoi(aliases);    if (!kw)        return 0;    while ((cp2 = strchr (cp1, ' ')))    {        if (token->len == (size_t) (cp2-cp1))        {            if (case_sensitive)            {                if (!memcmp (cp1, token->name, token->len))                    return 1;            }            else            {                if (!ccl_memicmp (cp1, token->name, token->len))                    return 1;            }        }	cp1 = cp2+1;    }    if (case_sensitive)        return token->len == strlen(cp1)             && !memcmp (cp1, token->name, token->len);    return token->len == strlen(cp1) &&        !ccl_memicmp (cp1, token->name, token->len);}/* * ccl_token_simple: tokenize CCL raw tokens */struct ccl_token *ccl_token_simple (const char *command){    const char *cp = command;    struct ccl_token *first = NULL;    struct ccl_token *last = NULL;    while (1)    {	while (*cp && strchr (" \t\r\n", *cp))	{	    cp++;	    continue;	}	if (!first)	{	    first = last = (struct ccl_token *)xmalloc (sizeof (*first));	    ccl_assert (first);	    last->prev = NULL;	}	else	{	    last->next = (struct ccl_token *)xmalloc (sizeof(*first));	    ccl_assert (last->next);	    last->next->prev = last;	    last = last->next;	}	last->next = NULL;	last->name = cp;	last->len = 1;	switch (*cp++)	{        case '\0':            last->kind = CCL_TOK_EOL;            return first;	case '\"':	    last->kind = CCL_TOK_TERM;	    last->name = cp;	    last->len = 0;	    while (*cp && *cp != '\"')	    {		cp++;		++ last->len;	    }	    if (*cp == '\"')		cp++;	    break;	default:	    while (*cp && !strchr (" \t\n\r", *cp))	    {		cp++;		++ last->len;	    }            last->kind = CCL_TOK_TERM;	}    }    return 
first;}/* * ccl_tokenize: tokenize CCL command string. * return: CCL token list. */struct ccl_token *ccl_parser_tokenize (CCL_parser cclp, const char *command){    const char *aliases;    const char *cp = command;    struct ccl_token *first = NULL;    struct ccl_token *last = NULL;    while (1)    {	while (*cp && strchr (" \t\r\n", *cp))	{	    cp++;	    continue;	}	if (!first)	{	    first = last = (struct ccl_token *)xmalloc (sizeof (*first));	    ccl_assert (first);	    last->prev = NULL;	}	else	{	    last->next = (struct ccl_token *)xmalloc (sizeof(*first));	    ccl_assert (last->next);	    last->next->prev = last;	    last = last->next;	}	last->next = NULL;	last->name = cp;	last->len = 1;	switch (*cp++)	{        case '\0':            last->kind = CCL_TOK_EOL;            return first;	case '(':	    last->kind = CCL_TOK_LP;	    break;	case ')':	    last->kind = CCL_TOK_RP;	    break;	case ',':	    last->kind = CCL_TOK_COMMA;	    break;	case '%':	case '!':	    last->kind = CCL_TOK_PROX;            while (isdigit(*cp))	    {		++ last->len;		cp++;	    }	    break;	case '>':	case '<':	case '=':	    if (*cp == '=' || *cp == '<' || *cp == '>')	    {		cp++;		last->kind = CCL_TOK_REL;		++ last->len;	    }	    else if (cp[-1] == '=')		last->kind = CCL_TOK_EQ;	    else		last->kind = CCL_TOK_REL;	    break;	case '\"':	    last->kind = CCL_TOK_TERM;	    last->name = cp;	    last->len = 0;	    while (*cp && *cp != '\"')	    {		cp++;		++ last->len;	    }	    if (*cp == '\"')		cp++;	    break;	default:	    if (!strchr ("(),%!><= \t\n\r", cp[-1]))	    {		while (*cp && !strchr ("(),%!><= \t\n\r", *cp))		{		    cp++;		    ++ last->len;		}	    }	    last->kind = CCL_TOK_TERM;	    aliases = ccl_qual_search_special(cclp->bibset, "and");	    if (!aliases)		aliases = cclp->ccl_token_and;	    if (token_cmp (cclp, aliases, last))	        last->kind = CCL_TOK_AND;	    aliases = ccl_qual_search_special(cclp->bibset, "or");	    if (!aliases)		aliases = cclp->ccl_token_or;	    if (token_cmp 
(cclp, aliases, last))	        last->kind = CCL_TOK_OR;	    aliases = ccl_qual_search_special(cclp->bibset, "not");	    if (!aliases)		aliases = cclp->ccl_token_not;            if (token_cmp (cclp, aliases, last))	        last->kind = CCL_TOK_NOT;	    aliases = ccl_qual_search_special(cclp->bibset, "set");	    if (!aliases)		aliases = cclp->ccl_token_set;	    if (token_cmp (cclp, aliases, last))	        last->kind = CCL_TOK_SET;	}    }    return first;}struct ccl_token *ccl_tokenize (const char *command){    CCL_parser cclp = ccl_parser_create ();    struct ccl_token *list;    list = ccl_parser_tokenize (cclp, command);    ccl_parser_destroy (cclp);    return list;}/* * ccl_token_del: delete CCL tokens */void ccl_token_del (struct ccl_token *list){    struct ccl_token *list1;    while (list)     {        list1 = list->next;        xfree (list);        list = list1;    }}char *ccl_strdup (const char *str){    int len = strlen(str);    char *p = (char*) xmalloc (len+1);    strcpy (p, str);    return p;}CCL_parser ccl_parser_create (void){    CCL_parser p = (CCL_parser)xmalloc (sizeof(*p));    if (!p)	return p;    p->look_token = NULL;    p->error_code = 0;    p->error_pos = NULL;    p->bibset = NULL;    p->ccl_token_and = ccl_strdup("and");    p->ccl_token_or = ccl_strdup("or");    p->ccl_token_not = ccl_strdup("not andnot");    p->ccl_token_set = ccl_strdup("set");    p->ccl_case_sensitive = 1;    return p;}void ccl_parser_destroy (CCL_parser p){    if (!p)	return;    xfree (p->ccl_token_and);    xfree (p->ccl_token_or);    xfree (p->ccl_token_not);    xfree (p->ccl_token_set);    xfree (p);}void ccl_parser_set_op_and (CCL_parser p, const char *op){    if (p && op)    {	if (p->ccl_token_and)	    xfree (p->ccl_token_and);	p->ccl_token_and = ccl_strdup (op);    }}void ccl_parser_set_op_or (CCL_parser p, const char *op){    if (p && op)    {	if (p->ccl_token_or)	    xfree (p->ccl_token_or);	p->ccl_token_or = ccl_strdup (op);    }}void ccl_parser_set_op_not (CCL_parser 
p, const char *op){    if (p && op)    {	if (p->ccl_token_not)	    xfree (p->ccl_token_not);	p->ccl_token_not = ccl_strdup (op);    }}void ccl_parser_set_op_set (CCL_parser p, const char *op){    if (p && op)    {	if (p->ccl_token_set)	    xfree (p->ccl_token_set);	p->ccl_token_set = ccl_strdup (op);    }}void ccl_parser_set_case (CCL_parser p, int case_sensitivity_flag){    if (p)	p->ccl_case_sensitive = case_sensitivity_flag;}
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -