⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 pquery.c

📁 harvest是一个下载html网页的机器人
💻 C
📖 第 1 页 / 共 2 页
字号:
/* * Copyright (c) 1995-2003, Index Data. * See the file LICENSE for details. * * $Id: pquery.c,v 1.20 2003/01/06 08:20:29 adam Exp $ */#include <stdio.h>#include <string.h>#include <stdlib.h>#include <ctype.h>#include <yaz/proto.h>#include <yaz/oid.h>#include <yaz/pquery.h>static oid_value p_query_dfset = VAL_NONE;struct yaz_pqf_parser {    const char *query_buf;    const char *query_ptr;    const char *lex_buf;    size_t lex_len;    int query_look;    char *left_sep;    char *right_sep;    int escape_char;    int term_type;    int external_type;    int error;};static Z_RPNStructure *rpn_structure (struct yaz_pqf_parser *li, ODR o,                                      oid_proto,                                       int num_attr, int max_attr,                                       int *attr_list, char **attr_clist,				      oid_value *attr_set);static enum oid_value query_oid_getvalbyname (struct yaz_pqf_parser *li){    enum oid_value value;    char buf[32];    if (li->lex_len > 31)        return VAL_NONE;    memcpy (buf, li->lex_buf, li->lex_len);    buf[li->lex_len] = '\0';    value = oid_getvalbyname (buf);    return value;}static int compare_term (struct yaz_pqf_parser *li, const char *src,                         size_t off){    size_t len=strlen(src);        if (li->lex_len == len+off && !memcmp (li->lex_buf+off, src, len-off))	return 1;    return 0;}static int query_token (struct yaz_pqf_parser *li){    int sep_char = ' ';    const char *sep_match;    const char **qptr = &li->query_ptr;    while (**qptr == ' ')        (*qptr)++;    if (**qptr == '\0')        return 0;    li->lex_len = 0;    if ((sep_match = strchr (li->left_sep, **qptr)))    {	sep_char = li->right_sep[sep_match - li->left_sep];        ++(*qptr);    }    li->lex_buf = *qptr;       if (**qptr == li->escape_char && isdigit ((*qptr)[1]))    {	++(li->lex_len);	++(*qptr);	return 'l';    }    while (**qptr && **qptr != sep_char)    {	if (**qptr == '\\')	{	    ++(li->lex_len);	    ++(*qptr);	}	
++(li->lex_len);	++(*qptr);    }    if (**qptr)	++(*qptr);    if (sep_char == ' ' &&        li->lex_len >= 1 && li->lex_buf[0] == li->escape_char)    {	if (compare_term (li, "and", 1))	    return 'a';        if (compare_term (li, "or", 1))            return 'o';        if (compare_term (li, "not", 1))            return 'n';        if (compare_term (li, "attr", 1))            return 'l';        if (compare_term (li, "set", 1))            return 's';        if (compare_term (li, "attrset", 1))            return 'r';        if (compare_term (li, "prox", 1))            return 'p';        if (compare_term (li, "term", 1))            return 'y';    }    return 't';}static int lex (struct yaz_pqf_parser *li){    return li->query_look = query_token (li);}static int escape_string(char *out_buf, const char *in, int len){    char *out = out_buf;    while (--len >= 0)	if (*in == '\\' && len > 0)	{	    --len;	    switch (*++in)	    {	    case 't':		*out++ = '\t';		break;	    case 'n':		*out++ = '\n';		break;	    case 'r':		*out++ = '\r';		break;	    case 'f':		*out++ = '\f';		break;	    case 'x':		if (len > 1)		{		    char s[4];		    int n = 0;		    s[0] = *++in;		    s[1] = *++in;		    s[2] = '\0';		    len = len - 2;		    sscanf (s, "%x", &n);		    *out++ = n;		}		break;	    case '0':	    case '1':	    case '2':	    case '3':		if (len > 1)		{		    char s[4];		    int n = 0;		    s[0] = *in;		    s[1] = *++in;		    		    s[2] = *++in;		    s[3] = '\0';		    len = len - 2;		    sscanf (s, "%o", &n);		    *out++ = n;		}		break;	    default:		*out++ = *in;		break;	    }	    in++;	}	else	    *out++ = *in++;    return out - out_buf;}static int p_query_parse_attr(struct yaz_pqf_parser *li, ODR o,			      int num_attr, int *attr_list,			      char **attr_clist, oid_value *attr_set){    const char *cp;    if (!(cp = strchr (li->lex_buf, '=')) ||	(size_t) (cp-li->lex_buf) > li->lex_len)    {	attr_set[num_attr] = query_oid_getvalbyname (li);	if (attr_set[num_attr] == VAL_NONE)        {  
          li->error = YAZ_PQF_ERROR_ATTSET;	    return 0;        }	if (!lex (li))        {            li->error = YAZ_PQF_ERROR_MISSING;            return 0;        }	if (!(cp = strchr (li->lex_buf, '=')))        {            li->error = YAZ_PQF_ERROR_BADATTR;	    return 0;        }    }    else     {	if (num_attr > 0)	    attr_set[num_attr] = attr_set[num_attr-1];	else	    attr_set[num_attr] = VAL_NONE;    }    attr_list[2*num_attr] = atoi(li->lex_buf);	cp++;    if (*cp >= '0' && *cp <= '9')    {	attr_list[2*num_attr+1] = atoi (cp);	attr_clist[num_attr] = 0;    }    else    {	int len = li->lex_len - (cp - li->lex_buf);	attr_list[2*num_attr+1] = 0;	attr_clist[num_attr] = (char *) odr_malloc (o, len+1);	len = escape_string(attr_clist[num_attr], cp, len);	attr_clist[num_attr][len] = '\0';    }    return 1;}static Z_AttributesPlusTerm *rpn_term (struct yaz_pqf_parser *li, ODR o,                                       oid_proto proto,                                        int num_attr, int *attr_list,				       char **attr_clist, oid_value *attr_set){    Z_AttributesPlusTerm *zapt;    Odr_oct *term_octet;    Z_Term *term;    Z_AttributeElement **elements;    zapt = (Z_AttributesPlusTerm *)odr_malloc (o, sizeof(*zapt));    term_octet = (Odr_oct *)odr_malloc (o, sizeof(*term_octet));    term = (Z_Term *)odr_malloc (o, sizeof(*term));    if (!num_attr)        elements = (Z_AttributeElement**)odr_nullval();    else    {        int i, k = 0;        int *attr_tmp;        elements = (Z_AttributeElement**)	    odr_malloc (o, num_attr * sizeof(*elements));        attr_tmp = (int *)odr_malloc (o, num_attr * 2 * sizeof(int));        memcpy (attr_tmp, attr_list, num_attr * 2 * sizeof(int));        for (i = num_attr; --i >= 0; )        {            int j;            for (j = i+1; j<num_attr; j++)                if (attr_tmp[2*j] == attr_tmp[2*i])                    break;            if (j < num_attr)                continue;            elements[k] =                
(Z_AttributeElement*)odr_malloc (o,sizeof(**elements));            elements[k]->attributeType = &attr_tmp[2*i];	    elements[k]->attributeSet =		yaz_oidval_to_z3950oid(o, CLASS_ATTSET, attr_set[i]);	    if (attr_clist[i])	    {		elements[k]->which = Z_AttributeValue_complex;		elements[k]->value.complex = (Z_ComplexAttribute *)		    odr_malloc (o, sizeof(Z_ComplexAttribute));		elements[k]->value.complex->num_list = 1;		elements[k]->value.complex->list =		    (Z_StringOrNumeric **)		    odr_malloc (o, 1 * sizeof(Z_StringOrNumeric *));		elements[k]->value.complex->list[0] =		    (Z_StringOrNumeric *)		    odr_malloc (o, sizeof(Z_StringOrNumeric));		elements[k]->value.complex->list[0]->which =		    Z_StringOrNumeric_string;		elements[k]->value.complex->list[0]->u.string =		    attr_clist[i];		elements[k]->value.complex->semanticAction = (int **)		    odr_nullval();		elements[k]->value.complex->num_semanticAction = 0;	    }	    else	    {		elements[k]->which = Z_AttributeValue_numeric;		elements[k]->value.numeric = &attr_tmp[2*i+1];	    }            k++;        }        num_attr = k;    }    zapt->attributes = (Z_AttributeList *)	odr_malloc (o, sizeof(*zapt->attributes));    zapt->attributes->num_attributes = num_attr;    zapt->attributes->attributes = elements;    zapt->term = term;    term_octet->buf = (unsigned char *)odr_malloc (o, 1 + li->lex_len);    term_octet->size = term_octet->len =        escape_string ((char *) (term_octet->buf), li->lex_buf, li->lex_len);    term_octet->buf[term_octet->size] = 0;  /* null terminate */        switch (li->term_type)    {    case Z_Term_general:        term->which = Z_Term_general;        term->u.general = term_octet;        break;    case Z_Term_characterString:        term->which = Z_Term_characterString;        term->u.characterString = (char*) term_octet->buf;                                     /* null terminated above */        break;    case Z_Term_numeric:        term->which = Z_Term_numeric;        term->u.numeric = 
odr_intdup (o, atoi((char*) (term_octet->buf)));        break;    case Z_Term_null:        term->which = Z_Term_null;        term->u.null = odr_nullval();        break;    case Z_Term_external:        term->which = Z_Term_external;	term->u.external = 0;	break;    default:        term->which = Z_Term_null;        term->u.null = odr_nullval();        break;    }    return zapt;}static Z_Operand *rpn_simple (struct yaz_pqf_parser *li, ODR o, oid_proto proto,                              int num_attr, int *attr_list, char **attr_clist,                              oid_value *attr_set){    Z_Operand *zo;    zo = (Z_Operand *)odr_malloc (o, sizeof(*zo));    switch (li->query_look)    {    case 't':        zo->which = Z_Operand_APT;        if (!(zo->u.attributesPlusTerm =              rpn_term (li, o, proto, num_attr, attr_list, attr_clist,			attr_set)))            return 0;        lex (li);        break;    case 's':        lex (li);        if (!li->query_look)        {            li->error = YAZ_PQF_ERROR_MISSING;            return 0;        }        zo->which = Z_Operand_resultSetId;        zo->u.resultSetId = (char *)odr_malloc (o, li->lex_len+1);        memcpy (zo->u.resultSetId, li->lex_buf, li->lex_len);        zo->u.resultSetId[li->lex_len] = '\0';        lex (li);        break;    default:        /* we're only called if one of the above types are seens so           this shouldn't happen */        li->error = YAZ_PQF_ERROR_INTERNAL;        return 0;    }    return zo;}static Z_ProximityOperator *rpn_proximity (struct yaz_pqf_parser *li, ODR o){    Z_ProximityOperator *p = (Z_ProximityOperator *)odr_malloc (o, sizeof(*p));    if (!lex (li))    {        li->error = YAZ_PQF_ERROR_MISSING;        return NULL;    }    if (*li->lex_buf == '1')    {        p->exclusion = (int *)odr_malloc (o, sizeof(*p->exclusion));        *p->exclusion = 1;    }     else if (*li->lex_buf == '0')    {        p->exclusion = (int *)odr_malloc (o, sizeof(*p->exclusion));        
*p->exclusion = 0;    }    else

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -