⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 cclfind.c

📁 harvest是一个下载html网页的机器人
💻 C
📖 第 1 页 / 共 3 页
字号:
            else if (and_list)                p = mk_node (CCL_RPN_AND);            else                p = mk_node (CCL_RPN_AND);            p->u.p[0] = p_top;            p_top = p;        }                        /* create the term node, but wait a moment before adding the term */        p = mk_node (CCL_RPN_TERM);        p->u.t.attr_list = NULL;        p->u.t.term = NULL;        /* make the top node point to us.. */        if (p_top)            p_top->u.p[1] = p;        else            p_top = p;                /* go through all attributes and add them to the attribute list */        for (i=0; qa && qa[i]; i++)        {            struct ccl_rpn_attr *attr;                        for (attr = qa[i]; attr; attr = attr->next)		switch(attr->kind)		{		case CCL_RPN_ATTR_STRING:		    add_attr_string(p, attr->set, attr->type,				    attr->value.str);		    break;		case CCL_RPN_ATTR_NUMERIC:		    if (attr->value.numeric > 0)		    {   /* deal only with REAL attributes (positive) */			switch (attr->type)			{			case CCL_BIB1_REL:			    if (relation_value != -1)				continue;			    relation_value = attr->value.numeric;			    break;			case CCL_BIB1_POS:			    if (position_value != -1)				continue;			    position_value = attr->value.numeric;			    break;			case CCL_BIB1_STR:			    if (structure_value != -1)				continue;			    structure_value = attr->value.numeric;			    break;			case CCL_BIB1_TRU:			    if (truncation_value != -1)				continue;			    truncation_value = attr->value.numeric;			    left_trunc = right_trunc = mid_trunc = 0;			    break;			case CCL_BIB1_COM:			    if (completeness_value != -1)				continue;			    completeness_value = attr->value.numeric;			    break;			}			add_attr_numeric(p, attr->set, attr->type,					 attr->value.numeric);		    }		}        }        /* len now holds the number of characters in the RPN term */        /* no holds the number of CCL tokens (1 or more) */                if (structure_value == -1 &&             qual_val_type (qa, 
CCL_BIB1_STR, CCL_BIB1_STR_WP, &attset))        {   /* no structure attribute met. Apply either structure attribute                WORD or PHRASE depending on number of CCL tokens */            if (no == 1 && no_spaces == 0)                add_attr_numeric (p, attset, CCL_BIB1_STR, 2);            else                add_attr_numeric (p, attset, CCL_BIB1_STR, 1);        }                /* make the RPN token */        p->u.t.term = (char *)xmalloc (len);        ccl_assert (p->u.t.term);        p->u.t.term[0] = '\0';        for (i = 0; i<no; i++)        {            const char *src_str = cclp->look_token->name;            int src_len = cclp->look_token->len;                        if (i == 0 && left_trunc)            {                src_len--;                src_str++;            }            else if (i == no-1 && right_trunc)                src_len--;            if (src_len)            {                int len = strlen(p->u.t.term);                if (len &&                    !strchr("-+", *src_str) &&                    !strchr("-+", p->u.t.term[len-1]))                {                    strcat (p->u.t.term, " ");                }            }            strxcat (p->u.t.term, src_str, src_len);            ADVANCE;        }        if (left_trunc && right_trunc)        {            if (!qual_val_type (qa, CCL_BIB1_TRU, CCL_BIB1_TRU_CAN_BOTH,                                &attset))            {                cclp->error_code = CCL_ERR_TRUNC_NOT_BOTH;                ccl_rpn_delete (p);                return NULL;            }            add_attr_numeric (p, attset, CCL_BIB1_TRU, 3);        }        else if (right_trunc)        {            if (!qual_val_type (qa, CCL_BIB1_TRU, CCL_BIB1_TRU_CAN_RIGHT,                                 &attset))            {                cclp->error_code = CCL_ERR_TRUNC_NOT_RIGHT;                ccl_rpn_delete (p);                return NULL;            }            add_attr_numeric (p, attset, CCL_BIB1_TRU, 1);        }        
else if (left_trunc)        {            if (!qual_val_type (qa, CCL_BIB1_TRU, CCL_BIB1_TRU_CAN_LEFT,                                &attset))            {                cclp->error_code = CCL_ERR_TRUNC_NOT_LEFT;                ccl_rpn_delete (p);                return NULL;            }            add_attr_numeric (p, attset, CCL_BIB1_TRU, 2);        }        else        {            if (qual_val_type (qa, CCL_BIB1_TRU, CCL_BIB1_TRU_CAN_NONE,                               &attset))                add_attr_numeric (p, attset, CCL_BIB1_TRU, 100);        }        if (!multi)            break;    }    if (!p_top)        cclp->error_code = CCL_ERR_TERM_EXPECTED;    return p_top;}static struct ccl_rpn_node *search_term (CCL_parser cclp,                                         struct ccl_rpn_attr **qa){    static int list[] = {CCL_TOK_TERM, CCL_TOK_COMMA, -1};    return search_term_x(cclp, qa, list, 0);}static struct ccl_rpn_node *qualifiers2 (CCL_parser cclp,                                         struct ccl_rpn_attr **ap){    char *attset;    int rel;    if (!qual_val_type(ap, CCL_BIB1_REL, CCL_BIB1_REL_ORDER, &attset))    {                        /* unordered relation */        struct ccl_rpn_node *p;        if (KIND != CCL_TOK_EQ)        {            cclp->error_code = CCL_ERR_EQ_EXPECTED;            return NULL;        }        ADVANCE;        if (KIND == CCL_TOK_LP)        {            ADVANCE;            if (!(p = find_spec (cclp, ap)))            {                return NULL;            }            if (KIND != CCL_TOK_RP)            {                cclp->error_code = CCL_ERR_RP_EXPECTED;                ccl_rpn_delete (p);                return NULL;            }            ADVANCE;        }        else            p = search_terms (cclp, ap);        return p;    }    /* ordered relation ... 
*/    rel = 0;    if (cclp->look_token->len == 1)    {        if (cclp->look_token->name[0] == '<')            rel = 1;        else if (cclp->look_token->name[0] == '=')            rel = 3;        else if (cclp->look_token->name[0] == '>')            rel = 5;    }    else if (cclp->look_token->len == 2)    {        if (!memcmp (cclp->look_token->name, "<=", 2))            rel = 2;        else if (!memcmp (cclp->look_token->name, ">=", 2))            rel = 4;        else if (!memcmp (cclp->look_token->name, "<>", 2))            rel = 6;    }    if (!rel)        cclp->error_code = CCL_ERR_BAD_RELATION;    else    {        struct ccl_rpn_node *p;                ADVANCE;                      /* skip relation */        if (KIND == CCL_TOK_TERM &&            cclp->look_token->next && cclp->look_token->next->len == 1 &&            cclp->look_token->next->name[0] == '-')        {            struct ccl_rpn_node *p1;            if (!(p1 = search_term (cclp, ap)))                return NULL;            ADVANCE;                   /* skip '-' */            if (KIND == CCL_TOK_TERM)  /* = term - term  ? */            {                struct ccl_rpn_node *p2;                                if (!(p2 = search_term (cclp, ap)))                {                    ccl_rpn_delete (p1);                    return NULL;                }                p = mk_node (CCL_RPN_AND);                p->u.p[0] = p1;                add_attr_numeric (p1, attset, CCL_BIB1_REL, 4);                p->u.p[1] = p2;                add_attr_numeric (p2, attset, CCL_BIB1_REL, 2);                return p;            }            else                       /* = term -    */            {                add_attr_numeric (p1, attset, CCL_BIB1_REL, 4);                return p1;            }        }        else if (cclp->look_token->len == 1 &&                 cclp->look_token->name[0] == '-')   /* = - term  ? 
*/        {            ADVANCE;            if (!(p = search_term (cclp, ap)))                return NULL;            add_attr_numeric (p, attset, CCL_BIB1_REL, 2);            return p;        }        else if (KIND == CCL_TOK_LP)        {            ADVANCE;            if (!(p = find_spec (cclp, ap)))                return NULL;            if (KIND != CCL_TOK_RP)            {                cclp->error_code = CCL_ERR_RP_EXPECTED;                ccl_rpn_delete (p);                return NULL;            }            ADVANCE;            return p;        }        else        {            if (!(p = search_terms (cclp, ap)))                return NULL;            add_attr_numeric (p, attset, CCL_BIB1_REL, rel);            return p;        }        cclp->error_code = CCL_ERR_TERM_EXPECTED;    }    return NULL;}/* * qualifiers1: Parse CCL qualifiers and search terms.  * cclp:   CCL Parser * la:     Token pointer to RELATION token. * qa:     Qualifier attributes already applied. * return: pointer to node(s); NULL on error. */static struct ccl_rpn_node *qualifiers1 (CCL_parser cclp, struct ccl_token *la,                                         struct ccl_rpn_attr **qa){    struct ccl_token *lookahead = cclp->look_token;    struct ccl_token *look_start = cclp->look_token;    struct ccl_rpn_attr **ap;    struct ccl_rpn_node *node = 0;    const char *field_str;    int no = 0;    int seq = 0;    int i;    int mode_merge = 1;#if 0    if (qa)    {        cclp->error_code = CCL_ERR_DOUBLE_QUAL;        return NULL;    }#endif    for (lookahead = cclp->look_token; lookahead != la;         lookahead=lookahead->next)        no++;    if (qa)        for (i=0; qa[i]; i++)            no++;    ap = (struct ccl_rpn_attr **)xmalloc ((no ? 
(no+1) : 2) * sizeof(*ap));    ccl_assert (ap);    field_str = ccl_qual_search_special(cclp->bibset, "field");    if (field_str)    {        if (!strcmp (field_str, "or"))            mode_merge = 0;        else if (!strcmp (field_str, "merge"))            mode_merge = 1;    }    if (!mode_merge)    {        /* consider each field separately and OR */        lookahead = look_start;        while (lookahead != la)        {            ap[1] = 0;            seq = 0;            while ((ap[0] = ccl_qual_search (cclp, lookahead->name,                                             lookahead->len, seq)) != 0)            {                struct ccl_rpn_node *node_sub;                cclp->look_token = la;                                node_sub = qualifiers2(cclp, ap);                if (!node_sub)                {                    ccl_rpn_delete (node);                    xfree (ap);                    return 0;                }                if (node)                {                    struct ccl_rpn_node *node_this = mk_node(CCL_RPN_OR);                    node_this->u.p[0] = node;                    node_this->u.p[1] = node_sub;                    node = node_this;                }                else                    node = node_sub;                seq++;            }            if (seq == 0)            {                cclp->look_token = lookahead;                cclp->error_code = CCL_ERR_UNKNOWN_QUAL;                xfree (ap);

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -