⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 zrpn.c

📁 harvest是一个下载HTML网页的机器人
💻 C
📖 第 1 页 / 共 5 页
字号:
                    errCode = 121;                continue;            }        }        if (zebraExplain_curDatabase (zh->reg->zei, basenames[base_no]))        {            zh->errCode = 109; /* Database unavailable */            zh->errString = basenames[base_no];            return -1;        }        for (local_attr = attp.local_attributes; local_attr;             local_attr = local_attr->next)        {            int ord;	    char ord_buf[32];	    int i, ord_len;            ord = zebraExplain_lookupSU (zh->reg->zei, attp.attset_ordinal,                                          local_attr->local);            if (ord < 0)                continue;            if (prefix_len)                term_dict[prefix_len++] = '|';            else                term_dict[prefix_len++] = '(';	    ord_len = key_SU_encode (ord, ord_buf);	    for (i = 0; i<ord_len; i++)	    {		term_dict[prefix_len++] = 1;		term_dict[prefix_len++] = ord_buf[i];	    }        }        if (!prefix_len)        {            char val_str[32];            sprintf (val_str, "%d", use_value);            errCode = 114;            errString = nmem_strdup (stream, val_str);            continue;        }	bases_ok++;        term_dict[prefix_len++] = ')';                term_dict[prefix_len++] = 1;        term_dict[prefix_len++] = reg_type;	logf (LOG_DEBUG, "reg_type = %d", term_dict[prefix_len-1]);        term_dict[prefix_len] = '\0';        if (!numeric_relation (zh, zapt, &termp, term_dict,			       attributeSet, grep_info, &max_pos, reg_type,			       term_dst))	    return 0;    }    if (!bases_ok)    {	zh->errCode = errCode;	zh->errString = errString;	return -1;    }    *term_sub = termp;    logf (LOG_DEBUG, "%d positions", grep_info->isam_p_indx);    return 1;}static RSET rpn_search_APT_numeric (ZebraHandle zh,				    Z_AttributesPlusTerm *zapt,				    const char *termz,				    oid_value attributeSet,				    NMEM stream,				    int reg_type, int complete_flag,				    const char *rank_type, int xpath_use,				
    int num_bases, char **basenames){    char term_dst[IT_MAX_WORD+1];    const char *termp = termz;    RSET rset[60], result;    int i, r, rset_no = 0;    struct grep_info grep_info;    if (grep_info_prepare (zh, zapt, &grep_info, reg_type, stream))	return 0;    while (1)    { 	logf (LOG_DEBUG, "APT_numeric termp=%s", termp);	grep_info.isam_p_indx = 0;        r = numeric_term (zh, zapt, &termp, attributeSet, &grep_info,			  reg_type, complete_flag, num_bases, basenames,			  term_dst, xpath_use,			  stream);        if (r < 1)            break;	logf (LOG_DEBUG, "term: %s", term_dst);        rset[rset_no] = rset_trunc (zh, grep_info.isam_p_buf,                                    grep_info.isam_p_indx, term_dst,				    strlen(term_dst), rank_type,                                    0 /* preserve position */,                                    zapt->term->which);        assert (rset[rset_no]);        if (++rset_no >= (int) (sizeof(rset)/sizeof(*rset)))            break;    }    grep_info_delete (&grep_info);    if (rset_no == 0)    {	rset_null_parms parms;		parms.rset_term = rset_term_create (term_dst, -1, rank_type,                                            zapt->term->which);        return rset_create (rset_kind_null, &parms);    }    result = rset[0];    for (i = 1; i<rset_no; i++)    {        rset_bool_parms bool_parms;        bool_parms.rset_l = result;        bool_parms.rset_r = rset[i];        bool_parms.key_size = sizeof(struct it_key);	bool_parms.cmp = key_compare_it;        result = rset_create (rset_kind_and, &bool_parms);    }    return result;}static RSET rpn_search_APT_local (ZebraHandle zh, Z_AttributesPlusTerm *zapt,				  const char *termz,                                  oid_value attributeSet,				  NMEM stream,				  const char *rank_type){    RSET result;    RSFD rsfd;    struct it_key key;    rset_temp_parms parms;    parms.rset_term = rset_term_create (termz, -1, rank_type,                                        zapt->term->which);    parms.cmp = 
key_compare_it;    parms.key_size = sizeof (struct it_key);    parms.temp_path = res_get (zh->res, "setTmpDir");    result = rset_create (rset_kind_temp, &parms);    rsfd = rset_open (result, RSETF_WRITE);    key.sysno = atoi (termz);    key.seqno = 1;    if (key.sysno <= 0)        key.sysno = 1;    rset_write (result, rsfd, &key);    rset_close (result, rsfd);    return result;}static RSET rpn_sort_spec (ZebraHandle zh, Z_AttributesPlusTerm *zapt,			   oid_value attributeSet, NMEM stream,			   Z_SortKeySpecList *sort_sequence,			   const char *rank_type){    rset_null_parms parms;        int i;    int sort_relation_value;    AttrType sort_relation_type;    int use_value;    AttrType use_type;    Z_SortKeySpec *sks;    Z_SortKey *sk;    Z_AttributeElement *ae;    int oid[OID_SIZE];    oident oe;    char termz[20];        attr_init (&sort_relation_type, zapt, 7);    sort_relation_value = attr_find (&sort_relation_type, &attributeSet);    attr_init (&use_type, zapt, 1);    use_value = attr_find (&use_type, &attributeSet);    if (!sort_sequence->specs)    {	sort_sequence->num_specs = 10;	sort_sequence->specs = (Z_SortKeySpec **)	    nmem_malloc (stream, sort_sequence->num_specs *			 sizeof(*sort_sequence->specs));	for (i = 0; i<sort_sequence->num_specs; i++)	    sort_sequence->specs[i] = 0;    }    if (zapt->term->which != Z_Term_general)	i = 0;    else	i = atoi_n ((char *) zapt->term->u.general->buf,		    zapt->term->u.general->len);    if (i >= sort_sequence->num_specs)	i = 0;    sprintf (termz, "%d", i);    oe.proto = PROTO_Z3950;    oe.oclass = CLASS_ATTSET;    oe.value = attributeSet;    if (!oid_ent_to_oid (&oe, oid))	return 0;    sks = (Z_SortKeySpec *) nmem_malloc (stream, sizeof(*sks));    sks->sortElement = (Z_SortElement *)	nmem_malloc (stream, sizeof(*sks->sortElement));    sks->sortElement->which = Z_SortElement_generic;    sk = sks->sortElement->u.generic = (Z_SortKey *)	nmem_malloc (stream, sizeof(*sk));    sk->which = Z_SortKey_sortAttributes;    
sk->u.sortAttributes = (Z_SortAttributes *)	nmem_malloc (stream, sizeof(*sk->u.sortAttributes));    sk->u.sortAttributes->id = oid;    sk->u.sortAttributes->list = (Z_AttributeList *)	nmem_malloc (stream, sizeof(*sk->u.sortAttributes->list));    sk->u.sortAttributes->list->num_attributes = 1;    sk->u.sortAttributes->list->attributes = (Z_AttributeElement **)	nmem_malloc (stream, sizeof(*sk->u.sortAttributes->list->attributes));    ae = *sk->u.sortAttributes->list->attributes = (Z_AttributeElement *)	nmem_malloc (stream, sizeof(**sk->u.sortAttributes->list->attributes));    ae->attributeSet = 0;    ae->attributeType =	(int *)	nmem_malloc (stream, sizeof(*ae->attributeType));    *ae->attributeType = 1;    ae->which = Z_AttributeValue_numeric;    ae->value.numeric = (int *)	nmem_malloc (stream, sizeof(*ae->value.numeric));    *ae->value.numeric = use_value;    sks->sortRelation = (int *)	nmem_malloc (stream, sizeof(*sks->sortRelation));    if (sort_relation_value == 1)	*sks->sortRelation = Z_SortRelation_ascending;    else if (sort_relation_value == 2)	*sks->sortRelation = Z_SortRelation_descending;    else 	*sks->sortRelation = Z_SortRelation_ascending;    sks->caseSensitivity = (int *)	nmem_malloc (stream, sizeof(*sks->caseSensitivity));    *sks->caseSensitivity = 0;    sks->which = Z_SortKeySpec_null;    sks->u.null = odr_nullval ();    sort_sequence->specs[i] = sks;    parms.rset_term = rset_term_create (termz, -1, rank_type,                                        zapt->term->which);    return rset_create (rset_kind_null, &parms);}/* pop - moved to xpath.c */#if 0struct xpath_predicate {    int which;    union {#define XPATH_PREDICATE_RELATION 1        struct {            char *name;            char *op;            char *value;        } relation;#define XPATH_PREDICATE_BOOLEAN 2        struct {            const char *op;            struct xpath_predicate *left;            struct xpath_predicate *right;        } boolean;    } u;};struct xpath_location_step {    
char *part;    struct xpath_predicate *predicate;};#endifstatic int parse_xpath(ZebraHandle zh, Z_AttributesPlusTerm *zapt,                       oid_value attributeSet,                       struct xpath_location_step *xpath, int max, NMEM mem){    oid_value curAttributeSet = attributeSet;    AttrType use;    const char *use_string = 0;        attr_init (&use, zapt, 1);    attr_find_ex (&use, &curAttributeSet, &use_string);    if (!use_string || *use_string != '/')        return -1;    return zebra_parse_xpath_str(use_string, xpath, max, mem);}                static RSET xpath_trunc(ZebraHandle zh, NMEM stream,                        int reg_type, const char *term, int use,                        oid_value curAttributeSet){    RSET rset;    struct grep_info grep_info;    char term_dict[2048];    char ord_buf[32];    int prefix_len = 0;    int ord = zebraExplain_lookupSU (zh->reg->zei, curAttributeSet, use);    int ord_len, i, r, max_pos;    int term_type = Z_Term_characterString;    const char *flags = "void";    if (grep_info_prepare (zh, 0 /* zapt */, &grep_info, '0', stream))    {	rset_null_parms parms;		parms.rset_term = rset_term_create (term, strlen(term),					    flags, term_type);	parms.rset_term->nn = 0;	return rset_create (rset_kind_null, &parms);    }    if (ord < 0)    {	rset_null_parms parms;		parms.rset_term = rset_term_create (term, strlen(term),					    flags, term_type);	parms.rset_term->nn = 0;	return rset_create (rset_kind_null, &parms);    }    if (prefix_len)        term_dict[prefix_len++] = '|';    else        term_dict[prefix_len++] = '(';        ord_len = key_SU_encode (ord, ord_buf);    for (i = 0; i<ord_len; i++)    {        term_dict[prefix_len++] = 1;        term_dict[prefix_len++] = ord_buf[i];    }    term_dict[prefix_len++] = ')';    term_dict[prefix_len++] = 1;    term_dict[prefix_len++] = reg_type;        strcpy (term_dict+prefix_len, term);        grep_info.isam_p_indx = 0;    r = dict_lookup_grep (zh->reg->dict, term_dict, 0,     
                     &grep_info, &max_pos, 0, grep_handle);    yaz_log (LOG_LOG, "%s %d positions", term,             grep_info.isam_p_indx);    rset = rset_trunc (zh, grep_info.isam_p_buf,                       grep_info.isam_p_indx, term, strlen(term),                       flags, 1, term_type);    grep_info_delete (&grep_info);    return rset;}static RSET rpn_search_xpath (ZebraHandle zh,                              oid_value attributeSet,                              int num_bases, char **basenames,                              NMEM stream, const char *rank_type, RSET rset,                              int xpath_len, struct xpath_location_step *xpath){    oid_value curAttributeSet = attributeSet;    int base_no;    int i;    if (xpath_len < 0)        return rset;    yaz_log (LOG_LOG, "len=%d", xpath_len);    for (i = 0; i<xpath_len; i++)    {        yaz_log (LOG_LOG, "XPATH %d %s", i, xpath[i].part);    }    curAttributeSet = VAL_IDXPATH;    /*      //a    ->    a/.*      //a/b  ->    b/a/.*      /a     ->    a/      /a/b   ->    b/a/      /      ->    none   a[@attr=value]/b[@other=othervalue] /e/@a val      range(e/,range(@a,freetext(w,1015,val),@a),e/) /a/b val       range(b/a/,freetext(w,1016,val),b/a/) /a/b/@c val    range(b/a/,range(@c,freetext(w,1016,val),@c),b/a/) /a/b[@c=y] val range(b/a/,freetext(w,1016,val),b/a/,@c=y) /a[@c=y]/b val range(a/,range(b/a/,freetext(w,1016,val),b/a/),a/,@c=y) /a[@c=x]/b[@c=y] range(a/,range(b/a/,freetext(w,1016,val),b/a/,@c=y),a/,@c=x)          */    dict_grep_cmap (zh->reg->dict, 0, 0);    for (base_no = 0; base_no < num_bases; base_no++)    {        int level = xpath_len;        int first_path = 1;                if (zebraExplain_curDatabase (zh->reg->zei, basenames[base_no]))        {            zh->errCode = 109; /* Database unavailable */            zh->errString = basenames[base_no];            return rset;        }        while (--level >= 0)        {            char xpath_rev[128];            int i, len;          
  rset_between_parms parms;            RSET rset_start_tag = 0, rset_end_tag = 0, rset_attr = 0;            *xpath_rev = 0;            len = 0;            for (i = level; i >= 1; --i)            {                const char *cp = xpath[i].part;                if (*cp)                {                    for (;*cp; cp++)                        if (*cp == '*')                        {                            memcpy (xpath_rev + len, "[^/]*", 5);                            len += 5;                        }                        else if (*cp == ' ')                        {                            xpath_rev[len++] = 1;                            xpath_rev[len++] = ' ';                        }                        else                            xpath_rev[len++] = *cp;                    xpath_rev[len++] = '/';                }                else if (i == 1)  /* // case */                {                    xpath_rev[len++] = '.';                    xpath_rev[len++] = '*';                }            }            xpath_rev[len] = 0;            if (xpath[level].predicate &&                xpath[level].predicate->which == XPATH_PREDICATE_RELATION &&                xpath[level].predicate->u.relation.name[0])            {                char predicate_str[128];                strcpy (predicate_str,                        xpath[level].predicate->u.relation.name+1);                if (xpath[level].predicate->u.relation.value)                {                    strcat (predicate_str, "=");                    strcat (predicate_str,                            xpath[level].predicate->u.relation.value);                }                rset_attr = xpat

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -