⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 zebramap.c

📁 harvest是一个下载HTML网页的机器人
💻 C
📖 第 1 页 / 共 2 页
字号:
    int major;    int minor;    Z_AttributeElement **attributeList;    int num_attributes;} AttrType;static int attr_find (AttrType *src, oid_value *attributeSetP){    while (src->major < src->num_attributes)    {        Z_AttributeElement *element;        element = src->attributeList[src->major];        if (src->type == *element->attributeType)        {            switch (element->which)             {            case Z_AttributeValue_numeric:                ++(src->major);                if (element->attributeSet && attributeSetP)                {                    oident *attrset;                    attrset = oid_getentbyoid (element->attributeSet);                    *attributeSetP = attrset->value;                }                return *element->value.numeric;                break;            case Z_AttributeValue_complex:                if (src->minor >= element->value.complex->num_list ||                    element->value.complex->list[src->minor]->which !=                      Z_StringOrNumeric_numeric)                    break;                ++(src->minor);                if (element->attributeSet && attributeSetP)                {                    oident *attrset;                    attrset = oid_getentbyoid (element->attributeSet);                    *attributeSetP = attrset->value;                }                return *element->value.complex->list[src->minor-1]->u.numeric;            default:                assert (0);            }        }        ++(src->major);    }    return -1;}static void attr_init_APT (AttrType *src, Z_AttributesPlusTerm *zapt, int type){    src->attributeList = zapt->attributes->attributes;    src->num_attributes = zapt->attributes->num_attributes;    src->type = type;    src->major = 0;    src->minor = 0;}static void attr_init_AttrList (AttrType *src, Z_AttributeList *list, int type){    src->attributeList = list->attributes;    src->num_attributes = list->num_attributes;    src->type = type;    src->major = 0;    
src->minor = 0;}/* ------------------------------------ */int zebra_maps_is_complete (ZebraMaps zms, unsigned reg_id){     struct zebra_map *zm = zebra_map_get (zms, reg_id);    if (zm)	return zm->completeness;    return 0;}int zebra_maps_is_positioned (ZebraMaps zms, unsigned reg_id){    struct zebra_map *zm = zebra_map_get (zms, reg_id);    if (zm)	return zm->positioned;    return 0;}    int zebra_maps_is_sort (ZebraMaps zms, unsigned reg_id){    struct zebra_map *zm = zebra_map_get (zms, reg_id);    if (zm)	return zm->type == ZEBRA_MAP_TYPE_SORT;    return 0;}int zebra_maps_sort (ZebraMaps zms, Z_SortAttributes *sortAttributes,                     int *numerical){    AttrType use;    AttrType structure;    int structure_value;    attr_init_AttrList (&use, sortAttributes->list, 1);    attr_init_AttrList (&structure, sortAttributes->list, 4);    *numerical = 0;    structure_value = attr_find (&structure, 0);    if (structure_value == 109)        *numerical = 1;    return attr_find (&use, NULL);}int zebra_maps_attr (ZebraMaps zms, Z_AttributesPlusTerm *zapt,		     unsigned *reg_id, char **search_type, char *rank_type,		     int *complete_flag, int *sort_flag){    AttrType completeness;    AttrType structure;    AttrType relation;    AttrType sort_relation;    AttrType weight;    AttrType use;    int completeness_value;    int structure_value;    int relation_value;    int sort_relation_value;    int weight_value;    int use_value;    attr_init_APT (&structure, zapt, 4);    attr_init_APT (&completeness, zapt, 6);    attr_init_APT (&relation, zapt, 2);    attr_init_APT (&sort_relation, zapt, 7);    attr_init_APT (&weight, zapt, 9);    attr_init_APT (&use, zapt, 1);    completeness_value = attr_find (&completeness, NULL);    structure_value = attr_find (&structure, NULL);    relation_value = attr_find (&relation, NULL);    sort_relation_value = attr_find (&sort_relation, NULL);    weight_value = attr_find (&weight, NULL);    use_value = attr_find(&use, NULL);    if 
(completeness_value == 2 || completeness_value == 3)	*complete_flag = 1;    else	*complete_flag = 0;    *reg_id = 0;    *sort_flag = (sort_relation_value > 0) ? 1 : 0;    *search_type = "phrase";    strcpy (rank_type, "void");    if (relation_value == 102)    {        if (weight_value == -1)            weight_value = 34;        sprintf (rank_type, "rank,w=%d,u=%d", weight_value, use_value);    }    if (relation_value == 103)    {        *search_type = "always";        return 0;    }    if (*complete_flag)	*reg_id = 'p';    else	*reg_id = 'w';    switch (structure_value)    {    case 6:   /* word list */	*search_type = "and-list";	break;    case 105: /* free-form-text */	*search_type = "or-list";	break;    case 106: /* document-text */        *search_type = "or-list";	break;	    case -1:    case 1:   /* phrase */    case 2:   /* word */    case 108: /* string */ 	*search_type = "phrase";	break;    case 107: /* local-number */	*search_type = "local";	*reg_id = 0;	break;    case 109: /* numeric string */	*reg_id = 'n';	*search_type = "numeric";        break;    case 104: /* urx */	*reg_id = 'u';	*search_type = "phrase";	break;    case 3:   /* key */        *reg_id = '0';        *search_type = "phrase";        break;    case 4:  /* year */        *reg_id = 'y';        *search_type = "phrase";        break;    case 5:  /* date */        *reg_id = 'd';        *search_type = "phrase";        break;    default:	return -1;    }    return 0;}int zebra_replace_sub(ZebraMaps zms, unsigned reg_id, const char *ex_list,		      const char *input_str, int input_len, WRBUF wrbuf);WRBUF zebra_replace(ZebraMaps zms, unsigned reg_id, const char *ex_list,		    const char *input_str, int input_len){    struct zebra_map *zm = zebra_map_get (zms, reg_id);    wrbuf_rewind(zms->wrbuf_1);    wrbuf_write(zms->wrbuf_1, input_str, input_len);    if (!zm || !zm->replace_tokens)	return zms->wrbuf_1;  #if 0    logf (LOG_LOG, "in:%.*s:", wrbuf_len(zms->wrbuf_1),	  wrbuf_buf(zms->wrbuf_1));#endif    
for (;;)    {	if (!zebra_replace_sub(zms, reg_id, ex_list, wrbuf_buf(zms->wrbuf_1),			       wrbuf_len(zms->wrbuf_1), zms->wrbuf_2))	    return zms->wrbuf_2;	if (!zebra_replace_sub(zms, reg_id, ex_list, wrbuf_buf(zms->wrbuf_2),			       wrbuf_len(zms->wrbuf_2), zms->wrbuf_1))	    return zms->wrbuf_1;    }    return 0;}int zebra_replace_sub(ZebraMaps zms, unsigned reg_id, const char *ex_list,		      const char *input_str, int input_len, WRBUF wrbuf){    int i = -1;    int no_replaces = 0;    struct zebra_map *zm = zebra_map_get (zms, reg_id);    wrbuf_rewind(wrbuf);    for (i = -1; i <= input_len; )    {	struct zm_token *token;	char replace_string[128];	int replace_out;	int replace_in = 0;	for (token = zm->replace_tokens; !replace_in && token;	     token = token->next)	{	    int j = 0;	    int replace_done = 0;	    replace_out = 0;	    for (;; j++)	    {		int c;		if (!token->token_from[j])		{		    replace_in = j;		    break;		}		if (ex_list && strchr (ex_list, token->token_from[j]))		    break;		if (i+j < 0 || j+i >= input_len)		    c = ' ';		else		    c = input_str[j+i] & 255;		if (token->token_from[j] == ZEBRA_REPLACE_ANY)		{		    if (c == ' ')			break;		    replace_string[replace_out++] = c;		}		else		{		    if (c != token->token_from[j])		    {			break;		    }		    if (!replace_done)		    {			const char *cp = token->token_to;			replace_done = 1;			for (; cp && *cp; cp++)			    replace_string[replace_out++] = *cp;		    }		}	    }	}	if (!replace_in)	{	    if (i >= 0 && i < input_len)		wrbuf_putc(wrbuf, input_str[i]);	    i++;	}	else	{	    no_replaces++;	    if (replace_out)		wrbuf_write(wrbuf, replace_string, replace_out);	    i += replace_in;	}    }#if 0    logf (LOG_LOG, "out:%.*s:", wrbuf_len(wrbuf), wrbuf_buf(wrbuf));#endif    return no_replaces;}

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -