⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 zrpn.c

📁 harvest是一个下载HTML网页的机器人
💻 C
📖 第 1 页 / 共 5 页
字号:
	    {		int sysno = buf[0]->sysno;		int seqno[500];		int n = 0;				seqno[n++] = buf[0]->seqno;		while ((more[0] = rset_read (rset[0], rsfd[0], buf[0],					     &term_index)) &&		       sysno == buf[0]->sysno)		    if (n < 500)			seqno[n++] = buf[0]->seqno;		do		{		    for (i = 0; i<n; i++)		    {			int diff = buf[1]->seqno - seqno[i];			int excl = exclusion;			if (!ordered && diff < 0)			    diff = -diff;			switch (relation)			{			case 1:      /* < */			    if (diff < distance && diff >= 0)				excl = !excl;			    break;			case 2:      /* <= */			    if (diff <= distance && diff >= 0)				excl = !excl;			    break;			case 3:      /* == */			    if (diff == distance && diff >= 0)				excl = !excl;			    break;			case 4:      /* >= */			    if (diff >= distance && diff >= 0)				excl = !excl;			    break;			case 5:      /* > */			    if (diff > distance && diff >= 0)				excl = !excl;			    break;			case 6:      /* != */			    if (diff != distance && diff >= 0)				excl = !excl;			    break;			}			if (excl)			{			    rset_write (result, rsfd_result, buf[1]);			    break;			}		    }		} while ((more[1] = rset_read (rset[1], rsfd[1], buf[1],					       &term_index)) &&			 sysno == buf[1]->sysno);	    }	}	rset_close (result, rsfd_result);    }    else    {	rset_null_parms parms;		parms.rset_term = rset_term_create (prox_term, length_prox_term,					    flags, term_type);	parms.rset_term->nn = 0;	result = rset_create (rset_kind_null, &parms);    }    for (i = 0; i<rset_no; i++)    {	if (rsfd[i])	    rset_close (rset[i], rsfd[i]);	xfree (buf[i]);    }    xfree (buf);    xfree (more);    xfree (rsfd);    return result;}char *normalize_term(ZebraHandle zh, Z_AttributesPlusTerm *zapt,		     const char *termz, NMEM stream, unsigned reg_id){    WRBUF wrbuf = 0;    AttrType truncation;    int truncation_value;    char *ex_list = 0;    attr_init (&truncation, zapt, 5);    truncation_value = attr_find (&truncation, NULL);    switch (truncation_value)    {    default:	ex_list = "";	break; 
   case 101:	ex_list = "#";	break;    case 102:    case 103:	ex_list = 0;	break;    case 104:	ex_list = "!#";	break;    case 105:	ex_list = "!*";	break;    }    if (ex_list)	wrbuf = zebra_replace(zh->reg->zebra_maps, reg_id, ex_list,			      termz, strlen(termz));    if (!wrbuf)	return nmem_strdup(stream, termz);    else    {	char *buf = (char*) nmem_malloc (stream, wrbuf_len(wrbuf)+1);	memcpy (buf, wrbuf_buf(wrbuf), wrbuf_len(wrbuf));	buf[wrbuf_len(wrbuf)] = '\0';	return buf;    }}static void grep_info_delete (struct grep_info *grep_info){#ifdef TERM_COUNT    xfree(grep_info->term_no);#endif    xfree (grep_info->isam_p_buf);}static int grep_info_prepare (ZebraHandle zh,			      Z_AttributesPlusTerm *zapt,			      struct grep_info *grep_info,			      int reg_type,			      NMEM stream){    AttrType termset;    int termset_value_numeric;    const char *termset_value_string;#ifdef TERM_COUNT    grep_info->term_no = 0;#endif    grep_info->isam_p_size = 0;    grep_info->isam_p_buf = NULL;    grep_info->zh = zh;    grep_info->reg_type = reg_type;    grep_info->termset = 0;    if (!zapt)        return 0;    attr_init (&termset, zapt, 8);    termset_value_numeric =	attr_find_ex (&termset, NULL, &termset_value_string);    if (termset_value_numeric != -1)    {	char resname[32];	const char *termset_name = 0;	if (termset_value_numeric != -2)	{    	    sprintf (resname, "%d", termset_value_numeric);	    termset_name = resname;	}	else	    termset_name = termset_value_string;	logf (LOG_LOG, "creating termset set %s", termset_name);	grep_info->termset = resultSetAdd (zh, termset_name, 1);	if (!grep_info->termset)	{	    zh->errCode = 128;	    zh->errString = nmem_strdup (stream, termset_name);	    return -1;	}    }    return 0;}			       static RSET rpn_search_APT_phrase (ZebraHandle zh,                                   Z_AttributesPlusTerm *zapt,				   const char *termz_org,                                   oid_value attributeSet,				   NMEM stream,				   int reg_type, int 
complete_flag,				   const char *rank_type, int xpath_use,				   int num_bases, char **basenames){    char term_dst[IT_MAX_WORD+1];    RSET rset[60], result;    int i, rset_no = 0;    struct grep_info grep_info;    char *termz = normalize_term(zh, zapt, termz_org, stream, reg_type);    const char *termp = termz;    *term_dst = 0;    if (grep_info_prepare (zh, zapt, &grep_info, reg_type, stream))	return 0;    while (1)    { 	logf (LOG_DEBUG, "APT_phrase termp=%s", termp);        rset[rset_no] = term_trunc (zh, zapt, &termp, attributeSet,                                    stream, &grep_info,                                    reg_type, complete_flag,                                    num_bases, basenames,                                    term_dst, rank_type,                                    xpath_use);        if (!rset[rset_no])            break;        if (++rset_no >= (int) (sizeof(rset)/sizeof(*rset)))            break;    }    grep_info_delete (&grep_info);    if (rset_no == 0)    {	rset_null_parms parms;		parms.rset_term = rset_term_create (termz, -1, rank_type,                                            zapt->term->which);        return rset_create (rset_kind_null, &parms);    }    else if (rset_no == 1)        return (rset[0]);    result = rpn_prox (zh, rset, rset_no, 1, 0, 3, 1);    for (i = 0; i<rset_no; i++)        rset_delete (rset[i]);    return result;}static RSET rpn_search_APT_or_list (ZebraHandle zh,                                    Z_AttributesPlusTerm *zapt,				    const char *termz_org,                                    oid_value attributeSet,				    NMEM stream,				    int reg_type, int complete_flag,				    const char *rank_type,                                    int xpath_use,				    int num_bases, char **basenames){    char term_dst[IT_MAX_WORD+1];    RSET rset[60], result;    int i, rset_no = 0;    struct grep_info grep_info;    char *termz = normalize_term(zh, zapt, termz_org, stream, reg_type);    const char *termp = termz;    if 
(grep_info_prepare (zh, zapt, &grep_info, reg_type, stream))	return 0;    while (1)    { 	logf (LOG_DEBUG, "APT_or_list termp=%s", termp);        rset[rset_no] = term_trunc (zh, zapt, &termp, attributeSet,                                    stream, &grep_info,                                    reg_type, complete_flag,                                    num_bases, basenames,                                    term_dst, rank_type,                                    xpath_use);        if (!rset[rset_no])            break;        if (++rset_no >= (int) (sizeof(rset)/sizeof(*rset)))            break;    }    grep_info_delete (&grep_info);    if (rset_no == 0)    {	rset_null_parms parms;		parms.rset_term = rset_term_create (termz, -1, rank_type,                                            zapt->term->which);        return rset_create (rset_kind_null, &parms);    }    result = rset[0];    for (i = 1; i<rset_no; i++)    {        rset_bool_parms bool_parms;        bool_parms.rset_l = result;        bool_parms.rset_r = rset[i];        bool_parms.key_size = sizeof(struct it_key);	bool_parms.cmp = key_compare_it;        result = rset_create (rset_kind_or, &bool_parms);    }    return result;}static RSET rpn_search_APT_and_list (ZebraHandle zh,                                     Z_AttributesPlusTerm *zapt,				     const char *termz_org,                                     oid_value attributeSet,				     NMEM stream,				     int reg_type, int complete_flag,				     const char *rank_type,                                      int xpath_use,				     int num_bases, char **basenames){    char term_dst[IT_MAX_WORD+1];    RSET rset[60], result;    int i, rset_no = 0;    struct grep_info grep_info;    char *termz = normalize_term(zh, zapt, termz_org, stream, reg_type);    const char *termp = termz;    if (grep_info_prepare (zh, zapt, &grep_info, reg_type, stream))	return 0;    while (1)    { 	logf (LOG_DEBUG, "APT_and_list termp=%s", termp);        rset[rset_no] = term_trunc (zh, zapt, 
&termp, attributeSet,                                    stream, &grep_info,                                    reg_type, complete_flag,                                    num_bases, basenames,                                    term_dst, rank_type,                                    xpath_use);        if (!rset[rset_no])            break;        assert (rset[rset_no]);        if (++rset_no >= (int) (sizeof(rset)/sizeof(*rset)))            break;    }    grep_info_delete (&grep_info);    if (rset_no == 0)    {	rset_null_parms parms;		parms.rset_term = rset_term_create (termz, -1, rank_type,                                            zapt->term->which);        return rset_create (rset_kind_null, &parms);    }    result = rset[0];    for (i = 1; i<rset_no; i++)    {        rset_bool_parms bool_parms;        bool_parms.rset_l = result;        bool_parms.rset_r = rset[i];        bool_parms.key_size = sizeof(struct it_key);	bool_parms.cmp = key_compare_it;        result = rset_create (rset_kind_and, &bool_parms);    }    return result;}static int numeric_relation (ZebraHandle zh, Z_AttributesPlusTerm *zapt,			     const char **term_sub,			     char *term_dict,			     oid_value attributeSet,			     struct grep_info *grep_info,			     int *max_pos,			     int reg_type,			     char *term_dst){    AttrType relation;    int relation_value;    int term_value;    int r;    char *term_tmp = term_dict + strlen(term_dict);    attr_init (&relation, zapt, 2);    relation_value = attr_find (&relation, NULL);    logf (LOG_DEBUG, "numeric relation value=%d", relation_value);    if (!term_100 (zh->reg->zebra_maps, reg_type, term_sub, term_tmp, 1,		   term_dst))	return 0;    term_value = atoi (term_tmp);    switch (relation_value)    {    case 1:        logf (LOG_DEBUG, "Relation <");        gen_regular_rel (term_tmp, term_value-1, 1);        break;    case 2:        logf (LOG_DEBUG, "Relation <=");        gen_regular_rel (term_tmp, term_value, 1);        break;    case 4:        logf 
(LOG_DEBUG, "Relation >=");        gen_regular_rel (term_tmp, term_value, 0);        break;    case 5:        logf (LOG_DEBUG, "Relation >");        gen_regular_rel (term_tmp, term_value+1, 0);        break;    case 3:    default:	logf (LOG_DEBUG, "Relation =");	sprintf (term_tmp, "(0*%d)", term_value);    }    logf (LOG_DEBUG, "dict_lookup_grep: %s", term_tmp);    r = dict_lookup_grep (zh->reg->dict, term_dict, 0, grep_info, max_pos,                          0, grep_handle);    if (r)        logf (LOG_WARN, "dict_lookup_grep fail, rel=gt: %d", r);    logf (LOG_DEBUG, "%d positions", grep_info->isam_p_indx);    return 1;}static int numeric_term (ZebraHandle zh, Z_AttributesPlusTerm *zapt,			 const char **term_sub, 			 oid_value attributeSet, struct grep_info *grep_info,			 int reg_type, int complete_flag,			 int num_bases, char **basenames,			 char *term_dst, int xpath_use, NMEM stream){    char term_dict[2*IT_MAX_WORD+2];    int r, base_no;    AttrType use;    int use_value;    const char *use_string = 0;    oid_value curAttributeSet = attributeSet;    const char *termp;    struct rpn_char_map_info rcmi;    int bases_ok = 0;     /* no of databases with OK attribute */    int errCode = 0;      /* err code (if any is not OK) */    char *errString = 0;  /* addinfo */    rpn_char_map_prepare (zh->reg, reg_type, &rcmi);    attr_init (&use, zapt, 1);    use_value = attr_find_ex (&use, &curAttributeSet, &use_string);    if (use_value == -1)        use_value = 1016;    for (base_no = 0; base_no < num_bases; base_no++)    {        attent attp;        data1_local_attribute id_xpath_attr;        data1_local_attribute *local_attr;        int max_pos, prefix_len = 0;        termp = *term_sub;        if (use_value == -2)  /* string attribute (assume IDXPATH/any) */        {            use_value = xpath_use;            attp.local_attributes = &id_xpath_attr;            attp.attset_ordinal = VAL_IDXPATH;            id_xpath_attr.next = 0;            id_xpath_attr.local = 
use_value;        }	else if (curAttributeSet == VAL_IDXPATH)        {            attp.local_attributes = &id_xpath_attr;            attp.attset_ordinal = VAL_IDXPATH;            id_xpath_attr.next = 0;            id_xpath_attr.local = use_value;        }        else        {            if ((r=att_getentbyatt (zh, &attp, curAttributeSet, use_value)))            {                logf (LOG_DEBUG, "att_getentbyatt fail. set=%d use=%d r=%d",                      curAttributeSet, use_value, r);                if (r == -1)		{                    char val_str[32];                    sprintf (val_str, "%d", use_value);                    errString = nmem_strdup (stream, val_str);                    errCode = 114;		}                else

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -