⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 livcode.c

📁 harvest是一个下载html网页的机器人
💻 C
📖 第 1 页 / 共 2 页
字号:
    top->s2                 = s2                     ;    top->s2->which          = Z_RPNStructure_simple  ;    top->s2->u.simple       = simp                   ;    return top ;}/*** expand_query()** expand a simple query into a number of complex queries*/Z_Complex *expand_query(ZebraHandle zh, Z_Operand *thisop ){    Z_Complex *top ;    int numattrs = 0 ;    /*    ** start_rsnode will be set if we have already read the rankfile     ** so don't bother again but we need to know the number of attributes    ** in the linked list so traverse it again to find out how many.    */    if ( start_rsnode )    {        refrsnode node = start_rsnode ;        while ( node )        {            numattrs++ ;            node = node->next_rsnode ;        }    }     /*    ** only expand the query if there are 2 or more attributes     */    if ( numattrs >= 2 )    {        refrsnode node = start_rsnode ;        int attr1 ;        int attr2 ;        attr1 = node->rank ; node = node->next_rsnode ;        attr2 = node->rank ; node = node->next_rsnode ;        /*        ** this is the special case and has to be done first because the         ** last complex node in the linear list has two simple nodes whereas         ** all the others have a complex and a simple.        
*/        top = set_2operands( set_operand( thisop,attr1 ),                             set_operand( thisop,attr2 ) ) ;        /*        ** do the rest as complex/simple pairs        */        while ( node )        {            attr1 = node->rank ; node = node->next_rsnode ;            top = set_1complex_1operand( top,set_operand( thisop,attr1 ) ) ;        }        /*        ** finally add the 1016 rank attribute at the top of the tree        */        top = set_1complex_1operand( top,set_operand( thisop,1016 ) ) ;        return top ;    }    else return NULL ;}/*** check_operand_attrs()** loop through the attributes of a particular operand ** return 1 if (type==1 && value==1016) && (type==2 && value==102) ** otherwise return 0*/int check_operand_attrs( Z_Operand *thisop ) {    Z_AttributeElement *attrptr ;    int cond1 = 0 ;    int cond2 = 0 ;    int numattrs ;    int i ;    numattrs = thisop->u.attributesPlusTerm->attributes->num_attributes ;    for ( i = 0 ; i < numattrs ; i++ )    {        attrptr = thisop->u.attributesPlusTerm->attributes->attributes[i] ;        if ( (*attrptr->attributeType == 1) &&              (*attrptr->value.numeric == 1016) )            cond1 = 1 ;        if ( (*attrptr->attributeType == 2) &&              (*attrptr->value.numeric == 102) )            cond2 = 1 ;    }    return (cond1 & cond2) ;}/*** convert_simple2complex()** */void convert_simple2complex(ZebraHandle zh, Z_RPNStructure *rpnstruct ){    Z_Complex *complex = NULL ;                                             Z_Operand *operand = rpnstruct->u.simple ;                              if ( check_operand_attrs( operand ) )    {        complex = expand_query(zh, operand ) ;        if ( complex )        {            /*            ** Everything is complete so replace the original            ** operand with the newly built complex structure            ** This is it ... no going back!!            
*/            rpnstruct->which     = Z_RPNStructure_complex ;            rpnstruct->u.complex = complex ;        }    }                                                                       }/*** walk_complex_query()** recursively traverse the tree expanding any simple queries we find*/void walk_complex_query(ZebraHandle zh, Z_RPNStructure *rpnstruct ){    if ( rpnstruct->which == Z_RPNStructure_simple )    {        convert_simple2complex(zh, rpnstruct ) ;    }    else    {        walk_complex_query(zh, rpnstruct->u.complex->s1 ) ;        walk_complex_query(zh, rpnstruct->u.complex->s2 ) ;    }}void zebra_livcode_transform(ZebraHandle zh, Z_RPNQuery *query){    /*    ** Got a search request,    ** 1. if it is a simple query, see if it suitable for expansion    **    i.e. the attributes are of the form ...    **    (type==1 && value==1016) && (type==2 && value==102)    ** or    ** 2. if it is complex, traverse the complex query tree and expand    **    any simples simples as above    */#if LIV_CODE    Z_RPNStructure *rpnstruct = query->RPNStructure ;        if ( rpnstruct->which == Z_RPNStructure_simple )    {        convert_simple2complex(zh, rpnstruct ) ;    }    else if ( rpnstruct->which == Z_RPNStructure_complex )    {        walk_complex_query(zh, rpnstruct ) ;    }#endif}struct rank_class_info {    int dummy;};struct rank_term_info {    int local_occur;    int global_occur;    int global_inv;    int rank_flag;};struct rank_set_info {    int last_pos;    int no_entries;    int no_rank_entries;    struct rank_term_info *entries;};static int log2_int (unsigned g){    int n = 0;    while ((g = g>>1))	n++;    return n;}/* * create: Creates/Initialises this rank handler. This routine is  *  called exactly once. The routine returns the class_handle. 
*/static void *create (ZebraHandle zh){    struct rank_class_info *ci = (struct rank_class_info *)	xmalloc (sizeof(*ci));    logf (LOG_DEBUG, "livrank create");    read_zrank_file(zh) ;    return ci;}/* * destroy: Destroys this rank handler. This routine is called *  when the handler is no longer needed - i.e. when the server *  dies. The class_handle was previously returned by create. */static void destroy (struct zebra_register *reg, void *class_handle){    struct rank_class_info *ci = (struct rank_class_info *) class_handle;    logf (LOG_DEBUG, "livrank destroy");    xfree (ci);}/* * begin: Prepares beginning of "real" ranking. Called once for *  each result set. The returned handle is a "set handle" and *  will be used in each of the handlers below. */static void *begin (struct zebra_register *reg, void *class_handle, RSET rset){    struct rank_set_info *si = (struct rank_set_info *) xmalloc (sizeof(*si));    int i;    logf (LOG_DEBUG, "livrank begin");    si->no_entries = rset->no_rset_terms;    si->no_rank_entries = 0;    si->entries = (struct rank_term_info *)	xmalloc (sizeof(*si->entries)*si->no_entries);    for (i = 0; i < si->no_entries; i++)    {        const char *flags = rset->rset_terms[i]->flags;	int g = rset->rset_terms[i]->nn;        const char *cp = strstr(flags, ",u=");        si->entries[i].rank_flag = 1;        if (cp)        {            char *t = search_for_rankstr(atoi(cp+3));            if (t)                si->entries[i].rank_flag = search_for_score(t) ;        }        if ( si->entries[i].rank_flag )            (si->no_rank_entries)++;	si->entries[i].local_occur = 0;	si->entries[i].global_occur = g;	si->entries[i].global_inv = 32 - log2_int (g);	logf (LOG_DEBUG, "-------- %d ------", 32 - log2_int (g));    }    return si;}/* * end: Terminates ranking process. Called after a result set *  has been ranked. 
*/static void end (struct zebra_register *reg, void *set_handle){    struct rank_set_info *si = (struct rank_set_info *) set_handle;    logf (LOG_DEBUG, "livrank end");    xfree (si->entries);    xfree (si);}/* * add: Called for each word occurence in a result set. This routine *  should be as fast as possible. This routine should "incrementally" *  update the score. */static void add (void *set_handle, int seqno, int term_index){    struct rank_set_info *si = (struct rank_set_info *) set_handle;    logf (LOG_DEBUG, "rank-1 add seqno=%d term_index=%d", seqno, term_index);    si->last_pos = seqno;    si->entries[term_index].local_occur++;}/* * calc: Called for each document in a result. This handler should  *  produce a score based on previous call(s) to the add handler. The *  score should be between 0 and 1000. If score cannot be obtained *  -1 should be returned. */static int calc (void *set_handle, int sysno){    int i, lo, divisor, score = 0;    struct rank_set_info *si = (struct rank_set_info *) set_handle;    logf (LOG_DEBUG, "livrank calc sysno=%d", sysno);    if (!si->no_rank_entries)	return -1;    for (i = 0; i < si->no_entries; i++)    {        score += si->entries[i].local_occur * si->entries[i].rank_flag ;    }    for (i = 0; i < si->no_entries; i++)	if (si->entries[i].rank_flag && (lo = si->entries[i].local_occur))	    score += (8+log2_int (lo)) * si->entries[i].global_inv;    score *= 34;    divisor = si->no_rank_entries * (8+log2_int (si->last_pos/si->no_entries));    score = score / divisor;    if (score > 1000)	score = 1000;    for (i = 0; i < si->no_entries; i++)	si->entries[i].local_occur = 0;    return score;}/* * Pseudo-meta code with sequence of calls as they occur in a * server. 
Handlers are prefixed by --: * *     server init *     -- create *     foreach search *        rank result set *        -- begin *        foreach record *           foreach word *              -- add *           -- calc *        -- end *     -- destroy *     server close */static struct rank_control rank_control = {    "livrank",    create,    destroy,    begin,    end,    calc,    add,}; struct rank_control *rankliv_class = &rank_control;

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -