⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 booleanscorer.pm

📁 外国人写的Perl搜索引擎程序
💻 PM
字号:
package KinoSearch::Search::BooleanScorer;use strict;use warnings;use KinoSearch::Util::ToolSet;use base qw( KinoSearch::Search::Scorer );BEGIN { __PACKAGE__->init_instance_vars() }our %instance_vars;sub new {    my $self = shift->SUPER::new;    confess kerror() unless verify_args( \%instance_vars, @_ );    my %args = ( %instance_vars, @_ );    $self->set_similarity( $args{similarity} );    $self->_init_child;    return $self;}1;__END____XS__MODULE = KinoSearch    PACKAGE = KinoSearch::Search::BooleanScorervoid_init_child(scorer)    Scorer *scorer;PPCODE:    Kino_BoolScorer_init_child(scorer);=for commentAdd a scorer for a sub-query of the BooleanQuery.=cutvoid add_subscorer(scorer, subscorer_sv, occur)    Scorer *scorer;    SV     *subscorer_sv;    char   *occur;PREINIT:    BoolScorerChild* child;    Scorer *subscorer;    SV     *subscorer_sv_copy;PPCODE:    child = (BoolScorerChild*)scorer->child;    Kino_extract_struct(subscorer_sv, subscorer,         Scorer*, "KinoSearch::Search::Scorer");    subscorer_sv_copy = newSVsv(subscorer_sv);    av_push(child->subscorers_av, subscorer_sv_copy);    Kino_BoolScorer_add_subscorer(scorer, subscorer, occur);SV*_boolean_scorer_set_or_get(scorer, ...)    Scorer* scorer;ALIAS:    _get_subscorer_storage = 2CODE:{    BoolScorerChild* child = (BoolScorerChild*)scorer->child;    KINO_START_SET_OR_GET_SWITCH    case 2:  RETVAL = newRV((SV*)child->subscorers_av);             break;    KINO_END_SET_OR_GET_SWITCH}OUTPUT: RETVALvoidDESTROY(scorer)    Scorer *scorer;PPCODE:    Kino_BoolScorer_destroy(scorer);__H__#ifndef H_KINO_BOOLEAN_SCORER#define H_KINO_BOOLEAN_SCORER 1#include "EXTERN.h"#include "perl.h"#include "XSUB.h"#include "KinoSearchSearchScorer.h"#include "KinoSearchUtilMemManager.h"#define KINO_MATCH_BATCH_SIZE (1 << 11)#define KINO_MATCH_BATCH_DOC_MASK (KINO_MATCH_BATCH_SIZE - 1)/* A MatchBatch can hold scoring data for 2048 documents.  */typedef struct matchbatch {    U32       count;    float    *scores;    U32      *matcher_counts;    U32      *bool_masks;    U32      *recent_docs;} MatchBatch;typedef struct boolsubscorer {    Scorer *scorer;    U32     bool_mask;    bool    done;    struct boolsubscorer *next_subscorer;} BoolSubScorer;typedef struct boolscorerchild {    U32            doc;    U32            end;    U32            max_coord;    float         *coord_factors;    U32            required_mask;    U32            prohibited_mask;    U32            next_mask;    MatchBatch    *mbatch;    BoolSubScorer *subscorers; /* linked list */    AV            *subscorers_av;} BoolScorerChild;void Kino_BoolScorer_init_child(Scorer*);MatchBatch* Kino_BoolScorer_new_mbatch();void Kino_BoolScorer_clear_mbatch(MatchBatch*);void Kino_BoolScorer_compute_coord_factors(Scorer*);void Kino_BoolScorer_add_subscorer(Scorer*, Scorer*, char*);bool Kino_BoolScorer_next(Scorer*);float Kino_BoolScorer_score(Scorer*);U32 Kino_BoolScorer_doc(Scorer*);void Kino_BoolScorer_destroy(Scorer*);#endif /* include guard */__C__#include "KinoSearchSearchBooleanScorer.h"voidKino_BoolScorer_init_child(Scorer *scorer) {    BoolScorerChild *child;    Kino_New(0, child, 1, BoolScorerChild);    scorer->child = child;    /* define Scorer's abstract methods */    scorer->next  = Kino_BoolScorer_next;    scorer->doc   = Kino_BoolScorer_doc;    scorer->score = Kino_BoolScorer_score;    /* init */    child->end             = 0;    child->max_coord       = 1;    child->coord_factors   = NULL;    child->required_mask   = 0;    child->prohibited_mask = 0;    child->next_mask       = 1;    child->mbatch          = Kino_BoolScorer_new_mbatch();    child->subscorers      = NULL;    child->subscorers_av   = newAV();}MatchBatch*Kino_BoolScorer_new_mbatch() {    MatchBatch* mbatch;    /* allocate and init */    Kino_New(0, mbatch, 1, MatchBatch);    Kino_New(0, mbatch->scores, KINO_MATCH_BATCH_SIZE, float);    Kino_New(0, mbatch->matcher_counts, KINO_MATCH_BATCH_SIZE, U32);    Kino_New(0, mbatch->bool_masks, KINO_MATCH_BATCH_SIZE, U32);    Kino_New(0, mbatch->recent_docs, KINO_MATCH_BATCH_SIZE, U32);    mbatch->count    = 0;    return mbatch;}/* Return a MatchBatch to a "zeroed" state.  Only the matcher_counts and the * count are actually cleared; the rest get initialized the next time a doc * gets captured. */voidKino_BoolScorer_clear_mbatch(MatchBatch *mbatch) {    Zero(mbatch->matcher_counts, KINO_MATCH_BATCH_SIZE, U32);    mbatch->count = 0;}/* BooleanScorers award bonus points to documents which match multiple * subqueries.  This routine calculates the size of the bonuses. */voidKino_BoolScorer_compute_coord_factors(Scorer *scorer) {    BoolScorerChild *child;    float           *coord_factors;    U32              i;    child = (BoolScorerChild*)scorer->child;    Kino_New(0, child->coord_factors, (child->max_coord + 1), float);    coord_factors = child->coord_factors;    for (i = 0; i <= child->max_coord; i++) {        *coord_factors++             = scorer->sim->coord(scorer->sim, i, child->max_coord);    }}voidKino_BoolScorer_add_subscorer(Scorer* main_scorer, Scorer* subscorer,                               char *occur) {    BoolScorerChild *child;    BoolSubScorer   *bool_subscorer;    child = (BoolScorerChild*)main_scorer->child;        Kino_New(0, bool_subscorer, 1, BoolSubScorer);    bool_subscorer->scorer = subscorer;    /* if this scorer is required or negated, assign it a unique mask bit. */    if (strnEQ(occur, "SHOULD", 6)) {        bool_subscorer->bool_mask = 0;        child->max_coord++;    }    else {        if (child->next_mask == 0) {            Kino_confess("more than 32 required or prohibited clauses");        }        bool_subscorer->bool_mask = child->next_mask;        child->next_mask <<= 1;        if (strnEQ(occur, "MUST_NOT", 8)) {            child->prohibited_mask |= bool_subscorer->bool_mask;        }        else { /* "MUST" occur */            child->max_coord++;            child->required_mask |= bool_subscorer->bool_mask;        }    }    /* prime the pump */    bool_subscorer->done = !subscorer->next(subscorer);    /* link up the linked list of subscorers */    bool_subscorer->next_subscorer = child->subscorers;    child->subscorers = bool_subscorer;}boolKino_BoolScorer_next(Scorer* scorer) {    BoolScorerChild *child;    MatchBatch      *mbatch;    bool             more;    U32              doc;    U32              masked_doc;    U32              bool_mask;    BoolSubScorer   *sub;    child = (BoolScorerChild*)scorer->child;    mbatch = child->mbatch;    do {        while (mbatch->count-- > 0) {             /* check to see if the doc is prohibited */            doc        = mbatch->recent_docs[ mbatch->count ];            masked_doc = doc & KINO_MATCH_BATCH_DOC_MASK;            bool_mask  = mbatch->bool_masks[masked_doc];            if (   (bool_mask & child->prohibited_mask) == 0                && (bool_mask & child->required_mask) == child->required_mask            ) {                /* it's not prohibited, so next() was successful */                child->doc = doc;                return 1;            }        }        /* refill the queue, processing all docs within the next range */        Kino_BoolScorer_clear_mbatch(mbatch);        more = 0;        child->end += KINO_MATCH_BATCH_SIZE;                /* iterate through subscorers, caching results to the MatchBatch */        for (sub = child->subscorers; sub != NULL; sub = sub->next_subscorer) {            Scorer *scorer = sub->scorer;            while (!sub->done && scorer->doc(scorer) < child->end) {                doc        = scorer->doc(scorer);                masked_doc = doc & KINO_MATCH_BATCH_DOC_MASK;                if (mbatch->matcher_counts[masked_doc] == 0) {                    /* first scorer to hit this doc */                    mbatch->recent_docs[mbatch->count] = doc;                    mbatch->count++;                    mbatch->matcher_counts[masked_doc] = 1;                    mbatch->scores[masked_doc]     = scorer->score(scorer);                    mbatch->bool_masks[masked_doc] = sub->bool_mask;                }                else {                    mbatch->matcher_counts[masked_doc]++;                    mbatch->scores[masked_doc] += scorer->score(scorer);                    mbatch->bool_masks[masked_doc] |= sub->bool_mask;                }                /* check whether this scorer is exhausted */                sub->done = !scorer->next(scorer);            }            /* if at least one scorer succeeded, loop back */            if (!sub->done) {                more = 1;            }        }     } while (mbatch->count > 0 || more);    /* out of docs!  we're done. */    return 0;}floatKino_BoolScorer_score(Scorer* scorer){    BoolScorerChild *child = (BoolScorerChild*)scorer->child;    MatchBatch      *mbatch = child->mbatch;    U32              masked_doc;    float            score;    if (child->coord_factors == NULL) {        Kino_BoolScorer_compute_coord_factors(scorer);    }    /* retrieve the docs accumulated score from the MatchBatch */    masked_doc = child->doc & KINO_MATCH_BATCH_DOC_MASK;    score = mbatch->scores[masked_doc];    /* assign bonus for multi-subscorer matches */    score *= child->coord_factors[ mbatch->matcher_counts[masked_doc] ];    return score;}U32Kino_BoolScorer_doc(Scorer* scorer) {    BoolScorerChild *child = (BoolScorerChild*)scorer->child;    return child->doc;}voidKino_BoolScorer_destroy(Scorer * scorer) {    BoolSubScorer   *sub, *next_sub;    BoolScorerChild *child;    child = (BoolScorerChild*)scorer->child;    if (child->mbatch != NULL) {        Kino_Safefree(child->mbatch->scores);        Kino_Safefree(child->mbatch->matcher_counts);        Kino_Safefree(child->mbatch->bool_masks);        Kino_Safefree(child->mbatch->recent_docs);        Kino_Safefree(child->mbatch);    }        sub = child->subscorers;    while (sub != NULL) {        next_sub = sub->next_subscorer;        Kino_Safefree(sub);        sub = next_sub;        /* individual scorers will be GC'd on their own by Perl */    }    Kino_Safefree(child->coord_factors);    SvREFCNT_dec((SV*)child->subscorers_av);    Kino_Safefree(child);    Kino_Scorer_destroy(scorer);}__POD__=begin devdocs=head1 NAMEKinoSearch::Search::BooleanScorer - scorer for BooleanQuery=head1 DESCRIPTION Implementation of Scorer for BooleanQuery.=head1 COPYRIGHTCopyright 2005-2007 Marvin Humphrey=head1 LICENSE, DISCLAIMER, BUGS, etc.See L<KinoSearch|KinoSearch> version 0.163.=end devdocs=cut

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -