⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 hitcollector.pm

📁 外国人写的Perl搜索引擎程序
💻 PM
字号:
package KinoSearch::Search::HitCollector;
use strict;
use warnings;
use KinoSearch::Util::ToolSet;
use base qw( KinoSearch::Util::CClass );

# Base class: all xs, other than the pragmas/includes.  The constructor,
# accessors, collect() and DESTROY live in the __XS__ section below.

package KinoSearch::Search::HitQueueCollector;
use strict;
use warnings;
use KinoSearch::Util::ToolSet;
use base qw( KinoSearch::Search::HitCollector );

BEGIN {
    __PACKAGE__->init_instance_vars(
        # constructor args
        size => undef,
    );
}
our %instance_vars;

use KinoSearch::Search::HitQueue;

# Constructor.  Takes one required labeled parameter, 'size', which becomes
# the max_size of the HitQueue used as this collector's storage.
# Dies if the args fail verification or if 'size' is not defined.
sub new {
    my $self = shift->SUPER::new;
    confess kerror() unless verify_args( \%instance_vars, @_ );
    my %args = @_;
    croak("Required parameter: 'size'") unless defined $args{size};

    my $hit_queue
        = KinoSearch::Search::HitQueue->new( max_size => $args{size} );
    $self->_set_storage($hit_queue);
    $self->_define_collect;

    return $self;
}

# For this subclass, the generic integer slot holds the running hit count and
# the generic storage slot holds the HitQueue.
*get_total_hits = *KinoSearch::Search::HitCollector::get_i;
*get_hit_queue  = *KinoSearch::Search::HitCollector::get_storage;

# Return the max_size of the underlying HitQueue.
sub get_max_size {
    return shift->get_hit_queue->get_max_size;
}

package KinoSearch::Search::BitCollector;
use strict;
use warnings;
use KinoSearch::Util::ToolSet;
use base qw( KinoSearch::Search::HitCollector );

BEGIN {
    __PACKAGE__->init_instance_vars(
        # constructor params
        capacity => 0,
    );
}
our %instance_vars;

use KinoSearch::Util::BitVector;

# Constructor.  Takes an optional labeled parameter, 'capacity' (default 0),
# which is handed to the BitVector used as this collector's storage.
sub new {
    my $self = shift->SUPER::new;
    confess kerror() unless verify_args( \%instance_vars, @_ );
    my %args = ( %instance_vars, @_ );

    my $bit_vec
        = KinoSearch::Util::BitVector->new( capacity => $args{capacity} );
    $self->_set_storage($bit_vec);
    $self->_define_collect;

    return $self;
}

# For this subclass, the generic storage slot holds the BitVector.
*get_bit_vector = *KinoSearch::Search::HitCollector::get_storage;

package KinoSearch::Search::FilteredCollector;
use strict;
use warnings;
use KinoSearch::Util::ToolSet;
use base qw( KinoSearch::Search::HitCollector );

BEGIN {
    __PACKAGE__->init_instance_vars(
        hit_collector => undef,
        filter_bits   => undef,
    );
}
our %instance_vars;

# Constructor.  Labeled parameters:
#   hit_collector - required; must be a KinoSearch::Search::HitCollector.
#                   It becomes the inner collector that receives hits.
#   filter_bits   - passed to _set_filter_bits; docs are forwarded to the
#                   inner collector only when their bit is set.
sub new {
    my $self = shift->SUPER::new;
    confess kerror() unless verify_args( \%instance_vars, @_ );
    my %args = @_;
    croak("Required parameter: 'hit_collector'")
        unless a_isa_b( $args{hit_collector},
        "KinoSearch::Search::HitCollector" );

    $self->_set_filter_bits( $args{filter_bits} );
    $self->_set_storage( $args{hit_collector} );
    $self->_define_collect;

    return $self;
}

package KinoSearch::Search::OffsetCollector;
use strict;
use warnings;
use KinoSearch::Util::ToolSet;
use base qw( KinoSearch::Search::HitCollector );

BEGIN {
    __PACKAGE__->init_instance_vars(
        hit_collector => undef,
        offset        => undef,
    );
}
our %instance_vars;

# Constructor.  Labeled parameters:
#   hit_collector - required; must be a KinoSearch::Search::HitCollector.
#                   It becomes the inner collector that receives hits.
#   offset        - amount added to every doc_num before it is forwarded.
#                   Treated as 0 when not supplied.
sub new {
    my $self = shift->SUPER::new;
    confess kerror() unless verify_args( \%instance_vars, @_ );
    my %args = @_;
    croak("Required parameter: 'hit_collector'")
        unless a_isa_b( $args{hit_collector},
        "KinoSearch::Search::HitCollector" );

    # An undefined offset used to be numified implicitly inside the XS
    # setter; spell the zero default out instead.
    my $offset = defined $args{offset} ? $args{offset} : 0;

    $self->_set_f($offset);
    $self->_set_storage( $args{hit_collector} );
    $self->_define_collect;

    return $self;
}

1;

__END__

__XS__

MODULE = KinoSearch    PACKAGE = KinoSearch::Search::HitCollector

void
new(either_sv)
    SV *either_sv;
PREINIT:
    const char   *class;
    HitCollector *hc;
PPCODE:
    hc    = Kino_HC_new();
    /* When invoked on an object, bless into that object's package.
     * sv_reftype() must be given the referent, with its second argument
     * true, in order to return the class name rather than a type string. */
    class = sv_isobject(either_sv)
        ? sv_reftype(SvRV(either_sv), 1)
        : SvPV_nolen(either_sv);
    ST(0) = sv_newmortal();
    sv_setref_pv(ST(0), class, (void*)hc);
    XSRETURN(1);

=begin comment

    $hit_collector->collect( $doc_num, $score );

Process a doc_num/score combination.  In production, this method should not be
called from Perl, as collecting hits is an extremely data-intensive operation.

=end comment
=cut

void
collect(hc, doc_num, score)
    HitCollector *hc;
    U32           doc_num;
    float         score;
PPCODE:
    hc->collect(hc, doc_num, score);

SV*
_set_or_get(hc, ...)
    HitCollector *hc;
ALIAS:
    _set_storage     = 1
    get_storage      = 2
    _set_i           = 3
    get_i            = 4
    _set_f           = 5
    _get_f           = 6
    _set_filter_bits = 7
    _get_filter_bits = 8
CODE:
{
    KINO_START_SET_OR_GET_SWITCH

    case 1:  SvREFCNT_dec(hc->storage_ref);
             hc->storage_ref = newSVsv( ST(1) );
             Kino_extract_anon_struct(hc->storage_ref, hc->storage);
             /* fall through */
    case 2:  RETVAL = newSVsv(hc->storage_ref);
             break;

    case 3:  hc->i = SvUV( ST(1) );
             /* fall through */
    case 4:  RETVAL = newSVuv(hc->i);
             break;

    case 5:  hc->f = SvNV( ST(1) );
             /* fall through */
    case 6:  RETVAL = newSVnv(hc->f);
             break;

    case 7:  SvREFCNT_dec(hc->filter_bits_ref);
             hc->filter_bits_ref = newSVsv( ST(1) );
             Kino_extract_struct( hc->filter_bits_ref, hc->filter_bits,
                 BitVector*, "KinoSearch::Util::BitVector" );
             /* fall through */
    case 8:  RETVAL = newSVsv(hc->filter_bits_ref);
             break;

    KINO_END_SET_OR_GET_SWITCH
}
OUTPUT: RETVAL

void
DESTROY(hc)
    HitCollector *hc;
PPCODE:
    Kino_HC_destroy(hc);

MODULE = KinoSearch    PACKAGE = KinoSearch::Search::HitQueueCollector

void
_define_collect(hc)
    HitCollector *hc;
PPCODE:
    hc->collect = Kino_HC_collect_HitQueue;

MODULE = KinoSearch    PACKAGE = KinoSearch::Search::BitCollector

void
_define_collect(hc)
    HitCollector *hc;
PPCODE:
    hc->collect = Kino_HC_collect_BitVec;

MODULE = KinoSearch    PACKAGE = KinoSearch::Search::FilteredCollector

void
_define_collect(hc)
    HitCollector *hc;
PPCODE:
    hc->collect = Kino_HC_collect_filtered;

MODULE = KinoSearch    PACKAGE = KinoSearch::Search::OffsetCollector

void
_define_collect(hc)
    HitCollector *hc;
PPCODE:
    hc->collect = Kino_HC_collect_offset;

__H__

#ifndef H_KINO_HIT_COLLECTOR
#define H_KINO_HIT_COLLECTOR 1

#include "EXTERN.h"
#include "perl.h"
#include "XSUB.h"
#include "KinoSearchUtilCarp.h"
#include "KinoSearchUtilMathUtils.h"
#include "KinoSearchUtilBitVector.h"
#include "KinoSearchUtilPriorityQueue.h"
#include "KinoSearchUtilMemManager.h"

/* One struct serves every collector flavor; the collect function pointer
 * picks the behavior and the remaining members are general-purpose slots:
 *
 *   f               - HitQueue collector: lowest score worth inserting.
 *                     Offset collector: the doc_num offset.
 *   i               - running count of hits (HitQueue and BitVec collectors).
 *   storage         - a PriorityQueue, a BitVector, or an inner HitCollector,
 *                     depending on which collect function is installed.
 *   storage_ref     - Perl scalar copied from the one passed to _set_storage.
 *   filter_bits     - BitVector consulted by the filtered collector.
 *   filter_bits_ref - Perl scalar copied from the one passed to
 *                     _set_filter_bits.
 */
typedef struct hitcollector {
    void      (*collect)(struct hitcollector*, U32, float);
    float       f;
    U32         i;
    void       *storage;
    SV         *storage_ref;
    BitVector  *filter_bits;
    SV         *filter_bits_ref;
} HitCollector;

HitCollector* Kino_HC_new();
void Kino_HC_collect_death(HitCollector*, U32, float);
void Kino_HC_collect_HitQueue(HitCollector*, U32, float);
void Kino_HC_collect_BitVec(HitCollector*, U32, float);
void Kino_HC_collect_filtered(HitCollector*, U32, float);
void Kino_HC_collect_offset(HitCollector*, U32, float);
void Kino_HC_destroy(HitCollector*);

#endif /* include guard */

__C__

#include "KinoSearchSearchHitCollector.h"

/* Allocate a HitCollector with zeroed/NULL slots and a collect function that
 * dies, so that a subclass which forgets to install its own is caught on
 * first use.
 */
HitCollector*
Kino_HC_new() {
    HitCollector  *hc;

    /* allocate memory and init */
    Kino_New(0, hc, 1, HitCollector);
    hc->f               = 0;
    hc->i               = 0;
    hc->storage         = NULL;
    hc->storage_ref     = &PL_sv_undef;
    hc->filter_bits     = NULL;
    hc->filter_bits_ref = &PL_sv_undef;

    /* force the subclass to spec a collect method */
    hc->collect = Kino_HC_collect_death;

    return hc;
}

/* Default collect function: always throws. */
void
Kino_HC_collect_death(HitCollector *hc, U32 doc_num, float score) {
    Kino_confess("hit_collector->collect must be assigned in a subclass");
}

/* Count the hit, then insert it into the queue in hc->storage unless its
 * score is below the current minimum held in hc->f.
 */
void
Kino_HC_collect_HitQueue(HitCollector *hc, U32 doc_num, float score) {
    /* add to the total number of hits */
    hc->i++;

    /* bail if the score is below the minimum */
    if (score < hc->f) {
        return;
    }
    else {
        SV *element;
        char doc_num_buf[4];
        PriorityQueue *hit_queue;
        hit_queue = (PriorityQueue*)hc->storage;

        /* put a dualvar scalar -- encoded doc_num in PV, score in NV */
        element = sv_newmortal();
        (void)SvUPGRADE(element, SVt_PVNV);
        Kino_encode_bigend_U32(doc_num, &doc_num_buf);
        sv_setpvn(element, doc_num_buf, (STRLEN)4);
        SvNV_set(element, (double)score);
        SvNOK_on(element);
        (void)Kino_PriQ_insert(hit_queue, element);

        /* store the bubble score in a more accessible spot */
        if (hit_queue->size == hit_queue->max_size) {
            SV *least_sv;
            least_sv = Kino_PriQ_peek(hit_queue);
            hc->f    = SvNV(least_sv);
        }
    }
}

/* Count the hit and set the bit for doc_num in the BitVector held in
 * hc->storage.  The score is ignored.
 */
void
Kino_HC_collect_BitVec(HitCollector *hc, U32 doc_num, float score) {
    BitVector *bit_vec;
    bit_vec = (BitVector*)hc->storage;

    /* add to the total number of hits */
    hc->i++;

    /* add the doc_num to the BitVector */
    Kino_BitVec_set(bit_vec, doc_num);
}

/* Forward the hit to the inner collector in hc->storage, but only when the
 * bit for doc_num is set in hc->filter_bits.  Throws if no filter is set.
 */
void
Kino_HC_collect_filtered(HitCollector *hc, U32 doc_num, float score) {
    if (hc->filter_bits == NULL) {
        Kino_confess("filter_bits not set on FilteredCollector");
    }

    if (Kino_BitVec_get(hc->filter_bits, doc_num)) {
        HitCollector *inner_collector;
        inner_collector = (HitCollector*)hc->storage;
        inner_collector->collect(inner_collector, doc_num, score);
    }
}

/* Add the offset held in hc->f to doc_num and forward the hit to the inner
 * collector in hc->storage.
 */
void
Kino_HC_collect_offset(HitCollector *hc, U32 doc_num, float score) {
    HitCollector *inner_collector = (HitCollector*)hc->storage;
    /* Convert the offset to an integer BEFORE adding.  Adding a U32 to a
     * float is carried out in float arithmetic, which cannot represent every
     * integer above 2**24 and would therefore corrupt large doc numbers. */
    U32 offset         = (U32)hc->f;
    U32 offset_doc_num = doc_num + offset;
    inner_collector->collect(inner_collector, offset_doc_num, score);
}

/* Release the two Perl scalars held by the collector, then the struct. */
void
Kino_HC_destroy(HitCollector *hc) {
    SvREFCNT_dec(hc->storage_ref);
    SvREFCNT_dec(hc->filter_bits_ref);
    Kino_Safefree(hc);
}

__POD__

=begin devdocs

=head1 NAME

KinoSearch::Search::HitCollector - process doc/score pairs

=head1 DESCRIPTION

A Scorer spits out raw doc_num/score pairs; a HitCollector decides what to do
with them, based on the hc->collect method.

A HitQueueCollector keeps the highest scoring N documents and their associated
scores in a HitQueue while iterating through a large list.

A BitCollector builds a BitVector with a set bit for each doc number (scores
are irrelevant).

A FilteredCollector wraps another HitCollector, only allowing the inner
collector to "see" doc_num/score pairs which make it through the filter.

An OffsetCollector wraps another HitCollector, adding a fixed offset to each
doc_num before passing the doc_num/score pair along to the inner collector.

=head1 COPYRIGHT

Copyright 2005-2007 Marvin Humphrey

=head1 LICENSE, DISCLAIMER, BUGS, etc.

See L<KinoSearch|KinoSearch> version 0.163.

=end devdocs
=cut

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -