⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 multisearcher.pm

📁 外国人写的Perl搜索引擎程序
💻 PM
字号:
package KinoSearch::Search::MultiSearcher;
use strict;
use warnings;
use KinoSearch::Util::ToolSet;
use base qw( KinoSearch::Searcher );

BEGIN {
    __PACKAGE__->init_instance_vars(
        # members / constructor args
        searchables => undef,
        # members
        starts      => undef,
        max_doc     => undef,
    );
}

use KinoSearch::Search::Similarity;

# Finish construction: compute the combined document count and the
# doc-number offset at which each sub-searcher begins, and install a default
# Similarity if none was supplied.
sub init_instance {
    my $self = shift;
    $self->{field_sims} = {};

    # derive max_doc, relative start offsets.  Each searchable starts at the
    # running total of max_doc for all searchables that precede it, so the
    # first one always starts at 0.
    my $max_doc = 0;
    my @starts;
    for my $searchable ( @{ $self->{searchables} } ) {
        push @starts, $max_doc;
        $max_doc += $searchable->max_doc;
    }
    $self->{max_doc} = $max_doc;
    $self->{starts}  = \@starts;

    # default similarity
    $self->{similarity} = KinoSearch::Search::Similarity->new
        unless defined $self->{similarity};
}

# Return an array ref holding the union of the field names reported by every
# searchable.  Names are de-duplicated through a hash, so their order is
# unspecified.
sub get_field_names {
    my $self = shift;
    my %field_names;
    for my $searchable ( @{ $self->{searchables} } ) {
        my $sub_field_names = $searchable->get_field_names;
        @field_names{@$sub_field_names} = (1) x scalar @$sub_field_names;
    }
    return [ keys %field_names ];
}

# Total of max_doc across all searchables, as computed by init_instance().
sub max_doc { shift->{max_doc} }

# No-op.
sub close { }

# Return the index (into searchables/starts) of the sub-searcher whose range
# contains $doc_num: the last entry whose start offset is <= $doc_num.
# Returns -1 when there are no searchables or $doc_num precedes every start.
sub subsearcher {
    my ( $self, $doc_num ) = @_;
    my $i = -1;
    for ( @{ $self->{starts} } ) {
        last if $_ > $doc_num;
        $i++;
    }
    return $i;
}

# Return the sum of doc_freq($term) over all searchables.
sub doc_freq {
    my ( $self, $term ) = @_;
    my $doc_freq = 0;
    $doc_freq += $_->doc_freq($term) for @{ $self->{searchables} };
    return $doc_freq;
}

# Fetch a document by its aggregate doc number: locate the owning
# sub-searcher, translate the number into that searcher's local numbering by
# subtracting its start offset, and delegate.
sub fetch_doc {
    my ( $self, $doc_num ) = @_;
    my $i          = $self->subsearcher($doc_num);
    my $searchable = $self->{searchables}[$i];
    $doc_num -= $self->{starts}[$i];
    return $searchable->fetch_doc($doc_num);
}

# Accepted named arguments (and their defaults) for search_hit_collector().
my %search_hit_collector_args = (
    hit_collector => undef,
    weight        => undef,
    filter        => undef,
    sort_spec     => undef,
);

# Run the search against every searchable in turn.  Each sub-search gets its
# own OffsetCollector wrapping the caller's hit_collector and configured with
# that searchable's start offset; all other arguments are passed through
# unchanged.
#
# verify_args(), kerror() and confess() are not defined in this file --
# presumably they are exported by KinoSearch::Util::ToolSet.
sub search_hit_collector {
    my $self = shift;
    confess kerror() unless verify_args( \%search_hit_collector_args, @_ );
    my %args = ( %search_hit_collector_args, @_ );
    my ( $searchables, $starts ) = @{$self}{qw( searchables starts )};

    for my $i ( 0 .. $#$searchables ) {
        my $searchable = $searchables->[$i];
        my $start      = $starts->[$i];
        my $collector  = KinoSearch::Search::OffsetCollector->new(
            hit_collector => $args{hit_collector},
            offset        => $start
        );

        # the later hit_collector pair overrides the one already in %args
        $searchable->search_hit_collector( %args,
            hit_collector => $collector );
    }
}

# Return the query unchanged.
sub rewrite {
    my ( $self, $orig_query ) = @_;

    # not necessary to rewrite until we add query types that need it
    return $orig_query;

    #my @queries = map { $_->rewrite($orig_query) } @{ $self->{searchables} };
    #my $combined = $queries->[0]->combine(\@queries);
    #return $combined;
}

# Build a Weight for $query using document frequencies aggregated across all
# searchables.  The aggregated figures are handed to the query through a
# CacheDFSource, which stands in for a searcher when to_weight() is called.
sub create_weight {
    my ( $self, $query ) = @_;
    my $searchables = $self->{searchables};

    my $rewritten_query = $self->rewrite($query);

    # generate an array of unique terms
    my @terms = $rewritten_query->extract_terms;
    my %unique_terms;
    for my $term (@terms) {
        if ( a_isa_b( $term, "KinoSearch::Index::Term" ) ) {
            $unique_terms{ $term->to_string } = $term;
        }
        else {
            # PhraseQuery returns an array of terms
            $unique_terms{ $_->to_string } = $_ for @$term;
        }
    }

    # values() and keys() on the same unmodified hash return their elements
    # in matching order, so $terms[$j] corresponds to $stringified[$j].
    @terms = values %unique_terms;
    my @stringified = keys %unique_terms;

    # get an aggregated doc_freq for each term
    my @aggregated_doc_freqs = (0) x scalar @terms;
    for my $i ( 0 .. $#$searchables ) {
        my $doc_freqs = $searchables->[$i]->doc_freqs( \@terms );
        for my $j ( 0 .. $#terms ) {
            $aggregated_doc_freqs[$j] += $doc_freqs->[$j];
        }
    }

    # prepare a hashmap of stringified_term => doc_freq pairs.
    my %doc_freq_map;
    @doc_freq_map{@stringified} = @aggregated_doc_freqs;

    my $cache_df_source = KinoSearch::Search::CacheDFSource->new(
        doc_freq_map => \%doc_freq_map,
        max_doc      => $self->max_doc,
        similarity   => $self->get_similarity,
    );

    return $rewritten_query->to_weight($cache_df_source);
}

package KinoSearch::Search::CacheDFSource;
use strict;
use warnings;
use KinoSearch::Util::ToolSet;
use base qw( KinoSearch::Search::Searchable );

BEGIN {
    __PACKAGE__->init_instance_vars(
        doc_freq_map => {},
        max_doc      => undef,
    );
    __PACKAGE__->ready_get(qw( max_doc ));
}

# Nothing to set up beyond the instance vars.
sub init_instance { }

# Return the cached document frequency for $term, looked up by the term's
# stringified form.  Dies (with a stack trace) if no frequency was cached for
# the term.
sub doc_freq {
    my ( $self, $term ) = @_;
    my $df = $self->{doc_freq_map}{ $term->to_string };
    confess( "df for " . $term->to_string . " not available" )
        unless defined $df;

    # Explicit return is required: without it the sub would yield the result
    # of the `defined $df` test above rather than the frequency itself.
    return $df;
}

# Return an array ref of cached document frequencies, one per term, in the
# order given.  Terms may be supplied either as a single array reference
# (the convention MultiSearcher::create_weight uses when calling doc_freqs
# on its searchables) or as a flat list.
sub doc_freqs {
    my $self = shift;
    my @terms
        = ( @_ == 1 && ref( $_[0] ) eq 'ARRAY' )
        ? @{ $_[0] }
        : @_;
    my @doc_freqs = map { $self->doc_freq($_) } @terms;
    return \@doc_freqs;
}

# The max_doc value supplied at construction time.
sub max_doc { shift->{max_doc} }

# Return the query (first argument after the invocant) unchanged.
sub rewrite {
    return $_[1];
}

=for comment
Dummy class, only here to support initialization of Weights from Queries.

=cut

1;

__END__

=head1 NAME

KinoSearch::Search::MultiSearcher - Aggregate results from multiple searchers.

=head1 SYNOPSIS

    for my $server_name (@server_names) {
        push @searchers, KinoSearch::Search::SearchClient->new(
            peer_address => "$server_name:$port",
            analyzer     => $analyzer,
            password     => $pass,
        );
    }
    my $multi_searcher = KinoSearch::Search::MultiSearcher->new(
        searchables => \@searchers,
        analyzer    => $analyzer,
    );
    my $hits = $multi_searcher->search( query => $query );

=head1 DESCRIPTION

Aside from the arguments to its constructor, MultiSearcher looks and acts just
like a L<KinoSearch::Searcher> object.

The primary use for MultiSearcher is to aggregate results from several remote
searchers via L<SearchClient|KinoSearch::Search::SearchClient>, diffusing the
cost of searching a large corpus over multiple machines.

=head1 METHODS

=head2 new

Constructor.  Takes two hash-style parameters, both of which are required.

=over

=item *

B<analyzer> - an item which subclasses L<KinoSearch::Analysis::Analyzer>.

=item *

B<searchables> - a reference to an array of searchers.

=back

=head1 COPYRIGHT

Copyright 2006-2007 Marvin Humphrey

=head1 LICENSE, DISCLAIMER, BUGS, etc.

See L<KinoSearch|KinoSearch> version 0.163.

=cut

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -