📄 multisearcher.pm
字号:
package KinoSearch::Search::MultiSearcher;
use strict;
use warnings;
use KinoSearch::Util::ToolSet;
use base qw( KinoSearch::Searcher );

BEGIN {
    __PACKAGE__->init_instance_vars(
        # members / constructor args
        searchables => undef,
        # members
        starts  => undef,
        max_doc => undef,
    );
}

use KinoSearch::Search::Similarity;

# Finish construction: compute the combined max_doc and, for each searchable,
# the offset at which its document numbers begin in the combined numbering.
sub init_instance {
    my $self = shift;
    $self->{field_sims} = {};

    # derive max_doc, relative start offsets
    my $max_doc = 0;
    my @starts;
    for my $searchable ( @{ $self->{searchables} } ) {
        push @starts, $max_doc;
        $max_doc += $searchable->max_doc;
    }
    $self->{max_doc} = $max_doc;
    $self->{starts}  = \@starts;

    # default similarity
    $self->{similarity} = KinoSearch::Search::Similarity->new
        unless defined $self->{similarity};
}

# Return an array ref holding the union of the field names reported by every
# searchable.  Order is whatever `keys` yields, i.e. unspecified.
sub get_field_names {
    my $self = shift;
    my %field_names;
    for my $searchable ( @{ $self->{searchables} } ) {
        my $sub_field_names = $searchable->get_field_names;
        @field_names{@$sub_field_names} = (1) x scalar @$sub_field_names;
    }
    return [ keys %field_names ];
}

# Sum of max_doc over all searchables, as computed in init_instance.
sub max_doc { shift->{max_doc} }

# Intentionally a no-op.
sub close { }

# Return the index (into searchables/starts) of the last start offset that is
# less than or equal to $doc_num.  Returns -1 when no start offset qualifies,
# which includes the case of an empty searchables list.
sub subsearcher {
    my ( $self, $doc_num ) = @_;
    my $i = -1;
    for ( @{ $self->{starts} } ) {
        last if $_ > $doc_num;
        $i++;
    }
    return $i;
}

# Return the sum of doc_freq($term) across all searchables.
sub doc_freq {
    my ( $self, $term ) = @_;
    my $doc_freq = 0;
    $doc_freq += $_->doc_freq($term) for @{ $self->{searchables} };
    return $doc_freq;
}

# Fetch a document by its combined document number: locate the owning
# searchable, subtract its start offset, and delegate to it.
sub fetch_doc {
    my ( $self, $doc_num ) = @_;
    my $i          = $self->subsearcher($doc_num);
    my $searchable = $self->{searchables}[$i];
    $doc_num -= $self->{starts}[$i];
    return $searchable->fetch_doc($doc_num);
}

# Accepted labeled parameters (and their defaults) for search_hit_collector.
my %search_hit_collector_args = (
    hit_collector => undef,
    weight        => undef,
    filter        => undef,
    sort_spec     => undef,
);

# Run the search against every searchable in turn.  Each searchable receives
# the caller's arguments, except that hit_collector is replaced by an
# OffsetCollector wrapping the caller's collector with that searchable's
# start offset.
# NOTE(review): presumably OffsetCollector shifts reported doc numbers into
# the combined numbering -- confirm against its implementation.
sub search_hit_collector {
    my $self = shift;
    confess kerror() unless verify_args( \%search_hit_collector_args, @_ );
    my %args = ( %search_hit_collector_args, @_ );
    my ( $searchables, $starts ) = @{$self}{qw( searchables starts )};

    for my $i ( 0 .. $#$searchables ) {
        my $searchable = $searchables->[$i];
        my $start      = $starts->[$i];
        my $collector  = KinoSearch::Search::OffsetCollector->new(
            hit_collector => $args{hit_collector},
            offset        => $start
        );
        $searchable->search_hit_collector( %args,
            hit_collector => $collector );
    }
}

# Currently returns the query unchanged.
sub rewrite {
    my ( $self, $orig_query ) = @_;

    # not necessary to rewrite until we add query types that need it
    return $orig_query;

    #my @queries = map { $_->rewrite($orig_query) } @{ $self->{searchables} };
    #my $combined = $queries->[0]->combine(\@queries);
    #return $combined;
}

# Build a Weight for $query using document frequencies aggregated across all
# searchables.  The aggregated figures are handed to the query through a
# CacheDFSource, which stands in for a searcher during to_weight().
sub create_weight {
    my ( $self, $query ) = @_;
    my $searchables = $self->{searchables};

    my $rewritten_query = $self->rewrite($query);

    # generate an array of unique terms
    my @terms = $rewritten_query->extract_terms;
    my %unique_terms;
    for my $term (@terms) {
        if ( a_isa_b( $term, "KinoSearch::Index::Term" ) ) {
            $unique_terms{ $term->to_string } = $term;
        }
        else {
            # PhraseQuery returns an array of terms
            $unique_terms{ $_->to_string } = $_ for @$term;
        }
    }

    # keys and values of an unmodified hash come back in the same order, so
    # $terms[$j] and $stringified[$j] refer to the same term.
    @terms = values %unique_terms;
    my @stringified = keys %unique_terms;

    # get an aggregated doc_freq for each term
    my @aggregated_doc_freqs = (0) x scalar @terms;
    for my $i ( 0 .. $#$searchables ) {
        my $doc_freqs = $searchables->[$i]->doc_freqs( \@terms );
        for my $j ( 0 .. $#terms ) {
            $aggregated_doc_freqs[$j] += $doc_freqs->[$j];
        }
    }

    # prepare a hashmap of stringified_term => doc_freq pairs.
    my %doc_freq_map;
    @doc_freq_map{@stringified} = @aggregated_doc_freqs;

    my $cache_df_source = KinoSearch::Search::CacheDFSource->new(
        doc_freq_map => \%doc_freq_map,
        max_doc      => $self->max_doc,
        similarity   => $self->get_similarity,
    );

    return $rewritten_query->to_weight($cache_df_source);
}

package KinoSearch::Search::CacheDFSource;
use strict;
use warnings;
use KinoSearch::Util::ToolSet;
use base qw( KinoSearch::Search::Searchable );

BEGIN {
    __PACKAGE__->init_instance_vars(
        doc_freq_map => {},
        max_doc      => undef,
    );
    __PACKAGE__->ready_get(qw( max_doc ));
}

# Nothing to set up beyond the instance vars.
sub init_instance { }

# Return the cached document frequency for $term, looked up by the term's
# stringified form.  Throws (via confess) if no frequency was cached for it.
sub doc_freq {
    my ( $self, $term ) = @_;
    my $df = $self->{doc_freq_map}{ $term->to_string };
    confess( "df for " . $term->to_string . " not available" )
        unless defined $df;

    # Explicit return is required: without it the sub would yield the value
    # of the `unless` condition (1) rather than the frequency itself.
    return $df;
}

# Return an array ref of cached document frequencies, one per term, in the
# order given.  Terms may be passed either as a flat list or as a single
# reference to an array of terms (the form MultiSearcher::create_weight uses
# when calling doc_freqs on its searchables).
sub doc_freqs {
    my $self = shift;

    # A lone *unblessed* array ref is a term list; Term objects are blessed,
    # so ref() never reports 'ARRAY' for them.
    my @terms
        = ( @_ == 1 && ref( $_[0] ) eq 'ARRAY' )
        ? @{ $_[0] }
        : @_;
    my @doc_freqs = map { $self->doc_freq($_) } @terms;
    return \@doc_freqs;
}

# The max_doc value supplied to the constructor.
sub max_doc { shift->{max_doc} }

# Return the query unchanged.
sub rewrite {
    return $_[1];
}

=for comment
Dummy class, only here to support initialization of Weights from Queries.

=cut

1;

__END__

=head1 NAME

KinoSearch::Search::MultiSearcher - Aggregate results from multiple searchers.

=head1 SYNOPSIS

    for my $server_name (@server_names) {
        push @searchers, KinoSearch::Search::SearchClient->new(
            peer_address => "$server_name:$port",
            analyzer     => $analyzer,
            password     => $pass,
        );
    }
    my $multi_searcher = KinoSearch::Search::MultiSearcher->new(
        searchables => \@searchers,
        analyzer    => $analyzer,
    );
    my $hits = $multi_searcher->search( query => $query );

=head1 DESCRIPTION

Aside from the arguments to its constructor, MultiSearcher looks and acts just
like a L<KinoSearch::Searcher> object.

The primary use for MultiSearcher is to aggregate results from several remote
searchers via L<SearchClient|KinoSearch::Search::SearchClient>, diffusing the
cost of searching a large corpus over multiple machines.

=head1 METHODS

=head2 new

Constructor.  Takes two hash-style parameters, both of which are required.

=over

=item *

B<analyzer> - an item which subclasses L<KinoSearch::Analysis::Analyzer>.

=item *

B<searchables> - a reference to an array of searchers.

=back

=head1 COPYRIGHT

Copyright 2006-2007 Marvin Humphrey

=head1 LICENSE, DISCLAIMER, BUGS, etc.

See L<KinoSearch|KinoSearch> version 0.163.

=cut
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -