📄 multireader.pm
字号:
package KinoSearch::Index::MultiReader;use strict;use warnings;use KinoSearch::Util::ToolSet;use base qw( KinoSearch::Index::IndexReader );BEGIN { __PACKAGE__->init_instance_vars( invindex => undef, sub_readers => undef, starts => undef, max_doc => 0, norms_cache => undef, );}use KinoSearch::Index::FieldInfos;use KinoSearch::Index::SegReader;use KinoSearch::Index::MultiTermDocs;# use KinoSearch::Util::Class's new()# Note: can't inherit IndexReader's new() without recursion problems*new = *KinoSearch::Util::Class::new;sub init_instance { my $self = shift; $self->{sub_readers} ||= []; $self->{starts} ||= []; $self->{norms_cache} ||= {}; $self->_init_sub_readers;}sub _init_sub_readers { my $self = shift; my @starts; my $max_doc = 0; for my $sub_reader ( @{ $self->{sub_readers} } ) { push @starts, $max_doc; $max_doc += $sub_reader->max_doc; } $self->{starts} = \@starts; $self->{max_doc} = $max_doc;}sub max_doc { shift->{max_doc} }sub num_docs { my $self = shift; my $num_docs = 0; $num_docs += $_->num_docs for @{ $self->{sub_readers} }; return $num_docs;}sub term_docs { my ( $self, $term ) = @_; my $term_docs = KinoSearch::Index::MultiTermDocs->new( sub_readers => $self->{sub_readers}, starts => $self->{starts}, ); $term_docs->seek($term); return $term_docs;}sub doc_freq { my ( $self, $term ) = @_; my $doc_freq = 0; $doc_freq += $_->doc_freq($term) for @{ $self->{sub_readers} }; return $doc_freq;}sub fetch_doc { my ( $self, $doc_num ) = @_; my $reader_index = $self->_reader_index($doc_num); $doc_num -= $self->{starts}[$reader_index]; return $self->{sub_readers}[$reader_index]->fetch_doc($doc_num);}sub delete_docs_by_term { my ( $self, $term ) = @_; $_->delete_docs_by_term($term) for @{ $self->{sub_readers} };}sub commit_deletions { my $self = shift; $_->commit_deletions for @{ $self->{sub_readers} };}# Determine which sub-reader a document resides insub _reader_index { my ( $self, $doc_num ) = @_; my $starts = $self->{starts}; my ( $lo, $mid, $hi ) = ( 0, undef, $#$starts ); while ( $hi >= $lo ) { $mid = ( $lo + $hi ) >> 1; my $mid_start = $starts->[$mid]; if ( $doc_num < $mid_start ) { $hi = $mid - 1; } elsif ( $doc_num > $mid_start ) { $lo = $mid + 1; } else { while ( $mid < $#$starts and $starts->[ $mid + 1 ] == $mid_start ) { $mid++; } return $mid; } } return $hi;}sub norms_reader { # TODO refactor and minimize copying my ( $self, $field_num ) = @_; if ( exists $self->{norms_cache}{$field_num} ) { return $self->{norms_cache}{$field_num}; } else { my $bytes = ''; for my $seg_reader ( @{ $self->{sub_readers} } ) { my $seg_norms_reader = $seg_reader->norms_reader($field_num); $bytes .= ${ $seg_norms_reader->get_bytes } if $seg_norms_reader; } my $norms_reader = $self->{norms_cache}{$field_num} = KinoSearch::Index::NormsReader->new( bytes => $bytes, max_doc => $self->max_doc, ); return $norms_reader; }}sub generate_field_infos { my $self = shift; my $new_finfos = KinoSearch::Index::FieldInfos->new; my @sub_finfos = map { $_->generate_field_infos } @{ $self->{sub_readers} }; $new_finfos->consolidate(@sub_finfos); return $new_finfos;}sub get_field_names { my $self = shift; my %field_names; for my $sub_reader ( @{ $self->{sub_readers} } ) { my $sub_field_names = $sub_reader->get_field_names; @field_names{@$sub_field_names} = (1) x scalar @$sub_field_names; } return [ keys %field_names ];}sub segreaders_to_merge { my ( $self, $all ) = @_; return unless @{ $self->{sub_readers} }; return @{ $self->{sub_readers} } if $all; # sort by ascending size in docs my @sorted_sub_readers = sort { $a->num_docs <=> $b->num_docs } @{ $self->{sub_readers} }; # find sparsely populated segments my $total_docs = 0; my $threshold = -1; for my $i ( 0 .. $#sorted_sub_readers ) { $total_docs += $sorted_sub_readers[$i]->num_docs; if ( $total_docs < fibonacci( $i + 5 ) ) { $threshold = $i; } } # if any of the segments are sparse, return their readers if ( $threshold > -1 ) { return @sorted_sub_readers[ 0 .. $threshold ]; } else { return; }}# Generate fibonacci seriesmy %fibo_cache;sub fibonacci { my $n = shift; return $fibo_cache{$n} if exists $fibo_cache{$n}; my $result = $n < 2 ? $n : fibonacci( $n - 1 ) + fibonacci( $n - 2 ); $fibo_cache{$n} = $result; return $result;}sub close { my $self = shift; return unless $self->{close_invindex}; $_->close for @{ $self->{sub_readers} };}1;__END__=begin devdocs=head1 NAMEKinoSearch::Index::MultiReader - read from a multi-segment invindex=head1 DESCRIPTION Multi-segment implementation of IndexReader.=head1 COPYRIGHTCopyright 2005-2007 Marvin Humphrey=head1 LICENSE, DISCLAIMER, BUGS, etc.See L<KinoSearch|KinoSearch> version 0.163.=end devdocs=cut
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -