📄 segmentsreader.pm
字号:
package Plucene::Index::SegmentsReader;=head1 NAME Plucene::Index::SegmentsReader - reads the segments=head1 SYNOPSIS my $segs_reader = Plucene::Index::SegmentsReader ->new($dir, Plucene::Index::SegmentReader @readers); my $num_docs = $segs_reader->num_docs; my $doc = $segs_reader->document($id); my $norms = $seg_reader->norms($field); my $doc_freq = $segs_reader->doc_freq($term); my Plucene::Index::SegmentsTermEnum $term_enum = $segs_reader->terms($term); my Plucene::Index::SegmentsTermDocs $term_docs = $segs_reader->term_docs; my Plucene::Index::SegmentsTermPositions $term_positions = $segs_reader->term_positions; if ($segs_reader->is_deleted($id)) { ... }=head1 DESCRIPTIONThis is the segments reader class.=head1 METHODS=cutuse strict;use warnings;use List::Util qw(sum);use Plucene::Index::SegmentsTermEnum;use base qw(Plucene::Index::Reader Class::Accessor::Fast);__PACKAGE__->mk_accessors(qw(max_doc));use Memoize;memoize("norms"); # Saves messing with normsCache=head2 new my $segs_reader = Plucene::Index::SegmentsReader ->new($dir, Plucene::Index::SegmentReader @readers);This will create a new Plucene::Index::SegmentsReader object with the passeddirectory and Plucene::Index::SegmentReader objects.=cutsub new { my ($class, $dir, @readers) = @_; my $self = $class->SUPER::new($dir); $self->{readers} = \@readers; $self->{max_doc} = 0; for my $reader (@readers) { push @{ $self->{starts} }, $self->{max_doc}; $self->{max_doc} += $reader->max_doc; } return $self;}=head2 num_docs my $num_docs = $segs_reader->num_docs;This will return the number of documents in all the segments in the Reader.=cutsub num_docs { my $self = shift; return $self->{num_docs} if exists $self->{num_docs}; return $self->{num_docs} = sum(map $_->num_docs, @{ $self->{readers} });}=head2 document my $doc = $segs_reader->document($id);This will return the document at the passed document id.=cutsub document { my ($self, $n) = @_; my $i = $self->_reader_index($n); return $self->{readers}[$i]->document($n - $self->{starts}[$i]);}=head2 is_deleted if ($segs_reader->is_deleted($id)) { ... }=cutsub is_deleted { my ($self, $n) = @_; my $i = $self->_reader_index($n); return $self->{readers}[$i]->is_deleted($n - $self->{starts}[$i]);}sub _do_delete { my ($self, $n) = @_; delete $self->{num_docs}; # Invalidate cache my $i = $self->_reader_index($n); return $self->{readers}[$i]->_do_delete($n - $self->{starts}[$i]);}sub _do_close { my $self = shift; $_->close for @{ $self->{readers} };}sub _reader_index { my ($self, $n) = @_; my ($lo, $hi) = (0, $#{ $self->{readers} }); # Binary search while ($hi >= $lo) { my $mid = int(($lo + $hi) / 2); my $mid_val = $self->{starts}[$mid]; if ($n < $mid_val) { $hi = $mid - 1; } elsif ($n > $mid_val) { $lo = $mid + 1; } else { return $mid; } } return $hi;}=head2 norms my $norms = $seg_reader->norms($field);This returns the norms for the passed field.=cutsub norms { my ($self, $field) = @_; my $bytes = "\0" x $self->max_doc; for my $i (0 .. $#{ $self->{readers} }) { my $norm = $self->{readers}[$i]->norms($field); substr($bytes, $self->{starts}[$i], length $norm) = $norm; } return $bytes;}=head2 terms my Plucene::Index::SegmentsTermEnum $term_enum = $segs_reader->terms($term);This will return the Plucene::Index::SegmentsTermEnum onject for thepassed in term. =cutsub terms { my ($self, $term) = @_; return Plucene::Index::SegmentsTermEnum->new($self->{readers}, $self->{starts}, $term);}=head2 doc_freq my $doc_freq = $segs_reader->doc_freq($term);This returns the number of documents containing the passed term.=cutsub doc_freq { my ($self, $term) = @_; return sum map $_->doc_freq($term), @{ $self->{readers} };}=head2 term_docs my Plucene::Index::SegmentsTermDocs $term_docs = $segs_reader->term_docs;This will return the Plucene::Index::SegmentsTermDocs object. =cutsub term_docs { my $self = shift; my $term = shift; my $docs = Plucene::Index::SegmentsTermDocs->new($self->{readers}, $self->{starts}); if ($term) { $docs->seek($term) } return $docs;}=head2 term_positions my Plucene::Index::SegmentsTermPositions $term_positions = $segs_reader->term_positions;This will return the Plucene::Index::SegmentsTermPositions object.=cutsub term_positions { my $self = shift; my $term = shift; my $pos = Plucene::Index::SegmentsTermPositions->new($self->{readers}, $self->{starts}); if ($term) { $pos->seek($term) } return $pos;}package Plucene::Index::SegmentsTermDocs;sub new { my ($class, $readers, $starts) = @_; bless { readers => $readers, starts => $starts, seg_term_docs => [], base => 0, pointer => 0, current => undef, }, $class;}sub doc { my $self = shift; return $self->{base} + $self->{current}->doc;}sub freq { return shift->{current}->freq; }sub seek { my ($self, $term) = @_; $self->{term} = $term; $self->{base} = 0; $self->{pointer} = 0; $self->{current} = undef;}sub _at_end { $_[0]->{pointer} >= @{ $_[0]->{readers} } }sub _set_base_and_advance { my $self = shift; $self->{base} = $self->{starts}[ $self->{pointer} ]; $self->{current} = $self->term_docs($self->{pointer}++);}sub next { my $self = shift; return 1 if $self->{current} && $self->{current}->next; unless ($self->_at_end) { $self->_set_base_and_advance; return $self->next; } return 0;}sub read { my ($self) = @_; my ($docs, $freqs) = ([], []); while (1) { # Get a .current, somehow while (!$self->{current}) { goto done if $self->_at_end; # Don't fall off $self->_set_base_and_advance; } my ($new_docs, $new_freqs) = $self->{current}->read($docs, $freqs); if (!scalar @$new_docs) { # It's empty undef $self->{current}; } else { # Correct the doc positions to the appropriate base $_ += $self->{base} for @$new_docs; push @$docs, @$new_docs; push @$freqs, @$new_freqs; goto done; } } done: return ($docs, $freqs);}sub skip_to { my ($self, $target) = @_; $self->next || return while $target > $self->doc; return 1;}sub term_docs { my ($self, $i) = @_; return unless $self->{term}; $self->{seg_term_docs}[$i] = $self->term_docs_r($self->{readers}[$i]) unless exists $self->{seg_term_docs}[$i]; my $result = $self->{seg_term_docs}[$i]; $result->seek($self->{term}); return $result;}sub term_docs_r { my ($self, $reader) = @_; return $reader->term_docs;}package Plucene::Index::SegmentsTermPositions;use base 'Plucene::Index::SegmentsTermDocs';sub term_docs_r { my ($self, $reader) = @_; return $reader->term_positions;}sub next_position { return shift->{current}->next_position;}1;
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -