📄 invindexer.pm
字号:
# commit changes to the invindex
    $invindex->run_while_locked(
        lock_name => COMMIT_LOCK_NAME,
        timeout   => COMMIT_LOCK_TIMEOUT,
        do_body   => sub {
            $self->{reader}->commit_deletions if defined $self->{reader};
            $sinfos->write_infos($invindex);
        },
    );

    my @files_to_delete = $self->_generate_deletions_list( \@to_merge );
    push @files_to_delete, $self->_read_delqueue;

    # close reader, so that we can delete its files if appropriate
    $self->{reader}->close if defined $self->{reader};

    $self->_purge_merged(@files_to_delete);
    $self->_release_locks;

    $self->{state} = FINISHED;
}

# Given an array of SegReaders, return a list of their files.
#
# Takes a reference to an array of objects that answer get_seg_name.  For
# each segment name, the candidates "<name>.cfs" and "<name>.del" are
# considered; only those which currently exist in the invindex are returned.
sub _generate_deletions_list {
    my ( $self, $readers_to_merge ) = @_;
    my $invindex      = $self->{invindex};
    my @segs_to_merge = map { $_->get_seg_name } @$readers_to_merge;
    my @deletions     = grep { $invindex->file_exists($_) }
        map { ( "$_.cfs", "$_.del" ) } @segs_to_merge;
    return @deletions;
}

# Retrieve a list of files that weren't successfully deleted before.
#
# Reads the 'delqueue' file, which holds a count followed by that many
# filenames (the layout written by _write_delqueue).  Returns an empty list
# if there is no 'delqueue' file.
sub _read_delqueue {
    my ($self) = @_;
    my $invindex = $self->{invindex};
    my @deletions;

    if ( $invindex->file_exists('delqueue') ) {
        my $instream     = $invindex->open_instream('delqueue');
        my $num_in_queue = $instream->lu_read('i');
        @deletions = $instream->lu_read("T$num_in_queue");
        $instream->close;
    }

    return @deletions;
}

# Delete segments that have been folded into the new segment.
#
# Attempts to delete every file in @deletions.  Any file whose deletion
# failed and which still exists is handed to _write_delqueue, so that a
# later run can try again.
sub _purge_merged {
    my ( $self, @deletions ) = @_;
    my $invindex = $self->{invindex};
    my @delqueue;

    for my $deletion (@deletions) {
        # Detect failure via the eval's own return value rather than the
        # truthiness of $@: $@ can be reset by a destructor that runs while
        # the exception unwinds, which would hide the failure.
        my $deleted = eval { $invindex->delete_file($deletion); 1 };

        # Win32: if the deletion fails (because a reader is open), queue it
        if ( !$deleted and $invindex->file_exists($deletion) ) {
            push @delqueue, $deletion;
        }
    }

    $self->_write_delqueue(@delqueue);
}

# Persist the list of files which could not be deleted.
#
# If @delqueue is non-empty, the count and the filenames are written to
# 'delqueue.new', which is then renamed to 'delqueue'.  If it is empty, any
# existing 'delqueue' file is removed.
sub _write_delqueue {
    my ( $self, @delqueue ) = @_;
    my $invindex  = $self->{invindex};
    my $num_files = scalar @delqueue;

    if ($num_files) {
        # we have files that weren't successfully deleted, so write list
        my $outstream = $invindex->open_outstream('delqueue.new');
        $outstream->lu_write( "iT$num_files", $num_files, @delqueue );
        $outstream->close;
        $invindex->rename_file( 'delqueue.new', 'delqueue' );
    }
    elsif ( $invindex->file_exists('delqueue') ) {
        # no files to delete, so delete the delqueue file if it's there
        $invindex->delete_file('delqueue');
    }
}

# Release the write lock - if it's there.
#
# The lock is only released if it reports itself as locked; either way the
# write_lock slot is cleared afterwards, so calling this twice is harmless.
sub _release_locks {
    my $self = shift;
    if ( defined $self->{write_lock} ) {
        $self->{write_lock}->release if $self->{write_lock}->is_locked;
        undef $self->{write_lock};
    }
}

# Generate segment names (no longer Lucene compatible, as of 0.06).
#
# Increments the counter kept by $self->{sinfos}, stores the new value back,
# and returns it prefixed with an underscore.
sub _new_seg_name {
    my $self = shift;

    my $counter = $self->{sinfos}->get_counter;
    $self->{sinfos}->set_counter( ++$counter );

    return "_$counter";
}

# Make sure the write lock is released when the object goes away.
sub DESTROY {
    my $self = shift;

    # A destructor can fire at any time, including while an exception is
    # propagating.  Localize the global status variables so that nothing
    # done while releasing the lock leaks out and clobbers the caller's
    # error state.
    local ( $., $@, $!, $^E, $? );

    $self->_release_locks;
}

1;

__END__

=head1 NAME

KinoSearch::InvIndexer - build inverted indexes

=head1 WARNING

KinoSearch is alpha test software. The API and the file format are subject to
change.

=head1 SYNOPSIS

    use KinoSearch::InvIndexer;
    use KinoSearch::Analysis::PolyAnalyzer;

    my $analyzer
        = KinoSearch::Analysis::PolyAnalyzer->new( language => 'en' );

    my $invindexer = KinoSearch::InvIndexer->new(
        invindex => '/path/to/invindex',
        create   => 1,
        analyzer => $analyzer,
    );

    $invindexer->spec_field(
        name  => 'title',
        boost => 3,
    );
    $invindexer->spec_field( name => 'bodytext' );

    while ( my ( $title, $bodytext ) = each %source_documents ) {
        my $doc = $invindexer->new_doc($title);

        $doc->set_value( title    => $title );
        $doc->set_value( bodytext => $bodytext );

        $invindexer->add_doc($doc);
    }

    $invindexer->finish;

=head1 DESCRIPTION

The InvIndexer class is KinoSearch's primary tool for creating and
modifying inverted indexes, which may be searched using
L<KinoSearch::Searcher|KinoSearch::Searcher>.

=head1 METHODS

=head2 new

    my $invindexer = KinoSearch::InvIndexer->new(
        invindex => '/path/to/invindex',  # required
        create   => 1,                    # default: 0
        analyzer => $analyzer,            # default: no-op Analyzer
    );

Create an InvIndexer object.

=over

=item *

B<invindex> - can be either a filepath, or an InvIndex subclass such as
L<KinoSearch::Store::FSInvIndex|KinoSearch::Store::FSInvIndex> or
L<KinoSearch::Store::RAMInvIndex|KinoSearch::Store::RAMInvIndex>.

=item *

B<create> - create a new invindex, clobbering an existing one if necessary.

=item *

B<analyzer> - an object which subclasses L<KinoSearch::Analysis::Analyzer>,
such as a L<PolyAnalyzer|KinoSearch::Analysis::PolyAnalyzer>.

=back

=head2 spec_field

    $invindexer->spec_field(
        name       => 'url',      # required
        boost      => 1,          # default: 1
        analyzer   => undef,      # default: analyzer spec'd in new()
        indexed    => 0,          # default: 1
        analyzed   => 0,          # default: 1
        stored     => 1,          # default: 1
        compressed => 0,          # default: 0
        vectorized => 0,          # default: 1
    );

Define a field.

=over

=item *

B<name> - the field's name.

=item *

B<boost> - A multiplier which determines how much a field contributes
to a document's score.

=item *

B<analyzer> - By default, all indexed fields are analyzed using the analyzer
that was supplied to new(). Supplying an alternate for a given field
overrides the primary analyzer.

=item *

B<indexed> - index the field, so that it can be searched later.

=item *

B<analyzed> - analyze the field, using the relevant Analyzer.
Fields such as
"category" or "product_number" might be indexed but not analyzed.

=item *

B<stored> - store the field, so that it can be retrieved when the document
turns up in a search.

=item *

B<compressed> - compress the stored field, using the zlib compression algorithm.

=item *

B<vectorized> - store the field's "term vectors", which are required by
L<KinoSearch::Highlight::Highlighter|KinoSearch::Highlight::Highlighter> for
excerpt selection and search term highlighting.

=back

=head2 new_doc

    my $doc = $invindexer->new_doc;

Spawn an empty L<KinoSearch::Document::Doc|KinoSearch::Document::Doc> object,
primed to accept values for the fields spec'd by spec_field.

=head2 add_doc

    $invindexer->add_doc($doc);

Add a document to the invindex.

=head2 add_invindexes

    my $invindexer = KinoSearch::InvIndexer->new(
        invindex => $invindex,
        analyzer => $analyzer,
    );
    $invindexer->add_invindexes( $another_invindex, $yet_another_invindex );
    $invindexer->finish;

Absorb existing invindexes into this one. May only be called once per
InvIndexer. add_invindexes() and add_doc() cannot be called on the same
InvIndexer.

=head2 delete_docs_by_term

    my $term = KinoSearch::Index::Term->new( 'id', $unique_id );
    $invindexer->delete_docs_by_term($term);

Mark any document which contains the supplied term as deleted, so that it will
be excluded from search results. For more info, see
L<Deletions|KinoSearch::Docs::FileFormat/Deletions> in
KinoSearch::Docs::FileFormat.

=head2 finish

    $invindexer->finish(
        optimize => 1, # default: 0
    );

Finish the invindex. Invalidates the InvIndexer. Takes one hash-style
parameter.

=over

=item *

B<optimize> - If optimize is set to 1, the invindex will be collapsed to its
most compact form, which will yield the fastest queries.

=back

=head1 COPYRIGHT

Copyright 2005-2007 Marvin Humphrey

=head1 LICENSE, DISCLAIMER, BUGS, etc.

See L<KinoSearch|KinoSearch> version 0.163.

=cut
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -