⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 segreader.pm

📁 外国人写的Perl搜索引擎程序
💻 PM
字号:
package KinoSearch::Index::SegReader;
use strict;
use warnings;
use KinoSearch::Util::ToolSet;
use base qw( KinoSearch::Index::IndexReader );

# Single-segment IndexReader.  An instance opens the per-segment files
# (field infos, stored fields, term infos, frequency/position data, norms
# and the optional deletions file) and answers queries by delegating to
# the specialised reader objects it holds.

BEGIN {
    __PACKAGE__->init_instance_vars(
        # params/members
        invindex => undef,
        seg_name => undef,
        # members
        comp_file_reader => undef,
        tinfos_reader    => undef,
        finfos           => undef,
        fields_reader    => undef,
        freq_stream      => undef,
        prox_stream      => undef,
        deldocs          => undef,
        norms_readers    => undef,
    );
    __PACKAGE__->ready_get(
        qw(
            finfos
            fields_reader
            freq_stream
            prox_stream
            deldocs
            seg_name
            )
    );
}

use KinoSearch::Index::CompoundFileReader;
use KinoSearch::Index::TermInfosReader;
use KinoSearch::Index::FieldsReader;
use KinoSearch::Index::FieldInfos;
use KinoSearch::Index::NormsReader;
use KinoSearch::Index::SegTermDocs;
use KinoSearch::Index::DelDocs;

# use KinoSearch::Util::Class's new()
# Note: can't inherit IndexReader's new() without recursion problems
*new = *KinoSearch::Util::Class::new;

# Populate the instance from its 'seg_name' and 'invindex' members: builds
# the DelDocs, CompoundFileReader, FieldInfos, FieldsReader and
# TermInfosReader objects, opens the .frq and .prx streams, and finally
# opens one NormsReader per indexed field.
sub init_instance {
    my $self = shift;
    my ( $seg_name, $invindex ) = @{$self}{ 'seg_name', 'invindex' };
    $self->{norms_readers} = {};

    # initialize DelDocs; the deletions file is optional, so it is only
    # read when the invindex reports that it exists
    $self->{deldocs} = KinoSearch::Index::DelDocs->new(
        invindex => $invindex,
        seg_name => $seg_name,
    );
    $self->{deldocs}->read_deldocs( $invindex, "$seg_name.del" )
        if ( $invindex->file_exists("$seg_name.del") );

    # initialize a CompoundFileReader
    my $comp_file_reader = $self->{comp_file_reader}
        = KinoSearch::Index::CompoundFileReader->new(
        invindex => $invindex,
        seg_name => $seg_name,
        );

    # initialize FieldInfos
    my $finfos = $self->{finfos} = KinoSearch::Index::FieldInfos->new;
    $finfos->read_infos( $comp_file_reader->open_instream("$seg_name.fnm") );

    # initialize FieldsReader
    $self->{fields_reader} = KinoSearch::Index::FieldsReader->new(
        finfos        => $finfos,
        fdata_stream  => $comp_file_reader->open_instream("$seg_name.fdt"),
        findex_stream => $comp_file_reader->open_instream("$seg_name.fdx"),
    );

    # initialize TermInfosReader; note that the compound file reader is
    # passed in the 'invindex' slot, not the outer invindex
    $self->{tinfos_reader} = KinoSearch::Index::TermInfosReader->new(
        invindex => $comp_file_reader,
        seg_name => $seg_name,
        finfos   => $finfos,
    );

    # open the frequency data, the positional data, and the norms
    $self->{freq_stream} = $comp_file_reader->open_instream("$seg_name.frq");
    $self->{prox_stream} = $comp_file_reader->open_instream("$seg_name.prx");
    $self->_open_norms;
}

# Return the size reported by the FieldsReader.
sub max_doc { shift->{fields_reader}->get_size }

# Return max_doc minus the number of deletions recorded in DelDocs.
sub num_docs {
    my $self = shift;
    return $self->max_doc - $self->{deldocs}->get_num_deletions;
}

# Hand a TermDocs positioned on $term to DelDocs so that it can mark the
# matching documents as deleted.
sub delete_docs_by_term {
    my ( $self, $term ) = @_;
    my $term_docs = $self->term_docs($term);
    $self->{deldocs}->delete_by_term_docs($term_docs);
}

# Write "<seg_name>.del" through DelDocs.  Does nothing when DelDocs
# reports zero deletions.
sub commit_deletions {
    my $self = shift;
    return unless $self->{deldocs}->get_num_deletions;
    my $filename = $self->{seg_name} . ".del";
    $self->{deldocs}
        ->write_deldocs( $self->{invindex}, $filename, $self->max_doc );
}

# Return the deletion count from DelDocs, usable as a boolean.
sub has_deletions { shift->{deldocs}->get_num_deletions }

# Fill $self->{norms_readers}, keyed by field name, with a NormsReader for
# every field whose FieldInfo reports get_indexed.  Each reader is opened
# on "<seg_name>.f<field_num>" inside the compound file.
sub _open_norms {
    my $self = shift;
    my ( $seg_name, $finfos, $comp_file_reader )
        = @{$self}{ 'seg_name', 'finfos', 'comp_file_reader' };
    my $max_doc = $self->max_doc;

    # create a NormsReader for each indexed field.
    for my $finfo ( $finfos->get_infos ) {
        next unless $finfo->get_indexed;
        my $filename = "$seg_name.f" . $finfo->get_field_num;
        my $instream = $comp_file_reader->open_instream($filename);
        $self->{norms_readers}{ $finfo->get_name }
            = KinoSearch::Index::NormsReader->new(
            instream => $instream,
            max_doc  => $max_doc,
            );
    }
}

# Delegate to TermInfosReader->terms($term).
sub terms {
    my ( $self, $term ) = @_;
    return $self->{tinfos_reader}->terms($term);
}

# Delegate to TermInfosReader->fetch_term_info($term).
sub fetch_term_info {
    my ( $self, $term ) = @_;
    return $self->{tinfos_reader}->fetch_term_info($term);
}

# Delegate to TermInfosReader->get_skip_interval.
sub get_skip_interval {
    shift->{tinfos_reader}->get_skip_interval;
}

# Return the doc_freq of the TermInfo for $term, or 0 when the
# TermInfosReader returns undef for it.
sub doc_freq {
    my ( $self, $term ) = @_;
    my $tinfo = $self->{tinfos_reader}->fetch_term_info($term);
    return defined $tinfo ? $tinfo->get_doc_freq : 0;
}

# Build a SegTermDocs bound to this reader, seek it to $term and return it.
sub term_docs {
    my ( $self, $term ) = @_;
    my $term_docs = KinoSearch::Index::SegTermDocs->new( reader => $self, );
    $term_docs->seek($term);
    return $term_docs;
}

# Return the NormsReader stored for $field_name.  Returns nothing (undef
# in scalar context, empty list in list context) when no reader was
# opened for that field.
sub norms_reader {
    my ( $self, $field_name ) = @_;
    return unless exists $self->{norms_readers}{$field_name};
    return $self->{norms_readers}{$field_name};
}

# Return an array ref of field names taken from FieldInfos.  When called
# with a true 'indexed' argument, only fields whose FieldInfo reports
# get_indexed are included.
sub get_field_names {
    my ( $self, %args ) = @_;
    my @fields = $self->{finfos}->get_infos;
    @fields = grep { $_->get_indexed } @fields
        if $args{indexed};
    my @names = map { $_->get_name } @fields;
    return \@names;
}

# Return a clone of this segment's FieldInfos on which set_from_file(0)
# has been called.
sub generate_field_infos {
    my $self       = shift;
    my $new_finfos = $self->{finfos}->clone;
    $new_finfos->set_from_file(0);
    return $new_finfos;
}

# Delegate to FieldsReader->fetch_doc with the supplied document number.
sub fetch_doc {
    $_[0]->{fields_reader}->fetch_doc( $_[1] );
}

# Return this reader when $all is true; otherwise return nothing.
sub segreaders_to_merge {
    my ( $self, $all ) = @_;
    return $self if $all;
    return;
}

# Close every reader and stream held by this object.  Does nothing unless
# $self->{close_invindex} is true.
# NOTE(review): 'close_invindex' is not declared in this package's
# init_instance_vars; presumably it is inherited from IndexReader -- confirm.
sub close {
    my $self = shift;
    return unless $self->{close_invindex};

    $self->{deldocs}->close;
    $self->{finfos}->close;
    $self->{fields_reader}->close;
    $self->{tinfos_reader}->close;
    $self->{comp_file_reader}->close;
    $self->{freq_stream}->close;
    $self->{prox_stream}->close;
    $_->close for values %{ $self->{norms_readers} };
}

1;

__END__

=begin devdocs

=head1 NAME

KinoSearch::Index::SegReader - read from a single-segment invindex

=head1 DESCRIPTION

Single-segment implementation of IndexReader.

=head1 COPYRIGHT

Copyright 2005-2007 Marvin Humphrey

=head1 LICENSE, DISCLAIMER, BUGS, etc.

See L<KinoSearch|KinoSearch> version 0.163.

=end devdocs

=cut

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -