⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 fieldinfos.pm

📁 外国人写的Perl搜索引擎程序
💻 PM
字号:
package KinoSearch::Index::FieldInfos;
use strict;
use warnings;
use KinoSearch::Util::ToolSet;
use base qw( KinoSearch::Util::Class Exporter );

# Flag values stored in a field's "fnm bits".  Each is a one-byte string so
# that it can be combined and tested with Perl's string-bitwise operators.
use constant INDEXED    => "\x01";
use constant VECTORIZED => "\x02";
use constant OMIT_NORMS => "\x10";

our @EXPORT_OK;

BEGIN {
    __PACKAGE__->init_instance_vars(
        # members
        by_name   => undef,    # hashref: field name   => Field object
        by_num    => undef,    # arrayref: field number => Field object
        from_file => 0,        # true once read_infos() has been called
    );
    __PACKAGE__->ready_get_set(qw( from_file ));

    @EXPORT_OK = qw(
        INDEXED
        VECTORIZED
        OMIT_NORMS
    );
}

use KinoSearch::Document::Field;

# Give each new object its own empty name and number lookup tables.
sub init_instance {
    my $self = shift;
    $self->{by_name} = {};
    $self->{by_num}  = [];
}

# Return a new FieldInfos object holding a clone of every Field in this one.
# Both lookup tables of the copy refer to the same cloned Field objects.
sub clone {
    my $self      = shift;
    my $evil_twin = __PACKAGE__->new;
    $evil_twin->{from_file} = $self->{from_file};
    my @by_num;
    my %by_name;
    for my $finfo ( @{ $self->{by_num} } ) {
        my $dupe = $finfo->clone;
        push @by_num, $dupe;
        $by_name{ $finfo->get_name } = $dupe;
    }
    $evil_twin->{by_num}  = \@by_num;
    $evil_twin->{by_name} = \%by_name;
    return $evil_twin;
}

# Add a user-supplied Field object to the collection.  A field already stored
# under the same name is replaced.  All field numbers are then reassigned.
#
# Croaks if $field is not a KinoSearch::Document::Field, or if this object
# was populated by read_infos().
sub add_field {
    my ( $self, $field ) = @_;
    croak("Not a KinoSearch::Document::Field")
        unless a_isa_b( $field, 'KinoSearch::Document::Field' );

    # don't mod Field objects for segments that are read back in
    croak("Can't update FieldInfos that were read in from file")
        if $self->{from_file};

    # add the field
    my $fieldname = $field->get_name;
    $self->{by_name}{$fieldname} = $field;
    $self->_assign_field_nums;
}

# Return the number of fields in the segment.
sub size { scalar @{ $_[0]->{by_num} } }

# Return a list of the Field objects, in field-number order.
sub get_infos { @{ $_[0]->{by_num} } }

# Given a fieldname, return its number.  Returns undef when the name is not
# known; note that this is an explicit undef, so it yields a one-element list
# in list context.
sub get_field_num {
    my ( $self, $name ) = @_;
    return undef
        unless exists $self->{by_name}{$name};
    my $num = $self->{by_name}{$name}->get_field_num;
    return $num;
}

# Given a fieldname, return its FieldInfo (undef if the name is unknown).
sub info_by_name { $_[0]->{by_name}{ $_[1] } }

# Given a field number, return its fieldInfo (undef if there is no such
# number).
sub info_by_num { $_[0]->{by_num}[ $_[1] ] }

# Given the field number (new, not original), return the name of the field.
# Croaks if no field is stored under that number or the field has no name.
sub field_name {
    my ( $self, $num ) = @_;

    # Look the slot up first: calling get_name on an empty slot would die
    # with an unhelpful "can't call method on undefined value" error instead
    # of the intended message below.
    my $finfo = $self->{by_num}[$num];
    croak("Don't know about field number $num")
        unless defined $finfo;

    my $name = $finfo->get_name;
    croak("Don't know about field number $num")
        unless defined $name;
    return $name;
}

# Sort all the fields lexically by name and assign ascending numbers,
# starting from 0.  Confesses if this object was read in from a file.
sub _assign_field_nums {
    my $self = shift;
    confess("Can't _assign_field_nums when from_file") if $self->{from_file};

    # assign field nums according to lexical order of field names
    @{ $self->{by_num} }
        = sort { $a->get_name cmp $b->get_name } values %{ $self->{by_name} };
    my $inc = 0;
    $_->set_field_num( $inc++ ) for @{ $self->{by_num} };
}

# Decode an existing .fnm file.  Reads a field count followed by one
# (name, bits) pair per field from $instream, builds a Field object for each
# and appends it to this object's lookup tables.
sub read_infos {
    my ( $self,    $instream ) = @_;
    my ( $by_name, $by_num )   = @{$self}{qw( by_name by_num )};

    # set flag indicating that this FieldInfos object has been read in
    $self->{from_file} = 1;

    # read in infos from stream
    my $num_fields     = $instream->lu_read('V');
    my @names_and_bits = $instream->lu_read( 'Ta' x $num_fields );
    my $field_num      = 0;
    while ( $field_num < $num_fields ) {
        my ( $name, $bits ) = splice( @names_and_bits, 0, 2 );
        my $info = KinoSearch::Document::Field->new(
            field_num  => $field_num,
            name       => $name,
            indexed    => ( "$bits" & INDEXED ) eq INDEXED ? 1 : 0,
            vectorized => ( "$bits" & VECTORIZED ) eq VECTORIZED ? 1 : 0,
            fnm_bits   => $bits,
        );
        $by_name->{$name} = $info;

        # order of storage implies lexical order by name and field number
        push @$by_num, $info;
        $field_num++;
    }
}

# Write .fnm file: the field count, then each field's name and fnm bits in
# field-number order.
sub write_infos {
    my ( $self, $outstream ) = @_;
    $outstream->lu_write( 'V', scalar @{ $self->{by_num} } );
    for my $finfo ( @{ $self->{by_num} } ) {
        $outstream->lu_write( 'Ta', $finfo->get_name, $finfo->get_fnm_bits, );
    }
}

# Merge two FieldInfos objects, redefining fields as necessary and generating
# new field numbers.  Fields known to both sides are combined via
# breed_with(); fields only known to another object are cloned in.
sub consolidate {
    my ( $self, @others ) = @_;
    my $infos = $self->{by_name};

    # Make *this* finfos the master FieldInfos object
    for my $other (@others) {
        while ( my ( $name, $other_finfo ) = each %{ $other->{by_name} } ) {
            if ( exists $infos->{$name} ) {
                $infos->{$name} = $other_finfo->breed_with( $infos->{$name} );
            }
            else {
                $infos->{$name} = $other_finfo->clone;
            }
        }
    }

    $self->_assign_field_nums;
}

# Generate a mapping of field numbers between two FieldInfos objects.  Should
# be called by the superset.
#
# For each field of $other, in $other's field-number order, the number of the
# same-named field in $self is packed into a string which is handed to
# KinoSearch::Util::IntMap.  Croaks if $other has a field that $self lacks.
sub generate_field_num_map {
    my ( $self, $other ) = @_;
    my $map = '';
    for my $other_finfo ( @{ $other->{by_num} } ) {
        my $name       = $other_finfo->get_name;
        my $orig_finfo = $self->{by_name}{$name};

        # The caller is not actually a superset; fail with a usable message
        # rather than a method call on undef.
        croak("Can't map field '$name': not present in the superset")
            unless defined $orig_finfo;

        $map .= pack( 'I', $orig_finfo->get_field_num );
    }
    return KinoSearch::Util::IntMap->new( \$map );
}

# Build the one-byte fnm bits string for a Field from its indexed,
# vectorized and omit_norms settings.  Called as a class method; the invocant
# is ignored.
sub encode_fnm_bits {
    my ( undef, $field ) = @_;
    my $bits = "\0";
    for ($bits) {
        $_ |= INDEXED    if $field->get_indexed;
        $_ |= VECTORIZED if $field->get_vectorized;
        $_ |= OMIT_NORMS if $field->get_omit_norms;
    }
    return $bits;
}

# Apply an fnm bits string to a Field, setting its indexed, vectorized and
# omit_norms properties.  Called as a class method; the invocant is ignored.
sub decode_fnm_bits {
    my ( undef, $field, $bits ) = @_;
    $field->set_indexed(    ( $bits & INDEXED )    eq INDEXED );
    $field->set_vectorized( ( $bits & VECTORIZED ) eq VECTORIZED );
    $field->set_omit_norms( ( $bits & OMIT_NORMS ) eq OMIT_NORMS );
}

# Nothing to release.
sub close { }

1;

__END__

=begin devdocs

=head1 NAME

KinoSearch::Index::FieldInfos - track field characteristics

=head1 SYNOPSIS

    my $finfos = KinoSearch::Index::FieldInfos->new;
    $finfos->read_infos($instream);

=head1 DESCRIPTION

A FieldInfos object tracks the characteristics of all fields in a given
segment.

KinoSearch counts on having field nums assigned to fields by lexically sorted
order of field names, but indexes generated by Java Lucene are not likely to
have this property.

=head1 COPYRIGHT

Copyright 2005-2007 Marvin Humphrey

=head1 LICENSE, DISCLAIMER, BUGS, etc.

See L<KinoSearch|KinoSearch> version 0.163.

=end devdocs

=cut

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -