📄 fieldinfos.pm
字号:
package KinoSearch::Index::FieldInfos;
use strict;
use warnings;
use KinoSearch::Util::ToolSet;
use base qw( KinoSearch::Util::Class Exporter );

# Single-byte bit flags stored per field in the .fnm file.  They are strings,
# not numbers, so the bitwise operators applied to them below act on strings.
use constant INDEXED    => "\x01";
use constant VECTORIZED => "\x02";
use constant OMIT_NORMS => "\x10";

our @EXPORT_OK;

BEGIN {
    __PACKAGE__->init_instance_vars(
        # members
        by_name   => undef,
        by_num    => undef,
        from_file => 0,
    );
    __PACKAGE__->ready_get_set(qw( from_file ));

    @EXPORT_OK = qw( INDEXED VECTORIZED OMIT_NORMS );
}

use KinoSearch::Document::Field;

# Start every new object with empty name => Field and number => Field
# lookup structures.
sub init_instance {
    my $self = shift;
    $self->{by_name} = {};
    $self->{by_num}  = [];
}

# Return a copy of this object.  Every Field is cloned, and by_num and by_name
# in the copy refer to the same cloned Field objects.
sub clone {
    my $self      = shift;
    my $evil_twin = __PACKAGE__->new;
    $evil_twin->{from_file} = $self->{from_file};

    my @by_num;
    my %by_name;
    for my $finfo ( @{ $self->{by_num} } ) {
        my $dupe = $finfo->clone;
        push @by_num, $dupe;
        $by_name{ $finfo->get_name } = $dupe;
    }
    $evil_twin->{by_num}  = \@by_num;
    $evil_twin->{by_name} = \%by_name;

    return $evil_twin;
}

# Add a user-supplied Field object to the collection.  A Field already stored
# under the same name is replaced.  All field numbers are reassigned.
#
# Croaks if $field is not a KinoSearch::Document::Field, or if this object
# was read in from a file.
sub add_field {
    my ( $self, $field ) = @_;
    croak("Not a KinoSearch::Document::Field")
        unless a_isa_b( $field, 'KinoSearch::Document::Field' );

    # don't mod Field objects for segments that are read back in
    croak("Can't update FieldInfos that were read in from file")
        if $self->{from_file};

    # add the field
    my $fieldname = $field->get_name;
    $self->{by_name}{$fieldname} = $field;
    $self->_assign_field_nums;
}

# Return the number of fields in the segment.
sub size { scalar @{ $_[0]->{by_num} } }

# Return a list of the Field objects.
sub get_infos { @{ $_[0]->{by_num} } }

# Given a fieldname, return its number, or undef if the name is unknown.
sub get_field_num {
    my ( $self, $name ) = @_;

    # Explicit undef (rather than a bare return) is kept so that callers
    # using this in list context still receive exactly one element.
    return undef
        unless exists $self->{by_name}{$name};
    my $num = $self->{by_name}{$name}->get_field_num;
    return $num;
}

# Given a fieldname, return its FieldInfo.
sub info_by_name { $_[0]->{by_name}{ $_[1] } }

# Given a field number, return its fieldInfo.
sub info_by_num { $_[0]->{by_num}[ $_[1] ] }

# Given the field number (new, not original), return the name of the field.
# Croaks if no field is stored under that number, or if the stored field has
# no defined name.
sub field_name {
    my ( $self, $num ) = @_;

    # Check the slot before calling a method on it: invoking get_name on an
    # empty slot would die with a generic "Can't call method" error and the
    # intended diagnostic would never be reached.
    my $finfo = $self->{by_num}[$num];
    croak("Don't know about field number $num")
        unless defined $finfo;

    my $name = $finfo->get_name;
    croak("Don't know about field number $num")
        unless defined $name;
    return $name;
}

# Sort all the fields lexically by name and assign ascending numbers.
# Confesses if this object was read in from a file.
sub _assign_field_nums {
    my $self = shift;
    confess("Can't _assign_field_nums when from_file")
        if $self->{from_file};

    # assign field nums according to lexical order of field names
    @{ $self->{by_num} }
        = sort { $a->get_name cmp $b->get_name } values %{ $self->{by_name} };
    my $inc = 0;
    $_->set_field_num( $inc++ ) for @{ $self->{by_num} };
}

# Decode an existing .fnm file.  Marks this object as from_file, then builds
# one Field per (name, bits) pair read from $instream.
sub read_infos {
    my ( $self, $instream ) = @_;
    my ( $by_name, $by_num ) = @{$self}{qw( by_name by_num )};

    # set flag indicating that this FieldInfos object has been read in
    $self->{from_file} = 1;

    # read in infos from stream
    my $num_fields     = $instream->lu_read('V');
    my @names_and_bits = $instream->lu_read( 'Ta' x $num_fields );
    my $field_num      = 0;
    while ( $field_num < $num_fields ) {
        my ( $name, $bits ) = splice( @names_and_bits, 0, 2 );

        # NOTE(review): OMIT_NORMS is not decoded into a constructor argument
        # here; only the raw bits are passed on as fnm_bits.  Confirm that is
        # intended.
        my $info = KinoSearch::Document::Field->new(
            field_num  => $field_num,
            name       => $name,
            indexed    => ( "$bits" & INDEXED ) eq INDEXED ? 1 : 0,
            vectorized => ( "$bits" & VECTORIZED ) eq VECTORIZED ? 1 : 0,
            fnm_bits   => $bits,
        );
        $by_name->{$name} = $info;

        # order of storage implies lexical order by name and field number
        push @$by_num, $info;
        $field_num++;
    }
}

# Write .fnm file: the field count, then each field's name and fnm bits in
# field-number order.
sub write_infos {
    my ( $self, $outstream ) = @_;

    $outstream->lu_write( 'V', scalar @{ $self->{by_num} } );
    for my $finfo ( @{ $self->{by_num} } ) {
        $outstream->lu_write( 'Ta', $finfo->get_name, $finfo->get_fnm_bits, );
    }
}

# Merge two FieldInfos objects, redefining fields as necessary and generating
# new field numbers.
sub consolidate {
    my ( $self, @others ) = @_;
    my $infos = $self->{by_name};

    # Make *this* finfos the master FieldInfos object
    for my $other (@others) {
        while ( my ( $name, $other_finfo ) = each %{ $other->{by_name} } ) {
            if ( exists $infos->{$name} ) {
                $infos->{$name}
                    = $other_finfo->breed_with( $infos->{$name} );
            }
            else {
                $infos->{$name} = $other_finfo->clone;
            }
        }
    }

    $self->_assign_field_nums;
}

# Generate a mapping of field numbers between two FieldInfos objects.  Should
# be called by the superset.  Croaks if $other contains a field that this
# object does not know about.
sub generate_field_num_map {
    my ( $self, $other ) = @_;
    my $map = '';
    for my $other_finfo ( @{ $other->{by_num} } ) {
        my $other_name = $other_finfo->get_name;
        my $orig_finfo = $self->{by_name}{$other_name};

        # Without this guard a non-superset caller would die on an undefined
        # invocant, with no hint as to which field was missing.
        croak("Can't map field '$other_name': not present in this FieldInfos")
            unless defined $orig_finfo;

        $map .= pack( 'I', $orig_finfo->get_field_num );
    }
    return KinoSearch::Util::IntMap->new( \$map );
}

# Return the one-byte flag string describing $field, built from its
# indexed, vectorized and omit_norms properties.
sub encode_fnm_bits {
    my ( undef, $field ) = @_;
    my $bits = "\0";
    $bits |= INDEXED    if $field->get_indexed;
    $bits |= VECTORIZED if $field->get_vectorized;
    $bits |= OMIT_NORMS if $field->get_omit_norms;
    return $bits;
}

# Set the indexed, vectorized and omit_norms properties of $field from the
# flag string $bits.
sub decode_fnm_bits {
    my ( undef, $field, $bits ) = @_;
    $field->set_indexed(    ( $bits & INDEXED )    eq INDEXED );
    $field->set_vectorized( ( $bits & VECTORIZED ) eq VECTORIZED );
    $field->set_omit_norms( ( $bits & OMIT_NORMS ) eq OMIT_NORMS );
}

# No-op.
sub close { }

1;

__END__

=begin devdocs

=head1 NAME

KinoSearch::Index::FieldInfos - track field characteristics

=head1 SYNOPSIS

    my $finfos = KinoSearch::Index::FieldInfos->new;
    $finfos->read_infos($instream);

=head1 DESCRIPTION

A FieldInfos object tracks the characteristics of all fields in a given
segment.

KinoSearch counts on having field nums assigned to fields by lexically sorted
order of field names, but indexes generated by Java Lucene are not likely to
have this property.

=head1 COPYRIGHT

Copyright 2005-2007 Marvin Humphrey

=head1 LICENSE, DISCLAIMER, BUGS, etc.

See L<KinoSearch|KinoSearch> version 0.163.

=end devdocs

=cut
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -