⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 field.pm

📁 外国人写的Perl搜索引擎程序
💻 PM
字号:
package KinoSearch::Document::Field;
use strict;
use warnings;
use KinoSearch::Util::ToolSet;
use base qw( KinoSearch::Util::Class );

BEGIN {
    __PACKAGE__->init_instance_vars(
        # constructor args / members
        name       => undef,
        analyzer   => undef,
        boost      => 1,
        stored     => 1,
        indexed    => 1,
        analyzed   => 1,
        vectorized => 1,
        binary     => 0,
        compressed => 0,
        omit_norms => 0,
        field_num  => undef,
        value      => '',
        fnm_bits   => undef,
        fdt_bits   => undef,
        tv_string  => '',
        tv_cache   => undef,
    );
    __PACKAGE__->ready_get_set(
        qw(
            value
            tv_string
            boost
            indexed
            stored
            analyzed
            vectorized
            binary
            compressed
            analyzer
            field_num
            name
            omit_norms
            )
    );
}

use KinoSearch::Index::FieldsReader;
use KinoSearch::Index::FieldInfos;
use KinoSearch::Index::TermVector;
use Storable qw( dclone );

# Validate and normalize a freshly constructed Field.
#
# Croaks when no non-empty 'name' was supplied.  Binary fields are forced to
# be neither indexed nor analyzed.
sub init_instance {
    my $self = shift;

    # field name is required; test definedness first so that an absent name
    # reaches the croak without an "uninitialized value" warning
    croak("Missing required parameter 'name'")
        unless ( defined( $self->{name} ) && length( $self->{name} ) );

    # don't index binary fields
    if ( $self->{binary} ) {
        $self->{indexed}  = 0;
        $self->{analyzed} = 0;
    }
}

# Return a deep copy of this Field (via Storable::dclone).
sub clone {
    my $self = shift;
    return dclone($self);
}

# Given two Field objects, return a child which is a clone of the invocant
# with the 'indexed' and 'vectorized' attributes OR'd against those of the
# other parent.  All remaining attributes come from the invocant only.
sub breed_with {
    my ( $self, $other ) = @_;
    my $kid         = $self->clone;
    my $gained_flag = 0;

    for my $flag (qw( indexed vectorized )) {
        next if $kid->{$flag};
        $kid->{$flag} = $other->{$flag};
        $gained_flag = 1 if $kid->{$flag};
    }

    # The clone inherits the invocant's cached fnm_bits.  If a flag was
    # switched on, discard the cache so that get_fnm_bits re-encodes it.
    # NOTE(review): assumes encode_fnm_bits derives its result from these
    # flags -- confirm against KinoSearch::Index::FieldInfos.
    $kid->{fnm_bits} = undef if $gained_flag;

    return $kid;
}

# Explicitly set the cached fnm bits.
sub set_fnm_bits { $_[0]->{fnm_bits} = $_[1] }

# Return the fnm bits, encoding and caching them on first access.
sub get_fnm_bits {
    my $self = shift;
    $self->{fnm_bits} = KinoSearch::Index::FieldInfos->encode_fnm_bits($self)
        unless defined $self->{fnm_bits};
    return $self->{fnm_bits};
}

# Explicitly set the cached fdt bits.
sub set_fdt_bits { $_[0]->{fdt_bits} = $_[1] }

# Return the fdt bits, encoding and caching them on first access.
sub get_fdt_bits {
    my $self = shift;
    $self->{fdt_bits}
        = KinoSearch::Index::FieldsReader->encode_fdt_bits($self)
        unless defined $self->{fdt_bits};
    return $self->{fdt_bits};
}

# Return the length of the field's value in bytes (not characters).
sub get_value_len { bytes::length( $_[0]->{value} ) }

# Return a TermVector object for a given Term, if it's in this field.
# Returns nothing when the field has no term vector data or when the term
# text is not present in it.
sub term_vector {
    my ( $self, $term_text ) = @_;
    return unless bytes::length( $self->{tv_string} );

    # parse the encoded term vector string once, then reuse the result
    if ( !defined $self->{tv_cache} ) {
        $self->{tv_cache} = _extract_tv_cache( $self->{tv_string} );
    }

    if ( exists $self->{tv_cache}{$term_text} ) {
        my ( $positions, $starts, $ends )
            = _unpack_posdata( $self->{tv_cache}{$term_text} );
        my $term_vector = KinoSearch::Index::TermVector->new(
            text          => $term_text,
            field         => $self->{name},
            positions     => $positions,
            start_offsets => $starts,
            end_offsets   => $ends,
        );
        return $term_vector;
    }

    return;
}

1;

__END__

__XS__

MODULE = KinoSearch    PACKAGE = KinoSearch::Document::Field

=for comment

Return ref to a hash where the keys are term texts and the values are encoded
positional data.

=cut

void
_extract_tv_cache(tv_string_sv)
    SV *tv_string_sv;
PREINIT:
    HV *tv_cache_hv;
PPCODE:
    tv_cache_hv = Kino_Field_extract_tv_cache(tv_string_sv);
    XPUSHs( sv_2mortal( newRV_noinc( (SV*)tv_cache_hv ) ) );
    XSRETURN(1);

=for comment

Decompress positional data.

=cut

void
_unpack_posdata(posdata_sv)
    SV *posdata_sv;
PREINIT:
    AV     *positions_av, *starts_av, *ends_av;
PPCODE:
    positions_av = newAV();
    starts_av    = newAV();
    ends_av      = newAV();
    Kino_Field_unpack_posdata(posdata_sv, positions_av, starts_av, ends_av);
    XPUSHs(sv_2mortal( newRV_noinc((SV*)positions_av) ));
    XPUSHs(sv_2mortal( newRV_noinc((SV*)starts_av)    ));
    XPUSHs(sv_2mortal( newRV_noinc((SV*)ends_av)      ));
    XSRETURN(3);

__H__

#ifndef H_KINOSEARCH_FIELD
#define H_KINOSEARCH_FIELD 1

#include "EXTERN.h"
#include "perl.h"
#include "XSUB.h"
#include "KinoSearchStoreInStream.h"
#include "KinoSearchUtilCarp.h"

HV*  Kino_Field_extract_tv_cache(SV*);
void Kino_Field_unpack_posdata(SV*, AV*, AV*, AV*);

#endif /* include guard */

__C__

#include "KinoSearchDocumentField.h"

/* Parse an encoded term vector string into a new hash mapping each term's
 * text to its still-encoded positional data.  The caller owns the returned
 * hash.
 */
HV*
Kino_Field_extract_tv_cache(SV *tv_string_sv) {
    HV *tv_cache_hv;
    char    *tv_string, *bookmark_ptr, *key;
    char   **tv_ptr;
    STRLEN   len, tv_len, overlap, key_len;
    SV      *text_sv, *nums_sv;
    I32      i, num_terms, num_positions;

    /* allocate a new hash */
    tv_cache_hv = newHV();

    /* extract pointers */
    tv_string = SvPV(tv_string_sv, tv_len);
    tv_ptr    = &tv_string;

    /* create a base text scalar */
    text_sv = newSV(1);
    SvPOK_on(text_sv);
    *(SvEND(text_sv)) = '\0';

    /* read the number of vectorized terms in the field */
    num_terms = Kino_InStream_decode_vint(tv_ptr);
    for (i = 0; i < num_terms; i++) {
        /* decompress the term text: keep the first `overlap` bytes of the
         * previous term, then append `len` new bytes from the stream */
        overlap = Kino_InStream_decode_vint(tv_ptr);
        SvCUR_set(text_sv, overlap);
        len = Kino_InStream_decode_vint(tv_ptr);
        sv_catpvn(text_sv, *tv_ptr, len);
        *tv_ptr += len;
        key = SvPV(text_sv, key_len);

        /* get positions & offsets string */
        num_positions = Kino_InStream_decode_vint(tv_ptr);
        bookmark_ptr = *tv_ptr;
        while(num_positions--) {
            /* leave nums compressed to save a little mem; the three decodes
             * per position only serve to move *tv_ptr past the record */
            (void)Kino_InStream_decode_vint(tv_ptr);
            (void)Kino_InStream_decode_vint(tv_ptr);
            (void)Kino_InStream_decode_vint(tv_ptr);
        }
        len = *tv_ptr - bookmark_ptr;
        nums_sv = newSVpvn(bookmark_ptr, len);

        /* store the $text => $posdata pair in the output hash; if the store
         * fails the hash did not take ownership, so release the value */
        if (hv_store(tv_cache_hv, key, (I32)key_len, nums_sv, 0) == NULL)
            SvREFCNT_dec(nums_sv);
    }
    SvREFCNT_dec(text_sv);

    return tv_cache_hv;
}

/* Decode the positional data for one term into three parallel arrays.
 * Each record consists of three VInts, pushed in order onto positions_av,
 * starts_av and ends_av.  Confesses if decoding does not finish exactly at
 * the end of the string.
 */
void
Kino_Field_unpack_posdata(SV *posdata_sv, AV *positions_av,
                          AV *starts_av,  AV *ends_av) {
    STRLEN  len;
    char   *posdata, *posdata_end;
    char  **posdata_ptr;
    SV     *num_sv;

    posdata      = SvPV(posdata_sv, len);
    posdata_ptr  = &posdata;
    posdata_end  = SvEND(posdata_sv);

    /* translate encoded VInts to Perl scalars */
    while(*posdata_ptr < posdata_end) {
        num_sv = newSViv( Kino_InStream_decode_vint(posdata_ptr) );
        av_push(positions_av, num_sv);
        num_sv = newSViv( Kino_InStream_decode_vint(posdata_ptr) );
        av_push(starts_av,    num_sv);
        num_sv = newSViv( Kino_InStream_decode_vint(posdata_ptr) );
        av_push(ends_av,      num_sv);
    }

    /* overshooting the end means the last record was truncated */
    if (*posdata_ptr != posdata_end)
        Kino_confess("Bad encoding of posdata");
}

__POD__

=head1 NAME

KinoSearch::Document::Field - a field within a document

=head1 SYNOPSIS

    # no public interface

=head1 DESCRIPTION

Fields can only be defined or manipulated indirectly, via InvIndexer and Doc.

=head1 COPYRIGHT

Copyright 2005-2007 Marvin Humphrey

=head1 LICENSE, DISCLAIMER, BUGS, etc.

See L<KinoSearch|KinoSearch> version 0.163.

=cut

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -