⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 terminfoswriter.pm

📁 外国人写的Perl搜索引擎程序
💻 PM
字号:
package KinoSearch::Index::TermInfosWriter;
use strict;
use warnings;
use KinoSearch::Util::ToolSet;
use base qw( KinoSearch::Util::Class );

BEGIN {
    __PACKAGE__->init_instance_vars(
        # constructor params
        invindex       => undef,
        seg_name       => undef,
        is_index       => 0,
        index_interval => 1024,
        skip_interval  => 16,
    );
}
our %instance_vars;

# Constructor.  Accepts the labeled params declared above.
#
# Opens an outstream named "<seg_name>.tii" when is_index is true and
# "<seg_name>.tis" otherwise, then hands it to the XS-level _new().  A primary
# (non-index) writer additionally creates a second writer with is_index => 1
# and links the two to each other via _set_other().
sub new {
    my $class = shift;
    confess kerror() unless verify_args( \%instance_vars, @_ );
    my %args = ( %instance_vars, @_ );

    # open an outstream
    my $suffix = $args{is_index} ? 'tii' : 'tis';
    my $outstream
        = $args{invindex}->open_outstream("$args{seg_name}.$suffix");

    my $self = _new( $outstream,
        @args{qw( is_index index_interval skip_interval )} );

    # create the tii doppelganger
    if ( !$args{is_index} ) {
        # Forward the intervals so that both files record the same values in
        # their headers and both writers apply the same skip_interval test
        # when deciding whether to emit skip_offset.
        my $other = __PACKAGE__->new(
            invindex       => $args{invindex},
            seg_name       => $args{seg_name},
            is_index       => 1,
            index_interval => $args{index_interval},
            skip_interval  => $args{skip_interval},
        );

        # NOTE(review): each writer stores a copy of the other's reference
        # (see _set_other), which looks like a reference cycle -- confirm
        # whether it is broken somewhere outside this file.
        $self->_set_other($other);
        $other->_set_other($self);
    }

    return $self;
}

# Finalize the file: write the number of terms added into the header, finish
# the doppelganger too if this is the primary writer, then close the
# outstream.
sub finish {
    my $self      = shift;
    my $outstream = $self->_get_outstream;

    # seek to near the head and write the number of terms processed
    # (presumably this overwrites the placeholder that the C-level
    # constructor writes right after the format int -- confirm the offset
    # against OutStream's write_int width)
    $outstream->seek(4);
    $outstream->lu_write( 'Q', $self->_get_size );

    # cue the doppelganger's exit
    if ( !$self->_get_is_index ) {
        $self->_get_other()->finish;
    }

    $outstream->close;
}

1;

__END__

__XS__

MODULE = KinoSearch    PACKAGE = KinoSearch::Index::TermInfosWriter

=for comment

Low-level constructor: wraps Kino_TInfosWriter_new.

=cut

TermInfosWriter*
_new(outstream_sv, is_index, index_interval, skip_interval)
    SV  *outstream_sv;
    I32  is_index;
    I32  index_interval;
    I32  skip_interval;
CODE:
    RETVAL = Kino_TInfosWriter_new(outstream_sv, is_index, index_interval,
        skip_interval);
OUTPUT: RETVAL

=for comment

Add a Term (encoded as a termstring) and its associated TermInfo.

=cut

void
add(obj, termstring_sv, tinfo)
    TermInfosWriter *obj;
    SV              *termstring_sv;
    TermInfo        *tinfo;
PREINIT:
    ByteBuf bb;
    STRLEN len;
PPCODE:
    bb.ptr  = SvPV(termstring_sv, len);
    bb.size = len;
    Kino_TInfosWriter_add(obj, &bb, tinfo);

=for comment

Export the FORMAT constant to Perl.

=cut

IV
FORMAT()
CODE:
    RETVAL = KINO_TINFOS_FORMAT;
OUTPUT: RETVAL

=for comment

Combined accessor.  The ALIAS number selects the member: _set_other stores a
copy of the supplied SV, extracts the struct pointer from it, and then falls
through to return the same thing _get_other returns.

=cut

SV*
_set_or_get(obj, ...)
    TermInfosWriter *obj;
ALIAS:
    _set_other     = 1
    _get_other     = 2
    _get_outstream = 4
    _get_is_index  = 6
    _get_size      = 8
CODE:
{
    KINO_START_SET_OR_GET_SWITCH

    case 1:  SvREFCNT_dec(obj->other_sv);
             obj->other_sv = newSVsv( ST(1) );
             Kino_extract_struct(obj->other_sv, obj->other, TermInfosWriter*,
                "KinoSearch::Index::TermInfosWriter");
             /* fall through */
    case 2:  RETVAL = newSVsv(obj->other_sv);
             break;

    case 4:  RETVAL = newSVsv(obj->fh_sv);
             break;

    case 6:  RETVAL = newSViv(obj->is_index);
             break;

    case 8:  RETVAL = newSViv(obj->size);
             break;

    KINO_END_SET_OR_GET_SWITCH
}
OUTPUT: RETVAL

=for comment

Release the C struct and the SVs it holds.

=cut

void
DESTROY(obj)
    TermInfosWriter *obj;
PPCODE:
    Kino_TInfosWriter_destroy(obj);

__H__

#ifndef H_KINO_TERM_INFOS_WRITER
#define H_KINO_TERM_INFOS_WRITER 1

#include "EXTERN.h"
#include "perl.h"
#include "XSUB.h"
#include "KinoSearchIndexTerm.h"
#include "KinoSearchIndexTermInfo.h"
#include "KinoSearchStoreOutStream.h"
#include "KinoSearchUtilByteBuf.h"
#include "KinoSearchUtilCClass.h"
#include "KinoSearchUtilMathUtils.h"
#include "KinoSearchUtilMemManager.h"
#include "KinoSearchUtilStringHelper.h"

/* Format marker written as the first int of each file.  Parenthesized so the
 * negative literal cannot bind to a neighboring operator when expanded. */
#define KINO_TINFOS_FORMAT (-2)

typedef struct terminfoswriter {
    OutStream *fh;              /* stream extracted from fh_sv */
    SV        *fh_sv;           /* our copy of the outstream SV */
    I32        is_index;        /* true for the .tii writer */
    I32        index_interval;  /* every Nth term is also sent to `other` */
    I32        skip_interval;   /* doc_freq threshold for writing skip_offset */
    struct terminfoswriter* other;  /* the partner writer, from other_sv */
    SV        *other_sv;        /* our copy of the partner's SV */
    ByteBuf   *last_termstring; /* previous termstring, for prefix sharing */
    TermInfo  *last_tinfo;      /* previous TermInfo, for delta encoding */
    I32        last_fieldnum;   /* initialized to -1 by the constructor */
    double     last_tis_ptr;    /* .tis position at the previous .tii entry */
    I32        size;            /* number of terms added so far */
} TermInfosWriter;

TermInfosWriter* Kino_TInfosWriter_new(SV*, I32, I32, I32);
void Kino_TInfosWriter_add(TermInfosWriter*, ByteBuf*, TermInfo*);
void Kino_TInfosWriter_destroy(TermInfosWriter*);

#endif /* include guard */

__C__

#include "KinoSearchIndexTermInfosWriter.h"

/* Allocate a TermInfosWriter, take a copy of the outstream SV, initialize the
 * delta-encoding state, and write the file header (format, a placeholder for
 * the term count, index_interval, skip_interval).
 */
TermInfosWriter*
Kino_TInfosWriter_new(SV *outstream_sv, I32 is_index, I32 index_interval,
                      I32 skip_interval) {
    TermInfosWriter *obj;

    /* allocate */
    Kino_New(0, obj, 1, TermInfosWriter);

    /* assign */
    obj->is_index       = is_index;
    obj->index_interval = index_interval;
    obj->skip_interval  = skip_interval;
    obj->fh_sv          = newSVsv(outstream_sv);
    Kino_extract_struct(obj->fh_sv, obj->fh, OutStream*,
        "KinoSearch::Store::OutStream");

    /* NOTE: this value forces the first field_num in the .tii file to -1.
     * Do not change it. */
    obj->last_termstring    = Kino_BB_new_string("\xff\xff", 2);
    obj->last_tinfo         = Kino_TInfo_new();
    obj->last_fieldnum      = -1;
    obj->last_tis_ptr       = 0;
    obj->size               = 0;
    obj->other              = NULL;
    obj->other_sv           = &PL_sv_undef;

    /* write file header */
    obj->fh->write_int(obj->fh, KINO_TINFOS_FORMAT);
    obj->fh->write_long(obj->fh, 0.0); /* return to fill in later */
    obj->fh->write_int(obj->fh, index_interval);
    obj->fh->write_int(obj->fh, skip_interval);

    return obj;
}

/* Write out a term/terminfo combo.
 *
 * The termstring must begin with the packed field number
 * (KINO_FIELD_NUM_LEN bytes); the remainder is the term text.  Croaks if the
 * termstring is too short to hold the field number.
 */
void
Kino_TInfosWriter_add(TermInfosWriter* obj, ByteBuf* termstring_bb,
                      TermInfo* tinfo) {
    char      *termstring, *last_tstring;
    STRLEN     termstring_len, last_tstring_len;
    I32        field_num;
    I32        overlap;
    char      *diff_start_str;
    STRLEN     diff_len;
    OutStream* fh;

    /* Refuse a termstring that cannot hold the field number: decoding it
     * would read past the buffer and the length subtraction below would
     * underflow. */
    if (termstring_bb->size < KINO_FIELD_NUM_LEN)
        croak("termstring too short to contain a field number");

    /* make local copy */
    fh = obj->fh;

    /* write a subset of the entries to the .tii index */
    if (    (obj->size % obj->index_interval == 0)
         && (!obj->is_index)
    ) {
        Kino_TInfosWriter_add(obj->other, obj->last_termstring,
            obj->last_tinfo);
    }

    /* extract string pointers and string lengths */
    termstring       = termstring_bb->ptr;
    last_tstring     = obj->last_termstring->ptr;
    termstring_len   = termstring_bb->size;
    last_tstring_len = obj->last_termstring->size;

    /* to obtain field number, decode packed 'n' at top of termstring */
    field_num = (I16)Kino_decode_bigend_U16(termstring);

    /* move past field_num */
    termstring       += KINO_FIELD_NUM_LEN;
    last_tstring     += KINO_FIELD_NUM_LEN;
    termstring_len   -= KINO_FIELD_NUM_LEN;
    last_tstring_len -= KINO_FIELD_NUM_LEN;

    /* count how many bytes the strings share at the top */
    overlap = Kino_StrHelp_string_diff(last_tstring, termstring,
        last_tstring_len, termstring_len);
    diff_start_str = termstring + overlap;
    diff_len       = termstring_len - overlap;

    /* write number of common bytes */
    fh->write_vint(fh, overlap);

    /* write the remaining bytes, i.e. those not shared with the last term */
    fh->write_string(fh, diff_start_str, diff_len);

    /* write field number and doc_freq */
    fh->write_vint(fh, field_num);
    fh->write_vint(fh, tinfo->doc_freq);

    /* delta encode filepointers */
    fh->write_vlong(fh, (tinfo->frq_fileptr - obj->last_tinfo->frq_fileptr) );
    fh->write_vlong(fh, (tinfo->prx_fileptr - obj->last_tinfo->prx_fileptr) );

    /* write skipdata */
    if (tinfo->doc_freq >= obj->skip_interval)
        fh->write_vint(fh, tinfo->skip_offset);

    /* the .tii index file gets a pointer to the location of the primary */
    if (obj->is_index) {
        double tis_ptr;
        tis_ptr = obj->other->fh->tell(obj->other->fh);
        obj->fh->write_vlong(obj->fh, (tis_ptr - obj->last_tis_ptr));
        obj->last_tis_ptr = tis_ptr;
    }

    /* track number of terms */
    obj->size++;

    /* remember for delta encoding */
    Kino_BB_assign_string(obj->last_termstring, termstring_bb->ptr,
        termstring_bb->size);
    StructCopy(tinfo, obj->last_tinfo, TermInfo);
}

/* Drop our references to the outstream and partner SVs, then free the
 * delta-encoding state and the struct itself.
 */
void
Kino_TInfosWriter_destroy(TermInfosWriter *obj) {
    SvREFCNT_dec(obj->fh_sv);
    SvREFCNT_dec(obj->other_sv);
    Kino_BB_destroy(obj->last_termstring);
    Kino_TInfo_destroy(obj->last_tinfo);
    Kino_Safefree(obj);
}

__POD__

=begin devdocs

=head1 NAME

KinoSearch::Index::TermInfosWriter - write a term dictionary

=head1 DESCRIPTION

The TermInfosWriter writes both parts of the term dictionary.  The primary
instance creates a shadow TermInfosWriter that writes the index.

=head1 TODO

Find the optimum TermIndexInterval.

=head1 COPYRIGHT

Copyright 2005-2007 Marvin Humphrey

=head1 LICENSE, DISCLAIMER, BUGS, etc.

See L<KinoSearch|KinoSearch> version 0.163.

=end devdocs

=cut

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -