⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 segtermenum.pm

📁 外国人写的Perl搜索引擎程序
💻 PM
📖 第 1 页 / 共 2 页
字号:
    case 17: Kino_confess("can't set is_index");             /* fall through */    case 18: RETVAL = newSViv(obj->is_index);             break;        KINO_END_SET_OR_GET_SWITCH}OUTPUT: RETVALvoidDESTROY(obj)    SegTermEnum* obj;PPCODE:    Kino_SegTermEnum_destroy(obj);__H__#ifndef H_KINOSEARCH_INDEX_SEG_TERM_ENUM#define H_KINOSEARCH_INDEX_SEG_TERM_ENUM 1#include "EXTERN.h"#include "perl.h"#include "KinoSearchIndexTermBuffer.h"#include "KinoSearchIndexTermInfo.h"#include "KinoSearchStoreInStream.h"#include "KinoSearchUtilByteBuf.h"#include "KinoSearchUtilCarp.h"#include "KinoSearchUtilCClass.h"#include "KinoSearchUtilMemManager.h"#include "KinoSearchUtilStringHelper.h"typedef struct segtermenum {    SV         *finfos;    SV         *instream_sv;    SV         *term_buf_ref;    TermBuffer *term_buf;    TermInfo   *tinfo;    InStream   *instream;    I32         is_index;    I32         enum_size;    I32         position;    I32         index_interval;    I32         skip_interval;    ByteBuf   **termstring_cache;    TermInfo  **tinfos_cache;} SegTermEnum;SegTermEnum* Kino_SegTermEnum_new_helper(SV*, I32, SV*, SV*);void Kino_SegTermEnum_reset(SegTermEnum*);I32  Kino_SegTermEnum_next(SegTermEnum*);void Kino_SegTermEnum_fill_cache(SegTermEnum*);void Kino_SegTermEnum_scan_to(SegTermEnum*, char*, I32);I32  Kino_SegTermEnum_scan_cache(SegTermEnum*, char*, I32);void Kino_SegTermEnum_destroy(SegTermEnum*);#endif /* include guard */__C__#include "KinoSearchIndexSegTermEnum.h"SegTermEnum*Kino_SegTermEnum_new_helper(SV *instream_sv, I32 is_index, SV *finfos_sv,                            SV *term_buffer_sv) {    I32           format;    InStream     *instream;    SegTermEnum  *obj;    /* allocate */    Kino_New(0, obj, 1, SegTermEnum);    obj->tinfo = Kino_TInfo_new();    /* init */    obj->tinfos_cache     = NULL;    obj->termstring_cache = NULL;    /* save instream, finfos, and term_buffer, incrementing refcounts */    obj->instream_sv  = newSVsv(instream_sv);    obj->finfos       = newSVsv(finfos_sv);    obj->term_buf_ref = newSVsv(term_buffer_sv);    Kino_extract_struct(term_buffer_sv, obj->term_buf, TermBuffer*,         "KinoSearch::Index::TermBuffer");    Kino_extract_struct(instream_sv, obj->instream, InStream*,         "KinoSearch::Store::InStream");    instream = obj->instream;    /* determine whether this is a primary or index enum */    obj->is_index = is_index;    /* reject older or newer index formats */    format = (I32)instream->read_int(instream);    if (format != -2)        Kino_confess("Unsupported index format: %d", format);    /* read in some vars */    obj->enum_size      = instream->read_long(instream);    obj->index_interval = instream->read_int(instream);    obj->skip_interval  = instream->read_int(instream);    /* define the position of the Enum as "not yet started" */    obj->position = -1;        return obj;}#define KINO_SEG_TERM_ENUM_HEADER_LEN 20 voidKino_SegTermEnum_reset(SegTermEnum* obj) {    obj->position = -1;    obj->instream->seek(obj->instream, KINO_SEG_TERM_ENUM_HEADER_LEN);    Kino_TermBuf_reset(obj->term_buf);    Kino_TInfo_reset(obj->tinfo);}I32 Kino_SegTermEnum_next(SegTermEnum *obj) {    InStream *instream;    TermInfo *tinfo;    /* make some local copies for clarity of code */    instream = obj->instream;    tinfo    = obj->tinfo;    /* if we've run out of terms, null out the termstring and return */    if (++obj->position >= obj->enum_size) {        Kino_TermBuf_reset(obj->term_buf);        return 0;    }    /* read in the term */    Kino_TermBuf_read(obj->term_buf, instream);    /* read doc freq */    tinfo->doc_freq = instream->read_vint(instream);    /* adjust file pointers. */    tinfo->frq_fileptr += instream->read_vlong(instream);    tinfo->prx_fileptr += instream->read_vlong(instream);    /* read skip data (which doesn't do anything right now) */    if (tinfo->doc_freq >= obj->skip_interval)        tinfo->skip_offset = instream->read_vint(instream);    else        tinfo->skip_offset = 0;    /* read filepointer to main enum if this is an index enum */    if (obj->is_index)        tinfo->index_fileptr += instream->read_vlong(instream);    return 1;}voidKino_SegTermEnum_fill_cache(SegTermEnum* obj) {    TermBuffer  *term_buf;    TermInfo    *tinfo;    TermInfo   **tinfos_cache;    ByteBuf    **termstring_cache;    /* allocate caches */    if (obj->tinfos_cache != NULL)        Kino_confess("Internal error: cache already filled");    Kino_New(0, obj->termstring_cache, obj->enum_size, ByteBuf*);     Kino_New(0, obj->tinfos_cache, obj->enum_size, TermInfo*);    /* make some local copies */    tinfo                = obj->tinfo;    term_buf             = obj->term_buf;    tinfos_cache         = obj->tinfos_cache;    termstring_cache     = obj->termstring_cache;    while (Kino_SegTermEnum_next(obj)) {        /* copy tinfo and termstring into caches */        *tinfos_cache++     = Kino_TInfo_dupe(tinfo);        *termstring_cache++ = Kino_BB_clone(term_buf->termstring);    }}voidKino_SegTermEnum_scan_to(SegTermEnum *obj, char *target_termstring,                          I32 target_termstring_len) {    TermBuffer *term_buf = obj->term_buf;    ByteBuf     target;    /* make convenience copies */    target.ptr  = target_termstring;    target.size = target_termstring_len;    /* keep looping until the termstring is lexically ge target */    do {        const I32 comparison = Kino_BB_compare(term_buf->termstring, &target);        if ( comparison >= 0 &&  obj->position != -1) {            break;        }    } while (Kino_SegTermEnum_next(obj));}I32Kino_SegTermEnum_scan_cache(SegTermEnum *obj, char *target_termstring,                             I32 target_len) {    TermBuffer  *term_buf = obj->term_buf;    ByteBuf    **termstrings = obj->termstring_cache;    ByteBuf      target;    I32          lo       = 0;    I32          hi       = obj->enum_size - 1;    I32          result   = -100;    I32          mid, comparison;    /* make convenience copies */    target.ptr  = target_termstring;    target.size = target_len;    if (obj->tinfos_cache == NULL)        Kino_confess("Internal Error: fill_cache hasn't been called yet");         /* divide and conquer */    while (hi >= lo) {        mid        = (lo + hi) >> 1;        comparison = Kino_BB_compare(&target, termstrings[mid]);        if (comparison < 0)             hi = mid - 1;        else if (comparison > 0)            lo = mid + 1;        else {            result = mid;            break;        }    }    result = hi     == -1   ? 0  /* indicating that target lt first entry */           : result == -100 ? hi /* if result is still -100, it wasn't set */           : result;        /* set the state of the Enum/TermBuffer as if we'd called scan_to */    obj->position  = result;    Kino_TermBuf_set_termstring(term_buf, termstrings[result]->ptr,        termstrings[result]->size);    Kino_TInfo_destroy(obj->tinfo);    obj->tinfo = Kino_TInfo_dupe( obj->tinfos_cache[result] );    return result;}voidKino_SegTermEnum_destroy(SegTermEnum *obj) {    /* put out the garbage for collection */    SvREFCNT_dec(obj->finfos);    SvREFCNT_dec(obj->instream_sv);    SvREFCNT_dec(obj->term_buf_ref);    Kino_TInfo_destroy(obj->tinfo);    /* if fill_cache was called, free all of that... */    if (obj->tinfos_cache != NULL) {        I32         iter;        ByteBuf   **termstring_cache = obj->termstring_cache;        TermInfo  **tinfos_cache     = obj->tinfos_cache;        for (iter = 0; iter < obj->enum_size; iter++) {            Kino_BB_destroy(*termstring_cache++);            Kino_TInfo_destroy(*tinfos_cache++);        }        Kino_Safefree(obj->tinfos_cache);        Kino_Safefree(obj->termstring_cache);    }    /* last, the SegTermEnum object itself */    Kino_Safefree(obj);}__POD__=begin devdocs=head1 NAMEKinoSearch::Index::SegTermEnum - single-segment TermEnum=head1 DESCRIPTIONSingle-segment implementation of KinoSearch::Index::TermEnum.=head1 COPYRIGHTCopyright 2005-2007 Marvin Humphrey=head1 LICENSE, DISCLAIMER, BUGS, etc.See L<KinoSearch|KinoSearch> version 0.163.=end devdocs=cut

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -