📄 segtermenum.pm
字号:
case 17: Kino_confess("can't set is_index"); /* fall through */ case 18: RETVAL = newSViv(obj->is_index); break; KINO_END_SET_OR_GET_SWITCH}OUTPUT: RETVALvoidDESTROY(obj) SegTermEnum* obj;PPCODE: Kino_SegTermEnum_destroy(obj);__H__#ifndef H_KINOSEARCH_INDEX_SEG_TERM_ENUM#define H_KINOSEARCH_INDEX_SEG_TERM_ENUM 1#include "EXTERN.h"#include "perl.h"#include "KinoSearchIndexTermBuffer.h"#include "KinoSearchIndexTermInfo.h"#include "KinoSearchStoreInStream.h"#include "KinoSearchUtilByteBuf.h"#include "KinoSearchUtilCarp.h"#include "KinoSearchUtilCClass.h"#include "KinoSearchUtilMemManager.h"#include "KinoSearchUtilStringHelper.h"typedef struct segtermenum { SV *finfos; SV *instream_sv; SV *term_buf_ref; TermBuffer *term_buf; TermInfo *tinfo; InStream *instream; I32 is_index; I32 enum_size; I32 position; I32 index_interval; I32 skip_interval; ByteBuf **termstring_cache; TermInfo **tinfos_cache;} SegTermEnum;SegTermEnum* Kino_SegTermEnum_new_helper(SV*, I32, SV*, SV*);void Kino_SegTermEnum_reset(SegTermEnum*);I32 Kino_SegTermEnum_next(SegTermEnum*);void Kino_SegTermEnum_fill_cache(SegTermEnum*);void Kino_SegTermEnum_scan_to(SegTermEnum*, char*, I32);I32 Kino_SegTermEnum_scan_cache(SegTermEnum*, char*, I32);void Kino_SegTermEnum_destroy(SegTermEnum*);#endif /* include guard */__C__#include "KinoSearchIndexSegTermEnum.h"SegTermEnum*Kino_SegTermEnum_new_helper(SV *instream_sv, I32 is_index, SV *finfos_sv, SV *term_buffer_sv) { I32 format; InStream *instream; SegTermEnum *obj; /* allocate */ Kino_New(0, obj, 1, SegTermEnum); obj->tinfo = Kino_TInfo_new(); /* init */ obj->tinfos_cache = NULL; obj->termstring_cache = NULL; /* save instream, finfos, and term_buffer, incrementing refcounts */ obj->instream_sv = newSVsv(instream_sv); obj->finfos = newSVsv(finfos_sv); obj->term_buf_ref = newSVsv(term_buffer_sv); Kino_extract_struct(term_buffer_sv, obj->term_buf, TermBuffer*, "KinoSearch::Index::TermBuffer"); Kino_extract_struct(instream_sv, obj->instream, InStream*, "KinoSearch::Store::InStream"); instream = obj->instream; /* determine whether this is a primary or index enum */ obj->is_index = is_index; /* reject older or newer index formats */ format = (I32)instream->read_int(instream); if (format != -2) Kino_confess("Unsupported index format: %d", format); /* read in some vars */ obj->enum_size = instream->read_long(instream); obj->index_interval = instream->read_int(instream); obj->skip_interval = instream->read_int(instream); /* define the position of the Enum as "not yet started" */ obj->position = -1; return obj;}#define KINO_SEG_TERM_ENUM_HEADER_LEN 20 voidKino_SegTermEnum_reset(SegTermEnum* obj) { obj->position = -1; obj->instream->seek(obj->instream, KINO_SEG_TERM_ENUM_HEADER_LEN); Kino_TermBuf_reset(obj->term_buf); Kino_TInfo_reset(obj->tinfo);}I32 Kino_SegTermEnum_next(SegTermEnum *obj) { InStream *instream; TermInfo *tinfo; /* make some local copies for clarity of code */ instream = obj->instream; tinfo = obj->tinfo; /* if we've run out of terms, null out the termstring and return */ if (++obj->position >= obj->enum_size) { Kino_TermBuf_reset(obj->term_buf); return 0; } /* read in the term */ Kino_TermBuf_read(obj->term_buf, instream); /* read doc freq */ tinfo->doc_freq = instream->read_vint(instream); /* adjust file pointers. */ tinfo->frq_fileptr += instream->read_vlong(instream); tinfo->prx_fileptr += instream->read_vlong(instream); /* read skip data (which doesn't do anything right now) */ if (tinfo->doc_freq >= obj->skip_interval) tinfo->skip_offset = instream->read_vint(instream); else tinfo->skip_offset = 0; /* read filepointer to main enum if this is an index enum */ if (obj->is_index) tinfo->index_fileptr += instream->read_vlong(instream); return 1;}voidKino_SegTermEnum_fill_cache(SegTermEnum* obj) { TermBuffer *term_buf; TermInfo *tinfo; TermInfo **tinfos_cache; ByteBuf **termstring_cache; /* allocate caches */ if (obj->tinfos_cache != NULL) Kino_confess("Internal error: cache already filled"); Kino_New(0, obj->termstring_cache, obj->enum_size, ByteBuf*); Kino_New(0, obj->tinfos_cache, obj->enum_size, TermInfo*); /* make some local copies */ tinfo = obj->tinfo; term_buf = obj->term_buf; tinfos_cache = obj->tinfos_cache; termstring_cache = obj->termstring_cache; while (Kino_SegTermEnum_next(obj)) { /* copy tinfo and termstring into caches */ *tinfos_cache++ = Kino_TInfo_dupe(tinfo); *termstring_cache++ = Kino_BB_clone(term_buf->termstring); }}voidKino_SegTermEnum_scan_to(SegTermEnum *obj, char *target_termstring, I32 target_termstring_len) { TermBuffer *term_buf = obj->term_buf; ByteBuf target; /* make convenience copies */ target.ptr = target_termstring; target.size = target_termstring_len; /* keep looping until the termstring is lexically ge target */ do { const I32 comparison = Kino_BB_compare(term_buf->termstring, &target); if ( comparison >= 0 && obj->position != -1) { break; } } while (Kino_SegTermEnum_next(obj));}I32Kino_SegTermEnum_scan_cache(SegTermEnum *obj, char *target_termstring, I32 target_len) { TermBuffer *term_buf = obj->term_buf; ByteBuf **termstrings = obj->termstring_cache; ByteBuf target; I32 lo = 0; I32 hi = obj->enum_size - 1; I32 result = -100; I32 mid, comparison; /* make convenience copies */ target.ptr = target_termstring; target.size = target_len; if (obj->tinfos_cache == NULL) Kino_confess("Internal Error: fill_cache hasn't been called yet"); /* divide and conquer */ while (hi >= lo) { mid = (lo + hi) >> 1; comparison = Kino_BB_compare(&target, termstrings[mid]); if (comparison < 0) hi = mid - 1; else if (comparison > 0) lo = mid + 1; else { result = mid; break; } } result = hi == -1 ? 0 /* indicating that target lt first entry */ : result == -100 ? hi /* if result is still -100, it wasn't set */ : result; /* set the state of the Enum/TermBuffer as if we'd called scan_to */ obj->position = result; Kino_TermBuf_set_termstring(term_buf, termstrings[result]->ptr, termstrings[result]->size); Kino_TInfo_destroy(obj->tinfo); obj->tinfo = Kino_TInfo_dupe( obj->tinfos_cache[result] ); return result;}voidKino_SegTermEnum_destroy(SegTermEnum *obj) { /* put out the garbage for collection */ SvREFCNT_dec(obj->finfos); SvREFCNT_dec(obj->instream_sv); SvREFCNT_dec(obj->term_buf_ref); Kino_TInfo_destroy(obj->tinfo); /* if fill_cache was called, free all of that... */ if (obj->tinfos_cache != NULL) { I32 iter; ByteBuf **termstring_cache = obj->termstring_cache; TermInfo **tinfos_cache = obj->tinfos_cache; for (iter = 0; iter < obj->enum_size; iter++) { Kino_BB_destroy(*termstring_cache++); Kino_TInfo_destroy(*tinfos_cache++); } Kino_Safefree(obj->tinfos_cache); Kino_Safefree(obj->termstring_cache); } /* last, the SegTermEnum object itself */ Kino_Safefree(obj);}__POD__=begin devdocs=head1 NAMEKinoSearch::Index::SegTermEnum - single-segment TermEnum=head1 DESCRIPTIONSingle-segment implementation of KinoSearch::Index::TermEnum.=head1 COPYRIGHTCopyright 2005-2007 Marvin Humphrey=head1 LICENSE, DISCLAIMER, BUGS, etc.See L<KinoSearch|KinoSearch> version 0.163.=end devdocs=cut
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -