📄 segtermdocs.pm
字号:
package KinoSearch::Index::SegTermDocs;
use strict;
use warnings;
use KinoSearch::Util::ToolSet;
use base qw( KinoSearch::Index::TermDocs );

BEGIN {
    __PACKAGE__->init_instance_vars(
        # constructor params
        reader => undef,
    );
}
our %instance_vars;

# Constructor.  Takes a single labeled param, `reader`, which must be a
# KinoSearch::Index::IndexReader (enforced by _set_reader).  The new object
# gets its own clones of the reader's streams.
sub new {
    my $self = shift->SUPER::new;
    confess kerror() unless verify_args( \%instance_vars, @_ );
    my %args = ( %instance_vars, @_ );
    my $reader = $args{reader};

    _init_child($self);

    # dupe some stuff from the parent reader.
    $self->_set_reader( $reader );
    $self->_set_skip_interval( $reader->get_skip_interval );
    $self->_set_freq_stream( $reader->get_freq_stream()->clone_stream );

    # The skip stream is a second, independent clone of the freq stream:
    # seek_tinfo computes the skip data location as an offset from the
    # term's frq_fileptr.
    $self->_set_skip_stream( $reader->get_freq_stream()->clone_stream );
    $self->_set_prox_stream( $reader->get_prox_stream()->clone_stream );
    $self->_set_deldocs( $reader->get_deldocs );

    return $self;
}

# Position this object on the postings for $term.  An undefined $term is
# passed through to seek_tinfo as an undefined term info.
sub seek {
    my ( $self, $term ) = @_;
    my $tinfo
        = defined $term
        ? $self->_get_reader()->fetch_term_info($term)
        : undef;
    $self->seek_tinfo($tinfo);
}

# Close the three cloned streams held by this object.
sub close {
    my $self = shift;
    $self->_get_freq_stream()->close;
    $self->_get_prox_stream()->close;
    $self->_get_skip_stream()->close;
}

1;

__END__

__XS__

MODULE = KinoSearch    PACKAGE = KinoSearch::Index::SegTermDocs

void
_init_child(term_docs)
    TermDocs *term_docs;
PPCODE:
    Kino_SegTermDocs_init_child(term_docs);

SV*
_set_or_get(term_docs, ...)
    TermDocs *term_docs;
ALIAS:
    _set_count         = 1
    _get_count         = 2
    _set_freq_stream   = 3
    _get_freq_stream   = 4
    _set_prox_stream   = 5
    _get_prox_stream   = 6
    _set_skip_stream   = 7
    _get_skip_stream   = 8
    _set_deldocs       = 9
    _get_deldocs       = 10
    _set_reader        = 11
    _get_reader        = 12
    set_read_positions = 13
    get_read_positions = 14
    _set_skip_interval = 15
    _get_skip_interval = 16
CODE:
{
    /* Odd alias numbers are setters which take the new value in ST(1) and
     * then fall through to the matching even-numbered getter, so every
     * alias returns a fresh copy of the (possibly just updated) value.
     */
    SegTermDocsChild *child = (SegTermDocsChild*)term_docs->child;

    KINO_START_SET_OR_GET_SWITCH

    case 1:  child->count = SvUV(ST(1));
             /* fall through */
    case 2:  RETVAL = newSVuv(child->count);
             break;

    /* NOTE(review): in the stream and deldocs setters below, the old SV is
     * released before Kino_extract_struct has looked at the new one --
     * confirm the macro's failure behaviour leaves the child consistent.
     */
    case 3:  SvREFCNT_dec(child->freq_stream_sv);
             child->freq_stream_sv = newSVsv( ST(1) );
             Kino_extract_struct( child->freq_stream_sv, child->freq_stream,
                 InStream*, "KinoSearch::Store::InStream");
             /* fall through */
    case 4:  RETVAL = newSVsv(child->freq_stream_sv);
             break;

    case 5:  SvREFCNT_dec(child->prox_stream_sv);
             child->prox_stream_sv = newSVsv( ST(1) );
             Kino_extract_struct( child->prox_stream_sv, child->prox_stream,
                 InStream*, "KinoSearch::Store::InStream");
             /* fall through */
    case 6:  RETVAL = newSVsv(child->prox_stream_sv);
             break;

    case 7:  SvREFCNT_dec(child->skip_stream_sv);
             child->skip_stream_sv = newSVsv( ST(1) );
             Kino_extract_struct( child->skip_stream_sv, child->skip_stream,
                 InStream*, "KinoSearch::Store::InStream");
             /* fall through */
    case 8:  RETVAL = newSVsv(child->skip_stream_sv);
             break;

    case 9:  SvREFCNT_dec(child->deldocs_sv);
             child->deldocs_sv = newSVsv( ST(1) );
             Kino_extract_struct( child->deldocs_sv, child->deldocs,
                 BitVector*, "KinoSearch::Index::DelDocs" );
             /* fall through */
    case 10: RETVAL = newSVsv(child->deldocs_sv);
             break;

    case 11: /* Validate before releasing the old value, so that a rejected
              * argument cannot leave reader_sv pointing at an SV whose
              * reference we have already given up (which destroy() would
              * then release a second time).
              */
             if (!sv_derived_from( ST(1), "KinoSearch::Index::IndexReader") )
                 Kino_confess("not a KinoSearch::Index::IndexReader");
             SvREFCNT_dec(child->reader_sv);
             child->reader_sv = newSVsv( ST(1) );
             /* fall through */
    case 12: RETVAL = newSVsv(child->reader_sv);
             break;

    case 13: child->read_positions = SvTRUE( ST(1) ) ? 1 : 0;
             /* fall through */
    case 14: RETVAL = newSViv(child->read_positions);
             break;

    case 15: child->skip_interval = SvUV(ST(1));
             /* fall through */
    case 16: RETVAL = newSVuv(child->skip_interval);
             break;

    KINO_END_SET_OR_GET_SWITCH
}
OUTPUT: RETVAL

__H__

#ifndef H_KINO_SEG_TERM_DOCS
#define H_KINO_SEG_TERM_DOCS 1

#include "EXTERN.h"
#include "perl.h"
#include "XSUB.h"
#include "KinoSearchUtilBitVector.h"
#include "KinoSearchIndexTermDocs.h"
#include "KinoSearchIndexTermInfo.h"
#include "KinoSearchStoreInStream.h"
#include "KinoSearchUtilMemManager.h"

/* Per-object state hung off TermDocs->child for a single-segment TermDocs. */
typedef struct segtermdocschild {
    U32        count;          /* postings consumed for the current term */
    U32        doc_freq;       /* total postings for the current term */
    U32        doc;            /* current doc number */
    U32        freq;           /* freq of the term within the current doc */
    U32        skip_doc;       /* doc number reached in the skip data */
    U32        skip_count;     /* skip entries read so far */
    U32        num_skips;      /* doc_freq / skip_interval */
    SV        *positions;      /* packed U32 positions for the current doc */
    U32        read_positions; /* boolean: load positions in next() */
    U32        skip_interval;
    InStream  *freq_stream;
    InStream  *prox_stream;
    InStream  *skip_stream;
    bool       have_skipped;   /* skip_stream has been seeked for this term */
    double     frq_fileptr;
    double     prx_fileptr;
    double     skip_fileptr;
    BitVector *deldocs;
    /* Perl-space owners of the structs above, plus the parent reader */
    SV        *freq_stream_sv;
    SV        *prox_stream_sv;
    SV        *skip_stream_sv;
    SV        *deldocs_sv;
    SV        *reader_sv;
} SegTermDocsChild;

void Kino_SegTermDocs_init_child(TermDocs*);
void Kino_SegTermDocs_set_doc_freq(TermDocs*, U32);
U32  Kino_SegTermDocs_get_doc_freq(TermDocs*);
U32  Kino_SegTermDocs_get_doc(TermDocs*);
U32  Kino_SegTermDocs_get_freq(TermDocs*);
SV*  Kino_SegTermDocs_get_positions(TermDocs*);
U32  Kino_SegTermDocs_bulk_read(TermDocs*, SV*, SV*, U32);
void Kino_SegTermDocs_seek_tinfo(TermDocs*, TermInfo*);
bool Kino_SegTermDocs_next(TermDocs*);
bool Kino_SegTermDocs_skip_to(TermDocs*, U32 target);
/* NOTE(review): no definition for this prototype appears in this file's
 * C section -- confirm whether it is still needed. */
bool Kino_SegTermDocs_skip_to_with_positions(TermDocs*);
void Kino_SegTermDocs_destroy(TermDocs*);

#endif /* include guard */

__C__

#include "KinoSearchIndexSegTermDocs.h"

static void
load_positions(TermDocs *term_docs);

/* Allocate the SegTermDocsChild for a TermDocs, install this class's
 * implementations in the TermDocs function-pointer slots, and give every
 * field of the child a defined starting value.
 */
void
Kino_SegTermDocs_init_child(TermDocs *term_docs) {
    SegTermDocsChild *child;

    Kino_New(1, child, 1, SegTermDocsChild);
    term_docs->child = child;

    /* these hold the sentinel until seek_tinfo resets them */
    child->doc_freq = KINO_TERM_DOCS_SENTINEL;
    child->doc      = KINO_TERM_DOCS_SENTINEL;
    child->freq     = KINO_TERM_DOCS_SENTINEL;

    /* child->positions starts life as an empty string */
    child->positions = newSV(1);
    SvCUR_set(child->positions, 0);
    SvPOK_on(child->positions);

    term_docs->set_doc_freq  = Kino_SegTermDocs_set_doc_freq;
    term_docs->get_doc_freq  = Kino_SegTermDocs_get_doc_freq;
    term_docs->get_doc       = Kino_SegTermDocs_get_doc;
    term_docs->get_freq      = Kino_SegTermDocs_get_freq;
    term_docs->get_positions = Kino_SegTermDocs_get_positions;
    term_docs->bulk_read     = Kino_SegTermDocs_bulk_read;
    term_docs->seek_tinfo    = Kino_SegTermDocs_seek_tinfo;
    term_docs->next          = Kino_SegTermDocs_next;
    term_docs->skip_to       = Kino_SegTermDocs_skip_to;
    term_docs->destroy       = Kino_SegTermDocs_destroy;

    child->freq_stream_sv   = &PL_sv_undef;
    child->prox_stream_sv   = &PL_sv_undef;
    child->skip_stream_sv   = &PL_sv_undef;
    child->deldocs_sv       = &PL_sv_undef;
    child->reader_sv        = &PL_sv_undef;

    child->count = 0;

    child->read_positions = 0; /* off by default */

    /* The remaining fields are normally filled in by the Perl-space
     * constructor and by seek_tinfo.  Give them defined values here too,
     * so that an object used before those calls never reads indeterminate
     * memory.
     */
    child->skip_doc      = 0;
    child->skip_count    = 0;
    child->num_skips     = 0;
    child->skip_interval = 0;
    child->have_skipped  = FALSE;
    child->frq_fileptr   = 0.0;
    child->prx_fileptr   = 0.0;
    child->skip_fileptr  = 0.0;
    child->freq_stream   = NULL;
    child->prox_stream   = NULL;
    child->skip_stream   = NULL;
    child->deldocs       = NULL;
}

/* Setter for the number of postings belonging to the current term. */
void
Kino_SegTermDocs_set_doc_freq(TermDocs *term_docs, U32 doc_freq) {
    SegTermDocsChild *child;
    child = (SegTermDocsChild*)term_docs->child;
    child->doc_freq = doc_freq;
}

/* Getter for the number of postings belonging to the current term. */
U32
Kino_SegTermDocs_get_doc_freq(TermDocs *term_docs) {
    SegTermDocsChild *child;
    child = (SegTermDocsChild*)term_docs->child;
    return child->doc_freq;
}

/* Return the current doc number. */
U32
Kino_SegTermDocs_get_doc(TermDocs *term_docs) {
    SegTermDocsChild *child;
    child = (SegTermDocsChild*)term_docs->child;
    return child->doc;
}

/* Return the frequency of the term within the current doc. */
U32
Kino_SegTermDocs_get_freq(TermDocs *term_docs) {
    SegTermDocsChild *child;
    child = (SegTermDocsChild*)term_docs->child;
    return child->freq;
}

/* Return the SV holding the current doc's positions as packed U32s.  The
 * SV is owned by the child; no new reference is created here.
 */
SV*
Kino_SegTermDocs_get_positions(TermDocs *term_docs) {
    SegTermDocsChild *child;
    child = (SegTermDocsChild*)term_docs->child;
    return child->positions;
}

/* Read up to num_wanted non-deleted postings in one go.
 *
 * doc_nums_sv and freqs_sv are turned into string SVs and filled with
 * packed U32 arrays of doc numbers and freqs respectively; their string
 * lengths are set to cover exactly the entries written.  Positions are
 * not loaded.  Returns the number of entries written.
 */
U32
Kino_SegTermDocs_bulk_read(TermDocs *term_docs, SV* doc_nums_sv,
                           SV* freqs_sv, U32 num_wanted) {
    SegTermDocsChild *child;
    InStream         *freq_stream;
    U32               doc_code;
    U32              *doc_nums;
    U32              *freqs;
    STRLEN            len;
    U32               num_got = 0;

    /* local copies */
    child       = (SegTermDocsChild*)term_docs->child;
    freq_stream = child->freq_stream;

    /* allocate space in supplied SVs and make them POK, if necessary */
    len = num_wanted * sizeof(U32);
    SvUPGRADE(doc_nums_sv, SVt_PV);
    SvUPGRADE(freqs_sv,    SVt_PV);
    SvPOK_on(doc_nums_sv);
    SvPOK_on(freqs_sv);
    doc_nums = (U32*)SvGROW(doc_nums_sv, len + 1);
    freqs    = (U32*)SvGROW(freqs_sv,    len + 1);

    while (child->count < child->doc_freq && num_got < num_wanted) {
        /* manually inlined call to term_docs->next */
        child->count++;
        doc_code = freq_stream->read_vint(freq_stream);
        child->doc  += doc_code >> 1;
        if (doc_code & 1)
            child->freq = 1;
        else
            child->freq = freq_stream->read_vint(freq_stream);

        /* if the doc isn't deleted... */
        if ( !Kino_BitVec_get(child->deldocs, child->doc) ) {
            /* ... append to results */
            *doc_nums++ = child->doc;
            *freqs++    = child->freq;
            num_got++;
        }
    }

    /* set the string end to the end of the U32 array */
    SvCUR_set(doc_nums_sv, (num_got * sizeof(U32)));
    SvCUR_set(freqs_sv,    (num_got * sizeof(U32)));

    return num_got;
}

/* Advance to the next non-deleted posting for the current term.
 *
 * Returns 1 with doc/freq (and positions, if read_positions is on)
 * updated, or 0 once all doc_freq postings have been consumed.
 */
bool
Kino_SegTermDocs_next(TermDocs *term_docs) {
    SegTermDocsChild *child = (SegTermDocsChild*)term_docs->child;
    InStream         *freq_stream = child->freq_stream;
    U32               doc_code;

    while (1) {
        /* bail if we're out of docs */
        if (child->count == child->doc_freq) {
            return 0;
        }

        /* decode delta doc */
        doc_code = freq_stream->read_vint(freq_stream);
        child->doc  += doc_code >> 1;

        /* if the stored num was odd, the freq is 1 */
        if (doc_code & 1) {
            child->freq = 1;
        }
        /* otherwise, freq was stored as a VInt. */
        else {
            child->freq = freq_stream->read_vint(freq_stream);
        }

        child->count++;

        /* read positions if desired -- this happens before the deletion
         * check, so positions of deleted docs are consumed as well */
        if (child->read_positions)
            load_positions(term_docs);

        /* if the doc isn't deleted... success! */
        if (!Kino_BitVec_get(child->deldocs, child->doc))
            break;
    }
    return 1;
}

/* Read child->freq delta-encoded positions from the prox stream and store
 * them in child->positions as a packed array of absolute U32 positions.
 */
static void
load_positions(TermDocs *term_docs) {
    SegTermDocsChild *child = (SegTermDocsChild*)term_docs->child;
    InStream *prox_stream = child->prox_stream;
    STRLEN    len         = child->freq * sizeof(U32);
    U32      *positions, *positions_end;
    U32       position = 0;

    SvGROW( child->positions, len );
    SvCUR_set(child->positions, len);
    positions     = (U32*)SvPVX(child->positions);
    positions_end = (U32*)SvEND(child->positions);

    while (positions < positions_end) {
        position += prox_stream->read_vint(prox_stream);
        *positions++ = position;
    }
}

/* Reset iteration state for a new term.
 *
 * With a NULL tinfo the object becomes empty (doc_freq 0, so next()
 * returns 0 immediately).  Otherwise all counters are zeroed, the file
 * pointers are taken from tinfo, and the freq and prox streams are seeked
 * to the start of the term's data.  The skip stream is not seeked here;
 * skip_to does that on first use.
 */
void
Kino_SegTermDocs_seek_tinfo(TermDocs *term_docs, TermInfo *tinfo) {
    SegTermDocsChild *child;
    child = (SegTermDocsChild*)term_docs->child;

    child->count = 0;

    if (tinfo == NULL) {
        child->doc_freq = 0;
    }
    else {
        child->doc          = 0;
        child->freq         = 0;
        child->skip_doc     = 0;
        child->skip_count   = 0;
        child->have_skipped = FALSE;
        /* a zero skip_interval means no usable skip data; avoid dividing
         * by it */
        child->num_skips    = child->skip_interval > 0
                            ? tinfo->doc_freq / child->skip_interval
                            : 0;
        child->doc_freq     = tinfo->doc_freq;
        child->frq_fileptr  = tinfo->frq_fileptr;
        child->prx_fileptr  = tinfo->prx_fileptr;
        child->skip_fileptr = tinfo->frq_fileptr + tinfo->skip_offset;
        child->freq_stream->seek( child->freq_stream, tinfo->frq_fileptr );
        child->prox_stream->seek( child->prox_stream, tinfo->prx_fileptr );
    }
}

/* Advance to the first non-deleted posting whose doc number is >= target.
 *
 * Always consumes at least one posting via term_docs->next.  When the term
 * has at least skip_interval postings, the skip data is consulted first so
 * that the freq (and, if read_positions is on, prox) stream can jump ahead
 * before the linear scan.  Returns FALSE if the postings run out first.
 */
bool
Kino_SegTermDocs_skip_to(TermDocs *term_docs, U32 target) {
    SegTermDocsChild *child = (SegTermDocsChild*)term_docs->child;

    /* skip_interval > 0 keeps the modulus below well-defined; with a zero
     * interval we simply fall through to the linear scan */
    if (child->skip_interval > 0
        && child->doc_freq >= child->skip_interval
    ) {
        InStream *freq_stream   = child->freq_stream;
        InStream *prox_stream   = child->prox_stream;
        InStream *skip_stream   = child->skip_stream;
        U32 last_skip_doc       = child->skip_doc;
        double last_frq_fileptr = freq_stream->tell(freq_stream);
        double last_prx_fileptr = -1;
        I32 num_skipped         = -1 - (child->count % child->skip_interval);

        /* lazily position the skip stream the first time it's needed */
        if (!child->have_skipped) {
            child->skip_stream->seek(child->skip_stream, child->skip_fileptr);
            child->have_skipped = TRUE;
        }

        /* walk the skip entries, remembering the last one before target */
        while (target > child->skip_doc) {
            last_skip_doc    = child->skip_doc;
            last_frq_fileptr = child->frq_fileptr;
            last_prx_fileptr = child->prx_fileptr;

            if (child->skip_doc != 0 && child->skip_doc >= child->doc) {
                num_skipped += child->skip_interval;
            }

            if (child->skip_count >= child->num_skips) {
                break;
            }

            /* each skip entry is three delta-encoded VInts */
            child->skip_doc    += skip_stream->read_vint(skip_stream);
            child->frq_fileptr += skip_stream->read_vint(skip_stream);
            child->prx_fileptr += skip_stream->read_vint(skip_stream);

            child->skip_count++;
        }

        /* if there's something to skip, skip it */
        if (last_frq_fileptr > freq_stream->tell(freq_stream)) {
            freq_stream->seek(freq_stream, last_frq_fileptr);
            if (child->read_positions) {
                prox_stream->seek(prox_stream, last_prx_fileptr);
            }
            child->doc    = last_skip_doc;
            child->count += num_skipped;
        }
    }

    /* done skipping, so scan */
    do {
        if (!term_docs->next(term_docs)) {
            return FALSE;
        }
    } while (target > child->doc);

    return TRUE;
}

/* Release every SV reference held by the child, free the child itself,
 * then hand off to the parent class's destructor.
 */
void
Kino_SegTermDocs_destroy(TermDocs *term_docs){
    SegTermDocsChild *child;
    child = (SegTermDocsChild*)term_docs->child;

    SvREFCNT_dec(child->positions);
    SvREFCNT_dec(child->freq_stream_sv);
    SvREFCNT_dec(child->prox_stream_sv);
    SvREFCNT_dec(child->skip_stream_sv);
    SvREFCNT_dec(child->deldocs_sv);
    SvREFCNT_dec(child->reader_sv);

    Kino_Safefree(child);

    Kino_TermDocs_destroy(term_docs);
}

__POD__

=begin devdocs

=head1 NAME

KinoSearch::Index::SegTermDocs - single-segment TermDocs

=head1 DESCRIPTION

Single-segment implementation of KinoSearch::Index::TermDocs.

=head1 COPYRIGHT

Copyright 2005-2007 Marvin Humphrey

=head1 LICENSE, DISCLAIMER, BUGS, etc.

See L<KinoSearch|KinoSearch> version 0.163.

=end devdocs

=cut
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -