⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 deldocs.pm

📁 外国人写的Perl搜索引擎程序
💻 PM
字号:
package KinoSearch::Index::DelDocs;
use strict;
use warnings;
use KinoSearch::Util::ToolSet;
use base qw( KinoSearch::Util::BitVector );

use KinoSearch::Util::IntMap;

# A bit vector of deleted documents: bit N set means doc number N is deleted.
# On top of the inherited BitVector behavior this class keeps a running count
# of set bits and knows how to read/write itself through an invindex.
#
# NOTE(review): the sections after __END__ (__XS__, __H__, __C__, __POD__) are
# presumably split out by the distribution's build tooling -- confirm before
# changing any of the section markers.

# instance vars:
# Per-object deletion count, keyed by the stringified object reference.
# Entries are created in new() and removed in DESTROY().
my %num_deletions;

# Constructor.  Builds the object via the parent class constructor and
# starts its deletion count at zero.
sub new {
    my $self = shift->SUPER::new;
    $num_deletions{"$self"} = 0;
    return $self;
}

# Read a deletions file if one exists.
#
# Args: $invindex (must provide file_exists and open_instream),
#       $filename (name of the deletions file within the invindex).
# When the file is absent the object is left untouched.
sub read_deldocs {
    my ( $self, $invindex, $filename ) = @_;

    # load the file into memory if it's there
    if ( $invindex->file_exists($filename) ) {
        my $instream = $invindex->open_instream($filename);
        my $byte_size;

        # header: size of the bit data in bytes, then the deletion count
        ( $byte_size, $num_deletions{"$self"} ) = $instream->lu_read('ii');
        $self->set_bits( $instream->lu_read("a$byte_size") );
        $instream->close;
    }
}

# Blast out a hard copy of the deletions held in memory.
#
# Args: $invindex (must provide file_exists, delete_file, open_outstream),
#       $filename (name of the deletions file within the invindex),
#       $max_doc  (number of docs the written bit data must cover).
# Any existing file of the same name is deleted first.  The layout written
# here mirrors what read_deldocs() reads back: byte size, deletion count,
# raw bits.
sub write_deldocs {
    my ( $self, $invindex, $filename, $max_doc ) = @_;
    if ( $invindex->file_exists($filename) ) {
        $invindex->delete_file($filename);
    }
    my $outstream = $invindex->open_outstream($filename);

    # pad out deldocs->bits
    $self->set_capacity($max_doc);

    # write header followed by deletions data
    # (ceil is not defined in this file; presumably exported by ToolSet)
    my $byte_size = ceil( $max_doc / 8 );
    $outstream->lu_write(
        "iia$byte_size",
        $byte_size,
        $num_deletions{"$self"}, $self->get_bits,
    );

    $outstream->close;
}

# Mark a doc as deleted.
#
# Args: $doc_num (document number whose bit should be set).
sub set {
    my ( $self, $doc_num ) = @_;

    # ... only if it isn't already deleted
    # (the guard keeps the count from being bumped twice for the same doc)
    if ( !$self->get($doc_num) ) {
        $self->SUPER::set($doc_num);
        $num_deletions{"$self"}++;
    }
}

# Delete all the docs represented by a TermDocs object.
#
# Args: $term_docs (passed straight through to the XS helper).
# The XS helper returns how many docs it newly marked; that number is added
# to this object's deletion count.
sub delete_by_term_docs {
    my ( $self, $term_docs ) = @_;
    $num_deletions{"$self"} += _delete_by_term_docs( $self, $term_docs );
}

# Undelete a doc.
#
# Args: $doc_num (document number whose bit should be cleared).
sub clear {
    my ( $self, $doc_num ) = @_;

    # ... only if it was deleted before
    # (the guard keeps the count from being decremented for a live doc)
    if ( $self->get($doc_num) ) {
        $self->SUPER::clear($doc_num);
        $num_deletions{"$self"}--;
    }
}

# Accessor: the current deletion count for this object.
sub get_num_deletions { $num_deletions{"$_[0]"} }

# Map around deleted documents.
#
# Args: $max and $offset, both forwarded unchanged to the XS routine
# _generate_doc_map.
# Returns a KinoSearch::Util::IntMap wrapping the reference that routine
# hands back.
sub generate_doc_map {
    my ( $self, $max, $offset ) = @_;
    my $map = $self->_generate_doc_map( $max, $offset );
    return KinoSearch::Util::IntMap->new($map);
}

# If these get implemented, we'll need to write a range_count(first, last)
# method for BitVector.
# NOTE(review): todo_death is not defined in this file; presumably it dies
# with a "not implemented" style message -- confirm in the parent classes.
sub bulk_set   { shift->todo_death }
sub bulk_clear { shift->todo_death }

# Intentionally empty.
sub close { }

# Drop this object's entry from %num_deletions, then let the parent class
# run its own destructor.
sub DESTROY {
    my $self = shift;
    delete $num_deletions{"$self"};
    $self->SUPER::DESTROY;
}

1;

__END__

__XS__

MODULE = KinoSearch   PACKAGE = KinoSearch::Index::DelDocs

SV*
_generate_doc_map(deldocs, max, offset);
    BitVector *deldocs;
    I32        max;
    I32        offset;
PREINIT:
    SV *map_sv;
CODE:
    map_sv = Kino_DelDocs_generate_doc_map(deldocs, max, offset);
    RETVAL = newRV_noinc(map_sv);
OUTPUT: RETVAL

I32
_delete_by_term_docs(deldocs, term_docs)
    BitVector *deldocs;
    TermDocs  *term_docs;
CODE:
    RETVAL = Kino_DelDocs_delete_by_term_docs(deldocs, term_docs);
OUTPUT: RETVAL

__H__

#ifndef H_KINOSEARCH_DELDOCS
#define H_KINOSEARCH_DELDOCS 1

#include "EXTERN.h"
#include "perl.h"
#include "XSUB.h"
#include "KinoSearchIndexTermDocs.h"
#include "KinoSearchUtilBitVector.h"

SV* Kino_DelDocs_generate_doc_map(BitVector*, I32, I32);
I32 Kino_DelDocs_delete_by_term_docs(BitVector*, TermDocs*);

#endif /* include guard */

__C__

#include "KinoSearchIndexDelDocs.h"

/* Build a doc map for doc numbers 0 .. max-1.
 *
 * Returns a new string SV whose buffer holds max consecutive I32 values:
 * -1 where the doc's bit is set in deldocs, otherwise offset plus the count
 * of non-deleted docs seen before it.
 */
SV*
Kino_DelDocs_generate_doc_map(BitVector *deldocs, I32 max, I32 offset) {
    SV   *doc_map_sv;
    I32  *doc_map;
    I32   new_doc_num;
    I32   i;

    /* allocate space for the doc map */
    doc_map_sv = newSV(max * sizeof(I32) + 1);
    SvCUR_set(doc_map_sv, max * sizeof(I32));
    SvPOK_on(doc_map_sv);
    /* string SVs are expected to be NUL-terminated; the extra byte requested
     * above leaves room for the terminator just past the I32 data */
    *SvEND(doc_map_sv) = '\0';
    doc_map = (I32*)SvPVX(doc_map_sv);

    /* -1 for a deleted doc, a new number otherwise */
    new_doc_num = 0;
    for (i = 0; i < max; i++) {
        if (Kino_BitVec_get(deldocs, i))
            *doc_map++ = -1;
        else
            *doc_map++ = offset + new_doc_num++;
    }

    return doc_map_sv;
}

/* Mark every doc produced by term_docs as deleted in deldocs.
 *
 * Returns the number of docs that were newly marked; docs whose bit was
 * already set are skipped and not counted.
 */
I32
Kino_DelDocs_delete_by_term_docs(BitVector* deldocs, TermDocs* term_docs) {
    I32 doc;
    I32 num_deleted = 0;

    /* iterate through term docs, marking each doc returned as deleted */
    while (term_docs->next(term_docs)) {
        doc = term_docs->get_doc(term_docs);
        if (Kino_BitVec_get(deldocs, doc))
            continue;
        Kino_BitVec_set(deldocs, doc);
        num_deleted++;
    }

    return num_deleted;
}

__POD__

=begin devdocs

=head1 NAME

KinoSearch::Index::DelDocs - manage documents deleted from an invindex

=head1 DESCRIPTION

DelDocs provides the low-level mechanisms for declaring a document deleted
from a segment, and for finding out whether or not a particular document has
been deleted.

Note that documents are not actually gone from the invindex until the segment
gets rewritten.

=head1 TODO

Consider ways to synchronize instances of this class so that there will be
exactly one instance per segment.  That way, if an InvIndexer uses the instance
to delete a document, readers would have the modified vecstring available
right away without having to reread the .del file.

=head1 COPYRIGHT

Copyright 2005-2007 Marvin Humphrey

=head1 LICENSE, DISCLAIMER, BUGS, etc.

See L<KinoSearch|KinoSearch> version 0.163.

=end devdocs

=cut

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -