⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 bitvector.pm

📁 外国人写的Perl搜索引擎程序
💻 PM
字号:
package KinoSearch::Util::BitVector;use strict;use warnings;use KinoSearch::Util::ToolSet;use base qw( KinoSearch::Util::CClass );BEGIN {    __PACKAGE__->init_instance_vars(        # constructor params        capacity => 0,    );}1;__END____XS__MODULE = KinoSearch    PACKAGE = KinoSearch::Util::BitVectorvoidnew(either_sv, ...)    SV        *either_sv;PREINIT:    char      *class;    HV        *args_hash;    U32        capacity;    BitVector *bit_vec;PPCODE:    /* determine the class */    class = sv_isobject(either_sv)         ? sv_reftype(either_sv, 0)         : SvPV_nolen(either_sv);    /* process hash-style params */    Kino_Verify_build_args_hash(args_hash,         "KinoSearch::Util::BitVector::instance_vars", 1);    capacity = (U32)SvUV( Kino_Verify_extract_arg(args_hash, "capacity", 8) );    /* build object */    bit_vec = Kino_BitVec_new(capacity);    ST(0)   = sv_newmortal();    sv_setref_pv(ST(0), class, (void*)bit_vec);    XSRETURN(1);=for commentReturn true if the bit indcated by $num has been set, false if it hasn't(regardless of whether $num lies within the bounds of the object's capacity).=cutboolget(bit_vec, num)    BitVector *bit_vec;    U32        num;CODE:    RETVAL = Kino_BitVec_get(bit_vec, num);OUTPUT: RETVAL=for commentSet the bit at $num to 1.=cutvoidset(bit_vec, ...)    BitVector *bit_vec;PREINIT:    U32 i, num;PPCODE:    for (i = 1; i < items; i++) {        num = (U32)( SvUV( ST(i) ) );        Kino_BitVec_set(bit_vec, num);    }=for commentClear the bit at $num (i.e. set it to 0).=cutvoidclear(bit_vec, num)    BitVector *bit_vec;    U32        num;PPCODE:    Kino_BitVec_clear(bit_vec, num);=for commentSet all the bits bounded by $first and $last, inclusive, to 1.=cutvoidbulk_set(bit_vec, first, last)    BitVector *bit_vec;    U32        first;    U32        last;PPCODE:    Kino_BitVec_bulk_set(bit_vec, first, last);    =for commentClear all the bits bounded by $first and $last, inclusive.=cutvoidbulk_clear(bit_vec, first, last)    BitVector *bit_vec;    U32        first;    U32        last;PPCODE:    Kino_BitVec_bulk_clear(bit_vec, first, last);=for commentGiven $num, return either $num (if it is set), the next set bit above it, orif no such bit exists, undef (from Perl) or a sentinel (0xFFFFFFFF) from C.=cut    SV*next_set_bit(bit_vec, num)    BitVector *bit_vec;    U32        num;CODE:    num    = Kino_BitVec_next_set_bit(bit_vec, num);    RETVAL = num == KINO_BITVEC_SENTINEL ? &PL_sv_undef : newSVuv(num);OUTPUT: RETVAL=for commentGiven $num, return $num (if it is clear), or the next clear bit above it.The highest number that next_clear_bit can return is the object's capacity.=cutSV*next_clear_bit(bit_vec, num)    BitVector *bit_vec;    U32        num;CODE:    num = Kino_BitVec_next_clear_bit(bit_vec, num);    RETVAL = num == KINO_BITVEC_SENTINEL ? &PL_sv_undef : newSVuv(num);OUTPUT: RETVAL=for commentModify the BitVector so that only bits which remain set are those which 1)were already set in this BitVector, and 2) were also set in the otherBitVector.=cutvoidlogical_and(bit_vec, other)    BitVector *bit_vec;    BitVector *other;PPCODE:    Kino_BitVec_logical_and(bit_vec, other);=for commentReturn a count of the number of set bits in the BitVector.=cutU32count(bit_vec)    BitVector *bit_vec;CODE:    RETVAL = Kino_BitVec_count(bit_vec);OUTPUT: RETVAL=for commentReturn an arrayref of the with each element the number of a set bit.=cutvoidto_arrayref(bit_vec)    BitVector *bit_vec;PREINIT:    AV *out_av;PPCODE:    out_av = Kino_BitVec_to_array(bit_vec);    XPUSHs( sv_2mortal(newRV_noinc( (SV*)out_av )) );    XSRETURN(1);    =for commentSetters and getters.  A quirk: set_bits automatically adjusts capacityupwards to the appropriate multiple of 8 if necessary.=cutSV* _set_or_get(bit_vec, ...)    BitVector *bit_vec;ALIAS:    set_capacity = 1    get_capacity = 2    set_bits     = 3    get_bits     = 4PREINIT:    STRLEN  len;    U32     new_capacity;    char   *new_bits;CODE:{    KINO_START_SET_OR_GET_SWITCH    case 1:  new_capacity = SvUV(ST(1));             if (new_capacity < bit_vec->capacity) {                 Kino_BitVec_shrink(bit_vec, new_capacity);             }             else if (new_capacity > bit_vec->capacity) {                 Kino_BitVec_grow(bit_vec, new_capacity);             }             /* fall through */    case 2:  RETVAL = newSVuv(bit_vec->capacity);             break;    case 3:  Kino_Safefree(bit_vec->bits);             new_bits          = SvPV(ST(1), len);             bit_vec->bits     = (unsigned char*)Kino_savepvn(new_bits, len);             bit_vec->capacity = len << 3;             /* fall through */    case 4:  len = ceil(bit_vec->capacity / 8.0);             RETVAL = newSVpv((char*)bit_vec->bits, len);             break;    KINO_END_SET_OR_GET_SWITCH}OUTPUT: RETVALvoidDESTROY(bit_vec)    BitVector *bit_vec;PPCODE:    Kino_BitVec_destroy(bit_vec);__H__#ifndef H_KINO_BIT_VECTOR#define H_KINO_BIT_VECTOR 1#include "limits.h"#include "EXTERN.h"#include "perl.h"#include "XSUB.h"#include "KinoSearchUtilMathUtils.h"#include "KinoSearchUtilCarp.h"#include "KinoSearchUtilMemManager.h"#define KINO_BITVEC_SENTINEL 0xFFFFFFFFtypedef struct bitvector {    U32            capacity;    unsigned char *bits;} BitVector;BitVector* Kino_BitVec_new(U32);BitVector* Kino_BitVec_clone(BitVector*);void Kino_BitVec_grow(BitVector*, U32);void Kino_BitVec_shrink(BitVector *, U32);void Kino_BitVec_set(BitVector*, U32);void Kino_BitVec_clear(BitVector*, U32);void Kino_BitVec_bulk_set(BitVector*, U32, U32);void Kino_BitVec_bulk_clear(BitVector*, U32, U32);bool Kino_BitVec_get(BitVector*, U32);U32  Kino_BitVec_next_set_bit(BitVector*, U32);U32  Kino_BitVec_next_clear_bit(BitVector*, U32);void Kino_BitVec_logical_and(BitVector*, BitVector*);U32  Kino_BitVec_count(BitVector*);AV*  Kino_BitVec_to_array(BitVector*);void Kino_BitVec_destroy(BitVector*);#endif /* include guard */__C__#include "KinoSearchUtilBitVector.h"static unsigned char bitmasks[] = {     0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80,};BitVector*Kino_BitVec_new(U32 capacity) {    BitVector *bit_vec;    Kino_New(0, bit_vec, 1, BitVector);    bit_vec->capacity = 0;    bit_vec->bits = NULL;    Kino_BitVec_grow(bit_vec, capacity);    return bit_vec;}BitVector*Kino_BitVec_clone(BitVector *bit_vec) {    BitVector *evil_twin;    U32 byte_size;    Kino_New(0, evil_twin, 1, BitVector);    byte_size = ceil(bit_vec->capacity / 8.0);    evil_twin->bits         = (unsigned char*)Kino_savepvn((char*)bit_vec->bits, byte_size);    evil_twin->capacity = bit_vec->capacity;    return evil_twin;}voidKino_BitVec_grow(BitVector *bit_vec, U32 capacity) {    U32 byte_size;    U32 old_capacity;    /* derive size in bytes from size in bits */    byte_size = ceil(capacity / 8.0);    if (capacity > bit_vec->capacity && bit_vec->bits != NULL) {        U32 old_byte_size;        old_byte_size = ceil(bit_vec->capacity / 8.0);        Kino_Renew(bit_vec->bits, byte_size, unsigned char);        /* zero out all new bits, since Renew doesn't guarantee they're 0 */        old_capacity      = bit_vec->capacity;        bit_vec->capacity = capacity;        Kino_BitVec_bulk_clear(bit_vec, old_capacity, capacity - 1);        /* shouldn't be necessary, but Valgrind reports an error without it */        if (byte_size > old_byte_size) {            memset( (bit_vec->bits + old_byte_size), 0x00,                 (byte_size - old_byte_size) );        }    }    else if (bit_vec->bits == NULL) {        Kino_Newz(0, bit_vec->bits, byte_size, unsigned char);        bit_vec->capacity = capacity;    }}void Kino_BitVec_shrink(BitVector *bit_vec, U32 capacity) {    U32 byte_size;        if (capacity >= bit_vec->capacity)        return;    /* derive size in bytes from size in bits */    byte_size = ceil(capacity / 8.0);    Kino_Renew(bit_vec->bits, byte_size, unsigned char);    bit_vec->capacity = capacity;}void Kino_BitVec_set(BitVector *bit_vec, U32 num) {    if (num >= bit_vec->capacity)        Kino_BitVec_grow(bit_vec, num + 1);    bit_vec->bits[ (num >> 3) ]  |= bitmasks[num & 0x7];}void Kino_BitVec_clear(BitVector *bit_vec, U32 num) {    if (num >= bit_vec->capacity)        Kino_BitVec_grow(bit_vec, num + 1);    bit_vec->bits[ (num >> 3) ] &= ~(bitmasks[num & 0x7]);}voidKino_BitVec_bulk_set(BitVector *bit_vec, U32 first, U32 last) {    unsigned char *ptr;    U32   num_bytes;    /* detect range errors */    if (first > last) {        Kino_confess("bitvec range error: %d %d %d", first, last,             bit_vec->capacity);    }    /* grow the bits if necessary */    if (last >= bit_vec->capacity) {        Kino_BitVec_grow(bit_vec, last);    }    /* set partial bytes */    while (first % 8 != 0 && first <= last) {        Kino_BitVec_set(bit_vec, first++);    }    while (last % 8 != 0 && last >= first) {        Kino_BitVec_set(bit_vec, last--);    }    Kino_BitVec_set(bit_vec, last);    /* mass set whole bytes */    if (last > first) {        ptr = bit_vec->bits + (first >> 3);        num_bytes = (last - first) >> 3;        memset(ptr, 0xff, num_bytes);    }}voidKino_BitVec_bulk_clear(BitVector *bit_vec, U32 first, U32 last) {    unsigned char *ptr;    U32   num_bytes;    /* detect range errors */    if (first > last) {        Kino_confess("bitvec range error: %d %d %d", first, last,             bit_vec->capacity);    }    /* grow the bits if necessary */    if (last >= bit_vec->capacity) {        Kino_BitVec_grow(bit_vec, last);    }    /* clear partial bytes */    while (first % 8 != 0 && first <= last) {        Kino_BitVec_clear(bit_vec, first++);    }    while (last % 8 != 0 && last >= first) {        Kino_BitVec_clear(bit_vec, last--);    }    Kino_BitVec_clear(bit_vec, last);    /* mass clear whole bytes */    if (last > first) {        ptr       = bit_vec->bits + (first >> 3);        num_bytes = (last - first) >> 3;        memset(ptr, 0, num_bytes);    }}boolKino_BitVec_get(BitVector *bit_vec, U32 num) {    if (num >= bit_vec->capacity) return 0;    return (bit_vec->bits[ (num >> 3) ] & bitmasks[num & 0x7]) != 0;}U32Kino_BitVec_next_set_bit(BitVector *bit_vec, U32 num) {    U32   outval;    unsigned char *bits_ptr;    unsigned char *end_ptr;    int i;    U32 byte_size;    if (num >= bit_vec->capacity) {        return KINO_BITVEC_SENTINEL;    }    outval = KINO_BITVEC_SENTINEL;    bits_ptr  = bit_vec->bits + (num >> 3) ;    byte_size = ceil(bit_vec->capacity / 8.0);    end_ptr   = bit_vec->bits + byte_size;    while (outval == KINO_BITVEC_SENTINEL) {        if (*bits_ptr != 0) {            /* check each num in represented in this byte */            outval = (bits_ptr - bit_vec->bits) * 8;            for (i = 0; i < 8; i++) {                if (Kino_BitVec_get(bit_vec, outval) == 1) {                    if (outval < bit_vec->capacity && outval >= num) {                        return outval;                    }                }                outval++;            }            /* nothing valid, so reset the sentinel */            outval = KINO_BITVEC_SENTINEL;        }        if (++bits_ptr >= end_ptr)            break;    }    /* nothing valid, so return a sentinel */    return KINO_BITVEC_SENTINEL;}U32Kino_BitVec_next_clear_bit(BitVector *bit_vec, U32 num) {    U32   outval;    unsigned char *bits_ptr;    unsigned char *end_ptr;    int i;    if (num >= bit_vec->capacity) {        return num;    }    outval = KINO_BITVEC_SENTINEL;    bits_ptr = bit_vec->bits + (num >> 3) ;    end_ptr  = bit_vec->bits + (bit_vec->capacity >> 3);    while (outval == KINO_BITVEC_SENTINEL) {        if (*bits_ptr != 0xFF) {            /* check each num in represented in this byte */            outval = (bits_ptr - bit_vec->bits) * 8;            for (i = 0; i < 8; i++) {                if (Kino_BitVec_get(bit_vec, outval) == 0) {                    if (outval < bit_vec->capacity && outval >= num) {                        return outval;                    }                }                outval++;            }            /* nothing valid, so reset the sentinel */            outval = KINO_BITVEC_SENTINEL;        }        if (++bits_ptr >= end_ptr)            break;    }    /* didn't find clear bits in the set, so return 1 larger than the max */    return bit_vec->capacity;}voidKino_BitVec_logical_and(BitVector *bit_vec, BitVector *other) {    U32 num = 0;    while (1) {          num = Kino_BitVec_next_set_bit(bit_vec, num);        if (num == KINO_BITVEC_SENTINEL)            break;        if ( !Kino_BitVec_get(other, num) )             Kino_BitVec_clear(bit_vec, num);        num++;    }}const U32 BYTE_COUNTS[256] = {    0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4,    1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,    1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,    2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,    1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,    2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,    2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,    3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,    1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,    2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,    2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,    3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,    2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,    3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,    3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,    4, 5, 5, 6, 5, 6, 6, 7, 5, 6, 6, 7, 6, 7, 7, 8};U32 Kino_BitVec_count(BitVector *bit_vec) {    U32 count = 0;    U32 byte_size = ceil(bit_vec->capacity / 8.0);    unsigned char *ptr = bit_vec->bits;    unsigned char *limit = ptr + byte_size;    for( ; ptr < limit; ptr++) {        count += BYTE_COUNTS[*ptr];    }    return count;}AV*  Kino_BitVec_to_array(BitVector* bit_vec) {    U32  num = 0;    AV  *out_av;    out_av = newAV();    while (1) {          num = Kino_BitVec_next_set_bit(bit_vec, num);        if (num == KINO_BITVEC_SENTINEL)            break;        av_push( out_av, newSViv(num) );        num++;    }    return out_av;}voidKino_BitVec_destroy(BitVector* bit_vec) {    Kino_Safefree(bit_vec->bits);    Kino_Safefree(bit_vec);}__POD__=begin devdocs=head1 NAMEKinoSearch::Util::BitVector - a set of bits=head1 DESCRIPTIONA vector of bits, which grows as needed.  The implementation is designed toresemble both org.apache.lucene.util.BitVector and java.util.BitSet.  Accessible from both C and Perl.=head1 COPYRIGHTCopyright 2005-2007 Marvin Humphrey=head1 LICENSE, DISCLAIMER, BUGS, etc.See L<KinoSearch|KinoSearch> version 0.163.=end devdocs=cut

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -