📄 bitvector.pm
字号:
package KinoSearch::Util::BitVector;use strict;use warnings;use KinoSearch::Util::ToolSet;use base qw( KinoSearch::Util::CClass );BEGIN { __PACKAGE__->init_instance_vars( # constructor params capacity => 0, );}1;__END____XS__MODULE = KinoSearch PACKAGE = KinoSearch::Util::BitVectorvoidnew(either_sv, ...) SV *either_sv;PREINIT: char *class; HV *args_hash; U32 capacity; BitVector *bit_vec;PPCODE: /* determine the class */ class = sv_isobject(either_sv) ? sv_reftype(either_sv, 0) : SvPV_nolen(either_sv); /* process hash-style params */ Kino_Verify_build_args_hash(args_hash, "KinoSearch::Util::BitVector::instance_vars", 1); capacity = (U32)SvUV( Kino_Verify_extract_arg(args_hash, "capacity", 8) ); /* build object */ bit_vec = Kino_BitVec_new(capacity); ST(0) = sv_newmortal(); sv_setref_pv(ST(0), class, (void*)bit_vec); XSRETURN(1);=for commentReturn true if the bit indcated by $num has been set, false if it hasn't(regardless of whether $num lies within the bounds of the object's capacity).=cutboolget(bit_vec, num) BitVector *bit_vec; U32 num;CODE: RETVAL = Kino_BitVec_get(bit_vec, num);OUTPUT: RETVAL=for commentSet the bit at $num to 1.=cutvoidset(bit_vec, ...) BitVector *bit_vec;PREINIT: U32 i, num;PPCODE: for (i = 1; i < items; i++) { num = (U32)( SvUV( ST(i) ) ); Kino_BitVec_set(bit_vec, num); }=for commentClear the bit at $num (i.e. set it to 0).=cutvoidclear(bit_vec, num) BitVector *bit_vec; U32 num;PPCODE: Kino_BitVec_clear(bit_vec, num);=for commentSet all the bits bounded by $first and $last, inclusive, to 1.=cutvoidbulk_set(bit_vec, first, last) BitVector *bit_vec; U32 first; U32 last;PPCODE: Kino_BitVec_bulk_set(bit_vec, first, last); =for commentClear all the bits bounded by $first and $last, inclusive.=cutvoidbulk_clear(bit_vec, first, last) BitVector *bit_vec; U32 first; U32 last;PPCODE: Kino_BitVec_bulk_clear(bit_vec, first, last);=for commentGiven $num, return either $num (if it is set), the next set bit above it, orif no such bit exists, undef (from Perl) or a sentinel (0xFFFFFFFF) from C.=cut SV*next_set_bit(bit_vec, num) BitVector *bit_vec; U32 num;CODE: num = Kino_BitVec_next_set_bit(bit_vec, num); RETVAL = num == KINO_BITVEC_SENTINEL ? &PL_sv_undef : newSVuv(num);OUTPUT: RETVAL=for commentGiven $num, return $num (if it is clear), or the next clear bit above it.The highest number that next_clear_bit can return is the object's capacity.=cutSV*next_clear_bit(bit_vec, num) BitVector *bit_vec; U32 num;CODE: num = Kino_BitVec_next_clear_bit(bit_vec, num); RETVAL = num == KINO_BITVEC_SENTINEL ? &PL_sv_undef : newSVuv(num);OUTPUT: RETVAL=for commentModify the BitVector so that only bits which remain set are those which 1)were already set in this BitVector, and 2) were also set in the otherBitVector.=cutvoidlogical_and(bit_vec, other) BitVector *bit_vec; BitVector *other;PPCODE: Kino_BitVec_logical_and(bit_vec, other);=for commentReturn a count of the number of set bits in the BitVector.=cutU32count(bit_vec) BitVector *bit_vec;CODE: RETVAL = Kino_BitVec_count(bit_vec);OUTPUT: RETVAL=for commentReturn an arrayref of the with each element the number of a set bit.=cutvoidto_arrayref(bit_vec) BitVector *bit_vec;PREINIT: AV *out_av;PPCODE: out_av = Kino_BitVec_to_array(bit_vec); XPUSHs( sv_2mortal(newRV_noinc( (SV*)out_av )) ); XSRETURN(1); =for commentSetters and getters. A quirk: set_bits automatically adjusts capacityupwards to the appropriate multiple of 8 if necessary.=cutSV* _set_or_get(bit_vec, ...) BitVector *bit_vec;ALIAS: set_capacity = 1 get_capacity = 2 set_bits = 3 get_bits = 4PREINIT: STRLEN len; U32 new_capacity; char *new_bits;CODE:{ KINO_START_SET_OR_GET_SWITCH case 1: new_capacity = SvUV(ST(1)); if (new_capacity < bit_vec->capacity) { Kino_BitVec_shrink(bit_vec, new_capacity); } else if (new_capacity > bit_vec->capacity) { Kino_BitVec_grow(bit_vec, new_capacity); } /* fall through */ case 2: RETVAL = newSVuv(bit_vec->capacity); break; case 3: Kino_Safefree(bit_vec->bits); new_bits = SvPV(ST(1), len); bit_vec->bits = (unsigned char*)Kino_savepvn(new_bits, len); bit_vec->capacity = len << 3; /* fall through */ case 4: len = ceil(bit_vec->capacity / 8.0); RETVAL = newSVpv((char*)bit_vec->bits, len); break; KINO_END_SET_OR_GET_SWITCH}OUTPUT: RETVALvoidDESTROY(bit_vec) BitVector *bit_vec;PPCODE: Kino_BitVec_destroy(bit_vec);__H__#ifndef H_KINO_BIT_VECTOR#define H_KINO_BIT_VECTOR 1#include "limits.h"#include "EXTERN.h"#include "perl.h"#include "XSUB.h"#include "KinoSearchUtilMathUtils.h"#include "KinoSearchUtilCarp.h"#include "KinoSearchUtilMemManager.h"#define KINO_BITVEC_SENTINEL 0xFFFFFFFFtypedef struct bitvector { U32 capacity; unsigned char *bits;} BitVector;BitVector* Kino_BitVec_new(U32);BitVector* Kino_BitVec_clone(BitVector*);void Kino_BitVec_grow(BitVector*, U32);void Kino_BitVec_shrink(BitVector *, U32);void Kino_BitVec_set(BitVector*, U32);void Kino_BitVec_clear(BitVector*, U32);void Kino_BitVec_bulk_set(BitVector*, U32, U32);void Kino_BitVec_bulk_clear(BitVector*, U32, U32);bool Kino_BitVec_get(BitVector*, U32);U32 Kino_BitVec_next_set_bit(BitVector*, U32);U32 Kino_BitVec_next_clear_bit(BitVector*, U32);void Kino_BitVec_logical_and(BitVector*, BitVector*);U32 Kino_BitVec_count(BitVector*);AV* Kino_BitVec_to_array(BitVector*);void Kino_BitVec_destroy(BitVector*);#endif /* include guard */__C__#include "KinoSearchUtilBitVector.h"static unsigned char bitmasks[] = { 0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80,};BitVector*Kino_BitVec_new(U32 capacity) { BitVector *bit_vec; Kino_New(0, bit_vec, 1, BitVector); bit_vec->capacity = 0; bit_vec->bits = NULL; Kino_BitVec_grow(bit_vec, capacity); return bit_vec;}BitVector*Kino_BitVec_clone(BitVector *bit_vec) { BitVector *evil_twin; U32 byte_size; Kino_New(0, evil_twin, 1, BitVector); byte_size = ceil(bit_vec->capacity / 8.0); evil_twin->bits = (unsigned char*)Kino_savepvn((char*)bit_vec->bits, byte_size); evil_twin->capacity = bit_vec->capacity; return evil_twin;}voidKino_BitVec_grow(BitVector *bit_vec, U32 capacity) { U32 byte_size; U32 old_capacity; /* derive size in bytes from size in bits */ byte_size = ceil(capacity / 8.0); if (capacity > bit_vec->capacity && bit_vec->bits != NULL) { U32 old_byte_size; old_byte_size = ceil(bit_vec->capacity / 8.0); Kino_Renew(bit_vec->bits, byte_size, unsigned char); /* zero out all new bits, since Renew doesn't guarantee they're 0 */ old_capacity = bit_vec->capacity; bit_vec->capacity = capacity; Kino_BitVec_bulk_clear(bit_vec, old_capacity, capacity - 1); /* shouldn't be necessary, but Valgrind reports an error without it */ if (byte_size > old_byte_size) { memset( (bit_vec->bits + old_byte_size), 0x00, (byte_size - old_byte_size) ); } } else if (bit_vec->bits == NULL) { Kino_Newz(0, bit_vec->bits, byte_size, unsigned char); bit_vec->capacity = capacity; }}void Kino_BitVec_shrink(BitVector *bit_vec, U32 capacity) { U32 byte_size; if (capacity >= bit_vec->capacity) return; /* derive size in bytes from size in bits */ byte_size = ceil(capacity / 8.0); Kino_Renew(bit_vec->bits, byte_size, unsigned char); bit_vec->capacity = capacity;}void Kino_BitVec_set(BitVector *bit_vec, U32 num) { if (num >= bit_vec->capacity) Kino_BitVec_grow(bit_vec, num + 1); bit_vec->bits[ (num >> 3) ] |= bitmasks[num & 0x7];}void Kino_BitVec_clear(BitVector *bit_vec, U32 num) { if (num >= bit_vec->capacity) Kino_BitVec_grow(bit_vec, num + 1); bit_vec->bits[ (num >> 3) ] &= ~(bitmasks[num & 0x7]);}voidKino_BitVec_bulk_set(BitVector *bit_vec, U32 first, U32 last) { unsigned char *ptr; U32 num_bytes; /* detect range errors */ if (first > last) { Kino_confess("bitvec range error: %d %d %d", first, last, bit_vec->capacity); } /* grow the bits if necessary */ if (last >= bit_vec->capacity) { Kino_BitVec_grow(bit_vec, last); } /* set partial bytes */ while (first % 8 != 0 && first <= last) { Kino_BitVec_set(bit_vec, first++); } while (last % 8 != 0 && last >= first) { Kino_BitVec_set(bit_vec, last--); } Kino_BitVec_set(bit_vec, last); /* mass set whole bytes */ if (last > first) { ptr = bit_vec->bits + (first >> 3); num_bytes = (last - first) >> 3; memset(ptr, 0xff, num_bytes); }}voidKino_BitVec_bulk_clear(BitVector *bit_vec, U32 first, U32 last) { unsigned char *ptr; U32 num_bytes; /* detect range errors */ if (first > last) { Kino_confess("bitvec range error: %d %d %d", first, last, bit_vec->capacity); } /* grow the bits if necessary */ if (last >= bit_vec->capacity) { Kino_BitVec_grow(bit_vec, last); } /* clear partial bytes */ while (first % 8 != 0 && first <= last) { Kino_BitVec_clear(bit_vec, first++); } while (last % 8 != 0 && last >= first) { Kino_BitVec_clear(bit_vec, last--); } Kino_BitVec_clear(bit_vec, last); /* mass clear whole bytes */ if (last > first) { ptr = bit_vec->bits + (first >> 3); num_bytes = (last - first) >> 3; memset(ptr, 0, num_bytes); }}boolKino_BitVec_get(BitVector *bit_vec, U32 num) { if (num >= bit_vec->capacity) return 0; return (bit_vec->bits[ (num >> 3) ] & bitmasks[num & 0x7]) != 0;}U32Kino_BitVec_next_set_bit(BitVector *bit_vec, U32 num) { U32 outval; unsigned char *bits_ptr; unsigned char *end_ptr; int i; U32 byte_size; if (num >= bit_vec->capacity) { return KINO_BITVEC_SENTINEL; } outval = KINO_BITVEC_SENTINEL; bits_ptr = bit_vec->bits + (num >> 3) ; byte_size = ceil(bit_vec->capacity / 8.0); end_ptr = bit_vec->bits + byte_size; while (outval == KINO_BITVEC_SENTINEL) { if (*bits_ptr != 0) { /* check each num in represented in this byte */ outval = (bits_ptr - bit_vec->bits) * 8; for (i = 0; i < 8; i++) { if (Kino_BitVec_get(bit_vec, outval) == 1) { if (outval < bit_vec->capacity && outval >= num) { return outval; } } outval++; } /* nothing valid, so reset the sentinel */ outval = KINO_BITVEC_SENTINEL; } if (++bits_ptr >= end_ptr) break; } /* nothing valid, so return a sentinel */ return KINO_BITVEC_SENTINEL;}U32Kino_BitVec_next_clear_bit(BitVector *bit_vec, U32 num) { U32 outval; unsigned char *bits_ptr; unsigned char *end_ptr; int i; if (num >= bit_vec->capacity) { return num; } outval = KINO_BITVEC_SENTINEL; bits_ptr = bit_vec->bits + (num >> 3) ; end_ptr = bit_vec->bits + (bit_vec->capacity >> 3); while (outval == KINO_BITVEC_SENTINEL) { if (*bits_ptr != 0xFF) { /* check each num in represented in this byte */ outval = (bits_ptr - bit_vec->bits) * 8; for (i = 0; i < 8; i++) { if (Kino_BitVec_get(bit_vec, outval) == 0) { if (outval < bit_vec->capacity && outval >= num) { return outval; } } outval++; } /* nothing valid, so reset the sentinel */ outval = KINO_BITVEC_SENTINEL; } if (++bits_ptr >= end_ptr) break; } /* didn't find clear bits in the set, so return 1 larger than the max */ return bit_vec->capacity;}voidKino_BitVec_logical_and(BitVector *bit_vec, BitVector *other) { U32 num = 0; while (1) { num = Kino_BitVec_next_set_bit(bit_vec, num); if (num == KINO_BITVEC_SENTINEL) break; if ( !Kino_BitVec_get(other, num) ) Kino_BitVec_clear(bit_vec, num); num++; }}const U32 BYTE_COUNTS[256] = { 0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4, 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, 4, 5, 5, 6, 5, 6, 6, 7, 5, 6, 6, 7, 6, 7, 7, 8};U32 Kino_BitVec_count(BitVector *bit_vec) { U32 count = 0; U32 byte_size = ceil(bit_vec->capacity / 8.0); unsigned char *ptr = bit_vec->bits; unsigned char *limit = ptr + byte_size; for( ; ptr < limit; ptr++) { count += BYTE_COUNTS[*ptr]; } return count;}AV* Kino_BitVec_to_array(BitVector* bit_vec) { U32 num = 0; AV *out_av; out_av = newAV(); while (1) { num = Kino_BitVec_next_set_bit(bit_vec, num); if (num == KINO_BITVEC_SENTINEL) break; av_push( out_av, newSViv(num) ); num++; } return out_av;}voidKino_BitVec_destroy(BitVector* bit_vec) { Kino_Safefree(bit_vec->bits); Kino_Safefree(bit_vec);}__POD__=begin devdocs=head1 NAMEKinoSearch::Util::BitVector - a set of bits=head1 DESCRIPTIONA vector of bits, which grows as needed. The implementation is designed toresemble both org.apache.lucene.util.BitVector and java.util.BitSet. Accessible from both C and Perl.=head1 COPYRIGHTCopyright 2005-2007 Marvin Humphrey=head1 LICENSE, DISCLAIMER, BUGS, etc.See L<KinoSearch|KinoSearch> version 0.163.=end devdocs=cut
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -