📄 bitutil.java
字号:
/** * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright ownership. * The ASF licenses this file to You under the Apache License, Version 2.0 * (the "License"); you may not use this file except in compliance with * the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */package org.apache.lucene.util; // from org.apache.solr.util rev 555343/** A variety of high efficiencly bit twiddling routines. * * @version $Id$ */public class BitUtil { /** Returns the number of bits set in the long */ public static int pop(long x) { /* Hacker's Delight 32 bit pop function: * http://www.hackersdelight.org/HDcode/newCode/pop_arrayHS.cc * int pop(unsigned x) { x = x - ((x >> 1) & 0x55555555); x = (x & 0x33333333) + ((x >> 2) & 0x33333333); x = (x + (x >> 4)) & 0x0F0F0F0F; x = x + (x >> 8); x = x + (x >> 16); return x & 0x0000003F; } ***/ // 64 bit java version of the C function from above x = x - ((x >>> 1) & 0x5555555555555555L); x = (x & 0x3333333333333333L) + ((x >>>2 ) & 0x3333333333333333L); x = (x + (x >>> 4)) & 0x0F0F0F0F0F0F0F0FL; x = x + (x >>> 8); x = x + (x >>> 16); x = x + (x >>> 32); return ((int)x) & 0x7F; } /*** Returns the number of set bits in an array of longs. */ public static long pop_array(long A[], int wordOffset, int numWords) { /* * Robert Harley and David Seal's bit counting algorithm, as documented * in the revisions of Hacker's Delight * http://www.hackersdelight.org/revisions.pdf * http://www.hackersdelight.org/HDcode/newCode/pop_arrayHS.cc * * This function was adapted to Java, and extended to use 64 bit words. * if only we had access to wider registers like SSE from java... * * This function can be transformed to compute the popcount of other functions * on bitsets via something like this: * sed 's/A\[\([^]]*\)\]/\(A[\1] \& B[\1]\)/g' * */ int n = wordOffset+numWords; long tot=0, tot8=0; long ones=0, twos=0, fours=0; int i; for (i = wordOffset; i <= n - 8; i+=8) { /*** C macro from Hacker's Delight #define CSA(h,l, a,b,c) \ {unsigned u = a ^ b; unsigned v = c; \ h = (a & b) | (u & v); l = u ^ v;} ***/ long twosA,twosB,foursA,foursB,eights; // CSA(twosA, ones, ones, A[i], A[i+1]) { long b=A[i], c=A[i+1]; long u=ones ^ b; twosA=(ones & b)|( u & c); ones=u^c; } // CSA(twosB, ones, ones, A[i+2], A[i+3]) { long b=A[i+2], c=A[i+3]; long u=ones^b; twosB =(ones&b)|(u&c); ones=u^c; } //CSA(foursA, twos, twos, twosA, twosB) { long u=twos^twosA; foursA=(twos&twosA)|(u&twosB); twos=u^twosB; } //CSA(twosA, ones, ones, A[i+4], A[i+5]) { long b=A[i+4], c=A[i+5]; long u=ones^b; twosA=(ones&b)|(u&c); ones=u^c; } // CSA(twosB, ones, ones, A[i+6], A[i+7]) { long b=A[i+6], c=A[i+7]; long u=ones^b; twosB=(ones&b)|(u&c); ones=u^c; } //CSA(foursB, twos, twos, twosA, twosB) { long u=twos^twosA; foursB=(twos&twosA)|(u&twosB); twos=u^twosB; } //CSA(eights, fours, fours, foursA, foursB) { long u=fours^foursA; eights=(fours&foursA)|(u&foursB); fours=u^foursB; } tot8 += pop(eights); } // handle trailing words in a binary-search manner... // derived from the loop above by setting specific elements to 0. // the original method in Hackers Delight used a simple for loop: // for (i = i; i < n; i++) // Add in the last elements // tot = tot + pop(A[i]); if (i<=n-4) { long twosA, twosB, foursA, eights; { long b=A[i], c=A[i+1]; long u=ones ^ b; twosA=(ones & b)|( u & c); ones=u^c; } { long b=A[i+2], c=A[i+3]; long u=ones^b; twosB =(ones&b)|(u&c); ones=u^c; } { long u=twos^twosA; foursA=(twos&twosA)|(u&twosB); twos=u^twosB; } eights=fours&foursA; fours=fours^foursA; tot8 += pop(eights); i+=4; } if (i<=n-2) { long b=A[i], c=A[i+1]; long u=ones ^ b; long twosA=(ones & b)|( u & c); ones=u^c; long foursA=twos&twosA; twos=twos^twosA; long eights=fours&foursA; fours=fours^foursA; tot8 += pop(eights); i+=2; } if (i<n) { tot += pop(A[i]); } tot += (pop(fours)<<2) + (pop(twos)<<1) + pop(ones) + (tot8<<3); return tot; } /** Returns the popcount or cardinality of the two sets after an intersection. * Neither array is modified. */ public static long pop_intersect(long A[], long B[], int wordOffset, int numWords) { // generated from pop_array via sed 's/A\[\([^]]*\)\]/\(A[\1] \& B[\1]\)/g' int n = wordOffset+numWords; long tot=0, tot8=0; long ones=0, twos=0, fours=0; int i; for (i = wordOffset; i <= n - 8; i+=8) { long twosA,twosB,foursA,foursB,eights; // CSA(twosA, ones, ones, (A[i] & B[i]), (A[i+1] & B[i+1])) { long b=(A[i] & B[i]), c=(A[i+1] & B[i+1]); long u=ones ^ b; twosA=(ones & b)|( u & c); ones=u^c; } // CSA(twosB, ones, ones, (A[i+2] & B[i+2]), (A[i+3] & B[i+3])) { long b=(A[i+2] & B[i+2]), c=(A[i+3] & B[i+3]); long u=ones^b; twosB =(ones&b)|(u&c); ones=u^c; } //CSA(foursA, twos, twos, twosA, twosB) { long u=twos^twosA; foursA=(twos&twosA)|(u&twosB); twos=u^twosB; } //CSA(twosA, ones, ones, (A[i+4] & B[i+4]), (A[i+5] & B[i+5])) { long b=(A[i+4] & B[i+4]), c=(A[i+5] & B[i+5]); long u=ones^b; twosA=(ones&b)|(u&c); ones=u^c; } // CSA(twosB, ones, ones, (A[i+6] & B[i+6]), (A[i+7] & B[i+7])) { long b=(A[i+6] & B[i+6]), c=(A[i+7] & B[i+7]); long u=ones^b; twosB=(ones&b)|(u&c); ones=u^c; } //CSA(foursB, twos, twos, twosA, twosB) { long u=twos^twosA; foursB=(twos&twosA)|(u&twosB); twos=u^twosB; } //CSA(eights, fours, fours, foursA, foursB) { long u=fours^foursA; eights=(fours&foursA)|(u&foursB); fours=u^foursB; } tot8 += pop(eights); } if (i<=n-4) { long twosA, twosB, foursA, eights; { long b=(A[i] & B[i]), c=(A[i+1] & B[i+1]); long u=ones ^ b; twosA=(ones & b)|( u & c); ones=u^c; } { long b=(A[i+2] & B[i+2]), c=(A[i+3] & B[i+3]); long u=ones^b; twosB =(ones&b)|(u&c); ones=u^c; } { long u=twos^twosA; foursA=(twos&twosA)|(u&twosB); twos=u^twosB; } eights=fours&foursA; fours=fours^foursA; tot8 += pop(eights); i+=4; } if (i<=n-2) { long b=(A[i] & B[i]), c=(A[i+1] & B[i+1]); long u=ones ^ b; long twosA=(ones & b)|( u & c); ones=u^c; long foursA=twos&twosA; twos=twos^twosA; long eights=fours&foursA; fours=fours^foursA; tot8 += pop(eights); i+=2; } if (i<n) { tot += pop((A[i] & B[i])); } tot += (pop(fours)<<2) + (pop(twos)<<1) + pop(ones) + (tot8<<3); return tot; } /** Returns the popcount or cardinality of the union of two sets. * Neither array is modified. */ public static long pop_union(long A[], long B[], int wordOffset, int numWords) { // generated from pop_array via sed 's/A\[\([^]]*\)\]/\(A[\1] \| B[\1]\)/g' int n = wordOffset+numWords; long tot=0, tot8=0; long ones=0, twos=0, fours=0; int i; for (i = wordOffset; i <= n - 8; i+=8) { /*** C macro from Hacker's Delight #define CSA(h,l, a,b,c) \ {unsigned u = a ^ b; unsigned v = c; \ h = (a & b) | (u & v); l = u ^ v;} ***/ long twosA,twosB,foursA,foursB,eights; // CSA(twosA, ones, ones, (A[i] | B[i]), (A[i+1] | B[i+1])) { long b=(A[i] | B[i]), c=(A[i+1] | B[i+1]); long u=ones ^ b; twosA=(ones & b)|( u & c); ones=u^c; } // CSA(twosB, ones, ones, (A[i+2] | B[i+2]), (A[i+3] | B[i+3])) { long b=(A[i+2] | B[i+2]), c=(A[i+3] | B[i+3]); long u=ones^b; twosB =(ones&b)|(u&c); ones=u^c; } //CSA(foursA, twos, twos, twosA, twosB) { long u=twos^twosA; foursA=(twos&twosA)|(u&twosB); twos=u^twosB; } //CSA(twosA, ones, ones, (A[i+4] | B[i+4]), (A[i+5] | B[i+5])) { long b=(A[i+4] | B[i+4]), c=(A[i+5] | B[i+5]); long u=ones^b; twosA=(ones&b)|(u&c); ones=u^c; } // CSA(twosB, ones, ones, (A[i+6] | B[i+6]), (A[i+7] | B[i+7])) { long b=(A[i+6] | B[i+6]), c=(A[i+7] | B[i+7]); long u=ones^b; twosB=(ones&b)|(u&c); ones=u^c; } //CSA(foursB, twos, twos, twosA, twosB) { long u=twos^twosA; foursB=(twos&twosA)|(u&twosB); twos=u^twosB; } //CSA(eights, fours, fours, foursA, foursB) { long u=fours^foursA; eights=(fours&foursA)|(u&foursB); fours=u^foursB; } tot8 += pop(eights); } if (i<=n-4) { long twosA, twosB, foursA, eights; { long b=(A[i] | B[i]), c=(A[i+1] | B[i+1]); long u=ones ^ b; twosA=(ones & b)|( u & c); ones=u^c; } { long b=(A[i+2] | B[i+2]), c=(A[i+3] | B[i+3]); long u=ones^b; twosB =(ones&b)|(u&c); ones=u^c; } { long u=twos^twosA; foursA=(twos&twosA)|(u&twosB); twos=u^twosB; } eights=fours&foursA; fours=fours^foursA;
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -