📄 radix.mx
字号:
command jivejoin2( proj:bat[:oid,:oid], attr:bat[:void,:pax@1] ):bat[:void,:Int@1]address M5_RDX_jivejoin2comment"positional join that creates a void head by inserting the join result positionally";command posjoin( proj:bat[:void,:oid], attr:bat[:void,:integer@1] ):bat[:void,:Int@1]address M5_RDX_posjoin_tuplecomment"positional join with built-in NSM projection";command posjoin( proj:bat[:void,:oid], attr:bat[:void,:pax@1] ):bat[:void,:Int@1]address M5_RDX_posjoin_tuplecomment"positional join with built-in PAX projection";pattern radix_cluster( b:bat[:integer@1,:any_2], limits:str, perc:flt, radix1:int ... ):bat[:Int@1,:any_2]address M5_RDX_radix_clustercomment"";pattern radix_cluster( b:bat[:pax@1,:any_2], limits:str, perc:flt, radix1:int ... ):bat[:Int@1,:any_2]address M5_RDX_radix_clustercomment""; @= tpe @:@1(1,4)@ @:@1(2,8)@ @:@1(4,16)@ @:@1(8,32)@ @:@1(16,64)@ @:@1(32,128)@ @:@1(64,256)@ @:@1(128,512)@ @:@1(256,1024)@ @mal @:tpe(integer)@ @:tpe(int)@command [integer]( b:bat[:any_1,:int], width:int ):bat[:any_1,:any]address M5_RDX_BATintegercomment"create a view that makes tail column appear as an integerX column for some width=X";command [integer]( b:bat[:any_1,:int] ):bat[:any_1,:integer1]address M5_RDX_BATinteger1comment"shortcut for: [integer]( b, width=1 );";command pax_blocksize( int ):intaddress M5_RDX_pax_blocksizecomment"set the pax blocksize";command pax_blocksize( ):intaddress M5_RDX_pax_blocksize_getcomment"get the pax blocksize";@)@( @f radix_examples @milmodule("alarm");var b := view_bbp_name.reverse();# create data tablesif (not(b.exist("smaller_key"))) uniform(1000000,1000000).reverse().mark(0@0).reverse().rename("smaller_key").persists(true).mmap(1);if (not(b.exist("smaller_a1"))) smaller_key.mirror().copy().rename("smaller_a1").persists(true).mmap(1);if (not(b.exist("smaller_aY"))) smaller_key.copy().rename("smaller_aY").persists(true).mmap(1);if (not(b.exist("larger_key"))) bat(void,int,2000000).insert(smaller_key).insert(smaller_key.copy().seqbase(oid(int(smaller_key.seqbase()) + smaller_key.count()))).rename("larger_key").persists(true).mmap(1);if (not(b.exist("larger_b1"))) larger_key.mirror().copy().rename("larger_b1").persists(true).mmap(1);if (not(b.exist("larger_bZ"))) larger_key.copy().rename("larger_bZ").persists(true).mmap(1);if (not(b.exist("smaller_all"))) [integer]([int]([integer](smaller_key,1).reverse()),16).reverse().rename("smaller_all").persists(true).mmap(1);if (not(b.exist("larger_all"))) [integer]([int]([integer](larger_key,1).reverse()),16).reverse().rename("larger_all").persists(true).mmap(1);# 0) SQL benchmark query# ======================## SELECT smaller.a1, smaller.aY, larger.b1, larger.bZ# FROM smaller, larger# where smaller.key = larger.key# 1) cache-conscious-monet-join-strategy, optimized for a 256KB L2 cache of 32 bytes line width# =============================================================================================## first radix cluster both key columns on H bits (maybe different number of passes and bit settings)## We have a 256KB cache, but want to fit comfortable in 128KB. Given a 8-byte relation width an 8-byte hash table, the# chunk size is 128KB/16 = 8192, and since we have a 1M inner relation this leads to 7 bits clustering, wich we do in 2# passes (4+3) to fit the 64-entry TLB#cluster_larger := radix_cluster(larger_key, 4, 3);#250cluster_smaller := radix_cluster(smaller_key, 4, 3);#626# phash, followed by radix-sort## As we have 2M tuples with max value 2000000, we need to cluster on 21 bits (7+7+7) in order to achieve radix-sort.#res_join := phash_join(cluster_larger, cluster_smaller.reverse(), 7, 2, false).reverse().radix_cluster(7,7,7).reverse();#2.7res_larger_sorted := res_join.mark(0@0).reverse();# no longer neededcluster_larger := 0;cluster_smaller := 0;# positional-join projected columns from larger table into resultres_b1 := join(res_larger_sorted, larger_b1);#570res_bZ := join(res_larger_sorted, larger_bZ);#570# no longer neededres_larger_sorted := 0;# Given a 128KB of 'comfortable' L2, and 4-byte tuples in smaller_aX, we want chunk sizes of 32768. As we have a# cardinality of 2M, we create 64 chunks by partial radix-cluster on 6 bits. The maximum value is 1M, hence there# are 20 significant bits, so we ignore the lower 20-6=14 bits.#res_smaller_clustered := res_join.reverse().mark(0@0).reverse().radix_cluster(-14,6);#589#344# no longer neededres_join := 0;# positional-join and decluster projected columns from smaller table into result## The window size of the matching window is again the comfortable 128KB, with 4 byte wide tuples its tuple size is# 32768. As we have 64 clusters, we can use a multiplier of 512. With 64 cluster, TLB trouble is avoided as well.#borders_smaller := res_smaller_clustered.radix_count(14, 6);res_a1 := join(res_smaller_clustered, smaller_a1).radix_decluster(borders_smaller, 512);res_aY := join(res_smaller_clustered, smaller_aY).radix_decluster(borders_smaller, 512);# no longer neededres_smaller_clustered := 0;print(res_b1.slice(1000000,1000100).col_name("b1"), res_bZ.col_name("bZ"), res_a1.col_name("a1"), res_aY.col_name("aY"));print(res_b1.count());# no longer neededres_a1 := 0;res_aY := 0;res_b1 := 0;res_bZ := 0;# 2) cache-conscious-relational-join-strategy# ===========================================smaller_view := [integer](smaller_all.reverse(), 2).reverse();larger_view := [integer](larger_all.reverse(), 2).reverse();# the inner relation will be 12+8 = 24 bytes wide, we have 128KB of cache hence can have chunk sizes of 5000.# given an inner relation size of 1M tuples, this leads to 256 clusters of 4096, hence 8 bytes.cluster_smaller := radix_cluster(smaller_view, 4, 4);cluster_larger := radix_cluster(larger_view, 4, 4);res_join := phash_join(cluster_larger, cluster_smaller.reverse(), 8, 2, false);# no longer neededcluster_smaller := 0;cluster_larger := 0;print(res_join.slice(1000000,1000100));res_join.count().print();# alternatively, we try without clusteringres_join := phash_join(larger_view, smaller_view.reverse(), 0, 2, false);print(res_join.slice(1000000,1000100));res_join.count().print();larger_view := [integer](larger_all.reverse(), 2).reverse().copy();smaller_view := [integer](smaller_all.reverse(), 2).reverse();res_join := phash_join(larger_view, smaller_view.reverse(), 0, 2, false);print(res_join.slice(1000000,1000100));res_join.count().print();quick test==========module(radix);module(alarm);module(pcl);var b := view_bbp_name.reverse();if (not(b.exist("res_join"))) uniform(30000000,10000000).[oid].rename("res_join").persists(true).mmap(1);if (not(b.exist("tab_attr"))) uniform(10000000).[oid].reverse().mark(0@0).reverse().rename("tab_attr").persists(true).mmap(1);var available := pcl_query(pcl_events.reverse().project(nil).reverse(),1).select(0).project(nil).reverse().select(10,55).reverse();proc tst(bat[oid,oid] res_join, int nbits) { var res_smaller_clustered := res_join.reverse().mark(0@0).reverse(); var skip := 24 - nbits; if (nbits > 7) { res_smaller_clustered := res_smaller_clustered.radix_cluster(-1 * skip, nbits - nbits/2, nbits/2); } else { res_smaller_clustered := res_smaller_clustered.radix_cluster(-1 * skip, nbits); } borders_smaller := res_smaller_clustered.radix_count(skip, nbits); cl_new := res_smaller_clustered.mark(0@0).reverse(); cl_old := res_smaller_clustered.reverse().mark(0@0).reverse(); t := time(); cl_tmp := join(cl_old, tab_attr); print(time() - t); var res_a1; available@batloop() { var e := bat(void,int).insert(nil,$h); CATCH(pcl_start(e,1)); t := time(); res_a1 := radix_decluster2(cl_new, cl_tmp, borders_smaller, 32); printf("% 6dms === % 15lld %s\n", time() - t, pcl_stop(e).mark(0@0).reverse().find(0@0), pcl_events.reverse().find($h)); } res_a1.slice(3343424, 3343444).print();}res_join.reverse().mark(0@0).reverse().join(tab_attr).slice(3343424, 3343444).print();tst(res_join,1);tst(res_join,13);tst(res_join,19);jivejoin========module(alarm);module(radix);proc log2(int n) : int { var ret := 0; while((n :/= 2) > 0) ret :+= 1; return ret; }proc jivetest(BAT[oid,oid] proj, BAT[void,any] attr, int nbits) { var shift := max(0, 1 + log2(attr.count()) - nbits); var mask := ((1 << nbits) - 1) << shift; var cnt := [>>]([and]([int](proj), mask), shift).histogram().sort(); cnt.sum().print(); var b1, b3, b2 := bat(void,oid,1+proj.count()); { var t := time(); b1 := jivejoin1(b2, proj, attr, cnt, shift, nbits); print(time() - t); } var c := b2.mirror().[int].[and](mask).[>>](shift); c.slice(0,7).print(); c.histogram().max().print(); { var t := time(); b3 := b2.jivejoin2(attr); print(time() - t); } b1.count().print(); b1.slice(0,7).print(); b3.count().print(); b3.slice(0,7).print(); b1.slice(200,220).reverse().join(b3).print();}var proj := [oid](smaller_aY).reverse().mirror().copy();var attr_integer16 := [integer](smaller_aY,16);var attr_int16 := [IntX](attr_integer16);var attr_int := smaller_aY;jivetest(proj, attr_integer16, 12);jivetest(proj, attr_int16, 12);jivetest(proj, attr_int, 12);var cnt := radix_count2(proj, 8, 12);var b2 := bat(void,oid,proj.count());var b1 := jivejoin1(b2, proj, attr_int, cnt, 8, 12);b2.slice(0,30).print();var b3 := radix_cluster(b2, cnt.bbpname(), 1.0, -1, 7);b3.info().find("tsorted").print();var b4 := radix_cluster(b2, cnt.bbpname(), 1.0, 1, 7);b4.info().find("tsorted").print();radix_cluster(attr_int,@)@-@{@f radix@* Implementation@c#include "mal_config.h"#include "mal.h"#include "mal_exception.h"#include "mal_interpreter.h" /* for getArgReference() */#include <math.h>#include "radix.h"
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -