⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 selectivity.sql

📁 derby database source code.good for you.
💻 SQL
📖 第 1 页 / 共 3 页
字号:
-- statistics for hundred_two to come up with row estimate.-- selectivity should be 0.01 * 0.5 == 0.005.-- row count is 16*10^6 * 0.005 = 8*10^4.get cursor c as 'select template.idfrom properties joinOrder=fixed test, template properties joinStrategy=nestedLoop, index=nullwhere test.hundred = template.hundred and 1=template.two';close c;insert into rts_table 	   values (default, 'join on hundred. constant pred on two. NL, index=null, all rows.', true, runtimestatistics()->getEstimatedRowCount());  -- JUST retry above query with different access paths-- -- row count shouldn't change! get cursor c as 'select template.idfrom properties joinOrder=fixed test, template properties joinStrategy=nestedLoop, index=template_102where test.hundred = template.hundred and 1=template.two';close c;insert into rts_table 	   values (default, 'join on hundred. constant pred on two. NL, index=template_102, all rows.', true, runtimestatistics()->getEstimatedRowCount());  select id, comments, rtsfrom rts_table where comments like 'join on hundred. constant pred on two%';-- hundred and twenty-- we can use statistics for 100,2 to get-- selectivity for 100 and twenty to get selectivity for 20.-- selectivity should 0.01 * 0.05 = 0.0005 -> 80,000get cursor c as 'select template.idfrom properties joinOrder=fixed test, template properties joinStrategy=nestedLoop, index=nullwhere test.hundred = template.hundred and 1=template.twenty';close c;insert into rts_table 	   values (default, 'join on hundred. constant pred on twenty. NL, index=null, all rows.', true, runtimestatistics()->getEstimatedRowCount());  get cursor c as 'select template.idfrom properties joinOrder=fixed test, template properties joinStrategy=nestedLoop, index=template_102where test.hundred = template.hundred and 1=template.twenty';close c;insert into rts_table 	   values (default, 'join on hundred. constant pred on twenty. NL, index=template_102 all rows.', true, runtimestatistics()->getEstimatedRowCount());  select id, comments, rtsfrom rts_table where comments like 'join on hundred. constant pred on twenty%';-- some 3 way joins.--drop table template;--drop table test;-- 4000create table t1 (id int generated always as identity, two int, twenty int, hundred varchar(3));insert into t1 (hundred, twenty, two) select CAST(CHAR(hundred.x) AS VARCHAR(3)), twenty.x, two.x from hundred, twenty, two;create table t2 (id int generated always as identity, two int, twenty int, hundred varchar(3));insert into t2 (hundred, twenty, two) select CAST(CHAR(hundred.x) AS VARCHAR(3)) , twenty.x, two.x from hundred, twenty, two;create table t3 (id int generated always as identity, two int, twenty int, hundred varchar(3));insert into t3 (hundred, twenty, two) select CAST(CHAR(hundred.x) AS VARCHAR(3)), twenty.x, two.x from hundred, twenty, two;create index t1_hundred on t1(hundred);create index t1_two_twenty on t1(two,twenty);create index t1_twenty_hundred on t1(twenty, hundred);create index t2_hundred on t2(hundred);create index t2_two_twenty on t2(two,twenty);create index t2_twenty_hundred on t2(twenty, hundred);create index t3_hundred on t3(hundred);create index t3_two_twenty on t3(two,twenty);create index t3_twenty_hundred on t3(twenty, hundred);update statistics for table t1;update statistics for table t2;update statistics for table t3;select * from showstats where indexname like 'T1%' order by indexname;select * from showstats where indexname like 'T2%' order by indexname;select * from showstats where indexname like 'T3%' order by indexname;-- t1 x t2 yields 8000 rows.-- x t3 yield 8*4 * 10^6 / 2= 16*10^6.get cursor c as 'select t1.id from properties joinOrder=fixed t1, t2, t3 where t1.hundred=t2.hundred and t1.twenty = t2.twenty and  t2.two = t3.two';close c;values runtimestatistics()->getEstimatedRowCount();-- t1 x t2 --> 16 * 10^4.-- x t3    --> 32 * 10^7.-- additional pred --> 32 * 10^5.get cursor c as 'select t1.id from properties joinOrder=fixed t1, t2, t3 where t1.hundred=t2.hundred and t2.two=t3.two and t1.hundred = t3.hundred';close c;values runtimestatistics()->getEstimatedRowCount();-- variations on above query: try different join strategies.get cursor c as 'select t1.id from properties joinOrder=fixed t1, t2, t3 properties joinStrategy=hashwhere t1.hundred=t2.hundred and t2.two=t3.two and t1.hundred = t3.hundred';close c;values runtimestatistics()->getEstimatedRowCount();get cursor c as 'select t1.id from properties joinOrder=fixed t1, t2, t3 properties joinStrategy=nestedLoopwhere t1.hundred=t2.hundred and t2.two=t3.two and t1.hundred = t3.hundred';close c;values runtimestatistics()->getEstimatedRowCount();get cursor c as 'select t1.id from properties joinOrder=fixed t1, t2 properties joinStrategy=nestedLoop, t3 where t1.hundred=t2.hundred and t2.two=t3.two and t1.hundred = t3.hundred';close c;values runtimestatistics()->getEstimatedRowCount();get cursor c as 'select t1.id from properties joinOrder=fixed t1, t2 properties joinStrategy=hash, t3 where t1.hundred=t2.hundred and t2.two=t3.two and t1.hundred = t3.hundred';close c;values runtimestatistics()->getEstimatedRowCount();-- duplicate predicates; this time t1.hundred=?-- will show up twice when t1 is optimized at the end.-- selectivity should be same as above!get cursor c as 'select t1.id from properties joinOrder=fixed t2, t3, t1where t1.hundred=t2.hundred and t2.two=t3.two and t1.hundred = t3.hundred';close c;values runtimestatistics()->getEstimatedRowCount();-- variations on above query: try different join strategies.get cursor c as 'select t1.id from properties joinOrder=fixed t3, t2, t1 properties joinStrategy=hashwhere t1.hundred=t2.hundred and t2.two=t3.two and t1.hundred = t3.hundred';close c;values runtimestatistics()->getEstimatedRowCount();get cursor c as 'select t1.id from properties joinOrder=fixed t3, t2, t1 properties joinStrategy=nestedLoopwhere t1.hundred=t2.hundred and t2.two=t3.two and t1.hundred = t3.hundred';close c;values runtimestatistics()->getEstimatedRowCount();get cursor c as 'select t1.id from properties joinOrder=fixed t2, t3 properties joinStrategy=nestedLoop, t1where t1.hundred=t2.hundred and t2.two=t3.two and t1.hundred = t3.hundred';close c;values runtimestatistics()->getEstimatedRowCount();get cursor c as 'select t1.id from properties joinOrder=fixed t3, t2 properties joinStrategy=hash, t1where t1.hundred=t2.hundred and t2.two=t3.two and t1.hundred = t3.hundred';close c;values runtimestatistics()->getEstimatedRowCount();-- some more variations on the above theme-- some constant predicates thrown in.-- remember hundred is a char column-- for some reason if you give the constant -- as a numeric argument it doesn't recognize that as a constant start/stop -- value for the index (is this a bug?)get cursor c as 'select t1.id from properties joinOrder=fixed t2, t3, t1where t1.hundred=t2.hundred and t2.two=t3.two and t1.hundred = t3.hundred and t1.hundred = ''1''';close c;values runtimestatistics()->getEstimatedRowCount();-- we have t1.100=t2.100 and t1.100=t3.100, so t2.100=t3.100 is redundant-- row count shouldn't factor in the redundant predicate.-- row count should be 3200000.0get cursor c as 'select t1.id from properties joinOrder=fixed t2, t3, t1where t1.hundred=t2.hundred and t2.two=t3.two and t1.hundred = t3.hundred and t2.hundred = t3.hundred';close c;values runtimestatistics()->getEstimatedRowCount();-- slightly different join predicates-- use composite stats.-- t1 x t2			  --> 16 * 10.4.--         x t3		  --> 16 * 10.4 * 4000 * 1/40 = 16*10.6get cursor c as 'select t1.id from properties joinOrder=fixed t2, t3, t1where t1.hundred=t2.hundred and t2.two=t3.two and t2.twenty=t3.twenty';close c;values runtimestatistics()->getEstimatedRowCount();-- same as above but muck around with join order.get cursor c as 'select t1.id from properties joinOrder=fixed t1, t2, t3where t1.hundred=t2.hundred and t2.two=t3.two and t2.twenty=t3.twenty';close c;values runtimestatistics()->getEstimatedRowCount();get cursor c as 'select t1.id from properties joinOrder=fixed t2, t1, t3where t1.hundred=t2.hundred and t2.two=t3.two and t2.twenty=t3.twenty';close c;values runtimestatistics()->getEstimatedRowCount();get cursor c as 'select t1.id from properties joinOrder=fixed t1, t3, t2where t1.hundred=t2.hundred and t2.two=t3.two and t2.twenty=t3.twenty';close c;values runtimestatistics()->getEstimatedRowCount();get cursor c as 'select t1.id from properties joinOrder=fixed t3, t2, t1where t1.hundred=t2.hundred and t2.two=t3.two and t2.twenty=t3.twenty';close c;values runtimestatistics()->getEstimatedRowCount();get cursor c as 'select t1.id from properties joinOrder=fixed t3, t1, t2where t1.hundred=t2.hundred and t2.two=t3.two and t2.twenty=t3.twenty';close c;values runtimestatistics()->getEstimatedRowCount();-- and just for fun, what would we have gotton without statistics.get cursor c as 'select t1.id from properties useStatistics=false, joinOrder=fixed t3, t1, t2where t1.hundred=t2.hundred and t2.two=t3.two and t2.twenty=t3.twenty';close c;values runtimestatistics()->getEstimatedRowCount() / 1000;-- gosh what now? I'm tired of writing queries....-- make sure we do a good job of stats on 1/3.create table scratch_table (id int, two int, twenty int, hundred int);insert into scratch_table select id, two, twenty, CAST(CHAR(hundred) AS INTEGER) from t1;create index st_all on scratch_table (two, twenty, hundred);update statistics for table scratch_table;-- since the statistics (rowEstimates) are not precise, force a checkpoint-- to force out all the row counts to the container header, and for good-- measure do a count which will update the row counts exactly.CALL SYSCS_UTIL.SYSCS_CHECKPOINT_DATABASE();select count(*) from t1;select count(*) from scratch_table;-- preds are on columns 1 and 3.-- should use default stats for 100 (0.1) and 0.5 for two.-- 16*10.6 * 5*10.-2 = 80*10.4get cursor c as'select s.idfrom properties joinOrder=fixed t1, scratch_table swhere t1.two=s.two  and s.hundred= CAST(CHAR(t1.hundred) AS INTEGER)';close c;values runtimestatistics()->getEstimatedRowCount();-- preds are on column 2.-- 0.1 --> 16*10.5get cursor c as'select s.idfrom properties joinOrder=fixed t1, scratch_table swhere t1.twenty=s.twenty';close c;values runtimestatistics()->getEstimatedRowCount();-- pred are on column 2,3-- 0.01 --> 16*10.4get cursor c as'select s.idfrom properties joinOrder=fixed t1, scratch_table swhere t1.twenty=s.twenty  and s.hundred = CAST(CHAR(t1.hundred) AS INTEGER)';close c;values runtimestatistics()->getEstimatedRowCount();-- test of statistics matcher algorithm; make sure that -- we choose the best statistics (the weight stuff in-- predicatelist).-- 2,20,100

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -