📄 selectivity.sql
字号:
-- this test relies heavily on old statistics functionality that is no longer supported and hence taking this test out of nightly suite.maximumdisplaywidth 5000;create table two (x int);insert into two values (1),(2);create table ten (x int);insert into ten values (1),(2),(3),(4),(5),(6),(7),(8),(9),(10);create table twenty (x int);insert into twenty values (1),(2),(3),(4),(5),(6),(7),(8),(9),(10),(11),(12),(13),(14),(15),(16),(17),(18),(19),(20);create table hundred (x int generated always as identity, dc int);insert into hundred (dc) select t1.x from ten t1, ten t2;create table template (id int not null generated always as identity, two int, twenty int, hundred int);-- 4000 rows.insert into template (two, twenty, hundred) select two.x, twenty.x, hundred.x from two, twenty, hundred;create index template_two on template(two);create index template_twenty on template(twenty);-- 20 distinct values.create index template_22 on template(twenty,two);create unique index template_id on template(id);create index template_102 on template(hundred,two);create table test (id int, two int, twenty int, hundred int);create index test_id on test(id);insert into test select * from template;create view showstats asselect cast (conglomeratename as varchar(20)) indexname, cast (statistics->toString() as varchar(40)) stats, creationtimestamp createtime, colcount ncolsfrom sys.sysstatistics, sys.sysconglomerates where conglomerateid = referenceid;select * from showstats order by indexname, stats, createtime, ncols;update statistics for table template;update statistics for table test;create table rts_table ( id int generated always as identity, comments varchar(128), stats boolean, rts double);--- SINGLE COLUMN TESTS------call SYSCS_UTIL.SYSCS_SET_RUNTIMESTATISTICS(1);-- choose whatever plan you want but the row estimate should be.-- (n * n) * 0.5get cursor c as 'select template.idfrom properties joinOrder=fixed test, templatewhere test.two = template.two';close c;insert into rts_table values (default, 'join on two, template inner, all rows.', true, runtimestatistics()->getEstimatedRowCount()); -- choose hash join. selectivity should be the same.get cursor c as 'select template.idfrom properties joinOrder=fixed test, template properties joinStrategy=hash where test.two = template.two';close c;insert into rts_table values (default, 'join on two. template inner, hash join, all rows.', true, runtimestatistics()->getEstimatedRowCount()); -- choose NL join. selectivity should be the same.get cursor c as 'select template.idfrom properties joinOrder=fixed test, template properties joinStrategy=nestedLoop, index=nullwhere test.two = template.two';close c;insert into rts_table values (default, 'join on two. template inner, NL, no index, all rows.', true, runtimestatistics()->getEstimatedRowCount()); get cursor c as 'select template.idfrom properties joinOrder=fixed test, template properties joinStrategy=nestedLoop, index=template_twowhere test.two = template.two';close c;insert into rts_table values (default, 'join on two. template inner, NL, index=two, all rows.', true, runtimestatistics()->getEstimatedRowCount()); select id, comments, rtsfrom rts_table where comments like 'join on two%';-- do joins on 20-- first do NLget cursor c as 'select template.idfrom properties joinOrder=fixed test, template properties joinStrategy=nestedLoop, index=template_twentywhere test.twenty = template.twenty';close c;insert into rts_table values (default, 'join on twenty. template inner, NL, index=template_twenty, all rows.', true, runtimestatistics()->getEstimatedRowCount()); -- join on 20 but using index 20_2. cost as well as selectivity-- should be divided using selectivity. cost should almost be the-- same as using template_twenty but a shade more...get cursor c as 'select template.idfrom properties joinOrder=fixed test, template properties joinStrategy=nestedLoop, index=template_22where test.twenty = template.twenty';close c;insert into rts_table values (default, 'join on twenty. template inner, NL, index=template_22, all rows.', true, runtimestatistics()->getEstimatedRowCount()); -- join on twenty but no index.-- rc should be divided using selectivity. cost should be way different!get cursor c as 'select template.idfrom properties joinOrder=fixed test, template properties joinStrategy=nestedLoop, index=nullwhere test.twenty = template.twenty';close c;insert into rts_table values (default, 'join on twenty. template inner, NL, index=null, all rows.', true, runtimestatistics()->getEstimatedRowCount()); select id, comments, rtsfrom rts_table where comments like 'join on twenty%';-- still single column, try stuff on 100 but with extra qualification.-- on outer table.-- row count is 100 * 4000 * 0.01 = 4000get cursor c as 'select template.idfrom properties joinOrder=fixed test, template properties joinStrategy=nestedLoop, index=template_102where test.hundred = template.hundred and test.id <= 100';close c;insert into rts_table values (default, 'join on hundred. template inner, NL, index=template_102, 100 rows from outer.', true, runtimestatistics()->getEstimatedRowCount()); get cursor c as 'select template.idfrom properties joinOrder=fixed test, template properties joinStrategy=nestedLoop, index=nullwhere test.hundred = template.hundred and test.id <= 100';close c;insert into rts_table values (default, 'join on hundred. template inner, NL, index=null, 100 rows from outer .', true, runtimestatistics()->getEstimatedRowCount()); get cursor c as 'select template.idfrom properties joinOrder=fixed test, template properties joinStrategy=hash, index=nullwhere test.hundred = template.hundred and test.id <= 100';close c;insert into rts_table values (default, 'join on hundred. template inner, hash, index=null, 100 rows from outer.', true, runtimestatistics()->getEstimatedRowCount()); select id, comments, rtsfrom rts_table where comments like 'join on hundred%';-- multi predicate tests.-- first do a join involving twenty and two.-- force use of a single column index to do the join.-- the row count should involve statistics from both-- 10 and 2 though....-- row count should 4K * 4K * 1/40= 400,000-- cost doesn't show up in runtest output but should depend on the-- index being used. verify by hand before checking in.get cursor c as 'select template.idfrom properties joinOrder=fixed test, template properties joinStrategy=hash, index=nullwhere test.twenty = template.twenty and test.two=template.two';close c;insert into rts_table values (default, 'join on twenty/two. template inner, hash, index=null, all rows.', true, runtimestatistics()->getEstimatedRowCount()); get cursor c as 'select template.idfrom properties joinOrder=fixed test, template properties joinStrategy=nestedLoop, index=template_twowhere test.twenty = template.twenty and test.two=template.two';close c;insert into rts_table values (default, 'join on twenty/two. template inner, NL, index=template_two, all rows.', true, runtimestatistics()->getEstimatedRowCount()); get cursor c as 'select template.idfrom properties joinOrder=fixed test, template properties joinStrategy=nestedLoop, index=template_twentywhere test.twenty = template.twenty and test.two=template.two';close c;insert into rts_table values (default, 'join on twenty/two. template inner, NL, index=template_twenty, all rows.', true, runtimestatistics()->getEstimatedRowCount()); get cursor c as 'select template.idfrom properties joinOrder=fixed test, template properties joinStrategy=nestedLoop, index=template_22where test.twenty = template.twenty and test.two=template.two';close c;insert into rts_table values (default, 'join on twenty/two. template inner, NL, index=template_22, all rows.', true, runtimestatistics()->getEstimatedRowCount()); select id, comments, rtsfrom rts_table where comments like 'join on twenty/two%';-- multi predicate tests continued.-- drop index twenty,two -- use above predicates. should-- be smart enough to figure out the selectivity-- by combining twenty and two.drop index template_22;get cursor c as 'select template.idfrom properties joinOrder=fixed test, template properties joinStrategy=hash, index=nullwhere test.twenty = template.twenty and test.two=template.two';close c;insert into rts_table values (default, 'join on twenty/two. index twenty_two dropped. template inner, hash, index=null, all rows.', true, runtimestatistics()->getEstimatedRowCount()); get cursor c as 'select template.idfrom properties joinOrder=fixed test, template properties joinStrategy=nestedLoop, index=template_twowhere test.twenty = template.twenty and test.two=template.two';close c;insert into rts_table values (default, 'join on twenty/two. index twenty_two dropped. template inner, NL, index=template_two, all rows.', true, runtimestatistics()->getEstimatedRowCount()); get cursor c as 'select template.idfrom properties joinOrder=fixed test, template properties joinStrategy=nestedLoop, index=template_twentywhere test.twenty = template.twenty and test.two=template.two';close c;insert into rts_table values (default, 'join on twenty/two. index twenty_two dropped. template inner, NL, index=template_twenty, all rows.', true, runtimestatistics()->getEstimatedRowCount()); select id, comments, rtsfrom rts_table where comments like 'join on twenty/two. index twenty_two dropped%';drop index template_two;-- we only have index template_twenty-- for the -- second predicate we should use 0.1 instead of 0.5-- thus reducing earlier row count by a factor of 5.-- 80,000 instead of 400000get cursor c as 'select template.idfrom properties joinOrder=fixed test, template properties joinStrategy=nestedLoop, index=nullwhere test.twenty = template.twenty and test.two=template.two';close c;insert into rts_table values (default, 'join on twenty/two. index twenty_two and two dropped. template inner, NL, index=null, all rows.', true, runtimestatistics()->getEstimatedRowCount()); get cursor c as 'select template.idfrom properties joinOrder=fixed test, template properties joinStrategy=nestedLoop, index=template_twentywhere test.twenty = template.twenty and test.two=template.two';close c;insert into rts_table values (default, 'join on twenty/two. index twenty_two and two dropped. template inner, NL, index=template_twenty, all rows.', true, runtimestatistics()->getEstimatedRowCount()); select id, comments, rtsfrom rts_table where comments like 'join on twenty/two. index twenty_two and two dropped%';-- now drop index template_twenty.-- selectivity should become 0.1 * 0.1 = 0.01.-- 16 * 10^6 * .01 = 160,000drop index template_twenty;get cursor c as 'select template.idfrom properties joinOrder=fixed test, template properties joinStrategy=nestedLoop, index=nullwhere test.twenty = template.twenty and test.two=template.two';close c;insert into rts_table values (default, 'join on twenty/two. all indexes dropped. template inner, NL, index=null, all rows.', true, runtimestatistics()->getEstimatedRowCount()); select id, comments, rtsfrom rts_table where comments like 'join on twenty/two. all indexes dropped%';create index template_two on template(two);create index template_twenty on template(twenty);create index template_22 on template(twenty,two);update statistics for table template;-- throw in additional predicates-- see that the optimizer does the right thing. -- index on template_102. join on hundred, -- constant predicate on two. should be able to use
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -