From 84f04a56d18a9e5e4bfa58f5fe75c7c9567b9c4b Mon Sep 17 00:00:00 2001 From: laishenghao Date: Thu, 15 Aug 2024 15:26:05 +0800 Subject: [PATCH] =?UTF-8?q?=E8=A7=A3=E5=86=B3samplescan=E8=A1=8C=E6=95=B0?= =?UTF-8?q?=E4=BC=B0=E7=AE=97=E5=B7=AE=E8=B7=9D=E5=A4=A7=EF=BC=8C=E6=89=A7?= =?UTF-8?q?=E8=A1=8C=E8=AE=A1=E5=88=92=E9=80=89=E6=8B=A9=E9=94=99=E8=AF=AF?= =?UTF-8?q?=E5=AF=BC=E8=87=B4=E7=9A=84=E6=80=A7=E8=83=BD=E9=97=AE=E9=A2=98?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/gausskernel/optimizer/path/costsize.cpp | 14 +-- src/test/regress/expected/tablesample_3.out | 103 +++++++++++++++----- src/test/regress/sql/tablesample_3.sql | 19 ++++ 3 files changed, 107 insertions(+), 29 deletions(-) diff --git a/src/gausskernel/optimizer/path/costsize.cpp b/src/gausskernel/optimizer/path/costsize.cpp index caa2c27c08..e1edaba99f 100755 --- a/src/gausskernel/optimizer/path/costsize.cpp +++ b/src/gausskernel/optimizer/path/costsize.cpp @@ -6230,14 +6230,14 @@ void finalize_dml_cost(ModifyTable* plan) * Parameters: * @in pctnode: node of percent args. * - * Return: float4 + * Return: float8 */ -static float4 get_samplefract(Node* pctnode) +static float8 get_samplefract(Node* pctnode) { - float4 samplefract; + float8 samplefract; if (IsA(pctnode, Const) && !((Const*)pctnode)->constisnull) { - samplefract = DatumGetFloat4(((Const*)pctnode)->constvalue); + samplefract = DatumGetFloat8(((Const*)pctnode)->constvalue); if (samplefract >= 0.0 && samplefract <= 100.0 && !isnan(samplefract)) { samplefract /= 100.0f; } else { @@ -6265,7 +6265,7 @@ static float4 get_samplefract(Node* pctnode) void system_samplescangetsamplesize(PlannerInfo* root, RelOptInfo* baserel, List* paramexprs) { Node* pctnode = NULL; - float4 samplefract; + float8 samplefract; /* Try to extract an estimate for the sample percentage */ pctnode = (Node*)linitial(paramexprs); @@ -6292,7 +6292,7 @@ void system_samplescangetsamplesize(PlannerInfo* root, RelOptInfo* baserel, List void bernoulli_samplescangetsamplesize(PlannerInfo* root, RelOptInfo* baserel, List* paramexprs) { Node* pctnode = NULL; - float4 samplefract; + float8 samplefract; /* Try to extract an estimate for the sample percentage */ pctnode = (Node*)linitial(paramexprs); @@ -6325,7 +6325,7 @@ void hybrid_samplescangetsamplesize(PlannerInfo* root, RelOptInfo* baserel, List foreach (lc, paramexprs) { Node* paramnode = (Node*)lfirst(lc); Node* pctnode = estimate_expression_value(root, paramnode); - float4 samplefract = 0.0; + float8 samplefract = 0.0; if (likely(pctnode)) { samplefract = get_samplefract(pctnode); } else { diff --git a/src/test/regress/expected/tablesample_3.out b/src/test/regress/expected/tablesample_3.out index a592ef2b04..9adf72f124 100755 --- a/src/test/regress/expected/tablesample_3.out +++ b/src/test/regress/expected/tablesample_3.out @@ -20,15 +20,16 @@ select count(*) from ((select * from test_tablesample tablesample SYSTEM(20) REP (1 row) explain (costs off) select * from test_tablesample tablesample BERNOULLI(50) REPEATABLE (200) left join test_tablesample2 tablesample BERNOULLI(50) REPEATABLE (200) on test_tablesample.id=test_tablesample2.id where test_tablesample2.id is NULL; - QUERY PLAN ---------------------------------------------------------------------------------------- - Nested Loop Anti Join - Join Filter: (test_tablesample.id = test_tablesample2.id) + QUERY PLAN +--------------------------------------------------------------------------------------------- + Hash Anti Join + Hash Cond: (test_tablesample.id = test_tablesample2.id) -> Sample Scan on test_tablesample Sampling: bernoulli (50::double precision) REPEATABLE (200::double precision) - -> Sample Scan on test_tablesample2 - Sampling: bernoulli (50::double precision) REPEATABLE (200::double precision) -(6 rows) + -> Hash + -> Sample Scan on test_tablesample2 + Sampling: bernoulli (50::double precision) REPEATABLE (200::double precision) +(7 rows) select * from test_tablesample tablesample BERNOULLI(50) REPEATABLE (200) left join test_tablesample2 tablesample BERNOULLI(50) REPEATABLE (200) on test_tablesample.id=test_tablesample2.id where test_tablesample2.id is NULL; id | name | salary | id | name | salary @@ -37,15 +38,19 @@ select * from test_tablesample tablesample BERNOULLI(50) REPEATABLE (200) left j set enable_hashjoin to off; explain (costs off) select * from test_tablesample tablesample BERNOULLI(50) REPEATABLE (200) left join test_tablesample2 tablesample BERNOULLI(50) REPEATABLE (200) on test_tablesample.id=test_tablesample2.id where test_tablesample2.id is NULL; - QUERY PLAN ---------------------------------------------------------------------------------------- - Nested Loop Anti Join - Join Filter: (test_tablesample.id = test_tablesample2.id) - -> Sample Scan on test_tablesample - Sampling: bernoulli (50::double precision) REPEATABLE (200::double precision) - -> Sample Scan on test_tablesample2 - Sampling: bernoulli (50::double precision) REPEATABLE (200::double precision) -(6 rows) + QUERY PLAN +--------------------------------------------------------------------------------------------- + Merge Anti Join + Merge Cond: (test_tablesample.id = test_tablesample2.id) + -> Sort + Sort Key: test_tablesample.id + -> Sample Scan on test_tablesample + Sampling: bernoulli (50::double precision) REPEATABLE (200::double precision) + -> Sort + Sort Key: test_tablesample2.id + -> Sample Scan on test_tablesample2 + Sampling: bernoulli (50::double precision) REPEATABLE (200::double precision) +(10 rows) select * from test_tablesample tablesample BERNOULLI(50) REPEATABLE (200) left join test_tablesample2 tablesample BERNOULLI(50) REPEATABLE (200) on test_tablesample.id=test_tablesample2.id where test_tablesample2.id is NULL; id | name | salary | id | name | salary @@ -54,15 +59,16 @@ select * from test_tablesample tablesample BERNOULLI(50) REPEATABLE (200) left j set enable_mergejoin to off; explain (costs off) select * from test_tablesample tablesample BERNOULLI(50) REPEATABLE (200) left join test_tablesample2 tablesample BERNOULLI(50) REPEATABLE (200) on test_tablesample.id=test_tablesample2.id where test_tablesample2.id is NULL; - QUERY PLAN ---------------------------------------------------------------------------------------- + QUERY PLAN +--------------------------------------------------------------------------------------------- Nested Loop Anti Join Join Filter: (test_tablesample.id = test_tablesample2.id) -> Sample Scan on test_tablesample Sampling: bernoulli (50::double precision) REPEATABLE (200::double precision) - -> Sample Scan on test_tablesample2 - Sampling: bernoulli (50::double precision) REPEATABLE (200::double precision) -(6 rows) + -> Materialize + -> Sample Scan on test_tablesample2 + Sampling: bernoulli (50::double precision) REPEATABLE (200::double precision) +(7 rows) select * from test_tablesample tablesample BERNOULLI(50) REPEATABLE (200) left join test_tablesample2 tablesample BERNOULLI(50) REPEATABLE (200) on test_tablesample.id=test_tablesample2.id where test_tablesample2.id is NULL; id | name | salary | id | name | salary @@ -315,8 +321,61 @@ select * from test_tablesample tablesample SYSTEM(50) REPEATABLE (200) left join ----+------+--------+----+------+-------- (0 rows) +-- test rows estimation of samplescan +set enable_hashjoin to on; +create table ss_rows_t1 (a int); +create table ss_rows_t2 (b int); +create table ss_rows_t3 (c int); +insert into ss_rows_t1 values (generate_series(1, 20000)); +insert into ss_rows_t2 values (generate_series(1, 20000)); +insert into ss_rows_t3 values (generate_series(1, 20000)); +explain select a from ss_rows_t1 tablesample system (99.999999); +--?.* +--?.* +--? Sample Scan on ss_rows_t1 (cost=.* rows=\d{5} width=4) + Sampling: system (99.999999::double precision) +(2 rows) + +explain select b from ss_rows_t2 tablesample bernoulli (99.999999); +--?.* +--?.* +--? Sample Scan on ss_rows_t2 (cost=.* rows=\d{5} width=4) + Sampling: bernoulli (99.999999::double precision) +(2 rows) + +explain select c from ss_rows_t3 tablesample hybrid (99.999999,99.999999); +--?.* +--?.* +--? Sample Scan on ss_rows_t3 (cost=.* rows=\d{5} width=4) + Sampling: hybrid (99.999999::double precision, 99.999999::double precision) +(2 rows) + +explain select a, b, c from + ss_rows_t1 tablesample system (99.999999) repeatable (325), + ss_rows_t2 tablesample bernoulli (99.999999) repeatable (0), + ss_rows_t3 tablesample hybrid (99.999999,99.999999) repeatable (510) + where a = b and b = c; +--?.* +--?.* +--? Hash Join (.* width=12) + Hash Cond: (ss_rows_t1.a = ss_rows_t3.c) +--? -> Hash Join (.* width=8) + Hash Cond: (ss_rows_t1.a = ss_rows_t2.b) +--? -> Sample Scan on ss_rows_t1 (cost=.* rows=\d{5} width=4) + Sampling: system (99.999999::double precision) REPEATABLE (325::double precision) +--? -> Hash (cost=.* rows=\d{5} width=4) +--? -> Sample Scan on ss_rows_t2 (cost=.* rows=\d{5} width=4) + Sampling: bernoulli (99.999999::double precision) REPEATABLE (0::double precision) +--? -> Hash (cost=.* rows=\d{5} width=4) +--? -> Sample Scan on ss_rows_t3 (cost=.* rows=\d{5} width=4) + Sampling: hybrid (99.999999::double precision, 99.999999::double precision) REPEATABLE (510::double precision) +(12 rows) + reset search_path; drop schema tablesample_schema4 cascade; -NOTICE: drop cascades to 2 other objects +NOTICE: drop cascades to 5 other objects DETAIL: drop cascades to table tablesample_schema4.test_tablesample drop cascades to table tablesample_schema4.test_tablesample2 +drop cascades to table tablesample_schema4.ss_rows_t1 +drop cascades to table tablesample_schema4.ss_rows_t2 +drop cascades to table tablesample_schema4.ss_rows_t3 diff --git a/src/test/regress/sql/tablesample_3.sql b/src/test/regress/sql/tablesample_3.sql index 696fef561e..f2794ec18e 100755 --- a/src/test/regress/sql/tablesample_3.sql +++ b/src/test/regress/sql/tablesample_3.sql @@ -95,5 +95,24 @@ select * from test_tablesample tablesample BERNOULLI(50) REPEATABLE (200) left j explain (costs off) select * from test_tablesample tablesample SYSTEM(50) REPEATABLE (200) left join test_tablesample2 tablesample SYSTEM(50) REPEATABLE (200) on test_tablesample.id=test_tablesample2.id where test_tablesample2.id is NULL; select * from test_tablesample tablesample SYSTEM(50) REPEATABLE (200) left join test_tablesample2 tablesample SYSTEM(50) REPEATABLE (200) on test_tablesample.id=test_tablesample2.id where test_tablesample2.id is NULL; +-- test rows estimation of samplescan +set enable_hashjoin to on; +create table ss_rows_t1 (a int); +create table ss_rows_t2 (b int); +create table ss_rows_t3 (c int); +insert into ss_rows_t1 values (generate_series(1, 20000)); +insert into ss_rows_t2 values (generate_series(1, 20000)); +insert into ss_rows_t3 values (generate_series(1, 20000)); + +explain select a from ss_rows_t1 tablesample system (99.999999); +explain select b from ss_rows_t2 tablesample bernoulli (99.999999); +explain select c from ss_rows_t3 tablesample hybrid (99.999999,99.999999); + +explain select a, b, c from + ss_rows_t1 tablesample system (99.999999) repeatable (325), + ss_rows_t2 tablesample bernoulli (99.999999) repeatable (0), + ss_rows_t3 tablesample hybrid (99.999999,99.999999) repeatable (510) + where a = b and b = c; + reset search_path; drop schema tablesample_schema4 cascade; -- Gitee