From 84f04a56d18a9e5e4bfa58f5fe75c7c9567b9c4b Mon Sep 17 00:00:00 2001
From: laishenghao <laishenghao1@huawei.com>
Date: Thu, 15 Aug 2024 15:26:05 +0800
Subject: [PATCH] =?UTF-8?q?=E8=A7=A3=E5=86=B3samplescan=E8=A1=8C=E6=95=B0?=
 =?UTF-8?q?=E4=BC=B0=E7=AE=97=E5=B7=AE=E8=B7=9D=E5=A4=A7=EF=BC=8C=E6=89=A7?=
 =?UTF-8?q?=E8=A1=8C=E8=AE=A1=E5=88=92=E9=80=89=E6=8B=A9=E9=94=99=E8=AF=AF?=
 =?UTF-8?q?=E5=AF=BC=E8=87=B4=E7=9A=84=E6=80=A7=E8=83=BD=E9=97=AE=E9=A2=98?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 src/gausskernel/optimizer/path/costsize.cpp |  14 +--
 src/test/regress/expected/tablesample_3.out | 103 +++++++++++++++-----
 src/test/regress/sql/tablesample_3.sql      |  19 ++++
 3 files changed, 107 insertions(+), 29 deletions(-)

diff --git a/src/gausskernel/optimizer/path/costsize.cpp b/src/gausskernel/optimizer/path/costsize.cpp
index caa2c27c08..e1edaba99f 100755
--- a/src/gausskernel/optimizer/path/costsize.cpp
+++ b/src/gausskernel/optimizer/path/costsize.cpp
@@ -6230,14 +6230,14 @@ void finalize_dml_cost(ModifyTable* plan)
  * Parameters:
  *	@in pctnode: node of percent args.
  *
- * Return: float4
+ * Return: float8
  */
-static float4 get_samplefract(Node* pctnode)
+static float8 get_samplefract(Node* pctnode)
 {
-    float4 samplefract;
+    float8 samplefract;
 
     if (IsA(pctnode, Const) && !((Const*)pctnode)->constisnull) {
-        samplefract = DatumGetFloat4(((Const*)pctnode)->constvalue);
+        samplefract = DatumGetFloat8(((Const*)pctnode)->constvalue);
         if (samplefract >= 0.0 && samplefract <= 100.0 && !isnan(samplefract)) {
             samplefract /= 100.0f;
         } else {
@@ -6265,7 +6265,7 @@ static float4 get_samplefract(Node* pctnode)
 void system_samplescangetsamplesize(PlannerInfo* root, RelOptInfo* baserel, List* paramexprs)
 {
     Node* pctnode = NULL;
-    float4 samplefract;
+    float8 samplefract;
 
     /* Try to extract an estimate for the sample percentage */
     pctnode = (Node*)linitial(paramexprs);
@@ -6292,7 +6292,7 @@ void system_samplescangetsamplesize(PlannerInfo* root, RelOptInfo* baserel, List
 void bernoulli_samplescangetsamplesize(PlannerInfo* root, RelOptInfo* baserel, List* paramexprs)
 {
     Node* pctnode = NULL;
-    float4 samplefract;
+    float8 samplefract;
 
     /* Try to extract an estimate for the sample percentage */
     pctnode = (Node*)linitial(paramexprs);
@@ -6325,7 +6325,7 @@ void hybrid_samplescangetsamplesize(PlannerInfo* root, RelOptInfo* baserel, List
     foreach (lc, paramexprs) {
         Node* paramnode = (Node*)lfirst(lc);
         Node* pctnode = estimate_expression_value(root, paramnode);
-        float4 samplefract = 0.0;
+        float8 samplefract = 0.0;
         if (likely(pctnode)) {
             samplefract = get_samplefract(pctnode);
         } else {
diff --git a/src/test/regress/expected/tablesample_3.out b/src/test/regress/expected/tablesample_3.out
index a592ef2b04..9adf72f124 100755
--- a/src/test/regress/expected/tablesample_3.out
+++ b/src/test/regress/expected/tablesample_3.out
@@ -20,15 +20,16 @@ select count(*) from ((select * from test_tablesample tablesample SYSTEM(20) REP
 (1 row)
 
 explain (costs off) select * from test_tablesample tablesample BERNOULLI(50) REPEATABLE (200) left join test_tablesample2 tablesample BERNOULLI(50) REPEATABLE (200) on test_tablesample.id=test_tablesample2.id where test_tablesample2.id is NULL;
-                                      QUERY PLAN                                       
----------------------------------------------------------------------------------------
- Nested Loop Anti Join
-   Join Filter: (test_tablesample.id = test_tablesample2.id)
+                                         QUERY PLAN                                          
+---------------------------------------------------------------------------------------------
+ Hash Anti Join
+   Hash Cond: (test_tablesample.id = test_tablesample2.id)
    ->  Sample Scan on test_tablesample
          Sampling: bernoulli (50::double precision) REPEATABLE (200::double precision)
-   ->  Sample Scan on test_tablesample2
-         Sampling: bernoulli (50::double precision) REPEATABLE (200::double precision)
-(6 rows)
+   ->  Hash
+         ->  Sample Scan on test_tablesample2
+               Sampling: bernoulli (50::double precision) REPEATABLE (200::double precision)
+(7 rows)
 
 select * from test_tablesample tablesample BERNOULLI(50) REPEATABLE (200) left join test_tablesample2 tablesample BERNOULLI(50) REPEATABLE (200) on test_tablesample.id=test_tablesample2.id where test_tablesample2.id is NULL;
  id | name | salary | id | name | salary 
@@ -37,15 +38,19 @@ select * from test_tablesample tablesample BERNOULLI(50) REPEATABLE (200) left j
 
 set enable_hashjoin to off;
 explain (costs off) select * from test_tablesample tablesample BERNOULLI(50) REPEATABLE (200) left join test_tablesample2 tablesample BERNOULLI(50) REPEATABLE (200) on test_tablesample.id=test_tablesample2.id where test_tablesample2.id is NULL;
-                                      QUERY PLAN                                       
----------------------------------------------------------------------------------------
- Nested Loop Anti Join
-   Join Filter: (test_tablesample.id = test_tablesample2.id)
-   ->  Sample Scan on test_tablesample
-         Sampling: bernoulli (50::double precision) REPEATABLE (200::double precision)
-   ->  Sample Scan on test_tablesample2
-         Sampling: bernoulli (50::double precision) REPEATABLE (200::double precision)
-(6 rows)
+                                         QUERY PLAN                                          
+---------------------------------------------------------------------------------------------
+ Merge Anti Join
+   Merge Cond: (test_tablesample.id = test_tablesample2.id)
+   ->  Sort
+         Sort Key: test_tablesample.id
+         ->  Sample Scan on test_tablesample
+               Sampling: bernoulli (50::double precision) REPEATABLE (200::double precision)
+   ->  Sort
+         Sort Key: test_tablesample2.id
+         ->  Sample Scan on test_tablesample2
+               Sampling: bernoulli (50::double precision) REPEATABLE (200::double precision)
+(10 rows)
 
 select * from test_tablesample tablesample BERNOULLI(50) REPEATABLE (200) left join test_tablesample2 tablesample BERNOULLI(50) REPEATABLE (200) on test_tablesample.id=test_tablesample2.id where test_tablesample2.id is NULL;
  id | name | salary | id | name | salary 
@@ -54,15 +59,16 @@ select * from test_tablesample tablesample BERNOULLI(50) REPEATABLE (200) left j
 
 set enable_mergejoin to off;
 explain (costs off) select * from test_tablesample tablesample BERNOULLI(50) REPEATABLE (200) left join test_tablesample2 tablesample BERNOULLI(50) REPEATABLE (200) on test_tablesample.id=test_tablesample2.id where test_tablesample2.id is NULL;
-                                      QUERY PLAN                                       
----------------------------------------------------------------------------------------
+                                         QUERY PLAN                                          
+---------------------------------------------------------------------------------------------
  Nested Loop Anti Join
    Join Filter: (test_tablesample.id = test_tablesample2.id)
    ->  Sample Scan on test_tablesample
          Sampling: bernoulli (50::double precision) REPEATABLE (200::double precision)
-   ->  Sample Scan on test_tablesample2
-         Sampling: bernoulli (50::double precision) REPEATABLE (200::double precision)
-(6 rows)
+   ->  Materialize
+         ->  Sample Scan on test_tablesample2
+               Sampling: bernoulli (50::double precision) REPEATABLE (200::double precision)
+(7 rows)
 
 select * from test_tablesample tablesample BERNOULLI(50) REPEATABLE (200) left join test_tablesample2 tablesample BERNOULLI(50) REPEATABLE (200) on test_tablesample.id=test_tablesample2.id where test_tablesample2.id is NULL;
  id | name | salary | id | name | salary 
@@ -315,8 +321,61 @@ select * from test_tablesample tablesample SYSTEM(50) REPEATABLE (200) left join
 ----+------+--------+----+------+--------
 (0 rows)
 
+-- test rows estimation of samplescan
+set enable_hashjoin to on;
+create table ss_rows_t1 (a int);
+create table ss_rows_t2 (b int);
+create table ss_rows_t3 (c int);
+insert into ss_rows_t1 values (generate_series(1, 20000));
+insert into ss_rows_t2 values (generate_series(1, 20000));
+insert into ss_rows_t3 values (generate_series(1, 20000));
+explain select a from  ss_rows_t1 tablesample system (99.999999);
+--?.*
+--?.*
+--? Sample Scan on ss_rows_t1  (cost=.* rows=\d{5} width=4)
+   Sampling: system (99.999999::double precision)
+(2 rows)
+
+explain select b from  ss_rows_t2 tablesample bernoulli (99.999999);
+--?.*
+--?.*
+--? Sample Scan on ss_rows_t2  (cost=.* rows=\d{5} width=4)
+   Sampling: bernoulli (99.999999::double precision)
+(2 rows)
+
+explain select c from  ss_rows_t3 tablesample hybrid (99.999999,99.999999);
+--?.*
+--?.*
+--? Sample Scan on ss_rows_t3  (cost=.* rows=\d{5} width=4)
+   Sampling: hybrid (99.999999::double precision, 99.999999::double precision)
+(2 rows)
+
+explain select a, b, c from
+    ss_rows_t1 tablesample system (99.999999) repeatable (325),
+    ss_rows_t2 tablesample bernoulli (99.999999) repeatable (0),
+    ss_rows_t3 tablesample hybrid (99.999999,99.999999) repeatable (510)
+    where a = b and b = c;
+--?.*
+--?.*
+--? Hash Join  (.* width=12)
+   Hash Cond: (ss_rows_t1.a = ss_rows_t3.c)
+--?   ->  Hash Join  (.* width=8)
+         Hash Cond: (ss_rows_t1.a = ss_rows_t2.b)
+--?         ->  Sample Scan on ss_rows_t1  (cost=.* rows=\d{5} width=4)
+               Sampling: system (99.999999::double precision) REPEATABLE (325::double precision)
+--?         ->  Hash  (cost=.* rows=\d{5} width=4)
+--?               ->  Sample Scan on ss_rows_t2  (cost=.* rows=\d{5} width=4)
+                     Sampling: bernoulli (99.999999::double precision) REPEATABLE (0::double precision)
+--?   ->  Hash  (cost=.* rows=\d{5} width=4)
+--?         ->  Sample Scan on ss_rows_t3  (cost=.* rows=\d{5} width=4)
+               Sampling: hybrid (99.999999::double precision, 99.999999::double precision) REPEATABLE (510::double precision)
+(12 rows)
+
 reset search_path;
 drop schema  tablesample_schema4 cascade;
-NOTICE:  drop cascades to 2 other objects
+NOTICE:  drop cascades to 5 other objects
 DETAIL:  drop cascades to table tablesample_schema4.test_tablesample
 drop cascades to table tablesample_schema4.test_tablesample2
+drop cascades to table tablesample_schema4.ss_rows_t1
+drop cascades to table tablesample_schema4.ss_rows_t2
+drop cascades to table tablesample_schema4.ss_rows_t3
diff --git a/src/test/regress/sql/tablesample_3.sql b/src/test/regress/sql/tablesample_3.sql
index 696fef561e..f2794ec18e 100755
--- a/src/test/regress/sql/tablesample_3.sql
+++ b/src/test/regress/sql/tablesample_3.sql
@@ -95,5 +95,24 @@ select * from test_tablesample tablesample BERNOULLI(50) REPEATABLE (200) left j
 explain (costs off) select * from test_tablesample tablesample SYSTEM(50) REPEATABLE (200) left join test_tablesample2 tablesample SYSTEM(50) REPEATABLE (200) on test_tablesample.id=test_tablesample2.id where test_tablesample2.id is NULL;
 select * from test_tablesample tablesample SYSTEM(50) REPEATABLE (200) left join test_tablesample2 tablesample SYSTEM(50) REPEATABLE (200) on test_tablesample.id=test_tablesample2.id where test_tablesample2.id is NULL;
 
+-- test rows estimation of samplescan
+set enable_hashjoin to on;
+create table ss_rows_t1 (a int);
+create table ss_rows_t2 (b int);
+create table ss_rows_t3 (c int);
+insert into ss_rows_t1 values (generate_series(1, 20000));
+insert into ss_rows_t2 values (generate_series(1, 20000));
+insert into ss_rows_t3 values (generate_series(1, 20000));
+
+explain select a from  ss_rows_t1 tablesample system (99.999999);
+explain select b from  ss_rows_t2 tablesample bernoulli (99.999999);
+explain select c from  ss_rows_t3 tablesample hybrid (99.999999,99.999999);
+
+explain select a, b, c from
+    ss_rows_t1 tablesample system (99.999999) repeatable (325),
+    ss_rows_t2 tablesample bernoulli (99.999999) repeatable (0),
+    ss_rows_t3 tablesample hybrid (99.999999,99.999999) repeatable (510)
+    where a = b and b = c;
+
 reset search_path;
 drop schema  tablesample_schema4 cascade;
-- 
Gitee