Skip to content

Commit cf9a6ea

Browse files
authored
Fix Segment NDV number underflow issue (#1387)
For the NDV ratio, the function `abs` accepts an `int` parameter, so a double value such as `-0.x` is cast to `int(0)`, which leads to an inefficient plan being picked. Fix: `abs` -> `fabs`.
1 parent 2edd83a commit cf9a6ea

9 files changed

Lines changed: 105 additions & 122 deletions

File tree

contrib/pax_storage/src/test/regress/expected/bfv_aggregate_optimizer.out

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -188,7 +188,7 @@ set optimizer_force_multistage_agg = on;
188188
select count_operator('select count(*) from multi_stage_test group by b;','GroupAggregate');
189189
count_operator
190190
----------------
191-
1
191+
2
192192
(1 row)
193193

194194
set optimizer_force_multistage_agg = off;

contrib/pax_storage/src/test/regress/expected/gp_dqa_optimizer.out

Lines changed: 27 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -65,20 +65,18 @@ select count(distinct d) from dqa_t1 group by i;
6565
(12 rows)
6666

6767
explain (costs off) select count(distinct d) from dqa_t1 group by i;
68-
QUERY PLAN
69-
------------------------------------------------------------
68+
QUERY PLAN
69+
------------------------------------------------------------------
7070
Gather Motion 3:1 (slice1; segments: 3)
71-
-> Finalize HashAggregate
71+
-> GroupAggregate
7272
Group Key: i
73-
-> Redistribute Motion 3:3 (slice2; segments: 3)
74-
Hash Key: i
75-
-> Partial GroupAggregate
76-
Group Key: i
77-
-> Sort
78-
Sort Key: i, d
79-
-> Seq Scan on dqa_t1
73+
-> Sort
74+
Sort Key: i
75+
-> Redistribute Motion 3:3 (slice2; segments: 3)
76+
Hash Key: i
77+
-> Seq Scan on dqa_t1
8078
Optimizer: Pivotal Optimizer (GPORCA) version 3.83.0
81-
(11 rows)
79+
(9 rows)
8280

8381
select count(distinct d), sum(distinct d) from dqa_t1 group by i;
8482
count | sum
@@ -98,20 +96,18 @@ select count(distinct d), sum(distinct d) from dqa_t1 group by i;
9896
(12 rows)
9997

10098
explain (costs off) select count(distinct d), sum(distinct d) from dqa_t1 group by i;
101-
QUERY PLAN
102-
------------------------------------------------------------
99+
QUERY PLAN
100+
------------------------------------------------------------------
103101
Gather Motion 3:1 (slice1; segments: 3)
104-
-> Finalize HashAggregate
102+
-> GroupAggregate
105103
Group Key: i
106-
-> Redistribute Motion 3:3 (slice2; segments: 3)
107-
Hash Key: i
108-
-> Partial GroupAggregate
109-
Group Key: i
110-
-> Sort
111-
Sort Key: i, d
112-
-> Seq Scan on dqa_t1
104+
-> Sort
105+
Sort Key: i
106+
-> Redistribute Motion 3:3 (slice2; segments: 3)
107+
Hash Key: i
108+
-> Seq Scan on dqa_t1
113109
Optimizer: Pivotal Optimizer (GPORCA) version 3.83.0
114-
(11 rows)
110+
(9 rows)
115111

116112
select count(distinct d), count(distinct dt) from dqa_t1;
117113
count | count
@@ -1909,20 +1905,18 @@ select count(distinct d) from dqa_t1 group by i;
19091905
(12 rows)
19101906

19111907
explain (costs off) select count(distinct d) from dqa_t1 group by i;
1912-
QUERY PLAN
1913-
------------------------------------------------------------
1908+
QUERY PLAN
1909+
------------------------------------------------------------------
19141910
Gather Motion 3:1 (slice1; segments: 3)
1915-
-> Finalize HashAggregate
1911+
-> GroupAggregate
19161912
Group Key: i
1917-
-> Redistribute Motion 3:3 (slice2; segments: 3)
1918-
Hash Key: i
1919-
-> Partial GroupAggregate
1920-
Group Key: i
1921-
-> Sort
1922-
Sort Key: i, d
1923-
-> Seq Scan on dqa_t1
1913+
-> Sort
1914+
Sort Key: i
1915+
-> Redistribute Motion 3:3 (slice2; segments: 3)
1916+
Hash Key: i
1917+
-> Seq Scan on dqa_t1
19241918
Optimizer: Pivotal Optimizer (GPORCA) version 3.83.0
1925-
(11 rows)
1919+
(9 rows)
19261920

19271921
select count(distinct d), count(distinct c), count(distinct dt) from dqa_t1;
19281922
count | count | count

contrib/pax_storage/src/test/regress/expected/groupingsets_optimizer.out

Lines changed: 6 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1891,12 +1891,11 @@ explain (costs off)
18911891
-> Streaming Partial HashAggregate
18921892
Group Key: share0_ref5.hundred
18931893
-> Shared Scan (share slice:id 5:0)
1894-
-> Finalize HashAggregate
1894+
-> HashAggregate
18951895
Group Key: share0_ref6.thousand
18961896
-> Redistribute Motion 3:3 (slice6; segments: 3)
18971897
Hash Key: share0_ref6.thousand
1898-
-> Streaming Partial HashAggregate
1899-
Group Key: share0_ref6.thousand
1898+
-> Result
19001899
-> Shared Scan (share slice:id 6:0)
19011900
-> HashAggregate
19021901
Group Key: share0_ref7.twothousand
@@ -1908,7 +1907,7 @@ explain (costs off)
19081907
Group Key: share0_ref8.unique1
19091908
-> Shared Scan (share slice:id 1:0)
19101909
Optimizer: Pivotal Optimizer (GPORCA)
1911-
(50 rows)
1910+
(49 rows)
19121911

19131912
explain (costs off)
19141913
select unique1,
@@ -1999,12 +1998,11 @@ explain (costs off)
19991998
-> Streaming Partial HashAggregate
20001999
Group Key: share0_ref5.hundred
20012000
-> Shared Scan (share slice:id 5:0)
2002-
-> Finalize HashAggregate
2001+
-> HashAggregate
20032002
Group Key: share0_ref6.thousand
20042003
-> Redistribute Motion 3:3 (slice6; segments: 3)
20052004
Hash Key: share0_ref6.thousand
2006-
-> Streaming Partial HashAggregate
2007-
Group Key: share0_ref6.thousand
2005+
-> Result
20082006
-> Shared Scan (share slice:id 6:0)
20092007
-> HashAggregate
20102008
Group Key: share0_ref7.twothousand
@@ -2016,7 +2014,7 @@ explain (costs off)
20162014
Group Key: share0_ref8.unique1
20172015
-> Shared Scan (share slice:id 1:0)
20182016
Optimizer: Pivotal Optimizer (GPORCA)
2019-
(50 rows)
2017+
(49 rows)
20202018

20212019
reset hash_mem_multiplier;
20222020
-- check collation-sensitive matching between grouping expressions

contrib/pax_storage/src/test/regress/expected/limit_optimizer.out

Lines changed: 15 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -358,29 +358,28 @@ order by s2 desc;
358358
explain (verbose, costs off)
359359
select sum(tenthous) as s1, sum(tenthous) + random()*0 as s2
360360
from tenk1 group by thousand order by thousand limit 3;
361-
QUERY PLAN
362-
-------------------------------------------------------------------------------------------------------------------------------------
361+
QUERY PLAN
362+
-----------------------------------------------------------------------------------------------------------------------------------------
363363
Result
364-
Output: (sum(tenthous)), (((sum(tenthous))::double precision + (random() * '0'::double precision)))
364+
Output: (sum(tenthous)), ((((sum(tenthous)))::double precision + (random() * '0'::double precision)))
365365
-> Limit
366-
Output: (sum(tenthous)), (((sum(tenthous))::double precision + (random() * '0'::double precision))), thousand
366+
Output: (sum(tenthous)), ((((sum(tenthous)))::double precision + (random() * '0'::double precision))), thousand
367367
-> Gather Motion 3:1 (slice1; segments: 3)
368-
Output: (sum(tenthous)), (((sum(tenthous))::double precision + (random() * '0'::double precision))), thousand
368+
Output: (sum(tenthous)), ((((sum(tenthous)))::double precision + (random() * '0'::double precision))), thousand
369369
Merge Key: thousand
370370
-> Limit
371-
Output: (sum(tenthous)), (((sum(tenthous))::double precision + (random() * '0'::double precision))), thousand
372-
-> Finalize GroupAggregate
373-
Output: sum(tenthous), ((sum(tenthous))::double precision + (random() * '0'::double precision)), thousand
374-
Group Key: tenk1.thousand
371+
Output: (sum(tenthous)), ((((sum(tenthous)))::double precision + (random() * '0'::double precision))), thousand
372+
-> Result
373+
Output: (sum(tenthous)), (((sum(tenthous)))::double precision + (random() * '0'::double precision)), thousand
375374
-> Sort
376-
Output: thousand, (PARTIAL sum(tenthous)), (PARTIAL sum(tenthous))
375+
Output: (sum(tenthous)), (sum(tenthous)), thousand
377376
Sort Key: tenk1.thousand
378-
-> Redistribute Motion 3:3 (slice2; segments: 3)
379-
Output: thousand, (PARTIAL sum(tenthous)), (PARTIAL sum(tenthous))
380-
Hash Key: thousand
381-
-> Streaming Partial HashAggregate
382-
Output: thousand, PARTIAL sum(tenthous), PARTIAL sum(tenthous)
383-
Group Key: tenk1.thousand
377+
-> HashAggregate
378+
Output: sum(tenthous), sum(tenthous), thousand
379+
Group Key: tenk1.thousand
380+
-> Redistribute Motion 3:3 (slice2; segments: 3)
381+
Output: thousand, tenthous
382+
Hash Key: thousand
384383
-> Seq Scan on public.tenk1
385384
Output: thousand, tenthous
386385
Optimizer: Pivotal Optimizer (GPORCA)

src/backend/commands/analyze.c

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2871,12 +2871,13 @@ process_sample_rows(Portal portal,
28712871
arrayVal = DatumGetArrayTypeP(funcRetValues[3]);
28722872
deconstruct_array(arrayVal, FLOAT8OID, 8, true, 'd',
28732873
&colndv, &nulls, &numelems);
2874-
for (i = 0; i < relDesc->natts; i++)
2874+
Assert(numelems == relDesc->natts);
2875+
for (i = 0; i < numelems; i++)
28752876
{
28762877
double this_colndv = DatumGetFloat8(colndv[i]);
28772878
if (this_colndv < 0) {
28782879
Assert(this_colndv >= -1);
2879-
colNDVBySeg[i] += abs(this_colndv) * this_totalrows;
2880+
colNDVBySeg[i] += fabs(this_colndv) * this_totalrows;
28802881
} else {
28812882
/* if current segment have any data, then ndv won't be 0.
28822883
* if current segment have no rows, ndv is 0.

src/test/regress/expected/bfv_aggregate_optimizer.out

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -188,7 +188,7 @@ set optimizer_force_multistage_agg = on;
188188
select count_operator('select count(*) from multi_stage_test group by b;','GroupAggregate');
189189
count_operator
190190
----------------
191-
1
191+
2
192192
(1 row)
193193

194194
set optimizer_force_multistage_agg = off;

src/test/regress/expected/gp_dqa_optimizer.out

Lines changed: 30 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -66,20 +66,18 @@ select count(distinct d) from dqa_t1 group by i;
6666
(12 rows)
6767

6868
explain (costs off) select count(distinct d) from dqa_t1 group by i;
69-
QUERY PLAN
70-
------------------------------------------------------------
69+
QUERY PLAN
70+
------------------------------------------------------------------
7171
Gather Motion 3:1 (slice1; segments: 3)
72-
-> Finalize HashAggregate
72+
-> GroupAggregate
7373
Group Key: i
74-
-> Redistribute Motion 3:3 (slice2; segments: 3)
75-
Hash Key: i
76-
-> Partial GroupAggregate
77-
Group Key: i
78-
-> Sort
79-
Sort Key: i, d
80-
-> Seq Scan on dqa_t1
81-
Optimizer: Pivotal Optimizer (GPORCA) version 3.83.0
82-
(11 rows)
74+
-> Sort
75+
Sort Key: i
76+
-> Redistribute Motion 3:3 (slice2; segments: 3)
77+
Hash Key: i
78+
-> Seq Scan on dqa_t1
79+
Optimizer: GPORCA
80+
(9 rows)
8381

8482
select count(distinct d), sum(distinct d) from dqa_t1 group by i;
8583
count | sum
@@ -99,20 +97,18 @@ select count(distinct d), sum(distinct d) from dqa_t1 group by i;
9997
(12 rows)
10098

10199
explain (costs off) select count(distinct d), sum(distinct d) from dqa_t1 group by i;
102-
QUERY PLAN
103-
------------------------------------------------------------
100+
QUERY PLAN
101+
------------------------------------------------------------------
104102
Gather Motion 3:1 (slice1; segments: 3)
105-
-> Finalize HashAggregate
103+
-> GroupAggregate
106104
Group Key: i
107-
-> Redistribute Motion 3:3 (slice2; segments: 3)
108-
Hash Key: i
109-
-> Partial GroupAggregate
110-
Group Key: i
111-
-> Sort
112-
Sort Key: i, d
113-
-> Seq Scan on dqa_t1
114-
Optimizer: Pivotal Optimizer (GPORCA) version 3.83.0
115-
(11 rows)
105+
-> Sort
106+
Sort Key: i
107+
-> Redistribute Motion 3:3 (slice2; segments: 3)
108+
Hash Key: i
109+
-> Seq Scan on dqa_t1
110+
Optimizer: GPORCA
111+
(9 rows)
116112

117113
select count(distinct d), count(distinct dt) from dqa_t1;
118114
INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner
@@ -2022,20 +2018,18 @@ select count(distinct d) from dqa_t1 group by i;
20222018
(12 rows)
20232019

20242020
explain (costs off) select count(distinct d) from dqa_t1 group by i;
2025-
QUERY PLAN
2026-
------------------------------------------------------------
2021+
QUERY PLAN
2022+
------------------------------------------------------------------
20272023
Gather Motion 3:1 (slice1; segments: 3)
2028-
-> Finalize HashAggregate
2024+
-> GroupAggregate
20292025
Group Key: i
2030-
-> Redistribute Motion 3:3 (slice2; segments: 3)
2031-
Hash Key: i
2032-
-> Partial GroupAggregate
2033-
Group Key: i
2034-
-> Sort
2035-
Sort Key: i, d
2036-
-> Seq Scan on dqa_t1
2037-
Optimizer: Pivotal Optimizer (GPORCA) version 3.83.0
2038-
(11 rows)
2026+
-> Sort
2027+
Sort Key: i
2028+
-> Redistribute Motion 3:3 (slice2; segments: 3)
2029+
Hash Key: i
2030+
-> Seq Scan on dqa_t1
2031+
Optimizer: GPORCA
2032+
(9 rows)
20392033

20402034
select count(distinct d), count(distinct c), count(distinct dt) from dqa_t1;
20412035
INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner

src/test/regress/expected/groupingsets_optimizer.out

Lines changed: 6 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1968,12 +1968,11 @@ explain (costs off)
19681968
-> Streaming Partial HashAggregate
19691969
Group Key: share0_ref5.hundred
19701970
-> Shared Scan (share slice:id 5:0)
1971-
-> Finalize HashAggregate
1971+
-> HashAggregate
19721972
Group Key: share0_ref6.thousand
19731973
-> Redistribute Motion 3:3 (slice6; segments: 3)
19741974
Hash Key: share0_ref6.thousand
1975-
-> Streaming Partial HashAggregate
1976-
Group Key: share0_ref6.thousand
1975+
-> Result
19771976
-> Shared Scan (share slice:id 6:0)
19781977
-> HashAggregate
19791978
Group Key: share0_ref7.twothousand
@@ -1985,7 +1984,7 @@ explain (costs off)
19851984
Group Key: share0_ref8.unique1
19861985
-> Shared Scan (share slice:id 1:0)
19871986
Optimizer: Pivotal Optimizer (GPORCA)
1988-
(50 rows)
1987+
(49 rows)
19891988

19901989
explain (costs off)
19911990
select unique1,
@@ -2076,12 +2075,11 @@ explain (costs off)
20762075
-> Streaming Partial HashAggregate
20772076
Group Key: share0_ref5.hundred
20782077
-> Shared Scan (share slice:id 5:0)
2079-
-> Finalize HashAggregate
2078+
-> HashAggregate
20802079
Group Key: share0_ref6.thousand
20812080
-> Redistribute Motion 3:3 (slice6; segments: 3)
20822081
Hash Key: share0_ref6.thousand
2083-
-> Streaming Partial HashAggregate
2084-
Group Key: share0_ref6.thousand
2082+
-> Result
20852083
-> Shared Scan (share slice:id 6:0)
20862084
-> HashAggregate
20872085
Group Key: share0_ref7.twothousand
@@ -2093,7 +2091,7 @@ explain (costs off)
20932091
Group Key: share0_ref8.unique1
20942092
-> Shared Scan (share slice:id 1:0)
20952093
Optimizer: Pivotal Optimizer (GPORCA)
2096-
(50 rows)
2094+
(49 rows)
20972095

20982096
reset hash_mem_multiplier;
20992097
-- check collation-sensitive matching between grouping expressions

0 commit comments

Comments
 (0)