Skip to content

Commit 2edd83a

Browse files
yjhjstzmy-ship-it
authored and committed
[ORCA] Fix segmentation fault when appending group statistics
Previously, if a group reused statistics from its duplicate, appending new statistics could cause a segmentation fault when trying to release a NULL pointer. Fixed by appending directly to the duplicate's statistics instead.
1 parent f799d10 commit 2edd83a

6 files changed

Lines changed: 345 additions & 0 deletions

File tree

src/backend/gporca/libgpopt/src/search/CGroup.cpp

Lines changed: 6 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -861,6 +861,12 @@ CGroup::AppendStats(CMemoryPool *mp, IStatistics *stats)
861861
GPOS_ASSERT(nullptr != stats);
862862
GPOS_ASSERT(nullptr != Pstats());
863863

864+
if (FDuplicateGroup())
865+
{
866+
PgroupDuplicate()->AppendStats(mp, stats);
867+
return;
868+
}
869+
864870
IStatistics *stats_copy = Pstats()->CopyStats(mp);
865871
stats_copy->AppendStats(mp, stats);
866872

src/backend/gporca/libgpopt/src/search/CGroupExpression.cpp

Lines changed: 18 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1098,6 +1098,16 @@ CGroupExpression::OsPrintCostContexts(IOstream &os, const CHAR *szPrefix) const
10981098
// 4: CLogicalInnerJoin [ 6 7 3 ] Origin: (xform: CXformExpandNAryJoinGreedy, Grp: 4, GrpExpr: 3)
10991099
//
11001100
// Group 0 (#GExprs: 0, Duplicate Group: 4):
1101+
//
1102+
// It is also possible for one of a group's expressions to refer back to the group that contains it.
1103+
// This can happen if a group has an expression that references another group, and that other group is also its duplicate.
1104+
// Example: Group 0 is a duplicate of group 12, while it has an expression 1
1105+
// that refers to group 12. After the groups merge, this expression will end up
1106+
// in group 12, which will lead to a cyclical relationship.
1107+
// Group 0 (#GExprs: 3, Duplicate Group: 12):
1108+
// 0: CLogicalCTEConsumer (1), Columns: ["a" (98), "b" (99), "c" (100)] [ ]
1109+
// 1: CLogicalSelect [ 12 3 ]
1110+
// 2: CLogicalNAryJoin [ 13 14 15 ]
11011111
BOOL
11021112
CGroupExpression::ContainsCircularDependencies()
11031113
{
@@ -1118,6 +1128,14 @@ CGroupExpression::ContainsCircularDependencies()
11181128
{
11191129
continue;
11201130
}
1131+
1132+
if (child_group->Id() == m_pgroup->Id())
1133+
{
1134+
m_ecirculardependency = CGroupExpression::ecdCircularDependency;
1135+
GPOS_ASSERT(Pgroup()->UlGExprs() > 1);
1136+
break;
1137+
}
1138+
11211139
CGroup *child_duplicate_group = child_group->PgroupDuplicate();
11221140
if (child_duplicate_group != nullptr)
11231141
{

src/backend/gporca/server/src/unittest/gpopt/base/CGroupTest.cpp

Lines changed: 20 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -99,19 +99,39 @@ CGroupTest::EresUnittest_FResetStatsOnCGroupWithDuplicateGroup()
9999
return GPOS_FAILED;
100100
}
101101

102+
CStatistics *stat =
103+
GPOS_NEW(mp) CStatistics(mp, GPOS_NEW(mp) UlongToHistogramMap(mp),
104+
GPOS_NEW(mp) UlongToDoubleMap(mp), 0, false);
105+
106+
IStatistics *oldStats = pmemo->Pgroup(0)->Pstats();
107+
pmemo->Pgroup(0)->AppendStats(mp, stat);
108+
109+
// By appending stats on group (0), we really are appending the stats on
110+
// group (1). Group (0)'s own stats are never set in the first place.
111+
if (oldStats == pmemo->Pgroup(1)->Pstats())
112+
{
113+
stat->Release();
114+
GPOS_DELETE(pmemo);
115+
pexprGet1->Release();
116+
pexprGet2->Release();
117+
return GPOS_FAILED;
118+
}
119+
102120
pmemo->Pgroup(0)->FResetStats();
103121

104122
// After resetting stats on group (0), we should have also reset stats on
105123
// group (1)
106124
if (pmemo->Pgroup(0)->Pstats() != nullptr ||
107125
pmemo->Pgroup(1)->Pstats() != nullptr)
108126
{
127+
stat->Release();
109128
GPOS_DELETE(pmemo);
110129
pexprGet1->Release();
111130
pexprGet2->Release();
112131
return GPOS_FAILED;
113132
}
114133

134+
stat->Release();
115135
GPOS_DELETE(pmemo);
116136
pexprGet1->Release();
117137
pexprGet2->Release();

src/test/regress/expected/gporca.out

Lines changed: 141 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -15012,3 +15012,144 @@ explain (costs off) select max(s1) from foo inner join bar on j1 = j2 group by g
1501215012
drop table foo;
1501315013
drop table bar;
1501415014
reset optimizer_enable_eageragg;
15015+
--
15016+
-- Test CTE with nested joins to verify fix for infinite recursion during statistic derivation
15017+
-- This test case exercises ORCA's ability to handle CTEs with complex join patterns
15018+
-- and redistribution motions without falling into infinite recursion
15019+
--
15020+
create table cte_test1 (a int, b int, c int) distributed randomly;
15021+
create table cte_test2 (a int, b int, c int) distributed randomly;
15022+
create table cte_test3 (a int, b int, c int) distributed randomly;
15023+
explain (costs off) with cte1 as (
15024+
select cte_test1.a, cte_test2.b, cte_test1.c from cte_test1 inner join cte_test2 on cte_test1.a = cte_test2.b
15025+
),
15026+
cte2 as (select * from cte1)
15027+
select * from cte2 inner join cte_test3 on cte2.c = cte_test3.a;
15028+
QUERY PLAN
15029+
------------------------------------------------------------------------
15030+
Gather Motion 3:1 (slice1; segments: 3)
15031+
-> Hash Join
15032+
Hash Cond: (cte_test1.c = cte_test3.a)
15033+
-> Hash Join
15034+
Hash Cond: (cte_test1.a = cte_test2.b)
15035+
-> Redistribute Motion 3:3 (slice2; segments: 3)
15036+
Hash Key: cte_test1.a
15037+
-> Seq Scan on cte_test1
15038+
-> Hash
15039+
-> Redistribute Motion 3:3 (slice3; segments: 3)
15040+
Hash Key: cte_test2.b
15041+
-> Seq Scan on cte_test2
15042+
-> Hash
15043+
-> Broadcast Motion 3:3 (slice4; segments: 3)
15044+
-> Seq Scan on cte_test3
15045+
Optimizer: Postgres query optimizer
15046+
(16 rows)
15047+
15048+
drop table cte_test1;
15049+
drop table cte_test2;
15050+
drop table cte_test3;
15051+
-- start_ignore
15052+
DROP SCHEMA orca CASCADE;
15053+
NOTICE: drop cascades to 190 other objects
15054+
DETAIL: drop cascades to table bar1
15055+
drop cascades to table bar2
15056+
drop cascades to table r
15057+
drop cascades to table s
15058+
drop cascades to table m
15059+
drop cascades to table m1
15060+
drop cascades to table orca_w1
15061+
drop cascades to table orca_w2
15062+
drop cascades to table orca_w3
15063+
drop cascades to table rcte
15064+
drop cascades to table onek
15065+
drop cascades to table pp
15066+
drop cascades to table multilevel_p
15067+
drop cascades to table t
15068+
drop cascades to table t_date
15069+
drop cascades to table t_text
15070+
drop cascades to table t_ceeval_ints
15071+
drop cascades to function csq_f(integer)
15072+
drop cascades to table csq_r
15073+
drop cascades to table fooh1
15074+
drop cascades to table fooh2
15075+
drop cascades to table t77
15076+
drop cascades to table prod9
15077+
drop cascades to table toanalyze
15078+
drop cascades to table ur
15079+
drop cascades to table us
15080+
drop cascades to table ut
15081+
drop cascades to table uu
15082+
drop cascades to table twf1
15083+
drop cascades to table twf2
15084+
drop cascades to table tab1
15085+
drop cascades to table tab2
15086+
drop cascades to function sum_sfunc(anyelement,anyelement)
15087+
drop cascades to function sum_combinefunc(anyelement,anyelement)
15088+
drop cascades to function myagg1(anyelement)
15089+
drop cascades to function sum_sfunc2(anyelement,anyelement,anyelement)
15090+
drop cascades to function myagg2(anyelement,anyelement)
15091+
drop cascades to function gptfp(anyarray,anyelement)
15092+
drop cascades to function gpffp(anyarray)
15093+
drop cascades to function myagg3(anyelement)
15094+
drop cascades to table array_table
15095+
drop cascades to table mpp22453
15096+
drop cascades to table mpp22791
15097+
drop cascades to table p1
15098+
drop cascades to table tmp_verd_s_pp_provtabs_agt_0015_extract1
15099+
drop cascades to table arrtest
15100+
drop cascades to table foo_missing_stats
15101+
drop cascades to table bar_missing_stats
15102+
drop cascades to table table_with_small_statistic_precision_diff
15103+
drop cascades to table cust
15104+
drop cascades to table datedim
15105+
drop cascades to function plusone(integer)
15106+
drop cascades to table bm_test
15107+
drop cascades to table bm_dyn_test
15108+
drop cascades to table bm_dyn_test_onepart
15109+
drop cascades to table bm_dyn_test_multilvl_part
15110+
drop cascades to table my_tt_agg_opt
15111+
drop cascades to table my_tq_agg_opt_part
15112+
drop cascades to function plusone(numeric)
15113+
drop cascades to table ggg
15114+
drop cascades to table t3
15115+
drop cascades to table index_test
15116+
drop cascades to table btree_test
15117+
drop cascades to table bitmap_test
15118+
drop cascades to type rainbow
15119+
drop cascades to table foo_ctas
15120+
drop cascades to table input_tab1
15121+
drop cascades to table input_tab2
15122+
drop cascades to table tab_1
15123+
drop cascades to table tab_2
15124+
drop cascades to table tab_3
15125+
drop cascades to table t_outer
15126+
drop cascades to table t_inner
15127+
drop cascades to table wst0
15128+
drop cascades to table wst1
15129+
drop cascades to table wst2
15130+
drop cascades to table test1
15131+
drop cascades to table t_new
15132+
drop cascades to table x_tab
15133+
drop cascades to table y_tab
15134+
drop cascades to table z_tab
15135+
drop cascades to function test_func_pg_stats()
15136+
drop cascades to function myintin(cstring)
15137+
drop cascades to type myint
15138+
drop cascades to function myintout(myint)
15139+
drop cascades to function myint_int8(myint)
15140+
drop cascades to table csq_cast_param_outer
15141+
drop cascades to table csq_cast_param_inner
15142+
drop cascades to function myint_numeric(myint)
15143+
drop cascades to cast from myint to numeric
15144+
drop cascades to table onetimefilter1
15145+
drop cascades to table onetimefilter2
15146+
drop cascades to table ffoo
15147+
drop cascades to table fbar
15148+
drop cascades to table touter
15149+
drop cascades to table tinnerbitmap
15150+
drop cascades to table tinnerbtree
15151+
drop cascades to table ds_part
15152+
drop cascades to table non_part1
15153+
drop cascades to table non_part2
15154+
and 90 other objects (see server log for list)
15155+
-- end_ignore

src/test/regress/expected/gporca_optimizer.out

Lines changed: 141 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -15049,3 +15049,144 @@ explain (costs off) select max(s1) from foo inner join bar on j1 = j2 group by g
1504915049
drop table foo;
1505015050
drop table bar;
1505115051
reset optimizer_enable_eageragg;
15052+
--
15053+
-- Test CTE with nested joins to verify fix for infinite recursion during statistic derivation
15054+
-- This test case exercises ORCA's ability to handle CTEs with complex join patterns
15055+
-- and redistribution motions without falling into infinite recursion
15056+
--
15057+
create table cte_test1 (a int, b int, c int) distributed randomly;
15058+
create table cte_test2 (a int, b int, c int) distributed randomly;
15059+
create table cte_test3 (a int, b int, c int) distributed randomly;
15060+
explain (costs off) with cte1 as (
15061+
select cte_test1.a, cte_test2.b, cte_test1.c from cte_test1 inner join cte_test2 on cte_test1.a = cte_test2.b
15062+
),
15063+
cte2 as (select * from cte1)
15064+
select * from cte2 inner join cte_test3 on cte2.c = cte_test3.a;
15065+
QUERY PLAN
15066+
---------------------------------------------------------------------------
15067+
Gather Motion 3:1 (slice1; segments: 3)
15068+
-> Hash Join
15069+
Hash Cond: (cte_test1.c = cte_test3.a)
15070+
-> Redistribute Motion 3:3 (slice2; segments: 3)
15071+
Hash Key: cte_test1.c
15072+
-> Hash Join
15073+
Hash Cond: (cte_test1.a = cte_test2.b)
15074+
-> Seq Scan on cte_test1
15075+
-> Hash
15076+
-> Broadcast Motion 3:3 (slice3; segments: 3)
15077+
-> Seq Scan on cte_test2
15078+
-> Hash
15079+
-> Redistribute Motion 3:3 (slice4; segments: 3)
15080+
Hash Key: cte_test3.a
15081+
-> Seq Scan on cte_test3
15082+
Optimizer: GPORCA
15083+
(16 rows)
15084+
15085+
drop table cte_test1;
15086+
drop table cte_test2;
15087+
drop table cte_test3;
15088+
-- start_ignore
15089+
DROP SCHEMA orca CASCADE;
15090+
NOTICE: drop cascades to 190 other objects
15091+
DETAIL: drop cascades to table bar1
15092+
drop cascades to table bar2
15093+
drop cascades to table r
15094+
drop cascades to table s
15095+
drop cascades to table m
15096+
drop cascades to table m1
15097+
drop cascades to table orca_w1
15098+
drop cascades to table orca_w2
15099+
drop cascades to table orca_w3
15100+
drop cascades to table rcte
15101+
drop cascades to table onek
15102+
drop cascades to table pp
15103+
drop cascades to table multilevel_p
15104+
drop cascades to table t
15105+
drop cascades to table t_date
15106+
drop cascades to table t_text
15107+
drop cascades to table t_ceeval_ints
15108+
drop cascades to function csq_f(integer)
15109+
drop cascades to table csq_r
15110+
drop cascades to table fooh1
15111+
drop cascades to table fooh2
15112+
drop cascades to table t77
15113+
drop cascades to table prod9
15114+
drop cascades to table toanalyze
15115+
drop cascades to table ur
15116+
drop cascades to table us
15117+
drop cascades to table ut
15118+
drop cascades to table uu
15119+
drop cascades to table twf1
15120+
drop cascades to table twf2
15121+
drop cascades to table tab1
15122+
drop cascades to table tab2
15123+
drop cascades to function sum_sfunc(anyelement,anyelement)
15124+
drop cascades to function sum_combinefunc(anyelement,anyelement)
15125+
drop cascades to function myagg1(anyelement)
15126+
drop cascades to function sum_sfunc2(anyelement,anyelement,anyelement)
15127+
drop cascades to function myagg2(anyelement,anyelement)
15128+
drop cascades to function gptfp(anyarray,anyelement)
15129+
drop cascades to function gpffp(anyarray)
15130+
drop cascades to function myagg3(anyelement)
15131+
drop cascades to table array_table
15132+
drop cascades to table mpp22453
15133+
drop cascades to table mpp22791
15134+
drop cascades to table p1
15135+
drop cascades to table tmp_verd_s_pp_provtabs_agt_0015_extract1
15136+
drop cascades to table arrtest
15137+
drop cascades to table foo_missing_stats
15138+
drop cascades to table bar_missing_stats
15139+
drop cascades to table table_with_small_statistic_precision_diff
15140+
drop cascades to table cust
15141+
drop cascades to table datedim
15142+
drop cascades to function plusone(integer)
15143+
drop cascades to table bm_test
15144+
drop cascades to table bm_dyn_test
15145+
drop cascades to table bm_dyn_test_onepart
15146+
drop cascades to table bm_dyn_test_multilvl_part
15147+
drop cascades to table my_tt_agg_opt
15148+
drop cascades to table my_tq_agg_opt_part
15149+
drop cascades to function plusone(numeric)
15150+
drop cascades to table ggg
15151+
drop cascades to table t3
15152+
drop cascades to table index_test
15153+
drop cascades to table btree_test
15154+
drop cascades to table bitmap_test
15155+
drop cascades to type rainbow
15156+
drop cascades to table foo_ctas
15157+
drop cascades to table input_tab1
15158+
drop cascades to table input_tab2
15159+
drop cascades to table tab_1
15160+
drop cascades to table tab_2
15161+
drop cascades to table tab_3
15162+
drop cascades to table t_outer
15163+
drop cascades to table t_inner
15164+
drop cascades to table wst0
15165+
drop cascades to table wst1
15166+
drop cascades to table wst2
15167+
drop cascades to table test1
15168+
drop cascades to table t_new
15169+
drop cascades to table x_tab
15170+
drop cascades to table y_tab
15171+
drop cascades to table z_tab
15172+
drop cascades to function test_func_pg_stats()
15173+
drop cascades to function myintin(cstring)
15174+
drop cascades to type myint
15175+
drop cascades to function myintout(myint)
15176+
drop cascades to function myint_int8(myint)
15177+
drop cascades to table csq_cast_param_outer
15178+
drop cascades to table csq_cast_param_inner
15179+
drop cascades to function myint_numeric(myint)
15180+
drop cascades to cast from myint to numeric
15181+
drop cascades to table onetimefilter1
15182+
drop cascades to table onetimefilter2
15183+
drop cascades to table ffoo
15184+
drop cascades to table fbar
15185+
drop cascades to table touter
15186+
drop cascades to table tinnerbitmap
15187+
drop cascades to table tinnerbtree
15188+
drop cascades to table ds_part
15189+
drop cascades to table non_part1
15190+
drop cascades to table non_part2
15191+
and 90 other objects (see server log for list)
15192+
-- end_ignore

0 commit comments

Comments
 (0)