Skip to content

Commit fac6611

Browse files
authored
Fix colNDVBySeg attnum index mismatch in column-specific ANALYZE (#1680)
* Fix colNDVBySeg index mismatch in do_analyze_rel

  When ANALYZE is run on specific columns (e.g., ANALYZE t (col)) or when a table has dropped columns, the vacattrstats loop index `i` diverges from the attribute's actual attnum-1 index used by colNDVBySeg.

  Two fixes:
  1. QD side (line 887): read colNDVBySeg[attnum-1] instead of colNDVBySeg[i] when storing stadistinctbyseg.
  2. Segment side (line 1011): write ctx->stadistincts[attnum-1] instead of ctx->stadistincts[i] when collecting per-segment NDV.

* Add regression test for colNDVBySeg index mismatch in do_analyze_rel

  ANALYZE t(b) puts column b at loop index i=0 on the QD, but b has attnum=2, so attnum-1=1 != i=0. The fix in do_analyze_rel (using attnum-1 instead of i to index colNDVBySeg) ensures stadistinctbyseg is read from the correct per-segment NDV slot.

  Test verifies stadistinctbyseg for column b equals 100 (all distinct) rather than ~5 (NDV of column a at index 0).
1 parent bff7715 commit fac6611

3 files changed

Lines changed: 52 additions & 2 deletions

File tree

src/backend/commands/analyze.c

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -884,7 +884,7 @@ do_analyze_rel(Relation onerel, VacuumParams *params,
884884

885885
if (Gp_role == GP_ROLE_DISPATCH && GpPolicyIsPartitioned(onerel->rd_cdbpolicy))
886886
{
887-
stats->stadistinctbyseg = colNDVBySeg[i];
887+
stats->stadistinctbyseg = colNDVBySeg[stats->attr->attnum - 1];
888888
}
889889

890890
stats->tupDesc = onerel->rd_att;
@@ -1008,7 +1008,7 @@ do_analyze_rel(Relation onerel, VacuumParams *params,
10081008

10091009
if (Gp_role == GP_ROLE_EXECUTE) {
10101010
Assert(ctx->stadistincts);
1011-
ctx->stadistincts[i] = Float8GetDatum(stats->stadistinct);
1011+
ctx->stadistincts[stats->attr->attnum - 1] = Float8GetDatum(stats->stadistinct);
10121012
}
10131013

10141014
MemoryContextResetAndDeleteChildren(col_context);

src/test/regress/expected/analyze.out

Lines changed: 27 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1314,3 +1314,30 @@ select * from pg_stats where tablename like 'part2';
13141314
(1 row)
13151315

13161316
drop table multipart cascade;
1317+
--
1318+
-- Test column-specific ANALYZE correctly uses attnum-based NDV index (not loop index).
1319+
-- When ANALYZE t(b) is run, the QD loop has i=0 for column b (attnum=2),
1320+
-- so attnum-1=1 != i=0. Without the fix, colNDVBySeg[i=0] reads column a's NDV
1321+
-- instead of column b's NDV.
1322+
--
1323+
CREATE TABLE analyze_col_ndv_drop (a int, b int, c int) DISTRIBUTED BY (a);
1324+
INSERT INTO analyze_col_ndv_drop SELECT i%5, i, i%50 FROM generate_series(1, 100) i;
1325+
-- ANALYZE specific column b: QD loop has i=0, b.attnum=2, so attnum-1=1 != i=0
1326+
ANALYZE analyze_col_ndv_drop (b);
1327+
-- stadistinctbyseg for b should be 100 (all distinct), not ~5 (NDV of column a at index 0)
1328+
SELECT a.attname,
1329+
CASE WHEN s.stakind1 = 8 THEN array_to_string(s.stavalues1, ',')
1330+
WHEN s.stakind2 = 8 THEN array_to_string(s.stavalues2, ',')
1331+
WHEN s.stakind3 = 8 THEN array_to_string(s.stavalues3, ',')
1332+
WHEN s.stakind4 = 8 THEN array_to_string(s.stavalues4, ',')
1333+
WHEN s.stakind5 = 8 THEN array_to_string(s.stavalues5, ',')
1334+
END AS stadistinctbyseg
1335+
FROM pg_statistic s
1336+
JOIN pg_attribute a ON a.attrelid = s.starelid AND a.attnum = s.staattnum
1337+
WHERE s.starelid = 'analyze_col_ndv_drop'::regclass AND a.attname = 'b';
1338+
attname | stadistinctbyseg
1339+
---------+------------------
1340+
b | 100
1341+
(1 row)
1342+
1343+
DROP TABLE analyze_col_ndv_drop;

src/test/regress/sql/analyze.sql

Lines changed: 23 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -677,3 +677,26 @@ analyze verbose p2;
677677
select * from pg_stats where tablename like 'part2';
678678

679679
drop table multipart cascade;
680+
681+
--
682+
-- Test column-specific ANALYZE correctly uses attnum-based NDV index (not loop index).
683+
-- When ANALYZE t(b) is run, the QD loop has i=0 for column b (attnum=2),
684+
-- so attnum-1=1 != i=0. Without the fix, colNDVBySeg[i=0] reads column a's NDV
685+
-- instead of column b's NDV.
686+
--
687+
CREATE TABLE analyze_col_ndv_drop (a int, b int, c int) DISTRIBUTED BY (a);
688+
INSERT INTO analyze_col_ndv_drop SELECT i%5, i, i%50 FROM generate_series(1, 100) i;
689+
-- ANALYZE specific column b: QD loop has i=0, b.attnum=2, so attnum-1=1 != i=0
690+
ANALYZE analyze_col_ndv_drop (b);
691+
-- stadistinctbyseg for b should be 100 (all distinct), not ~5 (NDV of column a at index 0)
692+
SELECT a.attname,
693+
CASE WHEN s.stakind1 = 8 THEN array_to_string(s.stavalues1, ',')
694+
WHEN s.stakind2 = 8 THEN array_to_string(s.stavalues2, ',')
695+
WHEN s.stakind3 = 8 THEN array_to_string(s.stavalues3, ',')
696+
WHEN s.stakind4 = 8 THEN array_to_string(s.stavalues4, ',')
697+
WHEN s.stakind5 = 8 THEN array_to_string(s.stavalues5, ',')
698+
END AS stadistinctbyseg
699+
FROM pg_statistic s
700+
JOIN pg_attribute a ON a.attrelid = s.starelid AND a.attnum = s.staattnum
701+
WHERE s.starelid = 'analyze_col_ndv_drop'::regclass AND a.attname = 'b';
702+
DROP TABLE analyze_col_ndv_drop;

0 commit comments

Comments
 (0)