Skip to content

Commit a43da9b

Browse files
huansongyjhjstz
authored and committed
Fix possible inconsistency between bitmap LOV table and index
As with gp_fastsequence, there was a potential inconsistency between the bitmap LOV table and its index caused by the frozen insert. We now fix the inconsistency using a method similar to the one in 961de2da40858ed302b2a656b5258aa8d17c87b9.
1 parent 9544655 commit a43da9b

4 files changed

Lines changed: 318 additions & 3 deletions

File tree

src/backend/access/bitmap/bitmapattutil.c

Lines changed: 19 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,7 @@
4343
#include "nodes/makefuncs.h"
4444
#include "optimizer/clauses.h"
4545
#include "utils/builtins.h"
46+
#include "utils/faultinjector.h"
4647
#include "utils/lsyscache.h"
4748
#include "utils/syscache.h"
4849
#include "utils/snapmgr.h"
@@ -339,7 +340,7 @@ _bitmap_insert_lov(Relation lovHeap, Relation lovIndex, Datum *datum,
339340

340341
/* insert this tuple into the heap */
341342
tuple = heap_form_tuple(tupDesc, datum, nulls);
342-
frozen_heap_insert(lovHeap, tuple);
343+
simple_heap_insert(lovHeap, tuple);
343344

344345
/* insert a new tuple into the index */
345346
indexDatum = palloc0((tupDesc->natts - 2) * sizeof(Datum));
@@ -349,6 +350,23 @@ _bitmap_insert_lov(Relation lovHeap, Relation lovIndex, Datum *datum,
349350
result = index_insert(lovIndex, indexDatum, indexNulls,
350351
&(tuple->t_self), lovHeap, true, false, NULL);
351352

353+
#ifdef FAULT_INJECTOR
354+
FaultInjector_InjectFaultIfSet(
355+
"insert_bmlov_before_freeze",
356+
DDLNotSpecified,
357+
"", //databaseName
358+
RelationGetRelationName(lovHeap));
359+
#endif
360+
/* freeze the tuple */
361+
heap_freeze_tuple_wal_logged(lovHeap, tuple);
362+
363+
#ifdef FAULT_INJECTOR
364+
FaultInjector_InjectFaultIfSet(
365+
"insert_bmlov_after_freeze",
366+
DDLNotSpecified,
367+
"", //databaseName
368+
RelationGetRelationName(lovHeap));
369+
#endif
352370
pfree(indexDatum);
353371
pfree(indexNulls);
354372
Assert(result);

src/backend/tcop/postgres.c

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5794,6 +5794,7 @@ PostgresMain(int argc, char *argv[],
57945794

57955795
SetUserIdAndSecContext(GetOuterUserId(), 0);
57965796

5797+
SIMPLE_FAULT_INJECTOR("qe_exec_finished");
57975798
send_ready_for_query = true;
57985799
}
57995800
break;

src/test/isolation2/expected/frozen_insert_crash.out

Lines changed: 202 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,8 @@
66
--
77
-- And the above behavior should remain consistent using seqscan or indexscan.
88
--
9-
-- We test gp_fastsequence here since it does frozen insert and has an index.
9+
-- We test gp_fastsequence and bitmap here since they do frozen insert and
10+
-- normal index insert, so that the inconsistency could exist.
1011

1112
-- Case 1. crash after the regular MVCC insert has made to disk, but not
1213
-- the WAL record responsible for updating it to frozen.
@@ -243,6 +244,206 @@ END
243244
0 | 2 | 88
244245
(2 rows)
245246

247+
-- Same set of tests for bitmap LOV insert.
248+
create extension if not exists pageinspect;
249+
CREATE
250+
251+
-- Function to check the bitmap lov content regarding the column 'b'
252+
-- which is the table column that we will have bitmap created on.
253+
-- Basically, we want to see if "SELECT b FROM pg_bitmapindex.pg_bm_xxx"
254+
-- returns the same result in seqscan and indexscan.
255+
CREATE OR REPLACE FUNCTION insert_bm_lov_res() RETURNS void AS $$ DECLARE lov_table text; /* in func */ sql text; /* in func */ BEGIN /* in func */ drop table if exists bm_lov_res; /* in func */ create temp table bm_lov_res(b int); /* in func */ SELECT c.relname INTO lov_table /* in func */ FROM bm_metap('tab_fi_idx') b /* in func */ JOIN pg_class c ON b.auxrelid = c.oid; /* in func */ sql := format('INSERT INTO bm_lov_res SELECT b FROM pg_bitmapindex.%I', lov_table); /* in func */ EXECUTE sql; /* in func */ END; /* in func */ $$ LANGUAGE plpgsql;
256+
CREATE
257+
258+
1: create table tab_fi(a int, b int) with (appendoptimized=true) distributed replicated;
259+
CREATE
260+
1: create index tab_fi_idx on tab_fi using bitmap(b);
261+
CREATE
262+
1: insert into tab_fi values(1, 1);
263+
INSERT 1
264+
-- switch WAL on seg0 to reduce flakiness
265+
1: select gp_segment_id, pg_switch_wal() is not null from gp_dist_random('gp_id') where gp_segment_id = 0;
266+
gp_segment_id | ?column?
267+
---------------+----------
268+
0 | t
269+
(1 row)
270+
271+
-- case 1: suspend and flush WAL before freezing the tuple
272+
273+
-- suspend right after the insert into the bitmap lov table and its index
274+
-- during a table insert, but before freezing the tuple
275+
1: select gp_inject_fault('insert_bmlov_before_freeze', 'suspend', dbid) from gp_segment_configuration where role = 'p' and content = 0;
276+
gp_inject_fault
277+
-----------------
278+
Success:
279+
(1 row)
280+
2>: insert into tab_fi values(2, 2); <waiting ...>
281+
1: select gp_wait_until_triggered_fault('insert_bmlov_before_freeze', 1, dbid) from gp_segment_configuration where role = 'p' and content = 0;
282+
gp_wait_until_triggered_fault
283+
-------------------------------
284+
Success:
285+
(1 row)
286+
-- switch WAL on seg0, so the new row gets flushed (including its index)
287+
1: select gp_segment_id, pg_switch_wal() is not null from gp_dist_random('gp_id') where gp_segment_id = 0;
288+
gp_segment_id | ?column?
289+
---------------+----------
290+
0 | t
291+
(1 row)
292+
-- inject a panic, and resume the insert. The WAL for the freeze operation is not
293+
-- going to be made to disk (we just flushed WALs), so we won't replay it during restart later.
294+
-- skip FTS probe to prevent unexpected mirror promotion
295+
1: select gp_inject_fault_infinite('fts_probe', 'skip', dbid) from gp_segment_configuration where role='p' and content=-1;
296+
gp_inject_fault_infinite
297+
--------------------------
298+
Success:
299+
(1 row)
300+
1: select gp_inject_fault('qe_exec_finished', 'panic', dbid) from gp_segment_configuration where role = 'p' and content = 0;
301+
gp_inject_fault
302+
-----------------
303+
Success:
304+
(1 row)
305+
1: select gp_inject_fault('insert_bmlov_before_freeze', 'reset', dbid) from gp_segment_configuration where role = 'p' and content = 0;
306+
gp_inject_fault
307+
-----------------
308+
Success:
309+
(1 row)
310+
1: select gp_inject_fault('fts_probe', 'reset', dbid) from gp_segment_configuration where role='p' and content=-1;
311+
gp_inject_fault
312+
-----------------
313+
Success:
314+
(1 row)
315+
2<: <... completed>
316+
ERROR: fault triggered, fault name:'qe_exec_finished' fault type:'panic'
317+
1q: ... <quitting>
318+
-- check the lov table content w/ table vs index scan, neither should see the
319+
-- new inserted row (b=2)
320+
0U: set enable_indexscan = on;
321+
SET
322+
0U: set enable_seqscan = off;
323+
SET
324+
0U: select insert_bm_lov_res();
325+
insert_bm_lov_res
326+
-------------------
327+
328+
(1 row)
329+
0U: select * from bm_lov_res;
330+
b
331+
---
332+
1
333+
(1 row)
334+
0U: set enable_indexscan = off;
335+
SET
336+
0U: set enable_seqscan = on;
337+
SET
338+
0U: select insert_bm_lov_res();
339+
insert_bm_lov_res
340+
-------------------
341+
342+
(1 row)
343+
0U: select * from bm_lov_res;
344+
b
345+
---
346+
1
347+
(1 row)
348+
0Uq: ... <quitting>
349+
1: drop table tab_fi;
350+
DROP
351+
352+
-- case 2: suspend and flush WAL after freezing the tuple
353+
354+
1: create table tab_fi(a int, b int) with (appendoptimized=true) distributed replicated;
355+
CREATE
356+
1: create index tab_fi_idx on tab_fi using bitmap(b);
357+
CREATE
358+
1: insert into tab_fi values(1, 1);
359+
INSERT 1
360+
-- switch WAL on seg0 to reduce flakiness
361+
1: select gp_segment_id, pg_switch_wal() is not null from gp_dist_random('gp_id') where gp_segment_id = 0;
362+
gp_segment_id | ?column?
363+
---------------+----------
364+
0 | t
365+
(1 row)
366+
-- suspend right after freezing the tuple
367+
1: select gp_inject_fault('insert_bmlov_after_freeze', 'suspend', dbid) from gp_segment_configuration where role = 'p' and content = 0;
368+
gp_inject_fault
369+
-----------------
370+
Success:
371+
(1 row)
372+
2>: insert into tab_fi values(2, 2); <waiting ...>
373+
1: select gp_wait_until_triggered_fault('insert_bmlov_after_freeze', 1, dbid) from gp_segment_configuration where role = 'p' and content = 0;
374+
gp_wait_until_triggered_fault
375+
-------------------------------
376+
Success:
377+
(1 row)
378+
-- switch WAL on seg0, so the freeze record gets flushed
379+
1: select gp_segment_id, pg_switch_wal() is not null from gp_dist_random('gp_id') where gp_segment_id = 0;
380+
gp_segment_id | ?column?
381+
---------------+----------
382+
0 | t
383+
(1 row)
384+
-- While we are on it, check the wal record for the freeze operation.
385+
! seg0_datadir=$(psql -At -c "select datadir from gp_segment_configuration where content = 0 and role = 'p'" postgres) && seg0_last_wal_file=$(psql -At -c "SELECT pg_walfile_name(pg_current_wal_lsn()) from gp_dist_random('gp_id') where gp_segment_id = 0" postgres) && pg_waldump ${seg0_last_wal_file} -p ${seg0_datadir}/pg_wal | grep FREEZE_PAGE;
386+
rmgr: Heap2 len (rec/tot): 68/ 68, tx: 950, lsn: 0/280001E0, prev 0/28000198, desc: FREEZE_PAGE cutoff xid 0 ntuples 1, blkref #0: rel 1663/17018/98313 blk 0
387+
388+
-- inject a panic and resume in same way as Case 1. But this time we will be able to replay the frozen insert.
389+
-- skip FTS probe to prevent unexpected mirror promotion
390+
1: select gp_inject_fault_infinite('fts_probe', 'skip', dbid) from gp_segment_configuration where role='p' and content=-1;
391+
gp_inject_fault_infinite
392+
--------------------------
393+
Success:
394+
(1 row)
395+
1: select gp_inject_fault('qe_exec_finished', 'panic', dbid) from gp_segment_configuration where role = 'p' and content = 0;
396+
gp_inject_fault
397+
-----------------
398+
Success:
399+
(1 row)
400+
1: select gp_inject_fault('insert_bmlov_after_freeze', 'reset', dbid) from gp_segment_configuration where role = 'p' and content = 0;
401+
gp_inject_fault
402+
-----------------
403+
Success:
404+
(1 row)
405+
1: select gp_inject_fault('fts_probe', 'reset', dbid) from gp_segment_configuration where role='p' and content=-1;
406+
gp_inject_fault
407+
-----------------
408+
Success:
409+
(1 row)
410+
2<: <... completed>
411+
ERROR: fault triggered, fault name:'qe_exec_finished' fault type:'panic'
412+
1q: ... <quitting>
413+
-- check the lov table content w/ table vs index scan, both should see the
414+
-- new inserted row (b=2)
415+
0U: set enable_indexscan = on;
416+
SET
417+
0U: set enable_seqscan = off;
418+
SET
419+
0U: select insert_bm_lov_res();
420+
insert_bm_lov_res
421+
-------------------
422+
423+
(1 row)
424+
0U: select * from bm_lov_res;
425+
b
426+
---
427+
1
428+
2
429+
(2 rows)
430+
0U: set enable_indexscan = off;
431+
SET
432+
0U: set enable_seqscan = on;
433+
SET
434+
0U: select insert_bm_lov_res();
435+
insert_bm_lov_res
436+
-------------------
437+
438+
(1 row)
439+
0U: select * from bm_lov_res;
440+
b
441+
---
442+
1
443+
2
444+
(2 rows)
445+
446+
-- validate that we've actually tested desired scan method
246447
-- for some reason this disrupts the output of subsequent queries so
247448
-- validating at the end here
248449
! psql postgres -At -c "set enable_indexscan = off; set enable_seqscan = on; explain (costs off) select distinct f.gp_segment_id, f.objmod, f.last_sequence from gp_dist_random('gp_fastsequence') f left join gp_dist_random('pg_appendonly') a on segrelid = objid and a.gp_segment_id = f.gp_segment_id where a.gp_segment_id = 0 and relid = (select oid from pg_class where relname = 'tab_fi');" | grep "Seq Scan on gp_fastsequence";

src/test/isolation2/sql/frozen_insert_crash.sql

Lines changed: 96 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,8 @@
66
--
77
-- And the above behavior should remain consistent using seqscan or indexscan.
88
--
9-
-- We test gp_fastsequence here since it does frozen insert and has an index.
9+
-- We test gp_fastsequence and bitmap here since they do frozen insert and
10+
-- normal index insert, so that the inconsistency could exist.
1011

1112
-- Case 1. crash after the regular MVCC insert has made to disk, but not
1213
-- the WAL record responsible for updating it to frozen.
@@ -116,6 +117,100 @@
116117
2: end;
117118
3: select segment_id, segno, eof from gp_toolkit.__gp_aoseg('tab_aoseg') where segment_id = 0;
118119

120+
-- Same set of tests for bitmap LOV insert.
121+
create extension if not exists pageinspect;
122+
123+
-- Function to check the bitmap lov content regarding the column 'b'
124+
-- which is the table column that we will have bitmap created on.
125+
-- Basically, we want to see if "SELECT b FROM pg_bitmapindex.pg_bm_xxx"
126+
-- returns the same result in seqscan and indexscan.
127+
CREATE OR REPLACE FUNCTION insert_bm_lov_res() RETURNS void AS $$
128+
DECLARE
129+
lov_table text; /* in func */
130+
sql text; /* in func */
131+
BEGIN /* in func */
132+
drop table if exists bm_lov_res; /* in func */
133+
create temp table bm_lov_res(b int); /* in func */
134+
SELECT c.relname INTO lov_table /* in func */
135+
FROM bm_metap('tab_fi_idx') b /* in func */
136+
JOIN pg_class c ON b.auxrelid = c.oid; /* in func */
137+
sql := format('INSERT INTO bm_lov_res SELECT b FROM pg_bitmapindex.%I', lov_table); /* in func */
138+
EXECUTE sql; /* in func */
139+
END; /* in func */
140+
$$ LANGUAGE plpgsql;
141+
142+
1: create table tab_fi(a int, b int) with (appendoptimized=true) distributed replicated;
143+
1: create index tab_fi_idx on tab_fi using bitmap(b);
144+
1: insert into tab_fi values(1, 1);
145+
-- switch WAL on seg0 to reduce flakiness
146+
1: select gp_segment_id, pg_switch_wal() is not null from gp_dist_random('gp_id') where gp_segment_id = 0;
147+
148+
-- case 1: suspend and flush WAL before freezing the tuple
149+
150+
-- suspend right after the insert into the bitmap lov table and its index
151+
-- during a table insert, but before freezing the tuple
152+
1: select gp_inject_fault('insert_bmlov_before_freeze', 'suspend', dbid) from gp_segment_configuration where role = 'p' and content = 0;
153+
2>: insert into tab_fi values(2, 2);
154+
1: select gp_wait_until_triggered_fault('insert_bmlov_before_freeze', 1, dbid) from gp_segment_configuration where role = 'p' and content = 0;
155+
-- switch WAL on seg0, so the new row gets flushed (including its index)
156+
1: select gp_segment_id, pg_switch_wal() is not null from gp_dist_random('gp_id') where gp_segment_id = 0;
157+
-- inject a panic, and resume the insert. The WAL for the freeze operation is not
158+
-- going to be made to disk (we just flushed WALs), so we won't replay it during restart later.
159+
-- skip FTS probe to prevent unexpected mirror promotion
160+
1: select gp_inject_fault_infinite('fts_probe', 'skip', dbid) from gp_segment_configuration where role='p' and content=-1;
161+
1: select gp_inject_fault('qe_exec_finished', 'panic', dbid) from gp_segment_configuration where role = 'p' and content = 0;
162+
1: select gp_inject_fault('insert_bmlov_before_freeze', 'reset', dbid) from gp_segment_configuration where role = 'p' and content = 0;
163+
1: select gp_inject_fault('fts_probe', 'reset', dbid) from gp_segment_configuration where role='p' and content=-1;
164+
2<:
165+
1q:
166+
-- check the lov table content w/ table vs index scan, neither should see the
167+
-- new inserted row (b=2)
168+
0U: set enable_indexscan = on;
169+
0U: set enable_seqscan = off;
170+
0U: select insert_bm_lov_res();
171+
0U: select * from bm_lov_res;
172+
0U: set enable_indexscan = off;
173+
0U: set enable_seqscan = on;
174+
0U: select insert_bm_lov_res();
175+
0U: select * from bm_lov_res;
176+
0Uq:
177+
1: drop table tab_fi;
178+
179+
-- case 2: suspend and flush WAL after freezing the tuple
180+
181+
1: create table tab_fi(a int, b int) with (appendoptimized=true) distributed replicated;
182+
1: create index tab_fi_idx on tab_fi using bitmap(b);
183+
1: insert into tab_fi values(1, 1);
184+
-- switch WAL on seg0 to reduce flakiness
185+
1: select gp_segment_id, pg_switch_wal() is not null from gp_dist_random('gp_id') where gp_segment_id = 0;
186+
-- suspend right after freezing the tuple
187+
1: select gp_inject_fault('insert_bmlov_after_freeze', 'suspend', dbid) from gp_segment_configuration where role = 'p' and content = 0;
188+
2>: insert into tab_fi values(2, 2);
189+
1: select gp_wait_until_triggered_fault('insert_bmlov_after_freeze', 1, dbid) from gp_segment_configuration where role = 'p' and content = 0;
190+
-- switch WAL on seg0, so the freeze record gets flushed
191+
1: select gp_segment_id, pg_switch_wal() is not null from gp_dist_random('gp_id') where gp_segment_id = 0;
192+
-- While we are on it, check the wal record for the freeze operation.
193+
! seg0_datadir=$(psql -At -c "select datadir from gp_segment_configuration where content = 0 and role = 'p'" postgres) && seg0_last_wal_file=$(psql -At -c "SELECT pg_walfile_name(pg_current_wal_lsn()) from gp_dist_random('gp_id') where gp_segment_id = 0" postgres) && pg_waldump ${seg0_last_wal_file} -p ${seg0_datadir}/pg_wal | grep FREEZE_PAGE;
194+
-- inject a panic and resume in same way as Case 1. But this time we will be able to replay the frozen insert.
195+
-- skip FTS probe to prevent unexpected mirror promotion
196+
1: select gp_inject_fault_infinite('fts_probe', 'skip', dbid) from gp_segment_configuration where role='p' and content=-1;
197+
1: select gp_inject_fault('qe_exec_finished', 'panic', dbid) from gp_segment_configuration where role = 'p' and content = 0;
198+
1: select gp_inject_fault('insert_bmlov_after_freeze', 'reset', dbid) from gp_segment_configuration where role = 'p' and content = 0;
199+
1: select gp_inject_fault('fts_probe', 'reset', dbid) from gp_segment_configuration where role='p' and content=-1;
200+
2<:
201+
1q:
202+
-- check the lov table content w/ table vs index scan, both should see the
203+
-- new inserted row (b=2)
204+
0U: set enable_indexscan = on;
205+
0U: set enable_seqscan = off;
206+
0U: select insert_bm_lov_res();
207+
0U: select * from bm_lov_res;
208+
0U: set enable_indexscan = off;
209+
0U: set enable_seqscan = on;
210+
0U: select insert_bm_lov_res();
211+
0U: select * from bm_lov_res;
212+
213+
-- validate that we've actually tested desired scan method
119214
-- for some reason this disrupts the output of subsequent queries so
120215
-- validating at the end here
121216
! psql postgres -At -c "set enable_indexscan = off; set enable_seqscan = on; explain (costs off) select distinct f.gp_segment_id, f.objmod, f.last_sequence from gp_dist_random('gp_fastsequence') f left join gp_dist_random('pg_appendonly') a on segrelid = objid and a.gp_segment_id = f.gp_segment_id where a.gp_segment_id = 0 and relid = (select oid from pg_class where relname = 'tab_fi');" | grep "Seq Scan on gp_fastsequence";

0 commit comments

Comments
 (0)