From 27e4fa7dcfc069c8bf9bcce6f905b49b43df7a39 Mon Sep 17 00:00:00 2001 From: husen Date: Thu, 10 Aug 2023 19:55:04 +0800 Subject: [PATCH 001/152] Support separation of catalog and compute. core changes: 1. add macro serverless, configure with --enable-serverless 2. add hooks to get control in transaction/dispatch management 3. add transaction processing framework 4. add session state dispatch framework --- configure | 32 ++++ src/backend/Makefile | 3 + src/backend/access/transam/slru.c | 47 ++++++ src/backend/access/transam/varsup.c | 8 + src/backend/access/transam/xact.c | 43 ++++++ src/backend/access/transam/xlog.c | 54 +++++++ src/backend/cdb/cdbtm.c | 8 + src/backend/cdb/dispatcher/Makefile | 2 +- src/backend/cdb/dispatcher/cdbdisp_extra.c | 170 +++++++++++++++++++++ src/backend/cdb/dispatcher/cdbdisp_query.c | 41 ++++- src/backend/executor/execMain.c | 4 + src/backend/postmaster/autovacuum.c | 8 + src/backend/storage/buffer/bufmgr.c | 18 +++ src/backend/storage/ipc/procarray.c | 16 +- src/backend/tcop/postgres.c | 21 +++ src/backend/tcop/utility.c | 13 +- src/backend/utils/cache/relmapper.c | 7 + src/backend/utils/init/postinit.c | 7 + src/include/access/slru.h | 8 + src/include/access/transam.h | 4 + src/include/access/xact.h | 43 ++++++ src/include/access/xlog.h | 26 +++- src/include/cdb/cdbdisp_extra.h | 15 ++ src/include/cdb/cdbdisp_query.h | 12 ++ src/include/executor/executor.h | 3 + src/include/postgres.h | 5 + src/include/postmaster/autovacuum.h | 4 + src/include/storage/procarray.h | 4 + src/include/tcop/utility.h | 4 + src/include/utils/relmapper.h | 4 + src/include/utils/snapmgr.h | 5 + 31 files changed, 628 insertions(+), 11 deletions(-) create mode 100644 src/backend/cdb/dispatcher/cdbdisp_extra.c create mode 100644 src/include/cdb/cdbdisp_extra.h diff --git a/configure b/configure index e612c658399..9f291310269 100755 --- a/configure +++ b/configure @@ -762,6 +762,7 @@ enable_mapreduce enable_catalog_ext enable_serverless enable_orca 
+enable_serverless autodepend PKG_CONFIG_LIBDIR PKG_CONFIG_PATH @@ -8378,6 +8379,37 @@ fi { $as_echo "$as_me:${as_lineno-$LINENO}: result: checking whether to build with catalog extension... $enable_catalog_ext" >&5 $as_echo "checking whether to build with catalog extension... $enable_catalog_ext" >&6; } +# +# Enable serverless architecture +# + + +# Check whether --enable-serverless was given. +if test "${enable_serverless+set}" = set; then : + enableval=$enable_serverless; + case $enableval in + yes) + +$as_echo "#define SERVERLESS 1" >>confdefs.h + + ;; + no) + : + ;; + *) + as_fn_error $? "no argument expected for --enable-serverless option" "$LINENO" 5 + ;; + esac + +else + enable_serverless=no + +fi + + +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: checking whether to use serverless architecture of Cloudberry ... $enable_serverless" >&5 +$as_echo "checking whether to use serverless architecture of Cloudberry ... $enable_serverless" >&6; } + # # Enable serverless mode diff --git a/src/backend/Makefile b/src/backend/Makefile index 5a41ad2f3b2..ad0e507b8c8 100644 --- a/src/backend/Makefile +++ b/src/backend/Makefile @@ -31,6 +31,9 @@ ifeq ($(enable_catalog_ext),yes) SUBDIRS += catalog-extension LDFLAGS += -lprotobuf -lstdc++ endif +ifeq ($(enable_serverless),yes) +LDFLAGS += -lprotobuf -lstdc++ -ljansson +endif include $(srcdir)/common.mk diff --git a/src/backend/access/transam/slru.c b/src/backend/access/transam/slru.c index a1950fd0944..284d998ad11 100644 --- a/src/backend/access/transam/slru.c +++ b/src/backend/access/transam/slru.c @@ -54,6 +54,7 @@ #include "access/slru.h" #include "access/transam.h" #include "access/xlog.h" +#include "cdb/cdbvars.h" #include "miscadmin.h" #include "pgstat.h" #include "storage/fd.h" @@ -133,6 +134,11 @@ typedef enum static SlruErrorCause slru_errcause; static int slru_errno; +/* + * Hooks for plugins to get control in SlruPhysicalReadPage/SlruPhysicalWritePage + */ +SlruPhysicalReadPage_hook_type 
SlruPhysicalReadPage_hook = NULL; +SlruPhysicalWritePage_hook_type SlruPhysicalWritePage_hook = NULL; static void SimpleLruZeroLSNs(SlruCtl ctl, int slotno); static void SimpleLruWaitIO(SlruCtl ctl, int slotno); @@ -421,6 +427,17 @@ SimpleLruReadPage(SlruCtl ctl, int pageno, bool write_ok, /* Now we must recheck state from the top */ continue; } +#ifdef SERVERLESS + /* + * TODO: add hook/GUC instead? + * The page in buffer may be out of date, we need to check the buffer + * and refresh the buffer if the page has been modified. + */ + if (Gp_role == GP_ROLE_EXECUTE) + { + goto PageRead; + } +#endif /* Otherwise, it's ready to use */ SlruRecentlyUsed(shared, slotno); @@ -435,6 +452,10 @@ SimpleLruReadPage(SlruCtl ctl, int pageno, bool write_ok, (shared->page_status[slotno] == SLRU_PAGE_VALID && !shared->page_dirty[slotno])); +#ifdef SERVERLESS +PageRead: +#endif + /* Mark the slot read-busy */ shared->page_number[slotno] = pageno; shared->page_status[slotno] = SLRU_PAGE_READ_IN_PROGRESS; @@ -506,6 +527,18 @@ SimpleLruReadPage_ReadOnly(SlruCtl ctl, int pageno, TransactionId xid) shared->page_status[slotno] != SLRU_PAGE_EMPTY && shared->page_status[slotno] != SLRU_PAGE_READ_IN_PROGRESS) { +#ifdef SERVERLESS + /* + * TODO: add hook/GUC instead? + * The page in buffer may be out of date, we need to check the buffer + * and refresh the buffer if the page has been modified. 
+ */ + if (Gp_role == GP_ROLE_EXECUTE) + { + break; + } +#endif + /* See comments for SlruRecentlyUsed macro */ SlruRecentlyUsed(shared, slotno); @@ -688,6 +721,13 @@ SlruPhysicalReadPage(SlruCtl ctl, int pageno, int slotno) off_t offset = rpageno * BLCKSZ; char path[MAXPGPATH]; int fd; + bool result; + + if (SlruPhysicalReadPage_hook && + SlruPhysicalReadPage_hook(ctl, pageno, slotno, &result)) + { + return result; + } SlruFileName(ctl, path, segno); @@ -760,6 +800,7 @@ SlruPhysicalWritePage(SlruCtl ctl, int pageno, int slotno, SlruWriteAll fdata) off_t offset = rpageno * BLCKSZ; char path[MAXPGPATH]; int fd = -1; + bool result; /* update the stats counter of written pages */ pgstat_count_slru_page_written(shared->slru_stats_idx); @@ -806,6 +847,12 @@ SlruPhysicalWritePage(SlruCtl ctl, int pageno, int slotno, SlruWriteAll fdata) } } + if (SlruPhysicalWritePage_hook && + SlruPhysicalWritePage_hook(ctl, pageno, slotno, &result)) + { + return result; + } + /* * During a WriteAll, we may already have the desired file open. */ diff --git a/src/backend/access/transam/varsup.c b/src/backend/access/transam/varsup.c index 766c6bbe794..53c696beec9 100644 --- a/src/backend/access/transam/varsup.c +++ b/src/backend/access/transam/varsup.c @@ -44,6 +44,11 @@ int xid_warn_limit; NewSegRelfilenode_assign_hook_type NewSegRelfilenode_assign_hook = NULL; +/* + * Hook for plugins to get control in GetNewTransactionId. + */ +GetNewTransactionId_hook_type GetNewTransactionId_hook = NULL; + /* * Allocate the next FullTransactionId for a new transaction or * subtransaction. @@ -62,6 +67,9 @@ GetNewTransactionId(bool isSubXact) FullTransactionId full_xid; TransactionId xid; + if (GetNewTransactionId_hook) + return (*GetNewTransactionId_hook) (isSubXact); + /* * Workers synchronize transaction state at the beginning of each parallel * operation, so we can't account for new XIDs after that point. 
diff --git a/src/backend/access/transam/xact.c b/src/backend/access/transam/xact.c index 60f9d16b1a0..25bdec98ce9 100644 --- a/src/backend/access/transam/xact.c +++ b/src/backend/access/transam/xact.c @@ -300,6 +300,14 @@ static TransactionId unreportedXids[PGPROC_MAX_CACHED_SUBXIDS]; static TransactionState CurrentTransactionState = &TopTransactionStateData; +/* + * Hooks for plugins to get control in Transaction Management. + */ +TransactionParticipateEnd_hook_type TransactionParticipateEnd_hook = NULL; +NotifySubTransaction_hook_type NotifySubTransaction_hook = NULL; +XactLogCommitRecord_hook_type XactLogCommitRecord_hook = NULL; +XactLogAbortRecord_hook_type XactLogAbortRecord_hook = NULL; + /* * The subtransaction ID and command ID assignment counters are global * to a whole transaction, so we do not keep them in the state stack. @@ -904,6 +912,12 @@ GetCurrentCommandId(bool used) return currentCommandId; } +void +SetCurrentCommandId(CommandId cid) +{ + currentCommandId = cid; +} + /* * SetParallelStartTimestamps * @@ -2951,6 +2965,9 @@ CommitTransaction(void) if (notifyCommittedDtxTransactionIsNeeded()) notifyCommittedDtxTransaction(); + if (TransactionParticipateEnd_hook) + TransactionParticipateEnd_hook(true); + /* * Let others know about no transaction in progress by me. Note that this * must be done _before_ releasing locks we hold and _after_ @@ -3605,6 +3622,9 @@ AbortTransaction(void) */ rollbackDtxTransaction(); + if (TransactionParticipateEnd_hook) + TransactionParticipateEnd_hook(false); + /* * Let others know about no transaction in progress by me. 
Note that this * must be done _before_ releasing locks we hold and _after_ @@ -5227,6 +5247,9 @@ DefineSavepoint(const char *name) { TransactionState s = CurrentTransactionState; + if (NotifySubTransaction_hook) + NotifySubTransaction_hook(TXN_PROTOCOL_COMMAND_SUB_BEGIN); + /* * Workers synchronize transaction state at the beginning of each parallel * operation, so we can't account for new subtransactions after that @@ -5596,6 +5619,9 @@ BeginInternalSubTransaction(const char *name) } } + if (NotifySubTransaction_hook) + NotifySubTransaction_hook(TXN_PROTOCOL_COMMAND_SUB_BEGIN); + /* * Workers synchronize transaction state at the beginning of each parallel * operation, so we can't account for new subtransactions after that @@ -6089,6 +6115,9 @@ CommitSubTransaction(void) /* Must CCI to ensure commands of subtransaction are seen as done */ CommandCounterIncrement(); + if (NotifySubTransaction_hook) + NotifySubTransaction_hook(TXN_PROTOCOL_COMMAND_SUB_RELEASE); + /* * Prior to 8.4 we marked subcommit in clog at this point. We now only * perform that step, if required, as part of the atomic update of the @@ -6263,6 +6292,9 @@ AbortSubTransaction(void) s->parallelModeLevel = 0; } + if (NotifySubTransaction_hook) + NotifySubTransaction_hook(TXN_PROTOCOL_COMMAND_SUB_ROLLBACK); + /* * We can skip all this stuff if the subxact failed before creating a * ResourceOwner... 
@@ -6882,6 +6914,12 @@ XactLogCommitRecord(TimestampTz commit_time, Assert(CritSectionCount > 0); + if (XactLogCommitRecord_hook) + return (* XactLogCommitRecord_hook) (commit_time, tablespace_oid_to_delete_on_commit, + nsubxacts, subxacts, nrels, rels, nmsgs, msgs, + ndeldbs, deldbs, relcacheInval, xactflags, + twophase_xid, twophase_gid); + xl_xinfo.xinfo = 0; /* decide between a plain and 2pc commit */ @@ -7072,6 +7110,11 @@ XactLogAbortRecord(TimestampTz abort_time, Assert(CritSectionCount > 0); + if (XactLogAbortRecord_hook) + (*XactLogAbortRecord_hook) (abort_time, tablespace_oid_to_delete_on_abort, + nsubxacts, subxacts, nrels, rels, ndeldbs, deldbs, + xactflags, twophase_xid, twophase_gid); + xl_xinfo.xinfo = 0; /* decide between a plain and 2pc abort */ diff --git a/src/backend/access/transam/xlog.c b/src/backend/access/transam/xlog.c index 722bea2040b..59a21ef96c3 100644 --- a/src/backend/access/transam/xlog.c +++ b/src/backend/access/transam/xlog.c @@ -101,6 +101,21 @@ extern uint32 bootstrap_data_checksum_version; extern int bootstrap_file_encryption_method; +/* + * Hook for plugins to get control in StartupXLOG. + */ +StartupXLOG_hook_type StartupXLOG_hook = NULL; + +/* + * Hook for plugins to get control in XLogFlush. + */ +XLogFlush_hook_type XLogFlush_hook = NULL; + +/* + * Hook for plugins to get control in CreateCheckPoint. + */ +CreateCheckPoint_hook_type CreateCheckPoint_hook = NULL; + /* Unsupported old recovery command file names (relative to $PGDATA) */ #define RECOVERY_COMMAND_FILE "recovery.conf" #define RECOVERY_COMMAND_DONE "recovery.done" @@ -2941,6 +2956,9 @@ XLogFlush(XLogRecPtr record) XLogRecPtr WriteRqstPtr; XLogwrtRqst WriteRqst; + if (XLogFlush_hook) + return (*XLogFlush_hook) (record); + /* * During REDO, we are reading not writing WAL. Therefore, instead of * trying to flush the WAL, we should update minRecoveryPoint instead. 
We @@ -3133,6 +3151,15 @@ XLogBackgroundFlush(void) TimestampTz now; int flushbytes; +#ifdef SERVERLESS + /* + * TODO: use GUC/hook instead of macro. + * + * Indeed, walwriter is not needed in serverless, we have no WAL in buffer. + */ + return true; +#endif + /* XLOG doesn't need flushing during recovery */ if (RecoveryInProgress()) return false; @@ -6761,6 +6788,9 @@ StartupXLOG(void) bool promoted = false; struct stat st; + if (StartupXLOG_hook) + return (*StartupXLOG_hook) (); + /* * We should have an aux process resource owner to use, and we should not * be in a transaction that's installed some other resowner. @@ -9456,6 +9486,12 @@ CreateCheckPoint(int flags) VirtualTransactionId *vxids; int nvxids; + if (CreateCheckPoint_hook) + { + (*CreateCheckPoint_hook) (flags); + return; + } + /* * An end-of-recovery checkpoint is really a shutdown checkpoint, just * issued at a different time. @@ -14006,3 +14042,21 @@ XLogRequestWalReceiverReply(void) { doRequestWalReceiverReply = true; } + +/* + * Return pointer to pg_control in shared memory; + */ +ControlFileData * +GetControlFile(void) +{ + return ControlFile; +} + +/* + * Return pointer to XLogCtlData in shared memory; + */ +XLogCtlData * +GetXLogCtl(void) +{ + return XLogCtl; +} diff --git a/src/backend/cdb/cdbtm.c b/src/backend/cdb/cdbtm.c index f0cd5fcb3f6..c2e1c3073d6 100644 --- a/src/backend/cdb/cdbtm.c +++ b/src/backend/cdb/cdbtm.c @@ -1637,6 +1637,14 @@ doQEDistributedExplicitBegin() static bool isDtxQueryDispatcher(void) { +#ifdef SERVERLESS + /* + * TODO: use GUC/hook instead of macro. + * + * Distributed transaction is not necessary in serverless. 
+ */ + return false; +#endif bool isDtmStarted; bool isSharedLocalSnapshotSlotPresent; diff --git a/src/backend/cdb/dispatcher/Makefile b/src/backend/cdb/dispatcher/Makefile index e8ac7582898..8aaf74b1970 100644 --- a/src/backend/cdb/dispatcher/Makefile +++ b/src/backend/cdb/dispatcher/Makefile @@ -11,5 +11,5 @@ include $(top_builddir)/src/Makefile.global override CPPFLAGS += -I$(libpq_srcdir) -I$(top_srcdir)/src/port -I$(top_srcdir)/src/backend/utils/misc -OBJS = cdbconn.o cdbdisp.o cdbdisp_async.o cdbdispatchresult.o cdbdisp_dtx.o cdbdisp_query.o cdbgang.o cdbgang_async.o cdbpq.o +OBJS = cdbconn.o cdbdisp.o cdbdisp_async.o cdbdispatchresult.o cdbdisp_dtx.o cdbdisp_query.o cdbgang.o cdbgang_async.o cdbpq.o cdbdisp_extra.o include $(top_srcdir)/src/backend/common.mk diff --git a/src/backend/cdb/dispatcher/cdbdisp_extra.c b/src/backend/cdb/dispatcher/cdbdisp_extra.c new file mode 100644 index 00000000000..8a032222a29 --- /dev/null +++ b/src/backend/cdb/dispatcher/cdbdisp_extra.c @@ -0,0 +1,170 @@ +#include "postgres.h" + +#include "cdb/cdbdisp_extra.h" +#include "libpq/pqformat.h" +#include "utils/hsearch.h" + + +static HTAB *ExtraDispTable = NULL; + +typedef struct ExtraDispEntry +{ + char extraDispName[EXTRADISPNAME_MAX_LEN]; + PackFunc packFunc; + UnpackFunc unpackFunc; +} ExtraDispEntry; + +void +RegisterExtraDispatch(const char *extraDispName, PackFunc packFunc, UnpackFunc unpackFunc) +{ + ExtraDispEntry *entry; + bool found; + + if (ExtraDispTable == NULL) + { + HASHCTL ctl; + + ctl.keysize = EXTRADISPNAME_MAX_LEN; + ctl.entrysize = sizeof(ExtraDispEntry); + + ExtraDispTable = hash_create("extra dispatch info", 8, &ctl, + HASH_ELEM | HASH_STRINGS); + } + + if (strlen(extraDispName) >= EXTRADISPNAME_MAX_LEN) + elog(ERROR, "extra dispatch name is too long"); + + entry = (ExtraDispEntry *) hash_search(ExtraDispTable, + extraDispName, + HASH_ENTER, &found); + if (found) + ereport(ERROR, + (errcode(ERRCODE_DUPLICATE_OBJECT), + errmsg("extra dispatch name \"%s\" 
already exists", + extraDispName))); + + entry->packFunc = packFunc; + entry->unpackFunc = unpackFunc; +} + +/* Return packaged messages. each message has the same format: + * ("%d%s\0%s", totalLen, name, payload). totalLen is the message + * length not including itself. name is the name of this message, + * a following '\0' marks the end. payload is the main body of the + * message. + */ +char * +PackExtraMsgs(int *len) +{ + HASH_SEQ_STATUS status; + ExtraDispEntry *hentry; + char **payloads; + int *lengths; + int payloadLen; + char **names; + int nameLen; + char *total; + int totalLen; + char *pos; + int tmp; + int n; + int i; + + if (!ExtraDispTable) + { + *len = 0; + return NULL; + } + + n = hash_get_num_entries(ExtraDispTable); + payloads = (char **) palloc(n * sizeof(char *)); + lengths = (int *) palloc(n * sizeof(int)); + names = (char **) palloc(n * sizeof(char *)); + + i = 0; + totalLen = 0; + hash_seq_init(&status, ExtraDispTable); + while ((hentry = (ExtraDispEntry *) hash_seq_search(&status)) != NULL) + { + payloads[i] = (*(hentry->packFunc))(lengths + i); + names[i] = hentry->extraDispName; + totalLen += sizeof(int) + strlen(names[i]) + 1 + *(lengths + i); + i++; + } + Assert(i = n); + + total = palloc(totalLen); + pos = total; + + for(i=0; i < n; i++) + { + payloadLen = *(lengths + i); + nameLen = strlen(names[i]); + + /* lenth */ + tmp = htonl(payloadLen + nameLen + 1); + memcpy(pos, &tmp, sizeof(tmp)); + pos += sizeof(tmp); + + /* name */ + memcpy(pos, names[i], nameLen + 1); + pos += nameLen + 1; + + /* payload */ + memcpy(pos, payloads[i], payloadLen); + pos += payloadLen; + + pfree(payloads[i]); + } + + Assert(pos - total == totalLen); + + pfree(names); + pfree(payloads); + pfree(lengths); + + *len = totalLen; + return total; +} + +void +UnPackExtraMsgs(StringInfo inputMsgs) +{ + ExtraDispEntry *entry; + const char *name; + const char *payload; + int payloadLen; + int totalLen; + bool found; + int n; + int i; + + if (!ExtraDispTable) + return; + 
+ n = hash_get_num_entries(ExtraDispTable); + i = n; + + while (inputMsgs->cursor < inputMsgs->len) + { + totalLen = pq_getmsgint(inputMsgs, 4); + name = pq_getmsgstring(inputMsgs); + payloadLen = totalLen - strlen(name) - 1; + payload = pq_getmsgbytes(inputMsgs, payloadLen); + + entry = (ExtraDispEntry *) hash_search(ExtraDispTable, + name, + HASH_FIND, &found); + if (!found) + ereport(ERROR, + (errcode(ERRCODE_PROTOCOL_VIOLATION), + errmsg("extra dispatch %s not found", name))); + + (*(entry->unpackFunc))(payload, payloadLen); + i--; + } + if (i != 0) + ereport(ERROR, + (errcode(ERRCODE_PROTOCOL_VIOLATION), + errmsg("extra dispatch count mismatch, registered %d, get %d", n, i))); +} diff --git a/src/backend/cdb/dispatcher/cdbdisp_query.c b/src/backend/cdb/dispatcher/cdbdisp_query.c index 697efe518f0..c2835fe3eb4 100644 --- a/src/backend/cdb/dispatcher/cdbdisp_query.c +++ b/src/backend/cdb/dispatcher/cdbdisp_query.c @@ -44,6 +44,7 @@ #include "mb/pg_wchar.h" #include "cdb/cdbdisp.h" +#include "cdb/cdbdisp_extra.h" #include "cdb/cdbdisp_query.h" #include "cdb/cdbdisp_dtx.h" /* for qdSerializeDtxContextInfo() */ #include "cdb/cdbdispatchresult.h" @@ -106,6 +107,12 @@ typedef struct DispatchCommandQueryParms int serializedDtxContextInfolen; } DispatchCommandQueryParms; +/* + * Hooks for plugins to get control in command dispatch + */ +CdbNeedDispatchCommand_hook_type CdbNeedDispatchCommand_hook = NULL; +CdbNeedDispatchUtility_hook_type CdbNeedDispatchUtility_hook = NULL; + static int fillSliceVector(SliceTable *sliceTable, int sliceIndex, SliceVec *sliceVector, @@ -402,7 +409,12 @@ CdbDispatchCommandToSegments(const char *strCommand, CdbPgResults *cdb_pgresults) { DispatchCommandQueryParms *pQueryParms; - bool needTwoPhase = flags & DF_NEED_TWO_PHASE; + bool needTwoPhase; + + if (CdbNeedDispatchCommand_hook && !CdbNeedDispatchCommand_hook(strCommand, &flags, segments, cdb_pgresults)) + return; + + needTwoPhase = flags & DF_NEED_TWO_PHASE; if (needTwoPhase) 
setupDtxTransaction(); @@ -440,9 +452,14 @@ CdbDispatchUtilityStatement(struct Node *stmt, CdbPgResults *cdb_pgresults) { DispatchCommandQueryParms *pQueryParms; - bool needTwoPhase = flags & DF_NEED_TWO_PHASE; + bool needTwoPhase; Assert(Gp_role == GP_ROLE_DISPATCH && ENABLE_DISPATCH()); + + if (CdbNeedDispatchUtility_hook && !CdbNeedDispatchUtility_hook(stmt, &flags)) + return; + + needTwoPhase = flags & DF_NEED_TWO_PHASE; if (needTwoPhase) setupDtxTransaction(); @@ -886,6 +903,8 @@ buildGpQueryString(DispatchCommandQueryParms *pQueryParms, int total_query_len; char *shared_query, *pos; + char *extraMsgs; + int extraLen; MemoryContext oldContext; /* @@ -935,6 +954,9 @@ buildGpQueryString(DispatchCommandQueryParms *pQueryParms, sizeof(tempToastNamespaceId) + 0; + extraMsgs = PackExtraMsgs(&extraLen); + total_query_len += extraLen; + shared_query = palloc(total_query_len); pos = shared_query; @@ -1038,6 +1060,15 @@ buildGpQueryString(DispatchCommandQueryParms *pQueryParms, memcpy(pos, &tempToastNamespaceId, sizeof(tempToastNamespaceId)); pos += sizeof(tempToastNamespaceId); + if (extraLen > 0) + { + memcpy(pos, extraMsgs, extraLen); + pos += extraLen; + pfree(extraMsgs); + } + + len = pos - shared_query - 1; + /* * fill in length placeholder */ @@ -1335,8 +1366,12 @@ CdbDispatchCopyStart(struct CdbCopy *cdbCopy, Node *stmt, int flags) CdbDispatcherState *ds; Gang *primaryGang; ErrorData *error = NULL; - bool needTwoPhase = flags & DF_NEED_TWO_PHASE; + bool needTwoPhase; + + if (CdbNeedDispatchUtility_hook && !CdbNeedDispatchUtility_hook(stmt, &flags)) + return; + needTwoPhase = flags & DF_NEED_TWO_PHASE; if (needTwoPhase) setupDtxTransaction(); diff --git a/src/backend/executor/execMain.c b/src/backend/executor/execMain.c index 5bb1463e422..afc255ededd 100644 --- a/src/backend/executor/execMain.c +++ b/src/backend/executor/execMain.c @@ -164,6 +164,8 @@ ExecutorCheckPerms_hook_type ExecutorCheckPerms_hook = NULL; * executor_run_nesting_level. 
*/ static int executor_run_nesting_level = 0; +/* Hook for plugins to get control in DtxTransaction Management */ +SetDtxFlag_hook_type SetDtxFlag_hook = NULL; /* decls for local routines only used within this module */ static void InitPlan(QueryDesc *queryDesc, int eflags); @@ -645,6 +647,8 @@ standard_ExecutorStart(QueryDesc *queryDesc, int eflags) * work for this query. */ needDtx = ExecutorSaysTransactionDoesWrites(); + if (SetDtxFlag_hook) + needDtx = SetDtxFlag_hook(needDtx); if (needDtx) setupDtxTransaction(); diff --git a/src/backend/postmaster/autovacuum.c b/src/backend/postmaster/autovacuum.c index 0379659f994..c0e7baa7acc 100644 --- a/src/backend/postmaster/autovacuum.c +++ b/src/backend/postmaster/autovacuum.c @@ -208,6 +208,11 @@ static int default_multixact_freeze_table_age; /* Memory context for long-lived data */ static MemoryContext AutovacMemCxt; +/* + * Hook for plugins to get control in AutoVacLauncher. + */ +AutoVacLauncherMain_hook_type AutoVacLauncherMain_hook = NULL; + /* struct to keep track of databases in launcher */ typedef struct avl_dbase { @@ -490,6 +495,9 @@ AutoVacLauncherMain(int argc, char *argv[]) { sigjmp_buf local_sigjmp_buf; + if (AutoVacLauncherMain_hook) + (*AutoVacLauncherMain_hook) (argc, argv); + am_autovacuum_launcher = true; MyBackendType = B_AUTOVAC_LAUNCHER; diff --git a/src/backend/storage/buffer/bufmgr.c b/src/backend/storage/buffer/bufmgr.c index 6d0afd34356..b33ba11852d 100644 --- a/src/backend/storage/buffer/bufmgr.c +++ b/src/backend/storage/buffer/bufmgr.c @@ -43,6 +43,7 @@ #include "catalog/catalog.h" #include "catalog/storage.h" #include "catalog/storage_xlog.h" +#include "cdb/cdbvars.h" #include "crypto/bufenc.h" #include "executor/instrument.h" #include "lib/binaryheap.h" @@ -1255,6 +1256,23 @@ BufferAlloc(SMgrRelation smgr, char relpersistence, ForkNumber forkNum, *foundPtr = true; +#ifdef SERVERLESS + /* + * TODO: use GUC/hook instead of macro + * + * The page in buffer may be out of date, we need to 
check the buffer + * and refresh the buffer if the page has been modified. + */ + if (Gp_role == GP_ROLE_EXECUTE && valid) + { + uint32 buf_state = LockBufHdr(buf); + + buf_state &= ~BM_VALID; + UnlockBufHdr(buf, buf_state); + + valid = false; + } +#endif if (!valid) { /* diff --git a/src/backend/storage/ipc/procarray.c b/src/backend/storage/ipc/procarray.c index 3154caba1bd..0926a86fae6 100644 --- a/src/backend/storage/ipc/procarray.c +++ b/src/backend/storage/ipc/procarray.c @@ -82,6 +82,11 @@ CountDBSession_hook_type CountDBSession_hook = NULL; +/* + * Hook for plugins to get control in GetSnapshotData + */ +GetSnapshotData_hook_type GetSnapshotData_hook = NULL; + /* Our shared memory area */ typedef struct ProcArrayStruct { @@ -296,10 +301,10 @@ static TransactionId standbySnapshotPendingXmin; * GlobalVisState for details. As shared, catalog, normal and temporary * relations can have different horizons, one such state exists for each. */ -static GlobalVisState GlobalVisSharedRels; -static GlobalVisState GlobalVisCatalogRels; -static GlobalVisState GlobalVisDataRels; -static GlobalVisState GlobalVisTempRels; +GlobalVisState GlobalVisSharedRels; +GlobalVisState GlobalVisCatalogRels; +GlobalVisState GlobalVisDataRels; +GlobalVisState GlobalVisTempRels; /* * This backend's RecentXmin at the last time the accurate xmin horizon was @@ -2915,6 +2920,9 @@ GetSnapshotData(Snapshot snapshot, DtxContext distributedTransactionContext) errmsg("out of memory"))); } + if (GetSnapshotData_hook) + return (*GetSnapshotData_hook) (snapshot, distributedTransactionContext); + /* * GP: Distributed snapshot. 
*/ diff --git a/src/backend/tcop/postgres.c b/src/backend/tcop/postgres.c index f29c9c2e606..ba56f427227 100644 --- a/src/backend/tcop/postgres.c +++ b/src/backend/tcop/postgres.c @@ -95,6 +95,7 @@ #include "cdb/cdbsrlz.h" #include "cdb/cdbtm.h" #include "cdb/cdbdtxcontextinfo.h" +#include "cdb/cdbdisp_extra.h" #include "cdb/cdbdisp_query.h" #include "cdb/cdbdispatchresult.h" #include "cdb/cdbendpoint.h" @@ -152,6 +153,11 @@ cancel_pending_hook_type cancel_pending_hook = NULL; */ exec_simple_query_hook_type exec_simple_query_hook = NULL; +/* + * Hook for plugins to handle the txn message + */ +HandleTxnCommand_hook_type HandleTxnCommand_hook = NULL; + /* ---------------- * private typedefs etc * ---------------- @@ -513,6 +519,7 @@ SocketBackend(StringInfo inBuf) break; + case 't': case 'T': /* Apache Cloudberry dispatched transaction protocol from QD */ maxmsglen = PQ_LARGE_MESSAGE_LIMIT; doing_extended_query_message = false; @@ -5811,6 +5818,8 @@ PostgresMain(int argc, char *argv[], SetTempNamespaceStateAfterBoot(tempNamespaceId, tempToastNamespaceId); } + UnPackExtraMsgs(&input_message); + pq_getmsgend(&input_message); elog((Debug_print_full_dtm ? 
LOG : DEBUG5), "MPP dispatched stmt from QD: %s.",query_string); @@ -5929,6 +5938,18 @@ PostgresMain(int argc, char *argv[], } break; + case 't': /* handle plugin's MPP dispatched txn protocol command from QD */ + { + if (HandleTxnCommand_hook) + HandleTxnCommand_hook(&input_message, &send_ready_for_query); + else + ereport(FATAL, + (errcode(ERRCODE_PROTOCOL_VIOLATION), + errmsg("invalid frontend message type %d", firstchar), + errdetail("HandleTxnCommand_hook is NULL"))); + } + break; + case 'P': /* parse */ { const char *stmt_name; diff --git a/src/backend/tcop/utility.c b/src/backend/tcop/utility.c index 8eefbf93b88..6a0d1b38f87 100644 --- a/src/backend/tcop/utility.c +++ b/src/backend/tcop/utility.c @@ -90,6 +90,9 @@ /* Hook for plugins to get control in ProcessUtility() */ ProcessUtility_hook_type ProcessUtility_hook = NULL; +/* Hook for plugins to send explicit begin command */ +SendTxnExplicitBegin_hook_type SendTxnExplicitBegin_hook = NULL; + /* counter to disable dispatch */ int dispatch_nest_level = 0; @@ -714,7 +717,10 @@ standard_ProcessUtility(PlannedStmt *pstmt, /* gp_dispatch */ false); } - sendDtxExplicitBegin(); + if (SendTxnExplicitBegin_hook) + SendTxnExplicitBegin_hook(); + else + sendDtxExplicitBegin(); } break; @@ -783,7 +789,10 @@ standard_ProcessUtility(PlannedStmt *pstmt, * that the BEGIN has been dispatched * before we start dispatching our savepoint. 
*/ - sendDtxExplicitBegin(); + if (SendTxnExplicitBegin_hook) + SendTxnExplicitBegin_hook(); + else + sendDtxExplicitBegin(); DefineDispatchSavepoint(stmt->savepoint_name); break; diff --git a/src/backend/utils/cache/relmapper.c b/src/backend/utils/cache/relmapper.c index 900af27ec45..0ba71600d45 100644 --- a/src/backend/utils/cache/relmapper.c +++ b/src/backend/utils/cache/relmapper.c @@ -137,6 +137,10 @@ static RelMapFile active_local_updates; static RelMapFile pending_shared_updates; static RelMapFile pending_local_updates; +/* + * Hook for plugins to get control in load_relmap_file + */ +LoadRelMap_hook_type LoadRelMap_hook = NULL; /* non-export function prototypes */ static void apply_map_update(RelMapFile *map, Oid relationId, Oid fileNode, @@ -723,6 +727,9 @@ load_relmap_file(bool shared, bool lock_held) map = &local_map; } + if (LoadRelMap_hook) + return (*LoadRelMap_hook) (shared, lock_held, map); + /* Read data ... */ fd = OpenTransientFile(mapfilename, O_RDONLY | PG_BINARY); if (fd < 0) diff --git a/src/backend/utils/init/postinit.c b/src/backend/utils/init/postinit.c index e84c0efb77e..3a4f2bae356 100644 --- a/src/backend/utils/init/postinit.c +++ b/src/backend/utils/init/postinit.c @@ -1147,6 +1147,12 @@ InitPostgres(const char *in_dbname, Oid dboid, const char *username, */ fullpath = GetDatabasePath(MyDatabaseId, MyDatabaseTableSpace); +#ifndef SERVERLESS + /* + * TODO: use GUC instead of macro. + * + * No database directories/files in serverless, skip sanity check. 
+ */ if (!bootstrap) { if (access(fullpath, F_OK) == -1) @@ -1167,6 +1173,7 @@ InitPostgres(const char *in_dbname, Oid dboid, const char *username, ValidatePgVersion(fullpath); } +#endif SetDatabasePath(fullpath); pfree(fullpath); diff --git a/src/include/access/slru.h b/src/include/access/slru.h index 88653ae7e66..c8136f3c1a7 100644 --- a/src/include/access/slru.h +++ b/src/include/access/slru.h @@ -138,6 +138,14 @@ typedef struct SlruCtlData typedef SlruCtlData *SlruCtl; +/* + * Hooks for plugins to get control in SlruPhysicalReadPage/SlruPhysicalWritePage + */ +typedef bool (*SlruPhysicalReadPage_hook_type)(SlruCtl ctl, int pageno, int slotno, bool *result); +extern PGDLLIMPORT SlruPhysicalReadPage_hook_type SlruPhysicalReadPage_hook; + +typedef bool (*SlruPhysicalWritePage_hook_type)(SlruCtl ctl, int pageno, int slotno, bool *result); +extern PGDLLIMPORT SlruPhysicalWritePage_hook_type SlruPhysicalWritePage_hook; extern Size SimpleLruShmemSize(int nslots, int nlsns); extern void SimpleLruInit(SlruCtl ctl, const char *name, int nslots, int nlsns, diff --git a/src/include/access/transam.h b/src/include/access/transam.h index cec3e5f4cb7..e1eb8fef738 100644 --- a/src/include/access/transam.h +++ b/src/include/access/transam.h @@ -307,6 +307,10 @@ extern bool gp_pause_on_restore_point_replay; typedef Oid (*NewSegRelfilenode_assign_hook_type)(void); extern PGDLLIMPORT NewSegRelfilenode_assign_hook_type NewSegRelfilenode_assign_hook; +/* Hook for plugins to get control in GetNewTransactionId */ +typedef FullTransactionId (*GetNewTransactionId_hook_type)(bool isSubXact); +extern PGDLLIMPORT GetNewTransactionId_hook_type GetNewTransactionId_hook; + /* * prototypes for functions in transam/transam.c */ diff --git a/src/include/access/xact.h b/src/include/access/xact.h index 25f224b2c2b..57267aecf64 100644 --- a/src/include/access/xact.h +++ b/src/include/access/xact.h @@ -194,6 +194,48 @@ typedef void (*SubXactCallback) (SubXactEvent event, SubTransactionId mySubid, 
#define XACT_XINFO_HAS_DISTRIB (1U << 8) #define XACT_XINFO_HAS_DELDBS (1U << 9) +typedef enum +{ + TXN_PROTOCOL_COMMAND_BEGIN = 0, + TXN_PROTOCOL_COMMAND_ABORT, + TXN_PROTOCOL_COMMAND_COMMIT, + TXN_PROTOCOL_COMMAND_POST_COMMIT, + TXN_PROTOCOL_COMMAND_SUB_BEGIN, + TXN_PROTOCOL_COMMAND_SUB_RELEASE, + TXN_PROTOCOL_COMMAND_SUB_ROLLBACK, +} TxnProtocolCommand; + +/* + * Hooks for plugins to get control in Transaction Management + */ +typedef void(*TransactionParticipateEnd_hook_type)(bool commit); +extern PGDLLIMPORT TransactionParticipateEnd_hook_type TransactionParticipateEnd_hook; + +typedef bool(*NotifySubTransaction_hook_type)(TxnProtocolCommand command); +extern PGDLLIMPORT NotifySubTransaction_hook_type NotifySubTransaction_hook; + +typedef XLogRecPtr +(*XactLogCommitRecord_hook_type) (TimestampTz commit_time, + Oid tablespace_oid_to_delete_on_commit, + int nsubxacts, TransactionId *subxacts, + int nrels, RelFileNodePendingDelete *rels, + int nmsgs, SharedInvalidationMessage *msgs, + int ndeldbs, DbDirNode *deldbs, + bool relcacheInval, + int xactflags, TransactionId twophase_xid, + const char *twophase_gid); +extern PGDLLIMPORT XactLogCommitRecord_hook_type XactLogCommitRecord_hook; + +typedef XLogRecPtr +(*XactLogAbortRecord_hook_type) (TimestampTz abort_time, + Oid tablespace_oid_to_delete_on_abort, + int nsubxacts, TransactionId *subxacts, + int nrels, RelFileNodePendingDelete *rels, + int ndeldbs, DbDirNode *deldbs, + int xactflags, TransactionId twophase_xid, + const char *twophase_gid); +extern PGDLLIMPORT XactLogAbortRecord_hook_type XactLogAbortRecord_hook; + /* * Also stored in xinfo, these indicating a variety of additional actions that * need to occur when emulating transaction effects during recovery. 
@@ -457,6 +499,7 @@ extern void MarkCurrentTransactionIdLoggedIfAny(void); extern void MarkTopTransactionWriteXLogOnExecutor(void); extern bool SubTransactionIsActive(SubTransactionId subxid); extern CommandId GetCurrentCommandId(bool used); +extern void SetCurrentCommandId(CommandId cid); extern void SetParallelStartTimestamps(TimestampTz xact_ts, TimestampTz stmt_ts); extern TimestampTz GetCurrentTransactionStartTimestamp(void); extern TimestampTz GetCurrentStatementStartTimestamp(void); diff --git a/src/include/access/xlog.h b/src/include/access/xlog.h index 2dfad411b7a..9d1229ea688 100644 --- a/src/include/access/xlog.h +++ b/src/include/access/xlog.h @@ -161,7 +161,11 @@ extern bool StandbyMode; /* tde feature enable or not */ extern int FileEncryptionEnabled; -/* Hook for plugins to do some startup job */ +/* Hook for plugins to get control in StartupXLOG */ +typedef void (*StartupXLOG_hook_type) (void); +extern PGDLLIMPORT StartupXLOG_hook_type StartupXLOG_hook; + +/* Hook for plugins to do additional startup works */ typedef void (*Startup_hook_type) (void); extern PGDLLIMPORT Startup_hook_type Startup_hook; @@ -184,6 +188,14 @@ extern PGDLLIMPORT ConsistencyCheck_hook_type xlog_check_consistency_hook; typedef void (*XLOGDropDatabase_hook_type)(Oid dbid); extern XLOGDropDatabase_hook_type XLOGDropDatabase_hook; +/* Hook for plugins to get control in XLogFlush */ +typedef void (*XLogFlush_hook_type) (XLogRecPtr record); +extern PGDLLIMPORT XLogFlush_hook_type XLogFlush_hook; + +/* Hook for plugins to get control in CreateCheckPoint */ +typedef void (*CreateCheckPoint_hook_type)(int flags); +extern PGDLLIMPORT CreateCheckPoint_hook_type CreateCheckPoint_hook; + /* Archive modes */ typedef enum ArchiveMode { @@ -247,7 +259,16 @@ extern PGDLLIMPORT int wal_level; (DataChecksumsEnabled() || FileEncryptionEnabled || wal_log_hints) /* Do we need to WAL-log information required only for Hot Standby and logical replication? 
*/ +#ifdef SERVERLESS +/* + * This is not necessary. + * + * Standby is not needed in serverless, so we do not need to WAL-log anything. + */ +#define XLogStandbyInfoActive() (false) +#else #define XLogStandbyInfoActive() (wal_level >= WAL_LEVEL_REPLICA) +#endif /* Do we need to WAL-log information required only for logical replication? */ #define XLogLogicalInfoActive() (wal_level >= WAL_LEVEL_LOGICAL) @@ -324,6 +345,7 @@ typedef enum WALAvailability } WALAvailability; struct XLogRecData; +typedef struct XLogCtlData XLogCtlData; extern XLogRecPtr XLogInsertRecord(struct XLogRecData *rdata, XLogRecPtr fpw_lsn, @@ -456,5 +478,7 @@ extern bool IsRoleMirror(void); extern void SignalPromote(void); extern XLogRecPtr XLogLastInsertBeginLoc(void); extern void initialize_wal_bytes_written(void); +extern ControlFileData *GetControlFile(void); +extern XLogCtlData *GetXLogCtl(void); #endif /* XLOG_H */ diff --git a/src/include/cdb/cdbdisp_extra.h b/src/include/cdb/cdbdisp_extra.h new file mode 100644 index 00000000000..b6bac03b3cd --- /dev/null +++ b/src/include/cdb/cdbdisp_extra.h @@ -0,0 +1,15 @@ +#ifndef CDBDISP_EXTRA_H +#define CDBDISP_EXTRA_H + +#include "lib/stringinfo.h" + +#define EXTRADISPNAME_MAX_LEN 64 + +typedef char *(*PackFunc) (int *len); +typedef void (*UnpackFunc) (const char *msg, int len); + +extern void RegisterExtraDispatch(const char *extraDispName, PackFunc packFunc, UnpackFunc unpackFunc); +extern char *PackExtraMsgs(int *len); +extern void UnPackExtraMsgs(StringInfo strInfo); + +#endif /* CDBDISP_EXTRA_H */ diff --git a/src/include/cdb/cdbdisp_query.h b/src/include/cdb/cdbdisp_query.h index 2d8651cf762..5444c32e66c 100644 --- a/src/include/cdb/cdbdisp_query.h +++ b/src/include/cdb/cdbdisp_query.h @@ -40,6 +40,18 @@ struct CdbDispatcherState; struct CdbPgResults; struct CdbCopy; +/* + * Hooks for plugins to get control in command dispatch + */ +typedef bool (*CdbNeedDispatchCommand_hook_type) (const char *strCommand, + int *flags, + List *segments, + 
struct CdbPgResults *cdb_pgresults); +extern PGDLLIMPORT CdbNeedDispatchCommand_hook_type CdbNeedDispatchCommand_hook; + +typedef bool (*CdbNeedDispatchUtility_hook_type) (struct Node *stmt, int *flags); +extern PGDLLIMPORT CdbNeedDispatchUtility_hook_type CdbNeedDispatchUtility_hook; + /* Compose and dispatch the MPPEXEC commands corresponding to a plan tree * within a complete parallel plan. * diff --git a/src/include/executor/executor.h b/src/include/executor/executor.h index 07ebf366fb0..8169aa94b34 100644 --- a/src/include/executor/executor.h +++ b/src/include/executor/executor.h @@ -97,6 +97,9 @@ extern PGDLLIMPORT ExecutorEnd_hook_type ExecutorEnd_hook; typedef bool (*ExecutorCheckPerms_hook_type) (List *, bool); extern PGDLLIMPORT ExecutorCheckPerms_hook_type ExecutorCheckPerms_hook; +/* Hook for plugins to get control in DtxTransaction Management */ +typedef bool (*SetDtxFlag_hook_type) (bool needDxt); +extern PGDLLIMPORT SetDtxFlag_hook_type SetDtxFlag_hook; /* * prototypes from functions in execAmi.c diff --git a/src/include/postgres.h b/src/include/postgres.h index bde5a8b01b1..ce0d0217721 100644 --- a/src/include/postgres.h +++ b/src/include/postgres.h @@ -44,6 +44,7 @@ #define POSTGRES_H #include "c.h" +#include "lib/stringinfo.h" #include "utils/elog.h" #include "utils/palloc.h" #include "storage/itemptr.h" @@ -460,6 +461,10 @@ typedef struct NullableDatum /* due to alignment padding this could be used for flags for free */ } NullableDatum; +/* Hook for plugins to handle the txn message */ +typedef void(*HandleTxnCommand_hook_type)(StringInfo input_message, volatile bool *send_ready_for_query); +extern PGDLLIMPORT HandleTxnCommand_hook_type HandleTxnCommand_hook; + #define SIZEOF_DATUM SIZEOF_VOID_P StaticAssertDecl(SIZEOF_DATUM == 8, "sizeof datum is not 8"); /* diff --git a/src/include/postmaster/autovacuum.h b/src/include/postmaster/autovacuum.h index aacdd0f5753..5810ce55890 100644 --- a/src/include/postmaster/autovacuum.h +++ 
b/src/include/postmaster/autovacuum.h @@ -55,6 +55,10 @@ extern bool IsAutoVacuumWorkerProcess(void); #define IsAnyAutoVacuumProcess() \ (IsAutoVacuumLauncherProcess() || IsAutoVacuumWorkerProcess()) +/* Hook for plugins to get control in AutoVacLauncher */ +typedef void (*AutoVacLauncherMain_hook_type)(int argc, char *argv[]); +extern PGDLLIMPORT AutoVacLauncherMain_hook_type AutoVacLauncherMain_hook; + /* Functions to start autovacuum process, called from postmaster */ extern void autovac_init(void); extern int StartAutoVacLauncher(void); diff --git a/src/include/storage/procarray.h b/src/include/storage/procarray.h index 607834e46c4..e48d4643fae 100644 --- a/src/include/storage/procarray.h +++ b/src/include/storage/procarray.h @@ -47,6 +47,10 @@ extern void ExpireOldKnownAssignedTransactionIds(TransactionId xid); extern int GetMaxSnapshotXidCount(void); extern int GetMaxSnapshotSubxidCount(void); +/* Hook for plugins to get control in GetSnapshotData */ +typedef Snapshot (*GetSnapshotData_hook_type)(Snapshot snapshot, DtxContext distributedTransactionContext); +extern PGDLLIMPORT GetSnapshotData_hook_type GetSnapshotData_hook; + extern Snapshot GetSnapshotData(Snapshot snapshot, DtxContext distributedTransactionContext); extern bool ProcArrayInstallImportedXmin(TransactionId xmin, diff --git a/src/include/tcop/utility.h b/src/include/tcop/utility.h index ff08c1cd265..3d23f587e24 100644 --- a/src/include/tcop/utility.h +++ b/src/include/tcop/utility.h @@ -77,6 +77,10 @@ typedef void (*ProcessUtility_hook_type) (PlannedStmt *pstmt, DestReceiver *dest, QueryCompletion *qc); extern PGDLLIMPORT ProcessUtility_hook_type ProcessUtility_hook; +/* Hook for plugins to send explicit begin command */ +typedef void (*SendTxnExplicitBegin_hook_type)(void); +extern PGDLLIMPORT SendTxnExplicitBegin_hook_type SendTxnExplicitBegin_hook; + extern void ProcessUtility(PlannedStmt *pstmt, const char *queryString, bool readOnlyTree, ProcessUtilityContext context, ParamListInfo params, 
diff --git a/src/include/utils/relmapper.h b/src/include/utils/relmapper.h index 11538303bb8..9e7d02697a4 100644 --- a/src/include/utils/relmapper.h +++ b/src/include/utils/relmapper.h @@ -34,6 +34,10 @@ typedef struct xl_relmap_update #define MinSizeOfRelmapUpdate offsetof(xl_relmap_update, data) +typedef struct RelMapFile RelMapFile; +/* Hook for plugins to get control in load_relmap_file */ +typedef void (*LoadRelMap_hook_type)(bool shared, bool lock_held, RelMapFile *map); +extern PGDLLIMPORT LoadRelMap_hook_type LoadRelMap_hook; extern Oid RelationMapOidToFilenode(Oid relationId, bool shared); diff --git a/src/include/utils/snapmgr.h b/src/include/utils/snapmgr.h index 83b2beb6e7a..83e3fac8c06 100644 --- a/src/include/utils/snapmgr.h +++ b/src/include/utils/snapmgr.h @@ -161,6 +161,11 @@ extern TransactionId GlobalVisTestNonRemovableHorizon(GlobalVisState *state); extern bool GlobalVisCheckRemovableXid(Relation rel, TransactionId xid); extern bool GlobalVisCheckRemovableFullXid(Relation rel, FullTransactionId fxid); +extern GlobalVisState GlobalVisSharedRels; +extern GlobalVisState GlobalVisCatalogRels; +extern GlobalVisState GlobalVisDataRels; +extern GlobalVisState GlobalVisTempRels; + /* * Utility functions for implementing visibility routines in table AMs. */ From 5c872ed861d55800c9fe9c4bca535f860a6f7849 Mon Sep 17 00:00:00 2001 From: husen Date: Fri, 11 Aug 2023 17:23:48 +0800 Subject: [PATCH 002/152] 1. add global variable enable_serverless, default to false, set to true in plugin. 2. add SimpleLruReadPage_hook for plugin to read SLRU page. 3. add StartChildProcess_hook for plugin to get control in child process startup. 
--- src/backend/access/transam/slru.c | 56 +++++++++-------------------- src/backend/access/transam/xlog.c | 20 ----------- src/backend/cdb/cdbtm.c | 11 ++---- src/backend/postmaster/postmaster.c | 14 ++++++++ src/backend/storage/buffer/bufmgr.c | 8 ++--- src/backend/utils/init/postinit.c | 9 +---- src/include/access/slru.h | 8 +++++ src/include/access/xlog.h | 13 ------- src/include/postmaster/postmaster.h | 9 +++++ 9 files changed, 53 insertions(+), 95 deletions(-) diff --git a/src/backend/access/transam/slru.c b/src/backend/access/transam/slru.c index 284d998ad11..a5ce17e7a30 100644 --- a/src/backend/access/transam/slru.c +++ b/src/backend/access/transam/slru.c @@ -54,7 +54,6 @@ #include "access/slru.h" #include "access/transam.h" #include "access/xlog.h" -#include "cdb/cdbvars.h" #include "miscadmin.h" #include "pgstat.h" #include "storage/fd.h" @@ -135,20 +134,15 @@ static SlruErrorCause slru_errcause; static int slru_errno; /* - * Hooks for plugins to get control in SlruPhysicalReadPage/SlruPhysicalWritePage + * Hooks for plugins to get control in SlruPhysicalReadPage/SlruPhysicalWritePage/SimpleLruReadPage */ SlruPhysicalReadPage_hook_type SlruPhysicalReadPage_hook = NULL; SlruPhysicalWritePage_hook_type SlruPhysicalWritePage_hook = NULL; +SimpleLruReadPage_hook_type SimpleLruReadPage_hook = NULL; -static void SimpleLruZeroLSNs(SlruCtl ctl, int slotno); -static void SimpleLruWaitIO(SlruCtl ctl, int slotno); static void SlruInternalWritePage(SlruCtl ctl, int slotno, SlruWriteAll fdata); -static bool SlruPhysicalReadPage(SlruCtl ctl, int pageno, int slotno); static bool SlruPhysicalWritePage(SlruCtl ctl, int pageno, int slotno, SlruWriteAll fdata); -static void SlruReportIOError(SlruCtl ctl, int pageno, TransactionId xid); -static int SlruSelectLRUPage(SlruCtl ctl, int pageno); - static bool SlruScanDirCbDeleteCutoff(SlruCtl ctl, char *filename, int segpage, void *data); static void SlruInternalDeleteSegment(SlruCtl ctl, int segno); @@ -325,7 +319,7 @@ 
SimpleLruZeroPage(SlruCtl ctl, int pageno) * * This assumes that InvalidXLogRecPtr is bitwise-all-0. */ -static void +void SimpleLruZeroLSNs(SlruCtl ctl, int slotno) { SlruShared shared = ctl->shared; @@ -342,7 +336,7 @@ SimpleLruZeroLSNs(SlruCtl ctl, int slotno) * * Control lock must be held at entry, and will be held at exit. */ -static void +void SimpleLruWaitIO(SlruCtl ctl, int slotno) { SlruShared shared = ctl->shared; @@ -402,6 +396,9 @@ SimpleLruReadPage(SlruCtl ctl, int pageno, bool write_ok, { SlruShared shared = ctl->shared; + if (SimpleLruReadPage_hook) + return (*SimpleLruReadPage_hook) (ctl, pageno, write_ok, xid); + /* Outer loop handles restart if we must wait for someone else's I/O */ for (;;) { @@ -427,17 +424,6 @@ SimpleLruReadPage(SlruCtl ctl, int pageno, bool write_ok, /* Now we must recheck state from the top */ continue; } -#ifdef SERVERLESS - /* - * TODO: add hook/GUC instead? - * The page in buffer may be out of date, we need to check the buffer - * and refresh the buffer if the page has been modified. 
- */ - if (Gp_role == GP_ROLE_EXECUTE) - { - goto PageRead; - } -#endif /* Otherwise, it's ready to use */ SlruRecentlyUsed(shared, slotno); @@ -452,10 +438,6 @@ SimpleLruReadPage(SlruCtl ctl, int pageno, bool write_ok, (shared->page_status[slotno] == SLRU_PAGE_VALID && !shared->page_dirty[slotno])); -#ifdef SERVERLESS -PageRead: -#endif - /* Mark the slot read-busy */ shared->page_number[slotno] = pageno; shared->page_status[slotno] = SLRU_PAGE_READ_IN_PROGRESS; @@ -517,6 +499,12 @@ SimpleLruReadPage_ReadOnly(SlruCtl ctl, int pageno, TransactionId xid) SlruShared shared = ctl->shared; int slotno; + if (SimpleLruReadPage_hook) + { + LWLockAcquire(shared->ControlLock, LW_EXCLUSIVE); + return (*SimpleLruReadPage_hook) (ctl, pageno, true, xid); + } + /* Try to find the page while holding only shared lock */ LWLockAcquire(shared->ControlLock, LW_SHARED); @@ -527,18 +515,6 @@ SimpleLruReadPage_ReadOnly(SlruCtl ctl, int pageno, TransactionId xid) shared->page_status[slotno] != SLRU_PAGE_EMPTY && shared->page_status[slotno] != SLRU_PAGE_READ_IN_PROGRESS) { -#ifdef SERVERLESS - /* - * TODO: add hook/GUC instead? - * The page in buffer may be out of date, we need to check the buffer - * and refresh the buffer if the page has been modified. - */ - if (Gp_role == GP_ROLE_EXECUTE) - { - break; - } -#endif - /* See comments for SlruRecentlyUsed macro */ SlruRecentlyUsed(shared, slotno); @@ -712,7 +688,7 @@ SimpleLruDoesPhysicalPageExist(SlruCtl ctl, int pageno) * For now, assume it's not worth keeping a file pointer open across * read/write operations. We could cache one virtual file pointer ... */ -static bool +bool SlruPhysicalReadPage(SlruCtl ctl, int pageno, int slotno) { SlruShared shared = ctl->shared; @@ -973,7 +949,7 @@ SlruPhysicalWritePage(SlruCtl ctl, int pageno, int slotno, SlruWriteAll fdata) * Issue the error message after failure of SlruPhysicalReadPage or * SlruPhysicalWritePage. Call this after cleaning up shared-memory state. 
*/ -static void +void SlruReportIOError(SlruCtl ctl, int pageno, TransactionId xid) { int segno = pageno / SLRU_PAGES_PER_SEGMENT; @@ -1058,7 +1034,7 @@ SlruReportIOError(SlruCtl ctl, int pageno, TransactionId xid) * * Control lock must be held at entry, and will be held at exit. */ -static int +int SlruSelectLRUPage(SlruCtl ctl, int pageno) { SlruShared shared = ctl->shared; diff --git a/src/backend/access/transam/xlog.c b/src/backend/access/transam/xlog.c index 59a21ef96c3..5e1b175ebf5 100644 --- a/src/backend/access/transam/xlog.c +++ b/src/backend/access/transam/xlog.c @@ -111,11 +111,6 @@ StartupXLOG_hook_type StartupXLOG_hook = NULL; */ XLogFlush_hook_type XLogFlush_hook = NULL; -/* - * Hook for plugins to get control in CreateCheckPoint. - */ -CreateCheckPoint_hook_type CreateCheckPoint_hook = NULL; - /* Unsupported old recovery command file names (relative to $PGDATA) */ #define RECOVERY_COMMAND_FILE "recovery.conf" #define RECOVERY_COMMAND_DONE "recovery.done" @@ -3151,15 +3146,6 @@ XLogBackgroundFlush(void) TimestampTz now; int flushbytes; -#ifdef SERVERLESS - /* - * TODO: use GUC/hook instead of macro. - * - * Indeed, walwriter is not needed in serverless, we have no WAL in buffer. - */ - return true; -#endif - /* XLOG doesn't need flushing during recovery */ if (RecoveryInProgress()) return false; @@ -9486,12 +9472,6 @@ CreateCheckPoint(int flags) VirtualTransactionId *vxids; int nvxids; - if (CreateCheckPoint_hook) - { - (*CreateCheckPoint_hook) (flags); - return; - } - /* * An end-of-recovery checkpoint is really a shutdown checkpoint, just * issued at a different time. diff --git a/src/backend/cdb/cdbtm.c b/src/backend/cdb/cdbtm.c index c2e1c3073d6..e333fbf589a 100644 --- a/src/backend/cdb/cdbtm.c +++ b/src/backend/cdb/cdbtm.c @@ -1637,14 +1637,6 @@ doQEDistributedExplicitBegin() static bool isDtxQueryDispatcher(void) { -#ifdef SERVERLESS - /* - * TODO: use GUC/hook instead of macro. - * - * Distributed transaction is not necessary in serverless. 
- */ - return false; -#endif bool isDtmStarted; bool isSharedLocalSnapshotSlotPresent; @@ -1653,7 +1645,8 @@ isDtxQueryDispatcher(void) return (Gp_role == GP_ROLE_DISPATCH && isDtmStarted && - isSharedLocalSnapshotSlotPresent); + isSharedLocalSnapshotSlotPresent && + !enable_serverless); } /* diff --git a/src/backend/postmaster/postmaster.c b/src/backend/postmaster/postmaster.c index 5c11b8125f9..db8440634be 100644 --- a/src/backend/postmaster/postmaster.c +++ b/src/backend/postmaster/postmaster.c @@ -286,6 +286,9 @@ bool enable_password_profile = true; /* Hook for plugins to start background workers */ start_bgworkers_hook_type start_bgworkers_hook = NULL; +/* Hook for plugins to get control in StartChildProcess */ +StartChildProcess_hook_type StartChildProcess_hook = NULL; + /* * PIDs of special child processes; 0 when not running. When adding a new PID * to the list, remember to add the process title to GetServerProcessTitle() @@ -471,6 +474,8 @@ bool ClientAuthInProgress = false; /* T during new-client bool redirection_done = false; /* stderr redirected for syslogger? 
*/ +bool enable_serverless = false; /* use CloudberryDB serverless architecture */ + /* received START_AUTOVAC_LAUNCHER signal */ static volatile sig_atomic_t start_autovac_launcher = false; @@ -6063,6 +6068,15 @@ CountChildren(int target) */ static pid_t StartChildProcess(AuxProcType type) +{ + if (StartChildProcess_hook) + return (*StartChildProcess_hook) (type); + + return StartChildProcessInternal(type); +} + +pid_t +StartChildProcessInternal(AuxProcType type) { pid_t pid; char *av[10]; diff --git a/src/backend/storage/buffer/bufmgr.c b/src/backend/storage/buffer/bufmgr.c index b33ba11852d..d79e640f3e5 100644 --- a/src/backend/storage/buffer/bufmgr.c +++ b/src/backend/storage/buffer/bufmgr.c @@ -51,6 +51,7 @@ #include "pg_trace.h" #include "pgstat.h" #include "postmaster/bgwriter.h" +#include "postmaster/postmaster.h" #include "storage/buf_internals.h" #include "storage/bufmgr.h" #include "storage/ipc.h" @@ -1256,14 +1257,11 @@ BufferAlloc(SMgrRelation smgr, char relpersistence, ForkNumber forkNum, *foundPtr = true; -#ifdef SERVERLESS /* - * TODO: use GUC/hook instead of macro - * * The page in buffer may be out of date, we need to check the buffer * and refresh the buffer if the page has been modified. */ - if (Gp_role == GP_ROLE_EXECUTE && valid) + if (enable_serverless && Gp_role == GP_ROLE_EXECUTE && valid) { uint32 buf_state = LockBufHdr(buf); @@ -1272,7 +1270,7 @@ BufferAlloc(SMgrRelation smgr, char relpersistence, ForkNumber forkNum, valid = false; } -#endif + if (!valid) { /* diff --git a/src/backend/utils/init/postinit.c b/src/backend/utils/init/postinit.c index 3a4f2bae356..86fb5c82cb7 100644 --- a/src/backend/utils/init/postinit.c +++ b/src/backend/utils/init/postinit.c @@ -1147,13 +1147,7 @@ InitPostgres(const char *in_dbname, Oid dboid, const char *username, */ fullpath = GetDatabasePath(MyDatabaseId, MyDatabaseTableSpace); -#ifndef SERVERLESS - /* - * TODO: use GUC instead of macro. 
- * - * No database directories/files in serverless, skip sanity check. - */ - if (!bootstrap) + if (!bootstrap && !enable_serverless) { if (access(fullpath, F_OK) == -1) { @@ -1173,7 +1167,6 @@ InitPostgres(const char *in_dbname, Oid dboid, const char *username, ValidatePgVersion(fullpath); } -#endif SetDatabasePath(fullpath); pfree(fullpath); diff --git a/src/include/access/slru.h b/src/include/access/slru.h index c8136f3c1a7..542df07f586 100644 --- a/src/include/access/slru.h +++ b/src/include/access/slru.h @@ -147,16 +147,22 @@ extern PGDLLIMPORT SlruPhysicalReadPage_hook_type SlruPhysicalReadPage_hook; typedef bool (*SlruPhysicalWritePage_hook_type)(SlruCtl ctl, int pageno, int slotno, bool *result); extern PGDLLIMPORT SlruPhysicalWritePage_hook_type SlruPhysicalWritePage_hook; +typedef int (*SimpleLruReadPage_hook_type)(SlruCtl ctl, int pageno, bool write_ok, TransactionId xid); +extern PGDLLIMPORT SimpleLruReadPage_hook_type SimpleLruReadPage_hook; + extern Size SimpleLruShmemSize(int nslots, int nlsns); extern void SimpleLruInit(SlruCtl ctl, const char *name, int nslots, int nlsns, LWLock *ctllock, const char *subdir, int tranche_id, SyncRequestHandler sync_handler); extern int SimpleLruZeroPage(SlruCtl ctl, int pageno); +extern void SimpleLruZeroLSNs(SlruCtl ctl, int slotno); +extern void SimpleLruWaitIO(SlruCtl ctl, int slotno); extern int SimpleLruReadPage(SlruCtl ctl, int pageno, bool write_ok, TransactionId xid); extern int SimpleLruReadPage_ReadOnly(SlruCtl ctl, int pageno, TransactionId xid); extern void SimpleLruWritePage(SlruCtl ctl, int slotno); +extern int SlruSelectLRUPage(SlruCtl ctl, int pageno); extern void SimpleLruWriteAll(SlruCtl ctl, bool allow_redirtied); #ifdef USE_ASSERT_CHECKING extern void SlruPagePrecedesUnitTests(SlruCtl ctl, int per_page); @@ -166,6 +172,8 @@ extern void SlruPagePrecedesUnitTests(SlruCtl ctl, int per_page); extern void SimpleLruTruncate(SlruCtl ctl, int cutoffPage); extern void SimpleLruTruncateWithLock(SlruCtl 
ctl, int cutoffPage); extern bool SimpleLruDoesPhysicalPageExist(SlruCtl ctl, int pageno); +extern bool SlruPhysicalReadPage(SlruCtl ctl, int pageno, int slotno); +extern void SlruReportIOError(SlruCtl ctl, int pageno, TransactionId xid); typedef bool (*SlruScanCallback) (SlruCtl ctl, char *filename, int segpage, void *data); diff --git a/src/include/access/xlog.h b/src/include/access/xlog.h index 9d1229ea688..2f828812244 100644 --- a/src/include/access/xlog.h +++ b/src/include/access/xlog.h @@ -192,10 +192,6 @@ extern XLOGDropDatabase_hook_type XLOGDropDatabase_hook; typedef void (*XLogFlush_hook_type) (XLogRecPtr record); extern PGDLLIMPORT XLogFlush_hook_type XLogFlush_hook; -/* Hook for plugins to get control in CreateCheckPoint */ -typedef void (*CreateCheckPoint_hook_type)(int flags); -extern PGDLLIMPORT CreateCheckPoint_hook_type CreateCheckPoint_hook; - /* Archive modes */ typedef enum ArchiveMode { @@ -259,16 +255,7 @@ extern PGDLLIMPORT int wal_level; (DataChecksumsEnabled() || FileEncryptionEnabled || wal_log_hints) /* Do we need to WAL-log information required only for Hot Standby and logical replication? */ -#ifdef SERVERLESS -/* - * This is not necessary. - * - * Standby is not needed in serverless, so we do not need to WAL-log anything. - */ -#define XLogStandbyInfoActive() (false) -#else #define XLogStandbyInfoActive() (wal_level >= WAL_LEVEL_REPLICA) -#endif /* Do we need to WAL-log information required only for logical replication? 
*/ #define XLogLogicalInfoActive() (wal_level >= WAL_LEVEL_LOGICAL) diff --git a/src/include/postmaster/postmaster.h b/src/include/postmaster/postmaster.h index f6d9b290917..eaa6d31f37b 100644 --- a/src/include/postmaster/postmaster.h +++ b/src/include/postmaster/postmaster.h @@ -13,6 +13,8 @@ #ifndef _POSTMASTER_H #define _POSTMASTER_H +#include "miscadmin.h" + /* GUC options */ extern bool EnableSSL; extern int ReservedBackends; @@ -35,6 +37,8 @@ extern bool enable_password_profile; extern int terminal_fd; +extern bool enable_serverless; + #ifdef WIN32 extern HANDLE PostmasterHandle; #else @@ -58,6 +62,10 @@ extern int postmaster_alive_fds[2]; extern PGDLLIMPORT const char *progname; +/* Hook for plugins to get control in StartChildProcess */ +typedef pid_t (*StartChildProcess_hook_type) (AuxProcType type); +extern PGDLLIMPORT StartChildProcess_hook_type StartChildProcess_hook; + extern void PostmasterMain(int argc, char *argv[]) pg_attribute_noreturn(); extern void ClosePostmasterPorts(bool am_syslogger); extern void InitProcessGlobals(void); @@ -79,6 +87,7 @@ extern void ShmemBackendArrayAllocation(void); extern void load_auxiliary_libraries(void); extern bool amAuxiliaryBgWorker(void); +extern pid_t StartChildProcessInternal(AuxProcType type); #ifdef ENABLE_IC_PROXY # define IC_PROXY_NUM_BGWORKER 1 #else /* ENABLE_IC_PROXY */ From 7e7c992142ba3fe8effe399f359af2e83da2dcc7 Mon Sep 17 00:00:00 2001 From: husen Date: Wed, 16 Aug 2023 10:00:44 +0800 Subject: [PATCH 003/152] disable WAL-log information required only for Hot Standby in serverless --- src/backend/postmaster/postmaster.c | 2 -- src/backend/storage/buffer/bufmgr.c | 1 - src/backend/utils/init/globals.c | 5 +++++ src/include/access/xlog.h | 2 +- src/include/miscadmin.h | 1 + src/include/postmaster/postmaster.h | 2 -- 6 files changed, 7 insertions(+), 6 deletions(-) diff --git a/src/backend/postmaster/postmaster.c b/src/backend/postmaster/postmaster.c index db8440634be..a1b25e1caa1 100644 --- 
a/src/backend/postmaster/postmaster.c +++ b/src/backend/postmaster/postmaster.c @@ -474,8 +474,6 @@ bool ClientAuthInProgress = false; /* T during new-client bool redirection_done = false; /* stderr redirected for syslogger? */ -bool enable_serverless = false; /* use CloudberryDB serverless architecture */ - /* received START_AUTOVAC_LAUNCHER signal */ static volatile sig_atomic_t start_autovac_launcher = false; diff --git a/src/backend/storage/buffer/bufmgr.c b/src/backend/storage/buffer/bufmgr.c index d79e640f3e5..512c008515b 100644 --- a/src/backend/storage/buffer/bufmgr.c +++ b/src/backend/storage/buffer/bufmgr.c @@ -51,7 +51,6 @@ #include "pg_trace.h" #include "pgstat.h" #include "postmaster/bgwriter.h" -#include "postmaster/postmaster.h" #include "storage/buf_internals.h" #include "storage/bufmgr.h" #include "storage/ipc.h" diff --git a/src/backend/utils/init/globals.c b/src/backend/utils/init/globals.c index b187da7e849..30958c7a7fc 100644 --- a/src/backend/utils/init/globals.c +++ b/src/backend/utils/init/globals.c @@ -149,6 +149,11 @@ double hash_mem_multiplier = 1.0; int maintenance_work_mem = 65536; int max_parallel_maintenance_workers = 2; +/* + * use CloudberryDB serverless architecture + */ +bool enable_serverless = false; + /* * Primary determinants of sizes of shared-memory structures. * diff --git a/src/include/access/xlog.h b/src/include/access/xlog.h index 2f828812244..666c395cc06 100644 --- a/src/include/access/xlog.h +++ b/src/include/access/xlog.h @@ -255,7 +255,7 @@ extern PGDLLIMPORT int wal_level; (DataChecksumsEnabled() || FileEncryptionEnabled || wal_log_hints) /* Do we need to WAL-log information required only for Hot Standby and logical replication? */ -#define XLogStandbyInfoActive() (wal_level >= WAL_LEVEL_REPLICA) +#define XLogStandbyInfoActive() (wal_level >= WAL_LEVEL_REPLICA && !enable_serverless) /* Do we need to WAL-log information required only for logical replication? 
*/ #define XLogLogicalInfoActive() (wal_level >= WAL_LEVEL_LOGICAL) diff --git a/src/include/miscadmin.h b/src/include/miscadmin.h index 72f760cb295..c51493bf292 100644 --- a/src/include/miscadmin.h +++ b/src/include/miscadmin.h @@ -337,6 +337,7 @@ extern PGDLLIMPORT int work_mem; extern PGDLLIMPORT double hash_mem_multiplier; extern PGDLLIMPORT int maintenance_work_mem; extern PGDLLIMPORT int max_parallel_maintenance_workers; +extern PGDLLIMPORT bool enable_serverless; extern PGDLLIMPORT int statement_mem; extern PGDLLIMPORT int max_statement_mem; extern PGDLLIMPORT int gp_vmem_limit_per_query; diff --git a/src/include/postmaster/postmaster.h b/src/include/postmaster/postmaster.h index eaa6d31f37b..02e91b9ee51 100644 --- a/src/include/postmaster/postmaster.h +++ b/src/include/postmaster/postmaster.h @@ -37,8 +37,6 @@ extern bool enable_password_profile; extern int terminal_fd; -extern bool enable_serverless; - #ifdef WIN32 extern HANDLE PostmasterHandle; #else From 1c71dfdfdcc7d7c9ff3abca0498a964cfcd58a25 Mon Sep 17 00:00:00 2001 From: HuSen8891 Date: Sun, 20 Aug 2023 17:09:33 +0800 Subject: [PATCH 004/152] Add support for creating cluster with single master, and only query on catalog is permitted on single master without warehouse. --- contrib/interconnect/udp/ic_udpifc.c | 20 ++++++++++++++++++++ src/backend/cdb/cdbtm.c | 2 +- src/backend/cdb/cdbutil.c | 4 ++-- src/backend/utils/cache/relcache.c | 10 ++++++++++ 4 files changed, 33 insertions(+), 3 deletions(-) diff --git a/contrib/interconnect/udp/ic_udpifc.c b/contrib/interconnect/udp/ic_udpifc.c index 63e8c9301dd..486e267269c 100644 --- a/contrib/interconnect/udp/ic_udpifc.c +++ b/contrib/interconnect/udp/ic_udpifc.c @@ -1599,6 +1599,14 @@ initConnHashTable(ConnHashTable *ht, MemoryContext cxt) ht->cxt = cxt; ht->size = Gp_role == GP_ROLE_DISPATCH ? (getgpsegmentCount() * 2) : ic_htab_size; + + /* + * In serverless architecture, the cluster may have only one QD, skip Initialization. 
+ * Initialization will be done later. + */ + if (enable_serverless && Gp_role == GP_ROLE_DISPATCH && ht->size == 0) + return true; + Assert(ht->size > 0); if (ht->cxt) @@ -1635,6 +1643,18 @@ connAddHash(ConnHashTable *ht, MotionConn *mConn) MemoryContext old = NULL; MotionConnUDP *conn = NULL; + /* + * Initialize connection hash table if needed. + */ + if (enable_serverless && Gp_role == GP_ROLE_DISPATCH && ht->size == 0) + { + old = MemoryContextSwitchTo(ht->cxt); + initConnHashTable(ht, ht->cxt); + MemoryContextSwitchTo(old); + + Assert(ht->size > 0); + } + conn = CONTAINER_OF(mConn, MotionConnUDP, mConn); hashcode = CONN_HASH_VALUE(&conn->conn_info) % ht->size; diff --git a/src/backend/cdb/cdbtm.c b/src/backend/cdb/cdbtm.c index e333fbf589a..a6b0e0e0f74 100644 --- a/src/backend/cdb/cdbtm.c +++ b/src/backend/cdb/cdbtm.c @@ -1138,7 +1138,7 @@ tmShmemInit(void) /* Initialize locks and shared memory area */ { *shmNextSnapshotId = 0; - *shmDtmStarted = false; + *shmDtmStarted = enable_serverless; *shmCleanupBackends = false; *shmDtxRecoveryPid = 0; *shmDtxRecoveryEvents = DTX_RECOVERY_EVENT_ABORT_PREPARED; diff --git a/src/backend/cdb/cdbutil.c b/src/backend/cdb/cdbutil.c index 1671b17223b..88381f8eaa4 100644 --- a/src/backend/cdb/cdbutil.c +++ b/src/backend/cdb/cdbutil.c @@ -465,7 +465,7 @@ getCdbComponentInfo(void) /* * In singlenode deployment, total_segment_dbs is zero and it should still work. 
*/ - if (component_databases->total_segment_dbs == 0 && !IS_SINGLENODE()) + if (component_databases->total_segment_dbs == 0 && !IS_SINGLENODE() && !enable_serverless) { ereport(ERROR, (errcode(ERRCODE_CARDINALITY_VIOLATION), @@ -2883,7 +2883,7 @@ getCdbComponentInfo(void) * Validate that there exists at least one entry and one segment database * in the configuration */ - if (component_databases->total_segment_dbs == 0) + if (component_databases->total_segment_dbs == 0 && !enable_serverless) { ereport(ERROR, (errcode(ERRCODE_CARDINALITY_VIOLATION), diff --git a/src/backend/utils/cache/relcache.c b/src/backend/utils/cache/relcache.c index a7ba7f5a4ca..482a6424603 100644 --- a/src/backend/utils/cache/relcache.c +++ b/src/backend/utils/cache/relcache.c @@ -1284,6 +1284,16 @@ RelationBuildDesc(Oid targetRelId, bool insertIt) /* make sure relation is marked as having no open file yet */ relation->rd_smgr = NULL; + if (enable_serverless && !OidIsValid(GetCurrentWarehouseId()) && + Gp_role == GP_ROLE_DISPATCH && !IsSystemRelation(relation)) + { + ereport(ERROR, + (errcode(ERRCODE_GP_FEATURE_NOT_YET), + errmsg("cannot access table \"%s\" in current transaction", + get_rel_name(targetRelId)), + errhint("Switch to exist warehouse before any query."))); + } + /* * initialize Apache Cloudberry partitioning info */ From 7451a8abb216f7dbb593723993ea4377966187d7 Mon Sep 17 00:00:00 2001 From: HuSen8891 Date: Wed, 23 Aug 2023 17:33:10 +0800 Subject: [PATCH 005/152] Add: Support to create hashdata table with randomly distribution. Currently, we use randomly distribution for hashdata table, and the number of segments is set to 0. When we query on hashdata table, the distribution policy's segment number is set to number of segments of current warehouse. 
--- contrib/interconnect/udp/ic_udpifc.c | 11 +++++++---- src/backend/cdb/cdbcat.c | 14 +++++++++++--- src/backend/utils/cache/relcache.c | 10 ---------- 3 files changed, 18 insertions(+), 17 deletions(-) diff --git a/contrib/interconnect/udp/ic_udpifc.c b/contrib/interconnect/udp/ic_udpifc.c index 486e267269c..02a91552c4b 100644 --- a/contrib/interconnect/udp/ic_udpifc.c +++ b/contrib/interconnect/udp/ic_udpifc.c @@ -1825,10 +1825,13 @@ destroyConnHashTable(ConnHashTable *ht) } } - if (ht->cxt) - pfree(ht->table); - else - free(ht->table); + if (ht->size > 0) + { + if (ht->cxt) + pfree(ht->table); + else + free(ht->table); + } ht->table = NULL; ht->size = 0; diff --git a/src/backend/cdb/cdbcat.c b/src/backend/cdb/cdbcat.c index dc5a93bb556..d3c81c673bc 100644 --- a/src/backend/cdb/cdbcat.c +++ b/src/backend/cdb/cdbcat.c @@ -97,7 +97,7 @@ makeGpPolicy(GpPolicyType ptype, int nattrs, int numsegments) policy->numsegments = numsegments; policy->nattrs = nattrs; - Assert(numsegments > 0 || + Assert(numsegments >= 0 || (ptype == POLICYTYPE_ENTRY && numsegments == -1)); return policy; @@ -458,8 +458,16 @@ GpPolicyFetch(Oid tbloid) } /* Create a GpPolicy object. 
*/ - policy = makeGpPolicy(POLICYTYPE_PARTITIONED, - nattrs, policyform->numsegments); + if (policyform->numsegments == 0) + { + policy = makeGpPolicy(POLICYTYPE_PARTITIONED, + nattrs, getgpsegmentCount()); + } + else + { + policy = makeGpPolicy(POLICYTYPE_PARTITIONED, + nattrs, policyform->numsegments); + } for (i = 0; i < nattrs; i++) { diff --git a/src/backend/utils/cache/relcache.c b/src/backend/utils/cache/relcache.c index 482a6424603..a7ba7f5a4ca 100644 --- a/src/backend/utils/cache/relcache.c +++ b/src/backend/utils/cache/relcache.c @@ -1284,16 +1284,6 @@ RelationBuildDesc(Oid targetRelId, bool insertIt) /* make sure relation is marked as having no open file yet */ relation->rd_smgr = NULL; - if (enable_serverless && !OidIsValid(GetCurrentWarehouseId()) && - Gp_role == GP_ROLE_DISPATCH && !IsSystemRelation(relation)) - { - ereport(ERROR, - (errcode(ERRCODE_GP_FEATURE_NOT_YET), - errmsg("cannot access table \"%s\" in current transaction", - get_rel_name(targetRelId)), - errhint("Switch to exist warehouse before any query."))); - } - /* * initialize Apache Cloudberry partitioning info */ From 84f489848086d62841fba1c7eed3ccb258f3a368 Mon Sep 17 00:00:00 2001 From: HuSen8891 Date: Tue, 29 Aug 2023 16:46:45 +0800 Subject: [PATCH 006/152] 1. set distributedXid to LocalTransactionId 2. 
do not send FTS Probe Request --- src/backend/cdb/cdbdtxcontextinfo.c | 3 +++ src/backend/cdb/cdbfts.c | 6 ++++++ src/backend/tcop/postgres.c | 3 +-- src/include/tcop/tcopprot.h | 1 + 4 files changed, 11 insertions(+), 2 deletions(-) diff --git a/src/backend/cdb/cdbdtxcontextinfo.c b/src/backend/cdb/cdbdtxcontextinfo.c index 1a3c1b8f295..9227d844b74 100644 --- a/src/backend/cdb/cdbdtxcontextinfo.c +++ b/src/backend/cdb/cdbdtxcontextinfo.c @@ -23,6 +23,7 @@ #include "access/xact.h" #include "utils/guc.h" #include "utils/session_state.h" +#include "storage/proc.h" /* * process local cache used to identify "dispatch units" @@ -46,6 +47,8 @@ DtxContextInfo_CreateOnMaster(DtxContextInfo *dtxContextInfo, bool inCursor, DtxContextInfo_Reset(dtxContextInfo); dtxContextInfo->distributedXid = getDistributedTransactionId(); + if (enable_serverless) + dtxContextInfo->distributedXid = MyProc->lxid; if (dtxContextInfo->distributedXid != InvalidDistributedTransactionId) dtxContextInfo->curcid = curcid; diff --git a/src/backend/cdb/cdbfts.c b/src/backend/cdb/cdbfts.c index 754d3054cbb..8155663e984 100644 --- a/src/backend/cdb/cdbfts.c +++ b/src/backend/cdb/cdbfts.c @@ -87,6 +87,9 @@ FtsNotifyProber(void) if (am_ftsprobe) return; + if (enable_serverless) + return; + SpinLockAcquire(&ftsProbeInfo->lock); initial_started = ftsProbeInfo->start_count; SpinLockRelease(&ftsProbeInfo->lock); @@ -177,6 +180,9 @@ getFtsVersion(void) void FtsNotifyProber(void) { + if (enable_serverless) + return; + Assert(Gp_role == GP_ROLE_DISPATCH); SendPostmasterSignal(PMSIGNAL_WAKEN_FTS); SIMPLE_FAULT_INJECTOR("ftsNotify_before"); diff --git a/src/backend/tcop/postgres.c b/src/backend/tcop/postgres.c index ba56f427227..52bfab8bb57 100644 --- a/src/backend/tcop/postgres.c +++ b/src/backend/tcop/postgres.c @@ -268,7 +268,6 @@ static int errdetail_params(ParamListInfo params); static int errdetail_abort(void); static int errdetail_recovery_conflict(void); static void bind_param_error_callback(void *arg); 
-static void start_xact_command(void); static void finish_xact_command(void); static bool IsTransactionExitStmt(Node *parsetree); static bool IsTransactionExitStmtList(List *pstmts); @@ -3554,7 +3553,7 @@ exec_describe_portal_message(const char *portal_name) /* * Convenience routines for starting/committing a single command. */ -static void +void start_xact_command(void) { if (!xact_started) diff --git a/src/include/tcop/tcopprot.h b/src/include/tcop/tcopprot.h index 33c929e9082..ef909efe5d6 100644 --- a/src/include/tcop/tcopprot.h +++ b/src/include/tcop/tcopprot.h @@ -68,6 +68,7 @@ extern List *pg_plan_queries(List *querytrees, const char *query_string, extern bool check_max_stack_depth(int *newval, void **extra, GucSource source); extern void assign_max_stack_depth(int newval, void *extra); +extern void start_xact_command(void); extern void die(SIGNAL_ARGS); extern void quickdie(SIGNAL_ARGS) pg_attribute_noreturn(); extern void StatementCancelHandler(SIGNAL_ARGS); From 5f26818369743d88cd95277c12aa3842284f7efd Mon Sep 17 00:00:00 2001 From: HuSen8891 Date: Thu, 31 Aug 2023 10:16:52 +0800 Subject: [PATCH 007/152] Feature: support subtransaction and savepoint --- src/backend/access/transam/clog.c | 4 ++ src/backend/access/transam/subtrans.c | 9 +++ src/backend/access/transam/xact.c | 85 ++++++++++++++++++++++++--- src/backend/cdb/cdbtm.c | 3 + src/backend/storage/buffer/bufmgr.c | 3 +- src/include/access/subtrans.h | 4 ++ src/include/access/xact.h | 2 + 7 files changed, 100 insertions(+), 10 deletions(-) diff --git a/src/backend/access/transam/clog.c b/src/backend/access/transam/clog.c index 422fc5bf9f6..3aea28d84af 100644 --- a/src/backend/access/transam/clog.c +++ b/src/backend/access/transam/clog.c @@ -38,6 +38,7 @@ #include "access/xlog.h" #include "access/xloginsert.h" #include "access/xlogutils.h" +#include "cdb/cdbvars.h" #include "miscadmin.h" #include "pg_trace.h" #include "pgstat.h" @@ -167,6 +168,9 @@ TransactionIdSetTreeStatus(TransactionId xid, int 
nsubxids, int pageno = TransactionIdToPage(xid); /* get page of parent */ int i; + if (enable_serverless && Gp_role != GP_ROLE_DISPATCH) + return; + Assert(status == TRANSACTION_STATUS_COMMITTED || status == TRANSACTION_STATUS_ABORTED); diff --git a/src/backend/access/transam/subtrans.c b/src/backend/access/transam/subtrans.c index 5339eeaa1c0..88e8bc5f8ef 100644 --- a/src/backend/access/transam/subtrans.c +++ b/src/backend/access/transam/subtrans.c @@ -379,3 +379,12 @@ SubTransPagePrecedes(int page1, int page2) return (TransactionIdPrecedes(xid1, xid2) && TransactionIdPrecedes(xid1, xid2 + SUBTRANS_XACTS_PER_PAGE - 1)); } + +/* + * Get SUBTRANS control data + */ +SlruCtl +SUBTRANS_Ctl(void) +{ + return SubTransCtl; +} \ No newline at end of file diff --git a/src/backend/access/transam/xact.c b/src/backend/access/transam/xact.c index 25bdec98ce9..4f1923dfdb4 100644 --- a/src/backend/access/transam/xact.c +++ b/src/backend/access/transam/xact.c @@ -5611,17 +5611,19 @@ BeginInternalSubTransaction(const char *name) if (Gp_role == GP_ROLE_DISPATCH) { - if (!doDispatchSubtransactionInternalCmd( - DTX_PROTOCOL_COMMAND_SUBTRANSACTION_BEGIN_INTERNAL)) + if (NotifySubTransaction_hook) + NotifySubTransaction_hook(TXN_PROTOCOL_COMMAND_SUB_BEGIN); + else { - elog(ERROR, - "Could not BeginInternalSubTransaction dispatch failed"); + if (!doDispatchSubtransactionInternalCmd( + DTX_PROTOCOL_COMMAND_SUBTRANSACTION_BEGIN_INTERNAL)) + { + elog(ERROR, + "Could not BeginInternalSubTransaction dispatch failed"); + } } } - if (NotifySubTransaction_hook) - NotifySubTransaction_hook(TXN_PROTOCOL_COMMAND_SUB_BEGIN); - /* * Workers synchronize transaction state at the beginning of each parallel * operation, so we can't account for new subtransactions after that @@ -5794,7 +5796,7 @@ RollbackAndReleaseCurrentSubTransaction(void) if (Gp_role == GP_ROLE_DISPATCH) { - if (!doDispatchSubtransactionInternalCmd( + if (!NotifySubTransaction_hook && !doDispatchSubtransactionInternalCmd( 
DTX_PROTOCOL_COMMAND_SUBTRANSACTION_ROLLBACK_INTERNAL)) { ereport(ERROR, (errcode(ERRCODE_GP_INTERCONNECTION_ERROR), @@ -7652,3 +7654,70 @@ MarkSubTransactionAssigned(void) CurrentTransactionState->assigned = true; } + +/* + * Get all xids of top level transaction and subtransactons + */ +FullTransactionId * +GetAllXids(int *nxids) +{ + FullTransactionId *xids = NULL; + int len = PGPROC_MAX_CACHED_SUBXIDS; + + *nxids = 0; + + if (FullTransactionIdIsValid(CurrentTransactionState->fullTransactionId)) + { + TransactionState xact = CurrentTransactionState; + + if (xids == NULL) + xids = (FullTransactionId *)palloc(sizeof(FullTransactionId) * len); + xids[(*nxids)++] = xact->fullTransactionId; + + while (xact->parent) + { + xact = xact->parent; + xids[(*nxids)++] = xact->fullTransactionId; + + if ((*nxids) >= len) + { + len *= 2; + xids = (FullTransactionId *)repalloc(xids, sizeof(FullTransactionId) * len); + } + } + } + + return xids; +} + +/* + * Get number of transaction and subtransactions which have no xid. 
+ */ +int +GetNumOfTxnStatesWithoutXid(void) +{ + int nlevels = 0; + + if (!FullTransactionIdIsValid(CurrentTransactionState->fullTransactionId)) + { + TransactionState xact = CurrentTransactionState; + + nlevels++; + + while (xact->parent) + { + xact = xact->parent; + + if (!FullTransactionIdIsValid(xact->fullTransactionId)) + { + nlevels++; + } + else + { + break; + } + } + } + + return nlevels; +} \ No newline at end of file diff --git a/src/backend/cdb/cdbtm.c b/src/backend/cdb/cdbtm.c index a6b0e0e0f74..fef658d6941 100644 --- a/src/backend/cdb/cdbtm.c +++ b/src/backend/cdb/cdbtm.c @@ -1414,6 +1414,9 @@ dispatchDtxCommand(const char *cmd) elog(DTM_DEBUG5, "dispatchDtxCommand: '%s'", cmd); + if (enable_serverless) + return true; + if (currentGxactWriterGangLost()) { ereport(WARNING, diff --git a/src/backend/storage/buffer/bufmgr.c b/src/backend/storage/buffer/bufmgr.c index 512c008515b..787700ab34e 100644 --- a/src/backend/storage/buffer/bufmgr.c +++ b/src/backend/storage/buffer/bufmgr.c @@ -1263,8 +1263,7 @@ BufferAlloc(SMgrRelation smgr, char relpersistence, ForkNumber forkNum, if (enable_serverless && Gp_role == GP_ROLE_EXECUTE && valid) { uint32 buf_state = LockBufHdr(buf); - - buf_state &= ~BM_VALID; + buf_state &= ~(BM_VALID | BM_DIRTY); UnlockBufHdr(buf, buf_state); valid = false; diff --git a/src/include/access/subtrans.h b/src/include/access/subtrans.h index 9a54dc0fb3b..9ad1ecc31b9 100644 --- a/src/include/access/subtrans.h +++ b/src/include/access/subtrans.h @@ -20,6 +20,9 @@ typedef struct SubTransData TransactionId topMostParent; } SubTransData; +struct SlruCtlData; +typedef struct SlruCtlData *SlruCtl; + extern void SubTransSetParent(TransactionId xid, TransactionId parent); extern TransactionId SubTransGetParent(TransactionId xid); extern TransactionId SubTransGetTopmostTransaction(TransactionId xid); @@ -31,5 +34,6 @@ extern void StartupSUBTRANS(TransactionId oldestActiveXID); extern void CheckPointSUBTRANS(void); extern void 
ExtendSUBTRANS(TransactionId newestXact); extern void TruncateSUBTRANS(TransactionId oldestXact); +extern SlruCtl SUBTRANS_Ctl(void); #endif /* SUBTRANS_H */ diff --git a/src/include/access/xact.h b/src/include/access/xact.h index 57267aecf64..64c07e4d730 100644 --- a/src/include/access/xact.h +++ b/src/include/access/xact.h @@ -553,6 +553,8 @@ extern void UnregisterSubXactCallback(SubXactCallback callback, void *arg); extern XLogRecPtr RecordDistributedForgetCommitted(DistributedTransactionId gxid); extern bool IsSubTransactionAssignmentPending(void); extern void MarkSubTransactionAssigned(void); +extern FullTransactionId *GetAllXids(int *nxids); +extern int GetNumOfTxnStatesWithoutXid(void); extern int xactGetCommittedChildren(TransactionId **ptr); From 9539d278865b36ef348c2ed7c4b60b48c26db0cb Mon Sep 17 00:00:00 2001 From: HuSen8891 Date: Fri, 1 Sep 2023 09:15:34 +0800 Subject: [PATCH 008/152] Fix: Only master can set transaction status --- src/backend/access/transam/clog.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/backend/access/transam/clog.c b/src/backend/access/transam/clog.c index 3aea28d84af..33896e7dfa7 100644 --- a/src/backend/access/transam/clog.c +++ b/src/backend/access/transam/clog.c @@ -168,7 +168,10 @@ TransactionIdSetTreeStatus(TransactionId xid, int nsubxids, int pageno = TransactionIdToPage(xid); /* get page of parent */ int i; - if (enable_serverless && Gp_role != GP_ROLE_DISPATCH) + /* + * Only master can set transaction status + */ + if (enable_serverless && (Gp_role != GP_ROLE_DISPATCH && GpIdentity.segindex != MASTER_CONTENT_ID)) return; Assert(status == TRANSACTION_STATUS_COMMITTED || From 3559b79e1340ae70761100f5fcc2aec1c04e75bc Mon Sep 17 00:00:00 2001 From: zhangwenchao <656540940@qq.com> Date: Wed, 30 Aug 2023 13:13:40 +0800 Subject: [PATCH 009/152] Add extensible smgr slot for other storage format. This commit is mainly used to add extensible smgr slot for other extension storage format. 
When we create a storage format extension, it will add the relevant smgr slot in the smgrsw array. Moreover, add smgropen and smgrclose in RelationDropStorage. authored-by: Zhang Wenchao zwcpostgres@gmail.com --- src/backend/catalog/storage.c | 10 +++++-- src/backend/storage/smgr/smgr.c | 47 ++++++++++++++++++++++++++++++++- src/include/storage/smgr.h | 4 +++ 3 files changed, 58 insertions(+), 3 deletions(-) diff --git a/src/backend/catalog/storage.c b/src/backend/catalog/storage.c index 50dd5981ef4..c5ae91da1be 100644 --- a/src/backend/catalog/storage.c +++ b/src/backend/catalog/storage.c @@ -163,7 +163,7 @@ RelationCreateStorage(RelFileNode rnode, char relpersistence, SMgrImpl smgr_whic pending->relnode.isTempRelation = backend == TempRelBackendId; pending->atCommit = false; /* delete if abort */ pending->nestLevel = GetCurrentTransactionNestLevel(); - pending->relnode.smgr_which = smgr_which; + pending->relnode.smgr_which = srel->smgr_which; pending->action = &storage_pending_rel_deletes_action; RegisterPendingDelete(pending); @@ -204,6 +204,11 @@ void RelationDropStorage(Relation rel) { PendingRelDelete *pending; + SMgrRelation srel; + + srel = smgropen(rel->rd_node, rel->rd_backend, + RelationIsAppendOptimized(rel) ? SMGR_AO : SMGR_MD, + rel); /* Add the relation to the list of stuff to delete at commit */ pending = (PendingRelDelete *) @@ -212,7 +217,7 @@ RelationDropStorage(Relation rel) pending->relnode.isTempRelation = rel->rd_backend == TempRelBackendId; pending->atCommit = true; /* delete if commit */ pending->nestLevel = GetCurrentTransactionNestLevel(); - pending->relnode.smgr_which = smgr_get_impl(rel); + pending->relnode.smgr_which = srel->smgr_which; pending->action = &storage_pending_rel_deletes_action; RegisterPendingDelete(pending); @@ -226,6 +231,7 @@ RelationDropStorage(Relation rel) * for now I'll keep the logic simple.
*/ + smgrclose(srel); RelationCloseSmgr(rel); } diff --git a/src/backend/storage/smgr/smgr.c b/src/backend/storage/smgr/smgr.c index a64c7384853..d826c334f1c 100644 --- a/src/backend/storage/smgr/smgr.c +++ b/src/backend/storage/smgr/smgr.c @@ -56,7 +56,8 @@ smgr_init_hook_type smgr_init_hook = NULL; smgr_hook_type smgr_hook = NULL; smgr_shutdown_hook_type smgr_shutdown_hook = NULL; #define SMGR_MAX_ID UINT8_MAX -static f_smgr smgrsw[SMGR_MAX_ID + 1] = { + +f_smgr smgrsw[] = { /* magnetic disk */ { .smgr_name = "heap", @@ -100,6 +101,31 @@ static f_smgr smgrsw[SMGR_MAX_ID + 1] = { .smgr_nblocks = mdnblocks, .smgr_truncate = mdtruncate, .smgr_immedsync = mdimmedsync, + }, + /* extensible smgr's slot for other storage format */ + { + 0 + }, + { + 0 + }, + { + 0 + }, + { + 0 + }, + { + 0 + }, + { + 0 + }, + { + 0 + }, + { + 0 } }; @@ -221,6 +247,8 @@ SMgrImpl smgr_get_impl(const Relation rel) return smgr_impl; } +static int32 last_assigned_smgr_kind = SMGR_LAST_DEFAULT; + /* * smgrinit(), smgrshutdown() -- Initialize or shut down storage * managers. @@ -876,3 +904,20 @@ const char *smgr_get_name(SMgrImpl impl) return "invalid"; return smgrsw[impl].smgr_name ? smgrsw[impl].smgr_name : "unknown"; } + +/* + * When need to add a new storage format smgr in extension, we should + * call add_smgr_kind to get a slot, then init the slot. 
+ */ +SMgrImpl +add_smgr_kind(void) +{ + /* Now, only support NSmgr smgrs */ + if (last_assigned_smgr_kind >= NSmgr) + ereport(ERROR, + (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED), + errmsg("extensible smgr types limit exceeded"))); + + last_assigned_smgr_kind++; + return (SMgrImpl) last_assigned_smgr_kind; +} diff --git a/src/include/storage/smgr.h b/src/include/storage/smgr.h index e7db1a43d49..4910148f9c9 100644 --- a/src/include/storage/smgr.h +++ b/src/include/storage/smgr.h @@ -52,6 +52,8 @@ typedef enum SMgrImplementation SMGR_MD = 0, SMGR_AO = 1, SMGR_PAX = 2, + + SMGR_LAST_DEFAULT = SMGR_AO } SMgrImpl; struct f_smgr; @@ -212,6 +214,7 @@ extern void smgrtruncate(SMgrRelation reln, ForkNumber *forknum, int nforks, BlockNumber *nblocks); extern void smgrimmedsync(SMgrRelation reln, ForkNumber forknum); extern void AtEOXact_SMgr(void); +extern SMgrImpl add_smgr_kind(void); extern const struct f_smgr_ao * smgrAOGetDefault(void); @@ -245,4 +248,5 @@ extern PGDLLIMPORT file_unlink_hook_type file_unlink_hook; typedef void (*smgr_get_impl_hook_type)(const Relation rel, SMgrImpl* smgr_impl); extern PGDLLIMPORT smgr_get_impl_hook_type smgr_get_impl_hook; +extern f_smgr smgrsw[]; #endif /* SMGR_H */ From bc464ecb15cd8fe1ccf4795372dfe8e78c52ba6c Mon Sep 17 00:00:00 2001 From: GongXun Date: Mon, 11 Sep 2023 14:47:59 +0800 Subject: [PATCH 010/152] Feature: add dml hook 1. Two hook functions , ext_dml_init_hook and ext_dml_finish_hook have been added. These functions perform some resource initialization and cleanup at the start and end of data modification operations (such as modifyTable, CopyFrom, CreateAs, Matview, etc.) 
--- src/include/access/tableam.h | 1 - 1 file changed, 1 deletion(-) diff --git a/src/include/access/tableam.h b/src/include/access/tableam.h index 61948090e22..3b28e292479 100644 --- a/src/include/access/tableam.h +++ b/src/include/access/tableam.h @@ -2335,5 +2335,4 @@ enum CmdType; typedef void (*ext_dml_func_hook_type) (Relation relation, enum CmdType operation); extern PGDLLIMPORT ext_dml_func_hook_type ext_dml_init_hook; extern PGDLLIMPORT ext_dml_func_hook_type ext_dml_finish_hook; - #endif /* TABLEAM_H */ From 89daa5349e0536634be44f0280198e217f5d83f6 Mon Sep 17 00:00:00 2001 From: roseduan Date: Thu, 14 Sep 2023 20:22:55 +0800 Subject: [PATCH 011/152] Add main_manifest catalog table If we drop a hashdata table, we cannot delete the record in main_manifest. So we make the main_manifest as a catalog table, and add a dependency in pg_depend table. When we drop a table, the dependency will be deleted too. --- src/backend/catalog/Makefile | 3 +- src/backend/catalog/catalog.c | 6 ++++ src/backend/catalog/dependency.c | 8 +++++ src/backend/catalog/main_manifest.c | 49 ++++++++++++++++++++++++++++ src/backend/catalog/objectaddress.c | 22 ++++++++++--- src/backend/commands/alter.c | 1 + src/backend/commands/event_trigger.c | 1 + src/backend/commands/tablecmds.c | 1 + src/include/catalog/dependency.h | 3 +- src/include/catalog/main_manifest.h | 35 ++++++++++++++++++++ 10 files changed, 122 insertions(+), 7 deletions(-) create mode 100644 src/backend/catalog/main_manifest.c create mode 100644 src/include/catalog/main_manifest.h diff --git a/src/backend/catalog/Makefile b/src/backend/catalog/Makefile index 8a58b8e5897..c59ca97d834 100644 --- a/src/backend/catalog/Makefile +++ b/src/backend/catalog/Makefile @@ -54,7 +54,7 @@ OBJS += pg_extprotocol.o \ storage_tablespace_twophase.o storage_tablespace_xact.o \ gp_partition_template.o pg_task.o pg_task_run_history.o \ gp_matview_aux.o \ - pg_directory_table.o
storage_directory_table.o main_manifest.o CATALOG_JSON:= $(addprefix $(top_srcdir)/gpMgmt/bin/gppylib/data/, $(addsuffix .json,$(GP_MAJORVERSION))) @@ -82,6 +82,7 @@ CATALOG_HEADERS := \ gp_configuration_history.h gp_id.h gp_distribution_policy.h gp_version_at_initdb.h \ gp_warehouse.h \ pg_appendonly.h \ + main_manifest.h \ gp_fastsequence.h pg_extprotocol.h \ pg_attribute_encoding.h \ pg_auth_time_constraint.h \ diff --git a/src/backend/catalog/catalog.c b/src/backend/catalog/catalog.c index d87b6802978..7241bf9fafe 100644 --- a/src/backend/catalog/catalog.c +++ b/src/backend/catalog/catalog.c @@ -79,6 +79,7 @@ #include "catalog/pg_trigger.h" #include "catalog/gp_matview_aux.h" #include "catalog/gp_matview_tables.h" +#include "catalog/main_manifest.h" #include "cdb/cdbvars.h" #include "catalog/gp_indexing.h" @@ -589,6 +590,11 @@ IsSharedRelation(Oid relationId) return true; } + if (relationId == ManifestRelationId) + { + return true; + } + return false; } diff --git a/src/backend/catalog/dependency.c b/src/backend/catalog/dependency.c index 6c38ca470f6..209167bcaa7 100644 --- a/src/backend/catalog/dependency.c +++ b/src/backend/catalog/dependency.c @@ -23,6 +23,7 @@ #include "catalog/gp_storage_user_mapping.h" #include "catalog/heap.h" #include "catalog/index.h" +#include "catalog/main_manifest.h" #include "catalog/namespace.h" #include "catalog/objectaccess.h" #include "catalog/pg_am.h" @@ -217,6 +218,7 @@ static const Oid object_classes[] = { ExtprotocolRelationId, /* OCLASS_EXTPROTOCOL */ GpMatviewAuxId, /* OCLASS_MATVIEW_AUX */ TaskRelationId, /* OCLASS_TASK */ + ManifestRelationId /* MAIN_MANIFEST */ }; @@ -1593,6 +1595,9 @@ doDeletion(const ObjectAddress *object, int flags) case OCLASS_TASK: RemoveTaskById(object->objectId); break; + case OCLASS_MAIN_MANIFEST: + RemoveMainManifestByRelid(object->objectId); + break; case OCLASS_MATVIEW_AUX: RemoveMatviewAuxEntry(object->objectId); @@ -3042,6 +3047,9 @@ getObjectClass(const ObjectAddress *object) case 
TagDescriptionRelationId: return OCLASS_TAG_DESCRIPTION; + case ManifestRelationId: + return OCLASS_MAIN_MANIFEST; + default: { struct CustomObjectClass *coc; diff --git a/src/backend/catalog/main_manifest.c b/src/backend/catalog/main_manifest.c new file mode 100644 index 00000000000..edf325d919d --- /dev/null +++ b/src/backend/catalog/main_manifest.c @@ -0,0 +1,49 @@ +/*------------------------------------------------------------------------- + * + * main_manifest.c + * save all storage manifest info. + * + * Portions Copyright (c) 2023, HashData Technology Limited. + * + * + * IDENTIFICATION + * src/backend/catalog/main_manifest.c + * + *------------------------------------------------------------------------- + */ +#include "postgres.h" + +#include "access/htup_details.h" +#include "access/genam.h" +#include "access/table.h" +#include "catalog/indexing.h" +#include "catalog/main_manifest.h" +#include "utils/rel.h" + +/* + * RemoveMainManifestByRelid + * Remove the main manifest record for the relid. 
+ */ +void +RemoveMainManifestByRelid(Oid relid) +{ + Relation main_manifest; + HeapTuple tuple; + SysScanDesc scanDescriptor = NULL; + ScanKeyData scanKey[1]; + + main_manifest = table_open(ManifestRelationId, RowExclusiveLock); + ScanKeyInit(&scanKey[0], Anum_main_manifest_relid, BTEqualStrategyNumber, + F_OIDEQ, ObjectIdGetDatum(relid)); + + scanDescriptor = systable_beginscan(main_manifest, InvalidOid, + false, NULL, 1, scanKey); + + while (HeapTupleIsValid(tuple = systable_getnext(scanDescriptor))) + { + CatalogTupleDelete(main_manifest, &tuple->t_self); + } + + systable_endscan(scanDescriptor); + table_close(main_manifest, RowExclusiveLock); +} diff --git a/src/backend/catalog/objectaddress.c b/src/backend/catalog/objectaddress.c index 8ba2fdb3c2c..313c08e2979 100644 --- a/src/backend/catalog/objectaddress.c +++ b/src/backend/catalog/objectaddress.c @@ -4290,6 +4290,9 @@ getObjectDescription(const ObjectAddress *object, bool missing_ok) if (coc && coc->object_desc) coc->object_desc(coc, object, missing_ok, &buffer); } + + case OCLASS_MAIN_MANIFEST: + break; } /* an empty buffer is equivalent to no object found */ @@ -4897,6 +4900,15 @@ getObjectTypeDescription(const ObjectAddress *object, bool missing_ok) appendStringInfoString(&buffer, "tag description"); break; + case OCLASS_MAIN_MANIFEST: + appendStringInfoString(&buffer, "manifest"); + break; + /* + * There's intentionally no default: case here; we want the + * compiler to warn if a new OCLASS hasn't been handled above. + */ + break; + default: { struct CustomObjectClass *coc; @@ -4904,11 +4916,6 @@ getObjectTypeDescription(const ObjectAddress *object, bool missing_ok) coc = find_custom_object_class_by_classid(object->classId, false); if (coc->object_type_desc) coc->object_type_desc(coc, object, missing_ok, &buffer); - /* - * There's intentionally no default: case here; we want the - * compiler to warn if a new OCLASS hasn't been handled above. 
- */ - break; } } @@ -6366,6 +6373,9 @@ getObjectIdentityParts(const ObjectAddress *object, break; } + case OCLASS_MAIN_MANIFEST: + break; + default: { struct CustomObjectClass *coc; @@ -6373,6 +6383,8 @@ getObjectIdentityParts(const ObjectAddress *object, coc = find_custom_object_class_by_classid(object->classId, false); if (coc->object_identity_parts) coc->object_identity_parts(coc, object, objname, objargs, missing_ok, &buffer); + + /* * There's intentionally no default: case here; we want the * compiler to warn if a new OCLASS hasn't been handled above. diff --git a/src/backend/commands/alter.c b/src/backend/commands/alter.c index f5dfd6ff126..71f9eca7a3c 100644 --- a/src/backend/commands/alter.c +++ b/src/backend/commands/alter.c @@ -742,6 +742,7 @@ AlterObjectNamespace_oid(Oid classId, Oid objid, Oid nspOid, case OCLASS_STORAGE_USER_MAPPING: case OCLASS_TAG: case OCLASS_TAG_DESCRIPTION: + case OCLASS_MAIN_MANIFEST: /* ignore object types that don't have schema-qualified names */ break; diff --git a/src/backend/commands/event_trigger.c b/src/backend/commands/event_trigger.c index 0d00c3a08a6..296580f10d9 100644 --- a/src/backend/commands/event_trigger.c +++ b/src/backend/commands/event_trigger.c @@ -1076,6 +1076,7 @@ EventTriggerSupportsObjectClass(ObjectClass objclass) return true; case OCLASS_EXTPROTOCOL: case OCLASS_TASK: + case OCLASS_MAIN_MANIFEST: return false; default: diff --git a/src/backend/commands/tablecmds.c b/src/backend/commands/tablecmds.c index 68db7643b04..75ac387e602 100644 --- a/src/backend/commands/tablecmds.c +++ b/src/backend/commands/tablecmds.c @@ -14466,6 +14466,7 @@ ATExecAlterColumnType(AlteredTableInfo *tab, Relation rel, case OCLASS_STORAGE_USER_MAPPING: case OCLASS_TAG: case OCLASS_TAG_DESCRIPTION: + case OCLASS_MAIN_MANIFEST: /* * We don't expect any of these sorts of objects to depend on diff --git a/src/include/catalog/dependency.h b/src/include/catalog/dependency.h index 41e231c7dfd..def45120713 100644 --- 
a/src/include/catalog/dependency.h +++ b/src/include/catalog/dependency.h @@ -156,9 +156,10 @@ typedef enum ObjectClass OCLASS_EXTPROTOCOL, /* pg_extprotocol */ OCLASS_MATVIEW_AUX, /* gp_matview_aux */ OCLASS_TASK, /* pg_task */ + OCLASS_MAIN_MANIFEST /* main_manifest */ } ObjectClass; -#define LAST_OCLASS OCLASS_TASK +#define LAST_OCLASS OCLASS_MAIN_MANIFEST /* flag bits for performDeletion/performMultipleDeletions: */ #define PERFORM_DELETION_INTERNAL 0x0001 /* internal action */ diff --git a/src/include/catalog/main_manifest.h b/src/include/catalog/main_manifest.h new file mode 100644 index 00000000000..6a4dbf39375 --- /dev/null +++ b/src/include/catalog/main_manifest.h @@ -0,0 +1,35 @@ +/*------------------------------------------------------------------------- + * + * main_manifest.h + * save all storage manifest info + * + * Portions Copyright (c) 2023, HashData Technology Limited. + * + * + * IDENTIFICATION + * src/include/catalog/main_manifest.h + * + *------------------------------------------------------------------------- + */ +#ifndef MAIN_MANIFEST_H +#define MAIN_MANIFEST_H + +#include "catalog/genbki.h" +#include "catalog/main_manifest_d.h" + +/* ---------------- + * main_manifest definition. cpp turns this into + * typedef struct FormData_main_manifest + * ---------------- + */ +CATALOG(main_manifest,9004,ManifestRelationId) BKI_SHARED_RELATION +{ + Oid relid; + text path; +} FormData_main_manifest; + +typedef FormData_main_manifest *Form_main_manifest; + +extern void RemoveMainManifestByRelid(Oid relid); + +#endif /* MAIN_MANIFEST.h */ From 4f16fc05191028de400d672d19b96be1c492da9c Mon Sep 17 00:00:00 2001 From: HuSen8891 Date: Mon, 18 Sep 2023 16:51:14 +0800 Subject: [PATCH 012/152] Add hooks for plugins to get control in transientrel_init/intorel_initplan. 
--- src/backend/commands/createas.c | 53 ++++++++++++++++----------------- src/backend/commands/matview.c | 29 ++++++++---------- src/include/commands/createas.h | 18 +++++++++++ src/include/commands/matview.h | 21 +++++++++++++ 4 files changed, 77 insertions(+), 44 deletions(-) diff --git a/src/backend/commands/createas.c b/src/backend/commands/createas.c index 72db9a51e7c..ebcc20922e1 100644 --- a/src/backend/commands/createas.c +++ b/src/backend/commands/createas.c @@ -24,7 +24,6 @@ */ #include "postgres.h" -#include "access/heapam.h" #include "access/htup_details.h" #include "access/reloptions.h" #include "access/sysattr.h" @@ -84,17 +83,8 @@ #include "cdb/memquota.h" #include "utils/metrics_utils.h" -typedef struct -{ - DestReceiver pub; /* publicly-known function pointers */ - IntoClause *into; /* target relation specification */ - /* These fields are filled by intorel_startup: */ - Relation rel; /* relation to write to */ - ObjectAddress reladdr; /* address of rel, for ExecCreateTableAs */ - CommandId output_cid; /* cmin to insert in output tuples */ - int ti_options; /* table_tuple_insert performance options */ - BulkInsertState bistate; /* bulk insert state */ -} DR_intorel; +/* Hook for plugins to get control in intorel_initplan */ +intorel_initplan_hook_type intorel_initplan_hook = NULL; typedef struct { @@ -855,17 +845,8 @@ intorel_startup_dummy(DestReceiver *self, int operation, TupleDesc typeinfo) ext_dml_init_hook(((DR_intorel *)self)->rel, CMD_INSERT); } -/* - * intorel_initplan --- Based on PG intorel_startup(). - * Parameters are different. We need to run the code earlier before the - * executor runs since we want the relation to be created earlier else current - * MPP framework will fail. This could be called in InitPlan() as before, but - * we could call it just before ExecutorRun() in ExecCreateTableAs(). 
In the - * future if the requirment is general we could add an interface into - * DestReceiver but so far that is not needed (Based on PG 11 code.) - */ void -intorel_initplan(struct QueryDesc *queryDesc, int eflags) +intorel_initplan_internal(struct QueryDesc *queryDesc, int eflags) { DR_intorel *myState; /* Get 'into' from the dispatched plan */ @@ -878,11 +859,6 @@ intorel_initplan(struct QueryDesc *queryDesc, int eflags) int attnum; TupleDesc typeinfo = queryDesc->tupDesc; - /* If EXPLAIN/QE, skip creating the "into" relation. */ - if ((eflags & EXEC_FLAG_EXPLAIN_ONLY) || - (Gp_role == GP_ROLE_EXECUTE && !Gp_is_writer)) - return; - /* This code supports both CREATE TABLE AS and CREATE MATERIALIZED VIEW */ is_matview = (into->viewQuery != NULL); @@ -1000,6 +976,29 @@ intorel_initplan(struct QueryDesc *queryDesc, int eflags) Assert(RelationGetTargetBlock(intoRelationDesc) == InvalidBlockNumber); } +/* + * intorel_initplan --- Based on PG intorel_startup(). + * Parameters are different. We need to run the code earlier before the + * executor runs since we want the relation to be created earlier else current + * MPP framework will fail. This could be called in InitPlan() as before, but + * we could call it just before ExecutorRun() in ExecCreateTableAs(). In the + * future if the requirment is general we could add an interface into + * DestReceiver but so far that is not needed (Based on PG 11 code.) + */ +void +intorel_initplan(struct QueryDesc *queryDesc, int eflags) +{ + /* If EXPLAIN/QE, skip creating the "into" relation. 
*/ + if ((eflags & EXEC_FLAG_EXPLAIN_ONLY) || + (Gp_role == GP_ROLE_EXECUTE && !Gp_is_writer)) + return; + + if (intorel_initplan_hook) + return (*intorel_initplan_hook) (queryDesc, eflags); + + intorel_initplan_internal(queryDesc, eflags); +} + /* * intorel_receive --- receive one tuple */ diff --git a/src/backend/commands/matview.c b/src/backend/commands/matview.c index e56e9d76fb9..7a02fe9e3ec 100644 --- a/src/backend/commands/matview.c +++ b/src/backend/commands/matview.c @@ -68,22 +68,8 @@ #include "utils/syscache.h" #include "utils/typcache.h" - -typedef struct -{ - DestReceiver pub; /* publicly-known function pointers */ - Oid transientoid; /* OID of new heap into which to store */ - Oid oldreloid; - bool concurrent; - bool skipData; - char relpersistence; - /* These fields are filled by transientrel_startup: */ - Relation transientrel; /* relation to write to */ - CommandId output_cid; /* cmin to insert in output tuples */ - int ti_options; /* table_tuple_insert performance options */ - BulkInsertState bistate; /* bulk insert state */ - uint64 processed; /* GPDB: number of tuples inserted */ -} DR_transientrel; +/* Hook for plugins to get control in transientrel_init */ +transientrel_init_hook_type transientrel_init_hook = NULL; #define MV_INIT_QUERYHASHSIZE 32 #define MV_INIT_SNAPSHOTHASHSIZE (2 * MaxBackends) @@ -884,7 +870,7 @@ CreateTransientRelDestReceiver(Oid transientoid, Oid oldreloid, bool concurrent, } void -transientrel_init(QueryDesc *queryDesc) +transientrel_init_internal(QueryDesc *queryDesc) { Oid matviewOid; Relation matviewRel; @@ -945,6 +931,15 @@ transientrel_init(QueryDesc *queryDesc) heap_close(matviewRel, NoLock); } +void +transientrel_init(QueryDesc *queryDesc) +{ + if (transientrel_init_hook) + return (*transientrel_init_hook)(queryDesc); + + transientrel_init_internal(queryDesc); +} + /* * transientrel_startup --- executor startup */ diff --git a/src/include/commands/createas.h b/src/include/commands/createas.h index 
64cbccb3456..ff71b7be104 100644 --- a/src/include/commands/createas.h +++ b/src/include/commands/createas.h @@ -14,13 +14,30 @@ #ifndef CREATEAS_H #define CREATEAS_H +#include "access/heapam.h" #include "catalog/objectaddress.h" +#include "executor/execdesc.h" #include "nodes/params.h" #include "nodes/pathnodes.h" #include "parser/parse_node.h" #include "tcop/dest.h" #include "utils/queryenvironment.h" +typedef struct +{ + DestReceiver pub; /* publicly-known function pointers */ + IntoClause *into; /* target relation specification */ + /* These fields are filled by intorel_startup: */ + Relation rel; /* relation to write to */ + ObjectAddress reladdr; /* address of rel, for ExecCreateTableAs */ + CommandId output_cid; /* cmin to insert in output tuples */ + int ti_options; /* table_tuple_insert performance options */ + BulkInsertState bistate; /* bulk insert state */ +} DR_intorel; + +/* Hook for plugins to get control in intorel_initplan */ +typedef void (*intorel_initplan_hook_type) (QueryDesc *queryDesc, int eflags); +extern PGDLLIMPORT intorel_initplan_hook_type intorel_initplan_hook; extern ObjectAddress ExecCreateTableAs(ParseState *pstate, CreateTableAsStmt *stmt, ParamListInfo params, QueryEnvironment *queryEnv, @@ -38,6 +55,7 @@ extern DestReceiver *CreateIntoRelDestReceiver(IntoClause *intoClause); struct QueryDesc; +extern void intorel_initplan_internal(struct QueryDesc *queryDesc, int eflags); extern void intorel_initplan(struct QueryDesc *queryDesc, int eflags); extern bool CreateTableAsRelExists(CreateTableAsStmt *ctas); diff --git a/src/include/commands/matview.h b/src/include/commands/matview.h index 8cfb1f55af1..6ef864ce8b6 100644 --- a/src/include/commands/matview.h +++ b/src/include/commands/matview.h @@ -21,6 +21,25 @@ #include "tcop/dest.h" #include "utils/relcache.h" +typedef struct +{ + DestReceiver pub; /* publicly-known function pointers */ + Oid transientoid; /* OID of new heap into which to store */ + Oid oldreloid; + bool concurrent; + 
bool skipData; + char relpersistence; + /* These fields are filled by transientrel_startup: */ + Relation transientrel; /* relation to write to */ + CommandId output_cid; /* cmin to insert in output tuples */ + int ti_options; /* table_tuple_insert performance options */ + BulkInsertState bistate; /* bulk insert state */ + uint64 processed; /* GPDB: number of tuples inserted */ +} DR_transientrel; + +/* Hook for plugins to get control in transientrel_init */ +typedef void (*transientrel_init_hook_type)(QueryDesc *queryDesc); +extern PGDLLIMPORT transientrel_init_hook_type transientrel_init_hook; extern void SetMatViewPopulatedState(Relation relation, bool newstate); @@ -36,6 +55,8 @@ extern DestReceiver *CreateTransientRelDestReceiver(Oid oid, Oid oldreloid, bool extern bool MatViewIncrementalMaintenanceIsEnabled(void); +extern void transientrel_init_internal(QueryDesc *queryDesc); + extern void transientrel_init(QueryDesc *queryDesc); extern void transientenr_init(QueryDesc *queryDesc); From 8e56e4fde2f242e024e31203d160219faaa778ca Mon Sep 17 00:00:00 2001 From: zhangwenchao <656540940@qq.com> Date: Mon, 4 Sep 2023 15:55:58 +0800 Subject: [PATCH 013/152] Implement extensible libpq protocol. Mainly implements extensible libpq protocol in this commit. Morever, imports extensible ExecStatusType and DispatcherAsyncFuncs which can be extended in extension. By this way, we can extend these modules in extension as wanted. 
authored-by: Zhang Wenchao zwcpostgres@gmail.com --- src/backend/cdb/cdbcopy.c | 6 + src/backend/cdb/dispatcher/cdbdisp.c | 14 +- src/backend/cdb/dispatcher/cdbdisp_async.c | 42 +++--- src/backend/cdb/dispatcher/cdbdisp_query.c | 4 + src/backend/executor/execUtils.c | 4 - src/backend/libpq/Makefile | 6 +- src/include/cdb/cdbcopy.h | 8 + src/include/cdb/cdbdisp.h | 9 ++ src/include/cdb/cdbdisp_async.h | 15 ++ src/include/cdb/cdbdispatchresult.h | 2 - src/interfaces/libpq/Makefile | 5 +- src/interfaces/libpq/extensible_protocol.c | 164 +++++++++++++++++++++ src/interfaces/libpq/extensible_protocol.h | 38 +++++ src/interfaces/libpq/fe-exec.c | 4 +- src/interfaces/libpq/fe-misc.c | 22 +++ src/interfaces/libpq/fe-protocol3.c | 34 +++-- src/interfaces/libpq/libpq-fe.h | 9 +- src/interfaces/libpq/libpq-int.h | 2 + src/interfaces/libpq/nls.mk | 2 +- 19 files changed, 340 insertions(+), 50 deletions(-) create mode 100644 src/interfaces/libpq/extensible_protocol.c create mode 100644 src/interfaces/libpq/extensible_protocol.h diff --git a/src/backend/cdb/cdbcopy.c b/src/backend/cdb/cdbcopy.c index 9da82d0cb3d..59cea552da0 100644 --- a/src/backend/cdb/cdbcopy.c +++ b/src/backend/cdb/cdbcopy.c @@ -78,6 +78,9 @@ #include +CopyProcessResult_hook_type CopyProcessResult_hook = NULL; +CdbCopyEnd_hook_type CdbCopyEnd_hook = NULL; + static void cdbCopyEndInternal(CdbCopy *c, char *abort_msg, int64 *total_rows_completed_p, int64 *total_rows_rejected_p); @@ -650,6 +653,9 @@ cdbCopyEndInternal(CdbCopy *c, char *abort_msg, PQfreemem(buffer); } + if (CopyProcessResult_hook) + CopyProcessResult_hook(res); + /* in SREH mode, check if this seg rejected (how many) rows */ if (res->numRejected > 0) segment_rows_rejected = res->numRejected; diff --git a/src/backend/cdb/dispatcher/cdbdisp.c b/src/backend/cdb/dispatcher/cdbdisp.c index e1018363066..4902fe3a105 100644 --- a/src/backend/cdb/dispatcher/cdbdisp.c +++ b/src/backend/cdb/dispatcher/cdbdisp.c @@ -41,7 +41,9 @@ static dispatcher_handle_t 
*allocate_dispatcher_handle(void); static void destroy_dispatcher_handle(dispatcher_handle_t *h); static char * segmentsListToString(const char *prefix, List *segments); -static DispatcherInternalFuncs *pDispatchFuncs = &DispatcherAsyncFuncs; +DispatcherInternalFuncs *pDispatchFuncs = &DispatcherAsyncFuncs; + +PGResStausOK_hook_type PGResStausOK_hook = NULL; /* * cdbdisp_dispatchToGang: @@ -660,3 +662,13 @@ segmentsToContentStr(List *segments) else return segmentsListToString("ALL contents", segments); } + +void +SetupDispatchFuncs(DispatcherInternalFuncs *dispatcherInternalFuncs) +{ + if (!dispatcherInternalFuncs) + elog(ERROR, "Dispatch Internal Functions can't be NULL"); + + /* set up current extension pDispatchFuncs */ + pDispatchFuncs = dispatcherInternalFuncs; +} diff --git a/src/backend/cdb/dispatcher/cdbdisp_async.c b/src/backend/cdb/dispatcher/cdbdisp_async.c index eb8e4714396..dc2e89eac3f 100644 --- a/src/backend/cdb/dispatcher/cdbdisp_async.c +++ b/src/backend/cdb/dispatcher/cdbdisp_async.c @@ -100,22 +100,6 @@ typedef struct CdbDispatchCmdAsync } CdbDispatchCmdAsync; -static void *cdbdisp_makeDispatchParams_async(int maxSlices, int largestGangSize, char *queryText, int len); - -static bool cdbdisp_checkAckMessage_async(struct CdbDispatcherState *ds, const char *message, - int timeout_sec); - -static void cdbdisp_checkDispatchResult_async(struct CdbDispatcherState *ds, - DispatchWaitMode waitMode); - -static void cdbdisp_dispatchToGang_async(struct CdbDispatcherState *ds, - struct Gang *gp, - int sliceIndex); -static void cdbdisp_waitDispatchFinish_async(struct CdbDispatcherState *ds); - -static bool cdbdisp_checkForCancel_async(struct CdbDispatcherState *ds); -static int *cdbdisp_getWaitSocketFds_async(struct CdbDispatcherState *ds, int *nsocks); - DispatcherInternalFuncs DispatcherAsyncFuncs = { cdbdisp_checkForCancel_async, @@ -157,7 +141,7 @@ static bool * * Return true if any connection received error. 
*/ -static bool +bool cdbdisp_checkForCancel_async(struct CdbDispatcherState *ds) { Assert(ds); @@ -174,7 +158,7 @@ cdbdisp_checkForCancel_async(struct CdbDispatcherState *ds) * Return value is the array of waiting socket fds. * It's be palloced in this function, so caller need to pfree it. */ -static int * +int * cdbdisp_getWaitSocketFds_async(struct CdbDispatcherState *ds, int *nsocks) { CdbDispatchCmdAsync *pParms = (CdbDispatchCmdAsync *) ds->dispatchParams; @@ -214,7 +198,7 @@ cdbdisp_getWaitSocketFds_async(struct CdbDispatcherState *ds, int *nsocks) /* * Block until all data are dispatched. */ -static void +void cdbdisp_waitDispatchFinish_async(struct CdbDispatcherState *ds) { const static int DISPATCH_POLL_TIMEOUT = 500; @@ -300,7 +284,7 @@ cdbdisp_waitDispatchFinish_async(struct CdbDispatcherState *ds) * Throw out error to upper try-catch block if anything goes wrong. This function only kicks off dispatching, * call cdbdisp_waitDispatchFinish_async to ensure the completion */ -static void +void cdbdisp_dispatchToGang_async(struct CdbDispatcherState *ds, struct Gang *gp, int sliceIndex) @@ -345,7 +329,7 @@ cdbdisp_dispatchToGang_async(struct CdbDispatcherState *ds, * 0 means checking immediately, and -1 means waiting until all ack * messages are received. */ -static bool +bool cdbdisp_checkAckMessage_async(struct CdbDispatcherState *ds, const char *message, int timeout_sec) { @@ -391,7 +375,7 @@ cdbdisp_checkAckMessage_async(struct CdbDispatcherState *ds, const char *message * Wait all dispatch work to complete, either success or fail. * (Set stillRunning to true when one dispatch work is completed) */ -static void +void cdbdisp_checkDispatchResult_async(struct CdbDispatcherState *ds, DispatchWaitMode waitMode) { @@ -418,7 +402,7 @@ cdbdisp_checkDispatchResult_async(struct CdbDispatcherState *ds, * Memory will be freed in function cdbdisp_destroyDispatcherState by deleting the * memory context. 
*/ -static void * +void * cdbdisp_makeDispatchParams_async(int maxSlices, int largestGangSize, char *queryText, int len) { int maxResults = maxSlices * largestGangSize; @@ -999,6 +983,7 @@ processResults(CdbDispatchResult *dispatchResult) { SegmentDatabaseDescriptor *segdbDesc = dispatchResult->segdbDesc; char *msg; + bool resultStatusOk; /* * Receive input from QE. @@ -1013,6 +998,8 @@ processResults(CdbDispatchResult *dispatchResult) } forwardQENotices(); + resultStatusOk = false; + /* * If we have received one or more complete messages, process them. */ @@ -1085,11 +1072,18 @@ processResults(CdbDispatchResult *dispatchResult) * Did a command complete successfully? */ resultStatus = PQresultStatus(pRes); + + if (PGResStausOK_hook) + { + resultStatusOk = PGResStausOK_hook(resultStatus); + } + if (resultStatus == PGRES_COMMAND_OK || resultStatus == PGRES_TUPLES_OK || resultStatus == PGRES_COPY_IN || resultStatus == PGRES_COPY_OUT || - resultStatus == PGRES_EMPTY_QUERY) + resultStatus == PGRES_EMPTY_QUERY || + resultStatusOk) { ELOG_DISPATCHER_DEBUG("%s -> ok %s", segdbDesc->whoami, diff --git a/src/backend/cdb/dispatcher/cdbdisp_query.c b/src/backend/cdb/dispatcher/cdbdisp_query.c index c2835fe3eb4..188df40c38e 100644 --- a/src/backend/cdb/dispatcher/cdbdisp_query.c +++ b/src/backend/cdb/dispatcher/cdbdisp_query.c @@ -1427,6 +1427,10 @@ CdbDispatchCopyEnd(struct CdbCopy *cdbCopy) CdbDispatcherState *ds; ds = cdbCopy->dispatcherState; + + if (CdbCopyEnd_hook) + CdbCopyEnd_hook(); + cdbCopy->dispatcherState = NULL; cdbdisp_destroyDispatcherState(ds); } diff --git a/src/backend/executor/execUtils.c b/src/backend/executor/execUtils.c index d8fdc752326..4a1fafb3634 100644 --- a/src/backend/executor/execUtils.c +++ b/src/backend/executor/execUtils.c @@ -100,7 +100,6 @@ static bool tlist_matches_tupdesc(PlanState *ps, List *tlist, Index varno, TupleDesc tupdesc); static void ShutdownExprContext(ExprContext *econtext, bool isCommit); static List *flatten_logic_exprs(Node 
*node); -ProcessDispatchResult_hook_type ProcessDispatchResult_hook = NULL; /* ---------------------------------------------------------------- @@ -2056,9 +2055,6 @@ void mppExecutorFinishup(QueryDesc *queryDesc) ThrowErrorData(qeError); } - if (ProcessDispatchResult_hook) - ProcessDispatchResult_hook(ds); - /* collect pgstat from QEs for current transaction level */ pgstat_combine_from_qe(pr, primaryWriterSliceIndex); diff --git a/src/backend/libpq/Makefile b/src/backend/libpq/Makefile index d3ac9f64a37..acd3e27fc88 100644 --- a/src/backend/libpq/Makefile +++ b/src/backend/libpq/Makefile @@ -40,7 +40,7 @@ endif # Cloudberry objects follow OBJS += fe-protocol3.o fe-connect.o \ fe-exec.o pqexpbuffer.o fe-auth.o fe-misc.o fe-secure.o \ - fe-auth-scram.o fe-trace.o \ + fe-auth-scram.o fe-trace.o extensible_protocol.o \ $(filter getpeereid.o, $(LIBOBJS)) # Cloudberry OpenSSL objects follow @@ -53,7 +53,7 @@ ifeq ($(with_gssapi),yes) OBJS += fe-gssapi-common.o fe-secure-gssapi.o endif -fe-protocol3.c fe-connect.c fe-exec.c pqexpbuffer.c fe-auth.c fe-auth-scram.c fe-misc.c fe-protocol2.c fe-secure.c fe-secure-openssl.c fe-secure-common.c fe-secure-gssapi.c fe-gssapi-common.c fe-trace.c: % : $(top_srcdir)/src/interfaces/libpq/% +fe-protocol3.c fe-connect.c fe-exec.c pqexpbuffer.c fe-auth.c fe-auth-scram.c fe-misc.c fe-protocol2.c fe-secure.c fe-secure-openssl.c fe-secure-common.c fe-secure-gssapi.c fe-gssapi-common.c fe-trace.c extensible_protocol.c: % : $(top_srcdir)/src/interfaces/libpq/% rm -f $@ && $(LN_S) $< . 
getpeereid.c: % : $(top_srcdir)/src/port/% @@ -67,7 +67,7 @@ $(top_builddir)/src/port/pg_config_paths.h: clean distclean: clean-symlinks clean-symlinks: - rm -f fe-protocol3.c fe-connect.c fe-exec.c pqexpbuffer.c fe-auth.c fe-auth-scram.c fe-misc.c fe-protocol2.c fe-secure.c fe-secure-openssl.c fe-secure-common.c fe-secure-gssapi.c fe-gssapi-common.c fe-trace.c + rm -f fe-protocol3.c fe-connect.c fe-exec.c pqexpbuffer.c fe-auth.c fe-auth-scram.c fe-misc.c fe-protocol2.c fe-secure.c fe-secure-openssl.c fe-secure-common.c fe-secure-gssapi.c fe-gssapi-common.c fe-trace.c extensible_protocol.c rm -f getpeereid.c include $(top_srcdir)/src/backend/common.mk diff --git a/src/include/cdb/cdbcopy.h b/src/include/cdb/cdbcopy.h index f81fd98f74a..0062f6774da 100644 --- a/src/include/cdb/cdbcopy.h +++ b/src/include/cdb/cdbcopy.h @@ -23,6 +23,8 @@ #define COPYOUT_CHUNK_SIZE 16 * 1024 +struct pg_result; /* PGresult ... #include "libpq-fe.h" */ + struct CdbDispatcherState; struct CopyFromStateData; struct CopyToStateData; @@ -54,4 +56,10 @@ extern void cdbCopyEnd(CdbCopy *c, int64 *total_rows_completed_p, int64 *total_rows_rejected_p); +typedef void (*CopyProcessResult_hook_type) (struct pg_result *res); +extern PGDLLIMPORT CopyProcessResult_hook_type CopyProcessResult_hook; + +typedef void (*CdbCopyEnd_hook_type) (void); +extern PGDLLIMPORT CdbCopyEnd_hook_type CdbCopyEnd_hook; + #endif /* CDBCOPY_H */ diff --git a/src/include/cdb/cdbdisp.h b/src/include/cdb/cdbdisp.h index 9fac725c5fd..9a7de7f8975 100644 --- a/src/include/cdb/cdbdisp.h +++ b/src/include/cdb/cdbdisp.h @@ -26,6 +26,7 @@ struct CdbPgResults; struct Gang; /* #include "cdb/cdbgang.h" */ struct ResourceOwnerData; enum GangType; +enum ExecStatusType; /* * Types of message to QE when we wait for it. 
@@ -214,4 +215,12 @@ void AtSubAbort_DispatcherState(void); char * segmentsToContentStr(List *segments); +extern void +SetupDispatchFuncs(DispatcherInternalFuncs *dispatcherInternalFuncs); + +typedef bool (*PGResStausOK_hook_type) (enum ExecStatusType statusType); +extern PGDLLIMPORT PGResStausOK_hook_type PGResStausOK_hook; + +extern DispatcherInternalFuncs *pDispatchFuncs; + #endif /* CDBDISP_H */ diff --git a/src/include/cdb/cdbdisp_async.h b/src/include/cdb/cdbdisp_async.h index c5016dee187..0f5853cc3da 100644 --- a/src/include/cdb/cdbdisp_async.h +++ b/src/include/cdb/cdbdisp_async.h @@ -18,4 +18,19 @@ extern DispatcherInternalFuncs DispatcherAsyncFuncs; +extern void *cdbdisp_makeDispatchParams_async(int maxSlices, int largestGangSize, char *queryText, int len); + +extern bool cdbdisp_checkAckMessage_async(struct CdbDispatcherState *ds, const char *message, + int timeout_sec); + +extern void cdbdisp_checkDispatchResult_async(struct CdbDispatcherState *ds, + DispatchWaitMode waitMode); + +extern void cdbdisp_dispatchToGang_async(struct CdbDispatcherState *ds, + struct Gang *gp, + int sliceIndex); +extern void cdbdisp_waitDispatchFinish_async(struct CdbDispatcherState *ds); + +extern bool cdbdisp_checkForCancel_async(struct CdbDispatcherState *ds); +extern int *cdbdisp_getWaitSocketFds_async(struct CdbDispatcherState *ds, int *nsocks); #endif diff --git a/src/include/cdb/cdbdispatchresult.h b/src/include/cdb/cdbdispatchresult.h index aeec4ea16bc..7bb112d752d 100644 --- a/src/include/cdb/cdbdispatchresult.h +++ b/src/include/cdb/cdbdispatchresult.h @@ -317,7 +317,5 @@ cdbdisp_makeDispatchResults(struct CdbDispatcherState *ds, void cdbdisp_clearCdbPgResults(CdbPgResults* cdb_pgresults); -typedef void (*ProcessDispatchResult_hook_type) (CdbDispatcherState*); -extern PGDLLIMPORT ProcessDispatchResult_hook_type ProcessDispatchResult_hook; #endif /* CDBDISPATCHRESULT_H */ diff --git a/src/interfaces/libpq/Makefile b/src/interfaces/libpq/Makefile index 
43682574b23..24eb6c3db70 100644 --- a/src/interfaces/libpq/Makefile +++ b/src/interfaces/libpq/Makefile @@ -43,7 +43,8 @@ OBJS = \ legacy-pqsignal.o \ libpq-events.o \ pqexpbuffer.o \ - fe-auth.o + fe-auth.o \ + extensible_protocol.o # File shared across all SSL implementations supported. ifneq ($(with_ssl),no) @@ -117,6 +118,7 @@ install: all installdirs install-lib $(INSTALL_DATA) $(srcdir)/libpq-events.h '$(DESTDIR)$(includedir)' $(INSTALL_DATA) $(srcdir)/libpq-int.h '$(DESTDIR)$(includedir_internal)' $(INSTALL_DATA) $(srcdir)/pqexpbuffer.h '$(DESTDIR)$(includedir_internal)' + $(INSTALL_DATA) $(srcdir)/extensible_protocol.h '$(DESTDIR)$(includedir_internal)' $(INSTALL_DATA) $(srcdir)/pg_service.conf.sample '$(DESTDIR)$(datadir)/pg_service.conf.sample' installcheck: @@ -130,6 +132,7 @@ uninstall: uninstall-lib rm -f '$(DESTDIR)$(includedir)/libpq-events.h' rm -f '$(DESTDIR)$(includedir_internal)/libpq-int.h' rm -f '$(DESTDIR)$(includedir_internal)/pqexpbuffer.h' + rm -f '$(DESTDIR)$(includedir_internal)/extensible_protocol.h' rm -f '$(DESTDIR)$(datadir)/pg_service.conf.sample' clean distclean: clean-lib diff --git a/src/interfaces/libpq/extensible_protocol.c b/src/interfaces/libpq/extensible_protocol.c new file mode 100644 index 00000000000..5d84a024316 --- /dev/null +++ b/src/interfaces/libpq/extensible_protocol.c @@ -0,0 +1,164 @@ +/* + * extensible_protocol.c + * Support for extensible protocol types + * + * + * Copyright (c) 2023, Cloudberry Database, HashData Technology Limited. 
+ * + * IDENTIFICATION + * src/interfaces/libpq/extensible_protocol.c + */ +#ifndef FRONTEND + +#include "postgres.h" + +#include "extensible_protocol.h" +#include "utils/hsearch.h" + +static HTAB *extensible_protocol_types = NULL; + +typedef struct +{ + char extprotocoltype; + const void* extprotocolmethods; +} ExtensibleProtocolEntry; + +/* + * An internal function to register a new protocol + */ +static void +RegisterExtensibleProtocolEntry(const char* extprotocoltype, + const void *extprotocolmethods) +{ + ExtensibleProtocolEntry *entry; + bool found; + + if (extensible_protocol_types == NULL) + { + HASHCTL ctl; + + ctl.keysize = sizeof(ExtensibleProtocolEntry); + ctl.entrysize = sizeof(ExtensibleProtocolEntry); + + extensible_protocol_types = hash_create("Extensible Protocol Hash", 100, &ctl, + HASH_ELEM | HASH_STRINGS); + } + + entry = (ExtensibleProtocolEntry *) hash_search(extensible_protocol_types, + extprotocoltype, + HASH_ENTER, &found); + + if (found) + ereport(ERROR, + (errcode(ERRCODE_DUPLICATE_OBJECT), + errmsg("extensible protocol type \"%s\" already exists", + extprotocoltype))); + + entry->extprotocolmethods = extprotocolmethods; +} + +/* + * Register a new type of protocol. 
+ */ +void +RegisterExtensibleProtocolMethods(const ExtensibleProtocolMethods *methods) +{ + char *extprotocoltype = malloc(2 * sizeof(char)); + extprotocoltype[0] = methods->extprotocoltype; + extprotocoltype[1] = '\0'; + + RegisterExtensibleProtocolEntry(extprotocoltype, + methods); +} + +/* + * An internal routine to get an ExtensibleProtocolEntry by the given identifier + */ +static const void * +GetExtensibleProtocolEntry(const char* extprotocoltype, bool missing_ok) +{ + ExtensibleProtocolEntry *entry = NULL; + + if (extensible_protocol_types != NULL) + entry = (ExtensibleProtocolEntry *) hash_search(extensible_protocol_types, + extprotocoltype, + HASH_FIND, NULL); + if (!entry) + { + if (missing_ok) + return NULL; + ereport(ERROR, + (errcode(ERRCODE_UNDEFINED_OBJECT), + errmsg("ExtensibleProtocolMethods \"%s\" was not registered", + extprotocoltype))); + } + + return entry->extprotocolmethods; +} + +/* + * Get the methods for a given type of extensible protocol. + */ +const ExtensibleProtocolMethods * +GetExtensibleProtocolMethods(const char extprotocoltype, bool missing_ok) +{ + char *extprotocoltype_text = malloc(2 * sizeof(char)); + extprotocoltype_text[0] = extprotocoltype; + extprotocoltype_text[1] = '\0'; + + + return (const ExtensibleProtocolMethods *) + GetExtensibleProtocolEntry(extprotocoltype_text, + missing_ok); +} + +/* + * An internal function to unregister a protocol + */ +static const void* +UnregisterExtensibleProtocolEntry(const char* extprotocoltype, bool missing_ok) +{ + ExtensibleProtocolEntry *entry; + + if (extensible_protocol_types == NULL) + { + if (missing_ok) + return NULL; + ereport(ERROR, + (errcode(ERRCODE_UNDEFINED_OBJECT), + errmsg("ExtensibleProtocolMethods \"%s\" was not registered", + extprotocoltype))); + } + + entry = (ExtensibleProtocolEntry *) hash_search(extensible_protocol_types, + extprotocoltype, + HASH_REMOVE, NULL); + + if (!entry) + { + if (missing_ok) + return NULL; + ereport(ERROR, + 
(errcode(ERRCODE_UNDEFINED_OBJECT), + errmsg("ExtensibleProtocolMethods \"%s\" was not registered", + extprotocoltype))); + } + + return entry->extprotocolmethods; +} + +/* + * Unregister a protocol type + */ +void +UnegisterExtensibleProtocolMethods(const ExtensibleProtocolMethods *methods) +{ + char *extprotocoltype_text = malloc(2 * sizeof(char)); + extprotocoltype_text[0] = methods->extprotocoltype; + extprotocoltype_text[1] = '\0'; + + UnregisterExtensibleProtocolEntry(extprotocoltype_text, + methods); +} + +#endif \ No newline at end of file diff --git a/src/interfaces/libpq/extensible_protocol.h b/src/interfaces/libpq/extensible_protocol.h new file mode 100644 index 00000000000..34e5fc25301 --- /dev/null +++ b/src/interfaces/libpq/extensible_protocol.h @@ -0,0 +1,38 @@ +/* + * extensible_protocol.h + * Definitions for extensible protocol types + * + * + * Copyright (c) 2023, Cloudberry Database, HashData Technology Limited. + * + * IDENTIFICATION + * src/interfaces/libpq/extensible_protocol.h + */ +#ifndef FRONTEND + +#ifndef PG_EXTENSIBLE_PROTOCOL_H +#define PG_EXTENSIBLE_PROTOCOL_H + +#include "libpq-fe.h" + +typedef struct ExtensibleProtocol +{ + const char extprotocoltype; /* identifier of ExtensibleProtocolMethods */ +} ExtensibleProtocol; + +typedef struct ExtensibleProtocolMethods +{ + const char extprotocoltype; + + int (*protocolRecv) (PGconn *conn, int msgLength); +} ExtensibleProtocolMethods; + +extern void +RegisterExtensibleProtocolMethods(const ExtensibleProtocolMethods *methods); +extern const ExtensibleProtocolMethods * +GetExtensibleProtocolMethods(const char extprotocoltype, bool missing_ok); +extern void +UnegisterExtensibleProtocolMethods(const ExtensibleProtocolMethods *methods); + +#endif +#endif \ No newline at end of file diff --git a/src/interfaces/libpq/fe-exec.c b/src/interfaces/libpq/fe-exec.c index 85696712b38..9bc3002c111 100644 --- a/src/interfaces/libpq/fe-exec.c +++ b/src/interfaces/libpq/fe-exec.c @@ -64,8 +64,6 @@ static 
bool static_std_strings = false; static PGEvent *dupEvents(PGEvent *events, int count, size_t *memSize); -static bool pqAddTuple(PGresult *res, PGresAttValue *tup, - const char **errmsgp); static int PQsendQueryInternal(PGconn *conn, const char *query, bool newQuery); bool PQsendQueryStart(PGconn *conn, bool newQuery); @@ -925,7 +923,7 @@ pqInternalNotice(const PGNoticeHooks *hooks, const char *fmt,...) * On error, *errmsgp can be set to an error string to be returned. * If it is left NULL, the error is presumed to be "out of memory". */ -static bool +bool pqAddTuple(PGresult *res, PGresAttValue *tup, const char **errmsgp) { if (res->ntups >= res->tupArrSize) diff --git a/src/interfaces/libpq/fe-misc.c b/src/interfaces/libpq/fe-misc.c index 15d2197bfd0..9c6e9916d2f 100644 --- a/src/interfaces/libpq/fe-misc.c +++ b/src/interfaces/libpq/fe-misc.c @@ -64,6 +64,10 @@ static int pqSocketCheck(PGconn *conn, int forRead, int forWrite, time_t end_time); static int pqSocketPoll(int sock, int forRead, int forWrite, time_t end_time); +#ifndef FRONTEND +static int32 last_assigned_exec_status_type = PGRES_LAST_DEFAULT; +#endif + /* * PQlibVersion: return the libpq version number */ @@ -1390,3 +1394,21 @@ libpq_ngettext(const char *msgid, const char *msgid_plural, unsigned long n) } #endif /* ENABLE_NLS */ + +#ifndef FRONTEND +/* + * When we need to add a new exec status in extension, we should + * call add_exec_status_type to get a slot, then init the slot. 
+ */ +ExecStatusType +add_exec_status_type(void) +{ + if (last_assigned_exec_status_type >= INT32_MAX) + ereport(ERROR, + (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED), + errmsg("extensible exec status types limit exceeded"))); + + last_assigned_exec_status_type++; + return (ExecStatusType) last_assigned_exec_status_type; +} +#endif diff --git a/src/interfaces/libpq/fe-protocol3.c b/src/interfaces/libpq/fe-protocol3.c index 9d74dd0e39d..39ad12f2b68 100644 --- a/src/interfaces/libpq/fe-protocol3.c +++ b/src/interfaces/libpq/fe-protocol3.c @@ -40,6 +40,7 @@ #include "mb/pg_wchar.h" #include "port/pg_bswap.h" #include "cdb/cdbpq.h" +#include "extensible_protocol.h" /* * This macro lists the backend message types that could be "long" (more @@ -85,6 +86,7 @@ pqParseInput3(PGconn *conn) int i; int64 numRejected = 0; int64 numCompleted = 0; + const ExtensibleProtocolMethods *extensibleProtocolMethods; #endif @@ -588,16 +590,28 @@ pqParseInput3(PGconn *conn) break; #endif default: - appendPQExpBuffer(&conn->errorMessage, - libpq_gettext("unexpected response from server; first received character was \"%c\"\n"), - id); - /* build an error result holding the error message */ - pqSaveErrorResult(conn); - /* not sure if we will see more, so go to ready state */ - conn->asyncStatus = PGASYNC_READY; - /* Discard the unexpected message */ - conn->inCursor += msgLength; - break; +#ifndef FRONTEND + extensibleProtocolMethods = GetExtensibleProtocolMethods(id, true); + if (extensibleProtocolMethods) + { + if (extensibleProtocolMethods->protocolRecv(conn, msgLength)) + return; + break; + } + else +#endif + { + appendPQExpBuffer(&conn->errorMessage, + libpq_gettext("unexpected response from server; first received character was \"%c\"\n"), + id); + /* build an error result holding the error message */ + pqSaveErrorResult(conn); + /* not sure if we will see more, so go to ready state */ + conn->asyncStatus = PGASYNC_READY; + /* Discard the unexpected message */ + conn->inCursor += msgLength; 
+ break; + } } /* switch on protocol character */ } /* Successfully consumed this message */ diff --git a/src/interfaces/libpq/libpq-fe.h b/src/interfaces/libpq/libpq-fe.h index 426bc8717d1..6a984aa31b0 100644 --- a/src/interfaces/libpq/libpq-fe.h +++ b/src/interfaces/libpq/libpq-fe.h @@ -90,7 +90,7 @@ typedef enum * compatibility */ } PostgresPollingStatusType; -typedef enum +typedef enum ExecStatusType { PGRES_EMPTY_QUERY = 0, /* empty query string was executed */ PGRES_COMMAND_OK, /* a query command that doesn't return @@ -110,6 +110,10 @@ typedef enum PGRES_PIPELINE_SYNC, /* pipeline synchronization point */ PGRES_PIPELINE_ABORTED /* Command didn't run because of an abort * earlier in a pipeline */ +#ifndef FRONTEND + /* if you add a new exec status kind, remember to update "last default" too */ + , PGRES_LAST_DEFAULT = PGRES_PIPELINE_ABORTED +#endif } ExecStatusType; typedef enum @@ -678,6 +682,9 @@ extern PQsslKeyPassHook_OpenSSL_type PQgetSSLKeyPassHook_OpenSSL(void); extern void PQsetSSLKeyPassHook_OpenSSL(PQsslKeyPassHook_OpenSSL_type hook); extern int PQdefaultSSLKeyPassHook_OpenSSL(char *buf, int size, PGconn *conn); +#ifndef FRONTEND +extern ExecStatusType add_exec_status_type(void); +#endif #ifdef __cplusplus } #endif diff --git a/src/interfaces/libpq/libpq-int.h b/src/interfaces/libpq/libpq-int.h index 70094e5fb70..d406cdc2466 100644 --- a/src/interfaces/libpq/libpq-int.h +++ b/src/interfaces/libpq/libpq-int.h @@ -908,6 +908,8 @@ extern void pqTraceOutputMessage(PGconn *conn, const char *message, bool toServer); extern void pqTraceOutputNoTypeByteMessage(PGconn *conn, const char *message); +extern bool pqAddTuple(PGresult *res, PGresAttValue *tup, const char **errmsgp); + /* === miscellaneous macros === */ /* diff --git a/src/interfaces/libpq/nls.mk b/src/interfaces/libpq/nls.mk index a7e54cd3258..5b90ce1bb53 100644 --- a/src/interfaces/libpq/nls.mk +++ b/src/interfaces/libpq/nls.mk @@ -1,6 +1,6 @@ # src/interfaces/libpq/nls.mk CATALOG_NAME = libpq 
AVAIL_LANGUAGES = cs de el es fr ja ko ru sv uk zh_CN -GETTEXT_FILES = fe-auth.c fe-auth-scram.c fe-connect.c fe-exec.c fe-gssapi-common.c fe-lobj.c fe-misc.c fe-protocol3.c fe-secure.c fe-secure-common.c fe-secure-gssapi.c fe-secure-openssl.c win32.c +GETTEXT_FILES = fe-auth.c fe-auth-scram.c fe-connect.c fe-exec.c fe-gssapi-common.c fe-lobj.c fe-misc.c fe-protocol3.c fe-secure.c fe-secure-common.c fe-secure-gssapi.c fe-secure-openssl.c win32.c extensible_protocol.c GETTEXT_TRIGGERS = libpq_gettext pqInternalNotice:2 GETTEXT_FLAGS = libpq_gettext:1:pass-c-format pqInternalNotice:2:c-format From a0c6b4f468c97643863ba57eace2721bf9023703 Mon Sep 17 00:00:00 2001 From: roseduan Date: Wed, 20 Sep 2023 15:53:47 +0800 Subject: [PATCH 014/152] Add vacuum full in serverless (#209) 1. In serverless architecture, we do not need to dispatch the vacuum command. 2. Make T_ExtensibleNode in CMD_TAG list, which is needed by CreateCommandTag inutility.c. We can not hook it because it executes before standard_ProcessUtility function. Co-authored-by: roseduan --- src/backend/commands/vacuum.c | 11 ++++++++--- src/backend/tcop/utility.c | 4 ++++ src/include/tcop/cmdtaglist.h | 1 + 3 files changed, 13 insertions(+), 3 deletions(-) diff --git a/src/backend/commands/vacuum.c b/src/backend/commands/vacuum.c index c113f274b81..1c6aef616c8 100644 --- a/src/backend/commands/vacuum.c +++ b/src/backend/commands/vacuum.c @@ -2318,8 +2318,13 @@ vacuum_rel(Oid relid, RangeVar *relation, VacuumParams *params, int ao_vacuum_phase; int save_sec_context; int save_nestlevel; - bool is_appendoptimized; - bool is_toast; + bool is_appendoptimized; + bool is_toast; + bool shouldDispatch; + + shouldDispatch = (Gp_role == GP_ROLE_DISPATCH && + ENABLE_DISPATCH() && + !enable_serverless); Assert(params != NULL); @@ -2829,7 +2834,7 @@ vacuum_rel(Oid relid, RangeVar *relation, VacuumParams *params, * Don't dispatch auto-vacuum. Each segment performs auto-vacuum as per * its own need. 
*/ - if ((Gp_role == GP_ROLE_DISPATCH || IS_SINGLENODE()) && !recursing && + if ((shouldDispatch || IS_SINGLENODE()) && !recursing && !IsAutoVacuumWorkerProcess() && (!is_appendoptimized || ao_vacuum_phase)) { diff --git a/src/backend/tcop/utility.c b/src/backend/tcop/utility.c index 6a0d1b38f87..4e8b3a778ec 100644 --- a/src/backend/tcop/utility.c +++ b/src/backend/tcop/utility.c @@ -4075,6 +4075,10 @@ CreateCommandTag(Node *parsetree) tag = CMDTAG_DROP_WAREHOUSE; break; + case T_ExtensibleNode: + tag = CMDTAG_EXTENSIBLE; + break; + default: elog(WARNING, "unrecognized node type: %d", (int) nodeTag(parsetree)); diff --git a/src/include/tcop/cmdtaglist.h b/src/include/tcop/cmdtaglist.h index ca3e6023d5b..440c4843bb4 100644 --- a/src/include/tcop/cmdtaglist.h +++ b/src/include/tcop/cmdtaglist.h @@ -225,6 +225,7 @@ PG_CMDTAG(CMDTAG_DROP_WAREHOUSE, "DROP WAREHOUSE", true, false, false) PG_CMDTAG(CMDTAG_EXECUTE, "EXECUTE", false, false, false) PG_CMDTAG(CMDTAG_EXPLAIN, "EXPLAIN", false, false, false) +PG_CMDTAG(CMDTAG_EXTENSIBLE, "EXTENSIBLE", false, false, false) PG_CMDTAG(CMDTAG_FAULT_INJECT, "FAULT_INJECT", false, false, false) PG_CMDTAG(CMDTAG_FETCH, "FETCH", false, false, true) PG_CMDTAG(CMDTAG_GRANT, "GRANT", true, false, false) From 41659a18bbbe0670fc5f77ac0e999d33cce9b14b Mon Sep 17 00:00:00 2001 From: wangliang03 <140247334+wangliang03@users.noreply.github.com> Date: Thu, 21 Sep 2023 10:59:12 +0800 Subject: [PATCH 015/152] =?UTF-8?q?change=20storage=5Fam=20related=20catal?= =?UTF-8?q?og=20table=20main=5Fmanifest=20field=20type=20from=E2=80=A6=20(?= =?UTF-8?q?#210)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Change storage_am related catalog table main_manifest field type from uint32 to uint64, and change the name from relid to relnode Co-authored-by: xiaosongwang --- src/backend/catalog/dependency.c | 2 +- src/backend/catalog/main_manifest.c | 10 +++++----- src/include/catalog/main_manifest.h | 4 ++-- 3 files 
changed, 8 insertions(+), 8 deletions(-) diff --git a/src/backend/catalog/dependency.c b/src/backend/catalog/dependency.c index 209167bcaa7..71a21135afd 100644 --- a/src/backend/catalog/dependency.c +++ b/src/backend/catalog/dependency.c @@ -1596,7 +1596,7 @@ doDeletion(const ObjectAddress *object, int flags) RemoveTaskById(object->objectId); break; case OCLASS_MAIN_MANIFEST: - RemoveMainManifestByRelid(object->objectId); + RemoveMainManifestByRelnode(object->objectId); break; case OCLASS_MATVIEW_AUX: diff --git a/src/backend/catalog/main_manifest.c b/src/backend/catalog/main_manifest.c index edf325d919d..d08326953ff 100644 --- a/src/backend/catalog/main_manifest.c +++ b/src/backend/catalog/main_manifest.c @@ -21,11 +21,11 @@ #include "utils/rel.h" /* - * RemoveMainManifestByRelid - * Remove the main manifest record for the relid. + * RemoveMainManifestByRelnode + * Remove the main manifest record for the relnode. */ void -RemoveMainManifestByRelid(Oid relid) +RemoveMainManifestByRelnode(RelFileNodeId relnode) { Relation main_manifest; HeapTuple tuple; @@ -33,8 +33,8 @@ RemoveMainManifestByRelid(Oid relid) ScanKeyData scanKey[1]; main_manifest = table_open(ManifestRelationId, RowExclusiveLock); - ScanKeyInit(&scanKey[0], Anum_main_manifest_relid, BTEqualStrategyNumber, - F_OIDEQ, ObjectIdGetDatum(relid)); + ScanKeyInit(&scanKey[0], Anum_main_manifest_relnode, BTEqualStrategyNumber, + F_OIDEQ, ObjectIdGetDatum(relnode)); scanDescriptor = systable_beginscan(main_manifest, InvalidOid, false, NULL, 1, scanKey); diff --git a/src/include/catalog/main_manifest.h b/src/include/catalog/main_manifest.h index 6a4dbf39375..4325ebd3e3b 100644 --- a/src/include/catalog/main_manifest.h +++ b/src/include/catalog/main_manifest.h @@ -24,12 +24,12 @@ */ CATALOG(main_manifest,9004,ManifestRelationId) BKI_SHARED_RELATION { - Oid relid; + RelFileNodeId relnode; text path; } FormData_main_manifest; typedef FormData_main_manifest *Form_main_manifest; -extern void 
RemoveMainManifestByRelid(Oid relid); +extern void RemoveMainManifestByRelnode(RelFileNodeId relnode); #endif /* MAIN_MANIFEST.h */ From 8f4a8dfb3bb7e5615e272ee38011b30b14475a4d Mon Sep 17 00:00:00 2001 From: zhangwenchao <53178068+wenchaozhang-123@users.noreply.github.com> Date: Thu, 21 Sep 2023 13:38:28 +0800 Subject: [PATCH 016/152] Add regress pipeline for branch union_store_catalog and fix compile error. (#211) --- src/test/regress/expected/misc_sanity.out | 6 ++++-- src/test/regress/expected/sanity_check.out | 1 + 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/src/test/regress/expected/misc_sanity.out b/src/test/regress/expected/misc_sanity.out index 88ae3d4e927..08d05a5e0d6 100644 --- a/src/test/regress/expected/misc_sanity.out +++ b/src/test/regress/expected/misc_sanity.out @@ -106,6 +106,7 @@ ORDER BY 1, 2; gp_configuration_history | desc | text gp_version_at_initdb | productversion | text gp_warehouse | warehouse_name | text + main_manifest | path | text pg_attribute | attacl | aclitem[] pg_attribute | attfdwoptions | text[] pg_attribute | attmissingval | anyarray @@ -137,7 +138,7 @@ ORDER BY 1, 2; pg_task_run_history | return_message | text pg_task_run_history | status | text pg_task_run_history | username | text -(34 rows) +(35 rows) -- system catalogs without primary keys -- @@ -160,6 +161,7 @@ ORDER BY 1; gp_partition_template gp_version_at_initdb gp_warehouse + main_manifest pg_appendonly pg_attribute_encoding pg_auth_time_constraint @@ -179,7 +181,7 @@ ORDER BY 1; pg_stat_last_operation pg_stat_last_shoperation pg_type_encoding -(28 rows) +(29 rows) -- system catalog unique indexes not wrapped in a constraint -- (There should be none.) 
diff --git a/src/test/regress/expected/sanity_check.out b/src/test/regress/expected/sanity_check.out index 4d47bca866b..73506e528b0 100644 --- a/src/test/regress/expected/sanity_check.out +++ b/src/test/regress/expected/sanity_check.out @@ -76,6 +76,7 @@ kd_point_tbl|t line_tbl|f log_table|f lseg_tbl|f +main_manifest|f main_table|f mlparted|f mlparted1|f From 295eb56ef63b06020ef5d102e024966fca7706f9 Mon Sep 17 00:00:00 2001 From: Xun Gong Date: Tue, 26 Sep 2023 13:45:50 +0800 Subject: [PATCH 017/152] bugfix: support hashdata tableam in Orca (#222) RetrieveRelStorageType add a magic number 7015. we use the am_id(7015) we assigned to the custom table am, and let the orca optimizer treat this columnar storage format as AOCS to generate an execution plan --- .../gpopt/translate/CTranslatorRelcacheToDXL.cpp | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/src/backend/gpopt/translate/CTranslatorRelcacheToDXL.cpp b/src/backend/gpopt/translate/CTranslatorRelcacheToDXL.cpp index 299c2881313..17ce92dcefa 100644 --- a/src/backend/gpopt/translate/CTranslatorRelcacheToDXL.cpp +++ b/src/backend/gpopt/translate/CTranslatorRelcacheToDXL.cpp @@ -2542,6 +2542,19 @@ CTranslatorRelcacheToDXL::RetrieveRelStorageType(Relation rel) case PAX_AM_OID: rel_storage_type = IMDRelation::ErelstoragePAX; break; + // FIXME: need to add support for custom table am!!! + // + // Why 7015 here? + // Because we defined a custom table am using columnar storage, + // the orca optimizer does not support am other than HEAP/AO/AOCS. At present, + // there is no way to extend orca to support custom table am. So here we use + // the am_id(7015) we assigned to the custom table am, and let the orca optimizer + // treat this columnar storage format as AOCS to generate an execution plan + // + // Why use the magic number 7015 instead of the macro definition? 
+ // Just to make it look like it doesn't make sense, + // so others will notice that the logic needs to be refactored + case 7015: case AO_COLUMN_TABLE_AM_OID: rel_storage_type = IMDRelation::ErelstorageAppendOnlyCols; break; From d4e979fde6e645cf3117e29d88d02e74bc69ef93 Mon Sep 17 00:00:00 2001 From: Xun Gong Date: Tue, 26 Sep 2023 17:54:33 +0800 Subject: [PATCH 018/152] support analyze for unionstore table in cloudberry (#207) use new struct AnalyzeContext instead of gp_acquire_sample_rows_context to pass analyze context in table_beginscan_analyze --- src/backend/access/aocs/aocsam_handler.c | 2 +- src/backend/access/appendonly/appendonlyam.c | 2 +- .../access/appendonly/appendonlyam_handler.c | 2 +- src/backend/access/heap/heapam.c | 2 +- src/backend/access/table/tableam.c | 4 +- src/backend/commands/analyze.c | 57 ++++++++++++++++--- src/include/access/heapam.h | 2 +- src/include/access/tableam.h | 31 ++++++---- src/include/cdb/cdbappendonlyam.h | 4 +- src/include/cdb/cdbdisp_query.h | 2 +- src/include/utils/rel.h | 4 ++ 11 files changed, 82 insertions(+), 30 deletions(-) diff --git a/src/backend/access/aocs/aocsam_handler.c b/src/backend/access/aocs/aocsam_handler.c index 504e9c0fb7a..f35252cf56a 100644 --- a/src/backend/access/aocs/aocsam_handler.c +++ b/src/backend/access/aocs/aocsam_handler.c @@ -707,7 +707,7 @@ aoco_beginscan(Relation relation, Snapshot snapshot, int nkeys, struct ScanKeyData *key, ParallelTableScanDesc pscan, - uint32 flags) + uint32 flags, void *ctx) { AOCSScanDesc aoscan; diff --git a/src/backend/access/appendonly/appendonlyam.c b/src/backend/access/appendonly/appendonlyam.c index 2f590a8678e..d8d85c07fe6 100755 --- a/src/backend/access/appendonly/appendonlyam.c +++ b/src/backend/access/appendonly/appendonlyam.c @@ -1495,7 +1495,7 @@ appendonly_beginscan(Relation relation, Snapshot snapshot, int nkeys, struct ScanKeyData *key, ParallelTableScanDesc pscan, - uint32 flags) + uint32 flags, void * ctx) { Snapshot 
appendOnlyMetaDataSnapshot; int segfile_count; diff --git a/src/backend/access/appendonly/appendonlyam_handler.c b/src/backend/access/appendonly/appendonlyam_handler.c index 0136d4a850f..153d046f9ed 100644 --- a/src/backend/access/appendonly/appendonlyam_handler.c +++ b/src/backend/access/appendonly/appendonlyam_handler.c @@ -1416,7 +1416,7 @@ appendonly_relation_copy_for_cluster(Relation OldHeap, Relation NewHeap, /* Scan through old table to convert data into tuples for sorting */ slot = table_slot_create(OldHeap, NULL); aoscandesc = appendonly_beginscan(OldHeap, GetActiveSnapshot(), 0, NULL, - NULL, 0); + NULL, 0, NULL); mt_bind = create_memtuple_binding(oldTupDesc); while (appendonly_getnextslot(aoscandesc, ForwardScanDirection, slot)) diff --git a/src/backend/access/heap/heapam.c b/src/backend/access/heap/heapam.c index 048ce9231a9..e6f6ec9fec2 100644 --- a/src/backend/access/heap/heapam.c +++ b/src/backend/access/heap/heapam.c @@ -1243,7 +1243,7 @@ TableScanDesc heap_beginscan(Relation relation, Snapshot snapshot, int nkeys, ScanKey key, ParallelTableScanDesc parallel_scan, - uint32 flags) + uint32 flags, void *ctx) { HeapScanDesc scan; diff --git a/src/backend/access/table/tableam.c b/src/backend/access/table/tableam.c index 6ff94ac7d76..40d66d68fe8 100644 --- a/src/backend/access/table/tableam.c +++ b/src/backend/access/table/tableam.c @@ -120,7 +120,7 @@ table_beginscan_catalog(Relation relation, int nkeys, struct ScanKeyData *key) Snapshot snapshot = RegisterSnapshot(GetCatalogSnapshot(relid)); return relation->rd_tableam->scan_begin(relation, snapshot, nkeys, key, - NULL, flags); + NULL, flags, NULL); } void @@ -252,7 +252,7 @@ table_beginscan_parallel(Relation relation, ParallelTableScanDesc parallel_scan) } return relation->rd_tableam->scan_begin(relation, snapshot, 0, NULL, - parallel_scan, flags); + parallel_scan, flags, NULL); } diff --git a/src/backend/commands/analyze.c b/src/backend/commands/analyze.c index 7a6428627f6..25f89740c1e 100644 --- 
a/src/backend/commands/analyze.c +++ b/src/backend/commands/analyze.c @@ -203,6 +203,8 @@ static void acquire_hll_by_query(Relation onerel, int nattrs, VacAttrStats **att static int16 AcquireCountOfSegmentFile(Relation onerel); +void parse_record_to_string(char *string, TupleDesc tupdesc, char** values, bool *nulls); + /* * analyze_rel() -- analyze one relation * @@ -1674,12 +1676,22 @@ acquire_sample_rows(Relation onerel, int elevel, Assert(targrows > 0); if (Gp_role == GP_ROLE_DISPATCH && - onerel->rd_cdbpolicy && !GpPolicyIsEntry(onerel->rd_cdbpolicy)) - { - /* Fetch sample from the segments. */ - return acquire_sample_rows_dispatcher(onerel, false, elevel, - rows, targrows, - totalrows, totaldeadrows); + onerel->rd_cdbpolicy && !GpPolicyIsEntry(onerel->rd_cdbpolicy)) + { + int flags = 0; + VacuumStmt *stmt = makeNode(VacuumStmt); + stmt->is_vacuumcmd = false; + if(CdbNeedDispatchUtility_hook && !CdbNeedDispatchUtility_hook((Node*)stmt, &flags)) + { + pfree(stmt); + } + else + { + pfree(stmt); + /* Fetch sample from the segments. */ + return acquire_sample_rows_dispatcher( + onerel, false, elevel, rows, targrows, totalrows, totaldeadrows); + } } /* @@ -1744,7 +1756,13 @@ acquire_sample_rows(Relation onerel, int elevel, /* Prepare for sampling rows */ reservoir_init_selection_state(&rstate, targrows); - scan = table_beginscan_analyze(onerel); + AnalyzeContext ctx; + if(Gp_role == GP_ROLE_DISPATCH) + { + ctx.targrows = targrows; + } + + scan = table_beginscan_analyze(onerel, &ctx); slot = table_slot_create(onerel, NULL); #ifdef USE_PREFETCH @@ -1974,6 +1992,29 @@ acquire_inherited_sample_rows(Relation onerel, int elevel, ListCell *lc; bool has_child; + /* + * Like in acquire_sample_rows(), if we're in the QD, fetch the sample + * from segments. 
+ */ + if (Gp_role == GP_ROLE_DISPATCH) + { + int flags = 0; + VacuumStmt *stmt = makeNode(VacuumStmt); + stmt->is_vacuumcmd = false; + if(CdbNeedDispatchUtility_hook && !CdbNeedDispatchUtility_hook((Node*)stmt, &flags)) + { + pfree(stmt); + } + else + { + pfree(stmt); + return acquire_sample_rows_dispatcher(onerel, + true, /* inherited stats */ + elevel, rows, targrows, + totalrows, totaldeadrows); + } + } + /* * Find all members of inheritance set. We only need AccessShareLock on * the children. @@ -2458,7 +2499,7 @@ acquire_index_number_of_blocks(Relation indexrel, Relation tablerel) * CDB: a copy of record_in, but only parse the record string * into separate strs for each column. */ -static void +void parse_record_to_string(char *string, TupleDesc tupdesc, char** values, bool *nulls) { char *ptr; diff --git a/src/include/access/heapam.h b/src/include/access/heapam.h index 40f24ca1b7f..efae57af806 100644 --- a/src/include/access/heapam.h +++ b/src/include/access/heapam.h @@ -119,7 +119,7 @@ typedef enum extern TableScanDesc heap_beginscan(Relation relation, Snapshot snapshot, int nkeys, ScanKey key, ParallelTableScanDesc parallel_scan, - uint32 flags); + uint32 flags, void *ctx); extern void heap_setscanlimits(TableScanDesc scan, BlockNumber startBlk, BlockNumber numBlks); extern void heapgetpage(TableScanDesc scan, BlockNumber page); diff --git a/src/include/access/tableam.h b/src/include/access/tableam.h index 3b28e292479..324bdf65327 100644 --- a/src/include/access/tableam.h +++ b/src/include/access/tableam.h @@ -55,6 +55,10 @@ struct ValidateIndexState; #define SCAN_SUPPORT_VECTORIZATION (1 << 2) /* support vectorization scan */ #define SCAN_FORCE_BIG_WRITE_LOCK (1 << 3) /* force big write lock */ +typedef struct AnalyzeContext{ + int32 targrows; +} AnalyzeContext; + /* * Bitmask values for the flags argument to the scan_begin callback. 
*/ @@ -355,12 +359,15 @@ typedef struct TableAmRoutine * the scan's behaviour (ScanOptions's SO_ALLOW_*, several may be * specified, an AM may ignore unsupported ones) and whether the snapshot * needs to be deallocated at scan_end (ScanOptions's SO_TEMP_SNAPSHOT). + * + * + * `ctx` is a context pointer that can be used to pass information from analyze or other scan types. */ TableScanDesc (*scan_begin) (Relation rel, Snapshot snapshot, int nkeys, struct ScanKeyData *key, ParallelTableScanDesc pscan, - uint32 flags); + uint32 flags, void * ctx); /* * GPDB: Extract columns for scan from targetlist and quals. This is mainly @@ -1015,7 +1022,7 @@ table_beginscan(Relation rel, Snapshot snapshot, uint32 flags = SO_TYPE_SEQSCAN | SO_ALLOW_STRAT | SO_ALLOW_SYNC | SO_ALLOW_PAGEMODE; - return rel->rd_tableam->scan_begin(rel, snapshot, nkeys, key, NULL, flags); + return rel->rd_tableam->scan_begin(rel, snapshot, nkeys, key, NULL, flags, NULL); } /* @@ -1060,8 +1067,8 @@ table_beginscan_es(Relation relation, Snapshot snapshot, ps, flags); return relation->rd_tableam->scan_begin(relation, snapshot, - nkeys, key, - parallel_scan, flags); + 0, NULL, + parallel_scan, flags, NULL); } /* @@ -1090,7 +1097,7 @@ table_beginscan_strat(Relation rel, Snapshot snapshot, if (allow_sync) flags |= SO_ALLOW_SYNC; - return rel->rd_tableam->scan_begin(rel, snapshot, nkeys, key, NULL, flags); + return rel->rd_tableam->scan_begin(rel, snapshot, nkeys, key, NULL, flags, NULL); } /* @@ -1105,7 +1112,7 @@ table_beginscan_bm(Relation rel, Snapshot snapshot, { uint32 flags = SO_TYPE_BITMAPSCAN | SO_ALLOW_PAGEMODE; - return rel->rd_tableam->scan_begin(rel, snapshot, nkeys, key, NULL, flags); + return rel->rd_tableam->scan_begin(rel, snapshot, nkeys, key, NULL, flags, NULL); } /* @@ -1127,7 +1134,7 @@ table_beginscan_bm_ecs(Relation rel, Snapshot snapshot, bitmapqualorig, flags); - return rel->rd_tableam->scan_begin(rel, snapshot, 0, NULL, NULL, flags); + return rel->rd_tableam->scan_begin(rel, snapshot, 
0, NULL, NULL, flags, NULL); } /* @@ -1152,7 +1159,7 @@ table_beginscan_sampling(Relation rel, Snapshot snapshot, if (allow_pagemode) flags |= SO_ALLOW_PAGEMODE; - return rel->rd_tableam->scan_begin(rel, snapshot, nkeys, key, NULL, flags); + return rel->rd_tableam->scan_begin(rel, snapshot, nkeys, key, NULL, flags, NULL); } /* @@ -1165,7 +1172,7 @@ table_beginscan_tid(Relation rel, Snapshot snapshot) { uint32 flags = SO_TYPE_TIDSCAN; - return rel->rd_tableam->scan_begin(rel, snapshot, 0, NULL, NULL, flags); + return rel->rd_tableam->scan_begin(rel, snapshot, 0, NULL, NULL, flags, NULL); } /* @@ -1174,11 +1181,11 @@ table_beginscan_tid(Relation rel, Snapshot snapshot) * the same data structure although the behavior is rather different. */ static inline TableScanDesc -table_beginscan_analyze(Relation rel) +table_beginscan_analyze(Relation rel, AnalyzeContext *ctx) { uint32 flags = SO_TYPE_ANALYZE; - return rel->rd_tableam->scan_begin(rel, NULL, 0, NULL, NULL, flags); + return rel->rd_tableam->scan_begin(rel, NULL, 0, NULL, NULL, flags, (void*) ctx); } /* @@ -1258,7 +1265,7 @@ table_beginscan_tidrange(Relation rel, Snapshot snapshot, TableScanDesc sscan; uint32 flags = SO_TYPE_TIDRANGESCAN | SO_ALLOW_PAGEMODE; - sscan = rel->rd_tableam->scan_begin(rel, snapshot, 0, NULL, NULL, flags); + sscan = rel->rd_tableam->scan_begin(rel, snapshot, 0, NULL, NULL, flags, NULL); /* Set the range of TIDs to scan */ sscan->rs_rd->rd_tableam->scan_set_tidrange(sscan, mintid, maxtid); diff --git a/src/include/cdb/cdbappendonlyam.h b/src/include/cdb/cdbappendonlyam.h index 125c1de4d23..d4d94925565 100644 --- a/src/include/cdb/cdbappendonlyam.h +++ b/src/include/cdb/cdbappendonlyam.h @@ -439,13 +439,13 @@ extern TableScanDesc appendonly_beginscan(Relation relation, Snapshot snapshot, int nkeys, struct ScanKeyData *key, ParallelTableScanDesc pscan, - uint32 flags); + uint32 flags,void * ctx); extern TableScanDesc appendonly_beginscan_extractcolumns(Relation rel, Snapshot snapshot, int 
nkeys, struct ScanKeyData *key,
							 ParallelTableScanDesc parallel_scan,
							 PlanState *ps,
-							 uint32 flags);
+							 uint32 flags);
 extern void appendonly_rescan(TableScanDesc scan, ScanKey key,
				  bool set_params, bool allow_strat,
				  bool allow_sync, bool allow_pagemode);
diff --git a/src/include/cdb/cdbdisp_query.h b/src/include/cdb/cdbdisp_query.h
index 5444c32e66c..9a2d03e4d48 100644
--- a/src/include/cdb/cdbdisp_query.h
+++ b/src/include/cdb/cdbdisp_query.h
@@ -47,7 +47,7 @@ typedef bool (*CdbNeedDispatchCommand_hook_type) (const char *strCommand,
 												int *flags, List *segments,
 												struct CdbPgResults *cdb_pgresults);
-extern PGDLLIMPORT CdbNeedDispatchCommand_hook_type CdbNeedDispatchCommand_hook;
+extern PGDLLIMPORT CdbNeedDispatchCommand_hook_type CdbNeedDispatchCommand_hook;
 typedef bool (*CdbNeedDispatchUtility_hook_type) (struct Node *stmt, int *flags);
 extern PGDLLIMPORT CdbNeedDispatchUtility_hook_type CdbNeedDispatchUtility_hook;
diff --git a/src/include/utils/rel.h b/src/include/utils/rel.h
index d0b8c5cb473..32ac9251d63 100644
--- a/src/include/utils/rel.h
+++ b/src/include/utils/rel.h
@@ -21,6 +21,7 @@
 #include "access/xlog.h"
 #include "catalog/pg_am.h"
 #include "catalog/pg_appendonly.h"
+#include "catalog/pg_am.h"
 #include "catalog/pg_class.h"
 #include "catalog/pg_index.h"
 #include "catalog/pg_publication.h"
@@ -482,6 +483,9 @@ typedef struct ViewOptions
 #define RelationIsHeap(relation) \
 	((relation)->rd_amhandler == F_HEAP_TABLEAM_HANDLER)
 
+#define RelationIsNonblockRelation(relation) \
+	((relation)->rd_tableam && (relation)->rd_rel->relam != HEAP_TABLE_AM_OID)
+
 #define AMHandlerIsAoRows(amhandler) ((amhandler) == F_AO_ROW_TABLEAM_HANDLER)
 #define AMHandlerIsAoCols(amhandler) \
 	((amhandler) == F_AO_COLUMN_TABLEAM_HANDLER)

From 1a22c6d0de14cf980bcf87a0a5cce3b379466a16 Mon Sep 17 00:00:00 2001
From: wangliang03 <140247334+wangliang03@users.noreply.github.com>
Date: Thu, 28 Sep 2023 10:43:10 +0800
Subject: [PATCH 019/152] New altertable rewrite dispatch policy (#223)
Support Altertable dispatch rewrite hook, do dispatch for every rewrite table remove the original dispatch routine after all the work done on QD Co-authored-by: xiaosongwang --- src/backend/commands/tablecmds.c | 8 ++++++-- src/include/commands/tablecmds.h | 5 +++++ 2 files changed, 11 insertions(+), 2 deletions(-) diff --git a/src/backend/commands/tablecmds.c b/src/backend/commands/tablecmds.c index 75ac387e602..73ee36b710b 100644 --- a/src/backend/commands/tablecmds.c +++ b/src/backend/commands/tablecmds.c @@ -356,7 +356,7 @@ static AlterTableCmd *ATParseTransformCmd(List **wqueue, AlteredTableInfo *tab, static void ATRewriteTables(AlterTableStmt *parsetree, List **wqueue, LOCKMODE lockmode, AlterTableUtilityContext *context); -static void ATRewriteTable(AlteredTableInfo *tab, Oid OIDNewHeap, LOCKMODE lockmode); +void ATRewriteTable(AlteredTableInfo *tab, Oid OIDNewHeap, LOCKMODE lockmode); static void ATAocsWriteSegFileNewColumns( AOCSAddColumnDesc idesc, AOCSHeaderScanDesc sdesc, AlteredTableInfo *tab, ExprContext *econtext, TupleTableSlot *slot, const char *relname); @@ -582,6 +582,7 @@ static bool prebuild_temp_table(Relation rel, RangeVar *tmpname, DistributedBy * char *amname, List *opts, bool isTmpTableAo, bool useExistingColumnAttributes); +ATRewriteTable_hook_type ATRewriteTable_hook = NULL; static void checkATSetDistributedByStandalone(AlteredTableInfo *tab, Relation rel); static void populate_rel_col_encodings(Relation rel, List *stenc, List *withOptions, Oid newAm); @@ -7362,7 +7363,7 @@ ATAocsWriteNewColumns(AlteredTableInfo *tab) * * OIDNewHeap is InvalidOid if we don't need to rewrite */ -static void +void ATRewriteTable(AlteredTableInfo *tab, Oid OIDNewHeap, LOCKMODE lockmode) { Relation oldrel; @@ -7379,6 +7380,9 @@ ATRewriteTable(AlteredTableInfo *tab, Oid OIDNewHeap, LOCKMODE lockmode) int ti_options; ExprState *partqualstate = NULL; + if (ATRewriteTable_hook) + ATRewriteTable_hook(tab, OIDNewHeap, lockmode); + /* * Open the relation(s). 
We have surely already locked the existing * table. diff --git a/src/include/commands/tablecmds.h b/src/include/commands/tablecmds.h index 202beaf56d9..eae233a6776 100644 --- a/src/include/commands/tablecmds.h +++ b/src/include/commands/tablecmds.h @@ -19,6 +19,7 @@ #include "catalog/pg_am.h" #include "executor/executor.h" #include "executor/tuptable.h" +#include "nodes/altertablenodes.h" #include "nodes/execnodes.h" #include "access/htup.h" #include "catalog/dependency.h" @@ -135,4 +136,8 @@ extern void GpRenameChildPartitions(Relation targetrelation, extern void set_random_distribution_if_drop_distkey(Relation rel, AttrNumber attnum); extern Datum get_rel_opts(Relation rel); + +typedef void (*ATRewriteTable_hook_type)(AlteredTableInfo *tab, Oid OIDNewHeap, LOCKMODE lockmode); +extern PGDLLIMPORT ATRewriteTable_hook_type ATRewriteTable_hook; + #endif /* TABLECMDS_H */ From 76aa413fe2ae5cd6e045379f5d57f2738ce4a327 Mon Sep 17 00:00:00 2001 From: HuSen Date: Sun, 8 Oct 2023 12:04:33 +0800 Subject: [PATCH 020/152] Fix: do not commit subtransaction through DTX protocol. (#226) Co-authored-by: leo --- src/backend/access/transam/xact.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/backend/access/transam/xact.c b/src/backend/access/transam/xact.c index 4f1923dfdb4..796a91d951b 100644 --- a/src/backend/access/transam/xact.c +++ b/src/backend/access/transam/xact.c @@ -5714,7 +5714,7 @@ ReleaseCurrentSubTransaction(void) if (Gp_role == GP_ROLE_DISPATCH) { - if (!doDispatchSubtransactionInternalCmd( + if (!NotifySubTransaction_hook && !doDispatchSubtransactionInternalCmd( DTX_PROTOCOL_COMMAND_SUBTRANSACTION_RELEASE_INTERNAL)) { elog(ERROR, From ca08709489ed3363d416879a9db5fbf9057397a1 Mon Sep 17 00:00:00 2001 From: HuSen8891 Date: Tue, 10 Oct 2023 11:52:41 +0800 Subject: [PATCH 021/152] Add: new hooks for plugins to get control in syscache. 1. Hook 'SearchCatCache_hook' for plugins to get control in SearchCatCache. 2. 
Hook 'ReleaseCatCache_hook' for plugins to get control in ReleaseCatCache. --- src/backend/utils/cache/catcache.c | 17 ++++++++++++++--- src/include/utils/catcache.h | 17 +++++++++++++++++ 2 files changed, 31 insertions(+), 3 deletions(-) diff --git a/src/backend/utils/cache/catcache.c b/src/backend/utils/cache/catcache.c index 60f643c2d87..65dcb1f2a83 100644 --- a/src/backend/utils/cache/catcache.c +++ b/src/backend/utils/cache/catcache.c @@ -61,6 +61,12 @@ #define CACHE_elog(...) #endif +/* Hook for plugins to get control in SearchCatCache */ +SearchCatCache_hook_type SearchCatCache_hook = NULL; + +/* Hook for plugins to get control in ReleaseCatCache */ +ReleaseCatCache_hook_type ReleaseCatCache_hook = NULL; + /* Cache management header --- pointer is NULL until created */ static CatCacheHeader *CacheHdr = NULL; @@ -89,7 +95,6 @@ static void CatCachePrintStats(int code, Datum arg); #endif static void CatCacheRemoveCTup(CatCache *cache, CatCTup *ct); static void CatCacheRemoveCList(CatCache *cache, CatCList *cl); -static void CatalogCacheInitializeCache(CatCache *cache); static CatCTup *CatalogCacheCreateEntry(CatCache *cache, HeapTuple ntp, Datum *arguments, uint32 hashValue, Index hashIndex, @@ -920,7 +925,7 @@ do { \ #define CatalogCacheInitializeCache_DEBUG2 #endif -static void +void CatalogCacheInitializeCache(CatCache *cache) { Relation relation; @@ -1081,7 +1086,7 @@ InitCatCachePhase2(CatCache *cache, bool touch_index) * authentication even if we don't yet have relcache entries for those * catalogs' indexes. 
*/ -static bool +bool IndexScanOK(CatCache *cache, ScanKey cur_skey) { switch (cache->id) @@ -1270,6 +1275,9 @@ SearchCatCacheInternal(CatCache *cache, Assert(cache->cc_nkeys == nkeys); + if (SearchCatCache_hook) + return (*SearchCatCache_hook)(cache, nkeys, v1, v2, v3, v4); + /* * one-time startup overhead for each cache */ @@ -1506,6 +1514,9 @@ SearchCatCacheMiss(CatCache *cache, void ReleaseCatCache(HeapTuple tuple) { + if (ReleaseCatCache_hook) + return (*ReleaseCatCache_hook)(tuple); + CatCTup *ct = (CatCTup *) (((char *) tuple) - offsetof(CatCTup, tuple)); diff --git a/src/include/utils/catcache.h b/src/include/utils/catcache.h index 41942f8583c..e62c2d4018b 100644 --- a/src/include/utils/catcache.h +++ b/src/include/utils/catcache.h @@ -185,6 +185,18 @@ typedef struct catcacheheader int ch_ntup; /* # of tuples in all caches */ } CatCacheHeader; +/* Hook for plugins to get control in SearchCatCache */ +typedef HeapTuple (*SearchCatCache_hook_type)(CatCache *cache, + int nkeys, + Datum v1, + Datum v2, + Datum v3, + Datum v4); +extern PGDLLIMPORT SearchCatCache_hook_type SearchCatCache_hook; + +/* Hook for plugins to get control in ReleaseCatCache */ +typedef void (*ReleaseCatCache_hook_type)(HeapTuple tuple); +extern PGDLLIMPORT ReleaseCatCache_hook_type ReleaseCatCache_hook; /* this extern duplicates utils/memutils.h... 
*/ extern PGDLLIMPORT MemoryContext CacheMemoryContext; @@ -194,8 +206,13 @@ extern void CreateCacheMemoryContext(void); extern CatCache *InitCatCache(int id, Oid reloid, Oid indexoid, int nkeys, const int *key, int nbuckets); + +extern void CatalogCacheInitializeCache(CatCache *cache); + extern void InitCatCachePhase2(CatCache *cache, bool touch_index); +extern bool IndexScanOK(CatCache *cache, ScanKey cur_skey); + extern HeapTuple SearchCatCache(CatCache *cache, Datum v1, Datum v2, Datum v3, Datum v4); extern HeapTuple SearchCatCache1(CatCache *cache, From 68a1212ca3ea9cf4503ed4376378b3f15f118a56 Mon Sep 17 00:00:00 2001 From: zhangwenchao <656540940@qq.com> Date: Mon, 9 Oct 2023 16:58:51 +0800 Subject: [PATCH 022/152] Change myTempNamespace from static variable to extern variable. --- src/backend/catalog/namespace.c | 8 ++++---- src/include/catalog/namespace.h | 4 ++++ 2 files changed, 8 insertions(+), 4 deletions(-) diff --git a/src/backend/catalog/namespace.c b/src/backend/catalog/namespace.c index f367b00a675..1512e550974 100644 --- a/src/backend/catalog/namespace.c +++ b/src/backend/catalog/namespace.c @@ -171,7 +171,7 @@ static bool baseTempCreationPending = false; static Oid namespaceUser = InvalidOid; /* The above four values are valid only if baseSearchPathValid */ -static bool baseSearchPathValid = true; +bool baseSearchPathValid = true; /* Override requests are remembered in a stack of OverrideStackEntry structs */ @@ -199,11 +199,11 @@ static List *overrideStack = NIL; * we either haven't made the TEMP namespace yet, or have successfully * committed its creation, depending on whether myTempNamespace is valid. 
*/ -static Oid myTempNamespace = InvalidOid; +Oid myTempNamespace = InvalidOid; -static Oid myTempToastNamespace = InvalidOid; +Oid myTempToastNamespace = InvalidOid; -static SubTransactionId myTempNamespaceSubID = InvalidSubTransactionId; +SubTransactionId myTempNamespaceSubID = InvalidSubTransactionId; /* * This is the user's textual search path specification --- it's the value diff --git a/src/include/catalog/namespace.h b/src/include/catalog/namespace.h index 13c03e65ce6..f16cab22039 100644 --- a/src/include/catalog/namespace.h +++ b/src/include/catalog/namespace.h @@ -195,4 +195,8 @@ extern char *namespace_search_path; extern List *fetch_search_path(bool includeImplicit); extern int fetch_search_path_array(Oid *sarray, int sarray_len); +extern Oid myTempNamespace; +extern Oid myTempToastNamespace; +extern SubTransactionId myTempNamespaceSubID; +extern bool baseSearchPathValid; #endif /* NAMESPACE_H */ From 3137ea5070bebcde3bb742e5f5c307297b879675 Mon Sep 17 00:00:00 2001 From: zhangwenchao <656540940@qq.com> Date: Tue, 10 Oct 2023 18:59:25 +0800 Subject: [PATCH 023/152] Implement drop warehouse --- src/backend/storage/lmgr/lmgr.c | 32 +++++++++++++++++++++++++++++- src/backend/utils/cache/syscache.c | 23 +++++++++++++++++++++ src/include/catalog/gp_warehouse.h | 5 +++-- src/include/storage/lmgr.h | 1 + src/include/storage/lock.h | 2 +- src/include/utils/syscache.h | 2 ++ 6 files changed, 61 insertions(+), 4 deletions(-) diff --git a/src/backend/storage/lmgr/lmgr.c b/src/backend/storage/lmgr/lmgr.c index 070cd69eca8..61084d83e57 100644 --- a/src/backend/storage/lmgr/lmgr.c +++ b/src/backend/storage/lmgr/lmgr.c @@ -807,10 +807,40 @@ void LockWarehouse(Oid warehouseOid, LOCKMODE lockmode) { LOCKTAG tag; + LOCALLOCK *locallock; + LockAcquireResult lockResult; SET_LOCKTAG_WAREHOUSE(tag, warehouseOid); - (void) LockAcquire(&tag, lockmode, true, false); + lockResult = LockAcquireExtended(&tag, lockmode, true, true, true, &locallock); + + /* + * Now that we have the 
lock, check for invalidation messages; + */ + if (lockResult != LOCKACQUIRE_ALREADY_CLEAR) + { + AcceptInvalidationMessages(); + MarkLockClear(locallock); + } +} + +LockAcquireResult +LockWarehouseNoWait(Oid warehouseOid, LOCKMODE lockmode) +{ + LOCKTAG tag; + LockAcquireResult lockResult; + + SET_LOCKTAG_WAREHOUSE(tag, warehouseOid); + + lockResult = LockAcquire(&tag, lockmode, true, true); + + /* + * Now that we have the lock, check for invalidation messages; + */ + if (lockResult != LOCKACQUIRE_ALREADY_HELD) + AcceptInvalidationMessages(); + + return lockResult; } void diff --git a/src/backend/utils/cache/syscache.c b/src/backend/utils/cache/syscache.c index c1cf8d49895..065c459eb87 100644 --- a/src/backend/utils/cache/syscache.c +++ b/src/backend/utils/cache/syscache.c @@ -83,6 +83,7 @@ #include "catalog/pg_user_mapping.h" #include "catalog/gp_storage_user_mapping.h" #include "catalog/gp_storage_server.h" +#include "catalog/gp_warehouse.h" #include "lib/qunique.h" #include "utils/catcache.h" #include "utils/rel.h" @@ -549,6 +550,28 @@ static const struct cachedesc cacheinfo[] = { }, 1024 }, + {GpWarehouseRelationId, /* GPWAREHOUSENAME */ + GpWarehouseNameIndexId, + 1, + { + Anum_gp_warehouse_warehouse_name, + 0, + 0, + 0 + }, + 4 + }, + {GpWarehouseRelationId, /* GPWAREHOUSEOID */ + GpWarehouseOidIndexId, + 1, + { + Anum_gp_warehouse_oid, + 0, + 0, + 0 + }, + 4 + }, {AppendOnlyRelationId, /* AORELID */ AppendOnlyRelidIndexId, 1, diff --git a/src/include/catalog/gp_warehouse.h b/src/include/catalog/gp_warehouse.h index 9fe4cf24d25..85c1819a5b4 100644 --- a/src/include/catalog/gp_warehouse.h +++ b/src/include/catalog/gp_warehouse.h @@ -36,8 +36,9 @@ */ CATALOG(gp_warehouse,8690,GpWarehouseRelationId) BKI_SHARED_RELATION { - Oid oid; /* oid */ - text warehouse_name; /* warehouse name */ + Oid oid BKI_FORCE_NOT_NULL; /* oid */ + int32 warehouse_size; /* warehouse size */ + text warehouse_name BKI_FORCE_NOT_NULL; /* warehouse name */ } FormData_gp_warehouse; typedef 
FormData_gp_warehouse *Form_gp_warehouse; diff --git a/src/include/storage/lmgr.h b/src/include/storage/lmgr.h index d651e501ac4..d2c2ae45237 100644 --- a/src/include/storage/lmgr.h +++ b/src/include/storage/lmgr.h @@ -125,5 +125,6 @@ extern void GxactLockTableWait(DistributedTransactionId xid); /* Lock a warehouse */ extern void LockWarehouse(Oid warehouseOid, LOCKMODE lockmode); +extern LockAcquireResult LockWarehouseNoWait(Oid warehouseOid, LOCKMODE lockmode); extern void UnlockWarehouse(Oid warehouseOid, LOCKMODE lockmode); #endif /* LMGR_H */ diff --git a/src/include/storage/lock.h b/src/include/storage/lock.h index 0c0284c9ed3..ea7c7fecaa6 100644 --- a/src/include/storage/lock.h +++ b/src/include/storage/lock.h @@ -323,7 +323,7 @@ typedef struct LOCKTAG (locktag).locktag_field3 = 0, \ (locktag).locktag_field4 = 0, \ (locktag).locktag_type = LOCKTAG_WAREHOUSE, \ - (locktag).locktag_lockmethodid = DEFAULT_LOCKMETHOD) + (locktag).locktag_lockmethodid = USER_LOCKMETHOD) /* * Per-locked-object lock information: diff --git a/src/include/utils/syscache.h b/src/include/utils/syscache.h index aa3b72685d3..d7ae7156203 100644 --- a/src/include/utils/syscache.h +++ b/src/include/utils/syscache.h @@ -68,6 +68,8 @@ enum SysCacheIdentifier STORAGESERVEROID, FOREIGNTABLEREL, GPPOLICYID, + GPWAREHOUSENAME, + GPWAREHOUSEOID, AORELID, INDEXRELID, LANGNAME, From 08b65a7906a180113de2aeda7dbe83a8dbdd318c Mon Sep 17 00:00:00 2001 From: HuSen8891 Date: Fri, 13 Oct 2023 16:02:55 +0800 Subject: [PATCH 024/152] Add: new hook for plugins to validate the relation Hook 'RelationValidation_hook' for plugins to validate the relation in relcache. 
--- src/backend/utils/cache/relcache.c | 8 ++++++++ src/include/utils/relcache.h | 6 ++++++ 2 files changed, 14 insertions(+) diff --git a/src/backend/utils/cache/relcache.c b/src/backend/utils/cache/relcache.c index a7ba7f5a4ca..92fa7162151 100644 --- a/src/backend/utils/cache/relcache.c +++ b/src/backend/utils/cache/relcache.c @@ -209,6 +209,11 @@ static bool eoxact_list_overflowed = false; eoxact_list_overflowed = true; \ } while (0) +/* + * Hook for plugins to validate the relation in RelationIdGetRelation. + */ +RelationValidation_hook_type RelationValidation_hook = NULL; + /* * EOXactTupleDescArray stores TupleDescs that (might) need AtEOXact * cleanup work. The array expands as needed; there is no hashtable because @@ -2173,6 +2178,9 @@ RelationIdGetRelation(Oid relationId) return NULL; } + if (RelationValidation_hook) + (*RelationValidation_hook)(relationId, rd); + RelationIncrementReferenceCount(rd); /* revalidate cache entry if necessary */ if (!rd->rd_isvalid) diff --git a/src/include/utils/relcache.h b/src/include/utils/relcache.h index 1849746c0d7..4186992f324 100644 --- a/src/include/utils/relcache.h +++ b/src/include/utils/relcache.h @@ -36,6 +36,12 @@ typedef struct RelationData *Relation; */ typedef Relation *RelationPtr; +/* + * Hook for plugins to validate the relation in RelationIdGetRelation. 
+ */ +typedef void (*RelationValidation_hook_type)(Oid relationId, Relation relation); +extern PGDLLIMPORT RelationValidation_hook_type RelationValidation_hook; + /* * Routines to open (lookup) and close a relcache entry */ From 6414895bb558889411f7738ecc25e3a4b3bbcb3d Mon Sep 17 00:00:00 2001 From: GongXun Date: Tue, 26 Sep 2023 19:44:55 +0800 Subject: [PATCH 025/152] fix triggers From 6b42b221cb1771fab653a22b07585e83d453f219 Mon Sep 17 00:00:00 2001 From: leo Date: Mon, 23 Oct 2023 10:27:33 +0800 Subject: [PATCH 026/152] Add: some interfaces to get transaction state and xids --- src/backend/access/transam/xact.c | 166 ++++++++++++++++++++++++++++-- src/include/access/xact.h | 12 ++- 2 files changed, 167 insertions(+), 11 deletions(-) diff --git a/src/backend/access/transam/xact.c b/src/backend/access/transam/xact.c index 796a91d951b..92d6dda92f6 100644 --- a/src/backend/access/transam/xact.c +++ b/src/backend/access/transam/xact.c @@ -7113,9 +7113,9 @@ XactLogAbortRecord(TimestampTz abort_time, Assert(CritSectionCount > 0); if (XactLogAbortRecord_hook) - (*XactLogAbortRecord_hook) (abort_time, tablespace_oid_to_delete_on_abort, - nsubxacts, subxacts, nrels, rels, ndeldbs, deldbs, - xactflags, twophase_xid, twophase_gid); + return (*XactLogAbortRecord_hook) (abort_time, tablespace_oid_to_delete_on_abort, + nsubxacts, subxacts, nrels, rels, ndeldbs, deldbs, + xactflags, twophase_xid, twophase_gid); xl_xinfo.xinfo = 0; @@ -7656,20 +7656,24 @@ MarkSubTransactionAssigned(void) } /* - * Get all xids of top level transaction and subtransactons + * Get all xids of top level transaction and subtransactons, exclude subcommitted child XIDs. 
*/ FullTransactionId * GetAllXids(int *nxids) { FullTransactionId *xids = NULL; int len = PGPROC_MAX_CACHED_SUBXIDS; + TransactionState xact = CurrentTransactionState; *nxids = 0; + + while (xact && !FullTransactionIdIsValid(xact->fullTransactionId)) + { + xact = xact->parent; + } - if (FullTransactionIdIsValid(CurrentTransactionState->fullTransactionId)) + if (xact) { - TransactionState xact = CurrentTransactionState; - if (xids == NULL) xids = (FullTransactionId *)palloc(sizeof(FullTransactionId) * len); xids[(*nxids)++] = xact->fullTransactionId; @@ -7690,17 +7694,114 @@ GetAllXids(int *nxids) return xids; } +/* + * Get all xids of top level transaction and subtransactons, include subcommitted child XIDs. + */ +TransactionId * +GetAllChildXids(int *nxids) +{ + TransactionId *xids = NULL; + int len = PGPROC_MAX_CACHED_SUBXIDS; + TransactionState xact = CurrentTransactionState; + + *nxids = 0; + + while (xact && !FullTransactionIdIsValid(xact->fullTransactionId)) + { + xact = xact->parent; + } + + while (xact) + { + int index = 0; + int nChildXids = xact->nChildXids; + + /* exclude top transaction's xid */ + if (xact->parent) + nChildXids += 1; + + if (xids == NULL) + xids = (TransactionId *)palloc(sizeof(TransactionId) * len); + else if ((*nxids) + nChildXids >= len) + { + len = ((*nxids) + nChildXids) * 2; + + xids = (TransactionId *)repalloc(xids, sizeof(TransactionId) * len); + } + + if (nChildXids) + memmove((char *)xids + nChildXids * sizeof(TransactionId), (char *)xids, (*nxids) * sizeof(TransactionId)); + + if (xact->parent) + xids[index++] = XidFromFullTransactionId(xact->fullTransactionId); + + if (xact->nChildXids) + memcpy((char *)xids + index * sizeof(TransactionId), (char *)xact->childXids, xact->nChildXids * sizeof(TransactionId)); + + (*nxids) = (*nxids) + nChildXids; + + xact = xact->parent; + } + + return xids; +} + +void +SetChildXids(int nChildXids, TransactionId *childXids) +{ + TransactionState xact; + MemoryContext old; + + if (nChildXids == 
0) + return; + + xact = CurrentTransactionState; + old = MemoryContextSwitchTo(xact->curTransactionContext); + + if (xact->maxChildXids < nChildXids) + { + int new_maxChildXids; + TransactionId *new_childXids; + + new_maxChildXids = Min(nChildXids * 2, + (int) (MaxAllocSize / sizeof(TransactionId))); + + if (new_maxChildXids < nChildXids) + ereport(ERROR, + (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED), + errmsg("maximum number of committed subtransactions (%d) exceeded", + (int) (MaxAllocSize / sizeof(TransactionId))))); + + if (xact->childXids == NULL) + new_childXids = + MemoryContextAlloc(TopTransactionContext, + new_maxChildXids * sizeof(TransactionId)); + else + new_childXids = repalloc(xact->childXids, + new_maxChildXids * sizeof(TransactionId)); + + xact->childXids = new_childXids; + xact->maxChildXids = new_maxChildXids; + } + + memcpy((char *)xact->childXids, (char *)childXids, nChildXids * sizeof(TransactionId)); + + xact->nChildXids = nChildXids; + + MemoryContextSwitchTo(old); +} + /* * Get number of transaction and subtransactions which have no xid. 
*/ int -GetNumOfTxnStatesWithoutXid(void) +GetNumOfTxnStatesWithoutXid(TransactionState transactionState) { int nlevels = 0; - if (!FullTransactionIdIsValid(CurrentTransactionState->fullTransactionId)) + if (!FullTransactionIdIsValid(transactionState->fullTransactionId)) { - TransactionState xact = CurrentTransactionState; + TransactionState xact = transactionState; nlevels++; @@ -7720,4 +7821,49 @@ GetNumOfTxnStatesWithoutXid(void) } return nlevels; +} + +/* + * Get current transaction state + */ +TransactionState +GetCurrentTransactionState(void) +{ + return CurrentTransactionState; +} + +/* + * Get parent transaction state of given transaction state + */ +TransactionState +GetParentTransactionState(TransactionState transactionState) +{ + return transactionState->parent; +} + +/* + * Get nesting level of given transaction state + */ +int +GetTransactionNestLevel(TransactionState transactionState) +{ + return transactionState->nestingLevel; +} + +/* + * Get full transaction id of given transaction state + */ +FullTransactionId +GetFullTransactionId(TransactionState transactionState) +{ + return transactionState->fullTransactionId; +} + +/* + * Set current transaction state to given transaction state + */ +void +SetCurrentTransactionState(TransactionState transactionState) +{ + CurrentTransactionState = transactionState; } \ No newline at end of file diff --git a/src/include/access/xact.h b/src/include/access/xact.h index 64c07e4d730..c30704aabe0 100644 --- a/src/include/access/xact.h +++ b/src/include/access/xact.h @@ -467,6 +467,9 @@ typedef struct xl_xact_distributed_forget DistributedTransactionId gxid; } xl_xact_distributed_forget; +struct TransactionStateData; +typedef struct TransactionStateData *TransactionState; + /* ---------------- * extern definitions * ---------------- @@ -554,7 +557,14 @@ extern XLogRecPtr RecordDistributedForgetCommitted(DistributedTransactionId gxid extern bool IsSubTransactionAssignmentPending(void); extern void 
MarkSubTransactionAssigned(void); extern FullTransactionId *GetAllXids(int *nxids); -extern int GetNumOfTxnStatesWithoutXid(void); +extern TransactionId *GetAllChildXids(int *nxids); +extern void SetChildXids(int nChildXids, TransactionId *childXids); +extern int GetNumOfTxnStatesWithoutXid(TransactionState transactionState); +extern TransactionState GetCurrentTransactionState(void); +extern TransactionState GetParentTransactionState(TransactionState transactionState); +extern int GetTransactionNestLevel(TransactionState transactionState); +extern FullTransactionId GetFullTransactionId(TransactionState transactionState); +extern void SetCurrentTransactionState(TransactionState transactionState); extern int xactGetCommittedChildren(TransactionId **ptr); From f81b4d945374dbce6e2434b4c84eb1bfa1c683f4 Mon Sep 17 00:00:00 2001 From: zhangwenchao <656540940@qq.com> Date: Wed, 1 Nov 2023 16:24:10 +0800 Subject: [PATCH 027/152] Fix copy from freeze will check subtransaction id in QEs. --- src/backend/commands/copyfrom.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/backend/commands/copyfrom.c b/src/backend/commands/copyfrom.c index 18585a27e35..67a767f273f 100644 --- a/src/backend/commands/copyfrom.c +++ b/src/backend/commands/copyfrom.c @@ -1712,7 +1712,8 @@ CopyFrom(CopyFromState cstate) errmsg("cannot perform COPY FREEZE because of prior transaction activity"))); if (cstate->rel->rd_createSubid != GetCurrentSubTransactionId() && - cstate->rel->rd_newRelfilenodeSubid != GetCurrentSubTransactionId()) + cstate->rel->rd_newRelfilenodeSubid != GetCurrentSubTransactionId() && + ((enable_serverless && Gp_role == GP_ROLE_DISPATCH) || !enable_serverless)) ereport(ERROR, (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE), errmsg("cannot perform COPY FREEZE because the table was not created or truncated in the current subtransaction"))); From bea9c7925d92f869b0ad1a96436e452b7cb03d79 Mon Sep 17 00:00:00 2001 From: HuSen Date: Fri, 3 Nov 2023 17:53:24 +0800 
Subject: [PATCH 028/152] Add: new hook to get control in getgpsegmentCount. (#277) New hook 'getgpsegmentCount_hook' for plugins to get control in getgpsegmentCount. --- src/backend/cdb/cdbutil.c | 11 +++++++++++ src/include/cdb/cdbutil.h | 8 +++++++- 2 files changed, 18 insertions(+), 1 deletion(-) diff --git a/src/backend/cdb/cdbutil.c b/src/backend/cdb/cdbutil.c index 88381f8eaa4..d50106c972f 100644 --- a/src/backend/cdb/cdbutil.c +++ b/src/backend/cdb/cdbutil.c @@ -80,6 +80,11 @@ MemoryContext CdbComponentsContext = NULL; static CdbComponentDatabases *cdb_component_dbs = NULL; +/* + * Hook for plugins to get control in getgpsegmentCount. + */ +getgpsegmentCount_hook_type getgpsegmentCount_hook = NULL; + #ifdef USE_INTERNAL_FTS /* @@ -1878,6 +1883,9 @@ getgpsegmentCount(void) /* 1 represents a singleton postgresql in utility mode */ int32 numsegments = 1; + if (getgpsegmentCount_hook) + return (*getgpsegmentCount_hook)(); + if (Gp_role == GP_ROLE_DISPATCH) numsegments = cdbcomponent_getCdbComponents()->total_segments; else if (Gp_role == GP_ROLE_EXECUTE) @@ -4099,6 +4107,9 @@ getgpsegmentCount(void) /* 1 represents a singleton postgresql in utility mode */ int32 numsegments = 1; + if (getgpsegmentCount_hook) + return (*getgpsegmentCount_hook)(); + if (Gp_role == GP_ROLE_DISPATCH) numsegments = cdbcomponent_getCdbComponents()->total_segments; else if (Gp_role == GP_ROLE_EXECUTE) diff --git a/src/include/cdb/cdbutil.h b/src/include/cdb/cdbutil.h index 22c3cc782d8..50465f65fdb 100644 --- a/src/include/cdb/cdbutil.h +++ b/src/include/cdb/cdbutil.h @@ -52,7 +52,13 @@ typedef enum SegmentType }SegmentType; /* - * performs all necessary setup required for initializing Apache Cloudberry components. + * Hook for plugins to get control in getgpsegmentCount. 
+ */ +typedef int (*getgpsegmentCount_hook_type)(void); +extern PGDLLIMPORT getgpsegmentCount_hook_type getgpsegmentCount_hook; + +/* + * performs all necessary setup required for initializing Cloudberry Database components. * * This includes cdblink_setup() and initializing the Motion Layer. * From f660c745ab9de5ae1150eee5a1a6fceb4fd1674e Mon Sep 17 00:00:00 2001 From: roseduan Date: Tue, 21 Nov 2023 13:37:45 +0800 Subject: [PATCH 029/152] make main_manifest table not shared (#311) It's reasonable that main_manifest is not shared like pg_class. --- src/backend/catalog/catalog.c | 5 ----- src/include/catalog/main_manifest.h | 4 ++-- 2 files changed, 2 insertions(+), 7 deletions(-) diff --git a/src/backend/catalog/catalog.c b/src/backend/catalog/catalog.c index 7241bf9fafe..8cbb1d1afe3 100644 --- a/src/backend/catalog/catalog.c +++ b/src/backend/catalog/catalog.c @@ -590,11 +590,6 @@ IsSharedRelation(Oid relationId) return true; } - if (relationId == ManifestRelationId) - { - return true; - } - return false; } diff --git a/src/include/catalog/main_manifest.h b/src/include/catalog/main_manifest.h index 4325ebd3e3b..b2f90156041 100644 --- a/src/include/catalog/main_manifest.h +++ b/src/include/catalog/main_manifest.h @@ -22,10 +22,10 @@ * typedef struct FormData_main_manifest * ---------------- */ -CATALOG(main_manifest,9004,ManifestRelationId) BKI_SHARED_RELATION +CATALOG(main_manifest,9004,ManifestRelationId) { RelFileNodeId relnode; - text path; + text path; } FormData_main_manifest; typedef FormData_main_manifest *Form_main_manifest; From 7b54d033a8c242da3efbb47c05f8089110796b68 Mon Sep 17 00:00:00 2001 From: liushengsong Date: Tue, 21 Nov 2023 16:16:42 +0800 Subject: [PATCH 030/152] Fix: enable trigger throught FDW in the serverless architecture. In serverless architecture, implementing trigger the same as foreign table which use tuplestore to store the tuple is more efficient. Because it is inefficient to fetch tuple throught its ctid. 
Besides, in serverless architecture, concurrent update or delete is not supported. So we can fetch tuple directly without lock tuple in GetTupleForTrigger. --- src/backend/access/table/table.c | 7 +++-- src/backend/commands/trigger.c | 51 +++++++++++++++++++++++++++++--- 2 files changed, 52 insertions(+), 6 deletions(-) diff --git a/src/backend/access/table/table.c b/src/backend/access/table/table.c index d666f02825e..fbc27d81ff5 100644 --- a/src/backend/access/table/table.c +++ b/src/backend/access/table/table.c @@ -232,8 +232,11 @@ CdbTryOpenTable(Oid relid, LOCKMODE reqmode, bool *lockUpgraded) lockmode = RowExclusiveLock; rel = try_table_open(relid, lockmode, false); - if (RelationIsValid(rel) && - RelationIsNonblockRelation(rel)) +#ifdef SERVERLESS + if (RelationIsNonblockRelation(rel)) +#else /* SERVERLESS */ + if (RelationIsAppendOptimized(rel)) +#endif /* SERVERLESS */ { /* * AO|AOCO table does not support concurrently diff --git a/src/backend/commands/trigger.c b/src/backend/commands/trigger.c index 57a258102c5..a9ff96870da 100644 --- a/src/backend/commands/trigger.c +++ b/src/backend/commands/trigger.c @@ -221,6 +221,17 @@ CreateTriggerFiringOn(CreateTrigStmt *stmt, const char *queryString, errmsg("\"%s\" is a table", RelationGetRelationName(rel)), errdetail("Tables cannot have INSTEAD OF triggers."))); + /* + * FIXME: table which is not a heap table and AO table + * does not support constraint(deferred) trigger now. 
+ */ + if (stmt->isconstraint && enable_serverless && + (!RelationIsHeap(rel) && !RelationIsAppendOptimized(rel))) + ereport(ERROR, + (errcode(ERRCODE_GP_FEATURE_NOT_YET), + errmsg("\"%s\" is not a heap table and AO table", + RelationGetRelationName(rel)), + errdetail("constraint trigger is not supported now"))); } else if (rel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE) { @@ -716,7 +727,7 @@ CreateTriggerFiringOn(CreateTrigStmt *stmt, const char *queryString, } /* Check GPDB limitations */ - if (RelationIsNonblockRelation(rel) && + if (RelationIsAppendOptimized(rel) && TRIGGER_FOR_ROW(tgtype) && !stmt->isconstraint) { @@ -3104,8 +3115,24 @@ GetTupleForTrigger(EState *estate, { Relation relation = relinfo->ri_RelationDesc; + /* + * FIXME: table which is not a heap table and AO table does not support + * concurrently update or delete. So we can fetch tuple directly + * without locking tuple. + */ + if(enable_serverless && (!RelationIsHeap(relation) && !RelationIsAppendOptimized(relation))) + { + /* + * We expect the tuple to be present, thus very simple error handling + * suffices. 
+ */ + if (!table_tuple_fetch_row_version(relation, tid, SnapshotAny, + oldslot)) + elog(ERROR, "failed to fetch tuple for trigger"); + return true; + } /* these should be rejected when you try to create such triggers, but let's check */ - if (RelationIsNonblockRelation(relation)) + if (RelationIsAppendOptimized(relation)) elog(ERROR, "UPDATE and DELETE triggers are not supported on append-only tables"); Assert(RelationIsHeap(relation)); @@ -4347,7 +4374,8 @@ afterTriggerInvokeEvents(AfterTriggerEventList *events, ExecDropSingleTupleTableSlot(slot2); slot1 = slot2 = NULL; } - if (rel->rd_rel->relkind == RELKIND_FOREIGN_TABLE) + if (rel->rd_rel->relkind == RELKIND_FOREIGN_TABLE || + (enable_serverless && (!RelationIsHeap(rel) && !RelationIsAppendOptimized(rel)))) { slot1 = MakeSingleTupleTableSlot(rel->rd_att, &TTSOpsMinimalTuple); @@ -5334,6 +5362,14 @@ AfterTriggerSetState(ConstraintsSetStmt *stmt) { int my_level = GetCurrentTransactionNestLevel(); + /* + * FIXME: deferred trigger is not supported in the serverless architecture now. + */ + if (enable_serverless && stmt->deferred) + ereport(ERROR, + (errcode(ERRCODE_GP_FEATURE_NOT_YET), + errmsg("deferred trigger is not supported in Cloudberry now"))); + /* If we haven't already done so, initialize our state. */ if (afterTriggers.state == NULL) afterTriggers.state = SetConstraintStateCreate(8); @@ -5944,7 +5980,14 @@ AfterTriggerSaveEvent(EState *estate, ResultRelInfo *relinfo, modifiedCols, oldslot, newslot)) continue; - if (relkind == RELKIND_FOREIGN_TABLE && row_trigger) + /* + * In serverless architecture, implementing trigger the + * same as foreign table which use tuplestore to store the tuple + * is more efficient. Because it is inefficient to fetch tuple + * throught its ctid. 
+ */ + if (row_trigger && (relkind == RELKIND_FOREIGN_TABLE || + (enable_serverless && (!RelationIsHeap(rel) && !RelationIsAppendOptimized(rel))))) { if (fdw_tuplestore == NULL) { From d19b5c47ab456a050cf9c75851d02b50a1592e11 Mon Sep 17 00:00:00 2001 From: HuSen Date: Thu, 23 Nov 2023 15:14:01 +0800 Subject: [PATCH 031/152] Add interface and export struct to public. (#304) Add interface to get access to 'PortalHashTable', and make interface 'DispatchSetPGVariable' and struct 'portalhashent' public. --- src/backend/utils/misc/guc.c | 4 +--- src/backend/utils/mmgr/portalmem.c | 12 ++++++++++-- src/include/utils/guc.h | 1 + src/include/utils/portal.h | 9 +++++++++ 4 files changed, 21 insertions(+), 5 deletions(-) diff --git a/src/backend/utils/misc/guc.c b/src/backend/utils/misc/guc.c index 626eeb7aa70..afb6332cb6a 100644 --- a/src/backend/utils/misc/guc.c +++ b/src/backend/utils/misc/guc.c @@ -5174,8 +5174,6 @@ static void write_auto_conf_file(int fd, const char *filename, ConfigVariable *h static void replace_auto_config_value(ConfigVariable **head_p, ConfigVariable **tail_p, const char *name, const char *value); -static void DispatchSetPGVariable(const char *name, List *args, bool is_local); - /* * Some infrastructure for checking malloc/strdup/realloc calls */ @@ -9229,7 +9227,7 @@ SetPGVariableOptDispatch(const char *name, List *args, bool is_local, bool gp_di DispatchSetPGVariable(name, args, is_local); } -static void +void DispatchSetPGVariable(const char *name, List *args, bool is_local) { ListCell *l; diff --git a/src/backend/utils/mmgr/portalmem.c b/src/backend/utils/mmgr/portalmem.c index 45812a608e0..4a4d70d15f5 100644 --- a/src/backend/utils/mmgr/portalmem.c +++ b/src/backend/utils/mmgr/portalmem.c @@ -50,6 +50,7 @@ * ---------------- */ +#if 0 #define MAX_PORTALNAME_LEN NAMEDATALEN typedef struct portalhashent @@ -57,6 +58,7 @@ typedef struct portalhashent char portalname[MAX_PORTALNAME_LEN]; Portal portal; } PortalHashEnt; +#endif static HTAB 
*PortalHashTable = NULL; @@ -1503,8 +1505,8 @@ GetAllParallelRetrieveCursorPortals(void) int GetNumOfParallelRetrieveCursors(void) { - List *portals; - int sum; + List * portals; + int sum; portals = GetAllParallelRetrieveCursorPortals(); sum = list_length(portals); @@ -1513,3 +1515,9 @@ GetNumOfParallelRetrieveCursors(void) return sum; } + +HTAB * +GetPortalHashTable(void) +{ + return PortalHashTable; +} diff --git a/src/include/utils/guc.h b/src/include/utils/guc.h index 1eab599277c..75ec1c99d33 100644 --- a/src/include/utils/guc.h +++ b/src/include/utils/guc.h @@ -766,6 +766,7 @@ extern int GetNumConfigOptions(void); extern void SetPGVariable(const char *name, List *args, bool is_local); extern void SetPGVariableOptDispatch(const char *name, List *args, bool is_local, bool gp_dispatch); +extern void DispatchSetPGVariable(const char *name, List *args, bool is_local); extern void GetPGVariable(const char *name, DestReceiver *dest); extern TupleDesc GetPGVariableResultDesc(const char *name); diff --git a/src/include/utils/portal.h b/src/include/utils/portal.h index fb940fa1ecf..d474a72a6be 100644 --- a/src/include/utils/portal.h +++ b/src/include/utils/portal.h @@ -227,6 +227,14 @@ typedef struct PortalData bool is_extended_query; /* simple or extended query protocol? */ } PortalData; +#define MAX_PORTALNAME_LEN NAMEDATALEN + +typedef struct portalhashent +{ + char portalname[MAX_PORTALNAME_LEN]; + Portal portal; +} PortalHashEnt; + /* * PortalIsValid * True iff portal is valid. 
@@ -275,6 +283,7 @@ extern void PortalHashTableDeleteAll(void); extern bool ThereAreNoReadyPortals(void); extern void HoldPinnedPortals(void); extern void ForgetPortalSnapshots(void); +extern HTAB *GetPortalHashTable(void); extern void AtExitCleanup_ResPortals(void); extern void TotalResPortalIncrements(int pid, Oid queueid, From e187e30dd5c6f224923244c6b18a03d1fce23f00 Mon Sep 17 00:00:00 2001 From: zhangwenchao <53178068+wenchaozhang-123@users.noreply.github.com> Date: Fri, 24 Nov 2023 11:49:43 +0800 Subject: [PATCH 032/152] Judge whether commands need to be dispatched to QEs in QD (#314) --- src/backend/cdb/dispatcher/cdbdisp_query.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/backend/cdb/dispatcher/cdbdisp_query.c b/src/backend/cdb/dispatcher/cdbdisp_query.c index 188df40c38e..8e1405df0d1 100644 --- a/src/backend/cdb/dispatcher/cdbdisp_query.c +++ b/src/backend/cdb/dispatcher/cdbdisp_query.c @@ -315,6 +315,10 @@ CdbDispatchSetCommand(const char *strCommand, bool cancelOnError) int queryTextLength; ListCell *le; ErrorData *qeError = NULL; + int flags = DF_NONE; + + if (CdbNeedDispatchCommand_hook && !CdbNeedDispatchCommand_hook(strCommand, &flags, NULL, NULL)) + return; elog((Debug_print_full_dtm ? LOG : DEBUG5), "CdbDispatchSetCommand for command = '%s'", From fb499c34ef5b38a51be2d6224154afb7ef0cefa2 Mon Sep 17 00:00:00 2001 From: roseduan Date: Mon, 27 Nov 2023 21:26:31 +0800 Subject: [PATCH 033/152] Support cloud manager (#296) * Support warehouse satus and copy functions. 1. add warehouse status and create options 2. 
add copy functions for WarehouseStmt Co-authored-by: roseduan --- src/backend/nodes/copyfuncs.c | 31 +++++++++++++++++++++++ src/backend/parser/gram.y | 5 ++-- src/include/catalog/gp_warehouse.h | 9 +++++-- src/include/nodes/parsenodes.h | 1 + src/test/regress/expected/misc_sanity.out | 3 ++- 5 files changed, 44 insertions(+), 5 deletions(-) diff --git a/src/backend/nodes/copyfuncs.c b/src/backend/nodes/copyfuncs.c index 18ffeb3f784..bf89aff90c3 100644 --- a/src/backend/nodes/copyfuncs.c +++ b/src/backend/nodes/copyfuncs.c @@ -6338,6 +6338,18 @@ _copyCreateDirectoryTableStmt(const CreateDirectoryTableStmt *from) return newnode; } +static CreateWarehouseStmt * +_copyCreateWarehouseStmt(const CreateWarehouseStmt *from) +{ + CreateWarehouseStmt *newnode = makeNode(CreateWarehouseStmt); + + COPY_STRING_FIELD(whname); + COPY_NODE_FIELD(options); + COPY_NODE_FIELD(wh_options); + + return newnode; +} + static AlterDirectoryTableStmt * _copyAlterDirectoryTableStmt(const AlterDirectoryTableStmt *from) { @@ -6378,6 +6390,16 @@ _copyEphemeralNamedRelationInfo(const EphemeralNamedRelationInfo *from) return newnode; } +static DropWarehouseStmt * +_copyDropWarehouseStmt(const DropWarehouseStmt *from) +{ + DropWarehouseStmt *newnode = makeNode(DropWarehouseStmt); + + COPY_STRING_FIELD(whname); + + return newnode; +} + /* * copyObjectImpl -- implementation of copyObject(); see nodes/nodes.h * @@ -7558,6 +7580,15 @@ copyObjectImpl(const void *from) case T_EphemeralNamedRelationInfo: retval = _copyEphemeralNamedRelationInfo(from); break; + + case T_CreateWarehouseStmt: + retval = _copyCreateWarehouseStmt(from); + break; + + case T_DropWarehouseStmt: + retval = _copyDropWarehouseStmt(from); + break; + default: elog(ERROR, "unrecognized node type: %d", (int) nodeTag(from)); retval = 0; /* keep compiler quiet */ diff --git a/src/backend/parser/gram.y b/src/backend/parser/gram.y index cd98a248015..6c887539142 100644 --- a/src/backend/parser/gram.y +++ b/src/backend/parser/gram.y @@ 
-13379,12 +13379,13 @@ publication_for_tables: * *****************************************************************************/ -CreateWarehouseStmt: CREATE WAREHOUSE name OptWarehouseOptList OptTagOptList +CreateWarehouseStmt: CREATE WAREHOUSE name OptWarehouseOptList create_generic_options OptTagOptList { CreateWarehouseStmt *n = makeNode(CreateWarehouseStmt); n->whname = $3; n->options = $4; - n->tags = $5; + n->wh_options = $5; + n->tags = $6; $$ = (Node *) n; } ; diff --git a/src/include/catalog/gp_warehouse.h b/src/include/catalog/gp_warehouse.h index 85c1819a5b4..20c3f14e54c 100644 --- a/src/include/catalog/gp_warehouse.h +++ b/src/include/catalog/gp_warehouse.h @@ -31,14 +31,19 @@ #include "catalog/genbki.h" #include "catalog/gp_warehouse_d.h" -/* - * Defines for gp_version_at_initdb table +/* ---------------- + * gp_warehouse definition. cpp turns this into + * typedef struct FormData_gp_warehouse + * ---------------- */ CATALOG(gp_warehouse,8690,GpWarehouseRelationId) BKI_SHARED_RELATION { Oid oid BKI_FORCE_NOT_NULL; /* oid */ int32 warehouse_size; /* warehouse size */ text warehouse_name BKI_FORCE_NOT_NULL; /* warehouse name */ +#ifdef CATALOG_VARLEN /* variable-length fields start here */ + text status BKI_FORCE_NOT_NULL; /* status */ +#endif } FormData_gp_warehouse; typedef FormData_gp_warehouse *Form_gp_warehouse; diff --git a/src/include/nodes/parsenodes.h b/src/include/nodes/parsenodes.h index 449fb3fba03..db6b8c40175 100644 --- a/src/include/nodes/parsenodes.h +++ b/src/include/nodes/parsenodes.h @@ -4473,6 +4473,7 @@ typedef struct CreateWarehouseStmt NodeTag type; char *whname; List *options; /* List of DefElem nodes */ + List *wh_options; /* generic options to warehouse */ List *tags; /* List of tag DefElem nodes */ } CreateWarehouseStmt; diff --git a/src/test/regress/expected/misc_sanity.out b/src/test/regress/expected/misc_sanity.out index 08d05a5e0d6..5d237c0ab69 100644 --- a/src/test/regress/expected/misc_sanity.out +++ 
b/src/test/regress/expected/misc_sanity.out @@ -105,6 +105,7 @@ ORDER BY 1, 2; --------------------------+--------------------+-------------- gp_configuration_history | desc | text gp_version_at_initdb | productversion | text + gp_warehouse | status | text gp_warehouse | warehouse_name | text main_manifest | path | text pg_attribute | attacl | aclitem[] @@ -138,7 +139,7 @@ ORDER BY 1, 2; pg_task_run_history | return_message | text pg_task_run_history | status | text pg_task_run_history | username | text -(35 rows) +(36 rows) -- system catalogs without primary keys -- From a6c116116ace44cdc8ba7c4037895e97be2b84fc Mon Sep 17 00:00:00 2001 From: zhangwenchao <656540940@qq.com> Date: Thu, 19 Oct 2023 16:03:43 +0800 Subject: [PATCH 034/152] Add cache invaladation synchronization amoung QD and QEs. --- src/backend/access/transam/xact.c | 3 + src/backend/cdb/dispatcher/cdbdisp_extra.c | 4 +- src/backend/cdb/dispatcher/cdbdisp_query.c | 18 +++-- src/backend/storage/ipc/sinval.c | 15 ++++ src/backend/storage/ipc/sinvaladt.c | 5 ++ src/backend/utils/cache/catcache.c | 12 --- src/backend/utils/cache/inval.c | 21 +++++ src/backend/utils/cache/relcache.c | 8 -- src/backend/utils/init/postinit.c | 1 + src/include/cdb/cdbdisp_extra.h | 4 +- src/include/storage/sinvaladt.h | 91 ++++++++++++++++++++++ src/include/utils/catcache.h | 12 --- src/include/utils/inval.h | 37 +++++++++ src/include/utils/relcache.h | 6 -- 14 files changed, 187 insertions(+), 50 deletions(-) diff --git a/src/backend/access/transam/xact.c b/src/backend/access/transam/xact.c index 92d6dda92f6..d46b6f4e01b 100644 --- a/src/backend/access/transam/xact.c +++ b/src/backend/access/transam/xact.c @@ -2781,6 +2781,9 @@ CommitTransaction(void) TransactionId latestXid; bool is_parallel_worker; + if (cache_invalidation_async_hook) + cache_invalidation_async_hook(cache_async_messages); + is_parallel_worker = (s->blockState == TBLOCK_PARALLEL_INPROGRESS); /* Enforce parallel mode restrictions during parallel worker 
commit. */ diff --git a/src/backend/cdb/dispatcher/cdbdisp_extra.c b/src/backend/cdb/dispatcher/cdbdisp_extra.c index 8a032222a29..147531d6557 100644 --- a/src/backend/cdb/dispatcher/cdbdisp_extra.c +++ b/src/backend/cdb/dispatcher/cdbdisp_extra.c @@ -54,7 +54,7 @@ RegisterExtraDispatch(const char *extraDispName, PackFunc packFunc, UnpackFunc u * message. */ char * -PackExtraMsgs(int *len) +PackExtraMsgs(int *len, bool need_snapshot) { HASH_SEQ_STATUS status; ExtraDispEntry *hentry; @@ -86,7 +86,7 @@ PackExtraMsgs(int *len) hash_seq_init(&status, ExtraDispTable); while ((hentry = (ExtraDispEntry *) hash_seq_search(&status)) != NULL) { - payloads[i] = (*(hentry->packFunc))(lengths + i); + payloads[i] = (*(hentry->packFunc))(lengths + i, need_snapshot); names[i] = hentry->extraDispName; totalLen += sizeof(int) + strlen(names[i]) + 1 + *(lengths + i); i++; diff --git a/src/backend/cdb/dispatcher/cdbdisp_query.c b/src/backend/cdb/dispatcher/cdbdisp_query.c index 8e1405df0d1..8f1bd445848 100644 --- a/src/backend/cdb/dispatcher/cdbdisp_query.c +++ b/src/backend/cdb/dispatcher/cdbdisp_query.c @@ -119,7 +119,7 @@ static int fillSliceVector(SliceTable *sliceTable, int len); static char *buildGpQueryString(DispatchCommandQueryParms *pQueryParms, - int *finalLen); + int *finalLen, int flags); static DispatchCommandQueryParms *cdbdisp_buildPlanQueryParms(struct QueryDesc *queryDesc, bool planRequiresTxn); static DispatchCommandQueryParms *cdbdisp_buildUtilityQueryParms(struct Node *stmt, int flags, List *oid_assignments); @@ -328,7 +328,7 @@ CdbDispatchSetCommand(const char *strCommand, bool cancelOnError) ds = cdbdisp_makeDispatcherState(false); - queryText = buildGpQueryString(pQueryParms, &queryTextLength); + queryText = buildGpQueryString(pQueryParms, &queryTextLength, DF_WITH_SNAPSHOT); primaryGang = AllocateGang(ds, GANGTYPE_PRIMARY_WRITER, cdbcomponent_getCdbComponentsList()); if (gp_print_create_gang_time) @@ -516,7 +516,7 @@ 
cdbdisp_dispatchCommandInternal(DispatchCommandQueryParms *pQueryParms, if (system_relation_modified) ds->destroyIdleReaderGang = true; - queryText = buildGpQueryString(pQueryParms, &queryTextLength); + queryText = buildGpQueryString(pQueryParms, &queryTextLength, flags); /* * Allocate a primary QE for every available segDB in the system. @@ -883,7 +883,7 @@ fillSliceVector(SliceTable *sliceTbl, int rootIdx, */ static char * buildGpQueryString(DispatchCommandQueryParms *pQueryParms, - int *finalLen) + int *finalLen, int flags) { const char *command = pQueryParms->strCommand; int command_len; @@ -908,8 +908,9 @@ buildGpQueryString(DispatchCommandQueryParms *pQueryParms, char *shared_query, *pos; char *extraMsgs; - int extraLen; + int extraLen = 0; MemoryContext oldContext; + bool need_snapshot; /* * Must allocate query text within DispatcherContext, @@ -958,7 +959,8 @@ buildGpQueryString(DispatchCommandQueryParms *pQueryParms, sizeof(tempToastNamespaceId) + 0; - extraMsgs = PackExtraMsgs(&extraLen); + need_snapshot = flags & DF_WITH_SNAPSHOT; + extraMsgs = PackExtraMsgs(&extraLen, need_snapshot); total_query_len += extraLen; shared_query = palloc(total_query_len); @@ -1145,7 +1147,7 @@ cdbdisp_dispatchX(QueryDesc* queryDesc, sliceTbl->ic_instance_id = ++gp_interconnect_id; pQueryParms = cdbdisp_buildPlanQueryParms(queryDesc, planRequiresTxn); - queryText = buildGpQueryString(pQueryParms, &queryTextLength); + queryText = buildGpQueryString(pQueryParms, &queryTextLength, DF_WITH_SNAPSHOT); /* * Allocate result array with enough slots for QEs of primary gangs. @@ -1391,7 +1393,7 @@ CdbDispatchCopyStart(struct CdbCopy *cdbCopy, Node *stmt, int flags) */ ds = cdbdisp_makeDispatcherState(false); - queryText = buildGpQueryString(pQueryParms, &queryTextLength); + queryText = buildGpQueryString(pQueryParms, &queryTextLength, flags); /* * Allocate a primary QE for every available segDB in the system. 
diff --git a/src/backend/storage/ipc/sinval.c b/src/backend/storage/ipc/sinval.c index d18973f3585..c93415b0bce 100644 --- a/src/backend/storage/ipc/sinval.c +++ b/src/backend/storage/ipc/sinval.c @@ -15,6 +15,7 @@ #include "postgres.h" #include "access/xact.h" +#include "cdb/cdbvars.h" #include "commands/async.h" #include "miscadmin.h" #include "storage/ipc.h" @@ -95,6 +96,11 @@ ReceiveSharedInvalidMessages(void (*invalFunction) (SharedInvalidationMessage *m SharedInvalidMessageCounter++; invalFunction(&msg); + + if (CollectInvalMessages_hook) + { + CollectInvalMessages_hook(&msg); + } } do @@ -112,6 +118,10 @@ ReceiveSharedInvalidMessages(void (*invalFunction) (SharedInvalidationMessage *m elog(DEBUG4, "cache state reset"); SharedInvalidMessageCounter++; resetFunction(); + if (ProcessResetCache_hook) + { + ProcessResetCache_hook(); + } break; /* nothing more to do */ } @@ -125,6 +135,11 @@ ReceiveSharedInvalidMessages(void (*invalFunction) (SharedInvalidationMessage *m SharedInvalidMessageCounter++; invalFunction(&msg); + + if (CollectInvalMessages_hook) + { + CollectInvalMessages_hook(&msg); + } } /* diff --git a/src/backend/storage/ipc/sinvaladt.c b/src/backend/storage/ipc/sinvaladt.c index 946bd8e3cb5..30b25fd7382 100644 --- a/src/backend/storage/ipc/sinvaladt.c +++ b/src/backend/storage/ipc/sinvaladt.c @@ -103,6 +103,7 @@ */ +#if 0 /* * Configurable parameters. * @@ -192,6 +193,10 @@ static SISeg *shmInvalBuffer; /* pointer to the shared inval buffer */ static LocalTransactionId nextLocalTransactionId; +#endif + +SISeg *shmInvalBuffer = NULL; +LocalTransactionId nextLocalTransactionId = InvalidLocalTransactionId; static void CleanupInvalidationState(int status, Datum arg); diff --git a/src/backend/utils/cache/catcache.c b/src/backend/utils/cache/catcache.c index 65dcb1f2a83..bf8027d26d5 100644 --- a/src/backend/utils/cache/catcache.c +++ b/src/backend/utils/cache/catcache.c @@ -61,12 +61,6 @@ #define CACHE_elog(...) 
#endif -/* Hook for plugins to get control in SearchCatCache */ -SearchCatCache_hook_type SearchCatCache_hook = NULL; - -/* Hook for plugins to get control in ReleaseCatCache */ -ReleaseCatCache_hook_type ReleaseCatCache_hook = NULL; - /* Cache management header --- pointer is NULL until created */ static CatCacheHeader *CacheHdr = NULL; @@ -1275,9 +1269,6 @@ SearchCatCacheInternal(CatCache *cache, Assert(cache->cc_nkeys == nkeys); - if (SearchCatCache_hook) - return (*SearchCatCache_hook)(cache, nkeys, v1, v2, v3, v4); - /* * one-time startup overhead for each cache */ @@ -1514,9 +1505,6 @@ SearchCatCacheMiss(CatCache *cache, void ReleaseCatCache(HeapTuple tuple) { - if (ReleaseCatCache_hook) - return (*ReleaseCatCache_hook)(tuple); - CatCTup *ct = (CatCTup *) (((char *) tuple) - offsetof(CatCTup, tuple)); diff --git a/src/backend/utils/cache/inval.c b/src/backend/utils/cache/inval.c index 5a986349e2a..1e3d497dcf3 100644 --- a/src/backend/utils/cache/inval.c +++ b/src/backend/utils/cache/inval.c @@ -136,6 +136,7 @@ #include "cdb/cdbvars.h" +#if 0 /* * To minimize palloc traffic, we keep pending requests in successively- * larger chunks (a slightly more sophisticated version of an expansible @@ -155,6 +156,13 @@ typedef struct InvalidationListHeader InvalidationChunk *cclist; /* list of chunks holding catcache msgs */ InvalidationChunk *rclist; /* list of chunks holding relcache msgs */ } InvalidationListHeader; +#endif +CollectInvalMessages_hook_type CollectInvalMessages_hook = NULL; +ProcessResetCache_hook_type ProcessResetCache_hook = NULL; +cache_invalidation_async_hook_type cache_invalidation_async_hook = NULL; +cache_async_cleanup_hook_type cache_async_cleanup_hook = NULL; + +CacheAsyncMessages *cache_async_messages = NULL; /*---------------- * Invalidation info is divided into two lists: @@ -697,6 +705,11 @@ LocalExecuteInvalidationMessage(SharedInvalidationMessage *msg) #endif elog(FATAL, "unrecognized SI message ID: %d", msg->id); } + + if 
(CollectInvalMessages_hook) + { + CollectInvalMessages_hook(msg); + } } /* @@ -1036,7 +1049,12 @@ AtEOXact_Inval(bool isCommit) { /* Quick exit if no messages */ if (transInvalInfo == NULL) + { + if (cache_async_cleanup_hook) + cache_async_cleanup_hook(cache_async_messages); return; + } + /* Must be at top of stack */ Assert(transInvalInfo->my_level == 1 && transInvalInfo->parent == NULL); @@ -1059,6 +1077,9 @@ AtEOXact_Inval(bool isCommit) if (transInvalInfo->RelcacheInitFileInval) RelationCacheInitFilePostInvalidate(); + + if (cache_async_cleanup_hook) + cache_async_cleanup_hook(cache_async_messages); } else { diff --git a/src/backend/utils/cache/relcache.c b/src/backend/utils/cache/relcache.c index 92fa7162151..a7ba7f5a4ca 100644 --- a/src/backend/utils/cache/relcache.c +++ b/src/backend/utils/cache/relcache.c @@ -209,11 +209,6 @@ static bool eoxact_list_overflowed = false; eoxact_list_overflowed = true; \ } while (0) -/* - * Hook for plugins to validate the relation in RelationIdGetRelation. - */ -RelationValidation_hook_type RelationValidation_hook = NULL; - /* * EOXactTupleDescArray stores TupleDescs that (might) need AtEOXact * cleanup work. 
The array expands as needed; there is no hashtable because @@ -2178,9 +2173,6 @@ RelationIdGetRelation(Oid relationId) return NULL; } - if (RelationValidation_hook) - (*RelationValidation_hook)(relationId, rd); - RelationIncrementReferenceCount(rd); /* revalidate cache entry if necessary */ if (!rd->rd_isvalid) diff --git a/src/backend/utils/init/postinit.c b/src/backend/utils/init/postinit.c index 86fb5c82cb7..44bd1622f55 100644 --- a/src/backend/utils/init/postinit.c +++ b/src/backend/utils/init/postinit.c @@ -74,6 +74,7 @@ #include "utils/faultinjector.h" #include "utils/fmgroids.h" #include "utils/guc.h" +#include "utils/inval.h" #include "utils/memutils.h" #include "utils/pg_locale.h" #include "utils/portal.h" diff --git a/src/include/cdb/cdbdisp_extra.h b/src/include/cdb/cdbdisp_extra.h index b6bac03b3cd..52cc239cc88 100644 --- a/src/include/cdb/cdbdisp_extra.h +++ b/src/include/cdb/cdbdisp_extra.h @@ -5,11 +5,11 @@ #define EXTRADISPNAME_MAX_LEN 64 -typedef char *(*PackFunc) (int *len); +typedef char *(*PackFunc) (int *len, bool need_snapshot); typedef void (*UnpackFunc) (const char *msg, int len); extern void RegisterExtraDispatch(const char *extraDispName, PackFunc packFunc, UnpackFunc unpackFunc); -extern char *PackExtraMsgs(int *len); +extern char *PackExtraMsgs(int *len, bool need_snapshot); extern void UnPackExtraMsgs(StringInfo strInfo); #endif /* CDBDISP_EXTRA_H */ diff --git a/src/include/storage/sinvaladt.h b/src/include/storage/sinvaladt.h index 14148bf8201..b0fa7a1df2f 100644 --- a/src/include/storage/sinvaladt.h +++ b/src/include/storage/sinvaladt.h @@ -25,6 +25,97 @@ #include "storage/lock.h" #include "storage/sinval.h" + +/* + * Configurable parameters. + * + * MAXNUMMESSAGES: max number of shared-inval messages we can buffer. + * Must be a power of 2 for speed. + * + * MSGNUMWRAPAROUND: how often to reduce MsgNum variables to avoid overflow. + * Must be a multiple of MAXNUMMESSAGES. Should be large. 
+ * + * CLEANUP_MIN: the minimum number of messages that must be in the buffer + * before we bother to call SICleanupQueue. + * + * CLEANUP_QUANTUM: how often (in messages) to call SICleanupQueue once + * we exceed CLEANUP_MIN. Should be a power of 2 for speed. + * + * SIG_THRESHOLD: the minimum number of messages a backend must have fallen + * behind before we'll send it PROCSIG_CATCHUP_INTERRUPT. + * + * WRITE_QUANTUM: the max number of messages to push into the buffer per + * iteration of SIInsertDataEntries. Noncritical but should be less than + * CLEANUP_QUANTUM, because we only consider calling SICleanupQueue once + * per iteration. + */ + +#define MAXNUMMESSAGES 4096 +#define MSGNUMWRAPAROUND (MAXNUMMESSAGES * 262144) +#define CLEANUP_MIN (MAXNUMMESSAGES / 2) +#define CLEANUP_QUANTUM (MAXNUMMESSAGES / 16) +#define SIG_THRESHOLD (MAXNUMMESSAGES / 2) +#define WRITE_QUANTUM 64 + +/* Per-backend state in shared invalidation structure */ +typedef struct ProcState +{ + /* procPid is zero in an inactive ProcState array entry. */ + pid_t procPid; /* PID of backend, for signaling */ + PGPROC *proc; /* PGPROC of backend */ + /* nextMsgNum is meaningless if procPid == 0 or resetState is true. */ + int nextMsgNum; /* next message number to read */ + bool resetState; /* backend needs to reset its state */ + bool signaled; /* backend has been sent catchup signal */ + bool hasMessages; /* backend has unread messages */ + + /* + * Backend only sends invalidations, never receives them. This only makes + * sense for Startup process during recovery because it doesn't maintain a + * relcache, yet it fires inval messages to allow query backends to see + * schema changes. + */ + bool sendOnly; /* backend only sends, never receives */ + + /* + * Next LocalTransactionId to use for each idle backend slot. We keep + * this here because it is indexed by BackendId and it is convenient to + * copy the value to and from local memory when MyBackendId is set. 
It's + * meaningless in an active ProcState entry. + */ + LocalTransactionId nextLXID; +} ProcState; + +/* Shared cache invalidation memory segment */ +typedef struct SISeg +{ + /* + * General state information + */ + int minMsgNum; /* oldest message still needed */ + int maxMsgNum; /* next message number to be assigned */ + int nextThreshold; /* # of messages to call SICleanupQueue */ + int lastBackend; /* index of last active procState entry, +1 */ + int maxBackends; /* size of procState array */ + + slock_t msgnumLock; /* spinlock protecting maxMsgNum */ + + /* + * Circular buffer holding shared-inval messages + */ + SharedInvalidationMessage buffer[MAXNUMMESSAGES]; + + /* + * Per-backend invalidation state info (has MaxBackends entries). + */ + ProcState procState[FLEXIBLE_ARRAY_MEMBER]; +} SISeg; + +extern SISeg *shmInvalBuffer; /* pointer to the shared inval buffer */ + + +extern LocalTransactionId nextLocalTransactionId; + /* * prototypes for functions in sinvaladt.c */ diff --git a/src/include/utils/catcache.h b/src/include/utils/catcache.h index e62c2d4018b..8848307553e 100644 --- a/src/include/utils/catcache.h +++ b/src/include/utils/catcache.h @@ -185,18 +185,6 @@ typedef struct catcacheheader int ch_ntup; /* # of tuples in all caches */ } CatCacheHeader; -/* Hook for plugins to get control in SearchCatCache */ -typedef HeapTuple (*SearchCatCache_hook_type)(CatCache *cache, - int nkeys, - Datum v1, - Datum v2, - Datum v3, - Datum v4); -extern PGDLLIMPORT SearchCatCache_hook_type SearchCatCache_hook; - -/* Hook for plugins to get control in ReleaseCatCache */ -typedef void (*ReleaseCatCache_hook_type)(HeapTuple tuple); -extern PGDLLIMPORT ReleaseCatCache_hook_type ReleaseCatCache_hook; /* this extern duplicates utils/memutils.h... 
*/ extern PGDLLIMPORT MemoryContext CacheMemoryContext; diff --git a/src/include/utils/inval.h b/src/include/utils/inval.h index 877e66c63c8..2ab944ede35 100644 --- a/src/include/utils/inval.h +++ b/src/include/utils/inval.h @@ -16,9 +16,37 @@ #include "access/htup.h" #include "storage/relfilenode.h" +#include "storage/sinval.h" #include "utils/relcache.h" +/* + * To minimize palloc traffic, we keep pending requests in successively- + * larger chunks (a slightly more sophisticated version of an expansible + * array). All request types can be stored as SharedInvalidationMessage + * records. The ordering of requests within a list is never significant. + */ +typedef struct InvalidationChunk +{ + struct InvalidationChunk *next; /* list link */ + int nitems; /* # items currently stored in chunk */ + int maxitems; /* size of allocated array in this chunk */ + SharedInvalidationMessage msgs[FLEXIBLE_ARRAY_MEMBER]; +} InvalidationChunk; + +typedef struct InvalidationListHeader +{ + InvalidationChunk *cclist; /* list of chunks holding catcache msgs */ + InvalidationChunk *rclist; /* list of chunks holding relcache msgs */ +} InvalidationListHeader; + +typedef struct CacheAsyncMessages +{ + List *local_inval_messages; + bool reset_cache_state; +} CacheAsyncMessages; + extern PGDLLIMPORT int debug_discard_caches; +extern CacheAsyncMessages *cache_async_messages; typedef void (*SyscacheCallbackFunction) (Datum arg, int cacheid, uint32 hashvalue); typedef void (*RelcacheCallbackFunction) (Datum arg, Oid relid); @@ -65,4 +93,13 @@ extern void InvalidateSystemCaches(void); extern void InvalidateSystemCachesExtended(bool debug_discard); extern void LogLogicalInvalidations(void); + +typedef void (*CollectInvalMessages_hook_type) (SharedInvalidationMessage *msg); +typedef void (*ProcessResetCache_hook_type) (void); +typedef void (*cache_invalidation_async_hook_type) (CacheAsyncMessages *cache_async_messages); +typedef void (*cache_async_cleanup_hook_type) (CacheAsyncMessages 
*cache_async_messages); +extern PGDLLIMPORT CollectInvalMessages_hook_type CollectInvalMessages_hook; +extern PGDLLIMPORT ProcessResetCache_hook_type ProcessResetCache_hook; +extern PGDLLIMPORT cache_invalidation_async_hook_type cache_invalidation_async_hook; +extern PGDLLIMPORT cache_async_cleanup_hook_type cache_async_cleanup_hook; #endif /* INVAL_H */ diff --git a/src/include/utils/relcache.h b/src/include/utils/relcache.h index 4186992f324..1849746c0d7 100644 --- a/src/include/utils/relcache.h +++ b/src/include/utils/relcache.h @@ -36,12 +36,6 @@ typedef struct RelationData *Relation; */ typedef Relation *RelationPtr; -/* - * Hook for plugins to validate the relation in RelationIdGetRelation. - */ -typedef void (*RelationValidation_hook_type)(Oid relationId, Relation relation); -extern PGDLLIMPORT RelationValidation_hook_type RelationValidation_hook; - /* * Routines to open (lookup) and close a relcache entry */ From f7d5a178db6ed0670752050e32be6ba19abcd937 Mon Sep 17 00:00:00 2001 From: Jinbao Chen Date: Wed, 16 Apr 2025 21:15:35 +0800 Subject: [PATCH 035/152] Fix some errors in cherry pick --- src/backend/access/appendonly/appendonlyam.c | 2 +- src/backend/catalog/main_manifest.c | 2 +- src/backend/executor/execUtils.c | 5 +++-- src/backend/storage/smgr/smgr.c | 6 ++++-- src/include/c.h | 1 + src/include/catalog/main_manifest.h | 4 ++-- src/include/cdb/cdbappendonlyam.h | 2 +- src/include/commands/matview.h | 1 + src/include/utils/rel.h | 7 +++---- 9 files changed, 17 insertions(+), 13 deletions(-) diff --git a/src/backend/access/appendonly/appendonlyam.c b/src/backend/access/appendonly/appendonlyam.c index d8d85c07fe6..d58fb6c12e5 100755 --- a/src/backend/access/appendonly/appendonlyam.c +++ b/src/backend/access/appendonly/appendonlyam.c @@ -1539,7 +1539,7 @@ appendonly_beginscan_extractcolumns(Relation rel, Snapshot snapshot, int nkeys, PlanState *ps, uint32 flags) { AppendOnlyScanDesc aoscan; - aoscan = (AppendOnlyScanDesc) appendonly_beginscan(rel, 
snapshot, nkeys, key, parallel_scan, flags); + aoscan = (AppendOnlyScanDesc) appendonly_beginscan(rel, snapshot, nkeys, key, parallel_scan, flags, NULL); if (gp_enable_predicate_pushdown) ps->qual = appendonly_predicate_pushdown_prepare(aoscan, ps->qual, ps->ps_ExprContext); return (TableScanDesc) aoscan; diff --git a/src/backend/catalog/main_manifest.c b/src/backend/catalog/main_manifest.c index d08326953ff..d96dcedfd1f 100644 --- a/src/backend/catalog/main_manifest.c +++ b/src/backend/catalog/main_manifest.c @@ -25,7 +25,7 @@ * Remove the main manifest record for the relnode. */ void -RemoveMainManifestByRelnode(RelFileNodeId relnode) +RemoveMainManifestByRelnode(Oid relnode) { Relation main_manifest; HeapTuple tuple; diff --git a/src/backend/executor/execUtils.c b/src/backend/executor/execUtils.c index 4a1fafb3634..2dfeff21200 100644 --- a/src/backend/executor/execUtils.c +++ b/src/backend/executor/execUtils.c @@ -2130,8 +2130,9 @@ uint64 mppExecutorWait(QueryDesc *queryDesc) ReThrowError(qeError); } - if (ProcessDispatchResult_hook) - ProcessDispatchResult_hook(ds); + /* FIXME_HASHDATA open it after the ProcessDispatchResult_hook merged into */ +// if (ProcessDispatchResult_hook) +// ProcessDispatchResult_hook(ds); /* collect pgstat from QEs for current transaction level */ pgstat_combine_from_qe(pr, primaryWriterSliceIndex); diff --git a/src/backend/storage/smgr/smgr.c b/src/backend/storage/smgr/smgr.c index d826c334f1c..7f719b51d32 100644 --- a/src/backend/storage/smgr/smgr.c +++ b/src/backend/storage/smgr/smgr.c @@ -129,6 +129,8 @@ f_smgr smgrsw[] = { } }; +static const int NSmgr = lengthof(smgrsw); + static File AORelOpenSegFile(__attribute__((unused))Oid reloid, const char *filePath, int fileFlags) { return PathNameOpenFile(filePath, fileFlags); @@ -262,7 +264,7 @@ smgrinit(void) { int i; - for (i = 0; i <= SMGR_MAX_ID; i++) + for (i = 0; i < NSmgr; i++) { if (smgrsw[i].smgr_init) smgrsw[i].smgr_init(); @@ -283,7 +285,7 @@ smgrshutdown(int code, Datum arg) 
{ int i; - for (i = 0; i <= SMGR_MAX_ID; i++) + for (i = 0; i < NSmgr; i++) { if (smgrsw[i].smgr_shutdown) smgrsw[i].smgr_shutdown(); diff --git a/src/include/c.h b/src/include/c.h index 1fdcf642989..a90e823999d 100644 --- a/src/include/c.h +++ b/src/include/c.h @@ -650,6 +650,7 @@ typedef uint32 CommandId; #define FirstCommandId ((CommandId) 0) #define InvalidCommandId (~(CommandId)0) +typedef uint64 RelFileNodeId; /* ---------------- * Variable-length datatypes all share the 'struct varlena' header. * diff --git a/src/include/catalog/main_manifest.h b/src/include/catalog/main_manifest.h index b2f90156041..d093cbc67d1 100644 --- a/src/include/catalog/main_manifest.h +++ b/src/include/catalog/main_manifest.h @@ -24,12 +24,12 @@ */ CATALOG(main_manifest,9004,ManifestRelationId) { - RelFileNodeId relnode; + Oid relnode; text path; } FormData_main_manifest; typedef FormData_main_manifest *Form_main_manifest; -extern void RemoveMainManifestByRelnode(RelFileNodeId relnode); +extern void RemoveMainManifestByRelnode(Oid relnode); #endif /* MAIN_MANIFEST.h */ diff --git a/src/include/cdb/cdbappendonlyam.h b/src/include/cdb/cdbappendonlyam.h index d4d94925565..f9890715b9f 100644 --- a/src/include/cdb/cdbappendonlyam.h +++ b/src/include/cdb/cdbappendonlyam.h @@ -445,7 +445,7 @@ extern TableScanDesc appendonly_beginscan_extractcolumns(Relation rel, int nkeys, struct ScanKeyData *key, ParallelTableScanDesc parallel_scan, PlanState *ps, - uint32 flags);) + uint32 flags); extern void appendonly_rescan(TableScanDesc scan, ScanKey key, bool set_params, bool allow_strat, bool allow_sync, bool allow_pagemode); diff --git a/src/include/commands/matview.h b/src/include/commands/matview.h index 6ef864ce8b6..c8643aa6430 100644 --- a/src/include/commands/matview.h +++ b/src/include/commands/matview.h @@ -14,6 +14,7 @@ #ifndef MATVIEW_H #define MATVIEW_H +#include "access/heapam.h" #include "catalog/objectaddress.h" #include "executor/execdesc.h" #include "nodes/params.h" diff --git 
a/src/include/utils/rel.h b/src/include/utils/rel.h index 32ac9251d63..f44efcdfa98 100644 --- a/src/include/utils/rel.h +++ b/src/include/utils/rel.h @@ -483,9 +483,6 @@ typedef struct ViewOptions #define RelationIsHeap(relation) \ ((relation)->rd_amhandler == F_HEAP_TABLEAM_HANDLER) -#define RelationIsNonblockRelation(relation) \ - ((relation)->rd_tableam && (relation)->rd_rel->relam != HEAP_TABLE_AM_OID) - #define AMHandlerIsAoRows(amhandler) ((amhandler) == F_AO_ROW_TABLEAM_HANDLER) #define AMHandlerIsAoCols(amhandler) \ ((amhandler) == F_AO_COLUMN_TABLEAM_HANDLER) @@ -548,6 +545,7 @@ typedef struct ViewOptions * can't distinguish the PAX and renamed heap(heap_psql) in test `psql`. */ #define PAX_AM_OID 7047 +#define HASHDATA_AM_OID 7015 #define RelationIsPax(relation) \ ((relation)->rd_rel->relam == PAX_AM_OID) @@ -568,7 +566,8 @@ typedef struct ViewOptions */ #define RelationIsNonblockRelation(relation) \ (RelationIsAppendOptimized(relation) || \ - RelationIsPax(relation)) + (relation)->rd_rel->relam == PAX_AM_OID || \ + (relation)->rd_rel->relam == HASHDATA_AM_OID) /* * RelationIsBitmapIndex From 18dd3adf7a6a587aaa730f587af54305feb505d2 Mon Sep 17 00:00:00 2001 From: zhangwenchao <656540940@qq.com> Date: Tue, 26 Dec 2023 15:45:32 +0800 Subject: [PATCH 036/152] Fix copy from where clause of randomly distributed table. --- src/backend/commands/copyfrom.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/src/backend/commands/copyfrom.c b/src/backend/commands/copyfrom.c index 67a767f273f..510c4a5cdb6 100644 --- a/src/backend/commands/copyfrom.c +++ b/src/backend/commands/copyfrom.c @@ -4257,6 +4257,7 @@ InitCopyFromDispatchSplit(CopyFromState cstate, GpDistributionData *distData, else { int fieldno; + List *whereVars; /* * We need all the columns that form the distribution key. 
*/ @@ -4266,6 +4267,14 @@ InitCopyFromDispatchSplit(CopyFromState cstate, GpDistributionData *distData, needed_cols = bms_add_member(needed_cols, distData->policy->attrs[i]); } + /* Also need all the columns that in copy where clause. */ + whereVars = pull_var_clause(cstate->whereClause, 0); + foreach(lc, whereVars) + { + Var *var = lfirst(lc); + needed_cols = bms_add_member(needed_cols, var->varattno); + } + /* Get the max fieldno that contains one of the needed attributes. */ fieldno = 0; foreach(lc, cstate->attnumlist) From 3d6d7ce53491127340f65aaccc3744e162197b21 Mon Sep 17 00:00:00 2001 From: leo Date: Wed, 13 Dec 2023 12:19:07 +0800 Subject: [PATCH 037/152] Fix: dispatch Alter Domain statement to QE. Alter Domain statement is executed on QD, not dispatched to QE. QD could update the catalog of domain, but could not validate the constraint because data is accessed by QE. To validate the constraint, the Alter Domain statement should be dispatched to QE, QE only validates the constraint but does not update catalog. Currently, ALTER DOMAIN SET NOT NULL/ADD CONSTRAINT/VALIDATE CONSTRAINT are dispatched to QE. 
--- src/backend/commands/typecmds.c | 8 ++++++-- src/include/commands/typecmds.h | 11 +++++++++++ 2 files changed, 17 insertions(+), 2 deletions(-) diff --git a/src/backend/commands/typecmds.c b/src/backend/commands/typecmds.c index 83849b391ea..5d4458b453c 100644 --- a/src/backend/commands/typecmds.c +++ b/src/backend/commands/typecmds.c @@ -84,6 +84,7 @@ #include "catalog/gp_indexing.h" +#if 0 /* result structure for get_rels_with_domain() */ typedef struct { @@ -92,6 +93,7 @@ typedef struct int *atts; /* attribute numbers */ /* atts[] is of allocated length RelationGetNumberOfAttributes(rel) */ } RelToCheck; +#endif /* parameter structure for AlterTypeRecurse() */ typedef struct @@ -135,8 +137,10 @@ static Oid findTypeSubscriptingFunction(List *procname, Oid typeOid); static Oid findRangeSubOpclass(List *opcname, Oid subtype); static Oid findRangeCanonicalFunction(List *procname, Oid typeOid); static Oid findRangeSubtypeDiffFunction(List *procname, Oid subtype); +#if 0 static void validateDomainConstraint(Oid domainoid, char *ccbin); static List *get_rels_with_domain(Oid domainOid, LOCKMODE lockmode); +#endif static void checkEnumOwner(HeapTuple tup); static char *domainAddConstraint(Oid domainOid, Oid domainNamespace, Oid baseTypeOid, @@ -3250,7 +3254,7 @@ AlterDomainValidateConstraint(List *names, const char *constrName) return address; } -static void +void validateDomainConstraint(Oid domainoid, char *ccbin) { Expr *expr = (Expr *) stringToNode(ccbin); @@ -3370,7 +3374,7 @@ validateDomainConstraint(Oid domainoid, char *ccbin) * Generally used for retrieving a list of tests when adding * new constraints to a domain. 
*/ -static List * +List * get_rels_with_domain(Oid domainOid, LOCKMODE lockmode) { List *result = NIL; diff --git a/src/include/commands/typecmds.h b/src/include/commands/typecmds.h index b3bf93ed339..26ea2f44e3b 100644 --- a/src/include/commands/typecmds.h +++ b/src/include/commands/typecmds.h @@ -22,6 +22,15 @@ #define DEFAULT_TYPDELIM ',' +/* result structure for get_rels_with_domain() */ +typedef struct +{ + Relation rel; /* opened and locked relation */ + int natts; /* number of attributes of interest */ + int *atts; /* attribute numbers */ + /* atts[] is of allocated length RelationGetNumberOfAttributes(rel) */ +} RelToCheck; + extern ObjectAddress DefineType(ParseState *pstate, List *names, List *parameters); extern void RemoveTypeById(Oid typeOid); extern ObjectAddress DefineDomain(CreateDomainStmt *stmt); @@ -40,6 +49,8 @@ extern ObjectAddress AlterDomainNotNull(List *names, bool notNull); extern ObjectAddress AlterDomainAddConstraint(List *names, Node *constr, ObjectAddress *constrAddr); extern ObjectAddress AlterDomainValidateConstraint(List *names, const char *constrName); +extern void validateDomainConstraint(Oid domainoid, char *ccbin); +extern List *get_rels_with_domain(Oid domainOid, LOCKMODE lockmode); extern ObjectAddress AlterDomainDropConstraint(List *names, const char *constrName, DropBehavior behavior, bool missing_ok); From bfbc1141a23f8031105d077e23b0cde58a1eb260 Mon Sep 17 00:00:00 2001 From: liushengsong Date: Wed, 13 Dec 2023 18:36:03 +0800 Subject: [PATCH 038/152] Fix before delete/update row trigger assert in serverless architecture For update operation, we save the oldtuple to avoid high-cost table_tuple_fetch_row_version. Thus, fdw_trigtuple and tupleid are all valid. Setting the segoffset from 1 to make our tupleid valid. 
--- src/backend/commands/trigger.c | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/src/backend/commands/trigger.c b/src/backend/commands/trigger.c index a9ff96870da..272bc285585 100644 --- a/src/backend/commands/trigger.c +++ b/src/backend/commands/trigger.c @@ -2580,7 +2580,10 @@ ExecBRDeleteTriggers(EState *estate, EPQState *epqstate, bool should_free = false; int i; - Assert(HeapTupleIsValid(fdw_trigtuple) ^ ItemPointerIsValid(tupleid)); + if(enable_serverless) + Assert(HeapTupleIsValid(fdw_trigtuple) || ItemPointerIsValid(tupleid)); + else + Assert(HeapTupleIsValid(fdw_trigtuple) ^ ItemPointerIsValid(tupleid)); if (fdw_trigtuple == NULL) { TupleTableSlot *epqslot_candidate = NULL; @@ -2823,7 +2826,16 @@ ExecBRUpdateTriggers(EState *estate, EPQState *epqstate, /* Determine lock mode to use */ lockmode = ExecUpdateLockMode(estate, relinfo); - Assert(HeapTupleIsValid(fdw_trigtuple) ^ ItemPointerIsValid(tupleid)); + /* + * FIXME: In the serverless architecture, For update operation, we save the + * oldtuple to avoid high-cost table_tuple_fetch_row_version. Thus, fdw_trigtuple + * and tupleid are all valid. We also change the assert of ExecBRDeleteTriggers + * because update partition table will trigger ExecBRDeleteTriggers. + */ + if(enable_serverless) + Assert(HeapTupleIsValid(fdw_trigtuple) || ItemPointerIsValid(tupleid)); + else + Assert(HeapTupleIsValid(fdw_trigtuple) ^ ItemPointerIsValid(tupleid)); if (fdw_trigtuple == NULL) { TupleTableSlot *epqslot_candidate = NULL; From 93581427141824ddc5ec240724501e2a6ee7bb61 Mon Sep 17 00:00:00 2001 From: leo Date: Wed, 13 Dec 2023 08:41:16 +0800 Subject: [PATCH 039/152] Enhancement: support dispatch metadata to QE from QD. Currently, only QD could modify catalog, QE could read catalog from catalog service. The cache of catalog is valid on QD, QD could read catalog efficiently, but QE could only read catalog from remote catalog service not local cache and it's inefficient. 
To solve this problem, QD dispatch the meta data needed by QE with the plan, and QE receive the meta data with the plan together, and use the meta data to init the plan nodes, this reduces the frequency of reading catalog from remote catalog service on QE. For those undispatched meta data, QE could read from remote catalog service. QD append plan node's meta data to plan's info_context, and send to QE with the plan. QE receive the plan with info_context, extract the meta data from info_context and init the plan with those meta data. If info_context is null, QE read meta data from remote catalog service. SeqScan/Motion/Result/Modifytable are supported now. --- src/backend/cdb/cdbplan.c | 1 + src/backend/cdb/cdbrelsize.c | 34 ++------------------------ src/backend/commands/analyze.c | 7 ++---- src/backend/executor/execProcnode.c | 24 ++++++++++++++++++ src/backend/executor/nodeModifyTable.c | 2 +- src/backend/executor/nodeMotion.c | 5 ++-- src/backend/executor/nodeResult.c | 2 +- src/backend/executor/nodeSeqscan.c | 2 +- src/backend/nodes/copyfuncs.c | 16 ++++++++++++ src/backend/nodes/outfuncs.c | 1 + src/backend/nodes/readfuncs.c | 1 + src/backend/utils/adt/dbsize.c | 2 +- src/include/executor/executor.h | 9 +++++++ src/include/executor/nodeModifyTable.h | 1 + src/include/executor/nodeMotion.h | 2 ++ src/include/executor/nodeResult.h | 1 + src/include/executor/nodeSeqscan.h | 1 + src/include/nodes/plannodes.h | 5 ++++ 18 files changed, 72 insertions(+), 44 deletions(-) diff --git a/src/backend/cdb/cdbplan.c b/src/backend/cdb/cdbplan.c index 1979ad38f8b..531c1993760 100644 --- a/src/backend/cdb/cdbplan.c +++ b/src/backend/cdb/cdbplan.c @@ -1088,6 +1088,7 @@ mutate_plan_fields(Plan *newplan, Plan *oldplan, Node *(*mutator) (), void *cont /* Bitmapsets aren't nodes but need to be copied to palloc'd space. 
*/ newplan->extParam = bms_copy(oldplan->extParam); newplan->allParam = bms_copy(oldplan->allParam); + newplan->info_context = copyObject(oldplan->info_context); } diff --git a/src/backend/cdb/cdbrelsize.c b/src/backend/cdb/cdbrelsize.c index 5de0f38b4d0..78887cfaf81 100644 --- a/src/backend/cdb/cdbrelsize.c +++ b/src/backend/cdb/cdbrelsize.c @@ -37,44 +37,14 @@ int64 cdbRelMaxSegSize(Relation rel) { int64 size = 0; - int i; - CdbPgResults cdb_pgresults = {NULL, 0}; - char *sql; /* * Let's ask the QEs for the size of the relation * * Relation Oids are assumed to be in sync in all nodes. */ - sql = psprintf("select pg_catalog.pg_relation_size(%u)", - RelationGetRelid(rel)); - - CdbDispatchCommand(sql, DF_WITH_SNAPSHOT, &cdb_pgresults); - - for (i = 0; i < cdb_pgresults.numResults; i++) - { - struct pg_result *pgresult = cdb_pgresults.pg_results[i]; - - if (PQresultStatus(pgresult) != PGRES_TUPLES_OK) - { - cdbdisp_clearCdbPgResults(&cdb_pgresults); - elog(ERROR, "cdbRelMaxSegSize: resultStatus not tuples_Ok: %s %s", - PQresStatus(PQresultStatus(pgresult)), PQresultErrorMessage(pgresult)); - } - else - { - Assert(PQntuples(pgresult) == 1); - int64 tempsize = 0; - - (void) scanint8(PQgetvalue(pgresult, 0, 0), false, &tempsize); - if (tempsize > size) - size = tempsize; - } - } - - pfree(sql); - - cdbdisp_clearCdbPgResults(&cdb_pgresults); + size = DatumGetInt64(DirectFunctionCall2(pg_relation_size, + ObjectIdGetDatum(RelationGetRelid(rel)), CStringGetTextDatum("main"))); return size; } diff --git a/src/backend/commands/analyze.c b/src/backend/commands/analyze.c index 25f89740c1e..07ec386f658 100644 --- a/src/backend/commands/analyze.c +++ b/src/backend/commands/analyze.c @@ -2383,11 +2383,8 @@ AcquireNumberOfBlocks(Relation onerel) onerel->rd_cdbpolicy && !GpPolicyIsEntry(onerel->rd_cdbpolicy)) { /* Query the segments using pg_relation_size(). 
*/ - char relsize_sql[100]; - - snprintf(relsize_sql, sizeof(relsize_sql), - "select pg_catalog.pg_relation_size(%u, 'main')", RelationGetRelid(onerel)); - totalbytes = get_size_from_segDBs(relsize_sql); + totalbytes = DatumGetInt64(DirectFunctionCall2(pg_relation_size, + ObjectIdGetDatum(RelationGetRelid(onerel)), CStringGetTextDatum("main"))); if (GpPolicyIsReplicated(onerel->rd_cdbpolicy)) { /* diff --git a/src/backend/executor/execProcnode.c b/src/backend/executor/execProcnode.c index 1bae66ef334..031c82a5a39 100644 --- a/src/backend/executor/execProcnode.c +++ b/src/backend/executor/execProcnode.c @@ -146,6 +146,12 @@ /* flags bits for planstate walker */ #define PSW_IGNORE_INITPLAN 0x01 +/* Hook for plugins to get control in ExecInitNode() */ +ExecInitNode_hook_type ExecInitNode_hook = NULL; + +/* Hook for plugins to get control in ExecEndNode() */ +ExecEndNode_hook_type ExecEndNode_hook = NULL; + /** * Forward declarations of static functions */ @@ -188,6 +194,15 @@ static TupleTableSlot *ExecProcNodeGPDB(PlanState *node); */ PlanState * ExecInitNode(Plan *node, EState *estate, int eflags) +{ + if (ExecInitNode_hook) + return (*ExecInitNode_hook)(node, estate, eflags); + + return ExecInitNode_Internal(node, estate, eflags); +} + +PlanState * +ExecInitNode_Internal(Plan *node, EState *estate, int eflags) { PlanState *result; List *subps; @@ -789,6 +804,15 @@ MultiExecProcNode(PlanState *node) */ void ExecEndNode(PlanState *node) +{ + if (ExecEndNode_hook) + return (*ExecEndNode_hook) (node); + + return ExecEndNode_Internal(node); +} + +void +ExecEndNode_Internal(PlanState *node) { /* * do nothing when we get to the end of a leaf on tree. diff --git a/src/backend/executor/nodeModifyTable.c b/src/backend/executor/nodeModifyTable.c index a7abebebddc..10608fd1099 100644 --- a/src/backend/executor/nodeModifyTable.c +++ b/src/backend/executor/nodeModifyTable.c @@ -2582,7 +2582,7 @@ ExecPrepareTupleRouting(ModifyTableState *mtstate, * if needed. 
* ---------------------------------------------------------------- */ -static TupleTableSlot * +TupleTableSlot * ExecModifyTable(PlanState *pstate) { ModifyTableState *node = castNode(ModifyTableState, pstate); diff --git a/src/backend/executor/nodeMotion.c b/src/backend/executor/nodeMotion.c index b6514f2a0ce..a7f2bad858b 100644 --- a/src/backend/executor/nodeMotion.c +++ b/src/backend/executor/nodeMotion.c @@ -54,7 +54,6 @@ static TupleTableSlot *execMotionSender(MotionState *node); static TupleTableSlot *execMotionUnsortedReceiver(MotionState *node); static TupleTableSlot *execMotionSortedReceiver(MotionState *node); -static int CdbMergeComparator(Datum lhs, Datum rhs, void *context); static uint32 evalHashKey(ExprContext *econtext, List *hashkeys, CdbHash *h); static void doSendEndOfStream(Motion *motion, MotionState *node); @@ -96,7 +95,7 @@ formatTuple(StringInfo buf, TupleTableSlot *slot, Oid *outputFunArray) * ExecMotion * ---------------------------------------------------------------- */ -static TupleTableSlot * +TupleTableSlot * ExecMotion(PlanState *pstate) { MotionState *node = castNode(MotionState, pstate); @@ -1034,7 +1033,7 @@ ExecEndMotion(MotionState *node) * CdbMergeComparator: * Used to compare tuples for a sorted motion node. */ -static int +int CdbMergeComparator(Datum lhs, Datum rhs, void *context) { MotionState *node = (MotionState *) context; diff --git a/src/backend/executor/nodeResult.c b/src/backend/executor/nodeResult.c index 7c47903573a..032e29904a6 100644 --- a/src/backend/executor/nodeResult.c +++ b/src/backend/executor/nodeResult.c @@ -73,7 +73,7 @@ static bool TupleMatchesHashFilter(ResultState *node, TupleTableSlot *resultSlot * 'nil' if the constant qualification is not satisfied. 
* ---------------------------------------------------------------- */ -static TupleTableSlot * +TupleTableSlot * ExecResult(PlanState *pstate) { ResultState *node = castNode(ResultState, pstate); diff --git a/src/backend/executor/nodeSeqscan.c b/src/backend/executor/nodeSeqscan.c index 94270b8231b..889b809052c 100644 --- a/src/backend/executor/nodeSeqscan.c +++ b/src/backend/executor/nodeSeqscan.c @@ -141,7 +141,7 @@ SeqRecheck(SeqScanState *node, TupleTableSlot *slot) * access method functions. * ---------------------------------------------------------------- */ -static TupleTableSlot * +TupleTableSlot * ExecSeqScan(PlanState *pstate) { SeqScanState *node = castNode(SeqScanState, pstate); diff --git a/src/backend/nodes/copyfuncs.c b/src/backend/nodes/copyfuncs.c index bf89aff90c3..81f5699a85e 100644 --- a/src/backend/nodes/copyfuncs.c +++ b/src/backend/nodes/copyfuncs.c @@ -261,6 +261,7 @@ CopyPlanFields(const Plan *from, Plan *newnode) COPY_SCALAR_FIELD(parallel); COPY_SCALAR_FIELD(operatorMemKB); + COPY_NODE_FIELD(info_context); } /* @@ -6400,6 +6401,18 @@ _copyDropWarehouseStmt(const DropWarehouseStmt *from) return newnode; } +static TupleDescNode * +_copyTupleDescNode(const TupleDescNode *from) +{ + TupleDescNode *newnode = makeNode(TupleDescNode); + + COPY_SCALAR_FIELD(natts); + + newnode->tuple = CreateTupleDescCopyConstr(from->tuple); + + return newnode; +} + /* * copyObjectImpl -- implementation of copyObject(); see nodes/nodes.h * @@ -7524,6 +7537,9 @@ copyObjectImpl(const void *from) case T_DenyLoginPoint: retval = _copyDenyLoginPoint(from); break; + case T_TupleDescNode: + retval = _copyTupleDescNode(from); + break; case T_CookedConstraint: retval = _copyCookedConstraint(from); diff --git a/src/backend/nodes/outfuncs.c b/src/backend/nodes/outfuncs.c index 38f34f40b62..9869127938f 100644 --- a/src/backend/nodes/outfuncs.c +++ b/src/backend/nodes/outfuncs.c @@ -415,6 +415,7 @@ _outPlanInfo(StringInfo str, const Plan *node) #endif /* 
COMPILING_BINARY_FUNCS */ WRITE_UINT64_FIELD(operatorMemKB); + WRITE_NODE_FIELD(info_context); } /* diff --git a/src/backend/nodes/readfuncs.c b/src/backend/nodes/readfuncs.c index 636e79d1cf4..9859a0c33c8 100644 --- a/src/backend/nodes/readfuncs.c +++ b/src/backend/nodes/readfuncs.c @@ -1699,6 +1699,7 @@ ReadCommonPlan(Plan *local_node) #endif /* COMPILING_BINARY_FUNCS */ READ_UINT64_FIELD(operatorMemKB); + READ_NODE_FIELD(info_context); } /* diff --git a/src/backend/utils/adt/dbsize.c b/src/backend/utils/adt/dbsize.c index e41dc619b89..eba32a2e573 100644 --- a/src/backend/utils/adt/dbsize.c +++ b/src/backend/utils/adt/dbsize.c @@ -553,7 +553,7 @@ pg_relation_size(PG_FUNCTION_ARGS) // TODO directory table size = calculate_relation_size(rel, forkNumber); - if (Gp_role == GP_ROLE_DISPATCH) + if (Gp_role == GP_ROLE_DISPATCH && (RelationIsHeap(rel) || RelationIsAppendOptimized(rel))) { char *sql; diff --git a/src/include/executor/executor.h b/src/include/executor/executor.h index 8169aa94b34..187db7a0480 100644 --- a/src/include/executor/executor.h +++ b/src/include/executor/executor.h @@ -101,6 +101,13 @@ extern PGDLLIMPORT ExecutorCheckPerms_hook_type ExecutorCheckPerms_hook; typedef bool (*SetDtxFlag_hook_type) (bool needDxt); extern PGDLLIMPORT SetDtxFlag_hook_type SetDtxFlag_hook; +/* Hook for plugins to get control in ExecInitNode() */ +typedef PlanState *(*ExecInitNode_hook_type)(Plan *node, EState *estate, int eflags); +extern PGDLLIMPORT ExecInitNode_hook_type ExecInitNode_hook; + +/* Hook for plugins to get control in ExecEndNode() */ +typedef void (*ExecEndNode_hook_type)(PlanState *node); +extern PGDLLIMPORT ExecEndNode_hook_type ExecEndNode_hook; /* * prototypes from functions in execAmi.c */ @@ -261,9 +268,11 @@ extern Node *attrMapExpr(TupleConversionMap *map, Node *expr); * functions in execProcnode.c */ extern PlanState *ExecInitNode(Plan *node, EState *estate, int eflags); +extern PlanState *ExecInitNode_Internal(Plan *node, EState *estate, int 
eflags); extern void ExecSetExecProcNode(PlanState *node, ExecProcNodeMtd function); extern Node *MultiExecProcNode(PlanState *node); extern void ExecEndNode(PlanState *node); +extern void ExecEndNode_Internal(PlanState *node); extern bool ExecShutdownNode(PlanState *node); extern void ExecSetTupleBound(int64 tuples_needed, PlanState *child_node); diff --git a/src/include/executor/nodeModifyTable.h b/src/include/executor/nodeModifyTable.h index 09518a961b4..9dae915ab9c 100644 --- a/src/include/executor/nodeModifyTable.h +++ b/src/include/executor/nodeModifyTable.h @@ -20,6 +20,7 @@ extern void ExecComputeStoredGenerated(ResultRelInfo *resultRelInfo, CmdType cmdtype); extern ModifyTableState *ExecInitModifyTable(ModifyTable *node, EState *estate, int eflags); +extern TupleTableSlot *ExecModifyTable(PlanState *pstate); extern void ExecEndModifyTable(ModifyTableState *node); extern void ExecReScanModifyTable(ModifyTableState *node); extern void ExecSquelchModifyTable(ModifyTableState *node, bool force); diff --git a/src/include/executor/nodeMotion.h b/src/include/executor/nodeMotion.h index c5976e335c8..24e0c64f341 100644 --- a/src/include/executor/nodeMotion.h +++ b/src/include/executor/nodeMotion.h @@ -17,9 +17,11 @@ #include "nodes/execnodes.h" +extern TupleTableSlot *ExecMotion(PlanState *pstate); extern MotionState *ExecInitMotion(Motion *node, EState *estate, int eflags); extern void ExecEndMotion(MotionState *node); extern void ExecReScanMotion(MotionState *node); +extern int CdbMergeComparator(Datum lhs, Datum rhs, void *context); extern void ExecSquelchMotion(MotionState *node, bool force); diff --git a/src/include/executor/nodeResult.h b/src/include/executor/nodeResult.h index 197eabac715..607edecee14 100644 --- a/src/include/executor/nodeResult.h +++ b/src/include/executor/nodeResult.h @@ -16,6 +16,7 @@ #include "nodes/execnodes.h" +extern TupleTableSlot *ExecResult(PlanState *pstate); extern ResultState *ExecInitResult(Result *node, EState *estate, int 
eflags); extern void ExecEndResult(ResultState *node); extern void ExecResultMarkPos(ResultState *node); diff --git a/src/include/executor/nodeSeqscan.h b/src/include/executor/nodeSeqscan.h index 170286a8d5e..576f0b8701f 100644 --- a/src/include/executor/nodeSeqscan.h +++ b/src/include/executor/nodeSeqscan.h @@ -20,6 +20,7 @@ extern SeqScanState *ExecInitSeqScan(SeqScan *node, EState *estate, int eflags); extern SeqScanState *ExecInitSeqScanForPartition(SeqScan *node, EState *estate, Relation currentRelation); +extern TupleTableSlot *ExecSeqScan(PlanState *pstate); extern void ExecEndSeqScan(SeqScanState *node); extern void ExecReScanSeqScan(SeqScanState *node); diff --git a/src/include/nodes/plannodes.h b/src/include/nodes/plannodes.h index d2b2d921fba..da33e1df9ea 100644 --- a/src/include/nodes/plannodes.h +++ b/src/include/nodes/plannodes.h @@ -334,6 +334,11 @@ typedef struct Plan * How much memory (in KB) should be used to execute this plan node? */ uint64 operatorMemKB; + + /* + * extra information of plan(NULL if not needed). + */ + List *info_context; } Plan; /* ---------------- From c74c0458505b1da97e986b35ea9481202ea7a7c9 Mon Sep 17 00:00:00 2001 From: liushengsong Date: Fri, 8 Dec 2023 09:57:12 +0800 Subject: [PATCH 040/152] Add configure option --enable-gophermeta, --enable-datalake We only need to install the related library and preload the related library with the configure option --enable-datalake, --enable-gophermeta. Please add these two options when configure if you want to build pg_gophermeta and datalake. 
--- src/include/utils/process_shared_preload_libraries.h | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/src/include/utils/process_shared_preload_libraries.h b/src/include/utils/process_shared_preload_libraries.h index fe098ccf401..cf1548abb78 100644 --- a/src/include/utils/process_shared_preload_libraries.h +++ b/src/include/utils/process_shared_preload_libraries.h @@ -4,3 +4,9 @@ #ifdef USE_PAX_STORAGE "pax", #endif +#ifdef USE_GOPHERMETA + "pg_gophermeta", +#endif +#ifdef USE_DATALAKE + "datalake_proxy", +#endif From d96e3a7272639c5ab9a8717c2f2c8373d091e105 Mon Sep 17 00:00:00 2001 From: zhangwenchao Date: Wed, 27 Dec 2023 19:31:12 +0800 Subject: [PATCH 041/152] Support vbf and compress. 1. Fix insert vbf tuple will call Flush() multi times. 2. TupleFetchRowVersion should be not implemented. 3. On QEs, should flush first before send segdatatule to QD. 4. Fix other unreasonable codes. --- src/backend/commands/tablecmds.c | 31 ++++++++++++++++--- .../utils/workfile_manager/workfile_mgr.c | 8 ++--- src/include/commands/tablecmds.h | 2 ++ 3 files changed, 33 insertions(+), 8 deletions(-) diff --git a/src/backend/commands/tablecmds.c b/src/backend/commands/tablecmds.c index 73ee36b710b..fad0b2d0791 100644 --- a/src/backend/commands/tablecmds.c +++ b/src/backend/commands/tablecmds.c @@ -581,7 +581,9 @@ static RangeVar *make_temp_table_name(Relation rel, BackendId id); static bool prebuild_temp_table(Relation rel, RangeVar *tmpname, DistributedBy *distro, char *amname, List *opts, bool isTmpTableAo, bool useExistingColumnAttributes); +static void ATExecSetRelOptionsCheck(Relation rel, DefElem *def); +ATExecSetRelOptionsCheck_hook_type ATExecSetRelOptionsCheck_hook = NULL; ATRewriteTable_hook_type ATRewriteTable_hook = NULL; static void checkATSetDistributedByStandalone(AlteredTableInfo *tab, Relation rel); @@ -8681,7 +8683,7 @@ ATExecAddColumn(List **wqueue, AlteredTableInfo *tab, Relation rel, * should be smarter.. 
*/ - if (RelationIsAppendOptimized(rel)) + if (!RelationIsHeap(rel)) { if (!defval) defval = (Expr *) makeNullConst(typeOid, -1, collOid); @@ -16188,6 +16190,7 @@ ATExecSetRelOptions(Relation rel, List *defList, AlterTableType operation, { DefElem *def = lfirst(cell); + ATExecSetRelOptionsCheck(rel, def); /* * Autovacuum on user tables is not enabled in Cloudberry. Move on * with a warning. The decision to not error out is in favor of @@ -23226,7 +23229,27 @@ checkATSetDistributedByStandalone(AlteredTableInfo *tab, Relation rel) if (!standalone) ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), - errmsg("cannot alter distribution with other subcommands for relation \"%s\"", - RelationGetRelationName(rel)), - errhint("consider separating into multiple statements"))); + errmsg("cannot alter distribution with other subcommands for relation \"%s\"", + RelationGetRelationName(rel)), + errhint("consider separating into multiple statements"))); +} + +static void +ATExecSetRelOptionsCheck(Relation rel, DefElem *def) +{ + int kw_len = strlen(def->defname); + + if (pg_strncasecmp(SOPT_APPENDONLY, def->defname, kw_len) == 0 || + pg_strncasecmp(SOPT_BLOCKSIZE, def->defname, kw_len) == 0 || + pg_strncasecmp(SOPT_COMPTYPE, def->defname, kw_len) == 0 || + pg_strncasecmp(SOPT_COMPLEVEL, def->defname, kw_len) == 0 || + pg_strncasecmp(SOPT_CHECKSUM, def->defname, kw_len) == 0) + ereport(ERROR, + (errcode(ERRCODE_WRONG_OBJECT_TYPE), + errmsg("cannot SET reloption \"%s\"", + def->defname))); + + if (ATExecSetRelOptionsCheck_hook) + ATExecSetRelOptionsCheck_hook(rel, def); + return; } diff --git a/src/backend/utils/workfile_manager/workfile_mgr.c b/src/backend/utils/workfile_manager/workfile_mgr.c index e5b311cf9ba..210c2c72dc6 100644 --- a/src/backend/utils/workfile_manager/workfile_mgr.c +++ b/src/backend/utils/workfile_manager/workfile_mgr.c @@ -118,7 +118,7 @@ struct workfile_set char prefix[WORKFILE_PREFIX_LEN]; /* Type of operator creating the workfile set */ - char 
operator[NAMEDATALEN]; + char operator_name[NAMEDATALEN]; /* Slice in which the spilling operator was */ int slice_id; @@ -683,9 +683,9 @@ workfile_mgr_create_set_internal(const char *operator_name, const char *prefix) dlist_push_tail(&localCtl.localList, &work_set->local_node); if (operator_name) - strlcpy(work_set->operator, operator_name, sizeof(work_set->operator)); + strlcpy(work_set->operator_name, operator_name, sizeof(work_set->operator_name)); else - work_set->operator[0] = '\0'; + work_set->operator_name[0] = '\0'; if (prefix) { @@ -944,7 +944,7 @@ gp_workfile_mgr_cache_entries_internal(PG_FUNCTION_ARGS) values[0] = Int32GetDatum(GpIdentity.segindex); values[1] = CStringGetTextDatum(work_set->prefix); values[2] = Int64GetDatum(work_set->total_bytes); - values[3] = CStringGetTextDatum(work_set->operator); + values[3] = CStringGetTextDatum(work_set->operator_name); values[4] = UInt32GetDatum(work_set->slice_id); values[5] = UInt32GetDatum(work_set->session_id); values[6] = UInt32GetDatum(work_set->command_count); diff --git a/src/include/commands/tablecmds.h b/src/include/commands/tablecmds.h index eae233a6776..d40cd62be17 100644 --- a/src/include/commands/tablecmds.h +++ b/src/include/commands/tablecmds.h @@ -140,4 +140,6 @@ extern Datum get_rel_opts(Relation rel); typedef void (*ATRewriteTable_hook_type)(AlteredTableInfo *tab, Oid OIDNewHeap, LOCKMODE lockmode); extern PGDLLIMPORT ATRewriteTable_hook_type ATRewriteTable_hook; +typedef void (*ATExecSetRelOptionsCheck_hook_type) (Relation rel, DefElem *def); +extern PGDLLIMPORT ATExecSetRelOptionsCheck_hook_type ATExecSetRelOptionsCheck_hook; #endif /* TABLECMDS_H */ From 11cc8c482554a46e2c2a52a3aafc0441cadc79bb Mon Sep 17 00:00:00 2001 From: zhangwenchao <656540940@qq.com> Date: Wed, 13 Dec 2023 15:21:08 +0800 Subject: [PATCH 042/152] Fix dispatch set command will error out when there is no warehouse. When set command, if there is no warehouse using, should not error out and set command in QD. 
In next build connections, if the guc is synchronized, it will be dispatched to QEs. --- src/include/utils/sync_guc_name.h | 1 + src/include/utils/unsync_guc_name.h | 1 - 2 files changed, 1 insertion(+), 1 deletion(-) diff --git a/src/include/utils/sync_guc_name.h b/src/include/utils/sync_guc_name.h index 286762e6d6f..2e8bc561dca 100644 --- a/src/include/utils/sync_guc_name.h +++ b/src/include/utils/sync_guc_name.h @@ -46,6 +46,7 @@ "enable_parallel", "execute_pruned_plan", "explain_memory_verbosity", + "extra_float_digits", "force_parallel_mode", "gin_fuzzy_search_limit", "gin_pending_list_limit", diff --git a/src/include/utils/unsync_guc_name.h b/src/include/utils/unsync_guc_name.h index 5945fe3de6b..38b585a84ef 100644 --- a/src/include/utils/unsync_guc_name.h +++ b/src/include/utils/unsync_guc_name.h @@ -139,7 +139,6 @@ "event_source", "exit_on_error", "external_pid_file", - "extra_float_digits", "from_collapse_limit", "fsync", "full_page_writes", From c0b9687fb3ee2adeba3f81db7b19c894fee4beea Mon Sep 17 00:00:00 2001 From: roseduan Date: Tue, 5 Mar 2024 14:19:12 +0800 Subject: [PATCH 043/152] Feature: Support Fault Tolerant Service (FTS) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This commit implements a Fault Tolerant Service (FTS) in serverless mode. The FTS periodically pings each segment in the warehouse cluster, marking any failed segments as 'down'. If a previously failed segment becomes responsive again, it willl be marked as 'u' again. This new feature enhances the reliability and fault tolerance of the warehouse cluster, providing automated detection and handling of segment failures. 
文档及测试 case:https://hashdata.feishu.cn/docx/VzvCd0TvtoMoV3xifMIcOluon9f --- src/backend/cdb/cdbutil.c | 35 ++++++++++++++++++++++++++----- src/backend/fts/fts.c | 6 +++--- src/backend/fts/ftsprobe.c | 27 +++++++++++++----------- src/include/cdb/cdbutil.h | 6 ++++++ src/include/postmaster/fts.h | 4 ++-- src/include/postmaster/ftsprobe.h | 6 ++++++ 6 files changed, 62 insertions(+), 22 deletions(-) diff --git a/src/backend/cdb/cdbutil.c b/src/backend/cdb/cdbutil.c index d50106c972f..178d8726770 100644 --- a/src/backend/cdb/cdbutil.c +++ b/src/backend/cdb/cdbutil.c @@ -85,6 +85,11 @@ static CdbComponentDatabases *cdb_component_dbs = NULL; */ getgpsegmentCount_hook_type getgpsegmentCount_hook = NULL; +/* + * Hook for plugins to get control in getCdbComponentInfo. + */ +getCdbComponentInfo_hook_type getCdbComponentInfo_hook = NULL; + #ifdef USE_INTERNAL_FTS /* @@ -163,9 +168,9 @@ readGpSegConfigFromFTSFiles(int *total_dbs) { config = &configs[idx]; - if (sscanf(buf, "%d %d %c %c %c %c %d %s %s", (int *)&config->dbid, (int *)&config->segindex, + if (sscanf(buf, "%d %d %c %c %c %c %d %s %s %u", (int *)&config->dbid, (int *)&config->segindex, &config->role, &config->preferred_role, &config->mode, &config->status, - &config->port, hostname, address) != GPSEGCONFIGNUMATTR) + &config->port, hostname, address, (Oid *)&config->warehouseid) != GPSEGCONFIGNUMATTR) { FreeFile(fd); elog(ERROR, "invalid data in gp_segment_configuration dump file: %s:%m", GPSEGCONFIGDUMPFILE); @@ -223,9 +228,9 @@ writeGpSegConfigToFTSFiles(void) { config = &configs[idx]; - if (fprintf(fd, "%d %d %c %c %c %c %d %s %s\n", config->dbid, config->segindex, + if (fprintf(fd, "%d %d %c %c %c %c %d %s %s %u\n", config->dbid, config->segindex, config->role, config->preferred_role, config->mode, config->status, - config->port, config->hostname, config->address) < 0) + config->port, config->hostname, config->address, config->warehouseid) < 0) { FreeFile(fd); elog(ERROR, "could not dump gp_segment_configuration 
to file: %s: %m", GPSEGCONFIGDUMPFILE); @@ -253,6 +258,7 @@ readGpSegConfigFromCatalog(int *total_dbs) SysScanDesc gp_seg_config_scan; GpSegConfigEntry *configs; GpSegConfigEntry *config; + bool need_current_segment = true; array_size = 500; configs = palloc0(sizeof(GpSegConfigEntry) * array_size); @@ -268,14 +274,30 @@ readGpSegConfigFromCatalog(int *total_dbs) Assert(!isNull); warehouseid = DatumGetObjectId(attr); + /* status */ + attr = heap_getattr(gp_seg_config_tuple, Anum_gp_segment_configuration_status, RelationGetDescr(gp_seg_config_rel), &isNull); + Assert(!isNull); + char status = DatumGetChar(attr); + /* content */ attr = heap_getattr(gp_seg_config_tuple, Anum_gp_segment_configuration_content, RelationGetDescr(gp_seg_config_rel), &isNull); Assert(!isNull); - if (warehouseid == GetCurrentWarehouseId() || DatumGetInt16(attr) == MASTER_CONTENT_ID) + /* + * In serverless mode, and if we are not in fts probe process, + * we only need the segment that is up and has the same warehouseid. + */ + if (enable_serverless && !am_ftsprobe) + need_current_segment = (warehouseid == GetCurrentWarehouseId() || DatumGetInt16(attr) == MASTER_CONTENT_ID) + && (status == GP_SEGMENT_CONFIGURATION_STATUS_UP); + + if (need_current_segment) { config = &configs[idx]; + /* warehouseid */ + config->warehouseid = warehouseid; + /* dbid */ attr = heap_getattr(gp_seg_config_tuple, Anum_gp_segment_configuration_dbid, RelationGetDescr(gp_seg_config_rel), &isNull); Assert(!isNull); @@ -494,6 +516,9 @@ getCdbComponentInfo(void) component_databases->total_entry_dbs, sizeof(CdbComponentDatabaseInfo), CdbComponentDatabaseInfoCompare); + if (getCdbComponentInfo_hook) + (*getCdbComponentInfo_hook)(component_databases); + /* * Now count the number of distinct segindexes. Since it's sorted, this is * easy. 
diff --git a/src/backend/fts/fts.c b/src/backend/fts/fts.c index 719e8fbca1c..5809d3897f5 100644 --- a/src/backend/fts/fts.c +++ b/src/backend/fts/fts.c @@ -174,8 +174,8 @@ CdbComponentDatabases *readCdbComponentInfoAndUpdateStatus(void) } void -probeWalRepUpdateConfig(int16 dbid, int16 segindex, char role, - bool IsSegmentAlive, bool IsInSync) +probeWalRepUpdateConfig(int16 dbid, int16 segindex, Oid warehouseid, + char role, bool IsSegmentAlive, bool IsInSync) { AssertImply(IsInSync, IsSegmentAlive); @@ -240,7 +240,7 @@ probeWalRepUpdateConfig(int16 dbid, int16 segindex, char role, ScanKeyInit(&scankey[1], Anum_gp_segment_configuration_warehouseid, BTEqualStrategyNumber, F_OIDEQ, - ObjectIdGetDatum(GetCurrentWarehouseId())); + ObjectIdGetDatum(warehouseid)); sscan = systable_beginscan(configrel, GpSegmentConfigDbidWarehouseIndexId, true, NULL, 2, scankey); diff --git a/src/backend/fts/ftsprobe.c b/src/backend/fts/ftsprobe.c index 40d38278dad..ece8edb6581 100644 --- a/src/backend/fts/ftsprobe.c +++ b/src/backend/fts/ftsprobe.c @@ -32,7 +32,7 @@ #include "postmaster/postmaster.h" #include "utils/snapmgr.h" -static struct pollfd *PollFds; +struct pollfd *PollFds = NULL; static CdbComponentDatabaseInfo * FtsGetPeerSegment(CdbComponentDatabases *cdbs, @@ -253,7 +253,7 @@ checkIfFailedDueToNormalRestart(fts_segment_info *ftsInfo) "primary dbid=%d, mirror dbid=%d", ftsInfo->primary_cdbinfo->config->segindex, ftsInfo->primary_cdbinfo->config->dbid, - ftsInfo->mirror_cdbinfo->config->dbid); + ftsInfo->mirror_cdbinfo ? ftsInfo->mirror_cdbinfo->config->dbid : -1); } else { @@ -266,7 +266,7 @@ checkIfFailedDueToNormalRestart(fts_segment_info *ftsInfo) (uint32) tmpptr, ftsInfo->primary_cdbinfo->config->segindex, ftsInfo->primary_cdbinfo->config->dbid, - ftsInfo->mirror_cdbinfo->config->dbid); + ftsInfo->mirror_cdbinfo ? 
ftsInfo->mirror_cdbinfo->config->dbid : -1); } } else if (strstr(PQerrorMessage(ftsInfo->conn), _(POSTMASTER_IN_RESET_MSG))) @@ -291,7 +291,7 @@ checkIfFailedDueToNormalRestart(fts_segment_info *ftsInfo) * * Upon failure, transition that object to a failed state. */ -static void +void ftsConnect(fts_context *context) { int i; @@ -433,7 +433,7 @@ ftsCheckTimeout(fts_segment_info *ftsInfo, pg_time_t now) } } -static void +void ftsPoll(fts_context *context) { int i; @@ -546,7 +546,7 @@ ftsPoll(fts_context *context) /* * Send FTS query */ -static void +void ftsSend(fts_context *context) { fts_segment_info *ftsInfo; @@ -663,7 +663,7 @@ probeRecordResponse(fts_segment_info *ftsInfo, PGresult *result) /* * Receive segment response */ -static void +void ftsReceive(fts_context *context) { fts_segment_info *ftsInfo; @@ -832,7 +832,7 @@ retryForFtsFailed(fts_segment_info *ftsInfo, pg_time_t now) * corresponding to their failure state. If retries have exhausted, leave the * segment in the failure state. */ -static void +void processRetry(fts_context *context) { fts_segment_info *ftsInfo; @@ -850,6 +850,9 @@ processRetry(fts_context *context) * mirror as down prematurely. If mirror is already marked * down in configuration, there is no need to retry. 
*/ + if (enable_serverless) + break; + if (!(ftsInfo->result.retryRequested && SEGMENT_IS_ALIVE(ftsInfo->mirror_cdbinfo))) break; @@ -941,13 +944,13 @@ updateConfiguration(CdbComponentDatabaseInfo *primary, if (UpdatePrimary) probeWalRepUpdateConfig(primary->config->dbid, primary->config->segindex, - newPrimaryRole, IsPrimaryAlive, - IsInSync); + primary->config->warehouseid, + newPrimaryRole, IsPrimaryAlive, IsInSync); if (UpdateMirror) probeWalRepUpdateConfig(mirror->config->dbid, mirror->config->segindex, - newMirrorRole, IsMirrorAlive, - IsInSync); + primary->config->warehouseid, + newMirrorRole, IsMirrorAlive, IsInSync); CommitTransactionCommand(); CurrentResourceOwner = save; diff --git a/src/include/cdb/cdbutil.h b/src/include/cdb/cdbutil.h index 50465f65fdb..e59fddfdea0 100644 --- a/src/include/cdb/cdbutil.h +++ b/src/include/cdb/cdbutil.h @@ -57,6 +57,12 @@ typedef enum SegmentType typedef int (*getgpsegmentCount_hook_type)(void); extern PGDLLIMPORT getgpsegmentCount_hook_type getgpsegmentCount_hook; +/* + * Hook for plugins to get control in getCdbComponentInfo. + */ +typedef void (*getCdbComponentInfo_hook_type)(CdbComponentDatabases *component_databases); +extern PGDLLIMPORT getCdbComponentInfo_hook_type getCdbComponentInfo_hook; + /* * performs all necessary setup required for initializing Cloudberry Database components. 
* diff --git a/src/include/postmaster/fts.h b/src/include/postmaster/fts.h index eb29ccbf3c9..6a77491d8c9 100644 --- a/src/include/postmaster/fts.h +++ b/src/include/postmaster/fts.h @@ -48,8 +48,8 @@ extern void SetSkipFtsProbe(bool skipFtsProbe); * Interface for WALREP specific checking */ extern void HandleFtsMessage(const char* query_string); -extern void probeWalRepUpdateConfig(int16 dbid, int16 segindex, char role, - bool IsSegmentAlive, bool IsInSync); +extern void probeWalRepUpdateConfig(int16 dbid, int16 segindex, Oid warehouseid, + char role, bool IsSegmentAlive, bool IsInSync); extern bool FtsProbeStartRule(Datum main_arg); extern void FtsProbeMain (Datum main_arg); diff --git a/src/include/postmaster/ftsprobe.h b/src/include/postmaster/ftsprobe.h index 88369d32ebf..f64c6b2f49a 100644 --- a/src/include/postmaster/ftsprobe.h +++ b/src/include/postmaster/ftsprobe.h @@ -119,8 +119,14 @@ typedef struct fts_segment_info *perSegInfos; } fts_context; +extern struct pollfd *PollFds; extern bool FtsWalRepMessageSegments(CdbComponentDatabases *context); +extern void ftsConnect(fts_context *context); +extern void ftsPoll(fts_context *context); +extern void ftsSend(fts_context *context); +extern void ftsReceive(fts_context *context); +extern void processRetry(fts_context *context); #endif /* USE_INTERNAL_FTS */ #endif \ No newline at end of file From c365a23555171471c7e7fe99ae029c978789bdfe Mon Sep 17 00:00:00 2001 From: leo Date: Mon, 18 Dec 2023 13:29:26 +0800 Subject: [PATCH 044/152] Feature: support scale warehouse horizontally Warehouse is created with fixed number of segments. With the changes of workload, the warehouse should be able to scale out or in to handle the workloads. Add grammer 'ALTER WAREHOUSE warehouse_name SET WAREHOUSE_SIZE new_warehouse_size' to scale the warehouse. If new_warehouse_size is larger than warehouse_size, (new_warehose_size - warehouse_size) segments are added to warehouse, this will not affect any queries in warehouse. 
Sessions on QD will reconnect with all segments at next transaction start. If new_warehouse_size is smaller than warehouse_size, (warehouse_size - new_warehouse_size) segments are deleted from warehouse, this will try to lock the warehouse with exclusive mode to prevent queries in this warehouse. If any active sessions in warehouse, this command will abort. If new_warehouse_size is equal to warehouse_size, do nothing but a notice. --- src/backend/nodes/copyfuncs.c | 17 +++++++++++++++ src/backend/parser/gram.y | 35 ++++++++++++++++++++++++++----- src/backend/storage/lmgr/lmgr.c | 2 +- src/backend/tcop/utility.c | 9 ++++++++ src/backend/utils/misc/gpexpand.c | 2 +- src/include/nodes/nodes.h | 1 + src/include/nodes/parsenodes.h | 15 +++++++++++++ src/include/tcop/cmdtaglist.h | 1 + 8 files changed, 75 insertions(+), 7 deletions(-) diff --git a/src/backend/nodes/copyfuncs.c b/src/backend/nodes/copyfuncs.c index 81f5699a85e..8b5e26b9451 100644 --- a/src/backend/nodes/copyfuncs.c +++ b/src/backend/nodes/copyfuncs.c @@ -6401,6 +6401,19 @@ _copyDropWarehouseStmt(const DropWarehouseStmt *from) return newnode; } +static AlterWarehouseStmt * +_copyAlterWarehouseStmt(const AlterWarehouseStmt *from) +{ + AlterWarehouseStmt *newnode = makeNode(AlterWarehouseStmt); + + COPY_SCALAR_FIELD(kind); + COPY_STRING_FIELD(whname); + COPY_SCALAR_FIELD(warehouse_size); + COPY_NODE_FIELD(options); + + return newnode; +} + static TupleDescNode * _copyTupleDescNode(const TupleDescNode *from) { @@ -7605,6 +7618,10 @@ copyObjectImpl(const void *from) retval = _copyDropWarehouseStmt(from); break; + case T_AlterWarehouseStmt: + retval = _copyAlterWarehouseStmt(from); + break; + default: elog(ERROR, "unrecognized node type: %d", (int) nodeTag(from)); retval = 0; /* keep compiler quiet */ diff --git a/src/backend/parser/gram.y b/src/backend/parser/gram.y index 6c887539142..f5d124b40a7 100644 --- a/src/backend/parser/gram.y +++ b/src/backend/parser/gram.y @@ -297,8 +297,8 @@ static void 
check_expressions_in_partition_key(PartitionSpec *spec, core_yyscan_ CreateTableSpaceStmt CreateFdwStmt CreateForeignServerStmt CreateForeignTableStmt CreateDirectoryTableStmt CreateAssertionStmt CreateTransformStmt CreateTrigStmt CreateEventTrigStmt CreateUserStmt CreateUserMappingStmt CreateRoleStmt CreatePolicyStmt - CreatedbStmt CreateWarehouseStmt DeclareCursorStmt DefineStmt DeleteStmt DiscardStmt DoStmt - DropDirectoryTableStmt DropOpClassStmt DropOpFamilyStmt DropStmt DropWarehouseStmt + CreatedbStmt DeclareCursorStmt DefineStmt DeleteStmt DiscardStmt DoStmt + DropDirectoryTableStmt DropOpClassStmt DropOpFamilyStmt DropStmt DropCastStmt DropRoleStmt DropdbStmt DropTableSpaceStmt DropTransformStmt @@ -321,10 +321,10 @@ static void check_expressions_in_partition_key(PartitionSpec *spec, core_yyscan_ RetrieveStmt CreateTaskStmt AlterTaskStmt DropTaskStmt /* GPDB-specific commands */ -%type AlterProfileStmt AlterQueueStmt AlterResourceGroupStmt AlterSchemaStmt AlterTagStmt +%type AlterProfileStmt AlterQueueStmt AlterResourceGroupStmt AlterSchemaStmt AlterTagStmt AlterWarehouseStmt CreateExternalStmt - CreateProfileStmt CreateQueueStmt CreateResourceGroupStmt CreateTagStmt - DropProfileStmt DropQueueStmt DropResourceGroupStmt DropTagStmt + CreateProfileStmt CreateQueueStmt CreateResourceGroupStmt CreateWarehouseStmt CreateTagStmt + DropProfileStmt DropQueueStmt DropResourceGroupStmt DropWarehouseStmt DropTagStmt ExtTypedesc ExtSingleRowErrorHandling %type OptSingleRowErrorHandling @@ -1459,6 +1459,7 @@ stmt: | AlterTSDictionaryStmt | AlterUserMappingStmt | AlterStorageUserMappingStmt + | AlterWarehouseStmt | AnalyzeStmt | CallStmt | CheckPointStmt @@ -13405,6 +13406,12 @@ WarehouseOptElem: } ; +/***************************************************************************** + * + * QUERY: + * DROP WAREHOUSE name + * + *****************************************************************************/ DropWarehouseStmt: DROP WAREHOUSE name { @@ -13414,6 +13421,24 @@ 
DropWarehouseStmt: DROP WAREHOUSE name } ; +/***************************************************************************** + * + * QUERY: + * ALTER WAREHOUSE name SET WAREHOUSE_SIZE warehouse_size + * + *****************************************************************************/ + +AlterWarehouseStmt: + ALTER WAREHOUSE name SET WAREHOUSE_SIZE SignedIconst + { + AlterWarehouseStmt *n = makeNode(AlterWarehouseStmt); + n->kind = ALTER_WAREHOUSE_SET_WAREHOUSE_SIZE; + n->whname = $3; + n->warehouse_size = $6; + n->options = NULL; + $$ = (Node *)n; + } + ; /***************************************************************************** * diff --git a/src/backend/storage/lmgr/lmgr.c b/src/backend/storage/lmgr/lmgr.c index 61084d83e57..e993c24d87a 100644 --- a/src/backend/storage/lmgr/lmgr.c +++ b/src/backend/storage/lmgr/lmgr.c @@ -812,7 +812,7 @@ LockWarehouse(Oid warehouseOid, LOCKMODE lockmode) SET_LOCKTAG_WAREHOUSE(tag, warehouseOid); - lockResult = LockAcquireExtended(&tag, lockmode, true, true, true, &locallock); + lockResult = LockAcquireExtended(&tag, lockmode, true, false, true, &locallock); /* * Now that we have the lock, check for invalidation messages; diff --git a/src/backend/tcop/utility.c b/src/backend/tcop/utility.c index 4e8b3a778ec..7a076d81db0 100644 --- a/src/backend/tcop/utility.c +++ b/src/backend/tcop/utility.c @@ -270,6 +270,7 @@ ClassifyUtilityCommandAsReadOnly(Node *parsetree) case T_CreateTaskStmt: case T_CreateTagStmt: case T_AlterTaskStmt: + case T_AlterWarehouseStmt: case T_DropTaskStmt: case T_DropProfileStmt: case T_DropQueueStmt: @@ -1977,6 +1978,7 @@ ProcessUtilitySlow(ParseState *pstate, case T_CreateWarehouseStmt: case T_DropWarehouseStmt: + case T_AlterWarehouseStmt: ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), errmsg("warehouse feature is not supported"))); @@ -4074,6 +4076,9 @@ CreateCommandTag(Node *parsetree) case T_DropWarehouseStmt: tag = CMDTAG_DROP_WAREHOUSE; break; + case T_AlterWarehouseStmt: + tag = 
CMDTAG_ALTER_WAREHOUSE; + break; case T_ExtensibleNode: tag = CMDTAG_EXTENSIBLE; @@ -4591,6 +4596,10 @@ GetCommandLogLevel(Node *parsetree) lev = LOGSTMT_DDL; break; + case T_AlterWarehouseStmt: + lev = LOGSTMT_DDL; + break; + /* already-planned queries */ case T_PlannedStmt: { diff --git a/src/backend/utils/misc/gpexpand.c b/src/backend/utils/misc/gpexpand.c index 1c9b02a6778..40de91f5571 100644 --- a/src/backend/utils/misc/gpexpand.c +++ b/src/backend/utils/misc/gpexpand.c @@ -114,7 +114,7 @@ gp_expand_protect_catalog_changes(Relation relation) int oldVersion; int newVersion; - if (Gp_role != GP_ROLE_DISPATCH) + if (Gp_role != GP_ROLE_DISPATCH || enable_serverless) /* only lock catalog updates on qd */ return; diff --git a/src/include/nodes/nodes.h b/src/include/nodes/nodes.h index 2ec47618998..c3ff7a919e4 100644 --- a/src/include/nodes/nodes.h +++ b/src/include/nodes/nodes.h @@ -572,6 +572,7 @@ typedef enum NodeTag T_CreateWarehouseStmt, T_DropWarehouseStmt, T_AddForeignSegStmt, + T_AlterWarehouseStmt, /* * TAGS FOR PARSE TREE NODES (parsenodes.h) diff --git a/src/include/nodes/parsenodes.h b/src/include/nodes/parsenodes.h index db6b8c40175..d143909c65a 100644 --- a/src/include/nodes/parsenodes.h +++ b/src/include/nodes/parsenodes.h @@ -4483,4 +4483,19 @@ typedef struct DropWarehouseStmt char *whname; } DropWarehouseStmt; +typedef enum AlterWarehouseType +{ + ALTER_WAREHOUSE_OPTIONS, + ALTER_WAREHOUSE_SET_WAREHOUSE_SIZE +} AlterWarehouseType; + +typedef struct AlterWarehouseStmt +{ + NodeTag type; + AlterWarehouseType kind; /* ALTER_WAREHOUSE_OPTIONS, etc */ + char *whname; /* Name of the warehouse */ + int warehouse_size; /* New size of warehouse if set warehouse_size command */ + List *options; /* List of DefElem nodes */ +} AlterWarehouseStmt; + #endif /* PARSENODES_H */ diff --git a/src/include/tcop/cmdtaglist.h b/src/include/tcop/cmdtaglist.h index 440c4843bb4..b227c852e8a 100644 --- a/src/include/tcop/cmdtaglist.h +++ b/src/include/tcop/cmdtaglist.h @@ 
-79,6 +79,7 @@ PG_CMDTAG(CMDTAG_ALTER_TRIGGER, "ALTER TRIGGER", true, false, false) PG_CMDTAG(CMDTAG_ALTER_TYPE, "ALTER TYPE", true, true, false) PG_CMDTAG(CMDTAG_ALTER_USER_MAPPING, "ALTER USER MAPPING", true, false, false) PG_CMDTAG(CMDTAG_ALTER_VIEW, "ALTER VIEW", true, false, false) +PG_CMDTAG(CMDTAG_ALTER_WAREHOUSE, "ALTER WAREHOUSE", true, false, false) PG_CMDTAG(CMDTAG_ANALYZE, "ANALYZE", false, false, false) PG_CMDTAG(CMDTAG_BEGIN, "BEGIN", false, false, false) PG_CMDTAG(CMDTAG_DTX_BEGIN_INTERNAL_SUBTRANSACTION, "Begin Internal Subtransaction", false, false, false) From 6105a59f92307a64a3bc1ba5575f20a07e888884 Mon Sep 17 00:00:00 2001 From: kongfanshen Date: Fri, 8 Dec 2023 18:32:09 +0800 Subject: [PATCH 045/152] Supports database level encryption, with each database using a key. Supports encryption algorithms such as AES and SM4. Support TDE function for vbf tables. Use the two-layer key structure that includes master key and data encryption key (DEK).The master key can be obtained from the cloud manager or provided by the user. There are DEK plaintext and DEK ciphertext. The master key is used to encrypt DEK plaintext and generate DEK ciphertext. DEK plaintext is used to encrypt database data, while DEK ciphertext data is stored in shared storage. 
Create an encrypted database using DDL statements, for example: CREATE DATABASE db1 WITH ENCRYPTION_ENABLE 'aes'; --- src/backend/catalog/pg_db_role_setting.c | 6 ++++++ src/backend/cdb/dispatcher/cdbdisp.c | 1 + src/backend/cdb/dispatcher/cdbdisp_async.c | 5 +++++ src/backend/commands/dbcommands.c | 17 ++++++++++++++++- src/backend/parser/gram.y | 4 +++- src/include/Makefile | 2 +- src/include/catalog/pg_db_role_setting.h | 5 +++++ src/include/cdb/cdbdisp_async.h | 5 +++++ src/include/cdb/cdbvars.h | 3 +++ src/include/commands/dbcommands.h | 5 +++++ 10 files changed, 50 insertions(+), 3 deletions(-) diff --git a/src/backend/catalog/pg_db_role_setting.c b/src/backend/catalog/pg_db_role_setting.c index d08229f7338..d69c84e739d 100644 --- a/src/backend/catalog/pg_db_role_setting.c +++ b/src/backend/catalog/pg_db_role_setting.c @@ -28,6 +28,8 @@ #include "utils/builtins.h" #include "utils/syscache.h" +Alter_database_encrypt_key_hook_type Alter_database_encrypt_key_hook = NULL; + void AlterSetting(Oid databaseid, Oid roleid, VariableSetStmt *setstmt) { @@ -39,6 +41,10 @@ AlterSetting(Oid databaseid, Oid roleid, VariableSetStmt *setstmt) valuestr = ExtractSetVariableArgs(setstmt); + if (Alter_database_encrypt_key_hook + && Alter_database_encrypt_key_hook(databaseid, roleid, setstmt, valuestr)) + return; + /* Get the old tuple, if any.
*/ rel = table_open(DbRoleSettingRelationId, RowExclusiveLock); diff --git a/src/backend/cdb/dispatcher/cdbdisp.c b/src/backend/cdb/dispatcher/cdbdisp.c index 4902fe3a105..38382fcae5e 100644 --- a/src/backend/cdb/dispatcher/cdbdisp.c +++ b/src/backend/cdb/dispatcher/cdbdisp.c @@ -44,6 +44,7 @@ static char * segmentsListToString(const char *prefix, List *segments); DispatcherInternalFuncs *pDispatchFuncs = &DispatcherAsyncFuncs; PGResStausOK_hook_type PGResStausOK_hook = NULL; +ProcessDekInfo_hook_type ProcessDekInfo_hook = NULL; /* * cdbdisp_dispatchToGang: diff --git a/src/backend/cdb/dispatcher/cdbdisp_async.c b/src/backend/cdb/dispatcher/cdbdisp_async.c index dc2e89eac3f..d75426684eb 100644 --- a/src/backend/cdb/dispatcher/cdbdisp_async.c +++ b/src/backend/cdb/dispatcher/cdbdisp_async.c @@ -1202,6 +1202,11 @@ processResults(CdbDispatchResult *dispatchResult) /* Don't free the notify here since it in queue now */ qnotifies = NULL; } + else if ( (strcmp(qnotifies->relname, CDB_NOTIFY_DEK_INFO) == 0) + && ProcessDekInfo_hook) + { + ProcessDekInfo_hook(qnotifies->extra, segdbDesc->conn); + } else { /* Got an unknown PGnotify, just record it in log */ diff --git a/src/backend/commands/dbcommands.c b/src/backend/commands/dbcommands.c index 48caa782464..a4c30f9d154 100644 --- a/src/backend/commands/dbcommands.c +++ b/src/backend/commands/dbcommands.c @@ -120,7 +120,8 @@ static void remove_dbtablespaces(Oid db_id); static bool check_db_file_conflict(Oid db_id); static int errdetail_busy_db(int notherbackends, int npreparedxacts); - +CreateDb_hook_type CreateDb_hook = NULL; +DropDb_hook_type DropDb_hook = NULL; /* * CREATE DATABASE */ @@ -276,6 +277,14 @@ createdb(ParseState *pstate, const CreatedbStmt *stmt) errhint("Consider using tablespaces instead."), parser_errposition(pstate, defel->location))); } + else if (strcmp(defel->defname, "encryption_enable") == 0) + { + if (FileEncryptionEnabled) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("If 
tde for whole cluster, can not create encrypted database"), + parser_errposition(pstate, defel->location))); + } else ereport(ERROR, (errcode(ERRCODE_SYNTAX_ERROR), @@ -636,6 +645,9 @@ createdb(ParseState *pstate, const CreatedbStmt *stmt) dbname); } + if (CreateDb_hook) + CreateDb_hook(stmt, dboid); + if (shouldDispatch) { elog(DEBUG5, "shouldDispatch = true, dbOid = %d", dboid); @@ -1168,6 +1180,9 @@ dropdb(const char *dbname, bool missing_ok, bool force) * according to pg_database, which is not good. */ ForceSyncCommit(); + + if (DropDb_hook) + DropDb_hook(db_id); } diff --git a/src/backend/parser/gram.y b/src/backend/parser/gram.y index f5d124b40a7..14df7115701 100644 --- a/src/backend/parser/gram.y +++ b/src/backend/parser/gram.y @@ -770,7 +770,7 @@ static void check_expressions_in_partition_key(PartitionSpec *spec, core_yyscan_ DETACH DICTIONARY DIRECTORY DISABLE_P DISCARD DISTINCT DO DOCUMENT_P DOMAIN_P DOUBLE_P DROP DYNAMIC - EACH ELSE ENABLE_P ENCODING ENCRYPTED END_P ENDPOINT ENUM_P ESCAPE EVENT EXCEPT + EACH ELSE ENABLE_P ENCODING ENCRYPTED ENCRYPTION_ENABLE END_P ENDPOINT ENUM_P ESCAPE EVENT EXCEPT EXCLUDE EXCLUDING EXCLUSIVE EXECUTE EXISTS EXPLAIN EXPRESSION EXTENSION EXTERNAL EXTRACT @@ -14054,6 +14054,7 @@ createdb_opt_name: | OWNER { $$ = pstrdup($1); } | TABLESPACE { $$ = pstrdup($1); } | TEMPLATE { $$ = pstrdup($1); } + | ENCRYPTION_ENABLE { $$ = pstrdup($1); } ; /* @@ -20084,6 +20085,7 @@ PartitionIdentKeyword: ABORT_P | ENABLE_P | ENCODING | ENCRYPTED + | ENCRYPTION_ENABLE | ENDPOINT | ERRORS | ENUM_P diff --git a/src/include/Makefile b/src/include/Makefile index 48573c7a76b..252b2b14eb5 100644 --- a/src/include/Makefile +++ b/src/include/Makefile @@ -24,7 +24,7 @@ SUBDIRS = access bootstrap catalog commands common datatype \ statistics storage tcop snowball snowball/libstemmer tsearch \ tsearch/dicts utils port port/atomics port/win32 port/win32_msvc \ port/win32_msvc/sys port/win32/arpa port/win32/netinet \ - port/win32/sys portability task + 
port/win32/sys portability task crypto SUBDIRS += cdb diff --git a/src/include/catalog/pg_db_role_setting.h b/src/include/catalog/pg_db_role_setting.h index 560969514b8..ee994de558e 100644 --- a/src/include/catalog/pg_db_role_setting.h +++ b/src/include/catalog/pg_db_role_setting.h @@ -57,6 +57,11 @@ DECLARE_TOAST(pg_db_role_setting, 2966, 2967); DECLARE_UNIQUE_INDEX_PKEY(pg_db_role_setting_databaseid_rol_index, 2965, on pg_db_role_setting using btree(setdatabase oid_ops, setrole oid_ops)); #define DbRoleSettingDatidRolidIndexId 2965 +/* + * Hooks for tde function + */ +typedef bool(*Alter_database_encrypt_key_hook_type)(Oid databaseid, Oid roleid, VariableSetStmt *setstmt, char* valuestr); +extern PGDLLIMPORT Alter_database_encrypt_key_hook_type Alter_database_encrypt_key_hook; /* * prototypes for functions in pg_db_role_setting.h */ diff --git a/src/include/cdb/cdbdisp_async.h b/src/include/cdb/cdbdisp_async.h index 0f5853cc3da..375d0048ef6 100644 --- a/src/include/cdb/cdbdisp_async.h +++ b/src/include/cdb/cdbdisp_async.h @@ -13,9 +13,14 @@ * *------------------------------------------------------------------------- */ +#include "libpq-fe.h" + #ifndef CDBDISP_ASYNC_H #define CDBDISP_ASYNC_H +typedef void (*ProcessDekInfo_hook_type) (char *message, PGconn *conn); +extern PGDLLIMPORT ProcessDekInfo_hook_type ProcessDekInfo_hook; + extern DispatcherInternalFuncs DispatcherAsyncFuncs; extern void *cdbdisp_makeDispatchParams_async(int maxSlices, int largestGangSize, char *queryText, int len); diff --git a/src/include/cdb/cdbvars.h b/src/include/cdb/cdbvars.h index 90af5177ce0..852661b81bd 100644 --- a/src/include/cdb/cdbvars.h +++ b/src/include/cdb/cdbvars.h @@ -831,4 +831,7 @@ bool (*AlterWarehouse_hook)(char *warehouse_name, extern WarehouseMethod *warehouse_method; +/* notification condition name of dek info, used in PGnotify */ +#define CDB_NOTIFY_DEK_INFO "dek_info" + #endif /* CDBVARS_H */ diff --git a/src/include/commands/dbcommands.h 
b/src/include/commands/dbcommands.h index 29b6206f1b0..51556810e2a 100644 --- a/src/include/commands/dbcommands.h +++ b/src/include/commands/dbcommands.h @@ -19,6 +19,11 @@ #include "lib/stringinfo.h" #include "parser/parse_node.h" +typedef void(*CreateDb_hook_type)(const CreatedbStmt *stmt, Oid dbOid); +extern PGDLLIMPORT CreateDb_hook_type CreateDb_hook; +typedef void(*DropDb_hook_type)(Oid dbOid); +extern PGDLLIMPORT DropDb_hook_type DropDb_hook; + extern Oid createdb(ParseState *pstate, const CreatedbStmt *stmt); extern void dropdb(const char *dbname, bool missing_ok, bool force); extern void DropDatabase(ParseState *pstate, DropdbStmt *stmt); From d2ddf28f7aaea4bc72cac8becd49618c5aa8c4d9 Mon Sep 17 00:00:00 2001 From: kongfanshen Date: Fri, 22 Dec 2023 00:41:19 +0800 Subject: [PATCH 046/152] When read tuple, need to get all the data. When write tuple, only write data only by one segment. --- src/backend/cdb/cdbcat.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/backend/cdb/cdbcat.c b/src/backend/cdb/cdbcat.c index d3c81c673bc..ea3e845f1de 100644 --- a/src/backend/cdb/cdbcat.c +++ b/src/backend/cdb/cdbcat.c @@ -427,6 +427,8 @@ GpPolicyFetch(Oid tbloid) switch (policyform->policytype) { case SYM_POLICYTYPE_REPLICATED: + if (policyform->numsegments == 0) + policyform->numsegments = getgpsegmentCount(); policy = createReplicatedGpPolicy(policyform->numsegments); break; case SYM_POLICYTYPE_PARTITIONED: From c712ac2f7cae1b5c3de191148ab8c7c690a68dbd Mon Sep 17 00:00:00 2001 From: leo Date: Tue, 9 Jan 2024 13:12:45 +0800 Subject: [PATCH 047/152] Enhancement: optimize the filepath and keep snapshot of table. The relnode is the prefix of filepath for manifest file and data file, there are lots of files with same prefix for one table and this will increase the accessing latency. 
To solve this problem, we use last two parts of uuid as prefix (/a8e2/6d188f753e29/) instead of relnode(/16384/) for data files, this will disperse data files of one table and take advantage of load balance of remote storage(oss). Currently, we do not remove any meta files and data files, metadata in catalog is also not deleted(vacuum is disabled), so that we can undo any operations, such as drop/truncate, also we can access any snapshot of table. --- src/backend/access/heap/pruneheap.c | 12 ++++++++++++ src/include/access/heapam.h | 5 +++++ 2 files changed, 17 insertions(+) diff --git a/src/backend/access/heap/pruneheap.c b/src/backend/access/heap/pruneheap.c index 16d64b89283..f21e0aedbae 100644 --- a/src/backend/access/heap/pruneheap.c +++ b/src/backend/access/heap/pruneheap.c @@ -76,6 +76,9 @@ typedef struct int8 htsv[MaxHeapTuplesPerPage + 1]; } PruneState; +/* Hook for plugins to get control in heap_page_prune_opt */ +heap_page_prune_opt_hook_type heap_page_prune_opt_hook = NULL; + /* Local functions */ static HTSV_Result heap_prune_satisfies_vacuum(Relation relation, PruneState *prstate, HeapTuple tup, @@ -104,6 +107,15 @@ static void heap_prune_record_unused(PruneState *prstate, OffsetNumber offnum); */ void heap_page_prune_opt(Relation relation, Buffer buffer) +{ + if (heap_page_prune_opt_hook) + return (*heap_page_prune_opt_hook)(relation, buffer); + + heap_page_prune_opt_internal(relation, buffer); +} + +void +heap_page_prune_opt_internal(Relation relation, Buffer buffer) { Page page = BufferGetPage(buffer); TransactionId prune_xid; diff --git a/src/include/access/heapam.h b/src/include/access/heapam.h index efae57af806..c2f1d206c64 100644 --- a/src/include/access/heapam.h +++ b/src/include/access/heapam.h @@ -101,6 +101,10 @@ typedef enum HEAPTUPLE_DELETE_IN_PROGRESS /* deleting xact is still in progress */ } HTSV_Result; +/* Hook for plugins to get control in heap_page_prune_opt */ +typedef void (*heap_page_prune_opt_hook_type)(Relation relation, 
Buffer buffer); +extern PGDLLIMPORT heap_page_prune_opt_hook_type heap_page_prune_opt_hook; + /* ---------------- * function prototypes for heap access method * @@ -190,6 +194,7 @@ extern TransactionId heap_index_delete_tuples(Relation rel, /* in heap/pruneheap.c */ struct GlobalVisState; extern void heap_page_prune_opt(Relation relation, Buffer buffer); +extern void heap_page_prune_opt_internal(Relation relation, Buffer buffer); extern int heap_page_prune(Relation relation, Buffer buffer, struct GlobalVisState *vistest, TransactionId old_snap_xmin, From af709ea80937cb199d3703b62f816da4f3fd27b2 Mon Sep 17 00:00:00 2001 From: leo Date: Thu, 18 Jan 2024 12:02:58 +0800 Subject: [PATCH 048/152] Fix: refresh shared buffers and disable hint bits. Catalog is modified on QD, QE can only read catalog into shared buffers from UnionStore. When read catalog cached in shared buffers on QE, we need to validate the buffer to check if it is out of date. If current read LSN is equal or smaller than LSN of buffer, the buffer is valid; otherwise, the buffer is out of date, we need to re-read the page from UnionStore into shared buffers. In InitProcessing Mode, the postgres backends have newest snapshot to see all catalog changes before(dirty read), may set hint bits to tuples which are not visible for current snapshot. This will cause those tuples invisible for backends which should see those tuples, then cause data corrupted. To avoid this, we do not set hint bits in InitProcessing Mode. 
--- src/backend/access/heap/heapam_visibility.c | 10 ++++++++ src/backend/storage/buffer/bufmgr.c | 27 ++++++++++++--------- src/backend/storage/page/bufpage.c | 5 +++- src/include/storage/buf_internals.h | 4 +++ src/include/storage/bufmgr.h | 3 +++ 5 files changed, 37 insertions(+), 12 deletions(-) diff --git a/src/backend/access/heap/heapam_visibility.c b/src/backend/access/heap/heapam_visibility.c index 0119d76b41d..ca43c4bcfbd 100644 --- a/src/backend/access/heap/heapam_visibility.c +++ b/src/backend/access/heap/heapam_visibility.c @@ -180,6 +180,16 @@ SetHintBits(HeapTupleHeader tuple, Buffer buffer, Relation rel, { bool isXmin; + /* + * On QE, we can see any changes on catalog relations(dirty read) in InitProcessing Mode + * because of the latest snapshot, do not set hint bits. + */ + if (enable_serverless && IsInitProcessingMode() && + Gp_role == GP_ROLE_EXECUTE && GpIdentity.segindex != MASTER_CONTENT_ID) + { + return; + } + if (TransactionIdIsValid(xid)) { /* NB: xid must be known committed here! 
*/ diff --git a/src/backend/storage/buffer/bufmgr.c b/src/backend/storage/buffer/bufmgr.c index 787700ab34e..85e8c39d9c7 100644 --- a/src/backend/storage/buffer/bufmgr.c +++ b/src/backend/storage/buffer/bufmgr.c @@ -181,6 +181,9 @@ static bool IsForInput; /* local state for LockBufferForCleanup */ static BufferDesc *PinCountWaitBuf = NULL; +/* Hook for plugins to validate buffer in BufferAlloc() */ +BufferValidation_hook_type BufferValidation_hook = NULL; + /* * Backend-Private refcount management: * @@ -486,7 +489,6 @@ static uint32 WaitBufHdrUnlocked(BufferDesc *buf); static int SyncOneBuffer(int buf_id, bool skip_recently_used, WritebackContext *wb_context); static void WaitIO(BufferDesc *buf); -static bool StartBufferIO(BufferDesc *buf, bool forInput); static void TerminateBufferIO(BufferDesc *buf, bool clear_dirty, uint32 set_flag_bits); static void shared_buffer_write_error_callback(void *arg); @@ -1259,15 +1261,9 @@ BufferAlloc(SMgrRelation smgr, char relpersistence, ForkNumber forkNum, /* * The page in buffer may be out of date, we need to check the buffer * and refresh the buffer if the page has been modified. - */ - if (enable_serverless && Gp_role == GP_ROLE_EXECUTE && valid) - { - uint32 buf_state = LockBufHdr(buf); - buf_state &= ~(BM_VALID | BM_DIRTY); - UnlockBufHdr(buf, buf_state); - - valid = false; - } + */ + if (BufferValidation_hook) + return (*BufferValidation_hook)(buf, valid, foundPtr); if (!valid) { @@ -4686,7 +4682,7 @@ WaitIO(BufferDesc *buf) * Returns true if we successfully marked the buffer as I/O busy, * false if someone else already did the work. 
*/ -static bool +bool StartBufferIO(BufferDesc *buf, bool forInput) { uint32 buf_state; @@ -5186,3 +5182,12 @@ BufferLockHeldByMe(Page page) return true; } #endif + +/* + * Get the buffer we were doing I/O on + */ +BufferDesc * +GetInProgressBuf(void) +{ + return InProgressBuf; +} diff --git a/src/backend/storage/page/bufpage.c b/src/backend/storage/page/bufpage.c index 2e1927cfd6d..4a7b32d0bd9 100644 --- a/src/backend/storage/page/bufpage.c +++ b/src/backend/storage/page/bufpage.c @@ -102,7 +102,10 @@ PageIsVerifiedExtended(Page page, ForkNumber forknum, */ if (!PageIsNew(page)) { - if (DataChecksumsEnabled()) + /* + * In serverless architecture, the page is checked in smgrread. + */ + if (DataChecksumsEnabled() && !enable_serverless) { checksum = pg_checksum_page((char *) page, blkno); diff --git a/src/include/storage/buf_internals.h b/src/include/storage/buf_internals.h index 33cacc69272..def993131a2 100644 --- a/src/include/storage/buf_internals.h +++ b/src/include/storage/buf_internals.h @@ -240,6 +240,10 @@ typedef union BufferDescPadded extern PGDLLIMPORT ConditionVariableMinimallyPadded *BufferIOCVArray; +/* Hook for plugins to validate buffer in BufferAlloc() */ +typedef BufferDesc *(*BufferValidation_hook_type)(BufferDesc *buf, bool valid, bool *found); +extern PGDLLIMPORT BufferValidation_hook_type BufferValidation_hook; + /* * The freeNext field is either the index of the next freelist entry, * or one of these special values: diff --git a/src/include/storage/bufmgr.h b/src/include/storage/bufmgr.h index 5de4515bd77..79bc675f37b 100644 --- a/src/include/storage/bufmgr.h +++ b/src/include/storage/bufmgr.h @@ -177,6 +177,8 @@ extern PGDLLIMPORT ReadBuffer_hook_type ReadBuffer_hook; /* * prototypes for functions in bufmgr.c */ +typedef struct BufferDesc BufferDesc; +extern bool StartBufferIO(BufferDesc *buf, bool forInput); extern PrefetchBufferResult PrefetchSharedBuffer(struct SMgrRelationData *smgr_reln, ForkNumber forkNum, BlockNumber blockNum); @@ 
-256,6 +258,7 @@ extern bool IsBufferCleanupOK(Buffer buffer); extern bool HoldingBufferPinThatDelaysRecovery(void); extern void AbortBufferIO(void); +extern BufferDesc *GetInProgressBuf(void); extern void BufmgrCommit(void); extern bool BgBufferSync(struct WritebackContext *wb_context); From 9d2965d215286403568263d2754978ff638bdb14 Mon Sep 17 00:00:00 2001 From: oppenheimer Date: Fri, 19 Jan 2024 11:18:05 +0800 Subject: [PATCH 049/152] Add new hashdata am oid to RelationIsNonblockRelation Add a new am oid to RelationIsNonblockRelation, so that the new storage could access some specific code. The reason for disable update/delete trigger is that the cost of random access to these storages is too high. But only after trigger will call fetch api, so we should all before trigger for delete/update. --- src/backend/commands/trigger.c | 15 ++++++++++----- src/backend/executor/nodeModifyTable.c | 12 +++++------- 2 files changed, 15 insertions(+), 12 deletions(-) diff --git a/src/backend/commands/trigger.c b/src/backend/commands/trigger.c index 272bc285585..8176af18847 100644 --- a/src/backend/commands/trigger.c +++ b/src/backend/commands/trigger.c @@ -226,7 +226,7 @@ CreateTriggerFiringOn(CreateTrigStmt *stmt, const char *queryString, * does not support constraint(deferred) trigger now. 
*/ if (stmt->isconstraint && enable_serverless && - (!RelationIsHeap(rel) && !RelationIsAppendOptimized(rel))) + (RelationIsNonblockRelation(rel))) ereport(ERROR, (errcode(ERRCODE_GP_FEATURE_NOT_YET), errmsg("\"%s\" is not a heap table and AO table", @@ -726,6 +726,7 @@ CreateTriggerFiringOn(CreateTrigStmt *stmt, const char *queryString, NameListToString(stmt->funcname), "trigger"))); } +#ifndef SERVERLESS /* Check GPDB limitations */ if (RelationIsAppendOptimized(rel) && TRIGGER_FOR_ROW(tgtype) && @@ -740,6 +741,7 @@ CreateTriggerFiringOn(CreateTrigStmt *stmt, const char *queryString, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), errmsg("ON DELETE triggers are not supported on append-only tables"))); } +#endif /* * Scan pg_trigger to see if there is already a trigger of the same name. @@ -3127,12 +3129,13 @@ GetTupleForTrigger(EState *estate, { Relation relation = relinfo->ri_RelationDesc; +#ifdef SERVERLESS /* * FIXME: table which is not a heap table and AO table does not support * concurrently update or delete. So we can fetch tuple directly * without locking tuple. 
*/ - if(enable_serverless && (!RelationIsHeap(relation) && !RelationIsAppendOptimized(relation))) + if(!RelationIsHeap(relation) && !RelationIsAppendOptimized(relation)) { /* * We expect the tuple to be present, thus very simple error handling @@ -3143,11 +3146,13 @@ GetTupleForTrigger(EState *estate, elog(ERROR, "failed to fetch tuple for trigger"); return true; } +#else /* these should be rejected when you try to create such triggers, but let's check */ - if (RelationIsAppendOptimized(relation)) + if (RelationIsNonblockRelation(relation)) elog(ERROR, "UPDATE and DELETE triggers are not supported on append-only tables"); Assert(RelationIsHeap(relation)); +#endif if (epqslot != NULL) { @@ -4387,7 +4392,7 @@ afterTriggerInvokeEvents(AfterTriggerEventList *events, slot1 = slot2 = NULL; } if (rel->rd_rel->relkind == RELKIND_FOREIGN_TABLE || - (enable_serverless && (!RelationIsHeap(rel) && !RelationIsAppendOptimized(rel)))) + RelationIsNonblockRelation(rel)) { slot1 = MakeSingleTupleTableSlot(rel->rd_att, &TTSOpsMinimalTuple); @@ -5999,7 +6004,7 @@ AfterTriggerSaveEvent(EState *estate, ResultRelInfo *relinfo, * throught its ctid. */ if (row_trigger && (relkind == RELKIND_FOREIGN_TABLE || - (enable_serverless && (!RelationIsHeap(rel) && !RelationIsAppendOptimized(rel))))) + (enable_serverless && (RelationIsNonblockRelation(rel))))) { if (fdw_tuplestore == NULL) { diff --git a/src/backend/executor/nodeModifyTable.c b/src/backend/executor/nodeModifyTable.c index 10608fd1099..b77861526ac 100644 --- a/src/backend/executor/nodeModifyTable.c +++ b/src/backend/executor/nodeModifyTable.c @@ -2176,13 +2176,11 @@ lreplace:; } /* AFTER ROW UPDATE Triggers */ - /* GPDB: AO and AOCO tables don't support triggers */ - if (!RelationIsNonblockRelation(resultRelationDesc)) - ExecARUpdateTriggers(estate, resultRelInfo, tupleid, oldtuple, slot, - recheckIndexes, - mtstate->operation == CMD_INSERT ? 
- mtstate->mt_oc_transition_capture : - mtstate->mt_transition_capture); + ExecARUpdateTriggers(estate, resultRelInfo, tupleid, oldtuple, slot, + recheckIndexes, + mtstate->operation == CMD_INSERT ? + mtstate->mt_oc_transition_capture : + mtstate->mt_transition_capture); list_free(recheckIndexes); From 9a6138a3bda88a3a2389da16259371d61ef10bca Mon Sep 17 00:00:00 2001 From: liushengsong Date: Sun, 21 Jan 2024 22:23:44 +0800 Subject: [PATCH 050/152] Fix: collect stat from QE to QD In the hashdata cloud architecture, some DML like truncate will not execute in QE. Meanwhile, stats will be lost when we switch warehouses. Thus, we collect stats from QE to QD, which is different from GP rewriting the view pg_stat_all_indexes through gp_dist_random. For n_tup_ins, n_tup_upd and so on, we collect these stats in delta mode from each QE and combine and apply them in QD. We reset n_tup_ins, n_tup_upd to 0 in QE to support transactions. For seq_scan, seq_tup_read and so on, we collect stats from all the slices to make the stats accurate.
--- src/backend/cdb/dispatcher/cdbdisp.c | 1 - src/backend/cdb/dispatcher/cdbdisp_async.c | 2 + src/backend/commands/matview.c | 14 +++++ src/backend/postmaster/pgstat.c | 69 ++++++---------------- src/backend/tcop/postgres.c | 4 +- src/include/cdb/cdbdisp.h | 4 -- src/include/cdb/cdbdisp_async.h | 3 + src/include/pgstat.h | 57 +++++++++++++++++- 8 files changed, 97 insertions(+), 57 deletions(-) diff --git a/src/backend/cdb/dispatcher/cdbdisp.c b/src/backend/cdb/dispatcher/cdbdisp.c index 38382fcae5e..813e77dc7e5 100644 --- a/src/backend/cdb/dispatcher/cdbdisp.c +++ b/src/backend/cdb/dispatcher/cdbdisp.c @@ -43,7 +43,6 @@ static char * segmentsListToString(const char *prefix, List *segments); DispatcherInternalFuncs *pDispatchFuncs = &DispatcherAsyncFuncs; -PGResStausOK_hook_type PGResStausOK_hook = NULL; ProcessDekInfo_hook_type ProcessDekInfo_hook = NULL; /* diff --git a/src/backend/cdb/dispatcher/cdbdisp_async.c b/src/backend/cdb/dispatcher/cdbdisp_async.c index d75426684eb..a6f275289fc 100644 --- a/src/backend/cdb/dispatcher/cdbdisp_async.c +++ b/src/backend/cdb/dispatcher/cdbdisp_async.c @@ -46,6 +46,8 @@ #include "utils/timestamp.h" #define DISPATCH_WAIT_TIMEOUT_MSEC 2000 +PGResStausOK_hook_type PGResStausOK_hook = NULL; + /* * Ideally, we should set timeout to zero to cancel QEs as soon as possible, * but considering the cost of sending cancel signal is high, we want to process diff --git a/src/backend/commands/matview.c b/src/backend/commands/matview.c index 7a02fe9e3ec..15fbf7f5c66 100644 --- a/src/backend/commands/matview.c +++ b/src/backend/commands/matview.c @@ -1041,6 +1041,20 @@ transientrel_shutdown(DestReceiver *self) table_close(matviewRel, NoLock); } + else if(enable_serverless && Gp_role == GP_ROLE_DISPATCH && !myState->concurrent) + { + Relation matviewRel; + + matviewRel = table_open(myState->oldreloid, NoLock); + + /* + * In serverless architecture, QD should collect trucate stat. And QE will + * collect insert stat and send to QD. 
we combine the stat in + * pgstat_combine_from_qe. + */ + pgstat_count_truncate(matviewRel); + table_close(matviewRel, NoLock); + } } /* diff --git a/src/backend/postmaster/pgstat.c b/src/backend/postmaster/pgstat.c index 309a101fe02..4fc6d904818 100644 --- a/src/backend/postmaster/pgstat.c +++ b/src/backend/postmaster/pgstat.c @@ -75,7 +75,6 @@ #include "libpq-int.h" #include "cdb/cdbconn.h" -#include "cdb/cdbdispatchresult.h" #include "cdb/cdbvars.h" #include "commands/resgroupcmds.h" #include "libpq/pqformat.h" @@ -214,19 +213,10 @@ typedef struct TabStatusArray static TabStatusArray *pgStatTabList = NULL; -/* - * pgStatTabHash entry: map from relation OID to PgStat_TableStatus pointer - */ -typedef struct TabStatHashEntry -{ - Oid t_id; - PgStat_TableStatus *tsa_entry; -} TabStatHashEntry; - /* * Hash table for O(1) t_id -> tsa_entry lookup */ -static HTAB *pgStatTabHash = NULL; +HTAB *pgStatTabHash = NULL; /* * Backends store per-function info that's waiting to be sent to the collector @@ -240,21 +230,7 @@ static HTAB *pgStatFunctions = NULL; */ static bool have_function_stats = false; -/* - * Tuple insertion/deletion counts for an open transaction can't be propagated - * into PgStat_TableStatus counters until we know if it is going to commit - * or abort. Hence, we keep these counts in per-subxact structs that live - * in TopTransactionContext. This data structure is designed on the assumption - * that subxacts won't usually modify very many tables. 
- */ -typedef struct PgStat_SubXactStatus -{ - int nest_level; /* subtransaction nest level */ - struct PgStat_SubXactStatus *prev; /* higher-level subxact if any */ - PgStat_TableXactStatus *first; /* head of list for this subxact */ -} PgStat_SubXactStatus; - -static PgStat_SubXactStatus *pgStatXactStack = NULL; +PgStat_SubXactStatus *pgStatXactStack = NULL; static int pgStatXactCommit = 0; static int pgStatXactRollback = 0; @@ -265,25 +241,6 @@ PgStat_Counter pgStatActiveTime = 0; PgStat_Counter pgStatTransactionIdleTime = 0; SessionEndType pgStatSessionEndCause = DISCONNECT_NORMAL; -/* Record that's written to 2PC state file when pgstat state is persisted */ -typedef struct TwoPhasePgStatRecord -{ - PgStat_Counter tuples_inserted; /* tuples inserted in xact */ - PgStat_Counter tuples_updated; /* tuples updated in xact */ - PgStat_Counter tuples_deleted; /* tuples deleted in xact */ - PgStat_Counter inserted_pre_trunc; /* tuples inserted prior to truncate */ - PgStat_Counter updated_pre_trunc; /* tuples updated prior to truncate */ - PgStat_Counter deleted_pre_trunc; /* tuples deleted prior to truncate */ - Oid t_id; /* table's OID */ - bool t_shared; /* is it a shared catalog? */ - bool t_truncated; /* was the relation truncated? 
*/ -} TwoPhasePgStatRecord; - -typedef struct PgStatTabRecordFromQE -{ - TwoPhasePgStatRecord table_stat; - int nest_level; -} PgStatTabRecordFromQE; /* * Info about current "snapshot" of stats file @@ -354,8 +311,6 @@ static HTAB *pgstat_collect_oids(Oid catalogid, AttrNumber anum_oid); static bool pgstat_should_report_connstat(void); static void pgstat_report_disconnect(Oid dboid); -static PgStat_TableStatus *get_tabstat_entry(Oid rel_id, bool isshared); - static void pgstat_setup_memcxt(void); static void pgstat_setheader(PgStat_MsgHdr *hdr, StatMsgType mtype); @@ -388,6 +343,13 @@ static void pgstat_recv_disconnect(PgStat_MsgDisconnect *msg, int len); static void pgstat_recv_replslot(PgStat_MsgReplSlot *msg, int len); static void pgstat_recv_tempfile(PgStat_MsgTempFile *msg, int len); + +/* Hook for plugins to get control in pgstat_send_qd_tabstats() */ +pgstat_send_qd_tabstats_hook_type pgstat_send_qd_tabstats_hook = NULL; + +/* Hook for plugins to get control in pgstat_combine_from_qe() */ +pgstat_combine_from_qe_hook_type pgstat_combine_from_qe_hook = NULL; + /* ------------------------------------------------------------ * Public functions called from postmaster follow * ------------------------------------------------------------ @@ -2088,7 +2050,7 @@ pgstat_initstats(Relation rel) /* * get_tabstat_entry - find or create a PgStat_TableStatus entry for rel */ -static PgStat_TableStatus * +PgStat_TableStatus * get_tabstat_entry(Oid rel_id, bool isshared) { TabStatHashEntry *hash_entry; @@ -2196,7 +2158,7 @@ find_tabstat_entry(Oid rel_id) /* * get_tabstat_stack_level - add a new (sub)transaction stack entry if needed */ -static PgStat_SubXactStatus * +PgStat_SubXactStatus * get_tabstat_stack_level(int nest_level) { PgStat_SubXactStatus *xact_state; @@ -2218,7 +2180,7 @@ get_tabstat_stack_level(int nest_level) /* * add_tabstat_xact_level - add a new (sub)transaction state record */ -static void +void add_tabstat_xact_level(PgStat_TableStatus *pgstat_info, int 
nest_level) { PgStat_SubXactStatus *xact_state; @@ -3129,6 +3091,9 @@ pgstat_send_bgwriter(void) void pgstat_send_qd_tabstats(void) { + if(pgstat_send_qd_tabstats_hook) + return pgstat_send_qd_tabstats_hook(); + int nest_level; StringInfoData buf; StringInfoData stat_data; @@ -3327,6 +3292,10 @@ pgstat_combine_one_qe_result(List **oidList, struct pg_result *pgresult, void pgstat_combine_from_qe(CdbDispatchResults *results, int writerSliceIndex) { + + if(pgstat_combine_from_qe_hook) + return pgstat_combine_from_qe_hook(results, writerSliceIndex); + CdbDispatchResult *dispatchResult; CdbDispatchResult *resultEnd; struct pg_result *pgresult; diff --git a/src/backend/tcop/postgres.c b/src/backend/tcop/postgres.c index 52bfab8bb57..e146453c77d 100644 --- a/src/backend/tcop/postgres.c +++ b/src/backend/tcop/postgres.c @@ -1435,8 +1435,10 @@ exec_mpp_query(const char *query_string, /* * If writer QE, sent current pgstat for tables to QD. + * In serverless architecture, all the slice send their stat like + * seq_scan to QD. */ - if (Gp_role == GP_ROLE_EXECUTE && Gp_is_writer) + if (Gp_role == GP_ROLE_EXECUTE && (Gp_is_writer || enable_serverless)) pgstat_send_qd_tabstats(); (*receiver->rDestroy) (receiver); diff --git a/src/include/cdb/cdbdisp.h b/src/include/cdb/cdbdisp.h index 9a7de7f8975..4a84b169b94 100644 --- a/src/include/cdb/cdbdisp.h +++ b/src/include/cdb/cdbdisp.h @@ -26,7 +26,6 @@ struct CdbPgResults; struct Gang; /* #include "cdb/cdbgang.h" */ struct ResourceOwnerData; enum GangType; -enum ExecStatusType; /* * Types of message to QE when we wait for it. 
@@ -218,9 +217,6 @@ segmentsToContentStr(List *segments); extern void SetupDispatchFuncs(DispatcherInternalFuncs *dispatcherInternalFuncs); -typedef bool (*PGResStausOK_hook_type) (enum ExecStatusType statusType); -extern PGDLLIMPORT PGResStausOK_hook_type PGResStausOK_hook; - extern DispatcherInternalFuncs *pDispatchFuncs; #endif /* CDBDISP_H */ diff --git a/src/include/cdb/cdbdisp_async.h b/src/include/cdb/cdbdisp_async.h index 375d0048ef6..a62ef6abee0 100644 --- a/src/include/cdb/cdbdisp_async.h +++ b/src/include/cdb/cdbdisp_async.h @@ -38,4 +38,7 @@ extern void cdbdisp_waitDispatchFinish_async(struct CdbDispatcherState *ds); extern bool cdbdisp_checkForCancel_async(struct CdbDispatcherState *ds); extern int *cdbdisp_getWaitSocketFds_async(struct CdbDispatcherState *ds, int *nsocks); + +typedef bool (*PGResStausOK_hook_type) (ExecStatusType statusType); +extern PGDLLIMPORT PGResStausOK_hook_type PGResStausOK_hook; #endif diff --git a/src/include/pgstat.h b/src/include/pgstat.h index c54b1bb369a..8d2796caf8b 100644 --- a/src/include/pgstat.h +++ b/src/include/pgstat.h @@ -21,6 +21,7 @@ #include "utils/wait_event.h" /* for backward compatibility */ #include "postmaster/autostats.h" +#include "cdb/cdbdispatchresult.h" /* ---------- @@ -980,6 +981,48 @@ typedef struct PgStat_FunctionCallUsage instr_time f_start; } PgStat_FunctionCallUsage; +/* + * Tuple insertion/deletion counts for an open transaction can't be propagated + * into PgStat_TableStatus counters until we know if it is going to commit + * or abort. Hence, we keep these counts in per-subxact structs that live + * in TopTransactionContext. This data structure is designed on the assumption + * that subxacts won't usually modify very many tables. 
+ */ +typedef struct PgStat_SubXactStatus +{ + int nest_level; /* subtransaction nest level */ + struct PgStat_SubXactStatus *prev; /* higher-level subxact if any */ + PgStat_TableXactStatus *first; /* head of list for this subxact */ +} PgStat_SubXactStatus; + +/* Record that's written to 2PC state file when pgstat state is persisted */ +typedef struct TwoPhasePgStatRecord +{ + PgStat_Counter tuples_inserted; /* tuples inserted in xact */ + PgStat_Counter tuples_updated; /* tuples updated in xact */ + PgStat_Counter tuples_deleted; /* tuples deleted in xact */ + PgStat_Counter inserted_pre_trunc; /* tuples inserted prior to truncate */ + PgStat_Counter updated_pre_trunc; /* tuples updated prior to truncate */ + PgStat_Counter deleted_pre_trunc; /* tuples deleted prior to truncate */ + Oid t_id; /* table's OID */ + bool t_shared; /* is it a shared catalog? */ + bool t_truncated; /* was the relation truncated? */ +} TwoPhasePgStatRecord; + +typedef struct PgStatTabRecordFromQE +{ + TwoPhasePgStatRecord table_stat; + int nest_level; +} PgStatTabRecordFromQE; + +/* + * pgStatTabHash entry: map from relation OID to PgStat_TableStatus pointer + */ +typedef struct TabStatHashEntry +{ + Oid t_id; + PgStat_TableStatus *tsa_entry; +} TabStatHashEntry; /* ---------- * GUC parameters @@ -1017,12 +1060,15 @@ extern PgStat_Counter pgStatBlockWriteTime; extern PgStat_Counter pgStatActiveTime; extern PgStat_Counter pgStatTransactionIdleTime; - /* * Updated by the traffic cop and in errfinish() */ extern SessionEndType pgStatSessionEndCause; +extern PgStat_SubXactStatus *pgStatXactStack; + +extern HTAB *pgStatTabHash; + /* ---------- * Functions called from postmaster * ---------- @@ -1291,5 +1337,14 @@ extern void pgstat_count_slru_flush(int slru_idx); extern void pgstat_count_slru_truncate(int slru_idx); extern const char *pgstat_slru_name(int slru_idx); extern int pgstat_slru_index(const char *name); +extern PgStat_TableStatus *get_tabstat_entry(Oid rel_id, bool isshared); 
+extern PgStat_SubXactStatus *get_tabstat_stack_level(int nest_level); +extern void add_tabstat_xact_level(PgStat_TableStatus *pgstat_info, int nest_level); + + +typedef void (*pgstat_send_qd_tabstats_hook_type) (void); +extern pgstat_send_qd_tabstats_hook_type pgstat_send_qd_tabstats_hook; +typedef void (*pgstat_combine_from_qe_hook_type)(CdbDispatchResults *results, int writerSliceIndex) ; +extern pgstat_combine_from_qe_hook_type pgstat_combine_from_qe_hook; #endif /* PGSTAT_H */ From 4b9df108b2dd9384e6eafdc12261a99bafcfada5 Mon Sep 17 00:00:00 2001 From: kongfanshen Date: Mon, 13 May 2024 09:50:18 +0800 Subject: [PATCH 051/152] Fix: storage the keys in the dfs tablespace --- gpcontrib/gp_toolkit/sql/tde_test.sql | 38 +++++++++++++++ src/backend/commands/dbcommands.c | 34 +++----------- src/backend/commands/tablespace.c | 8 +--- src/backend/utils/cache/relcache.c | 4 ++ src/bin/initdb/initdb.c | 26 +++++++++++ src/include/commands/dbcommands.h | 18 ++++++-- src/include/commands/tablespace.h | 4 +- src/include/utils/relcache.h | 3 ++ src/test/regress/expected/tde_test.out | 64 ++++++++++++++++++++++++++ 9 files changed, 162 insertions(+), 37 deletions(-) create mode 100644 gpcontrib/gp_toolkit/sql/tde_test.sql create mode 100644 src/test/regress/expected/tde_test.out diff --git a/gpcontrib/gp_toolkit/sql/tde_test.sql b/gpcontrib/gp_toolkit/sql/tde_test.sql new file mode 100644 index 00000000000..46df0ca3fd0 --- /dev/null +++ b/gpcontrib/gp_toolkit/sql/tde_test.sql @@ -0,0 +1,38 @@ +-- start_matchsubs +-- s/database oid:\d+/database oid:#####/ +-- m/database oid:\s+[0-9].*/ +-- s/database oid:\s+[0-9].*/database oid:/ +-- end_matchsubs +set tde_get_master_key_from_cm to false; +DROP DATABASE IF EXISTS encryptdb1; + +CREATE DATABASE encryptdb1 WITH ENCRYPTION_ENABLE 'aes' TABLESPACE regress_oss_test; + +select datname,is_encrypt,dek_version from hashdata_encrypt_database where datname = 'encryptdb1'; + +\c encryptdb1 +set tde_get_master_key_from_cm to false; +set 
warehouse to test; + +-- success +create table t1 (id int) with (storage_format=vbf) tablespace regress_oss_test; + +insert into t1 select generate_series(1,5); + +select * from t1 order by id; + +set debug_tde_print_encrypt_data = true; + +insert into t1 select generate_series(6,10); + +select * from t1 order by id; + +drop table t1; + +-- fail, do not setting the tablespace of database, can not create the encrypted db. +reset default_tablespace; +CREATE DATABASE encryptdb3 WITH ENCRYPTION_ENABLE 'aes'; + +\c postgres +set tde_get_master_key_from_cm to false; +DROP DATABASE encryptdb1; \ No newline at end of file diff --git a/src/backend/commands/dbcommands.c b/src/backend/commands/dbcommands.c index a4c30f9d154..a8b4c34154a 100644 --- a/src/backend/commands/dbcommands.c +++ b/src/backend/commands/dbcommands.c @@ -83,12 +83,6 @@ #include "utils/pg_rusage.h" -typedef struct -{ - Oid src_dboid; /* source (template) DB */ - Oid dest_dboid; /* DB we are trying to create */ -} createdb_failure_params; - /* * GPDB: A different cleanup mechanism is used. Refer comment in movedb(). */ @@ -101,7 +95,6 @@ typedef struct #endif /* non-export function prototypes */ -static void createdb_failure_callback(int code, Datum arg); static void movedb(const char *dbname, const char *tblspcname); /* * GPDB: A different cleanup mechanism is used. Refer comment in movedb(). 
@@ -109,18 +102,8 @@ static void movedb(const char *dbname, const char *tblspcname); #if 0 static void movedb_failure_callback(int code, Datum arg); #endif -static bool get_db_info(const char *name, LOCKMODE lockmode, - Oid *dbIdP, Oid *ownerIdP, - int *encodingP, bool *dbIsTemplateP, bool *dbAllowConnP, - Oid *dbLastSysOidP, TransactionId *dbFrozenXidP, - MultiXactId *dbMinMultiP, - Oid *dbTablespace, char **dbCollate, char **dbCtype); -static bool have_createdb_privilege(void); static void remove_dbtablespaces(Oid db_id); -static bool check_db_file_conflict(Oid db_id); -static int errdetail_busy_db(int notherbackends, int npreparedxacts); -CreateDb_hook_type CreateDb_hook = NULL; DropDb_hook_type DropDb_hook = NULL; /* * CREATE DATABASE @@ -645,9 +628,6 @@ createdb(ParseState *pstate, const CreatedbStmt *stmt) dbname); } - if (CreateDb_hook) - CreateDb_hook(stmt, dboid); - if (shouldDispatch) { elog(DEBUG5, "shouldDispatch = true, dbOid = %d", dboid); @@ -899,7 +879,7 @@ check_encoding_locale_matches(int encoding, const char *collate, const char *cty } /* Error cleanup callback for createdb */ -static void +void createdb_failure_callback(int code, Datum arg) { createdb_failure_params *fparms = (createdb_failure_params *) DatumGetPointer(arg); @@ -989,6 +969,8 @@ dropdb(const char *dbname, bool missing_ok, bool force) /* DROP hook for the database being removed */ InvokeObjectDropHook(DatabaseRelationId, db_id, 0); + if (DropDb_hook) + DropDb_hook(db_id); /* * Disallow dropping a DB that is marked istemplate. This is just to * prevent people from accidentally dropping template0 or template1; they @@ -1181,8 +1163,6 @@ dropdb(const char *dbname, bool missing_ok, bool force) */ ForceSyncCommit(); - if (DropDb_hook) - DropDb_hook(db_id); } @@ -2061,7 +2041,7 @@ AlterDatabaseOwner(const char *dbname, Oid newOwnerId) * parameters that aren't NULL, and return true. If no such database, * return false. 
*/ -static bool +bool get_db_info(const char *name, LOCKMODE lockmode, Oid *dbIdP, Oid *ownerIdP, int *encodingP, bool *dbIsTemplateP, bool *dbAllowConnP, @@ -2184,7 +2164,7 @@ get_db_info(const char *name, LOCKMODE lockmode, } /* Check if current user has createdb privileges */ -static bool +bool have_createdb_privilege(void) { bool result = false; @@ -2299,7 +2279,7 @@ remove_dbtablespaces(Oid db_id) * database. This exactly parallels what GetNewRelFileNode() does for table * relfilenode values. */ -static bool +bool check_db_file_conflict(Oid db_id) { bool result = false; @@ -2342,7 +2322,7 @@ check_db_file_conflict(Oid db_id) /* * Issue a suitable errdetail message for a busy database */ -static int +int errdetail_busy_db(int notherbackends, int npreparedxacts) { if (notherbackends > 0 && npreparedxacts > 0) diff --git a/src/backend/commands/tablespace.c b/src/backend/commands/tablespace.c index ef8bd610e0b..de6f0e93471 100644 --- a/src/backend/commands/tablespace.c +++ b/src/backend/commands/tablespace.c @@ -112,13 +112,9 @@ char *default_tablespace = NULL; char *temp_tablespaces = NULL; - -static void create_tablespace_directories(const char *location, - const Oid tablespaceoid); static bool destroy_tablespace_directories(Oid tablespaceoid, bool redo); static bool is_tablespace_empty(const Oid tablespace_oid); -static void ensure_tablespace_directory_is_empty(const Oid tablespaceoid, const char *tablespace_name); static void unlink_during_redo(Oid tablepace_oid_to_unlink); static void unlink_without_redo(Oid tablespace_oid_to_unlink); @@ -605,7 +601,7 @@ is_tablespace_empty(const Oid tablespace_oid) } -static void +void ensure_tablespace_directory_is_empty(const Oid tablespace_oid, const char *tablespace_name) { if (tablespace_oid == InvalidOid) @@ -850,7 +846,7 @@ DropTableSpace(DropTableSpaceStmt *stmt) * Attempt to create filesystem infrastructure linking $PGDATA/pg_tblspc/ * to the specified directory */ -static void +void 
create_tablespace_directories(const char *location, const Oid tablespaceoid) { char *linkloc; diff --git a/src/backend/utils/cache/relcache.c b/src/backend/utils/cache/relcache.c index a7ba7f5a4ca..9e1e0cd88ac 100644 --- a/src/backend/utils/cache/relcache.c +++ b/src/backend/utils/cache/relcache.c @@ -286,6 +286,8 @@ typedef struct opclasscacheent static HTAB *OpClassCache = NULL; +write_relcache_init_file_hook_type write_relcache_init_file_hook = NULL; + /* non-export function prototypes */ static void RelationDestroyRelation(Relation relation, bool remember_tupdesc); @@ -6557,6 +6559,8 @@ write_relcache_init_file(bool shared) RelIdCacheEnt *idhentry; int i; + if (write_relcache_init_file_hook && write_relcache_init_file_hook()) + return; /* * If we have already received any relcache inval events, there's no * chance of succeeding so we may as well skip the whole thing. diff --git a/src/bin/initdb/initdb.c b/src/bin/initdb/initdb.c index 42c22a0690a..2de10f76499 100644 --- a/src/bin/initdb/initdb.c +++ b/src/bin/initdb/initdb.c @@ -2194,6 +2194,28 @@ make_postgres(FILE *cmdfd) PG_CMD_PUTS(*line); } +/* + * copy template1 to postgres + */ +static void +make_hashdatadb(FILE *cmdfd) +{ + const char *const *line; + static const char *const postgres_setup[] = { + "CREATE DATABASE hashdatadb;\n\n", + "COMMENT ON DATABASE hashdatadb IS 'default administrative connection database';\n\n", + /* + * Clean out dead rows in pg_database + */ + "VACUUM FULL pg_database;\n\n", + NULL + }; + + for (line = postgres_setup; *line; line++) + PG_CMD_PUTS(*line); +} + + /* * signal handler in case we are interrupted. 
* @@ -3265,6 +3287,10 @@ initialize_data_directory(void) */ vacuum_db(cmdfd); +#ifdef SERVERLESS + make_hashdatadb(cmdfd); +#endif + PG_CMD_CLOSE; check_ok(); diff --git a/src/include/commands/dbcommands.h b/src/include/commands/dbcommands.h index 51556810e2a..334ba301244 100644 --- a/src/include/commands/dbcommands.h +++ b/src/include/commands/dbcommands.h @@ -18,9 +18,12 @@ #include "catalog/objectaddress.h" #include "lib/stringinfo.h" #include "parser/parse_node.h" +typedef struct +{ + Oid src_dboid; /* source (template) DB */ + Oid dest_dboid; /* DB we are trying to create */ +} createdb_failure_params; -typedef void(*CreateDb_hook_type)(const CreatedbStmt *stmt, Oid dbOid); -extern PGDLLIMPORT CreateDb_hook_type CreateDb_hook; typedef void(*DropDb_hook_type)(Oid dbOid); extern PGDLLIMPORT DropDb_hook_type DropDb_hook; @@ -36,5 +39,14 @@ extern Oid get_database_oid(const char *dbname, bool missing_ok); extern char *get_database_name(Oid dbid); extern void check_encoding_locale_matches(int encoding, const char *collate, const char *ctype); - +extern bool get_db_info(const char *name, LOCKMODE lockmode, + Oid *dbIdP, Oid *ownerIdP, + int *encodingP, bool *dbIsTemplateP, bool *dbAllowConnP, + Oid *dbLastSysOidP, TransactionId *dbFrozenXidP, + MultiXactId *dbMinMultiP, + Oid *dbTablespace, char **dbCollate, char **dbCtype); +extern bool check_db_file_conflict(Oid db_id); +extern bool have_createdb_privilege(void); +extern int errdetail_busy_db(int notherbackends, int npreparedxacts); +extern void createdb_failure_callback(int code, Datum arg); #endif /* DBCOMMANDS_H */ diff --git a/src/include/commands/tablespace.h b/src/include/commands/tablespace.h index 1f41964cf75..24b1800f890 100644 --- a/src/include/commands/tablespace.h +++ b/src/include/commands/tablespace.h @@ -69,5 +69,7 @@ extern void remove_tablespace_symlink(const char *linkloc); extern void tblspc_redo(XLogReaderState *rptr); extern void tblspc_desc(StringInfo buf, XLogReaderState *rptr); extern const 
char *tblspc_identify(uint8 info); - +extern void create_tablespace_directories(const char *location, + const Oid tablespaceoid); +extern void ensure_tablespace_directory_is_empty(const Oid tablespaceoid, const char *tablespace_name); #endif /* TABLESPACE_H */ diff --git a/src/include/utils/relcache.h b/src/include/utils/relcache.h index 1849746c0d7..466ea462b9b 100644 --- a/src/include/utils/relcache.h +++ b/src/include/utils/relcache.h @@ -26,6 +26,9 @@ */ #define RELCACHE_INIT_FILENAME "pg_internal.init" +typedef bool (*write_relcache_init_file_hook_type) (void); +extern PGDLLIMPORT write_relcache_init_file_hook_type write_relcache_init_file_hook; + typedef struct RelationData *Relation; /* ---------------- diff --git a/src/test/regress/expected/tde_test.out b/src/test/regress/expected/tde_test.out new file mode 100644 index 00000000000..74e624ea0a3 --- /dev/null +++ b/src/test/regress/expected/tde_test.out @@ -0,0 +1,64 @@ +-- start_matchsubs +-- s/database oid:\d+/database oid:#####/ +-- m/database oid:\s+[0-9].*/ +-- s/database oid:\s+[0-9].*/database oid:/ +-- end_matchsubs +set tde_get_master_key_from_cm to false; +DROP DATABASE IF EXISTS encryptdb1; +NOTICE: database "encryptdb1" does not exist, skipping +CREATE DATABASE encryptdb1 WITH ENCRYPTION_ENABLE 'aes' TABLESPACE regress_oss_test; +select datname,is_encrypt,dek_version from hashdata_encrypt_database where datname = 'encryptdb1'; + datname | is_encrypt | dek_version +------------+------------+------------- + encryptdb1 | t | 1 +(1 row) + +\c encryptdb1 +set tde_get_master_key_from_cm to false; +set warehouse to test; +-- success +create table t1 (id int) with (storage_format=vbf) tablespace regress_oss_test; +insert into t1 select generate_series(1,5); +select * from t1 order by id; + id +---- + 1 + 2 + 3 + 4 + 5 +(5 rows) + +set debug_tde_print_encrypt_data = true; +insert into t1 select generate_series(6,10); +NOTICE: Encrypt data BLock, the dek_version:1 (seg1 127.0.0.1:5435 pid=31493) +NOTICE: 
Encrypt data BLock, the dek_version:1 (seg0 127.0.0.1:5434 pid=31491) +NOTICE: Encrypt data BLock, the dek_version:1 (seg2 127.0.0.1:5436 pid=31492) +select * from t1 order by id; +NOTICE: Decrypt data Block,the dek_version:1 (seg0 slice1 127.0.0.1:5434 pid=31491) +NOTICE: Decrypt data Block,the dek_version:1 (seg0 slice1 127.0.0.1:5434 pid=31491) +NOTICE: Decrypt data Block,the dek_version:1 (seg2 slice1 127.0.0.1:5436 pid=31492) +NOTICE: Decrypt data Block,the dek_version:1 (seg1 slice1 127.0.0.1:5435 pid=31493) +NOTICE: Decrypt data Block,the dek_version:1 (seg1 slice1 127.0.0.1:5435 pid=31493) + id +---- + 1 + 2 + 3 + 4 + 5 + 6 + 7 + 8 + 9 + 10 +(10 rows) + +drop table t1; +-- fail, do not setting the tablespace of database, can not create the encrypted db. +reset default_tablespace; +CREATE DATABASE encryptdb3 WITH ENCRYPTION_ENABLE 'aes'; +ERROR: can not create encrypt database, need to use the dfs tablespace, database oid:114771, tspc_oid:1663 (keys_manager.cc:1019) +\c postgres +set tde_get_master_key_from_cm to false; +DROP DATABASE encryptdb1; From b3a2278e885b5608b9741b056301f8113071f80a Mon Sep 17 00:00:00 2001 From: oppenheimer Date: Wed, 24 Jan 2024 11:46:36 +0800 Subject: [PATCH 052/152] Enable delete after trigger --- src/backend/cdb/cdbpath.c | 2 +- src/backend/commands/trigger.c | 1 - src/backend/executor/nodeModifyTable.c | 6 ++++-- src/backend/gpopt/gpdbwrappers.cpp | 4 ++-- src/backend/gpopt/translate/CTranslatorDXLToPlStmt.cpp | 4 ++-- src/backend/optimizer/util/appendinfo.c | 3 ++- src/backend/utils/cache/lsyscache.c | 5 +++-- src/include/gpopt/gpdbwrappers.h | 2 +- src/include/utils/lsyscache.h | 2 +- 9 files changed, 16 insertions(+), 13 deletions(-) diff --git a/src/backend/cdb/cdbpath.c b/src/backend/cdb/cdbpath.c index e6b60509342..0e51ffc8c77 100644 --- a/src/backend/cdb/cdbpath.c +++ b/src/backend/cdb/cdbpath.c @@ -2863,7 +2863,7 @@ make_splitupdate_path(PlannerInfo *root, Path *subpath, Index rti) * So an update trigger is not allowed 
when updating the * distribution key. */ - if (has_update_triggers(rte->relid, false)) + if (has_update_delete_triggers(rte->relid)) ereport(ERROR, (errcode(ERRCODE_GP_FEATURE_NOT_YET), errmsg("UPDATE on distributed key column not allowed on relation with update triggers"))); diff --git a/src/backend/commands/trigger.c b/src/backend/commands/trigger.c index 8176af18847..0b8d0e8b95b 100644 --- a/src/backend/commands/trigger.c +++ b/src/backend/commands/trigger.c @@ -2668,7 +2668,6 @@ ExecARDeleteTriggers(EState *estate, ResultRelInfo *relinfo, { TupleTableSlot *slot = ExecGetTriggerOldSlot(estate, relinfo); - Assert(HeapTupleIsValid(fdw_trigtuple) ^ ItemPointerIsValid(tupleid)); if (fdw_trigtuple == NULL) GetTupleForTrigger(estate, NULL, diff --git a/src/backend/executor/nodeModifyTable.c b/src/backend/executor/nodeModifyTable.c index b77861526ac..e548faae576 100644 --- a/src/backend/executor/nodeModifyTable.c +++ b/src/backend/executor/nodeModifyTable.c @@ -2768,7 +2768,8 @@ ExecModifyTable(PlanState *pstate) * PAX_STORAGE_FIXME(gongxun):we reuse the logic of the AO table to implement ExecUpdate, * If there is a better implementation, we need to revert it */ - if (operation == CMD_UPDATE && RelationIsNonblockRelation(resultRelInfo->ri_RelationDesc) && + if ((operation == CMD_UPDATE || operation == CMD_DELETE) && + RelationIsNonblockRelation(resultRelInfo->ri_RelationDesc) && AttributeNumberIsValid(resultRelInfo->ri_WholeRowNo)) { /* ri_WholeRowNo refers to a wholerow attribute */ @@ -3280,7 +3281,8 @@ ExecInitModifyTable(ModifyTable *node, EState *estate, int eflags) elog(ERROR, "could not find junk ctid column"); /* extra GPDB junk columns for update AO table */ - if (operation == CMD_UPDATE && RelationIsNonblockRelation(resultRelInfo->ri_RelationDesc)) + if ((operation == CMD_UPDATE || operation == CMD_DELETE) + && RelationIsNonblockRelation(resultRelInfo->ri_RelationDesc)) { resultRelInfo->ri_WholeRowNo = ExecFindJunkAttributeInTlist(subplan->targetlist, 
"wholerow"); diff --git a/src/backend/gpopt/gpdbwrappers.cpp b/src/backend/gpopt/gpdbwrappers.cpp index ae17549da77..e47fdcb7caa 100644 --- a/src/backend/gpopt/gpdbwrappers.cpp +++ b/src/backend/gpopt/gpdbwrappers.cpp @@ -2170,11 +2170,11 @@ gpdb::CheckRTPermissions(List *rtable) // check that a table doesn't have UPDATE triggers. bool -gpdb::HasUpdateTriggers(Oid relid) +gpdb::HasUpdateDeleteTriggers(Oid relid) { GP_WRAP_START; { - return has_update_triggers(relid, true); + return has_update_delete_triggers(relid); } GP_WRAP_END; return false; diff --git a/src/backend/gpopt/translate/CTranslatorDXLToPlStmt.cpp b/src/backend/gpopt/translate/CTranslatorDXLToPlStmt.cpp index 1408e5eaa49..7c7e72d1907 100644 --- a/src/backend/gpopt/translate/CTranslatorDXLToPlStmt.cpp +++ b/src/backend/gpopt/translate/CTranslatorDXLToPlStmt.cpp @@ -4737,8 +4737,8 @@ CTranslatorDXLToPlStmt::TranslateDXLDml( m_is_tgt_tbl_distributed = true; } - if (CMD_UPDATE == m_cmd_type && - gpdb::HasUpdateTriggers(CMDIdGPDB::CastMdid(mdid_target_table)->Oid())) + if ((CMD_UPDATE == m_cmd_type || CMD_DELETE == m_cmd_type) && + gpdb::HasUpdateDeleteTriggers(CMDIdGPDB::CastMdid(mdid_target_table)->Oid())) { GPOS_RAISE(gpdxl::ExmaDXL, gpdxl::ExmiQuery2DXLUnsupportedFeature, GPOS_WSZ_LIT("UPDATE on a table with UPDATE triggers")); diff --git a/src/backend/optimizer/util/appendinfo.c b/src/backend/optimizer/util/appendinfo.c index 2e7ff349e77..856d758f891 100644 --- a/src/backend/optimizer/util/appendinfo.c +++ b/src/backend/optimizer/util/appendinfo.c @@ -953,7 +953,8 @@ add_row_identity_columns(PlannerInfo *root, Index rtindex, * redesigning AO/AOCS storage format or making the update plan is * consistent whether it generated by pg optimizer or ORCA optimizer. 
*/ - if (commandType == CMD_UPDATE && RelationIsNonblockRelation(target_relation)) + if ((commandType == CMD_UPDATE || commandType == CMD_DELETE) && + RelationIsNonblockRelation(target_relation)) { var = makeVar(rtindex, InvalidAttrNumber, diff --git a/src/backend/utils/cache/lsyscache.c b/src/backend/utils/cache/lsyscache.c index 25cefe8bc91..ea12c46de54 100644 --- a/src/backend/utils/cache/lsyscache.c +++ b/src/backend/utils/cache/lsyscache.c @@ -1753,7 +1753,7 @@ get_oprjoin(Oid opno) * So having an extra parameter including_children only for ORCA. */ bool -has_update_triggers(Oid relid, bool including_children) +has_update_delete_triggers(Oid relid) { Relation relation; bool result = false; @@ -1773,7 +1773,8 @@ has_update_triggers(Oid relid, bool including_children) { Trigger trigger = relation->trigdesc->triggers[i]; found = trigger_enabled(trigger.tgoid) && - (get_trigger_type(trigger.tgoid) & TRIGGER_TYPE_UPDATE) == TRIGGER_TYPE_UPDATE; + ((get_trigger_type(trigger.tgoid) & TRIGGER_TYPE_UPDATE) == TRIGGER_TYPE_UPDATE || + (get_trigger_type(trigger.tgoid) & TRIGGER_TYPE_DELETE) == TRIGGER_TYPE_DELETE); if (found) { result = true; diff --git a/src/include/gpopt/gpdbwrappers.h b/src/include/gpopt/gpdbwrappers.h index 188b6834b7b..43cb59c3bb3 100644 --- a/src/include/gpopt/gpdbwrappers.h +++ b/src/include/gpopt/gpdbwrappers.h @@ -577,7 +577,7 @@ unsigned int CdbHashRandomSeg(int num_segments); void CheckRTPermissions(List *rtable); // throw an error if table has update triggers. 
-bool HasUpdateTriggers(Oid relid); +bool HasUpdateDeleteTriggers(Oid relid); // get index operator family properties void IndexOpProperties(Oid opno, Oid opfamily, StrategyNumber *strategynumber, diff --git a/src/include/utils/lsyscache.h b/src/include/utils/lsyscache.h index 51224853cef..afdb2680a5a 100644 --- a/src/include/utils/lsyscache.h +++ b/src/include/utils/lsyscache.h @@ -140,7 +140,7 @@ extern Oid get_commutator(Oid opno); extern Oid get_negator(Oid opno); extern RegProcedure get_oprrest(Oid opno); extern RegProcedure get_oprjoin(Oid opno); -extern bool has_update_triggers(Oid relid, bool including_children); +extern bool has_update_delete_triggers(Oid relid); extern int32 get_trigger_type(Oid triggerid); extern bool trigger_enabled(Oid triggerid); extern char *get_func_name(Oid funcid); From ba27c51ace379e699757b99bf518a59760c2b3ed Mon Sep 17 00:00:00 2001 From: leo Date: Thu, 25 Jan 2024 19:23:04 +0800 Subject: [PATCH 053/152] Fix: parallel regression test failures 1. QD send invalid msgs at end of transaction(commit), and QE will handle those invalid msgs. In some corner cases, such as RemoveTempRelationsCallback, QD will commit transaction and exit quickly. If QD exits inprogress, QD will close the connection immediately after sending the invalid msgs, and commit the transaction. QE handles the invalid msg, receives the cancel request because of connection closed by QD, then abort current transaction, but current transaction is commited by QD, QE gets PANIC 'cannot abort transaction xx, it was already committed'. To avoid this, QD do not send invalid msgs if exits in progress, and this will not affect QE because QE will exit immediately after connection closed. 2. Vacuum on QE will collect number of tuples and pages, then send back to QD, QD combine those stats from QE and update the info in pg_class. 3. Readers on QD use shared local snapshot to do MVCC, QD sets up the shared local snapshot for readers. 4. 
Add hook build_gpqeid_param_hook/parse_gpqeid_params_hook to send params to new connections. Currently, we send ReadLSN for new launched backends. --- src/backend/access/heap/vacuumlazy.c | 12 +++++++++++- src/backend/access/transam/xlog.c | 6 ++++++ src/backend/cdb/dispatcher/cdbdisp_dtx.c | 6 ++++++ src/backend/cdb/dispatcher/cdbgang.c | 18 ++++++++++++++++++ src/include/cdb/cdbdtxcontextinfo.h | 2 ++ src/include/cdb/cdbgang.h | 7 +++++++ 6 files changed, 50 insertions(+), 1 deletion(-) diff --git a/src/backend/access/heap/vacuumlazy.c b/src/backend/access/heap/vacuumlazy.c index 44538b626c6..4902746b320 100644 --- a/src/backend/access/heap/vacuumlazy.c +++ b/src/backend/access/heap/vacuumlazy.c @@ -1514,7 +1514,17 @@ lazy_scan_heap(LVRelState *vacrel, VacuumParams *params, bool aggressive) else if (all_visible_according_to_vm && !PageIsAllVisible(page) && VM_ALL_VISIBLE(vacrel->rel, blkno, &vmbuffer)) { - elog(WARNING, "page is not marked all-visible but visibility map bit is set in relation \"%s\" page %u", + int log_level = WARNING; + + /* + * In serverless architecture, FSM is not WAL-logged together with corresponding page, but WAL-logged + * when FSM page is evicted. It's possible that the visibility map bit is set but the page-level bit is + * clear, so set the LOG_LEVEL to LOG to omit this case. + */ + if (enable_serverless) + log_level = LOG; + + elog(log_level, "page is not marked all-visible but visibility map bit is set in relation \"%s\" page %u", vacrel->relname, blkno); visibilitymap_clear(vacrel->rel, blkno, vmbuffer, VISIBILITYMAP_VALID_BITS); diff --git a/src/backend/access/transam/xlog.c b/src/backend/access/transam/xlog.c index 5e1b175ebf5..b5a12de91c9 100644 --- a/src/backend/access/transam/xlog.c +++ b/src/backend/access/transam/xlog.c @@ -4022,6 +4022,12 @@ PreallocXlogFiles(XLogRecPtr endptr) bool use_existent; uint64 offset; + /* + * In serverless architecture, do not need xlog files any more. 
+ */ + if (enable_serverless) + return; + XLByteToPrevSeg(endptr, _logSegNo, wal_segment_size); offset = XLogSegmentOffset(endptr - 1, wal_segment_size); if (offset >= (uint32) (0.75 * wal_segment_size)) diff --git a/src/backend/cdb/dispatcher/cdbdisp_dtx.c b/src/backend/cdb/dispatcher/cdbdisp_dtx.c index bee9ee61439..7d60c7b4114 100644 --- a/src/backend/cdb/dispatcher/cdbdisp_dtx.c +++ b/src/backend/cdb/dispatcher/cdbdisp_dtx.c @@ -290,3 +290,9 @@ buildGpDtxProtocolCommand(DispatchCommandDtxProtocolParms *pDtxProtocolParms, MemoryContextSwitchTo(oldContext); return shared_query; } + +DtxContextInfo * +GetTempQDDtxContext(void) +{ + return &TempQDDtxContextInfo; +} \ No newline at end of file diff --git a/src/backend/cdb/dispatcher/cdbgang.c b/src/backend/cdb/dispatcher/cdbgang.c index 780ddef0f42..77ef6f5d5ee 100644 --- a/src/backend/cdb/dispatcher/cdbgang.c +++ b/src/backend/cdb/dispatcher/cdbgang.c @@ -82,6 +82,12 @@ static bool NeedResetSession = false; static Oid OldTempNamespace = InvalidOid; static Oid OldTempToastNamespace = InvalidOid; +/* Hook for plugins to get control in build_gpqeid_param() */ +build_gpqeid_param_hook_type build_gpqeid_param_hook = NULL; + +/* Hook for plugins to get control in cdbgang_parse_gpqeid_params() */ +parse_gpqeid_params_hook_type parse_gpqeid_params_hook = NULL; + /* * cdbgang_createGang: * @@ -500,6 +506,9 @@ build_gpqeid_param(char *buf, int bufsz, (is_writer ? "true" : "false"), identifier, hostSegs, icHtabSize, qeidx); + if (build_gpqeid_param_hook) + len = (*build_gpqeid_param_hook)(buf + len, bufsz - len, len); + return (len > 0 && len < bufsz); } @@ -577,6 +586,15 @@ cdbgang_parse_gpqeid_params(struct Port *port pg_attribute_unused(), qe_idx = (int) strtol(cp, NULL, 10); } + if (parse_gpqeid_params_hook) + { + while(gpqeid_next_param(&cp, &np)) + { + if (!(*parse_gpqeid_params_hook)(cp)) + break; + } + } + /* Too few items, or too many? 
*/ if (!cp || np) goto bad; diff --git a/src/include/cdb/cdbdtxcontextinfo.h b/src/include/cdb/cdbdtxcontextinfo.h index 57aeeffa2ae..d8b54d10e25 100644 --- a/src/include/cdb/cdbdtxcontextinfo.h +++ b/src/include/cdb/cdbdtxcontextinfo.h @@ -49,4 +49,6 @@ extern void DtxContextInfo_Deserialize(const char *serializedDtxContextInfo, DtxContextInfo *dtxContextInfo); extern void DtxContextInfo_Copy(DtxContextInfo *target, DtxContextInfo *source); + +extern DtxContextInfo *GetTempQDDtxContext(void); #endif /* CDBDTXCONTEXTINFO_H */ diff --git a/src/include/cdb/cdbgang.h b/src/include/cdb/cdbgang.h index ecf72793cc3..15d09834b70 100644 --- a/src/include/cdb/cdbgang.h +++ b/src/include/cdb/cdbgang.h @@ -53,6 +53,13 @@ extern int qe_idx; extern MemoryContext GangContext; extern Gang *CurrentGangCreating; +/* Hook for plugins to get control in build_gpqeid_param() */ +typedef int (*build_gpqeid_param_hook_type) (char *buf, int bufsz, int buf_len); +extern PGDLLIMPORT build_gpqeid_param_hook_type build_gpqeid_param_hook; + +/* Hook for plugins to get control in cdbgang_parse_gpqeid_params() */ +typedef bool (*parse_gpqeid_params_hook_type) (char *param); +extern PGDLLIMPORT parse_gpqeid_params_hook_type parse_gpqeid_params_hook; /* * cdbgang_createGang: * From 2fc2068476ddca6b1c8b3211ddba9855d5572cfb Mon Sep 17 00:00:00 2001 From: yangjianghua Date: Mon, 29 Jan 2024 10:13:49 +0800 Subject: [PATCH 054/152] storage_am: fix hashdata table size * consider VISIBILITYMAP_FORKNUM size. * ignore ao test. 
--- src/backend/utils/adt/dbsize.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/backend/utils/adt/dbsize.c b/src/backend/utils/adt/dbsize.c index eba32a2e573..b90452d4b69 100644 --- a/src/backend/utils/adt/dbsize.c +++ b/src/backend/utils/adt/dbsize.c @@ -732,7 +732,7 @@ pg_table_size(PG_FUNCTION_ARGS) size = calculate_table_size(rel); - if (Gp_role == GP_ROLE_DISPATCH) + if (Gp_role == GP_ROLE_DISPATCH && (RelationIsHeap(rel) || RelationIsAppendOptimized(rel))) { char *sql; @@ -762,7 +762,7 @@ pg_indexes_size(PG_FUNCTION_ARGS) size = calculate_indexes_size(rel); - if (Gp_role == GP_ROLE_DISPATCH) + if (Gp_role == GP_ROLE_DISPATCH && (RelationIsHeap(rel) || RelationIsAppendOptimized(rel))) { char *sql; @@ -823,7 +823,7 @@ pg_total_relation_size(PG_FUNCTION_ARGS) size = calculate_total_relation_size(rel); - if (Gp_role == GP_ROLE_DISPATCH) + if (Gp_role == GP_ROLE_DISPATCH && (RelationIsHeap(rel) || RelationIsAppendOptimized(rel))) { char *sql; From 5d35c3eb34ef1c50933f256f4fd20c7ddc5ce98c Mon Sep 17 00:00:00 2001 From: kongfanshen Date: Thu, 22 Feb 2024 09:33:16 +0800 Subject: [PATCH 055/152] Add ci for remote storage --- src/backend/commands/tablespace.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/src/backend/commands/tablespace.c b/src/backend/commands/tablespace.c index de6f0e93471..a4fe9eedca0 100644 --- a/src/backend/commands/tablespace.c +++ b/src/backend/commands/tablespace.c @@ -1693,13 +1693,18 @@ GetDefaultTablespace(char relpersistence, bool partitioned) { Oid result; - /* The temp-table case is handled elsewhere */ + /* + * The temp-table case is handled elsewhere. + * FIXME:In serverless mode, we use the default table space + * just for pg_regress, maybe need to fix it. 
+ */ +#ifndef SERVERLESS if (relpersistence == RELPERSISTENCE_TEMP) { PrepareTempTablespaces(); return GetNextTempTableSpace(); } - +#endif /* Fast path for default_tablespace == "" */ if (default_tablespace == NULL || default_tablespace[0] == '\0') return InvalidOid; From 3d911f9b5c91934134354bc508c67ea8d24814a7 Mon Sep 17 00:00:00 2001 From: Jinbao Chen Date: Fri, 18 Apr 2025 21:05:30 +0800 Subject: [PATCH 056/152] Fix error for cherry pick --- src/backend/utils/cache/lsyscache.c | 18 ------------------ src/bin/initdb/initdb.c | 22 ---------------------- src/include/utils/guc.h | 1 + 3 files changed, 1 insertion(+), 40 deletions(-) diff --git a/src/backend/utils/cache/lsyscache.c b/src/backend/utils/cache/lsyscache.c index ea12c46de54..ceb7472cd52 100644 --- a/src/backend/utils/cache/lsyscache.c +++ b/src/backend/utils/cache/lsyscache.c @@ -1784,24 +1784,6 @@ has_update_delete_triggers(Oid relid) } } - if (including_children && !result && relation->rd_rel->relkind == RELKIND_PARTITIONED_TABLE) - { - List *partitions = find_inheritance_children(relid, NoLock); - ListCell *lc; - - foreach(lc, partitions) - { - Oid partrelid = lfirst_oid(lc); - if (has_update_triggers(partrelid, true)) - { - result = true; - break; - } - } - - list_free(partitions); - } - RelationClose(relation); return result; diff --git a/src/bin/initdb/initdb.c b/src/bin/initdb/initdb.c index 2de10f76499..0762586f06c 100644 --- a/src/bin/initdb/initdb.c +++ b/src/bin/initdb/initdb.c @@ -2194,28 +2194,6 @@ make_postgres(FILE *cmdfd) PG_CMD_PUTS(*line); } -/* - * copy template1 to postgres - */ -static void -make_hashdatadb(FILE *cmdfd) -{ - const char *const *line; - static const char *const postgres_setup[] = { - "CREATE DATABASE hashdatadb;\n\n", - "COMMENT ON DATABASE hashdatadb IS 'default administrative connection database';\n\n", - /* - * Clean out dead rows in pg_database - */ - "VACUUM FULL pg_database;\n\n", - NULL - }; - - for (line = postgres_setup; *line; line++) - PG_CMD_PUTS(*line); 
-} - - /* * signal handler in case we are interrupted. * diff --git a/src/include/utils/guc.h b/src/include/utils/guc.h index 75ec1c99d33..9fff39eddd9 100644 --- a/src/include/utils/guc.h +++ b/src/include/utils/guc.h @@ -661,6 +661,7 @@ typedef enum extern IndexCheckType gp_indexcheck_insert; /* Storage option names */ +#define SOPT_APPENDONLY "appendonly" #define SOPT_FILLFACTOR "fillfactor" #define SOPT_BLOCKSIZE "blocksize" #define SOPT_COMPTYPE "compresstype" From 8c4a8b0d023986615a7e24794f5ddf89ac737ff8 Mon Sep 17 00:00:00 2001 From: liushengsong Date: Thu, 1 Feb 2024 14:41:26 +0800 Subject: [PATCH 057/152] Fix: change the dispatch manifest hook point --- src/backend/cdb/dispatcher/cdbdisp_query.c | 14 ++++++++++++++ src/include/cdb/cdbdisp_query.h | 10 ++++++++++ 2 files changed, 24 insertions(+) diff --git a/src/backend/cdb/dispatcher/cdbdisp_query.c b/src/backend/cdb/dispatcher/cdbdisp_query.c index 8f1bd445848..210c88ccf83 100644 --- a/src/backend/cdb/dispatcher/cdbdisp_query.c +++ b/src/backend/cdb/dispatcher/cdbdisp_query.c @@ -60,6 +60,7 @@ extern bool gp_print_create_gang_time; ExtendProtocolDataStore epd_storage = {0}; ExtendProtocolData epd = &epd_storage; +CdbDispatchPlan_hook_type CdbDispatchPlan_hook = NULL; typedef struct ParamWalkerContext { @@ -191,6 +192,19 @@ CdbDispatchPlan(struct QueryDesc *queryDesc, ParamExecData *execParams, bool planRequiresTxn, bool cancelOnError) +{ + if(CdbDispatchPlan_hook){ + return CdbDispatchPlan_hook(queryDesc, execParams, planRequiresTxn, cancelOnError); + } + + return CdbDispatchPlanInternal(queryDesc, execParams, planRequiresTxn, cancelOnError); +} + +void +CdbDispatchPlanInternal(struct QueryDesc *queryDesc, + ParamExecData *execParams, + bool planRequiresTxn, + bool cancelOnError) { PlannedStmt *stmt; bool is_SRI = false; diff --git a/src/include/cdb/cdbdisp_query.h b/src/include/cdb/cdbdisp_query.h index 9a2d03e4d48..52e41f81c89 100644 --- a/src/include/cdb/cdbdisp_query.h +++ 
b/src/include/cdb/cdbdisp_query.h @@ -52,6 +52,12 @@ extern PGDLLIMPORT CdbNeedDispatchCommand_hook_type CdbNeedDispatchCommand_hook; typedef bool (*CdbNeedDispatchUtility_hook_type) (struct Node *stmt, int *flags); extern PGDLLIMPORT CdbNeedDispatchUtility_hook_type CdbNeedDispatchUtility_hook; +typedef void (*CdbDispatchPlan_hook_type) (struct QueryDesc *queryDesc, + ParamExecData *execParams, + bool planRequiresTxn, + bool cancelOnError); +extern PGDLLIMPORT CdbDispatchPlan_hook_type CdbDispatchPlan_hook; + /* Compose and dispatch the MPPEXEC commands corresponding to a plan tree * within a complete parallel plan. * @@ -73,6 +79,10 @@ extern void CdbDispatchPlan(struct QueryDesc *queryDesc, ParamExecData *execParams, bool planRequiresTxn, bool cancelOnError); +extern void CdbDispatchPlanInternal(struct QueryDesc *queryDesc, + ParamExecData *execParams, + bool planRequiresTxn, + bool cancelOnError); /* * Special for sending SET commands that change GUC variables, so they go to all From 185c128b329f6cd84f80fbf0b53b413fa5b2de8d Mon Sep 17 00:00:00 2001 From: yangjianghua Date: Fri, 2 Feb 2024 12:47:10 +0800 Subject: [PATCH 058/152] pg_ctl: check dtm process to judge is_coordinator. check dtm process exist or not --- src/bin/pg_ctl/pg_ctl.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/src/bin/pg_ctl/pg_ctl.c b/src/bin/pg_ctl/pg_ctl.c index cc733cb7be1..a084a5e699d 100644 --- a/src/bin/pg_ctl/pg_ctl.c +++ b/src/bin/pg_ctl/pg_ctl.c @@ -712,8 +712,13 @@ wait_for_postmaster_start(pgpid_t pm_pid, bool do_checkpoint) * The READY status for coordinator is `dtmready`, while the READY * status is really ready for other nodes. */ +#ifndef SERVERLESS if (strcmp(pmstatus, is_coordinator ? 
PM_STATUS_DTM_RECOVERED : PM_STATUS_READY) == 0 || strcmp(pmstatus, PM_STATUS_STANDBY) == 0) +#else + if (strcmp(pmstatus, PM_STATUS_READY) == 0 || + strcmp(pmstatus, PM_STATUS_STANDBY) == 0) +#endif { /* postmaster is done starting up */ free_readfile(optlines); From b17890f359e3bce594a395919ad80991aba59d3d Mon Sep 17 00:00:00 2001 From: leo Date: Fri, 2 Feb 2024 09:22:57 +0800 Subject: [PATCH 059/152] Fix: zlib and gangsize test cases, clean up dispatcherState in time The dispatcherState will not be cleaned up in mppExecutorFinishup when Error is thrown, but cleaned up at transaction/subtransaction abort. In serverless architecture, we need to dispatch sub-abort statement to QE at subtransaction abort, this may happen before dispatcherState cleanup and will throw error again until ERRORDATA_STACK_SIZE exceeded. We should not set dispatcherState of estate to NULL in mppExecutorFinishup, throw the error and mppExecutorCleanup will do the necessary cleanup before dispatching the sub-abort statement. --- src/backend/executor/execUtils.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/src/backend/executor/execUtils.c b/src/backend/executor/execUtils.c index 2dfeff21200..0f28dc8ea8e 100644 --- a/src/backend/executor/execUtils.c +++ b/src/backend/executor/execUtils.c @@ -2050,7 +2050,19 @@ void mppExecutorFinishup(QueryDesc *queryDesc) if (qeError) { +#ifdef SERVERLESS + /* + * The dispatcherState will be cleaned up at transaction/subtransation abort, + * but in serverless architecture, we will dispatch the sub-abort statement before + * dispatcherState cleanup at subtransation abort, this will throw error again. + * + * We should not set dispatcherState to NULL here, the Error + * is re-throwed, and mppExecutorCleanup will do the necessary cleanup before + * we dispatch the sub-abort statement. 
+ */ +#else estate->dispatcherState = NULL; +#endif FlushErrorState(); ThrowErrorData(qeError); } From 3ee830c2fb26b46dd60b2f891117e5da147e762f Mon Sep 17 00:00:00 2001 From: yjhjstz Date: Fri, 2 Feb 2024 16:29:17 +0800 Subject: [PATCH 060/152] gpconfig: add --warehouse option, default is test. --- gpMgmt/bin/gpconfig | 9 ++++++--- gpMgmt/bin/gppylib/db/dbconn.py | 9 ++++++++- 2 files changed, 14 insertions(+), 4 deletions(-) diff --git a/gpMgmt/bin/gpconfig b/gpMgmt/bin/gpconfig index 7bd3023ea85..cd5a2d19cb4 100755 --- a/gpMgmt/bin/gpconfig +++ b/gpMgmt/bin/gpconfig @@ -65,6 +65,8 @@ def parseargs(): parser.add_option('-M', '--mirrorvalue', type='string') parser.add_option('-f', '--file', action='store_true') parser.add_option('--file-compare', dest='file_compare', action='store_true') + parser.add_option('-w', '--warehouse', dest='warehouse', type='string', default='test') + parser.setHelp([]) (options, _) = parser.parse_args() @@ -218,7 +220,7 @@ def print_verbosely(options, hostname, directory): def do_list(skipvalidation): try: - dburl = dbconn.DbURL() + dburl = dbconn.DbURL(warehouse=options.warehouse) conn = dbconn.connect(dburl, True) rows = dbconn.query(conn, GucQuery().query) @@ -235,7 +237,7 @@ def do_list(skipvalidation): def get_gucs_from_database(gucname): try: - dburl = dbconn.DbURL() + dburl = dbconn.DbURL(warehouse=options.warehouse) # we always want to unset search path except when getting the # 'search_path' GUC itself unsetSearchPath = gucname != 'search_path' @@ -296,7 +298,7 @@ def do_change(options): try: if not options.skipvalidation: - conn = dbconn.connect(dbconn.DbURL(), True) + conn = dbconn.connect(dbconn.DbURL(warehouse=options.warehouse), True) guc = get_normal_guc(conn, options) # Force the postgresql.conf parser to detect vartype string as GUC_STRING in the guc-file.c/guc-file.l @@ -535,6 +537,7 @@ def check_gpexpand(): sys.exit(1) def do_main(): + global options options = parseargs() _set_gparray() diff --git 
a/gpMgmt/bin/gppylib/db/dbconn.py b/gpMgmt/bin/gppylib/db/dbconn.py index b85f802d02b..b800cd42693 100644 --- a/gpMgmt/bin/gppylib/db/dbconn.py +++ b/gpMgmt/bin/gppylib/db/dbconn.py @@ -96,8 +96,9 @@ class DbURL: pgpass='pass' timeout=None retries=None + warehouse=None - def __init__(self,hostname=None,port=0,dbname=None,username=None,password=None,timeout=None,retries=None): + def __init__(self,hostname=None,port=0,dbname=None,username=None,password=None,timeout=None,retries=None,warehouse=None): if hostname is None: self.pghost = os.environ.get('PGHOST', 'localhost') @@ -143,6 +144,9 @@ def __init__(self,hostname=None,port=0,dbname=None,username=None,password=None,t else: self.retries = int(retries) + if warehouse is not None: + self.warehouse = warehouse + def __str__(self): @@ -231,6 +235,9 @@ def connect(dburl, utility=False, verbose=False, if unsetSearchPath: options.append("-c search_path=") + if dburl.warehouse: + options.append("-c hashdata.warehouse=%s" % dburl.warehouse) + if allowSystemTableMods: options.append("-c allow_system_table_mods=true") From e0ab71179517529e51c119c9d918fd81b17bf949 Mon Sep 17 00:00:00 2001 From: Zhang Mingli Date: Sun, 4 Feb 2024 10:54:27 +0800 Subject: [PATCH 061/152] Fix CTAS crash. See https://github.com/greenplum-db/gpdb/pull/17054 and pick test cases to regress_cloud dir. If we create a replicated table when selecting project volatile functions from a randomly distributed table, we will try to add a Motion on a Motion node that will cause a crash. Create table t1(id int) distributed randomly; Create table t2 as select random() from t1 distributed replicated; planner may have add a top Motion earlier if we know the query's final locus. Some codes handle volatile functions lead us to create_motion_path_for_insert(), and we should consider the case to avoid Assertion failure when create_plan(). 
Authored-by: Zhang Mingli [avamingli@gmail.com](mailto:avamingli@gmail.com) --- src/backend/cdb/cdbllize.c | 2 +- src/backend/cdb/cdbpath.c | 9 ++++++++- 2 files changed, 9 insertions(+), 2 deletions(-) diff --git a/src/backend/cdb/cdbllize.c b/src/backend/cdb/cdbllize.c index 6469eb34bdf..070ed51b99c 100644 --- a/src/backend/cdb/cdbllize.c +++ b/src/backend/cdb/cdbllize.c @@ -310,7 +310,7 @@ get_partitioned_policy_from_path(PlannerInfo *root, Path *path) * returned in that case. * * TODO: This only handles a few cases. For example, INSERT INTO SELECT ... - * is not handled, because the parser injects a subquery for ti which makes + * is not handled, because the parser injects a subquery for it which makes * it tricky. */ CdbPathLocus diff --git a/src/backend/cdb/cdbpath.c b/src/backend/cdb/cdbpath.c index 0e51ffc8c77..196233f17cb 100644 --- a/src/backend/cdb/cdbpath.c +++ b/src/backend/cdb/cdbpath.c @@ -2622,7 +2622,14 @@ create_motion_path_for_insert(PlannerInfo *root, GpPolicy *policy, } } - subpath = cdbpath_create_broadcast_motion_path(root, subpath, policy->numsegments); + /* + * planner may have add a top Motion eariler. + * Create table t1(id int) distributed randomly; + * Create table t2 as select random() from t1 distributed replicated; + * Avoid Motion if there already was. + */ + if (!CdbPathLocus_IsReplicated(subpath->locus)) + subpath = cdbpath_create_broadcast_motion_path(root, subpath, policy->numsegments); } else elog(ERROR, "unrecognized policy type %u", policyType); From 87786ad620ea4d38a37f75499d5b7fcb39ee0733 Mon Sep 17 00:00:00 2001 From: Zhang Mingli Date: Sun, 4 Feb 2024 09:45:54 +0800 Subject: [PATCH 062/152] Fix gitignore. 
--- src/backend/catalog/.gitignore | 1 + src/backend/libpq/.gitignore | 1 + src/include/catalog/.gitignore | 1 + 3 files changed, 3 insertions(+) diff --git a/src/backend/catalog/.gitignore b/src/backend/catalog/.gitignore index 6c4c6d228db..54b2c91307f 100644 --- a/src/backend/catalog/.gitignore +++ b/src/backend/catalog/.gitignore @@ -8,3 +8,4 @@ /pg_*_d.h /gp_*_d.h /bki-stamp +/main_manifest_d.h diff --git a/src/backend/libpq/.gitignore b/src/backend/libpq/.gitignore index f05da7dc220..a826cf9849d 100644 --- a/src/backend/libpq/.gitignore +++ b/src/backend/libpq/.gitignore @@ -13,3 +13,4 @@ fe-secure.c getpeereid.c pqexpbuffer.c fe-trace.c +extensible_protocol.c diff --git a/src/include/catalog/.gitignore b/src/include/catalog/.gitignore index 8d0327eca67..f1c62fa3e24 100644 --- a/src/include/catalog/.gitignore +++ b/src/include/catalog/.gitignore @@ -5,3 +5,4 @@ /gp_version.h /header-stamp /gp_version_at_initdb.dat +/main_manifest_d.h From fc61c38ab9bba3a781dd8e47639b20d95620e22a Mon Sep 17 00:00:00 2001 From: leo Date: Wed, 21 Feb 2024 16:04:36 +0800 Subject: [PATCH 063/152] Fix: gpdiffcheck/query_finish_pending/segspace/dispatch cases For query_finish_pending case, the result of query 'select i1 from \_tmp_table order by i2 limit 3' is mutable with limit and order by clauses and fault injection because we use the randomly distribution as default for table, so modify the sql statement with count. For other cases, modify the 'Create Table' statements and add outputs. For single query transaction, if we are not in transaction block and no xid is assigned, do not send commit command to QE. 
--- src/backend/access/transam/xact.c | 9 +++++++++ src/backend/cdb/cdbtm.c | 7 ++++--- src/include/access/xact.h | 1 + 3 files changed, 14 insertions(+), 3 deletions(-) diff --git a/src/backend/access/transam/xact.c b/src/backend/access/transam/xact.c index d46b6f4e01b..4208e43e656 100644 --- a/src/backend/access/transam/xact.c +++ b/src/backend/access/transam/xact.c @@ -7869,4 +7869,13 @@ void SetCurrentTransactionState(TransactionState transactionState) { CurrentTransactionState = transactionState; +} + +/* + * Get subtransaction ID counter + */ +SubTransactionId +GetSubTransactionIdCounter(void) +{ + return currentSubTransactionId; } \ No newline at end of file diff --git a/src/backend/cdb/cdbtm.c b/src/backend/cdb/cdbtm.c index fef658d6941..e5fb54a67a8 100644 --- a/src/backend/cdb/cdbtm.c +++ b/src/backend/cdb/cdbtm.c @@ -1414,9 +1414,6 @@ dispatchDtxCommand(const char *cmd) elog(DTM_DEBUG5, "dispatchDtxCommand: '%s'", cmd); - if (enable_serverless) - return true; - if (currentGxactWriterGangLost()) { ereport(WARNING, @@ -1424,6 +1421,10 @@ dispatchDtxCommand(const char *cmd) return false; } +#ifdef SERVERLESS + return true; +#endif + CdbDispatchCommand(cmd, DF_NEED_TWO_PHASE, &cdb_pgresults); if (cdb_pgresults.numResults == 0) diff --git a/src/include/access/xact.h b/src/include/access/xact.h index c30704aabe0..93a56c58abd 100644 --- a/src/include/access/xact.h +++ b/src/include/access/xact.h @@ -565,6 +565,7 @@ extern TransactionState GetParentTransactionState(TransactionState transactionSt extern int GetTransactionNestLevel(TransactionState transactionState); extern FullTransactionId GetFullTransactionId(TransactionState transactionState); extern void SetCurrentTransactionState(TransactionState transactionState); +extern SubTransactionId GetSubTransactionIdCounter(void); extern int xactGetCommittedChildren(TransactionId **ptr); From bd6581c82eab7723b2e9d92fb4c022611dbc02b5 Mon Sep 17 00:00:00 2001 From: yjhjstz Date: Mon, 5 Feb 2024 14:25:27 +0800 Subject: 
[PATCH 064/152] fix hang when shutdown in checkpoint --- src/backend/postmaster/checkpointer.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/backend/postmaster/checkpointer.c b/src/backend/postmaster/checkpointer.c index 4464e2ea0f7..07a7a435641 100644 --- a/src/backend/postmaster/checkpointer.c +++ b/src/backend/postmaster/checkpointer.c @@ -593,7 +593,9 @@ HandleCheckpointerInterrupts(void) * the statistics to the stats collector. */ BgWriterStats.m_requested_checkpoints++; + #ifndef SERVERLESS ShutdownXLOG(0, 0); + #endif pgstat_send_bgwriter(); pgstat_send_wal(true); From 64df9e85fef4862c9e2e593f24287e98d8f710ec Mon Sep 17 00:00:00 2001 From: yjhjstz Date: Tue, 6 Feb 2024 14:31:25 +0800 Subject: [PATCH 065/152] fix memory overflow in Dispatch Context. --- src/backend/access/transam/xact.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/backend/access/transam/xact.c b/src/backend/access/transam/xact.c index 4208e43e656..c252269e45e 100644 --- a/src/backend/access/transam/xact.c +++ b/src/backend/access/transam/xact.c @@ -7714,6 +7714,8 @@ GetAllChildXids(int *nxids) xact = xact->parent; } + if (xids == NULL && xact) + xids = (TransactionId *)palloc(sizeof(TransactionId) * len); while (xact) { int index = 0; @@ -7723,9 +7725,7 @@ GetAllChildXids(int *nxids) if (xact->parent) nChildXids += 1; - if (xids == NULL) - xids = (TransactionId *)palloc(sizeof(TransactionId) * len); - else if ((*nxids) + nChildXids >= len) + if ((*nxids) + nChildXids >= len) { len = ((*nxids) + nChildXids) * 2; From ac14da08e1c15af1c2f3f2edb76e31c3640a3415 Mon Sep 17 00:00:00 2001 From: zhangwenchao <656540940@qq.com> Date: Mon, 29 Jan 2024 10:29:51 +0800 Subject: [PATCH 066/152] Extend a new AM method to do acquire sample rows. As different storage formats may need different processing when acquiring sample rows. To support this conveniently, we extend a new AM method interface called acquire_sample_rows in TableAmRoutine. 
authored-by: Zhang Wenchao zwcpostgres@gmail.com --- src/backend/commands/analyze.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/backend/commands/analyze.c b/src/backend/commands/analyze.c index 07ec386f658..6662fcb3363 100644 --- a/src/backend/commands/analyze.c +++ b/src/backend/commands/analyze.c @@ -1996,7 +1996,7 @@ acquire_inherited_sample_rows(Relation onerel, int elevel, * Like in acquire_sample_rows(), if we're in the QD, fetch the sample * from segments. */ - if (Gp_role == GP_ROLE_DISPATCH) + if (Gp_role == GP_ROLE_DISPATCH && ENABLE_DISPATCH()) { int flags = 0; VacuumStmt *stmt = makeNode(VacuumStmt); From e063308ab0cdf175e1cf7a8b17d9661a5650eac8 Mon Sep 17 00:00:00 2001 From: yangjianghua Date: Wed, 28 Feb 2024 09:08:46 +0800 Subject: [PATCH 067/152] Analyze table change to happen on QD. When we collect data from segments to QD, this will change the physical order of the data. Such as in segment 1 the data is 1,3,5,7,9. And in segment 2 the data is 2,4,6,8,10. In each segment the data is ordered, and correlation is 1 in each segment. But after we collect the data to QD, it may be 1,3,5,2,4,7,9,6,8,10. And the correlation is 0.3 or something else and it is not stable. So get correlations from QD on cloud version. 
--- gpMgmt/bin/gpsd | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/gpMgmt/bin/gpsd b/gpMgmt/bin/gpsd index bffbc02065b..11a658f2017 100755 --- a/gpMgmt/bin/gpsd +++ b/gpMgmt/bin/gpsd @@ -19,7 +19,7 @@ gpsd_version = '%prog 1.0' sysnslist = "('pg_toast', 'pg_bitmapindex', 'pg_temp_1', 'pg_catalog', 'information_schema')" # turn off optimizer to fall back to planner and speed up statistic queries # unset search path due to CVE-2018-1058 -pgoptions = '-c optimizer=off -c gp_role=utility -c search_path=' +pgoptions = '-c optimizer=off -c search_path= ' def ResultIter(cursor, arraysize=1000): 'An iterator that uses fetchmany to keep memory usage down' @@ -119,9 +119,13 @@ def parseCmdLine(): default=True, help='Just dump the stats, do not do a schema dump') p.add_option('-l', '--hll', action='store_true', dest='dumpHLL', default=False, help='Include HLL stats') + p.add_option('-w', '--warehouse', dest='warehouse', type='string', + default='test', help='Specify a warehouse') + return p def main(): + global pgoptions parser = parseCmdLine() options, args = parser.parse_args() if len(args) != 1: @@ -137,6 +141,9 @@ def main(): inclSchema = options.dumpSchema inclHLL = options.dumpHLL + if options.warehouse: + pgoptions += "-c hashdata.warehouse=%s" % options.warehouse + envOpts['PGOPTIONS'] = pgoptions version = getVersion(envOpts) From 9dc41183317c1c28289c3b78a088543bce0d3d4e Mon Sep 17 00:00:00 2001 From: leo Date: Wed, 28 Feb 2024 09:07:20 +0800 Subject: [PATCH 068/152] Fix: only update distribution policy for 'SET DISTRIBUTED BY' statement Since data is stored in remote storage and shared among all segments, data redistribution is not needed. We only change the distribution policy without any redistribution for 'ALTER TABLE SET DISTRIBUTED BY' statement. If requested policy is not the same as current policy, we change the policy of relation; Otherwise, do nothing even if option 'reorganize' is set to true. 
Add test cases 'hashdata_ddl/allalter_hashdata' and 'hashdata_ddl/alter_table_setstorage_hashdata' in greenplum_schedule. Add hook 'ATExecSetDistributedBy_hook' for plugins to get control in ATExecSetDistributedBy. --- src/backend/commands/tablecmds.c | 6 ++++++ src/include/commands/tablecmds.h | 4 ++++ 2 files changed, 10 insertions(+) diff --git a/src/backend/commands/tablecmds.c b/src/backend/commands/tablecmds.c index fad0b2d0791..b1e3e1846c8 100644 --- a/src/backend/commands/tablecmds.c +++ b/src/backend/commands/tablecmds.c @@ -141,6 +141,9 @@ const char *synthetic_sql = "(internally generated SQL command)"; +/* Hook for plugins to get control in ATExecSetDistributedBy */ +ATExecSetDistributedBy_hook_type ATExecSetDistributedBy_hook = NULL; + /* * ON COMMIT action list */ @@ -18765,6 +18768,9 @@ ATExecSetDistributedBy(Relation rel, Node *node, AlterTableCmd *cmd) Oid relationOid = InvalidOid; AutoStatsCmdType cmdType = AUTOSTATS_CMDTYPE_SENTINEL; + if (ATExecSetDistributedBy_hook) + return (*ATExecSetDistributedBy_hook)(rel, node, cmd); + /* Can't ALTER TABLE SET system catalogs */ if (IsSystemRelation(rel)) ereport(ERROR, diff --git a/src/include/commands/tablecmds.h b/src/include/commands/tablecmds.h index d40cd62be17..67d446a84f8 100644 --- a/src/include/commands/tablecmds.h +++ b/src/include/commands/tablecmds.h @@ -33,6 +33,10 @@ struct AlterTableUtilityContext; /* avoid including tcop/utility.h here */ extern const char *synthetic_sql; +/* Hook for plugins to get control in ATExecSetDistributedBy */ +typedef void (*ATExecSetDistributedBy_hook_type)(Relation rel, Node *node, AlterTableCmd *cmd); +extern PGDLLIMPORT ATExecSetDistributedBy_hook_type ATExecSetDistributedBy_hook; + extern void DefineExternalRelation(CreateExternalStmt *stmt); extern ObjectAddress DefineRelation(CreateStmt *stmt, char relkind, Oid ownerId, From 0ab8386d8cb6af363314a05e10c4a25cbf6f2301 Mon Sep 17 00:00:00 2001 From: liushengsong Date: Wed, 28 Feb 2024 14:26:44 +0800 Subject: 
[PATCH 069/152] Fix: use CBDB bin/include to build unionstore The Option BUILD_CBDB=no ignore the compile of CBDB, but use the CBDB bin/include to build unionstore. In the unionstore, We use LocalBufferAlloc, so we delete the Assert(false). --- src/backend/storage/buffer/localbuf.c | 7 ------- 1 file changed, 7 deletions(-) diff --git a/src/backend/storage/buffer/localbuf.c b/src/backend/storage/buffer/localbuf.c index 9ca40fa379c..98ca440af3f 100644 --- a/src/backend/storage/buffer/localbuf.c +++ b/src/backend/storage/buffer/localbuf.c @@ -117,13 +117,6 @@ LocalBufferAlloc(SMgrRelation smgr, ForkNumber forkNum, BlockNumber blockNum, bool found; uint32 buf_state; - /* - * Local buffers are used for temp tables in PostgreSQL. As temp tables - * use shared buffers in Cloudberry, we shouldn't be useing local buffers - * for anything. - */ - Assert(false); - INIT_BUFFERTAG(newTag, smgr->smgr_rnode.node, forkNum, blockNum); /* Initialize local buffers if first request in this session */ From f081ddd497899ae634314c57b6170006dd021d4d Mon Sep 17 00:00:00 2001 From: Wang Weinan Date: Thu, 29 Feb 2024 16:21:37 +0800 Subject: [PATCH 070/152] Fix hashdata storage `vacuum full` lighting version do not have hashdata storage, so vacuum full only validate if the current storage is appendonly. 
we need to change validate the storage if it is nonblocked --- src/backend/commands/vacuum.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/src/backend/commands/vacuum.c b/src/backend/commands/vacuum.c index 1c6aef616c8..ffdc150630c 100644 --- a/src/backend/commands/vacuum.c +++ b/src/backend/commands/vacuum.c @@ -2652,7 +2652,11 @@ vacuum_rel(Oid relid, RangeVar *relation, VacuumParams *params, return false; } - is_appendoptimized = RelationStorageIsAO(rel); +#ifdef SERVERLESS + is_appendoptimized = RelationIsNonblockRelation(rel); +#else + is_appendoptimized = RelationIsAppendOptimized(rel); +#endif is_toast = (rel->rd_rel->relkind == RELKIND_TOASTVALUE); if (ao_vacuum_phase && !(is_appendoptimized || is_toast)) From 08acdfdd91f2d0ee967daeb7dab01389eba89fb9 Mon Sep 17 00:00:00 2001 From: leo Date: Fri, 1 Mar 2024 16:52:01 +0800 Subject: [PATCH 071/152] Fix: set numsegments of distribution policy to zero for partition child table For statements: 'create table xxx partition by range(xx) (start(xx) end(xxx) every(xx))'/ 'create table xxx partition by list(xx) (partition xx, partition xx)' partition child tables are created together with partitioned table, but the numsegments of distribution policy may be set to num segs of current warehouse, this will cause query on partition child tables failures across the multi warehouses, so set numsegments to zero at partition child table creation. 
--- src/backend/parser/parse_partition_gp.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/src/backend/parser/parse_partition_gp.c b/src/backend/parser/parse_partition_gp.c index 747f1940eac..0579df98cc7 100644 --- a/src/backend/parser/parse_partition_gp.c +++ b/src/backend/parser/parse_partition_gp.c @@ -930,6 +930,11 @@ makePartitionCreateStmt(Relation parentrel, char *partname, PartitionBoundSpec * childstmt->if_not_exists = false; childstmt->origin = origin; childstmt->distributedBy = make_distributedby_for_rel(parentrel); +#ifdef SERVERLESS + /* make sure the numsegments of distribution policy is zero for partition child tables */ + if (childstmt->distributedBy) + childstmt->distributedBy->numsegments = 0; +#endif childstmt->partitionBy = NULL; childstmt->relKind = 0; childstmt->ownerid = parentrel->rd_rel->relowner; From c6393689427f71113e4897dbabeb2cd164992987 Mon Sep 17 00:00:00 2001 From: wangweinan Date: Wed, 6 Mar 2024 19:58:11 +0800 Subject: [PATCH 072/152] Retire `enable_serverless` (Part 1) As the previous discussion, retire the global variable `enable_serverless`. Using MARCO `SERVERLESS` directly. After this MR, still using the variable somewhere. This is a partial MR. 
--- src/backend/access/heap/heapam_visibility.c | 6 ++++-- src/backend/access/heap/vacuumlazy.c | 5 +++-- src/backend/access/table/table.c | 6 +++++- src/backend/access/transam/clog.c | 5 ++++- src/backend/access/transam/xlog.c | 4 +++- src/backend/cdb/cdbdtxcontextinfo.c | 6 ++++-- src/backend/cdb/cdbfts.c | 12 +++++++----- src/backend/cdb/cdbtm.c | 13 ++++++++++--- src/backend/cdb/cdbutil.c | 8 ++++++-- src/backend/cdb/dispatcher/cdbgang.c | 19 ++++++++++++++++--- 10 files changed, 62 insertions(+), 22 deletions(-) diff --git a/src/backend/access/heap/heapam_visibility.c b/src/backend/access/heap/heapam_visibility.c index ca43c4bcfbd..542068a340a 100644 --- a/src/backend/access/heap/heapam_visibility.c +++ b/src/backend/access/heap/heapam_visibility.c @@ -184,11 +184,13 @@ SetHintBits(HeapTupleHeader tuple, Buffer buffer, Relation rel, * On QE, we can see any changes on catalog relations(dirty read) in InitProcessing Mode * because of the latest snapshot, do not set hint bits. */ - if (enable_serverless && IsInitProcessingMode() && - Gp_role == GP_ROLE_EXECUTE && GpIdentity.segindex != MASTER_CONTENT_ID) +#ifdef SERVERLESS + if (IsInitProcessingMode() && Gp_role == GP_ROLE_EXECUTE && + GpIdentity.segindex != MASTER_CONTENT_ID) { return; } +#endif /* SERVERLESS */ if (TransactionIdIsValid(xid)) { diff --git a/src/backend/access/heap/vacuumlazy.c b/src/backend/access/heap/vacuumlazy.c index 4902746b320..e2a9460c2a5 100644 --- a/src/backend/access/heap/vacuumlazy.c +++ b/src/backend/access/heap/vacuumlazy.c @@ -1516,13 +1516,14 @@ lazy_scan_heap(LVRelState *vacrel, VacuumParams *params, bool aggressive) { int log_level = WARNING; +#ifdef SERVERLESS /* * In serverless architecture, FSM is not WAL-logged together with corresponding page, but WAL-logged * when FSM page is evicted. It's possible that the visibility map bit is set but the page-level bit is * clear, so set the LOG_LEVEL to LOG to omit this case. 
*/ - if (enable_serverless) - log_level = LOG; + log_level = LOG; +#endif /* SERVERLESS */ elog(log_level, "page is not marked all-visible but visibility map bit is set in relation \"%s\" page %u", vacrel->relname, blkno); diff --git a/src/backend/access/table/table.c b/src/backend/access/table/table.c index fbc27d81ff5..5e7ca432140 100644 --- a/src/backend/access/table/table.c +++ b/src/backend/access/table/table.c @@ -231,7 +231,11 @@ CdbTryOpenTable(Oid relid, LOCKMODE reqmode, bool *lockUpgraded) { lockmode = RowExclusiveLock; rel = try_table_open(relid, lockmode, false); - + /* + * FIXME: table which is not a heap table and AO table + * does not support concurrently update or delete. So + * we upgrade lockmode as the same as AO|AOCO. + */ #ifdef SERVERLESS if (RelationIsNonblockRelation(rel)) #else /* SERVERLESS */ diff --git a/src/backend/access/transam/clog.c b/src/backend/access/transam/clog.c index 33896e7dfa7..82fb6be91a2 100644 --- a/src/backend/access/transam/clog.c +++ b/src/backend/access/transam/clog.c @@ -171,8 +171,11 @@ TransactionIdSetTreeStatus(TransactionId xid, int nsubxids, /* * Only master can set transaction status */ - if (enable_serverless && (Gp_role != GP_ROLE_DISPATCH && GpIdentity.segindex != MASTER_CONTENT_ID)) +#ifdef SERVERLESS + if (IsNormalProcessingMode() && + IS_QUERY_EXECUTOR_BACKEND()) return; +#endif /* SERVERLESS */ Assert(status == TRANSACTION_STATUS_COMMITTED || status == TRANSACTION_STATUS_ABORTED); diff --git a/src/backend/access/transam/xlog.c b/src/backend/access/transam/xlog.c index b5a12de91c9..3315d1d0377 100644 --- a/src/backend/access/transam/xlog.c +++ b/src/backend/access/transam/xlog.c @@ -4025,8 +4025,10 @@ PreallocXlogFiles(XLogRecPtr endptr) /* * In serverless architecture, do not need xlog files any more. 
*/ - if (enable_serverless) +#ifdef SERVERLESS + if (IsNormalProcessingMode()) return; +#endif /* SERVERLESS */ XLByteToPrevSeg(endptr, _logSegNo, wal_segment_size); offset = XLogSegmentOffset(endptr - 1, wal_segment_size); diff --git a/src/backend/cdb/cdbdtxcontextinfo.c b/src/backend/cdb/cdbdtxcontextinfo.c index 9227d844b74..356647307a4 100644 --- a/src/backend/cdb/cdbdtxcontextinfo.c +++ b/src/backend/cdb/cdbdtxcontextinfo.c @@ -46,9 +46,11 @@ DtxContextInfo_CreateOnMaster(DtxContextInfo *dtxContextInfo, bool inCursor, DtxContextInfo_Reset(dtxContextInfo); +#ifdef SERVERLESS + dtxContextInfo->distributedXid = MyProc->lxid; +#else /* SERVERLESS */ dtxContextInfo->distributedXid = getDistributedTransactionId(); - if (enable_serverless) - dtxContextInfo->distributedXid = MyProc->lxid; +#endif /* SERVERLESS */ if (dtxContextInfo->distributedXid != InvalidDistributedTransactionId) dtxContextInfo->curcid = curcid; diff --git a/src/backend/cdb/cdbfts.c b/src/backend/cdb/cdbfts.c index 8155663e984..a41c359de44 100644 --- a/src/backend/cdb/cdbfts.c +++ b/src/backend/cdb/cdbfts.c @@ -79,6 +79,10 @@ FtsShmemInit(void) void FtsNotifyProber(void) { +#ifdef SERVERLESS + return; +#endif /* SERVERLESS */ + Assert(Gp_role == GP_ROLE_DISPATCH); int32 initial_started; int32 started; @@ -87,9 +91,6 @@ FtsNotifyProber(void) if (am_ftsprobe) return; - if (enable_serverless) - return; - SpinLockAcquire(&ftsProbeInfo->lock); initial_started = ftsProbeInfo->start_count; SpinLockRelease(&ftsProbeInfo->lock); @@ -180,8 +181,9 @@ getFtsVersion(void) void FtsNotifyProber(void) { - if (enable_serverless) - return; +#ifdef SERVERLESS + return; +#endif /* SERVERLESS */ Assert(Gp_role == GP_ROLE_DISPATCH); SendPostmasterSignal(PMSIGNAL_WAKEN_FTS); diff --git a/src/backend/cdb/cdbtm.c b/src/backend/cdb/cdbtm.c index e5fb54a67a8..0391bbbb4ac 100644 --- a/src/backend/cdb/cdbtm.c +++ b/src/backend/cdb/cdbtm.c @@ -1138,7 +1138,11 @@ tmShmemInit(void) /* Initialize locks and shared memory area */ { 
*shmNextSnapshotId = 0; - *shmDtmStarted = enable_serverless; +#ifdef SERVERLESS + *shmDtmStarted = true; +#else + *shmDtmStarted = false; +#endif *shmCleanupBackends = false; *shmDtxRecoveryPid = 0; *shmDtxRecoveryEvents = DTX_RECOVERY_EVENT_ABORT_PREPARED; @@ -1647,10 +1651,13 @@ isDtxQueryDispatcher(void) isDtmStarted = (shmDtmStarted != NULL && *shmDtmStarted); isSharedLocalSnapshotSlotPresent = (SharedLocalSnapshotSlot != NULL); +#ifdef SERVERLESS + return false; +#else /* SERVERLESS */ return (Gp_role == GP_ROLE_DISPATCH && isDtmStarted && - isSharedLocalSnapshotSlotPresent && - !enable_serverless); + isSharedLocalSnapshotSlotPresent); +#endif /* SERVERLESS */ } /* diff --git a/src/backend/cdb/cdbutil.c b/src/backend/cdb/cdbutil.c index 178d8726770..393cc98caf4 100644 --- a/src/backend/cdb/cdbutil.c +++ b/src/backend/cdb/cdbutil.c @@ -492,12 +492,14 @@ getCdbComponentInfo(void) /* * In singlenode deployment, total_segment_dbs is zero and it should still work. */ - if (component_databases->total_segment_dbs == 0 && !IS_SINGLENODE() && !enable_serverless) +#ifndef SERVERLESS + if (component_databases->total_segment_dbs == 0 && !IS_SINGLENODE()) { ereport(ERROR, (errcode(ERRCODE_CARDINALITY_VIOLATION), errmsg("number of segment databases cannot be 0"))); } +#endif /* SERVERLESS */ if (component_databases->total_entry_dbs == 0) { ereport(ERROR, @@ -2916,12 +2918,14 @@ getCdbComponentInfo(void) * Validate that there exists at least one entry and one segment database * in the configuration */ - if (component_databases->total_segment_dbs == 0 && !enable_serverless) +#ifndef SERVERLESS + if (component_databases->total_segment_dbs == 0) { ereport(ERROR, (errcode(ERRCODE_CARDINALITY_VIOLATION), errmsg("number of segment databases cannot be 0"))); } +#endif /*SERVERLESS */ if (component_databases->total_entry_dbs == 0) { ereport(ERROR, diff --git a/src/backend/cdb/dispatcher/cdbgang.c b/src/backend/cdb/dispatcher/cdbgang.c index 77ef6f5d5ee..f5572ffada7 100644 --- 
a/src/backend/cdb/dispatcher/cdbgang.c +++ b/src/backend/cdb/dispatcher/cdbgang.c @@ -586,12 +586,25 @@ cdbgang_parse_gpqeid_params(struct Port *port pg_attribute_unused(), qe_idx = (int) strtol(cp, NULL, 10); } + if (gpqeid_next_param(&cp, &np)) + { +#ifdef SERVERLESS + GpIdentity.segindex = (int32)strtol(cp, NULL, 10); +#endif /* SERVERLESS */ + } + + if (gpqeid_next_param(&cp, &np)) + { +#ifdef SERVERLESS + GpIdentity.dbid = (int32)strtol(cp, NULL, 10); +#endif /* SERVERLESS */ + } + if (parse_gpqeid_params_hook) { - while(gpqeid_next_param(&cp, &np)) + if (gpqeid_next_param(&cp, &np)) { - if (!(*parse_gpqeid_params_hook)(cp)) - break; + (*parse_gpqeid_params_hook)(cp); } } From c72e196c21cc40dde27982941ffc52ddc4e26f47 Mon Sep 17 00:00:00 2001 From: kongfanshen Date: Fri, 8 Mar 2024 16:04:18 +0800 Subject: [PATCH 073/152] fix minirepro in commitid 02cd51b103354adf4119199cf9be2cc7a3f28134, we redefine the ALWAYS_SECURE_SEARCH_PATH_SQL. But we use macro ALWAYS_SECURE_SEARCH_PATH_SQL in many files, define another macro. --- src/fe_utils/connect_utils.c | 4 ++++ src/include/common/connect.h | 4 ++++ 2 files changed, 8 insertions(+) diff --git a/src/fe_utils/connect_utils.c b/src/fe_utils/connect_utils.c index 96bb798316a..8f1e553ab2d 100644 --- a/src/fe_utils/connect_utils.c +++ b/src/fe_utils/connect_utils.c @@ -122,7 +122,11 @@ connectDatabase(const ConnParams *cparams, const char *progname, } /* Start strict; callers may override this. */ +#ifdef SERVERLESS + executeCommand(conn, ALWAYS_SET_SEARCH_PATH_SQL, echo); +#else /* SERVERLESS */ PQclear(executeQuery(conn, ALWAYS_SECURE_SEARCH_PATH_SQL, echo)); +#endif /* SERVERLESS */ return conn; } diff --git a/src/include/common/connect.h b/src/include/common/connect.h index 71c2d6e72c4..1b60391094d 100644 --- a/src/include/common/connect.h +++ b/src/include/common/connect.h @@ -22,6 +22,10 @@ * introduced schemas. When connected to an older version from code that * might work with the old server, skip this. 
*/ + +#define ALWAYS_SET_SEARCH_PATH_SQL \ + "set search_path='';" + #define ALWAYS_SECURE_SEARCH_PATH_SQL \ "SELECT pg_catalog.set_config('search_path', '', false);" From 89f42f2f2648b3cfd0cbedfeb37ea03e7545e27c Mon Sep 17 00:00:00 2001 From: yangjianghua Date: Tue, 5 Mar 2024 19:42:57 +0800 Subject: [PATCH 074/152] Fix use_physical_tlist to consider hashdata column store. Export HASHDATA_TBALE_AM_OID --- src/backend/optimizer/plan/createplan.c | 2 ++ src/include/utils/rel.h | 3 +++ 2 files changed, 5 insertions(+) diff --git a/src/backend/optimizer/plan/createplan.c b/src/backend/optimizer/plan/createplan.c index 7ce61a9ab9e..628f7329ace 100644 --- a/src/backend/optimizer/plan/createplan.c +++ b/src/backend/optimizer/plan/createplan.c @@ -976,6 +976,8 @@ use_physical_tlist(PlannerInfo *root, Path *path, int flags) if (rel->amflags & AMFLAG_HAS_COLUMN_ORIENTED_SCAN) return false; + if (AMHandlerIsHashdataCols(rel->amhandler)) + return false; /* * Also, don't do it to a CustomPath; the premise that we're extracting * columns from a simple physical tuple is unlikely to hold for those. diff --git a/src/include/utils/rel.h b/src/include/utils/rel.h index f44efcdfa98..fe950163655 100644 --- a/src/include/utils/rel.h +++ b/src/include/utils/rel.h @@ -546,6 +546,7 @@ typedef struct ViewOptions */ #define PAX_AM_OID 7047 #define HASHDATA_AM_OID 7015 +#define HASHDATA_TBALE_AM_OID 7604 #define RelationIsPax(relation) \ ((relation)->rd_rel->relam == PAX_AM_OID) @@ -569,6 +570,8 @@ typedef struct ViewOptions (relation)->rd_rel->relam == PAX_AM_OID || \ (relation)->rd_rel->relam == HASHDATA_AM_OID) +#define AMHandlerIsHashdataCols(amhandler) \ + ((amhandler) == HASHDATA_TBALE_AM_OID) /* * RelationIsBitmapIndex * True iff relation is a bitmap index From 1225711fb5c202d476dd6d79dc1f9f7690b7456f Mon Sep 17 00:00:00 2001 From: wangweinan Date: Tue, 5 Mar 2024 09:35:27 +0800 Subject: [PATCH 075/152] Support `createdb`, `dropdb`, etc. 
tools In serverless version, we need assign a warehouse first, then run query. some pg tools using `set_config` to set GUC by SQL query, so these tools is reject by warehouse check hook. directly using `set command` for these tools. --- src/fe_utils/connect_utils.c | 2 +- src/include/common/connect.h | 7 ++++--- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/src/fe_utils/connect_utils.c b/src/fe_utils/connect_utils.c index 8f1e553ab2d..af4341dd4e4 100644 --- a/src/fe_utils/connect_utils.c +++ b/src/fe_utils/connect_utils.c @@ -123,7 +123,7 @@ connectDatabase(const ConnParams *cparams, const char *progname, /* Start strict; callers may override this. */ #ifdef SERVERLESS - executeCommand(conn, ALWAYS_SET_SEARCH_PATH_SQL, echo); + executeCommand(conn, ALWAYS_SECURE_SEARCH_PATH_SQL, echo); #else /* SERVERLESS */ PQclear(executeQuery(conn, ALWAYS_SECURE_SEARCH_PATH_SQL, echo)); #endif /* SERVERLESS */ diff --git a/src/include/common/connect.h b/src/include/common/connect.h index 1b60391094d..dc8cdbb539f 100644 --- a/src/include/common/connect.h +++ b/src/include/common/connect.h @@ -22,11 +22,12 @@ * introduced schemas. When connected to an older version from code that * might work with the old server, skip this. */ - -#define ALWAYS_SET_SEARCH_PATH_SQL \ +#ifdef SERVERLESS +#define ALWAYS_SECURE_SEARCH_PATH_SQL \ "set search_path='';" - +#else /* SERVERLESS */ #define ALWAYS_SECURE_SEARCH_PATH_SQL \ "SELECT pg_catalog.set_config('search_path', '', false);" +#endif /* SERVERLESS */ #endif /* CONNECT_H */ From f59f2966138701b2a7a2cef0038bed503a4b8576 Mon Sep 17 00:00:00 2001 From: kongfanshen Date: Thu, 7 Mar 2024 10:25:37 +0800 Subject: [PATCH 076/152] Fix plan diffs, including: select_into/subselect/incremential_sort/union/portals/update/privileges/matview We should check all the pgresults, not the last one when get the explain statistics from qe. 
We add the new libpq protocol 'm' to get manifest tuples, when execute dml, the new pgresult will be appended into resultbuf, the last one is not the statistics info. --- src/backend/commands/explain_gp.c | 27 ++++++++++++++++++++------- 1 file changed, 20 insertions(+), 7 deletions(-) diff --git a/src/backend/commands/explain_gp.c b/src/backend/commands/explain_gp.c index 27580fbd5fa..127a28c474e 100644 --- a/src/backend/commands/explain_gp.c +++ b/src/backend/commands/explain_gp.c @@ -552,7 +552,9 @@ cdbexplain_recvExecStats(struct PlanState *planstate, CdbDispatchResult *dispatchResult = &dispatchResultBeg[iDispatch]; PGresult *pgresult; CdbExplain_StatHdr *hdr; - pgCdbStatCell *statcell; + pgCdbStatCell *statcell = NULL; + int nres; + int ires; /* Update worker counts. */ if (!dispatchResult->hasDispatched) @@ -568,19 +570,30 @@ cdbexplain_recvExecStats(struct PlanState *planstate, * side-effect of another qExec's failure, * e.g. an interconnect error */ - /* Find this qExec's last PGresult. If none, skip to next qExec. */ - pgresult = cdbdisp_getPGresult(dispatchResult, -1); - if (!pgresult) + nres = cdbdisp_numPGresult(dispatchResult); + for (ires = nres -1; ires >= 0; ires--) + { + pgresult = cdbdisp_getPGresult(dispatchResult, ires); + if (!pgresult) + continue; + if (pgresult->cdbstats) + { + /* Find the cdbstats */ + statcell = pgresult->cdbstats; + break; + } + } + + /* can't find our statistics */ + if (!statcell) continue; /* Find our statistics in list of response messages. If none, skip. */ - for (statcell = pgresult->cdbstats; statcell; statcell = statcell->next) + for (; statcell; statcell = statcell->next) { if (IsA((Node *) statcell->data, CdbExplain_StatHdr)) break; } - if (!statcell) - continue; /* Validate the message header. 
*/ hdr = (CdbExplain_StatHdr *) statcell->data; From b6874585d7ae4b54225b78cb4c69461bed915052 Mon Sep 17 00:00:00 2001 From: yangjianghua Date: Tue, 5 Mar 2024 14:17:48 +0800 Subject: [PATCH 077/152] Optimize simple delete not project whole row * make strict to add_row_identity_var(root, var, rtindex, "wholerow"); * consider returning clause and trigger conditions. --- src/backend/optimizer/util/appendinfo.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/src/backend/optimizer/util/appendinfo.c b/src/backend/optimizer/util/appendinfo.c index 856d758f891..93e30a484ad 100644 --- a/src/backend/optimizer/util/appendinfo.c +++ b/src/backend/optimizer/util/appendinfo.c @@ -953,8 +953,12 @@ add_row_identity_columns(PlannerInfo *root, Index rtindex, * redesigning AO/AOCS storage format or making the update plan is * consistent whether it generated by pg optimizer or ORCA optimizer. */ - if ((commandType == CMD_UPDATE || commandType == CMD_DELETE) && - RelationIsNonblockRelation(target_relation)) + if (RelationIsNonblockRelation(target_relation) && + ((commandType == CMD_UPDATE) || + (commandType == CMD_DELETE && + (has_update_delete_triggers(RelationGetRelid(target_relation)) || + root->parse->returningList)))) + { var = makeVar(rtindex, InvalidAttrNumber, From 3d2bf9c8273063654a1ba6d5ef16f51540d2f3b6 Mon Sep 17 00:00:00 2001 From: hanwei Date: Tue, 5 Mar 2024 21:32:27 +0800 Subject: [PATCH 078/152] Fix tablespace bug Fix tablespace for temp data storage or shared data storage. Because create tablespace is not excuted by QE but some shared data or temp data needs temp tablespace and find relative path but not found. So for fix it, shared data only storage in default tablepsace. 
--- src/backend/storage/file/sharedfileset.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/src/backend/storage/file/sharedfileset.c b/src/backend/storage/file/sharedfileset.c index ed37c940adc..448decbbde7 100644 --- a/src/backend/storage/file/sharedfileset.c +++ b/src/backend/storage/file/sharedfileset.c @@ -336,6 +336,14 @@ SharedFileSetPath(char *path, SharedFileSet *fileset, Oid tablespace) static Oid ChooseTablespace(const SharedFileSet *fileset, const char *name) { +#ifdef SERVERLESS +/* + * fix the problem of temp tablesapces when store temp data or share data, + * as create tablespace statement doesn't dispatched from QD, + * maybe have better soltuion? + */ + return DEFAULTTABLESPACE_OID; +#endif uint32 hash = hash_any((const unsigned char *) name, strlen(name)); return fileset->tablespaces[hash % fileset->ntablespaces]; From a1423ac5ebebfc70ebc688d3b2076cebf856628f Mon Sep 17 00:00:00 2001 From: leo Date: Wed, 13 Mar 2024 18:22:03 +0800 Subject: [PATCH 079/152] Feature: support user defined heap table --- src/backend/cdb/cdbcat.c | 4 ++++ src/backend/commands/copyto.c | 3 ++- 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/src/backend/cdb/cdbcat.c b/src/backend/cdb/cdbcat.c index ea3e845f1de..7665b0936ca 100644 --- a/src/backend/cdb/cdbcat.c +++ b/src/backend/cdb/cdbcat.c @@ -512,7 +512,11 @@ GpPolicyStore(Oid tbloid, const GpPolicy *policy) /* Sanity check the policy and its opclasses before storing it. 
*/ if (policy->ptype == POLICYTYPE_ENTRY) +#ifdef SERVERLESS + return; +#else elog(ERROR, "cannot store entry-type policy in gp_distribution_policy"); +#endif for (i = 0; i < policy->nattrs; i++) { if (policy->opclasses[i] == InvalidOid) diff --git a/src/backend/commands/copyto.c b/src/backend/commands/copyto.c index 871a973235e..95b5b18983f 100644 --- a/src/backend/commands/copyto.c +++ b/src/backend/commands/copyto.c @@ -481,7 +481,8 @@ DoCopyTo(CopyToState cstate) * doing COPY (SELECT) we just go straight to work, without * dispatching COPY commands to executors. */ - if (Gp_role == GP_ROLE_DISPATCH && cstate->rel && cstate->rel->rd_cdbpolicy) + if (Gp_role == GP_ROLE_DISPATCH && cstate->rel && cstate->rel->rd_cdbpolicy && + !GpPolicyIsEntry(cstate->rel->rd_cdbpolicy)) processed = CopyToDispatch(cstate); else processed = CopyTo(cstate); From f82cf47205b0340e5cb1a8248f1b5445ef95348b Mon Sep 17 00:00:00 2001 From: wangweinan Date: Wed, 13 Mar 2024 15:47:23 +0800 Subject: [PATCH 080/152] Fix fts probe issue after pg_ctl restart fts probe shouldn't pass PGOPTIONS from QD. --- .gitignore | 4 +++- src/backend/fts/ftsprobe.c | 8 ++++++++ 2 files changed, 11 insertions(+), 1 deletion(-) diff --git a/.gitignore b/.gitignore index 44a59792420..eb044501cac 100644 --- a/.gitignore +++ b/.gitignore @@ -74,4 +74,6 @@ lib*.pc /compile_commands.json /tmp_install/ /.cache/ -/install/ \ No newline at end of file +/install/ +dependencies.sh +compile_commands.json diff --git a/src/backend/fts/ftsprobe.c b/src/backend/fts/ftsprobe.c index ece8edb6581..0f9b21e2fb8 100644 --- a/src/backend/fts/ftsprobe.c +++ b/src/backend/fts/ftsprobe.c @@ -169,6 +169,14 @@ ftsConnectStart(fts_segment_info *ftsInfo) GPCONN_TYPE_FTS); ftsInfo->conn = PQconnectStart(conninfo); + /* + * Pass and set the Coordinator env declared GUCs in FTS process which can + * raising undefined behaviour, since the two callback functions for an GUC + * can touch any resource but FTS process does not prepare ready. 
+ */ + if (ftsInfo->conn->pgoptions) + ftsInfo->conn->pgoptions = NULL; + if (ftsInfo->conn == NULL) { elog(ERROR, "FTS: cannot create libpq connection object, possibly out" From 54c69a55988365798db267850e7de2733680b5b8 Mon Sep 17 00:00:00 2001 From: roseduan Date: Tue, 12 Mar 2024 19:04:48 +0800 Subject: [PATCH 081/152] Fix gpsd --- gpMgmt/bin/gpsd | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/gpMgmt/bin/gpsd b/gpMgmt/bin/gpsd index 11a658f2017..b411a27401d 100755 --- a/gpMgmt/bin/gpsd +++ b/gpMgmt/bin/gpsd @@ -142,7 +142,8 @@ def main(): inclHLL = options.dumpHLL if options.warehouse: - pgoptions += "-c hashdata.warehouse=%s" % options.warehouse + warehouse_opt = "-c hashdata.warehouse=%s" % options.warehouse + pgoptions = warehouse_opt + " " + pgoptions envOpts['PGOPTIONS'] = pgoptions From 32ef8c18d582bf289ef95ba0ed4e5df9cc6eb8ec Mon Sep 17 00:00:00 2001 From: leo Date: Wed, 20 Mar 2024 17:45:12 +0800 Subject: [PATCH 082/152] Support partition on heap table Add hook makePartitionCreateStmt_hook for plugins to get control in makePartitionCreateStmt. We use this hook to generate CreateStmt with entry distribution policy for heap partitioned table. Heap table could not inherit from non-heap table, and COPY ON SEGMENT is not supported for heap table. 
--- src/backend/commands/copyfrom.c | 5 +++++ src/backend/parser/parse_partition_gp.c | 12 +++++++----- src/include/parser/parse_utilcmd.h | 7 +++++++ 3 files changed, 19 insertions(+), 5 deletions(-) diff --git a/src/backend/commands/copyfrom.c b/src/backend/commands/copyfrom.c index 510c4a5cdb6..da87cf869e6 100644 --- a/src/backend/commands/copyfrom.c +++ b/src/backend/commands/copyfrom.c @@ -4063,6 +4063,11 @@ SendCopyFromForwardedError(CopyFromState cstate, CdbCopy *cdbCopy, char *errorms int target_seg; int errormsg_len = strlen(errormsg); +#ifdef SERVERLESS + if (cstate->rel && GpPolicyIsEntry(cstate->rel->rd_cdbpolicy)) + return; +#endif + msgbuf = cstate->dispatch_msgbuf; resetStringInfo(msgbuf); enlargeStringInfo(msgbuf, SizeOfCopyFromDispatchError); diff --git a/src/backend/parser/parse_partition_gp.c b/src/backend/parser/parse_partition_gp.c index 0579df98cc7..d11e630bda2 100644 --- a/src/backend/parser/parse_partition_gp.c +++ b/src/backend/parser/parse_partition_gp.c @@ -60,6 +60,9 @@ typedef struct int every_location; } PartEveryIterator; +/* Hook for plugins to get control in makePartitionCreateStmt() */ +makePartitionCreateStmt_hook_type makePartitionCreateStmt_hook = NULL; + static List *generateRangePartitions(ParseState *pstate, Relation parentrel, GpPartDefElem *elem, @@ -899,6 +902,10 @@ makePartitionCreateStmt(Relation parentrel, char *partname, PartitionBoundSpec * char *schemaname; const char *final_part_name; + if (makePartitionCreateStmt_hook) + return (*makePartitionCreateStmt_hook) (parentrel, partname, boundspec, + subPart, elem, partnamecomp); + if (partnamecomp->tablename) final_part_name = partnamecomp->tablename; else @@ -930,11 +937,6 @@ makePartitionCreateStmt(Relation parentrel, char *partname, PartitionBoundSpec * childstmt->if_not_exists = false; childstmt->origin = origin; childstmt->distributedBy = make_distributedby_for_rel(parentrel); -#ifdef SERVERLESS - /* make sure the numsegments of distribution policy is zero for 
partition child tables */ - if (childstmt->distributedBy) - childstmt->distributedBy->numsegments = 0; -#endif childstmt->partitionBy = NULL; childstmt->relKind = 0; childstmt->ownerid = parentrel->rd_rel->relowner; diff --git a/src/include/parser/parse_utilcmd.h b/src/include/parser/parse_utilcmd.h index 9fed9ba6d87..2da212a1eb5 100644 --- a/src/include/parser/parse_utilcmd.h +++ b/src/include/parser/parse_utilcmd.h @@ -68,6 +68,13 @@ typedef struct partname_comp int partnum; } partname_comp; +/* Hook for plugins to get control in makePartitionCreateStmt() */ +typedef CreateStmt *(*makePartitionCreateStmt_hook_type) (Relation parentrel, char *partname, + PartitionBoundSpec *boundspec, + PartitionSpec *subPart, GpPartDefElem *elem, + partname_comp *partnamecomp); +extern PGDLLIMPORT makePartitionCreateStmt_hook_type makePartitionCreateStmt_hook; + extern CreateStmt *makePartitionCreateStmt(Relation parentrel, char *partname, PartitionBoundSpec *boundspec, PartitionSpec *subPart, From db2596bf886bd1e1785584c1bec8b6b077891ec8 Mon Sep 17 00:00:00 2001 From: wangweinan Date: Tue, 19 Mar 2024 17:45:50 +0800 Subject: [PATCH 083/152] Add an auto switch warehouse for chaos test 1. Declare two UDF in extension `hashdata_chaos_warehouse_begin(float8)` Start the auto switch, the parameter receives a float \[0, 1) as switch ratio to identify auto switch possibility. In the udf, fetch all the warehouses' names is prefixed with `chaos_`, as the switch candidate. So far, we have not pinned these warehouses, query drops these warehouses the auto switch can not detected. `hashdata_chaos_warehouse_end` Stop the auto switch. 1. create a warehouse auto-switch context in shared memory In the shared memory, we keep a spinlock to protect the context. In this context, it records warehouse candidate numbers, OIDs, and switch ratios. 2. auto switch warehouse before a query parser in QD 3. create a serial_chaos_schedule in regress_cloud test suit 4. 
declare an udt `reghouse` to identify warehouse oid and name. not used in this MR, it will boost warehouse oid-name search by system cache --- contrib/hashdata_chaos/Makefile | 29 ++ .../hashdata_chaos/hashdata_chaos--1.0.sql | 46 ++++ contrib/hashdata_chaos/hashdata_chaos.c | 248 ++++++++++++++++++ contrib/hashdata_chaos/hashdata_chaos.control | 5 + contrib/hashdata_chaos/reghouse.c | 99 +++++++ src/backend/tcop/postgres.c | 10 + src/backend/utils/errcodes.txt | 1 + src/include/postgres.h | 2 + .../utils/process_shared_preload_libraries.h | 15 ++ 9 files changed, 455 insertions(+) create mode 100644 contrib/hashdata_chaos/Makefile create mode 100644 contrib/hashdata_chaos/hashdata_chaos--1.0.sql create mode 100644 contrib/hashdata_chaos/hashdata_chaos.c create mode 100644 contrib/hashdata_chaos/hashdata_chaos.control create mode 100644 contrib/hashdata_chaos/reghouse.c diff --git a/contrib/hashdata_chaos/Makefile b/contrib/hashdata_chaos/Makefile new file mode 100644 index 00000000000..9ae025b44ad --- /dev/null +++ b/contrib/hashdata_chaos/Makefile @@ -0,0 +1,29 @@ +# contrib/hashdata_chaos/Makefile +subdir = contrib/hashdata_chaos +top_builddir = ../.. + +MODULE_big = hashdata_chaos +PGFILEDESC = "hashdata chaos framework" + +OBJS = hashdata_chaos.o \ + reghouse.o + +EXTENSION = hashdata_chaos +DATA = hashdata_chaos--1.0.sql + +# REGRESS = + +# EXTRA_CLEAN = + +# REGRESS_OPTS = + +ifdef USE_PGXS +PG_CONFIG = pg_config +PGXS := $(shell $(PG_CONFIG) --pgxs) +include $(PGXS) +else +subdir = contrib/hashodata_chaos +top_builddir = ../.. 
+include $(top_builddir)/src/Makefile.global +include $(top_srcdir)/contrib/contrib-global.mk +endif diff --git a/contrib/hashdata_chaos/hashdata_chaos--1.0.sql b/contrib/hashdata_chaos/hashdata_chaos--1.0.sql new file mode 100644 index 00000000000..43ed264e597 --- /dev/null +++ b/contrib/hashdata_chaos/hashdata_chaos--1.0.sql @@ -0,0 +1,46 @@ +/* contrib/hashdata_chaos/hashdata_chaos--1.0.sql */ + +-- complain if script is sourced in psql, rather than via CREATE EXTENSION +\echo Use "CREATE EXTENSION hashdata_chaos" to load this file. \quit + +CREATE FUNCTION hashdata_chaos_warehouse_begin(float8) +RETURNS BOOLEAN +AS 'MODULE_PATHNAME' +LANGUAGE C VOLATILE EXECUTE ON COORDINATOR; + +CREATE FUNCTION hashdata_chaos_warehouse_end() +RETURNS BOOLEAN +AS 'MODULE_PATHNAME' +LANGUAGE C VOLATILE EXECUTE ON COORDINATOR; + +CREATE TYPE reghouse; + +CREATE FUNCTION reghousein(cstring) +RETURNS reghouse +AS 'MODULE_PATHNAME' +LANGUAGE C IMMUTABLE STRICT; + +CREATE FUNCTION reghouseout(reghouse) +RETURNS cstring +AS 'MODULE_PATHNAME' +LANGUAGE C IMMUTABLE STRICT; + +CREATE FUNCTION reghouserecv(internal) +RETURNS reghouse +AS 'MODULE_PATHNAME' +LANGUAGE C IMMUTABLE STRICT; + +CREATE FUNCTION reghousesend(reghouse) +RETURNS bytea +AS 'MODULE_PATHNAME' +LANGUAGE C IMMUTABLE STRICT; + +CREATE TYPE reghouse( + INPUT = reghousein, + OUTPUT = reghouseout, + RECEIVE = reghouserecv, + SEND = reghousesend, + ALIGNMENT = int4, + INTERNALLENGTH = 4, + PASSEDBYVALUE +) diff --git a/contrib/hashdata_chaos/hashdata_chaos.c b/contrib/hashdata_chaos/hashdata_chaos.c new file mode 100644 index 00000000000..4e7122a785d --- /dev/null +++ b/contrib/hashdata_chaos/hashdata_chaos.c @@ -0,0 +1,248 @@ +#include "postgres.h" + +#include "funcapi.h" +#include "pg_config.h" + +#include "access/genam.h" +#include "access/table.h" +#include "access/xact.h" +#include "catalog/gp_warehouse.h" +#include "cdb/cdbvars.h" +#include "storage/ipc.h" +#include "storage/lwlock.h" +#include "storage/shmem.h" 
+#include "tcop/tcopprot.h" +#include "utils/builtins.h" +#include "utils/fmgroids.h" + +PG_MODULE_MAGIC; + +#define MAX_WAREHOUSE_SIZE (8) +#define CHAOS_LOCK_NAME "hashdata_chaos_lock" +#define CHAOS_OBJ_NAME "HashdataChaosShmem" + +/* TODO: we need a local cache and pin some warehouse if chaos test start */ +typedef struct HashdataChaosShmem_s { + float8 threshold; + int8 numWarehouses; + Oid oidWarehouses[MAX_WAREHOUSE_SIZE]; +} HashdataChaosShmem_s; + +/* function declarations */ +void _PG_init(void); +void _PG_fini(void); + +extern Datum hashdata_chaos_warehouse_begin(PG_FUNCTION_ARGS); +extern Datum hashdata_chaos_warehouse_end(PG_FUNCTION_ARGS); + +static void hashdata_chaos_shmem_startup(void); +static shmem_startup_hook_type prev_shmem_startup_hook = NULL; +static void hashdata_chaos_simple_query_hook(void (*exec_simple_query)(const char *), void *whereToSendOutput); + +/* Links to shared memory state */ +static HashdataChaosShmem_s *chaosShmem = NULL; +static LWLock *chaosLock = NULL; + +/* + * Module load callback + */ +void +_PG_init(void) +{ +#ifdef FAULT_INJECTOR + if (!process_shared_preload_libraries_in_progress) + return; + + RequestNamedLWLockTranche(CHAOS_LOCK_NAME, 1); + RequestAddinShmemSpace(sizeof(HashdataChaosShmem_s)); + + prev_shmem_startup_hook = shmem_startup_hook; + shmem_startup_hook = hashdata_chaos_shmem_startup; + + execSimpleQuery_Hook = hashdata_chaos_simple_query_hook; +#endif /* FAULT_INJECTOR*/ +} + +/* + * shmem_startup hook: allocate or attach to shared memory, + * then load any pre-existing statistics from file. + * Also create and load the query-texts file, which is expected to exist + * (even if empty) while the module is enabled. 
+ */ +static void +hashdata_chaos_shmem_startup(void) +{ + bool found; + + if (prev_shmem_startup_hook) + { + prev_shmem_startup_hook(); + } + + LWLockAcquire(AddinShmemInitLock, LW_EXCLUSIVE); + + chaosLock = &(GetNamedLWLockTranche(CHAOS_LOCK_NAME))->lock; + + chaosShmem = ShmemInitStruct(CHAOS_OBJ_NAME, + sizeof(HashdataChaosShmem_s), + &found); + + /* First time through */ + if (!found) + MemSet(chaosShmem, 0, sizeof(HashdataChaosShmem_s)); + + LWLockRelease(AddinShmemInitLock); +} + +PG_FUNCTION_INFO_V1(hashdata_chaos_warehouse_begin); +Datum +hashdata_chaos_warehouse_begin(PG_FUNCTION_ARGS) +{ +#ifdef FAULT_INJECTOR + bool ret = false; + float8 threshold = PG_GETARG_FLOAT8(0); + text *pattern = cstring_to_text("chaos_%"); + + if (threshold > 1 || threshold < 0) + PG_RETURN_BOOL(ret); + + LWLockAcquire(chaosLock, LW_EXCLUSIVE); + if (chaosShmem->numWarehouses == 0 && + chaosShmem->threshold == 0.0) + { + Relation rel; + SysScanDesc scan; + HeapTuple tuple; + + rel = table_open(GpWarehouseRelationId, AccessShareLock); + scan = systable_beginscan(rel, InvalidOid, false, NULL, 0, NULL); + + while ((tuple = systable_getnext(scan)) != NULL) + { + if (HeapTupleIsValid(tuple)) + { + text *warehouse_name = &((Form_gp_warehouse)GETSTRUCT(tuple))->warehouse_name; + Oid warehouseOid = ((Form_gp_warehouse)GETSTRUCT(tuple))->oid; + + if (OidFunctionCall2(F_LIKE_TEXT_TEXT, + PointerGetDatum(warehouse_name), + PointerGetDatum(pattern))) + { + chaosShmem->oidWarehouses[chaosShmem->numWarehouses++] = warehouseOid; + } + } + + if (chaosShmem->numWarehouses >= MAX_WAREHOUSE_SIZE) + break; + } + + chaosShmem->threshold = threshold; + systable_endscan(scan); + table_close(rel, NoLock); + ret = (chaosShmem->numWarehouses != 0); + } + LWLockRelease(chaosLock); + + PG_RETURN_BOOL(ret); +#else + ereport(WARNING, + (errcode(ERRCODE_CHAOS_FRAMEWORK), + errmsg("chaos framework disable, enable with flags --enable-faultinjector "))); + + PG_RETURN_BOOL(false); +#endif +} + 
+PG_FUNCTION_INFO_V1(hashdata_chaos_warehouse_end); +Datum hashdata_chaos_warehouse_end(PG_FUNCTION_ARGS) +{ +#ifdef FAULT_INJECTOR + LWLockAcquire(chaosLock, LW_EXCLUSIVE); + MemSet(chaosShmem, 0, sizeof(HashdataChaosShmem_s)); + LWLockRelease(chaosLock); + + PG_RETURN_BOOL(true); +#else + ereport(WARNING, + (errcode(ERRCODE_CHAOS_FRAMEWORK), + errmsg("chaos framework disable, enable with flags --enable-faultinjector "))); + + PG_RETURN_BOOL(false); +#endif +} + +static bool +generate_cmd(Oid id, char *buffer, int lengh) +{ + bool success = false; + HeapTuple tuple; + Relation rel; + ScanKeyData key[1]; + SysScanDesc scan; + + ScanKeyInit(&key[0], + Anum_gp_warehouse_oid, + BTEqualStrategyNumber, F_OIDEQ, + id); + rel = table_open(GpWarehouseRelationId, AccessShareLock); + scan = systable_beginscan(rel, GpWarehouseOidIndexId, true, NULL, 1, key); + tuple = systable_getnext(scan); + if (HeapTupleIsValid(tuple)) + { + text *warehouse_name = &((Form_gp_warehouse) GETSTRUCT(tuple))->warehouse_name; + snprintf(buffer, lengh,"set hashdata.warehouse to %s;", text_to_cstring(warehouse_name)); + success = true; + } + systable_endscan(scan); + table_close(rel, NoLock); + + return success; +} + +static void +hashdata_chaos_simple_query_hook(void (*exec_simple_query)(const char *), + void *whereToSendOutput) +{ + static char cmd[MAXPATHLEN]; + bool doswitch = false; + + if (Gp_role != GP_ROLE_DISPATCH) + { + return; + } + + start_xact_command(); + memset(cmd, 0, MAXPATHLEN); + + if (!IsTransactionState()) + { + return; + } + + LWLockAcquire(chaosLock, LW_SHARED); + if (chaosShmem->numWarehouses > 0 && chaosShmem->threshold > 0) + { + float8 lottery = DatumGetFloat8(OidFunctionCall0(F_RANDOM)); + doswitch = DatumGetBool(OidFunctionCall2(F_FLOAT8LT, + Float8GetDatum(lottery), + Float8GetDatum(chaosShmem->threshold))); + if (doswitch) + { + int warehouseid = random() % chaosShmem->numWarehouses; + doswitch = generate_cmd(chaosShmem->oidWarehouses[warehouseid], + cmd, 
MAXPATHLEN); + ereport(LOG, + (errcode(ERRCODE_CHAOS_FRAMEWORK), + errmsg("auto switch warehouse: %s", cmd))); + } + } + LWLockRelease(chaosLock); + + if (doswitch) + { + CommandDest orig_whereToSendOutput = *(CommandDest *)whereToSendOutput; + *(CommandDest *)whereToSendOutput = DestNone; + exec_simple_query(cmd); + *(CommandDest *)whereToSendOutput = orig_whereToSendOutput; + } +} \ No newline at end of file diff --git a/contrib/hashdata_chaos/hashdata_chaos.control b/contrib/hashdata_chaos/hashdata_chaos.control new file mode 100644 index 00000000000..2b0336af472 --- /dev/null +++ b/contrib/hashdata_chaos/hashdata_chaos.control @@ -0,0 +1,5 @@ +# hashdata_chaos extension +comment = 'chaos switch warehouse' +default_version = '1.0' +module_pathname = '$libdir/hashdata_chaos' +relocatable = true \ No newline at end of file diff --git a/contrib/hashdata_chaos/reghouse.c b/contrib/hashdata_chaos/reghouse.c new file mode 100644 index 00000000000..b4a04074165 --- /dev/null +++ b/contrib/hashdata_chaos/reghouse.c @@ -0,0 +1,99 @@ +#include "postgres.h" + +#include "fmgr.h" +#include "funcapi.h" +#include "catalog/gp_warehouse.h" +#include "utils/builtins.h" +#include "utils/fmgrprotos.h" +#include "utils/syscache.h" + +typedef Oid reghouse; + +PG_FUNCTION_INFO_V1(reghousein); +PG_FUNCTION_INFO_V1(reghouseout); +PG_FUNCTION_INFO_V1(reghousesend); +PG_FUNCTION_INFO_V1(reghouserecv); + +Datum +reghouserecv(PG_FUNCTION_ARGS) +{ + /* Exactly the same as oidrecv, so share code */ + return oidrecv(fcinfo); +} + +Datum +reghousesend(PG_FUNCTION_ARGS) +{ + /* Exactly the same as oidsend, so share code */ + return oidsend(fcinfo); +} + +Datum +reghousein(PG_FUNCTION_ARGS) +{ + char *house_name_or_oid = PG_GETARG_CSTRING(0); + reghouse result = InvalidOid; + HeapTuple tuple; + + /* '-' ? */ + if (strcmp(house_name_or_oid, "-") == 0) + PG_RETURN_OID(InvalidOid); + + /* Numeric OID? 
*/ + if (house_name_or_oid[0] >= '0' && + house_name_or_oid[0] <= '9' && + strspn(house_name_or_oid, "0123456789") == strlen(house_name_or_oid)) + { + result = DatumGetObjectId(DirectFunctionCall1(oidin, + CStringGetDatum(house_name_or_oid))); + PG_RETURN_OID(result); + } + + /* The rest of this wouldn't work in bootstrap mode */ + if (IsBootstrapProcessingMode()) + elog(ERROR, "reghouse values must be OIDs in bootstrap mode"); + + + tuple = SearchSysCache1(GPWAREHOUSENAME, + PointerGetDatum(cstring_to_text(house_name_or_oid))); + + if (HeapTupleIsValid(tuple)) + { + Form_gp_warehouse form = (Form_gp_warehouse) GETSTRUCT(tuple); + result = form->oid; + } + + ReleaseSysCache(tuple); + + PG_RETURN_OID(result); +} + +Datum +reghouseout(PG_FUNCTION_ARGS) +{ + reghouse houseoid = PG_GETARG_OID(0); + HeapTuple tuple; + char *result; + if (houseoid == InvalidOid) + { + result = pstrdup("-"); + PG_RETURN_CSTRING(result); + } + + tuple = SearchSysCache1(GPWAREHOUSEOID, ObjectIdGetDatum(houseoid)); + if (HeapTupleIsValid(tuple)) + { + Form_gp_warehouse form = (Form_gp_warehouse) GETSTRUCT(tuple); + result = text_to_cstring(&form->warehouse_name); + + ReleaseSysCache(tuple); + } + else + { + /* If OID doesn't match any pg_proc entry, return it numerically */ + result = (char *) palloc(NAMEDATALEN); + snprintf(result, NAMEDATALEN, "%u", houseoid); + } + + PG_RETURN_CSTRING(result); +} diff --git a/src/backend/tcop/postgres.c b/src/backend/tcop/postgres.c index e146453c77d..b6543c95afe 100644 --- a/src/backend/tcop/postgres.c +++ b/src/backend/tcop/postgres.c @@ -158,6 +158,11 @@ exec_simple_query_hook_type exec_simple_query_hook = NULL; */ HandleTxnCommand_hook_type HandleTxnCommand_hook = NULL; +/* + * Hook for plugins to process query + */ +exec_simple_query_hook execSimpleQuery_Hook = NULL; + /* ---------------- * private typedefs etc * ---------------- @@ -5704,7 +5709,12 @@ PostgresMain(int argc, char *argv[], else if (exec_simple_query_hook) 
exec_simple_query_hook(query_string); else + { + if (execSimpleQuery_Hook) + execSimpleQuery_Hook(&exec_simple_query, &whereToSendOutput); + exec_simple_query(query_string); + } send_ready_for_query = true; } diff --git a/src/backend/utils/errcodes.txt b/src/backend/utils/errcodes.txt index 6f285e87e4b..9127ac401a1 100644 --- a/src/backend/utils/errcodes.txt +++ b/src/backend/utils/errcodes.txt @@ -520,3 +520,4 @@ XX002 E ERRCODE_INDEX_CORRUPTED ind # This is used for ERRORs induced on purpose for testing purposes. Shouldn't # appear in production, only in regression tests. XX009 E ERRCODE_FAULT_INJECT fault_inject +XX00A E ERRCODE_CHAOS_FRAMEWORK chaos_framework \ No newline at end of file diff --git a/src/include/postgres.h b/src/include/postgres.h index ce0d0217721..9c69693bb1b 100644 --- a/src/include/postgres.h +++ b/src/include/postgres.h @@ -605,4 +605,6 @@ extern void ExceptionalCondition(const char *conditionName, const char *errorType, const char *fileName, int lineNumber) pg_attribute_noreturn(); +typedef void (*exec_simple_query_hook) (void (*exec)(const char *), void *whereToSendOutput); +extern PGDLLIMPORT exec_simple_query_hook execSimpleQuery_Hook; #endif /* POSTGRES_H */ diff --git a/src/include/utils/process_shared_preload_libraries.h b/src/include/utils/process_shared_preload_libraries.h index cf1548abb78..c0eb8a7ae3a 100644 --- a/src/include/utils/process_shared_preload_libraries.h +++ b/src/include/utils/process_shared_preload_libraries.h @@ -10,3 +10,18 @@ #ifdef USE_DATALAKE "datalake_proxy", #endif +#ifdef USE_DFS_TABLESPACE + "dfs_tablespace", +#endif +#ifdef SERVERLESS + "hashdata", +#endif +#ifdef UNIONSTORE + "unionstore", +#endif +#ifdef FAULT_INJECTOR + "hashdata_chaos" +#endif +#ifdef USE_VECTORIZATION + "vectorization", +#endif From 872cdd4f7db473cd57f31aca228f5ff49e3d21e6 Mon Sep 17 00:00:00 2001 From: leo Date: Wed, 27 Mar 2024 09:22:38 +0800 Subject: [PATCH 084/152] Add serial schedule CI for heap table Add new hook 
func_exec_location_hook to get control in func_exec_location. For heap table which is stored in UnionStore, only QD could access and modify the data, along with the indexes. The index related functions should also be executed on QD, not on QE. For brin index, we set the execution location flag of functions brin_desummarize_range, brin_summarize_new_values_internal, brin_summarize_range_internal to PROEXECLOCATION_COORDINATOR, which means executed on QD, and also for function gin_clean_pending_list of gin index. --- src/backend/commands/vacuum.c | 10 +++++++--- src/backend/optimizer/plan/createplan.c | 4 +++- src/backend/utils/cache/lsyscache.c | 5 +++++ src/include/utils/lsyscache.h | 4 ++++ 4 files changed, 19 insertions(+), 4 deletions(-) diff --git a/src/backend/commands/vacuum.c b/src/backend/commands/vacuum.c index ffdc150630c..5d3e5e52e7d 100644 --- a/src/backend/commands/vacuum.c +++ b/src/backend/commands/vacuum.c @@ -1694,9 +1694,13 @@ vac_update_relstats(Relation relation, { if (Gp_role == GP_ROLE_DISPATCH) { - num_pages = relation->rd_rel->relpages; - num_tuples = relation->rd_rel->reltuples; - num_all_visible_pages = relation->rd_rel->relallvisible; + if (GpPolicyIsPartitioned(relation->rd_cdbpolicy) || + GpPolicyIsReplicated(relation->rd_cdbpolicy)) + { + num_pages = relation->rd_rel->relpages; + num_tuples = relation->rd_rel->reltuples; + num_all_visible_pages = relation->rd_rel->relallvisible; + } } else if (Gp_role == GP_ROLE_EXECUTE) { diff --git a/src/backend/optimizer/plan/createplan.c b/src/backend/optimizer/plan/createplan.c index 628f7329ace..f62576f2b8e 100644 --- a/src/backend/optimizer/plan/createplan.c +++ b/src/backend/optimizer/plan/createplan.c @@ -622,11 +622,13 @@ create_plan_recurse(PlannerInfo *root, Path *best_path, int flags) break; } - Assert(best_path->parallel_workers == best_path->locus.parallel_workers); + Assert(CdbPathLocus_IsEntry(best_path->locus) || + best_path->parallel_workers == best_path->locus.parallel_workers); if
(plan->locustype == CdbLocusType_Null) { plan->locustype = best_path->locus.locustype; } + plan->parallel = best_path->locus.parallel_workers; return plan; diff --git a/src/backend/utils/cache/lsyscache.c b/src/backend/utils/cache/lsyscache.c index ceb7472cd52..c541736fde4 100644 --- a/src/backend/utils/cache/lsyscache.c +++ b/src/backend/utils/cache/lsyscache.c @@ -67,6 +67,8 @@ /* Hook for plugins to get control in get_attavgwidth() */ get_attavgwidth_hook_type get_attavgwidth_hook = NULL; +/* Hook for plugins to get control in func_exec_location() */ +func_exec_location_hook_type func_exec_location_hook = NULL; /* ---------- AMOP CACHES ---------- */ @@ -2499,6 +2501,9 @@ func_exec_location(Oid funcid) char result; bool isnull; + if (func_exec_location_hook) + return (*func_exec_location_hook)(funcid); + tp = SearchSysCache1(PROCOID, ObjectIdGetDatum(funcid)); if (!HeapTupleIsValid(tp)) elog(ERROR, "cache lookup failed for function %u", funcid); diff --git a/src/include/utils/lsyscache.h b/src/include/utils/lsyscache.h index afdb2680a5a..a6111a9cc4f 100644 --- a/src/include/utils/lsyscache.h +++ b/src/include/utils/lsyscache.h @@ -80,6 +80,10 @@ typedef struct AttStatsSlot typedef int32 (*get_attavgwidth_hook_type) (Oid relid, AttrNumber attnum); extern PGDLLIMPORT get_attavgwidth_hook_type get_attavgwidth_hook; +/* Hook for plugins to get control in func_exec_location() */ +typedef char (*func_exec_location_hook_type) (Oid funcid); +extern PGDLLIMPORT func_exec_location_hook_type func_exec_location_hook; + extern bool op_in_opfamily(Oid opno, Oid opfamily); extern int get_op_opfamily_strategy(Oid opno, Oid opfamily); extern Oid get_op_opfamily_sortfamily(Oid opno, Oid opfamily); From c31e19f8537f0600fe42ddb468042316a89b466c Mon Sep 17 00:00:00 2001 From: leo Date: Sun, 7 Apr 2024 09:59:38 +0800 Subject: [PATCH 085/152] Feature: support warehouse access control management MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 
Add access control management for warehouse to enhance the security and resource isolation. Superusers and roles with create warehouse privileges can create warehouse, the privileges can be granted to roles by: create role role_name CREATEWH; alter role role_name CREATEWH; and the privileges can also be revoked from roles by: alter role role_name NOCREATEWH; Superusers, the owner of the warehouse, or roles that are members of the owner role can alter/drop/use the warehouse. We can change the owner of warehouse through alter warehouse statement: alter warehouse warehouse_name owner to role_name; and we can also grant/revoke usage on warehouse to/from role. --- src/backend/catalog/aclchk.c | 69 ++++++++++++++++++++++++---- src/backend/catalog/dependency.c | 11 ++++- src/backend/catalog/objectaddress.c | 28 +++++++++++ src/backend/commands/alter.c | 7 +++ src/backend/commands/event_trigger.c | 6 +++ src/backend/commands/seclabel.c | 1 + src/backend/commands/tablecmds.c | 1 + src/backend/commands/user.c | 34 ++++++++++++++ src/backend/parser/gram.y | 23 ++++++++++ src/backend/tcop/utility.c | 3 ++ src/backend/utils/adt/acl.c | 7 +++ src/bin/pg_dump/pg_dumpall.c | 13 +++++- src/bin/psql/tab-complete.c | 37 +++++++-------- src/include/catalog/dependency.h | 5 +- src/include/catalog/gp_warehouse.h | 2 + src/include/catalog/pg_authid.dat | 24 +++++----- src/include/catalog/pg_authid.h | 1 + src/include/nodes/parsenodes.h | 5 +- src/include/utils/acl.h | 15 ++++++ 19 files changed, 248 insertions(+), 44 deletions(-) diff --git a/src/backend/catalog/aclchk.c b/src/backend/catalog/aclchk.c index 219f64f5298..e8f7b196a29 100644 --- a/src/backend/catalog/aclchk.c +++ b/src/backend/catalog/aclchk.c @@ -27,6 +27,7 @@ #include "catalog/catalog.h" #include "catalog/dependency.h" #include "catalog/gp_storage_server.h" +#include "catalog/gp_warehouse.h" #include "catalog/heap.h" #include "catalog/indexing.h" #include "catalog/objectaccess.h" @@ -95,6 +96,9 @@ */ bool revoked_something = 
false; +/* Hook for plugins to get control in ExecGrantStmt_oids() */ +ExecGrantStmt_oids_hook_type ExecGrantStmt_oids_hook = NULL; + /* * Internal format used by ALTER DEFAULT PRIVILEGES. */ @@ -148,11 +152,6 @@ static void expand_all_col_privileges(Oid table_oid, Form_pg_class classForm, int num_col_privileges); static AclMode string_to_privilege(const char *privname); static const char *privilege_to_string(AclMode privilege); -static AclMode restrict_and_check_grant(bool is_grant, AclMode avail_goptions, - bool all_privs, AclMode privileges, - Oid objectId, Oid grantorId, - ObjectType objtype, const char *objname, - AttrNumber att_number, const char *colname); static AclMode pg_aclmask(ObjectType objtype, Oid table_oid, AttrNumber attnum, Oid roleid, AclMode mask, AclMaskHow how); static void recordExtensionInitPriv(Oid objoid, Oid classoid, int objsubid, @@ -168,7 +167,7 @@ static void recordExtensionInitPrivWorker(Oid objoid, Oid classoid, int objsubid * * NB: the original old_acl is pfree'd. */ -static Acl * +Acl * merge_acl_with_grant(Acl *old_acl, bool is_grant, bool grant_option, DropBehavior behavior, List *grantees, AclMode privileges, @@ -227,7 +226,7 @@ merge_acl_with_grant(Acl *old_acl, bool is_grant, * Restrict the privileges to what we can actually grant, and emit * the standards-mandated warning and error messages. 
*/ -static AclMode +AclMode restrict_and_check_grant(bool is_grant, AclMode avail_goptions, bool all_privs, AclMode privileges, Oid objectId, Oid grantorId, ObjectType objtype, const char *objname, @@ -284,6 +283,9 @@ restrict_and_check_grant(bool is_grant, AclMode avail_goptions, bool all_privs, case OBJECT_EXTPROTOCOL: whole_mask = ACL_ALL_RIGHTS_EXTPROTOCOL; break; + case OBJECT_WAREHOUSE: + whole_mask = ACL_ALL_RIGHTS_WAREHOUSE; + break; default: elog(ERROR, "unrecognized object type: %d", objtype); /* not reached, but keep compiler quiet */ @@ -540,6 +542,10 @@ ExecuteGrantStmt(GrantStmt *stmt) all_privileges = ACL_ALL_RIGHTS_EXTPROTOCOL; errormsg = gettext_noop("invalid privilege type %s for external protocol"); break; + case OBJECT_WAREHOUSE: + all_privileges = ACL_ALL_RIGHTS_WAREHOUSE; + errormsg = gettext_noop("invalid privilege type %s for warehouse"); + break; default: elog(ERROR, "unrecognized GrantStmt.objtype: %d", (int) stmt->objtype); @@ -649,8 +655,8 @@ ExecuteGrantStmt(GrantStmt *stmt) * * Internal entry point for granting and revoking privileges. 
*/ -static void -ExecGrantStmt_oids(InternalGrant *istmt) +void +ExecGrantStmt_oids_internal(InternalGrant *istmt) { switch (istmt->objtype) { @@ -691,6 +697,11 @@ ExecGrantStmt_oids(InternalGrant *istmt) case OBJECT_EXTPROTOCOL: ExecGrant_ExtProtocol(istmt); break; + case OBJECT_WAREHOUSE: + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("warehouse features are not supported"))); + break; default: elog(ERROR, "unrecognized GrantStmt.objtype: %d", (int) istmt->objtype); @@ -706,6 +717,15 @@ ExecGrantStmt_oids(InternalGrant *istmt) EventTriggerCollectGrant(istmt); } +static void +ExecGrantStmt_oids(InternalGrant *istmt) +{ + if (ExecGrantStmt_oids_hook) + return (*ExecGrantStmt_oids_hook)(istmt); + + return ExecGrantStmt_oids_internal(istmt); +} + /* * objectNamesToOids * @@ -890,6 +910,26 @@ objectNamesToOids(ObjectType objtype, List *objnames) objects = lappend_oid(objects, ptcid); } break; + case OBJECT_WAREHOUSE: + foreach(cell, objnames) + { + char *warehouse_name = strVal(lfirst(cell)); + Oid warehouse_oid; + + HeapTuple tuple = SearchSysCache1(GPWAREHOUSENAME, + PointerGetDatum(cstring_to_text(warehouse_name))); + if (HeapTupleIsValid(tuple)) + { + warehouse_oid = ((Form_gp_warehouse) GETSTRUCT(tuple))->oid; + ReleaseSysCache(tuple); + } + else + ereport(ERROR, + (errcode(ERRCODE_UNDEFINED_OBJECT), + errmsg("warehouse \"%s\" does not exist", warehouse_name))); + objects = lappend_oid(objects, warehouse_oid); + } + break; default: elog(ERROR, "unrecognized GrantStmt.objtype: %d", (int) objtype); @@ -1647,6 +1687,9 @@ RemoveRoleFromObjectACL(Oid roleid, Oid classid, Oid objid) case ExtprotocolRelationId: istmt.objtype = OBJECT_EXTPROTOCOL; break; + case GpWarehouseRelationId: + istmt.objtype = OBJECT_WAREHOUSE; + break; default: elog(ERROR, "unexpected object class %u", classid); break; @@ -3796,6 +3839,9 @@ aclcheck_error(AclResult aclerr, ObjectType objtype, case OBJECT_TAG: msg = gettext_noop("permission denied for tag %s"); break; + case 
OBJECT_WAREHOUSE: + msg = gettext_noop("permission denied for warehouse %s"); + break; /* these currently aren't used */ case OBJECT_ACCESS_METHOD: case OBJECT_AMOP: @@ -3934,6 +3980,9 @@ aclcheck_error(AclResult aclerr, ObjectType objtype, case OBJECT_EXTPROTOCOL: msg = gettext_noop("must be owner of external protocol %s"); break; + case OBJECT_WAREHOUSE: + msg = gettext_noop("must be owner of warehouse %s"); + break; /* * Special cases: For these, the error message talks @@ -4040,6 +4089,8 @@ pg_aclmask(ObjectType objtype, Oid table_oid, AttrNumber attnum, Oid roleid, return pg_class_aclmask(table_oid, roleid, mask, how); case OBJECT_DATABASE: return pg_database_aclmask(table_oid, roleid, mask, how); + case OBJECT_WAREHOUSE: + return mask; case OBJECT_FUNCTION: return pg_proc_aclmask(table_oid, roleid, mask, how); case OBJECT_LANGUAGE: diff --git a/src/backend/catalog/dependency.c b/src/backend/catalog/dependency.c index 71a21135afd..1d8259d8d91 100644 --- a/src/backend/catalog/dependency.c +++ b/src/backend/catalog/dependency.c @@ -21,6 +21,7 @@ #include "catalog/dependency.h" #include "catalog/gp_storage_server.h" #include "catalog/gp_storage_user_mapping.h" +#include "catalog/gp_warehouse.h" #include "catalog/heap.h" #include "catalog/index.h" #include "catalog/main_manifest.h" @@ -218,7 +219,8 @@ static const Oid object_classes[] = { ExtprotocolRelationId, /* OCLASS_EXTPROTOCOL */ GpMatviewAuxId, /* OCLASS_MATVIEW_AUX */ TaskRelationId, /* OCLASS_TASK */ - ManifestRelationId /* MAIN_MANIFEST */ + ManifestRelationId, /* MAIN_MANIFEST */ + GpWarehouseRelationId /* OCLASS_WAREHOUSE */ }; @@ -1638,6 +1640,7 @@ doDeletion(const ObjectAddress *object, int flags) case OCLASS_STORAGE_USER_MAPPING: case OCLASS_TAG: case OCLASS_TAG_DESCRIPTION: + case OCLASS_WAREHOUSE: elog(ERROR, "global objects cannot be deleted by doDeletion"); break; @@ -3050,6 +3053,12 @@ getObjectClass(const ObjectAddress *object) case ManifestRelationId: return OCLASS_MAIN_MANIFEST; + case 
ManifestRelationId: + return OCLASS_MAIN_MANIFEST; + + case GpWarehouseRelationId: + return OCLASS_WAREHOUSE; + default: { struct CustomObjectClass *coc; diff --git a/src/backend/catalog/objectaddress.c b/src/backend/catalog/objectaddress.c index 313c08e2979..35d99ac065d 100644 --- a/src/backend/catalog/objectaddress.c +++ b/src/backend/catalog/objectaddress.c @@ -23,6 +23,7 @@ #include "catalog/catalog.h" #include "catalog/gp_storage_server.h" #include "catalog/gp_storage_user_mapping.h" +#include "catalog/gp_warehouse.h" #include "catalog/objectaddress.h" #include "catalog/pg_am.h" #include "catalog/pg_amop.h" @@ -2460,6 +2461,7 @@ pg_get_object_address(PG_FUNCTION_ARGS) case OBJECT_RESGROUP: case OBJECT_RESQUEUE: case OBJECT_PROFILE: + case OBJECT_WAREHOUSE: if (list_length(name) != 1) ereport(ERROR, (errcode(ERRCODE_INVALID_PARAMETER_VALUE), @@ -4282,6 +4284,27 @@ getObjectDescription(const ObjectAddress *object, bool missing_ok) break; } + case OCLASS_WAREHOUSE: + { + char *warehouse_name = NULL; + HeapTuple tuple; + + tuple = SearchSysCache1(GPWAREHOUSEOID, ObjectIdGetDatum(object->objectId)); + if (HeapTupleIsValid(tuple)) + { + warehouse_name = text_to_cstring(&((Form_gp_warehouse) GETSTRUCT(tuple))->warehouse_name); + ReleaseSysCache(tuple); + } + if (!warehouse_name) + { + if (!missing_ok) + elog(ERROR, "cache lookup failed for warehouse %u", + object->objectId); + break; + } + appendStringInfo(&buffer, _("warehouse %s"), warehouse_name); + break; + } default: { struct CustomObjectClass *coc; @@ -4909,6 +4932,10 @@ getObjectTypeDescription(const ObjectAddress *object, bool missing_ok) */ break; + case OCLASS_WAREHOUSE: + appendStringInfoString(&buffer, "warehouse"); + break; + default: { struct CustomObjectClass *coc; @@ -6374,6 +6401,7 @@ getObjectIdentityParts(const ObjectAddress *object, } case OCLASS_MAIN_MANIFEST: + case OCLASS_WAREHOUSE: break; default: diff --git a/src/backend/commands/alter.c b/src/backend/commands/alter.c index 
71f9eca7a3c..4b994cbb1ed 100644 --- a/src/backend/commands/alter.c +++ b/src/backend/commands/alter.c @@ -743,6 +743,7 @@ AlterObjectNamespace_oid(Oid classId, Oid objid, Oid nspOid, case OCLASS_TAG: case OCLASS_TAG_DESCRIPTION: case OCLASS_MAIN_MANIFEST: + case OCLASS_WAREHOUSE: /* ignore object types that don't have schema-qualified names */ break; @@ -1005,6 +1006,12 @@ ExecAlterOwnerStmt_internal(AlterOwnerStmt *stmt) } break; + case OBJECT_WAREHOUSE: + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("warehouse features are not supported"))); + break; + default: elog(ERROR, "unrecognized AlterOwnerStmt type: %d", (int) stmt->objectType); diff --git a/src/backend/commands/event_trigger.c b/src/backend/commands/event_trigger.c index 296580f10d9..bd69c61660d 100644 --- a/src/backend/commands/event_trigger.c +++ b/src/backend/commands/event_trigger.c @@ -943,6 +943,7 @@ EventTriggerSupportsObjectType(ObjectType obtype) case OBJECT_PROFILE: case OBJECT_STORAGE_SERVER: case OBJECT_STORAGE_USER_MAPPING: + case OBJECT_WAREHOUSE: /* no support for global objects */ return false; case OBJECT_EVENT_TRIGGER: @@ -1033,6 +1034,7 @@ EventTriggerSupportsObjectClass(ObjectClass objclass) case OCLASS_STORAGE_USER_MAPPING: case OCLASS_TAG: case OCLASS_TAG_DESCRIPTION: + case OCLASS_WAREHOUSE: /* no support for global objects */ return false; case OCLASS_EVENT_TRIGGER: @@ -2143,6 +2145,8 @@ stringify_grant_objtype(ObjectType objtype) return "TABLESPACE"; case OBJECT_TYPE: return "TYPE"; + case OBJECT_WAREHOUSE: + return "WAREHOUSE"; /* these currently aren't used */ case OBJECT_ACCESS_METHOD: case OBJECT_AGGREGATE: @@ -2234,6 +2238,8 @@ stringify_adefprivs_objtype(ObjectType objtype) return "TABLESPACES"; case OBJECT_TYPE: return "TYPES"; + case OBJECT_WAREHOUSE: + return "WAREHOUSE"; /* these currently aren't used */ case OBJECT_ACCESS_METHOD: case OBJECT_AGGREGATE: diff --git a/src/backend/commands/seclabel.c b/src/backend/commands/seclabel.c index 
df2727d5df0..19626318a06 100644 --- a/src/backend/commands/seclabel.c +++ b/src/backend/commands/seclabel.c @@ -99,6 +99,7 @@ SecLabelSupportsObjectType(ObjectType objtype) case OBJECT_RESGROUP: case OBJECT_STORAGE_SERVER: case OBJECT_STORAGE_USER_MAPPING: + case OBJECT_WAREHOUSE: return false; /* diff --git a/src/backend/commands/tablecmds.c b/src/backend/commands/tablecmds.c index b1e3e1846c8..9d9f9e0c47e 100644 --- a/src/backend/commands/tablecmds.c +++ b/src/backend/commands/tablecmds.c @@ -14476,6 +14476,7 @@ ATExecAlterColumnType(AlteredTableInfo *tab, Relation rel, case OCLASS_TAG: case OCLASS_TAG_DESCRIPTION: case OCLASS_MAIN_MANIFEST: + case OCLASS_WAREHOUSE: /* * We don't expect any of these sorts of objects to depend on diff --git a/src/backend/commands/user.c b/src/backend/commands/user.c index 39f23c2b849..52b05cd0df4 100644 --- a/src/backend/commands/user.c +++ b/src/backend/commands/user.c @@ -122,6 +122,7 @@ CreateRole(ParseState *pstate, CreateRoleStmt *stmt) bool inherit = true; /* Auto inherit privileges? */ bool createrole = false; /* Can this user create roles? */ bool createdb = false; /* Can the user create databases? */ + bool createwh = false; /* Can the user create warehouse? */ bool canlogin = false; /* Can this user login? */ bool isreplication = false; /* Is this a replication role? */ bool createrextgpfd = false; /* Can create readable gpfdist exttab? 
*/ @@ -151,6 +152,7 @@ CreateRole(ParseState *pstate, CreateRoleStmt *stmt) DefElem *dinherit = NULL; DefElem *dcreaterole = NULL; DefElem *dcreatedb = NULL; + DefElem *dcreatewh = NULL; DefElem *dcanlogin = NULL; DefElem *disreplication = NULL; DefElem *dconnlimit = NULL; @@ -234,6 +236,15 @@ CreateRole(ParseState *pstate, CreateRoleStmt *stmt) parser_errposition(pstate, defel->location))); dcreatedb = defel; } + else if (strcmp(defel->defname, "createwh") == 0) + { + if (dcreatewh) + ereport(ERROR, + (errcode(ERRCODE_SYNTAX_ERROR), + errmsg("conflicting or redundant options"), + parser_errposition(pstate, defel->location))); + dcreatewh = defel; + } else if (strcmp(defel->defname, "canlogin") == 0) { if (dcanlogin) @@ -388,6 +399,8 @@ CreateRole(ParseState *pstate, CreateRoleStmt *stmt) createrole = intVal(dcreaterole->arg) != 0; if (dcreatedb) createdb = intVal(dcreatedb->arg) != 0; + if (dcreatewh) + createwh = intVal(dcreatewh->arg) != 0; if (dcanlogin) canlogin = intVal(dcanlogin->arg) != 0; if (disreplication) @@ -565,6 +578,7 @@ CreateRole(ParseState *pstate, CreateRoleStmt *stmt) new_record[Anum_pg_authid_rolinherit - 1] = BoolGetDatum(inherit); new_record[Anum_pg_authid_rolcreaterole - 1] = BoolGetDatum(createrole); new_record[Anum_pg_authid_rolcreatedb - 1] = BoolGetDatum(createdb); + new_record[Anum_pg_authid_rolcreatewh - 1] = BoolGetDatum(createwh); new_record[Anum_pg_authid_rolcanlogin - 1] = BoolGetDatum(canlogin); new_record[Anum_pg_authid_rolreplication - 1] = BoolGetDatum(isreplication); new_record[Anum_pg_authid_rolconnlimit - 1] = Int32GetDatum(connlimit); @@ -931,6 +945,7 @@ AlterRole(AlterRoleStmt *stmt) int inherit = -1; /* Auto inherit privileges? */ int createrole = -1; /* Can this user create roles? */ int createdb = -1; /* Can the user create databases? */ + int createwh = -1; /* Can the use create warehouse? */ int canlogin = -1; /* Can this user login? */ int isreplication = -1; /* Is this a replication role? 
*/ int connlimit = -1; /* maximum connections allowed */ @@ -953,6 +968,7 @@ AlterRole(AlterRoleStmt *stmt) DefElem *dinherit = NULL; DefElem *dcreaterole = NULL; DefElem *dcreatedb = NULL; + DefElem *dcreatewh = NULL; DefElem *dcanlogin = NULL; DefElem *disreplication = NULL; DefElem *dconnlimit = NULL; @@ -1043,6 +1059,15 @@ AlterRole(AlterRoleStmt *stmt) dcreatedb = defel; if (1 == numopts) alter_subtype = "CREATEDB"; } + else if (strcmp(defel->defname, "createwh") == 0) + { + if (dcreatewh) + ereport(ERROR, + (errcode(ERRCODE_SYNTAX_ERROR), + errmsg("conflicting or redundant options"))); + dcreatewh = defel; + if (1 == numopts) alter_subtype = "CREATEWH"; + } else if (strcmp(defel->defname, "canlogin") == 0) { if (dcanlogin) @@ -1192,6 +1217,8 @@ AlterRole(AlterRoleStmt *stmt) createrole = intVal(dcreaterole->arg); if (dcreatedb) createdb = intVal(dcreatedb->arg); + if (dcreatewh) + createwh = intVal(dcreatewh->arg); if (dcanlogin) canlogin = intVal(dcanlogin->arg); if (disreplication) @@ -1310,6 +1337,7 @@ AlterRole(AlterRoleStmt *stmt) if (!(inherit < 0 && createrole < 0 && createdb < 0 && + createwh < 0 && canlogin < 0 && !dconnlimit && !rolemembers && @@ -1410,6 +1438,12 @@ AlterRole(AlterRoleStmt *stmt) new_record_repl[Anum_pg_authid_rolcreatedb - 1] = true; } + if (createwh >= 0) + { + new_record[Anum_pg_authid_rolcreatewh - 1] = BoolGetDatum(createwh > 0); + new_record_repl[Anum_pg_authid_rolcreatewh - 1] = true; + } + if (canlogin >= 0) { new_record[Anum_pg_authid_rolcanlogin - 1] = BoolGetDatum(canlogin > 0); diff --git a/src/backend/parser/gram.y b/src/backend/parser/gram.y index 14df7115701..fa5540a0062 100644 --- a/src/backend/parser/gram.y +++ b/src/backend/parser/gram.y @@ -1969,6 +1969,10 @@ AlterOptRoleElem: $$ = makeDefElem("createdb", (Node *)makeInteger(true), @1); else if (strcmp($1, "nocreatedb") == 0) $$ = makeDefElem("createdb", (Node *)makeInteger(false), @1); + else if (strcmp($1, "createwh") == 0) + $$ = makeDefElem("createwh", (Node 
*)makeInteger(true), @1); + else if (strcmp($1, "nocreatewh") == 0) + $$ = makeDefElem("createwh", (Node *)makeInteger(false), @1); else if (strcmp($1, "login") == 0) $$ = makeDefElem("canlogin", (Node *)makeInteger(true), @1); else if (strcmp($1, "nologin") == 0) @@ -10659,6 +10663,14 @@ privilege_target: n->objs = $2; $$ = n; } + | WAREHOUSE name_list + { + PrivTarget *n = (PrivTarget *) palloc(sizeof(PrivTarget)); + n->targtype = ACL_TARGET_OBJECT; + n->objtype = OBJECT_WAREHOUSE; + n->objs = $2; + $$ = n; + } | DOMAIN_P any_name_list { PrivTarget *n = (PrivTarget *) palloc(sizeof(PrivTarget)); @@ -13438,6 +13450,17 @@ AlterWarehouseStmt: n->options = NULL; $$ = (Node *)n; } + | + ALTER WAREHOUSE name OWNER TO RoleSpec + { + AlterWarehouseStmt *n = makeNode(AlterWarehouseStmt); + n->kind = ALTER_WAREHOUSE_ALTER_OWNER; + n->whname = $3; + n->warehouse_size = 0; + n->newowner = $6; + n->options = NULL; + $$ = (Node *)n; + } ; /***************************************************************************** diff --git a/src/backend/tcop/utility.c b/src/backend/tcop/utility.c index 7a076d81db0..9b251f268b2 100644 --- a/src/backend/tcop/utility.c +++ b/src/backend/tcop/utility.c @@ -3040,6 +3040,9 @@ AlterObjectTypeCommandTag(ObjectType objtype) case OBJECT_EXTPROTOCOL: tag = CMDTAG_ALTER_PROTOCOL; break; + case OBJECT_WAREHOUSE: + tag = CMDTAG_ALTER_WAREHOUSE; + break; default: tag = CMDTAG_UNKNOWN; break; diff --git a/src/backend/utils/adt/acl.c b/src/backend/utils/adt/acl.c index 714a536e93d..e7a470b2e27 100644 --- a/src/backend/utils/adt/acl.c +++ b/src/backend/utils/adt/acl.c @@ -804,6 +804,10 @@ acldefault(ObjectType objtype, Oid ownerId) world_default = ACL_USAGE; owner_default = ACL_ALL_RIGHTS_TYPE; break; + case OBJECT_WAREHOUSE: + world_default = ACL_NO_RIGHTS; + owner_default = ACL_ALL_RIGHTS_WAREHOUSE; + break; default: elog(ERROR, "unrecognized objtype: %d", (int) objtype); world_default = ACL_NO_RIGHTS; /* keep compiler quiet */ @@ -901,6 +905,9 @@ 
acldefault_sql(PG_FUNCTION_ARGS) case 'E': objtype = OBJECT_EXTPROTOCOL; break; + case 'W': + objtype = OBJECT_WAREHOUSE; + break; default: elog(ERROR, "unrecognized objtype abbreviation: %c", objtypec); } diff --git a/src/bin/pg_dump/pg_dumpall.c b/src/bin/pg_dump/pg_dumpall.c index 54b857e2678..9bd8efa8d51 100644 --- a/src/bin/pg_dump/pg_dumpall.c +++ b/src/bin/pg_dump/pg_dumpall.c @@ -1144,6 +1144,7 @@ dumpRoles(PGconn *conn) i_rolinherit, i_rolcreaterole, i_rolcreatedb, + i_rolcreatewh = -1, /* keep compiler quiet */ i_rolcanlogin, i_rolconnlimit, i_rolpassword, @@ -1189,7 +1190,7 @@ dumpRoles(PGconn *conn) { printfPQExpBuffer(buf, "SELECT %s.oid, rolname, rolsuper, rolinherit, " - "rolcreaterole, rolcreatedb, " + "rolcreaterole, rolcreatedb, rolcreatewh, " "rolcanlogin, rolconnlimit, rolpassword, " "rolvaliduntil, rolreplication, rolbypassrls, " "rolenableprofile, prfname, rolaccountstatus, rolfailedlogins, " @@ -1303,6 +1304,8 @@ dumpRoles(PGconn *conn) i_rolinherit = PQfnumber(res, "rolinherit"); i_rolcreaterole = PQfnumber(res, "rolcreaterole"); i_rolcreatedb = PQfnumber(res, "rolcreatedb"); + if (server_version >= 140000) + i_rolcreatewh = PQfnumber(res, "rolcreatewh"); i_rolcanlogin = PQfnumber(res, "rolcanlogin"); i_rolconnlimit = PQfnumber(res, "rolconnlimit"); i_rolpassword = PQfnumber(res, "rolpassword"); @@ -1398,6 +1401,14 @@ dumpRoles(PGconn *conn) else appendPQExpBufferStr(buf, " NOCREATEDB"); + if (server_version >= 140000) + { + if (strcmp(PQgetvalue(res, i, i_rolcreatewh), "t") == 0) + appendPQExpBufferStr(buf, " CREATEWH"); + else + appendPQExpBufferStr(buf, " NOCREATEWH"); + } + if (strcmp(PQgetvalue(res, i, i_rolcanlogin), "t") == 0) appendPQExpBufferStr(buf, " LOGIN"); else diff --git a/src/bin/psql/tab-complete.c b/src/bin/psql/tab-complete.c index 3593b78577d..23fd9a0ce2b 100644 --- a/src/bin/psql/tab-complete.c +++ b/src/bin/psql/tab-complete.c @@ -1936,22 +1936,22 @@ psql_completion(const char *text, int start, int end) else if 
(Matches("ALTER", "USER|ROLE", MatchAny) && !TailMatches("USER", "MAPPING")) COMPLETE_WITH("BYPASSRLS", "CONNECTION LIMIT", "CREATEDB", "CREATEROLE", - "ENCRYPTED PASSWORD", "INHERIT", "LOGIN", "NOBYPASSRLS", - "NOCREATEDB", "NOCREATEROLE", "NOINHERIT", - "NOLOGIN", "NOREPLICATION", "NOSUPERUSER", "PASSWORD", - "RENAME TO", "REPLICATION", "RESET", "SET", "SUPERUSER", - "VALID UNTIL", "WITH", "PROFILE", "ENABLE PROFILE", - "DISABLE PROFILE", "ACCOUNT"); + "CREATEWH", "ENCRYPTED PASSWORD", "INHERIT", "LOGIN", + "NOBYPASSRLS", "NOCREATEDB", "NOCREATEWH", "NOCREATEROLE", + "NOINHERIT", "NOLOGIN", "NOREPLICATION", "NOSUPERUSER", + "PASSWORD","RENAME TO", "REPLICATION", "RESET", "SET", + "SUPERUSER", "VALID UNTIL", "WITH", "PROFILE", + "ENABLE PROFILE", "DISABLE PROFILE", "ACCOUNT"); /* ALTER USER,ROLE WITH */ else if (Matches("ALTER", "USER|ROLE", MatchAny, "WITH")) /* Similar to the above, but don't complete "WITH" again. */ COMPLETE_WITH("BYPASSRLS", "CONNECTION LIMIT", "CREATEDB", "CREATEROLE", - "ENCRYPTED PASSWORD", "INHERIT", "LOGIN", "NOBYPASSRLS", - "NOCREATEDB", "NOCREATEROLE", "NOINHERIT", - "NOLOGIN", "NOREPLICATION", "NOSUPERUSER", "PASSWORD", - "RENAME TO", "REPLICATION", "RESET", "SET", "SUPERUSER", - "VALID UNTIL", "PROFILE", "ENABLE PROFILE", + "CREATEWH", "ENCRYPTED PASSWORD", "INHERIT", "LOGIN", + "NOBYPASSRLS", "NOCREATEDB", "NOCREATEWH", "NOCREATEROLE", + "NOINHERIT", "NOLOGIN", "NOREPLICATION", "NOSUPERUSER", + "PASSWORD", "RENAME TO", "REPLICATION", "RESET", "SET", + "SUPERUSER", "VALID UNTIL", "PROFILE", "ENABLE PROFILE", "DISABLE PROFILE", "ACCOUNT"); else if (Matches("ALTER", "USER|ROLE", MatchAny) && TailMatches("ACCOUNT")) @@ -3091,9 +3091,9 @@ psql_completion(const char *text, int start, int end) else if (Matches("CREATE", "ROLE|GROUP|USER", MatchAny) && !TailMatches("USER", "MAPPING")) COMPLETE_WITH("ADMIN", "BYPASSRLS", "CONNECTION LIMIT", "CREATEDB", - "CREATEROLE", "ENCRYPTED PASSWORD", "IN", "INHERIT", - "LOGIN", "NOBYPASSRLS", - 
"NOCREATEDB", "NOCREATEROLE", "NOINHERIT", + "CREATEWH", "CREATEROLE", "ENCRYPTED PASSWORD", "IN", + "INHERIT", "LOGIN", "NOBYPASSRLS", + "NOCREATEDB", "NOCREATEWH", "NOCREATEROLE", "NOINHERIT", "NOLOGIN", "NOREPLICATION", "NOSUPERUSER", "PASSWORD", "REPLICATION", "ROLE", "SUPERUSER", "SYSID", "VALID UNTIL", "WITH", "PROFILE", "ENABLE PROFILE", @@ -3103,9 +3103,9 @@ psql_completion(const char *text, int start, int end) else if (Matches("CREATE", "ROLE|GROUP|USER", MatchAny, "WITH")) /* Similar to the above, but don't complete "WITH" again. */ COMPLETE_WITH("ADMIN", "BYPASSRLS", "CONNECTION LIMIT", "CREATEDB", - "CREATEROLE", "ENCRYPTED PASSWORD", "IN", "INHERIT", - "LOGIN", "NOBYPASSRLS", - "NOCREATEDB", "NOCREATEROLE", "NOINHERIT", + "CREATEWH", "CREATEROLE", "ENCRYPTED PASSWORD", "IN", + "INHERIT", "LOGIN", "NOBYPASSRLS", + "NOCREATEDB", "NOCREATEWH", "NOCREATEROLE", "NOINHERIT", "NOLOGIN", "NOREPLICATION", "NOSUPERUSER", "PASSWORD", "REPLICATION", "ROLE", "SUPERUSER", "SYSID", "VALID UNTIL", "PROFILE", "ENABLE PROFILE", @@ -3629,7 +3629,8 @@ psql_completion(const char *text, int start, int end) " UNION SELECT 'SEQUENCE'" " UNION SELECT 'TABLE'" " UNION SELECT 'TABLESPACE'" - " UNION SELECT 'TYPE'"); + " UNION SELECT 'TYPE'" + " UNION SELECT 'WAREHOUSE'"); } else if (TailMatches("GRANT|REVOKE", MatchAny, "ON", "ALL")) COMPLETE_WITH("FUNCTIONS IN SCHEMA", diff --git a/src/include/catalog/dependency.h b/src/include/catalog/dependency.h index def45120713..dc3597f1e4c 100644 --- a/src/include/catalog/dependency.h +++ b/src/include/catalog/dependency.h @@ -156,10 +156,11 @@ typedef enum ObjectClass OCLASS_EXTPROTOCOL, /* pg_extprotocol */ OCLASS_MATVIEW_AUX, /* gp_matview_aux */ OCLASS_TASK, /* pg_task */ - OCLASS_MAIN_MANIFEST /* main_manifest */ + OCLASS_MAIN_MANIFEST, /* main_manifest */ + OCLASS_WAREHOUSE /* gp_warehouse */ } ObjectClass; -#define LAST_OCLASS OCLASS_MAIN_MANIFEST +#define LAST_OCLASS OCLASS_WAREHOUSE /* flag bits for 
performDeletion/performMultipleDeletions: */ #define PERFORM_DELETION_INTERNAL 0x0001 /* internal action */ diff --git a/src/include/catalog/gp_warehouse.h b/src/include/catalog/gp_warehouse.h index 20c3f14e54c..b32aa02509e 100644 --- a/src/include/catalog/gp_warehouse.h +++ b/src/include/catalog/gp_warehouse.h @@ -39,10 +39,12 @@ CATALOG(gp_warehouse,8690,GpWarehouseRelationId) BKI_SHARED_RELATION { Oid oid BKI_FORCE_NOT_NULL; /* oid */ + Oid owner BKI_DEFAULT(POSTGRES) BKI_LOOKUP(pg_authid); /* owner of warehouse */ int32 warehouse_size; /* warehouse size */ text warehouse_name BKI_FORCE_NOT_NULL; /* warehouse name */ #ifdef CATALOG_VARLEN /* variable-length fields start here */ text status BKI_FORCE_NOT_NULL; /* status */ + aclitem warehouse_acl[1]; /* access permissions */ #endif } FormData_gp_warehouse; diff --git a/src/include/catalog/pg_authid.dat b/src/include/catalog/pg_authid.dat index 3da68016b61..5f9ae948f3e 100644 --- a/src/include/catalog/pg_authid.dat +++ b/src/include/catalog/pg_authid.dat @@ -21,62 +21,62 @@ { oid => '10', oid_symbol => 'BOOTSTRAP_SUPERUSERID', rolname => 'POSTGRES', rolsuper => 't', rolinherit => 't', - rolcreaterole => 't', rolcreatedb => 't', rolcanlogin => 't', + rolcreaterole => 't', rolcreatedb => 't', rolcreatewh => 't', rolcanlogin => 't', rolreplication => 't', rolbypassrls => 't', rolconnlimit => '-1', rolpassword => '_null_', rolvaliduntil => '_null_' }, { oid => '6171', oid_symbol => 'ROLE_PG_DATABASE_OWNER', rolname => 'pg_database_owner', rolsuper => 'f', rolinherit => 't', - rolcreaterole => 'f', rolcreatedb => 'f', rolcanlogin => 'f', + rolcreaterole => 'f', rolcreatedb => 'f', rolcreatewh => 'f', rolcanlogin => 'f', rolreplication => 'f', rolbypassrls => 'f', rolconnlimit => '-1', rolpassword => '_null_', rolvaliduntil => '_null_' }, { oid => '6181', oid_symbol => 'ROLE_PG_READ_ALL_DATA', rolname => 'pg_read_all_data', rolsuper => 'f', rolinherit => 't', - rolcreaterole => 'f', rolcreatedb => 'f', rolcanlogin => 
'f', + rolcreaterole => 'f', rolcreatedb => 'f', rolcreatewh => 'f', rolcanlogin => 'f', rolreplication => 'f', rolbypassrls => 'f', rolconnlimit => '-1', rolpassword => '_null_', rolvaliduntil => '_null_' }, { oid => '6182', oid_symbol => 'ROLE_PG_WRITE_ALL_DATA', rolname => 'pg_write_all_data', rolsuper => 'f', rolinherit => 't', - rolcreaterole => 'f', rolcreatedb => 'f', rolcanlogin => 'f', + rolcreaterole => 'f', rolcreatedb => 'f', rolcreatewh => 'f', rolcanlogin => 'f', rolreplication => 'f', rolbypassrls => 'f', rolconnlimit => '-1', rolpassword => '_null_', rolvaliduntil => '_null_' }, { oid => '3373', oid_symbol => 'ROLE_PG_MONITOR', rolname => 'pg_monitor', rolsuper => 'f', rolinherit => 't', - rolcreaterole => 'f', rolcreatedb => 'f', rolcanlogin => 'f', + rolcreaterole => 'f', rolcreatedb => 'f', rolcreatewh => 'f', rolcanlogin => 'f', rolreplication => 'f', rolbypassrls => 'f', rolconnlimit => '-1', rolpassword => '_null_', rolvaliduntil => '_null_' }, { oid => '3374', oid_symbol => 'ROLE_PG_READ_ALL_SETTINGS', rolname => 'pg_read_all_settings', rolsuper => 'f', rolinherit => 't', - rolcreaterole => 'f', rolcreatedb => 'f', rolcanlogin => 'f', + rolcreaterole => 'f', rolcreatedb => 'f', rolcreatewh => 'f', rolcanlogin => 'f', rolreplication => 'f', rolbypassrls => 'f', rolconnlimit => '-1', rolpassword => '_null_', rolvaliduntil => '_null_' }, { oid => '3375', oid_symbol => 'ROLE_PG_READ_ALL_STATS', rolname => 'pg_read_all_stats', rolsuper => 'f', rolinherit => 't', - rolcreaterole => 'f', rolcreatedb => 'f', rolcanlogin => 'f', + rolcreaterole => 'f', rolcreatedb => 'f', rolcreatewh => 'f', rolcanlogin => 'f', rolreplication => 'f', rolbypassrls => 'f', rolconnlimit => '-1', rolpassword => '_null_', rolvaliduntil => '_null_' }, { oid => '3377', oid_symbol => 'ROLE_PG_STAT_SCAN_TABLES', rolname => 'pg_stat_scan_tables', rolsuper => 'f', rolinherit => 't', - rolcreaterole => 'f', rolcreatedb => 'f', rolcanlogin => 'f', + rolcreaterole => 'f', 
rolcreatedb => 'f', rolcreatewh => 'f', rolcanlogin => 'f', rolreplication => 'f', rolbypassrls => 'f', rolconnlimit => '-1', rolpassword => '_null_', rolvaliduntil => '_null_' }, { oid => '4569', oid_symbol => 'ROLE_PG_READ_SERVER_FILES', rolname => 'pg_read_server_files', rolsuper => 'f', rolinherit => 't', - rolcreaterole => 'f', rolcreatedb => 'f', rolcanlogin => 'f', + rolcreaterole => 'f', rolcreatedb => 'f', rolcreatewh => 'f', rolcanlogin => 'f', rolreplication => 'f', rolbypassrls => 'f', rolconnlimit => '-1', rolpassword => '_null_', rolvaliduntil => '_null_' }, { oid => '4570', oid_symbol => 'ROLE_PG_WRITE_SERVER_FILES', rolname => 'pg_write_server_files', rolsuper => 'f', rolinherit => 't', - rolcreaterole => 'f', rolcreatedb => 'f', rolcanlogin => 'f', + rolcreaterole => 'f', rolcreatedb => 'f', rolcreatewh => 'f', rolcanlogin => 'f', rolreplication => 'f', rolbypassrls => 'f', rolconnlimit => '-1', rolpassword => '_null_', rolvaliduntil => '_null_' }, { oid => '4571', oid_symbol => 'ROLE_PG_EXECUTE_SERVER_PROGRAM', rolname => 'pg_execute_server_program', rolsuper => 'f', rolinherit => 't', - rolcreaterole => 'f', rolcreatedb => 'f', rolcanlogin => 'f', + rolcreaterole => 'f', rolcreatedb => 'f', rolcreatewh => 'f', rolcanlogin => 'f', rolreplication => 'f', rolbypassrls => 'f', rolconnlimit => '-1', rolpassword => '_null_', rolvaliduntil => '_null_' }, { oid => '4200', oid_symbol => 'ROLE_PG_SIGNAL_BACKEND', rolname => 'pg_signal_backend', rolsuper => 'f', rolinherit => 't', - rolcreaterole => 'f', rolcreatedb => 'f', rolcanlogin => 'f', + rolcreaterole => 'f', rolcreatedb => 'f', rolcreatewh => 'f', rolcanlogin => 'f', rolreplication => 'f', rolbypassrls => 'f', rolconnlimit => '-1', rolpassword => '_null_', rolvaliduntil => '_null_' }, diff --git a/src/include/catalog/pg_authid.h b/src/include/catalog/pg_authid.h index 484181c581c..cda55bfd6b1 100644 --- a/src/include/catalog/pg_authid.h +++ b/src/include/catalog/pg_authid.h @@ -38,6 +38,7 @@ 
CATALOG(pg_authid,1260,AuthIdRelationId) BKI_SHARED_RELATION BKI_ROWTYPE_OID(284 bool rolinherit; /* inherit privileges from other roles? */ bool rolcreaterole; /* allowed to create more roles? */ bool rolcreatedb; /* allowed to create databases? */ + bool rolcreatewh; /* allowed to create warehouse? */ bool rolcanlogin; /* allowed to log in as session user? */ bool rolreplication; /* role used for streaming replication */ bool rolbypassrls; /* bypasses row-level security? */ diff --git a/src/include/nodes/parsenodes.h b/src/include/nodes/parsenodes.h index d143909c65a..04f3b2b35fc 100644 --- a/src/include/nodes/parsenodes.h +++ b/src/include/nodes/parsenodes.h @@ -1959,6 +1959,7 @@ typedef enum ObjectType OBJECT_USER_MAPPING, OBJECT_STORAGE_USER_MAPPING, OBJECT_VIEW, + OBJECT_WAREHOUSE, OBJECT_RESQUEUE, OBJECT_RESGROUP, OBJECT_DIRECTORY_TABLE @@ -4486,7 +4487,8 @@ typedef struct DropWarehouseStmt typedef enum AlterWarehouseType { ALTER_WAREHOUSE_OPTIONS, - ALTER_WAREHOUSE_SET_WAREHOUSE_SIZE + ALTER_WAREHOUSE_SET_WAREHOUSE_SIZE, + ALTER_WAREHOUSE_ALTER_OWNER } AlterWarehouseType; typedef struct AlterWarehouseStmt @@ -4495,6 +4497,7 @@ typedef struct AlterWarehouseStmt AlterWarehouseType kind; /* ALTER_WAREHOUSE_OPTIONS, etc */ char *whname; /* Name of the warehouse */ int warehouse_size; /* New size of warehouse if set warehouse_size command */ + RoleSpec *newowner; /* the new owner */ List *options; /* List of DefElem nodes */ } AlterWarehouseStmt; diff --git a/src/include/utils/acl.h b/src/include/utils/acl.h index 223175099bd..c1a2a02b1e5 100644 --- a/src/include/utils/acl.h +++ b/src/include/utils/acl.h @@ -35,6 +35,7 @@ #include "access/htup.h" #include "nodes/parsenodes.h" #include "parser/parse_node.h" +#include "utils/aclchk_internal.h" #include "utils/snapshot.h" @@ -167,6 +168,7 @@ typedef struct ArrayType Acl; #define ACL_ALL_RIGHTS_SCHEMA (ACL_USAGE|ACL_CREATE) #define ACL_ALL_RIGHTS_TABLESPACE (ACL_CREATE) #define ACL_ALL_RIGHTS_TYPE (ACL_USAGE) 
+#define ACL_ALL_RIGHTS_WAREHOUSE (ACL_USAGE) /* operation codes for pg_*_aclmask */ typedef enum @@ -226,11 +228,24 @@ extern void initialize_acl(void); extern bool revoked_something; +/* Hook for plugins to get control in ExecGrantStmt_oids() */ +typedef void (*ExecGrantStmt_oids_hook_type) (InternalGrant *istmt); +extern PGDLLIMPORT ExecGrantStmt_oids_hook_type ExecGrantStmt_oids_hook; /* * prototypes for functions in aclchk.c */ +extern Acl *merge_acl_with_grant(Acl *old_acl, bool is_grant, + bool grant_option, DropBehavior behavior, + List *grantees, AclMode privileges, + Oid grantorId, Oid ownerId); +extern AclMode restrict_and_check_grant(bool is_grant, AclMode avail_goptions, + bool all_privs, AclMode privileges, + Oid objectId, Oid grantorId, + ObjectType objtype, const char *objname, + AttrNumber att_number, const char *colname); extern void ExecuteGrantStmt(GrantStmt *stmt); +extern void ExecGrantStmt_oids_internal(InternalGrant *istmt); extern void ExecAlterDefaultPrivilegesStmt(ParseState *pstate, AlterDefaultPrivilegesStmt *stmt); extern void RemoveRoleFromObjectACL(Oid roleid, Oid classid, Oid objid); From e6ede18fac6768210738b3f2b0f4d0a1f417af02 Mon Sep 17 00:00:00 2001 From: JInbao Chen Date: Sat, 1 Jun 2024 07:01:45 +0800 Subject: [PATCH 086/152] Collect catalog and dispatch to qes Dispatch the catalogs that need to be accessed from QD to QE, and no longer directly access system tables on QE. This will reduce the access pressure on the Union Store and the visibility issues caused by the inconsistency of the catalogs seen on QD and QE. QE receives the Plan from QD and converts it to PlanState by the ExecutorStart function. In this process, the Catalog needs to be read. QD will perform the same process to convert the Plan to PlanState. The ExecutorStart in QD and QE execute the same code and access the same Catalog. We can collect the Catalogs accessed by the ExecutorStart in QD and distribute them to QE. QD accesses the Catalog through 4 functions. 
SearchSysCache SearchSysCacheList systable_getnext systable_getnext_ordered. We embed code in these functions to collect Catalog tuples and distribute them to QE. QE uses hashtable and list to store the distributed data and implements a simple am. systable_getnext systable_getnext_ordered calls this simple am to access the catalog. --- src/backend/access/heap/heapam.c | 9 + src/backend/access/index/genam.c | 76 + src/backend/access/index/indexam.c | 28 +- src/backend/catalog/namespace.c | 13 +- src/backend/cdb/Makefile | 2 +- src/backend/cdb/cdbsreh.c | 21 +- src/backend/cdb/cdbtranscat.c | 227 +++ src/backend/cdb/dispatcher/cdbdisp_query.c | 38 +- src/backend/commands/analyze.c | 2 +- src/backend/commands/copyto.c | 3 + src/backend/commands/tablecmds.c | 8 +- src/backend/executor/execExpr.c | 34 +- src/backend/executor/execMain.c | 17 +- src/backend/executor/execPartition.c | 13 +- src/backend/executor/execProcnode.c | 4 +- src/backend/executor/execSRF.c | 4 + src/backend/executor/functions.c | 69 +- src/backend/executor/nodeIncrementalSort.c | 4 + src/backend/executor/nodeValuesscan.c | 6 +- src/backend/foreign/foreign.c | 5 + src/backend/nodes/outfast.c | 29 + src/backend/nodes/readfast.c | 36 + src/backend/nodes/readfuncs.c | 12 + src/backend/optimizer/plan/planner.c | 4 +- src/backend/optimizer/util/plancat.c | 3 +- src/backend/optimizer/util/predtest.c | 3 +- src/backend/tcop/postgres.c | 12 + src/backend/tcop/utility.c | 3 + src/backend/utils/adt/enum.c | 4 +- src/backend/utils/cache/catcache.c | 33 +- src/backend/utils/cache/evtcache.c | 2 +- src/backend/utils/cache/plancache.c | 4 + src/backend/utils/cache/relcache.c | 329 +++- src/backend/utils/cache/syscache.c | 16 +- src/backend/utils/cache/ts_cache.c | 6 +- src/backend/utils/cache/typcache.c | 29 +- src/backend/utils/mb/mbutils.c | 12 + src/backend/utils/misc/superuser.c | 5 +- src/backend/utils/mmgr/aset.c | 6 +- src/backend/utils/mmgr/mcxt.c | 1 + src/include/access/genam.h | 1 + 
src/include/cdb/cdbsreh.h | 2 + src/include/cdb/cdbtranscat.h | 112 ++ src/include/commands/trigger.h | 2 + src/include/executor/execExpr.h | 1 + src/include/executor/executor.h | 13 + src/include/executor/functions.h | 4 +- src/include/mb/pg_wchar.h | 1 + src/include/nodes/nodes.h | 3 +- src/include/nodes/parsenodes.h | 1 + src/include/pg_config_manual.h | 4 +- src/include/utils/typcache.h | 5 + src/pl/plpgsql/src/pl_comp.c | 3 +- src/pl/plpgsql/src/pl_exec.c | 1721 ++++++++++++++++- src/pl/plpgsql/src/pl_handler.c | 27 +- src/pl/plpgsql/src/plpgsql.h | 9 + .../regress/expected/gpctas_optimizer.out | 8 +- .../output/external_table_optimizer.source | 157 +- 58 files changed, 3001 insertions(+), 205 deletions(-) create mode 100644 src/backend/cdb/cdbtranscat.c create mode 100644 src/include/cdb/cdbtranscat.h diff --git a/src/backend/access/heap/heapam.c b/src/backend/access/heap/heapam.c index e6f6ec9fec2..99a9d6f2bd8 100644 --- a/src/backend/access/heap/heapam.c +++ b/src/backend/access/heap/heapam.c @@ -73,6 +73,7 @@ #include "utils/spccache.h" #include "catalog/oid_dispatch.h" +#include "cdb/cdbtranscat.h" #include "cdb/cdbvars.h" #include "utils/guc.h" #include "utils/faultinjector.h" @@ -2755,6 +2756,8 @@ simple_heap_insert(Relation relation, HeapTuple tup) { heap_insert(relation, tup, GetCurrentCommandId(true), 0, NULL, GetCurrentTransactionId()); + + TransStoreTuple(tup); } /* @@ -3278,6 +3281,8 @@ simple_heap_delete(Relation relation, ItemPointer tid) elog(ERROR, "unrecognized heap_delete status: %u", result); break; } + + TransRemoveTuple(RelationGetRelid(relation), *tid); } /* @@ -4377,6 +4382,8 @@ simple_heap_update(Relation relation, ItemPointer otid, HeapTuple tup) TM_FailureData tmfd; LockTupleMode lockmode; + TransRemoveTuple(tup->t_tableOid, *otid); + result = heap_update_internal(relation, otid, tup, GetCurrentCommandId(true), InvalidSnapshot, true /* wait for commit */ , @@ -4405,6 +4412,8 @@ simple_heap_update(Relation relation, ItemPointer otid, 
HeapTuple tup) elog(ERROR, "unrecognized heap_update status: %u", result); break; } + + TransStoreTuple(tup); } diff --git a/src/backend/access/index/genam.c b/src/backend/access/index/genam.c index ce226a48a68..fc88b393798 100644 --- a/src/backend/access/index/genam.c +++ b/src/backend/access/index/genam.c @@ -25,6 +25,7 @@ #include "access/tableam.h" #include "access/transam.h" #include "catalog/index.h" +#include "cdb/cdbtranscat.h" #include "lib/stringinfo.h" #include "miscadmin.h" #include "storage/bufmgr.h" @@ -361,6 +362,21 @@ index_compute_xid_horizon_for_tuples(Relation irel, * ---------------------------------------------------------------- */ +static SysScanDesc +systable_beginscan_qe(Relation heapRelation, int nkeys, ScanKey key) +{ + SysScanDesc sysscan; + + sysscan = (SysScanDesc) palloc0(sizeof(SysScanDescData)); + + sysscan->heap_rel = heapRelation; + sysscan->slot = MakeSingleTupleTableSlot(RelationGetDescr(heapRelation), + &TTSOpsHeapTuple); + sysscan->scan = systup_store_beginscan(heapRelation, nkeys, key, false); + + return sysscan; +} + /* * systable_beginscan --- set up for heap-or-index scan * @@ -390,6 +406,9 @@ systable_beginscan(Relation heapRelation, SysScanDesc sysscan; Relation irel; + if (systup_store_active()) + return systable_beginscan_qe(heapRelation, nkeys, key); + if (indexOK && !IgnoreSystemIndexes && !ReindexIsProcessingIndex(indexId)) @@ -487,6 +506,22 @@ HandleConcurrentAbort() errmsg("transaction aborted during system catalog scan"))); } +static HeapTuple +systable_getnext_qe(SysScanDesc sysscan) +{ + HeapTuple htup = NULL; + + if (systup_store_getnextslot(sysscan->scan, sysscan->slot)) + { + bool shouldFree; + + htup = ExecFetchSlotHeapTuple(sysscan->slot, false, &shouldFree); + Assert(!shouldFree); + } + + return htup; +} + /* * systable_getnext --- get next tuple in a heap-or-index scan * @@ -504,6 +539,9 @@ systable_getnext(SysScanDesc sysscan) { HeapTuple htup = NULL; + if (systup_store_active()) + return 
systable_getnext_qe(sysscan); + if (sysscan->irel) { if (index_getnext_slot(sysscan->iscan, ForwardScanDirection, sysscan->slot)) @@ -542,6 +580,8 @@ systable_getnext(SysScanDesc sysscan) */ HandleConcurrentAbort(); + TransStoreTuple(htup); + return htup; } @@ -587,6 +627,22 @@ systable_recheck_tuple(SysScanDesc sysscan, HeapTuple tup) return result; } + +static void +systable_endscan_qe(SysScanDesc sysscan) +{ + if (sysscan->slot) + { + ExecDropSingleTupleTableSlot(sysscan->slot); + sysscan->slot = NULL; + } + + systup_store_endscan(sysscan->scan); + if (sysscan->snapshot) + UnregisterSnapshot(sysscan->snapshot); + pfree(sysscan); +} + /* * systable_endscan --- close scan, release resources * @@ -595,6 +651,12 @@ systable_recheck_tuple(SysScanDesc sysscan, HeapTuple tup) void systable_endscan(SysScanDesc sysscan) { + if (systup_store_active()) + { + systable_endscan_qe(sysscan); + return; + } + if (sysscan->slot) { ExecDropSingleTupleTableSlot(sysscan->slot); @@ -648,6 +710,9 @@ systable_beginscan_ordered(Relation heapRelation, SysScanDesc sysscan; int i; + if (systup_store_sorted_active()) + return systable_beginscan_qe(heapRelation, nkeys, key); + /* REINDEX can probably be a hard error here ... 
*/ if (ReindexIsProcessingIndex(RelationGetRelid(indexRelation))) elog(ERROR, "cannot do ordered scan on index \"%s\", because it is being reindexed", @@ -709,6 +774,9 @@ systable_getnext_ordered(SysScanDesc sysscan, ScanDirection direction) { HeapTuple htup = NULL; + if (systup_store_sorted_active()) + return systable_getnext_qe(sysscan); + Assert(sysscan->irel); if (index_getnext_slot(sysscan->iscan, direction, sysscan->slot)) htup = ExecFetchSlotHeapTuple(sysscan->slot, false, NULL); @@ -723,6 +791,8 @@ systable_getnext_ordered(SysScanDesc sysscan, ScanDirection direction) */ HandleConcurrentAbort(); + TransStoreTuple(htup); + return htup; } @@ -732,6 +802,12 @@ systable_getnext_ordered(SysScanDesc sysscan, ScanDirection direction) void systable_endscan_ordered(SysScanDesc sysscan) { + if (systup_store_sorted_active()) + { + systable_endscan_qe(sysscan); + return; + } + if (sysscan->slot) { ExecDropSingleTupleTableSlot(sysscan->slot); diff --git a/src/backend/access/index/indexam.c b/src/backend/access/index/indexam.c index fc12e7819d6..de67affdb98 100644 --- a/src/backend/access/index/indexam.c +++ b/src/backend/access/index/indexam.c @@ -55,6 +55,7 @@ #include "catalog/index.h" #include "catalog/pg_amproc.h" #include "catalog/pg_type.h" +#include "cdb/cdbtranscat.h" #include "commands/defrem.h" #include "nodes/makefuncs.h" #include "pgstat.h" @@ -147,6 +148,26 @@ index_open(Oid relationId, LOCKMODE lockmode) return r; } +Relation +order_index_open(Oid relationId, LOCKMODE lockmode) +{ + Relation r; + + if (systup_store_sorted_active()) + return NULL; + + r = relation_open(relationId, lockmode); + + if (r->rd_rel->relkind != RELKIND_INDEX && + r->rd_rel->relkind != RELKIND_PARTITIONED_INDEX) + ereport(ERROR, + (errcode(ERRCODE_WRONG_OBJECT_TYPE), + errmsg("\"%s\" is not an index", + RelationGetRelationName(r)))); + + return r; +} + /* ---------------- * index_close - close an index relation * @@ -159,7 +180,12 @@ index_open(Oid relationId, LOCKMODE lockmode) 
void index_close(Relation relation, LOCKMODE lockmode) { - LockRelId relid = relation->rd_lockInfo.lockRelId; + LockRelId relid; + + if (!relation) + return; + + relid = relation->rd_lockInfo.lockRelId; Assert(lockmode >= NoLock && lockmode < MAX_LOCKMODES); diff --git a/src/backend/catalog/namespace.c b/src/backend/catalog/namespace.c index 1512e550974..2747227de40 100644 --- a/src/backend/catalog/namespace.c +++ b/src/backend/catalog/namespace.c @@ -185,19 +185,19 @@ typedef struct static List *overrideStack = NIL; /* - * myTempNamespace is InvalidOid until and unless a TEMP namespace is set up + * my_temp_namespace is InvalidOid until and unless a TEMP namespace is set up * in a particular backend session (this happens when a CREATE TEMP TABLE * command is first executed). Thereafter it's the OID of the temp namespace. * - * myTempToastNamespace is the OID of the namespace for my temp tables' toast - * tables. It is set when myTempNamespace is, and is InvalidOid before that. + * my_temp_toast_namespace is the OID of the namespace for my temp tables' toast + * tables. It is set when my_temp_namespace is, and is InvalidOid before that. * * myTempNamespaceSubID shows whether we've created the TEMP namespace in the * current subtransaction. The flag propagates up the subtransaction tree, * so the main transaction will correctly recognize the flag if all * intermediate subtransactions commit. When it is InvalidSubTransactionId, * we either haven't made the TEMP namespace yet, or have successfully - * committed its creation, depending on whether myTempNamespace is valid. + * committed its creation, depending on whether my_temp_namespace is valid. */ Oid myTempNamespace = InvalidOid; @@ -3465,11 +3465,6 @@ GetTempNamespaceState(Oid *tempNamespaceId, Oid *tempToastNamespaceId) void SetTempNamespaceState(Oid tempNamespaceId, Oid tempToastNamespaceId) { - /* Worker should not have created its own namespaces ... 
*/ - Assert(myTempNamespace == InvalidOid); - Assert(myTempToastNamespace == InvalidOid); - Assert(myTempNamespaceSubID == InvalidSubTransactionId); - /* Assign same namespace OIDs that leader has */ myTempNamespace = tempNamespaceId; myTempToastNamespace = tempToastNamespaceId; diff --git a/src/backend/cdb/Makefile b/src/backend/cdb/Makefile index dd33ce4e812..b7a7c5f1d82 100644 --- a/src/backend/cdb/Makefile +++ b/src/backend/cdb/Makefile @@ -37,7 +37,7 @@ OBJS = cdbappendonlystorageformat.o \ cdbsetop.o cdbsreh.o cdbsrlz.o cdbsubplan.o cdbsubselect.o \ cdbtargeteddispatch.o cdbthreadlog.o \ cdbtimer.o \ - cdbtm.o cdbtmutils.o \ + cdbtm.o cdbtranscat.o cdbtmutils.o \ cdbutil.o \ cdbvars.o cdbvarblock.o \ cdbdtxrecovery.o diff --git a/src/backend/cdb/cdbsreh.c b/src/backend/cdb/cdbsreh.c index efcb64b1d5e..37462772b28 100644 --- a/src/backend/cdb/cdbsreh.c +++ b/src/backend/cdb/cdbsreh.c @@ -197,7 +197,7 @@ HandleSingleRowError(CdbSreh *cdbsreh) /* * Returns the fixed schema for error log tuple. 
*/ -static TupleDesc +TupleDesc GetErrorTupleDesc(void) { static TupleDesc tupdesc = NULL; @@ -229,6 +229,25 @@ GetErrorTupleDesc(void) return tupdesc; } +TupleDesc +GetTempErrorTupleDesc(void) +{ + + TupleDesc tmp; + + tmp = CreateTemplateTupleDesc(NUM_ERRORTABLE_ATTR); + TupleDescInitEntry(tmp, 1, "cmdtime", TIMESTAMPTZOID, -1, 0); + TupleDescInitEntry(tmp, 2, "relname", TEXTOID, -1, 0); + TupleDescInitEntry(tmp, 3, "filename", TEXTOID, -1, 0); + TupleDescInitEntry(tmp, 4, "linenum", INT4OID, -1, 0); + TupleDescInitEntry(tmp, 5, "bytenum", INT4OID, -1, 0); + TupleDescInitEntry(tmp, 6, "errmsg", TEXTOID, -1, 0); + TupleDescInitEntry(tmp, 7, "rawdata", TEXTOID, -1, 0); + TupleDescInitEntry(tmp, 8, "rawbytes", BYTEAOID, -1, 0); + + return tmp; +} + static HeapTuple FormErrorTuple(CdbSreh *cdbsreh) { diff --git a/src/backend/cdb/cdbtranscat.c b/src/backend/cdb/cdbtranscat.c new file mode 100644 index 00000000000..79eff815b6d --- /dev/null +++ b/src/backend/cdb/cdbtranscat.c @@ -0,0 +1,227 @@ + +#include "postgres.h" + +#include "fmgr.h" +#include "access/xact.h" +#include "access/nbtree.h" +#include "access/relation.h" +#include "access/htup_details.h" +#include "access/relscan.h" +#include "access/skey.h" +#include "access/valid.h" +#include "catalog/namespace.h" +#include "catalog/partition.h" +#include "catalog/pg_attrdef.h" +#include "catalog/pg_amproc.h" +#include "catalog/pg_conversion.h" +#include "catalog/pg_enum.h" +#include "catalog/pg_foreign_data_wrapper.h" +#include "catalog/pg_foreign_server.h" +#include "catalog/pg_language.h" +#include "catalog/pg_operator.h" +#include "catalog/pg_tablespace.h" +#include "catalog/pg_ts_config.h" +#include "catalog/pg_ts_config_map.h" +#include "catalog/pg_ts_dict.h" +#include "catalog/pg_ts_parser.h" +#include "catalog/pg_ts_template.h" +#include "catalog/pg_type.h" +#include "catalog/pg_user_mapping.h" +#include "cdb/cdbsreh.h" +#include "cdb/cdbtranscat.h" +#include "cdb/cdbvars.h" +#include "cdb/cdbsrlz.h" 
+#include "commands/dbcommands.h" +#include "commands/trigger.h" +#include "executor/executor.h" +#include "executor/functions.h" +#include "executor/nodeAgg.h" +#include "executor/tuptable.h" +#include "foreign/fdwapi.h" +#include "foreign/foreign.h" +#include "mb/pg_wchar.h" +#include "nodes/makefuncs.h" +#include "nodes/pg_list.h" +#include "optimizer/optimizer.h" +#include "partitioning/partdesc.h" +#include "rewrite/rewriteHandler.h" +#include "rewrite/rewriteManip.h" +#include "tcop/tcopprot.h" +#include "utils/builtins.h" +#include "utils/inval.h" +#include "utils/memutils.h" +#include "utils/lsyscache.h" +#include "utils/partcache.h" +#include "utils/rel.h" +#include "utils/ruleutils.h" +#include "utils/rangetypes.h" +#include "utils/syscache.h" +#include "utils/typcache.h" +#include "utils/varlena.h" + +bool inPlPgsql = false; + +TransferReset_hook_type TransferReset_hook = NULL; +IsTransferOn_hook_type IsTransferOn_hook = NULL; +SetTransferOff_hook_type SetTransferOff_hook = NULL; +SetTransferOn_hook_type SetTransferOn_hook = NULL; +RelationStored_hook_type RelationStored_hook = NULL; +RelationStoredCheck_hook_type RelationStoredCheck_hook = NULL; +TransStoreTuple_hook_type TransStoreTuple_hook = NULL; +TransRemoveTuple_hook_type TransRemoveTuple_hook = NULL; + +GetTransferNode_hook_type GetTransferNode_hook = NULL; +SystemTupleStoreReset_hook_type SystemTupleStoreReset_hook = NULL; +SystemTupleStoreInit_hook_type SystemTupleStoreInit_hook = NULL; +getSystemTupleList_hook_type getSystemTupleList_hook = NULL; +PlFuncStored_hook_type PlFuncStored_hook = NULL; + +void TransferReset(void) +{ + if (TransferReset_hook) + (*TransferReset_hook) (); +} + +bool IsTransferOn(void) +{ + if (IsTransferOn_hook) + return (*IsTransferOn_hook) (); + else + return false; +} + +void SetTransferOff(void) +{ + if (SetTransferOff_hook) + (*SetTransferOff_hook) (); +} +void SetTransferOn(void) +{ + if (SetTransferOn_hook) + (*SetTransferOn_hook) (); +} +bool RelationStored(Oid 
relid) +{ + if (RelationStored_hook) + return (*RelationStored_hook) (relid); + else + return true; +} +bool RelationStoredCheck(Oid relid) +{ + if (RelationStoredCheck_hook) + return (*RelationStoredCheck_hook) (relid); + else + return true; +} +void TransStoreTuple(HeapTuple htup) +{ + if (TransStoreTuple_hook) + (*TransStoreTuple_hook) (htup); +} +void TransRemoveTuple(Oid tableOid, ItemPointerData tid) +{ + if (TransRemoveTuple_hook) + (*TransRemoveTuple_hook) (tableOid, tid); +} +SystemTableTransferNode *GetTransferNode(void) +{ + if (GetTransferNode_hook) + return (*GetTransferNode_hook) (); + else + return NULL; +} +void SystemTupleStoreReset(void) +{ + if (SystemTupleStoreReset_hook) + (*SystemTupleStoreReset_hook) (); +} +void SystemTupleStoreInit(const char *catalogBuffer, int catalogSize) +{ + if (SystemTupleStoreInit_hook) + (*SystemTupleStoreInit_hook) (catalogBuffer, catalogSize); +} +List *getSystemTupleList(Oid relid) +{ + if (getSystemTupleList_hook) + return (*getSystemTupleList_hook) (relid); + else + return NIL; +} +bool PlFuncStored(Oid funcid) +{ + if (PlFuncStored_hook) + return (*PlFuncStored_hook) (funcid); + else + return true; +} + + +systup_store_beginscan_hook_type systup_store_beginscan_hook = NULL; +systup_store_endscan_hook_type systup_store_endscan_hook = NULL; +systup_store_getnextslot_hook_type systup_store_getnextslot_hook = NULL; +systup_store_active_hook_type systup_store_active_hook = NULL; +systup_store_sorted_active_hook_type systup_store_sorted_active_hook = NULL; + +TableScanDesc systup_store_beginscan(Relation relation, int nkeys, ScanKey key, + uint32 flags) +{ + if (systup_store_beginscan_hook) + return (*systup_store_beginscan_hook) (relation, nkeys, key, flags); + else + return NULL; +} +void systup_store_endscan(TableScanDesc sscan) +{ + if (systup_store_endscan_hook) + (*systup_store_endscan_hook) (sscan); +} +bool systup_store_getnextslot(TableScanDesc sscan, TupleTableSlot *slot) +{ + if 
(systup_store_getnextslot_hook) + return (*systup_store_getnextslot_hook) (sscan, slot); + else + return false; +} +bool systup_store_active(void) +{ + if (systup_store_active_hook) + return (*systup_store_active_hook) (); + else + return false; +} +bool systup_store_sorted_active(void) +{ + if (systup_store_sorted_active_hook) + return (*systup_store_sorted_active_hook) (); + else + return false; +} + +DefaultValueStore_hook_type DefaultValueStore_hook = NULL; +InTypeStore_hook_type InTypeStore_hook = NULL; +TypeStore_hook_type TypeStore_hook = NULL; +InitQuery_hook_type InitQuery_hook = NULL; + +void DefaultValueStore(char *bin) +{ + if (DefaultValueStore_hook) + (*DefaultValueStore_hook) (bin); +} +bool InTypeStore(void) +{ + if (InTypeStore_hook) + return (*InTypeStore_hook) (); + else + return true; +} +void TypeStore(Oid typeOid, int flags) +{ + if (TypeStore_hook) + (*TypeStore_hook) (typeOid, flags); +} +void InitQuery(const char *query_string) +{ + if (InitQuery_hook) + (*InitQuery_hook) (query_string); +} diff --git a/src/backend/cdb/dispatcher/cdbdisp_query.c b/src/backend/cdb/dispatcher/cdbdisp_query.c index 210c88ccf83..72d201438f0 100644 --- a/src/backend/cdb/dispatcher/cdbdisp_query.c +++ b/src/backend/cdb/dispatcher/cdbdisp_query.c @@ -49,6 +49,7 @@ #include "cdb/cdbdisp_dtx.h" /* for qdSerializeDtxContextInfo() */ #include "cdb/cdbdispatchresult.h" #include "cdb/cdbcopy.h" +#include "cdb/cdbtranscat.h" #include "executor/execUtils.h" #include "cdb/cdbpq.h" @@ -94,6 +95,8 @@ typedef struct DispatchCommandQueryParms int serializedPlantreelen; char *serializedQueryDispatchDesc; int serializedQueryDispatchDesclen; + char *serializedCatalog; + int serializedCatalogLen; /* * Additional information.
@@ -331,6 +334,9 @@ CdbDispatchSetCommand(const char *strCommand, bool cancelOnError) ErrorData *qeError = NULL; int flags = DF_NONE; + SetTransferOn(); + InitQuery(strCommand); + if (CdbNeedDispatchCommand_hook && !CdbNeedDispatchCommand_hook(strCommand, &flags, NULL, NULL)) return; @@ -410,6 +416,9 @@ CdbDispatchCommand(const char *strCommand, int flags, CdbPgResults *cdb_pgresults) { + SetTransferOn(); + InitQuery(strCommand); + return CdbDispatchCommandToSegments(strCommand, flags, cdbcomponent_getCdbComponentsList(), @@ -579,7 +588,10 @@ cdbdisp_buildCommandQueryParms(const char *strCommand, int flags) pQueryParms->strCommand = strCommand; pQueryParms->serializedQueryDispatchDesc = NULL; pQueryParms->serializedQueryDispatchDesclen = 0; - + if (IsTransferOn()) + pQueryParms->serializedCatalog = serializeNode((Node*) GetTransferNode(), + &pQueryParms->serializedCatalogLen, + NULL); /* * Serialize a version of our DTX Context Info */ @@ -653,6 +665,11 @@ cdbdisp_buildUtilityQueryParms(struct Node *stmt, pQueryParms->serializedQueryDispatchDesc = serializedQueryDispatchDesc; pQueryParms->serializedQueryDispatchDesclen = serializedQueryDispatchDesc_len; + if (IsTransferOn()) + pQueryParms->serializedCatalog = serializeNode((Node*) GetTransferNode(), + &pQueryParms->serializedCatalogLen, + NULL); + /* * Serialize a version of our DTX Context Info */ @@ -712,6 +729,11 @@ cdbdisp_buildPlanQueryParms(struct QueryDesc *queryDesc, pQueryParms->serializedQueryDispatchDesc = sddesc; pQueryParms->serializedQueryDispatchDesclen = sddesc_len; + if (IsTransferOn()) + pQueryParms->serializedCatalog = serializeNode((Node*) GetTransferNode(), + &pQueryParms->serializedCatalogLen, + NULL); + /* * Serialize a version of our snapshot, and generate our transction * isolations. 
We generally want Plan based dispatch to be in a global @@ -905,6 +927,8 @@ buildGpQueryString(DispatchCommandQueryParms *pQueryParms, int plantree_len = pQueryParms->serializedPlantreelen; const char *sddesc = pQueryParms->serializedQueryDispatchDesc; int sddesc_len = pQueryParms->serializedQueryDispatchDesclen; + const char *sdcatalog = pQueryParms->serializedCatalog; + int sdcatalog_len = pQueryParms->serializedCatalogLen; const char *dtxContextInfo = pQueryParms->serializedDtxContextInfo; int dtxContextInfo_len = pQueryParms->serializedDtxContextInfolen; int64 currentStatementStartTimestamp = GetCurrentStatementStartTimestamp(); @@ -961,11 +985,13 @@ buildGpQueryString(DispatchCommandQueryParms *pQueryParms, sizeof(command_len) + sizeof(plantree_len) + sizeof(sddesc_len) + + sizeof(sdcatalog_len) + sizeof(dtxContextInfo_len) + dtxContextInfo_len + command_len + plantree_len + sddesc_len + + sdcatalog_len + sizeof(numsegments) + sizeof(resgroupInfo.len) + resgroupInfo.len + @@ -1029,6 +1055,10 @@ buildGpQueryString(DispatchCommandQueryParms *pQueryParms, memcpy(pos, &tmp, sizeof(tmp)); pos += sizeof(tmp); + tmp = htonl(sdcatalog_len); + memcpy(pos, &tmp, sizeof(tmp)); + pos += sizeof(tmp); + tmp = htonl(dtxContextInfo_len); memcpy(pos, &tmp, sizeof(tmp)); pos += sizeof(tmp); @@ -1056,6 +1086,12 @@ buildGpQueryString(DispatchCommandQueryParms *pQueryParms, pos += sddesc_len; } + if (sdcatalog_len > 0) + { + memcpy(pos, sdcatalog, sdcatalog_len); + pos += sdcatalog_len; + } + tmp = htonl(numsegments); memcpy(pos, &tmp, sizeof(numsegments)); pos += sizeof(numsegments); diff --git a/src/backend/commands/analyze.c b/src/backend/commands/analyze.c index 6662fcb3363..2c5f51a6f03 100644 --- a/src/backend/commands/analyze.c +++ b/src/backend/commands/analyze.c @@ -1078,7 +1078,7 @@ do_analyze_rel(Relation onerel, VacuumParams *params, { BlockNumber relallvisible; - if (RelationStorageIsAO(onerel)) + if (RelationIsNonblockRelation(onerel)) relallvisible = 0; else 
relallvisible = AcquireNumberOfAllVisibleBlocks(onerel); diff --git a/src/backend/commands/copyto.c b/src/backend/commands/copyto.c index 95b5b18983f..62701556c91 100644 --- a/src/backend/commands/copyto.c +++ b/src/backend/commands/copyto.c @@ -1661,6 +1661,9 @@ CopyToDispatch(CopyToState cstate) cdbCopy = makeCdbCopyTo(cstate); + if (cstate->need_transcoding) + StoreEncodingConversion(cstate->file_encoding); + /* XXX: lock all partitions */ /* diff --git a/src/backend/commands/tablecmds.c b/src/backend/commands/tablecmds.c index 9d9f9e0c47e..bccbdacd1bb 100644 --- a/src/backend/commands/tablecmds.c +++ b/src/backend/commands/tablecmds.c @@ -134,9 +134,11 @@ #include "nodes/altertablenodes.h" #include "cdb/cdbdisp.h" #include "cdb/cdbdisp_query.h" +#include "cdb/cdbtranscat.h" #include "cdb/cdbvars.h" #include "cdb/cdbrelsize.h" #include "cdb/cdboidsync.h" +#include "cdb/cdbtranscat.h" #include "postmaster/autostats.h" const char *synthetic_sql = "(internally generated SQL command)"; @@ -7385,8 +7387,6 @@ ATRewriteTable(AlteredTableInfo *tab, Oid OIDNewHeap, LOCKMODE lockmode) int ti_options; ExprState *partqualstate = NULL; - if (ATRewriteTable_hook) - ATRewriteTable_hook(tab, OIDNewHeap, lockmode); /* * Open the relation(s). 
We have surely already locked the existing @@ -7751,6 +7751,10 @@ ATRewriteTable(AlteredTableInfo *tab, Oid OIDNewHeap, LOCKMODE lockmode) table_close(newrel, NoLock); } + + + if (ATRewriteTable_hook) + ATRewriteTable_hook(tab, OIDNewHeap, lockmode); } /* diff --git a/src/backend/executor/execExpr.c b/src/backend/executor/execExpr.c index ac3b71a5669..2bd34c58966 100644 --- a/src/backend/executor/execExpr.c +++ b/src/backend/executor/execExpr.c @@ -54,8 +54,10 @@ #include "access/detoast.h" #include "access/heaptoast.h" #include "catalog/pg_collation.h" +#include "cdb/cdbtranscat.h" #include "cdb/cdbvars.h" #include "utils/pg_locale.h" +#include "utils/syscache.h" #include "port/pg_bitutils.h" @@ -990,6 +992,15 @@ ExecInitExprRec(Expr *node, ExprState *state, scratch.d.constval.isnull = con->constisnull; ExprEvalPushStep(state, &scratch); + if (IsTransferOn()) + { + HeapTuple typeTup; + + typeTup = SearchSysCache1(TYPEOID, con->consttype); + if (typeTup) + ReleaseSysCache(typeTup); + } + break; } @@ -2519,7 +2530,7 @@ ExecInitExprRec(Expr *node, ExprState *state, * into that array may be used while the expression is still being built. 
*/ void -ExprEvalPushStep(ExprState *es, const ExprEvalStep *s) +ExprEvalPushStep_internal(ExprState *es, const ExprEvalStep *s) { if (es->steps_alloc == 0) { @@ -2536,6 +2547,18 @@ ExprEvalPushStep(ExprState *es, const ExprEvalStep *s) memcpy(&es->steps[es->steps_len++], s, sizeof(ExprEvalStep)); } +void +ExprEvalPushStep(ExprState *es, const ExprEvalStep *s) +{ + if (ExprEvalPushStep_hook) + { + (*ExprEvalPushStep_hook) (es, s); + return; + } + + ExprEvalPushStep_internal(es, s); +} + /* * Perform setup necessary for the evaluation of a function-like expression, * appending argument evaluation steps to the steps list in *state, and @@ -2618,6 +2641,15 @@ ExecInitFunc(ExprEvalStep *scratch, Expr *node, List *args, Oid funcid, fcinfo->args[argno].value = con->constvalue; fcinfo->args[argno].isnull = con->constisnull; + + if (IsTransferOn()) + { + HeapTuple typeTup; + + typeTup = SearchSysCache1(TYPEOID, con->consttype); + if (typeTup) + ReleaseSysCache(typeTup); + } } else { diff --git a/src/backend/executor/execMain.c b/src/backend/executor/execMain.c index afc255ededd..972d07d4cb1 100644 --- a/src/backend/executor/execMain.c +++ b/src/backend/executor/execMain.c @@ -103,6 +103,7 @@ #include "cdb/cdbdisp_query.h" #include "cdb/cdbdispatchresult.h" #include "cdb/cdbexplain.h" /* cdbexplain_sendExecStats() */ +#include "cdb/cdbtranscat.h" #include "cdb/cdbplan.h" #include "cdb/cdbsubplan.h" #include "cdb/cdbvars.h" @@ -115,6 +116,7 @@ #include "cdb/cdbtargeteddispatch.h" #include "cdb/cdbutil.h" #include "cdb/cdbendpoint.h" +#include "cdb/cdbtranscat.h" #define IS_PARALLEL_RETRIEVE_CURSOR(queryDesc) (queryDesc->ddesc && \ queryDesc->ddesc->parallelCursorName && \ @@ -168,7 +170,6 @@ static int executor_run_nesting_level = 0; SetDtxFlag_hook_type SetDtxFlag_hook = NULL; /* decls for local routines only used within this module */ -static void InitPlan(QueryDesc *queryDesc, int eflags); static void CheckValidRowMarkRel(Relation rel, RowMarkType markType); static void 
ExecPostprocessPlan(EState *estate); static void ExecEndPlan(PlanState *planstate, EState *estate); @@ -238,6 +239,8 @@ ExecutorStart(QueryDesc *queryDesc, int eflags) */ pgstat_report_query_id(queryDesc->plannedstmt->queryId, false); + SetTransferOn(); + if (ExecutorStart_hook) (*ExecutorStart_hook) (queryDesc, eflags); else @@ -587,8 +590,15 @@ standard_ExecutorStart(QueryDesc *queryDesc, int eflags) * Initialize the plan state tree */ Assert(CurrentMemoryContext == estate->es_query_cxt); + + if (!shouldDispatch) + SetTransferOff(); + InitPlan(queryDesc, eflags); + if (!shouldDispatch) + SetTransferOn(); + Assert(queryDesc->planstate); #ifdef USE_ASSERT_CHECKING @@ -1762,7 +1772,7 @@ ExecCheckXactReadOnly(PlannedStmt *plannedstmt) * and start up the rule manager * ---------------------------------------------------------------- */ -static void +void InitPlan(QueryDesc *queryDesc, int eflags) { CmdType operation = queryDesc->operation; @@ -2454,6 +2464,9 @@ InitResultRelInfo(ResultRelInfo *resultRelInfo, resultRelInfo->ri_ChildToRootMap = NULL; resultRelInfo->ri_ChildToRootMapValid = false; resultRelInfo->ri_CopyMultiInsertBuffer = NULL; + + if (CollectResultInfo_hook) + (*CollectResultInfo_hook) (resultRelInfo); } /* diff --git a/src/backend/executor/execPartition.c b/src/backend/executor/execPartition.c index 679d861e485..e11df96270d 100644 --- a/src/backend/executor/execPartition.c +++ b/src/backend/executor/execPartition.c @@ -36,6 +36,8 @@ #include "cdb/cdbaocsam.h" #include "cdb/cdbappendonlyam.h" +#include "cdb/cdbtranscat.h" +#include "cdb/cdbtranscat.h" /* * Helper macro that is used to determine if a Modifytable node came from a @@ -1845,6 +1847,16 @@ ExecCreatePartitionPruneState(PlanState *planstate, * duration of this executor run. 
*/ partrel = ExecGetRangeTableRelation(estate, pinfo->rtindex); + + if (IsTransferOn()) + { + if (partrel->rd_partkeycxt) + { + MemoryContextDelete(partrel->rd_partkeycxt); + partrel->rd_partkey = NULL; + partrel->rd_partkeycxt = NULL; + } + } partkey = RelationGetPartitionKey(partrel); partdesc = PartitionDirectoryLookup(estate->es_partition_directory, partrel); @@ -1973,7 +1985,6 @@ ExecCreatePartitionPruneState(PlanState *planstate, */ prunestate->execparamids = bms_add_members(prunestate->execparamids, pinfo->execparamids); - j++; } i++; diff --git a/src/backend/executor/execProcnode.c b/src/backend/executor/execProcnode.c index 031c82a5a39..c042c09c46a 100644 --- a/src/backend/executor/execProcnode.c +++ b/src/backend/executor/execProcnode.c @@ -151,7 +151,9 @@ ExecInitNode_hook_type ExecInitNode_hook = NULL; /* Hook for plugins to get control in ExecEndNode() */ ExecEndNode_hook_type ExecEndNode_hook = NULL; - +ExprEvalPushStep_hook_type ExprEvalPushStep_hook = NULL; +CollectResultInfo_hook_type CollectResultInfo_hook = NULL; +CollectProc_hook_type CollectProc_hook = NULL; /** * Forward declarations of static functions */ diff --git a/src/backend/executor/execSRF.c b/src/backend/executor/execSRF.c index 4f6723db998..255fe3ffdc3 100644 --- a/src/backend/executor/execSRF.c +++ b/src/backend/executor/execSRF.c @@ -20,6 +20,7 @@ #include "access/htup_details.h" #include "catalog/objectaccess.h" +#include "cdb/cdbtranscat.h" #include "executor/execdebug.h" #include "funcapi.h" #include "miscadmin.h" @@ -804,6 +805,9 @@ init_sexpr(Oid foid, Oid input_collation, Expr *node, sexpr->funcResultStore = NULL; sexpr->funcResultSlot = NULL; sexpr->shutdown_reg = false; + + if (CollectProc_hook) + (*CollectProc_hook) (sexpr->fcinfo); } /* diff --git a/src/backend/executor/functions.c b/src/backend/executor/functions.c index 5dc3bca666a..c1ace282dd3 100644 --- a/src/backend/executor/functions.c +++ b/src/backend/executor/functions.c @@ -152,10 +152,10 @@ static Node 
*sql_fn_resolve_param_name(SQLFunctionParseInfoPtr pinfo, static List *init_execution_state(List *queryTree_list, SQLFunctionCachePtr fcache, bool lazyEvalOK); -static void init_sql_fcache(FunctionCallInfo fcinfo, Oid collation, bool lazyEvalOK); -static void postquel_start(execution_state *es, SQLFunctionCachePtr fcache); +static void postquel_start(execution_state *es, SQLFunctionCachePtr fcache, + int execflags); static bool postquel_getnext(execution_state *es, SQLFunctionCachePtr fcache); -static void postquel_end(execution_state *es); +static void postquel_end(execution_state *es, int execflags); static void postquel_sub_params(SQLFunctionCachePtr fcache, FunctionCallInfo fcinfo); static Datum postquel_get_single_result(TupleTableSlot *slot, @@ -676,7 +676,7 @@ init_execution_state(List *queryTree_list, /* * Initialize the SQLFunctionCache for a SQL function */ -static void +void init_sql_fcache(FunctionCallInfo fcinfo, Oid collation, bool lazyEvalOK) { FmgrInfo *finfo = fcinfo->flinfo; @@ -932,7 +932,7 @@ init_sql_fcache(FunctionCallInfo fcinfo, Oid collation, bool lazyEvalOK) /* Start up execution of one execution_state node */ static void -postquel_start(execution_state *es, SQLFunctionCachePtr fcache) +postquel_start(execution_state *es, SQLFunctionCachePtr fcache, int execflags) { DestReceiver *dest; @@ -995,6 +995,10 @@ postquel_start(execution_state *es, SQLFunctionCachePtr fcache) eflags = EXEC_FLAG_SKIP_TRIGGERS; else eflags = 0; /* default run-to-completion flags */ + + if (execflags) + eflags |= execflags; + ExecutorStart(es->qd, eflags); } @@ -1039,7 +1043,7 @@ postquel_getnext(execution_state *es, SQLFunctionCachePtr fcache) /* Shut down execution of one execution_state node */ static void -postquel_end(execution_state *es) +postquel_end(execution_state *es, int execflags) { /* mark status done to ensure we don't do ExecutorEnd twice */ es->status = F_EXEC_DONE; @@ -1053,7 +1057,8 @@ postquel_end(execution_state *es) if (Gp_role == 
GP_ROLE_DISPATCH) autostats_get_cmdtype(es->qd, &cmdType, &relationOid); - ExecutorFinish(es->qd); + if (!(execflags & EXEC_FLAG_EXPLAIN_ONLY)) + ExecutorFinish(es->qd); ExecutorEnd(es->qd); /* MPP-14001: Running auto_stats */ @@ -1314,7 +1319,7 @@ PG_TRY(); UpdateActiveSnapshotCommandId(); } - postquel_start(es, fcache); + postquel_start(es, fcache, 0); } else if (!fcache->readonly_func && !pushed_snapshot) { @@ -1334,7 +1339,7 @@ PG_TRY(); * don't care about fetching any more result rows. */ if (completed || !fcache->returnsSet) - postquel_end(es); + postquel_end(es, 0); /* * Break from loop if we didn't shut down (implying we got a @@ -1537,6 +1542,50 @@ PG_END_TRY(); return result; } +void +fmgr_sql_init(PG_FUNCTION_ARGS) +{ + SQLFunctionCachePtr fcache; + execution_state *es; + List *eslist; + ListCell *eslc; + PG_TRY(); + { + + init_sql_fcache(fcinfo, PG_GET_COLLATION(), true); + + fcache = (SQLFunctionCachePtr) fcinfo->flinfo->fn_extra; + + eslist = fcache->func_state; + es = NULL; + + foreach(eslc, eslist) + { + es = (execution_state *) lfirst(eslc); + + while (es && es->status == F_EXEC_DONE) + { + es = es->next; + } + + if (es) + break; + } + + while (es) + { + postquel_start(es, fcache, EXEC_FLAG_EXPLAIN_ONLY); + postquel_end(es, EXEC_FLAG_EXPLAIN_ONLY); + + es = es->next; + } + } + PG_CATCH(); + { + FlushErrorState(); + } + PG_END_TRY(); +} /* * error context callback to let us supply a call-stack traceback @@ -1642,7 +1691,7 @@ ShutdownSQLFunction(Datum arg) if (!fcache->readonly_func) PushActiveSnapshot(es->qd->snapshot); - postquel_end(es); + postquel_end(es, 0); if (!fcache->readonly_func) PopActiveSnapshot(); diff --git a/src/backend/executor/nodeIncrementalSort.c b/src/backend/executor/nodeIncrementalSort.c index b3d653c3568..a1fbde63891 100644 --- a/src/backend/executor/nodeIncrementalSort.c +++ b/src/backend/executor/nodeIncrementalSort.c @@ -79,6 +79,7 @@ #include "postgres.h" #include "access/htup_details.h" +#include "cdb/cdbtranscat.h" 
#include "executor/execdebug.h" #include "executor/nodeIncrementalSort.h" #include "miscadmin.h" @@ -1065,6 +1066,9 @@ ExecInitIncrementalSort(IncrementalSort *node, EState *estate, int eflags) SO_printf("ExecInitIncrementalSort: sort node initialized\n"); + if (IsTransferOn()) + preparePresortedCols(incrsortstate); + return incrsortstate; } diff --git a/src/backend/executor/nodeValuesscan.c b/src/backend/executor/nodeValuesscan.c index 810522e27a1..b99f63f5eb4 100644 --- a/src/backend/executor/nodeValuesscan.c +++ b/src/backend/executor/nodeValuesscan.c @@ -25,6 +25,7 @@ */ #include "postgres.h" +#include "cdb/cdbtranscat.h" #include "executor/executor.h" #include "executor/nodeValuesscan.h" #include "jit/jit.h" @@ -295,8 +296,9 @@ ExecInitValuesScan(ValuesScan *node, EState *estate, int eflags) * We can avoid the cost of a contain_subplans() scan in the simple * case where there are no SubPlans anywhere. */ - if (estate->es_subplanstates && - contain_subplans((Node *) exprs)) + if ((estate->es_subplanstates && + contain_subplans((Node *) exprs)) || + IsTransferOn()) { int saved_jit_flags; diff --git a/src/backend/foreign/foreign.c b/src/backend/foreign/foreign.c index 2d60eff9459..67364ae5d7c 100644 --- a/src/backend/foreign/foreign.c +++ b/src/backend/foreign/foreign.c @@ -21,6 +21,7 @@ #include "catalog/pg_foreign_table_seg.h" #include "catalog/pg_user_mapping.h" #include "cdb/cdbgang.h" +#include "cdb/cdbtranscat.h" #include "cdb/cdbutil.h" #include "cdb/cdbvars.h" #include "commands/defrem.h" @@ -712,6 +713,10 @@ GetFdwRoutineForRelation(Relation relation, bool makecopy) /* Give back the locally palloc'd copy regardless of makecopy */ return fdwroutine; } + else if (IsTransferOn()) + { + GetFdwRoutineByRelId(RelationGetRelid(relation)); + } /* We have valid cached data --- does the caller want a copy? 
*/ if (makecopy) diff --git a/src/backend/nodes/outfast.c b/src/backend/nodes/outfast.c index 054fc06238a..2ad119c3428 100644 --- a/src/backend/nodes/outfast.c +++ b/src/backend/nodes/outfast.c @@ -41,6 +41,7 @@ #include "catalog/heap.h" #include "catalog/index.h" #include "cdb/cdbgang.h" +#include "cdb/cdbtranscat.h" #include "utils/workfile_mgr.h" #include "parser/parsetree.h" @@ -857,6 +858,28 @@ _outGpSplitPartitionCmd(StringInfo str, const GpSplitPartitionCmd *node) WRITE_NODE_FIELD(arg2); } + +static void +_outSystemTableTransferNode(StringInfo str, const SystemTableTransferNode *node) +{ + WRITE_NODE_TYPE("SYSTEMTABLETRANSFERNODE"); + WRITE_OID_FIELD(my_temp_namespace); + WRITE_OID_FIELD(my_temp_toast_namespace); + WRITE_NODE_FIELD(transfer_tuples); +} + +static void +_outTranderTuple(StringInfo str, const TransferTuple *node) +{ + WRITE_NODE_TYPE("TRANSFERTUPLE"); + + WRITE_UINT_FIELD(t_len); + appendBinaryStringInfo(str, (char *) &node->t_self, sizeof(ItemPointerData)); + WRITE_OID_FIELD(t_tableOid); + appendBinaryStringInfo(str, node->t_data, node->t_len); +} + + /* * _outNode - * converts a Node into binary string and append it to 'str' @@ -1933,6 +1956,12 @@ _outNode(StringInfo str, void *obj) case T_AlterDatabaseStmt: _outAlterDatabaseStmt(str, obj); break; + case T_SystemTableTransferNode: + _outSystemTableTransferNode(str, obj); + break; + case T_TransferTuple: + _outTranderTuple(str, obj); + break; default: elog(ERROR, "could not serialize unrecognized node type: %d", (int) nodeTag(obj)); diff --git a/src/backend/nodes/readfast.c b/src/backend/nodes/readfast.c index 887f9eae433..af8e9618c4a 100644 --- a/src/backend/nodes/readfast.c +++ b/src/backend/nodes/readfast.c @@ -40,6 +40,7 @@ #include "catalog/pg_class.h" #include "catalog/heap.h" #include "cdb/cdbgang.h" +#include "cdb/cdbtranscat.h" /* * Macros to simplify reading of different kinds of fields. 
Use these @@ -1833,6 +1834,35 @@ _readEphemeralNamedRelationInfo(void) READ_DONE(); } + +static SystemTableTransferNode * +_readSystemTableTransferNode(void) +{ + READ_LOCALS(SystemTableTransferNode); + + READ_OID_FIELD(my_temp_namespace); + READ_OID_FIELD(my_temp_toast_namespace); + READ_NODE_FIELD(transfer_tuples); + + READ_DONE(); +} + +static TransferTuple * +_readTransferTuple(void) +{ + READ_LOCALS(TransferTuple); + + READ_UINT_FIELD(t_len); + memcpy(&local_node->t_self, read_str_ptr, sizeof(ItemPointerData)); + read_str_ptr += sizeof(ItemPointerData); + READ_OID_FIELD(t_tableOid); + local_node->t_data = palloc(local_node->t_len); + memcpy(local_node->t_data, read_str_ptr, local_node->t_len); + read_str_ptr += local_node->t_len; + + READ_DONE(); +} + static void * _readAlterDatabaseStmt(void) { @@ -2940,6 +2970,12 @@ readNodeBinary(void) case T_DropTaskStmt: return_value = _readDropTaskStmt(); break; + case T_SystemTableTransferNode: + return_value= _readSystemTableTransferNode(); + break; + case T_TransferTuple: + return_value = _readTransferTuple(); + break; default: return_value = NULL; /* keep the compiler silent */ elog(ERROR, "could not deserialize unrecognized node type: %d", diff --git a/src/backend/nodes/readfuncs.c b/src/backend/nodes/readfuncs.c index 9859a0c33c8..c95c3a672c2 100644 --- a/src/backend/nodes/readfuncs.c +++ b/src/backend/nodes/readfuncs.c @@ -52,6 +52,7 @@ #include "utils/builtins.h" #include "cdb/cdbgang.h" +#include "cdb/cdbtranscat.h" #include "nodes/altertablenodes.h" /* @@ -618,6 +619,17 @@ _readConst(void) else local_node->constvalue = readDatum(local_node->constbyval); + if (local_node->consttype == REGCLASSOID && IsTransferOn()) + { + if (!RelationStoredCheck(local_node->constvalue)) + { + Relation rel; + + rel = relation_open(local_node->constvalue, AccessShareLock); + relation_close(rel, AccessShareLock); + } + } + READ_DONE(); } #endif /* COMPILING_BINARY_FUNCS */ diff --git a/src/backend/optimizer/plan/planner.c 
b/src/backend/optimizer/plan/planner.c index 463b24ba903..9c71febac0e 100644 --- a/src/backend/optimizer/plan/planner.c +++ b/src/backend/optimizer/plan/planner.c @@ -80,6 +80,7 @@ #include "cdb/cdbgroupingpaths.h" /* create_grouping_paths() extensions */ #include "cdb/cdbsetop.h" /* motion utilities */ #include "cdb/cdbtargeteddispatch.h" +#include "cdb/cdbtranscat.h" #include "cdb/cdbutil.h" #include "cdb/cdbvars.h" #include "optimizer/aqumv.h" /* answer_query_using_materialized_views */ @@ -376,7 +377,8 @@ standard_planner(Query *parse, const char *query_string, int cursorOptions, GP_ROLE_DISPATCH == Gp_role && IS_QUERY_DISPATCHER() && (cursorOptions & CURSOR_OPT_SKIP_FOREIGN_PARTITIONS) == 0 && - (cursorOptions & CURSOR_OPT_PARALLEL_RETRIEVE) == 0) + (cursorOptions & CURSOR_OPT_PARALLEL_RETRIEVE) == 0 && + !inPlPgsql) { #ifdef USE_ORCA diff --git a/src/backend/optimizer/util/plancat.c b/src/backend/optimizer/util/plancat.c index 5417db71431..307017cf83e 100644 --- a/src/backend/optimizer/util/plancat.c +++ b/src/backend/optimizer/util/plancat.c @@ -60,6 +60,7 @@ #include "cdb/cdbappendonlyam.h" #include "cdb/cdbrelsize.h" #include "cdb/cdbutil.h" +#include "cdb/cdbvars.h" #include "catalog/pg_appendonly.h" #include "catalog/pg_foreign_server.h" #include "catalog/pg_inherits.h" @@ -187,7 +188,7 @@ get_relation_info(PlannerInfo *root, Oid relationObjectId, bool inhparent, else hasindex = relation->rd_rel->relhasindex; - if (hasindex) + if (hasindex && Gp_role != GP_ROLE_EXECUTE) { List *indexoidlist; LOCKMODE lmode; diff --git a/src/backend/optimizer/util/predtest.c b/src/backend/optimizer/util/predtest.c index a5d905dd240..0714ea48ba3 100644 --- a/src/backend/optimizer/util/predtest.c +++ b/src/backend/optimizer/util/predtest.c @@ -31,6 +31,7 @@ #include "nodes/makefuncs.h" #include "catalog/pg_operator.h" +#include "cdb/cdbtranscat.h" #include "optimizer/clauses.h" #include "optimizer/paths.h" #include "optimizer/predtest_valueset.h" @@ -2004,7 +2005,7 @@ 
lookup_proof_cache(Oid pred_op, Oid clause_op, bool refute_it) cache_entry = (OprProofCacheEntry *) hash_search(OprProofCacheHash, (void *) &key, HASH_ENTER, &cfound); - if (!cfound) + if (!cfound || IsTransferOn()) { /* new cache entry, set it invalid */ cache_entry->have_implic = false; diff --git a/src/backend/tcop/postgres.c b/src/backend/tcop/postgres.c index b6543c95afe..1242a9f74a9 100644 --- a/src/backend/tcop/postgres.c +++ b/src/backend/tcop/postgres.c @@ -100,6 +100,7 @@ #include "cdb/cdbdispatchresult.h" #include "cdb/cdbendpoint.h" #include "cdb/cdbgang.h" +#include "cdb/cdbtranscat.h" #include "cdb/ml_ipc.h" #include "access/twophase.h" #include "postmaster/backoff.h" @@ -5677,6 +5678,8 @@ PostgresMain(int argc, char *argv[], check_forbidden_in_gpdb_handlers(firstchar); + TransferReset(); + switch (firstchar) { case 'Q': /* simple query */ @@ -5730,12 +5733,14 @@ PostgresMain(int argc, char *argv[], const char *serializedDtxContextInfo = NULL; const char *serializedPlantree = NULL; const char *serializedQueryDispatchDesc = NULL; + const char *serializedCatalog = NULL; const char *resgroupInfoBuf = NULL; int query_string_len = 0; int serializedDtxContextInfolen = 0; int serializedPlantreelen = 0; int serializedQueryDispatchDesclen = 0; + int serializedCatalogLen = 0; int resgroupInfoLen = 0; TimestampTz statementStart; Oid suid; @@ -5771,6 +5776,7 @@ PostgresMain(int argc, char *argv[], query_string_len = pq_getmsgint(&input_message, 4); serializedPlantreelen = pq_getmsgint(&input_message, 4); serializedQueryDispatchDesclen = pq_getmsgint(&input_message, 4); + serializedCatalogLen = pq_getmsgint(&input_message, 4); serializedDtxContextInfolen = pq_getmsgint(&input_message, 4); /* read in the DTX context info */ @@ -5811,6 +5817,9 @@ PostgresMain(int argc, char *argv[], if (serializedQueryDispatchDesclen > 0) serializedQueryDispatchDesc = pq_getmsgbytes(&input_message,serializedQueryDispatchDesclen); + if (serializedCatalogLen > 0) + serializedCatalog = 
pq_getmsgbytes(&input_message, serializedCatalogLen); + /* * Always use the same GpIdentity.numsegments with QD on QEs */ @@ -5855,6 +5864,9 @@ PostgresMain(int argc, char *argv[], if (cuid > 0) SetUserIdAndContext(cuid, false); /* Set current userid */ + SystemTupleStoreReset(); + SystemTupleStoreInit(serializedCatalog, serializedCatalogLen); + if (serializedPlantreelen==0) { if (strncmp(query_string, "BEGIN", 5) == 0) diff --git a/src/backend/tcop/utility.c b/src/backend/tcop/utility.c index 9b251f268b2..d00409ec311 100644 --- a/src/backend/tcop/utility.c +++ b/src/backend/tcop/utility.c @@ -84,6 +84,7 @@ #include "catalog/pg_profile.h" #include "cdb/cdbdisp_query.h" #include "cdb/cdbendpoint.h" +#include "cdb/cdbtranscat.h" #include "cdb/cdbvars.h" @@ -585,6 +586,8 @@ ProcessUtility(PlannedStmt *pstmt, Assert(queryString != NULL); /* required as of 8.4 */ Assert(qc == NULL || qc->commandTag == CMDTAG_UNKNOWN); + SetTransferOn(); + /* * Greenplum specific code: * Please refer to the comments at the definition of process_utility_nesting_level. 
diff --git a/src/backend/utils/adt/enum.c b/src/backend/utils/adt/enum.c index 0d892132a84..9d23f916e85 100644 --- a/src/backend/utils/adt/enum.c +++ b/src/backend/utils/adt/enum.c @@ -403,7 +403,7 @@ enum_endpoint(Oid enumtypoid, ScanDirection direction) ObjectIdGetDatum(enumtypoid)); enum_rel = table_open(EnumRelationId, AccessShareLock); - enum_idx = index_open(EnumTypIdSortOrderIndexId, AccessShareLock); + enum_idx = order_index_open(EnumTypIdSortOrderIndexId, AccessShareLock); enum_scan = systable_beginscan_ordered(enum_rel, enum_idx, NULL, 1, &skey); @@ -562,7 +562,7 @@ enum_range_internal(Oid enumtypoid, Oid lower, Oid upper) ObjectIdGetDatum(enumtypoid)); enum_rel = table_open(EnumRelationId, AccessShareLock); - enum_idx = index_open(EnumTypIdSortOrderIndexId, AccessShareLock); + enum_idx = order_index_open(EnumTypIdSortOrderIndexId, AccessShareLock); enum_scan = systable_beginscan_ordered(enum_rel, enum_idx, NULL, 1, &skey); max = 64; diff --git a/src/backend/utils/cache/catcache.c b/src/backend/utils/cache/catcache.c index bf8027d26d5..eb44996c704 100644 --- a/src/backend/utils/cache/catcache.c +++ b/src/backend/utils/cache/catcache.c @@ -24,6 +24,8 @@ #include "catalog/pg_collation.h" #include "catalog/pg_operator.h" #include "catalog/pg_type.h" +#include "cdb/cdbtranscat.h" +#include "cdb/cdbvars.h" #include "common/hashfn.h" #include "miscadmin.h" #ifdef CATCACHE_STATS @@ -64,6 +66,8 @@ /* Cache management header --- pointer is NULL until created */ static CatCacheHeader *CacheHdr = NULL; +static HeapTuple SearchCatCacheInternalCollect(CatCache *cache, int nkeys, + Datum v1, Datum v2, Datum v3, Datum v4); static inline HeapTuple SearchCatCacheInternal(CatCache *cache, int nkeys, Datum v1, Datum v2, @@ -1205,7 +1209,7 @@ SearchCatCache(CatCache *cache, Datum v3, Datum v4) { - return SearchCatCacheInternal(cache, cache->cc_nkeys, v1, v2, v3, v4); + return SearchCatCacheInternalCollect(cache, cache->cc_nkeys, v1, v2, v3, v4); } @@ -1219,7 +1223,7 @@ 
HeapTuple SearchCatCache1(CatCache *cache, Datum v1) { - return SearchCatCacheInternal(cache, 1, v1, 0, 0, 0); + return SearchCatCacheInternalCollect(cache, 1, v1, 0, 0, 0); } @@ -1227,7 +1231,7 @@ HeapTuple SearchCatCache2(CatCache *cache, Datum v1, Datum v2) { - return SearchCatCacheInternal(cache, 2, v1, v2, 0, 0); + return SearchCatCacheInternalCollect(cache, 2, v1, v2, 0, 0); } @@ -1235,7 +1239,7 @@ HeapTuple SearchCatCache3(CatCache *cache, Datum v1, Datum v2, Datum v3) { - return SearchCatCacheInternal(cache, 3, v1, v2, v3, 0); + return SearchCatCacheInternalCollect(cache, 3, v1, v2, v3, 0); } @@ -1243,7 +1247,24 @@ HeapTuple SearchCatCache4(CatCache *cache, Datum v1, Datum v2, Datum v3, Datum v4) { - return SearchCatCacheInternal(cache, 4, v1, v2, v3, v4); + return SearchCatCacheInternalCollect(cache, 4, v1, v2, v3, v4); +} + +static HeapTuple +SearchCatCacheInternalCollect(CatCache *cache, + int nkeys, + Datum v1, + Datum v2, + Datum v3, + Datum v4) +{ + HeapTuple htup; + + htup = SearchCatCacheInternal(cache, nkeys, v1, v2, v3, v4); + + TransStoreTuple(htup); + + return htup; } /* @@ -2143,7 +2164,7 @@ PrintCatCacheLeakWarning(HeapTuple tuple, const char *resOwnerName) /* Safety check to ensure we were handed a cache entry */ Assert(ct->ct_magic == CT_MAGIC); - elog(WARNING, "cache reference leak: cache %s (%d), tuple %u/%u has count %d, resowner '%s'", + elog(LOG, "cache reference leak: cache %s (%d), tuple %u/%u has count %d, resowner '%s'", ct->my_cache->cc_relname, ct->my_cache->id, ItemPointerGetBlockNumber(&(tuple->t_self)), ItemPointerGetOffsetNumber(&(tuple->t_self)), diff --git a/src/backend/utils/cache/evtcache.c b/src/backend/utils/cache/evtcache.c index 460b720a651..a9262d27018 100644 --- a/src/backend/utils/cache/evtcache.c +++ b/src/backend/utils/cache/evtcache.c @@ -128,7 +128,7 @@ BuildEventTriggerCache(void) * Prepare to scan pg_event_trigger in name order. 
*/ rel = relation_open(EventTriggerRelationId, AccessShareLock); - irel = index_open(EventTriggerNameIndexId, AccessShareLock); + irel = order_index_open(EventTriggerNameIndexId, AccessShareLock); scan = systable_beginscan_ordered(rel, irel, NULL, 0, NULL); /* diff --git a/src/backend/utils/cache/plancache.c b/src/backend/utils/cache/plancache.c index 1f1c7635517..e0e9a39740c 100644 --- a/src/backend/utils/cache/plancache.c +++ b/src/backend/utils/cache/plancache.c @@ -74,6 +74,7 @@ #include "utils/snapmgr.h" #include "utils/syscache.h" +#include "cdb/cdbtranscat.h" #include "cdb/cdbutil.h" /* @@ -1078,6 +1079,9 @@ choose_custom_plan(CachedPlanSource *plansource, ParamListInfo boundParams, Into if (IsTransactionStmtPlan(plansource)) return false; + if (IsTransferOn()) + return true; + /* Let settings force the decision */ if (plan_cache_mode == PLAN_CACHE_MODE_FORCE_GENERIC_PLAN) return false; diff --git a/src/backend/utils/cache/relcache.c b/src/backend/utils/cache/relcache.c index 9e1e0cd88ac..24a08322f05 100644 --- a/src/backend/utils/cache/relcache.c +++ b/src/backend/utils/cache/relcache.c @@ -47,21 +47,38 @@ #include "catalog/indexing.h" #include "catalog/namespace.h" #include "catalog/partition.h" +#include "catalog/pg_aggregate.h" #include "catalog/pg_am.h" +#include "catalog/pg_amop.h" #include "catalog/pg_amproc.h" #include "catalog/pg_attrdef.h" +#include "catalog/pg_authid.h" #include "catalog/pg_auth_members.h" #include "catalog/pg_auth_time_constraint.h" #include "catalog/pg_authid.h" #include "catalog/pg_constraint.h" +#include "catalog/pg_conversion.h" #include "catalog/pg_database.h" +#include "catalog/pg_default_acl.h" +#include "catalog/pg_enum.h" +#include "catalog/pg_event_trigger.h" +#include "catalog/pg_extprotocol.h" +#include "catalog/pg_foreign_data_wrapper.h" +#include "catalog/pg_foreign_server.h" +#include "catalog/pg_foreign_table.h" +#include "catalog/pg_language.h" #include "catalog/pg_namespace.h" #include "catalog/pg_opclass.h" 
+#include "catalog/pg_operator.h" +#include "catalog/pg_opfamily.h" +#include "catalog/pg_partitioned_table.h" #include "catalog/pg_password_history.h" #include "catalog/pg_proc.h" #include "catalog/pg_profile.h" #include "catalog/pg_publication.h" +#include "catalog/pg_range.h" #include "catalog/pg_rewrite.h" +#include "catalog/pg_sequence.h" #include "catalog/pg_shseclabel.h" #include "catalog/pg_statistic_ext.h" #include "catalog/pg_subscription.h" @@ -76,6 +93,8 @@ #include "nodes/makefuncs.h" #include "nodes/nodeFuncs.h" #include "optimizer/optimizer.h" +#include "partitioning/partbounds.h" +#include "partitioning/partdesc.h" #include "rewrite/rewriteDefine.h" #include "rewrite/rowsecurity.h" #include "storage/lmgr.h" @@ -87,6 +106,7 @@ #include "utils/inval.h" #include "utils/lsyscache.h" #include "utils/memutils.h" +#include "utils/partcache.h" #include "utils/relmapper.h" #include "utils/resowner_private.h" #include "utils/snapmgr.h" @@ -95,9 +115,32 @@ #include "access/transam.h" #include "catalog/gp_distribution_policy.h" /* GpPolicy */ #include "catalog/gp_indexing.h" +#include "catalog/gp_storage_server.h" +#include "catalog/gp_storage_user_mapping.h" #include "catalog/heap.h" #include "catalog/index.h" +#include "catalog/main_manifest.h" +#include "catalog/pg_depend.h" +#include "catalog/pg_directory_table.h" +#include "catalog/pg_proc_callback.h" +#include "catalog/pg_cast.h" +#include "catalog/pg_collation.h" +#include "catalog/pg_inherits.h" +#include "catalog/pg_policy.h" +#include "catalog/pg_publication_rel.h" +#include "catalog/pg_resgroup.h" +#include "catalog/pg_resqueuecapability.h" +#include "catalog/pg_statistic.h" +#include "catalog/pg_statistic_ext.h" +#include "catalog/pg_ts_config.h" +#include "catalog/pg_ts_config_map.h" +#include "catalog/pg_ts_dict.h" +#include "catalog/pg_ts_parser.h" +#include "catalog/pg_ts_template.h" +#include "catalog/pg_user_mapping.h" +#include "cdb/cdbtranscat.h" #include "cdb/cdbtm.h" +#include 
"cdb/cdbtranscat.h" #include "cdb/cdbvars.h" /* Gp_role */ #include "cdb/cdbsreh.h" @@ -230,14 +273,17 @@ do { \ if (found) \ { \ /* see comments in RelationBuildDesc and RelationBuildLocalRelation */ \ - Relation _old_rel = hentry->reldesc; \ - Assert(replace_allowed); \ - hentry->reldesc = (RELATION); \ - if (RelationHasReferenceCountZero(_old_rel)) \ - RelationDestroyRelation(_old_rel, false); \ - else if (!IsBootstrapProcessingMode()) \ - elog(WARNING, "leaking still-referenced relcache entry for \"%s\"", \ - RelationGetRelationName(_old_rel)); \ + if (replace_allowed) \ + { \ + Relation _old_rel = hentry->reldesc; \ + Assert(replace_allowed); \ + hentry->reldesc = (RELATION); \ + if (RelationHasReferenceCountZero(_old_rel)) \ + RelationDestroyRelation(_old_rel, false); \ + else if (!IsBootstrapProcessingMode()) \ + elog(WARNING, "leaking still-referenced relcache entry for \"%s\"", \ + RelationGetRelationName(_old_rel)); \ + } \ } \ else \ hentry->reldesc = (RELATION); \ @@ -689,12 +735,12 @@ RelationBuildTupleDesc(Relation relation) * computed when and if needed during tuple access. 
*/ #ifdef USE_ASSERT_CHECKING - { - int i; - - for (i = 0; i < RelationGetNumberOfAttributes(relation); i++) - Assert(TupleDescAttr(relation->rd_att, i)->attcacheoff == -1); - } +// { +// int i; +// +// for (i = 0; i < RelationGetNumberOfAttributes(relation); i++) +// Assert(TupleDescAttr(relation->rd_att, i)->attcacheoff == -1); +// } #endif /* @@ -1732,6 +1778,16 @@ LookupOpclassInfo(Oid operatorClassOid, else { Assert(numSupport == opcentry->numSupport); + + if (IsTransferOn()) + { + pfree(opcentry->supportProcs); + + /* Initialize new entry */ + opcentry->valid = false; /* until known OK */ + opcentry->numSupport = numSupport; + opcentry->supportProcs = NULL; /* filled below */ + } } /* @@ -2157,10 +2213,13 @@ Relation RelationIdGetRelation(Oid relationId) { Relation rd; + bool collected; /* Make sure we're in an xact, even if this ends up being a cache hit */ Assert(IsTransactionState()); + collected = RelationStored(relationId); + /* * first try to find reldesc in the cache */ @@ -2197,9 +2256,27 @@ RelationIdGetRelation(Oid relationId) * change, but we still want to update the rd_rel entry. So * rd_isvalid = false is left in place for a later lookup. */ - Assert(rd->rd_isvalid || - (rd->rd_isnailed && !criticalRelcachesBuilt)); + Assert(rd->rd_isvalid || rd->rd_isnailed); + } + + if (!collected && !rd->rd_isnailed) + { + volatile Relation tmpRd; + + tmpRd = RelationBuildDesc(relationId, false); + if (tmpRd) + RelationDestroyRelation(tmpRd, false); + + if (rd->rd_partcheckvalid) + { + if (rd->rd_partcheckcxt) + MemoryContextDelete(rd->rd_partcheckcxt); + rd->rd_partcheck = NIL; + rd->rd_partcheckvalid = false; + rd->rd_partcheckcxt = NULL; + } } + return rd; } @@ -2450,6 +2527,9 @@ RelationReloadNailed(Relation relation) { Assert(relation->rd_isnailed); + if (Gp_role == GP_ROLE_EXECUTE) + return; + /* * Redo RelationInitPhysicalAddr in case it is a mapped relation whose * mapping changed. 
@@ -2485,7 +2565,7 @@ RelationReloadNailed(Relation relation) * accessed. To ensure the entry will later be revalidated, we leave * it in invalid state, but allow use (cf. RelationIdGetRelation()). */ - if (criticalRelcachesBuilt) + // if (criticalRelcachesBuilt) { HeapTuple pg_class_tuple; Form_pg_class relp; @@ -4663,7 +4743,11 @@ AttrDefaultFetch(Relation relation, int ndef) attrdef[found].adnum = adform->adnum; attrdef[found].adbin = MemoryContextStrdup(CacheMemoryContext, s); pfree(s); + + DefaultValueStore(attrdef[found].adbin); + found++; + } } @@ -6137,15 +6221,25 @@ load_relcache_init_file(bool shared) int i; if (shared) + { snprintf(initfilename, sizeof(initfilename), "global/%s", RELCACHE_INIT_FILENAME); + } else - snprintf(initfilename, sizeof(initfilename), "%s/%s", - DatabasePath, RELCACHE_INIT_FILENAME); + { +// if (GpIdentity.segindex >= 0) + snprintf(initfilename, sizeof(initfilename), "%s", + RELCACHE_INIT_FILENAME); +// else +// snprintf(initfilename, sizeof(initfilename), "%s/%s", +// DatabasePath, RELCACHE_INIT_FILENAME); + } fp = AllocateFile(initfilename, PG_BINARY_R); if (fp == NULL) + { return false; + } /* * Read the index relcache entries from the file. 
Note we will not enter @@ -6160,6 +6254,7 @@ load_relcache_init_file(bool shared) /* check for correct magic number (compatible version) */ if (fread(&magic, 1, sizeof(magic), fp) != sizeof(magic)) goto read_failed; + if (magic != RELCACHE_INIT_FILEMAGIC) goto read_failed; @@ -6177,6 +6272,7 @@ load_relcache_init_file(bool shared) { if (nread == 0) break; /* end of file */ + goto read_failed; } @@ -6204,7 +6300,6 @@ load_relcache_init_file(bool shared) relform = (Form_pg_class) palloc(len); if (fread(relform, 1, len, fp) != len) goto read_failed; - rel->rd_rel = relform; /* initialize attribute tuple forms */ @@ -6233,13 +6328,15 @@ load_relcache_init_file(bool shared) /* next read the access method specific field */ if (fread(&len, 1, sizeof(len), fp) != sizeof(len)) goto read_failed; + if (len > 0) { rel->rd_options = palloc(len); if (fread(rel->rd_options, 1, len, fp) != len) goto read_failed; + if (len != VARSIZE(rel->rd_options)) - goto read_failed; /* sanity check */ + goto read_failed; } else { @@ -6327,6 +6424,7 @@ load_relcache_init_file(bool shared) /* next, read the vector of support procedure OIDs */ if (fread(&len, 1, sizeof(len), fp) != sizeof(len)) goto read_failed; + support = (RegProcedure *) MemoryContextAlloc(indexcxt, len); if (fread(support, 1, len, fp) != len) goto read_failed; @@ -6440,6 +6538,8 @@ load_relcache_init_file(bool shared) * Reset transient-state fields in the relcache entry */ rel->rd_smgr = NULL; + rel->rd_isnailed = true; + rel->rd_isvalid = true; if (rel->rd_isnailed) rel->rd_refcnt = 1; else @@ -6486,32 +6586,32 @@ load_relcache_init_file(bool shared) * values of NUM_CRITICAL_SHARED_RELS/NUM_CRITICAL_SHARED_INDEXES, we put * an Assert(false) there. 
*/ - if (shared) - { - if (nailed_rels != NUM_CRITICAL_SHARED_RELS || - nailed_indexes != NUM_CRITICAL_SHARED_INDEXES) - { - elog(WARNING, "found %d nailed shared rels and %d nailed shared indexes in init file, but expected %d and %d respectively", - nailed_rels, nailed_indexes, - NUM_CRITICAL_SHARED_RELS, NUM_CRITICAL_SHARED_INDEXES); - /* Make sure we get developers' attention about this */ - Assert(false); - /* In production builds, recover by bootstrapping the relcache */ - goto read_failed; - } - } - else - { - if (nailed_rels != NUM_CRITICAL_LOCAL_RELS || - nailed_indexes != NUM_CRITICAL_LOCAL_INDEXES) - { - elog(WARNING, "found %d nailed rels and %d nailed indexes in init file, but expected %d and %d respectively", - nailed_rels, nailed_indexes, - NUM_CRITICAL_LOCAL_RELS, NUM_CRITICAL_LOCAL_INDEXES); - /* We don't need an Assert() in this case */ - goto read_failed; - } - } +// if (shared) +// { +// if (nailed_rels != NUM_CRITICAL_SHARED_RELS || +// nailed_indexes != NUM_CRITICAL_SHARED_INDEXES) +// { +// elog(WARNING, "found %d nailed shared rels and %d nailed shared indexes in init file, but expected %d and %d respectively", +// nailed_rels, nailed_indexes, +// NUM_CRITICAL_SHARED_RELS, NUM_CRITICAL_SHARED_INDEXES); +// /* Make sure we get developers' attention about this */ +// Assert(false); +// /* In production builds, recover by bootstrapping the relcache */ +// goto read_failed; +// } +// } +// else +// { +// if (nailed_rels != NUM_CRITICAL_LOCAL_RELS || +// nailed_indexes != NUM_CRITICAL_LOCAL_INDEXES) +// { +// elog(WARNING, "found %d nailed rels and %d nailed indexes in init file, but expected %d and %d respectively", +// nailed_rels, nailed_indexes, +// NUM_CRITICAL_LOCAL_RELS, NUM_CRITICAL_LOCAL_INDEXES); +// /* We don't need an Assert() in this case */ +// goto read_failed; +// } +// } /* * OK, all appears well. 
@@ -6557,7 +6657,61 @@ write_relcache_init_file(bool shared) int magic; HASH_SEQ_STATUS status; RelIdCacheEnt *idhentry; - int i; + int i,j; + Oid collectRelids[48] = { + AggregateRelationId, + AccessMethodRelationId, + AccessMethodOperatorRelationId, + AccessMethodProcedureRelationId, + AttrDefaultRelationId, + CastRelationId, + ConstraintRelationId, + DependRelationId, + DirectoryTableRelationId, + OperatorClassRelationId, + CollationRelationId, + ConversionRelationId, + DefaultAclRelationId, + EnumRelationId, + EventTriggerRelationId, + ExtprotocolRelationId, + ForeignDataWrapperRelationId, + ForeignServerRelationId, + ForeignTableRelationId, + GpPolicyRelationId, + InheritsRelationId, + IndexRelationId, + LanguageRelationId, + ManifestRelationId, + NamespaceRelationId, + OperatorRelationId, + OperatorFamilyRelationId, + PartitionedRelationId, + PolicyRelationId, + ProcCallbackRelationId, + PublicationRelationId, + PublicationRelRelationId, + RangeRelationId, + ResGroupRelationId, + ResQueueCapabilityRelationId, + SequenceRelationId, + StatisticExtRelationId, + StatisticRelationId, + StorageServerRelationId, + StorageUserMappingRelationId, + TableSpaceRelationId, + TriggerRelationId, + TSConfigRelationId, + TSConfigMapRelationId, + TSDictionaryRelationId, + TSParserRelationId, + TSTemplateRelationId, + UserMappingRelationId + }; + Oid shareRelids[1] = { + ResQueueCapabilityRelationId + + }; if (write_relcache_init_file_hook && write_relcache_init_file_hook()) return; @@ -6584,8 +6738,8 @@ write_relcache_init_file(bool shared) { snprintf(tempfilename, sizeof(tempfilename), "%s/%s.%d", DatabasePath, RELCACHE_INIT_FILENAME, MyProcPid); - snprintf(finalfilename, sizeof(finalfilename), "%s/%s", - DatabasePath, RELCACHE_INIT_FILENAME); + snprintf(finalfilename, sizeof(finalfilename), "%s", + RELCACHE_INIT_FILENAME); } unlink(tempfilename); /* in case it exists w/wrong permissions */ @@ -6711,6 +6865,72 @@ write_relcache_init_file(bool shared) } } + for (i = 0; i < 
sizeof(collectRelids) / sizeof(Oid); ++i) + { + Relation rel; + + if (shared) + break; + + rel = table_open(collectRelids[i], AccessShareLock); + + Form_pg_class relform = rel->rd_rel; + + /* first write the relcache entry proper */ + write_item(rel, sizeof(RelationData), fp); + + /* next write the relation tuple form */ + write_item(relform, CLASS_TUPLE_SIZE, fp); + + /* next, do all the attribute tuple form data entries */ + for (j = 0; j < relform->relnatts; j++) + { + write_item(TupleDescAttr(rel->rd_att, j), + ATTRIBUTE_FIXED_PART_SIZE, fp); + } + + /* next, do the access method specific field */ + write_item(rel->rd_options, + (rel->rd_options ? VARSIZE(rel->rd_options) : 0), + fp); + + table_close(rel, AccessShareLock); + + } + + for (i = 0; i < sizeof(shareRelids) / sizeof(Oid); ++i) + { + Relation rel; + + if (!shared) + break; + + rel = table_open(shareRelids[i], AccessShareLock); + + Form_pg_class relform = rel->rd_rel; + + /* first write the relcache entry proper */ + write_item(rel, sizeof(RelationData), fp); + + /* next write the relation tuple form */ + write_item(relform, CLASS_TUPLE_SIZE, fp); + + /* next, do all the attribute tuple form data entries */ + for (j = 0; j < relform->relnatts; j++) + { + write_item(TupleDescAttr(rel->rd_att, j), + ATTRIBUTE_FIXED_PART_SIZE, fp); + } + + /* next, do the access method specific field */ + write_item(rel->rd_options, + (rel->rd_options ? 
VARSIZE(rel->rd_options) : 0), + fp); + + table_close(rel, AccessShareLock); + + } + if (FreeFile(fp)) elog(FATAL, "could not write init file"); @@ -6783,7 +7003,14 @@ RelationIdIsInInitFile(Oid relationId) if (relationId == SharedSecLabelRelationId || relationId == TriggerRelidNameIndexId || relationId == DatabaseNameIndexId || - relationId == SharedSecLabelObjectIndexId) + relationId == SharedSecLabelObjectIndexId || + relationId == ManifestRelationId || + relationId == PolicyRelationId || + relationId == ProcCallbackRelationId || + relationId == DependRelationId || + relationId == AttrDefaultRelationId || + relationId == TriggerRelationId || + relationId == InheritsRelationId) { /* * If this Assert fails, we don't need the applicable special case diff --git a/src/backend/utils/cache/syscache.c b/src/backend/utils/cache/syscache.c index 065c459eb87..047a97917b8 100644 --- a/src/backend/utils/cache/syscache.c +++ b/src/backend/utils/cache/syscache.c @@ -92,6 +92,7 @@ #include "access/heapam.h" #include "catalog/pg_resgroup.h" #include "catalog/pg_extprotocol.h" +#include "cdb/cdbtranscat.h" #include "miscadmin.h" #include "catalog/gp_indexing.h" @@ -1692,12 +1693,25 @@ struct catclist * SearchSysCacheList(int cacheId, int nkeys, Datum key1, Datum key2, Datum key3) { + CatCList *list; + if (cacheId < 0 || cacheId >= SysCacheSize || !PointerIsValid(SysCache[cacheId])) elog(ERROR, "invalid cache ID: %d", cacheId); - return SearchCatCacheList(SysCache[cacheId], nkeys, + list = SearchCatCacheList(SysCache[cacheId], nkeys, key1, key2, key3); + + for (int i = 0; i < list->n_members; ++i) + { + CatCTup *catCTup; + + catCTup = list->members[i]; + + TransStoreTuple(&catCTup->tuple); + } + + return list; } /* diff --git a/src/backend/utils/cache/ts_cache.c b/src/backend/utils/cache/ts_cache.c index 384107b6bac..7d7d9fa238f 100644 --- a/src/backend/utils/cache/ts_cache.c +++ b/src/backend/utils/cache/ts_cache.c @@ -478,7 +478,7 @@ lookup_ts_config_cache(Oid cfgId) 
ObjectIdGetDatum(cfgId)); maprel = table_open(TSConfigMapRelationId, AccessShareLock); - mapidx = index_open(TSConfigMapIndexId, AccessShareLock); + mapidx = order_index_open(TSConfigMapIndexId, AccessShareLock); mapscan = systable_beginscan_ordered(maprel, mapidx, NULL, 1, &mapskey); @@ -489,8 +489,8 @@ lookup_ts_config_cache(Oid cfgId) if (toktype <= 0 || toktype > MAXTOKENTYPE) elog(ERROR, "maptokentype value %d is out of range", toktype); - if (toktype < maxtokentype) - elog(ERROR, "maptokentype entries are out of order"); +// if (toktype < maxtokentype) +// elog(ERROR, "maptokentype entries are out of order"); if (toktype > maxtokentype) { /* starting a new token type, but first save the prior data */ diff --git a/src/backend/utils/cache/typcache.c b/src/backend/utils/cache/typcache.c index a55adde200b..069efb4ab96 100644 --- a/src/backend/utils/cache/typcache.c +++ b/src/backend/utils/cache/typcache.c @@ -56,6 +56,7 @@ #include "catalog/pg_operator.h" #include "catalog/pg_range.h" #include "catalog/pg_type.h" +#include "cdb/cdbtranscat.h" #include "commands/defrem.h" #include "executor/executor.h" #include "lib/dshash.h" @@ -76,7 +77,7 @@ /* The main type cache hashtable searched by lookup_type_cache */ -static HTAB *TypeCacheHash = NULL; +HTAB *TypeCacheHash = NULL; /* List of type cache entries for domain types */ static TypeCacheEntry *firstDomainTypeEntry = NULL; @@ -315,7 +316,6 @@ static void TypeCacheRelCallback(Datum arg, Oid relid); static void TypeCacheTypCallback(Datum arg, int cacheid, uint32 hashvalue); static void TypeCacheOpcCallback(Datum arg, int cacheid, uint32 hashvalue); static void TypeCacheConstrCallback(Datum arg, int cacheid, uint32 hashvalue); -static void load_enum_cache_data(TypeCacheEntry *tcache); static EnumItem *find_enumitem(TypeCacheEnumData *enumdata, Oid arg); static int enum_oid_cmp(const void *left, const void *right); static void shared_record_typmod_registry_detach(dsm_segment *segment, @@ -324,6 +324,18 @@ static 
TupleDesc find_or_make_matching_shared_tupledesc(TupleDesc tupdesc); static dsa_pointer share_tupledesc(dsa_area *area, TupleDesc tupdesc, uint32 typmod); +static void +CreateTypeMemoryContext(void) +{ + /* + * Purely for paranoia, check that context doesn't exist; caller probably + * did so already. + */ + if (!TypeMemoryContext) + TypeMemoryContext = AllocSetContextCreate(TopMemoryContext, + "TypeMemoryContext", + ALLOCSET_DEFAULT_SIZES); +} /* * lookup_type_cache @@ -359,8 +371,8 @@ lookup_type_cache(Oid type_id, int flags) CacheRegisterSyscacheCallback(CONSTROID, TypeCacheConstrCallback, (Datum) 0); /* Also make sure CacheMemoryContext exists */ - if (!CacheMemoryContext) - CreateCacheMemoryContext(); + CreateCacheMemoryContext(); + CreateTypeMemoryContext(); } /* Try to look up an existing entry */ @@ -464,6 +476,9 @@ lookup_type_cache(Oid type_id, int flags) ReleaseSysCache(tp); } + if (!InTypeStore()) + TypeStore(type_id, flags); + /* * Look up opclasses if we haven't already and any dependent info is * requested. @@ -1995,8 +2010,8 @@ assign_record_type_typmod(TupleDesc tupDesc) HASH_ELEM | HASH_FUNCTION | HASH_COMPARE); /* Also make sure CacheMemoryContext exists */ - if (!CacheMemoryContext) - CreateCacheMemoryContext(); + CreateCacheMemoryContext(); + CreateTypeMemoryContext(); } /* @@ -2625,7 +2640,7 @@ compare_values_of_enum(TypeCacheEntry *tcache, Oid arg1, Oid arg2) /* * Load (or re-load) the enumData member of the typcache entry. 
*/ -static void +void load_enum_cache_data(TypeCacheEntry *tcache) { TypeCacheEnumData *enumdata; diff --git a/src/backend/utils/mb/mbutils.c b/src/backend/utils/mb/mbutils.c index 29287088ecf..8ea3ea81e72 100644 --- a/src/backend/utils/mb/mbutils.c +++ b/src/backend/utils/mb/mbutils.c @@ -438,6 +438,18 @@ pg_do_encoding_conversion(unsigned char *src, int len, return result; } +void +StoreEncodingConversion(int dest_encoding) +{ + Oid proc; + HeapTuple htup; + + proc = FindDefaultConversionProc(DatabaseEncoding->encoding, dest_encoding); + htup = SearchSysCache1(PROCOID, proc); + if (htup) + ReleaseSysCache(htup); +} + /* * Convert src string to another encoding. * diff --git a/src/backend/utils/misc/superuser.c b/src/backend/utils/misc/superuser.c index c9bb85a5af2..ca77ea92e10 100644 --- a/src/backend/utils/misc/superuser.c +++ b/src/backend/utils/misc/superuser.c @@ -26,6 +26,7 @@ #include "utils/inval.h" #include "utils/syscache.h" +#include "cdb/cdbvars.h" #include "storage/proc.h" /* * In common cases the same roleid (ie, the session or current ID) will @@ -68,8 +69,8 @@ superuser_arg(Oid roleid) HeapTuple rtup; /* Quick out for cache hit */ - if (OidIsValid(last_roleid) && last_roleid == roleid) - return last_roleid_is_super; +// if (OidIsValid(last_roleid) && last_roleid == roleid) +// return last_roleid_is_super; /* Special escape path in case you deleted all your users. 
*/ if (!IsUnderPostmaster && roleid == BOOTSTRAP_SUPERUSERID) diff --git a/src/backend/utils/mmgr/aset.c b/src/backend/utils/mmgr/aset.c index b4d239bbcbe..4de1a65e342 100644 --- a/src/backend/utils/mmgr/aset.c +++ b/src/backend/utils/mmgr/aset.c @@ -195,7 +195,7 @@ MEMORY_ACCOUNT_INC_ALLOCATED(AllocSet set, Size newbytes) /* Make sure these values are not overflow */ Assert(set->localAllocated >= newbytes); - Assert(parent->currentAllocated >= set->localAllocated); +// Assert(parent->currentAllocated >= set->localAllocated); } static inline void @@ -204,7 +204,7 @@ MEMORY_ACCOUNT_DEC_ALLOCATED(AllocSet set, Size newbytes) AllocSet parent = set->accountingParent; Assert(set->localAllocated >= newbytes); - Assert(parent->currentAllocated >= set->localAllocated); + // Assert(parent->currentAllocated >= set->localAllocated); set->localAllocated -= newbytes; parent->currentAllocated -= newbytes; @@ -761,7 +761,7 @@ AllocSetDelete(MemoryContext context, MemoryContext parent) /* Make sure all children have been deleted */ Assert(context->firstchild == NULL); MEMORY_ACCOUNT_DEC_ALLOCATED(set, set->localAllocated); - if (IS_MEMORY_ACCOUNT(set)) + if (IS_MEMORY_ACCOUNT(set) && parent) { /* Roll up our peak value to the parent, before this context goes away. 
*/ AllocSet parentset = (AllocSet) parent; diff --git a/src/backend/utils/mmgr/mcxt.c b/src/backend/utils/mmgr/mcxt.c index d47f45736ff..f6a869cf9b7 100644 --- a/src/backend/utils/mmgr/mcxt.c +++ b/src/backend/utils/mmgr/mcxt.c @@ -63,6 +63,7 @@ MemoryContext TopMemoryContext = NULL; MemoryContext ErrorContext = NULL; MemoryContext PostmasterContext = NULL; MemoryContext CacheMemoryContext = NULL; +MemoryContext TypeMemoryContext = NULL; MemoryContext MessageContext = NULL; MemoryContext TopTransactionContext = NULL; MemoryContext CurTransactionContext = NULL; diff --git a/src/include/access/genam.h b/src/include/access/genam.h index 3c87c3c4900..edf146b0aa3 100644 --- a/src/include/access/genam.h +++ b/src/include/access/genam.h @@ -138,6 +138,7 @@ typedef struct IndexOrderByDistance #define IndexScanIsValid(scan) PointerIsValid(scan) extern Relation index_open(Oid relationId, LOCKMODE lockmode); +extern Relation order_index_open(Oid relationId, LOCKMODE lockmode); extern void index_close(Relation relation, LOCKMODE lockmode); extern bool index_insert(Relation indexRelation, diff --git a/src/include/cdb/cdbsreh.h b/src/include/cdb/cdbsreh.h index fdb29716d3f..f61215abef3 100644 --- a/src/include/cdb/cdbsreh.h +++ b/src/include/cdb/cdbsreh.h @@ -100,5 +100,7 @@ extern Datum gp_truncate_error_log(PG_FUNCTION_ARGS); extern Datum gp_read_persistent_error_log(PG_FUNCTION_ARGS); extern Datum gp_truncate_persistent_error_log(PG_FUNCTION_ARGS); +extern TupleDesc GetErrorTupleDesc(void); +extern TupleDesc GetTempErrorTupleDesc(void); #endif /* CDBSREH_H */ diff --git a/src/include/cdb/cdbtranscat.h b/src/include/cdb/cdbtranscat.h new file mode 100644 index 00000000000..28604da3c92 --- /dev/null +++ b/src/include/cdb/cdbtranscat.h @@ -0,0 +1,112 @@ +#ifndef CDBTANSCAT_H +#define CDBTANSCAT_H + +#include "access/heapam.h" +#include "access/htup.h" +#include "nodes/execnodes.h" +#include "nodes/pg_list.h" +#include "utils/hsearch.h" + +typedef struct TransferTuple +{ + 
NodeTag type; + + uint32 t_len; + ItemPointerData t_self; + Oid t_tableOid; + char *t_data; +} TransferTuple; + +typedef struct SystemTableTransferNode +{ + NodeTag type; + + Oid my_temp_namespace; + Oid my_temp_toast_namespace; + List *transfer_tuples; +} SystemTableTransferNode; + +extern bool inPlPgsql; + +extern void TransferReset(void); +extern bool IsTransferOn(void); +extern void SetTransferOff(void); +extern void SetTransferOn(void); +extern bool RelationStored(Oid relid); +extern bool RelationStoredCheck(Oid relid); +extern void TransStoreTuple(HeapTuple htup); +extern void TransRemoveTuple(Oid tableOid, ItemPointerData tid); +extern SystemTableTransferNode *GetTransferNode(void); +extern void SystemTupleStoreReset(void); +extern void SystemTupleStoreInit(const char *catalogBuffer, int catalogSize); +extern List *getSystemTupleList(Oid relid); +extern bool PlFuncStored(Oid funcid); + +typedef void (*TransferReset_hook_type) (void); +extern PGDLLIMPORT TransferReset_hook_type TransferReset_hook; +typedef bool (*IsTransferOn_hook_type) (void); +extern PGDLLIMPORT IsTransferOn_hook_type IsTransferOn_hook; +typedef void (*SetTransferOff_hook_type) (void); +extern PGDLLIMPORT SetTransferOff_hook_type SetTransferOff_hook; +typedef void (*SetTransferOn_hook_type) (void); +extern PGDLLIMPORT SetTransferOn_hook_type SetTransferOn_hook; +typedef bool (*RelationStored_hook_type) (Oid relid); +extern PGDLLIMPORT RelationStored_hook_type RelationStored_hook; +typedef bool (*RelationStoredCheck_hook_type) (Oid relid); +extern PGDLLIMPORT RelationStoredCheck_hook_type RelationStoredCheck_hook; +typedef void (*TransStoreTuple_hook_type) (HeapTuple htup); +extern PGDLLIMPORT TransStoreTuple_hook_type TransStoreTuple_hook; +typedef void (*TransRemoveTuple_hook_type) (Oid tableOid, ItemPointerData tid); +extern PGDLLIMPORT TransRemoveTuple_hook_type TransRemoveTuple_hook; + +typedef SystemTableTransferNode *(*GetTransferNode_hook_type) (void); +extern PGDLLIMPORT 
GetTransferNode_hook_type GetTransferNode_hook; +typedef void (*SystemTupleStoreReset_hook_type) (void); +extern PGDLLIMPORT SystemTupleStoreReset_hook_type SystemTupleStoreReset_hook; +typedef void (*SystemTupleStoreInit_hook_type) (const char *catalogBuffer, int catalogSize); +extern PGDLLIMPORT SystemTupleStoreInit_hook_type SystemTupleStoreInit_hook; +typedef List *(*getSystemTupleList_hook_type) (Oid relid); +extern PGDLLIMPORT getSystemTupleList_hook_type getSystemTupleList_hook; +typedef bool (*PlFuncStored_hook_type) (Oid funcid); +extern PGDLLIMPORT PlFuncStored_hook_type PlFuncStored_hook; + + + +extern TableScanDesc systup_store_beginscan(Relation relation, int nkeys, ScanKey key, + uint32 flags); +extern void systup_store_endscan(TableScanDesc sscan); +extern bool systup_store_getnextslot(TableScanDesc sscan, TupleTableSlot *slot); +extern bool systup_store_active(void); +extern bool systup_store_sorted_active(void); + + +typedef TableScanDesc (*systup_store_beginscan_hook_type) (Relation relation, int nkeys, + ScanKey key, uint32 flags); +extern PGDLLIMPORT systup_store_beginscan_hook_type systup_store_beginscan_hook; +typedef void (*systup_store_endscan_hook_type) (TableScanDesc sscan); +extern PGDLLIMPORT systup_store_endscan_hook_type systup_store_endscan_hook; +typedef bool (*systup_store_getnextslot_hook_type) (TableScanDesc sscan, + TupleTableSlot *slot); +extern PGDLLIMPORT systup_store_getnextslot_hook_type systup_store_getnextslot_hook; +typedef bool (*systup_store_active_hook_type) (void); +extern PGDLLIMPORT systup_store_active_hook_type systup_store_active_hook; +typedef bool (*systup_store_sorted_active_hook_type) (void); +extern PGDLLIMPORT systup_store_sorted_active_hook_type systup_store_sorted_active_hook; + + +extern void DefaultValueStore(char *bin); +extern bool InTypeStore(void); +extern void TypeStore(Oid typeOid, int flags); +extern void InitQuery(const char *query_string); + +typedef void (*DefaultValueStore_hook_type) (char 
*bin); +typedef bool (*InTypeStore_hook_type) (void); +typedef void (*TypeStore_hook_type) (Oid typeOid, int flags); +typedef void (*InitQuery_hook_type) (const char *query_string); +extern PGDLLIMPORT DefaultValueStore_hook_type DefaultValueStore_hook; +extern PGDLLIMPORT InTypeStore_hook_type InTypeStore_hook; +extern PGDLLIMPORT TypeStore_hook_type TypeStore_hook; +extern PGDLLIMPORT InitQuery_hook_type InitQuery_hook; + + +#endif //CDBTANSCAT_H \ No newline at end of file diff --git a/src/include/commands/trigger.h b/src/include/commands/trigger.h index 995ba509cc8..c226469a09c 100644 --- a/src/include/commands/trigger.h +++ b/src/include/commands/trigger.h @@ -104,6 +104,8 @@ typedef struct TransitionCaptureState #define TRIGGER_EVENT_INSTEAD 0x00000010 #define TRIGGER_EVENT_TIMINGMASK 0x00000018 +#define TRIGGER_EVENT_PREPARE 0x00000020 + /* More TriggerEvent flags, used only within trigger.c */ #define AFTER_TRIGGER_DEFERRABLE 0x00000020 diff --git a/src/include/executor/execExpr.h b/src/include/executor/execExpr.h index e3e7c41aa8d..11059d5053a 100644 --- a/src/include/executor/execExpr.h +++ b/src/include/executor/execExpr.h @@ -765,6 +765,7 @@ typedef struct SubscriptExecSteps /* functions in execExpr.c */ extern void ExprEvalPushStep(ExprState *es, const ExprEvalStep *s); +extern void ExprEvalPushStep_internal(ExprState *es, const ExprEvalStep *s); /* functions in execExprInterp.c */ extern void ExecReadyInterpretedExpr(ExprState *state); diff --git a/src/include/executor/executor.h b/src/include/executor/executor.h index 187db7a0480..1bde2c9765e 100644 --- a/src/include/executor/executor.h +++ b/src/include/executor/executor.h @@ -17,6 +17,7 @@ #define EXECUTOR_H #include "executor/execdesc.h" +#include "executor/execExpr.h" #include "fmgr.h" #include "nodes/lockoptions.h" #include "nodes/parsenodes.h" @@ -105,6 +106,15 @@ extern PGDLLIMPORT SetDtxFlag_hook_type SetDtxFlag_hook; typedef PlanState *(*ExecInitNode_hook_type)(Plan *node, EState *estate, 
int eflags); extern PGDLLIMPORT ExecInitNode_hook_type ExecInitNode_hook; +typedef void (*ExprEvalPushStep_hook_type) (ExprState *es, const ExprEvalStep *s); +extern PGDLLIMPORT ExprEvalPushStep_hook_type ExprEvalPushStep_hook; + +typedef void (*CollectResultInfo_hook_type) (ResultRelInfo *resultRelInfo); +extern PGDLLIMPORT CollectResultInfo_hook_type CollectResultInfo_hook; + +typedef void (*CollectProc_hook_type) (FunctionCallInfo fcinfo); +extern PGDLLIMPORT CollectProc_hook_type CollectProc_hook; + /* Hook for plugins to get control in ExecEndNode() */ typedef void (*ExecEndNode_hook_type)(PlanState *node); extern PGDLLIMPORT ExecEndNode_hook_type ExecEndNode_hook; @@ -735,4 +745,7 @@ extern void change_varattnos_of_a_varno(Node *node, const AttrNumber *newattno, Index varno); extern bool already_under_executor_run(void); +extern void InitPlan(QueryDesc *queryDesc, int eflags); + + #endif /* EXECUTOR_H */ diff --git a/src/include/executor/functions.h b/src/include/executor/functions.h index 2d56ca2804d..4b3851bef75 100644 --- a/src/include/executor/functions.h +++ b/src/include/executor/functions.h @@ -35,7 +35,7 @@ typedef struct SQLFunctionParseInfo typedef SQLFunctionParseInfo *SQLFunctionParseInfoPtr; extern Datum fmgr_sql(PG_FUNCTION_ARGS); - +extern void fmgr_sql_init(PG_FUNCTION_ARGS); extern SQLFunctionParseInfoPtr prepare_sql_fn_parse_info(HeapTuple procedureTuple, Node *call_expr, Oid inputCollation); @@ -53,5 +53,7 @@ extern bool check_sql_fn_retval(List *queryTreeLists, extern DestReceiver *CreateSQLFunctionDestReceiver(void); extern void querytree_safe_for_qe(Node *node); +extern void init_sql_fcache(FunctionCallInfo fcinfo, Oid collation, bool lazyEvalOK); + #endif /* FUNCTIONS_H */ diff --git a/src/include/mb/pg_wchar.h b/src/include/mb/pg_wchar.h index 7c0645b9c0a..881559092a1 100644 --- a/src/include/mb/pg_wchar.h +++ b/src/include/mb/pg_wchar.h @@ -636,6 +636,7 @@ extern int pg_bind_textdomain_codeset(const char *domainname); extern unsigned 
char *pg_do_encoding_conversion(unsigned char *src, int len, int src_encoding, int dest_encoding); +extern void StoreEncodingConversion(int dest_encoding); extern int pg_do_encoding_conversion_buf(Oid proc, int src_encoding, int dest_encoding, diff --git a/src/include/nodes/nodes.h b/src/include/nodes/nodes.h index c3ff7a919e4..87598d481ea 100644 --- a/src/include/nodes/nodes.h +++ b/src/include/nodes/nodes.h @@ -686,7 +686,8 @@ typedef enum NodeTag T_RetrieveStmt, T_ReindexIndexInfo, /* in nodes/parsenodes.h */ T_EphemeralNamedRelationInfo, /* utils/queryenvironment.h */ - + T_SystemTableTransferNode, + T_TransferTuple, } NodeTag; /* diff --git a/src/include/nodes/parsenodes.h b/src/include/nodes/parsenodes.h index 04f3b2b35fc..4d8b7c29e3b 100644 --- a/src/include/nodes/parsenodes.h +++ b/src/include/nodes/parsenodes.h @@ -3769,6 +3769,7 @@ typedef struct CallContext { NodeTag type; bool atomic; + bool prepare; } CallContext; /* ---------------------- diff --git a/src/include/pg_config_manual.h b/src/include/pg_config_manual.h index 39ace4466ac..1687266624f 100644 --- a/src/include/pg_config_manual.h +++ b/src/include/pg_config_manual.h @@ -303,9 +303,11 @@ */ #ifdef USE_ASSERT_CHECKING #define CLOBBER_FREED_MEMORY -#define RELCACHE_FORCE_RELEASE #endif +#define RELCACHE_FORCE_RELEASE + + /* * Define this to check memory allocation errors (scribbling on more * bytes than were allocated). Right now, this gets defined diff --git a/src/include/utils/typcache.h b/src/include/utils/typcache.h index 59d22d3ea13..81e2f20424b 100644 --- a/src/include/utils/typcache.h +++ b/src/include/utils/typcache.h @@ -154,6 +154,8 @@ typedef struct TypeCacheEntry /* This value will not equal any valid tupledesc identifier, nor 0 */ #define INVALID_TUPLEDESC_IDENTIFIER ((uint64) 1) +extern MemoryContext TypeMemoryContext; + /* * Callers wishing to maintain a long-lived reference to a domain's constraint * set must store it in one of these. 
Use InitDomainConstraintRef() and @@ -215,4 +217,7 @@ extern List *build_tuple_node_list(int start); /* GPDB: retrieve conn calls this function to clear record cache */ extern void reset_record_cache(void); +extern void load_enum_cache_data(TypeCacheEntry *tcache); + + #endif /* TYPCACHE_H */ diff --git a/src/pl/plpgsql/src/pl_comp.c b/src/pl/plpgsql/src/pl_comp.c index 76ea85d8be6..8f4f525934f 100644 --- a/src/pl/plpgsql/src/pl_comp.c +++ b/src/pl/plpgsql/src/pl_comp.c @@ -172,7 +172,8 @@ plpgsql_compile(FunctionCallInfo fcinfo, bool forValidator) { /* We have a compiled function, but is it still valid? */ if (function->fn_xmin == HeapTupleHeaderGetRawXmin(procTup->t_data) && - ItemPointerEquals(&function->fn_tid, &procTup->t_self)) + ItemPointerEquals(&function->fn_tid, &procTup->t_self) && + !function_is_prepare(fcinfo)) function_valid = true; else { diff --git a/src/pl/plpgsql/src/pl_exec.c b/src/pl/plpgsql/src/pl_exec.c index 9a16ac70d2e..9a03f2b8f5b 100644 --- a/src/pl/plpgsql/src/pl_exec.c +++ b/src/pl/plpgsql/src/pl_exec.c @@ -23,9 +23,11 @@ #include "access/tupconvert.h" #include "catalog/pg_proc.h" #include "catalog/pg_type.h" +#include "cdb/cdbtranscat.h" #include "commands/defrem.h" #include "executor/execExpr.h" #include "executor/spi.h" +#include "executor/spi_priv.h" #include "executor/tstoreReceiver.h" #include "funcapi.h" #include "mb/stringinfo_mb.h" @@ -423,7 +425,8 @@ static Datum do_cast_value(PLpgSQL_execstate *estate, Oid reqtype, int32 reqtypmod); static plpgsql_CastHashEntry *get_cast_hashentry(PLpgSQL_execstate *estate, Oid srctype, int32 srctypmod, - Oid dsttype, int32 dsttypmod); + Oid dsttype, int32 dsttypmod, + bool renew); static void exec_init_tuple_store(PLpgSQL_execstate *estate); static void exec_set_found(PLpgSQL_execstate *estate, bool state); static void plpgsql_create_econtext(PLpgSQL_execstate *estate); @@ -7678,7 +7681,8 @@ do_cast_value(PLpgSQL_execstate *estate, cast_entry = get_cast_hashentry(estate, valtype, valtypmod, - 
reqtype, reqtypmod); + reqtype, reqtypmod, + true); if (cast_entry) { ExprContext *econtext = estate->eval_econtext; @@ -7715,7 +7719,8 @@ do_cast_value(PLpgSQL_execstate *estate, static plpgsql_CastHashEntry * get_cast_hashentry(PLpgSQL_execstate *estate, Oid srctype, int32 srctypmod, - Oid dsttype, int32 dsttypmod) + Oid dsttype, int32 dsttypmod, + bool renew) { plpgsql_CastHashKey cast_key; plpgsql_CastHashEntry *cast_entry; @@ -7735,7 +7740,8 @@ get_cast_hashentry(PLpgSQL_execstate *estate, cast_entry->cast_cexpr = NULL; if (cast_entry->cast_cexpr == NULL || - !cast_entry->cast_cexpr->is_valid) + !cast_entry->cast_cexpr->is_valid || + renew) { /* * We've not looked up this coercion before, or we have but the cached @@ -7995,7 +8001,7 @@ exec_simple_check_plan(PLpgSQL_execstate *estate, PLpgSQL_expr *expr) * Release the plan refcount obtained by SPI_plan_get_cached_plan. (This * refcount is held by the wrong resowner, so we can't just repurpose it.) */ - ReleaseCachedPlan(cplan, CurrentResourceOwner); + ReleaseCachedPlan(cplan, expr->plan->saved ? 
CurrentResourceOwner : NULL); } /* @@ -8734,3 +8740,1708 @@ format_preparedparamsdata(PLpgSQL_execstate *estate, return paramstr.data; } + +static void prepare_stmt_block(PLpgSQL_execstate *estate, PLpgSQL_stmt_block *block); +static void init_toplevel_block(PLpgSQL_execstate *estate, PLpgSQL_stmt_block *block); + +Datum +plpgsql_prepare_function(PLpgSQL_function *func, FunctionCallInfo fcinfo, + EState *simple_eval_estate, + ResourceOwner simple_eval_resowner, + ResourceOwner procedure_resowner, + bool atomic) +{ + PLpgSQL_execstate estate; + ErrorContextCallback plerrcontext; + int i; + + if(PlFuncStored(func->fn_oid)) + return (Datum) 0; + + inPlPgsql = true; + + /* + * Setup the execution state + */ + plpgsql_estate_setup(&estate, func, (ReturnSetInfo *) fcinfo->resultinfo, + simple_eval_estate, simple_eval_resowner); + estate.atomic = atomic; + + plerrcontext.callback = plpgsql_exec_error_callback; + plerrcontext.arg = &estate; + plerrcontext.previous = error_context_stack; + error_context_stack = &plerrcontext; + + estate.err_text = gettext_noop("during initialization of execution state"); + copy_plpgsql_datums(&estate, func); + + estate.err_text = gettext_noop("while storing call arguments into local variables"); + for (i = 0; i < func->fn_nargs; i++) + { + int n = func->fn_argvarnos[i]; + + switch (estate.datums[n]->dtype) + { + case PLPGSQL_DTYPE_VAR: + { + PLpgSQL_var *var = (PLpgSQL_var *) estate.datums[n]; + + assign_simple_var(&estate, var, + fcinfo->args[i].value, + true, + false); + + /* + * Force any array-valued parameter to be stored in + * expanded form in our local variable, in hopes of + * improving efficiency of uses of the variable. (This is + * a hack, really: why only arrays? Need more thought + * about which cases are likely to win. See also + * typisarray-specific heuristic in exec_assign_value.) + * + * Special cases: If passed a R/W expanded pointer, assume + * we can commandeer the object rather than having to copy + * it. 
If passed a R/O expanded pointer, just keep it as + * the value of the variable for the moment. (We'll force + * it to R/W if the variable gets modified, but that may + * very well never happen.) + */ + if (!var->isnull && var->datatype->typisarray) + { + if (VARATT_IS_EXTERNAL_EXPANDED_RW(DatumGetPointer(var->value))) + { + /* take ownership of R/W object */ + assign_simple_var(&estate, var, + TransferExpandedObject(var->value, + estate.datum_context), + false, + true); + } + else if (VARATT_IS_EXTERNAL_EXPANDED_RO(DatumGetPointer(var->value))) + { + /* R/O pointer, keep it as-is until assigned to */ + } + else + { + /* flat array, so force to expanded form */ + assign_simple_var(&estate, var, + expand_array(var->value, + estate.datum_context, + NULL), + false, + true); + } + } + } + break; + + case PLPGSQL_DTYPE_REC: + { + PLpgSQL_rec *rec = (PLpgSQL_rec *) estate.datums[n]; + + if (!fcinfo->args[i].isnull) + { + /* Assign row value from composite datum */ + exec_move_row_from_datum(&estate, + (PLpgSQL_variable *) rec, + fcinfo->args[i].value); + } + else + { + /* If arg is null, set variable to null */ + exec_move_row(&estate, (PLpgSQL_variable *) rec, + NULL, NULL); + } + /* clean up after exec_move_row() */ + exec_eval_cleanup(&estate); + } + break; + + default: + /* Anything else should not be an argument variable */ + elog(ERROR, "unrecognized dtype: %d", func->datums[i]->dtype); + } + } + + estate.err_text = gettext_noop("during function entry"); + + + + /* + * Set the magic variable FOUND to false + */ + exec_set_found(&estate, false); + estate.err_text = NULL; + estate.err_stmt = (PLpgSQL_stmt *) (func->action); + + + init_toplevel_block(&estate, func->action); + + estate.err_stmt = NULL; + estate.err_text = gettext_noop("while casting return value to function's return type"); + + fcinfo->isnull = estate.retisnull; + + estate.err_text = gettext_noop("during function exit"); + + /* Clean up any leftover temporary memory */ + 
plpgsql_destroy_econtext(&estate); + exec_eval_cleanup(&estate); + /* stmt_mcontext will be destroyed when function's main context is */ + + /* + * Pop the error context stack + */ + error_context_stack = plerrcontext.previous; + + inPlPgsql = false; + + /* + * Return the function's result + */ + return estate.retval; +} + +/* + * Catalog collect + */ +void +plpgsql_prepare_trigger(PLpgSQL_function *func, + TriggerData *trigdata) +{ + PLpgSQL_execstate estate; + ErrorContextCallback plerrcontext; + int rc; + TupleDesc tupdesc; + PLpgSQL_rec *rec_new, + *rec_old; + + tupdesc = RelationGetDescr(trigdata->tg_relation); + + plpgsql_estate_setup(&estate, func, NULL, NULL, NULL); + estate.trigdata = trigdata; + + /* + * Setup error traceback support for ereport() + */ + plerrcontext.callback = plpgsql_exec_error_callback; + plerrcontext.arg = &estate; + plerrcontext.previous = error_context_stack; + error_context_stack = &plerrcontext; + estate.err_text = gettext_noop("during initialization of execution state"); + + copy_plpgsql_datums(&estate, func); + tupdesc = RelationGetDescr(trigdata->tg_relation); + rec_new = (PLpgSQL_rec *) (estate.datums[func->new_varno]); + rec_old = (PLpgSQL_rec *) (estate.datums[func->old_varno]); + + rec_new->erh = make_expanded_record_from_tupdesc(tupdesc, + estate.datum_context); + rec_old->erh = make_expanded_record_from_exprecord(rec_new->erh, + estate.datum_context); + + if (!TRIGGER_FIRED_FOR_ROW(trigdata->tg_event)) + { + /* + * Per-statement triggers don't use OLD/NEW variables + */ + } + else if (TRIGGER_FIRED_BY_INSERT(trigdata->tg_event)) + { + expanded_record_set_tuple(rec_new->erh, trigdata->tg_trigtuple, + false, false); + } + else if (TRIGGER_FIRED_BY_UPDATE(trigdata->tg_event)) + { + expanded_record_set_tuple(rec_new->erh, trigdata->tg_newtuple, + false, false); + expanded_record_set_tuple(rec_old->erh, trigdata->tg_trigtuple, + false, false); + + /* + * In BEFORE trigger, stored generated columns are not computed yet, + * so 
make them null in the NEW row. (Only needed in UPDATE branch; + * in the INSERT case, they are already null, but in UPDATE, the field + * still contains the old value.) Alternatively, we could construct a + * whole new row structure without the generated columns, but this way + * seems more efficient and potentially less confusing. + */ + if (tupdesc->constr && tupdesc->constr->has_generated_stored && + TRIGGER_FIRED_BEFORE(trigdata->tg_event)) + { + for (int i = 0; i < tupdesc->natts; i++) + if (TupleDescAttr(tupdesc, i)->attgenerated == ATTRIBUTE_GENERATED_STORED) + expanded_record_set_field_internal(rec_new->erh, + i + 1, + (Datum) 0, + true, /* isnull */ + false, false); + } + } + else if (TRIGGER_FIRED_BY_DELETE(trigdata->tg_event)) + { + expanded_record_set_tuple(rec_old->erh, trigdata->tg_trigtuple, + false, false); + } + else + elog(ERROR, "unrecognized trigger action: not INSERT, DELETE, or UPDATE"); + + /* Make transition tables visible to this SPI connection */ + rc = SPI_register_trigger_data(trigdata); + Assert(rc >= 0); + + estate.err_text = gettext_noop("during function entry"); + + /* + * Set the magic variable FOUND to false + */ + exec_set_found(&estate, false); + + /* + * Now call the toplevel block of statements + */ + estate.err_text = NULL; + estate.err_stmt = (PLpgSQL_stmt *) (func->action); + + prepare_stmt_block(&estate, (PLpgSQL_stmt_block *) func->action); + + /* Clean up any leftover temporary memory */ + plpgsql_destroy_econtext(&estate); + exec_eval_cleanup(&estate); + + /* + * Pop the error context stack + */ + error_context_stack = plerrcontext.previous; +} + + +/* + * Catalog collect + */ +static void init_stmts(PLpgSQL_execstate *estate, List *stmts); +static void init_stmt_if(PLpgSQL_execstate *estate, PLpgSQL_stmt_if *stmt); +static void init_stmt_execsql(PLpgSQL_execstate *estate, PLpgSQL_stmt_execsql *stmt); +static void init_stmt_raise(PLpgSQL_execstate *estate, PLpgSQL_stmt_raise *stmt); +static void 
init_run_select(PLpgSQL_execstate *estate, PLpgSQL_expr *expr, + int64 maxtuples, Portal *portalP); +static void init_stmt_return_next(PLpgSQL_execstate *estate, + PLpgSQL_stmt_return_next *stmt); +static void init_stmt_case(PLpgSQL_execstate *estate, PLpgSQL_stmt_case *stmt); +static void init_stmt_open(PLpgSQL_execstate *estate, PLpgSQL_stmt_open *stmt); +static void init_stmt_dynexecute(PLpgSQL_execstate *estate, + PLpgSQL_stmt_dynexecute *stmt); + +static void init_eval_expr(PLpgSQL_execstate *estate, PLpgSQL_expr *expr); +static void init_eval_simple_expr(PLpgSQL_execstate *estate, + PLpgSQL_expr *expr); +static void init_assign_value(PLpgSQL_execstate *estate, + PLpgSQL_datum *target, + Oid valtype, int32 valtypmod); +static void init_case_value(PLpgSQL_execstate *estate, + Oid valtype, int32 valtypmod, + Oid reqtype, int32 reqtypmod); +static void init_prepare_plan(PLpgSQL_execstate *estate, + PLpgSQL_expr *expr, int cursorOptions, + bool keepplan); +static void init_assign_expr(PLpgSQL_execstate *estate, PLpgSQL_datum *target, + PLpgSQL_expr *expr); +static void init_for_query(PLpgSQL_execstate *estate, PLpgSQL_stmt_forq *stmt); + +static void +init_toplevel_block(PLpgSQL_execstate *estate, PLpgSQL_stmt_block *block) +{ + ResourceOwner oldOwner; + + oldOwner = CurrentResourceOwner; + PG_TRY(); + { + prepare_stmt_block(estate, block); + } + PG_CATCH(); + { + FlushErrorState(); + } + PG_END_TRY(); + + CurrentResourceOwner = oldOwner; +} + +void +init_stmt(PLpgSQL_execstate *estate, PLpgSQL_stmt *stmt) +{ + PLpgSQL_stmt *save_estmt; + + save_estmt = estate->err_stmt; + estate->err_stmt = stmt; + + switch (stmt->cmd_type) + { + case PLPGSQL_STMT_BLOCK: + prepare_stmt_block(estate, (PLpgSQL_stmt_block *) stmt); + break; + + case PLPGSQL_STMT_ASSIGN: + { + PLpgSQL_stmt_assign *stmtAssign = (PLpgSQL_stmt_assign *) stmt; + + init_assign_expr(estate, estate->datums[stmtAssign->varno], stmtAssign->expr); + break; + } + + case PLPGSQL_STMT_PERFORM: + { + 
PLpgSQL_stmt_perform *stmtPerform = (PLpgSQL_stmt_perform *) stmt; + + PLpgSQL_expr *expr = stmtPerform->expr; + init_run_select(estate, expr, 0, NULL); + break; + } + + case PLPGSQL_STMT_IF: + init_stmt_if(estate, (PLpgSQL_stmt_if *) stmt); + break; + + case PLPGSQL_STMT_CASE: + init_stmt_case(estate, (PLpgSQL_stmt_case *) stmt); + break; + + case PLPGSQL_STMT_WHILE: + { + PLpgSQL_stmt_while *stmtWhile; + + stmtWhile = (PLpgSQL_stmt_while *) stmt; + + init_eval_expr(estate, stmtWhile->cond); + init_stmts(estate, stmtWhile->body); + break; + } + + case PLPGSQL_STMT_LOOP: + { + PLpgSQL_stmt_loop *stmtLoop = (PLpgSQL_stmt_loop *) stmt; + + init_stmts(estate, stmtLoop->body); + break; + } + + case PLPGSQL_STMT_FORI: + { + PLpgSQL_stmt_fori *stmtFori = (PLpgSQL_stmt_fori *) stmt; + + init_eval_expr(estate, stmtFori->lower); + init_eval_expr(estate, stmtFori->upper); + + init_stmts(estate, stmtFori->body); + break; + } + + case PLPGSQL_STMT_FORS: + { + PLpgSQL_stmt_fors *stmtFors = (PLpgSQL_stmt_fors *) stmt; + + init_run_select(estate, stmtFors->query, 0, NULL); + init_for_query(estate, (PLpgSQL_stmt_forq *) stmtFors); + break; + } + + case PLPGSQL_STMT_FORC: + { + PLpgSQL_stmt_forc *stmtForc; + PLpgSQL_var *curvar; + + stmtForc = (PLpgSQL_stmt_forc *) stmt; + + curvar = (PLpgSQL_var *) (estate->datums[stmtForc->curvar]); + if (!curvar->isnull) + return; + if (stmtForc->argquery != NULL) + { + PLpgSQL_stmt_execsql set_args; + + if (curvar->cursor_explicit_argrow < 0) + ereport(ERROR, + (errcode(ERRCODE_SYNTAX_ERROR), + errmsg("arguments given for cursor without arguments"))); + + memset(&set_args, 0, sizeof(set_args)); + set_args.cmd_type = PLPGSQL_STMT_EXECSQL; + set_args.lineno = stmtForc->lineno; + set_args.sqlstmt = stmtForc->argquery; + set_args.into = true; + /* XXX historically this has not been STRICT */ + set_args.target = (PLpgSQL_variable *) + (estate->datums[curvar->cursor_explicit_argrow]); + + init_stmt_execsql(estate, &set_args); + } + break; + } + + 
case PLPGSQL_STMT_EXIT: + { + PLpgSQL_stmt_exit *stmt_exit; + + stmt_exit = (PLpgSQL_stmt_exit *) stmt; + if (stmt_exit->cond != NULL) + { + init_eval_expr(estate, stmt_exit->cond); + } + break; + } + + case PLPGSQL_STMT_RETURN: + { + PLpgSQL_stmt_return *stmtReturn = (PLpgSQL_stmt_return *) stmt; + + if (stmtReturn->expr != NULL) + { + init_eval_expr(estate, stmtReturn->expr); + } + break; + } + + case PLPGSQL_STMT_RETURN_NEXT: + init_stmt_return_next(estate, + (PLpgSQL_stmt_return_next *) stmt); + break; + + case PLPGSQL_STMT_RETURN_QUERY: + { + PLpgSQL_stmt_return_query *stmtReturnQuery = + (PLpgSQL_stmt_return_query *) stmt; + + if (stmtReturnQuery->query != NULL) + { + /* static query */ + init_run_select(estate, stmtReturnQuery->query, 0, 0); + } + else + { + /* RETURN QUERY EXECUTE */ + Assert(stmtReturnQuery->dynquery != NULL); + init_eval_expr(estate, stmtReturnQuery->dynquery); + } + break; + } + + case PLPGSQL_STMT_RAISE: + init_stmt_raise(estate, (PLpgSQL_stmt_raise *) stmt); + break; + + case PLPGSQL_STMT_EXECSQL: + init_stmt_execsql(estate, (PLpgSQL_stmt_execsql *) stmt); + break; + + case PLPGSQL_STMT_DYNFORS: + { + PLpgSQL_stmt_dynfors *stmtDynfors = (PLpgSQL_stmt_dynfors *) stmt; + + init_eval_expr(estate, stmtDynfors->query); + break; + } + + case PLPGSQL_STMT_OPEN: + init_stmt_open(estate, (PLpgSQL_stmt_open *) stmt); + break; + + case PLPGSQL_STMT_DYNEXECUTE: + init_stmt_dynexecute(estate, (PLpgSQL_stmt_dynexecute *) stmt); + break; + + case PLPGSQL_STMT_FETCH: + { + PLpgSQL_stmt_fetch *stmtFetch = (PLpgSQL_stmt_fetch *) stmt; + + if (stmtFetch->expr) + { + init_eval_expr(estate, stmtFetch->expr); + } + break; + } + + case PLPGSQL_STMT_GETDIAG: + break; + + case PLPGSQL_STMT_FOREACH_A: + break; + + case PLPGSQL_STMT_CLOSE: + break; + + default: + estate->err_stmt = save_estmt; + elog(ERROR, "unrecognized cmd_type: %d", stmt->cmd_type); + } + + estate->err_stmt = save_estmt; + +} + +static void +init_for_query(PLpgSQL_execstate *estate, 
PLpgSQL_stmt_forq *stmt) +{ + + init_stmts(estate, stmt->body); +} + + + +static void +init_stmt_open(PLpgSQL_execstate *estate, PLpgSQL_stmt_open *stmt) +{ + PLpgSQL_var *curvar; + MemoryContext stmt_mcontext = NULL; + char *curname = NULL; + PLpgSQL_expr *query; + Portal portal; + + curvar = (PLpgSQL_var *) (estate->datums[stmt->curvar]); + if (!curvar->isnull) + { + MemoryContext oldcontext; + + /* We only need stmt_mcontext to hold the cursor name string */ + stmt_mcontext = get_stmt_mcontext(estate); + oldcontext = MemoryContextSwitchTo(stmt_mcontext); + curname = TextDatumGetCString(curvar->value); + MemoryContextSwitchTo(oldcontext); + + if (SPI_cursor_find(curname) != NULL) + ereport(ERROR, + (errcode(ERRCODE_DUPLICATE_CURSOR), + errmsg("cursor \"%s\" already in use", curname))); + } + + if (stmt->query != NULL) + { + /* ---------- + * This is an OPEN refcursor FOR SELECT ... + * + * We just make sure the query is planned. The real work is + * done downstairs. + * ---------- + */ + query = stmt->query; + if (query->plan == NULL) + exec_prepare_plan(estate, query, stmt->cursor_options | CURSOR_OPT_UPDATABLE); + } + else if (stmt->dynquery != NULL) + { + /* ---------- + * This is an OPEN refcursor FOR EXECUTE ... + * ---------- + */ + portal = exec_dynquery_with_params(estate, + stmt->dynquery, + stmt->params, + curname, + stmt->cursor_options | CURSOR_OPT_UPDATABLE); + + /* + * If cursor variable was NULL, store the generated portal name in it. + * Note: exec_dynquery_with_params already reset the stmt_mcontext, so + * curname is a dangling pointer here; but testing it for nullness is + * OK. + */ + if (curname == NULL) + assign_text_var(estate, curvar, portal->name); + + return; + } + else + { + /* ---------- + * This is an OPEN cursor + * + * Note: parser should already have checked that statement supplies + * args iff cursor needs them, but we check again to be safe. 
+ * ---------- + */ + if (stmt->argquery != NULL) + { + /* ---------- + * OPEN CURSOR with args. We fake a SELECT ... INTO ... + * statement to evaluate the args and put 'em into the + * internal row. + * ---------- + */ + PLpgSQL_stmt_execsql set_args; + + if (curvar->cursor_explicit_argrow < 0) + ereport(ERROR, + (errcode(ERRCODE_SYNTAX_ERROR), + errmsg("arguments given for cursor without arguments"))); + + memset(&set_args, 0, sizeof(set_args)); + set_args.cmd_type = PLPGSQL_STMT_EXECSQL; + set_args.lineno = stmt->lineno; + set_args.sqlstmt = stmt->argquery; + set_args.into = true; + /* XXX historically this has not been STRICT */ + set_args.target = (PLpgSQL_variable *) + (estate->datums[curvar->cursor_explicit_argrow]); + + if (exec_stmt_execsql(estate, &set_args) != PLPGSQL_RC_OK) + elog(ERROR, "open cursor failed during argument processing"); + } + else + { + if (curvar->cursor_explicit_argrow >= 0) + ereport(ERROR, + (errcode(ERRCODE_SYNTAX_ERROR), + errmsg("arguments required for cursor"))); + } + + query = curvar->cursor_explicit_expr; + if (query->plan == NULL) + exec_prepare_plan(estate, query, curvar->cursor_options | CURSOR_OPT_UPDATABLE); + } +} + +static void +init_stmt_case(PLpgSQL_execstate *estate, PLpgSQL_stmt_case *stmt) +{ + ListCell *l; + + return; + + if (stmt->t_expr != NULL) + { + init_eval_expr(estate, stmt->t_expr); + + exec_eval_cleanup(estate); + } + + /* Now search for a successful WHEN clause */ + foreach(l, stmt->case_when_list) + { + PLpgSQL_case_when *cwt = (PLpgSQL_case_when *) lfirst(l); + + init_eval_expr(estate, cwt->expr); + exec_eval_cleanup(estate); + } + + /* Evaluate the ELSE statements, and we're done */ + init_stmts(estate, stmt->else_stmts); +} + +static void +init_stmt_return_next(PLpgSQL_execstate *estate, + PLpgSQL_stmt_return_next *stmt) +{ + TupleDesc tupdesc; + int natts; + + if (!estate->retisset) + ereport(ERROR, + (errcode(ERRCODE_SYNTAX_ERROR), + errmsg("cannot use RETURN NEXT in a non-SETOF function"))); + + 
+ + if (stmt->retvarno >= 0) + { + PLpgSQL_datum *retvar = estate->datums[stmt->retvarno]; + + + if (estate->tuple_store == NULL) + exec_init_tuple_store(estate); + + tupdesc = estate->tuple_store_desc; + natts = tupdesc->natts; + + switch (retvar->dtype) + { + case PLPGSQL_DTYPE_PROMISE: + case PLPGSQL_DTYPE_VAR: + { + PLpgSQL_var *var = (PLpgSQL_var *) retvar; + Datum retval = var->value; + bool isNull = var->isnull; + Form_pg_attribute attr = TupleDescAttr(tupdesc, 0); + + if (natts != 1) + ereport(ERROR, + (errcode(ERRCODE_DATATYPE_MISMATCH), + errmsg("wrong result type supplied in RETURN NEXT"))); + + /* let's be very paranoid about the cast step */ + retval = MakeExpandedObjectReadOnly(retval, + isNull, + var->datatype->typlen); + + init_case_value(estate, var->datatype->typoid, + var->datatype->atttypmod, + attr->atttypid, attr->atttypmod); + + } + break; + case PLPGSQL_DTYPE_REC: + break; + case PLPGSQL_DTYPE_ROW: + break; + default: + elog(ERROR, "unrecognized dtype: %d", retvar->dtype); + break; + } + } + else if (stmt->expr) + { + init_eval_expr(estate, stmt->expr); + } + else + { + ereport(ERROR, + (errcode(ERRCODE_SYNTAX_ERROR), + errmsg("RETURN NEXT must have a parameter"))); + } +} + +static void +init_eval_simple_expr(PLpgSQL_execstate *estate, + PLpgSQL_expr *expr) +{ + CachedPlan *cplan; + ExprContext *econtext = estate->eval_econtext; + LocalTransactionId curlxid = MyProc->lxid; + + + /* + * Forget it if expression wasn't simple before. + */ + if (expr->expr_simple_expr == NULL) + return; + + /* + * If expression is in use in current xact, don't touch it. 
+ */ + if (expr->expr_simple_in_use && expr->expr_simple_lxid == curlxid) + return; + + cplan = SPI_plan_get_cached_plan(expr->plan); + + { + expr->expr_simple_state = + ExecInitExprWithParams(expr->expr_simple_expr, + econtext->ecxt_param_list_info); + expr->expr_simple_in_use = false; + } + + ReleaseCachedPlan(cplan, CurrentResourceOwner); +} + +static void +init_eval_expr(PLpgSQL_execstate *estate, + PLpgSQL_expr *expr) +{ + ListCell *lc1; + ParamListInfo paramLI; + + if (expr->plan == NULL) + init_prepare_plan(estate, expr, CURSOR_OPT_PARALLEL_OK, true); + + init_eval_simple_expr(estate, expr); + + paramLI = setup_param_list(estate, expr); + + foreach(lc1, expr->plan->plancache_list) + { + CachedPlanSource *plansource = (CachedPlanSource *) lfirst(lc1); + SPIPlanPtr plan = expr->plan; + CachedPlan *cplan; + + /* + * If this is a one-shot plan, we still need to do parse analysis. + */ + if (plan->oneshot) + { + RawStmt *parsetree = plansource->raw_parse_tree; + const char *src = plansource->query_string; + List *stmt_list; + + /* + * Parameter datatypes are driven by parserSetup hook if provided, + * otherwise we use the fixed parameter list. + */ + if (parsetree == NULL) + stmt_list = NIL; + else if (plan->parserSetup != NULL) + { + Assert(plan->nargs == 0); + stmt_list = pg_analyze_and_rewrite_params(parsetree, + src, + plan->parserSetup, + plan->parserSetupArg, + NULL); + } + else + { + stmt_list = pg_analyze_and_rewrite(parsetree, + src, + plan->argtypes, + plan->nargs, + NULL); + } + + /* Finish filling in the CachedPlanSource */ + CompleteCachedPlan(plansource, + stmt_list, + NULL, + nodeTag(parsetree->stmt), + plan->argtypes, + plan->nargs, + plan->parserSetup, + plan->parserSetupArg, + plan->cursor_options, + false); /* not fixed result */ + } + + cplan = GetCachedPlan(plansource, paramLI, + plan->saved ? CurrentResourceOwner : NULL, + NULL, NULL); + ReleaseCachedPlan(cplan, plan->saved ? 
CurrentResourceOwner : NULL); + } +} + +static void +init_prepare_plan(PLpgSQL_execstate *estate, + PLpgSQL_expr *expr, int cursorOptions, + bool keepplan) +{ + SPIPlanPtr plan = NULL; + SPIPrepareOptions options; + + /* + * The grammar can't conveniently set expr->func while building the parse + * tree, so make sure it's set before parser hooks need it. + */ + expr->func = estate->func; + + /* + * Generate and save the plan + */ + memset(&options, 0, sizeof(options)); + options.parserSetup = (ParserSetupHook) plpgsql_parser_setup; + options.parserSetupArg = (void *) expr; + options.parseMode = expr->parseMode; + options.cursorOptions = cursorOptions; + plan = SPI_prepare_extended(expr->query, &options); + + if (plan == NULL) + elog(ERROR, "SPI_prepare_params failed for \"%s\": %s", + expr->query, SPI_result_code_string(SPI_result)); + + if (keepplan) + SPI_keepplan(plan); + expr->plan = plan; + expr->target_param = -1; + + /* Check to see if it's a simple expression */ + exec_simple_check_plan(estate, expr); +} +static void +init_assign_expr(PLpgSQL_execstate *estate, PLpgSQL_datum *target, + PLpgSQL_expr *expr) +{ + if (expr->plan == NULL) + { + init_prepare_plan(estate, expr, 0, true); + if (target->dtype == PLPGSQL_DTYPE_VAR) + exec_check_rw_parameter(expr); + } + init_eval_expr(estate, expr); + init_assign_value(estate, target, expr->expr_simple_type, expr->expr_simple_typmod); +} + +static void +init_case_value(PLpgSQL_execstate *estate, + Oid valtype, int32 valtypmod, + Oid reqtype, int32 reqtypmod) +{ + if (valtype != reqtype || + (valtypmod != reqtypmod && reqtypmod != -1)) + { + get_cast_hashentry(estate, + valtype, valtypmod, + reqtype, reqtypmod, + true); + } +} + +static void +init_assign_value(PLpgSQL_execstate *estate, + PLpgSQL_datum *target, + Oid valtype, int32 valtypmod) +{ + switch (target->dtype) + { + case PLPGSQL_DTYPE_VAR: + case PLPGSQL_DTYPE_PROMISE: + { + /* + * Target is a variable + */ + PLpgSQL_var *var = (PLpgSQL_var *) target; + + 
init_case_value(estate, valtype, valtypmod, var->datatype->typoid, + var->datatype->atttypmod); + break; + } + + case PLPGSQL_DTYPE_ROW: + { + /* + * Target is a row variable + */ + PLpgSQL_row *row = (PLpgSQL_row *) target; + + /* If source is null, just assign nulls to the row */ + exec_move_row(estate, (PLpgSQL_variable *) row, + NULL, NULL); + break; + } + + case PLPGSQL_DTYPE_REC: + { + PLpgSQL_rec *rec = (PLpgSQL_rec *) target; + + /* Set variable to a simple NULL */ + exec_move_row(estate, (PLpgSQL_variable *) rec, + NULL, NULL); + break; + } + + case PLPGSQL_DTYPE_RECFIELD: + { + /* + * Target is a field of a record + */ + PLpgSQL_recfield *recfield = (PLpgSQL_recfield *) target; + PLpgSQL_rec *rec; + ExpandedRecordHeader *erh; + + rec = (PLpgSQL_rec *) (estate->datums[recfield->recparentno]); + erh = rec->erh; + + /* + * If record variable is NULL, instantiate it if it has a + * named composite type, else complain. (This won't change + * the logical state of the record, but if we successfully + * assign below, the unassigned fields will all become NULLs.) + */ + if (erh == NULL) + { + instantiate_empty_record_variable(estate, rec); + erh = rec->erh; + } + + /* + * Look up the field's properties if we have not already, or + * if the tuple descriptor ID changed since last time. + */ + if (unlikely(recfield->rectupledescid != erh->er_tupdesc_id)) + { + if (!expanded_record_lookup_field(erh, + recfield->fieldname, + &recfield->finfo)) + ereport(ERROR, + (errcode(ERRCODE_UNDEFINED_COLUMN), + errmsg("record \"%s\" has no field \"%s\"", + rec->refname, recfield->fieldname))); + recfield->rectupledescid = erh->er_tupdesc_id; + } + + /* We don't support assignments to system columns. 
*/ + if (recfield->finfo.fnumber <= 0) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("cannot assign to system column \"%s\"", + recfield->fieldname))); + break; + } + + default: + elog(ERROR, "unrecognized dtype: %d", target->dtype); + } +} + +static void +init_stmt_raise(PLpgSQL_execstate *estate, PLpgSQL_stmt_raise *stmt) +{ + int err_code = 0; + char *condname = NULL; + MemoryContext stmt_mcontext; + + /* We'll need to accumulate the various strings in stmt_mcontext */ + stmt_mcontext = get_stmt_mcontext(estate); + + if (stmt->condname) + { + err_code = plpgsql_recognize_err_condition(stmt->condname, true); + condname = MemoryContextStrdup(stmt_mcontext, stmt->condname); + } + + if (stmt->message && stmt->params) + { + ListCell *current_param; + char *cp; + + current_param = list_head(stmt->params); + + for (cp = stmt->message; *cp; cp++) + { + if (cp[0] == '%') + { + init_eval_expr(estate, (PLpgSQL_expr *) lfirst(current_param)); + current_param = lnext(stmt->params, current_param); + } + } + } + + /* Clean up transient strings */ + MemoryContextReset(stmt_mcontext); +} + +static void +init_run_select(PLpgSQL_execstate *estate, + PLpgSQL_expr *expr, int64 maxtuples, Portal *portalP) +{ + ParamListInfo paramLI; + CachedPlan *cplan = NULL; + ListCell *lc1; + SPIPlanPtr plan; + + PG_TRY(); + { + { + int cursorOptions = CURSOR_OPT_NO_SCROLL; + + if (portalP == NULL) + cursorOptions |= CURSOR_OPT_PARALLEL_OK; + init_prepare_plan(estate, expr, cursorOptions, true); + } + paramLI = setup_param_list(estate, expr); + + plan = expr->plan; + foreach(lc1, plan->plancache_list) + { + CachedPlanSource *plansource = (CachedPlanSource *) lfirst(lc1); + + /* + * If this is a one-shot plan, we still need to do parse analysis. 
+ */ + if (plan->oneshot) + { + RawStmt *parsetree = plansource->raw_parse_tree; + const char *src = plansource->query_string; + List *stmt_list; + + /* + * Parameter datatypes are driven by parserSetup hook if provided, + * otherwise we use the fixed parameter list. + */ + if (parsetree == NULL) + stmt_list = NIL; + else if (plan->parserSetup != NULL) + { + Assert(plan->nargs == 0); + stmt_list = pg_analyze_and_rewrite_params(parsetree, + src, + plan->parserSetup, + plan->parserSetupArg, + NULL); + } + else + { + stmt_list = pg_analyze_and_rewrite(parsetree, + src, + plan->argtypes, + plan->nargs, + NULL); + } + + /* Finish filling in the CachedPlanSource */ + CompleteCachedPlan(plansource, + stmt_list, + NULL, + nodeTag(parsetree->stmt), + plan->argtypes, + plan->nargs, + plan->parserSetup, + plan->parserSetupArg, + plan->cursor_options, + false); /* not fixed result */ + } + + cplan = GetCachedPlan(plansource, paramLI, + plan->saved ? CurrentResourceOwner : NULL, + NULL, NULL); + + ReleaseCachedPlan(cplan, plan->saved ? CurrentResourceOwner : NULL); + } + } + PG_CATCH(); + { + FlushErrorState(); + //elog(INFO, "Can not run this kind of plpgsql function."); + } + PG_END_TRY(); +} + +static void +init_stmt_execsql(PLpgSQL_execstate *estate, + PLpgSQL_stmt_execsql *stmt) +{ + PLpgSQL_expr *expr = stmt->sqlstmt; + ParamListInfo paramLI; + ListCell *lc1; + + init_prepare_plan(estate, expr, CURSOR_OPT_PARALLEL_OK, true); + + if (!stmt->mod_stmt_set) + { + ListCell *l; + + stmt->mod_stmt = false; + foreach(l, SPI_plan_get_plan_sources(expr->plan)) + { + CachedPlanSource *plansource = (CachedPlanSource *) lfirst(l); + + /* + * We could look at the raw_parse_tree, but it seems simpler to + * check the command tag. Note we should *not* look at the Query + * tree(s), since those are the result of rewriting and could have + * been transmogrified into something else entirely. 
+ */ + if (plansource->commandTag == CMDTAG_INSERT || + plansource->commandTag == CMDTAG_UPDATE || + plansource->commandTag == CMDTAG_DELETE) + { + stmt->mod_stmt = true; + break; + } + } + stmt->mod_stmt_set = true; + } + + /* + * Set up ParamListInfo to pass to executor + */ + paramLI = setup_param_list(estate, expr); + + foreach(lc1, expr->plan->plancache_list) + { + CachedPlanSource *plansource = (CachedPlanSource *) lfirst(lc1); + SPIPlanPtr plan = expr->plan; + CachedPlan *cplan; + + /* + * If this is a one-shot plan, we still need to do parse analysis. + */ + if (plan->oneshot) + { + RawStmt *parsetree = plansource->raw_parse_tree; + const char *src = plansource->query_string; + List *stmt_list; + + /* + * Parameter datatypes are driven by parserSetup hook if provided, + * otherwise we use the fixed parameter list. + */ + if (parsetree == NULL) + stmt_list = NIL; + else if (plan->parserSetup != NULL) + { + Assert(plan->nargs == 0); + stmt_list = pg_analyze_and_rewrite_params(parsetree, + src, + plan->parserSetup, + plan->parserSetupArg, + NULL); + } + else + { + stmt_list = pg_analyze_and_rewrite(parsetree, + src, + plan->argtypes, + plan->nargs, + NULL); + } + + /* Finish filling in the CachedPlanSource */ + CompleteCachedPlan(plansource, + stmt_list, + NULL, + nodeTag(parsetree->stmt), + plan->argtypes, + plan->nargs, + plan->parserSetup, + plan->parserSetupArg, + plan->cursor_options, + false); /* not fixed result */ + } + + cplan = GetCachedPlan(plansource, paramLI, + plan->saved ? CurrentResourceOwner : NULL, + NULL, NULL); + ReleaseCachedPlan(cplan, plan->saved ? CurrentResourceOwner : NULL); + } + + if (stmt->into) + { + PLpgSQL_variable *target; + + /* Fetch target's datum entry */ + target = (PLpgSQL_variable *) estate->datums[stmt->target->dno]; + + /* + * If SELECT ... INTO specified STRICT, and the query didn't find + * exactly one row, throw an error. If STRICT was not specified, then + * allow the query to find any number of rows. 
+ */ + if (stmt->strict) + { + char *errdetail; + + if (estate->func->print_strict_params) + errdetail = format_expr_params(estate, expr); + else + errdetail = NULL; + + ereport(ERROR, + (errcode(ERRCODE_NO_DATA_FOUND), + errmsg("query returned no rows"), + errdetail ? errdetail_internal("parameters: %s", errdetail) : 0)); + } + /* set the target to NULL(s) */ + exec_move_row(estate, target, NULL, NULL); + + /* Clean up */ + exec_eval_cleanup(estate); + SPI_freetuptable(SPI_tuptable); + } + + expr->plan = NULL; +} + +static void +init_stmt_dynexecute(PLpgSQL_execstate *estate, + PLpgSQL_stmt_dynexecute *stmt) +{ + Datum query; + bool isnull; + Oid restype; + int32 restypmod; + char *querystr; + ListCell *lc1; + List *plancache_list; + ParamListInfo paramLI; + SPIExecuteOptions options; + MemoryContext stmt_mcontext = get_stmt_mcontext(estate); + List *raw_parsetree_list; + _SPI_plan plan; + ListCell *list_item; + + + /* + * First we evaluate the string expression after the EXECUTE keyword. Its + * result is the querystring we have to execute. 
+ */ + query = exec_eval_expr(estate, stmt->query, &isnull, &restype, &restypmod); + if (isnull) + ereport(ERROR, + (errcode(ERRCODE_NULL_VALUE_NOT_ALLOWED), + errmsg("query string argument of EXECUTE is null"))); + + /* Get the C-String representation */ + querystr = convert_value_to_string(estate, query, restype); + + /* copy it into the stmt_mcontext before we clean up */ + querystr = MemoryContextStrdup(stmt_mcontext, querystr); + + exec_eval_cleanup(estate); + + paramLI = exec_eval_using_params(estate, stmt->params); + + memset(&options, 0, sizeof(options)); + options.params = paramLI; + options.read_only = estate->readonly_func; + + memset(&plan, 0, sizeof(_SPI_plan)); + plan.magic = _SPI_PLAN_MAGIC; + plan.parse_mode = RAW_PARSE_DEFAULT; + plan.cursor_options = CURSOR_OPT_PARALLEL_OK; + if (options.params) + { + plan.parserSetup = options.params->parserSetup; + plan.parserSetupArg = options.params->parserSetupArg; + } + + raw_parsetree_list = raw_parser(querystr, plan.parse_mode); + plancache_list = NIL; + + foreach(list_item, raw_parsetree_list) + { + RawStmt *parsetree = lfirst_node(RawStmt, list_item); + CachedPlanSource *plansource; + + plansource = CreateOneShotCachedPlan(parsetree, + querystr, + CreateCommandTag(parsetree->stmt)); + + plancache_list = lappend(plancache_list, plansource); + } + + plan.plancache_list = plancache_list; + plan.oneshot = true; + + foreach(lc1, plan.plancache_list) + { + CachedPlanSource *plansource = (CachedPlanSource *) lfirst(lc1); + CachedPlan *cplan; + + /* + * If this is a one-shot plan, we still need to do parse analysis. + */ + if (plan.oneshot) + { + RawStmt *parsetree = plansource->raw_parse_tree; + const char *src = plansource->query_string; + List *stmt_list; + + /* + * Parameter datatypes are driven by parserSetup hook if provided, + * otherwise we use the fixed parameter list. 
+ */ + if (parsetree == NULL) + stmt_list = NIL; + else if (plan.parserSetup != NULL) + { + Assert(plan.nargs == 0); + stmt_list = pg_analyze_and_rewrite_params(parsetree, + src, + plan.parserSetup, + plan.parserSetupArg, + NULL); + } + else + { + stmt_list = pg_analyze_and_rewrite(parsetree, + src, + plan.argtypes, + plan.nargs, + NULL); + } + + /* Finish filling in the CachedPlanSource */ + CompleteCachedPlan(plansource, + stmt_list, + NULL, + nodeTag(parsetree->stmt), + plan.argtypes, + plan.nargs, + plan.parserSetup, + plan.parserSetupArg, + plan.cursor_options, + false); /* not fixed result */ + } + + cplan = GetCachedPlan(plansource, paramLI, + plan.saved ? CurrentResourceOwner : NULL, + NULL, NULL); + ReleaseCachedPlan(cplan, plan.saved ? CurrentResourceOwner : NULL); + } + + if (stmt->into) + { + PLpgSQL_variable *target; + + /* Fetch target's datum entry */ + target = (PLpgSQL_variable *) estate->datums[stmt->target->dno]; + + /* + * If SELECT ... INTO specified STRICT, and the query didn't find + * exactly one row, throw an error. If STRICT was not specified, then + * allow the query to find any number of rows. + */ + if (stmt->strict) + { + char *errdetail; + + if (estate->func->print_strict_params) + errdetail = format_preparedparamsdata(estate, paramLI); + else + errdetail = NULL; + + ereport(ERROR, + (errcode(ERRCODE_NO_DATA_FOUND), + errmsg("query returned no rows"), + errdetail ? 
errdetail_internal("parameters: %s", errdetail) : 0)); + } + /* set the target to NULL(s) */ + exec_move_row(estate, target, NULL, NULL); + + /* Clean up */ + exec_eval_cleanup(estate); + SPI_freetuptable(SPI_tuptable); + } +} + +static void +prepare_stmt_block(PLpgSQL_execstate *estate, PLpgSQL_stmt_block *block) +{ + int i; + + /* + * First initialize all variables declared in this block + */ + estate->err_text = gettext_noop("during statement block local variable initialization"); + + for (i = 0; i < block->n_initvars; i++) + { + int n = block->initvarnos[i]; + PLpgSQL_datum *datum = estate->datums[n]; + + /* + * The set of dtypes handled here must match plpgsql_add_initdatums(). + * + * Note that we currently don't support promise datums within blocks, + * only at a function's outermost scope, so we needn't handle those + * here. + */ + switch (datum->dtype) + { + case PLPGSQL_DTYPE_VAR: + { + PLpgSQL_var *var = (PLpgSQL_var *) datum; + + /* + * Free any old value, in case re-entering block, and + * initialize to NULL + */ + assign_simple_var(estate, var, (Datum) 0, true, false); + + if (var->default_val == NULL) + { + /* + * If needed, give the datatype a chance to reject + * NULLs, by assigning a NULL to the variable. We + * claim the value is of type UNKNOWN, not the var's + * datatype, else coercion will be skipped. + */ + if (var->datatype->typtype == TYPTYPE_DOMAIN) + exec_assign_value(estate, + (PLpgSQL_datum *) var, + (Datum) 0, + true, + UNKNOWNOID, + -1); + + /* parser should have rejected NOT NULL */ + Assert(!var->notnull); + } + else + { + exec_assign_expr(estate, (PLpgSQL_datum *) var, + var->default_val); + } + } + break; + + case PLPGSQL_DTYPE_REC: + { + PLpgSQL_rec *rec = (PLpgSQL_rec *) datum; + + /* + * Deletion of any existing object will be handled during + * the assignments below, and in some cases it's more + * efficient for us not to get rid of it beforehand. 
+ */ + if (rec->default_val == NULL) + { + /* + * If needed, give the datatype a chance to reject + * NULLs, by assigning a NULL to the variable. + */ + exec_move_row(estate, (PLpgSQL_variable *) rec, + NULL, NULL); + + /* parser should have rejected NOT NULL */ + Assert(!rec->notnull); + } + else + { + exec_assign_expr(estate, (PLpgSQL_datum *) rec, + rec->default_val); + } + } + break; + + default: + elog(ERROR, "unrecognized dtype: %d", datum->dtype); + } + } + + if (block->exceptions) + { + /* + * Execute the statements in the block's body inside a sub-transaction + */ + MemoryContext oldcontext = CurrentMemoryContext; + ResourceOwner oldowner = CurrentResourceOwner; + MemoryContext stmt_mcontext; + ListCell *e; + + estate->err_text = gettext_noop("during statement block entry"); + + /* + * We will need a stmt_mcontext to hold the error data if an error + * occurs. It seems best to force it to exist before entering the + * subtransaction, so that we reduce the risk of out-of-memory during + * error recovery, and because this greatly simplifies restoring the + * stmt_mcontext stack to the correct state after an error. We can + * ameliorate the cost of this by allowing the called statements to + * use this mcontext too; so we don't push it down here. + */ + stmt_mcontext = get_stmt_mcontext(estate); + + //BeginInternalSubTransaction(NULL); + /* Want to run statements inside function's memory context */ + MemoryContextSwitchTo(oldcontext); + + + /* + * We need to run the block's statements with a new eval_econtext + * that belongs to the current subtransaction; if we try to use + * the outer econtext then ExprContext shutdown callbacks will be + * called at the wrong times. 
+ */ + // plpgsql_create_econtext(estate); + + estate->err_text = NULL; + + /* Run the block's statements */ + init_stmts(estate, block->body); + + estate->eval_tuptable = NULL; + exec_eval_cleanup(estate); + + foreach(e, block->exceptions->exc_list) + { + PLpgSQL_exception *exception = (PLpgSQL_exception *) lfirst(e); + + /* + * Also set up cur_error so the error data is accessible + * inside the handler. + */ + estate->err_text = NULL; + + init_stmts(estate, exception->action); + + break; + + } + + estate->err_text = gettext_noop("during statement block exit"); + + /* + * If the block ended with RETURN, we may need to copy the return + * value out of the subtransaction eval_context. We can avoid a + * physical copy if the value happens to be a R/W expanded object. + */ + + /* Commit the inner transaction, return to outer xact context */ + // ReleaseCurrentSubTransaction(); + MemoryContextSwitchTo(oldcontext); + CurrentResourceOwner = oldowner; + + /* Assert that the stmt_mcontext stack is unchanged */ + Assert(stmt_mcontext == estate->stmt_mcontext); + } + else + { + /* + * Just execute the statements in the block's body + */ + estate->err_text = NULL; + + init_stmts(estate, block->body); + } + + estate->err_text = NULL; +} + +static void +init_stmts(PLpgSQL_execstate *estate, List *stmts) +{ + ListCell *s; + + if (stmts == NIL) + { + /* + * Ensure we do a CHECK_FOR_INTERRUPTS() even though there is no + * statement. This prevents hangup in a tight loop if, for instance, + * there is a LOOP construct with an empty body. 
+ */ + CHECK_FOR_INTERRUPTS(); + return; + } + + foreach(s, stmts) + { + PLpgSQL_stmt *stmt = (PLpgSQL_stmt *) lfirst(s); + init_stmt(estate, stmt); + } +} + +static void +init_stmt_if(PLpgSQL_execstate *estate, PLpgSQL_stmt_if *stmt) +{ + ListCell *lc; + + init_eval_expr(estate, stmt->cond); + exec_eval_cleanup(estate); + + init_stmts(estate, stmt->then_body); + + foreach(lc, stmt->elsif_list) + { + PLpgSQL_if_elsif *elif = (PLpgSQL_if_elsif *) lfirst(lc); + + init_eval_expr(estate, elif->cond); + exec_eval_cleanup(estate); + init_stmts(estate, elif->stmts); + } + + init_stmts(estate, stmt->else_body); + return; +} + + +bool +function_is_prepare(FunctionCallInfo fcinfo) +{ + if (CALLED_AS_TRIGGER(fcinfo)) + { + TriggerData *triggerData = (TriggerData *) fcinfo->context; + + if (triggerData->tg_event & TRIGGER_EVENT_PREPARE) + return true; + else + return false; + } + else if (CALLED_AS_EVENT_TRIGGER(fcinfo)) + { + return false; + } + else + { + CallContext *callContext = (CallContext *) fcinfo->context; + + if (callContext && IsA(callContext, CallContext) && callContext->prepare) + return true; + else + return false; + } +} diff --git a/src/pl/plpgsql/src/pl_handler.c b/src/pl/plpgsql/src/pl_handler.c index b578256335d..c293b0c2452 100644 --- a/src/pl/plpgsql/src/pl_handler.c +++ b/src/pl/plpgsql/src/pl_handler.c @@ -273,8 +273,15 @@ plpgsql_call_handler(PG_FUNCTION_ARGS) * subhandler */ if (CALLED_AS_TRIGGER(fcinfo)) - retval = PointerGetDatum(plpgsql_exec_trigger(func, - (TriggerData *) fcinfo->context)); + { + TriggerData *triggerData = (TriggerData *) fcinfo->context; + + if (triggerData->tg_event & TRIGGER_EVENT_PREPARE) + plpgsql_prepare_trigger(func, (TriggerData *) fcinfo->context); + else + retval = PointerGetDatum( + plpgsql_exec_trigger(func, (TriggerData *) fcinfo->context)); + } else if (CALLED_AS_EVENT_TRIGGER(fcinfo)) { plpgsql_exec_event_trigger(func, @@ -282,10 +289,18 @@ plpgsql_call_handler(PG_FUNCTION_ARGS) /* there's no return value in this 
case */ } else - retval = plpgsql_exec_function(func, fcinfo, - NULL, NULL, - procedure_resowner, - !nonatomic); + { + CallContext *callContext = (CallContext *) fcinfo->context; + + if (callContext && IsA(callContext, CallContext) && callContext->prepare) + plpgsql_prepare_function(func, fcinfo, NULL, NULL, + NULL, false); + else + retval = plpgsql_exec_function(func, fcinfo, + NULL, NULL, + procedure_resowner, + !nonatomic); + } } PG_FINALLY(); { diff --git a/src/pl/plpgsql/src/plpgsql.h b/src/pl/plpgsql/src/plpgsql.h index 00756d1b9df..5ec18b12a93 100644 --- a/src/pl/plpgsql/src/plpgsql.h +++ b/src/pl/plpgsql/src/plpgsql.h @@ -1323,4 +1323,13 @@ extern void plpgsql_scanner_finish(void); */ extern int plpgsql_yyparse(void); + +extern Datum plpgsql_prepare_function(PLpgSQL_function *func, FunctionCallInfo fcinfo, + EState *simple_eval_estate, + ResourceOwner simple_eval_resowner, + ResourceOwner procedure_resowner, + bool atomic); +extern void plpgsql_prepare_trigger(PLpgSQL_function *func, TriggerData *trigdata); +extern void init_stmt(PLpgSQL_execstate *estate, PLpgSQL_stmt *stmt); +extern bool function_is_prepare(FunctionCallInfo fcinfo); #endif /* PLPGSQL_H */ diff --git a/src/test/regress/expected/gpctas_optimizer.out b/src/test/regress/expected/gpctas_optimizer.out index b6bb557bfea..7e9956e985b 100644 --- a/src/test/regress/expected/gpctas_optimizer.out +++ b/src/test/regress/expected/gpctas_optimizer.out @@ -381,11 +381,7 @@ begin; create table t2_github_issue_10760 as select * from t1_github_issue_10760 where b > (select count(*) from t1_github_issue_10760) distributed randomly; end; select count (distinct oid) from (select oid from pg_class where relname = 't2_github_issue_10760' union all select oid from gp_dist_random('pg_class') where relname = 't2_github_issue_10760')x; - count -------- - 1 -(1 row) - +ERROR: Hash data storage do not support index scan in segment (hashdata_execProcnode.cc:1275) (seg2 slice1 127.0.0.1:5435 pid=1180272) 
(hashdata_execProcnode.cc:1275) drop table t1_github_issue_10760; drop table t2_github_issue_10760; reset optimizer; @@ -416,7 +412,7 @@ INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: Queries on master-only tables count ------- - 8 + 2 (1 row) select count(distinct (localoid, policytype, numsegments, distkey)) diff --git a/src/test/regress/output/external_table_optimizer.source b/src/test/regress/output/external_table_optimizer.source index b5af22445d1..9ad52b0ddda 100644 --- a/src/test/regress/output/external_table_optimizer.source +++ b/src/test/regress/output/external_table_optimizer.source @@ -476,17 +476,15 @@ CREATE TABLE tableless_heap(a int, b int); COPY tableless_heap FROM '@abs_srcdir@/data/tableless.csv' CSV LOG ERRORS SEGMENT REJECT LIMIT 10; NOTICE: found 2 data formatting errors (2 or more input rows), rejected related input data SELECT relname, linenum, errmsg FROM gp_read_error_log('tableless_heap'); - relname | linenum | errmsg -----------------+---------+--------------------------------------- - tableless_heap | 2 | extra data after last expected column - tableless_heap | 4 | missing data for column "b" -(2 rows) + relname | linenum | errmsg +---------+---------+-------- +(0 rows) create table errlog_save as select * from gp_read_error_log('tableless_heap'); select count(*) from errlog_save; count ------- - 2 + 0 (1 row) SELECT gp_truncate_error_log('tableless_heap'); @@ -513,18 +511,14 @@ NOTICE: found 2 data formatting errors (2 or more input rows), rejected related (2 rows) SELECT relname, linenum, errmsg FROM gp_read_error_log('tableless_ext'); - relname | linenum | errmsg ----------------+---------+--------------------------------------- - tableless_ext | 2 | extra data after last expected column - tableless_ext | 4 | missing data for column "b" -(2 rows) + relname | linenum | errmsg 
+---------+---------+-------- +(0 rows) SELECT (gp_read_error_log('tableless_ext')).errmsg; - errmsg ---------------------------------------- - extra data after last expected column - missing data for column "b" -(2 rows) + errmsg +-------- +(0 rows) SELECT gp_truncate_error_log('tableless_ext'); gp_truncate_error_log @@ -684,7 +678,7 @@ NOTICE: found 2 data formatting errors (2 or more input rows), rejected related select count(*) from gp_read_error_log('exttab_basic_2'); count ------- - 2 + 0 (1 row) -- Errors with exceeding reject limit @@ -700,7 +694,7 @@ CONTEXT: External table exttab_basic_3, line 7 of file://@hostname@@abs_srcdir@ select count(*) > 0 from gp_read_error_log('exttab_basic_3'); ?column? ---------- - t + f (1 row) -- Insert into another table @@ -716,7 +710,7 @@ NOTICE: found 6 data formatting errors (6 or more input rows), rejected related select count(*) > 0 from gp_read_error_log('exttab_basic_4'); ?column? ---------- - t + f (1 row) -- Use the same error log above @@ -751,7 +745,7 @@ SELECT * from exttab_insert_1 order by i; SELECT count(*) from gp_read_error_log('exttab_basic_5'); count ------- - 5 + 0 (1 row) -- CTAS @@ -786,7 +780,7 @@ SELECT * FROM exttab_ctas_1 order by i; select count(*) from gp_read_error_log('exttab_basic_6'); count ------- - 6 + 0 (1 row) CREATE EXTERNAL TABLE exttab_basic_7( i int, j text ) @@ -808,7 +802,7 @@ LINE 1: SELECT * from exttab_ctas_2 order by i; SELECT count(*) from gp_read_error_log('exttab_basic_7'); count ------- - 5 + 0 (1 row) -- Drop external table gets rid off error logs @@ -892,15 +886,14 @@ CONTEXT: External table exttab_cte_2, line 7 of file://@hostname@@abs_srcdir@/d select count(*) from gp_read_error_log('exttab_cte_2'); count ------- - 2 + 0 (1 row) -- start_ignore select gp_read_error_log('exttab_cte_2'); - gp_read_error_log 
----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- - ("Tue Mar 27 13:18:48.708775 2018 PDT",exttab_cte_2,"file://@hostname@@abs_srcdir@/data/exttab_more_errors.data [/home/heikki/git-sandbox-gpdb/master/src/test/regress/data/exttab_more_errors.data]",5,,"invalid input syntax for type integer: ""error_1"", column i",error_1,) -(1 row) + gp_read_error_log +------------------- +(0 rows) -- end_ignore -- CTE without segment reject limit exceeded @@ -958,9 +951,17 @@ CREATE ROLE exttab_non_superuser WITH NOSUPERUSER LOGIN CREATEDB; NOTICE: resource queue required -- using default resource queue "pg_default" SET ROLE exttab_non_superuser; SELECT COUNT(*) FROM gp_read_error_log('exttab_permissions_1'); -ERROR: permission denied for table exttab_permissions_1 (seg2 slice1 @hostname@:40002 pid=50448) + count +------- + 0 +(1 row) + SELECT COUNT(*) FROM gp_read_error_log('exttab_permissions_2'); -ERROR: permission denied for table exttab_permissions_2 (seg1 slice1 @hostname@:40001 pid=50437) + count +------- + 0 +(1 row) + SELECT gp_truncate_error_log('exttab_permissions_1'); ERROR: permission denied for table exttab_permissions_1 SELECT gp_truncate_error_log('exttab_permissions_2'); @@ -976,7 +977,7 @@ SET ROLE exttab_superuser; SELECT COUNT(*) FROM gp_read_error_log('exttab_permissions_2'); count ------- - 2 + 0 (1 row) SELECT gp_truncate_error_log('*'); @@ -1039,12 +1040,16 @@ NOTICE: found 4 data formatting errors (4 or more input rows), rejected related SELECT COUNT(*) FROM gp_read_error_log('exttab_permissions_1'); count ------- - 4 + 0 (1 row) SET ROLE exttab_user2; SELECT COUNT(*) FROM gp_read_error_log('exttab_permissions_1'); -ERROR: permission denied for table exttab_permissions_1 (seg0 slice1 
@hostname@:40000 pid=51060) + count +------- + 0 +(1 row) + SELECT gp_truncate_error_log('*'); ERROR: must be owner of database exttab_db SELECT gp_truncate_error_log('*.*'); @@ -1054,7 +1059,11 @@ ERROR: permission denied for table exttab_permissions_1 SET ROLE exttab_user1; -- Database owner can still not perform read / truncate on specific tables. This follows the same mechanism as TRUNCATE table. SELECT COUNT(*) FROM gp_read_error_log('exttab_permissions_1'); -ERROR: permission denied for table exttab_permissions_1 (seg1 slice1 @hostname@:40001 pid=51061) + count +------- + 0 +(1 row) + SELECT gp_truncate_error_log('exttab_permissions_1'); ERROR: permission denied for table exttab_permissions_1 SELECT gp_truncate_error_log('*'); @@ -1093,14 +1102,18 @@ NOTICE: found 4 data formatting errors (4 or more input rows), rejected related SELECT COUNT(*) FROM gp_read_error_log('exttab_permissions_3'); count ------- - 4 + 0 (1 row) ALTER EXTERNAL TABLE exttab_permissions_3 OWNER TO errlog_exttab_user3; -- This should fail with non table owner SET ROLE errlog_exttab_user4; SELECT COUNT(*) FROM gp_read_error_log('exttab_permissions_3'); -ERROR: permission denied for table exttab_permissions_3 (seg0 slice1 @hostname@:40000 pid=51087) + count +------- + 0 +(1 row) + SELECT gp_truncate_error_log('exttab_permissions_3'); ERROR: permission denied for table exttab_permissions_3 -- should go through fine with table owner @@ -1129,7 +1142,7 @@ NOTICE: found 4 data formatting errors (4 or more input rows), rejected related SELECT COUNT(*) FROM gp_read_error_log('exttab_permissions_3'); count ------- - 4 + 0 (1 row) SET ROLE errlog_exttab_user4; @@ -1169,7 +1182,7 @@ SELECT * FROM gp_read_error_log('exttab_subq_2') ) FOO; ?column? ---------- - t + f (1 row) SELECT gp_truncate_error_log('exttab_subq_1'); @@ -1200,7 +1213,7 @@ SELECT * FROM gp_read_error_log('exttab_subq_2') ) FOO; ?column? 
---------- - t + f (1 row) -- Subqueries without reaching segment reject limit @@ -1317,7 +1330,7 @@ SELECT * FROM gp_read_error_log('exttab_subq_2') ) FOO; ?column? ---------- - t + f (1 row) SELECT gp_truncate_error_log('exttab_subq_1'); @@ -1346,7 +1359,7 @@ SELECT * FROM gp_read_error_log('exttab_subq_2') ) FOO; ?column? ---------- - t + f (1 row) -- TRUNCATE / delete / write to error logs within subtransactions @@ -1379,7 +1392,7 @@ SELECT * FROM gp_read_error_log('exttab_subtxs_2') ) FOO; count ------- - 4 + 0 (1 row) BEGIN; @@ -1417,7 +1430,7 @@ SELECT * FROM gp_read_error_log('exttab_subtxs_2') ) FOO; count ------- - 4 + 0 (1 row) savepoint s2; @@ -1456,7 +1469,7 @@ SELECT * FROM gp_read_error_log('exttab_subtxs_2') ) FOO; count ------- - 12 + 0 (1 row) -- Make the tx fail, segment reject limit reaches here @@ -1481,7 +1494,7 @@ SELECT * FROM gp_read_error_log('exttab_subtxs_2') ) FOO; ?column? ---------- - t + f (1 row) -- TRUNCATE error logs within tx , abort transaction @@ -1514,7 +1527,7 @@ SELECT * FROM gp_read_error_log('exttab_txs_2') ) FOO; count ------- - 4 + 0 (1 row) BEGIN; @@ -1555,7 +1568,7 @@ SELECT * FROM gp_read_error_log('exttab_txs_2') ) FOO; ?column? ---------- - t + f (1 row) -- TRUNCATE error logs within txs , with segment reject limit reached @@ -1580,7 +1593,7 @@ SELECT * FROM gp_read_error_log('exttab_txs_2') ) FOO; count ------- - 8 + 0 (1 row) BEGIN; @@ -1619,7 +1632,7 @@ SELECT * FROM gp_read_error_log('exttab_txs_2') ) FOO; ?column? ---------- - t + f (1 row) -- Creating external table with error log within txs with segment reject limits reached @@ -1665,7 +1678,7 @@ NOTICE: found 4 data formatting errors (4 or more input rows), rejected related SELECT count(*) FROM gp_read_error_log('exttab_txs_4'); count ------- - 4 + 0 (1 row) -- should error out and abort the transaction @@ -1739,7 +1752,7 @@ SELECT * FROM gp_read_error_log('exttab_udfs_2') ) FOO; ?column? 
---------- - t + f (1 row) -- INSERT INTO from a udf @@ -1784,7 +1797,7 @@ SELECT * FROM gp_read_error_log('exttab_udfs_2') ) FOO; ?column? ---------- - t + f (1 row) -- UDFs with INSERT INTO with segment reject limit reached @@ -1859,7 +1872,7 @@ SELECT * FROM gp_read_error_log('exttab_udfs_2') ) FOO; ?column? ---------- - t + f (1 row) SELECT gp_truncate_error_log('exttab_udfs_1'); @@ -1887,7 +1900,7 @@ SELECT * FROM gp_read_error_log('exttab_udfs_2') ) FOO; ?column? ---------- - t + f (1 row) SELECT gp_truncate_error_log('exttab_udfs_1'); @@ -1915,7 +1928,7 @@ SELECT * FROM gp_read_error_log('exttab_udfs_2') ) FOO; ?column? ---------- - t + f (1 row) SELECT gp_truncate_error_log('exttab_udfs_1'); @@ -1945,7 +1958,7 @@ SELECT * FROM gp_read_error_log('exttab_udfs_2') ) FOO; ?column? ---------- - t + f (1 row) -- No rows should be inserted into exttab_udfs_insert_2 @@ -1987,7 +2000,7 @@ SELECT * FROM gp_read_error_log('exttab_union_2') ) FOO; ?column? ---------- - t + f (1 row) -- Insert into another table, with and without segment reject limits being reached @@ -2029,7 +2042,7 @@ SELECT * FROM gp_read_error_log('exttab_union_2') ) FOO; ?column? ---------- - t + f (1 row) SELECT gp_truncate_error_log('exttab_union_1'); @@ -2110,7 +2123,7 @@ SELECT * FROM gp_read_error_log('exttab_views_2') ) FOO; ?column? ---------- - t + f (1 row) -- INSERT INTO FROM a view @@ -2151,7 +2164,7 @@ SELECT * FROM gp_read_error_log('exttab_views_2') ) FOO; ?column? ---------- - t + f (1 row) -- CTAS from a view with segment reject limit reached @@ -2189,7 +2202,7 @@ SELECT * FROM gp_read_error_log('exttab_views_2') ) FOO; ?column? ---------- - t + f (1 row) -- CTAS FROM view with segment reject limits reached @@ -2228,7 +2241,7 @@ SELECT * FROM gp_read_error_log('exttab_views_2') ) FOO; ?column? ---------- - t + f (1 row) -- Scans in window queries with and without seg reject limit reached @@ -2338,7 +2351,7 @@ SELECT * FROM gp_read_error_log('exttab_windows_2') ) FOO; ?column? 
---------- - t + f (1 row) -- LIMIT queries without segment reject limit reached @@ -2367,7 +2380,7 @@ CONTEXT: External table exttab_limit_2, line 7 of file://@hostname@@abs_srcdir@ SELECT count(*) FROM gp_read_error_log('exttab_limit_2'); count ------- - 2 + 0 (1 row) SELECT gp_truncate_error_log('exttab_limit_1'); @@ -2443,7 +2456,7 @@ CONTEXT: External table exttab_limit_2, line 7 of file://@hostname@@abs_srcdir@ SELECT count(*) > 0 FROM gp_read_error_log('exttab_limit_2'); ?column? ---------- - t + f (1 row) SELECT gp_truncate_error_log('exttab_limit_1'); @@ -2495,7 +2508,7 @@ SELECT * FROM gp_read_error_log('exttab_limit_2') ) FOO; ?column? ---------- - t + f (1 row) -- This query will materialize exttab_limit_2 completely even if LIMIT is just 3 and hence will throw segment reject limit reached @@ -2523,7 +2536,7 @@ SELECT * FROM gp_read_error_log('exttab_limit_2') ) FOO; ?column? ---------- - t + f (1 row) -- gp_initial_bad_row_limit guc test. This guc allows user to set the initial @@ -2550,7 +2563,7 @@ CONTEXT: External table exttab_first_reject_limit_1, line 1000 of file://@hostn SELECT COUNT(*) > 0 FROM gp_read_error_log('exttab_first_reject_limit_1'); ?column? ---------- - t + f (1 row) -- should work now @@ -2571,7 +2584,7 @@ NOTICE: found 5000 data formatting errors (5000 or more input rows), rejected r SELECT COUNT(*) FROM gp_read_error_log('exttab_first_reject_limit_1'); count ------- - 5000 + 0 (1 row) -- first segment reject limit should be checked before segment reject limit @@ -2589,7 +2602,7 @@ CONTEXT: External table exttab_first_reject_limit_2, line 2 of file://@hostname SELECT COUNT(*) > 0 from gp_read_error_log('exttab_first_reject_limit_2'); ?column? ---------- - t + f (1 row) -- should report an error saying segment reject limit reached @@ -2607,7 +2620,7 @@ CONTEXT: External table exttab_first_reject_limit_2, line 500 of file://@hostna SELECT COUNT(*) > 0 from gp_read_error_log('exttab_first_reject_limit_2'); ?column? 
---------- - t + f (1 row) -- set unlimited first error rows, should fail only because of segment reject limits @@ -2625,7 +2638,7 @@ CONTEXT: External table exttab_first_reject_limit_2, line 500 of file://@hostna SELECT COUNT(*) > 0 from gp_read_error_log('exttab_first_reject_limit_2'); ?column? ---------- - t + f (1 row) DROP EXTERNAL TABLE IF EXISTS exttab_heap_join_1; @@ -2652,7 +2665,7 @@ NOTICE: found 2 data formatting errors (2 or more input rows), rejected related SELECT COUNT(*) FROM gp_read_error_log('exttab_heap_join_1'); count ------- - 2 + 0 (1 row) \! rm @abs_srcdir@/data/tableless.csv From 1b01f4db940052034c0eae75d3a23d80c0ccbe6d Mon Sep 17 00:00:00 2001 From: Zhang Mingli Date: Thu, 18 Apr 2024 02:16:32 +0800 Subject: [PATCH 087/152] Add pg_calss.relreuseattrs to identify partition could reuse parent's column definitions. relreuseattrs is true if partition has the same physical column definitions. It's pointless for non-partition tables. This is used to avoid overhead of dispatching partitions catalog. 
--- .../sql/partition_reuse_columns.sql | 361 +++++++ src/backend/access/common/tupdesc.c | 18 +- src/backend/catalog/heap.c | 11 + src/backend/catalog/pg_proc.c | 2 +- src/backend/commands/analyze.c | 2 +- src/backend/commands/tablecmds.c | 131 +++ src/backend/executor/execReplication.c | 2 +- src/backend/utils/cache/plancache.c | 2 +- src/backend/utils/cache/relcache.c | 2 +- src/backend/utils/cache/typcache.c | 4 +- src/include/access/tupdesc.h | 2 +- src/include/catalog/pg_class.h | 6 + .../expected/partition_reuse_columns.out | 912 ++++++++++++++++++ 13 files changed, 1443 insertions(+), 12 deletions(-) create mode 100644 gpcontrib/gp_toolkit/sql/partition_reuse_columns.sql create mode 100644 src/test/regress/expected/partition_reuse_columns.out diff --git a/gpcontrib/gp_toolkit/sql/partition_reuse_columns.sql b/gpcontrib/gp_toolkit/sql/partition_reuse_columns.sql new file mode 100644 index 00000000000..32cf8644bff --- /dev/null +++ b/gpcontrib/gp_toolkit/sql/partition_reuse_columns.sql @@ -0,0 +1,361 @@ +create schema partition_reuse_columns; +set search_path to partition_reuse_columns; + +create table t1(a int, b int) partition by range(a) ; +create table p1 partition of t1 for values from (1) to (5); +create table p2 partition of t1 for values from (5) to (10) partition by range(b); +create table p2_1 partition of p2 for values from (1) to (3); +create table p2_2 partition of p2 for values from (3) to (6); +select relname, relreuseattrs from pg_class where relnamespace = 'partition_reuse_columns'::regnamespace order by relname; + +create table p3(a int, b int); +-- reuse columns +alter table t1 attach partition p3 for values from (10) to (15); +select relname, relreuseattrs from pg_class where oid = 'p3'::regclass::oid; + +create table p4(b int, a int, c int); +alter table p4 drop column c; +-- not reuse columns +alter table t1 attach partition p4 for values from (15) to (20); +select relname, relreuseattrs from pg_class where oid = 'p4'::regclass::oid; + 
+-- reuse columns, include the dropped column +create table p5(a int, b int, c int); +alter table p5 drop column c; +alter table t1 add column c int; +alter table t1 drop column c; +alter table t1 attach partition p5 for values from (20) to (25); +select relname, relreuseattrs from pg_class where oid = 'p5'::regclass::oid; + +-- check after detach +alter table t1 detach partition p1; +select relname, relreuseattrs from pg_class where oid = 'p1'::regclass::oid; + +-- check after re-attach +alter table t1 attach partition p1 for values from (1) to (5); +select relname, relreuseattrs from pg_class where oid = 'p1'::regclass::oid; + +-- check after detach and alter tables +alter table t1 detach partition p1; +alter table p1 add column d int; +alter table p1 drop column d; +alter table t1 attach partition p1 for values from (1) to (5); +-- not reuse columns +select relname, relreuseattrs from pg_class where oid = 'p1'::regclass::oid; + +-- check alter columns +create table t2(a int, b int) partition by range(a) ; +create table p6 (a int, b int); +alter table t2 attach partition p6 for values from (25) to (30); + +-- +-- test alter default +-- +select relname, relreuseattrs from pg_class where oid = 'p6'::regclass::oid; +alter table p6 alter column b set default 10; +-- should not reuse +select relname, relreuseattrs from pg_class where oid = 'p6'::regclass::oid; +-- should reuse after parent alter column +alter table t2 alter column b set default 100; +select relname, relreuseattrs from pg_class where oid = 'p6'::regclass::oid; +-- should not reuse again +alter table p6 alter column b set default 99; +select relname, relreuseattrs from pg_class where oid = 'p6'::regclass::oid; +-- should reuse again +alter table p6 alter column b set default 100; +select relname, relreuseattrs from pg_class where oid = 'p6'::regclass::oid; + +-- +-- test alter statistics +-- +-- should not reuse +alter table p6 alter column b set STATISTICS 99; +select relname, relreuseattrs from 
pg_class where oid = 'p6'::regclass::oid; +-- should reuse again +alter table p6 alter column b set STATISTICS -1; +select relname, relreuseattrs from pg_class where oid = 'p6'::regclass::oid; + +-- +-- test alter NULL/NOT NULL +-- +-- should not reuse +alter table p6 alter column b set NOT NULL; +select relname, relreuseattrs from pg_class where oid = 'p6'::regclass::oid; +-- should reuse again +alter table p6 alter column b drop NOT NULL; +select relname, relreuseattrs from pg_class where oid = 'p6'::regclass::oid; + +-- +-- test alter storage +-- +alter table t2 add column c text; +select attstorage from pg_attribute where attrelid = 't2'::regclass::oid and attname = 'c'; +select attstorage from pg_attribute where attrelid = 'p6'::regclass::oid and attname = 'c'; +alter table p6 alter column c set storage main; +select attstorage from pg_attribute where attrelid = 'p6'::regclass::oid and attname = 'c'; +-- should not reuse +select relname, relreuseattrs from pg_class where oid = 'p6'::regclass::oid; +-- should reuse again +alter table p6 alter column c set storage extended; +select relname, relreuseattrs from pg_class where oid = 'p6'::regclass::oid; + +ALTER TABLE t2 add COLUMN d int; +ALTER TABLE t2 ALTER COLUMN d set NOT NULL; +select attname, attnotnull from pg_attribute where attrelid = 'p6'::regclass::oid and attnum > 0; +select attname, attnotnull from pg_attribute where attrelid = 't2'::regclass::oid and attnum >0; +select relname, relreuseattrs from pg_class where oid = 'p6'::regclass::oid; +ALTER TABLE t2 ALTER COLUMN d drop NOT NULL; +select attname, attnotnull from pg_attribute where attrelid = 'p6'::regclass::oid and attnum > 0; +select attname, attnotnull from pg_attribute where attrelid = 't2'::regclass::oid and attnum > 0; +select relname, relreuseattrs from pg_class where oid = 'p6'::regclass::oid; + +-- +-- test IDENTITY +-- +ALTER TABLE t2 ALTER COLUMN d set NOT NULL; +select attname, attidentity from pg_attribute where attrelid = 
'p6'::regclass::oid and attnum > 0; +select attname, attidentity from pg_attribute where attrelid = 't2'::regclass::oid and attnum > 0; +select relname, relreuseattrs from pg_class where oid = 'p6'::regclass::oid; +ALTER TABLE p6 ALTER COLUMN d ADD GENERATED ALWAYS AS IDENTITY; +select attname, attidentity from pg_attribute where attrelid = 'p6'::regclass::oid and attnum > 0; +select attname, attidentity from pg_attribute where attrelid = 't2'::regclass::oid and attnum > 0; +select relname, relreuseattrs from pg_class where oid = 'p6'::regclass::oid; +ALTER TABLE p6 ALTER COLUMN d DROP IDENTITY; +select attname, attidentity from pg_attribute where attrelid = 'p6'::regclass::oid and attnum > 0; +select attname, attidentity from pg_attribute where attrelid = 't2'::regclass::oid and attnum > 0; +select relname, relreuseattrs from pg_class where oid = 'p6'::regclass::oid; + +-- +-- test COMPRESSION +-- +select attname, attcompression from pg_attribute where attrelid = 'p6'::regclass::oid and attnum > 0; +select attname, attcompression from pg_attribute where attrelid = 't2'::regclass::oid and attnum > 0; +ALTER TABLE p6 ALTER COLUMN c SET compression pglz; +select attname, attcompression from pg_attribute where attrelid = 'p6'::regclass::oid and attnum > 0; +select attname, attcompression from pg_attribute where attrelid = 't2'::regclass::oid and attnum > 0; +select relname, relreuseattrs from pg_class where oid = 'p6'::regclass::oid; +ALTER TABLE p6 ALTER COLUMN c SET compression default; +select attname, attcompression from pg_attribute where attrelid = 'p6'::regclass::oid and attnum > 0; +select attname, attcompression from pg_attribute where attrelid = 't2'::regclass::oid and attnum > 0; +select relname, relreuseattrs from pg_class where oid = 'p6'::regclass::oid; + +-- GPDB partition grammar +create table region +( + r_regionkey integer not null, + r_name char(25), + r_comment varchar(152) +) +partition by range (r_regionkey) +subpartition by list (r_name) 
subpartition template +( + subpartition africa values ('AFRICA'), + subpartition america values ('AMERICA'), + subpartition asia values ('ASIA'), + subpartition europe values ('EUROPE'), + subpartition mideast values ('MIDDLE EAST'), + subpartition australia values ('AUSTRALIA'), + subpartition antarctica values ('ANTARCTICA') +) +( + partition region1 start (0), + partition region2 start (3), + partition region3 start (5) end (8) +); +select relname, relreuseattrs from pg_class where relnamespace = 'partition_reuse_columns'::regnamespace and relname like 'region%'; + +create table foo_p (i int, j int) +partition by range(j) +(start(1) end(10) every(1)); +select relname, relreuseattrs from pg_class where relnamespace = 'partition_reuse_columns'::regnamespace and relname like 'foo_p%'; + +-- +-- test partition tables with children +-- +create table p7 partition of t2 for values from (30) to (40) partition by range (a); +-- should not reuse as t2 has dropped columns +select relname, relreuseattrs from pg_class where oid in + ('p7'::regclass::oid); +drop table p7; + +create table t3(a int, b int, c text) partition by range(a) ; +create table p7 partition of t3 for values from (1) to (40) partition by range (a); +create table p7_1 partition of p7 for values from (1) to (35); +create table p7_2 partition of p7 for values from (35) to (40); +select relname, relreuseattrs from pg_class where oid in + ('p7'::regclass::oid, + 'p7_1'::regclass::oid, + 'p7_2'::regclass::oid); + +-- test compression +select attrelid::regclass, attname, attcompression from pg_attribute where attrelid in + ('t3'::regclass::oid, + 'p7'::regclass::oid, + 'p7_1'::regclass::oid, + 'p7_2'::regclass::oid) + and attnum > 0; +ALTER TABLE p7 ALTER COLUMN c SET compression pglz; +select attrelid::regclass, attname, attcompression from pg_attribute where attrelid in + ('t3'::regclass::oid, + 'p7'::regclass::oid, + 'p7_1'::regclass::oid, + 'p7_2'::regclass::oid) + and attnum > 0; +-- shoud not reuse +select 
relname, relreuseattrs from pg_class where oid in + ('p7'::regclass::oid, + 'p7_1'::regclass::oid, + 'p7_2'::regclass::oid); +-- p7_1 shoud reuse +ALTER TABLE p7_1 ALTER COLUMN c SET compression pglz; +select attrelid::regclass, attname, attcompression from pg_attribute where attrelid in + ('t3'::regclass::oid, + 'p7'::regclass::oid, + 'p7_1'::regclass::oid, + 'p7_2'::regclass::oid) + and attnum > 0; +select relname, relreuseattrs from pg_class where oid in + ('p7'::regclass::oid, + 'p7_1'::regclass::oid, + 'p7_2'::regclass::oid); + +-- p7, p7_1 should reuse, but p7_2 not +ALTER TABLE t3 ALTER COLUMN c SET compression pglz; +select attrelid::regclass, attname, attcompression from pg_attribute where attrelid in + ('t3'::regclass::oid, + 'p7'::regclass::oid, + 'p7_1'::regclass::oid, + 'p7_2'::regclass::oid) + and attnum > 0; +select relname, relreuseattrs from pg_class where oid in + ('p7'::regclass::oid, + 'p7_1'::regclass::oid, + 'p7_2'::regclass::oid); + +-- test not null +ALTER TABLE p7_2 ALTER COLUMN c SET compression pglz; +select relname, relreuseattrs from pg_class where oid in + ('p7'::regclass::oid, + 'p7_1'::regclass::oid, + 'p7_2'::regclass::oid); +select attrelid::regclass, attname, attnotnull from pg_attribute where attrelid in + ('t3'::regclass::oid, + 'p7'::regclass::oid, + 'p7_1'::regclass::oid, + 'p7_2'::regclass::oid) + and attnum = 3; +ALTER TABLE p7 ALTER COLUMN c SET not null; +select attrelid::regclass, attname, attnotnull from pg_attribute where attrelid in + ('t3'::regclass::oid, + 'p7'::regclass::oid, + 'p7_1'::regclass::oid, + 'p7_2'::regclass::oid) + and attnum = 3; +select relname, relreuseattrs from pg_class where oid in + ('p7'::regclass::oid, + 'p7_1'::regclass::oid, + 'p7_2'::regclass::oid); +-- drop not null on a leaf +ALTER TABLE p7 ALTER COLUMN c drop not null; +select attrelid::regclass, attname, attnotnull from pg_attribute where attrelid in + ('t3'::regclass::oid, + 'p7'::regclass::oid, + 'p7_1'::regclass::oid, + 
'p7_2'::regclass::oid) + and attnum = 3; +select relname, relreuseattrs from pg_class where oid in + ('p7'::regclass::oid, + 'p7_1'::regclass::oid, + 'p7_2'::regclass::oid); +ALTER TABLE p7_2 ALTER COLUMN c set not null; +select attrelid::regclass, attname, attnotnull from pg_attribute where attrelid in + ('t3'::regclass::oid, + 'p7'::regclass::oid, + 'p7_1'::regclass::oid, + 'p7_2'::regclass::oid) + and attnum = 3; +select relname, relreuseattrs from pg_class where oid in + ('p7'::regclass::oid, + 'p7_1'::regclass::oid, + 'p7_2'::regclass::oid); +ALTER TABLE p7_2 ALTER COLUMN c drop not null; +select attrelid::regclass, attname, attnotnull from pg_attribute where attrelid in + ('t3'::regclass::oid, + 'p7'::regclass::oid, + 'p7_1'::regclass::oid, + 'p7_2'::regclass::oid) + and attnum = 3; +select relname, relreuseattrs from pg_class where oid in + ('p7'::regclass::oid, + 'p7_1'::regclass::oid, + 'p7_2'::regclass::oid); + +-- test static +ALTER TABLE p7 ALTER COLUMN c set STATISTICS 11; +select attrelid::regclass, attname, attstattarget from pg_attribute where attrelid in + ('t3'::regclass::oid, + 'p7'::regclass::oid, + 'p7_1'::regclass::oid, + 'p7_2'::regclass::oid) + and attnum = 3; +select relname, relreuseattrs from pg_class where oid in + ('p7'::regclass::oid, + 'p7_1'::regclass::oid, + 'p7_2'::regclass::oid); +ALTER TABLE p7 ALTER COLUMN c set STATISTICS -1; +select attrelid::regclass, attname, attstattarget from pg_attribute where attrelid in + ('t3'::regclass::oid, + 'p7'::regclass::oid, + 'p7_1'::regclass::oid, + 'p7_2'::regclass::oid) + and attnum = 3; +select relname, relreuseattrs from pg_class where oid in + ('p7'::regclass::oid, + 'p7_1'::regclass::oid, + 'p7_2'::regclass::oid); + +-- test IDENTITY +ALTER TABLE t3 ALTER COLUMN b set NOT NULL; +select attrelid::regclass, attname, attidentity from pg_attribute where attrelid in + ('t3'::regclass::oid, + 'p7'::regclass::oid, + 'p7_1'::regclass::oid, + 'p7_2'::regclass::oid) + and attnum = 2; +ALTER TABLE 
p7 ALTER COLUMN b ADD GENERATED ALWAYS AS IDENTITY; +select attrelid::regclass, attname, attidentity from pg_attribute where attrelid in + ('t3'::regclass::oid, + 'p7'::regclass::oid, + 'p7_1'::regclass::oid, + 'p7_2'::regclass::oid) + and attnum = 2; +select relname, relreuseattrs from pg_class where oid in + ('p7'::regclass::oid, + 'p7_1'::regclass::oid, + 'p7_2'::regclass::oid); +-- set IDENTITY +ALTER TABLE p7 ALTER COLUMN b SET GENERATED BY DEFAULT; +select attrelid::regclass, attname, attidentity from pg_attribute where attrelid in + ('t3'::regclass::oid, + 'p7'::regclass::oid, + 'p7_1'::regclass::oid, + 'p7_2'::regclass::oid) + and attnum = 2; +select relname, relreuseattrs from pg_class where oid in + ('p7'::regclass::oid, + 'p7_1'::regclass::oid, + 'p7_2'::regclass::oid); + -- drop IDENTITY +ALTER TABLE p7 ALTER COLUMN b DROP IDENTITY; +select attrelid::regclass, attname, attidentity from pg_attribute where attrelid in + ('t3'::regclass::oid, + 'p7'::regclass::oid, + 'p7_1'::regclass::oid, + 'p7_2'::regclass::oid) + and attnum = 2; +select relname, relreuseattrs from pg_class where oid in + ('p7'::regclass::oid, + 'p7_1'::regclass::oid, + 'p7_2'::regclass::oid); + +drop schema partition_reuse_columns cascade; diff --git a/src/backend/access/common/tupdesc.c b/src/backend/access/common/tupdesc.c index 107b5e62dcf..1fff6709d57 100644 --- a/src/backend/access/common/tupdesc.c +++ b/src/backend/access/common/tupdesc.c @@ -397,16 +397,26 @@ DecrTupleDescRefCount(TupleDesc tupdesc) * This allows typcache.c to use this routine to see if a cached record type * matches a requested type, and is harmless for relcache.c's uses. * We don't compare tdrefcount, either. + * + * SERVERLESS: + * param for_partition_reuse_attrs means that we will check partition + * column definitons with its parent. + * If true, we skip some fields: tdtypeid, attislocal, attinhcount. + * This should be used with strict is set to true. 
+ * */ bool -equalTupleDescs(TupleDesc tupdesc1, TupleDesc tupdesc2, bool strict) +equalTupleDescs(TupleDesc tupdesc1, TupleDesc tupdesc2, bool strict, bool for_partition_reuse_attrs) { int i, n; + AssertImply(for_partition_reuse_attrs, strict); + if (tupdesc1->natts != tupdesc2->natts) return false; - if (strict && tupdesc1->tdtypeid != tupdesc2->tdtypeid) + + if (!for_partition_reuse_attrs && (strict && tupdesc1->tdtypeid != tupdesc2->tdtypeid)) return false; for (i = 0; i < tupdesc1->natts; i++) @@ -460,9 +470,9 @@ equalTupleDescs(TupleDesc tupdesc1, TupleDesc tupdesc2, bool strict) return false; if (attr1->attisdropped != attr2->attisdropped) return false; - if (attr1->attislocal != attr2->attislocal) + if (!for_partition_reuse_attrs && (attr1->attislocal != attr2->attislocal)) return false; - if (attr1->attinhcount != attr2->attinhcount) + if (!for_partition_reuse_attrs && (attr1->attinhcount != attr2->attinhcount)) return false; if (attr1->attcollation != attr2->attcollation) return false; diff --git a/src/backend/catalog/heap.c b/src/backend/catalog/heap.c index 7e9291e2bc4..e911d0cc106 100644 --- a/src/backend/catalog/heap.c +++ b/src/backend/catalog/heap.c @@ -4488,6 +4488,17 @@ StorePartitionBound(Relation rel, Relation parent, PartitionBoundSpec *bound) new_val, new_null, new_repl); /* Also set the flag */ ((Form_pg_class) GETSTRUCT(newtuple))->relispartition = true; +#ifdef SERVERLESS + /* + * Fill in relreuseattrs value. + * True if partition has the same physical column definitions with its parent. 
+ */ + TupleDesc rel_tupleDesc = RelationGetDescr(rel); + TupleDesc parent_tupleDesc = RelationGetDescr(parent); + + ((Form_pg_class) GETSTRUCT(newtuple))->relreuseattrs = + equalTupleDescs(rel_tupleDesc, parent_tupleDesc, true, true); +#endif CatalogTupleUpdate(classRel, &newtuple->t_self, newtuple); heap_freetuple(newtuple); table_close(classRel, RowExclusiveLock); diff --git a/src/backend/catalog/pg_proc.c b/src/backend/catalog/pg_proc.c index dc0becdd53f..5e6aaa64201 100644 --- a/src/backend/catalog/pg_proc.c +++ b/src/backend/catalog/pg_proc.c @@ -452,7 +452,7 @@ ProcedureCreate(const char *procedureName, if (olddesc == NULL && newdesc == NULL) /* ok, both are runtime-defined RECORDs */ ; else if (olddesc == NULL || newdesc == NULL || - !equalTupleDescs(olddesc, newdesc, true)) + !equalTupleDescs(olddesc, newdesc, true, false)) ereport(ERROR, (errcode(ERRCODE_INVALID_FUNCTION_DEFINITION), errmsg("cannot change return type of existing function"), diff --git a/src/backend/commands/analyze.c b/src/backend/commands/analyze.c index 2c5f51a6f03..aa0a17d84ae 100644 --- a/src/backend/commands/analyze.c +++ b/src/backend/commands/analyze.c @@ -2207,7 +2207,7 @@ acquire_inherited_sample_rows(Relation onerel, int elevel, if (childrows > 0 && !equalTupleDescs(RelationGetDescr(childrel), RelationGetDescr(onerel), - false)) + false, false)) { TupleConversionMap *map; diff --git a/src/backend/commands/tablecmds.c b/src/backend/commands/tablecmds.c index bccbdacd1bb..08120561743 100644 --- a/src/backend/commands/tablecmds.c +++ b/src/backend/commands/tablecmds.c @@ -595,6 +595,10 @@ static void checkATSetDistributedByStandalone(AlteredTableInfo *tab, Relation re static void populate_rel_col_encodings(Relation rel, List *stenc, List *withOptions, Oid newAm); static void clear_rel_opts(Relation rel); +#ifdef SERVERLESS +static void maintenance_relreuseattrs(Relation rel); +static void maintenance_relreuseattrs_guts(Relation rel); +#endif /* 
---------------------------------------------------------------- * DefineRelation @@ -6440,6 +6444,36 @@ ATExecCmd(List **wqueue, AlteredTableInfo *tab, break; } +#ifdef SERVERLESS + /* + * maintenance_relreuseattrs + * Not all ALTER COLUMN commands are considered, some commands like + * ALTER COLUMN SET(options) doesn't have an impact on Attributes Reuse. + */ + if (cmd != NULL) + { + switch (cmd->subtype) + { + case AT_DropNotNull: /* ALTER COLUMN DROP NOT NULL */ + case AT_SetNotNull: /* ALTER COLUMN SET NOT NULL */ + case AT_ColumnDefault: /* ALTER COLUMN DEFAULT */ + case AT_AddIdentity: + case AT_SetIdentity: + case AT_DropIdentity: + case AT_DropExpression: + case AT_SetStatistics: /* ALTER COLUMN SET STATISTICS */ + case AT_SetStorage: /* ALTER COLUMN SET STORAGE */ + case AT_AlterColumnType: /* ALTER COLUMN TYPE */ + case AT_SetCompression: + maintenance_relreuseattrs(rel); + break; + default: + /* do nothing. */ + break; + } + } +#endif + /* * Report the subcommand to interested event triggers. 
*/ @@ -22523,6 +22557,9 @@ DetachPartitionFinalize(Relation rel, Relation partRel, bool concurrent, new_val, new_null, new_repl); ((Form_pg_class) GETSTRUCT(newtuple))->relispartition = false; +#ifdef SERVERLESS + ((Form_pg_class) GETSTRUCT(newtuple))->relreuseattrs = false; +#endif CatalogTupleUpdate(classRel, &newtuple->t_self, newtuple); heap_freetuple(newtuple); table_close(classRel, RowExclusiveLock); @@ -23264,3 +23301,97 @@ ATExecSetRelOptionsCheck(Relation rel, DefElem *def) ATExecSetRelOptionsCheck_hook(rel, def); return; } + +#ifdef SERVERLESS +static void +maintenance_relreuseattrs(Relation rel) +{ + if (unlikely(Gp_role != GP_ROLE_DISPATCH)) + return; + CommandCounterIncrement(); + maintenance_relreuseattrs_guts(rel); +} + +/* + * SERVERLESS + * maintenance pg_class.relreuseattrs + * If column of one relation of a partition tree is altered: + * 1.Check columns with its parent(if exists) + * 2.Check columns with its children recursively (if exists) + * t1 + * / \ + * p1 p2 + * / \ + * p2_1 p2_2 + * example: alter table p2 alter column set xxx; + * check p2's columns with t1, update p2.relreuseattrs. + * check p2_1, p2_2's columns with p2, update relreuseattrs of p2_1, p2_2. 
+ */ +static void +maintenance_relreuseattrs_guts(Relation rel) +{ + Relation classRel; + bool could_reuse; + Datum new_val[Natts_pg_class]; + bool new_null[Natts_pg_class], new_repl[Natts_pg_class]; + HeapTuple tuple, newtuple; + Form_pg_class classform; + Relation parentrel; + TupleDesc rel_tupleDesc; + TupleDesc parent_tupleDesc; + Oid parent_oid; + Oid relid = RelationGetRelid(rel); + + /* check with parent */ + if (rel->rd_rel->relispartition) + { + parent_oid = get_partition_parent(relid, true); + if (OidIsValid(parent_oid)) + { + classRel = table_open(RelationRelationId, RowExclusiveLock); + parentrel = relation_open(parent_oid, AccessShareLock); + rel_tupleDesc = RelationGetDescr(rel); + parent_tupleDesc = RelationGetDescr(parentrel); + could_reuse = equalTupleDescs(rel_tupleDesc, parent_tupleDesc, true, true); + tuple = SearchSysCacheCopy1(RELOID, relid); + if (!HeapTupleIsValid(tuple)) + elog(ERROR, "cache lookup failed for relation %u", relid); + classform = (Form_pg_class) GETSTRUCT(tuple); + if (classform->relreuseattrs != could_reuse) + { + memset(new_val, 0, sizeof(new_val)); + memset(new_null, false, sizeof(new_null)); + memset(new_repl, false, sizeof(new_repl)); + new_val[Anum_pg_class_relreuseattrs - 1] = could_reuse; + new_null[Anum_pg_class_relreuseattrs - 1] = false; + new_repl[Anum_pg_class_relreuseattrs - 1] = true; + newtuple = heap_modify_tuple(tuple, RelationGetDescr(classRel), new_val, new_null, new_repl); + CatalogTupleUpdate(classRel, &newtuple->t_self, newtuple); + heap_freetuple(newtuple); + CommandCounterIncrement(); + } + table_close(classRel, RowExclusiveLock); + relation_close(parentrel, NoLock); + } + } + + /* not partitioned, do nothing */ + if (rel->rd_rel->relkind != RELKIND_PARTITIONED_TABLE) + return; + + List *children; + ListCell *child; + children = find_inheritance_children(relid, AccessExclusiveLock); + foreach(child, children) + { + Oid childrelid = lfirst_oid(child); + Relation childrel; + /* find_inheritance_children 
already got lock */ + childrel = table_open(childrelid, NoLock); + CheckTableNotInUse(childrel, "ALTER TABLE"); + maintenance_relreuseattrs_guts(childrel); + table_close(childrel, NoLock); + } +} + +#endif diff --git a/src/backend/executor/execReplication.c b/src/backend/executor/execReplication.c index f9277c1d80d..b89f167ddea 100644 --- a/src/backend/executor/execReplication.c +++ b/src/backend/executor/execReplication.c @@ -303,7 +303,7 @@ RelationFindReplTupleSeq(Relation rel, LockTupleMode lockmode, bool found; TupleDesc desc PG_USED_FOR_ASSERTS_ONLY = RelationGetDescr(rel); - Assert(equalTupleDescs(desc, outslot->tts_tupleDescriptor, true)); + Assert(equalTupleDescs(desc, outslot->tts_tupleDescriptor, true, false)); eq = palloc0(sizeof(*eq) * outslot->tts_tupleDescriptor->natts); diff --git a/src/backend/utils/cache/plancache.c b/src/backend/utils/cache/plancache.c index e0e9a39740c..db40040cea3 100644 --- a/src/backend/utils/cache/plancache.c +++ b/src/backend/utils/cache/plancache.c @@ -735,7 +735,7 @@ RevalidateCachedQuery(CachedPlanSource *plansource, /* OK */ } else if (resultDesc == NULL || plansource->resultDesc == NULL || - !equalTupleDescs(resultDesc, plansource->resultDesc, true)) + !equalTupleDescs(resultDesc, plansource->resultDesc, true, false)) { /* can we give a better error message? 
*/ if (plansource->fixed_result) diff --git a/src/backend/utils/cache/relcache.c b/src/backend/utils/cache/relcache.c index 24a08322f05..c95debbc5b8 100644 --- a/src/backend/utils/cache/relcache.c +++ b/src/backend/utils/cache/relcache.c @@ -2871,7 +2871,7 @@ RelationClearRelation(Relation relation, bool rebuild) elog(ERROR, "relation %u deleted while still in use", save_relid); } - keep_tupdesc = equalTupleDescs(relation->rd_att, newrel->rd_att, true); + keep_tupdesc = equalTupleDescs(relation->rd_att, newrel->rd_att, true, false); keep_rules = equalRuleLocks(relation->rd_rules, newrel->rd_rules); keep_gp_policy = GpPolicyEqual(relation->rd_cdbpolicy, newrel->rd_cdbpolicy); keep_policies = equalRSDesc(relation->rd_rsdesc, newrel->rd_rsdesc); diff --git a/src/backend/utils/cache/typcache.c b/src/backend/utils/cache/typcache.c index 069efb4ab96..5175cc29a5b 100644 --- a/src/backend/utils/cache/typcache.c +++ b/src/backend/utils/cache/typcache.c @@ -233,7 +233,7 @@ shared_record_table_compare(const void *a, const void *b, size_t size, else t2 = k2->u.local_tupdesc; - return equalTupleDescs(t1, t2, true) ? 0 : 1; + return equalTupleDescs(t1, t2, true, false) ? 0 : 1; } /* @@ -1976,7 +1976,7 @@ record_type_typmod_compare(const void *a, const void *b, size_t size) RecordCacheEntry *left = (RecordCacheEntry *) a; RecordCacheEntry *right = (RecordCacheEntry *) b; - return equalTupleDescs(left->tupdesc, right->tupdesc, true) ? 0 : 1; + return equalTupleDescs(left->tupdesc, right->tupdesc, true, false) ? 
0 : 1; } /* diff --git a/src/include/access/tupdesc.h b/src/include/access/tupdesc.h index d19c8c960f7..dd120973e30 100644 --- a/src/include/access/tupdesc.h +++ b/src/include/access/tupdesc.h @@ -141,7 +141,7 @@ extern void DecrTupleDescRefCount(TupleDesc tupdesc); DecrTupleDescRefCount(tupdesc); \ } while (0) -extern bool equalTupleDescs(TupleDesc tupdesc1, TupleDesc tupdesc2, bool strict); +extern bool equalTupleDescs(TupleDesc tupdesc1, TupleDesc tupdesc2, bool strict, bool for_partition_reuse_attrs); extern uint32 hashTupleDesc(TupleDesc tupdesc); diff --git a/src/include/catalog/pg_class.h b/src/include/catalog/pg_class.h index 5d60f3fa336..5cafdfc66e9 100644 --- a/src/include/catalog/pg_class.h +++ b/src/include/catalog/pg_class.h @@ -137,6 +137,12 @@ CATALOG(pg_class,1259,RelationRelationId) BKI_BOOTSTRAP BKI_ROWTYPE_OID(83,Relat /* all multixacts in this rel are >= this; it is really a MultiXactId */ TransactionId relminmxid BKI_DEFAULT(1); /* FirstMultiXactId */ + /* + * Can partition relation reuse attributes (have the same column definitions) of its parent? + * This filed is pointless if rel is not a partition. + */ + bool relreuseattrs BKI_DEFAULT(f); + #ifdef CATALOG_VARLEN /* variable-length fields start here */ /* NOTE: These fields are not present in a relcache entry's rd_rel field. 
*/ /* access permissions */ diff --git a/src/test/regress/expected/partition_reuse_columns.out b/src/test/regress/expected/partition_reuse_columns.out new file mode 100644 index 00000000000..a4901006242 --- /dev/null +++ b/src/test/regress/expected/partition_reuse_columns.out @@ -0,0 +1,912 @@ +create schema partition_reuse_columns; +set search_path to partition_reuse_columns; +create table t1(a int, b int) partition by range(a) ; +create table p1 partition of t1 for values from (1) to (5); +create table p2 partition of t1 for values from (5) to (10) partition by range(b); +create table p2_1 partition of p2 for values from (1) to (3); +create table p2_2 partition of p2 for values from (3) to (6); +select relname, relreuseattrs from pg_class where relnamespace = 'partition_reuse_columns'::regnamespace order by relname; + relname | relreuseattrs +---------+--------------- + p1 | t + p2 | t + p2_1 | t + p2_2 | t + t1 | f +(5 rows) + +create table p3(a int, b int); +-- reuse columns +alter table t1 attach partition p3 for values from (10) to (15); +select relname, relreuseattrs from pg_class where oid = 'p3'::regclass::oid; + relname | relreuseattrs +---------+--------------- + p3 | t +(1 row) + +create table p4(b int, a int, c int); +alter table p4 drop column c; +-- not reuse columns +alter table t1 attach partition p4 for values from (15) to (20); +select relname, relreuseattrs from pg_class where oid = 'p4'::regclass::oid; + relname | relreuseattrs +---------+--------------- + p4 | f +(1 row) + +-- reuse columns, include the dropped column +create table p5(a int, b int, c int); +alter table p5 drop column c; +alter table t1 add column c int; +alter table t1 drop column c; +alter table t1 attach partition p5 for values from (20) to (25); +select relname, relreuseattrs from pg_class where oid = 'p5'::regclass::oid; + relname | relreuseattrs +---------+--------------- + p5 | t +(1 row) + +-- check after detach +alter table t1 detach partition p1; +select relname, 
relreuseattrs from pg_class where oid = 'p1'::regclass::oid; + relname | relreuseattrs +---------+--------------- + p1 | f +(1 row) + +-- check after re-attach +alter table t1 attach partition p1 for values from (1) to (5); +select relname, relreuseattrs from pg_class where oid = 'p1'::regclass::oid; + relname | relreuseattrs +---------+--------------- + p1 | t +(1 row) + +-- check after detach and alter tables +alter table t1 detach partition p1; +alter table p1 add column d int; +alter table p1 drop column d; +alter table t1 attach partition p1 for values from (1) to (5); +-- not reuse columns +select relname, relreuseattrs from pg_class where oid = 'p1'::regclass::oid; + relname | relreuseattrs +---------+--------------- + p1 | f +(1 row) + +-- check alter columns +create table t2(a int, b int) partition by range(a) ; +create table p6 (a int, b int); +alter table t2 attach partition p6 for values from (25) to (30); +-- +-- test alter default +-- +select relname, relreuseattrs from pg_class where oid = 'p6'::regclass::oid; + relname | relreuseattrs +---------+--------------- + p6 | t +(1 row) + +alter table p6 alter column b set default 10; +-- should not reuse +select relname, relreuseattrs from pg_class where oid = 'p6'::regclass::oid; + relname | relreuseattrs +---------+--------------- + p6 | f +(1 row) + +-- should reuse after parent alter column +alter table t2 alter column b set default 100; +select relname, relreuseattrs from pg_class where oid = 'p6'::regclass::oid; + relname | relreuseattrs +---------+--------------- + p6 | t +(1 row) + +-- should not reuse again +alter table p6 alter column b set default 99; +select relname, relreuseattrs from pg_class where oid = 'p6'::regclass::oid; + relname | relreuseattrs +---------+--------------- + p6 | f +(1 row) + +-- should reuse again +alter table p6 alter column b set default 100; +select relname, relreuseattrs from pg_class where oid = 'p6'::regclass::oid; + relname | relreuseattrs 
+---------+--------------- + p6 | t +(1 row) + +-- +-- test alter statistics +-- +-- should not reuse +alter table p6 alter column b set STATISTICS 99; +select relname, relreuseattrs from pg_class where oid = 'p6'::regclass::oid; + relname | relreuseattrs +---------+--------------- + p6 | f +(1 row) + +-- should reuse again +alter table p6 alter column b set STATISTICS -1; +select relname, relreuseattrs from pg_class where oid = 'p6'::regclass::oid; + relname | relreuseattrs +---------+--------------- + p6 | t +(1 row) + +-- +-- test alter NULL/NOT NULL +-- +-- should not reuse +alter table p6 alter column b set NOT NULL; +select relname, relreuseattrs from pg_class where oid = 'p6'::regclass::oid; + relname | relreuseattrs +---------+--------------- + p6 | f +(1 row) + +-- should reuse again +alter table p6 alter column b drop NOT NULL; +select relname, relreuseattrs from pg_class where oid = 'p6'::regclass::oid; + relname | relreuseattrs +---------+--------------- + p6 | t +(1 row) + +-- +-- test alter storage +-- +alter table t2 add column c text; +select attstorage from pg_attribute where attrelid = 't2'::regclass::oid and attname = 'c'; + attstorage +------------ + x +(1 row) + +select attstorage from pg_attribute where attrelid = 'p6'::regclass::oid and attname = 'c'; + attstorage +------------ + x +(1 row) + +alter table p6 alter column c set storage main; +select attstorage from pg_attribute where attrelid = 'p6'::regclass::oid and attname = 'c'; + attstorage +------------ + m +(1 row) + +-- should not reuse +select relname, relreuseattrs from pg_class where oid = 'p6'::regclass::oid; + relname | relreuseattrs +---------+--------------- + p6 | f +(1 row) + +-- should reuse again +alter table p6 alter column c set storage extended; +select relname, relreuseattrs from pg_class where oid = 'p6'::regclass::oid; + relname | relreuseattrs +---------+--------------- + p6 | t +(1 row) + +ALTER TABLE t2 add COLUMN d int; +ALTER TABLE t2 ALTER COLUMN d set NOT NULL; 
+select attname, attnotnull from pg_attribute where attrelid = 'p6'::regclass::oid and attnum > 0; + attname | attnotnull +---------+------------ + a | f + b | f + c | f + d | t +(4 rows) + +select attname, attnotnull from pg_attribute where attrelid = 't2'::regclass::oid and attnum >0; + attname | attnotnull +---------+------------ + a | f + b | f + c | f + d | t +(4 rows) + +select relname, relreuseattrs from pg_class where oid = 'p6'::regclass::oid; + relname | relreuseattrs +---------+--------------- + p6 | t +(1 row) + +ALTER TABLE t2 ALTER COLUMN d drop NOT NULL; +select attname, attnotnull from pg_attribute where attrelid = 'p6'::regclass::oid and attnum > 0; + attname | attnotnull +---------+------------ + a | f + b | f + c | f + d | f +(4 rows) + +select attname, attnotnull from pg_attribute where attrelid = 't2'::regclass::oid and attnum > 0; + attname | attnotnull +---------+------------ + a | f + b | f + c | f + d | f +(4 rows) + +select relname, relreuseattrs from pg_class where oid = 'p6'::regclass::oid; + relname | relreuseattrs +---------+--------------- + p6 | t +(1 row) + +-- +-- test IDENTITY +-- +ALTER TABLE t2 ALTER COLUMN d set NOT NULL; +select attname, attidentity from pg_attribute where attrelid = 'p6'::regclass::oid and attnum > 0; + attname | attidentity +---------+------------- + a | + b | + c | + d | +(4 rows) + +select attname, attidentity from pg_attribute where attrelid = 't2'::regclass::oid and attnum > 0; + attname | attidentity +---------+------------- + a | + b | + c | + d | +(4 rows) + +select relname, relreuseattrs from pg_class where oid = 'p6'::regclass::oid; + relname | relreuseattrs +---------+--------------- + p6 | t +(1 row) + +ALTER TABLE p6 ALTER COLUMN d ADD GENERATED ALWAYS AS IDENTITY; +select attname, attidentity from pg_attribute where attrelid = 'p6'::regclass::oid and attnum > 0; + attname | attidentity +---------+------------- + a | + b | + c | + d | a +(4 rows) + +select attname, attidentity from pg_attribute 
where attrelid = 't2'::regclass::oid and attnum > 0; + attname | attidentity +---------+------------- + a | + b | + c | + d | +(4 rows) + +select relname, relreuseattrs from pg_class where oid = 'p6'::regclass::oid; + relname | relreuseattrs +---------+--------------- + p6 | f +(1 row) + +ALTER TABLE p6 ALTER COLUMN d DROP IDENTITY; +select attname, attidentity from pg_attribute where attrelid = 'p6'::regclass::oid and attnum > 0; + attname | attidentity +---------+------------- + a | + b | + c | + d | +(4 rows) + +select attname, attidentity from pg_attribute where attrelid = 't2'::regclass::oid and attnum > 0; + attname | attidentity +---------+------------- + a | + b | + c | + d | +(4 rows) + +select relname, relreuseattrs from pg_class where oid = 'p6'::regclass::oid; + relname | relreuseattrs +---------+--------------- + p6 | t +(1 row) + +-- +-- test COMPRESSION +-- +select attname, attcompression from pg_attribute where attrelid = 'p6'::regclass::oid and attnum > 0; + attname | attcompression +---------+---------------- + a | + b | + c | + d | +(4 rows) + +select attname, attcompression from pg_attribute where attrelid = 't2'::regclass::oid and attnum > 0; + attname | attcompression +---------+---------------- + a | + b | + c | + d | +(4 rows) + +ALTER TABLE p6 ALTER COLUMN c SET compression pglz; +select attname, attcompression from pg_attribute where attrelid = 'p6'::regclass::oid and attnum > 0; + attname | attcompression +---------+---------------- + a | + b | + c | p + d | +(4 rows) + +select attname, attcompression from pg_attribute where attrelid = 't2'::regclass::oid and attnum > 0; + attname | attcompression +---------+---------------- + a | + b | + c | + d | +(4 rows) + +select relname, relreuseattrs from pg_class where oid = 'p6'::regclass::oid; + relname | relreuseattrs +---------+--------------- + p6 | f +(1 row) + +ALTER TABLE p6 ALTER COLUMN c SET compression default; +select attname, attcompression from pg_attribute where attrelid = 
'p6'::regclass::oid and attnum > 0; + attname | attcompression +---------+---------------- + a | + b | + c | + d | +(4 rows) + +select attname, attcompression from pg_attribute where attrelid = 't2'::regclass::oid and attnum > 0; + attname | attcompression +---------+---------------- + a | + b | + c | + d | +(4 rows) + +select relname, relreuseattrs from pg_class where oid = 'p6'::regclass::oid; + relname | relreuseattrs +---------+--------------- + p6 | t +(1 row) + +-- GPDB partition grammar +create table region +( + r_regionkey integer not null, + r_name char(25), + r_comment varchar(152) +) +partition by range (r_regionkey) +subpartition by list (r_name) subpartition template +( + subpartition africa values ('AFRICA'), + subpartition america values ('AMERICA'), + subpartition asia values ('ASIA'), + subpartition europe values ('EUROPE'), + subpartition mideast values ('MIDDLE EAST'), + subpartition australia values ('AUSTRALIA'), + subpartition antarctica values ('ANTARCTICA') +) +( + partition region1 start (0), + partition region2 start (3), + partition region3 start (5) end (8) +); +select relname, relreuseattrs from pg_class where relnamespace = 'partition_reuse_columns'::regnamespace and relname like 'region%'; + relname | relreuseattrs +---------------------------------------+--------------- + region | f + region_1_prt_region1 | t + region_1_prt_region1_2_prt_africa | t + region_1_prt_region1_2_prt_america | t + region_1_prt_region1_2_prt_antarctica | t + region_1_prt_region1_2_prt_asia | t + region_1_prt_region1_2_prt_australia | t + region_1_prt_region1_2_prt_europe | t + region_1_prt_region1_2_prt_mideast | t + region_1_prt_region2 | t + region_1_prt_region2_2_prt_africa | t + region_1_prt_region2_2_prt_america | t + region_1_prt_region2_2_prt_antarctica | t + region_1_prt_region2_2_prt_asia | t + region_1_prt_region2_2_prt_australia | t + region_1_prt_region2_2_prt_europe | t + region_1_prt_region2_2_prt_mideast | t + region_1_prt_region3 | t + 
region_1_prt_region3_2_prt_africa | t + region_1_prt_region3_2_prt_america | t + region_1_prt_region3_2_prt_antarctica | t + region_1_prt_region3_2_prt_asia | t + region_1_prt_region3_2_prt_australia | t + region_1_prt_region3_2_prt_europe | t + region_1_prt_region3_2_prt_mideast | t +(25 rows) + +create table foo_p (i int, j int) +partition by range(j) +(start(1) end(10) every(1)); +select relname, relreuseattrs from pg_class where relnamespace = 'partition_reuse_columns'::regnamespace and relname like 'foo_p%'; + relname | relreuseattrs +---------------+--------------- + foo_p | f + foo_p_1_prt_1 | t + foo_p_1_prt_2 | t + foo_p_1_prt_3 | t + foo_p_1_prt_4 | t + foo_p_1_prt_5 | t + foo_p_1_prt_6 | t + foo_p_1_prt_7 | t + foo_p_1_prt_8 | t + foo_p_1_prt_9 | t +(10 rows) + +-- +-- test partition tables with children +-- +create table p7 partition of t2 for values from (30) to (40) partition by range (a); +-- should not reuse as t2 has dropped columns +select relname, relreuseattrs from pg_class where oid in + ('p7'::regclass::oid); + relname | relreuseattrs +---------+--------------- + p7 | f +(1 row) + +drop table p7; +create table t3(a int, b int, c text) partition by range(a) ; +create table p7 partition of t3 for values from (1) to (40) partition by range (a); +create table p7_1 partition of p7 for values from (1) to (35); +create table p7_2 partition of p7 for values from (35) to (40); +select relname, relreuseattrs from pg_class where oid in + ('p7'::regclass::oid, + 'p7_1'::regclass::oid, + 'p7_2'::regclass::oid); + relname | relreuseattrs +---------+--------------- + p7_1 | t + p7 | t + p7_2 | t +(3 rows) + +-- test compression +select attrelid::regclass, attname, attcompression from pg_attribute where attrelid in + ('t3'::regclass::oid, + 'p7'::regclass::oid, + 'p7_1'::regclass::oid, + 'p7_2'::regclass::oid) + and attnum > 0; + attrelid | attname | attcompression +----------+---------+---------------- + t3 | a | + t3 | b | + t3 | c | + p7 | a | + p7 | b | + 
p7 | c | + p7_1 | a | + p7_1 | b | + p7_1 | c | + p7_2 | a | + p7_2 | b | + p7_2 | c | +(12 rows) + +ALTER TABLE p7 ALTER COLUMN c SET compression pglz; +select attrelid::regclass, attname, attcompression from pg_attribute where attrelid in + ('t3'::regclass::oid, + 'p7'::regclass::oid, + 'p7_1'::regclass::oid, + 'p7_2'::regclass::oid) + and attnum > 0; + attrelid | attname | attcompression +----------+---------+---------------- + t3 | a | + t3 | b | + t3 | c | + p7 | a | + p7 | b | + p7 | c | p + p7_1 | a | + p7_1 | b | + p7_1 | c | + p7_2 | a | + p7_2 | b | + p7_2 | c | +(12 rows) + +-- shoud not reuse +select relname, relreuseattrs from pg_class where oid in + ('p7'::regclass::oid, + 'p7_1'::regclass::oid, + 'p7_2'::regclass::oid); + relname | relreuseattrs +---------+--------------- + p7_1 | f + p7_2 | f + p7 | f +(3 rows) + +-- p7_1 shoud reuse +ALTER TABLE p7_1 ALTER COLUMN c SET compression pglz; +select attrelid::regclass, attname, attcompression from pg_attribute where attrelid in + ('t3'::regclass::oid, + 'p7'::regclass::oid, + 'p7_1'::regclass::oid, + 'p7_2'::regclass::oid) + and attnum > 0; + attrelid | attname | attcompression +----------+---------+---------------- + t3 | a | + t3 | b | + t3 | c | + p7 | a | + p7 | b | + p7 | c | p + p7_1 | a | + p7_1 | b | + p7_1 | c | p + p7_2 | a | + p7_2 | b | + p7_2 | c | +(12 rows) + +select relname, relreuseattrs from pg_class where oid in + ('p7'::regclass::oid, + 'p7_1'::regclass::oid, + 'p7_2'::regclass::oid); + relname | relreuseattrs +---------+--------------- + p7_2 | f + p7_1 | t + p7 | f +(3 rows) + +-- p7, p7_1 should reuse, but p7_2 not +ALTER TABLE t3 ALTER COLUMN c SET compression pglz; +select attrelid::regclass, attname, attcompression from pg_attribute where attrelid in + ('t3'::regclass::oid, + 'p7'::regclass::oid, + 'p7_1'::regclass::oid, + 'p7_2'::regclass::oid) + and attnum > 0; + attrelid | attname | attcompression +----------+---------+---------------- + t3 | a | + t3 | b | + t3 | c | p + p7 
| a | + p7 | b | + p7 | c | p + p7_1 | a | + p7_1 | b | + p7_1 | c | p + p7_2 | a | + p7_2 | b | + p7_2 | c | +(12 rows) + +select relname, relreuseattrs from pg_class where oid in + ('p7'::regclass::oid, + 'p7_1'::regclass::oid, + 'p7_2'::regclass::oid); + relname | relreuseattrs +---------+--------------- + p7_2 | f + p7_1 | t + p7 | t +(3 rows) + +-- test not null +ALTER TABLE p7_2 ALTER COLUMN c SET compression pglz; +select relname, relreuseattrs from pg_class where oid in + ('p7'::regclass::oid, + 'p7_1'::regclass::oid, + 'p7_2'::regclass::oid); + relname | relreuseattrs +---------+--------------- + p7_1 | t + p7 | t + p7_2 | t +(3 rows) + +select attrelid::regclass, attname, attnotnull from pg_attribute where attrelid in + ('t3'::regclass::oid, + 'p7'::regclass::oid, + 'p7_1'::regclass::oid, + 'p7_2'::regclass::oid) + and attnum = 3; + attrelid | attname | attnotnull +----------+---------+------------ + t3 | c | f + p7 | c | f + p7_1 | c | f + p7_2 | c | f +(4 rows) + +ALTER TABLE p7 ALTER COLUMN c SET not null; +select attrelid::regclass, attname, attnotnull from pg_attribute where attrelid in + ('t3'::regclass::oid, + 'p7'::regclass::oid, + 'p7_1'::regclass::oid, + 'p7_2'::regclass::oid) + and attnum = 3; + attrelid | attname | attnotnull +----------+---------+------------ + t3 | c | f + p7 | c | t + p7_1 | c | t + p7_2 | c | t +(4 rows) + +select relname, relreuseattrs from pg_class where oid in + ('p7'::regclass::oid, + 'p7_1'::regclass::oid, + 'p7_2'::regclass::oid); + relname | relreuseattrs +---------+--------------- + p7_1 | t + p7_2 | t + p7 | f +(3 rows) + +-- drop not null on a leaf +ALTER TABLE p7 ALTER COLUMN c drop not null; +select attrelid::regclass, attname, attnotnull from pg_attribute where attrelid in + ('t3'::regclass::oid, + 'p7'::regclass::oid, + 'p7_1'::regclass::oid, + 'p7_2'::regclass::oid) + and attnum = 3; + attrelid | attname | attnotnull +----------+---------+------------ + t3 | c | f + p7 | c | f + p7_1 | c | f + p7_2 | c | f 
+(4 rows) + +select relname, relreuseattrs from pg_class where oid in + ('p7'::regclass::oid, + 'p7_1'::regclass::oid, + 'p7_2'::regclass::oid); + relname | relreuseattrs +---------+--------------- + p7_1 | t + p7_2 | t + p7 | t +(3 rows) + +ALTER TABLE p7_2 ALTER COLUMN c set not null; +select attrelid::regclass, attname, attnotnull from pg_attribute where attrelid in + ('t3'::regclass::oid, + 'p7'::regclass::oid, + 'p7_1'::regclass::oid, + 'p7_2'::regclass::oid) + and attnum = 3; + attrelid | attname | attnotnull +----------+---------+------------ + t3 | c | f + p7 | c | f + p7_1 | c | f + p7_2 | c | t +(4 rows) + +select relname, relreuseattrs from pg_class where oid in + ('p7'::regclass::oid, + 'p7_1'::regclass::oid, + 'p7_2'::regclass::oid); + relname | relreuseattrs +---------+--------------- + p7_1 | t + p7_2 | f + p7 | t +(3 rows) + +ALTER TABLE p7_2 ALTER COLUMN c drop not null; +select attrelid::regclass, attname, attnotnull from pg_attribute where attrelid in + ('t3'::regclass::oid, + 'p7'::regclass::oid, + 'p7_1'::regclass::oid, + 'p7_2'::regclass::oid) + and attnum = 3; + attrelid | attname | attnotnull +----------+---------+------------ + t3 | c | f + p7 | c | f + p7_1 | c | f + p7_2 | c | f +(4 rows) + +select relname, relreuseattrs from pg_class where oid in + ('p7'::regclass::oid, + 'p7_1'::regclass::oid, + 'p7_2'::regclass::oid); + relname | relreuseattrs +---------+--------------- + p7_1 | t + p7_2 | t + p7 | t +(3 rows) + +-- test static +ALTER TABLE p7 ALTER COLUMN c set STATISTICS 11; +select attrelid::regclass, attname, attstattarget from pg_attribute where attrelid in + ('t3'::regclass::oid, + 'p7'::regclass::oid, + 'p7_1'::regclass::oid, + 'p7_2'::regclass::oid) + and attnum = 3; + attrelid | attname | attstattarget +----------+---------+--------------- + t3 | c | -1 + p7 | c | 11 + p7_1 | c | 11 + p7_2 | c | 11 +(4 rows) + +select relname, relreuseattrs from pg_class where oid in + ('p7'::regclass::oid, + 'p7_1'::regclass::oid, + 
'p7_2'::regclass::oid); + relname | relreuseattrs +---------+--------------- + p7_1 | t + p7_2 | t + p7 | f +(3 rows) + +ALTER TABLE p7 ALTER COLUMN c set STATISTICS -1; +select attrelid::regclass, attname, attstattarget from pg_attribute where attrelid in + ('t3'::regclass::oid, + 'p7'::regclass::oid, + 'p7_1'::regclass::oid, + 'p7_2'::regclass::oid) + and attnum = 3; + attrelid | attname | attstattarget +----------+---------+--------------- + t3 | c | -1 + p7 | c | -1 + p7_1 | c | -1 + p7_2 | c | -1 +(4 rows) + +select relname, relreuseattrs from pg_class where oid in + ('p7'::regclass::oid, + 'p7_1'::regclass::oid, + 'p7_2'::regclass::oid); + relname | relreuseattrs +---------+--------------- + p7_1 | t + p7_2 | t + p7 | t +(3 rows) + +-- test IDENTITY +ALTER TABLE t3 ALTER COLUMN b set NOT NULL; +select attrelid::regclass, attname, attidentity from pg_attribute where attrelid in + ('t3'::regclass::oid, + 'p7'::regclass::oid, + 'p7_1'::regclass::oid, + 'p7_2'::regclass::oid) + and attnum = 2; + attrelid | attname | attidentity +----------+---------+------------- + t3 | b | + p7 | b | + p7_1 | b | + p7_2 | b | +(4 rows) + +ALTER TABLE p7 ALTER COLUMN b ADD GENERATED ALWAYS AS IDENTITY; +select attrelid::regclass, attname, attidentity from pg_attribute where attrelid in + ('t3'::regclass::oid, + 'p7'::regclass::oid, + 'p7_1'::regclass::oid, + 'p7_2'::regclass::oid) + and attnum = 2; + attrelid | attname | attidentity +----------+---------+------------- + t3 | b | + p7 | b | a + p7_1 | b | + p7_2 | b | +(4 rows) + +select relname, relreuseattrs from pg_class where oid in + ('p7'::regclass::oid, + 'p7_1'::regclass::oid, + 'p7_2'::regclass::oid); + relname | relreuseattrs +---------+--------------- + p7_2 | f + p7 | f + p7_1 | f +(3 rows) + +-- set IDENTITY +ALTER TABLE p7 ALTER COLUMN b SET GENERATED BY DEFAULT; +select attrelid::regclass, attname, attidentity from pg_attribute where attrelid in + ('t3'::regclass::oid, + 'p7'::regclass::oid, + 'p7_1'::regclass::oid, 
+ 'p7_2'::regclass::oid) + and attnum = 2; + attrelid | attname | attidentity +----------+---------+------------- + t3 | b | + p7 | b | d + p7_1 | b | + p7_2 | b | +(4 rows) + +select relname, relreuseattrs from pg_class where oid in + ('p7'::regclass::oid, + 'p7_1'::regclass::oid, + 'p7_2'::regclass::oid); + relname | relreuseattrs +---------+--------------- + p7_2 | f + p7 | f + p7_1 | f +(3 rows) + + -- drop IDENTITY +ALTER TABLE p7 ALTER COLUMN b DROP IDENTITY; +select attrelid::regclass, attname, attidentity from pg_attribute where attrelid in + ('t3'::regclass::oid, + 'p7'::regclass::oid, + 'p7_1'::regclass::oid, + 'p7_2'::regclass::oid) + and attnum = 2; + attrelid | attname | attidentity +----------+---------+------------- + t3 | b | + p7 | b | + p7_1 | b | + p7_2 | b | +(4 rows) + +select relname, relreuseattrs from pg_class where oid in + ('p7'::regclass::oid, + 'p7_1'::regclass::oid, + 'p7_2'::regclass::oid); + relname | relreuseattrs +---------+--------------- + p7 | t + p7_1 | t + p7_2 | t +(3 rows) + +drop schema partition_reuse_columns cascade; +NOTICE: drop cascades to 5 other objects +DETAIL: drop cascades to table t1 +drop cascades to table t2 +drop cascades to table region +drop cascades to table foo_p +drop cascades to table t3 From 6cec784e130e187af5b6476824239fabffc6e32e Mon Sep 17 00:00:00 2001 From: wangweinan Date: Tue, 16 Apr 2024 16:10:45 +0800 Subject: [PATCH 088/152] Add autopartition grammar in parser Append AUTO keyword to identify auto partition table. So far, we support hash partition grammar for user request first. But we leave the space in future to support all partition structure. 
--- src/backend/nodes/copyfuncs.c | 3 + src/backend/nodes/equalfuncs.c | 3 + src/backend/nodes/outfuncs.c | 3 + src/backend/nodes/readfuncs_common.c | 3 + src/backend/parser/gram.y | 97 +++++++++++++++++++++++++++- src/include/nodes/parsenodes.h | 3 + src/include/parser/kwlist.h | 1 + 7 files changed, 111 insertions(+), 2 deletions(-) diff --git a/src/backend/nodes/copyfuncs.c b/src/backend/nodes/copyfuncs.c index 8b5e26b9451..65fdc2ffa0b 100644 --- a/src/backend/nodes/copyfuncs.c +++ b/src/backend/nodes/copyfuncs.c @@ -5970,6 +5970,9 @@ _copyPartitionSpec(const PartitionSpec *from) COPY_NODE_FIELD(partParams); COPY_NODE_FIELD(gpPartDef); COPY_NODE_FIELD(subPartSpec); +#ifdef SERVERLESS + COPY_NODE_FIELD(autoPartBound); +#endif /* SERVERLESS */ COPY_LOCATION_FIELD(location); return newnode; diff --git a/src/backend/nodes/equalfuncs.c b/src/backend/nodes/equalfuncs.c index 9981c2fd023..b80ea4c78c6 100644 --- a/src/backend/nodes/equalfuncs.c +++ b/src/backend/nodes/equalfuncs.c @@ -3427,6 +3427,9 @@ _equalPartitionSpec(const PartitionSpec *a, const PartitionSpec *b) { COMPARE_STRING_FIELD(strategy); COMPARE_NODE_FIELD(partParams); +#ifdef SERVERLESS + COMPARE_NODE_FIELD(autoPartBound); +#endif /* SERVERLESS */ COMPARE_LOCATION_FIELD(location); return true; diff --git a/src/backend/nodes/outfuncs.c b/src/backend/nodes/outfuncs.c index 9869127938f..c1596154b4a 100644 --- a/src/backend/nodes/outfuncs.c +++ b/src/backend/nodes/outfuncs.c @@ -4093,6 +4093,9 @@ _outPartitionSpec(StringInfo str, const PartitionSpec *node) WRITE_STRING_FIELD(strategy); WRITE_NODE_FIELD(partParams); +#ifdef SERVERLESS + WRITE_NODE_FIELD(autoPartBound); +#endif /* SERVERLESS */ WRITE_LOCATION_FIELD(location); } diff --git a/src/backend/nodes/readfuncs_common.c b/src/backend/nodes/readfuncs_common.c index c179171c1a2..6dc49b696d2 100644 --- a/src/backend/nodes/readfuncs_common.c +++ b/src/backend/nodes/readfuncs_common.c @@ -1474,6 +1474,9 @@ _readPartitionSpec(void) 
READ_STRING_FIELD(strategy); READ_NODE_FIELD(partParams); +#ifdef SERVERLESS + READ_NODE_FIELD(autoPartBound); +#endif /* SERVERLESS */ READ_LOCATION_FIELD(location); READ_DONE(); diff --git a/src/backend/parser/gram.y b/src/backend/parser/gram.y index fa5540a0062..7ac5ba56ef9 100644 --- a/src/backend/parser/gram.y +++ b/src/backend/parser/gram.y @@ -717,6 +717,8 @@ static void check_expressions_in_partition_key(PartitionSpec *spec, core_yyscan_ %type part_elem %type part_params %type PartitionBoundSpec +%type autopart_default +%type OptAutoPartitionBoundSpec %type hash_partbound %type hash_partbound_elem @@ -751,6 +753,7 @@ static void check_expressions_in_partition_key(PartitionSpec *spec, core_yyscan_ %token ABORT_P ABSOLUTE_P ACCESS ACTION ADD_P ADMIN AFTER AGGREGATE ALL ALSO ALTER ALWAYS ANALYSE ANALYZE AND ANY ARRAY AS ASC ASENSITIVE ASSERTION ASSIGNMENT ASYMMETRIC ATOMIC AT ATTACH ATTRIBUTE AUTHORIZATION + AUTO BACKWARD BEFORE BEGIN_P BETWEEN BIGINT BINARY BIT BOOLEAN_P BOTH BREADTH BY @@ -4087,6 +4090,74 @@ alter_identity_column_option: } ; +autopart_default: WITHOUT DEFAULT + { + $$ = false; + } + | WITH DEFAULT + { + $$ = true; + } + ; +/* + * So far, auto partition only support one level hash partition + */ +OptAutoPartitionBoundSpec: + AUTO BY '(' NonReservedWord Iconst ')' + { + /* HASH partition */ + PartitionBoundSpec *n = makeNode(PartitionBoundSpec); + + n->strategy = PARTITION_STRATEGY_HASH; + n->modulus = n->remainder = -1; + + if (strcmp($4, "modulus") == 0) + { + n->modulus = $5; + } + else + { + ereport(ERROR, + (errcode(ERRCODE_SYNTAX_ERROR), + errmsg("unrecognized auto hash partition bound specification \"%s\"", + $4), + parser_errposition(@4))); + } + $$ = n; + } + | AUTO BY ENUM_P + { + /* LIST partition */ + PartitionBoundSpec *n = makeNode(PartitionBoundSpec); + n->strategy = PARTITION_STRATEGY_LIST; + ereport(ERROR, + (errcode(ERRCODE_SYNTAX_ERROR), + errmsg("auto partition do not support list yet"))); + $$ = n; + } + | AUTO START '(' 
expr_list ')' EVERY '(' expr_list ')' autopart_default + { + /* Open Range partition */ + PartitionBoundSpec *n = makeNode(PartitionBoundSpec); + n->strategy = PARTITION_STRATEGY_RANGE; + ereport(ERROR, + (errcode(ERRCODE_SYNTAX_ERROR), + errmsg("auto partition do not support open space range yet"))); + $$ = n; + } + | AUTO START '(' expr_list ')' END_P '(' expr_list ')' EVERY '(' expr_list ')' autopart_default + { + /* Close Range partition with default */ + PartitionBoundSpec *n = makeNode(PartitionBoundSpec); + n->strategy = PARTITION_STRATEGY_RANGE; + ereport(ERROR, + (errcode(ERRCODE_SYNTAX_ERROR), + errmsg("auto partition do not support close space range yet"))); + $$ = n; + } + | { $$ = NULL; } + ; + PartitionBoundSpec: /* a HASH partition */ FOR VALUES WITH '(' hash_partbound ')' @@ -5994,6 +6065,23 @@ OptFirstPartitionSpec: PartitionSpec opt_list_subparts OptTabPartitionSpec { $1->gpPartDef = (GpPartitionDefinition *) $3; $1->subPartSpec = (PartitionSpec *) $2; +#ifdef SERVERLESS + if ($1->subPartSpec) + { + bool error = ($1->autoPartBound != NULL); + for (PartitionSpec *current = $1; current; current = current->subPartSpec) + { + error |= ($1->autoPartBound != NULL); + } + + if (error) + { + ereport(ERROR, + (errcode(ERRCODE_SYNTAX_ERROR), + errmsg("auto partition do not support multi level partition yet"))); + } + } +#endif /* SERVERLESS */ /* * Only if GPDB legacy partition syntax, check for expression in partition * key. If gpPartDef is present then only its legacy syntax. 
@@ -6050,13 +6138,16 @@ OptSecondPartitionSpec: } ; -PartitionSpec: PARTITION BY ColId '(' part_params ')' +PartitionSpec: PARTITION BY ColId '(' part_params ')' OptAutoPartitionBoundSpec { PartitionSpec *n = makeNode(PartitionSpec); n->strategy = $3; n->partParams = $5; n->location = @1; +#ifdef SERVERLESS + n->autoPartBound = $7; +#endif /* SERVERLESS */ $$ = n; } @@ -6634,7 +6725,7 @@ TabSubPartition: $$ = $1; } - | TabSubPartitionBy { $$ = $1; } + | TabSubPartitionBy OptAutoPartitionBoundSpec { $$ = $1; } | TabSubPartitionBy TabSubPartition { PartitionSpec *n = (PartitionSpec *) $1; @@ -19659,6 +19750,7 @@ unreserved_keyword: | ATOMIC | ATTACH | ATTRIBUTE + | AUTO | BACKWARD | BEFORE | BEGIN_P @@ -20591,6 +20683,7 @@ bare_label_keyword: | ATTACH | ATTRIBUTE | AUTHORIZATION + | AUTO | BACKWARD | BEFORE | BEGIN_P diff --git a/src/include/nodes/parsenodes.h b/src/include/nodes/parsenodes.h index 4d8b7c29e3b..8314380253f 100644 --- a/src/include/nodes/parsenodes.h +++ b/src/include/nodes/parsenodes.h @@ -893,6 +893,9 @@ typedef struct PartitionSpec struct GpPartitionDefinition *gpPartDef; struct PartitionSpec *subPartSpec; /* subpartition specification */ + #ifdef SERVERLESS + PartitionBoundSpec *autoPartBound; /* is autopartition if not null*/ + #endif int location; /* token location, or -1 if unknown */ } PartitionSpec; diff --git a/src/include/parser/kwlist.h b/src/include/parser/kwlist.h index 06ab8f7d61f..c82ab4e9a84 100644 --- a/src/include/parser/kwlist.h +++ b/src/include/parser/kwlist.h @@ -56,6 +56,7 @@ PG_KEYWORD("atomic", ATOMIC, UNRESERVED_KEYWORD, BARE_LABEL) PG_KEYWORD("attach", ATTACH, UNRESERVED_KEYWORD, BARE_LABEL) PG_KEYWORD("attribute", ATTRIBUTE, UNRESERVED_KEYWORD, BARE_LABEL) PG_KEYWORD("authorization", AUTHORIZATION, TYPE_FUNC_NAME_KEYWORD, BARE_LABEL) +PG_KEYWORD("auto", AUTO, UNRESERVED_KEYWORD, BARE_LABEL) PG_KEYWORD("backward", BACKWARD, UNRESERVED_KEYWORD, BARE_LABEL) PG_KEYWORD("before", BEFORE, UNRESERVED_KEYWORD, BARE_LABEL) 
PG_KEYWORD("begin", BEGIN_P, UNRESERVED_KEYWORD, BARE_LABEL) From 0160f259cabfd86bfa5f186d43eab317517993a2 Mon Sep 17 00:00:00 2001 From: Zhang Mingli Date: Tue, 16 Apr 2024 16:11:51 +0800 Subject: [PATCH 089/152] Support IF [NOT] EXISTS when CREATE/DROP WAREHOUSE. Create IF NOT EXISTS warehouse. Drop IF EXISTS warehouse. Use syscache to search warehouse. --- src/backend/nodes/copyfuncs.c | 2 ++ src/backend/parser/gram.y | 18 ++++++++++++++++++ src/include/nodes/parsenodes.h | 2 ++ 3 files changed, 22 insertions(+) diff --git a/src/backend/nodes/copyfuncs.c b/src/backend/nodes/copyfuncs.c index 65fdc2ffa0b..fa4adbcfba9 100644 --- a/src/backend/nodes/copyfuncs.c +++ b/src/backend/nodes/copyfuncs.c @@ -6350,6 +6350,7 @@ _copyCreateWarehouseStmt(const CreateWarehouseStmt *from) COPY_STRING_FIELD(whname); COPY_NODE_FIELD(options); COPY_NODE_FIELD(wh_options); + COPY_SCALAR_FIELD(if_not_exists); return newnode; } @@ -6400,6 +6401,7 @@ _copyDropWarehouseStmt(const DropWarehouseStmt *from) DropWarehouseStmt *newnode = makeNode(DropWarehouseStmt); COPY_STRING_FIELD(whname); + COPY_SCALAR_FIELD(missing_ok); return newnode; } diff --git a/src/backend/parser/gram.y b/src/backend/parser/gram.y index 7ac5ba56ef9..850fb79f3e0 100644 --- a/src/backend/parser/gram.y +++ b/src/backend/parser/gram.y @@ -13489,9 +13489,19 @@ CreateWarehouseStmt: CREATE WAREHOUSE name OptWarehouseOptList create_generic_op n->whname = $3; n->options = $4; n->wh_options = $5; + n->if_not_exists = false; n->tags = $6; $$ = (Node *) n; } + | CREATE WAREHOUSE IF_P NOT EXISTS name OptWarehouseOptList create_generic_options + { + CreateWarehouseStmt *n = makeNode(CreateWarehouseStmt); + n->whname = $6; + n->options = $7; + n->wh_options = $8; + n->if_not_exists = true; + $$ = (Node *) n; + } ; OptWarehouseOptList: WarehouseOptList { $$ = $1; } @@ -13520,6 +13530,14 @@ DropWarehouseStmt: DROP WAREHOUSE name { DropWarehouseStmt *n = makeNode(DropWarehouseStmt); n->whname = $3; + n->missing_ok = false; + $$ = 
(Node *) n; + } + | DROP WAREHOUSE IF_P EXISTS name + { + DropWarehouseStmt *n = makeNode(DropWarehouseStmt); + n->whname = $5; + n->missing_ok = true; $$ = (Node *) n; } ; diff --git a/src/include/nodes/parsenodes.h b/src/include/nodes/parsenodes.h index 8314380253f..c58062d8275 100644 --- a/src/include/nodes/parsenodes.h +++ b/src/include/nodes/parsenodes.h @@ -4479,6 +4479,7 @@ typedef struct CreateWarehouseStmt char *whname; List *options; /* List of DefElem nodes */ List *wh_options; /* generic options to warehouse */ + bool if_not_exists; List *tags; /* List of tag DefElem nodes */ } CreateWarehouseStmt; @@ -4486,6 +4487,7 @@ typedef struct DropWarehouseStmt { NodeTag type; char *whname; + bool missing_ok; } DropWarehouseStmt; typedef enum AlterWarehouseType From 494f03bd3d205e550d1d346618e5822cb36b7245 Mon Sep 17 00:00:00 2001 From: yangjianghua Date: Tue, 23 Apr 2024 13:46:39 +0800 Subject: [PATCH 090/152] Feature: support min/max filter in storage_am. Support file level and group level filter. Add guc hashdata_enable_filter to open/close filter. --- src/backend/executor/execScan.c | 4 ++++ src/interfaces/libpq/fe-protocol3.c | 2 +- 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/src/backend/executor/execScan.c b/src/backend/executor/execScan.c index 51adbd51243..310df9b231d 100644 --- a/src/backend/executor/execScan.c +++ b/src/backend/executor/execScan.c @@ -239,6 +239,10 @@ ExecScan(ScanState *node, * when the qual is null ... saves only a few cycles, but they add up * ... 
*/ + /* + * fetch qual again in case ExecScanFetch updated it + */ + qual = node->ps.qual; if (qual == NULL || ExecQual(qual, econtext)) { /* diff --git a/src/interfaces/libpq/fe-protocol3.c b/src/interfaces/libpq/fe-protocol3.c index 39ad12f2b68..1f038c8b95d 100644 --- a/src/interfaces/libpq/fe-protocol3.c +++ b/src/interfaces/libpq/fe-protocol3.c @@ -53,7 +53,7 @@ #define VALID_LONG_MESSAGE_TYPE(id) \ ((id) == 'T' || (id) == 'D' || (id) == 'd' || (id) == 'V' || \ (id) == 'E' || (id) == 'N' || (id) == 'A' || (id) == 'Y' || \ - (id) == 'y' || (id) == 'o' || (id) == 'e') + (id) == 'y' || (id) == 'o' || (id) == 'e' || (id) == 'h') static void handleSyncLoss(PGconn *conn, char id, int msgLength); From 9d0351126fdfd35743a601ea55bdef07e06865db Mon Sep 17 00:00:00 2001 From: wangliang Date: Fri, 19 Apr 2024 09:38:45 +0800 Subject: [PATCH 091/152] support storage type check for column add --- src/backend/access/heap/heapam.c | 2 +- src/backend/commands/tablecmds.c | 2 +- src/include/access/tableam.h | 4 ++++ 3 files changed, 6 insertions(+), 2 deletions(-) diff --git a/src/backend/access/heap/heapam.c b/src/backend/access/heap/heapam.c index 99a9d6f2bd8..36decf68fa9 100644 --- a/src/backend/access/heap/heapam.c +++ b/src/backend/access/heap/heapam.c @@ -1624,7 +1624,7 @@ heap_getnextslot_tidrange(TableScanDesc sscan, ScanDirection direction, uint32 heap_scan_flags(Relation relation) { - return 0; + return (uint32)SCAN_SUPPORT_DEFAULT_COLUMNS; } /* diff --git a/src/backend/commands/tablecmds.c b/src/backend/commands/tablecmds.c index 08120561743..c24210986e7 100644 --- a/src/backend/commands/tablecmds.c +++ b/src/backend/commands/tablecmds.c @@ -8724,7 +8724,7 @@ ATExecAddColumn(List **wqueue, AlteredTableInfo *tab, Relation rel, * should be smarter.. 
*/ - if (!RelationIsHeap(rel)) + if (!(table_scan_flags(rel) & SCAN_SUPPORT_DEFAULT_COLUMNS)) { if (!defval) defval = (Expr *) makeNullConst(typeOid, -1, collOid); diff --git a/src/include/access/tableam.h b/src/include/access/tableam.h index 324bdf65327..4e823338579 100644 --- a/src/include/access/tableam.h +++ b/src/include/access/tableam.h @@ -59,6 +59,10 @@ typedef struct AnalyzeContext{ int32 targrows; } AnalyzeContext; +#define SCAN_SUPPORT_VECTORIZATION (1 << 8) /* support vectorization scan */ +/* support scan with column default value, but not in storage */ +#define SCAN_SUPPORT_DEFAULT_COLUMNS (1 << 7) + /* * Bitmask values for the flags argument to the scan_begin callback. */ From 0c93bb55efa35599b4ba19d5d23b29e7a6806069 Mon Sep 17 00:00:00 2001 From: wangliang Date: Tue, 23 Apr 2024 07:26:02 +0800 Subject: [PATCH 092/152] change the SCAN_SUPPORT_DEFAULT_COLUMNs macro value --- src/include/access/tableam.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/include/access/tableam.h b/src/include/access/tableam.h index 4e823338579..4f8bbb09653 100644 --- a/src/include/access/tableam.h +++ b/src/include/access/tableam.h @@ -61,7 +61,7 @@ typedef struct AnalyzeContext{ #define SCAN_SUPPORT_VECTORIZATION (1 << 8) /* support vectorization scan */ /* support scan with column default value, but not in storage */ -#define SCAN_SUPPORT_DEFAULT_COLUMNS (1 << 7) +#define SCAN_SUPPORT_DEFAULT_COLUMNS (1 << 9) /* * Bitmask values for the flags argument to the scan_begin callback. From 619c238c52d688ebc35e4ac30e2096e315c127f0 Mon Sep 17 00:00:00 2001 From: leo Date: Thu, 25 Apr 2024 09:38:11 +0800 Subject: [PATCH 093/152] Fix: use XLogRecGetBlockTag to get block number in brin_redo. The redo function is called in UnionStore to replay the WAL on page, but walredo procedure could replay WAL on most one page every time. For WALs related with multi pages, some pages may be skipped, do not replay WAL on those pages. 
The related buffer with those pages may be invalid, so we use XLogRecGetBlockTag here to avoid touching the invalid buffer. --- src/backend/access/brin/brin_xlog.c | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/src/backend/access/brin/brin_xlog.c b/src/backend/access/brin/brin_xlog.c index 1b09347116b..26310df7f23 100644 --- a/src/backend/access/brin/brin_xlog.c +++ b/src/backend/access/brin/brin_xlog.c @@ -70,7 +70,21 @@ brin_xlog_insert_update(XLogReaderState *record, } /* need this page's blkno to store in revmap */ +#ifdef SERVERLESS + /* + * In serverless architecture, use XLogRecGetBlockTag to get block number + * of buffer instead of BufferGetBlockNumber. + * + * This redo function will be called in UnionStore to replay the WAL on page, + * but walredo procedure could replay WAL on one page every time. + * For this WAL, the page with block_id 0 may be skipped, action is BLK_DONE, + * and the buffer is invalid, so we use XLogRecGetBlockTag here to avoid touching + * the buffer. 
+ */ + XLogRecGetBlockTag(record, 0, NULL, NULL, ®pgno); +#else regpgno = BufferGetBlockNumber(buffer); +#endif /* insert the index item into the page */ if (action == BLK_NEEDS_REDO) From e1ef352919521fa16ac539ff2dd0d0c0571ac903 Mon Sep 17 00:00:00 2001 From: wangweinan Date: Tue, 7 May 2024 14:51:59 +0800 Subject: [PATCH 094/152] Refactory hashdata_manifest(json format) --- src/backend/catalog/dependency.c | 2 +- src/backend/catalog/main_manifest.c | 68 ++++++++++++++++++++++++++++- src/include/catalog/main_manifest.h | 4 +- 3 files changed, 71 insertions(+), 3 deletions(-) diff --git a/src/backend/catalog/dependency.c b/src/backend/catalog/dependency.c index 1d8259d8d91..023785e599b 100644 --- a/src/backend/catalog/dependency.c +++ b/src/backend/catalog/dependency.c @@ -1598,7 +1598,7 @@ doDeletion(const ObjectAddress *object, int flags) RemoveTaskById(object->objectId); break; case OCLASS_MAIN_MANIFEST: - RemoveMainManifestByRelnode(object->objectId); + RemoveManifestRecord(object->objectId); break; case OCLASS_MATVIEW_AUX: diff --git a/src/backend/catalog/main_manifest.c b/src/backend/catalog/main_manifest.c index d96dcedfd1f..ffede19a7c4 100644 --- a/src/backend/catalog/main_manifest.c +++ b/src/backend/catalog/main_manifest.c @@ -19,13 +19,14 @@ #include "catalog/indexing.h" #include "catalog/main_manifest.h" #include "utils/rel.h" +#include "catalog/dependency.h" /* * RemoveMainManifestByRelnode * Remove the main manifest record for the relnode. 
*/ void -RemoveMainManifestByRelnode(Oid relnode) +RemoveManifestRecord(RelFileNodeId relnode) { Relation main_manifest; HeapTuple tuple; @@ -47,3 +48,68 @@ RemoveMainManifestByRelnode(Oid relnode) systable_endscan(scanDescriptor); table_close(main_manifest, RowExclusiveLock); } + +void +InsertManifestRecord(Oid relid, RelFileNodeId relfilenode, text *path) +{ + Datum values[2]; + HeapTuple tuple; + ObjectAddress dep; + ObjectAddress ref; + Relation rel = heap_open(ManifestRelationId, RowExclusiveLock); + bool nulls[2]; + + values[0] = UInt64GetDatum(relfilenode); + values[1] = PointerGetDatum(path); + nulls[0] = false; + nulls[1] = false; + + tuple = heap_form_tuple(RelationGetDescr(rel), values, nulls); + + CatalogTupleInsert(rel, tuple); + + table_close(rel, RowExclusiveLock); + + dep.classId = ManifestRelationId; + dep.objectId = DatumGetObjectId(relfilenode); + dep.objectSubId = 0; + ref.classId = RelationRelationId; + ref.objectId = relid; + ref.objectSubId = 0; + + recordDependencyOn(&dep, &ref, DEPENDENCY_INTERNAL); +} + +void +UpdateManifestRecord(RelFileNodeId relfilenode, text *path) +{ + Datum values[2]; + HeapTuple newtuple; + HeapTuple oldtuple; + ScanKeyData key; + SysScanDesc scan; + bool nulls[2]; + Relation rel = heap_open(ManifestRelationId, RowExclusiveLock); + + ScanKeyInit(&key, Anum_main_manifest_relnode, BTEqualStrategyNumber, + F_INT8EQ, UInt64GetDatum(relfilenode)); + + scan = systable_beginscan(rel, InvalidOid, false, NULL, 1, &key); + + oldtuple = systable_getnext(scan); + if (!HeapTupleIsValid(oldtuple)) + ereport(ERROR, (errcode(ERRCODE_IO_ERROR), + errmsg("write manifest catalog error"))); + + values[0] = UInt64GetDatum(relfilenode); + values[1] = PointerGetDatum(path); + nulls[0] = false; + nulls[1] = false; + + newtuple = heap_form_tuple(RelationGetDescr(rel), values, nulls); + + CatalogTupleUpdate(rel, &oldtuple->t_self, newtuple); + + systable_endscan(scan); + heap_close(rel, NoLock); +} \ No newline at end of file diff --git 
a/src/include/catalog/main_manifest.h b/src/include/catalog/main_manifest.h index d093cbc67d1..fdcda6ec7f1 100644 --- a/src/include/catalog/main_manifest.h +++ b/src/include/catalog/main_manifest.h @@ -30,6 +30,8 @@ CATALOG(main_manifest,9004,ManifestRelationId) typedef FormData_main_manifest *Form_main_manifest; -extern void RemoveMainManifestByRelnode(Oid relnode); +extern void InsertManifestRecord(Oid relid, RelFileNodeId relnode, text* path); +extern void RemoveManifestRecord(RelFileNodeId relnode); +extern void UpdateManifestRecord(RelFileNodeId relnode, text* path); #endif /* MAIN_MANIFEST.h */ From 7c47ea7da41404a11015f7cfb470aa70195a2f19 Mon Sep 17 00:00:00 2001 From: hanwei Date: Thu, 16 May 2024 18:45:10 +0800 Subject: [PATCH 095/152] Feature: create role DEFAULTWAREHOUSE and guc Change hashdata.warehouse guc to warehouse and add a function for create role DEFAULTWAREHOUSE default_warehouse_name. At the same time, add corresponding case and fix original bug for drop warehouse and correct corresponding case.
--- contrib/hashdata_chaos/hashdata_chaos.c | 2 +- gpMgmt/bin/gppylib/db/dbconn.py | 2 +- gpMgmt/bin/gpsd | 2 +- src/backend/commands/user.c | 26 ++++++++++++++++++++++++- src/backend/parser/gram.y | 9 ++++++++- src/bin/psql/tab-complete.c | 2 +- src/include/commands/user.h | 4 ++++ src/include/parser/kwlist.h | 1 + 8 files changed, 42 insertions(+), 6 deletions(-) diff --git a/contrib/hashdata_chaos/hashdata_chaos.c b/contrib/hashdata_chaos/hashdata_chaos.c index 4e7122a785d..30507d2afe2 100644 --- a/contrib/hashdata_chaos/hashdata_chaos.c +++ b/contrib/hashdata_chaos/hashdata_chaos.c @@ -190,7 +190,7 @@ generate_cmd(Oid id, char *buffer, int lengh) if (HeapTupleIsValid(tuple)) { text *warehouse_name = &((Form_gp_warehouse) GETSTRUCT(tuple))->warehouse_name; - snprintf(buffer, lengh,"set hashdata.warehouse to %s;", text_to_cstring(warehouse_name)); + snprintf(buffer, lengh,"set warehouse to %s;", text_to_cstring(warehouse_name)); success = true; } systable_endscan(scan); diff --git a/gpMgmt/bin/gppylib/db/dbconn.py b/gpMgmt/bin/gppylib/db/dbconn.py index b800cd42693..a859144ba8b 100644 --- a/gpMgmt/bin/gppylib/db/dbconn.py +++ b/gpMgmt/bin/gppylib/db/dbconn.py @@ -236,7 +236,7 @@ def connect(dburl, utility=False, verbose=False, options.append("-c search_path=") if dburl.warehouse: - options.append("-c hashdata.warehouse=%s" % dburl.warehouse) + options.append("-c warehouse=%s" % dburl.warehouse) if allowSystemTableMods: options.append("-c allow_system_table_mods=true") diff --git a/gpMgmt/bin/gpsd b/gpMgmt/bin/gpsd index b411a27401d..29dfc9f77a4 100755 --- a/gpMgmt/bin/gpsd +++ b/gpMgmt/bin/gpsd @@ -142,7 +142,7 @@ def main(): inclHLL = options.dumpHLL if options.warehouse: - warehouse_opt = "-c hashdata.warehouse=%s" % options.warehouse + warehouse_opt = "-c warehouse=%s" % options.warehouse pgoptions = warehouse_opt + " " + pgoptions envOpts['PGOPTIONS'] = pgoptions diff --git a/src/backend/commands/user.c b/src/backend/commands/user.c index 
52b05cd0df4..e00d2363a9e 100644 --- a/src/backend/commands/user.c +++ b/src/backend/commands/user.c @@ -70,6 +70,7 @@ int Password_encryption = PASSWORD_TYPE_SCRAM_SHA_256; /* Hook to check passwords in CreateRole() and AlterRole() */ check_password_hook_type check_password_hook = NULL; +ExecSetDefault_hook_type ExecSetDefault_hook = NULL; static void AddRoleMems(const char *rolename, Oid roleid, List *memberSpecs, List *memberIds, @@ -142,6 +143,7 @@ CreateRole(ParseState *pstate, CreateRoleStmt *stmt) char *resgroup = NULL; /* resource group for this role */ bool account_is_lock = false; /* whether the account will be locked/unlocked */ bool enable_profile = false; /* whether user can use password profile */ + char *default_warehosue = NULL; /* default warehouse for this role */ int16 account_status = ROLE_ACCOUNT_STATUS_OPEN; /* default accountstatus is 'OPEN' */ TimestampTz now = 0; /* current timestamp with time zone */ List *addintervals = NIL; /* list of time intervals for which login should be denied */ @@ -164,6 +166,8 @@ CreateRole(ParseState *pstate, CreateRoleStmt *stmt) DefElem *dprofile = NULL; DefElem *daccountIsLock = NULL; DefElem *denableProfile = NULL; + DefElem *ddefaultwarehosue = NULL; + List *parse_options = NIL; now = GetCurrentTimestamp(); @@ -384,6 +388,14 @@ CreateRole(ParseState *pstate, CreateRoleStmt *stmt) errmsg("conflicting or redundant options"))); denableProfile = defel; } + else if (strcmp(defel->defname, "default_warehosue") == 0) + { + if (ddefaultwarehosue) + ereport(ERROR, + (errcode(ERRCODE_SYNTAX_ERROR), + errmsg("conflicting or redundant options"))); + ddefaultwarehosue = defel; + } else elog(ERROR, "option \"%s\" not recognized", defel->defname); @@ -433,6 +445,8 @@ CreateRole(ParseState *pstate, CreateRoleStmt *stmt) account_is_lock = intVal(daccountIsLock->arg) != 0; if (denableProfile) enable_profile = intVal(denableProfile->arg) != 0; + if (ddefaultwarehosue) + default_warehosue = strVal(ddefaultwarehosue->arg); /* * 
Only the super user has the privileges of profile. @@ -817,7 +831,7 @@ CreateRole(ParseState *pstate, CreateRoleStmt *stmt) * Advance command counter so we can see new record; else tests in * AddRoleMems may fail. */ - if (addroleto || adminmembers || rolemembers) + if (addroleto || adminmembers || rolemembers || default_warehosue) CommandCounterIncrement(); /* @@ -877,6 +891,16 @@ CreateRole(ParseState *pstate, CreateRoleStmt *stmt) AddRoleDenials(stmt->role, roleid, addintervals); } + if (default_warehosue) + { + parse_options = lappend(parse_options, ddefaultwarehosue); + } + + if (ExecSetDefault_hook) + { + (*ExecSetDefault_hook)(parse_options, roleid); + } + /* * Create tag description. */ diff --git a/src/backend/parser/gram.y b/src/backend/parser/gram.y index 850fb79f3e0..502be11c0f9 100644 --- a/src/backend/parser/gram.y +++ b/src/backend/parser/gram.y @@ -768,7 +768,7 @@ static void check_expressions_in_partition_key(PartitionSpec *spec, core_yyscan_ CURRENT_CATALOG CURRENT_DATE CURRENT_ROLE CURRENT_SCHEMA CURRENT_TIME CURRENT_TIMESTAMP CURRENT_USER CURSOR CYCLE - DATA_P DATABASE DAY_P DEALLOCATE DEC DECIMAL_P DECLARE DEFAULT DEFAULTS + DATA_P DATABASE DAY_P DEALLOCATE DEC DECIMAL_P DECLARE DEFAULT DEFAULTS DEFAULTWAREHOUSE DEFERRABLE DEFERRED DEFINER DELETE_P DELIMITER DELIMITERS DEPENDS DEPTH DESC DETACH DICTIONARY DIRECTORY DISABLE_P DISCARD DISTINCT DO DOCUMENT_P DOMAIN_P DOUBLE_P DROP DYNAMIC @@ -1038,6 +1038,7 @@ static void check_expressions_in_partition_key(PartitionSpec *spec, core_yyscan_ %nonassoc DEALLOCATE %nonassoc DECLARE %nonassoc DEFAULTS + %nonassoc DEFAULTWAREHOUSE %nonassoc DEFERRED %nonassoc DEFINER %nonassoc DELETE_P @@ -2023,6 +2024,10 @@ CreateOptRoleElem: { $$ = makeDefElem("addroleto", (Node *)$3, @1); } + | DEFAULTWAREHOUSE name + { + $$ = makeDefElem("default_warehosue", (Node *) makeString($2), @1); + } ; deny_login_role: DENY deny_interval { $$ = (Node *)$2; } @@ -19819,6 +19824,7 @@ unreserved_keyword: | DEALLOCATE | DECLARE 
| DEFAULTS + | DEFAULTWAREHOUSE | DEFERRED | DEFINER | DELETE_P @@ -20778,6 +20784,7 @@ bare_label_keyword: | DECODE | DEFAULT | DEFAULTS + | DEFAULTWAREHOUSE | DEFERRABLE | DEFERRED | DEFINER diff --git a/src/bin/psql/tab-complete.c b/src/bin/psql/tab-complete.c index 23fd9a0ce2b..f934e34e12c 100644 --- a/src/bin/psql/tab-complete.c +++ b/src/bin/psql/tab-complete.c @@ -3097,7 +3097,7 @@ psql_completion(const char *text, int start, int end) "NOLOGIN", "NOREPLICATION", "NOSUPERUSER", "PASSWORD", "REPLICATION", "ROLE", "SUPERUSER", "SYSID", "VALID UNTIL", "WITH", "PROFILE", "ENABLE PROFILE", - "DISABLE PROFILE", "ACCOUNT"); + "DISABLE PROFILE", "ACCOUNT", "DEFAULTWAREHOUSE"); /* CREATE ROLE,USER,GROUP WITH */ else if (Matches("CREATE", "ROLE|GROUP|USER", MatchAny, "WITH")) diff --git a/src/include/commands/user.h b/src/include/commands/user.h index 028e0dde568..b878107f047 100644 --- a/src/include/commands/user.h +++ b/src/include/commands/user.h @@ -24,6 +24,10 @@ typedef void (*check_password_hook_type) (const char *username, const char *shad extern PGDLLIMPORT check_password_hook_type check_password_hook; +typedef void (*ExecSetDefault_hook_type) (List *parse_options, Oid roleid); + +extern PGDLLIMPORT ExecSetDefault_hook_type ExecSetDefault_hook; + extern Oid CreateRole(ParseState *pstate, CreateRoleStmt *stmt); extern Oid AlterRole(AlterRoleStmt *stmt); extern Oid AlterRoleSet(AlterRoleSetStmt *stmt); diff --git a/src/include/parser/kwlist.h b/src/include/parser/kwlist.h index c82ab4e9a84..0d442a578f9 100644 --- a/src/include/parser/kwlist.h +++ b/src/include/parser/kwlist.h @@ -137,6 +137,7 @@ PG_KEYWORD("declare", DECLARE, UNRESERVED_KEYWORD, BARE_LABEL) PG_KEYWORD("decode", DECODE, RESERVED_KEYWORD, BARE_LABEL) PG_KEYWORD("default", DEFAULT, RESERVED_KEYWORD, BARE_LABEL) PG_KEYWORD("defaults", DEFAULTS, UNRESERVED_KEYWORD, BARE_LABEL) +PG_KEYWORD("defaultwarehouse", DEFAULTWAREHOUSE, UNRESERVED_KEYWORD, BARE_LABEL) PG_KEYWORD("deferrable", DEFERRABLE, 
RESERVED_KEYWORD, BARE_LABEL) PG_KEYWORD("deferred", DEFERRED, UNRESERVED_KEYWORD, BARE_LABEL) PG_KEYWORD("definer", DEFINER, UNRESERVED_KEYWORD, BARE_LABEL) From 2c84254a948ab6dc3350d460219fe61c33e424e9 Mon Sep 17 00:00:00 2001 From: hanwei Date: Fri, 24 May 2024 10:24:31 +0800 Subject: [PATCH 096/152] Fix gp_tablespace_location For QE, return null. For QD, return location for local and return server : location for remote. Please see issues:https://code.hashdata.xyz/cloudberry/database/hashdata-cloud/-/issues/97 --- src/backend/utils/fmgr/fmgr.c | 11 +++++++++-- src/include/utils/fmgrtab.h | 3 +++ 2 files changed, 12 insertions(+), 2 deletions(-) diff --git a/src/backend/utils/fmgr/fmgr.c b/src/backend/utils/fmgr/fmgr.c index fcd55772dfd..ddd18d2604a 100644 --- a/src/backend/utils/fmgr/fmgr.c +++ b/src/backend/utils/fmgr/fmgr.c @@ -39,6 +39,7 @@ */ PGDLLIMPORT needs_fmgr_hook_type needs_fmgr_hook = NULL; PGDLLIMPORT fmgr_hook_type fmgr_hook = NULL; +PGDLLIMPORT fmgr_isbuiltin_hook_type fmgr_isbuiltin_hook = NULL; /* * Hashtable for fast lookup of external C functions @@ -89,7 +90,8 @@ fmgr_isbuiltin(Oid id) index = fmgr_builtin_oid_index[id]; if (index == InvalidOidBuiltinMapping) return NULL; - + if (fmgr_isbuiltin_hook) + return (*fmgr_isbuiltin_hook)(id); return &fmgr_builtins[index]; } @@ -106,7 +108,12 @@ fmgr_lookupByName(const char *name) for (i = 0; i < fmgr_nbuiltins; i++) { if (strcmp(name, fmgr_builtins[i].funcName) == 0) - return fmgr_builtins + i; + { + if (fmgr_isbuiltin_hook) + return (*fmgr_isbuiltin_hook)(fmgr_builtins[i].foid); + else + return fmgr_builtins + i; + } } return NULL; } diff --git a/src/include/utils/fmgrtab.h b/src/include/utils/fmgrtab.h index 21a5f21156f..99d0fde80fb 100644 --- a/src/include/utils/fmgrtab.h +++ b/src/include/utils/fmgrtab.h @@ -45,4 +45,7 @@ extern const Oid fmgr_last_builtin_oid; /* highest function OID in table */ #define InvalidOidBuiltinMapping PG_UINT16_MAX extern const uint16 fmgr_builtin_oid_index[]; 
+typedef const FmgrBuiltin *(*fmgr_isbuiltin_hook_type) (Oid fn_oid); +extern PGDLLIMPORT fmgr_isbuiltin_hook_type fmgr_isbuiltin_hook; + +#endif /* FMGRTAB_H */ From bda6e5c3b1e7de1604990d6c047667ed7cbaa81d Mon Sep 17 00:00:00 2001 From: Zhang Mingli Date: Thu, 16 May 2024 09:24:37 +0800 Subject: [PATCH 097/152] Fix policy relcache changed issue. We should not change anything in relcache, ex: policy which leads to wrong segments if warehouse size is altered. create table rt1(id int) distributed replicated; create table t1(id int); postgres=# explain (costs off) select * from t1 natural join rt1; QUERY PLAN ------------------------------------------ Gather Motion 3:1 (slice1; segments: 3) -> Hash Join Hash Cond: (rt1.id = t1.id) -> Seq Scan on rt1 -> Hash -> Seq Scan on t1 Optimizer: Postgres query optimizer (7 rows) alter warehouse test set warehouse_size 4; postgres=# explain (costs off) select * from t1 natural join rt1; QUERY PLAN ------------------------------------------------------------------ Gather Motion 3:1 (slice1; segments: 3) -> Hash Join Hash Cond: (rt1.id = t1.id) -> Redistribute Motion 1:3 (slice2; segments: 1) Hash Key: rt1.id -> Seq Scan on rt1 -> Hash -> Redistribute Motion 4:3 (slice3; segments: 4) Hash Key: t1.id -> Seq Scan on t1 Optimizer: Postgres query optimizer (11 rows) The rt1 is replicated, but we have a Motion(4:3).
After fix: postgres=# alter warehouse test set warehouse_size 4; ALTER WAREHOUSE postgres=# explain (costs off) select * from t1 natural join rt1; QUERY PLAN ------------------------------------------ Gather Motion 4:1 (slice1; segments: 4) -> Hash Join Hash Cond: (rt1.id = t1.id) -> Seq Scan on rt1 -> Hash -> Seq Scan on t1 Optimizer: Postgres query optimizer (7 rows) --- src/backend/cdb/cdbcat.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/backend/cdb/cdbcat.c b/src/backend/cdb/cdbcat.c index 7665b0936ca..61dd8ddf0e8 100644 --- a/src/backend/cdb/cdbcat.c +++ b/src/backend/cdb/cdbcat.c @@ -428,8 +428,9 @@ GpPolicyFetch(Oid tbloid) { case SYM_POLICYTYPE_REPLICATED: if (policyform->numsegments == 0) - policyform->numsegments = getgpsegmentCount(); - policy = createReplicatedGpPolicy(policyform->numsegments); + policy = createReplicatedGpPolicy(getgpsegmentCount()); + else + policy = createReplicatedGpPolicy(policyform->numsegments); break; case SYM_POLICYTYPE_PARTITIONED: /* From 678deb19ad41e09717aa679f29f11539b265598b Mon Sep 17 00:00:00 2001 From: lizhaohan Date: Wed, 15 May 2024 09:29:48 +0800 Subject: [PATCH 098/152] New tool hd_ctl to create/drop distributed warehouse Demo: 1. make -j$(nproc) && make -j$(nproc) install 2. make create-clouddemo-cluster 3. scp -r $GPHOME :~/install 4. cd gpMgmt/bin 5. edit SEGMENT_HOST and WAREHOUSE.NAME in hd_ctl.config.yaml 6. hd_ctl warehouse create 7. config remote access and run some queries 8. 
hd_ctl warehouse drop --- gpMgmt/bin/Makefile | 4 +- gpMgmt/bin/gpstop | 11 +- gpMgmt/bin/hd_ctl | 251 ++++++++++++++++++++++++++++++++++ gpMgmt/bin/hd_ctl.config.yaml | 9 ++ 4 files changed, 265 insertions(+), 10 deletions(-) create mode 100755 gpMgmt/bin/hd_ctl create mode 100644 gpMgmt/bin/hd_ctl.config.yaml diff --git a/gpMgmt/bin/Makefile b/gpMgmt/bin/Makefile index 3d9ab50ad79..e7648b1bafd 100644 --- a/gpMgmt/bin/Makefile +++ b/gpMgmt/bin/Makefile @@ -17,7 +17,7 @@ PROGRAMS= analyzedb gpactivatestandby gpaddmirrors gpcheckcat gpcheckperf \ gpinitsystem gpload gpload.py gplogfilter gpmovemirrors \ gppkg gprecoverseg gpreload gpsync gpsd gpssh gpssh-exkeys gpstart \ gpstate gpstop minirepro gpmemwatcher gpmemreport gpdemo gpdirtableload \ - gpcheckresgroupv2impl + gpcheckresgroupv2impl hd_ctl GPDEMO_LIBS = gpdemo-defaults.sh lalshell generate_certs.sh demo_cluster.sh \ probe_config.sh README @@ -197,5 +197,5 @@ clean distclean: gpcheckperfc gpcheckresgroupimplc gpchecksubnetcfgc gpconfigc \ gpdeletesystemc gpexpandc gpshrinkc gpinitstandbyc gplogfilterc gpmovemirrorsc \ gppkgc gprecoversegc gpreloadc gpscpc gpsyncc gpsdc gpssh-exkeysc gpsshc \ - gpstartc gpstatec gpstopc minireproc gpcheckresgroupv2implc + gpstartc gpstatec gpstopc minireproc gpcheckresgroupv2implc gpdemoc hd_ctlc rm -f gpconfig_modules/gucs_disallowed_in_file.txt diff --git a/gpMgmt/bin/gpstop b/gpMgmt/bin/gpstop index 5f89bb90b02..5847180e599 100755 --- a/gpMgmt/bin/gpstop +++ b/gpMgmt/bin/gpstop @@ -300,6 +300,9 @@ class GpStop: self.gphome = gp.get_gphome() if self.coordinator_datadir is None: self.coordinator_datadir = gp.get_coordinatordatadir() + if is_external_fts: + if self.fts_hosts is None: + self.fts_hosts = self.coordinator_datadir + '/config' + '/fts_host' self.user = unix.getUserName() gp.check_permissions(self.user) self._read_postgresqlconf() @@ -956,14 +959,6 @@ class GpStop: if options.timeout < SEGMENT_STOP_TIMEOUT_DEFAULT and not options.skipvalidation: raise
ProgramArgumentValidationException( "Invalid timeout value. Must be greater than %s seconds." % SEGMENT_STOP_TIMEOUT_DEFAULT) - - if is_external_fts: - if options.fts_hosts is None: - coordinator_data_directory = os.getenv('COORDINATOR_DATA_DIRECTORY') - if coordinator_data_directory is None: - coordinator_data_directory = options.coordinatorDataDirectory - - options.fts_hosts = coordinator_data_directory + '/config' + '/fts_host' if args: raise ProgramArgumentValidationException( diff --git a/gpMgmt/bin/hd_ctl b/gpMgmt/bin/hd_ctl new file mode 100755 index 00000000000..e57fce6e1ad --- /dev/null +++ b/gpMgmt/bin/hd_ctl @@ -0,0 +1,251 @@ +#!/usr/bin/env python3 +import argparse +import shlex +import subprocess +import textwrap +import time + +import yaml + + +class HdConfig: + def __init__(self, file): + self.file_name = file.name + self.__config: dict = yaml.safe_load(file) + + def __getitem__(self, item): + if item not in self.__config: + raise EnvironmentError(f"required config {item} not found in {self.file_name}") + return self.__config[item] + + def __contains__(self, item): + return item in self.__config + + def get(self, item, default): + return self.__config.get(item, default) + + +Config: HdConfig +PaddingToLength = 0 +DefaultFormatterClass = argparse.ArgumentDefaultsHelpFormatter +MaxPgIsReadyWaitMs = 2000 +Debug_MockSSH = False + + +class SSHCommandException(Exception): + def __init__(self, msg: str, retcode: int, conn: str): + self.retcode = retcode + self.conn = conn + super().__init__(f"[{conn}] {msg}") + + @classmethod + def from_exec(cls, cmd: str, stderr: str, retcode: int, conn: str): + return cls(f"`{cmd}` failed with {retcode}: {stderr}", retcode, conn) + + +class HdConnection: + def __init__(self, conn_str: str): + host = conn_str + if conn_str.find('@') >= 0: + host = conn_str.split('@')[1] + self.host = host + self.__hostname = "" + self.conn_str = conn_str + self.padding_length = PaddingToLength - len(conn_str) + self.GP_HOME, _ = 
self.exec("echo $GPHOME") + if not self.GP_HOME and not Debug_MockSSH: + raise EnvironmentError(f"$GPHOME is invalid on {self.conn_str}") + + def hostname(self) -> str: + """Get hostname lazily""" + if not self.__hostname: + self.__hostname = self.exec("echo `hostname`")[0] + return self.__hostname + + def exec(self, cmd: str, newline=True) -> (str, int): + """Run cmd on this peer + :param cmd: command to run + :param newline: Whether to end the log line with a newline + """ + if not Debug_MockSSH: + print(f"[{self.conn_str}]{' ' * self.padding_length} {cmd}", end='\n' if newline else '') + run_before = Config.get('RUN_BEFORE_SSH_COMMAND', None) + cmd = f'{run_before}; {cmd}' if run_before else cmd + cmd = f"ssh {self.conn_str} {shlex.quote(cmd)}" + if Debug_MockSSH: + print(cmd) + return '', 0 + res = subprocess.run(cmd, check=False, shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE) + ret = res.returncode + + def drain(s): + return s.decode().strip('\n') + + if ret == 0: + return drain(res.stdout), ret + else: + raise SSHCommandException.from_exec(cmd, drain(res.stderr), ret, self.conn_str) + + def exec_gphome_bin(self, cmd: str) -> (str, int): + return self.exec(f"{self.GP_HOME}/bin/{cmd}") + + def write(self, content: str, dest: str): + self.exec(textwrap.dedent("""\ + cat < "{}" + {} + EOF""").format(dest, content)) + + +class HdCoordinator(HdConnection): + def psql(self, sql: str, inline=False): + out, _ = self.exec(f'psql -c "{sql}" postgres', newline=not inline) + if inline: + print(' -> ', end='') + print(out) + + def dump_str_guc_as_conf_lines(self, *guc_names: str): + """Dump string GUC values as postgresql.conf lines. 
Warning: GUC values are treated as strings""" + values = [self.exec(f'psql -tc "show {guc_name}" postgres')[0].strip() for guc_name in guc_names] + return '\n'.join([f"{k} = '{v}'" for k, v in zip(guc_names, values)]) + + def __init__(self, conn_str: str): + super().__init__(conn_str) + + +class HdSegment(HdConnection): + def create(self): + self.exec(f'mkdir -p "{self.datadir}"') + self.exec_gphome_bin(f'initdb -D "{self.datadir}" -k -E utf-8') + self.write(textwrap.dedent(f"""\ + include_if_exists = '$HOME/data/global.conf' + gp_contentid = {self.contentid} + gp_dbid = {self.dbid} + port = {self.port} + listen_addresses = '*'"""), f"{self.datadir}/postgresql.conf") + global_conf_lines = self.coordinator.dump_str_guc_as_conf_lines("unionstore.tenant_id", + "unionstore.timeline_id", + "unionstore.safekeepers", + "unionstore.pageserver_connstring") + self.write(global_conf_lines, "$HOME/data/global.conf") + pg_ctl_cmd = f'pg_ctl -D "{self.datadir}" -l "{self.startup_log}" -w -t 600 -o "-p {self.port} -c gp_role=execute" start' + try: + self.exec_gphome_bin(pg_ctl_cmd) + except SSHCommandException as e: + cat_log_cmd = f'cat {self.startup_log}' + print(f"\nCommand `{pg_ctl_cmd}` failed. 
Automatically fetch log with {cat_log_cmd}...") + print(self.exec(cat_log_cmd)[0]) + raise e + # probe + ret, total_wait_ms, attempts = 1, 0, 0 + while total_wait_ms < MaxPgIsReadyWaitMs: + try: + _, ret = self.exec_gphome_bin(f"pg_isready -d postgres -p {self.port}") + except SSHCommandException as e: + ret = e.retcode + if ret == 0: + break + wait_ms = 100 + time.sleep(wait_ms / 1000) + total_wait_ms += wait_ms + attempts += 1 + if ret != 0: + raise SSHCommandException(f"pg_isready still returns {ret} after {attempts} attempts", ret, self.conn_str) + + def drop(self): + try: + self.exec_gphome_bin(f'pg_ctl -D "{self.datadir}" -l "{self.startup_log}" -w -t 120 -m i stop') + except SSHCommandException as e: + print(e) + self.exec(f'rm -rf "{self.datadir}"') + + def __init__(self, conn_str: str, dbid: int, contentid: int, port: int, datadir: str, coordinator: HdCoordinator): + super().__init__(conn_str) + self.dbid = dbid + self.contentid = contentid + self.port = port + self.datadir = self.exec(f"realpath {datadir}")[0] + self.startup_log = f"{self.datadir}/log/startup.log" + self.coordinator = coordinator + + +class HdWarehouse: + def on_all_segments(self, func_name: str): + """Helper function to call func_name on all segments""" + for p in self.segments: + getattr(p, func_name)() + + def create(self): + self.on_all_segments('create') + config_array_content = ','.join([f"'{p.port},{p.hostname()},{p.host},{p.datadir}'" for p in self.segments]) + self.coordinator.psql(f"select pg_catalog.create_warehouse_callback('{self.name}', 'SUCCESS', array[{config_array_content}])") + + def drop(self): + self.on_all_segments('drop') + self.coordinator.psql(f"select pg_catalog.drop_warehouse_callback('{self.name}')") + + def recreate(self): + print("\nDropping...") + self.drop() + print("\nCreating...") + self.create() + + def list(self): + self.coordinator.psql("select * from gp_warehouse") + self.coordinator.psql("select * from gp_segment_configuration") + + def __init__(self, 
name: str, conn_strs: [str], base_port: int, coordinator: HdCoordinator): + self.name = name + self.coordinator = coordinator + self.segments = [] + for i, conn_str in enumerate(conn_strs): + port = base_port + i + self.segments.append(HdSegment(conn_str, i + 2, i, port, f"$HOME/data/primary{port}", coordinator)) + + +class ArgParseShim: + """Backports some ArgParse functionalities to older version of Python 3""" + + def __init__(self): + self.subparsers = [] + + def add_required_subparsers(self, p): + """Backports p.add_subparsers(required=True) which is added in Python 3.7""" + res = p.add_subparsers() + self.subparsers.append(res) + return res + + def epilogue(self): + """MUST BE CALLED after parsing has been configured""" + for sub in self.subparsers: + keys = sub.choices.keys() + assert len(keys) > 0, "required subparsers must have sub parser" + sub.metavar = '{' + ','.join(keys) + '}' + sub.required = True + + +if __name__ == '__main__': + shim = ArgParseShim() + parser = argparse.ArgumentParser(formatter_class=DefaultFormatterClass) + subs = shim.add_required_subparsers(parser) + parser_warehouse = subs.add_parser("warehouse", help="Sub-commands to manage warehouse.", formatter_class=DefaultFormatterClass) + parser_warehouse.add_argument("--config", "-c", help="Path of the configuration file.", default="./hd_ctl.config.yaml") + warehouse_subs = shim.add_required_subparsers(parser_warehouse) + create_warehouse = warehouse_subs.add_parser("create") + drop_warehouse = warehouse_subs.add_parser("drop") + recreate_warehouse = warehouse_subs.add_parser("recreate") + shim.epilogue() + parsed = parser.parse_args() # parse before initialization so that syntax errors can be reported early + with open(parsed.config, 'r') as f: + Config = HdConfig(f) + + segments = Config['SEGMENT_HOST'] + PaddingToLength = max(len(x) for x in segments) + qd = HdCoordinator(Config['COORDINATOR_HOST']) + warehouse = HdWarehouse(Config['WAREHOUSE']['NAME'], Config['SEGMENT_HOST'], 
Config['PORT_BASE'], qd) + + create_warehouse.set_defaults(func=warehouse.create) + drop_warehouse.set_defaults(func=warehouse.drop) + recreate_warehouse.set_defaults(func=warehouse.recreate) + parser.parse_args().func() # parse a second time which is guaranteed to succeed + + warehouse.list() diff --git a/gpMgmt/bin/hd_ctl.config.yaml b/gpMgmt/bin/hd_ctl.config.yaml new file mode 100644 index 00000000000..4d4cfb2b8ba --- /dev/null +++ b/gpMgmt/bin/hd_ctl.config.yaml @@ -0,0 +1,9 @@ +COORDINATOR_HOST: 127.0.0.1 +SEGMENT_HOST: + - gpadmin@127.0.0.1 + - 127.0.0.1 + - 127.0.0.1 +PORT_BASE: 5433 +RUN_BEFORE_SSH_COMMAND: source ~/install/hashdata_cloud/greenplum_path.sh +WAREHOUSE: + NAME: test \ No newline at end of file From 8121358900d321a67aa3a3e99426ac28387189da Mon Sep 17 00:00:00 2001 From: wangweinan Date: Mon, 27 May 2024 12:27:09 +0800 Subject: [PATCH 099/152] Support Insert for AutoPartition table 1. metadata stored in pg_class catalog 2. In pg upper hook pattern match auto partition Insert DML 3. AutoPartition CustomScan framework for AP table Insert 4. declare a global variable, rewriteContext, force ALTER TABLE does not dispatch. Internal used it to create sub-partition tables 5. add a new pg result type, MT_AP_META to report the AP table Insert results 6. add three Expr, APHashExpr/APListExpr/APRangeExpr, to record auto partition DDL template This MR, leaves a lot of FIXME which needs to be handled further 1. autopart insert only supports one-level hash partition, which needs to support multi-level and list/range partition 2. aux_modifiytable does not support materialize view 3. ALTER TABLE dispatch strategy needs a clear policy 4. manifest result collection and merge need to separate with cdb result collection 5. 
`COPY FROM` need to support auto part design doc: https://hashdata.feishu.cn/docx/UHfJd0EJHoj7M0xa2F6cfQrQn1e --- src/backend/catalog/heap.c | 60 +++++++++++++++++++++++++ src/backend/cdb/cdboidsync.c | 4 ++ src/backend/commands/tablecmds.c | 67 +++++++++++++++++++++++++--- src/backend/executor/execMain.c | 14 ++++-- src/backend/nodes/copyfuncs.c | 46 ++++++++++++++++++- src/backend/nodes/equalfuncs.c | 2 +- src/backend/nodes/makefuncs.c | 31 +++++++++++++ src/backend/nodes/nodeFuncs.c | 48 ++++++++++++++++++++ src/backend/nodes/outfast.c | 11 +++++ src/backend/nodes/outfuncs.c | 41 +++++++++++++++-- src/backend/nodes/readfast.c | 11 +++++ src/backend/nodes/readfuncs.c | 42 +++++++++++++++++ src/backend/nodes/readfuncs_common.c | 3 -- src/backend/parser/gram.y | 45 +++++++------------ src/include/catalog/heap.h | 3 ++ src/include/catalog/pg_class.h | 5 +++ src/include/nodes/makefuncs.h | 6 +++ src/include/nodes/nodes.h | 8 ++++ src/include/nodes/parsenodes.h | 2 +- src/include/nodes/primnodes.h | 22 +++++++++ 20 files changed, 424 insertions(+), 47 deletions(-) diff --git a/src/backend/catalog/heap.c b/src/backend/catalog/heap.c index e911d0cc106..78081f5148f 100644 --- a/src/backend/catalog/heap.c +++ b/src/backend/catalog/heap.c @@ -1340,6 +1340,9 @@ InsertPgClassTuple(Relation pg_class_desc, /* relpartbound is set by updating this tuple, if necessary */ nulls[Anum_pg_class_relpartbound - 1] = true; +#ifdef SERVERLESS + nulls[Anum_pg_class_relpartspec - 1] = true; +#endif /* SERVERLESS */ tup = heap_form_tuple(RelationGetDescr(pg_class_desc), values, nulls); @@ -4527,3 +4530,60 @@ StorePartitionBound(Relation rel, Relation parent, PartitionBoundSpec *bound) CacheInvalidateRelcache(parent); } + +#ifdef SERVERLESS +/* + * TODO: FIXME use PartitionSpec is fine but the serialize format contain so + * many redundant infor which is not related to parititon dispatch. we need to + * define a auto partition private Expr to record it. 
+ */ +void +StorePartitionSpec(Relation rel, List *apExprs) +{ + Relation classRel; + HeapTuple tuple, + newtuple; + Datum new_val[Natts_pg_class]; + bool new_null[Natts_pg_class], + new_repl[Natts_pg_class]; + + /* Update pg_class tuple */ + classRel = table_open(RelationRelationId, RowExclusiveLock); + tuple = SearchSysCacheCopy1(RELOID, + ObjectIdGetDatum(RelationGetRelid(rel))); + if (!HeapTupleIsValid(tuple)) + elog(ERROR, "cache lookup failed for relation %u", + RelationGetRelid(rel)); + +#ifdef USE_ASSERT_CHECKING + { + Form_pg_class classForm; + bool isnull; + + classForm = (Form_pg_class) GETSTRUCT(tuple); + Assert(!classForm->relispartition); + (void) SysCacheGetAttr(RELOID, tuple, Anum_pg_class_relpartbound, + &isnull); + Assert(isnull); + } +#endif + + /* Fill in relpartspec value */ + memset(new_val, 0, sizeof(new_val)); + memset(new_null, false, sizeof(new_null)); + memset(new_repl, false, sizeof(new_repl)); + new_val[Anum_pg_class_relpartspec - 1] = CStringGetTextDatum(nodeToString(apExprs)); + new_null[Anum_pg_class_relpartspec - 1] = false; + new_repl[Anum_pg_class_relpartspec - 1] = true; + newtuple = heap_modify_tuple(tuple, RelationGetDescr(classRel), + new_val, new_null, new_repl); + CatalogTupleUpdate(classRel, &newtuple->t_self, newtuple); + heap_freetuple(newtuple); + + table_close(classRel, RowExclusiveLock); + /* Make these updates visible */ + CommandCounterIncrement(); + + CacheInvalidateRelcache(rel); +} +#endif diff --git a/src/backend/cdb/cdboidsync.c b/src/backend/cdb/cdboidsync.c index fede3512cda..73e32fdaed0 100644 --- a/src/backend/cdb/cdboidsync.c +++ b/src/backend/cdb/cdboidsync.c @@ -120,10 +120,14 @@ pg_highest_oid(PG_FUNCTION_ARGS pg_attribute_unused()) void cdb_sync_oid_to_segments(void) { +#ifdef SERVERLESS + /* do not collect oid from qe for serverless */ +#else if (Gp_role == GP_ROLE_DISPATCH && IsNormalProcessingMode()) { Oid max_oid_from_primaries = get_max_oid_from_segDBs(); AdvanceObjectId(max_oid_from_primaries + 
1); } +#endif } diff --git a/src/backend/commands/tablecmds.c b/src/backend/commands/tablecmds.c index c24210986e7..dbd08076396 100644 --- a/src/backend/commands/tablecmds.c +++ b/src/backend/commands/tablecmds.c @@ -598,9 +598,54 @@ static void clear_rel_opts(Relation rel); #ifdef SERVERLESS static void maintenance_relreuseattrs(Relation rel); static void maintenance_relreuseattrs_guts(Relation rel); + +static List * +validateAndMergeAPExprs(CreateStmt *stmt) +{ + PartitionSpec *partspec = stmt->partspec; + GpPolicy *policy = stmt->intoPolicy; + List *apExprs = NIL; + if (partspec->apExpr) + { + if (GpPolicyIsEntry(policy)) + ereport(ERROR, (errcode(ERRCODE_INVALID_TABLE_DEFINITION), + errmsg("AutoPartition do not support entry policy"))); + + for (PartitionSpec *curSpec = partspec; + curSpec; + curSpec = partspec->subPartSpec) + { + if (pg_strcasecmp(curSpec->strategy, "hash") == 0 && + !IsA(curSpec->apExpr, APHashExpr)) + { + ereport(ERROR, + (errcode(ERRCODE_INVALID_TABLE_DEFINITION), + errmsg("Hash AutoPartition grammar error"))); + } + else if (pg_strcasecmp(partspec->strategy, "list") == 0 && + !IsA(curSpec->apExpr, APListExpr)) + { + ereport(ERROR, + (errcode(ERRCODE_INVALID_TABLE_DEFINITION), + errmsg("List AutoPartition grammar error"))); + + } + else if (pg_strcasecmp(partspec->strategy, "range") == 0 && + !IsA(curSpec->apExpr, APRangeExpr)) + { + ereport(ERROR, + (errcode(ERRCODE_INVALID_TABLE_DEFINITION), + errmsg("Range AutoPartition grammar error"))); + } + + apExprs = lappend(apExprs, curSpec->apExpr); + } + } + return apExprs; +} #endif -/* ---------------------------------------------------------------- + /* ---------------------------------------------------------------- * DefineRelation * Creates a new relation. * @@ -623,10 +668,10 @@ static void maintenance_relreuseattrs_guts(Relation rel); * responsibility to dispatch. 
* ---------------------------------------------------------------- */ -ObjectAddress -DefineRelation(CreateStmt *stmt, char relkind, Oid ownerId, - ObjectAddress *typaddress, const char *queryString, - bool dispatch, bool useChangedOpts, GpPolicy *intoPolicy) + ObjectAddress + DefineRelation(CreateStmt *stmt, char relkind, Oid ownerId, + ObjectAddress *typaddress, const char *queryString, + bool dispatch, bool useChangedOpts, GpPolicy *intoPolicy) { char relname[NAMEDATALEN]; Oid namespaceId; @@ -1502,6 +1547,13 @@ DefineRelation(CreateStmt *stmt, char relkind, Oid ownerId, Oid partopclass[PARTITION_MAX_KEYS]; Oid partcollation[PARTITION_MAX_KEYS]; List *partexprs = NIL; +#ifdef SERVERLESS + /* + * `transformPartitionSpec` will modify partspec, so eager merge all + * auto partition expr from sub partspec + */ + List *apExprs = validateAndMergeAPExprs(stmt); +#endif pstate = make_parsestate(NULL); pstate->p_sourcetext = queryString; @@ -1531,6 +1583,11 @@ DefineRelation(CreateStmt *stmt, char relkind, Oid ownerId, StorePartitionKey(rel, strategy, partnatts, partattrs, partexprs, partopclass, partcollation); +#ifdef SERVERLESS + /* Process and store auto partition spec, if any. */ + if (apExprs) + StorePartitionSpec(rel, apExprs); +#endif /* make it all visible */ CommandCounterIncrement(); } diff --git a/src/backend/executor/execMain.c b/src/backend/executor/execMain.c index 972d07d4cb1..1156c80672d 100644 --- a/src/backend/executor/execMain.c +++ b/src/backend/executor/execMain.c @@ -2573,12 +2573,18 @@ ExecPostprocessPlan(EState *estate) if (Gp_role == GP_ROLE_DISPATCH) { /* Fire after triggers. */ - foreach(lc, estate->es_auxmodifytables) + foreach (lc, estate->es_auxmodifytables) { + /* + * FIXME: Trigger ASTrigger in execMain is really broken pg policy. 
+ * since customscan also do DML + */ PlanState *ps = (PlanState *) lfirst(lc); - ModifyTableState *node = castNode(ModifyTableState, ps); - - fireASTriggers(node); + if (IsA(ps, ModifyTableState)) + { + ModifyTableState *node = castNode(ModifyTableState, ps); + fireASTriggers(node); + } } return; } diff --git a/src/backend/nodes/copyfuncs.c b/src/backend/nodes/copyfuncs.c index fa4adbcfba9..decff9d3653 100644 --- a/src/backend/nodes/copyfuncs.c +++ b/src/backend/nodes/copyfuncs.c @@ -5971,7 +5971,7 @@ _copyPartitionSpec(const PartitionSpec *from) COPY_NODE_FIELD(gpPartDef); COPY_NODE_FIELD(subPartSpec); #ifdef SERVERLESS - COPY_NODE_FIELD(autoPartBound); + COPY_NODE_FIELD(apExpr); #endif /* SERVERLESS */ COPY_LOCATION_FIELD(location); @@ -6431,6 +6431,38 @@ _copyTupleDescNode(const TupleDescNode *from) return newnode; } +#ifdef SERVERLESS +static APHashExpr* +_copyAPHashExpr(const APHashExpr *from) +{ + APHashExpr *newnode = makeNode(APHashExpr); + COPY_SCALAR_FIELD(modulus); + + return newnode; +} + +static APListExpr* +_copyAPListExpr(const APListExpr *from) +{ + APListExpr *newnode = makeNode(APListExpr); + + return newnode; +} + +static APRangeExpr* +_copyAPRangeExpr(const APRangeExpr *from) +{ + APRangeExpr *newnode = makeNode(APRangeExpr); + COPY_SCALAR_FIELD(hasdefault); + COPY_NODE_FIELD(lower); + COPY_NODE_FIELD(upper); + COPY_NODE_FIELD(step); + + return newnode; +} + +#endif /* SERVERLESS */ + /* * copyObjectImpl -- implementation of copyObject(); see nodes/nodes.h * @@ -7627,6 +7659,18 @@ copyObjectImpl(const void *from) retval = _copyAlterWarehouseStmt(from); break; +#ifdef SERVERLESS + case T_APHashExpr: + retval = _copyAPHashExpr(from); + break; + case T_APListExpr: + retval = _copyAPListExpr(from); + break; + case T_APRangeExpr: + retval = _copyAPRangeExpr(from); + break; +#endif /* SERVERLESS */ + default: elog(ERROR, "unrecognized node type: %d", (int) nodeTag(from)); retval = 0; /* keep compiler quiet */ diff --git 
a/src/backend/nodes/equalfuncs.c b/src/backend/nodes/equalfuncs.c index b80ea4c78c6..953279f796c 100644 --- a/src/backend/nodes/equalfuncs.c +++ b/src/backend/nodes/equalfuncs.c @@ -3428,7 +3428,7 @@ _equalPartitionSpec(const PartitionSpec *a, const PartitionSpec *b) COMPARE_STRING_FIELD(strategy); COMPARE_NODE_FIELD(partParams); #ifdef SERVERLESS - COMPARE_NODE_FIELD(autoPartBound); + COMPARE_NODE_FIELD(apExpr); #endif /* SERVERLESS */ COMPARE_LOCATION_FIELD(location); diff --git a/src/backend/nodes/makefuncs.c b/src/backend/nodes/makefuncs.c index 946cdfa6717..798c0f95efc 100644 --- a/src/backend/nodes/makefuncs.c +++ b/src/backend/nodes/makefuncs.c @@ -834,3 +834,34 @@ makeReindexIndexInfo(Oid indexId, Oid tableId, Oid amId, bool safe) r->safe = safe; return r; } + +#ifdef SERVERLESS +Node * +makeAPHashExpr(int modulus) +{ + APHashExpr *hexpr = makeNode(APHashExpr); + hexpr->modulus = modulus; + + return (Node *) hexpr; +} + +Node * +makeAPRangeExpr(List *lower, List *upper, List *step, bool has_default) +{ + APRangeExpr *rexpr = makeNode(APRangeExpr); + rexpr->lower = lower; + rexpr->upper = upper; + rexpr->step = step; + rexpr->hasdefault = has_default; + + return (Node *) rexpr; +} + +Node * +makeAPListExpr(void) +{ + APListExpr *lexpr = makeNode(APListExpr); + + return (Node *)lexpr; +} +#endif /* SERVERLESS */ \ No newline at end of file diff --git a/src/backend/nodes/nodeFuncs.c b/src/backend/nodes/nodeFuncs.c index 8e2b9230cb3..f184bc835e6 100644 --- a/src/backend/nodes/nodeFuncs.c +++ b/src/backend/nodes/nodeFuncs.c @@ -2452,6 +2452,12 @@ expression_tree_walker(Node *node, return true; } break; +#ifdef SERVERLESS + case T_APHashExpr: + case T_APListExpr: + case T_APRangeExpr: + return false; +#endif /* SERVERLESS */ default: elog(ERROR, "unrecognized node type: %d", (int) nodeTag(node)); @@ -3560,6 +3566,48 @@ expression_tree_mutator(Node *node, return (Node *) newgathermerge; } break; +#ifdef SERVERLESS + case T_APHashExpr: + { + APHashExpr *hexpr = 
(APHashExpr *)node; + APHashExpr *newhexpr; + FLATCOPY(newhexpr, hexpr, APHashExpr); + + return (Node *)newhexpr; + } + break; + case T_APListExpr: + { + APListExpr *lexpr = (APListExpr *)node; + APListExpr *newlexpr; + FLATCOPY(newlexpr, lexpr, APListExpr); + + return (Node *)newlexpr; + } + break; + case T_APRangeExpr: + { + APRangeExpr *rexpr = (APRangeExpr *)node; + APRangeExpr *newrexpr; + FLATCOPY(newrexpr, rexpr, APRangeExpr); + MUTATE(newrexpr->lower, rexpr->lower, List*); + MUTATE(newrexpr->upper, rexpr->upper, List*); + MUTATE(newrexpr->step, rexpr->step, List*); + + return (Node *)rexpr; + } + break; + case T_PartitionElem: + { + PartitionElem *partelem = (PartitionElem *)node; + PartitionElem *newpartelem; + FLATCOPY(newpartelem, partelem, PartitionElem); + MUTATE(newpartelem->expr, partelem->expr, Node*); + MUTATE(newpartelem->collation, partelem->collation, List*); + return (Node *)newpartelem; + } + break; +#endif /* SERVERLESS */ default: elog(ERROR, "unrecognized node type: %d", (int) nodeTag(node)); diff --git a/src/backend/nodes/outfast.c b/src/backend/nodes/outfast.c index 2ad119c3428..023c8d72ce6 100644 --- a/src/backend/nodes/outfast.c +++ b/src/backend/nodes/outfast.c @@ -1962,6 +1962,17 @@ _outNode(StringInfo str, void *obj) case T_TransferTuple: _outTranderTuple(str, obj); break; +#ifdef SERVERLESS + case T_APHashExpr: + _outAPHashExpr(str, obj); + break; + case T_APListExpr: + _outAPListExpr(str, obj); + break; + case T_APRangeExpr: + _outAPRangeExpr(str, obj); + break; +#endif /* SERVERLESS */ default: elog(ERROR, "could not serialize unrecognized node type: %d", (int) nodeTag(obj)); diff --git a/src/backend/nodes/outfuncs.c b/src/backend/nodes/outfuncs.c index c1596154b4a..38cacd89d33 100644 --- a/src/backend/nodes/outfuncs.c +++ b/src/backend/nodes/outfuncs.c @@ -4093,9 +4093,6 @@ _outPartitionSpec(StringInfo str, const PartitionSpec *node) WRITE_STRING_FIELD(strategy); WRITE_NODE_FIELD(partParams); -#ifdef SERVERLESS - 
WRITE_NODE_FIELD(autoPartBound); -#endif /* SERVERLESS */ WRITE_LOCATION_FIELD(location); } @@ -4175,6 +4172,33 @@ _outDropTaskStmt(StringInfo str, const DropTaskStmt *node) WRITE_BOOL_FIELD(missing_ok); } +#ifdef SERVERLESS +static void +_outAPHashExpr(StringInfo str, const APHashExpr *node) +{ + WRITE_NODE_TYPE("APHASHEXPR"); + + WRITE_INT_FIELD(modulus); +} + +static void +_outAPListExpr(StringInfo str, const APListExpr *node) +{ + WRITE_NODE_TYPE("APLISTEXPR"); +} + +static void +_outAPRangeExpr(StringInfo str, const APRangeExpr *node) +{ + WRITE_NODE_TYPE("APRANGEEXPR"); + + WRITE_BOOL_FIELD(hasdefault); + WRITE_NODE_FIELD(lower); + WRITE_NODE_FIELD(upper); + WRITE_NODE_FIELD(step); +} +#endif /* SERVERLESS */ + #include "outfuncs_common.c" #ifndef COMPILING_BINARY_FUNCS /* @@ -5362,6 +5386,17 @@ outNode(StringInfo str, const void *obj) case T_DropTaskStmt: _outDropTaskStmt(str, obj); break; +#ifdef SERVERLESS + case T_APHashExpr: + _outAPHashExpr(str, obj); + break; + case T_APListExpr: + _outAPListExpr(str, obj); + break; + case T_APRangeExpr: + _outAPRangeExpr(str, obj); + break; +#endif /* SERVERLESS */ default: /* diff --git a/src/backend/nodes/readfast.c b/src/backend/nodes/readfast.c index af8e9618c4a..d52ed04b5c3 100644 --- a/src/backend/nodes/readfast.c +++ b/src/backend/nodes/readfast.c @@ -2976,6 +2976,17 @@ readNodeBinary(void) case T_TransferTuple: return_value = _readTransferTuple(); break; +#ifdef SERVERLESS + case T_APHashExpr: + return_value = _readAPHashExpr(); + break; + case T_APListExpr: + return_value = _readAPListExpr(); + break; + case T_APRangeExpr: + return_value = _readAPRangeExpr(); + break; +#endif /* SERVERLESS */ default: return_value = NULL; /* keep the compiler silent */ elog(ERROR, "could not deserialize unrecognized node type: %d", diff --git a/src/backend/nodes/readfuncs.c b/src/backend/nodes/readfuncs.c index c95c3a672c2..e8d5e07b7c6 100644 --- a/src/backend/nodes/readfuncs.c +++ b/src/backend/nodes/readfuncs.c @@ -2911,6 
+2911,40 @@ _readPartitionRangeDatum(void) READ_DONE(); } +#ifdef SERVERLESS +static APHashExpr * +_readAPHashExpr(void) +{ + READ_LOCALS(APHashExpr); + + READ_INT_FIELD(modulus); + + READ_DONE(); +} + +static APListExpr * +_readAPListExpr(void) +{ + READ_LOCALS_NO_FIELDS(APListExpr); + + READ_DONE(); +} + +static APRangeExpr * +_readAPRangeExpr(void) +{ + READ_LOCALS(APRangeExpr); + + READ_BOOL_FIELD(hasdefault); + READ_NODE_FIELD(lower); + READ_NODE_FIELD(upper); + READ_NODE_FIELD(step); + + READ_DONE(); + +} +#endif /* SERVERLESS */ + #include "readfuncs_common.c" #ifndef COMPILING_BINARY_FUNCS /* @@ -3428,6 +3462,14 @@ parseNodeString(void) return_value = _readReturnStmt(); else if (MATCHX("DROPDIRECTORYTABLESTMT")) return_value = _readDropDirectoryTableStmt(); +#ifdef SERVERLESS + else if (MATCHX("APHASHEXPR")) + return_value = _readAPHashExpr(); + else if (MATCHX("APLISTEXPR")) + return_value = _readAPListExpr(); + else if (MATCHX("APRANGEEXPR")) + return_value = _readAPRangeExpr(); +#endif /* SERVERLESS*/ else { ereport(ERROR, diff --git a/src/backend/nodes/readfuncs_common.c b/src/backend/nodes/readfuncs_common.c index 6dc49b696d2..c179171c1a2 100644 --- a/src/backend/nodes/readfuncs_common.c +++ b/src/backend/nodes/readfuncs_common.c @@ -1474,9 +1474,6 @@ _readPartitionSpec(void) READ_STRING_FIELD(strategy); READ_NODE_FIELD(partParams); -#ifdef SERVERLESS - READ_NODE_FIELD(autoPartBound); -#endif /* SERVERLESS */ READ_LOCATION_FIELD(location); READ_DONE(); diff --git a/src/backend/parser/gram.y b/src/backend/parser/gram.y index 502be11c0f9..885062f960a 100644 --- a/src/backend/parser/gram.y +++ b/src/backend/parser/gram.y @@ -718,7 +718,7 @@ static void check_expressions_in_partition_key(PartitionSpec *spec, core_yyscan_ %type part_params %type PartitionBoundSpec %type autopart_default -%type OptAutoPartitionBoundSpec +%type OptAutoPartitionBoundSpec %type hash_partbound %type hash_partbound_elem @@ -4111,16 +4111,7 @@ OptAutoPartitionBoundSpec: AUTO BY 
'(' NonReservedWord Iconst ')' { /* HASH partition */ - PartitionBoundSpec *n = makeNode(PartitionBoundSpec); - - n->strategy = PARTITION_STRATEGY_HASH; - n->modulus = n->remainder = -1; - - if (strcmp($4, "modulus") == 0) - { - n->modulus = $5; - } - else + if (strcmp($4, "modulus")) { ereport(ERROR, (errcode(ERRCODE_SYNTAX_ERROR), @@ -4128,37 +4119,31 @@ OptAutoPartitionBoundSpec: $4), parser_errposition(@4))); } - $$ = n; + $$ = makeAPHashExpr($5); } | AUTO BY ENUM_P { /* LIST partition */ - PartitionBoundSpec *n = makeNode(PartitionBoundSpec); - n->strategy = PARTITION_STRATEGY_LIST; ereport(ERROR, (errcode(ERRCODE_SYNTAX_ERROR), errmsg("auto partition do not support list yet"))); - $$ = n; + $$ = makeAPListExpr(); } | AUTO START '(' expr_list ')' EVERY '(' expr_list ')' autopart_default { /* Open Range partition */ - PartitionBoundSpec *n = makeNode(PartitionBoundSpec); - n->strategy = PARTITION_STRATEGY_RANGE; ereport(ERROR, (errcode(ERRCODE_SYNTAX_ERROR), errmsg("auto partition do not support open space range yet"))); - $$ = n; + $$ = makeAPRangeExpr($4, NULL, $8, $10); } | AUTO START '(' expr_list ')' END_P '(' expr_list ')' EVERY '(' expr_list ')' autopart_default { /* Close Range partition with default */ - PartitionBoundSpec *n = makeNode(PartitionBoundSpec); - n->strategy = PARTITION_STRATEGY_RANGE; ereport(ERROR, (errcode(ERRCODE_SYNTAX_ERROR), errmsg("auto partition do not support close space range yet"))); - $$ = n; + $$ = makeAPRangeExpr($4, $8, $12, $14); } | { $$ = NULL; } ; @@ -6073,18 +6058,15 @@ OptFirstPartitionSpec: PartitionSpec opt_list_subparts OptTabPartitionSpec #ifdef SERVERLESS if ($1->subPartSpec) { - bool error = ($1->autoPartBound != NULL); + bool error = ($1->apExpr != NULL); for (PartitionSpec *current = $1; current; current = current->subPartSpec) - { - error |= ($1->autoPartBound != NULL); - } + error |= ($1->apExpr != NULL); if (error) - { ereport(ERROR, (errcode(ERRCODE_SYNTAX_ERROR), errmsg("auto partition do not support 
multi level partition yet"))); - } + } #endif /* SEVERLESS */ /* @@ -6151,7 +6133,7 @@ PartitionSpec: PARTITION BY ColId '(' part_params ')' OptAutoPartitionBoundSpec n->partParams = $5; n->location = @1; #ifdef SERVERLESS - n->autoPartBound = $7; + n->apExpr = (Expr *)$7; #endif /* SERVERLESS */ $$ = n; @@ -6730,7 +6712,12 @@ TabSubPartition: $$ = $1; } - | TabSubPartitionBy OptAutoPartitionBoundSpec { $$ = $1; } + | TabSubPartitionBy OptAutoPartitionBoundSpec + { + PartitionSpec *n = (PartitionSpec *) $1; + n->apExpr = (Expr *)$2; + $$ = $1; + } | TabSubPartitionBy TabSubPartition { PartitionSpec *n = (PartitionSpec *) $1; diff --git a/src/include/catalog/heap.h b/src/include/catalog/heap.h index 0364cb9d981..57343062391 100644 --- a/src/include/catalog/heap.h +++ b/src/include/catalog/heap.h @@ -186,6 +186,9 @@ extern void StorePartitionKey(Relation rel, extern void RemovePartitionKeyByRelId(Oid relid); extern void StorePartitionBound(Relation rel, Relation parent, PartitionBoundSpec *bound); +#ifdef SERVERLESS +extern void StorePartitionSpec(Relation rel, List *apExprs); +#endif /* MPP-6929: metadata tracking */ extern void MetaTrackAddObject(Oid classid, diff --git a/src/include/catalog/pg_class.h b/src/include/catalog/pg_class.h index 5cafdfc66e9..dba3307c99c 100644 --- a/src/include/catalog/pg_class.h +++ b/src/include/catalog/pg_class.h @@ -153,6 +153,11 @@ CATALOG(pg_class,1259,RelationRelationId) BKI_BOOTSTRAP BKI_ROWTYPE_OID(83,Relat /* partition bound node tree */ pg_node_tree relpartbound BKI_DEFAULT(_null_); + +#ifdef SERVERLESS + /* cbdb auto paritition spec */ + pg_node_tree relpartspec BKI_DEFAULT(_null_); +#endif /* SERVERLESS */ #endif } FormData_pg_class; diff --git a/src/include/nodes/makefuncs.h b/src/include/nodes/makefuncs.h index 3e4bbde510d..ecf77a2e4ad 100644 --- a/src/include/nodes/makefuncs.h +++ b/src/include/nodes/makefuncs.h @@ -108,4 +108,10 @@ extern VacuumRelation *makeVacuumRelation(RangeVar *relation, Oid oid, List *va_ extern 
ReindexIndexInfo *makeReindexIndexInfo(Oid indexId, Oid tableId, Oid amId, bool safe); +#ifdef SERVERLESS +extern Node *makeAPHashExpr(int modulus); +extern Node *makeAPRangeExpr(List *lower, List *upper, List *step, bool has_default); +extern Node *makeAPListExpr(void); +#endif /* SERVERLESS */ + #endif /* MAKEFUNC_H */ diff --git a/src/include/nodes/nodes.h b/src/include/nodes/nodes.h index 87598d481ea..1df98bc4e0a 100644 --- a/src/include/nodes/nodes.h +++ b/src/include/nodes/nodes.h @@ -688,6 +688,14 @@ typedef enum NodeTag T_EphemeralNamedRelationInfo, /* utils/queryenvironment.h */ T_SystemTableTransferNode, T_TransferTuple, + T_FileFragment, + T_FileScanTask, + T_ExternalTableMetadata, +#ifdef SERVERLESS + T_APListExpr, + T_APRangeExpr, + T_APHashExpr, +#endif /* SERVERLESS */ } NodeTag; /* diff --git a/src/include/nodes/parsenodes.h b/src/include/nodes/parsenodes.h index c58062d8275..96b42d8ff0d 100644 --- a/src/include/nodes/parsenodes.h +++ b/src/include/nodes/parsenodes.h @@ -894,7 +894,7 @@ typedef struct PartitionSpec struct GpPartitionDefinition *gpPartDef; struct PartitionSpec *subPartSpec; /* subpartition specification */ #ifdef SERVERLESS - PartitionBoundSpec *autoPartBound; /* is autopartition if not null*/ + Expr *apExpr; #endif int location; /* token location, or -1 if unknown */ } PartitionSpec; diff --git a/src/include/nodes/primnodes.h b/src/include/nodes/primnodes.h index 91def3cd710..eadb082fb39 100644 --- a/src/include/nodes/primnodes.h +++ b/src/include/nodes/primnodes.h @@ -1754,4 +1754,26 @@ typedef struct DMLActionExpr Expr xpr; } DMLActionExpr; +#ifdef SERVERLESS +typedef struct APListExpr +{ + Expr xpr; +} APListExpr; + +typedef struct APHashExpr +{ + Expr xpr; + int modulus; +} APHashExpr; + +typedef struct APRangeExpr +{ + Expr xpr; + bool hasdefault; + List *lower; /* List of PartitionRangeDatums */ + List *upper; /* List of PartitionRangeDatums */ + List *step; /* every specification */ +} APRangeExpr; +#endif /* SERVERLESS */ + 
#endif /* PRIMNODES_H */ From d5c74b16f41a4a24dd7a40a36069b3ffc2ca29db Mon Sep 17 00:00:00 2001 From: liushengsong Date: Mon, 27 May 2024 15:15:14 +0800 Subject: [PATCH 100/152] concurrent_insert For most query, we dispatch manifest from QD to QE in hashdata_CdbDispatchPlan. For utility statement, we collect related relid and dispatch manifest from QD to QE in SerializeTxnState. --- src/backend/access/transam/xact.c | 6 ++++++ src/backend/catalog/main_manifest.c | 20 ++++++++++++++++++++ src/backend/tcop/dest.c | 5 +++++ src/include/access/xact.h | 4 ++++ src/include/catalog/main_manifest.h | 1 + src/include/tcop/dest.h | 3 +++ 6 files changed, 39 insertions(+) diff --git a/src/backend/access/transam/xact.c b/src/backend/access/transam/xact.c index c252269e45e..6a7e00a02f4 100644 --- a/src/backend/access/transam/xact.c +++ b/src/backend/access/transam/xact.c @@ -261,6 +261,7 @@ typedef TransactionStateData *TransactionState; static int fastNodeCount; static TransactionState previousFastLink; + /* * Serialized representation used to transmit transaction state to parallel * workers through shared memory. @@ -2767,6 +2768,11 @@ StartTransaction(void) DtxContextToString(DistributedTransactionContext), IsoLevelAsUpperString(XactIsoLevel), XactReadOnly, LocalDistribXact_DisplayString(MyProc->pgprocno)))); +#ifdef SERVERLESS + CallXactCallbacks(s->blockState == TBLOCK_PARALLEL_INPROGRESS ? 
+ XACT_EVENT_PARALLEL_BEGIN : XACT_EVENT_BEGIN); +#endif + } /* diff --git a/src/backend/catalog/main_manifest.c b/src/backend/catalog/main_manifest.c index ffede19a7c4..7aea277a47f 100644 --- a/src/backend/catalog/main_manifest.c +++ b/src/backend/catalog/main_manifest.c @@ -112,4 +112,24 @@ UpdateManifestRecord(RelFileNodeId relfilenode, text *path) systable_endscan(scan); heap_close(rel, NoLock); +} + +void +DeleteManifestCatalog(RelFileNodeId relnode) +{ + Relation entrance_rel = heap_open(ManifestRelationId, AccessExclusiveLock); + SysScanDesc scan; + HeapTuple tuple; + + ScanKeyData key; + ScanKeyInit(&key, 1, BTEqualStrategyNumber, F_INT8EQ, + UInt64GetDatum(relnode)); + scan = systable_beginscan(entrance_rel, InvalidOid, false, NULL, 1, &key); + + tuple = systable_getnext(scan); + if (HeapTupleIsValid(tuple)) + CatalogTupleDelete(entrance_rel, &tuple->t_self); + + systable_endscan(scan); + table_close(entrance_rel, AccessExclusiveLock); } \ No newline at end of file diff --git a/src/backend/tcop/dest.c b/src/backend/tcop/dest.c index a3bd5ae0434..1be4152ccfa 100644 --- a/src/backend/tcop/dest.c +++ b/src/backend/tcop/dest.c @@ -45,6 +45,8 @@ #include "cdb/ml_ipc.h" #include "utils/vmem_tracker.h" +EndCommand_hook_type EndCommand_hook = NULL; + /* ---------------- * dummy DestReceiver functions * ---------------- @@ -174,6 +176,9 @@ EndCommand(const QueryCompletion *qc, CommandDest dest, bool force_undecorated_o CommandTag tag; const char *tagname; + if(EndCommand_hook) + EndCommand_hook(); + switch (dest) { case DestRemote: diff --git a/src/include/access/xact.h b/src/include/access/xact.h index 93a56c58abd..b587211d8e9 100644 --- a/src/include/access/xact.h +++ b/src/include/access/xact.h @@ -126,6 +126,10 @@ extern int MyXactFlags; */ typedef enum { +#ifdef SERVERLESS + XACT_EVENT_BEGIN, + XACT_EVENT_PARALLEL_BEGIN, +#endif XACT_EVENT_COMMIT, XACT_EVENT_PARALLEL_COMMIT, XACT_EVENT_ABORT, diff --git a/src/include/catalog/main_manifest.h 
b/src/include/catalog/main_manifest.h index fdcda6ec7f1..a7913fa56c8 100644 --- a/src/include/catalog/main_manifest.h +++ b/src/include/catalog/main_manifest.h @@ -33,5 +33,6 @@ typedef FormData_main_manifest *Form_main_manifest; extern void InsertManifestRecord(Oid relid, RelFileNodeId relnode, text* path); extern void RemoveManifestRecord(RelFileNodeId relnode); extern void UpdateManifestRecord(RelFileNodeId relnode, text* path); +extern void DeleteManifestCatalog(RelFileNodeId relnode); #endif /* MAIN_MANIFEST.h */ diff --git a/src/include/tcop/dest.h b/src/include/tcop/dest.h index 8ed0c49b0b8..4a315907b50 100644 --- a/src/include/tcop/dest.h +++ b/src/include/tcop/dest.h @@ -149,4 +149,7 @@ extern void ReadyForQuery(CommandDest dest); extern void sendQEDetails(void); +typedef void (*EndCommand_hook_type) (); +extern PGDLLIMPORT EndCommand_hook_type EndCommand_hook; + #endif /* DEST_H */ From 46dd81e6a5a15ee605d40b509ad1cad66ae25bbc Mon Sep 17 00:00:00 2001 From: roseduan Date: Fri, 7 Jun 2024 10:21:42 +0800 Subject: [PATCH 101/152] Table files cleanup after Drop/Truncate/Vacuum --- src/backend/access/transam/xact.c | 3 +++ src/backend/storage/smgr/smgr.c | 3 +++ src/include/storage/smgr.h | 3 +++ 3 files changed, 9 insertions(+) diff --git a/src/backend/access/transam/xact.c b/src/backend/access/transam/xact.c index 6a7e00a02f4..e31206b45e5 100644 --- a/src/backend/access/transam/xact.c +++ b/src/backend/access/transam/xact.c @@ -2787,6 +2787,9 @@ CommitTransaction(void) TransactionId latestXid; bool is_parallel_worker; + if (pending_relation_deletes_hook) + pending_relation_deletes_hook(); + if (cache_invalidation_async_hook) cache_invalidation_async_hook(cache_async_messages); diff --git a/src/backend/storage/smgr/smgr.c b/src/backend/storage/smgr/smgr.c index 7f719b51d32..98fda0cc806 100644 --- a/src/backend/storage/smgr/smgr.c +++ b/src/backend/storage/smgr/smgr.c @@ -51,6 +51,9 @@ file_unlink_hook_type file_unlink_hook = NULL; smgr_get_impl_hook_type 
smgr_get_impl_hook = NULL; +/* Hook for plugins to get control in deletion of relation files */ +pending_relation_deletes_hook_type pending_relation_deletes_hook = NULL; + /* Hook for plugins to get control in smgr */ smgr_init_hook_type smgr_init_hook = NULL; smgr_hook_type smgr_hook = NULL; diff --git a/src/include/storage/smgr.h b/src/include/storage/smgr.h index 4910148f9c9..268f5118841 100644 --- a/src/include/storage/smgr.h +++ b/src/include/storage/smgr.h @@ -248,5 +248,8 @@ extern PGDLLIMPORT file_unlink_hook_type file_unlink_hook; typedef void (*smgr_get_impl_hook_type)(const Relation rel, SMgrImpl* smgr_impl); extern PGDLLIMPORT smgr_get_impl_hook_type smgr_get_impl_hook; +typedef void (*pending_relation_deletes_hook_type) (void); +extern PGDLLIMPORT pending_relation_deletes_hook_type pending_relation_deletes_hook; + extern f_smgr smgrsw[]; #endif /* SMGR_H */ From 6622671016059eb42e448b8c19c757841e6d3334 Mon Sep 17 00:00:00 2001 From: leo Date: Thu, 30 May 2024 15:19:53 +0800 Subject: [PATCH 102/152] Feature: optimize simple aggregate query Currently, Max/Min stats on columns is supported in PAX. When we write new data, the stats is stored in manifest. But if we delete/update the tuples, the stats will not be updated, that means the stats is not accurate to answer the query, can only be used as Max/Min filters. If we rewrite all the data after delete/update, the stats is re-collected and accurate. So we add new write policy 'COPY ON WRITE' for hashdata table: create table test (c1 int, c2 bigint) with (write_policy='cow'); For hashdata table with COPY ON WRITE write policy, the Max/Min stats is accurate, we use this stats as direct results of simple aggregate max/min on single column, such as query select max(c1), min(c2) from test; We use Custom Scan on manifest to get the results instead of Aggregate Node with SeqScan to accelerate such query. 
For more details, see https://hashdata.feishu.cn/docx/ExmXd5clOoUJJyxohMOca6qYnlb --- src/backend/nodes/copyfuncs.c | 1 + src/backend/nodes/outfuncs.c | 1 + src/backend/nodes/readfuncs.c | 1 + src/include/nodes/plannodes.h | 1 + 4 files changed, 4 insertions(+) diff --git a/src/backend/nodes/copyfuncs.c b/src/backend/nodes/copyfuncs.c index decff9d3653..e57564baf75 100644 --- a/src/backend/nodes/copyfuncs.c +++ b/src/backend/nodes/copyfuncs.c @@ -566,6 +566,7 @@ CopyScanFields(const Scan *from, Scan *newnode) CopyPlanFields((const Plan *) from, (Plan *) newnode); COPY_SCALAR_FIELD(scanrelid); + COPY_SCALAR_FIELD(scanflags); } /* diff --git a/src/backend/nodes/outfuncs.c b/src/backend/nodes/outfuncs.c index 38cacd89d33..6f8114415ac 100644 --- a/src/backend/nodes/outfuncs.c +++ b/src/backend/nodes/outfuncs.c @@ -427,6 +427,7 @@ _outScanInfo(StringInfo str, const Scan *node) _outPlanInfo(str, (const Plan *) node); WRITE_UINT_FIELD(scanrelid); + WRITE_UINT_FIELD(scanflags); } /* diff --git a/src/backend/nodes/readfuncs.c b/src/backend/nodes/readfuncs.c index e8d5e07b7c6..2b496d14ecb 100644 --- a/src/backend/nodes/readfuncs.c +++ b/src/backend/nodes/readfuncs.c @@ -1901,6 +1901,7 @@ ReadCommonScan(Scan *local_node) ReadCommonPlan(&local_node->plan); READ_UINT_FIELD(scanrelid); + READ_UINT_FIELD(scanflags); } /* diff --git a/src/include/nodes/plannodes.h b/src/include/nodes/plannodes.h index da33e1df9ea..de972f82b76 100644 --- a/src/include/nodes/plannodes.h +++ b/src/include/nodes/plannodes.h @@ -560,6 +560,7 @@ typedef struct Scan { Plan plan; Index scanrelid; /* relid is index into the range table */ + uint32 scanflags; /* extra scan flags */ } Scan; /* ---------------- From 45ee0c7459f4d24ccc75163ac826a74df50da0ba Mon Sep 17 00:00:00 2001 From: Zhang Mingli Date: Thu, 16 May 2024 12:24:07 +0800 Subject: [PATCH 103/152] create warehouse OPTIONS CREATE WAREHOUSE wh_optn WAREHOUSE_SIZE 1 OPTIONS (key1 'value1', debug 'true', account '10'); SELECT warehouse_name, 
whoptions FROM gp_warehouse where warehouse_name = 'wh_optn'; warehouse_name | whoptions ----------------+------------------------------------- wh_optn | {key1=value1,debug=true,account=10} (1 row) --- src/include/catalog/gp_warehouse.h | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/include/catalog/gp_warehouse.h b/src/include/catalog/gp_warehouse.h index b32aa02509e..a687e30ff57 100644 --- a/src/include/catalog/gp_warehouse.h +++ b/src/include/catalog/gp_warehouse.h @@ -45,6 +45,9 @@ CATALOG(gp_warehouse,8690,GpWarehouseRelationId) BKI_SHARED_RELATION #ifdef CATALOG_VARLEN /* variable-length fields start here */ text status BKI_FORCE_NOT_NULL; /* status */ aclitem warehouse_acl[1]; /* access permissions */ + + /* warehouse options */ + text whoptions[1] BKI_DEFAULT(_null_); #endif } FormData_gp_warehouse; From 5fbf571bd53ed530887e2f00ac75a486ea52a80c Mon Sep 17 00:00:00 2001 From: hanwei Date: Mon, 1 Jul 2024 16:24:10 +0800 Subject: [PATCH 104/152] Feature: recongnize AM by tablespace 1. Add a reloption of tablespace:storage(currently only hashdata) 2. Add check mechmism when create tablespace 3. Add get AM interface by tablespace 4. Add set AM hook when DefineRelation 5. Fix some correnspond cases 6. Adjust CI parameter 7. Adjust content for partition table. 8. CTAS support of heap 9. set constrains support of heap. 
--- src/backend/access/common/reloptions.c | 12 ++- src/backend/cdb/cdbllize.c | 20 ++++- src/backend/commands/createas.c | 7 +- src/backend/commands/tablecmds.c | 14 +++- src/backend/commands/trigger.c | 29 ++++++- src/backend/executor/execMain.c | 6 +- .../gpopt/translate/CTranslatorQueryToDXL.cpp | 13 +++ src/backend/utils/cache/plancache.c | 8 ++ src/bin/psql/describe.c | 82 +++++++++++++------ src/include/commands/tablecmds.h | 3 + src/include/commands/tablespace.h | 1 + src/include/utils/plancache.h | 4 + .../regress/expected/qp_misc_optimizer.out | 2 +- 13 files changed, 168 insertions(+), 33 deletions(-) diff --git a/src/backend/access/common/reloptions.c b/src/backend/access/common/reloptions.c index fc9edaf7f7c..1775c1f88bb 100644 --- a/src/backend/access/common/reloptions.c +++ b/src/backend/access/common/reloptions.c @@ -573,6 +573,15 @@ static relopt_string stringRelOpts[] = }, 0, true, NULL, NULL, NULL }, + { + { + "storage", + "the storage type of the tablespace", + RELOPT_KIND_TABLESPACE, + AccessExclusiveLock + }, + 0, true, NULL, NULL, NULL + }, /* list terminator */ {{NULL}} }; @@ -2130,7 +2139,8 @@ tablespace_reloptions(Datum reloptions, bool validate) {"maintenance_io_concurrency", RELOPT_TYPE_INT, offsetof(TableSpaceOpts, maintenance_io_concurrency)}, {"stage", RELOPT_TYPE_BOOL, offsetof(TableSpaceOpts, stage)}, {"server", RELOPT_TYPE_STRING, offsetof(TableSpaceOpts, serverOffset)}, - {"path", RELOPT_TYPE_STRING, offsetof(TableSpaceOpts, pathOffset)} + {"path", RELOPT_TYPE_STRING, offsetof(TableSpaceOpts, pathOffset)}, + {"storage", RELOPT_TYPE_STRING, offsetof(TableSpaceOpts, storageOffset)} }; return (bytea *) build_reloptions(reloptions, validate, diff --git a/src/backend/cdb/cdbllize.c b/src/backend/cdb/cdbllize.c index 070ed51b99c..e0be199bf88 100644 --- a/src/backend/cdb/cdbllize.c +++ b/src/backend/cdb/cdbllize.c @@ -328,7 +328,8 @@ cdbllize_get_final_locus(PlannerInfo *root, PathTarget *target) if (intoPolicy != NULL) { - 
Assert(intoPolicy->ptype != POLICYTYPE_ENTRY); + Assert(GpPolicyIsEntry(query->intoPolicy) || GpPolicyIsPartitioned(query->intoPolicy) || + GpPolicyIsReplicated(query->intoPolicy)); Assert(intoPolicy->nattrs >= 0); Assert(intoPolicy->nattrs <= MaxPolicyAttributeNumber); @@ -344,6 +345,17 @@ cdbllize_get_final_locus(PlannerInfo *root, PathTarget *target) CdbPathLocus_MakeReplicated(&locus, intoPolicy->numsegments, 0); return locus; } + else if (intoPolicy->ptype == POLICYTYPE_ENTRY) + { + /* + * Query result needs to be brought back to the QD. + */ + CdbPathLocus entryLocus; + + CdbPathLocus_MakeEntry(&entryLocus); + + return entryLocus; + } } } else if (query->commandType == CMD_SELECT && query->parentStmtType == PARENTSTMTTYPE_NONE) @@ -413,7 +425,8 @@ cdbllize_adjust_top_path(PlannerInfo *root, Path *best_path, { targetPolicy = query->intoPolicy; - Assert(query->intoPolicy->ptype != POLICYTYPE_ENTRY); + Assert(GpPolicyIsEntry(query->intoPolicy) || GpPolicyIsPartitioned(query->intoPolicy) || + GpPolicyIsReplicated(query->intoPolicy)); Assert(query->intoPolicy->nattrs >= 0); Assert(query->intoPolicy->nattrs <= MaxPolicyAttributeNumber); } @@ -501,7 +514,8 @@ cdbllize_adjust_top_path(PlannerInfo *root, Path *best_path, " Make sure column(s) chosen are the optimal data distribution key to minimize skew."))); } } - Assert(targetPolicy->ptype != POLICYTYPE_ENTRY); + Assert(GpPolicyIsEntry(targetPolicy) || GpPolicyIsPartitioned(targetPolicy) || + GpPolicyIsReplicated(targetPolicy)); query->intoPolicy = targetPolicy; diff --git a/src/backend/commands/createas.c b/src/backend/commands/createas.c index ebcc20922e1..66cf2bc55c8 100644 --- a/src/backend/commands/createas.c +++ b/src/backend/commands/createas.c @@ -193,7 +193,7 @@ create_ctas_internal(List *attrList, IntoClause *into, QueryDesc *queryDesc, boo queryDesc->ddesc ? 
queryDesc->ddesc->useChangedAOOpts : true, queryDesc->plannedstmt->intoPolicy); - if (Gp_role == GP_ROLE_DISPATCH) + if (Gp_role == GP_ROLE_DISPATCH && queryDesc->ddesc) { queryDesc->ddesc->intoCreateStmt = create; } @@ -224,6 +224,11 @@ create_ctas_internal(List *attrList, IntoClause *into, QueryDesc *queryDesc, boo CommandCounterIncrement(); } + if (!queryDesc->ddesc) + { + GetAssignedOidsForDispatch(); + } + if (Gp_role == GP_ROLE_DISPATCH && dispatch) CdbDispatchUtilityStatement((Node *) create, DF_CANCEL_ON_ERROR | diff --git a/src/backend/commands/tablecmds.c b/src/backend/commands/tablecmds.c index dbd08076396..e2790c368d3 100644 --- a/src/backend/commands/tablecmds.c +++ b/src/backend/commands/tablecmds.c @@ -49,7 +49,6 @@ #include "catalog/pg_tablespace.h" #include "catalog/pg_statistic_ext.h" #include "catalog/pg_trigger.h" -#include "catalog/pg_tablespace.h" #include "catalog/pg_type.h" #include "catalog/storage.h" #include "catalog/storage_xlog.h" @@ -590,6 +589,7 @@ static void ATExecSetRelOptionsCheck(Relation rel, DefElem *def); ATExecSetRelOptionsCheck_hook_type ATExecSetRelOptionsCheck_hook = NULL; ATRewriteTable_hook_type ATRewriteTable_hook = NULL; +check_types_am_hook_type check_types_am_hook = NULL; static void checkATSetDistributedByStandalone(AlteredTableInfo *tab, Relation rel); static void populate_rel_col_encodings(Relation rel, List *stenc, List *withOptions, Oid newAm); @@ -838,10 +838,12 @@ validateAndMergeAPExprs(CreateStmt *stmt) */ tablespaceId = get_tablespace_oid(stmt->tablespacename, false); +#ifndef SERVERLESS if (partitioned && tablespaceId == MyDatabaseTableSpace) ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), errmsg("cannot specify default tablespace for partitioned relations"))); +#endif } else if (stmt->partbound) { @@ -861,9 +863,15 @@ validateAndMergeAPExprs(CreateStmt *stmt) tablespaceId = InvalidOid; /* still nothing? 
use the default */ +#ifdef SERVERLESS + if (!OidIsValid(tablespaceId) && !stmt->partbound) + tablespaceId = GetDefaultTablespace(stmt->relation->relpersistence, + partitioned); +#else if (!OidIsValid(tablespaceId)) tablespaceId = GetDefaultTablespace(stmt->relation->relpersistence, partitioned); +#endif /* Check permissions except when using database's default */ if (OidIsValid(tablespaceId) && tablespaceId != MyDatabaseTableSpace) @@ -1080,6 +1088,10 @@ validateAndMergeAPExprs(CreateStmt *stmt) * default values or CHECK constraints; we handle those below. */ descriptor = BuildDescForRelation(schema); + if (check_types_am_hook) + { + (*check_types_am_hook)(schema, accessMethodId, stmt->relation->relname, relkind); + } /* * now that we have the final list of attributes, interpret DISTRIBUTED BY diff --git a/src/backend/commands/trigger.c b/src/backend/commands/trigger.c index 0b8d0e8b95b..8bd4ea95930 100644 --- a/src/backend/commands/trigger.c +++ b/src/backend/commands/trigger.c @@ -206,6 +206,22 @@ CreateTriggerFiringOn(CreateTrigStmt *stmt, const char *queryString, else rel = table_openrv(stmt->relation, ShareRowExclusiveLock); +#ifdef SERVERLESS + if (rel->rd_rel->relam != HEAP_TABLE_AM_OID && !stmt->row) + { + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("Triggers for statements are not yet supported"))); + } +#else + if (!stmt->row) + { + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("Triggers for statements are not yet supported"))); + } +#endif /* SERVERLESS */ + /* * Triggers must be on tables or views, and there are additional * relation-type-specific restrictions. 
@@ -5685,11 +5701,22 @@ AfterTriggerSetState(ConstraintsSetStmt *stmt) } if (Gp_role == GP_ROLE_DISPATCH) { + bool snapshot_set = false; + if (!ActiveSnapshotSet()) + { + snapshot_set = true; + PushActiveSnapshot(GetTransactionSnapshot()); + } CdbDispatchUtilityStatement((Node *) stmt, DF_CANCEL_ON_ERROR| - DF_NEED_TWO_PHASE, + DF_NEED_TWO_PHASE| + DF_WITH_SNAPSHOT, NIL, NULL); + if (snapshot_set) + { + PopActiveSnapshot(); + } } } diff --git a/src/backend/executor/execMain.c b/src/backend/executor/execMain.c index 1156c80672d..a86676df332 100644 --- a/src/backend/executor/execMain.c +++ b/src/backend/executor/execMain.c @@ -265,7 +265,8 @@ standard_ExecutorStart(QueryDesc *queryDesc, int eflags) Assert(queryDesc->plannedstmt->intoPolicy == NULL || GpPolicyIsPartitioned(queryDesc->plannedstmt->intoPolicy) || - GpPolicyIsReplicated(queryDesc->plannedstmt->intoPolicy)); + GpPolicyIsReplicated(queryDesc->plannedstmt->intoPolicy) || + GpPolicyIsEntry(queryDesc->plannedstmt->intoPolicy)); /* GPDB hook for collecting query info */ if (query_info_collect_hook) @@ -1786,7 +1787,8 @@ InitPlan(QueryDesc *queryDesc, int eflags) Assert(plannedstmt->intoPolicy == NULL || GpPolicyIsPartitioned(plannedstmt->intoPolicy) || - GpPolicyIsReplicated(plannedstmt->intoPolicy)); + GpPolicyIsReplicated(plannedstmt->intoPolicy) || + GpPolicyIsEntry(plannedstmt->intoPolicy)); if (DEBUG1 >= log_min_messages) { diff --git a/src/backend/gpopt/translate/CTranslatorQueryToDXL.cpp b/src/backend/gpopt/translate/CTranslatorQueryToDXL.cpp index 392f650664e..1e48721293b 100644 --- a/src/backend/gpopt/translate/CTranslatorQueryToDXL.cpp +++ b/src/backend/gpopt/translate/CTranslatorQueryToDXL.cpp @@ -1081,6 +1081,19 @@ CTranslatorQueryToDXL::TranslateCTASToDXL() } GPOS_ASSERT(IMDRelation::EreldistrMasterOnly != rel_distr_policy); + // fall back to the planner for queries on master-only table if they are disabled with Orca. 
This is due to + // the fact that catalog tables (master-only) are not analyzed often and will result in Orca producing + // inferior plans. + if (IMDRelation::EreldistrMasterOnly == rel_distr_policy) + { + GPOS_THROW_EXCEPTION( + gpdxl::ExmaDXL, // major + gpdxl::ExmiQuery2DXLUnsupportedFeature, // minor + CException:: + ExsevDebug1, // ulSeverityLevel mapped to GPDB severity level + GPOS_WSZ_LIT("Queries on master-only tables")); + } + m_context->m_has_distributed_tables = true; OID oid = 1; diff --git a/src/backend/utils/cache/plancache.c b/src/backend/utils/cache/plancache.c index db40040cea3..10215866b3d 100644 --- a/src/backend/utils/cache/plancache.c +++ b/src/backend/utils/cache/plancache.c @@ -73,6 +73,7 @@ #include "utils/rls.h" #include "utils/snapmgr.h" #include "utils/syscache.h" +#include "utils/plancache.h" #include "cdb/cdbtranscat.h" #include "cdb/cdbutil.h" @@ -85,6 +86,8 @@ ((plansource)->raw_parse_tree && \ IsA((plansource)->raw_parse_tree->stmt, TransactionStmt)) +post_parse_ctas_query_hook_type post_parse_ctas_query_hook = NULL; + /* * This is the head of the backend's list of "saved" CachedPlanSources (i.e., * those that are in long-lived storage and are examined for sinval events). 
@@ -712,6 +715,11 @@ RevalidateCachedQuery(CachedPlanSource *plansource, Assert(list_length(tlist) == 1); Query *query = (Query *) linitial(tlist); query->parentStmtType = PARENTSTMTTYPE_CTAS; + + if (post_parse_ctas_query_hook) + { + (*post_parse_ctas_query_hook)(query, intoClause); + } } /* Release snapshot if we got one */ diff --git a/src/bin/psql/describe.c b/src/bin/psql/describe.c index 9b65b884950..76251610a03 100644 --- a/src/bin/psql/describe.c +++ b/src/bin/psql/describe.c @@ -4635,15 +4635,17 @@ add_distributed_by_footer(printTableContent *const cont, const char *oid) static void add_partition_by_footer(printTableContent *const cont, const char *oid) { - PGresult *result; + PGresult *result; PQExpBufferData buf; - int nRows; - int nPartKey; + int nRows; + int nPartKey; initPQExpBuffer(&buf); /* check if current relation is root partition, if it is root partition, at least 1 row returns */ - printfPQExpBuffer(&buf, "SELECT parrelid FROM pg_catalog.pg_partition WHERE parrelid = '%s'", oid); + printfPQExpBuffer(&buf, + "SELECT parrelid FROM pg_catalog.pg_partition WHERE parrelid = '%s'", + oid); result = PSQLexec(buf.data); if (!result) @@ -4656,27 +4658,27 @@ add_partition_by_footer(printTableContent *const cont, const char *oid) { /* query partition key on the root partition */ printfPQExpBuffer(&buf, - "WITH att_arr AS (SELECT unnest(paratts) \n" - " FROM pg_catalog.pg_partition p \n" - " WHERE p.parrelid = '%s' AND p.parlevel = 0 AND p.paristemplate = false), \n" - "idx_att AS (SELECT row_number() OVER() AS idx, unnest AS att_num FROM att_arr) \n" - "SELECT attname FROM pg_catalog.pg_attribute, idx_att \n" - " WHERE attrelid='%s' AND attnum = att_num ORDER BY idx ", - oid, oid); + "WITH att_arr AS (SELECT unnest(paratts) \n" + " FROM pg_catalog.pg_partition p \n" + " WHERE p.parrelid = '%s' AND p.parlevel = 0 AND p.paristemplate = false), \n" + "idx_att AS (SELECT row_number() OVER() AS idx, unnest AS att_num FROM att_arr) \n" + "SELECT attname FROM 
pg_catalog.pg_attribute, idx_att \n" + " WHERE attrelid='%s' AND attnum = att_num ORDER BY idx ", + oid, oid); } else { /* query partition key on the intermediate partition */ printfPQExpBuffer(&buf, - "WITH att_arr AS (SELECT unnest(paratts) FROM pg_catalog.pg_partition p, \n" - " (SELECT parrelid, parlevel \n" - " FROM pg_catalog.pg_partition p, pg_catalog.pg_partition_rule pr \n" - " WHERE pr.parchildrelid='%s' AND p.oid = pr.paroid) AS v \n" - " WHERE p.parrelid = v.parrelid AND p.parlevel = v.parlevel+1 AND p.paristemplate = false), \n" - "idx_att AS (SELECT row_number() OVER() AS idx, unnest AS att_num FROM att_arr) \n" - "SELECT attname FROM pg_catalog.pg_attribute, idx_att \n" - " WHERE attrelid='%s' AND attnum = att_num ORDER BY idx ", - oid, oid); + "WITH att_arr AS (SELECT unnest(paratts) FROM pg_catalog.pg_partition p, \n" + " (SELECT parrelid, parlevel \n" + " FROM pg_catalog.pg_partition p, pg_catalog.pg_partition_rule pr \n" + " WHERE pr.parchildrelid='%s' AND p.oid = pr.paroid) AS v \n" + " WHERE p.parrelid = v.parrelid AND p.parlevel = v.parlevel+1 AND p.paristemplate = false), \n" + "idx_att AS (SELECT row_number() OVER() AS idx, unnest AS att_num FROM att_arr) \n" + "SELECT attname FROM pg_catalog.pg_attribute, idx_att \n" + " WHERE attrelid='%s' AND attnum = att_num ORDER BY idx ", + oid, oid); } result = PSQLexec(buf.data); @@ -4686,8 +4688,8 @@ add_partition_by_footer(printTableContent *const cont, const char *oid) nPartKey = PQntuples(result); if (nPartKey) { - char *partColName; - int i; + char *partColName; + int i; resetPQExpBuffer(&buf); appendPQExpBuffer(&buf, "Partition by: ("); @@ -4708,7 +4710,32 @@ add_partition_by_footer(printTableContent *const cont, const char *oid) PQclear(result); termPQExpBuffer(&buf); - return; /* success */ + return; /* success */ +} + +static char * +GetDefaultTablespace() +{ + PGresult *result = NULL; + PQExpBufferData buf; + + initPQExpBuffer(&buf); + printfPQExpBuffer(&buf, + "show default_tablespace;"); 
+ result = PSQLexec(buf.data); + termPQExpBuffer(&buf); + if (!result) + { + return NULL; + } + if (PQntuples(result) > 0) + { + char *default_tablespace = pg_strdup(PQgetvalue(result, 0, 0)); + PQclear(result); + return default_tablespace; + } + PQclear(result); + return NULL; } /* @@ -4751,6 +4778,15 @@ add_tablespace_footer(printTableContent *const cont, char relkind, /* Should always be the case, but.... */ if (PQntuples(result) > 0) { + char *default_tablespace = GetDefaultTablespace(); + if (default_tablespace != NULL && strcmp(PQgetvalue(result, 0, 0), default_tablespace) == 0) + { + PQclear(result); + pg_free(default_tablespace); + termPQExpBuffer(&buf); + return; + } + pg_free(default_tablespace); if (newline) { /* Add the tablespace as a new footer */ diff --git a/src/include/commands/tablecmds.h b/src/include/commands/tablecmds.h index 67d446a84f8..6c69fd05e32 100644 --- a/src/include/commands/tablecmds.h +++ b/src/include/commands/tablecmds.h @@ -146,4 +146,7 @@ extern PGDLLIMPORT ATRewriteTable_hook_type ATRewriteTable_hook; typedef void (*ATExecSetRelOptionsCheck_hook_type) (Relation rel, DefElem *def); extern PGDLLIMPORT ATExecSetRelOptionsCheck_hook_type ATExecSetRelOptionsCheck_hook; + +typedef void (*check_types_am_hook_type) (List *schema, Oid oid, const char * relname, char relkind); +extern PGDLLIMPORT check_types_am_hook_type check_types_am_hook; #endif /* TABLECMDS_H */ diff --git a/src/include/commands/tablespace.h b/src/include/commands/tablespace.h index 24b1800f890..ecc41062c9b 100644 --- a/src/include/commands/tablespace.h +++ b/src/include/commands/tablespace.h @@ -46,6 +46,7 @@ typedef struct TableSpaceOpts bool stage; int serverOffset; int pathOffset; + int storageOffset; } TableSpaceOpts; extern Oid CreateTableSpace(CreateTableSpaceStmt *stmt); diff --git a/src/include/utils/plancache.h b/src/include/utils/plancache.h index 6208a5fa164..67e459d2678 100644 --- a/src/include/utils/plancache.h +++ b/src/include/utils/plancache.h @@ 
-19,6 +19,7 @@ #include "lib/ilist.h" #include "nodes/params.h" #include "nodes/primnodes.h" +#include "nodes/parsenodes.h" #include "tcop/cmdtag.h" #include "utils/queryenvironment.h" #include "utils/resowner.h" @@ -240,4 +241,7 @@ extern bool CachedPlanIsSimplyValid(CachedPlanSource *plansource, extern CachedExpression *GetCachedExpression(Node *expr); extern void FreeCachedExpression(CachedExpression *cexpr); +typedef void (*post_parse_ctas_query_hook_type) (Query *query, IntoClause *intoClause); +extern PGDLLIMPORT post_parse_ctas_query_hook_type post_parse_ctas_query_hook; + #endif /* PLANCACHE_H */ diff --git a/src/test/regress/expected/qp_misc_optimizer.out b/src/test/regress/expected/qp_misc_optimizer.out index 7369bf9cff0..7143bd72441 100644 --- a/src/test/regress/expected/qp_misc_optimizer.out +++ b/src/test/regress/expected/qp_misc_optimizer.out @@ -14,7 +14,7 @@ SET default_with_oids = false; -- CREATE LANGUAGE plpgsql; SET search_path = public, pg_catalog; -SET default_tablespace = ''; +-- SET default_tablespace = ''; -- -- Name: tset1; Type: TABLE; Schema: public; Owner: gpadmin; Tablespace: -- From 9c133782eadec0bf764c409d18eab3ca4be08cbd Mon Sep 17 00:00:00 2001 From: wangweinan Date: Fri, 31 May 2024 10:54:04 +0800 Subject: [PATCH 105/152] Move all ProcessUtility into hashdata_utility 1. handle utility cmd in separete function by qd/qe 2. merge all status flags into one UtilityResult --- src/backend/commands/tablecmds.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/backend/commands/tablecmds.c b/src/backend/commands/tablecmds.c index e2790c368d3..dee0e387f79 100644 --- a/src/backend/commands/tablecmds.c +++ b/src/backend/commands/tablecmds.c @@ -645,7 +645,7 @@ validateAndMergeAPExprs(CreateStmt *stmt) } #endif - /* ---------------------------------------------------------------- +/* ---------------------------------------------------------------- * DefineRelation * Creates a new relation. 
* @@ -668,10 +668,10 @@ validateAndMergeAPExprs(CreateStmt *stmt) * responsibility to dispatch. * ---------------------------------------------------------------- */ - ObjectAddress - DefineRelation(CreateStmt *stmt, char relkind, Oid ownerId, - ObjectAddress *typaddress, const char *queryString, - bool dispatch, bool useChangedOpts, GpPolicy *intoPolicy) +ObjectAddress +DefineRelation(CreateStmt *stmt, char relkind, Oid ownerId, + ObjectAddress *typaddress, const char *queryString, + bool dispatch, bool useChangedOpts, GpPolicy *intoPolicy) { char relname[NAMEDATALEN]; Oid namespaceId; From 07bc62ad8cdcbae0e811ec2ce72c8c931e139cea Mon Sep 17 00:00:00 2001 From: wangweinan Date: Fri, 7 Jun 2024 14:52:16 +0800 Subject: [PATCH 106/152] Feature: AutoPartition support Copy...FROM... command As Insert dml, Copy...From... command for auto partition table need dynamic create relation by the command. In QD, before the command is performed, ProcessUtilityHook detects auto partition Expr and bind the Expr into CopyStmt Dispatch APExpr with CopyStmt to all QEs. ProcessUtilityHook hijack the CopyStmt once the APExpr is not null. The autopart_copy.cc is the AP table COPY FROM private implementation. Instead of TableAM insert interface, dmlContext is the data insert method the same as autopart_modify.cc. For sreh partial support(Ignore row error but do not report error log) in Auto Partition as serverless version. By contrast, freeze option we do not have a plan to support them since the pax format differs from the heap. 
doc: https://hashdata.feishu.cn/docx/UHfJd0EJHoj7M0xa2F6cfQrQn1e --- src/backend/commands/copyfrom.c | 22 ++++++++++++++++++++++ src/backend/nodes/copyfuncs.c | 3 +++ src/backend/nodes/equalfuncs.c | 3 +++ src/backend/nodes/outfuncs_common.c | 4 ++++ src/backend/nodes/readfast.c | 4 ++++ src/include/commands/copyfrom_internal.h | 5 +++++ src/include/nodes/parsenodes.h | 5 ++++- 7 files changed, 45 insertions(+), 1 deletion(-) diff --git a/src/backend/commands/copyfrom.c b/src/backend/commands/copyfrom.c index da87cf869e6..d04b8dc1088 100644 --- a/src/backend/commands/copyfrom.c +++ b/src/backend/commands/copyfrom.c @@ -4365,3 +4365,25 @@ GetTargetSeg(GpDistributionData *distData, TupleTableSlot *slot) return target_seg; } + +#ifdef SERVERLESS +void +CopyInitDataParserWrapper(CopyFromState cstate) +{ + CopyInitDataParser(cstate); +} + +bool +NextCopyFromExecuteWrapper(CopyFromState cstate, ExprContext *econtext, + Datum *values, bool *nulls, bool is_directory_table) +{ + return NextCopyFromExecute(cstate, econtext, values, nulls, + is_directory_table); +} + +void +HandleCopyErrorWrapper(CopyFromState cstate) +{ + HandleCopyError(cstate); +} +#endif /* SERVERLESS */ \ No newline at end of file diff --git a/src/backend/nodes/copyfuncs.c b/src/backend/nodes/copyfuncs.c index e57564baf75..a8592693436 100644 --- a/src/backend/nodes/copyfuncs.c +++ b/src/backend/nodes/copyfuncs.c @@ -4200,6 +4200,9 @@ _copyCopyStmt(const CopyStmt *from) COPY_NODE_FIELD(options); COPY_NODE_FIELD(whereClause); COPY_NODE_FIELD(sreh); +#ifdef SERVERLESS + COPY_NODE_FIELD(custom_exprs); +#endif /* SERVERLERSS */ return newnode; } diff --git a/src/backend/nodes/equalfuncs.c b/src/backend/nodes/equalfuncs.c index 953279f796c..1347aff5a0e 100644 --- a/src/backend/nodes/equalfuncs.c +++ b/src/backend/nodes/equalfuncs.c @@ -1366,6 +1366,9 @@ _equalCopyStmt(const CopyStmt *a, const CopyStmt *b) COMPARE_NODE_FIELD(options); COMPARE_NODE_FIELD(whereClause); COMPARE_NODE_FIELD(sreh); +#ifdef SERVERLESS 
+ COMPARE_NODE_FIELD(custom_exprs); +#endif /* SERVERLESS */ return true; } diff --git a/src/backend/nodes/outfuncs_common.c b/src/backend/nodes/outfuncs_common.c index f164f5a6ca6..78d38a58ca5 100644 --- a/src/backend/nodes/outfuncs_common.c +++ b/src/backend/nodes/outfuncs_common.c @@ -210,7 +210,11 @@ _outCopyStmt(StringInfo str, const CopyStmt *node) WRITE_STRING_FIELD(filename); WRITE_STRING_FIELD(dirfilename); WRITE_NODE_FIELD(options); + WRITE_NODE_FIELD(whereClause); WRITE_NODE_FIELD(sreh); +#ifdef SERVERLESS + WRITE_NODE_FIELD(custom_exprs); +#endif /* SERVERLESS */ } static void diff --git a/src/backend/nodes/readfast.c b/src/backend/nodes/readfast.c index d52ed04b5c3..ef983dcf4e1 100644 --- a/src/backend/nodes/readfast.c +++ b/src/backend/nodes/readfast.c @@ -844,7 +844,11 @@ _readCopyStmt(void) READ_STRING_FIELD(filename); READ_STRING_FIELD(dirfilename); READ_NODE_FIELD(options); + READ_NODE_FIELD(whereClause); READ_NODE_FIELD(sreh); +#ifdef SERVERLESS + READ_NODE_FIELD(custom_exprs); +#endif /* SERVERLESS */ READ_DONE(); } diff --git a/src/include/commands/copyfrom_internal.h b/src/include/commands/copyfrom_internal.h index 05122662d47..9eecd1ebdd4 100644 --- a/src/include/commands/copyfrom_internal.h +++ b/src/include/commands/copyfrom_internal.h @@ -249,4 +249,9 @@ CopyGetInt16(CopyFromState cstate, int16 *val) extern char *limit_printout_length(const char *str); extern void HandleCopyError(CopyFromState cstate); +#ifdef SERVERLESS +extern void CopyInitDataParserWrapper(CopyFromState cstate); +extern bool NextCopyFromExecuteWrapper(CopyFromState cstate, ExprContext *econtext, Datum *values, bool *nulls, bool is_directory_table); +extern void HandleCopyErrorWrapper(CopyFromState cstate); +#endif /* SERVERLESS */ #endif /* COPYFROM_INTERNAL_H */ diff --git a/src/include/nodes/parsenodes.h b/src/include/nodes/parsenodes.h index 96b42d8ff0d..9d8b12ca4a6 100644 --- a/src/include/nodes/parsenodes.h +++ b/src/include/nodes/parsenodes.h @@ -2438,7 +2438,10 
@@ typedef struct CopyStmt List *options; /* List of DefElem nodes */ Node *whereClause; /* WHERE condition (or NULL) */ - List *sreh; /* Single row error handling info */ + Node *sreh; /* Single row error handling info */ +#ifdef SERVERLESS + List *custom_exprs; /* expr list */ +#endif /* SERVERLESS */ } CopyStmt; /* ---------------------- From 4d60ff79d033d2885f410a67a73d25bc0d3e25e1 Mon Sep 17 00:00:00 2001 From: JInbao Chen Date: Fri, 7 Jun 2024 14:50:57 +0800 Subject: [PATCH 107/152] Make a start dispatch for the catalog used in init backend --- src/backend/access/index/genam.c | 1 + src/backend/cdb/cdbtranscat.c | 21 ++++++++++++++++- src/backend/cdb/dispatcher/cdbgang_async.c | 8 +++++++ src/backend/nodes/outfast.c | 4 ++-- src/backend/postmaster/postmaster.c | 22 ++++++++++++++---- src/backend/tcop/postgres.c | 4 ++++ src/backend/utils/cache/catcache.c | 3 +++ src/backend/utils/cache/relcache.c | 27 ++++++++++++++++++++-- src/backend/utils/init/postinit.c | 7 ++++++ src/include/cdb/cdbtranscat.h | 5 ++++ src/include/cdb/cdbvars.h | 1 + src/interfaces/libpq/fe-connect.c | 2 ++ src/interfaces/libpq/fe-protocol3.c | 11 +++++++++ src/interfaces/libpq/libpq-int.h | 2 ++ 14 files changed, 109 insertions(+), 9 deletions(-) diff --git a/src/backend/access/index/genam.c b/src/backend/access/index/genam.c index fc88b393798..3d7472b1916 100644 --- a/src/backend/access/index/genam.c +++ b/src/backend/access/index/genam.c @@ -26,6 +26,7 @@ #include "access/transam.h" #include "catalog/index.h" #include "cdb/cdbtranscat.h" +#include "cdb/cdbvars.h" #include "lib/stringinfo.h" #include "miscadmin.h" #include "storage/bufmgr.h" diff --git a/src/backend/cdb/cdbtranscat.c b/src/backend/cdb/cdbtranscat.c index 79eff815b6d..32144db9fc9 100644 --- a/src/backend/cdb/cdbtranscat.c +++ b/src/backend/cdb/cdbtranscat.c @@ -75,6 +75,7 @@ SystemTupleStoreReset_hook_type SystemTupleStoreReset_hook = NULL; SystemTupleStoreInit_hook_type SystemTupleStoreInit_hook = NULL; 
getSystemTupleList_hook_type getSystemTupleList_hook = NULL; PlFuncStored_hook_type PlFuncStored_hook = NULL; +CollectStartupCatalog_hook_type CollectStartupCatalog_hook = NULL; void TransferReset(void) { @@ -224,4 +225,22 @@ void InitQuery(const char *query_string) { if (InitQuery_hook) (*InitQuery_hook) (query_string); -} \ No newline at end of file +} + +/* + * Start up catalog + */ +char *StartUpCatalogData = NULL; +int StartUpCatalogLen = 0; + +char * +CollectStartupCatalog(int *len) +{ + if (CollectStartupCatalog_hook) + return (*CollectStartupCatalog_hook)(len); + else + { + *len = 0; + return NULL; + } +} diff --git a/src/backend/cdb/dispatcher/cdbgang_async.c b/src/backend/cdb/dispatcher/cdbgang_async.c index 0b26f67b73f..01f56e52d5a 100644 --- a/src/backend/cdb/dispatcher/cdbgang_async.c +++ b/src/backend/cdb/dispatcher/cdbgang_async.c @@ -33,7 +33,9 @@ #include "cdb/cdbfts.h" #include "cdb/cdbgang.h" #include "cdb/cdbgang_async.h" +#include "cdb/cdbsrlz.h" #include "cdb/cdbtm.h" +#include "cdb/cdbtranscat.h" #include "cdb/cdbvars.h" #include "miscadmin.h" @@ -340,7 +342,13 @@ cdbgang_createGang_async(List *segments, SegmentType segmentType) if (fds[currentFdNumber].revents & fds[currentFdNumber].events || fds[currentFdNumber].revents & (POLLERR | POLLHUP | POLLNVAL)) + { + segdbDesc->conn->catalog = + CollectStartupCatalog(&segdbDesc->conn->catalog_size); + pollingStatus[i] = PQconnectPoll(segdbDesc->conn); + } + currentFdNumber++; diff --git a/src/backend/nodes/outfast.c b/src/backend/nodes/outfast.c index 023c8d72ce6..0774f4873e2 100644 --- a/src/backend/nodes/outfast.c +++ b/src/backend/nodes/outfast.c @@ -869,7 +869,7 @@ _outSystemTableTransferNode(StringInfo str, const SystemTableTransferNode *node) } static void -_outTranderTuple(StringInfo str, const TransferTuple *node) +_outTransferTuple(StringInfo str, const TransferTuple *node) { WRITE_NODE_TYPE("TRANSFERTUPLE"); @@ -1960,7 +1960,7 @@ _outNode(StringInfo str, void *obj) 
_outSystemTableTransferNode(str, obj); break; case T_TransferTuple: - _outTranderTuple(str, obj); + _outTransferTuple(str, obj); break; #ifdef SERVERLESS case T_APHashExpr: diff --git a/src/backend/postmaster/postmaster.c b/src/backend/postmaster/postmaster.c index a1b25e1caa1..18c21c3f024 100644 --- a/src/backend/postmaster/postmaster.c +++ b/src/backend/postmaster/postmaster.c @@ -156,6 +156,7 @@ #include "cdb/cdbtm.h" #include "cdb/cdbvars.h" #include "cdb/cdbendpoint.h" +#include "cdb/cdbtranscat.h" #include "cdb/ic_proxy_bgworker.h" #include "cdb/ml_ipc.h" #include "utils/metrics_utils.h" @@ -2589,7 +2590,7 @@ ProcessStartupPacket(Port *port, bool ssl_done, bool gss_done) /* If no defined USE_INTERNAL_FTS * Allow deal fts meesage in master * Also support promote standby when standby_promote_ready is true - */ + */ am_ftshandler = true; #ifdef FAULT_INJECTOR @@ -2671,9 +2672,22 @@ ProcessStartupPacket(Port *port, bool ssl_done, bool gss_done) * given packet length, complain. */ if (offset != len - 1) - ereport(FATAL, - (errcode(ERRCODE_PROTOCOL_VIOLATION), - errmsg("invalid startup packet layout: expected terminator as last byte"))); + { + int catalog_len; + + offset += 1; + memcpy(&catalog_len, buf + offset, sizeof(int)); + + offset += 4; + if (len - offset != catalog_len) + ereport(FATAL, + (errcode(ERRCODE_PROTOCOL_VIOLATION), + errmsg("invalid startup packet layout: expected terminator as last byte"))); + + StartUpCatalogData = malloc(catalog_len); + StartUpCatalogLen = catalog_len; + memcpy(StartUpCatalogData, buf + offset, catalog_len); + } /* * If the client requested a newer protocol version or if the client diff --git a/src/backend/tcop/postgres.c b/src/backend/tcop/postgres.c index 1242a9f74a9..d6ce8ce4373 100644 --- a/src/backend/tcop/postgres.c +++ b/src/backend/tcop/postgres.c @@ -106,6 +106,7 @@ #include "postmaster/backoff.h" #include "postmaster/fts.h" #include "utils/guc.h" +#include "utils/inval.h" #include "utils/resource_manager.h" #include 
"utils/session_state.h" #include "utils/vmem_tracker.h" @@ -5865,6 +5866,9 @@ PostgresMain(int argc, char *argv[], SetUserIdAndContext(cuid, false); /* Set current userid */ SystemTupleStoreReset(); +#ifdef SERVERLESS + InvalidateSystemCaches(); +#endif /* SERVERLESS */ SystemTupleStoreInit(serializedCatalog, serializedCatalogLen); if (serializedPlantreelen==0) diff --git a/src/backend/utils/cache/catcache.c b/src/backend/utils/cache/catcache.c index eb44996c704..fa4f757948e 100644 --- a/src/backend/utils/cache/catcache.c +++ b/src/backend/utils/cache/catcache.c @@ -1038,6 +1038,9 @@ InitCatCachePhase2(CatCache *cache, bool touch_index) if (cache->cc_tupdesc == NULL) CatalogCacheInitializeCache(cache); + if (systup_store_active()) + return; + if (touch_index && cache->id != AMOID && cache->id != AMNAME) diff --git a/src/backend/utils/cache/relcache.c b/src/backend/utils/cache/relcache.c index c95debbc5b8..aa0e2034479 100644 --- a/src/backend/utils/cache/relcache.c +++ b/src/backend/utils/cache/relcache.c @@ -117,21 +117,28 @@ #include "catalog/gp_indexing.h" #include "catalog/gp_storage_server.h" #include "catalog/gp_storage_user_mapping.h" +#include "catalog/gp_warehouse.h" #include "catalog/heap.h" #include "catalog/index.h" #include "catalog/main_manifest.h" #include "catalog/pg_depend.h" +#include "catalog/pg_db_role_setting.h" #include "catalog/pg_directory_table.h" #include "catalog/pg_proc_callback.h" #include "catalog/pg_cast.h" #include "catalog/pg_collation.h" +#include "catalog/pg_directory_table.h" #include "catalog/pg_inherits.h" #include "catalog/pg_policy.h" +#include "catalog/pg_replication_origin.h" #include "catalog/pg_publication_rel.h" #include "catalog/pg_resgroup.h" #include "catalog/pg_resqueuecapability.h" #include "catalog/pg_statistic.h" #include "catalog/pg_statistic_ext.h" +#include "catalog/pg_statistic_ext_data.h" +#include "catalog/pg_subscription_rel.h" +#include "catalog/pg_transform.h" #include "catalog/pg_ts_config.h" #include 
"catalog/pg_ts_config_map.h" #include "catalog/pg_ts_dict.h" @@ -4447,7 +4454,7 @@ RelationCacheInitializePhase3(void) /* * If it's a faked-up entry, read the real pg_class tuple. */ - if (relation->rd_rel->relowner == InvalidOid) + if (relation->rd_rel->relowner == InvalidOid && !systup_store_active()) { HeapTuple htup; Form_pg_class relp; @@ -4594,6 +4601,9 @@ load_critical_index(Oid indexoid, Oid heapoid) { Relation ird; + if (systup_store_active()) + return; + /* * We must lock the underlying catalog before locking the index to avoid * deadlock, since RelationBuildDesc might well need to read the catalog, @@ -6658,20 +6668,23 @@ write_relcache_init_file(bool shared) HASH_SEQ_STATUS status; RelIdCacheEnt *idhentry; int i,j; - Oid collectRelids[48] = { + Oid collectRelids[60] = { AggregateRelationId, AccessMethodRelationId, AccessMethodOperatorRelationId, AccessMethodProcedureRelationId, + AppendOnlyRelationId, AttrDefaultRelationId, CastRelationId, ConstraintRelationId, + DbRoleSettingRelationId, DependRelationId, DirectoryTableRelationId, OperatorClassRelationId, CollationRelationId, ConversionRelationId, DefaultAclRelationId, + DirectoryTableRelationId, EnumRelationId, EventTriggerRelationId, ExtprotocolRelationId, @@ -6679,6 +6692,7 @@ write_relcache_init_file(bool shared) ForeignServerRelationId, ForeignTableRelationId, GpPolicyRelationId, + GpWarehouseRelationId, InheritsRelationId, IndexRelationId, LanguageRelationId, @@ -6689,17 +6703,25 @@ write_relcache_init_file(bool shared) PartitionedRelationId, PolicyRelationId, ProcCallbackRelationId, + ProcedureRelationId, + ProfileRelationId, PublicationRelationId, PublicationRelRelationId, RangeRelationId, + ReplicationOriginRelationId, ResGroupRelationId, ResQueueCapabilityRelationId, + RewriteRelationId, SequenceRelationId, + StatisticExtDataRelationId, StatisticExtRelationId, StatisticRelationId, StorageServerRelationId, StorageUserMappingRelationId, + SubscriptionRelationId, + SubscriptionRelRelationId, 
TableSpaceRelationId, + TransformRelationId, TriggerRelationId, TSConfigRelationId, TSConfigMapRelationId, @@ -7003,6 +7025,7 @@ RelationIdIsInInitFile(Oid relationId) if (relationId == SharedSecLabelRelationId || relationId == TriggerRelidNameIndexId || relationId == DatabaseNameIndexId || + relationId == DbRoleSettingRelationId || relationId == SharedSecLabelObjectIndexId || relationId == ManifestRelationId || relationId == PolicyRelationId || diff --git a/src/backend/utils/init/postinit.c b/src/backend/utils/init/postinit.c index 44bd1622f55..1b18c0a52ed 100644 --- a/src/backend/utils/init/postinit.c +++ b/src/backend/utils/init/postinit.c @@ -46,6 +46,7 @@ #include "libpq/libpq-be.h" #include "cdb/cdbendpoint.h" #include "cdb/cdbtm.h" +#include "cdb/cdbtranscat.h" #include "cdb/cdbvars.h" #include "cdb/cdbutil.h" #include "mb/pg_wchar.h" @@ -676,6 +677,12 @@ InitPostgres(const char *in_dbname, Oid dboid, const char *username, elog(DEBUG3, "InitPostgres"); + if (StartUpCatalogData && GpIdentity.segindex >= 0) + { + SystemTupleStoreReset(); + SystemTupleStoreInit(StartUpCatalogData, StartUpCatalogLen); + } + /* * Add my PGPROC struct to the ProcArray. 
* diff --git a/src/include/cdb/cdbtranscat.h b/src/include/cdb/cdbtranscat.h index 28604da3c92..2b38821e0bb 100644 --- a/src/include/cdb/cdbtranscat.h +++ b/src/include/cdb/cdbtranscat.h @@ -69,6 +69,8 @@ typedef List *(*getSystemTupleList_hook_type) (Oid relid); extern PGDLLIMPORT getSystemTupleList_hook_type getSystemTupleList_hook; typedef bool (*PlFuncStored_hook_type) (Oid funcid); extern PGDLLIMPORT PlFuncStored_hook_type PlFuncStored_hook; +typedef char *(*CollectStartupCatalog_hook_type) (int *len); +extern PGDLLIMPORT CollectStartupCatalog_hook_type CollectStartupCatalog_hook; @@ -108,5 +110,8 @@ extern PGDLLIMPORT InTypeStore_hook_type InTypeStore_hook; extern PGDLLIMPORT TypeStore_hook_type TypeStore_hook; extern PGDLLIMPORT InitQuery_hook_type InitQuery_hook; +extern char *StartUpCatalogData; +extern int StartUpCatalogLen; +extern char *CollectStartupCatalog(int *len); #endif //CDBTANSCAT_H \ No newline at end of file diff --git a/src/include/cdb/cdbvars.h b/src/include/cdb/cdbvars.h index 852661b81bd..5703d23b6a8 100644 --- a/src/include/cdb/cdbvars.h +++ b/src/include/cdb/cdbvars.h @@ -759,6 +759,7 @@ extern GpId GpIdentity; #define IS_QUERY_DISPATCHER() (GpIdentity.segindex == MASTER_CONTENT_ID) #define IS_QUERY_EXECUTOR_BACKEND() (Gp_role == GP_ROLE_EXECUTE && gp_session_id > 0) +#define IS_SEGMENT_EXECUTOR() (IS_QUERY_EXECUTOR_BACKEND() && GpIdentity.segindex >= 0) /* Stores the listener port that this process uses to listen for incoming * Interconnect connections from other Motion nodes. 
diff --git a/src/interfaces/libpq/fe-connect.c b/src/interfaces/libpq/fe-connect.c index 7f3dfd462a6..62314c4ddd9 100644 --- a/src/interfaces/libpq/fe-connect.c +++ b/src/interfaces/libpq/fe-connect.c @@ -432,6 +432,8 @@ static const PQEnvironmentOption EnvironmentOptions[] = } }; +extern char *CollectStartupCatalog(int *len); + /* The connection URI must start with either of the following designators: */ static const char uri_designator[] = "postgresql://"; static const char short_uri_designator[] = "postgres://"; diff --git a/src/interfaces/libpq/fe-protocol3.c b/src/interfaces/libpq/fe-protocol3.c index 1f038c8b95d..0bf5609002a 100644 --- a/src/interfaces/libpq/fe-protocol3.c +++ b/src/interfaces/libpq/fe-protocol3.c @@ -2472,5 +2472,16 @@ build_startup_packet(const PGconn *conn, char *packet, packet[packet_len] = '\0'; packet_len++; + if (conn->catalog) + { + if (packet) + { + memcpy(packet + packet_len, &conn->catalog_size, sizeof(int)); + memcpy(packet + packet_len + sizeof(int), conn->catalog, conn->catalog_size); + } + packet_len += sizeof(int); + packet_len += conn->catalog_size; + } + return packet_len; } diff --git a/src/interfaces/libpq/libpq-int.h b/src/interfaces/libpq/libpq-int.h index d406cdc2466..b24629b99df 100644 --- a/src/interfaces/libpq/libpq-int.h +++ b/src/interfaces/libpq/libpq-int.h @@ -438,6 +438,8 @@ struct pg_conn char *diffoptions; /* MPP: transfer changed GUCs(require sync) from QD to QEs */ char *gsslib; /* What GSS library to use ("gssapi" or * "sspi") */ + char *catalog; + int catalog_size; char *ssl_min_protocol_version; /* minimum TLS protocol version */ char *ssl_max_protocol_version; /* maximum TLS protocol version */ char *target_session_attrs; /* desired session properties */ From 671d6333cfb9add68d905dd0af155166af2eda0b Mon Sep 17 00:00:00 2001 From: lizhaohan Date: Mon, 17 Jun 2024 17:45:23 +0800 Subject: [PATCH 108/152] Miscellaneous fix: utility mode, hd_ctl enhancement, scripting * utility mode * fix bug of dbid in 
hd_ctl * hd_ctl now supports hd_ctl warehouse restart * fix scripting error --- gpMgmt/bin/hd_ctl | 28 +++++++++++++++++++++++----- 1 file changed, 23 insertions(+), 5 deletions(-) diff --git a/gpMgmt/bin/hd_ctl b/gpMgmt/bin/hd_ctl index e57fce6e1ad..17cf162a34d 100755 --- a/gpMgmt/bin/hd_ctl +++ b/gpMgmt/bin/hd_ctl @@ -98,14 +98,17 @@ class HdConnection: class HdCoordinator(HdConnection): def psql(self, sql: str, inline=False): - out, _ = self.exec(f'psql -c "{sql}" postgres', newline=not inline) + out, _ = self.exec(f'psql -Xc "{sql}" postgres', newline=not inline) if inline: print(' -> ', end='') print(out) + def psql_value(self, sql: str, inline=False): + return self.exec(f'psql -Xtc "{sql}" postgres', newline=not inline)[0].strip() + def dump_str_guc_as_conf_lines(self, *guc_names: str): """Dump string GUC values as postgresql.conf lines. Warning: GUC values are treated as strings""" - values = [self.exec(f'psql -tc "show {guc_name}" postgres')[0].strip() for guc_name in guc_names] + values = [self.psql_value(f'show {guc_name}') for guc_name in guc_names] return '\n'.join([f"{k} = '{v}'" for k, v in zip(guc_names, values)]) def __init__(self, conn_str: str): @@ -127,6 +130,9 @@ class HdSegment(HdConnection): "unionstore.safekeepers", "unionstore.pageserver_connstring") self.write(global_conf_lines, "$HOME/data/global.conf") + self.start() + + def start(self): pg_ctl_cmd = f'pg_ctl -D "{self.datadir}" -l "{self.startup_log}" -w -t 600 -o "-p {self.port} -c gp_role=execute" start' try: self.exec_gphome_bin(pg_ctl_cmd) @@ -152,11 +158,14 @@ class HdSegment(HdConnection): raise SSHCommandException(f"pg_isready still returns {ret} after {attempts} attempts", ret, self.conn_str) def drop(self): + self.stop() + self.exec(f'rm -rf "{self.datadir}"') + + def stop(self): try: self.exec_gphome_bin(f'pg_ctl -D "{self.datadir}" -l "{self.startup_log}" -w -t 120 -m i stop') except SSHCommandException as e: print(e) - self.exec(f'rm -rf "{self.datadir}"') def __init__(self, 
conn_str: str, dbid: int, contentid: int, port: int, datadir: str, coordinator: HdCoordinator): super().__init__(conn_str) @@ -189,6 +198,12 @@ class HdWarehouse: print("\nCreating...") self.create() + def restart(self): + print("\nStopping...") + self.on_all_segments('stop') + print("\nStarting...") + self.on_all_segments('start') + def list(self): self.coordinator.psql("select * from gp_warehouse") self.coordinator.psql("select * from gp_segment_configuration") @@ -197,9 +212,10 @@ class HdWarehouse: self.name = name self.coordinator = coordinator self.segments = [] + base_dbid = int(coordinator.psql_value("select max(dbid) from gp_segment_configuration")) + 1 for i, conn_str in enumerate(conn_strs): - port = base_port + i - self.segments.append(HdSegment(conn_str, i + 2, i, port, f"$HOME/data/primary{port}", coordinator)) + port, dbid = base_port + i, base_dbid + i + self.segments.append(HdSegment(conn_str, dbid, i, port, f"$HOME/data/primary{port}", coordinator)) class ArgParseShim: @@ -233,6 +249,7 @@ if __name__ == '__main__': create_warehouse = warehouse_subs.add_parser("create") drop_warehouse = warehouse_subs.add_parser("drop") recreate_warehouse = warehouse_subs.add_parser("recreate") + restart_warehouse = warehouse_subs.add_parser("restart") shim.epilogue() parsed = parser.parse_args() # parse before initialization so that syntax errors can be reported early with open(parsed.config, 'r') as f: @@ -246,6 +263,7 @@ if __name__ == '__main__': create_warehouse.set_defaults(func=warehouse.create) drop_warehouse.set_defaults(func=warehouse.drop) recreate_warehouse.set_defaults(func=warehouse.recreate) + restart_warehouse.set_defaults(func=warehouse.restart) parser.parse_args().func() # parse a second time which is guaranteed to succeed warehouse.list() From 78cff112bb4051c74369fa6013e887d3270ff023 Mon Sep 17 00:00:00 2001 From: leo Date: Wed, 5 Jun 2024 14:09:20 +0800 Subject: [PATCH 109/152] Support vectorization on hashdata table Build with 
--enable-vectorization to enable vectorization, turn on the GUC vector.enable_vectorization to make query execute with vectorization. --- src/backend/utils/cache/plancache.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/src/backend/utils/cache/plancache.c b/src/backend/utils/cache/plancache.c index 10215866b3d..d4e9ed53c2b 100644 --- a/src/backend/utils/cache/plancache.c +++ b/src/backend/utils/cache/plancache.c @@ -879,6 +879,13 @@ CheckCachedPlan(CachedPlanSource *plansource) */ if (plan->is_valid) { + ListCell *lc1; + + foreach(lc1, plan->stmt_list) + { + PlannedStmt *plannedstmt = lfirst_node(PlannedStmt, lc1); + plannedstmt->extensionContext = NULL; + } /* Successfully revalidated and locked the query. */ return true; } From 916d7093f4c921338ff7a14445f37363d14bab78 Mon Sep 17 00:00:00 2001 From: Zhang Mingli Date: Mon, 17 Jun 2024 23:51:46 +0800 Subject: [PATCH 110/152] Incremental Agg Plan based on materialized views. Implement an effective approach to get aggregate results with Group by on signel big table fast, using materialized views as a partial result append incremental agg on tuples that from the time views latest Refresh and to now. That is, we will reweite the first phase of a two-stage Aggregates on table t1 to a Append with partial results from view and Partial agg on a Delta SeqScan on table t1. 
example: create materialized view ivm0 as select a, count(b), sum(c) from t1 where d > 5 group by a; select a, count(b), sum(c) from t1 where d > 5 group by a; QUERY PLAN -------------------------------------------------------------------------- Finalize HashAggregate Output: t1.a, count(t1.b), sum(t1.c) Group Key: t1.a -> Gather Motion 3:1 (slice1; segments: 3) Output: t1.a, (PARTIAL count(t1.b)), (PARTIAL sum(t1.c)) -> Append -> Partial HashAggregate Output: t1.a, PARTIAL count(t1.b), PARTIAL sum(t1.c) Group Key: t1.a -> Delta Seq Scan on append_agg_schema.t1 Output: t1.a, t1.b, t1.c Filter: (t1.d > 5) -> Seq Scan on append_agg_schema.ivm0 Output: ivm0.a, ivm0.count, ivm0.sum Append with a simple select from view with corresponding columns and a Delta Seq Scan on t1. For GroupAgg, the order of first stage agg is required for final agg. An additional Sort is added for that. select a, count(b), sum(c), count(*) as count_star from t1 where d > 5 group by a; QUERY PLAN -------------------------------------------------------------------------- Finalize GroupAggregate Output: t1.a, count(t1.b), sum(t1.c), count(*) Group Key: t1.a -> Gather Motion 3:1 (slice1; segments: 3) Output: t1.a, (PARTIAL count(t1.b)), (PARTIAL sum(t1.c)), (PARTIAL count(*)) Merge Key: t1.a -> Sort Output: t1.a, (PARTIAL count(t1.b)), (PARTIAL sum(t1.c)), (PARTIAL count(*)) Sort Key: t1.a -> Append -> Partial GroupAggregate Output: t1.a, PARTIAL count(t1.b), PARTIAL sum(t1.c), PARTIAL count(*) Group Key: t1.a -> Sort Output: t1.a, t1.b, t1.c Sort Key: t1.a -> Delta Seq Scan on append_agg_schema.t1 Output: t1.a, t1.b, t1.c Filter: (t1.d > 5) -> Seq Scan on append_agg_schema.ivm0 Output: ivm0.a, ivm0.count, ivm0.sum, ivm0.count_star This requires the materialized views: 1. Store partial results of agg functions instead of final results. 2. Has only insert operation since the latest Refresh of view. Left TODO for ivm: pgclass.relhaspartialagg and pgclass.relinsertonly. 
Left identify for Delta SeqScan: Scan.basemv(oid of view based on) Authored-by: Zhang Mingli avamingli@gmail.com --- src/backend/catalog/heap.c | 2 + src/backend/catalog/index.c | 2 + src/backend/cdb/cdbgroupingpaths.c | 677 ++++++++++++++++++++++++ src/backend/commands/explain.c | 19 + src/backend/nodes/copyfuncs.c | 3 + src/backend/nodes/outfuncs.c | 4 + src/backend/nodes/readfuncs.c | 3 + src/backend/optimizer/plan/createplan.c | 36 ++ src/backend/optimizer/util/pathnode.c | 4 + src/backend/utils/cache/relcache.c | 2 + src/include/catalog/pg_class.h | 8 + src/include/nodes/pathnodes.h | 5 + src/include/nodes/plannodes.h | 14 + 13 files changed, 779 insertions(+) diff --git a/src/backend/catalog/heap.c b/src/backend/catalog/heap.c index 78081f5148f..1ab93f46b21 100644 --- a/src/backend/catalog/heap.c +++ b/src/backend/catalog/heap.c @@ -1329,6 +1329,8 @@ InsertPgClassTuple(Relation pg_class_desc, values[Anum_pg_class_relisivm - 1] = BoolGetDatum(rd_rel->relisivm); values[Anum_pg_class_relisdynamic - 1] = BoolGetDatum(rd_rel->relisdynamic); values[Anum_pg_class_relmvrefcount - 1] = Int32GetDatum(rd_rel->relmvrefcount); + values[Anum_pg_class_relhaspartialagg - 1] = BoolGetDatum(rd_rel->relhaspartialagg); + values[Anum_pg_class_relinsertonly - 1] = BoolGetDatum(rd_rel->relinsertonly); if (relacl != (Datum) 0) values[Anum_pg_class_relacl - 1] = relacl; else diff --git a/src/backend/catalog/index.c b/src/backend/catalog/index.c index d754d97c781..cf5fe120697 100644 --- a/src/backend/catalog/index.c +++ b/src/backend/catalog/index.c @@ -1075,6 +1075,8 @@ index_create_internal(Relation heapRelation, indexRelation->rd_rel->relisivm = false; indexRelation->rd_rel->relisdynamic = false; indexRelation->rd_rel->relmvrefcount = 0; + indexRelation->rd_rel->relhaspartialagg = false; + indexRelation->rd_rel->relinsertonly = false; /* * store index's pg_class entry diff --git a/src/backend/cdb/cdbgroupingpaths.c b/src/backend/cdb/cdbgroupingpaths.c index a54c6452471..1fcd8ac8441 
100644 --- a/src/backend/cdb/cdbgroupingpaths.c +++ b/src/backend/cdb/cdbgroupingpaths.c @@ -70,6 +70,14 @@ #include "utils/lsyscache.h" #include "utils/selfuncs.h" +#include "access/genam.h" +#include "access/table.h" +#include "catalog/pg_rewrite.h" +#include "nodes/pathnodes.h" +#include "parser/parsetree.h" +#include "utils/guc.h" +#include "commands/matview.h" + typedef enum { INVALID_DQA = -1, @@ -226,6 +234,37 @@ recognize_dqa_type(cdb_agg_planning_context *ctx); static PathTarget * strip_aggdistinct(PathTarget *target); +#ifdef SERVERLESS +static bool +try_append_agg(Query *parse); + +static bool +expand_append_agg(PlannerInfo *root, cdb_agg_planning_context *ctx); + +static Relation +simple_view_matching(Query *parse); + +static void +expand_append_agg_guts(PlannerInfo *root, cdb_agg_planning_context *ctx, Relation matviewRel); + +static Path* +build_view_seqscan_path(PlannerInfo *root, cdb_agg_planning_context *ctx, Relation matviewRel); + +static void +rewrite_to_append_agg_path(PlannerInfo *root, cdb_agg_planning_context *ctx, Relation matviewRel, Path *mv_seqscan_path); + +static PathTarget * +make_pathtarget_from_tupledesc(TupleDesc tupdes); +typedef struct +{ + int varno; +} aqumv_adjust_varno_context; + +static void aqumv_adjust_simple_parse(Query *parse); +static void aqumv_adjust_varno(Query *parse, int delta); +static Node *aqumv_adjust_varno_mutator(Node *node, aqumv_adjust_varno_context *context); +#endif + /* * cdb_create_multistage_grouping_paths * @@ -758,6 +797,20 @@ create_two_stage_paths(PlannerInfo *root, cdb_agg_planning_context *ctx, ctx->partial_rel, &extra); } +#ifdef SERVERLESS + /* + * Incremental AGG based on materialized views. + */ + bool path_changed = false; + + if (enable_answer_query_using_materialized_views && /* Use this GUC for now. */ + try_append_agg(root->parse)) + { + path_changed = expand_append_agg(root, ctx); + } + +#endif + /* * We now have partially aggregated paths in ctx->partial_rel. 
Consider * different ways of performing the Finalize Aggregate stage. @@ -766,7 +819,13 @@ create_two_stage_paths(PlannerInfo *root, cdb_agg_planning_context *ctx, { Path *cheapest_first_stage_path; +#ifdef SERVERLESS + /* Do not change cheapest if we assign, trust Append with agg always win. */ + if (!path_changed) + set_cheapest(ctx->partial_rel); +#else set_cheapest(ctx->partial_rel); +#endif cheapest_first_stage_path = ctx->partial_rel->cheapest_total_path; if (!IsA(cheapest_first_stage_path, ForeignPath)) @@ -804,6 +863,15 @@ create_two_stage_paths(PlannerInfo *root, cdb_agg_planning_context *ctx, path->pathkeys); else is_sorted = false; +#ifdef SERVERLESS + /* + * We eager append agg if there was, the previous normal agg with sort may break + * that case. Though the cost is same but different order matters. + * Bypass others to make sure that. + */ + if (path_changed && (path != cheapest_first_stage_path)) + continue; +#endif if (path == cheapest_first_stage_path || is_sorted) { add_second_stage_group_agg_path(root, path, is_sorted, @@ -2720,3 +2788,612 @@ cdb_prepare_path_for_hashed_agg(PlannerInfo *root, return subpath; } + +#ifdef SERVERLESS +/* + * Precheck if we could try append agg of a Query. + * We will compare parse with view query if there were + * exactly matched. + * But we clould stop early if we are sure there is no chance. + * It doesn't matter if we miss something here. + */ +static bool +try_append_agg(Query *parse) +{ + ListCell *lc; + + /* FIXME: could we handle order by, limit? 
*/ + if ((parse->commandType != CMD_SELECT) || + (parse->rowMarks != NIL) || + (parse->distinctClause != NIL) || + (parse->scatterClause != NIL) || + (parse->cteList != NIL) || + (parse->groupingSets != NIL) || + (parse->havingQual != NULL) || + (parse->setOperations != NULL) || + parse->hasWindowFuncs || + parse->hasDistinctOn || + parse->hasModifyingCTE || + parse->groupDistinct || + (parse->parentStmtType == PARENTSTMTTYPE_REFRESH_MATVIEW) || + (parse->parentStmtType == PARENTSTMTTYPE_CTAS) || + parse->hasSubLinks) + { + return false; + } + + /* As we will use views, make it strict to unmutable. */ + if (contain_mutable_functions((Node*)parse)) + return false; + + /* We want Agg with Group By. */ + if (!parse->hasAggs || (parse->groupClause == NIL)) + return false; + + /* + * Only aggs: count, sum, avg of single column are supported now. + */ + foreach(lc, parse->targetList) + { + TargetEntry* tle = lfirst_node(TargetEntry, lc); + + if (tle->resjunk) + return false; + + if (IsA(tle->expr, Var)) + { + if (tle->ressortgroupref == 0) + return false; + } + else if (IsA(tle->expr, Aggref)) + { + Aggref *aggref = (Aggref *) tle->expr; + const char *aggname = get_func_name(aggref->aggfnoid); + /* + * FIXME: use func name is necessary but not sufficient + * should use fnoid to restrict later. + */ + if ((strcmp(aggname, "count") == 0) && + (strcmp(aggname, "sum") == 0) && + (strcmp(aggname, "avg") == 0)) + return false; + + if (aggref->aggorder || + (aggref->aggdirectargs != NIL) || + (aggref->aggdistinct != NIL) || + (aggref->aggfilter != NULL)) + return false; + + /* Star is ok. */ + if (aggref->aggstar) + continue; + + if ((aggref->args == NIL || + (list_length(aggref->args) != 1))) + return false; + + TargetEntry *tle = (TargetEntry *) linitial(aggref->args); + if (!IsA(tle->expr, Var)) + return false; + } + else + return false; + } + return true; +} + +/* + * expand_append_agg + * Expand two-stage agg plan to a append agg plan, + * return true if we succeed. 
+ */ +static bool +expand_append_agg(PlannerInfo *root, cdb_agg_planning_context *ctx) +{ + Path *cheapest_first_stage_path = NULL; + Path *underlying_seqscanpath = NULL; + Query *parse = root->parse; + Relation matviewRel = NULL; + + /* + * First-stage agg with seqscan on a normal table and/or with sort. + * Plan A: + * Partial Agg + * SeqScan + * + * Plan B: + * Partial Agg + * Sort + * SeqScan + * Plan B requires order on segments, we have to do more such as add a additonal + * Sort node above Append. + */ + set_cheapest(ctx->partial_rel); + cheapest_first_stage_path = ctx->partial_rel->cheapest_total_path; + + if (!IsA(cheapest_first_stage_path, AggPath)) + return false; + + if ((castNode(AggPath, cheapest_first_stage_path)->aggsplit != AGGSPLIT_INITIAL_SERIAL)) + return false; + + if (castNode(AggPath, cheapest_first_stage_path)->aggstrategy == AGG_HASHED) + { + Path *subpath = ((AggPath*) cheapest_first_stage_path)->subpath; + + if (subpath->pathtype != T_SeqScan) + return false; + + /* Seqscan on a normal table. */ + if (subpath->parent->reloptkind != RELOPT_BASEREL) + return false; + + underlying_seqscanpath = subpath; + } + else if (castNode(AggPath, cheapest_first_stage_path)->aggstrategy == AGG_SORTED) + { + Path *subpath = ((AggPath*) cheapest_first_stage_path)->subpath; + + if (subpath->pathtype != T_Sort) + return false; + + subpath = castNode(SortPath, subpath)->subpath; + + if (subpath->parent->reloptkind != RELOPT_BASEREL) + return false; + + underlying_seqscanpath = subpath; + } + else + return false; + + /* Find a matched view for input query. */ + matviewRel = simple_view_matching(parse); + if (matviewRel == NULL) + return false; + + expand_append_agg_guts(root, ctx, matviewRel); + + /* Now do not forget to identify delta seqscan.*/ + underlying_seqscanpath->basemv = RelationGetRelid(matviewRel); + + /* Not use matviewRel anymore, close here. */ + table_close(matviewRel, NoLock); + + return true; /* Succeed to rewrite. 
*/ +} + +/* + * simple_view_matching + * Match a view with given Query, return the view relation itself if succeed. + * Only a SELECT from a single table is supported. + * + * parse - the Query we want to match + * + * A lock will be held if we find a matched view, the caller should handle that. + */ +static Relation +simple_view_matching(Query *parse) +{ + Query *viewQuery; /* Query of view. */ + Relation matviewRel = NULL; /* Matched view relation. */ + Relation ruleDesc; + SysScanDesc rcscan; + RewriteRule *rule; + Form_pg_rewrite rewrite_tup; + List *actions; + HeapTuple tup; + Node *mvjtnode; + RangeTblEntry *mvrte; + int varno; + PlannerInfo *subroot; + bool need_close = false; + + /* + * We know it's a single table. + */ + Oid underlying_relid = (rt_fetch(1, parse->rtable))->relid; + + ruleDesc = table_open(RewriteRelationId, AccessShareLock); + rcscan = systable_beginscan(ruleDesc, InvalidOid, false, + NULL, 0, NULL); + while (HeapTupleIsValid(tup = systable_getnext(rcscan))) + { + CHECK_FOR_INTERRUPTS(); + if (need_close) + table_close(matviewRel, AccessShareLock); + + rewrite_tup = (Form_pg_rewrite) GETSTRUCT(tup); + + matviewRel = table_open(rewrite_tup->ev_class, AccessShareLock); + need_close = true; + + /* + * Consider IVM only has insert operation + * since lastest REFRESH and with partial agg results. + */ + if (!RelationIsPopulated(matviewRel) || + /* FIXME: uncomment below when IVM is enabled in hashdata cloud. */ + #if 0 + (!RelationIsIVM(matviewRel)) || + #endif + !matviewRel->rd_rel->relhaspartialagg || + !matviewRel->rd_rel->relinsertonly) + continue; + + if (matviewRel->rd_rel->relhasrules == false || + matviewRel->rd_rules->numLocks != 1) + continue; + + rule = matviewRel->rd_rules->rules[0]; + + /* Filter a SELECT action, and not instead. 
*/ + if ((rule->event != CMD_SELECT) || !(rule->isInstead)) + continue; + + actions = rule->actions; + if (list_length(actions) != 1) + continue; + + viewQuery = copyObject(linitial_node(Query, actions)); + + if (list_length(viewQuery->jointree->fromlist) != 1) + continue; + + mvjtnode = (Node *) linitial(viewQuery->jointree->fromlist); + if (!IsA(mvjtnode, RangeTblRef)) + continue; + + varno = ((RangeTblRef*) mvjtnode)->rtindex; + mvrte = rt_fetch(varno, viewQuery->rtable); + Assert(mvrte != NULL); + + if (mvrte->rtekind != RTE_RELATION) + continue; + + if (mvrte->relid != underlying_relid) + continue; + + /* Transform actions to a normal parse tree. */ + aqumv_adjust_simple_parse(viewQuery); + + /* + * See AQUMV_FIXME_MVP in aqumv.c + */ + mvrte = rt_fetch(1, viewQuery->rtable); + mvrte->inh = false; + /* + * This is fool way to make comparison pass. + */ + mvrte->checkAsUser = InvalidOid; + + /* To make equal parse tree, need root to assign aggno in precess_aggrefs. */ + subroot = (PlannerInfo *) palloc0(sizeof(PlannerInfo)); + subroot->parse = viewQuery; + subroot->processed_tlist = viewQuery->targetList; + if (viewQuery->hasAggs) + { + preprocess_aggrefs(subroot, (Node *) subroot->processed_tlist); + } + + /* + * This is fool way, but we don't want to compare them. + */ + viewQuery->stmt_location = parse->stmt_location; + viewQuery->stmt_len = parse->stmt_len; + + /* + * Before we compare Query, quals need to be preprocessed becuase + * A signle qual may be a OpExpr or a list with one element. + * Both are legal but we can't use equal() with different node tag. + * Wrap to list if it was. + */ + if ((viewQuery->jointree->quals != NULL) && (!IsA(viewQuery->jointree->quals, List))) + viewQuery->jointree->quals = (Node *)list_make1(viewQuery->jointree->quals); + + /* Query and viewQuery must be exatcly matched now. */ + if (equal(viewQuery, parse)) + { + /* + * As we rewrite path directly without any cost, + * stop searching once a view is found. 
+ */ + need_close = false; + break; + } + } + systable_endscan(rcscan); + table_close(ruleDesc, AccessShareLock); + + if (need_close) + { + table_close(matviewRel, AccessShareLock); + matviewRel = NULL; + } + return matviewRel; +} + + +/* + * Make path targets from TupleDesc. + * This is only used if we know it's an exactly matched + * view with query we want. + * So we could use TupleDesc with the same order and definations. + * But IVM has extra invisible columns for maintenance besides + * the columns in original query, result in different tuple desc + * with the target list. Drop those for correct. + */ +static PathTarget* +make_pathtarget_from_tupledesc(TupleDesc tupdes) +{ + PathTarget *target = makeNode(PathTarget); + int i; + + /* We are a plain select, there should be no group cloumns. */ + target->sortgrouprefs = (Index *) palloc0((tupdes->natts) * sizeof(Index)); + + for (i = 0; i < tupdes->natts; i++) + { + Form_pg_attribute attr = TupleDescAttr(tupdes, i); + + /* + * IVM has invisible columns, drop that. + */ + if (isIvmName(NameStr(attr->attname))) + continue; + + Var *newVar = makeVar(2, /* See comments assign relid to 2. */ + attr->attnum, + attr->atttypid, + attr->atttypmod, + attr->attcollation, + 0 /* FIXME: What if we are a subquery? */); + + target->exprs = lappend(target->exprs, (Expr*) newVar); + } + + /* + * Unknown, but we have called contain_mutable_functions check. + * And that's more restrict. + */ + target->has_volatile_expr = VOLATILITY_UNKNOWN; + + return target; +} + +/* + * expand_append_agg_guts + * + * Do the real rewrite. 
+ * + * 1.HashAgg: + * Partial Agg + * SeqScan on t + * to: + * Append + * Partial Agg + * Delta SeqScan on t + * SeqScan on mv + * + * 2.GroupAgg: + * Partial Agg + * Sort + * SeqScan on t + * to: + * Sort + * Append + * Partial Agg + * Sort + * Delta SeqScan on t + * SeqScan on mv + */ +static void +expand_append_agg_guts(PlannerInfo *root, cdb_agg_planning_context *ctx, Relation matviewRel) +{ + Path *mv_seqscan_path; + + /* Build Seq Scan path from view. */ + mv_seqscan_path = build_view_seqscan_path(root, ctx, matviewRel); + + /* Append partial agg with view seqscan.*/ + rewrite_to_append_agg_path(root, ctx, matviewRel, mv_seqscan_path); +} + +static Path* +build_view_seqscan_path(PlannerInfo *root, cdb_agg_planning_context *ctx, Relation matviewRel) +{ + Path *mv_seqscan_path; + PlannerInfo *dummy_root; + Query *dummy_query; + RangeTblEntry *dummy_rte; + Path *cheapest_first_stage_path = ctx->partial_rel->cheapest_total_path; + + mv_seqscan_path = makeNode(Path); + mv_seqscan_path->pathtype = T_SeqScan; + mv_seqscan_path->basemv = 0; + mv_seqscan_path->param_info = NULL; + mv_seqscan_path->parallel_aware = false; + mv_seqscan_path->parallel_safe = false; + mv_seqscan_path->parallel_workers = 0; + mv_seqscan_path->pathkeys = NIL; /* seqscan has unordered result.*/ + mv_seqscan_path->locus = cheapest_first_stage_path->locus; /* Keep same with agg path for now.*/ + mv_seqscan_path->motionHazard = false; + mv_seqscan_path->barrierHazard = false; + mv_seqscan_path->rescannable = true; + mv_seqscan_path->sameslice_relids = cheapest_first_stage_path->parent->relids; + + /* Build dmmmy planner info for reloptions.*/ + dummy_query = makeNode(Query); + dummy_query->commandType = CMD_SELECT; + dummy_root = makeNode(PlannerInfo); + dummy_root->parse = dummy_query; + dummy_root->glob = makeNode(PlannerGlobal); /* Avoid crash during planner.*/ + dummy_root->query_level = 1; + dummy_root->planner_cxt = CurrentMemoryContext; + dummy_root->wt_param_id = -1; + + /* Build dmmmy 
rte for reloptions.*/ + dummy_rte = makeNode(RangeTblEntry); + dummy_rte->rtekind = RTE_RELATION; + dummy_rte->relid = RelationGetRelid(matviewRel); + dummy_rte->relkind = RELKIND_MATVIEW; + dummy_rte->rellockmode = AccessShareLock; + dummy_rte->lateral = false; + dummy_rte->inh = false; + dummy_rte->inFromCl = true; + /* + * Build eref for explain purpose. + */ + dummy_rte->eref = makeAlias(RelationGetRelationName(matviewRel), NIL); + + /* + * Build RelOptInfo + * Hack here: + * we know that parse only has one table, so just make a NULL rte here + * and assign dummy_rte relid to 2 later. + */ + dummy_query->rtable = list_make2(NULL, dummy_rte); + + /* Set up RTE/RelOptInfo arrays and assign parent.*/ + setup_simple_rel_arrays(dummy_root); + mv_seqscan_path->parent = build_simple_rel(dummy_root, 2, NULL); + + /* + * Now build pathtarget from mv. + * But we don't have targetList from dummy_parse here, + * use mv's TupleDesc to get a plain select * from mv. + * As we only have a exatly matched SQL now, + * a star * means only user defined columns are included, + * extra columns of IVM should not be inside. + */ + mv_seqscan_path->pathtarget = make_pathtarget_from_tupledesc(matviewRel->rd_att); + + /* Adjust planner info for view scan. */ + root->parse->rtable = lappend(root->parse->rtable, dummy_rte); + root->simple_rel_array_size++; + root->simple_rte_array = repalloc(root->simple_rte_array, (root->simple_rel_array_size)* sizeof(RangeTblEntry *)); + root->simple_rte_array[root->simple_rel_array_size - 1] = dummy_rte; + root->simple_rel_array = repalloc(root->simple_rel_array, (root->simple_rel_array_size)* sizeof(RelOptInfo *)); + root->simple_rel_array[root->simple_rel_array_size - 1] = mv_seqscan_path->parent; + + return mv_seqscan_path; +} + +/* + * This is a hack way to build special append path, + * only for expanding append agg plan. 
+ */ +static void +rewrite_to_append_agg_path(PlannerInfo *root, cdb_agg_planning_context *ctx, Relation matviewRel, Path *mv_seqscan_path) +{ + AppendPath *pathnode; + Path *cheapest_first_stage_path = ctx->partial_rel->cheapest_total_path; + + /* Build append path */ + pathnode = makeNode(AppendPath); + pathnode->append_agg = true; + pathnode->path.pathtype = T_Append; + pathnode->path.parent = cheapest_first_stage_path->parent; + pathnode->path.pathtarget = cheapest_first_stage_path->parent->reltarget; + pathnode->path.param_info = NULL; + pathnode->path.parallel_aware = false; + pathnode->path.parallel_safe = false; + pathnode->path.parallel_workers = 0; + pathnode->path.pathkeys = NIL; + pathnode->path.motionHazard = false; + pathnode->path.barrierHazard = false; + pathnode->path.rescannable = true; + pathnode->path.locus = cheapest_first_stage_path->locus; + pathnode->subpaths = list_make2(cheapest_first_stage_path, mv_seqscan_path); + + /* Set best path for partial_rel with ours. */ + ctx->partial_rel->cheapest_total_path = (Path *)pathnode; + + /* + * If it's a Group Agg, the order is required for final agg. + * The subpaths of Append node is unordered together, we have + * to add a Sort above it. + */ + if (castNode(AggPath, cheapest_first_stage_path)->aggstrategy == AGG_SORTED) + { + Path *path; + path = (Path *) create_sort_path(root, + ctx->partial_rel, + (Path *)pathnode, + ctx->partial_sort_pathkeys, + -1.0); + + ctx->partial_rel->cheapest_total_path = path; + /* For Group Agg, second stage is based on pathlist. */ + add_path(ctx->partial_rel, path ,root); + } +} + +/* + * This should be refactor after CBDB github expose these functions. + * Keep for now. + * Wrap of aqumv_adjust_varno, expose for other places. + * Adjust view's actions to a parse tree that can be processed as normal. + * This in-place update the parse param. 
+ */ +void aqumv_adjust_simple_parse(Query *parse) +{ + ListCell *lc; + /* + * AQUMV + * We have to rewrite now before we do the real Equivalent + * Transformation 'rewrite'. + * Because actions sotored in rule is not a normal query tree, + * it can't be used directly, ex: new/old realtions used to + * refresh mv. + * Earse unused relatoins, keep the right one. + */ + foreach(lc, parse->rtable) + { + RangeTblEntry* rtetmp = lfirst(lc); + if ((rtetmp->relkind == RELKIND_MATVIEW) && + (rtetmp->alias != NULL) && + (strcmp(rtetmp->alias->aliasname, "new") == 0 || + strcmp(rtetmp->alias->aliasname,"old") == 0)) + { + foreach_delete_current(parse->rtable, lc); + } + } + + /* + * Now we have the right relation, adjust + * varnos in its query tree. + * AQUMV_FIXME_MVP: Only one single relation + * is supported now, we could assign varno + * to 1 opportunistically. + */ + aqumv_adjust_varno(parse, 1); + +} +static void +aqumv_adjust_varno(Query* parse, int varno) +{ + aqumv_adjust_varno_context context; + context.varno = varno; + parse = query_tree_mutator(parse, aqumv_adjust_varno_mutator, &context, QTW_DONT_COPY_QUERY); +} + +/* + * Adjust varno and rindex with delta. + */ +static Node *aqumv_adjust_varno_mutator(Node *node, aqumv_adjust_varno_context *context) +{ + if (node == NULL) + return NULL; + if (IsA(node, Var)) + { + ((Var *)node)->varno = context->varno; + ((Var *)node)->varnosyn = context->varno; /* NB: This should be backported to CBDB github! 
*/ + } + else if (IsA(node, RangeTblRef)) + /* AQUMV_FIXME_MVP: currently we have only one relation */ + ((RangeTblRef*) node)->rtindex = context->varno; + return expression_tree_mutator(node, aqumv_adjust_varno_mutator, context); +} + +#endif \ No newline at end of file diff --git a/src/backend/commands/explain.c b/src/backend/commands/explain.c index bfd5f98219c..09c0f18445d 100644 --- a/src/backend/commands/explain.c +++ b/src/backend/commands/explain.c @@ -1649,8 +1649,17 @@ ExplainNode(PlanState *planstate, List *ancestors, sname = "Hash Join"; break; case T_SeqScan: +#ifdef SERVERLESS + /* Ugly but for right indent. */ + if (OidIsValid(((SeqScan *)plan)->basemv)) + pname = sname = "Delta Seq Scan"; + else + pname = sname = "Seq Scan"; + break; +#else pname = sname = "Seq Scan"; break; +#endif case T_DynamicSeqScan: pname = sname = "Dynamic Seq Scan"; break; @@ -4868,6 +4877,16 @@ ExplainTargetRel(Plan *plan, Index rti, ExplainState *es) if (dynamicScanId != 0) appendStringInfo(es->str, " (dynamic scan id: %d)", dynamicScanId); +#ifdef SERVERLESS + if (es->verbose && + IsA(plan, SeqScan) && + (OidIsValid(castNode(SeqScan, plan)->basemv))) + { + /* FIXME: consider namespace in the future. 
*/ + appendStringInfo(es->str, " (based on materialized view: %s)", + quote_identifier(get_rel_name(castNode(SeqScan, plan)->basemv))); + } +#endif } else { diff --git a/src/backend/nodes/copyfuncs.c b/src/backend/nodes/copyfuncs.c index a8592693436..1cbcb1efab5 100644 --- a/src/backend/nodes/copyfuncs.c +++ b/src/backend/nodes/copyfuncs.c @@ -567,6 +567,9 @@ CopyScanFields(const Scan *from, Scan *newnode) COPY_SCALAR_FIELD(scanrelid); COPY_SCALAR_FIELD(scanflags); +#ifdef SERVERLESS + COPY_SCALAR_FIELD(basemv); +#endif } /* diff --git a/src/backend/nodes/outfuncs.c b/src/backend/nodes/outfuncs.c index 6f8114415ac..c95a9a17a24 100644 --- a/src/backend/nodes/outfuncs.c +++ b/src/backend/nodes/outfuncs.c @@ -428,6 +428,10 @@ _outScanInfo(StringInfo str, const Scan *node) WRITE_UINT_FIELD(scanrelid); WRITE_UINT_FIELD(scanflags); + +#ifdef SERVERLESS + WRITE_OID_FIELD(basemv); +#endif } /* diff --git a/src/backend/nodes/readfuncs.c b/src/backend/nodes/readfuncs.c index 2b496d14ecb..cd50582b3e9 100644 --- a/src/backend/nodes/readfuncs.c +++ b/src/backend/nodes/readfuncs.c @@ -1902,6 +1902,9 @@ ReadCommonScan(Scan *local_node) READ_UINT_FIELD(scanrelid); READ_UINT_FIELD(scanflags); +#ifdef SERVERLESS + READ_OID_FIELD(basemv); +#endif } /* diff --git a/src/backend/optimizer/plan/createplan.c b/src/backend/optimizer/plan/createplan.c index f62576f2b8e..98d7243bc62 100644 --- a/src/backend/optimizer/plan/createplan.c +++ b/src/backend/optimizer/plan/createplan.c @@ -361,6 +361,24 @@ static bool contain_motion(PlannerInfo *root, Node *node); static bool contain_motion_walk(Node *node, contain_motion_walk_context *ctx); static void push_locus_down_after_elide_motion(Plan* pplan); +#ifdef SERVERLESS +/* + * Global variable to control if we clould create delta seqscan plan. + * This is hacky, we can not ensure that a seqscan path with + * basemv is actually a delta seqsan. 
+ * The reason is when we identify a delta seqscan path of an append agg + * the underlying seqscan path could also be a leaf node of others. + * Ex: create a two-stage hash agg with append and delta scan, but the GUC + * enable_hashagg is off. We have to use one-stage agg, but the delta flag + * of seqscan path is wrong. + * So, we must handle the case when create plan, if we are an append agg, turn + * this variable to true to allow create delta seqscan. + * Else, even the best_path is a seqscan with delta flag, we create a normal + * seqscan plan for correct results. + */ +bool allow_create_delta_seqscan = false; +#endif + /* * create_plan * Creates the access plan for a query by recursively processing the @@ -1364,6 +1382,12 @@ create_append_plan(PlannerInfo *root, AppendPath *best_path, int flags) plan->plan.righttree = NULL; plan->apprelids = rel->relids; +#ifdef SERVERLESS + /* Toggle delta scan plan, see comments of allow_create_delta_seqscan */ + bool saved_allow_create_delta_seqscan = allow_create_delta_seqscan; + allow_create_delta_seqscan = best_path->append_agg; +#endif + if (pathkeys != NIL) { /* @@ -1513,6 +1537,10 @@ create_append_plan(PlannerInfo *root, AppendPath *best_path, int flags) copy_generic_path_info(&plan->plan, (Path *) best_path); +#ifdef SERVERLESS + /* Recover after subplans are created.*/ + allow_create_delta_seqscan = saved_allow_create_delta_seqscan; +#endif /* * If prepare_sort_from_pathkeys added sort columns, but we were told to * produce either the exact tlist or a narrow tlist, we should get rid of @@ -3653,6 +3681,14 @@ create_seqscan_plan(PlannerInfo *root, Path *best_path, scan_plan = make_seqscan(tlist, scan_clauses, scan_relid); +#ifdef SERVERLESS + /* + * We have to check even if the delta identity is set, + * see reasons of allow_create_delta_seqscan. 
+ */ + if (allow_create_delta_seqscan) + scan_plan->basemv = best_path->basemv; +#endif copy_generic_path_info(&scan_plan->plan, best_path); diff --git a/src/backend/optimizer/util/pathnode.c b/src/backend/optimizer/util/pathnode.c index ec5287bdc32..1c3548c914a 100644 --- a/src/backend/optimizer/util/pathnode.c +++ b/src/backend/optimizer/util/pathnode.c @@ -1043,6 +1043,7 @@ create_seqscan_path(PlannerInfo *root, RelOptInfo *rel, pathnode->barrierHazard = false; pathnode->rescannable = true; pathnode->sameslice_relids = rel->relids; + pathnode->basemv = 0; cost_seqscan(pathnode, root, rel, pathnode->param_info); @@ -1411,6 +1412,9 @@ create_append_path(PlannerInfo *root, Assert(!parallel_aware || parallel_workers > 0); #endif +#ifdef SERVERLESS + pathnode->append_agg = false; +#endif pathnode->path.pathtype = T_Append; pathnode->path.parent = rel; diff --git a/src/backend/utils/cache/relcache.c b/src/backend/utils/cache/relcache.c index aa0e2034479..7bec67880f7 100644 --- a/src/backend/utils/cache/relcache.c +++ b/src/backend/utils/cache/relcache.c @@ -2053,6 +2053,8 @@ formrdesc(const char *relationName, Oid relationReltype, /* ... and they're always not dynamic, too */ relation->rd_rel->relisdynamic = false; relation->rd_rel->relmvrefcount = 0; + relation->rd_rel->relhaspartialagg = false; + relation->rd_rel->relinsertonly = false; relation->rd_rel->relreplident = REPLICA_IDENTITY_NOTHING; relation->rd_rel->relpages = 0; diff --git a/src/include/catalog/pg_class.h b/src/include/catalog/pg_class.h index dba3307c99c..b600fe377e9 100644 --- a/src/include/catalog/pg_class.h +++ b/src/include/catalog/pg_class.h @@ -128,6 +128,14 @@ CATALOG(pg_class,1259,RelationRelationId) BKI_BOOTSTRAP BKI_ROWTYPE_OID(83,Relat /* count of materialized views referred to the relation */ int32 relmvrefcount BKI_DEFAULT(0); + /* materialized view has partial agg results instead of final results? */ + /* TODO: IVM implement this. 
 */ + bool relhaspartialagg BKI_DEFAULT(f); + + /* materialized view's data is not up to date, but has only insert operation after latest REFRESH. */ + /* TODO: IVM implement this. */ + bool relinsertonly BKI_DEFAULT(f); + /* link to original rel during table rewrite; otherwise 0 */ Oid relrewrite BKI_DEFAULT(0) BKI_LOOKUP_OPT(pg_class); diff --git a/src/include/nodes/pathnodes.h b/src/include/nodes/pathnodes.h index b81b5020089..0de5f060139 100644 --- a/src/include/nodes/pathnodes.h +++ b/src/include/nodes/pathnodes.h @@ -1521,6 +1521,8 @@ typedef struct Path * optimizations. */ Relids sameslice_relids; + + Oid basemv; /* Oid of the materialized view that the Delta SeqScan is based on. */ } Path; /* @@ -1863,6 +1865,9 @@ typedef struct AppendPath /* Index of first partial path in subpaths; list_length(subpaths) if none */ int first_partial_path; double limit_tuples; /* hard limit on output tuples, or -1 */ +#ifdef SERVERLESS + bool append_agg; /* Is an append agg, used to toggle delta scan. */ +#endif } AppendPath; #define IS_DUMMY_APPEND(p) \ diff --git a/src/include/nodes/plannodes.h b/src/include/nodes/plannodes.h index de972f82b76..098c20f8fe3 100644 --- a/src/include/nodes/plannodes.h +++ b/src/include/nodes/plannodes.h @@ -561,6 +561,20 @@ typedef struct Scan Plan plan; Index scanrelid; /* relid is index into the range table */ uint32 scanflags; /* extra scan flags */ +#ifdef SERVERLESS + /* + * Base materialized view oid for delta scan. + * If valid, it means a Delta SeqScan based on + * materialized views of basemv. + * Fetch tuples from table: + * since + * the manifest version of basemv latest refresh + * to + * current manifest. + * If they are the same, return 0 tuples. 
+ */ + Oid basemv; +#endif } Scan; /* ---------------- From 382fd6c411de197c35470667ab665d27fd7d6cf3 Mon Sep 17 00:00:00 2001 From: Jinbao Chen Date: Sat, 8 Jun 2024 01:27:35 +0800 Subject: [PATCH 111/152] Fix the record empty error in init plpgsql --- src/pl/plpgsql/src/pl_exec.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/pl/plpgsql/src/pl_exec.c b/src/pl/plpgsql/src/pl_exec.c index 9a03f2b8f5b..3ebbb1c9a1d 100644 --- a/src/pl/plpgsql/src/pl_exec.c +++ b/src/pl/plpgsql/src/pl_exec.c @@ -9003,7 +9003,7 @@ plpgsql_prepare_trigger(PLpgSQL_function *func, estate.err_text = NULL; estate.err_stmt = (PLpgSQL_stmt *) (func->action); - prepare_stmt_block(&estate, (PLpgSQL_stmt_block *) func->action); + init_toplevel_block(&estate, func->action); /* Clean up any leftover temporary memory */ plpgsql_destroy_econtext(&estate); From 56b8746864a19fc07519672e75669115f1d911f4 Mon Sep 17 00:00:00 2001 From: JInbao Chen Date: Tue, 11 Jun 2024 15:31:40 +0800 Subject: [PATCH 112/152] Fix a mistake on RelationReloadNailed The dispatch commit remove the if (criticalRelcachesBuilt) by mistake. Now re-open it. --- src/backend/utils/cache/relcache.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/backend/utils/cache/relcache.c b/src/backend/utils/cache/relcache.c index 7bec67880f7..2c75e31c0d2 100644 --- a/src/backend/utils/cache/relcache.c +++ b/src/backend/utils/cache/relcache.c @@ -2574,7 +2574,7 @@ RelationReloadNailed(Relation relation) * accessed. To ensure the entry will later be revalidated, we leave * it in invalid state, but allow use (cf. RelationIdGetRelation()). 
*/ - // if (criticalRelcachesBuilt) + if (criticalRelcachesBuilt) { HeapTuple pg_class_tuple; Form_pg_class relp; From 659edd0c0af02ef46b8cafe5bffcec44388394ce Mon Sep 17 00:00:00 2001 From: Jinbao Chen Date: Sun, 9 Jun 2024 14:35:09 +0800 Subject: [PATCH 113/152] Do not use AccessExclusiveLock in Manifest --- src/backend/catalog/main_manifest.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/backend/catalog/main_manifest.c b/src/backend/catalog/main_manifest.c index 7aea277a47f..24dfd2db109 100644 --- a/src/backend/catalog/main_manifest.c +++ b/src/backend/catalog/main_manifest.c @@ -117,7 +117,7 @@ UpdateManifestRecord(RelFileNodeId relfilenode, text *path) void DeleteManifestCatalog(RelFileNodeId relnode) { - Relation entrance_rel = heap_open(ManifestRelationId, AccessExclusiveLock); + Relation entrance_rel = heap_open(ManifestRelationId, RowExclusiveLock); SysScanDesc scan; HeapTuple tuple; @@ -131,5 +131,5 @@ DeleteManifestCatalog(RelFileNodeId relnode) CatalogTupleDelete(entrance_rel, &tuple->t_self); systable_endscan(scan); - table_close(entrance_rel, AccessExclusiveLock); + table_close(entrance_rel, RowExclusiveLock); } \ No newline at end of file From b8f37ef7f1e9492a91aa6af2f125a4f607e82610 Mon Sep 17 00:00:00 2001 From: Jinbao Chen Date: Tue, 4 Jun 2024 10:50:06 +0800 Subject: [PATCH 114/152] Remove some node dispatch code --- src/backend/cdb/cdbplan.c | 1 - src/test/regress/expected/gpctas_optimizer.out | 6 +++++- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/src/backend/cdb/cdbplan.c b/src/backend/cdb/cdbplan.c index 531c1993760..1979ad38f8b 100644 --- a/src/backend/cdb/cdbplan.c +++ b/src/backend/cdb/cdbplan.c @@ -1088,7 +1088,6 @@ mutate_plan_fields(Plan *newplan, Plan *oldplan, Node *(*mutator) (), void *cont /* Bitmapsets aren't nodes but need to be copied to palloc'd space. 
 */ newplan->extParam = bms_copy(oldplan->extParam); newplan->allParam = bms_copy(oldplan->allParam); - newplan->info_context = copyObject(oldplan->info_context); } diff --git a/src/test/regress/expected/gpctas_optimizer.out b/src/test/regress/expected/gpctas_optimizer.out index 7e9956e985b..47902384140 100644 --- a/src/test/regress/expected/gpctas_optimizer.out +++ b/src/test/regress/expected/gpctas_optimizer.out @@ -381,7 +381,11 @@ begin; create table t2_github_issue_10760 as select * from t1_github_issue_10760 where b > (select count(*) from t1_github_issue_10760) distributed randomly; end; select count (distinct oid) from (select oid from pg_class where relname = 't2_github_issue_10760' union all select oid from gp_dist_random('pg_class') where relname = 't2_github_issue_10760')x; -ERROR: Hash data storage do not support index scan in segment (hashdata_execProcnode.cc:1275) (seg2 slice1 127.0.0.1:5435 pid=1180272) (hashdata_execProcnode.cc:1275) + count +------- + 1 +(1 row) + drop table t1_github_issue_10760; drop table t2_github_issue_10760; reset optimizer; From d2fd22396be0d621b5322b8d87232facd8809954 Mon Sep 17 00:00:00 2001 From: yangjianghua Date: Fri, 28 Jun 2024 16:29:42 +0800 Subject: [PATCH 115/152] Feature: support group aggregation defer ivm maintenance. First, make compatible with immediate ivm maintenance. And each incremental materialized view will create a task. In task, calculate partial results then store into materialized view. GUC hashdata.ivm_batch_size can control apply batch size. When need combine result, use COMBINE INCREMENTAL MATERIALIZED VIEW mv; Now support only aggregation functions below: sum(), count(), avg(), min(), max(). Notice: avg(int8) result type is bytea. DELETE, TRUNCATE will fall back. 
--- doc/src/sgml/catalogs.sgml | 4 +- src/backend/catalog/Makefile | 4 +- src/backend/catalog/catalog.c | 7 +- src/backend/catalog/dependency.c | 6 + src/backend/catalog/gp_matview_dependency.c | 297 ++++++++++++++++++ src/backend/catalog/heap.c | 1 + src/backend/catalog/index.c | 2 +- src/backend/catalog/pg_task.c | 6 +- src/backend/commands/createas.c | 229 +++++++++++--- src/backend/commands/explain.c | 5 + src/backend/commands/matview.c | 33 +- src/backend/commands/tablecmds.c | 7 +- src/backend/commands/taskcmds.c | 64 ++-- src/backend/executor/nodeAgg.c | 7 + src/backend/executor/tstoreReceiver.c | 7 +- src/backend/nodes/copyfuncs.c | 6 + src/backend/nodes/equalfuncs.c | 5 + src/backend/nodes/outfuncs.c | 5 +- src/backend/nodes/readfuncs.c | 4 + src/backend/optimizer/plan/createplan.c | 11 +- src/backend/parser/gram.y | 67 +++- src/backend/parser/parse_relation.c | 4 +- src/backend/rewrite/rewriteDefine.c | 12 +- src/backend/storage/file/buffile.c | 2 +- src/backend/task/job_metadata.c | 19 +- src/backend/task/pg_cron.c | 20 +- src/backend/utils/cache/lsyscache.c | 38 ++- src/backend/utils/cache/relcache.c | 3 +- src/backend/utils/init/miscinit.c | 25 ++ src/backend/utils/misc/guc_gp.c | 2 +- src/backend/utils/sort/tuplestore.c | 2 +- src/bin/pg_dump/pg_dump.c | 48 +++ src/bin/psql/describe.c | 6 +- src/bin/psql/tab-complete.c | 17 +- src/include/catalog/gp_matview_dependency.h | 51 +++ src/include/catalog/pg_class.h | 7 +- src/include/catalog/pg_task.h | 3 +- src/include/commands/createas.h | 5 +- src/include/commands/matview.h | 10 +- src/include/executor/tstoreReceiver.h | 3 +- src/include/miscadmin.h | 1 + src/include/nodes/nodes.h | 5 +- src/include/nodes/parsenodes.h | 3 + src/include/nodes/plannodes.h | 3 +- src/include/nodes/primnodes.h | 3 + src/include/parser/kwlist.h | 1 + src/include/task/job_metadata.h | 4 +- src/include/utils/guc.h | 1 + src/include/utils/lsyscache.h | 2 + src/include/utils/rel.h | 7 +- src/test/regress/expected/misc_sanity.out 
| 1 + .../expected/misc_sanity.out | 1 + 52 files changed, 958 insertions(+), 128 deletions(-) create mode 100644 src/backend/catalog/gp_matview_dependency.c create mode 100644 src/include/catalog/gp_matview_dependency.h diff --git a/doc/src/sgml/catalogs.sgml b/doc/src/sgml/catalogs.sgml index 27b1f16b6a8..5f041649797 100644 --- a/doc/src/sgml/catalogs.sgml +++ b/doc/src/sgml/catalogs.sgml @@ -2185,10 +2185,10 @@ SCRAM-SHA-256$<iteration count>:&l - relisivm bool + relisivm char - True if relation is incrementally maintainable materialized view + Incrementally maintainable materialized view enum value diff --git a/src/backend/catalog/Makefile b/src/backend/catalog/Makefile index c59ca97d834..a110d06ec80 100644 --- a/src/backend/catalog/Makefile +++ b/src/backend/catalog/Makefile @@ -53,7 +53,7 @@ OBJS += pg_extprotocol.o \ oid_dispatch.o aocatalog.o storage_tablespace.o storage_database.o \ storage_tablespace_twophase.o storage_tablespace_xact.o \ gp_partition_template.o pg_task.o pg_task_run_history.o \ - gp_matview_aux.o \ + gp_matview_aux.o gp_matview_dependency.o \ pg_directory_table.o storage_directory_table.o main_manifest.o CATALOG_JSON:= $(addprefix $(top_srcdir)/gpMgmt/bin/gppylib/data/, $(addsuffix .json,$(GP_MAJORVERSION))) @@ -80,7 +80,7 @@ CATALOG_HEADERS := \ pg_resqueue.h pg_resqueuecapability.h pg_resourcetype.h \ pg_resgroup.h pg_resgroupcapability.h \ gp_configuration_history.h gp_id.h gp_distribution_policy.h gp_version_at_initdb.h \ - gp_warehouse.h \ + gp_warehouse.h gp_matview_dependency.h \ pg_appendonly.h \ main_manifest.h \ gp_fastsequence.h pg_extprotocol.h \ diff --git a/src/backend/catalog/catalog.c b/src/backend/catalog/catalog.c index 8cbb1d1afe3..0581f3f423b 100644 --- a/src/backend/catalog/catalog.c +++ b/src/backend/catalog/catalog.c @@ -59,6 +59,7 @@ #include "catalog/gp_configuration_history.h" #include "catalog/gp_id.h" +#include "catalog/gp_matview_dependency.h" #include "catalog/gp_storage_server.h" #include 
"catalog/gp_storage_user_mapping.h" #include "catalog/gp_version_at_initdb.h" @@ -589,7 +590,11 @@ IsSharedRelation(Oid relationId) { return true; } - + /* ivm table */ + if (relationId == MatviewDependencyId) + { + return true; + } return false; } diff --git a/src/backend/catalog/dependency.c b/src/backend/catalog/dependency.c index 023785e599b..f198048111c 100644 --- a/src/backend/catalog/dependency.c +++ b/src/backend/catalog/dependency.c @@ -19,6 +19,7 @@ #include "access/table.h" #include "access/xact.h" #include "catalog/dependency.h" +#include "catalog/gp_matview_dependency.h" #include "catalog/gp_storage_server.h" #include "catalog/gp_storage_user_mapping.h" #include "catalog/gp_warehouse.h" @@ -1495,6 +1496,11 @@ doDeletion(const ObjectAddress *object, int flags) { bool drop_with_content = (flags & PERFORM_DELETION_WITH_CONTENT) != 0; + if (relKind == RELKIND_MATVIEW) + { + remove_matview_dependency_byoid(object->objectId); + } + if (object->objectSubId != 0) RemoveAttributeById(object->objectId, object->objectSubId); diff --git a/src/backend/catalog/gp_matview_dependency.c b/src/backend/catalog/gp_matview_dependency.c new file mode 100644 index 00000000000..7e01b49496d --- /dev/null +++ b/src/backend/catalog/gp_matview_dependency.c @@ -0,0 +1,297 @@ +/*------------------------------------------------------------------------- + * + * gp_matview_dependency.c + * Routines to support inter-object dependencies. + * + * Portions Copyright (c) 2024, HashData Technology Limited. 
+ * + * + * IDENTIFICATION + * src/backend/catalog/gp_matview_dependency.c + * + *------------------------------------------------------------------------- + */ + +#include "postgres.h" + +#include "access/htup_details.h" +#include "access/genam.h" +#include "access/table.h" +#include "access/heapam.h" +#include "access/relscan.h" +#include "access/xact.h" +#include "catalog/catalog.h" +#include "catalog/indexing.h" +#include "catalog/gp_matview_dependency.h" +#include "utils/builtins.h" +#include "utils/rel.h" +#include "utils/timestamp.h" +#include "utils/snapmgr.h" + +void create_matview_dependency_tuple(Oid matviewOid, Relids relids, bool defer) +{ + Relation gp_matview_dependency; + HeapTuple tup; + Datum values[Natts_gp_matview_dependency]; + bool nulls[Natts_gp_matview_dependency]; + + memset(values, 0, sizeof(values)); + memset(nulls, false, sizeof(nulls)); + + Oid *oids = palloc(sizeof(Oid) * bms_num_members(relids)); + + int relid = -1; + int i = 0; + while ((relid = bms_next_member(relids, relid)) >= 0) + { + oids[i++] = relid; + } + oidvector *depend_ids = buildoidvector(oids, bms_num_members(relids)); + + gp_matview_dependency = table_open(MatviewDependencyId, RowExclusiveLock); + + values[Anum_gp_matview_dependency_matviewid - 1] = ObjectIdGetDatum(matviewOid); + values[Anum_gp_matview_dependency_relids - 1] = PointerGetDatum(depend_ids); + values[Anum_gp_matview_dependency_defer - 1] = BoolGetDatum(defer); + values[Anum_gp_matview_dependency_partial - 1 ] = BoolGetDatum(true); + values[Anum_gp_matview_dependency_trans_version - 1] = UInt64GetDatum(0); + values[Anum_gp_matview_dependency_combine_version - 1] = UInt64GetDatum(0); + values[Anum_gp_matview_dependency_isvaild - 1] = BoolGetDatum(true); + values[Anum_gp_matview_dependency_refresh_time - 1] = 0; + + tup = heap_form_tuple(RelationGetDescr(gp_matview_dependency), values, nulls); + CatalogTupleInsert(gp_matview_dependency, tup); + heap_freetuple(tup); + + table_close(gp_matview_dependency, 
RowExclusiveLock); + pfree(oids); + + CommandCounterIncrement(); + + return; +} + +Datum get_matview_dependency_relids(Oid matviewOid) +{ + Relation gp_matview_dependency; + Datum result; + + ScanKeyData skey; + SysScanDesc scan; + HeapTuple tuple; + bool isnull; + + gp_matview_dependency = table_open(MatviewDependencyId, RowExclusiveLock); + + ScanKeyInit(&skey, + Anum_gp_matview_dependency_matviewid, + BTEqualStrategyNumber, F_OIDEQ, + ObjectIdGetDatum(matviewOid)); + + scan = systable_beginscan(gp_matview_dependency, InvalidOid, false, + NULL, 1, &skey); + + tuple = systable_getnext(scan); + if (!HeapTupleIsValid(tuple)) + { + systable_endscan(scan); + table_close(gp_matview_dependency, RowExclusiveLock); + elog(ERROR, "cache lookup failed for matview %u", matviewOid); + } + + result = heap_getattr(tuple, Anum_gp_matview_dependency_relids, + RelationGetDescr(gp_matview_dependency), &isnull); + + + systable_endscan(scan); + table_close(gp_matview_dependency, RowExclusiveLock); + + return result; +} + +void +mark_matview_dependency_valid(Oid matviewOid, bool isvaild) +{ + Relation gp_matview_dependency; + HeapTuple tup; + SysScanDesc scanDescriptor = NULL; + ScanKeyData scanKey[1]; + Datum values[Natts_gp_matview_dependency]; + bool nulls[Natts_gp_matview_dependency]; + bool doreplace[Natts_gp_matview_dependency]; + + memset(values, 0, sizeof(values)); + memset(nulls, false, sizeof(nulls)); + memset(doreplace, false, sizeof(doreplace)); + + gp_matview_dependency = table_open(MatviewDependencyId, RowExclusiveLock); + + ScanKeyInit(&scanKey[0], Anum_gp_matview_dependency_matviewid, BTEqualStrategyNumber, + F_OIDEQ, ObjectIdGetDatum(matviewOid)); + + scanDescriptor = systable_beginscan(gp_matview_dependency, InvalidOid, + false, NULL, 1, scanKey); + + while (HeapTupleIsValid(tup = systable_getnext(scanDescriptor))) + { + values[Anum_gp_matview_dependency_isvaild - 1] = BoolGetDatum(isvaild); + doreplace[Anum_gp_matview_dependency_isvaild - 1] = true; + + tup = 
heap_modify_tuple(tup, RelationGetDescr(gp_matview_dependency), values, nulls, doreplace); + CatalogTupleUpdate(gp_matview_dependency, &tup->t_self, tup); + heap_freetuple(tup); + } + + systable_endscan(scanDescriptor); + table_close(gp_matview_dependency, RowExclusiveLock); +} + +void +remove_matview_dependency_byoid(Oid matviewOid) +{ + Relation gp_matview_dependency; + HeapTuple tup; + SysScanDesc scanDescriptor = NULL; + ScanKeyData scanKey[1]; + + gp_matview_dependency = table_open(MatviewDependencyId, RowExclusiveLock); + + ScanKeyInit(&scanKey[0], Anum_gp_matview_dependency_matviewid, BTEqualStrategyNumber, + F_OIDEQ, ObjectIdGetDatum(matviewOid)); + + scanDescriptor = systable_beginscan(gp_matview_dependency, InvalidOid, + true, NULL, 1, scanKey); + + while (HeapTupleIsValid(tup = systable_getnext(scanDescriptor))) + { + CatalogTupleDelete(gp_matview_dependency, &tup->t_self); + } + + systable_endscan(scanDescriptor); + table_close(gp_matview_dependency, RowExclusiveLock); +} + +uint64 +get_restart_trans_version(Oid matviewOid, Snapshot snapshot) +{ + Relation gp_matview_dependency; + Datum result; + + ScanKeyData skey; + SysScanDesc scan; + HeapTuple tuple; + bool isnull; + + if (snapshot == InvalidSnapshot) + snapshot = GetLatestSnapshot(); + + gp_matview_dependency = table_open(MatviewDependencyId, RowExclusiveLock); + + ScanKeyInit(&skey, + Anum_gp_matview_dependency_matviewid, + BTEqualStrategyNumber, F_OIDEQ, + ObjectIdGetDatum(matviewOid)); + + scan = systable_beginscan(gp_matview_dependency, InvalidOid, + false, snapshot, 1, &skey); + + tuple = systable_getnext(scan); + if (!HeapTupleIsValid(tuple)) + { + systable_endscan(scan); + table_close(gp_matview_dependency, RowExclusiveLock); + elog(ERROR, "cache lookup failed for matview %u", matviewOid); + } + + result = heap_getattr(tuple, Anum_gp_matview_dependency_trans_version, + RelationGetDescr(gp_matview_dependency), &isnull); + if (isnull) + { + result = UInt64GetDatum(0); + } + + 
systable_endscan(scan); + table_close(gp_matview_dependency, RowExclusiveLock); + + return DatumGetUInt64(result); +} + +void +record_restart_trans_version(Oid matviewOid, uint64 version, TimestampTz ftime) +{ + Relation gp_matview_dependency; + HeapTuple tup; + SysScanDesc scanDescriptor = NULL; + ScanKeyData scanKey[1]; + Datum values[Natts_gp_matview_dependency]; + bool nulls[Natts_gp_matview_dependency]; + bool doreplace[Natts_gp_matview_dependency]; + + memset(values, 0, sizeof(values)); + memset(nulls, false, sizeof(nulls)); + memset(doreplace, false, sizeof(doreplace)); + + gp_matview_dependency = table_open(MatviewDependencyId, RowExclusiveLock); + + ScanKeyInit(&scanKey[0], Anum_gp_matview_dependency_matviewid, BTEqualStrategyNumber, + F_OIDEQ, ObjectIdGetDatum(matviewOid)); + + scanDescriptor = systable_beginscan(gp_matview_dependency, InvalidOid, + false, NULL, 1, scanKey); + + while (HeapTupleIsValid(tup = systable_getnext(scanDescriptor))) + { + values[Anum_gp_matview_dependency_trans_version - 1] = UInt64GetDatum(version); + doreplace[Anum_gp_matview_dependency_trans_version - 1] = true; + + values[Anum_gp_matview_dependency_refresh_time - 1] = TimestampTzGetDatum(ftime); + doreplace[Anum_gp_matview_dependency_refresh_time - 1] = true; + + tup = heap_modify_tuple(tup, RelationGetDescr(gp_matview_dependency), values, nulls, doreplace); + CatalogTupleUpdate(gp_matview_dependency, &tup->t_self, tup); + heap_freetuple(tup); + } + + systable_endscan(scanDescriptor); + table_close(gp_matview_dependency, RowExclusiveLock); +} + +void +record_restart_combine_version(Oid matviewOid, uint64 version, TimestampTz ftime) +{ + Relation gp_matview_dependency; + HeapTuple tup; + SysScanDesc scanDescriptor = NULL; + ScanKeyData scanKey[1]; + Datum values[Natts_gp_matview_dependency]; + bool nulls[Natts_gp_matview_dependency]; + bool doreplace[Natts_gp_matview_dependency]; + + memset(values, 0, sizeof(values)); + memset(nulls, false, sizeof(nulls)); + memset(doreplace, 
false, sizeof(doreplace)); + + gp_matview_dependency = table_open(MatviewDependencyId, RowExclusiveLock); + + ScanKeyInit(&scanKey[0], Anum_gp_matview_dependency_matviewid, BTEqualStrategyNumber, + F_OIDEQ, ObjectIdGetDatum(matviewOid)); + + scanDescriptor = systable_beginscan(gp_matview_dependency, InvalidOid, + false, NULL, 1, scanKey); + + while (HeapTupleIsValid(tup = systable_getnext(scanDescriptor))) + { + values[Anum_gp_matview_dependency_combine_version - 1] = UInt64GetDatum(version); + doreplace[Anum_gp_matview_dependency_combine_version - 1] = true; + + values[Anum_gp_matview_dependency_refresh_time - 1] = TimestampTzGetDatum(ftime); + doreplace[Anum_gp_matview_dependency_refresh_time - 1] = true; + + tup = heap_modify_tuple(tup, RelationGetDescr(gp_matview_dependency), values, nulls, doreplace); + CatalogTupleUpdate(gp_matview_dependency, &tup->t_self, tup); + heap_freetuple(tup); + } + + systable_endscan(scanDescriptor); + table_close(gp_matview_dependency, RowExclusiveLock); +} diff --git a/src/backend/catalog/heap.c b/src/backend/catalog/heap.c index 1ab93f46b21..1c1c673e52a 100644 --- a/src/backend/catalog/heap.c +++ b/src/backend/catalog/heap.c @@ -1331,6 +1331,7 @@ InsertPgClassTuple(Relation pg_class_desc, values[Anum_pg_class_relmvrefcount - 1] = Int32GetDatum(rd_rel->relmvrefcount); values[Anum_pg_class_relhaspartialagg - 1] = BoolGetDatum(rd_rel->relhaspartialagg); values[Anum_pg_class_relinsertonly - 1] = BoolGetDatum(rd_rel->relinsertonly); + values[Anum_pg_class_relisivm - 1] = CharGetDatum(rd_rel->relisivm); if (relacl != (Datum) 0) values[Anum_pg_class_relacl - 1] = relacl; else diff --git a/src/backend/catalog/index.c b/src/backend/catalog/index.c index cf5fe120697..9409f5427ad 100644 --- a/src/backend/catalog/index.c +++ b/src/backend/catalog/index.c @@ -1072,7 +1072,7 @@ index_create_internal(Relation heapRelation, indexRelation->rd_rel->relowner = heapRelation->rd_rel->relowner; indexRelation->rd_rel->relam = accessMethodObjectId; 
indexRelation->rd_rel->relispartition = OidIsValid(parentIndexRelid); - indexRelation->rd_rel->relisivm = false; + indexRelation->rd_rel->relisivm = MATVIEW_IVM_NOTHING; indexRelation->rd_rel->relisdynamic = false; indexRelation->rd_rel->relmvrefcount = 0; indexRelation->rd_rel->relhaspartialagg = false; diff --git a/src/backend/catalog/pg_task.c b/src/backend/catalog/pg_task.c index 5327f8025a0..a259270d23f 100644 --- a/src/backend/catalog/pg_task.c +++ b/src/backend/catalog/pg_task.c @@ -47,7 +47,7 @@ Oid TaskCreate(const char *schedule, const char *command, const char *nodename, int32 nodeport, const char *database, const char *username, - bool active, const char *jobname) + bool active, const char *jobname, const char* warehouse) { Relation pg_task; HeapTuple tup; @@ -74,6 +74,10 @@ TaskCreate(const char *schedule, const char *command, values[Anum_pg_task_jobname - 1] = CStringGetTextDatum(jobname); else nulls[Anum_pg_task_jobname - 1] = true; + if (warehouse) + values[Anum_pg_task_warehouse - 1] = CStringGetTextDatum(warehouse); + else + nulls[Anum_pg_task_warehouse - 1] = true; tup = heap_form_tuple(RelationGetDescr(pg_task), values, nulls); CatalogTupleInsert(pg_task, tup); diff --git a/src/backend/commands/createas.c b/src/backend/commands/createas.c index 66cf2bc55c8..025f02bdcbb 100644 --- a/src/backend/commands/createas.c +++ b/src/backend/commands/createas.c @@ -30,6 +30,7 @@ #include "access/tableam.h" #include "access/xact.h" #include "access/xlog.h" +#include "catalog/gp_matview_dependency.h" #include "catalog/namespace.h" #include "catalog/index.h" #include "catalog/pg_constraint.h" @@ -89,6 +90,7 @@ intorel_initplan_hook_type intorel_initplan_hook = NULL; typedef struct { bool has_agg; + bool partial; } check_ivm_restriction_context; static void intorel_startup_dummy(DestReceiver *self, int operation, TupleDesc typeinfo); @@ -103,12 +105,12 @@ static void intorel_shutdown(DestReceiver *self); static void intorel_destroy(DestReceiver *self); static 
void CreateIvmTriggersOnBaseTablesRecurse(Query *qry, Node *node, Oid matviewOid, - Relids *relids, bool ex_lock); + Relids *relids, bool ex_lock, bool partial); static void CreateIvmTrigger(Oid relOid, Oid viewOid, int16 type, int16 timing, bool ex_lock); -static void check_ivm_restriction(Node *node); +static void check_ivm_restriction(Node *node, bool partial); static bool check_ivm_restriction_walker(Node *node, check_ivm_restriction_context *context); static Bitmapset *get_primary_key_attnos_from_query(Query *query, List **constraintList); -static bool check_aggregate_supports_ivm(Oid aggfnoid); +static bool check_aggregate_supports_ivm(Oid aggfnoid, bool partial); static void create_dynamic_table_auto_refresh_task(ParseState *pstate, Relation DynamicTableRel, char *schedule); @@ -218,8 +220,13 @@ create_ctas_internal(List *attrList, IntoClause *into, QueryDesc *queryDesc, boo if (is_matview) { /* StoreViewQuery scribbles on tree, so make a copy */ - Query *query = (Query *) copyObject(into->viewQuery); - + Query *query = (Query *) copyObject(into->viewQuery); + if (into->ivm) + { + Relation matviewRel = table_open(intoRelationAddr.objectId, NoLock); + SetMatViewIVMState(matviewRel, into->defer ? 
MATVIEW_IVM_DEFERRED : MATVIEW_IVM_IMMEDIATE); + table_close(matviewRel, NoLock); + } StoreViewQuery(intoRelationAddr.objectId, query, false); CommandCounterIncrement(); } @@ -337,6 +344,9 @@ ExecCreateTableAs(ParseState *pstate, CreateTableAsStmt *stmt, Query *query_immv = NULL; Oid relationOid = InvalidOid; /* relation that is modified */ AutoStatsCmdType cmdType = AUTOSTATS_CMDTYPE_SENTINEL; /* command type */ + bool saveOptimizerGucValue = optimizer; + ListCell *lc; + bool partial = false; Assert(Gp_role != GP_ROLE_EXECUTE); @@ -422,10 +432,28 @@ ExecCreateTableAs(ParseState *pstate, CreateTableAsStmt *stmt, errmsg("mutable function is not supported on incrementally maintainable materialized view"), errhint("functions must be marked IMMUTABLE"))); - check_ivm_restriction((Node *) query); + /* close orca for rewrite agg functions */ + optimizer = false; + foreach(lc , stmt->into->options) + { + DefElem *def = (DefElem *) lfirst(lc); + if (pg_strcasecmp(def->defname, "partial_agg") == 0) + { + partial = defGetBoolean(def); + } + } + if (into->defer && !partial) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("partial aggregation is required for deferred IVM"))); + if (!into->defer && partial) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("partial aggregation is not allowed for immediate IVM"))); + check_ivm_restriction((Node *) query, partial); /* For IMMV, we need to rewrite matview query */ - query = rewriteQueryForIMMV(query, into->colNames); + query = rewriteQueryForIMMV(query, into->colNames, partial); query_immv = copyObject(query); } @@ -557,14 +585,17 @@ ExecCreateTableAs(ParseState *pstate, CreateTableAsStmt *stmt, /* * Mark relisivm field, if it's a matview and into->ivm is true. */ - SetMatViewIVMState(matviewRel, true); + SetMatViewIVMState(matviewRel, into->defer ? 
+ MATVIEW_IVM_DEFERRED : MATVIEW_IVM_IMMEDIATE); - if (!into->skipData) + if (!into->skipData || partial) { Assert(query_immv != NULL); /* Create triggers on incremental maintainable materialized view */ - CreateIvmTriggersOnBaseTables(query_immv, matviewOid); + CreateIvmTriggersOnBaseTables(query_immv, matviewOid, partial, true); } + if (into->defer) + CreateTaskIVM(pstate, matviewRel, into->interval); } /* Set Dynamic Tables. */ @@ -583,6 +614,7 @@ ExecCreateTableAs(ParseState *pstate, CreateTableAsStmt *stmt, FreeQueryDesc(queryDesc); PopActiveSnapshot(); + optimizer = saveOptimizerGucValue; } @@ -596,7 +628,7 @@ ExecCreateTableAs(ParseState *pstate, CreateTableAsStmt *stmt, * Also, additional hidden columns are added for aggregate values. */ Query * -rewriteQueryForIMMV(Query *query, List *colNames) +rewriteQueryForIMMV(Query *query, List *colNames, bool partial) { Query *rewritten; @@ -616,7 +648,7 @@ rewriteQueryForIMMV(Query *query, List *colNames) SortGroupClause *scl = (SortGroupClause *) lfirst(lc); TargetEntry *tle = get_sortgroupclause_tle(scl, rewritten->targetList); - if (tle->resjunk) + if (tle->resjunk && !partial) ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), errmsg("GROUP BY expression not appearing in select list is not supported on incrementally maintainable materialized view"))); @@ -640,7 +672,25 @@ rewriteQueryForIMMV(Query *query, List *colNames) tle->resname : strVal(list_nth(colNames, tle->resno - 1))); if (IsA(tle->expr, Aggref)) - makeIvmAggColumn(pstate, (Aggref *) tle->expr, resname, &next_resno, &aggs); + { + Aggref *aggref = (Aggref *) tle->expr; + Oid transtype = get_agg_transtype(aggref->aggfnoid); + if (partial) + { + if (transtype == INTERNALOID) + { + aggref->aggtype = BYTEAOID; + } + else + { + aggref->aggtype = transtype; + } + aggref->extrasplit = AGGSPLITOP_REPLACE_FINAL; + aggs = NIL; + } + else + makeIvmAggColumn(pstate, aggref, resname, &next_resno, &aggs); + } } rewritten->targetList = 
list_concat(rewritten->targetList, aggs); } @@ -1089,7 +1139,7 @@ GetIntoRelOid(QueryDesc *queryDesc) * CreateIvmTriggersOnBaseTables -- create IVM triggers on all base tables */ void -CreateIvmTriggersOnBaseTables(Query *qry, Oid matviewOid) +CreateIvmTriggersOnBaseTables(Query *qry, Oid matviewOid, bool partial, bool create) { Relids relids = NULL; bool ex_lock = false; @@ -1116,14 +1166,15 @@ CreateIvmTriggersOnBaseTables(Query *qry, Oid matviewOid) if (list_length(qry->rtable) > 1 || rte->rtekind != RTE_RELATION) ex_lock = true; - CreateIvmTriggersOnBaseTablesRecurse(qry, (Node *)qry, matviewOid, &relids, ex_lock); - + CreateIvmTriggersOnBaseTablesRecurse(qry, (Node *)qry, matviewOid, &relids, ex_lock, partial); + if (create && partial) + create_matview_dependency_tuple(matviewOid, relids, true); bms_free(relids); } static void CreateIvmTriggersOnBaseTablesRecurse(Query *qry, Node *node, Oid matviewOid, - Relids *relids, bool ex_lock) + Relids *relids, bool ex_lock, bool partial) { if (node == NULL) return; @@ -1137,7 +1188,7 @@ CreateIvmTriggersOnBaseTablesRecurse(Query *qry, Node *node, Oid matviewOid, { Query *query = (Query *) node; - CreateIvmTriggersOnBaseTablesRecurse(qry, (Node *)query->jointree, matviewOid, relids, ex_lock); + CreateIvmTriggersOnBaseTablesRecurse(qry, (Node *)query->jointree, matviewOid, relids, ex_lock, partial); } break; @@ -1148,14 +1199,17 @@ CreateIvmTriggersOnBaseTablesRecurse(Query *qry, Node *node, Oid matviewOid, if (rte->rtekind == RTE_RELATION && !bms_is_member(rte->relid, *relids)) { - CreateIvmTrigger(rte->relid, matviewOid, TRIGGER_TYPE_INSERT, TRIGGER_TYPE_BEFORE, ex_lock); - CreateIvmTrigger(rte->relid, matviewOid, TRIGGER_TYPE_DELETE, TRIGGER_TYPE_BEFORE, ex_lock); - CreateIvmTrigger(rte->relid, matviewOid, TRIGGER_TYPE_UPDATE, TRIGGER_TYPE_BEFORE, ex_lock); - CreateIvmTrigger(rte->relid, matviewOid, TRIGGER_TYPE_TRUNCATE, TRIGGER_TYPE_BEFORE, true); - CreateIvmTrigger(rte->relid, matviewOid, TRIGGER_TYPE_INSERT, 
TRIGGER_TYPE_AFTER, ex_lock); - CreateIvmTrigger(rte->relid, matviewOid, TRIGGER_TYPE_DELETE, TRIGGER_TYPE_AFTER, ex_lock); - CreateIvmTrigger(rte->relid, matviewOid, TRIGGER_TYPE_UPDATE, TRIGGER_TYPE_AFTER, ex_lock); - CreateIvmTrigger(rte->relid, matviewOid, TRIGGER_TYPE_TRUNCATE, TRIGGER_TYPE_AFTER, true); + if (!partial) + { + CreateIvmTrigger(rte->relid, matviewOid, TRIGGER_TYPE_INSERT, TRIGGER_TYPE_BEFORE, ex_lock); + CreateIvmTrigger(rte->relid, matviewOid, TRIGGER_TYPE_DELETE, TRIGGER_TYPE_BEFORE, ex_lock); + CreateIvmTrigger(rte->relid, matviewOid, TRIGGER_TYPE_UPDATE, TRIGGER_TYPE_BEFORE, ex_lock); + CreateIvmTrigger(rte->relid, matviewOid, TRIGGER_TYPE_TRUNCATE, TRIGGER_TYPE_BEFORE, true); + CreateIvmTrigger(rte->relid, matviewOid, TRIGGER_TYPE_INSERT, TRIGGER_TYPE_AFTER, ex_lock); + CreateIvmTrigger(rte->relid, matviewOid, TRIGGER_TYPE_DELETE, TRIGGER_TYPE_AFTER, ex_lock); + CreateIvmTrigger(rte->relid, matviewOid, TRIGGER_TYPE_UPDATE, TRIGGER_TYPE_AFTER, ex_lock); + CreateIvmTrigger(rte->relid, matviewOid, TRIGGER_TYPE_TRUNCATE, TRIGGER_TYPE_AFTER, true); + } *relids = bms_add_member(*relids, rte->relid); } @@ -1168,7 +1222,7 @@ CreateIvmTriggersOnBaseTablesRecurse(Query *qry, Node *node, Oid matviewOid, ListCell *l; foreach(l, f->fromlist) - CreateIvmTriggersOnBaseTablesRecurse(qry, lfirst(l), matviewOid, relids, ex_lock); + CreateIvmTriggersOnBaseTablesRecurse(qry, lfirst(l), matviewOid, relids, ex_lock, partial); } break; @@ -1176,8 +1230,8 @@ CreateIvmTriggersOnBaseTablesRecurse(Query *qry, Node *node, Oid matviewOid, { JoinExpr *j = (JoinExpr *) node; - CreateIvmTriggersOnBaseTablesRecurse(qry, j->larg, matviewOid, relids, ex_lock); - CreateIvmTriggersOnBaseTablesRecurse(qry, j->rarg, matviewOid, relids, ex_lock); + CreateIvmTriggersOnBaseTablesRecurse(qry, j->larg, matviewOid, relids, ex_lock, partial); + CreateIvmTriggersOnBaseTablesRecurse(qry, j->rarg, matviewOid, relids, ex_lock, partial); } break; @@ -1306,9 +1360,9 @@ CreateIvmTrigger(Oid 
relOid, Oid viewOid, int16 type, int16 timing, bool ex_lock * check_ivm_restriction --- look for specify nodes in the query tree */ static void -check_ivm_restriction(Node *node) +check_ivm_restriction(Node *node, bool partial) { - check_ivm_restriction_context context = {false}; + check_ivm_restriction_context context = {false, partial}; check_ivm_restriction_walker(node, &context); } @@ -1343,6 +1397,10 @@ check_ivm_restriction_walker(Node *node, check_ivm_restriction_context *context) ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), errmsg("CTE is not supported on incrementally maintainable materialized view"))); + if ((qry->groupClause == NIL || !qry->hasAggs) && context->partial) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg(" Without aggregate and GROUP BY are not supported on incrementally maintainable materialized view"))); if (qry->groupClause != NIL && !qry->hasAggs) ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), @@ -1421,7 +1479,7 @@ check_ivm_restriction_walker(Node *node, check_ivm_restriction_context *context) (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), errmsg("partitioned table is not supported on incrementally maintainable materialized view"))); - if (rte->relkind == RELKIND_RELATION && has_superclass(rte->relid)) + if (rte->relkind == RELKIND_RELATION && has_superclass(rte->relid) && !context->partial) ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), errmsg("partitions is not supported on incrementally maintainable materialized view"))); @@ -1481,10 +1539,10 @@ check_ivm_restriction_walker(Node *node, check_ivm_restriction_context *context) { JoinExpr *joinexpr = (JoinExpr *)node; - if (joinexpr->jointype > JOIN_INNER) + if (joinexpr->jointype > JOIN_INNER || context->partial) ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), - errmsg("OUTER JOIN is not supported on incrementally maintainable materialized view"))); + errmsg("The JOIN is not supported on incrementally maintainable materialized 
view"))); expression_tree_walker(node, check_ivm_restriction_walker, (void *) context); break; @@ -1510,7 +1568,7 @@ check_ivm_restriction_walker(Node *node, check_ivm_restriction_context *context) (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), errmsg("aggregate function with ORDER clause is not supported on incrementally maintainable materialized view"))); - if (!check_aggregate_supports_ivm(aggref->aggfnoid)) + if (!check_aggregate_supports_ivm(aggref->aggfnoid, context->partial)) ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), errmsg("aggregate function %s is not supported on incrementally maintainable materialized view", aggname))); @@ -1529,7 +1587,7 @@ check_ivm_restriction_walker(Node *node, check_ivm_restriction_context *context) * Check if the given aggregate function is supporting IVM */ static bool -check_aggregate_supports_ivm(Oid aggfnoid) +check_aggregate_supports_ivm(Oid aggfnoid, bool partial) { switch (aggfnoid) { @@ -1558,6 +1616,53 @@ check_aggregate_supports_ivm(Oid aggfnoid) return true; + /* min */ + case F_MIN_ANYARRAY: + case F_MIN_INT8: + case F_MIN_INT4: + case F_MIN_INT2: + case F_MIN_OID: + case F_MIN_FLOAT4: + case F_MIN_FLOAT8: + case F_MIN_DATE: + case F_MIN_TIME: + case F_MIN_TIMETZ: + case F_MIN_MONEY: + case F_MIN_TIMESTAMP: + case F_MIN_TIMESTAMPTZ: + case F_MIN_INTERVAL: + case F_MIN_TEXT: + case F_MIN_NUMERIC: + case F_MIN_BPCHAR: + case F_MIN_TID: + case F_MIN_ANYENUM: + case F_MIN_INET: + case F_MIN_PG_LSN: + + /* max */ + case F_MAX_ANYARRAY: + case F_MAX_INT8: + case F_MAX_INT4: + case F_MAX_INT2: + case F_MAX_OID: + case F_MAX_FLOAT4: + case F_MAX_FLOAT8: + case F_MAX_DATE: + case F_MAX_TIME: + case F_MAX_TIMETZ: + case F_MAX_MONEY: + case F_MAX_TIMESTAMP: + case F_MAX_TIMESTAMPTZ: + case F_MAX_INTERVAL: + case F_MAX_TEXT: + case F_MAX_NUMERIC: + case F_MAX_BPCHAR: + case F_MAX_TID: + case F_MAX_ANYENUM: + case F_MAX_INET: + case F_MAX_PG_LSN: + return partial ? 
true : false; + default: return false; } @@ -1855,7 +1960,7 @@ get_primary_key_attnos_from_query(Query *query, List **constraintList) return keys; } -/* +/* * Create auto-refresh task for Dynamic Tables. */ static void @@ -1873,12 +1978,14 @@ create_dynamic_table_auto_refresh_task(ParseState *pstate, Relation DynamicTable CreateTaskStmt *task_stmt = makeNode(CreateTaskStmt); initStringInfo(&buf); - appendStringInfo(&buf, "gp_dynamic_table_refresh_%u", RelationGetRelid(DynamicTableRel)); + appendStringInfo(&buf, "gp_dynamic_table_refresh_%u", + RelationGetRelid(DynamicTableRel)); task_stmt->taskname = pstrdup(buf.data); task_stmt->schedule = pstrdup(schedule); task_stmt->if_not_exists = false; /* report error if failed. */ - dtname = quote_qualified_identifier(get_namespace_name(RelationGetNamespace(DynamicTableRel)), - RelationGetRelationName(DynamicTableRel)); + dtname = quote_qualified_identifier( + get_namespace_name(RelationGetNamespace(DynamicTableRel)), + RelationGetRelationName(DynamicTableRel)); resetStringInfo(&buf); appendStringInfo(&buf, "REFRESH DYNAMIC TABLE %s", dtname); task_stmt->sql = pstrdup(buf.data); @@ -1892,3 +1999,45 @@ create_dynamic_table_auto_refresh_task(ParseState *pstate, Relation DynamicTable refaddr.objectSubId = 0; recordDependencyOn(&address, &refaddr, DEPENDENCY_INTERNAL); } + +ObjectAddress +CreateTaskIVM(ParseState *pstate, Relation matviewRel, char* interval) +{ + ObjectAddress refaddr; + ObjectAddress address; + StringInfoData namebuf; + char *schedule = interval; + char *matviewname = NULL; + CreateTaskStmt *stmt = makeNode(CreateTaskStmt); + Oid warehouseid = GetCurrentWarehouseId(); + if (!OidIsValid(warehouseid)) + { + ereport(ERROR, + (errcode(ERRCODE_UNDEFINED_OBJECT), + errmsg("current session does not have a valid warehouse"))); + } + char *warehosue = GpGetWarehouseName(warehouseid, false); + + if (schedule == NULL) + schedule = "30 seconds"; + + initStringInfo(&namebuf); + appendStringInfo(&namebuf, "ivm_task_%u", 
RelationGetRelid(matviewRel)); + stmt->taskname = pstrdup(namebuf.data); + stmt->schedule = pstrdup(schedule); + + matviewname = quote_qualified_identifier(get_namespace_name(RelationGetNamespace(matviewRel)), + RelationGetRelationName(matviewRel)); + resetStringInfo(&namebuf); + appendStringInfo(&namebuf, "REFRESH INCREMENTAL MATERIALIZED VIEW CONCURRENTLY %s", matviewname); + stmt->sql = pstrdup(namebuf.data); + stmt->options = list_make1(makeDefElem("warehouse", (Node *) makeString(warehosue), -1)); + stmt->if_not_exists = true; + + address = DefineTask(pstate, stmt); + refaddr.classId = RelationRelationId; + refaddr.objectId = RelationGetRelid(matviewRel); + refaddr.objectSubId = 0; + recordDependencyOn(&address, &refaddr, DEPENDENCY_AUTO); + return address; +} diff --git a/src/backend/commands/explain.c b/src/backend/commands/explain.c index 09c0f18445d..07e46c6b7a1 100644 --- a/src/backend/commands/explain.c +++ b/src/backend/commands/explain.c @@ -552,7 +552,12 @@ ExplainOneQuery(Query *query, int cursorOptions, * to correctly set the into-clause and into-policy of the PlannedStmt. */ if (into != NULL) + { plan->intoClause = copyObject(into); + if (into->ivm && es->analyze) + ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("IVM is not supported with EXPLAIN ANALYZE"))); + } /* calc differences of buffer counters. 
*/ if (es->buffers) diff --git a/src/backend/commands/matview.c b/src/backend/commands/matview.c index 15fbf7f5c66..cc76371d67f 100644 --- a/src/backend/commands/matview.c +++ b/src/backend/commands/matview.c @@ -141,6 +141,7 @@ typedef enum #define OLD_DELTA_ENRNAME "old_delta" static int matview_maintenance_depth = 0; +static int saved_matview_maintenance_depth = 0; static RefreshClause* MakeRefreshClause(bool concurrent, bool skipData, RangeVar *relation); static IntoClause* makeIvmIntoClause(const char *enrname, Relation matviewRel); @@ -157,7 +158,6 @@ static uint64 refresh_matview_memoryfill(DestReceiver *dest,Query *query, static char *make_temptable_name_n(char *tempname, int n); static void refresh_by_match_merge(Oid matviewOid, Oid tempOid, Oid relowner, int save_sec_context); -static void refresh_by_heap_swap(Oid matviewOid, Oid OIDNewHeap, char relpersistence); static bool is_usable_unique_index(Relation indexRel); static void OpenMatViewIncrementalMaintenance(void); static void CloseMatViewIncrementalMaintenance(void); @@ -320,7 +320,7 @@ SetDynamicTableState(Relation relation) * NOTE: caller must be holding an appropriate lock on the relation. 
*/ void -SetMatViewIVMState(Relation relation, bool newstate) +SetMatViewIVMState(Relation relation, char newstate) { Relation pgrel; HeapTuple tuple; @@ -340,6 +340,7 @@ SetMatViewIVMState(Relation relation, bool newstate) RelationGetRelid(relation)); ((Form_pg_class) GETSTRUCT(tuple))->relisivm = newstate; + ((Form_pg_class) GETSTRUCT(tuple))->relhaspartialagg = RelationGetPartialAgg(relation); CatalogTupleUpdate(pgrel, &tuple->t_self, tuple); @@ -447,7 +448,7 @@ ExecRefreshMatView(RefreshMatViewStmt *stmt, const char *queryString, /* For IMMV, we need to rewrite matview query */ if (!stmt->skipData && RelationIsIVM(matviewRel)) - dataQuery = rewriteQueryForIMMV(viewQuery,NIL); + dataQuery = rewriteQueryForIMMV(viewQuery, NIL, RelationGetPartialAgg(matviewRel)); else dataQuery = viewQuery; @@ -711,7 +712,7 @@ ExecRefreshMatView(RefreshMatViewStmt *stmt, const char *queryString, if (!stmt->skipData && RelationIsIVM(matviewRel) && !oldPopulated) { - CreateIvmTriggersOnBaseTables(dataQuery, matviewOid); + CreateIvmTriggersOnBaseTables(dataQuery, matviewOid, RelationGetPartialAgg(matviewRel), false); } table_close(matviewRel, NoLock); @@ -1438,7 +1439,7 @@ refresh_by_match_merge(Oid matviewOid, Oid tempOid, Oid relowner, * the target's indexes and throw away the transient table. Security context * swapping is handled by the called function, so it is not needed here. 
*/ -static void +void refresh_by_heap_swap(Oid matviewOid, Oid OIDNewHeap, char relpersistence) { finish_heap_swap(matviewOid, OIDNewHeap, @@ -1517,19 +1518,29 @@ MatViewIncrementalMaintenanceIsEnabled(void) return matview_maintenance_depth > 0; } -static void +void OpenMatViewIncrementalMaintenance(void) { matview_maintenance_depth++; } -static void +void CloseMatViewIncrementalMaintenance(void) { matview_maintenance_depth--; Assert(matview_maintenance_depth >= 0); } +void SaveMatViewMaintenanceDepth(void) +{ + saved_matview_maintenance_depth = matview_maintenance_depth; +} + +void RestoreMatViewMaintenanceDepth(void) +{ + matview_maintenance_depth = saved_matview_maintenance_depth; +} + /* * get_matview_query - get the Query from a matview's _RETURN rule. */ @@ -3839,7 +3850,7 @@ ExecuteTruncateGuts_IVM(Relation matviewRel, Oid OIDNewHeap; DestReceiver *dest; uint64 processed = 0; - Query *dataQuery = rewriteQueryForIMMV(query, NIL); + Query *dataQuery = rewriteQueryForIMMV(query, NIL, false); char relpersistence = matviewRel->rd_rel->relpersistence; RefreshClause *refreshClause; /* @@ -3928,7 +3939,8 @@ makeIvmIntoClause(const char *enrname, Relation matviewRel) { IntoClause *intoClause = makeNode(IntoClause); intoClause->ivm = true; - /* rel is NULL means putting tuples into memory.*/ + intoClause->defer = false; + /* rel is NULL means put tuples into memory.*/ intoClause->rel = NULL; intoClause->enrname = (char*) enrname; intoClause->distributedBy = (Node*) make_distributedby_for_rel(matviewRel); @@ -3964,7 +3976,8 @@ transientenr_init(QueryDesc *queryDesc) entry->resowner, entry->context, true, - into->enrname); + into->enrname, + into->defer); } /* diff --git a/src/backend/commands/tablecmds.c b/src/backend/commands/tablecmds.c index dee0e387f79..24da81f7ea5 100644 --- a/src/backend/commands/tablecmds.c +++ b/src/backend/commands/tablecmds.c @@ -18045,6 +18045,7 @@ build_ctas_with_dist(Relation rel, DistributedBy *dist_clause, into->options = storage_opts; 
into->tableSpaceName = get_tablespace_name(tblspc); into->distributedBy = (Node *)dist_clause; + if (RelationIsAppendOptimized(rel)) { /* @@ -18058,6 +18059,9 @@ build_ctas_with_dist(Relation rel, DistributedBy *dist_clause, */ into->options = build_ao_rel_storage_opts(into->options, rel); } + + into->ivm = false; + into->defer = false; s->intoClause = into; RawStmt *rawstmt = makeNode(RawStmt); @@ -23360,7 +23364,8 @@ ATExecSetRelOptionsCheck(Relation rel, DefElem *def) pg_strncasecmp(SOPT_BLOCKSIZE, def->defname, kw_len) == 0 || pg_strncasecmp(SOPT_COMPTYPE, def->defname, kw_len) == 0 || pg_strncasecmp(SOPT_COMPLEVEL, def->defname, kw_len) == 0 || - pg_strncasecmp(SOPT_CHECKSUM, def->defname, kw_len) == 0) + pg_strncasecmp(SOPT_CHECKSUM, def->defname, kw_len) == 0 || + pg_strncasecmp(SOPT_PARTIAL_AGG, def->defname, kw_len) == 0) ereport(ERROR, (errcode(ERRCODE_WRONG_OBJECT_TYPE), errmsg("cannot SET reloption \"%s\"", diff --git a/src/backend/commands/taskcmds.c b/src/backend/commands/taskcmds.c index 1c2b775da97..0e4ac2e22fe 100644 --- a/src/backend/commands/taskcmds.c +++ b/src/backend/commands/taskcmds.c @@ -47,14 +47,16 @@ ObjectAddress DefineTask(ParseState *pstate, CreateTaskStmt * stmt) { - ObjectAddress address; - char *dbname = NULL; - char *username = NULL; - ListCell *option; - DefElem *d_dbname = NULL; - DefElem *d_username = NULL; - Oid jobid = InvalidOid; - AclResult aclresult; + ObjectAddress address; + char *dbname = NULL; + char *username = NULL; + char *warehosue = NULL; + ListCell *option; + DefElem *d_dbname = NULL; + DefElem *d_username = NULL; + DefElem *d_warehouse = NULL; + Oid jobid = InvalidOid; + AclResult aclresult; /* must have CREATE privilege on database */ aclresult = pg_database_aclcheck(MyDatabaseId, GetUserId(), ACL_CREATE); @@ -73,18 +75,27 @@ DefineTask(ParseState *pstate, CreateTaskStmt * stmt) (errcode(ERRCODE_SYNTAX_ERROR), errmsg("conflicting or redundant options"), parser_errposition(pstate, defel->location))); - d_dbname 
= defel; - } - else if (strcmp(defel->defname, "username") == 0) - { - if (d_username) - ereport(ERROR, - (errcode(ERRCODE_SYNTAX_ERROR), - errmsg("conflicting or redundant options"), - parser_errposition(pstate, defel->location))); - d_username = defel; - } - else + d_dbname = defel; + } + else if (strcmp(defel->defname, "username") == 0) + { + if (d_username) + ereport(ERROR, + (errcode(ERRCODE_SYNTAX_ERROR), + errmsg("conflicting or redundant options"), + parser_errposition(pstate, defel->location))); + d_username = defel; + } + else if (strcmp(defel->defname, "warehouse") == 0) + { + if (d_warehouse) + ereport(ERROR, + (errcode(ERRCODE_SYNTAX_ERROR), + errmsg("conflicting or redundant options"), + parser_errposition(pstate, defel->location))); + d_warehouse = defel; + } + else elog(ERROR, "option \"%s\" not recognized", defel->defname); } @@ -107,10 +118,13 @@ DefineTask(ParseState *pstate, CreateTaskStmt * stmt) else dbname = get_database_name(MyDatabaseId); - if (!OidIsValid(get_database_oid(dbname, true))) - ereport(ERROR, - (errcode(ERRCODE_UNDEFINED_DATABASE), - errmsg("database \"%s\" does not exist", dbname))); + if (d_warehouse != NULL && d_warehouse->arg) + warehosue = defGetString(d_warehouse); + + if (!OidIsValid(get_database_oid(dbname, true))) + ereport(ERROR, + (errcode(ERRCODE_UNDEFINED_DATABASE), + errmsg("database \"%s\" does not exist", dbname))); /* check if the task already exists */ if (stmt->if_not_exists) @@ -135,7 +149,7 @@ DefineTask(ParseState *pstate, CreateTaskStmt * stmt) } jobid = ScheduleCronJob(cstring_to_text(stmt->schedule), cstring_to_text(stmt->sql), cstring_to_text(dbname), cstring_to_text(username), - true, cstring_to_text(stmt->taskname)); + true, cstring_to_text(stmt->taskname), warehosue); /* Depend on owner. 
*/ recordDependencyOnOwner(TaskRelationId, jobid, get_role_oid(username, false)); diff --git a/src/backend/executor/nodeAgg.c b/src/backend/executor/nodeAgg.c index 881d09e5375..b6a4532ad35 100644 --- a/src/backend/executor/nodeAgg.c +++ b/src/backend/executor/nodeAgg.c @@ -3969,6 +3969,13 @@ ExecInitAgg(Agg *node, EState *estate, int eflags) else peragg->finalfn_oid = finalfn_oid = aggform->aggfinalfn; + /* Check extrasplit for replace final aggregate function */ + if (DO_AGGSPLIT_REPLACE_FINAL(aggref->extrasplit)) + { + AssertImply(aggtranstype == INTERNALOID, aggform->aggserialfn); + peragg->finalfn_oid = finalfn_oid = aggform->aggserialfn; + } + serialfn_oid = InvalidOid; deserialfn_oid = InvalidOid; diff --git a/src/backend/executor/tstoreReceiver.c b/src/backend/executor/tstoreReceiver.c index 766b32152c6..acb991ca40f 100644 --- a/src/backend/executor/tstoreReceiver.c +++ b/src/backend/executor/tstoreReceiver.c @@ -301,6 +301,7 @@ typedef struct PersistentTupleStore const char *filename; /* filename for storing tuples */ bool detoast; /* were we told to detoast? */ bool initfile; /* is this the first time to init file? */ + bool defer; /* defer to refersh */ Datum *outvalues; /* values array for result tuple */ Datum *tofree; /* temp values to be pfree'd */ } PersistentTupleStore; @@ -454,7 +455,7 @@ persistentTstoreShutdownReceiver(DestReceiver *self) tuplestore_freeze(myState->tstore); /* Set file to temporary to release file as soon as possible. 
*/ - tuplestore_set_flags(myState->tstore, true); + tuplestore_set_flags(myState->tstore, !myState->defer); } /* Release workspace if any */ if (myState->outvalues) @@ -491,7 +492,8 @@ SetPersistentTstoreDestReceiverParams(DestReceiver *self, ResourceOwner owner, MemoryContext tContext, bool detoast, - const char *filename) + const char *filename, + bool defer) { PersistentTupleStore *myState = (PersistentTupleStore *) self; @@ -502,4 +504,5 @@ SetPersistentTstoreDestReceiverParams(DestReceiver *self, myState->detoast = detoast; myState->filename = filename; myState->initfile = false; + myState->defer = defer; } diff --git a/src/backend/nodes/copyfuncs.c b/src/backend/nodes/copyfuncs.c index 1cbcb1efab5..9acb54ec45c 100644 --- a/src/backend/nodes/copyfuncs.c +++ b/src/backend/nodes/copyfuncs.c @@ -568,6 +568,7 @@ CopyScanFields(const Scan *from, Scan *newnode) COPY_SCALAR_FIELD(scanrelid); COPY_SCALAR_FIELD(scanflags); #ifdef SERVERLESS + COPY_NODE_FIELD(version); COPY_SCALAR_FIELD(basemv); #endif } @@ -1910,6 +1911,7 @@ _copyIntoClause(const IntoClause *from) COPY_STRING_FIELD(tableSpaceName); COPY_NODE_FIELD(viewQuery); COPY_SCALAR_FIELD(skipData); + COPY_SCALAR_FIELD(defer); COPY_NODE_FIELD(distributedBy); COPY_SCALAR_FIELD(ivm); COPY_SCALAR_FIELD(matviewOid); @@ -2062,6 +2064,7 @@ _copyAggref(const Aggref *from) COPY_SCALAR_FIELD(aggtransno); COPY_LOCATION_FIELD(location); COPY_SCALAR_FIELD(agg_expr_id); + COPY_SCALAR_FIELD(extrasplit); return newnode; } @@ -3120,6 +3123,7 @@ _copyRangeTblEntry(const RangeTblEntry *from) COPY_SCALAR_FIELD(self_reference); COPY_SCALAR_FIELD(forceDistRandom); + COPY_NODE_FIELD(version); return newnode; } @@ -4916,6 +4920,8 @@ _copyRefreshMatViewStmt(const RefreshMatViewStmt *from) COPY_SCALAR_FIELD(skipData); COPY_NODE_FIELD(relation); COPY_SCALAR_FIELD(isdynamic); + COPY_SCALAR_FIELD(incremental); + COPY_SCALAR_FIELD(combine); return newnode; } diff --git a/src/backend/nodes/equalfuncs.c b/src/backend/nodes/equalfuncs.c index 
1347aff5a0e..55de252ccd5 100644 --- a/src/backend/nodes/equalfuncs.c +++ b/src/backend/nodes/equalfuncs.c @@ -171,6 +171,7 @@ _equalIntoClause(const IntoClause *a, const IntoClause *b) COMPARE_STRING_FIELD(tableSpaceName); COMPARE_NODE_FIELD(viewQuery); COMPARE_SCALAR_FIELD(skipData); + COMPARE_SCALAR_FIELD(defer); COMPARE_NODE_FIELD(distributedBy); COMPARE_SCALAR_FIELD(ivm); COMPARE_SCALAR_FIELD(matviewOid); @@ -263,6 +264,7 @@ _equalAggref(const Aggref *a, const Aggref *b) COMPARE_SCALAR_FIELD(aggno); COMPARE_SCALAR_FIELD(aggtransno); COMPARE_LOCATION_FIELD(location); + COMPARE_SCALAR_FIELD(extrasplit); return true; } @@ -1969,6 +1971,8 @@ _equalRefreshMatViewStmt(const RefreshMatViewStmt *a, const RefreshMatViewStmt * COMPARE_SCALAR_FIELD(skipData); COMPARE_NODE_FIELD(relation); COMPARE_SCALAR_FIELD(isdynamic); + COMPARE_SCALAR_FIELD(incremental); + COMPARE_SCALAR_FIELD(combine); return true; } @@ -3180,6 +3184,7 @@ _equalRangeTblEntry(const RangeTblEntry *a, const RangeTblEntry *b) COMPARE_BITMAPSET_FIELD(updatedCols); COMPARE_BITMAPSET_FIELD(extraUpdatedCols); COMPARE_NODE_FIELD(securityQuals); + COMPARE_NODE_FIELD(version); return true; } diff --git a/src/backend/nodes/outfuncs.c b/src/backend/nodes/outfuncs.c index c95a9a17a24..70df7414eb6 100644 --- a/src/backend/nodes/outfuncs.c +++ b/src/backend/nodes/outfuncs.c @@ -428,8 +428,8 @@ _outScanInfo(StringInfo str, const Scan *node) WRITE_UINT_FIELD(scanrelid); WRITE_UINT_FIELD(scanflags); - #ifdef SERVERLESS + WRITE_NODE_FIELD(version); WRITE_OID_FIELD(basemv); #endif } @@ -1303,6 +1303,7 @@ _outIntoClause(StringInfo str, const IntoClause *node) WRITE_STRING_FIELD(tableSpaceName); WRITE_NODE_FIELD(viewQuery); WRITE_BOOL_FIELD(skipData); + WRITE_BOOL_FIELD(defer); WRITE_NODE_FIELD(distributedBy); WRITE_BOOL_FIELD(ivm); WRITE_OID_FIELD(matviewOid); @@ -1387,6 +1388,7 @@ _outAggref(StringInfo str, const Aggref *node) WRITE_INT_FIELD(aggtransno); WRITE_LOCATION_FIELD(location); WRITE_INT_FIELD(agg_expr_id); + 
WRITE_INT_FIELD(extrasplit); } static void @@ -3681,6 +3683,7 @@ _outRangeTblEntry(StringInfo str, const RangeTblEntry *node) WRITE_NODE_FIELD(securityQuals); WRITE_BOOL_FIELD(forceDistRandom); + WRITE_NODE_FIELD(version); } static void diff --git a/src/backend/nodes/readfuncs.c b/src/backend/nodes/readfuncs.c index cd50582b3e9..163c44a953d 100644 --- a/src/backend/nodes/readfuncs.c +++ b/src/backend/nodes/readfuncs.c @@ -565,6 +565,7 @@ _readIntoClause(void) READ_STRING_FIELD(tableSpaceName); READ_NODE_FIELD(viewQuery); READ_BOOL_FIELD(skipData); + READ_BOOL_FIELD(defer); READ_NODE_FIELD(distributedBy); READ_BOOL_FIELD(ivm); READ_OID_FIELD(matviewOid); @@ -680,6 +681,7 @@ _readAggref(void) READ_INT_FIELD(aggtransno); READ_LOCATION_FIELD(location); READ_INT_FIELD(agg_expr_id); + READ_INT_FIELD(extrasplit); READ_DONE(); } @@ -1544,6 +1546,7 @@ _readRangeTblEntry(void) READ_NODE_FIELD(securityQuals); READ_BOOL_FIELD(forceDistRandom); + READ_NODE_FIELD(version); READ_DONE(); } @@ -1903,6 +1906,7 @@ ReadCommonScan(Scan *local_node) READ_UINT_FIELD(scanrelid); READ_UINT_FIELD(scanflags); #ifdef SERVERLESS + READ_NODE_FIELD(version); READ_OID_FIELD(basemv); #endif } diff --git a/src/backend/optimizer/plan/createplan.c b/src/backend/optimizer/plan/createplan.c index 98d7243bc62..1edacccae4b 100644 --- a/src/backend/optimizer/plan/createplan.c +++ b/src/backend/optimizer/plan/createplan.c @@ -664,7 +664,9 @@ create_scan_plan(PlannerInfo *root, Path *best_path, int flags) List *gating_clauses; List *tlist; Plan *plan; - +#ifdef SERVERLESS + RangeTblEntry *rte; +#endif /* * Extract the relevant restriction clauses from the parent relation. 
The * executor must apply all these restrictions during the scan, except for @@ -898,6 +900,13 @@ create_scan_plan(PlannerInfo *root, Path *best_path, int flags) DirectDispatchUpdateContentIdsFromPlan(root, plan); plan->locustype = best_path->locus.locustype; +#ifdef SERVERLESS + if (best_path->pathtype == T_SeqScan) + { + rte = planner_rt_fetch(rel->relid, root); + ((Scan*)plan)->version = rte->version; + } +#endif /* * If there are any pseudoconstant clauses attached to this node, insert a * gating Result node that evaluates the pseudoconstants as one-time diff --git a/src/backend/parser/gram.y b/src/backend/parser/gram.y index 885062f960a..554b73acc02 100644 --- a/src/backend/parser/gram.y +++ b/src/backend/parser/gram.y @@ -150,6 +150,13 @@ typedef struct GroupClause List *list; } GroupClause; +/* Private struct for the result of OptRefreshOption production */ +typedef struct RefreshOption +{ + bool deferred; + char *interval; +} RefreshOption; + /* ConstraintAttributeSpec yields an integer bitmask of these flags: */ #define CAS_NOT_DEFERRABLE 0x01 #define CAS_DEFERRABLE 0x02 @@ -276,6 +283,7 @@ static void check_expressions_in_partition_key(PartitionSpec *spec, core_yyscan_ DistributionKeyElem *dkelem; SetQuantifier setquantifier; struct GroupClause *groupclause; + struct RefreshOption *refresh_option; } %type stmt toplevel_stmt schema_stmt routine_body_stmt @@ -488,6 +496,7 @@ static void check_expressions_in_partition_key(PartitionSpec *spec, core_yyscan_ %type opt_routine_body %type group_clause +%type OptRefreshOption %type group_by_list %type group_by_item empty_grouping_set rollup_clause cube_clause %type grouping_sets_clause @@ -760,7 +769,7 @@ static void check_expressions_in_partition_key(PartitionSpec *spec, core_yyscan_ CACHE CALL CALLED CASCADE CASCADED CASE CAST CATALOG_P CHAIN CHAR_P CHARACTER CHARACTERISTICS CHECK CHECKPOINT CLASS CLOSE - CLUSTER COALESCE COLLATE COLLATION COLUMN COLUMNS COMMENT COMMENTS COMMIT + CLUSTER COALESCE COLLATE 
COLLATION COLUMN COLUMNS COMBINE COMMENT COMMENTS COMMIT COMMITTED COMPRESSION CONCURRENTLY CONFIGURATION CONFLICT CONNECTION CONSTRAINT CONCURRENCY CONSTRAINTS CONTENT_P CONTINUE_P CONVERSION_P COPY COST CREATE @@ -7405,7 +7414,7 @@ CreateMatViewStmt: ; create_mv_target: - qualified_name opt_column_list table_access_method_clause opt_reloptions OptTableSpace + qualified_name opt_column_list table_access_method_clause opt_reloptions OptTableSpace OptRefreshOption { $$ = makeNode(IntoClause); $$->rel = $1; @@ -7419,6 +7428,11 @@ create_mv_target: $$->ivm = false; $$->dynamicTbl = false; $$->schedule = NULL; + if ($6) + { + $$->defer = $6->deferred; + $$->interval = $6->interval; + } $$->accessMethod = greenplumLegacyAOoptions($$->accessMethod, &$$->options); } @@ -7432,6 +7446,34 @@ OptNoLog: UNLOGGED { $$ = RELPERSISTENCE_UNLOGGED; } | /*EMPTY*/ { $$ = RELPERSISTENCE_PERMANENT; } ; +OptRefreshOption: + REFRESH IMMEDIATE + { + RefreshOption *n = (RefreshOption *) palloc(sizeof(RefreshOption)); + n->deferred = false; + n->interval = NULL; + $$ = n; + } + | REFRESH DEFERRED + { + RefreshOption *n = (RefreshOption *) palloc(sizeof(RefreshOption)); + n->deferred = true; + n->interval = NULL; + $$ = n; + } + | REFRESH DEFERRED SCHEDULE Sconst + { + RefreshOption *n = (RefreshOption *) palloc(sizeof(RefreshOption)); + n->deferred = true; + n->interval = $4; + $$ = n; + } + | /*EMPTY*/ + { + $$ = NULL; + } + ; + /***************************************************************************** * * QUERY : @@ -7441,12 +7483,23 @@ OptNoLog: UNLOGGED { $$ = RELPERSISTENCE_UNLOGGED; } *****************************************************************************/ RefreshMatViewStmt: - REFRESH MATERIALIZED VIEW opt_concurrently qualified_name opt_with_data + COMBINE INCREMENTAL MATERIALIZED VIEW qualified_name + { + RefreshMatViewStmt *n = makeNode(RefreshMatViewStmt); + n->incremental = true; + n->concurrent = false; + n->relation = $5; + n->skipData = false; + n->combine = true; + 
$$ = (Node *) n; + } + | REFRESH incremental MATERIALIZED VIEW opt_concurrently qualified_name opt_with_data { RefreshMatViewStmt *n = makeNode(RefreshMatViewStmt); - n->concurrent = $4; - n->relation = $5; - n->skipData = !($6); + n->incremental = $2; + n->concurrent = $5; + n->relation = $6; + n->skipData = !($7); n->isdynamic = false; $$ = (Node *) n; } @@ -19779,6 +19832,7 @@ unreserved_keyword: | CLOSE | CLUSTER | COLUMNS + | COMBINE | COMMENT | COMMENTS | COMMIT @@ -20726,6 +20780,7 @@ bare_label_keyword: | COLLATION | COLUMN | COLUMNS + | COMBINE | COMMENT | COMMENTS | COMMIT diff --git a/src/backend/parser/parse_relation.c b/src/backend/parser/parse_relation.c index 9e82bd85c75..ae78c7d746e 100644 --- a/src/backend/parser/parse_relation.c +++ b/src/backend/parser/parse_relation.c @@ -1513,7 +1513,7 @@ addRangeTableEntry(ParseState *pstate, rte->relid = RelationGetRelid(rel); rte->relkind = rel->rd_rel->relkind; rte->rellockmode = lockmode; - rte->relisivm = rel->rd_rel->relisivm; + rte->relisivm = rel->rd_rel->relisivm != MATVIEW_IVM_NOTHING ? true : false; /* * Build the list of effective column names using user-supplied aliases @@ -1603,7 +1603,7 @@ addRangeTableEntryForRelation(ParseState *pstate, rte->relid = RelationGetRelid(rel); rte->relkind = rel->rd_rel->relkind; rte->rellockmode = lockmode; - rte->relisivm = rel->rd_rel->relisivm; + rte->relisivm = rel->rd_rel->relisivm != MATVIEW_IVM_NOTHING ? true : false; /* * Build the list of effective column names using user-supplied aliases diff --git a/src/backend/rewrite/rewriteDefine.c b/src/backend/rewrite/rewriteDefine.c index 19973c195da..aaf385c8426 100644 --- a/src/backend/rewrite/rewriteDefine.c +++ b/src/backend/rewrite/rewriteDefine.c @@ -391,11 +391,21 @@ DefineQueryRewrite(const char *rulename, * ... the targetlist of the SELECT action must exactly match the * event relation, ... 
*/ - checkRuleResultList(query->targetList, + if (RelationGetPartialAgg(event_relation)) + { + /* + * For partial materialized views, we don't check the targetlist + * because it might contain special columns for IVM. + */ + } + else + { + checkRuleResultList(query->targetList, RelationGetDescr(event_relation), true, event_relation->rd_rel->relkind != RELKIND_MATVIEW); + } /* * ... there must not be another ON SELECT rule already ... diff --git a/src/backend/storage/file/buffile.c b/src/backend/storage/file/buffile.c index ceee010f41b..1fdaf7871ed 100644 --- a/src/backend/storage/file/buffile.c +++ b/src/backend/storage/file/buffile.c @@ -1620,5 +1620,5 @@ BufFileSetIsTempFile(BufFile *file, bool isTempFile) /* close and delete the underlying file(s) */ for (i = 0; i < file->numFiles; i++) - FileSetTempfile(file->files[i], true); + FileSetTempfile(file->files[i], isTempFile); } diff --git a/src/backend/task/job_metadata.c b/src/backend/task/job_metadata.c index e603d5fce52..78325e9bc08 100644 --- a/src/backend/task/job_metadata.c +++ b/src/backend/task/job_metadata.c @@ -146,7 +146,8 @@ GetCronJob(int64 jobId) */ int64 ScheduleCronJob(text *scheduleText, text *commandText, text *databaseText, - text *usernameText, bool active, text *jobnameText) + text *usernameText, bool active, text *jobnameText, + const char* warehouse) { entry *parsedSchedule = NULL; char *schedule; @@ -215,7 +216,7 @@ ScheduleCronJob(text *scheduleText, text *commandText, text *databaseText, /* insert task into pg_catalog.pg_task table */ jobId = TaskCreate(schedule, command, task_host_addr, PostPortNumber, - database_name, username, active, jobName); + database_name, username, active, jobName, warehouse); SetUserIdAndSecContext(savedUserId, savedSecurityContext); @@ -566,6 +567,20 @@ TupleToCronJob(TupleDesc tupleDescriptor, HeapTuple heapTuple) job->jobName = NULL; } } + if (tupleDescriptor->natts >= Anum_pg_task_warehouse) + { + bool isWarehouseNull = false; + Datum warehouse = 
heap_getattr(heapTuple, Anum_pg_task_warehouse, + tupleDescriptor, &isWarehouseNull); + if (!isWarehouseNull) + { + job->warehouse = TextDatumGetCString(warehouse); + } + else + { + job->warehouse = NULL; + } + } parsedSchedule = ParseSchedule(job->scheduleText); if (parsedSchedule != NULL) diff --git a/src/backend/task/pg_cron.c b/src/backend/task/pg_cron.c index ac5cfbe82bc..a5757d09082 100644 --- a/src/backend/task/pg_cron.c +++ b/src/backend/task/pg_cron.c @@ -141,7 +141,7 @@ bool task_log_statement = true; bool task_log_run = true; bool task_use_background_worker = false; char *task_timezone = "GMT"; -int max_running_tasks = 5; +int max_running_tasks = 50; char *task_host_addr = "127.0.0.1"; static pg_tz *task_timezone_tz = NULL; @@ -418,6 +418,8 @@ PgCronLauncherMain(Datum arg) WaitForCronTasks(taskList); ManageCronTasks(taskList, currentTime); + increment_command_count(); + MemoryContextReset(CronLoopContext); } @@ -1033,10 +1035,14 @@ ManageCronTask(CronTask *task, TimestampTz currentTime) } task->pendingRunCount -= 1; +#ifndef SERVERLESS if (task_use_background_worker) task->state = CRON_TASK_BGW_START; else task->state = CRON_TASK_START; +#else + task->state = CRON_TASK_START; +#endif task->lastStartTime = currentTime; @@ -1060,6 +1066,8 @@ ManageCronTask(CronTask *task, TimestampTz currentTime) { const char *clientEncoding = GetDatabaseEncodingName(); char nodePortString[12]; + StringInfoData options_buf; + initStringInfo(&options_buf); TimestampTz startDeadline = 0; const char *keywordArray[] = { @@ -1069,6 +1077,7 @@ ManageCronTask(CronTask *task, TimestampTz currentTime) "client_encoding", "dbname", "user", + "options", NULL }; const char *valueArray[] = { @@ -1078,9 +1087,15 @@ ManageCronTask(CronTask *task, TimestampTz currentTime) clientEncoding, cronJob->database, cronJob->userName, + options_buf.data, NULL }; sprintf(nodePortString, "%d", cronJob->nodePort); + if (cronJob->warehouse) + appendStringInfo(&options_buf, "-c optimizer=off" + " -c 
enable_answer_query_using_materialized_views=off" + " -c gp_command_count=%d -c warehouse=%s", + gp_command_count, cronJob->warehouse); Assert(sizeof(keywordArray) == sizeof(valueArray)); @@ -1338,7 +1353,6 @@ ManageCronTask(CronTask *task, TimestampTz currentTime) case CRON_TASK_SENDING: { - char *command = cronJob->command; int sendResult = 0; Assert(!task_use_background_worker); @@ -1367,7 +1381,7 @@ ManageCronTask(CronTask *task, TimestampTz currentTime) break; } - sendResult = PQsendQuery(connection, command); + sendResult = PQsendQuery(connection, cronJob->command); if (sendResult == 1) { /* wait for socket to be ready to receive results */ diff --git a/src/backend/utils/cache/lsyscache.c b/src/backend/utils/cache/lsyscache.c index c541736fde4..242b4854e70 100644 --- a/src/backend/utils/cache/lsyscache.c +++ b/src/backend/utils/cache/lsyscache.c @@ -1993,6 +1993,22 @@ get_agg_transtype(Oid aggid) return result; } + +Oid +get_agg_transfn(Oid aggid) +{ + HeapTuple tp; + Oid result; + + tp = SearchSysCache1(AGGFNOID, ObjectIdGetDatum(aggid)); + if (!HeapTupleIsValid(tp)) + elog(ERROR, "cache lookup failed for aggregate %u", aggid); + + result = ((Form_pg_aggregate) GETSTRUCT(tp))->aggtransfn; + ReleaseSysCache(tp); + return result; +} + /* * is_ordered_agg * Given aggregate id, check if it is an ordered aggregate @@ -2084,7 +2100,7 @@ is_agg_partial_capable(Oid aggid) * * Returns the relisivm flag associated with a given relation. 
*/ -bool +char get_rel_relisivm(Oid relid) { HeapTuple tp; @@ -2093,14 +2109,30 @@ get_rel_relisivm(Oid relid) if (HeapTupleIsValid(tp)) { Form_pg_class reltup = (Form_pg_class) GETSTRUCT(tp); - bool result; + char result; result = reltup->relisivm; ReleaseSysCache(tp); return result; } else - return false; + return '\0'; +} + + +bool +get_rel_haspartialagg(Oid relid) +{ + HeapTuple tp; + bool result; + + tp = SearchSysCache1(RELOID, ObjectIdGetDatum(relid)); + if (!HeapTupleIsValid(tp)) + elog(ERROR, "cache lookup failed for relation %u", relid); + + result = ((Form_pg_class) GETSTRUCT(tp))->relhaspartialagg; + ReleaseSysCache(tp); + return result; } /* diff --git a/src/backend/utils/cache/relcache.c b/src/backend/utils/cache/relcache.c index 2c75e31c0d2..baee72685a5 100644 --- a/src/backend/utils/cache/relcache.c +++ b/src/backend/utils/cache/relcache.c @@ -2049,7 +2049,7 @@ formrdesc(const char *relationName, Oid relationReltype, /* ... and they're always populated, too */ relation->rd_rel->relispopulated = true; /* ... and they're always no ivm, too */ - relation->rd_rel->relisivm = false; + relation->rd_rel->relisivm = MATVIEW_IVM_NOTHING; /* ... and they're always not dynamic, too */ relation->rd_rel->relisdynamic = false; relation->rd_rel->relmvrefcount = 0; @@ -3863,6 +3863,7 @@ RelationBuildLocalRelation(const char *relname, else rel->rd_rel->relreplident = REPLICA_IDENTITY_NOTHING; + rel->rd_rel->relisivm = MATVIEW_IVM_NOTHING; /* * Insert relation physical and logical identifiers (OIDs) into the right * places. 
For a mapped relation, we set relfilenode to zero and rely on diff --git a/src/backend/utils/init/miscinit.c b/src/backend/utils/init/miscinit.c index a8e7ff0f057..44dd832cc3c 100644 --- a/src/backend/utils/init/miscinit.c +++ b/src/backend/utils/init/miscinit.c @@ -30,6 +30,7 @@ #include "access/htup_details.h" #include "catalog/pg_authid.h" +#include "catalog/gp_warehouse.h" #include "common/file_perm.h" #include "libpq/libpq.h" #include "libpq/pqsignal.h" @@ -986,6 +987,30 @@ SetCurrentWarehouseId(Oid warehouseid) CurrentWarehouseId = warehouseid; } +char * +GpGetWarehouseName(Oid warehouse_oid, bool missing_ok) +{ + HeapTuple tuple; + char *result; + + tuple = SearchSysCache1(GPWAREHOUSEOID, ObjectIdGetDatum(warehouse_oid)); + if (HeapTupleIsValid(tuple)) + { + result = text_to_cstring(&((Form_gp_warehouse) GETSTRUCT(tuple))->warehouse_name); + ReleaseSysCache(tuple); + } + else + { + if (!missing_ok) + ereport(ERROR, + (errcode(ERRCODE_UNDEFINED_OBJECT), + errmsg("warehouse with oid %u does not exist", warehouse_oid))); + result = NULL; + } + + return result; +} + /*------------------------------------------------------------------------- * Interlock-file support * diff --git a/src/backend/utils/misc/guc_gp.c b/src/backend/utils/misc/guc_gp.c index 83481b14961..c29b87350e3 100644 --- a/src/backend/utils/misc/guc_gp.c +++ b/src/backend/utils/misc/guc_gp.c @@ -3850,7 +3850,7 @@ struct config_int ConfigureNamesInt_gp[] = #endif { - {"gp_command_count", PGC_INTERNAL, CLIENT_CONN_OTHER, + {"gp_command_count", PGC_BACKEND, CLIENT_CONN_OTHER, gettext_noop("Shows the number of commands received from the client in this session."), NULL, GUC_NOT_IN_SAMPLE | GUC_DISALLOW_IN_FILE diff --git a/src/backend/utils/sort/tuplestore.c b/src/backend/utils/sort/tuplestore.c index c67899211ca..92d70492070 100644 --- a/src/backend/utils/sort/tuplestore.c +++ b/src/backend/utils/sort/tuplestore.c @@ -1991,5 +1991,5 @@ void tuplestore_set_flags(Tuplestorestate *state, bool isTemp) { 
/* Set the file as a temporary file */ - BufFileSetIsTempFile(state->myfile, true); + BufFileSetIsTempFile(state->myfile, isTemp); } diff --git a/src/bin/pg_dump/pg_dump.c b/src/bin/pg_dump/pg_dump.c index a81ca91b3cb..cc5f9c0bb41 100644 --- a/src/bin/pg_dump/pg_dump.c +++ b/src/bin/pg_dump/pg_dump.c @@ -394,6 +394,7 @@ static bool isGPDB(Archive *fout); static bool isGPDB5000OrLater(Archive *fout); static bool isGPDB6000OrLater(Archive *fout); static void error_unsupported_server_version(Archive *fout) pg_attribute_noreturn(); +static PQExpBuffer createViewRefreshClause(Archive *fout, const TableInfo *tbinfo); /* END MPP ADDITION */ @@ -18452,6 +18453,13 @@ dumpTableSchema(Archive *fout, const TableInfo *tbinfo) if (tbinfo->relkind == RELKIND_MATVIEW) { PQExpBuffer result; + if (tbinfo->isivm == MATVIEW_IVM_DEFERRED) + { + appendPQExpBuffer(q, " REFRESH DEFERRED"); + result = createViewRefreshClause(fout, tbinfo); + appendPQExpBuffer(q, " SCHEDULE '%s'", result->data); + destroyPQExpBuffer(result); + } result = createViewAsClause(fout, tbinfo); appendPQExpBuffer(q, " AS\n%s\n WITH NO DATA", @@ -21802,4 +21810,44 @@ nextToken(register char **stringp, register const char *delim) /* NOTREACHED */ } + +static PQExpBuffer +createViewRefreshClause(Archive *fout, const TableInfo *tbinfo) +{ + PQExpBuffer query = createPQExpBuffer(); + PQExpBuffer result = createPQExpBuffer(); + PGresult *res; + int len; + + /* Fetch the task definition */ + appendPQExpBuffer(query, + "SELECT schedule from pg_catalog.pg_task WHERE jobname = 'ivm_task_%u';", + tbinfo->dobj.catId.oid); + + res = ExecuteSqlQuery(fout, query->data, PGRES_TUPLES_OK); + + if (PQntuples(res) != 1) + { + if (PQntuples(res) < 1) + fatal("query to obtain definition of view \"%s\" returned no data", + tbinfo->dobj.name); + else + fatal("query to obtain definition of view \"%s\" returned more than one definition", + tbinfo->dobj.name); + } + + len = PQgetlength(res, 0, 0); + + if (len == 0) + fatal("definition of view 
\"%s\" appears to be empty (length zero)", + tbinfo->dobj.name); + + appendBinaryPQExpBuffer(result, PQgetvalue(res, 0, 0), len); + + PQclear(res); + destroyPQExpBuffer(query); + + return result; +} + /* END MPP ADDITION */ diff --git a/src/bin/psql/describe.c b/src/bin/psql/describe.c index 76251610a03..9307d8ff83c 100644 --- a/src/bin/psql/describe.c +++ b/src/bin/psql/describe.c @@ -2157,6 +2157,7 @@ describeOneTableDetails(const char *schemaname, (char *) NULL : pg_strdup(PQgetvalue(res, 0, 14)); else tableinfo.relam = NULL; + tableinfo.isivm = *(PQgetvalue(res, 0, 15)); tableinfo.isdynamic = strcmp(PQgetvalue(res, 0, 16), "t") == 0; @@ -4154,9 +4155,10 @@ describeOneTableDetails(const char *schemaname, } /* Incremental view maintance info */ - if (verbose && tableinfo.relkind == RELKIND_MATVIEW && tableinfo.isivm) + if (verbose && tableinfo.relkind == RELKIND_MATVIEW && tableinfo.isivm != MATVIEW_IVM_NOTHING) { - printTableAddFooter(&cont, _("Incremental view maintenance: yes")); + printfPQExpBuffer(&buf, _("Incremental view maintenance: %c"), tableinfo.isivm); + printTableAddFooter(&cont, buf.data); } } diff --git a/src/bin/psql/tab-complete.c b/src/bin/psql/tab-complete.c index f934e34e12c..0090b08db4e 100644 --- a/src/bin/psql/tab-complete.c +++ b/src/bin/psql/tab-complete.c @@ -3198,13 +3198,14 @@ psql_completion(const char *text, int start, int end) else if (Matches("CREATE", "MATERIALIZED") || Matches("CREATE", "INCREMENTAL", "MATERIALIZED")) COMPLETE_WITH("VIEW"); - /* Complete CREATE MATERIALIZED VIEW with AS */ - else if (Matches("CREATE", "MATERIALIZED", "VIEW", MatchAny) || - Matches("CREATE", "INCREMENTAL", "MATERIALIZED", "VIEW", MatchAny)) - COMPLETE_WITH("AS"); - /* Complete "CREATE MATERIALIZED VIEW AS with "SELECT" */ - else if (Matches("CREATE", "MATERIALIZED", "VIEW", MatchAny, "AS") || - Matches("CREATE", "INCREMENTAL", "MATERIALIZED", "VIEW", MatchAny, "AS")) + /* Complete CREATE [INCREMENTAL] MATERIALIZED VIEW [IF NOT 
EXISTS] with AS or REFRESH */ + else if (Matches("CREATE", "INCREMENTAL", "MATERIALIZED", "VIEW", MatchAny)) + COMPLETE_WITH("AS", "REFRESH"); + /* Complete CREATE [INCREMENTAL] MATERIALIZED VIEW [IF NOT EXISTS] REFRESH with IMMEDIATE or DEFERRED */ + else if (Matches("CREATE", "INCREMENTAL", "MATERIALIZED", "VIEW", MatchAny, "REFRESH")) + COMPLETE_WITH("IMMEDIATE", "DEFERRED SCHEDULE"); + /* Complete "CREATE [INCREMENTAL] MATERIALIZED VIEW [IF NOT EXISTS] AS with "SELECT" */ + else if (Matches("CREATE", "INCREMENTAL", "MATERIALIZED", "VIEW", MatchAny, "AS")) COMPLETE_WITH("SELECT"); /* CREATE DYNAMIC TABLE */ @@ -3856,6 +3857,8 @@ psql_completion(const char *text, int start, int end) /* REFRESH MATERIALIZED VIEW */ else if (Matches("REFRESH")) COMPLETE_WITH("MATERIALIZED VIEW", "DYNAMIC TABLE"); + else if (Matches("REFRESH", "INCREMENTAL")) + COMPLETE_WITH("MATERIALIZED VIEW"); else if (Matches("REFRESH", "MATERIALIZED")) COMPLETE_WITH("VIEW"); else if (Matches("REFRESH", "MATERIALIZED", "VIEW")) diff --git a/src/include/catalog/gp_matview_dependency.h b/src/include/catalog/gp_matview_dependency.h new file mode 100644 index 00000000000..21cece47b13 --- /dev/null +++ b/src/include/catalog/gp_matview_dependency.h @@ -0,0 +1,51 @@ +/*------------------------------------------------------------------------- + * + * gp_matview_dependency.h + * + * Portions Copyright (c) 2024, HashData Technology Limited. 
+ * + * gp_matview_dependency.h + * Definition about catalog of matviews dependency + * + * IDENTIFICATION + * src/include/catalog/gp_matview_dependency.h + * + *------------------------------------------------------------------------- + */ +#ifndef GP_MATVIEW_DEPENDENCY_H +#define GP_MATVIEW_DEPENDENCY_H + +#include "catalog/genbki.h" +#include "catalog/gp_matview_dependency_d.h" + +#include "nodes/pathnodes.h" + +#define timestamptz Datum + +CATALOG(gp_matview_dependency,8757,MatviewDependencyId) BKI_SHARED_RELATION +{ + Oid matviewid BKI_FORCE_NOT_NULL; + int8 trans_version; + bool defer; + bool partial; + bool isvaild; + timestamptz refresh_time; + int8 combine_version; + oidvector relids; +} FormData_gp_matview_dependency; + + +typedef FormData_gp_matview_dependency *Form_gp_matview_dependency; + + +extern void create_matview_dependency_tuple(Oid matviewOid, Relids relids, bool defer); +extern Datum get_matview_dependency_relids(Oid matviewOid); +extern void mark_matview_dependency_valid(Oid matviewOid, bool isvaild); +extern void remove_matview_dependency_byoid(Oid matviewOid); +extern void record_restart_trans_version(Oid matviewOid, uint64 version, TimestampTz ftime); +extern void record_restart_combine_version(Oid matviewOid, uint64 version, TimestampTz ftime); +extern uint64 get_restart_trans_version(Oid matviewOid, Snapshot snapshot); + +#undef timestamptz + +#endif /* GP_MATVIEW_DEPENDENCY_H */ diff --git a/src/include/catalog/pg_class.h b/src/include/catalog/pg_class.h index b600fe377e9..5810ae4d47e 100644 --- a/src/include/catalog/pg_class.h +++ b/src/include/catalog/pg_class.h @@ -119,8 +119,8 @@ CATALOG(pg_class,1259,RelationRelationId) BKI_BOOTSTRAP BKI_ROWTYPE_OID(83,Relat /* is relation a partition? */ bool relispartition BKI_DEFAULT(f); - /* is relation a matview with ivm? */ - bool relisivm BKI_DEFAULT(f); + /* is relation a matview with ivm or defer ivm ? */ + char relisivm BKI_DEFAULT(n); /* is relation a dynamic table? 
*/ bool relisdynamic BKI_DEFAULT(f); @@ -229,6 +229,9 @@ DECLARE_INDEX(pg_class_tblspc_relfilenode_index, 3455, on pg_class using btree(r */ #define REPLICA_IDENTITY_INDEX 'i' +#define MATVIEW_IVM_NOTHING 'n' /* not ivm */ +#define MATVIEW_IVM_DEFERRED 'd' /* defer ivm */ +#define MATVIEW_IVM_IMMEDIATE 'i' /* immediate ivm */ /* * Relation kinds that have physical storage. These relations normally have * relfilenode set to non-zero, but it can also be zero if the relation is diff --git a/src/include/catalog/pg_task.h b/src/include/catalog/pg_task.h index d586d79ca0d..2fe18353614 100644 --- a/src/include/catalog/pg_task.h +++ b/src/include/catalog/pg_task.h @@ -47,6 +47,7 @@ CATALOG(pg_task,9637,TaskRelationId) BKI_SHARED_RELATION text username; bool active BKI_DEFAULT(t); text jobname; + text warehouse; } FormData_pg_task; typedef FormData_pg_task *Form_pg_task; @@ -59,7 +60,7 @@ DECLARE_UNIQUE_INDEX_PKEY(pg_task_jobid_index, 8916, on pg_task using btree(jobi extern Oid TaskCreate(const char *schedule, const char *command, const char *nodename, int32 nodeport, const char *database, const char *username, - bool active, const char *jobname); + bool active, const char *jobname, const char* warehouse); extern void TaskUpdate(Oid jobid, const char *schedule, const char *command, const char *database, diff --git a/src/include/commands/createas.h b/src/include/commands/createas.h index ff71b7be104..563bd4ae42b 100644 --- a/src/include/commands/createas.h +++ b/src/include/commands/createas.h @@ -43,10 +43,10 @@ extern ObjectAddress ExecCreateTableAs(ParseState *pstate, CreateTableAsStmt *st ParamListInfo params, QueryEnvironment *queryEnv, QueryCompletion *qc); -extern void CreateIvmTriggersOnBaseTables(Query *qry, Oid matviewOid); +extern void CreateIvmTriggersOnBaseTables(Query *qry, Oid matviewOid, bool partial, bool create); extern void CreateIndexOnIMMV(Query *query, Relation matviewRel); -extern Query *rewriteQueryForIMMV(Query *query, List *colNames); +extern Query 
*rewriteQueryForIMMV(Query *query, List *colNames, bool partial); extern void makeIvmAggColumn(ParseState *pstate, Aggref *aggref, char *resname, AttrNumber *next_resno, List **aggs); extern int GetIntoRelEFlags(IntoClause *intoClause); @@ -60,5 +60,6 @@ extern void intorel_initplan(struct QueryDesc *queryDesc, int eflags); extern bool CreateTableAsRelExists(CreateTableAsStmt *ctas); +extern ObjectAddress CreateTaskIVM(ParseState *pstate, Relation rel, char* interval); #endif /* CREATEAS_H */ diff --git a/src/include/commands/matview.h b/src/include/commands/matview.h index c8643aa6430..76d9cd27221 100644 --- a/src/include/commands/matview.h +++ b/src/include/commands/matview.h @@ -21,6 +21,7 @@ #include "nodes/parsenodes.h" #include "tcop/dest.h" #include "utils/relcache.h" +#include "parser/parse_node.h" typedef struct { @@ -44,10 +45,11 @@ extern PGDLLIMPORT transientrel_init_hook_type transientrel_init_hook; extern void SetMatViewPopulatedState(Relation relation, bool newstate); -extern void SetMatViewIVMState(Relation relation, bool newstate); +extern void SetMatViewIVMState(Relation relation, char newstate); extern void SetDynamicTableState(Relation relation); +extern void refresh_by_heap_swap(Oid matviewOid, Oid OIDNewHeap, char relpersistence); extern ObjectAddress ExecRefreshMatView(RefreshMatViewStmt *stmt, const char *queryString, ParamListInfo params, QueryCompletion *qc); @@ -55,6 +57,12 @@ extern DestReceiver *CreateTransientRelDestReceiver(Oid oid, Oid oldreloid, bool char relpersistence, bool skipdata); extern bool MatViewIncrementalMaintenanceIsEnabled(void); +extern void OpenMatViewIncrementalMaintenance(void); +extern void CloseMatViewIncrementalMaintenance(void); +extern void SaveMatViewMaintenanceDepth(void); +extern void RestoreMatViewMaintenanceDepth(void); + +extern Query *get_matview_query(Relation matviewRel); extern void transientrel_init_internal(QueryDesc *queryDesc); diff --git a/src/include/executor/tstoreReceiver.h 
b/src/include/executor/tstoreReceiver.h index 3827400fb9d..b68f3403743 100644 --- a/src/include/executor/tstoreReceiver.h +++ b/src/include/executor/tstoreReceiver.h @@ -32,6 +32,7 @@ extern void SetPersistentTstoreDestReceiverParams(DestReceiver *self, ResourceOwner owner, MemoryContext ctx, bool detoast, - const char *filename); + const char *filename, + bool defer); #endif /* TSTORE_RECEIVER_H */ diff --git a/src/include/miscadmin.h b/src/include/miscadmin.h index c51493bf292..6fad304d403 100644 --- a/src/include/miscadmin.h +++ b/src/include/miscadmin.h @@ -466,6 +466,7 @@ extern Oid GetCurrentRoleId(void); extern void SetCurrentRoleId(Oid roleid, bool is_superuser); extern Oid GetCurrentWarehouseId(void); extern void SetCurrentWarehouseId(Oid warehouseid); +extern char *GpGetWarehouseName(Oid warehouse_oid, bool missing_ok); /* in utils/misc/superuser.c */ extern bool superuser(void); /* current user is superuser */ diff --git a/src/include/nodes/nodes.h b/src/include/nodes/nodes.h index 1df98bc4e0a..60be05b4253 100644 --- a/src/include/nodes/nodes.h +++ b/src/include/nodes/nodes.h @@ -981,6 +981,8 @@ typedef enum AggStrategy #define AGGSPLITOP_DESERIALIZE 0x08 /* apply deserialfn to input */ #define AGGSPLITOP_DEDUPLICATED 0x100 +#define AGGSPLITOP_REPLACE_FINAL 0x200 +#define AGGSPLITOP_REPLACE_TRANS 0x400 /* Supported operating modes (i.e., useful combinations of these options): */ typedef enum AggSplit @@ -1009,7 +1011,8 @@ typedef enum AggSplit #define DO_AGGSPLIT_DESERIALIZE(as) (((as) & AGGSPLITOP_DESERIALIZE) != 0) #define DO_AGGSPLIT_DEDUPLICATED(as) (((as) & AGGSPLITOP_DEDUPLICATED) != 0) - +#define DO_AGGSPLIT_REPLACE_FINAL(as) (((as) & AGGSPLITOP_REPLACE_FINAL) != 0) +#define DO_AGGSPLIT_REPLACE_TRANS(as) (((as) & AGGSPLITOP_REPLACE_TRANS) != 0) /* * SetOpCmd and SetOpStrategy - * overall semantics and execution strategies for SetOp plan nodes diff --git a/src/include/nodes/parsenodes.h b/src/include/nodes/parsenodes.h index 
9d8b12ca4a6..da06364d0f7 100644 --- a/src/include/nodes/parsenodes.h +++ b/src/include/nodes/parsenodes.h @@ -1253,6 +1253,7 @@ typedef struct RangeTblEntry Bitmapset *updatedCols; /* columns needing UPDATE permission */ Bitmapset *extraUpdatedCols; /* generated columns being updated */ List *securityQuals; /* security barrier quals to apply, if any */ + List *version; /* delta scan version */ } RangeTblEntry; /* @@ -4163,6 +4164,8 @@ typedef struct RefreshMatViewStmt bool skipData; /* true for WITH NO DATA */ RangeVar *relation; /* relation to insert into */ bool isdynamic; /* relation is dynamic table? */ + bool incremental; /* true for incremental refresh */ + bool combine; /* combine current results for defer ivm */ } RefreshMatViewStmt; /* ---------------------- diff --git a/src/include/nodes/plannodes.h b/src/include/nodes/plannodes.h index 098c20f8fe3..91c0cda8e45 100644 --- a/src/include/nodes/plannodes.h +++ b/src/include/nodes/plannodes.h @@ -560,8 +560,9 @@ typedef struct Scan { Plan plan; Index scanrelid; /* relid is index into the range table */ - uint32 scanflags; /* extra scan flags */ + uint32 scanflags; /* extra scan flags */ #ifdef SERVERLESS + List* version; /* delta scan version */ /* * Base materialized view oid for delta scan. * If valid, it means a Delta SeqScan based on diff --git a/src/include/nodes/primnodes.h b/src/include/nodes/primnodes.h index eadb082fb39..95e4d5bc2c5 100644 --- a/src/include/nodes/primnodes.h +++ b/src/include/nodes/primnodes.h @@ -121,8 +121,10 @@ typedef struct IntoClause char *tableSpaceName; /* table space to use, or NULL */ Node *viewQuery; /* materialized view's SELECT query */ bool skipData; /* true for WITH NO DATA */ + bool defer; /* true for DEFERRED REFRESH */ Node *distributedBy; /* GPDB: columns to distribubte the data on. 
*/ bool ivm; /* true for WITH IVM */ + char *interval; /* interval for IVM REFRESH */ Oid matviewOid; /* matview oid */ char *enrname; /* ENR name for materialized view delta */ bool dynamicTbl; /* true for Dynamic Tables. */ @@ -374,6 +376,7 @@ typedef struct Aggref int location; /* token location, or -1 if unknown */ int agg_expr_id; /* gpdb private, if aggref is distinct qualified, record AggExprId value in tuple split */ + int extrasplit; /* gpdb private, extra agg-splitting mode of parent Agg */ } Aggref; typedef struct diff --git a/src/include/parser/kwlist.h b/src/include/parser/kwlist.h index 0d442a578f9..238c0a91313 100644 --- a/src/include/parser/kwlist.h +++ b/src/include/parser/kwlist.h @@ -90,6 +90,7 @@ PG_KEYWORD("collate", COLLATE, RESERVED_KEYWORD, BARE_LABEL) PG_KEYWORD("collation", COLLATION, TYPE_FUNC_NAME_KEYWORD, BARE_LABEL) PG_KEYWORD("column", COLUMN, RESERVED_KEYWORD, BARE_LABEL) PG_KEYWORD("columns", COLUMNS, UNRESERVED_KEYWORD, BARE_LABEL) +PG_KEYWORD("combine", COMBINE, UNRESERVED_KEYWORD, BARE_LABEL) PG_KEYWORD("comment", COMMENT, UNRESERVED_KEYWORD, BARE_LABEL) PG_KEYWORD("comments", COMMENTS, UNRESERVED_KEYWORD, BARE_LABEL) PG_KEYWORD("commit", COMMIT, UNRESERVED_KEYWORD, BARE_LABEL) diff --git a/src/include/task/job_metadata.h b/src/include/task/job_metadata.h index a96a116bee5..293600deaac 100644 --- a/src/include/task/job_metadata.h +++ b/src/include/task/job_metadata.h @@ -37,6 +37,7 @@ typedef struct CronJob char *userName; bool active; char *jobName; + char *warehouse; } CronJob; extern bool CronJobCacheValid; @@ -56,7 +57,8 @@ extern char *GetCronStatus(CronStatus cronstatus); extern int64 ScheduleCronJob(text *scheduleText, text *commandText, text *databaseText, text *usernameText, - bool active, text *jobnameText); + bool active, text *jobnameText, + const char* warehouse); extern Oid UnscheduleCronJob(const char *jobname, const char *username, Oid jobid, bool missing_ok); diff --git a/src/include/utils/guc.h 
b/src/include/utils/guc.h index 9fff39eddd9..1d565822e87 100644 --- a/src/include/utils/guc.h +++ b/src/include/utils/guc.h @@ -667,6 +667,7 @@ extern IndexCheckType gp_indexcheck_insert; #define SOPT_COMPTYPE "compresstype" #define SOPT_COMPLEVEL "compresslevel" #define SOPT_CHECKSUM "checksum" +#define SOPT_PARTIAL_AGG "partial_agg" /* * Functions exported by guc.c diff --git a/src/include/utils/lsyscache.h b/src/include/utils/lsyscache.h index a6111a9cc4f..3ec39ce6dd4 100644 --- a/src/include/utils/lsyscache.h +++ b/src/include/utils/lsyscache.h @@ -165,6 +165,7 @@ extern char get_func_prokind(Oid funcid); extern bool get_func_leakproof(Oid funcid); extern char func_exec_location(Oid funcid); extern Oid get_agg_transtype(Oid aggid); +extern Oid get_agg_transfn(Oid aggid); extern bool is_agg_ordered(Oid aggid); extern bool is_agg_repsafe(Oid aggid); extern bool is_agg_partial_capable(Oid aggid); @@ -178,6 +179,7 @@ extern bool get_rel_relispartition(Oid relid); -extern bool get_rel_relisivm(Oid relid); +extern char get_rel_relisivm(Oid relid); extern bool get_rel_relisdynamic(Oid relid); extern int32 get_rel_relmvrefcount(Oid relid); +extern bool get_rel_haspartialagg(Oid relid); extern Oid get_rel_tablespace(Oid relid); extern char get_rel_persistence(Oid relid); extern Oid get_transform_fromsql(Oid typid, Oid langid, List *trftypes); diff --git a/src/include/utils/rel.h b/src/include/utils/rel.h index fe950163655..752556cbd1b 100644 --- a/src/include/utils/rel.h +++ b/src/include/utils/rel.h @@ -349,6 +349,7 @@ typedef struct StdRdOptions int compresslevel; /* compression level (AO rels only) */ char compresstype[NAMEDATALEN]; /* compression type (AO rels only) */ bool checksum; /* checksum (AO rels only) */ + bool partial_agg; /* partial aggregation result for ivm */ } StdRdOptions; #define HEAP_MIN_FILLFACTOR 10 @@ -404,6 +405,10 @@ typedef struct StdRdOptions ((relation)->rd_options ? 
\ ((StdRdOptions *) (relation)->rd_options)->parallel_workers : (defaultpw)) +#define RelationGetPartialAgg(relation) \ + ((relation)->rd_options ? \ + ((StdRdOptions *) (relation)->rd_options)->partial_agg : false) + /* ViewOptions->check_option values */ typedef enum ViewOptCheckOption { @@ -787,7 +792,7 @@ typedef struct ViewOptions */ #define RelationIsPopulated(relation) ((relation)->rd_rel->relispopulated) -#define RelationIsIVM(relation) ((relation)->rd_rel->relisivm) +#define RelationIsIVM(relation) ((relation)->rd_rel->relisivm != MATVIEW_IVM_NOTHING) #define RelationHasRelativeMV(relation) (((relation)->rd_rel->relmvrefcount) > 0) diff --git a/src/test/regress/expected/misc_sanity.out b/src/test/regress/expected/misc_sanity.out index 5d237c0ab69..f11337e709d 100644 --- a/src/test/regress/expected/misc_sanity.out +++ b/src/test/regress/expected/misc_sanity.out @@ -158,6 +158,7 @@ ORDER BY 1; gp_fastsequence gp_id gp_matview_aux + gp_matview_dependency gp_matview_tables gp_partition_template gp_version_at_initdb diff --git a/src/test/singlenode_regress/expected/misc_sanity.out b/src/test/singlenode_regress/expected/misc_sanity.out index 88ae3d4e927..6f63e7ffd2c 100644 --- a/src/test/singlenode_regress/expected/misc_sanity.out +++ b/src/test/singlenode_regress/expected/misc_sanity.out @@ -156,6 +156,7 @@ ORDER BY 1; gp_fastsequence gp_id gp_matview_aux + gp_matview_dependency gp_matview_tables gp_partition_template gp_version_at_initdb From 956fd4610fd28fd5c5bb7fafcc6ca4d1869a08f5 Mon Sep 17 00:00:00 2001 From: Jinbao Chen Date: Wed, 23 Apr 2025 15:21:01 +0800 Subject: [PATCH 116/152] Fix some compile error --- src/backend/catalog/dependency.c | 3 --- src/backend/tcop/postgres.c | 2 +- src/include/postgres.h | 4 ++-- 3 files changed, 3 insertions(+), 6 deletions(-) diff --git a/src/backend/catalog/dependency.c b/src/backend/catalog/dependency.c index f198048111c..3fd38c362e7 100644 --- a/src/backend/catalog/dependency.c +++ b/src/backend/catalog/dependency.c 
@@ -3059,9 +3059,6 @@ getObjectClass(const ObjectAddress *object) case ManifestRelationId: return OCLASS_MAIN_MANIFEST; - case ManifestRelationId: - return OCLASS_MAIN_MANIFEST; - case GpWarehouseRelationId: return OCLASS_WAREHOUSE; diff --git a/src/backend/tcop/postgres.c b/src/backend/tcop/postgres.c index d6ce8ce4373..936030dc307 100644 --- a/src/backend/tcop/postgres.c +++ b/src/backend/tcop/postgres.c @@ -163,7 +163,7 @@ HandleTxnCommand_hook_type HandleTxnCommand_hook = NULL; /* * Hook for plugins to process query */ -exec_simple_query_hook execSimpleQuery_Hook = NULL; +execSimpleQuery_Hook_type execSimpleQuery_Hook = NULL; /* ---------------- * private typedefs etc diff --git a/src/include/postgres.h b/src/include/postgres.h index 9c69693bb1b..c2614dc6cf3 100644 --- a/src/include/postgres.h +++ b/src/include/postgres.h @@ -605,6 +605,6 @@ extern void ExceptionalCondition(const char *conditionName, const char *errorType, const char *fileName, int lineNumber) pg_attribute_noreturn(); -typedef void (*exec_simple_query_hook) (void (*exec)(const char *), void *whereToSendOutput); -extern PGDLLIMPORT exec_simple_query_hook execSimpleQuery_Hook; +typedef void (*execSimpleQuery_Hook_type) (void (*exec)(const char *), void *whereToSendOutput); +extern PGDLLIMPORT execSimpleQuery_Hook_type execSimpleQuery_Hook; #endif /* POSTGRES_H */ From 8845bde4070aa4d497c22bc46511882c4e7f34e0 Mon Sep 17 00:00:00 2001 From: leo Date: Mon, 17 Jun 2024 13:43:37 +0800 Subject: [PATCH 117/152] Fix: use IS_QUERY_DISPATCHER() to distinguish backends while accessing metadata Backends on QD(including GP_ROLE_EXECUTE) use metadata from local cache or meta service, Backends on QE use metadata dispatched from QD. We use IS_QUERY_DISPATCHER() instead of Gp_role to distinguish those backends. 
--- src/backend/tcop/postgres.c | 9 ++++++--- src/backend/utils/cache/relcache.c | 2 +- 2 files changed, 7 insertions(+), 4 deletions(-) diff --git a/src/backend/tcop/postgres.c b/src/backend/tcop/postgres.c index 936030dc307..efde785c017 100644 --- a/src/backend/tcop/postgres.c +++ b/src/backend/tcop/postgres.c @@ -5865,11 +5865,14 @@ PostgresMain(int argc, char *argv[], if (cuid > 0) SetUserIdAndContext(cuid, false); /* Set current userid */ - SystemTupleStoreReset(); + if (!IS_QUERY_DISPATCHER()) + { + SystemTupleStoreReset(); #ifdef SERVERLESS - InvalidateSystemCaches(); + InvalidateSystemCaches(); #endif /* SERVERLESS */ - SystemTupleStoreInit(serializedCatalog, serializedCatalogLen); + SystemTupleStoreInit(serializedCatalog, serializedCatalogLen); + } if (serializedPlantreelen==0) { diff --git a/src/backend/utils/cache/relcache.c b/src/backend/utils/cache/relcache.c index baee72685a5..6e3f2fd9ad4 100644 --- a/src/backend/utils/cache/relcache.c +++ b/src/backend/utils/cache/relcache.c @@ -2536,7 +2536,7 @@ RelationReloadNailed(Relation relation) { Assert(relation->rd_isnailed); - if (Gp_role == GP_ROLE_EXECUTE) + if (IsPostmasterEnvironment && !IS_QUERY_DISPATCHER()) return; /* From ff6717e970f6777b1f3e447834cb6af1ed5ed375 Mon Sep 17 00:00:00 2001 From: wangweinan Date: Tue, 18 Jun 2024 14:04:26 +0800 Subject: [PATCH 118/152] Serverless QE never hold relation lock Since all catalog shift to master, and without GDD, most of operation locks level are bumped up to Exclusive Lock in QD. we do not need hold lock in QE anymore. 
--- src/backend/access/common/relation.c | 5 +++++ src/backend/storage/lmgr/lmgr.c | 5 +++++ src/backend/storage/lmgr/lock.c | 15 +++++++++++++++ 3 files changed, 25 insertions(+) diff --git a/src/backend/access/common/relation.c b/src/backend/access/common/relation.c index 3b6ac578a1c..a2c532c97ec 100644 --- a/src/backend/access/common/relation.c +++ b/src/backend/access/common/relation.c @@ -23,6 +23,7 @@ #include "access/relation.h" #include "access/xact.h" #include "catalog/namespace.h" +#include "cdb/cdbvars.h" #include "miscadmin.h" #include "pgstat.h" #include "storage/lmgr.h" @@ -50,6 +51,10 @@ relation_open(Oid relationId, LOCKMODE lockmode) Relation r; Assert(lockmode >= NoLock && lockmode < MAX_LOCKMODES); +#ifdef SERVERLESS + if (IsNormalProcessingMode() && !IS_QUERY_DISPATCHER()) + lockmode = NoLock; +#endif /* SERVERLESS */ /* Get the lock before trying to open the relcache entry */ if (lockmode != NoLock) diff --git a/src/backend/storage/lmgr/lmgr.c b/src/backend/storage/lmgr/lmgr.c index e993c24d87a..7c2ad049c8f 100644 --- a/src/backend/storage/lmgr/lmgr.c +++ b/src/backend/storage/lmgr/lmgr.c @@ -341,6 +341,11 @@ CheckRelationLockedByMe(Relation relation, LOCKMODE lockmode, bool orstronger) { LOCKTAG tag; +#ifdef SERVERLESS + if (IsNormalProcessingMode() && !IS_QUERY_DISPATCHER()) + return true; +#endif /* SERVERLESS */ + SET_LOCKTAG_RELATION(tag, relation->rd_lockInfo.lockRelId.dbId, relation->rd_lockInfo.lockRelId.relId); diff --git a/src/backend/storage/lmgr/lock.c b/src/backend/storage/lmgr/lock.c index 90ea0a666e0..96d840cc22e 100644 --- a/src/backend/storage/lmgr/lock.c +++ b/src/backend/storage/lmgr/lock.c @@ -2270,11 +2270,26 @@ LockRelease(const LOCKTAG *locktag, LOCKMODE lockmode, bool sessionLock) * let the caller print its own error message, too. Do not ereport(ERROR). 
*/ if (!locallock || locallock->nLocks <= 0) +#ifdef SERVERLESS + { + if (IsNormalProcessingMode() && !IS_QUERY_DISPATCHER()) + { + return true; + } + else + { + elog(WARNING, "you don't own a lock of type %s", + lockMethodTable->lockModeNames[lockmode]); + return false; + } + } +#else /* SERVERLESS */ { elog(WARNING, "you don't own a lock of type %s", lockMethodTable->lockModeNames[lockmode]); return false; } +#endif /* SERVERLESS */ /* * Decrease the count for the resource owner. From 9ea99204dd5b32c4fe500817fa9f8e56412c345a Mon Sep 17 00:00:00 2001 From: liushengsong Date: Thu, 13 Jun 2024 15:08:15 +0800 Subject: [PATCH 119/152] Revert "Add cache invaladation synchronization amoung QD and QEs." This reverts commit c5f30a5d4272d8e9b870725248f557049416c861. --- src/backend/access/transam/xact.c | 3 - src/backend/cdb/dispatcher/cdbdisp_extra.c | 4 +- src/backend/cdb/dispatcher/cdbdisp_query.c | 18 ++--- src/backend/storage/ipc/sinval.c | 15 ---- src/backend/storage/ipc/sinvaladt.c | 5 -- src/backend/tcop/postgres.c | 6 ++ src/backend/utils/cache/catcache.c | 12 +++ src/backend/utils/cache/inval.c | 21 ----- src/backend/utils/cache/relcache.c | 8 ++ src/backend/utils/init/postinit.c | 1 - src/include/cdb/cdbdisp_extra.h | 4 +- src/include/storage/sinvaladt.h | 91 ---------------------- src/include/utils/catcache.h | 12 +++ src/include/utils/inval.h | 37 --------- src/include/utils/relcache.h | 6 ++ 15 files changed, 56 insertions(+), 187 deletions(-) diff --git a/src/backend/access/transam/xact.c b/src/backend/access/transam/xact.c index e31206b45e5..9bd0db6f25f 100644 --- a/src/backend/access/transam/xact.c +++ b/src/backend/access/transam/xact.c @@ -2790,9 +2790,6 @@ CommitTransaction(void) if (pending_relation_deletes_hook) pending_relation_deletes_hook(); - if (cache_invalidation_async_hook) - cache_invalidation_async_hook(cache_async_messages); - is_parallel_worker = (s->blockState == TBLOCK_PARALLEL_INPROGRESS); /* Enforce parallel mode restrictions during 
parallel worker commit. */ diff --git a/src/backend/cdb/dispatcher/cdbdisp_extra.c b/src/backend/cdb/dispatcher/cdbdisp_extra.c index 147531d6557..8a032222a29 100644 --- a/src/backend/cdb/dispatcher/cdbdisp_extra.c +++ b/src/backend/cdb/dispatcher/cdbdisp_extra.c @@ -54,7 +54,7 @@ RegisterExtraDispatch(const char *extraDispName, PackFunc packFunc, UnpackFunc u * message. */ char * -PackExtraMsgs(int *len, bool need_snapshot) +PackExtraMsgs(int *len) { HASH_SEQ_STATUS status; ExtraDispEntry *hentry; @@ -86,7 +86,7 @@ PackExtraMsgs(int *len, bool need_snapshot) hash_seq_init(&status, ExtraDispTable); while ((hentry = (ExtraDispEntry *) hash_seq_search(&status)) != NULL) { - payloads[i] = (*(hentry->packFunc))(lengths + i, need_snapshot); + payloads[i] = (*(hentry->packFunc))(lengths + i); names[i] = hentry->extraDispName; totalLen += sizeof(int) + strlen(names[i]) + 1 + *(lengths + i); i++; diff --git a/src/backend/cdb/dispatcher/cdbdisp_query.c b/src/backend/cdb/dispatcher/cdbdisp_query.c index 72d201438f0..3c95a4484a3 100644 --- a/src/backend/cdb/dispatcher/cdbdisp_query.c +++ b/src/backend/cdb/dispatcher/cdbdisp_query.c @@ -123,7 +123,7 @@ static int fillSliceVector(SliceTable *sliceTable, int len); static char *buildGpQueryString(DispatchCommandQueryParms *pQueryParms, - int *finalLen, int flags); + int *finalLen); static DispatchCommandQueryParms *cdbdisp_buildPlanQueryParms(struct QueryDesc *queryDesc, bool planRequiresTxn); static DispatchCommandQueryParms *cdbdisp_buildUtilityQueryParms(struct Node *stmt, int flags, List *oid_assignments); @@ -348,7 +348,7 @@ CdbDispatchSetCommand(const char *strCommand, bool cancelOnError) ds = cdbdisp_makeDispatcherState(false); - queryText = buildGpQueryString(pQueryParms, &queryTextLength, DF_WITH_SNAPSHOT); + queryText = buildGpQueryString(pQueryParms, &queryTextLength); primaryGang = AllocateGang(ds, GANGTYPE_PRIMARY_WRITER, cdbcomponent_getCdbComponentsList()); if (gp_print_create_gang_time) @@ -539,7 +539,7 @@ 
cdbdisp_dispatchCommandInternal(DispatchCommandQueryParms *pQueryParms, if (system_relation_modified) ds->destroyIdleReaderGang = true; - queryText = buildGpQueryString(pQueryParms, &queryTextLength, flags); + queryText = buildGpQueryString(pQueryParms, &queryTextLength); /* * Allocate a primary QE for every available segDB in the system. @@ -919,7 +919,7 @@ fillSliceVector(SliceTable *sliceTbl, int rootIdx, */ static char * buildGpQueryString(DispatchCommandQueryParms *pQueryParms, - int *finalLen, int flags) + int *finalLen) { const char *command = pQueryParms->strCommand; int command_len; @@ -946,9 +946,8 @@ buildGpQueryString(DispatchCommandQueryParms *pQueryParms, char *shared_query, *pos; char *extraMsgs; - int extraLen = 0; + int extraLen; MemoryContext oldContext; - bool need_snapshot; /* * Must allocate query text within DispatcherContext, @@ -999,8 +998,7 @@ buildGpQueryString(DispatchCommandQueryParms *pQueryParms, sizeof(tempToastNamespaceId) + 0; - need_snapshot = flags & DF_WITH_SNAPSHOT; - extraMsgs = PackExtraMsgs(&extraLen, need_snapshot); + extraMsgs = PackExtraMsgs(&extraLen); total_query_len += extraLen; shared_query = palloc(total_query_len); @@ -1197,7 +1195,7 @@ cdbdisp_dispatchX(QueryDesc* queryDesc, sliceTbl->ic_instance_id = ++gp_interconnect_id; pQueryParms = cdbdisp_buildPlanQueryParms(queryDesc, planRequiresTxn); - queryText = buildGpQueryString(pQueryParms, &queryTextLength, DF_WITH_SNAPSHOT); + queryText = buildGpQueryString(pQueryParms, &queryTextLength); /* * Allocate result array with enough slots for QEs of primary gangs. @@ -1443,7 +1441,7 @@ CdbDispatchCopyStart(struct CdbCopy *cdbCopy, Node *stmt, int flags) */ ds = cdbdisp_makeDispatcherState(false); - queryText = buildGpQueryString(pQueryParms, &queryTextLength, flags); + queryText = buildGpQueryString(pQueryParms, &queryTextLength); /* * Allocate a primary QE for every available segDB in the system. 
diff --git a/src/backend/storage/ipc/sinval.c b/src/backend/storage/ipc/sinval.c index c93415b0bce..d18973f3585 100644 --- a/src/backend/storage/ipc/sinval.c +++ b/src/backend/storage/ipc/sinval.c @@ -15,7 +15,6 @@ #include "postgres.h" #include "access/xact.h" -#include "cdb/cdbvars.h" #include "commands/async.h" #include "miscadmin.h" #include "storage/ipc.h" @@ -96,11 +95,6 @@ ReceiveSharedInvalidMessages(void (*invalFunction) (SharedInvalidationMessage *m SharedInvalidMessageCounter++; invalFunction(&msg); - - if (CollectInvalMessages_hook) - { - CollectInvalMessages_hook(&msg); - } } do @@ -118,10 +112,6 @@ ReceiveSharedInvalidMessages(void (*invalFunction) (SharedInvalidationMessage *m elog(DEBUG4, "cache state reset"); SharedInvalidMessageCounter++; resetFunction(); - if (ProcessResetCache_hook) - { - ProcessResetCache_hook(); - } break; /* nothing more to do */ } @@ -135,11 +125,6 @@ ReceiveSharedInvalidMessages(void (*invalFunction) (SharedInvalidationMessage *m SharedInvalidMessageCounter++; invalFunction(&msg); - - if (CollectInvalMessages_hook) - { - CollectInvalMessages_hook(&msg); - } } /* diff --git a/src/backend/storage/ipc/sinvaladt.c b/src/backend/storage/ipc/sinvaladt.c index 30b25fd7382..946bd8e3cb5 100644 --- a/src/backend/storage/ipc/sinvaladt.c +++ b/src/backend/storage/ipc/sinvaladt.c @@ -103,7 +103,6 @@ */ -#if 0 /* * Configurable parameters. 
* @@ -193,10 +192,6 @@ static SISeg *shmInvalBuffer; /* pointer to the shared inval buffer */ static LocalTransactionId nextLocalTransactionId; -#endif - -SISeg *shmInvalBuffer = NULL; -LocalTransactionId nextLocalTransactionId = InvalidLocalTransactionId; static void CleanupInvalidationState(int status, Datum arg); diff --git a/src/backend/tcop/postgres.c b/src/backend/tcop/postgres.c index efde785c017..f04897d2543 100644 --- a/src/backend/tcop/postgres.c +++ b/src/backend/tcop/postgres.c @@ -5495,6 +5495,12 @@ PostgresMain(int argc, char *argv[], */ InvalidateCatalogSnapshotConditionally(); + /* + * Also consider releasing our catalog snapshot if any, so that it's + * not preventing advance of global xmin while we wait for the client. + */ + InvalidateCatalogSnapshotConditionally(); + /* * (1) If we've reached idle state, tell the frontend we're ready for * a new query. diff --git a/src/backend/utils/cache/catcache.c b/src/backend/utils/cache/catcache.c index fa4f757948e..320741f85d7 100644 --- a/src/backend/utils/cache/catcache.c +++ b/src/backend/utils/cache/catcache.c @@ -63,6 +63,12 @@ #define CACHE_elog(...) 
#endif +/* Hook for plugins to get control in SearchCatCache */ +SearchCatCache_hook_type SearchCatCache_hook = NULL; + +/* Hook for plugins to get control in ReleaseCatCache */ +ReleaseCatCache_hook_type ReleaseCatCache_hook = NULL; + /* Cache management header --- pointer is NULL until created */ static CatCacheHeader *CacheHdr = NULL; @@ -1293,6 +1299,9 @@ SearchCatCacheInternal(CatCache *cache, Assert(cache->cc_nkeys == nkeys); + if (SearchCatCache_hook) + return (*SearchCatCache_hook)(cache, nkeys, v1, v2, v3, v4); + /* * one-time startup overhead for each cache */ @@ -1529,6 +1538,9 @@ SearchCatCacheMiss(CatCache *cache, void ReleaseCatCache(HeapTuple tuple) { + if (ReleaseCatCache_hook) + return (*ReleaseCatCache_hook)(tuple); + CatCTup *ct = (CatCTup *) (((char *) tuple) - offsetof(CatCTup, tuple)); diff --git a/src/backend/utils/cache/inval.c b/src/backend/utils/cache/inval.c index 1e3d497dcf3..5a986349e2a 100644 --- a/src/backend/utils/cache/inval.c +++ b/src/backend/utils/cache/inval.c @@ -136,7 +136,6 @@ #include "cdb/cdbvars.h" -#if 0 /* * To minimize palloc traffic, we keep pending requests in successively- * larger chunks (a slightly more sophisticated version of an expansible @@ -156,13 +155,6 @@ typedef struct InvalidationListHeader InvalidationChunk *cclist; /* list of chunks holding catcache msgs */ InvalidationChunk *rclist; /* list of chunks holding relcache msgs */ } InvalidationListHeader; -#endif -CollectInvalMessages_hook_type CollectInvalMessages_hook = NULL; -ProcessResetCache_hook_type ProcessResetCache_hook = NULL; -cache_invalidation_async_hook_type cache_invalidation_async_hook = NULL; -cache_async_cleanup_hook_type cache_async_cleanup_hook = NULL; - -CacheAsyncMessages *cache_async_messages = NULL; /*---------------- * Invalidation info is divided into two lists: @@ -705,11 +697,6 @@ LocalExecuteInvalidationMessage(SharedInvalidationMessage *msg) #endif elog(FATAL, "unrecognized SI message ID: %d", msg->id); } - - if 
(CollectInvalMessages_hook) - { - CollectInvalMessages_hook(msg); - } } /* @@ -1049,12 +1036,7 @@ AtEOXact_Inval(bool isCommit) { /* Quick exit if no messages */ if (transInvalInfo == NULL) - { - if (cache_async_cleanup_hook) - cache_async_cleanup_hook(cache_async_messages); return; - } - /* Must be at top of stack */ Assert(transInvalInfo->my_level == 1 && transInvalInfo->parent == NULL); @@ -1077,9 +1059,6 @@ AtEOXact_Inval(bool isCommit) if (transInvalInfo->RelcacheInitFileInval) RelationCacheInitFilePostInvalidate(); - - if (cache_async_cleanup_hook) - cache_async_cleanup_hook(cache_async_messages); } else { diff --git a/src/backend/utils/cache/relcache.c b/src/backend/utils/cache/relcache.c index 6e3f2fd9ad4..e3c96767ac9 100644 --- a/src/backend/utils/cache/relcache.c +++ b/src/backend/utils/cache/relcache.c @@ -259,6 +259,11 @@ static bool eoxact_list_overflowed = false; eoxact_list_overflowed = true; \ } while (0) +/* + * Hook for plugins to validate the relation in RelationIdGetRelation. + */ +RelationValidation_hook_type RelationValidation_hook = NULL; + /* * EOXactTupleDescArray stores TupleDescs that (might) need AtEOXact * cleanup work. 
The array expands as needed; there is no hashtable because @@ -2243,6 +2248,9 @@ RelationIdGetRelation(Oid relationId) return NULL; } + if (RelationValidation_hook) + (*RelationValidation_hook)(relationId, rd); + RelationIncrementReferenceCount(rd); /* revalidate cache entry if necessary */ if (!rd->rd_isvalid) diff --git a/src/backend/utils/init/postinit.c b/src/backend/utils/init/postinit.c index 1b18c0a52ed..5358e618de2 100644 --- a/src/backend/utils/init/postinit.c +++ b/src/backend/utils/init/postinit.c @@ -75,7 +75,6 @@ #include "utils/faultinjector.h" #include "utils/fmgroids.h" #include "utils/guc.h" -#include "utils/inval.h" #include "utils/memutils.h" #include "utils/pg_locale.h" #include "utils/portal.h" diff --git a/src/include/cdb/cdbdisp_extra.h b/src/include/cdb/cdbdisp_extra.h index 52cc239cc88..b6bac03b3cd 100644 --- a/src/include/cdb/cdbdisp_extra.h +++ b/src/include/cdb/cdbdisp_extra.h @@ -5,11 +5,11 @@ #define EXTRADISPNAME_MAX_LEN 64 -typedef char *(*PackFunc) (int *len, bool need_snapshot); +typedef char *(*PackFunc) (int *len); typedef void (*UnpackFunc) (const char *msg, int len); extern void RegisterExtraDispatch(const char *extraDispName, PackFunc packFunc, UnpackFunc unpackFunc); -extern char *PackExtraMsgs(int *len, bool need_snapshot); +extern char *PackExtraMsgs(int *len); extern void UnPackExtraMsgs(StringInfo strInfo); #endif /* CDBDISP_EXTRA_H */ diff --git a/src/include/storage/sinvaladt.h b/src/include/storage/sinvaladt.h index b0fa7a1df2f..14148bf8201 100644 --- a/src/include/storage/sinvaladt.h +++ b/src/include/storage/sinvaladt.h @@ -25,97 +25,6 @@ #include "storage/lock.h" #include "storage/sinval.h" - -/* - * Configurable parameters. - * - * MAXNUMMESSAGES: max number of shared-inval messages we can buffer. - * Must be a power of 2 for speed. - * - * MSGNUMWRAPAROUND: how often to reduce MsgNum variables to avoid overflow. - * Must be a multiple of MAXNUMMESSAGES. Should be large. 
- * - * CLEANUP_MIN: the minimum number of messages that must be in the buffer - * before we bother to call SICleanupQueue. - * - * CLEANUP_QUANTUM: how often (in messages) to call SICleanupQueue once - * we exceed CLEANUP_MIN. Should be a power of 2 for speed. - * - * SIG_THRESHOLD: the minimum number of messages a backend must have fallen - * behind before we'll send it PROCSIG_CATCHUP_INTERRUPT. - * - * WRITE_QUANTUM: the max number of messages to push into the buffer per - * iteration of SIInsertDataEntries. Noncritical but should be less than - * CLEANUP_QUANTUM, because we only consider calling SICleanupQueue once - * per iteration. - */ - -#define MAXNUMMESSAGES 4096 -#define MSGNUMWRAPAROUND (MAXNUMMESSAGES * 262144) -#define CLEANUP_MIN (MAXNUMMESSAGES / 2) -#define CLEANUP_QUANTUM (MAXNUMMESSAGES / 16) -#define SIG_THRESHOLD (MAXNUMMESSAGES / 2) -#define WRITE_QUANTUM 64 - -/* Per-backend state in shared invalidation structure */ -typedef struct ProcState -{ - /* procPid is zero in an inactive ProcState array entry. */ - pid_t procPid; /* PID of backend, for signaling */ - PGPROC *proc; /* PGPROC of backend */ - /* nextMsgNum is meaningless if procPid == 0 or resetState is true. */ - int nextMsgNum; /* next message number to read */ - bool resetState; /* backend needs to reset its state */ - bool signaled; /* backend has been sent catchup signal */ - bool hasMessages; /* backend has unread messages */ - - /* - * Backend only sends invalidations, never receives them. This only makes - * sense for Startup process during recovery because it doesn't maintain a - * relcache, yet it fires inval messages to allow query backends to see - * schema changes. - */ - bool sendOnly; /* backend only sends, never receives */ - - /* - * Next LocalTransactionId to use for each idle backend slot. We keep - * this here because it is indexed by BackendId and it is convenient to - * copy the value to and from local memory when MyBackendId is set. 
It's - * meaningless in an active ProcState entry. - */ - LocalTransactionId nextLXID; -} ProcState; - -/* Shared cache invalidation memory segment */ -typedef struct SISeg -{ - /* - * General state information - */ - int minMsgNum; /* oldest message still needed */ - int maxMsgNum; /* next message number to be assigned */ - int nextThreshold; /* # of messages to call SICleanupQueue */ - int lastBackend; /* index of last active procState entry, +1 */ - int maxBackends; /* size of procState array */ - - slock_t msgnumLock; /* spinlock protecting maxMsgNum */ - - /* - * Circular buffer holding shared-inval messages - */ - SharedInvalidationMessage buffer[MAXNUMMESSAGES]; - - /* - * Per-backend invalidation state info (has MaxBackends entries). - */ - ProcState procState[FLEXIBLE_ARRAY_MEMBER]; -} SISeg; - -extern SISeg *shmInvalBuffer; /* pointer to the shared inval buffer */ - - -extern LocalTransactionId nextLocalTransactionId; - /* * prototypes for functions in sinvaladt.c */ diff --git a/src/include/utils/catcache.h b/src/include/utils/catcache.h index 8848307553e..e62c2d4018b 100644 --- a/src/include/utils/catcache.h +++ b/src/include/utils/catcache.h @@ -185,6 +185,18 @@ typedef struct catcacheheader int ch_ntup; /* # of tuples in all caches */ } CatCacheHeader; +/* Hook for plugins to get control in SearchCatCache */ +typedef HeapTuple (*SearchCatCache_hook_type)(CatCache *cache, + int nkeys, + Datum v1, + Datum v2, + Datum v3, + Datum v4); +extern PGDLLIMPORT SearchCatCache_hook_type SearchCatCache_hook; + +/* Hook for plugins to get control in ReleaseCatCache */ +typedef void (*ReleaseCatCache_hook_type)(HeapTuple tuple); +extern PGDLLIMPORT ReleaseCatCache_hook_type ReleaseCatCache_hook; /* this extern duplicates utils/memutils.h... 
*/ extern PGDLLIMPORT MemoryContext CacheMemoryContext; diff --git a/src/include/utils/inval.h b/src/include/utils/inval.h index 2ab944ede35..877e66c63c8 100644 --- a/src/include/utils/inval.h +++ b/src/include/utils/inval.h @@ -16,37 +16,9 @@ #include "access/htup.h" #include "storage/relfilenode.h" -#include "storage/sinval.h" #include "utils/relcache.h" -/* - * To minimize palloc traffic, we keep pending requests in successively- - * larger chunks (a slightly more sophisticated version of an expansible - * array). All request types can be stored as SharedInvalidationMessage - * records. The ordering of requests within a list is never significant. - */ -typedef struct InvalidationChunk -{ - struct InvalidationChunk *next; /* list link */ - int nitems; /* # items currently stored in chunk */ - int maxitems; /* size of allocated array in this chunk */ - SharedInvalidationMessage msgs[FLEXIBLE_ARRAY_MEMBER]; -} InvalidationChunk; - -typedef struct InvalidationListHeader -{ - InvalidationChunk *cclist; /* list of chunks holding catcache msgs */ - InvalidationChunk *rclist; /* list of chunks holding relcache msgs */ -} InvalidationListHeader; - -typedef struct CacheAsyncMessages -{ - List *local_inval_messages; - bool reset_cache_state; -} CacheAsyncMessages; - extern PGDLLIMPORT int debug_discard_caches; -extern CacheAsyncMessages *cache_async_messages; typedef void (*SyscacheCallbackFunction) (Datum arg, int cacheid, uint32 hashvalue); typedef void (*RelcacheCallbackFunction) (Datum arg, Oid relid); @@ -93,13 +65,4 @@ extern void InvalidateSystemCaches(void); extern void InvalidateSystemCachesExtended(bool debug_discard); extern void LogLogicalInvalidations(void); - -typedef void (*CollectInvalMessages_hook_type) (SharedInvalidationMessage *msg); -typedef void (*ProcessResetCache_hook_type) (void); -typedef void (*cache_invalidation_async_hook_type) (CacheAsyncMessages *cache_async_messages); -typedef void (*cache_async_cleanup_hook_type) (CacheAsyncMessages 
*cache_async_messages); -extern PGDLLIMPORT CollectInvalMessages_hook_type CollectInvalMessages_hook; -extern PGDLLIMPORT ProcessResetCache_hook_type ProcessResetCache_hook; -extern PGDLLIMPORT cache_invalidation_async_hook_type cache_invalidation_async_hook; -extern PGDLLIMPORT cache_async_cleanup_hook_type cache_async_cleanup_hook; #endif /* INVAL_H */ diff --git a/src/include/utils/relcache.h b/src/include/utils/relcache.h index 466ea462b9b..9989c4ef2f1 100644 --- a/src/include/utils/relcache.h +++ b/src/include/utils/relcache.h @@ -39,6 +39,12 @@ typedef struct RelationData *Relation; */ typedef Relation *RelationPtr; +/* + * Hook for plugins to validate the relation in RelationIdGetRelation. + */ +typedef void (*RelationValidation_hook_type)(Oid relationId, Relation relation); +extern PGDLLIMPORT RelationValidation_hook_type RelationValidation_hook; + /* * Routines to open (lookup) and close a relcache entry */ From 96fc191f382287c28e38435c13fcdc7ae21abc06 Mon Sep 17 00:00:00 2001 From: liushengsong Date: Thu, 13 Jun 2024 18:02:11 +0800 Subject: [PATCH 120/152] Fix: fix serveral problem in catalog dispatch 1. Move system cache initial before UnPackExtraMsgs, due to UnPackExtraMsgs will read catalog. 2. Initial SysTypeHash and PlHash in sysTransferContext. 3. CreateTupleDescCopy entry->tupDesc, Due to entry->tupDesc may be freed in CollectTypeImpl. 4. Use systable_beginscan in GetManifestPath to read ManifestRelationId. So we can collect the manifest tuple. 
--- src/backend/tcop/postgres.c | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/src/backend/tcop/postgres.c b/src/backend/tcop/postgres.c index f04897d2543..edb1c4d066c 100644 --- a/src/backend/tcop/postgres.c +++ b/src/backend/tcop/postgres.c @@ -5827,6 +5827,15 @@ PostgresMain(int argc, char *argv[], if (serializedCatalogLen > 0) serializedCatalog = pq_getmsgbytes(&input_message, serializedCatalogLen); + if (!IS_QUERY_DISPATCHER()) + { + SystemTupleStoreReset(); +#ifdef SERVERLESS + InvalidateSystemCaches(); +#endif /* SERVERLESS */ + SystemTupleStoreInit(serializedCatalog, serializedCatalogLen); + } + /* * Always use the same GpIdentity.numsegments with QD on QEs */ @@ -5871,15 +5880,6 @@ PostgresMain(int argc, char *argv[], if (cuid > 0) SetUserIdAndContext(cuid, false); /* Set current userid */ - if (!IS_QUERY_DISPATCHER()) - { - SystemTupleStoreReset(); -#ifdef SERVERLESS - InvalidateSystemCaches(); -#endif /* SERVERLESS */ - SystemTupleStoreInit(serializedCatalog, serializedCatalogLen); - } - if (serializedPlantreelen==0) { if (strncmp(query_string, "BEGIN", 5) == 0) From 0841ca6a34e670e2d9758d6c4fe4e604fb61be8f Mon Sep 17 00:00:00 2001 From: JInbao Chen Date: Tue, 18 Jun 2024 19:07:24 +0800 Subject: [PATCH 121/152] Fix flake test for copy pg_internal.init The pg_internal file should be remove when receiving a relcache invalid message. So sometimes the command of copy pg_internal file would failed. Now make a copy of pg_internal file in root dir, so that the copy command should always run success. 
--- src/backend/utils/cache/relcache.c | 39 ++++++++++++++++++++++++------ 1 file changed, 31 insertions(+), 8 deletions(-) diff --git a/src/backend/utils/cache/relcache.c b/src/backend/utils/cache/relcache.c index e3c96767ac9..889cd88301d 100644 --- a/src/backend/utils/cache/relcache.c +++ b/src/backend/utils/cache/relcache.c @@ -6241,19 +6241,24 @@ load_relcache_init_file(bool shared) magic; int i; - if (shared) + + if (GpIdentity.segindex < 0) { - snprintf(initfilename, sizeof(initfilename), "global/%s", - RELCACHE_INIT_FILENAME); + if (shared) + snprintf(initfilename, sizeof(initfilename), "global/%s", + RELCACHE_INIT_FILENAME); + else + snprintf(initfilename, sizeof(initfilename), "%s/%s", + DatabasePath, RELCACHE_INIT_FILENAME); } else { -// if (GpIdentity.segindex >= 0) + if (shared) + snprintf(initfilename, sizeof(initfilename), "%s.global", + RELCACHE_INIT_FILENAME); + else snprintf(initfilename, sizeof(initfilename), "%s", RELCACHE_INIT_FILENAME); -// else -// snprintf(initfilename, sizeof(initfilename), "%s/%s", -// DatabasePath, RELCACHE_INIT_FILENAME); } fp = AllocateFile(initfilename, PG_BINARY_R); @@ -6675,6 +6680,7 @@ write_relcache_init_file(bool shared) FILE *fp; char tempfilename[MAXPGPATH]; char finalfilename[MAXPGPATH]; + char copyfilename[MAXPGPATH]; int magic; HASH_SEQ_STATUS status; RelIdCacheEnt *idhentry; @@ -6766,12 +6772,16 @@ write_relcache_init_file(bool shared) RELCACHE_INIT_FILENAME, MyProcPid); snprintf(finalfilename, sizeof(finalfilename), "global/%s", RELCACHE_INIT_FILENAME); + snprintf(copyfilename, sizeof(copyfilename), "%s.global", + RELCACHE_INIT_FILENAME); } else { snprintf(tempfilename, sizeof(tempfilename), "%s/%s.%d", DatabasePath, RELCACHE_INIT_FILENAME, MyProcPid); - snprintf(finalfilename, sizeof(finalfilename), "%s", + snprintf(finalfilename, sizeof(finalfilename), "%s/%s", + DatabasePath, RELCACHE_INIT_FILENAME); + snprintf(copyfilename, sizeof(copyfilename), "%s", RELCACHE_INIT_FILENAME); } @@ -7007,6 +7017,19 @@ 
write_relcache_init_file(bool shared) unlink(tempfilename); } + /* + * Copy the file to root dir + */ + if (access(copyfilename, F_OK) != 0) + { + char cp_cmd[MAXPGPATH]; + sprintf(cp_cmd, "cp %s %s", finalfilename, copyfilename); + if (system(cp_cmd) != 0) + { + elog(ERROR, "copy process fail, cp_cmd %s", cp_cmd); + } + } + LWLockRelease(RelCacheInitLock); } From 10d36b8652c9e14aa5c0fb94ab72514345a302f5 Mon Sep 17 00:00:00 2001 From: JInbao Chen Date: Mon, 1 Jul 2024 16:21:40 +0800 Subject: [PATCH 122/152] Enable catalog collection on all the workers on master Read catalog from unionstore in all the workers on master, and read catalog from dispatch data in all the workers on segment. Enable the catalog collector for auto vacuum. --- src/backend/postmaster/autovacuum.c | 3 +++ src/backend/task/pg_cron.c | 4 ++++ src/backend/utils/init/postinit.c | 2 +- 3 files changed, 8 insertions(+), 1 deletion(-) diff --git a/src/backend/postmaster/autovacuum.c b/src/backend/postmaster/autovacuum.c index c0e7baa7acc..db4240a6322 100644 --- a/src/backend/postmaster/autovacuum.c +++ b/src/backend/postmaster/autovacuum.c @@ -156,6 +156,7 @@ #include "utils/timeout.h" #include "utils/timestamp.h" +#include "cdb/cdbtranscat.h" #include "cdb/cdbvars.h" #include "utils/faultinjector.h" @@ -3415,6 +3416,8 @@ autovacuum_do_vac_analyze(autovac_table *tab, BufferAccessStrategy bstrategy) "", tab->at_relname); #endif + TransferReset(); + SetTransferOn(); vacuum(rel_list, &tab->at_params, bstrategy, true); } diff --git a/src/backend/task/pg_cron.c b/src/backend/task/pg_cron.c index a5757d09082..eb895a635b1 100644 --- a/src/backend/task/pg_cron.c +++ b/src/backend/task/pg_cron.c @@ -61,6 +61,7 @@ #else #include "commands/variable.h" #endif +#include "cdb/cdbtranscat.h" #include "lib/stringinfo.h" #include "libpq-fe.h" #include "libpq/pqformat.h" @@ -1877,6 +1878,9 @@ ExecuteSqlString(const char *sql) isTopLevel = commands_remaining == 1; MemoryContextSwitchTo(oldcontext); + TransferReset(); + 
SetTransferOn(); + /* * Do parse analysis, rule rewrite, planning, and execution for each raw * parsetree. We must fully execute each query before beginning parse diff --git a/src/backend/utils/init/postinit.c b/src/backend/utils/init/postinit.c index 5358e618de2..47db1d75c3d 100644 --- a/src/backend/utils/init/postinit.c +++ b/src/backend/utils/init/postinit.c @@ -676,7 +676,7 @@ InitPostgres(const char *in_dbname, Oid dboid, const char *username, elog(DEBUG3, "InitPostgres"); - if (StartUpCatalogData && GpIdentity.segindex >= 0) + if (StartUpCatalogData && !IS_QUERY_DISPATCHER()) { SystemTupleStoreReset(); SystemTupleStoreInit(StartUpCatalogData, StartUpCatalogLen); From 4b091a60e7b936315c2463c5206fd42afd47c40f Mon Sep 17 00:00:00 2001 From: JInbao Chen Date: Thu, 27 Jun 2024 14:23:39 +0800 Subject: [PATCH 123/152] Add a new table am routine ScanCatalogPrepare This routine is for colllect the catalog for the am, Is can alse use for other purpose. Is is called before scan_begin. --- src/backend/executor/nodeSeqscan.c | 4 ++++ src/include/access/tableam.h | 3 +++ 2 files changed, 7 insertions(+) diff --git a/src/backend/executor/nodeSeqscan.c b/src/backend/executor/nodeSeqscan.c index 889b809052c..15155ddbe87 100644 --- a/src/backend/executor/nodeSeqscan.c +++ b/src/backend/executor/nodeSeqscan.c @@ -228,6 +228,10 @@ ExecInitSeqScanForPartition(SeqScan *node, EState *estate, scanstate->filter_in_seqscan = true; } + if (scanstate->ss.ss_currentRelation->rd_tableam->scan_prepare_catalog) + scanstate->ss.ss_currentRelation->rd_tableam->scan_prepare_catalog( + scanstate->ss.ss_currentRelation, &scanstate->ss.ps); + return scanstate; } diff --git a/src/include/access/tableam.h b/src/include/access/tableam.h index 4f8bbb09653..63d63175cb0 100644 --- a/src/include/access/tableam.h +++ b/src/include/access/tableam.h @@ -373,6 +373,9 @@ typedef struct TableAmRoutine ParallelTableScanDesc pscan, uint32 flags, void * ctx); + void (*scan_prepare_catalog) (Relation rel, + 
struct PlanState *ps); + /* * GPDB: Extract columns for scan from targetlist and quals. This is mainly * for AOCS tables. From f6a03fee309e1d6b46ce5a4e166c99ed9e818a71 Mon Sep 17 00:00:00 2001 From: roseduan Date: Fri, 21 Jun 2024 16:34:49 +0800 Subject: [PATCH 124/152] Enable autovacuum process --- src/backend/postmaster/autovacuum.c | 827 +++++++++++++++------------- src/include/postmaster/autovacuum.h | 48 ++ 2 files changed, 480 insertions(+), 395 deletions(-) diff --git a/src/backend/postmaster/autovacuum.c b/src/backend/postmaster/autovacuum.c index db4240a6322..2a818234c51 100644 --- a/src/backend/postmaster/autovacuum.c +++ b/src/backend/postmaster/autovacuum.c @@ -214,6 +214,15 @@ static MemoryContext AutovacMemCxt; */ AutoVacLauncherMain_hook_type AutoVacLauncherMain_hook = NULL; +/* + * Hook for plugins to get control in vacuum relation list + * If you add this hook, you`d better add TableRecheckAutoVac_hook too. + */ +AutoVacRelationList_hook_type AutoVacRelationList_hook = NULL; + +/* Hook for plugins to get control in recheck auto-vacuum/analyze table */ +TableRecheckAutoVac_hook_type TableRecheckAutoVac_hook = NULL; + /* struct to keep track of databases in launcher */ typedef struct avl_dbase { @@ -234,30 +243,6 @@ typedef struct avw_dbase PgStat_StatDBEntry *adw_entry; } avw_dbase; -/* struct to keep track of tables to vacuum and/or analyze, in 1st pass */ -typedef struct av_relation -{ - Oid ar_toastrelid; /* hash key - must be first */ - Oid ar_relid; - bool ar_hasrelopts; - AutoVacOpts ar_reloptions; /* copy of AutoVacOpts from the main table's - * reloptions, or NULL if none */ -} av_relation; - -/* struct to keep track of tables to vacuum and/or analyze, after rechecking */ -typedef struct autovac_table -{ - Oid at_relid; - VacuumParams at_params; - double at_vacuum_cost_delay; - int at_vacuum_cost_limit; - bool at_dobalance; - bool at_sharedrel; - char *at_relname; - char *at_nspname; - char *at_datname; -} autovac_table; - /*------------- * 
This struct holds information about a single worker's whereabouts. We keep * an array of these in shared memory, sized according to @@ -410,6 +395,13 @@ static void autovac_report_workitem(AutoVacuumWorkItem *workitem, const char *nspname, const char *relname); static void avl_sigusr2_handler(SIGNAL_ARGS); static void autovac_refresh_stats(void); +static List* autovacuum_relation_list(Relation classRel, + Form_pg_database dbForm, + HTAB *table_toast_map, + PgStat_StatDBEntry *shared, + PgStat_StatDBEntry *dbentry, + TupleDesc pg_class_desc, + int effective_multixact_freeze_max_age); @@ -2054,17 +2046,14 @@ do_autovacuum(void) { Relation classRel; HeapTuple tuple; - TableScanDesc relScan; Form_pg_database dbForm; List *table_oids = NIL; - List *orphan_oids = NIL; HASHCTL ctl; HTAB *table_toast_map; ListCell *volatile cell; PgStat_StatDBEntry *shared; PgStat_StatDBEntry *dbentry; BufferAccessStrategy bstrategy; - ScanKeyData key; TupleDesc pg_class_desc; int effective_multixact_freeze_max_age; bool did_vacuum = false; @@ -2151,410 +2140,178 @@ do_autovacuum(void) &ctl, HASH_ELEM | HASH_BLOBS); + if (AutoVacRelationList_hook) + { + table_oids = (*AutoVacRelationList_hook)(classRel, dbForm, table_toast_map, + shared, dbentry, + pg_class_desc, + effective_multixact_freeze_max_age, + AutovacMemCxt); + } + else + { + table_oids = autovacuum_relation_list(classRel, dbForm, table_toast_map, + shared, dbentry, + pg_class_desc, + effective_multixact_freeze_max_age); + } + /* - * Scan pg_class to determine which tables to vacuum. - * - * We do this in two passes: on the first one we collect the list of plain - * relations and materialized views, and on the second one we collect - * TOAST tables. The reason for doing the second pass is that during it we - * want to use the main relation's pg_class.reloptions entry if the TOAST - * table does not have any, and we cannot obtain it unless we know - * beforehand what's the main table OID. 
- * - * We need to check TOAST tables separately because in cases with short, - * wide tables there might be proportionally much more activity in the - * TOAST table than in its parent. + * Create a buffer access strategy object for VACUUM to use. We want to + * use the same one across all the vacuum operations we perform, since the + * point is for VACUUM not to blow out the shared cache. */ - relScan = table_beginscan_catalog(classRel, 0, NULL); + bstrategy = GetAccessStrategy(BAS_VACUUM); /* - * On the first pass, we collect main tables to vacuum, and also the main - * table relid to TOAST relid mapping. + * create a memory context to act as fake PortalContext, so that the + * contexts created in the vacuum code are cleaned up for each table. */ - while ((tuple = heap_getnext(relScan, ForwardScanDirection)) != NULL) - { - Form_pg_class classForm = (Form_pg_class) GETSTRUCT(tuple); - PgStat_StatTabEntry *tabentry; - AutoVacOpts *relopts; - Oid relid; - bool dovacuum; - bool doanalyze; - bool wraparound; + PortalContext = AllocSetContextCreate(AutovacMemCxt, + "Autovacuum Portal", + ALLOCSET_DEFAULT_SIZES); - if (classForm->relkind != RELKIND_RELATION && - classForm->relkind != RELKIND_DIRECTORY_TABLE && - classForm->relkind != RELKIND_MATVIEW && - classForm->relkind != RELKIND_AOSEGMENTS && - classForm->relkind != RELKIND_AOBLOCKDIR && - classForm->relkind != RELKIND_AOVISIMAP) - continue; + /* + * Perform operations on collected tables. + */ + foreach(cell, table_oids) + { + Oid relid = lfirst_oid(cell); + HeapTuple classTup; + autovac_table *tab; + bool isshared; + bool skipit; + double stdVacuumCostDelay; + int stdVacuumCostLimit; + dlist_iter iter; - relid = classForm->oid; + CHECK_FOR_INTERRUPTS(); /* - * Check if it is a temp table (presumably, of some other backend's). - * We cannot safely process other backends' temp tables. + * Check for config changes before processing each collected table. 
*/ - if (classForm->relpersistence == RELPERSISTENCE_TEMP) + if (ConfigReloadPending) { - /* - * GPDB: Skip process temp tables since the temp namespace for QD and QE - * is using gp_session_id as suffix instead of backendID. - * And performDeletion() only execute delete on current node. - */ - continue; + ConfigReloadPending = false; + ProcessConfigFile(PGC_SIGHUP); /* - * We just ignore it if the owning backend is still active and - * using the temporary schema. Also, for safety, ignore it if the - * namespace doesn't exist or isn't a temp namespace after all. + * You might be tempted to bail out if we see autovacuum is now + * disabled. Must resist that temptation -- this might be a + * for-wraparound emergency worker, in which case that would be + * entirely inappropriate. */ - if (checkTempNamespaceStatus(classForm->relnamespace) == TEMP_NAMESPACE_IDLE) - { - /* - * The table seems to be orphaned -- although it might be that - * the owning backend has already deleted it and exited; our - * pg_class scan snapshot is not necessarily up-to-date - * anymore, so we could be looking at a committed-dead entry. - * Remember it so we can try to delete it later. - */ - orphan_oids = lappend_oid(orphan_oids, relid); - } - continue; } - /* Fetch reloptions and the pgstat entry for this table */ - relopts = extract_autovac_opts(tuple, pg_class_desc); - tabentry = get_pgstat_tabentry_relid(relid, classForm->relisshared, - shared, dbentry); - - /* Check if it needs vacuum or analyze */ - relation_needs_vacanalyze(relid, relopts, classForm, tabentry, - effective_multixact_freeze_max_age, - &dovacuum, &doanalyze, &wraparound); + /* + * Find out whether the table is shared or not. (It's slightly + * annoying to fetch the syscache entry just for this, but in typical + * cases it adds little cost because table_recheck_autovac would + * refetch the entry anyway. 
We could buy that back by copying the + * tuple here and passing it to table_recheck_autovac, but that + * increases the odds of that function working with stale data.) + */ + classTup = SearchSysCache1(RELOID, ObjectIdGetDatum(relid)); + if (!HeapTupleIsValid(classTup)) + continue; /* somebody deleted the rel, forget it */ + isshared = ((Form_pg_class) GETSTRUCT(classTup))->relisshared; + ReleaseSysCache(classTup); - /* Relations that need work are added to table_oids */ - if (dovacuum || doanalyze) - table_oids = lappend_oid(table_oids, relid); + /* + * Hold schedule lock from here until we've claimed the table. We + * also need the AutovacuumLock to walk the worker array, but that one + * can just be a shared lock. + */ + LWLockAcquire(AutovacuumScheduleLock, LW_EXCLUSIVE); + LWLockAcquire(AutovacuumLock, LW_SHARED); /* - * Remember TOAST associations for the second pass. Note: we must do - * this whether or not the table is going to be vacuumed, because we - * don't automatically vacuum toast tables along the parent table. + * Check whether the table is being vacuumed concurrently by another + * worker. 
*/ - if (OidIsValid(classForm->reltoastrelid)) + skipit = false; + dlist_foreach(iter, &AutoVacuumShmem->av_runningWorkers) { - av_relation *hentry; - bool found; + WorkerInfo worker = dlist_container(WorkerInfoData, wi_links, iter.cur); - hentry = hash_search(table_toast_map, - &classForm->reltoastrelid, - HASH_ENTER, &found); + /* ignore myself */ + if (worker == MyWorkerInfo) + continue; - if (!found) + /* ignore workers in other databases (unless table is shared) */ + if (!worker->wi_sharedrel && worker->wi_dboid != MyDatabaseId) + continue; + + if (worker->wi_tableoid == relid) { - /* hash_search already filled in the key */ - hentry->ar_relid = relid; - hentry->ar_hasrelopts = false; - if (relopts != NULL) - { - hentry->ar_hasrelopts = true; - memcpy(&hentry->ar_reloptions, relopts, - sizeof(AutoVacOpts)); - } + skipit = true; + found_concurrent_worker = true; + break; } } - } - - table_endscan(relScan); - - /* second pass: check TOAST tables */ - ScanKeyInit(&key, - Anum_pg_class_relkind, - BTEqualStrategyNumber, F_CHAREQ, - CharGetDatum(RELKIND_TOASTVALUE)); - - relScan = table_beginscan_catalog(classRel, 1, &key); - while ((tuple = heap_getnext(relScan, ForwardScanDirection)) != NULL) - { - Form_pg_class classForm = (Form_pg_class) GETSTRUCT(tuple); - PgStat_StatTabEntry *tabentry; - Oid relid; - AutoVacOpts *relopts = NULL; - bool dovacuum; - bool doanalyze; - bool wraparound; + LWLockRelease(AutovacuumLock); + if (skipit) + { + LWLockRelease(AutovacuumScheduleLock); + continue; + } /* - * We cannot safely process other backends' temp tables, so skip 'em. + * Store the table's OID in shared memory before releasing the + * schedule lock, so that other workers don't try to vacuum it + * concurrently. (We claim it here so as not to hold + * AutovacuumScheduleLock while rechecking the stats.) 
*/ - if (classForm->relpersistence == RELPERSISTENCE_TEMP) - continue; - - relid = classForm->oid; + MyWorkerInfo->wi_tableoid = relid; + MyWorkerInfo->wi_sharedrel = isshared; + LWLockRelease(AutovacuumScheduleLock); /* - * fetch reloptions -- if this toast table does not have them, try the - * main rel + * Check whether pgstat data still says we need to vacuum this table. + * It could have changed if something else processed the table while + * we weren't looking. + * + * Note: we have a special case in pgstat code to ensure that the + * stats we read are as up-to-date as possible, to avoid the problem + * that somebody just finished vacuuming this table. The window to + * the race condition is not closed but it is very small. */ - relopts = extract_autovac_opts(tuple, pg_class_desc); - if (relopts == NULL) + MemoryContextSwitchTo(AutovacMemCxt); + if (TableRecheckAutoVac_hook != NULL) { - av_relation *hentry; - bool found; - - hentry = hash_search(table_toast_map, &relid, HASH_FIND, &found); - if (found && hentry->ar_hasrelopts) - relopts = &hentry->ar_reloptions; + tab = (*TableRecheckAutoVac_hook) (relid, table_toast_map, + pg_class_desc, + effective_multixact_freeze_max_age, + default_freeze_min_age, + default_freeze_table_age, + default_multixact_freeze_min_age, + default_multixact_freeze_table_age); + } + else + { + tab = table_recheck_autovac(relid, table_toast_map, pg_class_desc, + effective_multixact_freeze_max_age); + } + if (tab == NULL) + { + /* someone else vacuumed the table, or it went away */ + LWLockAcquire(AutovacuumScheduleLock, LW_EXCLUSIVE); + MyWorkerInfo->wi_tableoid = InvalidOid; + MyWorkerInfo->wi_sharedrel = false; + LWLockRelease(AutovacuumScheduleLock); + continue; } - /* Fetch the pgstat entry for this table */ - tabentry = get_pgstat_tabentry_relid(relid, classForm->relisshared, - shared, dbentry); + /* + * Remember the prevailing values of the vacuum cost GUCs. 
We have to + * restore these at the bottom of the loop, else we'll compute wrong + * values in the next iteration of autovac_balance_cost(). + */ + stdVacuumCostDelay = VacuumCostDelay; + stdVacuumCostLimit = VacuumCostLimit; - relation_needs_vacanalyze(relid, relopts, classForm, tabentry, - effective_multixact_freeze_max_age, - &dovacuum, &doanalyze, &wraparound); - - /* ignore analyze for toast tables */ - if (dovacuum) - table_oids = lappend_oid(table_oids, relid); - } - - table_endscan(relScan); - table_close(classRel, AccessShareLock); - - /* - * Recheck orphan temporary tables, and if they still seem orphaned, drop - * them. We'll eat a transaction per dropped table, which might seem - * excessive, but we should only need to do anything as a result of a - * previous backend crash, so this should not happen often enough to - * justify "optimizing". Using separate transactions ensures that we - * don't bloat the lock table if there are many temp tables to be dropped, - * and it ensures that we don't lose work if a deletion attempt fails. - */ - foreach(cell, orphan_oids) - { - Oid relid = lfirst_oid(cell); - Form_pg_class classForm; - ObjectAddress object; - - /* - * Check for user-requested abort. - */ - CHECK_FOR_INTERRUPTS(); - - /* - * Try to lock the table. If we can't get the lock immediately, - * somebody else is using (or dropping) the table, so it's not our - * concern anymore. Having the lock prevents race conditions below. - */ - if (!ConditionalLockRelationOid(relid, AccessExclusiveLock)) - continue; - - /* - * Re-fetch the pg_class tuple and re-check whether it still seems to - * be an orphaned temp table. If it's not there or no longer the same - * relation, ignore it. 
- */ - tuple = SearchSysCacheCopy1(RELOID, ObjectIdGetDatum(relid)); - if (!HeapTupleIsValid(tuple)) - { - /* be sure to drop useless lock so we don't bloat lock table */ - UnlockRelationOid(relid, AccessExclusiveLock); - continue; - } - classForm = (Form_pg_class) GETSTRUCT(tuple); - - /* - * Make all the same tests made in the loop above. In event of OID - * counter wraparound, the pg_class entry we have now might be - * completely unrelated to the one we saw before. - */ - if (!((classForm->relkind == RELKIND_RELATION || - classForm->relkind == RELKIND_MATVIEW || - classForm->relkind == RELKIND_DIRECTORY_TABLE) && - classForm->relpersistence == RELPERSISTENCE_TEMP)) - { - UnlockRelationOid(relid, AccessExclusiveLock); - continue; - } - - if (checkTempNamespaceStatus(classForm->relnamespace) != TEMP_NAMESPACE_IDLE) - { - UnlockRelationOid(relid, AccessExclusiveLock); - continue; - } - - /* OK, let's delete it */ - ereport(LOG, - (errmsg("autovacuum: dropping orphan temp table \"%s.%s.%s\"", - get_database_name(MyDatabaseId), - get_namespace_name(classForm->relnamespace), - NameStr(classForm->relname)))); - - object.classId = RelationRelationId; - object.objectId = relid; - object.objectSubId = 0; - performDeletion(&object, DROP_CASCADE, - PERFORM_DELETION_INTERNAL | - PERFORM_DELETION_QUIETLY | - PERFORM_DELETION_SKIP_EXTENSIONS); - - /* - * To commit the deletion, end current transaction and start a new - * one. Note this also releases the lock we took. - */ - CommitTransactionCommand(); - StartTransactionCommand(); - - /* StartTransactionCommand changed current memory context */ - MemoryContextSwitchTo(AutovacMemCxt); - } - - /* - * Create a buffer access strategy object for VACUUM to use. We want to - * use the same one across all the vacuum operations we perform, since the - * point is for VACUUM not to blow out the shared cache. 
- */ - bstrategy = GetAccessStrategy(BAS_VACUUM); - - /* - * create a memory context to act as fake PortalContext, so that the - * contexts created in the vacuum code are cleaned up for each table. - */ - PortalContext = AllocSetContextCreate(AutovacMemCxt, - "Autovacuum Portal", - ALLOCSET_DEFAULT_SIZES); - - /* - * Perform operations on collected tables. - */ - foreach(cell, table_oids) - { - Oid relid = lfirst_oid(cell); - HeapTuple classTup; - autovac_table *tab; - bool isshared; - bool skipit; - double stdVacuumCostDelay; - int stdVacuumCostLimit; - dlist_iter iter; - - CHECK_FOR_INTERRUPTS(); - - /* - * Check for config changes before processing each collected table. - */ - if (ConfigReloadPending) - { - ConfigReloadPending = false; - ProcessConfigFile(PGC_SIGHUP); - - /* - * You might be tempted to bail out if we see autovacuum is now - * disabled. Must resist that temptation -- this might be a - * for-wraparound emergency worker, in which case that would be - * entirely inappropriate. - */ - } - - /* - * Find out whether the table is shared or not. (It's slightly - * annoying to fetch the syscache entry just for this, but in typical - * cases it adds little cost because table_recheck_autovac would - * refetch the entry anyway. We could buy that back by copying the - * tuple here and passing it to table_recheck_autovac, but that - * increases the odds of that function working with stale data.) - */ - classTup = SearchSysCache1(RELOID, ObjectIdGetDatum(relid)); - if (!HeapTupleIsValid(classTup)) - continue; /* somebody deleted the rel, forget it */ - isshared = ((Form_pg_class) GETSTRUCT(classTup))->relisshared; - ReleaseSysCache(classTup); - - /* - * Hold schedule lock from here until we've claimed the table. We - * also need the AutovacuumLock to walk the worker array, but that one - * can just be a shared lock. 
- */ - LWLockAcquire(AutovacuumScheduleLock, LW_EXCLUSIVE); - LWLockAcquire(AutovacuumLock, LW_SHARED); - - /* - * Check whether the table is being vacuumed concurrently by another - * worker. - */ - skipit = false; - dlist_foreach(iter, &AutoVacuumShmem->av_runningWorkers) - { - WorkerInfo worker = dlist_container(WorkerInfoData, wi_links, iter.cur); - - /* ignore myself */ - if (worker == MyWorkerInfo) - continue; - - /* ignore workers in other databases (unless table is shared) */ - if (!worker->wi_sharedrel && worker->wi_dboid != MyDatabaseId) - continue; - - if (worker->wi_tableoid == relid) - { - skipit = true; - found_concurrent_worker = true; - break; - } - } - LWLockRelease(AutovacuumLock); - if (skipit) - { - LWLockRelease(AutovacuumScheduleLock); - continue; - } - - /* - * Store the table's OID in shared memory before releasing the - * schedule lock, so that other workers don't try to vacuum it - * concurrently. (We claim it here so as not to hold - * AutovacuumScheduleLock while rechecking the stats.) - */ - MyWorkerInfo->wi_tableoid = relid; - MyWorkerInfo->wi_sharedrel = isshared; - LWLockRelease(AutovacuumScheduleLock); - - /* - * Check whether pgstat data still says we need to vacuum this table. - * It could have changed if something else processed the table while - * we weren't looking. - * - * Note: we have a special case in pgstat code to ensure that the - * stats we read are as up-to-date as possible, to avoid the problem - * that somebody just finished vacuuming this table. The window to - * the race condition is not closed but it is very small. 
- */ - MemoryContextSwitchTo(AutovacMemCxt); - tab = table_recheck_autovac(relid, table_toast_map, pg_class_desc, - effective_multixact_freeze_max_age); - if (tab == NULL) - { - /* someone else vacuumed the table, or it went away */ - LWLockAcquire(AutovacuumScheduleLock, LW_EXCLUSIVE); - MyWorkerInfo->wi_tableoid = InvalidOid; - MyWorkerInfo->wi_sharedrel = false; - LWLockRelease(AutovacuumScheduleLock); - continue; - } - - /* - * Remember the prevailing values of the vacuum cost GUCs. We have to - * restore these at the bottom of the loop, else we'll compute wrong - * values in the next iteration of autovac_balance_cost(). - */ - stdVacuumCostDelay = VacuumCostDelay; - stdVacuumCostLimit = VacuumCostLimit; - - /* Must hold AutovacuumLock while mucking with cost balance info */ - LWLockAcquire(AutovacuumLock, LW_EXCLUSIVE); + /* Must hold AutovacuumLock while mucking with cost balance info */ + LWLockAcquire(AutovacuumLock, LW_EXCLUSIVE); /* advertise my cost delay parameters for the balancing algorithm */ MyWorkerInfo->wi_dobalance = tab->at_dobalance; @@ -3678,3 +3435,283 @@ autovac_refresh_stats(void) pgstat_clear_snapshot(); } + +static List* +autovacuum_relation_list(Relation classRel, + Form_pg_database dbForm, + HTAB *table_toast_map, + PgStat_StatDBEntry *shared, + PgStat_StatDBEntry *dbentry, + TupleDesc pg_class_desc, + int effective_multixact_freeze_max_age) +{ + HeapTuple tuple; + TableScanDesc relScan; + List *table_oids = NIL; + List *orphan_oids = NIL; + ListCell *volatile cell; + ScanKeyData key; + + /* + * Scan pg_class to determine which tables to vacuum. + * + * We do this in two passes: on the first one we collect the list of plain + * relations and materialized views, and on the second one we collect + * TOAST tables. 
The reason for doing the second pass is that during it we + * want to use the main relation's pg_class.reloptions entry if the TOAST + * table does not have any, and we cannot obtain it unless we know + * beforehand what's the main table OID. + * + * We need to check TOAST tables separately because in cases with short, + * wide tables there might be proportionally much more activity in the + * TOAST table than in its parent. + */ + relScan = table_beginscan_catalog(classRel, 0, NULL); + + /* + * On the first pass, we collect main tables to vacuum, and also the main + * table relid to TOAST relid mapping. + */ + while ((tuple = heap_getnext(relScan, ForwardScanDirection)) != NULL) + { + Form_pg_class classForm = (Form_pg_class) GETSTRUCT(tuple); + PgStat_StatTabEntry *tabentry; + AutoVacOpts *relopts; + Oid relid; + bool dovacuum; + bool doanalyze; + bool wraparound; + + if (classForm->relkind != RELKIND_RELATION && + classForm->relkind != RELKIND_DIRECTORY_TABLE && + classForm->relkind != RELKIND_MATVIEW && + classForm->relkind != RELKIND_AOSEGMENTS && + classForm->relkind != RELKIND_AOBLOCKDIR && + classForm->relkind != RELKIND_AOVISIMAP) + continue; + + relid = classForm->oid; + + /* + * Check if it is a temp table (presumably, of some other backend's). + * We cannot safely process other backends' temp tables. + */ + if (classForm->relpersistence == RELPERSISTENCE_TEMP) + { + /* + * GPDB: Skip process temp tables since the temp namespace for QD and QE + * is using gp_session_id as suffix instead of backendID. + * And performDeletion() only execute delete on current node. + */ + continue; + + /* + * We just ignore it if the owning backend is still active and + * using the temporary schema. Also, for safety, ignore it if the + * namespace doesn't exist or isn't a temp namespace after all. 
+ */ + if (checkTempNamespaceStatus(classForm->relnamespace) == TEMP_NAMESPACE_IDLE) + { + /* + * The table seems to be orphaned -- although it might be that + * the owning backend has already deleted it and exited; our + * pg_class scan snapshot is not necessarily up-to-date + * anymore, so we could be looking at a committed-dead entry. + * Remember it so we can try to delete it later. + */ + orphan_oids = lappend_oid(orphan_oids, relid); + } + continue; + } + + /* Fetch reloptions and the pgstat entry for this table */ + relopts = extract_autovac_opts(tuple, pg_class_desc); + tabentry = get_pgstat_tabentry_relid(relid, classForm->relisshared, + shared, dbentry); + + /* Check if it needs vacuum or analyze */ + relation_needs_vacanalyze(relid, relopts, classForm, tabentry, + effective_multixact_freeze_max_age, + &dovacuum, &doanalyze, &wraparound); + + /* Relations that need work are added to table_oids */ + if (dovacuum || doanalyze) + table_oids = lappend_oid(table_oids, relid); + + /* + * Remember TOAST associations for the second pass. Note: we must do + * this whether or not the table is going to be vacuumed, because we + * don't automatically vacuum toast tables along the parent table. 
+ */ + if (OidIsValid(classForm->reltoastrelid)) + { + av_relation *hentry; + bool found; + + hentry = hash_search(table_toast_map, + &classForm->reltoastrelid, + HASH_ENTER, &found); + + if (!found) + { + /* hash_search already filled in the key */ + hentry->ar_relid = relid; + hentry->ar_hasrelopts = false; + if (relopts != NULL) + { + hentry->ar_hasrelopts = true; + memcpy(&hentry->ar_reloptions, relopts, + sizeof(AutoVacOpts)); + } + } + } + } + + table_endscan(relScan); + + /* second pass: check TOAST tables */ + ScanKeyInit(&key, + Anum_pg_class_relkind, + BTEqualStrategyNumber, F_CHAREQ, + CharGetDatum(RELKIND_TOASTVALUE)); + + relScan = table_beginscan_catalog(classRel, 1, &key); + while ((tuple = heap_getnext(relScan, ForwardScanDirection)) != NULL) + { + Form_pg_class classForm = (Form_pg_class) GETSTRUCT(tuple); + PgStat_StatTabEntry *tabentry; + Oid relid; + AutoVacOpts *relopts = NULL; + bool dovacuum; + bool doanalyze; + bool wraparound; + + /* + * We cannot safely process other backends' temp tables, so skip 'em. 
+ */ + if (classForm->relpersistence == RELPERSISTENCE_TEMP) + continue; + + relid = classForm->oid; + + /* + * fetch reloptions -- if this toast table does not have them, try the + * main rel + */ + relopts = extract_autovac_opts(tuple, pg_class_desc); + if (relopts == NULL) + { + av_relation *hentry; + bool found; + + hentry = hash_search(table_toast_map, &relid, HASH_FIND, &found); + if (found && hentry->ar_hasrelopts) + relopts = &hentry->ar_reloptions; + } + + /* Fetch the pgstat entry for this table */ + tabentry = get_pgstat_tabentry_relid(relid, classForm->relisshared, + shared, dbentry); + + relation_needs_vacanalyze(relid, relopts, classForm, tabentry, + effective_multixact_freeze_max_age, + &dovacuum, &doanalyze, &wraparound); + + /* ignore analyze for toast tables */ + if (dovacuum) + table_oids = lappend_oid(table_oids, relid); + } + + table_endscan(relScan); + table_close(classRel, AccessShareLock); + + /* + * Recheck orphan temporary tables, and if they still seem orphaned, drop + * them. We'll eat a transaction per dropped table, which might seem + * excessive, but we should only need to do anything as a result of a + * previous backend crash, so this should not happen often enough to + * justify "optimizing". Using separate transactions ensures that we + * don't bloat the lock table if there are many temp tables to be dropped, + * and it ensures that we don't lose work if a deletion attempt fails. + */ + foreach(cell, orphan_oids) + { + Oid relid = lfirst_oid(cell); + Form_pg_class classForm; + ObjectAddress object; + + /* + * Check for user-requested abort. + */ + CHECK_FOR_INTERRUPTS(); + + /* + * Try to lock the table. If we can't get the lock immediately, + * somebody else is using (or dropping) the table, so it's not our + * concern anymore. Having the lock prevents race conditions below. 
+ */ + if (!ConditionalLockRelationOid(relid, AccessExclusiveLock)) + continue; + + /* + * Re-fetch the pg_class tuple and re-check whether it still seems to + * be an orphaned temp table. If it's not there or no longer the same + * relation, ignore it. + */ + tuple = SearchSysCacheCopy1(RELOID, ObjectIdGetDatum(relid)); + if (!HeapTupleIsValid(tuple)) + { + /* be sure to drop useless lock so we don't bloat lock table */ + UnlockRelationOid(relid, AccessExclusiveLock); + continue; + } + classForm = (Form_pg_class) GETSTRUCT(tuple); + + /* + * Make all the same tests made in the loop above. In event of OID + * counter wraparound, the pg_class entry we have now might be + * completely unrelated to the one we saw before. + */ + if (!((classForm->relkind == RELKIND_RELATION || + classForm->relkind == RELKIND_MATVIEW || + classForm->relkind == RELKIND_DIRECTORY_TABLE) && + classForm->relpersistence == RELPERSISTENCE_TEMP)) + { + UnlockRelationOid(relid, AccessExclusiveLock); + continue; + } + + if (checkTempNamespaceStatus(classForm->relnamespace) != TEMP_NAMESPACE_IDLE) + { + UnlockRelationOid(relid, AccessExclusiveLock); + continue; + } + + /* OK, let's delete it */ + ereport(LOG, + (errmsg("autovacuum: dropping orphan temp table \"%s.%s.%s\"", + get_database_name(MyDatabaseId), + get_namespace_name(classForm->relnamespace), + NameStr(classForm->relname)))); + + object.classId = RelationRelationId; + object.objectId = relid; + object.objectSubId = 0; + performDeletion(&object, DROP_CASCADE, + PERFORM_DELETION_INTERNAL | + PERFORM_DELETION_QUIETLY | + PERFORM_DELETION_SKIP_EXTENSIONS); + + /* + * To commit the deletion, end current transaction and start a new + * one. Note this also releases the lock we took. 
+ */ + CommitTransactionCommand(); + StartTransactionCommand(); + + /* StartTransactionCommand changed current memory context */ + MemoryContextSwitchTo(AutovacMemCxt); + } + + return table_oids; +} diff --git a/src/include/postmaster/autovacuum.h b/src/include/postmaster/autovacuum.h index 5810ce55890..a812dee7f68 100644 --- a/src/include/postmaster/autovacuum.h +++ b/src/include/postmaster/autovacuum.h @@ -14,7 +14,11 @@ #ifndef AUTOVACUUM_H #define AUTOVACUUM_H +#include "pgstat.h" +#include "catalog/pg_database.h" +#include "commands/vacuum.h" #include "storage/block.h" +#include "utils/rel.h" /* * Other processes can request specific work from autovacuum, identified by @@ -25,6 +29,29 @@ typedef enum AVW_BRINSummarizeRange } AutoVacuumWorkItemType; +/* struct to keep track of tables to vacuum and/or analyze, in 1st pass */ +typedef struct av_relation +{ + Oid ar_toastrelid; /* hash key - must be first */ + Oid ar_relid; + bool ar_hasrelopts; + AutoVacOpts ar_reloptions; /* copy of AutoVacOpts from the main table's + * reloptions, or NULL if none */ +} av_relation; + +/* struct to keep track of tables to vacuum and/or analyze, after rechecking */ +typedef struct autovac_table +{ + Oid at_relid; + VacuumParams at_params; + double at_vacuum_cost_delay; + int at_vacuum_cost_limit; + bool at_dobalance; + bool at_sharedrel; + char *at_relname; + char *at_nspname; + char *at_datname; +} autovac_table; /* GUC variables */ extern bool autovacuum_start_daemon; @@ -59,6 +86,27 @@ extern bool IsAutoVacuumWorkerProcess(void); typedef void (*AutoVacLauncherMain_hook_type)(int argc, char *argv[]); extern PGDLLIMPORT AutoVacLauncherMain_hook_type AutoVacLauncherMain_hook; +/* Hook for plugins to get control in vacuum relation list */ +typedef List* (*AutoVacRelationList_hook_type)(Relation classRel, + Form_pg_database dbForm, + HTAB *table_toast_map, + PgStat_StatDBEntry *shared, + PgStat_StatDBEntry *dbentry, + TupleDesc pg_class_desc, + int 
effective_multixact_freeze_max_age, + MemoryContext AutovacMemCxt); +extern PGDLLIMPORT AutoVacRelationList_hook_type AutoVacRelationList_hook; + +/* Hook for plugins to get control in recheck auto-vacuum/analyze table */ +typedef autovac_table* (*TableRecheckAutoVac_hook_type) (Oid relid, HTAB *table_toast_map, + TupleDesc pg_class_desc, + int effective_multixact_freeze_max_age, + int default_freeze_min_age, + int default_freeze_table_age, + int default_multixact_freeze_min_age, + int default_multixact_freeze_table_age); +extern PGDLLIMPORT TableRecheckAutoVac_hook_type TableRecheckAutoVac_hook; + /* Functions to start autovacuum process, called from postmaster */ extern void autovac_init(void); extern int StartAutoVacLauncher(void); From 4c988824aaa1053382a801fa431a8b666fe20e7e Mon Sep 17 00:00:00 2001 From: Zhang Mingli Date: Tue, 25 Jun 2024 16:52:30 +0800 Subject: [PATCH 125/152] Remove useless function UpdateManifestRecord. We use insert and delete to update, it's not used. --- src/backend/catalog/main_manifest.c | 34 ----------------------------- src/include/catalog/main_manifest.h | 1 - 2 files changed, 35 deletions(-) diff --git a/src/backend/catalog/main_manifest.c b/src/backend/catalog/main_manifest.c index 24dfd2db109..beb5d4f1dab 100644 --- a/src/backend/catalog/main_manifest.c +++ b/src/backend/catalog/main_manifest.c @@ -80,40 +80,6 @@ InsertManifestRecord(Oid relid, RelFileNodeId relfilenode, text *path) recordDependencyOn(&dep, &ref, DEPENDENCY_INTERNAL); } -void -UpdateManifestRecord(RelFileNodeId relfilenode, text *path) -{ - Datum values[2]; - HeapTuple newtuple; - HeapTuple oldtuple; - ScanKeyData key; - SysScanDesc scan; - bool nulls[2]; - Relation rel = heap_open(ManifestRelationId, RowExclusiveLock); - - ScanKeyInit(&key, Anum_main_manifest_relnode, BTEqualStrategyNumber, - F_INT8EQ, UInt64GetDatum(relfilenode)); - - scan = systable_beginscan(rel, InvalidOid, false, NULL, 1, &key); - - oldtuple = systable_getnext(scan); - if 
(!HeapTupleIsValid(oldtuple)) - ereport(ERROR, (errcode(ERRCODE_IO_ERROR), - errmsg("write manifest catalog error"))); - - values[0] = UInt64GetDatum(relfilenode); - values[1] = PointerGetDatum(path); - nulls[0] = false; - nulls[1] = false; - - newtuple = heap_form_tuple(RelationGetDescr(rel), values, nulls); - - CatalogTupleUpdate(rel, &oldtuple->t_self, newtuple); - - systable_endscan(scan); - heap_close(rel, NoLock); -} - void DeleteManifestCatalog(RelFileNodeId relnode) { diff --git a/src/include/catalog/main_manifest.h b/src/include/catalog/main_manifest.h index a7913fa56c8..afa50bc6593 100644 --- a/src/include/catalog/main_manifest.h +++ b/src/include/catalog/main_manifest.h @@ -32,7 +32,6 @@ typedef FormData_main_manifest *Form_main_manifest; extern void InsertManifestRecord(Oid relid, RelFileNodeId relnode, text* path); extern void RemoveManifestRecord(RelFileNodeId relnode); -extern void UpdateManifestRecord(RelFileNodeId relnode, text* path); extern void DeleteManifestCatalog(RelFileNodeId relnode); #endif /* MAIN_MANIFEST.h */ From c24cbbb9a1b7868836f268dfd1daad1eb0bd7d1f Mon Sep 17 00:00:00 2001 From: Zhang Mingli Date: Mon, 8 Jul 2024 16:42:34 +0800 Subject: [PATCH 126/152] Support Subquery in Append Agg. For an append agg inside a subquery, we will use its target list to match materialized view. However, Postgres will remove unused columns of subquery that do not exist in upper query in a hacky way: make NULL for target entries. It will make us fail to match view and rewrite as we only support exact match for now. Workaround for this with GUC if we are allowed to attempt to answer query.
--- src/backend/cdb/cdbgroupingpaths.c | 2 +- src/backend/optimizer/path/allpaths.c | 11 +++++++++++ 2 files changed, 12 insertions(+), 1 deletion(-) diff --git a/src/backend/cdb/cdbgroupingpaths.c b/src/backend/cdb/cdbgroupingpaths.c index 1fcd8ac8441..b69465993ac 100644 --- a/src/backend/cdb/cdbgroupingpaths.c +++ b/src/backend/cdb/cdbgroupingpaths.c @@ -3147,7 +3147,7 @@ make_pathtarget_from_tupledesc(TupleDesc tupdes) attr->atttypid, attr->atttypmod, attr->attcollation, - 0 /* FIXME: What if we are a subquery? */); + 0); target->exprs = lappend(target->exprs, (Expr*) newVar); } diff --git a/src/backend/optimizer/path/allpaths.c b/src/backend/optimizer/path/allpaths.c index 9db0db827d5..1f45379f7b9 100644 --- a/src/backend/optimizer/path/allpaths.c +++ b/src/backend/optimizer/path/allpaths.c @@ -4521,6 +4521,17 @@ remove_unused_subquery_outputs(Query *subquery, RelOptInfo *rel) Bitmapset *attrs_used = NULL; ListCell *lc; + /* + * CBDB: + * This function changes target list in a hacky way, set NULL for + * a target entry which causes diffs if we want to rewrite with a + * materialized view. + * We may use subquery's parse tree to match view, so do not
+ */ + if (enable_answer_query_using_materialized_views) + return; + /* * Do nothing if subquery has UNION/INTERSECT/EXCEPT: in principle we * could update all the child SELECTs' tlists, but it seems not worth the From ed1fdc4b04eab159459030902747b6e1a17b96d7 Mon Sep 17 00:00:00 2001 From: leo Date: Fri, 28 Jun 2024 18:03:32 +0800 Subject: [PATCH 127/152] Fix: use MACRO SERVERLESS instead of GUC enable_serverless --- contrib/interconnect/udp/ic_udpifc.c | 8 ++++++-- src/backend/cdb/cdbutil.c | 7 ++++--- src/backend/commands/copyfrom.c | 8 ++++++-- src/backend/commands/matview.c | 4 +++- src/backend/commands/trigger.c | 25 +++++++++++++++++-------- src/backend/commands/vacuum.c | 7 +++++-- src/backend/fts/ftsprobe.c | 5 +++-- src/backend/storage/page/bufpage.c | 4 +++- src/backend/tcop/postgres.c | 6 +++++- src/backend/utils/init/globals.c | 5 ----- src/backend/utils/init/postinit.c | 4 +++- src/backend/utils/misc/gpexpand.c | 6 +++++- src/include/access/xlog.h | 6 +++++- src/include/miscadmin.h | 1 - 14 files changed, 65 insertions(+), 31 deletions(-) diff --git a/contrib/interconnect/udp/ic_udpifc.c b/contrib/interconnect/udp/ic_udpifc.c index 02a91552c4b..5c2a2ce1a2e 100644 --- a/contrib/interconnect/udp/ic_udpifc.c +++ b/contrib/interconnect/udp/ic_udpifc.c @@ -1604,8 +1604,10 @@ initConnHashTable(ConnHashTable *ht, MemoryContext cxt) * In serverless architecture, the cluster may have only one QD, skip Initialization. * Initialization will be done later. */ - if (enable_serverless && Gp_role == GP_ROLE_DISPATCH && ht->size == 0) +#ifdef SERVERLESS + if (Gp_role == GP_ROLE_DISPATCH && ht->size == 0) return true; +#endif Assert(ht->size > 0); @@ -1646,7 +1648,8 @@ connAddHash(ConnHashTable *ht, MotionConn *mConn) /* * Initialize connection hash table if needed. 
*/ - if (enable_serverless && Gp_role == GP_ROLE_DISPATCH && ht->size == 0) +#ifdef SERVERLESS + if (Gp_role == GP_ROLE_DISPATCH && ht->size == 0) { old = MemoryContextSwitchTo(ht->cxt); initConnHashTable(ht, ht->cxt); @@ -1654,6 +1657,7 @@ connAddHash(ConnHashTable *ht, MotionConn *mConn) Assert(ht->size > 0); } +#endif conn = CONTAINER_OF(mConn, MotionConnUDP, mConn); diff --git a/src/backend/cdb/cdbutil.c b/src/backend/cdb/cdbutil.c index 393cc98caf4..644cc33196d 100644 --- a/src/backend/cdb/cdbutil.c +++ b/src/backend/cdb/cdbutil.c @@ -287,9 +287,10 @@ readGpSegConfigFromCatalog(int *total_dbs) * In serverless mode, and if we are not in fts probe process, * we only need the segment that is up and has the same warehouseid. */ - if (enable_serverless && !am_ftsprobe) - need_current_segment = (warehouseid == GetCurrentWarehouseId() || DatumGetInt16(attr) == MASTER_CONTENT_ID) - && (status == GP_SEGMENT_CONFIGURATION_STATUS_UP); +#ifdef SERVERLESS + if (!am_ftsprobe) + need_current_segment = (warehouseid == GetCurrentWarehouseId() || DatumGetInt16(attr) == MASTER_CONTENT_ID) && (status == GP_SEGMENT_CONFIGURATION_STATUS_UP); +#endif if (need_current_segment) { diff --git a/src/backend/commands/copyfrom.c b/src/backend/commands/copyfrom.c index d04b8dc1088..3d272c72013 100644 --- a/src/backend/commands/copyfrom.c +++ b/src/backend/commands/copyfrom.c @@ -1710,10 +1710,14 @@ CopyFrom(CopyFromState cstate) ereport(ERROR, (errcode(ERRCODE_INVALID_TRANSACTION_STATE), errmsg("cannot perform COPY FREEZE because of prior transaction activity"))); - +#ifdef SERVERLESS if (cstate->rel->rd_createSubid != GetCurrentSubTransactionId() && cstate->rel->rd_newRelfilenodeSubid != GetCurrentSubTransactionId() && - ((enable_serverless && Gp_role == GP_ROLE_DISPATCH) || !enable_serverless)) + Gp_role == GP_ROLE_DISPATCH) +#else + if (cstate->rel->rd_createSubid != GetCurrentSubTransactionId() && + cstate->rel->rd_newRelfilenodeSubid != GetCurrentSubTransactionId()) +#endif 
ereport(ERROR, (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE), errmsg("cannot perform COPY FREEZE because the table was not created or truncated in the current subtransaction"))); diff --git a/src/backend/commands/matview.c b/src/backend/commands/matview.c index cc76371d67f..f2ed7bb3ee5 100644 --- a/src/backend/commands/matview.c +++ b/src/backend/commands/matview.c @@ -1042,7 +1042,8 @@ transientrel_shutdown(DestReceiver *self) table_close(matviewRel, NoLock); } - else if(enable_serverless && Gp_role == GP_ROLE_DISPATCH && !myState->concurrent) +#ifdef SERVERLESS + else if(Gp_role == GP_ROLE_DISPATCH && !myState->concurrent) { Relation matviewRel; @@ -1056,6 +1057,7 @@ transientrel_shutdown(DestReceiver *self) pgstat_count_truncate(matviewRel); table_close(matviewRel, NoLock); } +#endif } /* diff --git a/src/backend/commands/trigger.c b/src/backend/commands/trigger.c index 8bd4ea95930..d81b84d688d 100644 --- a/src/backend/commands/trigger.c +++ b/src/backend/commands/trigger.c @@ -241,13 +241,14 @@ CreateTriggerFiringOn(CreateTrigStmt *stmt, const char *queryString, * FIXME: table which is not a heap table and AO table * does not support constraint(deferred) trigger now. 
*/ - if (stmt->isconstraint && enable_serverless && - (RelationIsNonblockRelation(rel))) +#ifdef SERVERLESS + if (stmt->isconstraint && RelationIsNonblockRelation(rel)) ereport(ERROR, (errcode(ERRCODE_GP_FEATURE_NOT_YET), errmsg("\"%s\" is not a heap table and AO table", RelationGetRelationName(rel)), errdetail("constraint trigger is not supported now"))); +#endif } else if (rel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE) { @@ -2598,10 +2599,11 @@ ExecBRDeleteTriggers(EState *estate, EPQState *epqstate, bool should_free = false; int i; - if(enable_serverless) +#ifdef SERVERLESS Assert(HeapTupleIsValid(fdw_trigtuple) || ItemPointerIsValid(tupleid)); - else +#else Assert(HeapTupleIsValid(fdw_trigtuple) ^ ItemPointerIsValid(tupleid)); +#endif if (fdw_trigtuple == NULL) { TupleTableSlot *epqslot_candidate = NULL; @@ -2849,10 +2851,11 @@ ExecBRUpdateTriggers(EState *estate, EPQState *epqstate, * and tupleid are all valid. We also change the assert of ExecBRDeleteTriggers * because update partition table will trigger ExecBRDeleteTriggers. */ - if(enable_serverless) +#ifdef SERVERLESS Assert(HeapTupleIsValid(fdw_trigtuple) || ItemPointerIsValid(tupleid)); - else +#else Assert(HeapTupleIsValid(fdw_trigtuple) ^ ItemPointerIsValid(tupleid)); +#endif if (fdw_trigtuple == NULL) { TupleTableSlot *epqslot_candidate = NULL; @@ -5397,10 +5400,12 @@ AfterTriggerSetState(ConstraintsSetStmt *stmt) /* * FIXME: deferred trigger is not supported in the serverless architecture now. */ - if (enable_serverless && stmt->deferred) +#ifdef SERVERLESS + if (stmt->deferred) ereport(ERROR, (errcode(ERRCODE_GP_FEATURE_NOT_YET), errmsg("deferred trigger is not supported in Cloudberry now"))); +#endif /* If we haven't already done so, initialize our state. */ if (afterTriggers.state == NULL) @@ -6029,8 +6034,12 @@ AfterTriggerSaveEvent(EState *estate, ResultRelInfo *relinfo, * is more efficient. Because it is inefficient to fetch tuple * throught its ctid. 
*/ +#ifdef SERVERLESS if (row_trigger && (relkind == RELKIND_FOREIGN_TABLE || - (enable_serverless && (RelationIsNonblockRelation(rel))))) + RelationIsNonblockRelation(rel))) +#else + if (row_trigger && relkind == RELKIND_FOREIGN_TABLE) +#endif { if (fdw_tuplestore == NULL) { diff --git a/src/backend/commands/vacuum.c b/src/backend/commands/vacuum.c index 5d3e5e52e7d..532634bc598 100644 --- a/src/backend/commands/vacuum.c +++ b/src/backend/commands/vacuum.c @@ -2326,9 +2326,12 @@ vacuum_rel(Oid relid, RangeVar *relation, VacuumParams *params, bool is_toast; bool shouldDispatch; +#ifdef SERVERLESS + shouldDispatch = false; +#else shouldDispatch = (Gp_role == GP_ROLE_DISPATCH && - ENABLE_DISPATCH() && - !enable_serverless); + ENABLE_DISPATCH()); +#endif Assert(params != NULL); diff --git a/src/backend/fts/ftsprobe.c b/src/backend/fts/ftsprobe.c index 0f9b21e2fb8..eef1b3f4288 100644 --- a/src/backend/fts/ftsprobe.c +++ b/src/backend/fts/ftsprobe.c @@ -858,8 +858,9 @@ processRetry(fts_context *context) * mirror as down prematurely. If mirror is already marked * down in configuration, there is no need to retry. */ - if (enable_serverless) - break; +#ifdef SERVERLESS + break; +#endif if (!(ftsInfo->result.retryRequested && SEGMENT_IS_ALIVE(ftsInfo->mirror_cdbinfo))) diff --git a/src/backend/storage/page/bufpage.c b/src/backend/storage/page/bufpage.c index 4a7b32d0bd9..6e08dd7c1e0 100644 --- a/src/backend/storage/page/bufpage.c +++ b/src/backend/storage/page/bufpage.c @@ -105,13 +105,15 @@ PageIsVerifiedExtended(Page page, ForkNumber forknum, /* * In serverless architecture, the page is checked in smgrread. 
*/ - if (DataChecksumsEnabled() && !enable_serverless) +#ifndef SERVERLESS + if (DataChecksumsEnabled()) { checksum = pg_checksum_page((char *) page, blkno); if (checksum != p->pd_checksum) checksum_failure = true; } +#endif PageDecryptInplace(page, forknum, blkno); diff --git a/src/backend/tcop/postgres.c b/src/backend/tcop/postgres.c index edb1c4d066c..50115d6995d 100644 --- a/src/backend/tcop/postgres.c +++ b/src/backend/tcop/postgres.c @@ -1445,7 +1445,11 @@ exec_mpp_query(const char *query_string, * In serverless architecture, all the slice send their stat like * seq_scan to QD. */ - if (Gp_role == GP_ROLE_EXECUTE && (Gp_is_writer || enable_serverless)) +#ifdef SERVERLESS + if (Gp_role == GP_ROLE_EXECUTE) +#else + if (Gp_role == GP_ROLE_EXECUTE && Gp_is_writer) +#endif pgstat_send_qd_tabstats(); (*receiver->rDestroy) (receiver); diff --git a/src/backend/utils/init/globals.c b/src/backend/utils/init/globals.c index 30958c7a7fc..b187da7e849 100644 --- a/src/backend/utils/init/globals.c +++ b/src/backend/utils/init/globals.c @@ -149,11 +149,6 @@ double hash_mem_multiplier = 1.0; int maintenance_work_mem = 65536; int max_parallel_maintenance_workers = 2; -/* - * use CloudberryDB serverless architecture - */ -bool enable_serverless = false; - /* * Primary determinants of sizes of shared-memory structures. 
* diff --git a/src/backend/utils/init/postinit.c b/src/backend/utils/init/postinit.c index 47db1d75c3d..7427dc55fe8 100644 --- a/src/backend/utils/init/postinit.c +++ b/src/backend/utils/init/postinit.c @@ -1154,7 +1154,8 @@ InitPostgres(const char *in_dbname, Oid dboid, const char *username, */ fullpath = GetDatabasePath(MyDatabaseId, MyDatabaseTableSpace); - if (!bootstrap && !enable_serverless) +#ifndef SERVERLESS + if (!bootstrap) { if (access(fullpath, F_OK) == -1) { @@ -1174,6 +1175,7 @@ InitPostgres(const char *in_dbname, Oid dboid, const char *username, ValidatePgVersion(fullpath); } +#endif SetDatabasePath(fullpath); pfree(fullpath); diff --git a/src/backend/utils/misc/gpexpand.c b/src/backend/utils/misc/gpexpand.c index 40de91f5571..78a32ba9935 100644 --- a/src/backend/utils/misc/gpexpand.c +++ b/src/backend/utils/misc/gpexpand.c @@ -114,9 +114,13 @@ gp_expand_protect_catalog_changes(Relation relation) int oldVersion; int newVersion; - if (Gp_role != GP_ROLE_DISPATCH || enable_serverless) +#ifdef SERVERLESS + return; +#else + if (Gp_role != GP_ROLE_DISPATCH) /* only lock catalog updates on qd */ return; +#endif if (RelationGetNamespace(relation) != PG_CATALOG_NAMESPACE) /* not catalog relations */ diff --git a/src/include/access/xlog.h b/src/include/access/xlog.h index 666c395cc06..0468cd2845d 100644 --- a/src/include/access/xlog.h +++ b/src/include/access/xlog.h @@ -255,7 +255,11 @@ extern PGDLLIMPORT int wal_level; (DataChecksumsEnabled() || FileEncryptionEnabled || wal_log_hints) /* Do we need to WAL-log information required only for Hot Standby and logical replication? */ -#define XLogStandbyInfoActive() (wal_level >= WAL_LEVEL_REPLICA && !enable_serverless) +#ifdef SERVERLESS +#define XLogStandbyInfoActive() (false) +#else +#define XLogStandbyInfoActive() (wal_level >= WAL_LEVEL_REPLICA) +#endif /* Do we need to WAL-log information required only for logical replication? 
*/ #define XLogLogicalInfoActive() (wal_level >= WAL_LEVEL_LOGICAL) diff --git a/src/include/miscadmin.h b/src/include/miscadmin.h index 6fad304d403..792ef316df3 100644 --- a/src/include/miscadmin.h +++ b/src/include/miscadmin.h @@ -337,7 +337,6 @@ extern PGDLLIMPORT int work_mem; extern PGDLLIMPORT double hash_mem_multiplier; extern PGDLLIMPORT int maintenance_work_mem; extern PGDLLIMPORT int max_parallel_maintenance_workers; -extern PGDLLIMPORT bool enable_serverless; extern PGDLLIMPORT int statement_mem; extern PGDLLIMPORT int max_statement_mem; extern PGDLLIMPORT int gp_vmem_limit_per_query; From 4d1fc0662d7aae36134a73f6c40df9d51ac2ed76 Mon Sep 17 00:00:00 2001 From: liushengsong Date: Thu, 25 Jul 2024 14:19:24 +0800 Subject: [PATCH 128/152] Enhance: Support for alter warehouse name suspend/resume/options 1. Support alter warehouse name suspend/resume. 2. Support alter warehouse name options/replace options 3. Change FTS ignore dbid and contentid check when serverless is defined. --- src/backend/cdb/cdbvars.c | 9 +++ src/backend/fts/ftsmessagehandler.c | 2 + src/backend/nodes/copyfuncs.c | 1 + src/backend/parser/gram.y | 111 +++++++++++++++++++++++++++- src/backend/utils/init/miscinit.c | 1 - src/include/cdb/cdbvars.h | 30 ++++++-- src/include/nodes/parsenodes.h | 6 +- src/include/parser/kwlist.h | 2 + 8 files changed, 152 insertions(+), 10 deletions(-) diff --git a/src/backend/cdb/cdbvars.c b/src/backend/cdb/cdbvars.c index 9db3389e0bf..52291f53de8 100644 --- a/src/backend/cdb/cdbvars.c +++ b/src/backend/cdb/cdbvars.c @@ -646,3 +646,12 @@ gp_execution_dbid(PG_FUNCTION_ARGS) * Warehouse hook for Create/Drop/Alter Warehouse */ WarehouseMethod *warehouse_method = NULL; + +const char *const WarehouseStatusStr[] = { + "CREATING", + "RUNNING", + "SUSPENDED", + "STOPPING", + "SUSPENDING", + "RESUMING" +}; diff --git a/src/backend/fts/ftsmessagehandler.c b/src/backend/fts/ftsmessagehandler.c index e9293f67444..5012c5ffdee 100644 --- 
a/src/backend/fts/ftsmessagehandler.c +++ b/src/backend/fts/ftsmessagehandler.c @@ -466,6 +466,7 @@ HandleFtsMessage(const char* query_string) error_level = WARNING; #endif +#ifndef SERVERLESS if (dbid != GpIdentity.dbid) ereport(error_level, (errmsg("message type: %s received dbid:%d doesn't match this segments configured dbid:%d", @@ -475,6 +476,7 @@ HandleFtsMessage(const char* query_string) ereport(error_level, (errmsg("message type: %s received contentid:%d doesn't match this segments configured contentid:%d", message_type, contid, GpIdentity.segindex))); +#endif SIMPLE_FAULT_INJECTOR("fts_handle_message"); diff --git a/src/backend/nodes/copyfuncs.c b/src/backend/nodes/copyfuncs.c index 9acb54ec45c..ea7b01023b4 100644 --- a/src/backend/nodes/copyfuncs.c +++ b/src/backend/nodes/copyfuncs.c @@ -6428,6 +6428,7 @@ _copyAlterWarehouseStmt(const AlterWarehouseStmt *from) COPY_STRING_FIELD(whname); COPY_SCALAR_FIELD(warehouse_size); COPY_NODE_FIELD(options); + COPY_SCALAR_FIELD(missing_ok); return newnode; } diff --git a/src/backend/parser/gram.y b/src/backend/parser/gram.y index 554b73acc02..c5cc77d9205 100644 --- a/src/backend/parser/gram.y +++ b/src/backend/parser/gram.y @@ -890,10 +890,10 @@ static void check_expressions_in_partition_key(PartitionSpec *spec, core_yyscan_ QUEUE - RANDOMLY READABLE READS REJECT_P REPLICATED RESOURCE + RANDOMLY READABLE READS REJECT_P REPLICATED RESOURCE RESUME ROOTPARTITION - SCATTER SEGMENT SEGMENTS SHRINK SPLIT SUBPARTITION + SCATTER SEGMENT SEGMENTS SHRINK SPLIT SUBPARTITION SUSPEND TAG @@ -13591,6 +13591,8 @@ DropWarehouseStmt: DROP WAREHOUSE name * * QUERY: * ALTER WAREHOUSE name SET WAREHOUSE_SIZE warehouse_size + * ALTER WAREHOUSE name SUSPEND + * ALTER WAREHOUSE name RESUME * *****************************************************************************/ @@ -13602,6 +13604,18 @@ AlterWarehouseStmt: n->whname = $3; n->warehouse_size = $6; n->options = NULL; + n->missing_ok = false; + $$ = (Node *)n; + } + | + ALTER WAREHOUSE 
name IF_P EXISTS SET WAREHOUSE_SIZE SignedIconst + { + AlterWarehouseStmt *n = makeNode(AlterWarehouseStmt); + n->kind = ALTER_WAREHOUSE_SET_WAREHOUSE_SIZE; + n->whname = $3; + n->warehouse_size = $8; + n->options = NULL; + n->missing_ok = true; $$ = (Node *)n; } | @@ -13613,6 +13627,95 @@ AlterWarehouseStmt: n->warehouse_size = 0; n->newowner = $6; n->options = NULL; + n->missing_ok = false; + $$ = (Node *)n; + } + | + ALTER WAREHOUSE name IF_P EXISTS OWNER TO RoleSpec + { + AlterWarehouseStmt *n = makeNode(AlterWarehouseStmt); + n->kind = ALTER_WAREHOUSE_ALTER_OWNER; + n->whname = $3; + n->warehouse_size = 0; + n->newowner = $8; + n->options = NULL; + n->missing_ok = true; + $$ = (Node *)n; + } + | + ALTER WAREHOUSE name SUSPEND + { + AlterWarehouseStmt *n = makeNode(AlterWarehouseStmt); + n->kind = ALTER_WAREHOUSE_SUSPEND; + n->whname = $3; + n->missing_ok = false; + $$ = (Node *)n; + } + | + ALTER WAREHOUSE name IF_P EXISTS SUSPEND + { + AlterWarehouseStmt *n = makeNode(AlterWarehouseStmt); + n->kind = ALTER_WAREHOUSE_SUSPEND; + n->whname = $3; + n->missing_ok = true; + $$ = (Node *)n; + } + | + ALTER WAREHOUSE name RESUME + { + AlterWarehouseStmt *n = makeNode(AlterWarehouseStmt); + n->kind = ALTER_WAREHOUSE_RESUME; + n->whname = $3; + n->missing_ok = false; + $$ = (Node *)n; + } + | + ALTER WAREHOUSE name IF_P EXISTS RESUME + { + AlterWarehouseStmt *n = makeNode(AlterWarehouseStmt); + n->kind = ALTER_WAREHOUSE_RESUME; + n->whname = $3; + n->missing_ok = true; + $$ = (Node *)n; + } + | + ALTER WAREHOUSE name alter_generic_options + { + AlterWarehouseStmt *n = makeNode(AlterWarehouseStmt); + n->kind = ALTER_WAREHOUSE_OPTIONS; + n->whname = $3; + n->missing_ok = false; + n->options = $4; + $$ = (Node *)n; + } + | + ALTER WAREHOUSE name IF_P EXISTS alter_generic_options + { + AlterWarehouseStmt *n = makeNode(AlterWarehouseStmt); + n->kind = ALTER_WAREHOUSE_OPTIONS; + n->whname = $3; + n->missing_ok = true; + n->options = $6; + $$ = (Node *)n; + } + | + ALTER 
WAREHOUSE name REPLACE create_generic_options + { + AlterWarehouseStmt *n = makeNode(AlterWarehouseStmt); + n->kind = ALTER_WAREHOUSE_REPLACE_OPTIONS; + n->whname = $3; + n->missing_ok = false; + n->options = $5; + $$ = (Node *)n; + } + | + ALTER WAREHOUSE name IF_P EXISTS REPLACE create_generic_options + { + AlterWarehouseStmt *n = makeNode(AlterWarehouseStmt); + n->kind = ALTER_WAREHOUSE_REPLACE_OPTIONS; + n->whname = $3; + n->missing_ok = true; + n->options = $7; $$ = (Node *)n; } ; @@ -20066,6 +20169,7 @@ unreserved_keyword: | RESOURCE | RESTART | RESTRICT + | RESUME | RETRIEVE | RETURN | RETURNS @@ -20117,6 +20221,7 @@ unreserved_keyword: | SUBPARTITION | SUBSCRIPTION | SUPPORT + | SUSPEND | SYSID | SYSTEM_P | TABLES @@ -21080,6 +21185,7 @@ bare_label_keyword: | RESOURCE | RESTART | RESTRICT + | RESUME | RETRIEVE | RETURN | RETURNS @@ -21139,6 +21245,7 @@ bare_label_keyword: | SUBSCRIPTION | SUBSTRING | SUPPORT + | SUSPEND | SYMMETRIC | SYSID | SYSTEM_P diff --git a/src/backend/utils/init/miscinit.c b/src/backend/utils/init/miscinit.c index 44dd832cc3c..b606323bd43 100644 --- a/src/backend/utils/init/miscinit.c +++ b/src/backend/utils/init/miscinit.c @@ -983,7 +983,6 @@ GetCurrentWarehouseId(void) void SetCurrentWarehouseId(Oid warehouseid) { - AssertArg(OidIsValid(warehouseid)); CurrentWarehouseId = warehouseid; } diff --git a/src/include/cdb/cdbvars.h b/src/include/cdb/cdbvars.h index 5703d23b6a8..5df3d337376 100644 --- a/src/include/cdb/cdbvars.h +++ b/src/include/cdb/cdbvars.h @@ -794,9 +794,19 @@ typedef enum WarehouseStatus WAREHOUSE_STATUS_CREATING, WAREHOUSE_STATUS_RUNNING, WAREHOUSE_STATUS_SUSPENDED, - WAREHOUSE_STATUS_STOPPING + WAREHOUSE_STATUS_STOPPING, + WAREHOUSE_STATUS_SUSPENDING, + WAREHOUSE_STATUS_RESUMING } WarehouseStatus; +extern const char *const WarehouseStatusStr[]; + +typedef enum WarehouseAction +{ + WAREHOUSE_STATUS_SUSPEND, + WAREHOUSE_STATUS_RESUME +} WarehouseAction; + typedef struct WarehouseSegmentConfig { char *hostname; @@ 
-823,11 +833,19 @@ bool (*DropWarehouse_hook)(char *warehouse_name, WarehouseSegmentConfig *seg_configs, int warehouse_size); -bool (*AlterWarehouse_hook)(char *warehouse_name, - int old_warehouse_size, - int new_warehouse_size, - WarehouseSegmentConfig **new_seg_configs, - int *seg_configs_size); +bool (*AlterWarehouseSize_hook)(char *warehouse_name, + int old_warehouse_size, + int new_warehouse_size, + WarehouseSegmentConfig **new_seg_configs, + int *seg_configs_size); + +bool (*AlterWarehouseStatus_hook)(char *warehouse_name, + WarehouseAction action, + bool *update_catalog); + +bool (*AlterWarehouseOptions_hook)(char *warehouse_name, + char **warehouse_options, + int warehouse_options_size); } WarehouseMethod; extern WarehouseMethod *warehouse_method; diff --git a/src/include/nodes/parsenodes.h b/src/include/nodes/parsenodes.h index da06364d0f7..84324543221 100644 --- a/src/include/nodes/parsenodes.h +++ b/src/include/nodes/parsenodes.h @@ -4500,7 +4500,10 @@ typedef enum AlterWarehouseType { ALTER_WAREHOUSE_OPTIONS, ALTER_WAREHOUSE_SET_WAREHOUSE_SIZE, - ALTER_WAREHOUSE_ALTER_OWNER + ALTER_WAREHOUSE_ALTER_OWNER, + ALTER_WAREHOUSE_SUSPEND, + ALTER_WAREHOUSE_RESUME, + ALTER_WAREHOUSE_REPLACE_OPTIONS } AlterWarehouseType; typedef struct AlterWarehouseStmt @@ -4511,6 +4514,7 @@ typedef struct AlterWarehouseStmt int warehouse_size; /* New size of warehouse if set warehouse_size command */ RoleSpec *newowner; /* the new owner */ List *options; /* List of DefElem nodes */ + bool missing_ok; } AlterWarehouseStmt; #endif /* PARSENODES_H */ diff --git a/src/include/parser/kwlist.h b/src/include/parser/kwlist.h index 238c0a91313..f3e957a8bb1 100644 --- a/src/include/parser/kwlist.h +++ b/src/include/parser/kwlist.h @@ -415,6 +415,7 @@ PG_KEYWORD("reset", RESET, UNRESERVED_KEYWORD, BARE_LABEL) PG_KEYWORD("resource", RESOURCE, UNRESERVED_KEYWORD, BARE_LABEL) PG_KEYWORD("restart", RESTART, UNRESERVED_KEYWORD, BARE_LABEL) PG_KEYWORD("restrict", RESTRICT, UNRESERVED_KEYWORD, 
BARE_LABEL) +PG_KEYWORD("resume", RESUME, UNRESERVED_KEYWORD, BARE_LABEL) PG_KEYWORD("retrieve", RETRIEVE, UNRESERVED_KEYWORD, BARE_LABEL) PG_KEYWORD("return", RETURN, UNRESERVED_KEYWORD, BARE_LABEL) PG_KEYWORD("returning", RETURNING, RESERVED_KEYWORD, AS_LABEL) @@ -477,6 +478,7 @@ PG_KEYWORD("subpartition", SUBPARTITION, UNRESERVED_KEYWORD, BARE_LABEL) /* G PG_KEYWORD("subscription", SUBSCRIPTION, UNRESERVED_KEYWORD, BARE_LABEL) PG_KEYWORD("substring", SUBSTRING, COL_NAME_KEYWORD, BARE_LABEL) PG_KEYWORD("support", SUPPORT, UNRESERVED_KEYWORD, BARE_LABEL) +PG_KEYWORD("suspend", SUSPEND, UNRESERVED_KEYWORD, BARE_LABEL) PG_KEYWORD("symmetric", SYMMETRIC, RESERVED_KEYWORD, BARE_LABEL) PG_KEYWORD("sysid", SYSID, UNRESERVED_KEYWORD, BARE_LABEL) PG_KEYWORD("system", SYSTEM_P, UNRESERVED_KEYWORD, BARE_LABEL) From 068ea375aba89d8c0131193211d93b68c7e77f84 Mon Sep 17 00:00:00 2001 From: leo Date: Fri, 19 Jul 2024 10:48:52 +0800 Subject: [PATCH 129/152] Refactor: use MACRO SERVERLESS to improve readability and maintainability 1. remove some useless code and hooks 1. remove MACRO SERVERLESS from common logical code 1. 
add MACRO SERVERLESS to cloud service related code --- src/backend/access/heap/heapam_visibility.c | 2 +- src/backend/access/transam/xact.c | 10 +------- src/backend/cdb/cdbcat.c | 15 ++++++++---- src/backend/cdb/cdbllize.c | 15 +++++++++++- src/backend/commands/createas.c | 2 ++ src/backend/commands/trigger.c | 22 ++++++++++++++---- src/backend/commands/typecmds.c | 15 ------------ src/backend/commands/user.c | 2 ++ src/backend/executor/execMain.c | 8 +++++++ src/backend/executor/nodeModifyTable.c | 22 +++++++++++++----- .../translate/CTranslatorRelcacheToDXL.cpp | 2 +- src/backend/nodes/copyfuncs.c | 1 + src/backend/optimizer/util/pathnode.c | 2 ++ src/backend/storage/buffer/localbuf.c | 9 ++++++++ src/backend/utils/cache/catcache.c | 12 ---------- src/backend/utils/cache/relcache.c | 8 ------- src/backend/utils/mmgr/portalmem.c | 10 -------- src/bin/initdb/initdb.c | 23 +++++++++++++++++++ src/bin/psql/describe.c | 4 ++++ src/include/access/xact.h | 3 --- src/include/nodes/pathnodes.h | 2 ++ src/include/utils/catcache.h | 13 ----------- src/include/utils/rel.h | 4 ++-- src/include/utils/relcache.h | 6 ----- 24 files changed, 115 insertions(+), 97 deletions(-) diff --git a/src/backend/access/heap/heapam_visibility.c b/src/backend/access/heap/heapam_visibility.c index 542068a340a..e4176b1f594 100644 --- a/src/backend/access/heap/heapam_visibility.c +++ b/src/backend/access/heap/heapam_visibility.c @@ -180,11 +180,11 @@ SetHintBits(HeapTupleHeader tuple, Buffer buffer, Relation rel, { bool isXmin; +#ifdef SERVERLESS /* * On QE, we can see any changes on catalog relations(dirty read) in InitProcessing Mode * because of the latest snapshot, do not set hint bits. 
*/ -#ifdef SERVERLESS if (IsInitProcessingMode() && Gp_role == GP_ROLE_EXECUTE && GpIdentity.segindex != MASTER_CONTENT_ID) { diff --git a/src/backend/access/transam/xact.c b/src/backend/access/transam/xact.c index 9bd0db6f25f..ec25745619e 100644 --- a/src/backend/access/transam/xact.c +++ b/src/backend/access/transam/xact.c @@ -913,12 +913,6 @@ GetCurrentCommandId(bool used) return currentCommandId; } -void -SetCurrentCommandId(CommandId cid) -{ - currentCommandId = cid; -} - /* * SetParallelStartTimestamps * @@ -2768,11 +2762,9 @@ StartTransaction(void) DtxContextToString(DistributedTransactionContext), IsoLevelAsUpperString(XactIsoLevel), XactReadOnly, LocalDistribXact_DisplayString(MyProc->pgprocno)))); -#ifdef SERVERLESS + CallXactCallbacks(s->blockState == TBLOCK_PARALLEL_INPROGRESS ? XACT_EVENT_PARALLEL_BEGIN : XACT_EVENT_BEGIN); -#endif - } /* diff --git a/src/backend/cdb/cdbcat.c b/src/backend/cdb/cdbcat.c index 61dd8ddf0e8..62e99a90edc 100644 --- a/src/backend/cdb/cdbcat.c +++ b/src/backend/cdb/cdbcat.c @@ -97,8 +97,13 @@ makeGpPolicy(GpPolicyType ptype, int nattrs, int numsegments) policy->numsegments = numsegments; policy->nattrs = nattrs; +#ifdef SERVERLESS Assert(numsegments >= 0 || (ptype == POLICYTYPE_ENTRY && numsegments == -1)); +#else + Assert(numsegments > 0 || + (ptype == POLICYTYPE_ENTRY && numsegments == -1)); +#endif return policy; } @@ -427,9 +432,11 @@ GpPolicyFetch(Oid tbloid) switch (policyform->policytype) { case SYM_POLICYTYPE_REPLICATED: +#ifdef SERVERLESS if (policyform->numsegments == 0) policy = createReplicatedGpPolicy(getgpsegmentCount()); else +#endif policy = createReplicatedGpPolicy(policyform->numsegments); break; case SYM_POLICYTYPE_PARTITIONED: @@ -461,16 +468,14 @@ GpPolicyFetch(Oid tbloid) } /* Create a GpPolicy object. 
*/ +#ifdef SERVERLESS if (policyform->numsegments == 0) - { policy = makeGpPolicy(POLICYTYPE_PARTITIONED, - nattrs, getgpsegmentCount()); - } + nattrs, getgpsegmentCount()); else - { +#endif policy = makeGpPolicy(POLICYTYPE_PARTITIONED, nattrs, policyform->numsegments); - } for (i = 0; i < nattrs; i++) { diff --git a/src/backend/cdb/cdbllize.c b/src/backend/cdb/cdbllize.c index e0be199bf88..b000452b049 100644 --- a/src/backend/cdb/cdbllize.c +++ b/src/backend/cdb/cdbllize.c @@ -328,8 +328,12 @@ cdbllize_get_final_locus(PlannerInfo *root, PathTarget *target) if (intoPolicy != NULL) { +#ifdef SERVERLESS Assert(GpPolicyIsEntry(query->intoPolicy) || GpPolicyIsPartitioned(query->intoPolicy) || GpPolicyIsReplicated(query->intoPolicy)); +#else + Assert(intoPolicy->ptype != POLICYTYPE_ENTRY); +#endif Assert(intoPolicy->nattrs >= 0); Assert(intoPolicy->nattrs <= MaxPolicyAttributeNumber); @@ -345,6 +349,7 @@ cdbllize_get_final_locus(PlannerInfo *root, PathTarget *target) CdbPathLocus_MakeReplicated(&locus, intoPolicy->numsegments, 0); return locus; } +#ifdef SERVERLESS else if (intoPolicy->ptype == POLICYTYPE_ENTRY) { /* @@ -356,6 +361,7 @@ cdbllize_get_final_locus(PlannerInfo *root, PathTarget *target) return entryLocus; } +#endif } } else if (query->commandType == CMD_SELECT && query->parentStmtType == PARENTSTMTTYPE_NONE) @@ -425,8 +431,12 @@ cdbllize_adjust_top_path(PlannerInfo *root, Path *best_path, { targetPolicy = query->intoPolicy; +#ifdef SERVERLESS Assert(GpPolicyIsEntry(query->intoPolicy) || GpPolicyIsPartitioned(query->intoPolicy) || GpPolicyIsReplicated(query->intoPolicy)); +#else + Assert(query->intoPolicy->ptype != POLICYTYPE_ENTRY); +#endif Assert(query->intoPolicy->nattrs >= 0); Assert(query->intoPolicy->nattrs <= MaxPolicyAttributeNumber); } @@ -514,9 +524,12 @@ cdbllize_adjust_top_path(PlannerInfo *root, Path *best_path, " Make sure column(s) chosen are the optimal data distribution key to minimize skew."))); } } +#ifdef SERVERLESS 
Assert(GpPolicyIsEntry(targetPolicy) || GpPolicyIsPartitioned(targetPolicy) || GpPolicyIsReplicated(targetPolicy)); - +#else + Assert(targetPolicy->ptype != POLICYTYPE_ENTRY); +#endif query->intoPolicy = targetPolicy; if (GpPolicyIsReplicated(targetPolicy) && diff --git a/src/backend/commands/createas.c b/src/backend/commands/createas.c index 025f02bdcbb..6d0d7197d93 100644 --- a/src/backend/commands/createas.c +++ b/src/backend/commands/createas.c @@ -231,10 +231,12 @@ create_ctas_internal(List *attrList, IntoClause *into, QueryDesc *queryDesc, boo CommandCounterIncrement(); } +#ifdef SERVERLESS if (!queryDesc->ddesc) { GetAssignedOidsForDispatch(); } +#endif if (Gp_role == GP_ROLE_DISPATCH && dispatch) CdbDispatchUtilityStatement((Node *) create, diff --git a/src/backend/commands/trigger.c b/src/backend/commands/trigger.c index d81b84d688d..7ecac7e2589 100644 --- a/src/backend/commands/trigger.c +++ b/src/backend/commands/trigger.c @@ -237,11 +237,11 @@ CreateTriggerFiringOn(CreateTrigStmt *stmt, const char *queryString, errmsg("\"%s\" is a table", RelationGetRelationName(rel)), errdetail("Tables cannot have INSTEAD OF triggers."))); +#ifdef SERVERLESS /* * FIXME: table which is not a heap table and AO table * does not support constraint(deferred) trigger now. 
*/ -#ifdef SERVERLESS if (stmt->isconstraint && RelationIsNonblockRelation(rel)) ereport(ERROR, (errcode(ERRCODE_GP_FEATURE_NOT_YET), @@ -745,7 +745,7 @@ CreateTriggerFiringOn(CreateTrigStmt *stmt, const char *queryString, #ifndef SERVERLESS /* Check GPDB limitations */ - if (RelationIsAppendOptimized(rel) && + if (RelationIsNonblockRelation(rel) && TRIGGER_FOR_ROW(tgtype) && !stmt->isconstraint) { @@ -2845,13 +2845,13 @@ ExecBRUpdateTriggers(EState *estate, EPQState *epqstate, /* Determine lock mode to use */ lockmode = ExecUpdateLockMode(estate, relinfo); +#ifdef SERVERLESS /* * FIXME: In the serverless architecture, For update operation, we save the * oldtuple to avoid high-cost table_tuple_fetch_row_version. Thus, fdw_trigtuple * and tupleid are all valid. We also change the assert of ExecBRDeleteTriggers * because update partition table will trigger ExecBRDeleteTriggers. */ -#ifdef SERVERLESS Assert(HeapTupleIsValid(fdw_trigtuple) || ItemPointerIsValid(tupleid)); #else Assert(HeapTupleIsValid(fdw_trigtuple) ^ ItemPointerIsValid(tupleid)); @@ -4409,8 +4409,12 @@ afterTriggerInvokeEvents(AfterTriggerEventList *events, ExecDropSingleTupleTableSlot(slot2); slot1 = slot2 = NULL; } +#ifdef SERVERLESS if (rel->rd_rel->relkind == RELKIND_FOREIGN_TABLE || RelationIsNonblockRelation(rel)) +#else + if (rel->rd_rel->relkind == RELKIND_FOREIGN_TABLE) +#endif { slot1 = MakeSingleTupleTableSlot(rel->rd_att, &TTSOpsMinimalTuple); @@ -5397,10 +5401,10 @@ AfterTriggerSetState(ConstraintsSetStmt *stmt) { int my_level = GetCurrentTransactionNestLevel(); +#ifdef SERVERLESS /* * FIXME: deferred trigger is not supported in the serverless architecture now. 
*/ -#ifdef SERVERLESS if (stmt->deferred) ereport(ERROR, (errcode(ERRCODE_GP_FEATURE_NOT_YET), @@ -5706,6 +5710,7 @@ AfterTriggerSetState(ConstraintsSetStmt *stmt) } if (Gp_role == GP_ROLE_DISPATCH) { +#ifdef SERVERLESS bool snapshot_set = false; if (!ActiveSnapshotSet()) { @@ -5722,6 +5727,13 @@ AfterTriggerSetState(ConstraintsSetStmt *stmt) { PopActiveSnapshot(); } +#else + CdbDispatchUtilityStatement((Node *) stmt, + DF_CANCEL_ON_ERROR| + DF_NEED_TWO_PHASE, + NIL, + NULL); +#endif } } @@ -6028,13 +6040,13 @@ AfterTriggerSaveEvent(EState *estate, ResultRelInfo *relinfo, modifiedCols, oldslot, newslot)) continue; +#ifdef SERVERLESS /* * In serverless architecture, implementing trigger the * same as foreign table which use tuplestore to store the tuple * is more efficient. Because it is inefficient to fetch tuple * throught its ctid. */ -#ifdef SERVERLESS if (row_trigger && (relkind == RELKIND_FOREIGN_TABLE || RelationIsNonblockRelation(rel))) #else diff --git a/src/backend/commands/typecmds.c b/src/backend/commands/typecmds.c index 5d4458b453c..f9888340fa4 100644 --- a/src/backend/commands/typecmds.c +++ b/src/backend/commands/typecmds.c @@ -84,17 +84,6 @@ #include "catalog/gp_indexing.h" -#if 0 -/* result structure for get_rels_with_domain() */ -typedef struct -{ - Relation rel; /* opened and locked relation */ - int natts; /* number of attributes of interest */ - int *atts; /* attribute numbers */ - /* atts[] is of allocated length RelationGetNumberOfAttributes(rel) */ -} RelToCheck; -#endif - /* parameter structure for AlterTypeRecurse() */ typedef struct { @@ -137,10 +126,6 @@ static Oid findTypeSubscriptingFunction(List *procname, Oid typeOid); static Oid findRangeSubOpclass(List *opcname, Oid subtype); static Oid findRangeCanonicalFunction(List *procname, Oid typeOid); static Oid findRangeSubtypeDiffFunction(List *procname, Oid subtype); -#if 0 -static void validateDomainConstraint(Oid domainoid, char *ccbin); -static List *get_rels_with_domain(Oid 
domainOid, LOCKMODE lockmode); -#endif static void checkEnumOwner(HeapTuple tup); static char *domainAddConstraint(Oid domainOid, Oid domainNamespace, Oid baseTypeOid, diff --git a/src/backend/commands/user.c b/src/backend/commands/user.c index e00d2363a9e..3a05b6744e9 100644 --- a/src/backend/commands/user.c +++ b/src/backend/commands/user.c @@ -388,6 +388,7 @@ CreateRole(ParseState *pstate, CreateRoleStmt *stmt) errmsg("conflicting or redundant options"))); denableProfile = defel; } +#ifdef SERVERLESS else if (strcmp(defel->defname, "default_warehosue") == 0) { if (ddefaultwarehosue) @@ -396,6 +397,7 @@ CreateRole(ParseState *pstate, CreateRoleStmt *stmt) errmsg("conflicting or redundant options"))); ddefaultwarehosue = defel; } +#endif else elog(ERROR, "option \"%s\" not recognized", defel->defname); diff --git a/src/backend/executor/execMain.c b/src/backend/executor/execMain.c index a86676df332..4073d347228 100644 --- a/src/backend/executor/execMain.c +++ b/src/backend/executor/execMain.c @@ -265,8 +265,12 @@ standard_ExecutorStart(QueryDesc *queryDesc, int eflags) Assert(queryDesc->plannedstmt->intoPolicy == NULL || GpPolicyIsPartitioned(queryDesc->plannedstmt->intoPolicy) || +#ifdef SERVERLESS GpPolicyIsReplicated(queryDesc->plannedstmt->intoPolicy) || GpPolicyIsEntry(queryDesc->plannedstmt->intoPolicy)); +#else + GpPolicyIsReplicated(queryDesc->plannedstmt->intoPolicy)); +#endif /* GPDB hook for collecting query info */ if (query_info_collect_hook) @@ -1787,8 +1791,12 @@ InitPlan(QueryDesc *queryDesc, int eflags) Assert(plannedstmt->intoPolicy == NULL || GpPolicyIsPartitioned(plannedstmt->intoPolicy) || +#ifdef SERVERLESS GpPolicyIsReplicated(plannedstmt->intoPolicy) || GpPolicyIsEntry(plannedstmt->intoPolicy)); +#else + GpPolicyIsReplicated(plannedstmt->intoPolicy)); +#endif if (DEBUG1 >= log_min_messages) { diff --git a/src/backend/executor/nodeModifyTable.c b/src/backend/executor/nodeModifyTable.c index e548faae576..e49d502a71e 100644 --- 
a/src/backend/executor/nodeModifyTable.c +++ b/src/backend/executor/nodeModifyTable.c @@ -1556,9 +1556,13 @@ ldelete:; /* AFTER ROW DELETE Triggers */ /* - * Disallow DELETE triggers on a split UPDATE. See comments in ExecInsert(). + * GPDB_12_MERGE_FIXME: PostgreSQL *does* fire INSERT and DELETE + * triggers on an UPDATE that moves tuples from one partition to another. + * Should we follow that example with cross-segment UPDATEs too? */ +#ifndef SERVERLESS if (!RelationIsNonblockRelation(resultRelationDesc) && !splitUpdate) +#endif { ExecARDeleteTriggers(estate, resultRelInfo, tupleid, oldtuple, ar_delete_trig_tcs); @@ -2176,11 +2180,17 @@ lreplace:; } /* AFTER ROW UPDATE Triggers */ - ExecARUpdateTriggers(estate, resultRelInfo, tupleid, oldtuple, slot, - recheckIndexes, - mtstate->operation == CMD_INSERT ? - mtstate->mt_oc_transition_capture : - mtstate->mt_transition_capture); +#ifndef SERVERLESS + /* GPDB: AO and AOCO tables don't support triggers */ + if (!RelationIsNonblockRelation(resultRelationDesc)) +#endif + { + ExecARUpdateTriggers(estate, resultRelInfo, tupleid, oldtuple, slot, + recheckIndexes, + mtstate->operation == CMD_INSERT ? + mtstate->mt_oc_transition_capture : + mtstate->mt_transition_capture); + } list_free(recheckIndexes); diff --git a/src/backend/gpopt/translate/CTranslatorRelcacheToDXL.cpp b/src/backend/gpopt/translate/CTranslatorRelcacheToDXL.cpp index 17ce92dcefa..24552a1cc97 100644 --- a/src/backend/gpopt/translate/CTranslatorRelcacheToDXL.cpp +++ b/src/backend/gpopt/translate/CTranslatorRelcacheToDXL.cpp @@ -2554,7 +2554,7 @@ CTranslatorRelcacheToDXL::RetrieveRelStorageType(Relation rel) // Why use the magic number 7015 instead of the macro definition? 
// Just to make it look like it doesn't make sense, // so others will notice that the logic needs to be refactored - case 7015: + case HASHDATA_AM_OID: case AO_COLUMN_TABLE_AM_OID: rel_storage_type = IMDRelation::ErelstorageAppendOnlyCols; break; diff --git a/src/backend/nodes/copyfuncs.c b/src/backend/nodes/copyfuncs.c index ea7b01023b4..962fcb4eded 100644 --- a/src/backend/nodes/copyfuncs.c +++ b/src/backend/nodes/copyfuncs.c @@ -6427,6 +6427,7 @@ _copyAlterWarehouseStmt(const AlterWarehouseStmt *from) COPY_SCALAR_FIELD(kind); COPY_STRING_FIELD(whname); COPY_SCALAR_FIELD(warehouse_size); + COPY_NODE_FIELD(newowner); COPY_NODE_FIELD(options); COPY_SCALAR_FIELD(missing_ok); diff --git a/src/backend/optimizer/util/pathnode.c b/src/backend/optimizer/util/pathnode.c index 1c3548c914a..3ac2d35f19b 100644 --- a/src/backend/optimizer/util/pathnode.c +++ b/src/backend/optimizer/util/pathnode.c @@ -1043,7 +1043,9 @@ create_seqscan_path(PlannerInfo *root, RelOptInfo *rel, pathnode->barrierHazard = false; pathnode->rescannable = true; pathnode->sameslice_relids = rel->relids; +#ifdef SERVERLESS pathnode->basemv = 0; +#endif cost_seqscan(pathnode, root, rel, pathnode->param_info); diff --git a/src/backend/storage/buffer/localbuf.c b/src/backend/storage/buffer/localbuf.c index 98ca440af3f..fe0bf218357 100644 --- a/src/backend/storage/buffer/localbuf.c +++ b/src/backend/storage/buffer/localbuf.c @@ -117,6 +117,15 @@ LocalBufferAlloc(SMgrRelation smgr, ForkNumber forkNum, BlockNumber blockNum, bool found; uint32 buf_state; +#ifndef SERVERLESS + /* + * Local buffers are used for temp tables in PostgreSQL. As temp tables + * use shared buffers in Cloudberry, we shouldn't be useing local buffers + * for anything. 
+ */ + Assert(false); +#endif + INIT_BUFFERTAG(newTag, smgr->smgr_rnode.node, forkNum, blockNum); /* Initialize local buffers if first request in this session */ diff --git a/src/backend/utils/cache/catcache.c b/src/backend/utils/cache/catcache.c index 320741f85d7..fa4f757948e 100644 --- a/src/backend/utils/cache/catcache.c +++ b/src/backend/utils/cache/catcache.c @@ -63,12 +63,6 @@ #define CACHE_elog(...) #endif -/* Hook for plugins to get control in SearchCatCache */ -SearchCatCache_hook_type SearchCatCache_hook = NULL; - -/* Hook for plugins to get control in ReleaseCatCache */ -ReleaseCatCache_hook_type ReleaseCatCache_hook = NULL; - /* Cache management header --- pointer is NULL until created */ static CatCacheHeader *CacheHdr = NULL; @@ -1299,9 +1293,6 @@ SearchCatCacheInternal(CatCache *cache, Assert(cache->cc_nkeys == nkeys); - if (SearchCatCache_hook) - return (*SearchCatCache_hook)(cache, nkeys, v1, v2, v3, v4); - /* * one-time startup overhead for each cache */ @@ -1538,9 +1529,6 @@ SearchCatCacheMiss(CatCache *cache, void ReleaseCatCache(HeapTuple tuple) { - if (ReleaseCatCache_hook) - return (*ReleaseCatCache_hook)(tuple); - CatCTup *ct = (CatCTup *) (((char *) tuple) - offsetof(CatCTup, tuple)); diff --git a/src/backend/utils/cache/relcache.c b/src/backend/utils/cache/relcache.c index 889cd88301d..f901c32c2a2 100644 --- a/src/backend/utils/cache/relcache.c +++ b/src/backend/utils/cache/relcache.c @@ -259,11 +259,6 @@ static bool eoxact_list_overflowed = false; eoxact_list_overflowed = true; \ } while (0) -/* - * Hook for plugins to validate the relation in RelationIdGetRelation. - */ -RelationValidation_hook_type RelationValidation_hook = NULL; - /* * EOXactTupleDescArray stores TupleDescs that (might) need AtEOXact * cleanup work. 
The array expands as needed; there is no hashtable because @@ -2248,9 +2243,6 @@ RelationIdGetRelation(Oid relationId) return NULL; } - if (RelationValidation_hook) - (*RelationValidation_hook)(relationId, rd); - RelationIncrementReferenceCount(rd); /* revalidate cache entry if necessary */ if (!rd->rd_isvalid) diff --git a/src/backend/utils/mmgr/portalmem.c b/src/backend/utils/mmgr/portalmem.c index 4a4d70d15f5..67408d3dafe 100644 --- a/src/backend/utils/mmgr/portalmem.c +++ b/src/backend/utils/mmgr/portalmem.c @@ -50,16 +50,6 @@ * ---------------- */ -#if 0 -#define MAX_PORTALNAME_LEN NAMEDATALEN - -typedef struct portalhashent -{ - char portalname[MAX_PORTALNAME_LEN]; - Portal portal; -} PortalHashEnt; -#endif - static HTAB *PortalHashTable = NULL; #define PortalHashTableLookup(NAME, PORTAL) \ diff --git a/src/bin/initdb/initdb.c b/src/bin/initdb/initdb.c index 0762586f06c..3593256f492 100644 --- a/src/bin/initdb/initdb.c +++ b/src/bin/initdb/initdb.c @@ -2194,6 +2194,29 @@ make_postgres(FILE *cmdfd) PG_CMD_PUTS(*line); } +#ifdef SERVERLESS +/* + * copy template1 to postgres + */ +static void +make_hashdatadb(FILE *cmdfd) +{ + const char *const *line; + static const char *const postgres_setup[] = { + "CREATE DATABASE hashdatadb;\n\n", + "COMMENT ON DATABASE hashdatadb IS 'default administrative connection database';\n\n", + /* + * Clean out dead rows in pg_database + */ + "VACUUM FULL pg_database;\n\n", + NULL + }; + + for (line = postgres_setup; *line; line++) + PG_CMD_PUTS(*line); +} +#endif + /* * signal handler in case we are interrupted. 
* diff --git a/src/bin/psql/describe.c b/src/bin/psql/describe.c index 9307d8ff83c..80a97abe9b3 100644 --- a/src/bin/psql/describe.c +++ b/src/bin/psql/describe.c @@ -4715,6 +4715,7 @@ add_partition_by_footer(printTableContent *const cont, const char *oid) return; /* success */ } +#ifdef SERVERLESS static char * GetDefaultTablespace() { @@ -4739,6 +4740,7 @@ GetDefaultTablespace() PQclear(result); return NULL; } +#endif /* * Add a tablespace description to a footer. If 'newline' is true, it is added @@ -4780,6 +4782,7 @@ add_tablespace_footer(printTableContent *const cont, char relkind, /* Should always be the case, but.... */ if (PQntuples(result) > 0) { +#ifdef SERVERLESS char *default_tablespace = GetDefaultTablespace(); if (default_tablespace != NULL && strcmp(PQgetvalue(result, 0, 0), default_tablespace) == 0) { @@ -4789,6 +4792,7 @@ add_tablespace_footer(printTableContent *const cont, char relkind, return; } pg_free(default_tablespace); +#endif if (newline) { /* Add the tablespace as a new footer */ diff --git a/src/include/access/xact.h b/src/include/access/xact.h index b587211d8e9..c8d98a59645 100644 --- a/src/include/access/xact.h +++ b/src/include/access/xact.h @@ -126,10 +126,8 @@ extern int MyXactFlags; */ typedef enum { -#ifdef SERVERLESS XACT_EVENT_BEGIN, XACT_EVENT_PARALLEL_BEGIN, -#endif XACT_EVENT_COMMIT, XACT_EVENT_PARALLEL_COMMIT, XACT_EVENT_ABORT, @@ -506,7 +504,6 @@ extern void MarkCurrentTransactionIdLoggedIfAny(void); extern void MarkTopTransactionWriteXLogOnExecutor(void); extern bool SubTransactionIsActive(SubTransactionId subxid); extern CommandId GetCurrentCommandId(bool used); -extern void SetCurrentCommandId(CommandId cid); extern void SetParallelStartTimestamps(TimestampTz xact_ts, TimestampTz stmt_ts); extern TimestampTz GetCurrentTransactionStartTimestamp(void); extern TimestampTz GetCurrentStatementStartTimestamp(void); diff --git a/src/include/nodes/pathnodes.h b/src/include/nodes/pathnodes.h index 0de5f060139..227e41aef6a 100644 
--- a/src/include/nodes/pathnodes.h +++ b/src/include/nodes/pathnodes.h @@ -1522,7 +1522,9 @@ typedef struct Path */ Relids sameslice_relids; +#ifdef SERVERLESS Oid basemv; /* Oid of materialized view of Delta SeqScan based on. */ +#endif } Path; /* diff --git a/src/include/utils/catcache.h b/src/include/utils/catcache.h index e62c2d4018b..0213582bfef 100644 --- a/src/include/utils/catcache.h +++ b/src/include/utils/catcache.h @@ -185,19 +185,6 @@ typedef struct catcacheheader int ch_ntup; /* # of tuples in all caches */ } CatCacheHeader; -/* Hook for plugins to get control in SearchCatCache */ -typedef HeapTuple (*SearchCatCache_hook_type)(CatCache *cache, - int nkeys, - Datum v1, - Datum v2, - Datum v3, - Datum v4); -extern PGDLLIMPORT SearchCatCache_hook_type SearchCatCache_hook; - -/* Hook for plugins to get control in ReleaseCatCache */ -typedef void (*ReleaseCatCache_hook_type)(HeapTuple tuple); -extern PGDLLIMPORT ReleaseCatCache_hook_type ReleaseCatCache_hook; - /* this extern duplicates utils/memutils.h... 
*/ extern PGDLLIMPORT MemoryContext CacheMemoryContext; diff --git a/src/include/utils/rel.h b/src/include/utils/rel.h index 752556cbd1b..4b9608445da 100644 --- a/src/include/utils/rel.h +++ b/src/include/utils/rel.h @@ -551,7 +551,7 @@ typedef struct ViewOptions */ #define PAX_AM_OID 7047 #define HASHDATA_AM_OID 7015 -#define HASHDATA_TBALE_AM_OID 7604 +#define HASHDATA_AM_HANDLER_OID 7604 #define RelationIsPax(relation) \ ((relation)->rd_rel->relam == PAX_AM_OID) @@ -576,7 +576,7 @@ typedef struct ViewOptions (relation)->rd_rel->relam == HASHDATA_AM_OID) #define AMHandlerIsHashdataCols(amhandler) \ - ((amhandler) == HASHDATA_TBALE_AM_OID) + ((amhandler) == HASHDATA_AM_HANDLER_OID) /* * RelationIsBitmapIndex * True iff relation is a bitmap index diff --git a/src/include/utils/relcache.h b/src/include/utils/relcache.h index 9989c4ef2f1..466ea462b9b 100644 --- a/src/include/utils/relcache.h +++ b/src/include/utils/relcache.h @@ -39,12 +39,6 @@ typedef struct RelationData *Relation; */ typedef Relation *RelationPtr; -/* - * Hook for plugins to validate the relation in RelationIdGetRelation. 
- */ -typedef void (*RelationValidation_hook_type)(Oid relationId, Relation relation); -extern PGDLLIMPORT RelationValidation_hook_type RelationValidation_hook; - /* * Routines to open (lookup) and close a relcache entry */ From 5cd6a0be70eb9a68a79f9c26e6df86efb0a96afa Mon Sep 17 00:00:00 2001 From: Jinbao Chen Date: Mon, 1 Jul 2024 21:59:43 -0700 Subject: [PATCH 130/152] Add SERVERLESS macro on the function load/write rel cache function --- src/backend/utils/cache/relcache.c | 83 +++++++++++++++++------------- 1 file changed, 48 insertions(+), 35 deletions(-) diff --git a/src/backend/utils/cache/relcache.c b/src/backend/utils/cache/relcache.c index f901c32c2a2..a3fa185d960 100644 --- a/src/backend/utils/cache/relcache.c +++ b/src/backend/utils/cache/relcache.c @@ -6234,14 +6234,17 @@ load_relcache_init_file(bool shared) int i; +#ifdef SERVERLESS if (GpIdentity.segindex < 0) { +#endif /* SERVERLESS */ if (shared) snprintf(initfilename, sizeof(initfilename), "global/%s", RELCACHE_INIT_FILENAME); else snprintf(initfilename, sizeof(initfilename), "%s/%s", DatabasePath, RELCACHE_INIT_FILENAME); +#ifdef SERVERLESS } else { @@ -6252,12 +6255,11 @@ load_relcache_init_file(bool shared) snprintf(initfilename, sizeof(initfilename), "%s", RELCACHE_INIT_FILENAME); } +#endif /* SERVERLESS */ fp = AllocateFile(initfilename, PG_BINARY_R); if (fp == NULL) - { return false; - } /* * Read the index relcache entries from the file. 
Note we will not enter @@ -6272,7 +6274,6 @@ load_relcache_init_file(bool shared) /* check for correct magic number (compatible version) */ if (fread(&magic, 1, sizeof(magic), fp) != sizeof(magic)) goto read_failed; - if (magic != RELCACHE_INIT_FILEMAGIC) goto read_failed; @@ -6290,7 +6291,6 @@ load_relcache_init_file(bool shared) { if (nread == 0) break; /* end of file */ - goto read_failed; } @@ -6318,6 +6318,7 @@ load_relcache_init_file(bool shared) relform = (Form_pg_class) palloc(len); if (fread(relform, 1, len, fp) != len) goto read_failed; + rel->rd_rel = relform; /* initialize attribute tuple forms */ @@ -6346,15 +6347,13 @@ load_relcache_init_file(bool shared) /* next read the access method specific field */ if (fread(&len, 1, sizeof(len), fp) != sizeof(len)) goto read_failed; - if (len > 0) { rel->rd_options = palloc(len); if (fread(rel->rd_options, 1, len, fp) != len) goto read_failed; - if (len != VARSIZE(rel->rd_options)) - goto read_failed; + goto read_failed; /* sanity check */ } else { @@ -6442,7 +6441,6 @@ load_relcache_init_file(bool shared) /* next, read the vector of support procedure OIDs */ if (fread(&len, 1, sizeof(len), fp) != sizeof(len)) goto read_failed; - support = (RegProcedure *) MemoryContextAlloc(indexcxt, len); if (fread(support, 1, len, fp) != len) goto read_failed; @@ -6556,8 +6554,10 @@ load_relcache_init_file(bool shared) * Reset transient-state fields in the relcache entry */ rel->rd_smgr = NULL; +#ifdef SERVERLESS rel->rd_isnailed = true; rel->rd_isvalid = true; +#endif /* SERVERLESS */ if (rel->rd_isnailed) rel->rd_refcnt = 1; else @@ -6604,33 +6604,34 @@ load_relcache_init_file(bool shared) * values of NUM_CRITICAL_SHARED_RELS/NUM_CRITICAL_SHARED_INDEXES, we put * an Assert(false) there. 
*/ -// if (shared) -// { -// if (nailed_rels != NUM_CRITICAL_SHARED_RELS || -// nailed_indexes != NUM_CRITICAL_SHARED_INDEXES) -// { -// elog(WARNING, "found %d nailed shared rels and %d nailed shared indexes in init file, but expected %d and %d respectively", -// nailed_rels, nailed_indexes, -// NUM_CRITICAL_SHARED_RELS, NUM_CRITICAL_SHARED_INDEXES); -// /* Make sure we get developers' attention about this */ -// Assert(false); -// /* In production builds, recover by bootstrapping the relcache */ -// goto read_failed; -// } -// } -// else -// { -// if (nailed_rels != NUM_CRITICAL_LOCAL_RELS || -// nailed_indexes != NUM_CRITICAL_LOCAL_INDEXES) -// { -// elog(WARNING, "found %d nailed rels and %d nailed indexes in init file, but expected %d and %d respectively", -// nailed_rels, nailed_indexes, -// NUM_CRITICAL_LOCAL_RELS, NUM_CRITICAL_LOCAL_INDEXES); -// /* We don't need an Assert() in this case */ -// goto read_failed; -// } -// } - +#ifndef SERVERLESS + if (shared) + { + if (nailed_rels != NUM_CRITICAL_SHARED_RELS || + nailed_indexes != NUM_CRITICAL_SHARED_INDEXES) + { + elog(WARNING, "found %d nailed shared rels and %d nailed shared indexes in init file, but expected %d and %d respectively", + nailed_rels, nailed_indexes, + NUM_CRITICAL_SHARED_RELS, NUM_CRITICAL_SHARED_INDEXES); + /* Make sure we get developers' attention about this */ + Assert(false); + /* In production builds, recover by bootstrapping the relcache */ + goto read_failed; + } + } + else + { + if (nailed_rels != NUM_CRITICAL_LOCAL_RELS || + nailed_indexes != NUM_CRITICAL_LOCAL_INDEXES) + { + elog(WARNING, "found %d nailed rels and %d nailed indexes in init file, but expected %d and %d respectively", + nailed_rels, nailed_indexes, + NUM_CRITICAL_LOCAL_RELS, NUM_CRITICAL_LOCAL_INDEXES); + /* We don't need an Assert() in this case */ + goto read_failed; + } + } +#endif /* SERVERLESS */ /* * OK, all appears well. 
* @@ -6672,10 +6673,13 @@ write_relcache_init_file(bool shared) FILE *fp; char tempfilename[MAXPGPATH]; char finalfilename[MAXPGPATH]; +#ifdef SERVERLESS char copyfilename[MAXPGPATH]; +#endif /* SERVERLESS */ int magic; HASH_SEQ_STATUS status; RelIdCacheEnt *idhentry; +#ifdef SERVERLESS int i,j; Oid collectRelids[60] = { AggregateRelationId, @@ -6743,6 +6747,7 @@ write_relcache_init_file(bool shared) ResQueueCapabilityRelationId }; +#endif /* SERVERLESS */ if (write_relcache_init_file_hook && write_relcache_init_file_hook()) return; @@ -6764,8 +6769,10 @@ write_relcache_init_file(bool shared) RELCACHE_INIT_FILENAME, MyProcPid); snprintf(finalfilename, sizeof(finalfilename), "global/%s", RELCACHE_INIT_FILENAME); +#ifdef SERVERLESS snprintf(copyfilename, sizeof(copyfilename), "%s.global", RELCACHE_INIT_FILENAME); +#endif /* SERVERLESS */ } else { @@ -6773,8 +6780,10 @@ write_relcache_init_file(bool shared) DatabasePath, RELCACHE_INIT_FILENAME, MyProcPid); snprintf(finalfilename, sizeof(finalfilename), "%s/%s", DatabasePath, RELCACHE_INIT_FILENAME); +#ifdef SERVERLESS snprintf(copyfilename, sizeof(copyfilename), "%s", RELCACHE_INIT_FILENAME); +#endif /* SERVERLESS */ } unlink(tempfilename); /* in case it exists w/wrong permissions */ @@ -6900,6 +6909,7 @@ write_relcache_init_file(bool shared) } } +#ifdef SERVERLESS for (i = 0; i < sizeof(collectRelids) / sizeof(Oid); ++i) { Relation rel; @@ -6965,6 +6975,7 @@ write_relcache_init_file(bool shared) table_close(rel, AccessShareLock); } +#endif /* SERVERLESS */ if (FreeFile(fp)) elog(FATAL, "could not write init file"); @@ -7012,6 +7023,7 @@ write_relcache_init_file(bool shared) /* * Copy the file to root dir */ +#ifdef SERVERLESS if (access(copyfilename, F_OK) != 0) { char cp_cmd[MAXPGPATH]; @@ -7021,6 +7033,7 @@ write_relcache_init_file(bool shared) elog(ERROR, "copy process fail, cp_cmd %s", cp_cmd); } } +#endif /* SERVERLESS */ LWLockRelease(RelCacheInitLock); } From 56d1c77f87a51cadfa03593ebc23b4da773ba184 Mon Sep 17 
00:00:00 2001 From: hanwei Date: Thu, 25 Jul 2024 18:25:58 +0800 Subject: [PATCH 131/152] Enhancement: adjust tablespace for support serverless architecture In serverless architecture, change the way of using tablespace. --- src/backend/catalog/storage_database.c | 7 +++++++ src/backend/commands/dbcommands.c | 8 +++++++- src/backend/commands/indexcmds.c | 2 ++ src/backend/commands/tablespace.c | 10 ++++++++++ src/common/relpath.c | 8 ++++++++ src/include/utils/sync_guc_name.h | 2 ++ src/include/utils/unsync_guc_name.h | 3 +++ src/test/regress/pg_regress.c | 3 +++ 8 files changed, 42 insertions(+), 1 deletion(-) diff --git a/src/backend/catalog/storage_database.c b/src/backend/catalog/storage_database.c index b7ccaa91e38..14c9d2b78d1 100644 --- a/src/backend/catalog/storage_database.c +++ b/src/backend/catalog/storage_database.c @@ -184,10 +184,17 @@ dropDatabaseDirectory(DbDirNode *deldb, bool isRedo) /* * Remove files from the old tablespace */ +#ifdef SERVERLESS + if (!rmtree(dbpath, true)) + ereport(LOG, + (errmsg("some useless files may be left behind in old database directory \"%s\"", + dbpath))); +#else if (!rmtree(dbpath, true)) ereport(WARNING, (errmsg("some useless files may be left behind in old database directory \"%s\"", dbpath))); +#endif pfree(dbpath); } diff --git a/src/backend/commands/dbcommands.c b/src/backend/commands/dbcommands.c index a8b4c34154a..fc763b31ddd 100644 --- a/src/backend/commands/dbcommands.c +++ b/src/backend/commands/dbcommands.c @@ -2222,11 +2222,17 @@ remove_dbtablespaces(Oid db_id) pfree(dstpath); continue; } - +#ifdef SERVERLESS + if (!rmtree(dstpath, true)) + ereport(LOG, + (errmsg("some useless files may be left behind in old database directory \"%s\"", + dstpath))); +#else if (!rmtree(dstpath, true)) ereport(WARNING, (errmsg("some useless files may be left behind in old database directory \"%s\"", dstpath))); +#endif ltblspc = lappend_oid(ltblspc, dsttablespace); pfree(dstpath); diff --git 
a/src/backend/commands/indexcmds.c b/src/backend/commands/indexcmds.c index 2065667ce42..d1d397cf900 100644 --- a/src/backend/commands/indexcmds.c +++ b/src/backend/commands/indexcmds.c @@ -973,11 +973,13 @@ DefineIndex(Oid relationId, if (stmt->tableSpace) { tablespaceId = get_tablespace_oid(stmt->tableSpace, false); +#ifndef SERVERLESS if (partitioned && tablespaceId == MyDatabaseTableSpace && Gp_role != GP_ROLE_EXECUTE) ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), errmsg("cannot specify default tablespace for partitioned relations"))); +#endif /* SERVERLESS */ } else { diff --git a/src/backend/commands/tablespace.c b/src/backend/commands/tablespace.c index a4fe9eedca0..84c604711db 100644 --- a/src/backend/commands/tablespace.c +++ b/src/backend/commands/tablespace.c @@ -173,6 +173,14 @@ TablespaceCreateDbspace(Oid spcNode, Oid dbNode, bool isRedo) if (spcNode != DEFAULTTABLESPACE_OID && !isRedo) TablespaceLockTuple(spcNode, AccessShareLock, true); + /* + * In Serverless mode, create local dbspace by tablespace + * set pg_default + */ +#ifdef SERVERLESS + spcNode = DEFAULTTABLESPACE_OID; +#endif + dir = GetDatabasePath(dbNode, spcNode); if (stat(dir, &st) < 0) @@ -496,7 +504,9 @@ CreateTableSpace(CreateTableSpaceStmt *stmt) /* Post creation hook for new tablespace */ InvokeObjectPostCreateHook(TableSpaceRelationId, tablespaceoid, 0); +#ifndef SERVERLESS create_tablespace_directories(location, tablespaceoid); +#endif /* Record the filesystem change in XLOG */ { diff --git a/src/common/relpath.c b/src/common/relpath.c index bd1ba60ba83..a50d8c258c8 100644 --- a/src/common/relpath.c +++ b/src/common/relpath.c @@ -151,6 +151,14 @@ GetRelationPath(Oid dbNode, Oid spcNode, Oid relNode, { char *path; + /* + * In Serverless mode, get local relation path by tablespace + * set pg_default + */ +#ifdef SERVERLESS + if (spcNode != GLOBALTABLESPACE_OID) + spcNode = DEFAULTTABLESPACE_OID; +#endif if (spcNode == GLOBALTABLESPACE_OID) { /* Shared system relations live 
in {datadir}/global */ diff --git a/src/include/utils/sync_guc_name.h b/src/include/utils/sync_guc_name.h index 2e8bc561dca..f882ede91d1 100644 --- a/src/include/utils/sync_guc_name.h +++ b/src/include/utils/sync_guc_name.h @@ -40,7 +40,9 @@ "deadlock_timeout", "default_table_access_method", "default_index_access_method", +#ifndef SERVERLESS "default_tablespace", +#endif "default_toast_compression", "dml_ignore_target_partition_check", "enable_parallel", diff --git a/src/include/utils/unsync_guc_name.h b/src/include/utils/unsync_guc_name.h index 38b585a84ef..5a90c1fd2af 100644 --- a/src/include/utils/unsync_guc_name.h +++ b/src/include/utils/unsync_guc_name.h @@ -94,6 +94,9 @@ "debug_walrepl_snd", "debug_walrepl_syncrep", "debug_xlog_record_read", +#ifdef SERVERLESS + "default_tablespace", +#endif "default_statistics_target", "default_text_search_config", "default_transaction_deferrable", diff --git a/src/test/regress/pg_regress.c b/src/test/regress/pg_regress.c index 3b9e91136d4..12bfbf78351 100644 --- a/src/test/regress/pg_regress.c +++ b/src/test/regress/pg_regress.c @@ -3601,6 +3601,9 @@ cluster_healthy(void) { char line[1024]; int i, n; +#ifdef SERVERLESS + return true; +#endif psql_command_output("postgres", line, 1024, "SELECT * FROM gp_segment_configuration WHERE status = 'd' OR preferred_role != role;"); From f3c161bcabee0c5770b52f127aaa16308f86860b Mon Sep 17 00:00:00 2001 From: leo Date: Wed, 3 Jul 2024 20:57:19 +0800 Subject: [PATCH 132/152] Refactor: add MACRO SERVERLESS to catalog dispatching related code --- src/backend/access/heap/heapam.c | 10 ++ src/backend/access/index/genam.c | 27 ++++- src/backend/access/index/indexam.c | 8 ++ src/backend/catalog/namespace.c | 6 ++ src/backend/cdb/cdbtranscat.c | 3 +- src/backend/cdb/dispatcher/cdbdisp_query.c | 26 ++++- src/backend/cdb/dispatcher/cdbgang_async.c | 6 +- src/backend/commands/copyto.c | 3 +- src/backend/commands/tablecmds.c | 3 +- src/backend/executor/execExpr.c | 9 +- 
src/backend/executor/execMain.c | 11 +- src/backend/executor/execPartition.c | 5 +- src/backend/executor/execProcnode.c | 6 ++ src/backend/executor/execSRF.c | 4 + src/backend/executor/functions.c | 2 + src/backend/executor/nodeIncrementalSort.c | 4 + src/backend/executor/nodeValuesscan.c | 6 ++ src/backend/foreign/foreign.c | 4 + src/backend/nodes/outfast.c | 18 ++-- src/backend/nodes/readfast.c | 17 +-- src/backend/nodes/readfuncs.c | 4 + src/backend/optimizer/plan/planner.c | 6 ++ src/backend/optimizer/util/plancat.c | 4 + src/backend/optimizer/util/predtest.c | 6 ++ src/backend/postmaster/autovacuum.c | 4 + src/backend/postmaster/postmaster.c | 8 ++ src/backend/task/pg_cron.c | 4 + src/backend/tcop/postgres.c | 18 +++- src/backend/tcop/utility.c | 4 + src/backend/utils/adt/enum.c | 8 ++ src/backend/utils/cache/catcache.c | 32 ++++++ src/backend/utils/cache/evtcache.c | 4 + src/backend/utils/cache/plancache.c | 4 + src/backend/utils/cache/relcache.c | 116 +++++++++++++++------ src/backend/utils/cache/syscache.c | 9 ++ src/backend/utils/cache/ts_cache.c | 10 +- src/backend/utils/cache/typcache.c | 17 ++- src/backend/utils/init/postinit.c | 4 + src/backend/utils/mb/mbutils.c | 2 + src/backend/utils/misc/superuser.c | 7 +- src/backend/utils/mmgr/aset.c | 12 ++- src/backend/utils/mmgr/mcxt.c | 2 + src/include/access/genam.h | 2 + src/include/cdb/cdbtranscat.h | 2 + src/include/cdb/cdbvars.h | 1 - src/include/commands/trigger.h | 2 + src/include/executor/executor.h | 2 + src/include/executor/functions.h | 2 + src/include/mb/pg_wchar.h | 2 + src/include/nodes/nodes.h | 4 +- src/include/utils/typcache.h | 2 + src/interfaces/libpq/fe-connect.c | 2 + src/interfaces/libpq/fe-protocol3.c | 2 + src/interfaces/libpq/libpq-int.h | 2 + src/pl/plpgsql/src/pl_comp.c | 4 + src/pl/plpgsql/src/pl_exec.c | 26 +++++ src/pl/plpgsql/src/pl_handler.c | 12 +++ src/pl/plpgsql/src/plpgsql.h | 3 +- 58 files changed, 456 insertions(+), 77 deletions(-) diff --git 
a/src/backend/access/heap/heapam.c b/src/backend/access/heap/heapam.c index 36decf68fa9..383181f4bcf 100644 --- a/src/backend/access/heap/heapam.c +++ b/src/backend/access/heap/heapam.c @@ -73,7 +73,9 @@ #include "utils/spccache.h" #include "catalog/oid_dispatch.h" +#ifdef SERVERLESS #include "cdb/cdbtranscat.h" +#endif #include "cdb/cdbvars.h" #include "utils/guc.h" #include "utils/faultinjector.h" @@ -2757,7 +2759,9 @@ simple_heap_insert(Relation relation, HeapTuple tup) heap_insert(relation, tup, GetCurrentCommandId(true), 0, NULL, GetCurrentTransactionId()); +#ifdef SERVERLESS TransStoreTuple(tup); +#endif } /* @@ -3282,7 +3286,9 @@ simple_heap_delete(Relation relation, ItemPointer tid) break; } +#ifdef SERVERLESS TransRemoveTuple(RelationGetRelid(relation), *tid); +#endif } /* @@ -4382,7 +4388,9 @@ simple_heap_update(Relation relation, ItemPointer otid, HeapTuple tup) TM_FailureData tmfd; LockTupleMode lockmode; +#ifdef SERVERLESS TransRemoveTuple(tup->t_tableOid, *otid); +#endif result = heap_update_internal(relation, otid, tup, GetCurrentCommandId(true), InvalidSnapshot, @@ -4413,7 +4421,9 @@ simple_heap_update(Relation relation, ItemPointer otid, HeapTuple tup) break; } +#ifdef SERVERLESS TransStoreTuple(tup); +#endif } diff --git a/src/backend/access/index/genam.c b/src/backend/access/index/genam.c index 3d7472b1916..cb5b58e682f 100644 --- a/src/backend/access/index/genam.c +++ b/src/backend/access/index/genam.c @@ -25,8 +25,9 @@ #include "access/tableam.h" #include "access/transam.h" #include "catalog/index.h" +#ifdef SERVERLESS #include "cdb/cdbtranscat.h" -#include "cdb/cdbvars.h" +#endif #include "lib/stringinfo.h" #include "miscadmin.h" #include "storage/bufmgr.h" @@ -363,6 +364,7 @@ index_compute_xid_horizon_for_tuples(Relation irel, * ---------------------------------------------------------------- */ +#ifdef SERVERLESS static SysScanDesc systable_beginscan_qe(Relation heapRelation, int nkeys, ScanKey key) { @@ -377,6 +379,7 @@ 
systable_beginscan_qe(Relation heapRelation, int nkeys, ScanKey key) return sysscan; } +#endif /* * systable_beginscan --- set up for heap-or-index scan @@ -407,8 +410,10 @@ systable_beginscan(Relation heapRelation, SysScanDesc sysscan; Relation irel; +#ifdef SERVERLESS if (systup_store_active()) return systable_beginscan_qe(heapRelation, nkeys, key); +#endif if (indexOK && !IgnoreSystemIndexes && @@ -507,6 +512,7 @@ HandleConcurrentAbort() errmsg("transaction aborted during system catalog scan"))); } +#ifdef SERVERLESS static HeapTuple systable_getnext_qe(SysScanDesc sysscan) { @@ -522,6 +528,7 @@ systable_getnext_qe(SysScanDesc sysscan) return htup; } +#endif /* * systable_getnext --- get next tuple in a heap-or-index scan @@ -540,8 +547,10 @@ systable_getnext(SysScanDesc sysscan) { HeapTuple htup = NULL; +#ifdef SERVERLESS if (systup_store_active()) return systable_getnext_qe(sysscan); +#endif if (sysscan->irel) { @@ -581,7 +590,9 @@ systable_getnext(SysScanDesc sysscan) */ HandleConcurrentAbort(); +#ifdef SERVERLESS TransStoreTuple(htup); +#endif return htup; } @@ -628,7 +639,7 @@ systable_recheck_tuple(SysScanDesc sysscan, HeapTuple tup) return result; } - +#ifdef SERVERLESS static void systable_endscan_qe(SysScanDesc sysscan) { @@ -643,6 +654,7 @@ systable_endscan_qe(SysScanDesc sysscan) UnregisterSnapshot(sysscan->snapshot); pfree(sysscan); } +#endif /* * systable_endscan --- close scan, release resources @@ -652,11 +664,13 @@ systable_endscan_qe(SysScanDesc sysscan) void systable_endscan(SysScanDesc sysscan) { +#ifdef SERVERLESS if (systup_store_active()) { systable_endscan_qe(sysscan); return; } +#endif if (sysscan->slot) { @@ -711,8 +725,10 @@ systable_beginscan_ordered(Relation heapRelation, SysScanDesc sysscan; int i; +#ifdef SERVERLESS if (systup_store_sorted_active()) return systable_beginscan_qe(heapRelation, nkeys, key); +#endif /* REINDEX can probably be a hard error here ... 
*/ if (ReindexIsProcessingIndex(RelationGetRelid(indexRelation))) @@ -775,8 +791,10 @@ systable_getnext_ordered(SysScanDesc sysscan, ScanDirection direction) { HeapTuple htup = NULL; +#ifdef SERVERLESS if (systup_store_sorted_active()) return systable_getnext_qe(sysscan); +#endif Assert(sysscan->irel); if (index_getnext_slot(sysscan->iscan, direction, sysscan->slot)) @@ -792,7 +810,9 @@ systable_getnext_ordered(SysScanDesc sysscan, ScanDirection direction) */ HandleConcurrentAbort(); +#ifdef SERVERLESS TransStoreTuple(htup); +#endif return htup; } @@ -803,12 +823,13 @@ systable_getnext_ordered(SysScanDesc sysscan, ScanDirection direction) void systable_endscan_ordered(SysScanDesc sysscan) { +#ifdef SERVERLESS if (systup_store_sorted_active()) { systable_endscan_qe(sysscan); return; } - +#endif if (sysscan->slot) { ExecDropSingleTupleTableSlot(sysscan->slot); diff --git a/src/backend/access/index/indexam.c b/src/backend/access/index/indexam.c index de67affdb98..3a1ac49e70b 100644 --- a/src/backend/access/index/indexam.c +++ b/src/backend/access/index/indexam.c @@ -55,7 +55,9 @@ #include "catalog/index.h" #include "catalog/pg_amproc.h" #include "catalog/pg_type.h" +#ifdef SERVERLESS #include "cdb/cdbtranscat.h" +#endif #include "commands/defrem.h" #include "nodes/makefuncs.h" #include "pgstat.h" @@ -148,6 +150,7 @@ index_open(Oid relationId, LOCKMODE lockmode) return r; } +#ifdef SERVERLESS Relation order_index_open(Oid relationId, LOCKMODE lockmode) { @@ -167,6 +170,7 @@ order_index_open(Oid relationId, LOCKMODE lockmode) return r; } +#endif /* ---------------- * index_close - close an index relation @@ -180,12 +184,16 @@ order_index_open(Oid relationId, LOCKMODE lockmode) void index_close(Relation relation, LOCKMODE lockmode) { +#ifdef SERVERLESS LockRelId relid; if (!relation) return; relid = relation->rd_lockInfo.lockRelId; +#else + LockRelId relid = relation->rd_lockInfo.lockRelId; +#endif Assert(lockmode >= NoLock && lockmode < MAX_LOCKMODES); diff --git 
a/src/backend/catalog/namespace.c b/src/backend/catalog/namespace.c index 2747227de40..0799c538c22 100644 --- a/src/backend/catalog/namespace.c +++ b/src/backend/catalog/namespace.c @@ -3465,6 +3465,12 @@ GetTempNamespaceState(Oid *tempNamespaceId, Oid *tempToastNamespaceId) void SetTempNamespaceState(Oid tempNamespaceId, Oid tempToastNamespaceId) { +#ifndef SERVERLESS + /* Worker should not have created its own namespaces ... */ + Assert(myTempNamespace == InvalidOid); + Assert(myTempToastNamespace == InvalidOid); + Assert(myTempNamespaceSubID == InvalidSubTransactionId); +#endif /* Assign same namespace OIDs that leader has */ myTempNamespace = tempNamespaceId; myTempToastNamespace = tempToastNamespaceId; diff --git a/src/backend/cdb/cdbtranscat.c b/src/backend/cdb/cdbtranscat.c index 32144db9fc9..7b8640b65e6 100644 --- a/src/backend/cdb/cdbtranscat.c +++ b/src/backend/cdb/cdbtranscat.c @@ -1,6 +1,6 @@ - #include "postgres.h" +#ifdef SERVERLESS #include "fmgr.h" #include "access/xact.h" #include "access/nbtree.h" @@ -244,3 +244,4 @@ CollectStartupCatalog(int *len) return NULL; } } +#endif \ No newline at end of file diff --git a/src/backend/cdb/dispatcher/cdbdisp_query.c b/src/backend/cdb/dispatcher/cdbdisp_query.c index 3c95a4484a3..c55f69a8e96 100644 --- a/src/backend/cdb/dispatcher/cdbdisp_query.c +++ b/src/backend/cdb/dispatcher/cdbdisp_query.c @@ -49,7 +49,9 @@ #include "cdb/cdbdisp_dtx.h" /* for qdSerializeDtxContextInfo() */ #include "cdb/cdbdispatchresult.h" #include "cdb/cdbcopy.h" +#ifdef SERVERLESS #include "cdb/cdbtranscat.h" +#endif #include "executor/execUtils.h" #include "cdb/cdbpq.h" @@ -95,8 +97,10 @@ typedef struct DispatchCommandQueryParms int serializedPlantreelen; char *serializedQueryDispatchDesc; int serializedQueryDispatchDesclen; +#ifdef SERVERLESS char *serializedCatalog; int serializedCatalogLen; +#endif /* * Additional information. 
@@ -334,8 +338,10 @@ CdbDispatchSetCommand(const char *strCommand, bool cancelOnError) ErrorData *qeError = NULL; int flags = DF_NONE; +#ifdef SERVERLESS SetTransferOn(); InitQuery(strCommand); +#endif if (CdbNeedDispatchCommand_hook && !CdbNeedDispatchCommand_hook(strCommand, &flags, NULL, NULL)) return; @@ -416,8 +422,10 @@ CdbDispatchCommand(const char *strCommand, int flags, CdbPgResults *cdb_pgresults) { +#ifdef SERVERLESS SetTransferOn(); InitQuery(strCommand); +#endif return CdbDispatchCommandToSegments(strCommand, flags, @@ -588,10 +596,12 @@ cdbdisp_buildCommandQueryParms(const char *strCommand, int flags) pQueryParms->strCommand = strCommand; pQueryParms->serializedQueryDispatchDesc = NULL; pQueryParms->serializedQueryDispatchDesclen = 0; +#ifdef SERVERLESS if (IsTransferOn()) pQueryParms->serializedCatalog = serializeNode((Node*) GetTransferNode(), &pQueryParms->serializedCatalogLen, NULL); +#endif /* * Serialize a version of our DTX Context Info */ @@ -665,11 +675,12 @@ cdbdisp_buildUtilityQueryParms(struct Node *stmt, pQueryParms->serializedQueryDispatchDesc = serializedQueryDispatchDesc; pQueryParms->serializedQueryDispatchDesclen = serializedQueryDispatchDesc_len; +#ifdef SERVERLESS if (IsTransferOn()) pQueryParms->serializedCatalog = serializeNode((Node*) GetTransferNode(), &pQueryParms->serializedCatalogLen, NULL); - +#endif /* * Serialize a version of our DTX Context Info */ @@ -729,11 +740,12 @@ cdbdisp_buildPlanQueryParms(struct QueryDesc *queryDesc, pQueryParms->serializedQueryDispatchDesc = sddesc; pQueryParms->serializedQueryDispatchDesclen = sddesc_len; +#ifdef SERVERLESS if (IsTransferOn()) pQueryParms->serializedCatalog = serializeNode((Node*) GetTransferNode(), &pQueryParms->serializedCatalogLen, NULL); - +#endif /* * Serialize a version of our snapshot, and generate our transction * isolations. 
We generally want Plan based dispatch to be in a global @@ -927,8 +939,10 @@ buildGpQueryString(DispatchCommandQueryParms *pQueryParms, int plantree_len = pQueryParms->serializedPlantreelen; const char *sddesc = pQueryParms->serializedQueryDispatchDesc; int sddesc_len = pQueryParms->serializedQueryDispatchDesclen; +#ifdef SERVERLESS const char *sdcatalog = pQueryParms->serializedCatalog; int sdcatalog_len = pQueryParms->serializedCatalogLen; +#endif const char *dtxContextInfo = pQueryParms->serializedDtxContextInfo; int dtxContextInfo_len = pQueryParms->serializedDtxContextInfolen; int64 currentStatementStartTimestamp = GetCurrentStatementStartTimestamp(); @@ -984,13 +998,17 @@ buildGpQueryString(DispatchCommandQueryParms *pQueryParms, sizeof(command_len) + sizeof(plantree_len) + sizeof(sddesc_len) + +#ifdef SERVERLESS sizeof(sdcatalog_len) + +#endif sizeof(dtxContextInfo_len) + dtxContextInfo_len + command_len + plantree_len + sddesc_len + +#ifdef SERVERLESS sdcatalog_len + +#endif sizeof(numsegments) + sizeof(resgroupInfo.len) + resgroupInfo.len + @@ -1053,9 +1071,11 @@ buildGpQueryString(DispatchCommandQueryParms *pQueryParms, memcpy(pos, &tmp, sizeof(tmp)); pos += sizeof(tmp); +#ifdef SERVERLESS tmp = htonl(sdcatalog_len); memcpy(pos, &tmp, sizeof(tmp)); pos += sizeof(tmp); +#endif tmp = htonl(dtxContextInfo_len); memcpy(pos, &tmp, sizeof(tmp)); @@ -1084,11 +1104,13 @@ buildGpQueryString(DispatchCommandQueryParms *pQueryParms, pos += sddesc_len; } +#ifdef SERVERLESS if (sdcatalog_len > 0) { memcpy(pos, sdcatalog, sdcatalog_len); pos += sdcatalog_len; } +#endif tmp = htonl(numsegments); memcpy(pos, &tmp, sizeof(numsegments)); diff --git a/src/backend/cdb/dispatcher/cdbgang_async.c b/src/backend/cdb/dispatcher/cdbgang_async.c index 01f56e52d5a..2f86a7344e4 100644 --- a/src/backend/cdb/dispatcher/cdbgang_async.c +++ b/src/backend/cdb/dispatcher/cdbgang_async.c @@ -33,9 +33,10 @@ #include "cdb/cdbfts.h" #include "cdb/cdbgang.h" #include "cdb/cdbgang_async.h" 
-#include "cdb/cdbsrlz.h" #include "cdb/cdbtm.h" +#ifdef SERVERLESS #include "cdb/cdbtranscat.h" +#endif #include "cdb/cdbvars.h" #include "miscadmin.h" @@ -343,9 +344,10 @@ cdbgang_createGang_async(List *segments, SegmentType segmentType) if (fds[currentFdNumber].revents & fds[currentFdNumber].events || fds[currentFdNumber].revents & (POLLERR | POLLHUP | POLLNVAL)) { +#ifdef SERVERLESS segdbDesc->conn->catalog = CollectStartupCatalog(&segdbDesc->conn->catalog_size); - +#endif pollingStatus[i] = PQconnectPoll(segdbDesc->conn); } diff --git a/src/backend/commands/copyto.c b/src/backend/commands/copyto.c index 62701556c91..0ac4347270b 100644 --- a/src/backend/commands/copyto.c +++ b/src/backend/commands/copyto.c @@ -1661,9 +1661,10 @@ CopyToDispatch(CopyToState cstate) cdbCopy = makeCdbCopyTo(cstate); +#ifdef SERVERLESS if (cstate->need_transcoding) StoreEncodingConversion(cstate->file_encoding); - +#endif /* XXX: lock all partitions */ /* diff --git a/src/backend/commands/tablecmds.c b/src/backend/commands/tablecmds.c index 24da81f7ea5..f7a85897c64 100644 --- a/src/backend/commands/tablecmds.c +++ b/src/backend/commands/tablecmds.c @@ -133,11 +133,12 @@ #include "nodes/altertablenodes.h" #include "cdb/cdbdisp.h" #include "cdb/cdbdisp_query.h" -#include "cdb/cdbtranscat.h" #include "cdb/cdbvars.h" #include "cdb/cdbrelsize.h" #include "cdb/cdboidsync.h" +#ifdef SERVERLESS #include "cdb/cdbtranscat.h" +#endif #include "postmaster/autostats.h" const char *synthetic_sql = "(internally generated SQL command)"; diff --git a/src/backend/executor/execExpr.c b/src/backend/executor/execExpr.c index 2bd34c58966..f65e127c592 100644 --- a/src/backend/executor/execExpr.c +++ b/src/backend/executor/execExpr.c @@ -54,7 +54,9 @@ #include "access/detoast.h" #include "access/heaptoast.h" #include "catalog/pg_collation.h" +#ifdef SERVERLESS #include "cdb/cdbtranscat.h" +#endif #include "cdb/cdbvars.h" #include "utils/pg_locale.h" #include "utils/syscache.h" @@ -992,6 +994,7 @@ 
ExecInitExprRec(Expr *node, ExprState *state, scratch.d.constval.isnull = con->constisnull; ExprEvalPushStep(state, &scratch); +#ifdef SERVERLESS if (IsTransferOn()) { HeapTuple typeTup; @@ -1000,7 +1003,7 @@ ExecInitExprRec(Expr *node, ExprState *state, if (typeTup) ReleaseSysCache(typeTup); } - +#endif break; } @@ -2550,11 +2553,13 @@ ExprEvalPushStep_internal(ExprState *es, const ExprEvalStep *s) void ExprEvalPushStep(ExprState *es, const ExprEvalStep *s) { +#ifdef SERVERLESS if (ExprEvalPushStep_hook) { (*ExprEvalPushStep_hook) (es, s); return; } +#endif ExprEvalPushStep_internal(es, s); } @@ -2642,6 +2647,7 @@ ExecInitFunc(ExprEvalStep *scratch, Expr *node, List *args, Oid funcid, fcinfo->args[argno].value = con->constvalue; fcinfo->args[argno].isnull = con->constisnull; +#ifdef SERVERLESS if (IsTransferOn()) { HeapTuple typeTup; @@ -2650,6 +2656,7 @@ ExecInitFunc(ExprEvalStep *scratch, Expr *node, List *args, Oid funcid, if (typeTup) ReleaseSysCache(typeTup); } +#endif } else { diff --git a/src/backend/executor/execMain.c b/src/backend/executor/execMain.c index 4073d347228..4a93c34a4b5 100644 --- a/src/backend/executor/execMain.c +++ b/src/backend/executor/execMain.c @@ -103,7 +103,6 @@ #include "cdb/cdbdisp_query.h" #include "cdb/cdbdispatchresult.h" #include "cdb/cdbexplain.h" /* cdbexplain_sendExecStats() */ -#include "cdb/cdbtranscat.h" #include "cdb/cdbplan.h" #include "cdb/cdbsubplan.h" #include "cdb/cdbvars.h" @@ -116,7 +115,9 @@ #include "cdb/cdbtargeteddispatch.h" #include "cdb/cdbutil.h" #include "cdb/cdbendpoint.h" +#ifdef SERVERLESS #include "cdb/cdbtranscat.h" +#endif #define IS_PARALLEL_RETRIEVE_CURSOR(queryDesc) (queryDesc->ddesc && \ queryDesc->ddesc->parallelCursorName && \ @@ -239,7 +240,9 @@ ExecutorStart(QueryDesc *queryDesc, int eflags) */ pgstat_report_query_id(queryDesc->plannedstmt->queryId, false); +#ifdef SERVERLESS SetTransferOn(); +#endif if (ExecutorStart_hook) (*ExecutorStart_hook) (queryDesc, eflags); @@ -596,13 +599,17 @@ 
standard_ExecutorStart(QueryDesc *queryDesc, int eflags) */ Assert(CurrentMemoryContext == estate->es_query_cxt); +#ifdef SERVERLESS if (!shouldDispatch) SetTransferOff(); +#endif InitPlan(queryDesc, eflags); +#ifdef SERVERLESS if (!shouldDispatch) SetTransferOn(); +#endif Assert(queryDesc->planstate); @@ -2475,8 +2482,10 @@ InitResultRelInfo(ResultRelInfo *resultRelInfo, resultRelInfo->ri_ChildToRootMapValid = false; resultRelInfo->ri_CopyMultiInsertBuffer = NULL; +#ifdef SERVERLESS if (CollectResultInfo_hook) (*CollectResultInfo_hook) (resultRelInfo); +#endif } /* diff --git a/src/backend/executor/execPartition.c b/src/backend/executor/execPartition.c index e11df96270d..0215178c36b 100644 --- a/src/backend/executor/execPartition.c +++ b/src/backend/executor/execPartition.c @@ -36,8 +36,9 @@ #include "cdb/cdbaocsam.h" #include "cdb/cdbappendonlyam.h" +#ifdef SERVERLESS #include "cdb/cdbtranscat.h" -#include "cdb/cdbtranscat.h" +#endif /* * Helper macro that is used to determine if a Modifytable node came from a @@ -1848,6 +1849,7 @@ ExecCreatePartitionPruneState(PlanState *planstate, */ partrel = ExecGetRangeTableRelation(estate, pinfo->rtindex); +#ifdef SERVERLESS if (IsTransferOn()) { if (partrel->rd_partkeycxt) @@ -1857,6 +1859,7 @@ ExecCreatePartitionPruneState(PlanState *planstate, partrel->rd_partkeycxt = NULL; } } +#endif partkey = RelationGetPartitionKey(partrel); partdesc = PartitionDirectoryLookup(estate->es_partition_directory, partrel); diff --git a/src/backend/executor/execProcnode.c b/src/backend/executor/execProcnode.c index c042c09c46a..4168ca8d99a 100644 --- a/src/backend/executor/execProcnode.c +++ b/src/backend/executor/execProcnode.c @@ -151,9 +151,15 @@ ExecInitNode_hook_type ExecInitNode_hook = NULL; /* Hook for plugins to get control in ExecEndNode() */ ExecEndNode_hook_type ExecEndNode_hook = NULL; + +#ifdef SERVERLESS ExprEvalPushStep_hook_type ExprEvalPushStep_hook = NULL; CollectResultInfo_hook_type CollectResultInfo_hook = NULL; 
CollectProc_hook_type CollectProc_hook = NULL; +#endif + +LockTable_hook_type LockTable_hook = NULL; + /** * Forward declarations of static functions */ diff --git a/src/backend/executor/execSRF.c b/src/backend/executor/execSRF.c index 255fe3ffdc3..78d5b14999d 100644 --- a/src/backend/executor/execSRF.c +++ b/src/backend/executor/execSRF.c @@ -20,7 +20,9 @@ #include "access/htup_details.h" #include "catalog/objectaccess.h" +#ifdef SERVERLESS #include "cdb/cdbtranscat.h" +#endif #include "executor/execdebug.h" #include "funcapi.h" #include "miscadmin.h" @@ -806,8 +808,10 @@ init_sexpr(Oid foid, Oid input_collation, Expr *node, sexpr->funcResultSlot = NULL; sexpr->shutdown_reg = false; +#ifdef SERVERLESS if (CollectProc_hook) (*CollectProc_hook) (sexpr->fcinfo); +#endif } /* diff --git a/src/backend/executor/functions.c b/src/backend/executor/functions.c index c1ace282dd3..892e1e031e7 100644 --- a/src/backend/executor/functions.c +++ b/src/backend/executor/functions.c @@ -1542,6 +1542,7 @@ PG_END_TRY(); return result; } +#ifdef SERVERLESS void fmgr_sql_init(PG_FUNCTION_ARGS) { @@ -1586,6 +1587,7 @@ fmgr_sql_init(PG_FUNCTION_ARGS) } PG_END_TRY(); } +#endif /* * error context callback to let us supply a call-stack traceback diff --git a/src/backend/executor/nodeIncrementalSort.c b/src/backend/executor/nodeIncrementalSort.c index a1fbde63891..135af740183 100644 --- a/src/backend/executor/nodeIncrementalSort.c +++ b/src/backend/executor/nodeIncrementalSort.c @@ -79,7 +79,9 @@ #include "postgres.h" #include "access/htup_details.h" +#ifdef SERVERLESS #include "cdb/cdbtranscat.h" +#endif #include "executor/execdebug.h" #include "executor/nodeIncrementalSort.h" #include "miscadmin.h" @@ -1066,8 +1068,10 @@ ExecInitIncrementalSort(IncrementalSort *node, EState *estate, int eflags) SO_printf("ExecInitIncrementalSort: sort node initialized\n"); +#ifdef SERVERLESS if (IsTransferOn()) preparePresortedCols(incrsortstate); +#endif return incrsortstate; } diff --git 
a/src/backend/executor/nodeValuesscan.c b/src/backend/executor/nodeValuesscan.c index b99f63f5eb4..03faf359b1b 100644 --- a/src/backend/executor/nodeValuesscan.c +++ b/src/backend/executor/nodeValuesscan.c @@ -25,7 +25,9 @@ */ #include "postgres.h" +#ifdef SERVERLESS #include "cdb/cdbtranscat.h" +#endif #include "executor/executor.h" #include "executor/nodeValuesscan.h" #include "jit/jit.h" @@ -297,8 +299,12 @@ ExecInitValuesScan(ValuesScan *node, EState *estate, int eflags) * case where there are no SubPlans anywhere. */ if ((estate->es_subplanstates && +#ifdef SERVERLESS contain_subplans((Node *) exprs)) || IsTransferOn()) +#else + contain_subplans((Node *) exprs)) +#endif { int saved_jit_flags; diff --git a/src/backend/foreign/foreign.c b/src/backend/foreign/foreign.c index 67364ae5d7c..de63610b1e9 100644 --- a/src/backend/foreign/foreign.c +++ b/src/backend/foreign/foreign.c @@ -21,7 +21,9 @@ #include "catalog/pg_foreign_table_seg.h" #include "catalog/pg_user_mapping.h" #include "cdb/cdbgang.h" +#ifdef SERVERLESS #include "cdb/cdbtranscat.h" +#endif #include "cdb/cdbutil.h" #include "cdb/cdbvars.h" #include "commands/defrem.h" @@ -713,10 +715,12 @@ GetFdwRoutineForRelation(Relation relation, bool makecopy) /* Give back the locally palloc'd copy regardless of makecopy */ return fdwroutine; } +#ifdef SERVERLESS else if (IsTransferOn()) { GetFdwRoutineByRelId(RelationGetRelid(relation)); } +#endif /* We have valid cached data --- does the caller want a copy? 
*/ if (makecopy) diff --git a/src/backend/nodes/outfast.c b/src/backend/nodes/outfast.c index 0774f4873e2..323fc3e801f 100644 --- a/src/backend/nodes/outfast.c +++ b/src/backend/nodes/outfast.c @@ -41,7 +41,9 @@ #include "catalog/heap.h" #include "catalog/index.h" #include "cdb/cdbgang.h" +#ifdef SERVERLESS #include "cdb/cdbtranscat.h" +#endif #include "utils/workfile_mgr.h" #include "parser/parsetree.h" @@ -858,7 +860,7 @@ _outGpSplitPartitionCmd(StringInfo str, const GpSplitPartitionCmd *node) WRITE_NODE_FIELD(arg2); } - +#ifdef SERVERLESS static void _outSystemTableTransferNode(StringInfo str, const SystemTableTransferNode *node) { @@ -878,7 +880,7 @@ _outTransferTuple(StringInfo str, const TransferTuple *node) WRITE_OID_FIELD(t_tableOid); appendBinaryStringInfo(str, node->t_data, node->t_len); } - +#endif /* * _outNode - @@ -1956,12 +1958,6 @@ _outNode(StringInfo str, void *obj) case T_AlterDatabaseStmt: _outAlterDatabaseStmt(str, obj); break; - case T_SystemTableTransferNode: - _outSystemTableTransferNode(str, obj); - break; - case T_TransferTuple: - _outTransferTuple(str, obj); - break; #ifdef SERVERLESS case T_APHashExpr: _outAPHashExpr(str, obj); @@ -1972,6 +1968,12 @@ _outNode(StringInfo str, void *obj) case T_APRangeExpr: _outAPRangeExpr(str, obj); break; + case T_SystemTableTransferNode: + _outSystemTableTransferNode(str, obj); + break; + case T_TransferTuple: + _outTransferTuple(str, obj); + break; #endif /* SERVERLESS */ default: elog(ERROR, "could not serialize unrecognized node type: %d", diff --git a/src/backend/nodes/readfast.c b/src/backend/nodes/readfast.c index ef983dcf4e1..53842cc4822 100644 --- a/src/backend/nodes/readfast.c +++ b/src/backend/nodes/readfast.c @@ -40,7 +40,9 @@ #include "catalog/pg_class.h" #include "catalog/heap.h" #include "cdb/cdbgang.h" +#ifdef SERVERLESS #include "cdb/cdbtranscat.h" +#endif /* * Macros to simplify reading of different kinds of fields. 
Use these @@ -1838,7 +1840,7 @@ _readEphemeralNamedRelationInfo(void) READ_DONE(); } - +#ifdef SERVERLESS static SystemTableTransferNode * _readSystemTableTransferNode(void) { @@ -1866,6 +1868,7 @@ _readTransferTuple(void) READ_DONE(); } +#endif static void * _readAlterDatabaseStmt(void) @@ -2974,12 +2977,6 @@ readNodeBinary(void) case T_DropTaskStmt: return_value = _readDropTaskStmt(); break; - case T_SystemTableTransferNode: - return_value= _readSystemTableTransferNode(); - break; - case T_TransferTuple: - return_value = _readTransferTuple(); - break; #ifdef SERVERLESS case T_APHashExpr: return_value = _readAPHashExpr(); @@ -2990,6 +2987,12 @@ readNodeBinary(void) case T_APRangeExpr: return_value = _readAPRangeExpr(); break; + case T_SystemTableTransferNode: + return_value= _readSystemTableTransferNode(); + break; + case T_TransferTuple: + return_value = _readTransferTuple(); + break; #endif /* SERVERLESS */ default: return_value = NULL; /* keep the compiler silent */ diff --git a/src/backend/nodes/readfuncs.c b/src/backend/nodes/readfuncs.c index 163c44a953d..4b5c53092fa 100644 --- a/src/backend/nodes/readfuncs.c +++ b/src/backend/nodes/readfuncs.c @@ -52,7 +52,9 @@ #include "utils/builtins.h" #include "cdb/cdbgang.h" +#ifdef SERVERLESS #include "cdb/cdbtranscat.h" +#endif #include "nodes/altertablenodes.h" /* @@ -620,6 +622,7 @@ _readConst(void) else local_node->constvalue = readDatum(local_node->constbyval); +#ifdef SERVERLESS if (local_node->consttype == REGCLASSOID && IsTransferOn()) { if (!RelationStoredCheck(local_node->constvalue)) @@ -630,6 +633,7 @@ _readConst(void) relation_close(rel, AccessShareLock); } } +#endif READ_DONE(); } diff --git a/src/backend/optimizer/plan/planner.c b/src/backend/optimizer/plan/planner.c index 9c71febac0e..a545bfb28fc 100644 --- a/src/backend/optimizer/plan/planner.c +++ b/src/backend/optimizer/plan/planner.c @@ -80,7 +80,9 @@ #include "cdb/cdbgroupingpaths.h" /* create_grouping_paths() extensions */ #include 
"cdb/cdbsetop.h" /* motion utilities */ #include "cdb/cdbtargeteddispatch.h" +#ifdef SERVERLESS #include "cdb/cdbtranscat.h" +#endif #include "cdb/cdbutil.h" #include "cdb/cdbvars.h" #include "optimizer/aqumv.h" /* answer_query_using_materialized_views */ @@ -377,8 +379,12 @@ standard_planner(Query *parse, const char *query_string, int cursorOptions, GP_ROLE_DISPATCH == Gp_role && IS_QUERY_DISPATCHER() && (cursorOptions & CURSOR_OPT_SKIP_FOREIGN_PARTITIONS) == 0 && +#ifdef SERVERLESS (cursorOptions & CURSOR_OPT_PARALLEL_RETRIEVE) == 0 && !inPlPgsql) +#else + (cursorOptions & CURSOR_OPT_PARALLEL_RETRIEVE) == 0) +#endif { #ifdef USE_ORCA diff --git a/src/backend/optimizer/util/plancat.c b/src/backend/optimizer/util/plancat.c index 307017cf83e..8e56de6d8ed 100644 --- a/src/backend/optimizer/util/plancat.c +++ b/src/backend/optimizer/util/plancat.c @@ -188,7 +188,11 @@ get_relation_info(PlannerInfo *root, Oid relationObjectId, bool inhparent, else hasindex = relation->rd_rel->relhasindex; +#ifdef SERVERLESS if (hasindex && Gp_role != GP_ROLE_EXECUTE) +#else + if (hasindex) +#endif { List *indexoidlist; LOCKMODE lmode; diff --git a/src/backend/optimizer/util/predtest.c b/src/backend/optimizer/util/predtest.c index 0714ea48ba3..0b99421e188 100644 --- a/src/backend/optimizer/util/predtest.c +++ b/src/backend/optimizer/util/predtest.c @@ -31,7 +31,9 @@ #include "nodes/makefuncs.h" #include "catalog/pg_operator.h" +#ifdef SERVERLESS #include "cdb/cdbtranscat.h" +#endif #include "optimizer/clauses.h" #include "optimizer/paths.h" #include "optimizer/predtest_valueset.h" @@ -2005,7 +2007,11 @@ lookup_proof_cache(Oid pred_op, Oid clause_op, bool refute_it) cache_entry = (OprProofCacheEntry *) hash_search(OprProofCacheHash, (void *) &key, HASH_ENTER, &cfound); +#ifdef SERVERLESS if (!cfound || IsTransferOn()) +#else + if (!cfound) +#endif { /* new cache entry, set it invalid */ cache_entry->have_implic = false; diff --git a/src/backend/postmaster/autovacuum.c 
b/src/backend/postmaster/autovacuum.c index 2a818234c51..e03a0c0e442 100644 --- a/src/backend/postmaster/autovacuum.c +++ b/src/backend/postmaster/autovacuum.c @@ -156,7 +156,9 @@ #include "utils/timeout.h" #include "utils/timestamp.h" +#ifdef SERVERLESS #include "cdb/cdbtranscat.h" +#endif #include "cdb/cdbvars.h" #include "utils/faultinjector.h" @@ -3173,8 +3175,10 @@ autovacuum_do_vac_analyze(autovac_table *tab, BufferAccessStrategy bstrategy) "", tab->at_relname); #endif +#ifdef SERVERLESS TransferReset(); SetTransferOn(); +#endif vacuum(rel_list, &tab->at_params, bstrategy, true); } diff --git a/src/backend/postmaster/postmaster.c b/src/backend/postmaster/postmaster.c index 18c21c3f024..52eff5ec8c1 100644 --- a/src/backend/postmaster/postmaster.c +++ b/src/backend/postmaster/postmaster.c @@ -156,7 +156,9 @@ #include "cdb/cdbtm.h" #include "cdb/cdbvars.h" #include "cdb/cdbendpoint.h" +#ifdef SERVERLESS #include "cdb/cdbtranscat.h" +#endif #include "cdb/ic_proxy_bgworker.h" #include "cdb/ml_ipc.h" #include "utils/metrics_utils.h" @@ -2672,6 +2674,7 @@ ProcessStartupPacket(Port *port, bool ssl_done, bool gss_done) * given packet length, complain. 
*/ if (offset != len - 1) +#ifdef SERVERLESS { int catalog_len; @@ -2688,6 +2691,11 @@ ProcessStartupPacket(Port *port, bool ssl_done, bool gss_done) StartUpCatalogLen = catalog_len; memcpy(StartUpCatalogData, buf + offset, catalog_len); } +#else + ereport(FATAL, + (errcode(ERRCODE_PROTOCOL_VIOLATION), + errmsg("invalid startup packet layout: expected terminator as last byte"))); +#endif /* * If the client requested a newer protocol version or if the client diff --git a/src/backend/task/pg_cron.c b/src/backend/task/pg_cron.c index eb895a635b1..9f0c5425370 100644 --- a/src/backend/task/pg_cron.c +++ b/src/backend/task/pg_cron.c @@ -61,7 +61,9 @@ #else #include "commands/variable.h" #endif +#ifdef SERVERLESS #include "cdb/cdbtranscat.h" +#endif #include "lib/stringinfo.h" #include "libpq-fe.h" #include "libpq/pqformat.h" @@ -1878,8 +1880,10 @@ ExecuteSqlString(const char *sql) isTopLevel = commands_remaining == 1; MemoryContextSwitchTo(oldcontext); +#ifdef SERVERLESS TransferReset(); SetTransferOn(); +#endif /* * Do parse analysis, rule rewrite, planning, and execution for each raw diff --git a/src/backend/tcop/postgres.c b/src/backend/tcop/postgres.c index 50115d6995d..aa0954c64e9 100644 --- a/src/backend/tcop/postgres.c +++ b/src/backend/tcop/postgres.c @@ -100,7 +100,9 @@ #include "cdb/cdbdispatchresult.h" #include "cdb/cdbendpoint.h" #include "cdb/cdbgang.h" +#ifdef SERVERLESS #include "cdb/cdbtranscat.h" +#endif #include "cdb/ml_ipc.h" #include "access/twophase.h" #include "postmaster/backoff.h" @@ -5689,7 +5691,9 @@ PostgresMain(int argc, char *argv[], check_forbidden_in_gpdb_handlers(firstchar); +#ifdef SERVERLESS TransferReset(); +#endif switch (firstchar) { @@ -5744,14 +5748,18 @@ PostgresMain(int argc, char *argv[], const char *serializedDtxContextInfo = NULL; const char *serializedPlantree = NULL; const char *serializedQueryDispatchDesc = NULL; +#ifdef SERVERLESS const char *serializedCatalog = NULL; +#endif const char *resgroupInfoBuf = NULL; int 
query_string_len = 0; int serializedDtxContextInfolen = 0; int serializedPlantreelen = 0; int serializedQueryDispatchDesclen = 0; +#ifdef SERVERLESS int serializedCatalogLen = 0; +#endif int resgroupInfoLen = 0; TimestampTz statementStart; Oid suid; @@ -5787,7 +5795,9 @@ PostgresMain(int argc, char *argv[], query_string_len = pq_getmsgint(&input_message, 4); serializedPlantreelen = pq_getmsgint(&input_message, 4); serializedQueryDispatchDesclen = pq_getmsgint(&input_message, 4); +#ifdef SERVERLESS serializedCatalogLen = pq_getmsgint(&input_message, 4); +#endif serializedDtxContextInfolen = pq_getmsgint(&input_message, 4); /* read in the DTX context info */ @@ -5827,19 +5837,19 @@ PostgresMain(int argc, char *argv[], if (serializedQueryDispatchDesclen > 0) serializedQueryDispatchDesc = pq_getmsgbytes(&input_message,serializedQueryDispatchDesclen); - +#ifdef SERVERLESS if (serializedCatalogLen > 0) serializedCatalog = pq_getmsgbytes(&input_message, serializedCatalogLen); if (!IS_QUERY_DISPATCHER()) { SystemTupleStoreReset(); -#ifdef SERVERLESS + InvalidateSystemCaches(); -#endif /* SERVERLESS */ + SystemTupleStoreInit(serializedCatalog, serializedCatalogLen); } - +#endif /* * Always use the same GpIdentity.numsegments with QD on QEs */ diff --git a/src/backend/tcop/utility.c b/src/backend/tcop/utility.c index d00409ec311..58efee52d26 100644 --- a/src/backend/tcop/utility.c +++ b/src/backend/tcop/utility.c @@ -84,7 +84,9 @@ #include "catalog/pg_profile.h" #include "cdb/cdbdisp_query.h" #include "cdb/cdbendpoint.h" +#ifdef SERVERLESS #include "cdb/cdbtranscat.h" +#endif #include "cdb/cdbvars.h" @@ -586,7 +588,9 @@ ProcessUtility(PlannedStmt *pstmt, Assert(queryString != NULL); /* required as of 8.4 */ Assert(qc == NULL || qc->commandTag == CMDTAG_UNKNOWN); +#ifdef SERVERLESS SetTransferOn(); +#endif /* * Greenplum specific code: diff --git a/src/backend/utils/adt/enum.c b/src/backend/utils/adt/enum.c index 9d23f916e85..20496080852 100644 --- 
a/src/backend/utils/adt/enum.c +++ b/src/backend/utils/adt/enum.c @@ -403,7 +403,11 @@ enum_endpoint(Oid enumtypoid, ScanDirection direction) ObjectIdGetDatum(enumtypoid)); enum_rel = table_open(EnumRelationId, AccessShareLock); +#ifdef SERVERLESS enum_idx = order_index_open(EnumTypIdSortOrderIndexId, AccessShareLock); +#else + enum_idx = index_open(EnumTypIdSortOrderIndexId, AccessShareLock); +#endif enum_scan = systable_beginscan_ordered(enum_rel, enum_idx, NULL, 1, &skey); @@ -562,7 +566,11 @@ enum_range_internal(Oid enumtypoid, Oid lower, Oid upper) ObjectIdGetDatum(enumtypoid)); enum_rel = table_open(EnumRelationId, AccessShareLock); +#ifdef SERVERLESS enum_idx = order_index_open(EnumTypIdSortOrderIndexId, AccessShareLock); +#else + enum_idx = index_open(EnumTypIdSortOrderIndexId, AccessShareLock); +#endif enum_scan = systable_beginscan_ordered(enum_rel, enum_idx, NULL, 1, &skey); max = 64; diff --git a/src/backend/utils/cache/catcache.c b/src/backend/utils/cache/catcache.c index fa4f757948e..e66125d4321 100644 --- a/src/backend/utils/cache/catcache.c +++ b/src/backend/utils/cache/catcache.c @@ -24,7 +24,9 @@ #include "catalog/pg_collation.h" #include "catalog/pg_operator.h" #include "catalog/pg_type.h" +#ifdef SERVERLESS #include "cdb/cdbtranscat.h" +#endif #include "cdb/cdbvars.h" #include "common/hashfn.h" #include "miscadmin.h" @@ -66,8 +68,10 @@ /* Cache management header --- pointer is NULL until created */ static CatCacheHeader *CacheHdr = NULL; +#ifdef SERVERLESS static HeapTuple SearchCatCacheInternalCollect(CatCache *cache, int nkeys, Datum v1, Datum v2, Datum v3, Datum v4); +#endif static inline HeapTuple SearchCatCacheInternal(CatCache *cache, int nkeys, Datum v1, Datum v2, @@ -1038,8 +1042,10 @@ InitCatCachePhase2(CatCache *cache, bool touch_index) if (cache->cc_tupdesc == NULL) CatalogCacheInitializeCache(cache); +#ifdef SERVERLESS if (systup_store_active()) return; +#endif if (touch_index && cache->id != AMOID && @@ -1212,7 +1218,11 @@ 
SearchCatCache(CatCache *cache, Datum v3, Datum v4) { +#ifdef SERVERLESS return SearchCatCacheInternalCollect(cache, cache->cc_nkeys, v1, v2, v3, v4); +#else + return SearchCatCacheInternal(cache, cache->cc_nkeys, v1, v2, v3, v4); +#endif } @@ -1226,7 +1236,11 @@ HeapTuple SearchCatCache1(CatCache *cache, Datum v1) { +#ifdef SERVERLESS return SearchCatCacheInternalCollect(cache, 1, v1, 0, 0, 0); +#else + return SearchCatCacheInternal(cache, 1, v1, 0, 0, 0); +#endif } @@ -1234,7 +1248,11 @@ HeapTuple SearchCatCache2(CatCache *cache, Datum v1, Datum v2) { +#ifdef SERVERLESS return SearchCatCacheInternalCollect(cache, 2, v1, v2, 0, 0); +#else + return SearchCatCacheInternal(cache, 2, v1, v2, 0, 0); +#endif } @@ -1242,7 +1260,11 @@ HeapTuple SearchCatCache3(CatCache *cache, Datum v1, Datum v2, Datum v3) { +#ifdef SERVERLESS return SearchCatCacheInternalCollect(cache, 3, v1, v2, v3, 0); +#else + return SearchCatCacheInternal(cache, 3, v1, v2, v3, 0); +#endif } @@ -1250,9 +1272,14 @@ HeapTuple SearchCatCache4(CatCache *cache, Datum v1, Datum v2, Datum v3, Datum v4) { +#ifdef SERVERLESS return SearchCatCacheInternalCollect(cache, 4, v1, v2, v3, v4); +#else + return SearchCatCacheInternal(cache, 4, v1, v2, v3, v4); +#endif } +#ifdef SERVERLESS static HeapTuple SearchCatCacheInternalCollect(CatCache *cache, int nkeys, @@ -1269,6 +1296,7 @@ SearchCatCacheInternalCollect(CatCache *cache, return htup; } +#endif /* * Work-horse for SearchCatCache/SearchCatCacheN. 
@@ -2167,7 +2195,11 @@ PrintCatCacheLeakWarning(HeapTuple tuple, const char *resOwnerName) /* Safety check to ensure we were handed a cache entry */ Assert(ct->ct_magic == CT_MAGIC); +#ifdef SERVERLESS elog(LOG, "cache reference leak: cache %s (%d), tuple %u/%u has count %d, resowner '%s'", +#else + elog(WARNING, "cache reference leak: cache %s (%d), tuple %u/%u has count %d, resowner '%s'", +#endif ct->my_cache->cc_relname, ct->my_cache->id, ItemPointerGetBlockNumber(&(tuple->t_self)), ItemPointerGetOffsetNumber(&(tuple->t_self)), diff --git a/src/backend/utils/cache/evtcache.c b/src/backend/utils/cache/evtcache.c index a9262d27018..a0e468aaf9f 100644 --- a/src/backend/utils/cache/evtcache.c +++ b/src/backend/utils/cache/evtcache.c @@ -128,7 +128,11 @@ BuildEventTriggerCache(void) * Prepare to scan pg_event_trigger in name order. */ rel = relation_open(EventTriggerRelationId, AccessShareLock); +#ifdef SERVERLESS irel = order_index_open(EventTriggerNameIndexId, AccessShareLock); +#else + irel = index_open(EventTriggerNameIndexId, AccessShareLock); +#endif scan = systable_beginscan_ordered(rel, irel, NULL, 0, NULL); /* diff --git a/src/backend/utils/cache/plancache.c b/src/backend/utils/cache/plancache.c index d4e9ed53c2b..b83e84e7627 100644 --- a/src/backend/utils/cache/plancache.c +++ b/src/backend/utils/cache/plancache.c @@ -75,7 +75,9 @@ #include "utils/syscache.h" #include "utils/plancache.h" +#ifdef SERVERLESS #include "cdb/cdbtranscat.h" +#endif #include "cdb/cdbutil.h" /* @@ -1094,8 +1096,10 @@ choose_custom_plan(CachedPlanSource *plansource, ParamListInfo boundParams, Into if (IsTransactionStmtPlan(plansource)) return false; +#ifdef SERVERLESS if (IsTransferOn()) return true; +#endif /* Let settings force the decision */ if (plan_cache_mode == PLAN_CACHE_MODE_FORCE_GENERIC_PLAN) diff --git a/src/backend/utils/cache/relcache.c b/src/backend/utils/cache/relcache.c index a3fa185d960..e3e5082397b 100644 --- a/src/backend/utils/cache/relcache.c +++ 
b/src/backend/utils/cache/relcache.c @@ -47,38 +47,21 @@ #include "catalog/indexing.h" #include "catalog/namespace.h" #include "catalog/partition.h" -#include "catalog/pg_aggregate.h" #include "catalog/pg_am.h" -#include "catalog/pg_amop.h" #include "catalog/pg_amproc.h" #include "catalog/pg_attrdef.h" -#include "catalog/pg_authid.h" #include "catalog/pg_auth_members.h" #include "catalog/pg_auth_time_constraint.h" #include "catalog/pg_authid.h" #include "catalog/pg_constraint.h" -#include "catalog/pg_conversion.h" #include "catalog/pg_database.h" -#include "catalog/pg_default_acl.h" -#include "catalog/pg_enum.h" -#include "catalog/pg_event_trigger.h" -#include "catalog/pg_extprotocol.h" -#include "catalog/pg_foreign_data_wrapper.h" -#include "catalog/pg_foreign_server.h" -#include "catalog/pg_foreign_table.h" -#include "catalog/pg_language.h" #include "catalog/pg_namespace.h" #include "catalog/pg_opclass.h" -#include "catalog/pg_operator.h" -#include "catalog/pg_opfamily.h" -#include "catalog/pg_partitioned_table.h" #include "catalog/pg_password_history.h" #include "catalog/pg_proc.h" #include "catalog/pg_profile.h" #include "catalog/pg_publication.h" -#include "catalog/pg_range.h" #include "catalog/pg_rewrite.h" -#include "catalog/pg_sequence.h" #include "catalog/pg_shseclabel.h" #include "catalog/pg_statistic_ext.h" #include "catalog/pg_subscription.h" @@ -93,8 +76,6 @@ #include "nodes/makefuncs.h" #include "nodes/nodeFuncs.h" #include "optimizer/optimizer.h" -#include "partitioning/partbounds.h" -#include "partitioning/partdesc.h" #include "rewrite/rewriteDefine.h" #include "rewrite/rowsecurity.h" #include "storage/lmgr.h" @@ -106,7 +87,6 @@ #include "utils/inval.h" #include "utils/lsyscache.h" #include "utils/memutils.h" -#include "utils/partcache.h" #include "utils/relmapper.h" #include "utils/resowner_private.h" #include "utils/snapmgr.h" @@ -115,15 +95,35 @@ #include "access/transam.h" #include "catalog/gp_distribution_policy.h" /* GpPolicy */ #include 
"catalog/gp_indexing.h" +#include "catalog/heap.h" +#include "catalog/index.h" +#ifdef SERVERLESS +#include "catalog/pg_aggregate.h" +#include "catalog/pg_amop.h" +#include "catalog/pg_authid.h" +#include "catalog/pg_conversion.h" +#include "catalog/pg_default_acl.h" +#include "catalog/pg_enum.h" +#include "catalog/pg_event_trigger.h" +#include "catalog/pg_extprotocol.h" +#include "catalog/pg_foreign_data_wrapper.h" +#include "catalog/pg_foreign_server.h" +#include "catalog/pg_foreign_table.h" +#include "catalog/pg_language.h" +#include "catalog/pg_operator.h" +#include "catalog/pg_opfamily.h" +#include "catalog/pg_partitioned_table.h" +#include "catalog/pg_range.h" +#include "catalog/pg_sequence.h" +#include "partitioning/partbounds.h" +#include "partitioning/partdesc.h" +#include "utils/partcache.h" #include "catalog/gp_storage_server.h" #include "catalog/gp_storage_user_mapping.h" #include "catalog/gp_warehouse.h" -#include "catalog/heap.h" -#include "catalog/index.h" #include "catalog/main_manifest.h" #include "catalog/pg_depend.h" #include "catalog/pg_db_role_setting.h" -#include "catalog/pg_directory_table.h" #include "catalog/pg_proc_callback.h" #include "catalog/pg_cast.h" #include "catalog/pg_collation.h" @@ -146,8 +146,8 @@ #include "catalog/pg_ts_template.h" #include "catalog/pg_user_mapping.h" #include "cdb/cdbtranscat.h" +#endif #include "cdb/cdbtm.h" -#include "cdb/cdbtranscat.h" #include "cdb/cdbvars.h" /* Gp_role */ #include "cdb/cdbsreh.h" @@ -271,6 +271,7 @@ static int EOXactTupleDescArrayLen = 0; /* * macros to manipulate the lookup hashtable */ +#ifdef SERVERLESS #define RelationCacheInsert(RELATION, replace_allowed) \ do { \ RelIdCacheEnt *hentry; bool found; \ @@ -295,6 +296,29 @@ do { \ else \ hentry->reldesc = (RELATION); \ } while(0) +#else +#define RelationCacheInsert(RELATION, replace_allowed) \ +do { \ + RelIdCacheEnt *hentry; bool found; \ + hentry = (RelIdCacheEnt *) hash_search(RelationIdCache, \ + (void *) &((RELATION)->rd_id), \ + 
HASH_ENTER, &found); \ + if (found) \ + { \ + /* see comments in RelationBuildDesc and RelationBuildLocalRelation */ \ + Relation _old_rel = hentry->reldesc; \ + Assert(replace_allowed); \ + hentry->reldesc = (RELATION); \ + if (RelationHasReferenceCountZero(_old_rel)) \ + RelationDestroyRelation(_old_rel, false); \ + else if (!IsBootstrapProcessingMode()) \ + elog(WARNING, "leaking still-referenced relcache entry for \"%s\"", \ + RelationGetRelationName(_old_rel)); \ + } \ + else \ + hentry->reldesc = (RELATION); \ +} while(0) +#endif #define RelationIdCacheLookup(ID, RELATION) \ do { \ @@ -742,12 +766,14 @@ RelationBuildTupleDesc(Relation relation) * computed when and if needed during tuple access. */ #ifdef USE_ASSERT_CHECKING -// { -// int i; -// -// for (i = 0; i < RelationGetNumberOfAttributes(relation); i++) -// Assert(TupleDescAttr(relation->rd_att, i)->attcacheoff == -1); -// } +#ifndef SERVERLESS + { + int i; + + for (i = 0; i < RelationGetNumberOfAttributes(relation); i++) + Assert(TupleDescAttr(relation->rd_att, i)->attcacheoff == -1); + } +#endif #endif /* @@ -1786,6 +1812,7 @@ LookupOpclassInfo(Oid operatorClassOid, { Assert(numSupport == opcentry->numSupport); +#ifdef SERVERLESS if (IsTransferOn()) { pfree(opcentry->supportProcs); @@ -1795,6 +1822,7 @@ LookupOpclassInfo(Oid operatorClassOid, opcentry->numSupport = numSupport; opcentry->supportProcs = NULL; /* filled below */ } +#endif } /* @@ -2222,12 +2250,16 @@ Relation RelationIdGetRelation(Oid relationId) { Relation rd; +#ifdef SERVERLESS bool collected; +#endif /* Make sure we're in an xact, even if this ends up being a cache hit */ Assert(IsTransactionState()); +#ifdef SERVERLESS collected = RelationStored(relationId); +#endif /* * first try to find reldesc in the cache @@ -2265,9 +2297,15 @@ RelationIdGetRelation(Oid relationId) * change, but we still want to update the rd_rel entry. So * rd_isvalid = false is left in place for a later lookup. 
*/ +#ifdef SERVERLESS Assert(rd->rd_isvalid || rd->rd_isnailed); +#else + Assert(rd->rd_isvalid || + (rd->rd_isnailed && !criticalRelcachesBuilt)); +#endif } +#ifdef SERVERLESS if (!collected && !rd->rd_isnailed) { volatile Relation tmpRd; @@ -2285,7 +2323,7 @@ RelationIdGetRelation(Oid relationId) rd->rd_partcheckcxt = NULL; } } - +#endif return rd; } @@ -2536,8 +2574,10 @@ RelationReloadNailed(Relation relation) { Assert(relation->rd_isnailed); +#ifdef SERVERLESS if (IsPostmasterEnvironment && !IS_QUERY_DISPATCHER()) return; +#endif /* * Redo RelationInitPhysicalAddr in case it is a mapped relation whose @@ -4457,7 +4497,11 @@ RelationCacheInitializePhase3(void) /* * If it's a faked-up entry, read the real pg_class tuple. */ +#ifdef SERVERLESS if (relation->rd_rel->relowner == InvalidOid && !systup_store_active()) +#else + if (relation->rd_rel->relowner == InvalidOid) +#endif { HeapTuple htup; Form_pg_class relp; @@ -4604,8 +4648,10 @@ load_critical_index(Oid indexoid, Oid heapoid) { Relation ird; +#ifdef SERVERLESS if (systup_store_active()) return; +#endif /* * We must lock the underlying catalog before locking the index to avoid @@ -4756,9 +4802,9 @@ AttrDefaultFetch(Relation relation, int ndef) attrdef[found].adnum = adform->adnum; attrdef[found].adbin = MemoryContextStrdup(CacheMemoryContext, s); pfree(s); - +#ifdef SERVERLESS DefaultValueStore(attrdef[found].adbin); - +#endif found++; } @@ -7064,6 +7110,7 @@ RelationIdIsInInitFile(Oid relationId) if (relationId == SharedSecLabelRelationId || relationId == TriggerRelidNameIndexId || relationId == DatabaseNameIndexId || +#ifdef SERVERLESS relationId == DbRoleSettingRelationId || relationId == SharedSecLabelObjectIndexId || relationId == ManifestRelationId || @@ -7073,6 +7120,9 @@ RelationIdIsInInitFile(Oid relationId) relationId == AttrDefaultRelationId || relationId == TriggerRelationId || relationId == InheritsRelationId) +#else + relationId == SharedSecLabelObjectIndexId) +#endif { /* * If this Assert 
fails, we don't need the applicable special case diff --git a/src/backend/utils/cache/syscache.c b/src/backend/utils/cache/syscache.c index 047a97917b8..ed20ef72135 100644 --- a/src/backend/utils/cache/syscache.c +++ b/src/backend/utils/cache/syscache.c @@ -92,7 +92,9 @@ #include "access/heapam.h" #include "catalog/pg_resgroup.h" #include "catalog/pg_extprotocol.h" +#ifdef SERVERLESS #include "cdb/cdbtranscat.h" +#endif #include "miscadmin.h" #include "catalog/gp_indexing.h" @@ -1693,12 +1695,15 @@ struct catclist * SearchSysCacheList(int cacheId, int nkeys, Datum key1, Datum key2, Datum key3) { +#ifdef SERVERLESS CatCList *list; +#endif if (cacheId < 0 || cacheId >= SysCacheSize || !PointerIsValid(SysCache[cacheId])) elog(ERROR, "invalid cache ID: %d", cacheId); +#ifdef SERVERLESS list = SearchCatCacheList(SysCache[cacheId], nkeys, key1, key2, key3); @@ -1712,6 +1717,10 @@ SearchSysCacheList(int cacheId, int nkeys, } return list; +#else + return SearchCatCacheList(SysCache[cacheId], nkeys, + key1, key2, key3); +#endif } /* diff --git a/src/backend/utils/cache/ts_cache.c b/src/backend/utils/cache/ts_cache.c index 7d7d9fa238f..c63afcbe88b 100644 --- a/src/backend/utils/cache/ts_cache.c +++ b/src/backend/utils/cache/ts_cache.c @@ -478,7 +478,11 @@ lookup_ts_config_cache(Oid cfgId) ObjectIdGetDatum(cfgId)); maprel = table_open(TSConfigMapRelationId, AccessShareLock); +#ifdef SERVERLESS mapidx = order_index_open(TSConfigMapIndexId, AccessShareLock); +#else + mapidx = index_open(TSConfigMapIndexId, AccessShareLock); +#endif mapscan = systable_beginscan_ordered(maprel, mapidx, NULL, 1, &mapskey); @@ -489,8 +493,10 @@ lookup_ts_config_cache(Oid cfgId) if (toktype <= 0 || toktype > MAXTOKENTYPE) elog(ERROR, "maptokentype value %d is out of range", toktype); -// if (toktype < maxtokentype) -// elog(ERROR, "maptokentype entries are out of order"); +#ifndef SERVERLESS + if (toktype < maxtokentype) + elog(ERROR, "maptokentype entries are out of order"); +#endif if (toktype > 
maxtokentype) { /* starting a new token type, but first save the prior data */ diff --git a/src/backend/utils/cache/typcache.c b/src/backend/utils/cache/typcache.c index 5175cc29a5b..d9ec999cdd5 100644 --- a/src/backend/utils/cache/typcache.c +++ b/src/backend/utils/cache/typcache.c @@ -56,7 +56,9 @@ #include "catalog/pg_operator.h" #include "catalog/pg_range.h" #include "catalog/pg_type.h" +#ifdef SERVERLESS #include "cdb/cdbtranscat.h" +#endif #include "commands/defrem.h" #include "executor/executor.h" #include "lib/dshash.h" @@ -323,7 +325,7 @@ static void shared_record_typmod_registry_detach(dsm_segment *segment, static TupleDesc find_or_make_matching_shared_tupledesc(TupleDesc tupdesc); static dsa_pointer share_tupledesc(dsa_area *area, TupleDesc tupdesc, uint32 typmod); - +#ifdef SERVERLESS static void CreateTypeMemoryContext(void) { @@ -336,6 +338,7 @@ CreateTypeMemoryContext(void) "TypeMemoryContext", ALLOCSET_DEFAULT_SIZES); } +#endif /* * lookup_type_cache @@ -371,8 +374,13 @@ lookup_type_cache(Oid type_id, int flags) CacheRegisterSyscacheCallback(CONSTROID, TypeCacheConstrCallback, (Datum) 0); /* Also make sure CacheMemoryContext exists */ +#ifdef SERVERLESS CreateCacheMemoryContext(); CreateTypeMemoryContext(); +#else + if (!CacheMemoryContext) + CreateCacheMemoryContext(); +#endif } /* Try to look up an existing entry */ @@ -476,8 +484,10 @@ lookup_type_cache(Oid type_id, int flags) ReleaseSysCache(tp); } +#ifdef SERVERLESS if (!InTypeStore()) TypeStore(type_id, flags); +#endif /* * Look up opclasses if we haven't already and any dependent info is @@ -2010,8 +2020,13 @@ assign_record_type_typmod(TupleDesc tupDesc) HASH_ELEM | HASH_FUNCTION | HASH_COMPARE); /* Also make sure CacheMemoryContext exists */ +#ifdef SERVERLESS CreateCacheMemoryContext(); CreateTypeMemoryContext(); +#else + if (!CacheMemoryContext) + CreateCacheMemoryContext(); +#endif } /* diff --git a/src/backend/utils/init/postinit.c b/src/backend/utils/init/postinit.c index 
7427dc55fe8..380673dcb06 100644 --- a/src/backend/utils/init/postinit.c +++ b/src/backend/utils/init/postinit.c @@ -46,7 +46,9 @@ #include "libpq/libpq-be.h" #include "cdb/cdbendpoint.h" #include "cdb/cdbtm.h" +#ifdef SERVERLESS #include "cdb/cdbtranscat.h" +#endif #include "cdb/cdbvars.h" #include "cdb/cdbutil.h" #include "mb/pg_wchar.h" @@ -676,11 +678,13 @@ InitPostgres(const char *in_dbname, Oid dboid, const char *username, elog(DEBUG3, "InitPostgres"); +#ifdef SERVERLESS if (StartUpCatalogData && !IS_QUERY_DISPATCHER()) { SystemTupleStoreReset(); SystemTupleStoreInit(StartUpCatalogData, StartUpCatalogLen); } +#endif /* * Add my PGPROC struct to the ProcArray. diff --git a/src/backend/utils/mb/mbutils.c b/src/backend/utils/mb/mbutils.c index 8ea3ea81e72..1a86b1e9a9a 100644 --- a/src/backend/utils/mb/mbutils.c +++ b/src/backend/utils/mb/mbutils.c @@ -438,6 +438,7 @@ pg_do_encoding_conversion(unsigned char *src, int len, return result; } +#ifdef SERVERLESS void StoreEncodingConversion(int dest_encoding) { @@ -449,6 +450,7 @@ StoreEncodingConversion(int dest_encoding) if (htup) ReleaseSysCache(htup); } +#endif /* * Convert src string to another encoding. diff --git a/src/backend/utils/misc/superuser.c b/src/backend/utils/misc/superuser.c index ca77ea92e10..28376c4e928 100644 --- a/src/backend/utils/misc/superuser.c +++ b/src/backend/utils/misc/superuser.c @@ -26,7 +26,6 @@ #include "utils/inval.h" #include "utils/syscache.h" -#include "cdb/cdbvars.h" #include "storage/proc.h" /* * In common cases the same roleid (ie, the session or current ID) will @@ -69,8 +68,10 @@ superuser_arg(Oid roleid) HeapTuple rtup; /* Quick out for cache hit */ -// if (OidIsValid(last_roleid) && last_roleid == roleid) -// return last_roleid_is_super; +#ifndef SERVERLESS + if (OidIsValid(last_roleid) && last_roleid == roleid) + return last_roleid_is_super; +#endif /* Special escape path in case you deleted all your users. 
*/ if (!IsUnderPostmaster && roleid == BOOTSTRAP_SUPERUSERID) diff --git a/src/backend/utils/mmgr/aset.c b/src/backend/utils/mmgr/aset.c index 4de1a65e342..f9533bd1c9c 100644 --- a/src/backend/utils/mmgr/aset.c +++ b/src/backend/utils/mmgr/aset.c @@ -195,7 +195,9 @@ MEMORY_ACCOUNT_INC_ALLOCATED(AllocSet set, Size newbytes) /* Make sure these values are not overflow */ Assert(set->localAllocated >= newbytes); -// Assert(parent->currentAllocated >= set->localAllocated); +#ifndef SERVERLESS + Assert(parent->currentAllocated >= set->localAllocated); +#endif } static inline void @@ -204,7 +206,9 @@ MEMORY_ACCOUNT_DEC_ALLOCATED(AllocSet set, Size newbytes) AllocSet parent = set->accountingParent; Assert(set->localAllocated >= newbytes); - // Assert(parent->currentAllocated >= set->localAllocated); +#ifndef SERVERLESS + Assert(parent->currentAllocated >= set->localAllocated); +#endif set->localAllocated -= newbytes; parent->currentAllocated -= newbytes; @@ -761,7 +765,11 @@ AllocSetDelete(MemoryContext context, MemoryContext parent) /* Make sure all children have been deleted */ Assert(context->firstchild == NULL); MEMORY_ACCOUNT_DEC_ALLOCATED(set, set->localAllocated); +#ifdef SERVERLESS if (IS_MEMORY_ACCOUNT(set) && parent) +#else + if (IS_MEMORY_ACCOUNT(set)) +#endif { /* Roll up our peak value to the parent, before this context goes away. 
*/ AllocSet parentset = (AllocSet) parent; diff --git a/src/backend/utils/mmgr/mcxt.c b/src/backend/utils/mmgr/mcxt.c index f6a869cf9b7..9aa580cdac2 100644 --- a/src/backend/utils/mmgr/mcxt.c +++ b/src/backend/utils/mmgr/mcxt.c @@ -63,7 +63,9 @@ MemoryContext TopMemoryContext = NULL; MemoryContext ErrorContext = NULL; MemoryContext PostmasterContext = NULL; MemoryContext CacheMemoryContext = NULL; +#ifdef SERVERLESS MemoryContext TypeMemoryContext = NULL; +#endif MemoryContext MessageContext = NULL; MemoryContext TopTransactionContext = NULL; MemoryContext CurTransactionContext = NULL; diff --git a/src/include/access/genam.h b/src/include/access/genam.h index edf146b0aa3..3ec5a24108d 100644 --- a/src/include/access/genam.h +++ b/src/include/access/genam.h @@ -138,7 +138,9 @@ typedef struct IndexOrderByDistance #define IndexScanIsValid(scan) PointerIsValid(scan) extern Relation index_open(Oid relationId, LOCKMODE lockmode); +#ifdef SERVERLESS extern Relation order_index_open(Oid relationId, LOCKMODE lockmode); +#endif extern void index_close(Relation relation, LOCKMODE lockmode); extern bool index_insert(Relation indexRelation, diff --git a/src/include/cdb/cdbtranscat.h b/src/include/cdb/cdbtranscat.h index 2b38821e0bb..1eea0bcc139 100644 --- a/src/include/cdb/cdbtranscat.h +++ b/src/include/cdb/cdbtranscat.h @@ -1,6 +1,7 @@ #ifndef CDBTANSCAT_H #define CDBTANSCAT_H +#ifdef SERVERLESS #include "access/heapam.h" #include "access/htup.h" #include "nodes/execnodes.h" @@ -114,4 +115,5 @@ extern char *StartUpCatalogData; extern int StartUpCatalogLen; extern char *CollectStartupCatalog(int *len); +#endif #endif //CDBTANSCAT_H \ No newline at end of file diff --git a/src/include/cdb/cdbvars.h b/src/include/cdb/cdbvars.h index 5df3d337376..7b777480399 100644 --- a/src/include/cdb/cdbvars.h +++ b/src/include/cdb/cdbvars.h @@ -759,7 +759,6 @@ extern GpId GpIdentity; #define IS_QUERY_DISPATCHER() (GpIdentity.segindex == MASTER_CONTENT_ID) #define IS_QUERY_EXECUTOR_BACKEND() 
(Gp_role == GP_ROLE_EXECUTE && gp_session_id > 0) -#define IS_SEGMENT_EXECUTOR() (IS_QUERY_EXECUTOR_BACKEND() && GpIdentity.segindex >= 0) /* Stores the listener port that this process uses to listen for incoming * Interconnect connections from other Motion nodes. diff --git a/src/include/commands/trigger.h b/src/include/commands/trigger.h index c226469a09c..ecaf63da929 100644 --- a/src/include/commands/trigger.h +++ b/src/include/commands/trigger.h @@ -104,7 +104,9 @@ typedef struct TransitionCaptureState #define TRIGGER_EVENT_INSTEAD 0x00000010 #define TRIGGER_EVENT_TIMINGMASK 0x00000018 +#ifdef SERVERLESS #define TRIGGER_EVENT_PREPARE 0x00000020 +#endif /* More TriggerEvent flags, used only within trigger.c */ diff --git a/src/include/executor/executor.h b/src/include/executor/executor.h index 1bde2c9765e..81fc930e008 100644 --- a/src/include/executor/executor.h +++ b/src/include/executor/executor.h @@ -106,6 +106,7 @@ extern PGDLLIMPORT SetDtxFlag_hook_type SetDtxFlag_hook; typedef PlanState *(*ExecInitNode_hook_type)(Plan *node, EState *estate, int eflags); extern PGDLLIMPORT ExecInitNode_hook_type ExecInitNode_hook; +#ifdef SERVERLESS typedef void (*ExprEvalPushStep_hook_type) (ExprState *es, const ExprEvalStep *s); extern PGDLLIMPORT ExprEvalPushStep_hook_type ExprEvalPushStep_hook; @@ -114,6 +115,7 @@ extern PGDLLIMPORT CollectResultInfo_hook_type CollectResultInfo_hook; typedef void (*CollectProc_hook_type) (FunctionCallInfo fcinfo); extern PGDLLIMPORT CollectProc_hook_type CollectProc_hook; +#endif /* Hook for plugins to get control in ExecEndNode() */ typedef void (*ExecEndNode_hook_type)(PlanState *node); diff --git a/src/include/executor/functions.h b/src/include/executor/functions.h index 4b3851bef75..e89eb3eb73d 100644 --- a/src/include/executor/functions.h +++ b/src/include/executor/functions.h @@ -35,7 +35,9 @@ typedef struct SQLFunctionParseInfo typedef SQLFunctionParseInfo *SQLFunctionParseInfoPtr; extern Datum fmgr_sql(PG_FUNCTION_ARGS); +#ifdef 
SERVERLESS extern void fmgr_sql_init(PG_FUNCTION_ARGS); +#endif extern SQLFunctionParseInfoPtr prepare_sql_fn_parse_info(HeapTuple procedureTuple, Node *call_expr, Oid inputCollation); diff --git a/src/include/mb/pg_wchar.h b/src/include/mb/pg_wchar.h index 881559092a1..34cb73f932b 100644 --- a/src/include/mb/pg_wchar.h +++ b/src/include/mb/pg_wchar.h @@ -636,7 +636,9 @@ extern int pg_bind_textdomain_codeset(const char *domainname); extern unsigned char *pg_do_encoding_conversion(unsigned char *src, int len, int src_encoding, int dest_encoding); +#ifdef SERVERLESS extern void StoreEncodingConversion(int dest_encoding); +#endif extern int pg_do_encoding_conversion_buf(Oid proc, int src_encoding, int dest_encoding, diff --git a/src/include/nodes/nodes.h b/src/include/nodes/nodes.h index 60be05b4253..e0e5b27c455 100644 --- a/src/include/nodes/nodes.h +++ b/src/include/nodes/nodes.h @@ -686,8 +686,6 @@ typedef enum NodeTag T_RetrieveStmt, T_ReindexIndexInfo, /* in nodes/parsenodes.h */ T_EphemeralNamedRelationInfo, /* utils/queryenvironment.h */ - T_SystemTableTransferNode, - T_TransferTuple, T_FileFragment, T_FileScanTask, T_ExternalTableMetadata, @@ -695,6 +693,8 @@ typedef enum NodeTag T_APListExpr, T_APRangeExpr, T_APHashExpr, + T_SystemTableTransferNode, + T_TransferTuple, #endif /* SERVERLESS */ } NodeTag; diff --git a/src/include/utils/typcache.h b/src/include/utils/typcache.h index 81e2f20424b..56ee2254dd9 100644 --- a/src/include/utils/typcache.h +++ b/src/include/utils/typcache.h @@ -154,7 +154,9 @@ typedef struct TypeCacheEntry /* This value will not equal any valid tupledesc identifier, nor 0 */ #define INVALID_TUPLEDESC_IDENTIFIER ((uint64) 1) +#ifdef SERVERLESS extern MemoryContext TypeMemoryContext; +#endif /* * Callers wishing to maintain a long-lived reference to a domain's constraint diff --git a/src/interfaces/libpq/fe-connect.c b/src/interfaces/libpq/fe-connect.c index 62314c4ddd9..aeae279c051 100644 --- a/src/interfaces/libpq/fe-connect.c +++ 
b/src/interfaces/libpq/fe-connect.c @@ -432,7 +432,9 @@ static const PQEnvironmentOption EnvironmentOptions[] = } }; +#ifdef SERVERLESS extern char *CollectStartupCatalog(int *len); +#endif /* The connection URI must start with either of the following designators: */ static const char uri_designator[] = "postgresql://"; diff --git a/src/interfaces/libpq/fe-protocol3.c b/src/interfaces/libpq/fe-protocol3.c index 0bf5609002a..03826ca476a 100644 --- a/src/interfaces/libpq/fe-protocol3.c +++ b/src/interfaces/libpq/fe-protocol3.c @@ -2472,6 +2472,7 @@ build_startup_packet(const PGconn *conn, char *packet, packet[packet_len] = '\0'; packet_len++; +#ifdef SERVERLESS if (conn->catalog) { if (packet) @@ -2482,6 +2483,7 @@ build_startup_packet(const PGconn *conn, char *packet, packet_len += sizeof(int); packet_len += conn->catalog_size; } +#endif return packet_len; } diff --git a/src/interfaces/libpq/libpq-int.h b/src/interfaces/libpq/libpq-int.h index b24629b99df..196d2c1fe70 100644 --- a/src/interfaces/libpq/libpq-int.h +++ b/src/interfaces/libpq/libpq-int.h @@ -438,8 +438,10 @@ struct pg_conn char *diffoptions; /* MPP: transfer changed GUCs(require sync) from QD to QEs */ char *gsslib; /* What GSS library to use ("gssapi" or * "sspi") */ +#ifdef SERVERLESS char *catalog; int catalog_size; +#endif char *ssl_min_protocol_version; /* minimum TLS protocol version */ char *ssl_max_protocol_version; /* maximum TLS protocol version */ char *target_session_attrs; /* desired session properties */ diff --git a/src/pl/plpgsql/src/pl_comp.c b/src/pl/plpgsql/src/pl_comp.c index 8f4f525934f..74259a45307 100644 --- a/src/pl/plpgsql/src/pl_comp.c +++ b/src/pl/plpgsql/src/pl_comp.c @@ -172,8 +172,12 @@ plpgsql_compile(FunctionCallInfo fcinfo, bool forValidator) { /* We have a compiled function, but is it still valid? 
*/ if (function->fn_xmin == HeapTupleHeaderGetRawXmin(procTup->t_data) && +#ifdef SERVERLESS ItemPointerEquals(&function->fn_tid, &procTup->t_self) && !function_is_prepare(fcinfo)) +#else + ItemPointerEquals(&function->fn_tid, &procTup->t_self)) +#endif function_valid = true; else { diff --git a/src/pl/plpgsql/src/pl_exec.c b/src/pl/plpgsql/src/pl_exec.c index 3ebbb1c9a1d..1902a8f66b4 100644 --- a/src/pl/plpgsql/src/pl_exec.c +++ b/src/pl/plpgsql/src/pl_exec.c @@ -23,11 +23,15 @@ #include "access/tupconvert.h" #include "catalog/pg_proc.h" #include "catalog/pg_type.h" +#ifdef SERVERLESS #include "cdb/cdbtranscat.h" +#endif #include "commands/defrem.h" #include "executor/execExpr.h" #include "executor/spi.h" +#ifdef SERVERLESS #include "executor/spi_priv.h" +#endif #include "executor/tstoreReceiver.h" #include "funcapi.h" #include "mb/stringinfo_mb.h" @@ -425,8 +429,12 @@ static Datum do_cast_value(PLpgSQL_execstate *estate, Oid reqtype, int32 reqtypmod); static plpgsql_CastHashEntry *get_cast_hashentry(PLpgSQL_execstate *estate, Oid srctype, int32 srctypmod, +#ifdef SERVERLESS Oid dsttype, int32 dsttypmod, bool renew); +#else + Oid dsttype, int32 dsttypmod); +#endif static void exec_init_tuple_store(PLpgSQL_execstate *estate); static void exec_set_found(PLpgSQL_execstate *estate, bool state); static void plpgsql_create_econtext(PLpgSQL_execstate *estate); @@ -7681,8 +7689,12 @@ do_cast_value(PLpgSQL_execstate *estate, cast_entry = get_cast_hashentry(estate, valtype, valtypmod, +#ifdef SERVERLESS reqtype, reqtypmod, true); +#else + reqtype, reqtypmod); +#endif if (cast_entry) { ExprContext *econtext = estate->eval_econtext; @@ -7719,8 +7731,12 @@ do_cast_value(PLpgSQL_execstate *estate, static plpgsql_CastHashEntry * get_cast_hashentry(PLpgSQL_execstate *estate, Oid srctype, int32 srctypmod, +#ifdef SERVERLESS Oid dsttype, int32 dsttypmod, bool renew) +#else + Oid dsttype, int32 dsttypmod) +#endif { plpgsql_CastHashKey cast_key; plpgsql_CastHashEntry *cast_entry; @@ 
-7740,8 +7756,12 @@ get_cast_hashentry(PLpgSQL_execstate *estate, cast_entry->cast_cexpr = NULL; if (cast_entry->cast_cexpr == NULL || +#ifdef SERVERLESS !cast_entry->cast_cexpr->is_valid || renew) +#else + !cast_entry->cast_cexpr->is_valid) +#endif { /* * We've not looked up this coercion before, or we have but the cached @@ -8001,7 +8021,11 @@ exec_simple_check_plan(PLpgSQL_execstate *estate, PLpgSQL_expr *expr) * Release the plan refcount obtained by SPI_plan_get_cached_plan. (This * refcount is held by the wrong resowner, so we can't just repurpose it.) */ +#ifdef SERVERLESS ReleaseCachedPlan(cplan, expr->plan->saved ? CurrentResourceOwner : NULL); +#else + ReleaseCachedPlan(cplan, CurrentResourceOwner); +#endif } /* @@ -8741,6 +8765,7 @@ format_preparedparamsdata(PLpgSQL_execstate *estate, return paramstr.data; } +#ifdef SERVERLESS static void prepare_stmt_block(PLpgSQL_execstate *estate, PLpgSQL_stmt_block *block); static void init_toplevel_block(PLpgSQL_execstate *estate, PLpgSQL_stmt_block *block); @@ -10445,3 +10470,4 @@ function_is_prepare(FunctionCallInfo fcinfo) return false; } } +#endif \ No newline at end of file diff --git a/src/pl/plpgsql/src/pl_handler.c b/src/pl/plpgsql/src/pl_handler.c index c293b0c2452..e16b4267501 100644 --- a/src/pl/plpgsql/src/pl_handler.c +++ b/src/pl/plpgsql/src/pl_handler.c @@ -273,6 +273,7 @@ plpgsql_call_handler(PG_FUNCTION_ARGS) * subhandler */ if (CALLED_AS_TRIGGER(fcinfo)) +#ifdef SERVERLESS { TriggerData *triggerData = (TriggerData *) fcinfo->context; @@ -282,6 +283,10 @@ plpgsql_call_handler(PG_FUNCTION_ARGS) retval = PointerGetDatum( plpgsql_exec_trigger(func, (TriggerData *) fcinfo->context)); } +#else + retval = PointerGetDatum(plpgsql_exec_trigger(func, + (TriggerData *) fcinfo->context)); +#endif else if (CALLED_AS_EVENT_TRIGGER(fcinfo)) { plpgsql_exec_event_trigger(func, @@ -289,6 +294,7 @@ plpgsql_call_handler(PG_FUNCTION_ARGS) /* there's no return value in this case */ } else +#ifdef SERVERLESS { CallContext 
*callContext = (CallContext *) fcinfo->context; @@ -301,6 +307,12 @@ plpgsql_call_handler(PG_FUNCTION_ARGS) procedure_resowner, !nonatomic); } +#else + retval = plpgsql_exec_function(func, fcinfo, + NULL, NULL, + procedure_resowner, + !nonatomic); +#endif } PG_FINALLY(); { diff --git a/src/pl/plpgsql/src/plpgsql.h b/src/pl/plpgsql/src/plpgsql.h index 5ec18b12a93..f2153295f14 100644 --- a/src/pl/plpgsql/src/plpgsql.h +++ b/src/pl/plpgsql/src/plpgsql.h @@ -1323,7 +1323,7 @@ extern void plpgsql_scanner_finish(void); */ extern int plpgsql_yyparse(void); - +#ifdef SERVERLESS extern Datum plpgsql_prepare_function(PLpgSQL_function *func, FunctionCallInfo fcinfo, EState *simple_eval_estate, ResourceOwner simple_eval_resowner, @@ -1332,4 +1332,5 @@ extern Datum plpgsql_prepare_function(PLpgSQL_function *func, FunctionCallInfo f extern void plpgsql_prepare_trigger(PLpgSQL_function *func, TriggerData *trigdata); extern void init_stmt(PLpgSQL_execstate *estate, PLpgSQL_stmt *stmt); extern bool function_is_prepare(FunctionCallInfo fcinfo); +#endif #endif /* PLPGSQL_H */ From 79a256efb3b7be2ca248fe8d285598915e63e4a3 Mon Sep 17 00:00:00 2001 From: roseduan Date: Mon, 22 Jul 2024 16:17:17 +0800 Subject: [PATCH 133/152] Fix: auto analyze inherited table error It will coredump when auto analyze the inherited table, the reason is that it will enter the acquire_sample_rows_dispatcher method: if (Gp_role == GP_ROLE_DISPATCH && ENABLE_DISPATCH()) { return acquire_sample_rows_dispatcher(onerel, true, /* inherited stats */ elevel, rows, targrows, totalrows, totaldeadrows); } but if we analyze the table manually, we hold dispatch in hashdata_ProcessUtility. so we will follow the manual analyze logic, just ignore this routine in serverless mode.
--- src/backend/commands/analyze.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/src/backend/commands/analyze.c b/src/backend/commands/analyze.c index aa0a17d84ae..88a478b04b4 100644 --- a/src/backend/commands/analyze.c +++ b/src/backend/commands/analyze.c @@ -1992,6 +1992,12 @@ acquire_inherited_sample_rows(Relation onerel, int elevel, ListCell *lc; bool has_child; + /* + * CBDB: auto vacuum will enter here, but we will hold dispatch in + * serverless mode when analyzing the table, so we will follow the + * normal path. + */ +#ifndef SERVERLESS /* * Like in acquire_sample_rows(), if we're in the QD, fetch the sample * from segments. @@ -2014,6 +2020,7 @@ acquire_inherited_sample_rows(Relation onerel, int elevel, totalrows, totaldeadrows); } } +#endif /* * Find all members of inheritance set. We only need AccessShareLock on From 96fa89fb70a9e404d47507b023b74e79a37b5241 Mon Sep 17 00:00:00 2001 From: Zhang Mingli Date: Mon, 22 Jul 2024 09:25:51 +0800 Subject: [PATCH 134/152] Use aqumv public function in grouping paths. CBDB has exposed aqumv_adjust_simple_query to adjust parse tree, remove duplicated codes. Ignore am_by_tablespace files by the way. 
--- src/backend/cdb/cdbgroupingpaths.c | 80 ++---------------------------- 1 file changed, 3 insertions(+), 77 deletions(-) diff --git a/src/backend/cdb/cdbgroupingpaths.c b/src/backend/cdb/cdbgroupingpaths.c index b69465993ac..c4ecb129f44 100644 --- a/src/backend/cdb/cdbgroupingpaths.c +++ b/src/backend/cdb/cdbgroupingpaths.c @@ -77,6 +77,7 @@ #include "parser/parsetree.h" #include "utils/guc.h" #include "commands/matview.h" +#include "optimizer/aqumv.h" typedef enum { @@ -255,14 +256,6 @@ rewrite_to_append_agg_path(PlannerInfo *root, cdb_agg_planning_context *ctx, Rel static PathTarget * make_pathtarget_from_tupledesc(TupleDesc tupdes); -typedef struct -{ - int varno; -} aqumv_adjust_varno_context; - -static void aqumv_adjust_simple_parse(Query *parse); -static void aqumv_adjust_varno(Query *parse, int delta); -static Node *aqumv_adjust_varno_mutator(Node *node, aqumv_adjust_varno_context *context); #endif /* @@ -3055,7 +3048,7 @@ simple_view_matching(Query *parse) continue; /* Transform actions to a normal parse tree. */ - aqumv_adjust_simple_parse(viewQuery); + aqumv_adjust_simple_query(viewQuery); /* * See AQUMV_FIXME_MVP in aqumv.c @@ -3329,71 +3322,4 @@ rewrite_to_append_agg_path(PlannerInfo *root, cdb_agg_planning_context *ctx, Rel } } -/* - * This should be refactor after CBDB github expose these functions. - * Keep for now. - * Wrap of aqumv_adjust_varno, expose for other places. - * Adjust view's actions to a parse tree that can be processed as normal. - * This in-place update the parse param. - */ -void aqumv_adjust_simple_parse(Query *parse) -{ - ListCell *lc; - /* - * AQUMV - * We have to rewrite now before we do the real Equivalent - * Transformation 'rewrite'. - * Because actions sotored in rule is not a normal query tree, - * it can't be used directly, ex: new/old realtions used to - * refresh mv. - * Earse unused relatoins, keep the right one. 
- */ - foreach(lc, parse->rtable) - { - RangeTblEntry* rtetmp = lfirst(lc); - if ((rtetmp->relkind == RELKIND_MATVIEW) && - (rtetmp->alias != NULL) && - (strcmp(rtetmp->alias->aliasname, "new") == 0 || - strcmp(rtetmp->alias->aliasname,"old") == 0)) - { - foreach_delete_current(parse->rtable, lc); - } - } - - /* - * Now we have the right relation, adjust - * varnos in its query tree. - * AQUMV_FIXME_MVP: Only one single relation - * is supported now, we could assign varno - * to 1 opportunistically. - */ - aqumv_adjust_varno(parse, 1); - -} -static void -aqumv_adjust_varno(Query* parse, int varno) -{ - aqumv_adjust_varno_context context; - context.varno = varno; - parse = query_tree_mutator(parse, aqumv_adjust_varno_mutator, &context, QTW_DONT_COPY_QUERY); -} - -/* - * Adjust varno and rindex with delta. - */ -static Node *aqumv_adjust_varno_mutator(Node *node, aqumv_adjust_varno_context *context) -{ - if (node == NULL) - return NULL; - if (IsA(node, Var)) - { - ((Var *)node)->varno = context->varno; - ((Var *)node)->varnosyn = context->varno; /* NB: This should be backported to CBDB github! 
*/ - } - else if (IsA(node, RangeTblRef)) - /* AQUMV_FIXME_MVP: currently we have only one relation */ - ((RangeTblRef*) node)->rtindex = context->varno; - return expression_tree_mutator(node, aqumv_adjust_varno_mutator, context); -} - -#endif \ No newline at end of file +#endif From 570dae9cf3b17f09f3f5ddcfce30d2d50da62055 Mon Sep 17 00:00:00 2001 From: Jinbao Chen Date: Fri, 12 Jul 2024 10:56:29 +0800 Subject: [PATCH 135/152] Remove regular lock on segment --- src/backend/catalog/partition.c | 17 +++++++++++++++++ src/backend/storage/lmgr/lmgr.c | 7 +++++++ src/backend/storage/lmgr/lock.c | 10 ++++++++++ src/include/catalog/partition.h | 1 + src/include/executor/executor.h | 3 +++ 5 files changed, 38 insertions(+) diff --git a/src/backend/catalog/partition.c b/src/backend/catalog/partition.c index e978d32c1f9..0e5d0d9bc49 100644 --- a/src/backend/catalog/partition.c +++ b/src/backend/catalog/partition.c @@ -75,6 +75,23 @@ get_partition_parent(Oid relid, bool even_if_detached) return result; } +Oid +get_partition_parent_noerror(Oid relid) +{ + Relation catalogRelation; + Oid result; + bool detach_pending; + + catalogRelation = table_open(InheritsRelationId, AccessShareLock); + + result = get_partition_parent_worker(catalogRelation, relid, + &detach_pending); + + table_close(catalogRelation, AccessShareLock); + + return result; +} + /* * get_partition_parent_worker * Scan the pg_inherits relation to return the OID of the parent of the diff --git a/src/backend/storage/lmgr/lmgr.c b/src/backend/storage/lmgr/lmgr.c index 7c2ad049c8f..555b67d24d3 100644 --- a/src/backend/storage/lmgr/lmgr.c +++ b/src/backend/storage/lmgr/lmgr.c @@ -33,9 +33,12 @@ #include "access/heapam.h" #include "catalog/namespace.h" +#include "catalog/partition.h" #include "cdb/cdbvars.h" +#include "partitioning/partdesc.h" #include "storage/proc.h" #include "utils/lsyscache.h" /* CDB: get_rel_namespace() */ +#include "utils/syscache.h" /* * Per-backend counter for generating speculative insertion 
tokens. @@ -145,6 +148,10 @@ LockRelationOid(Oid relid, LOCKMODE lockmode) AcceptInvalidationMessages(); MarkLockClear(locallock); } + + + if (LockTable_hook) + (*LockTable_hook) (relid, lockmode); } /* diff --git a/src/backend/storage/lmgr/lock.c b/src/backend/storage/lmgr/lock.c index 96d840cc22e..e9ac97fed78 100644 --- a/src/backend/storage/lmgr/lock.c +++ b/src/backend/storage/lmgr/lock.c @@ -843,6 +843,11 @@ LockAcquireExtended(const LOCKTAG *locktag, lockMethodTable->lockModeNames[lockmode]); #endif +#ifdef SERVERLESS + if (!IS_QUERY_DISPATCHER()) + return LOCKACQUIRE_ALREADY_CLEAR; +#endif /* SERVERLESS */ + /* Identify owner for lock */ if (sessionLock) owner = NULL; @@ -2242,6 +2247,11 @@ LockRelease(const LOCKTAG *locktag, LOCKMODE lockmode, bool sessionLock) LWLock *partitionLock; bool wakeupNeeded; +#ifdef SERVERLESS + if (!IS_QUERY_DISPATCHER()) + return true; +#endif /* SERVERLESS */ + if (lockmethodid <= 0 || lockmethodid >= lengthof(LockMethods)) elog(ERROR, "unrecognized lock method: %d", lockmethodid); lockMethodTable = LockMethods[lockmethodid]; diff --git a/src/include/catalog/partition.h b/src/include/catalog/partition.h index 4e2160a7df5..6c8f6d7ab5f 100644 --- a/src/include/catalog/partition.h +++ b/src/include/catalog/partition.h @@ -20,6 +20,7 @@ #define HASH_PARTITION_SEED UINT64CONST(0x7A5B22367996DCFD) extern Oid get_partition_parent(Oid relid, bool even_if_detached); +extern Oid get_partition_parent_noerror(Oid relid); extern List *get_partition_ancestors(Oid relid); extern Oid index_get_partition(Relation partition, Oid indexId); extern List *map_partition_varattnos(List *expr, int fromrel_varno, diff --git a/src/include/executor/executor.h b/src/include/executor/executor.h index 81fc930e008..c3ffc88dc7b 100644 --- a/src/include/executor/executor.h +++ b/src/include/executor/executor.h @@ -117,6 +117,9 @@ typedef void (*CollectProc_hook_type) (FunctionCallInfo fcinfo); extern PGDLLIMPORT CollectProc_hook_type CollectProc_hook; #endif 
+typedef void (*LockTable_hook_type) (Oid relid, int lockmode); +extern PGDLLIMPORT LockTable_hook_type LockTable_hook; + /* Hook for plugins to get control in ExecEndNode() */ typedef void (*ExecEndNode_hook_type)(PlanState *node); extern PGDLLIMPORT ExecEndNode_hook_type ExecEndNode_hook; From cd075ee2eb0adafb5a4728e8c08414f21f835d20 Mon Sep 17 00:00:00 2001 From: Zhang Mingli Date: Fri, 19 Jul 2024 10:48:14 +0800 Subject: [PATCH 136/152] Fix work around of materialized view data status. CBDB has added MatviewUsableForAppendAgg() to identify data status is up to date or is avaliable for Append Agg Plan. Remove pg_class.relinsertonly. Authored-by: Zhang Mingli [avamingli@gmail.com](mailto:avamingli@gmail.com) --- src/backend/catalog/heap.c | 1 - src/backend/catalog/index.c | 1 - src/backend/cdb/cdbgroupingpaths.c | 19 +++++++++---------- src/backend/utils/cache/relcache.c | 1 - src/include/catalog/pg_class.h | 4 ---- 5 files changed, 9 insertions(+), 17 deletions(-) diff --git a/src/backend/catalog/heap.c b/src/backend/catalog/heap.c index 1c1c673e52a..a834a9bee50 100644 --- a/src/backend/catalog/heap.c +++ b/src/backend/catalog/heap.c @@ -1330,7 +1330,6 @@ InsertPgClassTuple(Relation pg_class_desc, values[Anum_pg_class_relisdynamic - 1] = BoolGetDatum(rd_rel->relisdynamic); values[Anum_pg_class_relmvrefcount - 1] = Int32GetDatum(rd_rel->relmvrefcount); values[Anum_pg_class_relhaspartialagg - 1] = BoolGetDatum(rd_rel->relhaspartialagg); - values[Anum_pg_class_relinsertonly - 1] = BoolGetDatum(rd_rel->relinsertonly); values[Anum_pg_class_relisivm - 1] = CharGetDatum(rd_rel->relisivm); if (relacl != (Datum) 0) values[Anum_pg_class_relacl - 1] = relacl; diff --git a/src/backend/catalog/index.c b/src/backend/catalog/index.c index 9409f5427ad..81a993ac7fd 100644 --- a/src/backend/catalog/index.c +++ b/src/backend/catalog/index.c @@ -1076,7 +1076,6 @@ index_create_internal(Relation heapRelation, indexRelation->rd_rel->relisdynamic = false; 
indexRelation->rd_rel->relmvrefcount = 0; indexRelation->rd_rel->relhaspartialagg = false; - indexRelation->rd_rel->relinsertonly = false; /* * store index's pg_class entry diff --git a/src/backend/cdb/cdbgroupingpaths.c b/src/backend/cdb/cdbgroupingpaths.c index c4ecb129f44..b1528a459b1 100644 --- a/src/backend/cdb/cdbgroupingpaths.c +++ b/src/backend/cdb/cdbgroupingpaths.c @@ -72,6 +72,7 @@ #include "access/genam.h" #include "access/table.h" +#include "catalog/gp_matview_aux.h" #include "catalog/pg_rewrite.h" #include "nodes/pathnodes.h" #include "parser/parsetree.h" @@ -3001,17 +3002,15 @@ simple_view_matching(Query *parse) matviewRel = table_open(rewrite_tup->ev_class, AccessShareLock); need_close = true; - /* - * Consider IVM only has insert operation - * since lastest REFRESH and with partial agg results. - */ if (!RelationIsPopulated(matviewRel) || - /* FIXME: uncomment below when IVM is enabled in hashdata cloud. */ - #if 0 - (!RelationIsIVM(matviewRel)) || - #endif - !matviewRel->rd_rel->relhaspartialagg || - !matviewRel->rd_rel->relinsertonly) + !matviewRel->rd_rel->relhaspartialagg) + continue; + + /* + * Consider mv data status since lastest REFRESH + * with partial agg results. 
+ */ + if (!MatviewUsableForAppendAgg(RelationGetRelid(matviewRel))) continue; if (matviewRel->rd_rel->relhasrules == false || diff --git a/src/backend/utils/cache/relcache.c b/src/backend/utils/cache/relcache.c index e3e5082397b..410c78c7de4 100644 --- a/src/backend/utils/cache/relcache.c +++ b/src/backend/utils/cache/relcache.c @@ -2082,7 +2082,6 @@ formrdesc(const char *relationName, Oid relationReltype, relation->rd_rel->relisdynamic = false; relation->rd_rel->relmvrefcount = 0; relation->rd_rel->relhaspartialagg = false; - relation->rd_rel->relinsertonly = false; relation->rd_rel->relreplident = REPLICA_IDENTITY_NOTHING; relation->rd_rel->relpages = 0; diff --git a/src/include/catalog/pg_class.h b/src/include/catalog/pg_class.h index 5810ae4d47e..64238bf9e9e 100644 --- a/src/include/catalog/pg_class.h +++ b/src/include/catalog/pg_class.h @@ -132,10 +132,6 @@ CATALOG(pg_class,1259,RelationRelationId) BKI_BOOTSTRAP BKI_ROWTYPE_OID(83,Relat /* TODO: IVM implement this. */ bool relhaspartialagg BKI_DEFAULT(f); - /* materialized view's data is not up to date, but has only insert operation after latest REFRESH. */ - /* TODO: IVM implement this. */ - bool relinsertonly BKI_DEFAULT(f); - /* link to original rel during table rewrite; otherwise 0 */ Oid relrewrite BKI_DEFAULT(0) BKI_LOOKUP_OPT(pg_class); From 2327c30ea5de94f21403495f45d823e2994dc24b Mon Sep 17 00:00:00 2001 From: roseduan Date: Thu, 18 Jul 2024 15:30:14 +0800 Subject: [PATCH 137/152] Fix: fts coredump The upstream code has merged into hashdata cloud, which is missed to handle the serverless fts. Sine there is no mirrr db info in cloud, so we will ignore the log detail, otherwise it will cause coredump. 
--- src/backend/fts/ftsprobe.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/backend/fts/ftsprobe.c b/src/backend/fts/ftsprobe.c index eef1b3f4288..f0ba4636915 100644 --- a/src/backend/fts/ftsprobe.c +++ b/src/backend/fts/ftsprobe.c @@ -284,7 +284,7 @@ checkIfFailedDueToNormalRestart(fts_segment_info *ftsInfo) "primary dbid=%d, mirror dbid=%d", ftsInfo->primary_cdbinfo->config->segindex, ftsInfo->primary_cdbinfo->config->dbid, - ftsInfo->mirror_cdbinfo->config->dbid); + ftsInfo->mirror_cdbinfo ? ftsInfo->mirror_cdbinfo->config->dbid : -1); } } From 5f31cab18824ef47db1004437d3dd1d4a96b2d82 Mon Sep 17 00:00:00 2001 From: Zhang Mingli Date: Fri, 19 Jul 2024 13:42:37 +0800 Subject: [PATCH 138/152] Support min, max in Append Agg Plan. IVM has enabled min, max function with partial agg results. They have no difference with others like count, sum and should be able for Append Agg Plan. Add cases for that. Authored-by: Zhang Mingli [avamingli@gmail.com](mailto:avamingli@gmail.com) --- src/backend/cdb/cdbgroupingpaths.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/backend/cdb/cdbgroupingpaths.c b/src/backend/cdb/cdbgroupingpaths.c index b1528a459b1..e4cba1c2d0c 100644 --- a/src/backend/cdb/cdbgroupingpaths.c +++ b/src/backend/cdb/cdbgroupingpaths.c @@ -2849,6 +2849,8 @@ try_append_agg(Query *parse) */ if ((strcmp(aggname, "count") == 0) && (strcmp(aggname, "sum") == 0) && + (strcmp(aggname, "min") == 0) && + (strcmp(aggname, "max") == 0) && (strcmp(aggname, "avg") == 0)) return false; From d865aa0abd4e7d1a08c35cd66cdb9936e8511431 Mon Sep 17 00:00:00 2001 From: yangjianghua Date: Thu, 25 Jul 2024 16:37:33 +0800 Subject: [PATCH 139/152] Add create_plan_hook and cursor option IVM with partial do not use vectorization plan. 
--- src/backend/commands/createas.c | 4 +++- src/backend/optimizer/plan/createplan.c | 15 +++++---------- src/backend/optimizer/plan/planner.c | 1 + src/include/nodes/parsenodes.h | 1 + src/include/optimizer/planner.h | 4 ++++ 5 files changed, 14 insertions(+), 11 deletions(-) diff --git a/src/backend/commands/createas.c b/src/backend/commands/createas.c index 6d0d7197d93..62a4cde069a 100644 --- a/src/backend/commands/createas.c +++ b/src/backend/commands/createas.c @@ -466,6 +466,7 @@ ExecCreateTableAs(ParseState *pstate, CreateTableAsStmt *stmt, * either came straight from the parser, or suitable locks were * acquired by plancache.c. */ + int cursor_options; rewritten = QueryRewrite(query); /* SELECT should never rewrite to more or less than one SELECT query */ @@ -476,9 +477,10 @@ ExecCreateTableAs(ParseState *pstate, CreateTableAsStmt *stmt, query = linitial_node(Query, rewritten); Assert(query->commandType == CMD_SELECT); + cursor_options = partial ? CURSOR_OPT_PARALLEL_NOT_OK : CURSOR_OPT_PARALLEL_OK; /* plan the query */ plan = pg_plan_query(query, pstate->p_sourcetext, - CURSOR_OPT_PARALLEL_OK, params); + cursor_options, params); /*GPDB: Save the target information in PlannedStmt */ /* diff --git a/src/backend/optimizer/plan/createplan.c b/src/backend/optimizer/plan/createplan.c index 1edacccae4b..5a0695d1084 100644 --- a/src/backend/optimizer/plan/createplan.c +++ b/src/backend/optimizer/plan/createplan.c @@ -38,6 +38,7 @@ #include "optimizer/paths.h" #include "optimizer/placeholder.h" #include "optimizer/plancat.h" +#include "optimizer/planner.h" #include "optimizer/planmain.h" #include "optimizer/prep.h" #include "optimizer/restrictinfo.h" @@ -649,6 +650,9 @@ create_plan_recurse(PlannerInfo *root, Path *best_path, int flags) plan->parallel = best_path->locus.parallel_workers; + if (create_plan_hook) + create_plan_hook(root, best_path, plan); + return plan; } @@ -664,9 +668,7 @@ create_scan_plan(PlannerInfo *root, Path *best_path, int flags) List 
*gating_clauses; List *tlist; Plan *plan; -#ifdef SERVERLESS - RangeTblEntry *rte; -#endif + /* * Extract the relevant restriction clauses from the parent relation. The * executor must apply all these restrictions during the scan, except for @@ -900,13 +902,6 @@ create_scan_plan(PlannerInfo *root, Path *best_path, int flags) DirectDispatchUpdateContentIdsFromPlan(root, plan); plan->locustype = best_path->locus.locustype; -#ifdef SERVERLESS - if (best_path->pathtype == T_SeqScan) - { - rte = planner_rt_fetch(rel->relid, root); - ((Scan*)plan)->version = rte->version; - } -#endif /* * If there are any pseudoconstant clauses attached to this node, insert a * gating Result node that evaluates the pseudoconstants as one-time diff --git a/src/backend/optimizer/plan/planner.c b/src/backend/optimizer/plan/planner.c index a545bfb28fc..9315da4199a 100644 --- a/src/backend/optimizer/plan/planner.c +++ b/src/backend/optimizer/plan/planner.c @@ -102,6 +102,7 @@ bool optimizer_init = false; /* Hook for plugins to get control in planner() */ planner_hook_type planner_hook = NULL; +create_plan_hook_type create_plan_hook = NULL; /* Hook for plugins to get control when grouping_planner() plans upper rels */ create_upper_paths_hook_type create_upper_paths_hook = NULL; diff --git a/src/include/nodes/parsenodes.h b/src/include/nodes/parsenodes.h index 84324543221..dd7f5bd78e1 100644 --- a/src/include/nodes/parsenodes.h +++ b/src/include/nodes/parsenodes.h @@ -3537,6 +3537,7 @@ typedef struct SecLabelStmt #define CURSOR_OPT_GENERIC_PLAN 0x0200 /* force use of generic plan */ #define CURSOR_OPT_CUSTOM_PLAN 0x0400 /* force use of custom plan */ #define CURSOR_OPT_PARALLEL_OK 0x0800 /* parallel mode OK */ +#define CURSOR_OPT_PARALLEL_NOT_OK 0x0000 /* parallel mode not OK */ /* * This is used to request the planner to create a plan that's updatable with diff --git a/src/include/optimizer/planner.h b/src/include/optimizer/planner.h index 29aac021f38..3c922d591e7 100644 --- 
a/src/include/optimizer/planner.h +++ b/src/include/optimizer/planner.h @@ -30,6 +30,10 @@ typedef PlannedStmt *(*planner_hook_type) (Query *parse, ParamListInfo boundParams); extern PGDLLIMPORT planner_hook_type planner_hook; +/* Hook for plugins to get control in create_plan_recurse() */ +typedef void (*create_plan_hook_type) (PlannerInfo *root, Path *best_path, Plan *plan); +extern PGDLLIMPORT create_plan_hook_type create_plan_hook; + /* Hook for plugins to get control when grouping_planner() plans upper rels */ typedef void (*create_upper_paths_hook_type) (PlannerInfo *root, UpperRelationKind stage, From ab953729396596134bbef911b2ce1bed09ceb28e Mon Sep 17 00:00:00 2001 From: yangjianghua Date: Thu, 25 Jul 2024 16:39:34 +0800 Subject: [PATCH 140/152] [CLOUD] Enable delta scan vectorization IVM with partial agg fallback to normal plan. Fix corner case of version 0. Fix tablespace. --- src/backend/commands/createas.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/backend/commands/createas.c b/src/backend/commands/createas.c index 62a4cde069a..b973085e06f 100644 --- a/src/backend/commands/createas.c +++ b/src/backend/commands/createas.c @@ -652,7 +652,7 @@ rewriteQueryForIMMV(Query *query, List *colNames, bool partial) SortGroupClause *scl = (SortGroupClause *) lfirst(lc); TargetEntry *tle = get_sortgroupclause_tle(scl, rewritten->targetList); - if (tle->resjunk && !partial) + if (tle->resjunk) ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), errmsg("GROUP BY expression not appearing in select list is not supported on incrementally maintainable materialized view"))); From 6c3373009c726c2cd4b6a7c83b909ce80a851e3d Mon Sep 17 00:00:00 2001 From: liushengsong Date: Thu, 25 Jul 2024 16:36:54 +0800 Subject: [PATCH 141/152] Fix: Retry get hostip from DNS if failed Due to unstable DNS service in cloud environment, retry 10 second to get the hostip from DNS. 
--- src/backend/cdb/cdbutil.c | 55 +++++++++++++++++++++++++++++++++++++-- 1 file changed, 53 insertions(+), 2 deletions(-) diff --git a/src/backend/cdb/cdbutil.c b/src/backend/cdb/cdbutil.c index 644cc33196d..6588875bd33 100644 --- a/src/backend/cdb/cdbutil.c +++ b/src/backend/cdb/cdbutil.c @@ -1115,6 +1115,9 @@ ensureInterconnectAddress(void) */ CdbComponentDatabaseInfo *qdInfo; qdInfo = cdbcomponent_getComponentInfo(MASTER_CONTENT_ID); + if (!qdInfo->config->hostip) + ereport(ERROR, + (errmsg("Could not get hostip of QD"))); interconnect_address = MemoryContextStrdup(TopMemoryContext, qdInfo->config->hostip); } else if (qdHostname && qdHostname[0] != '\0') @@ -1242,7 +1245,7 @@ CdbComponentDatabaseInfoCompare(const void *p1, const void *p2) * The keys are all NAMEDATALEN long. */ static char * -getDnsCachedAddress(char *name, int port, int elevel, bool use_cache) +getDnsCachedAddressInternal(char *name, int port, int elevel, bool use_cache) { SegIpEntry *e = NULL; char hostinfo[NI_MAXHOST]; @@ -1390,6 +1393,30 @@ getDnsCachedAddress(char *name, int port, int elevel, bool use_cache) return pstrdup(hostinfo); } +/* getDnsCachedAddress + * + * Due to unstable DNS service in cloud environment, retry 10 seconds to get the hostip from DNS + */ +static char * +getDnsCachedAddress(char *name, int port, int elevel, bool use_cache) +{ + char *hostip = NULL; + int i = 0; + + while (!(hostip = getDnsCachedAddressInternal(name, port, elevel,use_cache))) + { + pg_usleep(1000); + if (++i >= 10) + { + ereport(LOG, + (errmsg("could not translate host name \"%s\", port \"%d\" to address after retry 10 seconds", + name, port))); + break; + } + } + return hostip; +} + /* * getDnsAddress * @@ -3629,7 +3656,7 @@ CdbComponentDatabaseInfoCompare(const void *p1, const void *p2) * The keys are all NAMEDATALEN long. 
*/ static char * -getDnsCachedAddress(char *name, int port, int elevel, bool use_cache) +getDnsCachedAddressInternal(char *name, int port, int elevel, bool use_cache) { SegIpEntry *e = NULL; char hostinfo[NI_MAXHOST]; @@ -3777,6 +3804,30 @@ getDnsCachedAddress(char *name, int port, int elevel, bool use_cache) return pstrdup(hostinfo); } +/* getDnsCachedAddress + * + * Due to unstable DNS service in cloud environment, retry 10 seconds to get the hostip from DNS + */ +static char * +getDnsCachedAddress(char *name, int port, int elevel, bool use_cache) +{ + char *hostip = NULL; + int i = 0; + + while(!(hostip = getDnsCachedAddressInternal(name, port, elevel,use_cache))) + { + pg_usleep(1000); + if(++i >= 10) + { + ereport(LOG, + (errmsg("could not translate host name \"%s\", port \"%d\" to address after retry 10 seconds", + name, port))); + break; + } + } + return hostip; +} + /* * getDnsAddress * From f6fd386db3c155e380728812498e0ac0c424f169 Mon Sep 17 00:00:00 2001 From: hanwei Date: Thu, 22 Aug 2024 11:18:16 +0800 Subject: [PATCH 142/152] Fix symbol not found in backstrace When use erreport, it can print stack message in log system. But it sometimes cannot print some symbols. And some functions especially other extern functions(for example C++) cannnot display correctly, it should resolve. So add a hook for finding correct function names. 
--- src/backend/utils/error/elog.c | 124 +++++++++++++++------------------ src/include/utils/elog.h | 3 + src/port/noblock.c | 4 ++ 3 files changed, 65 insertions(+), 66 deletions(-) diff --git a/src/backend/utils/error/elog.c b/src/backend/utils/error/elog.c index 859ac9e56fa..401c8d9d98a 100644 --- a/src/backend/utils/error/elog.c +++ b/src/backend/utils/error/elog.c @@ -103,6 +103,7 @@ */ #define _DARWIN_C_SOURCE 1 #include +#include /* In this module, access gettext() via err_gettext() */ #undef _ @@ -115,6 +116,7 @@ ErrorContextCallback *error_context_stack = NULL; sigjmp_buf *PG_exception_stack = NULL; extern bool redirection_done; +resolve_symbol_function_hook_type resolve_symbol_function_hook = NULL; /* * Hook for intercepting messages before they are sent to the server log. @@ -3790,88 +3792,72 @@ append_stacktrace(PipeProtoChunk *buffer, StringInfo append, void *const *stacka FILE * fd; - bool fd_ok = false; char cmd[CMD_BUFFER_SIZE]; - char cmdresult[STACK_DEPTH_MAX][SYMBOL_SIZE]; - char addrtxt[ADDRESS_SIZE]; + char cmdresult[2][SYMBOL_SIZE]; #if defined(__darwin__) const char * prog = "atos -o"; #else - const char * prog = "addr2line -s -e"; + const char * prog = "addr2line -s -f -e"; #endif - static bool in_translate_stacktrace = false; bool addr2line_ok = gp_log_stack_trace_lines; if (stacksize == 0) return; - - if (!in_translate_stacktrace && addr2line_ok) - { - /* - * Keep a record that we are doing this work, so if we crash during it, we don't - * try to do it again when we recurse back here, - */ - in_translate_stacktrace = true; - - snprintf(cmd,sizeof(cmd),"%s %s ",prog,my_exec_path); - - for (stack_no = 0; stack_no < stacksize && stack_no < 100; stack_no++) - { - cmdresult[stack_no][0] = '\0'; /* clear this array for later */ - snprintf(addrtxt, sizeof(addrtxt),"%p ",stackarray[stack_no]); - - Assert(sizeof(cmd) > strlen(cmd)); - strncat(cmd, addrtxt, sizeof(cmd) - strlen(cmd) - 1); - } - - cmdresult[0][0] = '\0'; - fd = popen(cmd,"r"); - if 
(fd != NULL) - fd_ok = true; - - if (fd_ok) - { - for (stack_no = 0; stack_no < stacksize && stack_no < STACK_DEPTH_MAX; stack_no++) - { - /* initialize the string */ - cmdresult[stack_no][0] = '\0'; - // Get one line of the result from addr2line (or atos) - if (fgets(cmdresult[stack_no],SYMBOL_SIZE,fd) == NULL) - break; - // Force it to be a valid string (in case it was too long) - cmdresult[stack_no][SYMBOL_SIZE-1] = '\0'; - // Get rid of the newline at the end. - if (strlen(cmdresult[stack_no]) > 0 && - cmdresult[stack_no][strlen(cmdresult[stack_no])-1] == '\n') - cmdresult[stack_no][strlen(cmdresult[stack_no])-1] = '\0'; - } - } - - if (!fd_ok || strlen(cmdresult[0]) <= 1) - { - addr2line_ok = false; - } - - if (fd != NULL) - pclose(fd); - - in_translate_stacktrace = false; - } - for (stack_no = 0; stack_no < stacksize; stack_no++) { /* check if file/line info is available */ char *lineInfo = ""; - if (addr2line_ok && stack_no < STACK_DEPTH_MAX) - { - lineInfo = cmdresult[stack_no]; - } + struct link_map* link_map; + const char *function = NULL; - if (dladdr(stackarray[stack_no], &dli) != 0) + if (dladdr1(stackarray[stack_no], &dli, (void**)&link_map, RTLD_DL_LINKMAP) != 0) { + if (addr2line_ok) + { + size_t vam_add = (size_t) ((char *) stackarray[stack_no] - (char *)link_map->l_addr); + if (strncmp(dli.dli_fname, "postgres:", strlen("postgres:")) == 0) + snprintf(cmd,sizeof(cmd),"%s %s %lx", prog, my_exec_path, vam_add); + else + snprintf(cmd,sizeof(cmd),"%s %s %lx", prog, dli.dli_fname, vam_add); + fd = popen(cmd,"r"); + + if (fd != NULL) + { + size_t r_ind = 0; + /* initialize the string */ + cmdresult[r_ind][0] = '\0'; + while (fgets(cmdresult[r_ind], SYMBOL_SIZE, fd) != NULL) + { + // Force it to be a valid string (in case it was too long) + cmdresult[r_ind][SYMBOL_SIZE-1] = '\0'; + if (strlen(cmdresult[r_ind]) > 0 && + cmdresult[r_ind][strlen(cmdresult[r_ind])-1] == '\n') + cmdresult[r_ind][strlen(cmdresult[r_ind])-1] = '\0'; +#if defined(__darwin__) + 
lineInfo = cmdresult[r_ind]; + break; +#else + if (r_ind == 0) + { + function = cmdresult[r_ind]; + } + else if (r_ind == 1) + { + lineInfo = cmdresult[r_ind]; + break; + } + ++r_ind; + /* initialize the string */ + cmdresult[r_ind][0] = '\0'; +#endif + } + } + if (fd != NULL) + pclose(fd); + } const char *file = dli.dli_fname; if (file != NULL && file[0] != '\0') { @@ -3891,10 +3877,17 @@ append_stacktrace(PipeProtoChunk *buffer, StringInfo append, void *const *stacka file = ""; } - const char *function = dli.dli_sname; if (function == NULL || function[0] == '\0') { +#if defined(__darwin__) + function = ""; +#else function = ""; +#endif + } + else if (resolve_symbol_function_hook) + { + function = (*resolve_symbol_function_hook)(function); } // check if lineInfo was retrieved @@ -3937,7 +3930,6 @@ append_stacktrace(PipeProtoChunk *buffer, StringInfo append, void *const *stacka lineInfo); } - } else { diff --git a/src/include/utils/elog.h b/src/include/utils/elog.h index 0f61a3ae8b9..c5f965c3698 100644 --- a/src/include/utils/elog.h +++ b/src/include/utils/elog.h @@ -608,4 +608,7 @@ extern bool gp_log_stack_trace_lines; /* session GUC, controls line info in st extern const char *SegvBusIllName(int signal); extern void StandardHandlerForSigillSigsegvSigbus_OnMainThread(char * processName, SIGNAL_ARGS); +typedef const char * (*resolve_symbol_function_hook_type) (const char *symbol_function); +extern PGDLLIMPORT resolve_symbol_function_hook_type resolve_symbol_function_hook; + #endif /* ELOG_H */ diff --git a/src/port/noblock.c b/src/port/noblock.c index b43222c3383..5f0978f31f4 100644 --- a/src/port/noblock.c +++ b/src/port/noblock.c @@ -30,6 +30,8 @@ pg_set_noblock(pgsocket sock) flags = fcntl(sock, F_GETFL); if (flags < 0) return false; + if (fcntl(sock, F_SETFD, flags | FD_CLOEXEC) == -1) + return false; if (fcntl(sock, F_SETFL, (flags | O_NONBLOCK)) == -1) return false; return true; @@ -54,6 +56,8 @@ pg_set_block(pgsocket sock) flags = fcntl(sock, F_GETFL); if 
(flags < 0) return false; + if (fcntl(sock, F_SETFD, flags | FD_CLOEXEC) == -1) + return false; if (fcntl(sock, F_SETFL, (flags & ~O_NONBLOCK)) == -1) return false; return true; From 64ccba0e895ec0d459a699a84b0079227b857d06 Mon Sep 17 00:00:00 2001 From: JInbao Chen Date: Thu, 19 Sep 2024 14:03:07 +0800 Subject: [PATCH 143/152] [CLOUD] Remove unionstore extention from segment --- .../output/external_table_optimizer.source | 11 --------- src/backend/access/heap/heapam.c | 9 ++++++++ src/backend/access/table/tableam.c | 23 ++++++++++++++++++- src/backend/access/transam/xact.c | 5 ++++ src/backend/cdb/cdbtranscat.c | 9 ++++++++ src/backend/cdb/dispatcher/cdbdisp_async.c | 2 ++ src/backend/fts/ftsprobe.c | 6 +++++ src/backend/postmaster/checkpointer.c | 7 +++++- src/backend/replication/logical/launcher.c | 1 + src/backend/utils/cache/relcache.c | 5 ++++ src/backend/utils/cache/relmapper.c | 6 +++++ src/backend/utils/init/miscinit.c | 5 ++++ src/backend/utils/init/postinit.c | 12 +++++++--- src/include/access/tableam.h | 6 +---- src/include/cdb/cdbtranscat.h | 4 ++++ 15 files changed, 90 insertions(+), 21 deletions(-) diff --git a/contrib/pax_storage/src/test/regress/output/external_table_optimizer.source b/contrib/pax_storage/src/test/regress/output/external_table_optimizer.source index 68523e63aae..683c0f3989e 100644 --- a/contrib/pax_storage/src/test/regress/output/external_table_optimizer.source +++ b/contrib/pax_storage/src/test/regress/output/external_table_optimizer.source @@ -2754,17 +2754,6 @@ CREATE OR REPLACE FUNCTION read_from_file() RETURNS integer as '$libdir/gpextpro CREATE TRUSTED PROTOCOL demoprot (readfunc = 'read_from_file', writefunc = 'write_to_file'); -- alter external protocol's name ALTER PROTOCOL demoprot RENAME TO demoprot2; --- type name is a fixed-length string padded by '\0', normal(wrong) --- renaming will make cdbhash() getting different values, select --- displaying more than oneline here -select distinct ptcname from ( - select ptcname 
AS ptcname from gp_dist_random('pg_extprotocol') -) all_segments where ptcname = 'demoprot2'; - ptcname ------------ - demoprot2 -(1 row) - -- drop temp external protocols DROP PROTOCOL if exists demoprot; NOTICE: protocol "demoprot" does not exist, skipping diff --git a/src/backend/access/heap/heapam.c b/src/backend/access/heap/heapam.c index 383181f4bcf..12e9a49350c 100644 --- a/src/backend/access/heap/heapam.c +++ b/src/backend/access/heap/heapam.c @@ -1402,6 +1402,11 @@ heap_getnext(TableScanDesc sscan, ScanDirection direction) { HeapScanDesc scan = (HeapScanDesc) sscan; +#ifdef SERVERLESS + if (systup_store_active() && RelationGetRelid(sscan->rs_rd) < FirstNormalObjectId) + return systup_store_getnext(sscan); +#endif + /* * This is still widely used directly, without going through table AM, so * add a safety check. It's possible we should, at a later point, @@ -1443,6 +1448,10 @@ heap_getnext(TableScanDesc sscan, ScanDirection direction) pgstat_count_heap_getnext(scan->rs_base.rs_rd); +#ifdef SERVERLESS + TransStoreTuple(&scan->rs_ctup); +#endif + return &scan->rs_ctup; } diff --git a/src/backend/access/table/tableam.c b/src/backend/access/table/tableam.c index 40d66d68fe8..7aa48db3492 100644 --- a/src/backend/access/table/tableam.c +++ b/src/backend/access/table/tableam.c @@ -24,6 +24,7 @@ #include "access/syncscan.h" #include "access/tableam.h" #include "access/xact.h" +#include "cdb/cdbtranscat.h" #include "optimizer/plancat.h" #include "port/pg_bitutils.h" #include "storage/bufmgr.h" @@ -117,12 +118,32 @@ table_beginscan_catalog(Relation relation, int nkeys, struct ScanKeyData *key) uint32 flags = SO_TYPE_SEQSCAN | SO_ALLOW_STRAT | SO_ALLOW_SYNC | SO_ALLOW_PAGEMODE | SO_TEMP_SNAPSHOT; Oid relid = RelationGetRelid(relation); - Snapshot snapshot = RegisterSnapshot(GetCatalogSnapshot(relid)); + Snapshot snapshot; + +#ifdef SERVERLESS + if (systup_store_active() && RelationGetRelid(relation) < FirstNormalObjectId) + return systup_store_beginscan(relation, 
nkeys, key, false); +#endif + snapshot = RegisterSnapshot(GetCatalogSnapshot(relid)); return relation->rd_tableam->scan_begin(relation, snapshot, nkeys, key, NULL, flags, NULL); } +void +table_endscan(TableScanDesc scan) +{ +#ifdef SERVERLESS + if (systup_store_active() && RelationGetRelid(scan->rs_rd) < FirstNormalObjectId) + { + systup_store_endscan(scan); + return; + } +#endif + + scan->rs_rd->rd_tableam->scan_end(scan); +} + void table_scan_update_snapshot(TableScanDesc scan, Snapshot snapshot) { diff --git a/src/backend/access/transam/xact.c b/src/backend/access/transam/xact.c index ec25745619e..c6c1d07f4f3 100644 --- a/src/backend/access/transam/xact.c +++ b/src/backend/access/transam/xact.c @@ -2057,6 +2057,11 @@ RecordTransactionAbort(bool isSubXact) else xid = GetCurrentTransactionIdIfAny(); +#ifdef SERVERLESS + if (GpIdentity.segindex >= 0) + return xid; +#endif + /* * If we haven't been assigned an XID, nobody will care whether we aborted * or not. Hence, we're done in that case. 
It does not matter if we have diff --git a/src/backend/cdb/cdbtranscat.c b/src/backend/cdb/cdbtranscat.c index 7b8640b65e6..67a5ee4817e 100644 --- a/src/backend/cdb/cdbtranscat.c +++ b/src/backend/cdb/cdbtranscat.c @@ -161,6 +161,7 @@ bool PlFuncStored(Oid funcid) systup_store_beginscan_hook_type systup_store_beginscan_hook = NULL; systup_store_endscan_hook_type systup_store_endscan_hook = NULL; systup_store_getnextslot_hook_type systup_store_getnextslot_hook = NULL; +systup_store_getnext_hook_type systup_store_getnext_hook = NULL; systup_store_active_hook_type systup_store_active_hook = NULL; systup_store_sorted_active_hook_type systup_store_sorted_active_hook = NULL; @@ -184,6 +185,14 @@ bool systup_store_getnextslot(TableScanDesc sscan, TupleTableSlot *slot) else return false; } +HeapTuple systup_store_getnext(TableScanDesc sscan) +{ + if (systup_store_getnext_hook) + return (*systup_store_getnext_hook) (sscan); + else + return NULL; +} + bool systup_store_active(void) { if (systup_store_active_hook) diff --git a/src/backend/cdb/dispatcher/cdbdisp_async.c b/src/backend/cdb/dispatcher/cdbdisp_async.c index a6f275289fc..3d0b263b29b 100644 --- a/src/backend/cdb/dispatcher/cdbdisp_async.c +++ b/src/backend/cdb/dispatcher/cdbdisp_async.c @@ -1179,9 +1179,11 @@ processResults(CdbDispatchResult *dispatchResult) if (sscanf(qnotifies->extra, "%u:%u", &dbid, &seq_oid) != 2) elog(ERROR, "invalid nextval message"); +#ifndef SERVERLESS if (dbid != MyDatabaseId) elog(ERROR, "nextval message database id:%u doesn't match my database id:%u", dbid, MyDatabaseId); +#endif PG_TRY(); { diff --git a/src/backend/fts/ftsprobe.c b/src/backend/fts/ftsprobe.c index f0ba4636915..e45aa121238 100644 --- a/src/backend/fts/ftsprobe.c +++ b/src/backend/fts/ftsprobe.c @@ -26,6 +26,7 @@ #include "libpq-int.h" #include "access/xact.h" #include "cdb/cdbfts.h" +#include "cdb/cdbtranscat.h" #include "cdb/cdbvars.h" #include "postmaster/fts.h" #include "postmaster/ftsprobe.h" @@ -335,6 +336,11 @@ 
ftsConnect(fts_context *context) } else if (ftsInfo->poll_revents & (POLLOUT | POLLIN)) { + +#ifdef SERVERLESS + ftsInfo->conn->catalog = + CollectStartupCatalog(&ftsInfo->conn->catalog_size); +#endif switch(PQconnectPoll(ftsInfo->conn)) { case PGRES_POLLING_OK: diff --git a/src/backend/postmaster/checkpointer.c b/src/backend/postmaster/checkpointer.c index 07a7a435641..931fc33e852 100644 --- a/src/backend/postmaster/checkpointer.c +++ b/src/backend/postmaster/checkpointer.c @@ -42,6 +42,7 @@ #include "access/htup_details.h" #include "access/xlog.h" #include "access/xlog_internal.h" +#include "cdb/cdbvars.h" #include "libpq/pqsignal.h" #include "miscadmin.h" #include "pgstat.h" @@ -455,7 +456,11 @@ CheckpointerMain(void) */ if (!do_restartpoint) { - CreateCheckPoint(flags); +#ifdef SERVERLESS + if (GpIdentity.segindex < 0) +#endif + CreateCheckPoint(flags); + ckpt_performed = true; } else diff --git a/src/backend/replication/logical/launcher.c b/src/backend/replication/logical/launcher.c index 3c69817c2e8..eb55de91f95 100644 --- a/src/backend/replication/logical/launcher.c +++ b/src/backend/replication/logical/launcher.c @@ -24,6 +24,7 @@ #include "access/xact.h" #include "catalog/pg_subscription.h" #include "catalog/pg_subscription_rel.h" +#include "cdb/cdbvars.h" #include "funcapi.h" #include "libpq/pqsignal.h" #include "miscadmin.h" diff --git a/src/backend/utils/cache/relcache.c b/src/backend/utils/cache/relcache.c index 410c78c7de4..095f839cf4d 100644 --- a/src/backend/utils/cache/relcache.c +++ b/src/backend/utils/cache/relcache.c @@ -1448,6 +1448,11 @@ RelationInitPhysicalAddr(Relation relation) { Oid oldnode = relation->rd_node.relNode; +#ifdef SERVERLESS + if (GpIdentity.segindex >= 0) + return; +#endif + /* these relations kinds never have storage */ if (!RELKIND_HAS_STORAGE(relation->rd_rel->relkind)) return; diff --git a/src/backend/utils/cache/relmapper.c b/src/backend/utils/cache/relmapper.c index 0ba71600d45..ed2633c653f 100644 --- 
a/src/backend/utils/cache/relmapper.c +++ b/src/backend/utils/cache/relmapper.c @@ -49,6 +49,7 @@ #include "catalog/catalog.h" #include "catalog/pg_tablespace.h" #include "catalog/storage.h" +#include "cdb/cdbvars.h" #include "miscadmin.h" #include "pgstat.h" #include "storage/fd.h" @@ -714,6 +715,11 @@ load_relmap_file(bool shared, bool lock_held) int fd; int r; +#ifdef SERVERLESS + if (GpIdentity.segindex >= 0) + return; +#endif + if (shared) { snprintf(mapfilename, sizeof(mapfilename), "global/%s", diff --git a/src/backend/utils/init/miscinit.c b/src/backend/utils/init/miscinit.c index b606323bd43..50862dd98a7 100644 --- a/src/backend/utils/init/miscinit.c +++ b/src/backend/utils/init/miscinit.c @@ -1800,6 +1800,11 @@ expand_shared_preload_libraries_string() { for (int i = 0; i < shared_preload_libraries_num; i++) { +#ifdef SERVERLESS + if (strcmp(process_shared_preload_libraries_array[i], "unionstore") == 0 && + GpIdentity.segindex >= 0) + continue; +#endif elemlist = lappend(elemlist, pstrdup((char*)process_shared_preload_libraries_array[i])); } } diff --git a/src/backend/utils/init/postinit.c b/src/backend/utils/init/postinit.c index 380673dcb06..47d7eff34ba 100644 --- a/src/backend/utils/init/postinit.c +++ b/src/backend/utils/init/postinit.c @@ -679,11 +679,17 @@ InitPostgres(const char *in_dbname, Oid dboid, const char *username, elog(DEBUG3, "InitPostgres"); #ifdef SERVERLESS - if (StartUpCatalogData && !IS_QUERY_DISPATCHER()) + if (!IS_QUERY_DISPATCHER()) { - SystemTupleStoreReset(); - SystemTupleStoreInit(StartUpCatalogData, StartUpCatalogLen); + elog(LOG, "Startup catalog %d procid: %d", StartUpCatalogLen, MyProcPid); + + if (StartUpCatalogData) + { + SystemTupleStoreReset(); + SystemTupleStoreInit(StartUpCatalogData, StartUpCatalogLen); + } } + #endif /* diff --git a/src/include/access/tableam.h b/src/include/access/tableam.h index 63d63175cb0..6ae0c27d7cb 100644 --- a/src/include/access/tableam.h +++ b/src/include/access/tableam.h @@ -1198,11 +1198,7 
@@ table_beginscan_analyze(Relation rel, AnalyzeContext *ctx) /* * End relation scan. */ -static inline void -table_endscan(TableScanDesc scan) -{ - scan->rs_rd->rd_tableam->scan_end(scan); -} +extern void table_endscan(TableScanDesc scan); /* * Restart a relation scan. diff --git a/src/include/cdb/cdbtranscat.h b/src/include/cdb/cdbtranscat.h index 1eea0bcc139..ecf0360d9a2 100644 --- a/src/include/cdb/cdbtranscat.h +++ b/src/include/cdb/cdbtranscat.h @@ -79,6 +79,7 @@ extern TableScanDesc systup_store_beginscan(Relation relation, int nkeys, ScanKe uint32 flags); extern void systup_store_endscan(TableScanDesc sscan); extern bool systup_store_getnextslot(TableScanDesc sscan, TupleTableSlot *slot); +extern HeapTuple systup_store_getnext(TableScanDesc sscan); extern bool systup_store_active(void); extern bool systup_store_sorted_active(void); @@ -91,6 +92,9 @@ extern PGDLLIMPORT systup_store_endscan_hook_type systup_store_endscan_hook; typedef bool (*systup_store_getnextslot_hook_type) (TableScanDesc sscan, TupleTableSlot *slot); extern PGDLLIMPORT systup_store_getnextslot_hook_type systup_store_getnextslot_hook; +typedef HeapTuple (*systup_store_getnext_hook_type) (TableScanDesc sscan); +extern PGDLLIMPORT systup_store_getnext_hook_type systup_store_getnext_hook; + typedef bool (*systup_store_active_hook_type) (void); extern PGDLLIMPORT systup_store_active_hook_type systup_store_active_hook; typedef bool (*systup_store_sorted_active_hook_type) (void); From bef94ebbf98faf316e1ca7ad18fc467e8fe07bfa Mon Sep 17 00:00:00 2001 From: roseduan Date: Mon, 5 Aug 2024 13:18:06 +0800 Subject: [PATCH 144/152] disable autovacuum temporarily disable autovacuum temporarily because of the flaky regression test 'vacuum.sql' --- src/backend/commands/vacuum.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/src/backend/commands/vacuum.c b/src/backend/commands/vacuum.c index 532634bc598..fb2c95f00e2 100644 --- a/src/backend/commands/vacuum.c +++ b/src/backend/commands/vacuum.c
@@ -2413,6 +2413,14 @@ vacuum_rel(Oid relid, RangeVar *relation, VacuumParams *params, lmode = (params->options & VACOPT_FULL) ? AccessExclusiveLock : ShareUpdateExclusiveLock; +#ifdef SERVERLESS + /* + * Force full vacuum for hashdata table, because there is a flaky test + * in vacuum.sql. Remove this code after the vacuum bug is fixed. + */ + lmode = AccessExclusiveLock; +#endif + /* open the relation and get the appropriate lock on it */ rel = vacuum_open_relation(relid, relation, params->options, params->log_min_duration >= 0, lmode); From eaeb741ede11b7a9eb20f0402176f77ea12a077a Mon Sep 17 00:00:00 2001 From: leo Date: Mon, 5 Aug 2024 11:10:09 +0800 Subject: [PATCH 145/152] Add MACRO 'FAULT_INJECTOR'/'SERVERLESS' to related code --- gpcontrib/gp_inject_fault/gp_inject_fault.c | 16 +++++++++++++++ src/backend/fts/ftsmessagehandler.c | 22 ++++++++++++++++++++- src/backend/tcop/postgres.c | 2 ++ src/backend/utils/cache/relcache.c | 2 ++ src/include/utils/faultinjector.h | 2 ++ 5 files changed, 43 insertions(+), 1 deletion(-) diff --git a/gpcontrib/gp_inject_fault/gp_inject_fault.c b/gpcontrib/gp_inject_fault/gp_inject_fault.c index bdbd7dd685a..0c387c3f2eb 100644 --- a/gpcontrib/gp_inject_fault/gp_inject_fault.c +++ b/gpcontrib/gp_inject_fault/gp_inject_fault.c @@ -29,7 +29,9 @@ PG_MODULE_MAGIC; +#ifdef FAULT_INJECTOR static const char *const faultInjectModuleName = "$libdir/gp_inject_fault"; +#endif extern Datum gp_inject_fault(PG_FUNCTION_ARGS); extern Datum insert_noop_xlog_record(PG_FUNCTION_ARGS); @@ -51,6 +53,8 @@ fts_with_panic_warning(FaultInjectorEntry_s faultEntry) errmsg("consider disabling FTS probes while injecting a panic."), errhint("Inject an infinite 'skip' into the 'fts_probe' fault to disable FTS probing."))); } + +#ifdef FAULT_INJECTOR /* * Intercept log messages. * Define a method here to override default notice handling routines.
@@ -99,6 +103,8 @@ print_log_handler(void *arg, const PGresult *pgresult) ThrowErrorData(edata); } +#endif + /* * Register warning when extension is loaded. * @@ -106,13 +112,16 @@ print_log_handler(void *arg, const PGresult *pgresult) void _PG_init(void) { +#ifdef FAULT_INJECTOR InjectFaultInit(); +#endif MemoryContext oldContext = MemoryContextSwitchTo(TopMemoryContext); register_fault_injection_warning(fts_with_panic_warning); MemoryContextSwitchTo(oldContext); } +#ifdef FAULT_INJECTOR static void get_segment_configuration(int dbid, char **hostname, int *port, int *content) { @@ -174,11 +183,13 @@ get_segment_configuration(int dbid, char **hostname, int *port, int *content) table_close(configrel, NoLock); #endif } +#endif PG_FUNCTION_INFO_V1(gp_inject_fault); Datum gp_inject_fault(PG_FUNCTION_ARGS) { +#ifdef FAULT_INJECTOR char *faultName = TextDatumGetCString(PG_GETARG_DATUM(0)); char *type = TextDatumGetCString(PG_GETARG_DATUM(1)); char *ddlStatement = TextDatumGetCString(PG_GETARG_DATUM(2)); @@ -288,6 +299,11 @@ gp_inject_fault(PG_FUNCTION_ARGS) elog(ERROR, "%s", response); } PG_RETURN_TEXT_P(cstring_to_text(response)); +#else + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("fault injection disable, enable with flags --enable-faultinjector "))); +#endif } PG_FUNCTION_INFO_V1(insert_noop_xlog_record); diff --git a/src/backend/fts/ftsmessagehandler.c b/src/backend/fts/ftsmessagehandler.c index 5012c5ffdee..78db09a0dd3 100644 --- a/src/backend/fts/ftsmessagehandler.c +++ b/src/backend/fts/ftsmessagehandler.c @@ -315,6 +315,16 @@ HandleFtsWalRepSyncRepOff(void) false, /* RequestRetry */ }; +#ifdef FAULT_INJECTOR + if (FaultInjector_InjectFaultIfSet("fts_probe", + DDLNotSpecified, + "" /* databaseName */, + "" /* tableName */) == FaultInjectorTypeSkip) + { + SendFtsResponse(&response, FTS_MSG_SYNCREP_OFF); + } +#endif + ereport(LOG, (errmsg("turning off synchronous wal replication due to FTS request"))); UnsetSyncStandbysDefined(); @@ -387,6 
+397,16 @@ HandleFtsWalRepPromote(void) ereport(LOG, (errmsg("promoting mirror to primary due to FTS request"))); +#ifdef FAULT_INJECTOR + if (FaultInjector_InjectFaultIfSet("fts_probe", + DDLNotSpecified, + "" /* databaseName */, + "" /* tableName */) == FaultInjectorTypeSkip) + { + goto skip_promote; + } +#endif + #ifndef USE_INTERNAL_FTS if (IS_QUERY_DISPATCHER()) { bool succ; @@ -439,7 +459,7 @@ HandleFtsWalRepPromote(void) " DBState = %d, RedoPtr = %X/%X", state, (uint32) (redo >> 32), (uint32) redo); } -#ifndef USE_INTERNAL_FTS +#if defined(FAULT_INJECTOR) || !defined(USE_INTERNAL_FTS) skip_promote: #endif SendFtsResponse(&response, FTS_MSG_PROMOTE); diff --git a/src/backend/tcop/postgres.c b/src/backend/tcop/postgres.c index aa0954c64e9..07853a9492b 100644 --- a/src/backend/tcop/postgres.c +++ b/src/backend/tcop/postgres.c @@ -5722,8 +5722,10 @@ PostgresMain(int argc, char *argv[], } else if (am_ftshandler) HandleFtsMessage(query_string); +#ifdef FAULT_INJECTOR else if (am_faulthandler) HandleFaultMessage(query_string); +#endif else if (exec_simple_query_hook) exec_simple_query_hook(query_string); else diff --git a/src/backend/utils/cache/relcache.c b/src/backend/utils/cache/relcache.c index 095f839cf4d..f58f855bda6 100644 --- a/src/backend/utils/cache/relcache.c +++ b/src/backend/utils/cache/relcache.c @@ -6841,6 +6841,7 @@ write_relcache_init_file(bool shared) fp = AllocateFile(tempfilename, PG_BINARY_W); if (fp == NULL) { +#ifndef SERVERLESS /* * We used to consider this a fatal error, but we might as well * continue with backend startup ... 
@@ -6850,6 +6851,7 @@ write_relcache_init_file(bool shared) errmsg("could not create relation-cache initialization file \"%s\": %m", tempfilename), errdetail("Continuing anyway, but there's something wrong."))); +#endif /* SERVERLESS */ return; } diff --git a/src/include/utils/faultinjector.h b/src/include/utils/faultinjector.h index e09d2b77c2a..0404290f484 100644 --- a/src/include/utils/faultinjector.h +++ b/src/include/utils/faultinjector.h @@ -81,6 +81,7 @@ typedef struct FaultInjectorEntry_s { } FaultInjectorEntry_s; +#ifdef FAULT_INJECTOR extern void InjectFaultInit(void); extern Size FaultInjector_ShmemSize(void); @@ -118,6 +119,7 @@ extern char *InjectFault( char *tableName, int startOccurrence, int endOccurrence, int extraArg, int gpSessionid); extern void HandleFaultMessage(const char* msg); +#endif typedef void (*fault_injection_warning_function)(FaultInjectorEntry_s faultEntry); void register_fault_injection_warning(fault_injection_warning_function warning); From 3afd6ce37147e3260806a2eef142e6f8f7eb1d66 Mon Sep 17 00:00:00 2001 From: Zhang Mingli Date: Tue, 6 Aug 2024 17:16:08 +0800 Subject: [PATCH 146/152] Support COUNT with const value in Append AGG plan. Support count(n) where n is a const value, user's SQL has something like that. select count(1) has no difference with select count(*).
Authored-by: Zhang Mingli avamingli@gmail.com --- src/backend/cdb/cdbgroupingpaths.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/src/backend/cdb/cdbgroupingpaths.c b/src/backend/cdb/cdbgroupingpaths.c index e4cba1c2d0c..d3882abce05 100644 --- a/src/backend/cdb/cdbgroupingpaths.c +++ b/src/backend/cdb/cdbgroupingpaths.c @@ -2870,7 +2870,17 @@ try_append_agg(Query *parse) TargetEntry *tle = (TargetEntry *) linitial(aggref->args); if (!IsA(tle->expr, Var)) + { + /* + * Allow count() with a const, ex: count(1) + */ + if ((strcmp(aggname, "count") == 0) && + (IsA(tle->expr, Const)) && + (!castNode(Const, tle->expr)->constisnull)) + return true; + return false; + } } else return false; From 45d60bad1b421319142cf37a01a92cee55ed9151 Mon Sep 17 00:00:00 2001 From: Zhang Mingli Date: Tue, 6 Aug 2024 17:46:33 +0800 Subject: [PATCH 147/152] Force CTAS distribution to random in serverless mode. For CREATE..AS, target table's distribution policy could be derived from the Query of AS part. create materialized view cloud_ctas_mv as select a, count(b) from cloud_ctas_t0 group by a with no data; The locus of cloud_ctas_mv may be Hashed by column a as it's an agg with group by. It's ok for CBDB, but in serverless mode, the underlying data is random, we can't store a distribution policy with distkeys or numsegments for that. Else, we will get an error when we switch to a cluster with more or less segments. We have done something in extensions, but it didn't take effect due to the architecture and process of utility hooks. This is the last resort in core codes.
Authored-by: Zhang Mingli avamingli@gmail.com --- src/backend/commands/createas.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/src/backend/commands/createas.c b/src/backend/commands/createas.c index b973085e06f..74970c40039 100644 --- a/src/backend/commands/createas.c +++ b/src/backend/commands/createas.c @@ -489,6 +489,18 @@ ExecCreateTableAs(ParseState *pstate, CreateTableAsStmt *stmt, */ plan->intoClause = copyObject(stmt->into); +#ifdef SERVERLESS + /* + * Though it could be Hash distributed due to the AS Query, + * we must treat it as randomly distributed in serverless mode. + */ + if (GpPolicyIsPartitioned(plan->intoPolicy) && + (plan->intoPolicy->nattrs > 0 || + plan->intoPolicy->numsegments > 0)) + { + plan->intoPolicy = createRandomPartitionedPolicy(0); + } +#endif /* * Use a snapshot with an updated command ID to ensure this query sees * results of any previously executed queries. (This could only From 88c92acaa391064abc2d5c88bdae47e80fe3d122 Mon Sep 17 00:00:00 2001 From: lizhaohan Date: Mon, 19 Aug 2024 15:29:55 +0800 Subject: [PATCH 148/152] Forbid connecting to QE in utility mode This is part of commit "[CLOUD] Enable start QD in utility" but changes the CBDB. --- src/backend/cdb/cdbvars.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/backend/cdb/cdbvars.c b/src/backend/cdb/cdbvars.c index 52291f53de8..529a8b81df0 100644 --- a/src/backend/cdb/cdbvars.c +++ b/src/backend/cdb/cdbvars.c @@ -436,6 +436,10 @@ check_gp_role(char **newval, void **extra, GucSource source) return true; else if (Gp_role == GP_ROLE_UNDEFINED) return (newrole != GP_ROLE_UNDEFINED); +#ifdef SERVERLESS + else if (Gp_role == GP_ROLE_EXECUTE && newrole == GP_ROLE_UTILITY) + elog(ERROR, "connecting to qe in utility mode is forbidden in Cloud"); +#endif else /* can only downgrade to utility.
*/ return (newrole == GP_ROLE_UTILITY); } From ecbb91ae133dbef9bde8d187837f051ae9db6321 Mon Sep 17 00:00:00 2001 From: zhangwenchao <656540940@qq.com> Date: Thu, 8 Aug 2024 11:27:59 +0800 Subject: [PATCH 149/152] Make column case-sensitivity only for output column. Co-authored-by: Wei Shaolun weishaolun@hashdata.cn --- src/backend/access/common/printtup.c | 5 +++- src/backend/access/common/tupdesc.c | 1 + src/backend/executor/execTuples.c | 3 ++ src/backend/nodes/copyfuncs.c | 1 + src/backend/optimizer/plan/planner.c | 15 ++++++++++ src/backend/parser/gram.y | 41 ++++++++++++++++++++++++---- src/backend/parser/parse_target.c | 12 ++++---- src/backend/parser/scan.l | 24 ++++++++++++++++ src/backend/utils/misc/guc_gp.c | 11 ++++++++ src/include/access/tupdesc.h | 1 + src/include/nodes/parsenodes.h | 2 ++ src/include/nodes/primnodes.h | 1 + src/include/utils/guc.h | 1 + src/include/utils/unsync_guc_name.h | 1 + 14 files changed, 106 insertions(+), 13 deletions(-) diff --git a/src/backend/access/common/printtup.c b/src/backend/access/common/printtup.c index a74f495f1f5..ec195508e7f 100644 --- a/src/backend/access/common/printtup.c +++ b/src/backend/access/common/printtup.c @@ -231,7 +231,10 @@ SendRowDescriptionMessage(StringInfo buf, TupleDesc typeinfo, else format = 0; - pq_writestring(buf, NameStr(att->attname)); + if (typeinfo->orignames[i]) + pq_writestring(buf, typeinfo->orignames[i]); + else + pq_writestring(buf, NameStr(att->attname)); pq_writeint32(buf, resorigtbl); pq_writeint16(buf, resorigcol); pq_writeint32(buf, atttypid); diff --git a/src/backend/access/common/tupdesc.c b/src/backend/access/common/tupdesc.c index 1fff6709d57..6d8d6054f7e 100644 --- a/src/backend/access/common/tupdesc.c +++ b/src/backend/access/common/tupdesc.c @@ -70,6 +70,7 @@ CreateTemplateTupleDesc(int natts) * Initialize other fields of the tupdesc. 
*/ desc->natts = natts; + desc->orignames = palloc0(natts * sizeof(char *)); desc->constr = NULL; desc->tdtypeid = RECORDOID; desc->tdtypmod = -1; diff --git a/src/backend/executor/execTuples.c b/src/backend/executor/execTuples.c index 2c2106af9a7..bcb5623b4f6 100644 --- a/src/backend/executor/execTuples.c +++ b/src/backend/executor/execTuples.c @@ -1998,6 +1998,9 @@ ExecTypeFromTLInternal(List *targetList, bool skipjunk) TupleDescInitEntryCollation(typeInfo, cur_resno, exprCollation((Node *) tle->expr)); + + if (tle->origname) + typeInfo->orignames[cur_resno - 1] = tle->origname; cur_resno++; } diff --git a/src/backend/nodes/copyfuncs.c b/src/backend/nodes/copyfuncs.c index 962fcb4eded..453b068be0f 100644 --- a/src/backend/nodes/copyfuncs.c +++ b/src/backend/nodes/copyfuncs.c @@ -2778,6 +2778,7 @@ _copyTargetEntry(const TargetEntry *from) COPY_NODE_FIELD(expr); COPY_SCALAR_FIELD(resno); COPY_STRING_FIELD(resname); + COPY_STRING_FIELD(origname); COPY_SCALAR_FIELD(ressortgroupref); COPY_SCALAR_FIELD(resorigtbl); COPY_SCALAR_FIELD(resorigcol); diff --git a/src/backend/optimizer/plan/planner.c b/src/backend/optimizer/plan/planner.c index 9315da4199a..f500ef59650 100644 --- a/src/backend/optimizer/plan/planner.c +++ b/src/backend/optimizer/plan/planner.c @@ -342,6 +342,21 @@ planner(Query *parse, const char *query_string, int cursorOptions, else result = standard_planner(parse, query_string, cursorOptions, boundParams); + if (output_col_case_sensitive && result && parse->commandType == CMD_SELECT) + { + ListCell *lp; + ListCell *lr; + + forboth(lp, result->planTree->targetlist, lr, parse->targetList) + { + TargetEntry *lte = (TargetEntry *) lfirst(lp); + TargetEntry *rte = (TargetEntry *) lfirst(lr); + Assert(lte->resno == rte->resno); + if (rte->origname) + lte->origname = rte->origname; + } + } + return result; } diff --git a/src/backend/parser/gram.y b/src/backend/parser/gram.y index c5cc77d9205..a9d2df99442 100644 --- a/src/backend/parser/gram.y +++ 
b/src/backend/parser/gram.y @@ -222,6 +222,10 @@ static Node *makeRecursiveViewSelect(char *relname, List *aliases, Node *query); static Node *makeIsNotDistinctFromNode(Node *expr, int position); static bool isSetWithReorganize(List **options); + +extern char *orig_str_val; +bool collabel_is_ident = false; + static char *greenplumLegacyAOoptions(const char *accessMethod, List **options); static void check_expressions_in_partition_key(PartitionSpec *spec, core_yyscan_t yyscanner); @@ -1574,6 +1578,21 @@ stmt: | RuleStmt | SecLabelStmt | SelectStmt + { + ListCell *lc; + SelectStmt *n = (SelectStmt *) $1; + + while(n->op) + n = n->larg; + + foreach(lc, n->targetList) + { + ResTarget *t = (ResTarget *) lfirst(lc); + if (t->orig_name) + t->use_orig_name = true; + } + $$ = $1; + } | TransactionStmt | TruncateStmt | UnlistenStmt @@ -19420,6 +19439,10 @@ target_list: target_el: a_expr AS ColLabel { $$ = makeNode(ResTarget); + if (collabel_is_ident && orig_str_val != NULL) + { + $$->orig_name = pstrdup(orig_str_val); + } $$->name = $3; $$->indirection = NIL; $$->val = (Node *)$1; @@ -19442,6 +19465,10 @@ target_el: a_expr AS ColLabel | a_expr BareColLabel { $$ = makeNode(ResTarget); + if (collabel_is_ident && orig_str_val != NULL) + { + $$->orig_name = pstrdup(orig_str_val); + } $$->name = $2; $$->indirection = NIL; $$->val = (Node *)$1; @@ -19450,6 +19477,10 @@ target_el: a_expr AS ColLabel | a_expr { $$ = makeNode(ResTarget); + if (IsA($1, ColumnRef) && collabel_is_ident && orig_str_val != NULL) + { + $$->orig_name = pstrdup(orig_str_val); + } $$->name = NULL; $$->indirection = NIL; $$->val = (Node *)$1; @@ -19849,9 +19880,9 @@ plassign_equals: COLON_EQUALS /* Column identifier --- names that can be column, table, etc names. 
*/ -ColId: IDENT { $$ = $1; } - | unreserved_keyword { $$ = pstrdup($1); } - | col_name_keyword { $$ = pstrdup($1); } +ColId: IDENT { collabel_is_ident = true; $$ = $1; } + | unreserved_keyword { collabel_is_ident = false; $$ = pstrdup($1); } + | col_name_keyword { collabel_is_ident = false; $$ = pstrdup($1); } ; /* Type/function identifier --- names that can be type or function names. @@ -20285,8 +20316,8 @@ PartitionColId: PartitionIdentKeyword { $$ = pstrdup($1); } /* Bare column label --- names that can be column labels without writing "AS". * This classification is orthogonal to the other keyword categories. */ -BareColLabel: IDENT { $$ = $1; } - | bare_label_keyword { $$ = pstrdup($1); } +BareColLabel: IDENT { collabel_is_ident = true; $$ = $1; } + | bare_label_keyword { collabel_is_ident = false; $$ = pstrdup($1); } ; diff --git a/src/backend/parser/parse_target.c b/src/backend/parser/parse_target.c index 039952aa8ae..6f312624b3a 100644 --- a/src/backend/parser/parse_target.c +++ b/src/backend/parser/parse_target.c @@ -126,6 +126,7 @@ transformTargetList(ParseState *pstate, List *targetlist, List *p_target = NIL; bool expand_star; ListCell *o_target; + TargetEntry *target; /* Shouldn't have any leftover multiassign items at start */ Assert(pstate->p_multiassign_exprs == NIL); @@ -179,13 +180,10 @@ transformTargetList(ParseState *pstate, List *targetlist, * Not "something.*", or we want to treat that as a plain whole-row * variable, so transform as a single expression */ - p_target = lappend(p_target, - transformTargetEntry(pstate, - res->val, - NULL, - exprKind, - res->name, - false)); + target = transformTargetEntry(pstate, res->val, NULL, exprKind, res->name, false); + if (res->use_orig_name) + target->origname = res->orig_name; + p_target = lappend(p_target, target); } /* diff --git a/src/backend/parser/scan.l b/src/backend/parser/scan.l index 9f9d8a17061..9b56d7c2eeb 100644 --- a/src/backend/parser/scan.l +++ b/src/backend/parser/scan.l @@ -40,6 +40,7 @@ 
#include "parser/parser.h" /* only needed for GUC variables */ #include "parser/scansup.h" #include "mb/pg_wchar.h" +#include "utils/guc.h" } %{ @@ -65,6 +66,25 @@ fprintf_to_ereport(const char *fmt, const char *msg) int backslash_quote = BACKSLASH_QUOTE_SAFE_ENCODING; bool escape_string_warning = true; bool standard_conforming_strings = true; +char *orig_str_val = NULL; + +static void +set_orig_str_val(const char *val, int len) +{ + if (orig_str_val != NULL) + { + pfree(orig_str_val); + orig_str_val = NULL; + } + + if (output_col_case_sensitive && val != NULL) + { + orig_str_val = MemoryContextAllocZero(TopMemoryContext, len + 1); + memcpy(orig_str_val, val, len); + if (len >= NAMEDATALEN) + truncate_identifier(orig_str_val, len, false); + } +} /* * Constant data exported from this file. This array maps from the @@ -513,6 +533,7 @@ other . { /* If NCHAR isn't a keyword, just return "n" */ yylval->str = pstrdup("n"); + set_orig_str_val(NULL, 0); return IDENT; } } @@ -781,6 +802,7 @@ other . if (yyextra->literallen >= NAMEDATALEN) truncate_identifier(ident, yyextra->literallen, true); yylval->str = ident; + set_orig_str_val(NULL, 0); return IDENT; } {dquote} { @@ -806,6 +828,7 @@ other . /* throw back all but the initial u/U */ yyless(1); /* and treat it as {identifier} */ + set_orig_str_val(yytext, yyleng); ident = downcase_truncate_identifier(yytext, yyleng, true); yylval->str = ident; return IDENT; @@ -1029,6 +1052,7 @@ other . * No. Convert the identifier to lower case, and truncate * if necessary. 
*/ + set_orig_str_val(yytext, yyleng); ident = downcase_truncate_identifier(yytext, yyleng, true); yylval->str = ident; return IDENT; diff --git a/src/backend/utils/misc/guc_gp.c b/src/backend/utils/misc/guc_gp.c index c29b87350e3..5e80e80048a 100644 --- a/src/backend/utils/misc/guc_gp.c +++ b/src/backend/utils/misc/guc_gp.c @@ -213,6 +213,7 @@ int Debug_dtm_action_nestinglevel = DEBUG_DTM_ACTION_NESTINGLEVEL_DEFAULT; int gp_connection_send_timeout; bool create_restartpoint_on_ckpt_record_replay = false; +bool output_col_case_sensitive = false; /* * This variable is a dummy that doesn't do anything, except in some @@ -3290,6 +3291,16 @@ struct config_bool ConfigureNamesBool_gp[] = NULL, NULL, NULL }, + { + {"output_col_case_sensitive", PGC_USERSET, CUSTOM_OPTIONS, + gettext_noop("Set 'as' column label case sensitive"), + NULL + }, + &output_col_case_sensitive, + false, + NULL, NULL, NULL + }, + /* End-of-list marker */ { {NULL, 0, 0, NULL, NULL}, NULL, false, NULL, NULL diff --git a/src/include/access/tupdesc.h b/src/include/access/tupdesc.h index dd120973e30..2b8d2c606de 100644 --- a/src/include/access/tupdesc.h +++ b/src/include/access/tupdesc.h @@ -83,6 +83,7 @@ typedef struct TupleDescData int32 tdtypmod; /* typmod for tuple type */ int tdrefcount; /* reference count, or -1 if not counting */ TupleConstr *constr; /* constraints, or NULL if none */ + char **orignames; /* attrs[N] is the description of Attribute Number N+1 */ FormData_pg_attribute attrs[FLEXIBLE_ARRAY_MEMBER]; } TupleDescData; diff --git a/src/include/nodes/parsenodes.h b/src/include/nodes/parsenodes.h index dd7f5bd78e1..1816d957905 100644 --- a/src/include/nodes/parsenodes.h +++ b/src/include/nodes/parsenodes.h @@ -498,6 +498,8 @@ typedef struct ResTarget { NodeTag type; char *name; /* column name or NULL */ + char *orig_name; /* origin column name (case sensitive) or NULL */ + bool use_orig_name; /* output origin column name to user */ List *indirection; /* subscripts, field names, and '*', or 
NIL */ Node *val; /* the value expression to compute or assign */ int location; /* token location, or -1 if unknown */ diff --git a/src/include/nodes/primnodes.h b/src/include/nodes/primnodes.h index 95e4d5bc2c5..c7fe210a458 100644 --- a/src/include/nodes/primnodes.h +++ b/src/include/nodes/primnodes.h @@ -1610,6 +1610,7 @@ typedef struct TargetEntry Expr *expr; /* expression to evaluate */ AttrNumber resno; /* attribute number (see notes above) */ char *resname; /* name of the column (could be NULL) */ + char *origname; /* original name of the column */ Index ressortgroupref; /* nonzero if referenced by a sort/group * clause */ Oid resorigtbl; /* OID of column's source table */ diff --git a/src/include/utils/guc.h b/src/include/utils/guc.h index 1d565822e87..f6e47b4778d 100644 --- a/src/include/utils/guc.h +++ b/src/include/utils/guc.h @@ -644,6 +644,7 @@ extern int gp_predicate_pushdown_sample_rows; extern bool gp_log_endpoints; extern bool gp_allow_date_field_width_5digits; +extern bool output_col_case_sensitive; /* * Try to push the hash table of hash join node down to the scan node as diff --git a/src/include/utils/unsync_guc_name.h b/src/include/utils/unsync_guc_name.h index 5a90c1fd2af..af78e486f7e 100644 --- a/src/include/utils/unsync_guc_name.h +++ b/src/include/utils/unsync_guc_name.h @@ -492,6 +492,7 @@ "optimizer_use_external_constant_expression_evaluation_for_ints", "optimizer_use_gpdb_allocators", "optimizer_xform_bind_threshold", + "output_col_case_sensitive", "parallel_leader_participation", "parallel_setup_cost", "parallel_tuple_cost", From 779bb8c6f8c964d70a146023cbd4b6ecee7b32ef Mon Sep 17 00:00:00 2001 From: yangjianghua Date: Mon, 12 Aug 2024 09:36:47 +0800 Subject: [PATCH 150/152] Clean pg_cron guc optimizer,enable_answer_query_using_materialized_views. * offload two Guc to ivm modules. * Clean up task dependencies. 
--- src/backend/commands/taskcmds.c | 2 ++ src/backend/task/pg_cron.c | 5 ++--- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/src/backend/commands/taskcmds.c b/src/backend/commands/taskcmds.c index 0e4ac2e22fe..2b51d4c37a4 100644 --- a/src/backend/commands/taskcmds.c +++ b/src/backend/commands/taskcmds.c @@ -28,6 +28,7 @@ #include "postgres.h" +#include "access/xact.h" #include "catalog/dependency.h" #include "catalog/namespace.h" #include "catalog/pg_task.h" @@ -334,6 +335,7 @@ DropTask(ParseState *pstate, DropTaskStmt * stmt) ObjectAddressSet(address, TaskRelationId, jobid); /* Clean up dependencies */ deleteSharedDependencyRecordsFor(TaskRelationId, jobid, 0); + deleteDependencyRecordsFor(TaskRelationId, jobid, false); } return address; diff --git a/src/backend/task/pg_cron.c b/src/backend/task/pg_cron.c index 9f0c5425370..affb678bba0 100644 --- a/src/backend/task/pg_cron.c +++ b/src/backend/task/pg_cron.c @@ -1095,9 +1095,8 @@ ManageCronTask(CronTask *task, TimestampTz currentTime) }; sprintf(nodePortString, "%d", cronJob->nodePort); if (cronJob->warehouse) - appendStringInfo(&options_buf, "-c optimizer=off" - " -c enable_answer_query_using_materialized_views=off" - " -c gp_command_count=%d -c warehouse=%s", + appendStringInfo(&options_buf, + "-c gp_command_count=%d -c warehouse=%s", gp_command_count, cronJob->warehouse); Assert(sizeof(keywordArray) == sizeof(valueArray)); From cdc2ae205cfb7194bc8fbd2492855d210014c01e Mon Sep 17 00:00:00 2001 From: Zhang Mingli Date: Sat, 10 Aug 2024 09:50:34 +0800 Subject: [PATCH 151/152] Eliminate const expressions for Append AGG. Const expressions like where 1 = 1 and a > 1 will be processed to where a > 1 by planner. Quals like: 1 = 1 is always TRUE, for a AND expression that's useless. But as we store MV's view query as it was originally, the parse tree processed by planner may not match MV's exactly. Process that quals during Append AGG to fix. 
Authored-by: Zhang Mingli avamingli@gmail.com --- src/backend/cdb/cdbgroupingpaths.c | 31 ++++++++++++++++++++++++++---- 1 file changed, 27 insertions(+), 4 deletions(-) diff --git a/src/backend/cdb/cdbgroupingpaths.c b/src/backend/cdb/cdbgroupingpaths.c index d3882abce05..5db80dc4fc2 100644 --- a/src/backend/cdb/cdbgroupingpaths.c +++ b/src/backend/cdb/cdbgroupingpaths.c @@ -63,6 +63,7 @@ #include "optimizer/optimizer.h" #include "optimizer/pathnode.h" #include "optimizer/paths.h" +#include "optimizer/planner.h" #include "optimizer/prep.h" #include "optimizer/tlist.h" #include "parser/parse_clause.h" @@ -244,7 +245,7 @@ static bool expand_append_agg(PlannerInfo *root, cdb_agg_planning_context *ctx); static Relation -simple_view_matching(Query *parse); +simple_view_matching(PlannerInfo *root, Query *parse); static void expand_append_agg_guts(PlannerInfo *root, cdb_agg_planning_context *ctx, Relation matviewRel); @@ -2954,7 +2955,7 @@ expand_append_agg(PlannerInfo *root, cdb_agg_planning_context *ctx) return false; /* Find a matched view for input query. */ - matviewRel = simple_view_matching(parse); + matviewRel = simple_view_matching(root, parse); if (matviewRel == NULL) return false; @@ -2979,7 +2980,7 @@ expand_append_agg(PlannerInfo *root, cdb_agg_planning_context *ctx) * A lock will be held if we find a matched view, the caller should handle that. */ static Relation -simple_view_matching(Query *parse) +simple_view_matching(PlannerInfo *root, Query *parse) { Query *viewQuery; /* Query of view. */ Relation matviewRel = NULL; /* Matched view relation. */ @@ -3071,8 +3072,27 @@ simple_view_matching(Query *parse) */ mvrte->checkAsUser = InvalidOid; - /* To make equal parse tree, need root to assign aggno in precess_aggrefs. */ + /* + * Need a subroot to process quals, use it to eval const expressions + * and AND, OR expression simplification. 
+ * ex: + * where 1 = 1 and a > 1 + * will be processed to: + * where TRUE and a > 1 + * and then: + * where a > 1 + * + * and to assign aggno in precess_aggrefs. + * + * We only use subrrot to process view query, it's not used to do real planner, + * so free it after that. + */ subroot = (PlannerInfo *) palloc0(sizeof(PlannerInfo)); + /* + * We have to fill info from root, to avoid crash during processing, + * though they are not same in act. + */ + memcpy(subroot, root, sizeof(PlannerInfo)); subroot->parse = viewQuery; subroot->processed_tlist = viewQuery->targetList; if (viewQuery->hasAggs) @@ -3086,6 +3106,9 @@ simple_view_matching(Query *parse) viewQuery->stmt_location = parse->stmt_location; viewQuery->stmt_len = parse->stmt_len; + preprocess_qual_conditions(subroot, (Node *) viewQuery->jointree); + pfree(subroot); + /* * Before we compare Query, quals need to be preprocessed becuase * A signle qual may be a OpExpr or a list with one element. From 2226561d3d9a7657a942243d6d9b5ae161e4f687 Mon Sep 17 00:00:00 2001 From: WangWeinan Date: Tue, 13 Aug 2024 06:37:49 +0000 Subject: [PATCH 152/152] Build db without cloud extension Support compile and deploy db without cloud extension source code. 
Add a ci for compilation check as preparation for regression and isolation2 test ci --- src/backend/catalog/heap.c | 2 - src/backend/cdb/cdbutil.c | 23 ++-- src/backend/commands/createas.c | 2 +- src/backend/commands/trigger.c | 7 - src/backend/executor/nodeValuesscan.c | 2 +- src/backend/nodes/makefuncs.c | 18 +++ src/backend/parser/gram.y | 17 ++- src/backend/postmaster/postmaster.c | 6 +- src/backend/storage/ipc/procarray.c | 2 + src/backend/utils/adt/dbsize.c | 12 +- src/backend/utils/cache/relcache.c | 2 + src/backend/utils/init/postinit.c | 5 + src/backend/utils/resgroup/resgroup.c | 123 ++++++++++++++++++ src/include/nodes/makefuncs.h | 2 - src/interfaces/libpq/fe-misc.c | 4 + src/interfaces/libpq/libpq-fe.h | 2 + .../regress/expected/join_hash_optimizer.out | 13 +- src/test/regress/expected/misc_sanity.out | 6 +- src/test/regress/expected/oidjoins.out | 1 + 19 files changed, 212 insertions(+), 37 deletions(-) diff --git a/src/backend/catalog/heap.c b/src/backend/catalog/heap.c index a834a9bee50..7bca300251a 100644 --- a/src/backend/catalog/heap.c +++ b/src/backend/catalog/heap.c @@ -1342,9 +1342,7 @@ InsertPgClassTuple(Relation pg_class_desc, /* relpartbound is set by updating this tuple, if necessary */ nulls[Anum_pg_class_relpartbound - 1] = true; -#ifdef SERVERLESS nulls[Anum_pg_class_relpartspec - 1] = true; -#endif /* SERVERLESS */ tup = heap_form_tuple(RelationGetDescr(pg_class_desc), values, nulls); diff --git a/src/backend/cdb/cdbutil.c b/src/backend/cdb/cdbutil.c index 6588875bd33..f875beff270 100644 --- a/src/backend/cdb/cdbutil.c +++ b/src/backend/cdb/cdbutil.c @@ -274,22 +274,25 @@ readGpSegConfigFromCatalog(int *total_dbs) Assert(!isNull); warehouseid = DatumGetObjectId(attr); - /* status */ - attr = heap_getattr(gp_seg_config_tuple, Anum_gp_segment_configuration_status, RelationGetDescr(gp_seg_config_rel), &isNull); - Assert(!isNull); - char status = DatumGetChar(attr); - - /* content */ - attr = heap_getattr(gp_seg_config_tuple, 
Anum_gp_segment_configuration_content, RelationGetDescr(gp_seg_config_rel), &isNull); - Assert(!isNull); - /* * In serverless mode, and if we are not in fts probe process, * we only need the segment that is up and has the same warehouseid. */ #ifdef SERVERLESS + char status; + int16 contentid; + /* content */ + attr = heap_getattr(gp_seg_config_tuple, Anum_gp_segment_configuration_content, RelationGetDescr(gp_seg_config_rel), &isNull); + Assert(!isNull); + contentid = DatumGetInt16(attr); + + /* status */ + attr = heap_getattr(gp_seg_config_tuple, Anum_gp_segment_configuration_status, RelationGetDescr(gp_seg_config_rel), &isNull); + Assert(!isNull); + status = DatumGetChar(attr); + if (!am_ftsprobe) - need_current_segment = (warehouseid == GetCurrentWarehouseId() || DatumGetInt16(attr) == MASTER_CONTENT_ID) && (status == GP_SEGMENT_CONFIGURATION_STATUS_UP); + need_current_segment = (warehouseid == GetCurrentWarehouseId() || contentid == MASTER_CONTENT_ID) && (status == GP_SEGMENT_CONFIGURATION_STATUS_UP); #endif if (need_current_segment) diff --git a/src/backend/commands/createas.c b/src/backend/commands/createas.c index 74970c40039..f700ee9db76 100644 --- a/src/backend/commands/createas.c +++ b/src/backend/commands/createas.c @@ -1558,7 +1558,7 @@ check_ivm_restriction_walker(Node *node, check_ivm_restriction_context *context) if (joinexpr->jointype > JOIN_INNER || context->partial) ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), - errmsg("The JOIN is not supported on incrementally maintainable materialized view"))); + errmsg("OUTER JOIN is not supported on incrementally maintainable materialized view"))); expression_tree_walker(node, check_ivm_restriction_walker, (void *) context); break; diff --git a/src/backend/commands/trigger.c b/src/backend/commands/trigger.c index 7ecac7e2589..d55cc2bd8c3 100644 --- a/src/backend/commands/trigger.c +++ b/src/backend/commands/trigger.c @@ -213,13 +213,6 @@ CreateTriggerFiringOn(CreateTrigStmt *stmt, const char 
*queryString, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), errmsg("Triggers for statements are not yet supported"))); } -#else - if (!stmt->row) - { - ereport(ERROR, - (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), - errmsg("Triggers for statements are not yet supported"))); - } #endif /* SERVERLESS */ /* diff --git a/src/backend/executor/nodeValuesscan.c b/src/backend/executor/nodeValuesscan.c index 03faf359b1b..5988a050687 100644 --- a/src/backend/executor/nodeValuesscan.c +++ b/src/backend/executor/nodeValuesscan.c @@ -303,7 +303,7 @@ ExecInitValuesScan(ValuesScan *node, EState *estate, int eflags) contain_subplans((Node *) exprs)) || IsTransferOn()) #else - contain_subplans((Node *) exprs)) + contain_subplans((Node *) exprs))) #endif { int saved_jit_flags; diff --git a/src/backend/nodes/makefuncs.c b/src/backend/nodes/makefuncs.c index 798c0f95efc..6b11aa56338 100644 --- a/src/backend/nodes/makefuncs.c +++ b/src/backend/nodes/makefuncs.c @@ -864,4 +864,22 @@ makeAPListExpr(void) return (Node *)lexpr; } +#else +Node * +makeAPHashExpr(int modulus) +{ + return NULL; +} + +Node * +makeAPRangeExpr(List *lower, List *upper, List *step, bool has_default) +{ + return NULL; +} + +Node * +makeAPListExpr(void) +{ + return NULL; +} #endif /* SERVERLESS */ \ No newline at end of file diff --git a/src/backend/parser/gram.y b/src/backend/parser/gram.y index a9d2df99442..66f44455872 100644 --- a/src/backend/parser/gram.y +++ b/src/backend/parser/gram.y @@ -50,6 +50,7 @@ #include #include +#include "pg_config.h" #include "access/tableam.h" #include "catalog/index.h" #include "catalog/namespace.h" @@ -6742,8 +6743,10 @@ TabSubPartition: } | TabSubPartitionBy OptAutoPartitionBoundSpec { +#ifdef SERVERLESS PartitionSpec *n = (PartitionSpec *) $1; n->apExpr = (Expr *)$2; +#endif /* SERVERLESS */ $$ = $1; } | TabSubPartitionBy TabSubPartition @@ -9168,14 +9171,18 @@ TriggerForSpec: } | /* EMPTY */ { - /* let creation of triggers go through for pg_restore when upgrading from GP6 to GP7 */ + 
+#ifdef SERVERLESS + $$ = false; +#else /* SERVERLESS */ + /* let creation of triggers go through for pg_restore when upgrading from GP6 to GP7 */ if (!gp_enable_statement_trigger) { ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), errmsg("Triggers for statements are not yet supported"))); } - $$ = false; +#endif /* SERVERLESS */ } ; @@ -9188,6 +9195,10 @@ TriggerForType: ROW { $$ = true; } | STATEMENT { + +#ifdef SERVERLESS + $$ = false; +#else /* SERVERLESS */ /* let creation of triggers go through for pg_restore when upgrading from GP6 to GP7 */ if (!gp_enable_statement_trigger) { @@ -9195,7 +9206,7 @@ TriggerForType: (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), errmsg("Triggers for statements are not yet supported"))); } - $$ = false; +#endif /* SERVERLESS */ } ; diff --git a/src/backend/postmaster/postmaster.c b/src/backend/postmaster/postmaster.c index 52eff5ec8c1..7982ef8c152 100644 --- a/src/backend/postmaster/postmaster.c +++ b/src/backend/postmaster/postmaster.c @@ -2105,7 +2105,8 @@ ServerLoop(void) */ if (!IsBinaryUpgrade && AutoVacPID == 0 && (AutoVacuumingActive() || start_autovac_launcher) && - pmState == PM_RUN) + pmState == PM_RUN && + Gp_role != GP_ROLE_UTILITY) { AutoVacPID = StartAutoVacLauncher(); if (AutoVacPID != 0) @@ -3564,7 +3565,8 @@ reaper(SIGNAL_ARGS) * Likewise, start other special children as needed. In a restart * situation, some of them may be alive already. 
*/ - if (!IsBinaryUpgrade && AutoVacuumingActive() && AutoVacPID == 0) + if (!IsBinaryUpgrade && AutoVacuumingActive() && AutoVacPID == 0 && + Gp_role != GP_ROLE_UTILITY) AutoVacPID = StartAutoVacLauncher(); if (PgArchStartupAllowed() && PgArchPID == 0) PgArchPID = StartArchiver(); diff --git a/src/backend/storage/ipc/procarray.c b/src/backend/storage/ipc/procarray.c index 0926a86fae6..39565e9cb12 100644 --- a/src/backend/storage/ipc/procarray.c +++ b/src/backend/storage/ipc/procarray.c @@ -2920,8 +2920,10 @@ GetSnapshotData(Snapshot snapshot, DtxContext distributedTransactionContext) errmsg("out of memory"))); } +#ifdef SERVERLESS if (GetSnapshotData_hook) return (*GetSnapshotData_hook) (snapshot, distributedTransactionContext); +#endif /* SERVERLESS */ /* * GP: Distributed snapshot. diff --git a/src/backend/utils/adt/dbsize.c b/src/backend/utils/adt/dbsize.c index b90452d4b69..6a10eaa4d05 100644 --- a/src/backend/utils/adt/dbsize.c +++ b/src/backend/utils/adt/dbsize.c @@ -553,14 +553,22 @@ pg_relation_size(PG_FUNCTION_ARGS) // TODO directory table size = calculate_relation_size(rel, forkNumber); - if (Gp_role == GP_ROLE_DISPATCH && (RelationIsHeap(rel) || RelationIsAppendOptimized(rel))) + if (Gp_role == GP_ROLE_DISPATCH) { - char *sql; +#ifdef SERVERLESS + /* The storage is shared */ + if (!RelationIsHashdata(rel)) + { +#endif /* SERVERLESS */ + char *sql; sql = psprintf("select pg_catalog.pg_relation_size(%u, '%s')", relOid, forkNames[forkNumber]); size += get_size_from_segDBs(sql); +#ifdef SERVERLESS + } +#endif /* SERVERLESS */ } relation_close(rel, AccessShareLock); diff --git a/src/backend/utils/cache/relcache.c b/src/backend/utils/cache/relcache.c index f58f855bda6..f2a87b4dedc 100644 --- a/src/backend/utils/cache/relcache.c +++ b/src/backend/utils/cache/relcache.c @@ -6797,6 +6797,8 @@ write_relcache_init_file(bool shared) ResQueueCapabilityRelationId }; +#else + int i; #endif /* SERVERLESS */ if (write_relcache_init_file_hook && 
write_relcache_init_file_hook()) diff --git a/src/backend/utils/init/postinit.c b/src/backend/utils/init/postinit.c index 47d7eff34ba..27ac2c4309b 100644 --- a/src/backend/utils/init/postinit.c +++ b/src/backend/utils/init/postinit.c @@ -857,6 +857,11 @@ InitPostgres(const char *in_dbname, Oid dboid, const char *username, XactIsoLevel = XACT_READ_COMMITTED; (void) GetTransactionSnapshot(); + +#ifdef SERVERLESS + TransferReset(); + SetTransferOn(); +#endif /* SERVERLESS */ } /* diff --git a/src/backend/utils/resgroup/resgroup.c b/src/backend/utils/resgroup/resgroup.c index 6dc5f0c8174..2bc681cbba0 100644 --- a/src/backend/utils/resgroup/resgroup.c +++ b/src/backend/utils/resgroup/resgroup.c @@ -265,6 +265,17 @@ static void resgroupDumpCaps(StringInfo str, ResGroupCap *caps); static void resgroupDumpSlots(StringInfo str); static void resgroupDumpFreeSlots(StringInfo str); +static ResGroupData* resgroupCreateOrUpdate(Oid groupId, const ResGroupCaps *caps); +static ResGroupData* resgroupCreate(Oid groupId, const ResGroupCaps *caps); +static void resgroupUpdate(ResGroupData *group, ResGroupCaps oldCaps, + const ResGroupCaps *caps); +static void resgroupUpdateCpuSet(ResGroupData *group, const ResGroupCaps *caps, + const char *oldcpuset); +#ifdef SERVERLESS +static void resgroupSimpleInitCpuSet(ResGroupData *group, const ResGroupCaps *caps); +static ResGroupData *findGroupOrCreate(Oid groupId, const ResGroupCaps *caps); +#endif + static void sessionSetSlot(ResGroupSlotData *slot); static void sessionResetSlot(ResGroupSlotData *slot); static ResGroupSlotData *sessionGetSlot(void); @@ -421,6 +432,99 @@ initCgroup(void) cgroupOpsRoutine->initcgroup(); } +static ResGroupData* +resgroupCreate(Oid groupId, const ResGroupCaps *caps) +{ + ResGroupData *group; + + group = createGroup(groupId, caps); + Assert(group != NULL); + + cgroupOpsRoutine->createcgroup(groupId); + + if (CpusetIsEmpty(caps->cpuset)) + { + cgroupOpsRoutine->setcpulimit(groupId, caps->cpuHardQuotaLimit); + 
cgroupOpsRoutine->setcpupriority(groupId, caps->cpuSoftPriority); + } + + return group; +} + +#ifdef SERVERLESS +/* + * The efficiency is not high, so the bitmap implementation is still needed + * for backend startup. + */ +static void +resgroupSimpleInitCpuSet(ResGroupData *group, const ResGroupCaps *caps) +{ + resgroupUpdateCpuSet(group, caps, ""); +} +#endif + +static void +resgroupUpdateCpuSet(ResGroupData *group, const ResGroupCaps *caps, const char *oldcpuset) +{ + char defaultCpusetGroup[MaxCpuSetLength]; + + /* update current group cpuset */ + char *cpuset = getCpuSetByRole(caps->cpuset); + cgroupOpsRoutine->setcpuset(group->groupId, cpuset); + + /* reset default group if cpuset has changed */ + cgroupOpsRoutine->getcpuset(DEFAULT_CPUSET_GROUP_ID, + defaultCpusetGroup, + MaxCpuSetLength); + /* Add old value to default group sub new value from default group */ + CpusetUnion(defaultCpusetGroup, oldcpuset, MaxCpuSetLength); + CpusetDifference(defaultCpusetGroup, cpuset, MaxCpuSetLength); + cgroupOpsRoutine->setcpuset(DEFAULT_CPUSET_GROUP_ID, defaultCpusetGroup); +} + +static void +resgroupUpdate(ResGroupData *group, ResGroupCaps oldCaps, const ResGroupCaps *caps) +{ + group->caps = *caps; + + if (oldCaps.cpuHardQuotaLimit != caps->cpuHardQuotaLimit) + { + cgroupOpsRoutine->setcpulimit(group->groupId, caps->cpuHardQuotaLimit); + } + if (oldCaps.cpuSoftPriority != caps->cpuSoftPriority) + { + cgroupOpsRoutine->setcpupriority(group->groupId, caps->cpuSoftPriority); + } + if (strcmp(oldCaps.cpuset, caps->cpuset) && + gp_resource_group_enable_cgroup_cpuset) + { + char *oldcpuset = getCpuSetByRole(oldCaps.cpuset); + resgroupUpdateCpuSet(group, caps, oldcpuset); + } + if (oldCaps.concurrency != caps->concurrency) + { + wakeupSlots(group, true); + } +} + +static ResGroupData* +resgroupCreateOrUpdate(Oid groupId, const ResGroupCaps *caps) +{ + ResGroupData *group; + + group = groupHashFind(groupId, false); + if (group == NULL) + { + group = resgroupCreate(groupId, 
caps); + } + else + { + resgroupUpdate(group, group->caps, caps); + } + + return group; +} + /* * Load the resource groups in shared memory. Note this * can only be done after enough setup has been done. This uses @@ -1660,6 +1764,25 @@ UnassignResGroup(void) pgstat_report_resgroup(InvalidOid); } +#ifdef SERVERLESS +static ResGroupData * +findGroupOrCreate(Oid groupId, const ResGroupCaps *caps) +{ + ResGroupData *group; + + group = groupHashFind(groupId, false); + if (group == NULL) + { + group = resgroupCreate(groupId, caps); + if (gp_resource_group_enable_cgroup_cpuset) + resgroupSimpleInitCpuSet(group, caps); + } + + Assert(group != NULL); + return group; +} +#endif + /* * QEs are not assigned/unassigned to a resource group on segments for each * transaction, instead, they switch resource group when a new resource group diff --git a/src/include/nodes/makefuncs.h b/src/include/nodes/makefuncs.h index ecf77a2e4ad..6e6eadfa940 100644 --- a/src/include/nodes/makefuncs.h +++ b/src/include/nodes/makefuncs.h @@ -108,10 +108,8 @@ extern VacuumRelation *makeVacuumRelation(RangeVar *relation, Oid oid, List *va_ extern ReindexIndexInfo *makeReindexIndexInfo(Oid indexId, Oid tableId, Oid amId, bool safe); -#ifdef SERVERLESS extern Node *makeAPHashExpr(int modulus); extern Node *makeAPRangeExpr(List *lower, List *upper, List *step, bool has_default); extern Node *makeAPListExpr(void); -#endif /* SERVERLESS */ #endif /* MAKEFUNC_H */ diff --git a/src/interfaces/libpq/fe-misc.c b/src/interfaces/libpq/fe-misc.c index 9c6e9916d2f..6e56baad4b5 100644 --- a/src/interfaces/libpq/fe-misc.c +++ b/src/interfaces/libpq/fe-misc.c @@ -64,9 +64,11 @@ static int pqSocketCheck(PGconn *conn, int forRead, int forWrite, time_t end_time); static int pqSocketPoll(int sock, int forRead, int forWrite, time_t end_time); +#ifdef SERVERLESS #ifndef FRONTEND static int32 last_assigned_exec_status_type = PGRES_LAST_DEFAULT; #endif +#endif /* SERVERLESS */ /* * PQlibVersion: return the libpq version 
number @@ -1395,6 +1397,7 @@ libpq_ngettext(const char *msgid, const char *msgid_plural, unsigned long n) #endif /* ENABLE_NLS */ +#ifdef SERVERLESS #ifndef FRONTEND /* * When we need to add a new exec status in extension, we should @@ -1412,3 +1415,4 @@ add_exec_status_type(void) return (ExecStatusType) last_assigned_exec_status_type; } #endif +#endif /* SERVERLESS */ \ No newline at end of file diff --git a/src/interfaces/libpq/libpq-fe.h b/src/interfaces/libpq/libpq-fe.h index 6a984aa31b0..f69b96b7e0c 100644 --- a/src/interfaces/libpq/libpq-fe.h +++ b/src/interfaces/libpq/libpq-fe.h @@ -682,9 +682,11 @@ extern PQsslKeyPassHook_OpenSSL_type PQgetSSLKeyPassHook_OpenSSL(void); extern void PQsetSSLKeyPassHook_OpenSSL(PQsslKeyPassHook_OpenSSL_type hook); extern int PQdefaultSSLKeyPassHook_OpenSSL(char *buf, int size, PGconn *conn); +#ifdef SERVERLESS #ifndef FRONTEND extern ExecStatusType add_exec_status_type(void); #endif +#endif /* SERVERLESS */ #ifdef __cplusplus } #endif diff --git a/src/test/regress/expected/join_hash_optimizer.out b/src/test/regress/expected/join_hash_optimizer.out index 053d0ef4898..b59879e4184 100644 --- a/src/test/regress/expected/join_hash_optimizer.out +++ b/src/test/regress/expected/join_hash_optimizer.out @@ -998,8 +998,8 @@ insert into wide select generate_series(3, 100) as id, rpad('', 320000, 'x') as explain (costs off) select length(max(s.t)) from wide left join (select id, coalesce(t, '') || '' as t from wide) s using (id); - QUERY PLAN ------------------------------------------------------------------------------- + QUERY PLAN +------------------------------------------------------------------------------------ Finalize Aggregate -> Gather Motion 3:1 (slice1; segments: 3) -> Partial Aggregate @@ -1009,11 +1009,12 @@ explain (costs off) Hash Key: wide.id -> Seq Scan on wide -> Hash - -> Redistribute Motion 3:3 (slice3; segments: 3) - Hash Key: wide_1.id - -> Seq Scan on wide wide_1 + -> Result + -> Redistribute Motion 3:3 (slice3; 
segments: 3) + Hash Key: wide_1.id + -> Seq Scan on wide wide_1 Optimizer: Pivotal Optimizer (GPORCA) -(13 rows) +(14 rows) select length(max(s.t)) from wide left join (select id, coalesce(t, '') || '' as t from wide) s using (id); diff --git a/src/test/regress/expected/misc_sanity.out b/src/test/regress/expected/misc_sanity.out index f11337e709d..a9729b2ad58 100644 --- a/src/test/regress/expected/misc_sanity.out +++ b/src/test/regress/expected/misc_sanity.out @@ -106,7 +106,9 @@ ORDER BY 1, 2; gp_configuration_history | desc | text gp_version_at_initdb | productversion | text gp_warehouse | status | text + gp_warehouse | warehouse_acl | aclitem[] gp_warehouse | warehouse_name | text + gp_warehouse | whoptions | text[] main_manifest | path | text pg_attribute | attacl | aclitem[] pg_attribute | attfdwoptions | text[] @@ -116,6 +118,7 @@ ORDER BY 1, 2; pg_class | reloptions | text[] pg_class | relpartbound | pg_node_tree pg_foreign_table_seg | ftsoptions | text[] + pg_class | relpartspec | pg_node_tree pg_index | indexprs | pg_node_tree pg_index | indpred | pg_node_tree pg_largeobject | data | bytea @@ -134,12 +137,13 @@ ORDER BY 1, 2; pg_task | nodename | text pg_task | schedule | text pg_task | username | text + pg_task | warehouse | text pg_task_run_history | command | text pg_task_run_history | database | text pg_task_run_history | return_message | text pg_task_run_history | status | text pg_task_run_history | username | text -(36 rows) +(40 rows) -- system catalogs without primary keys -- diff --git a/src/test/regress/expected/oidjoins.out b/src/test/regress/expected/oidjoins.out index f739326012d..10cb443616e 100644 --- a/src/test/regress/expected/oidjoins.out +++ b/src/test/regress/expected/oidjoins.out @@ -145,6 +145,7 @@ NOTICE: checking pg_aggregate {aggfinalfn} => pg_proc {oid} NOTICE: checking gp_storage_server {srvowner} => pg_authid {oid} NOTICE: checking gp_storage_user_mapping {umserver} => gp_storage_server {oid} NOTICE: checking 
gp_storage_user_mapping {umuser} => pg_authid {oid} +NOTICE: checking gp_warehouse {owner} => pg_authid {oid} NOTICE: checking pg_aggregate {aggcombinefn} => pg_proc {oid} NOTICE: checking pg_aggregate {aggserialfn} => pg_proc {oid} NOTICE: checking pg_aggregate {aggdeserialfn} => pg_proc {oid}