diff --git a/.gitmodules b/.gitmodules
index 038b1484190..1dc2cbf7153 100644
--- a/.gitmodules
+++ b/.gitmodules
@@ -1,3 +1,16 @@
[submodule "gpcontrib/gpcloud/test/googletest"]
path = gpcontrib/gpcloud/test/googletest
url = https://github.com/google/googletest.git
+[submodule "contrib/pax_storage/src/cpp/contrib/googletest"]
+ path = contrib/pax_storage/src/cpp/contrib/googletest
+ url = https://code.hashdata.xyz/cloudberry/googletest
+[submodule "contrib/pax_storage/src/cpp/contrib/tabulate"]
+ path = contrib/pax_storage/src/cpp/contrib/tabulate
+ url = https://code.hashdata.xyz/cloudberry/tabulate
+[submodule "contrib/pax_storage/src/cpp/contrib/googlebench"]
+ path = contrib/pax_storage/src/cpp/contrib/googlebench
+ url = https://code.hashdata.xyz/cloudberry/benchmark.git
+[submodule "contrib/pax_storage/src/cpp/contrib/cpp-stub"]
+ path = contrib/pax_storage/src/cpp/contrib/cpp-stub
+ url = https://code.hashdata.xyz/cloudberry/cpp-stub.git
+ branch = dev
diff --git a/GNUmakefile.in b/GNUmakefile.in
index 77c2ab55218..e6333e39bec 100644
--- a/GNUmakefile.in
+++ b/GNUmakefile.in
@@ -31,6 +31,9 @@ all:
$(MAKE) -C contrib/pg_buffercache all
ifeq ($(with_openssl), yes)
$(MAKE) -C contrib/sslinfo all
+endif
+ifeq ($(enable_pax), yes)
+ $(MAKE) -C contrib/pax_storage all
endif
$(MAKE) -C gpMgmt all
$(MAKE) -C gpcontrib all
@@ -73,6 +76,9 @@ install:
$(MAKE) -C contrib/tablefunc $@
$(MAKE) -C contrib/passwordcheck $@
$(MAKE) -C contrib/pg_buffercache $@
+ifeq ($(enable_pax), yes)
+ $(MAKE) -C contrib/pax_storage $@
+endif
ifeq ($(with_openssl), yes)
$(MAKE) -C contrib/sslinfo $@
endif
@@ -180,6 +186,9 @@ ICW_TARGETS += contrib/extprotocol contrib/dblink contrib/pg_trgm
ICW_TARGETS += contrib/indexscan contrib/hstore contrib/pgcrypto
ICW_TARGETS += contrib/tablefunc contrib/passwordcheck
ICW_TARGETS += contrib/pg_buffercache
+ifeq ($(enable_pax), yes)
+ICW_TARGETS += contrib/pax_storage
+endif
# sslinfo depends on openssl
ifeq ($(with_openssl), yes)
ICW_TARGETS += contrib/sslinfo
diff --git a/configure b/configure
index 57fec242cce..47b466a34e4 100755
--- a/configure
+++ b/configure
@@ -751,6 +751,7 @@ ICU_CFLAGS
with_icu
enable_thread_safety
INCLUDES
+enable_pax
enable_preload_ic_module
enable_ic_proxy
enable_external_fts
@@ -901,6 +902,7 @@ enable_gpcloud
enable_external_fts
enable_ic_proxy
enable_preload_ic_module
+enable_pax
enable_thread_safety
with_icu
with_tcl
@@ -1616,6 +1618,7 @@ Optional Features:
library)
--disable-preload-ic-module
disable preload interconnect module
+ --enable-pax enable pax support
--disable-thread-safety disable thread-safety in client libraries
--enable-openssl-redirect
enable redirect openssl interface to internal
@@ -9098,6 +9101,36 @@ fi
{ $as_echo "$as_me:${as_lineno-$LINENO}: result: checking whether to build with preload ic module ... $enable_preload_ic_module" >&5
$as_echo "checking whether to build with preload ic module ... $enable_preload_ic_module" >&6; }
+#
+# pax support
+#
+
+
+# Check whether --enable-pax was given.
+if test "${enable_pax+set}" = set; then :
+ enableval=$enable_pax;
+ case $enableval in
+ yes)
+
+$as_echo "#define USE_PAX_STORAGE 1" >>confdefs.h
+
+ ;;
+ no)
+ :
+ ;;
+ *)
+ as_fn_error $? "no argument expected for --enable-pax option" "$LINENO" 5
+ ;;
+ esac
+
+else
+ enable_pax=no
+
+fi
+
+
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: checking whether to build with pax support ... $enable_pax" >&5
+$as_echo "checking whether to build with pax support ... $enable_pax" >&6; }
#
# Include directories
diff --git a/configure.ac b/configure.ac
index d9df92f6768..1686a00416b 100644
--- a/configure.ac
+++ b/configure.ac
@@ -925,6 +925,16 @@ PGAC_ARG_BOOL(enable, preload-ic-module, yes,
AC_MSG_RESULT([checking whether to build with preload ic module ... $enable_preload_ic_module])
AC_SUBST(enable_preload_ic_module)
+#
+# pax support
+#
+PGAC_ARG_BOOL(enable, pax, no,
+ [enable pax support],
+ [AC_DEFINE(USE_PAX_STORAGE, 1,
+ [Define to 1 to support pax])])
+AC_MSG_RESULT([checking whether to build with pax support ... $enable_pax])
+AC_SUBST(enable_pax)
+
#
# Include directories
#
diff --git a/contrib/Makefile b/contrib/Makefile
index fceafaffe8a..2292adb88f2 100644
--- a/contrib/Makefile
+++ b/contrib/Makefile
@@ -98,6 +98,12 @@ else
ALWAYS_SUBDIRS += hstore_plpython jsonb_plpython ltree_plpython
endif
+ifeq ($(enable_pax),yes)
+SUBDIRS += pax_storage
+else
+ALWAYS_SUBDIRS += pax_storage
+endif
+
# Missing:
# start-scripts \ (does not have a makefile)
diff --git a/contrib/pax_storage/.ci/tf/qingcloud-provider.tf b/contrib/pax_storage/.ci/tf/qingcloud-provider.tf
deleted file mode 100644
index 02941a05be8..00000000000
--- a/contrib/pax_storage/.ci/tf/qingcloud-provider.tf
+++ /dev/null
@@ -1,86 +0,0 @@
-variable "qingcloud_access_key" {
- sensitive = true
- type = string
-}
-
-variable "qingcloud_secret_key" {
- sensitive = true
- type = string
-}
-
-variable "qingcloud_zone" {
- default = "pek3c"
-}
-
-variable "instance_name" {
- default = "ci"
-}
-
-variable "instance_image" {
- default = "img-qbpas5m2"
-}
-
-variable "instance_class" {
- default = 202
-}
-
-variable "instance_cpu" {
- default = 16
-}
-
-variable "instance_memory" {
- default = 16384
-}
-
-variable "instance_os_disk_size" {
- default = 100
-}
-
-variable "instance_vxnet" {
- default = "vxnet-5tjdylj"
-}
-
-variable "instance_keypair" {
- default = [
- "kp-o07unn26"]
-}
-
-terraform {
- required_providers {
- qingcloud = {
- source = "HashDataInc/qingcloud"
- version = "1.2.7"
- }
- ansible = {
- source = "nbering/ansible"
- version = "1.0.4"
- }
- }
-}
-
-provider "qingcloud" {
- access_key = var.qingcloud_access_key
- secret_key = var.qingcloud_secret_key
- zone = var.qingcloud_zone
-}
-
-
-resource "qingcloud_instance" "ci" {
- name = var.instance_name
- image_id = var.instance_image
- instance_class = var.instance_class
- cpu = var.instance_cpu
- memory = var.instance_memory
- os_disk_size = var.instance_os_disk_size
- managed_vxnet_id = var.instance_vxnet
- keypair_ids = var.instance_keypair
-}
-
-resource "ansible_host" "ci" {
- inventory_hostname = qingcloud_instance.ci.private_ip
- groups = [
- "runner"]
- vars = {
- ansible_user = "root"
- }
-}
diff --git a/contrib/pax_storage/.clang-tidy b/contrib/pax_storage/.clang-tidy
index 6b6594d4fb7..9e3bff8027b 100644
--- a/contrib/pax_storage/.clang-tidy
+++ b/contrib/pax_storage/.clang-tidy
@@ -12,7 +12,7 @@ Checks: '-*,
modernize-avoid-bind,
modernize-loop-convert,
modernize-make-shared,
- modernize-make-unique,
+ -modernize-make-unique,
modernize-raw-string-literal,
modernize-redundant-void-arg,
modernize-replace-auto-ptr,
@@ -37,7 +37,7 @@ Checks: '-*,
readability-avoid-const-params-in-decls,
readability-const-return-type,
readability-container-size-empty,
- readability-convert-member-functions-to-static,
+ -readability-convert-member-functions-to-static,
readability-deleted-default,
readability-make-member-function-const,
readability-misplaced-array-index,
@@ -52,7 +52,7 @@ Checks: '-*,
readability-uniqueptr-delete-release,
readability-redundant-member-init,
readability-simplify-subscript-expr,
- readability-simplify-boolean-expr,
+ -readability-simplify-boolean-expr,
readability-inconsistent-declaration-parameter-name,
readability-identifier-naming,
@@ -68,7 +68,6 @@ Checks: '-*,
bugprone-incorrect-roundings,
bugprone-infinite-loop,
bugprone-integer-division,
- bugprone-macro-parentheses,
bugprone-macro-repeated-side-effects,
bugprone-misplaced-operator-in-strlen-in-alloc,
bugprone-misplaced-pointer-artithmetic-in-alloc,
@@ -225,4 +224,4 @@ CheckOptions:
- key: modernize-use-transparent-functors.SafeMode
value: 1
- key: modernize-use-emplace.IgnoreImplicitConstructors
- value: 1
\ No newline at end of file
+ value: 1
diff --git a/contrib/pax_storage/.githooks/pre-push b/contrib/pax_storage/.githooks/pre-push
deleted file mode 100755
index 22bebb8148b..00000000000
--- a/contrib/pax_storage/.githooks/pre-push
+++ /dev/null
@@ -1,25 +0,0 @@
-#!/bin/sh
-#
-# Verify what is about to be pushed. Called by "git
-# push" after it has checked the remote status, but before anything has been
-# pushed. If this script exits with a non-zero status nothing will be pushed.
-#
-
-rc=0
-
-if [ -x "./tools/cpplint.py" ]; then
- echo "Running cpplint ..."
- mkdir -p .tmp/
- ./tools/cpplint.py --counting=detailed --recursive . > .tmp/cpplint.log 2>&1
- rc=$?
- if [ $rc -ne 0 ]; then
- tail -n 1 .tmp/cpplint.log
- echo ""
- echo "ERROR cpplint returned errors!"
- echo "ERROR Fix the problem and use 'git add' to update your changes."
- echo "ERROR See `pwd`/.tmp/cpplint.log for more information."
- echo ""
- fi
-fi
-
-exit $rc
\ No newline at end of file
diff --git a/contrib/pax_storage/.gitignore b/contrib/pax_storage/.gitignore
index 76807d0dbc3..1fe686d3b2e 100644
--- a/contrib/pax_storage/.gitignore
+++ b/contrib/pax_storage/.gitignore
@@ -20,5 +20,5 @@ clang-tidy.result
**/*.pb.cc
# Executables
-*.out
+/*.out
!src/data/expected/*.out
diff --git a/contrib/pax_storage/.gitlab-ci.yml b/contrib/pax_storage/.gitlab-ci.yml
deleted file mode 100644
index 51415fcc444..00000000000
--- a/contrib/pax_storage/.gitlab-ci.yml
+++ /dev/null
@@ -1,71 +0,0 @@
-stages:
- - build
-
-.global_variables: &global_variables
- # Runner instance name, passed to Terraform
- TF_VAR_instance_name: "cbdb-test-pipeline-${CI_PIPELINE_ID}-job-${CI_JOB_ID}"
- TF_VAR_qingcloud_access_key: "key"
- TF_VAR_qingcloud_secret_key: "secret"
- # Custom clone path on runner instance
- GIT_SUBMODULE_STRATEGY: "normal"
- GIT_DEPTH: 0
- CI_USER: root
- # For internal deploy
- ARTIFACTORY_USERNAME: "admin"
- ARTIFACTORY_PASSWORD: "token"
- AWS_ACCESS_KEY_ID: "${TF_VAR_qingcloud_access_key}"
- AWS_SECRET_ACCESS_KEY: "${TF_VAR_qingcloud_secret_key}"
- GIT_CLONE_PATH: "/code/gpdb_pax_src"
- # cbdb project dir
- CBDB_PROJECT_DIR: "/code/gpdb_src"
- # For artifacts
- BUCKET_INTERMEDIATE: "http://artifactory.hashdata.xyz/artifactory/hashdata-repository/intermediate-artifacts"
- # For pax storage project
- CBDB_PAX_BRANCH: $CI_COMMIT_BRANCH
-
-.build_script: &build_script
- script: |
- git clone -b feature-pax https://buildbot:Passw0rd@code.hashdata.xyz/cloudberry/cbdb.git $CBDB_PROJECT_DIR
- cd /code/gpdb_src
- git submodule update --init --recursive
- cd /code
- echo "${CI_PIPELINE_ID}" > ${CBDB_PROJECT_DIR}/BUILD_NUMBER
- bash ${CBDB_PROJECT_DIR}/hd-ci/compile_cbdb.bash
- bash ${GIT_CLONE_PATH}/hd-ci/compile_pax.bash
- bash ${GIT_CLONE_PATH}/hd-ci/clang_tidy_pax.bash
- cp ${CBDB_PROJECT_DIR}/cbdb-artifacts.txt ${CI_PROJECT_DIR}/cbdb-artifacts.txt
- touch /code/CI_STATUS
-
-.build_artifacts: &build_artifacts
- artifacts:
- name: "artifacts"
- when: always
- paths:
- - ${CI_PROJECT_DIR}/cbdb-artifacts.txt
- - ${GIT_CLONE_PATH}/clang-tidy.result
- reports:
- dotenv: ${CI_PROJECT_DIR}/cbdb-artifacts.txt
-
-.cbdb_test_rules: &cbdb_test_rules
- rules:
- - if: $CI_COMMIT_TAG
- when: never
- - if: '$RUN_NIGHTLY_BUILD == "true"'
- when: always
- - if: '$RUN_TEST_BUILD == "true"'
- when: always
- - if: '$CI_PIPELINE_SOURCE == "pipeline"'
- when: always
- - when: always
-
-x86_64:build:
- stage: build
- variables:
- <<: *global_variables
- <<: *build_script
- <<: *build_artifacts
- <<: *cbdb_test_rules
- timeout: 8 hours
- retry:
- max: 2
- when: always
diff --git a/contrib/pax_storage/.gitmodules b/contrib/pax_storage/.gitmodules
index 884a7c5972b..5c3c84dc64d 100644
--- a/contrib/pax_storage/.gitmodules
+++ b/contrib/pax_storage/.gitmodules
@@ -5,3 +5,6 @@
path = src/cpp/contrib/zstd
url = https://code.hashdata.xyz/cloudberry/lib_zstd.git
branch = v1.5.5
+[submodule "src/cpp/contrib/cpp-stub"]
+ path = src/cpp/contrib/cpp-stub
+ url = https://code.hashdata.xyz/cloudberry/cpp-stub.git
diff --git a/contrib/pax_storage/CMakeLists.txt b/contrib/pax_storage/CMakeLists.txt
index b4ab18181ea..2a6cde93a82 100644
--- a/contrib/pax_storage/CMakeLists.txt
+++ b/contrib/pax_storage/CMakeLists.txt
@@ -2,73 +2,74 @@ project(Pax)
cmake_minimum_required (VERSION 3.11.0)
set(CMAKE_CXX_STANDARD 14)
-find_program(
- PG_CONFIG pg_config
- HINTS ${PG_PATH}
- PATH_SUFFIXES bin
- DOC "The path to the pg_config of the CBDB version to compile against")
+set(TOP_DIR ${PROJECT_SOURCE_DIR}/../..)
+set(CBDB_INCLUDE_DIR ${TOP_DIR}/src/include)
+set(CMAKE_EXPORT_COMPILE_COMMANDS ON)
+set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wall -g")
-if(NOT PG_CONFIG)
- message(FATAL_ERROR "Unable to find 'pg_config'")
-endif()
-
-# Function to call pg_config and extract values.
-function(GET_PG_CONFIG var)
- set(_temp)
-
- # Only call pg_config if the variable didn't already have a value.
- if(NOT ${var})
- execute_process(
- COMMAND ${PG_CONFIG} ${ARGN}
- OUTPUT_VARIABLE _temp
- OUTPUT_STRIP_TRAILING_WHITESPACE)
- endif()
-
- set(${var}
- ${_temp}
- PARENT_SCOPE)
-endfunction()
+# Build gtest options
+option(BUILD_GTEST "Build with google test" ON)
-# Get CBDB configuration from pg_config
-get_pg_config(PG_INCLUDEDIR --includedir)
-# TODO check exists if this is needed
-set(CBDB_INCLUDE_DIR ${PG_INCLUDEDIR}/postgresql/server)
+option(BUILD_GBENCH "Build with google benchmark" OFF)
-# Debug options
-option(ENBALE_DEBUG "Enable debug" ON)
+# Build pax tools
+option(BUILD_TOOLS "Build with pax tools" ON)
-# Build gtest options
-option(BUILD_GTEST "Build with google test" ON)
-
-# Build pax format lib
-option(BUILD_PAX_FORMAT "Build pax format lib" OFF)
+# env CBDB_BUILD_TYPE is set by Lighting pipeline
+if (NOT DEFINED ENV{CBDB_BUILD_TYPE})
+ set(ENV{CBDB_BUILD_TYPE} "debug")
+endif()
+set(CBDB_BUILD_TYPE $ENV{CBDB_BUILD_TYPE})
+message(STATUS "env CBDB_BUILD_TYPE=$ENV{CBDB_BUILD_TYPE} => ${CBDB_BUILD_TYPE}")
-if (ENBALE_DEBUG)
- ADD_DEFINITIONS(-DENBALE_DEBUG)
- # Use to build compile_commands.json
- set(CMAKE_EXPORT_COMPILE_COMMANDS ON)
- SET(CMAKE_BUILD_TYPE "Debug")
- SET(CMAKE_CXX_FLAGS_DEBUG "$ENV{CXXFLAGS} -O0 -Wall -g -ggdb")
- SET(CMAKE_CXX_FLAGS_RELEASE "$ENV{CXXFLAGS} -O3 -Wall")
-else()
+if (${CBDB_BUILD_TYPE} STREQUAL "release")
SET(CMAKE_BUILD_TYPE "Release")
+ SET(CMAKE_CXX_FLAGS_RELEASE "$ENV{CXXFLAGS} -O3")
# no need build gtest in release mode
SET(BUILD_GTEST OFF)
-endif(ENBALE_DEBUG)
-
-# Vec options
-option(VEC_BUILD "Build pax vectorization version" OFF)
-set(VEC_HOME "" CACHE STRING "Path to vectorization home")
-if (VEC_BUILD)
-
-if("${VEC_HOME}" STREQUAL "")
- message(FATAL_ERROR "No found vectorization home setting. Using -DVEC_HOME to spec vectorization home")
+elseif(${CBDB_BUILD_TYPE} STREQUAL "debug")
+ ADD_DEFINITIONS(-DENABLE_DEBUG)
+ # Use to build compile_commands.json
+ SET(CMAKE_BUILD_TYPE "Debug")
+ SET(CMAKE_CXX_FLAGS_DEBUG "$ENV{CXXFLAGS} -O0 -ggdb")
+else()
+ message(FATAL_ERROR "unknown CBDB_BUILD_TYPE: ${CBDB_BUILD_TYPE}")
endif()
-set(CBDB_ROOT_INCLUDE_DIR ${PG_INCLUDEDIR})
-ADD_DEFINITIONS(-DVEC_BUILD)
+if(BUILD_GBENCH)
+ SET(BUILD_GTEST ON)
+endif(BUILD_GBENCH)
+
+if (BUILD_GTEST)
+ SET(CMAKE_C_FLAGS_DEBUG "${CMAKE_C_FLAGS_DEBUG} -no-pie -fno-stack-protector -Wall -Wno-unused-function -Wno-unused-variable")
+ SET(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -fno-access-control -fno-inline -Wno-pmf-conversions -Wl,--allow-multiple-definition -no-pie -fno-stack-protector")
+endif(BUILD_GTEST)
+
+# Plasma options
+option (ENABLE_PLASMA "Enable plasma cache" OFF)
+if (ENABLE_PLASMA)
+ADD_DEFINITIONS(-DENABLE_PLASMA)
+# plasma need use CXX_STANDARD 17
+set(CMAKE_CXX_STANDARD 17)
+endif()
+# Vec options
+include(CheckSymbolExists)
+SET(PG_CONFIG_HEADER_FILE "${CBDB_INCLUDE_DIR}/pg_config.h")
+CHECK_SYMBOL_EXISTS(USE_VECTORIZATION "${PG_CONFIG_HEADER_FILE}" VEC_BUILD)
+message(STATUS "pg_config.h => ${PG_CONFIG_HEADER_FILE}")
+if (VEC_BUILD)
+ set(VEC_HOME "${PROJECT_SOURCE_DIR}/../vectorization")
+ ADD_DEFINITIONS(-DVEC_BUILD)
+ message(STATUS "Build pax with vectorization support, VEC_HOME=${VEC_HOME}")
+else()
+ message(STATUS "Build pax without vectorization support")
endif(VEC_BUILD)
+
+## find dependency
+## depend on the value of the above option to check dependencies.
+list(APPEND CMAKE_MODULE_PATH "${CMAKE_CURRENT_SOURCE_DIR}/")
+include(FindDependencies)
add_subdirectory(src/cpp)
diff --git a/contrib/pax_storage/FindDependencies.cmake b/contrib/pax_storage/FindDependencies.cmake
new file mode 100644
index 00000000000..c98e1eb3a67
--- /dev/null
+++ b/contrib/pax_storage/FindDependencies.cmake
@@ -0,0 +1,37 @@
+find_package(BISON REQUIRED)
+
+## protobuf
+include(FindProtobuf)
+find_package(Protobuf 3.6.1 REQUIRED)
+
+# ztsd
+# In our image snapshot, zstd is managed using pkg-config, so the pkg-config method is tried first here.
+find_package(PkgConfig QUIET)
+if(PKG_CONFIG_FOUND)
+ pkg_check_modules(ZSTD libzstd)
+endif()
+if(NOT ZSTD_FOUND)
+ find_package(ZSTD QUIET)
+ if(NOT ZSTD_FOUND)
+ message(FATAL_ERROR "zstd not found")
+ endif()
+endif()
+
+## for vectorazition
+if (VEC_BUILD)
+ find_package(PkgConfig REQUIRED)
+ pkg_check_modules(GLIB REQUIRED glib-2.0)
+
+ # resolve vectorization dependency, or the header files will miss
+ message(STATUS "Resolve vectorization dependency ...")
+ execute_process(
+ COMMAND make download_arrow
+ WORKING_DIRECTORY ${VEC_HOME}
+ RESULT_VARIABLE CMD_RESULT
+ )
+ if(CMD_RESULT EQUAL 0)
+ message(STATUS "Resolve vectorization dependency succeeded.")
+ else()
+ message(FATAL_ERROR "Resolve vectorization dependency failed with result: ${CMD_RESULT}")
+ endif()
+endif(VEC_BUILD)
diff --git a/contrib/pax_storage/Makefile b/contrib/pax_storage/Makefile
new file mode 100644
index 00000000000..8a6f143713d
--- /dev/null
+++ b/contrib/pax_storage/Makefile
@@ -0,0 +1,59 @@
+# contrib/pax_storage/Makefile
+
+MODULE_big = pax
+OBJS = \
+ $(WIN32RES)
+PG_CPPFLAGS = -I/usr/local/include
+PG_CXXFLAGS = -std=c++14
+
+PGFILEDESC = "pax - PAX table access method"
+SHLIB_LINK += -luuid
+
+REGRESS = setup
+REGRESS += detoast ddl types update
+# FIXME: several plans are bad in update_gp when use orca
+# REGRESS += update_gp
+REGRESS += teardown
+
+ifdef USE_PGXS
+PG_CONFIG = pg_config
+PGXS := $(shell $(PG_CONFIG) --pgxs)
+include $(PGXS)
+else
+subdir = contrib/pax_storage
+top_builddir = ../../
+include $(top_builddir)/src/Makefile.global
+include $(top_srcdir)/contrib/contrib-global.mk
+endif
+
+REGRESS_OPTS += --init-file=$(top_builddir)/src/test/regress/init_file
+
+.PHONY: all
+all: build-pax
+
+.PHONY: install-data build-pax
+build-pax:
+ @echo "build pax"
+ mkdir -p build
+ cd build && cmake .. -DCMAKE_INSTALL_PREFIX=$(DESTDIR)$(prefix) && make -j8 && cp src/cpp/libpax.so ../pax.so
+
+install-data: build-pax
+ $(INSTALL_DATA) pax-cdbinit--1.0.sql '$(DESTDIR)$(datadir)/cdb_init.d/pax-cdbinit--1.0.sql'
+
+.PHONY: install
+install: install-data
+ @echo "install data"
+ make -C build install
+
+.PHONY: uninstall-data
+
+uninstall-data:
+ $(RM) '$(DESTDIR)$(datadir)/cdb_init.d/pax-cdbinit--1.0.sql'
+
+uninstall: uninstall-data
+
+clean-data:
+ $(RM) pax-cdbinit--1.0.sql
+ $(RM) -r build
+
+clean: clean-data
diff --git a/contrib/pax_storage/README.md b/contrib/pax_storage/README.md
index f26eeaf11be..e82817afed5 100644
--- a/contrib/pax_storage/README.md
+++ b/contrib/pax_storage/README.md
@@ -43,7 +43,7 @@ make -j
### Build GTEST
1. make sure already build pax with cmake option `-DBUILD_GTEST=on`, default value is on
-2. better with debug cmake option `-DENBALE_DEBUG=on`, default value is on
+2. better to build in debug mode (env `CBDB_BUILD_TYPE=debug`, the default), which defines `ENABLE_DEBUG`
3. run tests
```
diff --git a/contrib/pax_storage/expected/ddl.out b/contrib/pax_storage/expected/ddl.out
new file mode 100644
index 00000000000..2917f049b80
--- /dev/null
+++ b/contrib/pax_storage/expected/ddl.out
@@ -0,0 +1,73 @@
+set default_table_access_method = 'pax';
+create table pax_test.t1(
+ id int,
+ name text not null,
+ height float not null,
+ decimal_col decimal(10, 2) not null,
+ created_at timestamp with time zone not null,
+ updated_at timestamp with time zone not null
+) using pax distributed BY (id);
+\d+ pax_test.t1
+ Table "pax_test.t1"
+ Column | Type | Collation | Nullable | Default | Storage | Stats target | Description
+-------------+--------------------------+-----------+----------+---------+----------+--------------+-------------
+ id | integer | | | | plain | |
+ name | text | | not null | | extended | |
+ height | double precision | | not null | | plain | |
+ decimal_col | numeric(10,2) | | not null | | main | |
+ created_at | timestamp with time zone | | not null | | plain | |
+ updated_at | timestamp with time zone | | not null | | plain | |
+Distributed by: (id)
+
+create table pax_test.t2(
+ id int,
+ name text not null,
+ height float not null,
+ decimal_col decimal(10, 2) not null,
+ created_at timestamp with time zone not null,
+ updated_at timestamp with time zone not null
+);
+NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'id' as the Cloudberry Database data distribution key for this table.
+HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew.
+\d+ pax_test.t2
+ Table "pax_test.t2"
+ Column | Type | Collation | Nullable | Default | Storage | Stats target | Description
+-------------+--------------------------+-----------+----------+---------+----------+--------------+-------------
+ id | integer | | | | plain | |
+ name | text | | not null | | extended | |
+ height | double precision | | not null | | plain | |
+ decimal_col | numeric(10,2) | | not null | | main | |
+ created_at | timestamp with time zone | | not null | | plain | |
+ updated_at | timestamp with time zone | | not null | | plain | |
+Distributed by: (id)
+
+insert into pax_test.t1 (id, name, height, decimal_col, created_at, updated_at) values
+ (1, 'Alice', 1.65, 1.23, '2023-05-17 17:56:49.633664+08', '2023-05-17 17:56:49.633664+08'),
+ (2, 'Bob', 1.75, 2.34, '2023-05-17 17:56:49.633664+08', '2023-05-17 17:56:49.633664+08'),
+ (3, 'Carol', 1.85, 3.45, '2023-05-17 17:56:49.633664+08', '2023-05-17 17:56:49.633664+08');
+alter table pax_test.t1 add column new_col1 int;
+alter table pax_test.t1 add column new_col2 int default null;
+alter table pax_test.t1 add column new_col3 int default 0;
+alter table pax_test.t1 add column new_col4 int default 12;
+select * from pax_test.t1;
+ id | name | height | decimal_col | created_at | updated_at | new_col1 | new_col2 | new_col3 | new_col4
+----+-------+--------+-------------+-------------------------------------+-------------------------------------+----------+----------+----------+----------
+ 1 | Alice | 1.65 | 1.23 | Wed May 17 02:56:49.633664 2023 PDT | Wed May 17 02:56:49.633664 2023 PDT | | | 0 | 12
+ 2 | Bob | 1.75 | 2.34 | Wed May 17 02:56:49.633664 2023 PDT | Wed May 17 02:56:49.633664 2023 PDT | | | 0 | 12
+ 3 | Carol | 1.85 | 3.45 | Wed May 17 02:56:49.633664 2023 PDT | Wed May 17 02:56:49.633664 2023 PDT | | | 0 | 12
+(3 rows)
+
+alter table pax_test.t1 drop column new_col2;
+alter table pax_test.t1 drop column new_col3;
+vacuum pax_test.t1;
+vacuum full pax_test.t1;
+drop table pax_test.t1;
+drop table pax_test.t2;
+-- alter column with options
+create table pax_test.t3 (v1 numeric(100,1)) with(compresstype=zstd, compresslevel=1);
+alter table pax_test.t3 alter column v1 type numeric;
+drop table pax_test.t3;
+-- add column with options
+create table pax_test.t4 (v1 text) with(compresstype=zstd, compresslevel=1);
+alter table pax_test.t4 add column v2 text;
+drop table pax_test.t4;
diff --git a/contrib/pax_storage/expected/detoast.out b/contrib/pax_storage/expected/detoast.out
new file mode 100644
index 00000000000..ded4073a380
--- /dev/null
+++ b/contrib/pax_storage/expected/detoast.out
@@ -0,0 +1,91 @@
+CREATE TABLE toasttest_external(f1 text);
+NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'f1' as the Cloudberry Database data distribution key for this table.
+HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew.
+-- The storage `EXTERNAL` allows out-of-line storage but not compression.
+alter table toasttest_external alter column f1 set storage external;
+-- These tests are sensitive to block size. In CBDB, the block
+-- size is 32 kB, whereas in PostgreSQL it's 8kB. Therefore make
+-- the data 4x larger here.
+INSERT INTO toasttest_external values (repeat('1234567890',300*4));
+INSERT INTO toasttest_external values (repeat('1234567890',300*4));
+INSERT INTO toasttest_external values (repeat('1234567890',300*4));
+INSERT INTO toasttest_external values (repeat('1234567890',300*4));
+-- expect >0 blocks
+SELECT pg_relation_size(reltoastrelid) = 0 AS is_empty
+ FROM pg_class where relname = 'toasttest_external';
+ is_empty
+----------
+ f
+(1 row)
+
+create table toasttest_external_pax(f1 text) using pax;
+NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'f1' as the Cloudberry Database data distribution key for this table.
+HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew.
+insert into toasttest_external_pax select * from toasttest_external;
+drop table toasttest_external;
+-- If pax insert toast here, Then after drop toasttest_external, toast
+-- will not get the source data.
+select length(f1) from toasttest_external_pax;
+ length
+--------
+ 12000
+ 12000
+ 12000
+ 12000
+(4 rows)
+
+drop table toasttest_external_pax;
+CREATE TABLE toasttest_compress(f1 text);
+NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'f1' as the Cloudberry Database data distribution key for this table.
+HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew.
+-- The storage `MAIN` allows compression but not out-of-line storage.
+alter table toasttest_compress alter column f1 set storage main;
+-- about 1M
+INSERT INTO toasttest_compress values (repeat('1234567890123456',1024 * 64));
+-- should be true, becase it's not store in toast table
+SELECT pg_relation_size(reltoastrelid) = 0 AS is_empty FROM pg_class where relname = 'toasttest_compress';
+ is_empty
+----------
+ t
+(1 row)
+
+create table toasttest_compress_pax(f1 text) using pax;
+NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'f1' as the Cloudberry Database data distribution key for this table.
+HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew.
+insert into toasttest_compress_pax select * from toasttest_compress;
+drop table toasttest_compress;
+select length(f1) from toasttest_compress_pax;
+ length
+---------
+ 1048576
+(1 row)
+
+drop table toasttest_compress_pax;
+CREATE TABLE toasttest_extended(f1 text);
+NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'f1' as the Cloudberry Database data distribution key for this table.
+HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew.
+-- The storage `EXTENDED` allows both compression and out-of-line storage.
+alter table toasttest_extended alter column f1 set storage EXTENDED;
+-- about 1M, will use out-of-line storage
+INSERT INTO toasttest_extended values (repeat('1234567890123456',1024 * 64));
+-- about 80k , will use compression storage
+INSERT INTO toasttest_extended values (repeat('1234567890123456',1024 * 5));
+SELECT pg_relation_size(reltoastrelid) = 0 AS is_empty FROM pg_class where relname = 'toasttest_extended';
+ is_empty
+----------
+ f
+(1 row)
+
+create table toasttest_extended_pax(f1 text) using pax;
+NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'f1' as the Cloudberry Database data distribution key for this table.
+HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew.
+insert into toasttest_extended_pax select * from toasttest_extended;
+drop table toasttest_extended;
+select length(f1) from toasttest_extended_pax;
+ length
+---------
+ 1048576
+ 81920
+(2 rows)
+
+drop table toasttest_extended_pax;
diff --git a/contrib/pax_storage/src/data/sql/teardown.sql b/contrib/pax_storage/expected/setup.out
similarity index 100%
rename from contrib/pax_storage/src/data/sql/teardown.sql
rename to contrib/pax_storage/expected/setup.out
diff --git a/contrib/pax_storage/expected/teardown.out b/contrib/pax_storage/expected/teardown.out
new file mode 100644
index 00000000000..e69de29bb2d
diff --git a/contrib/pax_storage/src/data/expected/types.out b/contrib/pax_storage/expected/types.out
similarity index 75%
rename from contrib/pax_storage/src/data/expected/types.out
rename to contrib/pax_storage/expected/types.out
index 585ea81da8b..3f173f31c0e 100644
--- a/contrib/pax_storage/src/data/expected/types.out
+++ b/contrib/pax_storage/expected/types.out
@@ -1,8 +1,4 @@
--- start_ignore
-create extension pax;
-drop table if exists all_typbyval_pg_types;
--- end_ignore
-CREATE TABLE all_typbyval_pg_types (
+CREATE TABLE pax_test.all_typbyval_pg_types (
id int,
bool_col bool,
char_col char,
@@ -24,10 +20,10 @@ CREATE TABLE all_typbyval_pg_types (
timestamptz_col timestamptz,
pg_lsn_col pg_lsn
) USING pax distributed by (id);
-insert into all_typbyval_pg_types values(1, true,'c',2,'cid',4.2,5,'2023-05-17 17:56:49',7,'2023-05-17 17:56:49',10,11.1111,12,'2023-05-17 17:56:49','2023-05-17 17:56:49', '16/0'),
+insert into pax_test.all_typbyval_pg_types values(1, true,'c',2,'cid',4.2,5,'2023-05-17 17:56:49',7,'2023-05-17 17:56:49',10,11.1111,12,'2023-05-17 17:56:49','2023-05-17 17:56:49', '16/0'),
(1, true,'c',2,'cid',4.2,5,'2023-05-17 17:56:49',7,'2023-05-17 17:56:49',10,11.1111,12,'2023-05-17 17:56:49','2023-05-17 17:56:49', '16/0'),
(1, true,'c',2,'cid',4.2,5,'2023-05-17 17:56:49',7,'2023-05-17 17:56:49',10,11.1111,12,'2023-05-17 17:56:49','2023-05-17 17:56:49', '16/0');
-select * from all_typbyval_pg_types;
+select * from pax_test.all_typbyval_pg_types;
id | bool_col | char_col | int2_col | cid_col | float4_col | int4_col | date_col | oid_col | time_stamp_col | int8_col | float8_col | money_col | time_col | timestamptz_col | pg_lsn_col
----+----------+----------+----------+---------+------------+----------+------------+---------+--------------------------+----------+------------+-----------+----------+------------------------------+------------
1 | t | c | 2 | 0 | 4.2 | 5 | 05-17-2023 | 7 | Wed May 17 17:56:49 2023 | 10 | 11.1111 | $12.00 | 17:56:49 | Wed May 17 17:56:49 2023 PDT | 16/0
@@ -35,10 +31,7 @@ select * from all_typbyval_pg_types;
1 | t | c | 2 | 0 | 4.2 | 5 | 05-17-2023 | 7 | Wed May 17 17:56:49 2023 | 10 | 11.1111 | $12.00 | 17:56:49 | Wed May 17 17:56:49 2023 PDT | 16/0
(3 rows)
--- start_ignore
-drop table if exists all_typlen_lt_0_pg_type;
--- end_ignore
-create table all_typlen_lt_0_pg_type (
+create table pax_test.all_typlen_lt_0_pg_type (
id int,
name_col name,
numeric_col numeric,
@@ -46,13 +39,12 @@ create table all_typlen_lt_0_pg_type (
varchar_col varchar(128),
point_col point
) USING pax distributed by (id);
-insert into all_typlen_lt_0_pg_type values(1,'hello', 1.23, 'text', 'varchar', point(1,2));
-select * from all_typlen_lt_0_pg_type;
+insert into pax_test.all_typlen_lt_0_pg_type values(1,'hello', 1.23, 'text', 'varchar', point(1,2));
+select * from pax_test.all_typlen_lt_0_pg_type;
id | name_col | numeric_col | text_col | varchar_col | point_col
----+----------+-------------+----------+-------------+-----------
1 | hello | 1.23 | text | varchar | (1,2)
(1 row)
--- start_ignore
-drop table if exists all_typbyval_pg_types;
--- end_ignore
+drop table pax_test.all_typbyval_pg_types;
+drop table pax_test.all_typlen_lt_0_pg_type;
diff --git a/contrib/pax_storage/expected/update.out b/contrib/pax_storage/expected/update.out
new file mode 100644
index 00000000000..2420012b121
--- /dev/null
+++ b/contrib/pax_storage/expected/update.out
@@ -0,0 +1,624 @@
+set default_table_access_method = pax;
+set pax.enable_filter = off;
+--
+-- UPDATE ... SET <col> = DEFAULT;
+--
+CREATE TABLE update_test (
+ a INT DEFAULT 10,
+ b INT,
+ c TEXT
+);
+CREATE TABLE upsert_test (
+ a INT PRIMARY KEY,
+ b TEXT
+);
+INSERT INTO update_test VALUES (5, 10, 'foo');
+INSERT INTO update_test(b, a) VALUES (15, 10);
+SELECT a,b,c FROM update_test ORDER BY a,b,c;
+ a | b | c
+----+----+-----
+ 5 | 10 | foo
+ 10 | 15 |
+(2 rows)
+
+UPDATE update_test SET a = DEFAULT, b = DEFAULT;
+SELECT a,b,c FROM update_test ORDER BY a,b,c;
+ a | b | c
+----+---+-----
+ 10 | | foo
+ 10 | |
+(2 rows)
+
+-- aliases for the UPDATE target table
+UPDATE update_test AS t SET b = 10 WHERE t.a = 10;
+SELECT a,b,c FROM update_test ORDER BY a,b,c;
+ a | b | c
+----+----+-----
+ 10 | 10 | foo
+ 10 | 10 |
+(2 rows)
+
+UPDATE update_test t SET b = t.b + 10 WHERE t.a = 10;
+SELECT a,b,c FROM update_test ORDER BY a,b,c;
+ a | b | c
+----+----+-----
+ 10 | 20 | foo
+ 10 | 20 |
+(2 rows)
+
+--
+-- Test VALUES in FROM
+--
+UPDATE update_test SET a=v.i FROM (VALUES(100, 20)) AS v(i, j)
+ WHERE update_test.b = v.j;
+SELECT a,b,c FROM update_test ORDER BY a,b,c;
+ a | b | c
+-----+----+-----
+ 100 | 20 | foo
+ 100 | 20 |
+(2 rows)
+
+-- fail, wrong data type:
+UPDATE update_test SET a = v.* FROM (VALUES(100, 20)) AS v(i, j)
+ WHERE update_test.b = v.j;
+ERROR: column "a" is of type integer but expression is of type record
+LINE 1: UPDATE update_test SET a = v.* FROM (VALUES(100, 20)) AS v(i...
+ ^
+HINT: You will need to rewrite or cast the expression.
+--
+-- Test multiple-set-clause syntax
+--
+INSERT INTO update_test SELECT a,b+1,c FROM update_test;
+SELECT * FROM update_test;
+ a | b | c
+-----+----+-----
+ 100 | 20 | foo
+ 100 | 20 |
+ 100 | 21 | foo
+ 100 | 21 |
+(4 rows)
+
+UPDATE update_test SET (c,b,a) = ('bugle', b+11, DEFAULT) WHERE c = 'foo';
+SELECT a,b,c FROM update_test ORDER BY a,b,c;
+ a | b | c
+-----+----+-------
+ 10 | 31 | bugle
+ 10 | 32 | bugle
+ 100 | 20 |
+ 100 | 21 |
+(4 rows)
+
+UPDATE update_test SET (c,b) = ('car', a+b), a = a + 1 WHERE a = 10;
+SELECT a,b,c FROM update_test ORDER BY a,b,c;
+ a | b | c
+-----+----+-----
+ 11 | 41 | car
+ 11 | 42 | car
+ 100 | 20 |
+ 100 | 21 |
+(4 rows)
+
+-- fail, multi assignment to same column:
+UPDATE update_test SET (c,b) = ('car', a+b), b = a + 1 WHERE a = 10;
+ERROR: multiple assignments to same column "b"
+-- uncorrelated sub-select:
+UPDATE update_test
+ SET (b,a) = (select a,b from update_test where b = 41 and c = 'car')
+ WHERE a = 100 AND b = 20;
+SELECT * FROM update_test;
+ a | b | c
+-----+----+-----
+ 100 | 21 |
+ 11 | 41 | car
+ 11 | 42 | car
+ 41 | 11 |
+(4 rows)
+
+-- correlated sub-select:
+UPDATE update_test o
+ SET (b,a) = (select a+1,b from update_test i
+ where i.a=o.a and i.b=o.b and i.c is not distinct from o.c);
+SELECT * FROM update_test;
+ a | b | c
+----+-----+-----
+ 21 | 101 |
+ 41 | 12 | car
+ 42 | 12 | car
+ 11 | 42 |
+(4 rows)
+
+-- fail, multiple rows supplied:
+UPDATE update_test SET (b,a) = (select a+1,b from update_test);
+ERROR: more than one row returned by a subquery used as an expression
+-- set to null if no rows supplied:
+UPDATE update_test SET (b,a) = (select a+1,b from update_test where a = 1000)
+ WHERE a = 11;
+SELECT * FROM update_test;
+ a | b | c
+----+-----+-----
+ 21 | 101 |
+ 41 | 12 | car
+ 42 | 12 | car
+ | |
+(4 rows)
+
+-- *-expansion should work in this context:
+UPDATE update_test SET (a,b) = ROW(v.*) FROM (VALUES(21, 100)) AS v(i, j)
+ WHERE update_test.a = v.i;
+-- you might expect this to work, but syntactically it's not a RowExpr:
+UPDATE update_test SET (a,b) = (v.*) FROM (VALUES(21, 101)) AS v(i, j)
+ WHERE update_test.a = v.i;
+ERROR: source for a multiple-column UPDATE item must be a sub-SELECT or ROW() expression
+LINE 1: UPDATE update_test SET (a,b) = (v.*) FROM (VALUES(21, 101)) ...
+ ^
+-- if an alias for the target table is specified, don't allow references
+-- to the original table name
+UPDATE update_test AS t SET b = update_test.b + 10 WHERE t.a = 10;
+ERROR: invalid reference to FROM-clause entry for table "update_test"
+LINE 1: UPDATE update_test AS t SET b = update_test.b + 10 WHERE t.a...
+ ^
+HINT: Perhaps you meant to reference the table alias "t".
+-- Make sure that we can update to a TOASTed value.
+UPDATE update_test SET c = repeat('x', 10000) WHERE c = 'car';
+SELECT a, b, char_length(c) FROM update_test;
+ a | b | char_length
+----+-----+-------------
+ | |
+ 21 | 100 |
+ 41 | 12 | 10000
+ 42 | 12 | 10000
+(4 rows)
+
+-- Check multi-assignment with a Result node to handle a one-time filter.
+EXPLAIN (VERBOSE, COSTS OFF)
+UPDATE update_test t
+ SET (a, b) = (SELECT b, a FROM update_test s WHERE s.a = t.a)
+ WHERE CURRENT_USER = SESSION_USER;
+ QUERY PLAN
+----------------------------------------------------------------------------------------------------------
+ Update on public.update_test t
+ -> Explicit Redistribute Motion 3:3 (slice1; segments: 3)
+ Output: ($1), ($2), t.c, ((SubPlan 1 (returns $1,$2))), t.ctid, t.gp_segment_id, t.*, (DMLAction)
+ -> Split
+ Output: ($1), ($2), t.c, ((SubPlan 1 (returns $1,$2))), t.ctid, t.gp_segment_id, t.*, DMLAction
+ -> Seq Scan on public.update_test t
+ Output: $1, $2, t.c, (SubPlan 1 (returns $1,$2)), t.ctid, t.gp_segment_id, t.*
+ SubPlan 1 (returns $1,$2)
+ -> Result
+ Output: s.b, s.a
+ Filter: (s.a = t.a)
+ -> Materialize
+ Output: s.b, s.a
+ -> Broadcast Motion 3:3 (slice2; segments: 3)
+ Output: s.b, s.a
+ -> Seq Scan on public.update_test s
+ Output: s.b, s.a
+ Optimizer: Postgres query optimizer
+(18 rows)
+
+UPDATE update_test t
+ SET (a, b) = (SELECT b, a FROM update_test s WHERE s.a = t.a)
+ WHERE CURRENT_USER = SESSION_USER;
+SELECT a, b, char_length(c) FROM update_test;
+ a | b | char_length
+-----+----+-------------
+ | |
+ 100 | 21 |
+ 12 | 41 | 10000
+ 12 | 42 | 10000
+(4 rows)
+
+-- Test ON CONFLICT DO UPDATE
+INSERT INTO upsert_test VALUES(1, 'Boo'), (3, 'Zoo');
+-- uncorrelated sub-select:
+WITH aaa AS (SELECT 1 AS a, 'Foo' AS b) INSERT INTO upsert_test
+ VALUES (1, 'Bar') ON CONFLICT(a)
+ DO UPDATE SET (b, a) = (SELECT b, a FROM aaa) RETURNING *;
+ERROR: modification of distribution columns in OnConflictUpdate is not supported
+-- correlated sub-select:
+INSERT INTO upsert_test VALUES (1, 'Baz'), (3, 'Zaz') ON CONFLICT(a)
+ DO UPDATE SET (b, a) = (SELECT b || ', Correlated', a from upsert_test i WHERE i.a = upsert_test.a)
+ RETURNING *;
+ERROR: modification of distribution columns in OnConflictUpdate is not supported
+-- correlated sub-select (EXCLUDED.* alias):
+INSERT INTO upsert_test VALUES (1, 'Bat'), (3, 'Zot') ON CONFLICT(a)
+ DO UPDATE SET (b, a) = (SELECT b || ', Excluded', a from upsert_test i WHERE i.a = excluded.a)
+ RETURNING *;
+ERROR: modification of distribution columns in OnConflictUpdate is not supported
+-- ON CONFLICT using system attributes in RETURNING, testing both the
+-- inserting and updating paths. See bug report at:
+-- https://www.postgresql.org/message-id/73436355-6432-49B1-92ED-1FE4F7E7E100%40finefun.com.au
+INSERT INTO upsert_test VALUES (2, 'Beeble') ON CONFLICT(a)
+ DO UPDATE SET (b, a) = (SELECT b || ', Excluded', a from upsert_test i WHERE i.a = excluded.a)
+ RETURNING tableoid::regclass, xmin = pg_current_xact_id()::xid AS xmin_correct, xmax = 0 AS xmax_correct;
+ERROR: modification of distribution columns in OnConflictUpdate is not supported
+-- currently xmax is set after a conflict - that's probably not good,
+-- but it seems worthwhile to have to be explicit if that changes.
+INSERT INTO upsert_test VALUES (2, 'Brox') ON CONFLICT(a)
+ DO UPDATE SET (b, a) = (SELECT b || ', Excluded', a from upsert_test i WHERE i.a = excluded.a)
+ RETURNING tableoid::regclass, xmin = pg_current_xact_id()::xid AS xmin_correct, xmax = pg_current_xact_id()::xid AS xmax_correct;
+ERROR: modification of distribution columns in OnConflictUpdate is not supported
+DROP TABLE update_test;
+DROP TABLE upsert_test;
+-- Test ON CONFLICT DO UPDATE with partitioned table and non-identical children
+CREATE TABLE upsert_test (
+ a INT PRIMARY KEY,
+ b TEXT
+) PARTITION BY LIST (a);
+CREATE TABLE upsert_test_1 PARTITION OF upsert_test FOR VALUES IN (1);
+CREATE TABLE upsert_test_2 (b TEXT, a INT PRIMARY KEY);
+ALTER TABLE upsert_test ATTACH PARTITION upsert_test_2 FOR VALUES IN (2);
+INSERT INTO upsert_test VALUES(1, 'Boo'), (2, 'Zoo');
+-- uncorrelated sub-select:
+WITH aaa AS (SELECT 1 AS a, 'Foo' AS b) INSERT INTO upsert_test
+ VALUES (1, 'Bar') ON CONFLICT(a)
+ DO UPDATE SET (b, a) = (SELECT b, a FROM aaa) RETURNING *;
+ERROR: modification of distribution columns in OnConflictUpdate is not supported
+-- correlated sub-select:
+WITH aaa AS (SELECT 1 AS ctea, ' Foo' AS cteb) INSERT INTO upsert_test
+ VALUES (1, 'Bar'), (2, 'Baz') ON CONFLICT(a)
+ DO UPDATE SET (b, a) = (SELECT upsert_test.b||cteb, upsert_test.a FROM aaa) RETURNING *;
+ERROR: modification of distribution columns in OnConflictUpdate is not supported
+DROP TABLE upsert_test;
+---------------------------
+-- UPDATE with row movement
+---------------------------
+-- When a partitioned table receives an UPDATE to the partitioned key and the
+-- new values no longer meet the partition's bound, the row must be moved to
+-- the correct partition for the new partition key (if one exists). We must
+-- also ensure that updatable views on partitioned tables properly enforce any
+-- WITH CHECK OPTION that is defined. The situation with triggers in this case
+-- also requires thorough testing as partition key updates causing row
+-- movement convert UPDATEs into DELETE+INSERT.
+CREATE TABLE range_parted (
+ a text,
+ b bigint,
+ c numeric,
+ d int,
+ e varchar
+) PARTITION BY RANGE (a, b);
+-- Create partitions intentionally in descending bound order, so as to test
+-- that update-row-movement works with the leaf partitions not in bound order.
+CREATE TABLE part_b_20_b_30 (e varchar, c numeric, a text, b bigint, d int);
+-- GPDB: distribution policy must match the parent table.
+alter table part_b_20_b_30 set distributed by (a);
+ALTER TABLE range_parted ATTACH PARTITION part_b_20_b_30 FOR VALUES FROM ('b', 20) TO ('b', 30);
+CREATE TABLE part_b_10_b_20 (e varchar, c numeric, a text, b bigint, d int) PARTITION BY RANGE (c);
+alter table part_b_10_b_20 set distributed by (a);
+CREATE TABLE part_b_1_b_10 PARTITION OF range_parted FOR VALUES FROM ('b', 1) TO ('b', 10);
+ALTER TABLE range_parted ATTACH PARTITION part_b_10_b_20 FOR VALUES FROM ('b', 10) TO ('b', 20);
+CREATE TABLE part_a_10_a_20 PARTITION OF range_parted FOR VALUES FROM ('a', 10) TO ('a', 20);
+CREATE TABLE part_a_1_a_10 PARTITION OF range_parted FOR VALUES FROM ('a', 1) TO ('a', 10);
+-- Check that partition-key UPDATE works sanely on a partitioned table that
+-- does not have any child partitions.
+UPDATE part_b_10_b_20 set b = b - 6;
+-- Create some more partitions following the above pattern of descending bound
+-- order, but let's make the situation a bit more complex by having the
+-- attribute numbers of the columns vary from their parent partition.
+CREATE TABLE part_c_100_200 (e varchar, c numeric, a text, b bigint, d int) PARTITION BY range (abs(d));
+ALTER TABLE part_c_100_200 DROP COLUMN e, DROP COLUMN c, DROP COLUMN a;
+ALTER TABLE part_c_100_200 ADD COLUMN c numeric, ADD COLUMN e varchar, ADD COLUMN a text;
+ALTER TABLE part_c_100_200 DROP COLUMN b;
+ALTER TABLE part_c_100_200 ADD COLUMN b bigint;
+CREATE TABLE part_d_1_15 PARTITION OF part_c_100_200 FOR VALUES FROM (1) TO (15);
+CREATE TABLE part_d_15_20 PARTITION OF part_c_100_200 FOR VALUES FROM (15) TO (20);
+ALTER TABLE part_b_10_b_20 ATTACH PARTITION part_c_100_200 FOR VALUES FROM (100) TO (200);
+ERROR: distribution policy for "part_c_100_200" must be the same as that for "part_b_10_b_20"
+-- GPDB: distribution policy must match the parent table, so the previous command fails.
+-- Change the distribution key and try again.
+alter table part_c_100_200 set distributed by (a);
+ALTER TABLE part_b_10_b_20 ATTACH PARTITION part_c_100_200 FOR VALUES FROM (100) TO (200);
+CREATE TABLE part_c_1_100 (e varchar, d int, c numeric, b bigint, a text);
+alter table part_c_1_100 set distributed by (a);
+ALTER TABLE part_b_10_b_20 ATTACH PARTITION part_c_1_100 FOR VALUES FROM (1) TO (100);
+\set init_range_parted 'truncate range_parted; insert into range_parted VALUES (''a'', 1, 1, 1), (''a'', 10, 200, 1), (''b'', 12, 96, 1), (''b'', 13, 97, 2), (''b'', 15, 105, 16), (''b'', 17, 105, 19)'
+\set show_data 'select tableoid::regclass::text COLLATE "C" partname, * from range_parted ORDER BY 1, 2, 3, 4, 5, 6'
+:init_range_parted;
+:show_data;
+ partname | a | b | c | d | e
+----------------+---+----+-----+----+---
+ part_a_10_a_20 | a | 10 | 200 | 1 |
+ part_a_1_a_10 | a | 1 | 1 | 1 |
+ part_c_1_100 | b | 12 | 96 | 1 |
+ part_c_1_100 | b | 13 | 97 | 2 |
+ part_d_15_20 | b | 15 | 105 | 16 |
+ part_d_15_20 | b | 17 | 105 | 19 |
+(6 rows)
+
+-- The order of subplans should be in bound order
+EXPLAIN (costs off) UPDATE range_parted set c = c - 50 WHERE c > 97;
+ QUERY PLAN
+-------------------------------------------------------
+ Update on range_parted
+ Update on part_a_1_a_10 range_parted_1
+ Update on part_a_10_a_20 range_parted_2
+ Update on part_b_1_b_10 range_parted_3
+ Update on part_c_1_100 range_parted_4
+ Update on part_d_1_15 range_parted_5
+ Update on part_d_15_20 range_parted_6
+ Update on part_b_20_b_30 range_parted_7
+ -> Append
+ -> Seq Scan on part_a_1_a_10 range_parted_1
+ Filter: (c > '97'::numeric)
+ -> Seq Scan on part_a_10_a_20 range_parted_2
+ Filter: (c > '97'::numeric)
+ -> Seq Scan on part_b_1_b_10 range_parted_3
+ Filter: (c > '97'::numeric)
+ -> Seq Scan on part_c_1_100 range_parted_4
+ Filter: (c > '97'::numeric)
+ -> Seq Scan on part_d_1_15 range_parted_5
+ Filter: (c > '97'::numeric)
+ -> Seq Scan on part_d_15_20 range_parted_6
+ Filter: (c > '97'::numeric)
+ -> Seq Scan on part_b_20_b_30 range_parted_7
+ Filter: (c > '97'::numeric)
+(23 rows)
+
+-- fail, row movement happens only within the partition subtree.
+UPDATE part_c_100_200 set c = c - 20, d = c WHERE c = 105;
+ERROR: new row for relation "part_c_100_200" violates partition constraint
+DETAIL: Failing row contains (105, 85, null, b, 15).
+-- fail, no partition key update, so no attempt to move tuple,
+-- but "a = 'a'" violates partition constraint enforced by root partition)
+UPDATE part_b_10_b_20 set a = 'a';
+ERROR: new row for relation "part_b_10_b_20" violates partition constraint
+DETAIL: Failing row contains (null, 96, a, 12, 1).
+-- ok, partition key update, no constraint violation
+UPDATE range_parted set d = d - 10 WHERE d > 10;
+-- ok, no partition key update, no constraint violation
+UPDATE range_parted set e = d;
+-- No row found
+UPDATE part_c_1_100 set c = c + 20 WHERE c = 98;
+-- ok, row movement
+UPDATE part_b_10_b_20 set c = c + 20 returning c, b, a;
+ c | b | a
+-----+----+---
+ 116 | 12 | b
+ 117 | 13 | b
+ 125 | 15 | b
+ 125 | 17 | b
+(4 rows)
+
+:show_data;
+ partname | a | b | c | d | e
+----------------+---+----+-----+---+---
+ part_a_10_a_20 | a | 10 | 200 | 1 | 1
+ part_a_1_a_10 | a | 1 | 1 | 1 | 1
+ part_d_1_15 | b | 12 | 116 | 1 | 1
+ part_d_1_15 | b | 13 | 117 | 2 | 2
+ part_d_1_15 | b | 15 | 125 | 6 | 6
+ part_d_1_15 | b | 17 | 125 | 9 | 9
+(6 rows)
+
+-- fail, row movement happens only within the partition subtree.
+UPDATE part_b_10_b_20 set b = b - 6 WHERE c > 116 returning *;
+ERROR: new row for relation "part_b_10_b_20" violates partition constraint
+DETAIL: Failing row contains (2, 117, b, 7, 2).
+-- ok, row movement, with subset of rows moved into different partition.
+UPDATE range_parted set b = b - 6 WHERE c > 116 returning a, b + c;
+ a | ?column?
+---+----------
+ a | 204
+ b | 124
+ b | 134
+ b | 136
+(4 rows)
+
+:show_data;
+ partname | a | b | c | d | e
+---------------+---+----+-----+---+---
+ part_a_1_a_10 | a | 1 | 1 | 1 | 1
+ part_a_1_a_10 | a | 4 | 200 | 1 | 1
+ part_b_1_b_10 | b | 7 | 117 | 2 | 2
+ part_b_1_b_10 | b | 9 | 125 | 6 | 6
+ part_d_1_15 | b | 11 | 125 | 9 | 9
+ part_d_1_15 | b | 12 | 116 | 1 | 1
+(6 rows)
+
+-- Common table needed for multiple test scenarios.
+CREATE TABLE mintab(c1 int);
+INSERT into mintab VALUES (120);
+-- update partition key using updatable view.
+CREATE VIEW upview AS SELECT * FROM range_parted WHERE (select c > c1 FROM mintab) WITH CHECK OPTION;
+-- ok
+UPDATE upview set c = 199 WHERE b = 4;
+-- fail, check option violation
+UPDATE upview set c = 120 WHERE b = 4;
+ERROR: new row violates check option for view "upview"
+DETAIL: Failing row contains (a, 4, 120, 1, 1).
+-- fail, row movement with check option violation
+UPDATE upview set a = 'b', b = 15, c = 120 WHERE b = 4;
+ERROR: new row violates check option for view "upview"
+DETAIL: Failing row contains (b, 15, 120, 1, 1).
+-- ok, row movement, check option passes
+UPDATE upview set a = 'b', b = 15 WHERE b = 4;
+:show_data;
+ partname | a | b | c | d | e
+---------------+---+----+-----+---+---
+ part_a_1_a_10 | a | 1 | 1 | 1 | 1
+ part_b_1_b_10 | b | 7 | 117 | 2 | 2
+ part_b_1_b_10 | b | 9 | 125 | 6 | 6
+ part_d_1_15 | b | 11 | 125 | 9 | 9
+ part_d_1_15 | b | 12 | 116 | 1 | 1
+ part_d_1_15 | b | 15 | 199 | 1 | 1
+(6 rows)
+
+-- cleanup
+DROP VIEW upview;
+-- RETURNING having whole-row vars.
+:init_range_parted;
+UPDATE range_parted set c = 95 WHERE a = 'b' and b > 10 and c > 100 returning (range_parted), *;
+ range_parted | a | b | c | d | e
+---------------+---+----+----+----+---
+ (b,15,95,16,) | b | 15 | 95 | 16 |
+ (b,17,95,19,) | b | 17 | 95 | 19 |
+(2 rows)
+
+:show_data;
+ partname | a | b | c | d | e
+----------------+---+----+-----+----+---
+ part_a_10_a_20 | a | 10 | 200 | 1 |
+ part_a_1_a_10 | a | 1 | 1 | 1 |
+ part_c_1_100 | b | 12 | 96 | 1 |
+ part_c_1_100 | b | 13 | 97 | 2 |
+ part_c_1_100 | b | 15 | 95 | 16 |
+ part_c_1_100 | b | 17 | 95 | 19 |
+(6 rows)
+
+-- Creating default partition for range
+:init_range_parted;
+create table part_def partition of range_parted default;
+\d+ part_def
+ Table "public.part_def"
+ Column | Type | Collation | Nullable | Default | Storage | Stats target | Description
+--------+-------------------+-----------+----------+---------+----------+--------------+-------------
+ a | text | | | | extended | |
+ b | bigint | | | | plain | |
+ c | numeric | | | | main | |
+ d | integer | | | | plain | |
+ e | character varying | | | | extended | |
+Partition of: range_parted DEFAULT
+Partition constraint: (NOT ((a IS NOT NULL) AND (b IS NOT NULL) AND (((a = 'a'::text) AND (b >= '1'::bigint) AND (b < '10'::bigint)) OR ((a = 'a'::text) AND (b >= '10'::bigint) AND (b < '20'::bigint)) OR ((a = 'b'::text) AND (b >= '1'::bigint) AND (b < '10'::bigint)) OR ((a = 'b'::text) AND (b >= '10'::bigint) AND (b < '20'::bigint)) OR ((a = 'b'::text) AND (b >= '20'::bigint) AND (b < '30'::bigint)))))
+
+insert into range_parted values ('c', 9);
+-- ok
+update part_def set a = 'd' where a = 'c';
+-- fail
+update part_def set a = 'a' where a = 'd';
+ERROR: new row for relation "part_def" violates partition constraint
+DETAIL: Failing row contains (a, 9, null, null, null).
+:show_data;
+ partname | a | b | c | d | e
+----------------+---+----+-----+----+---
+ part_a_10_a_20 | a | 10 | 200 | 1 |
+ part_a_1_a_10 | a | 1 | 1 | 1 |
+ part_c_1_100 | b | 12 | 96 | 1 |
+ part_c_1_100 | b | 13 | 97 | 2 |
+ part_d_15_20 | b | 15 | 105 | 16 |
+ part_d_15_20 | b | 17 | 105 | 19 |
+ part_def | d | 9 | | |
+(7 rows)
+
+-- Update row movement from non-default to default partition.
+-- fail, default partition is not under part_a_10_a_20;
+UPDATE part_a_10_a_20 set a = 'ad' WHERE a = 'a';
+ERROR: new row for relation "part_a_10_a_20" violates partition constraint
+DETAIL: Failing row contains (ad, 10, 200, 1, null).
+-- ok
+UPDATE range_parted set a = 'ad' WHERE a = 'a';
+UPDATE range_parted set a = 'bd' WHERE a = 'b';
+:show_data;
+ partname | a | b | c | d | e
+----------+----+----+-----+----+---
+ part_def | ad | 1 | 1 | 1 |
+ part_def | ad | 10 | 200 | 1 |
+ part_def | bd | 12 | 96 | 1 |
+ part_def | bd | 13 | 97 | 2 |
+ part_def | bd | 15 | 105 | 16 |
+ part_def | bd | 17 | 105 | 19 |
+ part_def | d | 9 | | |
+(7 rows)
+
+-- Update row movement from default to non-default partitions.
+-- ok
+UPDATE range_parted set a = 'a' WHERE a = 'ad';
+UPDATE range_parted set a = 'b' WHERE a = 'bd';
+:show_data;
+ partname | a | b | c | d | e
+----------------+---+----+-----+----+---
+ part_a_10_a_20 | a | 10 | 200 | 1 |
+ part_a_1_a_10 | a | 1 | 1 | 1 |
+ part_c_1_100 | b | 12 | 96 | 1 |
+ part_c_1_100 | b | 13 | 97 | 2 |
+ part_d_15_20 | b | 15 | 105 | 16 |
+ part_d_15_20 | b | 17 | 105 | 19 |
+ part_def | d | 9 | | |
+(7 rows)
+
+-- Cleanup: range_parted no longer needed.
+DROP TABLE range_parted;
+CREATE TABLE list_parted (
+ a text,
+ b int
+) PARTITION BY list (a);
+CREATE TABLE list_part1 PARTITION OF list_parted for VALUES in ('a', 'b');
+CREATE TABLE list_default PARTITION OF list_parted default;
+INSERT into list_part1 VALUES ('a', 1);
+INSERT into list_default VALUES ('d', 10);
+-- fail
+UPDATE list_default set a = 'a' WHERE a = 'd';
+ERROR: new row for relation "list_default" violates partition constraint
+DETAIL: Failing row contains (a, 10).
+-- ok
+UPDATE list_default set a = 'x' WHERE a = 'd';
+DROP TABLE list_parted;
+--------------
+-- Some more update-partition-key test scenarios below. This time use list
+-- partitions.
+--------------
+-- Setup for list partitions
+CREATE TABLE list_parted (a numeric, b int, c int8) PARTITION BY list (a);
+CREATE TABLE sub_parted PARTITION OF list_parted for VALUES in (1) PARTITION BY list (b);
+CREATE TABLE sub_part1(b int, c int8, a numeric);
+alter table sub_part1 set distributed by (a); -- GPDB: distribution policy must match the parent table.
+ALTER TABLE sub_parted ATTACH PARTITION sub_part1 for VALUES in (1);
+CREATE TABLE sub_part2(b int, c int8, a numeric);
+alter table sub_part2 set distributed by (a); -- GPDB: distribution policy must match the parent table.
+ALTER TABLE sub_parted ATTACH PARTITION sub_part2 for VALUES in (2);
+CREATE TABLE list_part1(a numeric, b int, c int8);
+ALTER TABLE list_parted ATTACH PARTITION list_part1 for VALUES in (2,3);
+INSERT into list_parted VALUES (2,5,50);
+INSERT into list_parted VALUES (3,6,60);
+INSERT into sub_parted VALUES (1,1,60);
+INSERT into sub_parted VALUES (1,2,10);
+-- Test partition constraint violation when intermediate ancestor is used and
+-- constraint is inherited from upper root.
+UPDATE sub_parted set a = 2 WHERE c = 10;
+ERROR: new row for relation "sub_parted" violates partition constraint
+DETAIL: Failing row contains (2, 2, 10).
+-- Test update-partition-key, where the unpruned partitions do not have their
+-- partition keys updated.
+SELECT tableoid::regclass::text, * FROM list_parted WHERE a = 2 ORDER BY 1;
+ tableoid | a | b | c
+------------+---+---+----
+ list_part1 | 2 | 5 | 50
+(1 row)
+
+UPDATE list_parted set b = c + a WHERE a = 2;
+SELECT tableoid::regclass::text, * FROM list_parted WHERE a = 2 ORDER BY 1;
+ tableoid | a | b | c
+------------+---+----+----
+ list_part1 | 2 | 52 | 50
+(1 row)
+
+-- Cleanup: list_parted no longer needed.
+DROP TABLE list_parted;
+-- create custom operator class and hash function, for the same reason
+-- explained in alter_table.sql
+create or replace function dummy_hashint4(a int4, seed int8) returns int8 as
+$$ begin return (a + seed); end; $$ language 'plpgsql' immutable;
+create operator class custom_opclass for type int4 using hash as
+operator 1 = , function 2 dummy_hashint4(int4, int8);
+create table hash_parted (
+ a int,
+ b int
+) partition by hash (a custom_opclass, b custom_opclass);
+create table hpart1 partition of hash_parted for values with (modulus 2, remainder 1);
+create table hpart2 partition of hash_parted for values with (modulus 4, remainder 2);
+create table hpart3 partition of hash_parted for values with (modulus 8, remainder 0);
+create table hpart4 partition of hash_parted for values with (modulus 8, remainder 4);
+insert into hpart1 values (1, 1);
+insert into hpart2 values (2, 5);
+insert into hpart4 values (3, 4);
+-- fail
+update hpart1 set a = 3, b=4 where a = 1;
+ERROR: new row for relation "hpart1" violates partition constraint
+DETAIL: Failing row contains (3, 4).
+-- ok, row movement
+update hash_parted set b = b - 1 where b = 1;
+-- ok
+update hash_parted set b = b + 8 where b = 1;
+-- cleanup
+drop table hash_parted;
+drop operator class custom_opclass using hash;
+drop function dummy_hashint4(a int4, seed int8);
diff --git a/contrib/pax_storage/src/data/expected/update_gp.out b/contrib/pax_storage/expected/update_gp.out
similarity index 77%
rename from contrib/pax_storage/src/data/expected/update_gp.out
rename to contrib/pax_storage/expected/update_gp.out
index ba6ca2bc930..9a06ce92d04 100644
--- a/contrib/pax_storage/src/data/expected/update_gp.out
+++ b/contrib/pax_storage/expected/update_gp.out
@@ -1,9 +1,9 @@
+set default_table_access_method = pax;
-- Test DELETE and UPDATE on an inherited table.
-- The special aspect of this table is that the inherited table has
-- a different distribution key. 'p' table's distribution key matches
-- that of 'r', but 'p2's doesn't. Test that the planner adds a Motion
-- node correctly for p2.
-set default_table_access_method = 'pax';
create table todelete (a int) distributed by (a);
create table parent (a int, b int, c int) distributed by (a);
create table child (a int, b int, c int) inherits (parent) distributed by (b);
@@ -19,14 +19,14 @@ update parent set c=c+100 from todelete where parent.a = todelete.a;
select * from parent;
a | b | c
----+----+-----
+ 1 | 1 | 1
5 | 5 | 105
9 | 9 | 9
- 10 | 10 | 10
- 6 | 6 | 106
+ 7 | 7 | 107
2 | 2 | 2
8 | 8 | 8
- 7 | 7 | 107
- 1 | 1 | 1
+ 10 | 10 | 10
+ 6 | 6 | 106
(8 rows)
drop table todelete;
@@ -49,13 +49,13 @@ update target set b=target.b+100 where c = 3 and a in (select b from todelete);
select * from target;
a | b | c
---+-----+---
- 5 | 0 | 1
- 5 | 100 | 3
- 1 | 0 | 1
2 | 0 | 1
- 3 | 0 | 1
4 | 0 | 1
4 | 0 | 3
+ 1 | 0 | 1
+ 3 | 0 | 1
+ 5 | 0 | 1
+ 5 | 100 | 3
(7 rows)
-- Also test an update with a qual that doesn't match any partition. The
@@ -76,12 +76,10 @@ create table child_b (a int4, b int4) inherits (base_tbl) distributed by (b);
NOTICE: merging column "a" with inherited definition
NOTICE: merging column "b" with inherited definition
insert into base_tbl select g, g from generate_series(1, 5) g;
--- start_ignore
explain (costs off) update base_tbl set a=a+1;
-ERROR: can't split update for inherit table: base_tbl (preptlist.c:138)
--- end_ignore
+ERROR: can't split update for inherit table: base_tbl
update base_tbl set a = 5;
-ERROR: can't split update for inherit table: base_tbl (preptlist.c:138)
+ERROR: can't split update for inherit table: base_tbl
--
-- Explicit Distribution motion must be added if any of the child nodes
-- contains any motion excluding the motions in initplans.
@@ -96,8 +94,8 @@ CREATE TABLE keo3 ( sky_per character varying(24), bky_per character varying(24)
INSERT INTO keo3 VALUES ('1', '1');
CREATE TABLE keo4 ( keo_para_required_period character varying(6), keo_para_budget_date character varying(24)) DISTRIBUTED RANDOMLY;
INSERT INTO keo4 VALUES ('1', '1');
+ANALYZE keo1, keo2, keo3, keo4;
-- Explicit Redistribution motion should be added in case of GPDB Planner (test case not applicable for ORCA)
--- start_ignore
EXPLAIN (COSTS OFF) UPDATE keo1 SET user_vie_act_cntr_marg_cum = 234.682 FROM
( SELECT a.user_vie_project_code_pk FROM keo1 a INNER JOIN keo2 b
ON b.projects_pk=a.user_vie_project_code_pk
@@ -107,41 +105,37 @@ EXPLAIN (COSTS OFF) UPDATE keo1 SET user_vie_act_cntr_marg_cum = 234.682 FROM
(SELECT min (keo4.keo_para_budget_date) FROM keo4)))
) t1
WHERE t1.user_vie_project_code_pk = keo1.user_vie_project_code_pk;
- QUERY PLAN
--------------------------------------------------------------------------------------------------------------------------
+ QUERY PLAN
+-------------------------------------------------------------------------------------------------------
Update on keo1
- InitPlan 3 (returns $2) (slice4)
- -> Finalize Aggregate
- InitPlan 2 (returns $1) (slice6)
- -> Gather Motion 3:1 (slice7; segments: 3)
- InitPlan 1 (returns $0) (slice8)
- -> Finalize Aggregate
- -> Gather Motion 3:1 (slice9; segments: 3)
- -> Partial Aggregate
- -> Seq Scan on keo4
+ InitPlan 3 (returns $2) (slice3)
+ -> Aggregate
+ InitPlan 2 (returns $1) (slice5)
+ -> Gather Motion 3:1 (slice6; segments: 3)
+ InitPlan 1 (returns $0) (slice7)
+ -> Aggregate
+ -> Gather Motion 3:1 (slice8; segments: 3)
+ -> Seq Scan on keo4
-> Seq Scan on keo4 keo4_1
Filter: ((keo_para_budget_date)::text = $0)
- -> Gather Motion 3:1 (slice5; segments: 3)
- -> Partial Aggregate
- -> Seq Scan on keo3
- Filter: ((bky_per)::text = ($1)::text)
- -> Explicit Redistribute Motion 3:3 (slice1; segments: 3)
+ -> Gather Motion 3:1 (slice4; segments: 3)
+ -> Seq Scan on keo3
+ Filter: ((bky_per)::text = ($1)::text)
+ -> Hash Join
+ Hash Cond: ((a.user_vie_project_code_pk)::text = (b.projects_pk)::text)
-> Hash Join
- Hash Cond: ((b.projects_pk)::text = (a.user_vie_project_code_pk)::text)
- -> Seq Scan on keo2 b
+ Hash Cond: ((a.user_vie_project_code_pk)::text = (keo1.user_vie_project_code_pk)::text)
+ -> Broadcast Motion 3:3 (slice1; segments: 3)
+ -> Seq Scan on keo1 a
+ Filter: ((user_vie_fiscal_year_period_sk)::text = $2)
-> Hash
- -> Broadcast Motion 3:3 (slice2; segments: 3)
- -> Hash Join
- Hash Cond: ((keo1.user_vie_project_code_pk)::text = (a.user_vie_project_code_pk)::text)
- -> Seq Scan on keo1
- -> Hash
- -> Broadcast Motion 3:3 (slice3; segments: 3)
- -> Seq Scan on keo1 a
- Filter: ((user_vie_fiscal_year_period_sk)::text = $2)
+ -> Seq Scan on keo1
+ -> Hash
+ -> Broadcast Motion 3:3 (slice2; segments: 3)
+ -> Seq Scan on keo2 b
Optimizer: Postgres query optimizer
-(30 rows)
+(27 rows)
--- end_ignore
UPDATE keo1 SET user_vie_act_cntr_marg_cum = 234.682 FROM
( SELECT a.user_vie_project_code_pk FROM keo1 a INNER JOIN keo2 b
ON b.projects_pk=a.user_vie_project_code_pk
@@ -160,7 +154,6 @@ SELECT user_vie_act_cntr_marg_cum FROM keo1;
-- Explicit Redistribution motion should not be added in case of GPDB Planner (test case not applicable to ORCA)
CREATE TABLE keo5 (x int, y int) DISTRIBUTED BY (x);
INSERT INTO keo5 VALUES (1,1);
--- start_ignore
EXPLAIN (COSTS OFF) DELETE FROM keo5 WHERE x IN (SELECT x FROM keo5 WHERE EXISTS (SELECT x FROM keo5 WHERE x < 2));
QUERY PLAN
-------------------------------------------------------
@@ -181,7 +174,6 @@ EXPLAIN (COSTS OFF) DELETE FROM keo5 WHERE x IN (SELECT x FROM keo5 WHERE EXISTS
Optimizer: Postgres query optimizer
(15 rows)
--- end_ignore
DELETE FROM keo5 WHERE x IN (SELECT x FROM keo5 WHERE EXISTS (SELECT x FROM keo5 WHERE x < 2));
SELECT x FROM keo5;
x
@@ -194,34 +186,48 @@ DROP TABLE keo2;
DROP TABLE keo3;
DROP TABLE keo4;
DROP TABLE keo5;
--- start_ignore
--- -- text types. We should support the following updates.
--- --
--- CREATE TEMP TABLE ttab1 (a varchar(15), b integer) DISTRIBUTED BY (a);
--- CREATE TEMP TABLE ttab2 (a varchar(15), b integer) DISTRIBUTED BY (a);
--- UPDATE ttab1 SET b = ttab2.b FROM ttab2 WHERE ttab1.a = ttab2.a;
--- DROP TABLE ttab1;
--- DROP TABLE ttab2;
--- CREATE TEMP TABLE ttab1 (a text, b integer) DISTRIBUTED BY (a);
--- CREATE TEMP TABLE ttab2 (a text, b integer) DISTRIBUTED BY (a);
--- UPDATE ttab1 SET b = ttab2.b FROM ttab2 WHERE ttab1.a = ttab2.a;
--- DROP TABLE ttab1;
--- DROP TABLE ttab2;
--- CREATE TEMP TABLE ttab1 (a varchar, b integer) DISTRIBUTED BY (a);
--- CREATE TEMP TABLE ttab2 (a varchar, b integer) DISTRIBUTED BY (a);
--- UPDATE ttab1 SET b = ttab2.b FROM ttab2 WHERE ttab1.a = ttab2.a;
--- DROP TABLE ttab1;
--- DROP TABLE ttab2;
--- CREATE TEMP TABLE ttab1 (a char(15), b integer) DISTRIBUTED BY (a);
--- CREATE TEMP TABLE ttab2 (a char(15), b integer) DISTRIBUTED BY (a);
--- UPDATE ttab1 SET b = ttab2.b FROM ttab2 WHERE ttab1.a = ttab2.a;
--- DROP TABLE IF EXISTS update_distr_key;
--- CREATE TEMP TABLE update_distr_key (a int, b int) DISTRIBUTED BY (a);
--- INSERT INTO update_distr_key select i, i* 10 from generate_series(0, 9) i;
--- UPDATE update_distr_key SET a = 5 WHERE b = 10;
--- SELECT * from update_distr_key;
--- DROP TABLE update_distr_key;
--- end_ignore
+--
+-- text types. We should support the following updates.
+--
+CREATE TEMP TABLE ttab1 (a varchar(15), b integer) DISTRIBUTED BY (a);
+CREATE TEMP TABLE ttab2 (a varchar(15), b integer) DISTRIBUTED BY (a);
+UPDATE ttab1 SET b = ttab2.b FROM ttab2 WHERE ttab1.a = ttab2.a;
+DROP TABLE ttab1;
+DROP TABLE ttab2;
+CREATE TEMP TABLE ttab1 (a text, b integer) DISTRIBUTED BY (a);
+CREATE TEMP TABLE ttab2 (a text, b integer) DISTRIBUTED BY (a);
+UPDATE ttab1 SET b = ttab2.b FROM ttab2 WHERE ttab1.a = ttab2.a;
+DROP TABLE ttab1;
+DROP TABLE ttab2;
+CREATE TEMP TABLE ttab1 (a varchar, b integer) DISTRIBUTED BY (a);
+CREATE TEMP TABLE ttab2 (a varchar, b integer) DISTRIBUTED BY (a);
+UPDATE ttab1 SET b = ttab2.b FROM ttab2 WHERE ttab1.a = ttab2.a;
+DROP TABLE ttab1;
+DROP TABLE ttab2;
+CREATE TEMP TABLE ttab1 (a char(15), b integer) DISTRIBUTED BY (a);
+CREATE TEMP TABLE ttab2 (a char(15), b integer) DISTRIBUTED BY (a);
+UPDATE ttab1 SET b = ttab2.b FROM ttab2 WHERE ttab1.a = ttab2.a;
+DROP TABLE IF EXISTS update_distr_key;
+NOTICE: table "update_distr_key" does not exist, skipping
+CREATE TEMP TABLE update_distr_key (a int, b int) DISTRIBUTED BY (a);
+INSERT INTO update_distr_key select i, i* 10 from generate_series(0, 9) i;
+UPDATE update_distr_key SET a = 5 WHERE b = 10;
+SELECT * from update_distr_key;
+ a | b
+---+----
+ 0 | 0
+ 2 | 20
+ 4 | 40
+ 6 | 60
+ 8 | 80
+ 5 | 10
+ 3 | 30
+ 5 | 50
+ 7 | 70
+ 9 | 90
+(10 rows)
+
+DROP TABLE update_distr_key;
-- below cases is to test multi-hash-cols
CREATE TABLE tab3(c1 int, c2 int, c3 int, c4 int, c5 int) DISTRIBUTED BY (c1, c2, c3);
CREATE TABLE tab5(c1 int, c2 int, c3 int, c4 int, c5 int) DISTRIBUTED BY (c1, c2, c3, c4, c5);
@@ -233,14 +239,14 @@ SELECT gp_segment_id, * FROM tab3;
---------------+----+----+----+----+----
0 | 5 | 5 | 5 | 5 | 5
0 | 8 | 8 | 8 | 8 | 8
- 2 | 1 | 1 | 1 | 1 | 1
- 2 | 2 | 2 | 2 | 2 | 2
- 2 | 6 | 6 | 6 | 6 | 6
- 2 | 7 | 7 | 7 | 7 | 7
1 | 3 | 3 | 3 | 3 | 3
1 | 4 | 4 | 4 | 4 | 4
1 | 9 | 9 | 9 | 9 | 9
1 | 10 | 10 | 10 | 10 | 10
+ 2 | 1 | 1 | 1 | 1 | 1
+ 2 | 2 | 2 | 2 | 2 | 2
+ 2 | 6 | 6 | 6 | 6 | 6
+ 2 | 7 | 7 | 7 | 7 | 7
(10 rows)
UPDATE tab3 set c1 = 9 where c4 = 1;
@@ -267,66 +273,69 @@ SELECT gp_segment_id, * FROM tab3;
1 | 4 | 4 | 4 | 4 | 4
1 | 9 | 9 | 9 | 9 | 9
1 | 10 | 10 | 10 | 10 | 10
+ 0 | 5 | 5 | 5 | 5 | 5
+ 0 | 8 | 8 | 8 | 8 | 8
2 | 2 | 2 | 2 | 2 | 2
2 | 6 | 6 | 6 | 6 | 6
2 | 7 | 7 | 7 | 7 | 7
2 | 5 | 6 | 1 | 1 | 1
- 0 | 5 | 5 | 5 | 5 | 5
- 0 | 8 | 8 | 8 | 8 | 8
(10 rows)
UPDATE tab3 set (c1,c2,c3) = (3,2,1) where c4 = 1;
SELECT gp_segment_id, * FROM tab3;
gp_segment_id | c1 | c2 | c3 | c4 | c5
---------------+----+----+----+----+----
- 0 | 5 | 5 | 5 | 5 | 5
- 0 | 8 | 8 | 8 | 8 | 8
- 1 | 3 | 3 | 3 | 3 | 3
- 1 | 4 | 4 | 4 | 4 | 4
- 1 | 9 | 9 | 9 | 9 | 9
- 1 | 10 | 10 | 10 | 10 | 10
2 | 2 | 2 | 2 | 2 | 2
2 | 6 | 6 | 6 | 6 | 6
2 | 7 | 7 | 7 | 7 | 7
2 | 3 | 2 | 1 | 1 | 1
+ 1 | 3 | 3 | 3 | 3 | 3
+ 1 | 4 | 4 | 4 | 4 | 4
+ 1 | 9 | 9 | 9 | 9 | 9
+ 1 | 10 | 10 | 10 | 10 | 10
+ 0 | 5 | 5 | 5 | 5 | 5
+ 0 | 8 | 8 | 8 | 8 | 8
(10 rows)
UPDATE tab3 set c1 = 11 where c2 = 10 and c2 < 1;
SELECT gp_segment_id, * FROM tab3;
gp_segment_id | c1 | c2 | c3 | c4 | c5
---------------+----+----+----+----+----
- 1 | 3 | 3 | 3 | 3 | 3
- 1 | 4 | 4 | 4 | 4 | 4
- 1 | 9 | 9 | 9 | 9 | 9
- 1 | 10 | 10 | 10 | 10 | 10
0 | 5 | 5 | 5 | 5 | 5
0 | 8 | 8 | 8 | 8 | 8
2 | 2 | 2 | 2 | 2 | 2
2 | 6 | 6 | 6 | 6 | 6
2 | 7 | 7 | 7 | 7 | 7
2 | 3 | 2 | 1 | 1 | 1
+ 1 | 3 | 3 | 3 | 3 | 3
+ 1 | 4 | 4 | 4 | 4 | 4
+ 1 | 9 | 9 | 9 | 9 | 9
+ 1 | 10 | 10 | 10 | 10 | 10
(10 rows)
-- test tab5
SELECT gp_segment_id, * FROM tab5;
gp_segment_id | c1 | c2 | c3 | c4 | c5
---------------+----+----+----+----+----
- 0 | 4 | 4 | 4 | 4 | 4
- 0 | 9 | 9 | 9 | 9 | 9
- 0 | 10 | 10 | 10 | 10 | 10
+ 2 | 6 | 6 | 6 | 6 | 6
+ 2 | 7 | 7 | 7 | 7 | 7
+ 2 | 8 | 8 | 8 | 8 | 8
1 | 1 | 1 | 1 | 1 | 1
1 | 2 | 2 | 2 | 2 | 2
1 | 3 | 3 | 3 | 3 | 3
1 | 5 | 5 | 5 | 5 | 5
- 2 | 6 | 6 | 6 | 6 | 6
- 2 | 7 | 7 | 7 | 7 | 7
- 2 | 8 | 8 | 8 | 8 | 8
+ 0 | 4 | 4 | 4 | 4 | 4
+ 0 | 9 | 9 | 9 | 9 | 9
+ 0 | 10 | 10 | 10 | 10 | 10
(10 rows)
UPDATE tab5 set c1 = 1000 where c4 = 1;
SELECT gp_segment_id, * FROM tab5;
gp_segment_id | c1 | c2 | c3 | c4 | c5
---------------+------+----+----+----+----
+ 0 | 4 | 4 | 4 | 4 | 4
+ 0 | 9 | 9 | 9 | 9 | 9
+ 0 | 10 | 10 | 10 | 10 | 10
1 | 2 | 2 | 2 | 2 | 2
1 | 3 | 3 | 3 | 3 | 3
1 | 5 | 5 | 5 | 5 | 5
@@ -334,9 +343,6 @@ SELECT gp_segment_id, * FROM tab5;
2 | 6 | 6 | 6 | 6 | 6
2 | 7 | 7 | 7 | 7 | 7
2 | 8 | 8 | 8 | 8 | 8
- 0 | 4 | 4 | 4 | 4 | 4
- 0 | 9 | 9 | 9 | 9 | 9
- 0 | 10 | 10 | 10 | 10 | 10
(10 rows)
UPDATE tab5 set (c1,c2) = (9,10) where c4 = 1;
@@ -359,12 +365,12 @@ UPDATE tab5 set (c1,c2,c4) = (5,8,6) where c4 = 1;
SELECT gp_segment_id, * FROM tab5;
gp_segment_id | c1 | c2 | c3 | c4 | c5
---------------+----+----+----+----+----
- 2 | 6 | 6 | 6 | 6 | 6
- 2 | 7 | 7 | 7 | 7 | 7
- 2 | 8 | 8 | 8 | 8 | 8
1 | 2 | 2 | 2 | 2 | 2
1 | 3 | 3 | 3 | 3 | 3
1 | 5 | 5 | 5 | 5 | 5
+ 2 | 6 | 6 | 6 | 6 | 6
+ 2 | 7 | 7 | 7 | 7 | 7
+ 2 | 8 | 8 | 8 | 8 | 8
0 | 4 | 4 | 4 | 4 | 4
0 | 9 | 9 | 9 | 9 | 9
0 | 10 | 10 | 10 | 10 | 10
@@ -375,6 +381,9 @@ UPDATE tab5 set (c1,c2,c3,c4,c5) = (1,2,3,0,6) where c5 = 1;
SELECT gp_segment_id, * FROM tab5;
gp_segment_id | c1 | c2 | c3 | c4 | c5
---------------+----+----+----+----+----
+ 0 | 4 | 4 | 4 | 4 | 4
+ 0 | 9 | 9 | 9 | 9 | 9
+ 0 | 10 | 10 | 10 | 10 | 10
1 | 2 | 2 | 2 | 2 | 2
1 | 3 | 3 | 3 | 3 | 3
1 | 5 | 5 | 5 | 5 | 5
@@ -382,15 +391,15 @@ SELECT gp_segment_id, * FROM tab5;
2 | 6 | 6 | 6 | 6 | 6
2 | 7 | 7 | 7 | 7 | 7
2 | 8 | 8 | 8 | 8 | 8
- 0 | 4 | 4 | 4 | 4 | 4
- 0 | 9 | 9 | 9 | 9 | 9
- 0 | 10 | 10 | 10 | 10 | 10
(10 rows)
UPDATE tab5 set c1 = 11 where c3 = 10 and c3 < 1;
SELECT gp_segment_id, * FROM tab5;
gp_segment_id | c1 | c2 | c3 | c4 | c5
---------------+----+----+----+----+----
+ 0 | 4 | 4 | 4 | 4 | 4
+ 0 | 9 | 9 | 9 | 9 | 9
+ 0 | 10 | 10 | 10 | 10 | 10
1 | 2 | 2 | 2 | 2 | 2
1 | 3 | 3 | 3 | 3 | 3
1 | 5 | 5 | 5 | 5 | 5
@@ -398,12 +407,8 @@ SELECT gp_segment_id, * FROM tab5;
2 | 6 | 6 | 6 | 6 | 6
2 | 7 | 7 | 7 | 7 | 7
2 | 8 | 8 | 8 | 8 | 8
- 0 | 4 | 4 | 4 | 4 | 4
- 0 | 9 | 9 | 9 | 9 | 9
- 0 | 10 | 10 | 10 | 10 | 10
(10 rows)
--- start_ignore
EXPLAIN (COSTS OFF ) UPDATE tab3 SET C1 = C1 + 1, C5 = C5+1;
QUERY PLAN
---------------------------------------------------------------
@@ -414,7 +419,6 @@ EXPLAIN (COSTS OFF ) UPDATE tab3 SET C1 = C1 + 1, C5 = C5+1;
Optimizer: Postgres query optimizer
(5 rows)
--- end_ignore
-- clean up
drop table tab3;
drop table tab5;
@@ -430,6 +434,12 @@ drop table if exists update_ao_table;
NOTICE: table "update_ao_table" does not exist, skipping
drop table if exists update_aoco_table;
NOTICE: table "update_aoco_table" does not exist, skipping
+drop table if exists p_1;
+NOTICE: table "p_1" does not exist, skipping
+drop table if exists p_2;
+NOTICE: table "p_2" does not exist, skipping
+drop table if exists subpartition_1;
+NOTICE: table "subpartition_1" does not exist, skipping
-- end_ignore
-- Update normal table distribution key
create table update_dist(a int) distributed by (a);
@@ -449,43 +459,43 @@ insert into s select generate_series(1, 5), generate_series(1, 5) * 2;
select * from r;
a | b
---+----
- 1 | 2
- 5 | 10
- 2 | 4
3 | 6
4 | 8
+ 5 | 10
+ 1 | 2
+ 2 | 4
(5 rows)
select * from s;
a | b
---+----
+ 1 | 2
2 | 4
3 | 6
4 | 8
5 | 10
- 1 | 2
(5 rows)
update r set a = r.a + 1 from s where r.a = s.a;
select * from r;
a | b
---+----
- 3 | 4
4 | 6
- 2 | 2
5 | 8
6 | 10
+ 3 | 4
+ 2 | 2
(5 rows)
update r set a = r.a + 1 where a in (select a from s);
select * from r;
a | b
---+----
- 4 | 4
- 3 | 2
6 | 10
5 | 6
6 | 8
+ 4 | 4
+ 3 | 2
(5 rows)
-- Update redistribution
@@ -496,32 +506,32 @@ insert into s select generate_series(1, 5), generate_series(1, 5) * 2;
select * from r;
a | b
---+---
+ 3 | 3
+ 4 | 4
5 | 5
1 | 1
2 | 2
- 3 | 3
- 4 | 4
(5 rows)
select * from s;
a | b
---+----
- 1 | 2
- 5 | 10
- 2 | 4
3 | 6
4 | 8
+ 5 | 10
+ 1 | 2
+ 2 | 4
(5 rows)
update r set a = r.a + 1 from s where r.b = s.b;
select * from r;
a | b
---+---
- 3 | 3
- 3 | 2
1 | 1
+ 3 | 3
5 | 5
5 | 4
+ 3 | 2
(5 rows)
update r set a = r.a + 1 where b in (select b from s);
@@ -529,9 +539,9 @@ select * from r;
a | b
---+---
1 | 1
+ 3 | 3
5 | 5
6 | 4
- 3 | 3
4 | 2
(5 rows)
@@ -553,48 +563,82 @@ select * from r;
select * from s;
a | b
---+---
- 2 | 2
3 | 3
4 | 4
- 1 | 1
5 | 5
+ 1 | 1
+ 2 | 2
(5 rows)
update s set a = s.a + 1 where exists (select 1 from r where s.a = r.b);
select * from s;
a | b
---+---
- 5 | 5
- 5 | 4
1 | 1
3 | 3
+ 5 | 5
3 | 2
+ 5 | 4
(5 rows)
--- start_ignore
-- Update ao table distribution key
--- create table update_ao_table (a int, b int) WITH (appendonly=true) distributed by (a);
--- insert into update_ao_table select g, g from generate_series(1, 5) g;
--- select * from update_ao_table;
--- update update_ao_table set a = a + 1 where b = 3;
--- select * from update_ao_table;
+create table update_ao_table (a int, b int) WITH (appendonly=true) distributed by (a);
+insert into update_ao_table select g, g from generate_series(1, 5) g;
+select * from update_ao_table;
+ a | b
+---+---
+ 1 | 1
+ 2 | 2
+ 3 | 3
+ 4 | 4
+ 5 | 5
+(5 rows)
+
+update update_ao_table set a = a + 1 where b = 3;
+select * from update_ao_table;
+ a | b
+---+---
+ 1 | 1
+ 2 | 2
+ 4 | 4
+ 5 | 5
+ 4 | 3
+(5 rows)
+
-- Update aoco table distribution key
--- create table update_aoco_table (a int, b int) WITH (appendonly=true, orientation=column) distributed by (a);
--- insert into update_aoco_table select g,g from generate_series(1, 5) g;
--- select * from update_aoco_table;
--- update update_aoco_table set a = a + 1 where b = 3;
--- select * from update_aoco_table;
--- end_ignore
+create table update_aoco_table (a int, b int) WITH (appendonly=true, orientation=column) distributed by (a);
+insert into update_aoco_table select g,g from generate_series(1, 5) g;
+select * from update_aoco_table;
+ a | b
+---+---
+ 1 | 1
+ 2 | 2
+ 3 | 3
+ 4 | 4
+ 5 | 5
+(5 rows)
+
+update update_aoco_table set a = a + 1 where b = 3;
+select * from update_aoco_table;
+ a | b
+---+---
+ 1 | 1
+ 2 | 2
+ 4 | 4
+ 5 | 5
+ 4 | 3
+(5 rows)
+
-- Update prepare
delete from s;
insert into s select generate_series(1, 5), generate_series(1, 5);
select * from r;
a | b
---+----
+ 1 | 2
2 | 4
3 | 6
4 | 8
- 1 | 2
5 | 10
(5 rows)
@@ -602,10 +646,10 @@ select * from s;
a | b
---+---
1 | 1
- 5 | 5
2 | 2
3 | 3
4 | 4
+ 5 | 5
(5 rows)
prepare update_s(int) as update s set a = s.a + $1 where exists (select 1 from r where s.a = r.b);
@@ -613,27 +657,26 @@ execute update_s(10);
select * from s;
a | b
----+---
- 3 | 3
1 | 1
- 12 | 2
- 5 | 5
14 | 4
+ 3 | 3
+ 5 | 5
+ 12 | 2
(5 rows)
-- Confirm that a split update is not created for a table excluded by
-- constraints in the planner.
create table nosplitupdate (a int) distributed by (a);
--- start_ignore
explain update nosplitupdate set a=0 where a=1 and a<1;
QUERY PLAN
-----------------------------------------------------------
- Update on nosplitupdate (cost=0.00..0.01 rows=0 width=0)
- -> Result (cost=0.00..0.00 rows=0 width=46)
+ Update on nosplitupdate (cost=0.00..0.01 rows=1 width=0)
+ -> Result (cost=0.00..0.01 rows=1 width=0)
One-Time Filter: false
+ Planning time: 0.271 ms
Optimizer: Postgres query optimizer
-(4 rows)
+(5 rows)
--- end_ignore
-- test split-update when split-node's flow is entry
create table tsplit_entry (c int);
NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'c' as the Cloudberry Database data distribution key for this table.
@@ -641,53 +684,56 @@ HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sur
insert into tsplit_entry values (1), (2);
analyze tsplit_entry;
-- start_ignore
+-- gp_segment_configuration scan is different when using different FTS
explain update tsplit_entry set c = s.a from (select count(*) as a from gp_segment_configuration) s;
QUERY PLAN
------------------------------------------------------------------------------------------------------------------
- Update on tsplit_entry (cost=10000000001.01..10000000002.17 rows=0 width=0)
- -> Explicit Redistribute Motion 1:3 (slice1) (cost=10000000001.01..10000000002.17 rows=2 width=74)
- -> Split (cost=10000000001.01..10000000002.08 rows=7 width=74)
- -> Nested Loop (cost=10000000001.01..10000000002.08 rows=3 width=74)
- -> Gather Motion 3:1 (slice2; segments: 3) (cost=0.00..1.03 rows=1 width=38)
- -> Seq Scan on tsplit_entry (cost=0.00..1.01 rows=1 width=38)
- -> Materialize (cost=1.01..1.04 rows=1 width=40)
- -> Subquery Scan on s (cost=1.01..1.03 rows=1 width=40)
- -> Aggregate (cost=1.01..1.02 rows=1 width=8)
- -> Seq Scan on gp_segment_configuration (cost=0.00..1.01 rows=1 width=0)
+ Update on tsplit_entry (cost=10000000001.00..10000000003.18 rows=3 width=54)
+ -> Explicit Redistribute Motion 1:3 (slice) (cost=10000000001.00..10000000003.18 rows=7 width=54)
+ -> Split (cost=10000000001.00..10000000003.18 rows=7 width=54)
+ -> Nested Loop (cost=10000000001.00..10000000003.12 rows=4 width=54)
+ -> Gather Motion 3:1 (slice2; segments: 3) (cost=0.00..2.06 rows=2 width=14)
+ -> Seq Scan on tsplit_entry (cost=0.00..2.02 rows=1 width=14)
+ -> Materialize (cost=1.00..1.03 rows=1 width=40)
+ -> Subquery Scan on s (cost=1.00..1.02 rows=1 width=40)
+ -> Aggregate (cost=1.00..1.01 rows=1 width=8)
+ -> Function Scan on gp_get_segment_configuration (cost=0.00..1.00 rows=1 width=0)
Optimizer: Postgres query optimizer
(11 rows)
-
-- end_ignore
update tsplit_entry set c = s.a from (select count(*) as a from gp_segment_configuration) s;
--- start_ignore
--- CREATE TABLE update_gp_foo (
--- a_dist int,
--- b int,
--- c_part int,
--- d int
--- )
--- WITH (appendonly=false) DISTRIBUTED BY (a_dist) PARTITION BY RANGE(c_part)
--- (
--- PARTITION p20190305 START (1) END (2) WITH (tablename='update_gp_foo_1_prt_p20190305', appendonly=false)
--- );
--- CREATE TABLE update_gp_foo1 (
--- a_dist int,
--- b int,
--- c_part int,
--- d int
--- )
--- WITH (appendonly=false) DISTRIBUTED BY (a_dist) PARTITION BY RANGE(c_part)
--- (
--- PARTITION p20190305 START (1) END (2) WITH (tablename='update_gp_foo1_1_prt_p20190305', appendonly=false)
--- );
--- INSERT INTO update_gp_foo VALUES (12, 40, 1, 50);
--- INSERT INTO update_gp_foo1 VALUES (12, 3, 1, 50);
--- UPDATE update_gp_foo
--- SET b = update_gp_foo.c_part,
--- d = update_gp_foo1.a_dist
--- FROM update_gp_foo1;
--- SELECT * from update_gp_foo;
--- end_ignore
+CREATE TABLE update_gp_foo (
+ a_dist int,
+ b int,
+ c_part int,
+ d int
+)
+WITH (appendonly=false) DISTRIBUTED BY (a_dist) PARTITION BY RANGE(c_part)
+ (
+ PARTITION p20190305 START (1) END (2) WITH (tablename='update_gp_foo_1_prt_p20190305', appendonly=false)
+ );
+CREATE TABLE update_gp_foo1 (
+ a_dist int,
+ b int,
+ c_part int,
+ d int
+)
+WITH (appendonly=false) DISTRIBUTED BY (a_dist) PARTITION BY RANGE(c_part)
+ (
+ PARTITION p20190305 START (1) END (2) WITH (tablename='update_gp_foo1_1_prt_p20190305', appendonly=false)
+ );
+INSERT INTO update_gp_foo VALUES (12, 40, 1, 50);
+INSERT INTO update_gp_foo1 VALUES (12, 3, 1, 50);
+UPDATE update_gp_foo
+SET b = update_gp_foo.c_part,
+ d = update_gp_foo1.a_dist
+FROM update_gp_foo1;
+SELECT * from update_gp_foo;
+ a_dist | b | c_part | d
+--------+---+--------+----
+ 12 | 1 | 1 | 12
+(1 row)
+
-- Test insert on conflict do update
-- Insert on conflict do update is an insert statement but might
-- invoke ExecUpdate on segments, but updating distkeys of a table
@@ -695,31 +741,24 @@ update tsplit_entry set c = s.a from (select count(*) as a from gp_segment_confi
-- planning, if a `insert on conflict do update` statement set the
-- dist keys of the table, it will raise an error.
-- See github issue: https://github.com/greenplum-db/gpdb/issues/9444
--- start_ignore
create table t_insert_on_conflict_update_distkey(a int, b int) distributed by (a);
create unique index uidx_t_insert_on_conflict_update_distkey on t_insert_on_conflict_update_distkey(a, b);
-ERROR: not supported on pax relations: IndexBuildRangeScan
-- the following statement should error out because the on conflict update want to
-- modify the tuple's distkey which might lead to wrong data distribution
insert into t_insert_on_conflict_update_distkey values (1, 1) on conflict(a, b) do update set a = 1;
ERROR: modification of distribution columns in OnConflictUpdate is not supported
drop index uidx_t_insert_on_conflict_update_distkey;
-ERROR: index "uidx_t_insert_on_conflict_update_distkey" does not exist
drop table t_insert_on_conflict_update_distkey;
-- randomly distributed table cannot add unique constrain, so next we test replicated table
create table t_insert_on_conflict_update_distkey(a int, b int) distributed replicated;
create unique index uidx_t_insert_on_conflict_update_distkey on t_insert_on_conflict_update_distkey(a, b);
-ERROR: not supported on pax relations: IndexBuildRangeScan
-- the following statement should succeed because replicated table does not contain distkey
insert into t_insert_on_conflict_update_distkey values (1, 1) on conflict(a, b) do update set a = 1;
-ERROR: there is no unique or exclusion constraint matching the ON CONFLICT specification
--- end_ignore
+ERROR: not implemented yet on pax relations: TupleInsertSpeculative
-- Some tests on a partitioned table.
CREATE TABLE update_gp_rangep (a int, b int, orig_a int) DISTRIBUTED BY (b) PARTITION BY RANGE (a);
CREATE TABLE update_gp_rangep_1_to_10 PARTITION OF update_gp_rangep FOR VALUES FROM (1) TO (10);
-NOTICE: table has parent, setting distribution columns to match parent table
CREATE TABLE update_gp_rangep_10_to_20 PARTITION OF update_gp_rangep FOR VALUES FROM (10) TO (20);
-NOTICE: table has parent, setting distribution columns to match parent table
INSERT INTO update_gp_rangep SELECT g, g, g FROM generate_series(1, 4) g;
-- Simple case: Same partition, same node.
UPDATE update_gp_rangep SET a = 9 WHERE a = 1;
@@ -729,24 +768,23 @@ UPDATE update_gp_rangep SET b = 1 WHERE a = 2;
UPDATE update_gp_rangep SET a = 10 WHERE a = 3;
-- Move row to different partition and also change distribution key
UPDATE update_gp_rangep SET a = 11, b = 1 WHERE a = 4;
--- start_ignore
SELECT tableoid::regclass, * FROM update_gp_rangep ORDER BY orig_a;
tableoid | a | b | orig_a
---------------------------+----+---+--------
update_gp_rangep_1_to_10 | 9 | 1 | 1
update_gp_rangep_1_to_10 | 2 | 1 | 2
+ update_gp_rangep_10_to_20 | 10 | 3 | 3
update_gp_rangep_10_to_20 | 11 | 1 | 4
-(3 rows)
+(4 rows)
--- end_ignore
-- Also do a lookup with specific distribution key. If the rows were not
-- correctly moved across segments, this would fail to find them, assuming
-- that direct dispatch is effective.
SELECT tableoid::regclass, * FROM update_gp_rangep WHERE b = 1;
tableoid | a | b | orig_a
---------------------------+----+---+--------
- update_gp_rangep_1_to_10 | 9 | 1 | 1
update_gp_rangep_1_to_10 | 2 | 1 | 2
+ update_gp_rangep_1_to_10 | 9 | 1 | 1
update_gp_rangep_10_to_20 | 11 | 1 | 4
(3 rows)
@@ -755,9 +793,7 @@ drop table r;
drop table s;
drop table update_dist;
drop table update_ao_table;
-ERROR: table "update_ao_table" does not exist
drop table update_aoco_table;
-ERROR: table "update_aoco_table" does not exist
drop table nosplitupdate;
drop table tsplit_entry;
-- end_ignore
diff --git a/contrib/pax_storage/expected/update_gp_optimizer.out b/contrib/pax_storage/expected/update_gp_optimizer.out
new file mode 100644
index 00000000000..d335f4c3f61
--- /dev/null
+++ b/contrib/pax_storage/expected/update_gp_optimizer.out
@@ -0,0 +1,793 @@
+set default_table_access_method = pax;
+-- Test DELETE and UPDATE on an inherited table.
+-- The special aspect of this table is that the inherited table has
+-- a different distribution key. 'p' table's distribution key matches
+-- that of 'r', but 'p2's doesn't. Test that the planner adds a Motion
+-- node correctly for p2.
+create table todelete (a int) distributed by (a);
+create table parent (a int, b int, c int) distributed by (a);
+create table child (a int, b int, c int) inherits (parent) distributed by (b);
+NOTICE: merging column "a" with inherited definition
+NOTICE: merging column "b" with inherited definition
+NOTICE: merging column "c" with inherited definition
+insert into parent select g, g, g from generate_series(1,5) g;
+insert into child select g, g, g from generate_series(6,10) g;
+insert into todelete select generate_series(3,4);
+delete from parent using todelete where parent.a = todelete.a;
+insert into todelete select generate_series(5,7);
+update parent set c=c+100 from todelete where parent.a = todelete.a;
+select * from parent;
+ a | b | c
+----+----+-----
+ 1 | 1 | 1
+ 5 | 5 | 105
+ 9 | 9 | 9
+ 7 | 7 | 107
+ 2 | 2 | 2
+ 8 | 8 | 8
+ 10 | 10 | 10
+ 6 | 6 | 106
+(8 rows)
+
+drop table todelete;
+drop table child;
+drop table parent;
+-- This is similar to the above, but with a partitioned table (which is
+-- implemented by inheritance) rather than an explicitly inherited table.
+-- The scans on some of the partitions degenerate into Result nodes with
+-- False one-time filter, which don't need a Motion node.
+create table todelete (a int, b int) distributed by (a);
+create table target (a int, b int, c int)
+ distributed by (a)
+ partition by range (c) (start(1) end(5) every(1), default partition extra);
+insert into todelete select g, g % 4 from generate_series(1, 10) g;
+insert into target select g, 0, 3 from generate_series(1, 5) g;
+insert into target select g, 0, 1 from generate_series(1, 5) g;
+delete from target where c = 3 and a in (select b from todelete);
+insert into todelete values (1, 5);
+update target set b=target.b+100 where c = 3 and a in (select b from todelete);
+select * from target;
+ a | b | c
+---+-----+---
+ 2 | 0 | 1
+ 4 | 0 | 1
+ 4 | 0 | 3
+ 1 | 0 | 1
+ 3 | 0 | 1
+ 5 | 0 | 1
+ 5 | 100 | 3
+(7 rows)
+
+-- Also test an update with a qual that doesn't match any partition. The
+-- Append degenerates into a dummy Result with false One-Time Filter.
+alter table target drop default partition;
+update target set b = 10 where c = 10;
+drop table todelete;
+drop table target;
+--
+-- Test updated on inheritance parent table, where some child tables need a
+-- Split Update, but not all.
+--
+create table base_tbl (a int4, b int4) distributed by (a);
+create table child_a (a int4, b int4) inherits (base_tbl) distributed by (a);
+NOTICE: merging column "a" with inherited definition
+NOTICE: merging column "b" with inherited definition
+create table child_b (a int4, b int4) inherits (base_tbl) distributed by (b);
+NOTICE: merging column "a" with inherited definition
+NOTICE: merging column "b" with inherited definition
+insert into base_tbl select g, g from generate_series(1, 5) g;
+explain (costs off) update base_tbl set a=a+1;
+ERROR: can't split update for inherit table: base_tbl
+update base_tbl set a = 5;
+ERROR: can't split update for inherit table: base_tbl
+--
+-- Explicit Distribution motion must be added if any of the child nodes
+-- contains any motion excluding the motions in initplans.
+-- These test cases and expectation are applicable for GPDB planner not for ORCA.
+--
+SET gp_autostats_mode = NONE;
+CREATE TABLE keo1 ( user_vie_project_code_pk character varying(24), user_vie_fiscal_year_period_sk character varying(24), user_vie_act_cntr_marg_cum character varying(24)) DISTRIBUTED RANDOMLY;
+INSERT INTO keo1 VALUES ('1', '1', '1');
+CREATE TABLE keo2 ( projects_pk character varying(24)) DISTRIBUTED RANDOMLY;
+INSERT INTO keo2 VALUES ('1');
+CREATE TABLE keo3 ( sky_per character varying(24), bky_per character varying(24)) DISTRIBUTED BY (sky_per);
+INSERT INTO keo3 VALUES ('1', '1');
+CREATE TABLE keo4 ( keo_para_required_period character varying(6), keo_para_budget_date character varying(24)) DISTRIBUTED RANDOMLY;
+INSERT INTO keo4 VALUES ('1', '1');
+ANALYZE keo1, keo2, keo3, keo4;
+-- Explicit Redistribution motion should be added in case of GPDB Planner (test case not applicable for ORCA)
+EXPLAIN (COSTS OFF) UPDATE keo1 SET user_vie_act_cntr_marg_cum = 234.682 FROM
+ ( SELECT a.user_vie_project_code_pk FROM keo1 a INNER JOIN keo2 b
+ ON b.projects_pk=a.user_vie_project_code_pk
+ WHERE a.user_vie_fiscal_year_period_sk =
+ (SELECT MAX (sky_per) FROM keo3 WHERE bky_per =
+ (SELECT keo4.keo_para_required_period FROM keo4 WHERE keo_para_budget_date =
+ (SELECT min (keo4.keo_para_budget_date) FROM keo4)))
+ ) t1
+WHERE t1.user_vie_project_code_pk = keo1.user_vie_project_code_pk;
+ QUERY PLAN
+-------------------------------------------------------------------------------------------------------
+ Update on keo1
+ InitPlan 3 (returns $2) (slice3)
+ -> Aggregate
+ InitPlan 2 (returns $1) (slice5)
+ -> Gather Motion 3:1 (slice6; segments: 3)
+ InitPlan 1 (returns $0) (slice7)
+ -> Aggregate
+ -> Gather Motion 3:1 (slice8; segments: 3)
+ -> Seq Scan on keo4
+ -> Seq Scan on keo4 keo4_1
+ Filter: ((keo_para_budget_date)::text = $0)
+ -> Gather Motion 3:1 (slice4; segments: 3)
+ -> Seq Scan on keo3
+ Filter: ((bky_per)::text = ($1)::text)
+ -> Hash Join
+ Hash Cond: ((a.user_vie_project_code_pk)::text = (b.projects_pk)::text)
+ -> Hash Join
+ Hash Cond: ((a.user_vie_project_code_pk)::text = (keo1.user_vie_project_code_pk)::text)
+ -> Broadcast Motion 3:3 (slice1; segments: 3)
+ -> Seq Scan on keo1 a
+ Filter: ((user_vie_fiscal_year_period_sk)::text = $2)
+ -> Hash
+ -> Seq Scan on keo1
+ -> Hash
+ -> Broadcast Motion 3:3 (slice2; segments: 3)
+ -> Seq Scan on keo2 b
+ Optimizer: Postgres query optimizer
+(27 rows)
+
+UPDATE keo1 SET user_vie_act_cntr_marg_cum = 234.682 FROM
+ ( SELECT a.user_vie_project_code_pk FROM keo1 a INNER JOIN keo2 b
+ ON b.projects_pk=a.user_vie_project_code_pk
+ WHERE a.user_vie_fiscal_year_period_sk =
+ (SELECT MAX (sky_per) FROM keo3 WHERE bky_per =
+ (SELECT keo4.keo_para_required_period FROM keo4 WHERE keo_para_budget_date =
+ (SELECT min (keo4.keo_para_budget_date) FROM keo4)))
+ ) t1
+WHERE t1.user_vie_project_code_pk = keo1.user_vie_project_code_pk;
+SELECT user_vie_act_cntr_marg_cum FROM keo1;
+ user_vie_act_cntr_marg_cum
+----------------------------
+ 234.682
+(1 row)
+
+-- Explicit Redistribution motion should not be added in case of GPDB Planner (test case not applicable to ORCA)
+CREATE TABLE keo5 (x int, y int) DISTRIBUTED BY (x);
+INSERT INTO keo5 VALUES (1,1);
+EXPLAIN (COSTS OFF) DELETE FROM keo5 WHERE x IN (SELECT x FROM keo5 WHERE EXISTS (SELECT x FROM keo5 WHERE x < 2));
+ QUERY PLAN
+-------------------------------------------------------
+ Delete on keo5
+ InitPlan 1 (returns $0) (slice1)
+ -> Gather Motion 3:1 (slice2; segments: 3)
+ -> Seq Scan on keo5 keo5_2
+ Filter: (x < 2)
+ -> Result
+ One-Time Filter: $0
+ -> Hash Join
+ Hash Cond: (keo5.x = keo5_1.x)
+ -> Seq Scan on keo5
+ -> Hash
+ -> HashAggregate
+ Group Key: keo5_1.x
+ -> Seq Scan on keo5 keo5_1
+ Optimizer: Postgres query optimizer
+(15 rows)
+
+DELETE FROM keo5 WHERE x IN (SELECT x FROM keo5 WHERE EXISTS (SELECT x FROM keo5 WHERE x < 2));
+SELECT x FROM keo5;
+ x
+---
+(0 rows)
+
+RESET gp_autostats_mode;
+DROP TABLE keo1;
+DROP TABLE keo2;
+DROP TABLE keo3;
+DROP TABLE keo4;
+DROP TABLE keo5;
+--
+-- text types. We should support the following updates.
+--
+CREATE TEMP TABLE ttab1 (a varchar(15), b integer) DISTRIBUTED BY (a);
+CREATE TEMP TABLE ttab2 (a varchar(15), b integer) DISTRIBUTED BY (a);
+UPDATE ttab1 SET b = ttab2.b FROM ttab2 WHERE ttab1.a = ttab2.a;
+DROP TABLE ttab1;
+DROP TABLE ttab2;
+CREATE TEMP TABLE ttab1 (a text, b integer) DISTRIBUTED BY (a);
+CREATE TEMP TABLE ttab2 (a text, b integer) DISTRIBUTED BY (a);
+UPDATE ttab1 SET b = ttab2.b FROM ttab2 WHERE ttab1.a = ttab2.a;
+DROP TABLE ttab1;
+DROP TABLE ttab2;
+CREATE TEMP TABLE ttab1 (a varchar, b integer) DISTRIBUTED BY (a);
+CREATE TEMP TABLE ttab2 (a varchar, b integer) DISTRIBUTED BY (a);
+UPDATE ttab1 SET b = ttab2.b FROM ttab2 WHERE ttab1.a = ttab2.a;
+DROP TABLE ttab1;
+DROP TABLE ttab2;
+CREATE TEMP TABLE ttab1 (a char(15), b integer) DISTRIBUTED BY (a);
+CREATE TEMP TABLE ttab2 (a char(15), b integer) DISTRIBUTED BY (a);
+UPDATE ttab1 SET b = ttab2.b FROM ttab2 WHERE ttab1.a = ttab2.a;
+DROP TABLE IF EXISTS update_distr_key;
+NOTICE: table "update_distr_key" does not exist, skipping
+CREATE TEMP TABLE update_distr_key (a int, b int) DISTRIBUTED BY (a);
+INSERT INTO update_distr_key select i, i* 10 from generate_series(0, 9) i;
+UPDATE update_distr_key SET a = 5 WHERE b = 10;
+SELECT * from update_distr_key;
+ a | b
+---+----
+ 0 | 0
+ 2 | 20
+ 4 | 40
+ 6 | 60
+ 8 | 80
+ 5 | 10
+ 3 | 30
+ 5 | 50
+ 7 | 70
+ 9 | 90
+(10 rows)
+
+DROP TABLE update_distr_key;
+-- below cases is to test multi-hash-cols
+CREATE TABLE tab3(c1 int, c2 int, c3 int, c4 int, c5 int) DISTRIBUTED BY (c1, c2, c3);
+CREATE TABLE tab5(c1 int, c2 int, c3 int, c4 int, c5 int) DISTRIBUTED BY (c1, c2, c3, c4, c5);
+INSERT INTO tab3 SELECT i, i, i, i, i FROM generate_series(1, 10)i;
+INSERT INTO tab5 SELECT i, i, i, i, i FROM generate_series(1, 10)i;
+-- test tab3
+SELECT gp_segment_id, * FROM tab3;
+ gp_segment_id | c1 | c2 | c3 | c4 | c5
+---------------+----+----+----+----+----
+ 0 | 5 | 5 | 5 | 5 | 5
+ 0 | 8 | 8 | 8 | 8 | 8
+ 1 | 3 | 3 | 3 | 3 | 3
+ 1 | 4 | 4 | 4 | 4 | 4
+ 1 | 9 | 9 | 9 | 9 | 9
+ 1 | 10 | 10 | 10 | 10 | 10
+ 2 | 1 | 1 | 1 | 1 | 1
+ 2 | 2 | 2 | 2 | 2 | 2
+ 2 | 6 | 6 | 6 | 6 | 6
+ 2 | 7 | 7 | 7 | 7 | 7
+(10 rows)
+
+UPDATE tab3 set c1 = 9 where c4 = 1;
+SELECT gp_segment_id, * FROM tab3;
+ gp_segment_id | c1 | c2 | c3 | c4 | c5
+---------------+----+----+----+----+----
+ 1 | 3 | 3 | 3 | 3 | 3
+ 1 | 4 | 4 | 4 | 4 | 4
+ 1 | 9 | 9 | 9 | 9 | 9
+ 1 | 10 | 10 | 10 | 10 | 10
+ 2 | 2 | 2 | 2 | 2 | 2
+ 2 | 6 | 6 | 6 | 6 | 6
+ 2 | 7 | 7 | 7 | 7 | 7
+ 0 | 5 | 5 | 5 | 5 | 5
+ 0 | 8 | 8 | 8 | 8 | 8
+ 0 | 9 | 1 | 1 | 1 | 1
+(10 rows)
+
+UPDATE tab3 set (c1,c2) = (5,6) where c4 = 1;
+SELECT gp_segment_id, * FROM tab3;
+ gp_segment_id | c1 | c2 | c3 | c4 | c5
+---------------+----+----+----+----+----
+ 1 | 3 | 3 | 3 | 3 | 3
+ 1 | 4 | 4 | 4 | 4 | 4
+ 1 | 9 | 9 | 9 | 9 | 9
+ 1 | 10 | 10 | 10 | 10 | 10
+ 0 | 5 | 5 | 5 | 5 | 5
+ 0 | 8 | 8 | 8 | 8 | 8
+ 2 | 2 | 2 | 2 | 2 | 2
+ 2 | 6 | 6 | 6 | 6 | 6
+ 2 | 7 | 7 | 7 | 7 | 7
+ 2 | 5 | 6 | 1 | 1 | 1
+(10 rows)
+
+UPDATE tab3 set (c1,c2,c3) = (3,2,1) where c4 = 1;
+SELECT gp_segment_id, * FROM tab3;
+ gp_segment_id | c1 | c2 | c3 | c4 | c5
+---------------+----+----+----+----+----
+ 2 | 2 | 2 | 2 | 2 | 2
+ 2 | 6 | 6 | 6 | 6 | 6
+ 2 | 7 | 7 | 7 | 7 | 7
+ 2 | 3 | 2 | 1 | 1 | 1
+ 1 | 3 | 3 | 3 | 3 | 3
+ 1 | 4 | 4 | 4 | 4 | 4
+ 1 | 9 | 9 | 9 | 9 | 9
+ 1 | 10 | 10 | 10 | 10 | 10
+ 0 | 5 | 5 | 5 | 5 | 5
+ 0 | 8 | 8 | 8 | 8 | 8
+(10 rows)
+
+UPDATE tab3 set c1 = 11 where c2 = 10 and c2 < 1;
+SELECT gp_segment_id, * FROM tab3;
+ gp_segment_id | c1 | c2 | c3 | c4 | c5
+---------------+----+----+----+----+----
+ 0 | 5 | 5 | 5 | 5 | 5
+ 0 | 8 | 8 | 8 | 8 | 8
+ 2 | 2 | 2 | 2 | 2 | 2
+ 2 | 6 | 6 | 6 | 6 | 6
+ 2 | 7 | 7 | 7 | 7 | 7
+ 2 | 3 | 2 | 1 | 1 | 1
+ 1 | 3 | 3 | 3 | 3 | 3
+ 1 | 4 | 4 | 4 | 4 | 4
+ 1 | 9 | 9 | 9 | 9 | 9
+ 1 | 10 | 10 | 10 | 10 | 10
+(10 rows)
+
+-- test tab5
+SELECT gp_segment_id, * FROM tab5;
+ gp_segment_id | c1 | c2 | c3 | c4 | c5
+---------------+----+----+----+----+----
+ 2 | 6 | 6 | 6 | 6 | 6
+ 2 | 7 | 7 | 7 | 7 | 7
+ 2 | 8 | 8 | 8 | 8 | 8
+ 1 | 1 | 1 | 1 | 1 | 1
+ 1 | 2 | 2 | 2 | 2 | 2
+ 1 | 3 | 3 | 3 | 3 | 3
+ 1 | 5 | 5 | 5 | 5 | 5
+ 0 | 4 | 4 | 4 | 4 | 4
+ 0 | 9 | 9 | 9 | 9 | 9
+ 0 | 10 | 10 | 10 | 10 | 10
+(10 rows)
+
+UPDATE tab5 set c1 = 1000 where c4 = 1;
+SELECT gp_segment_id, * FROM tab5;
+ gp_segment_id | c1 | c2 | c3 | c4 | c5
+---------------+------+----+----+----+----
+ 0 | 4 | 4 | 4 | 4 | 4
+ 0 | 9 | 9 | 9 | 9 | 9
+ 0 | 10 | 10 | 10 | 10 | 10
+ 1 | 2 | 2 | 2 | 2 | 2
+ 1 | 3 | 3 | 3 | 3 | 3
+ 1 | 5 | 5 | 5 | 5 | 5
+ 1 | 1000 | 1 | 1 | 1 | 1
+ 2 | 6 | 6 | 6 | 6 | 6
+ 2 | 7 | 7 | 7 | 7 | 7
+ 2 | 8 | 8 | 8 | 8 | 8
+(10 rows)
+
+UPDATE tab5 set (c1,c2) = (9,10) where c4 = 1;
+SELECT gp_segment_id, * FROM tab5;
+ gp_segment_id | c1 | c2 | c3 | c4 | c5
+---------------+----+----+----+----+----
+ 1 | 2 | 2 | 2 | 2 | 2
+ 1 | 3 | 3 | 3 | 3 | 3
+ 1 | 5 | 5 | 5 | 5 | 5
+ 0 | 4 | 4 | 4 | 4 | 4
+ 0 | 9 | 9 | 9 | 9 | 9
+ 0 | 10 | 10 | 10 | 10 | 10
+ 0 | 9 | 10 | 1 | 1 | 1
+ 2 | 6 | 6 | 6 | 6 | 6
+ 2 | 7 | 7 | 7 | 7 | 7
+ 2 | 8 | 8 | 8 | 8 | 8
+(10 rows)
+
+UPDATE tab5 set (c1,c2,c4) = (5,8,6) where c4 = 1;
+SELECT gp_segment_id, * FROM tab5;
+ gp_segment_id | c1 | c2 | c3 | c4 | c5
+---------------+----+----+----+----+----
+ 1 | 2 | 2 | 2 | 2 | 2
+ 1 | 3 | 3 | 3 | 3 | 3
+ 1 | 5 | 5 | 5 | 5 | 5
+ 2 | 6 | 6 | 6 | 6 | 6
+ 2 | 7 | 7 | 7 | 7 | 7
+ 2 | 8 | 8 | 8 | 8 | 8
+ 0 | 4 | 4 | 4 | 4 | 4
+ 0 | 9 | 9 | 9 | 9 | 9
+ 0 | 10 | 10 | 10 | 10 | 10
+ 0 | 5 | 8 | 1 | 6 | 1
+(10 rows)
+
+UPDATE tab5 set (c1,c2,c3,c4,c5) = (1,2,3,0,6) where c5 = 1;
+SELECT gp_segment_id, * FROM tab5;
+ gp_segment_id | c1 | c2 | c3 | c4 | c5
+---------------+----+----+----+----+----
+ 0 | 4 | 4 | 4 | 4 | 4
+ 0 | 9 | 9 | 9 | 9 | 9
+ 0 | 10 | 10 | 10 | 10 | 10
+ 1 | 2 | 2 | 2 | 2 | 2
+ 1 | 3 | 3 | 3 | 3 | 3
+ 1 | 5 | 5 | 5 | 5 | 5
+ 1 | 1 | 2 | 3 | 0 | 6
+ 2 | 6 | 6 | 6 | 6 | 6
+ 2 | 7 | 7 | 7 | 7 | 7
+ 2 | 8 | 8 | 8 | 8 | 8
+(10 rows)
+
+UPDATE tab5 set c1 = 11 where c3 = 10 and c3 < 1;
+SELECT gp_segment_id, * FROM tab5;
+ gp_segment_id | c1 | c2 | c3 | c4 | c5
+---------------+----+----+----+----+----
+ 0 | 4 | 4 | 4 | 4 | 4
+ 0 | 9 | 9 | 9 | 9 | 9
+ 0 | 10 | 10 | 10 | 10 | 10
+ 1 | 2 | 2 | 2 | 2 | 2
+ 1 | 3 | 3 | 3 | 3 | 3
+ 1 | 5 | 5 | 5 | 5 | 5
+ 1 | 1 | 2 | 3 | 0 | 6
+ 2 | 6 | 6 | 6 | 6 | 6
+ 2 | 7 | 7 | 7 | 7 | 7
+ 2 | 8 | 8 | 8 | 8 | 8
+(10 rows)
+
+EXPLAIN (COSTS OFF ) UPDATE tab3 SET C1 = C1 + 1, C5 = C5+1;
+ QUERY PLAN
+---------------------------------------------------------------
+ Update on tab3
+ -> Explicit Redistribute Motion 3:3 (slice1; segments: 3)
+ -> Split
+ -> Seq Scan on tab3
+ Optimizer: Postgres query optimizer
+(5 rows)
+
+-- clean up
+drop table tab3;
+drop table tab5;
+-- Update distribution key
+-- start_ignore
+drop table if exists r;
+NOTICE: table "r" does not exist, skipping
+drop table if exists s;
+NOTICE: table "s" does not exist, skipping
+drop table if exists update_dist;
+NOTICE: table "update_dist" does not exist, skipping
+drop table if exists update_ao_table;
+NOTICE: table "update_ao_table" does not exist, skipping
+drop table if exists update_aoco_table;
+NOTICE: table "update_aoco_table" does not exist, skipping
+-- end_ignore
+-- Update normal table distribution key
+create table update_dist(a int) distributed by (a);
+insert into update_dist values(1);
+update update_dist set a=0 where a=1;
+select * from update_dist;
+ a
+---
+ 0
+(1 row)
+
+-- Update distribution key with join
+create table r (a int, b int) distributed by (a);
+create table s (a int, b int) distributed by (a);
+insert into r select generate_series(1, 5), generate_series(1, 5) * 2;
+insert into s select generate_series(1, 5), generate_series(1, 5) * 2;
+select * from r;
+ a | b
+---+----
+ 3 | 6
+ 4 | 8
+ 5 | 10
+ 1 | 2
+ 2 | 4
+(5 rows)
+
+select * from s;
+ a | b
+---+----
+ 1 | 2
+ 2 | 4
+ 3 | 6
+ 4 | 8
+ 5 | 10
+(5 rows)
+
+update r set a = r.a + 1 from s where r.a = s.a;
+select * from r;
+ a | b
+---+----
+ 4 | 6
+ 5 | 8
+ 6 | 10
+ 3 | 4
+ 2 | 2
+(5 rows)
+
+update r set a = r.a + 1 where a in (select a from s);
+select * from r;
+ a | b
+---+----
+ 6 | 10
+ 5 | 6
+ 6 | 8
+ 4 | 4
+ 3 | 2
+(5 rows)
+
+-- Update redistribution
+delete from r;
+delete from s;
+insert into r select generate_series(1, 5), generate_series(1, 5);
+insert into s select generate_series(1, 5), generate_series(1, 5) * 2;
+select * from r;
+ a | b
+---+---
+ 3 | 3
+ 4 | 4
+ 5 | 5
+ 1 | 1
+ 2 | 2
+(5 rows)
+
+select * from s;
+ a | b
+---+----
+ 3 | 6
+ 4 | 8
+ 5 | 10
+ 1 | 2
+ 2 | 4
+(5 rows)
+
+update r set a = r.a + 1 from s where r.b = s.b;
+select * from r;
+ a | b
+---+---
+ 1 | 1
+ 3 | 3
+ 5 | 5
+ 5 | 4
+ 3 | 2
+(5 rows)
+
+update r set a = r.a + 1 where b in (select b from s);
+select * from r;
+ a | b
+---+---
+ 1 | 1
+ 3 | 3
+ 5 | 5
+ 6 | 4
+ 4 | 2
+(5 rows)
+
+-- Update hash aggreate group by
+delete from r;
+delete from s;
+insert into r select generate_series(1, 5), generate_series(1, 5) * 2;
+insert into s select generate_series(1, 5), generate_series(1, 5);
+select * from r;
+ a | b
+---+----
+ 1 | 2
+ 2 | 4
+ 3 | 6
+ 4 | 8
+ 5 | 10
+(5 rows)
+
+select * from s;
+ a | b
+---+---
+ 3 | 3
+ 4 | 4
+ 5 | 5
+ 1 | 1
+ 2 | 2
+(5 rows)
+
+update s set a = s.a + 1 where exists (select 1 from r where s.a = r.b);
+select * from s;
+ a | b
+---+---
+ 1 | 1
+ 3 | 3
+ 5 | 5
+ 3 | 2
+ 5 | 4
+(5 rows)
+
+-- Update ao table distribution key
+create table update_ao_table (a int, b int) WITH (appendonly=true) distributed by (a);
+insert into update_ao_table select g, g from generate_series(1, 5) g;
+select * from update_ao_table;
+ a | b
+---+---
+ 1 | 1
+ 2 | 2
+ 3 | 3
+ 4 | 4
+ 5 | 5
+(5 rows)
+
+update update_ao_table set a = a + 1 where b = 3;
+select * from update_ao_table;
+ a | b
+---+---
+ 1 | 1
+ 2 | 2
+ 4 | 4
+ 5 | 5
+ 4 | 3
+(5 rows)
+
+-- Update aoco table distribution key
+create table update_aoco_table (a int, b int) WITH (appendonly=true, orientation=column) distributed by (a);
+insert into update_aoco_table select g,g from generate_series(1, 5) g;
+select * from update_aoco_table;
+ a | b
+---+---
+ 1 | 1
+ 2 | 2
+ 3 | 3
+ 4 | 4
+ 5 | 5
+(5 rows)
+
+update update_aoco_table set a = a + 1 where b = 3;
+select * from update_aoco_table;
+ a | b
+---+---
+ 1 | 1
+ 2 | 2
+ 4 | 4
+ 5 | 5
+ 4 | 3
+(5 rows)
+
+-- Update prepare
+delete from s;
+insert into s select generate_series(1, 5), generate_series(1, 5);
+select * from r;
+ a | b
+---+----
+ 1 | 2
+ 2 | 4
+ 3 | 6
+ 4 | 8
+ 5 | 10
+(5 rows)
+
+select * from s;
+ a | b
+---+---
+ 1 | 1
+ 2 | 2
+ 3 | 3
+ 4 | 4
+ 5 | 5
+(5 rows)
+
+prepare update_s(int) as update s set a = s.a + $1 where exists (select 1 from r where s.a = r.b);
+execute update_s(10);
+select * from s;
+ a | b
+----+---
+ 1 | 1
+ 14 | 4
+ 3 | 3
+ 5 | 5
+ 12 | 2
+(5 rows)
+
+-- Confirm that a split update is not created for a table excluded by
+-- constraints in the planner.
+create table nosplitupdate (a int) distributed by (a);
+explain update nosplitupdate set a=0 where a=1 and a<1;
+ QUERY PLAN
+-----------------------------------------------------------
+ Update on nosplitupdate (cost=0.00..0.01 rows=1 width=0)
+ -> Result (cost=0.00..0.01 rows=1 width=0)
+ One-Time Filter: false
+ Planning time: 0.271 ms
+ Optimizer: Postgres query optimizer
+(5 rows)
+
+-- test split-update when split-node's flow is entry
+create table tsplit_entry (c int);
+NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'c' as the Cloudberry Database data distribution key for this table.
+HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew.
+insert into tsplit_entry values (1), (2);
+analyze tsplit_entry;
+-- start_ignore
+-- gp_segment_configuration scan is different when using different FTS
+explain update tsplit_entry set c = s.a from (select count(*) as a from gp_segment_configuration) s;
+ QUERY PLAN
+------------------------------------------------------------------------------------------------------------------
+ Update on tsplit_entry (cost=10000000001.00..10000000003.18 rows=3 width=54)
+ -> Explicit Redistribute Motion 1:3 (slice) (cost=10000000001.00..10000000003.18 rows=7 width=54)
+ -> Split (cost=10000000001.00..10000000003.18 rows=7 width=54)
+ -> Nested Loop (cost=10000000001.00..10000000003.12 rows=4 width=54)
+ -> Gather Motion 3:1 (slice2; segments: 3) (cost=0.00..2.06 rows=2 width=14)
+ -> Seq Scan on tsplit_entry (cost=0.00..2.02 rows=1 width=14)
+ -> Materialize (cost=1.00..1.03 rows=1 width=40)
+ -> Subquery Scan on s (cost=1.00..1.02 rows=1 width=40)
+ -> Aggregate (cost=1.00..1.01 rows=1 width=8)
+ -> Function Scan on gp_get_segment_configuration (cost=0.00..1.00 rows=1 width=0)
+ Optimizer: Postgres query optimizer
+(11 rows)
+-- end_ignore
+update tsplit_entry set c = s.a from (select count(*) as a from gp_segment_configuration) s;
+CREATE TABLE update_gp_foo (
+ a_dist int,
+ b int,
+ c_part int,
+ d int
+)
+WITH (appendonly=false) DISTRIBUTED BY (a_dist) PARTITION BY RANGE(c_part)
+ (
+ PARTITION p20190305 START (1) END (2) WITH (tablename='update_gp_foo_1_prt_p20190305', appendonly=false)
+ );
+CREATE TABLE update_gp_foo1 (
+ a_dist int,
+ b int,
+ c_part int,
+ d int
+)
+WITH (appendonly=false) DISTRIBUTED BY (a_dist) PARTITION BY RANGE(c_part)
+ (
+ PARTITION p20190305 START (1) END (2) WITH (tablename='update_gp_foo1_1_prt_p20190305', appendonly=false)
+ );
+INSERT INTO update_gp_foo VALUES (12, 40, 1, 50);
+INSERT INTO update_gp_foo1 VALUES (12, 3, 1, 50);
+UPDATE update_gp_foo
+SET b = update_gp_foo.c_part,
+ d = update_gp_foo1.a_dist
+FROM update_gp_foo1;
+SELECT * from update_gp_foo;
+ a_dist | b | c_part | d
+--------+---+--------+----
+ 12 | 1 | 1 | 12
+(1 row)
+
+-- Test insert on conflict do update
+-- Insert on conflict do update is an insert statement but might
+-- invoke ExecUpdate on segments, but updating distkeys of a table
+-- may lead to wrong data distribution. We will check this before
+-- planning, if a `insert on conflict do update` statement set the
+-- dist keys of the table, it will raise an error.
+-- See github issue: https://github.com/greenplum-db/gpdb/issues/9444
+create table t_insert_on_conflict_update_distkey(a int, b int) distributed by (a);
+create unique index uidx_t_insert_on_conflict_update_distkey on t_insert_on_conflict_update_distkey(a, b);
+-- the following statement should error out because the on conflict update want to
+-- modify the tuple's distkey which might lead to wrong data distribution
+insert into t_insert_on_conflict_update_distkey values (1, 1) on conflict(a, b) do update set a = 1;
+ERROR: modification of distribution columns in OnConflictUpdate is not supported
+drop index uidx_t_insert_on_conflict_update_distkey;
+drop table t_insert_on_conflict_update_distkey;
+-- randomly distributed table cannot add unique constrain, so next we test replicated table
+create table t_insert_on_conflict_update_distkey(a int, b int) distributed replicated;
+create unique index uidx_t_insert_on_conflict_update_distkey on t_insert_on_conflict_update_distkey(a, b);
+-- the following statement should succeed because replicated table does not contain distkey
+insert into t_insert_on_conflict_update_distkey values (1, 1) on conflict(a, b) do update set a = 1;
+ERROR: not implemented yet on pax relations: TupleInsertSpeculative
+-- Some tests on a partitioned table.
+CREATE TABLE update_gp_rangep (a int, b int, orig_a int) DISTRIBUTED BY (b) PARTITION BY RANGE (a);
+CREATE TABLE update_gp_rangep_1_to_10 PARTITION OF update_gp_rangep FOR VALUES FROM (1) TO (10);
+CREATE TABLE update_gp_rangep_10_to_20 PARTITION OF update_gp_rangep FOR VALUES FROM (10) TO (20);
+INSERT INTO update_gp_rangep SELECT g, g, g FROM generate_series(1, 4) g;
+-- Simple case: Same partition, same node.
+UPDATE update_gp_rangep SET a = 9 WHERE a = 1;
+-- Distribution key update, same partition.
+UPDATE update_gp_rangep SET b = 1 WHERE a = 2;
+-- Move row to different partition, but no change in distribution key
+UPDATE update_gp_rangep SET a = 10 WHERE a = 3;
+-- Move row to different partition and also change distribution key
+UPDATE update_gp_rangep SET a = 11, b = 1 WHERE a = 4;
+SELECT tableoid::regclass, * FROM update_gp_rangep ORDER BY orig_a;
+ tableoid | a | b | orig_a
+---------------------------+----+---+--------
+ update_gp_rangep_1_to_10 | 9 | 1 | 1
+ update_gp_rangep_1_to_10 | 2 | 1 | 2
+ update_gp_rangep_10_to_20 | 10 | 3 | 3
+ update_gp_rangep_10_to_20 | 11 | 1 | 4
+(4 rows)
+
+-- Also do a lookup with specific distribution key. If the rows were not
+-- correctly moved across segments, this would fail to find them, assuming
+-- that direct dispatch is effective.
+SELECT tableoid::regclass, * FROM update_gp_rangep WHERE b = 1;
+ tableoid | a | b | orig_a
+---------------------------+----+---+--------
+ update_gp_rangep_1_to_10 | 2 | 1 | 2
+ update_gp_rangep_1_to_10 | 9 | 1 | 1
+ update_gp_rangep_10_to_20 | 11 | 1 | 4
+(3 rows)
+
+-- start_ignore
+drop table r;
+drop table s;
+drop table update_dist;
+drop table update_ao_table;
+drop table update_aoco_table;
+drop table nosplitupdate;
+drop table tsplit_entry;
+-- end_ignore
diff --git a/contrib/pax_storage/hd-ci/clang_tidy_pax.bash b/contrib/pax_storage/hd-ci/clang_tidy_pax.bash
index 6d94247c41d..108c4bdfaab 100644
--- a/contrib/pax_storage/hd-ci/clang_tidy_pax.bash
+++ b/contrib/pax_storage/hd-ci/clang_tidy_pax.bash
@@ -16,7 +16,7 @@ function do_git_diff() {
exit 0
fi
- modified_files=$(git diff --name-only $CBDB_PAX_DEV_BRANCH)
+ modified_files=$(git diff --name-only $CBDB_PAX_DEV_BRANCH -- ':!icw_test')
for extension in "${CBDB_PAX_EXT[@]}"; do
if echo "$modified_files" | grep -E -e "$extension" | grep -q -v "$CBDB_PAXC_GREP"; then
files=$(echo "$modified_files" | grep -E -e "$extension" | grep -v "$CBDB_PAXC_GREP")
diff --git a/contrib/pax_storage/sql/ddl.sql b/contrib/pax_storage/sql/ddl.sql
new file mode 100644
index 00000000000..fbb390e12ae
--- /dev/null
+++ b/contrib/pax_storage/sql/ddl.sql
@@ -0,0 +1,51 @@
+set default_table_access_method = 'pax';
+
+create table pax_test.t1(
+ id int,
+ name text not null,
+ height float not null,
+ decimal_col decimal(10, 2) not null,
+ created_at timestamp with time zone not null,
+ updated_at timestamp with time zone not null
+) using pax distributed BY (id);
+\d+ pax_test.t1
+
+create table pax_test.t2(
+ id int,
+ name text not null,
+ height float not null,
+ decimal_col decimal(10, 2) not null,
+ created_at timestamp with time zone not null,
+ updated_at timestamp with time zone not null
+);
+\d+ pax_test.t2
+
+insert into pax_test.t1 (id, name, height, decimal_col, created_at, updated_at) values
+ (1, 'Alice', 1.65, 1.23, '2023-05-17 17:56:49.633664+08', '2023-05-17 17:56:49.633664+08'),
+ (2, 'Bob', 1.75, 2.34, '2023-05-17 17:56:49.633664+08', '2023-05-17 17:56:49.633664+08'),
+ (3, 'Carol', 1.85, 3.45, '2023-05-17 17:56:49.633664+08', '2023-05-17 17:56:49.633664+08');
+
+alter table pax_test.t1 add column new_col1 int;
+alter table pax_test.t1 add column new_col2 int default null;
+alter table pax_test.t1 add column new_col3 int default 0;
+alter table pax_test.t1 add column new_col4 int default 12;
+
+select * from pax_test.t1;
+
+alter table pax_test.t1 drop column new_col2;
+alter table pax_test.t1 drop column new_col3;
+
+vacuum pax_test.t1;
+vacuum full pax_test.t1;
+
+drop table pax_test.t1;
+drop table pax_test.t2;
+
+-- alter column with options
+create table pax_test.t3 (v1 numeric(100,1)) with(compresstype=zstd, compresslevel=1);
+alter table pax_test.t3 alter column v1 type numeric;
+drop table pax_test.t3;
+-- add column with options
+create table pax_test.t4 (v1 text) with(compresstype=zstd, compresslevel=1);
+alter table pax_test.t4 add column v2 text;
+drop table pax_test.t4;
diff --git a/contrib/pax_storage/sql/detoast.sql b/contrib/pax_storage/sql/detoast.sql
new file mode 100644
index 00000000000..0af5be392e7
--- /dev/null
+++ b/contrib/pax_storage/sql/detoast.sql
@@ -0,0 +1,51 @@
+CREATE TABLE toasttest_external(f1 text);
+-- The storage `EXTERNAL` allows out-of-line storage but not compression.
+alter table toasttest_external alter column f1 set storage external;
+-- These tests are sensitive to block size. In CBDB, the block
+-- size is 32 kB, whereas in PostgreSQL it's 8kB. Therefore make
+-- the data 4x larger here.
+INSERT INTO toasttest_external values (repeat('1234567890',300*4));
+INSERT INTO toasttest_external values (repeat('1234567890',300*4));
+INSERT INTO toasttest_external values (repeat('1234567890',300*4));
+INSERT INTO toasttest_external values (repeat('1234567890',300*4));
+-- expect >0 blocks
+SELECT pg_relation_size(reltoastrelid) = 0 AS is_empty
+ FROM pg_class where relname = 'toasttest_external';
+
+create table toasttest_external_pax(f1 text) using pax;
+insert into toasttest_external_pax select * from toasttest_external;
+drop table toasttest_external;
+-- If pax insert toast here, Then after drop toasttest_external, toast
+-- will not get the source data.
+select length(f1) from toasttest_external_pax;
+drop table toasttest_external_pax;
+
+
+CREATE TABLE toasttest_compress(f1 text);
+-- The storage `MAIN` allows compression but not out-of-line storage.
+alter table toasttest_compress alter column f1 set storage main;
+-- about 1M
+INSERT INTO toasttest_compress values (repeat('1234567890123456',1024 * 64));
+-- should be true, becase it's not store in toast table
+SELECT pg_relation_size(reltoastrelid) = 0 AS is_empty FROM pg_class where relname = 'toasttest_compress';
+
+create table toasttest_compress_pax(f1 text) using pax;
+insert into toasttest_compress_pax select * from toasttest_compress;
+drop table toasttest_compress;
+select length(f1) from toasttest_compress_pax;
+drop table toasttest_compress_pax;
+
+CREATE TABLE toasttest_extended(f1 text);
+-- The storage `EXTENDED` allows both compression and out-of-line storage.
+alter table toasttest_extended alter column f1 set storage EXTENDED;
+-- about 1M, will use out-of-line storage
+INSERT INTO toasttest_extended values (repeat('1234567890123456',1024 * 64));
+-- about 80k , will use compression storage
+INSERT INTO toasttest_extended values (repeat('1234567890123456',1024 * 5));
+SELECT pg_relation_size(reltoastrelid) = 0 AS is_empty FROM pg_class where relname = 'toasttest_extended';
+
+create table toasttest_extended_pax(f1 text) using pax;
+insert into toasttest_extended_pax select * from toasttest_extended;
+drop table toasttest_extended;
+select length(f1) from toasttest_extended_pax;
+drop table toasttest_extended_pax;
\ No newline at end of file
diff --git a/contrib/pax_storage/sql/setup.sql b/contrib/pax_storage/sql/setup.sql
new file mode 100644
index 00000000000..0966d0946a8
--- /dev/null
+++ b/contrib/pax_storage/sql/setup.sql
@@ -0,0 +1,3 @@
+-- start_ignore
+create schema pax_test;
+-- end_ignore
diff --git a/contrib/pax_storage/sql/teardown.sql b/contrib/pax_storage/sql/teardown.sql
new file mode 100644
index 00000000000..641380076f8
--- /dev/null
+++ b/contrib/pax_storage/sql/teardown.sql
@@ -0,0 +1,3 @@
+-- start_ignore
+drop schema if exists pax_test;
+-- end_ignore
diff --git a/contrib/pax_storage/src/data/sql/types.sql b/contrib/pax_storage/sql/types.sql
similarity index 54%
rename from contrib/pax_storage/src/data/sql/types.sql
rename to contrib/pax_storage/sql/types.sql
index 6eb3e09f895..1509c634889 100644
--- a/contrib/pax_storage/src/data/sql/types.sql
+++ b/contrib/pax_storage/sql/types.sql
@@ -1,9 +1,5 @@
--- start_ignore
-create extension pax;
-drop table if exists all_typbyval_pg_types;
--- end_ignore
-CREATE TABLE all_typbyval_pg_types (
+CREATE TABLE pax_test.all_typbyval_pg_types (
id int,
bool_col bool,
char_col char,
@@ -26,16 +22,12 @@ CREATE TABLE all_typbyval_pg_types (
pg_lsn_col pg_lsn
) USING pax distributed by (id);
-insert into all_typbyval_pg_types values(1, true,'c',2,'cid',4.2,5,'2023-05-17 17:56:49',7,'2023-05-17 17:56:49',10,11.1111,12,'2023-05-17 17:56:49','2023-05-17 17:56:49', '16/0'),
+insert into pax_test.all_typbyval_pg_types values(1, true,'c',2,'cid',4.2,5,'2023-05-17 17:56:49',7,'2023-05-17 17:56:49',10,11.1111,12,'2023-05-17 17:56:49','2023-05-17 17:56:49', '16/0'),
(1, true,'c',2,'cid',4.2,5,'2023-05-17 17:56:49',7,'2023-05-17 17:56:49',10,11.1111,12,'2023-05-17 17:56:49','2023-05-17 17:56:49', '16/0'),
(1, true,'c',2,'cid',4.2,5,'2023-05-17 17:56:49',7,'2023-05-17 17:56:49',10,11.1111,12,'2023-05-17 17:56:49','2023-05-17 17:56:49', '16/0');
-select * from all_typbyval_pg_types;
+select * from pax_test.all_typbyval_pg_types;
--- start_ignore
-drop table if exists all_typlen_lt_0_pg_type;
--- end_ignore
-
-create table all_typlen_lt_0_pg_type (
+create table pax_test.all_typlen_lt_0_pg_type (
id int,
name_col name,
numeric_col numeric,
@@ -44,9 +36,8 @@ create table all_typlen_lt_0_pg_type (
point_col point
) USING pax distributed by (id);
-insert into all_typlen_lt_0_pg_type values(1,'hello', 1.23, 'text', 'varchar', point(1,2));
-select * from all_typlen_lt_0_pg_type;
+insert into pax_test.all_typlen_lt_0_pg_type values(1,'hello', 1.23, 'text', 'varchar', point(1,2));
+select * from pax_test.all_typlen_lt_0_pg_type;
--- start_ignore
-drop table if exists all_typbyval_pg_types;
--- end_ignore
\ No newline at end of file
+drop table pax_test.all_typbyval_pg_types;
+drop table pax_test.all_typlen_lt_0_pg_type;
diff --git a/contrib/pax_storage/sql/update.sql b/contrib/pax_storage/sql/update.sql
new file mode 100644
index 00000000000..669ba110f4c
--- /dev/null
+++ b/contrib/pax_storage/sql/update.sql
@@ -0,0 +1,386 @@
+set default_table_access_method = pax;
+set pax.enable_filter = off;
+
+--
+-- UPDATE ... SET = DEFAULT;
+--
+
+CREATE TABLE update_test (
+ a INT DEFAULT 10,
+ b INT,
+ c TEXT
+);
+
+CREATE TABLE upsert_test (
+ a INT PRIMARY KEY,
+ b TEXT
+);
+
+INSERT INTO update_test VALUES (5, 10, 'foo');
+INSERT INTO update_test(b, a) VALUES (15, 10);
+
+SELECT a,b,c FROM update_test ORDER BY a,b,c;
+
+UPDATE update_test SET a = DEFAULT, b = DEFAULT;
+
+SELECT a,b,c FROM update_test ORDER BY a,b,c;
+
+-- aliases for the UPDATE target table
+UPDATE update_test AS t SET b = 10 WHERE t.a = 10;
+
+SELECT a,b,c FROM update_test ORDER BY a,b,c;
+
+UPDATE update_test t SET b = t.b + 10 WHERE t.a = 10;
+
+SELECT a,b,c FROM update_test ORDER BY a,b,c;
+
+--
+-- Test VALUES in FROM
+--
+
+UPDATE update_test SET a=v.i FROM (VALUES(100, 20)) AS v(i, j)
+ WHERE update_test.b = v.j;
+
+SELECT a,b,c FROM update_test ORDER BY a,b,c;
+
+-- fail, wrong data type:
+UPDATE update_test SET a = v.* FROM (VALUES(100, 20)) AS v(i, j)
+ WHERE update_test.b = v.j;
+
+--
+-- Test multiple-set-clause syntax
+--
+
+INSERT INTO update_test SELECT a,b+1,c FROM update_test;
+SELECT * FROM update_test;
+
+UPDATE update_test SET (c,b,a) = ('bugle', b+11, DEFAULT) WHERE c = 'foo';
+SELECT a,b,c FROM update_test ORDER BY a,b,c;
+UPDATE update_test SET (c,b) = ('car', a+b), a = a + 1 WHERE a = 10;
+SELECT a,b,c FROM update_test ORDER BY a,b,c;
+-- fail, multi assignment to same column:
+UPDATE update_test SET (c,b) = ('car', a+b), b = a + 1 WHERE a = 10;
+
+-- uncorrelated sub-select:
+UPDATE update_test
+ SET (b,a) = (select a,b from update_test where b = 41 and c = 'car')
+ WHERE a = 100 AND b = 20;
+SELECT * FROM update_test;
+-- correlated sub-select:
+UPDATE update_test o
+ SET (b,a) = (select a+1,b from update_test i
+ where i.a=o.a and i.b=o.b and i.c is not distinct from o.c);
+SELECT * FROM update_test;
+-- fail, multiple rows supplied:
+UPDATE update_test SET (b,a) = (select a+1,b from update_test);
+-- set to null if no rows supplied:
+UPDATE update_test SET (b,a) = (select a+1,b from update_test where a = 1000)
+ WHERE a = 11;
+SELECT * FROM update_test;
+-- *-expansion should work in this context:
+UPDATE update_test SET (a,b) = ROW(v.*) FROM (VALUES(21, 100)) AS v(i, j)
+ WHERE update_test.a = v.i;
+-- you might expect this to work, but syntactically it's not a RowExpr:
+UPDATE update_test SET (a,b) = (v.*) FROM (VALUES(21, 101)) AS v(i, j)
+ WHERE update_test.a = v.i;
+
+-- if an alias for the target table is specified, don't allow references
+-- to the original table name
+UPDATE update_test AS t SET b = update_test.b + 10 WHERE t.a = 10;
+
+-- Make sure that we can update to a TOASTed value.
+UPDATE update_test SET c = repeat('x', 10000) WHERE c = 'car';
+SELECT a, b, char_length(c) FROM update_test;
+
+-- Check multi-assignment with a Result node to handle a one-time filter.
+EXPLAIN (VERBOSE, COSTS OFF)
+UPDATE update_test t
+ SET (a, b) = (SELECT b, a FROM update_test s WHERE s.a = t.a)
+ WHERE CURRENT_USER = SESSION_USER;
+UPDATE update_test t
+ SET (a, b) = (SELECT b, a FROM update_test s WHERE s.a = t.a)
+ WHERE CURRENT_USER = SESSION_USER;
+SELECT a, b, char_length(c) FROM update_test;
+
+-- Test ON CONFLICT DO UPDATE
+
+INSERT INTO upsert_test VALUES(1, 'Boo'), (3, 'Zoo');
+-- uncorrelated sub-select:
+WITH aaa AS (SELECT 1 AS a, 'Foo' AS b) INSERT INTO upsert_test
+ VALUES (1, 'Bar') ON CONFLICT(a)
+ DO UPDATE SET (b, a) = (SELECT b, a FROM aaa) RETURNING *;
+-- correlated sub-select:
+INSERT INTO upsert_test VALUES (1, 'Baz'), (3, 'Zaz') ON CONFLICT(a)
+ DO UPDATE SET (b, a) = (SELECT b || ', Correlated', a from upsert_test i WHERE i.a = upsert_test.a)
+ RETURNING *;
+-- correlated sub-select (EXCLUDED.* alias):
+INSERT INTO upsert_test VALUES (1, 'Bat'), (3, 'Zot') ON CONFLICT(a)
+ DO UPDATE SET (b, a) = (SELECT b || ', Excluded', a from upsert_test i WHERE i.a = excluded.a)
+ RETURNING *;
+
+-- ON CONFLICT using system attributes in RETURNING, testing both the
+-- inserting and updating paths. See bug report at:
+-- https://www.postgresql.org/message-id/73436355-6432-49B1-92ED-1FE4F7E7E100%40finefun.com.au
+INSERT INTO upsert_test VALUES (2, 'Beeble') ON CONFLICT(a)
+ DO UPDATE SET (b, a) = (SELECT b || ', Excluded', a from upsert_test i WHERE i.a = excluded.a)
+ RETURNING tableoid::regclass, xmin = pg_current_xact_id()::xid AS xmin_correct, xmax = 0 AS xmax_correct;
+-- currently xmax is set after a conflict - that's probably not good,
+-- but it seems worthwhile to have to be explicit if that changes.
+INSERT INTO upsert_test VALUES (2, 'Brox') ON CONFLICT(a)
+ DO UPDATE SET (b, a) = (SELECT b || ', Excluded', a from upsert_test i WHERE i.a = excluded.a)
+ RETURNING tableoid::regclass, xmin = pg_current_xact_id()::xid AS xmin_correct, xmax = pg_current_xact_id()::xid AS xmax_correct;
+
+DROP TABLE update_test;
+DROP TABLE upsert_test;
+
+-- Test ON CONFLICT DO UPDATE with partitioned table and non-identical children
+
+CREATE TABLE upsert_test (
+ a INT PRIMARY KEY,
+ b TEXT
+) PARTITION BY LIST (a);
+
+CREATE TABLE upsert_test_1 PARTITION OF upsert_test FOR VALUES IN (1);
+CREATE TABLE upsert_test_2 (b TEXT, a INT PRIMARY KEY);
+ALTER TABLE upsert_test ATTACH PARTITION upsert_test_2 FOR VALUES IN (2);
+
+INSERT INTO upsert_test VALUES(1, 'Boo'), (2, 'Zoo');
+-- uncorrelated sub-select:
+WITH aaa AS (SELECT 1 AS a, 'Foo' AS b) INSERT INTO upsert_test
+ VALUES (1, 'Bar') ON CONFLICT(a)
+ DO UPDATE SET (b, a) = (SELECT b, a FROM aaa) RETURNING *;
+-- correlated sub-select:
+WITH aaa AS (SELECT 1 AS ctea, ' Foo' AS cteb) INSERT INTO upsert_test
+ VALUES (1, 'Bar'), (2, 'Baz') ON CONFLICT(a)
+ DO UPDATE SET (b, a) = (SELECT upsert_test.b||cteb, upsert_test.a FROM aaa) RETURNING *;
+
+DROP TABLE upsert_test;
+
+
+---------------------------
+-- UPDATE with row movement
+---------------------------
+
+-- When a partitioned table receives an UPDATE to the partitioned key and the
+-- new values no longer meet the partition's bound, the row must be moved to
+-- the correct partition for the new partition key (if one exists). We must
+-- also ensure that updatable views on partitioned tables properly enforce any
+-- WITH CHECK OPTION that is defined. The situation with triggers in this case
+-- also requires thorough testing as partition key updates causing row
+-- movement convert UPDATEs into DELETE+INSERT.
+
+CREATE TABLE range_parted (
+ a text,
+ b bigint,
+ c numeric,
+ d int,
+ e varchar
+) PARTITION BY RANGE (a, b);
+
+-- Create partitions intentionally in descending bound order, so as to test
+-- that update-row-movement works with the leaf partitions not in bound order.
+CREATE TABLE part_b_20_b_30 (e varchar, c numeric, a text, b bigint, d int);
+-- GPDB: distribution policy must match the parent table.
+alter table part_b_20_b_30 set distributed by (a);
+ALTER TABLE range_parted ATTACH PARTITION part_b_20_b_30 FOR VALUES FROM ('b', 20) TO ('b', 30);
+CREATE TABLE part_b_10_b_20 (e varchar, c numeric, a text, b bigint, d int) PARTITION BY RANGE (c);
+alter table part_b_10_b_20 set distributed by (a);
+CREATE TABLE part_b_1_b_10 PARTITION OF range_parted FOR VALUES FROM ('b', 1) TO ('b', 10);
+ALTER TABLE range_parted ATTACH PARTITION part_b_10_b_20 FOR VALUES FROM ('b', 10) TO ('b', 20);
+CREATE TABLE part_a_10_a_20 PARTITION OF range_parted FOR VALUES FROM ('a', 10) TO ('a', 20);
+CREATE TABLE part_a_1_a_10 PARTITION OF range_parted FOR VALUES FROM ('a', 1) TO ('a', 10);
+
+-- Check that partition-key UPDATE works sanely on a partitioned table that
+-- does not have any child partitions.
+UPDATE part_b_10_b_20 set b = b - 6;
+
+-- Create some more partitions following the above pattern of descending bound
+-- order, but let's make the situation a bit more complex by having the
+-- attribute numbers of the columns vary from their parent partition.
+CREATE TABLE part_c_100_200 (e varchar, c numeric, a text, b bigint, d int) PARTITION BY range (abs(d));
+ALTER TABLE part_c_100_200 DROP COLUMN e, DROP COLUMN c, DROP COLUMN a;
+ALTER TABLE part_c_100_200 ADD COLUMN c numeric, ADD COLUMN e varchar, ADD COLUMN a text;
+ALTER TABLE part_c_100_200 DROP COLUMN b;
+ALTER TABLE part_c_100_200 ADD COLUMN b bigint;
+CREATE TABLE part_d_1_15 PARTITION OF part_c_100_200 FOR VALUES FROM (1) TO (15);
+CREATE TABLE part_d_15_20 PARTITION OF part_c_100_200 FOR VALUES FROM (15) TO (20);
+
+ALTER TABLE part_b_10_b_20 ATTACH PARTITION part_c_100_200 FOR VALUES FROM (100) TO (200);
+
+-- GPDB: distribution policy must match the parent table, so the previous command fails.
+-- Change the distribution key and try again.
+alter table part_c_100_200 set distributed by (a);
+ALTER TABLE part_b_10_b_20 ATTACH PARTITION part_c_100_200 FOR VALUES FROM (100) TO (200);
+
+CREATE TABLE part_c_1_100 (e varchar, d int, c numeric, b bigint, a text);
+alter table part_c_1_100 set distributed by (a);
+ALTER TABLE part_b_10_b_20 ATTACH PARTITION part_c_1_100 FOR VALUES FROM (1) TO (100);
+
+\set init_range_parted 'truncate range_parted; insert into range_parted VALUES (''a'', 1, 1, 1), (''a'', 10, 200, 1), (''b'', 12, 96, 1), (''b'', 13, 97, 2), (''b'', 15, 105, 16), (''b'', 17, 105, 19)'
+\set show_data 'select tableoid::regclass::text COLLATE "C" partname, * from range_parted ORDER BY 1, 2, 3, 4, 5, 6'
+:init_range_parted;
+:show_data;
+
+-- The order of subplans should be in bound order
+EXPLAIN (costs off) UPDATE range_parted set c = c - 50 WHERE c > 97;
+
+-- fail, row movement happens only within the partition subtree.
+UPDATE part_c_100_200 set c = c - 20, d = c WHERE c = 105;
+-- fail, no partition key update, so no attempt to move tuple,
+-- but "a = 'a'" violates partition constraint enforced by root partition)
+UPDATE part_b_10_b_20 set a = 'a';
+-- ok, partition key update, no constraint violation
+UPDATE range_parted set d = d - 10 WHERE d > 10;
+-- ok, no partition key update, no constraint violation
+UPDATE range_parted set e = d;
+-- No row found
+UPDATE part_c_1_100 set c = c + 20 WHERE c = 98;
+-- ok, row movement
+UPDATE part_b_10_b_20 set c = c + 20 returning c, b, a;
+:show_data;
+
+-- fail, row movement happens only within the partition subtree.
+UPDATE part_b_10_b_20 set b = b - 6 WHERE c > 116 returning *;
+-- ok, row movement, with subset of rows moved into different partition.
+UPDATE range_parted set b = b - 6 WHERE c > 116 returning a, b + c;
+
+:show_data;
+
+-- Common table needed for multiple test scenarios.
+CREATE TABLE mintab(c1 int);
+INSERT into mintab VALUES (120);
+
+-- update partition key using updatable view.
+CREATE VIEW upview AS SELECT * FROM range_parted WHERE (select c > c1 FROM mintab) WITH CHECK OPTION;
+-- ok
+UPDATE upview set c = 199 WHERE b = 4;
+-- fail, check option violation
+UPDATE upview set c = 120 WHERE b = 4;
+-- fail, row movement with check option violation
+UPDATE upview set a = 'b', b = 15, c = 120 WHERE b = 4;
+-- ok, row movement, check option passes
+UPDATE upview set a = 'b', b = 15 WHERE b = 4;
+
+:show_data;
+
+-- cleanup
+DROP VIEW upview;
+
+-- RETURNING having whole-row vars.
+:init_range_parted;
+UPDATE range_parted set c = 95 WHERE a = 'b' and b > 10 and c > 100 returning (range_parted), *;
+:show_data;
+
+
+-- Creating default partition for range
+:init_range_parted;
+create table part_def partition of range_parted default;
+\d+ part_def
+insert into range_parted values ('c', 9);
+-- ok
+update part_def set a = 'd' where a = 'c';
+-- fail
+update part_def set a = 'a' where a = 'd';
+
+:show_data;
+
+-- Update row movement from non-default to default partition.
+-- fail, default partition is not under part_a_10_a_20;
+UPDATE part_a_10_a_20 set a = 'ad' WHERE a = 'a';
+-- ok
+UPDATE range_parted set a = 'ad' WHERE a = 'a';
+UPDATE range_parted set a = 'bd' WHERE a = 'b';
+:show_data;
+-- Update row movement from default to non-default partitions.
+-- ok
+UPDATE range_parted set a = 'a' WHERE a = 'ad';
+UPDATE range_parted set a = 'b' WHERE a = 'bd';
+:show_data;
+
+-- Cleanup: range_parted no longer needed.
+DROP TABLE range_parted;
+
+CREATE TABLE list_parted (
+ a text,
+ b int
+) PARTITION BY list (a);
+CREATE TABLE list_part1 PARTITION OF list_parted for VALUES in ('a', 'b');
+CREATE TABLE list_default PARTITION OF list_parted default;
+INSERT into list_part1 VALUES ('a', 1);
+INSERT into list_default VALUES ('d', 10);
+
+-- fail
+UPDATE list_default set a = 'a' WHERE a = 'd';
+-- ok
+UPDATE list_default set a = 'x' WHERE a = 'd';
+
+DROP TABLE list_parted;
+
+--------------
+-- Some more update-partition-key test scenarios below. This time use list
+-- partitions.
+--------------
+
+-- Setup for list partitions
+CREATE TABLE list_parted (a numeric, b int, c int8) PARTITION BY list (a);
+CREATE TABLE sub_parted PARTITION OF list_parted for VALUES in (1) PARTITION BY list (b);
+
+CREATE TABLE sub_part1(b int, c int8, a numeric);
+alter table sub_part1 set distributed by (a); -- GPDB: distribution policy must match the parent table.
+ALTER TABLE sub_parted ATTACH PARTITION sub_part1 for VALUES in (1);
+CREATE TABLE sub_part2(b int, c int8, a numeric);
+alter table sub_part2 set distributed by (a); -- GPDB: distribution policy must match the parent table.
+ALTER TABLE sub_parted ATTACH PARTITION sub_part2 for VALUES in (2);
+
+CREATE TABLE list_part1(a numeric, b int, c int8);
+ALTER TABLE list_parted ATTACH PARTITION list_part1 for VALUES in (2,3);
+
+INSERT into list_parted VALUES (2,5,50);
+INSERT into list_parted VALUES (3,6,60);
+INSERT into sub_parted VALUES (1,1,60);
+INSERT into sub_parted VALUES (1,2,10);
+
+-- Test partition constraint violation when intermediate ancestor is used and
+-- constraint is inherited from upper root.
+UPDATE sub_parted set a = 2 WHERE c = 10;
+
+-- Test update-partition-key, where the unpruned partitions do not have their
+-- partition keys updated.
+SELECT tableoid::regclass::text, * FROM list_parted WHERE a = 2 ORDER BY 1;
+UPDATE list_parted set b = c + a WHERE a = 2;
+SELECT tableoid::regclass::text, * FROM list_parted WHERE a = 2 ORDER BY 1;
+
+
+-- Cleanup: list_parted no longer needed.
+DROP TABLE list_parted;
+
+-- create custom operator class and hash function, for the same reason
+-- explained in alter_table.sql
+create or replace function dummy_hashint4(a int4, seed int8) returns int8 as
+$$ begin return (a + seed); end; $$ language 'plpgsql' immutable;
+create operator class custom_opclass for type int4 using hash as
+operator 1 = , function 2 dummy_hashint4(int4, int8);
+
+create table hash_parted (
+ a int,
+ b int
+) partition by hash (a custom_opclass, b custom_opclass);
+create table hpart1 partition of hash_parted for values with (modulus 2, remainder 1);
+create table hpart2 partition of hash_parted for values with (modulus 4, remainder 2);
+create table hpart3 partition of hash_parted for values with (modulus 8, remainder 0);
+create table hpart4 partition of hash_parted for values with (modulus 8, remainder 4);
+insert into hpart1 values (1, 1);
+insert into hpart2 values (2, 5);
+insert into hpart4 values (3, 4);
+
+-- fail
+update hpart1 set a = 3, b=4 where a = 1;
+-- ok, row movement
+update hash_parted set b = b - 1 where b = 1;
+-- ok
+update hash_parted set b = b + 8 where b = 1;
+
+-- cleanup
+drop table hash_parted;
+drop operator class custom_opclass using hash;
+drop function dummy_hashint4(a int4, seed int8);
diff --git a/contrib/pax_storage/src/data/sql/update_gp.sql b/contrib/pax_storage/sql/update_gp.sql
similarity index 78%
rename from contrib/pax_storage/src/data/sql/update_gp.sql
rename to contrib/pax_storage/sql/update_gp.sql
index 3fe87351ac5..30efc73f679 100644
--- a/contrib/pax_storage/src/data/sql/update_gp.sql
+++ b/contrib/pax_storage/sql/update_gp.sql
@@ -1,9 +1,10 @@
+set default_table_access_method = pax;
+
-- Test DELETE and UPDATE on an inherited table.
-- The special aspect of this table is that the inherited table has
-- a different distribution key. 'p' table's distribution key matches
-- that of 'r', but 'p2's doesn't. Test that the planner adds a Motion
-- node correctly for p2.
-set default_table_access_method = 'pax';
create table todelete (a int) distributed by (a);
create table parent (a int, b int, c int) distributed by (a);
create table child (a int, b int, c int) inherits (parent) distributed by (b);
@@ -62,9 +63,8 @@ create table base_tbl (a int4, b int4) distributed by (a);
create table child_a (a int4, b int4) inherits (base_tbl) distributed by (a);
create table child_b (a int4, b int4) inherits (base_tbl) distributed by (b);
insert into base_tbl select g, g from generate_series(1, 5) g;
--- start_ignore
+
explain (costs off) update base_tbl set a=a+1;
--- end_ignore
update base_tbl set a = 5;
--
@@ -84,8 +84,8 @@ INSERT INTO keo3 VALUES ('1', '1');
CREATE TABLE keo4 ( keo_para_required_period character varying(6), keo_para_budget_date character varying(24)) DISTRIBUTED RANDOMLY;
INSERT INTO keo4 VALUES ('1', '1');
+ANALYZE keo1, keo2, keo3, keo4;
-- Explicit Redistribution motion should be added in case of GPDB Planner (test case not applicable for ORCA)
--- start_ignore
EXPLAIN (COSTS OFF) UPDATE keo1 SET user_vie_act_cntr_marg_cum = 234.682 FROM
( SELECT a.user_vie_project_code_pk FROM keo1 a INNER JOIN keo2 b
ON b.projects_pk=a.user_vie_project_code_pk
@@ -95,7 +95,6 @@ EXPLAIN (COSTS OFF) UPDATE keo1 SET user_vie_act_cntr_marg_cum = 234.682 FROM
(SELECT min (keo4.keo_para_budget_date) FROM keo4)))
) t1
WHERE t1.user_vie_project_code_pk = keo1.user_vie_project_code_pk;
--- end_ignore
UPDATE keo1 SET user_vie_act_cntr_marg_cum = 234.682 FROM
( SELECT a.user_vie_project_code_pk FROM keo1 a INNER JOIN keo2 b
ON b.projects_pk=a.user_vie_project_code_pk
@@ -110,9 +109,7 @@ SELECT user_vie_act_cntr_marg_cum FROM keo1;
-- Explicit Redistribution motion should not be added in case of GPDB Planner (test case not applicable to ORCA)
CREATE TABLE keo5 (x int, y int) DISTRIBUTED BY (x);
INSERT INTO keo5 VALUES (1,1);
--- start_ignore
EXPLAIN (COSTS OFF) DELETE FROM keo5 WHERE x IN (SELECT x FROM keo5 WHERE EXISTS (SELECT x FROM keo5 WHERE x < 2));
--- end_ignore
DELETE FROM keo5 WHERE x IN (SELECT x FROM keo5 WHERE EXISTS (SELECT x FROM keo5 WHERE x < 2));
SELECT x FROM keo5;
@@ -123,54 +120,52 @@ DROP TABLE keo3;
DROP TABLE keo4;
DROP TABLE keo5;
--- start_ignore
--- -- text types. We should support the following updates.
--- --
+--
+-- text types. We should support the following updates.
+--
--- CREATE TEMP TABLE ttab1 (a varchar(15), b integer) DISTRIBUTED BY (a);
--- CREATE TEMP TABLE ttab2 (a varchar(15), b integer) DISTRIBUTED BY (a);
+CREATE TEMP TABLE ttab1 (a varchar(15), b integer) DISTRIBUTED BY (a);
+CREATE TEMP TABLE ttab2 (a varchar(15), b integer) DISTRIBUTED BY (a);
--- UPDATE ttab1 SET b = ttab2.b FROM ttab2 WHERE ttab1.a = ttab2.a;
+UPDATE ttab1 SET b = ttab2.b FROM ttab2 WHERE ttab1.a = ttab2.a;
--- DROP TABLE ttab1;
--- DROP TABLE ttab2;
+DROP TABLE ttab1;
+DROP TABLE ttab2;
--- CREATE TEMP TABLE ttab1 (a text, b integer) DISTRIBUTED BY (a);
--- CREATE TEMP TABLE ttab2 (a text, b integer) DISTRIBUTED BY (a);
+CREATE TEMP TABLE ttab1 (a text, b integer) DISTRIBUTED BY (a);
+CREATE TEMP TABLE ttab2 (a text, b integer) DISTRIBUTED BY (a);
--- UPDATE ttab1 SET b = ttab2.b FROM ttab2 WHERE ttab1.a = ttab2.a;
+UPDATE ttab1 SET b = ttab2.b FROM ttab2 WHERE ttab1.a = ttab2.a;
--- DROP TABLE ttab1;
--- DROP TABLE ttab2;
+DROP TABLE ttab1;
+DROP TABLE ttab2;
--- CREATE TEMP TABLE ttab1 (a varchar, b integer) DISTRIBUTED BY (a);
--- CREATE TEMP TABLE ttab2 (a varchar, b integer) DISTRIBUTED BY (a);
+CREATE TEMP TABLE ttab1 (a varchar, b integer) DISTRIBUTED BY (a);
+CREATE TEMP TABLE ttab2 (a varchar, b integer) DISTRIBUTED BY (a);
--- UPDATE ttab1 SET b = ttab2.b FROM ttab2 WHERE ttab1.a = ttab2.a;
+UPDATE ttab1 SET b = ttab2.b FROM ttab2 WHERE ttab1.a = ttab2.a;
--- DROP TABLE ttab1;
--- DROP TABLE ttab2;
+DROP TABLE ttab1;
+DROP TABLE ttab2;
--- CREATE TEMP TABLE ttab1 (a char(15), b integer) DISTRIBUTED BY (a);
--- CREATE TEMP TABLE ttab2 (a char(15), b integer) DISTRIBUTED BY (a);
+CREATE TEMP TABLE ttab1 (a char(15), b integer) DISTRIBUTED BY (a);
+CREATE TEMP TABLE ttab2 (a char(15), b integer) DISTRIBUTED BY (a);
--- UPDATE ttab1 SET b = ttab2.b FROM ttab2 WHERE ttab1.a = ttab2.a;
+UPDATE ttab1 SET b = ttab2.b FROM ttab2 WHERE ttab1.a = ttab2.a;
--- DROP TABLE IF EXISTS update_distr_key;
+DROP TABLE IF EXISTS update_distr_key;
--- CREATE TEMP TABLE update_distr_key (a int, b int) DISTRIBUTED BY (a);
--- INSERT INTO update_distr_key select i, i* 10 from generate_series(0, 9) i;
+CREATE TEMP TABLE update_distr_key (a int, b int) DISTRIBUTED BY (a);
+INSERT INTO update_distr_key select i, i* 10 from generate_series(0, 9) i;
--- UPDATE update_distr_key SET a = 5 WHERE b = 10;
+UPDATE update_distr_key SET a = 5 WHERE b = 10;
--- SELECT * from update_distr_key;
+SELECT * from update_distr_key;
--- DROP TABLE update_distr_key;
-
--- end_ignore
+DROP TABLE update_distr_key;
-- below cases is to test multi-hash-cols
CREATE TABLE tab3(c1 int, c2 int, c3 int, c4 int, c5 int) DISTRIBUTED BY (c1, c2, c3);
@@ -202,9 +197,8 @@ UPDATE tab5 set (c1,c2,c3,c4,c5) = (1,2,3,0,6) where c5 = 1;
SELECT gp_segment_id, * FROM tab5;
UPDATE tab5 set c1 = 11 where c3 = 10 and c3 < 1;
SELECT gp_segment_id, * FROM tab5;
--- start_ignore
+
EXPLAIN (COSTS OFF ) UPDATE tab3 SET C1 = C1 + 1, C5 = C5+1;
--- end_ignore
-- clean up
drop table tab3;
@@ -260,21 +254,20 @@ select * from r;
select * from s;
update s set a = s.a + 1 where exists (select 1 from r where s.a = r.b);
select * from s;
--- start_ignore
+
-- Update ao table distribution key
--- create table update_ao_table (a int, b int) WITH (appendonly=true) distributed by (a);
--- insert into update_ao_table select g, g from generate_series(1, 5) g;
--- select * from update_ao_table;
--- update update_ao_table set a = a + 1 where b = 3;
--- select * from update_ao_table;
+create table update_ao_table (a int, b int) WITH (appendonly=true) distributed by (a);
+insert into update_ao_table select g, g from generate_series(1, 5) g;
+select * from update_ao_table;
+update update_ao_table set a = a + 1 where b = 3;
+select * from update_ao_table;
-- Update aoco table distribution key
--- create table update_aoco_table (a int, b int) WITH (appendonly=true, orientation=column) distributed by (a);
--- insert into update_aoco_table select g,g from generate_series(1, 5) g;
--- select * from update_aoco_table;
--- update update_aoco_table set a = a + 1 where b = 3;
--- select * from update_aoco_table;
--- end_ignore
+create table update_aoco_table (a int, b int) WITH (appendonly=true, orientation=column) distributed by (a);
+insert into update_aoco_table select g,g from generate_series(1, 5) g;
+select * from update_aoco_table;
+update update_aoco_table set a = a + 1 where b = 3;
+select * from update_aoco_table;
-- Update prepare
delete from s;
@@ -288,9 +281,7 @@ select * from s;
-- Confirm that a split update is not created for a table excluded by
-- constraints in the planner.
create table nosplitupdate (a int) distributed by (a);
--- start_ignore
explain update nosplitupdate set a=0 where a=1 and a<1;
--- end_ignore
-- test split-update when split-node's flow is entry
create table tsplit_entry (c int);
@@ -298,42 +289,42 @@ insert into tsplit_entry values (1), (2);
analyze tsplit_entry;
-- start_ignore
+-- The plan for scanning gp_segment_configuration differs depending on which FTS implementation is in use.
explain update tsplit_entry set c = s.a from (select count(*) as a from gp_segment_configuration) s;
-- end_ignore
update tsplit_entry set c = s.a from (select count(*) as a from gp_segment_configuration) s;
--- start_ignore
--- CREATE TABLE update_gp_foo (
--- a_dist int,
--- b int,
--- c_part int,
--- d int
--- )
--- WITH (appendonly=false) DISTRIBUTED BY (a_dist) PARTITION BY RANGE(c_part)
--- (
--- PARTITION p20190305 START (1) END (2) WITH (tablename='update_gp_foo_1_prt_p20190305', appendonly=false)
--- );
-
--- CREATE TABLE update_gp_foo1 (
--- a_dist int,
--- b int,
--- c_part int,
--- d int
--- )
--- WITH (appendonly=false) DISTRIBUTED BY (a_dist) PARTITION BY RANGE(c_part)
--- (
--- PARTITION p20190305 START (1) END (2) WITH (tablename='update_gp_foo1_1_prt_p20190305', appendonly=false)
--- );
-
--- INSERT INTO update_gp_foo VALUES (12, 40, 1, 50);
--- INSERT INTO update_gp_foo1 VALUES (12, 3, 1, 50);
-
--- UPDATE update_gp_foo
--- SET b = update_gp_foo.c_part,
--- d = update_gp_foo1.a_dist
--- FROM update_gp_foo1;
-
--- SELECT * from update_gp_foo;
--- end_ignore
+
+CREATE TABLE update_gp_foo (
+ a_dist int,
+ b int,
+ c_part int,
+ d int
+)
+WITH (appendonly=false) DISTRIBUTED BY (a_dist) PARTITION BY RANGE(c_part)
+ (
+ PARTITION p20190305 START (1) END (2) WITH (tablename='update_gp_foo_1_prt_p20190305', appendonly=false)
+ );
+
+CREATE TABLE update_gp_foo1 (
+ a_dist int,
+ b int,
+ c_part int,
+ d int
+)
+WITH (appendonly=false) DISTRIBUTED BY (a_dist) PARTITION BY RANGE(c_part)
+ (
+ PARTITION p20190305 START (1) END (2) WITH (tablename='update_gp_foo1_1_prt_p20190305', appendonly=false)
+ );
+
+INSERT INTO update_gp_foo VALUES (12, 40, 1, 50);
+INSERT INTO update_gp_foo1 VALUES (12, 3, 1, 50);
+
+UPDATE update_gp_foo
+SET b = update_gp_foo.c_part,
+ d = update_gp_foo1.a_dist
+FROM update_gp_foo1;
+
+SELECT * from update_gp_foo;
-- Test insert on conflict do update
-- Insert on conflict do update is an insert statement but might
@@ -342,7 +333,6 @@ update tsplit_entry set c = s.a from (select count(*) as a from gp_segment_confi
-- planning, if a `insert on conflict do update` statement set the
-- dist keys of the table, it will raise an error.
-- See github issue: https://github.com/greenplum-db/gpdb/issues/9444
--- start_ignore
create table t_insert_on_conflict_update_distkey(a int, b int) distributed by (a);
create unique index uidx_t_insert_on_conflict_update_distkey on t_insert_on_conflict_update_distkey(a, b);
@@ -358,7 +348,6 @@ create table t_insert_on_conflict_update_distkey(a int, b int) distributed repli
create unique index uidx_t_insert_on_conflict_update_distkey on t_insert_on_conflict_update_distkey(a, b);
-- the following statement should succeed because replicated table does not contain distkey
insert into t_insert_on_conflict_update_distkey values (1, 1) on conflict(a, b) do update set a = 1;
--- end_ignore
-- Some tests on a partitioned table.
CREATE TABLE update_gp_rangep (a int, b int, orig_a int) DISTRIBUTED BY (b) PARTITION BY RANGE (a);
@@ -379,9 +368,8 @@ UPDATE update_gp_rangep SET a = 10 WHERE a = 3;
-- Move row to different partition and also change distribution key
UPDATE update_gp_rangep SET a = 11, b = 1 WHERE a = 4;
--- start_ignore
+
SELECT tableoid::regclass, * FROM update_gp_rangep ORDER BY orig_a;
--- end_ignore
-- Also do a lookup with specific distribution key. If the rows were not
-- correctly moved across segments, this would fail to find them, assuming
-- that direct dispatch is effective.
diff --git a/contrib/pax_storage/src/cpp/CMakeLists.txt b/contrib/pax_storage/src/cpp/CMakeLists.txt
index 8ac5f846c7a..8b6cbe0fd29 100644
--- a/contrib/pax_storage/src/cpp/CMakeLists.txt
+++ b/contrib/pax_storage/src/cpp/CMakeLists.txt
@@ -1,242 +1,37 @@
cmake_minimum_required (VERSION 3.11.0)
# protobuf
-include(ExternalProject)
-option(ORC_PREFER_STATIC_PROTOBUF "Prefer static protobuf library, if available" ON)
-set(THIRDPARTY_CONFIGURE_COMMAND "${CMAKE_COMMAND}" -G "${CMAKE_GENERATOR}")
-set(THIRDPARTY_DIR "${CMAKE_BINARY_DIR}/src/cpp/contrib")
-set(THIRDPARTY_LOG_OPTIONS LOG_CONFIGURE 1
- LOG_BUILD 1
- LOG_INSTALL 1
- LOG_DOWNLOAD 1)
-set(PROTOBUF_PREFIX "${THIRDPARTY_DIR}/protobuf_ep-install")
-set(PROTOBUF_INCLUDE_DIR "${PROTOBUF_PREFIX}/include")
-set(PROTOBUF_CMAKE_ARGS -DCMAKE_INSTALL_PREFIX=${PROTOBUF_PREFIX}
- -DCMAKE_INSTALL_LIBDIR=lib
- -DBUILD_SHARED_LIBS=OFF
- -Dprotobuf_BUILD_TESTS=OFF)
-
-set(PROTOBUF_CMAKE_ARGS ${PROTOBUF_CMAKE_ARGS} -DCMAKE_POSITION_INDEPENDENT_CODE=ON)
-set(PROTOBUF_STATIC_LIB_PREFIX ${CMAKE_STATIC_LIBRARY_PREFIX})
-set(PROTOBUF_STATIC_LIB "${PROTOBUF_PREFIX}/lib/${PROTOBUF_STATIC_LIB_PREFIX}protobuf${CMAKE_STATIC_LIBRARY_SUFFIX}")
-message(STATUS "${PROTOBUF_STATIC_LIB}")
-set(PROTOC_STATIC_LIB "${PROTOBUF_PREFIX}/lib/${PROTOBUF_STATIC_LIB_PREFIX}protoc${CMAKE_STATIC_LIBRARY_SUFFIX}")
-set(PROTOBUF_EXECUTABLE "${PROTOBUF_PREFIX}/bin/protoc${CMAKE_EXECUTABLE_SUFFIX}")
-
-set(PROTOBUF_CONFIGURE CONFIGURE_COMMAND "${THIRDPARTY_CONFIGURE_COMMAND}" ${PROTOBUF_CMAKE_ARGS}
- "${CMAKE_CURRENT_BINARY_DIR}/protobuf_ep-prefix/src/protobuf_ep/cmake")
-
-ExternalProject_Add(protobuf_ep
- URL "https://artifactory.hashdata.xyz/artifactory/utility/protobuf-3.6.1.tar.gz"
- ${PROTOBUF_CONFIGURE}
- ${THIRDPARTY_LOG_OPTIONS}
- BUILD_BYPRODUCTS "${PROTOBUF_STATIC_LIB}" "${PROTOC_STATIC_LIB}")
-
-set(PROTOBUF_LIBRARY ${PROTOBUF_STATIC_LIB})
-set(PROTOC_LIBRARY ${PROTOC_STATIC_LIB})
-set(PROTOBUF_VENDORED ON)
-set(INSTALL_VENDORED_LIBS OFF)
-
-add_library (orc_protobuf INTERFACE)
-add_library (orc::protobuf ALIAS orc_protobuf)
-add_library (orc_protoc INTERFACE)
-add_library (orc::protoc ALIAS orc_protoc)
-
-if (ORC_PREFER_STATIC_PROTOBUF AND ${PROTOBUF_STATIC_LIB})
- target_link_libraries (orc_protobuf INTERFACE ${PROTOBUF_STATIC_LIB})
-else ()
- target_link_libraries (orc_protobuf INTERFACE ${PROTOBUF_LIBRARY})
-endif()
-
-target_include_directories (orc_protobuf SYSTEM INTERFACE ${PROTOBUF_INCLUDE_DIR})
-
-if (ORC_PREFER_STATIC_PROTOBUF AND ${PROTOC_STATIC_LIB})
- target_link_libraries (orc_protoc INTERFACE ${PROTOC_STATIC_LIB})
-else ()
- target_link_libraries (orc_protoc INTERFACE ${PROTOC_LIBRARY})
-endif()
-
-target_include_directories (orc_protoc SYSTEM INTERFACE ${PROTOBUF_INCLUDE_DIR})
-
-if (PROTOBUF_VENDORED)
- add_dependencies (orc_protoc protobuf_ep)
- add_dependencies (orc_protobuf protobuf_ep)
- if (INSTALL_VENDORED_LIBS)
- install(FILES "${PROTOBUF_STATIC_LIB}" "${PROTOC_STATIC_LIB}"
- DESTINATION "lib")
- endif ()
-endif ()
-
-set(orc_proto_file "${CMAKE_CURRENT_SOURCE_DIR}/storage/proto/orc_proto.proto")
-set(orc_proto_src "${CMAKE_CURRENT_SOURCE_DIR}/storage/proto/orc_proto.pb.h" "${CMAKE_CURRENT_SOURCE_DIR}/storage/proto/orc_proto.pb.cc")
-
-set(pax_proto_file "${CMAKE_CURRENT_SOURCE_DIR}/storage/proto/pax.proto")
-set(pax_proto_src "${CMAKE_CURRENT_SOURCE_DIR}/storage/proto/pax.pb.h" "${CMAKE_CURRENT_SOURCE_DIR}/storage/proto/pax.pb.cc")
-
-set(catalog_proto_file "${CMAKE_CURRENT_SOURCE_DIR}/storage/proto/micro_partition_stats.proto")
-set(stats_proto_src "${CMAKE_CURRENT_SOURCE_DIR}/storage/proto/micro_partition_stats.pb.h" "${CMAKE_CURRENT_SOURCE_DIR}/storage/proto/micro_partition_stats.pb.cc")
-
-add_custom_command(OUTPUT ${orc_proto_src}
- COMMAND ${PROTOBUF_EXECUTABLE}
- -I ${CMAKE_CURRENT_SOURCE_DIR}/storage/proto/
- --cpp_out="${CMAKE_CURRENT_SOURCE_DIR}/storage/proto/"
- ${orc_proto_file})
-
-add_custom_command(OUTPUT ${pax_proto_src}
- COMMAND ${PROTOBUF_EXECUTABLE}
- -I ${CMAKE_CURRENT_SOURCE_DIR}/storage/proto/
- --cpp_out="${CMAKE_CURRENT_SOURCE_DIR}/storage/proto/"
- ${pax_proto_file})
-
-add_custom_command(OUTPUT ${stats_proto_src}
- COMMAND ${PROTOBUF_EXECUTABLE}
- -I ${CMAKE_CURRENT_SOURCE_DIR}/storage/proto/
- --cpp_out="${CMAKE_CURRENT_SOURCE_DIR}/storage/proto"
- ${catalog_proto_file})
-
-add_custom_target(generate_protobuf DEPENDS ${orc_proto_src} ${pax_proto_src} ${stats_proto_src})
-
-if (BUILD_GTEST AND NOT BUILD_PAX_FORMAT)
- add_subdirectory(contrib/googletest)
- ADD_DEFINITIONS(-DRUN_GTEST)
- file(GLOB TEST_CASE_SOURCES
- ${CMAKE_CURRENT_SOURCE_DIR}/*/*_test.cc
- ${CMAKE_CURRENT_SOURCE_DIR}/*/*/*_test.cc)
-
- link_directories($ENV{GPHOME}/lib)
- add_executable(test_main ${TEST_CASE_SOURCES})
- add_dependencies(test_main gtest gmock gtest_main)
- target_include_directories(test_main PUBLIC ${CMAKE_CURRENT_SOURCE_DIR} ${gtest_SOURCE_DIR}/include)
- target_link_libraries(test_main gtest gmock gtest_main postgres pax)
-endif(BUILD_GTEST AND NOT BUILD_PAX_FORMAT)
-
-# ztsd
-set(ZSTD_BUILD_PROGRAMS OFF)
-set(ZSTD_BUILD_TESTS OFF)
-set(ZSTD_BUILD_CONTRIB)
-add_subdirectory(contrib/zstd/build/cmake/)
-set(ZTSD_HEADER contrib/zstd/lib)
-
-set(pax_comm_src
- comm/bitmap.cc
- comm/paxc_wrappers.cc
- comm/cbdb_wrappers.cc)
-
-set(pax_exceptions_src
- exceptions/CException.cc)
-
-set(pax_storage_src
- storage/columns/pax_column.cc
- storage/columns/pax_column_int.cc
- storage/columns/pax_compress.cc
- storage/columns/pax_columns.cc
- storage/columns/pax_encoding_utils.cc
- storage/columns/pax_encoding_non_fixed_column.cc
- storage/columns/pax_encoding_column.cc
- storage/columns/pax_decoding.cc
- storage/columns/pax_encoding.cc
- storage/columns/pax_rlev2_decoding.cc
- storage/columns/pax_rlev2_encoding.cc
- storage/file_system.cc
- storage/pax_filter.cc
- storage/local_file_system.cc
- storage/micro_partition.cc
- storage/micro_partition_file_factory.cc
- storage/micro_partition_iterator.cc
- storage/micro_partition_metadata.cc
- storage/pax_buffer.cc
- storage/proto/protobuf_stream.cc
- storage/pax_filter.cc
- storage/strategy.cc
- storage/paxc_block_map_manager.cc
- storage/orc/orc.cc
- storage/strategy.cc)
-
-if(NOT BUILD_PAX_FORMAT)
- set(pax_storage_src ${pax_storage_src} storage/pax.cc)
-endif(NOT BUILD_PAX_FORMAT)
-
-set(pax_access_src
- access/pax_access_handle.cc
- access/pax_deleter.cc
- access/pax_dml_state.cc
- access/pax_inserter.cc
- access/pax_updater.cc
- access/pax_scanner.cc)
-
-set(pax_catalog_src
- catalog/micro_partition_stats.cc
- catalog/pax_aux_table.cc)
-
-set(pax_vec_src
- storage/vec/pax_vec_adapter.cc
- storage/vec/pax_vec_reader.cc)
-
-link_directories($ENV{GPHOME}/lib)
-
-if(BUILD_PAX_FORMAT)
- # paxformat.so
- ADD_DEFINITIONS(-DBUILD_PAX_FORMAT)
- add_library(paxformat SHARED ${orc_proto_src} ${pax_proto_src} ${pax_storage_src} ${pax_exceptions_src} ${pax_comm_src} )
- target_include_directories(paxformat PUBLIC ${ZTSD_HEADER} ${CMAKE_CURRENT_SOURCE_DIR} ${CBDB_INCLUDE_DIR})
- target_link_libraries(paxformat PUBLIC uuid orc_protobuf zstd z)
- set_target_properties(paxformat PROPERTIES
- OUTPUT_NAME paxformat)
- add_dependencies(paxformat generate_protobuf)
-
- # export headers
- set(PAX_COMM_HEADERS
- comm/cbdb_api.h
- )
-
- ## install dynamic libraray
- install(TARGETS paxformat
- LIBRARY DESTINATION ${CMAKE_INSTALL_PREFIX}/lib)
-
- # TODO(gongxun):
- # We should explicitly specify the headers
- # that need to be exported, and use the syntax of
- # install(FILES,...) to install the header files
- install(DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}/storage
- DESTINATION ${CMAKE_INSTALL_PREFIX}/include/pax
- FILES_MATCHING
- PATTERN "*.h"
-)
-
-install(FILES ${PAX_COMM_HEADERS}
- DESTINATION ${CMAKE_INSTALL_PREFIX}/include/pax/comm
+set(protobuf_files
+ ${CMAKE_CURRENT_SOURCE_DIR}/storage/proto/micro_partition_stats.proto
+ ${CMAKE_CURRENT_SOURCE_DIR}/storage/proto/pax.proto
+ ${CMAKE_CURRENT_SOURCE_DIR}/storage/proto/orc_proto.proto
)
-else()
- add_library(pax SHARED ${orc_proto_src} ${pax_proto_src} ${pax_storage_src} ${stats_proto_src} ${pax_exceptions_src}
- ${pax_access_src} ${pax_comm_src} ${pax_catalog_src} ${pax_vec_src})
- set_target_properties(pax PROPERTIES OUTPUT_NAME pax)
- target_include_directories(pax PUBLIC ${ZTSD_HEADER} ${CMAKE_CURRENT_SOURCE_DIR} ${CBDB_INCLUDE_DIR})
- target_link_libraries(pax PUBLIC uuid orc_protobuf zstd z postgres)
- add_dependencies(pax generate_protobuf)
- add_custom_command(TARGET pax POST_BUILD
- COMMAND ${CMAKE_COMMAND} -E
- copy_if_different $ ${CMAKE_CURRENT_SOURCE_DIR}/../data/pax.so)
-endif(BUILD_PAX_FORMAT)
+set(PROTO_DIR ${CMAKE_CURRENT_SOURCE_DIR}/storage/proto)
+set(PROTO_OUTPUT_DIR ${CMAKE_CURRENT_SOURCE_DIR}/storage/proto)
-# vec build
-if (VEC_BUILD)
- set(VEC_HEADER ${VEC_HOME}/src/include/)
+## We generate these files during the cmake configure phase (cmake -B).
+## By the time the make phase starts, every generated dependency already exists, so we can compile with multiple threads.
+FOREACH(FIL ${protobuf_files})
+ GET_FILENAME_COMPONENT(FIL_WE ${FIL} NAME_WE)
+ string(REGEX REPLACE ".+/(.+)\\..*" "\\1" FILE_NAME ${FIL})
+ string(REGEX REPLACE "(.+)\\${FILE_NAME}.*" "\\1" FILE_PATH ${FIL})
- find_package(PkgConfig REQUIRED)
- pkg_check_modules(GLIB REQUIRED glib-2.0)
+ set(PROTO_SRCS ${PROTO_SRCS} "${PROTO_OUTPUT_DIR}/${FIL_WE}.pb.cc")
+ set(PROTO_HDRS ${PROTO_HDRS} "${PROTO_OUTPUT_DIR}/${FIL_WE}.pb.h")
- target_include_directories(pax PRIVATE
- ${VEC_HEADER} # for utils/tuptable_vec.h
- ${CBDB_ROOT_INCLUDE_DIR} # for arrow-glib/arrow-glib.h and otehr arrow interface
- ${GLIB_INCLUDE_DIRS} # for glib-object.h
- )
+ EXECUTE_PROCESS(
+ COMMAND ${PROTOBUF_PROTOC_EXECUTABLE} -I ${PROTO_DIR} --cpp_out=${PROTO_OUTPUT_DIR} ${FIL}
+ )
+ENDFOREACH()
+add_custom_target(generate_protobuf DEPENDS ${PROTO_SRCS} ${PROTO_HDRS})
- if(BUILD_GTEST)
- target_include_directories(test_main PRIVATE ${VEC_HEADER} ${CBDB_ROOT_INCLUDE_DIR} ${GLIB_INCLUDE_DIRS})
- endif(BUILD_GTEST)
+link_directories($ENV{GPHOME}/lib)
- target_link_libraries(pax PRIVATE arrow)
-endif(VEC_BUILD)
+list(APPEND CMAKE_MODULE_PATH "${CMAKE_CURRENT_SOURCE_DIR}/cmake")
+## build pax_format.so
+include(pax_format)
+## build pax.so
+include(pax)
diff --git a/contrib/pax_storage/src/cpp/access/pax_access_handle.cc b/contrib/pax_storage/src/cpp/access/pax_access_handle.cc
index be34ff3aecc..a6bc4ab74be 100644
--- a/contrib/pax_storage/src/cpp/access/pax_access_handle.cc
+++ b/contrib/pax_storage/src/cpp/access/pax_access_handle.cc
@@ -3,11 +3,18 @@
#include "comm/cbdb_api.h"
#include "access/pax_dml_state.h"
+#include "access/pax_partition.h"
#include "access/pax_scanner.h"
#include "access/pax_updater.h"
+#include "access/paxc_rel_options.h"
+#include "access/paxc_scanner.h"
#include "catalog/pax_aux_table.h"
+#include "catalog/pax_fastsequence.h"
+#include "catalog/pg_pax_tables.h"
+#include "comm/guc.h"
+#include "comm/pax_memory.h"
#include "exceptions/CException.h"
-#include "storage/paxc_block_map_manager.h"
+#include "storage/local_file_system.h"
#define NOT_IMPLEMENTED_YET \
ereport(ERROR, \
@@ -18,14 +25,8 @@
ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), \
errmsg("not supported on pax relations: %s", __func__)))
-#define PAX_DEFAULT_COMPRESSLEVEL AO_DEFAULT_COMPRESSLEVEL
-#define PAX_MIN_COMPRESSLEVEL AO_MIN_COMPRESSLEVEL
-#define PAX_MAX_COMPRESSLEVEL AO_MAX_COMPRESSLEVEL
-
-#define PAX_DEFAULT_COMPRESSTYPE AO_DEFAULT_COMPRESSTYPE
-
#define RELATION_IS_PAX(rel) \
- (OidIsValid((rel)->rd_rel->relam) && AMOidIsPax((rel)->rd_rel->relam))
+ (OidIsValid((rel)->rd_rel->relam) && RelationIsPAX(rel))
// CBDB_TRY();
// {
@@ -41,6 +42,7 @@
//
// CBDB_CATCH_MATCH() is optional and can have several match pattern.
+char *global_pg_error_message = nullptr;
cbdb::CException global_exception(cbdb::CException::kExTypeInvalid);
// being of a try block w/o explicit handler
@@ -63,6 +65,7 @@ cbdb::CException global_exception(cbdb::CException::kExTypeInvalid);
catch (cbdb::CException & e) { \
internal_cbdb_try_throw_error_ = true; \
internal_cbdb_try_throw_error_with_stack_ = true; \
+ global_pg_error_message = elog_message(); \
elog(LOG, "\npax stack trace: \n%s", e.Stack()); \
global_exception = e; \
} \
@@ -79,44 +82,29 @@ cbdb::CException global_exception(cbdb::CException::kExTypeInvalid);
} while (0);
// end of a try-catch block
-#define CBDB_END_TRY() \
- } \
- if (internal_cbdb_try_throw_error_) { \
- if (internal_cbdb_try_throw_error_with_stack_) { \
- elog(LOG, "\npax stack trace: \n%s", global_exception.Stack()); \
- ereport(ERROR, errmsg("%s", global_exception.What().c_str())); \
- } \
- if (error_message.Length() == 0) \
- error_message.Append("ERROR: %s", __func__); \
- ereport(ERROR, errmsg("%s", error_message.Message())); \
- } \
- } \
+#define CBDB_END_TRY() \
+ } \
+ if (internal_cbdb_try_throw_error_) { \
+ if (global_pg_error_message) { \
+ elog(LOG, "\npg error message:%s", global_pg_error_message); \
+ } \
+ if (internal_cbdb_try_throw_error_with_stack_) { \
+ elog(LOG, "\npax stack trace: \n%s", global_exception.Stack()); \
+ ereport( \
+ ERROR, \
+ errmsg("%s (PG message: %s)", global_exception.What().c_str(), \
+ !global_pg_error_message ? "" : global_pg_error_message)); \
+ } \
+ if (error_message.Length() == 0) \
+ error_message.Append("ERROR: %s", __func__); \
+ ereport(ERROR, errmsg("%s", error_message.Message())); \
+ } \
+ } \
while (0)
-bool AMOidIsPax(Oid am_oid) {
- HeapTuple tuple;
- Form_pg_am form;
- bool is_pax;
-
- tuple = SearchSysCache1(AMOID, ObjectIdGetDatum(am_oid));
- if (!HeapTupleIsValid(tuple))
- elog(ERROR, "cache lookup failed for pg_am.oid = %u", am_oid);
-
- form = (Form_pg_am)GETSTRUCT(tuple);
- is_pax = strcmp(NameStr(form->amname), "pax") == 0;
- ReleaseSysCache(tuple);
-
- return is_pax;
-}
-
-// reloptions structure and variables.
-static relopt_kind self_relopt_kind;
-static const relopt_parse_elt kSelfReloptTab[] = {
- {"compresslevel", RELOPT_TYPE_INT, offsetof(PaxOptions, compress_level)},
- {"compresstype", RELOPT_TYPE_STRING, offsetof(PaxOptions, compress_type)},
- {"storage_format", RELOPT_TYPE_STRING,
- offsetof(PaxOptions, storage_format)},
-};
+#define PAX_SCAN_REUSE_BUFFER_DEFAULT_SIZE 8 * 1024 * 1024
+#define PAX_SCAN_REUSE_BUFFER_MIN_SIZE 1 * 1024 * 1024
+#define PAX_SCAN_REUSE_BUFFER_MAX_SIZE 32 * 1024 * 1024
// access methods that are implemented in C++
namespace pax {
@@ -128,7 +116,7 @@ TableScanDesc CCPaxAccessMethod::ScanBegin(Relation relation, Snapshot snapshot,
CBDB_TRY();
{
return PaxScanDesc::BeginScan(relation, snapshot, nkeys, key, pscan, flags,
- nullptr);
+ nullptr, true);
}
CBDB_CATCH_DEFAULT();
CBDB_END_TRY();
@@ -138,21 +126,22 @@ TableScanDesc CCPaxAccessMethod::ScanBegin(Relation relation, Snapshot snapshot,
void CCPaxAccessMethod::ScanEnd(TableScanDesc scan) {
CBDB_TRY();
- { PaxScanDesc::EndScan(scan); }
+ {
+ auto desc = PaxScanDesc::ToDesc(scan);
+ desc->EndScan();
+ }
CBDB_CATCH_DEFAULT();
- CBDB_FINALLY({
- // FIXME: destroy PaxScanDesc?
- });
+ CBDB_FINALLY({});
CBDB_END_TRY();
}
TableScanDesc CCPaxAccessMethod::ScanExtractColumns(
- Relation rel, Snapshot snapshot, ParallelTableScanDesc parallel_scan,
- List *targetlist, List *qual, uint32 flags) {
+ Relation rel, Snapshot snapshot, int nkeys, struct ScanKeyData *key,
+ ParallelTableScanDesc parallel_scan, struct PlanState *ps, uint32 flags) {
CBDB_TRY();
{
- return pax::PaxScanDesc::BeginScanExtractColumns(
- rel, snapshot, parallel_scan, targetlist, qual, flags);
+ return pax::PaxScanDesc::BeginScanExtractColumns(rel, snapshot, nkeys, key,
+ parallel_scan, ps, flags);
}
CBDB_CATCH_DEFAULT();
CBDB_FINALLY({});
@@ -160,16 +149,107 @@ TableScanDesc CCPaxAccessMethod::ScanExtractColumns(
pg_unreachable();
}
+struct IndexFetchTableData *CCPaxAccessMethod::IndexFetchBegin(Relation rel) {
+ CBDB_TRY();
+ {
+ auto desc = PAX_NEW(rel);
+ return desc->ToBase();
+ }
+ CBDB_CATCH_DEFAULT();
+ CBDB_FINALLY({});
+ CBDB_END_TRY();
+ return nullptr; // keep compiler quiet
+}
+
+void CCPaxAccessMethod::IndexFetchEnd(IndexFetchTableData *scan) {
+ CBDB_TRY();
+ {
+ auto desc = PaxIndexScanDesc::FromBase(scan);
+ PAX_DELETE(desc);
+ }
+ CBDB_CATCH_DEFAULT();
+ CBDB_FINALLY({});
+ CBDB_END_TRY();
+}
+
+bool CCPaxAccessMethod::IndexFetchTuple(struct IndexFetchTableData *scan,
+ ItemPointer tid, Snapshot snapshot,
+ TupleTableSlot *slot, bool *call_again,
+ bool *all_dead) {
+ CBDB_TRY();
+ {
+ auto desc = PaxIndexScanDesc::FromBase(scan);
+ return desc->FetchTuple(tid, snapshot, slot, call_again, all_dead);
+ }
+ CBDB_CATCH_DEFAULT();
+ CBDB_FINALLY({});
+ CBDB_END_TRY();
+ return false; // keep compiler quiet
+}
+
+void CCPaxAccessMethod::IndexFetchReset(IndexFetchTableData * /*scan*/) {}
+
void CCPaxAccessMethod::RelationSetNewFilenode(Relation rel,
const RelFileNode *newrnode,
char persistence,
TransactionId *freeze_xid,
MultiXactId *minmulti) {
+ Relation pax_tables_rel;
+ ScanKeyData scan_key[1];
+ SysScanDesc scan;
+ HeapTuple tuple;
+ Oid pax_relid;
+ bool exists;
+
+ *freeze_xid = *minmulti = InvalidTransactionId;
+
+ pax_tables_rel = table_open(PAX_TABLES_RELATION_ID, RowExclusiveLock);
+ pax_relid = RelationGetRelid(rel);
+
+ ScanKeyInit(&scan_key[0], ANUM_PG_PAX_TABLES_RELID, BTEqualStrategyNumber,
+ F_OIDEQ, ObjectIdGetDatum(pax_relid));
+ scan = systable_beginscan(pax_tables_rel, PAX_TABLES_RELID_INDEX_ID, true,
+ NULL, 1, scan_key);
+ tuple = systable_getnext(scan);
+ exists = HeapTupleIsValid(tuple);
+ if (exists) {
+ Oid aux_relid;
+
+ // set new filenode, not create new table
+ //
+ // 1. truncate aux table by new relfilenode
+ aux_relid = ::paxc::GetPaxAuxRelid(pax_relid);
+ Assert(OidIsValid(aux_relid));
+ paxc::PaxAuxRelationSetNewFilenode(aux_relid);
+ } else {
+ // create new table
+ //
+ // 1. create aux table
+ // 2. initialize fast sequence in pg_pax_fastsequence
+ // 3. setup dependency
+ paxc::CPaxCreateMicroPartitionTable(rel);
+ }
+
+ // initialize or reset the fast sequence number
+ paxc::CPaxInitializeFastSequenceEntry(
+ pax_relid,
+ exists ? FASTSEQUENCE_INIT_TYPE_UPDATE : FASTSEQUENCE_INIT_TYPE_CREATE);
+
+ systable_endscan(scan);
+ table_close(pax_tables_rel, NoLock);
+
+ // create relfilenode file for pax table
+ auto srel = RelationCreateStorage(*newrnode, persistence, SMGR_MD, rel);
+ smgrclose(srel);
+
+ // create data directory
CBDB_TRY();
{
- *freeze_xid = *minmulti = InvalidTransactionId;
- pax::CCPaxAuxTable::PaxAuxRelationSetNewFilenode(rel, newrnode,
- persistence);
+ FileSystem *fs = pax::Singleton::GetInstance();
+ auto path = cbdb::BuildPaxDirectoryPath(*newrnode, rel->rd_backend);
+ Assert(!path.empty());
+ CBDB_CHECK((fs->CreateDirectory(path) == 0),
+ cbdb::CException::ExType::kExTypeIOError);
}
CBDB_CATCH_DEFAULT();
CBDB_FINALLY({});
@@ -233,12 +313,14 @@ void CCPaxAccessMethod::RelationFileUnlink(RelFileNodeBackend rnode) {
CBDB_END_TRY();
}
-void CCPaxAccessMethod::ScanRescan(TableScanDesc scan, ScanKey /*key*/,
- bool /*set_params*/, bool /*allow_strat*/,
- bool /*allow_sync*/,
- bool /*allow_pagemode*/) {
+void CCPaxAccessMethod::ScanRescan(TableScanDesc scan, ScanKey key,
+ bool set_params, bool allow_strat,
+ bool allow_sync, bool allow_pagemode) {
CBDB_TRY();
- { pax::PaxScanDesc::ReScan(scan); }
+ {
+ auto desc = PaxScanDesc::ToDesc(scan);
+ desc->ReScan(key, set_params, allow_strat, allow_sync, allow_pagemode);
+ }
CBDB_CATCH_DEFAULT();
CBDB_FINALLY({});
CBDB_END_TRY();
@@ -248,11 +330,12 @@ bool CCPaxAccessMethod::ScanGetNextSlot(TableScanDesc scan,
ScanDirection /*direction*/,
TupleTableSlot *slot) {
CBDB_TRY();
- { return PaxScanDesc::ScanGetNextSlot(scan, slot); }
+ {
+ auto desc = PaxScanDesc::ToDesc(scan);
+ return desc->GetNextSlot(slot);
+ }
CBDB_CATCH_DEFAULT();
- CBDB_FINALLY({
- // FIXME: destroy PaxScanDesc?
- });
+ CBDB_FINALLY({});
CBDB_END_TRY();
pg_unreachable();
@@ -315,11 +398,14 @@ TM_Result CCPaxAccessMethod::TupleUpdate(Relation relation, ItemPointer otid,
pg_unreachable();
}
-bool CCPaxAccessMethod::ScanAnalyzeNextBlock(
- TableScanDesc scan, BlockNumber blockno,
- BufferAccessStrategy /*bstrategy*/) {
+bool CCPaxAccessMethod::ScanAnalyzeNextBlock(TableScanDesc scan,
+ BlockNumber blockno,
+ BufferAccessStrategy bstrategy) {
CBDB_TRY();
- { return PaxScanDesc::ScanAnalyzeNextBlock(scan, blockno); }
+ {
+ auto desc = PaxScanDesc::ToDesc(scan);
+ return desc->ScanAnalyzeNextBlock(blockno, bstrategy);
+ }
CBDB_CATCH_DEFAULT();
CBDB_FINALLY({});
CBDB_END_TRY();
@@ -327,34 +413,54 @@ bool CCPaxAccessMethod::ScanAnalyzeNextBlock(
}
bool CCPaxAccessMethod::ScanAnalyzeNextTuple(TableScanDesc scan,
- TransactionId /*oldest_xmin*/,
+ TransactionId oldest_xmin,
double *liverows, double *deadrows,
TupleTableSlot *slot) {
CBDB_TRY();
- { return PaxScanDesc::ScanAnalyzeNextTuple(scan, liverows, deadrows, slot); }
+ {
+ auto desc = PaxScanDesc::ToDesc(scan);
+ return desc->ScanAnalyzeNextTuple(oldest_xmin, liverows, deadrows, slot);
+ }
CBDB_CATCH_DEFAULT();
CBDB_FINALLY({});
CBDB_END_TRY();
pg_unreachable();
}
-bool CCPaxAccessMethod::ScanBitmapNextBlock(TableScanDesc /*scan*/,
- TBMIterateResult * /*tbmres*/) {
- NOT_IMPLEMENTED_YET;
- return false;
+bool CCPaxAccessMethod::ScanBitmapNextBlock(TableScanDesc scan,
+ TBMIterateResult *tbmres) {
+ CBDB_TRY();
+ {
+ auto desc = PaxScanDesc::ToDesc(scan);
+ return desc->BitmapNextBlock(tbmres);
+ }
+ CBDB_CATCH_DEFAULT();
+ CBDB_FINALLY({});
+ CBDB_END_TRY();
+ pg_unreachable();
}
-bool CCPaxAccessMethod::ScanBitmapNextTuple(TableScanDesc /*scan*/,
- TBMIterateResult * /*tbmres*/,
- TupleTableSlot * /*slot*/) {
- NOT_IMPLEMENTED_YET;
- return false;
+bool CCPaxAccessMethod::ScanBitmapNextTuple(TableScanDesc scan,
+ TBMIterateResult *tbmres,
+ TupleTableSlot *slot) {
+ CBDB_TRY();
+ {
+ auto desc = PaxScanDesc::ToDesc(scan);
+ return desc->BitmapNextTuple(tbmres, slot);
+ }
+ CBDB_CATCH_DEFAULT();
+ CBDB_FINALLY({});
+ CBDB_END_TRY();
+ pg_unreachable();
}
bool CCPaxAccessMethod::ScanSampleNextBlock(TableScanDesc scan,
SampleScanState *scanstate) {
CBDB_TRY();
- { return PaxScanDesc::ScanSampleNextBlock(scan, scanstate); }
+ {
+ auto desc = PaxScanDesc::ToDesc(scan);
+ return desc->ScanSampleNextBlock(scanstate);
+ }
CBDB_CATCH_DEFAULT();
CBDB_FINALLY({});
CBDB_END_TRY();
@@ -362,10 +468,13 @@ bool CCPaxAccessMethod::ScanSampleNextBlock(TableScanDesc scan,
}
bool CCPaxAccessMethod::ScanSampleNextTuple(TableScanDesc scan,
- SampleScanState * /*scanstate*/,
+ SampleScanState *scanstate,
TupleTableSlot *slot) {
CBDB_TRY();
- { return PaxScanDesc::ScanSampleNextTuple(scan, slot); }
+ {
+ auto desc = PaxScanDesc::ToDesc(scan);
+ return desc->ScanSampleNextTuple(scanstate, slot);
+ }
CBDB_CATCH_DEFAULT();
CBDB_FINALLY({});
CBDB_END_TRY();
@@ -409,9 +518,7 @@ void CCPaxAccessMethod::FinishBulkInsert(Relation relation, int options) {
}
void CCPaxAccessMethod::ExtDmlInit(Relation rel, CmdType operation) {
- if (!RELATION_IS_PAX(rel)) {
- return;
- }
+ if (!RELATION_IS_PAX(rel)) return;
CBDB_TRY();
{ pax::CPaxDmlStateLocal::Instance()->InitDmlState(rel, operation); }
@@ -421,9 +528,7 @@ void CCPaxAccessMethod::ExtDmlInit(Relation rel, CmdType operation) {
}
void CCPaxAccessMethod::ExtDmlFini(Relation rel, CmdType operation) {
- if (!RELATION_IS_PAX(rel)) {
- return;
- }
+ if (!RELATION_IS_PAX(rel)) return;
CBDB_TRY();
{ pax::CPaxDmlStateLocal::Instance()->FinishDmlState(rel, operation); }
@@ -458,29 +563,6 @@ void PaxAccessMethod::ParallelscanReinitialize(
NOT_IMPLEMENTED_YET;
}
-struct IndexFetchTableData *PaxAccessMethod::IndexFetchBegin(Relation /*rel*/) {
- NOT_SUPPORTED_YET;
- return nullptr;
-}
-
-void PaxAccessMethod::IndexFetchEnd(IndexFetchTableData * /*data*/) {
- NOT_SUPPORTED_YET;
-}
-
-void PaxAccessMethod::IndexFetchReset(IndexFetchTableData * /*data*/) {
- NOT_SUPPORTED_YET;
-}
-
-bool PaxAccessMethod::IndexFetchTuple(struct IndexFetchTableData * /*scan*/,
- ItemPointer /*tid*/,
- Snapshot /*snapshot*/,
- TupleTableSlot * /*slot*/,
- bool * /*call_again*/,
- bool * /*all_dead*/) {
- NOT_SUPPORTED_YET;
- return false;
-}
-
void PaxAccessMethod::TupleInsertSpeculative(Relation /*relation*/,
TupleTableSlot * /*slot*/,
CommandId /*cid*/, int /*options*/,
@@ -556,10 +638,10 @@ uint64 PaxAccessMethod::RelationSize(Relation rel, ForkNumber fork_number) {
if (fork_number != MAIN_FORKNUM) return 0;
// Get the oid of pg_pax_blocks_xxx from pg_pax_tables
- GetPaxTablesEntryAttributes(rel->rd_id, &pax_aux_oid, NULL, NULL);
+ pax_aux_oid = ::paxc::GetPaxAuxRelid(rel->rd_id);
// Scan pg_pax_blocks_xxx to calculate size of micro partition
- pax_aux_rel = heap_open(pax_aux_oid, AccessShareLock);
+ pax_aux_rel = table_open(pax_aux_oid, AccessShareLock);
aux_tup_desc = RelationGetDescr(pax_aux_rel);
aux_scan = systable_beginscan(pax_aux_rel, InvalidOid, false, NULL, 0, NULL);
@@ -576,7 +658,7 @@ uint64 PaxAccessMethod::RelationSize(Relation rel, ForkNumber fork_number) {
}
systable_endscan(aux_scan);
- heap_close(pax_aux_rel, AccessShareLock);
+ table_close(pax_aux_rel, AccessShareLock);
return pax_size;
}
@@ -599,7 +681,7 @@ void PaxAccessMethod::EstimateRelSize(Relation rel, int32 * /*attr_widths*/,
TupleDesc aux_tup_desc;
HeapTuple aux_tup;
SysScanDesc aux_scan;
- uint32 total_tuples = 0;
+ uint64 total_tuples = 0;
uint64 pax_size = 0;
// Even an empty table takes at least one page,
@@ -610,10 +692,10 @@ void PaxAccessMethod::EstimateRelSize(Relation rel, int32 * /*attr_widths*/,
*allvisfrac = 0;
// Get the oid of pg_pax_blocks_xxx from pg_pax_tables
- GetPaxTablesEntryAttributes(rel->rd_id, &pax_aux_oid, NULL, NULL);
+ pax_aux_oid = ::paxc::GetPaxAuxRelid(rel->rd_id);
// Scan pg_pax_blocks_xxx to get attributes
- pax_aux_rel = heap_open(pax_aux_oid, AccessShareLock);
+ pax_aux_rel = table_open(pax_aux_oid, AccessShareLock);
aux_tup_desc = RelationGetDescr(pax_aux_rel);
aux_scan = systable_beginscan(pax_aux_rel, InvalidOid, false, NULL, 0, NULL);
@@ -640,20 +722,122 @@ void PaxAccessMethod::EstimateRelSize(Relation rel, int32 * /*attr_widths*/,
}
systable_endscan(aux_scan);
- heap_close(pax_aux_rel, AccessShareLock);
+ table_close(pax_aux_rel, AccessShareLock);
*tuples = static_cast(total_tuples);
*pages = RelationGuessNumberOfBlocksFromSize(pax_size);
}
double PaxAccessMethod::IndexBuildRangeScan(
- Relation /*heap_relation*/, Relation /*index_relation*/,
- IndexInfo * /*index_info*/, bool /*allow_sync*/, bool /*anyvisible*/,
- bool /*progress*/, BlockNumber /*start_blockno*/, BlockNumber /*numblocks*/,
- IndexBuildCallback /*callback*/, void * /*callback_state*/,
- TableScanDesc /*scan*/) {
- NOT_SUPPORTED_YET;
- return 0.0;
+ Relation heap_relation, Relation index_relation, IndexInfo *index_info,
+ bool /*allow_sync*/, bool anyvisible, bool progress,
+ BlockNumber start_blockno, BlockNumber numblocks,
+ IndexBuildCallback callback, void *callback_state, TableScanDesc scan) {
+ Datum values[INDEX_MAX_KEYS];
+ bool isnull[INDEX_MAX_KEYS];
+ double reltuples = 0;
+ ExprState *predicate;
+ TupleTableSlot *slot;
+ EState *estate;
+ ExprContext *econtext;
+ Snapshot snapshot;
+
+ bool checking_uniqueness;
+ bool need_unregister_snapshot;
+ BlockNumber previous_blkno = InvalidBlockNumber;
+
+ Assert(OidIsValid(index_relation->rd_rel->relam));
+ Assert(!IsSystemRelation(heap_relation));
+
+ checking_uniqueness =
+ (index_info->ii_Unique || index_info->ii_ExclusionOps != NULL);
+ // "Any visible" mode is not compatible with uniqueness checks; make sure
+ // only one of those is requested.
+ (void)anyvisible; // keep compiler quiet for release version
+ Assert(!(anyvisible && checking_uniqueness));
+
+ slot = table_slot_create(heap_relation, NULL);
+ estate = CreateExecutorState();
+ econtext = GetPerTupleExprContext(estate);
+ econtext->ecxt_scantuple = slot;
+ predicate = ExecPrepareQual(index_info->ii_Predicate, estate);
+
+ if (!scan) {
+ snapshot = RegisterSnapshot(GetTransactionSnapshot());
+ scan = table_beginscan(heap_relation, snapshot, 0, NULL);
+ need_unregister_snapshot = true;
+ } else {
+ snapshot = scan->rs_snapshot;
+ need_unregister_snapshot = false;
+ }
+
+ // FIXME: Only brin index uses partial index now. setup start_blockno
+ // and numblocks is too late after beginscan is called now, because
+ // the current micro partition is opened. The workaround is ugly to
+ // check and close the current micro partition and open another one.
+ if (start_blockno != 0 || numblocks != InvalidBlockNumber)
+ elog(ERROR, "PAX doesn't support partial index scan now");
+
+ while (table_scan_getnextslot(scan, ForwardScanDirection, slot)) {
+ CHECK_FOR_INTERRUPTS();
+
+ if (progress) {
+ BlockNumber blkno = pax::GetBlockNumber(slot->tts_tid);
+ if (previous_blkno == InvalidBlockNumber)
+ previous_blkno = blkno;
+ else if (previous_blkno != blkno) {
+ pgstat_progress_update_param(PROGRESS_SCAN_BLOCKS_DONE,
+ blkno - start_blockno);
+ previous_blkno = blkno;
+ }
+ }
+ reltuples += 1;
+
+ MemoryContextReset(econtext->ecxt_per_tuple_memory);
+
+ /*
+ * In a partial index, discard tuples that don't satisfy the
+ * predicate.
+ */
+ if (predicate && !ExecQual(predicate, econtext)) continue;
+
+ /*
+ * For the current heap tuple, extract all the attributes we use in
+ * this index, and note which are null. This also performs evaluation
+ * of any expressions needed.
+ */
+ FormIndexDatum(index_info, slot, estate, values, isnull);
+
+ /*
+ * You'd think we should go ahead and build the index tuple here, but
+ * some index AMs want to do further processing on the data first. So
+ * pass the values[] and isnull[] arrays, instead.
+ */
+ callback(index_relation, &slot->tts_tid, values, isnull, true,
+ callback_state);
+ }
+
+ /* Report scan progress one last time. */
+ if (progress && previous_blkno != InvalidBlockNumber)
+ pgstat_progress_update_param(PROGRESS_SCAN_BLOCKS_DONE,
+ previous_blkno + 1 - start_blockno);
+
+ table_endscan(scan);
+ if (need_unregister_snapshot) UnregisterSnapshot(snapshot);
+
+ ExecDropSingleTupleTableSlot(slot);
+ FreeExecutorState(estate);
+
+ /* These may have been pointing to the now-gone estate */
+ index_info->ii_ExpressionsState = NIL;
+ index_info->ii_PredicateState = NULL;
+
+ return reltuples;
+}
+
+bool PaxAccessMethod::IndexUniqueCheck(Relation rel, ItemPointer tid,
+ Snapshot snapshot, bool *all_dead) {
+ return paxc::IndexUniqueCheck(rel, tid, snapshot, all_dead);
}
void PaxAccessMethod::IndexValidateScan(Relation /*heap_relation*/,
@@ -664,83 +848,107 @@ void PaxAccessMethod::IndexValidateScan(Relation /*heap_relation*/,
NOT_IMPLEMENTED_YET;
}
-#define PAX_COPY_OPT(pax_opts_, pax_opt_name_) \
- do { \
- PaxOptions *pax_opts = reinterpret_cast(pax_opts_); \
- int pax_name_offset_ = *reinterpret_cast(pax_opts->pax_opt_name_); \
- if (pax_name_offset_) \
- strlcpy(pax_opts->pax_opt_name_, \
- reinterpret_cast(pax_opts) + pax_name_offset_, \
- sizeof(pax_opts->pax_opt_name_)); \
- } while (0)
-bytea *PaxAccessMethod::Amoptions(Datum reloptions, char /*relkind*/,
- bool validate) {
- void *rdopts;
-
- rdopts = build_reloptions(reloptions, validate, self_relopt_kind,
- sizeof(PaxOptions), kSelfReloptTab,
- lengthof(kSelfReloptTab));
- // adjust string values
- PAX_COPY_OPT(rdopts, storage_format);
- PAX_COPY_OPT(rdopts, compress_type);
-
- return reinterpret_cast(rdopts);
-}
-#undef PAX_COPY_OPT
-
+// Swap data between two pax tables, but not swap oids
+// 1. swap partition-spec in pg_pax_tables
+// 2. swap relation content for aux table and toast
void PaxAccessMethod::SwapRelationFiles(Oid relid1, Oid relid2,
TransactionId frozen_xid,
MultiXactId cutoff_multi) {
- HeapTuple tuple1;
- HeapTuple tuple2;
+ HeapTuple old_tuple1;
+ HeapTuple old_tuple2;
Relation pax_rel;
+ TupleDesc desc;
+ ScanKeyData key[1];
+ SysScanDesc scan;
- Oid b_relid1;
- Oid b_relid2;
+ Oid aux_relid1;
+ Oid aux_relid2;
- pax_rel = table_open(PaxTablesRelationId, RowExclusiveLock);
+ pax_rel = table_open(PAX_TABLES_RELATION_ID, RowExclusiveLock);
+ desc = RelationGetDescr(pax_rel);
- tuple1 = SearchSysCacheCopy1(PAXTABLESID, relid1);
- if (!HeapTupleIsValid(tuple1))
- ereport(ERROR, (errcode(ERRCODE_UNDEFINED_SCHEMA),
- errmsg("cache lookup failed with relid=%u for aux relation "
- "in pg_pax_tables.",
- relid1)));
+ // save ctid, auxrelid and partition-spec for the first pax relation
+ ScanKeyInit(&key[0], ANUM_PG_PAX_TABLES_RELID, BTEqualStrategyNumber, F_OIDEQ,
+ ObjectIdGetDatum(relid1));
- tuple2 = SearchSysCacheCopy1(PAXTABLESID, relid2);
- if (!HeapTupleIsValid(tuple2))
- ereport(ERROR, (errcode(ERRCODE_UNDEFINED_SCHEMA),
- errmsg("cache lookup failed with relid=%u for aux relation "
- "in pg_pax_tables.",
- relid2)));
+ scan = systable_beginscan(pax_rel, PAX_TABLES_RELID_INDEX_ID, true, nullptr,
+ 1, key);
+ old_tuple1 = systable_getnext(scan);
+ if (!HeapTupleIsValid(old_tuple1))
+ ereport(ERROR, (errmsg("relid=%u is not a pax relation", relid1)));
- // swap the entries
- {
- Form_pg_pax_tables form1;
- Form_pg_pax_tables form2;
+ old_tuple1 = heap_copytuple(old_tuple1);
+ systable_endscan(scan);
- int16 temp_compresslevel;
- NameData temp_compresstype;
+ // save ctid, auxrelid and partition-spec for the second pax relation
+ ScanKeyInit(&key[0], ANUM_PG_PAX_TABLES_RELID, BTEqualStrategyNumber, F_OIDEQ,
+ ObjectIdGetDatum(relid2));
+ scan = systable_beginscan(pax_rel, PAX_TABLES_RELID_INDEX_ID, true, nullptr,
+ 1, key);
+ old_tuple2 = systable_getnext(scan);
+ if (!HeapTupleIsValid(old_tuple2))
+ ereport(ERROR, (errmsg("relid=%u is not a pax relation", relid2)));
- form1 = (Form_pg_pax_tables)GETSTRUCT(tuple1);
- form2 = (Form_pg_pax_tables)GETSTRUCT(tuple2);
+ old_tuple2 = heap_copytuple(old_tuple2);
+ systable_endscan(scan);
- Assert(((Form_pg_pax_tables)GETSTRUCT(tuple1))->relid == relid1);
- Assert(((Form_pg_pax_tables)GETSTRUCT(tuple2))->relid == relid2);
+ // swap the entries
+ {
+ HeapTuple tuple1;
+ HeapTuple tuple2;
+ Datum values[NATTS_PG_PAX_TABLES];
+ bool nulls[NATTS_PG_PAX_TABLES];
+ Datum datum;
+ bool isnull;
+
+ datum =
+ heap_getattr(old_tuple1, ANUM_PG_PAX_TABLES_AUXRELID, desc, &isnull);
+ Assert(!isnull);
+ aux_relid1 = DatumGetObjectId(datum);
+
+ values[ANUM_PG_PAX_TABLES_RELID - 1] = ObjectIdGetDatum(relid1);
+ values[ANUM_PG_PAX_TABLES_AUXRELID - 1] = datum;
+ nulls[ANUM_PG_PAX_TABLES_RELID - 1] = false;
+ nulls[ANUM_PG_PAX_TABLES_AUXRELID - 1] = false;
+
+ datum = heap_getattr(old_tuple2, ANUM_PG_PAX_TABLES_PARTITIONSPEC, desc,
+ &isnull);
+ if (!isnull) {
+ auto vl = reinterpret_cast(DatumGetPointer(datum));
+ vl = pg_detoast_datum_packed(vl);
+ values[ANUM_PG_PAX_TABLES_PARTITIONSPEC - 1] = PointerGetDatum(vl);
+ }
+ nulls[ANUM_PG_PAX_TABLES_PARTITIONSPEC - 1] = isnull;
- b_relid1 = form1->blocksrelid;
- b_relid2 = form2->blocksrelid;
+ tuple1 = heap_form_tuple(desc, values, nulls);
+ tuple1->t_data->t_ctid = old_tuple1->t_data->t_ctid;
+ tuple1->t_self = old_tuple1->t_self;
+ tuple1->t_tableOid = old_tuple1->t_tableOid;
- memcpy(&temp_compresstype, &form1->compresstype, sizeof(NameData));
- memcpy(&form1->compresstype, &form2->compresstype, sizeof(NameData));
- memcpy(&form2->compresstype, &temp_compresstype, sizeof(NameData));
+ datum =
+ heap_getattr(old_tuple2, ANUM_PG_PAX_TABLES_AUXRELID, desc, &isnull);
+ Assert(!isnull);
+ aux_relid2 = DatumGetObjectId(datum);
+
+ values[ANUM_PG_PAX_TABLES_RELID - 1] = ObjectIdGetDatum(relid2);
+ values[ANUM_PG_PAX_TABLES_AUXRELID - 1] = datum;
+ nulls[ANUM_PG_PAX_TABLES_RELID - 1] = false;
+ nulls[ANUM_PG_PAX_TABLES_AUXRELID - 1] = false;
+
+ datum = heap_getattr(old_tuple1, ANUM_PG_PAX_TABLES_PARTITIONSPEC, desc,
+ &isnull);
+ if (!isnull) {
+ auto vl = reinterpret_cast(DatumGetPointer(datum));
+ vl = pg_detoast_datum_packed(vl);
+ values[ANUM_PG_PAX_TABLES_PARTITIONSPEC - 1] = PointerGetDatum(vl);
+ }
+ nulls[ANUM_PG_PAX_TABLES_PARTITIONSPEC - 1] = isnull;
- temp_compresslevel = form1->compresslevel;
- form1->compresslevel = form2->compresslevel;
- form2->compresslevel = temp_compresslevel;
- }
+ tuple2 = heap_form_tuple(desc, values, nulls);
+ tuple2->t_data->t_ctid = old_tuple2->t_data->t_ctid;
+ tuple2->t_self = old_tuple2->t_self;
+ tuple2->t_tableOid = old_tuple2->t_tableOid;
- {
CatalogIndexState indstate;
indstate = CatalogOpenIndexes(pax_rel);
@@ -753,23 +961,55 @@ void PaxAccessMethod::SwapRelationFiles(Oid relid1, Oid relid2,
/* swap relation files for aux table */
{
- Relation b_rel1;
- Relation b_rel2;
-
- b_rel1 = relation_open(b_relid1, AccessExclusiveLock);
- b_rel2 = relation_open(b_relid2, AccessExclusiveLock);
-
- swap_relation_files(b_relid1, b_relid2, false, /* target_is_pg_class */
- true, /* swap_toast_by_content */
- true, /*swap_stats */
- true, /* is_internal */
+ Relation aux_rel1;
+ Relation aux_rel2;
+ ReindexParams reindex_params = {0};
+ Relation toast_rel1 = nullptr;
+ Relation toast_rel2 = nullptr;
+
+ aux_rel1 = relation_open(aux_relid1, AccessExclusiveLock);
+ aux_rel2 = relation_open(aux_relid2, AccessExclusiveLock);
+
+ if (OidIsValid(aux_rel1->rd_rel->reltoastrelid))
+ toast_rel1 =
+ relation_open(aux_rel1->rd_rel->reltoastrelid, AccessExclusiveLock);
+ if (OidIsValid(aux_rel2->rd_rel->reltoastrelid))
+ toast_rel2 =
+ relation_open(aux_rel2->rd_rel->reltoastrelid, AccessExclusiveLock);
+
+ swap_relation_files(aux_relid1, aux_relid2, false, /* target_is_pg_class */
+ true, /* swap_toast_by_content */
+ true, /*swap_stats */
+ true, /* is_internal */
frozen_xid, cutoff_multi, NULL);
- relation_close(b_rel1, NoLock);
- relation_close(b_rel2, NoLock);
+ if (toast_rel1) relation_close(toast_rel1, NoLock);
+ if (toast_rel2) relation_close(toast_rel2, NoLock);
+ relation_close(aux_rel1, NoLock);
+ relation_close(aux_rel2, NoLock);
+
+ reindex_relation(aux_relid1, 0, &reindex_params);
+ reindex_relation(aux_relid2, 0, &reindex_params);
}
}
+bytea *PaxAccessMethod::AmOptions(Datum reloptions, char relkind,
+ bool validate) {
+ return paxc_default_rel_options(reloptions, relkind, validate);
+}
+
+void PaxAccessMethod::ValidateColumnEncodingClauses(List *encoding_opts) {
+ paxc_validate_column_encoding_clauses(encoding_opts);
+}
+
+List *PaxAccessMethod::TransformColumnEncodingClauses(Relation /*rel*/,
+ List *encoding_opts,
+ bool validate,
+ bool from_type) {
+ return paxc_transform_column_encoding_clauses(encoding_opts, validate,
+ from_type);
+}
+
} // namespace paxc
// END of C implementation
@@ -789,10 +1029,11 @@ static const TableAmRoutine kPaxColumnMethods = {
.parallelscan_reinitialize =
paxc::PaxAccessMethod::ParallelscanReinitialize,
- .index_fetch_begin = paxc::PaxAccessMethod::IndexFetchBegin,
- .index_fetch_reset = paxc::PaxAccessMethod::IndexFetchReset,
- .index_fetch_end = paxc::PaxAccessMethod::IndexFetchEnd,
- .index_fetch_tuple = paxc::PaxAccessMethod::IndexFetchTuple,
+ .index_fetch_begin = pax::CCPaxAccessMethod::IndexFetchBegin,
+ .index_fetch_reset = pax::CCPaxAccessMethod::IndexFetchReset,
+ .index_fetch_end = pax::CCPaxAccessMethod::IndexFetchEnd,
+ .index_fetch_tuple = pax::CCPaxAccessMethod::IndexFetchTuple,
+ .index_unique_check = paxc::PaxAccessMethod::IndexUniqueCheck,
.tuple_fetch_row_version = paxc::PaxAccessMethod::TupleFetchRowVersion,
.tuple_tid_valid = paxc::PaxAccessMethod::TupleTidValid,
@@ -831,8 +1072,12 @@ static const TableAmRoutine kPaxColumnMethods = {
.scan_sample_next_block = pax::CCPaxAccessMethod::ScanSampleNextBlock,
.scan_sample_next_tuple = pax::CCPaxAccessMethod::ScanSampleNextTuple,
- .amoptions = paxc::PaxAccessMethod::Amoptions,
+ .amoptions = paxc::PaxAccessMethod::AmOptions,
.swap_relation_files = paxc::PaxAccessMethod::SwapRelationFiles,
+ .validate_column_encoding_clauses =
+ paxc::PaxAccessMethod::ValidateColumnEncodingClauses,
+ .transform_column_encoding_clauses =
+ paxc::PaxAccessMethod::TransformColumnEncodingClauses,
};
PG_MODULE_MAGIC;
@@ -841,107 +1086,168 @@ Datum pax_tableam_handler(PG_FUNCTION_ARGS) { // NOLINT
PG_RETURN_POINTER(&kPaxColumnMethods);
}
-static void PaxValidateStorageFormat(const char *value) {
- size_t i;
- static const char *storage_formats[] = {
- "orc",
- "ppt",
- };
-
- for (i = 0; i < lengthof(storage_formats); i++) {
- if (strcmp(value, storage_formats[i]) == 0) return;
+static object_access_hook_type prev_object_access_hook = NULL;
+
+static void PaxObjectAccessHook(ObjectAccessType access, Oid class_id,
+ Oid object_id, int sub_id, void *arg) {
+ Relation rel;
+ PartitionKey pkey;
+ List *part;
+ List *pby;
+ paxc::PaxOptions *options;
+
+ if (prev_object_access_hook)
+ prev_object_access_hook(access, class_id, object_id, sub_id, arg);
+
+ if (access != OAT_POST_CREATE || class_id != RelationRelationId) return;
+
+ CommandCounterIncrement();
+ rel = relation_open(object_id, RowExclusiveLock);
+ auto ok = ((rel->rd_rel->relkind == RELKIND_RELATION ||
+ rel->rd_rel->relkind == RELKIND_MATVIEW) &&
+ rel->rd_options && RelationIsPAX(rel));
+ if (!ok) goto out;
+
+ options = reinterpret_cast(rel->rd_options);
+ if (!options->partition_by()) {
+ if (options->partition_ranges()) {
+ elog(ERROR, "set '%s', but partition_by not specified",
+ options->partition_ranges());
+ }
+ goto out;
}
- ereport(ERROR, (errmsg("unsupported storage format: '%s'", value)));
-}
-static void PaxValidateCompresstype(const char *value) {
- size_t i;
- static const char *compress_types[] = {
- "none",
- "zlib",
- };
+ pby = paxc_raw_parse(options->partition_by());
+ pkey = paxc::PaxRelationBuildPartitionKey(rel, pby);
+ if (pkey->partnatts > 1) elog(ERROR, "pax only support 1 partition key now");
- for (i = 0; i < lengthof(compress_types); i++) {
- if (strcmp(value, compress_types[i]) == 0) return;
- }
- ereport(ERROR, (errmsg("unsupported compress type: '%s'", value)));
-}
+ part = lappend(NIL, pby);
+ if (options->partition_ranges()) {
+ List *ranges;
-static shmem_startup_hook_type prev_shmem_startup_hook = NULL;
-static ExecutorStart_hook_type prev_executor_start = NULL;
-static ExecutorEnd_hook_type prev_executor_end = NULL;
-static uint32 executor_run_ref_count = 0;
+ ranges = paxc_parse_partition_ranges(options->partition_ranges());
+ ranges = paxc::PaxValidatePartitionRanges(rel, pkey, ranges);
+ part = lappend(part, ranges);
+ }
+ // Currently, partition_ranges must be set to partition pax tables.
+ // We hope this option be removed and automatically partition data set.
+ else
+ elog(ERROR, "partition_ranges must be set for partition_by='%s'",
+ options->partition_by());
-void PaxShmemInit() {
- if (prev_shmem_startup_hook) prev_shmem_startup_hook();
+ ::paxc::PaxInitializePartitionSpec(rel, reinterpret_cast(part));
- paxc::paxc_shmem_startup();
+out:
+ relation_close(rel, NoLock);
}
-static void PaxExecutorStart(QueryDesc *query_desc, int eflags) {
- if (prev_executor_start)
- prev_executor_start(query_desc, eflags);
- else
- standard_ExecutorStart(query_desc, eflags);
-
- executor_run_ref_count++;
+static void DefineGUCs() {
+ DefineCustomBoolVariable("pax_enable_debug", "enable pax debug", NULL,
+ &pax::pax_enable_debug, true, PGC_USERSET, 0, NULL,
+ NULL, NULL);
+
+ DefineCustomBoolVariable("pax_enable_filter", "enable pax filter", NULL,
+ &pax::pax_enable_filter, true, PGC_USERSET, 0, NULL,
+ NULL, NULL);
+
+ DefineCustomIntVariable(
+ "pax_max_tuples_per_group",
+ "the default value for the limit on the number of tuples in a group",
+ NULL, &pax::pax_max_tuples_per_group, VEC_BATCH_LENGTH, 0,
+ VEC_BATCH_LENGTH * 100, PGC_USERSET, 0, NULL, NULL, NULL);
+
+#ifdef ENABLE_PLASMA
+ DefineCustomBoolVariable(
+ "pax_enable_plasma", "Enable plasma cache the set of columns", NULL,
+ &pax::pax_enable_plasma_in_mem, true, PGC_USERSET, 0, NULL, NULL, NULL);
+#endif
+
+ DefineCustomIntVariable(
+ "pax_scan_reuse_buffer_size", "set the reuse buffer size", NULL,
+ &pax::pax_scan_reuse_buffer_size, PAX_SCAN_REUSE_BUFFER_DEFAULT_SIZE,
+ PAX_SCAN_REUSE_BUFFER_MIN_SIZE, PAX_SCAN_REUSE_BUFFER_MAX_SIZE,
+ PGC_USERSET, 0, NULL, NULL, NULL);
}
-static void PaxExecutorEnd(QueryDesc *query_desc) {
- if (prev_executor_end)
- prev_executor_end(query_desc);
- else
- standard_ExecutorEnd(query_desc);
+struct PaxObjectProperty {
+ const char *name;
+ Oid class_oid;
+ Oid index_oid;
+ AttrNumber attnum_oid;
+};
- executor_run_ref_count--;
- Assert(executor_run_ref_count >= 0);
- if (executor_run_ref_count == 0) {
- paxc::release_command_resource();
- }
-}
+static const struct PaxObjectProperty kPaxObjectProperties[] = {
+ {"fast-sequence", PAX_FASTSEQUENCE_OID, PAX_FASTSEQUENCE_INDEX_OID,
+ ANUM_PG_PAX_FAST_SEQUENCE_OBJID},
+ {"pg_pax_tables", PAX_TABLES_RELATION_ID, PAX_TABLES_RELID_INDEX_ID,
+ ANUM_PG_PAX_TABLES_RELID},
+ // add pg_pax_tables here
+};
-static void PaxXactCallback(XactEvent event, void * /*arg*/) {
- if (event == XACT_EVENT_COMMIT || event == XACT_EVENT_ABORT ||
- event == XACT_EVENT_PARALLEL_ABORT ||
- event == XACT_EVENT_PARALLEL_COMMIT) {
- if (executor_run_ref_count > 0) {
- executor_run_ref_count = 0;
- paxc::release_command_resource();
- }
+static const struct PaxObjectProperty *FindPaxObjectProperty(Oid class_id) {
+ for (const auto &property : kPaxObjectProperties) {
+ const auto p = &property;
+ if (p->class_oid == class_id) return p;
}
+ return NULL;
}
-void _PG_init(void) { // NOLINT
- if (!process_shared_preload_libraries_in_progress) {
- ereport(ERROR, (errmsg("pax must be loaded via shared_preload_libraries")));
- return;
- }
+static void PaxDeleteObject(struct CustomObjectClass * /*self*/,
+ const ObjectAddress *object, int /*flags*/) {
+ Relation rel;
+ HeapTuple tup;
+ SysScanDesc scan;
+ ScanKeyData skey[1];
+
+ const auto object_property = FindPaxObjectProperty(object->classId);
+ Assert(object_property);
+ Assert(object_property->class_oid == object->classId);
+
+ rel = table_open(object->classId, RowExclusiveLock);
+ ScanKeyInit(&skey[0], object_property->attnum_oid, BTEqualStrategyNumber,
+ F_OIDEQ, ObjectIdGetDatum(object->objectId));
+
+ scan =
+ systable_beginscan(rel, object_property->index_oid, true, NULL, 1, skey);
- paxc::paxc_shmem_request();
+ /* we expect exactly one match */
+ tup = systable_getnext(scan);
+ if (!HeapTupleIsValid(tup))
+ elog(ERROR, "could not find tuple for %s %u", object_property->name,
+ object->objectId);
- prev_shmem_startup_hook = shmem_startup_hook;
- shmem_startup_hook = PaxShmemInit;
+ CatalogTupleDelete(rel, &tup->t_self);
- prev_executor_start = ExecutorStart_hook;
- ExecutorStart_hook = PaxExecutorStart;
+ systable_endscan(scan);
- prev_executor_end = ExecutorEnd_hook;
- ExecutorEnd_hook = PaxExecutorEnd;
+ table_close(rel, RowExclusiveLock);
+}
+
+static struct CustomObjectClass pax_fastsequence_coc = {
+ .class_id = PAX_FASTSEQUENCE_OID,
+ .do_delete = PaxDeleteObject,
+};
+
+static struct CustomObjectClass pax_tables_coc = {
+ .class_id = PAX_TABLES_RELATION_ID,
+ .do_delete = PaxDeleteObject,
+};
+
+void _PG_init(void) { // NOLINT
+ prev_object_access_hook = object_access_hook;
+ object_access_hook = PaxObjectAccessHook;
ext_dml_init_hook = pax::CCPaxAccessMethod::ExtDmlInit;
ext_dml_finish_hook = pax::CCPaxAccessMethod::ExtDmlFini;
file_unlink_hook = pax::CCPaxAccessMethod::RelationFileUnlink;
- RegisterXactCallback(PaxXactCallback, NULL);
-
- self_relopt_kind = add_reloption_kind();
- add_string_reloption(self_relopt_kind, "storage_format", "pax storage format",
- "orc", PaxValidateStorageFormat, AccessExclusiveLock);
- add_string_reloption(self_relopt_kind, "compresstype", "pax compress type",
- PAX_DEFAULT_COMPRESSTYPE, PaxValidateCompresstype,
- AccessExclusiveLock);
- add_int_reloption(self_relopt_kind, "compresslevel", "pax compress level",
- PAX_DEFAULT_COMPRESSLEVEL, AO_MIN_COMPRESSLEVEL,
- AO_MAX_COMPRESSLEVEL, AccessExclusiveLock);
+ register_custom_object_class(&pax_fastsequence_coc);
+ register_custom_object_class(&pax_tables_coc);
+
+ DefineGUCs();
+
+ RegisterResourceReleaseCallback(paxc::FdHandleAbortCallback, NULL);
+
+ paxc::paxc_reg_rel_options();
}
} // extern "C"
diff --git a/contrib/pax_storage/src/cpp/access/pax_access_handle.h b/contrib/pax_storage/src/cpp/access/pax_access_handle.h
index 2bca0ba15a7..d88afbb2917 100644
--- a/contrib/pax_storage/src/cpp/access/pax_access_handle.h
+++ b/contrib/pax_storage/src/cpp/access/pax_access_handle.h
@@ -3,6 +3,7 @@
#include "comm/cbdb_api.h"
namespace paxc {
+
class PaxAccessMethod final {
private:
PaxAccessMethod() = default;
@@ -22,14 +23,6 @@ class PaxAccessMethod final {
static void ParallelscanReinitialize(Relation rel,
ParallelTableScanDesc pscan);
- /* Index Scan Callbacks, unsupported yet */
- static struct IndexFetchTableData *IndexFetchBegin(Relation rel);
- static void IndexFetchEnd(struct IndexFetchTableData *data);
- static void IndexFetchReset(struct IndexFetchTableData *data);
- static bool IndexFetchTuple(struct IndexFetchTableData *scan, ItemPointer tid,
- Snapshot snapshot, TupleTableSlot *slot,
- bool *call_again, bool *all_dead);
-
/* Callbacks for non-modifying operations on individual tuples */
static bool TupleFetchRowVersion(Relation relation, ItemPointer tid,
Snapshot snapshot, TupleTableSlot *slot);
@@ -66,15 +59,18 @@ class PaxAccessMethod final {
bool allow_sync, bool anyvisible, bool progress,
BlockNumber start_blockno, BlockNumber numblocks,
IndexBuildCallback callback, void *callback_state, TableScanDesc scan);
+ static bool IndexUniqueCheck(Relation rel, ItemPointer tid, Snapshot snapshot, bool *all_dead);
static void IndexValidateScan(Relation heap_relation, Relation index_relation,
IndexInfo *index_info, Snapshot snapshot,
ValidateIndexState *state);
-
- static bytea *Amoptions(Datum reloptions, char relkind, bool validate);
-
static void SwapRelationFiles(Oid relid1, Oid relid2,
TransactionId frozen_xid,
MultiXactId cutoff_multi);
+
+ static bytea *AmOptions(Datum reloptions, char relkind, bool validate);
+ static void ValidateColumnEncodingClauses(List *encoding_opts);
+ static List *TransformColumnEncodingClauses(Relation rel, List *encoding_opts,
+ bool validate, bool from_type);
};
} // namespace paxc
@@ -96,9 +92,17 @@ class CCPaxAccessMethod final {
TupleTableSlot *slot);
static TableScanDesc ScanExtractColumns(Relation rel, Snapshot snapshot,
+ int nkeys, struct ScanKeyData *key,
ParallelTableScanDesc parallel_scan,
- List *targetlist, List *qual,
- uint32 flags);
+ struct PlanState *ps, uint32 flags);
+
+ /* Index Scan Callbacks */
+ static struct IndexFetchTableData *IndexFetchBegin(Relation rel);
+ static void IndexFetchEnd(struct IndexFetchTableData *scan);
+ static void IndexFetchReset(struct IndexFetchTableData *scan);
+ static bool IndexFetchTuple(struct IndexFetchTableData *scan, ItemPointer tid,
+ Snapshot snapshot, TupleTableSlot *slot,
+ bool *call_again, bool *all_dead);
/* Manipulations of physical tuples. */
static void TupleInsert(Relation relation, TupleTableSlot *slot,
@@ -162,11 +166,3 @@ class CCPaxAccessMethod final {
extern ext_dml_func_hook_type ext_dml_init_hook;
extern ext_dml_func_hook_type ext_dml_finish_hook;
-
-// plain structure used by reloptions, can be accessed from C++ code.
-struct PaxOptions {
- int32 vl_len; /* varlena header (do not touch directly!) */
- char storage_format[16];
- char compress_type[16];
- int compress_level;
-};
diff --git a/contrib/pax_storage/src/cpp/access/pax_deleter.cc b/contrib/pax_storage/src/cpp/access/pax_deleter.cc
index 594bc221eb0..3af63a7e6bb 100644
--- a/contrib/pax_storage/src/cpp/access/pax_deleter.cc
+++ b/contrib/pax_storage/src/cpp/access/pax_deleter.cc
@@ -5,18 +5,15 @@
#include
#include "access/pax_dml_state.h"
+#include "catalog/pax_aux_table.h"
#include "comm/singleton.h"
#include "storage/pax_itemptr.h"
-#include "storage/paxc_block_map_manager.h"
namespace pax {
-CPaxDeleter::CPaxDeleter(const Relation rel, const Snapshot snapshot)
+CPaxDeleter::CPaxDeleter(Relation rel, Snapshot snapshot)
: rel_(rel), snapshot_(snapshot) {}
-CPaxDeleter::~CPaxDeleter() = default;
-
-TM_Result CPaxDeleter::DeleteTuple(const Relation relation,
- const ItemPointer tid, const CommandId cid,
- const Snapshot snapshot,
+TM_Result CPaxDeleter::DeleteTuple(Relation relation, ItemPointer tid,
+ CommandId cid, Snapshot snapshot,
TM_FailureData *tmfd) {
CPaxDeleter *deleter =
CPaxDmlStateLocal::Instance()->GetDeleter(relation, snapshot);
@@ -29,63 +26,64 @@ TM_Result CPaxDeleter::DeleteTuple(const Relation relation,
}
return result;
}
+// Used for deleting tuples: marks the tuple's offset in the per-block delete bitmap.
+TM_Result CPaxDeleter::MarkDelete(ItemPointer tid) {
+ uint32 tuple_offset = pax::GetTupleOffset(*tid);
-TM_Result CPaxDeleter::MarkDelete(const ItemPointer tid) {
- PaxItemPointer pax_tid(reinterpret_cast(tid));
- uint8 table_no = pax_tid.GetTableNo();
- uint32 block_number = pax_tid.GetBlockNumber();
- uint32 tuple_number = pax_tid.GetTupleNumber();
-
- std::string block_id =
- cbdb::GetBlockId(rel_->rd_id, table_no, block_number).ToStr();
+ std::string block_id = MapToBlockNumber(rel_, *tid);
if (block_bitmap_map_.find(block_id) == block_bitmap_map_.end()) {
- // TODO(gongxun): bitmap should support dynamic raise size
block_bitmap_map_[block_id] =
- std::unique_ptr(new DynamicBitmap()); // NOLINT
- }
- DynamicBitmap *bitmap = block_bitmap_map_[block_id].get();
- if (bitmap->NumBits() <= tuple_number) {
- bitmap->Resize(bitmap->NumBits() * 2);
+ pax_unique_ptr(PAX_NEW()); // NOLINT
+ cbdb::DeleteMicroPartitionEntry(RelationGetRelid(rel_), snapshot_,
+ block_id);
}
-
- if (bitmap->Test(tuple_number)) {
+ auto bitmap = block_bitmap_map_[block_id].get();
+ if (bitmap->Test(tuple_offset)) {
return TM_SelfModified;
}
-
- bitmap->Set(tuple_number);
+ bitmap->Set(tuple_offset);
return TM_Ok;
}
-void CPaxDeleter::ExecDelete() {
- if (block_bitmap_map_.empty()) {
- return;
+// Used when merging remaining partition files; no individual tuples need to be deleted.
+void CPaxDeleter::MarkDelete(BlockNumber pax_block_id) {
+ std::string block_id = std::to_string(pax_block_id);
+
+ if (block_bitmap_map_.find(block_id) == block_bitmap_map_.end()) {
+ block_bitmap_map_[block_id] = pax_unique_ptr(PAX_NEW());
+ cbdb::DeleteMicroPartitionEntry(RelationGetRelid(rel_), snapshot_,
+ block_id);
}
+}
+
+void CPaxDeleter::ExecDelete() {
+ if (block_bitmap_map_.empty()) return;
- TableDeleter table_deleter(rel_, buildDeleteIterator(),
+ TableDeleter table_deleter(rel_, BuildDeleteIterator(),
std::move(block_bitmap_map_), snapshot_);
table_deleter.Delete();
}
-std::unique_ptr>
-CPaxDeleter::buildDeleteIterator() {
+pax_unique_ptr>
+CPaxDeleter::BuildDeleteIterator() {
std::vector micro_partitions;
+ auto rel_path = cbdb::BuildPaxDirectoryPath(rel_->rd_node, rel_->rd_backend);
for (auto &it : block_bitmap_map_) {
std::string block_id = it.first;
- DynamicBitmap *bitmap_ptr = it.second.get();
- BitmapIterator bitmap_it(bitmap_ptr);
- int32 tuple_number = bitmap_it.Next(true);
- if (tuple_number != -1) {
+ {
pax::MicroPartitionMetadata meta_info;
- meta_info.SetFileName(cbdb::BuildPaxFilePath(rel_, block_id));
+ meta_info.SetFileName(cbdb::BuildPaxFilePath(rel_path, block_id));
meta_info.SetMicroPartitionId(std::move(block_id));
micro_partitions.push_back(std::move(meta_info));
}
}
- IteratorBase *iter = new VectorIterator(std::move(micro_partitions));
+ IteratorBase *iter =
+ PAX_NEW>(
+ std::move(micro_partitions));
- return std::unique_ptr>(iter);
+ return pax_unique_ptr>(iter);
}
} // namespace pax
diff --git a/contrib/pax_storage/src/cpp/access/pax_deleter.h b/contrib/pax_storage/src/cpp/access/pax_deleter.h
index 7d94ffe6efd..0312e63a742 100644
--- a/contrib/pax_storage/src/cpp/access/pax_deleter.h
+++ b/contrib/pax_storage/src/cpp/access/pax_deleter.h
@@ -7,27 +7,26 @@
#include
#include "comm/bitmap.h"
+#include "comm/pax_memory.h"
#include "storage/pax.h"
namespace pax {
class CPaxDeleter {
public:
- explicit CPaxDeleter(const Relation rel, const Snapshot snapshot);
-
- static TM_Result DeleteTuple(const Relation relation, const ItemPointer tid,
- const CommandId cid, const Snapshot snapshot,
+ explicit CPaxDeleter(Relation rel, Snapshot snapshot);
+ ~CPaxDeleter() = default;
+ static TM_Result DeleteTuple(Relation relation, ItemPointer tid,
+ CommandId cid, Snapshot snapshot,
TM_FailureData *tmfd);
- TM_Result MarkDelete(const ItemPointer tid);
-
- ~CPaxDeleter();
-
+ TM_Result MarkDelete(ItemPointer tid);
+ void MarkDelete(BlockNumber pax_block_id);
void ExecDelete();
private:
- std::unique_ptr> buildDeleteIterator();
- std::map> block_bitmap_map_;
- const Relation rel_;
- const Snapshot snapshot_;
+ std::unique_ptr> BuildDeleteIterator();
+ std::map> block_bitmap_map_;
+ Relation rel_;
+ Snapshot snapshot_;
}; // class CPaxDeleter
} // namespace pax
diff --git a/contrib/pax_storage/src/cpp/access/pax_dml_state.cc b/contrib/pax_storage/src/cpp/access/pax_dml_state.cc
index 335eb659197..91cd996339f 100644
--- a/contrib/pax_storage/src/cpp/access/pax_dml_state.cc
+++ b/contrib/pax_storage/src/cpp/access/pax_dml_state.cc
@@ -8,10 +8,6 @@ void CPaxDmlStateLocal::DmlStateResetCallback(void * /*arg*/) {
}
void CPaxDmlStateLocal::InitDmlState(Relation rel, CmdType operation) {
- if (operation == CMD_UPDATE || operation == CMD_DELETE) {
- cbdb::InitCommandResource();
- }
-
if (!dml_descriptor_tab_) {
HASHCTL hash_ctl;
Assert(!cbdb::pax_memory_context);
@@ -44,7 +40,7 @@ void CPaxDmlStateLocal::FinishDmlState(Relation rel, CmdType /*operation*/) {
// TODO(gongxun): deleter finish
state->deleter->ExecDelete();
- delete state->deleter;
+ PAX_DELETE(state->deleter);
state->deleter = nullptr;
// FIXME: it's update operation, maybe we should do something here
}
@@ -55,7 +51,7 @@ void CPaxDmlStateLocal::FinishDmlState(Relation rel, CmdType /*operation*/) {
old_ctx = MemoryContextSwitchTo(cbdb::pax_memory_context);
state->inserter->FinishInsert();
- delete state->inserter;
+ PAX_DELETE(state->inserter);
state->inserter = nullptr;
MemoryContextSwitchTo(old_ctx);
}
@@ -66,7 +62,7 @@ CPaxInserter *CPaxDmlStateLocal::GetInserter(Relation rel) {
state = FindDmlState(cbdb::RelationGetRelationId(rel));
// TODO(gongxun): switch memory context??
if (state->inserter == nullptr) {
- state->inserter = new CPaxInserter(rel);
+ state->inserter = PAX_NEW(rel);
}
return state->inserter;
}
@@ -76,7 +72,7 @@ CPaxDeleter *CPaxDmlStateLocal::GetDeleter(Relation rel, Snapshot snapshot) {
state = FindDmlState(cbdb::RelationGetRelationId(rel));
// TODO(gongxun): switch memory context??
if (state->deleter == nullptr) {
- state->deleter = new CPaxDeleter(rel, snapshot);
+ state->deleter = PAX_NEW(rel, snapshot);
}
return state->deleter;
}
diff --git a/contrib/pax_storage/src/cpp/access/pax_inserter.cc b/contrib/pax_storage/src/cpp/access/pax_inserter.cc
index 2584efdb3d9..68d1da880b6 100644
--- a/contrib/pax_storage/src/cpp/access/pax_inserter.cc
+++ b/contrib/pax_storage/src/cpp/access/pax_inserter.cc
@@ -4,18 +4,32 @@
#include
#include "access/pax_dml_state.h"
-#include "catalog/micro_partition_stats.h"
+#include "access/pax_partition.h"
+#include "access/paxc_rel_options.h"
#include "catalog/pax_aux_table.h"
#include "comm/cbdb_wrappers.h"
+#include "storage/micro_partition_stats.h"
#include "storage/strategy.h"
namespace pax {
-CPaxInserter::CPaxInserter(Relation rel) : rel_(rel), insert_count_(0) {
- writer_ = new TableWriter(rel);
- writer_->SetWriteSummaryCallback(&cbdb::AddMicroPartitionEntry)
- ->SetFileSplitStrategy(new PaxDefaultSplitStrategy())
- ->SetStatsCollector(new MicroPartitionStats())
+CPaxInserter::CPaxInserter(Relation rel)
+ : rel_(rel), insert_count_(0), part_obj_(nullptr), writer_(nullptr) {
+ part_obj_ = PAX_NEW();
+ auto ok = part_obj_->Initialize(rel_);
+ if (ok) {
+ writer_ = PAX_NEW(rel, part_obj_);
+ } else {
+ // fallback to TableWriter
+ writer_ = PAX_NEW(rel);
+ part_obj_->Release();
+ PAX_DELETE(part_obj_);
+ part_obj_ = nullptr;
+ }
+
+ writer_->SetWriteSummaryCallback(&cbdb::InsertOrUpdateMicroPartitionEntry)
+ ->SetFileSplitStrategy(PAX_NEW())
+ ->SetStatsCollector(PAX_NEW())
->Open();
}
@@ -29,8 +43,7 @@ void CPaxInserter::InsertTuple(Relation relation, TupleTableSlot *slot,
slot_getallattrs(slot);
}
- CTupleSlot cslot(slot);
- writer_->WriteTuple(&cslot);
+ writer_->WriteTuple(slot);
}
void CPaxInserter::MultiInsert(Relation relation, TupleTableSlot **slots,
@@ -51,8 +64,14 @@ void CPaxInserter::FinishBulkInsert(Relation relation, int /*options*/) {
void CPaxInserter::FinishInsert() {
writer_->Close();
- delete writer_;
+ PAX_DELETE(writer_);
writer_ = nullptr;
+
+ if (part_obj_) {
+ part_obj_->Release();
+ PAX_DELETE(part_obj_);
+ part_obj_ = nullptr;
+ }
}
void CPaxInserter::TupleInsert(Relation relation, TupleTableSlot *slot,
diff --git a/contrib/pax_storage/src/cpp/access/pax_inserter.h b/contrib/pax_storage/src/cpp/access/pax_inserter.h
index abd191981bf..92300769755 100644
--- a/contrib/pax_storage/src/cpp/access/pax_inserter.h
+++ b/contrib/pax_storage/src/cpp/access/pax_inserter.h
@@ -4,9 +4,9 @@
#include "storage/micro_partition_metadata.h"
#include "storage/pax.h"
-
+#include "storage/pax_table_partition_writer.h"
namespace pax {
-
+class PartitionObject;
class CPaxInserter {
public:
explicit CPaxInserter(Relation rel);
@@ -29,6 +29,7 @@ class CPaxInserter {
Relation rel_;
uint32 insert_count_;
+ PartitionObject *part_obj_;
TableWriter *writer_;
}; // class CPaxInserter
diff --git a/contrib/pax_storage/src/cpp/access/pax_partition.cc b/contrib/pax_storage/src/cpp/access/pax_partition.cc
new file mode 100644
index 00000000000..9533677a969
--- /dev/null
+++ b/contrib/pax_storage/src/cpp/access/pax_partition.cc
@@ -0,0 +1,745 @@
+#include "access/pax_partition.h"
+
+#include "comm/cbdb_api.h"
+
+#include "access/pax_access_handle.h"
+#include "catalog/pg_pax_tables.h"
+#include "comm/cbdb_wrappers.h"
+
+namespace paxc {
+// support optional `EVERY` syntax:
+// FROM(start_value) TO(end_value) [ EVERY(interval) ]
+struct PaxPartitionEveryIterator {
+ PartitionKey key;
+ Datum from_value;
+ Datum to_value;
+
+ ExprState *plus_expr_state;
+ ParamListInfo plus_expr_params;
+ EState *estate;
+
+ Datum current_start;
+ Datum current_end;
+ bool ended;
+
+ ParseState *pstate;
+};
+
+static int PartitionCheckBound(PartitionKey key, PartitionBoundSpec *spec);
+
+static void PaxPartitionDestroyEveryIterator(
+ struct PaxPartitionEveryIterator *iter) {
+ if (iter->estate) FreeExecutorState(iter->estate);
+ pfree(iter);
+}
+
+// See the implementation in PartEveryIterator
+static struct PaxPartitionEveryIterator *PaxPartitionInitEveryIterator(
+ ParseState *pstate, PartitionKey key, Node *from, Node *to, Node *every) {
+ Assert(key->partnatts == 1);
+ auto part_col_typid = get_partition_col_typid(key, 0);
+ auto part_col_typmod = get_partition_col_typmod(key, 0);
+ auto part_col_collation = get_partition_col_collation(key, 0);
+ Datum from_value;
+ Datum to_value;
+ Const *c;
+
+ auto iter =
+ (PaxPartitionEveryIterator *)palloc0(sizeof(PaxPartitionEveryIterator));
+ Assert(from && to && every);
+
+ c = castNode(Const, from);
+ if (c->constisnull)
+ elog(ERROR, "cann't use NULL with range partition specification");
+ from_value = c->constvalue;
+
+ c = castNode(Const, to);
+ if (c->constisnull)
+ elog(ERROR, "cann't use NULL with range partition specification");
+ to_value = c->constvalue;
+
+ auto param = makeNode(Param);
+ param->paramid = 1;
+ param->paramtype = part_col_typid;
+ param->paramtypmod = part_col_typmod;
+ param->paramcollid = part_col_collation;
+ param->location = -1;
+
+ auto plus_expr = (Node *)make_op(
+ pstate,
+ list_make2(makeString((char *)"pg_catalog"), makeString((char *)"+")),
+ (Node *)param, (Node *)every, pstate->p_last_srf, -1);
+
+ if (IsA(plus_expr, CollateExpr)) {
+ auto expr_collation = exprCollation(plus_expr);
+ if (OidIsValid(expr_collation) && expr_collation != part_col_collation)
+ elog(ERROR,
+ "collation of partition bound value for column %d doesn't match "
+ "partition key collation \"%s\"",
+ get_partition_col_attnum(key, 0),
+ get_collation_name(part_col_collation));
+ }
+ plus_expr = coerce_to_target_type(
+ pstate, plus_expr, exprType(plus_expr), part_col_typid, part_col_typmod,
+ COERCION_ASSIGNMENT, COERCE_IMPLICIT_CAST, -1);
+ if (plus_expr == NULL)
+ ereport(ERROR,
+ (errcode(ERRCODE_DATATYPE_MISMATCH),
+ errmsg("specified value cannot be cast to type %s for column %d",
+ format_type_be(part_col_typid),
+ get_partition_col_attnum(key, 0))));
+
+ iter->key = key;
+ iter->from_value = from_value;
+ iter->to_value = to_value;
+
+ iter->plus_expr_params = makeParamList(1);
+ iter->plus_expr_params->params[0].value = (Datum)0;
+ iter->plus_expr_params->params[0].isnull = true;
+ iter->plus_expr_params->params[0].pflags = 0;
+ iter->plus_expr_params->params[0].ptype = part_col_typid;
+ iter->estate = CreateExecutorState();
+ iter->estate->es_param_list_info = iter->plus_expr_params;
+
+ iter->plus_expr_state =
+ ExecInitExprWithParams((Expr *)plus_expr, iter->plus_expr_params);
+
+ iter->current_end = iter->from_value;
+ iter->current_start = (Datum)0;
+ iter->ended = false;
+
+ iter->pstate = pstate;
+
+ return iter;
+}
+
+static List *PaxPartitionBuildDatums(PartitionKey key, Datum *datums) {
+ List *result = NIL;
+ for (int i = 0; i < key->partnatts; i++) {
+ Const *c;
+ PartitionRangeDatum *prd;
+ c = makeConst(
+ key->parttypid[i], key->parttypmod[i], key->parttypcoll[i],
+ key->parttyplen[i],
+ datumCopy(datums[i], key->parttypbyval[i], key->parttyplen[i]), false,
+ key->parttypbyval[i]);
+
+ prd = makeNode(PartitionRangeDatum);
+ prd->kind = PARTITION_RANGE_DATUM_VALUE;
+ prd->value = (Node *)c;
+ result = lappend(result, prd);
+ }
+ return result;
+}
+
+static PartitionBoundSpec *PaxPartitionNextPartBound(
+ struct PaxPartitionEveryIterator *iter) {
+ if (iter->ended) return nullptr;
+
+ bool isnull;
+
+ iter->plus_expr_params->params[0].isnull = false;
+ iter->plus_expr_params->params[0].value = iter->current_end;
+
+ auto next_start = iter->current_end;
+ auto next_end = ExecEvalExprSwitchContext(
+ iter->plus_expr_state, GetPerTupleExprContext(iter->estate), &isnull);
+ if (isnull)
+ ereport(ERROR, (errcode(ERRCODE_INVALID_TABLE_DEFINITION),
+ errmsg("could not compute next partition boundary with "
+ "EVERY, plus-operator returned NULL"),
+ parser_errposition(iter->pstate, -1)));
+
+ auto cmpval = DatumGetInt32(FunctionCall2Coll(&iter->key->partsupfunc[0],
+ iter->key->partcollation[0],
+ next_end, iter->to_value));
+ if (cmpval >= 0) {
+ iter->ended = true;
+ next_end = iter->to_value;
+ }
+ // sanity check in case next_start >= next_end
+ cmpval = DatumGetInt32(FunctionCall2Coll(&iter->key->partsupfunc[0],
+ iter->key->partcollation[0],
+ next_start, next_end));
+ if (cmpval >= 0) elog(ERROR, "invalid range bound with EVERY");
+
+ iter->current_start = next_start;
+ iter->current_end = next_end;
+
+ // build PartitionBoundSpec for [iter->current_start, iter->current_end)
+ PartitionBoundSpec *boundspec;
+
+ boundspec = makeNode(PartitionBoundSpec);
+ boundspec->strategy = PARTITION_STRATEGY_RANGE;
+ boundspec->is_default = false;
+ boundspec->lowerdatums =
+ PaxPartitionBuildDatums(iter->key, &iter->current_start);
+ boundspec->upperdatums =
+ PaxPartitionBuildDatums(iter->key, &iter->current_end);
+
+ return boundspec;
+}
+
+static Node *GetConstValue(List *datums) {
+ auto prd = (PartitionRangeDatum *)linitial(datums);
+ Assert(IsA(prd, PartitionRangeDatum));
+ Assert(prd->kind == PARTITION_RANGE_DATUM_VALUE);
+
+ auto c = (Const *)prd->value;
+ Assert(c && IsA(c, Const) && !c->constisnull);
+ return (Node *)c;
+}
+
+// generate a list of partition bound specs
+static List *TransformPartitionExtension(ParseState *pstate, Relation relation,
+ PartitionKey key,
+ PartitionRangeExtension *range_ext) {
+ List *result = NIL;
+ PartitionBoundSpec *range;
+
+ auto every = range_ext->every;
+ auto spec = transformPartitionBound(pstate, relation, key, &range_ext->spec);
+ if (!every) return list_make1(spec);
+
+ if (PartitionCheckBound(key, spec) >= 0)
+ elog(ERROR, "invalid range bound: from %s to %s every(X)",
+ get_range_partbound_string(spec->lowerdatums),
+ get_range_partbound_string(spec->upperdatums));
+
+  // Expand the bound into a list of partitions using the EVERY expression.
+ if (key->partnatts != 1 || key->partnatts != list_length(every))
+ elog(ERROR, "pax partition EVERY only support one column");
+
+ auto ev = (Node *)linitial(every);
+ auto iter = PaxPartitionInitEveryIterator(
+ pstate, key, GetConstValue(spec->lowerdatums),
+ GetConstValue(spec->upperdatums),
+ (Node *)transformExpr(pstate, ev, EXPR_KIND_PARTITION_BOUND));
+
+ while ((range = PaxPartitionNextPartBound(iter))) {
+ result = lappend(result, range);
+ }
+ PaxPartitionDestroyEveryIterator(iter);
+ return result;
+}
+
+static bool PaxLoadPartitionSpec(Oid relid, List **partparams_list,
+ List **partboundspec_list) {
+ Node *part;
+ List *list;
+
+ ::paxc::GetPaxTablesEntryAttributes(relid, NULL, &part);
+ if (!part) return false;
+
+ list = castNode(List, part);
+ Assert(list_length(list) == 2);
+ *partparams_list = castNode(List, list_nth(list, 0));
+ *partboundspec_list = castNode(List, list_nth(list, 1));
+ return true;
+}
+
+static inline PartitionRangeDatumKind RangeDatumToKind(List *datums, int i) {
+ PartitionRangeDatum *rd = castNode(PartitionRangeDatum, list_nth(datums, i));
+ return rd->kind;
+}
+static inline Datum RangeDatumToValue(List *datums, int i) {
+ PartitionRangeDatum *rd = castNode(PartitionRangeDatum, list_nth(datums, i));
+ Const *c = castNode(Const, rd->value);
+ Assert(c && !c->constisnull);
+ return c->constvalue;
+}
+// Reference: partition_rbound_cmp()
+int PartitionComparePartitionKeys(PartitionKey key, List *datums1,
+ List *datums2) {
+ Assert(key->partnatts == list_length(datums1));
+ Assert(key->partnatts == list_length(datums2));
+ FmgrInfo *partsupfunc = key->partsupfunc;
+ Oid *partcollation = key->partcollation;
+ int natts = key->partnatts;
+ int i;
+ int32 colnum = 0;
+ int32 cmpval = 0;
+ for (i = 0; i < natts; i++) {
+ colnum++;
+ auto kind1 = RangeDatumToKind(datums1, i);
+ auto kind2 = RangeDatumToKind(datums2, i);
+
+ if (kind1 < kind2) return -colnum;
+ if (kind1 > kind2) return colnum;
+ if (kind1 != PARTITION_RANGE_DATUM_VALUE) {
+ /*
+ * The column bounds are both MINVALUE or both MAXVALUE. No later
+ * columns should be considered, but we still need to compare
+ * whether they are upper or lower bounds.
+ */
+ break;
+ }
+ cmpval = DatumGetInt32(FunctionCall2Coll(&partsupfunc[i], partcollation[i],
+ RangeDatumToValue(datums1, i),
+ RangeDatumToValue(datums2, i)));
+ if (cmpval != 0) break;
+ }
+ return cmpval == 0 ? 0 : (cmpval < 0 ? -colnum : colnum);
+}
+
+static int PartitionCheckBound(PartitionKey key, PartitionBoundSpec *spec) {
+ return PartitionComparePartitionKeys(key, spec->lowerdatums,
+ spec->upperdatums);
+}
+
+int PartitionBoundSpecCmp(const ListCell *a, const ListCell *b, void *arg) {
+ auto spec1 = lfirst_node(PartitionBoundSpec, a);
+ auto spec2 = lfirst_node(PartitionBoundSpec, b);
+ auto key = static_cast(arg);
+ return PartitionComparePartitionKeys(key, spec1->lowerdatums,
+ spec2->lowerdatums);
+}
+
+bool PartitionCheckBounds(PartitionKey key, List *spec_list) {
+ ListCell *lc;
+ int i;
+ int nparts = list_length(spec_list);
+ bool ok = true;
+
+ Assert(nparts > 0);
+
+ // self bound check
+ foreach (lc, spec_list) {
+ auto spec = lfirst_node(PartitionBoundSpec, lc);
+
+ if (spec->strategy != key->strategy)
+ elog(ERROR, "strategy not match with partition key");
+ if (spec->is_default) elog(ERROR, "unexpected default partition");
+ if (list_length(spec->lowerdatums) != key->partnatts)
+ elog(ERROR,
+ "number of lower bound values mismatches the number of partition "
+ "keys");
+ if (list_length(spec->upperdatums) != key->partnatts)
+ elog(ERROR,
+ "number of upper bound values mismatches the number of partition "
+ "keys");
+
+ ok = PartitionCheckBound(key, spec) < 0;
+ if (!ok) goto out;
+ }
+
+  // Cross-bound check: only verifies that prev.upper <= cur.lower.
+ list_sort_arg(spec_list, PartitionBoundSpecCmp, key);
+ for (i = 1; i < nparts; i++) {
+ auto spec1 = castNode(PartitionBoundSpec, list_nth(spec_list, i - 1));
+ auto spec2 = castNode(PartitionBoundSpec, list_nth(spec_list, i));
+ // the upper value should be less than or equal to the lower value of the
+ // next part
+ ok = PartitionComparePartitionKeys(key, spec1->upperdatums,
+ spec2->lowerdatums) <= 0;
+ if (!ok) break;
+ }
+out:
+ return ok;
+}
+
+List *PaxValidatePartitionRanges(Relation relation, PartitionKey key,
+ List *raw_partbound_list) {
+ ParseState *pstate = make_parsestate(NULL);
+ List *spec_list = NIL;
+ int nparts;
+ bool ok;
+
+ nparts = list_length(raw_partbound_list);
+ Assert(nparts > 0);
+
+ for (int i = 0; i < nparts; i++) {
+ auto spec =
+ static_cast(list_nth(raw_partbound_list, i));
+ Assert(IsA(spec, PartitionBoundSpec));
+ auto part_list = TransformPartitionExtension(pstate, relation, key, spec);
+ spec_list = list_concat(spec_list, part_list);
+ pfree(part_list);
+ }
+
+ // check whether bounds overlaps
+ ok = paxc::PartitionCheckBounds(key, spec_list);
+ if (!ok) elog(ERROR, "partition bounds have overlaps");
+
+ list_free_deep(raw_partbound_list);
+ free_parsestate(pstate);
+
+ return spec_list;
+}
+
+// Reference: RelationBuildPartitionKey
+PartitionKey PaxRelationBuildPartitionKey(Relation relation,
+ List *partparams_list) {
+ int i;
+ PartitionKey key;
+ Oid *partopclass;
+ ListCell *partexprs_item;
+ int16 procnum;
+
+ Assert(RelationIsPAX(relation));
+
+ key = (PartitionKey)palloc0(sizeof(PartitionKeyData));
+ key->strategy = PARTITION_STRATEGY_RANGE;
+ key->partnatts = list_length(partparams_list);
+ key->partattrs = (AttrNumber *)palloc(key->partnatts * sizeof(AttrNumber));
+ key->partopfamily = (Oid *)palloc(key->partnatts * sizeof(Oid));
+ key->partopcintype = (Oid *)palloc(key->partnatts * sizeof(Oid));
+ key->partsupfunc = (FmgrInfo *)palloc0(key->partnatts * sizeof(FmgrInfo));
+
+ key->partcollation = (Oid *)palloc(key->partnatts * sizeof(Oid));
+ key->parttypid = (Oid *)palloc(key->partnatts * sizeof(Oid));
+ key->parttypmod = (int32 *)palloc(key->partnatts * sizeof(int32));
+ key->parttyplen = (int16 *)palloc(key->partnatts * sizeof(int16));
+ key->parttypbyval = (bool *)palloc(key->partnatts * sizeof(bool));
+ key->parttypalign = (char *)palloc(key->partnatts * sizeof(char));
+ key->parttypcoll = (Oid *)palloc(key->partnatts * sizeof(Oid));
+
+ partopclass = (Oid *)palloc(key->partnatts * sizeof(Oid));
+ ComputePartitionAttrs(NULL, relation, partparams_list, key->partattrs, NULL,
+ partopclass, key->partcollation, key->strategy);
+
+ /* determine support function number to search for */
+ procnum = (key->strategy == PARTITION_STRATEGY_HASH) ? HASHEXTENDED_PROC
+ : BTORDER_PROC;
+
+ // We don't have expressions as our partition keys, but keep
+ // the code the same as the kernel.
+ partexprs_item = list_head(key->partexprs);
+ for (i = 0; i < key->partnatts; i++) {
+ AttrNumber attno = key->partattrs[i];
+ HeapTuple opclasstup;
+ Form_pg_opclass opclassform;
+ Oid funcid;
+
+ /* Collect opfamily information */
+ opclasstup = SearchSysCache1(CLAOID, ObjectIdGetDatum(partopclass[i]));
+ if (!HeapTupleIsValid(opclasstup))
+ elog(ERROR, "cache lookup failed for opclass %u", partopclass[i]);
+
+ opclassform = (Form_pg_opclass)GETSTRUCT(opclasstup);
+ key->partopfamily[i] = opclassform->opcfamily;
+ key->partopcintype[i] = opclassform->opcintype;
+
+ /* Get a support function for the specified opfamily and datatypes */
+ funcid = get_opfamily_proc(opclassform->opcfamily, opclassform->opcintype,
+ opclassform->opcintype, procnum);
+ if (!OidIsValid(funcid))
+ ereport(
+ ERROR,
+ (errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
+ errmsg("operator class \"%s\" of access method %s is missing "
+ "support function %d for type %s",
+ NameStr(opclassform->opcname),
+ (key->strategy == PARTITION_STRATEGY_HASH) ? "hash" : "btree",
+ procnum, format_type_be(opclassform->opcintype))));
+
+ fmgr_info_cxt(funcid, &key->partsupfunc[i], CurrentMemoryContext);
+
+ /* Collect type information */
+ if (attno != 0) {
+ Form_pg_attribute att = TupleDescAttr(relation->rd_att, attno - 1);
+
+ key->parttypid[i] = att->atttypid;
+ key->parttypmod[i] = att->atttypmod;
+ key->parttypcoll[i] = att->attcollation;
+ } else {
+ if (partexprs_item == NULL)
+ elog(ERROR, "wrong number of partition key expressions");
+
+ key->parttypid[i] = exprType(static_cast(lfirst(partexprs_item)));
+ key->parttypmod[i] =
+ exprTypmod(static_cast(lfirst(partexprs_item)));
+ key->parttypcoll[i] =
+ exprCollation(static_cast(lfirst(partexprs_item)));
+
+ partexprs_item = lnext(key->partexprs, partexprs_item);
+ }
+ get_typlenbyvalalign(key->parttypid[i], &key->parttyplen[i],
+ &key->parttypbyval[i], &key->parttypalign[i]);
+
+ ReleaseSysCache(opclasstup);
+ }
+ pfree(partopclass);
+ return key;
+}
+
+static PartitionDesc PaxRelationBuildPartitionDesc(PartitionKey key,
+ List *partboundspec_list,
+ MemoryContext tmp_ctx,
+ MemoryContext target_ctx) {
+ PartitionDesc partdesc;
+ PartitionBoundInfo boundinfo;
+ PartitionBoundSpec **boundspecs = NULL;
+ int nparts;
+ MemoryContext saved_cxt;
+ int *mapping;
+
+ saved_cxt = MemoryContextSwitchTo(tmp_ctx);
+ nparts = list_length(partboundspec_list);
+ boundspecs =
+ (PartitionBoundSpec **)palloc(nparts * sizeof(PartitionBoundSpec *));
+ for (int i = 0; i < nparts; i++)
+ boundspecs[i] =
+ static_cast(list_nth(partboundspec_list, i));
+
+ /*
+ * Create PartitionBoundInfo and mapping, working in the caller's context.
+ * This could fail, but we haven't done any damage if so.
+ */
+ boundinfo = partition_bounds_create(boundspecs, nparts, key, &mapping);
+ pfree(boundspecs);
+
+ MemoryContextSwitchTo(target_ctx);
+ partdesc = (PartitionDescData *)palloc0(sizeof(PartitionDescData));
+ partdesc->nparts = nparts;
+ partdesc->detached_exist = false;
+ partdesc->boundinfo = partition_bounds_copy(boundinfo, key);
+ pfree(boundinfo);
+
+ // PAX doesn't have child partition tables
+ partdesc->oids = NULL;
+ partdesc->is_leaf = NULL;
+ /* Return to caller's context, and blow away the temporary context. */
+ MemoryContextSwitchTo(saved_cxt);
+ return partdesc;
+}
+
+static void PaxFormPartitionKeyDatum(PartitionKey key, TupleTableSlot *slot,
+ Datum *values, bool *isnull) {
+ for (int i = 0; i < key->partnatts; i++) {
+ AttrNumber keycol = key->partattrs[i];
+
+ Assert(keycol > 0);
+ values[i] = slot_getattr(slot, keycol, &isnull[i]);
+ }
+}
+
+bool PartitionObjectInternal::Initialize(Relation pax_rel) {
+ MemoryContext tmp_ctx;
+ MemoryContext saved_ctx;
+ List *partparams_list;
+ List *partboundspec_list;
+ PartitionKey key = NULL;
+ PartitionDesc desc = NULL;
+ bool ok;
+
+ Assert(pax_rel);
+ pax_rel_ = pax_rel;
+
+ tmp_ctx = AllocSetContextCreate(CurrentMemoryContext, "tmp pax partition ctx",
+ ALLOCSET_DEFAULT_SIZES);
+ mctx_ = AllocSetContextCreate(CurrentMemoryContext, "pax partition ctx",
+ ALLOCSET_DEFAULT_SIZES);
+ MemoryContextCopyAndSetIdentifier(mctx_, RelationGetRelationName(pax_rel));
+
+ saved_ctx = MemoryContextSwitchTo(tmp_ctx);
+ ok = PaxLoadPartitionSpec(RelationGetRelid(pax_rel), &partparams_list,
+ &partboundspec_list);
+ if (!ok) goto out;
+
+ MemoryContextSwitchTo(mctx_);
+
+ // The partition keys have no strict constraint for DDLs.
+ // The column names/types may be changed later by the user, but the PAX code
+ // is not aware of it. So, we ignore these inconsistent changes for partition
+ // writer.
+ PG_TRY();
+ {
+ key = PaxRelationBuildPartitionKey(pax_rel, partparams_list);
+ InitializeMergeInfo(key, partboundspec_list, tmp_ctx, mctx_);
+
+ desc =
+ PaxRelationBuildPartitionDesc(key, partboundspec_list, tmp_ctx, mctx_);
+ partition_bound_spec_ = static_cast(copyObject(partboundspec_list));
+ }
+ PG_CATCH();
+ {
+ // fall back to not use the partition writer
+ ok = false;
+ FlushErrorState();
+ }
+ PG_END_TRY();
+ partition_key_ = key;
+ partition_desc_ = desc;
+out:
+ MemoryContextSwitchTo(saved_ctx);
+ MemoryContextDelete(tmp_ctx);
+ return ok;
+}
+
+// Precompute which adjacent range partitions are contiguous. The bounds in
+// partboundspec_list are already sorted; the result is a flat array of
+// [start, end] partition-index pairs (merge_index_, length merge_len_),
+// each pair delimiting a maximal run of partitions whose ranges touch.
+// Scratch memory lives in tmp_ctx; the final array is copied to target_ctx.
+// NOTE(review): assumes nparts >= 1 (merge_index[0] is written
+// unconditionally) -- confirm callers never pass an empty bound list.
+void PartitionObjectInternal::InitializeMergeInfo(PartitionKey key,
+                                                  List *partboundspec_list,
+                                                  MemoryContext tmp_ctx,
+                                                  MemoryContext target_ctx) {
+  // gather whether the adjacent bounds are continuous
+  // NOTE: the bounds are already sorted.
+  MemoryContext saved_ctx;
+  int *merge_index;
+  int nparts;
+  int merge_len;
+
+  saved_ctx = MemoryContextSwitchTo(tmp_ctx);
+  nparts = list_length(partboundspec_list);
+  // Worst case (no two partitions contiguous) emits two entries per
+  // partition, so 2 * nparts bounds the scratch array exactly.
+  merge_index = (int *)palloc(2 * nparts * sizeof(int));
+  merge_index[0] = 0;
+  merge_len = 1;
+  for (int i = 1; i < nparts; i++) {
+    PartitionBoundSpec *spec1 =
+        castNode(PartitionBoundSpec, list_nth(partboundspec_list, i - 1));
+    PartitionBoundSpec *spec2 =
+        castNode(PartitionBoundSpec, list_nth(partboundspec_list, i));
+
+    // Compare the previous partition's upper bound with this one's lower
+    // bound; equality means the ranges are adjacent (same run).
+    auto cmpval = PartitionComparePartitionKeys(key, spec1->upperdatums,
+                                                spec2->lowerdatums);
+    Assert(cmpval <= 0);
+    if (cmpval != 0) {
+      // Gap found: close the current run at i - 1 and open a new one at i.
+      merge_index[merge_len++] = i - 1;
+      merge_index[merge_len++] = i;
+    }
+  }
+  merge_index[merge_len++] = nparts - 1;
+
+  Assert(merge_len % 2 == 0);
+  // Copy the result into the long-lived context that outlives tmp_ctx.
+  MemoryContextSwitchTo(target_ctx);
+  merge_len_ = merge_len;
+  merge_index_ = (int *)palloc(merge_len * sizeof(int));
+  memcpy(merge_index_, merge_index, merge_len * sizeof(int));
+  pfree(merge_index);
+
+  MemoryContextSwitchTo(saved_ctx);
+}
+
+// Drop all references to the cached partition metadata. The derived objects
+// live in mctx_, so deleting that context releases them in one shot.
+void PartitionObjectInternal::Release() {
+  if (mctx_) {
+    MemoryContextDelete(mctx_);
+    mctx_ = nullptr;
+  }
+  pax_rel_ = nullptr;
+  partition_key_ = nullptr;
+  partition_desc_ = nullptr;
+  partition_bound_spec_ = nullptr;
+}
+
+// Number of range partitions captured at Initialize() time; the default
+// partition is not part of partition_bound_spec_.
+int PartitionObjectInternal::NumPartitions() const {
+  Assert(pax_rel_ != nullptr && mctx_ != nullptr);
+  Assert(partition_key_ != nullptr && partition_desc_ != nullptr);
+  return list_length(partition_bound_spec_);
+}
+
+// Number of columns/expressions making up the partition key.
+int PartitionObjectInternal::NumPartitionKeys() const {
+  Assert(pax_rel_ != nullptr && mctx_ != nullptr);
+  Assert(partition_key_ != nullptr && partition_desc_ != nullptr);
+  return get_partition_natts(partition_key_);
+}
+
+// Route one tuple to its partition: returns the leaf partition index
+// (>= 0), or -1 for the default partition.
+int PartitionObjectInternal::FindPartition(TupleTableSlot *slot) {
+  Datum key_datums[PARTITION_MAX_KEYS];
+  bool key_isnull[PARTITION_MAX_KEYS];
+
+  Assert(pax_rel_ != nullptr && mctx_ != nullptr);
+  Assert(partition_key_ != nullptr && partition_desc_ != nullptr);
+
+  // Evaluate the partition-key expressions/columns for this tuple, then let
+  // the regular partition routing code pick the target.
+  PaxFormPartitionKeyDatum(partition_key_, slot, key_datums, key_isnull);
+  return get_partition_for_tuple(partition_key_, partition_desc_, key_datums,
+                                 key_isnull);
+}
+
+} // namespace paxc
+
+namespace pax {
+// Exception-safe wrapper over paxc::PartitionObjectInternal::Initialize;
+// CBDB_WRAP_* converts PostgreSQL ereport() errors into C++ exceptions.
+bool PartitionObject::Initialize(Relation pax_rel) {
+  // FIXME: We MUST catch some types of exceptions and assume the partition
+  // info should be ignored, because the partition constraint may be broken
+  // by:
+  // 1. rename column name
+  // 2. change column type
+  // 3. drop one or more columns in the partition keys
+  CBDB_WRAP_START;
+  { return stub_.Initialize(pax_rel); }
+  CBDB_WRAP_END;
+}
+// Frees the partition metadata owned by the internal object; safe to call
+// even if Initialize() failed (Release() handles a null context).
+void PartitionObject::Release() {
+  CBDB_WRAP_START;
+  { stub_.Release(); }
+  CBDB_WRAP_END;
+}
+
+// Routes a tuple to its partition index (-1 = default partition), wrapping
+// PostgreSQL errors as C++ exceptions.
+int PartitionObject::FindPartition(TupleTableSlot *slot) {
+  CBDB_WRAP_START;
+  { return stub_.FindPartition(slot); }
+  CBDB_WRAP_END;
+}
+// Expose the merge ranges computed by InitializeMergeInfo: a flat array of
+// [start, end] partition-index pairs plus its length. The template
+// arguments match the internal members (int *merge_index_, size_t
+// merge_len_); they had been stripped from the declaration here.
+std::pair<int *, size_t> PartitionObject::GetMergeListInfo() {
+  return {stub_.merge_index_, stub_.merge_len_};
+}
+
+} // namespace pax
+
+extern "C" {
+// CREATE FUNCTION pax_dump_ranges(relid Oid) RETURNS SETOF TEXT AS
+// '$libdir/pax', 'PaxPartitionDumpRanges'
+// LANGUAGE C STRICT;
+// UDF about partition
+PG_FUNCTION_INFO_V1(PaxPartitionDumpRanges);
+// Per-call state for the pax_dump_ranges() set-returning function.
+struct PartitionRangeDumpContext {
+  List *boundspec_list;  // one PartitionBoundSpec per range partition
+  MemoryContext mctx;    // holds this struct and the list; deleted at end
+  int index;             // next list element to emit
+};
+
+// pax_dump_ranges(relid): set-returning function emitting one text row per
+// range partition of the given PAX relation, formatted
+// "from(<lower bounds>) to(<upper bounds>)". Emits no rows when the
+// partition spec cannot be loaded.
+Datum PaxPartitionDumpRanges(PG_FUNCTION_ARGS) {
+  PartitionRangeDumpContext *ctx;
+  FuncCallContext *funcctx;
+
+  if (SRF_IS_FIRSTCALL()) {
+    Oid relid = PG_GETARG_OID(0);
+    MemoryContext tmp_ctx;
+    MemoryContext old_ctx;
+    List *partparams;
+    List *partboundspecs;
+    bool ok;
+
+    funcctx = SRF_FIRSTCALL_INIT();
+    // Child of multi_call_memory_ctx so the state survives across calls.
+    tmp_ctx =
+        AllocSetContextCreate(funcctx->multi_call_memory_ctx,
+                              "tmp pax partition ctx", ALLOCSET_DEFAULT_SIZES);
+    old_ctx = MemoryContextSwitchTo(tmp_ctx);
+
+    ok = paxc::PaxLoadPartitionSpec(relid, &partparams, &partboundspecs);
+    // A null list makes the emit loop below produce zero rows.
+    if (!ok) partboundspecs = nullptr;
+
+    ctx =
+        (PartitionRangeDumpContext *)palloc(sizeof(PartitionRangeDumpContext));
+    ctx->boundspec_list = partboundspecs;
+    ctx->mctx = tmp_ctx;
+    ctx->index = 0;
+    funcctx->user_fctx = (void *)ctx;
+    MemoryContextSwitchTo(old_ctx);
+  }
+
+  funcctx = SRF_PERCALL_SETUP();
+  ctx = (PartitionRangeDumpContext *)funcctx->user_fctx;
+  // Each call emits exactly one row via SRF_RETURN_NEXT; the loop condition
+  // only re-checks the cursor on the next invocation.
+  while (ctx->index < list_length(ctx->boundspec_list)) {
+    StringInfoData str;
+    char *value_list;
+    text *range;
+    PartitionBoundSpec *spec =
+        castNode(PartitionBoundSpec, list_nth(ctx->boundspec_list, ctx->index));
+    ++ctx->index;
+
+    initStringInfo(&str);
+    appendStringInfoString(&str, "from");
+    value_list = get_range_partbound_string(spec->lowerdatums);
+    appendStringInfoString(&str, value_list);
+    pfree(value_list);
+
+    appendStringInfoString(&str, " to");
+    value_list = get_range_partbound_string(spec->upperdatums);
+    appendStringInfoString(&str, value_list);
+    pfree(value_list);
+
+    range = cstring_to_text(str.data);
+    pfree(str.data);
+    SRF_RETURN_NEXT(funcctx, PointerGetDatum(range));
+  }
+
+  // All rows emitted: free the per-SRF state (ctx lives inside mctx).
+  MemoryContextDelete(ctx->mctx);
+  SRF_RETURN_DONE(funcctx);
+}
+}
diff --git a/contrib/pax_storage/src/cpp/access/pax_partition.h b/contrib/pax_storage/src/cpp/access/pax_partition.h
new file mode 100644
index 00000000000..2e7ed5ea05e
--- /dev/null
+++ b/contrib/pax_storage/src/cpp/access/pax_partition.h
@@ -0,0 +1,75 @@
+#pragma once
+
+#include "comm/cbdb_api.h"
+
+#include <utility>
+
+namespace pax {
+class PartitionObject;
+}
+
+// A PartitionBoundSpec extended with an extra list.
+// NOTE(review): `spec` being the first member suggests pointers to this
+// struct are passed where a PartitionBoundSpec node is expected -- confirm
+// against the grammar/parser code before relying on it.
+struct PartitionRangeExtension {
+  struct PartitionBoundSpec spec;
+  List *every;  // presumably the EVERY clause values; verify in paxc_gram.y
+};
+namespace paxc {
+
+extern PartitionKey PaxRelationBuildPartitionKey(Relation relation,
+ List *partparams_list);
+extern bool PartitionCheckBounds(PartitionKey key, int nparts,
+ PartitionBoundSpec **partboundspecs);
+extern List *PaxValidatePartitionRanges(Relation relation, PartitionKey key,
+ List *raw_partbound_list);
+
+// Partition metadata holder living on the PostgreSQL (paxc) side. Built
+// from the relation's stored partition spec by Initialize(); all derived
+// objects are allocated in the private memory context mctx_ and released
+// together by Release().
+class PartitionObjectInternal {
+ public:
+  bool Initialize(Relation pax_rel);
+  void Release();
+
+  // Get number of partitions, excluding the default partition
+  int NumPartitions() const;
+  int NumPartitionKeys() const;
+  // -1 if default partition, >=0 leaf partition
+  int FindPartition(TupleTableSlot *slot);
+  PartitionKey GetPartitionKey() { return partition_key_; }
+  PartitionDesc GetPartitionDesc() { return partition_desc_; }
+
+ private:
+  // Computes merge_index_/merge_len_: flat [start, end] index pairs
+  // delimiting runs of range-contiguous partitions (bounds pre-sorted).
+  void InitializeMergeInfo(PartitionKey key, List *partboundspec_list,
+                           MemoryContext tmp_ctx, MemoryContext target_ctx);
+
+  friend class pax::PartitionObject;
+  Relation pax_rel_ = nullptr;           // not owned; set by Initialize()
+  PartitionKey partition_key_ = nullptr;
+  PartitionDesc partition_desc_ = nullptr;
+  int *merge_index_ = nullptr;  // even-length array of [start, end] pairs
+  size_t merge_len_ = 0;
+  List *partition_bound_spec_ = nullptr;  // deep copy of the bound list
+  MemoryContext mctx_ = nullptr;  // owns the derived partition metadata
+};
+} // namespace paxc
+
+namespace pax {
+// C++-exception-safe facade over paxc::PartitionObjectInternal: methods that
+// enter PostgreSQL code are wrapped so ereport() errors surface as C++
+// exceptions.
+class PartitionObject {
+ public:
+  bool Initialize(Relation pax_rel);
+  void Release();
+
+  PartitionKey GetPartitionKey() { return stub_.GetPartitionKey(); }
+  PartitionDesc GetPartitionDesc() { return stub_.GetPartitionDesc(); }
+
+  // Get number of partitions, excluding the default partition
+  int NumPartitions() const { return stub_.NumPartitions(); }
+  // Get number of the partition keys
+  int NumPartitionKeys() const { return stub_.NumPartitionKeys(); }
+
+  // -1 if default partition, >= 0 leaf partition
+  int FindPartition(TupleTableSlot *slot);
+
+  // Merge-range index array and its length; types restored to match
+  // PartitionObjectInternal::merge_index_ / merge_len_ (they had been
+  // stripped from this declaration).
+  std::pair<int *, size_t> GetMergeListInfo();
+
+ private:
+  paxc::PartitionObjectInternal stub_;
+};
+
+} // namespace pax
diff --git a/contrib/pax_storage/src/cpp/access/pax_scanner.cc b/contrib/pax_storage/src/cpp/access/pax_scanner.cc
index 9abd53ade28..3b4054b4bf2 100644
--- a/contrib/pax_storage/src/cpp/access/pax_scanner.cc
+++ b/contrib/pax_storage/src/cpp/access/pax_scanner.cc
@@ -1,19 +1,147 @@
#include "access/pax_scanner.h"
#include "access/pax_access_handle.h"
+#include "catalog/pax_aux_table.h"
+#include "catalog/pg_pax_tables.h"
+#include "comm/guc.h"
+#include "comm/pax_memory.h"
#include "storage/local_file_system.h"
#include "storage/micro_partition.h"
#include "storage/micro_partition_iterator.h"
+#include "storage/micro_partition_stats.h"
#include "storage/orc/orc.h"
#include "storage/pax.h"
#include "storage/pax_buffer.h"
+#include "storage/pax_defined.h"
+
+#ifdef ENABLE_PLASMA
+#include "storage/cache/pax_plasma_cache.h"
+#endif
+
+#ifdef VEC_BUILD
+#include "utils/am_vec.h"
+#endif
+
+namespace paxc {
+// Index-AM liveness probe for PAX: a tid is treated as live iff the
+// micro-partition (block) it points into still has an entry in the
+// relation's auxiliary catalog under the given snapshot. Per-tuple
+// visibility within the block is not checked here; all_dead is ignored.
+bool IndexUniqueCheck(Relation rel, ItemPointer tid, Snapshot snapshot,
+                      bool * /*all_dead*/) {
+  paxc::ScanAuxContext context;
+  HeapTuple tuple;
+  char block_name[NAMEDATALEN];
+  Oid aux_relid;
+  bool exists;
+
+  aux_relid = ::paxc::GetPaxAuxRelid(RelationGetRelid(rel));
+  // The aux table keys micro-partitions by block number rendered as text.
+  snprintf(block_name, sizeof(block_name), "%u", pax::GetBlockNumber(*tid));
+  context.BeginSearchMicroPartition(aux_relid, InvalidOid, snapshot,
+                                    AccessShareLock, block_name);
+  tuple = context.SearchMicroPartitionEntry();
+  exists = HeapTupleIsValid(tuple);
+  context.EndSearchMicroPartition(AccessShareLock);
+  return exists;
+}
+}  // namespace paxc
namespace pax {
+// Build an index-fetch descriptor for one PAX relation and precompute the
+// relation's storage directory path used when opening micro-partitions.
+PaxIndexScanDesc::PaxIndexScanDesc(Relation rel) : base_{.rel = rel} {
+  Assert(rel);
+  // base_ must be the first member so this object can be cast to/from
+  // IndexFetchTableData (see ToBase/FromBase). The cast's template argument
+  // had been stripped; restored to match &base_'s type.
+  Assert(&base_ == reinterpret_cast<IndexFetchTableData *>(this));
+  rel_path_ = cbdb::BuildPaxDirectoryPath(rel->rd_node, rel->rd_backend);
+}
+
+// Close and free the currently open micro-partition reader, if any.
+PaxIndexScanDesc::~PaxIndexScanDesc() {
+  if (!reader_) return;
+  reader_->Close();
+  PAX_DELETE(reader_);
+}
+
+// Fetch the tuple addressed by tid into slot. Lazily (re)opens the reader
+// when tid points into a different micro-partition than the cached one.
+// Returns false when the micro-partition is not visible under snapshot or
+// the offset yields no tuple.
+bool PaxIndexScanDesc::FetchTuple(ItemPointer tid, Snapshot snapshot,
+                                  TupleTableSlot *slot, bool *call_again,
+                                  bool *all_dead) {
+  BlockNumber block = pax::GetBlockNumber(*tid);
+  if (block != current_block_ || !reader_) {
+    if (!OpenMicroPartition(block, snapshot)) return false;
+  }
+
+  Assert(current_block_ == block && reader_);
+  // PAX never needs a second call for the same tid and reports liveness at
+  // block granularity only, so both out-flags are cleared unconditionally.
+  if (call_again) *call_again = false;
+  if (all_dead) *all_dead = false;
+
+  auto ok = reader_->GetTuple(slot, pax::GetTupleOffset(*tid));
+  if (ok) {
+    SetBlockNumber(&slot->tts_tid, block);
+    ExecStoreVirtualTuple(slot);
+  }
+
+  return ok;
+}
+
+// Open the micro-partition file for `block` if it is visible under
+// `snapshot`, swapping out any previously opened reader on success.
+// Returns false (state unchanged) when the block is not visible.
+// NOTE(review): the template arguments on Singleton::GetInstance and
+// PAX_NEW below appear to have been stripped from this patch (presumably a
+// file-system singleton and a micro-partition reader type) -- restore from
+// the original sources before applying.
+bool PaxIndexScanDesc::OpenMicroPartition(BlockNumber block,
+                                          Snapshot snapshot) {
+  bool ok;
+
+  Assert(block != current_block_);
+
+  ok = cbdb::IsMicroPartitionVisible(base_.rel, block, snapshot);
+  if (ok) {
+    MicroPartitionReader::ReaderOptions options;
+
+    auto block_name = std::to_string(block);
+    auto file_name = cbdb::BuildPaxFilePath(rel_path_, block_name);
+    options.block_id = block_name;
+    auto file = Singleton::GetInstance()->Open(
+        file_name, fs::kReadMode);
+    auto reader = PAX_NEW(file);
+    reader->Open(options);
+    // Replace the old reader only after the new one opened successfully.
+    if (reader_) {
+      reader_->Close();
+      PAX_DELETE(reader_);
+    }
+    reader_ = reader;
+    current_block_ = block;
+  }
+
+  return ok;
+}
+
+// Start consuming a new bitmap page: reset the per-block tuple cursor and
+// lazily create the index-scan descriptor used to fetch tuples by tid.
+// Always returns true; emptiness is discovered by BitmapNextTuple. The
+// PAX_NEW template argument (stripped in this patch) is restored to match
+// the declared type of index_desc_.
+bool PaxScanDesc::BitmapNextBlock(struct TBMIterateResult *tbmres) {
+  cindex_ = 0;
+  if (!index_desc_) {
+    index_desc_ = PAX_NEW<PaxIndexScanDesc>(rs_base_.rs_rd);
+  }
+  return true;
+}
+
+// Emit the next tuple of the current bitmap page into slot.
+// Lossy page (ntuples < 0): probe every offset in order; the scan of this
+// block ends when FetchTuple fails. Exact page: use only the listed
+// offsets. Returns false when the page is exhausted.
+bool PaxScanDesc::BitmapNextTuple(struct TBMIterateResult *tbmres,
+                                  TupleTableSlot *slot) {
+  ItemPointerData tid;
+  if (tbmres->ntuples < 0) {
+    // lossy bitmap. The maximum value of the last 16 bits in CTID is
+    // 0x7FFF + 1, i.e. 0x8000. See layout of ItemPointerData in PAX
+    if (cindex_ > 0X8000) elog(ERROR, "unexpected offset in pax");
+
+    ItemPointerSet(&tid, tbmres->blockno, cindex_);
+  } else if (cindex_ < tbmres->ntuples) {
+    // The maximum value of the last 16 bits in CTID is 0x7FFF + 1,
+    // i.e. 0x8000. See layout of ItemPointerData in PAX
+    if (tbmres->offsets[cindex_] > 0X8000)
+      elog(ERROR, "unexpected offset in pax");
+
+    ItemPointerSet(&tid, tbmres->blockno, tbmres->offsets[cindex_]);
+  } else {
+    return false;
+  }
+  ++cindex_;
+  // NOTE(review): a false return here also covers an invisible block, not
+  // only end-of-page -- confirm that is the intended termination condition.
+  return index_desc_->FetchTuple(&tid, rs_base_.rs_snapshot, slot, nullptr,
+                                 nullptr);
+}
+
TableScanDesc PaxScanDesc::BeginScan(Relation relation, Snapshot snapshot,
- int nkeys, struct ScanKeyData *key,
+ int nkeys, struct ScanKeyData * /*key*/,
ParallelTableScanDesc pscan, uint32 flags,
- PaxFilter *filter) {
+ PaxFilter *filter, bool build_bitmap) {
PaxScanDesc *desc;
MemoryContext old_ctx;
TableReader::ReaderOptions reader_options{};
@@ -22,7 +150,7 @@ TableScanDesc PaxScanDesc::BeginScan(Relation relation, Snapshot snapshot,
offsetof(PaxScanDesc, rs_base_) == 0,
"rs_base should be the first field and aligned to the object address");
- desc = new PaxScanDesc();
+ desc = PAX_NEW();
desc->memory_context_ = cbdb::AllocSetCtxCreate(
CurrentMemoryContext, "Pax Storage", PAX_ALLOCSET_DEFAULT_SIZES);
@@ -33,157 +161,224 @@ TableScanDesc PaxScanDesc::BeginScan(Relation relation, Snapshot snapshot,
desc->rs_base_.rs_nkeys = nkeys;
desc->rs_base_.rs_flags = flags;
desc->rs_base_.rs_parallel = pscan;
- desc->key_ = key;
- desc->reused_buffer_ = new DataBuffer(32 * 1024 * 1024); // 32mb
+ desc->reused_buffer_ = PAX_NEW>(pax_scan_reuse_buffer_size);
desc->filter_ = filter;
+ if (!desc->filter_) {
+ desc->filter_ = PAX_NEW();
+ }
+
+ if (!desc->filter_->GetColumnProjection().first) {
+ auto natts = cbdb::RelationGetAttributesNumber(relation);
+ auto cols = PAX_NEW_ARRAY(natts);
+ memset(cols, true, natts);
+ desc->filter_->SetColumnProjection(cols, natts);
+ }
+
#ifdef VEC_BUILD
- if (flags & (1 << 12)) {
- desc->vec_adapter_ = new VecAdapter(cbdb::RelationGetTupleDesc(relation));
+ if (flags & SO_TYPE_VECTOR) {
+ desc->vec_adapter_ =
+ PAX_NEW(cbdb::RelationGetTupleDesc(relation), build_bitmap);
reader_options.is_vec = true;
reader_options.adapter = desc->vec_adapter_;
}
-#endif
+#endif // VEC_BUILD
+
+#ifdef ENABLE_PLASMA
+ if (pax_enable_plasma_in_mem) {
+ std::string plasma_socket_path =
+ std::string(desc->plasma_socket_path_prefix_);
+ plasma_socket_path.append(std::to_string(PostPortNumber));
+ plasma_socket_path.append("\0");
+ PaxPlasmaCache::CacheOptions cache_options;
+ cache_options.domain_socket = plasma_socket_path;
+ cache_options.memory_quota = 0;
+ cache_options.waitting_ms = 0;
+
+ desc->pax_cache_ = PAX_NEW(std::move(cache_options));
+ auto status = desc->pax_cache_->Initialize();
+ if (!status.Ok()) {
+ elog(WARNING, "Plasma cache client init failed, message: %s",
+ status.Error().c_str());
+ PAX_DELETE(desc->pax_cache_);
+ desc->pax_cache_ = nullptr;
+ }
+
+ reader_options.pax_cache = desc->pax_cache_;
+ }
- // init shared memory
- cbdb::InitCommandResource();
+#endif // ENABLE_PLASMA
old_ctx = MemoryContextSwitchTo(desc->memory_context_);
// build reader
- reader_options.build_bitmap = true;
+ reader_options.build_bitmap = build_bitmap;
reader_options.reused_buffer = desc->reused_buffer_;
reader_options.rel_oid = desc->rs_base_.rs_rd->rd_id;
reader_options.filter = filter;
auto iter = MicroPartitionInfoIterator::New(relation, snapshot);
if (filter && filter->HasMicroPartitionFilter()) {
- auto wrap = new FilterIterator(
+ auto wrap = PAX_NEW>(
std::move(iter), [filter, relation](const auto &x) {
- return filter->TestMicroPartitionScan(x.GetStats(),
- RelationGetDescr(relation));
+ MicroPartitionStatsProvider provider(x.GetStats());
+ auto ok = filter->TestScan(provider, RelationGetDescr(relation),
+ PaxFilterStatisticsKind::kFile);
+ return ok;
});
iter = std::unique_ptr>(wrap);
}
- desc->reader_ = new TableReader(std::move(iter), reader_options);
+ desc->reader_ = PAX_NEW(std::move(iter), reader_options);
desc->reader_->Open();
MemoryContextSwitchTo(old_ctx);
return &desc->rs_base_;
}
-void PaxScanDesc::EndScan(TableScanDesc scan) {
- PaxScanDesc *desc = ScanToDesc(scan);
+void PaxScanDesc::EndScan() {
+ if (pax_enable_debug && filter_) {
+ filter_->LogStatistics();
+ }
- Assert(desc->reader_);
- desc->reader_->Close();
+ Assert(reader_);
+ reader_->Close();
- delete desc->reused_buffer_;
- delete desc->reader_;
- delete desc->filter_;
+ PAX_DELETE(reused_buffer_);
+ PAX_DELETE(reader_);
+ PAX_DELETE(filter_);
#ifdef VEC_BUILD
- delete desc->vec_adapter_;
+ PAX_DELETE(vec_adapter_);
#endif
+
+#ifdef ENABLE_PLASMA
+ if (pax_cache_) {
+ pax_cache_->Destroy();
+ PAX_DELETE(pax_cache_);
+ }
+#endif
+
+ PAX_DELETE(index_desc_);
+
// TODO(jiaqizho): please double check with abort transaction @gongxun
- Assert(desc->memory_context_);
- cbdb::MemoryCtxDelete(desc->memory_context_);
- delete desc;
+ Assert(memory_context_);
+ cbdb::MemoryCtxDelete(memory_context_);
+ auto self = this;
+ PAX_DELETE(self);
}
TableScanDesc PaxScanDesc::BeginScanExtractColumns(
- Relation rel, Snapshot snapshot, ParallelTableScanDesc parallel_scan,
- List *targetlist, List *qual, uint32 flags) {
+ Relation rel, Snapshot snapshot, int /*nkeys*/,
+ struct ScanKeyData * /*key*/, ParallelTableScanDesc parallel_scan,
+ struct PlanState *ps, uint32 flags) {
TableScanDesc paxscan;
PaxFilter *filter;
+ List *targetlist = ps->plan->targetlist;
+ List *qual = ps->plan->qual;
auto natts = cbdb::RelationGetAttributesNumber(rel);
bool *cols;
bool found = false;
+ bool build_bitmap = true;
+ PaxcExtractcolumnContext extract_column;
+
+ filter = PAX_NEW();
- filter = new PaxFilter();
+ Assert(natts >= 0);
- cols = new bool[natts];
+ cols = PAX_NEW_ARRAY(natts);
memset(cols, false, natts);
+ extract_column.cols = cols;
+ extract_column.natts = natts;
+
found = cbdb::ExtractcolumnsFromNode(reinterpret_cast(targetlist),
- cols, natts);
+ &extract_column);
found = cbdb::ExtractcolumnsFromNode(reinterpret_cast(qual), cols,
natts) ||
found;
+ build_bitmap = cbdb::IsSystemAttrNumExist(&extract_column,
+ SelfItemPointerAttributeNumber);
// In some cases (for example, count(*)), targetlist and qual may be null,
// extractcolumns_walker will return immediately, so no columns are specified.
// We always scan the first column.
- if (!found) cols[0] = true;
+ if (!found && !build_bitmap && natts > 0) cols[0] = true;
// The `cols` life cycle will be bound to `PaxFilter`
filter->SetColumnProjection(cols, natts);
- {
+ if (pax_enable_filter) {
ScanKey scan_keys = nullptr;
int n_scan_keys = 0;
auto ok = pax::BuildScanKeys(rel, qual, false, &scan_keys, &n_scan_keys);
if (ok) filter->SetScanKeys(scan_keys, n_scan_keys);
+
+ if (gp_enable_predicate_pushdown
+#ifdef VEC_BUILD
+ && !(flags & SO_TYPE_VECTOR)
+#endif
+ )
+ filter->BuildExecutionFilterForColumns(rel, ps);
}
- paxscan = BeginScan(rel, snapshot, 0, nullptr, parallel_scan, flags, filter);
+ paxscan = BeginScan(rel, snapshot, 0, nullptr, parallel_scan, flags, filter,
+ build_bitmap);
return paxscan;
}
// FIXME: shall we take these parameters into account?
-void PaxScanDesc::ReScan(TableScanDesc scan) {
- PaxScanDesc *desc = ScanToDesc(scan);
+void PaxScanDesc::ReScan(ScanKey /*key*/, bool /*set_params*/,
+ bool /*allow_strat*/, bool /*allow_sync*/,
+ bool /*allow_pagemode*/) {
MemoryContext old_ctx;
- Assert(desc && desc->reader_);
+ Assert(reader_);
- old_ctx = MemoryContextSwitchTo(desc->memory_context_);
- desc->reader_->ReOpen();
+ old_ctx = MemoryContextSwitchTo(memory_context_);
+ reader_->ReOpen();
MemoryContextSwitchTo(old_ctx);
}
-bool PaxScanDesc::ScanGetNextSlot(TableScanDesc scan, TupleTableSlot *slot) {
- PaxScanDesc *desc = ScanToDesc(scan);
+bool PaxScanDesc::GetNextSlot(TupleTableSlot *slot) {
MemoryContext old_ctx;
bool ok = false;
- CTupleSlot cslot(slot);
- old_ctx = MemoryContextSwitchTo(desc->memory_context_);
+ old_ctx = MemoryContextSwitchTo(memory_context_);
- ok = desc->reader_->ReadTuple(&cslot);
+ Assert(reader_);
+ ok = reader_->ReadTuple(slot);
MemoryContextSwitchTo(old_ctx);
return ok;
}
-bool PaxScanDesc::ScanAnalyzeNextBlock(TableScanDesc scan,
- BlockNumber blockno) {
- PaxScanDesc *desc = ScanToDesc(scan);
- desc->target_tuple_id_ = blockno;
-
+bool PaxScanDesc::ScanAnalyzeNextBlock(BlockNumber blockno,
+ BufferAccessStrategy /*bstrategy*/) {
+ target_tuple_id_ = blockno;
return true;
}
-bool PaxScanDesc::ScanAnalyzeNextTuple(TableScanDesc scan, double *liverows,
- const double *deadrows,
+bool PaxScanDesc::ScanAnalyzeNextTuple(TransactionId /*oldest_xmin*/,
+ double *liverows,
+ const double * /* deadrows */,
TupleTableSlot *slot) {
- PaxScanDesc *desc = ScanToDesc(scan);
MemoryContext old_ctx;
bool ok = false;
- old_ctx = MemoryContextSwitchTo(desc->memory_context_);
- Assert(*deadrows == 0); // not dead rows in pax latest snapshot
- while (desc->next_tuple_id_ < desc->target_tuple_id_) {
- ok = PaxScanDesc::ScanGetNextSlot(scan, slot);
+ old_ctx = MemoryContextSwitchTo(memory_context_);
+ while (next_tuple_id_ < target_tuple_id_) {
+ ok = GetNextSlot(slot);
if (!ok) break;
- desc->next_tuple_id_++;
+ next_tuple_id_++;
+ }
+ if (next_tuple_id_ == target_tuple_id_) {
+ ok = GetNextSlot(slot);
+ next_tuple_id_++;
+ if (ok) *liverows += 1;
}
MemoryContextSwitchTo(old_ctx);
- if (ok) *liverows += 1;
return ok;
}
-bool PaxScanDesc::ScanSampleNextBlock(TableScanDesc scan,
- SampleScanState *scanstate) {
- PaxScanDesc *desc = ScanToDesc(scan);
+bool PaxScanDesc::ScanSampleNextBlock(SampleScanState *scanstate) {
MemoryContext old_ctx;
TsmRoutine *tsm = scanstate->tsmroutine;
BlockNumber blockno = 0;
@@ -193,39 +388,36 @@ bool PaxScanDesc::ScanSampleNextBlock(TableScanDesc scan,
double allvisfrac = 0;
bool ok = false;
- old_ctx = MemoryContextSwitchTo(desc->memory_context_);
+ old_ctx = MemoryContextSwitchTo(memory_context_);
- if (desc->total_tuples_ == 0) {
- paxc::PaxAccessMethod::EstimateRelSize(scan->rs_rd, &attrwidths, &pages,
+ if (total_tuples_ == 0) {
+ paxc::PaxAccessMethod::EstimateRelSize(rs_base_.rs_rd, &attrwidths, &pages,
&total_tuples, &allvisfrac);
- desc->total_tuples_ = total_tuples;
+ total_tuples_ = total_tuples;
}
if (tsm->NextSampleBlock)
- blockno = tsm->NextSampleBlock(scanstate, desc->total_tuples_);
+ blockno = tsm->NextSampleBlock(scanstate, total_tuples_);
else
- blockno = system_nextsampleblock(scanstate, desc->total_tuples_);
+ blockno = system_nextsampleblock(scanstate, total_tuples_);
ok = BlockNumberIsValid(blockno);
- if (ok) {
- desc->fetch_tuple_id_ = blockno;
- }
+ if (ok) fetch_tuple_id_ = blockno;
MemoryContextSwitchTo(old_ctx);
return ok;
}
-bool PaxScanDesc::ScanSampleNextTuple(TableScanDesc scan,
+bool PaxScanDesc::ScanSampleNextTuple(SampleScanState * /*scanstate*/,
TupleTableSlot *slot) {
- PaxScanDesc *desc = ScanToDesc(scan);
MemoryContext old_ctx;
bool ok = false;
- old_ctx = MemoryContextSwitchTo(desc->memory_context_);
- while (desc->next_tuple_id_ < desc->fetch_tuple_id_) {
- ok = PaxScanDesc::ScanGetNextSlot(scan, slot);
+ old_ctx = MemoryContextSwitchTo(memory_context_);
+ while (next_tuple_id_ < fetch_tuple_id_) {
+ ok = GetNextSlot(slot);
if (!ok) break;
- desc->next_tuple_id_++;
+ next_tuple_id_++;
}
MemoryContextSwitchTo(old_ctx);
return ok;
diff --git a/contrib/pax_storage/src/cpp/access/pax_scanner.h b/contrib/pax_storage/src/cpp/access/pax_scanner.h
index f06ab6c9fb8..33f6754855c 100644
--- a/contrib/pax_storage/src/cpp/access/pax_scanner.h
+++ b/contrib/pax_storage/src/cpp/access/pax_scanner.h
@@ -2,52 +2,84 @@
#include "comm/cbdb_api.h"
+#include
+
#include "storage/pax.h"
#include "storage/pax_filter.h"
#ifdef VEC_BUILD
#include "storage/vec/pax_vec_adapter.h"
#endif
+
+namespace paxc {
+bool IndexUniqueCheck(Relation rel, ItemPointer tid, Snapshot snapshot,
+ bool *all_dead);
+}
+
namespace pax {
+// Per-scan state for fetching PAX tuples by tid (index and bitmap scans).
+// base_ must stay the first member: ToBase()/FromBase() convert between
+// this object and the executor-visible IndexFetchTableData by address.
+class PaxIndexScanDesc final {
+ public:
+  explicit PaxIndexScanDesc(Relation rel);
+  ~PaxIndexScanDesc();
+  bool FetchTuple(ItemPointer tid, Snapshot snapshot, TupleTableSlot *slot,
+                  bool *call_again, bool *all_dead);
+  inline IndexFetchTableData *ToBase() { return &base_; }
+  static inline PaxIndexScanDesc *FromBase(IndexFetchTableData *base) {
+    // Template argument restored (it had been stripped in this patch);
+    // valid because base_ is the first member.
+    return reinterpret_cast<PaxIndexScanDesc *>(base);
+  }
+
+ private:
+  bool OpenMicroPartition(BlockNumber block, Snapshot snapshot);
+
+  IndexFetchTableData base_;
+  BlockNumber current_block_ = InvalidBlockNumber;  // cached open block
+  MicroPartitionReader *reader_ = nullptr;          // owned; see dtor
+  std::string rel_path_;  // relation storage directory, set in ctor
+};
class PaxScanDesc {
public:
static TableScanDesc BeginScan(Relation relation, Snapshot snapshot,
int nkeys, struct ScanKeyData *key,
ParallelTableScanDesc pscan, uint32 flags,
- PaxFilter *filter);
-
- static void ReScan(TableScanDesc scan);
- static void EndScan(TableScanDesc scan);
+ PaxFilter *filter, bool build_bitmap);
static TableScanDesc BeginScanExtractColumns(
- Relation rel, Snapshot snapshot, ParallelTableScanDesc parallel_scan,
- List *targetlist, List *qual, uint32 flags);
+ Relation rel, Snapshot snapshot, int nkeys, struct ScanKeyData *key,
+ ParallelTableScanDesc parallel_scan, struct PlanState *ps, uint32 flags);
- static bool ScanGetNextSlot(TableScanDesc scan, TupleTableSlot *slot);
+ void EndScan();
+ void ReScan(ScanKey key, bool set_params, bool allow_strat, bool allow_sync,
+ bool allow_pagemode);
- static bool ScanAnalyzeNextBlock(TableScanDesc scan, BlockNumber blockno);
- static bool ScanAnalyzeNextTuple(TableScanDesc scan, double *liverows,
- const double *deadrows,
- TupleTableSlot *slot);
+ bool GetNextSlot(TupleTableSlot *slot);
- static bool ScanSampleNextBlock(TableScanDesc scan,
- SampleScanState *scanstate);
+ bool ScanAnalyzeNextBlock(BlockNumber blockno,
+ BufferAccessStrategy bstrategy);
+ bool ScanAnalyzeNextTuple(TransactionId oldest_xmin, double *liverows,
+ const double *deadrows, TupleTableSlot *slot);
- static bool ScanSampleNextTuple(TableScanDesc scan, TupleTableSlot *slot);
+ bool ScanSampleNextBlock(SampleScanState *scanstate);
- ~PaxScanDesc() = default;
+ bool ScanSampleNextTuple(SampleScanState *scanstate, TupleTableSlot *slot);
- private:
- PaxScanDesc() = default;
+ bool BitmapNextBlock(struct TBMIterateResult *tbmres);
+ bool BitmapNextTuple(struct TBMIterateResult *tbmres, TupleTableSlot *slot);
+
+ ~PaxScanDesc() = default;
- static inline PaxScanDesc *ScanToDesc(TableScanDesc scan) {
+ static inline PaxScanDesc *ToDesc(TableScanDesc scan) {
auto desc = reinterpret_cast(scan);
return desc;
}
+ private:
+ template
+ friend T *PAX_NEW(Args &&...args);
+ PaxScanDesc() = default;
+
private:
TableScanDescData rs_base_{};
- const ScanKeyData *key_ = nullptr;
+
TableReader *reader_ = nullptr;
DataBuffer *reused_buffer_ = nullptr;
@@ -67,6 +99,15 @@ class PaxScanDesc {
#ifdef VEC_BUILD
VecAdapter *vec_adapter_ = nullptr;
#endif
+
+#ifdef ENABLE_PLASMA
+ const std::string plasma_socket_path_prefix_ = "/tmp/.s.plasma.";
+ PaxCache *pax_cache_ = nullptr;
+#endif
+
+ // used only by bitmap index scan
+ PaxIndexScanDesc *index_desc_ = nullptr;
+ int cindex_ = 0;
}; // class PaxScanDesc
} // namespace pax
diff --git a/contrib/pax_storage/src/cpp/access/pax_updater.cc b/contrib/pax_storage/src/cpp/access/pax_updater.cc
index e5f79c23ee2..3fbb8787c54 100644
--- a/contrib/pax_storage/src/cpp/access/pax_updater.cc
+++ b/contrib/pax_storage/src/cpp/access/pax_updater.cc
@@ -8,19 +8,29 @@ namespace pax {
 TM_Result CPaxUpdater::UpdateTuple(
     const Relation relation, const ItemPointer otid, TupleTableSlot *slot,
     const CommandId cid, const Snapshot snapshot, const Snapshot /*crosscheck*/,
-    const bool /*wait*/, TM_FailureData * /*tmfd*/,
-    LockTupleMode * /*lockmode*/, bool * /*update_indexes*/) {
+    const bool /*wait*/, TM_FailureData *tmfd,
+    LockTupleMode *lockmode, bool *update_indexes) {
   TM_Result result;
-  CPaxDeleter *deleter =
-      CPaxDmlStateLocal::Instance()->GetDeleter(relation, snapshot);
+
+  // UPDATE in PAX is delete-then-insert through the per-command DML state.
+  auto dml_state = CPaxDmlStateLocal::Instance();
+  auto deleter = dml_state->GetDeleter(relation, snapshot);
+  auto inserter = dml_state->GetInserter(relation);
+
   Assert(deleter != nullptr);
-  CPaxInserter *inserter = CPaxDmlStateLocal::Instance()->GetInserter(relation);
   Assert(inserter != nullptr);
+  // Report an exclusive tuple lock to the executor unconditionally.
+  *lockmode = LockTupleExclusive;
   result = deleter->MarkDelete(otid);
-  // FIXME(gongxun): check result and return TM_SelfModified if needed
-  inserter->InsertTuple(relation, slot, cid, 0, nullptr);
+  if (result == TM_Ok) {
+    // Old version marked deleted: insert the new version and ask the
+    // executor to maintain indexes for it.
+    inserter->InsertTuple(relation, slot, cid, 0, nullptr);
+    *update_indexes = true;
+  } else {
+    // FIXME: set tmfd correctly.
+    // FYI, ao ignores both tmfd and lockmode
+    tmfd->ctid = *otid;
+    *update_indexes = false;
+  }
   // TODO(gongxun): update pgstat info
   return result;
 }
diff --git a/contrib/pax_storage/src/cpp/access/paxc_gram.y b/contrib/pax_storage/src/cpp/access/paxc_gram.y
new file mode 100644
index 00000000000..84ca498fd99
--- /dev/null
+++ b/contrib/pax_storage/src/cpp/access/paxc_gram.y
@@ -0,0 +1,575 @@
+%{
+#include "postgres.h"
+
+
+#include "nodes/pg_list.h"
+#include "parser/parser.h"
+#include "parser/parse_type.h"
+#include "parser/scanner.h"
+#include "parser/scansup.h"
+#include "utils/builtins.h"
+#include "utils/datetime.h"
+
+#include "access/paxc_scanner.h"
+
+/* Location tracking support --- simpler than bison's default */
+#define YYLLOC_DEFAULT(Current, Rhs, N) \
+ do { \
+ if (N) \
+ (Current) = (Rhs)[1]; \
+ else \
+ (Current) = (Rhs)[0]; \
+ } while (0)
+
+#define parser_errposition(pos) scanner_errposition(pos, yyscanner)
+#define parser_yyerror(msg) scanner_yyerror(yyscanner, msg)
+
+/*
+ * Bison doesn't allocate anything that needs to live across parser calls,
+ * so we can easily have it use palloc instead of malloc. This prevents
+ * memory leaks if we error out during parsing. Note this only works with
+ * bison >= 2.0. However, in bison 1.875 the default is to use alloca()
+ * if possible, so there's not really much problem anyhow, at least if
+ * you're building with gcc.
+ */
+#define YYMALLOC palloc
+#define YYFREE pfree
+
+static void paxc_yyerror(core_yyscan_t yyscanner, const char *message);
+static int paxc_yylex(core_yyscan_t yyscanner);
+static int paxc_scanner_errposition(int location);
+static List *paxc_result;
+
+%}
+
+/* %pure-parser */
+%expect 0
+%name-prefix="paxc_yy"
+%locations
+%parse-param {core_yyscan_t yyscanner}
+%lex-param {core_yyscan_t yyscanner}
+
+%union
+{
+ core_YYSTYPE core_yystype;
+ /* these fields must match core_YYSTYPE: */
+ int ival;
+ char *str;
+ const char *keyword;
+
+ bool boolean;
+ List *list;
+ Node *node;
+ TypeName *typnam;
+ PartitionElem *partelem;
+ PartitionSpec *partspec;
+ PartitionBoundSpec *partboundspec;
+}
+
+/* %type top_level_stmt */
+%type partition_by part_params any_name opt_collate attrs opt_qualified_name
+%type part_elem
+%type ColId attr_name
+
+// FIXME: types for partition ranges
+//%type partition_ranges expr_list opt_type_modifiers
+//%type partition_range
+//%type AexprConst a_expr c_expr
+//%type Numeric opt_float ConstTypename ConstDatetime ConstInterval ConstCharacter CharacterWithLength CharacterWithoutLength ConstBit BitWithLength BitWithoutLength
+//%type opt_varying opt_timezone
+//%type opt_interval interval_second
+//%type Sconst character
+//%type Iconst
+
+%token IDENT
+//%token FCONST SCONST BCONST XCONST
+//%token ICONST
+
+%token COLLATE
+//%token TRUE_P FALSE_P HOUR_P YEAR_P NULL_P MONTH_P TO VARYING VARCHAR TIMESTAMP BIT TIME INTERVAL DAY_P MINUTE_P SECOND_P CHARACTER NATIONAL NCHAR CHAR_P ZONE INT_P INTEGER SMALLINT BIGINT REAL FLOAT_P DOUBLE_P PRECISION DECIMAL_P DEC NUMERIC BOOLEAN_P FROM
+
+%token WITH_LA WITHOUT_LA
+
+
+%%
+
+top_level_stmt:
+ partition_by { paxc_result = $1; }
+// | partition_ranges { paxc_result = $1; }
+ ;
+
+partition_by: part_params { $$ = $1; }
+ ;
+part_params:
+ part_elem { $$ = list_make1($1); }
+ | part_params ',' part_elem { $$ = lappend($1, $3); }
+ ;
+
+part_elem: ColId opt_collate opt_qualified_name
+ {
+ PartitionElem *n = makeNode(PartitionElem);
+
+ n->name = $1;
+ n->expr = NULL;
+ n->collation = $2;
+ n->opclass = $3;
+ n->location = @1;
+ $$ = n;
+ }
+/*
+ | func_expr_windowless opt_collate opt_qualified_name
+ {
+ PartitionElem *n = makeNode(PartitionElem);
+
+ n->name = NULL;
+ n->expr = $1;
+ n->collation = $2;
+ n->opclass = $3;
+ n->location = @1;
+ $$ = n;
+ }
+ | '(' a_expr ')' opt_collate opt_qualified_name
+ {
+ PartitionElem *n = makeNode(PartitionElem);
+
+ n->name = NULL;
+ n->expr = $2;
+ n->collation = $4;
+ n->opclass = $5;
+ n->location = @1;
+ $$ = n;
+ }
+*/
+ ;
+
+/* Column identifier --- names that can be column, table, etc names.
+ */
+ColId: IDENT { $$ = $1; }
+ ;
+opt_collate: COLLATE any_name { $$ = $2; }
+ | /*EMPTY*/ { $$ = NIL; }
+ ;
+
+any_name:
+ ColId { $$ = list_make1(makeString($1)); }
+ | ColId attrs { $$ = lcons(makeString($1), $2); }
+ ;
+
+attrs: '.' attr_name { $$ = list_make1(makeString($2)); }
+ | attrs '.' attr_name { $$ = lappend($1, makeString($3)); }
+ ;
+
+attr_name: IDENT { $$ = $1; }
+ ;
+
+/* opclass */
+opt_qualified_name: any_name { $$ = $1; }
+ | /*EMPTY*/ { $$ = NIL; }
+ ;
+
+//partition_ranges: partition_ranges ',' partition_range { $$ = lappend($1, $3); }
+// | partition_range { $$ = list_make1($1); }
+// ;
+//
+//partition_range: FROM '(' expr_list ')' TO '(' expr_list ')'
+// {
+// PartitionBoundSpec *n = makeNode(PartitionBoundSpec);
+//
+// n->strategy = PARTITION_STRATEGY_RANGE;
+// n->is_default = false;
+// n->lowerdatums = $3;
+// n->upperdatums = $7;
+//
+// $$ = n;
+// }
+// ;
+//
+//expr_list: a_expr { $$ = list_make1($1); }
+// | expr_list ',' a_expr { $$ = lappend($1, $3); }
+// ;
+//
+//a_expr: c_expr { $$ = $1; }
+// ;
+//c_expr: AexprConst { $$ = $1; }
+// ;
+//
+///*
+// * Constants
+// */
+//AexprConst: Iconst { $$ = makeIntConst($1, @1); }
+// | FCONST { $$ = makeFloatConst($1, @1); }
+// | Sconst { $$ = makeStringConst($1, @1); }
+// | BCONST { $$ = makeBitStringConst($1, @1); }
+// | XCONST
+// {
+// /* This is a bit constant per SQL99:
+// * Without Feature F511, "BIT data type",
+// * a shall not be a
+// * or a .
+// */
+// $$ = makeBitStringConst($1, @1);
+// }
+// | ConstTypename Sconst { $$ = makeStringConstCast($2, @2, $1); }
+// | ConstInterval Sconst opt_interval
+// {
+// TypeName *t = $1;
+//
+// t->typmods = $3;
+// $$ = makeStringConstCast($2, @2, t);
+// }
+// | ConstInterval '(' Iconst ')' Sconst
+// {
+// TypeName *t = $1;
+//
+// t->typmods = list_make2(makeIntConst(INTERVAL_FULL_RANGE, -1),
+// makeIntConst($3, @3));
+// $$ = makeStringConstCast($5, @5, t);
+// }
+// | TRUE_P { $$ = makeBoolAConst(true, @1); }
+// | FALSE_P { $$ = makeBoolAConst(false, @1); }
+// | NULL_P { $$ = makeNullAConst(@1); }
+// ;
+//
+//Iconst: ICONST { $$ = $1; };
+//Sconst: SCONST { $$ = $1; };
+//
+//ConstTypename:
+// Numeric { $$ = $1; }
+// | ConstBit { $$ = $1; }
+// | ConstCharacter { $$ = $1; }
+// | ConstDatetime { $$ = $1; }
+// ;
+//
+///* ConstBit is like Bit except "BIT" defaults to unspecified length */
+///* See notes for ConstCharacter, which addresses same issue for "CHAR" */
+//ConstBit: BitWithLength { $$ = $1; }
+// | BitWithoutLength
+// {
+// $$ = $1;
+// $$->typmods = NIL;
+// }
+// ;
+//
+//BitWithLength: BIT opt_varying '(' expr_list ')'
+// {
+// char *typname;
+//
+// typname = $2 ? "varbit" : "bit";
+// $$ = SystemTypeName(typname);
+// $$->typmods = $4;
+// $$->location = @1;
+// }
+// ;
+//
+//BitWithoutLength: BIT opt_varying
+// {
+// /* bit defaults to bit(1), varbit to no limit */
+// if ($2)
+// {
+// $$ = SystemTypeName("varbit");
+// }
+// else
+// {
+// $$ = SystemTypeName("bit");
+// $$->typmods = list_make1(makeIntConst(1, -1));
+// }
+// $$->location = @1;
+// }
+// ;
+//
+//ConstCharacter: CharacterWithLength
+// {
+// $$ = $1;
+// }
+// | CharacterWithoutLength
+// {
+// /* Length was not specified so allow to be unrestricted.
+// * This handles problems with fixed-length (bpchar) strings
+// * which in column definitions must default to a length
+// * of one, but should not be constrained if the length
+// * was not specified.
+// */
+// $$ = $1;
+// $$->typmods = NIL;
+// }
+// ;
+//
+//CharacterWithLength: character '(' Iconst ')'
+// {
+// $$ = SystemTypeName($1);
+// $$->typmods = list_make1(makeIntConst($3, @3));
+// $$->location = @1;
+// }
+// ;
+//
+//CharacterWithoutLength: character
+// {
+// $$ = SystemTypeName($1);
+// /* char defaults to char(1), varchar to no limit */
+// if (strcmp($1, "bpchar") == 0)
+// $$->typmods = list_make1(makeIntConst(1, -1));
+// $$->location = @1;
+// }
+// ;
+//
+//character: CHARACTER opt_varying { $$ = $2 ? "varchar": "bpchar"; }
+// | CHAR_P opt_varying { $$ = $2 ? "varchar": "bpchar"; }
+// | VARCHAR { $$ = "varchar"; }
+// | NATIONAL CHARACTER opt_varying { $$ = $3 ? "varchar": "bpchar"; }
+// | NATIONAL CHAR_P opt_varying { $$ = $3 ? "varchar": "bpchar"; }
+// | NCHAR opt_varying { $$ = $2 ? "varchar": "bpchar"; }
+// ;
+//
+//opt_varying: VARYING { $$ = true; }
+// | /*EMPTY*/ { $$ = false; }
+// ;
+//
+///*
+// * SQL date/time types
+// */
+//ConstDatetime:
+// TIMESTAMP '(' Iconst ')' opt_timezone
+// {
+// if ($5)
+// $$ = SystemTypeName("timestamptz");
+// else
+// $$ = SystemTypeName("timestamp");
+// $$->typmods = list_make1(makeIntConst($3, @3));
+// $$->location = @1;
+// }
+// | TIMESTAMP opt_timezone
+// {
+// if ($2)
+// $$ = SystemTypeName("timestamptz");
+// else
+// $$ = SystemTypeName("timestamp");
+// $$->location = @1;
+// }
+// | TIME '(' Iconst ')' opt_timezone
+// {
+// if ($5)
+// $$ = SystemTypeName("timetz");
+// else
+// $$ = SystemTypeName("time");
+// $$->typmods = list_make1(makeIntConst($3, @3));
+// $$->location = @1;
+// }
+// | TIME opt_timezone
+// {
+// if ($2)
+// $$ = SystemTypeName("timetz");
+// else
+// $$ = SystemTypeName("time");
+// $$->location = @1;
+// }
+// ;
+//
+//ConstInterval: INTERVAL
+// {
+// $$ = SystemTypeName("interval");
+// $$->location = @1;
+// }
+// ;
+//
+//opt_timezone: WITH_LA TIME ZONE { $$ = true; }
+// | WITHOUT_LA TIME ZONE { $$ = false; }
+// | /*EMPTY*/ { $$ = false; }
+// ;
+//
+//opt_interval:
+// YEAR_P { $$ = list_make1(makeIntConst(INTERVAL_MASK(YEAR), @1)); }
+// | MONTH_P { $$ = list_make1(makeIntConst(INTERVAL_MASK(MONTH), @1)); }
+// | DAY_P { $$ = list_make1(makeIntConst(INTERVAL_MASK(DAY), @1)); }
+// | HOUR_P { $$ = list_make1(makeIntConst(INTERVAL_MASK(HOUR), @1)); }
+// | MINUTE_P { $$ = list_make1(makeIntConst(INTERVAL_MASK(MINUTE), @1)); }
+// | interval_second { $$ = $1; }
+// | YEAR_P TO MONTH_P
+// {
+// $$ = list_make1(makeIntConst(INTERVAL_MASK(YEAR) |
+// INTERVAL_MASK(MONTH), @1));
+// }
+// | DAY_P TO HOUR_P
+// {
+// $$ = list_make1(makeIntConst(INTERVAL_MASK(DAY) |
+// INTERVAL_MASK(HOUR), @1));
+// }
+// | DAY_P TO MINUTE_P
+// {
+// $$ = list_make1(makeIntConst(INTERVAL_MASK(DAY) |
+// INTERVAL_MASK(HOUR) |
+// INTERVAL_MASK(MINUTE), @1));
+// }
+// | DAY_P TO interval_second
+// {
+// $$ = $3;
+// linitial($$) = makeIntConst(INTERVAL_MASK(DAY) |
+// INTERVAL_MASK(HOUR) |
+// INTERVAL_MASK(MINUTE) |
+// INTERVAL_MASK(SECOND), @1);
+// }
+// | HOUR_P TO MINUTE_P
+// {
+// $$ = list_make1(makeIntConst(INTERVAL_MASK(HOUR) |
+// INTERVAL_MASK(MINUTE), @1));
+// }
+// | HOUR_P TO interval_second
+// {
+// $$ = $3;
+// linitial($$) = makeIntConst(INTERVAL_MASK(HOUR) |
+// INTERVAL_MASK(MINUTE) |
+// INTERVAL_MASK(SECOND), @1);
+// }
+// | MINUTE_P TO interval_second
+// {
+// $$ = $3;
+// linitial($$) = makeIntConst(INTERVAL_MASK(MINUTE) |
+// INTERVAL_MASK(SECOND), @1);
+// }
+// | /*EMPTY*/ { $$ = NIL; }
+// ;
+//
+//interval_second:
+// SECOND_P
+// {
+// $$ = list_make1(makeIntConst(INTERVAL_MASK(SECOND), @1));
+// }
+// | SECOND_P '(' Iconst ')'
+// {
+// $$ = list_make2(makeIntConst(INTERVAL_MASK(SECOND), @1),
+// makeIntConst($3, @3));
+// }
+// ;
+//
+//opt_type_modifiers: '(' expr_list ')' { $$ = $2; }
+// | /* EMPTY */ { $$ = NIL; }
+// ;
+//
+///*
+// * SQL numeric data types
+// */
+//Numeric:
+// INT_P
+// {
+// $$ = SystemTypeName("int4");
+// $$->location = @1;
+// }
+// | INTEGER
+// {
+// $$ = SystemTypeName("int4");
+// $$->location = @1;
+// }
+// | SMALLINT
+// {
+// $$ = SystemTypeName("int2");
+// $$->location = @1;
+// }
+// | BIGINT
+// {
+// $$ = SystemTypeName("int8");
+// $$->location = @1;
+// }
+// | REAL
+// {
+// $$ = SystemTypeName("float4");
+// $$->location = @1;
+// }
+// | FLOAT_P opt_float
+// {
+// $$ = $2;
+// $$->location = @1;
+// }
+// | DOUBLE_P PRECISION
+// {
+// $$ = SystemTypeName("float8");
+// $$->location = @1;
+// }
+// | DECIMAL_P opt_type_modifiers
+// {
+// $$ = SystemTypeName("numeric");
+// $$->typmods = $2;
+// $$->location = @1;
+// }
+// | DEC opt_type_modifiers
+// {
+// $$ = SystemTypeName("numeric");
+// $$->typmods = $2;
+// $$->location = @1;
+// }
+// | NUMERIC opt_type_modifiers
+// {
+// $$ = SystemTypeName("numeric");
+// $$->typmods = $2;
+// $$->location = @1;
+// }
+// | BOOLEAN_P
+// {
+// $$ = SystemTypeName("bool");
+// $$->location = @1;
+// }
+// ;
+//
+//opt_float: '(' Iconst ')'
+// {
+// /*
+// * Check FLOAT() precision limits assuming IEEE floating
+// * types - thomas 1997-09-18
+// */
+// if ($2 < 1)
+// ereport(ERROR,
+// (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+// errmsg("precision for type float must be at least 1 bit"),
+// parser_errposition(@2)));
+// else if ($2 <= 24)
+// $$ = SystemTypeName("float4");
+// else if ($2 <= 53)
+// $$ = SystemTypeName("float8");
+// else
+// ereport(ERROR,
+// (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+// errmsg("precision for type float must be less than 54 bits"),
+// parser_errposition(@2)));
+// }
+// | /*EMPTY*/ { $$ = SystemTypeName("float8"); }
+// ;
+//
+
+%%
+
+static int paxc_scanner_errposition(int location) {
+ return location;
+}
+
+static void paxc_yyerror(core_yyscan_t yyscanner, const char *message) {
+ ereport(ERROR, (errcode(ERRCODE_SYNTAX_ERROR),
+ errmsg("%s", _(message))));
+}
+static int paxc_yylex(core_yyscan_t yyscanner) {
+ return core_yylex(&paxc_yylval.core_yystype, &paxc_yylloc, yyscanner);
+}
+
+static core_yyscan_t paxc_scanner_init(const char *str, core_yy_extra_type *extra) {
+ paxc_result = NIL;
+ return scanner_init(str, extra, &ScanKeywords, ScanKeywordTokens);
+}
+
+static void paxc_scanner_finish(core_yyscan_t yyscanner) {
+ scanner_finish(yyscanner);
+ paxc_result = NIL;
+}
+
+List *paxc_raw_parse(const char *str) {
+ core_yyscan_t yyscanner;
+ core_yy_extra_type extra;
+ List *result;
+ int yyresult;
+
+ yyscanner = paxc_scanner_init(str, &extra);
+ yyresult = paxc_yyparse(yyscanner);
+ if (yyresult != 0)
+ elog(ERROR, "paxc_yyparse returned %d", yyresult);
+
+ result = paxc_result;
+ paxc_scanner_finish(yyscanner);
+ return result;
+}
+
diff --git a/contrib/pax_storage/src/cpp/access/paxc_rel_options.cc b/contrib/pax_storage/src/cpp/access/paxc_rel_options.cc
new file mode 100644
index 00000000000..fc230b874d3
--- /dev/null
+++ b/contrib/pax_storage/src/cpp/access/paxc_rel_options.cc
@@ -0,0 +1,270 @@
+#include "access/paxc_rel_options.h"
+
+namespace paxc {
+
+typedef struct {
+ const char *optname; /* option's name */
+ const pax::ColumnEncoding_Kind kind;
+} relopt_compress_type_mapping;
+
+static const relopt_compress_type_mapping kSelfRelCompressMap[] = {
+ {ColumnEncoding_Kind_NO_ENCODED_STR,
+ pax::ColumnEncoding_Kind::ColumnEncoding_Kind_NO_ENCODED},
+ {ColumnEncoding_Kind_RLE_V2_STR,
+ pax::ColumnEncoding_Kind::ColumnEncoding_Kind_RLE_V2},
+ {ColumnEncoding_Kind_DIRECT_DELTA_STR,
+ pax::ColumnEncoding_Kind::ColumnEncoding_Kind_DIRECT_DELTA},
+ {ColumnEncoding_Kind_COMPRESS_ZSTD_STR,
+ pax::ColumnEncoding_Kind::ColumnEncoding_Kind_COMPRESS_ZSTD},
+ {ColumnEncoding_Kind_COMPRESS_ZLIB_STR,
+ pax::ColumnEncoding_Kind::ColumnEncoding_Kind_COMPRESS_ZLIB},
+};
+
+typedef struct {
+ const char *optname; /* option's name */
+ const pax::PaxStorageFormat format;
+} relopt_format_type_mapping;
+
+static const relopt_format_type_mapping kSelfRelFormatMap[] = {
+ {STORAGE_FORMAT_TYPE_ORC, pax::PaxStorageFormat::kTypeStorageOrcNonVec},
+ {STORAGE_FORMAT_TYPE_ORC_VEC, pax::PaxStorageFormat::kTypeStorageOrcVec},
+};
+
+// reloptions structure and variables.
+static relopt_kind self_relopt_kind;
+
+#define PAX_COPY_STR_OPT(pax_opts_, pax_opt_name_) \
+ do { \
+ PaxOptions *pax_opts = reinterpret_cast(pax_opts_); \
+ int pax_name_offset_ = *reinterpret_cast(pax_opts->pax_opt_name_); \
+ if (pax_name_offset_) \
+ strlcpy(pax_opts->pax_opt_name_, \
+ reinterpret_cast(pax_opts) + pax_name_offset_, \
+ sizeof(pax_opts->pax_opt_name_)); \
+ } while (0)
+
+static const char *kSelfColumnEncodingClauseWhiteList[] = {
+ PAX_SOPT_COMPTYPE,
+ PAX_SOPT_COMPLEVEL,
+};
+
+static const relopt_parse_elt kSelfReloptTab[] = {
+ // no allow set with encoding
+ {PAX_SOPT_STORAGE_FORMAT, RELOPT_TYPE_STRING,
+ offsetof(PaxOptions, storage_format)},
+ // allow with encoding
+ {PAX_SOPT_COMPTYPE, RELOPT_TYPE_STRING,
+ offsetof(PaxOptions, compress_type)},
+ {PAX_SOPT_COMPLEVEL, RELOPT_TYPE_INT, offsetof(PaxOptions, compress_level)},
+ {PAX_SOPT_PARTITION_BY, RELOPT_TYPE_STRING,
+ offsetof(PaxOptions, partition_by_offset)},
+ {PAX_SOPT_PARTITION_RANGES, RELOPT_TYPE_STRING,
+ offsetof(PaxOptions, partition_ranges_offset)},
+};
+
+static void paxc_validate_rel_options_storage_format(const char *value) {
+ size_t i;
+
+ for (i = 0; i < lengthof(kSelfRelFormatMap); i++) {
+ if (strcmp(value, kSelfRelFormatMap[i].optname) == 0) return;
+ }
+ ereport(ERROR, (errmsg("unsupported storage format: '%s'", value)));
+}
+
+static void paxc_validate_rel_options_compress_type(const char *value) {
+ size_t i;
+
+ for (i = 0; i < lengthof(kSelfRelCompressMap); i++) {
+ if (strcmp(value, kSelfRelCompressMap[i].optname) == 0) return;
+ }
+ ereport(ERROR, (errmsg("unsupported compress type: '%s'", value)));
+}
+
+static void paxc_validate_rel_option(PaxOptions *options) {
+ Assert(options);
+ if (strcmp(ColumnEncoding_Kind_NO_ENCODED_STR, options->compress_type) == 0 ||
+ strcmp(ColumnEncoding_Kind_RLE_V2_STR, options->compress_type) == 0 ||
+ strcmp(ColumnEncoding_Kind_DIRECT_DELTA_STR, options->compress_type) ==
+ 0) {
+ if (options->compress_level != 0) {
+ ereport(ERROR, (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+ errmsg("compresslevel=%d is not supported for the "
+ "current compress type.",
+ options->compress_level)));
+ }
+ }
+}
+
+bytea *paxc_default_rel_options(Datum reloptions, char /*relkind*/,
+ bool validate) {
+ Assert(self_relopt_kind != 0);
+ bytea *rdopts = (bytea *)build_reloptions(
+ reloptions, validate, self_relopt_kind, sizeof(PaxOptions),
+ kSelfReloptTab, lengthof(kSelfReloptTab));
+
+ PAX_COPY_STR_OPT(rdopts, storage_format);
+ PAX_COPY_STR_OPT(rdopts, compress_type);
+ return rdopts;
+}
+
+PaxOptions **paxc_relation_get_attribute_options(Relation rel) {
+ Datum *dats;
+ PaxOptions **opts;
+ int i;
+
+ Assert(rel && OidIsValid(RelationGetRelid(rel)));
+
+ opts = (PaxOptions **)palloc0(RelationGetNumberOfAttributes(rel) *
+ sizeof(PaxOptions *));
+
+ dats = get_rel_attoptions(RelationGetRelid(rel),
+ RelationGetNumberOfAttributes(rel));
+
+ for (i = 0; i < RelationGetNumberOfAttributes(rel); i++) {
+ if (DatumGetPointer(dats[i]) != NULL) {
+ opts[i] = (PaxOptions *)paxc_default_rel_options(dats[i], 0, false);
+ pfree(DatumGetPointer(dats[i]));
+ }
+ }
+ pfree(dats);
+
+ return opts;
+}
+
+static void paxc_validate_single_column_encoding_clauses(
+ List *single_column_encoding) {
+ ListCell *cell = NULL;
+ Datum d;
+ PaxOptions *option = NULL;
+ /* the caller is not allowed to pass `PAX_SOPT_STORAGE_FORMAT`
+ */
+ foreach (cell, single_column_encoding) {
+ DefElem *def = (DefElem *)lfirst(cell);
+ bool not_in_white_list = true;
+
+ if (!def->defname) {
+ continue;
+ }
+
+ for (size_t i = 0; i < lengthof(kSelfColumnEncodingClauseWhiteList); i++) {
+ if (strcmp(kSelfColumnEncodingClauseWhiteList[i], def->defname) == 0) {
+ not_in_white_list = false;
+ break;
+ }
+ }
+
+ if (not_in_white_list) {
+ ereport(ERROR, (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+ errmsg("%s is not allowed in ENCODING CLAUSES.",
+ def->defname)));
+ }
+ }
+
+ d = transformRelOptions(PointerGetDatum(NULL), single_column_encoding, NULL,
+ NULL, true, false);
+
+ option = (PaxOptions *)paxc_default_rel_options(d, 0, true);
+ paxc_validate_rel_option(option);
+}
+
+void paxc_validate_column_encoding_clauses(List *encoding_opts) {
+ ListCell *lc;
+ foreach (lc, encoding_opts) {
+ ColumnReferenceStorageDirective *crsd =
+ (ColumnReferenceStorageDirective *)lfirst(lc);
+ paxc_validate_single_column_encoding_clauses(crsd->encoding);
+ }
+}
+
+List *paxc_transform_column_encoding_clauses(List *encoding_opts, bool validate,
+ bool fromType) {
+ List *ret_list = NIL;
+
+ if (fromType) {
+ return NIL;
+ }
+
+ ret_list = list_copy(encoding_opts);
+ /* there is no need to transform column encoding clauses in pax
+ * because pax sets the default encoding internally
+ */
+ if (validate) {
+ paxc_validate_single_column_encoding_clauses(encoding_opts);
+ }
+
+ /* if a column has no encoding clauses set,
+ * transformColumnEncoding will pass the relation options
+ * down as column encoding clauses, so remove the
+ * `PAX_SOPT_STORAGE_FORMAT` option from them.
+ */
+ ListCell *cell = NULL;
+ foreach (cell, ret_list) {
+ DefElem *def = (DefElem *)lfirst(cell);
+ bool not_in_white_list = true;
+ if (!def->defname) {
+ continue;
+ }
+
+ for (size_t i = 0; i < lengthof(kSelfColumnEncodingClauseWhiteList); i++) {
+ if (strcmp(kSelfColumnEncodingClauseWhiteList[i], def->defname) == 0) {
+ not_in_white_list = false;
+ break;
+ }
+ }
+
+ if (not_in_white_list) {
+ ret_list = foreach_delete_current(ret_list, cell);
+ }
+ }
+
+ return ret_list;
+}
+
+void paxc_reg_rel_options() {
+ self_relopt_kind = add_reloption_kind();
+ add_string_reloption(
+ self_relopt_kind, PAX_SOPT_STORAGE_FORMAT, "pax storage format", "orc",
+ paxc_validate_rel_options_storage_format, AccessExclusiveLock);
+ add_string_reloption(self_relopt_kind, PAX_SOPT_COMPTYPE, "pax compress type",
+ PAX_DEFAULT_COMPRESSTYPE,
+ paxc_validate_rel_options_compress_type,
+ AccessExclusiveLock);
+ add_int_reloption(self_relopt_kind, PAX_SOPT_COMPLEVEL, "pax compress level",
+ PAX_DEFAULT_COMPRESSLEVEL, PAX_MIN_COMPRESSLEVEL,
+ PAX_MAX_COMPRESSLEVEL, AccessExclusiveLock);
+ add_string_reloption(self_relopt_kind, PAX_SOPT_PARTITION_BY, "partition by",
+ NULL, NULL, AccessExclusiveLock);
+ add_string_reloption(self_relopt_kind, PAX_SOPT_PARTITION_RANGES,
+ "partition ranges", NULL, NULL, AccessExclusiveLock);
+}
+
+} // namespace paxc
+
+namespace pax {
+
+ColumnEncoding_Kind CompressKeyToColumnEncodingKind(const char *encoding_str) {
+ Assert(encoding_str);
+
+ for (size_t i = 0; i < lengthof(paxc::kSelfRelCompressMap); i++) {
+ if (strcmp(paxc::kSelfRelCompressMap[i].optname, encoding_str) == 0) {
+ return paxc::kSelfRelCompressMap[i].kind;
+ }
+ }
+
+ CBDB_RAISE(cbdb::CException::kExTypeLogicError);
+}
+
+PaxStorageFormat StorageFormatKeyToPaxStorageFormat(
+ const char *storage_format_str) {
+ Assert(storage_format_str);
+
+ for (size_t i = 0; i < lengthof(paxc::kSelfRelFormatMap); i++) {
+ if (strcmp(paxc::kSelfRelFormatMap[i].optname, storage_format_str) == 0) {
+ return paxc::kSelfRelFormatMap[i].format;
+ }
+ }
+
+ CBDB_RAISE(cbdb::CException::kExTypeLogicError);
+}
+
+} // namespace pax
diff --git a/contrib/pax_storage/src/cpp/access/paxc_rel_options.h b/contrib/pax_storage/src/cpp/access/paxc_rel_options.h
new file mode 100644
index 00000000000..cc66575610b
--- /dev/null
+++ b/contrib/pax_storage/src/cpp/access/paxc_rel_options.h
@@ -0,0 +1,110 @@
+#pragma once
+
+#include "comm/cbdb_api.h"
+
+#include "exceptions/CException.h"
+#include "storage/pax_defined.h"
+#include "storage/proto/proto_wrappers.h" // for ColumnEncoding_Kind
+
+namespace paxc {
+
+#define ColumnEncoding_Kind_NO_ENCODED_STR "none"
+#define ColumnEncoding_Kind_RLE_V2_STR "rle"
+#define ColumnEncoding_Kind_DIRECT_DELTA_STR "delta"
+#define ColumnEncoding_Kind_COMPRESS_ZSTD_STR "zstd"
+#define ColumnEncoding_Kind_COMPRESS_ZLIB_STR "zlib"
+
+#define STORAGE_FORMAT_TYPE_ORC "orc"
+#define STORAGE_FORMAT_TYPE_ORC_VEC "orc_vec"
+#define STORAGE_FORMAT_TYPE_DEFAULT STORAGE_FORMAT_TYPE_ORC
+
+#define PAX_DEFAULT_COMPRESSLEVEL AO_DEFAULT_COMPRESSLEVEL
+#define PAX_MIN_COMPRESSLEVEL AO_MIN_COMPRESSLEVEL
+#define PAX_MAX_COMPRESSLEVEL AO_MAX_COMPRESSLEVEL
+#define PAX_DEFAULT_COMPRESSTYPE ColumnEncoding_Kind_NO_ENCODED_STR
+
+#define PAX_SOPT_STORAGE_FORMAT "storage_format"
+#define PAX_SOPT_COMPTYPE SOPT_COMPTYPE
+#define PAX_SOPT_COMPLEVEL SOPT_COMPLEVEL
+#define PAX_SOPT_PARTITION_BY "partition_by"
+#define PAX_SOPT_PARTITION_RANGES "partition_ranges"
+
+// plain structure used by reloptions, can be accessed from C++ code.
+struct PaxOptions {
+ // Pax needs to define the StdRdOptions instead of just vl_len.
+ // This is because many places in the CBDB assume that option in
+ // relation can be cast into StdRdOptions.
+ StdRdOptions rd_options;
+ char storage_format[16];
+ char compress_type[16];
+ int compress_level;
+ int partition_by_offset = 0;
+ int partition_ranges_offset = 0;
+
+ char *partition_by() {
+ return partition_by_offset == 0
+ ? NULL
+ : reinterpret_cast(this) + partition_by_offset;
+ }
+ char *partition_ranges() {
+ return partition_ranges_offset == 0
+ ? NULL
+ : reinterpret_cast(this) + partition_ranges_offset;
+ }
+};
+
+#define RelationGetOptions(relation, field_name, default_opt) \
+ ((relation)->rd_options \
+ ? ((paxc::PaxOptions *)(relation)->rd_options)->field_name \
+ : (default_opt))
+
+/*
+ * used to register pax rel options
+ */
+void paxc_reg_rel_options();
+
+/*
+ * parse the rel options from the relation and `pg_attribute_encoding`;
+ * if no ENCODING is set in `pg_attribute_encoding`, fill in
+ * the default one
+ */
+bytea *paxc_default_rel_options(Datum reloptions, char /*relkind*/,
+ bool validate);
+
+/*
+ * parse the attr options from `pg_attribute_encoding`;
+ * if no ENCODING is set in `pg_attribute_encoding`, fill in
+ * the default one
+ */
+PaxOptions **paxc_relation_get_attribute_options(Relation rel);
+
+/*
+ * validate the ENCODING CLAUSES
+ * like `CREATE TABLE t1 (c1 int, COLUMN c1 ENCODING (key=value)) using
+ * pax`
+ */
+void paxc_validate_column_encoding_clauses(List *encoding_opts);
+
+/*
+ * transform the ENCODING options when keys are not set;
+ * validate becomes true only when the encoding syntax is used,
+ * like `CREATE TABLE t1 (c1 int ENCODING (key=value)) using pax`
+ *
+ * pax does not need to transform unset ENCODING options:
+ * it handles the default values inside the pax column itself
+ */
+List *paxc_transform_column_encoding_clauses(List *encoding_opts, bool validate,
+ bool fromType);
+
+} // namespace paxc
+
+namespace pax {
+
+// use to transform compress type str to encoding kind
+extern ColumnEncoding_Kind CompressKeyToColumnEncodingKind(
+ const char *encoding_str);
+
+extern PaxStorageFormat StorageFormatKeyToPaxStorageFormat(
+ const char *storage_format_str);
+
+} // namespace pax
diff --git a/contrib/pax_storage/src/cpp/access/paxc_scanner.cc b/contrib/pax_storage/src/cpp/access/paxc_scanner.cc
new file mode 100644
index 00000000000..43d628ee25f
--- /dev/null
+++ b/contrib/pax_storage/src/cpp/access/paxc_scanner.cc
@@ -0,0 +1,131 @@
+#include "access/paxc_scanner.h"
+
+#include "access/pax_partition.h"
+
+#define blank_char(ch) ((ch) == ' ' || (ch) == '\t' || (ch) == '\n')
+#define ident_char(ch) (((ch) >= 'a' && (ch) <= 'z') || \
+ ((ch) >= 'A' && (ch) <= 'Z') || \
+ ((ch) >= '0' && (ch) <= '9') || \
+ (ch) == '_')
+
+static inline const char *paxc_eat_blank(const char *s) {
+ while (blank_char(*s))
+ s++;
+ return s;
+}
+
+static inline const char *paxc_expect_char(const char *s, char ch) {
+ const char *p = paxc_eat_blank(s);
+ if (*p != ch)
+ elog(ERROR, "invalid syntax for partition range:'%s' at '%s'", s, p);
+
+ return p + 1;
+}
+
+static const char *paxc_expect_ident(const char *s, const char *ident) {
+ const char *p = s;
+ const char *q;
+ size_t n;
+
+ n = strlen(ident);
+ p = paxc_eat_blank(s);
+ if (strncasecmp(p, ident, n) != 0)
+ elog(ERROR, "unexpected ident: %s, want %s", s, ident);
+ q = p + n;
+ if (ident_char(*q))
+ elog(ERROR, "unexpected ident: %s, want %s", s, ident);
+
+ return q;
+}
+
+static const char *paxc_parse_single_integer(const char *expr, Node **result) {
+ const char *p;
+ char *endptr;
+ int val;
+
+ p = paxc_eat_blank(expr);
+ val = strtol(p, &endptr, 10);
+ A_Const *n = makeNode(A_Const);
+
+ n->val.type = T_Integer;
+ n->val.val.ival = val;
+ n->location = -1;
+ *result = (Node *)n;
+
+ return endptr;
+}
+
+static const char *paxc_parse_expr_list(const char *expr_list, List **result) {
+ const char *p = expr_list;
+
+ *result = NIL;
+ p = paxc_eat_blank(expr_list);
+ while (*p) {
+ Node *value = NULL;
+ p = paxc_parse_single_integer(p, &value);
+ Assert(value);
+
+ *result = lappend(*result, value);
+
+ p = paxc_eat_blank(p);
+ if (*p != ',') break;
+ p++;
+ }
+ return p;
+}
+
+List *paxc_parse_partition_ranges(const char *ranges) {
+ const char *p = ranges;
+ List *result = NIL;
+ if (!p || *p == '\0') return NIL;
+
+ while (*p && (p = paxc_expect_ident(p, "from"))) {
+ List *from_list = NIL;
+ List *to_list = NIL;
+ List *every_list = NIL;
+
+ p = paxc_expect_char(p, '(');
+ p = paxc_parse_expr_list(p, &from_list);
+ p = paxc_expect_char(p, ')');
+ Assert(from_list);
+
+ p = paxc_expect_ident(p, "to");
+ p = paxc_expect_char(p, '(');
+ p = paxc_parse_expr_list(p, &to_list);
+ p = paxc_expect_char(p, ')');
+ Assert(to_list);
+
+ p = paxc_eat_blank(p);
+ if (strncasecmp(p, "every", 5) == 0) {
+ // from(X) to(Y) every(Z)
+ p += 5;
+ p = paxc_expect_char(p, '(');
+ p = paxc_parse_expr_list(p, &every_list);
+ p = paxc_expect_char(p, ')');
+ Assert(every_list);
+ p = paxc_eat_blank(p);
+ }
+ if (*p == ',') {
+ p++;
+ } else if (*p != '\0') {
+ elog(ERROR, "unexpected range delimiter: %s", p);
+ }
+
+ if (list_length(from_list) == 0 ||
+ list_length(from_list) != list_length(to_list)) {
+ elog(ERROR, "the lengths of expr_list are not equal in from and to: %d %d",
+ list_length(from_list), list_length(to_list));
+ }
+
+ PartitionRangeExtension *ext = (PartitionRangeExtension *)palloc0(sizeof(PartitionRangeExtension));
+ PartitionBoundSpec *n = &ext->spec;
+ n->type = T_PartitionBoundSpec;
+ n->strategy = PARTITION_STRATEGY_RANGE;
+ n->is_default = false;
+ n->lowerdatums = from_list;
+ n->upperdatums = to_list;
+ ext->every = every_list;
+ result = lappend(result, ext);
+ }
+ return result;
+}
diff --git a/contrib/pax_storage/src/cpp/access/paxc_scanner.h b/contrib/pax_storage/src/cpp/access/paxc_scanner.h
new file mode 100644
index 00000000000..79ca99f9537
--- /dev/null
+++ b/contrib/pax_storage/src/cpp/access/paxc_scanner.h
@@ -0,0 +1,13 @@
+#pragma once
+#include "comm/cbdb_api.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+struct List;
+extern struct List *paxc_raw_parse(const char *str);
+extern struct List *paxc_parse_partition_ranges(const char *ranges);
+
+#ifdef __cplusplus
+}
+#endif
diff --git a/contrib/pax_storage/src/cpp/catalog/micro_partition_stats.cc b/contrib/pax_storage/src/cpp/catalog/micro_partition_stats.cc
deleted file mode 100644
index 3e0bd53d103..00000000000
--- a/contrib/pax_storage/src/cpp/catalog/micro_partition_stats.cc
+++ /dev/null
@@ -1,341 +0,0 @@
-#include "catalog/micro_partition_stats.h"
-
-#include "comm/cbdb_api.h"
-
-#include "comm/cbdb_wrappers.h"
-#include "storage/micro_partition_metadata.h"
-#include "storage/proto/proto_wrappers.h"
-
-namespace pax {
-// SetStatsMessage may be called several times in a write,
-// one for each micro partition, so all members need to reset.
-// Some metainfo like typid, collation, oids for less/greater,
-// fmgr should be exactly consistent.
-MicroPartitionStats *MicroPartitionStats::SetStatsMessage(
- pax::stats::MicroPartitionStatisticsInfo *stats, int natts) {
- FmgrInfo finfo;
- std::tuple zero_oids = {InvalidOid, InvalidOid, InvalidOid, InvalidOid};
-
- Assert(natts > 0);
- Assert(stats && stats->columnstats_size() == 0);
- initial_check_ = false;
- stats_ = stats;
-
- memset(&finfo, 0, sizeof(finfo));
- procs_.clear();
- finfos_.clear();
- status_.clear();
- for (int i = 0; i < natts; i++) {
- procs_.emplace_back(zero_oids);
- finfos_.emplace_back(std::pair({finfo, finfo}));
- status_.emplace_back('u');
- auto columnstats = stats_->add_columnstats();
- Assert(columnstats->allnull());
- Assert(!columnstats->hasnull());
- }
- Assert(stats_->columnstats_size() == natts);
- return this;
-}
-
-void MicroPartitionStats::AddRow(TupleTableSlot *slot) {
- auto desc = slot->tts_tupleDescriptor;
- auto n = desc->natts;
-
- if (!initial_check_) {
- DoInitialCheck(desc);
- initial_check_ = true;
- }
- CBDB_CHECK(status_.size() == static_cast(n),
- cbdb::CException::ExType::kExTypeSchemaNotMatch);
- for (auto i = 0; i < n; i++) {
- auto att = &desc->attrs[i];
-
- AssertImply(att->attisdropped, slot->tts_isnull[i]);
- if (slot->tts_isnull[i])
- AddNullColumn(i);
- else
- AddNonNullColumn(i, slot->tts_values[i], desc);
- }
-}
-
-void MicroPartitionStats::AddNullColumn(int column_index) {
- Assert(column_index >= 0);
- Assert(column_index < static_cast(procs_.size()));
-
- auto column_stats = stats_->mutable_columnstats(column_index);
- column_stats->set_hasnull(true);
-}
-
-void MicroPartitionStats::AddNonNullColumn(int column_index, Datum value,
- TupleDesc desc) {
- Assert(column_index >= 0);
- Assert(column_index < static_cast(procs_.size()));
-
- auto att = TupleDescAttr(desc, column_index);
- auto collation = att->attcollation;
- auto typlen = att->attlen;
- auto typbyval = att->attbyval;
- auto column_stats = stats_->mutable_columnstats(column_index);
- column_stats->set_allnull(false);
-
- // update min/max
- switch (status_[column_index]) {
- case 'x':
- break;
- case 'y':
- Assert(column_stats->minmaxstats().has_typid());
- Assert(column_stats->minmaxstats().has_minimal());
- Assert(column_stats->minmaxstats().has_maximum());
- Assert(column_stats->minmaxstats().has_proclt());
- Assert(column_stats->minmaxstats().has_procgt());
- Assert(column_stats->minmaxstats().has_procle());
- Assert(column_stats->minmaxstats().has_procge());
- Assert(column_stats->minmaxstats().typid() == att->atttypid);
- Assert(column_stats->minmaxstats().collation() == collation);
-
- UpdateMinMaxValue(column_index, value, collation, typlen, typbyval);
- break;
- case 'n': {
- auto minmax = column_stats->mutable_minmaxstats();
-
- Assert(!minmax->has_proclt());
- Assert(!minmax->has_procgt());
- Assert(!minmax->has_procle());
- Assert(!minmax->has_procge());
- Assert(!minmax->has_typid());
- Assert(!minmax->has_minimal());
- Assert(!minmax->has_maximum());
-
- minmax->set_typid(att->atttypid);
- minmax->set_collation(collation);
- minmax->set_proclt(std::get<0>(procs_[column_index]));
- minmax->set_procgt(std::get<1>(procs_[column_index]));
- minmax->set_procle(std::get<2>(procs_[column_index]));
- minmax->set_procge(std::get<3>(procs_[column_index]));
- minmax->set_minimal(ToValue(value, typlen, typbyval));
- minmax->set_maximum(ToValue(value, typlen, typbyval));
- status_[column_index] = 'y';
- break;
- }
- default:
- Assert(false);
- }
-}
-
-void MicroPartitionStats::UpdateMinMaxValue(int column_index, Datum datum,
- Oid collation, int typlen,
- bool typbyval) {
- Assert(initial_check_);
- Assert(column_index >= 0 && static_cast(column_index) < status_.size());
- Assert(status_[column_index] == 'y');
-
- auto &finfos = finfos_[column_index];
- auto minmax =
- stats_->mutable_columnstats(column_index)->mutable_minmaxstats();
- bool ok;
-
- {
- const auto &min = minmax->minimal();
- auto val = FromValue(min, typlen, typbyval, &ok);
- CBDB_CHECK(ok, cbdb::CException::kExTypeLogicError);
- auto update =
- DatumGetBool(cbdb::FunctionCall2Coll(&finfos.first, collation, datum, val));
- if (update) minmax->set_minimal(ToValue(datum, typlen, typbyval));
- }
- {
- const auto &max = minmax->maximum();
- auto val = FromValue(max, typlen, typbyval, &ok);
- CBDB_CHECK(ok, cbdb::CException::kExTypeLogicError);
- auto update =
- DatumGetBool(cbdb::FunctionCall2Coll(&finfos.second, collation, datum, val));
- if (update) minmax->set_maximum(ToValue(datum, typlen, typbyval));
- }
-}
-
-bool MicroPartitionStats::GetStrategyProcinfo(
- Oid typid, std::tuple &procids,
- std::pair &finfos) {
- return cbdb::MinMaxGetStrategyProcinfo(typid, &std::get<0>(procids), &finfos.first,
- BTLessStrategyNumber) &&
- cbdb::MinMaxGetStrategyProcinfo(typid, &std::get<1>(procids), &finfos.second,
- BTGreaterStrategyNumber) &&
- cbdb::MinMaxGetStrategyProcinfo(typid, &std::get<2>(procids), nullptr,
- BTLessEqualStrategyNumber) &&
- cbdb::MinMaxGetStrategyProcinfo(typid, &std::get<3>(procids), nullptr,
- BTGreaterEqualStrategyNumber);
-}
-
-void MicroPartitionStats::DoInitialCheck(TupleDesc desc) {
- auto natts = desc->natts;
-
- Assert(natts == static_cast(status_.size()));
- Assert(natts == stats_->columnstats_size());
- Assert(status_.size() == procs_.size());
- Assert(status_.size() == finfos_.size());
-
- for (int i = 0; i < natts; i++) {
- auto att = TupleDescAttr(desc, i);
- if (att->attisdropped ||
- !GetStrategyProcinfo(att->atttypid, procs_[i], finfos_[i])) {
- status_[i] = 'x';
- continue;
- }
- status_[i] = 'n';
- }
-}
-
-Datum MicroPartitionStats::FromValue(const std::string &s, int typlen,
- bool typbyval, bool *ok) {
- const char *p = s.data();
- *ok = true;
- if (typbyval) {
- Assert(typlen > 0);
- switch (typlen) {
- case 1: {
- int8 i = *reinterpret_cast(p);
- return cbdb::Int8ToDatum(i);
- }
- case 2: {
- int16 i = *reinterpret_cast(p);
- return cbdb::Int16ToDatum(i);
- }
- case 4: {
- int32 i = *reinterpret_cast(p);
- return cbdb::Int32ToDatum(i);
- }
- case 8: {
- int64 i = *reinterpret_cast(p);
- return cbdb::Int64ToDatum(i);
- }
- default:
- Assert(!"unexpected typbyval, len not in 1,2,4,8");
- *ok = false;
- break;
- }
- return 0;
- }
-
- Assert(typlen == -1 || typlen > 0);
- return PointerGetDatum(p);
-}
-
-std::string MicroPartitionStats::ToValue(Datum datum, int typlen,
- bool typbyval) {
- if (typbyval) {
- Assert(typlen > 0);
- switch (typlen) {
- case 1: {
- int8 i = cbdb::DatumToInt8(datum);
- return std::string(reinterpret_cast(&i), sizeof(i));
- }
- case 2: {
- int16 i = cbdb::DatumToInt16(datum);
- return std::string(reinterpret_cast(&i), sizeof(i));
- }
- case 4: {
- int32 i = cbdb::DatumToInt32(datum);
- return std::string(reinterpret_cast(&i), sizeof(i));
- }
- case 8: {
- int64 i = cbdb::DatumToInt64(datum);
- return std::string(reinterpret_cast(&i), sizeof(i));
- }
- default:
- Assert(!"unexpected typbyval, len not in 1,2,4,8");
- break;
- }
- CBDB_RAISE(cbdb::CException::kExTypeLogicError);
- }
-
- if (typlen == -1) {
- void *v;
- int len;
-
- v = cbdb::PointerAndLenFromDatum(datum, &len);
- Assert(v && len != -1);
- return std::string(reinterpret_cast(v), len);
- }
- // byref but fixed size
- Assert(typlen > 0);
- return std::string(reinterpret_cast(cbdb::DatumToPointer(datum)),
- typlen);
-}
-} // namespace pax
-
-static inline const char *BoolToString(bool b) { return b ? "true" : "false"; }
-
-static char *TypeValueToCString(Oid typid, Oid collation,
- const std::string &value) {
- FmgrInfo finfo;
- HeapTuple tuple;
- Form_pg_type form;
- Datum datum;
- bool ok;
-
- tuple = SearchSysCache1(TYPEOID, ObjectIdGetDatum(typid));
- if (!HeapTupleIsValid(tuple))
- elog(ERROR, "cache lookup failed for type %u", typid);
-
- form = (Form_pg_type)GETSTRUCT(tuple);
- Assert(OidIsValid(form->typoutput));
-
- datum = pax::MicroPartitionStats::FromValue(value, form->typlen,
- form->typbyval, &ok);
- if (!ok)
- elog(ERROR, "unexpected typlen: %d\n", form->typlen);
-
- fmgr_info_cxt(form->typoutput, &finfo, CurrentMemoryContext);
- datum = FunctionCall1Coll(&finfo, collation, datum);
- ReleaseSysCache(tuple);
-
- return DatumGetCString(datum);
-}
-
-// define stat type for custom output
-extern "C" {
-extern Datum MicroPartitionStatsInput(PG_FUNCTION_ARGS);
-extern Datum MicroPartitionStatsOutput(PG_FUNCTION_ARGS);
-PG_FUNCTION_INFO_V1(MicroPartitionStatsInput);
-PG_FUNCTION_INFO_V1(MicroPartitionStatsOutput);
-}
-
-Datum MicroPartitionStatsInput(PG_FUNCTION_ARGS) {
- ereport(ERROR, (errmsg("unsupport MicroPartitionStatsInput")));
- (void)fcinfo;
- PG_RETURN_POINTER(NULL);
-}
-
-Datum MicroPartitionStatsOutput(PG_FUNCTION_ARGS) {
- struct varlena *v = PG_GETARG_VARLENA_PP(0);
- pax::stats::MicroPartitionStatisticsInfo stats;
- StringInfoData str;
-
- bool ok = stats.ParseFromArray(VARDATA_ANY(v), VARSIZE_ANY_EXHDR(v));
- if (!ok) ereport(ERROR, (errmsg("micropartition stats is corrupt")));
-
- initStringInfo(&str);
- for (int i = 0, n = stats.columnstats_size(); i < n; i++) {
- const auto &column = stats.columnstats(i);
-
- if (i > 0) appendStringInfoChar(&str, ',');
-
- appendStringInfo(&str, "[(%s,%s)", BoolToString(column.allnull()),
- BoolToString(column.hasnull()));
-
- if (!column.has_minmaxstats()) {
- appendStringInfoString(&str, ",None]");
- continue;
- }
-
- const auto &minmax = column.minmaxstats();
- appendStringInfo(&str, ",(%u,%u,%u,%u,%s,%s)]", minmax.typid(),
- minmax.collation(), minmax.proclt(),
- minmax.procgt(),
- TypeValueToCString(minmax.typid(), minmax.collation(),
- minmax.minimal()),
- TypeValueToCString(minmax.typid(), minmax.collation(),
- minmax.maximum()));
- }
-
- PG_RETURN_CSTRING(str.data);
-}
diff --git a/contrib/pax_storage/src/cpp/catalog/micro_partition_stats.h b/contrib/pax_storage/src/cpp/catalog/micro_partition_stats.h
deleted file mode 100644
index 56be8e8cddf..00000000000
--- a/contrib/pax_storage/src/cpp/catalog/micro_partition_stats.h
+++ /dev/null
@@ -1,51 +0,0 @@
-#pragma once
-#include "comm/cbdb_api.h"
-
-#include
-#include
-#include
-
-namespace pax {
-namespace stats {
-class MicroPartitionStatisticsInfo;
-}
-
-class MicroPartitionStats final {
- public:
- MicroPartitionStats() = default;
- MicroPartitionStats *SetStatsMessage(
- pax::stats::MicroPartitionStatisticsInfo *stats, int natts);
-
- void AddRow(TupleTableSlot *slot);
-
- static std::string ToValue(Datum datum, int typlen, bool typbyval);
- static Datum FromValue(const std::string &s, int typlen, bool typbyval, bool *ok);
-
- private:
- void AddNullColumn(int column_index);
- void AddNonNullColumn(int column_index, Datum value, TupleDesc desc);
- void DoInitialCheck(TupleDesc desc);
- void UpdateMinMaxValue(int column_index, Datum datum, Oid collation,
- int typlen, bool typbyval);
- static bool GetStrategyProcinfo(Oid typid, std::tuple &procids,
- std::pair &finfos);
-
- // stats_: only references the info object by pointer
- pax::stats::MicroPartitionStatisticsInfo *stats_ = nullptr;
-
- // less: tuple[0], greater: tuple[1], le: tuple[2], ge: tuple[3]
- std::vector> procs_;
- // less: pair[0], greater: pair[1]
- std::vector> finfos_;
-
- // status to indicate whether the oids are initialized
- // or the min-max values are initialized
- // 'u': all is uninitialized
- // 'x': column doesn't support min-max
- // 'n': oids are initialized, but min-max value is missing
- // 'y': min-max is set, needs update.
- std::vector status_;
- bool initial_check_ = false;
-};
-
-} // namespace pax
diff --git a/contrib/pax_storage/src/cpp/catalog/pax_aux_table.cc b/contrib/pax_storage/src/cpp/catalog/pax_aux_table.cc
index 906d8d9fd22..44d97f25694 100644
--- a/contrib/pax_storage/src/cpp/catalog/pax_aux_table.cc
+++ b/contrib/pax_storage/src/cpp/catalog/pax_aux_table.cc
@@ -3,14 +3,14 @@
#include "comm/cbdb_api.h"
#include
-
#include
+#include "catalog/pax_fastsequence.h"
+#include "catalog/pg_pax_tables.h"
#include "comm/cbdb_wrappers.h"
#include "storage/file_system.h"
#include "storage/local_file_system.h"
#include "storage/micro_partition_metadata.h"
-#include "storage/paxc_block_map_manager.h"
namespace paxc {
@@ -46,26 +46,20 @@ static void CPaxTransactionalTruncateTable(Oid aux_relid) {
// 2.create table outside transactional block, insert data
// and truncate table inside transactional block.
static void CPaxNontransactionalTruncateTable(Relation rel) {
- HeapTuple tuple;
Relation aux_rel;
Oid aux_relid;
- tuple = SearchSysCache1(PAXTABLESID, RelationGetRelid(rel));
- if (!HeapTupleIsValid(tuple))
- ereport(ERROR, (errcode(ERRCODE_UNDEFINED_SCHEMA),
- errmsg("cache lookup failed with relid=%u for aux relation "
- "in pg_pax_tables.",
- RelationGetRelid(rel))));
- aux_relid = ((Form_pg_pax_tables)GETSTRUCT(tuple))->blocksrelid;
- ReleaseSysCache(tuple);
+ aux_relid = ::paxc::GetPaxAuxRelid(RelationGetRelid(rel));
Assert(OidIsValid(aux_relid));
aux_rel = relation_open(aux_relid, AccessExclusiveLock);
heap_truncate_one_rel(aux_rel);
relation_close(aux_rel, NoLock);
+
+ paxc::CPaxInitializeFastSequenceEntry(RelationGetRelid(rel), FASTSEQUENCE_INIT_TYPE_INPLACE);
}
-static void CPaxCreateMicroPartitionTable(const Relation rel) {
+void CPaxCreateMicroPartitionTable(Relation rel) {
Relation pg_class_desc;
char aux_relname[32];
Oid relid;
@@ -79,7 +73,7 @@ static void CPaxCreateMicroPartitionTable(const Relation rel) {
// 1. create blocks table.
snprintf(aux_relname, sizeof(aux_relname), "pg_pax_blocks_%u", pax_relid);
- aux_namespace_id = PG_PAXAUX_NAMESPACE;
+ aux_namespace_id = PG_EXTAUX_NAMESPACE;
aux_relid = GetNewOidForRelation(pg_class_desc, ClassOidIndexId,
Anum_pg_class_oid, // new line
aux_relname, aux_namespace_id);
@@ -91,12 +85,18 @@ static void CPaxCreateMicroPartitionTable(const Relation rel) {
// TODO(chenhongjie): uncompressed and compressed ptblocksize are needed.
TupleDescInitEntry(tupdesc, (AttrNumber)ANUM_PG_PAX_BLOCK_TABLES_PTBLOCKSIZE,
"ptblocksize", INT4OID, -1, 0);
- TupleDescInitEntry(tupdesc, (AttrNumber)ANUM_PG_PAX_BLOCK_TABLES_PTSTATISITICS,
+ TupleDescInitEntry(tupdesc,
+ (AttrNumber)ANUM_PG_PAX_BLOCK_TABLES_PTSTATISITICS,
"ptstatistics", PAX_AUX_STATS_TYPE_OID, -1, 0);
+ {
+ // Add constraints for the aux table
+ auto attr = TupleDescAttr(tupdesc, ANUM_PG_PAX_BLOCK_TABLES_PTBLOCKNAME - 1);
+ attr->attnotnull = true;
+ }
relid = heap_create_with_catalog(
aux_relname, aux_namespace_id, InvalidOid, aux_relid, InvalidOid,
InvalidOid, rel->rd_rel->relowner, HEAP_TABLE_AM_OID, tupdesc, NIL,
- RELKIND_RELATION, rel->rd_rel->relpersistence, rel->rd_rel->relisshared,
+ RELKIND_RELATION, RELPERSISTENCE_PERMANENT, rel->rd_rel->relisshared,
RelationIsMapped(rel), ONCOMMIT_NOOP, NULL, /* GP Policy */
(Datum)0, false, /* use _user_acl */
true, true, InvalidOid, NULL, /* typeaddress */
@@ -104,8 +104,10 @@ static void CPaxCreateMicroPartitionTable(const Relation rel) {
Assert(relid == aux_relid);
table_close(pg_class_desc, NoLock);
+ NewRelationCreateToastTable(relid, (Datum)0);
+
// 2. insert entry into pg_pax_tables.
- InsertPaxTablesEntry(pax_relid, aux_relid, "", 0);
+ ::paxc::InsertPaxTablesEntry(pax_relid, aux_relid, NULL);
// 3. record pg_depend, pg_pax_blocks_ depends relation.
{
@@ -123,84 +125,115 @@ static void CPaxCreateMicroPartitionTable(const Relation rel) {
base.classId = RelationRelationId;
base.objectId = pax_relid;
base.objectSubId = 0;
- aux.classId = PaxTablesRelationId;
+ aux.classId = PAX_TABLES_RELATION_ID;
aux.objectId = pax_relid;
aux.objectSubId = 0;
recordDependencyOn(&aux, &base, DEPENDENCY_INTERNAL);
}
-}
-
-static void CPaxDeletePaxBlockEntry(Oid relid, Snapshot pax_meta_data_snapshot,
- const char *blockname) {
- Relation rel;
- ScanKeyData key[1];
- SysScanDesc scan;
- HeapTuple tuple;
- NameData ptblockname;
+ CommandCounterIncrement();
- rel = table_open(relid, RowExclusiveLock);
- namestrcpy(&ptblockname, blockname);
- ScanKeyInit(&key[0], ANUM_PG_PAX_BLOCK_TABLES_PTBLOCKNAME,
- BTEqualStrategyNumber, F_NAMEEQ, NameGetDatum(&ptblockname));
-
- // should add snapshot support
- scan = systable_beginscan(rel, InvalidOid, false, pax_meta_data_snapshot, 1,
- key);
-
- tuple = systable_getnext(scan);
- if (HeapTupleIsValid(tuple)) {
- CatalogTupleDelete(rel, &tuple->t_self);
+ // 4. create index on ptblockname dynamically; the index is named "<aux_relname>_idx" (i.e. pg_pax_blocks_xxx_idx) in the aux table's namespace.
+ {
+ char aux_index_name[NAMEDATALEN];
+ IndexInfo *indexInfo;
+ List *indexColNames;
+ Relation aux_rel;
+ int16 coloptions[1];
+ Oid classObjectId[1];
+ Oid collationObjectId[1];
+
+ snprintf(aux_index_name, sizeof(aux_index_name), "%s_idx", aux_relname);
+
+ indexInfo = makeNode(IndexInfo);
+ indexInfo->ii_NumIndexAttrs = 1;
+ indexInfo->ii_NumIndexKeyAttrs = 1;
+ indexInfo->ii_IndexAttrNumbers[0] = ANUM_PG_PAX_BLOCK_TABLES_PTBLOCKNAME;
+ indexInfo->ii_Expressions = NIL;
+ indexInfo->ii_ExpressionsState = NIL;
+ indexInfo->ii_Predicate = NIL;
+ indexInfo->ii_PredicateState = NULL;
+ indexInfo->ii_Unique = true;
+ indexInfo->ii_ReadyForInserts = true;
+ indexInfo->ii_Concurrent = false;
+ indexInfo->ii_Am = BTREE_AM_OID;
+ indexInfo->ii_Context = CurrentMemoryContext;
+
+ collationObjectId[0] = C_COLLATION_OID;
+ classObjectId[0] = GetDefaultOpClass(NAMEOID, BTREE_AM_OID);
+ coloptions[0] = 0;
+
+ auto attr = TupleDescAttr(tupdesc, ANUM_PG_PAX_BLOCK_TABLES_PTBLOCKNAME - 1);
+ indexColNames = list_make1(NameStr(attr->attname));
+
+ // ShareLock is not really needed here, but take it anyway.
+ aux_rel = table_open(aux_relid, ShareLock);
+
+ index_create(aux_rel,
+ aux_index_name,
+ InvalidOid,
+ InvalidOid,
+ InvalidOid,
+ InvalidOid,
+ indexInfo,
+ indexColNames,
+ BTREE_AM_OID,
+ rel->rd_rel->reltablespace,
+ collationObjectId, classObjectId, coloptions, (Datum) 0,
+ INDEX_CREATE_IS_PRIMARY, 0, true, true, NULL);
+
+ // Unlock target table -- no one can see it
+ table_close(aux_rel, ShareLock);
+
+ // Unlock the index -- no one can see it anyway
+ //UnlockRelationOid(paxauxiliary_idxid, AccessExclusiveLock);
+
+ CommandCounterIncrement();
}
- systable_endscan(scan);
- table_close(rel, RowExclusiveLock);
}
-static void CPaxCopyPaxBlockEntry(Relation old_relation, Relation new_relation) {
+void DeleteMicroPartitionEntry(Oid pax_relid, Snapshot snapshot,
+ const char *blockname) {
+ ScanAuxContext context;
HeapTuple tuple;
- SysScanDesc pax_scan;
- Relation old_aux_rel, new_aux_rel;
- Oid old_aux_relid = 0, new_aux_relid = 0;
+ Oid aux_relid;
- HeapTuple tupcache;
- tupcache = SearchSysCache1(PAXTABLESID, RelationGetRelid(old_relation));
- Assert(HeapTupleIsValid(tupcache));
- old_aux_relid = ((Form_pg_pax_tables)GETSTRUCT(tupcache))->blocksrelid;
- ReleaseSysCache(tupcache);
+ aux_relid = ::paxc::GetPaxAuxRelid(pax_relid);
- tupcache = SearchSysCache1(PAXTABLESID, RelationGetRelid(new_relation));
- Assert(HeapTupleIsValid(tupcache));
- new_aux_relid = ((Form_pg_pax_tables)GETSTRUCT(tupcache))->blocksrelid;
- ReleaseSysCache(tupcache);
+ context.BeginSearchMicroPartition(aux_relid, InvalidOid, snapshot, RowExclusiveLock, blockname);
+ tuple = context.SearchMicroPartitionEntry();
+ if (!HeapTupleIsValid(tuple))
+ elog(ERROR, "delete micro partition \"%s\" failed for relation(%u)", blockname, pax_relid);
- old_aux_rel = table_open(old_aux_relid, RowExclusiveLock);
- new_aux_rel = table_open(new_aux_relid, RowExclusiveLock);
+ Assert(context.GetRelation());
+ CatalogTupleDelete(context.GetRelation(), &tuple->t_self);
- pax_scan = systable_beginscan(old_aux_rel, InvalidOid, false,
- NULL, 0, NULL);
- while ((tuple = systable_getnext(pax_scan)) != NULL) {
- CatalogTupleInsert(new_aux_rel, tuple);
- }
- systable_endscan(pax_scan);
- table_close(old_aux_rel, RowExclusiveLock);
- table_close(new_aux_rel, RowExclusiveLock);
+ context.EndSearchMicroPartition(NoLock);
}
-} // namespace paxc
+void InsertMicroPartitionPlaceHolder(Oid aux_relid, const char *blockname) {
+ NameData ptblockname;
+ Datum values[NATTS_PG_PAX_BLOCK_TABLES];
+ bool nulls[NATTS_PG_PAX_BLOCK_TABLES];
-namespace cbdb {
-Oid GetPaxAuxRelid(Oid relid) {
- Oid aux_relid = InvalidOid;
- CBDB_WRAP_START;
- {
- GetPaxTablesEntryAttributes(relid, &aux_relid, NULL, NULL);
- return aux_relid;
- }
- CBDB_WRAP_END;
+ Assert(blockname && strlen(blockname) < NAMEDATALEN);
+ namestrcpy(&ptblockname, blockname);
+
+ values[ANUM_PG_PAX_BLOCK_TABLES_PTBLOCKNAME - 1] = NameGetDatum(&ptblockname);
+ nulls[ANUM_PG_PAX_BLOCK_TABLES_PTBLOCKNAME - 1] = false;
+
+ nulls[ANUM_PG_PAX_BLOCK_TABLES_PTTUPCOUNT - 1] = true;
+ nulls[ANUM_PG_PAX_BLOCK_TABLES_PTBLOCKSIZE - 1] = true;
+ nulls[ANUM_PG_PAX_BLOCK_TABLES_PTSTATISITICS - 1] = true;
+
+ InsertTuple(aux_relid, values, nulls);
+ CommandCounterIncrement();
}
-static void InsertPaxBlockEntry(Oid relid, const char *blockname, int pttupcount,
- int ptblocksize, const ::pax::stats::MicroPartitionStatisticsInfo &mp_stats) {
+void InsertOrUpdateMicroPartitionPlaceHolder(Oid aux_relid,
+ const char *blockname,
+ int num_tuples, int file_size,
+ const ::pax::stats::MicroPartitionStatisticsInfo &mp_stats) {
int stats_length = mp_stats.ByteSize();
uint32 len = VARHDRSZ + stats_length;
void *output;
@@ -209,58 +242,221 @@ static void InsertPaxBlockEntry(Oid relid, const char *blockname, int pttupcount
Datum values[NATTS_PG_PAX_BLOCK_TABLES];
bool nulls[NATTS_PG_PAX_BLOCK_TABLES];
- output = cbdb::Palloc(len);
+ output = palloc(len);
SET_VARSIZE(output, len);
mp_stats.SerializeToArray(VARDATA(output), stats_length);
Assert(blockname);
namestrcpy(&ptblockname, blockname);
- values[ANUM_PG_PAX_BLOCK_TABLES_PTBLOCKNAME - 1] =
- NameGetDatum(&ptblockname);
+ values[ANUM_PG_PAX_BLOCK_TABLES_PTBLOCKNAME - 1] = NameGetDatum(&ptblockname);
nulls[ANUM_PG_PAX_BLOCK_TABLES_PTBLOCKNAME - 1] = false;
- values[ANUM_PG_PAX_BLOCK_TABLES_PTTUPCOUNT - 1] = Int32GetDatum(pttupcount);
+ values[ANUM_PG_PAX_BLOCK_TABLES_PTTUPCOUNT - 1] = Int32GetDatum(num_tuples);
nulls[ANUM_PG_PAX_BLOCK_TABLES_PTTUPCOUNT - 1] = false;
- values[ANUM_PG_PAX_BLOCK_TABLES_PTBLOCKSIZE - 1] =
- Int32GetDatum(ptblocksize);
+ values[ANUM_PG_PAX_BLOCK_TABLES_PTBLOCKSIZE - 1] = Int32GetDatum(file_size);
nulls[ANUM_PG_PAX_BLOCK_TABLES_PTBLOCKSIZE - 1] = false;
- // Serialize catalog statitics information into PG bytea format and saved in aux table ptstatitics column.
+ // Serialize catalog statistics information into PG bytea format and save it in
+ // the aux table's ptstatistics column.
values[ANUM_PG_PAX_BLOCK_TABLES_PTSTATISITICS - 1] = PointerGetDatum(output);
nulls[ANUM_PG_PAX_BLOCK_TABLES_PTSTATISITICS - 1] = false;
- CBDB_WRAP_START;
- {
- paxc::InsertTuple(relid, values, nulls);
+ ScanAuxContext context;
+ context.BeginSearchMicroPartition(aux_relid, InvalidOid, NULL, RowExclusiveLock, blockname);
+ auto aux_rel = context.GetRelation();
+ auto oldtuple = context.SearchMicroPartitionEntry();
+ if (!HeapTupleIsValid(oldtuple))
+ elog(ERROR, "micro partition doesn't exist before inserting tuples");
+
+ if (num_tuples > 0) {
+ auto newtuple = heap_form_tuple(RelationGetDescr(aux_rel), values, nulls);
+
+ newtuple->t_data->t_ctid = oldtuple->t_data->t_ctid;
+ newtuple->t_self = oldtuple->t_self;
+ newtuple->t_tableOid = oldtuple->t_tableOid;
+ CatalogTupleUpdate(aux_rel, &newtuple->t_self, newtuple);
+ heap_freetuple(newtuple);
+ } else {
+ CatalogTupleDelete(aux_rel, &oldtuple->t_self);
+ }
+ context.EndSearchMicroPartition(NoLock);
+
+ pfree(output);
+
+ CommandCounterIncrement();
+}
+
+Oid FindAuxIndexOid(Oid aux_relid, Snapshot snapshot) {
+ ScanKeyData scankey[1];
+ Relation indrel;
+ SysScanDesc scan;
+ HeapTuple tuple;
+ Oid index_oid;
+ int index_count = 0;
+
+ ScanKeyInit(&scankey[0], Anum_pg_index_indrelid, BTEqualStrategyNumber, F_OIDEQ, ObjectIdGetDatum(aux_relid));
+ indrel = table_open(IndexRelationId, AccessShareLock);
+ scan = systable_beginscan(indrel, IndexIndrelidIndexId, true, snapshot, 1, scankey);
+
+ index_oid = InvalidOid;
+ while (HeapTupleIsValid(tuple = systable_getnext(scan))) {
+ auto index = (Form_pg_index) GETSTRUCT(tuple);
+ index_count++;
+ if (!index->indislive || !index->indisvalid) continue;
+ index_oid = index->indexrelid;
+ }
+ systable_endscan(scan);
+ table_close(indrel, NoLock);
+
+ if (index_count != 1 || !OidIsValid(index_oid))
+ elog(ERROR, "unexpected number of index of aux table: %d", index_count);
+
+ return index_oid;
+}
+
+static inline Oid GetAuxIndexOid(Oid aux_relid, Oid *aux_index_relid, Snapshot snapshot) {
+ if (aux_index_relid) {
+ if (OidIsValid(*aux_index_relid))
+ return *aux_index_relid;
+ else
+ return *aux_index_relid = FindAuxIndexOid(aux_relid, snapshot);
+ } else {
+ return FindAuxIndexOid(aux_relid, snapshot);
+ }
+}
+
+void ScanAuxContext::BeginSearchMicroPartition(Oid aux_relid, Oid aux_index_relid, Snapshot snapshot, LOCKMODE lockmode, const char *blockname) {
+ Assert(aux_relid);
+ if (!OidIsValid(aux_index_relid) && blockname)
+ aux_index_relid = FindAuxIndexOid(aux_relid, snapshot);
+
+ aux_rel_ = table_open(aux_relid, lockmode);
+ if (blockname) {
+ ScanKeyData scankey[1];
+
+ ScanKeyInit(&scankey[0], ANUM_PG_PAX_BLOCK_TABLES_PTBLOCKNAME, BTEqualStrategyNumber, F_NAMEEQ, CStringGetDatum(blockname));
+ scan_ = systable_beginscan(aux_rel_, aux_index_relid, true, snapshot, 1, scankey);
+ } else {
+ scan_ = systable_beginscan(aux_rel_, aux_index_relid, false, snapshot, 0, nullptr);
+ }
+}
+
+HeapTuple ScanAuxContext::SearchMicroPartitionEntry() {
+ Assert(aux_rel_ && scan_);
+ return systable_getnext(scan_);
+}
+
+void ScanAuxContext::EndSearchMicroPartition(LOCKMODE lockmode) {
+ Assert(aux_rel_ && scan_);
+
+ systable_endscan(scan_);
+ table_close(aux_rel_, lockmode);
+ scan_ = nullptr;
+ aux_rel_ = nullptr;
+}
+
+void PaxAuxRelationSetNewFilenode(Oid aux_relid) {
+ Relation aux_rel;
+ Oid toastrelid;
+ ReindexParams reindex_params = {0};
+
+ aux_rel = relation_open(aux_relid, AccessExclusiveLock);
+ RelationSetNewRelfilenode(aux_rel, aux_rel->rd_rel->relpersistence);
+ toastrelid = aux_rel->rd_rel->reltoastrelid;
+ if (OidIsValid(toastrelid)) {
+ Relation toast_rel;
+ toast_rel = relation_open(toastrelid, AccessExclusiveLock);
+ RelationSetNewRelfilenode(toast_rel, toast_rel->rd_rel->relpersistence);
+ relation_close(toast_rel, NoLock);
}
+ if (aux_rel->rd_rel->relhasindex)
+ reindex_relation(aux_relid, REINDEX_REL_PROCESS_TOAST, &reindex_params);
+ pgstat_count_truncate(aux_rel);
+ relation_close(aux_rel, NoLock);
+}
+
+bool IsMicroPartitionVisible(Relation pax_rel, BlockNumber block, Snapshot snapshot) {
+ struct ScanAuxContext context;
+ HeapTuple tuple;
+ Oid aux_relid;
+ char block_name[NAMEDATALEN];
+ bool ok;
+
+ aux_relid = ::paxc::GetPaxAuxRelid(RelationGetRelid(pax_rel));
+ snprintf(block_name, sizeof(block_name), "%u", block);
+
+ context.BeginSearchMicroPartition(aux_relid, InvalidOid, snapshot, AccessShareLock, block_name);
+ tuple = context.SearchMicroPartitionEntry();
+ ok = HeapTupleIsValid(tuple);
+ context.EndSearchMicroPartition(NoLock);
+
+ return ok;
+}
+
+static void CPaxCopyPaxBlockEntry(Relation old_relation,
+ Relation new_relation) {
+ HeapTuple tuple;
+ SysScanDesc pax_scan;
+ Relation old_aux_rel, new_aux_rel;
+ Oid old_aux_relid = 0, new_aux_relid = 0;
+
+ old_aux_relid = ::paxc::GetPaxAuxRelid(RelationGetRelid(old_relation));
+ new_aux_relid = ::paxc::GetPaxAuxRelid(RelationGetRelid(new_relation));
+ old_aux_rel = table_open(old_aux_relid, RowExclusiveLock);
+ new_aux_rel = table_open(new_aux_relid, RowExclusiveLock);
+
+ pax_scan = systable_beginscan(old_aux_rel, InvalidOid, false, NULL, 0, NULL);
+ while ((tuple = systable_getnext(pax_scan)) != NULL) {
+ CatalogTupleInsert(new_aux_rel, tuple);
+ }
+ systable_endscan(pax_scan);
+ table_close(old_aux_rel, RowExclusiveLock);
+ table_close(new_aux_rel, RowExclusiveLock);
+}
+
+} // namespace paxc
+
+namespace cbdb {
+Oid GetPaxAuxRelid(Oid relid) {
+ CBDB_WRAP_START;
+ { return ::paxc::GetPaxAuxRelid(relid); }
CBDB_WRAP_END;
+}
- cbdb::Pfree(output);
+void DeleteMicroPartitionEntry(Oid pax_relid, Snapshot snapshot,
+ const std::string &blockname) {
+ CBDB_WRAP_START;
+ { paxc::DeleteMicroPartitionEntry(pax_relid, snapshot, blockname.c_str()); }
+ CBDB_WRAP_END;
}
-static void DeletePaxBlockEntry(Oid relid, Snapshot snapshot,
- const char *blockname) {
+void InsertMicroPartitionPlaceHolder(Oid pax_relid, const std::string &blockname) {
CBDB_WRAP_START;
{
- paxc::CPaxDeletePaxBlockEntry(relid, snapshot, blockname);
+ Oid aux_relid;
+
+ aux_relid = ::paxc::GetPaxAuxRelid(pax_relid);
+ paxc::InsertMicroPartitionPlaceHolder(aux_relid, blockname.c_str());
}
CBDB_WRAP_END;
}
+void InsertOrUpdateMicroPartitionEntry(const pax::WriteSummary &summary) {
+ CBDB_WRAP_START;
+ {
+ Oid aux_relid;
-void DeleteMicroPartitionEntry(Oid pax_relid,
- Snapshot snapshot,
- const std::string &block_id) {
- Oid aux_relid = GetPaxAuxRelid(pax_relid);
- cbdb::DeletePaxBlockEntry(aux_relid, snapshot,
- block_id.c_str());
+ aux_relid = ::paxc::GetPaxAuxRelid(summary.rel_oid);
+ paxc::InsertOrUpdateMicroPartitionPlaceHolder(aux_relid, summary.block_id.c_str(),
+ summary.num_tuples, summary.file_size, summary.mp_stats);
+ }
+ CBDB_WRAP_END;
}
-void AddMicroPartitionEntry(const pax::WriteSummary &summary) {
- Oid aux_relid;
- aux_relid = GetPaxAuxRelid(summary.rel_oid);
- cbdb::InsertPaxBlockEntry(aux_relid, summary.block_id.c_str(),
- summary.num_tuples, summary.file_size, summary.mp_stats);
+bool IsMicroPartitionVisible(Relation pax_rel, BlockNumber block, Snapshot snapshot) {
+ CBDB_WRAP_START;
+ { return paxc::IsMicroPartitionVisible(pax_rel, block, snapshot); }
+ CBDB_WRAP_END;
}
static void PaxTransactionalTruncateTable(Oid aux_relid) {
@@ -289,31 +485,6 @@ static void PaxCopyPaxBlockEntry(Relation old_relation, Relation new_relation) {
} // namespace cbdb
namespace pax {
-void CCPaxAuxTable::PaxAuxRelationSetNewFilenode(Relation rel,
- const RelFileNode *newrnode,
- char persistence) {
- HeapTuple tupcache;
- std::string path;
- FileSystem *fs = pax::Singleton::GetInstance();
-
- tupcache = cbdb::SearchSysCache(rel, PAXTABLESID);
- if (cbdb::TupleIsValid(tupcache)) {
- Oid aux_relid = ((Form_pg_pax_tables)GETSTRUCT(tupcache))->blocksrelid;
- cbdb::PaxTransactionalTruncateTable(aux_relid);
- cbdb::ReleaseTupleCache(tupcache);
- } else {
- // create pg_pax_blocks_
- cbdb::PaxCreateMicroPartitionTable(rel);
- }
-
- // Create pax table relfilenode file and database directory under path base/,
- // The relfilenode created here is to be compatible with PG normal process
- // logic instead of being used by pax storage.
- cbdb::RelationCreateStorageDirectory(*newrnode, persistence, SMGR_MD, rel);
- path = cbdb::BuildPaxDirectoryPath(*newrnode, rel->rd_backend);
- Assert(!path.empty());
- CBDB_CHECK((fs->CreateDirectory(path) == 0), cbdb::CException::ExType::kExTypeIOError);
-}
void CCPaxAuxTable::PaxAuxRelationNontransactionalTruncate(Relation rel) {
cbdb::PaxNontransactionalTruncateTable(rel);
@@ -337,43 +508,52 @@ void CCPaxAuxTable::PaxAuxRelationCopyData(Relation rel,
src_path = cbdb::BuildPaxDirectoryPath(rel->rd_node, rel->rd_backend);
Assert(!src_path.empty());
- // get micropatition file source folder filename list for copying.
- filelist = fs->ListDirectory(src_path);
- if (filelist.empty()) return;
-
dst_path = cbdb::BuildPaxDirectoryPath(*newrnode, rel->rd_backend);
Assert(!dst_path.empty());
if (src_path.empty() || dst_path.empty())
CBDB_RAISE(cbdb::CException::ExType::kExTypeFileOperationError);
- // createnewpath is used to indicate if creating destination micropartition file directory and storage file for copying or not.
- // 1. For RelationCopyData case, createnewpath should be set as true to explicitly create a new destination directory under
+ // createnewpath indicates whether to create the destination micropartition
+ // file directory and storage file for copying.
+ // 1. For RelationCopyData case, createnewpath should be set as true to
+ // explicitly create a new destination directory under
// new tablespace path pg_tblspc/.
- // 2. For RelationCopyDataForCluster case, createnewpath should be set as false cause the destination directory was already
- // created with a new temp table by previously calling PaxAuxRelationSetNewFilenode.
+ // 2. For RelationCopyDataForCluster case, createnewpath should be set as
+ // false because the destination directory was already
+ // created with a new temp table by previously calling
+ // PaxAuxRelationSetNewFilenode.
if (createnewpath) {
// create pg_pax_table relfilenode file and dbid directory.
cbdb::RelationCreateStorageDirectory(*newrnode, rel->rd_rel->relpersistence,
- SMGR_MD, rel);
+ SMGR_MD, rel);
// create micropartition file destination folder for copying.
- CBDB_CHECK((fs->CreateDirectory(dst_path) == 0), cbdb::CException::ExType::kExTypeIOError);
+ CBDB_CHECK((fs->CreateDirectory(dst_path) == 0),
+ cbdb::CException::ExType::kExTypeIOError);
}
+ // Get micropartition file source folder filename list for copying, if file
+ // list is empty then skip copying file directly.
+ filelist = fs->ListDirectory(src_path);
+ if (filelist.empty()) return;
+
for (auto &iter : filelist) {
Assert(!iter.empty());
- src_path.append("/");
- src_path.append(iter);
- dst_path.append("/");
- dst_path.append(iter);
- fs->CopyFile(src_path, dst_path);
+ std::string src_file = src_path;
+ std::string dst_file = dst_path;
+ src_file.append("/");
+ src_file.append(iter);
+ dst_file.append("/");
+ dst_file.append(iter);
+ fs->CopyFile(src_file, dst_file);
}
// TODO(Tony) : here need to implement pending delete srcPath after set new
// tablespace.
}
-void CCPaxAuxTable::PaxAuxRelationCopyDataForCluster(Relation old_rel, Relation new_rel) {
+void CCPaxAuxTable::PaxAuxRelationCopyDataForCluster(Relation old_rel,
+ Relation new_rel) {
PaxAuxRelationCopyData(old_rel, &new_rel->rd_node, false);
cbdb::PaxCopyPaxBlockEntry(old_rel, new_rel);
// TODO(Tony) : here need to implement PAX re-organize semantics logic.
@@ -388,5 +568,5 @@ void CCPaxAuxTable::PaxAuxRelationFileUnlink(RelFileNode node,
relpath = cbdb::BuildPaxDirectoryPath(node, backend);
fs->DeleteDirectory(relpath, delete_topleveldir);
}
-} // namespace pax
+} // namespace pax
diff --git a/contrib/pax_storage/src/cpp/catalog/pax_aux_table.h b/contrib/pax_storage/src/cpp/catalog/pax_aux_table.h
index 7d79f36f863..3d39ef67e4d 100644
--- a/contrib/pax_storage/src/cpp/catalog/pax_aux_table.h
+++ b/contrib/pax_storage/src/cpp/catalog/pax_aux_table.h
@@ -1,10 +1,9 @@
#pragma once
-#include "catalog/pax_aux_table.h"
-
#include "comm/cbdb_api.h"
#include
+#include "catalog/pax_aux_table.h"
#include "storage/micro_partition_metadata.h"
#define ANUM_PG_PAX_BLOCK_TABLES_PTBLOCKNAME 1
@@ -13,6 +12,37 @@
#define ANUM_PG_PAX_BLOCK_TABLES_PTSTATISITICS 4
#define NATTS_PG_PAX_BLOCK_TABLES 4
+namespace paxc {
+void CPaxCreateMicroPartitionTable(Relation rel);
+
+Oid FindAuxIndexOid(Oid aux_relid, Snapshot snapshot);
+
+void InsertMicroPartitionPlaceHolder(Oid aux_relid, const char *blockname);
+void DeleteMicroPartitionEntry(Oid pax_relid, Snapshot snapshot, const char *blockname);
+// Scan aux table
+// seqscan: MicroPartitionInfoIterator
+// index scan
+struct ScanAuxContext {
+ public:
+ void BeginSearchMicroPartition(Oid aux_relid, Oid aux_index_relid,
+ Snapshot snapshot, LOCKMODE lockmode, const char *blockname);
+ void BeginSearchMicroPartition(Oid aux_relid, Snapshot snapshot, LOCKMODE lockmode) {
+ BeginSearchMicroPartition(aux_relid, InvalidOid, snapshot, lockmode, nullptr);
+ }
+ HeapTuple SearchMicroPartitionEntry();
+ void EndSearchMicroPartition(LOCKMODE lockmode);
+
+ Relation GetRelation() { return aux_rel_; }
+
+ private:
+ Relation aux_rel_ = nullptr;
+ SysScanDesc scan_ = nullptr;
+};
+
+void PaxAuxRelationSetNewFilenode(Oid aux_relid);
+bool IsMicroPartitionVisible(Relation pax_rel, BlockNumber block, Snapshot snapshot);
+}
+
namespace pax {
class CCPaxAuxTable final {
public:
@@ -25,26 +55,27 @@ class CCPaxAuxTable final {
static void PaxAuxRelationNontransactionalTruncate(Relation rel);
- static void PaxAuxRelationCopyData(Relation rel,
- const RelFileNode *newrnode,
+ static void PaxAuxRelationCopyData(Relation rel, const RelFileNode *newrnode,
bool createnewpath = true);
- static void PaxAuxRelationCopyDataForCluster(Relation old_rel, Relation new_rel);
+ static void PaxAuxRelationCopyDataForCluster(Relation old_rel,
+ Relation new_rel);
static void PaxAuxRelationFileUnlink(RelFileNode node, BackendId backend,
bool delete_topleveldir);
};
+
} // namespace pax
namespace cbdb {
Oid GetPaxAuxRelid(Oid relid);
-void AddMicroPartitionEntry(const pax::WriteSummary &summary);
+void InsertMicroPartitionPlaceHolder(Oid pax_relid, const std::string &blockname);
+void InsertOrUpdateMicroPartitionEntry(const pax::WriteSummary &summary);
-void DeleteMicroPartitionEntry(Oid pax_relid,
- Snapshot snapshot,
- const std::string &block_id);
+void DeleteMicroPartitionEntry(Oid pax_relid, Snapshot snapshot,
+ const std::string &blockname);
+bool IsMicroPartitionVisible(Relation pax_rel, BlockNumber block, Snapshot snapshot);
} // namespace cbdb
-
diff --git a/contrib/pax_storage/src/cpp/catalog/pax_fastsequence.cc b/contrib/pax_storage/src/cpp/catalog/pax_fastsequence.cc
new file mode 100644
index 00000000000..a78cdf9c235
--- /dev/null
+++ b/contrib/pax_storage/src/cpp/catalog/pax_fastsequence.cc
@@ -0,0 +1,175 @@
+#include "catalog/pax_fastsequence.h"
+
+#include "comm/cbdb_api.h"
+
+namespace paxc {
+
+// Get the required objid Tuple from pg_pax_fastsequence system table.
+// objid indicates single pax micro-partition table oid.
+// lock_mode indicates the lock level used when retrieving data from the system table.
+static HeapTuple CPaxOpenFastSequenceTable(Oid objid,
+ Relation *pax_fastsequence_rel,
+ SysScanDesc *pax_fastsequece_scan,
+ LOCKMODE lock_mode) {
+ ScanKeyData scankey[1];
+ HeapTuple tuple;
+ Relation rel;
+ SysScanDesc scan;
+
+ rel = table_open(PAX_FASTSEQUENCE_OID, lock_mode);
+
+ /* SELECT * FROM paxaux.pg_pax_fastsequence WHERE objid = :1 FOR UPDATE */
+ ScanKeyInit(&scankey[0], ANUM_PG_PAX_FAST_SEQUENCE_OBJID,
+ BTEqualStrategyNumber, F_OIDEQ, ObjectIdGetDatum(objid));
+
+ scan = systable_beginscan(rel, PAX_FASTSEQUENCE_INDEX_OID, true, NULL, 1,
+ scankey);
+
+ tuple = systable_getnext(scan);
+
+ *pax_fastsequence_rel = rel;
+ *pax_fastsequece_scan = scan;
+
+ return tuple;
+}
+
+static inline void CPaxCloseFastSequenceTable(Relation pax_fastsequence_rel,
+ SysScanDesc pax_fastsequece_scan,
+ LOCKMODE lock_mode) {
+ systable_endscan(pax_fastsequece_scan);
+ table_close(pax_fastsequence_rel, lock_mode);
+}
+
+// Update the existing fast sequence number for (objid).
+// The caller must supply a valid existing tuple; it is updated in place
+// with the new sequence number.
+static void CPaxUpdateFastsequence(Relation pax_fastsequence_rel,
+ HeapTuple old_tuple, TupleDesc tuple_desc,
+ Oid objid, int32 new_seqno) {
+ HeapTuple new_tuple;
+ Datum values[NATTS_PG_PAX_FAST_SEQUENCE_TABLES];
+ bool nulls[NATTS_PG_PAX_FAST_SEQUENCE_TABLES];
+
+ // The tuple for (objid) must already exist at this point.
+ Assert(HeapTupleIsValid(old_tuple));
+
+ values[ANUM_PG_PAX_FAST_SEQUENCE_OBJID - 1] = ObjectIdGetDatum(objid);
+ values[ANUM_PG_PAX_FAST_SEQUENCE_LASTSEQUENCE - 1] =
+ Int32GetDatum(new_seqno);
+ nulls[ANUM_PG_PAX_FAST_SEQUENCE_OBJID - 1] = false;
+ nulls[ANUM_PG_PAX_FAST_SEQUENCE_LASTSEQUENCE - 1] = false;
+
+ new_tuple = heap_form_tuple(tuple_desc, values, nulls);
+ Assert(HeapTupleIsValid(new_tuple));
+
+ new_tuple->t_data->t_ctid = old_tuple->t_data->t_ctid;
+ new_tuple->t_self = old_tuple->t_self;
+
+ heap_inplace_update(pax_fastsequence_rel, new_tuple);
+ heap_freetuple(new_tuple);
+}
+
+// InitializeFastSequenceEntry is used to generate and keep track of allocated
+// micropartition file number. objid indicates single pax micro-partition table
+// oid. lastsequence indicates the current allocated file number by using
+// fastsequence allocation.
+void CPaxInitializeFastSequenceEntry(Oid objid, char init_type) {
+ Relation pax_fastsequence_rel;
+ SysScanDesc scan;
+ TupleDesc desc;
+ HeapTuple tuple;
+ HeapTuple new_tuple;
+ Datum values[NATTS_PG_PAX_FAST_SEQUENCE_TABLES];
+ bool nulls[NATTS_PG_PAX_FAST_SEQUENCE_TABLES];
+
+ Assert(init_type == FASTSEQUENCE_INIT_TYPE_CREATE ||
+ init_type == FASTSEQUENCE_INIT_TYPE_INPLACE ||
+ init_type == FASTSEQUENCE_INIT_TYPE_UPDATE);
+ // Initialize a new object id and use row-based exclusive lock to avoid
+ // concurrency issue.
+ tuple = CPaxOpenFastSequenceTable(objid, &pax_fastsequence_rel, &scan,
+ RowExclusiveLock);
+
+ desc = RelationGetDescr(pax_fastsequence_rel);
+ values[ANUM_PG_PAX_FAST_SEQUENCE_OBJID - 1] = ObjectIdGetDatum(objid);
+ values[ANUM_PG_PAX_FAST_SEQUENCE_LASTSEQUENCE - 1] = Int32GetDatum(0);
+ nulls[ANUM_PG_PAX_FAST_SEQUENCE_OBJID - 1] = false;
+ nulls[ANUM_PG_PAX_FAST_SEQUENCE_LASTSEQUENCE - 1] = false;
+ new_tuple = heap_form_tuple(desc, values, nulls);
+
+ if (init_type == FASTSEQUENCE_INIT_TYPE_CREATE) {
+ ObjectAddress base;
+ ObjectAddress aux;
+
+ if (HeapTupleIsValid(tuple))
+ elog(ERROR, "existing tuple in pg_pax_fastsequence when creating pax table");
+
+ CatalogTupleInsert(pax_fastsequence_rel, new_tuple);
+
+ base.classId = RelationRelationId;
+ base.objectId = objid;
+ base.objectSubId = 0;
+ aux.classId = PAX_FASTSEQUENCE_OID;
+ aux.objectId = objid;
+ aux.objectSubId = 0;
+ recordDependencyOn(&aux, &base, DEPENDENCY_INTERNAL);
+ } else {
+ // exists, set to 0 in-place, or update
+ if (!HeapTupleIsValid(tuple))
+ elog(ERROR, "no tuple found in pg_pax_fastsequence for existing pax table");
+
+ new_tuple->t_data->t_ctid = tuple->t_data->t_ctid;
+ new_tuple->t_self = tuple->t_self;
+ if (init_type == FASTSEQUENCE_INIT_TYPE_INPLACE)
+ heap_inplace_update(pax_fastsequence_rel, new_tuple);
+ else if (init_type == FASTSEQUENCE_INIT_TYPE_UPDATE)
+ CatalogTupleUpdate(pax_fastsequence_rel, &new_tuple->t_self, new_tuple);
+ }
+
+ heap_freetuple(new_tuple);
+ CPaxCloseFastSequenceTable(pax_fastsequence_rel, scan, RowExclusiveLock);
+}
+
+// GetFastSequences
+// Get consecutive sequence numbers, the returned sequence number is the
+// lastsequence + 1
+int32 CPaxGetFastSequences(Oid objid) {
+ Relation pax_fastsequence_rel = NULL;
+ SysScanDesc scan = NULL;
+ TupleDesc tuple_desc;
+ HeapTuple tuple;
+ Datum seqno_datum;
+ int32 seqno;
+ bool isnull = false;
+
+ // Increase and read sequence number based on objid and use row-based exclusive
+ // lock to avoid concurrency issue.
+ tuple = CPaxOpenFastSequenceTable(objid, &pax_fastsequence_rel, &scan,
+ RowExclusiveLock);
+
+ Assert(HeapTupleIsValid(tuple));
+
+ tuple_desc = RelationGetDescr(pax_fastsequence_rel);
+
+ seqno_datum = heap_getattr(tuple, ANUM_PG_PAX_FAST_SEQUENCE_LASTSEQUENCE,
+ tuple_desc, &isnull);
+ if (isnull) {
+ ereport(
+ ERROR,
+ (errcode(ERRCODE_UNDEFINED_OBJECT),
+ errmsg(
+ "CPaxGetFastSequences got an invalid lastsequence number: NULL")));
+ }
+ seqno = DatumGetInt32(seqno_datum);
+ if (seqno < 0)
+ elog(ERROR, "sequence number out of range: %d", seqno);
+
+ CPaxUpdateFastsequence(pax_fastsequence_rel, tuple, tuple_desc, objid,
+ seqno + 1);
+
+ CPaxCloseFastSequenceTable(pax_fastsequence_rel, scan, RowExclusiveLock);
+
+ return seqno;
+}
+
+} // namespace paxc
diff --git a/contrib/pax_storage/src/cpp/catalog/pax_fastsequence.h b/contrib/pax_storage/src/cpp/catalog/pax_fastsequence.h
new file mode 100644
index 00000000000..b5269dfa3df
--- /dev/null
+++ b/contrib/pax_storage/src/cpp/catalog/pax_fastsequence.h
@@ -0,0 +1,32 @@
+//-------------------------------------------------------------------------
+// Cloudberry Database
+// Copyright (c) 2023, HashData Technology Limited.
+// pax_fastsequence.h
+// provide a system table maintaining a light-weight fast sequence number for a
+// unique object.
+//
+// IDENTIFICATION
+// src/catalog/pax_fastsequence.h
+// Author: Tony Ying
+//--------------------------------------------------------------------------
+
+#pragma once
+#include "comm/cbdb_api.h"
+
+#define ANUM_PG_PAX_FAST_SEQUENCE_OBJID 1
+#define ANUM_PG_PAX_FAST_SEQUENCE_LASTSEQUENCE 2
+#define NATTS_PG_PAX_FAST_SEQUENCE_TABLES 2
+
+// CREATE: initialize seqno by INSERT, no tuple exists before
+// INPLACE: inplace update when grow the seqno or non-transactional truncate
+// UPDATE: transactional truncate, needs to preserve the old seqno
+// after rollback
+#define FASTSEQUENCE_INIT_TYPE_CREATE 'C'
+#define FASTSEQUENCE_INIT_TYPE_INPLACE 'I'
+#define FASTSEQUENCE_INIT_TYPE_UPDATE 'U'
+
+namespace paxc {
+void CPaxInitializeFastSequenceEntry(Oid objid, char init_type);
+int32 CPaxGetFastSequences(Oid objid);
+
+} // namespace paxc
diff --git a/contrib/pax_storage/src/cpp/catalog/pg_pax_tables.cc b/contrib/pax_storage/src/cpp/catalog/pg_pax_tables.cc
new file mode 100644
index 00000000000..c3d7b98f5c9
--- /dev/null
+++ b/contrib/pax_storage/src/cpp/catalog/pg_pax_tables.cc
@@ -0,0 +1,131 @@
+#include "catalog/pg_pax_tables.h"
+
+#include "comm/cbdb_api.h"
+
+namespace paxc {
+
+void InsertPaxTablesEntry(Oid relid, Oid blocksrelid, Node *partitionspec) {
+ Relation rel;
+ TupleDesc desc;
+ HeapTuple tuple;
+ bool nulls[NATTS_PG_PAX_TABLES];
+ Datum values[NATTS_PG_PAX_TABLES];
+
+ rel = table_open(PAX_TABLES_RELATION_ID, RowExclusiveLock);
+ desc = RelationGetDescr(rel);
+ Assert(desc->natts == NATTS_PG_PAX_TABLES);
+
+ values[ANUM_PG_PAX_TABLES_RELID - 1] = ObjectIdGetDatum(relid);
+ values[ANUM_PG_PAX_TABLES_AUXRELID - 1] = ObjectIdGetDatum(blocksrelid);
+ nulls[ANUM_PG_PAX_TABLES_RELID - 1] = false;
+ nulls[ANUM_PG_PAX_TABLES_AUXRELID - 1] = false;
+
+ if (partitionspec) {
+ values[ANUM_PG_PAX_TABLES_PARTITIONSPEC - 1] =
+ CStringGetTextDatum(nodeToString(partitionspec));
+ nulls[ANUM_PG_PAX_TABLES_PARTITIONSPEC - 1] = false;
+ } else {
+ values[ANUM_PG_PAX_TABLES_PARTITIONSPEC - 1] = 0;
+ nulls[ANUM_PG_PAX_TABLES_PARTITIONSPEC - 1] = true;
+ }
+ tuple = heap_form_tuple(desc, values, nulls);
+
+ /* insert a new tuple */
+ CatalogTupleInsert(rel, tuple);
+
+ table_close(rel, NoLock);
+}
+
+void GetPaxTablesEntryAttributes(Oid relid, Oid *blocksrelid,
+ Node **partitionspec) {
+ Relation rel;
+ ScanKeyData key[1];
+ SysScanDesc scan;
+ HeapTuple tuple;
+ bool isnull;
+
+ rel = table_open(PAX_TABLES_RELATION_ID, RowExclusiveLock);
+
+ ScanKeyInit(&key[0], ANUM_PG_PAX_TABLES_RELID, BTEqualStrategyNumber, F_OIDEQ,
+ ObjectIdGetDatum(relid));
+
+ scan = systable_beginscan(rel, PAX_TABLES_RELID_INDEX_ID, true, NULL, 1, key);
+ tuple = systable_getnext(scan);
+ if (!HeapTupleIsValid(tuple))
+ ereport(ERROR, (errcode(ERRCODE_UNDEFINED_OBJECT),
+ errmsg("pax table relid \"%d\" does not exist in "
+ "pg_pax_tables",
+ relid)));
+
+ if (partitionspec) {
+ Datum v;
+ v = heap_getattr(tuple, ANUM_PG_PAX_TABLES_PARTITIONSPEC,
+ RelationGetDescr(rel), &isnull);
+ *partitionspec = NULL;
+ if (!isnull) {
+ char *str = TextDatumGetCString(v);
+ *partitionspec = (Node *)stringToNode(str);
+ pfree(str);
+ }
+ }
+
+ if (blocksrelid) {
+ *blocksrelid = heap_getattr(tuple, ANUM_PG_PAX_TABLES_AUXRELID,
+ RelationGetDescr(rel), &isnull);
+ if (isnull) ereport(ERROR, (errmsg("pg_pax_tables.auxrelid is null")));
+ }
+
+ /* Finish up scan and close pg_pax_tables catalog. */
+ systable_endscan(scan);
+ table_close(rel, NoLock);
+}
+
+void PaxInitializePartitionSpec(Relation paxrel, Node *part) {
+ Relation rel;
+ ScanKeyData key[1];
+ SysScanDesc scan;
+ HeapTuple oldtuple;
+ TupleDesc desc;
+ bool isnull;
+
+ Assert(paxrel->rd_rel->relkind == RELKIND_RELATION ||
+ paxrel->rd_rel->relkind == RELKIND_MATVIEW);
+ Assert(paxrel->rd_options);
+
+ rel = table_open(PAX_TABLES_RELATION_ID, RowExclusiveLock);
+ desc = RelationGetDescr(rel);
+ ScanKeyInit(&key[0], ANUM_PG_PAX_TABLES_RELID, BTEqualStrategyNumber, F_OIDEQ,
+ ObjectIdGetDatum(RelationGetRelid(paxrel)));
+
+ scan = systable_beginscan(rel, PAX_TABLES_RELID_INDEX_ID, true, NULL, 1, key);
+ oldtuple = systable_getnext(scan);
+ if (!HeapTupleIsValid(oldtuple)) elog(ERROR, "only support pax tables");
+
+ (void)heap_getattr(oldtuple, ANUM_PG_PAX_TABLES_PARTITIONSPEC, desc, &isnull);
+ if (isnull) {
+ HeapTuple newtup;
+ Datum values[NATTS_PG_PAX_TABLES];
+ bool repl[NATTS_PG_PAX_TABLES];
+ bool isnull[NATTS_PG_PAX_TABLES];
+
+ memset(repl, false, sizeof(repl));
+ values[ANUM_PG_PAX_TABLES_PARTITIONSPEC - 1] =
+ CStringGetTextDatum(nodeToString(part));
+ repl[ANUM_PG_PAX_TABLES_PARTITIONSPEC - 1] = true;
+ isnull[ANUM_PG_PAX_TABLES_PARTITIONSPEC - 1] = false;
+
+ newtup = heap_modify_tuple(oldtuple, desc, values, isnull, repl);
+ CatalogTupleUpdate(rel, &oldtuple->t_self, newtup);
+ heap_freetuple(newtup);
+
+ CommandCounterIncrement();
+ } else {
+ elog(ERROR, "existing pax table update partition spec?");
+ }
+
+ /* Finish up scan and close pg_pax_tables catalog. */
+ systable_endscan(scan);
+ table_close(rel, NoLock);
+}
+
+} // namespace paxc
diff --git a/contrib/pax_storage/src/cpp/catalog/pg_pax_tables.h b/contrib/pax_storage/src/cpp/catalog/pg_pax_tables.h
new file mode 100644
index 00000000000..f4d138b0a4b
--- /dev/null
+++ b/contrib/pax_storage/src/cpp/catalog/pg_pax_tables.h
@@ -0,0 +1,24 @@
+#pragma once
+#include "comm/cbdb_api.h"
+
+#define NATTS_PG_PAX_TABLES 3
+#define ANUM_PG_PAX_TABLES_RELID 1
+#define ANUM_PG_PAX_TABLES_AUXRELID 2
+#define ANUM_PG_PAX_TABLES_PARTITIONSPEC 3
+
+namespace paxc {
+
+void InsertPaxTablesEntry(Oid relid, Oid blocksrelid, Node *partitionspec);
+
+void GetPaxTablesEntryAttributes(Oid relid, Oid *blocksrelid,
+ Node **partitionspec);
+
+void PaxInitializePartitionSpec(Relation paxrel, Node *part);
+
+static inline Oid GetPaxAuxRelid(Oid pax_relid) {
+ Oid aux_relid;
+ GetPaxTablesEntryAttributes(pax_relid, &aux_relid, nullptr);
+ return aux_relid;
+}
+
+} // namespace paxc
diff --git a/contrib/pax_storage/src/cpp/cmake/pax.cmake b/contrib/pax_storage/src/cpp/cmake/pax.cmake
new file mode 100644
index 00000000000..6c7ef92b60c
--- /dev/null
+++ b/contrib/pax_storage/src/cpp/cmake/pax.cmake
@@ -0,0 +1,187 @@
+
+## generate_sql
+add_executable(generate_sql_script_program "${CMAKE_CURRENT_SOURCE_DIR}/../../tools/gen_sql.c")
+target_include_directories(generate_sql_script_program PUBLIC ${CMAKE_CURRENT_SOURCE_DIR} ${CBDB_INCLUDE_DIR})
+add_custom_command(OUTPUT generate_sql_file
+ COMMAND ${CMAKE_CURRENT_BINARY_DIR}/generate_sql_script_program > "${CMAKE_CURRENT_SOURCE_DIR}/../../pax-cdbinit--1.0.sql"
+ DEPENDS generate_sql_script_program
+ COMMENT "dynamically generate sql script file"
+)
+add_custom_target(create_sql_script DEPENDS generate_sql_script_program generate_sql_file)
+
+# bison
+bison_target(paxc_gram access/paxc_gram.y ${CMAKE_CURRENT_BINARY_DIR}/paxc_gram.c)
+
+
+set(pax_comm_src
+ comm/bitmap.cc
+ comm/guc.cc
+ comm/paxc_wrappers.cc
+ comm/pax_memory.cc
+ comm/cbdb_wrappers.cc)
+
+set(pax_exceptions_src
+ exceptions/CException.cc)
+
+set(pax_storage_src
+ storage/cache/pax_cache.cc
+ storage/cache/pax_plasma_cache.cc
+ storage/columns/pax_column_cache.cc
+ storage/columns/pax_column_traits.cc
+ storage/columns/pax_column.cc
+ storage/columns/pax_compress.cc
+ storage/columns/pax_columns.cc
+ storage/columns/pax_encoding_utils.cc
+ storage/columns/pax_encoding_non_fixed_column.cc
+ storage/columns/pax_encoding_column.cc
+ storage/columns/pax_decoding.cc
+ storage/columns/pax_encoding.cc
+ storage/columns/pax_rlev2_decoding.cc
+ storage/columns/pax_rlev2_encoding.cc
+ storage/columns/pax_vec_column.cc
+ storage/columns/pax_vec_encoding_column.cc
+ storage/oper/pax_oper.cc
+ storage/oper/pax_stats.cc
+ storage/file_system.cc
+ storage/local_file_system.cc
+ storage/micro_partition.cc
+ storage/micro_partition_file_factory.cc
+ storage/micro_partition_metadata.cc
+ storage/micro_partition_row_filter_reader.cc
+ storage/micro_partition_stats.cc
+ storage/orc/orc_format_reader.cc
+ storage/orc/orc_group.cc
+ storage/orc/orc_vec_group.cc
+ storage/orc/orc_reader.cc
+ storage/orc/orc_writer.cc
+ storage/pax_buffer.cc
+ storage/pax_filter.cc
+ storage/pax_itemptr.cc
+ storage/proto/protobuf_stream.cc
+ storage/pax.cc
+ storage/pax_table_partition_writer.cc
+ storage/strategy.cc
+ storage/micro_partition_iterator.cc
+ )
+
+
+set(pax_access_src
+ ${BISON_paxc_gram_OUTPUTS} # BISON output file
+ access/paxc_rel_options.cc
+ access/paxc_scanner.cc
+ access/pax_access_handle.cc
+ access/pax_deleter.cc
+ access/pax_dml_state.cc
+ access/pax_inserter.cc
+ access/pax_partition.cc
+ access/pax_updater.cc
+ access/pax_scanner.cc)
+
+set(pax_catalog_src
+ catalog/pax_aux_table.cc
+ catalog/pg_pax_tables.cc
+ catalog/pax_fastsequence.cc
+ )
+
+set(pax_vec_src
+ storage/vec/pax_vec_adapter.cc
+ storage/vec/pax_vec_reader.cc)
+
+
+#### pax.so
+set(pax_target_src ${PROTO_SRCS} ${pax_storage_src} ${pax_exceptions_src}
+ ${pax_access_src} ${pax_comm_src} ${pax_catalog_src} ${pax_vec_src})
+set(pax_target_include ${ZTSD_HEADER} ${CMAKE_CURRENT_SOURCE_DIR} ${CBDB_INCLUDE_DIR})
+set(pax_target_link_libs protobuf zstd z postgres)
+set(pax_target_link_directories ${PROJECT_SOURCE_DIR}/../../src/backend/)
+set(pax_target_dependencies generate_protobuf create_sql_script)
+
+# enable plasma
+if (ENABLE_PLASMA)
+ set(pax_target_link_libs ${pax_target_link_libs} uuid plasma)
+endif()
+
+add_library(pax SHARED ${pax_target_src})
+set_target_properties(pax PROPERTIES OUTPUT_NAME pax)
+
+# vec build
+if (VEC_BUILD)
+ find_package(PkgConfig REQUIRED)
+ pkg_check_modules(GLIB REQUIRED glib-2.0)
+ set(pax_target_include
+ ${pax_target_include}
+ ${VEC_HOME}/src/include # for utils/tuptable_vec.h
+ ${VEC_HOME}/arrow/include # for arrow-glib/arrow-glib.h and other arrow interface
+ ${GLIB_INCLUDE_DIRS} # for glib-object.h
+ )
+ set(pax_target_link_directories
+ ${pax_target_link_directories}
+ ${VEC_HOME}/arrow/lib)
+ set(pax_target_link_libs
+ ${pax_target_link_libs}
+ arrow)
+endif(VEC_BUILD)
+
+target_include_directories(pax PUBLIC ${pax_target_include})
+target_link_directories(pax PUBLIC ${pax_target_link_directories})
+target_link_libraries(pax PUBLIC ${pax_target_link_libs})
+set_target_properties(pax PROPERTIES
+ BUILD_RPATH_USE_ORIGIN ON
+ BUILD_WITH_INSTALL_RPATH ON
+ INSTALL_RPATH "$ORIGIN:$ORIGIN/.."
+ LINK_FLAGS "-Wl,--enable-new-dtags"
+)
+
+add_dependencies(pax ${pax_target_dependencies})
+add_custom_command(TARGET pax POST_BUILD
+ COMMAND ${CMAKE_COMMAND} -E
+ copy_if_different $ ${CMAKE_CURRENT_SOURCE_DIR}/../../pax.so)
+
+if (BUILD_GTEST)
+ add_subdirectory(contrib/googletest)
+ ADD_DEFINITIONS(-DRUN_GTEST)
+ file(GLOB test_case_sources
+ pax_gtest_helper.cc
+ pax_gtest.cc
+ ${CMAKE_CURRENT_SOURCE_DIR}/*/*_test.cc
+ ${CMAKE_CURRENT_SOURCE_DIR}/*/*/*_test.cc)
+
+ add_executable(test_main ${pax_target_src} ${test_case_sources})
+ add_dependencies(test_main ${pax_target_dependencies} gtest gmock)
+ target_include_directories(test_main PUBLIC ${pax_target_include} ${CMAKE_CURRENT_SOURCE_DIR} ${gtest_SOURCE_DIR}/include contrib/cpp-stub/src/ contrib/cpp-stub/src_linux/)
+
+ target_link_directories(test_main PUBLIC ${pax_target_link_directories})
+ target_link_libraries(test_main PUBLIC ${pax_target_link_libs} gtest gmock postgres)
+endif(BUILD_GTEST)
+
+if(BUILD_GBENCH)
+ add_subdirectory(contrib/googlebench)
+ ADD_DEFINITIONS(-DRUN_GBENCH)
+ file(GLOB bench_sources
+ pax_gbench.cc
+ ${CMAKE_CURRENT_SOURCE_DIR}/*/*_bench.cc
+ ${CMAKE_CURRENT_SOURCE_DIR}/*/*/*_bench.cc)
+
+ add_executable(bench_main ${pax_target_src} ${bench_sources})
+ add_dependencies(bench_main ${pax_target_dependencies} gtest gmock)
+ target_include_directories(bench_main PUBLIC ${pax_target_include} ${CMAKE_CURRENT_SOURCE_DIR} contrib/googlebench/include contrib/cpp-stub/src/ contrib/cpp-stub/src_linux/)
+ link_directories(contrib/googlebench/src)
+ target_link_libraries(bench_main PUBLIC ${pax_target_link_libs} gtest gmock benchmark postgres)
+ if (VEC_BUILD)
+ target_link_libraries(bench_main PRIVATE arrow)
+ endif(VEC_BUILD)
+endif(BUILD_GBENCH)
+
+if (BUILD_TOOLS)
+ add_subdirectory(contrib/tabulate)
+ link_directories($ENV{GPHOME}/lib)
+
+ add_executable(pax_dump storage/tools/pax_dump.cpp storage/tools/pax_dump_reader.cpp)
+ target_include_directories(pax_dump PUBLIC ${pax_target_include} ${CMAKE_CURRENT_SOURCE_DIR} contrib/tabulate/include)
+ add_dependencies(pax_dump ${pax_target_dependencies})
+ target_link_libraries(pax_dump PUBLIC pax protobuf)
+endif(BUILD_TOOLS)
+
+## install dynamic library
+install(TARGETS pax
+ LIBRARY DESTINATION ${CMAKE_INSTALL_PREFIX}/lib)
\ No newline at end of file
diff --git a/contrib/pax_storage/src/cpp/cmake/pax_format.cmake b/contrib/pax_storage/src/cpp/cmake/pax_format.cmake
new file mode 100644
index 00000000000..55206384e91
--- /dev/null
+++ b/contrib/pax_storage/src/cpp/cmake/pax_format.cmake
@@ -0,0 +1,93 @@
+# paxformat.so
+
+set(pax_comm_src
+ comm/bitmap.cc
+ comm/guc.cc
+ comm/paxc_wrappers.cc
+ comm/pax_memory.cc
+ comm/cbdb_wrappers.cc)
+
+set(pax_exceptions_src
+ exceptions/CException.cc)
+
+set(pax_storage_src
+ storage/cache/pax_cache.cc
+ storage/cache/pax_plasma_cache.cc
+ storage/columns/pax_column_cache.cc
+ storage/columns/pax_column_traits.cc
+ storage/columns/pax_column.cc
+ storage/columns/pax_compress.cc
+ storage/columns/pax_columns.cc
+ storage/columns/pax_encoding_utils.cc
+ storage/columns/pax_encoding_non_fixed_column.cc
+ storage/columns/pax_encoding_column.cc
+ storage/columns/pax_decoding.cc
+ storage/columns/pax_encoding.cc
+ storage/columns/pax_rlev2_decoding.cc
+ storage/columns/pax_rlev2_encoding.cc
+ storage/columns/pax_vec_column.cc
+ storage/columns/pax_vec_encoding_column.cc
+ storage/oper/pax_oper.cc
+ storage/oper/pax_stats.cc
+ storage/file_system.cc
+ storage/local_file_system.cc
+ storage/micro_partition.cc
+ storage/micro_partition_file_factory.cc
+ storage/micro_partition_metadata.cc
+ storage/micro_partition_row_filter_reader.cc
+ storage/micro_partition_stats.cc
+ storage/orc/orc_format_reader.cc
+ storage/orc/orc_group.cc
+ storage/orc/orc_vec_group.cc
+ storage/orc/orc_reader.cc
+ storage/orc/orc_writer.cc
+ storage/pax_buffer.cc
+ storage/pax_filter.cc
+ storage/proto/protobuf_stream.cc
+ )
+
+add_library(paxformat SHARED ${PROTO_SRCS} ${pax_storage_src} ${pax_exceptions_src} ${pax_comm_src} )
+target_compile_definitions(paxformat PRIVATE BUILD_PAX_FORMAT)
+target_include_directories(paxformat PUBLIC ${ZTSD_HEADER} ${CMAKE_CURRENT_SOURCE_DIR} ${CBDB_INCLUDE_DIR})
+target_link_libraries(paxformat PUBLIC uuid protobuf zstd z)
+set_target_properties(paxformat PROPERTIES
+ OUTPUT_NAME paxformat)
+add_dependencies(paxformat generate_protobuf)
+
+# export headers
+set(PAX_COMM_HEADERS
+ comm/bitmap.h
+ comm/cbdb_api.h
+ comm/log.h
+ comm/cbdb_wrappers.h
+ comm/pax_rel.h
+ comm/pax_memory.h
+ comm/guc.h
+)
+
+set(PAX_EXCEPTION_HEADERS
+ exceptions/CException.h
+)
+
+# TODO(gongxun):
+# We should explicitly specify the headers
+# that need to be exported, and use the syntax of
+# install(FILES,...) to install the header files
+install(DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}/storage
+ DESTINATION ${CMAKE_INSTALL_PREFIX}/include/pax
+ FILES_MATCHING
+ PATTERN "*.h"
+)
+
+install(FILES ${PAX_COMM_HEADERS}
+ DESTINATION ${CMAKE_INSTALL_PREFIX}/include/pax/comm
+)
+
+install(FILES ${PAX_EXCEPTION_HEADERS}
+ DESTINATION ${CMAKE_INSTALL_PREFIX}/include/pax/exceptions
+)
+
+## install dynamic library
+install(TARGETS paxformat
+ LIBRARY DESTINATION ${CMAKE_INSTALL_PREFIX}/lib)
+
diff --git a/contrib/pax_storage/src/cpp/comm/bitmap.cc b/contrib/pax_storage/src/cpp/comm/bitmap.cc
index 7009323445f..eeb2b404cfc 100644
--- a/contrib/pax_storage/src/cpp/comm/bitmap.cc
+++ b/contrib/pax_storage/src/cpp/comm/bitmap.cc
@@ -1,150 +1,23 @@
#include "comm/bitmap.h"
-#include "exceptions/CException.h"
-
namespace pax {
+const uint8 kNumBits[] = {
+ 0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,
+ 1,2,2,3,2,3,3,4,2,3,3,4,3,4,4,5,
+ 1,2,2,3,2,3,3,4,2,3,3,4,3,4,4,5,
+ 2,3,3,4,3,4,4,5,3,4,4,5,4,5,5,6,
+ 1,2,2,3,2,3,3,4,2,3,3,4,3,4,4,5,
+ 2,3,3,4,3,4,4,5,3,4,4,5,4,5,5,6,
+ 2,3,3,4,3,4,4,5,3,4,4,5,4,5,5,6,
+ 3,4,4,5,4,5,5,6,4,5,5,6,5,6,6,7,
+ 1,2,2,3,2,3,3,4,2,3,3,4,3,4,4,5,
+ 2,3,3,4,3,4,4,5,3,4,4,5,4,5,5,6,
+ 2,3,3,4,3,4,4,5,3,4,4,5,4,5,5,6,
+ 3,4,4,5,4,5,5,6,4,5,5,6,5,6,6,7,
+ 2,3,3,4,3,4,4,5,3,4,4,5,4,5,5,6,
+ 3,4,4,5,4,5,5,6,4,5,5,6,5,6,6,7,
+ 3,4,4,5,4,5,5,6,4,5,5,6,5,6,6,7,
+ 4,5,5,6,5,6,6,7,5,6,6,7,6,7,7,8,
+};
-DynamicBitmap::DynamicBitmap() { bitmap_.resize(1024); }
-DynamicBitmap::DynamicBitmap(uint32 size) { bitmap_.resize(size); }
-
-DynamicBitmap::~DynamicBitmap() { bitmap_.clear(); }
-
-void DynamicBitmap::Set(uint32 index) {
- CBDB_CHECK(index >= 0 && index < bitmap_.size(),
- cbdb::CException::ExType::kExTypeOutOfRange);
- bitmap_[index] = true;
-}
-
-bool DynamicBitmap::Test(uint32 index) const {
- CBDB_CHECK(index >= 0 && index < bitmap_.size(),
- cbdb::CException::ExType::kExTypeOutOfRange);
- return bitmap_[index];
-}
-
-void DynamicBitmap::Clear(uint32 index) {
- CBDB_CHECK(index >= 0 && index < bitmap_.size(),
- cbdb::CException::ExType::kExTypeOutOfRange);
- bitmap_[index] = false;
-}
-
-void DynamicBitmap::Reset() { bitmap_.clear(); }
-
-void DynamicBitmap::Resize(int size) { bitmap_.resize(size); }
-
-// TODO(gongxun): need to do optimization for this function
-bool DynamicBitmap::BitmapFindFirst(uint32 offset, bool value,
- uint32 *idx) const {
- auto it = std::find(bitmap_.begin() + offset, bitmap_.end(), value);
- if (it == bitmap_.end()) {
- return false;
- }
- *idx = it - bitmap_.begin();
- return true;
-}
-
-uint32 DynamicBitmap::NumBits() const { return bitmap_.size(); }
-
-FixedBitmap::FixedBitmap(uint32 size) {
- byte_size_ = (size >> 3) + (size & 7 ? 1 : 0);
- bitmap_ = new uint8[byte_size_];
-
- num_bits_ = size;
- memset(bitmap_, 0, byte_size_);
-}
-
-FixedBitmap::~FixedBitmap() { delete[] bitmap_; }
-
-void FixedBitmap::Set(uint32 index) {
- CBDB_CHECK(index >= 0 && index < num_bits_,
- cbdb::CException::ExType::kExTypeOutOfRange);
- bitmap_[index >> 3] |= 1 << (index & 7);
-}
-
-bool FixedBitmap::Test(uint32 index) const {
- CBDB_CHECK(index >= 0 && index < num_bits_,
- cbdb::CException::ExType::kExTypeOutOfRange);
- return bitmap_[index >> 3] & (1 << (index & 7));
}
-
-void FixedBitmap::Reset() { std::memset(bitmap_, 0, byte_size_); }
-
-void FixedBitmap::Clear(uint32 index) {
- CBDB_CHECK(index >= 0 && index < num_bits_,
- cbdb::CException::ExType::kExTypeOutOfRange);
- bitmap_[index >> 3] &= ~(1 << (index & 7));
-}
-
-uint32 FixedBitmap::Size() const { return byte_size_; }
-uint32 FixedBitmap::NumBits() const { return num_bits_; }
-bool FixedBitmap::BitmapFindFirst(uint32 offset, bool value,
- uint32 *idx) const {
- const uint64 pattern64[2] = {0xffffffffffffffff, 0x0000000000000000};
- const uint8 pattern8[2] = {0xff, 0x00};
- uint32 bit;
-
- if (offset >= num_bits_) {
- return false;
- }
-
- // Jump to the byte at specified offset
- const uint8 *p = bitmap_ + (offset >> 3);
- uint32 num_bits = num_bits_ - offset;
-
- // Find a 'value' bit at the end of the first byte
- if ((bit = offset & 0x7)) {
- for (; bit < 8 && num_bits > 0; ++bit) {
- if (Test(((p - bitmap_) << 3) + bit) == value) {
- *idx = ((p - bitmap_) << 3) + bit;
- return true;
- }
-
- num_bits--;
- }
- p++;
- }
-
- // check 64bit at the time for a 'value' bit
- const uint64 *u64 = (const uint64 *)p;
- while (num_bits >= 64 && *u64 == pattern64[value]) {
- num_bits -= 64;
- u64++;
- }
-
- // check 8bit at the time for a 'value' bit
- p = (const uint8 *)u64;
- while (num_bits >= 8 && *p == pattern8[value]) {
- num_bits -= 8;
- p++;
- }
-
- // Find a 'value' bit at the beginning of the last byte
- for (bit = 0; num_bits > 0; ++bit) {
- if (Test(((p - bitmap_) << 3) + bit) == value) {
- *idx = ((p - bitmap_) << 3) + bit;
- return true;
- }
- num_bits--;
- }
-
- return false;
-}
-
-BitmapIterator::BitmapIterator(Bitmap *map) : offset_(0), bitmap_(map) {}
-
-void BitmapIterator::SeekTo(size_t bit) {
- Assert(bit < bitmap_->NumBits());
- offset_ = bit;
-}
-
-int32 BitmapIterator::Next(bool value) {
- int32 len = bitmap_->NumBits() - offset_;
- if (len <= 0) return -1;
- uint32 index;
- if (bitmap_->BitmapFindFirst(offset_, value, &index)) {
- offset_ = index + 1;
- return index;
- }
- return -1;
-}
-
-} // namespace pax
diff --git a/contrib/pax_storage/src/cpp/comm/bitmap.h b/contrib/pax_storage/src/cpp/comm/bitmap.h
index daa5819a3a1..0d503be48f2 100644
--- a/contrib/pax_storage/src/cpp/comm/bitmap.h
+++ b/contrib/pax_storage/src/cpp/comm/bitmap.h
@@ -4,97 +4,261 @@
#include
-#include
#include
-#include
-#include
-#include
-#include
-#include
-namespace pax {
+#include "comm/pax_memory.h"
+#include "exceptions/CException.h"
-class Bitmap {
- public:
- virtual ~Bitmap() {}
- virtual void Set(uint32 index) = 0;
- virtual bool Test(uint32 index) const = 0;
- virtual void Clear(uint32 index) = 0;
- virtual void Reset() = 0;
- virtual bool BitmapFindFirst(uint32 offset, bool value,
- uint32 *idx) const = 0;
- virtual uint32 NumBits() const = 0;
-};
-
-class DynamicBitmap : public Bitmap {
+namespace pax {
+extern const uint8 kNumBits[];
+#define BM_WORD_BITS (sizeof(T) << 3)
+// log2(BM_WORD_BITS)
+#define BM_WORD_SHIFTS \
+ (sizeof(T) == 1 ? 3 : (sizeof(T) == 2 ? 4 : (sizeof(T) == 4 ? 5 : 6)))
+#define BM_INDEX_WORD_OFF(index) ((index) >> BM_WORD_SHIFTS)
+#define BM_INDEX_BIT_OFF(index) ((index) & (BM_WORD_BITS - 1))
+#define BM_INDEX_BIT(index) (1ULL << BM_INDEX_BIT_OFF(index))
+template
+struct BitmapRaw final {
public:
- friend class BitmapIterator;
- DynamicBitmap();
- explicit DynamicBitmap(uint32 size);
+ inline void Set(uint32 index) {
+ bitmap[BM_INDEX_WORD_OFF(index)] |= BM_INDEX_BIT(index);
+ }
+ // Set the bits in the range [0, index] (inclusive) to 1.
+ inline void SetN(uint32 index) {
+ memset(&bitmap[0], -1, sizeof(T) * BM_INDEX_WORD_OFF(index));
+ bitmap[BM_INDEX_WORD_OFF(index)] |= (BM_INDEX_BIT(index) << 1) - 1;
+ }
+ inline void Clear(uint32 index) {
+ bitmap[BM_INDEX_WORD_OFF(index)] &= ~BM_INDEX_BIT(index);
+ }
+ inline void ClearN(uint32 index) {
+ memset(&bitmap[0], 0, sizeof(T) * BM_INDEX_WORD_OFF(index));
+ bitmap[BM_INDEX_WORD_OFF(index)] &= ~((BM_INDEX_BIT(index) << 1) - 1);
+ }
+ inline void ClearAll() {
+ AssertImply(size > 0, bitmap);
+ if (size > 0) memset(&bitmap[0], 0, sizeof(T) * size);
+ }
+ inline bool Test(uint32 index) const {
+ return (bitmap[BM_INDEX_WORD_OFF(index)] & BM_INDEX_BIT(index)) != 0;
+ }
+ // invert the bit and return the old value.
+ inline bool Toggle(uint32 index) {
+ return !((bitmap[BM_INDEX_WORD_OFF(index)] ^= BM_INDEX_BIT(index)) &
+ BM_INDEX_BIT(index));
+ }
+ inline size_t WordBits(T v) const {
+ if (sizeof(T) == 1)
+ return kNumBits[v];
+ else if (sizeof(T) == 2)
+ return kNumBits[v & 0xff] + kNumBits[v >> 8];
+ else if (sizeof(T) == 4)
+ return kNumBits[v & 0xff] + kNumBits[(v >> 8) & 0xff] +
+ kNumBits[(v >> 16) & 0xff] + kNumBits[(v >> 24) & 0xff];
+ else if (sizeof(T) == 8)
+ return kNumBits[v & 0xff] + kNumBits[(v >> 8) & 0xff] +
+ kNumBits[(v >> 16) & 0xff] + kNumBits[(v >> 24) & 0xff] +
+ kNumBits[(v >> 32) & 0xff] + kNumBits[(v >> 40) & 0xff] +
+ kNumBits[(v >> 48) & 0xff] + kNumBits[(v >> 56) & 0xff];
+ return 0;
+ }
+ // count bits in range [0, index]
+ inline size_t CountBits(uint32 index) const {
+ size_t nbits = 0;
+ for (uint32 i = 0; i < BM_INDEX_WORD_OFF(index); i++)
+ nbits += WordBits(bitmap[i]);
+ {
+ auto w = bitmap[BM_INDEX_WORD_OFF(index)];
+ nbits += WordBits(w & ((BM_INDEX_BIT(index) << 1) - 1));
+ }
- virtual ~DynamicBitmap();
+ return nbits;
+ }
+ // count bits in range [start, end] (inclusive)
+ // NOTE(review): the two `uint32 w` temporaries below truncate the word when
+ // T = uint64 (the other branches use `auto w`), and
+ // `1ULL << (end_index - start_index + 1)` is UB when the span covers a full
+ // 64-bit word — confirm and fix before relying on Bitmap64::CountBits.
+ inline size_t CountBits(uint32 start_index, uint32 end_index) const {
+ size_t nbits = 0;
+ uint32 word_off = BM_INDEX_WORD_OFF(start_index);
- void Set(uint32 index) override;
+ Assert(start_index <= end_index);
- bool Test(uint32 index) const override;
+ if (BM_INDEX_WORD_OFF(end_index) == word_off) {
+ uint32 w = bitmap[word_off] >> BM_INDEX_BIT_OFF(start_index);
+ return WordBits(w & ((1ULL << (end_index - start_index + 1)) - 1));
+ }
+ {
+ uint32 w = bitmap[BM_INDEX_WORD_OFF(start_index)];
+ nbits += WordBits(w >> BM_INDEX_BIT_OFF(start_index));
+ }
+ for (uint32 i = BM_INDEX_WORD_OFF(start_index + BM_WORD_BITS),
+ n = BM_INDEX_WORD_OFF(end_index);
+ i < n; i++)
+ nbits += WordBits(bitmap[i]);
+ {
+ auto w = bitmap[BM_INDEX_WORD_OFF(end_index)];
+ nbits += WordBits(w & ((BM_INDEX_BIT(end_index) << 1) - 1));
+ }
+ return nbits;
+ }
- void Clear(uint32 index) override;
+ inline bool HasEnoughSpace(uint32 index) const {
+ static_assert(sizeof(T) == 1 || sizeof(T) == 2 || sizeof(T) == 4 ||
+ sizeof(T) == 8);
+ static_assert(BM_WORD_BITS == (1 << BM_WORD_SHIFTS));
+ return (index >> BM_WORD_SHIFTS) < size;
+ }
+ inline bool Empty() const {
+ if (!bitmap) return true;
+ for (size_t i = 0; i < size; i++)
+ if (bitmap[i]) return false;
+ return true;
+ }
+ BitmapRaw() = default;
+ BitmapRaw(T *buffer, size_t size) : bitmap(buffer), size(size) {}
+ BitmapRaw(const BitmapRaw &) = delete;
+ BitmapRaw(BitmapRaw &&raw) : bitmap(raw.bitmap), size(raw.size) {
+ raw.bitmap = nullptr;
+ raw.size = 0;
+ }
+ BitmapRaw &operator=(BitmapRaw) = delete;
+ BitmapRaw &operator=(BitmapRaw &) = delete;
+ BitmapRaw &operator=(const BitmapRaw &) = delete;
+ BitmapRaw &operator=(BitmapRaw &&raw) {
+ if (this != &raw) {
+ PAX_DELETE_ARRAY(bitmap);
+ bitmap = raw.bitmap;
+ size = raw.size;
+ raw.bitmap = nullptr;
+ raw.size = 0;
+ }
+ return *this;
+ }
- void Reset() override;
+ ~BitmapRaw() = default;
- void Resize(int size);
-
- // TODO(gongxun): need to do optimization for this function
- bool BitmapFindFirst(uint32 offset, bool value, uint32 *idx) const override;
-
- uint32 NumBits() const override;
-
- private:
- std::vector bitmap_;
+ T *bitmap = nullptr;
+ size_t size = 0;
};
-class FixedBitmap : public Bitmap {
+template
+class BitmapTpl final {
public:
- friend class BitmapIterator;
- explicit FixedBitmap(uint32 size);
+ using BitmapMemoryPolicy = void (*)(BitmapRaw &, uint32);
+ explicit BitmapTpl(uint32 initial_size = 16,
+ BitmapMemoryPolicy policy = DefaultBitmapMemoryPolicy) {
+ static_assert(sizeof(T) == 1 || sizeof(T) == 2 || sizeof(T) == 4 ||
+ sizeof(T) == 8);
+ static_assert(BM_WORD_BITS == (1 << BM_WORD_SHIFTS));
+ policy_ = policy;
+ policy(raw_, Max(initial_size, 16));
+ }
+ explicit BitmapTpl(const BitmapRaw &raw, BitmapMemoryPolicy policy) {
+ static_assert(sizeof(T) == 1 || sizeof(T) == 2 || sizeof(T) == 4 ||
+ sizeof(T) == 8);
+ static_assert(BM_WORD_BITS == (1 << BM_WORD_SHIFTS));
+ Assert(policy == ReadOnlyRefBitmap || policy == ReadOnlyOwnBitmap);
+ policy_ = policy;
+ raw_.bitmap = raw.bitmap;
+ raw_.size = raw.size;
+ }
+ BitmapTpl(const BitmapTpl &tpl) = delete;
+ BitmapTpl(BitmapTpl &&tpl)
+ : raw_(std::move(tpl.raw_)), policy_(tpl.policy_) {}
+ BitmapTpl &operator=(const BitmapTpl &tpl) = delete;
+ BitmapTpl &operator=(BitmapTpl &&tpl) = delete;
+ ~BitmapTpl() {
+ // Reference doesn't free the memory
+ if (policy_ == ReadOnlyRefBitmap) raw_.bitmap = nullptr;
+ }
+ inline size_t WordBits() const { return BM_WORD_BITS; }
+ inline void Set(uint32 index) {
+ if (unlikely(!raw_.HasEnoughSpace(index))) policy_(raw_, index);
+ raw_.Set(index);
+ }
+ inline void SetN(uint32 index) {
+ if (unlikely(!raw_.HasEnoughSpace(index))) policy_(raw_, index);
+ raw_.SetN(index);
+ }
+ inline void Clear(uint32 index) {
+ if (likely(raw_.HasEnoughSpace(index))) raw_.Clear(index);
+ }
+ inline void ClearN(uint32 index) {
+ if (raw_.HasEnoughSpace(index))
+ raw_.ClearN(index);
+ else
+ raw_.ClearAll();
+ }
+ inline void ClearAll() { raw_.ClearAll(); }
+ inline bool Test(uint32 index) const {
+ if (likely(raw_.HasEnoughSpace(index))) return raw_.Test(index);
+ return false;
+ }
+ // invert the bit and return the old value.
+ inline bool Toggle(uint32 index) {
+ if (unlikely(!raw_.HasEnoughSpace(index))) policy_(raw_, index);
+ return raw_.Toggle(index);
+ }
+ // count bits in range [0, index]
+ inline size_t CountBits(uint32 index) const {
+ if (raw_.size == 0) return 0;
+ if ((raw_.size << BM_WORD_SHIFTS) <= index)
+ index = (raw_.size << BM_WORD_SHIFTS) - 1;
+ return raw_.CountBits(index);
+ }
+ inline size_t CountBits(uint32 start_index, uint32 end_index) const {
+ if ((raw_.size << BM_WORD_SHIFTS) <= start_index) return 0;
+ if ((raw_.size << BM_WORD_SHIFTS) <= end_index)
+ end_index = (raw_.size << BM_WORD_SHIFTS) - 1;
+ Assert(start_index <= end_index);
+ return raw_.CountBits(start_index, end_index);
+ }
- virtual ~FixedBitmap();
+ inline bool Empty() const { return raw_.Empty(); }
- void Set(uint32 index) override;
+ BitmapMemoryPolicy Policy() const { return policy_; }
- bool Test(uint32 index) const override;
+ const BitmapRaw &Raw() const { return raw_; }
+ BitmapRaw &Raw() { return raw_; }
- void Reset() override;
+ static void DefaultBitmapMemoryPolicy(BitmapRaw &raw, uint32 index) {
+ auto old_bitmap = raw.bitmap;
+ auto old_size = raw.size;
+ auto size = Max(BM_INDEX_WORD_OFF(index) + 1, old_size * 2);
+ auto p = PAX_NEW_ARRAY(size);
+ if (old_size > 0) memcpy(p, old_bitmap, sizeof(T) * old_size);
+ memset(&p[old_size], 0, sizeof(T) * (size - old_size));
+ raw.bitmap = p;
+ raw.size = size;
+ PAX_DELETE_ARRAY(old_bitmap);
+ }
+ static void ReadOnlyRefBitmap(BitmapRaw &/*raw*/, uint32 /*index*/) {
+ // Read-only bitmaps must never grow; any attempt to expand raises.
+ CBDB_RAISE(cbdb::CException::kExTypeInvalidMemoryOperation);
+ }
+ static void ReadOnlyOwnBitmap(BitmapRaw &/*raw*/, uint32 /*index*/) {
+ CBDB_RAISE(cbdb::CException::kExTypeInvalidMemoryOperation);
+ }
- void Clear(uint32 index) override;
-
- uint32 Size() const;
-
- uint32 NumBits() const override;
-
- bool BitmapFindFirst(uint32 offset, bool value, uint32 *idx) const;
+ static inline size_t RequireWords(size_t nbits) {
+ return nbits ? ((nbits - 1) >> BM_WORD_SHIFTS) + 1 : 0;
+ }
+ inline size_t CurrentBytes() const { return sizeof(T) * raw_.size; }
+ inline size_t MinimalStoredBytes(size_t nbits) {
+ auto nwords = RequireWords(nbits);
+ if (nwords > raw_.size) nwords = raw_.size;
+ while (nwords > 0 && raw_.bitmap[nwords - 1] == 0) nwords--;
+ return nwords * sizeof(T);
+ }
private:
- FixedBitmap(const FixedBitmap &other) = delete;
- FixedBitmap(FixedBitmap &&other) = delete;
- FixedBitmap &operator=(const FixedBitmap &other) = delete;
- FixedBitmap &operator=(FixedBitmap &&other) = delete;
-
- uint32 byte_size_;
- uint32 num_bits_;
- uint8 *bitmap_;
+ inline bool HasEnoughSpace(uint32 index) const {
+ return raw_.HasEnoughSpace(index);
+ }
+ BitmapRaw raw_;
+ BitmapMemoryPolicy policy_;
};
-class BitmapIterator {
- public:
- explicit BitmapIterator(Bitmap *map);
-
- void SeekTo(size_t bit);
-
- int32 Next(bool value);
+using Bitmap8 = BitmapTpl;
+using Bitmap64 = BitmapTpl;
- private:
- uint32 offset_;
- Bitmap *bitmap_;
-};
} // namespace pax
diff --git a/contrib/pax_storage/src/cpp/comm/bitmap_test.cc b/contrib/pax_storage/src/cpp/comm/bitmap_test.cc
index b92b18b63ca..f5f7f164725 100644
--- a/contrib/pax_storage/src/cpp/comm/bitmap_test.cc
+++ b/contrib/pax_storage/src/cpp/comm/bitmap_test.cc
@@ -5,68 +5,162 @@
namespace pax::tests {
class BitMapTest : public ::testing::Test {};
-TEST_F(BitMapTest, test) {
- FixedBitmap bit_map(100);
- ASSERT_EQ(bit_map.Test(0), false);
- ASSERT_EQ(bit_map.Test(99), false);
- bit_map.Set(0);
- ASSERT_EQ(bit_map.Test(0), true);
- ASSERT_EQ(bit_map.Test(99), false);
- bit_map.Set(99);
- ASSERT_EQ(bit_map.Test(0), true);
- ASSERT_EQ(bit_map.Test(99), true);
- bit_map.Clear(0);
- ASSERT_EQ(bit_map.Test(0), false);
- ASSERT_EQ(bit_map.Test(99), true);
- bit_map.Clear(99);
- ASSERT_EQ(bit_map.Test(0), false);
- ASSERT_EQ(bit_map.Test(99), false);
-
- ASSERT_EQ(bit_map.Size(), 13);
+TEST_F(BitMapTest, Bitmap8) {
+ Bitmap8 bm(20);
+
+ ASSERT_TRUE(bm.Empty());
+ for (auto i = 0; i <= 128; i++) {
+ ASSERT_FALSE(bm.Test(i)); // zeros
+ ASSERT_FALSE(bm.Toggle(i));
+ ASSERT_TRUE(bm.Test(i));
+ ASSERT_TRUE(bm.Toggle(i));
+ ASSERT_FALSE(bm.Test(i));
+
+ ASSERT_FALSE(bm.Test(i)); // zeros
+ bm.Set(i);
+ ASSERT_TRUE(bm.Test(i));
+ bm.Set(i);
+ ASSERT_TRUE(bm.Test(i));
+
+ bm.Clear(i);
+ ASSERT_FALSE(bm.Test(i));
+ bm.Clear(i);
+ ASSERT_FALSE(bm.Test(i));
+
+ bm.Set(i);
+ ASSERT_TRUE(bm.Test(i));
+ }
+}
+
+TEST_F(BitMapTest, Bitmap8SetN) {
+ Bitmap8 bm(10);
+ const auto nbits = 128;
+
+ ASSERT_TRUE(bm.Empty());
+ for (auto i = 0; i <= nbits; i++) ASSERT_FALSE(bm.Test(i));
+
+ auto fn = [&bm, nbits](uint32 index) {
+ bm.ClearAll();
+ for (auto i = 0; i <= nbits; i++) ASSERT_FALSE(bm.Test(i));
+ bm.SetN(index);
+ for (uint32 i = 0; i <= index; i++) ASSERT_TRUE(bm.Test(i));
+ for (uint32 i = index + 1; i <= nbits; i++) ASSERT_FALSE(bm.Test(i));
+ };
+ for (uint32 i = 0; i <= nbits; i++) fn(i);
+}
+
+TEST_F(BitMapTest, Bitmap8ClearN) {
+ Bitmap8 bm(10);
+ const auto nbits = 128;
+
+ ASSERT_TRUE(bm.Empty());
+ for (auto i = 0; i <= nbits; i++) ASSERT_FALSE(bm.Test(i));
+
+ auto fn = [&bm, nbits](uint32 index) {
+ for (auto i = 0; i <= nbits; i++) {
+ bm.Set(i);
+ ASSERT_TRUE(bm.Test(i));
+ }
+ bm.ClearN(index);
+ for (uint32 i = 0; i <= index; i++) ASSERT_FALSE(bm.Test(i));
+ for (uint32 i = index + 1; i <= nbits; i++) ASSERT_TRUE(bm.Test(i));
+ };
+ for (uint32 i = 0; i <= nbits; i++) fn(i);
}
-TEST_F(BitMapTest, FixedBitmap) {
- FixedBitmap bit_map(100);
- bit_map.Set(0);
- bit_map.Set(50);
- bit_map.Set(99);
+TEST_F(BitMapTest, Bitmap64) {
+ Bitmap64 bm(100);
- BitmapIterator it(&bit_map);
+ ASSERT_TRUE(bm.Empty());
+ for (auto i = 0; i <= 128; i++) {
+ ASSERT_FALSE(bm.Test(i)); // zeros
+ ASSERT_FALSE(bm.Toggle(i));
+ ASSERT_TRUE(bm.Test(i));
+ ASSERT_TRUE(bm.Toggle(i));
+ ASSERT_FALSE(bm.Test(i));
- ASSERT_EQ(it.Next(true), 0);
- ASSERT_EQ(it.Next(true), 50);
- ASSERT_EQ(it.Next(true), 99);
+ bm.Set(i);
+ ASSERT_TRUE(bm.Test(i));
+ bm.Set(i);
+ ASSERT_TRUE(bm.Test(i));
- it.SeekTo(0);
- ASSERT_EQ(it.Next(false), 1);
- ASSERT_EQ(it.Next(false), 2);
- ASSERT_EQ(it.Next(false), 3);
+ bm.Clear(i);
+ ASSERT_FALSE(bm.Test(i));
+ bm.Clear(i);
+ ASSERT_FALSE(bm.Test(i));
+
+ bm.Set(i);
+ ASSERT_TRUE(bm.Test(i));
+ }
}
+TEST_F(BitMapTest, Bitmap64SetN) {
+ Bitmap64 bm(1);
+ const auto nbits = 512;
-TEST_F(BitMapTest, DynamicBitmap) {
- DynamicBitmap bit_map(100);
- bit_map.Set(0);
- bit_map.Set(50);
- bit_map.Set(99);
+ ASSERT_TRUE(bm.Empty());
+ for (auto i = 0; i <= nbits; i++) ASSERT_FALSE(bm.Test(i));
- BitmapIterator it(&bit_map);
+ auto fn = [&bm, nbits](uint32 index) {
+ bm.ClearAll();
+ for (auto i = 0; i <= nbits; i++) ASSERT_FALSE(bm.Test(i));
+ bm.SetN(index);
+ for (uint32 i = 0; i <= index; i++) ASSERT_TRUE(bm.Test(i));
+ for (uint32 i = index + 1; i <= nbits; i++) ASSERT_FALSE(bm.Test(i));
+ };
+ for (uint32 i = 0; i <= nbits; i++) fn(i);
+}
- ASSERT_EQ(it.Next(true), 0);
- ASSERT_EQ(it.Next(true), 50);
- ASSERT_EQ(it.Next(true), 99);
+TEST_F(BitMapTest, Bitmap64ClearN) {
+ Bitmap64 bm(1);
+ const auto nbits = 512;
- bit_map.Resize(200);
- bit_map.Set(100);
- bit_map.Set(150);
- bit_map.Set(199);
+ ASSERT_TRUE(bm.Empty());
+ for (auto i = 0; i <= nbits; i++) ASSERT_FALSE(bm.Test(i));
- ASSERT_EQ(it.Next(true), 100);
- ASSERT_EQ(it.Next(true), 150);
- ASSERT_EQ(it.Next(true), 199);
+ auto fn = [&bm, &nbits](uint32 index) {
+ for (auto i = 0; i <= nbits; i++) {
+ bm.Set(i);
+ ASSERT_TRUE(bm.Test(i));
+ }
+ bm.ClearN(index);
+ for (uint32 i = 0; i <= index; i++) ASSERT_FALSE(bm.Test(i));
+ for (uint32 i = index + 1; i <= nbits; i++) ASSERT_TRUE(bm.Test(i));
+ };
+ for (uint32 i = 0; i <= nbits; i++) fn(i);
+}
+
+TEST_F(BitMapTest, CountBits) {
+ const uint32 starts[] = {0, 1, 3, 7};
+ const uint32 ends[] = {0, 1, 7, 8, 9, 15, 16, 17};
+ Bitmap8 bm(11);
- it.SeekTo(0);
- ASSERT_EQ(it.Next(false), 1);
- ASSERT_EQ(it.Next(false), 2);
- ASSERT_EQ(it.Next(false), 3);
+ auto fill_bits = [&bm](uint32 bits) {
+ uint32 k = 0;
+ bm.ClearAll();
+ while (bits) {
+ if (bits & 1) bm.Set(k);
+ bits = bits >> 1;
+ k++;
+ }
+ };
+ auto plain_count = [](uint32 bits, uint32 start, uint32 end) {
+ size_t nbits = 0;
+ for (auto i = start; i <= end; i++) {
+ if (bits & (1ULL << i)) nbits++;
+ }
+ return nbits;
+ };
+
+ for (uint32 i = 0; i < 0x3ffff; i++) {
+ fill_bits(i);
+ for (auto start : starts) {
+ for (auto end : ends) {
+ if (end < start) continue;
+ ASSERT_EQ(bm.CountBits(start, end), plain_count(i, start, end));
+ ASSERT_EQ(bm.CountBits(end), plain_count(i, 0, end));
+ }
+ }
+ }
}
+
} // namespace pax::tests
diff --git a/contrib/pax_storage/src/cpp/comm/cbdb_api.h b/contrib/pax_storage/src/cpp/comm/cbdb_api.h
index b97800252ee..dcf074ed37b 100644
--- a/contrib/pax_storage/src/cpp/comm/cbdb_api.h
+++ b/contrib/pax_storage/src/cpp/comm/cbdb_api.h
@@ -1,11 +1,19 @@
#ifndef SRC_CPP_COMM_CBDB_API_H_
#define SRC_CPP_COMM_CBDB_API_H_
+#include "comm/pax_rel.h"
+
#ifdef __cplusplus
extern "C" {
#endif
+
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wregister"
+
#include "postgres.h" // NOLINT
+#include "postmaster/postmaster.h"
+#include "access/detoast.h"
#include "access/genam.h"
#include "access/heapam.h"
#include "access/relscan.h"
@@ -14,17 +22,39 @@ extern "C" {
#include "access/tsmapi.h"
#include "access/tupdesc.h"
#include "access/tupdesc_details.h"
+#include "catalog/catalog.h"
#include "catalog/dependency.h"
#include "catalog/heap.h"
+#include "catalog/gp_indexing.h"
+#include "catalog/index.h"
#include "catalog/indexing.h"
+#include "catalog/objectaccess.h"
#include "catalog/oid_dispatch.h"
#include "catalog/pg_am.h"
#include "catalog/pg_amop.h"
#include "catalog/pg_amproc.h"
+#include "catalog/pg_attribute_encoding.h"
+#include "catalog/pg_collation.h"
#include "catalog/pg_namespace.h"
+#include "catalog/pg_opclass.h"
+#include "catalog/toasting.h"
+#include "commands/progress.h"
+#include "commands/tablecmds.h"
+#include "nodes/execnodes.h"
+#include "funcapi.h"
+#include "partitioning/partdesc.h"
+#include "partitioning/partbounds.h"
+#include "pgstat.h"
+#include "utils/partcache.h"
+#include "utils/ruleutils.h"
+#include "access/nbtree.h"
+#include "access/hash.h"
+#include "parser/parse_utilcmd.h"
+#include "nodes/makefuncs.h"
+#include "parser/parse_oper.h"
+#include "parser/parse_expr.h"
#ifndef BUILD_PAX_FORMAT
#include "access/reloptions.h"
-#include "catalog/pg_pax_tables.h"
#endif
#include "catalog/storage.h"
#include "cdb/cdbvars.h"
@@ -41,33 +71,32 @@ extern "C" {
#include "storage/lwlock.h"
#include "storage/relfilenode.h"
#include "storage/smgr.h"
+#include "utils/backend_progress.h"
#include "utils/builtins.h"
+#include "utils/date.h"
#include "utils/datum.h"
#include "utils/elog.h"
#include "utils/hsearch.h"
#include "utils/lsyscache.h"
#include "utils/memutils.h"
+#include "utils/numeric.h"
#include "utils/relcache.h"
#include "utils/snapshot.h"
#include "utils/syscache.h"
#include "utils/wait_event.h"
// no header file in cbdb
-extern BlockNumber system_nextsampleblock(SampleScanState *node, BlockNumber nblocks); // NOLINT
-extern bool extractcolumns_from_node(Node *expr, bool *cols, AttrNumber natts); // NOLINT
+extern BlockNumber system_nextsampleblock(SampleScanState *node, // NOLINT
+ BlockNumber nblocks);
+extern bool extractcolumns_from_node(Node *expr, bool *cols, // NOLINT
+ AttrNumber natts);
+extern int get_partition_for_tuple(PartitionKey key, PartitionDesc partdesc, // NOLINT
+ Datum *values, bool *isnull);
extern Oid GetDefaultOpClass(Oid type_id, Oid am_id);
+
+#pragma GCC diagnostic pop
#ifdef __cplusplus
}
#endif
-#define PAX_TABLE_AM_OID 7014
-#define PAX_AMNAME "pax"
-#define PAX_AM_HANDLER_OID 7600
-#define PAX_AM_HANDLER_NAME "pax_tableam_handler"
-
-#define PAX_AUX_STATS_IN_OID 7601
-#define PAX_AUX_STATS_OUT_OID 7602
-#define PAX_AUX_STATS_TYPE_OID 7603
-#define PAX_AUX_STATS_TYPE_NAME "paxauxstats"
-
#endif // SRC_CPP_COMM_CBDB_API_H_
diff --git a/contrib/pax_storage/src/cpp/comm/cbdb_wrappers.cc b/contrib/pax_storage/src/cpp/comm/cbdb_wrappers.cc
index fb7c4beced4..cb683fdfea6 100644
--- a/contrib/pax_storage/src/cpp/comm/cbdb_wrappers.cc
+++ b/contrib/pax_storage/src/cpp/comm/cbdb_wrappers.cc
@@ -1,6 +1,6 @@
#include "comm/cbdb_wrappers.h"
+
#include "comm/paxc_wrappers.h"
-#include "storage/paxc_block_map_manager.h"
extern "C" {
const char *progname;
}
@@ -83,22 +83,6 @@ void Pfree(void *ptr) {
} // namespace cbdb
-void *operator new(std::size_t size) { return cbdb::Palloc(size); }
-
-void *operator new[](std::size_t size) { return cbdb::Palloc(size); }
-
-void *operator new(std::size_t size, MemoryContext ctx) {
- return cbdb::MemCtxAlloc(ctx, size);
-}
-
-void *operator new[](std::size_t size, MemoryContext ctx) {
- return cbdb::MemCtxAlloc(ctx, size);
-}
-
-void operator delete(void *ptr) { if (ptr) cbdb::Pfree(ptr); }
-
-void operator delete[](void *ptr) { if (ptr) cbdb::Pfree(ptr); }
-
HTAB *cbdb::HashCreate(const char *tabname, int64 nelem, const HASHCTL *info,
int flags) {
CBDB_WRAP_START;
@@ -173,6 +157,14 @@ Datum cbdb::DatumFromPointer(const void *p, int16 typlen) {
}
#endif
+
+struct varlena *cbdb::PgDeToastDatum(struct varlena *datum) {
+ CBDB_WRAP_START;
+ { return detoast_attr(datum); }
+ CBDB_WRAP_END;
+ return nullptr;
+}
+
struct varlena *cbdb::PgDeToastDatumPacked(struct varlena *datum) {
CBDB_WRAP_START;
{ return pg_detoast_datum_packed(datum); }
@@ -191,39 +183,10 @@ void *cbdb::PointerAndLenFromDatum(Datum d, int *len) {
CBDB_WRAP_END;
}
-// pax ctid mapping functions
-
-void cbdb::InitCommandResource() {
+void cbdb::SlotGetMissingAttrs(TupleTableSlot *slot, int start_attno,
+ int last_attno) {
CBDB_WRAP_START;
- { paxc::init_command_resource(); }
- CBDB_WRAP_END;
-}
-void cbdb::ReleaseCommandResource() {
- CBDB_WRAP_START;
- { paxc::release_command_resource(); }
- CBDB_WRAP_END;
-}
-
-void cbdb::GetTableIndexAndTableNumber(Oid table_rel_oid, uint8 *table_no,
- uint32 *table_index) {
- CBDB_WRAP_START;
- {
- paxc::get_table_index_and_table_number(table_rel_oid, table_no,
- table_index);
- }
- CBDB_WRAP_END;
-}
-
-uint32 cbdb::GetBlockNumber(Oid table_rel_oid, uint32 table_index,
- paxc::PaxBlockId block_id) {
- CBDB_WRAP_START;
- { return paxc::get_block_number(table_rel_oid, table_index, block_id); }
- CBDB_WRAP_END;
-}
-paxc::PaxBlockId cbdb::GetBlockId(Oid table_rel_oid, uint8 table_no,
- uint32 block_number) {
- CBDB_WRAP_START;
- { return paxc::get_block_id(table_rel_oid, table_no, block_number); }
+ { slot_getmissingattrs(slot, start_attno, last_attno); }
CBDB_WRAP_END;
}
@@ -299,16 +262,10 @@ std::string cbdb::BuildPaxDirectoryPath(RelFileNode rd_node,
CBDB_WRAP_END;
}
-std::string cbdb::BuildPaxFilePath(const Relation rel,
+std::string cbdb::BuildPaxFilePath(const std::string &rel_path,
const std::string &block_id) {
- CBDB_WRAP_START;
- {
- char *tmp_str = paxc::BuildPaxFilePath(rel, block_id.c_str());
- std::string ret_str(tmp_str);
- pfree(tmp_str);
- return ret_str;
- }
- CBDB_WRAP_END;
+ Assert(!rel_path.empty());
+ return rel_path + "/" + block_id;
}
int cbdb::RelationGetAttributesNumber(Relation rel) {
@@ -317,56 +274,124 @@ int cbdb::RelationGetAttributesNumber(Relation rel) {
CBDB_WRAP_END;
}
+StdRdOptions **cbdb::RelGetAttributeOptions(Relation rel) {
+ CBDB_WRAP_START;
+ { return RelationGetAttributeOptions(rel); }
+ CBDB_WRAP_END;
+}
+
TupleDesc cbdb::RelationGetTupleDesc(Relation rel) {
CBDB_WRAP_START;
{ return RelationGetDescr(rel); }
CBDB_WRAP_END;
}
-bool cbdb::ExtractcolumnsFromNode(Node *expr, bool *cols, AttrNumber natts) {
+bool cbdb::IsSystemAttrNumExist(struct PaxcExtractcolumnContext *context,
+ AttrNumber number) {
+ Assert(number < 0 && number > FirstLowInvalidHeapAttributeNumber && context);
+ return context->system_attr_number_mask[~number];
+}
+
+extern "C" {
+
+static bool paxc_extractcolumns_walker( // NOLINT
+ Node *node, struct PaxcExtractcolumnContext *ec_ctx) {
+ if (node == NULL) {
+ return false;
+ }
+
+ if (IsA(node, Var)) {
+ Var *var = (Var *)node;
+
+ if (IS_SPECIAL_VARNO(var->varno)) return false;
+
+ if (var->varattno < 0) {
+ Assert(var->varattno > FirstLowInvalidHeapAttributeNumber);
+ ec_ctx->system_attr_number_mask[~var->varattno] = true;
+ } else if (ec_ctx->cols) {
+ if (var->varattno == 0) {
+ // If all attributes are included,
+ // set all entries in mask to true.
+ for (int attno = 0; attno < ec_ctx->natts; attno++)
+ ec_ctx->cols[attno] = true;
+ ec_ctx->found = true;
+ } else if (var->varattno <= ec_ctx->natts) {
+ ec_ctx->cols[var->varattno - 1] = true;
+ ec_ctx->found = true;
+ }
+ // Still need to fill `system_attr_number_mask`,
+ // so let this case fall through and return false.
+ }
+
+ return false;
+ }
+
+ return expression_tree_walker(node, (bool (*)())paxc_extractcolumns_walker,
+ (void *)ec_ctx);
+}
+
+}; // extern "C"
+
+bool cbdb::ExtractcolumnsFromNode(Node *expr,
+ struct PaxcExtractcolumnContext *ec_ctx) {
+ CBDB_WRAP_START;
+ {
+ paxc_extractcolumns_walker(expr, ec_ctx);
+ return ec_ctx->found;
+ }
+ CBDB_WRAP_END;
+}
+
+bool cbdb::ExtractcolumnsFromNode(Node *expr, bool *cols, int natts) {
CBDB_WRAP_START;
{ return extractcolumns_from_node(expr, cols, natts); }
CBDB_WRAP_END;
}
-bool cbdb::MinMaxGetStrategyProcinfo(Oid atttypid, Oid *procid, FmgrInfo *finfo, StrategyNumber strategynum)
-{
+bool cbdb::MinMaxGetStrategyProcinfo(Oid atttypid, Oid subtype, Oid *opfamily, FmgrInfo *finfo,
+ StrategyNumber strategynum) {
CBDB_WRAP_START;
- { return paxc::MinMaxGetStrategyProcinfo(atttypid, procid, finfo, strategynum); }
+ {
+ return paxc::MinMaxGetStrategyProcinfo(atttypid, subtype, opfamily, finfo,
+ strategynum);
+ }
CBDB_WRAP_END;
}
-Datum cbdb::FunctionCall1Coll(FmgrInfo *flinfo, Oid collation, Datum arg1)
-{
+Datum cbdb::FunctionCall1Coll(FmgrInfo *flinfo, Oid collation, Datum arg1) {
CBDB_WRAP_START;
- { return ::FunctionCall1Coll(flinfo, collation, arg1); }
+ { return ::FunctionCall1Coll(flinfo, collation, arg1); }
CBDB_WRAP_END;
}
-Datum cbdb::FunctionCall2Coll(FmgrInfo *flinfo, Oid collation, Datum arg1, Datum arg2)
-{
+Datum cbdb::FunctionCall2Coll(FmgrInfo *flinfo, Oid collation, Datum arg1,
+ Datum arg2) {
CBDB_WRAP_START;
- { return ::FunctionCall2Coll(flinfo, collation, arg1, arg2); }
+ { return ::FunctionCall2Coll(flinfo, collation, arg1, arg2); }
CBDB_WRAP_END;
}
-Datum cbdb::FunctionCall3Coll(FmgrInfo *flinfo, Oid collation, Datum arg1, Datum arg2, Datum arg3)
-{
+Datum cbdb::FunctionCall3Coll(FmgrInfo *flinfo, Oid collation, Datum arg1,
+ Datum arg2, Datum arg3) {
CBDB_WRAP_START;
- { return ::FunctionCall3Coll(flinfo, collation, arg1, arg2, arg3); }
+ { return ::FunctionCall3Coll(flinfo, collation, arg1, arg2, arg3); }
CBDB_WRAP_END;
}
-Datum cbdb::FunctionCall4Coll(FmgrInfo *flinfo, Oid collation, Datum arg1, Datum arg2, Datum arg3, Datum arg4)
-{
+Datum cbdb::FunctionCall4Coll(FmgrInfo *flinfo, Oid collation, Datum arg1,
+ Datum arg2, Datum arg3, Datum arg4) {
CBDB_WRAP_START;
- { return ::FunctionCall4Coll(flinfo, collation, arg1, arg2, arg3, arg4); }
+ { return ::FunctionCall4Coll(flinfo, collation, arg1, arg2, arg3, arg4); }
CBDB_WRAP_END;
}
-SysScanDesc cbdb::SystableBeginScan(Relation rel, Oid index_id, bool index_ok, Snapshot snapshot, int n_keys, ScanKey keys) {
+SysScanDesc cbdb::SystableBeginScan(Relation rel, Oid index_id, bool index_ok,
+ Snapshot snapshot, int n_keys,
+ ScanKey keys) {
CBDB_WRAP_START;
- { return systable_beginscan(rel, index_id, index_ok, snapshot, n_keys, keys); }
+ {
+ return systable_beginscan(rel, index_id, index_ok, snapshot, n_keys, keys);
+ }
CBDB_WRAP_END;
}
@@ -382,7 +407,8 @@ void cbdb::SystableEndScan(SysScanDesc desc) {
CBDB_WRAP_END;
}
-Datum cbdb::HeapGetAttr(HeapTuple tup, int attnum, TupleDesc tuple_desc, bool *isnull) {
+Datum cbdb::HeapGetAttr(HeapTuple tup, int attnum, TupleDesc tuple_desc,
+ bool *isnull) {
CBDB_WRAP_START;
{ return heap_getattr(tup, attnum, tuple_desc, isnull); }
CBDB_WRAP_END;
diff --git a/contrib/pax_storage/src/cpp/comm/cbdb_wrappers.h b/contrib/pax_storage/src/cpp/comm/cbdb_wrappers.h
index 9f80b5e614c..02b10f2d007 100644
--- a/contrib/pax_storage/src/cpp/comm/cbdb_wrappers.h
+++ b/contrib/pax_storage/src/cpp/comm/cbdb_wrappers.h
@@ -6,13 +6,28 @@
#include
#include "exceptions/CException.h"
-#include "storage/pax_block_id.h"
+
+struct PaxcExtractcolumnContext {
+ // If `cols` is set and ExtractcolumnsFromNode is called with a
+ // target list, then `cols` is filled with the projection mask.
+ bool *cols = nullptr;
+ int natts = 0;
+ bool found = false;
+
+ // This mask is used to filter system attribute numbers.
+ // (~AttrNumber) is the index, mapping onto
+ // [0, ~FirstLowInvalidHeapAttributeNumber). Call `IsSystemAttrNumExist`
+ // to check whether a system-defined attribute was recorded.
+ bool system_attr_number_mask[~FirstLowInvalidHeapAttributeNumber] = {
+ 0}; // NOLINT
+};
namespace cbdb {
#define PAX_ALLOCSET_DEFAULT_MINSIZE ALLOCSET_DEFAULT_MINSIZE
#define PAX_ALLOCSET_DEFAULT_INITSIZE (8 * 1024)
#define PAX_ALLOCSET_DEFAULT_MAXSIZE (3 * 64 * 1024 * 1024)
+
#define PAX_ALLOCSET_DEFAULT_SIZES \
PAX_ALLOCSET_DEFAULT_MINSIZE, PAX_ALLOCSET_DEFAULT_INITSIZE, \
PAX_ALLOCSET_DEFAULT_MAXSIZE
@@ -98,23 +113,17 @@ static inline Datum Int64ToDatum(int64 d) noexcept { return Int64GetDatum(d); }
void *PointerAndLenFromDatum(Datum d, int *len);
+void SlotGetMissingAttrs(TupleTableSlot *slot, int start_attno, int last_attno);
+
#ifdef RUN_GTEST
Datum DatumFromCString(const char *src, size_t length);
Datum DatumFromPointer(const void *p, int16 typlen);
#endif
-struct varlena *PgDeToastDatumPacked(struct varlena *datum);
+struct varlena *PgDeToastDatum(struct varlena *datum);
-// pax ctid mapping functions
-void InitCommandResource();
-void ReleaseCommandResource();
-void GetTableIndexAndTableNumber(Oid table_rel_oid, uint8 *table_no,
- uint32 *table_index);
-uint32 GetBlockNumber(Oid table_rel_oid, uint32 table_index,
- paxc::PaxBlockId block_id);
-paxc::PaxBlockId GetBlockId(Oid table_rel_oid, uint8 table_no,
- uint32 block_number);
+struct varlena *PgDeToastDatumPacked(struct varlena *datum);
void RelationCreateStorageDirectory(RelFileNode rnode, char relpersistence,
SMgrImpl smgr_which, Relation rel);
@@ -137,31 +146,44 @@ void MakedirRecursive(const char *path);
std::string BuildPaxDirectoryPath(RelFileNode rd_node, BackendId rd_backend);
+std::string BuildPaxFilePath(const std::string &rel_path, const std::string &block_id);
+
int RelationGetAttributesNumber(Relation rel);
+StdRdOptions **RelGetAttributeOptions(Relation rel);
TupleDesc RelationGetTupleDesc(Relation rel);
-bool ExtractcolumnsFromNode(Node *expr, bool *cols, AttrNumber natts);
+bool ExtractcolumnsFromNode(Node *expr,
+ struct PaxcExtractcolumnContext *ec_ctx);
+
+bool IsSystemAttrNumExist(struct PaxcExtractcolumnContext *context,
+ AttrNumber number);
-std::string BuildPaxFilePath(Relation rel, const std::string &block_id);
+bool ExtractcolumnsFromNode(Node *expr, bool *cols, int natts);
-bool MinMaxGetStrategyProcinfo(Oid atttypid, Oid *procid, FmgrInfo *finfo, StrategyNumber strategynum);
+bool MinMaxGetStrategyProcinfo(Oid atttypid, Oid subtype, Oid *opfamily, FmgrInfo *finfo,
+ StrategyNumber strategynum);
Datum FunctionCall1Coll(FmgrInfo *flinfo, Oid collation, Datum arg1);
-Datum FunctionCall2Coll(FmgrInfo *flinfo, Oid collation, Datum arg1, Datum arg2);
+Datum FunctionCall2Coll(FmgrInfo *flinfo, Oid collation, Datum arg1,
+ Datum arg2);
-Datum FunctionCall3Coll(FmgrInfo *flinfo, Oid collation, Datum arg1, Datum arg2, Datum arg3);
+Datum FunctionCall3Coll(FmgrInfo *flinfo, Oid collation, Datum arg1, Datum arg2,
+ Datum arg3);
-Datum FunctionCall4Coll(FmgrInfo *flinfo, Oid collation, Datum arg1, Datum arg2, Datum arg3, Datum arg4);
+Datum FunctionCall4Coll(FmgrInfo *flinfo, Oid collation, Datum arg1, Datum arg2,
+ Datum arg3, Datum arg4);
-SysScanDesc SystableBeginScan(Relation rel, Oid index_id, bool index_ok, Snapshot snapshot, int n_keys, ScanKey keys);
+SysScanDesc SystableBeginScan(Relation rel, Oid index_id, bool index_ok,
+ Snapshot snapshot, int n_keys, ScanKey keys);
HeapTuple SystableGetNext(SysScanDesc desc);
void SystableEndScan(SysScanDesc desc);
-Datum HeapGetAttr(HeapTuple tup, int attnum, TupleDesc tuple_desc, bool *isnull);
+Datum HeapGetAttr(HeapTuple tup, int attnum, TupleDesc tuple_desc,
+ bool *isnull);
Relation TableOpen(Oid relid, LOCKMODE lockmode);
@@ -188,13 +210,3 @@ void TableClose(Relation rel, LOCKMODE lockmode);
} \
}
// clang-format on
-
-// override the default new/delete to use current memory context
-extern void *operator new(std::size_t size);
-extern void *operator new[](std::size_t size);
-extern void operator delete(void *ptr);
-extern void operator delete[](void *ptr);
-
-// specify memory context for this allocation without switching memory context
-extern void *operator new(std::size_t size, MemoryContext ctx);
-extern void *operator new[](std::size_t size, MemoryContext ctx);
diff --git a/contrib/pax_storage/src/cpp/comm/comm_test.cc b/contrib/pax_storage/src/cpp/comm/comm_test.cc
index da867ecae32..3ff39adab1a 100644
--- a/contrib/pax_storage/src/cpp/comm/comm_test.cc
+++ b/contrib/pax_storage/src/cpp/comm/comm_test.cc
@@ -11,8 +11,6 @@ class CommTest : public ::testing::Test {
1 * 1024 * 1024, 1 * 1024 * 1024);
MemoryContextSwitchTo(comm_test_memory_context);
}
-
- void TearDown() override {}
};
TEST_F(CommTest, TestDeleteOperator) {
@@ -32,4 +30,19 @@ TEST_F(CommTest, TestDeleteOperator) {
delete[] array_obj;
}
+
+TEST_F(CommTest, TestNewOperator) {
+ auto obj = new bool[0];
+ ASSERT_NE(obj, nullptr);
+ delete[] obj;
+
+ auto obj2 = cbdb::Palloc(0);
+ ASSERT_NE(obj2, nullptr);
+ cbdb::Pfree(obj2);
+
+ auto obj3 = cbdb::Palloc0(0);
+ ASSERT_NE(obj3, nullptr);
+ cbdb::Pfree(obj3);
+}
+
} // namespace pax::tests
\ No newline at end of file
diff --git a/contrib/pax_storage/src/cpp/comm/guc.cc b/contrib/pax_storage/src/cpp/comm/guc.cc
new file mode 100644
index 00000000000..7ae3334ac50
--- /dev/null
+++ b/contrib/pax_storage/src/cpp/comm/guc.cc
@@ -0,0 +1,15 @@
+#include "comm/guc.h"
+
+#include "storage/pax_defined.h"
+
+namespace pax {
+bool pax_enable_debug = true;
+bool pax_enable_filter = true;
+int pax_scan_reuse_buffer_size = 0;
+int pax_max_tuples_per_group = VEC_BATCH_LENGTH;
+
+#ifdef ENABLE_PLASMA
+bool pax_enable_plasma_in_mem = true;
+#endif
+
+} // namespace pax
diff --git a/contrib/pax_storage/src/cpp/comm/guc.h b/contrib/pax_storage/src/cpp/comm/guc.h
new file mode 100644
index 00000000000..c53678dc0dc
--- /dev/null
+++ b/contrib/pax_storage/src/cpp/comm/guc.h
@@ -0,0 +1,13 @@
+#pragma once
+
+namespace pax {
+extern bool pax_enable_debug;
+extern bool pax_enable_filter;
+extern int pax_scan_reuse_buffer_size;
+extern int pax_max_tuples_per_group;
+
+#ifdef ENABLE_PLASMA
+extern bool pax_enable_plasma_in_mem;
+#endif
+
+} // namespace pax
diff --git a/contrib/pax_storage/src/cpp/comm/log.h b/contrib/pax_storage/src/cpp/comm/log.h
new file mode 100644
index 00000000000..21f2853b98e
--- /dev/null
+++ b/contrib/pax_storage/src/cpp/comm/log.h
@@ -0,0 +1,12 @@
+#pragma once
+
+// Should never call PAX_LOG* without PAX_ENABLE_DEBUG
+#define PAX_LOG_IF(ok, ...) \
+ do { \
+ if (ok) elog(LOG, __VA_ARGS__); \
+ } while (0)
+
+#define PAX_LOG(...) \
+ do { \
+ elog(LOG, __VA_ARGS__); \
+ } while (0)
diff --git a/contrib/pax_storage/src/cpp/comm/pax_defer.h b/contrib/pax_storage/src/cpp/comm/pax_defer.h
deleted file mode 100644
index ad39ba76bbd..00000000000
--- a/contrib/pax_storage/src/cpp/comm/pax_defer.h
+++ /dev/null
@@ -1,28 +0,0 @@
-#pragma once
-
-#include
-
-namespace pax {
-
-template
-class Defer {
- public:
- const F function;
-
- public:
- constexpr explicit Defer(const F &function) : function{function} {}
- constexpr explicit Defer(F &&function) : function{std::move(function)} {}
- ~Defer() { function(); }
-};
-
-template
-inline Defer make_defer(F &&function) {
- return Defer(std::forward(function));
-}
-
-} // namespace pax
-
-#define DEFER_CONCAT(n, ...) \
- const auto defer##n = pax::make_defer([&] { __VA_ARGS__; })
-#define DEFER_FORWARD(n, ...) DEFER_CONCAT(n, __VA_ARGS__)
-#define DEFER(...) DEFER_FORWARD(__LINE__, __VA_ARGS__)
diff --git a/contrib/pax_storage/src/cpp/comm/pax_memory.cc b/contrib/pax_storage/src/cpp/comm/pax_memory.cc
new file mode 100644
index 00000000000..b9c05660709
--- /dev/null
+++ b/contrib/pax_storage/src/cpp/comm/pax_memory.cc
@@ -0,0 +1,25 @@
+#include "comm/pax_memory.h"
+
+#include "comm/cbdb_wrappers.h"
+
+void *operator new(std::size_t size) { return cbdb::Palloc(size); }
+
+void *operator new[](std::size_t size) { return cbdb::Palloc(size); }
+
+void *operator new(std::size_t size, MemoryContext ctx) {
+ return cbdb::MemCtxAlloc(ctx, size);
+}
+
+void *operator new[](std::size_t size, MemoryContext ctx) {
+ return cbdb::MemCtxAlloc(ctx, size);
+}
+
+void operator delete(void *ptr) {
+ if (ptr) cbdb::Pfree(ptr);
+}
+
+void operator delete[](void *ptr) {
+ if (ptr) cbdb::Pfree(ptr);
+}
+
+
diff --git a/contrib/pax_storage/src/cpp/comm/pax_memory.h b/contrib/pax_storage/src/cpp/comm/pax_memory.h
new file mode 100644
index 00000000000..03a81c036b1
--- /dev/null
+++ b/contrib/pax_storage/src/cpp/comm/pax_memory.h
@@ -0,0 +1,60 @@
+#pragma once
+#include "comm/cbdb_api.h"
+#include
+
+//#include "memory_allocator.h"
+
+namespace pax {
+
+template
+static inline T* PAX_NEW(Args&&... args) {
+ return new T(std::forward(args)...);
+}
+
+template
+static inline T* PAX_NEW_ARRAY(size_t N) {
+ return new T[N];
+}
+
+template
+static inline void PAX_DELETE(T *&obj) {
+ delete obj;
+ obj = nullptr;
+}
+
+template
+static inline void PAX_DELETE_ARRAY(T *&obj) {
+ delete []obj;
+ obj = nullptr;
+}
+
+struct PaxMemoryDeleter {
+ template
+ inline void operator()(T* p) const {
+ delete p;
+ }
+};
+
+template
+using pax_unique_ptr = std::unique_ptr;
+
+template
+using pax_shared_ptr = std::shared_ptr;
+
+//template
+//using pax_unique_ptr = std::unique_ptr;
+
+//template
+//using pax_shared_ptr = std::shared_ptr;
+
+}
+
+// override the default new/delete to use current memory context
+extern void *operator new(std::size_t size);
+extern void *operator new[](std::size_t size);
+extern void operator delete(void *ptr);
+extern void operator delete[](void *ptr);
+
+// specify memory context for this allocation without switching memory context
+extern void *operator new(std::size_t size, MemoryContext ctx);
+extern void *operator new[](std::size_t size, MemoryContext ctx);
diff --git a/contrib/pax_storage/src/cpp/comm/pax_rel.h b/contrib/pax_storage/src/cpp/comm/pax_rel.h
new file mode 100644
index 00000000000..43934b475ff
--- /dev/null
+++ b/contrib/pax_storage/src/cpp/comm/pax_rel.h
@@ -0,0 +1,29 @@
+
+#ifndef SRC_CPP_COMM_PAX_REL_H_
+#define SRC_CPP_COMM_PAX_REL_H_
+
+// Oid of pg_ext_aux.pg_pax_tables
+#define PAX_TABLES_RELATION_ID 7061
+#define PAX_TABLES_RELID_INDEX_ID 7047
+
+#define PAX_TABLE_AM_OID 7047
+#define PAX_AMNAME "pax"
+#define PAX_AM_HANDLER_OID 7600
+#define PAX_AM_HANDLER_NAME "pax_tableam_handler"
+
+#define PAX_AUX_STATS_IN_OID 7601
+#define PAX_AUX_STATS_OUT_OID 7602
+#define PAX_AUX_STATS_TYPE_OID 7603
+#define PAX_AUX_STATS_TYPE_NAME "paxauxstats"
+
+#define PAX_FASTSEQUENCE_OID 7604
+#define PAX_FASTSEQUENCE_INDEX_OID 7605
+
+#define PG_PAX_FASTSEQUENCE_NAMESPACE "pg_ext_aux"
+#define PG_PAX_FASTSEQUENCE_TABLE "pg_pax_fastsequence"
+#define PG_PAX_FASTSEQUENCE_INDEX_NAME "pg_pax_fastsequence_objid_idx"
+
+#define AMHandlerIsPAX(amhandler) ((amhandler) == PAX_AM_HANDLER_OID)
+#define RelationIsPAX(relation) AMHandlerIsPAX((relation)->rd_amhandler)
+
+#endif // SRC_CPP_COMM_PAX_REL_H_
\ No newline at end of file
diff --git a/contrib/pax_storage/src/cpp/comm/paxc_wrappers.cc b/contrib/pax_storage/src/cpp/comm/paxc_wrappers.cc
index c968ae70940..219b7897259 100644
--- a/contrib/pax_storage/src/cpp/comm/paxc_wrappers.cc
+++ b/contrib/pax_storage/src/cpp/comm/paxc_wrappers.cc
@@ -155,21 +155,6 @@ char *BuildPaxDirectoryPath(RelFileNode rd_node, BackendId rd_backend) {
return paxrelpath;
}
-// BuildPaxFilePath: function used to build pax storage directory path following
-// pg convension, for example base/{database_oid}/{blocks_relid}_pax. parameter
-// rel IN Relation information. parameter block_id IN micro-partition block id.
-// return palloc'd pax storage directory path.
-char *BuildPaxFilePath(Relation rel, const char *block_id) {
- char *relpath = NULL;
- char *filepath = NULL;
-
- relpath = BuildPaxDirectoryPath(rel->rd_node, rel->rd_backend);
- Assert(relpath[0] != '\0');
- filepath = psprintf("%s/%s", relpath, block_id);
- pfree(relpath);
- return filepath;
-}
-
static void UnlinkIfExistsFname(const char *fname, bool isdir, int elevel) {
if (isdir) {
if (rmdir(fname) != 0 && errno != ENOENT)
@@ -224,12 +209,11 @@ static void DeletePaxDirectoryPathRecursive(
}
}
-bool MinMaxGetStrategyProcinfo(Oid atttypid, Oid *procid, FmgrInfo *finfo, StrategyNumber strategynum)
+bool MinMaxGetStrategyProcinfo(Oid atttypid, Oid subtype, Oid *opfamily, FmgrInfo *finfo, StrategyNumber strategynum)
{
FmgrInfo dummy;
HeapTuple tuple;
Oid opclass;
- Oid opfamily;
Oid oprid;
RegProcedure opcode;
bool isNull;
@@ -238,10 +222,10 @@ bool MinMaxGetStrategyProcinfo(Oid atttypid, Oid *procid, FmgrInfo *finfo, Strat
if (!OidIsValid(opclass))
return false;
- opfamily = get_opclass_family(opclass);
- tuple = SearchSysCache4(AMOPSTRATEGY, ObjectIdGetDatum(opfamily),
- ObjectIdGetDatum(atttypid),
+ *opfamily = get_opclass_family(opclass);
+ tuple = SearchSysCache4(AMOPSTRATEGY, ObjectIdGetDatum(*opfamily),
ObjectIdGetDatum(atttypid),
+ ObjectIdGetDatum(subtype),
Int16GetDatum(strategynum));
if (!HeapTupleIsValid(tuple))
@@ -257,8 +241,6 @@ bool MinMaxGetStrategyProcinfo(Oid atttypid, Oid *procid, FmgrInfo *finfo, Strat
return false;
fmgr_info_cxt(opcode, finfo ? finfo : &dummy, CurrentMemoryContext);
- *procid = opcode;
-
return true;
}
diff --git a/contrib/pax_storage/src/cpp/comm/paxc_wrappers.h b/contrib/pax_storage/src/cpp/comm/paxc_wrappers.h
index fc0315a0b22..d1f21f402b1 100644
--- a/contrib/pax_storage/src/cpp/comm/paxc_wrappers.h
+++ b/contrib/pax_storage/src/cpp/comm/paxc_wrappers.h
@@ -8,6 +8,5 @@ void CopyFile(const char *srcsegpath, const char *dstsegpath);
void DeletePaxDirectoryPath(const char *dirname, bool delete_topleveldir);
void MakedirRecursive(const char *path);
char *BuildPaxDirectoryPath(RelFileNode rd_node, BackendId rd_backend);
-char *BuildPaxFilePath(Relation rel, const char *block_id);
-bool MinMaxGetStrategyProcinfo(Oid atttypid, Oid *procid, FmgrInfo *finfo, StrategyNumber strategynum);
+bool MinMaxGetStrategyProcinfo(Oid atttypid, Oid subtype, Oid *opfamily, FmgrInfo *finfo, StrategyNumber strategynum);
} // namespace paxc
diff --git a/contrib/pax_storage/src/cpp/comm/singleton.h b/contrib/pax_storage/src/cpp/comm/singleton.h
index f53922f7b37..ef019b51683 100644
--- a/contrib/pax_storage/src/cpp/comm/singleton.h
+++ b/contrib/pax_storage/src/cpp/comm/singleton.h
@@ -3,6 +3,9 @@
#include
#include
#include
+
+#include "comm/pax_memory.h"
+
namespace pax {
template
diff --git a/contrib/pax_storage/src/cpp/contrib/cpp-stub b/contrib/pax_storage/src/cpp/contrib/cpp-stub
new file mode 160000
index 00000000000..93d20c639a9
--- /dev/null
+++ b/contrib/pax_storage/src/cpp/contrib/cpp-stub
@@ -0,0 +1 @@
+Subproject commit 93d20c639a99fe93068692803aeb1982ea10dd6c
diff --git a/contrib/pax_storage/src/cpp/contrib/googlebench b/contrib/pax_storage/src/cpp/contrib/googlebench
new file mode 160000
index 00000000000..c2de5261302
--- /dev/null
+++ b/contrib/pax_storage/src/cpp/contrib/googlebench
@@ -0,0 +1 @@
+Subproject commit c2de5261302fa307ebe06b24c0fc30653bed5e17
diff --git a/contrib/pax_storage/src/cpp/contrib/tabulate b/contrib/pax_storage/src/cpp/contrib/tabulate
new file mode 160000
index 00000000000..59f1c648070
--- /dev/null
+++ b/contrib/pax_storage/src/cpp/contrib/tabulate
@@ -0,0 +1 @@
+Subproject commit 59f1c6480705bae8e83800914e6ede4fb077b435
diff --git a/contrib/pax_storage/src/cpp/contrib/zstd b/contrib/pax_storage/src/cpp/contrib/zstd
deleted file mode 160000
index 1e6651126b5..00000000000
--- a/contrib/pax_storage/src/cpp/contrib/zstd
+++ /dev/null
@@ -1 +0,0 @@
-Subproject commit 1e6651126b5a0daf860c94d81cef019fb12283d7
diff --git a/contrib/pax_storage/src/cpp/pax_gbench.cc b/contrib/pax_storage/src/cpp/pax_gbench.cc
new file mode 100644
index 00000000000..d9e72f48e26
--- /dev/null
+++ b/contrib/pax_storage/src/cpp/pax_gbench.cc
@@ -0,0 +1,9 @@
+#include
+
+static void example_benchmark(benchmark::State &state) {
+ for (auto _ : state) {
+ }
+}
+BENCHMARK(example_benchmark);
+
+BENCHMARK_MAIN();
\ No newline at end of file
diff --git a/contrib/pax_storage/src/cpp/pax_gtest.cc b/contrib/pax_storage/src/cpp/pax_gtest.cc
new file mode 100644
index 00000000000..6cb6c0c3a8f
--- /dev/null
+++ b/contrib/pax_storage/src/cpp/pax_gtest.cc
@@ -0,0 +1,24 @@
+#include
+
+#include "stub.h"
+#include "comm/gtest_wrappers.h"
+#include "comm/cbdb_wrappers.h"
+
+bool MockMinMaxGetStrategyProcinfo(Oid, Oid, Oid *, FmgrInfo *,
+ StrategyNumber) {
+ return false;
+}
+
+// Mock global methods that are not linked in from other libraries
+void GlobalMock(Stub *stub) {
+ stub->set(cbdb::MinMaxGetStrategyProcinfo, MockMinMaxGetStrategyProcinfo);
+}
+
+int main(int argc, char **argv) {
+ Stub *stub_global;
+ stub_global = new Stub();
+ testing::InitGoogleTest(&argc, argv);
+ GlobalMock(stub_global);
+
+ return RUN_ALL_TESTS();
+}
diff --git a/contrib/pax_storage/src/cpp/pax_gtest_helper.cc b/contrib/pax_storage/src/cpp/pax_gtest_helper.cc
new file mode 100644
index 00000000000..a497986848e
--- /dev/null
+++ b/contrib/pax_storage/src/cpp/pax_gtest_helper.cc
@@ -0,0 +1,168 @@
+#include "pax_gtest_helper.h"
+
+#include "storage/micro_partition.h"
+
+namespace pax::tests {
+
+void GenTextBuffer(char *buffer, size_t length) {
+ for (size_t i = 0; i < length; i++) {
+ buffer[i] = static_cast(i);
+ }
+}
+
+void CreateMemoryContext() {
+ MemoryContext test_memory_context = AllocSetContextCreate(
+ (MemoryContext)NULL, "TestMemoryContext", 80 * 1024 * 1024,
+ 80 * 1024 * 1024, 80 * 1024 * 1024);
+ MemoryContextSwitchTo(test_memory_context);
+}
+
+void CreateTestResourceOwner() {
+ CurrentResourceOwner = ResourceOwnerCreate(NULL, "TestResourceOwner");
+}
+
+void ReleaseTestResourceOwner() {
+ ResourceOwner tmp_resource_owner = CurrentResourceOwner;
+ CurrentResourceOwner = NULL;
+ ResourceOwnerRelease(tmp_resource_owner, RESOURCE_RELEASE_BEFORE_LOCKS, false,
+ true);
+ ResourceOwnerRelease(tmp_resource_owner, RESOURCE_RELEASE_LOCKS, false, true);
+ ResourceOwnerRelease(tmp_resource_owner, RESOURCE_RELEASE_AFTER_LOCKS, false,
+ true);
+ ResourceOwnerDelete(tmp_resource_owner);
+}
+
+static TupleDesc CreateTestTupleDesc() {
+ auto tuple_desc = reinterpret_cast(cbdb::Palloc0(
+ sizeof(TupleDescData) + sizeof(FormData_pg_attribute) * COLUMN_NUMS));
+
+ tuple_desc->natts = COLUMN_NUMS;
+ tuple_desc->attrs[0] = {.atttypid = TEXTOID,
+ .attlen = -1,
+ .attbyval = false,
+ .attalign = TYPALIGN_DOUBLE,
+ .attisdropped = false,
+ .attcollation = DEFAULT_COLLATION_OID};
+
+ tuple_desc->attrs[1] = {.atttypid = TEXTOID,
+ .attlen = -1,
+ .attbyval = false,
+ .attalign = TYPALIGN_DOUBLE,
+ .attisdropped = false,
+ .attcollation = DEFAULT_COLLATION_OID};
+
+ tuple_desc->attrs[2] = {.atttypid = INT4OID,
+ .attlen = 4,
+ .attbyval = true,
+ .attalign = TYPALIGN_INT,
+ .attisdropped = false,
+ .attcollation = InvalidOid};
+ return tuple_desc;
+}
+
+TupleTableSlot *CreateTestTupleTableSlot(bool with_value) {
+ TupleTableSlot *tuple_slot = nullptr;
+ TupleDesc tuple_desc = nullptr;
+
+ tuple_desc = CreateTestTupleDesc();
+
+ tuple_slot = MakeTupleTableSlot(tuple_desc, &TTSOpsVirtual);
+
+ if (with_value) {
+ char column_buff[COLUMN_SIZE * 2];
+ GenTextBuffer(column_buff, COLUMN_SIZE);
+ GenTextBuffer(column_buff + COLUMN_SIZE, COLUMN_SIZE);
+
+ tuple_slot->tts_values[0] =
+ cbdb::DatumFromCString(column_buff, COLUMN_SIZE);
+ tuple_slot->tts_values[1] =
+ cbdb::DatumFromCString(column_buff + COLUMN_SIZE, COLUMN_SIZE);
+ tuple_slot->tts_values[2] = cbdb::Int32ToDatum(INT32_COLUMN_VALUE);
+ tuple_slot->tts_isnull[0] = false;
+ tuple_slot->tts_isnull[1] = false;
+ tuple_slot->tts_isnull[2] = false;
+ }
+
+ return tuple_slot;
+}
+
+static bool VerifyTestNonFixed(Datum datum, bool is_null) {
+ struct varlena *vl, *tunpacked;
+ int read_len;
+ char *read_data;
+ char column_buff[COLUMN_SIZE];
+
+ GenTextBuffer(column_buff, COLUMN_SIZE);
+
+ if (is_null) {
+ return false;
+ }
+
+ vl = (struct varlena *)DatumGetPointer(datum);
+ tunpacked = pg_detoast_datum_packed(vl);
+ if ((Pointer)vl != (Pointer)tunpacked) {
+ return false;
+ }
+
+ read_len = VARSIZE(tunpacked);
+ read_data = VARDATA_ANY(tunpacked);
+
+ if (read_len != COLUMN_SIZE + VARHDRSZ) {
+ return false;
+ }
+
+ if (std::memcmp(read_data, column_buff, COLUMN_SIZE) != 0) {
+ return false;
+ }
+ return true;
+}
+
+static bool VerifyTestFixed(Datum datum, bool is_null) {
+ return !is_null && cbdb::DatumToInt32(datum) == INT32_COLUMN_VALUE;
+}
+
+bool VerifyTestTupleTableSlot(TupleTableSlot *tuple_slot) {
+ bool ok = true;
+
+ if (!tuple_slot) {
+ return false;
+ }
+
+ ok &=
+ VerifyTestNonFixed(tuple_slot->tts_values[0], tuple_slot->tts_isnull[0]);
+ ok &=
+ VerifyTestNonFixed(tuple_slot->tts_values[1], tuple_slot->tts_isnull[1]);
+ ok &= VerifyTestFixed(tuple_slot->tts_values[2], tuple_slot->tts_isnull[2]);
+ return ok;
+}
+
+bool VerifyTestTupleTableSlot(TupleTableSlot *tuple_slot, int attrno) {
+ Assert(attrno <= 3 && attrno > 0);
+
+ if (!tuple_slot) {
+ return false;
+ }
+
+ if (attrno <= 2) {
+ return VerifyTestNonFixed(tuple_slot->tts_values[attrno - 1],
+ tuple_slot->tts_isnull[attrno - 1]);
+ } else {
+ return VerifyTestFixed(tuple_slot->tts_values[attrno - 1],
+ tuple_slot->tts_isnull[attrno - 1]);
+ }
+}
+
+void DeleteTestTupleTableSlot(TupleTableSlot *tuple_slot) {
+ cbdb::Pfree(tuple_slot->tts_tupleDescriptor);
+ cbdb::Pfree(tuple_slot);
+}
+
+std::vector CreateTestSchemaTypes() {
+ std::vector types;
+ types.emplace_back(pax::orc::proto::Type_Kind::Type_Kind_STRING);
+ types.emplace_back(pax::orc::proto::Type_Kind::Type_Kind_STRING);
+ types.emplace_back(pax::orc::proto::Type_Kind::Type_Kind_INT);
+ return types;
+}
+
+} // namespace pax::tests
diff --git a/contrib/pax_storage/src/cpp/pax_gtest_helper.h b/contrib/pax_storage/src/cpp/pax_gtest_helper.h
new file mode 100644
index 00000000000..b82bf97e037
--- /dev/null
+++ b/contrib/pax_storage/src/cpp/pax_gtest_helper.h
@@ -0,0 +1,26 @@
+#pragma once
+#include "comm/cbdb_api.h"
+
+#include
+
+#include "storage/proto/proto_wrappers.h"
+
+namespace pax::tests {
+
+// 3 columns - string(len 100), string(len 100), int(len 4)
+#define COLUMN_NUMS 3
+#define COLUMN_SIZE 100
+#define INT32_COLUMN_VALUE 0x123
+#define INT32_COLUMN_VALUE_DEFAULT 0x001
+
+extern void CreateMemoryContext();
+extern void CreateTestResourceOwner();
+extern void ReleaseTestResourceOwner();
+extern TupleTableSlot *CreateTestTupleTableSlot(bool with_value = true);
+extern bool VerifyTestTupleTableSlot(TupleTableSlot *tuple_slot);
+extern bool VerifyTestTupleTableSlot(TupleTableSlot *tuple_slot, int attrno);
+extern void DeleteTestTupleTableSlot(TupleTableSlot *tuple_slot);
+
+extern void GenTextBuffer(char *buffer, size_t length);
+extern std::vector CreateTestSchemaTypes();
+} // namespace pax::tests
diff --git a/contrib/pax_storage/src/cpp/storage/cache/pax_cache.cc b/contrib/pax_storage/src/cpp/storage/cache/pax_cache.cc
new file mode 100644
index 00000000000..9b135ee5a75
--- /dev/null
+++ b/contrib/pax_storage/src/cpp/storage/cache/pax_cache.cc
@@ -0,0 +1,14 @@
+#include "storage/cache/pax_cache.h"
+
+namespace pax {
+
+bool PaxCache::Status::Ok() const { return ok_; }
+
+std::string PaxCache::Status::Error() { return error_msg_; }
+
+void PaxCache::Status::SetError(const std::string &error_msg) {
+ ok_ = false;
+ error_msg_ = error_msg;
+}
+
+}; // namespace pax
\ No newline at end of file
diff --git a/contrib/pax_storage/src/cpp/storage/cache/pax_cache.h b/contrib/pax_storage/src/cpp/storage/cache/pax_cache.h
new file mode 100644
index 00000000000..121c78342a0
--- /dev/null
+++ b/contrib/pax_storage/src/cpp/storage/cache/pax_cache.h
@@ -0,0 +1,64 @@
+#pragma once
+
+#include
+#include
+
+namespace pax {
+
+class PaxCache {
+ public:
+ struct Status {
+ friend class PaxCache;
+
+ bool Ok() const;
+
+ std::string Error();
+
+ void SetError(const std::string &error_msg);
+
+ private:
+ bool ok_ = true;
+ std::string error_msg_;
+ };
+
+ struct BatchBuffer {
+ const char *buffer = nullptr;
+ size_t buffer_len = 0;
+ const char *meta = nullptr;
+ size_t meta_len = 0;
+
+ bool not_exist = false;
+ };
+
+ virtual ~PaxCache() = default;
+
+ virtual Status Initialize() = 0;
+
+ virtual Status Put(const std::string &key,
+ const BatchBuffer &batch_buffer) = 0;
+
+ virtual Status Put(const std::string &key,
+ const std::vector> &buffers,
+ const std::pair &meta) = 0;
+
+ virtual Status Exists(const std::string &key, bool *has) = 0;
+
+ virtual Status Get(const std::string &key, BatchBuffer &batch_buffer) = 0;
+
+ virtual Status Get(const std::vector &keys,
+ std::vector &batchs) = 0;
+
+ virtual Status Release(const std::string &key) = 0;
+
+ virtual Status Release(const std::vector &keys) = 0;
+
+ virtual Status Delete(const std::string &key) = 0;
+
+ virtual Status Delete(const std::vector &key) = 0;
+
+ virtual Status Destroy() = 0;
+
+ virtual size_t KeySizeLimit() = 0;
+};
+
+} // namespace pax
diff --git a/contrib/pax_storage/src/cpp/storage/cache/pax_cache_test.cc b/contrib/pax_storage/src/cpp/storage/cache/pax_cache_test.cc
new file mode 100644
index 00000000000..15ac2015dbb
--- /dev/null
+++ b/contrib/pax_storage/src/cpp/storage/cache/pax_cache_test.cc
@@ -0,0 +1,347 @@
+#ifdef ENABLE_PLASMA
+#include "plasma/store.h"
+#endif
+
+#include
+
+#include
+
+#include
+
+// #include "comm/gtest_wrappers.h"
+#include "pax_gtest_helper.h"
+#include "storage/cache/pax_cache.h"
+#include "storage/cache/pax_plasma_cache.h"
+#ifdef ENABLE_PLASMA
+
+namespace pax::tests {
+
+#define CACHE_DATA_LEN 100
+#define CACHE_META_LEN 20
+
+class PaxCacheTest : public ::testing::Test {
+ void SetUp() override {
+ plasma_server_ = std::thread([this] {
+ plasma::StartServer(
+ plasma_socket_ /* socket_name */, "" /* plasma_directory */,
+ false /* hugepages_enabled */, nullptr /* external_store */,
+ 10 * 1024 * 1024 /* system_memory */,
+ PLASMA_INFO /* plasmaLogSeverity */);
+ plasma::ShutdownServer();
+ });
+ sleep(1);
+ }
+
+ void TearDown() override {
+ plasma::StopServer();
+ plasma_server_.join();
+ }
+
+ protected:
+ static void PutKey(PaxCache *pax_cache, const std::string &key,
+ const PaxCache::BatchBuffer &input) {
+ auto status = pax_cache->Put(key, input);
+ ASSERT_TRUE(status.Ok()) << "fail to put key: " << key << status.Error();
+ }
+
+ static void Exist(PaxCache *pax_cache, const std::string &key, bool exist) {
+ bool exist_rc = false;
+ auto status = pax_cache->Exists(key, &exist_rc);
+ ASSERT_TRUE(status.Ok()) << status.Error();
+ ASSERT_TRUE(exist ? exist_rc : !exist_rc) << "key: " << key << " exist";
+ };
+
+ protected:
+ const int64_t client_memory_quota_ = 5 * 1024 * 1024;
+ char plasma_socket_[1024] = "/tmp/plasma";
+ std::thread plasma_server_;
+};
+
+TEST_F(PaxCacheTest, TestCacheInterface) {
+ PaxCache *pax_cache;
+ PaxPlasmaCache::CacheOptions cache_options;
+ PaxCache::Status status;
+ PaxCache::BatchBuffer batch_buffer{0};
+
+ cache_options.domain_socket = std::string(plasma_socket_);
+ cache_options.client_name = "CLI1";
+ cache_options.memory_quota = client_memory_quota_;
+ cache_options.waitting_ms = 0;
+
+ pax_cache = new PaxPlasmaCache(cache_options);
+ status = pax_cache->Initialize();
+ ASSERT_TRUE(status.Ok()) << status.Error();
+
+ // create 3 key
+ char data[CACHE_DATA_LEN];
+ char meta[CACHE_META_LEN];
+ GenTextBuffer(data, CACHE_DATA_LEN);
+ GenTextBuffer(meta, CACHE_META_LEN);
+
+ batch_buffer.buffer = data;
+ batch_buffer.buffer_len = CACHE_DATA_LEN;
+ batch_buffer.meta = nullptr;
+ batch_buffer.meta = 0;
+
+ PutKey(pax_cache, "key1", batch_buffer);
+ batch_buffer.meta = meta;
+ batch_buffer.meta_len = CACHE_META_LEN;
+
+ PutKey(pax_cache, "key2", batch_buffer);
+ PutKey(pax_cache, "key3", batch_buffer);
+
+ Exist(pax_cache, "key1", true);
+ Exist(pax_cache, "key2", true);
+ Exist(pax_cache, "key3", true);
+
+ batch_buffer.buffer = nullptr;
+ batch_buffer.buffer_len = 0;
+ batch_buffer.meta = nullptr;
+ batch_buffer.meta_len = 0;
+
+ // get + release
+ status = pax_cache->Get("key1", batch_buffer);
+ ASSERT_TRUE(status.Ok()) << status.Error();
+ EXPECT_EQ(CACHE_DATA_LEN, batch_buffer.buffer_len);
+ EXPECT_EQ(0, batch_buffer.meta_len);
+ EXPECT_EQ(0, std::memcmp(batch_buffer.buffer, data, CACHE_DATA_LEN));
+  // a meta buffer of size 0 is still allocated for the entry
+ EXPECT_NE(nullptr, batch_buffer.meta);
+
+ status = pax_cache->Get("key2", batch_buffer);
+ ASSERT_TRUE(status.Ok()) << status.Error();
+ EXPECT_EQ(CACHE_DATA_LEN, batch_buffer.buffer_len);
+ EXPECT_EQ(CACHE_META_LEN, batch_buffer.meta_len);
+ EXPECT_EQ(0, std::memcmp(batch_buffer.buffer, data, CACHE_DATA_LEN));
+ EXPECT_EQ(0, std::memcmp(batch_buffer.meta, meta, CACHE_META_LEN));
+
+ status = pax_cache->Get("key3", batch_buffer);
+ ASSERT_TRUE(status.Ok()) << status.Error();
+
+ status = pax_cache->Release("key1");
+ ASSERT_TRUE(status.Ok()) << status.Error();
+
+ std::vector release_list = {"key2", "key3"};
+ status = pax_cache->Release(release_list);
+ ASSERT_TRUE(status.Ok()) << status.Error();
+
+ status = pax_cache->Delete("key1");
+ ASSERT_TRUE(status.Ok()) << status.Error();
+
+ std::vector delete_list = {"key2", "key3"};
+ status = pax_cache->Delete(delete_list);
+ ASSERT_TRUE(status.Ok()) << status.Error();
+
+ Exist(pax_cache, "key1", false);
+ Exist(pax_cache, "key2", false);
+ Exist(pax_cache, "key3", false);
+
+ status = pax_cache->Destroy();
+ ASSERT_TRUE(status.Ok()) << status.Error();
+
+ delete pax_cache;
+}
+
+TEST_F(PaxCacheTest, TestLRUReplace) {
+ PaxCache *pax_cache;
+ PaxPlasmaCache::CacheOptions cache_options;
+ PaxCache::Status status;
+ PaxCache::BatchBuffer batch_buffer{0};
+
+ cache_options.domain_socket = std::string(plasma_socket_);
+ cache_options.client_name = "CLI1";
+ cache_options.memory_quota = CACHE_DATA_LEN * 3;
+ cache_options.waitting_ms = 0;
+
+ pax_cache = new PaxPlasmaCache(cache_options);
+ status = pax_cache->Initialize();
+ ASSERT_TRUE(status.Ok()) << status.Error();
+
+ char data[CACHE_DATA_LEN];
+ GenTextBuffer(data, CACHE_DATA_LEN);
+
+ batch_buffer.buffer = data;
+ batch_buffer.buffer_len = CACHE_DATA_LEN;
+ batch_buffer.meta = nullptr;
+ batch_buffer.meta_len = 0;
+
+ PutKey(pax_cache, "key1", batch_buffer);
+ PutKey(pax_cache, "key2", batch_buffer);
+ PutKey(pax_cache, "key3", batch_buffer);
+ PutKey(pax_cache, "key4", batch_buffer);
+ PutKey(pax_cache, "key5", batch_buffer);
+
+ status = pax_cache->Get("key1", batch_buffer);
+ ASSERT_TRUE(status.Ok()) << status.Error();
+ ASSERT_TRUE(batch_buffer.not_exist);
+
+ status = pax_cache->Get("key2", batch_buffer);
+ ASSERT_TRUE(status.Ok()) << status.Error();
+ ASSERT_TRUE(batch_buffer.not_exist);
+
+ status = pax_cache->Get("key3", batch_buffer);
+ ASSERT_TRUE(status.Ok()) << status.Error();
+ ASSERT_FALSE(batch_buffer.not_exist);
+
+ status = pax_cache->Release("key3");
+ ASSERT_TRUE(status.Ok()) << status.Error();
+
+ status = pax_cache->Delete("key3");
+ ASSERT_TRUE(status.Ok()) << status.Error();
+
+ status = pax_cache->Get("key4", batch_buffer);
+ ASSERT_TRUE(status.Ok()) << status.Error();
+ ASSERT_FALSE(batch_buffer.not_exist);
+
+ status = pax_cache->Release("key4");
+ ASSERT_TRUE(status.Ok()) << status.Error();
+
+ status = pax_cache->Delete("key4");
+ ASSERT_TRUE(status.Ok()) << status.Error();
+
+ status = pax_cache->Get("key5", batch_buffer);
+ ASSERT_TRUE(status.Ok()) << status.Error();
+ ASSERT_FALSE(batch_buffer.not_exist);
+
+ status = pax_cache->Release("key5");
+ ASSERT_TRUE(status.Ok()) << status.Error();
+
+ status = pax_cache->Delete("key5");
+ ASSERT_TRUE(status.Ok()) << status.Error();
+
+ status = pax_cache->Destroy();
+ ASSERT_TRUE(status.Ok()) << status.Error();
+
+ delete pax_cache;
+}
+
+TEST_F(PaxCacheTest, TestGetNoExist) {
+ PaxCache *pax_cache;
+ PaxPlasmaCache::CacheOptions cache_options;
+ PaxCache::Status status;
+ PaxCache::BatchBuffer batch_buffer{0};
+
+ cache_options.domain_socket = std::string(plasma_socket_);
+ cache_options.client_name = "CLI1";
+ cache_options.memory_quota = client_memory_quota_;
+ cache_options.waitting_ms = 0;
+
+ pax_cache = new PaxPlasmaCache(cache_options);
+ status = pax_cache->Initialize();
+ ASSERT_TRUE(status.Ok()) << status.Error();
+
+ char data[CACHE_DATA_LEN];
+ char meta[CACHE_META_LEN];
+ GenTextBuffer(data, CACHE_DATA_LEN);
+ GenTextBuffer(meta, CACHE_META_LEN);
+
+ batch_buffer.buffer = data;
+ batch_buffer.buffer_len = CACHE_DATA_LEN;
+ batch_buffer.meta = meta;
+ batch_buffer.meta_len = CACHE_META_LEN;
+
+ PutKey(pax_cache, "key1", batch_buffer);
+ Exist(pax_cache, "key1", true);
+
+ status = pax_cache->Get("key1", batch_buffer);
+ ASSERT_TRUE(status.Ok()) << status.Error();
+
+ status = pax_cache->Release("key1");
+ ASSERT_TRUE(status.Ok()) << status.Error();
+
+ status = pax_cache->Get("abc", batch_buffer);
+ ASSERT_TRUE(status.Ok());
+ ASSERT_TRUE(batch_buffer.not_exist);
+
+ std::vector batch_buffers;
+ status = pax_cache->Get({"key1", "abc"}, batch_buffers);
+ ASSERT_TRUE(status.Ok()) << status.Error();
+ ASSERT_FALSE(batch_buffers[0].not_exist);
+ ASSERT_TRUE(batch_buffers[1].not_exist);
+
+ status = pax_cache->Release("key1");
+ ASSERT_TRUE(status.Ok()) << status.Error();
+
+ status = pax_cache->Delete("key1");
+ ASSERT_TRUE(status.Ok()) << status.Error();
+
+ status = pax_cache->Destroy();
+ ASSERT_TRUE(status.Ok()) << status.Error();
+
+ delete pax_cache;
+}
+
+TEST_F(PaxCacheTest, TestDifferentClientDelete) {
+ PaxCache *pax_cache;
+ PaxPlasmaCache::CacheOptions cache_options;
+ PaxCache::Status status;
+ PaxCache::BatchBuffer batch_buffer{0};
+
+ cache_options.domain_socket = std::string(plasma_socket_);
+ cache_options.client_name = "CLI1";
+ cache_options.memory_quota = client_memory_quota_;
+ cache_options.waitting_ms = 0;
+
+ pax_cache = new PaxPlasmaCache(cache_options);
+ status = pax_cache->Initialize();
+ ASSERT_TRUE(status.Ok()) << status.Error();
+
+ char data[CACHE_DATA_LEN];
+ char meta[CACHE_META_LEN];
+ GenTextBuffer(data, CACHE_DATA_LEN);
+ GenTextBuffer(meta, CACHE_META_LEN);
+
+ batch_buffer.buffer = data;
+ batch_buffer.buffer_len = CACHE_DATA_LEN;
+ batch_buffer.meta = meta;
+ batch_buffer.meta_len = CACHE_META_LEN;
+
+ PutKey(pax_cache, "key1", batch_buffer);
+ Exist(pax_cache, "key1", true);
+
+ // CLI1 destroy
+ status = pax_cache->Destroy();
+ ASSERT_TRUE(status.Ok()) << status.Error();
+ delete pax_cache;
+
+ // create CLI2
+ cache_options.client_name = "CLI2";
+ pax_cache = new PaxPlasmaCache(cache_options);
+
+ status = pax_cache->Initialize();
+ ASSERT_TRUE(status.Ok()) << status.Error();
+
+ // check exist
+ Exist(pax_cache, "key1", true);
+
+ // get key1
+ batch_buffer.buffer = nullptr;
+ batch_buffer.buffer_len = 0;
+ batch_buffer.meta = nullptr;
+ batch_buffer.meta_len = 0;
+
+ status = pax_cache->Get("key1", batch_buffer);
+ ASSERT_TRUE(status.Ok()) << status.Error();
+ EXPECT_EQ(CACHE_DATA_LEN, batch_buffer.buffer_len);
+ EXPECT_EQ(CACHE_META_LEN, batch_buffer.meta_len);
+ EXPECT_EQ(0, std::memcmp(batch_buffer.buffer, data, CACHE_DATA_LEN));
+ EXPECT_EQ(0, std::memcmp(batch_buffer.meta, meta, CACHE_META_LEN));
+
+ status = pax_cache->Release("key1");
+ ASSERT_TRUE(status.Ok()) << status.Error();
+
+ // delete key1
+ status = pax_cache->Delete("key1");
+ ASSERT_TRUE(status.Ok()) << status.Error();
+
+ // should delete success
+ Exist(pax_cache, "key1", false);
+
+ status = pax_cache->Destroy();
+ ASSERT_TRUE(status.Ok()) << status.Error();
+ delete pax_cache;
+}
+
+} // namespace pax::tests
+
+#endif // ENABLE_PLASMA
diff --git a/contrib/pax_storage/src/cpp/storage/cache/pax_plasma_cache.cc b/contrib/pax_storage/src/cpp/storage/cache/pax_plasma_cache.cc
new file mode 100644
index 00000000000..2d9e941a1d2
--- /dev/null
+++ b/contrib/pax_storage/src/cpp/storage/cache/pax_plasma_cache.cc
@@ -0,0 +1,276 @@
+#include "storage/cache/pax_plasma_cache.h"
+
+#ifdef ENABLE_PLASMA
+#include <plasma/client.h>
+#include <plasma/common.h>
+#endif // ENABLE_PLASMA
+
+#include <cstring> // NOTE(review): include targets were lost in transit; memcpy/memset below need <cstring> — confirm against the original patch
+
+#include "comm/cbdb_wrappers.h"
+
+#ifdef ENABLE_PLASMA
+
+namespace pax {
+
+static inline plasma::ObjectID KeyToPlasmaId(const std::string &key, // builds a fixed-size plasma ObjectID from a cache key
+ size_t key_size_limit) {
+ plasma::ObjectID key_id;
+
+ Assert(key.length() <= key_size_limit); // callers guarantee the key fits inside one ObjectID
+ memcpy(key_id.mutable_data(), key.c_str(), key.length());
+ memset(key_id.mutable_data() + key.length(), 0,
+ key_size_limit - key.length()); // zero-pad the tail so equal keys always map to equal ids
+
+ return key_id;
+}
+
+static inline std::string PlasmaIdToKey(const plasma::ObjectID &key_id) { // inverse of KeyToPlasmaId; keeps the zero padding
+ std::string key;
+ key = key_id.binary(); // binary() yields the raw fixed-size id bytes
+ return key;
+}
+
+PaxPlasmaCache::PaxPlasmaCache(const CacheOptions &option)
+ : PaxCache(),
+ options_(option),
+ is_initialized_(false),
+ plasma_client_(PAX_NEW<plasma::PlasmaClient>()) {} // restore template arg stripped from PAX_NEW<...>; connection happens later in Initialize()
+
+PaxPlasmaCache::~PaxPlasmaCache() { PAX_DELETE(plasma_client_); };
+
+PaxCache::Status PaxPlasmaCache::Initialize() { // connect to the plasma store; must be called exactly once before any other call
+ PaxCache::Status status;
+ if (is_initialized_) {
+ status.SetError("Don't initialize twice.");
+ return status;
+ }
+
+ auto plasma_status = plasma_client_->Connect(
+ options_.domain_socket /*store_socket_name*/, "" /*manager_socket_name*/,
+ 0 /*release_delay*/, 3 /*num_retries*/);
+ CHECK_PLASMA_STATUS(plasma_status, status);
+
+ if (options_.memory_quota != 0) { // 0 means "no per-client quota", so skip SetClientOptions entirely
+ plasma_status = plasma_client_->SetClientOptions(options_.client_name,
+ options_.memory_quota);
+ CHECK_PLASMA_STATUS(plasma_status, status);
+ }
+
+ is_initialized_ = true;
+ return status;
+}
+
+PaxCache::Status PaxPlasmaCache::Put(const std::string &key, // store one contiguous buffer plus metadata under `key`
+ const BatchBuffer &batch_buffer) {
+ PaxCache::Status status;
+ plasma::ObjectID key_id;
+ std::shared_ptr<plasma::Buffer> plasma_buffer; // restore element type stripped from std::shared_ptr<...>
+
+ assert(is_initialized_);
+ assert(key.length() <= KeySizeLimit());
+ key_id = KeyToPlasmaId(key, KeySizeLimit());
+
+ plasma::Status plasma_status = plasma_client_->Create(
+ key_id, batch_buffer.buffer_len, (const uint8_t *)batch_buffer.meta,
+ batch_buffer.meta_len, &plasma_buffer); // Create allocates a writable object in shared memory
+ CHECK_PLASMA_STATUS(plasma_status, status);
+
+ assert((size_t)plasma_buffer->size() == batch_buffer.buffer_len);
+
+ memcpy(plasma_buffer->mutable_data(), batch_buffer.buffer,
+ batch_buffer.buffer_len);
+
+ plasma_status = plasma_client_->Seal(key_id); // Seal makes the object immutable and visible to other clients
+ CHECK_PLASMA_STATUS(plasma_status, status);
+
+ plasma_status = plasma_client_->Release(key_id); // drop the reference taken by Create
+ CHECK_PLASMA_STATUS(plasma_status, status);
+
+ return status;
+}
+
+PaxCache::Status PaxPlasmaCache::Put( // scatter/gather put: concatenate `buffers` into one plasma object
+ const std::string &key,
+ const std::vector<std::pair<char *, size_t>> &buffers, // restore pair element types stripped from the template args
+ const std::pair<char *, size_t> &meta) {
+ PaxCache::Status status;
+ plasma::ObjectID key_id;
+ std::shared_ptr<plasma::Buffer> plasma_buffer;
+ size_t total_size = 0;
+ size_t data_offset = 0;
+
+ assert(is_initialized_);
+ assert(key.length() <= KeySizeLimit());
+ key_id = KeyToPlasmaId(key, KeySizeLimit());
+
+ for (auto &pair : buffers) { // first pass: size the plasma allocation
+ total_size += pair.second;
+ }
+
+ plasma::Status plasma_status =
+ plasma_client_->Create(key_id, total_size, (const uint8_t *)meta.first,
+ meta.second, &plasma_buffer);
+ CHECK_PLASMA_STATUS(plasma_status, status);
+
+ assert((size_t)plasma_buffer->size() == total_size);
+
+ for (auto &pair : buffers) { // second pass: copy each fragment in order
+ memcpy(plasma_buffer->mutable_data() + data_offset, pair.first,
+ pair.second);
+ data_offset += pair.second;
+ }
+ Assert(data_offset == total_size);
+
+ plasma_status = plasma_client_->Seal(key_id);
+ CHECK_PLASMA_STATUS(plasma_status, status);
+
+ plasma_status = plasma_client_->Release(key_id);
+ CHECK_PLASMA_STATUS(plasma_status, status);
+ return status;
+}
+
+PaxCache::Status PaxPlasmaCache::Exists(const std::string &key, bool *has) { // set *has without fetching the payload
+ PaxCache::Status status;
+ plasma::ObjectID key_id;
+
+ assert(is_initialized_);
+ assert(key.length() <= KeySizeLimit());
+ key_id = KeyToPlasmaId(key, KeySizeLimit());
+
+ plasma::Status plasma_status = plasma_client_->Contains(key_id, has);
+ CHECK_PLASMA_STATUS(plasma_status, status);
+
+ return status;
+}
+
+PaxCache::Status PaxPlasmaCache::Get(const std::string &key, // borrows plasma memory; caller must Release(key) when done
+ BatchBuffer &batch_buffer) {
+ PaxCache::Status status;
+ plasma::ObjectID key_id;
+ plasma::ObjectBuffer obj_buffer;
+
+ assert(is_initialized_);
+ assert(key.length() <= KeySizeLimit());
+ key_id = KeyToPlasmaId(key, KeySizeLimit());
+ auto plasma_status =
+ plasma_client_->Get(&key_id, 1, options_.waitting_ms, &obj_buffer); // waits up to waitting_ms for the key to appear
+ CHECK_PLASMA_STATUS(plasma_status, status);
+
+ if (!obj_buffer.data) {
+ // not exist in server
+ batch_buffer.not_exist = true;
+ return status;
+ }
+
+ batch_buffer.buffer = (const char *)obj_buffer.data->data(); // points into shared memory; valid until Release(key)
+ batch_buffer.buffer_len = obj_buffer.data->size();
+ batch_buffer.meta = (const char *)obj_buffer.metadata->data(); // NOTE(review): assumes metadata is non-null whenever data is — confirm
+ batch_buffer.meta_len = obj_buffer.metadata->size();
+ batch_buffer.not_exist = false;
+
+ return status;
+}
+
+PaxCache::Status PaxPlasmaCache::Get(const std::vector<std::string> &keys, // batch get; missing keys are flagged via not_exist
+ std::vector<BatchBuffer> &batchs) {
+ PaxCache::Status status;
+ std::vector<plasma::ObjectID> key_ids(keys.size()); // std::vector instead of VLAs: VLAs are a non-standard C++ extension
+ std::vector<plasma::ObjectBuffer> obj_buffers(keys.size());
+
+ assert(is_initialized_);
+
+ for (size_t i = 0; i < keys.size(); i++) {
+ assert(keys[i].length() <= KeySizeLimit());
+ key_ids[i] = KeyToPlasmaId(keys[i], KeySizeLimit());
+ }
+
+ auto plasma_status = plasma_client_->Get(key_ids.data(), keys.size(),
+ options_.waitting_ms, obj_buffers.data());
+ CHECK_PLASMA_STATUS(plasma_status, status);
+
+ for (size_t i = 0; i < keys.size(); i++) {
+ BatchBuffer batch_buffer;
+ if (!obj_buffers[i].data) {
+ batch_buffer.not_exist = true;
+ } else {
+ batch_buffer.not_exist = false;
+ batch_buffer.buffer = (const char *)obj_buffers[i].data->data();
+ batch_buffer.buffer_len = obj_buffers[i].data->size();
+ batch_buffer.meta = (const char *)obj_buffers[i].metadata->data();
+ batch_buffer.meta_len = obj_buffers[i].metadata->size();
+ }
+ batchs.emplace_back(batch_buffer);
+ }
+
+ return status;
+}
+
+PaxCache::Status PaxPlasmaCache::Release(const std::string &key) { // return the reference taken by Get; Get's buffers become invalid
+ PaxCache::Status status;
+ plasma::ObjectID key_id;
+
+ assert(is_initialized_);
+ assert(key.length() <= KeySizeLimit());
+ key_id = KeyToPlasmaId(key, KeySizeLimit());
+ auto plasma_status = plasma_client_->Release(key_id);
+ CHECK_PLASMA_STATUS(plasma_status, status);
+ return status;
+}
+
+PaxCache::Status PaxPlasmaCache::Release(const std::vector<std::string> &keys) { // restore element type stripped from the template args
+ PaxCache::Status status;
+ plasma::ObjectID key_id;
+
+ assert(is_initialized_);
+ for (const auto &key : keys) { // stops at the first key whose Release fails
+ key_id = KeyToPlasmaId(key, KeySizeLimit()); // KeyToPlasmaId asserts the key fits, matching the single-key overload
+ auto plasma_status = plasma_client_->Release(key_id);
+ CHECK_PLASMA_STATUS(plasma_status, status);
+ }
+ return status;
+}
+
+PaxCache::Status PaxPlasmaCache::Delete(const std::string &key) { // asks the store to drop the object
+ PaxCache::Status status;
+ plasma::ObjectID key_id;
+
+ assert(is_initialized_);
+ assert(key.length() <= KeySizeLimit());
+ key_id = KeyToPlasmaId(key, KeySizeLimit());
+ plasma::Status plasma_status = plasma_client_->Delete(key_id); // NOTE(review): plasma defers deletion until all refs are released — confirm expected semantics
+ CHECK_PLASMA_STATUS(plasma_status, status);
+
+ return status;
+}
+
+PaxCache::Status PaxPlasmaCache::Delete(const std::vector<std::string> &keys) { // batch delete via the vector overload of PlasmaClient::Delete
+ PaxCache::Status status;
+ std::vector<plasma::ObjectID> key_ids; // restore element type stripped from the template args
+ // NOTE(review): removed an unused ObjectBuffer vector that was never passed anywhere
+
+ assert(is_initialized_);
+ for (const auto &key : keys) {
+ key_ids.emplace_back(KeyToPlasmaId(key, KeySizeLimit()));
+ }
+
+ plasma::Status plasma_status = plasma_client_->Delete(key_ids);
+ CHECK_PLASMA_STATUS(plasma_status, status);
+
+ return status;
+}
+
+PaxCache::Status PaxPlasmaCache::Destroy() { // disconnect from the store; pairs with Initialize()
+ PaxCache::Status status;
+ plasma::Status plasma_status = plasma_client_->Disconnect();
+ assert(is_initialized_);
+ is_initialized_ = false; // cleared even if Disconnect failed, so the cache can be re-initialized
+ CHECK_PLASMA_STATUS(plasma_status, status);
+ return status;
+}
+
+size_t PaxPlasmaCache::KeySizeLimit() { return plasma::kUniqueIDSize; }
+
+} // namespace pax
+
+#endif // ENABLE_PLASMA
diff --git a/contrib/pax_storage/src/cpp/storage/cache/pax_plasma_cache.h b/contrib/pax_storage/src/cpp/storage/cache/pax_plasma_cache.h
new file mode 100644
index 00000000000..ab3555a4461
--- /dev/null
+++ b/contrib/pax_storage/src/cpp/storage/cache/pax_plasma_cache.h
@@ -0,0 +1,82 @@
+#pragma once
+
+#ifdef ENABLE_PLASMA
+
+#include <string> // NOTE(review): include targets were lost in transit; std::string/std::vector/std::pair are used below — confirm originals
+#include <vector>
+
+#include "storage/cache/pax_cache.h"
+
+namespace plasma {
+class PlasmaClient;
+}
+
+namespace pax {
+
+#define CHECK_PLASMA_STATUS(plasma_status, status_rc) \
+ do { \
+ if (!(plasma_status).ok()) { \
+ (status_rc).SetError((plasma_status).ToString()); \
+ return (status_rc); \
+ } \
+ } while (0) // no trailing ';' — the call site supplies it, keeping brace-less if/else safe
+
+class PaxPlasmaCache : public PaxCache { // PaxCache backed by an Apache Arrow Plasma shared-memory object store
+ public:
+ struct CacheOptions {
+ std::string domain_socket; // unix domain socket of the plasma store server
+ // client_name + memory_quota limit this client's memory usage;
+ // memory_quota == 0 means no limit.
+ // Note: if the plasma server's capacity is less than memory_quota,
+ // Initialize() will fail.
+ std::string client_name;
+ size_t memory_quota = 0;
+
+ // How long a `Get` waits for a missing key: if the same key is
+ // put during this window, the data will still be returned.
+ // (milliseconds)
+ size_t waitting_ms = 0;
+ };
+
+ explicit PaxPlasmaCache(const CacheOptions &option);
+
+ ~PaxPlasmaCache() override;
+
+ PaxCache::Status Initialize() override; // connect to the store; call exactly once
+
+ PaxCache::Status Put(const std::string &key,
+ const BatchBuffer &batch_buffer) override;
+
+ PaxCache::Status Put(const std::string &key, // restore template args stripped in transit
+ const std::vector<std::pair<char *, size_t>> &buffers,
+ const std::pair<char *, size_t> &meta) override;
+
+ PaxCache::Status Exists(const std::string &key, bool *has) override;
+
+ PaxCache::Status Get(const std::string &key,
+ BatchBuffer &batch_buffer) override;
+
+ PaxCache::Status Get(const std::vector<std::string> &keys,
+ std::vector<BatchBuffer> &batchs) override;
+
+ PaxCache::Status Release(const std::string &key) override;
+
+ PaxCache::Status Release(const std::vector<std::string> &keys) override;
+
+ PaxCache::Status Delete(const std::string &key) override;
+
+ PaxCache::Status Delete(const std::vector<std::string> &keys) override;
+
+ PaxCache::Status Destroy() override; // disconnect; pairs with Initialize()
+
+ size_t KeySizeLimit() override; // == plasma::kUniqueIDSize
+
+ private:
+ CacheOptions options_;
+ bool is_initialized_; // set by Initialize(), cleared by Destroy()
+ plasma::PlasmaClient *plasma_client_; // owned; freed in the destructor
+};
+
+} // namespace pax
+
+#endif // ENABLE_PLASMA
diff --git a/contrib/pax_storage/src/cpp/storage/columns/pax_column.cc b/contrib/pax_storage/src/cpp/storage/columns/pax_column.cc
index 04eba6feb42..2ad0a7bfa57 100644
--- a/contrib/pax_storage/src/cpp/storage/columns/pax_column.cc
+++ b/contrib/pax_storage/src/cpp/storage/columns/pax_column.cc
@@ -6,125 +6,87 @@
#include
#include
-#include "comm/pax_defer.h"
+#include "storage/columns/pax_column_traits.h"
+#include "storage/pax_defined.h"
namespace pax {
PaxColumn::PaxColumn()
: null_bitmap_(nullptr),
+ total_rows_(0),
+ non_null_rows_(0),
encoded_type_(ColumnEncoding_Kind::ColumnEncoding_Kind_NO_ENCODED),
- storage_type_(PaxColumnStorageType::kTypeStorageNonVec) {}
+ compress_level_(0),
+ type_align_size_(PAX_DATA_NO_ALIGN) {}
-PaxColumn::~PaxColumn() {
- if (null_bitmap_) {
- delete null_bitmap_;
- }
-}
+PaxColumn::~PaxColumn() { PAX_DELETE(null_bitmap_); }
PaxColumnTypeInMem PaxColumn::GetPaxColumnTypeInMem() const {
return PaxColumnTypeInMem::kTypeInvalid;
}
-void PaxColumn::Clear() {
- if (null_bitmap_) {
- delete null_bitmap_;
- null_bitmap_ = nullptr;
- }
-}
-
bool PaxColumn::HasNull() { return null_bitmap_ != nullptr; }
-void PaxColumn::SetNulls(DataBuffer *null_bitmap) {
+bool PaxColumn::AllNull() const { // NOTE(review): relies on Bitmap8::Empty() meaning "no bit set", i.e. every row null — confirm
+ return null_bitmap_ && null_bitmap_->Empty();
+}
+
+void PaxColumn::SetBitmap(Bitmap8 *null_bitmap) {
Assert(!null_bitmap_);
null_bitmap_ = null_bitmap;
}
-DataBuffer *PaxColumn::GetNulls() const { return null_bitmap_; }
-
-std::pair PaxColumn::GetRangeNulls(size_t start_pos,
- size_t len) {
- Assert(null_bitmap_);
- CBDB_CHECK((start_pos + len) <= GetRows(),
- cbdb::CException::ExType::kExTypeOutOfRange);
+size_t PaxColumn::GetRows() const { return total_rows_; }
- static_assert(sizeof(char) == sizeof(bool));
- return std::make_pair(null_bitmap_->GetBuffer() + start_pos, len);
-}
+size_t PaxColumn::GetNonNullRows() const { return non_null_rows_; }
-size_t PaxColumn::GetRows() {
- return null_bitmap_ ? null_bitmap_->Used() : GetNonNullRows();
-}
+void PaxColumn::SetRows(size_t total_rows) { total_rows_ = total_rows; }
size_t PaxColumn::GetRangeNonNullRows(size_t start_pos, size_t len) {
CBDB_CHECK((start_pos + len) <= GetRows(),
cbdb::CException::ExType::kExTypeOutOfRange);
- if (null_bitmap_) {
- size_t total_non_null = 0;
- for (size_t i = start_pos; i < (start_pos + len); i++) {
- if ((*null_bitmap_)[i]) {
- total_non_null++;
- }
- }
-
- return total_non_null;
- } else {
- return len;
+ if (!null_bitmap_) return len;
+ if (len == 0) {
+ return 0;
}
+ return null_bitmap_->CountBits(start_pos, start_pos + len - 1);
+}
+
+void PaxColumn::CreateNulls(size_t cap) { // lazily allocate the null bitmap on the first AppendNull
+ Assert(!null_bitmap_);
+ null_bitmap_ = PAX_NEW<Bitmap8>(cap); // restore template arg stripped from PAX_NEW<...>; type matches SetBitmap(Bitmap8 *)
+ null_bitmap_->SetN(total_rows_); // rows appended before the first null were all non-null
}
void PaxColumn::AppendNull() {
if (!null_bitmap_) {
- size_t current_rows = GetNonNullRows();
- size_t size = current_rows > DEFAULT_CAPACITY
- ? (current_rows / DEFAULT_CAPACITY + 1) * DEFAULT_CAPACITY
- : DEFAULT_CAPACITY;
- null_bitmap_ = new DataBuffer(size);
- null_bitmap_->Brush(current_rows * sizeof(bool));
- memset(null_bitmap_->GetBuffer(), 1, null_bitmap_->Capacity());
+ CreateNulls(DEFAULT_CAPACITY);
}
-
- if (null_bitmap_->Available() == 0) {
- size_t old_cap = null_bitmap_->Capacity();
- null_bitmap_->ReSize(old_cap * 2);
- memset(null_bitmap_->GetAvailableBuffer(), 1, old_cap);
- }
-
- null_bitmap_->Write(false);
- null_bitmap_->Brush(sizeof(bool));
+ null_bitmap_->Clear(total_rows_);
+ ++total_rows_;
}
-void PaxColumn::Append([[maybe_unused]] char *buffer,
- [[maybe_unused]] size_t size) {
- if (null_bitmap_) {
- if (null_bitmap_->Available() == 0) {
- size_t old_cap = null_bitmap_->Capacity();
- null_bitmap_->ReSize(old_cap * 2);
- memset(null_bitmap_->GetAvailableBuffer(), 1, old_cap);
- }
- null_bitmap_->Brush(sizeof(bool));
- }
+void PaxColumn::Append(char * /*buffer*/, size_t /*size*/) { // base-class bookkeeping only; subclasses store the actual bytes
+ if (null_bitmap_) null_bitmap_->Set(total_rows_); // mark this row non-null
+ ++total_rows_;
+ ++non_null_rows_;
+}
-PaxColumn *PaxColumn::SetColumnEncodeType(ColumnEncoding_Kind encoding_type) {
- encoded_type_ = encoding_type;
- return this;
-}
+size_t PaxColumn::GetAlignSize() const { return type_align_size_; }
-PaxColumn *PaxColumn::SetColumnStorageType(PaxColumnStorageType storage_type) {
- storage_type_ = storage_type;
- return this;
+void PaxColumn::SetAlignSize(size_t align_size) {
+ Assert(align_size > 0 && (align_size & (align_size - 1)) == 0);
+ type_align_size_ = align_size;
}
-ColumnEncoding_Kind PaxColumn::GetEncodingType() const { return encoded_type_; }
-
template
-PaxCommColumn::PaxCommColumn(uint64 capacity) : capacity_(capacity) {
- data_ = new DataBuffer(capacity * sizeof(T));
+PaxCommColumn<T>::PaxCommColumn(uint32 capacity) { // restore the <T> qualifiers stripped in transit
+ data_ = PAX_NEW<DataBuffer<T>>(capacity * sizeof(T)); // capacity counts elements; the buffer is sized in bytes
+}
template
PaxCommColumn::~PaxCommColumn() {
- delete data_;
+ PAX_DELETE(data_);
}
template // NOLINT: redirect constructor
@@ -132,7 +94,7 @@ PaxCommColumn::PaxCommColumn() : PaxCommColumn(DEFAULT_CAPACITY) {}
template
void PaxCommColumn::Set(DataBuffer *data) {
- delete data_;
+ PAX_DELETE(data_);
data_ = data;
}
@@ -145,10 +107,10 @@ void PaxCommColumn::Append(char *buffer, size_t size) {
// TODO(jiaqizho): Is it necessary to support multiple buffer insertions for
// bulk insert push to mirco partition?
Assert(size == sizeof(T));
- Assert(GetNonNullRows() <= capacity_);
+ Assert(data_->Capacity() >= sizeof(T));
- if (GetNonNullRows() == capacity_) {
- ReSize(capacity_ * 2);
+ if (data_->Available() == 0) {
+ data_->ReSize(data_->Used() + size, 2);
}
data_->Write(buffer_t, sizeof(T));
@@ -156,22 +118,13 @@ void PaxCommColumn::Append(char *buffer, size_t size) {
}
template
-PaxColumnTypeInMem PaxCommColumn::GetPaxColumnTypeInMem() const {
- return PaxColumnTypeInMem::kTypeFixed;
-}
-
-template
-void PaxCommColumn::Clear() {
- PaxColumn::Clear();
- data_->BrushBackAll();
+PaxStorageFormat PaxCommColumn<T>::GetStorageFormat() const { // restore the <T> qualifier stripped in transit
+ return PaxStorageFormat::kTypeStorageOrcNonVec;
}
template
-void PaxCommColumn