diff --git a/contrib/pax_storage/.ci/tf/qingcloud-provider.tf b/contrib/pax_storage/.ci/tf/qingcloud-provider.tf new file mode 100644 index 00000000000..02941a05be8 --- /dev/null +++ b/contrib/pax_storage/.ci/tf/qingcloud-provider.tf @@ -0,0 +1,86 @@ +variable "qingcloud_access_key" { + sensitive = true + type = string +} + +variable "qingcloud_secret_key" { + sensitive = true + type = string +} + +variable "qingcloud_zone" { + default = "pek3c" +} + +variable "instance_name" { + default = "ci" +} + +variable "instance_image" { + default = "img-qbpas5m2" +} + +variable "instance_class" { + default = 202 +} + +variable "instance_cpu" { + default = 16 +} + +variable "instance_memory" { + default = 16384 +} + +variable "instance_os_disk_size" { + default = 100 +} + +variable "instance_vxnet" { + default = "vxnet-5tjdylj" +} + +variable "instance_keypair" { + default = [ + "kp-o07unn26"] +} + +terraform { + required_providers { + qingcloud = { + source = "HashDataInc/qingcloud" + version = "1.2.7" + } + ansible = { + source = "nbering/ansible" + version = "1.0.4" + } + } +} + +provider "qingcloud" { + access_key = var.qingcloud_access_key + secret_key = var.qingcloud_secret_key + zone = var.qingcloud_zone +} + + +resource "qingcloud_instance" "ci" { + name = var.instance_name + image_id = var.instance_image + instance_class = var.instance_class + cpu = var.instance_cpu + memory = var.instance_memory + os_disk_size = var.instance_os_disk_size + managed_vxnet_id = var.instance_vxnet + keypair_ids = var.instance_keypair +} + +resource "ansible_host" "ci" { + inventory_hostname = qingcloud_instance.ci.private_ip + groups = [ + "runner"] + vars = { + ansible_user = "root" + } +} diff --git a/contrib/pax_storage/.clang-format b/contrib/pax_storage/.clang-format new file mode 100644 index 00000000000..aadee9ee61d --- /dev/null +++ b/contrib/pax_storage/.clang-format @@ -0,0 +1,175 @@ +--- +Language: Cpp +# BasedOnStyle: Google +AccessModifierOffset: -1 +AlignAfterOpenBracket: Align +AlignConsecutiveMacros: false +AlignConsecutiveAssignments: false +AlignConsecutiveDeclarations: false +AlignEscapedNewlines: Left +AlignOperands: true +AlignTrailingComments: true +AllowAllArgumentsOnNextLine: true +AllowAllConstructorInitializersOnNextLine: true +AllowAllParametersOfDeclarationOnNextLine: true +AllowShortBlocksOnASingleLine: Never +AllowShortCaseLabelsOnASingleLine: false +AllowShortFunctionsOnASingleLine: All +AllowShortLambdasOnASingleLine: All +AllowShortIfStatementsOnASingleLine: WithoutElse +AllowShortLoopsOnASingleLine: true +AlwaysBreakAfterDefinitionReturnType: None +AlwaysBreakAfterReturnType: None +AlwaysBreakBeforeMultilineStrings: true +AlwaysBreakTemplateDeclarations: Yes +BinPackArguments: true +BinPackParameters: true +BraceWrapping: + AfterCaseLabel: false + AfterClass: false + AfterControlStatement: false + AfterEnum: false + AfterFunction: false + AfterNamespace: false + AfterObjCDeclaration: false + AfterStruct: false + AfterUnion: false + AfterExternBlock: false + BeforeCatch: false + BeforeElse: false + IndentBraces: false + SplitEmptyFunction: true + SplitEmptyRecord: true + SplitEmptyNamespace: true +BreakBeforeBinaryOperators: None +BreakBeforeBraces: Attach +BreakBeforeInheritanceComma: false +BreakInheritanceList: BeforeColon +BreakBeforeTernaryOperators: true +BreakConstructorInitializersBeforeComma: false +BreakConstructorInitializers: BeforeColon +BreakAfterJavaFieldAnnotations: false +BreakStringLiterals: true +ColumnLimit: 80 +CommentPragmas: '^ IWYU pragma:' 
+CompactNamespaces: false +ConstructorInitializerAllOnOneLineOrOnePerLine: true +ConstructorInitializerIndentWidth: 4 +ContinuationIndentWidth: 4 +Cpp11BracedListStyle: true +DeriveLineEnding: true +DerivePointerAlignment: false +DisableFormat: false +ExperimentalAutoDetectBinPacking: false +FixNamespaceComments: true +ForEachMacros: + - foreach + - Q_FOREACH + - BOOST_FOREACH +IncludeBlocks: Regroup +IncludeCategories: + - Regex: '"postgres\.h"' + Priority: -1 + - Regex: '^' + Priority: -1 + - Regex: '"comm/cbdb_api\.h"' + Priority: 1 + - Regex: '^<.*\.h>' + Priority: 2 + SortPriority: 0 + - Regex: '^<.*' + Priority: 3 + SortPriority: 0 + - Regex: '^' + Priority: 4 + SortPriority: 0 + - Regex: '.*' + Priority: 5 + SortPriority: 0 +IncludeIsMainRegex: '([-_](test|unittest))?$' +IncludeIsMainSourceRegex: '' +IndentCaseLabels: true +IndentGotoLabels: true +IndentPPDirectives: None +IndentWidth: 2 +IndentWrappedFunctionNames: false +JavaScriptQuotes: Leave +JavaScriptWrapImports: true +KeepEmptyLinesAtTheStartOfBlocks: false +MacroBlockBegin: '' +MacroBlockEnd: '' +MaxEmptyLinesToKeep: 1 +NamespaceIndentation: None +ObjCBinPackProtocolList: Never +ObjCBlockIndentWidth: 2 +ObjCSpaceAfterProperty: false +ObjCSpaceBeforeProtocolList: true +PenaltyBreakAssignment: 2 +PenaltyBreakBeforeFirstCallParameter: 1 +PenaltyBreakComment: 300 +PenaltyBreakFirstLessLess: 120 +PenaltyBreakString: 1000 +PenaltyBreakTemplateDeclaration: 10 +PenaltyExcessCharacter: 1000000 +PenaltyReturnTypeOnItsOwnLine: 200 +PointerAlignment: Right +ReferenceAlignment: Pointer +RawStringFormats: + - Language: Cpp + Delimiters: + - cc + - CC + - cpp + - Cpp + - CPP + - 'c++' + - 'C++' + CanonicalDelimiter: '' + BasedOnStyle: google + - Language: TextProto + Delimiters: + - pb + - PB + - proto + - PROTO + EnclosingFunctions: + - EqualsProto + - EquivToProto + - PARSE_PARTIAL_TEXT_PROTO + - PARSE_TEST_PROTO + - PARSE_TEXT_PROTO + - ParseTextOrDie + - ParseTextProtoOrDie + CanonicalDelimiter: '' + BasedOnStyle: google +ReflowComments: true +SortIncludes: true +SortUsingDeclarations: true +SpaceAfterCStyleCast: false +SpaceAfterLogicalNot: false +SpaceAfterTemplateKeyword: true +SpaceBeforeAssignmentOperators: true +SpaceBeforeCpp11BracedList: false +SpaceBeforeCtorInitializerColon: true +SpaceBeforeInheritanceColon: true +SpaceBeforeParens: ControlStatements +SpaceBeforeRangeBasedForLoopColon: true +SpaceInEmptyBlock: false +SpaceInEmptyParentheses: false +SpacesBeforeTrailingComments: 2 +SpacesInAngles: false +SpacesInConditionalStatement: false +SpacesInContainerLiterals: true +SpacesInCStyleCastParentheses: false +SpacesInParentheses: false +SpacesInSquareBrackets: false +SpaceBeforeSquareBrackets: false +Standard: Auto +StatementMacros: + - Q_UNUSED + - QT_REQUIRE_VERSION +TabWidth: 8 +UseCRLF: false +UseTab: Never +... 
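For orientation, a minimal C++ fragment (hypothetical names, for illustration only) that already conforms to the key options above: Google base style, 2-space indents, attached braces, right-aligned pointers, and two spaces before trailing comments:

```
namespace pax {

class ColumnReader {
 public:
  explicit ColumnReader(const char *name) : name_(name) {}
  bool ReadNext(char *buf, int len);  // trailing comments get two spaces

 private:
  const char *name_;  // PointerAlignment: Right binds '*' to the name
};

}  // namespace pax
```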
+ diff --git a/contrib/pax_storage/.clang-tidy b/contrib/pax_storage/.clang-tidy new file mode 100644 index 00000000000..6b6594d4fb7 --- /dev/null +++ b/contrib/pax_storage/.clang-tidy @@ -0,0 +1,228 @@ +Checks: '-*, + misc-throw-by-value-catch-by-reference, + misc-unconventional-assign-operator, + misc-redundant-expression, + misc-static-assert, + misc-unconventional-assign-operator, + misc-uniqueptr-reset-release, + misc-unused-alias-decls, + misc-unused-parameters, + misc-unused-using-decls, + + modernize-avoid-bind, + modernize-loop-convert, + modernize-make-shared, + modernize-make-unique, + modernize-raw-string-literal, + modernize-redundant-void-arg, + modernize-replace-auto-ptr, + modernize-replace-random-shuffle, + modernize-use-auto, + modernize-use-bool-literals, + modernize-use-using, + modernize-use-override, + modernize-use-equals-default, + modernize-use-equals-delete, + + performance-faster-string-find, + performance-for-range-copy, + performance-implicit-conversion-in-loop, + performance-inefficient-algorithm, + performance-inefficient-vector-operation, + performance-move-constructor-init, + performance-no-automatic-move, + performance-trivially-destructible, + performance-unnecessary-copy-initialization, + + readability-avoid-const-params-in-decls, + readability-const-return-type, + readability-container-size-empty, + readability-convert-member-functions-to-static, + readability-deleted-default, + readability-make-member-function-const, + readability-misplaced-array-index, + readability-non-const-parameter, + readability-redundant-control-flow, + readability-redundant-function-ptr-dereference, + readability-redundant-smartptr-get, + readability-redundant-string-cstr, + readability-redundant-string-init, + readability-static-definition-in-anonymous-namespace, + readability-string-compare, + readability-uniqueptr-delete-release, + readability-redundant-member-init, + readability-simplify-subscript-expr, + readability-simplify-boolean-expr, + readability-inconsistent-declaration-parameter-name, + readability-identifier-naming, + + bugprone-undelegated-constructor, + bugprone-argument-comment, + bugprone-bad-signal-to-kill-thread, + bugprone-bool-pointer-implicit-conversion, + bugprone-copy-constructor-init, + bugprone-dangling-handle, + bugprone-forward-declaration-namespace, + bugprone-fold-init-type, + bugprone-inaccurate-erase, + bugprone-incorrect-roundings, + bugprone-infinite-loop, + bugprone-integer-division, + bugprone-macro-parentheses, + bugprone-macro-repeated-side-effects, + bugprone-misplaced-operator-in-strlen-in-alloc, + bugprone-misplaced-pointer-arithmetic-in-alloc, + bugprone-misplaced-widening-cast, + bugprone-move-forwarding-reference, + bugprone-multiple-statement-macro, + bugprone-parent-virtual-call, + bugprone-posix-return, + bugprone-reserved-identifier, + bugprone-signed-char-misuse, + bugprone-sizeof-container, + bugprone-sizeof-expression, + bugprone-string-constructor, + bugprone-string-integer-assignment, + bugprone-string-literal-with-embedded-nul, + bugprone-suspicious-enum-usage, + bugprone-suspicious-include, + bugprone-suspicious-memset-usage, + bugprone-suspicious-missing-comma, + bugprone-suspicious-string-compare, + bugprone-swapped-arguments, + bugprone-terminating-continue, + bugprone-throw-keyword-missing, + bugprone-too-small-loop-variable, + bugprone-undefined-memory-manipulation, + bugprone-unhandled-self-assignment, + bugprone-unused-raii, + bugprone-unused-return-value, + bugprone-use-after-move, + bugprone-virtual-near-miss, +
bugprone-assert-side-effect, + + cert-dcl21-cpp, + cert-env33-c, + cert-err34-c, + cert-err52-cpp, + cert-flp30-c, + cert-mem57-cpp, + cert-msc50-cpp, + cert-oop58-cpp, + + google-build-explicit-make-pair, + google-build-namespaces, + google-default-arguments, + google-explicit-constructor, + google-readability-avoid-underscore-in-googletest-name, + google-runtime-int, + google-runtime-operator, + + hicpp-exception-baseclass, + + clang-analyzer-core.CallAndMessage, + clang-analyzer-core.DivideZero, + clang-analyzer-core.NonNullParamChecker, + clang-analyzer-core.NullDereference, + clang-analyzer-core.StackAddressEscape, + clang-analyzer-core.UndefinedBinaryOperatorResult, + clang-analyzer-core.VLASize, + clang-analyzer-core.uninitialized.ArraySubscript, + clang-analyzer-core.uninitialized.Assign, + clang-analyzer-core.uninitialized.Branch, + clang-analyzer-core.uninitialized.CapturedBlockVariable, + clang-analyzer-core.uninitialized.UndefReturn, + clang-analyzer-cplusplus.InnerPointer, + clang-analyzer-cplusplus.NewDelete, + clang-analyzer-cplusplus.PlacementNewChecker, + clang-analyzer-cplusplus.SelfAssignment, + clang-analyzer-deadcode.DeadStores, + clang-analyzer-optin.cplusplus.VirtualCall, + clang-analyzer-security.insecureAPI.UncheckedReturn, + clang-analyzer-security.insecureAPI.bcmp, + clang-analyzer-security.insecureAPI.bcopy, + clang-analyzer-security.insecureAPI.bzero, + clang-analyzer-security.insecureAPI.getpw, + clang-analyzer-security.insecureAPI.gets, + clang-analyzer-security.insecureAPI.mkstemp, + clang-analyzer-security.insecureAPI.mktemp, + clang-analyzer-security.insecureAPI.rand, + clang-analyzer-security.insecureAPI.strcpy, + clang-analyzer-unix.Malloc, + clang-analyzer-unix.MallocSizeof, + clang-analyzer-unix.MismatchedDeallocator, + clang-analyzer-unix.Vfork, + clang-analyzer-unix.cstring.BadSizeArg, + clang-analyzer-unix.cstring.NullArg, + + boost-use-to-string, + + cppcoreguidelines-pro-type-member-init, + cppcoreguidelines-no-malloc, + cppcoreguidelines-virtual-class-destructor, +' +WarningsAsErrors: '*' + +CheckOptions: + - key: readability-identifier-naming.ClassCase + value: CamelCase + - key: readability-identifier-naming.ClassMemberCase + value: lower_case + - key: readability-identifier-naming.ConstexprVariableCase + value: CamelCase + - key: readability-identifier-naming.ConstexprVariablePrefix + value: k + - key: readability-identifier-naming.EnumCase + value: CamelCase + - key: readability-identifier-naming.EnumConstantCase + value: CamelCase + - key: readability-identifier-naming.EnumConstantPrefix + value: k + - key: readability-identifier-naming.FunctionCase + value: CamelCase + - key: readability-identifier-naming.GlobalConstantCase + value: CamelCase + - key: readability-identifier-naming.GlobalConstantPrefix + value: k + - key: readability-identifier-naming.StaticConstantCase + value: CamelCase + - key: readability-identifier-naming.StaticConstantPrefix + value: k + - key: readability-identifier-naming.StaticVariableCase + value: lower_case + - key: readability-identifier-naming.MacroDefinitionCase + value: UPPER_CASE + - key: readability-identifier-naming.MacroDefinitionIgnoredRegexp + value: '^[A-Z]+(_[A-Z]+)*_$' + - key: readability-identifier-naming.MemberCase + value: lower_case + - key: readability-identifier-naming.PrivateMemberSuffix + value: _ + - key: readability-identifier-naming.ProtectedMemberSuffix + value: _ + - key: readability-identifier-naming.PublicMemberSuffix + value: '' + - key: readability-identifier-naming.NamespaceCase + 
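+  # NOTE: lower_case namespaces match the pax/paxc namespaces used under src/cpp.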
value: lower_case + - key: readability-identifier-naming.ParameterCase + value: lower_case + - key: readability-identifier-naming.TypeAliasCase + value: CamelCase + - key: readability-identifier-naming.TypedefCase + value: CamelCase + - key: readability-identifier-naming.VariableCase + value: lower_case + - key: readability-identifier-naming.IgnoreMainLikeFunctions + value: 1 + - key: bugprone-assert-side-effect.AssertMacros + value: assert,DCHECK + - key: bugprone-dangling-handle.HandleClasses + value: ::std::basic_string_view;::std::span;::absl::string_view;::base::BasicStringPiece;::base::span + - key: bugprone-string-constructor.StringNames + value: ::std::basic_string;::std::basic_string_view;::base::BasicStringPiece;::absl::string_view + - key: modernize-use-default-member-init.UseAssignment + value: 1 + - key: modernize-use-transparent-functors.SafeMode + value: 1 + - key: modernize-use-emplace.IgnoreImplicitConstructors + value: 1 \ No newline at end of file diff --git a/contrib/pax_storage/.githooks/pre-push b/contrib/pax_storage/.githooks/pre-push new file mode 100755 index 00000000000..22bebb8148b --- /dev/null +++ b/contrib/pax_storage/.githooks/pre-push @@ -0,0 +1,25 @@ +#!/bin/sh +# +# Verify what is about to be pushed. Called by "git +# push" after it has checked the remote status, but before anything has been +# pushed. If this script exits with a non-zero status nothing will be pushed. +# + +rc=0 + +if [ -x "./tools/cpplint.py" ]; then + echo "Running cpplint ..." + mkdir -p .tmp/ + ./tools/cpplint.py --counting=detailed --recursive . > .tmp/cpplint.log 2>&1 + rc=$? + if [ $rc -ne 0 ]; then + tail -n 1 .tmp/cpplint.log + echo "" + echo "ERROR cpplint returned errors!" + echo "ERROR Fix the problem and use 'git add' to update your changes." + echo "ERROR See `pwd`/.tmp/cpplint.log for more information." 
+ echo "" + fi +fi + +exit $rc \ No newline at end of file diff --git a/contrib/pax_storage/.gitignore b/contrib/pax_storage/.gitignore new file mode 100644 index 00000000000..76807d0dbc3 --- /dev/null +++ b/contrib/pax_storage/.gitignore @@ -0,0 +1,24 @@ +# Compiled Object files +**/*.so +**/*.o +**/*.lai +**/*.la +**/*.a +**/*.lib +# vs-code +**/.vscode +# OSX +**/.DS_Store +Thumbs.db + +# Temp files dir +.tmp/** +build*/** +clang-tidy.result +# pb +**/*.pb.h +**/*.pb.cc + +# Executables +*.out +!src/data/expected/*.out diff --git a/contrib/pax_storage/.gitlab-ci.yml b/contrib/pax_storage/.gitlab-ci.yml new file mode 100644 index 00000000000..51415fcc444 --- /dev/null +++ b/contrib/pax_storage/.gitlab-ci.yml @@ -0,0 +1,71 @@ +stages: + - build + +.global_variables: &global_variables + # Runner instance name, passed to Terraform + TF_VAR_instance_name: "cbdb-test-pipeline-${CI_PIPELINE_ID}-job-${CI_JOB_ID}" + TF_VAR_qingcloud_access_key: "key" + TF_VAR_qingcloud_secret_key: "secret" + # Custom clone path on runner instance + GIT_SUBMODULE_STRATEGY: "normal" + GIT_DEPTH: 0 + CI_USER: root + # For internal deploy + ARTIFACTORY_USERNAME: "admin" + ARTIFACTORY_PASSWORD: "token" + AWS_ACCESS_KEY_ID: "${TF_VAR_qingcloud_access_key}" + AWS_SECRET_ACCESS_KEY: "${TF_VAR_qingcloud_secret_key}" + GIT_CLONE_PATH: "/code/gpdb_pax_src" + # cbdb project dir + CBDB_PROJECT_DIR: "/code/gpdb_src" + # For artifacts + BUCKET_INTERMEDIATE: "http://artifactory.hashdata.xyz/artifactory/hashdata-repository/intermediate-artifacts" + # For pax storage project + CBDB_PAX_BRANCH: $CI_COMMIT_BRANCH + +.build_script: &build_script + script: | + git clone -b feature-pax https://buildbot:Passw0rd@code.hashdata.xyz/cloudberry/cbdb.git $CBDB_PROJECT_DIR + cd /code/gpdb_src + git submodule update --init --recursive + cd /code + echo "${CI_PIPELINE_ID}" > ${CBDB_PROJECT_DIR}/BUILD_NUMBER + bash ${CBDB_PROJECT_DIR}/hd-ci/compile_cbdb.bash + bash ${GIT_CLONE_PATH}/hd-ci/compile_pax.bash + bash ${GIT_CLONE_PATH}/hd-ci/clang_tidy_pax.bash + cp ${CBDB_PROJECT_DIR}/cbdb-artifacts.txt ${CI_PROJECT_DIR}/cbdb-artifacts.txt + touch /code/CI_STATUS + +.build_artifacts: &build_artifacts + artifacts: + name: "artifacts" + when: always + paths: + - ${CI_PROJECT_DIR}/cbdb-artifacts.txt + - ${GIT_CLONE_PATH}/clang-tidy.result + reports: + dotenv: ${CI_PROJECT_DIR}/cbdb-artifacts.txt + +.cbdb_test_rules: &cbdb_test_rules + rules: + - if: $CI_COMMIT_TAG + when: never + - if: '$RUN_NIGHTLY_BUILD == "true"' + when: always + - if: '$RUN_TEST_BUILD == "true"' + when: always + - if: '$CI_PIPELINE_SOURCE == "pipeline"' + when: always + - when: always + +x86_64:build: + stage: build + variables: + <<: *global_variables + <<: *build_script + <<: *build_artifacts + <<: *cbdb_test_rules + timeout: 8 hours + retry: + max: 2 + when: always diff --git a/contrib/pax_storage/.gitmodules b/contrib/pax_storage/.gitmodules new file mode 100644 index 00000000000..884a7c5972b --- /dev/null +++ b/contrib/pax_storage/.gitmodules @@ -0,0 +1,7 @@ +[submodule "src/cpp/contrib/googletest"] + path = src/cpp/contrib/googletest + url = https://code.hashdata.xyz/cloudberry/googletest +[submodule "src/cpp/contrib/zstd"] + path = src/cpp/contrib/zstd + url = https://code.hashdata.xyz/cloudberry/lib_zstd.git + branch = v1.5.5 diff --git a/contrib/pax_storage/CMakeLists.txt b/contrib/pax_storage/CMakeLists.txt new file mode 100644 index 00000000000..b4ab18181ea --- /dev/null +++ b/contrib/pax_storage/CMakeLists.txt @@ -0,0 +1,74 @@ +project(Pax) +cmake_minimum_required 
(VERSION 3.11.0) +set(CMAKE_CXX_STANDARD 14) + +find_program( + PG_CONFIG pg_config + HINTS ${PG_PATH} + PATH_SUFFIXES bin + DOC "The path to the pg_config of the CBDB version to compile against") + +if(NOT PG_CONFIG) + message(FATAL_ERROR "Unable to find 'pg_config'") +endif() + +# Function to call pg_config and extract values. +function(GET_PG_CONFIG var) + set(_temp) + + # Only call pg_config if the variable didn't already have a value. + if(NOT ${var}) + execute_process( + COMMAND ${PG_CONFIG} ${ARGN} + OUTPUT_VARIABLE _temp + OUTPUT_STRIP_TRAILING_WHITESPACE) + endif() + + set(${var} + ${_temp} + PARENT_SCOPE) +endfunction() + +# Get CBDB configuration from pg_config +get_pg_config(PG_INCLUDEDIR --includedir) + +# TODO: check whether this is still needed +set(CBDB_INCLUDE_DIR ${PG_INCLUDEDIR}/postgresql/server) + +# Debug options +option(ENBALE_DEBUG "Enable debug" ON) + +# Build gtest options +option(BUILD_GTEST "Build with google test" ON) + +# Build pax format lib +option(BUILD_PAX_FORMAT "Build pax format lib" OFF) + + +if (ENBALE_DEBUG) + ADD_DEFINITIONS(-DENBALE_DEBUG) + # Used to generate compile_commands.json + set(CMAKE_EXPORT_COMPILE_COMMANDS ON) + SET(CMAKE_BUILD_TYPE "Debug") + SET(CMAKE_CXX_FLAGS_DEBUG "$ENV{CXXFLAGS} -O0 -Wall -g -ggdb") + SET(CMAKE_CXX_FLAGS_RELEASE "$ENV{CXXFLAGS} -O3 -Wall") +else() + SET(CMAKE_BUILD_TYPE "Release") + # no need to build gtest in release mode + SET(BUILD_GTEST OFF) +endif(ENBALE_DEBUG) + +# Vec options +option(VEC_BUILD "Build pax vectorization version" OFF) +set(VEC_HOME "" CACHE STRING "Path to vectorization home") +if (VEC_BUILD) + +if("${VEC_HOME}" STREQUAL "") + message(FATAL_ERROR "Vectorization home not set. Use -DVEC_HOME to specify the vectorization home") +endif() + +set(CBDB_ROOT_INCLUDE_DIR ${PG_INCLUDEDIR}) +ADD_DEFINITIONS(-DVEC_BUILD) + +endif(VEC_BUILD) +add_subdirectory(src/cpp) diff --git a/contrib/pax_storage/CPPLINT.cfg b/contrib/pax_storage/CPPLINT.cfg new file mode 100644 index 00000000000..5afc027850a --- /dev/null +++ b/contrib/pax_storage/CPPLINT.cfg @@ -0,0 +1,15 @@ + +# Don't search for additional CPPLINT.cfg in parent directories. +set noparent + +# Limit line length. +linelength=120 + +# Setting current dir as root +root=. + +# Lint filter defined +filter=-legal/copyright,-runtime/arrays,-build/include_order + +# exclude files or dirs +exclude_files=build* diff --git a/contrib/pax_storage/README.md b/contrib/pax_storage/README.md new file mode 100644 index 00000000000..f26eeaf11be --- /dev/null +++ b/contrib/pax_storage/README.md @@ -0,0 +1,210 @@ +# Micro Partition + +## Overview + +- Storage + - Provides efficient data access for the computing layer, using data formats suited to how that layer processes data + - Supports multiple data formats, with open layouts (third-party tools can read the files directly) +- Cache + - Provides consistent caching for external tables + - Cache consistency +- Dynamic expansion/contraction (second-level) +- Compatible with the CBDB non-cloud version code + + +## Configuration and build + +You **must** enable the pre-push hooks so that formatting is checked automatically: + +``` +cp .githooks/* .git/hooks/ +``` + +Also, before `git push`, run +``` +clang-format -i {your changed code} +``` + +### Build PAX + +1. make sure you have already built and installed `cbdb` in your environment +2. `source greenplum_path.sh` first +3. follow the steps below + +``` +git submodule update --init + +mkdir build +cd build +cmake .. +make -j +``` + +### Build GTEST + +1. make sure pax was built with the cmake option `-DBUILD_GTEST=on` (the default) +2. a debug build via the cmake option `-DENBALE_DEBUG=on` (also the default) is recommended +3. run the tests + +``` +cd build +./src/cpp/test_main +``` + +### Build extension + +1. After building PAX, `pax.so` is generated in `src/data` +2. follow the steps below + +``` +cd src/data +make install -j +make installcheck +``` +Before starting to work with Pax, you must add the following line to postgresql.conf. This change requires a restart of the PostgreSQL database server. +``` +shared_preload_libraries = 'pax.so' +``` + +## GTEST accesses internal functions/variables + +1. Use the macro `RUN_GTEST` to make protected/private functions/variables public. +e.g.: + +**obj.h**: + +``` +class A { + public: + void a(); + +#ifndef RUN_GTEST + protected: +#endif + void b(); + +#ifndef RUN_GTEST + private: +#endif + int c; +}; +``` + +**obj_test.cc**: + +``` +#include "obj.h" + +TEST_F(Example, test) { + A a; + a.a(); // access public function + a.b(); // access protected function + a.c; // access private variables +} +``` + +2. Tests that generate temp files on disk should: +- use relative paths +- generate temporary files in `SetUp` and delete them in `TearDown` + - if files are generated in the test body, please delete them at the end of the test +- please make sure that no junk files remain after `gtest` runs + +3. Use `gmock` to mock a class (a minimal sketch follows below)
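A minimal gmock sketch of that pattern (the `File` interface and its method are hypothetical names used only for illustration, and a googletest version new enough for `MOCK_METHOD` is assumed):

```
#include "gmock/gmock.h"
#include "gtest/gtest.h"

// Hypothetical interface; any class with virtual methods can be mocked.
class File {
 public:
  virtual ~File() = default;
  virtual int Read(char *buf, int len) = 0;
};

class MockFile : public File {
 public:
  MOCK_METHOD(int, Read, (char *buf, int len), (override));
};

TEST(MockExample, ReadReturnsZero) {
  MockFile file;
  // Expect exactly one call, stubbed to report 0 bytes read.
  EXPECT_CALL(file, Read(::testing::_, ::testing::_))
      .WillOnce(::testing::Return(0));

  char buf[8];
  ASSERT_EQ(0, file.Read(buf, 8));
}
```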
+ +### exception && try catch + +There are two ways to throw an exception: +1. `CBDB_RAISE(ExType)`: throw directly +2. `CBDB_CHECK(check_exp, ExType)`: throw if the check fails + +About try/catch, you need to know: + 1. Expected exceptions, which the catcher can handle. + - Do not `rethrow` them + - Do handle them with `try...catch` + - Better not to put logic inside the `try...catch`; use return values to drive the logic + - e.g.: network problems... + 2. Unexpected exceptions + - Think about whether we should add `try...catch` to handle them: + - if global resources are held, a `try...catch` is a must, to release them + - memory in the top memory context or session memory context + - opened fds + - static resources + - if no global resources are held + - just throw, without any `try...catch` + - e.g.: logic errors, out-of-range errors... + 3. Do not use `catch(...)` in C++ code + - except in the access method layer + - using `catch(...)` below the access method layer will drop the current stack/tracker. + +About `ereport/elog(ERROR)`, you need to know: + 1. Better not to use `ereport/elog(ERROR)` in C++ code + - `try...catch` cannot handle it + - make sure resources have been cleaned up before calling `ereport/elog(ERROR)` + 2. use it as a panic + +Examples: +1. Expected exceptions + +``` +void RequestResources(bool should_retry) { + RequestTempArgs args; + + // allocates some resources internally + InitRequestTempArgs(&args); + + try { + DoHttpRequest(); + } catch (cbdb::CException &e) { + // free the resources and retry + DestroyRequestTempArgs(&args); + if (should_retry) { + RequestResources(false); + } + } +} + +``` + +2. Unexpected exceptions with global resources + +``` +static ReadContext *context; + +void InitReadContext() { + context = (ReadContext *) MemoryContextAlloc(TopMemoryContext, sizeof(ReadContext)); +} + +void ReadResources() { + Assert(context); + try { + ReadResource(context); + } catch (...) { + // must destroy the global resource here; otherwise the + // allocation made in InitReadContext() would leak + DestroyReadContext(context); + throw; // rethrow the current exception + } +} +``` + +3. Unexpected exceptions without global resources + +``` +void ParseResource(Resource *res, size_t offset) { + // throw directly, without any try...catch + CBDB_CHECK(offset < res->size(), kExTypeOutOfRange); + ... // normal logic +} +```
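At the access-method boundary itself, the pattern used throughout `pax_access_handle.cc` in this patch is the `CBDB_TRY()` / `CBDB_CATCH_DEFAULT()` / `CBDB_END_TRY()` wrapper; a condensed sketch modeled on `ScanEnd` from that file:

```
// C-callable entry point wrapping C++ code that may throw. If a C++
// exception escapes, CBDB_END_TRY() re-raises it as ereport(ERROR).
void CCPaxAccessMethod::ScanEnd(TableScanDesc scan) {
  CBDB_TRY();
  { PaxScanDesc::EndScan(scan); }  // C++ implementation, may throw
  CBDB_CATCH_DEFAULT();
  CBDB_FINALLY({
    // release anything that must not leak across the ERROR
  });
  CBDB_END_TRY();
}
```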
+ +### Others + +1. please change all `auto *` to `auto` + - note that `auto` and `auto &` are different +2. split logic code into `.cc` files + - do not add logic code (logic inside a class) to `.h`; `clang-format`/`cpplint` won't catch it there + - except `inline`/`static` methods +3. don't make the constructor too bloated + - some parameters can be passed through `Set` methods + - in a method starting with `Set`, if necessary, add an `Assert` to ensure that the parameter is only passed once + - consider using a factory method to construct the object diff --git a/contrib/pax_storage/hd-ci/clang_tidy_pax.bash b/contrib/pax_storage/hd-ci/clang_tidy_pax.bash new file mode 100644 index 00000000000..6d94247c41d --- /dev/null +++ b/contrib/pax_storage/hd-ci/clang_tidy_pax.bash @@ -0,0 +1,51 @@ +#!/usr/bin/env bash +set -exo pipefail +CBDB_PAX_SRC_PATH="/code/gpdb_pax_src" +CBDB_PAX_DEV_BRANCH="origin/dev" +CBDB_PAX_EXT=("*\.cc" "*\.h") +CBDB_PAXC_GREP="paxc_" +CLANG_TIDY_OUT_FILE_NAME="clang-tidy.result" + +modified_exts=() + +function do_git_diff() { + current_commit=$(git rev-parse HEAD) + dev_branch_commit=$(git rev-parse $CBDB_PAX_DEV_BRANCH) + if [ "$current_commit" = "$dev_branch_commit" ]; then + echo "Current commit is the '$CBDB_PAX_DEV_BRANCH' branch." + exit 0 + fi + + modified_files=$(git diff --name-only $CBDB_PAX_DEV_BRANCH) + for extension in "${CBDB_PAX_EXT[@]}"; do + if echo "$modified_files" | grep -E -e "$extension" | grep -q -v "$CBDB_PAXC_GREP"; then + files=$(echo "$modified_files" | grep -E -e "$extension" | grep -v "$CBDB_PAXC_GREP") + if [ -z "$files" ]; then + continue + fi + for file in $files; do + if [ -e "$file" ]; then + modified_exts+=("$file") + fi + done + fi + done +} + +function run_clang_tidy() { + if [ -z "$modified_exts" ]; then + return 0 + fi + + echo "clang-tidy results will be generated in $(pwd)/$CLANG_TIDY_OUT_FILE_NAME" + clang-tidy -p build/ ${modified_exts[@]} > $CLANG_TIDY_OUT_FILE_NAME 2>&1 +} + +function _main() { + pushd $CBDB_PAX_SRC_PATH + do_git_diff + run_clang_tidy + popd +} + +_main "$@" diff --git a/contrib/pax_storage/hd-ci/compile_pax.bash b/contrib/pax_storage/hd-ci/compile_pax.bash new file mode 100644 index 00000000000..7071cd609c1 --- /dev/null +++ b/contrib/pax_storage/hd-ci/compile_pax.bash @@ -0,0 +1,36 @@ +#!/usr/bin/env bash +set -exo pipefail +CBDB_INSTALL_PATH="/usr/local/cloudberry-db-devel" +CBDB_PAX_SRC_PATH="/code/gpdb_pax_src" +GPDB_PAX_UNIT_TEST_BIN="$CBDB_PAX_SRC_PATH/build/src/cpp/test_main" + +compile_pax() { + source $CBDB_INSTALL_PATH/greenplum_path.sh + mkdir -p $CBDB_PAX_SRC_PATH/build + pushd $CBDB_PAX_SRC_PATH/build + cmake -DCMAKE_EXPORT_COMPILE_COMMANDS=1 ..
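  # NOTE: -DCMAKE_EXPORT_COMPILE_COMMANDS=1 emits build/compile_commands.json, which clang_tidy_pax.bash consumes via 'clang-tidy -p build/'.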
+ make + popd +} + +function compile_cmake() { + wget -O /root/cmake-3.25.1-linux-x86_64.tar.gz https://artifactory.hashdata.xyz/artifactory/utility/cmake-3.25.1-linux-x86_64.tar.gz + mkdir -p /root/cmake-3.25.1-linux-x86_64 + tar -xvf /root/cmake-3.25.1-linux-x86_64.tar.gz -C /root/cmake-3.25.1-linux-x86_64 + rm -rf /usr/bin/cmake + rm -rf /opt/rh/llvm-toolset-13.0/root/usr/bin/cmake + ln -s /root/cmake-3.25.1-linux-x86_64/cmake-3.25.1-linux-x86_64/bin/cmake /usr/bin/cmake + ln -s /root/cmake-3.25.1-linux-x86_64/cmake-3.25.1-linux-x86_64/bin/cmake /opt/rh/llvm-toolset-13.0/root/usr/bin/cmake +} + +function run_unit() { + $GPDB_PAX_UNIT_TEST_BIN +} + +main() { + compile_cmake + compile_pax + run_unit +} + +main diff --git a/contrib/pax_storage/src/cpp/CMakeLists.txt b/contrib/pax_storage/src/cpp/CMakeLists.txt new file mode 100644 index 00000000000..8ac5f846c7a --- /dev/null +++ b/contrib/pax_storage/src/cpp/CMakeLists.txt @@ -0,0 +1,242 @@ +cmake_minimum_required (VERSION 3.11.0) + +# protobuf +include(ExternalProject) +option(ORC_PREFER_STATIC_PROTOBUF "Prefer static protobuf library, if available" ON) +set(THIRDPARTY_CONFIGURE_COMMAND "${CMAKE_COMMAND}" -G "${CMAKE_GENERATOR}") +set(THIRDPARTY_DIR "${CMAKE_BINARY_DIR}/src/cpp/contrib") +set(THIRDPARTY_LOG_OPTIONS LOG_CONFIGURE 1 + LOG_BUILD 1 + LOG_INSTALL 1 + LOG_DOWNLOAD 1) + +set(PROTOBUF_PREFIX "${THIRDPARTY_DIR}/protobuf_ep-install") +set(PROTOBUF_INCLUDE_DIR "${PROTOBUF_PREFIX}/include") +set(PROTOBUF_CMAKE_ARGS -DCMAKE_INSTALL_PREFIX=${PROTOBUF_PREFIX} + -DCMAKE_INSTALL_LIBDIR=lib + -DBUILD_SHARED_LIBS=OFF + -Dprotobuf_BUILD_TESTS=OFF) + +set(PROTOBUF_CMAKE_ARGS ${PROTOBUF_CMAKE_ARGS} -DCMAKE_POSITION_INDEPENDENT_CODE=ON) +set(PROTOBUF_STATIC_LIB_PREFIX ${CMAKE_STATIC_LIBRARY_PREFIX}) +set(PROTOBUF_STATIC_LIB "${PROTOBUF_PREFIX}/lib/${PROTOBUF_STATIC_LIB_PREFIX}protobuf${CMAKE_STATIC_LIBRARY_SUFFIX}") +message(STATUS "${PROTOBUF_STATIC_LIB}") +set(PROTOC_STATIC_LIB "${PROTOBUF_PREFIX}/lib/${PROTOBUF_STATIC_LIB_PREFIX}protoc${CMAKE_STATIC_LIBRARY_SUFFIX}") +set(PROTOBUF_EXECUTABLE "${PROTOBUF_PREFIX}/bin/protoc${CMAKE_EXECUTABLE_SUFFIX}") + +set(PROTOBUF_CONFIGURE CONFIGURE_COMMAND "${THIRDPARTY_CONFIGURE_COMMAND}" ${PROTOBUF_CMAKE_ARGS} + "${CMAKE_CURRENT_BINARY_DIR}/protobuf_ep-prefix/src/protobuf_ep/cmake") + +ExternalProject_Add(protobuf_ep + URL "https://artifactory.hashdata.xyz/artifactory/utility/protobuf-3.6.1.tar.gz" + ${PROTOBUF_CONFIGURE} + ${THIRDPARTY_LOG_OPTIONS} + BUILD_BYPRODUCTS "${PROTOBUF_STATIC_LIB}" "${PROTOC_STATIC_LIB}") + +set(PROTOBUF_LIBRARY ${PROTOBUF_STATIC_LIB}) +set(PROTOC_LIBRARY ${PROTOC_STATIC_LIB}) +set(PROTOBUF_VENDORED ON) +set(INSTALL_VENDORED_LIBS OFF) + +add_library (orc_protobuf INTERFACE) +add_library (orc::protobuf ALIAS orc_protobuf) +add_library (orc_protoc INTERFACE) +add_library (orc::protoc ALIAS orc_protoc) + +if (ORC_PREFER_STATIC_PROTOBUF AND ${PROTOBUF_STATIC_LIB}) + target_link_libraries (orc_protobuf INTERFACE ${PROTOBUF_STATIC_LIB}) +else () + target_link_libraries (orc_protobuf INTERFACE ${PROTOBUF_LIBRARY}) +endif() + +target_include_directories (orc_protobuf SYSTEM INTERFACE ${PROTOBUF_INCLUDE_DIR}) + +if (ORC_PREFER_STATIC_PROTOBUF AND ${PROTOC_STATIC_LIB}) + target_link_libraries (orc_protoc INTERFACE ${PROTOC_STATIC_LIB}) +else () + target_link_libraries (orc_protoc INTERFACE ${PROTOC_LIBRARY}) +endif() + +target_include_directories (orc_protoc SYSTEM INTERFACE ${PROTOBUF_INCLUDE_DIR}) + +if (PROTOBUF_VENDORED) + add_dependencies (orc_protoc protobuf_ep) + 
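  # NOTE: both orc::protobuf and orc::protoc must depend on protobuf_ep so the vendored protobuf build completes before anything links against the static libs.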
add_dependencies (orc_protobuf protobuf_ep) + if (INSTALL_VENDORED_LIBS) + install(FILES "${PROTOBUF_STATIC_LIB}" "${PROTOC_STATIC_LIB}" + DESTINATION "lib") + endif () +endif () + +set(orc_proto_file "${CMAKE_CURRENT_SOURCE_DIR}/storage/proto/orc_proto.proto") +set(orc_proto_src "${CMAKE_CURRENT_SOURCE_DIR}/storage/proto/orc_proto.pb.h" "${CMAKE_CURRENT_SOURCE_DIR}/storage/proto/orc_proto.pb.cc") + +set(pax_proto_file "${CMAKE_CURRENT_SOURCE_DIR}/storage/proto/pax.proto") +set(pax_proto_src "${CMAKE_CURRENT_SOURCE_DIR}/storage/proto/pax.pb.h" "${CMAKE_CURRENT_SOURCE_DIR}/storage/proto/pax.pb.cc") + +set(catalog_proto_file "${CMAKE_CURRENT_SOURCE_DIR}/storage/proto/micro_partition_stats.proto") +set(stats_proto_src "${CMAKE_CURRENT_SOURCE_DIR}/storage/proto/micro_partition_stats.pb.h" "${CMAKE_CURRENT_SOURCE_DIR}/storage/proto/micro_partition_stats.pb.cc") + +add_custom_command(OUTPUT ${orc_proto_src} + COMMAND ${PROTOBUF_EXECUTABLE} + -I ${CMAKE_CURRENT_SOURCE_DIR}/storage/proto/ + --cpp_out="${CMAKE_CURRENT_SOURCE_DIR}/storage/proto/" + ${orc_proto_file}) + +add_custom_command(OUTPUT ${pax_proto_src} + COMMAND ${PROTOBUF_EXECUTABLE} + -I ${CMAKE_CURRENT_SOURCE_DIR}/storage/proto/ + --cpp_out="${CMAKE_CURRENT_SOURCE_DIR}/storage/proto/" + ${pax_proto_file}) + +add_custom_command(OUTPUT ${stats_proto_src} + COMMAND ${PROTOBUF_EXECUTABLE} + -I ${CMAKE_CURRENT_SOURCE_DIR}/storage/proto/ + --cpp_out="${CMAKE_CURRENT_SOURCE_DIR}/storage/proto" + ${catalog_proto_file}) + +add_custom_target(generate_protobuf DEPENDS ${orc_proto_src} ${pax_proto_src} ${stats_proto_src}) + +if (BUILD_GTEST AND NOT BUILD_PAX_FORMAT) + add_subdirectory(contrib/googletest) + ADD_DEFINITIONS(-DRUN_GTEST) + file(GLOB TEST_CASE_SOURCES + ${CMAKE_CURRENT_SOURCE_DIR}/*/*_test.cc + ${CMAKE_CURRENT_SOURCE_DIR}/*/*/*_test.cc) + + link_directories($ENV{GPHOME}/lib) + add_executable(test_main ${TEST_CASE_SOURCES}) + add_dependencies(test_main gtest gmock gtest_main) + target_include_directories(test_main PUBLIC ${CMAKE_CURRENT_SOURCE_DIR} ${gtest_SOURCE_DIR}/include) + target_link_libraries(test_main gtest gmock gtest_main postgres pax) +endif(BUILD_GTEST AND NOT BUILD_PAX_FORMAT) + +# ztsd +set(ZSTD_BUILD_PROGRAMS OFF) +set(ZSTD_BUILD_TESTS OFF) +set(ZSTD_BUILD_CONTRIB) +add_subdirectory(contrib/zstd/build/cmake/) +set(ZTSD_HEADER contrib/zstd/lib) + +set(pax_comm_src + comm/bitmap.cc + comm/paxc_wrappers.cc + comm/cbdb_wrappers.cc) + +set(pax_exceptions_src + exceptions/CException.cc) + +set(pax_storage_src + storage/columns/pax_column.cc + storage/columns/pax_column_int.cc + storage/columns/pax_compress.cc + storage/columns/pax_columns.cc + storage/columns/pax_encoding_utils.cc + storage/columns/pax_encoding_non_fixed_column.cc + storage/columns/pax_encoding_column.cc + storage/columns/pax_decoding.cc + storage/columns/pax_encoding.cc + storage/columns/pax_rlev2_decoding.cc + storage/columns/pax_rlev2_encoding.cc + storage/file_system.cc + storage/pax_filter.cc + storage/local_file_system.cc + storage/micro_partition.cc + storage/micro_partition_file_factory.cc + storage/micro_partition_iterator.cc + storage/micro_partition_metadata.cc + storage/pax_buffer.cc + storage/proto/protobuf_stream.cc + storage/pax_filter.cc + storage/strategy.cc + storage/paxc_block_map_manager.cc + storage/orc/orc.cc + storage/strategy.cc) + +if(NOT BUILD_PAX_FORMAT) + set(pax_storage_src ${pax_storage_src} storage/pax.cc) +endif(NOT BUILD_PAX_FORMAT) + +set(pax_access_src + access/pax_access_handle.cc + access/pax_deleter.cc + 
access/pax_dml_state.cc + access/pax_inserter.cc + access/pax_updater.cc + access/pax_scanner.cc) + +set(pax_catalog_src + catalog/micro_partition_stats.cc + catalog/pax_aux_table.cc) + +set(pax_vec_src + storage/vec/pax_vec_adapter.cc + storage/vec/pax_vec_reader.cc) + +link_directories($ENV{GPHOME}/lib) + +if(BUILD_PAX_FORMAT) + # paxformat.so + ADD_DEFINITIONS(-DBUILD_PAX_FORMAT) + add_library(paxformat SHARED ${orc_proto_src} ${pax_proto_src} ${pax_storage_src} ${pax_exceptions_src} ${pax_comm_src} ) + target_include_directories(paxformat PUBLIC ${ZTSD_HEADER} ${CMAKE_CURRENT_SOURCE_DIR} ${CBDB_INCLUDE_DIR}) + target_link_libraries(paxformat PUBLIC uuid orc_protobuf zstd z) + set_target_properties(paxformat PROPERTIES + OUTPUT_NAME paxformat) + add_dependencies(paxformat generate_protobuf) + + # export headers + set(PAX_COMM_HEADERS + comm/cbdb_api.h + ) + + ## install dynamic library + install(TARGETS paxformat + LIBRARY DESTINATION ${CMAKE_INSTALL_PREFIX}/lib) + + # TODO(gongxun): + # We should explicitly specify the headers + # that need to be exported, and use the syntax of + # install(FILES,...) to install the header files + install(DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}/storage + DESTINATION ${CMAKE_INSTALL_PREFIX}/include/pax + FILES_MATCHING + PATTERN "*.h" +) + +install(FILES ${PAX_COMM_HEADERS} + DESTINATION ${CMAKE_INSTALL_PREFIX}/include/pax/comm +) + +else() + + add_library(pax SHARED ${orc_proto_src} ${pax_proto_src} ${pax_storage_src} ${stats_proto_src} ${pax_exceptions_src} + ${pax_access_src} ${pax_comm_src} ${pax_catalog_src} ${pax_vec_src}) + set_target_properties(pax PROPERTIES OUTPUT_NAME pax) + target_include_directories(pax PUBLIC ${ZTSD_HEADER} ${CMAKE_CURRENT_SOURCE_DIR} ${CBDB_INCLUDE_DIR}) + target_link_libraries(pax PUBLIC uuid orc_protobuf zstd z postgres) + add_dependencies(pax generate_protobuf) + add_custom_command(TARGET pax POST_BUILD + COMMAND ${CMAKE_COMMAND} -E + copy_if_different $<TARGET_FILE:pax> ${CMAKE_CURRENT_SOURCE_DIR}/../data/pax.so) +endif(BUILD_PAX_FORMAT) + +# vec build +if (VEC_BUILD) + set(VEC_HEADER ${VEC_HOME}/src/include/) + + find_package(PkgConfig REQUIRED) + pkg_check_modules(GLIB REQUIRED glib-2.0) + + target_include_directories(pax PRIVATE + ${VEC_HEADER} # for utils/tuptable_vec.h + ${CBDB_ROOT_INCLUDE_DIR} # for arrow-glib/arrow-glib.h and other arrow interfaces + ${GLIB_INCLUDE_DIRS} # for glib-object.h + ) + + if(BUILD_GTEST) + target_include_directories(test_main PRIVATE ${VEC_HEADER} ${CBDB_ROOT_INCLUDE_DIR} ${GLIB_INCLUDE_DIRS}) + endif(BUILD_GTEST) + + target_link_libraries(pax PRIVATE arrow) +endif(VEC_BUILD) diff --git a/contrib/pax_storage/src/cpp/access/pax_access_handle.cc b/contrib/pax_storage/src/cpp/access/pax_access_handle.cc new file mode 100644 index 00000000000..be34ff3aecc --- /dev/null +++ b/contrib/pax_storage/src/cpp/access/pax_access_handle.cc @@ -0,0 +1,947 @@ +#include "access/pax_access_handle.h" + +#include "comm/cbdb_api.h" + +#include "access/pax_dml_state.h" +#include "access/pax_scanner.h" +#include "access/pax_updater.h" +#include "catalog/pax_aux_table.h" +#include "exceptions/CException.h" +#include "storage/paxc_block_map_manager.h" + +#define NOT_IMPLEMENTED_YET \ + ereport(ERROR, \ + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), \ + errmsg("not implemented yet on pax relations: %s", __func__))) + +#define NOT_SUPPORTED_YET \ + ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), \ + errmsg("not supported on pax relations: %s", __func__))) + +#define PAX_DEFAULT_COMPRESSLEVEL AO_DEFAULT_COMPRESSLEVEL +#define
PAX_MIN_COMPRESSLEVEL AO_MIN_COMPRESSLEVEL +#define PAX_MAX_COMPRESSLEVEL AO_MAX_COMPRESSLEVEL + +#define PAX_DEFAULT_COMPRESSTYPE AO_DEFAULT_COMPRESSTYPE + +#define RELATION_IS_PAX(rel) \ + (OidIsValid((rel)->rd_rel->relam) && AMOidIsPax((rel)->rd_rel->relam)) + +// CBDB_TRY(); +// { +// // C++ implementation code +// } +// CBDB_CATCH_MATCH(std::exception &exp); // optional +// { +// // specific exception handler +// error_message.Append("error message: %s", error_message.Message()); +// } +// CBDB_CATCH_DEFAULT(); +// CBDB_END_TRY(); +// +// CBDB_CATCH_MATCH() is optional and can have several match pattern. + +cbdb::CException global_exception(cbdb::CException::kExTypeInvalid); + +// being of a try block w/o explicit handler +#define CBDB_TRY() \ + do { \ + bool internal_cbdb_try_throw_error_ = false; \ + bool internal_cbdb_try_throw_error_with_stack_ = false; \ + cbdb::ErrorMessage error_message; \ + try { +// begin of a catch block +#define CBDB_CATCH_MATCH(exception_decl) \ + } \ + catch (exception_decl) { \ + internal_cbdb_try_throw_error_ = true; + +// catch c++ exception and rethrow ERROR to C code +// only used by the outer c++ code called by C +#define CBDB_CATCH_DEFAULT() \ + } \ + catch (cbdb::CException & e) { \ + internal_cbdb_try_throw_error_ = true; \ + internal_cbdb_try_throw_error_with_stack_ = true; \ + elog(LOG, "\npax stack trace: \n%s", e.Stack()); \ + global_exception = e; \ + } \ + catch (...) { \ + internal_cbdb_try_throw_error_ = true; \ + internal_cbdb_try_throw_error_with_stack_ = false; + +// like PG_FINALLY +#define CBDB_FINALLY(...) \ + } \ + { \ + do { \ + __VA_ARGS__; \ + } while (0); + +// end of a try-catch block +#define CBDB_END_TRY() \ + } \ + if (internal_cbdb_try_throw_error_) { \ + if (internal_cbdb_try_throw_error_with_stack_) { \ + elog(LOG, "\npax stack trace: \n%s", global_exception.Stack()); \ + ereport(ERROR, errmsg("%s", global_exception.What().c_str())); \ + } \ + if (error_message.Length() == 0) \ + error_message.Append("ERROR: %s", __func__); \ + ereport(ERROR, errmsg("%s", error_message.Message())); \ + } \ + } \ + while (0) + +bool AMOidIsPax(Oid am_oid) { + HeapTuple tuple; + Form_pg_am form; + bool is_pax; + + tuple = SearchSysCache1(AMOID, ObjectIdGetDatum(am_oid)); + if (!HeapTupleIsValid(tuple)) + elog(ERROR, "cache lookup failed for pg_am.oid = %u", am_oid); + + form = (Form_pg_am)GETSTRUCT(tuple); + is_pax = strcmp(NameStr(form->amname), "pax") == 0; + ReleaseSysCache(tuple); + + return is_pax; +} + +// reloptions structure and variables. +static relopt_kind self_relopt_kind; +static const relopt_parse_elt kSelfReloptTab[] = { + {"compresslevel", RELOPT_TYPE_INT, offsetof(PaxOptions, compress_level)}, + {"compresstype", RELOPT_TYPE_STRING, offsetof(PaxOptions, compress_type)}, + {"storage_format", RELOPT_TYPE_STRING, + offsetof(PaxOptions, storage_format)}, +}; + +// access methods that are implemented in C++ +namespace pax { + +TableScanDesc CCPaxAccessMethod::ScanBegin(Relation relation, Snapshot snapshot, + int nkeys, struct ScanKeyData *key, + ParallelTableScanDesc pscan, + uint32 flags) { + CBDB_TRY(); + { + return PaxScanDesc::BeginScan(relation, snapshot, nkeys, key, pscan, flags, + nullptr); + } + CBDB_CATCH_DEFAULT(); + CBDB_END_TRY(); + + pg_unreachable(); +} + +void CCPaxAccessMethod::ScanEnd(TableScanDesc scan) { + CBDB_TRY(); + { PaxScanDesc::EndScan(scan); } + CBDB_CATCH_DEFAULT(); + CBDB_FINALLY({ + // FIXME: destroy PaxScanDesc? 
+ }); + CBDB_END_TRY(); +} + +TableScanDesc CCPaxAccessMethod::ScanExtractColumns( + Relation rel, Snapshot snapshot, ParallelTableScanDesc parallel_scan, + List *targetlist, List *qual, uint32 flags) { + CBDB_TRY(); + { + return pax::PaxScanDesc::BeginScanExtractColumns( + rel, snapshot, parallel_scan, targetlist, qual, flags); + } + CBDB_CATCH_DEFAULT(); + CBDB_FINALLY({}); + CBDB_END_TRY(); + pg_unreachable(); +} + +void CCPaxAccessMethod::RelationSetNewFilenode(Relation rel, + const RelFileNode *newrnode, + char persistence, + TransactionId *freeze_xid, + MultiXactId *minmulti) { + CBDB_TRY(); + { + *freeze_xid = *minmulti = InvalidTransactionId; + pax::CCPaxAuxTable::PaxAuxRelationSetNewFilenode(rel, newrnode, + persistence); + } + CBDB_CATCH_DEFAULT(); + CBDB_FINALLY({}); + CBDB_END_TRY(); +} + +// * non-transactional truncate table cases: +// 1. create table inside transactional block, and then truncate table inside +// transactional block. +// 2. create table outside transactional block, insert data +// and truncate table inside transactional block. +void CCPaxAccessMethod::RelationNontransactionalTruncate(Relation rel) { + CBDB_TRY(); + { pax::CCPaxAuxTable::PaxAuxRelationNontransactionalTruncate(rel); } + CBDB_CATCH_DEFAULT(); + CBDB_FINALLY({}); + CBDB_END_TRY(); +} + +void CCPaxAccessMethod::RelationCopyData(Relation rel, + const RelFileNode *newrnode) { + CBDB_TRY(); + { pax::CCPaxAuxTable::PaxAuxRelationCopyData(rel, newrnode); } + CBDB_CATCH_DEFAULT(); + CBDB_FINALLY({}); + CBDB_END_TRY(); +} + +/* + * Used by rebuild_relation, like CLUSTER, VACUUM FULL, etc. + * + * PAX does not have dead tuples, but the core framework requires this + * callback to be implemented to support CLUSTER/VACUUM FULL/etc. + * PAX may have re-organize semantics for this function. + * + * TODO: how to split the set of micro-partitions across several QE handlers. + */ +void CCPaxAccessMethod::RelationCopyForCluster( + Relation old_heap, Relation new_heap, Relation /*old_index*/, + bool /*use_sort*/, TransactionId /*oldest_xmin*/, + TransactionId * /*xid_cutoff*/, MultiXactId * /*multi_cutoff*/, + double * /*num_tuples*/, double * /*tups_vacuumed*/, + double * /*tups_recently_dead*/) { + Assert(RELATION_IS_PAX(old_heap)); + Assert(RELATION_IS_PAX(new_heap)); + CBDB_TRY(); + { pax::CCPaxAuxTable::PaxAuxRelationCopyDataForCluster(old_heap, new_heap); } + CBDB_CATCH_DEFAULT(); + CBDB_FINALLY({}); + CBDB_END_TRY(); +} + +void CCPaxAccessMethod::RelationFileUnlink(RelFileNodeBackend rnode) { + CBDB_TRY(); + { + pax::CCPaxAuxTable::PaxAuxRelationFileUnlink(rnode.node, rnode.backend, + true); + } + CBDB_CATCH_DEFAULT(); + CBDB_FINALLY({}); + CBDB_END_TRY(); +} + +void CCPaxAccessMethod::ScanRescan(TableScanDesc scan, ScanKey /*key*/, + bool /*set_params*/, bool /*allow_strat*/, + bool /*allow_sync*/, + bool /*allow_pagemode*/) { + CBDB_TRY(); + { pax::PaxScanDesc::ReScan(scan); } + CBDB_CATCH_DEFAULT(); + CBDB_FINALLY({}); + CBDB_END_TRY(); +} + +bool CCPaxAccessMethod::ScanGetNextSlot(TableScanDesc scan, + ScanDirection /*direction*/, + TupleTableSlot *slot) { + CBDB_TRY(); + { return PaxScanDesc::ScanGetNextSlot(scan, slot); } + CBDB_CATCH_DEFAULT(); + CBDB_FINALLY({ + // FIXME: destroy PaxScanDesc?
+ }); + CBDB_END_TRY(); + + pg_unreachable(); +} + +void CCPaxAccessMethod::TupleInsert(Relation relation, TupleTableSlot *slot, + CommandId cid, int options, + BulkInsertState bistate) { + CBDB_TRY(); + { + MemoryContext old_ctx; + Assert(cbdb::pax_memory_context); + + old_ctx = MemoryContextSwitchTo(cbdb::pax_memory_context); + CPaxInserter::TupleInsert(relation, slot, cid, options, bistate); + MemoryContextSwitchTo(old_ctx); + } + CBDB_CATCH_DEFAULT(); + CBDB_FINALLY({ + // FIXME: destroy CPaxInserter? + }); + CBDB_END_TRY(); +} + +TM_Result CCPaxAccessMethod::TupleDelete(Relation relation, ItemPointer tid, + CommandId cid, Snapshot snapshot, + Snapshot /*crosscheck*/, bool /*wait*/, + TM_FailureData *tmfd, + bool /*changing_part*/) { + CBDB_TRY(); + { return CPaxDeleter::DeleteTuple(relation, tid, cid, snapshot, tmfd); } + CBDB_CATCH_DEFAULT(); + CBDB_FINALLY({}); + CBDB_END_TRY(); + pg_unreachable(); +} + +TM_Result CCPaxAccessMethod::TupleUpdate(Relation relation, ItemPointer otid, + TupleTableSlot *slot, CommandId cid, + Snapshot snapshot, Snapshot crosscheck, + bool wait, TM_FailureData *tmfd, + LockTupleMode *lockmode, + bool *update_indexes) { + CBDB_TRY(); + { + MemoryContext old_ctx; + TM_Result result; + + Assert(cbdb::pax_memory_context); + old_ctx = MemoryContextSwitchTo(cbdb::pax_memory_context); + result = CPaxUpdater::UpdateTuple(relation, otid, slot, cid, snapshot, + crosscheck, wait, tmfd, lockmode, + update_indexes); + MemoryContextSwitchTo(old_ctx); + return result; + } + CBDB_CATCH_DEFAULT(); + CBDB_FINALLY({}); + CBDB_END_TRY(); + pg_unreachable(); +} + +bool CCPaxAccessMethod::ScanAnalyzeNextBlock( + TableScanDesc scan, BlockNumber blockno, + BufferAccessStrategy /*bstrategy*/) { + CBDB_TRY(); + { return PaxScanDesc::ScanAnalyzeNextBlock(scan, blockno); } + CBDB_CATCH_DEFAULT(); + CBDB_FINALLY({}); + CBDB_END_TRY(); + pg_unreachable(); +} + +bool CCPaxAccessMethod::ScanAnalyzeNextTuple(TableScanDesc scan, + TransactionId /*oldest_xmin*/, + double *liverows, double *deadrows, + TupleTableSlot *slot) { + CBDB_TRY(); + { return PaxScanDesc::ScanAnalyzeNextTuple(scan, liverows, deadrows, slot); } + CBDB_CATCH_DEFAULT(); + CBDB_FINALLY({}); + CBDB_END_TRY(); + pg_unreachable(); +} + +bool CCPaxAccessMethod::ScanBitmapNextBlock(TableScanDesc /*scan*/, + TBMIterateResult * /*tbmres*/) { + NOT_IMPLEMENTED_YET; + return false; +} + +bool CCPaxAccessMethod::ScanBitmapNextTuple(TableScanDesc /*scan*/, + TBMIterateResult * /*tbmres*/, + TupleTableSlot * /*slot*/) { + NOT_IMPLEMENTED_YET; + return false; +} + +bool CCPaxAccessMethod::ScanSampleNextBlock(TableScanDesc scan, + SampleScanState *scanstate) { + CBDB_TRY(); + { return PaxScanDesc::ScanSampleNextBlock(scan, scanstate); } + CBDB_CATCH_DEFAULT(); + CBDB_FINALLY({}); + CBDB_END_TRY(); + pg_unreachable(); +} + +bool CCPaxAccessMethod::ScanSampleNextTuple(TableScanDesc scan, + SampleScanState * /*scanstate*/, + TupleTableSlot *slot) { + CBDB_TRY(); + { return PaxScanDesc::ScanSampleNextTuple(scan, slot); } + CBDB_CATCH_DEFAULT(); + CBDB_FINALLY({}); + CBDB_END_TRY(); + pg_unreachable(); +} + +void CCPaxAccessMethod::MultiInsert(Relation relation, TupleTableSlot **slots, + int ntuples, CommandId cid, int options, + BulkInsertState bistate) { + CBDB_TRY(); + { + MemoryContext old_ctx; + Assert(cbdb::pax_memory_context); + + old_ctx = MemoryContextSwitchTo(cbdb::pax_memory_context); + CPaxInserter::MultiInsert(relation, slots, ntuples, cid, options, bistate); + MemoryContextSwitchTo(old_ctx); + } + CBDB_CATCH_DEFAULT(); + 
CBDB_FINALLY({ + // FIXME: destroy CPaxInserter? + }); + CBDB_END_TRY(); +} + +void CCPaxAccessMethod::FinishBulkInsert(Relation relation, int options) { + // Implement Pax dml cleanup for case "create table xxx1 as select * from + // xxx2", which would not call ExtDmlFini callback function and relies on + // FinishBulkInsert callback function to cleanup its dml state. + CBDB_TRY(); + { + // no need switch memory context + // cause it just call dml finish + pax::CPaxInserter::FinishBulkInsert(relation, options); + } + CBDB_CATCH_DEFAULT(); + CBDB_FINALLY({ + // FIXME: destroy CPaxInserter? + }); + CBDB_END_TRY(); +} + +void CCPaxAccessMethod::ExtDmlInit(Relation rel, CmdType operation) { + if (!RELATION_IS_PAX(rel)) { + return; + } + + CBDB_TRY(); + { pax::CPaxDmlStateLocal::Instance()->InitDmlState(rel, operation); } + CBDB_CATCH_DEFAULT(); + CBDB_FINALLY({}); + CBDB_END_TRY(); +} + +void CCPaxAccessMethod::ExtDmlFini(Relation rel, CmdType operation) { + if (!RELATION_IS_PAX(rel)) { + return; + } + + CBDB_TRY(); + { pax::CPaxDmlStateLocal::Instance()->FinishDmlState(rel, operation); } + CBDB_CATCH_DEFAULT(); + CBDB_FINALLY({}); + CBDB_END_TRY(); +} + +} // namespace pax +// END of C++ implementation + +// access methods that are implemented in C +namespace paxc { +const TupleTableSlotOps *PaxAccessMethod::SlotCallbacks( + Relation /*rel*/) noexcept { + return &TTSOpsVirtual; +} + +Size PaxAccessMethod::ParallelscanEstimate(Relation /*rel*/) { + NOT_IMPLEMENTED_YET; + return 0; +} + +Size PaxAccessMethod::ParallelscanInitialize(Relation /*rel*/, + ParallelTableScanDesc /*pscan*/) { + NOT_IMPLEMENTED_YET; + return 0; +} + +void PaxAccessMethod::ParallelscanReinitialize( + Relation /*rel*/, ParallelTableScanDesc /*pscan*/) { + NOT_IMPLEMENTED_YET; +} + +struct IndexFetchTableData *PaxAccessMethod::IndexFetchBegin(Relation /*rel*/) { + NOT_SUPPORTED_YET; + return nullptr; +} + +void PaxAccessMethod::IndexFetchEnd(IndexFetchTableData * /*data*/) { + NOT_SUPPORTED_YET; +} + +void PaxAccessMethod::IndexFetchReset(IndexFetchTableData * /*data*/) { + NOT_SUPPORTED_YET; +} + +bool PaxAccessMethod::IndexFetchTuple(struct IndexFetchTableData * /*scan*/, + ItemPointer /*tid*/, + Snapshot /*snapshot*/, + TupleTableSlot * /*slot*/, + bool * /*call_again*/, + bool * /*all_dead*/) { + NOT_SUPPORTED_YET; + return false; +} + +void PaxAccessMethod::TupleInsertSpeculative(Relation /*relation*/, + TupleTableSlot * /*slot*/, + CommandId /*cid*/, int /*options*/, + BulkInsertState /*bistate*/, + uint32 /*spec_token*/) { + NOT_IMPLEMENTED_YET; +} + +void PaxAccessMethod::TupleCompleteSpeculative(Relation /*relation*/, + TupleTableSlot * /*slot*/, + uint32 /*spec_token*/, + bool /*succeeded*/) { + NOT_IMPLEMENTED_YET; +} + +TM_Result PaxAccessMethod::TupleLock(Relation /*relation*/, ItemPointer /*tid*/, + Snapshot /*snapshot*/, + TupleTableSlot * /*slot*/, + CommandId /*cid*/, LockTupleMode /*mode*/, + LockWaitPolicy /*wait_policy*/, + uint8 /*flags*/, + TM_FailureData * /*tmfd*/) { + NOT_IMPLEMENTED_YET; + return TM_Ok; +} + +bool PaxAccessMethod::TupleFetchRowVersion(Relation /*relation*/, + ItemPointer /*tid*/, + Snapshot /*snapshot*/, + TupleTableSlot * /*slot*/) { + NOT_IMPLEMENTED_YET; + return false; +} + +bool PaxAccessMethod::TupleTidValid(TableScanDesc /*scan*/, + ItemPointer /*tid*/) { + NOT_IMPLEMENTED_YET; + return false; +} + +void PaxAccessMethod::TupleGetLatestTid(TableScanDesc /*sscan*/, + ItemPointer /*tid*/) { + NOT_SUPPORTED_YET; +} + +bool PaxAccessMethod::TupleSatisfiesSnapshot(Relation 
/*rel*/, + TupleTableSlot * /*slot*/, + Snapshot /*snapshot*/) { + NOT_IMPLEMENTED_YET; + return true; +} + +TransactionId PaxAccessMethod::IndexDeleteTuples( + Relation /*rel*/, TM_IndexDeleteOp * /*delstate*/) { + NOT_SUPPORTED_YET; + return 0; +} + +void PaxAccessMethod::RelationVacuum(Relation /*onerel*/, + VacuumParams * /*params*/, + BufferAccessStrategy /*bstrategy*/) { + /* PAX: micro-partitions have no dead tuples, so vacuum is empty */ +} + +uint64 PaxAccessMethod::RelationSize(Relation rel, ForkNumber fork_number) { + Oid pax_aux_oid; + Relation pax_aux_rel; + TupleDesc aux_tup_desc; + HeapTuple aux_tup; + SysScanDesc aux_scan; + uint64 pax_size = 0; + + if (fork_number != MAIN_FORKNUM) return 0; + + // Get the oid of pg_pax_blocks_xxx from pg_pax_tables + GetPaxTablesEntryAttributes(rel->rd_id, &pax_aux_oid, NULL, NULL); + + // Scan pg_pax_blocks_xxx to calculate size of micro partition + pax_aux_rel = heap_open(pax_aux_oid, AccessShareLock); + aux_tup_desc = RelationGetDescr(pax_aux_rel); + + aux_scan = systable_beginscan(pax_aux_rel, InvalidOid, false, NULL, 0, NULL); + while (HeapTupleIsValid(aux_tup = systable_getnext(aux_scan))) { + bool isnull = false; + // TODO(chenhongjie): What is needed and obtained here is the + // compressed size. Later, when the aux table supports size attributes + // before/after compression, we need to distinguish the two attributes by name. + Datum tup_datum = heap_getattr( + aux_tup, ANUM_PG_PAX_BLOCK_TABLES_PTBLOCKSIZE, aux_tup_desc, &isnull); + + Assert(!isnull); + pax_size += DatumGetUInt32(tup_datum); + } + + systable_endscan(aux_scan); + heap_close(pax_aux_rel, AccessShareLock); + + return pax_size; +} + +bool PaxAccessMethod::RelationNeedsToastTable(Relation /*rel*/) { + // PAX never uses toasting, so don't create a toast table (as of Cloudberry 7) + + return false; +} + +// Similar to the case of AO and AOCS tables, a PAX table has auxiliary tables: +// the size can be read directly from the auxiliary table, and there is not much +// room for optimization in estimating relsize. So this function is implemented +// in the same way as pax_relation_size(). +void PaxAccessMethod::EstimateRelSize(Relation rel, int32 * /*attr_widths*/, + BlockNumber *pages, double *tuples, + double *allvisfrac) { + Oid pax_aux_oid; + Relation pax_aux_rel; + TupleDesc aux_tup_desc; + HeapTuple aux_tup; + SysScanDesc aux_scan; + uint32 total_tuples = 0; + uint64 pax_size = 0; + + // Even an empty table takes at least one page, + // but number of tuples for an empty table could be 0. + *tuples = 0; + *pages = 1; + // index-only scan is not supported in PAX + *allvisfrac = 0; + + // Get the oid of pg_pax_blocks_xxx from pg_pax_tables + GetPaxTablesEntryAttributes(rel->rd_id, &pax_aux_oid, NULL, NULL); + + // Scan pg_pax_blocks_xxx to get attributes + pax_aux_rel = heap_open(pax_aux_oid, AccessShareLock); + aux_tup_desc = RelationGetDescr(pax_aux_rel); + + aux_scan = systable_beginscan(pax_aux_rel, InvalidOid, false, NULL, 0, NULL); + while (HeapTupleIsValid(aux_tup = systable_getnext(aux_scan))) { + Datum pttupcount_datum; + Datum ptblocksize_datum; + bool isnull = false; + + pttupcount_datum = heap_getattr( + aux_tup, ANUM_PG_PAX_BLOCK_TABLES_PTTUPCOUNT, aux_tup_desc, &isnull); + Assert(!isnull); + total_tuples += DatumGetUInt32(pttupcount_datum); + + isnull = false; + // TODO(chenhongjie): What we want to get here is the uncompressed size, + // but what we're getting is the compressed size.
Later, when the aux table + // supports size attributes before/after compression, this needs to + // be corrected. + ptblocksize_datum = heap_getattr( + aux_tup, ANUM_PG_PAX_BLOCK_TABLES_PTBLOCKSIZE, aux_tup_desc, &isnull); + + Assert(!isnull); + pax_size += DatumGetUInt32(ptblocksize_datum); + } + + systable_endscan(aux_scan); + heap_close(pax_aux_rel, AccessShareLock); + + *tuples = static_cast<double>(total_tuples); + *pages = RelationGuessNumberOfBlocksFromSize(pax_size); +} + +double PaxAccessMethod::IndexBuildRangeScan( + Relation /*heap_relation*/, Relation /*index_relation*/, + IndexInfo * /*index_info*/, bool /*allow_sync*/, bool /*anyvisible*/, + bool /*progress*/, BlockNumber /*start_blockno*/, BlockNumber /*numblocks*/, + IndexBuildCallback /*callback*/, void * /*callback_state*/, + TableScanDesc /*scan*/) { + NOT_SUPPORTED_YET; + return 0.0; +} + +void PaxAccessMethod::IndexValidateScan(Relation /*heap_relation*/, + Relation /*index_relation*/, + IndexInfo * /*index_info*/, + Snapshot /*snapshot*/, + ValidateIndexState * /*state*/) { + NOT_IMPLEMENTED_YET; +} + +#define PAX_COPY_OPT(pax_opts_, pax_opt_name_) \ + do { \ + PaxOptions *pax_opts = reinterpret_cast<PaxOptions *>(pax_opts_); \ + int pax_name_offset_ = *reinterpret_cast<int *>(pax_opts->pax_opt_name_); \ + if (pax_name_offset_) \ + strlcpy(pax_opts->pax_opt_name_, \ + reinterpret_cast<char *>(pax_opts) + pax_name_offset_, \ + sizeof(pax_opts->pax_opt_name_)); \ + } while (0) +bytea *PaxAccessMethod::Amoptions(Datum reloptions, char /*relkind*/, + bool validate) { + void *rdopts; + + rdopts = build_reloptions(reloptions, validate, self_relopt_kind, + sizeof(PaxOptions), kSelfReloptTab, + lengthof(kSelfReloptTab)); + // adjust string values + PAX_COPY_OPT(rdopts, storage_format); + PAX_COPY_OPT(rdopts, compress_type); + + return reinterpret_cast<bytea *>(rdopts); +} +#undef PAX_COPY_OPT + +void PaxAccessMethod::SwapRelationFiles(Oid relid1, Oid relid2, + TransactionId frozen_xid, + MultiXactId cutoff_multi) { + HeapTuple tuple1; + HeapTuple tuple2; + Relation pax_rel; + + Oid b_relid1; + Oid b_relid2; + + pax_rel = table_open(PaxTablesRelationId, RowExclusiveLock); + + tuple1 = SearchSysCacheCopy1(PAXTABLESID, relid1); + if (!HeapTupleIsValid(tuple1)) + ereport(ERROR, (errcode(ERRCODE_UNDEFINED_SCHEMA), + errmsg("cache lookup failed with relid=%u for aux relation " + "in pg_pax_tables.", + relid1))); + + tuple2 = SearchSysCacheCopy1(PAXTABLESID, relid2); + if (!HeapTupleIsValid(tuple2)) + ereport(ERROR, (errcode(ERRCODE_UNDEFINED_SCHEMA), + errmsg("cache lookup failed with relid=%u for aux relation " + "in pg_pax_tables.", + relid2))); + + // swap the entries + { + Form_pg_pax_tables form1; + Form_pg_pax_tables form2; + + int16 temp_compresslevel; + NameData temp_compresstype; + + form1 = (Form_pg_pax_tables)GETSTRUCT(tuple1); + form2 = (Form_pg_pax_tables)GETSTRUCT(tuple2); + + Assert(((Form_pg_pax_tables)GETSTRUCT(tuple1))->relid == relid1); + Assert(((Form_pg_pax_tables)GETSTRUCT(tuple2))->relid == relid2); + + b_relid1 = form1->blocksrelid; + b_relid2 = form2->blocksrelid; + + memcpy(&temp_compresstype, &form1->compresstype, sizeof(NameData)); + memcpy(&form1->compresstype, &form2->compresstype, sizeof(NameData)); + memcpy(&form2->compresstype, &temp_compresstype, sizeof(NameData)); + + temp_compresslevel = form1->compresslevel; + form1->compresslevel = form2->compresslevel; + form2->compresslevel = temp_compresslevel; + } + + { + CatalogIndexState indstate; + + indstate = CatalogOpenIndexes(pax_rel); + CatalogTupleUpdateWithInfo(pax_rel, &tuple1->t_self,
tuple1, indstate); + CatalogTupleUpdateWithInfo(pax_rel, &tuple2->t_self, tuple2, indstate); + CatalogCloseIndexes(indstate); + } + + table_close(pax_rel, NoLock); + + /* swap relation files for aux table */ + { + Relation b_rel1; + Relation b_rel2; + + b_rel1 = relation_open(b_relid1, AccessExclusiveLock); + b_rel2 = relation_open(b_relid2, AccessExclusiveLock); + + swap_relation_files(b_relid1, b_relid2, false, /* target_is_pg_class */ + true, /* swap_toast_by_content */ + true, /*swap_stats */ + true, /* is_internal */ + frozen_xid, cutoff_multi, NULL); + + relation_close(b_rel1, NoLock); + relation_close(b_rel2, NoLock); + } +} + +} // namespace paxc +// END of C implementation + +extern "C" { + +static const TableAmRoutine kPaxColumnMethods = { + .type = T_TableAmRoutine, + .slot_callbacks = paxc::PaxAccessMethod::SlotCallbacks, + .scan_begin = pax::CCPaxAccessMethod::ScanBegin, + .scan_begin_extractcolumns = pax::CCPaxAccessMethod::ScanExtractColumns, + .scan_end = pax::CCPaxAccessMethod::ScanEnd, + .scan_rescan = pax::CCPaxAccessMethod::ScanRescan, + .scan_getnextslot = pax::CCPaxAccessMethod::ScanGetNextSlot, + + .parallelscan_estimate = paxc::PaxAccessMethod::ParallelscanEstimate, + .parallelscan_initialize = paxc::PaxAccessMethod::ParallelscanInitialize, + .parallelscan_reinitialize = + paxc::PaxAccessMethod::ParallelscanReinitialize, + + .index_fetch_begin = paxc::PaxAccessMethod::IndexFetchBegin, + .index_fetch_reset = paxc::PaxAccessMethod::IndexFetchReset, + .index_fetch_end = paxc::PaxAccessMethod::IndexFetchEnd, + .index_fetch_tuple = paxc::PaxAccessMethod::IndexFetchTuple, + + .tuple_fetch_row_version = paxc::PaxAccessMethod::TupleFetchRowVersion, + .tuple_tid_valid = paxc::PaxAccessMethod::TupleTidValid, + .tuple_get_latest_tid = paxc::PaxAccessMethod::TupleGetLatestTid, + .tuple_satisfies_snapshot = paxc::PaxAccessMethod::TupleSatisfiesSnapshot, + .index_delete_tuples = paxc::PaxAccessMethod::IndexDeleteTuples, + + .tuple_insert = pax::CCPaxAccessMethod::TupleInsert, + .tuple_insert_speculative = paxc::PaxAccessMethod::TupleInsertSpeculative, + .tuple_complete_speculative = + paxc::PaxAccessMethod::TupleCompleteSpeculative, + .multi_insert = pax::CCPaxAccessMethod::MultiInsert, + .tuple_delete = pax::CCPaxAccessMethod::TupleDelete, + .tuple_update = pax::CCPaxAccessMethod::TupleUpdate, + .tuple_lock = paxc::PaxAccessMethod::TupleLock, + .finish_bulk_insert = pax::CCPaxAccessMethod::FinishBulkInsert, + + .relation_set_new_filenode = pax::CCPaxAccessMethod::RelationSetNewFilenode, + .relation_nontransactional_truncate = + pax::CCPaxAccessMethod::RelationNontransactionalTruncate, + .relation_copy_data = pax::CCPaxAccessMethod::RelationCopyData, + .relation_copy_for_cluster = pax::CCPaxAccessMethod::RelationCopyForCluster, + .relation_vacuum = paxc::PaxAccessMethod::RelationVacuum, + .scan_analyze_next_block = pax::CCPaxAccessMethod::ScanAnalyzeNextBlock, + .scan_analyze_next_tuple = pax::CCPaxAccessMethod::ScanAnalyzeNextTuple, + .index_build_range_scan = paxc::PaxAccessMethod::IndexBuildRangeScan, + .index_validate_scan = paxc::PaxAccessMethod::IndexValidateScan, + + .relation_size = paxc::PaxAccessMethod::RelationSize, + .relation_needs_toast_table = + paxc::PaxAccessMethod::RelationNeedsToastTable, + + .relation_estimate_size = paxc::PaxAccessMethod::EstimateRelSize, + .scan_bitmap_next_block = pax::CCPaxAccessMethod::ScanBitmapNextBlock, + .scan_bitmap_next_tuple = pax::CCPaxAccessMethod::ScanBitmapNextTuple, + .scan_sample_next_block = 
pax::CCPaxAccessMethod::ScanSampleNextBlock,
+    .scan_sample_next_tuple = pax::CCPaxAccessMethod::ScanSampleNextTuple,
+
+    .amoptions = paxc::PaxAccessMethod::Amoptions,
+    .swap_relation_files = paxc::PaxAccessMethod::SwapRelationFiles,
+};
+
+PG_MODULE_MAGIC;
+PG_FUNCTION_INFO_V1(pax_tableam_handler);
+Datum pax_tableam_handler(PG_FUNCTION_ARGS) {  // NOLINT
+  PG_RETURN_POINTER(&kPaxColumnMethods);
+}
+
+static void PaxValidateStorageFormat(const char *value) {
+  size_t i;
+  static const char *storage_formats[] = {
+      "orc",
+      "ppt",
+  };
+
+  for (i = 0; i < lengthof(storage_formats); i++) {
+    if (strcmp(value, storage_formats[i]) == 0) return;
+  }
+  ereport(ERROR, (errmsg("unsupported storage format: '%s'", value)));
+}
+
+static void PaxValidateCompresstype(const char *value) {
+  size_t i;
+  static const char *compress_types[] = {
+      "none",
+      "zlib",
+  };
+
+  for (i = 0; i < lengthof(compress_types); i++) {
+    if (strcmp(value, compress_types[i]) == 0) return;
+  }
+  ereport(ERROR, (errmsg("unsupported compress type: '%s'", value)));
+}
+
+static shmem_startup_hook_type prev_shmem_startup_hook = NULL;
+static ExecutorStart_hook_type prev_executor_start = NULL;
+static ExecutorEnd_hook_type prev_executor_end = NULL;
+static uint32 executor_run_ref_count = 0;
+
+void PaxShmemInit() {
+  if (prev_shmem_startup_hook) prev_shmem_startup_hook();
+
+  paxc::paxc_shmem_startup();
+}
+
+static void PaxExecutorStart(QueryDesc *query_desc, int eflags) {
+  if (prev_executor_start)
+    prev_executor_start(query_desc, eflags);
+  else
+    standard_ExecutorStart(query_desc, eflags);
+
+  executor_run_ref_count++;
+}
+
+static void PaxExecutorEnd(QueryDesc *query_desc) {
+  if (prev_executor_end)
+    prev_executor_end(query_desc);
+  else
+    standard_ExecutorEnd(query_desc);
+
+  // executor_run_ref_count is unsigned, so check before decrementing rather
+  // than asserting ">= 0" afterwards, which is vacuously true.
+  Assert(executor_run_ref_count > 0);
+  executor_run_ref_count--;
+  if (executor_run_ref_count == 0) {
+    paxc::release_command_resource();
+  }
+}
+
+static void PaxXactCallback(XactEvent event, void * /*arg*/) {
+  if (event == XACT_EVENT_COMMIT || event == XACT_EVENT_ABORT ||
+      event == XACT_EVENT_PARALLEL_ABORT ||
+      event == XACT_EVENT_PARALLEL_COMMIT) {
+    if (executor_run_ref_count > 0) {
+      executor_run_ref_count = 0;
+      paxc::release_command_resource();
+    }
+  }
+}
+
+void _PG_init(void) {  // NOLINT
+  if (!process_shared_preload_libraries_in_progress) {
+    ereport(ERROR,
+            (errmsg("pax must be loaded via shared_preload_libraries")));
+    return;
+  }
+
+  paxc::paxc_shmem_request();
+
+  prev_shmem_startup_hook = shmem_startup_hook;
+  shmem_startup_hook = PaxShmemInit;
+
+  prev_executor_start = ExecutorStart_hook;
+  ExecutorStart_hook = PaxExecutorStart;
+
+  prev_executor_end = ExecutorEnd_hook;
+  ExecutorEnd_hook = PaxExecutorEnd;
+
+  ext_dml_init_hook = pax::CCPaxAccessMethod::ExtDmlInit;
+  ext_dml_finish_hook = pax::CCPaxAccessMethod::ExtDmlFini;
+  file_unlink_hook = pax::CCPaxAccessMethod::RelationFileUnlink;
+
+  RegisterXactCallback(PaxXactCallback, NULL);
+
+  self_relopt_kind = add_reloption_kind();
+  add_string_reloption(self_relopt_kind, "storage_format",
+                       "pax storage format", "orc", PaxValidateStorageFormat,
+                       AccessExclusiveLock);
+  add_string_reloption(self_relopt_kind, "compresstype", "pax compress type",
+                       PAX_DEFAULT_COMPRESSTYPE, PaxValidateCompresstype,
+                       AccessExclusiveLock);
+  add_int_reloption(self_relopt_kind, "compresslevel", "pax compress level",
+                    PAX_DEFAULT_COMPRESSLEVEL, AO_MIN_COMPRESSLEVEL,
+                    AO_MAX_COMPRESSLEVEL, AccessExclusiveLock);
+}
+}  // extern "C"
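Reviewer note: the string reloptions handled by PAX_COPY_OPT above arrive from
build_reloptions() with the struct field holding an integer offset to the
characters appended after the struct. The standalone sketch below (struct and
names are illustrative, not part of this patch) shows the offset-to-inline
copy the macro performs:

#include <cstdio>
#include <cstring>

// Stand-in for PaxOptions; the fixed-size buffer mirrors the real struct.
struct Opts {
  int vl_len;
  char storage_format[16];
};

int main() {
  // Emulate build_reloptions(): the string value is appended after the
  // struct and the field temporarily holds the offset to it.
  char blob[sizeof(Opts) + 16] = {};
  Opts *opts = reinterpret_cast<Opts *>(blob);
  int offset = sizeof(Opts);
  std::memcpy(blob + offset, "orc", sizeof("orc"));
  *reinterpret_cast<int *>(opts->storage_format) = offset;

  // The PAX_COPY_OPT step: read the offset back and copy the characters
  // into the fixed-size field so the struct becomes self-contained.
  int off = *reinterpret_cast<int *>(opts->storage_format);
  if (off)
    std::snprintf(opts->storage_format, sizeof(opts->storage_format), "%s",
                  reinterpret_cast<char *>(opts) + off);

  std::printf("storage_format = %s\n", opts->storage_format);  // "orc"
  return 0;
}

diff --git a/contrib/pax_storage/src/cpp/access/pax_access_handle.h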
b/contrib/pax_storage/src/cpp/access/pax_access_handle.h new file mode 100644 index 00000000000..2bca0ba15a7 --- /dev/null +++ b/contrib/pax_storage/src/cpp/access/pax_access_handle.h @@ -0,0 +1,172 @@ +#pragma once + +#include "comm/cbdb_api.h" + +namespace paxc { +class PaxAccessMethod final { + private: + PaxAccessMethod() = default; + + public: + static const TupleTableSlotOps *SlotCallbacks(Relation rel) noexcept; + + static void ScanSetTidrange(TableScanDesc scan, ItemPointer mintid, + ItemPointer maxtid); + static void ScanGetnextslotTidrange(TableScanDesc scan, + ScanDirection direction, + TupleTableSlot *slot); + + /* Parallel table scan related functions. */ + static Size ParallelscanEstimate(Relation rel); + static Size ParallelscanInitialize(Relation rel, ParallelTableScanDesc pscan); + static void ParallelscanReinitialize(Relation rel, + ParallelTableScanDesc pscan); + + /* Index Scan Callbacks, unsupported yet */ + static struct IndexFetchTableData *IndexFetchBegin(Relation rel); + static void IndexFetchEnd(struct IndexFetchTableData *data); + static void IndexFetchReset(struct IndexFetchTableData *data); + static bool IndexFetchTuple(struct IndexFetchTableData *scan, ItemPointer tid, + Snapshot snapshot, TupleTableSlot *slot, + bool *call_again, bool *all_dead); + + /* Callbacks for non-modifying operations on individual tuples */ + static bool TupleFetchRowVersion(Relation relation, ItemPointer tid, + Snapshot snapshot, TupleTableSlot *slot); + static bool TupleTidValid(TableScanDesc scan, ItemPointer tid); + static void TupleGetLatestTid(TableScanDesc sscan, ItemPointer tid); + static bool TupleSatisfiesSnapshot(Relation rel, TupleTableSlot *slot, + Snapshot snapshot); + static TransactionId IndexDeleteTuples(Relation rel, + TM_IndexDeleteOp *delstate); + + static bool RelationNeedsToastTable(Relation rel); + static uint64 RelationSize(Relation rel, ForkNumber fork_number); + static void EstimateRelSize(Relation rel, int32 *attr_widths, + BlockNumber *pages, double *tuples, + double *allvisfrac); + + /* unsupported DML now, may move to CCPaxAccessMethod */ + static void TupleInsertSpeculative(Relation relation, TupleTableSlot *slot, + CommandId cid, int options, + BulkInsertState bistate, + uint32 spec_token); + static void TupleCompleteSpeculative(Relation relation, TupleTableSlot *slot, + uint32 spec_token, bool succeeded); + static TM_Result TupleLock(Relation relation, ItemPointer tid, + Snapshot snapshot, TupleTableSlot *slot, + CommandId cid, LockTupleMode mode, + LockWaitPolicy wait_policy, uint8 flags, + TM_FailureData *tmfd); + + static void RelationVacuum(Relation onerel, VacuumParams *params, + BufferAccessStrategy bstrategy); + static double IndexBuildRangeScan( + Relation heap_relation, Relation index_relation, IndexInfo *index_info, + bool allow_sync, bool anyvisible, bool progress, + BlockNumber start_blockno, BlockNumber numblocks, + IndexBuildCallback callback, void *callback_state, TableScanDesc scan); + static void IndexValidateScan(Relation heap_relation, Relation index_relation, + IndexInfo *index_info, Snapshot snapshot, + ValidateIndexState *state); + + static bytea *Amoptions(Datum reloptions, char relkind, bool validate); + + static void SwapRelationFiles(Oid relid1, Oid relid2, + TransactionId frozen_xid, + MultiXactId cutoff_multi); +}; + +} // namespace paxc + +namespace pax { +class CCPaxAccessMethod final { + private: + CCPaxAccessMethod() = default; + + public: + static TableScanDesc ScanBegin(Relation rel, Snapshot snapshot, int nkeys, + 
struct ScanKeyData *key, + ParallelTableScanDesc pscan, uint32 flags); + static void ScanEnd(TableScanDesc scan); + static void ScanRescan(TableScanDesc scan, struct ScanKeyData *key, + bool set_params, bool allow_strat, bool allow_sync, + bool allow_pagemode); + static bool ScanGetNextSlot(TableScanDesc scan, ScanDirection direction, + TupleTableSlot *slot); + + static TableScanDesc ScanExtractColumns(Relation rel, Snapshot snapshot, + ParallelTableScanDesc parallel_scan, + List *targetlist, List *qual, + uint32 flags); + + /* Manipulations of physical tuples. */ + static void TupleInsert(Relation relation, TupleTableSlot *slot, + CommandId cid, int options, BulkInsertState bistate); + static TM_Result TupleDelete(Relation relation, ItemPointer tid, + CommandId cid, Snapshot snapshot, + Snapshot crosscheck, bool wait, + TM_FailureData *tmfd, bool changing_part); + static TM_Result TupleUpdate(Relation relation, ItemPointer otid, + TupleTableSlot *slot, CommandId cid, + Snapshot snapshot, Snapshot crosscheck, + bool wait, TM_FailureData *tmfd, + LockTupleMode *lockmode, bool *update_indexes); + + static void RelationCopyData(Relation rel, const RelFileNode *newrnode); + + static void RelationCopyForCluster(Relation old_heap, Relation new_heap, + Relation old_index, bool use_sort, + TransactionId oldest_xmin, + TransactionId *xid_cutoff, + MultiXactId *multi_cutoff, + double *num_tuples, double *tups_vacuumed, + double *tups_recently_dead); + + static void RelationSetNewFilenode(Relation rel, const RelFileNode *newrnode, + char persistence, + TransactionId *freeze_xid, + MultiXactId *minmulti); + + static void RelationNontransactionalTruncate(Relation rel); + + static bool ScanAnalyzeNextBlock(TableScanDesc scan, BlockNumber blockno, + BufferAccessStrategy bstrategy); + static bool ScanAnalyzeNextTuple(TableScanDesc scan, + TransactionId oldest_xmin, double *liverows, + double *deadrows, TupleTableSlot *slot); + static bool ScanBitmapNextBlock(TableScanDesc scan, TBMIterateResult *tbmres); + static bool ScanBitmapNextTuple(TableScanDesc scan, TBMIterateResult *tbmres, + TupleTableSlot *slot); + static bool ScanSampleNextBlock(TableScanDesc scan, + SampleScanState *scanstate); + static bool ScanSampleNextTuple(TableScanDesc scan, + SampleScanState *scanstate, + TupleTableSlot *slot); + + static void MultiInsert(Relation relation, TupleTableSlot **slots, + int ntuples, CommandId cid, int options, + BulkInsertState bistate); + + static void FinishBulkInsert(Relation relation, int options); + + // DML init/fini hooks + static void ExtDmlInit(Relation rel, CmdType operation); + static void ExtDmlFini(Relation rel, CmdType operation); + + // MicroPartition File cleanup hook + static void RelationFileUnlink(RelFileNodeBackend rnode); +}; + +} // namespace pax + +extern ext_dml_func_hook_type ext_dml_init_hook; +extern ext_dml_func_hook_type ext_dml_finish_hook; + +// plain structure used by reloptions, can be accessed from C++ code. +struct PaxOptions { + int32 vl_len; /* varlena header (do not touch directly!) 
*/
+  char storage_format[16];
+  char compress_type[16];
+  int compress_level;
+};
diff --git a/contrib/pax_storage/src/cpp/access/pax_deleter.cc b/contrib/pax_storage/src/cpp/access/pax_deleter.cc
new file mode 100644
index 00000000000..594bc221eb0
--- /dev/null
+++ b/contrib/pax_storage/src/cpp/access/pax_deleter.cc
@@ -0,0 +1,91 @@
+#include "access/pax_deleter.h"
+
+#include <memory>
+#include <string>
+#include <utility>
+#include <vector>
+
+#include "access/pax_dml_state.h"
+#include "comm/singleton.h"
+#include "storage/pax_itemptr.h"
+#include "storage/paxc_block_map_manager.h"
+namespace pax {
+CPaxDeleter::CPaxDeleter(const Relation rel, const Snapshot snapshot)
+    : rel_(rel), snapshot_(snapshot) {}
+
+CPaxDeleter::~CPaxDeleter() = default;
+
+TM_Result CPaxDeleter::DeleteTuple(const Relation relation,
+                                   const ItemPointer tid, const CommandId cid,
+                                   const Snapshot snapshot,
+                                   TM_FailureData *tmfd) {
+  CPaxDeleter *deleter =
+      CPaxDmlStateLocal::Instance()->GetDeleter(relation, snapshot);
+  // TODO(gongxun): need a more graceful way to pass the snapshot
+  Assert(deleter != nullptr);
+  TM_Result result;
+  result = deleter->MarkDelete(tid);
+  if (result == TM_SelfModified) {
+    tmfd->cmax = cid;
+  }
+  return result;
+}
+
+TM_Result CPaxDeleter::MarkDelete(const ItemPointer tid) {
+  PaxItemPointer pax_tid(*reinterpret_cast<PaxItemPointer *>(tid));
+  uint8 table_no = pax_tid.GetTableNo();
+  uint32 block_number = pax_tid.GetBlockNumber();
+  uint32 tuple_number = pax_tid.GetTupleNumber();
+
+  std::string block_id =
+      cbdb::GetBlockId(rel_->rd_id, table_no, block_number).ToStr();
+
+  if (block_bitmap_map_.find(block_id) == block_bitmap_map_.end()) {
+    // TODO(gongxun): the bitmap should support growing dynamically
+    block_bitmap_map_[block_id] =
+        std::unique_ptr<DynamicBitmap>(new DynamicBitmap());  // NOLINT
+  }
+  DynamicBitmap *bitmap = block_bitmap_map_[block_id].get();
+  if (bitmap->NumBits() <= tuple_number) {
+    bitmap->Resize(bitmap->NumBits() * 2);
+  }
+
+  if (bitmap->Test(tuple_number)) {
+    return TM_SelfModified;
+  }
+
+  bitmap->Set(tuple_number);
+  return TM_Ok;
+}
+
+void CPaxDeleter::ExecDelete() {
+  if (block_bitmap_map_.empty()) {
+    return;
+  }
+
+  TableDeleter table_deleter(rel_, buildDeleteIterator(),
+                             std::move(block_bitmap_map_), snapshot_);
+  table_deleter.Delete();
+}
+
+std::unique_ptr<IteratorBase<MicroPartitionMetadata>>
+CPaxDeleter::buildDeleteIterator() {
+  std::vector<MicroPartitionMetadata> micro_partitions;
+  for (auto &it : block_bitmap_map_) {
+    std::string block_id = it.first;
+    DynamicBitmap *bitmap_ptr = it.second.get();
+    BitmapIterator bitmap_it(bitmap_ptr);
+    int32 tuple_number = bitmap_it.Next(true);
+    if (tuple_number != -1) {
+      pax::MicroPartitionMetadata meta_info;
+
+      meta_info.SetFileName(cbdb::BuildPaxFilePath(rel_, block_id));
+      meta_info.SetMicroPartitionId(std::move(block_id));
+      micro_partitions.push_back(std::move(meta_info));
+    }
+  }
+  IteratorBase<MicroPartitionMetadata> *iter =
+      new VectorIterator<MicroPartitionMetadata>(std::move(micro_partitions));
+
+  return std::unique_ptr<IteratorBase<MicroPartitionMetadata>>(iter);
+}
+
+}  // namespace pax
diff --git a/contrib/pax_storage/src/cpp/access/pax_deleter.h b/contrib/pax_storage/src/cpp/access/pax_deleter.h
new file mode 100644
index 00000000000..7d94ffe6efd
--- /dev/null
+++ b/contrib/pax_storage/src/cpp/access/pax_deleter.h
@@ -0,0 +1,33 @@
+#pragma once
+
+#include "comm/cbdb_api.h"
+
+#include <map>
+#include <memory>
+#include <string>
+
+#include "comm/bitmap.h"
+#include "storage/pax.h"
+
+namespace pax {
+class CPaxDeleter {
+ public:
+  explicit CPaxDeleter(const Relation rel, const Snapshot snapshot);
+
+  static TM_Result DeleteTuple(const Relation relation, const ItemPointer tid,
+                               const CommandId cid, const Snapshot snapshot,
+                               TM_FailureData *tmfd);
+
+  TM_Result MarkDelete(const ItemPointer tid);
+
+  ~CPaxDeleter();
+
+  void ExecDelete();
+
+ private:
+  std::unique_ptr<IteratorBase<MicroPartitionMetadata>> buildDeleteIterator();
+  std::map<std::string, std::unique_ptr<DynamicBitmap>> block_bitmap_map_;
+  const Relation rel_;
+  const Snapshot snapshot_;
+};  // class CPaxDeleter
+}  // namespace pax
diff --git a/contrib/pax_storage/src/cpp/access/pax_dml_state.cc b/contrib/pax_storage/src/cpp/access/pax_dml_state.cc
new file mode 100644
index 00000000000..335eb659197
--- /dev/null
+++ b/contrib/pax_storage/src/cpp/access/pax_dml_state.cc
@@ -0,0 +1,140 @@
+#include "access/pax_dml_state.h"
+
+namespace pax {
+// class CPaxDmlStateLocal
+
+void CPaxDmlStateLocal::DmlStateResetCallback(void * /*arg*/) {
+  pax::CPaxDmlStateLocal::Instance()->Reset();
+}
+
+void CPaxDmlStateLocal::InitDmlState(Relation rel, CmdType operation) {
+  if (operation == CMD_UPDATE || operation == CMD_DELETE) {
+    cbdb::InitCommandResource();
+  }
+
+  if (!dml_descriptor_tab_) {
+    HASHCTL hash_ctl;
+    Assert(!cbdb::pax_memory_context);
+
+    cbdb::pax_memory_context = AllocSetContextCreate(
+        CurrentMemoryContext, "Pax Storage", PAX_ALLOCSET_DEFAULT_SIZES);
+
+    cbdb::MemoryCtxRegisterResetCallback(cbdb::pax_memory_context, &cb_);
+
+    memset(&hash_ctl, 0, sizeof(hash_ctl));
+    hash_ctl.keysize = sizeof(Oid);
+    hash_ctl.entrysize = sizeof(PaxDmlState);
+    hash_ctl.hcxt = cbdb::pax_memory_context;
+    dml_descriptor_tab_ = cbdb::HashCreate(
+        "Pax DML state", 128, &hash_ctl, HASH_CONTEXT | HASH_ELEM | HASH_BLOBS);
+  }
+
+  EntryDmlState(cbdb::RelationGetRelationId(rel));
+}
+
+void CPaxDmlStateLocal::FinishDmlState(Relation rel, CmdType /*operation*/) {
+  PaxDmlState *state;
+  state = RemoveDmlState(cbdb::RelationGetRelationId(rel));
+
+  if (state == nullptr) {
+    return;
+  }
+
+  if (state->deleter) {
+    // TODO(gongxun): deleter finish
+    state->deleter->ExecDelete();
+
+    delete state->deleter;
+    state->deleter = nullptr;
+    // FIXME: for an update operation, maybe we should do something extra here
+  }
+
+  if (state->inserter) {
+    MemoryContext old_ctx;
+    Assert(cbdb::pax_memory_context);
+
+    old_ctx = MemoryContextSwitchTo(cbdb::pax_memory_context);
+    state->inserter->FinishInsert();
+    delete state->inserter;
+    state->inserter = nullptr;
+    MemoryContextSwitchTo(old_ctx);
+  }
+}
+
+CPaxInserter *CPaxDmlStateLocal::GetInserter(Relation rel) {
+  PaxDmlState *state;
+  state = FindDmlState(cbdb::RelationGetRelationId(rel));
+  // TODO(gongxun): switch memory context??
+  if (state->inserter == nullptr) {
+    state->inserter = new CPaxInserter(rel);
+  }
+  return state->inserter;
+}
+
+CPaxDeleter *CPaxDmlStateLocal::GetDeleter(Relation rel, Snapshot snapshot) {
+  PaxDmlState *state;
+  state = FindDmlState(cbdb::RelationGetRelationId(rel));
+  // TODO(gongxun): switch memory context??
+  if (state->deleter == nullptr) {
+    state->deleter = new CPaxDeleter(rel, snapshot);
+  }
+  return state->deleter;
+}
+
+void CPaxDmlStateLocal::Reset() {
+  last_used_state_ = nullptr;
+  dml_descriptor_tab_ = nullptr;
+  cbdb::pax_memory_context = nullptr;
+}
+
+CPaxDmlStateLocal::CPaxDmlStateLocal()
+    : last_used_state_(nullptr),
+      dml_descriptor_tab_(nullptr),
+      cb_{.func = DmlStateResetCallback, .arg = NULL} {}
+
+PaxDmlState *CPaxDmlStateLocal::EntryDmlState(const Oid &oid) {
+  PaxDmlState *state;
+  bool found;
+  Assert(this->dml_descriptor_tab_);
+
+  state = reinterpret_cast<PaxDmlState *>(
+      cbdb::HashSearch(this->dml_descriptor_tab_, &oid, HASH_ENTER, &found));
+  state->inserter = nullptr;
+  state->deleter = nullptr;
+  Assert(!found);
+
+  this->last_used_state_ = state;
+  return state;
+}
+
+PaxDmlState *CPaxDmlStateLocal::RemoveDmlState(const Oid &oid) {
+  Assert(this->dml_descriptor_tab_);
+
+  PaxDmlState *state;
+  state = reinterpret_cast<PaxDmlState *>(
+      cbdb::HashSearch(this->dml_descriptor_tab_, &oid, HASH_REMOVE, NULL));
+
+  if (!state) return NULL;
+
+  if (this->last_used_state_ && this->last_used_state_->oid == oid)
+    this->last_used_state_ = NULL;
+
+  return state;
+}
+
+PaxDmlState *CPaxDmlStateLocal::FindDmlState(const Oid &oid) {
+  Assert(this->dml_descriptor_tab_);
+
+  if (this->last_used_state_ && this->last_used_state_->oid == oid)
+    return last_used_state_;
+
+  PaxDmlState *state;
+  state = reinterpret_cast<PaxDmlState *>(
+      cbdb::HashSearch(this->dml_descriptor_tab_, &oid, HASH_FIND, NULL));
+  Assert(state);
+
+  this->last_used_state_ = state;
+  return state;
+}
+
+}  // namespace pax
diff --git a/contrib/pax_storage/src/cpp/access/pax_dml_state.h b/contrib/pax_storage/src/cpp/access/pax_dml_state.h
new file mode 100644
index 00000000000..426e12d932b
--- /dev/null
+++ b/contrib/pax_storage/src/cpp/access/pax_dml_state.h
@@ -0,0 +1,54 @@
+#pragma once
+
+#include "comm/cbdb_api.h"
+
+#include <memory>
+
+#include "access/pax_deleter.h"
+#include "access/pax_inserter.h"
+#include "comm/cbdb_wrappers.h"
+#include "comm/singleton.h"
+
+namespace pax {
+struct PaxDmlState {
+  Oid oid;
+  CPaxInserter *inserter;
+  CPaxDeleter *deleter;
+};
+
+class CPaxDmlStateLocal final {
+  friend class Singleton<CPaxDmlStateLocal>;
+
+ public:
+  static CPaxDmlStateLocal *Instance() {
+    return Singleton<CPaxDmlStateLocal>::GetInstance();
+  }
+
+  ~CPaxDmlStateLocal() = default;
+
+  void InitDmlState(Relation rel, CmdType operation);
+  void FinishDmlState(Relation rel, CmdType operation);
+
+  CPaxInserter *GetInserter(Relation rel);
+  CPaxDeleter *GetDeleter(Relation rel, Snapshot snapshot);
+
+  void Reset();
+
+  CPaxDmlStateLocal(const CPaxDmlStateLocal &) = delete;
+  CPaxDmlStateLocal &operator=(const CPaxDmlStateLocal &) = delete;
+
+ private:
+  CPaxDmlStateLocal();
+  static void DmlStateResetCallback(void * /*arg*/);
+
+  PaxDmlState *EntryDmlState(const Oid &oid);
+  PaxDmlState *FindDmlState(const Oid &oid);
+  PaxDmlState *RemoveDmlState(const Oid &oid);
+
+ private:
+  PaxDmlState *last_used_state_;
+  HTAB *dml_descriptor_tab_;
+  MemoryContextCallback cb_;
+};
+
+}  // namespace pax
diff --git a/contrib/pax_storage/src/cpp/access/pax_inserter.cc b/contrib/pax_storage/src/cpp/access/pax_inserter.cc
new file mode 100644
index 00000000000..2584efdb3d9
--- /dev/null
+++ b/contrib/pax_storage/src/cpp/access/pax_inserter.cc
@@ -0,0 +1,67 @@
+#include "access/pax_inserter.h"
+
+#include <string>
+#include <utility>
+
+#include "access/pax_dml_state.h"
+#include "catalog/micro_partition_stats.h"
+#include "catalog/pax_aux_table.h"
+#include "comm/cbdb_wrappers.h"
+#include "storage/strategy.h"
+
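Reviewer note: to make the HTAB bookkeeping in pax_dml_state.cc above
concrete, here is a small self-contained sketch of the find/enter/remove flow
with its "last used" fast path, using std::unordered_map in place of
PostgreSQL's HTAB (all names illustrative, not part of this patch):

#include <cstdint>
#include <iostream>
#include <unordered_map>

using Oid = uint32_t;

struct Inserter { /* stand-in for CPaxInserter */ };
struct DmlState {
  Inserter *inserter = nullptr;
};

// Per-command cache keyed by relation Oid, mirroring the shape of
// EntryDmlState/FindDmlState/RemoveDmlState.
class DmlStateCache {
 public:
  DmlState *Find(Oid oid) {
    if (last_ && last_oid_ == oid) return last_;  // one-entry fast path
    auto it = states_.find(oid);
    if (it == states_.end()) return nullptr;
    last_ = &it->second;
    last_oid_ = oid;
    return last_;
  }
  DmlState *Enter(Oid oid) {
    last_ = &states_[oid];  // default-constructed on first use (HASH_ENTER)
    last_oid_ = oid;
    return last_;
  }
  void Remove(Oid oid) {
    if (last_ && last_oid_ == oid) last_ = nullptr;  // invalidate fast path
    states_.erase(oid);
  }

 private:
  std::unordered_map<Oid, DmlState> states_;
  DmlState *last_ = nullptr;
  Oid last_oid_ = 0;
};

int main() {
  DmlStateCache cache;
  cache.Enter(16384);
  std::cout << (cache.Find(16384) != nullptr) << "\n";  // 1
  cache.Remove(16384);
  std::cout << (cache.Find(16384) != nullptr) << "\n";  // 0
  return 0;
}
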
+namespace pax { + +CPaxInserter::CPaxInserter(Relation rel) : rel_(rel), insert_count_(0) { + writer_ = new TableWriter(rel); + writer_->SetWriteSummaryCallback(&cbdb::AddMicroPartitionEntry) + ->SetFileSplitStrategy(new PaxDefaultSplitStrategy()) + ->SetStatsCollector(new MicroPartitionStats()) + ->Open(); +} + +void CPaxInserter::InsertTuple(Relation relation, TupleTableSlot *slot, + CommandId /*cid*/, int /*options*/, + BulkInsertState /*bistate*/) { + Assert(relation == rel_); + slot->tts_tableOid = cbdb::RelationGetRelationId(relation); + + if (!TTS_IS_VIRTUAL(slot)) { + slot_getallattrs(slot); + } + + CTupleSlot cslot(slot); + writer_->WriteTuple(&cslot); +} + +void CPaxInserter::MultiInsert(Relation relation, TupleTableSlot **slots, + int ntuples, CommandId cid, int options, + BulkInsertState bistate) { + CPaxInserter *inserter = + pax::CPaxDmlStateLocal::Instance()->GetInserter(relation); + Assert(inserter != nullptr); + + for (int i = 0; i < ntuples; i++) { + inserter->InsertTuple(relation, slots[i], cid, options, bistate); + } +} + +void CPaxInserter::FinishBulkInsert(Relation relation, int /*options*/) { + pax::CPaxDmlStateLocal::Instance()->FinishDmlState(relation, CMD_INSERT); +} + +void CPaxInserter::FinishInsert() { + writer_->Close(); + delete writer_; + writer_ = nullptr; +} + +void CPaxInserter::TupleInsert(Relation relation, TupleTableSlot *slot, + CommandId cid, int options, + BulkInsertState bistate) { + CPaxInserter *inserter = CPaxDmlStateLocal::Instance()->GetInserter(relation); + Assert(inserter != nullptr); + + inserter->InsertTuple(relation, slot, cid, options, bistate); +} + +} // namespace pax diff --git a/contrib/pax_storage/src/cpp/access/pax_inserter.h b/contrib/pax_storage/src/cpp/access/pax_inserter.h new file mode 100644 index 00000000000..abd191981bf --- /dev/null +++ b/contrib/pax_storage/src/cpp/access/pax_inserter.h @@ -0,0 +1,35 @@ +#pragma once + +#include "comm/cbdb_api.h" + +#include "storage/micro_partition_metadata.h" +#include "storage/pax.h" + +namespace pax { + +class CPaxInserter { + public: + explicit CPaxInserter(Relation rel); + virtual ~CPaxInserter() = default; + + static void TupleInsert(Relation relation, TupleTableSlot *slot, + CommandId cid, int options, BulkInsertState bistate); + + static void MultiInsert(Relation relation, TupleTableSlot **slots, + int ntuples, CommandId cid, int options, + BulkInsertState bistate); + + static void FinishBulkInsert(Relation relation, int options); + + void InsertTuple(Relation relation, TupleTableSlot *slot, CommandId cid, + int options, BulkInsertState bistate); + void FinishInsert(); + + private: + Relation rel_; + uint32 insert_count_; + + TableWriter *writer_; +}; // class CPaxInserter + +} // namespace pax diff --git a/contrib/pax_storage/src/cpp/access/pax_scanner.cc b/contrib/pax_storage/src/cpp/access/pax_scanner.cc new file mode 100644 index 00000000000..9abd53ade28 --- /dev/null +++ b/contrib/pax_storage/src/cpp/access/pax_scanner.cc @@ -0,0 +1,234 @@ +#include "access/pax_scanner.h" + +#include "access/pax_access_handle.h" +#include "storage/local_file_system.h" +#include "storage/micro_partition.h" +#include "storage/micro_partition_iterator.h" +#include "storage/orc/orc.h" +#include "storage/pax.h" +#include "storage/pax_buffer.h" + +namespace pax { + +TableScanDesc PaxScanDesc::BeginScan(Relation relation, Snapshot snapshot, + int nkeys, struct ScanKeyData *key, + ParallelTableScanDesc pscan, uint32 flags, + PaxFilter *filter) { + PaxScanDesc *desc; + MemoryContext old_ctx; + 
TableReader::ReaderOptions reader_options{};
+
+  StaticAssertStmt(
+      offsetof(PaxScanDesc, rs_base_) == 0,
+      "rs_base should be the first field and aligned to the object address");
+
+  desc = new PaxScanDesc();
+
+  desc->memory_context_ = cbdb::AllocSetCtxCreate(
+      CurrentMemoryContext, "Pax Storage", PAX_ALLOCSET_DEFAULT_SIZES);
+
+  memset(&desc->rs_base_, 0, sizeof(desc->rs_base_));
+  desc->rs_base_.rs_rd = relation;
+  desc->rs_base_.rs_snapshot = snapshot;
+  desc->rs_base_.rs_nkeys = nkeys;
+  desc->rs_base_.rs_flags = flags;
+  desc->rs_base_.rs_parallel = pscan;
+  desc->key_ = key;
+  desc->reused_buffer_ = new DataBuffer<char>(32 * 1024 * 1024);  // 32MB
+  desc->filter_ = filter;
+#ifdef VEC_BUILD
+  if (flags & (1 << 12)) {
+    desc->vec_adapter_ = new VecAdapter(cbdb::RelationGetTupleDesc(relation));
+    reader_options.is_vec = true;
+    reader_options.adapter = desc->vec_adapter_;
+  }
+#endif
+
+  // init shared memory
+  cbdb::InitCommandResource();
+
+  old_ctx = MemoryContextSwitchTo(desc->memory_context_);
+
+  // build reader
+  reader_options.build_bitmap = true;
+  reader_options.reused_buffer = desc->reused_buffer_;
+  reader_options.rel_oid = desc->rs_base_.rs_rd->rd_id;
+  reader_options.filter = filter;
+
+  auto iter = MicroPartitionInfoIterator::New(relation, snapshot);
+  if (filter && filter->HasMicroPartitionFilter()) {
+    auto wrap = new FilterIterator<MicroPartitionMetadata>(
+        std::move(iter), [filter, relation](const auto &x) {
+          return filter->TestMicroPartitionScan(x.GetStats(),
+                                                RelationGetDescr(relation));
+        });
+    iter = std::unique_ptr<IteratorBase<MicroPartitionMetadata>>(wrap);
+  }
+  desc->reader_ = new TableReader(std::move(iter), reader_options);
+  desc->reader_->Open();
+
+  MemoryContextSwitchTo(old_ctx);
+  return &desc->rs_base_;
+}
+
+void PaxScanDesc::EndScan(TableScanDesc scan) {
+  PaxScanDesc *desc = ScanToDesc(scan);
+
+  Assert(desc->reader_);
+  desc->reader_->Close();
+
+  delete desc->reused_buffer_;
+  delete desc->reader_;
+  delete desc->filter_;
+
+#ifdef VEC_BUILD
+  delete desc->vec_adapter_;
+#endif
+  // TODO(jiaqizho): please double-check the abort-transaction path @gongxun
+  Assert(desc->memory_context_);
+  cbdb::MemoryCtxDelete(desc->memory_context_);
+  delete desc;
+}
+
+TableScanDesc PaxScanDesc::BeginScanExtractColumns(
+    Relation rel, Snapshot snapshot, ParallelTableScanDesc parallel_scan,
+    List *targetlist, List *qual, uint32 flags) {
+  TableScanDesc paxscan;
+  PaxFilter *filter;
+  auto natts = cbdb::RelationGetAttributesNumber(rel);
+  bool *cols;
+  bool found = false;
+
+  filter = new PaxFilter();
+
+  cols = new bool[natts];
+  memset(cols, false, natts);
+
+  found = cbdb::ExtractcolumnsFromNode(reinterpret_cast<Node *>(targetlist),
+                                       cols, natts);
+  found = cbdb::ExtractcolumnsFromNode(reinterpret_cast<Node *>(qual), cols,
+                                       natts) ||
+          found;
+
+  // In some cases (for example, count(*)), targetlist and qual may be null;
+  // extractcolumns_walker then returns immediately and no columns are
+  // specified, so we always scan the first column in that case.
+  if (!found) cols[0] = true;
+
+  // The lifetime of `cols` is bound to `PaxFilter`.
+  filter->SetColumnProjection(cols, natts);
+
+  {
+    ScanKey scan_keys = nullptr;
+    int n_scan_keys = 0;
+    auto ok = pax::BuildScanKeys(rel, qual, false, &scan_keys, &n_scan_keys);
+    if (ok) filter->SetScanKeys(scan_keys, n_scan_keys);
+  }
+  paxscan = BeginScan(rel, snapshot, 0, nullptr, parallel_scan, flags, filter);
+
+  return paxscan;
+}
+
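Reviewer note: a minimal sketch of the projection-mask logic in
BeginScanExtractColumns above, with plain vectors of column numbers standing
in for the Node walkers (illustrative only):

#include <iostream>
#include <vector>

// Given the column numbers referenced by the target list and the quals,
// build the projection mask the scan will use; when nothing is referenced
// (e.g. count(*)), fall back to scanning just the first column.
std::vector<bool> BuildProjection(int natts,
                                  const std::vector<int> &targetlist_cols,
                                  const std::vector<int> &qual_cols) {
  std::vector<bool> cols(natts, false);
  bool found = false;
  for (int c : targetlist_cols) { cols[c] = true; found = true; }
  for (int c : qual_cols) { cols[c] = true; found = true; }
  if (!found) cols[0] = true;  // still need one column to drive the scan
  return cols;
}

int main() {
  auto mask = BuildProjection(4, /*targetlist=*/{2}, /*quals=*/{});
  for (bool b : mask) std::cout << b;  // 0010
  std::cout << "\n";
  auto none = BuildProjection(4, {}, {});
  for (bool b : none) std::cout << b;  // 1000
  std::cout << "\n";
  return 0;
}

+// FIXME: shall we take these parameters into account?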
+void PaxScanDesc::ReScan(TableScanDesc scan) {
+  PaxScanDesc *desc = ScanToDesc(scan);
+  MemoryContext old_ctx;
+  Assert(desc && desc->reader_);
+
+  old_ctx = MemoryContextSwitchTo(desc->memory_context_);
+  desc->reader_->ReOpen();
+  MemoryContextSwitchTo(old_ctx);
+}
+
+bool PaxScanDesc::ScanGetNextSlot(TableScanDesc scan, TupleTableSlot *slot) {
+  PaxScanDesc *desc = ScanToDesc(scan);
+  MemoryContext old_ctx;
+  bool ok = false;
+
+  CTupleSlot cslot(slot);
+  old_ctx = MemoryContextSwitchTo(desc->memory_context_);
+
+  ok = desc->reader_->ReadTuple(&cslot);
+
+  MemoryContextSwitchTo(old_ctx);
+  return ok;
+}
+
+bool PaxScanDesc::ScanAnalyzeNextBlock(TableScanDesc scan,
+                                       BlockNumber blockno) {
+  PaxScanDesc *desc = ScanToDesc(scan);
+  desc->target_tuple_id_ = blockno;
+
+  return true;
+}
+
+bool PaxScanDesc::ScanAnalyzeNextTuple(TableScanDesc scan, double *liverows,
+                                       const double *deadrows,
+                                       TupleTableSlot *slot) {
+  PaxScanDesc *desc = ScanToDesc(scan);
+  MemoryContext old_ctx;
+  bool ok = false;
+
+  old_ctx = MemoryContextSwitchTo(desc->memory_context_);
+  Assert(*deadrows == 0);  // no dead rows in pax under the latest snapshot
+  while (desc->next_tuple_id_ < desc->target_tuple_id_) {
+    ok = PaxScanDesc::ScanGetNextSlot(scan, slot);
+    if (!ok) break;
+    desc->next_tuple_id_++;
+  }
+  MemoryContextSwitchTo(old_ctx);
+  if (ok) *liverows += 1;
+  return ok;
+}
+
+bool PaxScanDesc::ScanSampleNextBlock(TableScanDesc scan,
+                                      SampleScanState *scanstate) {
+  PaxScanDesc *desc = ScanToDesc(scan);
+  MemoryContext old_ctx;
+  TsmRoutine *tsm = scanstate->tsmroutine;
+  BlockNumber blockno = 0;
+  BlockNumber pages = 0;
+  double total_tuples = 0;
+  int32 attrwidths = 0;
+  double allvisfrac = 0;
+  bool ok = false;
+
+  old_ctx = MemoryContextSwitchTo(desc->memory_context_);
+
+  if (desc->total_tuples_ == 0) {
+    paxc::PaxAccessMethod::EstimateRelSize(scan->rs_rd, &attrwidths, &pages,
+                                           &total_tuples, &allvisfrac);
+    desc->total_tuples_ = total_tuples;
+  }
+
+  if (tsm->NextSampleBlock)
+    blockno = tsm->NextSampleBlock(scanstate, desc->total_tuples_);
+  else
+    blockno = system_nextsampleblock(scanstate, desc->total_tuples_);
+
+  ok = BlockNumberIsValid(blockno);
+  if (ok) {
+    desc->fetch_tuple_id_ = blockno;
+  }
+
+  MemoryContextSwitchTo(old_ctx);
+  return ok;
+}
+
+bool PaxScanDesc::ScanSampleNextTuple(TableScanDesc scan,
+                                      TupleTableSlot *slot) {
+  PaxScanDesc *desc = ScanToDesc(scan);
+  MemoryContext old_ctx;
+  bool ok = false;
+
+  old_ctx = MemoryContextSwitchTo(desc->memory_context_);
+  while (desc->next_tuple_id_ < desc->fetch_tuple_id_) {
+    ok = PaxScanDesc::ScanGetNextSlot(scan, slot);
+    if (!ok) break;
+    desc->next_tuple_id_++;
+  }
+  MemoryContextSwitchTo(old_ctx);
+  return ok;
+}
+
+}  // namespace pax
diff --git a/contrib/pax_storage/src/cpp/access/pax_scanner.h b/contrib/pax_storage/src/cpp/access/pax_scanner.h
new file mode 100644
index 00000000000..f06ab6c9fb8
--- /dev/null
+++ b/contrib/pax_storage/src/cpp/access/pax_scanner.h
@@ -0,0 +1,72 @@
+#pragma once
+
+#include "comm/cbdb_api.h"
+
+#include "storage/pax.h"
+#include "storage/pax_filter.h"
+#ifdef VEC_BUILD
+#include "storage/vec/pax_vec_adapter.h"
+#endif
+namespace pax {
+
+class PaxScanDesc {
+ public:
+  static TableScanDesc BeginScan(Relation relation, Snapshot snapshot,
+                                 int nkeys, struct ScanKeyData *key,
+                                 ParallelTableScanDesc pscan, uint32 flags,
+                                 PaxFilter *filter);
+
+  static void ReScan(TableScanDesc scan);
+  static void EndScan(TableScanDesc scan);
+
+  static TableScanDesc BeginScanExtractColumns(
+      Relation rel, Snapshot snapshot, ParallelTableScanDesc parallel_scan,
+      List *targetlist, List *qual, uint32 flags);
+
+  static bool ScanGetNextSlot(TableScanDesc scan, TupleTableSlot *slot);
+
+  static bool ScanAnalyzeNextBlock(TableScanDesc scan, BlockNumber blockno);
+  static bool ScanAnalyzeNextTuple(TableScanDesc scan, double *liverows,
+                                   const double *deadrows,
+                                   TupleTableSlot *slot);
+
+  static bool ScanSampleNextBlock(TableScanDesc scan,
+                                  SampleScanState *scanstate);
+
+  static bool ScanSampleNextTuple(TableScanDesc scan, TupleTableSlot *slot);
+
+  ~PaxScanDesc() = default;
+
+ private:
+  PaxScanDesc() = default;
+
+  static inline PaxScanDesc *ScanToDesc(TableScanDesc scan) {
+    auto desc = reinterpret_cast<PaxScanDesc *>(scan);
+    return desc;
+  }
+
+ private:
+  TableScanDescData rs_base_{};
+  const ScanKeyData *key_ = nullptr;
+  TableReader *reader_ = nullptr;
+
+  DataBuffer<char> *reused_buffer_ = nullptr;
+
+  MemoryContext memory_context_ = nullptr;
+
+  // Only used by `scan analyze` and `scan sample`
+  uint64 next_tuple_id_ = 0;
+  // Only used by `scan analyze`
+  uint64 target_tuple_id_ = 0;
+  // Only used by `scan sample`
+  uint64 fetch_tuple_id_ = 0;
+  uint64 total_tuples_ = 0;
+
+  // filter used to do column projection
+  PaxFilter *filter_ = nullptr;
+#ifdef VEC_BUILD
+  VecAdapter *vec_adapter_ = nullptr;
+#endif
+};  // class PaxScanDesc
+
+}  // namespace pax
diff --git a/contrib/pax_storage/src/cpp/access/pax_updater.cc b/contrib/pax_storage/src/cpp/access/pax_updater.cc
new file mode 100644
index 00000000000..e5f79c23ee2
--- /dev/null
+++ b/contrib/pax_storage/src/cpp/access/pax_updater.cc
@@ -0,0 +1,27 @@
+#include "access/pax_updater.h"
+
+#include "access/pax_deleter.h"
+#include "access/pax_dml_state.h"
+#include "access/pax_inserter.h"
+
+namespace pax {
+TM_Result CPaxUpdater::UpdateTuple(
+    const Relation relation, const ItemPointer otid, TupleTableSlot *slot,
+    const CommandId cid, const Snapshot snapshot, const Snapshot /*crosscheck*/,
+    const bool /*wait*/, TM_FailureData * /*tmfd*/,
+    LockTupleMode * /*lockmode*/, bool * /*update_indexes*/) {
+  TM_Result result;
+  CPaxDeleter *deleter =
+      CPaxDmlStateLocal::Instance()->GetDeleter(relation, snapshot);
+  Assert(deleter != nullptr);
+  CPaxInserter *inserter = CPaxDmlStateLocal::Instance()->GetInserter(relation);
+  Assert(inserter != nullptr);
+
+  result = deleter->MarkDelete(otid);
+  // FIXME(gongxun): check result and return TM_SelfModified if needed
+
+  inserter->InsertTuple(relation, slot, cid, 0, nullptr);
+  // TODO(gongxun): update pgstat info
+  return result;
+}
+}  // namespace pax
diff --git a/contrib/pax_storage/src/cpp/access/pax_updater.h b/contrib/pax_storage/src/cpp/access/pax_updater.h
new file mode 100644
index 00000000000..c560bbec841
--- /dev/null
+++ b/contrib/pax_storage/src/cpp/access/pax_updater.h
@@ -0,0 +1,15 @@
+#pragma once
+
+#include "comm/cbdb_api.h"
+
+namespace pax {
+class CPaxUpdater final {
+ public:
+  static TM_Result UpdateTuple(const Relation relation, const ItemPointer otid,
+                               TupleTableSlot *slot, const CommandId cid,
+                               const Snapshot snapshot,
+                               const Snapshot crosscheck, const bool wait,
+                               TM_FailureData *tmfd, LockTupleMode *lockmode,
+                               bool *update_indexes);
+};
+}  // namespace pax
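Reviewer note: PAX has no in-place update; CPaxUpdater::UpdateTuple above
marks the old tuple's position in a delete bitmap and appends the new version
through the inserter. A minimal self-contained model of that flow (std::set
as the "bitmap", a vector as the "table"; illustrative only):

#include <cstddef>
#include <iostream>
#include <set>
#include <string>
#include <utility>
#include <vector>

struct MiniPaxTable {
  std::vector<std::string> rows;  // appended tuple versions
  std::set<size_t> deleted;       // positions marked dead

  size_t Insert(std::string v) {
    rows.push_back(std::move(v));
    return rows.size() - 1;
  }
  // Returns false when the position was already marked (cf. TM_SelfModified).
  bool MarkDelete(size_t pos) { return deleted.insert(pos).second; }
  size_t Update(size_t pos, std::string v) {
    MarkDelete(pos);              // old version becomes invisible
    return Insert(std::move(v));  // new version is appended
  }
  void Scan() const {
    for (size_t i = 0; i < rows.size(); i++)
      if (!deleted.count(i)) std::cout << i << ": " << rows[i] << "\n";
  }
};

int main() {
  MiniPaxTable t;
  size_t p = t.Insert("v1");
  t.Update(p, "v2");
  t.Scan();  // only "1: v2" survives
  return 0;
}

diff --git a/contrib/pax_storage/src/cpp/catalog/micro_partition_stats.cc b/contrib/pax_storage/src/cpp/catalog/micro_partition_stats.cc
new file mode 100644
index 00000000000..3e0bd53d103
--- /dev/null
+++ b/contrib/pax_storage/src/cpp/catalog/micro_partition_stats.cc
@@ -0,0 +1,341 @@
+#include "catalog/micro_partition_stats.h"
+
+#include "comm/cbdb_api.h"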
"comm/cbdb_api.h" + +#include "comm/cbdb_wrappers.h" +#include "storage/micro_partition_metadata.h" +#include "storage/proto/proto_wrappers.h" + +namespace pax { +// SetStatsMessage may be called several times in a write, +// one for each micro partition, so all members need to reset. +// Some metainfo like typid, collation, oids for less/greater, +// fmgr should be exactly consistent. +MicroPartitionStats *MicroPartitionStats::SetStatsMessage( + pax::stats::MicroPartitionStatisticsInfo *stats, int natts) { + FmgrInfo finfo; + std::tuple zero_oids = {InvalidOid, InvalidOid, InvalidOid, InvalidOid}; + + Assert(natts > 0); + Assert(stats && stats->columnstats_size() == 0); + initial_check_ = false; + stats_ = stats; + + memset(&finfo, 0, sizeof(finfo)); + procs_.clear(); + finfos_.clear(); + status_.clear(); + for (int i = 0; i < natts; i++) { + procs_.emplace_back(zero_oids); + finfos_.emplace_back(std::pair({finfo, finfo})); + status_.emplace_back('u'); + auto columnstats = stats_->add_columnstats(); + Assert(columnstats->allnull()); + Assert(!columnstats->hasnull()); + } + Assert(stats_->columnstats_size() == natts); + return this; +} + +void MicroPartitionStats::AddRow(TupleTableSlot *slot) { + auto desc = slot->tts_tupleDescriptor; + auto n = desc->natts; + + if (!initial_check_) { + DoInitialCheck(desc); + initial_check_ = true; + } + CBDB_CHECK(status_.size() == static_cast(n), + cbdb::CException::ExType::kExTypeSchemaNotMatch); + for (auto i = 0; i < n; i++) { + auto att = &desc->attrs[i]; + + AssertImply(att->attisdropped, slot->tts_isnull[i]); + if (slot->tts_isnull[i]) + AddNullColumn(i); + else + AddNonNullColumn(i, slot->tts_values[i], desc); + } +} + +void MicroPartitionStats::AddNullColumn(int column_index) { + Assert(column_index >= 0); + Assert(column_index < static_cast(procs_.size())); + + auto column_stats = stats_->mutable_columnstats(column_index); + column_stats->set_hasnull(true); +} + +void MicroPartitionStats::AddNonNullColumn(int column_index, Datum value, + TupleDesc desc) { + Assert(column_index >= 0); + Assert(column_index < static_cast(procs_.size())); + + auto att = TupleDescAttr(desc, column_index); + auto collation = att->attcollation; + auto typlen = att->attlen; + auto typbyval = att->attbyval; + auto column_stats = stats_->mutable_columnstats(column_index); + column_stats->set_allnull(false); + + // update min/max + switch (status_[column_index]) { + case 'x': + break; + case 'y': + Assert(column_stats->minmaxstats().has_typid()); + Assert(column_stats->minmaxstats().has_minimal()); + Assert(column_stats->minmaxstats().has_maximum()); + Assert(column_stats->minmaxstats().has_proclt()); + Assert(column_stats->minmaxstats().has_procgt()); + Assert(column_stats->minmaxstats().has_procle()); + Assert(column_stats->minmaxstats().has_procge()); + Assert(column_stats->minmaxstats().typid() == att->atttypid); + Assert(column_stats->minmaxstats().collation() == collation); + + UpdateMinMaxValue(column_index, value, collation, typlen, typbyval); + break; + case 'n': { + auto minmax = column_stats->mutable_minmaxstats(); + + Assert(!minmax->has_proclt()); + Assert(!minmax->has_procgt()); + Assert(!minmax->has_procle()); + Assert(!minmax->has_procge()); + Assert(!minmax->has_typid()); + Assert(!minmax->has_minimal()); + Assert(!minmax->has_maximum()); + + minmax->set_typid(att->atttypid); + minmax->set_collation(collation); + minmax->set_proclt(std::get<0>(procs_[column_index])); + minmax->set_procgt(std::get<1>(procs_[column_index])); + 
+      minmax->set_procle(std::get<2>(procs_[column_index]));
+      minmax->set_procge(std::get<3>(procs_[column_index]));
+      minmax->set_minimal(ToValue(value, typlen, typbyval));
+      minmax->set_maximum(ToValue(value, typlen, typbyval));
+      status_[column_index] = 'y';
+      break;
+    }
+    default:
+      Assert(false);
+  }
+}
+
+void MicroPartitionStats::UpdateMinMaxValue(int column_index, Datum datum,
+                                            Oid collation, int typlen,
+                                            bool typbyval) {
+  Assert(initial_check_);
+  Assert(column_index >= 0 &&
+         static_cast<size_t>(column_index) < status_.size());
+  Assert(status_[column_index] == 'y');
+
+  auto &finfos = finfos_[column_index];
+  auto minmax =
+      stats_->mutable_columnstats(column_index)->mutable_minmaxstats();
+  bool ok;
+
+  {
+    const auto &min = minmax->minimal();
+    auto val = FromValue(min, typlen, typbyval, &ok);
+    CBDB_CHECK(ok, cbdb::CException::kExTypeLogicError);
+    auto update = DatumGetBool(
+        cbdb::FunctionCall2Coll(&finfos.first, collation, datum, val));
+    if (update) minmax->set_minimal(ToValue(datum, typlen, typbyval));
+  }
+  {
+    const auto &max = minmax->maximum();
+    auto val = FromValue(max, typlen, typbyval, &ok);
+    CBDB_CHECK(ok, cbdb::CException::kExTypeLogicError);
+    auto update = DatumGetBool(
+        cbdb::FunctionCall2Coll(&finfos.second, collation, datum, val));
+    if (update) minmax->set_maximum(ToValue(datum, typlen, typbyval));
+  }
+}
+
+bool MicroPartitionStats::GetStrategyProcinfo(
+    Oid typid, std::tuple<Oid, Oid, Oid, Oid> &procids,
+    std::pair<FmgrInfo, FmgrInfo> &finfos) {
+  return cbdb::MinMaxGetStrategyProcinfo(typid, &std::get<0>(procids),
+                                         &finfos.first,
+                                         BTLessStrategyNumber) &&
+         cbdb::MinMaxGetStrategyProcinfo(typid, &std::get<1>(procids),
+                                         &finfos.second,
+                                         BTGreaterStrategyNumber) &&
+         cbdb::MinMaxGetStrategyProcinfo(typid, &std::get<2>(procids), nullptr,
+                                         BTLessEqualStrategyNumber) &&
+         cbdb::MinMaxGetStrategyProcinfo(typid, &std::get<3>(procids), nullptr,
+                                         BTGreaterEqualStrategyNumber);
+}
+
+void MicroPartitionStats::DoInitialCheck(TupleDesc desc) {
+  auto natts = desc->natts;
+
+  Assert(natts == static_cast<int>(status_.size()));
+  Assert(natts == stats_->columnstats_size());
+  Assert(status_.size() == procs_.size());
+  Assert(status_.size() == finfos_.size());
+
+  for (int i = 0; i < natts; i++) {
+    auto att = TupleDescAttr(desc, i);
+    if (att->attisdropped ||
+        !GetStrategyProcinfo(att->atttypid, procs_[i], finfos_[i])) {
+      status_[i] = 'x';
+      continue;
+    }
+    status_[i] = 'n';
+  }
+}
+
+Datum MicroPartitionStats::FromValue(const std::string &s, int typlen,
+                                     bool typbyval, bool *ok) {
+  const char *p = s.data();
+  *ok = true;
+  if (typbyval) {
+    Assert(typlen > 0);
+    switch (typlen) {
+      case 1: {
+        int8 i = *reinterpret_cast<const int8 *>(p);
+        return cbdb::Int8ToDatum(i);
+      }
+      case 2: {
+        int16 i = *reinterpret_cast<const int16 *>(p);
+        return cbdb::Int16ToDatum(i);
+      }
+      case 4: {
+        int32 i = *reinterpret_cast<const int32 *>(p);
+        return cbdb::Int32ToDatum(i);
+      }
+      case 8: {
+        int64 i = *reinterpret_cast<const int64 *>(p);
+        return cbdb::Int64ToDatum(i);
+      }
+      default:
+        Assert(!"unexpected typbyval, len not in 1,2,4,8");
+        *ok = false;
+        break;
+    }
+    return 0;
+  }
+
+  Assert(typlen == -1 || typlen > 0);
+  return PointerGetDatum(p);
+}
+
+std::string MicroPartitionStats::ToValue(Datum datum, int typlen,
+                                         bool typbyval) {
+  if (typbyval) {
+    Assert(typlen > 0);
+    switch (typlen) {
+      case 1: {
+        int8 i = cbdb::DatumToInt8(datum);
+        return std::string(reinterpret_cast<char *>(&i), sizeof(i));
+      }
+      case 2: {
+        int16 i = cbdb::DatumToInt16(datum);
+        return std::string(reinterpret_cast<char *>(&i), sizeof(i));
+      }
+      case 4: {
+        int32 i = cbdb::DatumToInt32(datum);
+        return std::string(reinterpret_cast<char *>(&i), sizeof(i));
+      }
+      case 8: {
+        int64 i = cbdb::DatumToInt64(datum);
+        return std::string(reinterpret_cast<char *>(&i), sizeof(i));
+      }
+      default:
+        Assert(!"unexpected typbyval, len not in 1,2,4,8");
+        break;
+    }
+    CBDB_RAISE(cbdb::CException::kExTypeLogicError);
+  }
+
+  if (typlen == -1) {
+    void *v;
+    int len;
+
+    v = cbdb::PointerAndLenFromDatum(datum, &len);
+    Assert(v && len != -1);
+    return std::string(reinterpret_cast<char *>(v), len);
+  }
+  // byref but fixed size
+  Assert(typlen > 0);
+  return std::string(reinterpret_cast<char *>(cbdb::DatumToPointer(datum)),
+                     typlen);
+}
+}  // namespace pax
+
+static inline const char *BoolToString(bool b) { return b ? "true" : "false"; }
+
+static char *TypeValueToCString(Oid typid, Oid collation,
+                                const std::string &value) {
+  FmgrInfo finfo;
+  HeapTuple tuple;
+  Form_pg_type form;
+  Datum datum;
+  bool ok;
+
+  tuple = SearchSysCache1(TYPEOID, ObjectIdGetDatum(typid));
+  if (!HeapTupleIsValid(tuple))
+    elog(ERROR, "cache lookup failed for type %u", typid);
+
+  form = (Form_pg_type)GETSTRUCT(tuple);
+  Assert(OidIsValid(form->typoutput));
+
+  datum = pax::MicroPartitionStats::FromValue(value, form->typlen,
+                                              form->typbyval, &ok);
+  if (!ok)
+    elog(ERROR, "unexpected typlen: %d", form->typlen);
+
+  fmgr_info_cxt(form->typoutput, &finfo, CurrentMemoryContext);
+  datum = FunctionCall1Coll(&finfo, collation, datum);
+  ReleaseSysCache(tuple);
+
+  return DatumGetCString(datum);
+}
+
+// define stat type for custom output
+extern "C" {
+extern Datum MicroPartitionStatsInput(PG_FUNCTION_ARGS);
+extern Datum MicroPartitionStatsOutput(PG_FUNCTION_ARGS);
+PG_FUNCTION_INFO_V1(MicroPartitionStatsInput);
+PG_FUNCTION_INFO_V1(MicroPartitionStatsOutput);
+}
+
+Datum MicroPartitionStatsInput(PG_FUNCTION_ARGS) {
+  ereport(ERROR, (errmsg("MicroPartitionStatsInput is not supported")));
+  (void)fcinfo;
+  PG_RETURN_POINTER(NULL);
+}
+
+Datum MicroPartitionStatsOutput(PG_FUNCTION_ARGS) {
+  struct varlena *v = PG_GETARG_VARLENA_PP(0);
+  pax::stats::MicroPartitionStatisticsInfo stats;
+  StringInfoData str;
+
+  bool ok = stats.ParseFromArray(VARDATA_ANY(v), VARSIZE_ANY_EXHDR(v));
+  if (!ok) ereport(ERROR, (errmsg("micropartition stats is corrupt")));
+
+  initStringInfo(&str);
+  for (int i = 0, n = stats.columnstats_size(); i < n; i++) {
+    const auto &column = stats.columnstats(i);
+
+    if (i > 0) appendStringInfoChar(&str, ',');
+
+    appendStringInfo(&str, "[(%s,%s)", BoolToString(column.allnull()),
+                     BoolToString(column.hasnull()));
+
+    if (!column.has_minmaxstats()) {
+      appendStringInfoString(&str, ",None]");
+      continue;
+    }
+
+    const auto &minmax = column.minmaxstats();
+    appendStringInfo(&str, ",(%u,%u,%u,%u,%s,%s)]", minmax.typid(),
+                     minmax.collation(), minmax.proclt(), minmax.procgt(),
+                     TypeValueToCString(minmax.typid(), minmax.collation(),
+                                        minmax.minimal()),
+                     TypeValueToCString(minmax.typid(), minmax.collation(),
+                                        minmax.maximum()));
+  }
+
+  PG_RETURN_CSTRING(str.data);
+}
diff --git a/contrib/pax_storage/src/cpp/catalog/micro_partition_stats.h b/contrib/pax_storage/src/cpp/catalog/micro_partition_stats.h
new file mode 100644
index 00000000000..56be8e8cddf
--- /dev/null
+++ b/contrib/pax_storage/src/cpp/catalog/micro_partition_stats.h
@@ -0,0 +1,51 @@
+#pragma once
+#include "comm/cbdb_api.h"
+
+#include <string>
+#include <tuple>
+#include <utility>
+#include <vector>
+
+namespace pax {
+namespace stats {
+class MicroPartitionStatisticsInfo;
+}
+
+class MicroPartitionStats final {
+ public:
+  MicroPartitionStats() = default;
+  MicroPartitionStats *SetStatsMessage(
+      pax::stats::MicroPartitionStatisticsInfo *stats, int natts);
+
+  void AddRow(TupleTableSlot *slot);
+
+  static std::string ToValue(Datum datum, int typlen, bool typbyval);
+  static Datum FromValue(const std::string &s, int typlen, bool typbyval,
+                         bool *ok);
+
+ private:
+  void AddNullColumn(int column_index);
+  void AddNonNullColumn(int column_index, Datum value, TupleDesc desc);
+  void DoInitialCheck(TupleDesc desc);
+  void UpdateMinMaxValue(int column_index, Datum datum, Oid collation,
+                         int typlen, bool typbyval);
+  static bool GetStrategyProcinfo(Oid typid,
+                                  std::tuple<Oid, Oid, Oid, Oid> &procids,
+                                  std::pair<FmgrInfo, FmgrInfo> &finfos);
+
+  // stats_: only references the info object by pointer
+  pax::stats::MicroPartitionStatisticsInfo *stats_ = nullptr;
+
+  // less: tuple[0], greater: tuple[1], le: tuple[2], ge: tuple[3]
+  std::vector<std::tuple<Oid, Oid, Oid, Oid>> procs_;
+  // less: pair[0], greater: pair[1]
+  std::vector<std::pair<FmgrInfo, FmgrInfo>> finfos_;
+
+  // status to indicate whether the oids are initialized
+  // or the min-max values are initialized
+  // 'u': everything is uninitialized
+  // 'x': column doesn't support min-max
+  // 'n': oids are initialized, but the min-max values are missing
+  // 'y': min-max is set and is updated from here on
+  std::vector<char> status_;
+  bool initial_check_ = false;
+};
+
+}  // namespace pax
diff --git a/contrib/pax_storage/src/cpp/catalog/pax_aux_table.cc b/contrib/pax_storage/src/cpp/catalog/pax_aux_table.cc
new file mode 100644
index 00000000000..906d8d9fd22
--- /dev/null
+++ b/contrib/pax_storage/src/cpp/catalog/pax_aux_table.cc
@@ -0,0 +1,392 @@
+#include "catalog/pax_aux_table.h"
+
+#include "comm/cbdb_api.h"
+
+#include <string>
+
+#include <vector>
+
+#include "comm/cbdb_wrappers.h"
+#include "storage/file_system.h"
+#include "storage/local_file_system.h"
+#include "storage/micro_partition_metadata.h"
+#include "storage/paxc_block_map_manager.h"
+
+namespace paxc {
+
+static inline void InsertTuple(Relation rel, Datum *values, bool *nulls) {
+  HeapTuple tuple;
+  tuple = heap_form_tuple(RelationGetDescr(rel), values, nulls);
+  CatalogTupleInsert(rel, tuple);
+}
+
+static inline void InsertTuple(Oid relid, Datum *values, bool *nulls) {
+  Relation rel;
+
+  rel = table_open(relid, RowExclusiveLock);
+  InsertTuple(rel, values, nulls);
+  table_close(rel, NoLock);
+}
+
+static void CPaxTransactionalTruncateTable(Oid aux_relid) {
+  Relation aux_rel;
+  Assert(OidIsValid(aux_relid));
+
+  // truncate the existing pax blocks auxiliary table.
+  aux_rel = relation_open(aux_relid, AccessExclusiveLock);
+
+  /* TODO: a pending-delete operation should be applied here. */
+  RelationSetNewRelfilenode(aux_rel, aux_rel->rd_rel->relpersistence);
+  relation_close(aux_rel, NoLock);
+}
+
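Reviewer note: the FromValue/ToValue pair in micro_partition_stats.cc above
round-trips a by-value Datum through a byte string by width. A minimal
standalone model of that encoding (uintptr_t standing in for PostgreSQL's
Datum; a little-endian host is assumed, as noted in the comments):

#include <cassert>
#include <cstdint>
#include <cstring>
#include <iostream>
#include <string>

using Datum = uintptr_t;  // stand-in for the PG Datum

// Store the low `typlen` bytes of the datum, as ToValue does for
// typbyval types (1/2/4/8 bytes); little-endian host assumed.
std::string ToValue(Datum d, int typlen) {
  assert(typlen == 1 || typlen == 2 || typlen == 4 || typlen == 8);
  return std::string(reinterpret_cast<const char *>(&d), typlen);
}

// Read the bytes back into a zeroed datum, as FromValue does.
Datum FromValue(const std::string &s, int typlen) {
  Datum d = 0;
  assert(static_cast<size_t>(typlen) == s.size());
  std::memcpy(&d, s.data(), typlen);
  return d;
}

int main() {
  Datum d = 42;
  std::string packed = ToValue(d, 4);         // int32-sized value
  std::cout << FromValue(packed, 4) << "\n";  // 42
  return 0;
}

+// Non-transactional truncate cases:
+// 1. the table is created and then truncated inside the same transaction
+//    block;
+// 2. the table is created outside a transaction block, data is inserted, and
+//    the table is then truncated inside a transaction block.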
+static void CPaxNontransactionalTruncateTable(Relation rel) {
+  HeapTuple tuple;
+  Relation aux_rel;
+  Oid aux_relid;
+
+  tuple = SearchSysCache1(PAXTABLESID, RelationGetRelid(rel));
+  if (!HeapTupleIsValid(tuple))
+    ereport(ERROR, (errcode(ERRCODE_UNDEFINED_SCHEMA),
+                    errmsg("cache lookup failed with relid=%u for aux relation "
+                           "in pg_pax_tables.",
+                           RelationGetRelid(rel))));
+  aux_relid = ((Form_pg_pax_tables)GETSTRUCT(tuple))->blocksrelid;
+  ReleaseSysCache(tuple);
+  Assert(OidIsValid(aux_relid));
+
+  aux_rel = relation_open(aux_relid, AccessExclusiveLock);
+  heap_truncate_one_rel(aux_rel);
+  relation_close(aux_rel, NoLock);
+}
+
+static void CPaxCreateMicroPartitionTable(const Relation rel) {
+  Relation pg_class_desc;
+  char aux_relname[32];
+  Oid relid;
+  Oid aux_relid;
+  Oid aux_namespace_id;
+  Oid pax_relid;
+  TupleDesc tupdesc;
+
+  pg_class_desc = table_open(RelationRelationId, RowExclusiveLock);
+  pax_relid = RelationGetRelid(rel);
+
+  // 1. create the blocks table.
+  snprintf(aux_relname, sizeof(aux_relname), "pg_pax_blocks_%u", pax_relid);
+  aux_namespace_id = PG_PAXAUX_NAMESPACE;
+  aux_relid = GetNewOidForRelation(pg_class_desc, ClassOidIndexId,
+                                   Anum_pg_class_oid,
+                                   aux_relname, aux_namespace_id);
+  tupdesc = CreateTemplateTupleDesc(NATTS_PG_PAX_BLOCK_TABLES);
+  TupleDescInitEntry(tupdesc, (AttrNumber)ANUM_PG_PAX_BLOCK_TABLES_PTBLOCKNAME,
+                     "ptblockname", NAMEOID, -1, 0);
+  TupleDescInitEntry(tupdesc, (AttrNumber)ANUM_PG_PAX_BLOCK_TABLES_PTTUPCOUNT,
+                     "pttupcount", INT4OID, -1, 0);
+  // TODO(chenhongjie): both uncompressed and compressed ptblocksize are
+  // needed.
+  TupleDescInitEntry(tupdesc, (AttrNumber)ANUM_PG_PAX_BLOCK_TABLES_PTBLOCKSIZE,
+                     "ptblocksize", INT4OID, -1, 0);
+  TupleDescInitEntry(tupdesc,
+                     (AttrNumber)ANUM_PG_PAX_BLOCK_TABLES_PTSTATISITICS,
+                     "ptstatistics", PAX_AUX_STATS_TYPE_OID, -1, 0);
+  relid = heap_create_with_catalog(
+      aux_relname, aux_namespace_id, InvalidOid, aux_relid, InvalidOid,
+      InvalidOid, rel->rd_rel->relowner, HEAP_TABLE_AM_OID, tupdesc, NIL,
+      RELKIND_RELATION, rel->rd_rel->relpersistence, rel->rd_rel->relisshared,
+      RelationIsMapped(rel), ONCOMMIT_NOOP, NULL, /* GP Policy */
+      (Datum)0, false, /* use _user_acl */
+      true, true, InvalidOid, NULL, /* typeaddress */
+      false /* valid_opts */);
+  Assert(relid == aux_relid);
+  table_close(pg_class_desc, NoLock);
+
+  // 2. insert an entry into pg_pax_tables.
+  InsertPaxTablesEntry(pax_relid, aux_relid, "", 0);
+
+  // 3. record in pg_depend that pg_pax_blocks_ depends on the relation.
+ { + ObjectAddress base; + ObjectAddress aux; + base.classId = RelationRelationId; + base.objectId = pax_relid; + base.objectSubId = 0; + aux.classId = RelationRelationId; + aux.objectId = aux_relid; + aux.objectSubId = 0; + recordDependencyOn(&aux, &base, DEPENDENCY_INTERNAL); + + // pg_pax_tables single row depend + base.classId = RelationRelationId; + base.objectId = pax_relid; + base.objectSubId = 0; + aux.classId = PaxTablesRelationId; + aux.objectId = pax_relid; + aux.objectSubId = 0; + recordDependencyOn(&aux, &base, DEPENDENCY_INTERNAL); + } +} + +static void CPaxDeletePaxBlockEntry(Oid relid, Snapshot pax_meta_data_snapshot, + const char *blockname) { + Relation rel; + ScanKeyData key[1]; + SysScanDesc scan; + HeapTuple tuple; + NameData ptblockname; + + rel = table_open(relid, RowExclusiveLock); + namestrcpy(&ptblockname, blockname); + ScanKeyInit(&key[0], ANUM_PG_PAX_BLOCK_TABLES_PTBLOCKNAME, + BTEqualStrategyNumber, F_NAMEEQ, NameGetDatum(&ptblockname)); + + // should add snapshot support + scan = systable_beginscan(rel, InvalidOid, false, pax_meta_data_snapshot, 1, + key); + + tuple = systable_getnext(scan); + if (HeapTupleIsValid(tuple)) { + CatalogTupleDelete(rel, &tuple->t_self); + } + + systable_endscan(scan); + table_close(rel, RowExclusiveLock); +} + +static void CPaxCopyPaxBlockEntry(Relation old_relation, Relation new_relation) { + HeapTuple tuple; + SysScanDesc pax_scan; + Relation old_aux_rel, new_aux_rel; + Oid old_aux_relid = 0, new_aux_relid = 0; + + HeapTuple tupcache; + tupcache = SearchSysCache1(PAXTABLESID, RelationGetRelid(old_relation)); + Assert(HeapTupleIsValid(tupcache)); + old_aux_relid = ((Form_pg_pax_tables)GETSTRUCT(tupcache))->blocksrelid; + ReleaseSysCache(tupcache); + + tupcache = SearchSysCache1(PAXTABLESID, RelationGetRelid(new_relation)); + Assert(HeapTupleIsValid(tupcache)); + new_aux_relid = ((Form_pg_pax_tables)GETSTRUCT(tupcache))->blocksrelid; + ReleaseSysCache(tupcache); + + old_aux_rel = table_open(old_aux_relid, RowExclusiveLock); + new_aux_rel = table_open(new_aux_relid, RowExclusiveLock); + + pax_scan = systable_beginscan(old_aux_rel, InvalidOid, false, + NULL, 0, NULL); + while ((tuple = systable_getnext(pax_scan)) != NULL) { + CatalogTupleInsert(new_aux_rel, tuple); + } + systable_endscan(pax_scan); + table_close(old_aux_rel, RowExclusiveLock); + table_close(new_aux_rel, RowExclusiveLock); +} + +} // namespace paxc + +namespace cbdb { +Oid GetPaxAuxRelid(Oid relid) { + Oid aux_relid = InvalidOid; + CBDB_WRAP_START; + { + GetPaxTablesEntryAttributes(relid, &aux_relid, NULL, NULL); + return aux_relid; + } + CBDB_WRAP_END; +} + +static void InsertPaxBlockEntry(Oid relid, const char *blockname, int pttupcount, + int ptblocksize, const ::pax::stats::MicroPartitionStatisticsInfo &mp_stats) { + int stats_length = mp_stats.ByteSize(); + uint32 len = VARHDRSZ + stats_length; + void *output; + + NameData ptblockname; + Datum values[NATTS_PG_PAX_BLOCK_TABLES]; + bool nulls[NATTS_PG_PAX_BLOCK_TABLES]; + + output = cbdb::Palloc(len); + SET_VARSIZE(output, len); + mp_stats.SerializeToArray(VARDATA(output), stats_length); + + Assert(blockname); + namestrcpy(&ptblockname, blockname); + + values[ANUM_PG_PAX_BLOCK_TABLES_PTBLOCKNAME - 1] = + NameGetDatum(&ptblockname); + nulls[ANUM_PG_PAX_BLOCK_TABLES_PTBLOCKNAME - 1] = false; + + values[ANUM_PG_PAX_BLOCK_TABLES_PTTUPCOUNT - 1] = Int32GetDatum(pttupcount); + nulls[ANUM_PG_PAX_BLOCK_TABLES_PTTUPCOUNT - 1] = false; + values[ANUM_PG_PAX_BLOCK_TABLES_PTBLOCKSIZE - 1] = + Int32GetDatum(ptblocksize); + 
+  nulls[ANUM_PG_PAX_BLOCK_TABLES_PTBLOCKSIZE - 1] = false;
+
+  // Serialize the statistics into PG bytea format and save them in the aux
+  // table's ptstatistics column.
+  values[ANUM_PG_PAX_BLOCK_TABLES_PTSTATISITICS - 1] = PointerGetDatum(output);
+  nulls[ANUM_PG_PAX_BLOCK_TABLES_PTSTATISITICS - 1] = false;
+
+  CBDB_WRAP_START;
+  {
+    paxc::InsertTuple(relid, values, nulls);
+  }
+  CBDB_WRAP_END;
+
+  cbdb::Pfree(output);
+}
+
+static void DeletePaxBlockEntry(Oid relid, Snapshot snapshot,
+                                const char *blockname) {
+  CBDB_WRAP_START;
+  {
+    paxc::CPaxDeletePaxBlockEntry(relid, snapshot, blockname);
+  }
+  CBDB_WRAP_END;
+}
+
+void DeleteMicroPartitionEntry(Oid pax_relid,
+                               Snapshot snapshot,
+                               const std::string &block_id) {
+  Oid aux_relid = GetPaxAuxRelid(pax_relid);
+  cbdb::DeletePaxBlockEntry(aux_relid, snapshot,
+                            block_id.c_str());
+}
+
+void AddMicroPartitionEntry(const pax::WriteSummary &summary) {
+  Oid aux_relid;
+  aux_relid = GetPaxAuxRelid(summary.rel_oid);
+  cbdb::InsertPaxBlockEntry(aux_relid, summary.block_id.c_str(),
+                            summary.num_tuples, summary.file_size, summary.mp_stats);
+}
+
+static void PaxTransactionalTruncateTable(Oid aux_relid) {
+  CBDB_WRAP_START;
+  { paxc::CPaxTransactionalTruncateTable(aux_relid); }
+  CBDB_WRAP_END;
+}
+
+static void PaxNontransactionalTruncateTable(Relation rel) {
+  CBDB_WRAP_START;
+  { paxc::CPaxNontransactionalTruncateTable(rel); }
+  CBDB_WRAP_END;
+}
+
+static void PaxCreateMicroPartitionTable(const Relation rel) {
+  CBDB_WRAP_START;
+  { paxc::CPaxCreateMicroPartitionTable(rel); }
+  CBDB_WRAP_END;
+}
+
+static void PaxCopyPaxBlockEntry(Relation old_relation, Relation new_relation) {
+  CBDB_WRAP_START;
+  { paxc::CPaxCopyPaxBlockEntry(old_relation, new_relation); }
+  CBDB_WRAP_END;
+}
+}  // namespace cbdb
+
+namespace pax {
+void CCPaxAuxTable::PaxAuxRelationSetNewFilenode(Relation rel,
+                                                 const RelFileNode *newrnode,
+                                                 char persistence) {
+  HeapTuple tupcache;
+  std::string path;
+  FileSystem *fs = pax::Singleton::GetInstance();
+
+  tupcache = cbdb::SearchSysCache(rel, PAXTABLESID);
+  if (cbdb::TupleIsValid(tupcache)) {
+    Oid aux_relid = ((Form_pg_pax_tables)GETSTRUCT(tupcache))->blocksrelid;
+    cbdb::PaxTransactionalTruncateTable(aux_relid);
+    cbdb::ReleaseTupleCache(tupcache);
+  } else {
+    // create the pg_pax_blocks_ aux table
+    cbdb::PaxCreateMicroPartitionTable(rel);
+  }
+
+  // Create the pax table relfilenode file and database directory under base/.
+  // The relfilenode created here exists for compatibility with normal PG
+  // processing logic; it is not used by pax storage itself.
+  cbdb::RelationCreateStorageDirectory(*newrnode, persistence, SMGR_MD, rel);
+  path = cbdb::BuildPaxDirectoryPath(*newrnode, rel->rd_backend);
+  Assert(!path.empty());
+  CBDB_CHECK((fs->CreateDirectory(path) == 0), cbdb::CException::ExType::kExTypeIOError);
+}
+
+void CCPaxAuxTable::PaxAuxRelationNontransactionalTruncate(Relation rel) {
+  cbdb::PaxNontransactionalTruncateTable(rel);
+
+  // Delete all micro-partition files on non-transactional truncate but
+  // preserve the top-level PAX file directory.
+  PaxAuxRelationFileUnlink(rel->rd_node, rel->rd_backend, false);
+}
+
+void CCPaxAuxTable::PaxAuxRelationCopyData(Relation rel,
+                                           const RelFileNode *newrnode,
+                                           bool createnewpath) {
+  std::string src_path;
+  std::string dst_path;
+  std::vector<std::string> filelist;
+
+  Assert(rel && newrnode);
+
+  FileSystem *fs = pax::Singleton::GetInstance();
+
+  src_path = cbdb::BuildPaxDirectoryPath(rel->rd_node, rel->rd_backend);
+  Assert(!src_path.empty());
+
+  // get the micro-partition filename list from the source folder for copying.
+  filelist = fs->ListDirectory(src_path);
+  if (filelist.empty()) return;
+
+  dst_path = cbdb::BuildPaxDirectoryPath(*newrnode, rel->rd_backend);
+  Assert(!dst_path.empty());
+
+  if (src_path.empty() || dst_path.empty())
+    CBDB_RAISE(cbdb::CException::ExType::kExTypeFileOperationError);
+
+  // createnewpath indicates whether the destination micro-partition directory
+  // and storage file have to be created before copying:
+  // 1. For the RelationCopyData case, createnewpath should be true to
+  //    explicitly create a new destination directory under the new tablespace
+  //    path pg_tblspc/.
+  // 2. For the RelationCopyDataForCluster case, createnewpath should be false
+  //    because the destination directory was already created with a new temp
+  //    table by a previous call to PaxAuxRelationSetNewFilenode.
+  if (createnewpath) {
+    // create pg_pax_table relfilenode file and dbid directory.
+    cbdb::RelationCreateStorageDirectory(*newrnode, rel->rd_rel->relpersistence,
+                                         SMGR_MD, rel);
+    // create the micro-partition destination folder for copying.
+    CBDB_CHECK((fs->CreateDirectory(dst_path) == 0), cbdb::CException::ExType::kExTypeIOError);
+  }
+
+  for (auto &iter : filelist) {
+    Assert(!iter.empty());
+    // build per-file paths; appending to src_path/dst_path themselves would
+    // keep extending the previous iteration's path.
+    std::string src_file = src_path + "/" + iter;
+    std::string dst_file = dst_path + "/" + iter;
+    fs->CopyFile(src_file, dst_file);
+  }
+
+  // TODO(Tony): implement pending delete of src_path after setting the new
+  // tablespace.
+}
+
+void CCPaxAuxTable::PaxAuxRelationCopyDataForCluster(Relation old_rel, Relation new_rel) {
+  PaxAuxRelationCopyData(old_rel, &new_rel->rd_node, false);
+  cbdb::PaxCopyPaxBlockEntry(old_rel, new_rel);
+  // TODO(Tony): implement the PAX reorganize semantics here.
+}
+
+void CCPaxAuxTable::PaxAuxRelationFileUnlink(RelFileNode node,
+                                             BackendId backend,
+                                             bool delete_topleveldir) {
+  std::string relpath;
+  FileSystem *fs = pax::Singleton::GetInstance();
+  // Delete the whole micro-partition file directory.
+  relpath = cbdb::BuildPaxDirectoryPath(node, backend);
+  fs->DeleteDirectory(relpath, delete_topleveldir);
+}
+}  // namespace pax
+
diff --git a/contrib/pax_storage/src/cpp/catalog/pax_aux_table.h b/contrib/pax_storage/src/cpp/catalog/pax_aux_table.h
new file mode 100644
index 00000000000..7d79f36f863
--- /dev/null
+++ b/contrib/pax_storage/src/cpp/catalog/pax_aux_table.h
@@ -0,0 +1,50 @@
+#pragma once
+
+#include "comm/cbdb_api.h"
+
+#include <string>
+
+#include "storage/micro_partition_metadata.h"
+
+#define ANUM_PG_PAX_BLOCK_TABLES_PTBLOCKNAME 1
+#define ANUM_PG_PAX_BLOCK_TABLES_PTTUPCOUNT 2
+#define ANUM_PG_PAX_BLOCK_TABLES_PTBLOCKSIZE 3
+#define ANUM_PG_PAX_BLOCK_TABLES_PTSTATISITICS 4
+#define NATTS_PG_PAX_BLOCK_TABLES 4
+
+namespace pax {
+class CCPaxAuxTable final {
+ public:
+  CCPaxAuxTable() = delete;
+  ~CCPaxAuxTable() = delete;
+
+  static void PaxAuxRelationSetNewFilenode(Relation rel,
+                                           const RelFileNode *newrnode,
+                                           char persistence);
+
+  static void PaxAuxRelationNontransactionalTruncate(Relation rel);
+
+  static void PaxAuxRelationCopyData(Relation rel,
+                                     const RelFileNode *newrnode,
+                                     bool createnewpath = true);
+
+  static void PaxAuxRelationCopyDataForCluster(Relation old_rel, Relation new_rel);
+
+  static void PaxAuxRelationFileUnlink(RelFileNode node, BackendId backend,
+                                       bool delete_topleveldir);
+};
+}  // namespace pax
+
+namespace cbdb {
+
+Oid GetPaxAuxRelid(Oid relid);
+
+void AddMicroPartitionEntry(const pax::WriteSummary &summary);
+
+void DeleteMicroPartitionEntry(Oid pax_relid,
+                               Snapshot snapshot,
+                               const std::string &block_id);
+
+}  // namespace cbdb
+
diff --git a/contrib/pax_storage/src/cpp/comm/bitmap.cc b/contrib/pax_storage/src/cpp/comm/bitmap.cc
new file mode 100644
index 00000000000..7009323445f
--- /dev/null
+++ b/contrib/pax_storage/src/cpp/comm/bitmap.cc
@@ -0,0 +1,150 @@
+#include "comm/bitmap.h"
+
+#include "exceptions/CException.h"
+
+namespace pax {
+
+DynamicBitmap::DynamicBitmap() { bitmap_.resize(1024); }
+DynamicBitmap::DynamicBitmap(uint32 size) { bitmap_.resize(size); }
+
+DynamicBitmap::~DynamicBitmap() { bitmap_.clear(); }
+
+void DynamicBitmap::Set(uint32 index) {
+  CBDB_CHECK(index < bitmap_.size(),
+             cbdb::CException::ExType::kExTypeOutOfRange);
+  bitmap_[index] = true;
+}
+
+bool DynamicBitmap::Test(uint32 index) const {
+  CBDB_CHECK(index < bitmap_.size(),
+             cbdb::CException::ExType::kExTypeOutOfRange);
+  return bitmap_[index];
+}
+
+void DynamicBitmap::Clear(uint32 index) {
+  CBDB_CHECK(index < bitmap_.size(),
+             cbdb::CException::ExType::kExTypeOutOfRange);
+  bitmap_[index] = false;
+}
+
+void DynamicBitmap::Reset() { bitmap_.clear(); }
+
+void DynamicBitmap::Resize(int size) { bitmap_.resize(size); }
+
+// TODO(gongxun): optimize this function
+bool DynamicBitmap::BitmapFindFirst(uint32 offset, bool value,
+                                    uint32 *idx) const {
+  auto it = std::find(bitmap_.begin() + offset, bitmap_.end(), value);
+  if (it == bitmap_.end()) {
+    return false;
+  }
+  *idx = it - bitmap_.begin();
+  return true;
+}
+
+uint32 DynamicBitmap::NumBits() const { return bitmap_.size(); }
+
+FixedBitmap::FixedBitmap(uint32 size) {
+  byte_size_ = (size >> 3) + (size & 7 ? 1 : 0);
+  bitmap_ = new uint8[byte_size_];
+
+  num_bits_ = size;
+  memset(bitmap_, 0, byte_size_);
+}
+
+FixedBitmap::~FixedBitmap() { delete[] bitmap_; }
+
+void FixedBitmap::Set(uint32 index) {
+  CBDB_CHECK(index < num_bits_,
+             cbdb::CException::ExType::kExTypeOutOfRange);
+  bitmap_[index >> 3] |= 1 << (index & 7);
+}
+
+bool FixedBitmap::Test(uint32 index) const {
+  CBDB_CHECK(index < num_bits_,
+             cbdb::CException::ExType::kExTypeOutOfRange);
+  return bitmap_[index >> 3] & (1 << (index & 7));
+}
+
+void FixedBitmap::Reset() { std::memset(bitmap_, 0, byte_size_); }
+
+void FixedBitmap::Clear(uint32 index) {
+  CBDB_CHECK(index < num_bits_,
+             cbdb::CException::ExType::kExTypeOutOfRange);
+  bitmap_[index >> 3] &= ~(1 << (index & 7));
+}
+
+uint32 FixedBitmap::Size() const { return byte_size_; }
+uint32 FixedBitmap::NumBits() const { return num_bits_; }
+bool FixedBitmap::BitmapFindFirst(uint32 offset, bool value,
+                                  uint32 *idx) const {
+  const uint64 pattern64[2] = {0xffffffffffffffff, 0x0000000000000000};
+  const uint8 pattern8[2] = {0xff, 0x00};
+  uint32 bit;
+
+  if (offset >= num_bits_) {
+    return false;
+  }
+
+  // Jump to the byte at the specified offset
+  const uint8 *p = bitmap_ + (offset >> 3);
+  uint32 num_bits = num_bits_ - offset;
+
+  // Find a 'value' bit at the end of the first byte
+  if ((bit = offset & 0x7)) {
+    for (; bit < 8 && num_bits > 0; ++bit) {
+      if (Test(((p - bitmap_) << 3) + bit) == value) {
+        *idx = ((p - bitmap_) << 3) + bit;
+        return true;
+      }
+
+      num_bits--;
+    }
+    p++;
+  }
+
+  // check 64 bits at a time for a 'value' bit
+  const uint64 *u64 = (const uint64 *)p;
+  while (num_bits >= 64 && *u64 == pattern64[value]) {
+    num_bits -= 64;
+    u64++;
+  }
+
+  // check 8 bits at a time for a 'value' bit
+  p = (const uint8 *)u64;
+  while (num_bits >= 8 && *p == pattern8[value]) {
+    num_bits -= 8;
+    p++;
+  }
+
+  // Find a 'value' bit at the beginning of the last byte
+  for (bit = 0; num_bits > 0; ++bit) {
+    if (Test(((p - bitmap_) << 3) + bit) == value) {
+      *idx = ((p - bitmap_) << 3) + bit;
+      return true;
+    }
+    num_bits--;
+  }
+
+  return false;
+}
+
+BitmapIterator::BitmapIterator(Bitmap *map) : offset_(0), bitmap_(map) {}
+
+void BitmapIterator::SeekTo(size_t bit) {
+  Assert(bit < bitmap_->NumBits());
+  offset_ = bit;
+}
+
+int32 BitmapIterator::Next(bool value) {
+  int32 len = bitmap_->NumBits() - offset_;
+  if (len <= 0) return -1;
+  uint32 index;
+  if (bitmap_->BitmapFindFirst(offset_, value, &index)) {
+    offset_ = index + 1;
+    return index;
+  }
+  return -1;
+}
+
+}  // namespace pax
diff --git a/contrib/pax_storage/src/cpp/comm/bitmap.h b/contrib/pax_storage/src/cpp/comm/bitmap.h
new file mode 100644
index 00000000000..daa5819a3a1
--- /dev/null
+++ b/contrib/pax_storage/src/cpp/comm/bitmap.h
@@ -0,0 +1,100 @@
+#pragma once
+
+#include "comm/cbdb_api.h"
+
+#include <cstddef>
+
+#include <algorithm>
+#include <cstring>
+#include <vector>
+
+namespace pax {
+
+class Bitmap {
+ public:
+  virtual ~Bitmap() {}
+  virtual void Set(uint32 index) = 0;
+  virtual bool Test(uint32 index) const = 0;
+  virtual void Clear(uint32 index) = 0;
+  virtual void Reset() = 0;
+  virtual bool BitmapFindFirst(uint32 offset, bool value,
+                               uint32 *idx) const = 0;
+  virtual uint32 NumBits() const = 0;
+};
+
+class DynamicBitmap : public Bitmap {
+ public:
+  friend class BitmapIterator;
+  DynamicBitmap();
+  explicit DynamicBitmap(uint32 size);
+
+  virtual ~DynamicBitmap();
+
+  void Set(uint32 index) override;
+
+  bool Test(uint32 index) const override;
+
+  void Clear(uint32 index) override;
+
+  void Reset() override;
+
+  void Resize(int size);
+
+  // TODO(gongxun): optimize this function
+  bool BitmapFindFirst(uint32 offset, bool value, uint32 *idx) const override;
+
+  uint32 NumBits() const override;
+
+ private:
+  std::vector<bool> bitmap_;
+};
+
+class FixedBitmap : public Bitmap {
+ public:
+  friend class BitmapIterator;
+  explicit FixedBitmap(uint32 size);
+
+  virtual ~FixedBitmap();
+
+  void Set(uint32 index) override;
+
+  bool Test(uint32 index) const override;
+
+  void Reset() override;
+
+  void Clear(uint32 index) override;
+
+  uint32 Size() const;
+
+  uint32 NumBits() const override;
+
+  bool BitmapFindFirst(uint32 offset, bool value, uint32 *idx) const override;
+
+ private:
+  FixedBitmap(const FixedBitmap &other) = delete;
+  FixedBitmap(FixedBitmap &&other) = delete;
+  FixedBitmap &operator=(const FixedBitmap &other) = delete;
+  FixedBitmap &operator=(FixedBitmap &&other) = delete;
+
+  uint32 byte_size_;
+  uint32 num_bits_;
+  uint8 *bitmap_;
+};
+
+class BitmapIterator {
+ public:
+  explicit BitmapIterator(Bitmap *map);
+
+  void SeekTo(size_t bit);
+
+  int32 Next(bool value);
+
+ private:
+  uint32 offset_;
+  Bitmap *bitmap_;
+};
+}  // namespace pax
diff --git a/contrib/pax_storage/src/cpp/comm/bitmap_test.cc b/contrib/pax_storage/src/cpp/comm/bitmap_test.cc
new file mode 100644
index 00000000000..b92b18b63ca
--- /dev/null
+++ b/contrib/pax_storage/src/cpp/comm/bitmap_test.cc
@@ -0,0 +1,72 @@
+#include <gtest/gtest.h>
+
+#include "comm/bitmap.h"
+
+namespace pax::tests {
+class BitMapTest : public ::testing::Test {};
+
+TEST_F(BitMapTest, test) {
+  FixedBitmap bit_map(100);
+  ASSERT_EQ(bit_map.Test(0), false);
+  ASSERT_EQ(bit_map.Test(99), false);
+  bit_map.Set(0);
+  ASSERT_EQ(bit_map.Test(0), true);
+  ASSERT_EQ(bit_map.Test(99), false);
+  bit_map.Set(99);
+  ASSERT_EQ(bit_map.Test(0), true);
+  ASSERT_EQ(bit_map.Test(99), true);
+  bit_map.Clear(0);
+  ASSERT_EQ(bit_map.Test(0), false);
+  ASSERT_EQ(bit_map.Test(99), true);
+  bit_map.Clear(99);
+  ASSERT_EQ(bit_map.Test(0), false);
+  ASSERT_EQ(bit_map.Test(99), false);
+
+  ASSERT_EQ(bit_map.Size(), 13);
+}
+
+TEST_F(BitMapTest, FixedBitmap) {
+  FixedBitmap bit_map(100);
+  bit_map.Set(0);
+  bit_map.Set(50);
+  bit_map.Set(99);
+
+  BitmapIterator it(&bit_map);
+
+  ASSERT_EQ(it.Next(true), 0);
+  ASSERT_EQ(it.Next(true), 50);
+  ASSERT_EQ(it.Next(true), 99);
+
+  it.SeekTo(0);
+  ASSERT_EQ(it.Next(false), 1);
+  ASSERT_EQ(it.Next(false), 2);
+  ASSERT_EQ(it.Next(false), 3);
+}
+
+TEST_F(BitMapTest, DynamicBitmap) {
+  DynamicBitmap bit_map(100);
+  bit_map.Set(0);
+  bit_map.Set(50);
+  bit_map.Set(99);
+
+  BitmapIterator it(&bit_map);
+
+  ASSERT_EQ(it.Next(true), 0);
+  ASSERT_EQ(it.Next(true), 50);
+  ASSERT_EQ(it.Next(true), 99);
+
+  bit_map.Resize(200);
+  bit_map.Set(100);
+  bit_map.Set(150);
+  bit_map.Set(199);
+
+  ASSERT_EQ(it.Next(true), 100);
+  ASSERT_EQ(it.Next(true), 150);
+  ASSERT_EQ(it.Next(true), 199);
+
+  it.SeekTo(0);
+  ASSERT_EQ(it.Next(false), 1);
+  ASSERT_EQ(it.Next(false), 2);
+  ASSERT_EQ(it.Next(false), 3);
+}
+}  // namespace pax::tests
diff --git a/contrib/pax_storage/src/cpp/comm/cbdb_api.h b/contrib/pax_storage/src/cpp/comm/cbdb_api.h
new file mode 100644
index 00000000000..b97800252ee
--- /dev/null
+++ b/contrib/pax_storage/src/cpp/comm/cbdb_api.h
@@ -0,0 +1,73 @@
+#ifndef SRC_CPP_COMM_CBDB_API_H_
+#define SRC_CPP_COMM_CBDB_API_H_
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+#include "postgres.h"  // NOLINT
+
+#include "access/genam.h"
+#include "access/heapam.h"
+#include "access/relscan.h"
+#include "access/sdir.h" +#include "access/tableam.h" +#include "access/tsmapi.h" +#include "access/tupdesc.h" +#include "access/tupdesc_details.h" +#include "catalog/dependency.h" +#include "catalog/heap.h" +#include "catalog/indexing.h" +#include "catalog/oid_dispatch.h" +#include "catalog/pg_am.h" +#include "catalog/pg_amop.h" +#include "catalog/pg_amproc.h" +#include "catalog/pg_namespace.h" +#ifndef BUILD_PAX_FORMAT +#include "access/reloptions.h" +#include "catalog/pg_pax_tables.h" +#endif +#include "catalog/storage.h" +#include "cdb/cdbvars.h" +#include "commands/cluster.h" +#include "common/file_utils.h" +#include "executor/executor.h" +#include "executor/tuptable.h" +#include "nodes/nodeFuncs.h" +#include "postmaster/syslogger.h" // for PIPE_CHUNK_SIZE +#include "storage/block.h" +#include "storage/bufmgr.h" +#include "storage/dsm.h" +#include "storage/ipc.h" +#include "storage/lwlock.h" +#include "storage/relfilenode.h" +#include "storage/smgr.h" +#include "utils/builtins.h" +#include "utils/datum.h" +#include "utils/elog.h" +#include "utils/hsearch.h" +#include "utils/lsyscache.h" +#include "utils/memutils.h" +#include "utils/relcache.h" +#include "utils/snapshot.h" +#include "utils/syscache.h" +#include "utils/wait_event.h" + +// no header file in cbdb +extern BlockNumber system_nextsampleblock(SampleScanState *node, BlockNumber nblocks); // NOLINT +extern bool extractcolumns_from_node(Node *expr, bool *cols, AttrNumber natts); // NOLINT +extern Oid GetDefaultOpClass(Oid type_id, Oid am_id); +#ifdef __cplusplus +} +#endif + +#define PAX_TABLE_AM_OID 7014 +#define PAX_AMNAME "pax" +#define PAX_AM_HANDLER_OID 7600 +#define PAX_AM_HANDLER_NAME "pax_tableam_handler" + +#define PAX_AUX_STATS_IN_OID 7601 +#define PAX_AUX_STATS_OUT_OID 7602 +#define PAX_AUX_STATS_TYPE_OID 7603 +#define PAX_AUX_STATS_TYPE_NAME "paxauxstats" + +#endif // SRC_CPP_COMM_CBDB_API_H_ diff --git a/contrib/pax_storage/src/cpp/comm/cbdb_wrappers.cc b/contrib/pax_storage/src/cpp/comm/cbdb_wrappers.cc new file mode 100644 index 00000000000..fb7c4beced4 --- /dev/null +++ b/contrib/pax_storage/src/cpp/comm/cbdb_wrappers.cc @@ -0,0 +1,401 @@ +#include "comm/cbdb_wrappers.h" +#include "comm/paxc_wrappers.h" +#include "storage/paxc_block_map_manager.h" +extern "C" { +const char *progname; +} + +namespace cbdb { + +MemoryContext pax_memory_context = nullptr; + +CAutoExceptionStack::CAutoExceptionStack(void **global_exception_stack, + void **global_error_context_stack) + : m_global_exception_stack_(global_exception_stack), + m_global_error_context_stack_(global_error_context_stack), + m_exception_stack_(*global_exception_stack), + m_error_context_stack_(*global_error_context_stack) {} + +CAutoExceptionStack::~CAutoExceptionStack() { + *m_global_exception_stack_ = m_exception_stack_; + *m_global_error_context_stack_ = m_error_context_stack_; +} + +// set the exception stack to the given address +void CAutoExceptionStack::SetLocalJmp(void *local_jump) { + *m_global_exception_stack_ = local_jump; +} + +void *MemCtxAlloc(MemoryContext ctx, size_t size) { + CBDB_WRAP_START; + { + { return MemoryContextAlloc(ctx, (Size)size); } + } + CBDB_WRAP_END; + return nullptr; +} + +void *Palloc(size_t size) { + CBDB_WRAP_START; + { +#ifdef RUN_GTEST + if (TopMemoryContext == nullptr) { + MemoryContextInit(); + } +#endif + { return palloc(size); } + } + CBDB_WRAP_END; + return nullptr; +} + +void *Palloc0(size_t size) { + CBDB_WRAP_START; + { +#ifdef RUN_GTEST + if (TopMemoryContext == nullptr) { + MemoryContextInit(); + } +#endif + { 
return palloc0(size); }
+  }
+  CBDB_WRAP_END;
+  return nullptr;
+}
+
+void *RePalloc(void *ptr, size_t size) {
+  CBDB_WRAP_START;
+  { return repalloc(ptr, size); }
+  CBDB_WRAP_END;
+  return nullptr;
+}
+
+void Pfree(void *ptr) {
+#ifdef RUN_GTEST
+  if (ptr == nullptr) {
+    return;
+  }
+#endif
+  CBDB_WRAP_START;
+  { pfree(ptr); }
+  CBDB_WRAP_END;
+}
+
+}  // namespace cbdb
+
+void *operator new(std::size_t size) { return cbdb::Palloc(size); }
+
+void *operator new[](std::size_t size) { return cbdb::Palloc(size); }
+
+void *operator new(std::size_t size, MemoryContext ctx) {
+  return cbdb::MemCtxAlloc(ctx, size);
+}
+
+void *operator new[](std::size_t size, MemoryContext ctx) {
+  return cbdb::MemCtxAlloc(ctx, size);
+}
+
+void operator delete(void *ptr) { if (ptr) cbdb::Pfree(ptr); }
+
+void operator delete[](void *ptr) { if (ptr) cbdb::Pfree(ptr); }
+
+HTAB *cbdb::HashCreate(const char *tabname, int64 nelem, const HASHCTL *info,
+                       int flags) {
+  CBDB_WRAP_START;
+  { return hash_create(tabname, nelem, info, flags); }
+  CBDB_WRAP_END;
+  return nullptr;
+}
+
+void *cbdb::HashSearch(HTAB *hashp, const void *key_ptr, HASHACTION action,
+                       bool *found_ptr) {
+  CBDB_WRAP_START;
+  { return hash_search(hashp, key_ptr, action, found_ptr); }
+  CBDB_WRAP_END;
+  return nullptr;
+}
+
+MemoryContext cbdb::AllocSetCtxCreate(MemoryContext parent, const char *name,
+                                      Size min_context_size,
+                                      Size init_block_size,
+                                      Size max_block_size) {
+  CBDB_WRAP_START;
+  {
+    return AllocSetContextCreateInternal(parent, name, min_context_size,
+                                         init_block_size, max_block_size);
+  }
+  CBDB_WRAP_END;
+  return nullptr;
+}
+
+void cbdb::MemoryCtxDelete(MemoryContext memory_context) {
+  CBDB_WRAP_START;
+  { MemoryContextDelete(memory_context); }
+  CBDB_WRAP_END;
+}
+
+void cbdb::MemoryCtxRegisterResetCallback(MemoryContext context,
+                                          MemoryContextCallback *cb) {
+  CBDB_WRAP_START;
+  { MemoryContextRegisterResetCallback(context, cb); }
+  CBDB_WRAP_END;
+}
+
+Oid cbdb::RelationGetRelationId(Relation rel) {
+  CBDB_WRAP_START;
+  { return RelationGetRelid(rel); }
+  CBDB_WRAP_END;
+}
+
+#ifdef RUN_GTEST
+Datum cbdb::DatumFromCString(const char *src, size_t length) {
+  CBDB_WRAP_START;
+  {
+    text *result = reinterpret_cast<text *>(palloc(length + VARHDRSZ));
+    SET_VARSIZE(result, length + VARHDRSZ);
+    memcpy(VARDATA(result), src, length);
+    return PointerGetDatum(result);
+  }
+  CBDB_WRAP_END;
+  return 0;
+}
+
+Datum cbdb::DatumFromPointer(const void *p, int16 typlen) {
+  CBDB_WRAP_START;
+  {
+    char *resultptr;
+    resultptr = reinterpret_cast<char *>(palloc(typlen));
+    memcpy(resultptr, p, typlen);
+    return PointerGetDatum(resultptr);
+  }
+  CBDB_WRAP_END;
+  return 0;
+}
+#endif
+
+struct varlena *cbdb::PgDeToastDatumPacked(struct varlena *datum) {
+  CBDB_WRAP_START;
+  { return pg_detoast_datum_packed(datum); }
+  CBDB_WRAP_END;
+  return nullptr;
+}
+
+void *cbdb::PointerAndLenFromDatum(Datum d, int *len) {
+  struct varlena *vl = nullptr;
+  CBDB_WRAP_START;
+  {
+    vl = (struct varlena *)DatumGetPointer(d);
+    *len = VARSIZE_ANY(vl);
+    return static_cast<void *>(vl);
+  }
+  CBDB_WRAP_END;
+}
+
+// pax ctid mapping functions
+
+void cbdb::InitCommandResource() {
+  CBDB_WRAP_START;
+  { paxc::init_command_resource(); }
+  CBDB_WRAP_END;
+}
+void cbdb::ReleaseCommandResource() {
+  CBDB_WRAP_START;
+  { paxc::release_command_resource(); }
+  CBDB_WRAP_END;
+}
+
+void cbdb::GetTableIndexAndTableNumber(Oid table_rel_oid, uint8 *table_no,
+                                       uint32 *table_index) {
+  CBDB_WRAP_START;
+  {
+    paxc::get_table_index_and_table_number(table_rel_oid, table_no,
+                                           table_index);
+  }
+  
CBDB_WRAP_END; +} + +uint32 cbdb::GetBlockNumber(Oid table_rel_oid, uint32 table_index, + paxc::PaxBlockId block_id) { + CBDB_WRAP_START; + { return paxc::get_block_number(table_rel_oid, table_index, block_id); } + CBDB_WRAP_END; +} +paxc::PaxBlockId cbdb::GetBlockId(Oid table_rel_oid, uint8 table_no, + uint32 block_number) { + CBDB_WRAP_START; + { return paxc::get_block_id(table_rel_oid, table_no, block_number); } + CBDB_WRAP_END; +} + +void cbdb::RelationCreateStorageDirectory(RelFileNode rnode, + char relpersistence, + SMgrImpl smgr_which, Relation rel) { + CBDB_WRAP_START; + { + SMgrRelation srel = + RelationCreateStorage(rnode, relpersistence, smgr_which, rel); + smgrclose(srel); + } + CBDB_WRAP_END; +} + +void cbdb::RelationDropStorageDirectory(Relation rel) { + CBDB_WRAP_START; + { RelationDropStorage(rel); } + CBDB_WRAP_END; +} + +int cbdb::PathNameCreateDir(const char *path) { + CBDB_WRAP_START; + { return MakePGDirectory(path); } + CBDB_WRAP_END; +} + +HeapTuple cbdb::SearchSysCache(Relation rel, SysCacheIdentifier id) { + CBDB_WRAP_START; + { return SearchSysCache1(id, RelationGetRelid(rel)); } + CBDB_WRAP_END; +} + +bool cbdb::TupleIsValid(HeapTuple tupcache) { + CBDB_WRAP_START; + { return HeapTupleIsValid(tupcache); } + CBDB_WRAP_END; +} + +void cbdb::ReleaseTupleCache(HeapTuple tupcache) { + CBDB_WRAP_START; + { ReleaseSysCache(tupcache); } + CBDB_WRAP_END; +} + +void cbdb::PathNameDeleteDir(const char *path, bool delete_topleveldir) { + CBDB_WRAP_START; + { paxc::DeletePaxDirectoryPath(path, delete_topleveldir); } + CBDB_WRAP_END; +} + +void cbdb::CopyFile(const char *srcsegpath, const char *dstsegpath) { + CBDB_WRAP_START; + { paxc::CopyFile(srcsegpath, dstsegpath); } + CBDB_WRAP_END; +} + +void cbdb::MakedirRecursive(const char *path) { + CBDB_WRAP_START; + { paxc::MakedirRecursive(path); } + CBDB_WRAP_END; +} + +std::string cbdb::BuildPaxDirectoryPath(RelFileNode rd_node, + BackendId rd_backend) { + CBDB_WRAP_START; + { + char *tmp_str = paxc::BuildPaxDirectoryPath(rd_node, rd_backend); + std::string ret_str(tmp_str); + pfree(tmp_str); + return ret_str; + } + CBDB_WRAP_END; +} + +std::string cbdb::BuildPaxFilePath(const Relation rel, + const std::string &block_id) { + CBDB_WRAP_START; + { + char *tmp_str = paxc::BuildPaxFilePath(rel, block_id.c_str()); + std::string ret_str(tmp_str); + pfree(tmp_str); + return ret_str; + } + CBDB_WRAP_END; +} + +int cbdb::RelationGetAttributesNumber(Relation rel) { + CBDB_WRAP_START; + { return RelationGetNumberOfAttributes(rel); } + CBDB_WRAP_END; +} + +TupleDesc cbdb::RelationGetTupleDesc(Relation rel) { + CBDB_WRAP_START; + { return RelationGetDescr(rel); } + CBDB_WRAP_END; +} + +bool cbdb::ExtractcolumnsFromNode(Node *expr, bool *cols, AttrNumber natts) { + CBDB_WRAP_START; + { return extractcolumns_from_node(expr, cols, natts); } + CBDB_WRAP_END; +} + +bool cbdb::MinMaxGetStrategyProcinfo(Oid atttypid, Oid *procid, FmgrInfo *finfo, StrategyNumber strategynum) +{ + CBDB_WRAP_START; + { return paxc::MinMaxGetStrategyProcinfo(atttypid, procid, finfo, strategynum); } + CBDB_WRAP_END; +} + +Datum cbdb::FunctionCall1Coll(FmgrInfo *flinfo, Oid collation, Datum arg1) +{ + CBDB_WRAP_START; + { return ::FunctionCall1Coll(flinfo, collation, arg1); } + CBDB_WRAP_END; +} + +Datum cbdb::FunctionCall2Coll(FmgrInfo *flinfo, Oid collation, Datum arg1, Datum arg2) +{ + CBDB_WRAP_START; + { return ::FunctionCall2Coll(flinfo, collation, arg1, arg2); } + CBDB_WRAP_END; +} + +Datum cbdb::FunctionCall3Coll(FmgrInfo *flinfo, Oid collation, Datum arg1, 
Datum arg2, Datum arg3)
+{
+  CBDB_WRAP_START;
+  { return ::FunctionCall3Coll(flinfo, collation, arg1, arg2, arg3); }
+  CBDB_WRAP_END;
+}
+
+Datum cbdb::FunctionCall4Coll(FmgrInfo *flinfo, Oid collation, Datum arg1, Datum arg2, Datum arg3, Datum arg4)
+{
+  CBDB_WRAP_START;
+  { return ::FunctionCall4Coll(flinfo, collation, arg1, arg2, arg3, arg4); }
+  CBDB_WRAP_END;
+}
+
+SysScanDesc cbdb::SystableBeginScan(Relation rel, Oid index_id, bool index_ok, Snapshot snapshot, int n_keys, ScanKey keys) {
+  CBDB_WRAP_START;
+  { return systable_beginscan(rel, index_id, index_ok, snapshot, n_keys, keys); }
+  CBDB_WRAP_END;
+}
+
+HeapTuple cbdb::SystableGetNext(SysScanDesc desc) {
+  CBDB_WRAP_START;
+  { return systable_getnext(desc); }
+  CBDB_WRAP_END;
+}
+
+void cbdb::SystableEndScan(SysScanDesc desc) {
+  CBDB_WRAP_START;
+  { return systable_endscan(desc); }
+  CBDB_WRAP_END;
+}
+
+Datum cbdb::HeapGetAttr(HeapTuple tup, int attnum, TupleDesc tuple_desc, bool *isnull) {
+  CBDB_WRAP_START;
+  { return heap_getattr(tup, attnum, tuple_desc, isnull); }
+  CBDB_WRAP_END;
+}
+
+Relation cbdb::TableOpen(Oid relid, LOCKMODE lockmode) {
+  CBDB_WRAP_START;
+  { return table_open(relid, lockmode); }
+  CBDB_WRAP_END;
+}
+
+void cbdb::TableClose(Relation rel, LOCKMODE lockmode) {
+  CBDB_WRAP_START;
+  { return table_close(rel, lockmode); }
+  CBDB_WRAP_END;
+}
diff --git a/contrib/pax_storage/src/cpp/comm/cbdb_wrappers.h b/contrib/pax_storage/src/cpp/comm/cbdb_wrappers.h
new file mode 100644
index 00000000000..9f80b5e614c
--- /dev/null
+++ b/contrib/pax_storage/src/cpp/comm/cbdb_wrappers.h
@@ -0,0 +1,200 @@
+#pragma once
+
+#include "comm/cbdb_api.h"
+
+#include <cstddef>
+#include <string>
+
+#include "exceptions/CException.h"
+#include "storage/pax_block_id.h"
+
+namespace cbdb {
+
+#define PAX_ALLOCSET_DEFAULT_MINSIZE ALLOCSET_DEFAULT_MINSIZE
+#define PAX_ALLOCSET_DEFAULT_INITSIZE (8 * 1024)
+#define PAX_ALLOCSET_DEFAULT_MAXSIZE (3 * 64 * 1024 * 1024)
+#define PAX_ALLOCSET_DEFAULT_SIZES \
+  PAX_ALLOCSET_DEFAULT_MINSIZE, PAX_ALLOCSET_DEFAULT_INITSIZE, \
+      PAX_ALLOCSET_DEFAULT_MAXSIZE
+extern MemoryContext pax_memory_context;
+
+//---------------------------------------------------------------------------
+// @class:
+//     CAutoExceptionStack
+//
+// @doc:
+//     Auto object for saving and restoring exception stack
+//
+//---------------------------------------------------------------------------
+class CAutoExceptionStack final {
+ public:
+  CAutoExceptionStack(const CAutoExceptionStack &) = delete;
+  CAutoExceptionStack(CAutoExceptionStack &&) = delete;
+  void *operator new(std::size_t count, ...) = delete;
+  void *operator new[](std::size_t count, ...)
= delete; + + // ctor + CAutoExceptionStack(void **global_exception_stack, + void **global_error_context_stack); + + // dtor + ~CAutoExceptionStack(); + + // set the exception stack to the given address + void SetLocalJmp(void *local_jump); + + private: + // address of the global exception stack value + void **m_global_exception_stack_; + + // address of the global error context stack value + void **m_global_error_context_stack_; + + // value of exception stack when object is created + void *m_exception_stack_; + + // value of error context stack when object is created + void *m_error_context_stack_; +}; // class CAutoExceptionStack + +void *MemCtxAlloc(MemoryContext ctx, size_t size); +void *Palloc(size_t size); +void *Palloc0(size_t size); +void *RePalloc(void *ptr, size_t size); +void Pfree(void *ptr); + +HTAB *HashCreate(const char *tabname, int64 nelem, const HASHCTL *info, + int flags); +void *HashSearch(HTAB *hashp, const void *key_ptr, HASHACTION action, + bool *found_ptr); +MemoryContext AllocSetCtxCreate(MemoryContext parent, const char *name, + Size min_context_size, Size init_block_size, + Size max_block_size); +void MemoryCtxDelete(MemoryContext memory_context); +void MemoryCtxRegisterResetCallback(MemoryContext context, + MemoryContextCallback *cb); + +Oid RelationGetRelationId(Relation rel); + +static inline void *DatumToPointer(Datum d) noexcept { + return DatumGetPointer(d); +} + +static inline int8 DatumToInt8(Datum d) noexcept { return DatumGetInt8(d); } + +static inline int16 DatumToInt16(Datum d) noexcept { return DatumGetInt16(d); } + +static inline int32 DatumToInt32(Datum d) noexcept { return DatumGetInt32(d); } + +static inline int64 DatumToInt64(Datum d) noexcept { return DatumGetInt64(d); } + +static inline Datum Int8ToDatum(int8 d) noexcept { return Int8GetDatum(d); } + +static inline Datum Int16ToDatum(int16 d) noexcept { return Int16GetDatum(d); } + +static inline Datum Int32ToDatum(int32 d) noexcept { return Int32GetDatum(d); } + +static inline Datum Int64ToDatum(int64 d) noexcept { return Int64GetDatum(d); } + +void *PointerAndLenFromDatum(Datum d, int *len); + +#ifdef RUN_GTEST +Datum DatumFromCString(const char *src, size_t length); + +Datum DatumFromPointer(const void *p, int16 typlen); +#endif + +struct varlena *PgDeToastDatumPacked(struct varlena *datum); + +// pax ctid mapping functions +void InitCommandResource(); +void ReleaseCommandResource(); +void GetTableIndexAndTableNumber(Oid table_rel_oid, uint8 *table_no, + uint32 *table_index); +uint32 GetBlockNumber(Oid table_rel_oid, uint32 table_index, + paxc::PaxBlockId block_id); +paxc::PaxBlockId GetBlockId(Oid table_rel_oid, uint8 table_no, + uint32 block_number); + +void RelationCreateStorageDirectory(RelFileNode rnode, char relpersistence, + SMgrImpl smgr_which, Relation rel); + +bool TupleIsValid(HeapTuple tupcache); + +void ReleaseTupleCache(HeapTuple tupcache); + +void RelationDropStorageDirectory(Relation rel); + +int PathNameCreateDir(const char *path); + +HeapTuple SearchSysCache(Relation rel, SysCacheIdentifier id); + +void PathNameDeleteDir(const char *path, bool delete_topleveldir); + +void CopyFile(const char *srcsegpath, const char *dstsegpath); + +void MakedirRecursive(const char *path); + +std::string BuildPaxDirectoryPath(RelFileNode rd_node, BackendId rd_backend); + +int RelationGetAttributesNumber(Relation rel); + +TupleDesc RelationGetTupleDesc(Relation rel); + +bool ExtractcolumnsFromNode(Node *expr, bool *cols, AttrNumber natts); + +std::string BuildPaxFilePath(Relation rel, const 
std::string &block_id);
+
+bool MinMaxGetStrategyProcinfo(Oid atttypid, Oid *procid, FmgrInfo *finfo, StrategyNumber strategynum);
+
+Datum FunctionCall1Coll(FmgrInfo *flinfo, Oid collation, Datum arg1);
+
+Datum FunctionCall2Coll(FmgrInfo *flinfo, Oid collation, Datum arg1, Datum arg2);
+
+Datum FunctionCall3Coll(FmgrInfo *flinfo, Oid collation, Datum arg1, Datum arg2, Datum arg3);
+
+Datum FunctionCall4Coll(FmgrInfo *flinfo, Oid collation, Datum arg1, Datum arg2, Datum arg3, Datum arg4);
+
+SysScanDesc SystableBeginScan(Relation rel, Oid index_id, bool index_ok, Snapshot snapshot, int n_keys, ScanKey keys);
+
+HeapTuple SystableGetNext(SysScanDesc desc);
+
+void SystableEndScan(SysScanDesc desc);
+
+Datum HeapGetAttr(HeapTuple tup, int attnum, TupleDesc tuple_desc, bool *isnull);
+
+Relation TableOpen(Oid relid, LOCKMODE lockmode);
+
+void TableClose(Relation rel, LOCKMODE lockmode);
+
+}  // namespace cbdb
+
+// clang-format off
+#define CBDB_WRAP_START                                           \
+  sigjmp_buf local_sigjmp_buf;                                    \
+  {                                                               \
+    cbdb::CAutoExceptionStack aes(                                \
+        reinterpret_cast<void **>(&PG_exception_stack),           \
+        reinterpret_cast<void **>(&error_context_stack));         \
+    if (0 == sigsetjmp(local_sigjmp_buf, 0))                      \
+    {                                                             \
+      aes.SetLocalJmp(&local_sigjmp_buf)
+
+#define CBDB_WRAP_END                                             \
+  }                                                               \
+  else                                                            \
+  {                                                               \
+    CBDB_RAISE(cbdb::CException::ExType::kExTypeCError);          \
+  }                                                               \
+  }
+// clang-format on
+
+// override the default new/delete to use current memory context
+extern void *operator new(std::size_t size);
+extern void *operator new[](std::size_t size);
+extern void operator delete(void *ptr);
+extern void operator delete[](void *ptr);
+
+// specify memory context for this allocation without switching memory context
+extern void *operator new(std::size_t size, MemoryContext ctx);
+extern void *operator new[](std::size_t size, MemoryContext ctx);
diff --git a/contrib/pax_storage/src/cpp/comm/comm_test.cc b/contrib/pax_storage/src/cpp/comm/comm_test.cc
new file mode 100644
index 00000000000..da867ecae32
--- /dev/null
+++ b/contrib/pax_storage/src/cpp/comm/comm_test.cc
@@ -0,0 +1,35 @@
+
+#include "comm/cbdb_wrappers.h"
+#include "comm/gtest_wrappers.h"
+
+namespace pax::tests {
+class CommTest : public ::testing::Test {
+ public:
+  void SetUp() override {
+    MemoryContext comm_test_memory_context = AllocSetContextCreate(
+        (MemoryContext)NULL, "CommTestMemoryContext", 1 * 1024 * 1024,
+        1 * 1024 * 1024, 1 * 1024 * 1024);
+    MemoryContextSwitchTo(comm_test_memory_context);
+  }
+
+  void TearDown() override {}
+};
+
+TEST_F(CommTest, TestDeleteOperator) {
+  // Standard C++ allows deleting a null pointer. PAX overloads the delete
+  // operator to free memory with `pfree`, which does not accept NULL. The
+  // overloaded operators must still conform to C++ semantics and act as a
+  // complete semantic replacement, so deleting NULL has to remain a no-op.
+  auto obj = new int32();
+  delete obj;
+  obj = nullptr;
+  delete obj;
+
+  auto array_obj = new int32[10];
+  delete[] array_obj;
+  array_obj = nullptr;
+  delete[] array_obj;
+}
+
+}  // namespace pax::tests
\ No newline at end of file
diff --git a/contrib/pax_storage/src/cpp/comm/gtest_wrappers.h b/contrib/pax_storage/src/cpp/comm/gtest_wrappers.h
new file mode 100644
index 00000000000..90a057044da
--- /dev/null
+++ b/contrib/pax_storage/src/cpp/comm/gtest_wrappers.h
@@ -0,0 +1,12 @@
+#pragma once
+
+#undef Max
+#undef gettext
+#undef dgettext
+#undef ngettext
+#undef dngettext
+#undef Assert
+#include <gtest/gtest.h>
+#include <gmock/gmock.h>
+#undef Assert
+#include "comm/cbdb_api.h"
diff --git a/contrib/pax_storage/src/cpp/comm/iterator.h b/contrib/pax_storage/src/cpp/comm/iterator.h
new file mode 100644
index 00000000000..42cc2db1a1e
--- /dev/null
+++ b/contrib/pax_storage/src/cpp/comm/iterator.h
@@ -0,0 +1,82 @@
+#pragma once
+
+#include <functional>
+#include <memory>
+#include <vector>
+
+#include "comm/cbdb_api.h"
+
+namespace pax {
+template <typename T>
+class IteratorBase {
+ public:
+  virtual bool HasNext() = 0;
+  virtual T Next() = 0;
+  virtual void Rewind() = 0;
+  virtual ~IteratorBase() = default;
+};  // class IteratorBase
+
+// FilterIterator: wraps an iterator with a qual function that decides whether
+// each value from the internal iterator is passed through. If the qual
+// function returns true, the current item is returned to the caller;
+// otherwise the current item is skipped.
+template <typename T>
+class FilterIterator : public IteratorBase<T> {
+ public:
+  FilterIterator(std::unique_ptr<IteratorBase<T>> &&it,
+                 std::function<bool(const T &)> &&qual)
+      : it_(std::move(it)), qual_(std::move(qual)) {
+    Assert(it_);
+    Assert(qual_);
+  }
+
+  bool HasNext() override {
+    if (valid_value_) return true;
+    while (it_->HasNext()) {
+      value_ = std::move(it_->Next());
+      if (qual_(value_)) {
+        valid_value_ = true;
+        break;
+      }
+    }
+    return valid_value_;
+  }
+
+  T Next() override {
+    Assert(valid_value_);
+    valid_value_ = false;
+    return std::move(value_);
+  }
+
+  void Rewind() override {
+    it_->Rewind();
+    valid_value_ = false;
+  }
+
+  virtual ~FilterIterator() = default;
+
+ protected:
+  std::unique_ptr<IteratorBase<T>> it_;
+  std::function<bool(const T &)> qual_;
+  T value_;
+  bool valid_value_ = false;
+};
+
+template <typename T>
+class VectorIterator : public IteratorBase<T> {
+ public:
+  explicit VectorIterator(std::vector<T> &&v) : v_(std::move(v)) {}
+  virtual ~VectorIterator() = default;
+
+  bool HasNext() override { return index_ < v_.size(); }
+  T Next() override {
+    Assert(HasNext());
+    return v_[index_++];
+  }
+  void Rewind() override { index_ = 0; }
+
+ protected:
+  std::vector<T> v_;
+  size_t index_ = 0;
+};
+
+}  // namespace pax
diff --git a/contrib/pax_storage/src/cpp/comm/pax_defer.h b/contrib/pax_storage/src/cpp/comm/pax_defer.h
new file mode 100644
index 00000000000..ad39ba76bbd
--- /dev/null
+++ b/contrib/pax_storage/src/cpp/comm/pax_defer.h
@@ -0,0 +1,28 @@
+#pragma once
+
+#include <utility>
+
+namespace pax {
+
+template <typename F>
+class Defer {
+ public:
+  const F function;
+
+ public:
+  constexpr explicit Defer(const F &function) : function{function} {}
+  constexpr explicit Defer(F &&function) : function{std::move(function)} {}
+  ~Defer() { function(); }
+};
+
+template <typename F>
+inline Defer<F> make_defer(F &&function) {
+  return Defer<F>(std::forward<F>(function));
+}
+
+}  // namespace pax
+
+#define DEFER_CONCAT(n, ...) \
+  const auto defer##n = pax::make_defer([&] { __VA_ARGS__; })
+#define DEFER_FORWARD(n, ...) DEFER_CONCAT(n, __VA_ARGS__)
+#define DEFER(...) DEFER_FORWARD(__LINE__, __VA_ARGS__)
diff --git a/contrib/pax_storage/src/cpp/comm/paxc_wrappers.cc b/contrib/pax_storage/src/cpp/comm/paxc_wrappers.cc
new file mode 100644
index 00000000000..c968ae70940
--- /dev/null
+++ b/contrib/pax_storage/src/cpp/comm/paxc_wrappers.cc
@@ -0,0 +1,265 @@
+#include "comm/paxc_wrappers.h"
+#include "comm/cbdb_api.h"
+
+#include <fcntl.h>
+#include <sys/stat.h>
+
+#define PAX_MICROPARTITION_NAME_LENGTH 2048
+#define PAX_MICROPARTITION_DIR_POSTFIX "_pax"
+
+namespace paxc {
+// pax file operation
+static void DeletePaxDirectoryPathRecursive(
+    const char *path, const char *toplevel_path, bool delete_topleveldir,
+    void (*action)(const char *fname, bool isdir, int elevel),
+    bool process_symlinks, int elevel);
+
+static void UnlinkIfExistsFname(const char *fname, bool isdir, int elevel);
+
+// MakedirRecursive: create a directory, including any missing parent
+// directories, for the specified directory path.
+// parameter path IN directory path.
+void MakedirRecursive(const char *path) {
+  char dirpath[PAX_MICROPARTITION_NAME_LENGTH];
+  unsigned int pathlen = strlen(path);
+  struct stat st {};
+
+  Assert(path != NULL && path[0] != '\0' &&
+         pathlen < PAX_MICROPARTITION_NAME_LENGTH);
+
+  for (unsigned int i = 0; i <= pathlen; i++) {
+    if (path[i] == '/' || path[i] == '\0') {
+      strncpy(dirpath, path, i + 1);
+      dirpath[i + 1] = '\0';
+      if (stat(dirpath, &st) != 0) {
+        if (MakePGDirectory(dirpath) != 0)
+          ereport(
+              ERROR,
+              (errcode_for_file_access(),
+               errmsg("MakedirRecursive could not create directory \"%s\": %m",
+                      dirpath)));
+      }
+    }
+  }
+}
+
+// CopyFile: copy a single file from a source path to a destination path.
+// parameter srcsegpath IN source file path.
+// parameter dstsegpath IN destination file path.
+void CopyFile(const char *srcsegpath, const char *dstsegpath) {
+  char *buffer = NULL;
+  int64 left;
+  off_t offset;
+  int dstflags;
+  // Note: File here is the PG File type, not the pax::File class.
+  ::File srcfile;
+  ::File dstfile;
+
+  Assert(srcsegpath != NULL && srcsegpath[0] != '\0');
+  Assert(dstsegpath != NULL && dstsegpath[0] != '\0');
+
+  // TODO(Tony): needs to adjust BLCKSZ for pax storage.
+  buffer = reinterpret_cast<char *>(palloc0(BLCKSZ));
+
+  // FIXME(Tony): need to verify whether there is an fd leak here.
+  srcfile = PathNameOpenFile(srcsegpath, O_RDONLY | PG_BINARY);
+  if (srcfile < 0)
+    ereport(ERROR, (errcode_for_file_access(),
+                    errmsg("CopyFile could not open file %s: %m", srcsegpath)));
+
+  // TODO(Tony): need to understand whether the O_DIRECT flag could optimize
+  // data copying in PAX.
+  dstflags = O_CREAT | O_WRONLY | O_EXCL | PG_BINARY;
+
+  dstfile = PathNameOpenFile(dstsegpath, dstflags);
+  if (dstfile < 0)
+    ereport(ERROR, (errcode_for_file_access(),
+                    errmsg("CopyFile could not create destination file %s: %m",
+                           dstsegpath)));
+
+  // TODO(Tony): implement exception handling for PG function-call failures
+  // such as FileDiskSize.
+  left = FileDiskSize(srcfile);
+  if (left < 0)
+    ereport(ERROR, (errcode_for_file_access(),
+                    errmsg("CopyFile could not seek to end of file %s: %m",
+                           srcsegpath)));
+
+  offset = 0;
+  while (left > 0) {
+    int len;
+    CHECK_FOR_INTERRUPTS();
+    len = Min(left, BLCKSZ);
+    if (FileRead(srcfile, buffer, len, offset, WAIT_EVENT_DATA_FILE_READ) !=
+        len)
+      ereport(ERROR,
+              (errcode_for_file_access(),
+               errmsg("CopyFile could not read %d bytes from file \"%s\": %m",
+                      len, srcsegpath)));
+
+    if (FileWrite(dstfile, buffer, len, offset, WAIT_EVENT_DATA_FILE_WRITE) !=
+        len)
+      ereport(ERROR,
+              (errcode_for_file_access(),
+               errmsg("CopyFile could not write %d bytes to file \"%s\": %m",
+                      len, dstsegpath)));
+
+    offset += len;
+    left -= len;
+  }
+
+  if (FileSync(dstfile, WAIT_EVENT_DATA_FILE_IMMEDIATE_SYNC) != 0)
+    ereport(ERROR,
+            (errcode_for_file_access(),
+             errmsg("CopyFile could not fsync file \"%s\": %m", dstsegpath)));
+  FileClose(srcfile);
+  FileClose(dstfile);
+  pfree(buffer);
+}
+
+// DeletePaxDirectoryPath: Delete a directory and everything in it, if it
+// exists.
+// parameter dirname IN directory to delete recursively.
+// parameter delete_topleveldir IN flag indicating whether the top-level
+// directory itself should be deleted as well.
+void DeletePaxDirectoryPath(const char *dirname, bool delete_topleveldir) {
+  struct stat statbuf {};
+
+  if (stat(dirname, &statbuf) != 0) {
+    // Silently ignore a missing directory.
+    if (errno == ENOENT)
+      return;
+    else
+      ereport(
+          ERROR,
+          (errcode_for_file_access(),
+           errmsg("Check PAX file directory failed, directory path: \"%s\": %m",
+                  dirname)));
+  }
+
+  DeletePaxDirectoryPathRecursive(dirname, dirname, delete_topleveldir,
+                                  UnlinkIfExistsFname, false, LOG);
+}
+
+// BuildPaxDirectoryPath: build the pax storage directory path following the
+// pg convention, for example base/{database_oid}/{blocks_relid}_pax.
+// parameter rd_node IN relfilenode information.
+// parameter rd_backend IN backend id.
+// return palloc'd pax storage directory path.
+char *BuildPaxDirectoryPath(RelFileNode rd_node, BackendId rd_backend) {
+  char *relpath = NULL;
+  char *paxrelpath = NULL;
+  relpath = relpathbackend(rd_node, rd_backend, MAIN_FORKNUM);
+  Assert(relpath[0] != '\0');
+  paxrelpath = psprintf("%s%s", relpath, PAX_MICROPARTITION_DIR_POSTFIX);
+  pfree(relpath);
+  return paxrelpath;
+}
+
+// BuildPaxFilePath: build a pax storage file path following the pg
+// convention, for example base/{database_oid}/{blocks_relid}_pax/{block_id}.
+// parameter rel IN Relation information.
+// parameter block_id IN micro-partition block id.
+// return palloc'd pax storage file path.
+char *BuildPaxFilePath(Relation rel, const char *block_id) {
+  char *relpath = NULL;
+  char *filepath = NULL;
+
+  relpath = BuildPaxDirectoryPath(rel->rd_node, rel->rd_backend);
+  Assert(relpath[0] != '\0');
+  filepath = psprintf("%s/%s", relpath, block_id);
+  pfree(relpath);
+  return filepath;
+}
+
+static void UnlinkIfExistsFname(const char *fname, bool isdir, int elevel) {
+  if (isdir) {
+    if (rmdir(fname) != 0 && errno != ENOENT)
+      ereport(elevel, (errcode_for_file_access(),
+                       errmsg("could not remove directory \"%s\": %m", fname)));
+  } else {
+    // Use PathNameDeleteTemporaryFile, which also reports the file size
+    PathNameDeleteTemporaryFile(fname, false);
+  }
+}
+
+static void DeletePaxDirectoryPathRecursive(
+    const char *path, const char *toplevel_path, bool delete_topleveldir,
+    void (*action)(const char *fname, bool isdir, int elevel),
+    bool process_symlinks, int elevel) {
+  DIR *dir;
+  struct dirent *de;
+  dir = AllocateDir(path);
+
+  while ((de = ReadDirExtended(dir, path, elevel)) != NULL) {
+    char subpath[MAXPGPATH * 2];
+    CHECK_FOR_INTERRUPTS();
+
+    if (strcmp(de->d_name, ".") == 0 || strcmp(de->d_name, "..") == 0) continue;
+
+    snprintf(subpath, sizeof(subpath), "%s/%s", path, de->d_name);
+
+    switch (get_dirent_type(subpath, de, process_symlinks, elevel)) {
+      case PGFILETYPE_REG:
+        (*action)(subpath, false, elevel);
+        break;
+      case PGFILETYPE_DIR:
+        DeletePaxDirectoryPathRecursive(
+            subpath, toplevel_path, delete_topleveldir, action, false, elevel);
+        break;
+      default:
+        break;
+    }
+  }
+
+  // errors here are ignored; deletion is best-effort
+  FreeDir(dir);
+
+  // skip deleting the top-level dir if delete_topleveldir is set to false.
+  if (delete_topleveldir || strncmp(path, toplevel_path, strlen(path)) != 0) {
+    // Remove the directory itself last. Skip this if AllocateDir failed;
+    // the action function might not be robust against that.
+
+    if (dir) (*action)(path, true, elevel);
+  }
+}
+
+bool MinMaxGetStrategyProcinfo(Oid atttypid, Oid *procid, FmgrInfo *finfo, StrategyNumber strategynum) {
+  FmgrInfo dummy;
+  HeapTuple tuple;
+  Oid opclass;
+  Oid opfamily;
+  Oid oprid;
+  RegProcedure opcode;
+  bool isNull;
+
+  opclass = GetDefaultOpClass(atttypid, BRIN_AM_OID);
+  if (!OidIsValid(opclass))
+    return false;
+
+  opfamily = get_opclass_family(opclass);
+  tuple = SearchSysCache4(AMOPSTRATEGY, ObjectIdGetDatum(opfamily),
+                          ObjectIdGetDatum(atttypid),
+                          ObjectIdGetDatum(atttypid),
+                          Int16GetDatum(strategynum));
+
+  if (!HeapTupleIsValid(tuple))
+    return false;  // operator not found
+
+  oprid = DatumGetObjectId(SysCacheGetAttr(AMOPSTRATEGY, tuple,
+                                           Anum_pg_amop_amopopr, &isNull));
+  ReleaseSysCache(tuple);
+  Assert(!isNull && RegProcedureIsValid(oprid));
+
+  opcode = get_opcode(oprid);
+  if (!RegProcedureIsValid(opcode))
+    return false;
+
+  fmgr_info_cxt(opcode, finfo ? finfo : &dummy, CurrentMemoryContext);
+  *procid = opcode;
+
+  return true;
+}
+
+}  // namespace paxc
diff --git a/contrib/pax_storage/src/cpp/comm/paxc_wrappers.h b/contrib/pax_storage/src/cpp/comm/paxc_wrappers.h
new file mode 100644
index 00000000000..fc0315a0b22
--- /dev/null
+++ b/contrib/pax_storage/src/cpp/comm/paxc_wrappers.h
@@ -0,0 +1,13 @@
+#pragma once
+
+#include "comm/cbdb_api.h"
+
+namespace paxc {
+// pax file operations; will be refactored later
+void CopyFile(const char *srcsegpath, const char *dstsegpath);
+void DeletePaxDirectoryPath(const char *dirname, bool delete_topleveldir);
+void MakedirRecursive(const char *path);
+char *BuildPaxDirectoryPath(RelFileNode rd_node, BackendId rd_backend);
+char *BuildPaxFilePath(Relation rel, const char *block_id);
+bool MinMaxGetStrategyProcinfo(Oid atttypid, Oid *procid, FmgrInfo *finfo, StrategyNumber strategynum);
+}  // namespace paxc
diff --git a/contrib/pax_storage/src/cpp/comm/singleton.h b/contrib/pax_storage/src/cpp/comm/singleton.h
new file mode 100644
index 00000000000..f53922f7b37
--- /dev/null
+++ b/contrib/pax_storage/src/cpp/comm/singleton.h
@@ -0,0 +1,43 @@
+#pragma once
+
+#include <memory>
+#include <mutex>
+#include <utility>
+namespace pax {
+
+template <typename T>
+class Singleton final {
+ public:
+  Singleton(const Singleton &) = delete;
+  Singleton &operator=(const Singleton &) = delete;
+  template <typename... ArgTypes>
+  static T *GetInstance(ArgTypes &&...args) {
+    std::call_once(
+        of,
+        [](ArgTypes &&...args) {
+          instance.reset(new T(std::forward<ArgTypes>(args)...));
+        },
+        std::forward<ArgTypes>(args)...);
+
+    return instance.get();
+  }
+
+  static inline void Destroy() {
+    if (instance) {
+      instance.reset();
+    }
+  }
+
+ private:
+  Singleton() = default;
+  ~Singleton() = default;
+  static std::once_flag of;
+  static std::unique_ptr<T> instance;
+};
+
+template <typename T>
+std::once_flag Singleton<T>::of;
+
+template <typename T>
+std::unique_ptr<T> Singleton<T>::instance = nullptr;
+}  // namespace pax
diff --git a/contrib/pax_storage/src/cpp/contrib/CPPLINT.cfg b/contrib/pax_storage/src/cpp/contrib/CPPLINT.cfg
new file mode 100644
index 00000000000..d6bb3792cb5
--- /dev/null
+++ b/contrib/pax_storage/src/cpp/contrib/CPPLINT.cfg
@@ -0,0 +1,5 @@
+# Don't search for additional CPPLINT.cfg in parent directories.
+set noparent
+
+# excludes files or dir
+exclude_files=.
diff --git a/contrib/pax_storage/src/cpp/contrib/googletest b/contrib/pax_storage/src/cpp/contrib/googletest
new file mode 160000
index 00000000000..66366cea569
--- /dev/null
+++ b/contrib/pax_storage/src/cpp/contrib/googletest
@@ -0,0 +1 @@
+Subproject commit 66366cea569337c53c7ef1c72f84b566746e465e
diff --git a/contrib/pax_storage/src/cpp/contrib/zstd b/contrib/pax_storage/src/cpp/contrib/zstd
new file mode 160000
index 00000000000..1e6651126b5
--- /dev/null
+++ b/contrib/pax_storage/src/cpp/contrib/zstd
@@ -0,0 +1 @@
+Subproject commit 1e6651126b5a0daf860c94d81cef019fb12283d7
diff --git a/contrib/pax_storage/src/cpp/exceptions/CException.cc b/contrib/pax_storage/src/cpp/exceptions/CException.cc
new file mode 100644
index 00000000000..caef46081f0
--- /dev/null
+++ b/contrib/pax_storage/src/cpp/exceptions/CException.cc
@@ -0,0 +1,236 @@
+#include "CException.h"
+
+#include "comm/cbdb_wrappers.h"
+
+#ifdef __GNUC__
+#include <cxxabi.h>
+#include <execinfo.h>
+#endif
+
+#include <cstring>
+#include <sstream>
+namespace cbdb {
+
+#ifdef __GNUC__
+
+static const char *empty_msg = " \n";
+static const char *func_name_msg = " %s: %s + %s\n";
+static const char *no_func_name_msg = " %s: + %s\n";
+static const char *begin_name_msg = " %s: %s() + %s\n";
+static const char *symbol_name = " %s\n";
+
+static inline int Append(char *dst, size_t count, const char *format, ...) {
+  va_list ap;
+  int n;
+  va_start(ap, format);
+  n = vsnprintf(dst, count, format, ap);
+  va_end(ap);
+  // Ignore n < 0 errors here: if anything else goes wrong in this logic,
+  // it can only be diagnosed from the generated log anyway.
+  if (n < 0) {
+    n = 0;
+  }
+  return n;
+}
+
+static inline void StackTrace(char *stack_buffer,
+                              uint8 max_depth = DEFAULT_STACK_MAX_DEPTH) {
+  void *addr_list[max_depth + 1];
+  int addr_len;
+  char **symbol_list;
+  size_t func_name_size = 256;
+  char *func_name;
+  int index = 0;
+
+  addr_len = backtrace(addr_list, sizeof(addr_list) / sizeof(void *));
+  if (addr_len == 0) {
+    Append(stack_buffer, DEFAULT_STACK_MAX_SIZE, empty_msg);
+    return;
+  }
+
+  symbol_list = backtrace_symbols(addr_list, addr_len);
+  if (!symbol_list) {
+    return;
+  }
+
+  // abi::__cxa_demangle requires a malloc'd output buffer: it may grow the
+  // buffer via realloc and return the (possibly moved) pointer.
+  func_name = reinterpret_cast<char *>(malloc(func_name_size));
+  if (!func_name) {
+    free(symbol_list);  // NOLINT
+    return;
+  }
+
+  char *begin_name, *begin_offset, *end_offset;
+  int single_buffer_size = 0;
+  for (int i = 1; i < addr_len; i++) {
+    begin_name = nullptr;
+    begin_offset = nullptr;
+    end_offset = nullptr;
+
+    for (char *p = symbol_list[i]; *p; ++p) {
+      if (*p == '(') {
+        begin_name = p;
+      } else if (*p == '+') {
+        begin_offset = p;
+      } else if (*p == ')' && begin_offset) {
+        end_offset = p;
+        break;
+      }
+    }
+
+    if (begin_name && begin_offset && end_offset && begin_name < begin_offset) {
+      *begin_name++ = '\0';
+      *begin_offset++ = '\0';
+      *end_offset = '\0';
+
+      int status;
+      char *ret =
+          abi::__cxa_demangle(begin_name, func_name, &func_name_size, &status);
+      // If __cxa_demangle grew the buffer with realloc, the old pointer has
+      // already been released; keep tracking the current buffer.
+      if (ret) {
+        func_name = ret;
+      }
+      if (status == 0) {
+        single_buffer_size = strlen(func_name_msg) + strlen(symbol_list[i]) +
+                             strlen(func_name) + strlen(begin_offset);
+        if (index >= DEFAULT_STACK_MAX_SIZE ||
+            DEFAULT_STACK_MAX_SIZE - index < single_buffer_size) {
+          goto finish;
+        }
+        index += Append(stack_buffer + index, DEFAULT_STACK_MAX_SIZE - index,
+                        func_name_msg, symbol_list[i], func_name, begin_offset);
+      } else {
+        if (begin_name[0] == '\0') {
+          single_buffer_size = strlen(no_func_name_msg) +
+                               strlen(symbol_list[i]) + strlen(begin_offset);
+          if (index >= DEFAULT_STACK_MAX_SIZE ||
+              DEFAULT_STACK_MAX_SIZE - index < single_buffer_size) {
+            goto finish;
+          }
+          index += Append(stack_buffer + index, DEFAULT_STACK_MAX_SIZE - index,
+                          no_func_name_msg, symbol_list[i], begin_offset);
+        } else {
+          single_buffer_size = strlen(begin_name_msg) + strlen(symbol_list[i]) +
+                               strlen(begin_name) + strlen(begin_offset);
+          if (index >= DEFAULT_STACK_MAX_SIZE ||
+              DEFAULT_STACK_MAX_SIZE - index < single_buffer_size) {
+            goto finish;
+          }
+          index +=
+              Append(stack_buffer + index, DEFAULT_STACK_MAX_SIZE - index,
+                     begin_name_msg, symbol_list[i], begin_name, begin_offset);
+        }
+      }
+    } else {
+      single_buffer_size = strlen(symbol_name) + strlen(symbol_list[i]);
+      if (index >= DEFAULT_STACK_MAX_SIZE ||
+          DEFAULT_STACK_MAX_SIZE - index < single_buffer_size) {
+        goto finish;
+      }
+      index += Append(stack_buffer + index, DEFAULT_STACK_MAX_SIZE - index,
+                      symbol_name, symbol_list[i]);
+    }
+  }
+
+finish:
+  if (index < DEFAULT_STACK_MAX_SIZE) {
+    stack_buffer[index] = '\0';
+  }
+  free(func_name);    // NOLINT
+  free(symbol_list);  // NOLINT
+}
+
+#endif
+
+ErrorMessage::ErrorMessage() {  // NOLINT
+  index_ = 0;
+  message_[0] = '\0';
+}
+
+ErrorMessage::ErrorMessage(const ErrorMessage &message) {  // NOLINT
+  index_ = message.index_;
+  std::memcpy(message_, message.message_, static_cast<size_t>(message.index_));
+}
+
+void ErrorMessage::Append(const char *format, ...) noexcept {
+  auto index = (unsigned)index_;
+  if (index < sizeof(message_)) {
+    va_list ap;
+    int n;
+    va_start(ap, format);
+    n = vsnprintf(&message_[index], sizeof(message_) - index, format, ap);
+    va_end(ap);
+    if (n > 0) index_ += n;
+  }
+}
+
+void ErrorMessage::AppendV(const char *format, va_list ap) noexcept {
+  auto index = (unsigned)index_;
+  if (index < sizeof(message_)) {
+    int n;
+    n = vsnprintf(&message_[index], sizeof(message_) - index, format, ap);
+    if (n > 0) index_ += n;
+  }
+}
+
+const char *ErrorMessage::Message() const noexcept { return &message_[0]; }
+int ErrorMessage::Length() const noexcept { return index_; }
+
+void CException::Raise(CException ex, bool reraise) {
+#ifdef __GNUC__
+  if (!reraise) {
+    StackTrace(&ex.stack_[0]);
+  }
+#endif
+  throw ex;  // NOLINT
+}
+
+CException::CException(ExType extype)  // NOLINT
+    : m_filename_(nullptr), m_lineno_(0), m_extype_(extype) {}
+
+CException::CException(const char *filename, int lineno,  // NOLINT
+                       ExType extype)
+    : m_filename_(filename), m_lineno_(lineno), m_extype_(extype) {}
+
+const char *CException::Filename() const { return m_filename_; }
+
+int CException::Lineno() const { return m_lineno_; }
+
+CException::ExType CException::EType() const { return m_extype_; }
+
+std::string CException::What() const {
+  std::ostringstream buffer;
+  buffer << m_filename_ << ":" << m_lineno_ << " "
+         << exception_names[m_extype_];
+  return buffer.str();
+}
+
+const char *CException::Stack() const { return stack_; }
+
+void CException::Raise(const char *filename, int lineno, ExType extype) {
+  Raise(CException(filename, lineno, extype), false);
+}
+
+void CException::ReRaise(CException ex) { Raise(ex, true); }
+
+const char *CException::exception_names[] = {
+    "Invalid ExType",
+    "Not implements in cpp",
+    "Assert Failure",
+    "Abort",
+    "Out of Memory",
+    "IO Error",
+    "C ERROR",
+    "Logic ERROR",
+    "Invalid memory operation",
+    "Schema not match",
+    "Invalid orc format",
+    "Out of range",
+    "File operation got error",
+    "Compress got errors",
+    "Arrow export got errors",
+};
+
+}  // namespace cbdb
diff --git a/contrib/pax_storage/src/cpp/exceptions/CException.h b/contrib/pax_storage/src/cpp/exceptions/CException.h
new file mode 100644
index 00000000000..a6aa2381503
--- /dev/null
+++ b/contrib/pax_storage/src/cpp/exceptions/CException.h
@@ -0,0 +1,83 @@
+#pragma once
+#include "comm/cbdb_api.h"
+
+#include <cstdarg>
+#include <string>
+
+namespace cbdb {
+
+#define DEFAULT_STACK_MAX_DEPTH 63
+#define DEFAULT_STACK_MAX_SIZE \
+  ((DEFAULT_STACK_MAX_DEPTH + 1) * PIPE_MAX_PAYLOAD)
+// error message buffer
+class ErrorMessage final {
+ public:
+  ErrorMessage();
+  ErrorMessage(const ErrorMessage &message);
+  void Append(const char *format, ...) noexcept;
+  void AppendV(const char *format, va_list ap) noexcept;
+  const char *Message() const noexcept;
+  int Length() const noexcept;
+
+ private:
+  int index_ = 0;
+  char message_[128];
+};
+
+class CException {
+ public:
+  enum ExType {
+    kExTypeInvalid = 0,
+    kExTypeUnImplements,
+    kExTypeAssert,
+    kExTypeAbort,
+    kExTypeOOM,
+    kExTypeIOError,
+    kExTypeCError,
+    kExTypeLogicError,
+    kExTypeInvalidMemoryOperation,
+    kExTypeSchemaNotMatch,
+    kExTypeInvalidORCFormat,
+    kExTypeOutOfRange,
+    kExTypeFileOperationError,
+    kExTypeCompressError,
+    kExTypeArrowExportError,
+  };
+
+  explicit CException(ExType extype);
+
+  CException(const char *filename, int lineno, ExType extype);
+
+  const char *Filename() const;
+
+  int Lineno() const;
+
+  ExType EType() const;
+
+  std::string What() const;
+
+  const char *Stack() const;
+
+  static void Raise(const char *filename, int line, ExType extype)
+      __attribute__((__noreturn__));
+  static void Raise(CException ex, bool reraise) __attribute__((__noreturn__));
+  static void ReRaise(CException ex) __attribute__((__noreturn__));
+
+ private:
+  char stack_[DEFAULT_STACK_MAX_SIZE];
+  static const char *exception_names[];
+  const char *m_filename_;
+  int m_lineno_;
+  ExType m_extype_;
+};
+
+}  // namespace cbdb
+
+#define CBDB_RAISE(...) cbdb::CException::Raise(__FILE__, __LINE__, __VA_ARGS__)
+#define CBDB_RERAISE(ex) cbdb::CException::ReRaise(ex)
+#define CBDB_CHECK(check, ...) \
+  do {                         \
+    if (unlikely(!(check))) {  \
+      CBDB_RAISE(__VA_ARGS__); \
+    }                          \
+  } while (0)
diff --git a/contrib/pax_storage/src/cpp/storage/README_CTID_in_pax.md b/contrib/pax_storage/src/cpp/storage/README_CTID_in_pax.md
new file mode 100644
index 00000000000..b1b770f8553
--- /dev/null
+++ b/contrib/pax_storage/src/cpp/storage/README_CTID_in_pax.md
@@ -0,0 +1,164 @@
+# CTID in PAX
+
+A PAX table is different from heap/AO tables: unlike the latter two, it has no continuous logical address space, so a ctid cannot be mapped to a position in a physical file.
+
+In a heap table, ctid -> (32-bit page id + 16-bit tuple offset number):
+
+```
+ │             page id              │   tuple offset num   │
+ ├──────────────────────────────────┼──────────────────────┤
+ │              32bit               │        16bit         │
+ └──────────────────────────────────┴──────────────────────┘
+```
+
+In an AO table, ctid -> (7-bit segment id + 40-bit tuple offset number); the stored offset takes the least significant 15 bits of the row number and adds one to it:
+
+```
+ │ seg id  │                tuple offset                    │
+ ├─────────┼────────────────────────────┬┬─────────────────┤
+ │  7bit   │           25bit            ││      15bit      │
+ └─────────┴────────────────────────────┴┴─────────────────┘
+```
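+
+To make these layouts concrete, the following C++ sketch shows how such a
+48-bit ctid could be unpacked with shifts and masks. It is illustrative
+only: the struct and helper names are hypothetical, and the real code works
+on the ItemPointerData fields rather than a single 48-bit integer.
+
+```
+#include <cstdint>
+
+struct DecodedAoCtid {
+  uint8_t seg_id;       // 7-bit segment id
+  uint64_t row_number;  // 40-bit row number
+};
+
+// AO-style decoding: the top 7 bits are the segment id, the next 25 bits
+// are the high part of the row number, and the low 16 bits store the
+// least significant 15 bits of the row number plus one.
+inline DecodedAoCtid DecodeAoCtid(uint64_t ctid_bits48) {
+  DecodedAoCtid d;
+  d.seg_id = static_cast<uint8_t>(ctid_bits48 >> 41);    // 7 bits
+  uint64_t high25 = (ctid_bits48 >> 16) & 0x1FFFFFFULL;  // 25 bits
+  uint64_t low15 = (ctid_bits48 & 0xFFFFULL) - 1;        // undo the +1
+  d.row_number = (high25 << 15) | (low15 & 0x7FFFULL);
+  return d;
+}
+```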
+
+PAX table data is broken up into multiple blocks, each 64M~512M in size, stored in an object storage service. There is no logical relationship between blocks. A block id is a 36-byte UUID, not an integer like a page id or segment id.
+
+```
+   ┌─────────┐      ┌─────────┐      ┌─────────┐
+   │ segment1│      │ segment2│      │ segment3│
+   └─────────┘      └─────────┘      └─────────┘
+ ┌──────────────────────────────────────────────────────┐
+ │                                                      │
+ │                Object Storage Service                │
+ └──────────────────────────────────────────────────────┘
+  ┌───────┐    ┌───────┐     ┌───────┐    ┌───────┐
+  │block 1│    │block 2│     │block 3│    │block 4│
+  └───────┘    └───────┘     └───────┘    └───────┘
+  ┌───────┐    ┌───────┐     ┌───────┐    ┌───────┐
+  │block 5│    │block 6│     │block 7│    │block 8│
+  └───────┘    └───────┘     └───────┘    └───────┘
+```
+
+The address of a block is an HTTP URL, for example `https://$(object service url)/$(tablespace)/$(table path)/${block id}`.
+
+Tuples therefore cannot be located in a physical file through the ctid, as is done with heap/AO tables. However, the delete and update interfaces of PAX tables still use an ItemPointer to locate tuples. So how can a 48-bit ctid locate a tuple in a physical file?
+
+Similar to heap/AO tables, we split the ctid into two parts: one part (block_no) holds the block information, and the other part (tuple_offset) holds the offset of the tuple within the block.
+
+During the scan phase, for each block file of the table that is read, a mapping from block_no to block id is constructed and stored in shared memory, and the block_no is saved in the block_no part of the ctid. When a delete/update operation retrieves the ctid, it can parse out the block_no and query the shared memory for the block file associated with that block_no.
+
+When multiple scan processes scan the same table simultaneously, the shared memory holding the mapping information would have to be locked. To avoid locking, parallel scan processes for the same table can be allocated separate slots within an array.
+
+With a separate slot for each parallel scan process, the processes can store their respective mapping information independently without interfering with each other. This eliminates the need to lock the shared memory and lets concurrent scans operate efficiently and independently on the table.
+
+For this, slot information must be added to the ctid. Without slot information, the delete phase would not be able to locate the specific slot in the array based solely on the ctid.
+
+In the final implementation, the ctid is divided into three parts:
+
+Part 1 (5 bits): table_no. It identifies the slot in the array assigned to this scan of the table.
+
+Part 2 (22 bits): block_no. It is the index into the block array within the slot.
+
+Part 3 (21 bits): tuple_offset. It is the index of the tuple within the block file.
+
+```
+
+ │ bi_hi     │          bi_lo             │     ip_posid    │
+ ├──────┬────┴───────────────────────┬────┴───────┬┬───────┤
+ │ 5bit │            22bit           │   13bit    ││ 7bit  │
+ ├──────┼────────────────────────────┼────────────┴┴───────┤
+ │      │                            │                     │
+ table_no          block_no                tuple_offset
+```
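+
+As an illustration of this layout, the following C++ sketch packs and
+unpacks the three parts according to the 5/22/21-bit split described above.
+The helper names are hypothetical; the actual implementation manipulates
+the bi_hi/bi_lo/ip_posid fields of ItemPointerData directly.
+
+```
+#include <cstdint>
+
+// 5-bit table_no | 22-bit block_no | 21-bit tuple_offset = 48 bits total.
+inline uint64_t PackPaxCtid(uint32_t table_no, uint32_t block_no,
+                            uint32_t tuple_offset) {
+  return (static_cast<uint64_t>(table_no & 0x1F) << 43) |
+         (static_cast<uint64_t>(block_no & 0x3FFFFF) << 21) |
+         static_cast<uint64_t>(tuple_offset & 0x1FFFFF);
+}
+
+inline void UnpackPaxCtid(uint64_t ctid_bits48, uint32_t *table_no,
+                          uint32_t *block_no, uint32_t *tuple_offset) {
+  *table_no = static_cast<uint32_t>((ctid_bits48 >> 43) & 0x1F);
+  *block_no = static_cast<uint32_t>((ctid_bits48 >> 21) & 0x3FFFFF);
+  *tuple_offset = static_cast<uint32_t>(ctid_bits48 & 0x1FFFFF);
+}
+```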
+
+The delete phase is divided into two steps.
+
+The first step, MarkerDelete(ctid): the delete bitmap is constructed when the `tuple_delete` function is called. The block id is retrieved using block_no and table_no. Each block file has its own bitmap, and the index into the bitmap corresponds to the tuple offset number; as a result, there are N delete bitmaps for N block files.
+
+The second step, ExecuteDelete(): the deletion is performed when FinishDmlState is called. For each block file, it performs a sequential scan in order, filters out the tuples that need to be deleted based on the delete bitmap, and inserts the remaining tuples into a new block file.
+
+Update phase: a SplitUpdate operator (DELETE + INSERT) implements tuple update, following the AO table. The update operation is split into three steps:
+
+1. MarkerDelete(ctid)
+2. InsertTuple(tuple)
+3. ExecuteDelete()
+
+
+## Implementation
+
+### Shared Memory Struct
+
+Because the block map is saved in shared memory, the following line must be added to `postgresql.conf` before working with the pax extension. This change requires a restart of the PostgreSQL database server.
+
+```
+shared_preload_libraries = 'pax.so'
+```
+
+We use gp_session_id + gp_command_count as the key of a hash table in shared memory to achieve fast lookup of the shared structures among different process groups.
+
+Within the same process group, we use the same shared struct. Each process is allocated an independent slot when scanning the table. Every block scanned is added to the dynamic array pointed to by the dsm_segment within the slot.
+
+The graphical structure of the relationships between the data structures is as follows:
+
+```
+                                   shared state struct for ctid
+
+                                               dsm segment
+                                 ┌───┐        ┌───┬───┬───┬───┬───┐
+                                 │tbl├───────►│   │   │   │   │   │
+                                 │   │        └───┴───┴───┴───┴─┬─┘
+                                 ├───┤                          │  ┌────────┐
+ ┌───┐            ┌─────────────►│   │                          └─►│block id│
+ │   │            │              │   │        dsm segment          └────────┘
+ │   │            │              ├───┤        ┌───┬───┬───┬───┬───┐
+ │   ├────────────┘              │   ├───────►│   │   │   │   │   │
+ │   │                           │   │        └───┴───┴───┴───┴───┘
+ │   │                           └───┘
+ │   │
+ │   │                                        dsm segment
+ │   │                           ┌───┐        ┌───┬───┬───┬───┬───┐
+ ├───┤                           │   ├───────►│   │   │   │   │   │
+ │   │                           │tbl│        └───┴───┴───┴───┴─┬─┘
+ │   │                           ├───┤                          │  ┌────────┐
+ │   │                           │   │                          └─►│block id│
+ │   ├──────────────────────────►│tbl│        dsm segment          └────────┘
+ │   │                           ├───┤        ┌───┬───┬───┬───┬───┐
+ hash table ──────────►          │   ├───────►│   │   │   │   │   │
+ │   │                           │   │        └───┴───┴───┴───┴───┘
+ key: session_id+command_id      └───┘
+ │   │
+ value: shared state
+ ├───┤
+ │   │
+ │   │                                        dsm segment
+ │   │                           ┌───┐        ┌───┬───┬───┬───┬───┐
+ │   ├────┐                      │   ├───────►│   │   │   │   │   │
+ │   │    │                      │tbl│        └───┴───┴───┴───┴─┬─┘
+ │   │    │                      ├───┤                          │  ┌────────┐
+ │   │    │                      │   │                          └─►│block id│
+ │   │    └─────────────────────►│tbl│        dsm segment          └────────┘
+ └───┘                           ├───┤        ┌───┬───┬───┬───┬───┐
+                                 │   ├───────►│   │   │   │   │   │
+                                 │tbl│        └───┴───┴───┴───┴───┘
+                                 └───┘
+```
+
+The initialization and release steps for shared memory are as follows:
+
+1. In the pg_init step, the necessary locks and the hash table in shared memory are initialized.
+2. When a scan/delete/update SQL statement executes, the shared memory structures required for the statement are initialized during the beginscan or InitDmlState phase by calling `init_command_resource()`.
+3. When the ExecutorEnd_hook is called, the gp_writer releases the shared memory by calling `release_command_resource()`.
+
+
+
+### Lock
+
+* pax_hash_lock
+
+  We need a lock to protect operations on the hash table. It ensures that concurrent access to the hash table is properly synchronized, avoiding data corruption and race conditions. The number of such locks is 1.
+
+* pax_xact_lock
+
+  We need a lock to protect the table slots assigned within the same process group.
the numer is MaxConnections + diff --git a/contrib/pax_storage/src/cpp/storage/columns/pax_column.cc b/contrib/pax_storage/src/cpp/storage/columns/pax_column.cc new file mode 100644 index 00000000000..04eba6feb42 --- /dev/null +++ b/contrib/pax_storage/src/cpp/storage/columns/pax_column.cc @@ -0,0 +1,363 @@ +#include "storage/columns/pax_column.h" + +#include +#include +#include +#include +#include + +#include "comm/pax_defer.h" + +namespace pax { + +PaxColumn::PaxColumn() + : null_bitmap_(nullptr), + encoded_type_(ColumnEncoding_Kind::ColumnEncoding_Kind_NO_ENCODED), + storage_type_(PaxColumnStorageType::kTypeStorageNonVec) {} + +PaxColumn::~PaxColumn() { + if (null_bitmap_) { + delete null_bitmap_; + } +} + +PaxColumnTypeInMem PaxColumn::GetPaxColumnTypeInMem() const { + return PaxColumnTypeInMem::kTypeInvalid; +} + +void PaxColumn::Clear() { + if (null_bitmap_) { + delete null_bitmap_; + null_bitmap_ = nullptr; + } +} + +bool PaxColumn::HasNull() { return null_bitmap_ != nullptr; } + +void PaxColumn::SetNulls(DataBuffer *null_bitmap) { + Assert(!null_bitmap_); + null_bitmap_ = null_bitmap; +} + +DataBuffer *PaxColumn::GetNulls() const { return null_bitmap_; } + +std::pair PaxColumn::GetRangeNulls(size_t start_pos, + size_t len) { + Assert(null_bitmap_); + CBDB_CHECK((start_pos + len) <= GetRows(), + cbdb::CException::ExType::kExTypeOutOfRange); + + static_assert(sizeof(char) == sizeof(bool)); + return std::make_pair(null_bitmap_->GetBuffer() + start_pos, len); +} + +size_t PaxColumn::GetRows() { + return null_bitmap_ ? null_bitmap_->Used() : GetNonNullRows(); +} + +size_t PaxColumn::GetRangeNonNullRows(size_t start_pos, size_t len) { + CBDB_CHECK((start_pos + len) <= GetRows(), + cbdb::CException::ExType::kExTypeOutOfRange); + if (null_bitmap_) { + size_t total_non_null = 0; + for (size_t i = start_pos; i < (start_pos + len); i++) { + if ((*null_bitmap_)[i]) { + total_non_null++; + } + } + + return total_non_null; + } else { + return len; + } +} + +void PaxColumn::AppendNull() { + if (!null_bitmap_) { + size_t current_rows = GetNonNullRows(); + size_t size = current_rows > DEFAULT_CAPACITY + ? 
(current_rows / DEFAULT_CAPACITY + 1) * DEFAULT_CAPACITY + : DEFAULT_CAPACITY; + null_bitmap_ = new DataBuffer(size); + null_bitmap_->Brush(current_rows * sizeof(bool)); + memset(null_bitmap_->GetBuffer(), 1, null_bitmap_->Capacity()); + } + + if (null_bitmap_->Available() == 0) { + size_t old_cap = null_bitmap_->Capacity(); + null_bitmap_->ReSize(old_cap * 2); + memset(null_bitmap_->GetAvailableBuffer(), 1, old_cap); + } + + null_bitmap_->Write(false); + null_bitmap_->Brush(sizeof(bool)); +} + +void PaxColumn::Append([[maybe_unused]] char *buffer, + [[maybe_unused]] size_t size) { + if (null_bitmap_) { + if (null_bitmap_->Available() == 0) { + size_t old_cap = null_bitmap_->Capacity(); + null_bitmap_->ReSize(old_cap * 2); + memset(null_bitmap_->GetAvailableBuffer(), 1, old_cap); + } + null_bitmap_->Brush(sizeof(bool)); + } +} + +PaxColumn *PaxColumn::SetColumnEncodeType(ColumnEncoding_Kind encoding_type) { + encoded_type_ = encoding_type; + return this; +} + +PaxColumn *PaxColumn::SetColumnStorageType(PaxColumnStorageType storage_type) { + storage_type_ = storage_type; + return this; +} + +ColumnEncoding_Kind PaxColumn::GetEncodingType() const { return encoded_type_; } + +template +PaxCommColumn::PaxCommColumn(uint64 capacity) : capacity_(capacity) { + data_ = new DataBuffer(capacity * sizeof(T)); +} + +template +PaxCommColumn::~PaxCommColumn() { + delete data_; +} + +template // NOLINT: redirect constructor +PaxCommColumn::PaxCommColumn() : PaxCommColumn(DEFAULT_CAPACITY) {} + +template +void PaxCommColumn::Set(DataBuffer *data) { + delete data_; + + data_ = data; +} + +template +void PaxCommColumn::Append(char *buffer, size_t size) { + PaxColumn::Append(buffer, size); + auto buffer_t = reinterpret_cast(buffer); + + // TODO(jiaqizho): Is it necessary to support multiple buffer insertions for + // bulk insert push to mirco partition? 
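+  // Each call appends exactly one fixed-width value: the Asserts below
+  // enforce the sizeof(T) contract, and the buffer doubles its capacity
+  // once the row count reaches it, keeping appends amortized O(1).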
+ Assert(size == sizeof(T)); + Assert(GetNonNullRows() <= capacity_); + + if (GetNonNullRows() == capacity_) { + ReSize(capacity_ * 2); + } + + data_->Write(buffer_t, sizeof(T)); + data_->Brush(sizeof(T)); +} + +template +PaxColumnTypeInMem PaxCommColumn::GetPaxColumnTypeInMem() const { + return PaxColumnTypeInMem::kTypeFixed; +} + +template +void PaxCommColumn::Clear() { + PaxColumn::Clear(); + data_->BrushBackAll(); +} + +template +void PaxCommColumn::ReSize(uint64 cap) { + if (capacity_ < cap) { + data_->ReSize(cap * sizeof(T)); + capacity_ = cap; + } +} + +template +size_t PaxCommColumn::GetNonNullRows() const { + return data_->Used() / sizeof(T); +} + +template +size_t PaxCommColumn::PhysicalSize() const { + return data_->Used(); +} + +template +int64 PaxCommColumn::GetOriginLength() const { + return NO_ENCODE_ORIGIN_LEN; +} + +template +int32 PaxCommColumn::GetTypeLength() const { + return sizeof(T); +} + +template +std::pair PaxCommColumn::GetBuffer() { + return std::make_pair(data_->Start(), data_->Used()); +} + +template +std::pair PaxCommColumn::GetBuffer(size_t position) { + CBDB_CHECK(position < GetNonNullRows(), + cbdb::CException::ExType::kExTypeOutOfRange); + return std::make_pair(data_->Start() + (sizeof(T) * position), sizeof(T)); +} + +template +std::pair PaxCommColumn::GetRangeBuffer(size_t start_pos, + size_t len) { + CBDB_CHECK((start_pos + len) <= GetNonNullRows(), + cbdb::CException::ExType::kExTypeOutOfRange); + return std::make_pair(data_->Start() + (sizeof(T) * start_pos), + sizeof(T) * len); +} + +template class PaxCommColumn; +template class PaxCommColumn; +template class PaxCommColumn; +template class PaxCommColumn; +template class PaxCommColumn; +template class PaxCommColumn; +template class PaxCommColumn; + +PaxNonFixedColumn::PaxNonFixedColumn(uint64 capacity) : estimated_size_(0) { + data_ = new DataBuffer(capacity * sizeof(char) * 100); + lengths_ = new DataBuffer(capacity * sizeof(char)); +} + +PaxNonFixedColumn::PaxNonFixedColumn() : PaxNonFixedColumn(DEFAULT_CAPACITY) {} + +PaxNonFixedColumn::~PaxNonFixedColumn() { + if (data_) { + delete data_; + } + + if (lengths_) { + delete lengths_; + } +} + +void PaxNonFixedColumn::Set(DataBuffer *data, DataBuffer *lengths, + size_t total_size) { + if (data_) { + delete data_; + } + + if (lengths_) { + delete lengths_; + } + + estimated_size_ = total_size; + data_ = data; + lengths_ = lengths; + offsets_.clear(); + for (size_t i = 0; i < lengths_->GetSize(); i++) { + offsets_.emplace_back(i == 0 ? 0 : offsets_[i - 1] + (*lengths_)[i - 1]); + } +} + +void PaxNonFixedColumn::Append(char *buffer, size_t size) { + Assert(likely(reinterpret_cast MAXALIGN(data_->Position()) == + data_->Position())); + + size_t origin_size; + origin_size = size; + + // FIMXE(gongxun): maybe it should be aligned base on the typalign? + size = MAXALIGN(size); + + PaxColumn::Append(buffer, origin_size); + while (data_->Available() < size) { + data_->ReSize(data_->Capacity() * 2); + } + + if (lengths_->Available() == 0) { + lengths_->ReSize(lengths_->Capacity() * 2); + } + + estimated_size_ += size; + data_->Write(buffer, origin_size); + data_->Brush(size); + + lengths_->Write(reinterpret_cast(&size), sizeof(int64)); + lengths_->Brush(sizeof(int64)); + + offsets_.emplace_back(offsets_.empty() + ? 
0 + : offsets_[offsets_.size() - 1] + + (*lengths_)[offsets_.size() - 1]); + Assert(offsets_.size() == lengths_->GetSize()); +} + +DataBuffer *PaxNonFixedColumn::GetLengthBuffer() const { + return lengths_; +} + +PaxColumnTypeInMem PaxNonFixedColumn::GetPaxColumnTypeInMem() const { + return PaxColumnTypeInMem::kTypeNonFixed; +} + +void PaxNonFixedColumn::Clear() { + PaxColumn::Clear(); + + data_->BrushBackAll(); + lengths_->BrushBackAll(); + + offsets_.clear(); +} + +std::pair PaxNonFixedColumn::GetBuffer() { + return std::make_pair(data_->GetBuffer(), data_->Used()); +} + +size_t PaxNonFixedColumn::GetNonNullRows() const { return lengths_->GetSize(); } + +size_t PaxNonFixedColumn::PhysicalSize() const { return estimated_size_; } + +int64 PaxNonFixedColumn::GetOriginLength() const { + return NO_ENCODE_ORIGIN_LEN; +} + +int32 PaxNonFixedColumn::GetTypeLength() const { return -1; } + +std::pair PaxNonFixedColumn::GetBuffer(size_t position) { + CBDB_CHECK(position < GetNonNullRows(), + cbdb::CException::ExType::kExTypeOutOfRange); + + return std::make_pair(data_->GetBuffer() + offsets_[position], + (*lengths_)[position]); +} + +std::pair PaxNonFixedColumn::GetRangeBuffer(size_t start_pos, + size_t len) { + CBDB_CHECK((start_pos + len) <= GetNonNullRows() && len >= 0, + cbdb::CException::ExType::kExTypeOutOfRange); + size_t range_len = 0; + + for (size_t i = start_pos; i < start_pos + len; i++) { + range_len += (*lengths_)[i]; + } + + if (GetNonNullRows() == 0) { + Assert(range_len == 0); + return std::make_pair(data_->GetBuffer(), 0); + } + + Assert(start_pos < offsets_.size()); + return std::make_pair(data_->GetBuffer() + offsets_[start_pos], range_len); +} + +bool PaxNonFixedColumn::IsMemTakeOver() const { + Assert(data_->IsMemTakeOver() == lengths_->IsMemTakeOver()); + return data_->IsMemTakeOver(); +} + +void PaxNonFixedColumn::SetMemTakeOver(bool take_over) { + data_->SetMemTakeOver(take_over); + lengths_->SetMemTakeOver(take_over); +} + +}; // namespace pax diff --git a/contrib/pax_storage/src/cpp/storage/columns/pax_column.h b/contrib/pax_storage/src/cpp/storage/columns/pax_column.h new file mode 100644 index 00000000000..f6de8f4f567 --- /dev/null +++ b/contrib/pax_storage/src/cpp/storage/columns/pax_column.h @@ -0,0 +1,210 @@ +#pragma once +#include + +#include +#include +#include +#include +#include +#include + +#include "storage/columns/pax_compress.h" +#include "storage/columns/pax_encoding_utils.h" +#include "storage/pax_buffer.h" +#include "storage/proto/proto_wrappers.h" + +namespace pax { + +#define DEFAULT_CAPACITY 2048 +#define NO_ENCODE_ORIGIN_LEN (-1) + +// Used to mapping pg_type +enum PaxColumnTypeInMem { kTypeInvalid = 1, kTypeFixed = 2, kTypeNonFixed = 3 }; + +enum PaxColumnStorageType { + // default non-vec store + // which split null field and null bitmap + kTypeStorageNonVec, + // vec storage format + // spec the storage format + kTypeStorageVec, +}; + +class PaxColumn { + public: + PaxColumn(); + + virtual ~PaxColumn(); + + virtual PaxColumn *SetColumnEncodeType(ColumnEncoding_Kind encoding_type); + + virtual PaxColumn *SetColumnStorageType(PaxColumnStorageType storage_type); + + // Get the column in memory type + virtual PaxColumnTypeInMem GetPaxColumnTypeInMem() const; + + // Empties the vector from all its elements, recursively. + // Do not alter the current capacity. 
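+  // (The null bitmap is freed and lazily rebuilt by the next AppendNull;
+  // data buffers are only rewound, their memory remains allocated.)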
+ virtual void Clear(); + + // Get column buffer from current column + virtual std::pair GetBuffer() = 0; + + // Get buffer by position + virtual std::pair GetBuffer(size_t position) = 0; + + // Get buffer by range [start_pos, start_pos + len) + virtual std::pair GetRangeBuffer(size_t start_pos, + size_t len) = 0; + + // Get all rows number(contain null) from column + virtual size_t GetRows(); + + // Get rows number(not null) from column + virtual size_t GetNonNullRows() const = 0; + + // Get all rows number(not null) from column by range [start_pos, start_pos + + // len) + virtual size_t GetRangeNonNullRows(size_t start_pos, size_t len); + + // Append new filed into current column + virtual void Append(char *buffer, size_t size); + + // Append a null filed into last position + virtual void AppendNull(); + + // Estimated memory size from current column + virtual size_t PhysicalSize() const = 0; + + // Get current encoding type + virtual ColumnEncoding_Kind GetEncodingType() const; + + // Get the data size without encoding/compress + virtual int64 GetOriginLength() const = 0; + + // Get the type length, if non-fixed, will return -1 + virtual int32 GetTypeLength() const = 0; + + // Contain null filed or not + bool HasNull(); + + // Set null bitmap + void SetNulls(DataBuffer *null_bitmap); + + // Get null bitmaps + DataBuffer *GetNulls() const; + + // Get bull bitmaps by range [start_pos, start_pos + len) + std::pair GetRangeNulls(size_t start_pos, size_t len); + + protected: + // null field bit map + DataBuffer *null_bitmap_; + + // the column is encoded type + ColumnEncoding_Kind encoded_type_; + + // whether the column is storage + PaxColumnStorageType storage_type_; + + private: + PaxColumn(const PaxColumn &); + PaxColumn &operator=(const PaxColumn &); +}; + +template +class PaxCommColumn : public PaxColumn { + public: + explicit PaxCommColumn(uint64 capacity); + + ~PaxCommColumn() override; + + PaxCommColumn(); + + virtual void Set(DataBuffer *data); + + PaxColumnTypeInMem GetPaxColumnTypeInMem() const override; + + void Append(char *buffer, size_t size) override; + + std::pair GetBuffer(size_t position) override; + + std::pair GetRangeBuffer(size_t start_pos, + size_t len) override; + + size_t GetNonNullRows() const override; + + void Clear() override; + + size_t PhysicalSize() const override; + + int64 GetOriginLength() const override; + + std::pair GetBuffer() override; + + int32 GetTypeLength() const override; + + protected: + virtual void ReSize(uint64 capacity); + + protected: + uint64 capacity_; + DataBuffer *data_; +}; + +extern template class PaxCommColumn; +extern template class PaxCommColumn; +extern template class PaxCommColumn; +extern template class PaxCommColumn; +extern template class PaxCommColumn; +extern template class PaxCommColumn; +extern template class PaxCommColumn; + +class PaxNonFixedColumn : public PaxColumn { + public: + explicit PaxNonFixedColumn(uint64 capacity); + + PaxNonFixedColumn(); + + ~PaxNonFixedColumn() override; + + virtual void Set(DataBuffer *data, DataBuffer *lengths, + size_t total_size); + + void Append(char *buffer, size_t size) override; + + PaxColumnTypeInMem GetPaxColumnTypeInMem() const override; + + void Clear() override; + + std::pair GetBuffer() override; + + size_t PhysicalSize() const override; + + int64 GetOriginLength() const override; + + int32 GetTypeLength() const override; + + std::pair GetBuffer(size_t position) override; + + std::pair GetRangeBuffer(size_t start_pos, + size_t len) override; + + size_t GetNonNullRows() const 
override; + + DataBuffer *GetLengthBuffer() const; + + bool IsMemTakeOver() const; + + void SetMemTakeOver(bool take_over); + + protected: + size_t estimated_size_; + DataBuffer *data_; + + // orc needs to serialize int64 array + DataBuffer *lengths_; + std::vector offsets_; +}; + +}; // namespace pax diff --git a/contrib/pax_storage/src/cpp/storage/columns/pax_column_int.cc b/contrib/pax_storage/src/cpp/storage/columns/pax_column_int.cc new file mode 100644 index 00000000000..51419bd6ffd --- /dev/null +++ b/contrib/pax_storage/src/cpp/storage/columns/pax_column_int.cc @@ -0,0 +1,43 @@ + +#include "storage/columns/pax_column_int.h" + +namespace pax { + +template +PaxIntColumn::PaxIntColumn(const PaxEncoder::EncodingOption &encoding_option) + : PaxEncodingColumn(DEFAULT_CAPACITY, encoding_option) { + PaxEncodingColumn::InitEncoder(); +} + +template +PaxIntColumn::PaxIntColumn(uint64 capacity, + const PaxEncoder::EncodingOption &encoding_option) + : PaxEncodingColumn(capacity, encoding_option) { + PaxEncodingColumn::InitEncoder(); +} + +template +PaxIntColumn::PaxIntColumn(const PaxDecoder::DecodingOption &decoding_option) + : PaxEncodingColumn(DEFAULT_CAPACITY, decoding_option) { + PaxEncodingColumn::InitDecoder(); +} + +template +PaxIntColumn::PaxIntColumn(uint64 capacity, + const PaxDecoder::DecodingOption &decoding_option) + : PaxEncodingColumn(capacity, decoding_option) { + PaxEncodingColumn::InitDecoder(); +} + +template +ColumnEncoding_Kind PaxIntColumn::GetDefaultColumnType() { + return sizeof(T) >= 4 ? ColumnEncoding_Kind::ColumnEncoding_Kind_ORC_RLE_V2 + : ColumnEncoding_Kind::ColumnEncoding_Kind_DIRECT_DELTA; +} + +template class PaxIntColumn; +template class PaxIntColumn; +template class PaxIntColumn; +template class PaxIntColumn; + +} // namespace pax diff --git a/contrib/pax_storage/src/cpp/storage/columns/pax_column_int.h b/contrib/pax_storage/src/cpp/storage/columns/pax_column_int.h new file mode 100644 index 00000000000..e38b8c3f0cd --- /dev/null +++ b/contrib/pax_storage/src/cpp/storage/columns/pax_column_int.h @@ -0,0 +1,31 @@ + +#pragma once +#include "storage/columns/pax_encoding_column.h" + +namespace pax { + +template +class PaxIntColumn final : public PaxEncodingColumn { + public: + explicit PaxIntColumn(const PaxEncoder::EncodingOption &encoding_option); + + PaxIntColumn(uint64 capacity, + const PaxEncoder::EncodingOption &encoding_option); + + explicit PaxIntColumn(const PaxDecoder::DecodingOption &decoding_option); + + PaxIntColumn(uint64 capacity, + const PaxDecoder::DecodingOption &decoding_option); + + ~PaxIntColumn() override = default; + + protected: + ColumnEncoding_Kind GetDefaultColumnType() override; +}; + +extern template class PaxIntColumn; +extern template class PaxIntColumn; +extern template class PaxIntColumn; +extern template class PaxIntColumn; + +} // namespace pax diff --git a/contrib/pax_storage/src/cpp/storage/columns/pax_column_test.cc b/contrib/pax_storage/src/cpp/storage/columns/pax_column_test.cc new file mode 100644 index 00000000000..640975de062 --- /dev/null +++ b/contrib/pax_storage/src/cpp/storage/columns/pax_column_test.cc @@ -0,0 +1,633 @@ +#include "storage/columns/pax_column.h" + +#include + +#include "comm/cbdb_wrappers.h" +#include "comm/gtest_wrappers.h" +#include "exceptions/CException.h" +#include "storage/columns/pax_column_int.h" +#include "storage/columns/pax_encoding_column.h" +#include "storage/columns/pax_encoding_non_fixed_column.h" + +namespace pax::tests { + +static void AppendInt4All(PaxColumn *pax_column, size_t 
bits) { + int64 data; + for (int16 i = INT16_MIN; i <= INT16_MAX; ++i) { // dead loop + data = i; + pax_column->Append(reinterpret_cast(&data), bits / 8); + if (i == INT16_MAX) { + break; + } + } +} + +template +static void VerifyInt4All(char *verify_buff, size_t verify_len) { + ASSERT_NE(verify_buff, nullptr); + ASSERT_EQ(verify_len, (UINT16_MAX + 1) * sizeof(T)); + + auto verify_int64_buff = reinterpret_cast(verify_buff); + + uint32 index = 0; + for (int16 i = INT16_MIN; i <= INT16_MAX; ++i) { + ASSERT_EQ(i, verify_int64_buff[index++]); + if (i == INT16_MAX) { + break; + } + } +} + +static void VerifyInt4All(char *verify_buff, size_t verify_len, size_t bits) { + switch (bits) { + case 16: + VerifyInt4All(verify_buff, verify_len); + break; + case 32: + VerifyInt4All(verify_buff, verify_len); + break; + case 64: + VerifyInt4All(verify_buff, verify_len); + break; + default: + ASSERT_TRUE(false); + } +} + +static PaxColumn *CreateEncodeColumn( + uint8 bits, const PaxEncoder::EncodingOption &encoding_option) { + PaxColumn *int_column; + + switch (bits) { + case 16: + int_column = new PaxIntColumn(1024, std::move(encoding_option)); + break; + case 32: + int_column = new PaxIntColumn(1024, std::move(encoding_option)); + break; + case 64: + int_column = new PaxIntColumn(1024, std::move(encoding_option)); + break; + default: + int_column = nullptr; + break; + } + return int_column; +} + +static PaxColumn *CreateDecodeColumn( + uint8 bits, size_t origin_lem, + const PaxDecoder::DecodingOption &decoding_option, char *encoded_buff, + size_t encoded_len) { + switch (bits) { + case 16: { + auto *buffer_for_read = new DataBuffer( + reinterpret_cast(encoded_buff), encoded_len, false, false); + buffer_for_read->Brush(encoded_len); + + auto int_column = new PaxIntColumn(origin_lem / sizeof(int16), + std::move(decoding_option)); + int_column->Set(buffer_for_read); + + return int_column; + } + case 32: { + auto *buffer_for_read = new DataBuffer( + reinterpret_cast(encoded_buff), encoded_len, false, false); + buffer_for_read->Brush(encoded_len); + + auto int_column = new PaxIntColumn(origin_lem / sizeof(int32), + std::move(decoding_option)); + int_column->Set(buffer_for_read); + return int_column; + } + case 64: { + auto *buffer_for_read = new DataBuffer( + reinterpret_cast(encoded_buff), encoded_len, false, false); + buffer_for_read->Brush(encoded_len); + + auto int_column = new PaxIntColumn(origin_lem / sizeof(int64), + std::move(decoding_option)); + int_column->Set(buffer_for_read); + return int_column; + } + default: { + return nullptr; + } + } + return nullptr; +} + +class PaxColumnTest : public ::testing::Test { + public: + void SetUp() override { + MemoryContext orc_test_memory_context = AllocSetContextCreate( + (MemoryContext)NULL, "PaxColumn memory context", 80 * 1024 * 1024, + 80 * 1024 * 1024, 80 * 1024 * 1024); + + MemoryContextSwitchTo(orc_test_memory_context); + } +}; + +class PaxColumnEncodingTest : public ::testing::TestWithParam { + public: + void SetUp() override { + MemoryContext orc_test_memory_context = AllocSetContextCreate( + (MemoryContext)NULL, "PaxColumn memory context", 80 * 1024 * 1024, + 80 * 1024 * 1024, 80 * 1024 * 1024); + + MemoryContextSwitchTo(orc_test_memory_context); + } +}; + +class PaxColumnCompressTest + : public ::testing::TestWithParam< + ::testing::tuple> { + public: + void SetUp() override { + MemoryContext orc_test_memory_context = AllocSetContextCreate( + (MemoryContext)NULL, "PaxColumn memory context", 800 * 1024 * 1024, + 800 * 1024 * 1024, 800 * 1024 * 1024); 
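+    // A dedicated private memory context; the switch below routes all
+    // palloc-based allocations made by the code under test through it.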
+ + MemoryContextSwitchTo(orc_test_memory_context); + } +}; + +class PaxNonFixedColumnCompressTest + : public ::testing::TestWithParam< + ::testing::tuple> { + public: + void SetUp() override { + MemoryContext orc_test_memory_context = AllocSetContextCreate( + (MemoryContext)NULL, "PaxColumn memory context", 800 * 1024 * 1024, + 800 * 1024 * 1024, 800 * 1024 * 1024); + + MemoryContextSwitchTo(orc_test_memory_context); + } +}; + +TEST_F(PaxColumnTest, FixColumnGetRangeBufferTest) { + PaxColumn *column; + char *buffer = nullptr; + size_t buffer_len = 0; + + column = new PaxCommColumn(200); + for (int32 i = 0; i < 16; i++) { + column->Append(reinterpret_cast(&i), sizeof(int32)); + } + + std::tie(buffer, buffer_len) = column->GetRangeBuffer(5, 10); + ASSERT_EQ(buffer_len, 10 * sizeof(int32)); + + for (size_t i = 5; i < 16; i++) { + auto *i_32 = reinterpret_cast(buffer + ((i - 5) * sizeof(int32))); + ASSERT_EQ(*i_32, (int32)i); + } + ASSERT_EQ(column->GetRows(), 16); + ASSERT_EQ(column->GetRangeNonNullRows(0, column->GetRows()), 16); + + column->Clear(); + + for (int32 i = 0; i < 16; i++) { + if (i % 3 == 0) { + column->AppendNull(); + } + column->Append(reinterpret_cast(&i), sizeof(int32)); + } + + std::tie(buffer, buffer_len) = column->GetRangeBuffer(5, 10); + ASSERT_EQ(buffer_len, 10 * sizeof(int32)); + + for (size_t i = 5; i < 16; i++) { + auto *i_32 = reinterpret_cast(buffer + ((i - 5) * sizeof(int32))); + ASSERT_EQ(*i_32, (int32)i); + } + + ASSERT_EQ(column->GetRows(), 16 + 6); + ASSERT_EQ(column->GetRangeNonNullRows(0, column->GetRows()), 16); + + delete column; +} + +TEST_F(PaxColumnTest, NonFixColumnGetRangeBufferTest) { + PaxColumn *column; + char *buffer = nullptr; + size_t buffer_len = 0; + + column = new PaxNonFixedColumn(200); + for (int64 i = 0; i < 16; i++) { + column->Append(reinterpret_cast(&i), sizeof(int64)); + } + + std::tie(buffer, buffer_len) = column->GetRangeBuffer(5, 10); + ASSERT_EQ(buffer_len, 10 * sizeof(int64)); + + for (size_t i = 5; i < 16; i++) { + auto *i_32 = reinterpret_cast(buffer + ((i - 5) * sizeof(int64))); + ASSERT_EQ(*i_32, (int64)i); + } + ASSERT_EQ(column->GetRows(), 16); + ASSERT_EQ(column->GetRangeNonNullRows(0, column->GetRows()), 16); + + column->Clear(); + + for (int64 i = 0; i < 16; i++) { + if (i % 3 == 0) { + column->AppendNull(); + } + column->Append(reinterpret_cast(&i), sizeof(int64)); + } + + std::tie(buffer, buffer_len) = column->GetRangeBuffer(5, 10); + ASSERT_EQ(buffer_len, 10 * sizeof(int64)); + + for (size_t i = 5; i < 16; i++) { + auto *i_32 = reinterpret_cast(buffer + ((i - 5) * sizeof(int64))); + ASSERT_EQ(*i_32, (int64)i); + } + + ASSERT_EQ(column->GetRows(), 16 + 6); + ASSERT_EQ(column->GetRangeNonNullRows(0, column->GetRows()), 16); + + delete column; +} + +TEST_P(PaxColumnEncodingTest, GetRangeEncodingColumnTest) { + PaxColumn *int_column; + auto bits = GetParam(); + if (bits < 32) { + return; + } + + PaxEncoder::EncodingOption encoding_option; + encoding_option.column_encode_type = + ColumnEncoding_Kind::ColumnEncoding_Kind_DEF_ENCODED; + encoding_option.is_sign = true; + + int_column = CreateEncodeColumn(bits, std::move(encoding_option)); + ASSERT_TRUE(int_column); + + int64 data; + for (int16 i = 0; i < 100; ++i) { + data = i; + int_column->Append(reinterpret_cast(&data), bits / 8); + } + + char *encoded_buff; + size_t encoded_len; + std::tie(encoded_buff, encoded_len) = int_column->GetBuffer(); + ASSERT_NE(encoded_buff, nullptr); + ASSERT_LT(encoded_len, UINT16_MAX); + + auto origin_len = int_column->GetOriginLength(); + 
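+  // For an encoded column, GetOriginLength() reports the pre-encoding byte
+  // size: here, 100 appended values of bits / 8 bytes each.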
ASSERT_EQ(origin_len, (100) * bits / 8); + + PaxDecoder::DecodingOption decoding_option; + decoding_option.column_encode_type = + ColumnEncoding_Kind::ColumnEncoding_Kind_ORC_RLE_V2; + decoding_option.is_sign = true; + + auto int_column_for_read = CreateDecodeColumn( + bits, origin_len, std::move(decoding_option), encoded_buff, encoded_len); + + char *verify_buff; + size_t verify_len; + std::tie(verify_buff, verify_len) = + int_column_for_read->GetRangeBuffer(30, 20); + + for (int16 i = 0; i < 20; ++i) { + switch (bits) { + case 32: + ASSERT_EQ((reinterpret_cast(verify_buff))[i], 30 + i); + break; + case 64: + ASSERT_EQ((reinterpret_cast(verify_buff))[i], 30 + i); + break; + } + } + + delete int_column; + delete int_column_for_read; +} + +TEST_P(PaxColumnCompressTest, FixedCompressColumnGetRangeTest) { + PaxColumn *int_column; + auto bits = ::testing::get<0>(GetParam()); + auto kind = ::testing::get<1>(GetParam()); + + PaxEncoder::EncodingOption encoding_option; + encoding_option.column_encode_type = kind; + encoding_option.compress_lvl = 5; + encoding_option.is_sign = true; + + int_column = CreateEncodeColumn(bits, std::move(encoding_option)); + ASSERT_TRUE(int_column); + + int64 data; + for (int16 i = 0; i < 100; ++i) { + data = i; + int_column->Append(reinterpret_cast(&data), bits / 8); + } + + char *encoded_buff; + size_t encoded_len; + std::tie(encoded_buff, encoded_len) = int_column->GetBuffer(); + ASSERT_NE(encoded_buff, nullptr); + ASSERT_LT(encoded_len, UINT16_MAX); + + auto origin_len = int_column->GetOriginLength(); + ASSERT_EQ(origin_len, kind != ColumnEncoding_Kind_NO_ENCODED + ? (100) * bits / 8 + : NO_ENCODE_ORIGIN_LEN); + + PaxDecoder::DecodingOption decoding_option; + decoding_option.column_encode_type = kind; + decoding_option.is_sign = true; + + auto int_column_for_read = + CreateDecodeColumn(bits, (100) * bits / 8, std::move(decoding_option), + encoded_buff, encoded_len); + + char *verify_buff; + size_t verify_len; + std::tie(verify_buff, verify_len) = + int_column_for_read->GetRangeBuffer(30, 20); + + for (int16 i = 0; i < 20; ++i) { + switch (bits) { + case 32: + ASSERT_EQ((reinterpret_cast(verify_buff))[i], 30 + i); + break; + case 64: + ASSERT_EQ((reinterpret_cast(verify_buff))[i], 30 + i); + break; + } + } + + delete int_column; + delete int_column_for_read; +} + +TEST_P(PaxColumnEncodingTest, PaxEncodingColumnDefault) { + PaxColumn *int_column; + auto bits = GetParam(); + if (bits < 32) { + return; + } + + PaxEncoder::EncodingOption encoding_option; + encoding_option.column_encode_type = + ColumnEncoding_Kind::ColumnEncoding_Kind_DEF_ENCODED; + encoding_option.is_sign = true; + + int_column = CreateEncodeColumn(bits, std::move(encoding_option)); + ASSERT_TRUE(int_column); + + AppendInt4All(int_column, bits); + + char *encoded_buff; + size_t encoded_len; + std::tie(encoded_buff, encoded_len) = int_column->GetBuffer(); + ASSERT_NE(encoded_buff, nullptr); + ASSERT_LT(encoded_len, UINT16_MAX); + + auto origin_len = int_column->GetOriginLength(); + ASSERT_EQ(origin_len, (UINT16_MAX + 1) * bits / 8); + + PaxDecoder::DecodingOption decoding_option; + decoding_option.column_encode_type = + ColumnEncoding_Kind::ColumnEncoding_Kind_ORC_RLE_V2; + decoding_option.is_sign = true; + + auto int_column_for_read = CreateDecodeColumn( + bits, origin_len, std::move(decoding_option), encoded_buff, encoded_len); + + char *verify_buff; + size_t verify_len; + std::tie(verify_buff, verify_len) = int_column_for_read->GetBuffer(); + VerifyInt4All(verify_buff, verify_len, bits); + + delete 
int_column; + delete int_column_for_read; +} + +TEST_P(PaxColumnEncodingTest, PaxEncodingColumnSpecType) { + PaxColumn *int_column; + auto bits = GetParam(); + + PaxEncoder::EncodingOption encoding_option; + encoding_option.column_encode_type = + ColumnEncoding_Kind::ColumnEncoding_Kind_ORC_RLE_V2; + encoding_option.is_sign = true; + + int_column = CreateEncodeColumn(bits, std::move(encoding_option)); + ASSERT_TRUE(int_column); + + AppendInt4All(int_column, bits); + + char *encoded_buff; + size_t encoded_len; + std::tie(encoded_buff, encoded_len) = int_column->GetBuffer(); + ASSERT_NE(encoded_buff, nullptr); + ASSERT_LT(encoded_len, UINT16_MAX); + + auto origin_len = int_column->GetOriginLength(); + ASSERT_EQ(origin_len, (UINT16_MAX + 1) * bits / 8); + + PaxDecoder::DecodingOption decoding_option; + decoding_option.column_encode_type = + ColumnEncoding_Kind::ColumnEncoding_Kind_ORC_RLE_V2; + decoding_option.is_sign = true; + + auto int_column_for_read = CreateDecodeColumn( + bits, origin_len, std::move(decoding_option), encoded_buff, encoded_len); + + char *verify_buff; + size_t verify_len; + std::tie(verify_buff, verify_len) = int_column_for_read->GetBuffer(); + VerifyInt4All(verify_buff, verify_len, bits); + + delete int_column; + delete int_column_for_read; +} + +TEST_P(PaxColumnEncodingTest, PaxEncodingColumnNoEncoding) { + PaxColumn *int_column; + auto bits = GetParam(); + + PaxEncoder::EncodingOption encoding_option; + encoding_option.column_encode_type = + ColumnEncoding_Kind::ColumnEncoding_Kind_NO_ENCODED; + encoding_option.is_sign = true; + + int_column = CreateEncodeColumn(bits, std::move(encoding_option)); + ASSERT_TRUE(int_column); + + AppendInt4All(int_column, bits); + + char *encoded_buff; + size_t encoded_len; + std::tie(encoded_buff, encoded_len) = int_column->GetBuffer(); + ASSERT_NE(encoded_buff, nullptr); + + auto origin_len = int_column->GetOriginLength(); + ASSERT_EQ(origin_len, NO_ENCODE_ORIGIN_LEN); + + PaxDecoder::DecodingOption decoding_option; + decoding_option.column_encode_type = + ColumnEncoding_Kind::ColumnEncoding_Kind_NO_ENCODED; + decoding_option.is_sign = true; + + auto int_column_for_read = CreateDecodeColumn( + bits, encoded_len, std::move(decoding_option), encoded_buff, encoded_len); + + char *verify_buff; + size_t verify_len; + std::tie(verify_buff, verify_len) = int_column_for_read->GetBuffer(); + VerifyInt4All(verify_buff, verify_len, bits); + + delete int_column; + delete int_column_for_read; +} + +TEST_P(PaxColumnCompressTest, PaxEncodingColumnCompressDecompress) { + PaxColumn *int_column; + auto bits = ::testing::get<0>(GetParam()); + auto kind = ::testing::get<1>(GetParam()); + + PaxEncoder::EncodingOption encoding_option; + encoding_option.column_encode_type = kind; + encoding_option.compress_lvl = 5; + encoding_option.is_sign = true; + + int_column = CreateEncodeColumn(bits, std::move(encoding_option)); + ASSERT_TRUE(int_column); + + AppendInt4All(int_column, bits); + + char *encoded_buff; + size_t encoded_len; + std::tie(encoded_buff, encoded_len) = int_column->GetBuffer(); + ASSERT_NE(encoded_buff, nullptr); + + auto origin_len = int_column->GetOriginLength(); + ASSERT_EQ(origin_len, kind != ColumnEncoding_Kind_NO_ENCODED + ? 
(UINT16_MAX + 1) * bits / 8 + : NO_ENCODE_ORIGIN_LEN); + + PaxDecoder::DecodingOption decoding_option; + decoding_option.column_encode_type = kind; + decoding_option.is_sign = true; + + auto int_column_for_read = + CreateDecodeColumn(bits, (UINT16_MAX + 1) * bits / 8, + std::move(decoding_option), encoded_buff, encoded_len); + + char *verify_buff; + size_t verify_len; + std::tie(verify_buff, verify_len) = int_column_for_read->GetBuffer(); + VerifyInt4All(verify_buff, verify_len, bits); + + delete int_column; + delete int_column_for_read; +} + +TEST_P(PaxNonFixedColumnCompressTest, + PaxEncodingNonFixedColumnCompressDecompress) { + PaxNonFixedColumn *non_fixed_column; + auto number = ::testing::get<0>(GetParam()); + auto kind = ::testing::get<1>(GetParam()); + auto verify_range = ::testing::get<2>(GetParam()); + + const size_t buffer_len = 1024; + + PaxEncoder::EncodingOption encoding_option; + encoding_option.column_encode_type = kind; + encoding_option.compress_lvl = 5; + encoding_option.is_sign = true; + + non_fixed_column = + new PaxNonFixedEncodingColumn(1024, std::move(encoding_option)); + + std::srand(static_cast(std::time(0))); + char *data = reinterpret_cast(cbdb::Palloc(buffer_len * number)); + + std::random_device rd; + std::mt19937 gen(rd()); + std::uniform_int_distribution<> dis(0, 255); + + for (size_t i = 0; i < buffer_len; ++i) { + for (size_t j = 0; j < number; ++j) { + data[j + i * number] = static_cast(dis(gen)); + } + non_fixed_column->Append((data + i * number), number); + } + + char *encoded_buff; + size_t encoded_len; + std::tie(encoded_buff, encoded_len) = non_fixed_column->GetBuffer(); + auto length_buffer = non_fixed_column->GetLengthBuffer(); + ASSERT_NE(encoded_buff, nullptr); + + auto origin_len = non_fixed_column->GetOriginLength(); + ASSERT_EQ(origin_len, kind != ColumnEncoding_Kind_NO_ENCODED + ? 
buffer_len * number + : NO_ENCODE_ORIGIN_LEN); + + PaxDecoder::DecodingOption decoding_option; + decoding_option.column_encode_type = kind; + decoding_option.is_sign = true; + + auto non_fixed_column_for_read = new PaxNonFixedEncodingColumn( + buffer_len * number, std::move(decoding_option)); + auto data_buffer_for_read = + new DataBuffer(encoded_buff, encoded_len, false, false); + data_buffer_for_read->Brush(encoded_len); + auto length_buffer_cpy = new DataBuffer(*length_buffer); + non_fixed_column_for_read->Set(data_buffer_for_read, length_buffer_cpy, + origin_len); + + char *verify_buff; + size_t verify_len; + + if (verify_range) { + std::tie(verify_buff, verify_len) = + non_fixed_column_for_read->GetRangeBuffer(30, 50); + ASSERT_EQ(verify_len, number * (50)); + + for (size_t i = 0; i < verify_len; ++i) { + EXPECT_EQ(verify_buff[i], data[i + (30 * number)]); + } + } else { + std::tie(verify_buff, verify_len) = non_fixed_column_for_read->GetBuffer(); + ASSERT_EQ(verify_len, buffer_len * number); + + for (size_t i = 0; i < buffer_len * number; ++i) { + EXPECT_EQ(verify_buff[i], data[i]); + } + } + + delete data; + delete non_fixed_column; + delete non_fixed_column_for_read; +} + +INSTANTIATE_TEST_CASE_P(PaxColumnEncodingTestCombine, PaxColumnEncodingTest, + testing::Values(16, 32, 64)); + +INSTANTIATE_TEST_CASE_P( + PaxColumnEncodingTestCombine, PaxColumnCompressTest, + testing::Combine(testing::Values(16, 32, 64), + testing::Values(ColumnEncoding_Kind_NO_ENCODED, + ColumnEncoding_Kind_COMPRESS_ZSTD, + ColumnEncoding_Kind_COMPRESS_ZLIB))); + +INSTANTIATE_TEST_CASE_P( + PaxColumnEncodingTestCombine, PaxNonFixedColumnCompressTest, + testing::Combine(testing::Values(16, 32, 64), + testing::Values(ColumnEncoding_Kind_NO_ENCODED, + ColumnEncoding_Kind_COMPRESS_ZSTD, + ColumnEncoding_Kind_COMPRESS_ZLIB), + testing::Values(true, false))); + +}; // namespace pax::tests diff --git a/contrib/pax_storage/src/cpp/storage/columns/pax_columns.cc b/contrib/pax_storage/src/cpp/storage/columns/pax_columns.cc new file mode 100644 index 00000000000..1c57d90b7cb --- /dev/null +++ b/contrib/pax_storage/src/cpp/storage/columns/pax_columns.cc @@ -0,0 +1,273 @@ +#include "storage/columns/pax_columns.h" + +#include +#include +#include +#include +#include + +#include "storage/columns/pax_column_int.h" +#include "storage/columns/pax_encoding_column.h" +#include "storage/columns/pax_encoding_non_fixed_column.h" + +namespace pax { + +PaxColumns::PaxColumns( + const std::vector types, + const std::vector column_encoding_types) + : row_nums_(0) { + data_ = new DataBuffer(0); + for (size_t i = 0; i < types.size(); i++) { + auto type = types[i]; + switch (type) { + case (orc::proto::Type_Kind::Type_Kind_STRING): { + PaxEncoder::EncodingOption encoding_option; + encoding_option.column_encode_type = column_encoding_types[i]; + encoding_option.is_sign = false; + encoding_option.compress_lvl = column_encoding_types[i]; + + auto pax_non_fixed_column = new PaxNonFixedEncodingColumn( // + DEFAULT_CAPACITY, std::move(encoding_option)); + // current memory will copy from tuple, so should take over it + pax_non_fixed_column->SetMemTakeOver(true); + columns_.emplace_back(pax_non_fixed_column); + break; + } + case (orc::proto::Type_Kind::Type_Kind_BOOLEAN): + case (orc::proto::Type_Kind::Type_Kind_BYTE): { // len 1 integer + columns_.emplace_back(new PaxCommColumn()); + break; + } + case (orc::proto::Type_Kind::Type_Kind_SHORT): { // len 2 integer + PaxEncoder::EncodingOption encoding_option; + encoding_option.column_encode_type = 
column_encoding_types[i]; + encoding_option.is_sign = true; + columns_.emplace_back( + new PaxIntColumn(std::move(encoding_option))); + break; + } + case (orc::proto::Type_Kind::Type_Kind_INT): { // len 4 integer + PaxEncoder::EncodingOption encoding_option; + encoding_option.column_encode_type = column_encoding_types[i]; + encoding_option.is_sign = true; + columns_.emplace_back( + new PaxIntColumn(std::move(encoding_option))); + break; + } + case (orc::proto::Type_Kind::Type_Kind_LONG): { // len 8 integer + PaxEncoder::EncodingOption encoding_option; + encoding_option.column_encode_type = column_encoding_types[i]; + encoding_option.is_sign = true; + columns_.emplace_back( + new PaxIntColumn(std::move(encoding_option))); + + break; + } + default: + // TODO(jiaqizho): support other column type + // but now should't be here + Assert(!"non-implemented column type"); + break; + } + } +} + +PaxColumns::PaxColumns() : row_nums_(0) { data_ = new DataBuffer(0); } + +PaxColumns::~PaxColumns() { + for (auto column : columns_) { + delete column; + } + delete data_; +} + +void PaxColumns::Clear() { + row_nums_ = 0; + for (auto column : columns_) { + if (column) column->Clear(); + } + + data_->Clear(); +} + +PaxColumn *PaxColumns::operator[](uint64 i) { return columns_[i]; } + +void PaxColumns::Append(PaxColumn *column) { columns_.emplace_back(column); } + +void PaxColumns::Append([[maybe_unused]] char *buffer, + [[maybe_unused]] size_t size) { + CBDB_RAISE(cbdb::CException::ExType::kExTypeLogicError); +} + +void PaxColumns::Set(DataBuffer *data) { + Assert(data_->GetBuffer() == nullptr); + + delete data_; + data_ = data; +} + +size_t PaxColumns::GetNonNullRows() const { + CBDB_RAISE(cbdb::CException::ExType::kExTypeLogicError); +} + +int32 PaxColumns::GetTypeLength() const { + CBDB_RAISE(cbdb::CException::ExType::kExTypeLogicError); +} + +size_t PaxColumns::PhysicalSize() const { + size_t total_size = 0; + for (auto column : columns_) { + if (column) total_size += column->PhysicalSize(); + } + return total_size; +} + +int64 PaxColumns::GetOriginLength() const { + CBDB_RAISE(cbdb::CException::ExType::kExTypeLogicError); +} + +size_t PaxColumns::GetColumns() const { return columns_.size(); } + +std::pair PaxColumns::GetBuffer() { + PaxColumns::ColumnStreamsFunc column_streams_func_null; + PaxColumns::ColumnEncodingFunc column_encoding_func_null; + auto data_buffer = + GetDataBuffer(column_streams_func_null, column_encoding_func_null); + return std::make_pair(data_buffer->GetBuffer(), data_buffer->Used()); +} + +std::pair PaxColumns::GetBuffer(size_t position) { + if (position >= GetColumns()) { + CBDB_RAISE(cbdb::CException::ExType::kExTypeOutOfRange); + } + if (columns_[position]) { + return columns_[position]->GetBuffer(); + } else { + return std::make_pair(nullptr, 0); + } +} + +std::pair PaxColumns::GetRangeBuffer(size_t /*start_pos*/, + size_t /*len*/) { + CBDB_RAISE(cbdb::CException::ExType::kExTypeLogicError); +} + +DataBuffer *PaxColumns::GetDataBuffer( + const ColumnStreamsFunc &column_streams_func, + const ColumnEncodingFunc &column_encoding_func) { + size_t buffer_len = 0; + + if (data_->GetBuffer() != nullptr) { + // warning here: better not call GetDataBuffer twice + // memcpy will happen in GetDataBuffer + data_->Clear(); + } + + buffer_len = MeasureDataBuffer(column_streams_func, column_encoding_func); + data_->Set(reinterpret_cast(cbdb::Palloc(buffer_len)), buffer_len, 0); + CombineDataBuffer(); + return data_; +} + +size_t PaxColumns::MeasureDataBuffer( + const ColumnStreamsFunc 
&column_streams_func, + const ColumnEncodingFunc &column_encoding_func) { + size_t buffer_len = 0; + + for (auto column : columns_) { + if (!column) { + continue; + } + + // has null will generate a bitmap in current stripe + if (column->HasNull()) { + size_t non_null_length = column->GetNulls()->Used(); + buffer_len += non_null_length; + column_streams_func(orc::proto::Stream_Kind_PRESENT, column->GetRows(), + non_null_length); + } + + size_t column_size = column->GetNonNullRows(); + + switch (column->GetPaxColumnTypeInMem()) { + case kTypeNonFixed: { + size_t lengths_size = column_size * sizeof(int64); + + buffer_len += lengths_size; + column_streams_func(orc::proto::Stream_Kind_LENGTH, column_size, + lengths_size); + + auto length_data = column->GetBuffer().second; + buffer_len += length_data; + + column_streams_func(orc::proto::Stream_Kind_DATA, column_size, + length_data); + + break; + } + case kTypeFixed: { + auto length_data = column->GetBuffer().second; + buffer_len += length_data; + column_streams_func(orc::proto::Stream_Kind_DATA, column_size, + length_data); + + break; + } + case kTypeInvalid: + default: { + CBDB_RAISE(cbdb::CException::ExType::kExTypeLogicError); + break; + } + } + + column_encoding_func(column->GetEncodingType(), column->GetOriginLength()); + } + return buffer_len; +} + +void PaxColumns::CombineDataBuffer() { + char *buffer = nullptr; + size_t buffer_len = 0; + + for (auto column : columns_) { + if (!column) { + continue; + } + + if (column->HasNull()) { + auto null_data_buffer = column->GetNulls(); + size_t non_null_length = null_data_buffer->Used(); + + data_->Write(reinterpret_cast(null_data_buffer->GetBuffer()), + non_null_length); + data_->Brush(non_null_length); + } + + switch (column->GetPaxColumnTypeInMem()) { + case kTypeNonFixed: { + auto no_fixed_column = reinterpret_cast(column); + auto length_data_buffer = no_fixed_column->GetLengthBuffer(); + + memcpy(data_->GetAvailableBuffer(), length_data_buffer->GetBuffer(), + length_data_buffer->Used()); + data_->Brush(length_data_buffer->Used()); + + std::tie(buffer, buffer_len) = column->GetBuffer(); + data_->Write(buffer, buffer_len); + data_->Brush(buffer_len); + + break; + } + case kTypeFixed: { + std::tie(buffer, buffer_len) = column->GetBuffer(); + data_->Write(buffer, buffer_len); + data_->Brush(buffer_len); + break; + } + case kTypeInvalid: + default: + break; + } + } +} +} // namespace pax diff --git a/contrib/pax_storage/src/cpp/storage/columns/pax_columns.h b/contrib/pax_storage/src/cpp/storage/columns/pax_columns.h new file mode 100644 index 00000000000..c32fc29e441 --- /dev/null +++ b/contrib/pax_storage/src/cpp/storage/columns/pax_columns.h @@ -0,0 +1,81 @@ +#pragma once +#include +#include +#include + +#include "exceptions/CException.h" +#include "storage/columns/pax_column.h" + +namespace pax { +// PaxColumns are similar to the kind_struct in orc +// It is designed to be nested and some interfaces have semantic differences +// Inheriting PaxCommColumn use to be able to nest itself +class PaxColumns : public PaxColumn { + public: + explicit PaxColumns(std::vector types, + std::vector column_encoding_types); + + PaxColumns(); + + ~PaxColumns() override; + + void Clear() override; + + PaxColumn *operator[](uint64 i); + + void Append(PaxColumn *column); + + void Append(char *buffer, size_t size) override; + + void Set(DataBuffer *data); + + size_t PhysicalSize() const override; + + int64 GetOriginLength() const override; + + int32 GetTypeLength() const override; + + // Get number of column in 
columns + virtual size_t GetColumns() const; + + // Get the combine buffer of all columns + std::pair GetBuffer() override; + + // Get the combine buffer of single column + std::pair GetBuffer(size_t position) override; + + std::pair GetRangeBuffer(size_t start_pos, + size_t len) override; + + size_t GetNonNullRows() const override; + + using ColumnStreamsFunc = + std::function; + + using ColumnEncodingFunc = + std::function; + + // Get the combined data buffer of all columns + // TODO(jiaqizho): consider add a new api which support split IO from + // different column + virtual DataBuffer *GetDataBuffer( + const ColumnStreamsFunc &column_streams_func, + const ColumnEncodingFunc &column_encoding_func); + + inline void AddRows(size_t row_num) { row_nums_ += row_num; } + inline size_t GetRows() override { return row_nums_; } + + protected: + virtual size_t MeasureDataBuffer( + const ColumnStreamsFunc &column_streams_func, + const ColumnEncodingFunc &column_encoding_func); + + virtual void CombineDataBuffer(); + + protected: + std::vector columns_; + DataBuffer *data_; + size_t row_nums_; +}; + +} // namespace pax diff --git a/contrib/pax_storage/src/cpp/storage/columns/pax_compress.cc b/contrib/pax_storage/src/cpp/storage/columns/pax_compress.cc new file mode 100644 index 00000000000..bcef91401b3 --- /dev/null +++ b/contrib/pax_storage/src/cpp/storage/columns/pax_compress.cc @@ -0,0 +1,187 @@ +#include "storage/columns/pax_compress.h" + +#include "comm/cbdb_wrappers.h" +#include "zlib.h" // NOLINT +#include "zstd.h" // NOLINT + +namespace pax { + +PaxCompressor *PaxCompressor::CreateBlockCompressor( + const ColumnEncoding_Kind kind) { + PaxCompressor *compressor = nullptr; + switch (kind) { + case ColumnEncoding_Kind::ColumnEncoding_Kind_COMPRESS_ZSTD: { + compressor = new PaxZSTDCompressor(); + break; + } + case ColumnEncoding_Kind::ColumnEncoding_Kind_COMPRESS_ZLIB: { + compressor = new PaxZlibCompressor(); + break; + } + case ColumnEncoding_Kind::ColumnEncoding_Kind_DEF_ENCODED: { + CBDB_RAISE(cbdb::CException::ExType::kExTypeLogicError); + } + // two cases here: + // - `encoded type` is not a compress type. + // - `encoded type` is the no_encoding type. 
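+    // In both cases no compressor is created and nullptr is returned, so
+    // callers must treat a null result as "no block compression".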
+ default: { + // do nothing + } + } + + return compressor; +} + +PaxCompressor::PaxCompressorType PaxZSTDCompressor::GetCompressorType() const { + return PaxCompressorType::kTypeBlock; +} + +bool PaxZSTDCompressor::ShouldAlignBuffer() const { return false; } + +size_t PaxZSTDCompressor::GetCompressBound(size_t src_len) { + Assert(src_len > 0); + return ZSTD_compressBound(src_len); +} + +size_t PaxZSTDCompressor::Compress(void *dst_buff, size_t dst_cap, + void *src_buff, size_t src_len, int lvl) { + Assert(dst_buff); + Assert(dst_cap > 0); + Assert(src_buff); + Assert(src_len > 0); + + return ZSTD_compress(dst_buff, dst_cap, src_buff, src_len, lvl); +} + +size_t PaxZSTDCompressor::GetDecompressSize(const void *src_buff, + size_t src_len) { + Assert(src_buff); + Assert(src_len > 0); + + return ZSTD_getFrameContentSize(src_buff, src_len); +} + +size_t PaxZSTDCompressor::Decompress(void *dst_buff, size_t dst_len, + void *src_buff, size_t src_len) { + Assert(dst_buff); + Assert(dst_len > 0); + Assert(src_buff); + Assert(src_len > 0); + + return ZSTD_decompress(dst_buff, dst_len, src_buff, src_len); +} + +bool PaxZSTDCompressor::IsError(size_t code) { return ZSTD_isError(code); } + +const char *PaxZSTDCompressor::ErrorName(size_t code) { + return ZSTD_getErrorName(code); +} + +PaxCompressor::PaxCompressorType PaxZlibCompressor::GetCompressorType() const { + return PaxCompressorType::kTypeStreaming; +} + +bool PaxZlibCompressor::ShouldAlignBuffer() const { return false; } + +size_t PaxZlibCompressor::GetCompressBound(size_t src_len) { + return compressBound(src_len); +} + +size_t PaxZlibCompressor::Compress(void *dst_buff, size_t dst_cap, + void *src_buff, size_t src_len, int lvl) { + z_stream c_stream; /* compression stream */ + int err; + + c_stream.zalloc = reinterpret_cast(0); + c_stream.zfree = reinterpret_cast(0); + c_stream.opaque = reinterpret_cast(0); + + err = deflateInit(&c_stream, lvl); + if (err != Z_OK) { + goto error; + } + + c_stream.next_in = reinterpret_cast(src_buff); + c_stream.next_out = reinterpret_cast(dst_buff); + + while (c_stream.total_in != src_len && c_stream.total_out < dst_cap) { + /* force small buffers */ + c_stream.avail_in = c_stream.avail_out = 1; + err = deflate(&c_stream, Z_NO_FLUSH); + if (err != Z_OK) { + goto error; + } + } + + while (true) { + c_stream.avail_out = 1; + err = deflate(&c_stream, Z_FINISH); + if (err == Z_STREAM_END) break; + if (err != Z_OK) { + goto error; + } + } + + err = deflateEnd(&c_stream); + if (err != Z_OK) { + goto error; + } + + return c_stream.total_out; +error: + err_msg_ = c_stream.msg; + return err; +} + +size_t PaxZlibCompressor::GetDecompressSize(const void * /*src_buff*/, + size_t /*src_len*/) { + return -1; +} + +size_t PaxZlibCompressor::Decompress(void *dst_buff, size_t dst_cap, + void *src_buff, size_t src_len) { + int err; + z_stream d_stream; /* decompression stream */ + + d_stream.zalloc = reinterpret_cast(0); + d_stream.zfree = reinterpret_cast(0); + d_stream.opaque = reinterpret_cast(0); + + d_stream.next_in = reinterpret_cast(src_buff); + d_stream.avail_in = 0; + d_stream.next_out = reinterpret_cast(dst_buff); + + err = inflateInit(&d_stream); + if (err != Z_OK) { + goto error; + } + + while (d_stream.total_out < dst_cap && d_stream.total_in < src_len) { + d_stream.avail_in = d_stream.avail_out = 1; /* force small buffers */ + err = inflate(&d_stream, Z_NO_FLUSH); + if (err == Z_STREAM_END) break; + if (err != Z_OK) { + goto error; + } + } + + err = inflateEnd(&d_stream); + if (err != Z_OK) { + goto error; + } + 
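+  // total_out is the number of bytes actually inflated into dst_buff.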
+ return d_stream.total_out; +error: + err_msg_ = d_stream.msg; + return err; +} + +bool PaxZlibCompressor::IsError(size_t code) { + return code != Z_OK && !err_msg_.empty(); +} + +const char *PaxZlibCompressor::ErrorName(size_t /*code*/) { + return err_msg_.c_str(); +} + +} // namespace pax diff --git a/contrib/pax_storage/src/cpp/storage/columns/pax_compress.h b/contrib/pax_storage/src/cpp/storage/columns/pax_compress.h new file mode 100644 index 00000000000..f183f5a4762 --- /dev/null +++ b/contrib/pax_storage/src/cpp/storage/columns/pax_compress.h @@ -0,0 +1,106 @@ +#pragma once + +#include + +#include + +#include "comm/cbdb_wrappers.h" +#include "comm/singleton.h" +#include "storage/columns/pax_encoding_utils.h" + +namespace pax { + +class PaxCompressor { + public: + enum PaxCompressorType { + kTypeInvalid = 1, // + kTypeStreaming = 2, // + kTypeBlock = 3 + }; + + PaxCompressor() = default; + + virtual ~PaxCompressor() = default; + + virtual bool ShouldAlignBuffer() const = 0; + + virtual PaxCompressorType GetCompressorType() const = 0; + + virtual size_t GetCompressBound(size_t src_len) = 0; + + virtual size_t Compress(void *dst_buff, size_t dst_cap, void *src_buff, + size_t src_len, int lvl) = 0; + + virtual size_t GetDecompressSize(const void *src_buff, size_t src_len) = 0; + + virtual size_t Decompress(void *dst_buff, size_t dst_len, void *src_buff, + size_t src_len) = 0; + + virtual bool IsError(size_t code) = 0; + + virtual const char *ErrorName(size_t code) = 0; + + /** + * block compress + * + * it has similar interface with `CreateStreamingEncoder` + * but the timing of compression/decompression method calls is different from + * encoding/decoding. + */ + static PaxCompressor *CreateBlockCompressor(ColumnEncoding_Kind kind); +}; + +class PaxZSTDCompressor final : public PaxCompressor { + public: + PaxZSTDCompressor() = default; + + ~PaxZSTDCompressor() override = default; + + PaxCompressor::PaxCompressorType GetCompressorType() const override; + + bool ShouldAlignBuffer() const override; + + size_t GetCompressBound(size_t src_len) override; + + size_t Compress(void *dst_buff, size_t dst_cap, void *src_buff, + size_t src_len, int lvl) override; + + size_t GetDecompressSize(const void *src_buff, size_t src_len) override; + + size_t Decompress(void *dst_buff, size_t dst_len, void *src_buff, + size_t src_len) override; + + bool IsError(size_t code) override; + + const char *ErrorName(size_t code) override; +}; + +class PaxZlibCompressor final : public PaxCompressor { + public: + PaxZlibCompressor() = default; + + ~PaxZlibCompressor() override = default; + + PaxCompressor::PaxCompressorType GetCompressorType() const override; + + bool ShouldAlignBuffer() const override; + + size_t GetCompressBound(size_t src_len) override; + + size_t Compress(void *dst_buff, size_t dst_cap, void *src_buff, + size_t src_len, int lvl) override; + + size_t GetDecompressSize(const void *src_buff, size_t src_len) override; + + size_t Decompress(void *dst_buff, size_t dst_cap, void *src_buff, + size_t src_len) override; + + bool IsError(size_t code) override; + + const char *ErrorName(size_t code) override; + + private: + std::string err_msg_; +}; + +} // namespace pax diff --git a/contrib/pax_storage/src/cpp/storage/columns/pax_compress_test.cc b/contrib/pax_storage/src/cpp/storage/columns/pax_compress_test.cc new file mode 100644 index 00000000000..de6e4adbbed --- /dev/null +++ b/contrib/pax_storage/src/cpp/storage/columns/pax_compress_test.cc @@ -0,0 +1,70 @@ +#include "storage/columns/pax_compress.h" 
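The two implementations above are interchangeable behind the `PaxCompressor` interface; only the sizing contract differs (`GetDecompressSize` returns the frame content size for ZSTD but -1 for zlib streams, so zlib callers must track the original length themselves). A minimal round-trip sketch against this interface, assuming only the header above; the buffer names and sizes are illustrative, not part of the patch:

// Hedged sketch: exercises the block-compressor contract declared above.
#include <vector>

#include "storage/columns/pax_compress.h"

void RoundTripSketch() {
  pax::PaxZSTDCompressor zstd;
  std::vector<char> src(8192, 'x');
  std::vector<char> dst(zstd.GetCompressBound(src.size()));
  size_t c_size =
      zstd.Compress(dst.data(), dst.size(), src.data(), src.size(), /*lvl=*/1);
  if (zstd.IsError(c_size)) return;  // zstd.ErrorName(c_size) describes it
  // For ZSTD the frame records its own content size; zlib would return -1.
  std::vector<char> back(zstd.GetDecompressSize(dst.data(), c_size));
  zstd.Decompress(back.data(), back.size(), dst.data(), c_size);
}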
diff --git a/contrib/pax_storage/src/cpp/storage/columns/pax_compress_test.cc b/contrib/pax_storage/src/cpp/storage/columns/pax_compress_test.cc
new file mode 100644
index 00000000000..de6e4adbbed
--- /dev/null
+++ b/contrib/pax_storage/src/cpp/storage/columns/pax_compress_test.cc
@@ -0,0 +1,70 @@
+#include "storage/columns/pax_compress.h"
+
+#include <cstddef>
+
+#include "comm/cbdb_wrappers.h"
+#include "comm/gtest_wrappers.h"
+#include "exceptions/CException.h"
+#include "storage/columns/pax_encoding_utils.h"
+
+namespace pax::tests {
+class PaxCompressTest
+    : public ::testing::TestWithParam<
+          ::testing::tuple<ColumnEncoding_Kind, uint32>> {
+  void SetUp() override {
+    MemoryContext pax_compress_memory_context = AllocSetContextCreate(
+        (MemoryContext)NULL, "PaxCompressTestMemoryContext", 200 * 1024 * 1024,
+        200 * 1024 * 1024, 200 * 1024 * 1024);
+
+    MemoryContextSwitchTo(pax_compress_memory_context);
+  }
+};
+
+TEST_P(PaxCompressTest, TestCompressAndDecompress) {
+  ColumnEncoding_Kind type = ::testing::get<0>(GetParam());
+  uint32 data_len = ::testing::get<1>(GetParam());
+  size_t dst_len = 0;
+  PaxCompressor *compressor;
+
+  char *data = reinterpret_cast<char *>(cbdb::Palloc(data_len));
+  char *result_data = reinterpret_cast<char *>(cbdb::Palloc(data_len));
+  for (size_t i = 0; i < data_len; ++i) {
+    data[i] = i;
+  }
+
+  compressor = PaxCompressor::CreateBlockCompressor(type);
+
+  size_t bound_size = compressor->GetCompressBound(data_len);  // NOLINT
+  ASSERT_GT(bound_size, 0);
+  result_data =
+      reinterpret_cast<char *>(cbdb::RePalloc(result_data, bound_size));
+  dst_len = bound_size;
+  dst_len = compressor->Compress(result_data, dst_len, data, data_len, 1);
+  ASSERT_FALSE(compressor->IsError(dst_len));
+  ASSERT_GT(dst_len, 0);
+
+  // reset data
+  for (size_t i = 0; i < data_len; ++i) {
+    data[i] = 0;
+  }
+
+  size_t decompress_len =
+      compressor->Decompress(data, data_len, result_data, dst_len);
+  ASSERT_GT(decompress_len, 0);
+  ASSERT_EQ(decompress_len, data_len);
+  for (size_t i = 0; i < data_len; ++i) {
+    ASSERT_EQ(data[i], (char)i);
+  }
+
+  delete compressor;
+  delete data;
+  delete result_data;
+}
+
+INSTANTIATE_TEST_CASE_P(
+    PaxCompressTestCombined, PaxCompressTest,
+    testing::Combine(testing::Values(ColumnEncoding_Kind_COMPRESS_ZSTD,
+                                     ColumnEncoding_Kind_COMPRESS_ZLIB),
+                     testing::Values(1, 128, 4096, 1024 * 1024,
+                                     64 * 1024 * 1024)));
+
+}  // namespace pax::tests
diff --git a/contrib/pax_storage/src/cpp/storage/columns/pax_decoding.cc b/contrib/pax_storage/src/cpp/storage/columns/pax_decoding.cc
new file mode 100644
index 00000000000..b1f73dbdbb1
--- /dev/null
+++ b/contrib/pax_storage/src/cpp/storage/columns/pax_decoding.cc
@@ -0,0 +1,44 @@
+#include "storage/columns/pax_decoding.h"
+
+#include "storage/columns/pax_rlev2_decoding.h"
+
+namespace pax {
+
+template <typename T>
+PaxDecoder *PaxDecoder::CreateDecoder(const DecodingOption &decoder_options) {
+  PaxDecoder *decoder = nullptr;
+  switch (decoder_options.column_encode_type) {
+    case ColumnEncoding_Kind::ColumnEncoding_Kind_NO_ENCODED: {
+      // do nothing
+      break;
+    }
+    case ColumnEncoding_Kind::ColumnEncoding_Kind_ORC_RLE_V2: {
+      decoder = new PaxOrcDecoder<T>(decoder_options);
+      break;
+    }
+    case ColumnEncoding_Kind::ColumnEncoding_Kind_DIRECT_DELTA: {
+      /// TODO(jiaqizho) support it
+      break;
+    }
+    case ColumnEncoding_Kind::ColumnEncoding_Kind_DEF_ENCODED: {
+      CBDB_RAISE(cbdb::CException::ExType::kExTypeLogicError);
+    }
+    case ColumnEncoding_Kind::ColumnEncoding_Kind_COMPRESS_ZSTD:
+    case ColumnEncoding_Kind::ColumnEncoding_Kind_COMPRESS_ZLIB:
+    default: {
+      // do nothing
+    }
+  }
+
+  return decoder;
+}
+
+template PaxDecoder *PaxDecoder::CreateDecoder<int8>(const DecodingOption &);
+template PaxDecoder *PaxDecoder::CreateDecoder<int16>(const DecodingOption &);
+template PaxDecoder *PaxDecoder::CreateDecoder<int32>(const DecodingOption &);
+template PaxDecoder *PaxDecoder::CreateDecoder<int64>(const DecodingOption &);
+
+PaxDecoder::PaxDecoder(const DecodingOption &decoder_options)
+    : decoder_options_(decoder_options) {}
+
+}  // namespace pax
diff --git a/contrib/pax_storage/src/cpp/storage/columns/pax_decoding.h b/contrib/pax_storage/src/cpp/storage/columns/pax_decoding.h
new file mode 100644
index 00000000000..8fac8f01792
--- /dev/null
+++ b/contrib/pax_storage/src/cpp/storage/columns/pax_decoding.h
@@ -0,0 +1,58 @@
+#pragma once
+
+#include <cstddef>
+#include <cstdint>
+
+#include "comm/cbdb_wrappers.h"
+#include "storage/columns/pax_encoding_utils.h"
+#include "storage/pax_buffer.h"
+
+namespace pax {
+
+class PaxDecoder {
+ public:
+  struct DecodingOption {
+    ColumnEncoding_Kind column_encode_type;
+    bool is_sign;
+
+    DecodingOption()
+        : column_encode_type(
+              ColumnEncoding_Kind::ColumnEncoding_Kind_DEF_ENCODED),
+          is_sign(true) {}
+  };
+
+  explicit PaxDecoder(const DecodingOption &decoder_options);
+
+  virtual ~PaxDecoder() = default;
+
+  virtual PaxDecoder *SetSrcBuffer(char *data, size_t data_len) = 0;
+
+  virtual PaxDecoder *SetDataBuffer(DataBuffer<char> *result_buffer) = 0;
+
+  virtual size_t Next(const char *not_null) = 0;
+
+  virtual size_t Decoding() = 0;
+
+  virtual size_t Decoding(const char *not_null, size_t not_null_len) = 0;
+
+  virtual const char *GetBuffer() const = 0;
+
+  virtual size_t GetBufferSize() const = 0;
+
+  template <typename T>
+  static PaxDecoder *CreateDecoder(const DecodingOption &decoder_options);
+
+ protected:
+  const DecodingOption &decoder_options_;
+};
+
+extern template PaxDecoder *PaxDecoder::CreateDecoder<int8>(
+    const DecodingOption &);
+extern template PaxDecoder *PaxDecoder::CreateDecoder<int16>(
+    const DecodingOption &);
+extern template PaxDecoder *PaxDecoder::CreateDecoder<int32>(
+    const DecodingOption &);
+extern template PaxDecoder *PaxDecoder::CreateDecoder<int64>(
+    const DecodingOption &);
+
+}  // namespace pax
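Decoding is width-specific: callers instantiate `CreateDecoder<T>` with the column's physical integer width, mirroring the 8/16/32/64-bit dispatch the tests use. A hedged wiring sketch; the function name and its parameters are illustrative assumptions, not part of the patch:

// Sketch only: wiring a decoder for an int64 column.
#include "storage/columns/pax_decoding.h"

void DecodeSketch(char *encoded, size_t encoded_len,
                  pax::DataBuffer<char> *out) {
  pax::PaxDecoder::DecodingOption opts;
  opts.column_encode_type =
      ColumnEncoding_Kind::ColumnEncoding_Kind_ORC_RLE_V2;
  opts.is_sign = true;
  pax::PaxDecoder *decoder = pax::PaxDecoder::CreateDecoder<int64>(opts);
  if (decoder != nullptr) {  // nullptr means no decoding is needed/supported
    decoder->SetSrcBuffer(encoded, encoded_len);
    decoder->SetDataBuffer(out);  // receives the decoded values
    decoder->Decoding();
    delete decoder;
  }
}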
diff --git a/contrib/pax_storage/src/cpp/storage/columns/pax_encoding.cc b/contrib/pax_storage/src/cpp/storage/columns/pax_encoding.cc
new file mode 100644
index 00000000000..e039220b65e
--- /dev/null
+++ b/contrib/pax_storage/src/cpp/storage/columns/pax_encoding.cc
@@ -0,0 +1,49 @@
+#include "storage/columns/pax_encoding.h"
+
+#include <utility>
+
+#include "storage/columns/pax_rlev2_encoding.h"
+
+namespace pax {
+
+PaxEncoder *PaxEncoder::CreateStreamingEncoder(
+    const EncodingOption &encoder_options) {
+  PaxEncoder *encoder = nullptr;
+  switch (encoder_options.column_encode_type) {
+    case ColumnEncoding_Kind::ColumnEncoding_Kind_ORC_RLE_V2: {
+      encoder = new PaxOrcEncoder(std::move(encoder_options));
+      break;
+    }
+    case ColumnEncoding_Kind::ColumnEncoding_Kind_DIRECT_DELTA: {
+      // TODO(jiaqizho): support direct delta encoding
+      // Not supported yet, so return nullptr directly (meaning no encoding).
+      break;
+    }
+    case ColumnEncoding_Kind::ColumnEncoding_Kind_DEF_ENCODED: {
+      CBDB_RAISE(cbdb::CException::ExType::kExTypeLogicError);
+    }
+    // Two cases land here:
+    // - `encoded_type` is not an encoding type.
+    // - `encoded_type` is the no-encoding type.
+    default: {
+      // do nothing
+    }
+  }
+
+  return encoder;
+}
+
+PaxEncoder::PaxEncoder(const EncodingOption &encoder_options)
+    : encoder_options_(encoder_options), result_buffer_(nullptr) {}
+
+void PaxEncoder::SetDataBuffer(DataBuffer<char> *result_buffer) {
+  Assert(!result_buffer_ && result_buffer);
+  Assert(result_buffer->IsMemTakeOver());
+  result_buffer_ = result_buffer;
+}
+
+char *PaxEncoder::GetBuffer() const { return result_buffer_->GetBuffer(); }
+
+size_t PaxEncoder::GetBufferSize() const { return result_buffer_->Used(); }
+
+}  // namespace pax
diff --git a/contrib/pax_storage/src/cpp/storage/columns/pax_encoding.h b/contrib/pax_storage/src/cpp/storage/columns/pax_encoding.h
new file mode 100644
index 00000000000..7bcb8d1707f
--- /dev/null
+++ b/contrib/pax_storage/src/cpp/storage/columns/pax_encoding.h
@@ -0,0 +1,58 @@
+#pragma once
+
+#include <cstddef>
+#include <cstdint>
+
+#include "comm/cbdb_wrappers.h"
+#include "storage/columns/pax_encoding_utils.h"
+#include "storage/pax_buffer.h"
+
+namespace pax {
+
+class PaxEncoder {
+ public:
+  struct EncodingOption {
+    ColumnEncoding_Kind column_encode_type;
+    bool is_sign;
+    int compress_lvl;
+
+    EncodingOption()
+        : column_encode_type(
+              ColumnEncoding_Kind::ColumnEncoding_Kind_DEF_ENCODED),
+          is_sign(true),
+          compress_lvl(0) {}
+  };
+
+ public:
+  explicit PaxEncoder(const EncodingOption &encoder_options);
+
+  void SetDataBuffer(DataBuffer<char> *result_buffer);
+
+  virtual ~PaxEncoder() = default;
+
+  virtual void Append(int64 data) = 0;
+
+  virtual void Flush() = 0;
+
+  virtual char *GetBuffer() const;
+
+  virtual size_t GetBufferSize() const;
+
+  /**
+   * streaming encoder
+   *
+   * A streaming encoder needs to hold two DataBuffers:
+   * - one DataBuffer as temporary scratch space
+   * - one DataBuffer that keeps the result
+   *
+   * Compared with the block method, streaming saves one memory copy.
+   */
+  static PaxEncoder *CreateStreamingEncoder(
+      const EncodingOption &encoder_options);
+
+ protected:
+  const EncodingOption &encoder_options_;
+  DataBuffer<char> *result_buffer_;
+};
+
+}  // namespace pax
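This factory pairs with `PaxDecoder::CreateDecoder` from the previous file. A hedged sketch of the streaming contract; the buffer size, value range, and ownership comment are illustrative assumptions:

// Sketch: streaming ORC RLE v2 encode; the result lands in `result`.
#include "storage/columns/pax_encoding.h"

void EncodeSketch() {
  pax::PaxEncoder::EncodingOption eopts;
  eopts.column_encode_type =
      ColumnEncoding_Kind::ColumnEncoding_Kind_ORC_RLE_V2;
  eopts.is_sign = true;
  // Assumed: a freshly allocated DataBuffer owns its memory, which
  // SetDataBuffer asserts via IsMemTakeOver().
  auto *result = new pax::DataBuffer<char>(1024);
  pax::PaxEncoder *encoder = pax::PaxEncoder::CreateStreamingEncoder(eopts);
  encoder->SetDataBuffer(result);
  for (int64 v = 0; v < 100; v++) encoder->Append(v);
  encoder->Flush();
  // encoder->GetBuffer() / GetBufferSize() now expose the encoded run.
  delete encoder;
  delete result;
}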
diff --git a/contrib/pax_storage/src/cpp/storage/columns/pax_encoding_column.cc b/contrib/pax_storage/src/cpp/storage/columns/pax_encoding_column.cc
new file mode 100644
index 00000000000..12eba3d3818
--- /dev/null
+++ b/contrib/pax_storage/src/cpp/storage/columns/pax_encoding_column.cc
@@ -0,0 +1,274 @@
+#include "storage/columns/pax_encoding_column.h"
+
+#include "storage/proto/proto_wrappers.h"
+namespace pax {
+
+template <typename T>
+PaxEncodingColumn<T>::PaxEncodingColumn(
+    uint64 capacity, const PaxEncoder::EncodingOption &encoding_option)
+    : PaxCommColumn<T>(capacity),
+      encoder_options_(encoding_option),
+      encoder_(nullptr),
+      origin_len_(NO_ENCODE_ORIGIN_LEN),
+      non_null_rows_(0),
+      decoder_(nullptr),
+      shared_data_(nullptr),
+      compressor_(nullptr),
+      compress_route_(true) {}
+
+template <typename T>
+PaxEncodingColumn<T>::PaxEncodingColumn(
+    uint64 capacity, const PaxDecoder::DecodingOption &decoding_option)
+    : PaxCommColumn<T>(capacity),
+      encoder_(nullptr),
+      origin_len_(NO_ENCODE_ORIGIN_LEN),
+      non_null_rows_(0),
+      decoder_options_{decoding_option},
+      decoder_(nullptr),
+      shared_data_(nullptr),
+      compressor_(nullptr),
+      compress_route_(false) {}
+
+template <typename T>
+PaxEncodingColumn<T>::~PaxEncodingColumn() {
+  delete encoder_;
+  delete decoder_;
+  delete shared_data_;
+  delete compressor_;
+}
+
+template <typename T>
+void PaxEncodingColumn<T>::InitEncoder() {
+  if (encoder_options_.column_encode_type ==
+      ColumnEncoding_Kind::ColumnEncoding_Kind_DEF_ENCODED) {
+    encoder_options_.column_encode_type = GetDefaultColumnType();
+  }
+
+  PaxColumn::encoded_type_ = encoder_options_.column_encode_type;
+
+  // Create a streaming encoder.
+  // If the current `encoded_type_` cannot back a streaming encoder,
+  // `CreateStreamingEncoder` returns nullptr. This can be caused by three
+  // scenarios:
+  // - `encoded_type_` is not an encoding type.
+  // - `encoded_type_` is an encoding type, but is not supported yet.
+  // - `encoded_type_` is the no-encoding type.
+  //
+  // Passing the default type of `encoded_type_` into
+  // `CreateStreamingEncoder` is not allowed; the caller must change it
+  // before creating an encoder.
+  encoder_ = PaxEncoder::CreateStreamingEncoder(encoder_options_);
+
+  if (encoder_) {
+    origin_len_ = 0;
+    // Memory ownership moves to `shared_data_`, because PaxEncodingColumn
+    // cannot predict when the memory has to be resized; the encoder must be
+    // allowed to resize it during encoding.
+    PaxCommColumn<T>::data_->SetMemTakeOver(false);
+    shared_data_ = new DataBuffer<char>(*PaxCommColumn<T>::data_);
+    shared_data_->SetMemTakeOver(true);
+
+    encoder_->SetDataBuffer(shared_data_);
+  } else {
+    // Create a block compressor.
+    // A compressor has a different interface from a pax encoder; if no pax
+    // encoder is provided, try to create a compressor instead.
+    compressor_ =
+        PaxCompressor::CreateBlockCompressor(PaxColumn::encoded_type_);
+
+    // If neither an encoder nor a compressor can be created, reset the
+    // encode type; otherwise the origin length would stay -1 while an
+    // encode type is still recorded.
+    if (!compressor_) {
+      PaxColumn::encoded_type_ =
+          ColumnEncoding_Kind::ColumnEncoding_Kind_NO_ENCODED;
+    }
+  }
+}
+
+template <typename T>
+void PaxEncodingColumn<T>::InitDecoder() {
+  Assert(decoder_options_.column_encode_type !=
+         ColumnEncoding_Kind::ColumnEncoding_Kind_DEF_ENCODED);
+  PaxColumn::encoded_type_ = decoder_options_.column_encode_type;
+
+  decoder_ = PaxDecoder::CreateDecoder<T>(decoder_options_);
+  if (decoder_) {
+    shared_data_ = new DataBuffer<char>(*PaxCommColumn<T>::data_);
+    decoder_->SetDataBuffer(shared_data_);
+    // the source buffer still has to be set in `Set`
+  } else {
+    compressor_ =
+        PaxCompressor::CreateBlockCompressor(PaxColumn::encoded_type_);
+    if (compressor_) {
+      PaxCommColumn<T>::data_->SetMemTakeOver(false);
+      shared_data_ = new DataBuffer<char>(*PaxCommColumn<T>::data_);
+      shared_data_->SetMemTakeOver(true);
+    }
+  }
+}
+
+template <typename T>
+void PaxEncodingColumn<T>::Set(DataBuffer<char> *data) {
+  if (decoder_) {
+    // should not decode a null buffer
+    if (data->Used() != 0) {
+      Assert(shared_data_);
+      decoder_->SetSrcBuffer(data->Start(), data->Used());
+      // the null bitmap is not passed down until the vec version
+      decoder_->Decoding(nullptr, 0);
+    }
+
+    Assert(!data->IsMemTakeOver());
+    delete data;
+  } else if (compressor_) {
+    if (data->Used() != 0) {
+      Assert(shared_data_);
+      size_t d_size = compressor_->Decompress(shared_data_->Start(),
+                                              shared_data_->Capacity(),
+                                              data->Start(), data->Used());
+      if (compressor_->IsError(d_size)) {
+        // log error with `compressor_->ErrorName(d_size)`
+        CBDB_RAISE(cbdb::CException::ExType::kExTypeCompressError);
+      }
+
+      shared_data_->Brush(d_size);
+    }
+
+    // FIXME(jiaqizho): the DataBuffer copy should change to a pointer copy;
+    // then `data_` would not need to be updated back.
+    PaxCommColumn<T>::data_->Reset();
+    PaxCommColumn<T>::data_->Set(shared_data_->Start(),
+                                 shared_data_->Capacity(), 0);
+    PaxCommColumn<T>::data_->Brush(shared_data_->Used());
+
+    Assert(!data->IsMemTakeOver());
+    delete data;
+  } else {
+    PaxCommColumn<T>::Set(data);
+  }
+}
+
+template <typename T>
+std::pair<char *, size_t> PaxEncodingColumn<T>::GetBuffer(size_t position) {
+  CBDB_CHECK(!encoder_, cbdb::CException::ExType::kExTypeLogicError);
+
+  if (decoder_) {
+    Assert(shared_data_);
+    CBDB_CHECK(position < shared_data_->Used() / sizeof(T),
+               cbdb::CException::ExType::kExTypeOutOfRange);
+
+    return std::make_pair(shared_data_->Start() + (sizeof(T) * position),
+                          sizeof(T));
+  }
+  return PaxCommColumn<T>::GetBuffer(position);
+}
+
+template <typename T>
+std::pair<char *, size_t> PaxEncodingColumn<T>::GetBuffer() {
+  if (encoder_) {
+    encoder_->Flush();
+  }
+
+  if (shared_data_) {
+    return std::make_pair(shared_data_->Start(), shared_data_->Used());
+  } else if (compressor_ && !shared_data_ && compress_route_) {
+    // an all-null field should not be compressed
+    if (PaxCommColumn<T>::data_->Used() == 0) {
+      return PaxCommColumn<T>::GetBuffer();
+    } else {
+      size_t bound_size =
+          compressor_->GetCompressBound(PaxCommColumn<T>::data_->Used());
+      shared_data_ = new DataBuffer<char>(bound_size);
+
+      size_t c_size = compressor_->Compress(
+          shared_data_->Start(), shared_data_->Capacity(),
+          PaxCommColumn<T>::data_->Start(), PaxCommColumn<T>::data_->Used(),
+          encoder_options_.compress_lvl);
+
+      if (compressor_->IsError(c_size)) {
+        // log error with `compressor_->ErrorName(c_size)`
+        CBDB_RAISE(cbdb::CException::ExType::kExTypeCompressError);
+      }
+
+      shared_data_->Brush(c_size);
+      return std::make_pair(shared_data_->Start(), shared_data_->Used());
+    }
+  } else {
+    return PaxCommColumn<T>::GetBuffer();
+  }
+
+  // unreachable
+  Assert(false);
+}
+
+template <typename T>
+std::pair<char *, size_t> PaxEncodingColumn<T>::GetRangeBuffer(
+    size_t start_pos, size_t len) {
+  CBDB_CHECK(!encoder_, cbdb::CException::ExType::kExTypeLogicError);
+
+  if (decoder_) {
+    Assert(shared_data_);
+    CBDB_CHECK((start_pos + len) <= GetNonNullRows(),
+               cbdb::CException::ExType::kExTypeOutOfRange);
+    return std::make_pair(shared_data_->Start() + (sizeof(T) * start_pos),
+                          sizeof(T) * len);
+  }
+
+  return PaxCommColumn<T>::GetRangeBuffer(start_pos, len);
+}
+
+template <typename T>
+void PaxEncodingColumn<T>::Append(char *buffer, size_t size) {
+  Assert(size == sizeof(T));
+  if (encoder_) {
+    // Should not call `PaxCommColumn::Append`, but `PaxColumn::Append`
+    // still has to run to push the null bitmap.
+    PaxColumn::Append(buffer, size);  // NOLINT
+
+    non_null_rows_++;
+    origin_len_ += size;
+    encoder_->Append(*reinterpret_cast<T *>(buffer));
+    if (shared_data_->Capacity() != PaxCommColumn<T>::capacity_) {
+      PaxCommColumn<T>::capacity_ = shared_data_->Capacity();
+    }
+    return;
+  }
+
+  PaxCommColumn<T>::Append(buffer, size);
+}
+
+template <typename T>
+int64 PaxEncodingColumn<T>::GetOriginLength() const {
+  return compressor_ ? PaxCommColumn<T>::data_->Used() : origin_len_;
+}
+
+template <typename T>
+size_t PaxEncodingColumn<T>::GetNonNullRows() const {
+  if (decoder_) {
+    // must already be decoded
+    Assert(shared_data_);
+    return shared_data_->Used() / sizeof(T);
+  }
+
+  if (encoder_) {
+    return non_null_rows_;
+  }
+
+  return PaxCommColumn<T>::GetNonNullRows();
+}
+
+template <typename T>
+size_t PaxEncodingColumn<T>::PhysicalSize() const {
+  if (shared_data_) {
+    return shared_data_->Used();
+  }
+
+  return PaxCommColumn<T>::PhysicalSize();
+}
+
+template class PaxEncodingColumn<int8>;
+template class PaxEncodingColumn<int16>;
+template class PaxEncodingColumn<int32>;
+template class PaxEncodingColumn<int64>;
+
+}  // namespace pax
diff --git a/contrib/pax_storage/src/cpp/storage/columns/pax_encoding_column.h b/contrib/pax_storage/src/cpp/storage/columns/pax_encoding_column.h
new file mode 100644
index 00000000000..626621865e8
--- /dev/null
+++ b/contrib/pax_storage/src/cpp/storage/columns/pax_encoding_column.h
@@ -0,0 +1,63 @@
+#pragma once
+#include "storage/columns/pax_columns.h"
+#include "storage/columns/pax_compress.h"
+#include "storage/columns/pax_decoding.h"
+#include "storage/columns/pax_encoding.h"
+
+namespace pax {
+
+template <typename T>
+class PaxEncodingColumn : public PaxCommColumn<T> {
+ public:
+  PaxEncodingColumn(uint64 capacity,
+                    const PaxEncoder::EncodingOption &encoding_option);
+
+  PaxEncodingColumn(uint64 capacity,
+                    const PaxDecoder::DecodingOption &decoding_option);
+
+  ~PaxEncodingColumn() override;
+
+  void Set(DataBuffer<char> *data) override;
+
+  void Append(char *buffer, size_t size) override;
+
+  std::pair<char *, size_t> GetBuffer(size_t position) override;
+
+  std::pair<char *, size_t> GetBuffer() override;
+
+  std::pair<char *, size_t> GetRangeBuffer(size_t start_pos,
+                                           size_t len) override;
+
+  size_t GetNonNullRows() const override;
+
+  int64 GetOriginLength() const override;
+
+  size_t PhysicalSize() const override;
+
+ protected:
+  void InitEncoder();
+
+  void InitDecoder();
+
+  virtual ColumnEncoding_Kind GetDefaultColumnType() = 0;
+
+ protected:
+  PaxEncoder::EncodingOption encoder_options_;
+  PaxEncoder *encoder_;
+  uint64 origin_len_;
+  uint64 non_null_rows_;
+
+  PaxDecoder::DecodingOption decoder_options_;
+  PaxDecoder *decoder_;
+  DataBuffer<char> *shared_data_;
+
+  PaxCompressor *compressor_;
+  bool compress_route_;
+};
+
+extern template class PaxEncodingColumn<int8>;
+extern template class PaxEncodingColumn<int16>;
+extern template class PaxEncodingColumn<int32>;
+extern template class PaxEncodingColumn<int64>;
+
+}  // namespace pax
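`PaxEncodingColumn` stays abstract: each concrete width picks its own default encoding through `GetDefaultColumnType`. A minimal sketch of such a subclass; the class name, chosen default, and the constructor-time `InitEncoder()` call are assumptions for illustration, not part of this patch:

// Sketch only: the minimum a concrete fixed-width column has to provide.
#include "storage/columns/pax_encoding_column.h"

class Int32EncodingColumnSketch final
    : public pax::PaxEncodingColumn<int32> {
 public:
  Int32EncodingColumnSketch(uint64 capacity,
                            const pax::PaxEncoder::EncodingOption &opt)
      : pax::PaxEncodingColumn<int32>(capacity, opt) {
    InitEncoder();  // resolves DEF_ENCODED via GetDefaultColumnType()
  }

 protected:
  ColumnEncoding_Kind GetDefaultColumnType() override {
    return ColumnEncoding_Kind::ColumnEncoding_Kind_ORC_RLE_V2;
  }
};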
diff --git a/contrib/pax_storage/src/cpp/storage/columns/pax_encoding_non_fixed_column.cc b/contrib/pax_storage/src/cpp/storage/columns/pax_encoding_non_fixed_column.cc
new file mode 100644
index 00000000000..e7f190f9445
--- /dev/null
+++ b/contrib/pax_storage/src/cpp/storage/columns/pax_encoding_non_fixed_column.cc
@@ -0,0 +1,126 @@
+#include "storage/columns/pax_encoding_non_fixed_column.h"
+
+namespace pax {
+
+PaxNonFixedEncodingColumn::PaxNonFixedEncodingColumn(
+    uint64 capacity, const PaxEncoder::EncodingOption &encoder_options)
+    : PaxNonFixedColumn(capacity),
+      encoder_options_(encoder_options),
+      compressor_(nullptr),
+      compress_route_(true),
+      shared_data_(nullptr) {
+  if (encoder_options.column_encode_type ==
+      ColumnEncoding_Kind::ColumnEncoding_Kind_DEF_ENCODED) {
+    encoder_options_.column_encode_type = ColumnEncoding_Kind_COMPRESS_ZSTD;
+  }
+
+  PaxColumn::encoded_type_ = encoder_options_.column_encode_type;
+  compressor_ = PaxCompressor::CreateBlockCompressor(PaxColumn::encoded_type_);
+  if (!compressor_) {
+    PaxColumn::encoded_type_ =
+        ColumnEncoding_Kind::ColumnEncoding_Kind_NO_ENCODED;
+  }
+}
+
+PaxNonFixedEncodingColumn::PaxNonFixedEncodingColumn(
+    uint64 capacity, const PaxDecoder::DecodingOption &decoding_option)
+    : PaxNonFixedColumn(capacity),
+      decoder_options_(decoding_option),
+      compressor_(nullptr),
+      compress_route_(false),
+      shared_data_(nullptr) {
+  Assert(decoder_options_.column_encode_type !=
+         ColumnEncoding_Kind::ColumnEncoding_Kind_DEF_ENCODED);
+  PaxColumn::encoded_type_ = decoder_options_.column_encode_type;
+  compressor_ = PaxCompressor::CreateBlockCompressor(PaxColumn::encoded_type_);
+  if (compressor_) {
+    PaxNonFixedColumn::data_->SetMemTakeOver(false);
+    shared_data_ = new DataBuffer<char>(*PaxNonFixedColumn::data_);
+    shared_data_->SetMemTakeOver(true);
+  }
+}
+
+PaxNonFixedEncodingColumn::~PaxNonFixedEncodingColumn() {
+  delete compressor_;
+  delete shared_data_;
+}
+
+void PaxNonFixedEncodingColumn::Set(DataBuffer<char> *data,
+                                    DataBuffer<int32> *lengths,
+                                    size_t total_size) {
+  if (compressor_) {
+    Assert(shared_data_);
+
+    // the original `Set` bookkeeping still has to happen
+    if (lengths_) {
+      delete lengths_;
+    }
+
+    estimated_size_ = total_size;
+    lengths_ = lengths;
+    offsets_.clear();
+    for (size_t i = 0; i < lengths_->GetSize(); i++) {
+      offsets_.emplace_back(i == 0 ? 0 : offsets_[i - 1] + (*lengths_)[i - 1]);
+    }
+
+    if (data->Used() != 0) {
+      auto d_size = compressor_->Decompress(shared_data_->Start(),
+                                            shared_data_->Capacity(),
+                                            data->Start(), data->Used());
+      if (compressor_->IsError(d_size)) {
+        // log error with `compressor_->ErrorName(d_size)`
+        CBDB_RAISE(cbdb::CException::ExType::kExTypeCompressError);
+      }
+      shared_data_->Brush(d_size);
+    }
+
+    // FIXME(jiaqizho): the DataBuffer copy should change to a pointer copy;
+    // then `data_` would not need to be updated back.
+    PaxNonFixedColumn::data_->Reset();
+    PaxNonFixedColumn::data_->Set(shared_data_->Start(),
+                                  shared_data_->Capacity(), 0);
+    PaxNonFixedColumn::data_->Brush(shared_data_->Used());
+
+    Assert(!data->IsMemTakeOver());
+    delete data;
+  } else {
+    PaxNonFixedColumn::Set(data, lengths, total_size);
+  }
+}
+
+std::pair<char *, size_t> PaxNonFixedEncodingColumn::GetBuffer() {
+  if (shared_data_) {
+    return std::make_pair(shared_data_->Start(), shared_data_->Used());
+  } else if (compressor_ && !shared_data_ && compress_route_) {
+    if (PaxNonFixedColumn::data_->Used() == 0) {
+      return PaxNonFixedColumn::GetBuffer();
+    } else {
+      size_t bound_size =
+          compressor_->GetCompressBound(PaxNonFixedColumn::data_->Used());
+      shared_data_ = new DataBuffer<char>(bound_size);
+
+      auto c_size = compressor_->Compress(
+          shared_data_->Start(), shared_data_->Capacity(),
+          PaxNonFixedColumn::data_->Start(), PaxNonFixedColumn::data_->Used(),
+          encoder_options_.compress_lvl);
+
+      if (compressor_->IsError(c_size)) {
+        // log error with `compressor_->ErrorName(c_size)`
+        CBDB_RAISE(cbdb::CException::ExType::kExTypeCompressError);
+      }
+      shared_data_->Brush(c_size);
+      return std::make_pair(shared_data_->Start(), shared_data_->Used());
+    }
+  } else {
+    return PaxNonFixedColumn::GetBuffer();
+  }
+
+  // unreachable
+  Assert(false);
+}
+
+int64 PaxNonFixedEncodingColumn::GetOriginLength() const {
+  return compressor_ ? PaxNonFixedColumn::data_->Used() : NO_ENCODE_ORIGIN_LEN;
+}
+
+}  // namespace pax
diff --git a/contrib/pax_storage/src/cpp/storage/columns/pax_encoding_non_fixed_column.h b/contrib/pax_storage/src/cpp/storage/columns/pax_encoding_non_fixed_column.h
new file mode 100644
index 00000000000..6de603a4ea8
--- /dev/null
+++ b/contrib/pax_storage/src/cpp/storage/columns/pax_encoding_non_fixed_column.h
@@ -0,0 +1,40 @@
+#pragma once
+#include "storage/columns/pax_columns.h"
+#include "storage/columns/pax_compress.h"
+#include "storage/columns/pax_decoding.h"
+#include "storage/columns/pax_encoding.h"
+
+namespace pax {
+
+class PaxNonFixedEncodingColumn final : public PaxNonFixedColumn {
+ public:
+  PaxNonFixedEncodingColumn(uint64 capacity,
+                            const PaxEncoder::EncodingOption &encoder_options);
+
+  PaxNonFixedEncodingColumn(uint64 capacity,
+                            const PaxDecoder::DecodingOption &decoding_option);
+
+  ~PaxNonFixedEncodingColumn() override;
+
+  void Set(DataBuffer<char> *data, DataBuffer<int32> *lengths,
+           size_t total_size) override;
+
+  std::pair<char *, size_t> GetBuffer() override;
+
+  int64 GetOriginLength() const override;
+
+  // `PaxNonFixedEncodingColumn` does not override `GetRangeBuffer` and
+  // `GetNonNullRows` because it has no streaming encoding, and
+  // `shared_data_` owns the same buffer as `PaxNonFixedColumn::data_`.
+
+ protected:
+  PaxEncoder::EncodingOption encoder_options_;
+  PaxDecoder::DecodingOption decoder_options_;
+
+  PaxCompressor *compressor_;
+  bool compress_route_;
+  DataBuffer<char> *shared_data_;
+};
+
+}  // namespace pax
diff --git a/contrib/pax_storage/src/cpp/storage/columns/pax_encoding_test.cc b/contrib/pax_storage/src/cpp/storage/columns/pax_encoding_test.cc
new file mode 100644
index 00000000000..fa51aed2510
--- /dev/null
+++ b/contrib/pax_storage/src/cpp/storage/columns/pax_encoding_test.cc
@@ -0,0 +1,1449 @@
+#include "storage/columns/pax_encoding.h"
+
+#include <cmath>
+#include <random>
+#include <tuple>
+#include <vector>
+
+#include "comm/cbdb_wrappers.h"
+#include "comm/gtest_wrappers.h"
+#include "exceptions/CException.h"
+#include "storage/columns/pax_decoding.h"
+#include "storage/columns/pax_encoding_utils.h"
+#include "storage/columns/pax_rlev2_decoding.h"
+#include "storage/columns/pax_rlev2_encoding.h"
+
+namespace pax::tests {
+
+PaxDecoder *GetDecoderByBits(
+    uint8 data_bits, DataBuffer<char> *shared_data,
+    const PaxDecoder::DecodingOption &decoder_options) {
+  PaxDecoder *decoder = nullptr;
+  switch (data_bits) {
+    case 8:
+      decoder = PaxDecoder::CreateDecoder<int8>(decoder_options);
+      break;
+    case 16:
+      decoder = PaxDecoder::CreateDecoder<int16>(decoder_options);
+      break;
+    case 32:
+      decoder = PaxDecoder::CreateDecoder<int32>(decoder_options);
+      break;
+    case 64:
+      decoder = PaxDecoder::CreateDecoder<int64>(decoder_options);
+      break;
+    default:
+      decoder = nullptr;
+      break;
+  }
+
+  if (decoder)
+    decoder->SetSrcBuffer(shared_data->GetBuffer(), shared_data->Used());
+
+  return decoder;
+}
+
+class PaxEncodingTest : public ::testing::Test {
+  void SetUp() override {
+    MemoryContext pax_encoding_memory_context = AllocSetContextCreate(
+        (MemoryContext)NULL, "PaxCompressTestMemoryContext", 200 * 1024 * 1024,
+        200 * 1024 * 1024, 200 * 1024 * 1024);
+
+    MemoryContextSwitchTo(pax_encoding_memory_context);
+  }
+};
+
+class PaxEncodingRangeTest
+    : public ::testing::TestWithParam<::testing::tuple<uint64, bool>> {
+  void SetUp() override {
+    MemoryContext pax_encoding_memory_context = AllocSetContextCreate(
+        (MemoryContext)NULL, "PaxCompressTestMemoryContext", 1 * 1024 * 1024,
+        1 * 1024 * 1024, 1
* 1024 * 1024); + + MemoryContextSwitchTo(pax_encoding_memory_context); + } +}; + +class PaxEncodingRangeWithBitsTest + : public ::testing::TestWithParam<::testing::tuple> { + public: + void SetUp() override { + MemoryContext pax_encoding_memory_context = AllocSetContextCreate( + (MemoryContext)NULL, "PaxCompressTestMemoryContext", 1 * 1024 * 1024, + 1 * 1024 * 1024, 1 * 1024 * 1024); + + MemoryContextSwitchTo(pax_encoding_memory_context); + } +}; + +class PaxEncodingShortRepeatRangeTest : public PaxEncodingRangeWithBitsTest {}; +class PaxEncodingDeltaRangeTest : public PaxEncodingRangeWithBitsTest {}; +class PaxEncodingWriteReadLongsRangeTest : public PaxEncodingRangeTest {}; + +class PaxEncodingDeltaIncDecRangeTest + : public ::testing::TestWithParam<::testing::tuple> { + void SetUp() override { + MemoryContext pax_encoding_memory_context = AllocSetContextCreate( + (MemoryContext)NULL, "PaxCompressTestMemoryContext", 1 * 1024 * 1024, + 1 * 1024 * 1024, 1 * 1024 * 1024); + + MemoryContextSwitchTo(pax_encoding_memory_context); + } +}; + +class PaxEncodingDirectRangeTest : public PaxEncodingDeltaIncDecRangeTest {}; + +class PaxEncodingRawDataTest + : public testing::TestWithParam< + ::testing::tuple, uint8>> { + void SetUp() override { + MemoryContext pax_encoding_memory_context = AllocSetContextCreate( + (MemoryContext)NULL, "PaxCompressTestMemoryContext", 200 * 1024 * 1024, + 200 * 1024 * 1024, 200 * 1024 * 1024); + + MemoryContextSwitchTo(pax_encoding_memory_context); + } +}; + +class PaxEncodingPBTest : public PaxEncodingRawDataTest {}; + +TEST_F(PaxEncodingTest, TestPaxUntreatedBuffer) { + auto *data_buffer = new UntreatedDataBuffer(1024); + + for (size_t i = 0; i < 100; i++) { + data_buffer->Write(i); + data_buffer->Brush(1); + data_buffer->BrushUnTreated(1); + } + EXPECT_EQ(data_buffer->Used(), 100); + EXPECT_EQ(data_buffer->UnTreated(), 100); + EXPECT_EQ(data_buffer->UnTouched(), 0); + + for (size_t i = 0; i < 100; i++) { + data_buffer->Write(2); + data_buffer->Brush(1); + } + + EXPECT_EQ(data_buffer->Used(), 200); + EXPECT_EQ(data_buffer->UnTreated(), 100); + EXPECT_EQ(data_buffer->UnTouched(), 100); + + data_buffer->TreatedAll(); + + EXPECT_EQ(data_buffer->Used(), 100); + EXPECT_EQ(data_buffer->UnTreated(), 0); + EXPECT_EQ(data_buffer->UnTouched(), 100); + + for (size_t i = 0; i < 100; i++) { + EXPECT_EQ((*data_buffer)[i], (char)2); + } + + data_buffer->BrushUnTreatedAll(); + EXPECT_EQ(data_buffer->Used(), 100); + EXPECT_EQ(data_buffer->UnTreated(), 100); + EXPECT_EQ(data_buffer->UnTouched(), 0); + + data_buffer->TreatedAll(); + EXPECT_EQ(data_buffer->Used(), 0); + EXPECT_EQ(data_buffer->UnTreated(), 0); + EXPECT_EQ(data_buffer->UnTouched(), 0); + + data_buffer->BrushUnTreatedAll(); + EXPECT_EQ(data_buffer->Used(), 0); + EXPECT_EQ(data_buffer->UnTreated(), 0); + EXPECT_EQ(data_buffer->UnTouched(), 0); + + data_buffer->Brush(100); + data_buffer->BrushUnTreated(100); + + EXPECT_EQ(data_buffer->Used(), 100); + EXPECT_EQ(data_buffer->UnTreated(), 100); + EXPECT_EQ(data_buffer->UnTouched(), 0); + + data_buffer->ReSize(2048); + EXPECT_EQ(data_buffer->Used(), 100); + EXPECT_EQ(data_buffer->UnTreated(), 100); + EXPECT_EQ(data_buffer->UnTouched(), 0); + + data_buffer->Brush(2048 - 100); + data_buffer->BrushUnTreated(2048 - 100); + EXPECT_EQ(data_buffer->UnTouched(), 0); + + data_buffer->ReSize(2148); + EXPECT_EQ(data_buffer->Used(), 2048); + EXPECT_EQ(data_buffer->UnTreated(), 2048); + EXPECT_EQ(data_buffer->UnTouched(), 0); + + data_buffer->Brush(100); + EXPECT_EQ(data_buffer->Used(), 
2148); + EXPECT_EQ(data_buffer->UnTreated(), 2048); + EXPECT_EQ(data_buffer->UnTouched(), 100); + + data_buffer->ReSize(2248); + EXPECT_EQ(data_buffer->Used(), 2148); + EXPECT_EQ(data_buffer->UnTreated(), 2048); + EXPECT_EQ(data_buffer->UnTouched(), 100); + + delete data_buffer; +} + +TEST_F(PaxEncodingTest, TestPaxTreatedBuffer) { + char data[100]; + auto *data_buffer = new TreatedDataBuffer(data, 100); + + for (size_t i = 0; i < 100; i++) { + data[i] = i; + } + + EXPECT_EQ(data_buffer->Used(), 100); + EXPECT_EQ(data_buffer->Treated(), 0); + EXPECT_EQ(data_buffer->UnTreated(), 100); + + data_buffer->BrushTreated(100); + EXPECT_EQ(data_buffer->Used(), 100); + EXPECT_EQ(data_buffer->Treated(), 100); + EXPECT_EQ(data_buffer->UnTreated(), 0); + + delete data_buffer; +} + +TEST_P(PaxEncodingShortRepeatRangeTest, TestOrcShortRepeatEncoding) { + PaxEncoder *encoder; + int64 *data; + auto shared_data = new DataBuffer(1024); + auto shared_dst_data = new DataBuffer(10240); + auto sr_len = ::testing::get<0>(GetParam()); + auto sign = ::testing::get<1>(GetParam()); + auto data_bits = ::testing::get<2>(GetParam()); + + PaxEncoder::EncodingOption encoder_options; + encoder_options.column_encode_type = + ColumnEncoding_Kind::ColumnEncoding_Kind_ORC_RLE_V2; + encoder_options.is_sign = sign; + encoder = PaxEncoder::CreateStreamingEncoder(encoder_options); + + EXPECT_TRUE(encoder); + + encoder->SetDataBuffer(shared_data); + + data = reinterpret_cast(cbdb::Palloc(sizeof(int64))); + *data = sign ? -2 : 2; + for (size_t i = 0; i < sr_len; i++) { + encoder->Append(*data); + } + encoder->Flush(); + + EXPECT_NE(encoder->GetBuffer(), nullptr); + EXPECT_EQ(encoder->GetBufferSize(), 2); + + auto encoding_buff = encoder->GetBuffer(); + // type(2 bytes): 0 + // type len(3 bytes) + // len(3 bytes) + EXPECT_EQ(static_cast((encoding_buff[0] >> 6) & 0x03), + EncodingType::kShortRepeat); + EXPECT_EQ(encoding_buff[0] & 0x07, sr_len - ORC_MIN_REPEAT); + EXPECT_EQ(((encoding_buff[0] >> 3) & 0x07) + 1, 1); + + PaxDecoder::DecodingOption decoder_options; + decoder_options.column_encode_type = + ColumnEncoding_Kind::ColumnEncoding_Kind_ORC_RLE_V2; + decoder_options.is_sign = sign; + + PaxDecoder *decoder = + GetDecoderByBits(data_bits, shared_data, std::move(decoder_options)); + EXPECT_TRUE(decoder); + + decoder->SetDataBuffer(shared_dst_data); + decoder->Decoding(); + + EXPECT_EQ(shared_dst_data->Used(), sr_len * data_bits / 8); + + switch (data_bits) { + case 8: { + auto result_dst_data = new DataBuffer( + reinterpret_cast(shared_dst_data->Start()), + shared_dst_data->Used(), false, false); + + for (size_t i = 0; i < sr_len; i++) { + EXPECT_EQ((*result_dst_data)[i], *data); + } + delete result_dst_data; + break; + } + case 16: { + auto result_dst_data = new DataBuffer( + reinterpret_cast(shared_dst_data->Start()), + shared_dst_data->Used(), false, false); + + for (size_t i = 0; i < sr_len; i++) { + EXPECT_EQ((*result_dst_data)[i], *data); + } + delete result_dst_data; + break; + } + case 32: { + auto result_dst_data = new DataBuffer( + reinterpret_cast(shared_dst_data->Start()), + shared_dst_data->Used(), false, false); + + for (size_t i = 0; i < sr_len; i++) { + EXPECT_EQ((*result_dst_data)[i], *data); + } + delete result_dst_data; + break; + } + case 64: { + auto result_dst_data = new DataBuffer( + reinterpret_cast(shared_dst_data->Start()), + shared_dst_data->Used(), false, false); + + for (size_t i = 0; i < sr_len; i++) { + EXPECT_EQ((*result_dst_data)[i], *data); + } + delete result_dst_data; + break; + } + default: + 
break; + } + + delete data; + delete shared_data; + delete shared_dst_data; + delete encoder; + delete decoder; +} + +INSTANTIATE_TEST_CASE_P(PaxEncodingRangeTestCombine, + PaxEncodingShortRepeatRangeTest, + testing::Combine(testing::Values(3, 4, 5, 6, 7, 8, 9, + 10), + testing::Values(true, false), + testing::Values(8, 16, 32, 64))); + +TEST_P(PaxEncodingDeltaRangeTest, TestOrcDeltaEncoding) { + PaxEncoder *encoder; + int64 *data; + auto delta_len = ::testing::get<0>(GetParam()); + auto sign = ::testing::get<1>(GetParam()); + auto data_bits = ::testing::get<2>(GetParam()); + + auto shared_data = new DataBuffer(delta_len * sizeof(int64)); + auto shared_dst_data = new DataBuffer(delta_len * sizeof(int64)); + + PaxEncoder::EncodingOption encoder_options; + encoder_options.column_encode_type = + ColumnEncoding_Kind::ColumnEncoding_Kind_ORC_RLE_V2; + encoder_options.is_sign = sign; + encoder = PaxEncoder::CreateStreamingEncoder(encoder_options); + + EXPECT_TRUE(encoder); + + encoder->SetDataBuffer(shared_data); + + data = reinterpret_cast(cbdb::Palloc(sizeof(int64))); + *data = sign ? -10 : 1; + for (size_t i = 0; i < delta_len; i++) { + encoder->Append(*data); + } + encoder->Flush(); + + EXPECT_NE(encoder->GetBuffer(), nullptr); + EXPECT_EQ(encoder->GetBufferSize(), 4); + + // type(2 bytes): 0 + // type len(5 bytes) + // len(9 bytes) + auto encoding_buff = encoder->GetBuffer(); + EXPECT_EQ(static_cast((encoding_buff[0] >> 6) & 0x03), + EncodingType::kDelta); + EXPECT_EQ((encoding_buff[0] >> 1) & 0x1f, 0); + EXPECT_EQ(((encoding_buff[0] & 0x01) << 8) | (unsigned char)encoding_buff[1], + delta_len - 1); + + PaxDecoder::DecodingOption decoder_options; + decoder_options.column_encode_type = + ColumnEncoding_Kind::ColumnEncoding_Kind_ORC_RLE_V2; + decoder_options.is_sign = sign; + + PaxDecoder *decoder = + GetDecoderByBits(data_bits, shared_data, std::move(decoder_options)); + EXPECT_TRUE(decoder); + + decoder->SetDataBuffer(shared_dst_data); + decoder->Decoding(); + + EXPECT_EQ(shared_dst_data->Used(), delta_len * data_bits / 8); + + switch (data_bits) { + case 8: { + auto result_dst_data = new DataBuffer( + reinterpret_cast(shared_dst_data->Start()), + shared_dst_data->Used(), false, false); + + for (size_t i = 0; i < delta_len; i++) { + EXPECT_EQ((*result_dst_data)[i], *data); + } + delete result_dst_data; + break; + } + case 16: { + auto result_dst_data = new DataBuffer( + reinterpret_cast(shared_dst_data->Start()), + shared_dst_data->Used(), false, false); + + for (size_t i = 0; i < delta_len; i++) { + EXPECT_EQ((*result_dst_data)[i], *data); + } + delete result_dst_data; + break; + } + case 32: { + auto result_dst_data = new DataBuffer( + reinterpret_cast(shared_dst_data->Start()), + shared_dst_data->Used(), false, false); + + for (size_t i = 0; i < delta_len; i++) { + EXPECT_EQ((*result_dst_data)[i], *data); + } + delete result_dst_data; + break; + } + case 64: { + auto result_dst_data = new DataBuffer( + reinterpret_cast(shared_dst_data->Start()), + shared_dst_data->Used(), false, false); + + for (size_t i = 0; i < delta_len; i++) { + EXPECT_EQ((*result_dst_data)[i], *data); + } + delete result_dst_data; + break; + } + default: + break; + } + + delete data; + delete shared_data; + delete shared_dst_data; + delete encoder; + delete decoder; +} + +INSTANTIATE_TEST_CASE_P(PaxEncodingRangeTestCombine, PaxEncodingDeltaRangeTest, + testing::Combine(testing::Values(11, 100, 256, 345, 511, + 512), + testing::Values(true, false), + testing::Values(16, 32, 64))); + 
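Each of the header assertions in these tests peels apart the first bytes of an ORC RLE v2 run: the top two bits of byte 0 select the sub-format, and for delta runs the 9-bit run length spans the low bit of byte 0 plus all of byte 1. A restating sketch of that bit arithmetic; the helper names are illustrative, not library API:

// How the assertions above read an ORC RLE v2 header (sketch only).
inline EncodingType RunKind(unsigned char b0) {
  return static_cast<EncodingType>((b0 >> 6) & 0x03);  // 2-bit sub-format tag
}
inline uint32 DeltaRunLength(unsigned char b0, unsigned char b1) {
  return ((b0 & 0x01) << 8) | b1;  // stored as (run length - 1)
}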
+TEST_P(PaxEncodingDeltaIncDecRangeTest, TestOrcIncDeltaEncoding) { + PaxEncoder *encoder; + int64 *data; + auto delta_len = ::testing::get<0>(GetParam()); + auto delta_inc = ::testing::get<1>(GetParam()); + auto sign = ::testing::get<2>(GetParam()); + auto shared_data = new DataBuffer(delta_len * sizeof(int64)); + auto shared_dst_data = new DataBuffer(delta_len * sizeof(int64)); + + PaxEncoder::EncodingOption encoder_options; + encoder_options.column_encode_type = + ColumnEncoding_Kind::ColumnEncoding_Kind_ORC_RLE_V2; + encoder_options.is_sign = sign; + encoder = PaxEncoder::CreateStreamingEncoder(encoder_options); + + EXPECT_TRUE(encoder); + + encoder->SetDataBuffer(shared_data); + + data = reinterpret_cast(cbdb::Palloc(delta_len * sizeof(int64))); + for (size_t i = 0; i < delta_len; i++) { + data[i] = i * delta_inc; + } + + for (size_t i = 0; i < delta_len; i++) { + if (sign) { + data[i] = -data[i]; + } + encoder->Append(data[i]); + } + + encoder->Flush(); + + EXPECT_NE(encoder->GetBuffer(), nullptr); + // eq 4 or 5 depends on lens of max value + EXPECT_NE(encoder->GetBufferSize(), 0); + + auto encoding_buff = encoder->GetBuffer(); + EXPECT_EQ(static_cast((encoding_buff[0] >> 6) & 0x03), + EncodingType::kDelta); + EXPECT_EQ((encoding_buff[0] >> 1) & 0x1f, 0); + EXPECT_EQ(((encoding_buff[0] & 0x01) << 8) | (unsigned char)encoding_buff[1], + delta_len - 1); + + PaxDecoder::DecodingOption decoder_options; + decoder_options.column_encode_type = + ColumnEncoding_Kind::ColumnEncoding_Kind_ORC_RLE_V2; + decoder_options.is_sign = sign; + auto decoder = + PaxDecoder::CreateDecoder(decoder_options) + ->SetSrcBuffer(shared_data->GetBuffer(), shared_data->Used()); + + decoder->SetDataBuffer(shared_dst_data); + decoder->Decoding(); + + EXPECT_EQ(shared_dst_data->Used(), delta_len * sizeof(int64)); + + auto result_dst_data = + new DataBuffer(reinterpret_cast(shared_dst_data->Start()), + shared_dst_data->Used(), false, false); + + for (size_t i = 0; i < delta_len; i++) { + EXPECT_EQ((*result_dst_data)[i], data[i]); + } + + delete result_dst_data; + delete data; + delete shared_data; + delete shared_dst_data; + delete encoder; + delete decoder; +} + +TEST_P(PaxEncodingDeltaIncDecRangeTest, TestOrcIncWithoutFixedDeltaEncoding) { + PaxEncoder *encoder; + int64 *data; + auto delta_len = ::testing::get<0>(GetParam()); + auto delta_inc = ::testing::get<1>(GetParam()); + auto sign = ::testing::get<2>(GetParam()); + auto shared_data = new DataBuffer(delta_len * sizeof(int64)); + auto shared_dst_data = new DataBuffer(delta_len * sizeof(int64)); + + PaxEncoder::EncodingOption encoder_options; + encoder_options.column_encode_type = + ColumnEncoding_Kind::ColumnEncoding_Kind_ORC_RLE_V2; + encoder_options.is_sign = sign; + encoder = PaxEncoder::CreateStreamingEncoder(encoder_options); + + EXPECT_TRUE(encoder); + + encoder->SetDataBuffer(shared_data); + + data = reinterpret_cast(cbdb::Palloc(delta_len * sizeof(int64))); + for (size_t i = 0; i < delta_len; i++) { + data[i] = i * delta_inc; + if (i < delta_inc && i % 2 == 0) { + data[i] -= i; + } + } + + for (size_t i = 0; i < delta_len; i++) { + if (sign) { + data[i] = -data[i]; + } + encoder->Append(data[i]); + } + encoder->Flush(); + + EXPECT_NE(encoder->GetBuffer(), nullptr); + // eq 4 or 5 depends on lens of max value + EXPECT_NE(encoder->GetBufferSize(), 0); + + auto encoding_buff = encoder->GetBuffer(); + EXPECT_EQ(static_cast((encoding_buff[0] >> 6) & 0x03), + EncodingType::kDelta); + + PaxDecoder::DecodingOption decoder_options; + 
decoder_options.column_encode_type = + ColumnEncoding_Kind::ColumnEncoding_Kind_ORC_RLE_V2; + decoder_options.is_sign = sign; + auto decoder = + PaxDecoder::CreateDecoder(decoder_options) + ->SetSrcBuffer(shared_data->GetBuffer(), shared_data->Used()); + + decoder->SetDataBuffer(shared_dst_data); + decoder->Decoding(); + + EXPECT_EQ(shared_dst_data->Used(), delta_len * sizeof(int64)); + + auto result_dst_data = + new DataBuffer(reinterpret_cast(shared_dst_data->Start()), + shared_dst_data->Used(), false, false); + + for (size_t i = 0; i < delta_len; i++) { + EXPECT_EQ((*result_dst_data)[i], data[i]); + } + + delete result_dst_data; + delete data; + delete shared_data; + delete shared_dst_data; + delete encoder; + delete decoder; +} + +TEST_P(PaxEncodingDeltaIncDecRangeTest, TestOrcDecDeltaEncoding) { + PaxEncoder *encoder; + int64 *data; + auto delta_len = ::testing::get<0>(GetParam()); + auto delta_dec = ::testing::get<1>(GetParam()); + auto sign = ::testing::get<2>(GetParam()); + auto shared_data = new DataBuffer(delta_len * sizeof(int64)); + auto shared_dst_data = new DataBuffer(delta_len * sizeof(int64)); + + PaxEncoder::EncodingOption encoder_options; + encoder_options.column_encode_type = + ColumnEncoding_Kind::ColumnEncoding_Kind_ORC_RLE_V2; + encoder_options.is_sign = sign; + encoder = PaxEncoder::CreateStreamingEncoder(encoder_options); + + EXPECT_TRUE(encoder); + + encoder->SetDataBuffer(shared_data); + data = reinterpret_cast(cbdb::Palloc(delta_len * sizeof(int64))); + + size_t j = 0; + for (int64 i = (static_cast(delta_len - 1) * delta_dec); i >= 0; + i -= delta_dec) { + data[j] = i; + j++; + } + + for (size_t i = 0; i < delta_len; i++) { + if (sign) { + data[i] = -data[i]; + } + encoder->Append(data[i]); + } + + encoder->Flush(); + + EXPECT_NE(encoder->GetBuffer(), nullptr); + EXPECT_GT(encoder->GetBufferSize(), 0); + + auto encoding_buff = encoder->GetBuffer(); + EXPECT_EQ(static_cast((encoding_buff[0] >> 6) & 0x03), + EncodingType::kDelta); + EXPECT_EQ((encoding_buff[0] >> 1) & 0x1f, 0); + EXPECT_EQ(((encoding_buff[0] & 0x01) << 8) | (unsigned char)encoding_buff[1], + delta_len - 1); + + PaxDecoder::DecodingOption decoder_options; + decoder_options.column_encode_type = + ColumnEncoding_Kind::ColumnEncoding_Kind_ORC_RLE_V2; + decoder_options.is_sign = sign; + auto decoder = + PaxDecoder::CreateDecoder(decoder_options) + ->SetSrcBuffer(shared_data->GetBuffer(), shared_data->Used()); + + decoder->SetDataBuffer(shared_dst_data); + decoder->Decoding(); + + EXPECT_EQ(shared_dst_data->Used(), delta_len * sizeof(int64)); + + auto result_dst_data = + new DataBuffer(reinterpret_cast(shared_dst_data->Start()), + shared_dst_data->Used(), false, false); + + for (size_t i = 0; i < delta_len; i++) { + EXPECT_EQ((*result_dst_data)[i], data[i]); + } + + delete result_dst_data; + delete data; + delete shared_data; + delete shared_dst_data; + delete encoder; + delete decoder; +} + +TEST_P(PaxEncodingDeltaIncDecRangeTest, TestOrcDecWithoutFixedDeltaEncoding) { + PaxEncoder *encoder; + int64 *data; + auto delta_len = ::testing::get<0>(GetParam()); + auto delta_dec = ::testing::get<1>(GetParam()); + auto sign = ::testing::get<2>(GetParam()); + auto shared_data = new DataBuffer(delta_len * sizeof(int64)); + auto shared_dst_data = new DataBuffer(delta_len * sizeof(int64)); + + PaxEncoder::EncodingOption encoder_options; + encoder_options.column_encode_type = + ColumnEncoding_Kind::ColumnEncoding_Kind_ORC_RLE_V2; + encoder_options.is_sign = sign; + encoder = 
PaxEncoder::CreateStreamingEncoder(encoder_options); + + EXPECT_TRUE(encoder); + + encoder->SetDataBuffer(shared_data); + data = reinterpret_cast(cbdb::Palloc(delta_len * sizeof(int64))); + + size_t j = 0; + for (int64 i = (static_cast(delta_len - 1) * delta_dec); i >= 0; + i -= delta_dec) { + data[j] = i; + if (j < delta_dec && j % 2 == 0) { + data[j] += j; + } + j++; + } + + for (size_t i = 0; i < delta_len; i++) { + if (sign) { + data[i] = -data[i]; + } + encoder->Append(data[i]); + } + + encoder->Flush(); + + EXPECT_NE(encoder->GetBuffer(), nullptr); + EXPECT_NE(encoder->GetBufferSize(), 0); + + auto encoding_buff = encoder->GetBuffer(); + EXPECT_EQ(static_cast((encoding_buff[0] >> 6) & 0x03), + EncodingType::kDelta); + + PaxDecoder::DecodingOption decoder_options; + decoder_options.column_encode_type = + ColumnEncoding_Kind::ColumnEncoding_Kind_ORC_RLE_V2; + decoder_options.is_sign = sign; + auto decoder = + PaxDecoder::CreateDecoder(decoder_options) + ->SetSrcBuffer(shared_data->GetBuffer(), shared_data->Used()); + + decoder->SetDataBuffer(shared_dst_data); + decoder->Decoding(); + + EXPECT_EQ(shared_dst_data->Used(), delta_len * sizeof(int64)); + + auto result_dst_data = + new DataBuffer(reinterpret_cast(shared_dst_data->Start()), + shared_dst_data->Used(), false, false); + + for (size_t i = 0; i < delta_len; i++) { + EXPECT_EQ((*result_dst_data)[i], data[i]); + } + + delete result_dst_data; + delete data; + delete shared_data; + delete shared_dst_data; + delete encoder; + delete decoder; +} + +INSTANTIATE_TEST_CASE_P(PaxEncodingRangeTestCombine, + PaxEncodingDeltaIncDecRangeTest, + testing::Combine(testing::Values(11, 100, 256, 345, 511, + 512), + testing::Values(1, 7, 99, 4294967295, + 18014398509481984ULL), + testing::Values(true, false))); + +TEST_P(PaxEncodingWriteReadLongsRangeTest, TestOrcDirectWriteReadLong) { + auto write_max = ::testing::get<0>(GetParam()); + auto sign = ::testing::get<1>(GetParam()); + write_max--; + + EXPECT_FALSE(sign); + + DataBuffer *write_dst_buffer; + TreatedDataBuffer *read_src_buffer; + int64 *data, *result; + + write_dst_buffer = new DataBuffer(1024); + read_src_buffer = new TreatedDataBuffer( + reinterpret_cast(write_dst_buffer->GetBuffer()), 1024); + data = reinterpret_cast(cbdb::Palloc(3 * sizeof(int64))); + result = reinterpret_cast(cbdb::Palloc(3 * sizeof(int64))); + + data[0] = 0; + std::random_device rd; + std::mt19937_64 eng(rd()); + std::uniform_int_distribution distr; + data[1] = distr(eng) % write_max; + data[1] = sign ? -data[1] : data[1]; + data[2] = sign ? -write_max : write_max; + + auto bits = write_max == 1 ? 
1 : static_cast(log2(write_max)) + 1; + auto bits_align = GetClosestAlignedBits(bits); + + WriteLongs(write_dst_buffer, data, 0, 3, bits_align); + read_src_buffer->Brush(write_dst_buffer->Used()); + + uint32 bits_left = 0; + ReadLongs(read_src_buffer, result, 0, 3, bits_align, &bits_left); + + ASSERT_EQ(result[0], data[0]); + ASSERT_EQ(result[1], data[1]); + ASSERT_EQ(result[2], data[2]); +} + +// Do not change to foreach(2ULL ^ n) +// Then it will +INSTANTIATE_TEST_CASE_P( + PaxEncodingRangeTestCombine, PaxEncodingWriteReadLongsRangeTest, + testing::Combine( + testing::Values( + pow(2ULL, 1), pow(2ULL, 2), pow(2ULL, 3), pow(2ULL, 4), + pow(2ULL, 5), pow(2ULL, 6), pow(2ULL, 7), pow(2ULL, 8), + pow(2ULL, 9), pow(2ULL, 10), pow(2ULL, 11), pow(2ULL, 12), + pow(2ULL, 13), pow(2ULL, 14), pow(2ULL, 15), pow(2ULL, 16), + pow(2ULL, 17), pow(2ULL, 18), pow(2ULL, 19), pow(2ULL, 20), + pow(2ULL, 21), pow(2ULL, 22), pow(2ULL, 23), pow(2ULL, 24), + pow(2ULL, 25), pow(2ULL, 26), pow(2ULL, 27), pow(2ULL, 28), + pow(2ULL, 29), pow(2ULL, 30), pow(2ULL, 31), pow(2ULL, 32), + pow(2ULL, 33), pow(2ULL, 34), pow(2ULL, 35), pow(2ULL, 36), + pow(2ULL, 37), pow(2ULL, 38), pow(2ULL, 39), pow(2ULL, 40), + pow(2ULL, 41), pow(2ULL, 42), pow(2ULL, 43), pow(2ULL, 44), + pow(2ULL, 45), pow(2ULL, 46), pow(2ULL, 47), pow(2ULL, 48), + pow(2ULL, 49), pow(2ULL, 50), pow(2ULL, 51), pow(2ULL, 52), + pow(2ULL, 53), pow(2ULL, 54), pow(2ULL, 55), pow(2ULL, 56), + pow(2ULL, 57), pow(2ULL, 58), pow(2ULL, 59), pow(2ULL, 60), + pow(2ULL, 61), pow(2ULL, 62), pow(2ULL, 63)), + testing::Values(false))); + +TEST_P(PaxEncodingDirectRangeTest, TestOrcDirectEncoding) { + PaxEncoder *encoder; + int64 *data; + auto direct_len = ::testing::get<0>(GetParam()); + auto direct_range = ::testing::get<1>(GetParam()); + auto sign = ::testing::get<2>(GetParam()); + // will auto expanded + auto shared_data = new DataBuffer(direct_len * sizeof(int64)); + auto shared_dst_data = new DataBuffer(direct_len * sizeof(int64)); + + PaxEncoder::EncodingOption encoder_options; + encoder_options.column_encode_type = + ColumnEncoding_Kind::ColumnEncoding_Kind_ORC_RLE_V2; + encoder_options.is_sign = sign; + encoder = PaxEncoder::CreateStreamingEncoder(encoder_options); + + EXPECT_TRUE(encoder); + encoder->SetDataBuffer(shared_data); + + data = reinterpret_cast(cbdb::Palloc(direct_len * sizeof(int64))); + for (size_t i = 0; i < direct_len; i++) { + data[i] = i * direct_range; + } + + for (size_t i = 1; i < direct_len; i += 2) { + int64 temp = data[i]; + data[i] = data[i - 1]; + data[i - 1] = temp; + } + + for (size_t i = 0; i < direct_len; i++) { + if (sign) { + data[i] = -data[i]; + } + + encoder->Append(data[i]); + } + encoder->Flush(); + + EXPECT_NE(encoder->GetBuffer(), nullptr); + EXPECT_NE(encoder->GetBufferSize(), 0); + + auto encoding_buff = encoder->GetBuffer(); + EXPECT_EQ(static_cast((encoding_buff[0] >> 6) & 0x03), + EncodingType::kDirect); + // EXPECT_EQ((encoding_buff[0] >> 1) & 0x1f, 0); + // EXPECT_EQ(((encoding_buff[0] & 0x01) << 8) | (unsigned + // char)encoding_buff[1], + // direct_len - 1); + + PaxDecoder::DecodingOption decoder_options; + decoder_options.column_encode_type = + ColumnEncoding_Kind::ColumnEncoding_Kind_ORC_RLE_V2; + decoder_options.is_sign = sign; + auto decoder = + PaxDecoder::CreateDecoder(decoder_options) + ->SetSrcBuffer(shared_data->GetBuffer(), shared_data->Used()); + + decoder->SetDataBuffer(shared_dst_data); + decoder->Decoding(); + + EXPECT_EQ(shared_dst_data->Used(), direct_len * sizeof(int64)); + + auto result_dst_data = 
+ new DataBuffer(reinterpret_cast(shared_dst_data->Start()), + shared_dst_data->Used(), false, false); + + for (size_t i = 0; i < direct_len; i++) { + EXPECT_EQ((*result_dst_data)[i], data[i]); + } + + delete result_dst_data; + delete data; + delete shared_data; + delete shared_dst_data; + delete encoder; + delete decoder; +} + +INSTANTIATE_TEST_CASE_P( + PaxEncodingRangeTestCombine, PaxEncodingDirectRangeTest, + testing::Combine(testing::Values(4, 10, 128, 256, 512, 1024), + testing::Values(7, 99, 4294967295, 18014398509481984ULL), + testing::Values(true, false))); + +TEST_P(PaxEncodingPBTest, TestOrcPBEncoding) { + PaxEncoder *encoder; + auto data_vec = ::testing::get<0>(GetParam()); + auto data_bits = ::testing::get<1>(GetParam()); + auto data_lens = data_vec.size(); + auto shared_data = new DataBuffer(data_lens * sizeof(int64)); + auto shared_dst_data = new DataBuffer(data_lens * sizeof(int64)); + + PaxEncoder::EncodingOption encoder_options; + encoder_options.column_encode_type = + ColumnEncoding_Kind::ColumnEncoding_Kind_ORC_RLE_V2; + encoder_options.is_sign = true; + encoder = PaxEncoder::CreateStreamingEncoder(encoder_options); + + EXPECT_TRUE(encoder); + encoder->SetDataBuffer(shared_data); + + for (size_t i = 0; i < data_lens; i++) { + encoder->Append(data_vec[i]); + } + encoder->Flush(); + + EXPECT_NE(encoder->GetBuffer(), nullptr); + EXPECT_NE(encoder->GetBufferSize(), 0); + + auto encoding_buff = encoder->GetBuffer(); + EXPECT_EQ(static_cast((encoding_buff[0] >> 6) & 0x03), + EncodingType::kPatchedBase); + + PaxDecoder::DecodingOption decoder_options; + decoder_options.column_encode_type = + ColumnEncoding_Kind::ColumnEncoding_Kind_ORC_RLE_V2; + decoder_options.is_sign = true; + + auto decoder = + GetDecoderByBits(data_bits, shared_data, std::move(decoder_options)); + EXPECT_TRUE(decoder); + + decoder->SetDataBuffer(shared_dst_data); + decoder->Decoding(); + + EXPECT_EQ(shared_dst_data->Used(), 20 * data_bits / 8); + + switch (data_bits) { + case 32: { + auto result_dst_data = new DataBuffer( + reinterpret_cast(shared_dst_data->Start()), + shared_dst_data->Used(), false, false); + + for (size_t i = 0; i < 20; i++) { + EXPECT_EQ((*result_dst_data)[i], data_vec[i]); + } + delete result_dst_data; + break; + } + case 64: { + auto result_dst_data = new DataBuffer( + reinterpret_cast(shared_dst_data->Start()), + shared_dst_data->Used(), false, false); + + for (size_t i = 0; i < 20; i++) { + EXPECT_EQ((*result_dst_data)[i], data_vec[i]); + } + delete result_dst_data; + break; + } + default: + break; + } + + delete shared_data; + delete shared_dst_data; + delete encoder; + delete decoder; +} + +INSTANTIATE_TEST_CASE_P( + PaxEncodingRangeTestCombine, PaxEncodingPBTest, + testing::Combine( + testing::Values( + std::vector{2030, 2000, 2020, 1000000, 2040, 2050, 2060, + 2070, 2080, 2090, 2100, 2110, 2120, 2130, + 2140, 2150, 2160, 2170, 2180, 2190}, + std::vector{2030, 2000, 2020, 2040, 2050, 2060, 2070, + 2080, 2090, 2100, 1000000, 2110, 2120, 2130, + 2140, 2150, 2160, 2170, 2180, 2190}, + std::vector{2030, 3333, 1111, 4444, 9991, 33213, 3213, + 1, 2090, 2100, 1000000, 2110, 2120, 2130, + 2140, 11, 2160, 2170, 2180, 2190}), + testing::Values(32, 64))); + +TEST_P(PaxEncodingRawDataTest, TestOrcMixEncoding) { + PaxEncoder *encoder; + auto data_vec = ::testing::get<0>(GetParam()); + auto data_bits = ::testing::get<1>(GetParam()); + auto data_lens = data_vec.size(); + auto shared_data = new DataBuffer(data_lens * sizeof(int64)); + auto shared_dst_data = new DataBuffer(data_lens * 
sizeof(int64)); + + PaxEncoder::EncodingOption encoder_options; + encoder_options.column_encode_type = + ColumnEncoding_Kind::ColumnEncoding_Kind_ORC_RLE_V2; + encoder_options.is_sign = true; + encoder = PaxEncoder::CreateStreamingEncoder(encoder_options); + + EXPECT_TRUE(encoder); + encoder->SetDataBuffer(shared_data); + + for (size_t i = 0; i < data_lens; i++) { + encoder->Append(data_vec[i]); + } + encoder->Flush(); + // Flush should be safe to call multiple times + encoder->Flush(); + encoder->Flush(); + encoder->Flush(); + encoder->Flush(); + + EXPECT_NE(encoder->GetBuffer(), nullptr); + EXPECT_NE(encoder->GetBufferSize(), 0); + + PaxDecoder::DecodingOption decoder_options; + decoder_options.column_encode_type = + ColumnEncoding_Kind::ColumnEncoding_Kind_ORC_RLE_V2; + decoder_options.is_sign = true; + + auto decoder = + GetDecoderByBits(data_bits, shared_data, std::move(decoder_options)); + EXPECT_TRUE(decoder); + + decoder->SetDataBuffer(shared_dst_data); + decoder->Decoding(); + + EXPECT_EQ(shared_dst_data->Used(), data_lens * data_bits / 8); + + switch (data_bits) { + case 32: { + auto result_dst_data = new DataBuffer<int32>( + reinterpret_cast<int32 *>(shared_dst_data->Start()), + shared_dst_data->Used(), false, false); + + for (size_t i = 0; i < data_lens; i++) { + EXPECT_EQ((*result_dst_data)[i], data_vec[i]); + } + delete result_dst_data; + break; + } + case 64: { + auto result_dst_data = new DataBuffer<int64>( + reinterpret_cast<int64 *>(shared_dst_data->Start()), + shared_dst_data->Used(), false, false); + + for (size_t i = 0; i < data_lens; i++) { + EXPECT_EQ((*result_dst_data)[i], data_vec[i]); + } + delete result_dst_data; + break; + } + default: + break; + } + + delete shared_data; + delete shared_dst_data; + delete encoder; + delete decoder; +} + +INSTANTIATE_TEST_CASE_P( + PaxEncodingRangeTestCombine, PaxEncodingRawDataTest, + testing::Combine( + testing::Values( + std::vector<int64>{1, 23, 4, 5123, 123123, 3213214, 543123, 3213, + 34, 123, 5213, 23, 52}, + std::vector<int64>{-1, -23, -4, -5123, -123123, -3213214, -543123, + -3213, -34, -123, -5213, -23, -52}, + std::vector<int64>{1, 2, 3, 4, 5, 123, 3, 3, 3, 3, 4, 4, 4, 5, 55, + 5}, + std::vector<int64>{-1, -2, -3, -4, -5, -123, -3, -3, -3, -3, -4, -4, + -4, -5, -55, -5}, + std::vector<int64>{2030, 2000, 2020, 1000000, 2040, 2050, 2060, + 2070, 2080, 2090, 2100, 2110, 2120, 2130, + 2140, 2150, 2160, 2170, 2180, 2190}, + std::vector<int64>{-2030, -2000, -2020, -1000000, -2040, + -2050, -2060, -2070, -2080, -2090, + -2100, -2110, -2120, -2130, -2140, + -2150, -2160, -2170, -2180, -2190}, + std::vector<int64>{1, 2, 3, 4, 5, 123, 3, 3, 3, 3, 3, 3, 3, 3, + 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 5, 55, 5}, + std::vector<int64>{-1, -2, -3, -4, -5, -123, -3, -3, -3, + -3, -3, -3, -3, -3, -3, -3, -3, -3, + -3, -3, -3, -4, -4, -4, -5, -55, -5}, + std::vector<int64>{1, 2, 3, 4, 5, 123, 3, 3, 3, 3, 3, 3, 3, 3, + 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, + 4, 4, 4, 4, 4, 4, 4, 4, 4, 5, 55, 5}, + std::vector<int64>{-1, -2, -3, -4, -5, -123, -3, -3, -3, -3, + -3, -3, -3, -3, -3, -3, -3, -3, -3, -3, + -3, -4, -4, -4, -4, -4, -4, -4, -4, -4, + -4, -4, -4, -4, -4, -4, -4, -5, -55, -5}, + std::vector<int64>{1, 2, 3, 4, 5, 123, 3, 3, 3, 3, 3, 3, 3, 3, + 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 333, 4, 4, + 4, 4, 4, 823, 4, 4, 4, 4, 4, 5, 55, 5}, + std::vector<int64>{-1, -2, -3, -4, -5, -123, -3, -3, -3, -3, + -3, -3, -3, -3, -3, -3, -3, -3, -3, -3, + -3, -4, -4, -4, -4, -333, -4, -4, -4, -4, + -4, -823, -4, -4, -4, -4, -4, -5, -55, -5}, + std::vector<int64>{-1, -2, -3, -4, -5, 123, -3, -3, -3, -3, + -3, -3, -3, 3, -3, 3, -3, -3, -3, -3, + -3, -4, 4, -4, -4, 333, -4, -4, -4, -4, + -4,
-823, -4, -4, 4, -4, -4, -5, -55, -5}), + testing::Values(32, 64))); + +TEST_F(PaxEncodingTest, TestOrcShortRepeatWithNULL) { + PaxEncoder *encoder; + int64 *data; + auto shared_data = new DataBuffer<char>(1024); + + size_t sr_len = 10; + size_t total_len = 15; + + PaxEncoder::EncodingOption encoder_options; + + encoder_options.column_encode_type = + ColumnEncoding_Kind::ColumnEncoding_Kind_ORC_RLE_V2; + encoder_options.is_sign = true; + encoder = PaxEncoder::CreateStreamingEncoder(encoder_options); + + EXPECT_TRUE(encoder); + + encoder->SetDataBuffer(shared_data); + + data = reinterpret_cast<int64 *>(cbdb::Palloc(sizeof(int64))); + *data = 2; + for (size_t i = 0; i < sr_len; i++) { + encoder->Append(*data); + } + encoder->Flush(); + + EXPECT_NE(encoder->GetBuffer(), nullptr); + EXPECT_EQ(encoder->GetBufferSize(), 2); + + auto encoding_buff = encoder->GetBuffer(); + EXPECT_EQ(static_cast<EncodingType>((encoding_buff[0] >> 6) & 0x03), + EncodingType::kShortRepeat); + + char *cpy_data = reinterpret_cast<char *>(palloc(shared_data->Used())); + memcpy(cpy_data, shared_data->GetBuffer(), shared_data->Used()); + + { + // case 1: nulls at the head + // (total_len - sr_len) nulls, then the short-repeat data + auto shared_dst_data = new DataBuffer<char>(10240); + + std::vector<char> not_null; + for (size_t i = 0; i < total_len; ++i) { + not_null.push_back(i >= (total_len - sr_len)); + } + PaxDecoder::DecodingOption decoder_options; + decoder_options.column_encode_type = + ColumnEncoding_Kind::ColumnEncoding_Kind_ORC_RLE_V2; + decoder_options.is_sign = true; + auto decoder = + PaxDecoder::CreateDecoder(decoder_options) + ->SetSrcBuffer(shared_data->GetBuffer(), shared_data->Used()); + + decoder->SetDataBuffer(shared_dst_data); + auto n_read = decoder->Decoding(not_null.data(), total_len); + ASSERT_EQ(n_read, shared_dst_data->Used()); + + auto result_dst_data = new DataBuffer<int64>( + reinterpret_cast<int64 *>(shared_dst_data->Start()), + shared_dst_data->Used(), false, false); + + ASSERT_EQ(total_len * sizeof(int64), shared_dst_data->Used()); + for (size_t i = total_len - sr_len; i < total_len; i++) { + ASSERT_EQ(2, (*result_dst_data)[i]); + } + + // the source buffer should not have changed + for (size_t i = 0; i < shared_data->Used(); i++) { + ASSERT_EQ(cpy_data[i], (*shared_data)[i]); + } + + delete shared_dst_data; + delete result_dst_data; + delete decoder; + } + + { + // case 2: nulls at the tail + // the short-repeat data, then (total_len - sr_len) nulls + auto shared_dst_data = new DataBuffer<char>(10240); + + std::vector<char> not_null; + for (size_t i = 0; i < total_len; ++i) { + not_null.push_back(i < sr_len); + } + PaxDecoder::DecodingOption decoder_options; + decoder_options.column_encode_type = + ColumnEncoding_Kind::ColumnEncoding_Kind_ORC_RLE_V2; + decoder_options.is_sign = true; + auto decoder = + PaxDecoder::CreateDecoder(decoder_options) + ->SetSrcBuffer(shared_data->GetBuffer(), shared_data->Used()); + + decoder->SetDataBuffer(shared_dst_data); + auto n_read = decoder->Decoding(not_null.data(), total_len); + ASSERT_EQ(n_read, shared_dst_data->Used()); + + auto result_dst_data = new DataBuffer<int64>( + reinterpret_cast<int64 *>(shared_dst_data->Start()), + shared_dst_data->Used(), false, false); + + ASSERT_EQ(total_len * sizeof(int64), shared_dst_data->Used()); + for (size_t i = 0; i < sr_len; i++) { + ASSERT_EQ(2, (*result_dst_data)[i]); + } + + // the source buffer should not have changed + for (size_t i = 0; i < shared_data->Used(); i++) { + ASSERT_EQ(cpy_data[i], (*shared_data)[i]); + } + + delete shared_dst_data; + delete result_dst_data; + delete decoder; + } + + { + // case 3: nulls interleaved within the short-repeat run + auto
shared_dst_data = new DataBuffer<char>(10240); + + std::vector<char> not_null; + for (size_t i = 0; i < total_len; ++i) { + not_null.push_back(i % 3 != 0); + } + PaxDecoder::DecodingOption decoder_options; + decoder_options.column_encode_type = + ColumnEncoding_Kind::ColumnEncoding_Kind_ORC_RLE_V2; + decoder_options.is_sign = true; + auto decoder = + PaxDecoder::CreateDecoder(decoder_options) + ->SetSrcBuffer(shared_data->GetBuffer(), shared_data->Used()); + + decoder->SetDataBuffer(shared_dst_data); + auto n_read = decoder->Decoding(not_null.data(), total_len); + ASSERT_EQ(n_read, shared_dst_data->Used()); + + auto result_dst_data = new DataBuffer<int64>( + reinterpret_cast<int64 *>(shared_dst_data->Start()), + shared_dst_data->Used(), false, false); + + ASSERT_EQ(total_len * sizeof(int64), shared_dst_data->Used()); + for (size_t i = 0; i < total_len; i++) { + if (i % 3 != 0) { + ASSERT_EQ(2, (*result_dst_data)[i]); + } + } + + // the source buffer should not have changed + for (size_t i = 0; i < shared_data->Used(); i++) { + ASSERT_EQ(cpy_data[i], (*shared_data)[i]); + } + + delete shared_dst_data; + delete result_dst_data; + delete decoder; + } + + pfree(cpy_data); + cbdb::Pfree(data); + delete shared_data; + delete encoder; +} + +TEST_F(PaxEncodingTest, TestOrcDeltaEncodingWithNULL) { + PaxEncoder *encoder; + int64 *data; + auto shared_data = new DataBuffer<char>(1024); + + size_t delta_len = 20; + size_t total_len = 30; + + PaxEncoder::EncodingOption encoder_options; + encoder_options.column_encode_type = + ColumnEncoding_Kind::ColumnEncoding_Kind_ORC_RLE_V2; + encoder_options.is_sign = true; + encoder = PaxEncoder::CreateStreamingEncoder(encoder_options); + + EXPECT_TRUE(encoder); + + encoder->SetDataBuffer(shared_data); + + data = reinterpret_cast<int64 *>(cbdb::Palloc(sizeof(int64))); + *data = 2; + for (size_t i = 0; i < delta_len; i++) { + encoder->Append(*data); + } + encoder->Flush(); + + auto encoding_buff = encoder->GetBuffer(); + EXPECT_EQ(static_cast<EncodingType>((encoding_buff[0] >> 6) & 0x03), + EncodingType::kDelta); + + char *cpy_data = reinterpret_cast<char *>(palloc(shared_data->Used())); + memcpy(cpy_data, shared_data->GetBuffer(), shared_data->Used()); + + { + // case 1: nulls at the head + // (total_len - delta_len) nulls, then the delta run + auto shared_dst_data = new DataBuffer<char>(10240); + + std::vector<char> not_null; + for (size_t i = 0; i < total_len; ++i) { + not_null.push_back(i >= (total_len - delta_len)); + } + PaxDecoder::DecodingOption decoder_options; + decoder_options.column_encode_type = + ColumnEncoding_Kind::ColumnEncoding_Kind_ORC_RLE_V2; + decoder_options.is_sign = true; + auto decoder = + PaxDecoder::CreateDecoder(decoder_options) + ->SetSrcBuffer(shared_data->GetBuffer(), shared_data->Used()); + + decoder->SetDataBuffer(shared_dst_data); + auto n_read = decoder->Decoding(not_null.data(), total_len); + ASSERT_EQ(n_read, shared_dst_data->Used()); + + auto result_dst_data = new DataBuffer<int64>( + reinterpret_cast<int64 *>(shared_dst_data->Start()), + shared_dst_data->Used(), false, false); + + ASSERT_EQ(total_len * sizeof(int64), shared_dst_data->Used()); + for (size_t i = total_len - delta_len; i < total_len; i++) { + ASSERT_EQ(2, (*result_dst_data)[i]); + } + + // the source buffer should not have changed + for (size_t i = 0; i < shared_data->Used(); i++) { + ASSERT_EQ(cpy_data[i], (*shared_data)[i]); + } + + delete shared_dst_data; + delete result_dst_data; + delete decoder; + } + + { + // case 2: nulls at the tail + // the delta run, then (total_len - delta_len) nulls + auto shared_dst_data = new DataBuffer<char>(10240); + + std::vector<char> not_null; + for (size_t i = 0; i < total_len; ++i) { +
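// the first delta_len slots are non-null; the trailing slots are all NULLs +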
not_null.push_back(i < delta_len); + } + PaxDecoder::DecodingOption decoder_options; + decoder_options.column_encode_type = + ColumnEncoding_Kind::ColumnEncoding_Kind_ORC_RLE_V2; + decoder_options.is_sign = true; + auto decoder = + PaxDecoder::CreateDecoder(decoder_options) + ->SetSrcBuffer(shared_data->GetBuffer(), shared_data->Used()); + + decoder->SetDataBuffer(shared_dst_data); + auto n_read = decoder->Decoding(not_null.data(), total_len); + ASSERT_EQ(n_read, shared_dst_data->Used()); + + auto result_dst_data = new DataBuffer<int64>( + reinterpret_cast<int64 *>(shared_dst_data->Start()), + shared_dst_data->Used(), false, false); + + ASSERT_EQ(total_len * sizeof(int64), shared_dst_data->Used()); + for (size_t i = 0; i < delta_len; i++) { + ASSERT_EQ(2, (*result_dst_data)[i]); + } + + // the source buffer should not have changed + for (size_t i = 0; i < shared_data->Used(); i++) { + ASSERT_EQ(cpy_data[i], (*shared_data)[i]); + } + + delete shared_dst_data; + delete result_dst_data; + delete decoder; + } + + { + // case 3: nulls interleaved within the delta run + auto shared_dst_data = new DataBuffer<char>(10240); + + std::vector<char> not_null; + for (size_t i = 0; i < total_len; ++i) { + not_null.push_back(i % 3 != 0); + } + + PaxDecoder::DecodingOption decoder_options; + decoder_options.column_encode_type = + ColumnEncoding_Kind::ColumnEncoding_Kind_ORC_RLE_V2; + decoder_options.is_sign = true; + auto decoder = + PaxDecoder::CreateDecoder(decoder_options) + ->SetSrcBuffer(shared_data->GetBuffer(), shared_data->Used()); + + decoder->SetDataBuffer(shared_dst_data); + auto n_read = decoder->Decoding(not_null.data(), total_len); + ASSERT_EQ(n_read, shared_dst_data->Used()); + + auto result_dst_data = new DataBuffer<int64>( + reinterpret_cast<int64 *>(shared_dst_data->Start()), + shared_dst_data->Used(), false, false); + + ASSERT_EQ(total_len * sizeof(int64), shared_dst_data->Used()); + for (size_t i = 0; i < total_len; i++) { + if (i % 3 != 0) { + ASSERT_EQ(2, (*result_dst_data)[i]); + } + } + + // the source buffer should not have changed + for (size_t i = 0; i < shared_data->Used(); i++) { + ASSERT_EQ(cpy_data[i], (*shared_data)[i]); + } + + delete shared_dst_data; + delete result_dst_data; + delete decoder; + } + + pfree(cpy_data); + cbdb::Pfree(data); + delete shared_data; + delete encoder; +} + +TEST_F(PaxEncodingTest, TestEncodingWithAllNULL) { + auto shared_dst_data = new DataBuffer<char>(10240); + + std::vector<char> not_null; + for (size_t i = 0; i < 20; ++i) { + not_null.push_back(false); + } + + PaxDecoder::DecodingOption decoder_options; + decoder_options.column_encode_type = + ColumnEncoding_Kind::ColumnEncoding_Kind_ORC_RLE_V2; + decoder_options.is_sign = true; + auto decoder = PaxDecoder::CreateDecoder(decoder_options) + ->SetSrcBuffer(nullptr, 0); + + decoder->SetDataBuffer(shared_dst_data); + auto n_read = decoder->Decoding(not_null.data(), 20); + ASSERT_EQ(n_read, shared_dst_data->Used()); +} + +} // namespace pax::tests diff --git a/contrib/pax_storage/src/cpp/storage/columns/pax_encoding_utils.cc b/contrib/pax_storage/src/cpp/storage/columns/pax_encoding_utils.cc new file mode 100644 index 00000000000..745c09180f8 --- /dev/null +++ b/contrib/pax_storage/src/cpp/storage/columns/pax_encoding_utils.cc @@ -0,0 +1,124 @@ +#include "storage/columns/pax_encoding_utils.h" + +namespace pax { +// Map FBS enum to bit width value. +const uint8 kFBSToBitWidthMap[FixedBitSizes::kSIZE] = { + 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, + 17, 18, 19, 20, 21, 22, 23, 24, 26, 28, 30, 32, 40, 48, 56, 64}; + +// Map bit length i to closest fixed bit width that can contain i bits.
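+// e.g. kClosestBitsMap[25] == 26: a 25-bit value rounds up to 26 bits, the +// next width representable in ORC RLE v2 (see kFBSToBitWidthMap above).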
+const uint8 kClosestBitsMap[65] = { + 1, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, + 17, 18, 19, 20, 21, 22, 23, 24, 26, 26, 28, 28, 30, 30, 32, 32, 40, + 40, 40, 40, 40, 40, 40, 40, 48, 48, 48, 48, 48, 48, 48, 48, 56, 56, + 56, 56, 56, 56, 56, 56, 64, 64, 64, 64, 64, 64, 64, 64}; + +// Map bit length i to closest aligned fixed bit width that can contain i bits. +const uint8 kClosestAlignedBitsMap[65] = { + 1, 1, 2, 4, 4, 8, 8, 8, 8, 16, 16, 16, 16, 16, 16, 16, 16, + 24, 24, 24, 24, 24, 24, 24, 24, 32, 32, 32, 32, 32, 32, 32, 32, 40, + 40, 40, 40, 40, 40, 40, 40, 48, 48, 48, 48, 48, 48, 48, 48, 56, 56, + 56, 56, 56, 56, 56, 56, 64, 64, 64, 64, 64, 64, 64, 64}; + +// Map bit width to FBS enum. +const uint8 kBitWidthToFBSMap[65] = { + FixedBitSizes::kONE, FixedBitSizes::kONE, + FixedBitSizes::kTWO, FixedBitSizes::kTHREE, + FixedBitSizes::kFOUR, FixedBitSizes::kFIVE, + FixedBitSizes::kSIX, FixedBitSizes::kSEVEN, + FixedBitSizes::kEIGHT, FixedBitSizes::kNINE, + FixedBitSizes::kTEN, FixedBitSizes::kELEVEN, + FixedBitSizes::kTWELVE, FixedBitSizes::kTHIRTEEN, + FixedBitSizes::kFOURTEEN, FixedBitSizes::kFIFTEEN, + FixedBitSizes::kSIXTEEN, FixedBitSizes::kSEVENTEEN, + FixedBitSizes::kEIGHTEEN, FixedBitSizes::kNINETEEN, + FixedBitSizes::kTWENTY, FixedBitSizes::kTWENTYONE, + FixedBitSizes::kTWENTYTWO, FixedBitSizes::kTWENTYTHREE, + FixedBitSizes::kTWENTYFOUR, FixedBitSizes::kTWENTYSIX, + FixedBitSizes::kTWENTYSIX, FixedBitSizes::kTWENTYEIGHT, + FixedBitSizes::kTWENTYEIGHT, FixedBitSizes::kTHIRTY, + FixedBitSizes::kTHIRTY, FixedBitSizes::kTHIRTYTWO, + FixedBitSizes::kTHIRTYTWO, FixedBitSizes::kFORTY, + FixedBitSizes::kFORTY, FixedBitSizes::kFORTY, + FixedBitSizes::kFORTY, FixedBitSizes::kFORTY, + FixedBitSizes::kFORTY, FixedBitSizes::kFORTY, + FixedBitSizes::kFORTY, FixedBitSizes::kFORTYEIGHT, + FixedBitSizes::kFORTYEIGHT, FixedBitSizes::kFORTYEIGHT, + FixedBitSizes::kFORTYEIGHT, FixedBitSizes::kFORTYEIGHT, + FixedBitSizes::kFORTYEIGHT, FixedBitSizes::kFORTYEIGHT, + FixedBitSizes::kFORTYEIGHT, FixedBitSizes::kFIFTYSIX, + FixedBitSizes::kFIFTYSIX, FixedBitSizes::kFIFTYSIX, + FixedBitSizes::kFIFTYSIX, FixedBitSizes::kFIFTYSIX, + FixedBitSizes::kFIFTYSIX, FixedBitSizes::kFIFTYSIX, + FixedBitSizes::kFIFTYSIX, FixedBitSizes::kSIXTYFOUR, + FixedBitSizes::kSIXTYFOUR, FixedBitSizes::kSIXTYFOUR, + FixedBitSizes::kSIXTYFOUR, FixedBitSizes::kSIXTYFOUR, + FixedBitSizes::kSIXTYFOUR, FixedBitSizes::kSIXTYFOUR, + FixedBitSizes::kSIXTYFOUR}; + +uint32 GetClosestBits(uint32 n) { + if (n <= 64) { + return kClosestBitsMap[n]; + } else { + return 64; + } +} + +uint32 GetClosestAlignedBits(uint32 n) { + if (n <= 64) { + return kClosestAlignedBitsMap[n]; + } else { + return 64; + } +} + +uint32 FindClosestBits(int64 value) { + if (value < 0) { + return GetClosestBits(64); + } + + uint32 count = 0; + while (value != 0) { + count++; + value = value >> 1; + } + return GetClosestBits(count); +} + +void BuildHistogram(int32 *histogram, int64_t *data, size_t number) { + // histogram that store the encoded bit requirement for each values. 
+ // at most 32 distinct bit widths can be encoded (see FixedBitSizes) + memset(histogram, 0, FixedBitSizes::kSIZE * sizeof(int32_t)); + + // compute the histogram + for (size_t i = 0; i < number; i++) { + uint32_t idx = EncodeBits(FindClosestBits((int64_t)data[i])); + histogram[idx] += 1; + } +} + +uint32_t GetPercentileBits(const int32 *const histogram, size_t histogram_len, + double p) { + Assert((p <= 1.0) && (p > 0.0)); + + auto per_len = + static_cast<int32_t>(static_cast<double>(histogram_len) * (1.0 - p)); + + // return the bits required by the pth percentile length + for (int32_t i = ORC_HIST_LEN - 1; i >= 0; i--) { + per_len -= histogram[i]; + if (per_len < 0) { + return DecodeBits(static_cast<uint32>(i)); + } + } + return 0; +} + +void ZigZagBuffers(int64_t *input, int64_t *output, size_t number) { + Assert(input && output); + for (size_t i = 0; i < number; i++) { + output[i] = ZigZag(input[i]); + } +} + +} // namespace pax diff --git a/contrib/pax_storage/src/cpp/storage/columns/pax_encoding_utils.h b/contrib/pax_storage/src/cpp/storage/columns/pax_encoding_utils.h new file mode 100644 index 00000000000..ffdc6fdcd49 --- /dev/null +++ b/contrib/pax_storage/src/cpp/storage/columns/pax_encoding_utils.h @@ -0,0 +1,131 @@ + +#pragma once + +#include <cstddef> +#include <cstdint> + +#include "comm/cbdb_wrappers.h" +#include "storage/proto/proto_wrappers.h" + +namespace pax { + +/* These are limits imposed by the ORC protocol. + * These parameters must not be changed under any circumstances. + */ +#define ORC_MAX_LITERAL_SIZE 512 +#define ORC_MIN_REPEAT 3 +#define ORC_HIST_LEN 32 +#define ORC_MAX_SHORT_REPEAT_LENGTH (ORC_MIN_REPEAT + 7) + +enum EncodingType { + kShortRepeat = 0, + kDirect, + kPatchedBase, + kDelta, + // internal use only; never serialized to disk + kInvalidType, +}; + +struct FixedBitSizes { + enum FBS { + kONE = 0, + kTWO, + kTHREE, + kFOUR, + kFIVE, + kSIX, + kSEVEN, + kEIGHT, + kNINE, + kTEN, + kELEVEN, + kTWELVE, + kTHIRTEEN, + kFOURTEEN, + kFIFTEEN, + kSIXTEEN, + kSEVENTEEN, + kEIGHTEEN, + kNINETEEN, + kTWENTY, + kTWENTYONE, + kTWENTYTWO, + kTWENTYTHREE, + kTWENTYFOUR, + kTWENTYSIX, + kTWENTYEIGHT, + kTHIRTY, + kTHIRTYTWO, + kFORTY, + kFORTYEIGHT, + kFIFTYSIX, + kSIXTYFOUR, + kSIZE + }; +}; + +extern const uint8_t kFBSToBitWidthMap[FixedBitSizes::kSIZE]; +extern const uint8_t kClosestBitsMap[65]; +extern const uint8_t kClosestAlignedBitsMap[65]; +extern const uint8_t kBitWidthToFBSMap[65]; + +inline uint32 DecodeBits(uint32 n) { // + return kFBSToBitWidthMap[n]; +} + +inline uint32 EncodeBits(uint32 n) { + if (n <= 64) { + return kBitWidthToFBSMap[n]; + } else { + return FixedBitSizes::kSIXTYFOUR; + } +} + +uint32 GetClosestBits(uint32 n); +uint32 GetClosestAlignedBits(uint32 n); +uint32 FindClosestBits(int64 value); + +// histogram functions +void BuildHistogram(int32 *histogram, int64_t *data, size_t number); +uint32_t GetPercentileBits(const int32 *histogram, size_t histogram_len, + double p); + +// zig-zag encoding for signed numbers +inline int64 ZigZag(int64 value) { // + return (value << 1) ^ (value >> 63); +} + +template <typename T> +inline int64 UnZigZag(T value) { // + return (value >> 1) ^ -(value & 1); +} + +template <typename T> +inline int64 UnZigZagWithUnsigned(T value) { + switch (sizeof(T)) { + case 1: { + auto us_value = static_cast<uint8>(value); + return (us_value >> 1) ^ -(us_value & 1); + } + case 2: { + auto us_value = static_cast<uint16>(value); + return (us_value >> 1) ^ -(us_value & 1); + } + case 4: { + auto us_value = static_cast<uint32>(value); + return (us_value >> 1) ^ -(us_value & 1); + } + case 8: { + auto us_value =
static_cast(value); + return (us_value >> 1) ^ -(us_value & 1); + } + default: { + CBDB_RAISE(cbdb::CException::ExType::kExTypeLogicError); + } + } + CBDB_RAISE(cbdb::CException::ExType::kExTypeLogicError); +} + +void ZigZagBuffers(int64_t *input, int64_t *output, size_t number); + +} // namespace pax diff --git a/contrib/pax_storage/src/cpp/storage/columns/pax_rlev2_decoding.cc b/contrib/pax_storage/src/cpp/storage/columns/pax_rlev2_decoding.cc new file mode 100644 index 00000000000..614a604f527 --- /dev/null +++ b/contrib/pax_storage/src/cpp/storage/columns/pax_rlev2_decoding.cc @@ -0,0 +1,1018 @@ +#include "storage/columns/pax_rlev2_decoding.h" + +#include +#include +#include + +namespace pax { + +#ifndef RUN_GTEST +template +void ReadLongs(TreatedDataBuffer *data_buffer, T *data, uint64 offset, + uint64 len, uint64 fbs, uint32 *bits_left); +#endif + +unsigned char ReadByte(TreatedDataBuffer *data_buffer); +unsigned char ReadByteWithoutBrush(TreatedDataBuffer *data_buffer); +int64 ReadLongBE(TreatedDataBuffer *data_buffer, uint64 bsz); +int64 ReadSignedLong(TreatedDataBuffer *data_buffer); +uint64 ReadUnsignedLong(TreatedDataBuffer *data_buffer); +void UnpackNonAlignedLongs(TreatedDataBuffer *data_buffer, int64 *data, + uint64 offset, uint64 len, uint64 fbs, + uint32 *bits_left); + +void UnrolledUnpack4(TreatedDataBuffer *data_buffer, int64 *data, + uint64 offset, uint64 len, uint32 *bits_left); +void UnrolledUnpack8(TreatedDataBuffer *data_buffer, int64 *data, + uint64 offset, uint64 len); +void UnrolledUnpack16(TreatedDataBuffer *data_buffer, int64 *data, + uint64 offset, uint64 len); +void UnrolledUnpack24(TreatedDataBuffer *data_buffer, int64 *data, + uint64 offset, uint64 len); +void UnrolledUnpack32(TreatedDataBuffer *data_buffer, int64 *data, + uint64 offset, uint64 len); +void UnrolledUnpack40(TreatedDataBuffer *data_buffer, int64 *data, + uint64 offset, uint64 len); +void UnrolledUnpack48(TreatedDataBuffer *data_buffer, int64 *data, + uint64 offset, uint64 len); +void UnrolledUnpack56(TreatedDataBuffer *data_buffer, int64 *data, + uint64 offset, uint64 len); +void UnrolledUnpack64(TreatedDataBuffer *data_buffer, int64 *data, + uint64 offset, uint64 len); + +/** + * Decode the next gap and patch from 'unpackedPatch' and update the index on + * it. Used by PATCHED_BASE. 
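+ * + * Each entry of 'unpacked' packs (gap << patch_bits) | patch; a gap of 255 + * with a zero patch is an escape entry meaning "add 255 to the gap and + * read the next entry".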
+ * + * patch_bits bit size of the patch value + * patch_mask mask for the patch value + * res_gap result of gap + * res_patch result of patch + * patch_idx current index in the 'unpackedPatch' buffer + */ +void AdjustGapAndPatch(DataBuffer *unpacked, uint32 patch_bits, + int64 patch_mask, int64 *res_gap, int64 *res_patch, + uint64 *patch_idx); + +/* + * copy temp data to data which will skip the null field + */ +template +uint64 CopyData(T *data, const int64 *temp_data, uint64 len, uint64 offset, + const char *not_null); + +template uint64 CopyData(int64 *data, const int64 *temp_data, uint64 len, + uint64 offset, const char *not_null); + +unsigned char ReadByte(TreatedDataBuffer *data_buffer) { + unsigned char result = data_buffer->GetTreatedRawBuffer()[0]; + data_buffer->BrushTreated(1); + return result; +} + +unsigned char ReadByteWithoutBrush(TreatedDataBuffer *data_buffer) { + unsigned char result = data_buffer->GetTreatedRawBuffer()[0]; + return result; +} + +int64 ReadLongBE(TreatedDataBuffer *data_buffer, uint64 bsz) { + int64 ret = 0, val; + uint64 n = bsz; + while (n > 0) { + n--; + val = ReadByte(data_buffer); + ret |= (val << (n * 8)); + } + return ret; +} + +int64 ReadSignedLong(TreatedDataBuffer *data_buffer) { + return UnZigZag(ReadUnsignedLong(data_buffer)); +} + +uint64 ReadUnsignedLong(TreatedDataBuffer *data_buffer) { + uint64 ret = 0, b; + uint64 offset = 0; + do { + b = ReadByte(data_buffer); + ret |= (0x7f & b) << offset; + offset += 7; + } while (b >= 0x80); + return ret; +} + +template +void UnrolledUnpack4(TreatedDataBuffer *data_buffer, T *data, + uint64 offset, uint64 len, uint32 *bits_left) { + uint64 cur_idx = offset; + uint32 cur_byte = 0; + uint64 num_groups = 0; + uint32 local_byte; + + Assert(*bits_left == 0); + + while (cur_idx < offset + len) { + // Make sure bits_left is 0 before the loop. bits_left can only be 0, 4, + // or 8. 
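+ // Drain any leftover nibble from cur_byte before taking the unrolled + // two-values-per-byte fast path below.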
+ while (*bits_left > 0 && cur_idx < offset + len) { + *bits_left -= 4; + data[cur_idx++] = (cur_byte >> *bits_left) & 15; + } + if (cur_idx == offset + len) return; + + num_groups = (offset + len - cur_idx) / 2; + num_groups = + std::min(num_groups, static_cast(data_buffer->UnTreated())); + const auto *buffer = reinterpret_cast( + data_buffer->GetTreatedRawBuffer()); + + for (uint64 i = 0; i < num_groups; ++i) { + local_byte = *buffer++; + data[cur_idx] = (local_byte >> 4) & 15; + data[cur_idx + 1] = local_byte & 15; + cur_idx += 2; + } + + data_buffer->BrushTreated(reinterpret_cast(buffer) - + data_buffer->GetTreatedRawBuffer()); + if (cur_idx == offset + len) return; + + cur_byte = ReadByte(data_buffer); + *bits_left = 8; + } +} + +template +void UnrolledUnpack8(TreatedDataBuffer *data_buffer, T *data, + uint64 offset, uint64 len) { + uint64 cur_idx = offset; + int64 buff_len; + + while (cur_idx < offset + len) { + buff_len = data_buffer->UnTreated(); + buff_len = std::min(buff_len, static_cast(offset + len - cur_idx)); + + auto *buffer = reinterpret_cast( + data_buffer->GetTreatedRawBuffer()); + for (int i = 0; i < buff_len; ++i) { + data[cur_idx++] = *buffer++; + } + + data_buffer->BrushTreated(reinterpret_cast(buffer) - + data_buffer->GetTreatedRawBuffer()); + + if (cur_idx == offset + len) return; + data[cur_idx++] = ReadByte(data_buffer); + } +} + +template +void UnrolledUnpack16(TreatedDataBuffer *data_buffer, T *data, + uint64 offset, uint64 len) { + uint64 cur_idx = offset; + int64 buff_len; + while (cur_idx < offset + len) { + buff_len = (data_buffer->UnTreated()) / 2; + buff_len = std::min(buff_len, static_cast(offset + len - cur_idx)); + uint16 b0, b1; + + const auto *buffer = reinterpret_cast( + data_buffer->GetTreatedRawBuffer()); + for (int i = 0; i < buff_len; ++i) { + b0 = static_cast(*buffer); + b1 = static_cast(*(buffer + 1)); + buffer += 2; + data[cur_idx++] = (b0 << 8) | b1; + } + + data_buffer->BrushTreated(reinterpret_cast(buffer) - + data_buffer->GetTreatedRawBuffer()); + if (cur_idx == offset + len) return; + + b0 = ReadByte(data_buffer); + b1 = ReadByte(data_buffer); + data[cur_idx++] = (b0 << 8) | b1; + } +} + +template +void UnrolledUnpack24(TreatedDataBuffer *data_buffer, T *data, + uint64 offset, uint64 len) { + uint64 cur_idx = offset; + int64 buff_len; + + while (cur_idx < offset + len) { + buff_len = (data_buffer->UnTreated()) / 3; + buff_len = std::min(buff_len, static_cast(offset + len - cur_idx)); + uint32 b0, b1, b2; + const auto *buffer = reinterpret_cast( + data_buffer->GetTreatedRawBuffer()); + for (int i = 0; i < buff_len; ++i) { + b0 = static_cast(*buffer); + b1 = static_cast(*(buffer + 1)); + b2 = static_cast(*(buffer + 2)); + buffer += 3; + data[cur_idx++] = static_cast((b0 << 16) | (b1 << 8) | b2); + } + data_buffer->BrushTreated(buff_len * 3); + + if (cur_idx == offset + len) return; + + b0 = ReadByte(data_buffer); + b1 = ReadByte(data_buffer); + b2 = ReadByte(data_buffer); + data[cur_idx++] = static_cast((b0 << 16) | (b1 << 8) | b2); + } +} + +template +void UnrolledUnpack32(TreatedDataBuffer *data_buffer, T *data, + uint64 offset, uint64 len) { + uint64 cur_idx = offset; + int64 buff_len; + + while (cur_idx < offset + len) { + buff_len = (data_buffer->UnTreated()) / 4; + buff_len = std::min(buff_len, static_cast(offset + len - cur_idx)); + uint32 b0, b1, b2, b3; + + const auto *buffer = reinterpret_cast( + data_buffer->GetTreatedRawBuffer()); + for (int i = 0; i < buff_len; ++i) { + b0 = static_cast(*buffer); + b1 = static_cast(*(buffer + 
1)); + b2 = static_cast(*(buffer + 2)); + b3 = static_cast(*(buffer + 3)); + buffer += 4; + data[cur_idx++] = + static_cast((b0 << 24) | (b1 << 16) | (b2 << 8) | b3); + } + + data_buffer->BrushTreated(reinterpret_cast(buffer) - + data_buffer->GetTreatedRawBuffer()); + if (cur_idx == offset + len) return; + + b0 = ReadByte(data_buffer); + b1 = ReadByte(data_buffer); + b2 = ReadByte(data_buffer); + b3 = ReadByte(data_buffer); + data[cur_idx++] = static_cast((b0 << 24) | (b1 << 16) | (b2 << 8) | b3); + } +} + +template +void UnrolledUnpack40(TreatedDataBuffer *data_buffer, T *data, + uint64 offset, uint64 len) { + uint64 cur_idx = offset; + int64 buff_len; + + while (cur_idx < offset + len) { + buff_len = (data_buffer->UnTreated()) / 5; + buff_len = std::min(buff_len, static_cast(offset + len - cur_idx)); + uint64 b0, b1, b2, b3, b4; + + const auto *buffer = reinterpret_cast( + data_buffer->GetTreatedRawBuffer()); + for (int i = 0; i < buff_len; ++i) { + b0 = static_cast(*buffer); + b1 = static_cast(*(buffer + 1)); + b2 = static_cast(*(buffer + 2)); + b3 = static_cast(*(buffer + 3)); + b4 = static_cast(*(buffer + 4)); + buffer += 5; + data[cur_idx++] = + static_cast((b0 << 32) | (b1 << 24) | (b2 << 16) | (b3 << 8) | b4); + } + + data_buffer->BrushTreated(reinterpret_cast(buffer) - + data_buffer->GetTreatedRawBuffer()); + if (cur_idx == offset + len) return; + + b0 = ReadByte(data_buffer); + b1 = ReadByte(data_buffer); + b2 = ReadByte(data_buffer); + b3 = ReadByte(data_buffer); + b4 = ReadByte(data_buffer); + data[cur_idx++] = + static_cast((b0 << 32) | (b1 << 24) | (b2 << 16) | (b3 << 8) | b4); + } +} + +template +void UnrolledUnpack48(TreatedDataBuffer *data_buffer, T *data, + uint64 offset, uint64 len) { + uint64 cur_idx = offset; + int64 buff_len; + + while (cur_idx < offset + len) { + buff_len = (data_buffer->UnTreated()) / 6; + buff_len = std::min(buff_len, static_cast(offset + len - cur_idx)); + uint64 b0, b1, b2, b3, b4, b5; + + const auto *buffer = reinterpret_cast( + data_buffer->GetTreatedRawBuffer()); + for (int i = 0; i < buff_len; ++i) { + b0 = static_cast(*buffer); + b1 = static_cast(*(buffer + 1)); + b2 = static_cast(*(buffer + 2)); + b3 = static_cast(*(buffer + 3)); + b4 = static_cast(*(buffer + 4)); + b5 = static_cast(*(buffer + 5)); + buffer += 6; + data[cur_idx++] = static_cast((b0 << 40) | (b1 << 32) | (b2 << 24) | + (b3 << 16) | (b4 << 8) | b5); + } + + data_buffer->BrushTreated(reinterpret_cast(buffer) - + data_buffer->GetTreatedRawBuffer()); + if (cur_idx == offset + len) return; + + b0 = ReadByte(data_buffer); + b1 = ReadByte(data_buffer); + b2 = ReadByte(data_buffer); + b3 = ReadByte(data_buffer); + b4 = ReadByte(data_buffer); + b5 = ReadByte(data_buffer); + data[cur_idx++] = static_cast((b0 << 40) | (b1 << 32) | (b2 << 24) | + (b3 << 16) | (b4 << 8) | b5); + } +} + +template +void UnrolledUnpack56(TreatedDataBuffer *data_buffer, T *data, + uint64 offset, uint64 len) { + uint64 cur_idx = offset; + int64 buff_len; + + while (cur_idx < offset + len) { + buff_len = (data_buffer->UnTreated()) / 7; + buff_len = std::min(buff_len, static_cast(offset + len - cur_idx)); + uint64 b0, b1, b2, b3, b4, b5, b6; + + const auto *buffer = reinterpret_cast( + data_buffer->GetTreatedRawBuffer()); + for (int i = 0; i < buff_len; ++i) { + b0 = static_cast(*buffer); + b1 = static_cast(*(buffer + 1)); + b2 = static_cast(*(buffer + 2)); + b3 = static_cast(*(buffer + 3)); + b4 = static_cast(*(buffer + 4)); + b5 = static_cast(*(buffer + 5)); + b6 = static_cast(*(buffer + 6)); + buffer += 7; + 
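// assemble the 56-bit value from the seven big-endian bytes just read +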
data[cur_idx++] = + static_cast((b0 << 48) | (b1 << 40) | (b2 << 32) | (b3 << 24) | + (b4 << 16) | (b5 << 8) | b6); + } + + data_buffer->BrushTreated(reinterpret_cast(buffer) - + data_buffer->GetTreatedRawBuffer()); + if (cur_idx == offset + len) return; + + b0 = ReadByte(data_buffer); + b1 = ReadByte(data_buffer); + b2 = ReadByte(data_buffer); + b3 = ReadByte(data_buffer); + b4 = ReadByte(data_buffer); + b5 = ReadByte(data_buffer); + b6 = ReadByte(data_buffer); + data[cur_idx++] = static_cast((b0 << 48) | (b1 << 40) | (b2 << 32) | + (b3 << 24) | (b4 << 16) | (b5 << 8) | b6); + } +} + +template +void UnrolledUnpack64(TreatedDataBuffer *data_buffer, T *data, + uint64 offset, uint64 len) { + uint64 cur_idx = offset; + int64 buff_len; + + while (cur_idx < offset + len) { + buff_len = (data_buffer->UnTreated()) / 8; + buff_len = std::min(buff_len, static_cast(offset + len - cur_idx)); + uint64 b0, b1, b2, b3, b4, b5, b6, b7; + + const auto *buffer = reinterpret_cast( + data_buffer->GetTreatedRawBuffer()); + for (int i = 0; i < buff_len; ++i) { + b0 = static_cast(*buffer); + b1 = static_cast(*(buffer + 1)); + b2 = static_cast(*(buffer + 2)); + b3 = static_cast(*(buffer + 3)); + b4 = static_cast(*(buffer + 4)); + b5 = static_cast(*(buffer + 5)); + b6 = static_cast(*(buffer + 6)); + b7 = static_cast(*(buffer + 7)); + buffer += 8; + data[cur_idx++] = + static_cast((b0 << 56) | (b1 << 48) | (b2 << 40) | (b3 << 32) | + (b4 << 24) | (b5 << 16) | (b6 << 8) | b7); + } + + data_buffer->BrushTreated(reinterpret_cast(buffer) - + data_buffer->GetTreatedRawBuffer()); + if (cur_idx == offset + len) return; + + b0 = ReadByte(data_buffer); + b1 = ReadByte(data_buffer); + b2 = ReadByte(data_buffer); + b3 = ReadByte(data_buffer); + b4 = ReadByte(data_buffer); + b5 = ReadByte(data_buffer); + b6 = ReadByte(data_buffer); + b7 = ReadByte(data_buffer); + data[cur_idx++] = + static_cast((b0 << 56) | (b1 << 48) | (b2 << 40) | (b3 << 32) | + (b4 << 24) | (b5 << 16) | (b6 << 8) | b7); + } +} + +template +void UnpackNonAlignedLongs(TreatedDataBuffer *data_buffer, T *data, + uint64 offset, uint64 len, uint64 fbs, + uint32 *bits_left) { + unsigned char cur_byte = data_buffer->GetTreatedRawBuffer()[0]; + for (uint64 i = offset; i < (offset + len); i++) { + uint64 result = 0; + uint64 bits_left_to_read = fbs; + + while (bits_left_to_read > *bits_left) { + result <<= *bits_left; + result |= cur_byte & ((1 << *bits_left) - 1); + bits_left_to_read -= *bits_left; + cur_byte = ReadByte(data_buffer); + *bits_left = 8; + } + + // handle the left over bits + if (bits_left_to_read > 0) { + result <<= bits_left_to_read; + *bits_left -= static_cast(bits_left_to_read); + result |= (cur_byte >> *bits_left) & ((1 << bits_left_to_read) - 1); + } + data[i] = static_cast(result); + } +} + +template +void ReadLongs(TreatedDataBuffer *data_buffer, T *data, uint64 offset, + uint64 len, uint64 fbs, uint32 *bits_left) { + switch (fbs) { + case 4: + UnrolledUnpack4(data_buffer, data, offset, len, bits_left); + return; + case 8: + UnrolledUnpack8(data_buffer, data, offset, len); + return; + case 16: { + Assert(sizeof(T) >= 2); + UnrolledUnpack16(data_buffer, data, offset, len); + return; + } + case 24: { + Assert(sizeof(T) >= 3); + UnrolledUnpack24(data_buffer, data, offset, len); + return; + } + case 32: { + Assert(sizeof(T) >= 4); + UnrolledUnpack32(data_buffer, data, offset, len); + return; + } + case 40: { + Assert(sizeof(T) >= 5); + UnrolledUnpack40(data_buffer, data, offset, len); + return; + } + case 48: { + Assert(sizeof(T) >= 6); + 
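// byte-aligned width: the unrolled path never needs the bits_left state +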
UnrolledUnpack48(data_buffer, data, offset, len); + return; + } + case 56: { + Assert(sizeof(T) >= 7); + UnrolledUnpack56(data_buffer, data, offset, len); + return; + } + case 64: { + Assert(sizeof(T) >= 8); + UnrolledUnpack64(data_buffer, data, offset, len); + return; + } + default: + // Fallback to the default implementation for deprecated bit size. + UnpackNonAlignedLongs(data_buffer, data, offset, len, fbs, bits_left); + return; + } +} + +void AdjustGapAndPatch(DataBuffer *unpacked, uint32 patch_bits, + int64 patch_mask, int64 *res_gap, int64 *res_patch, + uint64 *patch_idx) { + uint64 idx = *patch_idx; + uint64 gap = static_cast((*unpacked)[idx]) >> patch_bits; + int64 patch = (*unpacked)[idx] & patch_mask; + int64 actual_gap = 0; + + // special case: gap is >255 then patch value will be 0. + // if gap is <=255 then patch value cannot be 0 + while (gap == 255 && patch == 0) { + actual_gap += 255; + ++idx; + gap = static_cast((*unpacked)[idx]) >> patch_bits; + patch = (*unpacked)[idx] & patch_mask; + } + // add the left over gap + actual_gap += gap; + + *res_gap = actual_gap; + *res_patch = patch; + *patch_idx = idx; +} + +template +uint64 CopyData(T *data, const int64 *const temp_data, uint64 len, + uint64 offset, const char *not_null) { + if (not_null) { + size_t already_fill = 0; + + for (uint64 pos = 0; pos < len;) { + if (!not_null[pos + already_fill + offset]) { + // should never add offset, cause not data is not from start(); + already_fill++; + } else { + data[pos + already_fill] = temp_data[pos]; + pos++; + } + } + return already_fill + len; + } else { + for (size_t i = 0; i < len; i++) { + data[i] = static_cast(temp_data[i]); + } + return len; + } + + // never reach + Assert(false); +} + +template +PaxOrcDecoder::PaxOrcDecoder( + const PaxDecoder::DecodingOption &encoder_options) + : PaxDecoder(encoder_options), + data_buffer_(nullptr), + copy_data_buffer_(nullptr), + unpacked_data_(nullptr), + result_buffer_(nullptr) {} + +template +PaxOrcDecoder::~PaxOrcDecoder() { + if (data_buffer_) { + delete data_buffer_; + } + if (copy_data_buffer_) { + delete copy_data_buffer_; + } + if (unpacked_data_) { + delete unpacked_data_; + } +} + +template +PaxDecoder *PaxOrcDecoder::SetSrcBuffer(char *data, size_t data_len) { + Assert(!data_buffer_); + if (data) { + data_buffer_ = + new TreatedDataBuffer(reinterpret_cast(data), data_len); + copy_data_buffer_ = + new DataBuffer(ORC_MAX_LITERAL_SIZE * sizeof(int64)); + } + + return this; +} + +template +PaxDecoder *PaxOrcDecoder::SetDataBuffer(DataBuffer *result_buffer) { + result_buffer_ = result_buffer; + return this; +} + +template +const char *PaxOrcDecoder::GetBuffer() const { + return result_buffer_->GetBuffer(); +} + +template +size_t PaxOrcDecoder::GetBufferSize() const { + return result_buffer_->Used(); +} + +template +size_t PaxOrcDecoder::Next(const char *const not_null) { + Assert(result_buffer_); + size_t n_read = result_buffer_->Used(); + uint64 read_round = 0; + + if (unlikely(!data_buffer_)) { + return n_read; + } + + if (data_buffer_->UnTreated() < 0) { + CBDB_RAISE(cbdb::CException::ExType::kExTypeOutOfRange); + } else if (data_buffer_->UnTreated() == 0) { + return result_buffer_->Used(); + } + + unsigned char first_byte = ReadByteWithoutBrush(data_buffer_); + + // brush the null field + + if (not_null) { + uint64 null_index = n_read / sizeof(T); + uint64 null_read = n_read; + while (!not_null[null_index++]) { + n_read += sizeof(T); + } + + null_read = n_read - null_read; + result_buffer_->Brush(null_read); + } + 
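// n_read counts bytes written to the result buffer, so it must stay + // aligned to the element size +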
Assert(n_read % sizeof(T) == 0); + + auto enc = static_cast<EncodingType>((first_byte >> 6) & 0x03); + auto result_data = + reinterpret_cast<T *>(result_buffer_->GetAvailableBuffer()); + switch (enc) { + case EncodingType::kShortRepeat: + read_round = NextShortRepeats(data_buffer_, result_data, + n_read / sizeof(T), not_null); + break; + case EncodingType::kDirect: + read_round = + NextDirect(data_buffer_, result_data, n_read / sizeof(T), not_null); + break; + case EncodingType::kPatchedBase: + read_round = + NextPatched(data_buffer_, result_data, n_read / sizeof(T), not_null); + break; + case EncodingType::kDelta: + read_round = + NextDelta(data_buffer_, result_data, n_read / sizeof(T), not_null); + break; + default: + Assert(false); + } + + result_buffer_->Brush(read_round * sizeof(T)); + n_read += read_round; + + return n_read; +} + +template <typename T> +size_t PaxOrcDecoder<T>::Decoding() { + return Decoding(nullptr, 0); +} + +template <typename T> +size_t PaxOrcDecoder<T>::Decoding(const char *const not_null, + size_t not_null_len) { + size_t n_read = 0; + size_t last_read = 0; + size_t result_cap = result_buffer_->Available(); + Assert(result_buffer_); + Assert(result_cap > 0); + + if (data_buffer_) { + do { + last_read = n_read; + n_read = Next(not_null); + + CBDB_CHECK(n_read <= result_cap, + cbdb::CException::ExType::kExTypeOutOfRange); + } while (n_read != last_read); + } + + if (not_null) { + Assert(n_read <= (not_null_len * sizeof(T))); + + if (n_read < (not_null_len * sizeof(T))) { + uint64 null_index = n_read / sizeof(T); + uint64 null_read = n_read; + while (null_index < not_null_len && !not_null[null_index]) { + n_read += sizeof(T); + null_index++; + } + + null_read = n_read - null_read; + result_buffer_->Brush(null_read); + } + Assert(n_read == (not_null_len * sizeof(T))); + } + + Assert(result_buffer_->Available() >= 0); + return n_read; +} + +template <typename T> +uint64 PaxOrcDecoder<T>::NextShortRepeats(TreatedDataBuffer *data_buffer, + T *const data, uint64 offset, + const char *const not_null) { + int64 value = 0; + uint16 data_lens = 0; // 3 - 10 + + unsigned char first_byte = ReadByte(data_buffer); + + // extract the number of fixed bytes + uint64 byte_size = (first_byte >> 3) & 0x07; + byte_size += 1; + + data_lens = first_byte & 0x07; + // run length values are stored only once MIN_REPEAT is met + data_lens += ORC_MIN_REPEAT; + + // read the repeated value, which is stored using fixed bytes + value = ReadLongBE(data_buffer, byte_size); + + if (decoder_options_.is_sign) { + value = UnZigZag(static_cast<uint64>(value)); + } + + // This differs from ORC: ORC simply fills the null fields, but our + // storage must skip over the null slots and keep them as NULLs. + if (not_null) { + size_t already_fill = 0; + + for (uint64 pos = 0; pos < data_lens;) { + if (!not_null[pos + already_fill + offset]) { + // never add offset when indexing data: data does not point at Start() + already_fill++; + } else { + data[pos + already_fill] = static_cast<T>(value); + pos++; + } + } + return already_fill + data_lens; + } + + for (uint64 pos = 0; pos < data_lens; ++pos) { + data[pos] = static_cast<T>(value); + } + return data_lens; +} + +template <typename T> +uint64 PaxOrcDecoder<T>::NextDirect(TreatedDataBuffer *data_buffer, + T *const data, uint64 offset, + const char *const not_null) { + // extract the number of fixed bits + unsigned char first_byte = ReadByte(data_buffer); + unsigned char fbo = (first_byte >> 1) & 0x1f; + uint32 bits = DecodeBits(fbo); + uint32 data_lens = 0; + uint32 bits_left = 0; + + // extract the run length + data_lens = static_cast<uint32>(first_byte &
0x01) << 8; + data_lens |= ReadByte(data_buffer); + + // runs are one off + data_lens += 1; + + if (!not_null) { + ReadLongs(data_buffer, data, 0, data_lens, bits, &bits_left); + if (decoder_options_.is_sign) { + for (uint64 i = 0; i < data_lens; ++i) { + data[i] = UnZigZagWithUnsigned(data[i]); + } + } + + return data_lens; + } + + ReadLongs(data_buffer, copy_data_buffer_->StartT(), 0, data_lens, bits, + &bits_left); + + if (decoder_options_.is_sign) { + for (uint64 i = 0; i < data_lens; ++i) { + (*copy_data_buffer_)[i] = + UnZigZag(static_cast((*copy_data_buffer_)[i])); + } + } + + return CopyData(data, copy_data_buffer_->StartT(), data_lens, offset, + not_null); +} + +template +uint64 PaxOrcDecoder::NextDelta(TreatedDataBuffer *data_buffer, + T *data, uint64 offset, + const char *const not_null) { + unsigned char first_byte = ReadByte(data_buffer); + + // extract the number of fixed bits + unsigned char fbo = (first_byte >> 1) & 0x1f; + uint32 bits; + uint32 data_lens = 0; + uint32 bits_left = 0; + + bits = fbo != 0 ? DecodeBits(fbo) : 0; + + // extract the run length + data_lens = static_cast(first_byte & 0x01) << 8; + data_lens |= ReadByte(data_buffer); + ++data_lens; // account for first value + + // it is safe to make no copy here + if (!not_null) { + // read the first value stored as int + T prev_val = decoder_options_.is_sign + ? static_cast(ReadSignedLong(data_buffer)) + : static_cast(ReadUnsignedLong(data_buffer)); + + data[0] = static_cast(prev_val); + int64 delta_base = ReadSignedLong(data_buffer); + + if (bits == 0) { + // add fixed deltas to adjacent values + for (uint64 i = 1; i < data_lens; ++i) { + data[i] = data[i - 1] + delta_base; + } + } else { + prev_val = data[1] = prev_val + delta_base; + if (data_lens < 2) { + CBDB_RAISE(cbdb::CException::ExType::kExTypeInvalidORCFormat); + } + + // write the unpacked values, add it to previous value and store final + // value to result buffer. if the delta base value is negative then it + // is a decreasing sequence else an increasing sequence. + // read deltas using the curr_literals buffer. + ReadLongs(data_buffer, data, 2, data_lens - 2, bits, &bits_left); + if (delta_base < 0) { + for (uint64 i = 2; i < data_lens; ++i) { + prev_val = data[i] = prev_val - data[i]; + } + } else { + for (uint64 i = 2; i < data_lens; ++i) { + prev_val = data[i] = prev_val + data[i]; + } + } + } + + return data_lens; + } + + int64 prev_val = decoder_options_.is_sign + ? 
ReadSignedLong(data_buffer) + : static_cast(ReadUnsignedLong(data_buffer)); + + int64 *curr_literals = copy_data_buffer_->StartT(); + curr_literals[0] = prev_val; + + int64 delta_base = ReadSignedLong(data_buffer); + + if (bits == 0) { + // TODO(jiaqi zhou): still can no copy here + // add fixed deltas to adjacent values + for (uint64 i = 1; i < data_lens; ++i) { + curr_literals[i] = curr_literals[i - 1] + delta_base; + } + } else { + prev_val = curr_literals[1] = prev_val + delta_base; + if (data_lens < 2) { + CBDB_RAISE(cbdb::CException::ExType::kExTypeInvalidORCFormat); + } + + ReadLongs(data_buffer, curr_literals, 2, data_lens - 2, bits, &bits_left); + if (delta_base < 0) { + for (uint64 i = 2; i < data_lens; ++i) { + prev_val = curr_literals[i] = prev_val - curr_literals[i]; + } + } else { + for (uint64 i = 2; i < data_lens; ++i) { + prev_val = curr_literals[i] = prev_val + curr_literals[i]; + } + } + } + + return CopyData(data, curr_literals, data_lens, offset, not_null); +} + +template +uint64 PaxOrcDecoder::NextPatched(TreatedDataBuffer *data_buffer, + T *const data, uint64 offset, + const char *const not_null) { + unsigned char first_byte = ReadByte(data_buffer); + unsigned char fbo = (first_byte >> 1) & 0x1f; + uint32 bits = DecodeBits(fbo); + uint32 data_lens = 0; + uint32 bits_left = 0; + + data_lens = static_cast(first_byte & 0x01) << 8; + data_lens |= ReadByte(data_buffer); + data_lens += 1; + + uint64 third_byte = ReadByte(data_buffer); + uint64 byte_size = (third_byte >> 5) & 0x07; + + // base width is one off + byte_size += 1; + + // extract patch width + uint32 pwo = third_byte & 0x1f; + uint32 patch_bits = DecodeBits(pwo); + + // read fourth byte and extract patch gap width + uint64 fourth_byte = ReadByte(data_buffer); + uint32 pgw = (fourth_byte >> 5) & 0x07; + + // patch gap width is one off + pgw += 1; + + // extract the length of the patch list + size_t pl = fourth_byte & 0x1f; + CBDB_CHECK(pl != 0, cbdb::CException::ExType::kExTypeInvalidORCFormat); + + int64 base = ReadLongBE(data_buffer, byte_size); + int64 mask = (static_cast(1) << ((byte_size * 8) - 1)); + + // if mask of base value is 1 then base is negative value else positive + if ((base & mask) != 0) { + base = base & ~mask; + base = -base; + } + + /// FIXME(jiaqizho): consider just use result_buffer to reduce copy if no + /// null field here + ReadLongs(data_buffer, copy_data_buffer_->StartT(), 0, data_lens, bits, + &bits_left); + // reset the bit left + bits_left = 0; + + if ((patch_bits + pgw) > 64) { + CBDB_RAISE(cbdb::CException::ExType::kExTypeInvalidORCFormat); + } + + if (unpacked_data_ == nullptr) { + unpacked_data_ = new DataBuffer(pl * sizeof(int64)); + } else { + unpacked_data_->BrushBackAll(); + if (unpacked_data_->Capacity() < (pl * sizeof(int64))) { + unpacked_data_->ReSize(pl * sizeof(int64)); + } + } + + uint32 cfb = GetClosestBits(patch_bits + pgw); + size_t old_treaded = data_buffer->Treated(); + ReadLongs(data_buffer, unpacked_data_->GetAvailableBuffer(), 0, pl, cfb, + &bits_left); + size_t treaded_last_read = data_buffer->Treated() - old_treaded; + + CBDB_CHECK(treaded_last_read < (pl * sizeof(int64)), + cbdb::CException::ExType::kExTypeOutOfRange); + + int64 patch_mask = ((static_cast(1) << patch_bits) - 1); + + int64 gap = 0; + int64 patch = 0; + uint64 patch_idx = 0; + AdjustGapAndPatch(unpacked_data_, patch_bits, patch_mask, &gap, &patch, + &patch_idx); + + for (uint64 i = 0; i < data_lens; ++i) { + if (static_cast(i) != gap) { + // no patching required. 
add base to unpacked value to get final value + (*copy_data_buffer_)[i] += base; + } else { + // extract the patch value + int64 patched_val = (*copy_data_buffer_)[i] | (patch << bits); + + // add base to patched value + (*copy_data_buffer_)[i] = base + patched_val; + + // increment the patch to point to next entry in patch list + ++patch_idx; + + if (patch_idx < (unpacked_data_->Capacity())) { + AdjustGapAndPatch(unpacked_data_, patch_bits, patch_mask, &gap, &patch, + &patch_idx); + + // next gap is relative to the current gap + gap += i; + } + } + } + + return CopyData(data, copy_data_buffer_->StartT(), data_lens, offset, + not_null); +} + +template class PaxOrcDecoder; +template class PaxOrcDecoder; +template class PaxOrcDecoder; +template class PaxOrcDecoder; + +} // namespace pax diff --git a/contrib/pax_storage/src/cpp/storage/columns/pax_rlev2_decoding.h b/contrib/pax_storage/src/cpp/storage/columns/pax_rlev2_decoding.h new file mode 100644 index 00000000000..80e7261e50f --- /dev/null +++ b/contrib/pax_storage/src/cpp/storage/columns/pax_rlev2_decoding.h @@ -0,0 +1,65 @@ +#pragma once + +#include +#include + +#include "comm/cbdb_wrappers.h" +#include "storage/columns/pax_decoding.h" +#include "storage/columns/pax_encoding_utils.h" +#include "storage/pax_buffer.h" + +namespace pax { + +template +class PaxOrcDecoder final : public PaxDecoder { + public: + explicit PaxOrcDecoder(const PaxDecoder::DecodingOption &encoder_options); + + ~PaxOrcDecoder() override; + + PaxDecoder *SetSrcBuffer(char *data, size_t data_len) override; + + PaxDecoder *SetDataBuffer(DataBuffer *result_buffer) override; + + const char *GetBuffer() const override; + + size_t GetBufferSize() const override; + + size_t Next(const char *not_null) override; + + size_t Decoding() override; + + size_t Decoding(const char *not_null, size_t not_null_len) override; + + private: + uint64 NextShortRepeats(TreatedDataBuffer *data_buffer, T *data, + uint64 offset, const char *not_null); + uint64 NextDirect(TreatedDataBuffer *data_buffer, T *data, + uint64 offset, const char *not_null); + uint64 NextPatched(TreatedDataBuffer *data_buffer, T *data, + uint64 offset, const char *not_null); + uint64 NextDelta(TreatedDataBuffer *data_buffer, T *data, + uint64 offset, const char *not_null); + + private: + TreatedDataBuffer *data_buffer_; + // Used to fill null field + DataBuffer *copy_data_buffer_; + // Used by PATCHED_BASE + DataBuffer *unpacked_data_; + // result buffer + DataBuffer *result_buffer_; +}; + +extern template class PaxOrcDecoder; +extern template class PaxOrcDecoder; +extern template class PaxOrcDecoder; +extern template class PaxOrcDecoder; + +#ifdef RUN_GTEST +template +void ReadLongs(TreatedDataBuffer *data_buffer, T *data, uint64 offset, + uint64 len, uint64 fbs, uint32 *bits_left); +#endif + +} // namespace pax diff --git a/contrib/pax_storage/src/cpp/storage/columns/pax_rlev2_encoding.cc b/contrib/pax_storage/src/cpp/storage/columns/pax_rlev2_encoding.cc new file mode 100644 index 00000000000..1dc7da364ae --- /dev/null +++ b/contrib/pax_storage/src/cpp/storage/columns/pax_rlev2_encoding.cc @@ -0,0 +1,1004 @@ +#include "storage/columns/pax_rlev2_encoding.h" + +#include +#include +#include +#include + +#include "comm/cbdb_wrappers.h" + +namespace pax { + +#ifndef RUN_GTEST +void WriteLongs(DataBuffer *data_buffer, const int64 *input, + uint32 offset, size_t len, uint32 bits); +#endif +void WriteUnsignedLong(DataBuffer *data_buffer, int64 val); +void WriteSignedLong(DataBuffer *data_buffer, int64 val); + +void 
WriteUnsignedLong(DataBuffer<char> *data_buffer, int64 val) {
+  while (true) {
+    if ((val & ~0x7f) == 0) {
+      data_buffer->Write(val);
+      data_buffer->Brush(1);
+      return;
+    } else {
+      data_buffer->Write(static_cast<char>(0x80 | (val & 0x7f)));
+      data_buffer->Brush(1);
+      // cast val to unsigned so as to force 0-fill right shift
+      val = (static_cast<uint64>(val) >> 7);
+    }
+  }
+}
+
+void WriteSignedLong(DataBuffer<char> *data_buffer, int64 val) {
+  WriteUnsignedLong(data_buffer, ZigZag(val));
+}
+
+void WriteLongs(DataBuffer<char> *data_buffer, const int64 *const input,
+                uint32 offset, size_t len, uint32 bits) {
+  if (input == nullptr || len < 1 || bits < 1) {
+    return;
+  }
+
+  if (GetClosestAlignedBits(bits) == bits) {
+    uint32 num_of_bytes;
+    auto end_offset = static_cast<uint32>(offset + len);
+    if (bits < 8) {
+      char bit_mask = static_cast<char>((1 << bits) - 1);
+      uint32 num_hops = 8 / bits;
+      auto remainder = static_cast<uint32>(len % num_hops);
+      uint32 end_unroll = end_offset - remainder;
+      for (uint32 i = offset; i < end_unroll; i += num_hops) {
+        char to_write = 0;
+        for (uint32 j = 0; j < num_hops; ++j) {
+          to_write |= static_cast<char>((input[i + j] & bit_mask)
+                                        << (8 - (j + 1) * bits));
+        }
+        data_buffer->Write(to_write);
+        data_buffer->Brush(1);
+      }
+
+      if (remainder > 0) {
+        uint32 shift = 8 - bits;
+        char to_write = 0;
+        for (uint32 i = end_unroll; i < end_offset; ++i) {
+          to_write |= static_cast<char>((input[i] & bit_mask) << shift);
+          shift -= bits;
+        }
+        data_buffer->Write(to_write);
+        data_buffer->Brush(1);
+      }
+
+    } else {
+      num_of_bytes = bits / 8;
+      for (uint32 i = offset; i < end_offset; ++i) {
+        for (uint32 j = 0; j < num_of_bytes; ++j) {
+          char to_write = static_cast<char>(
+              (input[i] >> (8 * (num_of_bytes - j - 1))) & 255);
+          data_buffer->Write(to_write);
+          data_buffer->Brush(1);
+        }
+      }
+    }
+
+    return;
+  }
+
+  // write for unaligned bit size
+  uint32 bits_left = 8;
+  char current = 0;
+  for (uint32 i = offset; i < (offset + len); i++) {
+    int64 value = input[i];
+    uint32 bits_to_write = bits;
+    while (bits_to_write > bits_left) {
+      // add the bits to the bottom of the current word
+      current |= static_cast<char>(value >> (bits_to_write - bits_left));
+      // subtract out the bits we just added
+      bits_to_write -= bits_left;
+      // zero out the bits above bits_to_write
+      value &= (static_cast<uint64>(1) << bits_to_write) - 1;
+      data_buffer->Write(current);
+      data_buffer->Brush(1);
+      current = 0;
+      bits_left = 8;
+    }
+    bits_left -= bits_to_write;
+    current |= static_cast<char>(value << bits_left);
+    if (bits_left == 0) {
+      data_buffer->Write(current);
+      data_buffer->Brush(1);
+      current = 0;
+      bits_left = 8;
+    }
+  }
+
+  // flush
+  if (bits_left != 8) {
+    data_buffer->Write(current);
+    data_buffer->Brush(1);
+  }
+}
+
+struct PaxOrcEncoder::EncoderContext::DeltaContext {
+  int64 adj_deltas[ORC_MAX_LITERAL_SIZE];
+  int64 adj_deltas_idx;
+  bool is_fixed_delta;
+  int64 fixed_delta_val;
+  uint32 bits_delta_max;
+};
+
+struct PaxOrcEncoder::EncoderContext::DirectContext {
+  uint32 zigzag_bits_100_p;
+  bool zz_bits_100_p_inited;
+};
+
+struct PaxOrcEncoder::EncoderContext::PatchBaseContext {
+  int32 histogram[ORC_HIST_LEN];
+  size_t histogram_len;
+
+  int64 base_patch_buffer[ORC_MAX_LITERAL_SIZE];
+  int64 base_patch_buffer_count;
+
+  int64 gap_sign_patch_list[ORC_MAX_LITERAL_SIZE];
+  int64 gap_sign_patch_list_count;
+
+  uint32 patch_width;
+  uint32 patch_gap_width;
+  uint32 patch_len;
+  uint32 hist_bits_95_p;
+  uint32 hist_bits_100_p;
+  int64 min;
+  int64 max;
+};
+
+PaxOrcEncoder::EncoderContext::EncoderContext()
+    : is_sign(true),
+      fixed_len(0),
+      var_len(0),
+      prev_delta(0),
+      current_delta(0) {
+  internal_buffer_ = reinterpret_cast<char *>(
+      cbdb::Palloc0(sizeof(struct DeltaContext) + sizeof(struct DirectContext) +
+                    sizeof(struct PatchBaseContext)));
+
+  delta_ctx = reinterpret_cast<struct DeltaContext *>(internal_buffer_);
+  direct_ctx = reinterpret_cast<struct DirectContext *>(
+      internal_buffer_ + sizeof(struct DeltaContext));
+  pb_ctx = reinterpret_cast<struct PatchBaseContext *>(
+      internal_buffer_ + sizeof(struct DeltaContext) +
+      sizeof(struct DirectContext));
+}
+
+PaxOrcEncoder::EncoderContext::~EncoderContext() {
+  cbdb::Pfree(internal_buffer_);
+}
+
+void PaxOrcEncoder::EncoderContext::ResetDirectCtx() const {
+  direct_ctx->zz_bits_100_p_inited = false;
+}
+
+void PaxOrcEncoder::EncoderContext::ResetDeltaCtx() const {
+  delta_ctx->adj_deltas_idx = 0;
+  delta_ctx->is_fixed_delta = false;
+  delta_ctx->fixed_delta_val = 0;
+  delta_ctx->bits_delta_max = 0;
+}
+
+void PaxOrcEncoder::EncoderContext::ResetPbCtx() const {
+  pb_ctx->histogram_len = 0;
+  pb_ctx->base_patch_buffer_count = 0;
+  pb_ctx->gap_sign_patch_list_count = 0;
+
+  pb_ctx->patch_width = 0;
+  pb_ctx->patch_gap_width = 0;
+  pb_ctx->patch_len = 0;
+  pb_ctx->hist_bits_95_p = 0;
+  pb_ctx->hist_bits_100_p = 0;
+  pb_ctx->min = 0;
+  pb_ctx->max = 0;
+}
+
+PaxOrcEncoder::PaxOrcEncoder(const EncodingOption &encoder_options)
+    : PaxEncoder(encoder_options),
+      data_buffer_(new UntreatedDataBuffer<int64>(1024)),
+      zigzag_buffer_(new DataBuffer<int64>(128)),
+      status_(EncoderStatus::kInit) {
+  encoder_context_.is_sign = encoder_options_.is_sign;
+}
+
+PaxOrcEncoder::~PaxOrcEncoder() {
+  delete data_buffer_;
+  delete zigzag_buffer_;
+}
+
+void PaxOrcEncoder::Append(const int64 data) { AppendInternal(data, false); }
+
+void PaxOrcEncoder::Flush() { AppendInternal(0, true); }
+
+void PaxOrcEncoder::AppendData(const int64 data) {
+  if (data_buffer_->Available() < sizeof(int64)) {
+    data_buffer_->ReSize(data_buffer_->Capacity() * 2);
+  }
+
+  data_buffer_->Write(data);
+
+  data_buffer_->Brush(sizeof(int64));
+  data_buffer_->BrushUnTreated(sizeof(int64));
+}
+
+void PaxOrcEncoder::SwitchStatusTo(EncoderStatus new_status) {
+  status_ = new_status;
+}
+
+void PaxOrcEncoder::AppendInternal(const int64 data, bool is_flush) {
+  bool already_append_before_delta_changed = false;
+  bool keep_push_status = true;
+
+  if (is_flush) {
+    SwitchStatusTo(kFlush);
+  }
+
+  while (keep_push_status) {
+    keep_push_status = false;
+
+    switch (status_) {
+      case EncoderStatus::kInvalid: {
+        Assert(false);
+        break;
+      }
+      case EncoderStatus::kInit: {
+        AppendData(data);
+        encoder_context_.fixed_len = 1;
+        encoder_context_.var_len = 1;
+        SwitchStatusTo(kTwoElements);
+        break;
+      }
+      case EncoderStatus::kTwoElements: {
+        encoder_context_.prev_delta =
+            data - ((*data_buffer_)[data_buffer_->GetSize() - 1]);
+
+        AppendData(data);
+
+        if (encoder_context_.prev_delta == 0) {
+          encoder_context_.fixed_len = 2;
+          encoder_context_.var_len = 0;
+        } else {
+          encoder_context_.fixed_len = 0;
+          encoder_context_.var_len = 2;
+        }
+
+        SwitchStatusTo(kUntreated);
+        break;
+      }
+      case kUntreated: {
+        if (data_buffer_->UnTreated() == 0) {
+          keep_push_status = true;
+          SwitchStatusTo(kInit);
+          break;
+        }
+
+        encoder_context_.current_delta =
+            data - ((*data_buffer_)[data_buffer_->GetSize() - 1]);
+
+        // The temporary data is duplicated (minimum of 3 repetitions, i.e.
+        // ORC_MIN_REPEAT), but it is not yet confirmed whether it was already
+        // duplicated at the beginning
+        if (encoder_context_.current_delta == encoder_context_.prev_delta &&
+            encoder_context_.current_delta == 0) {
+          AppendData(data);
+          if (encoder_context_.var_len > 0) {
+            // If the variable run is non-zero then we are seeing repeating
+            // values at the end of the variable run, in which case the fixed
+            // run length is 2
+            encoder_context_.fixed_len = 2;
+          }
+          encoder_context_.fixed_len++;
+
+          if (encoder_context_.fixed_len >= ORC_MIN_REPEAT &&
+              encoder_context_.var_len > 0) {
+            // Ok, got at least 3 repetitions.
+            // Encode and flush the non-repeating data
+            keep_push_status = true;
+            SwitchStatusTo(kTreatPrevBuffer);
+            break;
+          }
+
+          // The data is repeated from the beginning
+          if (encoder_context_.fixed_len == ORC_MAX_LITERAL_SIZE) {
+            encoder_context_.delta_ctx->is_fixed_delta = true;
+
+            keep_push_status = true;
+            SwitchStatusTo(kTreatDelta);
+            break;
+          }
+
+          /// no need to switch to the next type
+          break;
+        }
+
+        // Below, encoder_context_.current_delta !=
+        // encoder_context_.prev_delta (and != 0). That can happen in two ways:
+        // 1. A repeating consecutive sequence becomes non-repeating
+        // 2. The temporary data is not repeated at the beginning
+
+        if (encoder_context_.fixed_len >= ORC_MIN_REPEAT) {
+          keep_push_status = true;
+          SwitchStatusTo(kUntreatedDiscontinuous);
+          break;
+        }
+
+        // The number of repetitions can be less than 3 here,
+        // e.g. 0 0 1: then current_delta != 0 and the data turns into
+        // non-repeating data
+        if (encoder_context_.fixed_len > 0 &&
+            encoder_context_.fixed_len < ORC_MIN_REPEAT &&
+            encoder_context_.current_delta != 0) {
+          encoder_context_.var_len = encoder_context_.fixed_len;
+          encoder_context_.fixed_len = 0;
+        }
+
+        // No data remains
+        if (data_buffer_->UnTreated() == 0) {
+          keep_push_status = true;
+          SwitchStatusTo(kInit);
+          break;
+        }
+
+        // non-repeating data
+        encoder_context_.prev_delta = encoder_context_.current_delta;
+        AppendData(data);
+        encoder_context_.var_len++;
+
+        if (encoder_context_.var_len == ORC_MAX_LITERAL_SIZE) {
+          keep_push_status = true;
+          SwitchStatusTo(kDetermineFlushPrevBuffer);
+          break;
+        }
+
+        /// no need to switch to the next type
+        break;
+      }
+      case EncoderStatus::kUntreatedDiscontinuous: {
+        already_append_before_delta_changed = true;
+        keep_push_status = true;
+
+        if (encoder_context_.fixed_len <= ORC_MAX_SHORT_REPEAT_LENGTH) {
+          SwitchStatusTo(kTreatShortRepeat);
+        } else {
+          encoder_context_.delta_ctx->is_fixed_delta = true;
+          SwitchStatusTo(kTreatDelta);
+        }
+        break;
+      }
+      case EncoderStatus::kTreatPrevBuffer: {
+        Assert(data_buffer_->UnTreated());
+        Assert(data_buffer_->UnTouched() == 0);
+
+        // fixed_len must equal ORC_MIN_REPEAT: the kTreatPrevBuffer status
+        // only occurs when non-repeating elements change into ORC_MIN_REPEAT
+        // repeating elements, which means there must be non-repeating
+        // elements before the repeating elements
+        Assert(encoder_context_.fixed_len == ORC_MIN_REPEAT);
+
+        // will shift the fixed tail in kTreatDone
+        data_buffer_->BrushBackUnTreated(sizeof(int64) * ORC_MIN_REPEAT);
+        encoder_context_.var_len -= (ORC_MIN_REPEAT - 1);
+
+        keep_push_status = true;
+        SwitchStatusTo(kDetermineFlushPrevBuffer);
+        break;
+      }
+      case EncoderStatus::kFlush:
+        if (data_buffer_->Used() == 0) {
+          keep_push_status = true;
+          SwitchStatusTo(kFinish);
+          break;
+        }
+
+        Assert(data_buffer_->UnTreated() == data_buffer_->Used());
+
+        // must check whether delta & short repeat can be treated or not
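+        // A sketch of the decision below (ORC_MIN_REPEAT is 3, per the
+        // "minimum of 3 repetitions" comment above):
+        //   fixed_len <  ORC_MIN_REPEAT              -> fold into the
+        //                                               variable run
+        //   fixed_len <= ORC_MAX_SHORT_REPEAT_LENGTH -> SHORT_REPEAT
+        //   fixed_len >  ORC_MAX_SHORT_REPEAT_LENGTH -> fixed-delta DELTA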
+ if (encoder_context_.fixed_len != 0) { + if (encoder_context_.fixed_len < ORC_MIN_REPEAT) { + encoder_context_.var_len = encoder_context_.fixed_len; + encoder_context_.fixed_len = 0; + } else if ( // encoder_context_.fixed_len >= ORC_MIN_REPEAT && + encoder_context_.fixed_len <= ORC_MAX_SHORT_REPEAT_LENGTH) { + keep_push_status = true; + SwitchStatusTo(kTreatShortRepeat); + break; + } else { // encoder_context_.fixed_len > ORC_MAX_SHORT_REPEAT_LENGTH + keep_push_status = true; + encoder_context_.delta_ctx->is_fixed_delta = true; + SwitchStatusTo(kTreatDelta); + break; + } + } + + keep_push_status = true; + SwitchStatusTo(kDetermineFlushPrevBuffer); + break; + case EncoderStatus::kDetermineFlushPrevBuffer: { + size_t data_lens = data_buffer_->UnTreated() / sizeof(int64); + + if (data_lens <= ORC_MIN_REPEAT) { + keep_push_status = true; + SwitchStatusTo(kTreatDirect); + break; + } + + bool increasing = true; + bool decreasing = true; + auto delta_ctx = encoder_context_.delta_ctx; + auto direct_ctx = encoder_context_.direct_ctx; + auto pb_ctx = encoder_context_.pb_ctx; + int64 init_delta_val = (*data_buffer_)[1] - (*data_buffer_)[0]; + int64 curr_delta = 0; + int64 max_delta = 0; + uint32 zigzag_bits_90_p = 0; + + delta_ctx->is_fixed_delta = true; + pb_ctx->min = (*data_buffer_)[0]; + pb_ctx->max = (*data_buffer_)[0]; + + delta_ctx->adj_deltas[delta_ctx->adj_deltas_idx++] = init_delta_val; + + for (size_t i = 1; i < data_lens; i++) { + const int64 l1 = (*data_buffer_)[i]; + const int64 l0 = (*data_buffer_)[i - 1]; + curr_delta = l1 - l0; + pb_ctx->min = std::min(pb_ctx->min, l1); + pb_ctx->max = std::max(pb_ctx->max, l1); + + increasing &= (l0 <= l1); + decreasing &= (l0 >= l1); + + delta_ctx->is_fixed_delta &= (curr_delta == init_delta_val); + if (i > 1) { + delta_ctx->adj_deltas[delta_ctx->adj_deltas_idx++] = + std::abs(curr_delta); + max_delta = std::max(max_delta, delta_ctx->adj_deltas[i - 1]); + } + } + + // it's faster to exit under delta overflow condition without checking + // for PATCHED_BASE condition as encoding using DIRECT is faster and has + // less overhead than PATCHED_BASE + auto is_safe_subtract = [](int64 left, int64 right) -> bool { + return ((left ^ right) >= 0) || ((left ^ (left - right)) >= 0); + }; + + if (!is_safe_subtract(pb_ctx->max, pb_ctx->min)) { + keep_push_status = true; + SwitchStatusTo(kTreatDirect); + break; + } + + // invariant - subtracting any number from any other in the literals + // after option point won't overflow + + // if min is equal to max then the delta is 0, option condition happens + // for fixed values run >10 which cannot be encoded with SHORT_REPEAT + if (pb_ctx->min == pb_ctx->max) { + Assert(delta_ctx->is_fixed_delta); + Assert(!curr_delta); + + delta_ctx->fixed_delta_val = 0; + + keep_push_status = true; + SwitchStatusTo(kTreatDelta); + break; + } + + if (delta_ctx->is_fixed_delta) { + Assert(curr_delta == init_delta_val); + delta_ctx->fixed_delta_val = curr_delta; + + keep_push_status = true; + SwitchStatusTo(kTreatDelta); + break; + } + + if (init_delta_val != 0) { + delta_ctx->bits_delta_max = FindClosestBits(max_delta); + + // monotonic condition + if (increasing || decreasing) { + keep_push_status = true; + SwitchStatusTo(kTreatDelta); + break; + } + } + + // Without flush as delta then reset delta ctx + encoder_context_.ResetDeltaCtx(); + + pb_ctx->histogram_len = data_lens; + if (encoder_context_.is_sign) { + zigzag_buffer_->BrushBackAll(); + if (zigzag_buffer_->Capacity() < data_lens * sizeof(int64)) { + 
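+          // grow the scratch buffer so that every untreated value still
+          // fits after zigzag encoding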
zigzag_buffer_->ReSize(data_lens * sizeof(int64)); + } + ZigZagBuffers(data_buffer_->StartT(), zigzag_buffer_->StartT(), + data_lens); + zigzag_buffer_->Brush(data_lens * sizeof(int64)); + } + + // PATCHED_BASE encoding check + + // percentile values are computed for the zigzag encoded values. if the + // number of bit requirement between 90th and 100th percentile varies + // beyond a threshold then we need to patch the values. if the variation + // is not significant then we can use direct encoding + + BuildHistogram(pb_ctx->histogram, + encoder_context_.is_sign ? zigzag_buffer_->StartT() + : data_buffer_->StartT(), + data_lens); + + direct_ctx->zz_bits_100_p_inited = true; + direct_ctx->zigzag_bits_100_p = + GetPercentileBits(pb_ctx->histogram, pb_ctx->histogram_len, 1.0); + zigzag_bits_90_p = + GetPercentileBits(pb_ctx->histogram, pb_ctx->histogram_len, 0.9); + + // if the difference between 90th percentile and 100th percentile fixed + // bits is > 1 then we need patch the values + if (direct_ctx->zigzag_bits_100_p - zigzag_bits_90_p > 1) { + for (size_t i = 0; i < data_lens; i++) { + pb_ctx->base_patch_buffer[pb_ctx->base_patch_buffer_count++] = + ((*data_buffer_)[i] - pb_ctx->min); + } + + pb_ctx->histogram_len = data_lens; + // rebuild histogram with literals[*] - min + BuildHistogram(pb_ctx->histogram, pb_ctx->base_patch_buffer, + data_lens); + + // 95th percentile width is used to determine max allowed value + // after which patching will be done + pb_ctx->hist_bits_95_p = + GetPercentileBits(pb_ctx->histogram, pb_ctx->histogram_len, 0.95); + + // 100th percentile is used to compute the max patch width + pb_ctx->hist_bits_100_p = + GetPercentileBits(pb_ctx->histogram, pb_ctx->histogram_len, 1.0); + + // after base reducing the values, if the difference in bits between + // 95th percentile and 100th percentile value is zero then there + // is no point in patching the values, in which case we will + // fallback to DIRECT encoding. + // The decision to use patched base was based on zigzag values, but + // the actual patching is done on base reduced literals. + if ((pb_ctx->hist_bits_100_p - pb_ctx->hist_bits_95_p) != 0) { + keep_push_status = true; + SwitchStatusTo(kTreatPatchedBase); + break; + } + } + + encoder_context_.ResetPbCtx(); + + keep_push_status = true; + SwitchStatusTo(kTreatDirect); + break; + } + case EncoderStatus::kTreatShortRepeat: { + TreatShortRepeat(); + encoder_context_.fixed_len = 0; + + keep_push_status = true; + SwitchStatusTo(kTreatDone); + break; + } + case EncoderStatus::kTreatDirect: { + TreatDirect(); + encoder_context_.var_len = 0; + + keep_push_status = true; + SwitchStatusTo(kTreatDone); + break; + } + case EncoderStatus::kTreatPatchedBase: { + TreatPatchedBase(); + encoder_context_.var_len = 0; + encoder_context_.ResetPbCtx(); + keep_push_status = true; + SwitchStatusTo(kTreatDone); + break; + } + case EncoderStatus::kTreatDelta: { + bool reset_fix = TreatDelta(); + if (reset_fix) { + encoder_context_.fixed_len = 0; + } else { + encoder_context_.var_len = 0; + } + encoder_context_.ResetDeltaCtx(); + keep_push_status = true; + SwitchStatusTo(kTreatDone); + + break; + } + case EncoderStatus::kTreatDone: { + Assert(data_buffer_->UnTreated() != 0); + + // left shift + data_buffer_->TreatedAll(); + data_buffer_->BrushUnTreatedAll(); + + if (is_flush) { + keep_push_status = true; + SwitchStatusTo( + (encoder_context_.fixed_len == 0 && encoder_context_.var_len == 0) + ? 
kFinish + : kFlush); + } else { + keep_push_status = already_append_before_delta_changed; + SwitchStatusTo(kUntreated); + already_append_before_delta_changed = false; + } + + break; + } + case EncoderStatus::kFinish: + keep_push_status = false; + SwitchStatusTo(kInvalid); + break; + default: + break; + } + } +} + +void PaxOrcEncoder::TreatShortRepeat() { + int64 repeat_val = encoder_context_.is_sign ? ZigZag((*data_buffer_)[0]) + : (*data_buffer_)[0]; + const uint32 num_of_bits = FindClosestBits(repeat_val); + const uint32 num_of_bytes = + num_of_bits % 8 == 0 ? (num_of_bits >> 3) : ((num_of_bits >> 3) + 1); + + auto header = (uint8)(EncodingType::kShortRepeat << 6); + + header |= encoder_context_.fixed_len - ORC_MIN_REPEAT; + header |= ((num_of_bytes - 1) << 3); + + while (result_buffer_->Available() < (1) + num_of_bytes) { + result_buffer_->ReSize(result_buffer_->Capacity() * 2); + } + + result_buffer_->Write(header); + result_buffer_->Brush(1); + for (auto i = static_cast(num_of_bytes - 1); i >= 0; i--) { + int64 b = ((repeat_val >> (i * 8)) & 0xff); + result_buffer_->Write(static_cast(b)); + result_buffer_->Brush(1); + } +} + +void PaxOrcEncoder::TreatDirect() { + size_t data_lens = data_buffer_->UnTreated() / sizeof(int64); + auto direct_ctx = encoder_context_.direct_ctx; + int64 *data_write = nullptr; + + if (!direct_ctx->zz_bits_100_p_inited) { + auto pb_ctx = encoder_context_.pb_ctx; + if (encoder_context_.is_sign) { + ZigZagBuffers(data_buffer_->StartT(), data_buffer_->StartT(), data_lens); + } + // need to borrow patched base ctx to get the max zigzag bits + pb_ctx->histogram_len = data_lens; + BuildHistogram(pb_ctx->histogram, data_buffer_->StartT(), data_lens); + direct_ctx->zigzag_bits_100_p = + GetPercentileBits(pb_ctx->histogram, pb_ctx->histogram_len, 1.0); + encoder_context_.ResetPbCtx(); + data_write = data_buffer_->StartT(); + } else if (encoder_context_.is_sign) { + Assert(zigzag_buffer_->Used() == data_buffer_->UnTreated()); + data_write = zigzag_buffer_->StartT(); + } else { // inited hist and not sign + data_write = data_buffer_->StartT(); + } + + uint32 fb = GetClosestAlignedBits(direct_ctx->zigzag_bits_100_p); + + const uint32 efb = EncodeBits(fb) << 1; + const uint32 tail_bits = ((encoder_context_.var_len - 1) & 0x100) >> 8; + const char first_byte = + static_cast((EncodingType::kDirect << 6) | efb | tail_bits); + const char second_byte = + static_cast((encoder_context_.var_len - 1) & 0xff); + + // worst case: will write (3 bytes header + untreated buffer) + while (result_buffer_->Available() < + static_cast((3) + data_buffer_->UnTreated())) { + result_buffer_->ReSize(result_buffer_->Capacity() * 2); + } + + result_buffer_->Write(first_byte); + result_buffer_->Brush(1); + result_buffer_->Write(second_byte); + result_buffer_->Brush(1); + + WriteLongs(result_buffer_, data_write, 0, data_lens, fb); + + encoder_context_.ResetDirectCtx(); +} + +void PaxOrcEncoder::PreparePatchedBlob() { + size_t data_lens = data_buffer_->UnTreated() / sizeof(int64); + auto pb_ctx = encoder_context_.pb_ctx; + + // mask will be max value beyond which patch will be generated + int64 mask = + static_cast(static_cast(1) << pb_ctx->hist_bits_95_p) - 1; + + // since we are considering only 95 percentile, the size of gap and + // patch array can contain only be 5% values + pb_ctx->patch_len = static_cast(std::ceil((data_lens / 20))); + + // #bit for patch + pb_ctx->patch_width = pb_ctx->hist_bits_100_p - pb_ctx->hist_bits_95_p; + pb_ctx->patch_width = GetClosestBits(pb_ctx->patch_width); + + 
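+  // For example, with hist_bits_100_p = 21 and hist_bits_95_p = 16 the raw
+  // patch width is 5 bits; GetClosestBits is assumed to round this up to the
+  // nearest width the RLEv2 bit-packer supports, by analogy with
+  // GetClosestAlignedBits used elsewhere in this file.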
// if the patch bit requirement is 64 then it will not be possible to pack
+  // gap and patch together in a long. To make sure gap and patch can be
+  // packed together, adjust the patch width
+  if (pb_ctx->patch_width == 64) {
+    pb_ctx->patch_width = 56;
+    pb_ctx->hist_bits_95_p = 8;
+    mask =
+        static_cast<int64>(static_cast<uint64>(1) << pb_ctx->hist_bits_95_p) -
+        1;
+  }
+
+  uint32 gap_idx = 0;
+  uint32 patch_idx = 0;
+  size_t prev = 0;
+  size_t max_gap = 0;
+
+  std::vector<int64> gap_list;
+  std::vector<int64> patch_list;
+
+  for (size_t i = 0; i < data_lens; i++) {
+    // if the value is above the mask then create the patch and record the gap
+    if (pb_ctx->base_patch_buffer[i] > mask) {
+      size_t gap = i - prev;
+      if (gap > max_gap) {
+        max_gap = gap;
+      }
+
+      // gaps are relative, so store the previous patched value index
+      prev = i;
+      gap_list.push_back(static_cast<int64>(gap));
+      gap_idx++;
+
+      // extract the most significant bits that are over mask bits
+      int64 patch = pb_ctx->base_patch_buffer[i] >> pb_ctx->hist_bits_95_p;
+      patch_list.push_back(patch);
+      patch_idx++;
+
+      // strip off the MSB to enable safe bit packing
+      pb_ctx->base_patch_buffer[i] &= mask;
+    }
+  }
+
+  // adjust the patch length to the number of entries in the gap list
+  pb_ctx->patch_len = gap_idx;
+
+  // if the element to be patched is the first and only element then
+  // max gap will be 0, but to store the gap as 0 we need at least 1 bit
+  if (max_gap == 0 && pb_ctx->patch_len != 0) {
+    pb_ctx->patch_gap_width = 1;
+  } else {
+    pb_ctx->patch_gap_width = FindClosestBits(static_cast<int64>(max_gap));
+  }
+
+  // special case: if the patch gap width is greater than 256, then
+  // we need 9 bits to encode the gap width. But we only have 3 bits in
+  // the header to record the gap width. To deal with this case, we save
+  // two entries in the patch list in the following way
+  //   256 gap width => 0 for patch value
+  //   actual gap - 256 => actual patch value
+  // We do the same for gap width = 511. If the element to be patched is
+  // the last element in the scope then the gap width will be 511. In this
+  // case we will have 3 entries in the patch list in the following way
+  //   255 gap width => 0 for patch value
+  //   255 gap width => 0 for patch value
+  //   1 gap width => actual patch value
+  if (pb_ctx->patch_gap_width > 8) {
+    pb_ctx->patch_gap_width = 8;
+    // for gap = 511, we need two additional entries in the patch list
+    if (max_gap == 511) {
+      pb_ctx->patch_len += 2;
+    } else {
+      pb_ctx->patch_len += 1;
+    }
+  }
+
+  // create gap vs patch list
+  gap_idx = 0;
+  patch_idx = 0;
+  for (size_t i = 0; i < pb_ctx->patch_len; i++) {
+    int64 g = gap_list[gap_idx++];
+    int64 p = patch_list[patch_idx++];
+    while (g > 255) {
+      pb_ctx->gap_sign_patch_list[pb_ctx->gap_sign_patch_list_count++] =
+          (255L << pb_ctx->patch_width);
+      i++;
+      g -= 255;
+    }
+
+    // store the patch value in the LSBs and the gap in the MSBs
+    pb_ctx->gap_sign_patch_list[pb_ctx->gap_sign_patch_list_count++] =
+        ((g << pb_ctx->patch_width) | p);
+  }
+}
+
+void PaxOrcEncoder::TreatPatchedBase() {
+  // NOTE: Aligned bit packing cannot be applied for PATCHED_BASE encoding
+  // because the patch is applied to the MSB bits. For example: if the fixed
+  // bit width of the base value is 7 bits and the patch is 3 bits, the actual
+  // value is constructed by shifting the patch left by 7 positions.
+  //   actual_value = (patch << 7) | base_value
+  // So, if we align base_value then actual_value cannot be reconstructed.
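+  // A concrete instance of the NOTE above: with a 7-bit base and a 3-bit
+  // patch, base_value = 25 (0b0011001) and patch = 5 (0b101) decode to
+  //   actual_value = (5 << 7) | 25 = 665,
+  // which only holds if the base stays packed at exactly 7 bits.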
+ size_t data_lens = data_buffer_->UnTreated() / sizeof(int64); + auto pb_ctx = encoder_context_.pb_ctx; + + PreparePatchedBlob(); + + // write the number of fixed bits required in next 5 bits + const uint32 efb = EncodeBits(pb_ctx->hist_bits_95_p) << 1; + // adjust variable run length, they are one off + uint16 var_len = encoder_context_.var_len - 1; + + // extract the 9th bit of run length + const uint32 tail_bits = (var_len & 0x100) >> 8; + + // create first byte of the header + const char first_byte = + static_cast((EncodingType::kPatchedBase << 6) | efb | tail_bits); + + // second byte of the header stores the remaining 8 bits of runlength + const char second_byte = static_cast(var_len & 0xff); + + // if the min value is negative toggle the sign + const bool neg = (pb_ctx->min < 0); + if (neg) { + pb_ctx->min = -pb_ctx->min; + } + + // find the number of bytes required for base and shift it by 5 bits + // to accommodate patch width. The additional bit is used to store the sign + // of the base value. + const uint32 base_width = FindClosestBits(pb_ctx->min) + 1; + const uint32 base_bytes = + base_width % 8 == 0 ? base_width / 8 : (base_width / 8) + 1; + const uint32 bb = (base_bytes - 1) << 5; + + // if the base value is negative then set MSB to 1 + if (neg) { + pb_ctx->min |= (1LL << ((base_bytes * 8) - 1)); + } + + // third byte contains 3 bits for number of bytes occupied by base + // and 5 bits for patch_width + const char third_byte = + static_cast(bb | EncodeBits(pb_ctx->patch_width)); + + // fourth byte contains 3 bits for page gap width and 5 bits for + // patch length + const char fourth_byte = + static_cast((pb_ctx->patch_gap_width - 1) << 5 | pb_ctx->patch_len); + + // In fact, Worst case will not write more than header(4) + UnTreated() + // But the bytes of writes is not easy to estimate + while (result_buffer_->Available() < + static_cast((4) + data_buffer_->UnTreated())) { + result_buffer_->ReSize(result_buffer_->Capacity() * 2); + } + + // write header + result_buffer_->Write(first_byte); + result_buffer_->Brush(1); + result_buffer_->Write(second_byte); + result_buffer_->Brush(1); + result_buffer_->Write(third_byte); + result_buffer_->Brush(1); + result_buffer_->Write(fourth_byte); + result_buffer_->Brush(1); + + // write the base value using fixed bytes in big endian order + for (auto i = static_cast(base_bytes - 1); i >= 0; i--) { + char b = static_cast(((pb_ctx->min >> (i * 8)) & 0xff)); + result_buffer_->Write(b); + // TODO(jiaqizho): brush out loop + result_buffer_->Brush(1); + } + + // base reduced literals are bit packed + uint32 closest_bits = GetClosestBits(pb_ctx->hist_bits_95_p); + + WriteLongs(result_buffer_, pb_ctx->base_patch_buffer, 0, data_lens, + closest_bits); + + // write patch list + closest_bits = GetClosestBits(pb_ctx->patch_gap_width + pb_ctx->patch_width); + + WriteLongs(result_buffer_, pb_ctx->gap_sign_patch_list, 0, pb_ctx->patch_len, + closest_bits); +} + +bool PaxOrcEncoder::TreatDelta() { + bool reset_fix = true; + auto delta_ctx = encoder_context_.delta_ctx; + uint32 len = 0; + uint32 fb = delta_ctx->bits_delta_max; + uint32 efb = 0; + + fb = GetClosestAlignedBits(fb); + size_t data_lens = data_buffer_->UnTreated() / sizeof(int64); + + if (delta_ctx->is_fixed_delta) { + // if fixed run length is greater than threshold then it will be fixed + // delta sequence with delta value 0 else fixed delta sequence with + // non-zero delta value + if (encoder_context_.fixed_len > ORC_MIN_REPEAT) { + // ex. 
sequence: 2 2 2 2 2 2 2 2 + len = encoder_context_.fixed_len - 1; + reset_fix = true; + } else { + // ex. sequence: 4 6 8 10 12 14 16 + len = encoder_context_.var_len - 1; + reset_fix = false; + } + } else { + if (fb == 1) { + fb = 2; + } + efb = EncodeBits(fb) << 1; + len = encoder_context_.var_len - 1; + reset_fix = false; + } + + const uint32 tail_bits = (len & 0x100) >> 8; + + // create first byte of the header + const char first_byte = + static_cast((EncodingType::kDelta << 6) | efb | tail_bits); + + // second byte of the header stores the remaining 8 bits of runlength + const char second_byte = static_cast(len & 0xff); + + // In fact, Worst case will not write more than header(2) + UnTreated() + // But the bytes of writes is not easy to estimate + while (result_buffer_->Available() < + static_cast((2) + data_buffer_->UnTreated())) { + result_buffer_->ReSize(result_buffer_->Capacity() * 2); + } + + result_buffer_->Write(first_byte); + result_buffer_->Brush(1); + result_buffer_->Write(second_byte); + result_buffer_->Brush(1); + + if (encoder_context_.is_sign) { + WriteSignedLong(result_buffer_, (*data_buffer_)[0]); + } else { + WriteUnsignedLong(result_buffer_, (*data_buffer_)[0]); + } + + if (delta_ctx->is_fixed_delta) { + WriteSignedLong(result_buffer_, delta_ctx->fixed_delta_val); + } else { + // store the first value as delta value using zigzag encoding + WriteSignedLong(result_buffer_, delta_ctx->adj_deltas[0]); + + // adjacent delta values are bit packed. The length of adj_deltas array is + // always one less than the number of literals (delta difference for n + // elements is n-1). We have already written one element, write the + // remaining data_lens - 2 elements here + WriteLongs(result_buffer_, delta_ctx->adj_deltas, 1, data_lens - 2, fb); + } + + return reset_fix; +} + +} // namespace pax diff --git a/contrib/pax_storage/src/cpp/storage/columns/pax_rlev2_encoding.h b/contrib/pax_storage/src/cpp/storage/columns/pax_rlev2_encoding.h new file mode 100644 index 00000000000..c0149c770d9 --- /dev/null +++ b/contrib/pax_storage/src/cpp/storage/columns/pax_rlev2_encoding.h @@ -0,0 +1,116 @@ +#pragma once + +#include +#include + +#include "comm/cbdb_wrappers.h" +#include "storage/columns/pax_encoding.h" +#include "storage/columns/pax_encoding_utils.h" +#include "storage/pax_buffer.h" + +namespace pax { + +class PaxOrcEncoder final : public PaxEncoder { + public: + explicit PaxOrcEncoder(const EncodingOption &encoder_options); + + ~PaxOrcEncoder() override; + + void Append(int64 data) override; + + void Flush() override; + + private: + struct EncoderContext { + bool is_sign; + + // repeat lengths + uint16 fixed_len; + + // non-repeat lengths + uint16 var_len; + + int64 prev_delta; + int64 current_delta; + + EncoderContext(); + ~EncoderContext(); + + struct DeltaContext; + struct DirectContext; + struct PatchBaseContext; + + struct DeltaContext *delta_ctx; + struct DirectContext *direct_ctx; + struct PatchBaseContext *pb_ctx; + + inline void ResetDirectCtx() const; + inline void ResetDeltaCtx() const; + inline void ResetPbCtx() const; + + private: + char *internal_buffer_; + }; + + enum EncoderStatus { + // current encoder have been flushed or no init + kInvalid = 0, + // no elements in buffer, accept the first element + kInit, + // 1 element in buffer, accept the second element + kTwoElements, + // at lease 2 elements in buffer + kUntreated, + // at lease `ORC_MIN_REPEAT` repeating elements change to non-repeating + // elements + kUntreatedDiscontinuous, + // non-repeating elements 
change to `ORC_MIN_REPEAT` repeating elements + kTreatPrevBuffer, + // flush all buffer + kFlush, + // treat the non-repeating buffer which is before repeating datas + kDetermineFlushPrevBuffer, + // treat the buffer which belongs to the Short-Repeat rule + kTreatShortRepeat, + // treat the buffer which can deal with other types + kTreatDirect, + // treat the buffer which belongs to the Patched-Base rule + kTreatPatchedBase, + // treat the buffer which belongs to the Delta rule + kTreatDelta, + // done with treat or flush buffer + kTreatDone, + // all done, will change to invalid + kFinish, + }; + + private: + void AppendInternal(int64 data, bool is_flush); + + void AppendData(int64 data); + + void SwitchStatusTo(EncoderStatus new_status); + + void TreatShortRepeat(); + + void TreatDirect(); + + void PreparePatchedBlob(); + + void TreatPatchedBase(); + + bool TreatDelta(); + + private: + EncoderContext encoder_context_; + UntreatedDataBuffer *data_buffer_; + DataBuffer *zigzag_buffer_; + EncoderStatus status_; +}; + +#ifdef RUN_GTEST +void WriteLongs(DataBuffer *data_buffer, const int64 *input, + uint32 offset, size_t len, uint32 bits); +#endif + +} // namespace pax diff --git a/contrib/pax_storage/src/cpp/storage/file_system.cc b/contrib/pax_storage/src/cpp/storage/file_system.cc new file mode 100644 index 00000000000..5d308a7eca6 --- /dev/null +++ b/contrib/pax_storage/src/cpp/storage/file_system.cc @@ -0,0 +1,30 @@ +#include "storage/file_system.h" + +#include "exceptions/CException.h" +namespace pax { + +void File::ReadN(void *ptr, size_t n) { + auto num = Read(ptr, n); + CBDB_CHECK(static_cast(num) == n, + cbdb::CException::ExType::kExTypeIOError); +} + +void File::WriteN(const void *ptr, size_t n) { + auto num = Write(ptr, n); + CBDB_CHECK(static_cast(num) == n, + cbdb::CException::ExType::kExTypeIOError); +} + +void File::PReadN(void *buf, size_t count, off_t offset) { + auto num = PRead(buf, count, offset); + CBDB_CHECK(static_cast(num) == count, + cbdb::CException::ExType::kExTypeIOError); +} + +void File::PWriteN(const void *buf, size_t count, off_t offset) { + auto num = PWrite(buf, count, offset); + CBDB_CHECK(static_cast(num) == count, + cbdb::CException::ExType::kExTypeIOError); +} + +} // namespace pax diff --git a/contrib/pax_storage/src/cpp/storage/file_system.h b/contrib/pax_storage/src/cpp/storage/file_system.h new file mode 100644 index 00000000000..9040db75580 --- /dev/null +++ b/contrib/pax_storage/src/cpp/storage/file_system.h @@ -0,0 +1,60 @@ +#pragma once + +#include +#include + +namespace pax { + +/* + * The IO functions may have error that have two different ways + * to handle errors. In C style, the function returns -1 and set + * the errno. The other style likes Java that any error will throw + * an exception. + * The IO functions provided by postgres will raise an ERROR + * if unexpected behavior happens. + * + * The following IO functions use the same behavior like postgres, + * but we throw an exception in C++ code. + */ +class File { + public: + virtual ~File() = default; + + // The following [P]Read/[P]Write may partially read/write + virtual ssize_t Read(void *ptr, size_t n) = 0; + virtual ssize_t Write(const void *ptr, size_t n) = 0; + virtual ssize_t PWrite(const void *buf, size_t count, off_t offset) = 0; + virtual ssize_t PRead(void *buf, size_t count, off_t offset) = 0; + + // The *N version of Read/Write means that R/W must read/write complete + // number of bytes, or the function should throw an exception. 
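+  // For example, a fixed-size header must arrive in full (buffer and handle
+  // names here are illustrative only):
+  //   char header[16];
+  //   file->ReadN(header, sizeof(header));  // throws on a short read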
+ // These 4 methods have default implementation that simply calls read/write + // and check the returned number of bytes. + virtual void ReadN(void *ptr, size_t n); + virtual void WriteN(const void *ptr, size_t n); + virtual void PWriteN(const void *buf, size_t count, off_t offset); + virtual void PReadN(void *buf, size_t count, off_t offset); + + virtual void Flush() = 0; + virtual void Close() = 0; + virtual size_t FileLength() const = 0; + virtual std::string GetPath() const = 0; +}; + +class FileSystem { + public: + virtual ~FileSystem() = default; + virtual File *Open(const std::string &file_path) = 0; + virtual std::string BuildPath(const File *file) const = 0; + virtual void Delete(const std::string &file_path) const = 0; + virtual std::vector ListDirectory( + const std::string &path) const = 0; + virtual void CopyFile(const std::string &src_file_path, + const std::string &dst_file_path) const = 0; + virtual int CreateDirectory(const std::string &path) const = 0; + virtual void DeleteDirectory(const std::string &path, + bool delete_topleveldir) const = 0; + protected: +}; + +} // namespace pax diff --git a/contrib/pax_storage/src/cpp/storage/file_system_test.cc b/contrib/pax_storage/src/cpp/storage/file_system_test.cc new file mode 100644 index 00000000000..592e313cee9 --- /dev/null +++ b/contrib/pax_storage/src/cpp/storage/file_system_test.cc @@ -0,0 +1,189 @@ +#include + +#include "comm/singleton.h" +#include "storage/local_file_system.h" + +namespace pax::tests { +#define PAX_TEST_CMD_LENGTH 2048 +#define PAX_TEST_LIST_FILE_NUM 10 + +class LocalFileSystemTest : public ::testing::Test { + public: + void SetUp() override { + remove(file_name_.c_str()); + remove(file_path_.c_str()); + } + + void TearDown() override { + remove(file_name_.c_str()); + remove(file_path_.c_str()); + } + + protected: + const std::string file_name_ = "./test.file"; + const std::string file_path_ = "./test_path"; + const std::string file_full_path_ = + "./test_path/16400/GPDB_1_302206171/13261/16394"; +}; + +TEST_F(LocalFileSystemTest, Open) { + auto local_fs = pax::Singleton::GetInstance(); + ASSERT_NE(nullptr, local_fs); + + auto file_ptr = local_fs->Open(file_name_); + EXPECT_NE(nullptr, file_ptr); + + file_ptr->Close(); + delete file_ptr; +} + +TEST_F(LocalFileSystemTest, BuildPath) { + auto local_fs = pax::Singleton::GetInstance(); + ASSERT_NE(nullptr, local_fs); + + auto file_ptr = local_fs->Open(file_name_); + ASSERT_NE(nullptr, file_ptr); + + auto path = local_fs->BuildPath(file_ptr); + ASSERT_EQ(path, "./test.file"); + + file_ptr->Close(); + delete file_ptr; +} + +TEST_F(LocalFileSystemTest, WriteRead) { + auto local_fs = pax::Singleton::GetInstance(); + ASSERT_NE(nullptr, local_fs); + + auto file_ptr = local_fs->Open(file_name_); + ASSERT_NE(nullptr, file_ptr); + + auto write_size = file_ptr->Write("abc", 3); + ASSERT_EQ(3, write_size); + + file_ptr->Flush(); + file_ptr->Close(); + file_ptr = local_fs->Open(file_name_); + ASSERT_NE(nullptr, file_ptr); + + char buff[10] = {0}; + auto read_size = file_ptr->Read(buff, 3); + ASSERT_EQ(3, read_size); + ASSERT_EQ(strncmp("abc", buff, 3), 0); +} + +TEST_F(LocalFileSystemTest, ListDirectory) { + FileSystem *fs = pax::Singleton::GetInstance(); + std::vector filelist; + + fs->DeleteDirectory(file_path_, true); + ASSERT_NE(access(file_path_.c_str(), F_OK), 0); + + ASSERT_EQ(0, fs->CreateDirectory(file_path_)); + ASSERT_EQ(access(file_path_.c_str(), F_OK), 0); + + for (int i = 0; i < PAX_TEST_LIST_FILE_NUM; i++) { + std::string path; + 
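+    // build "./test_path/test<i>" and create it as an empty file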
path.append(file_path_); + path.append("/test"); + path.append(std::to_string(i)); + File *f = fs->Open(path); + f->Close(); + } + + filelist = fs->ListDirectory(file_path_); + ASSERT_EQ(filelist.size(), PAX_TEST_LIST_FILE_NUM); +} + +TEST_F(LocalFileSystemTest, CopyFile) { + static const char *pax_copy_test_dir = "./copytest"; + static const char *pax_copy_src_path = "./test_src"; + static const char *pax_copy_dst_path = "./copytest/test_dst"; + + int result = 0; + FileSystem *fs = pax::Singleton::GetInstance(); + + fs->DeleteDirectory(pax_copy_test_dir, true); + ASSERT_NE(access(pax_copy_test_dir, F_OK), 0); + + File *f = fs->Open(pax_copy_src_path); + f->Close(); + + cbdb::MakedirRecursive(pax_copy_test_dir); + + InitFileAccess(); + fs->CopyFile(pax_copy_src_path, pax_copy_dst_path); + result = access(pax_copy_dst_path, F_OK); + ASSERT_NE(result, -1); + + fs->DeleteDirectory(pax_copy_test_dir, true); +} + +TEST_F(LocalFileSystemTest, MakedirRecursive) { + int result = 0; + struct stat st {}; + FileSystem *fs = pax::Singleton::GetInstance(); + + fs->DeleteDirectory(file_full_path_, true); + ASSERT_NE(access(file_full_path_.c_str(), F_OK), 0); + + cbdb::MakedirRecursive(file_full_path_.c_str()); + result = stat(file_full_path_.c_str(), &st); + ASSERT_EQ(result, 0); +} + +TEST_F(LocalFileSystemTest, CreateDeleteDirectory) { + FileSystem *fs = pax::Singleton::GetInstance(); + std::vector filelist; + + fs->DeleteDirectory(file_path_, true); + ASSERT_NE(access(file_path_.c_str(), F_OK), 0); + + ASSERT_EQ(0, fs->CreateDirectory(file_path_)); + ASSERT_EQ(access(file_path_.c_str(), F_OK), 0); + + for (int i = 0; i < PAX_TEST_LIST_FILE_NUM; i++) { + std::string path; + path.append(file_path_); + path.append("/test"); + path.append(std::to_string(i)); + File *f = fs->Open(path); + f->Close(); + } + + filelist = fs->ListDirectory(file_path_); + ASSERT_EQ(filelist.size(), PAX_TEST_LIST_FILE_NUM); + + fs->DeleteDirectory(file_path_, true); + ASSERT_NE(access(file_path_.c_str(), F_OK), 0); +} + +TEST_F(LocalFileSystemTest, DeleteDirectoryReserveToplevel) { + FileSystem *fs = pax::Singleton::GetInstance(); + std::vector filelist; + + fs->DeleteDirectory(file_path_, true); + ASSERT_NE(access(file_path_.c_str(), F_OK), 0); + + ASSERT_EQ(0, fs->CreateDirectory(file_path_)); + ASSERT_EQ(access(file_path_.c_str(), F_OK), 0); + + for (int i = 0; i < PAX_TEST_LIST_FILE_NUM; i++) { + std::string path; + path.append(file_path_); + path.append("/test"); + path.append(std::to_string(i)); + File *f = fs->Open(path); + f->Close(); + } + + filelist = fs->ListDirectory(file_path_); + ASSERT_EQ(filelist.size(), PAX_TEST_LIST_FILE_NUM); + + fs->DeleteDirectory(file_path_, false); + ASSERT_EQ(access(file_path_.c_str(), F_OK), 0); + + filelist = fs->ListDirectory(file_path_); + ASSERT_EQ(filelist.size(), 0); +} +} // namespace pax::tests diff --git a/contrib/pax_storage/src/cpp/storage/local_file_system.cc b/contrib/pax_storage/src/cpp/storage/local_file_system.cc new file mode 100644 index 00000000000..d9952a84ab2 --- /dev/null +++ b/contrib/pax_storage/src/cpp/storage/local_file_system.cc @@ -0,0 +1,149 @@ +#include "storage/local_file_system.h" + +#include +#include +#include + +#include "exceptions/CException.h" + +namespace pax { + +LocalFile::LocalFile(int fd, const std::string &file_path) + : File(), fd_(fd), file_path_(file_path) { + Assert(fd >= 0); +} + +ssize_t LocalFile::Read(void *ptr, size_t n) { + ssize_t num; + + do { + num = read(fd_, ptr, n); + } while (unlikely(num == -1 && errno == EINTR)); + + 
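+  // read(2) fails with EINTR when a signal arrives before any bytes are
+  // transferred; retrying is safe. A short read (0 <= num < n) is still
+  // possible and is left to the *N wrappers in file_system.cc to reject.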
CBDB_CHECK(num >= 0, cbdb::CException::ExType::kExTypeIOError); + return num; +} + +ssize_t LocalFile::Write(const void *ptr, size_t n) { + ssize_t num; + + do { + num = write(fd_, ptr, n); + } while (unlikely(num == -1 && errno == EINTR)); + + CBDB_CHECK(num >= 0, cbdb::CException::ExType::kExTypeIOError); + return num; +} + +ssize_t LocalFile::PRead(void *ptr, size_t n, off_t offset) { + ssize_t num; + + do { + num = pread(fd_, ptr, n, offset); + } while (unlikely(num == -1 && errno == EINTR)); + + CBDB_CHECK(num >= 0, cbdb::CException::ExType::kExTypeIOError); + return num; +} + +ssize_t LocalFile::PWrite(const void *ptr, size_t n, off_t offset) { + ssize_t num; + + do { + num = pwrite(fd_, ptr, n, offset); + } while (unlikely(num == -1 && errno == EINTR)); + + CBDB_CHECK(num >= 0, cbdb::CException::ExType::kExTypeIOError); + return num; +} + +void LocalFile::Close() { + int rc; + + do { + rc = close(fd_); + } while (unlikely(rc == -1 && errno == EINTR)); + CBDB_CHECK(rc == 0, cbdb::CException::ExType::kExTypeIOError); +} + +size_t LocalFile::FileLength() const { + struct stat file_stat {}; + + CBDB_CHECK(fstat(fd_, &file_stat) == 0, + cbdb::CException::ExType::kExTypeIOError); + return static_cast(file_stat.st_size); +} + +void LocalFile::Flush() { + CBDB_CHECK(fsync(fd_) == 0, cbdb::CException::ExType::kExTypeIOError); +} + +std::string LocalFile::GetPath() const { return file_path_; } + +File *LocalFileSystem::Open(const std::string &file_path) { + LocalFile *local_file; + int fd = open(file_path.c_str(), O_CREAT | O_RDWR, 0644); + + CBDB_CHECK(fd >= 0, cbdb::CException::ExType::kExTypeIOError); + local_file = new LocalFile(fd, file_path); + return local_file; +} + +void LocalFileSystem::Delete(const std::string &file_path) const { + int rc; + + rc = remove(file_path.c_str()); + CBDB_CHECK(rc == 0 || errno == ENOENT, + cbdb::CException::ExType::kExTypeIOError); +} + +std::string LocalFileSystem::BuildPath(const File *file) const { + return file->GetPath(); +} + +std::vector LocalFileSystem::ListDirectory( + const std::string &path) const { + DIR *dir; + std::vector filelist; + const char *filepath = path.c_str(); + + Assert(filepath != NULL && filepath[0] != '\0'); + + dir = opendir(filepath); + CBDB_CHECK(dir, cbdb::CException::ExType::kExTypeFileOperationError); + + try { + struct dirent *direntry; + while ((direntry = readdir(dir)) != NULL) { + char *filename = &direntry->d_name[0]; + // skip to add '.' or '..' direntry for file enumerating under folder on + // linux OS. + if (*filename == '.' 
&& + (!strcmp(filename, ".") || !strcmp(filename, ".."))) + continue; + filelist.push_back(std::string(filename)); + } + } catch (std::exception &ex) { + closedir(dir); + CBDB_RAISE(cbdb::CException::ExType::kExTypeFileOperationError); + } + + return filelist; +} + +void LocalFileSystem::CopyFile(const std::string &src_file_path, + const std::string &dst_file_path) const { + cbdb::CopyFile(src_file_path.c_str(), dst_file_path.c_str()); +} + +int LocalFileSystem::CreateDirectory(const std::string &path) const { + return cbdb::PathNameCreateDir(path.c_str()); +} + +void LocalFileSystem::DeleteDirectory(const std::string &path, + bool delete_topleveldir) const { + cbdb::PathNameDeleteDir(path.c_str(), delete_topleveldir); +} + +} // namespace pax + diff --git a/contrib/pax_storage/src/cpp/storage/local_file_system.h b/contrib/pax_storage/src/cpp/storage/local_file_system.h new file mode 100644 index 00000000000..ff9bb0c9eb9 --- /dev/null +++ b/contrib/pax_storage/src/cpp/storage/local_file_system.h @@ -0,0 +1,52 @@ +#pragma once + +#include + +#include +#include +#include + +#include "comm/cbdb_wrappers.h" +#include "comm/singleton.h" +#include "storage/file_system.h" + + +namespace pax { +class LocalFile final : public File { + public: + LocalFile(int fd, const std::string &file_path); + + ssize_t Read(void *ptr, size_t n) override; + ssize_t Write(const void *ptr, size_t n) override; + ssize_t PWrite(const void *ptr, size_t n, off_t offset) override; + ssize_t PRead(void *ptr, size_t n, off_t offset) override; + size_t FileLength() const override; + void Close() override; + void Flush() override; + std::string GetPath() const override; + + private: + int fd_; + std::string file_path_; + // TODO(jiaqizho): added resource owner +}; + +class LocalFileSystem final : public FileSystem { + friend class Singleton; + + public: + File *Open(const std::string &file_path) override; + std::string BuildPath(const File *file) const override; + void Delete(const std::string &file_path) const override; + std::vector ListDirectory( + const std::string &path) const override; + void CopyFile(const std::string &src_file_path, + const std::string &dst_file_path) const override; + int CreateDirectory(const std::string &path) const override; + void DeleteDirectory(const std::string &path, + bool delete_topleveldir) const override; + private: + LocalFileSystem() = default; +}; +} // namespace pax + diff --git a/contrib/pax_storage/src/cpp/storage/micro_partition.cc b/contrib/pax_storage/src/cpp/storage/micro_partition.cc new file mode 100644 index 00000000000..227d928a6a6 --- /dev/null +++ b/contrib/pax_storage/src/cpp/storage/micro_partition.cc @@ -0,0 +1,51 @@ +#include "storage/micro_partition.h" + +#include + +#include "storage/pax_itemptr.h" + +namespace pax { + +CTupleSlot::CTupleSlot(TupleTableSlot *tuple_slot) + : slot_(tuple_slot), table_no_(0), block_number_(0), offset_(0) {} + +void CTupleSlot::StoreVirtualTuple() { + // TODO(gongxun): set tts_tid, how to get block number from block id + slot_->tts_tid = + PaxItemPointer::GetTupleId(table_no_, block_number_, offset_); + slot_->tts_flags &= ~TTS_FLAG_EMPTY; + slot_->tts_nvalid = slot_->tts_tupleDescriptor->natts; +} + +TupleDesc CTupleSlot::GetTupleDesc() const { + return slot_->tts_tupleDescriptor; +} + +TupleTableSlot *CTupleSlot::GetTupleTableSlot() const { return slot_; } + +MicroPartitionWriter::MicroPartitionWriter(const WriterOptions &writer_options) + : writer_options_(writer_options) {} + +MicroPartitionWriter 
*MicroPartitionWriter::SetWriteSummaryCallback( + WriteSummaryCallback callback) { + summary_callback_ = callback; + return this; +} + +MicroPartitionWriter *MicroPartitionWriter::SetStatsCollector( + MicroPartitionStats *mpstats) { + Assert(mpstats_ == nullptr); + mpstats_ = mpstats; + return this; +} + +const MicroPartitionWriter::WriterOptions &MicroPartitionWriter::Options() + const { + return writer_options_; +} + +const std::string &MicroPartitionWriter::FileName() const { + return writer_options_.file_name; +} + +} // namespace pax diff --git a/contrib/pax_storage/src/cpp/storage/micro_partition.h b/contrib/pax_storage/src/cpp/storage/micro_partition.h new file mode 100644 index 00000000000..f01139f82f1 --- /dev/null +++ b/contrib/pax_storage/src/cpp/storage/micro_partition.h @@ -0,0 +1,155 @@ +#pragma once + +#include "comm/cbdb_api.h" + +#include + +#include +#include +#include + +#include "storage/columns/pax_columns.h" +#include "storage/micro_partition_metadata.h" + +namespace pax { +class CTupleSlot { + public: + explicit CTupleSlot(TupleTableSlot *tuple_slot); + + inline void ClearTuple() { slot_->tts_ops->clear(slot_); } + + inline uint32 GetOffset() const { return offset_; } + + inline uint8 GetTableNo() const { return table_no_; } + + inline void SetOffset(uint64 offset) { offset_ = offset; } + + inline void SetBlockNumber(const int &block_number) { + block_number_ = block_number; + } + + inline void SetTableNo(uint8 table_no) { table_no_ = table_no; } + + void StoreVirtualTuple(); + + TupleDesc GetTupleDesc() const; + + TupleTableSlot *GetTupleTableSlot() const; + + private: + TupleTableSlot *slot_; + uint8 table_no_; + int block_number_; + uint32 offset_; +}; + +struct WriteSummary; +class FileSystem; +class MicroPartitionStats; +class PaxFilter; + +class MicroPartitionWriter { + public: + struct WriterOptions { + std::string file_name; + std::string block_id; + TupleDesc desc; + Oid rel_oid; + }; + + explicit MicroPartitionWriter(const WriterOptions &writer_options); + + virtual ~MicroPartitionWriter() = default; + + // close the current write file. Create may be called after Close + // to write a new micro partition. + virtual void Close() = 0; + + // immediately, flush memory data into file system + virtual void Flush() = 0; + + // estimated size of the writing size, used to determine + // whether to switch to another micro partition. + virtual size_t PhysicalSize() const = 0; + + // append tuple to the current micro partition file + // return the number of tuples the current micro partition has written + virtual void WriteTuple(CTupleSlot *slot) = 0; + virtual void WriteTupleN(CTupleSlot **slot, size_t n) = 0; + + using WriteSummaryCallback = std::function; + + // summary callback is invoked after the file is closed. + // returns MicroPartitionWriter to enable chain call. 
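+  // A minimal sketch of the chained setup (the lambda body is illustrative,
+  // and the callback is assumed to receive the WriteSummary):
+  //   writer->SetWriteSummaryCallback([](const WriteSummary &s) {
+  //            /* record s.file_name, s.num_tuples, ... */
+  //          })
+  //         ->SetStatsCollector(stats);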
+ virtual MicroPartitionWriter *SetWriteSummaryCallback( + WriteSummaryCallback callback); + + virtual MicroPartitionWriter *SetStatsCollector(MicroPartitionStats *mpstats); + + const WriterOptions &Options() const; + + // return the file name of the current micro partition, excluding its + // directory path + const std::string &FileName() const; + + protected: + WriteSummaryCallback summary_callback_; + const WriterOptions &writer_options_; + FileSystem *file_system_ = nullptr; + // only reference the mpstats, not the owner + MicroPartitionStats *mpstats_ = nullptr; +}; + +template +class DataBuffer; +class MicroPartitionReader { + public: + struct ReaderOptions { + // file name(excluding directory path) for read + std::string file_name; + // additioinal info to initialize a reader. + std::string block_id; + + // Optional, when reused buffer is not set, new memory will be generated for + // ReadTuple + DataBuffer *reused_buffer = nullptr; + + PaxFilter *filter = nullptr; + }; + + MicroPartitionReader() = default; + + virtual ~MicroPartitionReader() = default; + + virtual void Open(const ReaderOptions &options) = 0; + + // Close the current reader. It may be re-Open. + virtual void Close() = 0; + + // read tuple from the micro partition with a filter. + // the default behavior doesn't push the predicate down to + // the low-level storage code. + // returns the offset of the tuple in the micro partition + // NOTE: the ctid is stored in slot, mapping from block_id to micro partition + // is also created during this stage, no matter the map relation is needed or + // not. We may optimize to avoid creating the map relation later. + virtual bool ReadTuple(CTupleSlot *slot) = 0; + + protected: + // Allow different MicroPartitionReader shared columns + // but should not let export columns out of micro partition + // + // In MicroPartition writer/reader implementation, all in-memory data should + // be accessed by pax column This is because most of the common logic of + // column operation is done in pax column, such as type mapping, bitwise + // fetch, compression/encoding. At the same time, pax column can also be used + // as a general interface for internal using, because it's zero copy from + // buffer. 
more details in `storage/columns` + virtual PaxColumns *GetAllColumns() = 0; +#ifdef VEC_BUILD + private: + friend class PaxVecReader; +#endif +}; + +} // namespace pax diff --git a/contrib/pax_storage/src/cpp/storage/micro_partition_file_factory.cc b/contrib/pax_storage/src/cpp/storage/micro_partition_file_factory.cc new file mode 100644 index 00000000000..7d14e774214 --- /dev/null +++ b/contrib/pax_storage/src/cpp/storage/micro_partition_file_factory.cc @@ -0,0 +1,34 @@ +#include "storage/micro_partition_file_factory.h" + +#include "storage/orc/orc.h" + +namespace pax { +MicroPartitionReader *MicroPartitionFileFactory::CreateMicroPartitionReader( + const std::string &type, File *file, + const MicroPartitionReader::ReaderOptions &options) { + if (type == MICRO_PARTITION_TYPE_PAX) { + MicroPartitionReader *reader = new OrcReader(file); + + reader->Open(options); + return reader; + } + + CBDB_RAISE(cbdb::CException::ExType::kExTypeLogicError); +} + +MicroPartitionWriter *MicroPartitionFileFactory::CreateMicroPartitionWriter( + const std::string &type, File *file, + const MicroPartitionWriter::WriterOptions &options) { + if (type == MICRO_PARTITION_TYPE_PAX) { + std::vector type_kinds; + std::vector encoding_types; + MicroPartitionWriter *writer = nullptr; + std::tie(type_kinds, encoding_types) = OrcWriter::BuildSchema(options); + writer = new OrcWriter(std::move(options), std::move(type_kinds), + std::move(encoding_types), file); + return writer; + } + CBDB_RAISE(cbdb::CException::ExType::kExTypeLogicError); +} + +} // namespace pax diff --git a/contrib/pax_storage/src/cpp/storage/micro_partition_file_factory.h b/contrib/pax_storage/src/cpp/storage/micro_partition_file_factory.h new file mode 100644 index 00000000000..8ad01e0688b --- /dev/null +++ b/contrib/pax_storage/src/cpp/storage/micro_partition_file_factory.h @@ -0,0 +1,25 @@ +#pragma once + +#include + +#include "storage/file_system.h" +#include "storage/micro_partition.h" + +namespace pax { + +#define MICRO_PARTITION_TYPE_PAX "PAX" + +class MicroPartitionFileFactory final { + public: + // type must be "pax" now! + static MicroPartitionWriter *CreateMicroPartitionWriter( + const std::string &type, File *file, + const MicroPartitionWriter::WriterOptions &options); + + // type must be "pax" now! 
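+  // A minimal sketch of reader construction, mirroring the unit tests
+  // (path and options are placeholders):
+  //   File *file = Singleton<LocalFileSystem>::GetInstance()->Open(path);
+  //   MicroPartitionReader::ReaderOptions opts;
+  //   auto *reader = MicroPartitionFileFactory::CreateMicroPartitionReader(
+  //       MICRO_PARTITION_TYPE_PAX, file, opts);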
+ static MicroPartitionReader *CreateMicroPartitionReader( + const std::string &type, File *file, + const MicroPartitionReader::ReaderOptions &options); +}; + +} // namespace pax diff --git a/contrib/pax_storage/src/cpp/storage/micro_partition_file_factory_test.cc b/contrib/pax_storage/src/cpp/storage/micro_partition_file_factory_test.cc new file mode 100644 index 00000000000..bdb2cdb6774 --- /dev/null +++ b/contrib/pax_storage/src/cpp/storage/micro_partition_file_factory_test.cc @@ -0,0 +1,232 @@ +#include + +#include "storage/micro_partition_file_factory.h" + +#include +#include +#include +#include + +#include "access/tupdesc_details.h" +#include "comm/cbdb_wrappers.h" +#include "comm/gtest_wrappers.h" +#include "comm/singleton.h" +#include "exceptions/CException.h" +#include "storage/local_file_system.h" + +namespace pax::tests { +// 3 clomun - string(len 100), string(len 100), int(len 4) +#define COLUMN_NUMS 3 +#define COLUMN_SIZE 100 +#define INT32_COLUMN_VALUE 0x123 +#define INT32_COLUMN_VALUE_DEFAULT 0x001 + +static void GenFakeBuffer(char *buffer, size_t length) { + for (size_t i = 0; i < length; i++) { + buffer[i] = static_cast(i); + } +} + +class MicroPartitionFileFactoryTest : public ::testing::Test { + public: + const char *pax_format = MICRO_PARTITION_TYPE_PAX; + void SetUp() override { + Singleton::GetInstance(); + remove(file_name_.c_str()); + + MemoryContext micro_partition_test_memory_context = AllocSetContextCreate( + (MemoryContext)NULL, "OrcTestMemoryContext", 80 * 1024 * 1024, + 80 * 1024 * 1024, 80 * 1024 * 1024); + + MemoryContextSwitchTo(micro_partition_test_memory_context); + CurrentResourceOwner = ResourceOwnerCreate(NULL, "OrcTestResourceOwner"); + } + + void TearDown() override { + Singleton::GetInstance()->Delete(file_name_); + ResourceOwner tmp_resource_owner = CurrentResourceOwner; + CurrentResourceOwner = NULL; + ResourceOwnerRelease(tmp_resource_owner, RESOURCE_RELEASE_BEFORE_LOCKS, false, + true); + ResourceOwnerRelease(tmp_resource_owner, RESOURCE_RELEASE_LOCKS, false, true); + ResourceOwnerRelease(tmp_resource_owner, RESOURCE_RELEASE_AFTER_LOCKS, false, + true); + ResourceOwnerDelete(tmp_resource_owner); + } + + protected: + static CTupleSlot *CreateFakeCTupleSlot(bool with_value = true) { + TupleTableSlot *tuple_slot = nullptr; + + auto tuple_desc = reinterpret_cast(cbdb::Palloc0( + sizeof(TupleDescData) + sizeof(FormData_pg_attribute) * COLUMN_NUMS)); + + tuple_desc->natts = COLUMN_NUMS; + tuple_desc->attrs[0] = { + .atttypid = TEXTOID, + .attlen = -1, + .attbyval = false, + .attisdropped = false, + }; + + tuple_desc->attrs[1] = { + .atttypid = TEXTOID, + .attlen = -1, + .attbyval = false, + .attisdropped = false, + }; + + tuple_desc->attrs[2] = { + .atttypid = INT4OID, + .attlen = 4, + .attbyval = true, + .attisdropped = false, + }; + + tuple_slot = MakeTupleTableSlot(tuple_desc, &TTSOpsVirtual); + + if (with_value) { + char column_buff[COLUMN_SIZE * 2]; + GenFakeBuffer(column_buff, COLUMN_SIZE); + GenFakeBuffer(column_buff + COLUMN_SIZE, COLUMN_SIZE); + + bool *fake_is_null = + reinterpret_cast(cbdb::Palloc0(sizeof(bool) * COLUMN_NUMS)); + + fake_is_null[0] = false; + fake_is_null[1] = false; + fake_is_null[2] = false; + + tuple_slot->tts_values[0] = + cbdb::DatumFromCString(column_buff, COLUMN_SIZE); + tuple_slot->tts_values[1] = + cbdb::DatumFromCString(column_buff + COLUMN_SIZE, COLUMN_SIZE); + tuple_slot->tts_values[2] = cbdb::Int32ToDatum(INT32_COLUMN_VALUE); + tuple_slot->tts_isnull = fake_is_null; + } + + auto ctuple_slot = new 
CTupleSlot(tuple_slot); + + return ctuple_slot; + } + + static CTupleSlot *CreateEmptyCTupleSlot() { + auto tuple_desc = reinterpret_cast(cbdb::Palloc0( + sizeof(TupleDescData) + sizeof(FormData_pg_attribute) * COLUMN_NUMS)); + bool *fake_is_null = + reinterpret_cast(cbdb::Palloc0(sizeof(bool) * COLUMN_NUMS)); + auto tuple_slot = reinterpret_cast( + cbdb::Palloc0(sizeof(TupleTableSlot))); + auto tts_values = + reinterpret_cast(cbdb::Palloc0(sizeof(Datum) * COLUMN_NUMS)); + tuple_desc->natts = COLUMN_NUMS; + tuple_desc->attrs[0] = { + .attlen = -1, + .attbyval = false, + .attisdropped = false, + }; + + tuple_desc->attrs[1] = { + .attlen = -1, + .attbyval = false, + .attisdropped = false, + }; + + tuple_desc->attrs[2] = { + .attlen = 4, + .attbyval = true, + .attisdropped = false, + }; + tuple_slot->tts_tupleDescriptor = tuple_desc; + tuple_slot->tts_values = tts_values; + tuple_slot->tts_isnull = fake_is_null; + return new CTupleSlot(tuple_slot); + } + + static void DeleteCTupleSlot(CTupleSlot *ctuple_slot) { + auto tuple_table_slot = ctuple_slot->GetTupleTableSlot(); + cbdb::Pfree(tuple_table_slot->tts_tupleDescriptor); + if (tuple_table_slot->tts_isnull) { + cbdb::Pfree(tuple_table_slot->tts_isnull); + } + + cbdb::Pfree(tuple_table_slot); + delete ctuple_slot; + } + + protected: + const std::string file_name_ = "./test.file"; +}; + +TEST_F(MicroPartitionFileFactoryTest, CreateMicroPartitionWriter) { + CTupleSlot *tuple_slot = CreateFakeCTupleSlot(); + auto local_fs = Singleton::GetInstance(); + ASSERT_NE(nullptr, local_fs); + + auto file_ptr = local_fs->Open(file_name_); + EXPECT_NE(nullptr, file_ptr); + + MicroPartitionWriter::WriterOptions writer_options; + writer_options.desc = tuple_slot->GetTupleDesc(); + + auto writer = MicroPartitionFileFactory::CreateMicroPartitionWriter( + pax_format, file_ptr, writer_options); + + writer->WriteTuple(tuple_slot); + writer->Close(); + + DeleteCTupleSlot(tuple_slot); + delete writer; +} + +TEST_F(MicroPartitionFileFactoryTest, CreateMicroPartitionReader) { + char column_buff[COLUMN_SIZE]; + + GenFakeBuffer(column_buff, COLUMN_SIZE); + + CTupleSlot *tuple_slot = CreateFakeCTupleSlot(); + auto local_fs = Singleton::GetInstance(); + ASSERT_NE(nullptr, local_fs); + + auto file_ptr = local_fs->Open(file_name_); + EXPECT_NE(nullptr, file_ptr); + + MicroPartitionWriter::WriterOptions writer_options; + + writer_options.desc = tuple_slot->GetTupleDesc(); + + auto writer = MicroPartitionFileFactory::CreateMicroPartitionWriter( + pax_format, file_ptr, writer_options); + CTupleSlot *tuple_slot_empty = CreateEmptyCTupleSlot(); + + writer->WriteTuple(tuple_slot); + writer->Close(); + + file_ptr = local_fs->Open(file_name_); + + MicroPartitionReader::ReaderOptions reader_options; + + auto reader = MicroPartitionFileFactory::CreateMicroPartitionReader( + pax_format, file_ptr, reader_options); + tuple_slot_empty->GetTupleDesc()->natts = COLUMN_NUMS; + reader->ReadTuple(tuple_slot_empty); + + auto vl = (struct varlena *)DatumGetPointer( + tuple_slot_empty->GetTupleTableSlot()->tts_values[0]); + auto tunpacked = pg_detoast_datum_packed(vl); + EXPECT_EQ((Pointer)vl, (Pointer)tunpacked); + + int read_len = VARSIZE(tunpacked); + char *read_data = VARDATA_ANY(tunpacked); + + EXPECT_EQ(read_len, COLUMN_SIZE + VARHDRSZ); + EXPECT_EQ(0, memcmp(read_data, column_buff, COLUMN_SIZE)); + reader->Close(); + + DeleteCTupleSlot(tuple_slot_empty); + DeleteCTupleSlot(tuple_slot); + delete writer; + delete reader; +} + +} // namespace pax::tests diff --git 
a/contrib/pax_storage/src/cpp/storage/micro_partition_iterator.cc b/contrib/pax_storage/src/cpp/storage/micro_partition_iterator.cc new file mode 100644 index 00000000000..f329e7651ac --- /dev/null +++ b/contrib/pax_storage/src/cpp/storage/micro_partition_iterator.cc @@ -0,0 +1,101 @@ +#include "storage/micro_partition_iterator.h" + +#include "comm/cbdb_wrappers.h" +#include "exceptions/CException.h" +#include "catalog/pax_aux_table.h" + +namespace pax { +void MicroPartitionInfoIterator::Begin() { + Assert(pax_rel_); + Assert(snapshot_); + Assert(!desc_); + Assert(!tuple_); + + if (!aux_rel_) { + auto aux_oid = cbdb::GetPaxAuxRelid(RelationGetRelid(pax_rel_)); + aux_rel_ = cbdb::TableOpen(aux_oid, AccessShareLock); + } + + desc_ = cbdb::SystableBeginScan(aux_rel_, InvalidOid, false, snapshot_, 0, NULL); +} + +void MicroPartitionInfoIterator::End() { + if (desc_) { + auto desc = desc_; + auto aux_rel = aux_rel_; + desc_ = nullptr; + aux_rel_ = nullptr; + tuple_ = nullptr; + cbdb::SystableEndScan(desc); + cbdb::TableClose(aux_rel, NoLock); + } + Assert(!tuple_); +} + +bool MicroPartitionInfoIterator::HasNext() { + if (tuple_) return true; + tuple_ = cbdb::SystableGetNext(desc_); + return tuple_ != nullptr; +} + +MicroPartitionMetadata MicroPartitionInfoIterator::Next() { + auto tuple = tuple_; + Assert(tuple); + + tuple_ = nullptr; + return std::move(ToValue(tuple)); +} + +void MicroPartitionInfoIterator::Rewind() { + End(); + Begin(); +} + +std::unique_ptr> MicroPartitionInfoIterator::New(Relation pax_rel, Snapshot snapshot) { + MicroPartitionInfoIterator *it; + it = new MicroPartitionInfoIterator(pax_rel, snapshot); + it->Begin(); + return std::unique_ptr>(it); +} + +MicroPartitionInfoIterator::~MicroPartitionInfoIterator() { + End(); +} + +MicroPartitionMetadata MicroPartitionInfoIterator::ToValue(HeapTuple tuple) { + MicroPartitionMetadata v; + ::pax::stats::MicroPartitionStatisticsInfo stats_info; + bool is_null; + auto tup_desc = RelationGetDescr(aux_rel_); + + { + auto blockid = cbdb::HeapGetAttr(tuple, ANUM_PG_PAX_BLOCK_TABLES_PTBLOCKNAME, tup_desc, &is_null); + CBDB_CHECK(!is_null, cbdb::CException::kExTypeLogicError); + + auto name = DatumGetName(blockid)->data; + auto file_name = cbdb::BuildPaxFilePath(pax_rel_, name); + v.SetFileName(std::move(file_name)); + v.SetMicroPartitionId(std::move(name)); + } + + auto tup_count = cbdb::HeapGetAttr(tuple, ANUM_PG_PAX_BLOCK_TABLES_PTTUPCOUNT, tup_desc, &is_null); + CBDB_CHECK(!is_null, cbdb::CException::kExTypeLogicError); + v.SetTupleCount(cbdb::DatumToInt32(tup_count)); + + { + auto stats = reinterpret_cast(cbdb::DatumToPointer(cbdb::HeapGetAttr(tuple, ANUM_PG_PAX_BLOCK_TABLES_PTSTATISITICS, tup_desc, &is_null))); + CBDB_CHECK(!is_null, cbdb::CException::kExTypeLogicError); + auto flat_stats = cbdb::PgDeToastDatumPacked(stats); + auto ok = stats_info.ParseFromArray(VARDATA_ANY(flat_stats), VARSIZE_ANY_EXHDR(flat_stats)); + CBDB_CHECK(ok, cbdb::CException::kExTypeIOError); + v.SetStats(std::move(stats_info)); + + if (flat_stats != stats) + cbdb::Pfree(flat_stats); + } + + // deserialize protobuf message + return v; +} + +} // namespace pax diff --git a/contrib/pax_storage/src/cpp/storage/micro_partition_iterator.h b/contrib/pax_storage/src/cpp/storage/micro_partition_iterator.h new file mode 100644 index 00000000000..38e2b4ee888 --- /dev/null +++ b/contrib/pax_storage/src/cpp/storage/micro_partition_iterator.h @@ -0,0 +1,33 @@ +#pragma once + +#include + +#include "comm/cbdb_api.h" +#include "comm/iterator.h" +#include 
"storage/micro_partition_metadata.h" + +namespace pax { +class MicroPartitionInfoIterator final: public IteratorBase { + public: + static std::unique_ptr> New(Relation pax_rel, Snapshot snapshot); + + bool HasNext() override; + MicroPartitionMetadata Next() override; + void Rewind() override; + + private: + MicroPartitionInfoIterator(Relation pax_rel, Snapshot snapshot) + : pax_rel_(pax_rel), snapshot_(snapshot) {} + ~MicroPartitionInfoIterator() override; + void Begin(); + void End(); + MicroPartitionMetadata ToValue(HeapTuple tuple); + + Relation pax_rel_ = nullptr; + Relation aux_rel_ = nullptr; + Snapshot snapshot_ = nullptr; + SysScanDesc desc_ = nullptr; + HeapTuple tuple_ = nullptr; +}; + +} // namespace pax diff --git a/contrib/pax_storage/src/cpp/storage/micro_partition_metadata.cc b/contrib/pax_storage/src/cpp/storage/micro_partition_metadata.cc new file mode 100644 index 00000000000..304e0c6a974 --- /dev/null +++ b/contrib/pax_storage/src/cpp/storage/micro_partition_metadata.cc @@ -0,0 +1,25 @@ +#include "storage/micro_partition_metadata.h" + +namespace pax { + +WriteSummary::WriteSummary() + : file_size(0), num_tuples(0), rel_oid(InvalidOid) {} + +MicroPartitionMetadata::MicroPartitionMetadata( + MicroPartitionMetadata &&other) { + micro_partition_id_ = std::move(other.micro_partition_id_); + file_name_ = std::move(other.file_name_); + tuple_count_ = other.tuple_count_; + stats_ = std::move(other.stats_); +} + +MicroPartitionMetadata &MicroPartitionMetadata::operator=( + MicroPartitionMetadata &&other) { + micro_partition_id_ = std::move(other.micro_partition_id_); + file_name_ = std::move(other.file_name_); + tuple_count_ = other.tuple_count_; + stats_ = std::move(other.stats_); + return *this; +} + +} // namespace pax diff --git a/contrib/pax_storage/src/cpp/storage/micro_partition_metadata.h b/contrib/pax_storage/src/cpp/storage/micro_partition_metadata.h new file mode 100644 index 00000000000..876abb57bce --- /dev/null +++ b/contrib/pax_storage/src/cpp/storage/micro_partition_metadata.h @@ -0,0 +1,61 @@ +#pragma once +#include "comm/cbdb_api.h" +#include "storage/proto/proto_wrappers.h" + +#include +#include + +namespace pax { +// WriteSummary is generated after the current micro partition is flushed and +// closed. 
+struct WriteSummary {
+  std::string file_name;
+  std::string block_id;
+  size_t file_size;
+  size_t num_tuples;
+  unsigned int rel_oid;
+  pax::stats::MicroPartitionStatisticsInfo mp_stats;
+  WriteSummary();
+  WriteSummary(const WriteSummary &summary) = default;
+};
+
+struct MicroPartitionMetadata {
+ public:
+  MicroPartitionMetadata() = default;
+  MicroPartitionMetadata(const MicroPartitionMetadata &other) = default;
+
+  ~MicroPartitionMetadata() = default;
+
+  MicroPartitionMetadata(MicroPartitionMetadata &&other);
+
+  MicroPartitionMetadata &operator=(const MicroPartitionMetadata &other) =
+      default;
+
+  MicroPartitionMetadata &operator=(MicroPartitionMetadata &&other);
+
+  inline const std::string &GetMicroPartitionId() const {
+    return micro_partition_id_;
+  }
+
+  inline const std::string &GetFileName() const { return file_name_; }
+
+  inline uint32 GetTupleCount() const { return tuple_count_; }
+
+  inline const ::pax::stats::MicroPartitionStatisticsInfo &GetStats() const {
+    return stats_;
+  }
+
+  inline void SetMicroPartitionId(std::string &&id) {
+    micro_partition_id_ = std::move(id);
+  }
+
+  inline void SetFileName(std::string &&name) { file_name_ = std::move(name); }
+
+  inline void SetTupleCount(uint32 tuple_count) { tuple_count_ = tuple_count; }
+
+  inline void SetStats(::pax::stats::MicroPartitionStatisticsInfo &&stats) {
+    stats_ = std::move(stats);
+  }
+
+ private:
+  std::string micro_partition_id_;
+
+  std::string file_name_;
+
+  // statistics info
+  uint32 tuple_count_ = 0;
+
+  ::pax::stats::MicroPartitionStatisticsInfo stats_;
+};  // struct MicroPartitionMetadata
+}  // namespace pax
diff --git a/contrib/pax_storage/src/cpp/storage/orc/orc.cc b/contrib/pax_storage/src/cpp/storage/orc/orc.cc
new file mode 100644
index 00000000000..928ae710177
--- /dev/null
+++ b/contrib/pax_storage/src/cpp/storage/orc/orc.cc
@@ -0,0 +1,1109 @@
+#include "storage/orc/orc.h"
+
+#include "comm/cbdb_api.h"
+
+#include <cstring>
+#include <utility>
+#include <vector>
+
+#include "catalog/micro_partition_stats.h"
+#include "comm/cbdb_wrappers.h"
+#include "exceptions/CException.h"
+#include "storage/columns/pax_column_int.h"
+#include "storage/columns/pax_encoding_non_fixed_column.h"
+#include "storage/pax_filter.h"
+namespace pax {
+
+std::pair<std::vector<orc::proto::Type_Kind>, std::vector<ColumnEncoding_Kind>>
+OrcWriter::BuildSchema(const MicroPartitionWriter::WriterOptions &options) {
+  std::vector<orc::proto::Type_Kind> type_kinds;
+  std::vector<ColumnEncoding_Kind> encoding_types;
+  TupleDesc desc;
+
+  desc = options.desc;
+  Assert(desc);
+
+  for (int i = 0; i < desc->natts; i++) {
+    auto *attr = &desc->attrs[i];
+    if (attr->attbyval) {
+      switch (attr->attlen) {
+        case 1:
+          type_kinds.emplace_back(orc::proto::Type_Kind::Type_Kind_BYTE);
+          encoding_types.emplace_back(
+              ColumnEncoding_Kind::ColumnEncoding_Kind_NO_ENCODED);
+          break;
+        case 2:
+          type_kinds.emplace_back(orc::proto::Type_Kind::Type_Kind_SHORT);
+          encoding_types.emplace_back(
+              ColumnEncoding_Kind::ColumnEncoding_Kind_NO_ENCODED);
+          break;
+        case 4:
+          type_kinds.emplace_back(orc::proto::Type_Kind::Type_Kind_INT);
+          encoding_types.emplace_back(
+              ColumnEncoding_Kind::ColumnEncoding_Kind_NO_ENCODED);
+          break;
+        case 8:
+          type_kinds.emplace_back(orc::proto::Type_Kind::Type_Kind_LONG);
+          // TODO: parse options
+          encoding_types.emplace_back(
+              ColumnEncoding_Kind::ColumnEncoding_Kind_ORC_RLE_V2);
+          break;
+        default:
+          Assert(!"should not be here! pg_type with attbyval=true only has "
+                  "typlen of 1, 2, 4, or 8");
+      }
+    } else {
+      Assert(attr->attlen > 0 || attr->attlen == -1);
+      type_kinds.emplace_back(orc::proto::Type_Kind::Type_Kind_STRING);
+      encoding_types.emplace_back(
+          ColumnEncoding_Kind::ColumnEncoding_Kind_NO_ENCODED);
+    }
+  }
+
+  Assert(type_kinds.size() == encoding_types.size());
+
+  return std::make_pair(type_kinds, encoding_types);
+}
+
+OrcWriter::OrcWriter(
+    const MicroPartitionWriter::WriterOptions &orc_writer_options,
+    const std::vector<orc::proto::Type_Kind> &column_types,
+    const std::vector<ColumnEncoding_Kind> &column_encoding_types, File *file)
+    : MicroPartitionWriter(orc_writer_options),
+      column_types_(column_types),
+      column_encoding_types_(column_encoding_types),
+      file_(file),
+      current_offset_(0) {
+  pax_columns_ = new PaxColumns(column_types, column_encoding_types);
+
+  summary_.rel_oid = orc_writer_options.rel_oid;
+  summary_.block_id = orc_writer_options.block_id;
+  summary_.file_name = orc_writer_options.file_name;
+
+  file_footer_.set_headerlength(0);
+  file_footer_.set_contentlength(0);
+  file_footer_.set_numberofrows(0);
+  file_footer_.set_rowindexstride(0);
+  file_footer_.set_writer(ORC_WRITER_ID);
+  file_footer_.set_softwareversion(ORC_SOFT_VERSION);
+  BuildFooterType();
+
+  post_script_.set_footerlength(0);
+  post_script_.set_compression(orc::proto::CompressionKind::NONE);
+  post_script_.set_compressionblocksize(0);
+
+  post_script_.add_version(ORC_FILE_MAJOR_VERSION);
+  post_script_.set_writerversion(ORC_WRITER_VERSION);
+  post_script_.set_magic(ORC_MAGIC_ID);
+
+  InitStripe();
+}
+
+OrcWriter::~OrcWriter() {
+  delete pax_columns_;
+  delete file_;
+}
+
+MicroPartitionWriter *OrcWriter::SetStatsCollector(
+    MicroPartitionStats *mpstats) {
+  if (mpstats)
+    mpstats->SetStatsMessage(&summary_.mp_stats, column_types_.size());
+  return MicroPartitionWriter::SetStatsCollector(mpstats);
+}
+
+void OrcWriter::Flush() {
+  BufferedOutputStream buffer_mem_stream(nullptr, 2048);
+  if (WriteStripe(&buffer_mem_stream)) {
+    Assert(current_offset_ >= buffer_mem_stream.GetDataBuffer()->Used());
+    file_->PWriteN(buffer_mem_stream.GetDataBuffer()->GetBuffer(),
+                   buffer_mem_stream.GetDataBuffer()->Used(),
+                   current_offset_ - buffer_mem_stream.GetDataBuffer()->Used());
+    file_->Flush();
+    pax_columns_->Clear();
+  }
+}
+
+void OrcWriter::WriteTuple(CTupleSlot *slot) {
+  int n;
+  TupleTableSlot *table_slot;
+  TupleDesc table_desc;
+  int16 type_len;
+  bool type_by_val;
+
+  summary_.num_tuples++;
+
+  table_slot = slot->GetTupleTableSlot();
+  table_desc = slot->GetTupleDesc();
+  n = table_desc->natts;
+
+  CBDB_CHECK(pax_columns_->GetColumns() == static_cast<size_t>(n),
+             cbdb::CException::ExType::kExTypeSchemaNotMatch);
+
+  pax_columns_->AddRows(1);
+
+  for (int i = 0; i < n; i++) {
+    type_len = table_desc->attrs[i].attlen;
+    type_by_val = table_desc->attrs[i].attbyval;
+
+    AssertImply(table_desc->attrs[i].attisdropped, table_slot->tts_isnull[i]);
+
+    if (table_slot->tts_isnull[i]) {
+      (*pax_columns_)[i]->AppendNull();
+      continue;
+    }
+
+    if (type_by_val) {
+      switch (type_len) {
+        case 1: {
+          auto value = cbdb::DatumToInt8(table_slot->tts_values[i]);
+          (*pax_columns_)[i]->Append(reinterpret_cast<char *>(&value),
+                                     type_len);
+          break;
+        }
+        case 2: {
+          auto value = cbdb::DatumToInt16(table_slot->tts_values[i]);
+          (*pax_columns_)[i]->Append(reinterpret_cast<char *>(&value),
+                                     type_len);
+          break;
+        }
+        case 4: {
+          auto value = cbdb::DatumToInt32(table_slot->tts_values[i]);
+          (*pax_columns_)[i]->Append(reinterpret_cast<char *>(&value),
+                                     type_len);
+          break;
+        }
+        case 8: {
+          auto value = cbdb::DatumToInt64(table_slot->tts_values[i]);
+          (*pax_columns_)[i]->Append(reinterpret_cast<char *>(&value),
+                                     type_len);
+          break;
+        }
+        default:
+          Assert(!"should not be here! pg_type with attbyval=true only has "
+                  "typlen of 1, 2, 4, or 8");
+      }
+    } else {
+      switch (type_len) {
+        case -1: {
+          void *vl = nullptr;
+          int len = -1;
+          vl = cbdb::PointerAndLenFromDatum(table_slot->tts_values[i], &len);
+          Assert(vl != nullptr && len != -1);
+          (*pax_columns_)[i]->Append(reinterpret_cast<char *>(vl), len);
+          break;
+        }
+        default:
+          Assert(type_len > 0);
+          (*pax_columns_)[i]->Append(static_cast<char *>(cbdb::DatumToPointer(
+                                         table_slot->tts_values[i])),
+                                     type_len);
+          break;
+      }
+    }
+  }
+}
+
+void OrcWriter::WriteTupleN(CTupleSlot **slot, size_t n) {
+  // TODO(jiaqizho): support WriteTupleN
+}
+
+bool OrcWriter::WriteStripe(BufferedOutputStream *buffer_mem_stream) {
+  std::vector<orc::proto::Stream> streams;
+  std::vector<ColumnEncoding> encoding_kinds;
+  orc::proto::StripeFooter stripe_footer;
+  auto stripe_stats = meta_data_.add_stripestats();
+
+  size_t data_len = 0;
+  size_t number_of_row = 0;
+
+  number_of_row = pax_columns_->GetRows();
+  stripe_rows_ = number_of_row;
+
+  // No need to add a stripe if nothing is in memory
+  if (number_of_row == 0) {
+    return false;
+  }
+
+  PaxColumns::ColumnStreamsFunc column_streams_func =
+      [&streams](const orc::proto::Stream_Kind &kind, size_t column,
+                 size_t length) {
+        orc::proto::Stream stream;
+        stream.set_kind(kind);
+        stream.set_column(static_cast<uint32>(column));
+        stream.set_length(length);
+        streams.push_back(std::move(stream));
+      };
+
+  PaxColumns::ColumnEncodingFunc column_encoding_func =
+      [&encoding_kinds](const ColumnEncoding_Kind &encoding_kind,
+                        size_t origin_len) {
+        ColumnEncoding column_encoding;
+        column_encoding.set_kind(encoding_kind);
+        column_encoding.set_length(origin_len);
+
+        encoding_kinds.push_back(std::move(column_encoding));
+      };
+
+  DataBuffer<char> *data_buffer =
+      pax_columns_->GetDataBuffer(column_streams_func, column_encoding_func);
+
+  for (const auto &stream : streams) {
+    *stripe_footer.add_streams() = stream;
+    data_len += stream.length();
+  }
+
+  for (size_t i = 0; i < pax_columns_->GetColumns(); i++) {
+    auto pb_stats = stripe_stats->add_colstats();
+    PaxColumn *pax_column = (*pax_columns_)[i];
+
+    *stripe_footer.add_pax_col_encodings() = encoding_kinds[i];
+
+    pb_stats->set_hasnull(pax_column->HasNull());
+    pb_stats->set_numberofvalues(pax_column->GetRows());
+  }
+
+  stripe_footer.set_writertimezone("GMT");
+  buffer_mem_stream->Set(data_buffer, 2048);
+
+  // serialize into memory via protobuf; a failure is a memory IO error
+  CBDB_CHECK(stripe_footer.SerializeToZeroCopyStream(buffer_mem_stream),
+             cbdb::CException::ExType::kExTypeIOError);
+
+  stripe_info_.set_indexlength(0);
+  stripe_info_.set_datalength(data_len);
+  stripe_info_.set_footerlength(buffer_mem_stream->GetSize());
+  stripe_info_.set_numberofrows(stripe_rows_);
+
+  *file_footer_.add_stripes() = stripe_info_;
+
+  current_offset_ += buffer_mem_stream->GetSize();
+  total_rows_ += stripe_rows_;
+
+  // reset the stripe
+  InitStripe();
+  return true;
+}
+
+void OrcWriter::Close() {
+  BufferedOutputStream buffer_mem_stream(nullptr, 2048);
+  size_t file_offset = current_offset_;
+  bool not_empty_stripe = false;
+  DataBuffer<char> *data_buffer;
+
+  not_empty_stripe = WriteStripe(&buffer_mem_stream);
+  if (!not_empty_stripe) {
+    data_buffer = new DataBuffer<char>(2048);
+    buffer_mem_stream.Set(data_buffer, 2048);
+  }
+
+  WriteMetadata(&buffer_mem_stream);
+  WriteFileFooter(&buffer_mem_stream);
+  WritePostscript(&buffer_mem_stream);
+  if (summary_callback_) {
+    summary_.file_size = buffer_mem_stream.GetDataBuffer()->Used();
+    summary_callback_(summary_);
+  }
+
+  file_->PWriteN(buffer_mem_stream.GetDataBuffer()->GetBuffer(),
+                 buffer_mem_stream.GetDataBuffer()->Used(), file_offset);
+  file_->Flush();
+  file_->Close();
+  if (!not_empty_stripe) {
+    delete data_buffer;
+  }
+}
+
+size_t OrcWriter::PhysicalSize() const { return pax_columns_->PhysicalSize(); }
+
+void OrcWriter::InitStripe() {
+  stripe_info_.set_offset(current_offset_);
+  stripe_info_.set_indexlength(0);
+  stripe_info_.set_datalength(0);
+  stripe_info_.set_footerlength(0);
+  stripe_info_.set_numberofrows(0);
+
+  stripe_rows_ = 0;
+}
+
+void OrcWriter::BuildFooterType() {
+  auto proto_type = file_footer_.add_types();
+  proto_type->set_maximumlength(0);
+  proto_type->set_precision(0);
+  proto_type->set_scale(0);
+  proto_type->set_kind(::orc::proto::Type_Kind_STRUCT);
+
+  // TODO(jiaqizho): support an interface for meta kv if we do need it
+  // protoAttr->set_key(key);
+  // protoAttr->set_value(value);
+  for (size_t i = 0; i < column_types_.size(); ++i) {
+    auto orc_type = column_types_[i];
+
+    auto sub_proto_type = file_footer_.add_types();
+    sub_proto_type->set_maximumlength(0);
+    sub_proto_type->set_precision(0);
+    sub_proto_type->set_scale(0);
+    sub_proto_type->set_kind(orc_type);
+
+    file_footer_.mutable_types(0)->add_subtypes(i);
+  }
+}
+
+void OrcWriter::WriteMetadata(BufferedOutputStream *buffer_mem_stream) {
+  buffer_mem_stream->StartBufferOutRecord();
+  CBDB_CHECK(meta_data_.SerializeToZeroCopyStream(buffer_mem_stream),
+             cbdb::CException::ExType::kExTypeIOError);
+
+  post_script_.set_metadatalength(buffer_mem_stream->EndBufferOutRecord());
+}
+
+void OrcWriter::WriteFileFooter(BufferedOutputStream *buffer_mem_stream) {
+  file_footer_.set_contentlength(current_offset_ -
+                                 file_footer_.headerlength());
+  file_footer_.set_numberofrows(total_rows_);
+
+  for (size_t i = 0; i < pax_columns_->GetColumns(); i++) {
+    auto pb_stats = file_footer_.add_statistics();
+    // FIXME(jiaqizho): the statistics in the file footer are not accurate,
+    // but the statistics in the stripe stats are accurate
+    pb_stats->set_hasnull(false);
+    pb_stats->set_numberofvalues((*pax_columns_)[i]->GetRows());
+  }
+
+  buffer_mem_stream->StartBufferOutRecord();
+  CBDB_CHECK(file_footer_.SerializeToZeroCopyStream(buffer_mem_stream),
+             cbdb::CException::ExType::kExTypeIOError);
+
+  post_script_.set_footerlength(buffer_mem_stream->EndBufferOutRecord());
+}
+
+void OrcWriter::WritePostscript(BufferedOutputStream *buffer_mem_stream) {
+  buffer_mem_stream->StartBufferOutRecord();
+  CBDB_CHECK(post_script_.SerializeToZeroCopyStream(buffer_mem_stream),
+             cbdb::CException::ExType::kExTypeIOError);
+
+  char ps_len = static_cast<char>(buffer_mem_stream->EndBufferOutRecord());
+  buffer_mem_stream->DirectWrite(&ps_len, sizeof(unsigned char));
+}
+
+OrcReader::OrcReader(File *file)
+    : file_(file),
+      reused_buffer_(nullptr),
+      working_pax_columns_(nullptr),
+      num_of_stripes_(0),
+      proj_map_(nullptr),
+      proj_len_(0),
+      is_close_(true) {}
+
+OrcReader::~OrcReader() { delete file_; }
+
+PaxColumns *OrcReader::GetAllColumns() {
+  Assert(GetNumberOfStripes() == 1);
+
+  if (!working_pax_columns_) {
+    working_pax_columns_ =
+        ReadStripe(current_stripe_index_++, proj_map_, proj_len_);
+    current_row_index_ = 0;
+    for (size_t i = 0; i < column_types_.size(); i++) {
+      current_nulls_[i] = 0;
+      auto column = (*working_pax_columns_)[i];
+      if (column) {
+        Assert(column->GetBuffer().first);
+      }
+    }
+  }
+
+  return working_pax_columns_;
+}
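+
+/* File tail layout assumed by the read path below, reconstructed from the
+ * offset arithmetic in ReadMetadata/ReadFooter/ReadPostScript:
+ *
+ *   [stripes ...][metadata][file footer][postscript][postscript length (1B)]
+ *
+ * The reader walks backwards: the final byte gives the postscript length,
+ * the postscript records metadatalength/footerlength, and those determine
+ * the offsets of the file footer and the metadata. */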
+void OrcReader::ReadMetadata(ssize_t file_length, uint64 post_script_len) {
+  uint64 meta_len = post_script_.metadatalength();
+  uint64 footer_len = post_script_.footerlength();
+  off_t meta_start = file_length - meta_len - footer_len - post_script_len -
+                     ORC_POST_SCRIPT_SIZE;
+  char read_buffer[meta_len];
+  SeekableInputStream input_stream(read_buffer, meta_len);
+
+  Assert(meta_start >= 0);
+  file_->PReadN(read_buffer, meta_len, meta_start);
+
+  CBDB_CHECK(meta_data_.ParseFromZeroCopyStream(&input_stream),
+             cbdb::CException::ExType::kExTypeIOError);
+}
+
+void OrcReader::BuildProtoTypes() {
+  int max_id = 0;
+
+  max_id = file_footer_.types_size();
+
+  CBDB_CHECK(max_id > 0, cbdb::CException::ExType::kExTypeInvalidORCFormat);
+
+  const orc::proto::Type &type = file_footer_.types(0);
+
+  // There is an assumption here: for all pg tables, the outermost structure
+  // should be Type_Kind_STRUCT
+  CBDB_CHECK(type.kind() == orc::proto::Type_Kind_STRUCT,
+             cbdb::CException::ExType::kExTypeInvalidORCFormat);
+  CBDB_CHECK(type.subtypes_size() == max_id - 1,
+             cbdb::CException::ExType::kExTypeInvalidORCFormat);
+
+  for (int j = 0; j < type.subtypes_size(); ++j) {
+    int sub_type_id = static_cast<int>(type.subtypes(j)) + 1;
+    const orc::proto::Type &sub_type = file_footer_.types(sub_type_id);
+    // nested structs should be allowed eventually,
+    // but they are not supported yet
+    CBDB_CHECK(sub_type.kind() != orc::proto::Type_Kind_STRUCT,
+               cbdb::CException::ExType::kExTypeInvalidORCFormat);
+
+    column_types_.emplace_back(sub_type.kind());
+  }
+}
+
+void OrcReader::ReadFooter(size_t footer_offset, size_t footer_len) {
+  char buffer[footer_len];
+
+  file_->PReadN(&buffer, footer_len, footer_offset);
+
+  SeekableInputStream input_stream(buffer, footer_len);
+  CBDB_CHECK(file_footer_.ParseFromZeroCopyStream(&input_stream),
+             cbdb::CException::ExType::kExTypeIOError);
+
+  BuildProtoTypes();
+  current_nulls_ = new uint32[column_types_.size()];
+  memset(current_nulls_, 0, column_types_.size() * sizeof(uint32));
+}
+
+void OrcReader::ReadPostScript(size_t file_size, uint64 post_script_len) {
+  char post_script_buffer[post_script_len];
+  off_t offset;
+
+  offset = (off_t)(file_size - ORC_POST_SCRIPT_SIZE - post_script_len);
+  Assert(offset >= 0);
+
+  file_->PReadN(post_script_buffer, post_script_len, offset);
+
+  post_script_.ParseFromArray(&post_script_buffer,
+                              static_cast<int>(post_script_len));
+  // TODO(jiaqizho): verify the orc format here
+}
+
+static bool ProjShouldReadAll(const bool *const proj_map, size_t proj_len) {
+  if (!proj_map) {
+    return true;
+  }
+
+  for (size_t i = 0; i < proj_len; i++) {
+    if (!proj_map[i]) {
+      return false;
+    }
+  }
+
+  return true;
+}
+
+orc::proto::StripeFooter OrcReader::ReadStripeWithProjection(
+    DataBuffer<char> *data_buffer, OrcReader::StripeInformation *stripe_info,
+    const bool *const proj_map, size_t proj_len) {
+  size_t stripe_footer_offset = 0;
+  orc::proto::StripeFooter stripe_footer;
+  size_t streams_index = 0;
+  uint64_t batch_len = 0;
+  uint64_t batch_offset = 0;
+  size_t index = 0;
+
+  stripe_footer_offset = stripe_info->data_length + stripe_info->index_length;
+
+  /* Check whether every column is projected.
+   * If no column projection is needed, read the whole
+   * buffer (data + stripe footer) from the stripe and decode the stripe
+   * footer. */
+  if (ProjShouldReadAll(proj_map, proj_len)) {
+    file_->PReadN(data_buffer->GetBuffer(), stripe_info->footer_length,
+                  stripe_info->offset);
+    SeekableInputStream input_stream(
+        data_buffer->GetBuffer() + stripe_footer_offset,
+        stripe_info->footer_length - stripe_footer_offset);
+    if (!stripe_footer.ParseFromZeroCopyStream(&input_stream)) {
+      // protobuf failed to parse from the in-memory stream
+      CBDB_RAISE(cbdb::CException::ExType::kExTypeIOError);
+    }
+
+    return stripe_footer;
+  }
+
+  Assert(stripe_info->index_length == 0);
+
+  /* If column projection is needed,
+   * read and decode the stripe footer before reading the data part.
+   */
+  file_->PReadN(data_buffer->GetBuffer(),
+                stripe_info->footer_length - stripe_footer_offset,
+                stripe_info->offset + stripe_footer_offset);
+
+  SeekableInputStream input_stream(
+      data_buffer->GetBuffer(),
+      stripe_info->footer_length - stripe_footer_offset);
+
+  if (!stripe_footer.ParseFromZeroCopyStream(&input_stream)) {
+    CBDB_RAISE(cbdb::CException::ExType::kExTypeIOError);
+  }
+
+  data_buffer->BrushBackAll();
+
+  batch_offset = stripe_info->offset;
+
+  while (index < column_types_.size()) {
+    // The current column is skipped:
+    // move `batch_offset` and `streams_index` to the right position
+    if (!proj_map[index]) {
+      index++;
+
+      const orc::proto::Stream *n_stream = nullptr;
+      do {
+        n_stream = &stripe_footer.streams(streams_index++);
+        batch_offset += n_stream->length();
+      } while (n_stream->kind() !=
+               ::orc::proto::Stream_Kind::Stream_Kind_DATA);
+
+      continue;
+    }
+
+    batch_len = 0;
+
+    /* The current column should be read.
+     * Use a greedy strategy to combine IO: while the current column is
+     * being read, make sure the directly following projected columns are
+     * read in the same IO.
+     *
+     * The `do...while` below only computes the `batch_len` that one IO
+     * needs to read, until it meets a column that has to be skipped.
+     */
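+    /* Illustrative example (not from the source): with four columns and
+     * proj_map = {true, true, false, true}, the loop issues one IO that
+     * covers all streams of columns 0 and 1, advances batch_offset past
+     * the streams of column 2 without reading them, then issues a second
+     * IO for column 3. */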
+    do {
+      bool has_null = stripe_info->stripe_statistics.colstats(index).hasnull();
+      if (has_null) {
+        const orc::proto::Stream &non_null_stream =
+            stripe_footer.streams(streams_index++);
+        batch_len += non_null_stream.length();
+      }
+
+      const orc::proto::Stream *len_or_data_stream =
+          &stripe_footer.streams(streams_index++);
+      batch_len += len_or_data_stream->length();
+
+      if (len_or_data_stream->kind() ==
+          ::orc::proto::Stream_Kind::Stream_Kind_LENGTH) {
+        len_or_data_stream = &stripe_footer.streams(streams_index++);
+        batch_len += len_or_data_stream->length();
+      }
+    } while ((++index) < column_types_.size() && proj_map[index]);
+
+    file_->PReadN(data_buffer->GetAvailableBuffer(), batch_len, batch_offset);
+    data_buffer->Brush(batch_len);
+    batch_offset += batch_len;
+  }
+
+  return stripe_footer;
+}
+
+template <typename T>
+static PaxColumn *GetIntEncodingColumn(DataBuffer<char> *data_buffer,
+                                       const orc::proto::Stream &data_stream,
+                                       const ColumnEncoding &data_encoding) {
+  uint32 column_data_size = 0;
+  uint64 column_data_len = 0;
+
+  DataBuffer<T> *column_data_buffer = nullptr;
+  PaxIntColumn<T> *pax_column = nullptr;
+
+  column_data_size = static_cast<uint32>(data_stream.column());
+  column_data_len = static_cast<uint64>(data_stream.length());
+
+  column_data_buffer = new DataBuffer<T>(
+      reinterpret_cast<T *>(data_buffer->GetAvailableBuffer()),
+      column_data_len, false, false);
+  column_data_buffer->BrushAll();
+
+  data_buffer->Brush(column_data_len);
+
+  PaxDecoder::DecodingOption decoding_option;
+  decoding_option.column_encode_type = data_encoding.kind();
+  decoding_option.is_sign = true;
+
+  if (data_encoding.kind() ==
+      ColumnEncoding_Kind::ColumnEncoding_Kind_NO_ENCODED) {
+    Assert(column_data_size == column_data_buffer->GetSize());
+    pax_column = new PaxIntColumn<T>(std::move(decoding_option));
+  } else {
+    Assert(data_encoding.length() / sizeof(T) == column_data_size);
+    pax_column =
+        new PaxIntColumn<T>(column_data_size, std::move(decoding_option));
+  }
+
+  pax_column->Set(column_data_buffer);
+  return pax_column;
+}
+
+PaxColumns *OrcReader::ReadStripe(size_t index, bool *proj_map,
+                                  size_t proj_len) {
+  auto stripe_info = GetStripeInfo(index);
+  auto pax_columns = new PaxColumns();
+  DataBuffer<char> *data_buffer = nullptr;
+  orc::proto::StripeFooter stripe_footer;
+  size_t streams_index = 0;
+  size_t streams_size = 0;
+  size_t encoding_kinds_size = 0;
+
+  Assert(stripe_info->index_length == 0);
+  pax_columns->AddRows(stripe_info->numbers_of_row);
+
+  DEFER({ delete stripe_info; });
+
+  if (unlikely(stripe_info->footer_length == 0)) {
+    return pax_columns;
+  }
+
+  if (reused_buffer_) {
+    // grow the reused buffer by a factor of 1.5 until it fits
+    while (reused_buffer_->Capacity() < stripe_info->footer_length) {
+      reused_buffer_->ReSize(reused_buffer_->Capacity() / 2 * 3);
+    }
+    data_buffer = new DataBuffer<char>(
+        reused_buffer_->GetBuffer(), reused_buffer_->Capacity(), false, false);
+
+  } else {
+    data_buffer = new DataBuffer<char>(stripe_info->footer_length);
+  }
+  pax_columns->Set(data_buffer);
+
+  /* `ReadStripeWithProjection` reads the column memory filtered by
+   * `proj_map` and initializes `stripe_footer`.
+   *
+   * Note: ideally we should catch `kExTypeIOError` and delete the pax
+   * columns, but for now the memory context is destroyed if an exception
+   * happens, and we have not decided yet whether to use `try...catch`,
+   * so not catching here is acceptable.
+   */
+  stripe_footer =
+      ReadStripeWithProjection(data_buffer, stripe_info, proj_map, proj_len);
+
+  streams_size = stripe_footer.streams_size();
+  encoding_kinds_size = stripe_footer.pax_col_encodings_size();
+
+  if (unlikely(streams_size == 0 && column_types_.empty())) {
+    return pax_columns;
+  }
+
+  data_buffer->BrushBackAll();
+
+  AssertImply(proj_len != 0, column_types_.size() <= proj_len);
+  Assert(encoding_kinds_size <= column_types_.size());
+
+  for (size_t index = 0; index < column_types_.size(); index++) {
+    /* Skip reading the current column: just move `streams_index` past
+     * `Stream_Kind_DATA`, but still append nullptr into `PaxColumns` to
+     * keep the size of pax_columns equal to the column number.
+     */
+    if (proj_map && !proj_map[index]) {
+      const orc::proto::Stream *n_stream = nullptr;
+      do {
+        n_stream = &stripe_footer.streams(streams_index++);
+      } while (n_stream->kind() !=
+               ::orc::proto::Stream_Kind::Stream_Kind_DATA);
+
+      pax_columns->Append(nullptr);
+      continue;
+    }
+
+    DataBuffer<bool> *non_null_bitmap = nullptr;
+    bool has_null = stripe_info->stripe_statistics.colstats(index).hasnull();
+    if (has_null) {
+      uint64 non_null_length = 0;
+      const orc::proto::Stream &non_null_stream =
+          stripe_footer.streams(streams_index++);
+      non_null_length = static_cast<uint64>(non_null_stream.length());
+
+      non_null_bitmap = new DataBuffer<bool>(
+          reinterpret_cast<bool *>(data_buffer->GetAvailableBuffer()),
+          non_null_length, false, false);
+      non_null_bitmap->BrushAll();
+      data_buffer->Brush(non_null_length);
+    }
+
+    switch (column_types_[index]) {
+      case (orc::proto::Type_Kind::Type_Kind_STRING): {
+        uint32 column_lens_size = 0;
+        uint64 column_lens_len = 0;
+        uint64 column_data_len = 0;
+        DataBuffer<int32> *column_len_buffer = nullptr;
+        DataBuffer<char> *column_data_buffer = nullptr;
+        PaxNonFixedEncodingColumn *pax_column = nullptr;
+
+        const orc::proto::Stream &len_stream =
+            stripe_footer.streams(streams_index++);
+        const orc::proto::Stream &data_stream =
+            stripe_footer.streams(streams_index++);
+        const ColumnEncoding &data_encoding =
+            stripe_footer.pax_col_encodings(index);
+
+        column_lens_size = static_cast<uint32>(len_stream.column());
+        column_lens_len = static_cast<uint64>(len_stream.length());
+
+        column_len_buffer = new DataBuffer<int32>(
+            reinterpret_cast<int32 *>(data_buffer->GetAvailableBuffer()),
+            column_lens_len, false, false);
+        column_len_buffer->BrushAll();
+        data_buffer->Brush(column_lens_len);
+
+        column_data_len = data_stream.length();
+
+#ifdef ENABLE_DEBUG
+        if (data_encoding.kind() ==
+            ColumnEncoding_Kind::ColumnEncoding_Kind_NO_ENCODED) {
+          size_t segs_size = 0;
+          for (size_t i = 0; i < column_len_buffer->GetSize(); i++) {
+            segs_size += (*column_len_buffer)[i];
+          }
+          Assert(column_data_len == segs_size);
+        }
+#endif
+
+        column_data_buffer = new DataBuffer<char>(
+            data_buffer->GetAvailableBuffer(), column_data_len, false, false);
+        column_data_buffer->BrushAll();
+        data_buffer->Brush(column_data_len);
+
+        Assert(static_cast<uint32>(data_stream.column()) == column_lens_size);
+
+        PaxDecoder::DecodingOption decoding_option;
+        decoding_option.column_encode_type = data_encoding.kind();
+        decoding_option.is_sign = true;
+
+        if (data_encoding.kind() ==
+            ColumnEncoding_Kind::ColumnEncoding_Kind_NO_ENCODED) {
+          Assert(column_data_len == column_data_buffer->GetSize());
+          pax_column =
+              new PaxNonFixedEncodingColumn(0, std::move(decoding_option));
+        } else {
+          pax_column = new PaxNonFixedEncodingColumn(
+              data_encoding.length(), std::move(decoding_option));
+        }
+
+        // the underlying memory will be freed through pax_columns->data_
+        pax_column->Set(column_data_buffer, column_len_buffer,
+                        column_data_len);
+        pax_column->SetMemTakeOver(false);
+        pax_columns->Append(pax_column);
+        break;
+      }
+      case (orc::proto::Type_Kind::Type_Kind_BOOLEAN):
+      case (orc::proto::Type_Kind::Type_Kind_BYTE): {
+        const orc::proto::Stream &data_stream =
+            stripe_footer.streams(streams_index++);
+        uint32 column_data_size = 0;
+        uint64 column_data_len = 0;
+        DataBuffer<int8> *column_data_buffer = nullptr;
+        PaxCommColumn<int8> *pax_column = nullptr;
+
+        column_data_size = static_cast<uint32>(data_stream.column());
+        column_data_len = static_cast<uint64>(data_stream.length());
+        column_data_buffer = new DataBuffer<int8>(
+            reinterpret_cast<int8 *>(data_buffer->GetAvailableBuffer()),
+            column_data_len, false, false);
+
+        column_data_buffer->BrushAll();
+        data_buffer->Brush(column_data_len);
+
+        Assert(column_data_size == column_data_buffer->GetSize());
+        pax_column = new PaxCommColumn<int8>();
+        pax_column->Set(column_data_buffer);
+        pax_columns->Append(pax_column);
+        break;
+      }
+      case (orc::proto::Type_Kind::Type_Kind_SHORT): {
+        const orc::proto::Stream &data_stream =
+            stripe_footer.streams(streams_index++);
+        const ColumnEncoding &data_encoding =
+            stripe_footer.pax_col_encodings(index);
+        pax_columns->Append(GetIntEncodingColumn<int16>(
+            data_buffer, data_stream, data_encoding));
+        break;
+      }
+      case (orc::proto::Type_Kind::Type_Kind_INT): {
+        const orc::proto::Stream &data_stream =
+            stripe_footer.streams(streams_index++);
+        const ColumnEncoding &data_encoding =
+            stripe_footer.pax_col_encodings(index);
+        pax_columns->Append(GetIntEncodingColumn<int32>(
+            data_buffer, data_stream, data_encoding));
+        break;
+      }
+      case (orc::proto::Type_Kind::Type_Kind_LONG): {
+        const orc::proto::Stream &data_stream =
+            stripe_footer.streams(streams_index++);
+        const ColumnEncoding &data_encoding =
+            stripe_footer.pax_col_encodings(index);
+        pax_columns->Append(GetIntEncodingColumn<int64>(
+            data_buffer, data_stream, data_encoding));
+        break;
+      }
+      default:
+        // shouldn't be here
+        Assert(!"shouldn't be here, non-implemented type");
+        break;
+    }
+
+    // fill the nulls data buffer
+    if (has_null) {
+      Assert(pax_columns->GetColumns() > 0 && non_null_bitmap);
+      auto last_column = (*pax_columns)[pax_columns->GetColumns() - 1];
+      last_column->SetNulls(non_null_bitmap);
+    }
+  }
+
+  Assert(streams_size == streams_index);
+  return pax_columns;
+}
+
+OrcReader::StripeInformation *OrcReader::GetStripeInfo(size_t index) const {
+  auto *stripe_info_in_mem = new StripeInformation();
+  orc::proto::StripeInformation stripe_info;
+
+  CBDB_CHECK(index < num_of_stripes_,
+             cbdb::CException::ExType::kExTypeLogicError);
+
+  stripe_info = file_footer_.stripes(static_cast<int>(index));
+  stripe_info_in_mem->footer_length = stripe_info.footerlength();
+  stripe_info_in_mem->data_length = stripe_info.datalength();
+  stripe_info_in_mem->numbers_of_row = stripe_info.numberofrows();
+  stripe_info_in_mem->offset = stripe_info.offset();
+
+  stripe_info_in_mem->index_length = stripe_info.indexlength();
+  stripe_info_in_mem->stripe_footer_start = stripe_info.offset() +
+                                            stripe_info.indexlength() +
+                                            stripe_info.datalength();
+
+  stripe_info_in_mem->stripe_statistics =
+      meta_data_.stripestats(static_cast<int>(index));
+
+  return stripe_info_in_mem;
+}
+
+size_t OrcReader::GetNumberOfStripes() const { return num_of_stripes_; }
+
+void OrcReader::Open(const ReaderOptions &options) {
+  size_t file_length = 0;
+  uint64 post_script_len = 0;
+
+  size_t footer_offset = 0;
+  size_t footer_len = 0;
+  size_t tail_len = 0;
+
+  Assert(file_);
+  Assert(is_close_);
+
+  // Must not open twice.
+  Assert(!reused_buffer_);
+  if (options.reused_buffer) {
+    CBDB_CHECK(options.reused_buffer->IsMemTakeOver(),
+               cbdb::CException::ExType::kExTypeLogicError);
+    options.reused_buffer->BrushBackAll();
+
+    reused_buffer_ = options.reused_buffer;
+  }
+
+  Assert(!proj_map_ && !proj_len_);
+  if (options.filter)
+    std::tie(proj_map_, proj_len_) = options.filter->GetColumnProjection();
+
+  // Begin reading the footer
+
+  // TODO(jiaqizho):
+  // There is an optimization opportunity here. In standard ORC, a single
+  // ORC file is read with these steps:
+  // - read the postscript size
+  // - read the postscript
+  // - read the file footer
+  // - read the metadata if it exists
+  // so reading the footer information of a single ORC file costs 3-4 IOPS.
+  // Consider adding a new field after the postscript size that contains
+  // the full size of the footer information.
+  file_length = file_->FileLength();
+  file_->PRead(&post_script_len, ORC_POST_SCRIPT_SIZE,
+               (off_t)(file_length - ORC_POST_SCRIPT_SIZE));
+
+  ReadPostScript(file_length, post_script_len);
+
+  footer_len = post_script_.footerlength();
+  tail_len = ORC_POST_SCRIPT_SIZE + post_script_len + footer_len;
+  footer_offset = file_length - tail_len;
+
+  ReadFooter(footer_offset, footer_len);
+  num_of_stripes_ = file_footer_.stripes_size();
+
+  if (post_script_.metadatalength() != 0)
+    ReadMetadata(file_length, post_script_len);
+  is_close_ = false;
+}
+
+void OrcReader::ResetCurrentReading() {
+  if (working_pax_columns_) {
+    delete working_pax_columns_;
+    working_pax_columns_ = nullptr;
+  }
+  current_stripe_index_ = 0;
+  current_row_index_ = 0;
+  current_offset_ = 0;
+  memset(current_nulls_, 0, column_types_.size() * sizeof(uint32));
+}
+
+void OrcReader::Close() {
+  if (is_close_) {
+    return;
+  }
+
+  ResetCurrentReading();
+  delete[] current_nulls_;
+  current_nulls_ = nullptr;
+  file_->Close();
+  is_close_ = true;
+}
+
+bool OrcReader::ReadTuple(CTupleSlot *cslot) {
+  size_t row_nums = 0;
+  TupleTableSlot *slot;
+  size_t column_numbers = 0;
+  size_t index = 0;
+  size_t nattrs = 0;
+  AttrMissing *attrmiss = nullptr;
+
+  slot = cslot->GetTupleTableSlot();
+
+  while (true) {
+    nattrs = static_cast<size_t>(slot->tts_tupleDescriptor->natts);
+    if (!working_pax_columns_) {
+      // no data remains
+      if (current_stripe_index_ >= GetNumberOfStripes()) {
+        return false;
+      }
+
+      working_pax_columns_ =
+          ReadStripe(current_stripe_index_++, proj_map_, proj_len_);
+      current_row_index_ = 0;
+      for (size_t i = 0; i < column_types_.size(); i++) {
+        current_nulls_[i] = 0;
+      }
+    }
+
+    column_numbers = working_pax_columns_->GetColumns();
+
+    // The column number in the PAX file meta can be smaller than the column
+    // number in the TupleSlot after an ALTER TABLE ADD COLUMN DDL operation
+    // has been done.
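+    // For example: a file written when the table had 3 columns keeps
+    // column_numbers == 3 even after ALTER TABLE ADD COLUMN raises nattrs
+    // to 4; the extra attribute is served from attrmiss or NULL further
+    // below. The opposite direction is a genuine schema mismatch: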
+    if (column_numbers > nattrs) {
+      CBDB_RAISE(cbdb::CException::ExType::kExTypeSchemaNotMatch);
+    }
+
+    row_nums = working_pax_columns_->GetRows();
+
+    // skip the stripe if it is empty or already fully consumed
+    if (unlikely(row_nums == 0) || current_row_index_ == row_nums) {
+      delete working_pax_columns_;
+      working_pax_columns_ = nullptr;
+    } else {
+      break;
+    }
+  }
+
+  char *buffer = nullptr;
+  size_t buffer_len = 0;
+
+  // first check whether any column has a missing (default) value
+  if (slot->tts_tupleDescriptor->constr)
+    attrmiss = slot->tts_tupleDescriptor->constr->missing;
+
+  for (index = 0; index < nattrs; index++) {
+    if (proj_map_ && !proj_map_[index]) {
+      continue;
+    }
+
+    // Handle the PAX column number being inconsistent with the pg catalog
+    // nattrs, either because no data has been inserted yet or because the
+    // PAX file being read was conserved before the last ADD COLUMN DDL was
+    // done. In these cases it is normal that the pg catalog schema does not
+    // match the schema in the PAX file:
+    // 1. if atthasmissing is set, then return the default column value.
+    // 2. if atthasmissing is not set, then return a null value.
+    if (index >= column_numbers) {
+      slot->tts_isnull[index] = true;
+      // The attrmiss default value memory is managed in CacheMemoryContext,
+      // which was allocated in RelationBuildTupleDesc.
+      if (attrmiss && (slot->tts_tupleDescriptor->attrs[index].atthasmissing &&
+                       attrmiss[index].am_present)) {
+        slot->tts_values[index] = attrmiss[index].am_value;
+        slot->tts_isnull[index] = false;
+      }
+      continue;
+    }
+
+    // In case the column is dropped, set its value to null without reading
+    // data tuples.
+    if (unlikely(slot->tts_tupleDescriptor->attrs[index].attisdropped)) {
+      slot->tts_isnull[index] = true;
+      continue;
+    }
+
+    PaxColumn *column = ((*working_pax_columns_)[index]);
+
+    // not null by default
+    slot->tts_isnull[index] = false;
+    if (column->HasNull()) {
+      auto null_bitmap = column->GetNulls();
+      if (!(*null_bitmap)[current_row_index_]) {
+        slot->tts_isnull[index] = true;
+        current_nulls_[index]++;
+        continue;
+      }
+    }
+
+    Assert(current_row_index_ >= current_nulls_[index]);
+
+    std::tie(buffer, buffer_len) =
+        column->GetBuffer(current_row_index_ - current_nulls_[index]);
+    switch (column->GetPaxColumnTypeInMem()) {
+      case kTypeNonFixed: {
+        slot->tts_values[index] = PointerGetDatum(buffer);
+        break;
+      }
+      case kTypeFixed: {
+        // FIXME(gongxun): get the value info from PaxColumn
+        switch (slot->tts_tupleDescriptor->attrs[index].attlen) {
+          case 1:
+            slot->tts_values[index] =
+                cbdb::Int8ToDatum(*reinterpret_cast<int8 *>(buffer));
+            break;
+          case 2:
+            slot->tts_values[index] =
+                cbdb::Int16ToDatum(*reinterpret_cast<int16 *>(buffer));
+            break;
+          case 4:
+            slot->tts_values[index] =
+                cbdb::Int32ToDatum(*reinterpret_cast<int32 *>(buffer));
+            break;
+          case 8:
+            slot->tts_values[index] =
+                cbdb::Int64ToDatum(*reinterpret_cast<int64 *>(buffer));
+            break;
+          default:
+            Assert(!"shouldn't be here, fixed type len should be 1, 2, 4, 8");
+        }
+        break;
+      }
+      default: {
+        Assert(!"shouldn't be here, non-implemented column type in memory");
+        break;
+      }
+    }
+  }
+
+  current_row_index_++;
+  current_offset_++;
+  cslot->SetOffset(current_offset_);
+
+  return true;
+}
+
+}  // namespace pax
diff --git a/contrib/pax_storage/src/cpp/storage/orc/orc.h b/contrib/pax_storage/src/cpp/storage/orc/orc.h
new file mode 100644
index 00000000000..26755e56373
--- /dev/null
+++ b/contrib/pax_storage/src/cpp/storage/orc/orc.h
@@ -0,0 +1,175 @@
+#pragma once
+
+#include <cstddef>
+#include <utility>
+#include <vector>
+
+#include "comm/cbdb_wrappers.h"
+#include "comm/pax_defer.h"
+#include "exceptions/CException.h"
+#include "storage/columns/pax_column.h"
"storage/columns/pax_column.h" +#include "storage/columns/pax_columns.h" +#include "storage/file_system.h" +#include "storage/micro_partition.h" +#include "storage/proto/proto_wrappers.h" +#include "storage/proto/protobuf_stream.h" + +namespace pax { +class MicroPartitionStats; + +#define ORC_MAGIC_ID "ORC" +// ORC cpp writer +#define ORC_WRITER_ID 1 +#define ORC_SOFT_VERSION "1" +#define ORC_FILE_MAJOR_VERSION 1 +#define ORC_WRITER_VERSION 1 +#define ORC_POST_SCRIPT_SIZE 1 + +class OrcWriter : public MicroPartitionWriter { + public: + OrcWriter(const MicroPartitionWriter::WriterOptions &orc_writer_options, + const std::vector &column_types, + const std::vector &column_encoding_types, + File *file); + + ~OrcWriter() override; + + void Flush() override; + + void WriteTuple(CTupleSlot *slot) override; + + void WriteTupleN(CTupleSlot **slot, size_t n) override; + + void Close() override; + + MicroPartitionWriter *SetStatsCollector( + MicroPartitionStats *mpstats) override; + + size_t PhysicalSize() const override; + + static std::pair, + std::vector> + BuildSchema(const MicroPartitionWriter::WriterOptions &options); + +#ifndef RUN_GTEST + protected: // NOLINT +#endif + + // only for test + static MicroPartitionWriter *CreateWriter( + const MicroPartitionWriter::WriterOptions &options, + const std::vector &column_types, File *file) { + std::vector all_no_encoding_types; + for (auto _ : column_types) { + (void)_; + all_no_encoding_types.emplace_back( + ColumnEncoding_Kind::ColumnEncoding_Kind_NO_ENCODED); + } + + return new OrcWriter(options, column_types, all_no_encoding_types, file); + } + + // after create a new writer or old stripe have been flushed + // stripe_info_ in memory should reinit + void InitStripe(); + + void BuildFooterType(); + bool WriteStripe(BufferedOutputStream *buffer_mem_stream); + void WriteMetadata(BufferedOutputStream *buffer_mem_stream); + void WriteFileFooter(BufferedOutputStream *buffer_mem_stream); + void WritePostscript(BufferedOutputStream *buffer_mem_stream); + + protected: + PaxColumns *pax_columns_; + const std::vector column_types_; + const std::vector column_encoding_types_; + File *file_; + WriteSummary summary_; + + ::orc::proto::Footer file_footer_; + ::orc::proto::PostScript post_script_; + ::orc::proto::StripeInformation stripe_info_; + ::orc::proto::Metadata meta_data_; + + uint64 stripe_rows_ = 0; + uint64 total_rows_ = 0; + uint64 current_offset_ = 0; +}; + +class OrcReader : public MicroPartitionReader { + public: + struct StripeInformation { + uint64 footer_length; + uint64 data_length; + uint64 numbers_of_row; + uint64 offset; + + uint64 index_length; + uint64 stripe_footer_start; + + // refine column statistics if we do need it + ::orc::proto::StripeStatistics stripe_statistics; + }; + + explicit OrcReader(File *file); + + ~OrcReader() override; + + StripeInformation *GetStripeInfo(size_t index) const; + + PaxColumns *ReadStripe(size_t index, bool *proj_map = nullptr, + size_t proj_len = 0); + + size_t GetNumberOfStripes() const; + + void Open(const ReaderOptions &options) override; + + void Close() override; + + bool ReadTuple(CTupleSlot *cslot) override; + +#ifndef RUN_GTEST + protected: // NOLINT +#endif + + PaxColumns *GetAllColumns() override; + + orc::proto::StripeFooter ReadStripeWithProjection( + DataBuffer *data_buffer, OrcReader::StripeInformation *stripe_info, + const bool *proj_map, size_t proj_len); + + void ReadMetadata(ssize_t file_length, uint64 post_script_len); + + void BuildProtoTypes(); + + void ReadFooter(size_t 
+
+  void ReadPostScript(size_t file_size, uint64 post_script_len);
+
+  // Clean up the reading status
+  void ResetCurrentReading();
+
+ protected:
+  std::vector<orc::proto::Type_Kind> column_types_;
+  File *file_;
+
+  DataBuffer<char> *reused_buffer_;
+  PaxColumns *working_pax_columns_;
+  size_t current_stripe_index_ = 0;
+  size_t current_row_index_ = 0;
+  uint64 current_offset_ = 0;
+
+  uint32 *current_nulls_ = nullptr;
+
+  orc::proto::PostScript post_script_;
+  orc::proto::Footer file_footer_;
+  orc::proto::Metadata meta_data_;
+
+  size_t num_of_stripes_;
+  bool *proj_map_;
+  size_t proj_len_;
+
+  bool is_close_;
+};
+
+}  // namespace pax
diff --git a/contrib/pax_storage/src/cpp/storage/orc/orc_test.cc b/contrib/pax_storage/src/cpp/storage/orc/orc_test.cc
new file mode 100644
index 00000000000..2a5fe5bc9ad
--- /dev/null
+++ b/contrib/pax_storage/src/cpp/storage/orc/orc_test.cc
@@ -0,0 +1,1206 @@
+#include "storage/orc/orc.h"  // NOLINT
+
+#include <cstdio>
+#include <cstring>
+#include <string>
+#include <tuple>
+
+#include "access/tupdesc_details.h"
+#include "comm/cbdb_wrappers.h"
+#include "comm/gtest_wrappers.h"
+#include "exceptions/CException.h"
+#include "storage/local_file_system.h"
+
+namespace pax::tests {
+
+// 3 columns - string(len 100), string(len 100), int(len 4)
+#define COLUMN_NUMS 3
+#define COLUMN_SIZE 100
+#define INT32_COLUMN_VALUE 0x123
+#define INT32_COLUMN_VALUE_DEFAULT 0x001
+#define PROJECTION_COLUMN 2
+#define PROJECTION_COLUMN_SINGLE 1
+
+static void GenFakeBuffer(char *buffer, size_t length) {
+  for (size_t i = 0; i < length; i++) {
+    buffer[i] = static_cast<char>(i);
+  }
+}
+
+static void CreateOrcTestResourceOwner() {
+  CurrentResourceOwner = ResourceOwnerCreate(NULL, "OrcTestResourceOwner");
+}
+
+static void ReleaseOrcTestResourceOwner() {
+  ResourceOwner tmp_resource_owner = CurrentResourceOwner;
+  CurrentResourceOwner = NULL;
+  ResourceOwnerRelease(tmp_resource_owner, RESOURCE_RELEASE_BEFORE_LOCKS,
+                       false, true);
+  ResourceOwnerRelease(tmp_resource_owner, RESOURCE_RELEASE_LOCKS, false,
+                       true);
+  ResourceOwnerRelease(tmp_resource_owner, RESOURCE_RELEASE_AFTER_LOCKS,
+                       false, true);
+  ResourceOwnerDelete(tmp_resource_owner);
+}
+
+class OrcTest : public ::testing::Test {
+ public:
+  void SetUp() override {
+    Singleton<LocalFileSystem>::GetInstance();
+    remove(file_name_.c_str());
+
+    MemoryContext orc_test_memory_context = AllocSetContextCreate(
+        (MemoryContext)NULL, "OrcTestMemoryContext", 80 * 1024 * 1024,
+        80 * 1024 * 1024, 80 * 1024 * 1024);
+
+    MemoryContextSwitchTo(orc_test_memory_context);
+    CreateOrcTestResourceOwner();
+  }
+
+  void TearDown() override {
+    Singleton<LocalFileSystem>::GetInstance()->Delete(file_name_);
+    ReleaseOrcTestResourceOwner();
+  }
+
+  static void DeleteCTupleSlot(CTupleSlot *ctuple_slot) {
+    auto tuple_table_slot = ctuple_slot->GetTupleTableSlot();
+    cbdb::Pfree(tuple_table_slot->tts_tupleDescriptor);
+    if (tuple_table_slot->tts_isnull) {
+      cbdb::Pfree(tuple_table_slot->tts_isnull);
+    }
+
+    cbdb::Pfree(tuple_table_slot);
+    delete ctuple_slot;
+  }
+
+  static CTupleSlot *CreateFakeCTupleSlot(bool with_value = true) {
+    TupleTableSlot *tuple_slot = nullptr;
+
+    auto tuple_desc = reinterpret_cast<TupleDesc>(cbdb::Palloc0(
+        sizeof(TupleDescData) + sizeof(FormData_pg_attribute) * COLUMN_NUMS));
+
+    tuple_desc->natts = COLUMN_NUMS;
+    tuple_desc->attrs[0] = {
+        .attlen = -1,
+        .attbyval = false,
+    };
+
+    tuple_desc->attrs[1] = {
+        .attlen = -1,
+        .attbyval = false,
+    };
+
+    tuple_desc->attrs[2] = {
+        .attlen = 4,
+        .attbyval = true,
+    };
+
+    tuple_slot = MakeTupleTableSlot(tuple_desc, &TTSOpsVirtual);
+
+    if
(with_value) { + char column_buff[COLUMN_SIZE * 2]; + GenFakeBuffer(column_buff, COLUMN_SIZE); + GenFakeBuffer(column_buff + COLUMN_SIZE, COLUMN_SIZE); + + bool *fake_is_null = + reinterpret_cast(cbdb::Palloc0(sizeof(bool) * COLUMN_NUMS)); + + fake_is_null[0] = false; + fake_is_null[1] = false; + fake_is_null[2] = false; + + tuple_slot->tts_values[0] = + cbdb::DatumFromCString(column_buff, COLUMN_SIZE); + tuple_slot->tts_values[1] = + cbdb::DatumFromCString(column_buff + COLUMN_SIZE, COLUMN_SIZE); + tuple_slot->tts_values[2] = cbdb::Int32ToDatum(INT32_COLUMN_VALUE); + tuple_slot->tts_isnull = fake_is_null; + } + + auto ctuple_slot = new CTupleSlot(tuple_slot); + + return ctuple_slot; + } + + static CTupleSlot *CreateEmptyCTupleSlot() { + auto tuple_desc = reinterpret_cast(cbdb::Palloc0( + sizeof(TupleDescData) + sizeof(FormData_pg_attribute) * COLUMN_NUMS)); + bool *fake_is_null = + reinterpret_cast(cbdb::Palloc0(sizeof(bool) * COLUMN_NUMS)); + auto tuple_slot = reinterpret_cast( + cbdb::Palloc0(sizeof(TupleTableSlot))); + auto tts_values = + reinterpret_cast(cbdb::Palloc0(sizeof(Datum) * COLUMN_NUMS)); + tuple_desc->natts = COLUMN_NUMS; + tuple_desc->attrs[0] = { + .attlen = -1, + .attbyval = false, + }; + + tuple_desc->attrs[1] = { + .attlen = -1, + .attbyval = false, + }; + + tuple_desc->attrs[2] = { + .attlen = 4, + .attbyval = true, + }; + tuple_slot->tts_tupleDescriptor = tuple_desc; + tuple_slot->tts_values = tts_values; + tuple_slot->tts_isnull = fake_is_null; + return new CTupleSlot(tuple_slot); + } + + static void VerifySingleStripe(PaxColumns *columns, + const bool *const proj_map = nullptr) { + char column_buff[COLUMN_SIZE]; + struct varlena *vl = nullptr; + struct varlena *tunpacked = nullptr; + int read_len = -1; + char *read_data = nullptr; + + GenFakeBuffer(column_buff, COLUMN_SIZE); + + EXPECT_EQ(COLUMN_NUMS, columns->GetColumns()); + + if (!proj_map || proj_map[0]) { + auto column1 = reinterpret_cast((*columns)[0]); + EXPECT_EQ(1, column1->GetNonNullRows()); + char *column1_buffer = column1->GetBuffer(0).first; + EXPECT_EQ( + 0, std::memcmp(column1_buffer + VARHDRSZ, column_buff, COLUMN_SIZE)); + vl = (struct varlena *)DatumGetPointer(column1_buffer); + tunpacked = pg_detoast_datum_packed(vl); + EXPECT_EQ((Pointer)vl, (Pointer)tunpacked); + read_len = VARSIZE(tunpacked); + read_data = VARDATA_ANY(tunpacked); + // read_len is COLUMN_SIZE + VARHDRSZ + // because DatumFromCString set it + EXPECT_EQ(read_len, COLUMN_SIZE + VARHDRSZ); + EXPECT_EQ(0, std::memcmp(read_data, column_buff, COLUMN_SIZE)); + // read_data should pass the pointer rather than memcpy + EXPECT_EQ(read_data, column1_buffer + VARHDRSZ); + } + + if (!proj_map || proj_map[1]) { + auto column2 = reinterpret_cast((*columns)[1]); + char *column2_buffer = column2->GetBuffer(0).first; + EXPECT_EQ(1, column2->GetNonNullRows()); + EXPECT_EQ( + 0, std::memcmp(column2_buffer + VARHDRSZ, column_buff, COLUMN_SIZE)); + vl = (struct varlena *)DatumGetPointer(column2_buffer); + tunpacked = pg_detoast_datum_packed(vl); + EXPECT_EQ((Pointer)vl, (Pointer)tunpacked); + read_len = VARSIZE(tunpacked); + read_data = VARDATA_ANY(tunpacked); + EXPECT_EQ(read_len, COLUMN_SIZE + VARHDRSZ); + EXPECT_EQ(0, std::memcmp(read_data, column_buff, COLUMN_SIZE)); + EXPECT_EQ(read_data, column2_buffer + VARHDRSZ); + } + + if (!proj_map || proj_map[2]) { + auto column3 = reinterpret_cast *>((*columns)[2]); + std::tie(read_data, read_len) = column3->GetBuffer(); + EXPECT_EQ(4, read_len); + EXPECT_EQ(INT32_COLUMN_VALUE, *(int32 *)read_data); + } 
+ } + + protected: + const std::string file_name_ = "./test.file"; +}; + +class OrcTestProjection + : public ::testing::TestWithParam<::testing::tuple> { + public: + void SetUp() override { + Singleton::GetInstance(); + remove(file_name_.c_str()); + + MemoryContext orc_test_memory_context = AllocSetContextCreate( + (MemoryContext)NULL, "OrcTestMemoryContext", 80 * 1024 * 1024, + 80 * 1024 * 1024, 80 * 1024 * 1024); + + MemoryContextSwitchTo(orc_test_memory_context); + CreateOrcTestResourceOwner(); + } + + void TearDown() override { + Singleton::GetInstance()->Delete(file_name_); + ReleaseOrcTestResourceOwner(); + } + + protected: + const std::string file_name_ = "./test.file"; +}; + +TEST_F(OrcTest, WriteTuple) { + CTupleSlot *tuple_slot = CreateFakeCTupleSlot(); + auto local_fs = Singleton::GetInstance(); + ASSERT_NE(nullptr, local_fs); + + auto file_ptr = local_fs->Open(file_name_); + EXPECT_NE(nullptr, file_ptr); + + std::vector types; + types.emplace_back(orc::proto::Type_Kind::Type_Kind_STRING); + types.emplace_back(orc::proto::Type_Kind::Type_Kind_STRING); + types.emplace_back(orc::proto::Type_Kind::Type_Kind_INT); + OrcWriter::WriterOptions writer_options; + + auto writer = OrcWriter::CreateWriter(writer_options, types, file_ptr); + + writer->WriteTuple(tuple_slot); + writer->Close(); + + DeleteCTupleSlot(tuple_slot); + delete writer; +} + +TEST_F(OrcTest, OpenOrc) { + CTupleSlot *tuple_slot = CreateFakeCTupleSlot(); + auto local_fs = Singleton::GetInstance(); + ASSERT_NE(nullptr, local_fs); + + auto file_ptr = local_fs->Open(file_name_); + EXPECT_NE(nullptr, file_ptr); + + std::vector types; + types.emplace_back(orc::proto::Type_Kind::Type_Kind_STRING); + types.emplace_back(orc::proto::Type_Kind::Type_Kind_STRING); + types.emplace_back(orc::proto::Type_Kind::Type_Kind_INT); + MicroPartitionWriter::WriterOptions writer_options; + auto writer = + OrcWriter::CreateWriter(writer_options, std::move(types), file_ptr); + + writer->WriteTuple(tuple_slot); + writer->Close(); + + file_ptr = local_fs->Open(file_name_); + + MicroPartitionReader::ReaderOptions reader_options; + auto reader = new OrcReader(file_ptr); + reader->Open(reader_options); + + EXPECT_EQ(1, reader->GetNumberOfStripes()); + reader->GetStripeInfo(0); + reader->Close(); + + DeleteCTupleSlot(tuple_slot); + delete writer; + delete reader; +} + +TEST_F(OrcTest, WriteReadStripes) { + CTupleSlot *tuple_slot = CreateFakeCTupleSlot(); + auto local_fs = Singleton::GetInstance(); + ASSERT_NE(nullptr, local_fs); + + auto file_ptr = local_fs->Open(file_name_); + EXPECT_NE(nullptr, file_ptr); + + std::vector types; + types.emplace_back(orc::proto::Type_Kind::Type_Kind_STRING); + types.emplace_back(orc::proto::Type_Kind::Type_Kind_STRING); + types.emplace_back(orc::proto::Type_Kind::Type_Kind_INT); + MicroPartitionWriter::WriterOptions writer_options; + + // file_ptr in orc writer will be freed when writer do destruct + // current OrcWriter::CreateWriter only for test + auto writer = OrcWriter::CreateWriter(writer_options, types, file_ptr); + + writer->WriteTuple(tuple_slot); + writer->Close(); + + file_ptr = local_fs->Open(file_name_); + + // file_ptr in orc reader will be freed when reader do destruct + MicroPartitionReader::ReaderOptions reader_options; + auto reader = new OrcReader(file_ptr); + reader->Open(reader_options); + + EXPECT_EQ(1, reader->GetNumberOfStripes()); + auto columns = reader->ReadStripe(0); + OrcTest::VerifySingleStripe(columns); + reader->Close(); + + delete columns; + DeleteCTupleSlot(tuple_slot); + delete 
writer; + delete reader; +} + +TEST_F(OrcTest, WriteReadStripesTwice) { + CTupleSlot *tuple_slot = CreateFakeCTupleSlot(); + auto local_fs = Singleton::GetInstance(); + ASSERT_NE(nullptr, local_fs); + + auto file_ptr = local_fs->Open(file_name_); + EXPECT_NE(nullptr, file_ptr); + + std::vector types; + types.emplace_back(orc::proto::Type_Kind::Type_Kind_STRING); + types.emplace_back(orc::proto::Type_Kind::Type_Kind_STRING); + types.emplace_back(orc::proto::Type_Kind::Type_Kind_INT); + MicroPartitionWriter::WriterOptions writer_options; + auto writer = OrcWriter::CreateWriter(writer_options, types, file_ptr); + + writer->WriteTuple(tuple_slot); + writer->WriteTuple(tuple_slot); + writer->Close(); + + file_ptr = local_fs->Open(file_name_); + + MicroPartitionReader::ReaderOptions reader_options; + auto reader = new OrcReader(file_ptr); + reader->Open(reader_options); + + EXPECT_EQ(1, reader->GetNumberOfStripes()); + auto columns_stripe = reader->ReadStripe(0); + + reader->Close(); + char column_buff[COLUMN_SIZE]; + + GenFakeBuffer(column_buff, COLUMN_SIZE); + + EXPECT_EQ(COLUMN_NUMS, columns_stripe->GetColumns()); + auto column1 = reinterpret_cast((*columns_stripe)[0]); + auto column2 = reinterpret_cast((*columns_stripe)[1]); + + EXPECT_EQ(2, column1->GetNonNullRows()); + EXPECT_EQ(0, std::memcmp(column1->GetBuffer(0).first + VARHDRSZ, column_buff, + COLUMN_SIZE)); + EXPECT_EQ(0, std::memcmp(column1->GetBuffer(1).first + VARHDRSZ, column_buff, + COLUMN_SIZE)); + EXPECT_EQ(2, column2->GetNonNullRows()); + EXPECT_EQ(0, std::memcmp(column2->GetBuffer(0).first + VARHDRSZ, column_buff, + COLUMN_SIZE)); + EXPECT_EQ(0, std::memcmp(column2->GetBuffer(1).first + VARHDRSZ, column_buff, + COLUMN_SIZE)); + + delete columns_stripe; + DeleteCTupleSlot(tuple_slot); + delete writer; + delete reader; +} + +TEST_F(OrcTest, WriteReadMultiStripes) { + CTupleSlot *tuple_slot = CreateFakeCTupleSlot(); + auto local_fs = Singleton::GetInstance(); + ASSERT_NE(nullptr, local_fs); + + auto file_ptr = local_fs->Open(file_name_); + EXPECT_NE(nullptr, file_ptr); + + std::vector types; + types.emplace_back(orc::proto::Type_Kind::Type_Kind_STRING); + types.emplace_back(orc::proto::Type_Kind::Type_Kind_STRING); + types.emplace_back(orc::proto::Type_Kind::Type_Kind_INT); + MicroPartitionWriter::WriterOptions writer_options; + + auto writer = OrcWriter::CreateWriter(writer_options, types, file_ptr); + + writer->WriteTuple(tuple_slot); + writer->Flush(); + + writer->WriteTuple(tuple_slot); + writer->Close(); + + file_ptr = local_fs->Open(file_name_); + + MicroPartitionReader::ReaderOptions reader_options; + auto reader = new OrcReader(file_ptr); + reader->Open(reader_options); + + EXPECT_EQ(2, reader->GetNumberOfStripes()); + auto columns1 = reader->ReadStripe(0); + auto columns2 = reader->ReadStripe(1); + OrcTest::VerifySingleStripe(columns1); + OrcTest::VerifySingleStripe(columns2); + reader->Close(); + + delete columns1; + delete columns2; + + DeleteCTupleSlot(tuple_slot); + delete writer; + delete reader; +} + +TEST_F(OrcTest, WriteReadCloseEmptyOrc) { + CTupleSlot *tuple_slot = CreateFakeCTupleSlot(); + auto local_fs = Singleton::GetInstance(); + ASSERT_NE(nullptr, local_fs); + + auto file_ptr = local_fs->Open(file_name_); + EXPECT_NE(nullptr, file_ptr); + + std::vector types; + types.emplace_back(orc::proto::Type_Kind::Type_Kind_STRING); + types.emplace_back(orc::proto::Type_Kind::Type_Kind_STRING); + types.emplace_back(orc::proto::Type_Kind::Type_Kind_INT); + MicroPartitionWriter::WriterOptions writer_options; + + auto 
writer = OrcWriter::CreateWriter(writer_options, types, file_ptr); + writer->WriteTuple(tuple_slot); + writer->Flush(); + + // close without any data + writer->Close(); + + file_ptr = local_fs->Open(file_name_); + + MicroPartitionReader::ReaderOptions reader_options; + auto reader = new OrcReader(file_ptr); + reader->Open(reader_options); + + EXPECT_EQ(1, reader->GetNumberOfStripes()); + auto columns = reader->ReadStripe(0); + OrcTest::VerifySingleStripe(columns); + reader->Close(); + + delete writer; + delete reader; +} + +TEST_F(OrcTest, WriteReadEmptyOrc) { + auto local_fs = Singleton::GetInstance(); + ASSERT_NE(nullptr, local_fs); + + auto file_ptr = local_fs->Open(file_name_); + EXPECT_NE(nullptr, file_ptr); + + std::vector types; + types.emplace_back(orc::proto::Type_Kind::Type_Kind_STRING); + types.emplace_back(orc::proto::Type_Kind::Type_Kind_STRING); + types.emplace_back(orc::proto::Type_Kind::Type_Kind_INT); + MicroPartitionWriter::WriterOptions writer_options; + + auto writer = OrcWriter::CreateWriter(writer_options, types, file_ptr); + // flush empty + writer->Flush(); + // direct close + writer->Close(); + + file_ptr = local_fs->Open(file_name_); + + MicroPartitionReader::ReaderOptions reader_options; + auto reader = new OrcReader(file_ptr); + reader->Open(reader_options); + EXPECT_EQ(0, reader->GetNumberOfStripes()); + reader->Close(); + + delete writer; + delete reader; +} + +TEST_F(OrcTest, ReadTuple) { + char column_buff[COLUMN_SIZE]; + + GenFakeBuffer(column_buff, COLUMN_SIZE); + + CTupleSlot *tuple_slot = CreateFakeCTupleSlot(); + auto local_fs = Singleton::GetInstance(); + ASSERT_NE(nullptr, local_fs); + + auto file_ptr = local_fs->Open(file_name_); + EXPECT_NE(nullptr, file_ptr); + + std::vector types; + types.emplace_back(orc::proto::Type_Kind::Type_Kind_STRING); + types.emplace_back(orc::proto::Type_Kind::Type_Kind_STRING); + types.emplace_back(orc::proto::Type_Kind::Type_Kind_INT); + MicroPartitionWriter::WriterOptions writer_options; + + auto writer = OrcWriter::CreateWriter(writer_options, types, file_ptr); + CTupleSlot *tuple_slot_empty = CreateEmptyCTupleSlot(); + + writer->WriteTuple(tuple_slot); + writer->Close(); + + file_ptr = local_fs->Open(file_name_); + + MicroPartitionReader::ReaderOptions reader_options; + auto reader = new OrcReader(file_ptr); + reader->Open(reader_options); + EXPECT_EQ(1, reader->GetNumberOfStripes()); + tuple_slot_empty->GetTupleDesc()->natts = COLUMN_NUMS; + reader->ReadTuple(tuple_slot_empty); + + auto vl = (struct varlena *)DatumGetPointer( + tuple_slot_empty->GetTupleTableSlot()->tts_values[0]); + auto tunpacked = pg_detoast_datum_packed(vl); + EXPECT_EQ((Pointer)vl, (Pointer)tunpacked); + + int read_len = VARSIZE(tunpacked); + char *read_data = VARDATA_ANY(tunpacked); + + EXPECT_EQ(read_len, COLUMN_SIZE + VARHDRSZ); + EXPECT_EQ(0, std::memcmp(read_data, column_buff, COLUMN_SIZE)); + reader->Close(); + + DeleteCTupleSlot(tuple_slot_empty); + DeleteCTupleSlot(tuple_slot); + delete writer; + delete reader; +} + +class OrcEncodingTest : public ::testing::TestWithParam { + void SetUp() override { + Singleton::GetInstance(); + remove(file_name_.c_str()); + + MemoryContext orc_test_memory_context = AllocSetContextCreate( + (MemoryContext)NULL, "OrcTestMemoryContext", 80 * 1024 * 1024, + 80 * 1024 * 1024, 80 * 1024 * 1024); + + MemoryContextSwitchTo(orc_test_memory_context); + CreateOrcTestResourceOwner(); + } + + void TearDown() override { + Singleton::GetInstance()->Delete(file_name_); + ReleaseOrcTestResourceOwner(); + } + + 
protected: + const std::string file_name_ = "./test_encoding.file"; +}; + +TEST_P(OrcEncodingTest, ReadTupleWithEncoding) { + TupleTableSlot *tuple_slot = nullptr; + auto encoding_kind = GetParam(); + + auto tuple_desc = reinterpret_cast( + cbdb::Palloc0(sizeof(TupleDescData) + sizeof(FormData_pg_attribute) * 2)); + + tuple_desc->natts = 2; + tuple_desc->attrs[0] = { + .attlen = 8, + .attbyval = true, + }; + + tuple_desc->attrs[1] = { + .attlen = 8, + .attbyval = true, + }; + + tuple_slot = MakeTupleTableSlot(tuple_desc, &TTSOpsVirtual); + bool *fake_is_null = + reinterpret_cast(cbdb::Palloc0(sizeof(bool) * COLUMN_NUMS)); + fake_is_null[0] = false; + fake_is_null[1] = false; + + tuple_slot->tts_values[0] = Int64GetDatum(0); + tuple_slot->tts_values[1] = Int64GetDatum(1); + tuple_slot->tts_isnull = fake_is_null; + auto ctuple_slot = new CTupleSlot(tuple_slot); + + auto local_fs = Singleton::GetInstance(); + ASSERT_NE(nullptr, local_fs); + + auto file_ptr = local_fs->Open(file_name_); + EXPECT_NE(nullptr, file_ptr); + + std::vector types; + types.emplace_back(orc::proto::Type_Kind::Type_Kind_LONG); + types.emplace_back(orc::proto::Type_Kind::Type_Kind_LONG); + std::vector types_encoding; + types_encoding.emplace_back(encoding_kind); + types_encoding.emplace_back(encoding_kind); + MicroPartitionWriter::WriterOptions writer_options; + + auto writer = new OrcWriter(writer_options, types, types_encoding, file_ptr); + + for (size_t i = 0; i < 10000; i++) { + ctuple_slot->GetTupleTableSlot()->tts_values[0] = Int64GetDatum(i); + ctuple_slot->GetTupleTableSlot()->tts_values[1] = Int64GetDatum(i + 1); + writer->WriteTuple(ctuple_slot); + } + + writer->Close(); + + file_ptr = local_fs->Open(file_name_); + + MicroPartitionReader::ReaderOptions reader_options; + auto reader = new OrcReader(file_ptr); + reader->Open(reader_options); + EXPECT_EQ(1, reader->GetNumberOfStripes()); + for (size_t i = 0; i < 10000; i++) { + ASSERT_TRUE(reader->ReadTuple(ctuple_slot)); + ASSERT_EQ(ctuple_slot->GetTupleTableSlot()->tts_values[0], i); + ASSERT_EQ(ctuple_slot->GetTupleTableSlot()->tts_values[1], i + 1); + } + reader->Close(); + + OrcTest::DeleteCTupleSlot(ctuple_slot); + delete writer; + delete reader; +} + +INSTANTIATE_TEST_CASE_P( + OrcEncodingTestCombine, OrcEncodingTest, + testing::Values(ColumnEncoding_Kind::ColumnEncoding_Kind_DEF_ENCODED, + ColumnEncoding_Kind::ColumnEncoding_Kind_NO_ENCODED, + ColumnEncoding_Kind::ColumnEncoding_Kind_ORC_RLE_V2)); + +TEST_F(OrcTest, ReadTupleDefaultColumn) { + CTupleSlot *tuple_slot = CreateFakeCTupleSlot(true); + auto *local_fs = Singleton::GetInstance(); + ASSERT_NE(nullptr, local_fs); + + auto *file_ptr = local_fs->Open(file_name_); + EXPECT_NE(nullptr, file_ptr); + + std::vector types; + types.emplace_back(orc::proto::Type_Kind::Type_Kind_STRING); + types.emplace_back(orc::proto::Type_Kind::Type_Kind_STRING); + types.emplace_back(orc::proto::Type_Kind::Type_Kind_INT); + MicroPartitionWriter::WriterOptions writer_options; + + auto *writer = OrcWriter::CreateWriter(writer_options, types, file_ptr); + + writer->WriteTuple(tuple_slot); + writer->Close(); + + file_ptr = local_fs->Open(file_name_); + + MicroPartitionReader::ReaderOptions reader_options; + auto reader = new OrcReader(file_ptr); + reader->Open(reader_options); + EXPECT_EQ(1, reader->GetNumberOfStripes()); + CTupleSlot *tuple_slot_empty = CreateEmptyCTupleSlot(); + + TupleTableSlot *slot = tuple_slot_empty->GetTupleTableSlot(); + + slot->tts_tupleDescriptor->attrs[3] = { + .attlen = 4, + .attbyval = true, + }; 
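+  // The slot now carries one extra attribute (index 3) that was never written
+  // to the file; the atthasmissing/am_value setup below emulates an
+  // ALTER TABLE ... ADD COLUMN ... DEFAULT without a table rewrite, so the
+  // reader is expected to materialize INT32_COLUMN_VALUE_DEFAULT rather than
+  // read column data.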
+ + slot->tts_tupleDescriptor->natts = COLUMN_NUMS + 1; + + slot->tts_tupleDescriptor->attrs[3].atthasmissing = true; + slot->tts_tupleDescriptor->constr = + reinterpret_cast(cbdb::Palloc0(sizeof(TupleConstr))); + slot->tts_tupleDescriptor->constr->missing = reinterpret_cast( + cbdb::Palloc0((COLUMN_NUMS + 1) * sizeof(AttrMissing))); + + slot->tts_tupleDescriptor->constr->missing[3].am_value = + cbdb::Int32ToDatum(INT32_COLUMN_VALUE_DEFAULT); + slot->tts_tupleDescriptor->constr->missing[3].am_present = true; + reader->ReadTuple(tuple_slot_empty); + + ASSERT_EQ(tuple_slot_empty->GetTupleTableSlot()->tts_values[3], + INT32_COLUMN_VALUE_DEFAULT); + + reader->Close(); + + DeleteCTupleSlot(tuple_slot_empty); + DeleteCTupleSlot(tuple_slot); + delete writer; + delete reader; +} + +TEST_F(OrcTest, ReadTupleDroppedColumn) { + CTupleSlot *tuple_slot = CreateFakeCTupleSlot(true); + auto *local_fs = Singleton::GetInstance(); + ASSERT_NE(nullptr, local_fs); + + auto *file_ptr = local_fs->Open(file_name_); + EXPECT_NE(nullptr, file_ptr); + + std::vector types; + types.emplace_back(orc::proto::Type_Kind::Type_Kind_STRING); + types.emplace_back(orc::proto::Type_Kind::Type_Kind_STRING); + types.emplace_back(orc::proto::Type_Kind::Type_Kind_INT); + MicroPartitionWriter::WriterOptions writer_options; + + auto *writer = OrcWriter::CreateWriter(writer_options, types, file_ptr); + + writer->WriteTuple(tuple_slot); + writer->Close(); + + file_ptr = local_fs->Open(file_name_); + + MicroPartitionReader::ReaderOptions reader_options; + auto reader = new OrcReader(file_ptr); + reader->Open(reader_options); + EXPECT_EQ(1, reader->GetNumberOfStripes()); + CTupleSlot *tuple_slot_empty = CreateEmptyCTupleSlot(); + + TupleTableSlot *slot = tuple_slot_empty->GetTupleTableSlot(); + + slot->tts_tupleDescriptor->attrs[2].attisdropped = true; + + reader->ReadTuple(tuple_slot_empty); + + ASSERT_EQ(tuple_slot_empty->GetTupleTableSlot()->tts_isnull[2], true); + + reader->Close(); + + DeleteCTupleSlot(tuple_slot_empty); + DeleteCTupleSlot(tuple_slot); + delete writer; + delete reader; +} + +TEST_F(OrcTest, ReadTupleDroppedColumnWithProjection) { + CTupleSlot *tuple_slot = CreateFakeCTupleSlot(true); + auto *local_fs = Singleton::GetInstance(); + ASSERT_NE(nullptr, local_fs); + + auto *file_ptr = local_fs->Open(file_name_); + EXPECT_NE(nullptr, file_ptr); + + std::vector types; + types.emplace_back(orc::proto::Type_Kind::Type_Kind_STRING); + types.emplace_back(orc::proto::Type_Kind::Type_Kind_STRING); + types.emplace_back(orc::proto::Type_Kind::Type_Kind_INT); + MicroPartitionWriter::WriterOptions writer_options; + + auto writer = OrcWriter::CreateWriter(writer_options, types, file_ptr); + writer->WriteTuple(tuple_slot); + writer->Close(); + + file_ptr = local_fs->Open(file_name_); + + MicroPartitionReader::ReaderOptions reader_options; + auto reader = new OrcReader(file_ptr); + reader->Open(reader_options); + EXPECT_EQ(1, reader->GetNumberOfStripes()); + CTupleSlot *tuple_slot_empty = CreateEmptyCTupleSlot(); + + TupleTableSlot *slot = tuple_slot_empty->GetTupleTableSlot(); + + slot->tts_tupleDescriptor->attrs[2].attisdropped = true; + + reader->ReadTuple(tuple_slot_empty); + + ASSERT_EQ(tuple_slot_empty->GetTupleTableSlot()->tts_isnull[2], true); + + reader->Close(); + + DeleteCTupleSlot(tuple_slot_empty); + DeleteCTupleSlot(tuple_slot); + delete writer; + delete reader; +} + +TEST_F(OrcTest, WriteReadBigTuple) { + TupleTableSlot *tuple_slot = nullptr; + auto tuple_desc = reinterpret_cast( + cbdb::Palloc0(sizeof(TupleDescData) + 
sizeof(FormData_pg_attribute) * 2)); + + tuple_desc->natts = 2; + tuple_desc->attrs[0] = { + .attlen = 4, + .attbyval = true, + }; + tuple_desc->attrs[1] = { + .attlen = 4, + .attbyval = true, + }; + + tuple_slot = MakeTupleTableSlot(tuple_desc, &TTSOpsVirtual); + bool *fake_is_null = + reinterpret_cast(cbdb::Palloc0(sizeof(bool) * COLUMN_NUMS)); + fake_is_null[0] = false; + fake_is_null[1] = false; + + tuple_slot->tts_values[0] = Int32GetDatum(0); + tuple_slot->tts_values[1] = Int32GetDatum(1); + tuple_slot->tts_isnull = fake_is_null; + auto ctuple_slot = new CTupleSlot(tuple_slot); + + auto local_fs = Singleton::GetInstance(); + ASSERT_NE(nullptr, local_fs); + + auto file_ptr = local_fs->Open(file_name_); + EXPECT_NE(nullptr, file_ptr); + + std::vector types; + types.emplace_back(orc::proto::Type_Kind::Type_Kind_INT); + types.emplace_back(orc::proto::Type_Kind::Type_Kind_INT); + MicroPartitionWriter::WriterOptions writer_options; + + auto writer = OrcWriter::CreateWriter(writer_options, types, file_ptr); + + for (size_t i = 0; i < 10000; i++) { + ctuple_slot->GetTupleTableSlot()->tts_values[0] = Int32GetDatum(i); + ctuple_slot->GetTupleTableSlot()->tts_values[1] = Int32GetDatum(i + 1); + writer->WriteTuple(ctuple_slot); + } + + writer->Close(); + + file_ptr = local_fs->Open(file_name_); + + MicroPartitionReader::ReaderOptions reader_options; + auto reader = new OrcReader(file_ptr); + reader->Open(reader_options); + EXPECT_EQ(1, reader->GetNumberOfStripes()); + for (size_t i = 0; i < 10000; i++) { + ASSERT_TRUE(reader->ReadTuple(ctuple_slot)); + ASSERT_EQ(ctuple_slot->GetTupleTableSlot()->tts_values[0], i); + ASSERT_EQ(ctuple_slot->GetTupleTableSlot()->tts_values[1], i + 1); + } + reader->Close(); + + DeleteCTupleSlot(ctuple_slot); + delete writer; + delete reader; +} + +TEST_F(OrcTest, WriteReadNoFixedColumnInSameTuple) { + char column_buff_origin[COLUMN_SIZE]; + char column_buff_reset[COLUMN_SIZE]; + + CTupleSlot *tuple_slot = CreateFakeCTupleSlot(); + auto local_fs = Singleton::GetInstance(); + ASSERT_NE(nullptr, local_fs); + + auto file_ptr = local_fs->Open(file_name_); + EXPECT_NE(nullptr, file_ptr); + + std::vector types; + types.emplace_back(orc::proto::Type_Kind::Type_Kind_STRING); + types.emplace_back(orc::proto::Type_Kind::Type_Kind_STRING); + types.emplace_back(orc::proto::Type_Kind::Type_Kind_INT); + MicroPartitionWriter::WriterOptions writer_options; + + auto writer = OrcWriter::CreateWriter(writer_options, types, file_ptr); + + writer->WriteTuple(tuple_slot); + + // using the same tuple slot with different data + cbdb::Pfree( + cbdb::DatumToPointer(tuple_slot->GetTupleTableSlot()->tts_values[0])); + memset(&column_buff_reset, 0, COLUMN_SIZE); + tuple_slot->GetTupleTableSlot()->tts_values[0] = + cbdb::DatumFromCString(column_buff_reset, COLUMN_SIZE); + + writer->WriteTuple(tuple_slot); + writer->Close(); + + file_ptr = local_fs->Open(file_name_); + + MicroPartitionReader::ReaderOptions reader_options; + auto reader = new OrcReader(file_ptr); + reader->Open(reader_options); + + EXPECT_EQ(1, reader->GetNumberOfStripes()); + auto columns = reader->ReadStripe(0); + + EXPECT_EQ(COLUMN_NUMS, columns->GetColumns()); + auto column1 = reinterpret_cast((*columns)[0]); + + GenFakeBuffer(column_buff_origin, COLUMN_SIZE); + + EXPECT_EQ(2, column1->GetNonNullRows()); + EXPECT_EQ(0, std::memcmp(column1->GetBuffer(0).first + VARHDRSZ, + column_buff_origin, COLUMN_SIZE)); + EXPECT_EQ(0, std::memcmp(column1->GetBuffer(1).first + VARHDRSZ, + column_buff_reset, COLUMN_SIZE)); + + 
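+  // Row 0 must still hold the original fake buffer even though the slot's
+  // datum was freed and rewritten before the second WriteTuple: the writer has
+  // to copy non-fixed-length data instead of keeping a pointer into the
+  // caller's memory.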
reader->Close(); + + delete columns; + DeleteCTupleSlot(tuple_slot); + delete writer; + delete reader; +} + +TEST_F(OrcTest, WriteReadWithNullField) { + char column_buff[COLUMN_SIZE]; + CTupleSlot *ctuple_slot = CreateFakeCTupleSlot(); + auto *local_fs = Singleton::GetInstance(); + ASSERT_NE(nullptr, local_fs); + + auto *file_ptr = local_fs->Open(file_name_); + EXPECT_NE(nullptr, file_ptr); + + GenFakeBuffer(column_buff, COLUMN_SIZE); + + std::vector types; + types.emplace_back(orc::proto::Type_Kind::Type_Kind_STRING); + types.emplace_back(orc::proto::Type_Kind::Type_Kind_STRING); + types.emplace_back(orc::proto::Type_Kind::Type_Kind_INT); + OrcWriter::WriterOptions writer_options; + + auto *writer = OrcWriter::CreateWriter(writer_options, types, file_ptr); + + // str str int + // null null int + // str str null + // null null null + writer->WriteTuple(ctuple_slot); + + ctuple_slot->GetTupleTableSlot()->tts_isnull[0] = true; + ctuple_slot->GetTupleTableSlot()->tts_isnull[1] = true; + ctuple_slot->GetTupleTableSlot()->tts_isnull[2] = false; + writer->WriteTuple(ctuple_slot); + + ctuple_slot->GetTupleTableSlot()->tts_isnull[0] = false; + ctuple_slot->GetTupleTableSlot()->tts_isnull[1] = false; + ctuple_slot->GetTupleTableSlot()->tts_isnull[2] = true; + writer->WriteTuple(ctuple_slot); + + ctuple_slot->GetTupleTableSlot()->tts_isnull[0] = true; + ctuple_slot->GetTupleTableSlot()->tts_isnull[1] = true; + ctuple_slot->GetTupleTableSlot()->tts_isnull[2] = true; + writer->WriteTuple(ctuple_slot); + + writer->Close(); + + file_ptr = local_fs->Open(file_name_); + + MicroPartitionReader::ReaderOptions reader_options; + auto reader = new OrcReader(file_ptr); + reader->Open(reader_options); + CTupleSlot *tuple_slot_empty = CreateEmptyCTupleSlot(); + + EXPECT_EQ(1, reader->GetNumberOfStripes()); + tuple_slot_empty->GetTupleDesc()->natts = COLUMN_NUMS; + + reader->ReadTuple(tuple_slot_empty); + EXPECT_FALSE(tuple_slot_empty->GetTupleTableSlot()->tts_isnull[0]); + EXPECT_FALSE(tuple_slot_empty->GetTupleTableSlot()->tts_isnull[1]); + EXPECT_FALSE(tuple_slot_empty->GetTupleTableSlot()->tts_isnull[2]); + + reader->ReadTuple(tuple_slot_empty); + EXPECT_TRUE(tuple_slot_empty->GetTupleTableSlot()->tts_isnull[0]); + EXPECT_TRUE(tuple_slot_empty->GetTupleTableSlot()->tts_isnull[1]); + EXPECT_FALSE(tuple_slot_empty->GetTupleTableSlot()->tts_isnull[2]); + EXPECT_EQ( + cbdb::DatumToInt32(tuple_slot_empty->GetTupleTableSlot()->tts_values[2]), + INT32_COLUMN_VALUE); + + reader->ReadTuple(tuple_slot_empty); + EXPECT_FALSE(tuple_slot_empty->GetTupleTableSlot()->tts_isnull[0]); + EXPECT_FALSE(tuple_slot_empty->GetTupleTableSlot()->tts_isnull[1]); + EXPECT_TRUE(tuple_slot_empty->GetTupleTableSlot()->tts_isnull[2]); + auto vl = (struct varlena *)DatumGetPointer( + tuple_slot_empty->GetTupleTableSlot()->tts_values[0]); + int read_len = VARSIZE(vl); + char *read_data = VARDATA_ANY(vl); + EXPECT_EQ(read_len, COLUMN_SIZE + VARHDRSZ); + EXPECT_EQ(0, std::memcmp(read_data, column_buff, COLUMN_SIZE)); + + vl = (struct varlena *)DatumGetPointer( + tuple_slot_empty->GetTupleTableSlot()->tts_values[1]); + read_len = VARSIZE(vl); + read_data = VARDATA_ANY(vl); + EXPECT_EQ(read_len, COLUMN_SIZE + VARHDRSZ); + EXPECT_EQ(0, std::memcmp(read_data, column_buff, COLUMN_SIZE)); + + reader->ReadTuple(tuple_slot_empty); + EXPECT_TRUE(tuple_slot_empty->GetTupleTableSlot()->tts_isnull[0]); + EXPECT_TRUE(tuple_slot_empty->GetTupleTableSlot()->tts_isnull[1]); + EXPECT_TRUE(tuple_slot_empty->GetTupleTableSlot()->tts_isnull[2]); + + 
reader->Close(); + + DeleteCTupleSlot(tuple_slot_empty); + DeleteCTupleSlot(ctuple_slot); + delete reader; + delete writer; +} + +TEST_F(OrcTest, WriteReadWithBoundNullField) { + char column_buff[COLUMN_SIZE]; + CTupleSlot *ctuple_slot = CreateFakeCTupleSlot(); + auto *local_fs = Singleton::GetInstance(); + ASSERT_NE(nullptr, local_fs); + + auto *file_ptr = local_fs->Open(file_name_); + EXPECT_NE(nullptr, file_ptr); + + GenFakeBuffer(column_buff, COLUMN_SIZE); + + std::vector types; + types.emplace_back(orc::proto::Type_Kind::Type_Kind_STRING); + types.emplace_back(orc::proto::Type_Kind::Type_Kind_STRING); + types.emplace_back(orc::proto::Type_Kind::Type_Kind_INT); + OrcWriter::WriterOptions writer_options; + + auto *writer = OrcWriter::CreateWriter(writer_options, types, file_ptr); + + // null null null + // str str int + // null null null + ctuple_slot->GetTupleTableSlot()->tts_isnull[0] = true; + ctuple_slot->GetTupleTableSlot()->tts_isnull[1] = true; + ctuple_slot->GetTupleTableSlot()->tts_isnull[2] = true; + writer->WriteTuple(ctuple_slot); + + ctuple_slot->GetTupleTableSlot()->tts_isnull[0] = false; + ctuple_slot->GetTupleTableSlot()->tts_isnull[1] = false; + ctuple_slot->GetTupleTableSlot()->tts_isnull[2] = false; + writer->WriteTuple(ctuple_slot); + + ctuple_slot->GetTupleTableSlot()->tts_isnull[0] = true; + ctuple_slot->GetTupleTableSlot()->tts_isnull[1] = true; + ctuple_slot->GetTupleTableSlot()->tts_isnull[2] = true; + writer->WriteTuple(ctuple_slot); + + writer->Close(); + + file_ptr = local_fs->Open(file_name_); + + MicroPartitionReader::ReaderOptions reader_options; + auto reader = new OrcReader(file_ptr); + reader->Open(reader_options); + CTupleSlot *tuple_slot_empty = CreateEmptyCTupleSlot(); + + EXPECT_EQ(1, reader->GetNumberOfStripes()); + tuple_slot_empty->GetTupleDesc()->natts = COLUMN_NUMS; + + reader->ReadTuple(tuple_slot_empty); + EXPECT_TRUE(tuple_slot_empty->GetTupleTableSlot()->tts_isnull[0]); + EXPECT_TRUE(tuple_slot_empty->GetTupleTableSlot()->tts_isnull[1]); + EXPECT_TRUE(tuple_slot_empty->GetTupleTableSlot()->tts_isnull[2]); + + reader->ReadTuple(tuple_slot_empty); + EXPECT_FALSE(tuple_slot_empty->GetTupleTableSlot()->tts_isnull[0]); + EXPECT_FALSE(tuple_slot_empty->GetTupleTableSlot()->tts_isnull[1]); + EXPECT_FALSE(tuple_slot_empty->GetTupleTableSlot()->tts_isnull[2]); + + auto vl = (struct varlena *)DatumGetPointer( + tuple_slot_empty->GetTupleTableSlot()->tts_values[0]); + int read_len = VARSIZE(vl); + char *read_data = VARDATA_ANY(vl); + EXPECT_EQ(read_len, COLUMN_SIZE + VARHDRSZ); + EXPECT_EQ(0, std::memcmp(read_data, column_buff, COLUMN_SIZE)); + + vl = (struct varlena *)DatumGetPointer( + tuple_slot_empty->GetTupleTableSlot()->tts_values[1]); + read_len = VARSIZE(vl); + read_data = VARDATA_ANY(vl); + EXPECT_EQ(read_len, COLUMN_SIZE + VARHDRSZ); + EXPECT_EQ(0, std::memcmp(read_data, column_buff, COLUMN_SIZE)); + EXPECT_EQ(DatumGetInt32(tuple_slot_empty->GetTupleTableSlot()->tts_values[2]), + INT32_COLUMN_VALUE); + + reader->ReadTuple(tuple_slot_empty); + EXPECT_TRUE(tuple_slot_empty->GetTupleTableSlot()->tts_isnull[0]); + EXPECT_TRUE(tuple_slot_empty->GetTupleTableSlot()->tts_isnull[1]); + EXPECT_TRUE(tuple_slot_empty->GetTupleTableSlot()->tts_isnull[2]); + + reader->Close(); + + DeleteCTupleSlot(tuple_slot_empty); + DeleteCTupleSlot(ctuple_slot); + delete reader; + delete writer; +} + +TEST_F(OrcTest, WriteReadWithALLNullField) { + CTupleSlot *ctuple_slot = CreateFakeCTupleSlot(); + auto *local_fs = Singleton::GetInstance(); + ASSERT_NE(nullptr, 
local_fs); + + auto *file_ptr = local_fs->Open(file_name_); + EXPECT_NE(nullptr, file_ptr); + + std::vector types; + types.emplace_back(orc::proto::Type_Kind::Type_Kind_STRING); + types.emplace_back(orc::proto::Type_Kind::Type_Kind_STRING); + types.emplace_back(orc::proto::Type_Kind::Type_Kind_INT); + OrcWriter::WriterOptions writer_options; + + auto *writer = OrcWriter::CreateWriter(writer_options, types, file_ptr); + + ctuple_slot->GetTupleTableSlot()->tts_isnull[0] = true; + ctuple_slot->GetTupleTableSlot()->tts_isnull[1] = true; + ctuple_slot->GetTupleTableSlot()->tts_isnull[2] = true; + for (size_t i = 0; i < 1000; i++) { + writer->WriteTuple(ctuple_slot); + } + writer->Close(); + + file_ptr = local_fs->Open(file_name_); + + MicroPartitionReader::ReaderOptions reader_options; + auto reader = new OrcReader(file_ptr); + reader->Open(reader_options); + CTupleSlot *tuple_slot_empty = CreateEmptyCTupleSlot(); + + EXPECT_EQ(1, reader->GetNumberOfStripes()); + tuple_slot_empty->GetTupleDesc()->natts = COLUMN_NUMS; + for (size_t i = 0; i < 1000; i++) { + reader->ReadTuple(tuple_slot_empty); + EXPECT_TRUE(tuple_slot_empty->GetTupleTableSlot()->tts_isnull[0]); + EXPECT_TRUE(tuple_slot_empty->GetTupleTableSlot()->tts_isnull[1]); + EXPECT_TRUE(tuple_slot_empty->GetTupleTableSlot()->tts_isnull[2]); + } + reader->Close(); + + DeleteCTupleSlot(tuple_slot_empty); + DeleteCTupleSlot(ctuple_slot); + delete reader; + delete writer; +} + +TEST_P(OrcTestProjection, ReadTupleWithProjectionColumn) { + CTupleSlot *tuple_slot = OrcTest::CreateFakeCTupleSlot(); + auto local_fs = Singleton::GetInstance(); + ASSERT_NE(nullptr, local_fs); + bool proj_map[COLUMN_NUMS] = {false, false, false}; + size_t proj_index = ::testing::get<0>(GetParam()); + auto reversal = ::testing::get<1>(GetParam()); + + ASSERT_LE(proj_index, COLUMN_NUMS); + ASSERT_GE(proj_index, 0); + if (reversal) { + memset(proj_map, true, COLUMN_NUMS); + } + + if (proj_index < COLUMN_NUMS) { + proj_map[proj_index] = !proj_map[proj_index]; + } + + auto file_ptr = local_fs->Open(file_name_); + EXPECT_NE(nullptr, file_ptr); + + std::vector types; + types.emplace_back(orc::proto::Type_Kind::Type_Kind_STRING); + types.emplace_back(orc::proto::Type_Kind::Type_Kind_STRING); + types.emplace_back(orc::proto::Type_Kind::Type_Kind_INT); + MicroPartitionWriter::WriterOptions writer_options; + + auto writer = OrcWriter::CreateWriter(writer_options, types, file_ptr); + + writer->WriteTuple(tuple_slot); + writer->Flush(); + + writer->WriteTuple(tuple_slot); + writer->Close(); + + file_ptr = local_fs->Open(file_name_); + + MicroPartitionReader::ReaderOptions reader_options; + auto reader = new OrcReader(file_ptr); + reader->Open(reader_options); + + EXPECT_EQ(2, reader->GetNumberOfStripes()); + + auto stripe1 = reader->ReadStripe(0, proj_map, COLUMN_NUMS); + auto stripe2 = reader->ReadStripe(1, proj_map, COLUMN_NUMS); + OrcTest::VerifySingleStripe(stripe1, proj_map); + OrcTest::VerifySingleStripe(stripe2, proj_map); + reader->Close(); + + delete stripe1; + delete stripe2; + + OrcTest::DeleteCTupleSlot(tuple_slot); + delete writer; + delete reader; +} + +INSTANTIATE_TEST_CASE_P(OrcTestProjectionCombine, OrcTestProjection, + testing::Combine(testing::Values(0, 1, 2, 3), + testing::Values(false, true))); + +} // namespace pax::tests diff --git a/contrib/pax_storage/src/cpp/storage/pax.cc b/contrib/pax_storage/src/cpp/storage/pax.cc new file mode 100644 index 00000000000..fad15fed303 --- /dev/null +++ b/contrib/pax_storage/src/cpp/storage/pax.cc @@ -0,0 +1,315 @@ 
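The tests above all drive the same write / reopen / read round trip that storage/pax.cc below builds on. As a condensed sketch of that pattern (the template arguments, e.g. Singleton<LocalFileSystem> and std::vector<orc::proto::Type_Kind>, are restored from context and may not match the exact source; `path` and `slot` stand in for the fixture's file name and tuple slot):

    auto fs = Singleton<LocalFileSystem>::GetInstance();
    std::vector<orc::proto::Type_Kind> types = {
        orc::proto::Type_Kind::Type_Kind_STRING,
        orc::proto::Type_Kind::Type_Kind_INT};

    MicroPartitionWriter::WriterOptions wopts;
    auto writer = OrcWriter::CreateWriter(wopts, types, fs->Open(path));
    writer->WriteTuple(slot);
    writer->Flush();           // seals the current stripe
    writer->WriteTuple(slot);
    writer->Close();           // seals the remaining stripe

    MicroPartitionReader::ReaderOptions ropts;
    auto reader = new OrcReader(fs->Open(path));
    reader->Open(ropts);
    // two stripes were written, so GetNumberOfStripes() == 2
    while (reader->ReadTuple(slot)) {
      // consume the slot
    }
    reader->Close();
    delete reader;
    delete writer;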
+#include "storage/pax.h" + +#include + +#include + +#include "catalog/micro_partition_stats.h" +#include "catalog/pax_aux_table.h" +#include "comm/cbdb_wrappers.h" +#include "storage/micro_partition_file_factory.h" +#include "storage/micro_partition_metadata.h" + +#ifdef VEC_BUILD +#include "storage/vec/pax_vec_reader.h" +#endif + +namespace pax { + +static std::string GenRandomBlockId() { + CBDB_WRAP_START; + { + uuid_t uuid; + char str[36] = {0}; + + uuid_generate(uuid); + uuid_unparse(uuid, str); + + std::string uuid_str = str; + return uuid_str; + } + CBDB_WRAP_END; +} + +TableWriter::TableWriter(Relation relation) + : relation_(relation), summary_callback_(nullptr) {} + +TableWriter *TableWriter::SetWriteSummaryCallback( + WriteSummaryCallback callback) { + Assert(!summary_callback_); + summary_callback_ = callback; + return this; +} + +TableWriter *TableWriter::SetFileSplitStrategy( + const FileSplitStrategy *strategy) { + Assert(!strategy_); + strategy_ = strategy; + return this; +} + +TableWriter *TableWriter::SetStatsCollector(MicroPartitionStats *mp_stats) { + Assert(!mp_stats_); + Assert(!writer_); // must be set before the writer is created. + + mp_stats_ = mp_stats; + return this; +} + +TableWriter::~TableWriter() { + // must call close before delete table writer + Assert(writer_ == nullptr); + + delete strategy_; + delete mp_stats_; +} + +const FileSplitStrategy *TableWriter::GetFileSplitStrategy() const { + return strategy_; +} + +std::string TableWriter::GenFilePath(const std::string &block_id) { + return cbdb::BuildPaxFilePath(relation_, block_id); +} + +void TableWriter::Open() { + MicroPartitionWriter::WriterOptions options; + std::string file_path; + std::string block_id; + + Assert(relation_); + Assert(strategy_); + Assert(summary_callback_); + + block_id = GenRandomBlockId(); + file_path = GenFilePath(block_id); + + options.rel_oid = relation_->rd_id; + options.desc = relation_->rd_att; + options.block_id = std::move(block_id); + options.file_name = std::move(file_path); + + File *file = + Singleton::GetInstance()->Open(options.file_name); + + writer_ = MicroPartitionFileFactory::CreateMicroPartitionWriter( + MICRO_PARTITION_TYPE_PAX, file, std::move(options)); + + writer_->SetWriteSummaryCallback(summary_callback_); + writer_->SetStatsCollector(mp_stats_); +} + +void TableWriter::WriteTuple(CTupleSlot *slot) { + Assert(writer_); + Assert(strategy_); + // should check split strategy before write tuple + // otherwise, may got a empty file in the disk + if (strategy_->ShouldSplit(writer_, num_tuples_)) { + this->Close(); + this->Open(); + } + if (mp_stats_) mp_stats_->AddRow(slot->GetTupleTableSlot()); + + writer_->WriteTuple(slot); + ++num_tuples_; + ++total_tuples_; +} + +void TableWriter::Close() { + writer_->Close(); + delete writer_; + writer_ = nullptr; + num_tuples_ = 0; +} + +TableReader::TableReader( + std::unique_ptr> &&iterator, + ReaderOptions options) + : iterator_(std::move(iterator)), + reader_(nullptr), + is_empty_(true), + reader_options_(options), + table_no_(0), + table_index_(0) {} + +TableReader::~TableReader() { + if (reader_) { + reader_->Close(); + delete reader_; + reader_ = nullptr; + } +} + +void TableReader::Open() { + if (!iterator_->HasNext()) { + is_empty_ = true; + return; + } + + if (reader_options_.build_bitmap) { + // first open, now alloc a table no in pax shmem for scan + cbdb::GetTableIndexAndTableNumber(reader_options_.rel_oid, &table_no_, + &table_index_); + } + OpenFile(); + is_empty_ = false; +} + +void TableReader::ReOpen() { 
+  Close();
+  iterator_->Rewind();
+  Open();
+}
+
+void TableReader::Close() {
+  if (is_empty_) {
+    return;
+  }
+
+  if (reader_) {
+    reader_->Close();
+    // Also free the reader here; the destructor skips it once it is null.
+    delete reader_;
+    reader_ = nullptr;
+  }
+}
+
+bool TableReader::ReadTuple(CTupleSlot *slot) {
+  if (is_empty_) {
+    return false;
+  }
+
+  slot->ClearTuple();
+  while (!reader_->ReadTuple(slot)) {
+    reader_->Close();
+    if (!iterator_->HasNext()) {
+      is_empty_ = true;
+      return false;
+    }
+    OpenFile();
+  }
+  slot->SetTableNo(table_no_);
+  slot->SetBlockNumber(current_block_number_);
+  slot->StoreVirtualTuple();
+  return true;
+}
+
+void TableReader::OpenFile() {
+  Assert(iterator_->HasNext());
+  auto it = iterator_->Next();
+  MicroPartitionReader::ReaderOptions options;
+  micro_partition_id_ = options.block_id = it.GetMicroPartitionId();
+  if (reader_options_.build_bitmap) {
+    int block_number =
+        cbdb::GetBlockNumber(reader_options_.rel_oid, table_index_,
+                             paxc::PaxBlockId(options.block_id.c_str()));
+
+    Assert(block_number >= 0);
+    current_block_number_ = block_number;
+  }
+  options.file_name = it.GetFileName();
+  options.filter = reader_options_.filter;
+  options.reused_buffer = reader_options_.reused_buffer;
+
+  if (reader_) {
+    delete reader_;
+  }
+
+  reader_ = new OrcReader(
+      Singleton<LocalFileSystem>::GetInstance()->Open(options.file_name));
+
+#ifdef VEC_BUILD
+  if (reader_options_.is_vec) {
+    Assert(reader_options_.adapter);
+    reader_ = new PaxVecReader(reader_, reader_options_.adapter);
+  }
+#endif
+
+  reader_->Open(options);
+}
+
+TableDeleter::TableDeleter(
+    Relation rel,
+    std::unique_ptr<IteratorBase<MicroPartitionMetadata>> &&iterator,
+    std::map<std::string, std::unique_ptr<Bitmap8>> &&delete_bitmap,
+    Snapshot snapshot)
+    : rel_(rel),
+      iterator_(std::move(iterator)),
+      delete_bitmap_(std::move(delete_bitmap)),
+      snapshot_(snapshot),
+      reader_(nullptr),
+      writer_(nullptr),
+      slot_(nullptr) {}
+
+TableDeleter::~TableDeleter() {
+  if (reader_) {
+    reader_->Close();
+    delete reader_;
+    reader_ = nullptr;
+  }
+
+  if (writer_) {
+    writer_->Close();
+    delete writer_;
+    writer_ = nullptr;
+  }
+  if (slot_) {
+    ExecDropSingleTupleTableSlot(slot_);
+  }
+}
+
+void TableDeleter::Delete() {
+  if (!iterator_->HasNext()) {
+    return;
+  }
+  slot_ = MakeTupleTableSlot(rel_->rd_att, &TTSOpsVirtual);
+  OpenReader();
+  OpenWriter();
+
+  CTupleSlot cslot(slot_);
+  // TODO(gongxun): bulk insert with tuple iteration, as AO/HEAP do it, is not
+  // implemented yet. We should implement bulk insert first; then we can use
+  // ReadTupleN and WriteTupleN to delete tuples in batches.
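+  // A hypothetical sketch of that batched path (ReadTupleN/WriteTupleN and
+  // DropMarkedRows do not exist yet; N is an arbitrary batch size):
+  //   while (size_t n = reader_->ReadTupleN(batch, N)) {
+  //     size_t kept = DropMarkedRows(batch, n, delete_bitmap_);
+  //     writer_->WriteTupleN(batch, kept);
+  //   }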
+ while (reader_->ReadTuple(&cslot)) { + auto block_id = reader_->GetCurrentMicroPartitionId(); + auto it = delete_bitmap_.find(block_id); + if (it == delete_bitmap_.end()) { + // should not be here + Assert(!"should not be here, block_id is marked as delete but not in " + "delete_bitmap_"); + continue; + } + + auto bitmap = it->second.get(); + if (cslot.GetOffset() < bitmap->NumBits() && + bitmap->Test(cslot.GetOffset())) { + continue; + } + writer_->WriteTuple(&cslot); + } + + // loop delete_bitmap + for (const auto &it : delete_bitmap_) { + auto block_id = it.first; + cbdb::DeleteMicroPartitionEntry(rel_->rd_id, snapshot_, block_id); + + // TODO(gongxun): delete the block file + } +} + +void TableDeleter::OpenWriter() { + writer_ = new TableWriter(rel_); + writer_->SetWriteSummaryCallback(&cbdb::AddMicroPartitionEntry) + ->SetFileSplitStrategy(new PaxDefaultSplitStrategy()) + ->SetStatsCollector(new MicroPartitionStats()) + ->Open(); +} + +void TableDeleter::OpenReader() { + TableReader::ReaderOptions reader_options{}; + reader_options.build_bitmap = false; + reader_options.rel_oid = rel_->rd_id; + reader_ = new TableReader(std::move(iterator_), reader_options); + reader_->Open(); +} + +} // namespace pax diff --git a/contrib/pax_storage/src/cpp/storage/pax.h b/contrib/pax_storage/src/cpp/storage/pax.h new file mode 100644 index 00000000000..b3c3360b88a --- /dev/null +++ b/contrib/pax_storage/src/cpp/storage/pax.h @@ -0,0 +1,143 @@ +#pragma once + +#include +#include +#include +#include +#include +#include + +#include "comm/bitmap.h" +#include "comm/iterator.h" +#include "storage/file_system.h" +#include "storage/local_file_system.h" +#include "storage/micro_partition.h" +#include "storage/micro_partition_metadata.h" +#include "storage/orc/orc.h" +#include "storage/pax_block_id.h" +#include "storage/pax_filter.h" +#include "storage/paxc_block_map_manager.h" +#include "storage/strategy.h" + +#ifdef VEC_BUILD +#include "storage/vec/pax_vec_adapter.h" +#endif + +namespace pax { + +class TableWriter { + public: + using WriteSummaryCallback = MicroPartitionWriter::WriteSummaryCallback; + + explicit TableWriter(Relation relation); + + virtual ~TableWriter(); + + virtual const FileSplitStrategy *GetFileSplitStrategy() const; + + virtual void WriteTuple(CTupleSlot *slot); + + virtual void Open(); + + virtual void Close(); + + TableWriter *SetWriteSummaryCallback(WriteSummaryCallback callback); + + TableWriter *SetFileSplitStrategy(const FileSplitStrategy *strategy); + + TableWriter *SetStatsCollector(MicroPartitionStats *mp_stats); + + protected: + virtual std::string GenFilePath(const std::string &block_id); + + protected: + const Relation relation_ = nullptr; + MicroPartitionWriter *writer_ = nullptr; + const FileSplitStrategy *strategy_ = nullptr; + MicroPartitionStats *mp_stats_ = nullptr; + WriteSummaryCallback summary_callback_; + const FileSystem *file_system_ = Singleton::GetInstance(); + + size_t num_tuples_ = 0; + size_t total_tuples_ = 0; +}; + +class TableReader final { + public: + struct ReaderOptions { + bool build_bitmap = false; + Oid rel_oid = 0; + + DataBuffer *reused_buffer = nullptr; + + // Will not used in TableReader + // But pass into micro partition reader + PaxFilter *filter = nullptr; +#ifdef VEC_BUILD + bool is_vec = false; + VecAdapter *adapter = nullptr; +#endif + }; + + TableReader(std::unique_ptr> &&iterator, + ReaderOptions options); + virtual ~TableReader(); + + void Open(); + + void ReOpen(); + + void Close(); + + bool ReadTuple(CTupleSlot *slot); + + // 
deprecate: + // DON'T USE, this function will be removed + const std::string &GetCurrentMicroPartitionId() const { + return micro_partition_id_; + } + + private: + void OpenFile(); + + private: + const std::unique_ptr> iterator_; + MicroPartitionReader *reader_ = nullptr; + bool is_empty_ = false; + const ReaderOptions reader_options_; + int current_block_number_ = 0; + + std::string micro_partition_id_; + // only for ctid bitmap + uint8 table_no_; + uint32 table_index_; +}; + +class TableDeleter final { + public: + TableDeleter( + Relation rel, + std::unique_ptr> &&iterator, + std::map> &&delete_bitmap, + Snapshot snapshot); + + ~TableDeleter(); + + void Delete(); + + private: + void OpenWriter(); + + void OpenReader(); + + private: + Relation rel_; + std::unique_ptr> iterator_; + std::map> delete_bitmap_; + Snapshot snapshot_; + TableReader *reader_; + TableWriter *writer_; + TupleTableSlot *slot_; +}; + +} // namespace pax diff --git a/contrib/pax_storage/src/cpp/storage/pax_block_id.h b/contrib/pax_storage/src/cpp/storage/pax_block_id.h new file mode 100644 index 00000000000..9f140fa774a --- /dev/null +++ b/contrib/pax_storage/src/cpp/storage/pax_block_id.h @@ -0,0 +1,19 @@ +#pragma once + +#include "comm/cbdb_api.h" + +#include + +#define BLOCK_ID_SIZE 36 +namespace paxc { +struct PaxBlockId { + char pax_block_id[BLOCK_ID_SIZE + 1]; + explicit PaxBlockId(const char *block_id) { + Assert(strlen(block_id) == BLOCK_ID_SIZE); + strncpy(pax_block_id, block_id, BLOCK_ID_SIZE); + pax_block_id[BLOCK_ID_SIZE] = '\0'; + } + + const char *ToStr() const { return pax_block_id; } +}; +} // namespace paxc diff --git a/contrib/pax_storage/src/cpp/storage/pax_buffer.cc b/contrib/pax_storage/src/cpp/storage/pax_buffer.cc new file mode 100644 index 00000000000..54b1a869f5d --- /dev/null +++ b/contrib/pax_storage/src/cpp/storage/pax_buffer.cc @@ -0,0 +1,256 @@ +#include "storage/pax_buffer.h" + +#include "exceptions/CException.h" + +namespace pax { + +#define MEMORY_RESIZE_LIMIT (768UL * 1024 * 1024) + +BlockBuffer::BlockBuffer(char *begin_offset, char *end_offset) + : begin_offset_(begin_offset), end_offset_(end_offset) {} + +BlockBufferBase::BlockBufferBase(char *ptr, size_t size, size_t offset) + : block_pos_(ptr + offset), block_buffer_(ptr, ptr + size) { + Assert(offset <= size); +} + +void BlockBufferBase::Set(char *ptr, size_t size, size_t offset) { + Assert(offset <= size); + block_buffer_ = BlockBuffer(ptr, ptr + size); + block_pos_ = ptr + offset; +} + +void BlockBufferBase::Set(char *ptr, size_t size) { + block_buffer_ = BlockBuffer(ptr, ptr + size); + block_pos_ = ptr; +} + +void BlockBufferBase::Write(char *ptr, size_t size) { + Assert(block_pos_ + size <= block_buffer_.End()); + memcpy(block_pos_, ptr, size); +} + +void BlockBufferBase::Combine(const BlockBufferBase &buffer) { + Assert(Available() > buffer.Used()); + Write(buffer.block_buffer_.Start(), buffer.Used()); +} + +template +DataBuffer::DataBuffer(T *data_buffer, size_t size, bool allow_null, + bool mem_take_over) + : BlockBufferBase(nullptr, 0, 0), + mem_take_over_(mem_take_over), + data_buffer_(data_buffer) { + if (!allow_null && !data_buffer_ && size != 0) { + data_buffer_ = reinterpret_cast(cbdb::Palloc(size)); + } + BlockBufferBase::Set(reinterpret_cast(data_buffer_), size, 0); +} + +template +DataBuffer::DataBuffer(const DataBuffer &data_buffer) + : BlockBufferBase(data_buffer), + mem_take_over_(false), + data_buffer_(data_buffer.data_buffer_) {} + +template // NOLINT: redirect constructor +DataBuffer::DataBuffer(size_t size) 
+ : DataBuffer(nullptr, size, false, true) {} + +template +T &DataBuffer::operator[](size_t i) { + return data_buffer_[i]; +} + +template +size_t DataBuffer::GetSize() { + return Used() / sizeof(T); +} + +template +DataBuffer::~DataBuffer() { + if (mem_take_over_ && data_buffer_) { + cbdb::Pfree(data_buffer_); + } +} + +template +void DataBuffer::Set(char *ptr, size_t size, size_t offset) { + Assert(data_buffer_ == nullptr); + BlockBufferBase::Set(ptr, size, offset); + data_buffer_ = reinterpret_cast(ptr); +} + +template +void DataBuffer::Set(char *ptr, size_t size) { + Assert(data_buffer_ == nullptr); + BlockBufferBase::Set(ptr, size); + data_buffer_ = reinterpret_cast(ptr); +} + +template +void DataBuffer::Reset() { + Assert(!mem_take_over_); + BlockBufferBase::Set(nullptr, 0); + data_buffer_ = nullptr; +} + +template +void DataBuffer::Write(T value) { + Assert(block_pos_ + sizeof(T) <= block_buffer_.End()); + *(reinterpret_cast(block_pos_)) = value; +} + +template +void DataBuffer::Write(T *ptr, size_t size) { + Assert(size % sizeof(T) == 0 && (block_pos_ + size) <= block_buffer_.End()); + memcpy(block_pos_, reinterpret_cast(ptr), size); +} + +template +void DataBuffer::Write(const T *ptr, size_t size) { + Assert(size % sizeof(T) == 0 && (block_pos_ + size) <= block_buffer_.End()); + memcpy(block_pos_, reinterpret_cast(ptr), size); +} + +template +void DataBuffer::Read(T *dst) { + Assert(Used() > sizeof(T) && Used() <= Capacity()); + memcpy(dst, block_pos_, sizeof(T)); +} + +template +void DataBuffer::Read(void *dst, size_t n) { + Assert(Used() > n && Used() <= Capacity()); + memcpy(dst, block_pos_, n); +} + +template +T *DataBuffer::GetBuffer() const { + return data_buffer_; +} + +template +T *DataBuffer::GetAvailableBuffer() const { + return data_buffer_ + Used(); +} + +template +void DataBuffer::ReSize(size_t size) { + if (!mem_take_over_) { + CBDB_RAISE(cbdb::CException::ExType::kExTypeInvalidMemoryOperation); + } + + if (unlikely(size > MEMORY_RESIZE_LIMIT)) { + CBDB_RAISE(cbdb::CException::ExType::kExTypeOOM); + } + + size_t used = Used(); + if (data_buffer_) { + data_buffer_ = reinterpret_cast(cbdb::RePalloc(data_buffer_, size)); + } else { + data_buffer_ = reinterpret_cast(cbdb::Palloc(size)); + } + BlockBufferBase::Set(reinterpret_cast(data_buffer_), size, used); +} + +template +bool DataBuffer::IsMemTakeOver() const { + return mem_take_over_; +} + +template +void DataBuffer::SetMemTakeOver(bool take_over) { + mem_take_over_ = take_over; +} + +template +void DataBuffer::Clear() { + if (mem_take_over_ && data_buffer_) { + cbdb::Pfree(data_buffer_); + } + data_buffer_ = nullptr; +} + +template class DataBuffer; +template class DataBuffer; +template class DataBuffer; +template class DataBuffer; +template class DataBuffer; +template class DataBuffer; +template class DataBuffer; +template class DataBuffer; +template class DataBuffer; + +template +UntreatedDataBuffer::UntreatedDataBuffer(size_t size) + : DataBuffer(nullptr, size, false, true) { + untreated_pos_ = BlockBufferBase::block_buffer_.Start(); +} + +template +void UntreatedDataBuffer::BrushUnTreated(size_t size) { + Assert(size >= 0); + Assert(untreated_pos_ + size <= BlockBufferBase::block_pos_); + untreated_pos_ += size; +} + +template +void UntreatedDataBuffer::BrushBackUnTreated(size_t size) { + size_t new_offset = UnTreated() - size; + Assert(new_offset >= 0 && UnTreated() <= BlockBufferBase::Used()); + + untreated_pos_ = BlockBufferBase::block_buffer_.Start() + new_offset; +} + +template +void 
UntreatedDataBuffer::TreatedAll() { + Assert(UnTreated() <= BlockBufferBase::Used()); + size_t treated = UnTreated(); + size_t untouched = UnTouched(); + if (untouched <= treated) { + memcpy(BlockBufferBase::block_buffer_.Start(), untreated_pos_, untouched); + untreated_pos_ = BlockBufferBase::block_buffer_.Start(); + BlockBufferBase::block_pos_ = untreated_pos_ + untouched; + return; + } + + char *write_pos = BlockBufferBase::block_buffer_.Start(); + size_t batch_size = 0; + while (untouched != 0) { + batch_size = untouched > treated ? treated : untouched; + + memcpy(write_pos, untreated_pos_, batch_size); + untreated_pos_ = untreated_pos_ + batch_size; + write_pos = write_pos + batch_size; + + untouched -= batch_size; + } + + BlockBufferBase::block_pos_ = write_pos; + untreated_pos_ = BlockBufferBase::block_buffer_.Start(); +} + +template +void UntreatedDataBuffer::ReSize(size_t size) { + size_t untreated = UnTreated(); + DataBuffer::ReSize(size); + untreated_pos_ = BlockBufferBase::block_buffer_.Start() + untreated; +} + +template class UntreatedDataBuffer; +template class UntreatedDataBuffer; + +template +TreatedDataBuffer::TreatedDataBuffer(T *data_buffer, size_t size) + : DataBuffer(data_buffer, size, false, false) { + Assert(data_buffer); + Assert(size != 0); + BlockBufferBase::Brush(size); + treated_pos_ = BlockBufferBase::block_buffer_.Start(); +} + +template class TreatedDataBuffer; +template class TreatedDataBuffer; + +} // namespace pax diff --git a/contrib/pax_storage/src/cpp/storage/pax_buffer.h b/contrib/pax_storage/src/cpp/storage/pax_buffer.h new file mode 100644 index 00000000000..2bc838be317 --- /dev/null +++ b/contrib/pax_storage/src/cpp/storage/pax_buffer.h @@ -0,0 +1,274 @@ +#pragma once +#include + +#include +#include +#include + +#include "comm/cbdb_wrappers.h" + +namespace pax { + +struct BlockBuffer { + BlockBuffer(char *begin_offset, char *end_offset); + + BlockBuffer(const BlockBuffer &block_buffer) = default; + + inline char *Start() const { return begin_offset_; } + + inline char *End() const { return end_offset_; } + + inline size_t Size() const { return size_t(end_offset_ - begin_offset_); } + + inline void Resize(size_t size) { end_offset_ = begin_offset_ + size; } + + inline bool IsEmpty() const { return Size() == 0; } + + inline size_t Stripe(size_t size) const { + return size_t(end_offset_ - begin_offset_) / size; + } + + inline void Swap(BlockBuffer &other) { + std::swap(begin_offset_, other.begin_offset_); + std::swap(end_offset_, other.end_offset_); + } + + private: + char *begin_offset_; + char *end_offset_; +}; + +class BlockBufferBase { + public: + BlockBufferBase(char *ptr, size_t size, size_t offset); + + BlockBufferBase(const BlockBufferBase &block_buffer_base) = default; + + inline BlockBuffer &Buffer() { return block_buffer_; } + inline char *Position() { return block_pos_; } + + /* Should not call Brush inside BlockBuffer or DataBuffer */ + inline void Brush(size_t size) { block_pos_ += size; } + inline void BrushAll() { block_pos_ = block_buffer_.End(); } + + inline void BrushBack(size_t size) { + size_t new_offset = Used() - size; + CBDB_CHECK(new_offset >= 0, cbdb::CException::ExType::kExTypeOutOfRange); + block_pos_ = block_buffer_.Start() + new_offset; + } + + inline void BrushBackAll() { block_pos_ = block_buffer_.Start(); } + + inline char *Start() const { return block_buffer_.Start(); } + + inline size_t Used() const { + return size_t(block_pos_ - block_buffer_.Start()); + } + + inline size_t Available() const { + return 
size_t(block_buffer_.End() - block_pos_); + } + + inline size_t Capacity() const { + return size_t(block_buffer_.End() - block_buffer_.Start()); + } + + virtual void Set(char *ptr, size_t size, size_t offset); + + virtual void Set(char *ptr, size_t size); + + void Write(char *ptr, size_t size); + + void Combine(const BlockBufferBase &buffer); + + virtual ~BlockBufferBase() = default; + + protected: + char *block_pos_; + BlockBuffer block_buffer_; +}; + +// DataBuffer used to manage a chunk of memory buffer. +// It provides a series of methods for template access, +// the internal buffer(T* data_buffer_) are ordered and can be used as a +// array. The internal buffer have a working pointer(block_pos_) which +// distinguishes the used buffer and available buffer. +// Below is the internal buffer visualization +// +// internal buffer +// ---------------------------------- +// | used buffer | available buffer| +// ---------------------------------- +// ↑ +// working pointer +template +class DataBuffer : public BlockBufferBase { + public: + // `data_buffer` can be exist buffer or nullptr + // `size means` size of current buffer + // `allow_null` if true then will not used `size` to alloc new buffer, + // otherwise DataBuffer will used `size` to alloc a new buffer. + // `mem_take_over` if true the internal buffer which passed by `data_buffer` + // or new alloced will be freed when `DataBuffer` been freed, otherwise the + // internal buffer should be freed by caller also the method `ReSize` can't be + // called if `mem_take_over` is false. + DataBuffer(T *data_buffer, size_t size, bool allow_null = true, + bool mem_take_over = true); + + // will alloc a size of buffer and memory will take over with DataBuffer + explicit DataBuffer(size_t size); + + DataBuffer(const DataBuffer &data_buffer); + + friend class DataBuffer; + + // copy constructor for DataBuffer + // at the same time, this is also a way to convert templates to + // templates . + // + // must pay attention that after origin DataBuffer call `ReSize`, The + // copied DataBuffer will become illegal this is because there is no way + // for the internal pointer to be updated. + // + template + explicit DataBuffer(const DataBuffer &data_buffer) + : BlockBufferBase(data_buffer), + mem_take_over_(false), + data_buffer_(reinterpret_cast(data_buffer.data_buffer_)) {} + + // Direct access elements of internal buffer + T &operator[](size_t i); + + T *StartT() const { return data_buffer_; } + + // Get size of elements of internal buffer + size_t GetSize(); + + ~DataBuffer() override; + + // Set a memory buffer, should make sure internal buffer is nullptr. + // This method is split from the constructor. + // Sometimes caller need prealloc a DataBuffer without internal buffer. 
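+  // A minimal usage sketch of this pattern (names are illustrative only):
+  //   DataBuffer<char> db(nullptr, 0, true, false);  // prealloc, no buffer
+  //   db.Set(raw_ptr, raw_len);   // attach caller-owned memory
+  //   db.Write(src, n);           // copy data in ...
+  //   db.Brush(n);                // ... then advance the working pointer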
+ void Set(char *ptr, size_t size, size_t offset) override; + + void Set(char *ptr, size_t size) override; + + // Reset the DataBuffer + void Reset(); + + // Direct write a element into available buffer + // Should call `Brush` after write + virtual void Write(T value); + + virtual void Write(T *ptr, size_t size); + + virtual void Write(const T *ptr, size_t size); + + // Read all to dst pointer + virtual void Read(T *dst); + + virtual void Read(void *dst, size_t n); + + // Get the internal buffer pointer + T *GetBuffer() const; + + // Get the available buffer pointer + T *GetAvailableBuffer() const; + + // Resize the internal buffer, size should bigger than capacity of internal + // buffer `mem_take_over` should be true + virtual void ReSize(size_t size); + + // Is current internal buffer take over by DataBuffer + bool IsMemTakeOver() const; + + void SetMemTakeOver(bool take_over); + + // Clear up the DataBuffer + // Caller should call `Set` to reuse current `DataBuffer` after call `Clear` + virtual void Clear(); + + protected: + bool mem_take_over_; + T *data_buffer_ = nullptr; +}; + +// extern template DataBuffer::DataBuffer(pax::DataBuffer +// const&); + +extern template class DataBuffer; +extern template class DataBuffer; +extern template class DataBuffer; +extern template class DataBuffer; +extern template class DataBuffer; +extern template class DataBuffer; +extern template class DataBuffer; +extern template class DataBuffer; +extern template class DataBuffer; + +template +class UntreatedDataBuffer final : public DataBuffer { + public: + explicit UntreatedDataBuffer(size_t size); + + void ReSize(size_t size) override; + + void BrushUnTreated(size_t size); + + inline void BrushUnTreatedAll() { + untreated_pos_ = BlockBufferBase::block_pos_; + } + + void BrushBackUnTreated(size_t size); + + void TreatedAll(); + + inline size_t UnTreated() const { + return size_t(untreated_pos_ - BlockBufferBase::block_buffer_.Start()); + } + + inline size_t UnTouched() const { + return size_t(BlockBufferBase::block_pos_ - untreated_pos_); + } + + private: + char *untreated_pos_ = nullptr; +}; + +extern template class UntreatedDataBuffer; +extern template class UntreatedDataBuffer; + +template +class TreatedDataBuffer final : public DataBuffer { + public: + TreatedDataBuffer(T *data_buffer, size_t size); + + inline void BrushTreated(size_t size) { + Assert(treated_pos_ + size <= BlockBufferBase::block_pos_); + treated_pos_ += size; + } + + inline char *GetTreatedRawBuffer() const { return treated_pos_; } + + inline T *GetTreatedBuffer() const { + return reinterpret_cast(treated_pos_); + } + + inline size_t Treated() const { + Assert(treated_pos_); + return size_t(treated_pos_ - BlockBufferBase::block_buffer_.Start()); + } + + inline size_t UnTreated() const { + Assert(treated_pos_); + return size_t(BlockBufferBase::block_pos_ - treated_pos_); + } + + private: + char *treated_pos_ = nullptr; +}; + +extern template class TreatedDataBuffer; +extern template class TreatedDataBuffer; + +} // namespace pax diff --git a/contrib/pax_storage/src/cpp/storage/pax_filter.cc b/contrib/pax_storage/src/cpp/storage/pax_filter.cc new file mode 100644 index 00000000000..62e9cc8dfa2 --- /dev/null +++ b/contrib/pax_storage/src/cpp/storage/pax_filter.cc @@ -0,0 +1,371 @@ +#include "storage/pax_filter.h" + +#include "comm/cbdb_api.h" + +#include "catalog/micro_partition_stats.h" +#include "comm/cbdb_wrappers.h" +#include "storage/proto/proto_wrappers.h" + +namespace paxc { +static bool BuildScanKeys(Relation rel, List 
*quals, bool isorderby, + ScanKey *p_scan_keys, int *p_num_scan_keys) { + ListCell *qual_cell; + ScanKey scan_keys; + int n_scan_keys; + int j; + TupleDesc desc; + + /* Allocate array for ScanKey structs: one per qual */ + n_scan_keys = list_length(quals); + scan_keys = (ScanKey)palloc(n_scan_keys * sizeof(ScanKeyData)); + desc = rel->rd_att; + Oid *opfamilies = (Oid *)palloc(sizeof(Oid) * desc->natts); + for (auto i = 0; i < desc->natts; i++) { + auto attr = &desc->attrs[i]; + if (attr->attisdropped) { + opfamilies[i] = 0; + continue; + } + Oid opclass = GetDefaultOpClass(attr->atttypid, BRIN_AM_OID); + if (!OidIsValid(opclass)) { + opfamilies[i] = 0; + continue; + } + opfamilies[i] = get_opclass_family(opclass); + } + + j = 0; + foreach (qual_cell, quals) { + Expr *clause = (Expr *)lfirst(qual_cell); + ScanKey this_scan_key = &scan_keys[j]; + Oid opno; /* operator's OID */ + RegProcedure opfuncid; /* operator proc id used in scan */ + Oid opfamily; /* opfamily of index column */ + int op_strategy; /* operator's strategy number */ + Oid op_lefttype; /* operator's declared input types */ + Oid op_righttype; + Expr *leftop; /* expr on lhs of operator */ + Expr *rightop; /* expr on rhs ... */ + AttrNumber varattno; /* att number used in scan */ + int indnkeyatts; + + indnkeyatts = RelationGetNumberOfAttributes(rel); + if (IsA(clause, OpExpr)) { + /* indexkey op const or indexkey op expression */ + int flags = 0; + Datum scanvalue; + + opno = ((OpExpr *)clause)->opno; + opfuncid = ((OpExpr *)clause)->opfuncid; + + /* + * leftop should be the index key Var, possibly relabeled + */ + leftop = (Expr *)get_leftop(clause); + + if (leftop && IsA(leftop, RelabelType)) + leftop = ((RelabelType *)leftop)->arg; + + Assert(leftop != NULL); + + if (!IsA(leftop, Var)) goto ignore_clause; + + varattno = ((Var *)leftop)->varattno; + if (varattno < 1 || varattno > indnkeyatts) + elog(ERROR, "bogus index qualification"); + + /* + * We have to look up the operator's strategy number. This + * provides a cross-check that the operator does match the index. 
+ */ + opfamily = opfamilies[varattno - 1]; + if (!OidIsValid(opfamily)) goto ignore_clause; + + get_op_opfamily_properties(opno, opfamily, isorderby, &op_strategy, + &op_lefttype, &op_righttype); + + if (isorderby) flags |= SK_ORDER_BY; + + /* + * rightop is the constant or variable comparison value + */ + rightop = (Expr *)get_rightop(clause); + + if (rightop && IsA(rightop, RelabelType)) + rightop = ((RelabelType *)rightop)->arg; + + Assert(rightop != NULL); + + if (IsA(rightop, Const)) { + /* OK, simple constant comparison value */ + scanvalue = ((Const *)rightop)->constvalue; + if (((Const *)rightop)->constisnull) flags |= SK_ISNULL; + } else { + // No support for runtime keys now + goto ignore_clause; + } + + /* + * initialize the scan key's fields appropriately + */ + ScanKeyEntryInitialize(this_scan_key, flags, + varattno, /* attribute number to scan */ + op_strategy, /* op's strategy */ + op_righttype, /* strategy subtype */ + ((OpExpr *)clause)->inputcollid, /* collation */ + opfuncid, /* reg proc to use */ + scanvalue); /* constant */ + j++; + } else if (IsA(clause, NullTest)) { + /* indexkey IS NULL or indexkey IS NOT NULL */ + auto ntest = reinterpret_cast(clause); + int flags; + + Assert(!isorderby); + + /* + * argument should be the index key Var, possibly relabeled + */ + leftop = ntest->arg; + + if (leftop && IsA(leftop, RelabelType)) + leftop = ((RelabelType *)leftop)->arg; + + Assert(leftop != NULL); + + if (!IsA(leftop, Var)) goto ignore_clause; + + varattno = ((Var *)leftop)->varattno; + + /* + * initialize the scan key's fields appropriately + */ + switch (ntest->nulltesttype) { + case IS_NULL: + flags = SK_ISNULL | SK_SEARCHNULL; + break; + case IS_NOT_NULL: + flags = SK_ISNULL | SK_SEARCHNOTNULL; + break; + default: + elog(ERROR, "unrecognized nulltesttype: %d", + (int)ntest->nulltesttype); + flags = 0; /* keep compiler quiet */ + break; + } + + ScanKeyEntryInitialize(this_scan_key, flags, + varattno, /* attribute number to scan */ + InvalidStrategy, /* no strategy */ + InvalidOid, /* no strategy subtype */ + InvalidOid, /* no collation */ + InvalidOid, /* no reg proc for this */ + (Datum)0); /* constant */ + j++; + } else { + // not support other qual types yet + } + + ignore_clause: + continue; + } + pfree(opfamilies); + + /* + * Return info to our caller. 
+ */ + if (j > 0) { + *p_scan_keys = scan_keys; + *p_num_scan_keys = j; + return true; + } + return false; +} +} // namespace paxc + +namespace pax { + +bool BuildScanKeys(Relation rel, List *quals, bool isorderby, + ScanKey *scan_keys, int *num_scan_keys) { + CBDB_WRAP_START; + { + return paxc::BuildScanKeys(rel, quals, isorderby, scan_keys, num_scan_keys); + } + CBDB_WRAP_END; +} + +PaxFilter::~PaxFilter() { delete[] proj_; } + +std::pair PaxFilter::GetColumnProjection() { + return std::make_pair(proj_, proj_len_); +} + +void PaxFilter::SetColumnProjection(bool *proj, size_t proj_len) { + proj_ = proj; + proj_len_ = proj_len; +} + +void PaxFilter::SetScanKeys(ScanKey scan_keys, int num_scan_keys) { + Assert(num_scan_keys_ == 0); + + if (num_scan_keys > 0) { + scan_keys_ = scan_keys; + num_scan_keys_ = num_scan_keys; + } +} + +static inline bool CheckNullKey( + ScanKey scan_key, const ::pax::stats::ColumnStatisitcsInfo &column_stats) { + // handle null test + // SK_SEARCHNULL and SK_SEARCHNOTNULL must not co-exist with each other + Assert(scan_key->sk_flags & SK_ISNULL); + Assert((scan_key->sk_flags & (SK_SEARCHNULL | SK_SEARCHNOTNULL)) != + (SK_SEARCHNULL | SK_SEARCHNOTNULL)); + + if (scan_key->sk_flags & SK_SEARCHNULL) { + // test: IS NULL + if (!column_stats.hasnull()) return false; + } else if (scan_key->sk_flags & SK_SEARCHNOTNULL) { + // test: IS NOT NULL + if (column_stats.allnull()) return false; + } else { + // Neither IS NULL nor IS NOT NULL was used; assume all indexable + // operators are strict and thus return false with NULL value in + // the scan key. + return false; + } + return true; +} + +static inline bool CheckProcid(const ::pax::stats::MinmaxStatistics &minmax, + StrategyNumber strategy, Oid procid) { + switch (strategy) { + case BTLessStrategyNumber: + return minmax.proclt() == procid; + case BTLessEqualStrategyNumber: + return minmax.procle() == procid; + case BTGreaterStrategyNumber: + return minmax.procgt() == procid; + case BTGreaterEqualStrategyNumber: + return minmax.procge() == procid; + default: + Assert(false); + break; + } + // should not reach here, otherwise we ignore the scan key. 
+ return false; +} + +static bool CheckNonnullValue(const ::pax::stats::MinmaxStatistics &minmax, + ScanKey scan_key, Form_pg_attribute attr) { + Oid procid; + FmgrInfo finfo; + Datum datum; + Datum matches; + auto value = scan_key->sk_argument; + auto typid = attr->atttypid; + auto collation = minmax.collation(); + auto typlen = attr->attlen; + auto typbyval = attr->attbyval; + + switch (scan_key->sk_strategy) { + case BTLessStrategyNumber: + case BTLessEqualStrategyNumber: { + auto ok = cbdb::MinMaxGetStrategyProcinfo(typid, &procid, &finfo, + scan_key->sk_strategy); + if (!ok || !CheckProcid(minmax, scan_key->sk_strategy, procid)) + return true; + datum = pax::MicroPartitionStats::FromValue(minmax.minimal(), typlen, + typbyval, &ok); + CBDB_CHECK(ok, cbdb::CException::kExTypeLogicError); + matches = cbdb::FunctionCall2Coll(&finfo, collation, datum, value); + break; + } + case BTEqualStrategyNumber: { + auto ok = cbdb::MinMaxGetStrategyProcinfo(typid, &procid, &finfo, + BTLessEqualStrategyNumber); + if (!ok || !CheckProcid(minmax, BTLessEqualStrategyNumber, procid)) + return true; + datum = pax::MicroPartitionStats::FromValue(minmax.minimal(), typlen, + typbyval, &ok); + CBDB_CHECK(ok, cbdb::CException::kExTypeLogicError); + matches = cbdb::FunctionCall2Coll(&finfo, collation, datum, value); + + if (!DatumGetBool(matches)) + // not (min <= value) --> min > value + return false; + + ok = cbdb::MinMaxGetStrategyProcinfo(typid, &procid, &finfo, + BTGreaterEqualStrategyNumber); + if (!ok || !CheckProcid(minmax, BTGreaterEqualStrategyNumber, procid)) + return true; + datum = pax::MicroPartitionStats::FromValue(minmax.maximum(), typlen, + typbyval, &ok); + CBDB_CHECK(ok, cbdb::CException::kExTypeLogicError); + matches = cbdb::FunctionCall2Coll(&finfo, collation, datum, value); + break; + } + case BTGreaterEqualStrategyNumber: + case BTGreaterStrategyNumber: { + auto ok = cbdb::MinMaxGetStrategyProcinfo(typid, &procid, &finfo, + scan_key->sk_strategy); + if (!ok || !CheckProcid(minmax, scan_key->sk_strategy, procid)) + return true; + datum = pax::MicroPartitionStats::FromValue(minmax.maximum(), typlen, + typbyval, &ok); + CBDB_CHECK(ok, cbdb::CException::kExTypeLogicError); + matches = cbdb::FunctionCall2Coll(&finfo, collation, datum, value); + break; + } + default: + Assert(false); + matches = BoolGetDatum(true); + break; + } + return DatumGetBool(matches); +} + +// returns true: if the micro partition needs to scan +// returns false: the micro partition could be ignored +bool PaxFilter::TestMicroPartitionScanInternal( + const pax::stats::MicroPartitionStatisticsInfo &stats, + TupleDesc desc) const { + auto natts = desc->natts; + + Assert(num_scan_keys_ > 0); + Assert(stats.columnstats_size() <= natts); + for (int i = 0; i < num_scan_keys_; i++) { + auto scan_key = &scan_keys_[i]; + auto column_index = scan_key->sk_attno - 1; + Assert(column_index >= 0 && column_index < natts); + + auto attr = &desc->attrs[column_index]; + // scan key should never contain dropped column + Assert(!attr->attisdropped); + // the collation in catalog and scan key should be consistent + Assert(scan_key->sk_collation == attr->attcollation); + + if (column_index >= stats.columnstats_size()) + continue; // missing attributes have no stats + + const auto &column_stats = stats.columnstats(column_index); + const auto &minmax = column_stats.minmaxstats(); + + // Check whether alter column type will result rewriting whole table. 
+    Assert(attr->atttypid == minmax.typid());
+
+    if (scan_key->sk_flags & SK_ISNULL) {
+      if (!CheckNullKey(scan_key, column_stats)) return false;
+    } else if (column_stats.allnull()) {
+      // ALL values are null, but the scan key is not null
+      return false;
+    } else if (scan_key->sk_collation != minmax.collation()) {
+      // the collation doesn't match; ignore this scan key
+    } else if (!CheckNonnullValue(minmax, scan_key, attr)) {
+      return false;
+    }
+  }
+  return true;
+}
+
+}  // namespace pax
diff --git a/contrib/pax_storage/src/cpp/storage/pax_filter.h b/contrib/pax_storage/src/cpp/storage/pax_filter.h
new file mode 100644
index 00000000000..1d3cfda7b65
--- /dev/null
+++ b/contrib/pax_storage/src/cpp/storage/pax_filter.h
@@ -0,0 +1,54 @@
+#pragma once
+#include "comm/cbdb_api.h"
+
+#include <utility>
+
+namespace pax {
+namespace stats {
+class MicroPartitionStatisticsInfo;
+}
+bool BuildScanKeys(Relation rel, List *quals, bool isorderby,
+                   ScanKey *scan_keys, int *num_scan_keys);
+
+class PaxFilter final {
+ public:
+  PaxFilter() = default;
+
+  ~PaxFilter();
+
+  bool HasMicroPartitionFilter() const { return num_scan_keys_ > 0; }
+
+  std::pair<bool *, size_t> GetColumnProjection();
+
+  void SetColumnProjection(bool *proj, size_t proj_len);
+
+  void SetScanKeys(ScanKey scan_keys, int num_scan_keys);
+
+  // true: the micro-partition could not be filtered out; the reader SHOULD
+  // scan its tuples.
+  // false: the micro-partition was filtered out and SHOULD be ignored as a
+  // whole.
+  inline bool TestMicroPartitionScan(
+      const pax::stats::MicroPartitionStatisticsInfo &stats,
+      TupleDesc desc) const {
+    if (num_scan_keys_ == 0) return true;
+    return TestMicroPartitionScanInternal(stats, desc);
+  }
+
+ private:
+  bool TestMicroPartitionScanInternal(
+      const pax::stats::MicroPartitionStatisticsInfo &stats,
+      TupleDesc desc) const;
+
+  // Micro-partition filter: the scan keys are compared against the min/max
+  // values in the micro-partition stats to filter out whole micro-partitions.
+  // The scan keys' memory is allocated by the caller; PaxFilter only
+  // references it.
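+  // Illustrative usage from a hypothetical caller; only the names declared
+  // in this header are real, the control flow is a sketch:
+  //
+  //   PaxFilter filter;
+  //   ScanKey keys;
+  //   int nkeys;
+  //   if (pax::BuildScanKeys(rel, quals, /*isorderby=*/false, &keys, &nkeys))
+  //     filter.SetScanKeys(keys, nkeys);
+  //   if (!filter.TestMicroPartitionScan(stats, desc))
+  //     return;  // skip the whole micro-partition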
+  ScanKey scan_keys_ = nullptr;
+  int num_scan_keys_ = 0;
+
+  // column projection
+  bool *proj_ = nullptr;
+  size_t proj_len_ = 0;
+};  // class PaxFilter
+
+}  // namespace pax
diff --git a/contrib/pax_storage/src/cpp/storage/pax_itemptr.h b/contrib/pax_storage/src/cpp/storage/pax_itemptr.h
new file mode 100644
index 00000000000..0f4f28ef103
--- /dev/null
+++ b/contrib/pax_storage/src/cpp/storage/pax_itemptr.h
@@ -0,0 +1,108 @@
+#pragma once
+#include "comm/cbdb_api.h"
+
+#include
+
+#include
+
+namespace pax {
+#define PAX_TABLE_NUM_BIT_SIZE 5
+#define PAX_BLOCK_BIT_SIZE 22
+#define PAX_TUPLE_BIT_SIZE (48 - 7 - PAX_BLOCK_BIT_SIZE)
+#define MAX_TABLE_NUM_IN_CTID ((1 << PAX_TABLE_NUM_BIT_SIZE) - 1)
+
+#define BLOCK_NO_BITS_IN_BYTES_0_1 (16 - PAX_TABLE_NUM_BIT_SIZE)
+#define TUPLE_NO_BITS_IN_BYTES_2_3 (PAX_TUPLE_BIT_SIZE - 16)
+#define BLOCK_NO_BITS_IN_BYTES_2_3 (16 - TUPLE_NO_BITS_IN_BYTES_2_3)
+
+// 11 bit
+// 0x07ff
+#define BLOCK_NO_MASK_IN_BYTES_0_1 (0xffff >> PAX_TABLE_NUM_BIT_SIZE)
+// 13 bit
+// 0x1fff
+#define BLOCK_NO_MASK_IN_BYTES_2_3 (0xffff >> TUPLE_NO_BITS_IN_BYTES_2_3)
+
+// 3 bit
+// 0x0007
+#define TUPLE_NO_MASK_IN_BYTES_2_3 (0xffff >> BLOCK_NO_BITS_IN_BYTES_2_3)
+
+// #define PAX_BLOCK_BIT_IN_BI_LO_BITS (PAX_BLOCK_BIT_SIZE - 16)
+// #define PAX_TUPLE_BIT_IN_BI_LO_BITS (32 - PAX_BLOCK_BIT_SIZE)
+// #define PAX_TUPLE_BIT_IN_BI_LO_MASK (0xFFFF >> PAX_BLOCK_BIT_IN_BI_LO_BITS)
+
+#define PAX_TUPLE_ID_MAX_ROW_NUM INT64CONST((1 << (PAX_TUPLE_BIT_SIZE - 1)) - 1)
+
+// Layout of the 48-bit item pointer:
+// | table no (5 bits) | block number (11 + 13 = 24 bits) | tuple no (3 + 15 = 18 bits) |
+// bytes_4_5 stores the low 15 tuple bits + 1, so that 0 means "invalid".
+struct PaxItemPointer final {
+  uint16 bytes_0_1;
+  uint16 bytes_2_3;
+  uint16 bytes_4_5;
+  PaxItemPointer() {
+    bytes_0_1 = 0;
+    bytes_2_3 = 0;
+    bytes_4_5 = 0;
+  }
+  PaxItemPointer(uint8 table_no, uint32 block_number, uint32 tuple_number) {
+    bytes_0_1 = (table_no << BLOCK_NO_BITS_IN_BYTES_0_1);
+    bytes_0_1 |= (block_number >> BLOCK_NO_BITS_IN_BYTES_2_3);
+
+    // |5 bit table|11 bit block| |13 bit block|3 bit tuple| |16 bit posid|
+
+    bytes_2_3 = (block_number & BLOCK_NO_MASK_IN_BYTES_2_3)
+                << TUPLE_NO_BITS_IN_BYTES_2_3;
+    bytes_2_3 |= (tuple_number >> 15);
+
+    bytes_4_5 = (tuple_number & 0x7FFF) + 1;
+  }
+
+  explicit PaxItemPointer(const PaxItemPointer *tid) {
+    bytes_0_1 = tid->bytes_0_1;
+    bytes_2_3 = tid->bytes_2_3;
+    bytes_4_5 = tid->bytes_4_5;
+  }
+
+  inline bool Valid() const { return bytes_4_5 != 0; }
+  static ItemPointerData GetTupleId(uint8 table_no, uint32 block_number,
+                                    uint32 tuple_number) {
+    ItemPointerData tid;
+    // table_no in bi_hi
+    tid.ip_blkid.bi_hi = (table_no << BLOCK_NO_BITS_IN_BYTES_0_1);
+
+    // block_number in bi_hi
+    tid.ip_blkid.bi_hi |= (block_number >> BLOCK_NO_BITS_IN_BYTES_2_3);
+
+    // |5 bit table|11 bit block| |13 bit block|3 bit tuple| |16 bit posid|
+
+    // block_number in bi_lo
+    tid.ip_blkid.bi_lo = (block_number & BLOCK_NO_MASK_IN_BYTES_2_3)
+                         << TUPLE_NO_BITS_IN_BYTES_2_3;
+    // tuple_number in bi_lo
+    tid.ip_blkid.bi_lo |= (tuple_number >> 15);
+    // tuple_number in ip_posid
+    tid.ip_posid = (tuple_number & 0x7FFF) + 1;
+    return tid;
+  }
+
+  uint8 GetTableNo() const { return bytes_0_1 >> BLOCK_NO_BITS_IN_BYTES_0_1; }
+
+  uint32 GetBlockNumber() const {
+    Assert(Valid());
+    // get block_number in bytes_0_1
+    uint32 block_number = (bytes_0_1 & BLOCK_NO_MASK_IN_BYTES_0_1)
+                          << BLOCK_NO_BITS_IN_BYTES_2_3;
+    block_number |= (bytes_2_3 >> TUPLE_NO_BITS_IN_BYTES_2_3);
+    return block_number;
+  }
+  uint32 GetTupleNumber() const {
+    Assert(Valid());
+    return bytes_4_5 - 1 + ((bytes_2_3 & TUPLE_NO_MASK_IN_BYTES_2_3) << 15);
+  }
+
+  void Clear() {
+    bytes_0_1 = 0;
+    bytes_2_3 = 0;
+    bytes_4_5 = 0;
+  }
+};
+}  // namespace pax
diff --git a/contrib/pax_storage/src/cpp/storage/pax_itemptr_test.cc b/contrib/pax_storage/src/cpp/storage/pax_itemptr_test.cc
new file mode 100644
index 00000000000..25e9d3cea11
--- /dev/null
+++ b/contrib/pax_storage/src/cpp/storage/pax_itemptr_test.cc
@@ -0,0 +1,56 @@
+#include <gtest/gtest.h>
+
+#include "storage/pax_itemptr.h"
+
+#include "comm/cbdb_api.h"
+
+namespace pax::tests {
+class PaxItemPtrTest : public ::testing::Test {
+ public:
+  void SetUp() override {}
+
+  void TearDown() override {}
+};
+
+TEST_F(PaxItemPtrTest, GetBlockNumber) {
+  auto tid = new PaxItemPointer();
+  tid->bytes_0_1 = 0xffff;
+  tid->bytes_2_3 = 0xff00;
+  tid->bytes_4_5 = 0;
+
+  PaxItemPointer pax_tid_1(tid);
+  EXPECT_EQ(pax_tid_1.Valid(), false);
+
+  ItemPointerData htid;
+
+  htid = PaxItemPointer::GetTupleId(0, 0xff, 1);
+  PaxItemPointer pax_tid_2(reinterpret_cast<PaxItemPointer *>(&htid));
+  EXPECT_EQ(pax_tid_2.GetTableNo(), 0);
+  EXPECT_EQ(pax_tid_2.GetBlockNumber(), 0xff);
+  EXPECT_EQ(pax_tid_2.GetTupleNumber(), 1);
+  EXPECT_EQ(pax_tid_2.Valid(), true);
+
+  htid = PaxItemPointer::GetTupleId(31, 0xffff, 0xff00);
+  PaxItemPointer pax_tid_3(reinterpret_cast<PaxItemPointer *>(&htid));
+  EXPECT_EQ(pax_tid_3.GetTableNo(), 31);
+  EXPECT_EQ(pax_tid_3.GetBlockNumber(), 0xffff);
+  EXPECT_EQ(pax_tid_3.GetTupleNumber(), 0xff00);
+  EXPECT_EQ(pax_tid_3.Valid(), true);
+
+  htid = PaxItemPointer::GetTupleId(0xf, 0xffff, PAX_TUPLE_ID_MAX_ROW_NUM);
+  PaxItemPointer pax_tid_4(reinterpret_cast<PaxItemPointer *>(&htid));
+  EXPECT_EQ(pax_tid_4.GetTableNo(), 0xf);
+  EXPECT_EQ(pax_tid_4.GetBlockNumber(), 0xffff);
+  EXPECT_EQ(pax_tid_4.GetTupleNumber(), PAX_TUPLE_ID_MAX_ROW_NUM);
+  EXPECT_EQ(pax_tid_4.Valid(), true);
+
+  htid = PaxItemPointer::GetTupleId(0x14, 0x12345, PAX_TUPLE_ID_MAX_ROW_NUM);
+  PaxItemPointer pax_tid_5(reinterpret_cast<PaxItemPointer *>(&htid));
+  EXPECT_EQ(pax_tid_5.GetTableNo(), 0x14);
+  EXPECT_EQ(pax_tid_5.GetBlockNumber(), 0x12345);
+  EXPECT_EQ(pax_tid_5.GetTupleNumber(), PAX_TUPLE_ID_MAX_ROW_NUM);
+  EXPECT_EQ(pax_tid_5.Valid(), true);
+
+  delete tid;
+}
+}  // namespace pax::tests
diff --git a/contrib/pax_storage/src/cpp/storage/pax_test.cc b/contrib/pax_storage/src/cpp/storage/pax_test.cc
new file mode 100644
index 00000000000..29940298013
--- /dev/null
+++ b/contrib/pax_storage/src/cpp/storage/pax_test.cc
@@ -0,0 +1,237 @@
+#include <cstdio>
+
+#include "storage/pax.h"
+
+#include <memory>
+#include <string>
+#include <vector>
+
+#include "comm/gtest_wrappers.h"
+#include "exceptions/CException.h"
+#include "storage/local_file_system.h"
+#include "storage/micro_partition.h"
+#include "storage/orc/orc.h"
+
+namespace pax::tests {
+using ::testing::_;
+using ::testing::AtLeast;
+using ::testing::Return;
+
+const char *pax_file_name = "./test.pax";
+#define COLUMN_NUMS 2
+
+CTupleSlot *CreateFakeCTupleSlot(bool with_value) {
+  TupleTableSlot *tuple_slot;
+
+  auto tuple_desc = reinterpret_cast<TupleDesc>(cbdb::Palloc0(
+      sizeof(TupleDescData) + sizeof(FormData_pg_attribute) * COLUMN_NUMS));
+
+  tuple_desc->natts = COLUMN_NUMS;
+  tuple_desc->attrs[0] = {
+      .attlen = 4,
+      .attbyval = true,
+  };
+
+  tuple_desc->attrs[1] = {
+      .attlen = 4,
+      .attbyval = true,
+  };
+
+  tuple_slot = MakeTupleTableSlot(tuple_desc, &TTSOpsVirtual);
+
+  if (with_value) {
+    bool *fake_is_null = new bool[COLUMN_NUMS];
+
+    fake_is_null[0] = false;
+    fake_is_null[1] = false;
+
+    tuple_slot->tts_values[0] = Int32GetDatum(1);
+    tuple_slot->tts_values[1] = Int32GetDatum(2);
+    tuple_slot->tts_isnull = fake_is_null;
+  }
+
+  auto ctuple_slot = new CTupleSlot(tuple_slot);
+
+  return ctuple_slot;
+}
+
+class MockReaderInterator : public IteratorBase<MicroPartitionMetadata> {
+ public:
+  explicit MockReaderInterator(
+      const std::vector<MicroPartitionMetadata> &meta_info_list)
+      : index_(0) {
+    micro_partitions_.insert(micro_partitions_.end(), meta_info_list.begin(),
+                             meta_info_list.end());
+  }
+
+  bool HasNext() override { return index_ < micro_partitions_.size(); }
+
+  void Rewind() override { index_ = 0; }
+
+  MicroPartitionMetadata Next() override { return micro_partitions_[index_++]; }
+
+ private:
+  uint32 index_;
+  std::vector<MicroPartitionMetadata> micro_partitions_;
+};
+
+class MockWriter : public TableWriter {
+ public:
+  MockWriter(const Relation relation, WriteSummaryCallback callback)
+      : TableWriter(relation) {
+    SetWriteSummaryCallback(callback);
+    SetFileSplitStrategy(new PaxDefaultSplitStrategy());
+  }
+
+  MOCK_METHOD(std::string, GenFilePath, (const std::string &), (override));
+};
+
+class PaxWriterTest : public ::testing::Test {
+ public:
+  void SetUp() override {
+    Singleton<LocalFileSystem>::GetInstance();
+    CurrentResourceOwner = ResourceOwnerCreate(NULL, "OrcTestResourceOwner");
+  }
+
+  void TearDown() override {
+    std::remove(pax_file_name);
+    ResourceOwner tmp_resource_owner = CurrentResourceOwner;
+    CurrentResourceOwner = NULL;
+    ResourceOwnerRelease(tmp_resource_owner, RESOURCE_RELEASE_BEFORE_LOCKS,
+                         false, true);
+    ResourceOwnerRelease(tmp_resource_owner, RESOURCE_RELEASE_LOCKS, false,
+                         true);
+    ResourceOwnerRelease(tmp_resource_owner, RESOURCE_RELEASE_AFTER_LOCKS,
+                         false, true);
+    ResourceOwnerDelete(tmp_resource_owner);
+  }
+};
+
+TEST_F(PaxWriterTest, WriteReadTuple) {
+  CTupleSlot *slot = CreateFakeCTupleSlot(true);
+
+  auto relation = (Relation)cbdb::Palloc0(sizeof(RelationData));
+  relation->rd_att = slot->GetTupleTableSlot()->tts_tupleDescriptor;
+  bool callback_called = false;
+
+  TableWriter::WriteSummaryCallback callback =
+      [&callback_called](const WriteSummary & /*summary*/) {
+        callback_called = true;
+      };
+
+  auto writer = new MockWriter(relation, callback);
+  EXPECT_CALL(*writer, GenFilePath(_))
+      .Times(AtLeast(1))
+      .WillRepeatedly(Return(pax_file_name));
+
+  writer->Open();
+
+  writer->WriteTuple(slot);
+  writer->Close();
+  ASSERT_TRUE(callback_called);
+
+  cbdb::Pfree(slot->GetTupleTableSlot());
+  delete writer;
+
+  std::vector<MicroPartitionMetadata> meta_info_list;
+  MicroPartitionMetadata meta_info;
+
+  meta_info.SetFileName(pax_file_name);
+  meta_info.SetMicroPartitionId(pax_file_name);
+
+  meta_info_list.push_back(std::move(meta_info));
+
+  std::unique_ptr<IteratorBase<MicroPartitionMetadata>> meta_info_iterator =
+      std::unique_ptr<IteratorBase<MicroPartitionMetadata>>(
+          new MockReaderInterator(meta_info_list));
+
+  TableReader *reader;
+  TableReader::ReaderOptions reader_options{};
+  reader_options.build_bitmap = false;
+  reader_options.rel_oid = 0;
+  reader = new TableReader(std::move(meta_info_iterator), reader_options);
+  reader->Open();
+
+  CTupleSlot *rslot = CreateFakeCTupleSlot(true);
+
+  reader->ReadTuple(rslot);
+
+  ASSERT_EQ(1, cbdb::DatumToInt32(rslot->GetTupleTableSlot()->tts_values[0]));
+  ASSERT_EQ(2, cbdb::DatumToInt32(rslot->GetTupleTableSlot()->tts_values[1]));
+  delete relation;
+  delete reader;
+}
+
+TEST_F(PaxWriterTest, WriteReadTupleSplitFile) {
+  CTupleSlot *slot = CreateFakeCTupleSlot(true);
+  auto relation = (Relation)cbdb::Palloc0(sizeof(RelationData));
+
+  relation->rd_att = slot->GetTupleTableSlot()->tts_tupleDescriptor;
+  bool callback_called = false;
+
+  TableWriter::WriteSummaryCallback callback =
+      [&callback_called](const WriteSummary & /*summary*/) {
+        callback_called = true;
+      };
+
+  auto writer = new MockWriter(relation, callback);
+  uint32 call_times = 0;
+  EXPECT_CALL(*writer, GenFilePath(_))
+      .Times(AtLeast(2))
+      .WillRepeatedly(testing::Invoke([&call_times]() -> std::string {
+        return pax_file_name + std::to_string(call_times++);
+      }));
+
+  writer->Open();
+
+  ASSERT_TRUE(writer->GetFileSplitStrategy()->SplitTupleNumbers());
+  auto split_size = writer->GetFileSplitStrategy()->SplitTupleNumbers();
+
+  for (size_t i = 0; i < split_size + 1; i++) writer->WriteTuple(slot);
+  writer->Close();
+  ASSERT_TRUE(callback_called);
+
+  cbdb::Pfree(slot->GetTupleTableSlot());
+  delete writer;
+
+  std::vector<MicroPartitionMetadata> meta_info_list;
+  MicroPartitionMetadata meta_info1;
+  meta_info1.SetMicroPartitionId(std::string(pax_file_name));
+  meta_info1.SetFileName(pax_file_name + std::to_string(0));
+
+  MicroPartitionMetadata meta_info2;
+  meta_info2.SetMicroPartitionId(std::string(pax_file_name));
+  meta_info2.SetFileName(pax_file_name + std::to_string(1));
+
+  meta_info_list.push_back(std::move(meta_info1));
+  meta_info_list.push_back(std::move(meta_info2));
+
+  std::unique_ptr<IteratorBase<MicroPartitionMetadata>> meta_info_iterator =
+      std::unique_ptr<IteratorBase<MicroPartitionMetadata>>(
+          new MockReaderInterator(meta_info_list));
+
+  TableReader *reader;
+  TableReader::ReaderOptions reader_options{.build_bitmap = false,
+                                            .rel_oid = 0};
+  reader = new TableReader(std::move(meta_info_iterator), reader_options);
+  reader->Open();
+
+  CTupleSlot *rslot = CreateFakeCTupleSlot(true);
+
+  for (size_t i = 0; i < split_size + 1; i++) {
+    ASSERT_TRUE(reader->ReadTuple(rslot));
+    ASSERT_EQ(1, cbdb::DatumToInt32(rslot->GetTupleTableSlot()->tts_values[0]));
+    ASSERT_EQ(2, cbdb::DatumToInt32(rslot->GetTupleTableSlot()->tts_values[1]));
+  }
+  ASSERT_FALSE(reader->ReadTuple(rslot));
+  reader->Close();
+
+  delete reader;
+  delete relation;
+
+  std::remove((pax_file_name + std::to_string(0)).c_str());
+  std::remove((pax_file_name + std::to_string(1)).c_str());
+}
+
+}  // namespace pax::tests
diff --git a/contrib/pax_storage/src/cpp/storage/paxc_block_map_manager.cc b/contrib/pax_storage/src/cpp/storage/paxc_block_map_manager.cc
new file mode 100644
index 00000000000..64d8e819be7
--- /dev/null
+++ b/contrib/pax_storage/src/cpp/storage/paxc_block_map_manager.cc
@@ -0,0 +1,479 @@
+#include "storage/paxc_block_map_manager.h"
+
+#include <unistd.h>
+
+#include "comm/cbdb_wrappers.h"
+namespace paxc {
+
+static PaxXactSharedState *current_xact_shared_pax_ss = NULL;
+
+#define DEFAULT_BLOCK_IDS_SIZE 16
+
+#define ACQUIRE_HASH_LOCK(hash_lock, lockmode) \
+  LWLockAcquire(&hash_lock->lock, lockmode);
+
+#define RELEASE_HASH_LOCK(hash_lock) LWLockRelease(&hash_lock->lock)
+
+static LocalTableBlockMappingData
+    local_pax_block_mapping_data[BLOCK_MAPPING_ARRAY_SIZE];
+
+static MemoryContext pax_block_mapping_context = NULL;
+
+static int max_procs = 0;
+// the lock for pax_xact_hash
+static LWLockPadded *pax_hash_lock = NULL;
+static HTAB *pax_xact_hash = NULL;
+// common PaxSharedState
+static PaxSharedState *pax_shared_state;
+
+void init_local_command_resource();
+void init_command_shmem_resource();
+void cleanup_local_command_resource();
+void cleanup_command_shmem_resource();
+
+Size struct_mem_size() { return sizeof(PaxSharedState); }
+
+Size pax_mem_size() {
+  Size size = 0;
+  size = add_size(size, struct_mem_size());
+  // hash size
+  size = add_size(size, hash_estimate_size(max_procs, sizeof(XactHashEntry)));
+  return size;
+}
+
+void init_pax_xact_hash() {
+  HASHCTL info;
+  memset(&info, 0, sizeof(HASHCTL));
+  info.keysize = sizeof(XactHashKey);
+  info.entrysize = sizeof(XactHashEntry);
+  pax_xact_hash = ShmemInitHash("pax_xact_hash", max_procs,
max_procs, &info, + HASH_ELEM | HASH_BLOBS); +} + +void init_shmem_locks() { + pax_hash_lock = GetNamedLWLockTranche("pax_hash_lock"); +} + +void paxc_shmem_request() { + max_procs = MaxConnections; + RequestAddinShmemSpace(pax_mem_size()); + RequestNamedLWLockTranche("pax_hash_lock", 1); +} + +void paxc_shmem_startup() { + bool found; + current_xact_shared_pax_ss = NULL; + pax_block_mapping_context = AllocSetContextCreate( + TopMemoryContext, "Pax Block Mapping Context", ALLOCSET_DEFAULT_SIZES); + + LWLockAcquire(AddinShmemInitLock, LW_EXCLUSIVE); + pax_shared_state = reinterpret_cast( + ShmemInitStruct("pax_shared_stat", struct_mem_size(), &found)); + if (!found) { + pax_shared_state->pax_xact_lock_tranche_id_ = LWLockNewTrancheId(); + } + LWLockRegisterTranche(pax_shared_state->pax_xact_lock_tranche_id_, + "pax_xact_array_locks"); + init_pax_xact_hash(); + init_shmem_locks(); + LWLockRelease(AddinShmemInitLock); + if (pax_shared_state == NULL) { + ereport( + FATAL, + (errcode(ERRCODE_OUT_OF_MEMORY), errmsg("out of memory"), + errdetail("Failed while allocation block %lu bytes in shared memory.", + struct_mem_size()))); + } +} + +void init_command_resource() { + init_command_shmem_resource(); + init_local_command_resource(); +} + +void release_command_resource() { + cleanup_local_command_resource(); + cleanup_command_shmem_resource(); +} + +// there may be multiple scan processes scanning the same table, +// which will modify the current_xact_shared_pax_ss state, +// so a LW_EXCLUSIVE lock is required +void get_table_index_and_table_number(const Oid table_rel_oid, uint8 *table_no, + uint32 *table_index) { + LWLockAcquire(¤t_xact_shared_pax_ss->lock_, LW_EXCLUSIVE); + uint8 alloc_table_no = 0; + for (uint32 i = 0; i < current_xact_shared_pax_ss->block_mapping_used_size_; + i++) { + if (current_xact_shared_pax_ss->shared_block_mapping_[i].relid_ == + table_rel_oid) { + alloc_table_no++; + } + } + *table_index = current_xact_shared_pax_ss->block_mapping_used_size_++; + *table_no = alloc_table_no; + ereport( + DEBUG1, + (errmsg("get_table_index_and_table_number pax_xact_hash=%p, lock=%p," + "gp_session_id=%d " + "pid=%d, db_id=%d, segment_id=%d, gp_command_id=%d " + "table_oid=%d, table_index=%d, " + "table_no=%d, is_gp_writer=%d, shmem_ptr=%p, lock=%p", + pax_xact_hash, &pax_hash_lock->lock, gp_session_id, getpid(), + GpIdentity.dbid, GpIdentity.segindex, gp_command_count, + table_rel_oid, *table_index, *table_no, Gp_is_writer, + current_xact_shared_pax_ss, ¤t_xact_shared_pax_ss->lock_))); + LWLockRelease(¤t_xact_shared_pax_ss->lock_); + if (*table_index >= BLOCK_MAPPING_ARRAY_SIZE) { + ereport(ERROR, + (errcode(ERRCODE_ARRAY_ELEMENT_ERROR), errmsg("out of array size"), + errdetail("Failed while allocation table slot %d in " + "current_xact_shared_pax_ss->shared_block_mapping_, max " + "size is %d", + *table_index, BLOCK_MAPPING_ARRAY_SIZE))); + } + if (alloc_table_no > MAX_TABLE_NUM_IN_CTID) { + ereport(ERROR, + (errcode(ERRCODE_ARRAY_ELEMENT_ERROR), errmsg("out of array size"), + errdetail("Failed while table no %d overflow the max table num %d", + *table_no, MAX_TABLE_NUM_IN_CTID))); + } +} + +// FIXME(gongxun): the delete and update processes only read +// current_xact_shared_pax_ss , whether it is possible to not add a shared lock? 
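+// Illustrative example of the mapping maintained by
+// get_table_index_and_table_number() above, for a command that opens table A
+// twice and table B once (values are made up):
+//   shared_block_mapping_[0].relid_ = A   -> (A, table_no 0)
+//   shared_block_mapping_[1].relid_ = B   -> (B, table_no 0)
+//   shared_block_mapping_[2].relid_ = A   -> (A, table_no 1)
+// pax_get_table_index(A, 1) below scans the array for the second entry whose
+// relid_ is A and returns index 2.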
+uint32 pax_get_table_index(const Oid table_rel_oid, const uint8 table_no) { + LWLockAcquire(¤t_xact_shared_pax_ss->lock_, LW_SHARED); + uint8 tmp_table_no = -1; + int index = -1; + for (uint32 i = 0; i < current_xact_shared_pax_ss->block_mapping_used_size_; + i++) { + if (current_xact_shared_pax_ss->shared_block_mapping_[i].relid_ == + table_rel_oid) { + if ((++tmp_table_no) == table_no) { + index = i; + break; + } + } + } + LWLockRelease(¤t_xact_shared_pax_ss->lock_); + Assert(tmp_table_no == table_no); + return index; +} + +void dump_shared_block_ids(const Oid table_rel_oid, const uint32 table_index) { + LocalTableBlockMappingData *block_mapping_data = + &local_pax_block_mapping_data[table_index]; + + PaxBlockId *shared_ptr; + + // save old segment, if shared memory is full, we need to alloc a new + // segment and copy old data,then free old segment + dsm_segment *old_segment = block_mapping_data->block_ids_segment_; + dsm_segment *new_segment = nullptr; + block_mapping_data->relid_ = table_rel_oid; + + // if local memory size is large than shared memory's size, we need to + // resize shared memory + SharedTableBlockMappingData *shared_block_mapping_data = + ¤t_xact_shared_pax_ss->shared_block_mapping_[table_index]; + ereport(DEBUG1, (errmsg("dump_shared_block_ids pax_xact_hash=%p, lock=%p," + "gp_session_id=%d " + "pid=%d, db_id=%d, segment_id=%d, gp_command_id=%d " + "table_oid=%d, table_index=%d, local_size =%d, " + "shared_size=%d, is_gp_writer=%d", + pax_xact_hash, &pax_hash_lock->lock, gp_session_id, + getpid(), GpIdentity.dbid, GpIdentity.segindex, + gp_command_count, table_rel_oid, table_index, + block_mapping_data->used_block_ids_, + shared_block_mapping_data->shared_size_block_ids_, + Gp_is_writer))); + if (block_mapping_data->used_block_ids_ > + shared_block_mapping_data->shared_size_block_ids_ || + old_segment == NULL) { + // need to resize shared memory + ResourceOwner oldowner; + oldowner = CurrentResourceOwner; + CurrentResourceOwner = TopTransactionResourceOwner; + + uint32 new_size = block_mapping_data->size_block_ids_; + new_segment = dsm_create(sizeof(PaxBlockId) * new_size, + DSM_CREATE_NULL_IF_MAXSEGMENTS); + + if (new_segment == NULL) { + ereport(ERROR, (errcode(ERRCODE_OUT_OF_MEMORY), + errmsg("could not create DSM segment for %d PaxBlockId", + new_size))); + } + + dsm_pin_mapping(new_segment); + dsm_pin_segment(new_segment); + block_mapping_data->block_ids_segment_ = new_segment; + + shared_block_mapping_data->shared_size_block_ids_ = new_size; + + shared_block_mapping_data->shared_used_block_ids_ = 0; + + shared_ptr = + reinterpret_cast(dsm_segment_address(new_segment)); + + CurrentResourceOwner = oldowner; + } + + shared_ptr = reinterpret_cast( + dsm_segment_address(block_mapping_data->block_ids_segment_)); + + memcpy( + shared_ptr + shared_block_mapping_data->shared_used_block_ids_, + block_mapping_data->block_ids_ + + shared_block_mapping_data->shared_used_block_ids_, + sizeof(PaxBlockId) * (block_mapping_data->used_block_ids_ - + shared_block_mapping_data->shared_used_block_ids_)); + + pg_write_barrier(); + shared_block_mapping_data->shared_used_block_ids_ = + block_mapping_data->used_block_ids_; + + shared_block_mapping_data->relid_ = block_mapping_data->relid_; + if (old_segment != block_mapping_data->block_ids_segment_) { + shared_block_mapping_data->shared_block_ids_handle_ = + dsm_segment_handle(block_mapping_data->block_ids_segment_); + + if (old_segment) { + dsm_unpin_segment(dsm_segment_handle(old_segment)); + dsm_detach(old_segment); + } + } +} + 
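+// dump_shared_block_ids() above and load_shared_block_ids() below form a
+// simple publication protocol: the writer copies new PaxBlockIds into the DSM
+// segment and only advances shared_used_block_ids_ after a write barrier;
+// readers copy the delta after a read barrier, so a racing reader can at
+// worst see fewer entries, never a partially written one.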
+void load_shared_block_ids(const Oid table_rel_oid, const uint32 table_index) { + dsm_handle table_block_mapping_handle; + dsm_segment *attached_block_ids; + PaxBlockId *shared_ptr; + + for (;;) { + table_block_mapping_handle = + current_xact_shared_pax_ss->shared_block_mapping_[table_index] + .shared_block_ids_handle_; + + attached_block_ids = dsm_attach(table_block_mapping_handle); + if (attached_block_ids != NULL) break; + + if (table_block_mapping_handle == + current_xact_shared_pax_ss->shared_block_mapping_[table_index] + .shared_block_ids_handle_) { + elog(ERROR, "could not attach to table(%d) shared block ids array", + table_rel_oid); + } + } + + shared_ptr = + reinterpret_cast(dsm_segment_address(attached_block_ids)); + + pg_read_barrier(); + + if (current_xact_shared_pax_ss->shared_block_mapping_[table_index] + .shared_used_block_ids_ > + local_pax_block_mapping_data[table_index].size_block_ids_) { + // resize local memory + uint32 new_size = + current_xact_shared_pax_ss->shared_block_mapping_[table_index] + .shared_size_block_ids_; + MemoryContext oldcontext = MemoryContextSwitchTo(pax_block_mapping_context); + if (local_pax_block_mapping_data[table_index].block_ids_ == nullptr) { + local_pax_block_mapping_data[table_index].used_block_ids_ = 0; + local_pax_block_mapping_data[table_index].size_block_ids_ = new_size; + local_pax_block_mapping_data[table_index].block_ids_ = + reinterpret_cast( + palloc0(sizeof(PaxBlockId) * new_size)); + } else { + local_pax_block_mapping_data[table_index].block_ids_ = + reinterpret_cast( + repalloc(local_pax_block_mapping_data[table_index].block_ids_, + sizeof(PaxBlockId) * new_size)); + } + MemoryContextSwitchTo(oldcontext); + local_pax_block_mapping_data[table_index].size_block_ids_ = new_size; + } + + memcpy(local_pax_block_mapping_data[table_index].block_ids_ + + local_pax_block_mapping_data[table_index].used_block_ids_, + shared_ptr + local_pax_block_mapping_data[table_index].used_block_ids_, + (current_xact_shared_pax_ss->shared_block_mapping_[table_index] + .shared_used_block_ids_ - + local_pax_block_mapping_data[table_index].used_block_ids_) * + sizeof(PaxBlockId)); + local_pax_block_mapping_data[table_index].relid_ = table_rel_oid; + local_pax_block_mapping_data[table_index].used_block_ids_ = + current_xact_shared_pax_ss->shared_block_mapping_[table_index] + .shared_used_block_ids_; + + dsm_detach(attached_block_ids); +} + +uint32 get_block_number(const Oid table_rel_oid, const uint32 table_index, + const PaxBlockId block_id) { + LocalTableBlockMappingData *block_mapping_data = + &local_pax_block_mapping_data[table_index]; + + if (block_mapping_data->used_block_ids_ >= + block_mapping_data->size_block_ids_) { + MemoryContext oldcontext = MemoryContextSwitchTo(pax_block_mapping_context); + if (block_mapping_data->block_ids_ == nullptr) { + block_mapping_data->block_ids_ = reinterpret_cast( + palloc0(sizeof(PaxBlockId) * DEFAULT_BLOCK_IDS_SIZE)); + block_mapping_data->size_block_ids_ = DEFAULT_BLOCK_IDS_SIZE; + block_mapping_data->block_ids_segment_ = NULL; + } else { + uint32 new_size = block_mapping_data->size_block_ids_ * 2; + block_mapping_data->block_ids_ = reinterpret_cast(repalloc( + block_mapping_data->block_ids_, sizeof(PaxBlockId) * new_size)); + block_mapping_data->size_block_ids_ = new_size; + } + MemoryContextSwitchTo(oldcontext); + } + + uint32 block_number = block_mapping_data->used_block_ids_++; + block_mapping_data->block_ids_[block_number] = block_id; + // TODO(gongxun): should we add the condition that only Gp_reader dump 
+ // to shared memory? + dump_shared_block_ids(table_rel_oid, table_index); + return block_number; +} + +PaxBlockId pax_get_block_id(const Oid table_rel_oid, const uint32 table_index, + const uint32 block_number) { + LocalTableBlockMappingData *block_mapping_data = + &local_pax_block_mapping_data[table_index]; + + if (block_mapping_data->relid_ != table_rel_oid || + block_number >= block_mapping_data->used_block_ids_) { + load_shared_block_ids(table_rel_oid, table_index); + } + + block_mapping_data = &local_pax_block_mapping_data[table_index]; + if (block_number >= block_mapping_data->used_block_ids_) { + elog(FATAL, "invalid block number %d for table %d", block_number, + table_rel_oid); + } + return block_mapping_data->block_ids_[block_number]; +} + +PaxBlockId get_block_id(const Oid table_rel_oid, const uint8 table_no, + const uint32 block_number) { + uint32 table_index = pax_get_table_index(table_rel_oid, table_no); + return pax_get_block_id(table_rel_oid, table_index, block_number); +} + +void init_command_shmem_resource() { + XactHashKey pax_xact_hash_key; + pax_xact_hash_key.session_id_ = gp_session_id; + pax_xact_hash_key.command_id_ = gp_command_count; + XactHashEntry *entry = NULL; + bool found; + ACQUIRE_HASH_LOCK(pax_hash_lock, LW_EXCLUSIVE); + entry = reinterpret_cast( + hash_search(pax_xact_hash, &pax_xact_hash_key, HASH_ENTER, &found)); + if (!found) { + entry->key_ = pax_xact_hash_key; + LWLockInitialize(&entry->shared_state_.lock_, + pax_shared_state->pax_xact_lock_tranche_id_); + entry->shared_state_.block_mapping_used_size_ = 0; + } else { + if (!entry) { + ereport(FATAL, + (errcode(ERRCODE_OUT_OF_MEMORY), errmsg("out of memory"), + errdetail( + "Failed while allocation block %lu bytes in shared memory.", + sizeof(XactHashEntry)))); + } + } + + ereport(DEBUG1, + (errmsg("init_command_shmem_resource pax_xact_hash=%p, lock=%p," + "gp_session_id=%d " + "pid=%d, db_id=%d, segment_id=%d, found=%s, gp_command_id=%d " + ",gp_is_writer=%d", + pax_xact_hash, &pax_hash_lock->lock, gp_session_id, getpid(), + GpIdentity.dbid, GpIdentity.segindex, + found ? 
"true" : "false", gp_command_count, Gp_is_writer))); + RELEASE_HASH_LOCK(pax_hash_lock); + current_xact_shared_pax_ss = &entry->shared_state_; +} + +// FIXME(gongxun): do we need to lock current_xact_shared_pax_ss here to protect +// the case where gp_reader is still executing dump_shared_block_ids +void cleanup_command_shmem_resource() { + if (Gp_is_writer && current_xact_shared_pax_ss != NULL) { + XactHashKey pax_xact_hash_key; + pax_xact_hash_key.session_id_ = gp_session_id; + pax_xact_hash_key.command_id_ = gp_command_count; + bool found; + ereport(DEBUG1, + (errmsg("cleanup_command_shmem_resource pax_xact_hash=%p, " + "gp_session_id=%d " + "pid=%d, db_id=%d, segment_id=%d, gp_command_id=%d " + ",gp_is_writer=%d", + pax_xact_hash, gp_session_id, getpid(), GpIdentity.dbid, + GpIdentity.segindex, gp_command_count, Gp_is_writer))); + for (uint32 i = 0; i < current_xact_shared_pax_ss->block_mapping_used_size_; + i++) { + current_xact_shared_pax_ss->shared_block_mapping_[i].relid_ = InvalidOid; + current_xact_shared_pax_ss->shared_block_mapping_[i] + .shared_used_block_ids_ = 0; + current_xact_shared_pax_ss->shared_block_mapping_[i] + .shared_size_block_ids_ = 0; + if (current_xact_shared_pax_ss->shared_block_mapping_[i] + .shared_block_ids_handle_ != 0) { + dsm_unpin_segment(current_xact_shared_pax_ss->shared_block_mapping_[i] + .shared_block_ids_handle_); + } + current_xact_shared_pax_ss->shared_block_mapping_[i] + .shared_block_ids_handle_ = 0; + } + current_xact_shared_pax_ss->block_mapping_used_size_ = 0; + ACQUIRE_HASH_LOCK(pax_hash_lock, LW_EXCLUSIVE); + hash_search(pax_xact_hash, &pax_xact_hash_key, HASH_REMOVE, &found); + RELEASE_HASH_LOCK(pax_hash_lock); + current_xact_shared_pax_ss = NULL; + } +} + +void init_local_command_resource() { + for (uint32 i = 0; i < BLOCK_MAPPING_ARRAY_SIZE; i++) { + local_pax_block_mapping_data[i].relid_ = InvalidOid; + local_pax_block_mapping_data[i].size_block_ids_ = 0; + local_pax_block_mapping_data[i].used_block_ids_ = 0; + + // FIXME(gongxun): unpin the segment and clean the local buffer, because if + // transaction abort, we don't release the resource correctly, so we need to + // unpin the segment here until we fix the bug + local_pax_block_mapping_data[i].block_ids_ = NULL; + if (local_pax_block_mapping_data[i].block_ids_segment_) { + dsm_detach(local_pax_block_mapping_data[i].block_ids_segment_); + } + local_pax_block_mapping_data[i].block_ids_segment_ = NULL; + if (local_pax_block_mapping_data[i].block_ids_) { + pfree(local_pax_block_mapping_data[i].block_ids_); + } + } +} + +void cleanup_local_command_resource() { + // TODO(gongxun): should only clean the slot used + for (uint32 i = 0; i < BLOCK_MAPPING_ARRAY_SIZE; i++) { + local_pax_block_mapping_data[i].relid_ = InvalidOid; + local_pax_block_mapping_data[i].size_block_ids_ = 0; + local_pax_block_mapping_data[i].used_block_ids_ = 0; + if (local_pax_block_mapping_data[i].block_ids_) { + pfree(local_pax_block_mapping_data[i].block_ids_); + local_pax_block_mapping_data[i].block_ids_ = NULL; + } + if (local_pax_block_mapping_data[i].block_ids_segment_) { + dsm_detach(local_pax_block_mapping_data[i].block_ids_segment_); + } + local_pax_block_mapping_data[i].block_ids_segment_ = NULL; + } + MemoryContextReset(pax_block_mapping_context); +} +} // namespace paxc diff --git a/contrib/pax_storage/src/cpp/storage/paxc_block_map_manager.h b/contrib/pax_storage/src/cpp/storage/paxc_block_map_manager.h new file mode 100644 index 00000000000..aa352e039b3 --- /dev/null +++ 
b/contrib/pax_storage/src/cpp/storage/paxc_block_map_manager.h
@@ -0,0 +1,83 @@
+#pragma once
+
+#include "comm/cbdb_api.h"
+
+#include "storage/pax_block_id.h"
+#include "storage/pax_itemptr.h"
+
+namespace paxc {
+
+#define BLOCK_MAPPING_ARRAY_SIZE 64
+struct SharedTableBlockMappingData {
+  Oid relid_;
+  uint32 shared_size_block_ids_;
+  uint32 shared_used_block_ids_;
+  dsm_handle shared_block_ids_handle_;
+  SharedTableBlockMappingData() {
+    relid_ = InvalidOid;
+    shared_size_block_ids_ = 0;
+    shared_used_block_ids_ = 0;
+    shared_block_ids_handle_ = 0;
+  }
+};
+
+struct PaxXactSharedState {
+  LWLock lock_;
+  uint32 block_mapping_used_size_;
+  SharedTableBlockMappingData shared_block_mapping_[BLOCK_MAPPING_ARRAY_SIZE];
+};
+
+struct LocalTableBlockMappingData {
+  Oid relid_;
+  uint32 size_block_ids_;
+  uint32 used_block_ids_;
+  dsm_segment *block_ids_segment_;
+  PaxBlockId *block_ids_;
+  LocalTableBlockMappingData() {
+    relid_ = InvalidOid;
+    size_block_ids_ = 0;
+    used_block_ids_ = 0;
+    block_ids_segment_ = nullptr;
+    block_ids_ = nullptr;
+  }
+};
+
+struct TableEntry {
+  uint16 table_no;
+  Oid relid_;
+  uint32 table_index_;
+};
+
+struct XactHashKey {
+  int session_id_;
+  int command_id_;
+};
+
+struct XactHashEntry {
+  XactHashKey key_;
+  PaxXactSharedState shared_state_;
+};
+
+struct XactLockSlot {
+  bool used;
+};
+// Use this struct to find an unused lock slot and assign it to a hash table
+// entry when a SQL command starts.
+struct PaxSharedState {
+  int pax_xact_lock_tranche_id_;
+};
+
+void paxc_shmem_request();
+void paxc_shmem_startup();
+
+void init_command_resource();
+void release_command_resource();
+
+void get_table_index_and_table_number(const Oid table_rel_oid, uint8 *table_no,
+                                      uint32 *table_index);
+
+uint32 get_block_number(const Oid table_rel_oid, const uint32 table_index,
+                        const PaxBlockId block_id);
+PaxBlockId get_block_id(const Oid table_rel_oid, const uint8 table_no,
+                        const uint32 block_number);
+}  // namespace paxc
diff --git a/contrib/pax_storage/src/cpp/storage/proto/CPPLINT.cfg b/contrib/pax_storage/src/cpp/storage/proto/CPPLINT.cfg
new file mode 100644
index 00000000000..b34a665dd2d
--- /dev/null
+++ b/contrib/pax_storage/src/cpp/storage/proto/CPPLINT.cfg
@@ -0,0 +1,2 @@
+# excluded files or dirs
+exclude_files=orc_proto.pb.*
diff --git a/contrib/pax_storage/src/cpp/storage/proto/micro_partition_stats.proto b/contrib/pax_storage/src/cpp/storage/proto/micro_partition_stats.proto
new file mode 100644
index 00000000000..3f474a9cd0d
--- /dev/null
+++ b/contrib/pax_storage/src/cpp/storage/proto/micro_partition_stats.proto
@@ -0,0 +1,25 @@
+syntax = "proto2";
+package pax.stats;
+
+message MinmaxStatistics {
+  required uint32 typid = 1;      // type oid of the column
+  required uint32 collation = 2;  // collation oid of the column
+  required uint32 procLt = 3;     // proc oid for <
+  required uint32 procGt = 4;     // proc oid for >
+  required uint32 procLe = 5;     // proc oid for <=
+  required uint32 procGe = 6;     // proc oid for >=
+  required bytes minimal = 7;     // minimal value, stored as a Datum
+  required bytes maximum = 8;     // maximum value, stored as a Datum
+}
+
+message MicroPartitionStatisticsInfo
+{
+  repeated ColumnStatisitcsInfo columnStats = 1;  // per-column statistics
+}
+
+message ColumnStatisitcsInfo {
+  optional bool allnull = 1 [default=true];   // all values of the column are null
+  optional bool hasnull = 2 [default=false];  // the column contains at least one null
+  optional MinmaxStatistics minmaxStats = 3;  // min/max value stats
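+  // allnull/hasnull drive the NULL-test pruning in PaxFilter (CheckNullKey):
+  // an IS NULL key prunes the micro-partition when hasnull is false, and an
+  // IS NOT NULL key prunes it when allnull is true.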
+} + diff --git a/contrib/pax_storage/src/cpp/storage/proto/orc_proto.proto b/contrib/pax_storage/src/cpp/storage/proto/orc_proto.proto new file mode 100644 index 00000000000..45e22af27b7 --- /dev/null +++ b/contrib/pax_storage/src/cpp/storage/proto/orc_proto.proto @@ -0,0 +1,420 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +syntax = "proto2"; + +package orc.proto; + +option java_package = "org.apache.orc"; +import "pax.proto"; + +message IntegerStatistics { + optional sint64 minimum = 1; + optional sint64 maximum = 2; + optional sint64 sum = 3; +} + +message DoubleStatistics { + optional double minimum = 1; + optional double maximum = 2; + optional double sum = 3; +} + +message StringStatistics { + optional string minimum = 1; + optional string maximum = 2; + // sum will store the total length of all strings in a stripe + optional sint64 sum = 3; + // If the minimum or maximum value was longer than 1024 bytes, store a lower or upper + // bound instead of the minimum or maximum values above. 
+ optional string lowerBound = 4; + optional string upperBound = 5; +} + +message BucketStatistics { + repeated uint64 count = 1 [packed=true]; +} + +message DecimalStatistics { + optional string minimum = 1; + optional string maximum = 2; + optional string sum = 3; +} + +message DateStatistics { + // min,max values saved as days since epoch + optional sint32 minimum = 1; + optional sint32 maximum = 2; +} + +message TimestampStatistics { + // min,max values saved as milliseconds since epoch + optional sint64 minimum = 1; + optional sint64 maximum = 2; + optional sint64 minimumUtc = 3; + optional sint64 maximumUtc = 4; + // store the lower 6 TS digits for min/max to achieve nanosecond precision + optional int32 minimumNanos = 5; + optional int32 maximumNanos = 6; +} + +message BinaryStatistics { + // sum will store the total binary blob length in a stripe + optional sint64 sum = 1; +} + +// Statistics for list and map +message CollectionStatistics { + optional uint64 minChildren = 1; + optional uint64 maxChildren = 2; + optional uint64 totalChildren = 3; +} + +message ColumnStatistics { + optional uint64 numberOfValues = 1; + optional IntegerStatistics intStatistics = 2; + optional DoubleStatistics doubleStatistics = 3; + optional StringStatistics stringStatistics = 4; + optional BucketStatistics bucketStatistics = 5; + optional DecimalStatistics decimalStatistics = 6; + optional DateStatistics dateStatistics = 7; + optional BinaryStatistics binaryStatistics = 8; + optional TimestampStatistics timestampStatistics = 9; + optional bool hasNull = 10; + optional uint64 bytesOnDisk = 11; + optional CollectionStatistics collectionStatistics = 12; +} + +message RowIndexEntry { + repeated uint64 positions = 1 [packed=true]; + optional ColumnStatistics statistics = 2; +} + +message RowIndex { + repeated RowIndexEntry entry = 1; +} + +message BloomFilter { + optional uint32 numHashFunctions = 1; + repeated fixed64 bitset = 2; + optional bytes utf8bitset = 3; +} + +message BloomFilterIndex { + repeated BloomFilter bloomFilter = 1; +} + +message Stream { + // if you add new index stream kinds, you need to make sure to update + // StreamName to ensure it is added to the stripe in the right area + enum Kind { + PRESENT = 0; + DATA = 1; + LENGTH = 2; + DICTIONARY_DATA = 3; + DICTIONARY_COUNT = 4; + SECONDARY = 5; + ROW_INDEX = 6; + BLOOM_FILTER = 7; + BLOOM_FILTER_UTF8 = 8; + // Virtual stream kinds to allocate space for encrypted index and data. + ENCRYPTED_INDEX = 9; + ENCRYPTED_DATA = 10; + + // stripe statistics streams + STRIPE_STATISTICS = 100; + // A virtual stream kind that is used for setting the encryption IV. 
+ FILE_STATISTICS = 101; + } + optional Kind kind = 1; + optional uint32 column = 2; + optional uint64 length = 3; +} + +message StripeFooter { + repeated Stream streams = 1; + repeated pax.ColumnEncoding pax_col_encodings = 5; + optional string writerTimezone = 3; +} + +// the file tail looks like: +// encrypted stripe statistics: ColumnarStripeStatistics (order by variant) +// stripe statistics: Metadata +// footer: Footer +// postscript: PostScript +// psLen: byte + +message StringPair { + optional string key = 1; + optional string value = 2; +} + +message Type { + enum Kind { + BOOLEAN = 0; + BYTE = 1; + SHORT = 2; + INT = 3; + LONG = 4; + FLOAT = 5; + DOUBLE = 6; + STRING = 7; + BINARY = 8; + TIMESTAMP = 9; + LIST = 10; + MAP = 11; + STRUCT = 12; + UNION = 13; + DECIMAL = 14; + DATE = 15; + VARCHAR = 16; + CHAR = 17; + TIMESTAMP_INSTANT = 18; + } + optional Kind kind = 1; + repeated uint32 subtypes = 2 [packed=true]; + repeated string fieldNames = 3; + optional uint32 maximumLength = 4; + optional uint32 precision = 5; + optional uint32 scale = 6; + repeated StringPair attributes = 7; +} + +message StripeInformation { + // the global file offset of the start of the stripe + optional uint64 offset = 1; + // the number of bytes of index + optional uint64 indexLength = 2; + // the number of bytes of data + optional uint64 dataLength = 3; + // the number of bytes in the stripe footer + optional uint64 footerLength = 4; + // the number of rows in this stripe + optional uint64 numberOfRows = 5; + // If this is present, the reader should use this value for the encryption + // stripe id for setting the encryption IV. Otherwise, the reader should + // use one larger than the previous stripe's encryptStripeId. + // For unmerged ORC files, the first stripe will use 1 and the rest of the + // stripes won't have it set. For merged files, the stripe information + // will be copied from their original files and thus the first stripe of + // each of the input files will reset it to 1. + // Note that 1 was choosen, because protobuf v3 doesn't serialize + // primitive types that are the default (eg. 0). + optional uint64 encryptStripeId = 6; + // For each encryption variant, the new encrypted local key to use + // until we find a replacement. + repeated bytes encryptedLocalKeys = 7; +} + +message UserMetadataItem { + optional string name = 1; + optional bytes value = 2; +} + +// StripeStatistics (1 per a stripe), which each contain the +// ColumnStatistics for each column. +// This message type is only used in ORC v0 and v1. +message StripeStatistics { + repeated ColumnStatistics colStats = 1; +} + +// This message type is only used in ORC v0 and v1. +message Metadata { + repeated StripeStatistics stripeStats = 1; +} + +// In ORC v2 (and for encrypted columns in v1), each column has +// their column statistics written separately. +message ColumnarStripeStatistics { + // one value for each stripe in the file + repeated ColumnStatistics colStats = 1; +} + +enum EncryptionAlgorithm { + UNKNOWN_ENCRYPTION = 0; // used for detecting future algorithms + AES_CTR_128 = 1; + AES_CTR_256 = 2; +} + +message FileStatistics { + repeated ColumnStatistics column = 1; +} + +// How was the data masked? This isn't necessary for reading the file, but +// is documentation about how the file was written. 
+message DataMask { + // the kind of masking, which may include third party masks + optional string name = 1; + // parameters for the mask + repeated string maskParameters = 2; + // the unencrypted column roots this mask was applied to + repeated uint32 columns = 3 [packed = true]; +} + +// Information about the encryption keys. +message EncryptionKey { + optional string keyName = 1; + optional uint32 keyVersion = 2; + optional EncryptionAlgorithm algorithm = 3; +} + +// The description of an encryption variant. +// Each variant is a single subtype that is encrypted with a single key. +message EncryptionVariant { + // the column id of the root + optional uint32 root = 1; + // The master key that was used to encrypt the local key, referenced as + // an index into the Encryption.key list. + optional uint32 key = 2; + // the encrypted key for the file footer + optional bytes encryptedKey = 3; + // the stripe statistics for this variant + repeated Stream stripeStatistics = 4; + // encrypted file statistics as a FileStatistics + optional bytes fileStatistics = 5; +} + +// Which KeyProvider encrypted the local keys. +enum KeyProviderKind { + UNKNOWN = 0; + HADOOP = 1; + AWS = 2; + GCP = 3; + AZURE = 4; +} + +message Encryption { + // all of the masks used in this file + repeated DataMask mask = 1; + // all of the keys used in this file + repeated EncryptionKey key = 2; + // The encrypted variants. + // Readers should prefer the first variant that the user has access to + // the corresponding key. If they don't have access to any of the keys, + // they should get the unencrypted masked data. + repeated EncryptionVariant variants = 3; + // How are the local keys encrypted? + optional KeyProviderKind keyProvider = 4; +} + +enum CalendarKind { + UNKNOWN_CALENDAR = 0; + // A hybrid Julian/Gregorian calendar with a cutover point in October 1582. + JULIAN_GREGORIAN = 1; + // A calendar that extends the Gregorian calendar back forever. + PROLEPTIC_GREGORIAN = 2; +} + +message Footer { + optional uint64 headerLength = 1; + optional uint64 contentLength = 2; + repeated StripeInformation stripes = 3; + repeated Type types = 4; + repeated UserMetadataItem metadata = 5; + optional uint64 numberOfRows = 6; + repeated ColumnStatistics statistics = 7; + optional uint32 rowIndexStride = 8; + + // Each implementation that writes ORC files should register for a code + // 0 = ORC Java + // 1 = ORC C++ + // 2 = Presto + // 3 = Scritchley Go from https://github.com/scritchley/orc + // 4 = Trino + optional uint32 writer = 9; + + // information about the encryption in this file + optional Encryption encryption = 10; + optional CalendarKind calendar = 11; + + // informative description about the version of the software that wrote + // the file. It is assumed to be within a given writer, so for example + // ORC 1.7.2 = "1.7.2". It may include suffixes, such as "-SNAPSHOT". + optional string softwareVersion = 12; +} + +enum CompressionKind { + NONE = 0; + ZLIB = 1; + SNAPPY = 2; + LZO = 3; + LZ4 = 4; + ZSTD = 5; +} + +// Serialized length must be less that 255 bytes +message PostScript { + optional uint64 footerLength = 1; + optional CompressionKind compression = 2; + optional uint64 compressionBlockSize = 3; + // the version of the file format + // [0, 11] = Hive 0.11 + // [0, 12] = Hive 0.12 + repeated uint32 version = 4 [packed = true]; + optional uint64 metadataLength = 5; + + // The version of the writer that wrote the file. 
This number is + // updated when we make fixes or large changes to the writer so that + // readers can detect whether a given bug is present in the data. + // + // Only the Java ORC writer may use values under 6 (or missing) so that + // readers that predate ORC-202 treat the new writers correctly. Each + // writer should assign their own sequence of versions starting from 6. + // + // Version of the ORC Java writer: + // 0 = original + // 1 = HIVE-8732 fixed (fixed stripe/file maximum statistics & + // string statistics use utf8 for min/max) + // 2 = HIVE-4243 fixed (use real column names from Hive tables) + // 3 = HIVE-12055 added (vectorized writer implementation) + // 4 = HIVE-13083 fixed (decimals write present stream correctly) + // 5 = ORC-101 fixed (bloom filters use utf8 consistently) + // 6 = ORC-135 fixed (timestamp statistics use utc) + // 7 = ORC-517 fixed (decimal64 min/max incorrect) + // 8 = ORC-203 added (trim very long string statistics) + // 9 = ORC-14 added (column encryption) + // + // Version of the ORC C++ writer: + // 6 = original + // + // Version of the Presto writer: + // 6 = original + // + // Version of the Scritchley Go writer: + // 6 = original + // + // Version of the Trino writer: + // 6 = original + // + optional uint32 writerVersion = 6; + + // the number of bytes in the encrypted stripe statistics + optional uint64 stripeStatisticsLength = 7; + + // Leave this last in the record + optional string magic = 8000; +} + +// The contents of the file tail that must be serialized. +// This gets serialized as part of OrcSplit, also used by footer cache. +message FileTail { + optional PostScript postscript = 1; + optional Footer footer = 2; + optional uint64 fileLength = 3; + optional uint64 postscriptLength = 4; +} diff --git a/contrib/pax_storage/src/cpp/storage/proto/pax.proto b/contrib/pax_storage/src/cpp/storage/proto/pax.proto new file mode 100644 index 00000000000..db7a715334f --- /dev/null +++ b/contrib/pax_storage/src/cpp/storage/proto/pax.proto @@ -0,0 +1,22 @@ +/** + * it's a common proto for pax column + */ +syntax = "proto2"; + +package pax; + +message ColumnEncoding { + enum Kind { + DEF_ENCODED = -1; // only used in memory, should not store in disk + NO_ENCODED = 0; // no encoded + ORC_RLE_V2 = 1; // used orc rle v2 + DIRECT_DELTA = 2; // used direct delta + + COMPRESS_ZSTD = 3; // use ZTSD to compress + COMPRESS_ZLIB = 4; // use ZLIB to compress + } + + optional Kind kind = 1; + optional uint64 length = 2; // data origin size + optional uint64 compress_lvl = 3; // work for compress +} diff --git a/contrib/pax_storage/src/cpp/storage/proto/proto_wrappers.h b/contrib/pax_storage/src/cpp/storage/proto/proto_wrappers.h new file mode 100644 index 00000000000..e34191ac2aa --- /dev/null +++ b/contrib/pax_storage/src/cpp/storage/proto/proto_wrappers.h @@ -0,0 +1,8 @@ +#pragma once + +// The libproto defined `FATAL` inside as a marco linker +#undef FATAL +#include "storage/proto/micro_partition_stats.pb.h" +#include "storage/proto/orc_proto.pb.h" +#include "storage/proto/pax.pb.h" +#define FATAL 22 diff --git a/contrib/pax_storage/src/cpp/storage/proto/protobuf_stream.cc b/contrib/pax_storage/src/cpp/storage/proto/protobuf_stream.cc new file mode 100644 index 00000000000..52a1d934c6d --- /dev/null +++ b/contrib/pax_storage/src/cpp/storage/proto/protobuf_stream.cc @@ -0,0 +1,125 @@ +#include "storage/proto/protobuf_stream.h" + +#include "exceptions/CException.h" + +namespace pax { + +BufferedOutputStream::BufferedOutputStream(DataBuffer *data_buffer, + 
uint64 block_size) + : data_buffer_(data_buffer), block_size_(block_size) {} + +void BufferedOutputStream::Set(DataBuffer *data_buffer, + uint64 block_size) { + Assert(data_buffer); + data_buffer_ = data_buffer; + block_size_ = block_size; +} + +bool BufferedOutputStream::Next(void **buffer, int *size) { + uint64 old_capacity = data_buffer_->Capacity(); + uint64 new_capacity = data_buffer_->Capacity(); + + while (new_capacity < data_buffer_->Used() + block_size_) { + if (new_capacity == 0) { + new_capacity += block_size_; + } else { + new_capacity += data_buffer_->Capacity(); + } + } + + if (new_capacity == old_capacity) { // No resize + *size = block_size_; + } else { + data_buffer_->ReSize(new_capacity); + *size = static_cast(new_capacity - old_capacity); + } + + *buffer = data_buffer_->GetBuffer() + data_buffer_->Used(); + + data_buffer_->Brush(*size); + return true; +} + +void BufferedOutputStream::BackUp(int count) { + if (count >= 0) { + if (static_cast(count) > data_buffer_->Used()) { + CBDB_RAISE(cbdb::CException::ExType::kExTypeIOError); + } + data_buffer_->BrushBack(count); + } +} + +google::protobuf::int64 BufferedOutputStream::ByteCount() const { + return static_cast(data_buffer_->Used()); +} + +bool BufferedOutputStream::WriteAliasedRaw([[maybe_unused]] const void *data, + [[maybe_unused]] int size) { + return false; +} + +bool BufferedOutputStream::AllowsAliasing() const { return false; } + +uint64 BufferedOutputStream::GetSize() const { return data_buffer_->Used(); } + +DataBuffer *BufferedOutputStream::GetDataBuffer() const { + return data_buffer_; +} + +void BufferedOutputStream::StartBufferOutRecord() { + last_used_ = data_buffer_->Used(); +} + +size_t BufferedOutputStream::EndBufferOutRecord() { + return data_buffer_->Used() - last_used_; +} + +void BufferedOutputStream::DirectWrite(char *ptr, size_t size) { + if (data_buffer_->Available() < size) { + data_buffer_->ReSize(data_buffer_->Capacity() + size); + } + data_buffer_->Write(ptr, size); + data_buffer_->Brush(size); +} + +SeekableInputStream::SeekableInputStream(char *data_buffer, uint64 length) + : data_buffer_(data_buffer, length, true, false) {} + +bool SeekableInputStream::Next(const void **buffer, int *size) { + if (data_buffer_.Available() > 0) { + *buffer = data_buffer_.Position(); + *size = static_cast(data_buffer_.Available()); + data_buffer_.BrushAll(); + return true; + } + *size = 0; + return false; +} + +void SeekableInputStream::BackUp(int count) { + if (count >= 0) { + if (static_cast(count) > data_buffer_.Used()) { + CBDB_RAISE(cbdb::CException::ExType::kExTypeIOError); + } + data_buffer_.BrushBack(count); + } +} + +bool SeekableInputStream::Skip(int count) { + if (count >= 0) { + auto unsigned_count = static_cast(count); + if (unsigned_count + data_buffer_.Used() <= data_buffer_.Capacity()) { + data_buffer_.Brush(unsigned_count); + return true; + } else { + data_buffer_.BrushAll(); + } + } + return false; +} + +google::protobuf::int64 SeekableInputStream::ByteCount() const { + return static_cast(data_buffer_.Used()); +} + +} // namespace pax diff --git a/contrib/pax_storage/src/cpp/storage/proto/protobuf_stream.h b/contrib/pax_storage/src/cpp/storage/proto/protobuf_stream.h new file mode 100644 index 00000000000..61fc9685b87 --- /dev/null +++ b/contrib/pax_storage/src/cpp/storage/proto/protobuf_stream.h @@ -0,0 +1,58 @@ +#pragma once +#include + +#include + +#include "storage/pax_buffer.h" + +namespace pax { + +class BufferedOutputStream : public google::protobuf::io::ZeroCopyOutputStream { + public: 
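+  // ZeroCopyOutputStream over a pax DataBuffer: Next() hands out chunks of at
+  // least block_size bytes (growing the buffer when needed), BackUp(n) gives
+  // back the last n unused bytes, and ByteCount() reports the bytes handed
+  // out so far. A minimal usage sketch with a hypothetical protobuf message
+  // `msg` and DataBuffer `buf`:
+  //
+  //   BufferedOutputStream out(&buf, 4096);
+  //   msg.SerializeToZeroCopyStream(&out);  // serialized size == out.GetSize()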
+ BufferedOutputStream(DataBuffer *data_buffer, uint64 block_size); + + virtual void Set(DataBuffer *data_buffer, uint64 block_size); + + bool Next(void **buffer, int *size) override; + + void BackUp(int count) override; + + google::protobuf::int64 ByteCount() const override; + + bool WriteAliasedRaw(const void *data, int size) override; + + bool AllowsAliasing() const override; + + virtual uint64 GetSize() const; + + virtual DataBuffer *GetDataBuffer() const; + + virtual void StartBufferOutRecord(); + + virtual size_t EndBufferOutRecord(); + + virtual void DirectWrite(char *ptr, size_t size); + + private: + size_t last_used_ = 0; + DataBuffer *data_buffer_; + uint64 block_size_; +}; + +class SeekableInputStream : public google::protobuf::io::ZeroCopyInputStream { + public: + SeekableInputStream(char *data_buffer, uint64 length); + + bool Next(const void **buffer, int *size) override; + + void BackUp(int count) override; + + bool Skip(int count) override; + + google::protobuf::int64 ByteCount() const override; + + private: + DataBuffer data_buffer_; +}; + +} // namespace pax diff --git a/contrib/pax_storage/src/cpp/storage/strategy.cc b/contrib/pax_storage/src/cpp/storage/strategy.cc new file mode 100644 index 00000000000..6fa32b945b4 --- /dev/null +++ b/contrib/pax_storage/src/cpp/storage/strategy.cc @@ -0,0 +1,25 @@ +#include "storage/strategy.h" + +#include "storage/micro_partition.h" + +namespace pax { + +size_t PaxDefaultSplitStrategy::SplitTupleNumbers() const { + // The reason why we chose 16384 as a separator value + // is because in the vectorized version, the number of + // rows returned by each tuple cannot be greater than 16384 + // and needs to be as close as possible to this value + return 16384 * 10; +} + +size_t PaxDefaultSplitStrategy::SplitFileSize() const { + return 64 * 1024 * 1024; +} + +bool PaxDefaultSplitStrategy::ShouldSplit(MicroPartitionWriter *writer, + size_t num_tuples) const { + return (num_tuples >= SplitTupleNumbers()) || + (writer->PhysicalSize() >= SplitFileSize()); +} + +} // namespace pax diff --git a/contrib/pax_storage/src/cpp/storage/strategy.h b/contrib/pax_storage/src/cpp/storage/strategy.h new file mode 100644 index 00000000000..50e49e09ae8 --- /dev/null +++ b/contrib/pax_storage/src/cpp/storage/strategy.h @@ -0,0 +1,31 @@ +#pragma once + +#include + +namespace pax { +class MicroPartitionWriter; +class FileSplitStrategy { + public: + virtual ~FileSplitStrategy() = default; + + virtual bool ShouldSplit(MicroPartitionWriter *writer, + size_t num_tuples) const = 0; + + virtual size_t SplitTupleNumbers() const = 0; + + virtual size_t SplitFileSize() const = 0; +}; + +class PaxDefaultSplitStrategy final : public FileSplitStrategy { + public: + PaxDefaultSplitStrategy() = default; + ~PaxDefaultSplitStrategy() override = default; + + size_t SplitTupleNumbers() const override; + + size_t SplitFileSize() const override; + + bool ShouldSplit(MicroPartitionWriter *writer, + size_t num_tuples) const override; +}; +} // namespace pax diff --git a/contrib/pax_storage/src/cpp/storage/vec/arrow_wrapper.h b/contrib/pax_storage/src/cpp/storage/vec/arrow_wrapper.h new file mode 100644 index 00000000000..3f8d11504ba --- /dev/null +++ b/contrib/pax_storage/src/cpp/storage/vec/arrow_wrapper.h @@ -0,0 +1,58 @@ +#pragma once + +#ifdef VEC_BUILD + +// FIXME(jiaqizho): There marco defined in datatime.h +// which include in `cbdb_api.h`. In pax, we always need +// include `cbdb_api.h`. 
+
+#undef RESERV
+#undef MONTH
+#undef YEAR
+#undef DAY
+#undef JULIAN
+#undef TZ
+#undef DTZ
+#undef DYNTZ
+#undef IGNORE_DTF
+#undef AMPM
+#undef HOUR
+#undef MINUTE
+#undef SECOND
+#undef MILLISECOND
+#undef MICROSECOND
+#undef IsPowerOf2
+
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wunused-variable"
+#pragma GCC diagnostic ignored "-Wunused-but-set-variable"
+
+#include <arrow/api.h>
+#include <arrow/array.h>
+#include <arrow/buffer.h>
+#include <arrow/c/bridge.h>
+#include <arrow/type.h>
+#include <arrow/util/bit_util.h>
+
+#pragma GCC diagnostic pop
+
+#define RESERV 0
+#define MONTH 1
+#define YEAR 2
+#define DAY 3
+#define JULIAN 4
+#define TZ 5
+#define DTZ 6
+#define DYNTZ 7
+#define IGNORE_DTF 8
+#define AMPM 9
+#define HOUR 10
+#define MINUTE 11
+#define SECOND 12
+#define MILLISECOND 13
+#define MICROSECOND 14
+
+// NOLINTNEXTLINE
+#define IsPowerOf2(x) (x > 0 && ((x) & ((x)-1)) == 0)
+
+#endif  // VEC_BUILD
diff --git a/contrib/pax_storage/src/cpp/storage/vec/pax_vec_adapter.cc b/contrib/pax_storage/src/cpp/storage/vec/pax_vec_adapter.cc
new file mode 100644
index 00000000000..72e7e378cfa
--- /dev/null
+++ b/contrib/pax_storage/src/cpp/storage/vec/pax_vec_adapter.cc
@@ -0,0 +1,662 @@
+#include "storage/vec/pax_vec_adapter.h"
+
+#ifdef VEC_BUILD
+
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wunused-but-set-variable"
+
+extern "C" {
+#include "utils/tuptable_vec.h"  // for vec tuple
+}
+
+#pragma GCC diagnostic pop
+
+#include "storage/vec/arrow_wrapper.h"
+
+/// export interface wrapper of arrow
+namespace arrow {
+
+template <typename T>
+struct ArrowExportTraits {};
+
+template <typename T>
+using Arrow = std::function<Status(const T &, struct ArrowSchema *)>;
+
+template <>
+struct ArrowExportTraits<DataType> {
+  static Arrow<DataType> export_func;
+};
+
+template <>
+struct ArrowExportTraits<Field> {
+  static Arrow<Field> export_func;
+};
+
+template <>
+struct ArrowExportTraits<Schema> {
+  static Arrow<Schema> export_func;
+};
+
+Arrow<DataType> ArrowExportTraits<DataType>::export_func = ExportType;
+Arrow<Field> ArrowExportTraits<Field>::export_func = ExportField;
+Arrow<Schema> ArrowExportTraits<Schema>::export_func = ExportSchema;
+
+}  // namespace arrow
+
+namespace pax {
+
+static void CopyFixedRawBufferWithNull(PaxColumn *column, size_t range_begin,
+                                       size_t range_lens,
+                                       size_t data_index_begin,
+                                       size_t data_range_lens,
+                                       DataBuffer<char> *out_data_buffer);
+static inline void CopyFixedRawBuffer(char *buffer, size_t len,
+                                      DataBuffer<char> *data_buffer);
+
+static void CopyNonFixedRawBuffer(PaxColumn *column, size_t range_begin,
+                                  size_t range_lens, size_t data_index_begin,
+                                  size_t data_range_lens,
+                                  DataBuffer<int32> *offset_buffer,
+                                  DataBuffer<char> *out_data_buffer);
+static void ConvSchemaAndDataToVec(
+    Oid pg_type_oid, char *attname, size_t all_nums_of_row,
+    VecAdapter::VecBatchBuffer *vec_batch_buffer,
+    std::vector<std::shared_ptr<arrow::Field>> &schema_types,
+    arrow::ArrayVector &array_vector, std::vector<std::string> &field_names);
+
+static void NullBytesToNullBits(bool *null_buffer, size_t buffer_len,
+                                DataBuffer<char> *null_bits_map);
+
+VecAdapter::VecBatchBuffer::VecBatchBuffer()
+    : vec_buffer(0), null_bits_buffer(0), offset_buffer(0), null_counts(0) {
+  SetMemoryTakeOver(true);
+}
+
+void VecAdapter::VecBatchBuffer::Reset() {
+  // The `DataBuffer`s here do not own the underlying memory: the buffers
+  // are handed over to the `ArrayVector`, which keeps them alive until
+  // they are released in the `release` callback or by the memory context
+  // reset in `EndScan`.
+  SetMemoryTakeOver(false);
+  vec_buffer.Reset();
+  null_bits_buffer.Reset();
+  offset_buffer.Reset();
+  null_counts = 0;
+  SetMemoryTakeOver(true);
+}
+
+void VecAdapter::VecBatchBuffer::SetMemoryTakeOver(bool take) {
+  vec_buffer.SetMemTakeOver(take);
+  null_bits_buffer.SetMemTakeOver(take);
+  offset_buffer.SetMemTakeOver(take);
+}
+
+static void CopyFixedRawBufferWithNull(PaxColumn *column, size_t range_begin,
+                                       size_t range_lens,
+                                       size_t data_index_begin,
+                                       size_t data_range_lens,
+                                       DataBuffer<char> *out_data_buffer) {
+  char *buffer;
+  size_t buffer_len;
+
+  std::tie(buffer, buffer_len) =
+      column->GetRangeBuffer(data_index_begin, data_range_lens);
+
+  auto null_bitmap = column->GetNulls();
+  size_t non_null_offset = 0;
+  size_t type_len = column->GetTypeLength();
+
+  for (size_t i = range_begin; i < (range_begin + range_lens); i++) {
+    if ((*null_bitmap)[i]) {
+      out_data_buffer->Write(buffer + non_null_offset, type_len);
+      non_null_offset += type_len;
+    }
+
+    out_data_buffer->Brush(type_len);
+  }
+
+  Assert((non_null_offset / type_len) == data_range_lens);
+}
+
+static inline void CopyFixedRawBuffer(char *buffer, size_t len,
+                                      DataBuffer<char> *data_buffer) {
+  data_buffer->Write(buffer, len);
+  data_buffer->Brush(len);
+}
+
+static void NullBytesToNullBits(bool *null_buffer, size_t buffer_len,
+                                DataBuffer<char> *null_bits_map) {
+  Assert(null_bits_map->Capacity() >=
+         (buffer_len % 8 == 0 ? buffer_len / 8 : buffer_len / 8 + 1));
+  for (size_t i = 0; i < buffer_len; i++) {
+    arrow::bit_util::SetBitTo((uint8 *)null_bits_map->GetBuffer(), i,
+                              null_buffer[i]);
+  }
+  null_bits_map->BrushAll();
+}
+
+static void CopyNonFixedRawBuffer(PaxColumn *column, size_t range_begin,
+                                  size_t range_lens, size_t data_index_begin,
+                                  size_t data_range_lens,
+                                  DataBuffer<int32> *offset_buffer,
+                                  DataBuffer<char> *out_data_buffer) {
+  size_t dst_offset = out_data_buffer->Used();
+  char *buffer = nullptr;
+  size_t buffer_len = 0;
+
+  auto null_bitmap = column->GetNulls();
+  size_t non_null_offset = 0;
+
+  for (size_t i = range_begin; i < (range_begin + range_lens); i++) {
+    if (null_bitmap && !(*null_bitmap)[i]) {
+      offset_buffer->Write(dst_offset);
+      offset_buffer->Brush(sizeof(int32));
+
+    } else {
+      std::tie(buffer, buffer_len) =
+          column->GetBuffer(data_index_begin + non_null_offset);
+
+      auto vl = (struct varlena *)(buffer);
+      size_t read_len = 0;
+
+      auto tunpacked = pg_detoast_datum_packed(vl);
+      Assert((Pointer)vl == (Pointer)tunpacked);
+
+      read_len = VARSIZE_ANY_EXHDR(tunpacked);
+      auto read_data = VARDATA_ANY(tunpacked);
+
+      out_data_buffer->Write(read_data, read_len);
+      out_data_buffer->Brush(read_len);
+
+      offset_buffer->Write(dst_offset);
+      offset_buffer->Brush(sizeof(int32));
+
+      dst_offset += read_len;
+
+      non_null_offset++;
+    }
+  }
+
+  offset_buffer->Write(dst_offset);
+  offset_buffer->Brush(sizeof(int32));
+
+  CBDB_CHECK(non_null_offset == data_range_lens,
+             cbdb::CException::ExType::kExTypeOutOfRange);
+}
+
+static std::tuple<std::shared_ptr<arrow::Buffer>,
+                  std::shared_ptr<arrow::Buffer>,
+                  std::shared_ptr<arrow::Buffer>>
+ConvToVecBuffer(VecAdapter::VecBatchBuffer *vec_batch_buffer) {
+  std::shared_ptr<arrow::Buffer> arrow_buffer = nullptr;
+  std::shared_ptr<arrow::Buffer> arrow_null_buffer = nullptr;
+  std::shared_ptr<arrow::Buffer> arrow_offset_buffer = nullptr;
+
+  arrow_buffer = std::make_shared<arrow::Buffer>(
+      (uint8 *)vec_batch_buffer->vec_buffer.GetBuffer(),
+      (int64)vec_batch_buffer->vec_buffer.Capacity());
+
+  if (vec_batch_buffer->null_bits_buffer.GetBuffer()) {
+    arrow_null_buffer = std::make_shared<arrow::Buffer>(
+        (uint8 *)vec_batch_buffer->null_bits_buffer.GetBuffer(),
+        (int64)vec_batch_buffer->null_bits_buffer.Capacity());
+  }
+
+  if (vec_batch_buffer->offset_buffer.GetBuffer()) {
+    arrow_offset_buffer = std::make_shared<arrow::Buffer>(
+        (uint8 *)vec_batch_buffer->offset_buffer.GetBuffer(),
+        (int64)vec_batch_buffer->offset_buffer.Capacity());
+  }
+  return std::make_tuple(arrow_buffer, arrow_null_buffer, arrow_offset_buffer);
+}
+
+template <typename T>
+static void ConvArrowSchemaAndBuffer(
+    const std::string &field_name, std::shared_ptr<arrow::DataType> data_type,
+    VecAdapter::VecBatchBuffer *vec_batch_buffer, size_t all_nums_of_row,
+    std::vector<std::shared_ptr<arrow::Field>> &schema_types,
+    arrow::ArrayVector &array_vector, std::vector<std::string> &field_names) {
+  std::shared_ptr<arrow::Buffer> arrow_buffer;
+  std::shared_ptr<arrow::Buffer> arrow_null_buffer;
+
+  auto arrow_buffers = ConvToVecBuffer(vec_batch_buffer);
+  arrow_buffer = std::get<0>(arrow_buffers);
+  arrow_null_buffer = std::get<1>(arrow_buffers);
+
+  schema_types.emplace_back(arrow::field(field_name, data_type));
+  auto array = std::make_shared<T>(all_nums_of_row, arrow_buffer,
+                                   arrow_null_buffer,
+                                   vec_batch_buffer->null_counts);
+
+  array_vector.emplace_back(array);
+  field_names.emplace_back(field_name);
+}
+
+static void ConvSchemaAndDataToVec(
+    Oid pg_type_oid, char *attname, size_t all_nums_of_row,
+    VecAdapter::VecBatchBuffer *vec_batch_buffer,
+    std::vector<std::shared_ptr<arrow::Field>> &schema_types,
+    arrow::ArrayVector &array_vector, std::vector<std::string> &field_names) {
+  switch (pg_type_oid) {
+    case BOOLOID: {
+      ConvArrowSchemaAndBuffer<arrow::BooleanArray>(
+          std::string(attname), arrow::boolean(), vec_batch_buffer,
+          all_nums_of_row, schema_types, array_vector, field_names);
+      break;
+    }
+    case CHAROID: {
+      ConvArrowSchemaAndBuffer<arrow::Int8Array>(
+          std::string(attname), arrow::int8(), vec_batch_buffer,
+          all_nums_of_row, schema_types, array_vector, field_names);
+      break;
+    }
+    case TIMEOID:
+    case TIMESTAMPOID:
+    case TIMESTAMPTZOID: {
+      ConvArrowSchemaAndBuffer<arrow::Date64Array>(
+          std::string(attname), arrow::date64(), vec_batch_buffer,
+          all_nums_of_row, schema_types, array_vector, field_names);
+      break;
+    }
+    case TIDOID:
+    case INT8OID: {
+      ConvArrowSchemaAndBuffer<arrow::Int64Array>(
+          std::string(attname), arrow::int64(), vec_batch_buffer,
+          all_nums_of_row, schema_types, array_vector, field_names);
+      break;
+    }
+    case INT2OID: {
+      ConvArrowSchemaAndBuffer<arrow::Int16Array>(
+          std::string(attname), arrow::int16(), vec_batch_buffer,
+          all_nums_of_row, schema_types, array_vector, field_names);
+      break;
+    }
+    case DATEOID: {
+      ConvArrowSchemaAndBuffer<arrow::Date32Array>(
+          std::string(attname), arrow::date32(), vec_batch_buffer,
+          all_nums_of_row, schema_types, array_vector, field_names);
+      break;
+    }
+    case INT4OID: {
+      ConvArrowSchemaAndBuffer<arrow::Int32Array>(
+          std::string(attname), arrow::int32(), vec_batch_buffer,
+          all_nums_of_row, schema_types, array_vector, field_names);
+      break;
+    }
+    case BPCHAROID:
+      Assert(false);
+      break;
+    case VARCHAROID:
+    case TEXTOID: {
+      std::shared_ptr<arrow::Buffer> arrow_buffer;
+      std::shared_ptr<arrow::Buffer> arrow_null_buffer;
+      std::shared_ptr<arrow::Buffer> arrow_offset_buffer;
+
+      auto arrow_buffers = ConvToVecBuffer(vec_batch_buffer);
+      arrow_buffer = std::get<0>(arrow_buffers);
+      arrow_null_buffer = std::get<1>(arrow_buffers);
+      arrow_offset_buffer = std::get<2>(arrow_buffers);
+
+      schema_types.emplace_back(
+          arrow::field(std::string(attname), arrow::utf8()));
+      auto array = std::make_shared<arrow::StringArray>(
+          all_nums_of_row, arrow_offset_buffer, arrow_buffer, arrow_null_buffer,
+          vec_batch_buffer->null_counts);
+
+      array_vector.emplace_back(array);
+      field_names.emplace_back(std::string(attname));
+      break;
+    }
+    case FLOAT4OID: {
+      ConvArrowSchemaAndBuffer<arrow::FloatArray>(
+          std::string(attname), arrow::float32(), vec_batch_buffer,
+          all_nums_of_row, schema_types, array_vector, field_names);
+      break;
+    }
+    case FLOAT8OID: {
+      ConvArrowSchemaAndBuffer<arrow::DoubleArray>(
+          std::string(attname), arrow::float64(), vec_batch_buffer,
+          all_nums_of_row, schema_types, array_vector, field_names);
+      break;
+    }
+    case INT2ARRAYOID:
+    case INT4ARRAYOID:
+    case INT8ARRAYOID:
+    case FLOAT4ARRAYOID:
+    case FLOAT8ARRAYOID:
+    case TEXTARRAYOID:
+    case BPCHARARRAYOID: {
+      Assert(false);
+    }
+    case NAMEOID:
+    case XIDOID:
+    case CIDOID:
+    case OIDVECTOROID:
+    case JSONOID:
+    case OIDOID:
+    case REGPROCOID:
+    case NUMERICOID:  // TODO(jiaqizho): support it in 0.11
+    default: {
+      Assert(false);
+    }
+  }
+}
+
+VecAdapter::VecAdapter(TupleDesc tuple_desc)
+    : rel_tuple_desc_(tuple_desc),
+      cached_batch_lens_(0),
+      vec_cache_buffer_(nullptr),
+      vec_cache_buffer_lens_(0),
+      process_columns_(nullptr),
+      current_cached_pax_columns_index_(0) {
+  Assert(rel_tuple_desc_);
+}
+
+VecAdapter::~VecAdapter() {
+  if (vec_cache_buffer_) {
+    for (int i = 0; i < vec_cache_buffer_lens_; i++) {
+      vec_cache_buffer_[i].SetMemoryTakeOver(false);
+    }
+    delete[] vec_cache_buffer_;
+  }
+}
+
+void VecAdapter::SetDataSource(PaxColumns *columns) {
+  Assert(columns);
+  process_columns_ = columns;
+  current_cached_pax_columns_index_ = 0;
+  cached_batch_lens_ = 0;
+  if (!vec_cache_buffer_) {
+    vec_cache_buffer_ = new VecBatchBuffer[columns->GetColumns()];
+    vec_cache_buffer_lens_ = columns->GetColumns();
+  }
+}
+
+bool VecAdapter::AppendToVecBuffer() {
+  PaxColumns *columns;
+  PaxColumn *column;
+  size_t range_begin = current_cached_pax_columns_index_;
+  size_t range_lens = VEC_BATCH_LENGTH;
+
+  columns = process_columns_;
+  Assert(cached_batch_lens_ <= VEC_BATCH_LENGTH);
+
+  // There are three cases where we return immediately:
+  // 1. `SetDataSource` has not been called, so no source is set up
+  // 2. a cached vec batch exists that has not been flushed yet
+  // 3. all of the data in the pax columns has been consumed
+  if (!columns || cached_batch_lens_ != 0 ||
+      range_begin == columns->GetRows()) {
+    return false;
+  }
+
+  Assert(range_begin <= columns->GetRows());
+
+  // Recompute `range_lens`: if the remaining data is less than
+  // `VEC_BATCH_LENGTH`, `range_lens` must be reduced accordingly
+  if ((range_begin + range_lens) > columns->GetRows()) {
+    range_lens = columns->GetRows() - range_begin;
+  }
+
+  // The null bitmap length depends on `range_lens`
+  auto null_align_bytes = TYPEALIGN(
+      MEMORY_ALIGN_SIZE,
+      range_lens % 8 == 0 ? (range_lens / 8) : ((range_lens / 8) + 1));
+
+  for (size_t index = 0; index < columns->GetColumns(); index++) {
+    size_t data_index_begin = 0;
+    size_t data_range_lens = 0;
+    DataBuffer<char> *vec_buffer = nullptr;
+    DataBuffer<char> *null_bits_buffer = nullptr;
+    DataBuffer<int32> *offset_buffer = nullptr;
+
+    char *raw_buffer = nullptr;
+    size_t buffer_len = 0;
+
+    if ((*columns)[index] == nullptr) {
+      continue;
+    }
+
+    column = (*columns)[index];
+    Assert(index < (size_t)vec_cache_buffer_lens_ && vec_cache_buffer_);
+
+    data_index_begin = column->GetRangeNonNullRows(0, range_begin);
+    data_range_lens = column->GetRangeNonNullRows(range_begin, range_lens);
+
+    // data buffer holders
+    vec_buffer = &(vec_cache_buffer_[index].vec_buffer);
+    null_bits_buffer = &(vec_cache_buffer_[index].null_bits_buffer);
+    offset_buffer = &(vec_cache_buffer_[index].offset_buffer);
+
+    vec_cache_buffer_[index].null_counts = range_lens - data_range_lens;
+
+    std::tie(raw_buffer, buffer_len) =
+        column->GetRangeBuffer(data_index_begin, data_range_lens);
+
+    switch (column->GetPaxColumnTypeInMem()) {
+      case PaxColumnTypeInMem::kTypeNonFixed: {
+        auto raw_data_size = buffer_len - (data_range_lens * VARHDRSZ_SHORT);
+        auto align_size = TYPEALIGN(MEMORY_ALIGN_SIZE, raw_data_size);
+
+        auto offset_align_bytes =
+            TYPEALIGN(MEMORY_ALIGN_SIZE, (range_lens + 1) * sizeof(int32));
+
+        Assert(!vec_buffer->GetBuffer() && !offset_buffer->GetBuffer());
+        vec_buffer->Set((char *)cbdb::Palloc(align_size), align_size);
+        offset_buffer->Set((char *)cbdb::Palloc0(offset_align_bytes),
+                           offset_align_bytes);
+
+        CopyNonFixedRawBuffer(column, range_begin, range_lens, data_index_begin,
+                              data_range_lens, offset_buffer, vec_buffer);
+
+        break;
+      }
+      case PaxColumnTypeInMem::kTypeFixed: {
+        Assert(column->GetTypeLength() > 0);
+        auto align_size = TYPEALIGN(MEMORY_ALIGN_SIZE,
+                                    (range_lens * column->GetTypeLength()));
+        Assert(!vec_buffer->GetBuffer());
+
+        vec_buffer->Set((char *)cbdb::Palloc(align_size), align_size);
+
+        if (column->HasNull()) {
+          CopyFixedRawBufferWithNull(column, range_begin, range_lens,
+                                     data_index_begin, data_range_lens,
+                                     vec_buffer);
+        } else {
+          CopyFixedRawBuffer(raw_buffer, buffer_len, vec_buffer);
+        }
+
+        break;
+      }
+      default: {
+        CBDB_RAISE(cbdb::CException::ExType::kExTypeLogicError);
+      }
+    }  // switch column type
+
+    if (column->HasNull()) {
+      bool *null_buffer;
+      size_t null_buffer_len;
+      Assert(!null_bits_buffer->GetBuffer());
+      null_bits_buffer->Set((char *)cbdb::Palloc(null_align_bytes),
+                            null_align_bytes);
+      std::tie(null_buffer, null_buffer_len) =
+          column->GetRangeNulls(range_begin, range_lens);
+      NullBytesToNullBits(null_buffer, null_buffer_len, null_bits_buffer);
+    }
+
+  }  // for each column
+
+  current_cached_pax_columns_index_ = range_begin + range_lens;
+  cached_batch_lens_ += range_lens;
+  return true;
+}
+
+size_t VecAdapter::FlushVecBuffer(CTupleSlot *cslot) {
+  ArrowSchema *arrow_schema = nullptr;
+  ArrowArray *arrow_array = nullptr;
+  std::vector<std::shared_ptr<arrow::Field>> schema_types;
+  arrow::ArrayVector array_vector;
+  std::vector<std::string> field_names;
+  VecTupleTableSlot *vslot = nullptr;
+  VecBatchBuffer *vec_batch_buffer = nullptr;
+  PaxColumns *columns = nullptr;
+
+  TupleDesc target_desc;
+
+  // The column count from the current pax columns (the same as what is
+  // stored on disk) may not equal `rel_tuple_desc_->natts`, but it must be
+  // less than or equal to it
+  size_t column_size = 0;
+  size_t rc = 0;
+
+  columns = process_columns_;
+  Assert(columns);
+
+  vslot = VECSLOT(cslot->GetTupleTableSlot());
+  Assert(vslot);
+
+  target_desc = cslot->GetTupleDesc();
+  column_size = columns->GetColumns();
+
+  Assert(column_size <= (size_t)rel_tuple_desc_->natts);
+
+  // The vec executor differs from the cbdb executor: if a single column is
+  // selected from a relation defined with multiple columns,
+  // `target_desc->natts` will be one rather than the actual number of
+  // columns. So we must use `rel_tuple_desc_`, which holds the full
+  // relation tuple descriptor, to fill the target arrow data
+  for (size_t index = 0; index < column_size; index++) {
+    auto attr = &rel_tuple_desc_->attrs[index];
+    char *column_name = NameStr(attr->attname);
+
+    if ((*columns)[index] == nullptr || attr->attisdropped) {
+      continue;
+    }
+
+    vec_batch_buffer = &vec_cache_buffer_[index];
+
+    ConvSchemaAndDataToVec(attr->atttypid, column_name, cached_batch_lens_,
+                           vec_batch_buffer, schema_types, array_vector,
+                           field_names);
+
+    vec_batch_buffer->Reset();
+  }
+
+  Assert(schema_types.size() <= (size_t)target_desc->natts);
+
+  // The reason a null column can be put into `target_desc` is that this
+  // situation only happens when a column is missing on disk, which is what
+  // `add column` causes. For example:
+  // 1. CREATE TABLE aa(a int4, b int4) using pax;
+  // 2. insert into aa values(...); -- generates pax file1 with columns a,b
+  // 3. alter table aa add c int4;
+  // 4. insert into aa values(...); -- generates pax file2 with columns a,b,c
+  // 5. select * from aa;
+  //
+  // In step 5, file1 is missing column c and `schema_types.size()` is 2,
+  // so it must be filled with nulls. In file2, `schema_types.size()` is 3,
+  // so nothing needs to be done.
+  //
+  // Notice that `drop column` does not affect this logic, because dropped
+  // columns are already handled above (the relation tuple descriptor is
+  // used to filter them out).
+  //
+  // An example combining `drop column` + `add column`:
+  // 1. CREATE TABLE aa(a int4, b int4) using pax;
+  // 2. insert into aa values(...); -- generates pax file1 with columns a,b
+  // 3. alter table aa drop b;
+  // 4. alter table aa add c int4;
+  // 5. insert into aa values(...); -- generates pax file2 with columns a,c
+  // 6. select * from aa; -- needs column a + column c
+  //
+  // In step 6, file1 is missing column c, and column b in file1 is filtered
+  // out by `attisdropped`, so `schema_types.size()` is 1 and it must be
+  // filled with nulls. In file2, `schema_types.size()` is 3, so nothing
+  // needs to be done. The loop below does that null filling.
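For intuition, the "fill with nulls" case described above corresponds to producing an all-null Arrow array of the column's type. A standalone sketch using stock Arrow C++ (the adapter itself builds these buffers by hand rather than calling this helper; assumes the Arrow headers are available):

```cpp
#include <arrow/api.h>

#include <iostream>
#include <memory>

int main() {
  // A column absent from an older data file surfaces as a length-N,
  // all-null array of the column's declared type.
  auto result = arrow::MakeArrayOfNull(arrow::int32(), /*length=*/4);
  if (!result.ok()) return 1;
  std::shared_ptr<arrow::Array> col = result.ValueOrDie();
  std::cout << col->null_count() << "\n";  // 4: every slot reads as NULL
  return 0;
}
```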
+  for (int index = schema_types.size(); index < target_desc->natts; index++) {
+    VecBatchBuffer temp_batch_buffer;
+    char *target_column_name = NameStr(target_desc->attrs[index].attname);
+
+    // FIXME(jiaqizho): the default value should be set here,
+    // but that part of the logic is still missing
+
+    // Not sure whether `null()` can be added directly here
+    ConvSchemaAndDataToVec(target_desc->attrs[index].atttypid,
+                           target_column_name, cached_batch_lens_,
+                           &temp_batch_buffer, schema_types, array_vector,
+                           field_names);
+  }
+
+  Assert(schema_types.size() == (size_t)target_desc->natts);
+  Assert(array_vector.size() == schema_types.size());
+  Assert(field_names.size() == array_vector.size());
+
+  arrow_schema = (ArrowSchema *)cbdb::Palloc0(sizeof(ArrowSchema));
+  arrow_array = (ArrowArray *)cbdb::Palloc0(sizeof(ArrowArray));
+
+  auto export_status = arrow::ArrowExportTraits<arrow::DataType>::export_func(
+      *arrow::struct_(std::move(schema_types)), arrow_schema);
+
+  CBDB_CHECK(export_status.ok(),
+             cbdb::CException::ExType::kExTypeArrowExportError);
+
+  export_status = arrow::ExportArray(
+      **arrow::StructArray::Make(std::move(array_vector), field_names),
+      arrow_array);
+
+  CBDB_CHECK(export_status.ok(),
+             cbdb::CException::ExType::kExTypeArrowExportError);
+
+  arrow_array->release = [](struct ArrowArray *array) {  //
+    for (int64 i = 0; i < array->n_children; i++) {
+      if (array->children && array->children[i] &&
+          array->children[i]->release) {
+        array->children[i]->release(array->children[i]);
+      }
+    }
+
+    for (int64 i = 0; i < array->n_buffers; i++) {
+      if (array->buffers[i]) {
+        // cbdb::Pfree may throw a CException that is not handled here;
+        // just let the longjmp happen
+        pfree((void *)array->buffers[i]);
+      }
+    }
+
+    // FIXME(jiaqizho): memory leak here.
+    // Consider not using `arrow::ExportArray`
+    // delete reinterpret_cast(array->private_data);
+    array->release = NULL;
+  };
+
+  // `ArrowRecordBatch`/`ArrowSchema`/`ArrowArray` are allocated from the pax
+  // memory context; pax cannot manage the lifecycle of these three objects
+  // itself. They are freed when the memory context is reset.
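A standalone sketch of the release contract the callback above implements, per the Arrow C data interface: the consumer calls release() exactly once, the callback frees every buffer, then nulls the callback pointer to mark the array as released (minimal stand-in types; the real code uses struct ArrowArray and pfree):

```cpp
#include <cstdint>
#include <cstdlib>

struct ArrowArrayMini {  // minimal stand-in for struct ArrowArray
  int64_t n_buffers;
  const void **buffers;
  void (*release)(ArrowArrayMini *);
};

static void ReleaseBuffers(ArrowArrayMini *array) {
  for (int64_t i = 0; i < array->n_buffers; i++) {
    free(const_cast<void *>(array->buffers[i]));  // free each data buffer
  }
  free(array->buffers);
  array->release = nullptr;  // signals "already released" to later callers
}

int main() {
  ArrowArrayMini a;
  a.n_buffers = 1;
  a.buffers = (const void **)malloc(sizeof(void *));
  a.buffers[0] = malloc(16);
  a.release = ReleaseBuffers;
  a.release(&a);  // consumer side: release exactly once
  return a.release == nullptr ? 0 : 1;
}
```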
+  auto *arrow_rb = (ArrowRecordBatch *)cbdb::Palloc0(sizeof(ArrowRecordBatch));
+  arrow_rb->batch = arrow_array;
+  arrow_rb->schema = arrow_schema;
+  vslot->tts_recordbatch = arrow_rb;
+
+  // Pax won't put any data into tts_values in `ReadVecTuple`,
+  // so nothing in the slot needs to be freed
+  memset(vslot->tts_shouldfree, 0, vslot->base.tts_nvalid);
+
+  rc = cached_batch_lens_;
+  cached_batch_lens_ = 0;
+
+  return rc;
+}
+
+bool VecAdapter::IsInitialized() const { return process_columns_; }
+
+bool VecAdapter::IsEnd() const {
+  CBDB_CHECK(process_columns_, cbdb::CException::ExType::kExTypeLogicError);
+  return current_cached_pax_columns_index_ == process_columns_->GetRows();
+}
+
+}  // namespace pax
+
+#endif  // VEC_BUILD
\ No newline at end of file
diff --git a/contrib/pax_storage/src/cpp/storage/vec/pax_vec_adapter.h b/contrib/pax_storage/src/cpp/storage/vec/pax_vec_adapter.h
new file mode 100644
index 00000000000..bbf347adf30
--- /dev/null
+++ b/contrib/pax_storage/src/cpp/storage/vec/pax_vec_adapter.h
@@ -0,0 +1,57 @@
+#pragma once
+
+#ifdef VEC_BUILD
+
+#include "storage/columns/pax_column.h"
+#include "storage/columns/pax_columns.h"
+#include "storage/micro_partition.h"
+
+// TODO(jiaqizho): once vec exposes this value to cbdb,
+// read it from the GUC instead
+#define VEC_BATCH_LENGTH (16384)
+#define MEMORY_ALIGN_SIZE (8)
+
+namespace pax {
+
+class VecAdapter final {
+ public:
+  struct VecBatchBuffer {
+    DataBuffer<char> vec_buffer;
+    DataBuffer<char> null_bits_buffer;
+    DataBuffer<int32> offset_buffer;
+    size_t null_counts;
+
+    VecBatchBuffer();
+
+    void Reset();
+
+    void SetMemoryTakeOver(bool take);
+  };
+
+  explicit VecAdapter(TupleDesc tuple_desc);
+
+  ~VecAdapter();
+
+  void SetDataSource(PaxColumns *columns);
+
+  bool IsInitialized() const;
+
+  bool IsEnd() const;
+
+  bool AppendToVecBuffer();
+
+  size_t FlushVecBuffer(CTupleSlot *cslot);
+
+ private:
+  TupleDesc rel_tuple_desc_;
+  size_t cached_batch_lens_;
+  VecBatchBuffer *vec_cache_buffer_;
+  int vec_cache_buffer_lens_;
+
+  PaxColumns *process_columns_;
+  size_t current_cached_pax_columns_index_;
+};
+
+}  // namespace pax
+
+#endif  // #ifdef VEC_BUILD
diff --git a/contrib/pax_storage/src/cpp/storage/vec/pax_vec_reader.cc b/contrib/pax_storage/src/cpp/storage/vec/pax_vec_reader.cc
new file mode 100644
index 00000000000..55aa8b0fe3b
--- /dev/null
+++ b/contrib/pax_storage/src/cpp/storage/vec/pax_vec_reader.cc
@@ -0,0 +1,36 @@
+#include "storage/vec/pax_vec_reader.h"
+
+#include "storage/vec/pax_vec_adapter.h"
+#ifdef VEC_BUILD
+
+namespace pax {
+
+PaxVecReader::PaxVecReader(MicroPartitionReader *reader, VecAdapter *adapter)
+    : reader_(reader), adapter_(adapter) {}
+
+PaxVecReader::~PaxVecReader() { delete reader_; }
+
+void PaxVecReader::Open(const ReaderOptions &options) {
+  reader_->Open(options);
+  PaxColumns *pax_columns = reader_->GetAllColumns();
+  adapter_->SetDataSource(pax_columns);
+}
+
+void PaxVecReader::Close() { reader_->Close(); }
+
+bool PaxVecReader::ReadTuple(CTupleSlot *cslot) {
+  if (!adapter_->AppendToVecBuffer()) {
+    return false;
+  }
+
+  auto flush_nums_of_rows = adapter_->FlushVecBuffer(cslot);
+  Assert(flush_nums_of_rows);
+  return true;
+}
+
+PaxColumns *PaxVecReader::GetAllColumns() {
+  CBDB_RAISE(cbdb::CException::ExType::kExTypeLogicError);
+}
+}  // namespace pax
+
+#endif  // VEC_BUILD
diff --git a/contrib/pax_storage/src/cpp/storage/vec/pax_vec_reader.h b/contrib/pax_storage/src/cpp/storage/vec/pax_vec_reader.h
new file mode 100644
index 00000000000..b8b255e67c5
--- /dev/null
+++ b/contrib/pax_storage/src/cpp/storage/vec/pax_vec_reader.h
@@ -0,0 +1,35 @@
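The header that follows notes that the wrapped reader's lifetime is bound to the wrapper (PaxVecReader deletes the inner MicroPartitionReader in its destructor). A standalone sketch of that ownership convention, with illustrative names rather than pax types:

```cpp
#include <iostream>

struct InnerReader {  // stands in for MicroPartitionReader
  ~InnerReader() { std::cout << "inner destroyed\n"; }
  void Open() { std::cout << "inner open\n"; }
};

class WrappingReader {  // stands in for PaxVecReader
 public:
  explicit WrappingReader(InnerReader *inner) : inner_(inner) {}
  ~WrappingReader() { delete inner_; }  // wrapper owns the inner reader
  void Open() { inner_->Open(); }

 private:
  InnerReader *inner_;
};

int main() {
  WrappingReader r(new InnerReader());  // caller hands over ownership
  r.Open();
}  // "inner destroyed": freed by the wrapper, never by the caller
```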
+#pragma once
+#include "storage/micro_partition.h"
+
+#ifdef VEC_BUILD
+
+namespace pax {
+
+class VecAdapter;
+
+class PaxVecReader : public MicroPartitionReader {
+ public:
+  // If reading tuples through the vec reader is enabled, the OrcReader is
+  // held by the PaxVecReader, and the wrapped MicroPartitionReader's
+  // lifecycle is bound to the PaxVecReader
+  PaxVecReader(MicroPartitionReader *reader, VecAdapter *adapter);
+
+  ~PaxVecReader() override;
+
+  void Open(const ReaderOptions &options) override;
+
+  void Close() override;
+
+  bool ReadTuple(CTupleSlot *cslot) override;
+
+ protected:
+  PaxColumns *GetAllColumns() override;
+
+ private:
+  MicroPartitionReader *reader_;
+  VecAdapter *adapter_;
+};
+
+}  // namespace pax
+
+#endif  // VEC_BUILD
diff --git a/contrib/pax_storage/src/cpp/storage/vec/pax_vec_test.cc b/contrib/pax_storage/src/cpp/storage/vec/pax_vec_test.cc
new file mode 100644
index 00000000000..113a58810c0
--- /dev/null
+++ b/contrib/pax_storage/src/cpp/storage/vec/pax_vec_test.cc
@@ -0,0 +1,1010 @@
+#include "comm/gtest_wrappers.h"
+#include "storage/pax.h"
+#include "storage/vec/pax_vec_adapter.h"
+
+#ifdef VEC_BUILD
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wunused-but-set-variable"
+
+extern "C" {
+#include "utils/tuptable_vec.h"  // for vec tuple
+}
+
+#pragma GCC diagnostic pop
+#include "storage/vec/arrow_wrapper.h"
+#endif  // VEC_BUILD
+
+namespace pax::tests {
+
+#ifdef VEC_BUILD
+using ::testing::_;
+using ::testing::AtLeast;
+using ::testing::Return;
+
+static void GenFakeBuffer(char *buffer, size_t length) {
+  for (size_t i = 0; i < length; i++) {
+    buffer[i] = static_cast<char>(i);
+  }
+}
+
+static void CreateOrcTestResourceOwner() {
+  CurrentResourceOwner = ResourceOwnerCreate(NULL, "PaxVecTestResourceOwner");
+}
+
+static void ReleaseOrcTestResourceOwner() {
+  ResourceOwner tmp_resource_owner = CurrentResourceOwner;
+  CurrentResourceOwner = NULL;
+  ResourceOwnerRelease(tmp_resource_owner, RESOURCE_RELEASE_BEFORE_LOCKS, false,
+                       true);
+  ResourceOwnerRelease(tmp_resource_owner, RESOURCE_RELEASE_LOCKS, false, true);
+  ResourceOwnerRelease(tmp_resource_owner, RESOURCE_RELEASE_AFTER_LOCKS, false,
+                       true);
+  ResourceOwnerDelete(tmp_resource_owner);
+}
+
+class PaxVecTest : public ::testing::TestWithParam<bool> {
+ public:
+  void SetUp() override {
+    Singleton<LocalFileSystem>::GetInstance()->Delete(file_name_);
+    MemoryContext pax_vec_test_memory_context = AllocSetContextCreate(
+        (MemoryContext)NULL, "PaxVecTestMemoryContext", 80 * 1024 * 1024,
+        80 * 1024 * 1024, 80 * 1024 * 1024);
+
+    MemoryContextSwitchTo(pax_vec_test_memory_context);
+    CreateOrcTestResourceOwner();
+  }
+
+  static CTupleSlot *CreateCtuple(bool is_fixed, bool with_value = false) {
+    TupleTableSlot *tuple_slot;
+    TupleDescData *tuple_desc;
+    CTupleSlot *ctuple_slot;
+
+    tuple_desc = reinterpret_cast<TupleDescData *>(cbdb::Palloc0(
+        sizeof(TupleDescData) + sizeof(FormData_pg_attribute) * 1));
+
+    tuple_desc->natts = 1;
+    if (is_fixed) {
+      tuple_desc->attrs[0] = {
+          .atttypid = INT4OID,
+          .attlen = 4,
+          .attbyval = true,
+      };
+    } else {
+      tuple_desc->attrs[0] = {
+          .atttypid = TEXTOID,
+          .attlen = -1,
+          .attbyval = false,
+      };
+    }
+
+    tuple_slot = (TupleTableSlot *)cbdb::RePalloc(
+        MakeTupleTableSlot(tuple_desc, &TTSOpsVirtual),
+        MAXALIGN(TTSOpsVirtual.base_slot_size) +
+            MAXALIGN(tuple_desc->natts * sizeof(Datum)) +
+            MAXALIGN(tuple_desc->natts * sizeof(bool)) +
+            MAXALIGN(sizeof(VecTupleTableSlot)));
+
+    if (with_value) {
+      if (is_fixed) {
+        tuple_slot->tts_values[0] = cbdb::Int32ToDatum(0x123);
+      } else {
+        char
column_buff[100]; + GenFakeBuffer(column_buff, 100); + tuple_slot->tts_values[0] = cbdb::DatumFromCString(column_buff, 100); + } + + bool *fake_is_null = + reinterpret_cast(cbdb::Palloc0(sizeof(bool))); + fake_is_null[0] = false; + tuple_slot->tts_isnull = fake_is_null; + } + + tuple_slot->tts_tupleDescriptor = tuple_desc; + ctuple_slot = new CTupleSlot(tuple_slot); + + return ctuple_slot; + } + + static void DeleteCTupleSlot(CTupleSlot *ctuple_slot) { + auto tuple_table_slot = ctuple_slot->GetTupleTableSlot(); + cbdb::Pfree(tuple_table_slot->tts_tupleDescriptor); + cbdb::Pfree(tuple_table_slot); + delete ctuple_slot; + } + + void TearDown() override { + // Singleton::GetInstance()->Delete(file_name_); + ReleaseOrcTestResourceOwner(); + } + + protected: + const char *file_name_ = "./test.file"; +}; + +TEST_P(PaxVecTest, PaxColumnToVec) { + VecAdapter *adapter; + PaxColumns *columns; + PaxColumn *column; + + auto is_fixed = GetParam(); + auto ctuple_slot = CreateCtuple(is_fixed); + + adapter = new VecAdapter(ctuple_slot->GetTupleDesc()); + columns = new PaxColumns(); + if (is_fixed) { + column = new PaxCommColumn(VEC_BATCH_LENGTH + 1000); + } else { + column = new PaxNonFixedColumn(VEC_BATCH_LENGTH + 1000); + } + + for (size_t i = 0; i < VEC_BATCH_LENGTH + 1000; i++) { + if (is_fixed) { + column->Append((char *)&i, sizeof(int32)); + } else { + auto data = cbdb::DatumFromCString((char *)&i, sizeof(int32)); + int len = -1; + auto vl = cbdb::PointerAndLenFromDatum(data, &len); + + column->Append(reinterpret_cast(vl), len); + } + } + + columns->AddRows(column->GetRows()); + columns->Append(column); + adapter->SetDataSource(columns); + auto append_rc = adapter->AppendToVecBuffer(); + ASSERT_TRUE(append_rc); + + // already full + append_rc = adapter->AppendToVecBuffer(); + ASSERT_FALSE(append_rc); + + size_t flush_counts = adapter->FlushVecBuffer(ctuple_slot); + ASSERT_EQ(VEC_BATCH_LENGTH, flush_counts); + + // verify ctuple_slot 1 + { + VecTupleTableSlot *vslot = nullptr; + TupleTableSlot *tuple_table_slot = ctuple_slot->GetTupleTableSlot(); + vslot = (VecTupleTableSlot *)tuple_table_slot; + + auto rb = (ArrowRecordBatch *)vslot->tts_recordbatch; + ArrowArray *arrow_array = rb->batch; + ASSERT_EQ(arrow_array->length, VEC_BATCH_LENGTH); + ASSERT_EQ(arrow_array->null_count, 0); + ASSERT_EQ(arrow_array->offset, 0); + ASSERT_EQ(arrow_array->n_buffers, 1); + ASSERT_EQ(arrow_array->n_children, 1); + ASSERT_NE(arrow_array->children, nullptr); + ASSERT_EQ(arrow_array->buffers[0], nullptr); + ASSERT_EQ(arrow_array->dictionary, nullptr); + ASSERT_EQ(arrow_array->private_data, arrow_array->buffers); + + ArrowArray *child_array = arrow_array->children[0]; + ASSERT_EQ(child_array->length, VEC_BATCH_LENGTH); + ASSERT_EQ(child_array->null_count, 0); + ASSERT_EQ(child_array->offset, 0); + ASSERT_EQ(child_array->n_buffers, is_fixed ? 
2 : 3); + ASSERT_EQ(child_array->n_children, 0); + ASSERT_EQ(child_array->children, nullptr); + ASSERT_EQ(child_array->buffers[0], nullptr); // null bitmap + + if (is_fixed) { + ASSERT_NE(child_array->buffers[1], nullptr); + + char *buffer = (char *)child_array->buffers[1]; + for (size_t i = 0; i < VEC_BATCH_LENGTH; i++) { + ASSERT_EQ(*((int32 *)(buffer + i * sizeof(int32))), i); + } + } else { + ASSERT_NE(child_array->buffers[1], nullptr); + ASSERT_NE(child_array->buffers[2], nullptr); + + char *offset_buffer = (char *)child_array->buffers[1]; + char *buffer = (char *)child_array->buffers[2]; + for (size_t i = 0; i < VEC_BATCH_LENGTH; i++) { + ASSERT_EQ(*((int32 *)(offset_buffer + i * sizeof(int32))), + i * sizeof(int32)); + ASSERT_EQ(*((int32 *)(buffer + i * sizeof(int32))), i); + } + + ASSERT_EQ(*((int32 *)(offset_buffer + VEC_BATCH_LENGTH * sizeof(int32))), + VEC_BATCH_LENGTH * sizeof(int32)); + } + + ASSERT_EQ(child_array->dictionary, nullptr); + ASSERT_EQ(child_array->private_data, child_array->buffers); + } + + append_rc = adapter->AppendToVecBuffer(); + ASSERT_TRUE(append_rc); + + flush_counts = adapter->FlushVecBuffer(ctuple_slot); + ASSERT_EQ(1000, flush_counts); + + // verify ctuple_slot 2 + { + VecTupleTableSlot *vslot = nullptr; + TupleTableSlot *tuple_table_slot = ctuple_slot->GetTupleTableSlot(); + vslot = (VecTupleTableSlot *)tuple_table_slot; + + auto rb = (ArrowRecordBatch *)vslot->tts_recordbatch; + ASSERT_NE(rb, nullptr); + ArrowArray *arrow_array = rb->batch; + ASSERT_EQ(arrow_array->length, 1000); + ASSERT_EQ(arrow_array->null_count, 0); + ASSERT_EQ(arrow_array->offset, 0); + ASSERT_EQ(arrow_array->n_buffers, 1); + ASSERT_EQ(arrow_array->n_children, 1); + ASSERT_NE(arrow_array->children, nullptr); + ASSERT_EQ(arrow_array->buffers[0], nullptr); + ASSERT_EQ(arrow_array->dictionary, nullptr); + ASSERT_EQ(arrow_array->private_data, arrow_array->buffers); + + ArrowArray *child_array = arrow_array->children[0]; + ASSERT_EQ(child_array->length, 1000); + ASSERT_EQ(child_array->null_count, 0); + ASSERT_EQ(child_array->offset, 0); + ASSERT_EQ(child_array->n_buffers, is_fixed ? 
2 : 3); + ASSERT_EQ(child_array->n_children, 0); + ASSERT_EQ(child_array->children, nullptr); + ASSERT_EQ(child_array->buffers[0], nullptr); // null bitmap + + if (is_fixed) { + ASSERT_NE(child_array->buffers[1], nullptr); + + char *buffer = (char *)child_array->buffers[1]; + for (size_t i = 0; i < 1000; i++) { + ASSERT_EQ(*((int32 *)(buffer + i * sizeof(int32))), + i + VEC_BATCH_LENGTH); + } + } else { + ASSERT_NE(child_array->buffers[1], nullptr); + ASSERT_NE(child_array->buffers[2], nullptr); + + char *offset_buffer = (char *)child_array->buffers[1]; + char *buffer = (char *)child_array->buffers[2]; + for (size_t i = 0; i < 1000; i++) { + ASSERT_EQ(*((int32 *)(offset_buffer + i * sizeof(int32))), + i * sizeof(int32)); + ASSERT_EQ(*((int32 *)(buffer + i * sizeof(int32))), + i + VEC_BATCH_LENGTH); + } + + ASSERT_EQ(*((int32 *)(offset_buffer + 1000 * sizeof(int32))), + 1000 * sizeof(int32)); + } + + ASSERT_EQ(child_array->dictionary, nullptr); + ASSERT_EQ(child_array->private_data, child_array->buffers); + } + + DeleteCTupleSlot(ctuple_slot); + + delete columns; + delete adapter; +} + +TEST_P(PaxVecTest, PaxColumnWithNullToVec) { + VecAdapter *adapter; + PaxColumns *columns; + PaxColumn *column; + CTupleSlot *ctuple_slot; + size_t null_counts = 0; + auto is_fixed = GetParam(); + + ctuple_slot = CreateCtuple(is_fixed); + + adapter = new VecAdapter(ctuple_slot->GetTupleDesc()); + columns = new PaxColumns(); + if (is_fixed) { + column = new PaxCommColumn(VEC_BATCH_LENGTH + 1000); + } else { + column = new PaxNonFixedColumn(VEC_BATCH_LENGTH + 1000); + } + + for (size_t i = 0; i < VEC_BATCH_LENGTH + 1000; i++) { + if (i % 5 == 0) { + null_counts++; + column->AppendNull(); + } + + if (is_fixed) { + column->Append((char *)&i, sizeof(int32)); + } else { + auto data = cbdb::DatumFromCString((char *)&i, sizeof(int32)); + int len = -1; + auto vl = cbdb::PointerAndLenFromDatum(data, &len); + + column->Append(reinterpret_cast(vl), len); + } + } + + columns->AddRows(column->GetRows()); + columns->Append(column); + adapter->SetDataSource(columns); + + auto append_rc = adapter->AppendToVecBuffer(); + ASSERT_TRUE(append_rc); + + append_rc = adapter->AppendToVecBuffer(); + ASSERT_FALSE(append_rc); + + size_t flush_counts = adapter->FlushVecBuffer(ctuple_slot); + ASSERT_EQ(VEC_BATCH_LENGTH, flush_counts); + + { + VecTupleTableSlot *vslot = nullptr; + TupleTableSlot *tuple_table_slot = ctuple_slot->GetTupleTableSlot(); + vslot = (VecTupleTableSlot *)tuple_table_slot; + + auto rb = (ArrowRecordBatch *)vslot->tts_recordbatch; + ASSERT_NE(rb, nullptr); + ArrowArray *arrow_array = rb->batch; + ASSERT_EQ(arrow_array->length, VEC_BATCH_LENGTH); + ASSERT_EQ(arrow_array->null_count, 0); + ASSERT_EQ(arrow_array->offset, 0); + ASSERT_EQ(arrow_array->n_buffers, 1); + ASSERT_EQ(arrow_array->n_children, 1); + ASSERT_NE(arrow_array->children, nullptr); + ASSERT_EQ(arrow_array->buffers[0], nullptr); + ASSERT_EQ(arrow_array->dictionary, nullptr); + ASSERT_EQ(arrow_array->private_data, arrow_array->buffers); + + ArrowArray *child_array = arrow_array->children[0]; + ASSERT_EQ(child_array->length, VEC_BATCH_LENGTH); + ASSERT_EQ( + child_array->null_count, + VEC_BATCH_LENGTH - column->GetRangeNonNullRows(0, VEC_BATCH_LENGTH)); + ASSERT_EQ(child_array->offset, 0); + ASSERT_EQ(child_array->n_buffers, is_fixed ? 
2 : 3); + ASSERT_EQ(child_array->n_children, 0); + ASSERT_EQ(child_array->children, nullptr); + + if (is_fixed) { + ASSERT_NE(child_array->buffers[0], nullptr); + ASSERT_NE(child_array->buffers[1], nullptr); + + auto null_bits_array = (uint8 *)child_array->buffers[0]; + + // verify null bitmap + for (size_t i = 0; i < VEC_BATCH_LENGTH; i++) { + // N 0 1 2 3 4 N 5 6 7 8 9 N 10 11 ... + // should % 6 rather then 5 + if (i % 6 == 0) { + ASSERT_FALSE(arrow::bit_util::GetBit(null_bits_array, i)); + } else { + ASSERT_TRUE(arrow::bit_util::GetBit(null_bits_array, i)); + } + } + + // verify data + char *buffer = (char *)child_array->buffers[1]; + size_t verify_null_counts = 0; + for (size_t i = 0; i < VEC_BATCH_LENGTH; i++) { + if (i % 6 == 0) { + verify_null_counts++; + continue; + } + ASSERT_EQ(*((int32 *)(buffer + i * sizeof(int32))), + i - verify_null_counts); + } + + ASSERT_EQ(verify_null_counts, child_array->null_count); + } else { + ASSERT_NE(child_array->buffers[0], nullptr); + ASSERT_NE(child_array->buffers[1], nullptr); + ASSERT_NE(child_array->buffers[2], nullptr); + + auto null_bits_array = (uint8 *)child_array->buffers[0]; + + // verify null bitmap + for (size_t i = 0; i < VEC_BATCH_LENGTH; i++) { + if (i % 6 == 0) { + ASSERT_FALSE(arrow::bit_util::GetBit(null_bits_array, i)); + } else { + ASSERT_TRUE(arrow::bit_util::GetBit(null_bits_array, i)); + } + } + + // verify offset data + char *offset_buffer = (char *)child_array->buffers[1]; + size_t last_offset = 0; + size_t verify_null_counts = 0; + for (size_t i = 0; i < VEC_BATCH_LENGTH; i++) { + if (i % 6 == 0) { + verify_null_counts++; + ASSERT_EQ(*((int32 *)(offset_buffer + i * sizeof(int32))), + last_offset == 0 ? 0 : last_offset + sizeof(int32)); + continue; + } + ASSERT_EQ(*((int32 *)(offset_buffer + i * sizeof(int32))), + (i - verify_null_counts) * sizeof(int32)); + last_offset = *((int32 *)(offset_buffer + i * sizeof(int32))); + } + + ASSERT_EQ(*((int32 *)(offset_buffer + VEC_BATCH_LENGTH * sizeof(int32))), + last_offset + sizeof(int32)); + ASSERT_EQ(verify_null_counts, child_array->null_count); + + // verify data + char *buffer = (char *)child_array->buffers[2]; + for (size_t i = 0; i < VEC_BATCH_LENGTH - verify_null_counts; i++) { + ASSERT_EQ(*((int32 *)(buffer + i * sizeof(int32))), i); + } + + ASSERT_EQ(verify_null_counts, child_array->null_count); + } + + ASSERT_EQ(child_array->dictionary, nullptr); + ASSERT_EQ(child_array->private_data, child_array->buffers); + } + + append_rc = adapter->AppendToVecBuffer(); + ASSERT_TRUE(append_rc); + + flush_counts = adapter->FlushVecBuffer(ctuple_slot); + ASSERT_EQ(null_counts + 1000, flush_counts); + + { + VecTupleTableSlot *vslot = nullptr; + TupleTableSlot *tuple_table_slot = ctuple_slot->GetTupleTableSlot(); + vslot = (VecTupleTableSlot *)tuple_table_slot; + + size_t range_size = null_counts + 1000; + + auto rb = (ArrowRecordBatch *)vslot->tts_recordbatch; + ASSERT_NE(rb, nullptr); + ArrowArray *arrow_array = rb->batch; + ASSERT_EQ(arrow_array->length, range_size); + ASSERT_EQ(arrow_array->null_count, 0); + ASSERT_EQ(arrow_array->offset, 0); + ASSERT_EQ(arrow_array->n_buffers, 1); + ASSERT_EQ(arrow_array->n_children, 1); + ASSERT_NE(arrow_array->children, nullptr); + ASSERT_EQ(arrow_array->buffers[0], nullptr); + ASSERT_EQ(arrow_array->dictionary, nullptr); + ASSERT_EQ(arrow_array->private_data, arrow_array->buffers); + + ArrowArray *child_array = arrow_array->children[0]; + ASSERT_EQ(child_array->length, range_size); + ASSERT_EQ( + child_array->null_count, + range_size - 
column->GetRangeNonNullRows(VEC_BATCH_LENGTH, range_size)); + ASSERT_EQ(child_array->offset, 0); + ASSERT_EQ(child_array->n_buffers, is_fixed ? 2 : 3); + ASSERT_EQ(child_array->n_children, 0); + ASSERT_EQ(child_array->children, nullptr); + + if (is_fixed) { + ASSERT_NE(child_array->buffers[0], nullptr); + ASSERT_NE(child_array->buffers[1], nullptr); + + auto null_bits_array = (uint8 *)child_array->buffers[0]; + char *buffer = (char *)child_array->buffers[1]; + + size_t verify_null_counts = 0; + size_t start = column->GetRangeNonNullRows(0, VEC_BATCH_LENGTH); + for (size_t i = 0; i < range_size; i++) { + if (arrow::bit_util::GetBit(null_bits_array, i)) { + ASSERT_EQ(*((int32 *)(buffer + i * sizeof(int32))), start++); + } else { + verify_null_counts++; + } + } + + ASSERT_EQ(verify_null_counts, child_array->null_count); + } else { + ASSERT_NE(child_array->buffers[0], nullptr); + ASSERT_NE(child_array->buffers[1], nullptr); + ASSERT_NE(child_array->buffers[2], nullptr); + + // verify null bitmap + auto null_bits_array = (uint8 *)child_array->buffers[0]; + + size_t verify_null_counts = 0; + for (size_t i = 0; i < range_size; i++) { + if (!arrow::bit_util::GetBit(null_bits_array, i)) { + verify_null_counts++; + } + } + + ASSERT_EQ(verify_null_counts, child_array->null_count); + + // verify data + char *buffer = (char *)child_array->buffers[2]; + size_t start = column->GetRangeNonNullRows(0, VEC_BATCH_LENGTH); + for (size_t i = 0; i < (range_size - child_array->null_count); i++) { + ASSERT_EQ(*((int32 *)(buffer + i * sizeof(int32))), start++); + } + + // verify offset with data + char *offset_buffer = (char *)child_array->buffers[1]; + start = column->GetRangeNonNullRows(0, VEC_BATCH_LENGTH); + + verify_null_counts = 0; + for (size_t i = 0; i < range_size; i++) { + auto current_offset = *((int32 *)(offset_buffer + i * sizeof(int32))); + auto next_offset = + *((int32 *)(offset_buffer + (i + 1) * sizeof(int32))); + if (current_offset != next_offset) { + ASSERT_EQ( + *((int32 *)(buffer + (i - verify_null_counts) * sizeof(int32))), + start++); + } else { + verify_null_counts++; + } + } + ASSERT_EQ(verify_null_counts, child_array->null_count); + } + + ASSERT_EQ(child_array->dictionary, nullptr); + ASSERT_EQ(child_array->private_data, child_array->buffers); + } + + DeleteCTupleSlot(ctuple_slot); + + delete columns; + delete adapter; +} + +TEST_P(PaxVecTest, PaxColumnToVecNoFull) { + VecAdapter *adapter; + PaxColumns *columns; + PaxColumn *column; + + auto is_fixed = GetParam(); + auto ctuple_slot = CreateCtuple(is_fixed); + + adapter = new VecAdapter(ctuple_slot->GetTupleDesc()); + columns = new PaxColumns(); + if (is_fixed) { + column = new PaxCommColumn(VEC_BATCH_LENGTH + 1000); + } else { + column = new PaxNonFixedColumn(VEC_BATCH_LENGTH + 1000); + } + + for (size_t i = 0; i < 1000; i++) { + if (is_fixed) { + column->Append((char *)&i, sizeof(int32)); + } else { + auto data = cbdb::DatumFromCString((char *)&i, sizeof(int32)); + int len = -1; + auto vl = cbdb::PointerAndLenFromDatum(data, &len); + + column->Append(reinterpret_cast(vl), len); + } + } + + columns->AddRows(column->GetRows()); + columns->Append(column); + adapter->SetDataSource(columns); + auto append_rc = adapter->AppendToVecBuffer(); + ASSERT_TRUE(append_rc); + + // append finish + append_rc = adapter->AppendToVecBuffer(); + ASSERT_FALSE(append_rc); + + size_t flush_counts = adapter->FlushVecBuffer(ctuple_slot); + ASSERT_EQ(1000, flush_counts); + + // verify ctuple_slot + { + VecTupleTableSlot *vslot = nullptr; + TupleTableSlot 
*tuple_table_slot = ctuple_slot->GetTupleTableSlot(); + vslot = (VecTupleTableSlot *)tuple_table_slot; + + auto rb = (ArrowRecordBatch *)vslot->tts_recordbatch; + ASSERT_NE(rb, nullptr); + ArrowArray *arrow_array = rb->batch; + ASSERT_EQ(arrow_array->length, 1000); + ASSERT_EQ(arrow_array->null_count, 0); + ASSERT_EQ(arrow_array->offset, 0); + ASSERT_EQ(arrow_array->n_buffers, 1); + ASSERT_EQ(arrow_array->n_children, 1); + ASSERT_NE(arrow_array->children, nullptr); + ASSERT_EQ(arrow_array->buffers[0], nullptr); + ASSERT_EQ(arrow_array->dictionary, nullptr); + ASSERT_EQ(arrow_array->private_data, arrow_array->buffers); + + ArrowArray *child_array = arrow_array->children[0]; + ASSERT_EQ(child_array->length, 1000); + ASSERT_EQ(child_array->null_count, 0); + ASSERT_EQ(child_array->offset, 0); + ASSERT_EQ(child_array->n_buffers, is_fixed ? 2 : 3); + ASSERT_EQ(child_array->n_children, 0); + ASSERT_EQ(child_array->children, nullptr); + ASSERT_EQ(child_array->buffers[0], nullptr); // null bitmap + + if (is_fixed) { + ASSERT_NE(child_array->buffers[1], nullptr); + + char *buffer = (char *)child_array->buffers[1]; + for (size_t i = 0; i < 1000; i++) { + ASSERT_EQ(*((int32 *)(buffer + i * sizeof(int32))), i); + } + } else { + ASSERT_NE(child_array->buffers[1], nullptr); + ASSERT_NE(child_array->buffers[2], nullptr); + + char *offset_buffer = (char *)child_array->buffers[1]; + char *buffer = (char *)child_array->buffers[2]; + for (size_t i = 0; i < 1000; i++) { + ASSERT_EQ(*((int32 *)(offset_buffer + i * sizeof(int32))), + i * sizeof(int32)); + ASSERT_EQ(*((int32 *)(buffer + i * sizeof(int32))), i); + } + + ASSERT_EQ(*((int32 *)(offset_buffer + 1000 * sizeof(int32))), + 1000 * sizeof(int32)); + } + + ASSERT_EQ(child_array->dictionary, nullptr); + ASSERT_EQ(child_array->private_data, child_array->buffers); + } + + DeleteCTupleSlot(ctuple_slot); + + delete columns; + delete adapter; +} + +TEST_P(PaxVecTest, PaxColumnWithNullToVecNoFull) { + VecAdapter *adapter; + PaxColumns *columns; + PaxColumn *column; + size_t null_counts = 0; + + auto is_fixed = GetParam(); + auto ctuple_slot = CreateCtuple(is_fixed); + + adapter = new VecAdapter(ctuple_slot->GetTupleDesc()); + columns = new PaxColumns(); + if (is_fixed) { + column = new PaxCommColumn(VEC_BATCH_LENGTH + 1000); + } else { + column = new PaxNonFixedColumn(VEC_BATCH_LENGTH + 1000); + } + + for (size_t i = 0; i < 1000; i++) { + if (i % 5 == 0) { + null_counts++; + column->AppendNull(); + } + + if (is_fixed) { + column->Append((char *)&i, sizeof(int32)); + } else { + auto data = cbdb::DatumFromCString((char *)&i, sizeof(int32)); + int len = -1; + auto vl = cbdb::PointerAndLenFromDatum(data, &len); + + column->Append(reinterpret_cast(vl), len); + } + } + ASSERT_EQ(column->GetRows() - column->GetNonNullRows(), null_counts); + ASSERT_EQ(column->GetNonNullRows(), 1000); + + columns->AddRows(column->GetRows()); + columns->Append(column); + adapter->SetDataSource(columns); + auto append_rc = adapter->AppendToVecBuffer(); + ASSERT_TRUE(append_rc); + + // already full + append_rc = adapter->AppendToVecBuffer(); + ASSERT_FALSE(append_rc); + + size_t flush_counts = adapter->FlushVecBuffer(ctuple_slot); + ASSERT_EQ(1000 + null_counts, flush_counts); + + // verify ctuple_slot 2 + { + VecTupleTableSlot *vslot = nullptr; + TupleTableSlot *tuple_table_slot = ctuple_slot->GetTupleTableSlot(); + vslot = (VecTupleTableSlot *)tuple_table_slot; + + auto rb = (ArrowRecordBatch *)vslot->tts_recordbatch; + ASSERT_NE(rb, nullptr); + ArrowArray *arrow_array = rb->batch; + 
ASSERT_EQ(arrow_array->length, 1000 + null_counts); + ASSERT_EQ(arrow_array->null_count, 0); + ASSERT_EQ(arrow_array->offset, 0); + ASSERT_EQ(arrow_array->n_buffers, 1); + ASSERT_EQ(arrow_array->n_children, 1); + ASSERT_NE(arrow_array->children, nullptr); + ASSERT_EQ(arrow_array->buffers[0], nullptr); + ASSERT_EQ(arrow_array->dictionary, nullptr); + ASSERT_EQ(arrow_array->private_data, arrow_array->buffers); + + ArrowArray *child_array = arrow_array->children[0]; + ASSERT_EQ(child_array->length, 1000 + null_counts); + ASSERT_EQ(child_array->null_count, null_counts); + ASSERT_EQ(child_array->offset, 0); + ASSERT_EQ(child_array->n_buffers, is_fixed ? 2 : 3); + ASSERT_EQ(child_array->n_children, 0); + ASSERT_EQ(child_array->children, nullptr); + + if (is_fixed) { + ASSERT_NE(child_array->buffers[0], nullptr); + ASSERT_NE(child_array->buffers[1], nullptr); + + auto null_bits_array = (uint8 *)child_array->buffers[0]; + char *buffer = (char *)child_array->buffers[1]; + + size_t verify_null_counts = 0; + size_t start = 0; + for (int64 i = 0; i < child_array->length; i++) { + if (arrow::bit_util::GetBit(null_bits_array, i)) { + ASSERT_EQ(*((int32 *)(buffer + i * sizeof(int32))), start++); + } else { + verify_null_counts++; + } + } + + ASSERT_EQ(start, 1000); + ASSERT_EQ(verify_null_counts, child_array->null_count); + } else { + ASSERT_NE(child_array->buffers[0], nullptr); + ASSERT_NE(child_array->buffers[1], nullptr); + ASSERT_NE(child_array->buffers[2], nullptr); + + // verify null bitmap + auto null_bits_array = (uint8 *)child_array->buffers[0]; + + size_t verify_null_counts = 0; + for (int64 i = 0; i < child_array->length; i++) { + if (!arrow::bit_util::GetBit(null_bits_array, i)) { + verify_null_counts++; + } + } + + ASSERT_EQ(verify_null_counts, child_array->null_count); + + // verify data + char *buffer = (char *)child_array->buffers[2]; + size_t start = 0; + for (int64 i = 0; i < (child_array->length - child_array->null_count); + i++) { + ASSERT_EQ(*((int32 *)(buffer + i * sizeof(int32))), start++); + } + ASSERT_EQ(start, 1000); + + // verify offset with data + char *offset_buffer = (char *)child_array->buffers[1]; + start = 0; + + verify_null_counts = 0; + for (int64 i = 0; i < child_array->length; i++) { + auto current_offset = *((int32 *)(offset_buffer + i * sizeof(int32))); + auto next_offset = + *((int32 *)(offset_buffer + (i + 1) * sizeof(int32))); + if (current_offset != next_offset) { + ASSERT_EQ( + *((int32 *)(buffer + (i - verify_null_counts) * sizeof(int32))), + start++); + } else { + verify_null_counts++; + } + } + ASSERT_EQ(start, 1000); + ASSERT_EQ(verify_null_counts, child_array->null_count); + } + + ASSERT_EQ(child_array->dictionary, nullptr); + ASSERT_EQ(child_array->private_data, child_array->buffers); + } + + DeleteCTupleSlot(ctuple_slot); + + delete columns; + delete adapter; +} + +TEST_P(PaxVecTest, PaxColumnAllNullToVec) { + VecAdapter *adapter; + PaxColumns *columns; + PaxColumn *column; + + auto is_fixed = GetParam(); + auto ctuple_slot = CreateCtuple(is_fixed); + + adapter = new VecAdapter(ctuple_slot->GetTupleDesc()); + columns = new PaxColumns(); + if (is_fixed) { + column = new PaxCommColumn(1000); + } else { + column = new PaxNonFixedColumn(1000); + } + + for (size_t i = 0; i < 1000; i++) { + column->AppendNull(); + } + + columns->AddRows(column->GetRows()); + columns->Append(column); + adapter->SetDataSource(columns); + auto append_rc = adapter->AppendToVecBuffer(); + ASSERT_TRUE(append_rc); + + // already full + append_rc = adapter->AppendToVecBuffer(); + 
ASSERT_FALSE(append_rc); + + size_t flush_counts = adapter->FlushVecBuffer(ctuple_slot); + ASSERT_EQ(1000, flush_counts); + + { + VecTupleTableSlot *vslot = nullptr; + TupleTableSlot *tuple_table_slot = ctuple_slot->GetTupleTableSlot(); + vslot = (VecTupleTableSlot *)tuple_table_slot; + + auto rb = (ArrowRecordBatch *)vslot->tts_recordbatch; + ASSERT_NE(rb, nullptr); + ArrowArray *arrow_array = rb->batch; + ASSERT_EQ(arrow_array->length, 1000); + ASSERT_EQ(arrow_array->null_count, 0); + ASSERT_EQ(arrow_array->offset, 0); + ASSERT_EQ(arrow_array->n_buffers, 1); + ASSERT_EQ(arrow_array->n_children, 1); + ASSERT_NE(arrow_array->children, nullptr); + ASSERT_EQ(arrow_array->buffers[0], nullptr); + ASSERT_EQ(arrow_array->dictionary, nullptr); + ASSERT_EQ(arrow_array->private_data, arrow_array->buffers); + + ArrowArray *child_array = arrow_array->children[0]; + ASSERT_EQ(child_array->length, 1000); + ASSERT_EQ(child_array->null_count, 1000); + ASSERT_EQ(child_array->offset, 0); + ASSERT_EQ(child_array->n_buffers, is_fixed ? 2 : 3); + ASSERT_EQ(child_array->n_children, 0); + ASSERT_EQ(child_array->children, nullptr); + + if (is_fixed) { + ASSERT_NE(child_array->buffers[0], nullptr); + ASSERT_NE(child_array->buffers[1], nullptr); + + auto null_bits_array = (uint8 *)child_array->buffers[0]; + + // verify null bitmap + for (size_t i = 0; i < 1000; i++) { + ASSERT_FALSE(arrow::bit_util::GetBit(null_bits_array, i)); + } + + } else { + ASSERT_NE(child_array->buffers[0], nullptr); + ASSERT_NE(child_array->buffers[1], nullptr); + ASSERT_NE(child_array->buffers[2], nullptr); + + auto null_bits_array = (uint8 *)child_array->buffers[0]; + + // verify null bitmap + for (size_t i = 0; i < 1000; i++) { + ASSERT_FALSE(arrow::bit_util::GetBit(null_bits_array, i)); + } + + char *offset_buffer = (char *)child_array->buffers[1]; + for (size_t i = 0; i <= 1000; i++) { + // all of offset is 0 + // no data in data part + ASSERT_EQ(*((int32 *)(offset_buffer + i * sizeof(int32))), 0); + } + } + + ASSERT_EQ(child_array->dictionary, nullptr); + ASSERT_EQ(child_array->private_data, child_array->buffers); + } + + DeleteCTupleSlot(ctuple_slot); + + delete columns; + delete adapter; +} + +class MockWriter : public TableWriter { + public: + MockWriter(const Relation relation, WriteSummaryCallback callback) + : TableWriter(relation) { + SetWriteSummaryCallback(callback); + SetFileSplitStrategy(new PaxDefaultSplitStrategy()); + } + + MOCK_METHOD(std::string, GenFilePath, (const std::string &), (override)); +}; + +class MockReaderInterator : public IteratorBase { + public: + explicit MockReaderInterator( + const std::vector &meta_info_list) + : index_(0) { + micro_partitions_.insert(micro_partitions_.end(), meta_info_list.begin(), + meta_info_list.end()); + } + + bool HasNext() override { return index_ < micro_partitions_.size(); } + + void Rewind() override { index_ = 0; } + + MicroPartitionMetadata Next() override { return micro_partitions_[index_++]; } + + private: + uint32 index_; + std::vector micro_partitions_; +}; + +TEST_P(PaxVecTest, PaxVecReaderTest) { + auto is_fixed = GetParam(); + CTupleSlot *ctuple_slot = CreateCtuple(is_fixed, true); + + auto relation = (Relation)cbdb::Palloc0(sizeof(RelationData)); + relation->rd_att = ctuple_slot->GetTupleTableSlot()->tts_tupleDescriptor; + bool callback_called = false; + + TableWriter::WriteSummaryCallback callback = + [&callback_called](const WriteSummary & /*summary*/) { + callback_called = true; + }; + + auto writer = new MockWriter(relation, callback); + EXPECT_CALL(*writer, 
GenFilePath(_)) + .Times(AtLeast(1)) + .WillRepeatedly(Return(file_name_)); + + writer->Open(); + + for (size_t i = 0; i < VEC_BATCH_LENGTH + 1000; i++) { + writer->WriteTuple(ctuple_slot); + } + + writer->Close(); + ASSERT_TRUE(callback_called); + + DeleteCTupleSlot(ctuple_slot); + delete writer; + + ctuple_slot = CreateCtuple(is_fixed); + auto adapter = new VecAdapter(ctuple_slot->GetTupleDesc()); + + std::vector meta_info_list; + MicroPartitionMetadata meta_info; + + meta_info.SetFileName(file_name_); + meta_info.SetMicroPartitionId(file_name_); + meta_info_list.push_back(std::move(meta_info)); + + std::unique_ptr> meta_info_iterator = + std::unique_ptr>( + new MockReaderInterator(meta_info_list)); + + TableReader *reader; + TableReader::ReaderOptions reader_options{}; + reader_options.build_bitmap = false; + reader_options.rel_oid = 0; + reader_options.is_vec = true; + reader_options.adapter = adapter; + + reader = new TableReader(std::move(meta_info_iterator), reader_options); + reader->Open(); + + bool ok = reader->ReadTuple(ctuple_slot); + ASSERT_TRUE(ok); + + ok = reader->ReadTuple(ctuple_slot); + ASSERT_TRUE(ok); + ok = reader->ReadTuple(ctuple_slot); + ASSERT_FALSE(ok); + + reader->Close(); + DeleteCTupleSlot(ctuple_slot); + delete adapter; + delete relation; + delete reader; +} + +INSTANTIATE_TEST_CASE_P(PaxVecTestCombine, PaxVecTest, + testing::Values(true, false)); + +#endif // VEC_BUILD + +} // namespace pax::tests \ No newline at end of file diff --git a/contrib/pax_storage/src/data/Makefile b/contrib/pax_storage/src/data/Makefile new file mode 100644 index 00000000000..fc899d3fd77 --- /dev/null +++ b/contrib/pax_storage/src/data/Makefile @@ -0,0 +1,37 @@ +# contrib/pax_storage/Makefile + +MODULE_big = pax +OBJS = \ + $(WIN32RES) +PG_CPPFLAGS = -I/usr/local/include +PG_CXXFLAGS = -std=c++14 + +PGFILEDESC = "pax - PAX table access method" +SHLIB_LINK += -luuid + +REGRESS = ddl types join update update_gp + +ifdef USE_PGXS +PG_CONFIG = pg_config +PGXS := $(shell $(PG_CONFIG) --pgxs) +include $(PGXS) +else +subdir = contrib/pax_storage/src/data +top_builddir = ../../../.. 
+include $(top_builddir)/src/Makefile.global +include $(top_srcdir)/contrib/contrib-global.mk +endif + +.PHONY: install-data +install-data: + $(INSTALL_DATA) pax-cdbinit--1.0.sql '$(DESTDIR)$(datadir)/cdb_init.d/pax-cdbinit--1.0.sql' + +install: install-data + +.PHONY: uninstall-data + +uninstall-data: + $(RM) '$(DESTDIR)$(datadir)/cdb_init.d/pax-cdbinit--1.0.sql' + +uninstall: uninstall-data + diff --git a/contrib/pax_storage/src/data/expected/ddl.out b/contrib/pax_storage/src/data/expected/ddl.out new file mode 100644 index 00000000000..11cbc134c80 --- /dev/null +++ b/contrib/pax_storage/src/data/expected/ddl.out @@ -0,0 +1,48 @@ +-- start_ignore +create extension pax; +drop table if exists users; +-- end_ignore +create table users( + id int , + name text not null, + height float not null, + decimal_col decimal(10, 2) not null, + created_at timestamp with time zone not null, + updated_at timestamp with time zone not null +) using pax distributed BY (id); +insert into users (id, name, height, decimal_col, created_at, updated_at) values + (1, 'Alice', 1.65, 1.23, '2023-05-17 17:56:49.633664+08', '2023-05-17 17:56:49.633664+08'), + (2, 'Bob', 1.75, 2.34, '2023-05-17 17:56:49.633664+08', '2023-05-17 17:56:49.633664+08'), + (3, 'Carol', 1.85, 3.45, '2023-05-17 17:56:49.633664+08', '2023-05-17 17:56:49.633664+08'); +select * from users; + id | name | height | decimal_col | created_at | updated_at +----+-------+--------+-------------+-------------------------------------+------------------------------------- + 1 | Alice | 1.65 | 1.23 | Wed May 17 02:56:49.633664 2023 PDT | Wed May 17 02:56:49.633664 2023 PDT + 2 | Bob | 1.75 | 2.34 | Wed May 17 02:56:49.633664 2023 PDT | Wed May 17 02:56:49.633664 2023 PDT + 3 | Carol | 1.85 | 3.45 | Wed May 17 02:56:49.633664 2023 PDT | Wed May 17 02:56:49.633664 2023 PDT +(3 rows) + +DELETE FROM users WHERE id = 1; +select * from users; + id | name | height | decimal_col | created_at | updated_at +----+-------+--------+-------------+-------------------------------------+------------------------------------- + 2 | Bob | 1.75 | 2.34 | Wed May 17 02:56:49.633664 2023 PDT | Wed May 17 02:56:49.633664 2023 PDT + 3 | Carol | 1.85 | 3.45 | Wed May 17 02:56:49.633664 2023 PDT | Wed May 17 02:56:49.633664 2023 PDT +(2 rows) + +UPDATE users SET name = 'Alice' WHERE id = 2; +select * from users; + id | name | height | decimal_col | created_at | updated_at +----+-------+--------+-------------+-------------------------------------+------------------------------------- + 3 | Carol | 1.85 | 3.45 | Wed May 17 02:56:49.633664 2023 PDT | Wed May 17 02:56:49.633664 2023 PDT + 2 | Alice | 1.75 | 2.34 | Wed May 17 02:56:49.633664 2023 PDT | Wed May 17 02:56:49.633664 2023 PDT +(2 rows) + +UPDATE users SET height = (select max(height) from users),decimal_col = (select min(decimal_col) from users); +select * from users; + id | name | height | decimal_col | created_at | updated_at +----+-------+--------+-------------+-------------------------------------+------------------------------------- + 3 | Carol | 1.85 | 2.34 | Wed May 17 02:56:49.633664 2023 PDT | Wed May 17 02:56:49.633664 2023 PDT + 2 | Alice | 1.85 | 2.34 | Wed May 17 02:56:49.633664 2023 PDT | Wed May 17 02:56:49.633664 2023 PDT +(2 rows) + diff --git a/contrib/pax_storage/src/data/expected/join.out b/contrib/pax_storage/src/data/expected/join.out new file mode 100644 index 00000000000..193e7f7abfb --- /dev/null +++ b/contrib/pax_storage/src/data/expected/join.out @@ -0,0 +1,46 @@ +-- start_ignore +create extension pax; 
+drop table if exists t1; +-- end_ignore +create table t1(v int) using pax distributed by(v); +insert into t1 select generate_series(1,10); +select * from t1 order by v; + v +---- + 1 + 2 + 3 + 4 + 5 + 6 + 7 + 8 + 9 + 10 +(10 rows) + +update t1 set v=(select max(v) from t1) where v <= 5; +select * from t1 order by v; + v +---- + 6 + 7 + 8 + 9 + 10 + 10 + 10 + 10 + 10 + 10 +(10 rows) + +select * from t1 as a join t1 as b on a.v=b.v where a.v<10; + v | v +---+--- + 6 | 6 + 9 | 9 + 7 | 7 + 8 | 8 +(4 rows) + diff --git a/contrib/pax_storage/src/data/expected/types.out b/contrib/pax_storage/src/data/expected/types.out new file mode 100644 index 00000000000..585ea81da8b --- /dev/null +++ b/contrib/pax_storage/src/data/expected/types.out @@ -0,0 +1,58 @@ +-- start_ignore +create extension pax; +drop table if exists all_typbyval_pg_types; +-- end_ignore +CREATE TABLE all_typbyval_pg_types ( + id int, + bool_col bool, + char_col char, + int2_col int2, + cid_col cid, + + float4_col float4, + int4_col int4, + date_col date, + + oid_col oid, + -- xid_col xid, + time_stamp_col timestamp, + int8_col int8, + -- xid8_col xid8, + float8_col float8, + money_col money, + time_col time, + timestamptz_col timestamptz, + pg_lsn_col pg_lsn +) USING pax distributed by (id); +insert into all_typbyval_pg_types values(1, true,'c',2,'cid',4.2,5,'2023-05-17 17:56:49',7,'2023-05-17 17:56:49',10,11.1111,12,'2023-05-17 17:56:49','2023-05-17 17:56:49', '16/0'), +(1, true,'c',2,'cid',4.2,5,'2023-05-17 17:56:49',7,'2023-05-17 17:56:49',10,11.1111,12,'2023-05-17 17:56:49','2023-05-17 17:56:49', '16/0'), +(1, true,'c',2,'cid',4.2,5,'2023-05-17 17:56:49',7,'2023-05-17 17:56:49',10,11.1111,12,'2023-05-17 17:56:49','2023-05-17 17:56:49', '16/0'); +select * from all_typbyval_pg_types; + id | bool_col | char_col | int2_col | cid_col | float4_col | int4_col | date_col | oid_col | time_stamp_col | int8_col | float8_col | money_col | time_col | timestamptz_col | pg_lsn_col +----+----------+----------+----------+---------+------------+----------+------------+---------+--------------------------+----------+------------+-----------+----------+------------------------------+------------ + 1 | t | c | 2 | 0 | 4.2 | 5 | 05-17-2023 | 7 | Wed May 17 17:56:49 2023 | 10 | 11.1111 | $12.00 | 17:56:49 | Wed May 17 17:56:49 2023 PDT | 16/0 + 1 | t | c | 2 | 0 | 4.2 | 5 | 05-17-2023 | 7 | Wed May 17 17:56:49 2023 | 10 | 11.1111 | $12.00 | 17:56:49 | Wed May 17 17:56:49 2023 PDT | 16/0 + 1 | t | c | 2 | 0 | 4.2 | 5 | 05-17-2023 | 7 | Wed May 17 17:56:49 2023 | 10 | 11.1111 | $12.00 | 17:56:49 | Wed May 17 17:56:49 2023 PDT | 16/0 +(3 rows) + +-- start_ignore +drop table if exists all_typlen_lt_0_pg_type; +-- end_ignore +create table all_typlen_lt_0_pg_type ( + id int, + name_col name, + numeric_col numeric, + text_col text, + varchar_col varchar(128), + point_col point +) USING pax distributed by (id); +insert into all_typlen_lt_0_pg_type values(1,'hello', 1.23, 'text', 'varchar', point(1,2)); +select * from all_typlen_lt_0_pg_type; + id | name_col | numeric_col | text_col | varchar_col | point_col +----+----------+-------------+----------+-------------+----------- + 1 | hello | 1.23 | text | varchar | (1,2) +(1 row) + +-- start_ignore +drop table if exists all_typbyval_pg_types; +-- end_ignore diff --git a/contrib/pax_storage/src/data/expected/update.out b/contrib/pax_storage/src/data/expected/update.out new file mode 100644 index 00000000000..b4ef40b4f44 --- /dev/null +++ b/contrib/pax_storage/src/data/expected/update.out @@ -0,0 +1,968 @@ +--
start_ignore +create extension pax; +ERROR: extension "pax" already exists +drop table if exists update_test; +NOTICE: table "update_test" does not exist, skipping +drop table if exists upsert_test; +NOTICE: table "upsert_test" does not exist, skipping +-- end_ignore +set default_table_access_method = 'pax'; +CREATE TABLE update_test ( + a INT DEFAULT 10, + b INT, + c TEXT +) using pax; +NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'a' as the Cloudberry Database data distribution key for this table. +HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. +CREATE TABLE upsert_test ( + a INT , + b TEXT +) using pax; +NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'a' as the Cloudberry Database data distribution key for this table. +HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. +INSERT INTO update_test VALUES (5, 10, 'foo'); +INSERT INTO update_test(b, a, c) VALUES (15, 10, ''); +SELECT a,b,c FROM update_test ORDER BY a,b,c; + a | b | c +----+----+----- + 5 | 10 | foo + 10 | 15 | +(2 rows) + +UPDATE update_test SET a = DEFAULT, b = 0; +SELECT a,b,c FROM update_test ORDER BY a,b,c; + a | b | c +----+---+----- + 10 | 0 | + 10 | 0 | foo +(2 rows) + +-- aliases for the UPDATE target table +UPDATE update_test AS t SET b = 10 WHERE t.a = 10; +SELECT a,b,c FROM update_test ORDER BY a,b,c; + a | b | c +----+----+----- + 10 | 10 | + 10 | 10 | foo +(2 rows) + +UPDATE update_test t SET b = t.b + 10 WHERE t.a = 10; +SELECT a,b,c FROM update_test ORDER BY a,b,c; + a | b | c +----+----+----- + 10 | 20 | + 10 | 20 | foo +(2 rows) + +-- +-- Test VALUES in FROM +-- +UPDATE update_test SET a=v.i FROM (VALUES(100, 20)) AS v(i, j) + WHERE update_test.b = v.j; +SELECT a,b,c FROM update_test ORDER BY a,b,c; + a | b | c +-----+----+----- + 100 | 20 | + 100 | 20 | foo +(2 rows) + +-- fail, wrong data type: +UPDATE update_test SET a = v.* FROM (VALUES(100, 20)) AS v(i, j) + WHERE update_test.b = v.j; +ERROR: column "a" is of type integer but expression is of type record +LINE 1: UPDATE update_test SET a = v.* FROM (VALUES(100, 20)) AS v(i... + ^ +HINT: You will need to rewrite or cast the expression. 
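As the hint above implies, v.* produces a record value, which cannot be assigned to the scalar column "a"; the row must either be unpacked into scalar assignments or targeted with the multi-column SET form that the next test block exercises. A minimal sketch of both accepted spellings, reusing the identifiers from the fixture above (standard PostgreSQL UPDATE syntax, nothing PAX-specific):

-- scalar assignment: pick individual fields of the VALUES row
UPDATE update_test SET a = v.i FROM (VALUES(100, 20)) AS v(i, j)
  WHERE update_test.b = v.j;
-- multi-column form: assign the whole row at once via ROW(v.*)
UPDATE update_test SET (a, b) = ROW(v.*) FROM (VALUES(100, 20)) AS v(i, j)
  WHERE update_test.b = v.j;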
+-- +-- Test multiple-set-clause syntax +-- +INSERT INTO update_test SELECT a,b+1,c FROM update_test; +SELECT * FROM update_test; + a | b | c +-----+----+----- + 100 | 20 | foo + 100 | 20 | + 100 | 21 | foo + 100 | 21 | +(4 rows) + +UPDATE update_test SET (c,b,a) = ('bugle', b+11, DEFAULT) WHERE c = 'foo'; +SELECT a,b,c FROM update_test ORDER BY a,b,c; + a | b | c +-----+----+------- + 10 | 31 | bugle + 10 | 32 | bugle + 100 | 20 | + 100 | 21 | +(4 rows) + +UPDATE update_test SET (c,b) = ('car', a+b), a = a + 1 WHERE a = 10; +SELECT a,b,c FROM update_test ORDER BY a,b,c; + a | b | c +-----+----+----- + 11 | 41 | car + 11 | 42 | car + 100 | 20 | + 100 | 21 | +(4 rows) + +-- fail, multi assignment to same column: +UPDATE update_test SET (c,b) = ('car', a+b), b = a + 1 WHERE a = 10; +ERROR: multiple assignments to same column "b" +-- uncorrelated sub-select: +UPDATE update_test + SET (b,a) = (select a,b from update_test where b = 41 and c = 'car') + WHERE a = 100 AND b = 20; +SELECT * FROM update_test; + a | b | c +-----+----+----- + 41 | 11 | + 11 | 41 | car + 11 | 42 | car + 100 | 21 | +(4 rows) + +-- correlated sub-select: +UPDATE update_test o + SET (b,a) = (select a+1,b from update_test i + where i.a=o.a and i.b=o.b and i.c is not distinct from o.c); +SELECT * FROM update_test; + a | b | c +----+-----+----- + 41 | 12 | car + 42 | 12 | car + 11 | 42 | + 21 | 101 | +(4 rows) + +-- fail, multiple rows supplied: +UPDATE update_test SET (b,a) = (select a+1,b from update_test); +ERROR: more than one row returned by a subquery used as an expression +-- set to null if no rows supplied: +UPDATE update_test SET (b,a) = (select a+1,b from update_test where a = 1000) + WHERE a = 11; +SELECT * FROM update_test; + a | b | c +----+-----+----- + 41 | 12 | car + 42 | 12 | car + | | + 21 | 101 | +(4 rows) + +-- *-expansion should work in this context: +UPDATE update_test SET (a,b) = ROW(v.*) FROM (VALUES(21, 100)) AS v(i, j) + WHERE update_test.a = v.i; +-- you might expect this to work, but syntactically it's not a RowExpr: +UPDATE update_test SET (a,b) = (v.*) FROM (VALUES(21, 101)) AS v(i, j) + WHERE update_test.a = v.i; +ERROR: source for a multiple-column UPDATE item must be a sub-SELECT or ROW() expression +LINE 1: UPDATE update_test SET (a,b) = (v.*) FROM (VALUES(21, 101)) ... + ^ +-- if an alias for the target table is specified, don't allow references +-- to the original table name +UPDATE update_test AS t SET b = update_test.b + 10 WHERE t.a = 10; +ERROR: invalid reference to FROM-clause entry for table "update_test" +LINE 1: UPDATE update_test AS t SET b = update_test.b + 10 WHERE t.a... + ^ +HINT: Perhaps you meant to reference the table alias "t". +-- Make sure that we can update to a TOASTed value. +UPDATE update_test SET c = repeat('x', 10000) WHERE c = 'car'; +SELECT a, b, char_length(c) FROM update_test; + a | b | char_length +----+-----+------------- + 21 | 100 | 0 + | | 0 + 41 | 12 | 10000 + 42 | 12 | 10000 +(4 rows) + +-- Check multi-assignment with a Result node to handle a one-time filter. 
+EXPLAIN (VERBOSE, COSTS OFF) +UPDATE update_test t + SET (a, b) = (SELECT b, a FROM update_test s WHERE s.a = t.a) + WHERE CURRENT_USER = SESSION_USER; + QUERY PLAN +--------------------------------------------------------------------------------------------------------------- + Update on public.update_test t + -> Explicit Redistribute Motion 3:3 (slice1; segments: 3) + Output: ($1), ($2), t.c, ((SubPlan 1 (returns $1,$2))), t.ctid, t.gp_segment_id, t.*, (DMLAction) + -> Split + Output: ($1), ($2), t.c, ((SubPlan 1 (returns $1,$2))), t.ctid, t.gp_segment_id, t.*, DMLAction + -> Seq Scan on public.update_test t + Output: $1, $2, t.c, (SubPlan 1 (returns $1,$2)), t.ctid, t.gp_segment_id, t.* + SubPlan 1 (returns $1,$2) + -> Result + Output: s.b, s.a + Filter: (s.a = t.a) + -> Materialize + Output: s.b, s.a + -> Broadcast Motion 3:3 (slice2; segments: 3) + Output: s.b, s.a + -> Seq Scan on public.update_test s + Output: s.b, s.a + Optimizer: Postgres query optimizer +(18 rows) + +UPDATE update_test t + SET (a, b) = (SELECT b, a FROM update_test s WHERE s.a = t.a) + WHERE CURRENT_USER = SESSION_USER; +SELECT a, b, char_length(c) FROM update_test; + a | b | char_length +-----+----+------------- + | | 0 + 12 | 41 | 10000 + 12 | 42 | 10000 + 100 | 21 | 0 +(4 rows) + +-- start_ignore +-- Test ON CONFLICT DO UPDATE +-- skip, not support primary key, can't test +set default_table_access_method = 'pax'; +CREATE TABLE upsert_test ( + a INT PRIMARY KEY, + b TEXT +) using pax; +ERROR: relation "upsert_test" already exists +-- INSERT INTO upsert_test VALUES(1, 'Boo'), (3, 'Zoo'); +-- -- uncorrelated sub-select: +-- WITH aaa AS (SELECT 1 AS a, 'Foo' AS b) INSERT INTO upsert_test +-- VALUES (1, 'Bar') ON CONFLICT(a) +-- DO UPDATE SET (b, a) = (SELECT b, a FROM aaa) RETURNING *; +-- -- correlated sub-select: +-- INSERT INTO upsert_test VALUES (1, 'Baz'), (3, 'Zaz') ON CONFLICT(a) +-- DO UPDATE SET (b, a) = (SELECT b || ', Correlated', a from upsert_test i WHERE i.a = upsert_test.a) +-- RETURNING *; +-- -- correlated sub-select (EXCLUDED.* alias): +-- INSERT INTO upsert_test VALUES (1, 'Bat'), (3, 'Zot') ON CONFLICT(a) +-- DO UPDATE SET (b, a) = (SELECT b || ', Excluded', a from upsert_test i WHERE i.a = excluded.a) +-- RETURNING *; +-- -- ON CONFLICT using system attributes in RETURNING, testing both the +-- -- inserting and updating paths. See bug report at: +-- -- https://www.postgresql.org/message-id/73436355-6432-49B1-92ED-1FE4F7E7E100%40finefun.com.au +-- INSERT INTO upsert_test VALUES (2, 'Beeble') ON CONFLICT(a) +-- DO UPDATE SET (b, a) = (SELECT b || ', Excluded', a from upsert_test i WHERE i.a = excluded.a) +-- RETURNING tableoid::regclass, xmin = pg_current_xact_id()::xid AS xmin_correct, xmax = 0 AS xmax_correct; +-- -- currently xmax is set after a conflict - that's probably not good, +-- -- but it seems worthwhile to have to be explicit if that changes. 
+-- INSERT INTO upsert_test VALUES (2, 'Brox') ON CONFLICT(a) +-- DO UPDATE SET (b, a) = (SELECT b || ', Excluded', a from upsert_test i WHERE i.a = excluded.a) +-- RETURNING tableoid::regclass, xmin = pg_current_xact_id()::xid AS xmin_correct, xmax = pg_current_xact_id()::xid AS xmax_correct; +-- DROP TABLE update_test; +-- DROP TABLE upsert_test; +-- -- Test ON CONFLICT DO UPDATE with partitioned table and non-identical children +-- CREATE TABLE upsert_test ( +-- a INT PRIMARY KEY, +-- b TEXT +-- ) PARTITION BY LIST (a); +-- CREATE TABLE upsert_test_1 PARTITION OF upsert_test FOR VALUES IN (1); +-- CREATE TABLE upsert_test_2 (b TEXT, a INT PRIMARY KEY); +-- ALTER TABLE upsert_test ATTACH PARTITION upsert_test_2 FOR VALUES IN (2); +-- INSERT INTO upsert_test VALUES(1, 'Boo'), (2, 'Zoo'); +-- -- uncorrelated sub-select: +-- WITH aaa AS (SELECT 1 AS a, 'Foo' AS b) INSERT INTO upsert_test +-- VALUES (1, 'Bar') ON CONFLICT(a) +-- DO UPDATE SET (b, a) = (SELECT b, a FROM aaa) RETURNING *; +-- -- correlated sub-select: +-- WITH aaa AS (SELECT 1 AS ctea, ' Foo' AS cteb) INSERT INTO upsert_test +-- VALUES (1, 'Bar'), (2, 'Baz') ON CONFLICT(a) +-- DO UPDATE SET (b, a) = (SELECT upsert_test.b||cteb, upsert_test.a FROM aaa) RETURNING *; +-- DROP TABLE upsert_test; +--------------------------- +-- UPDATE with row movement +--------------------------- +-- When a partitioned table receives an UPDATE to the partitioned key and the +-- new values no longer meet the partition's bound, the row must be moved to +-- the correct partition for the new partition key (if one exists). We must +-- also ensure that updatable views on partitioned tables properly enforce any +-- WITH CHECK OPTION that is defined. The situation with triggers in this case +-- also requires thorough testing as partition key updates causing row +-- movement convert UPDATEs into DELETE+INSERT. +set default_table_access_method = 'pax'; +CREATE TABLE range_parted ( + a text, + b bigint, + c numeric, + d int, + e varchar +) PARTITION BY RANGE (a, b); +NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'a' as the Cloudberry Database data distribution key for this table. +HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. +-- Create partitions intentionally in descending bound order, so as to test +-- that update-row-movement works with the leaf partitions not in bound order. +CREATE TABLE part_b_20_b_30 (e varchar, c numeric, a text, b bigint, d int); +NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'e' as the Cloudberry Database data distribution key for this table. +HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. +-- GPDB: distribution policy must match the parent table. +alter table part_b_20_b_30 set distributed by (a); +ALTER TABLE range_parted ATTACH PARTITION part_b_20_b_30 FOR VALUES FROM ('b', 20) TO ('b', 30); +CREATE TABLE part_b_10_b_20 (e varchar, c numeric, a text, b bigint, d int) PARTITION BY RANGE (c); +NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'e' as the Cloudberry Database data distribution key for this table. +HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. 
+alter table part_b_10_b_20 set distributed by (a); +CREATE TABLE part_b_1_b_10 PARTITION OF range_parted FOR VALUES FROM ('b', 1) TO ('b', 10); +NOTICE: table has parent, setting distribution columns to match parent table +ALTER TABLE range_parted ATTACH PARTITION part_b_10_b_20 FOR VALUES FROM ('b', 10) TO ('b', 20); +CREATE TABLE part_a_10_a_20 PARTITION OF range_parted FOR VALUES FROM ('a', 10) TO ('a', 20); +NOTICE: table has parent, setting distribution columns to match parent table +CREATE TABLE part_a_1_a_10 PARTITION OF range_parted FOR VALUES FROM ('a', 1) TO ('a', 10); +NOTICE: table has parent, setting distribution columns to match parent table +-- Check that partition-key UPDATE works sanely on a partitioned table that +-- does not have any child partitions. +UPDATE part_b_10_b_20 set b = b - 6; +-- Create some more partitions following the above pattern of descending bound +-- order, but let's make the situation a bit more complex by having the +-- attribute numbers of the columns vary from their parent partition. +CREATE TABLE part_c_100_200 (e varchar, c numeric, a text, b bigint, d int) PARTITION BY range (abs(d)); +NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'e' as the Cloudberry Database data distribution key for this table. +HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. +ALTER TABLE part_c_100_200 DROP COLUMN e, DROP COLUMN c, DROP COLUMN a; +NOTICE: dropping a column that is part of the distribution policy forces a NULL distribution policy +ALTER TABLE part_c_100_200 ADD COLUMN c numeric, ADD COLUMN e varchar, ADD COLUMN a text; +ALTER TABLE part_c_100_200 DROP COLUMN b; +ALTER TABLE part_c_100_200 ADD COLUMN b bigint; +CREATE TABLE part_d_1_15 PARTITION OF part_c_100_200 FOR VALUES FROM (1) TO (15); +NOTICE: table has parent, setting distribution columns to match parent table +CREATE TABLE part_d_15_20 PARTITION OF part_c_100_200 FOR VALUES FROM (15) TO (20); +NOTICE: table has parent, setting distribution columns to match parent table +ALTER TABLE part_b_10_b_20 ATTACH PARTITION part_c_100_200 FOR VALUES FROM (100) TO (200); +ERROR: distribution policy for "part_c_100_200" must be the same as that for "part_b_10_b_20" +-- GPDB: distribution policy must match the parent table, so the previous command fails. +-- Change the distribution key and try again. +alter table part_c_100_200 set distributed by (a); +ALTER TABLE part_b_10_b_20 ATTACH PARTITION part_c_100_200 FOR VALUES FROM (100) TO (200); +CREATE TABLE part_c_1_100 (e varchar, d int, c numeric, b bigint, a text); +NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'e' as the Cloudberry Database data distribution key for this table. +HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. 
+alter table part_c_1_100 set distributed by (a); +ALTER TABLE part_b_10_b_20 ATTACH PARTITION part_c_1_100 FOR VALUES FROM (1) TO (100); +\set init_range_parted 'truncate range_parted; insert into range_parted VALUES (''a'', 1, 1, 1 ,''e''), (''a'', 10, 200, 1 ,''e''), (''b'', 12, 96, 1 ,''e''), (''b'', 13, 97, 2 ,''e''), (''b'', 15, 105, 16 ,''e''), (''b'', 17, 105, 19 ,''e'')' +\set show_data 'select tableoid::regclass::text COLLATE "C" partname, * from range_parted ORDER BY 1, 2, 3, 4, 5, 6' +:init_range_parted; +:show_data; + partname | a | b | c | d | e +----------------+---+----+-----+----+--- + part_a_10_a_20 | a | 10 | 200 | 1 | e + part_a_1_a_10 | a | 1 | 1 | 1 | e + part_c_1_100 | b | 12 | 96 | 1 | e + part_c_1_100 | b | 13 | 97 | 2 | e + part_d_15_20 | b | 15 | 105 | 16 | e + part_d_15_20 | b | 17 | 105 | 19 | e +(6 rows) + +-- The order of subplans should be in bound order +EXPLAIN (costs off) UPDATE range_parted set c = c - 50 WHERE c > 97; + QUERY PLAN +------------------------------------------------------- + Update on range_parted + Update on part_a_1_a_10 range_parted_1 + Update on part_a_10_a_20 range_parted_2 + Update on part_b_1_b_10 range_parted_3 + Update on part_c_1_100 range_parted_4 + Update on part_d_1_15 range_parted_5 + Update on part_d_15_20 range_parted_6 + Update on part_b_20_b_30 range_parted_7 + -> Append + -> Seq Scan on part_a_1_a_10 range_parted_1 + Filter: (c > '97'::numeric) + -> Seq Scan on part_a_10_a_20 range_parted_2 + Filter: (c > '97'::numeric) + -> Seq Scan on part_b_1_b_10 range_parted_3 + Filter: (c > '97'::numeric) + -> Seq Scan on part_c_1_100 range_parted_4 + Filter: (c > '97'::numeric) + -> Seq Scan on part_d_1_15 range_parted_5 + Filter: (c > '97'::numeric) + -> Seq Scan on part_d_15_20 range_parted_6 + Filter: (c > '97'::numeric) + -> Seq Scan on part_b_20_b_30 range_parted_7 + Filter: (c > '97'::numeric) + Optimizer: Postgres query optimizer +(24 rows) + +-- fail, row movement happens only within the partition subtree. +UPDATE part_c_100_200 set c = c - 20, d = c WHERE c = 105; +-- fail, no partition key update, so no attempt to move tuple, +-- but "a = 'a'" violates partition constraint enforced by root partition) +UPDATE part_b_10_b_20 set a = 'a'; +ERROR: new row for relation "part_b_10_b_20" violates partition constraint (seg2 127.0.0.1:7004 pid=32017) +DETAIL: Failing row contains (e, 96, a, 12, 1). +-- ok, partition key update, no constraint violation +UPDATE range_parted set d = d - 10 WHERE d > 10; +-- ok, no partition key update, no constraint violation +UPDATE range_parted set e = d; +-- No row found +UPDATE part_c_1_100 set c = c + 20 WHERE c = 98; +-- ok, row movement +UPDATE part_b_10_b_20 set c = c + 20 returning c, b, a; + c | b | a +---+---+--- +(0 rows) + +:show_data; + partname | a | b | c | d | e +----------------+---+----+-----+---+--- + part_a_10_a_20 | a | 10 | 200 | 1 | 1 + part_a_1_a_10 | a | 1 | 1 | 1 | 1 +(2 rows) + +-- fail, row movement happens only within the partition subtree. +UPDATE part_b_10_b_20 set b = b - 6 WHERE c > 116 returning *; + e | c | a | b | d +---+---+---+---+--- +(0 rows) + +-- ok, row movement, with subset of rows moved into different partition. +UPDATE range_parted set b = b - 6 WHERE c > 116 returning a, b + c; + a | ?column? +---+---------- +(0 rows) + +:show_data; + partname | a | b | c | d | e +---------------+---+---+---+---+--- + part_a_1_a_10 | a | 1 | 1 | 1 | 1 +(1 row) + +--------------------------- Common table needed for multiple test scenarios. 
--------------------------- +CREATE TABLE mintab(c1 int); +NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'c1' as the Cloudberry Database data distribution key for this table. +HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. +INSERT into mintab VALUES (120); +-- update partition key using updatable view. +CREATE VIEW upview AS SELECT * FROM range_parted WHERE (select c > c1 FROM mintab) WITH CHECK OPTION; +-- ok +UPDATE upview set c = 199 WHERE b = 4; +-- fail, check option violation +UPDATE upview set c = 120 WHERE b = 4; +-- fail, row movement with check option violation +UPDATE upview set a = 'b', b = 15, c = 120 WHERE b = 4; +-- ok, row movement, check option passes +UPDATE upview set a = 'b', b = 15 WHERE b = 4; +:show_data; + partname | a | b | c | d | e +---------------+---+---+---+---+--- + part_a_1_a_10 | a | 1 | 1 | 1 | 1 +(1 row) + +-- cleanup +DROP VIEW upview; +-- RETURNING having whole-row vars. +:init_range_parted; +UPDATE range_parted set c = 95 WHERE a = 'b' and b > 10 and c > 100 returning (range_parted), *; + range_parted | a | b | c | d | e +--------------+---+---+---+---+--- +(0 rows) + +:show_data; + partname | a | b | c | d | e +----------------+---+----+-----+---+--- + part_a_10_a_20 | a | 10 | 200 | 1 | e + part_a_1_a_10 | a | 1 | 1 | 1 | e + part_c_1_100 | b | 12 | 96 | 1 | e + part_c_1_100 | b | 13 | 97 | 2 | e +(4 rows) + +-- Transition tables with update row movement +:init_range_parted; +CREATE FUNCTION trans_updatetrigfunc() RETURNS trigger LANGUAGE plpgsql AS +$$ + begin + raise notice 'trigger = %, old table = %, new table = %', + TG_NAME, + (select string_agg(old_table::text, ', ' ORDER BY a) FROM old_table), + (select string_agg(new_table::text, ', ' ORDER BY a) FROM new_table); + return null; + end; +$$; +CREATE TRIGGER trans_updatetrig + AFTER UPDATE ON range_parted REFERENCING OLD TABLE AS old_table NEW TABLE AS new_table + FOR EACH STATEMENT EXECUTE PROCEDURE trans_updatetrigfunc(); +ERROR: Triggers for statements are not yet supported +UPDATE range_parted set c = (case when c = 96 then 110 else c + 1 end ) WHERE a = 'b' and b > 10 and c >= 96; +:show_data; + partname | a | b | c | d | e +----------------+---+----+-----+----+--- + part_a_10_a_20 | a | 10 | 200 | 1 | e + part_a_1_a_10 | a | 1 | 1 | 1 | e + part_c_1_100 | b | 13 | 98 | 2 | e + part_d_15_20 | b | 15 | 106 | 16 | e + part_d_15_20 | b | 17 | 106 | 19 | e +(5 rows) + +:init_range_parted; +-- -- Enabling OLD TABLE capture for both DELETE as well as UPDATE stmt triggers +-- -- should not cause DELETEd rows to be captured twice. Similar thing for +-- -- INSERT triggers and inserted rows. +-- CREATE TRIGGER trans_deletetrig +-- AFTER DELETE ON range_parted REFERENCING OLD TABLE AS old_table +-- FOR EACH STATEMENT EXECUTE PROCEDURE trans_updatetrigfunc(); +-- CREATE TRIGGER trans_inserttrig +-- AFTER INSERT ON range_parted REFERENCING NEW TABLE AS new_table +-- FOR EACH STATEMENT EXECUTE PROCEDURE trans_updatetrigfunc(); +-- UPDATE range_parted set c = c + 50 WHERE a = 'b' and b > 10 and c >= 96; +-- :show_data; +-- DROP TRIGGER trans_deletetrig ON range_parted; +-- DROP TRIGGER trans_inserttrig ON range_parted; +-- -- Don't drop trans_updatetrig yet. It is required below. +-- -- Test with transition tuple conversion happening for rows moved into the +-- -- new partition. 
This requires a trigger that references transition table +-- -- (we already have trans_updatetrig). For inserted rows, the conversion +-- -- is not usually needed, because the original tuple is already compatible with +-- -- the desired transition tuple format. But conversion happens when there is a +-- -- BR trigger because the trigger can change the inserted row. So install a +-- -- BR triggers on those child partitions where the rows will be moved. +-- CREATE FUNCTION func_parted_mod_b() RETURNS trigger AS $$ +-- BEGIN +-- NEW.b = NEW.b + 1; +-- return NEW; +-- END $$ language plpgsql; +-- CREATE TRIGGER trig_c1_100 BEFORE UPDATE OR INSERT ON part_c_1_100 +-- FOR EACH ROW EXECUTE PROCEDURE func_parted_mod_b(); +-- CREATE TRIGGER trig_d1_15 BEFORE UPDATE OR INSERT ON part_d_1_15 +-- FOR EACH ROW EXECUTE PROCEDURE func_parted_mod_b(); +-- CREATE TRIGGER trig_d15_20 BEFORE UPDATE OR INSERT ON part_d_15_20 +-- FOR EACH ROW EXECUTE PROCEDURE func_parted_mod_b(); +-- :init_range_parted; +-- UPDATE range_parted set c = (case when c = 96 then 110 else c + 1 end) WHERE a = 'b' and b > 10 and c >= 96; +-- :show_data; +-- :init_range_parted; +-- UPDATE range_parted set c = c + 50 WHERE a = 'b' and b > 10 and c >= 96; +-- :show_data; +-- -- Case where per-partition tuple conversion map array is allocated, but the +-- -- map is not required for the particular tuple that is routed, thanks to +-- -- matching table attributes of the partition and the target table. +-- :init_range_parted; +-- UPDATE range_parted set b = 15 WHERE b = 1; +-- :show_data; +-- DROP TRIGGER trans_updatetrig ON range_parted; +-- DROP TRIGGER trig_c1_100 ON part_c_1_100; +-- DROP TRIGGER trig_d1_15 ON part_d_1_15; +-- DROP TRIGGER trig_d15_20 ON part_d_15_20; +-- DROP FUNCTION func_parted_mod_b(); +-- RLS policies with update-row-movement +----------------------------------------- +ALTER TABLE range_parted ENABLE ROW LEVEL SECURITY; +CREATE USER regress_range_parted_user; +NOTICE: resource queue required -- using default resource queue "pg_default" +GRANT ALL ON range_parted, mintab TO regress_range_parted_user; +CREATE POLICY seeall ON range_parted AS PERMISSIVE FOR SELECT USING (true); +CREATE POLICY policy_range_parted ON range_parted for UPDATE USING (true) WITH CHECK (c % 2 = 0); +:init_range_parted; +SET SESSION AUTHORIZATION regress_range_parted_user; +-- This should fail with RLS violation error while moving row from +-- part_a_10_a_20 to part_d_1_15, because we are setting 'c' to an odd number. +UPDATE range_parted set a = 'b', c = 151 WHERE a = 'a' and c = 200; +ERROR: new row violates row-level security policy for table "range_parted" (seg1 127.0.0.1:7003 pid=32016) +RESET SESSION AUTHORIZATION; +-- Create a trigger on part_d_1_15 +CREATE FUNCTION func_d_1_15() RETURNS trigger AS $$ +BEGIN + NEW.c = NEW.c + 1; -- Make even numbers odd, or vice versa + return NEW; +END $$ LANGUAGE plpgsql; +CREATE TRIGGER trig_d_1_15 BEFORE INSERT ON part_d_1_15 + FOR EACH ROW EXECUTE PROCEDURE func_d_1_15(); +:init_range_parted; +SET SESSION AUTHORIZATION regress_range_parted_user; +-- Here, RLS checks should succeed while moving row from part_a_10_a_20 to +-- part_d_1_15. Even though the UPDATE is setting 'c' to an odd number, the +-- trigger at the destination partition again makes it an even number. 
+UPDATE range_parted set a = 'b', c = 151 WHERE a = 'a' and c = 200; +ERROR: new row violates row-level security policy for table "range_parted" (seg1 127.0.0.1:7003 pid=32016) +RESET SESSION AUTHORIZATION; +:init_range_parted; +SET SESSION AUTHORIZATION regress_range_parted_user; +-- This should fail with RLS violation error. Even though the UPDATE is setting +-- 'c' to an even number, the trigger at the destination partition again makes +-- it an odd number. +UPDATE range_parted set a = 'b', c = 150 WHERE a = 'a' and c = 200; +-- Cleanup +RESET SESSION AUTHORIZATION; +DROP TRIGGER trig_d_1_15 ON part_d_1_15; +DROP FUNCTION func_d_1_15(); +-- Policy expression contains SubPlan +RESET SESSION AUTHORIZATION; +:init_range_parted; +CREATE POLICY policy_range_parted_subplan on range_parted + AS RESTRICTIVE for UPDATE USING (true) + WITH CHECK ((SELECT range_parted.c <= c1 FROM mintab)); +SET SESSION AUTHORIZATION regress_range_parted_user; +-- fail, mintab has row with c1 = 120 +UPDATE range_parted set a = 'b', c = 122 WHERE a = 'a' and c = 200; +ERROR: new row violates row-level security policy "policy_range_parted_subplan" for table "range_parted" (seg1 127.0.0.1:7003 pid=32016) +-- ok +UPDATE range_parted set a = 'b', c = 120 WHERE a = 'a' and c = 200; +-- RLS policy expression contains whole row. +RESET SESSION AUTHORIZATION; +:init_range_parted; +CREATE POLICY policy_range_parted_wholerow on range_parted AS RESTRICTIVE for UPDATE USING (true) + WITH CHECK (range_parted = row('b', 10, 112, 1, NULL)::range_parted); +SET SESSION AUTHORIZATION regress_range_parted_user; +-- ok, should pass the RLS check +UPDATE range_parted set a = 'b', c = 112 WHERE a = 'a' and c = 200; +ERROR: new row violates row-level security policy "policy_range_parted_wholerow" for table "range_parted" (seg1 127.0.0.1:7003 pid=32016) +RESET SESSION AUTHORIZATION; +:init_range_parted; +SET SESSION AUTHORIZATION regress_range_parted_user; +-- fail, the whole row RLS check should fail +UPDATE range_parted set a = 'b', c = 116 WHERE a = 'a' and c = 200; +ERROR: new row violates row-level security policy "policy_range_parted_wholerow" for table "range_parted" (seg1 127.0.0.1:7003 pid=32016) +-- Cleanup +RESET SESSION AUTHORIZATION; +DROP POLICY policy_range_parted ON range_parted; +DROP POLICY policy_range_parted_subplan ON range_parted; +DROP POLICY policy_range_parted_wholerow ON range_parted; +REVOKE ALL ON range_parted, mintab FROM regress_range_parted_user; +DROP USER regress_range_parted_user; +DROP TABLE mintab; +----- ok above +-- statement triggers with update row movement +--------------------------------------------------- +:init_range_parted; +CREATE FUNCTION trigfunc() returns trigger language plpgsql as +$$ + begin + raise notice 'trigger = % fired on table % during %', + TG_NAME, TG_TABLE_NAME, TG_OP; + return null; + end; +$$; +-- Triggers on root partition +CREATE TRIGGER parent_delete_trig + AFTER DELETE ON range_parted for each statement execute procedure trigfunc(); +ERROR: Triggers for statements are not yet supported +CREATE TRIGGER parent_update_trig + AFTER UPDATE ON range_parted for each statement execute procedure trigfunc(); +ERROR: Triggers for statements are not yet supported +CREATE TRIGGER parent_insert_trig + AFTER INSERT ON range_parted for each statement execute procedure trigfunc(); +ERROR: Triggers for statements are not yet supported +-- Triggers on leaf partition part_c_1_100 +CREATE TRIGGER c1_delete_trig + AFTER DELETE ON part_c_1_100 for each statement execute procedure trigfunc(); 
+ERROR: Triggers for statements are not yet supported +CREATE TRIGGER c1_update_trig + AFTER UPDATE ON part_c_1_100 for each statement execute procedure trigfunc(); +ERROR: Triggers for statements are not yet supported +CREATE TRIGGER c1_insert_trig + AFTER INSERT ON part_c_1_100 for each statement execute procedure trigfunc(); +ERROR: Triggers for statements are not yet supported +-- Triggers on leaf partition part_d_1_15 +CREATE TRIGGER d1_delete_trig + AFTER DELETE ON part_d_1_15 for each statement execute procedure trigfunc(); +ERROR: Triggers for statements are not yet supported +CREATE TRIGGER d1_update_trig + AFTER UPDATE ON part_d_1_15 for each statement execute procedure trigfunc(); +ERROR: Triggers for statements are not yet supported +CREATE TRIGGER d1_insert_trig + AFTER INSERT ON part_d_1_15 for each statement execute procedure trigfunc(); +ERROR: Triggers for statements are not yet supported +-- Triggers on leaf partition part_d_15_20 +CREATE TRIGGER d15_delete_trig + AFTER DELETE ON part_d_15_20 for each statement execute procedure trigfunc(); +ERROR: Triggers for statements are not yet supported +CREATE TRIGGER d15_update_trig + AFTER UPDATE ON part_d_15_20 for each statement execute procedure trigfunc(); +ERROR: Triggers for statements are not yet supported +CREATE TRIGGER d15_insert_trig + AFTER INSERT ON part_d_15_20 for each statement execute procedure trigfunc(); +ERROR: Triggers for statements are not yet supported +-- Move all rows from part_c_100_200 to part_c_1_100. None of the delete or +-- insert statement triggers should be fired. +UPDATE range_parted set c = c - 50 WHERE c > 97; +:show_data; + partname | a | b | c | d | e +----------------+---+----+-----+---+--- + part_a_10_a_20 | a | 10 | 150 | 1 | e + part_a_1_a_10 | a | 1 | 1 | 1 | e + part_c_1_100 | b | 12 | 96 | 1 | e + part_c_1_100 | b | 13 | 97 | 2 | e +(4 rows) + +DROP TRIGGER parent_delete_trig ON range_parted; +ERROR: trigger "parent_delete_trig" for table "range_parted" does not exist +DROP TRIGGER parent_update_trig ON range_parted; +ERROR: trigger "parent_update_trig" for table "range_parted" does not exist +DROP TRIGGER parent_insert_trig ON range_parted; +ERROR: trigger "parent_insert_trig" for table "range_parted" does not exist +DROP TRIGGER c1_delete_trig ON part_c_1_100; +ERROR: trigger "c1_delete_trig" for table "part_c_1_100" does not exist +DROP TRIGGER c1_update_trig ON part_c_1_100; +ERROR: trigger "c1_update_trig" for table "part_c_1_100" does not exist +DROP TRIGGER c1_insert_trig ON part_c_1_100; +ERROR: trigger "c1_insert_trig" for table "part_c_1_100" does not exist +DROP TRIGGER d1_delete_trig ON part_d_1_15; +ERROR: trigger "d1_delete_trig" for table "part_d_1_15" does not exist +DROP TRIGGER d1_update_trig ON part_d_1_15; +ERROR: trigger "d1_update_trig" for table "part_d_1_15" does not exist +DROP TRIGGER d1_insert_trig ON part_d_1_15; +ERROR: trigger "d1_insert_trig" for table "part_d_1_15" does not exist +DROP TRIGGER d15_delete_trig ON part_d_15_20; +ERROR: trigger "d15_delete_trig" for table "part_d_15_20" does not exist +DROP TRIGGER d15_update_trig ON part_d_15_20; +ERROR: trigger "d15_update_trig" for table "part_d_15_20" does not exist +DROP TRIGGER d15_insert_trig ON part_d_15_20; +ERROR: trigger "d15_insert_trig" for table "part_d_15_20" does not exist +-- Creating default partition for range +:init_range_parted; +create table part_def partition of range_parted default; +NOTICE: table has parent, setting distribution columns to match parent table +\d+ part_def + Table 
"public.part_def" + Column | Type | Collation | Nullable | Default | Storage | Stats target | Description +--------+-------------------+-----------+----------+---------+----------+--------------+------------- + a | text | | | | extended | | + b | bigint | | | | plain | | + c | numeric | | | | main | | + d | integer | | | | plain | | + e | character varying | | | | extended | | +Partition of: range_parted DEFAULT +Partition constraint: (NOT ((a IS NOT NULL) AND (b IS NOT NULL) AND (((a = 'a'::text) AND (b >= '1'::bigint) AND (b < '10'::bigint)) OR ((a = 'a'::text) AND (b >= '10'::bigint) AND (b < '20'::bigint)) OR ((a = 'b'::text) AND (b >= '1'::bigint) AND (b < '10'::bigint)) OR ((a = 'b'::text) AND (b >= '10'::bigint) AND (b < '20'::bigint)) OR ((a = 'b'::text) AND (b >= '20'::bigint) AND (b < '30'::bigint))))) +Distributed by: (a) + +insert into range_parted values ('c', 9, 0, 0, ''); +-- ok +update part_def set a = 'd' where a = 'c'; +-- fail +update part_def set a = 'a' where a = 'd'; +ERROR: new row for relation "part_def" violates partition constraint (seg2 127.0.0.1:7004 pid=32017) +DETAIL: Failing row contains (a, 9, 0, 0, ). +:show_data; + partname | a | b | c | d | e +----------------+---+----+-----+----+--- + part_a_10_a_20 | a | 10 | 200 | 1 | e + part_a_1_a_10 | a | 1 | 1 | 1 | e + part_c_1_100 | b | 12 | 96 | 1 | e + part_c_1_100 | b | 13 | 97 | 2 | e + part_d_15_20 | b | 15 | 105 | 16 | e + part_d_15_20 | b | 17 | 105 | 19 | e + part_def | d | 9 | 0 | 0 | +(7 rows) + +-- Update row movement from non-default to default partition. +-- fail, default partition is not under part_a_10_a_20; +UPDATE part_a_10_a_20 set a = 'ad' WHERE a = 'a'; +ERROR: new row for relation "part_a_10_a_20" violates partition constraint (seg2 127.0.0.1:7004 pid=32017) +DETAIL: Failing row contains (ad, 10, 200, 1, e). +-- ok +-- UPDATE range_parted set a = 'ad' WHERE a = 'a'; +UPDATE range_parted set a = 'bd' WHERE a = 'b'; +:show_data; + partname | a | b | c | d | e +----------+----+----+-----+----+--- + part_def | ad | 1 | 1 | 1 | e + part_def | ad | 10 | 200 | 1 | e + part_def | bd | 12 | 96 | 1 | e + part_def | bd | 13 | 97 | 2 | e + part_def | bd | 15 | 105 | 16 | e + part_def | bd | 17 | 105 | 19 | e + part_def | d | 9 | 0 | 0 | +(7 rows) + +-- Update row movement from default to non-default partitions. +-- ok +UPDATE range_parted set a = 'a' WHERE a = 'ad'; +UPDATE range_parted set a = 'b' WHERE a = 'bd'; +:show_data; + partname | a | b | c | d | e +----------------+---+----+-----+----+--- + part_a_10_a_20 | a | 10 | 200 | 1 | e + part_a_1_a_10 | a | 1 | 1 | 1 | e + part_c_1_100 | b | 12 | 96 | 1 | e + part_c_1_100 | b | 13 | 97 | 2 | e + part_d_15_20 | b | 15 | 105 | 16 | e + part_d_15_20 | b | 17 | 105 | 19 | e + part_def | d | 9 | 0 | 0 | +(7 rows) + +-- Cleanup: range_parted no longer needed. +DROP TABLE range_parted; +CREATE TABLE list_parted ( + a text, + b int +) PARTITION BY list (a); +NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'a' as the Cloudberry Database data distribution key for this table. +HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. 
+CREATE TABLE list_part1 PARTITION OF list_parted for VALUES in ('a', 'b'); +NOTICE: table has parent, setting distribution columns to match parent table +CREATE TABLE list_default PARTITION OF list_parted default; +NOTICE: table has parent, setting distribution columns to match parent table +INSERT into list_part1 VALUES ('a', 1); +INSERT into list_default VALUES ('d', 10); +-- fail +UPDATE list_default set a = 'a' WHERE a = 'd'; +ERROR: new row for relation "list_default" violates partition constraint (seg2 127.0.0.1:7004 pid=32017) +DETAIL: Failing row contains (a, 10). +-- ok +UPDATE list_default set a = 'x' WHERE a = 'd'; +DROP TABLE list_parted; +-- Test retrieval of system columns with non-consistent partition row types. +-- This is only partially supported, as seen in the results. +-- start_ignore +-- create table utrtest (a int, b text) partition by list (a); +-- create table utr1 (a int check (a in (1)), q text, b text); +-- create table utr2 (a int check (a in (2)), b text); +-- alter table utr1 drop column q; +-- alter table utrtest attach partition utr1 for values in (1); +-- alter table utrtest attach partition utr2 for values in (2); +-- -- xmin_ok is likely false, xmin and pg_current_xact_id() comes from +-- -- data segment and master, respectively. +-- insert into utrtest values (1, 'foo') +-- returning *, tableoid::regclass, xmin = pg_current_xact_id()::xid as xmin_ok; +-- insert into utrtest values (2, 'bar') +-- returning *, tableoid::regclass, xmin = pg_current_xact_id()::xid as xmin_ok; -- fails +-- insert into utrtest values (2, 'bar') +-- returning *, tableoid::regclass; +-- update utrtest set b = b || b from (values (1), (2)) s(x) where a = s.x +-- returning *, tableoid::regclass, xmin = pg_current_xact_id()::xid as xmin_ok; +-- update utrtest set a = 3 - a from (values (1), (2)) s(x) where a = s.x +-- returning *, tableoid::regclass, xmin = pg_current_xact_id()::xid as xmin_ok; -- fails +-- update utrtest set a = 3 - a from (values (1), (2)) s(x) where a = s.x +-- returning *, tableoid::regclass; +-- delete from utrtest +-- returning *, tableoid::regclass, xmax = pg_current_xact_id()::xid as xmax_ok; +-- drop table utrtest; +-- end_ignore +-------------- +-- Some more update-partition-key test scenarios below. This time use list +-- partitions. +-------------- +-- Setup for list partitions +CREATE TABLE list_parted (a numeric, b int, c int8) PARTITION BY list (a); +NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'a' as the Cloudberry Database data distribution key for this table. +HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. +CREATE TABLE sub_parted PARTITION OF list_parted for VALUES in (1) PARTITION BY list (b); +NOTICE: table has parent, setting distribution columns to match parent table +CREATE TABLE sub_part1(b int, c int8, a numeric); +NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'b' as the Cloudberry Database data distribution key for this table. +HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. +alter table sub_part1 set distributed by (a); -- GPDB: distribution policy must match the parent table. 
+ALTER TABLE sub_parted ATTACH PARTITION sub_part1 for VALUES in (1); +CREATE TABLE sub_part2(b int, c int8, a numeric); +NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'b' as the Cloudberry Database data distribution key for this table. +HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. +alter table sub_part2 set distributed by (a); -- GPDB: distribution policy must match the parent table. +ALTER TABLE sub_parted ATTACH PARTITION sub_part2 for VALUES in (2); +CREATE TABLE list_part1(a numeric, b int, c int8); +NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'a' as the Cloudberry Database data distribution key for this table. +HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. +ALTER TABLE list_parted ATTACH PARTITION list_part1 for VALUES in (2,3); +INSERT into list_parted VALUES (2,5,50); +INSERT into list_parted VALUES (3,6,60); +INSERT into sub_parted VALUES (1,1,60); +INSERT into sub_parted VALUES (1,2,10); +-- Test partition constraint violation when intermediate ancestor is used and +-- constraint is inherited from upper root. +UPDATE sub_parted set a = 2 WHERE c = 10; +ERROR: new row for relation "sub_parted" violates partition constraint (seg1 127.0.0.1:7003 pid=32016) +DETAIL: Failing row contains (2, 2, 10). +-- Test update-partition-key, where the unpruned partitions do not have their +-- partition keys updated. +SELECT tableoid::regclass::text, * FROM list_parted WHERE a = 2 ORDER BY 1; + tableoid | a | b | c +------------+---+---+---- + list_part1 | 2 | 5 | 50 +(1 row) + +UPDATE list_parted set b = c + a WHERE a = 2; +SELECT tableoid::regclass::text, * FROM list_parted WHERE a = 2 ORDER BY 1; + tableoid | a | b | c +------------+---+----+---- + list_part1 | 2 | 52 | 50 +(1 row) + +-- Test the case where BR UPDATE triggers change the partition key. +-- CREATE FUNCTION func_parted_mod_b() returns trigger as $$ +-- BEGIN +-- NEW.b = 2; -- This is changing partition key column. +-- return NEW; +-- END $$ LANGUAGE plpgsql; +-- CREATE TRIGGER parted_mod_b before update on sub_part1 +-- for each row execute procedure func_parted_mod_b(); +-- SELECT tableoid::regclass::text, * FROM list_parted ORDER BY 1, 2, 3, 4; +-- -- This should do the tuple routing even though there is no explicit +-- -- partition-key update, because there is a trigger on sub_part1. +-- UPDATE list_parted set c = 70 WHERE b = 1; +-- SELECT tableoid::regclass::text, * FROM list_parted ORDER BY 1, 2, 3, 4; +-- DROP TRIGGER parted_mod_b ON sub_part1; +-- -- If BR DELETE trigger prevented DELETE from happening, we should also skip +-- -- the INSERT if that delete is part of UPDATE=>DELETE+INSERT. +-- CREATE OR REPLACE FUNCTION func_parted_mod_b() returns trigger as $$ +-- BEGIN +-- raise notice 'Trigger: Got OLD row %, but returning NULL', OLD; +-- return NULL; +-- END $$ LANGUAGE plpgsql; +-- CREATE TRIGGER trig_skip_delete before delete on sub_part2 +-- for each row execute procedure func_parted_mod_b(); +-- UPDATE list_parted set b = 1 WHERE c = 70; +-- SELECT tableoid::regclass::text, * FROM list_parted ORDER BY 1, 2, 3, 4; +-- -- Drop the trigger. Now the row should be moved. 
+-- DROP TRIGGER trig_skip_delete ON sub_part2; +-- UPDATE list_parted set b = 1 WHERE c = 70; +-- SELECT tableoid::regclass::text, * FROM list_parted ORDER BY 1, 2, 3, 4; +-- DROP FUNCTION func_parted_mod_b(); +-- UPDATE partition-key with FROM clause. If join produces multiple output +-- rows for the same row to be modified, we should tuple-route the row only +-- once. There should not be any rows inserted. +CREATE TABLE non_parted (id int); +NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'id' as the Cloudberry Database data distribution key for this table. +HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. +INSERT into non_parted VALUES (1), (1), (1), (2), (2), (2), (3), (3), (3); +UPDATE list_parted t1 set a = 2 FROM non_parted t2 WHERE t1.a = t2.id and a = 1; +ERROR: multiple updates to a row by the same query is not allowed (seg2 127.0.0.1:7004 pid=25968) +-- In GPDB, the above UPDATE fails because the distribution key is updated, and +-- the Split Update codepath isn't smart enough to handle this situation. With +-- a non-Split Update, it works: +-- ALTER TABLE list_parted SET DISTRIBUTED BY (c); +UPDATE list_parted t1 set a = 2 FROM non_parted t2 WHERE t1.a = t2.id and a = 1; +ERROR: multiple updates to a row by the same query is not allowed (seg2 127.0.0.1:7004 pid=25968) +SELECT tableoid::regclass::text, * FROM list_parted ORDER BY 1, 2, 3, 4; + tableoid | a | b | c +------------+---+----+---- + list_part1 | 2 | 52 | 50 + list_part1 | 3 | 6 | 60 + sub_part1 | 1 | 1 | 60 + sub_part2 | 1 | 2 | 10 +(4 rows) + +DROP TABLE non_parted; +-- Cleanup: list_parted no longer needed. +DROP TABLE list_parted; +-- create custom operator class and hash function, for the same reason +-- explained in alter_table.sql +create or replace function dummy_hashint4(a int4, seed int8) returns int8 as +$$ begin return (a + seed); end; $$ language 'plpgsql' immutable; +create operator class custom_opclass for type int4 using hash as +operator 1 = , function 2 dummy_hashint4(int4, int8); +create table hash_parted ( + a int, + b int +) partition by hash (a custom_opclass, b custom_opclass); +NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'a' as the Cloudberry Database data distribution key for this table. +HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. +create table hpart1 partition of hash_parted for values with (modulus 2, remainder 1); +NOTICE: table has parent, setting distribution columns to match parent table +create table hpart2 partition of hash_parted for values with (modulus 4, remainder 2); +NOTICE: table has parent, setting distribution columns to match parent table +create table hpart3 partition of hash_parted for values with (modulus 8, remainder 0); +NOTICE: table has parent, setting distribution columns to match parent table +create table hpart4 partition of hash_parted for values with (modulus 8, remainder 4); +NOTICE: table has parent, setting distribution columns to match parent table +insert into hpart1 values (1, 1); +insert into hpart2 values (2, 5); +insert into hpart4 values (3, 4); +-- fail +update hpart1 set a = 3, b=4 where a = 1; +ERROR: new row for relation "hpart1" violates partition constraint (seg0 127.0.0.1:7002 pid=32015) +DETAIL: Failing row contains (3, 4). 
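The failure above is ordinary declarative-partitioning behavior rather than a PAX restriction: an UPDATE issued directly against a leaf such as hpart1 must keep the row inside that leaf's hash slice, whereas the same change issued against the root table is re-routed to the matching partition, as the row-movement cases that follow demonstrate. A minimal sketch of the contrast, assuming the hash_parted layout created above (the root-table variant is expected, though not verified here, to succeed via row movement):

-- fails: the leaf enforces its own partition constraint
update hpart1 set a = 3, b = 4 where a = 1;
-- expected to succeed: the root re-routes the tuple to the right leaf
update hash_parted set a = 3, b = 4 where a = 1;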
+-- ok, row movement +update hash_parted set b = b - 1 where b = 1; +-- ok +update hash_parted set b = b + 8 where b = 1; +-- cleanup +drop table hash_parted; +drop operator class custom_opclass using hash; +drop function dummy_hashint4(a int4, seed int8); +-- end_ignore diff --git a/contrib/pax_storage/src/data/expected/update_gp.out b/contrib/pax_storage/src/data/expected/update_gp.out new file mode 100644 index 00000000000..ba6ca2bc930 --- /dev/null +++ b/contrib/pax_storage/src/data/expected/update_gp.out @@ -0,0 +1,763 @@ +-- Test DELETE and UPDATE on an inherited table. +-- The special aspect of this table is that the inherited table has +-- a different distribution key. 'parent' table's distribution key matches +-- that of 'todelete', but 'child's doesn't. Test that the planner adds a Motion +-- node correctly for child. +set default_table_access_method = 'pax'; +create table todelete (a int) distributed by (a); +create table parent (a int, b int, c int) distributed by (a); +create table child (a int, b int, c int) inherits (parent) distributed by (b); +NOTICE: merging column "a" with inherited definition +NOTICE: merging column "b" with inherited definition +NOTICE: merging column "c" with inherited definition +insert into parent select g, g, g from generate_series(1,5) g; +insert into child select g, g, g from generate_series(6,10) g; +insert into todelete select generate_series(3,4); +delete from parent using todelete where parent.a = todelete.a; +insert into todelete select generate_series(5,7); +update parent set c=c+100 from todelete where parent.a = todelete.a; +select * from parent; + a | b | c +----+----+----- + 5 | 5 | 105 + 9 | 9 | 9 + 10 | 10 | 10 + 6 | 6 | 106 + 2 | 2 | 2 + 8 | 8 | 8 + 7 | 7 | 107 + 1 | 1 | 1 +(8 rows) + +drop table todelete; +drop table child; +drop table parent; +-- This is similar to the above, but with a partitioned table (which is +-- implemented by inheritance) rather than an explicitly inherited table. +-- The scans on some of the partitions degenerate into Result nodes with +-- False one-time filter, which don't need a Motion node. +create table todelete (a int, b int) distributed by (a); +create table target (a int, b int, c int) + distributed by (a) + partition by range (c) (start(1) end(5) every(1), default partition extra); +insert into todelete select g, g % 4 from generate_series(1, 10) g; +insert into target select g, 0, 3 from generate_series(1, 5) g; +insert into target select g, 0, 1 from generate_series(1, 5) g; +delete from target where c = 3 and a in (select b from todelete); +insert into todelete values (1, 5); +update target set b=target.b+100 where c = 3 and a in (select b from todelete); +select * from target; + a | b | c +---+-----+--- + 5 | 0 | 1 + 5 | 100 | 3 + 1 | 0 | 1 + 2 | 0 | 1 + 3 | 0 | 1 + 4 | 0 | 1 + 4 | 0 | 3 +(7 rows) + +-- Also test an update with a qual that doesn't match any partition. The +-- Append degenerates into a dummy Result with false One-Time Filter. +alter table target drop default partition; +update target set b = 10 where c = 10; +drop table todelete; +drop table target; +-- +-- Test UPDATE on an inheritance parent table, where some child tables need a +-- Split Update, but not all.
+-- +create table base_tbl (a int4, b int4) distributed by (a); +create table child_a (a int4, b int4) inherits (base_tbl) distributed by (a); +NOTICE: merging column "a" with inherited definition +NOTICE: merging column "b" with inherited definition +create table child_b (a int4, b int4) inherits (base_tbl) distributed by (b); +NOTICE: merging column "a" with inherited definition +NOTICE: merging column "b" with inherited definition +insert into base_tbl select g, g from generate_series(1, 5) g; +-- start_ignore +explain (costs off) update base_tbl set a=a+1; +ERROR: can't split update for inherit table: base_tbl (preptlist.c:138) +-- end_ignore +update base_tbl set a = 5; +ERROR: can't split update for inherit table: base_tbl (preptlist.c:138) +-- +-- Explicit Distribution motion must be added if any of the child nodes +-- contains any motion excluding the motions in initplans. +-- These test cases and expectation are applicable for GPDB planner not for ORCA. +-- +SET gp_autostats_mode = NONE; +CREATE TABLE keo1 ( user_vie_project_code_pk character varying(24), user_vie_fiscal_year_period_sk character varying(24), user_vie_act_cntr_marg_cum character varying(24)) DISTRIBUTED RANDOMLY; +INSERT INTO keo1 VALUES ('1', '1', '1'); +CREATE TABLE keo2 ( projects_pk character varying(24)) DISTRIBUTED RANDOMLY; +INSERT INTO keo2 VALUES ('1'); +CREATE TABLE keo3 ( sky_per character varying(24), bky_per character varying(24)) DISTRIBUTED BY (sky_per); +INSERT INTO keo3 VALUES ('1', '1'); +CREATE TABLE keo4 ( keo_para_required_period character varying(6), keo_para_budget_date character varying(24)) DISTRIBUTED RANDOMLY; +INSERT INTO keo4 VALUES ('1', '1'); +-- Explicit Redistribution motion should be added in case of GPDB Planner (test case not applicable for ORCA) +-- start_ignore +EXPLAIN (COSTS OFF) UPDATE keo1 SET user_vie_act_cntr_marg_cum = 234.682 FROM + ( SELECT a.user_vie_project_code_pk FROM keo1 a INNER JOIN keo2 b + ON b.projects_pk=a.user_vie_project_code_pk + WHERE a.user_vie_fiscal_year_period_sk = + (SELECT MAX (sky_per) FROM keo3 WHERE bky_per = + (SELECT keo4.keo_para_required_period FROM keo4 WHERE keo_para_budget_date = + (SELECT min (keo4.keo_para_budget_date) FROM keo4))) + ) t1 +WHERE t1.user_vie_project_code_pk = keo1.user_vie_project_code_pk; + QUERY PLAN +------------------------------------------------------------------------------------------------------------------------- + Update on keo1 + InitPlan 3 (returns $2) (slice4) + -> Finalize Aggregate + InitPlan 2 (returns $1) (slice6) + -> Gather Motion 3:1 (slice7; segments: 3) + InitPlan 1 (returns $0) (slice8) + -> Finalize Aggregate + -> Gather Motion 3:1 (slice9; segments: 3) + -> Partial Aggregate + -> Seq Scan on keo4 + -> Seq Scan on keo4 keo4_1 + Filter: ((keo_para_budget_date)::text = $0) + -> Gather Motion 3:1 (slice5; segments: 3) + -> Partial Aggregate + -> Seq Scan on keo3 + Filter: ((bky_per)::text = ($1)::text) + -> Explicit Redistribute Motion 3:3 (slice1; segments: 3) + -> Hash Join + Hash Cond: ((b.projects_pk)::text = (a.user_vie_project_code_pk)::text) + -> Seq Scan on keo2 b + -> Hash + -> Broadcast Motion 3:3 (slice2; segments: 3) + -> Hash Join + Hash Cond: ((keo1.user_vie_project_code_pk)::text = (a.user_vie_project_code_pk)::text) + -> Seq Scan on keo1 + -> Hash + -> Broadcast Motion 3:3 (slice3; segments: 3) + -> Seq Scan on keo1 a + Filter: ((user_vie_fiscal_year_period_sk)::text = $2) + Optimizer: Postgres query optimizer +(30 rows) + +-- end_ignore +UPDATE keo1 SET user_vie_act_cntr_marg_cum = 
234.682 FROM + ( SELECT a.user_vie_project_code_pk FROM keo1 a INNER JOIN keo2 b + ON b.projects_pk=a.user_vie_project_code_pk + WHERE a.user_vie_fiscal_year_period_sk = + (SELECT MAX (sky_per) FROM keo3 WHERE bky_per = + (SELECT keo4.keo_para_required_period FROM keo4 WHERE keo_para_budget_date = + (SELECT min (keo4.keo_para_budget_date) FROM keo4))) + ) t1 +WHERE t1.user_vie_project_code_pk = keo1.user_vie_project_code_pk; +SELECT user_vie_act_cntr_marg_cum FROM keo1; + user_vie_act_cntr_marg_cum +---------------------------- + 234.682 +(1 row) + +-- Explicit Redistribution motion should not be added in case of GPDB Planner (test case not applicable to ORCA) +CREATE TABLE keo5 (x int, y int) DISTRIBUTED BY (x); +INSERT INTO keo5 VALUES (1,1); +-- start_ignore +EXPLAIN (COSTS OFF) DELETE FROM keo5 WHERE x IN (SELECT x FROM keo5 WHERE EXISTS (SELECT x FROM keo5 WHERE x < 2)); + QUERY PLAN +------------------------------------------------------- + Delete on keo5 + InitPlan 1 (returns $0) (slice1) + -> Gather Motion 3:1 (slice2; segments: 3) + -> Seq Scan on keo5 keo5_2 + Filter: (x < 2) + -> Result + One-Time Filter: $0 + -> Hash Join + Hash Cond: (keo5.x = keo5_1.x) + -> Seq Scan on keo5 + -> Hash + -> HashAggregate + Group Key: keo5_1.x + -> Seq Scan on keo5 keo5_1 + Optimizer: Postgres query optimizer +(15 rows) + +-- end_ignore +DELETE FROM keo5 WHERE x IN (SELECT x FROM keo5 WHERE EXISTS (SELECT x FROM keo5 WHERE x < 2)); +SELECT x FROM keo5; + x +--- +(0 rows) + +RESET gp_autostats_mode; +DROP TABLE keo1; +DROP TABLE keo2; +DROP TABLE keo3; +DROP TABLE keo4; +DROP TABLE keo5; +-- start_ignore +-- -- text types. We should support the following updates. +-- -- +-- CREATE TEMP TABLE ttab1 (a varchar(15), b integer) DISTRIBUTED BY (a); +-- CREATE TEMP TABLE ttab2 (a varchar(15), b integer) DISTRIBUTED BY (a); +-- UPDATE ttab1 SET b = ttab2.b FROM ttab2 WHERE ttab1.a = ttab2.a; +-- DROP TABLE ttab1; +-- DROP TABLE ttab2; +-- CREATE TEMP TABLE ttab1 (a text, b integer) DISTRIBUTED BY (a); +-- CREATE TEMP TABLE ttab2 (a text, b integer) DISTRIBUTED BY (a); +-- UPDATE ttab1 SET b = ttab2.b FROM ttab2 WHERE ttab1.a = ttab2.a; +-- DROP TABLE ttab1; +-- DROP TABLE ttab2; +-- CREATE TEMP TABLE ttab1 (a varchar, b integer) DISTRIBUTED BY (a); +-- CREATE TEMP TABLE ttab2 (a varchar, b integer) DISTRIBUTED BY (a); +-- UPDATE ttab1 SET b = ttab2.b FROM ttab2 WHERE ttab1.a = ttab2.a; +-- DROP TABLE ttab1; +-- DROP TABLE ttab2; +-- CREATE TEMP TABLE ttab1 (a char(15), b integer) DISTRIBUTED BY (a); +-- CREATE TEMP TABLE ttab2 (a char(15), b integer) DISTRIBUTED BY (a); +-- UPDATE ttab1 SET b = ttab2.b FROM ttab2 WHERE ttab1.a = ttab2.a; +-- DROP TABLE IF EXISTS update_distr_key; +-- CREATE TEMP TABLE update_distr_key (a int, b int) DISTRIBUTED BY (a); +-- INSERT INTO update_distr_key select i, i* 10 from generate_series(0, 9) i; +-- UPDATE update_distr_key SET a = 5 WHERE b = 10; +-- SELECT * from update_distr_key; +-- DROP TABLE update_distr_key; +-- end_ignore +-- below cases is to test multi-hash-cols +CREATE TABLE tab3(c1 int, c2 int, c3 int, c4 int, c5 int) DISTRIBUTED BY (c1, c2, c3); +CREATE TABLE tab5(c1 int, c2 int, c3 int, c4 int, c5 int) DISTRIBUTED BY (c1, c2, c3, c4, c5); +INSERT INTO tab3 SELECT i, i, i, i, i FROM generate_series(1, 10)i; +INSERT INTO tab5 SELECT i, i, i, i, i FROM generate_series(1, 10)i; +-- test tab3 +SELECT gp_segment_id, * FROM tab3; + gp_segment_id | c1 | c2 | c3 | c4 | c5 +---------------+----+----+----+----+---- + 0 | 5 | 5 | 5 | 5 | 5 + 0 | 8 | 8 | 8 | 8 | 8 + 2 
| 1 | 1 | 1 | 1 | 1 + 2 | 2 | 2 | 2 | 2 | 2 + 2 | 6 | 6 | 6 | 6 | 6 + 2 | 7 | 7 | 7 | 7 | 7 + 1 | 3 | 3 | 3 | 3 | 3 + 1 | 4 | 4 | 4 | 4 | 4 + 1 | 9 | 9 | 9 | 9 | 9 + 1 | 10 | 10 | 10 | 10 | 10 +(10 rows) + +UPDATE tab3 set c1 = 9 where c4 = 1; +SELECT gp_segment_id, * FROM tab3; + gp_segment_id | c1 | c2 | c3 | c4 | c5 +---------------+----+----+----+----+---- + 1 | 3 | 3 | 3 | 3 | 3 + 1 | 4 | 4 | 4 | 4 | 4 + 1 | 9 | 9 | 9 | 9 | 9 + 1 | 10 | 10 | 10 | 10 | 10 + 2 | 2 | 2 | 2 | 2 | 2 + 2 | 6 | 6 | 6 | 6 | 6 + 2 | 7 | 7 | 7 | 7 | 7 + 0 | 5 | 5 | 5 | 5 | 5 + 0 | 8 | 8 | 8 | 8 | 8 + 0 | 9 | 1 | 1 | 1 | 1 +(10 rows) + +UPDATE tab3 set (c1,c2) = (5,6) where c4 = 1; +SELECT gp_segment_id, * FROM tab3; + gp_segment_id | c1 | c2 | c3 | c4 | c5 +---------------+----+----+----+----+---- + 1 | 3 | 3 | 3 | 3 | 3 + 1 | 4 | 4 | 4 | 4 | 4 + 1 | 9 | 9 | 9 | 9 | 9 + 1 | 10 | 10 | 10 | 10 | 10 + 2 | 2 | 2 | 2 | 2 | 2 + 2 | 6 | 6 | 6 | 6 | 6 + 2 | 7 | 7 | 7 | 7 | 7 + 2 | 5 | 6 | 1 | 1 | 1 + 0 | 5 | 5 | 5 | 5 | 5 + 0 | 8 | 8 | 8 | 8 | 8 +(10 rows) + +UPDATE tab3 set (c1,c2,c3) = (3,2,1) where c4 = 1; +SELECT gp_segment_id, * FROM tab3; + gp_segment_id | c1 | c2 | c3 | c4 | c5 +---------------+----+----+----+----+---- + 0 | 5 | 5 | 5 | 5 | 5 + 0 | 8 | 8 | 8 | 8 | 8 + 1 | 3 | 3 | 3 | 3 | 3 + 1 | 4 | 4 | 4 | 4 | 4 + 1 | 9 | 9 | 9 | 9 | 9 + 1 | 10 | 10 | 10 | 10 | 10 + 2 | 2 | 2 | 2 | 2 | 2 + 2 | 6 | 6 | 6 | 6 | 6 + 2 | 7 | 7 | 7 | 7 | 7 + 2 | 3 | 2 | 1 | 1 | 1 +(10 rows) + +UPDATE tab3 set c1 = 11 where c2 = 10 and c2 < 1; +SELECT gp_segment_id, * FROM tab3; + gp_segment_id | c1 | c2 | c3 | c4 | c5 +---------------+----+----+----+----+---- + 1 | 3 | 3 | 3 | 3 | 3 + 1 | 4 | 4 | 4 | 4 | 4 + 1 | 9 | 9 | 9 | 9 | 9 + 1 | 10 | 10 | 10 | 10 | 10 + 0 | 5 | 5 | 5 | 5 | 5 + 0 | 8 | 8 | 8 | 8 | 8 + 2 | 2 | 2 | 2 | 2 | 2 + 2 | 6 | 6 | 6 | 6 | 6 + 2 | 7 | 7 | 7 | 7 | 7 + 2 | 3 | 2 | 1 | 1 | 1 +(10 rows) + +-- test tab5 +SELECT gp_segment_id, * FROM tab5; + gp_segment_id | c1 | c2 | c3 | c4 | c5 +---------------+----+----+----+----+---- + 0 | 4 | 4 | 4 | 4 | 4 + 0 | 9 | 9 | 9 | 9 | 9 + 0 | 10 | 10 | 10 | 10 | 10 + 1 | 1 | 1 | 1 | 1 | 1 + 1 | 2 | 2 | 2 | 2 | 2 + 1 | 3 | 3 | 3 | 3 | 3 + 1 | 5 | 5 | 5 | 5 | 5 + 2 | 6 | 6 | 6 | 6 | 6 + 2 | 7 | 7 | 7 | 7 | 7 + 2 | 8 | 8 | 8 | 8 | 8 +(10 rows) + +UPDATE tab5 set c1 = 1000 where c4 = 1; +SELECT gp_segment_id, * FROM tab5; + gp_segment_id | c1 | c2 | c3 | c4 | c5 +---------------+------+----+----+----+---- + 1 | 2 | 2 | 2 | 2 | 2 + 1 | 3 | 3 | 3 | 3 | 3 + 1 | 5 | 5 | 5 | 5 | 5 + 1 | 1000 | 1 | 1 | 1 | 1 + 2 | 6 | 6 | 6 | 6 | 6 + 2 | 7 | 7 | 7 | 7 | 7 + 2 | 8 | 8 | 8 | 8 | 8 + 0 | 4 | 4 | 4 | 4 | 4 + 0 | 9 | 9 | 9 | 9 | 9 + 0 | 10 | 10 | 10 | 10 | 10 +(10 rows) + +UPDATE tab5 set (c1,c2) = (9,10) where c4 = 1; +SELECT gp_segment_id, * FROM tab5; + gp_segment_id | c1 | c2 | c3 | c4 | c5 +---------------+----+----+----+----+---- + 1 | 2 | 2 | 2 | 2 | 2 + 1 | 3 | 3 | 3 | 3 | 3 + 1 | 5 | 5 | 5 | 5 | 5 + 0 | 4 | 4 | 4 | 4 | 4 + 0 | 9 | 9 | 9 | 9 | 9 + 0 | 10 | 10 | 10 | 10 | 10 + 0 | 9 | 10 | 1 | 1 | 1 + 2 | 6 | 6 | 6 | 6 | 6 + 2 | 7 | 7 | 7 | 7 | 7 + 2 | 8 | 8 | 8 | 8 | 8 +(10 rows) + +UPDATE tab5 set (c1,c2,c4) = (5,8,6) where c4 = 1; +SELECT gp_segment_id, * FROM tab5; + gp_segment_id | c1 | c2 | c3 | c4 | c5 +---------------+----+----+----+----+---- + 2 | 6 | 6 | 6 | 6 | 6 + 2 | 7 | 7 | 7 | 7 | 7 + 2 | 8 | 8 | 8 | 8 | 8 + 1 | 2 | 2 | 2 | 2 | 2 + 1 | 3 | 3 | 3 | 3 | 3 + 1 | 5 | 5 | 5 | 5 | 5 + 0 | 4 | 4 | 4 | 4 | 4 + 0 | 9 | 9 | 9 | 9 | 9 + 0 | 10 | 10 | 10 | 10 | 10 + 0 | 5 | 8 | 1 | 6 
| 1 +(10 rows) + +UPDATE tab5 set (c1,c2,c3,c4,c5) = (1,2,3,0,6) where c5 = 1; +SELECT gp_segment_id, * FROM tab5; + gp_segment_id | c1 | c2 | c3 | c4 | c5 +---------------+----+----+----+----+---- + 1 | 2 | 2 | 2 | 2 | 2 + 1 | 3 | 3 | 3 | 3 | 3 + 1 | 5 | 5 | 5 | 5 | 5 + 1 | 1 | 2 | 3 | 0 | 6 + 2 | 6 | 6 | 6 | 6 | 6 + 2 | 7 | 7 | 7 | 7 | 7 + 2 | 8 | 8 | 8 | 8 | 8 + 0 | 4 | 4 | 4 | 4 | 4 + 0 | 9 | 9 | 9 | 9 | 9 + 0 | 10 | 10 | 10 | 10 | 10 +(10 rows) + +UPDATE tab5 set c1 = 11 where c3 = 10 and c3 < 1; +SELECT gp_segment_id, * FROM tab5; + gp_segment_id | c1 | c2 | c3 | c4 | c5 +---------------+----+----+----+----+---- + 1 | 2 | 2 | 2 | 2 | 2 + 1 | 3 | 3 | 3 | 3 | 3 + 1 | 5 | 5 | 5 | 5 | 5 + 1 | 1 | 2 | 3 | 0 | 6 + 2 | 6 | 6 | 6 | 6 | 6 + 2 | 7 | 7 | 7 | 7 | 7 + 2 | 8 | 8 | 8 | 8 | 8 + 0 | 4 | 4 | 4 | 4 | 4 + 0 | 9 | 9 | 9 | 9 | 9 + 0 | 10 | 10 | 10 | 10 | 10 +(10 rows) + +-- start_ignore +EXPLAIN (COSTS OFF) UPDATE tab3 SET C1 = C1 + 1, C5 = C5+1; + QUERY PLAN +--------------------------------------------------------------- + Update on tab3 + -> Explicit Redistribute Motion 3:3 (slice1; segments: 3) + -> Split + -> Seq Scan on tab3 + Optimizer: Postgres query optimizer +(5 rows) + +-- end_ignore +-- clean up +drop table tab3; +drop table tab5; +-- Update distribution key +-- start_ignore +drop table if exists r; +NOTICE: table "r" does not exist, skipping +drop table if exists s; +NOTICE: table "s" does not exist, skipping +drop table if exists update_dist; +NOTICE: table "update_dist" does not exist, skipping +drop table if exists update_ao_table; +NOTICE: table "update_ao_table" does not exist, skipping +drop table if exists update_aoco_table; +NOTICE: table "update_aoco_table" does not exist, skipping +-- end_ignore +-- Update normal table distribution key +create table update_dist(a int) distributed by (a); +insert into update_dist values(1); +update update_dist set a=0 where a=1; +select * from update_dist; + a +--- + 0 +(1 row) + +-- Update distribution key with join +create table r (a int, b int) distributed by (a); +create table s (a int, b int) distributed by (a); +insert into r select generate_series(1, 5), generate_series(1, 5) * 2; +insert into s select generate_series(1, 5), generate_series(1, 5) * 2; +select * from r; + a | b +---+---- + 1 | 2 + 5 | 10 + 2 | 4 + 3 | 6 + 4 | 8 +(5 rows) + +select * from s; + a | b +---+---- + 2 | 4 + 3 | 6 + 4 | 8 + 5 | 10 + 1 | 2 +(5 rows) + +update r set a = r.a + 1 from s where r.a = s.a; +select * from r; + a | b +---+---- + 3 | 4 + 4 | 6 + 2 | 2 + 5 | 8 + 6 | 10 +(5 rows) + +update r set a = r.a + 1 where a in (select a from s); +select * from r; + a | b +---+---- + 4 | 4 + 3 | 2 + 6 | 10 + 5 | 6 + 6 | 8 +(5 rows) + +-- Update redistribution +delete from r; +delete from s; +insert into r select generate_series(1, 5), generate_series(1, 5); +insert into s select generate_series(1, 5), generate_series(1, 5) * 2; +select * from r; + a | b +---+--- + 5 | 5 + 1 | 1 + 2 | 2 + 3 | 3 + 4 | 4 +(5 rows) + +select * from s; + a | b +---+---- + 1 | 2 + 5 | 10 + 2 | 4 + 3 | 6 + 4 | 8 +(5 rows) + +update r set a = r.a + 1 from s where r.b = s.b; +select * from r; + a | b +---+--- + 3 | 3 + 3 | 2 + 1 | 1 + 5 | 5 + 5 | 4 +(5 rows) + +update r set a = r.a + 1 where b in (select b from s); +select * from r; + a | b +---+--- + 1 | 1 + 5 | 5 + 6 | 4 + 3 | 3 + 4 | 2 +(5 rows) + +-- Update hash aggregate group by +delete from r; +delete from s; +insert into r select generate_series(1, 5), generate_series(1, 5) * 2; +insert into s select generate_series(1, 5), 
generate_series(1, 5); +select * from r; + a | b +---+---- + 1 | 2 + 2 | 4 + 3 | 6 + 4 | 8 + 5 | 10 +(5 rows) + +select * from s; + a | b +---+--- + 2 | 2 + 3 | 3 + 4 | 4 + 1 | 1 + 5 | 5 +(5 rows) + +update s set a = s.a + 1 where exists (select 1 from r where s.a = r.b); +select * from s; + a | b +---+--- + 5 | 5 + 5 | 4 + 1 | 1 + 3 | 3 + 3 | 2 +(5 rows) + +-- start_ignore +-- Update ao table distribution key +-- create table update_ao_table (a int, b int) WITH (appendonly=true) distributed by (a); +-- insert into update_ao_table select g, g from generate_series(1, 5) g; +-- select * from update_ao_table; +-- update update_ao_table set a = a + 1 where b = 3; +-- select * from update_ao_table; +-- Update aoco table distribution key +-- create table update_aoco_table (a int, b int) WITH (appendonly=true, orientation=column) distributed by (a); +-- insert into update_aoco_table select g,g from generate_series(1, 5) g; +-- select * from update_aoco_table; +-- update update_aoco_table set a = a + 1 where b = 3; +-- select * from update_aoco_table; +-- end_ignore +-- Update prepare +delete from s; +insert into s select generate_series(1, 5), generate_series(1, 5); +select * from r; + a | b +---+---- + 2 | 4 + 3 | 6 + 4 | 8 + 1 | 2 + 5 | 10 +(5 rows) + +select * from s; + a | b +---+--- + 1 | 1 + 5 | 5 + 2 | 2 + 3 | 3 + 4 | 4 +(5 rows) + +prepare update_s(int) as update s set a = s.a + $1 where exists (select 1 from r where s.a = r.b); +execute update_s(10); +select * from s; + a | b +----+--- + 3 | 3 + 1 | 1 + 12 | 2 + 5 | 5 + 14 | 4 +(5 rows) + +-- Confirm that a split update is not created for a table excluded by +-- constraints in the planner. +create table nosplitupdate (a int) distributed by (a); +-- start_ignore +explain update nosplitupdate set a=0 where a=1 and a<1; + QUERY PLAN +----------------------------------------------------------- + Update on nosplitupdate (cost=0.00..0.01 rows=0 width=0) + -> Result (cost=0.00..0.00 rows=0 width=46) + One-Time Filter: false + Optimizer: Postgres query optimizer +(4 rows) + +-- end_ignore +-- test split-update when split-node's flow is entry +create table tsplit_entry (c int); +NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'c' as the Cloudberry Database data distribution key for this table. +HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. 
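+-- In the test below, the subquery over gp_segment_configuration runs on the +-- entry db, so its one-row result has to be moved to the segments before the +-- Split Update can run; the ignored EXPLAIN that follows shows the Explicit +-- Redistribute Motion 1:3 the planner places above the Split node.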
+insert into tsplit_entry values (1), (2); +analyze tsplit_entry; +-- start_ignore +explain update tsplit_entry set c = s.a from (select count(*) as a from gp_segment_configuration) s; + QUERY PLAN +------------------------------------------------------------------------------------------------------------------ + Update on tsplit_entry (cost=10000000001.01..10000000002.17 rows=0 width=0) + -> Explicit Redistribute Motion 1:3 (slice1) (cost=10000000001.01..10000000002.17 rows=2 width=74) + -> Split (cost=10000000001.01..10000000002.08 rows=7 width=74) + -> Nested Loop (cost=10000000001.01..10000000002.08 rows=3 width=74) + -> Gather Motion 3:1 (slice2; segments: 3) (cost=0.00..1.03 rows=1 width=38) + -> Seq Scan on tsplit_entry (cost=0.00..1.01 rows=1 width=38) + -> Materialize (cost=1.01..1.04 rows=1 width=40) + -> Subquery Scan on s (cost=1.01..1.03 rows=1 width=40) + -> Aggregate (cost=1.01..1.02 rows=1 width=8) + -> Seq Scan on gp_segment_configuration (cost=0.00..1.01 rows=1 width=0) + Optimizer: Postgres query optimizer +(11 rows) + +-- end_ignore +update tsplit_entry set c = s.a from (select count(*) as a from gp_segment_configuration) s; +-- start_ignore +-- CREATE TABLE update_gp_foo ( +-- a_dist int, +-- b int, +-- c_part int, +-- d int +-- ) +-- WITH (appendonly=false) DISTRIBUTED BY (a_dist) PARTITION BY RANGE(c_part) +-- ( +-- PARTITION p20190305 START (1) END (2) WITH (tablename='update_gp_foo_1_prt_p20190305', appendonly=false) +-- ); +-- CREATE TABLE update_gp_foo1 ( +-- a_dist int, +-- b int, +-- c_part int, +-- d int +-- ) +-- WITH (appendonly=false) DISTRIBUTED BY (a_dist) PARTITION BY RANGE(c_part) +-- ( +-- PARTITION p20190305 START (1) END (2) WITH (tablename='update_gp_foo1_1_prt_p20190305', appendonly=false) +-- ); +-- INSERT INTO update_gp_foo VALUES (12, 40, 1, 50); +-- INSERT INTO update_gp_foo1 VALUES (12, 3, 1, 50); +-- UPDATE update_gp_foo +-- SET b = update_gp_foo.c_part, +-- d = update_gp_foo1.a_dist +-- FROM update_gp_foo1; +-- SELECT * from update_gp_foo; +-- end_ignore +-- Test insert on conflict do update +-- Insert on conflict do update is an insert statement but might +-- invoke ExecUpdate on segments; updating the distkeys of a table +-- may lead to wrong data distribution. We check this before +-- planning: if an `insert on conflict do update` statement sets the +-- dist keys of the table, it raises an error. 
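+-- (a plain UPDATE of a distkey is planned as a Split Update with a Motion, +-- but ON CONFLICT DO UPDATE runs inside the insert path on a single +-- segment, where no such Motion is available, hence the up-front check)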
+-- See github issue: https://github.com/greenplum-db/gpdb/issues/9444 +-- start_ignore +create table t_insert_on_conflict_update_distkey(a int, b int) distributed by (a); +create unique index uidx_t_insert_on_conflict_update_distkey on t_insert_on_conflict_update_distkey(a, b); +ERROR: not supported on pax relations: IndexBuildRangeScan +-- the following statement should error out because the on conflict update wants to +-- modify the tuple's distkey, which might lead to wrong data distribution +insert into t_insert_on_conflict_update_distkey values (1, 1) on conflict(a, b) do update set a = 1; +ERROR: modification of distribution columns in OnConflictUpdate is not supported +drop index uidx_t_insert_on_conflict_update_distkey; +ERROR: index "uidx_t_insert_on_conflict_update_distkey" does not exist +drop table t_insert_on_conflict_update_distkey; +-- a randomly distributed table cannot have a unique constraint, so next we test a replicated table +create table t_insert_on_conflict_update_distkey(a int, b int) distributed replicated; +create unique index uidx_t_insert_on_conflict_update_distkey on t_insert_on_conflict_update_distkey(a, b); +ERROR: not supported on pax relations: IndexBuildRangeScan +-- the following statement should succeed because a replicated table has no distkey +insert into t_insert_on_conflict_update_distkey values (1, 1) on conflict(a, b) do update set a = 1; +ERROR: there is no unique or exclusion constraint matching the ON CONFLICT specification +-- end_ignore +-- Some tests on a partitioned table. +CREATE TABLE update_gp_rangep (a int, b int, orig_a int) DISTRIBUTED BY (b) PARTITION BY RANGE (a); +CREATE TABLE update_gp_rangep_1_to_10 PARTITION OF update_gp_rangep FOR VALUES FROM (1) TO (10); +NOTICE: table has parent, setting distribution columns to match parent table +CREATE TABLE update_gp_rangep_10_to_20 PARTITION OF update_gp_rangep FOR VALUES FROM (10) TO (20); +NOTICE: table has parent, setting distribution columns to match parent table +INSERT INTO update_gp_rangep SELECT g, g, g FROM generate_series(1, 4) g; +-- Simple case: Same partition, same node. +UPDATE update_gp_rangep SET a = 9 WHERE a = 1; +-- Distribution key update, same partition. +UPDATE update_gp_rangep SET b = 1 WHERE a = 2; +-- Move row to different partition, but no change in distribution key +UPDATE update_gp_rangep SET a = 10 WHERE a = 3; +-- Move row to different partition and also change distribution key +UPDATE update_gp_rangep SET a = 11, b = 1 WHERE a = 4; +-- start_ignore +SELECT tableoid::regclass, * FROM update_gp_rangep ORDER BY orig_a; + tableoid | a | b | orig_a +---------------------------+----+---+-------- + update_gp_rangep_1_to_10 | 9 | 1 | 1 + update_gp_rangep_1_to_10 | 2 | 1 | 2 + update_gp_rangep_10_to_20 | 11 | 1 | 4 +(3 rows) + +-- end_ignore +-- Also do a lookup with a specific distribution key. If the rows were not +-- correctly moved across segments, this would fail to find them, assuming +-- that direct dispatch is effective. 
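+-- (with the qual b = 1 the QD can direct-dispatch the scan to the single +-- segment owning that hash value; a row stranded on the wrong segment by +-- the updates above would simply not be found)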
+SELECT tableoid::regclass, * FROM update_gp_rangep WHERE b = 1; + tableoid | a | b | orig_a +---------------------------+----+---+-------- + update_gp_rangep_1_to_10 | 9 | 1 | 1 + update_gp_rangep_1_to_10 | 2 | 1 | 2 + update_gp_rangep_10_to_20 | 11 | 1 | 4 +(3 rows) + +-- start_ignore +drop table r; +drop table s; +drop table update_dist; +drop table update_ao_table; +ERROR: table "update_ao_table" does not exist +drop table update_aoco_table; +ERROR: table "update_aoco_table" does not exist +drop table nosplitupdate; +drop table tsplit_entry; +-- end_ignore diff --git a/contrib/pax_storage/src/data/pax-cdbinit--1.0.sql b/contrib/pax_storage/src/data/pax-cdbinit--1.0.sql new file mode 100644 index 00000000000..c563aa4f73b --- /dev/null +++ b/contrib/pax_storage/src/data/pax-cdbinit--1.0.sql @@ -0,0 +1,7 @@ +-- insert pax catalog values +INSERT INTO pg_proc VALUES(7600,'pax_tableam_handler',11,10,13,1,0,0,0,'f','f','f','t','f','s','u',1,0,269,'2281',null,null,null,null,null,'pax_tableam_handler','$libdir/pax',null,null,null,'n','a'); +INSERT INTO pg_am VALUES(7014,'pax',7600,'t'); +COMMENT ON FUNCTION pax_tableam_handler IS 'column-optimized PAX table access method handler'; +INSERT INTO pg_proc VALUES(7601,'paxauxstats_in',11,10,13,1,0,0,0,'f','f','f','t','f','i','u',1,0,7603,'2275',null,null,null,null,null,'MicroPartitionStatsInput','$libdir/pax',null,null,null,'n','a'); +INSERT INTO pg_proc VALUES(7602,'paxauxstats_out',11,10,13,1,0,0,0,'f','f','f','t','f','i','u',1,0,2275,'7603',null,null,null,null,null,'MicroPartitionStatsOutput','$libdir/pax',null,null,null,'n','a'); +INSERT INTO pg_type VALUES(7603,'paxauxstats',11,10,-1,'f','b','U','f','t',',',0,0,0,0,7601,7602,0,0,0,0,0,'i','x','t',0,-1,0,0,null,null,null); diff --git a/contrib/pax_storage/src/data/sql/ddl.sql b/contrib/pax_storage/src/data/sql/ddl.sql new file mode 100644 index 00000000000..66a6c239d1a --- /dev/null +++ b/contrib/pax_storage/src/data/sql/ddl.sql @@ -0,0 +1,28 @@ +-- start_ignore +create extension pax; +drop table if exists users; +-- end_ignore +create table users( + id int , + name text not null, + height float not null, + decimal_col decimal(10, 2) not null, + created_at timestamp with time zone not null, + updated_at timestamp with time zone not null +) using pax distributed BY (id); + +insert into users (id, name, height, decimal_col, created_at, updated_at) values + (1, 'Alice', 1.65, 1.23, '2023-05-17 17:56:49.633664+08', '2023-05-17 17:56:49.633664+08'), + (2, 'Bob', 1.75, 2.34, '2023-05-17 17:56:49.633664+08', '2023-05-17 17:56:49.633664+08'), + (3, 'Carol', 1.85, 3.45, '2023-05-17 17:56:49.633664+08', '2023-05-17 17:56:49.633664+08'); +select * from users; + +DELETE FROM users WHERE id = 1; +select * from users; + +UPDATE users SET name = 'Alice' WHERE id = 2; +select * from users; + +UPDATE users SET height = (select max(height) from users),decimal_col = (select min(decimal_col) from users); +select * from users; + diff --git a/contrib/pax_storage/src/data/sql/join.sql b/contrib/pax_storage/src/data/sql/join.sql new file mode 100644 index 00000000000..8b28bd7cb92 --- /dev/null +++ b/contrib/pax_storage/src/data/sql/join.sql @@ -0,0 +1,10 @@ +-- start_ignore +create extension pax; +drop table if exists t1; +-- end_ignore +create table t1(v int) using pax distributed by(v); +insert into t1 select generate_series(1,10); +select * from t1 order by v; +update t1 set v=(select max(v) from t1) where v <= 5; +select * from t1 order by v; +select * from t1 as a join t1 as b on a.v=b.v where a.v<10; diff --git 
a/contrib/pax_storage/src/data/sql/setup.sql b/contrib/pax_storage/src/data/sql/setup.sql new file mode 100644 index 00000000000..2ba583b79bd --- /dev/null +++ b/contrib/pax_storage/src/data/sql/setup.sql @@ -0,0 +1,3 @@ +-- start_ignore +create EXTENSION if not exists pax; +-- end_ignore \ No newline at end of file diff --git a/contrib/pax_storage/src/data/sql/teardown.sql b/contrib/pax_storage/src/data/sql/teardown.sql new file mode 100644 index 00000000000..e69de29bb2d diff --git a/contrib/pax_storage/src/data/sql/types.sql b/contrib/pax_storage/src/data/sql/types.sql new file mode 100644 index 00000000000..6eb3e09f895 --- /dev/null +++ b/contrib/pax_storage/src/data/sql/types.sql @@ -0,0 +1,52 @@ +-- start_ignore +create extension pax; +drop table if exists all_typbyval_pg_types; +-- end_ignore + +CREATE TABLE all_typbyval_pg_types ( + id int, + bool_col bool, + char_col char, + int2_col int2, + cid_col cid, + + float4_col float4, + int4_col int4, + date_col date, + + oid_col oid, + -- xid_col xid, + time_stamp_col timestamp, + int8_col int8, + -- xid8_col xid8, + float8_col float8, + money_col money, + time_col time, + timestamptz_col timestamptz, + pg_lsn_col pg_lsn +) USING pax distributed by (id); + +insert into all_typbyval_pg_types values(1, true,'c',2,'cid',4.2,5,'2023-05-17 17:56:49',7,'2023-05-17 17:56:49',10,11.1111,12,'2023-05-17 17:56:49','2023-05-17 17:56:49', '16/0'), +(1, true,'c',2,'cid',4.2,5,'2023-05-17 17:56:49',7,'2023-05-17 17:56:49',10,11.1111,12,'2023-05-17 17:56:49','2023-05-17 17:56:49', '16/0'), +(1, true,'c',2,'cid',4.2,5,'2023-05-17 17:56:49',7,'2023-05-17 17:56:49',10,11.1111,12,'2023-05-17 17:56:49','2023-05-17 17:56:49', '16/0'); +select * from all_typbyval_pg_types; + +-- start_ignore +drop table if exists all_typlen_lt_0_pg_type; +-- end_ignore + +create table all_typlen_lt_0_pg_type ( + id int, + name_col name, + numeric_col numeric, + text_col text, + varchar_col varchar(128), + point_col point +) USING pax distributed by (id); + +insert into all_typlen_lt_0_pg_type values(1,'hello', 1.23, 'text', 'varchar', point(1,2)); +select * from all_typlen_lt_0_pg_type; + +-- start_ignore +drop table if exists all_typbyval_pg_types; +drop table if exists all_typlen_lt_0_pg_type; +-- end_ignore \ No newline at end of file diff --git a/contrib/pax_storage/src/data/sql/update.sql b/contrib/pax_storage/src/data/sql/update.sql new file mode 100644 index 00000000000..a305246765a --- /dev/null +++ b/contrib/pax_storage/src/data/sql/update.sql @@ -0,0 +1,703 @@ +-- start_ignore +create extension pax; +drop table if exists update_test; +drop table if exists upsert_test; +-- end_ignore +set default_table_access_method = 'pax'; +CREATE TABLE update_test ( + a INT DEFAULT 10, + b INT, + c TEXT +) using pax; + +CREATE TABLE upsert_test ( + a INT , + b TEXT +) using pax; + +INSERT INTO update_test VALUES (5, 10, 'foo'); +INSERT INTO update_test(b, a, c) VALUES (15, 10, ''); + +SELECT a,b,c FROM update_test ORDER BY a,b,c; + +UPDATE update_test SET a = DEFAULT, b = 0; + +SELECT a,b,c FROM update_test ORDER BY a,b,c; + +-- aliases for the UPDATE target table +UPDATE update_test AS t SET b = 10 WHERE t.a = 10; + +SELECT a,b,c FROM update_test ORDER BY a,b,c; + +UPDATE update_test t SET b = t.b + 10 WHERE t.a = 10; + +SELECT a,b,c FROM update_test ORDER BY a,b,c; + +-- +-- Test VALUES in FROM +-- + +UPDATE update_test SET a=v.i FROM (VALUES(100, 20)) AS v(i, j) + WHERE update_test.b = v.j; + +SELECT a,b,c FROM update_test ORDER BY a,b,c; + +-- fail, wrong data type: +UPDATE update_test SET a = v.* FROM (VALUES(100, 20)) AS 
v(i, j) + WHERE update_test.b = v.j; + +-- +-- Test multiple-set-clause syntax +-- + +INSERT INTO update_test SELECT a,b+1,c FROM update_test; +SELECT * FROM update_test; + +UPDATE update_test SET (c,b,a) = ('bugle', b+11, DEFAULT) WHERE c = 'foo'; +SELECT a,b,c FROM update_test ORDER BY a,b,c; +UPDATE update_test SET (c,b) = ('car', a+b), a = a + 1 WHERE a = 10; +SELECT a,b,c FROM update_test ORDER BY a,b,c; +-- fail, multi assignment to same column: +UPDATE update_test SET (c,b) = ('car', a+b), b = a + 1 WHERE a = 10; + +-- uncorrelated sub-select: +UPDATE update_test + SET (b,a) = (select a,b from update_test where b = 41 and c = 'car') + WHERE a = 100 AND b = 20; +SELECT * FROM update_test; +-- correlated sub-select: +UPDATE update_test o + SET (b,a) = (select a+1,b from update_test i + where i.a=o.a and i.b=o.b and i.c is not distinct from o.c); +SELECT * FROM update_test; + +-- fail, multiple rows supplied: +UPDATE update_test SET (b,a) = (select a+1,b from update_test); +-- set to null if no rows supplied: +UPDATE update_test SET (b,a) = (select a+1,b from update_test where a = 1000) + WHERE a = 11; +SELECT * FROM update_test; +-- *-expansion should work in this context: +UPDATE update_test SET (a,b) = ROW(v.*) FROM (VALUES(21, 100)) AS v(i, j) + WHERE update_test.a = v.i; +-- you might expect this to work, but syntactically it's not a RowExpr: +UPDATE update_test SET (a,b) = (v.*) FROM (VALUES(21, 101)) AS v(i, j) + WHERE update_test.a = v.i; + +-- if an alias for the target table is specified, don't allow references +-- to the original table name +UPDATE update_test AS t SET b = update_test.b + 10 WHERE t.a = 10; + +-- Make sure that we can update to a TOASTed value. +UPDATE update_test SET c = repeat('x', 10000) WHERE c = 'car'; +SELECT a, b, char_length(c) FROM update_test; + +-- Check multi-assignment with a Result node to handle a one-time filter. +EXPLAIN (VERBOSE, COSTS OFF) +UPDATE update_test t + SET (a, b) = (SELECT b, a FROM update_test s WHERE s.a = t.a) + WHERE CURRENT_USER = SESSION_USER; +UPDATE update_test t + SET (a, b) = (SELECT b, a FROM update_test s WHERE s.a = t.a) + WHERE CURRENT_USER = SESSION_USER; +SELECT a, b, char_length(c) FROM update_test; + +-- start_ignore +-- Test ON CONFLICT DO UPDATE + +-- skip, not support primary key, can't test +set default_table_access_method = 'pax'; +CREATE TABLE upsert_test ( + a INT PRIMARY KEY, + b TEXT +) using pax; + +-- INSERT INTO upsert_test VALUES(1, 'Boo'), (3, 'Zoo'); +-- -- uncorrelated sub-select: +-- WITH aaa AS (SELECT 1 AS a, 'Foo' AS b) INSERT INTO upsert_test +-- VALUES (1, 'Bar') ON CONFLICT(a) +-- DO UPDATE SET (b, a) = (SELECT b, a FROM aaa) RETURNING *; +-- -- correlated sub-select: +-- INSERT INTO upsert_test VALUES (1, 'Baz'), (3, 'Zaz') ON CONFLICT(a) +-- DO UPDATE SET (b, a) = (SELECT b || ', Correlated', a from upsert_test i WHERE i.a = upsert_test.a) +-- RETURNING *; +-- -- correlated sub-select (EXCLUDED.* alias): +-- INSERT INTO upsert_test VALUES (1, 'Bat'), (3, 'Zot') ON CONFLICT(a) +-- DO UPDATE SET (b, a) = (SELECT b || ', Excluded', a from upsert_test i WHERE i.a = excluded.a) +-- RETURNING *; + +-- -- ON CONFLICT using system attributes in RETURNING, testing both the +-- -- inserting and updating paths. 
See bug report at: +-- -- https://www.postgresql.org/message-id/73436355-6432-49B1-92ED-1FE4F7E7E100%40finefun.com.au +-- INSERT INTO upsert_test VALUES (2, 'Beeble') ON CONFLICT(a) +-- DO UPDATE SET (b, a) = (SELECT b || ', Excluded', a from upsert_test i WHERE i.a = excluded.a) +-- RETURNING tableoid::regclass, xmin = pg_current_xact_id()::xid AS xmin_correct, xmax = 0 AS xmax_correct; +-- -- currently xmax is set after a conflict - that's probably not good, +-- -- but it seems worthwhile to have to be explicit if that changes. +-- INSERT INTO upsert_test VALUES (2, 'Brox') ON CONFLICT(a) +-- DO UPDATE SET (b, a) = (SELECT b || ', Excluded', a from upsert_test i WHERE i.a = excluded.a) +-- RETURNING tableoid::regclass, xmin = pg_current_xact_id()::xid AS xmin_correct, xmax = pg_current_xact_id()::xid AS xmax_correct; + +-- DROP TABLE update_test; +-- DROP TABLE upsert_test; + +-- -- Test ON CONFLICT DO UPDATE with partitioned table and non-identical children + +-- CREATE TABLE upsert_test ( +-- a INT PRIMARY KEY, +-- b TEXT +-- ) PARTITION BY LIST (a); + +-- CREATE TABLE upsert_test_1 PARTITION OF upsert_test FOR VALUES IN (1); +-- CREATE TABLE upsert_test_2 (b TEXT, a INT PRIMARY KEY); +-- ALTER TABLE upsert_test ATTACH PARTITION upsert_test_2 FOR VALUES IN (2); + +-- INSERT INTO upsert_test VALUES(1, 'Boo'), (2, 'Zoo'); +-- -- uncorrelated sub-select: +-- WITH aaa AS (SELECT 1 AS a, 'Foo' AS b) INSERT INTO upsert_test +-- VALUES (1, 'Bar') ON CONFLICT(a) +-- DO UPDATE SET (b, a) = (SELECT b, a FROM aaa) RETURNING *; +-- -- correlated sub-select: +-- WITH aaa AS (SELECT 1 AS ctea, ' Foo' AS cteb) INSERT INTO upsert_test +-- VALUES (1, 'Bar'), (2, 'Baz') ON CONFLICT(a) +-- DO UPDATE SET (b, a) = (SELECT upsert_test.b||cteb, upsert_test.a FROM aaa) RETURNING *; + +-- DROP TABLE upsert_test; + +--------------------------- +-- UPDATE with row movement +--------------------------- + +-- When a partitioned table receives an UPDATE to the partitioned key and the +-- new values no longer meet the partition's bound, the row must be moved to +-- the correct partition for the new partition key (if one exists). We must +-- also ensure that updatable views on partitioned tables properly enforce any +-- WITH CHECK OPTION that is defined. The situation with triggers in this case +-- also requires thorough testing as partition key updates causing row +-- movement convert UPDATEs into DELETE+INSERT. +set default_table_access_method = 'pax'; + +CREATE TABLE range_parted ( + a text, + b bigint, + c numeric, + d int, + e varchar +) PARTITION BY RANGE (a, b); + +-- Create partitions intentionally in descending bound order, so as to test +-- that update-row-movement works with the leaf partitions not in bound order. +CREATE TABLE part_b_20_b_30 (e varchar, c numeric, a text, b bigint, d int); +-- GPDB: distribution policy must match the parent table. 
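+-- (part_b_20_b_30 was created stand-alone, so it got a default policy; +-- reset it to the parent's key (a) before the ATTACH PARTITION below)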
+alter table part_b_20_b_30 set distributed by (a); +ALTER TABLE range_parted ATTACH PARTITION part_b_20_b_30 FOR VALUES FROM ('b', 20) TO ('b', 30); +CREATE TABLE part_b_10_b_20 (e varchar, c numeric, a text, b bigint, d int) PARTITION BY RANGE (c); +alter table part_b_10_b_20 set distributed by (a); +CREATE TABLE part_b_1_b_10 PARTITION OF range_parted FOR VALUES FROM ('b', 1) TO ('b', 10); +ALTER TABLE range_parted ATTACH PARTITION part_b_10_b_20 FOR VALUES FROM ('b', 10) TO ('b', 20); +CREATE TABLE part_a_10_a_20 PARTITION OF range_parted FOR VALUES FROM ('a', 10) TO ('a', 20); +CREATE TABLE part_a_1_a_10 PARTITION OF range_parted FOR VALUES FROM ('a', 1) TO ('a', 10); + +-- Check that partition-key UPDATE works sanely on a partitioned table that +-- does not have any child partitions. +UPDATE part_b_10_b_20 set b = b - 6; + +-- Create some more partitions following the above pattern of descending bound +-- order, but let's make the situation a bit more complex by having the +-- attribute numbers of the columns vary from their parent partition. +CREATE TABLE part_c_100_200 (e varchar, c numeric, a text, b bigint, d int) PARTITION BY range (abs(d)); +ALTER TABLE part_c_100_200 DROP COLUMN e, DROP COLUMN c, DROP COLUMN a; +ALTER TABLE part_c_100_200 ADD COLUMN c numeric, ADD COLUMN e varchar, ADD COLUMN a text; +ALTER TABLE part_c_100_200 DROP COLUMN b; +ALTER TABLE part_c_100_200 ADD COLUMN b bigint; +CREATE TABLE part_d_1_15 PARTITION OF part_c_100_200 FOR VALUES FROM (1) TO (15); +CREATE TABLE part_d_15_20 PARTITION OF part_c_100_200 FOR VALUES FROM (15) TO (20); + +ALTER TABLE part_b_10_b_20 ATTACH PARTITION part_c_100_200 FOR VALUES FROM (100) TO (200); + +-- GPDB: distribution policy must match the parent table, so the previous command fails. +-- Change the distribution key and try again. +alter table part_c_100_200 set distributed by (a); +ALTER TABLE part_b_10_b_20 ATTACH PARTITION part_c_100_200 FOR VALUES FROM (100) TO (200); + +CREATE TABLE part_c_1_100 (e varchar, d int, c numeric, b bigint, a text); +alter table part_c_1_100 set distributed by (a); +ALTER TABLE part_b_10_b_20 ATTACH PARTITION part_c_1_100 FOR VALUES FROM (1) TO (100); + +\set init_range_parted 'truncate range_parted; insert into range_parted VALUES (''a'', 1, 1, 1 ,''e''), (''a'', 10, 200, 1 ,''e''), (''b'', 12, 96, 1 ,''e''), (''b'', 13, 97, 2 ,''e''), (''b'', 15, 105, 16 ,''e''), (''b'', 17, 105, 19 ,''e'')' +\set show_data 'select tableoid::regclass::text COLLATE "C" partname, * from range_parted ORDER BY 1, 2, 3, 4, 5, 6' +:init_range_parted; +:show_data; + +-- The order of subplans should be in bound order +EXPLAIN (costs off) UPDATE range_parted set c = c - 50 WHERE c > 97; + +-- fail, row movement happens only within the partition subtree. +UPDATE part_c_100_200 set c = c - 20, d = c WHERE c = 105; +-- fail, no partition key update, so no attempt to move tuple, +-- but "a = 'a'" violates partition constraint enforced by root partition) +UPDATE part_b_10_b_20 set a = 'a'; +-- ok, partition key update, no constraint violation +UPDATE range_parted set d = d - 10 WHERE d > 10; +-- ok, no partition key update, no constraint violation +UPDATE range_parted set e = d; +-- No row found +UPDATE part_c_1_100 set c = c + 20 WHERE c = 98; +-- ok, row movement +UPDATE part_b_10_b_20 set c = c + 20 returning c, b, a; +:show_data; + +-- fail, row movement happens only within the partition subtree. 
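+-- (b = b - 6 would send some rows to a sibling of part_b_10_b_20; only an +-- UPDATE through the root, like the statement after this one, can move +-- them there)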
+UPDATE part_b_10_b_20 set b = b - 6 WHERE c > 116 returning *; +-- ok, row movement, with subset of rows moved into different partition. +UPDATE range_parted set b = b - 6 WHERE c > 116 returning a, b + c; + +:show_data; + + + +--------------------------- Common table needed for multiple test scenarios. --------------------------- +CREATE TABLE mintab(c1 int); +INSERT into mintab VALUES (120); + +-- update partition key using updatable view. +CREATE VIEW upview AS SELECT * FROM range_parted WHERE (select c > c1 FROM mintab) WITH CHECK OPTION; +-- ok +UPDATE upview set c = 199 WHERE b = 4; +-- fail, check option violation +UPDATE upview set c = 120 WHERE b = 4; +-- fail, row movement with check option violation +UPDATE upview set a = 'b', b = 15, c = 120 WHERE b = 4; +-- ok, row movement, check option passes +UPDATE upview set a = 'b', b = 15 WHERE b = 4; + +:show_data; + +-- cleanup +DROP VIEW upview; + +-- RETURNING having whole-row vars. +:init_range_parted; +UPDATE range_parted set c = 95 WHERE a = 'b' and b > 10 and c > 100 returning (range_parted), *; +:show_data; + + +-- Transition tables with update row movement +:init_range_parted; + +CREATE FUNCTION trans_updatetrigfunc() RETURNS trigger LANGUAGE plpgsql AS +$$ + begin + raise notice 'trigger = %, old table = %, new table = %', + TG_NAME, + (select string_agg(old_table::text, ', ' ORDER BY a) FROM old_table), + (select string_agg(new_table::text, ', ' ORDER BY a) FROM new_table); + return null; + end; +$$; + +CREATE TRIGGER trans_updatetrig + AFTER UPDATE ON range_parted REFERENCING OLD TABLE AS old_table NEW TABLE AS new_table + FOR EACH STATEMENT EXECUTE PROCEDURE trans_updatetrigfunc(); + +UPDATE range_parted set c = (case when c = 96 then 110 else c + 1 end ) WHERE a = 'b' and b > 10 and c >= 96; +:show_data; +:init_range_parted; + +-- -- Enabling OLD TABLE capture for both DELETE as well as UPDATE stmt triggers +-- -- should not cause DELETEd rows to be captured twice. Similar thing for +-- -- INSERT triggers and inserted rows. +-- CREATE TRIGGER trans_deletetrig +-- AFTER DELETE ON range_parted REFERENCING OLD TABLE AS old_table +-- FOR EACH STATEMENT EXECUTE PROCEDURE trans_updatetrigfunc(); +-- CREATE TRIGGER trans_inserttrig +-- AFTER INSERT ON range_parted REFERENCING NEW TABLE AS new_table +-- FOR EACH STATEMENT EXECUTE PROCEDURE trans_updatetrigfunc(); +-- UPDATE range_parted set c = c + 50 WHERE a = 'b' and b > 10 and c >= 96; +-- :show_data; +-- DROP TRIGGER trans_deletetrig ON range_parted; +-- DROP TRIGGER trans_inserttrig ON range_parted; +-- -- Don't drop trans_updatetrig yet. It is required below. + +-- -- Test with transition tuple conversion happening for rows moved into the +-- -- new partition. This requires a trigger that references transition table +-- -- (we already have trans_updatetrig). For inserted rows, the conversion +-- -- is not usually needed, because the original tuple is already compatible with +-- -- the desired transition tuple format. But conversion happens when there is a +-- -- BR trigger because the trigger can change the inserted row. So install a +-- -- BR triggers on those child partitions where the rows will be moved. 
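+-- -- (func_parted_mod_b() below bumps NEW.b by one, so a row routed into +-- -- one of these partitions arrives with b incremented)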
+-- CREATE FUNCTION func_parted_mod_b() RETURNS trigger AS $$ +-- BEGIN +-- NEW.b = NEW.b + 1; +-- return NEW; +-- END $$ language plpgsql; +-- CREATE TRIGGER trig_c1_100 BEFORE UPDATE OR INSERT ON part_c_1_100 +-- FOR EACH ROW EXECUTE PROCEDURE func_parted_mod_b(); +-- CREATE TRIGGER trig_d1_15 BEFORE UPDATE OR INSERT ON part_d_1_15 +-- FOR EACH ROW EXECUTE PROCEDURE func_parted_mod_b(); +-- CREATE TRIGGER trig_d15_20 BEFORE UPDATE OR INSERT ON part_d_15_20 +-- FOR EACH ROW EXECUTE PROCEDURE func_parted_mod_b(); +-- :init_range_parted; +-- UPDATE range_parted set c = (case when c = 96 then 110 else c + 1 end) WHERE a = 'b' and b > 10 and c >= 96; +-- :show_data; +-- :init_range_parted; +-- UPDATE range_parted set c = c + 50 WHERE a = 'b' and b > 10 and c >= 96; +-- :show_data; + +-- -- Case where per-partition tuple conversion map array is allocated, but the +-- -- map is not required for the particular tuple that is routed, thanks to +-- -- matching table attributes of the partition and the target table. +-- :init_range_parted; +-- UPDATE range_parted set b = 15 WHERE b = 1; +-- :show_data; + +-- DROP TRIGGER trans_updatetrig ON range_parted; +-- DROP TRIGGER trig_c1_100 ON part_c_1_100; +-- DROP TRIGGER trig_d1_15 ON part_d_1_15; +-- DROP TRIGGER trig_d15_20 ON part_d_15_20; +-- DROP FUNCTION func_parted_mod_b(); + +-- RLS policies with update-row-movement +----------------------------------------- + +ALTER TABLE range_parted ENABLE ROW LEVEL SECURITY; +CREATE USER regress_range_parted_user; +GRANT ALL ON range_parted, mintab TO regress_range_parted_user; +CREATE POLICY seeall ON range_parted AS PERMISSIVE FOR SELECT USING (true); +CREATE POLICY policy_range_parted ON range_parted for UPDATE USING (true) WITH CHECK (c % 2 = 0); + +:init_range_parted; +SET SESSION AUTHORIZATION regress_range_parted_user; +-- This should fail with RLS violation error while moving row from +-- part_a_10_a_20 to part_d_1_15, because we are setting 'c' to an odd number. +UPDATE range_parted set a = 'b', c = 151 WHERE a = 'a' and c = 200; + +RESET SESSION AUTHORIZATION; +-- Create a trigger on part_d_1_15 +CREATE FUNCTION func_d_1_15() RETURNS trigger AS $$ +BEGIN + NEW.c = NEW.c + 1; -- Make even numbers odd, or vice versa + return NEW; +END $$ LANGUAGE plpgsql; +CREATE TRIGGER trig_d_1_15 BEFORE INSERT ON part_d_1_15 + FOR EACH ROW EXECUTE PROCEDURE func_d_1_15(); + +:init_range_parted; +SET SESSION AUTHORIZATION regress_range_parted_user; + +-- Here, RLS checks should succeed while moving row from part_a_10_a_20 to +-- part_d_1_15. Even though the UPDATE is setting 'c' to an odd number, the +-- trigger at the destination partition again makes it an even number. +UPDATE range_parted set a = 'b', c = 151 WHERE a = 'a' and c = 200; + +RESET SESSION AUTHORIZATION; +:init_range_parted; +SET SESSION AUTHORIZATION regress_range_parted_user; +-- This should fail with RLS violation error. Even though the UPDATE is setting +-- 'c' to an even number, the trigger at the destination partition again makes +-- it an odd number. 
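+-- (c = 150 satisfies WITH CHECK (c % 2 = 0) by itself, but trig_d_1_15 +-- bumps it to 151 before the policy is evaluated at the destination)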
+UPDATE range_parted set a = 'b', c = 150 WHERE a = 'a' and c = 200; + +-- Cleanup +RESET SESSION AUTHORIZATION; +DROP TRIGGER trig_d_1_15 ON part_d_1_15; +DROP FUNCTION func_d_1_15(); + +-- Policy expression contains SubPlan +RESET SESSION AUTHORIZATION; +:init_range_parted; +CREATE POLICY policy_range_parted_subplan on range_parted + AS RESTRICTIVE for UPDATE USING (true) + WITH CHECK ((SELECT range_parted.c <= c1 FROM mintab)); +SET SESSION AUTHORIZATION regress_range_parted_user; +-- fail, mintab has row with c1 = 120 +UPDATE range_parted set a = 'b', c = 122 WHERE a = 'a' and c = 200; +-- ok +UPDATE range_parted set a = 'b', c = 120 WHERE a = 'a' and c = 200; + +-- RLS policy expression contains whole row. + +RESET SESSION AUTHORIZATION; +:init_range_parted; +CREATE POLICY policy_range_parted_wholerow on range_parted AS RESTRICTIVE for UPDATE USING (true) + WITH CHECK (range_parted = row('b', 10, 112, 1, NULL)::range_parted); +SET SESSION AUTHORIZATION regress_range_parted_user; +-- ok, should pass the RLS check +UPDATE range_parted set a = 'b', c = 112 WHERE a = 'a' and c = 200; +RESET SESSION AUTHORIZATION; +:init_range_parted; +SET SESSION AUTHORIZATION regress_range_parted_user; +-- fail, the whole row RLS check should fail +UPDATE range_parted set a = 'b', c = 116 WHERE a = 'a' and c = 200; + +-- Cleanup +RESET SESSION AUTHORIZATION; +DROP POLICY policy_range_parted ON range_parted; +DROP POLICY policy_range_parted_subplan ON range_parted; +DROP POLICY policy_range_parted_wholerow ON range_parted; +REVOKE ALL ON range_parted, mintab FROM regress_range_parted_user; +DROP USER regress_range_parted_user; +DROP TABLE mintab; +----- ok above +-- statement triggers with update row movement +--------------------------------------------------- + +:init_range_parted; + +CREATE FUNCTION trigfunc() returns trigger language plpgsql as +$$ + begin + raise notice 'trigger = % fired on table % during %', + TG_NAME, TG_TABLE_NAME, TG_OP; + return null; + end; +$$; +-- Triggers on root partition +CREATE TRIGGER parent_delete_trig + AFTER DELETE ON range_parted for each statement execute procedure trigfunc(); +CREATE TRIGGER parent_update_trig + AFTER UPDATE ON range_parted for each statement execute procedure trigfunc(); +CREATE TRIGGER parent_insert_trig + AFTER INSERT ON range_parted for each statement execute procedure trigfunc(); + +-- Triggers on leaf partition part_c_1_100 +CREATE TRIGGER c1_delete_trig + AFTER DELETE ON part_c_1_100 for each statement execute procedure trigfunc(); +CREATE TRIGGER c1_update_trig + AFTER UPDATE ON part_c_1_100 for each statement execute procedure trigfunc(); +CREATE TRIGGER c1_insert_trig + AFTER INSERT ON part_c_1_100 for each statement execute procedure trigfunc(); + +-- Triggers on leaf partition part_d_1_15 +CREATE TRIGGER d1_delete_trig + AFTER DELETE ON part_d_1_15 for each statement execute procedure trigfunc(); +CREATE TRIGGER d1_update_trig + AFTER UPDATE ON part_d_1_15 for each statement execute procedure trigfunc(); +CREATE TRIGGER d1_insert_trig + AFTER INSERT ON part_d_1_15 for each statement execute procedure trigfunc(); +-- Triggers on leaf partition part_d_15_20 +CREATE TRIGGER d15_delete_trig + AFTER DELETE ON part_d_15_20 for each statement execute procedure trigfunc(); +CREATE TRIGGER d15_update_trig + AFTER UPDATE ON part_d_15_20 for each statement execute procedure trigfunc(); +CREATE TRIGGER d15_insert_trig + AFTER INSERT ON part_d_15_20 for each statement execute procedure trigfunc(); + +-- Move all rows from part_c_100_200 to 
part_c_1_100. None of the delete or +-- insert statement triggers should be fired. +UPDATE range_parted set c = c - 50 WHERE c > 97; +:show_data; + +DROP TRIGGER parent_delete_trig ON range_parted; +DROP TRIGGER parent_update_trig ON range_parted; +DROP TRIGGER parent_insert_trig ON range_parted; +DROP TRIGGER c1_delete_trig ON part_c_1_100; +DROP TRIGGER c1_update_trig ON part_c_1_100; +DROP TRIGGER c1_insert_trig ON part_c_1_100; +DROP TRIGGER d1_delete_trig ON part_d_1_15; +DROP TRIGGER d1_update_trig ON part_d_1_15; +DROP TRIGGER d1_insert_trig ON part_d_1_15; +DROP TRIGGER d15_delete_trig ON part_d_15_20; +DROP TRIGGER d15_update_trig ON part_d_15_20; +DROP TRIGGER d15_insert_trig ON part_d_15_20; + + +-- Creating default partition for range +:init_range_parted; +create table part_def partition of range_parted default; +\d+ part_def +insert into range_parted values ('c', 9, 0, 0, ''); +-- ok +update part_def set a = 'd' where a = 'c'; +-- fail +update part_def set a = 'a' where a = 'd'; + +:show_data; + +-- Update row movement from non-default to default partition. +-- fail, default partition is not under part_a_10_a_20; +UPDATE part_a_10_a_20 set a = 'ad' WHERE a = 'a'; +-- ok +-- UPDATE range_parted set a = 'ad' WHERE a = 'a'; +UPDATE range_parted set a = 'bd' WHERE a = 'b'; +:show_data; +-- Update row movement from default to non-default partitions. +-- ok +UPDATE range_parted set a = 'a' WHERE a = 'ad'; +UPDATE range_parted set a = 'b' WHERE a = 'bd'; +:show_data; + +-- Cleanup: range_parted no longer needed. +DROP TABLE range_parted; + +CREATE TABLE list_parted ( + a text, + b int +) PARTITION BY list (a); +CREATE TABLE list_part1 PARTITION OF list_parted for VALUES in ('a', 'b'); +CREATE TABLE list_default PARTITION OF list_parted default; +INSERT into list_part1 VALUES ('a', 1); +INSERT into list_default VALUES ('d', 10); + +-- fail +UPDATE list_default set a = 'a' WHERE a = 'd'; +-- ok +UPDATE list_default set a = 'x' WHERE a = 'd'; + +DROP TABLE list_parted; + +-- Test retrieval of system columns with non-consistent partition row types. +-- This is only partially supported, as seen in the results. +-- start_ignore +-- create table utrtest (a int, b text) partition by list (a); +-- create table utr1 (a int check (a in (1)), q text, b text); +-- create table utr2 (a int check (a in (2)), b text); +-- alter table utr1 drop column q; +-- alter table utrtest attach partition utr1 for values in (1); +-- alter table utrtest attach partition utr2 for values in (2); + +-- -- xmin_ok is likely false, xmin and pg_current_xact_id() comes from +-- -- data segment and master, respectively. 
+-- insert into utrtest values (1, 'foo') +-- returning *, tableoid::regclass, xmin = pg_current_xact_id()::xid as xmin_ok; +-- insert into utrtest values (2, 'bar') +-- returning *, tableoid::regclass, xmin = pg_current_xact_id()::xid as xmin_ok; -- fails +-- insert into utrtest values (2, 'bar') +-- returning *, tableoid::regclass; + +-- update utrtest set b = b || b from (values (1), (2)) s(x) where a = s.x +-- returning *, tableoid::regclass, xmin = pg_current_xact_id()::xid as xmin_ok; + +-- update utrtest set a = 3 - a from (values (1), (2)) s(x) where a = s.x +-- returning *, tableoid::regclass, xmin = pg_current_xact_id()::xid as xmin_ok; -- fails + +-- update utrtest set a = 3 - a from (values (1), (2)) s(x) where a = s.x +-- returning *, tableoid::regclass; + +-- delete from utrtest +-- returning *, tableoid::regclass, xmax = pg_current_xact_id()::xid as xmax_ok; + +-- drop table utrtest; +-- end_ignore + + +-------------- +-- Some more update-partition-key test scenarios below. This time use list +-- partitions. +-------------- + +-- Setup for list partitions +CREATE TABLE list_parted (a numeric, b int, c int8) PARTITION BY list (a); +CREATE TABLE sub_parted PARTITION OF list_parted for VALUES in (1) PARTITION BY list (b); + +CREATE TABLE sub_part1(b int, c int8, a numeric); +alter table sub_part1 set distributed by (a); -- GPDB: distribution policy must match the parent table. +ALTER TABLE sub_parted ATTACH PARTITION sub_part1 for VALUES in (1); +CREATE TABLE sub_part2(b int, c int8, a numeric); +alter table sub_part2 set distributed by (a); -- GPDB: distribution policy must match the parent table. +ALTER TABLE sub_parted ATTACH PARTITION sub_part2 for VALUES in (2); + +CREATE TABLE list_part1(a numeric, b int, c int8); +ALTER TABLE list_parted ATTACH PARTITION list_part1 for VALUES in (2,3); + +INSERT into list_parted VALUES (2,5,50); +INSERT into list_parted VALUES (3,6,60); +INSERT into sub_parted VALUES (1,1,60); +INSERT into sub_parted VALUES (1,2,10); + +-- Test partition constraint violation when intermediate ancestor is used and +-- constraint is inherited from upper root. +UPDATE sub_parted set a = 2 WHERE c = 10; + +-- Test update-partition-key, where the unpruned partitions do not have their +-- partition keys updated. +SELECT tableoid::regclass::text, * FROM list_parted WHERE a = 2 ORDER BY 1; +UPDATE list_parted set b = c + a WHERE a = 2; +SELECT tableoid::regclass::text, * FROM list_parted WHERE a = 2 ORDER BY 1; + + +-- Test the case where BR UPDATE triggers change the partition key. +-- CREATE FUNCTION func_parted_mod_b() returns trigger as $$ +-- BEGIN +-- NEW.b = 2; -- This is changing partition key column. +-- return NEW; +-- END $$ LANGUAGE plpgsql; +-- CREATE TRIGGER parted_mod_b before update on sub_part1 +-- for each row execute procedure func_parted_mod_b(); + +-- SELECT tableoid::regclass::text, * FROM list_parted ORDER BY 1, 2, 3, 4; + +-- -- This should do the tuple routing even though there is no explicit +-- -- partition-key update, because there is a trigger on sub_part1. +-- UPDATE list_parted set c = 70 WHERE b = 1; +-- SELECT tableoid::regclass::text, * FROM list_parted ORDER BY 1, 2, 3, 4; + +-- DROP TRIGGER parted_mod_b ON sub_part1; + +-- -- If BR DELETE trigger prevented DELETE from happening, we should also skip +-- -- the INSERT if that delete is part of UPDATE=>DELETE+INSERT. 
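+-- -- (if the BR DELETE trigger returns NULL, the DELETE half is suppressed; +-- -- the INSERT half must then be skipped too, or the row would be doubled)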
+-- CREATE OR REPLACE FUNCTION func_parted_mod_b() returns trigger as $$ +-- BEGIN +-- raise notice 'Trigger: Got OLD row %, but returning NULL', OLD; +-- return NULL; +-- END $$ LANGUAGE plpgsql; +-- CREATE TRIGGER trig_skip_delete before delete on sub_part2 +-- for each row execute procedure func_parted_mod_b(); +-- UPDATE list_parted set b = 1 WHERE c = 70; +-- SELECT tableoid::regclass::text, * FROM list_parted ORDER BY 1, 2, 3, 4; +-- -- Drop the trigger. Now the row should be moved. +-- DROP TRIGGER trig_skip_delete ON sub_part2; +-- UPDATE list_parted set b = 1 WHERE c = 70; +-- SELECT tableoid::regclass::text, * FROM list_parted ORDER BY 1, 2, 3, 4; +-- DROP FUNCTION func_parted_mod_b(); + +-- UPDATE partition-key with FROM clause. If join produces multiple output +-- rows for the same row to be modified, we should tuple-route the row only +-- once. There should not be any rows inserted. +CREATE TABLE non_parted (id int); +INSERT into non_parted VALUES (1), (1), (1), (2), (2), (2), (3), (3), (3); +UPDATE list_parted t1 set a = 2 FROM non_parted t2 WHERE t1.a = t2.id and a = 1; + +-- In GPDB, the above UPDATE fails because the distribution key is updated, and +-- the Split Update codepath isn't smart enough to handle this situation. With +-- a non-Split Update, it works: +-- ALTER TABLE list_parted SET DISTRIBUTED BY (c); +UPDATE list_parted t1 set a = 2 FROM non_parted t2 WHERE t1.a = t2.id and a = 1; + +SELECT tableoid::regclass::text, * FROM list_parted ORDER BY 1, 2, 3, 4; +DROP TABLE non_parted; + +-- Cleanup: list_parted no longer needed. +DROP TABLE list_parted; + +-- create custom operator class and hash function, for the same reason +-- explained in alter_table.sql +create or replace function dummy_hashint4(a int4, seed int8) returns int8 as +$$ begin return (a + seed); end; $$ language 'plpgsql' immutable; +create operator class custom_opclass for type int4 using hash as +operator 1 = , function 2 dummy_hashint4(int4, int8); + +create table hash_parted ( + a int, + b int +) partition by hash (a custom_opclass, b custom_opclass); +create table hpart1 partition of hash_parted for values with (modulus 2, remainder 1); +create table hpart2 partition of hash_parted for values with (modulus 4, remainder 2); +create table hpart3 partition of hash_parted for values with (modulus 8, remainder 0); +create table hpart4 partition of hash_parted for values with (modulus 8, remainder 4); +insert into hpart1 values (1, 1); +insert into hpart2 values (2, 5); +insert into hpart4 values (3, 4); + +-- fail +update hpart1 set a = 3, b=4 where a = 1; +-- ok, row movement +update hash_parted set b = b - 1 where b = 1; +-- ok +update hash_parted set b = b + 8 where b = 1; + +-- cleanup +drop table hash_parted; +drop operator class custom_opclass using hash; +drop function dummy_hashint4(a int4, seed int8); +-- end_ignore diff --git a/contrib/pax_storage/src/data/sql/update_gp.sql b/contrib/pax_storage/src/data/sql/update_gp.sql new file mode 100644 index 00000000000..3fe87351ac5 --- /dev/null +++ b/contrib/pax_storage/src/data/sql/update_gp.sql @@ -0,0 +1,398 @@ +-- Test DELETE and UPDATE on an inherited table. +-- The special aspect of this table is that the inherited table has +-- a different distribution key. 'p' table's distribution key matches +-- that of 'r', but 'p2's doesn't. Test that the planner adds a Motion +-- node correctly for p2. 
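+-- Here the roles of 'r', 'p' and 'p2' are played by todelete, parent and +-- child: parent shares todelete's distribution key (a), while child is +-- distributed by (b), so child's rows must be redistributed before the +-- DELETE and UPDATE below can be executed on their owning segments.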
+set default_table_access_method = 'pax'; +create table todelete (a int) distributed by (a); +create table parent (a int, b int, c int) distributed by (a); +create table child (a int, b int, c int) inherits (parent) distributed by (b); + +insert into parent select g, g, g from generate_series(1,5) g; +insert into child select g, g, g from generate_series(6,10) g; + +insert into todelete select generate_series(3,4); + +delete from parent using todelete where parent.a = todelete.a; + +insert into todelete select generate_series(5,7); + +update parent set c=c+100 from todelete where parent.a = todelete.a; + +select * from parent; + +drop table todelete; +drop table child; +drop table parent; + +-- This is similar to the above, but with a partitioned table (which is +-- implemented by inheritance) rather than an explicitly inherited table. +-- The scans on some of the partitions degenerate into Result nodes with +-- False one-time filter, which don't need a Motion node. +create table todelete (a int, b int) distributed by (a); +create table target (a int, b int, c int) + distributed by (a) + partition by range (c) (start(1) end(5) every(1), default partition extra); + +insert into todelete select g, g % 4 from generate_series(1, 10) g; +insert into target select g, 0, 3 from generate_series(1, 5) g; +insert into target select g, 0, 1 from generate_series(1, 5) g; + +delete from target where c = 3 and a in (select b from todelete); + +insert into todelete values (1, 5); + +update target set b=target.b+100 where c = 3 and a in (select b from todelete); + +select * from target; + +-- Also test an update with a qual that doesn't match any partition. The +-- Append degenerates into a dummy Result with false One-Time Filter. +alter table target drop default partition; +update target set b = 10 where c = 10; + +drop table todelete; +drop table target; + +-- +-- Test updated on inheritance parent table, where some child tables need a +-- Split Update, but not all. +-- +create table base_tbl (a int4, b int4) distributed by (a); +create table child_a (a int4, b int4) inherits (base_tbl) distributed by (a); +create table child_b (a int4, b int4) inherits (base_tbl) distributed by (b); +insert into base_tbl select g, g from generate_series(1, 5) g; +-- start_ignore +explain (costs off) update base_tbl set a=a+1; +-- end_ignore +update base_tbl set a = 5; + +-- +-- Explicit Distribution motion must be added if any of the child nodes +-- contains any motion excluding the motions in initplans. +-- These test cases and expectation are applicable for GPDB planner not for ORCA. 
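+-- (an initplan's motion only moves the initplan's one-off result, not the +-- rows being updated, so by itself it must not force the Explicit +-- Redistribute)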
+-- +SET gp_autostats_mode = NONE; +CREATE TABLE keo1 ( user_vie_project_code_pk character varying(24), user_vie_fiscal_year_period_sk character varying(24), user_vie_act_cntr_marg_cum character varying(24)) DISTRIBUTED RANDOMLY; +INSERT INTO keo1 VALUES ('1', '1', '1'); + +CREATE TABLE keo2 ( projects_pk character varying(24)) DISTRIBUTED RANDOMLY; +INSERT INTO keo2 VALUES ('1'); + +CREATE TABLE keo3 ( sky_per character varying(24), bky_per character varying(24)) DISTRIBUTED BY (sky_per); +INSERT INTO keo3 VALUES ('1', '1'); + +CREATE TABLE keo4 ( keo_para_required_period character varying(6), keo_para_budget_date character varying(24)) DISTRIBUTED RANDOMLY; +INSERT INTO keo4 VALUES ('1', '1'); +-- Explicit Redistribution motion should be added in case of GPDB Planner (test case not applicable for ORCA) +-- start_ignore +EXPLAIN (COSTS OFF) UPDATE keo1 SET user_vie_act_cntr_marg_cum = 234.682 FROM + ( SELECT a.user_vie_project_code_pk FROM keo1 a INNER JOIN keo2 b + ON b.projects_pk=a.user_vie_project_code_pk + WHERE a.user_vie_fiscal_year_period_sk = + (SELECT MAX (sky_per) FROM keo3 WHERE bky_per = + (SELECT keo4.keo_para_required_period FROM keo4 WHERE keo_para_budget_date = + (SELECT min (keo4.keo_para_budget_date) FROM keo4))) + ) t1 +WHERE t1.user_vie_project_code_pk = keo1.user_vie_project_code_pk; +-- end_ignore +UPDATE keo1 SET user_vie_act_cntr_marg_cum = 234.682 FROM + ( SELECT a.user_vie_project_code_pk FROM keo1 a INNER JOIN keo2 b + ON b.projects_pk=a.user_vie_project_code_pk + WHERE a.user_vie_fiscal_year_period_sk = + (SELECT MAX (sky_per) FROM keo3 WHERE bky_per = + (SELECT keo4.keo_para_required_period FROM keo4 WHERE keo_para_budget_date = + (SELECT min (keo4.keo_para_budget_date) FROM keo4))) + ) t1 +WHERE t1.user_vie_project_code_pk = keo1.user_vie_project_code_pk; +SELECT user_vie_act_cntr_marg_cum FROM keo1; + +-- Explicit Redistribution motion should not be added in case of GPDB Planner (test case not applicable to ORCA) +CREATE TABLE keo5 (x int, y int) DISTRIBUTED BY (x); +INSERT INTO keo5 VALUES (1,1); +-- start_ignore +EXPLAIN (COSTS OFF) DELETE FROM keo5 WHERE x IN (SELECT x FROM keo5 WHERE EXISTS (SELECT x FROM keo5 WHERE x < 2)); +-- end_ignore +DELETE FROM keo5 WHERE x IN (SELECT x FROM keo5 WHERE EXISTS (SELECT x FROM keo5 WHERE x < 2)); +SELECT x FROM keo5; + +RESET gp_autostats_mode; +DROP TABLE keo1; +DROP TABLE keo2; +DROP TABLE keo3; +DROP TABLE keo4; +DROP TABLE keo5; + +-- start_ignore +-- -- text types. We should support the following updates. 
+-- -- + +-- CREATE TEMP TABLE ttab1 (a varchar(15), b integer) DISTRIBUTED BY (a); +-- CREATE TEMP TABLE ttab2 (a varchar(15), b integer) DISTRIBUTED BY (a); + +-- UPDATE ttab1 SET b = ttab2.b FROM ttab2 WHERE ttab1.a = ttab2.a; + +-- DROP TABLE ttab1; +-- DROP TABLE ttab2; + + +-- CREATE TEMP TABLE ttab1 (a text, b integer) DISTRIBUTED BY (a); +-- CREATE TEMP TABLE ttab2 (a text, b integer) DISTRIBUTED BY (a); + +-- UPDATE ttab1 SET b = ttab2.b FROM ttab2 WHERE ttab1.a = ttab2.a; + + +-- DROP TABLE ttab1; +-- DROP TABLE ttab2; + +-- CREATE TEMP TABLE ttab1 (a varchar, b integer) DISTRIBUTED BY (a); +-- CREATE TEMP TABLE ttab2 (a varchar, b integer) DISTRIBUTED BY (a); + +-- UPDATE ttab1 SET b = ttab2.b FROM ttab2 WHERE ttab1.a = ttab2.a; + + +-- DROP TABLE ttab1; +-- DROP TABLE ttab2; + +-- CREATE TEMP TABLE ttab1 (a char(15), b integer) DISTRIBUTED BY (a); +-- CREATE TEMP TABLE ttab2 (a char(15), b integer) DISTRIBUTED BY (a); + +-- UPDATE ttab1 SET b = ttab2.b FROM ttab2 WHERE ttab1.a = ttab2.a; + +-- DROP TABLE IF EXISTS update_distr_key; + +-- CREATE TEMP TABLE update_distr_key (a int, b int) DISTRIBUTED BY (a); +-- INSERT INTO update_distr_key select i, i* 10 from generate_series(0, 9) i; + +-- UPDATE update_distr_key SET a = 5 WHERE b = 10; + +-- SELECT * from update_distr_key; + +-- DROP TABLE update_distr_key; + +-- end_ignore + +-- the cases below test multi-column hash distribution keys +CREATE TABLE tab3(c1 int, c2 int, c3 int, c4 int, c5 int) DISTRIBUTED BY (c1, c2, c3); +CREATE TABLE tab5(c1 int, c2 int, c3 int, c4 int, c5 int) DISTRIBUTED BY (c1, c2, c3, c4, c5); + +INSERT INTO tab3 SELECT i, i, i, i, i FROM generate_series(1, 10)i; +INSERT INTO tab5 SELECT i, i, i, i, i FROM generate_series(1, 10)i; + +-- test tab3 +SELECT gp_segment_id, * FROM tab3; +UPDATE tab3 set c1 = 9 where c4 = 1; +SELECT gp_segment_id, * FROM tab3; +UPDATE tab3 set (c1,c2) = (5,6) where c4 = 1; +SELECT gp_segment_id, * FROM tab3; +UPDATE tab3 set (c1,c2,c3) = (3,2,1) where c4 = 1; +SELECT gp_segment_id, * FROM tab3; +UPDATE tab3 set c1 = 11 where c2 = 10 and c2 < 1; +SELECT gp_segment_id, * FROM tab3; + +-- test tab5 +SELECT gp_segment_id, * FROM tab5; +UPDATE tab5 set c1 = 1000 where c4 = 1; +SELECT gp_segment_id, * FROM tab5; +UPDATE tab5 set (c1,c2) = (9,10) where c4 = 1; +SELECT gp_segment_id, * FROM tab5; +UPDATE tab5 set (c1,c2,c4) = (5,8,6) where c4 = 1; +SELECT gp_segment_id, * FROM tab5; +UPDATE tab5 set (c1,c2,c3,c4,c5) = (1,2,3,0,6) where c5 = 1; +SELECT gp_segment_id, * FROM tab5; +UPDATE tab5 set c1 = 11 where c3 = 10 and c3 < 1; +SELECT gp_segment_id, * FROM tab5; +-- start_ignore +EXPLAIN (COSTS OFF) UPDATE tab3 SET C1 = C1 + 1, C5 = C5+1; +-- end_ignore + +-- clean up +drop table tab3; +drop table tab5; + +-- Update distribution key + +-- start_ignore +drop table if exists r; +drop table if exists s; +drop table if exists update_dist; +drop table if exists update_ao_table; +drop table if exists update_aoco_table; +-- end_ignore + +-- Update normal table distribution key +create table update_dist(a int) distributed by (a); +insert into update_dist values(1); +update update_dist set a=0 where a=1; +select * from update_dist; + +-- Update distribution key with join + +create table r (a int, b int) distributed by (a); +create table s (a int, b int) distributed by (a); +insert into r select generate_series(1, 5), generate_series(1, 5) * 2; +insert into s select generate_series(1, 5), generate_series(1, 5) * 2; +select * from r; +select * from s; +update r set a = r.a + 1 from s where r.a = s.a; +select * 
from r; +update r set a = r.a + 1 where a in (select a from s); +select * from r; + +-- Update redistribution +delete from r; +delete from s; +insert into r select generate_series(1, 5), generate_series(1, 5); +insert into s select generate_series(1, 5), generate_series(1, 5) * 2; +select * from r; +select * from s; +update r set a = r.a + 1 from s where r.b = s.b; +select * from r; +update r set a = r.a + 1 where b in (select b from s); +select * from r; + +-- Update hash aggreate group by +delete from r; +delete from s; +insert into r select generate_series(1, 5), generate_series(1, 5) * 2; +insert into s select generate_series(1, 5), generate_series(1, 5); +select * from r; +select * from s; +update s set a = s.a + 1 where exists (select 1 from r where s.a = r.b); +select * from s; +-- start_ignore +-- Update ao table distribution key +-- create table update_ao_table (a int, b int) WITH (appendonly=true) distributed by (a); +-- insert into update_ao_table select g, g from generate_series(1, 5) g; +-- select * from update_ao_table; +-- update update_ao_table set a = a + 1 where b = 3; +-- select * from update_ao_table; + +-- Update aoco table distribution key +-- create table update_aoco_table (a int, b int) WITH (appendonly=true, orientation=column) distributed by (a); +-- insert into update_aoco_table select g,g from generate_series(1, 5) g; +-- select * from update_aoco_table; +-- update update_aoco_table set a = a + 1 where b = 3; +-- select * from update_aoco_table; +-- end_ignore + +-- Update prepare +delete from s; +insert into s select generate_series(1, 5), generate_series(1, 5); +select * from r; +select * from s; +prepare update_s(int) as update s set a = s.a + $1 where exists (select 1 from r where s.a = r.b); +execute update_s(10); +select * from s; + +-- Confirm that a split update is not created for a table excluded by +-- constraints in the planner. +create table nosplitupdate (a int) distributed by (a); +-- start_ignore +explain update nosplitupdate set a=0 where a=1 and a<1; +-- end_ignore + +-- test split-update when split-node's flow is entry +create table tsplit_entry (c int); +insert into tsplit_entry values (1), (2); +analyze tsplit_entry; + +-- start_ignore +explain update tsplit_entry set c = s.a from (select count(*) as a from gp_segment_configuration) s; +-- end_ignore +update tsplit_entry set c = s.a from (select count(*) as a from gp_segment_configuration) s; +-- start_ignore +-- CREATE TABLE update_gp_foo ( +-- a_dist int, +-- b int, +-- c_part int, +-- d int +-- ) +-- WITH (appendonly=false) DISTRIBUTED BY (a_dist) PARTITION BY RANGE(c_part) +-- ( +-- PARTITION p20190305 START (1) END (2) WITH (tablename='update_gp_foo_1_prt_p20190305', appendonly=false) +-- ); + +-- CREATE TABLE update_gp_foo1 ( +-- a_dist int, +-- b int, +-- c_part int, +-- d int +-- ) +-- WITH (appendonly=false) DISTRIBUTED BY (a_dist) PARTITION BY RANGE(c_part) +-- ( +-- PARTITION p20190305 START (1) END (2) WITH (tablename='update_gp_foo1_1_prt_p20190305', appendonly=false) +-- ); + +-- INSERT INTO update_gp_foo VALUES (12, 40, 1, 50); +-- INSERT INTO update_gp_foo1 VALUES (12, 3, 1, 50); + +-- UPDATE update_gp_foo +-- SET b = update_gp_foo.c_part, +-- d = update_gp_foo1.a_dist +-- FROM update_gp_foo1; + +-- SELECT * from update_gp_foo; +-- end_ignore + +-- Test insert on conflict do update +-- Insert on conflict do update is an insert statement but might +-- invoke ExecUpdate on segments, but updating distkeys of a table +-- may lead to wrong data distribution. 
We will check this before +-- planning, if a `insert on conflict do update` statement set the +-- dist keys of the table, it will raise an error. +-- See github issue: https://github.com/greenplum-db/gpdb/issues/9444 +-- start_ignore +create table t_insert_on_conflict_update_distkey(a int, b int) distributed by (a); +create unique index uidx_t_insert_on_conflict_update_distkey on t_insert_on_conflict_update_distkey(a, b); + +-- the following statement should error out because the on conflict update want to +-- modify the tuple's distkey which might lead to wrong data distribution +insert into t_insert_on_conflict_update_distkey values (1, 1) on conflict(a, b) do update set a = 1; + +drop index uidx_t_insert_on_conflict_update_distkey; +drop table t_insert_on_conflict_update_distkey; +-- randomly distributed table cannot add unique constrain, so next we test replicated table + +create table t_insert_on_conflict_update_distkey(a int, b int) distributed replicated; +create unique index uidx_t_insert_on_conflict_update_distkey on t_insert_on_conflict_update_distkey(a, b); +-- the following statement should succeed because replicated table does not contain distkey +insert into t_insert_on_conflict_update_distkey values (1, 1) on conflict(a, b) do update set a = 1; +-- end_ignore + +-- Some tests on a partitioned table. +CREATE TABLE update_gp_rangep (a int, b int, orig_a int) DISTRIBUTED BY (b) PARTITION BY RANGE (a); + +CREATE TABLE update_gp_rangep_1_to_10 PARTITION OF update_gp_rangep FOR VALUES FROM (1) TO (10); +CREATE TABLE update_gp_rangep_10_to_20 PARTITION OF update_gp_rangep FOR VALUES FROM (10) TO (20); + +INSERT INTO update_gp_rangep SELECT g, g, g FROM generate_series(1, 4) g; + +-- Simple case: Same partition, same node. +UPDATE update_gp_rangep SET a = 9 WHERE a = 1; + +-- Distribution key update, same partition. +UPDATE update_gp_rangep SET b = 1 WHERE a = 2; + +-- Move row to different partition, but no change in distribution key +UPDATE update_gp_rangep SET a = 10 WHERE a = 3; + +-- Move row to different partition and also change distribution key +UPDATE update_gp_rangep SET a = 11, b = 1 WHERE a = 4; +-- start_ignore +SELECT tableoid::regclass, * FROM update_gp_rangep ORDER BY orig_a; +-- end_ignore +-- Also do a lookup with specific distribution key. If the rows were not +-- correctly moved across segments, this would fail to find them, assuming +-- that direct dispatch is effective. +SELECT tableoid::regclass, * FROM update_gp_rangep WHERE b = 1; + +-- start_ignore +drop table r; +drop table s; +drop table update_dist; +drop table update_ao_table; +drop table update_aoco_table; +drop table nosplitupdate; +drop table tsplit_entry; +-- end_ignore diff --git a/contrib/pax_storage/tools/cpplint.py b/contrib/pax_storage/tools/cpplint.py new file mode 100755 index 00000000000..b7652c47124 --- /dev/null +++ b/contrib/pax_storage/tools/cpplint.py @@ -0,0 +1,6938 @@ +#!/usr/bin/env python +# +# Copyright (c) 2009 Google Inc. All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above +# copyright notice, this list of conditions and the following disclaimer +# in the documentation and/or other materials provided with the +# distribution. +# * Neither the name of Google Inc. 
nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +"""Does google-lint on c++ files. + +The goal of this script is to identify places in the code that *may* +be in non-compliance with google style. It does not attempt to fix +up these problems -- the point is to educate. It does also not +attempt to find all problems, or to ensure that everything it does +find is legitimately a problem. + +In particular, we can get very confused by /* and // inside strings! +We do a small hack, which is to ignore //'s with "'s after them on the +same line, but it is far from perfect (in either direction). +""" + +# cpplint predates fstrings +# pylint: disable=consider-using-f-string + +# pylint: disable=invalid-name + +import codecs +import copy +import getopt +import glob +import itertools +import math # for log +import os +import re +import sre_compile +import string +import sys +import sysconfig +import unicodedata +import xml.etree.ElementTree + +# if empty, use defaults +_valid_extensions = set([]) + +__VERSION__ = '1.6.1' + +try: + # -- pylint: disable=used-before-assignment + xrange # Python 2 +except NameError: + # -- pylint: disable=redefined-builtin + xrange = range # Python 3 + + +_USAGE = """ +Syntax: cpplint.py [--verbose=#] [--output=emacs|eclipse|vs7|junit|sed|gsed] + [--filter=-x,+y,...] + [--counting=total|toplevel|detailed] [--root=subdir] + [--repository=path] + [--linelength=digits] [--headers=x,y,...] + [--recursive] + [--exclude=path] + [--extensions=hpp,cpp,...] + [--includeorder=default|standardcfirst] + [--quiet] + [--version] + [file] ... + + Style checker for C/C++ source files. + This is a fork of the Google style checker with minor extensions. + + The style guidelines this tries to follow are those in + https://google.github.io/styleguide/cppguide.html + + Every problem is given a confidence score from 1-5, with 5 meaning we are + certain of the problem, and 1 meaning it could be a legitimate construct. + This will miss some errors, and is not a substitute for a code review. + + To suppress false-positive errors of certain categories, add a + 'NOLINT(category[, category...])' comment to the line. NOLINT or NOLINT(*) + suppresses errors of all categories on that line. To suppress categories + on the next line use NOLINTNEXTLINE instead of NOLINT. + + The files passed in will be linted; at least one file must be provided. + Default linted extensions are %s. + Other file types will be ignored. + Change the extensions with the --extensions flag. + + Flags: + + output=emacs|eclipse|vs7|junit|sed|gsed + By default, the output is formatted to ease emacs parsing. 
Visual Studio + compatible output (vs7) may also be used. Further support exists for + eclipse (eclipse), and JUnit (junit). XML parsers such as those used + in Jenkins and Bamboo may also be used. + The sed format outputs sed commands that should fix some of the errors. + Note that this requires gnu sed. If that is installed as gsed on your + system (common e.g. on macOS with homebrew) you can use the gsed output + format. Sed commands are written to stdout, not stderr, so you should be + able to pipe output straight to a shell to run the fixes. + + verbose=# + Specify a number 0-5 to restrict errors to certain verbosity levels. + Errors with lower verbosity levels have lower confidence and are more + likely to be false positives. + + quiet + Don't print anything if no errors are found. + + filter=-x,+y,... + Specify a comma-separated list of category-filters to apply: only + error messages whose category names pass the filters will be printed. + (Category names are printed with the message and look like + "[whitespace/indent]".) Filters are evaluated left to right. + "-FOO" means "do not print categories that start with FOO". + "+FOO" means "do print categories that start with FOO". + + Examples: --filter=-whitespace,+whitespace/braces + --filter=-whitespace,-runtime/printf,+runtime/printf_format + --filter=-,+build/include_what_you_use + + To see a list of all the categories used in cpplint, pass no arg: + --filter= + + counting=total|toplevel|detailed + The total number of errors found is always printed. If + 'toplevel' is provided, then the count of errors in each of + the top-level categories like 'build' and 'whitespace' will + also be printed. If 'detailed' is provided, then a count + is provided for each category like 'build/class'. + + repository=path + The top level directory of the repository, used to derive the header + guard CPP variable. By default, this is determined by searching for a + path that contains .git, .hg, or .svn. When this flag is specified, the + given path is used instead. This option allows the header guard CPP + variable to remain consistent even if members of a team have different + repository root directories (such as when checking out a subdirectory + with SVN). In addition, users of non-mainstream version control systems + can use this flag to ensure readable header guard CPP variables. + + Examples: + Assuming that Alice checks out ProjectName and Bob checks out + ProjectName/trunk and trunk contains src/chrome/ui/browser.h, then + with no --repository flag, the header guard CPP variable will be: + + Alice => TRUNK_SRC_CHROME_BROWSER_UI_BROWSER_H_ + Bob => SRC_CHROME_BROWSER_UI_BROWSER_H_ + + If Alice uses the --repository=trunk flag and Bob omits the flag or + uses --repository=. then the header guard CPP variable will be: + + Alice => SRC_CHROME_BROWSER_UI_BROWSER_H_ + Bob => SRC_CHROME_BROWSER_UI_BROWSER_H_ + + root=subdir + The root directory used for deriving header guard CPP variable. + This directory is relative to the top level directory of the repository + which by default is determined by searching for a directory that contains + .git, .hg, or .svn but can also be controlled with the --repository flag. + If the specified directory does not exist, this flag is ignored. 
+ + Examples: + Assuming that src is the top level directory of the repository (and + cwd=top/src), the header guard CPP variables for + src/chrome/browser/ui/browser.h are: + + No flag => CHROME_BROWSER_UI_BROWSER_H_ + --root=chrome => BROWSER_UI_BROWSER_H_ + --root=chrome/browser => UI_BROWSER_H_ + --root=.. => SRC_CHROME_BROWSER_UI_BROWSER_H_ + + linelength=digits + This is the allowed line length for the project. The default value is + 80 characters. + + Examples: + --linelength=120 + + recursive + Search for files to lint recursively. Each directory given in the list + of files to be linted is replaced by all files that descend from that + directory. Files with extensions not in the valid extensions list are + excluded. + + exclude=path + Exclude the given path from the list of files to be linted. Relative + paths are evaluated relative to the current directory and shell globbing + is performed. This flag can be provided multiple times to exclude + multiple files. + + Examples: + --exclude=one.cc + --exclude=src/*.cc + --exclude=src/*.cc --exclude=test/*.cc + + extensions=extension,extension,... + The allowed file extensions that cpplint will check + + Examples: + --extensions=%s + + includeorder=default|standardcfirst + For the build/include_order rule, the default is to blindly assume angle + bracket includes with file extension are c-system-headers (default), + even knowing this will have false classifications. + The default is established at google. + standardcfirst means to instead use an allow-list of known c headers and + treat all others as separate group of "other system headers". The C headers + included are those of the C-standard lib and closely related ones. + + headers=x,y,... + The header extensions that cpplint will treat as .h in checks. Values are + automatically added to --extensions list. + (by default, only files with extensions %s will be assumed to be headers) + + Examples: + --headers=%s + --headers=hpp,hxx + --headers=hpp + + cpplint.py supports per-directory configurations specified in CPPLINT.cfg + files. CPPLINT.cfg file can contain a number of key=value pairs. + Currently the following options are supported: + + set noparent + filter=+filter1,-filter2,... + exclude_files=regex + linelength=80 + root=subdir + headers=x,y,... + + "set noparent" option prevents cpplint from traversing directory tree + upwards looking for more .cfg files in parent directories. This option + is usually placed in the top-level project directory. + + The "filter" option is similar in function to --filter flag. It specifies + message filters in addition to the |_DEFAULT_FILTERS| and those specified + through --filter command-line flag. + + "exclude_files" allows to specify a regular expression to be matched against + a file name. If the expression matches, the file is skipped and not run + through the linter. + + "linelength" allows to specify the allowed line length for the project. + + The "root" option is similar in function to the --root flag (see example + above). Paths are relative to the directory of the CPPLINT.cfg. + + The "headers" option is similar in function to the --headers flag + (see example above). + + CPPLINT.cfg has an effect on files in the same directory and all + sub-directories, unless overridden by a nested configuration file. 
+ + Example file: + filter=-build/include_order,+build/include_alpha + exclude_files=.*\\.cc + + The above example disables build/include_order warning and enables + build/include_alpha as well as excludes all .cc from being + processed by linter, in the current directory (where the .cfg + file is located) and all sub-directories. +""" + +# We categorize each error message we print. Here are the categories. +# We want an explicit list so we can list them all in cpplint --filter=. +# If you add a new error message with a new category, add it to the list +# here! cpplint_unittest.py should tell you if you forget to do this. +_ERROR_CATEGORIES = [ + 'build/class', + 'build/c++14', + 'build/c++tr1', + 'build/deprecated', + 'build/endif_comment', + 'build/explicit_make_pair', + 'build/forward_decl', + 'build/header_guard', + 'build/include', + 'build/include_subdir', + 'build/include_alpha', + 'build/include_order', + 'build/include_what_you_use', + 'build/namespaces_headers', + 'build/namespaces_literals', + 'build/namespaces', + 'build/printf_format', + 'build/storage_class', + 'legal/copyright', + 'readability/alt_tokens', + 'readability/braces', + 'readability/casting', + 'readability/check', + 'readability/constructors', + 'readability/fn_size', + 'readability/inheritance', + 'readability/multiline_comment', + 'readability/multiline_string', + 'readability/namespace', + 'readability/nolint', + 'readability/nul', + 'readability/strings', + 'readability/todo', + 'readability/utf8', + 'runtime/arrays', + 'runtime/casting', + 'runtime/explicit', + 'runtime/int', + 'runtime/init', + 'runtime/invalid_increment', + 'runtime/member_string_references', + 'runtime/memset', + 'runtime/indentation_namespace', + 'runtime/operator', + 'runtime/printf', + 'runtime/printf_format', + 'runtime/references', + 'runtime/string', + 'runtime/threadsafe_fn', + 'runtime/vlog', + 'whitespace/blank_line', + 'whitespace/braces', + 'whitespace/comma', + 'whitespace/comments', + 'whitespace/empty_conditional_body', + 'whitespace/empty_if_body', + 'whitespace/empty_loop_body', + 'whitespace/end_of_line', + 'whitespace/ending_newline', + 'whitespace/forcolon', + 'whitespace/indent', + 'whitespace/line_length', + 'whitespace/newline', + 'whitespace/operators', + 'whitespace/parens', + 'whitespace/semicolon', + 'whitespace/tab', + 'whitespace/todo', + ] + +# keywords to use with --outputs which generate stdout for machine processing +_MACHINE_OUTPUTS = [ + 'junit', + 'sed', + 'gsed' +] + +# These error categories are no longer enforced by cpplint, but for backwards- +# compatibility they may still appear in NOLINT comments. +_LEGACY_ERROR_CATEGORIES = [ + 'readability/streams', + 'readability/function', + ] + +# These prefixes for categories should be ignored since they relate to other +# tools which also use the NOLINT syntax, e.g. clang-tidy. +_OTHER_NOLINT_CATEGORY_PREFIXES = [ + 'clang-analyzer', + ] + +# The default state of the category filter. This is overridden by the --filter= +# flag. By default all errors are on, so only add here categories that should be +# off by default (i.e., categories that must be enabled by the --filter= flags). +# All entries here should start with a '-' or '+', as in the --filter= flag. +_DEFAULT_FILTERS = ['-build/include_alpha'] + +# The default list of categories suppressed for C (not C++) files. +_DEFAULT_C_SUPPRESSED_CATEGORIES = [ + 'readability/casting', + ] + +# The default list of categories suppressed for Linux Kernel files. 
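+# A file opts into these suppressions by carrying a "LINT_KERNEL_FILE"
+# marker, which is matched by _SEARCH_KERNEL_FILE below; for such files
+# tab indentation is not flagged.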
+_DEFAULT_KERNEL_SUPPRESSED_CATEGORIES = [
+ 'whitespace/tab',
+ ]
+
+# We used to check for high-bit characters, but after much discussion we
+# decided those were OK, as long as they were in UTF-8 and didn't represent
+# hard-coded international strings, which belong in a separate i18n file.
+
+# C++ headers
+_CPP_HEADERS = frozenset([
+ # Legacy
+ 'algobase.h',
+ 'algo.h',
+ 'alloc.h',
+ 'builtinbuf.h',
+ 'bvector.h',
+ # 'complex.h', collides with System C header "complex.h"
+ 'defalloc.h',
+ 'deque.h',
+ 'editbuf.h',
+ 'fstream.h',
+ 'function.h',
+ 'hash_map',
+ 'hash_map.h',
+ 'hash_set',
+ 'hash_set.h',
+ 'hashtable.h',
+ 'heap.h',
+ 'indstream.h',
+ 'iomanip.h',
+ 'iostream.h',
+ 'istream.h',
+ 'iterator.h',
+ 'list.h',
+ 'map.h',
+ 'multimap.h',
+ 'multiset.h',
+ 'ostream.h',
+ 'pair.h',
+ 'parsestream.h',
+ 'pfstream.h',
+ 'procbuf.h',
+ 'pthread_alloc',
+ 'pthread_alloc.h',
+ 'rope',
+ 'rope.h',
+ 'ropeimpl.h',
+ 'set.h',
+ 'slist',
+ 'slist.h',
+ 'stack.h',
+ 'stdiostream.h',
+ 'stl_alloc.h',
+ 'stl_relops.h',
+ 'streambuf.h',
+ 'stream.h',
+ 'strfile.h',
+ 'strstream.h',
+ 'tempbuf.h',
+ 'tree.h',
+ 'type_traits.h',
+ 'vector.h',
+ # 17.6.1.2 C++ library headers
+ 'algorithm',
+ 'array',
+ 'atomic',
+ 'bitset',
+ 'chrono',
+ 'codecvt',
+ 'complex',
+ 'condition_variable',
+ 'deque',
+ 'exception',
+ 'forward_list',
+ 'fstream',
+ 'functional',
+ 'future',
+ 'initializer_list',
+ 'iomanip',
+ 'ios',
+ 'iosfwd',
+ 'iostream',
+ 'istream',
+ 'iterator',
+ 'limits',
+ 'list',
+ 'locale',
+ 'map',
+ 'memory',
+ 'mutex',
+ 'new',
+ 'numeric',
+ 'ostream',
+ 'queue',
+ 'random',
+ 'ratio',
+ 'regex',
+ 'scoped_allocator',
+ 'set',
+ 'sstream',
+ 'stack',
+ 'stdexcept',
+ 'streambuf',
+ 'string',
+ 'strstream',
+ 'system_error',
+ 'thread',
+ 'tuple',
+ 'typeindex',
+ 'typeinfo',
+ 'type_traits',
+ 'unordered_map',
+ 'unordered_set',
+ 'utility',
+ 'valarray',
+ 'vector',
+ # 17.6.1.2 C++14 headers
+ 'shared_mutex',
+ # 17.6.1.2 C++17 headers
+ 'any',
+ 'charconv',
+ 'codecvt',
+ 'execution',
+ 'filesystem',
+ 'memory_resource',
+ 'optional',
+ 'string_view',
+ 'variant',
+ # 17.6.1.2 C++20 headers
+ 'barrier',
+ 'bit',
+ 'compare',
+ 'concepts',
+ 'coroutine',
+ 'format',
+ 'latch',
+ 'numbers',
+ 'ranges',
+ 'semaphore',
+ 'source_location',
+ 'span',
+ 'stop_token',
+ 'syncstream',
+ 'version',
+ # 17.6.1.2 C++ headers for C library facilities
+ 'cassert',
+ 'ccomplex',
+ 'cctype',
+ 'cerrno',
+ 'cfenv',
+ 'cfloat',
+ 'cinttypes',
+ 'ciso646',
+ 'climits',
+ 'clocale',
+ 'cmath',
+ 'csetjmp',
+ 'csignal',
+ 'cstdalign',
+ 'cstdarg',
+ 'cstdbool',
+ 'cstddef',
+ 'cstdint',
+ 'cstdio',
+ 'cstdlib',
+ 'cstring',
+ 'ctgmath',
+ 'ctime',
+ 'cuchar',
+ 'cwchar',
+ 'cwctype',
+ ])
+
+# C headers
+_C_HEADERS = frozenset([
+ # System C headers
+ 'assert.h',
+ 'complex.h',
+ 'ctype.h',
+ 'errno.h',
+ 'fenv.h',
+ 'float.h',
+ 'inttypes.h',
+ 'iso646.h',
+ 'limits.h',
+ 'locale.h',
+ 'math.h',
+ 'setjmp.h',
+ 'signal.h',
+ 'stdalign.h',
+ 'stdarg.h',
+ 'stdatomic.h',
+ 'stdbool.h',
+ 'stddef.h',
+ 'stdint.h',
+ 'stdio.h',
+ 'stdlib.h',
+ 'stdnoreturn.h',
+ 'string.h',
+ 'tgmath.h',
+ 'threads.h',
+ 'time.h',
+ 'uchar.h',
+ 'wchar.h',
+ 'wctype.h',
+ # additional POSIX C headers
+ 'aio.h',
+ 'arpa/inet.h',
+ 'cpio.h',
+ 'dirent.h',
+ 'dlfcn.h',
+ 'fcntl.h',
+ 'fmtmsg.h',
+ 'fnmatch.h',
+ 'ftw.h',
+ 'glob.h',
+ 'grp.h',
+ 'iconv.h',
+ 'langinfo.h',
+ 'libgen.h',
+ 'monetary.h',
+ 'mqueue.h',
+ 'ndbm.h',
+ 'net/if.h',
+ 'netdb.h',
+ 'netinet/in.h',
+ 'netinet/tcp.h',
+ 'nl_types.h',
+ 'poll.h',
+ 'pthread.h',
+ 'pwd.h',
+ 'regex.h',
+ 'sched.h',
+ 'search.h',
+ 'semaphore.h',
+ 'setjmp.h',
+ 'signal.h',
+ 'spawn.h',
+ 'strings.h',
+ 'stropts.h',
+ 'syslog.h',
+ 'tar.h',
+ 'termios.h',
+ 'trace.h',
+ 'ulimit.h',
+ 'unistd.h',
+ 'utime.h',
+ 'utmpx.h',
+ 'wordexp.h',
+ # additional GNUlib headers
+ 'a.out.h',
+ 'aliases.h',
+ 'alloca.h',
+ 'ar.h',
+ 'argp.h',
+ 'argz.h',
+ 'byteswap.h',
+ 'crypt.h',
+ 'endian.h',
+ 'envz.h',
+ 'err.h',
+ 'error.h',
+ 'execinfo.h',
+ 'fpu_control.h',
+ 'fstab.h',
+ 'fts.h',
+ 'getopt.h',
+ 'gshadow.h',
+ 'ieee754.h',
+ 'ifaddrs.h',
+ 'libintl.h',
+ 'mcheck.h',
+ 'mntent.h',
+ 'obstack.h',
+ 'paths.h',
+ 'printf.h',
+ 'pty.h',
+ 'resolv.h',
+ 'shadow.h',
+ 'sysexits.h',
+ 'ttyent.h',
+ # Additional linux glibc headers
+ 'dlfcn.h',
+ 'elf.h',
+ 'features.h',
+ 'gconv.h',
+ 'gnu-versions.h',
+ 'lastlog.h',
+ 'libio.h',
+ 'link.h',
+ 'malloc.h',
+ 'memory.h',
+ 'netash/ash.h',
+ 'netatalk/at.h',
+ 'netax25/ax25.h',
+ 'neteconet/ec.h',
+ 'netipx/ipx.h',
+ 'netiucv/iucv.h',
+ 'netpacket/packet.h',
+ 'netrom/netrom.h',
+ 'netrose/rose.h',
+ 'nfs/nfs.h',
+ 'nl_types.h',
+ 'nss.h',
+ 're_comp.h',
+ 'regexp.h',
+ 'sched.h',
+ 'sgtty.h',
+ 'stab.h',
+ 'stdc-predef.h',
+ 'stdio_ext.h',
+ 'syscall.h',
+ 'termio.h',
+ 'thread_db.h',
+ 'ucontext.h',
+ 'ustat.h',
+ 'utmp.h',
+ 'values.h',
+ 'wait.h',
+ 'xlocale.h',
+ # Hardware specific headers
+ 'arm_neon.h',
+ 'emmintrin.h',
+ 'xmmintrin.h',
+ ])
+
+# Folders of C libraries so commonly used in C++,
+# that they have parity with standard C libraries.
+C_STANDARD_HEADER_FOLDERS = frozenset([
+ # standard C library
+ "sys",
+ # glibc for linux
+ "arpa",
+ "asm-generic",
+ "bits",
+ "gnu",
+ "net",
+ "netinet",
+ "protocols",
+ "rpc",
+ "rpcsvc",
+ "scsi",
+ # linux kernel header
+ "drm",
+ "linux",
+ "misc",
+ "mtd",
+ "rdma",
+ "sound",
+ "video",
+ "xen",
+ ])
+
+# Type names
+_TYPES = re.compile(
+ r'^(?:'
+ # [dcl.type.simple]
+ r'(char(16_t|32_t)?)|wchar_t|'
+ r'bool|short|int|long|signed|unsigned|float|double|'
+ # [support.types]
+ r'(ptrdiff_t|size_t|max_align_t|nullptr_t)|'
+ # [cstdint.syn]
+ r'(u?int(_fast|_least)?(8|16|32|64)_t)|'
+ r'(u?int(max|ptr)_t)|'
+ r')$')
+
+
+# These headers are excluded from [build/include] and [build/include_order]
+# checks:
+# - Anything not following google file name conventions (containing an
+# uppercase character, such as Python.h or nsStringAPI.h, for example).
+# - Lua headers.
+_THIRD_PARTY_HEADERS_PATTERN = re.compile(
+ r'^(?:[^/]*[A-Z][^/]*\.h|lua\.h|lauxlib\.h|lualib\.h)$')
+
+# Pattern for matching FileInfo.BaseName() against test file name
+_test_suffixes = ['_test', '_regtest', '_unittest']
+_TEST_FILE_SUFFIX = '(' + '|'.join(_test_suffixes) + r')$'
+
+# Pattern that matches only complete whitespace, possibly across multiple lines.
+_EMPTY_CONDITIONAL_BODY_PATTERN = re.compile(r'^\s*$', re.DOTALL)
+
+# Assertion macros. These are defined in base/logging.h and
+# testing/base/public/gunit.h.
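+# For example, the replacement tables built below map a macro and comparison
+# operator to the dedicated two-argument form, so CHECK(a == b) can be
+# suggested as CHECK_EQ(a, b) and EXPECT_FALSE(a == b) as EXPECT_NE(a, b).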
+_CHECK_MACROS = [ + 'DCHECK', 'CHECK', + 'EXPECT_TRUE', 'ASSERT_TRUE', + 'EXPECT_FALSE', 'ASSERT_FALSE', + ] + +# Replacement macros for CHECK/DCHECK/EXPECT_TRUE/EXPECT_FALSE +_CHECK_REPLACEMENT = dict([(macro_var, {}) for macro_var in _CHECK_MACROS]) + +for op, replacement in [('==', 'EQ'), ('!=', 'NE'), + ('>=', 'GE'), ('>', 'GT'), + ('<=', 'LE'), ('<', 'LT')]: + _CHECK_REPLACEMENT['DCHECK'][op] = 'DCHECK_%s' % replacement + _CHECK_REPLACEMENT['CHECK'][op] = 'CHECK_%s' % replacement + _CHECK_REPLACEMENT['EXPECT_TRUE'][op] = 'EXPECT_%s' % replacement + _CHECK_REPLACEMENT['ASSERT_TRUE'][op] = 'ASSERT_%s' % replacement + +for op, inv_replacement in [('==', 'NE'), ('!=', 'EQ'), + ('>=', 'LT'), ('>', 'LE'), + ('<=', 'GT'), ('<', 'GE')]: + _CHECK_REPLACEMENT['EXPECT_FALSE'][op] = 'EXPECT_%s' % inv_replacement + _CHECK_REPLACEMENT['ASSERT_FALSE'][op] = 'ASSERT_%s' % inv_replacement + +# Alternative tokens and their replacements. For full list, see section 2.5 +# Alternative tokens [lex.digraph] in the C++ standard. +# +# Digraphs (such as '%:') are not included here since it's a mess to +# match those on a word boundary. +_ALT_TOKEN_REPLACEMENT = { + 'and': '&&', + 'bitor': '|', + 'or': '||', + 'xor': '^', + 'compl': '~', + 'bitand': '&', + 'and_eq': '&=', + 'or_eq': '|=', + 'xor_eq': '^=', + 'not': '!', + 'not_eq': '!=' + } + +# Compile regular expression that matches all the above keywords. The "[ =()]" +# bit is meant to avoid matching these keywords outside of boolean expressions. +# +# False positives include C-style multi-line comments and multi-line strings +# but those have always been troublesome for cpplint. +_ALT_TOKEN_REPLACEMENT_PATTERN = re.compile( + r'[ =()](' + ('|'.join(_ALT_TOKEN_REPLACEMENT.keys())) + r')(?=[ (]|$)') + + +# These constants define types of headers for use with +# _IncludeState.CheckNextIncludeOrder(). +_C_SYS_HEADER = 1 +_CPP_SYS_HEADER = 2 +_OTHER_SYS_HEADER = 3 +_LIKELY_MY_HEADER = 4 +_POSSIBLE_MY_HEADER = 5 +_OTHER_HEADER = 6 + +# These constants define the current inline assembly state +_NO_ASM = 0 # Outside of inline assembly block +_INSIDE_ASM = 1 # Inside inline assembly block +_END_ASM = 2 # Last line of inline assembly block +_BLOCK_ASM = 3 # The whole block is an inline assembly block + +# Match start of assembly blocks +_MATCH_ASM = re.compile(r'^\s*(?:asm|_asm|__asm|__asm__)' + r'(?:\s+(volatile|__volatile__))?' + r'\s*[{(]') + +# Match strings that indicate we're working on a C (not C++) file. +_SEARCH_C_FILE = re.compile(r'\b(?:LINT_C_FILE|' + r'vim?:\s*.*(\s*|:)filetype=c(\s*|:|$))') + +# Match string that indicates we're working on a Linux Kernel file. +_SEARCH_KERNEL_FILE = re.compile(r'\b(?:LINT_KERNEL_FILE)') + +# Commands for sed to fix the problem +_SED_FIXUPS = { + 'Remove spaces around =': r's/ = /=/', + 'Remove spaces around !=': r's/ != /!=/', + 'Remove space before ( in if (': r's/if (/if(/', + 'Remove space before ( in for (': r's/for (/for(/', + 'Remove space before ( in while (': r's/while (/while(/', + 'Remove space before ( in switch (': r's/switch (/switch(/', + 'Should have a space between // and comment': r's/\/\//\/\/ /', + 'Missing space before {': r's/\([^ ]\){/\1 {/', + 'Tab found, replace by spaces': r's/\t/ /g', + 'Line ends in whitespace. 
Consider deleting these extra spaces.': r's/\s*$//',
+ 'You don\'t need a ; after a }': r's/};/}/',
+ 'Missing space after ,': r's/,\([^ ]\)/, \1/g',
+}
+
+_regexp_compile_cache = {}
+
+# {str, set(int)}: a map from error categories to sets of linenumbers
+# on which those errors are expected and should be suppressed.
+_error_suppressions = {}
+
+# The root directory used for deriving header guard CPP variable.
+# This is set by --root flag.
+_root = None
+_root_debug = False
+
+# The top level repository directory. If set, _root is calculated relative to
+# this directory instead of the directory containing version control artifacts.
+# This is set by the --repository flag.
+_repository = None
+
+# Files to exclude from linting. This is set by the --exclude flag.
+_excludes = None
+
+# Whether to suppress all PrintInfo messages, UNRELATED to --quiet flag
+_quiet = False
+
+# The allowed line length of files.
+# This is set by --linelength flag.
+_line_length = 80
+
+# This allows using an include order rule different from the default
+_include_order = "default"
+
+try:
+ # -- pylint: disable=used-before-assignment
+ unicode
+except NameError:
+ # -- pylint: disable=redefined-builtin
+ basestring = unicode = str
+
+try:
+ # -- pylint: disable=used-before-assignment
+ long
+except NameError:
+ # -- pylint: disable=redefined-builtin
+ long = int
+
+if sys.version_info < (3,):
+ # -- pylint: disable=no-member
+ # BINARY_TYPE = str
+ itervalues = dict.itervalues
+ iteritems = dict.iteritems
+else:
+ # BINARY_TYPE = bytes
+ itervalues = dict.values
+ iteritems = dict.items
+
+def unicode_escape_decode(x):
+ if sys.version_info < (3,):
+ return codecs.unicode_escape_decode(x)[0]
+ else:
+ return x
+
+# Treat all headers starting with 'h' equally: .h, .hpp, .hxx etc.
+# This is set by --headers flag.
+_hpp_headers = set([])
+
+# {str, bool}: a map from error categories to booleans which indicate if the
+# category should be suppressed for every line.
+_global_error_suppressions = {}
+
+def ProcessHppHeadersOption(val):
+ global _hpp_headers
+ try:
+ _hpp_headers = {ext.strip() for ext in val.split(',')}
+ except ValueError:
+ PrintUsage('Header extensions must be a comma-separated list.')
+
+def ProcessIncludeOrderOption(val):
+ if val is None or val == "default":
+ pass
+ elif val == "standardcfirst":
+ global _include_order
+ _include_order = val
+ else:
+ PrintUsage('Invalid includeorder value %s. Expected default|standardcfirst' % val)
+
+def IsHeaderExtension(file_extension):
+ return file_extension in GetHeaderExtensions()
+
+def GetHeaderExtensions():
+ if _hpp_headers:
+ return _hpp_headers
+ if _valid_extensions:
+ return {h for h in _valid_extensions if 'h' in h}
+ return set(['h', 'hh', 'hpp', 'hxx', 'h++', 'cuh'])
+
+# The allowed extensions for file names
+# This is set by --extensions flag
+def GetAllExtensions():
+ return GetHeaderExtensions().union(_valid_extensions or set(
+ ['c', 'cc', 'cpp', 'cxx', 'c++', 'cu']))
+
+def ProcessExtensionsOption(val):
+ global _valid_extensions
+ try:
+ extensions = [ext.strip() for ext in val.split(',')]
+ _valid_extensions = set(extensions)
+ except ValueError:
+ PrintUsage('Extensions should be a comma-separated list of values; '
+ 'for example: extensions=hpp,cpp\n'
+ 'This could not be parsed: "%s"' % (val,))
+
+def GetNonHeaderExtensions():
+ return GetAllExtensions().difference(GetHeaderExtensions())
+
+def ParseNolintSuppressions(filename, raw_line, linenum, error):
+ """Updates the global list of line error-suppressions.
+ + Parses any NOLINT comments on the current line, updating the global + error_suppressions store. Reports an error if the NOLINT comment + was malformed. + + Args: + filename: str, the name of the input file. + raw_line: str, the line of input text, with comments. + linenum: int, the number of the current line. + error: function, an error handler. + """ + matched = Search(r'\bNOLINT(NEXTLINE)?\b(\([^)]+\))?', raw_line) + if matched: + if matched.group(1): + suppressed_line = linenum + 1 + else: + suppressed_line = linenum + categories = matched.group(2) + if categories in (None, '(*)'): # => "suppress all" + _error_suppressions.setdefault(None, set()).add(suppressed_line) + elif categories.startswith('(') and categories.endswith(')'): + for category in set(map(lambda c: c.strip(), categories[1:-1].split(','))): + if category in _ERROR_CATEGORIES: + _error_suppressions.setdefault(category, set()).add(suppressed_line) + elif any(c for c in _OTHER_NOLINT_CATEGORY_PREFIXES if category.startswith(c)): + # Ignore any categories from other tools. + pass + elif category not in _LEGACY_ERROR_CATEGORIES: + error(filename, linenum, 'readability/nolint', 5, + 'Unknown NOLINT error category: %s' % category) + + +def ProcessGlobalSuppresions(lines): + """Updates the list of global error suppressions. + + Parses any lint directives in the file that have global effect. + + Args: + lines: An array of strings, each representing a line of the file, with the + last element being empty if the file is terminated with a newline. + """ + for line in lines: + if _SEARCH_C_FILE.search(line): + for category in _DEFAULT_C_SUPPRESSED_CATEGORIES: + _global_error_suppressions[category] = True + if _SEARCH_KERNEL_FILE.search(line): + for category in _DEFAULT_KERNEL_SUPPRESSED_CATEGORIES: + _global_error_suppressions[category] = True + + +def ResetNolintSuppressions(): + """Resets the set of NOLINT suppressions to empty.""" + _error_suppressions.clear() + _global_error_suppressions.clear() + + +def IsErrorSuppressedByNolint(category, linenum): + """Returns true if the specified error category is suppressed on this line. + + Consults the global error_suppressions map populated by + ParseNolintSuppressions/ProcessGlobalSuppresions/ResetNolintSuppressions. + + Args: + category: str, the category of the error. + linenum: int, the current line number. + Returns: + bool, True iff the error should be suppressed due to a NOLINT comment or + global suppression. + """ + return (_global_error_suppressions.get(category, False) or + linenum in _error_suppressions.get(category, set()) or + linenum in _error_suppressions.get(None, set())) + + +def Match(pattern, s): + """Matches the string with the pattern, caching the compiled regexp.""" + # The regexp compilation caching is inlined in both Match and Search for + # performance reasons; factoring it out into a separate function turns out + # to be noticeably expensive. + if pattern not in _regexp_compile_cache: + _regexp_compile_cache[pattern] = sre_compile.compile(pattern) + return _regexp_compile_cache[pattern].match(s) + + +def ReplaceAll(pattern, rep, s): + """Replaces instances of pattern in a string with a replacement. + + The compiled regex is kept in a cache shared by Match and Search. 
+
+ Args:
+ pattern: regex pattern
+ rep: replacement text
+ s: search string
+
+ Returns:
+ string with replacements made (or original string if no replacements)
+ """
+ if pattern not in _regexp_compile_cache:
+ _regexp_compile_cache[pattern] = sre_compile.compile(pattern)
+ return _regexp_compile_cache[pattern].sub(rep, s)
+
+
+def Search(pattern, s):
+ """Searches the string for the pattern, caching the compiled regexp."""
+ if pattern not in _regexp_compile_cache:
+ _regexp_compile_cache[pattern] = sre_compile.compile(pattern)
+ return _regexp_compile_cache[pattern].search(s)
+
+
+def _IsSourceExtension(s):
+ """File extension (excluding dot) matches a source file extension."""
+ return s in GetNonHeaderExtensions()
+
+
+class _IncludeState(object):
+ """Tracks line numbers for includes, and the order in which includes appear.
+
+ include_list contains a list of lists of (header, line number) pairs.
+ It's a list of lists rather than just one flat list to make it
+ easier to update across preprocessor boundaries.
+
+ Call CheckNextIncludeOrder() once for each header in the file, passing
+ in the type constants defined above. Calls in an illegal order will
+ raise an _IncludeError with an appropriate error message.
+
+ """
+ # self._section will move monotonically through this set. If it ever
+ # needs to move backwards, CheckNextIncludeOrder will raise an error.
+ _INITIAL_SECTION = 0
+ _MY_H_SECTION = 1
+ _C_SECTION = 2
+ _CPP_SECTION = 3
+ _OTHER_SYS_SECTION = 4
+ _OTHER_H_SECTION = 5
+
+ _TYPE_NAMES = {
+ _C_SYS_HEADER: 'C system header',
+ _CPP_SYS_HEADER: 'C++ system header',
+ _OTHER_SYS_HEADER: 'other system header',
+ _LIKELY_MY_HEADER: 'header this file implements',
+ _POSSIBLE_MY_HEADER: 'header this file may implement',
+ _OTHER_HEADER: 'other header',
+ }
+ _SECTION_NAMES = {
+ _INITIAL_SECTION: "... nothing. (This can't be an error.)",
+ _MY_H_SECTION: 'a header this file implements',
+ _C_SECTION: 'C system header',
+ _CPP_SECTION: 'C++ system header',
+ _OTHER_SYS_SECTION: 'other system header',
+ _OTHER_H_SECTION: 'other header',
+ }
+
+ def __init__(self):
+ self.include_list = [[]]
+ self._section = None
+ self._last_header = None
+ self.ResetSection('')
+
+ def FindHeader(self, header):
+ """Check if a header has already been included.
+
+ Args:
+ header: header to check.
+ Returns:
+ Line number of previous occurrence, or -1 if the header has not
+ been seen before.
+ """
+ for section_list in self.include_list:
+ for f in section_list:
+ if f[0] == header:
+ return f[1]
+ return -1
+
+ def ResetSection(self, directive):
+ """Reset section checking for preprocessor directive.
+
+ Args:
+ directive: preprocessor directive (e.g. "if", "else").
+ """
+ # The name of the current section.
+ self._section = self._INITIAL_SECTION
+ # The path of last found header.
+ self._last_header = ''
+
+ # Update list of includes. Note that we never pop from the
+ # include list.
+ if directive in ('if', 'ifdef', 'ifndef'):
+ self.include_list.append([])
+ elif directive in ('else', 'elif'):
+ self.include_list[-1] = []
+
+ def SetLastHeader(self, header_path):
+ self._last_header = header_path
+
+ def CanonicalizeAlphabeticalOrder(self, header_path):
+ """Returns a path canonicalized for alphabetical comparison.
+
+ - replaces "-" with "_" so they both cmp the same.
+ - removes '-inl' since we don't require them to be after the main header.
+ - lowercase everything, just in case.
+
+ Args:
+ header_path: Path to be canonicalized.
+
+ Returns:
+ Canonicalized path.
+ """ + return header_path.replace('-inl.h', '.h').replace('-', '_').lower() + + def IsInAlphabeticalOrder(self, clean_lines, linenum, header_path): + """Check if a header is in alphabetical order with the previous header. + + Args: + clean_lines: A CleansedLines instance containing the file. + linenum: The number of the line to check. + header_path: Canonicalized header to be checked. + + Returns: + Returns true if the header is in alphabetical order. + """ + # If previous section is different from current section, _last_header will + # be reset to empty string, so it's always less than current header. + # + # If previous line was a blank line, assume that the headers are + # intentionally sorted the way they are. + if (self._last_header > header_path and + Match(r'^\s*#\s*include\b', clean_lines.elided[linenum - 1])): + return False + return True + + def CheckNextIncludeOrder(self, header_type): + """Returns a non-empty error message if the next header is out of order. + + This function also updates the internal state to be ready to check + the next include. + + Args: + header_type: One of the _XXX_HEADER constants defined above. + + Returns: + The empty string if the header is in the right order, or an + error message describing what's wrong. + + """ + error_message = ('Found %s after %s' % + (self._TYPE_NAMES[header_type], + self._SECTION_NAMES[self._section])) + + last_section = self._section + + if header_type == _C_SYS_HEADER: + if self._section <= self._C_SECTION: + self._section = self._C_SECTION + else: + self._last_header = '' + return error_message + elif header_type == _CPP_SYS_HEADER: + if self._section <= self._CPP_SECTION: + self._section = self._CPP_SECTION + else: + self._last_header = '' + return error_message + elif header_type == _OTHER_SYS_HEADER: + if self._section <= self._OTHER_SYS_SECTION: + self._section = self._OTHER_SYS_SECTION + else: + self._last_header = '' + return error_message + elif header_type == _LIKELY_MY_HEADER: + if self._section <= self._MY_H_SECTION: + self._section = self._MY_H_SECTION + else: + self._section = self._OTHER_H_SECTION + elif header_type == _POSSIBLE_MY_HEADER: + if self._section <= self._MY_H_SECTION: + self._section = self._MY_H_SECTION + else: + # This will always be the fallback because we're not sure + # enough that the header is associated with this file. + self._section = self._OTHER_H_SECTION + else: + assert header_type == _OTHER_HEADER + self._section = self._OTHER_H_SECTION + + if last_section != self._section: + self._last_header = '' + + return '' + + +class _CppLintState(object): + """Maintains module-wide state..""" + + def __init__(self): + self.verbose_level = 1 # global setting. + self.error_count = 0 # global count of reported errors + # filters to apply when emitting error messages + self.filters = _DEFAULT_FILTERS[:] + # backup of filter list. Used to restore the state after each file. + self._filters_backup = self.filters[:] + self.counting = 'total' # In what way are we counting errors? + self.errors_by_category = {} # string to int dict storing error counts + self.quiet = False # Suppress non-error messagess? + + # output format: + # "emacs" - format that emacs can parse (default) + # "eclipse" - format that eclipse can parse + # "vs7" - format that Microsoft Visual Studio 7 can parse + # "junit" - format that Jenkins, Bamboo, etc can parse + # "sed" - returns a gnu sed command to fix the problem + # "gsed" - like sed, but names the command gsed, e.g. 
for macOS homebrew users + self.output_format = 'emacs' + + # For JUnit output, save errors and failures until the end so that they + # can be written into the XML + self._junit_errors = [] + self._junit_failures = [] + + def SetOutputFormat(self, output_format): + """Sets the output format for errors.""" + self.output_format = output_format + + def SetQuiet(self, quiet): + """Sets the module's quiet settings, and returns the previous setting.""" + last_quiet = self.quiet + self.quiet = quiet + return last_quiet + + def SetVerboseLevel(self, level): + """Sets the module's verbosity, and returns the previous setting.""" + last_verbose_level = self.verbose_level + self.verbose_level = level + return last_verbose_level + + def SetCountingStyle(self, counting_style): + """Sets the module's counting options.""" + self.counting = counting_style + + def SetFilters(self, filters): + """Sets the error-message filters. + + These filters are applied when deciding whether to emit a given + error message. + + Args: + filters: A string of comma-separated filters (eg "+whitespace/indent"). + Each filter should start with + or -; else we die. + + Raises: + ValueError: The comma-separated filters did not all start with '+' or '-'. + E.g. "-,+whitespace,-whitespace/indent,whitespace/badfilter" + """ + # Default filters always have less priority than the flag ones. + self.filters = _DEFAULT_FILTERS[:] + self.AddFilters(filters) + + def AddFilters(self, filters): + """ Adds more filters to the existing list of error-message filters. """ + for filt in filters.split(','): + clean_filt = filt.strip() + if clean_filt: + self.filters.append(clean_filt) + for filt in self.filters: + if not (filt.startswith('+') or filt.startswith('-')): + raise ValueError('Every filter in --filters must start with + or -' + ' (%s does not)' % filt) + + def BackupFilters(self): + """ Saves the current filter list to backup storage.""" + self._filters_backup = self.filters[:] + + def RestoreFilters(self): + """ Restores filters previously backed up.""" + self.filters = self._filters_backup[:] + + def ResetErrorCounts(self): + """Sets the module's error statistic back to zero.""" + self.error_count = 0 + self.errors_by_category = {} + + def IncrementErrorCount(self, category): + """Bumps the module's error statistic.""" + self.error_count += 1 + if self.counting in ('toplevel', 'detailed'): + if self.counting != 'detailed': + category = category.split('/')[0] + if category not in self.errors_by_category: + self.errors_by_category[category] = 0 + self.errors_by_category[category] += 1 + + def PrintErrorCounts(self): + """Print a summary of errors by category, and the total.""" + for category, count in sorted(iteritems(self.errors_by_category)): + self.PrintInfo('Category \'%s\' errors found: %d\n' % + (category, count)) + if self.error_count > 0: + self.PrintInfo('Total errors found: %d\n' % self.error_count) + + def PrintInfo(self, message): + # _quiet does not represent --quiet flag. 
# Hide infos from stdout to keep stdout pure for machine consumption
+ if not _quiet and self.output_format not in _MACHINE_OUTPUTS:
+ sys.stdout.write(message)
+
+ def PrintError(self, message):
+ if self.output_format == 'junit':
+ self._junit_errors.append(message)
+ else:
+ sys.stderr.write(message)
+
+ def AddJUnitFailure(self, filename, linenum, message, category, confidence):
+ self._junit_failures.append((filename, linenum, message, category,
+ confidence))
+
+ def FormatJUnitXML(self):
+ num_errors = len(self._junit_errors)
+ num_failures = len(self._junit_failures)
+
+ testsuite = xml.etree.ElementTree.Element('testsuite')
+ testsuite.attrib['errors'] = str(num_errors)
+ testsuite.attrib['failures'] = str(num_failures)
+ testsuite.attrib['name'] = 'cpplint'
+
+ if num_errors == 0 and num_failures == 0:
+ testsuite.attrib['tests'] = str(1)
+ xml.etree.ElementTree.SubElement(testsuite, 'testcase', name='passed')
+
+ else:
+ testsuite.attrib['tests'] = str(num_errors + num_failures)
+ if num_errors > 0:
+ testcase = xml.etree.ElementTree.SubElement(testsuite, 'testcase')
+ testcase.attrib['name'] = 'errors'
+ error = xml.etree.ElementTree.SubElement(testcase, 'error')
+ error.text = '\n'.join(self._junit_errors)
+ if num_failures > 0:
+ # Group failures by file
+ failed_file_order = []
+ failures_by_file = {}
+ for failure in self._junit_failures:
+ failed_file = failure[0]
+ if failed_file not in failed_file_order:
+ failed_file_order.append(failed_file)
+ failures_by_file[failed_file] = []
+ failures_by_file[failed_file].append(failure)
+ # Create a testcase for each file
+ for failed_file in failed_file_order:
+ failures = failures_by_file[failed_file]
+ testcase = xml.etree.ElementTree.SubElement(testsuite, 'testcase')
+ testcase.attrib['name'] = failed_file
+ failure = xml.etree.ElementTree.SubElement(testcase, 'failure')
+ template = '{0}: {1} [{2}] [{3}]'
+ texts = [template.format(f[1], f[2], f[3], f[4]) for f in failures]
+ failure.text = '\n'.join(texts)
+
+ xml_decl = '<?xml version="1.0" encoding="UTF-8" ?>\n'
+ return xml_decl + xml.etree.ElementTree.tostring(testsuite, 'utf-8').decode('utf-8')
+
+
+_cpplint_state = _CppLintState()
+
+
+def _OutputFormat():
+ """Gets the module's output format."""
+ return _cpplint_state.output_format
+
+
+def _SetOutputFormat(output_format):
+ """Sets the module's output format."""
+ _cpplint_state.SetOutputFormat(output_format)
+
+def _Quiet():
+ """Returns the module's quiet setting."""
+ return _cpplint_state.quiet
+
+def _SetQuiet(quiet):
+ """Set the module's quiet status, and return previous setting."""
+ return _cpplint_state.SetQuiet(quiet)
+
+
+def _VerboseLevel():
+ """Returns the module's verbosity setting."""
+ return _cpplint_state.verbose_level
+
+
+def _SetVerboseLevel(level):
+ """Sets the module's verbosity, and returns the previous setting."""
+ return _cpplint_state.SetVerboseLevel(level)
+
+
+def _SetCountingStyle(level):
+ """Sets the module's counting options."""
+ _cpplint_state.SetCountingStyle(level)
+
+
+def _Filters():
+ """Returns the module's list of output filters, as a list."""
+ return _cpplint_state.filters
+
+
+def _SetFilters(filters):
+ """Sets the module's error-message filters.
+
+ These filters are applied when deciding whether to emit a given
+ error message.
+
+ Args:
+ filters: A string of comma-separated filters (eg "whitespace/indent").
+ Each filter should start with + or -; else we die.
+ """
+ _cpplint_state.SetFilters(filters)
+
+def _AddFilters(filters):
+ """Adds more filter overrides.
+ + Unlike _SetFilters, this function does not reset the current list of filters + available. + + Args: + filters: A string of comma-separated filters (eg "whitespace/indent"). + Each filter should start with + or -; else we die. + """ + _cpplint_state.AddFilters(filters) + +def _BackupFilters(): + """ Saves the current filter list to backup storage.""" + _cpplint_state.BackupFilters() + +def _RestoreFilters(): + """ Restores filters previously backed up.""" + _cpplint_state.RestoreFilters() + +class _FunctionState(object): + """Tracks current function name and the number of lines in its body.""" + + _NORMAL_TRIGGER = 250 # for --v=0, 500 for --v=1, etc. + _TEST_TRIGGER = 400 # about 50% more than _NORMAL_TRIGGER. + + def __init__(self): + self.in_a_function = False + self.lines_in_function = 0 + self.current_function = '' + + def Begin(self, function_name): + """Start analyzing function body. + + Args: + function_name: The name of the function being tracked. + """ + self.in_a_function = True + self.lines_in_function = 0 + self.current_function = function_name + + def Count(self): + """Count line in current function body.""" + if self.in_a_function: + self.lines_in_function += 1 + + def Check(self, error, filename, linenum): + """Report if too many lines in function body. + + Args: + error: The function to call with any errors found. + filename: The name of the current file. + linenum: The number of the line to check. + """ + if not self.in_a_function: + return + + if Match(r'T(EST|est)', self.current_function): + base_trigger = self._TEST_TRIGGER + else: + base_trigger = self._NORMAL_TRIGGER + trigger = base_trigger * 2**_VerboseLevel() + + if self.lines_in_function > trigger: + error_level = int(math.log(self.lines_in_function / base_trigger, 2)) + # 50 => 0, 100 => 1, 200 => 2, 400 => 3, 800 => 4, 1600 => 5, ... + if error_level > 5: + error_level = 5 + error(filename, linenum, 'readability/fn_size', error_level, + 'Small and focused functions are preferred:' + ' %s has %d non-comment lines' + ' (error triggered by exceeding %d lines).' % ( + self.current_function, self.lines_in_function, trigger)) + + def End(self): + """Stop analyzing function body.""" + self.in_a_function = False + + +class _IncludeError(Exception): + """Indicates a problem with the include order in a file.""" + pass + + +class FileInfo(object): + """Provides utility functions for filenames. + + FileInfo provides easy access to the components of a file's path + relative to the project root. + """ + + def __init__(self, filename): + self._filename = filename + + def FullName(self): + """Make Windows paths like Unix.""" + return os.path.abspath(self._filename).replace('\\', '/') + + def RepositoryName(self): + r"""FullName after removing the local path to the repository. + + If we have a real absolute path name here we can try to do something smart: + detecting the root of the checkout and truncating /path/to/checkout from + the name so that we get header guards that don't include things like + "C:\\Documents and Settings\\..." or "/home/username/..." in them and thus + people on different computers who have checked the source out to different + locations won't see bogus errors. 
+ """ + fullname = self.FullName() + + if os.path.exists(fullname): + project_dir = os.path.dirname(fullname) + + # If the user specified a repository path, it exists, and the file is + # contained in it, use the specified repository path + if _repository: + repo = FileInfo(_repository).FullName() + root_dir = project_dir + while os.path.exists(root_dir): + # allow case insensitive compare on Windows + if os.path.normcase(root_dir) == os.path.normcase(repo): + return os.path.relpath(fullname, root_dir).replace('\\', '/') + one_up_dir = os.path.dirname(root_dir) + if one_up_dir == root_dir: + break + root_dir = one_up_dir + + if os.path.exists(os.path.join(project_dir, ".svn")): + # If there's a .svn file in the current directory, we recursively look + # up the directory tree for the top of the SVN checkout + root_dir = project_dir + one_up_dir = os.path.dirname(root_dir) + while os.path.exists(os.path.join(one_up_dir, ".svn")): + root_dir = os.path.dirname(root_dir) + one_up_dir = os.path.dirname(one_up_dir) + + prefix = os.path.commonprefix([root_dir, project_dir]) + return fullname[len(prefix) + 1:] + + # Not SVN <= 1.6? Try to find a git, hg, or svn top level directory by + # searching up from the current path. + root_dir = current_dir = os.path.dirname(fullname) + while current_dir != os.path.dirname(current_dir): + if (os.path.exists(os.path.join(current_dir, ".git")) or + os.path.exists(os.path.join(current_dir, ".hg")) or + os.path.exists(os.path.join(current_dir, ".svn"))): + root_dir = current_dir + current_dir = os.path.dirname(current_dir) + + if (os.path.exists(os.path.join(root_dir, ".git")) or + os.path.exists(os.path.join(root_dir, ".hg")) or + os.path.exists(os.path.join(root_dir, ".svn"))): + prefix = os.path.commonprefix([root_dir, project_dir]) + return fullname[len(prefix) + 1:] + + # Don't know what to do; header guard warnings may be wrong... + return fullname + + def Split(self): + """Splits the file into the directory, basename, and extension. + + For 'chrome/browser/browser.cc', Split() would + return ('chrome/browser', 'browser', '.cc') + + Returns: + A tuple of (directory, basename, extension). + """ + + googlename = self.RepositoryName() + project, rest = os.path.split(googlename) + return (project,) + os.path.splitext(rest) + + def BaseName(self): + """File base name - text after the final slash, before the final period.""" + return self.Split()[1] + + def Extension(self): + """File extension - text following the final period, includes that period.""" + return self.Split()[2] + + def NoExtension(self): + """File has no source file extension.""" + return '/'.join(self.Split()[0:2]) + + def IsSource(self): + """File has a source file extension.""" + return _IsSourceExtension(self.Extension()[1:]) + + +def _ShouldPrintError(category, confidence, linenum): + """If confidence >= verbose, category passes filter and is not suppressed.""" + + # There are three ways we might decide not to print an error message: + # a "NOLINT(category)" comment appears in the source, + # the verbosity level isn't high enough, or the filters filter it out. 
+  if IsErrorSuppressedByNolint(category, linenum):
+    return False
+
+  if confidence < _cpplint_state.verbose_level:
+    return False
+
+  is_filtered = False
+  for one_filter in _Filters():
+    if one_filter.startswith('-'):
+      if category.startswith(one_filter[1:]):
+        is_filtered = True
+    elif one_filter.startswith('+'):
+      if category.startswith(one_filter[1:]):
+        is_filtered = False
+    else:
+      assert False  # should have been checked for in SetFilter.
+  if is_filtered:
+    return False
+
+  return True
+
+
+def Error(filename, linenum, category, confidence, message):
+  """Logs the fact we've found a lint error.
+
+  We log where the error was found, and also our confidence in the error,
+  that is, how certain we are this is a legitimate style regression, and
+  not a misidentification or a use that's sometimes justified.
+
+  False positives can be suppressed by the use of
+  "cpplint(category)" comments on the offending line. These are
+  parsed into _error_suppressions.
+
+  Args:
+    filename: The name of the file containing the error.
+    linenum: The number of the line containing the error.
+    category: A string used to describe the "category" this bug
+      falls under: "whitespace", say, or "runtime". Categories
+      may have a hierarchy separated by slashes: "whitespace/indent".
+    confidence: A number from 1-5 representing a confidence score for
+      the error, with 5 meaning that we are certain of the problem,
+      and 1 meaning that it could be a legitimate construct.
+    message: The error message.
+  """
+  if _ShouldPrintError(category, confidence, linenum):
+    _cpplint_state.IncrementErrorCount(category)
+    if _cpplint_state.output_format == 'vs7':
+      _cpplint_state.PrintError('%s(%s): error cpplint: [%s] %s [%d]\n' % (
+          filename, linenum, category, message, confidence))
+    elif _cpplint_state.output_format == 'eclipse':
+      sys.stderr.write('%s:%s: warning: %s [%s] [%d]\n' % (
+          filename, linenum, message, category, confidence))
+    elif _cpplint_state.output_format == 'junit':
+      _cpplint_state.AddJUnitFailure(filename, linenum, message, category,
+                                     confidence)
+    elif _cpplint_state.output_format in ['sed', 'gsed']:
+      if message in _SED_FIXUPS:
+        sys.stdout.write(_cpplint_state.output_format + " -i '%s%s' %s # %s [%s] [%d]\n" % (
+            linenum, _SED_FIXUPS[message], filename, message, category, confidence))
+      else:
+        sys.stderr.write('# %s:%s: "%s" [%s] [%d]\n' % (
+            filename, linenum, message, category, confidence))
+    else:
+      final_message = '%s:%s: %s [%s] [%d]\n' % (
+          filename, linenum, message, category, confidence)
+      sys.stderr.write(final_message)
+
+# Matches standard C++ escape sequences per 2.13.2.3 of the C++ standard.
+_RE_PATTERN_CLEANSE_LINE_ESCAPES = re.compile(
+    r'\\([abfnrtv?"\\\']|\d+|x[0-9a-fA-F]+)')
+# Match a single C style comment on the same line.
+_RE_PATTERN_C_COMMENTS = r'/\*(?:[^*]|\*(?!/))*\*/'
+# Matches multi-line C style comments.
+# This RE is a little bit more complicated than one might expect, because we
+# have to take care to remove the surrounding spaces so we can handle
+# comments inside statements better.
+# The current rule is: We only clear spaces from both sides when we're at the
+# end of the line. Otherwise, we try to remove spaces from the right side,
+# if this doesn't work we try on the left side but only if there's a
+# non-word character on the right.
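+# Illustrative examples (not from upstream cpplint) of the rule above:
+#   'int x = 1;  /* trailing */'  ->  'int x = 1;'   (end of line, both sides)
+#   'f(/* arg */ 1)'              ->  'f(1)'         (right side removed)
+#   'x = f(a /* c */, b)'         ->  'x = f(a, b)'  (left side; ',' is \W)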
+_RE_PATTERN_CLEANSE_LINE_C_COMMENTS = re.compile(
+    r'(\s*' + _RE_PATTERN_C_COMMENTS + r'\s*$|' +
+    _RE_PATTERN_C_COMMENTS + r'\s+|' +
+    r'\s+' + _RE_PATTERN_C_COMMENTS + r'(?=\W)|' +
+    _RE_PATTERN_C_COMMENTS + r')')
+
+
+def IsCppString(line):
+  """Does line terminate such that the next symbol is inside a string constant?
+
+  This function does not consider single-line nor multi-line comments.
+
+  Args:
+    line: a partial line of code, from position 0 up to some index n.
+
+  Returns:
+    True, if the next character appended to 'line' would be inside a
+    string constant.
+  """
+
+  line = line.replace(r'\\', 'XX')  # after this, \\" does not match to \"
+  return ((line.count('"') - line.count(r'\"') - line.count("'\"'")) & 1) == 1
+
+
+def CleanseRawStrings(raw_lines):
+  """Removes C++11 raw strings from lines.
+
+  Before:
+    static const char kData[] = R"(
+        multi-line string
+        )";
+
+  After:
+    static const char kData[] = ""
+        (replaced by blank line)
+        "";
+
+  Args:
+    raw_lines: list of raw lines.
+
+  Returns:
+    list of lines with C++11 raw strings replaced by empty strings.
+  """
+
+  delimiter = None
+  lines_without_raw_strings = []
+  for line in raw_lines:
+    if delimiter:
+      # Inside a raw string, look for the end
+      end = line.find(delimiter)
+      if end >= 0:
+        # Found the end of the string, match leading space for this
+        # line and resume copying the original lines, and also insert
+        # a "" on the last line.
+        leading_space = Match(r'^(\s*)\S', line)
+        line = leading_space.group(1) + '""' + line[end + len(delimiter):]
+        delimiter = None
+      else:
+        # Haven't found the end yet, append a blank line.
+        line = '""'
+
+    # Look for beginning of a raw string, and replace them with
+    # empty strings. This is done in a loop to handle multiple raw
+    # strings on the same line.
+    while delimiter is None:
+      # Look for beginning of a raw string.
+      # See 2.14.15 [lex.string] for syntax.
+      #
+      # Once we have matched a raw string, we check the prefix of the
+      # line to make sure that the line is not part of a single line
+      # comment. It's done this way because we remove raw strings
+      # before removing comments as opposed to removing comments
+      # before removing raw strings. This is because there are some
+      # cpplint checks that require the comments to be preserved, but
+      # we don't want to check comments that are inside raw strings.
+      matched = Match(r'^(.*?)\b(?:R|u8R|uR|UR|LR)"([^\s\\()]*)\((.*)$', line)
+      if (matched and
+          not Match(r'^([^\'"]|\'(\\.|[^\'])*\'|"(\\.|[^"])*")*//',
+                    matched.group(1))):
+        delimiter = ')' + matched.group(2) + '"'
+
+        end = matched.group(3).find(delimiter)
+        if end >= 0:
+          # Raw string ended on same line
+          line = (matched.group(1) + '""' +
+                  matched.group(3)[end + len(delimiter):])
+          delimiter = None
+        else:
+          # Start of a multi-line raw string
+          line = matched.group(1) + '""'
+      else:
+        break
+
+    lines_without_raw_strings.append(line)
+
+  # TODO(unknown): if delimiter is not None here, we might want to
+  # emit a warning for unterminated string.
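+  # Illustrative example (not from upstream cpplint):
+  #   CleanseRawStrings(['const char* s = R"(', 'body', ')";'])
+  # returns
+  #   ['const char* s = ""', '""', '"";']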
+  return lines_without_raw_strings
+
+
+def FindNextMultiLineCommentStart(lines, lineix):
+  """Find the beginning marker for a multiline comment."""
+  while lineix < len(lines):
+    if lines[lineix].strip().startswith('/*'):
+      # Only return this marker if the comment goes beyond this line
+      if lines[lineix].strip().find('*/', 2) < 0:
+        return lineix
+    lineix += 1
+  return len(lines)
+
+
+def FindNextMultiLineCommentEnd(lines, lineix):
+  """We are inside a comment, find the end marker."""
+  while lineix < len(lines):
+    if lines[lineix].strip().endswith('*/'):
+      return lineix
+    lineix += 1
+  return len(lines)
+
+
+def RemoveMultiLineCommentsFromRange(lines, begin, end):
+  """Clears a range of lines for multi-line comments."""
+  # Having // comments makes the lines non-empty, so we will not get
+  # unnecessary blank line warnings later in the code.
+  for i in range(begin, end):
+    lines[i] = '/**/'
+
+
+def RemoveMultiLineComments(filename, lines, error):
+  """Removes multiline (c-style) comments from lines."""
+  lineix = 0
+  while lineix < len(lines):
+    lineix_begin = FindNextMultiLineCommentStart(lines, lineix)
+    if lineix_begin >= len(lines):
+      return
+    lineix_end = FindNextMultiLineCommentEnd(lines, lineix_begin)
+    if lineix_end >= len(lines):
+      error(filename, lineix_begin + 1, 'readability/multiline_comment', 5,
+            'Could not find end of multi-line comment')
+      return
+    RemoveMultiLineCommentsFromRange(lines, lineix_begin, lineix_end + 1)
+    lineix = lineix_end + 1
+
+
+def CleanseComments(line):
+  """Removes //-comments and single-line C-style /* */ comments.
+
+  Args:
+    line: A line of C++ source.
+
+  Returns:
+    The line with single-line comments removed.
+  """
+  commentpos = line.find('//')
+  if commentpos != -1 and not IsCppString(line[:commentpos]):
+    line = line[:commentpos].rstrip()
+  # get rid of /* ... */
+  return _RE_PATTERN_CLEANSE_LINE_C_COMMENTS.sub('', line)
+
+
+class CleansedLines(object):
+  """Holds 4 copies of all lines with different preprocessing applied to them.
+
+  1) elided member contains lines without strings and comments.
+  2) lines member contains lines without comments.
+  3) raw_lines member contains all the lines without processing.
+  4) lines_without_raw_strings member is same as raw_lines, but with C++11 raw
+     strings removed.
+  All these members are of <type 'list'>, and of the same length.
+  """
+
+  def __init__(self, lines):
+    self.elided = []
+    self.lines = []
+    self.raw_lines = lines
+    self.num_lines = len(lines)
+    self.lines_without_raw_strings = CleanseRawStrings(lines)
+    # # pylint: disable=consider-using-enumerate
+    for linenum in range(len(self.lines_without_raw_strings)):
+      self.lines.append(CleanseComments(
+          self.lines_without_raw_strings[linenum]))
+      elided = self._CollapseStrings(self.lines_without_raw_strings[linenum])
+      self.elided.append(CleanseComments(elided))
+
+  def NumLines(self):
+    """Returns the number of lines represented."""
+    return self.num_lines
+
+  @staticmethod
+  def _CollapseStrings(elided):
+    """Collapses strings and chars on a line to simple "" or '' blocks.
+
+    We nix strings first so we're not fooled by text like '"http://"'
+
+    Args:
+      elided: The line being processed.
+
+    Returns:
+      The line with collapsed strings.
+    """
+    if _RE_PATTERN_INCLUDE.match(elided):
+      return elided
+
+    # Remove escaped characters first to make quote/single quote collapsing
+    # basic. Things that look like escaped characters shouldn't occur
+    # outside of strings and chars.
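+    # Illustrative example (not from upstream cpplint): the line
+    #   printf("%d\n", 'a');
+    # collapses to
+    #   printf("", '');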
+ elided = _RE_PATTERN_CLEANSE_LINE_ESCAPES.sub('', elided) + + # Replace quoted strings and digit separators. Both single quotes + # and double quotes are processed in the same loop, otherwise + # nested quotes wouldn't work. + collapsed = '' + while True: + # Find the first quote character + match = Match(r'^([^\'"]*)([\'"])(.*)$', elided) + if not match: + collapsed += elided + break + head, quote, tail = match.groups() + + if quote == '"': + # Collapse double quoted strings + second_quote = tail.find('"') + if second_quote >= 0: + collapsed += head + '""' + elided = tail[second_quote + 1:] + else: + # Unmatched double quote, don't bother processing the rest + # of the line since this is probably a multiline string. + collapsed += elided + break + else: + # Found single quote, check nearby text to eliminate digit separators. + # + # There is no special handling for floating point here, because + # the integer/fractional/exponent parts would all be parsed + # correctly as long as there are digits on both sides of the + # separator. So we are fine as long as we don't see something + # like "0.'3" (gcc 4.9.0 will not allow this literal). + if Search(r'\b(?:0[bBxX]?|[1-9])[0-9a-fA-F]*$', head): + match_literal = Match(r'^((?:\'?[0-9a-zA-Z_])*)(.*)$', "'" + tail) + collapsed += head + match_literal.group(1).replace("'", '') + elided = match_literal.group(2) + else: + second_quote = tail.find('\'') + if second_quote >= 0: + collapsed += head + "''" + elided = tail[second_quote + 1:] + else: + # Unmatched single quote + collapsed += elided + break + + return collapsed + + +def FindEndOfExpressionInLine(line, startpos, stack): + """Find the position just after the end of current parenthesized expression. + + Args: + line: a CleansedLines line. + startpos: start searching at this position. + stack: nesting stack at startpos. + + Returns: + On finding matching end: (index just after matching end, None) + On finding an unclosed expression: (-1, None) + Otherwise: (-1, new stack at end of this line) + """ + for i in xrange(startpos, len(line)): + char = line[i] + if char in '([{': + # Found start of parenthesized expression, push to expression stack + stack.append(char) + elif char == '<': + # Found potential start of template argument list + if i > 0 and line[i - 1] == '<': + # Left shift operator + if stack and stack[-1] == '<': + stack.pop() + if not stack: + return (-1, None) + elif i > 0 and Search(r'\boperator\s*$', line[0:i]): + # operator<, don't add to stack + continue + else: + # Tentative start of template argument list + stack.append('<') + elif char in ')]}': + # Found end of parenthesized expression. + # + # If we are currently expecting a matching '>', the pending '<' + # must have been an operator. Remove them from expression stack. + while stack and stack[-1] == '<': + stack.pop() + if not stack: + return (-1, None) + if ((stack[-1] == '(' and char == ')') or + (stack[-1] == '[' and char == ']') or + (stack[-1] == '{' and char == '}')): + stack.pop() + if not stack: + return (i + 1, None) + else: + # Mismatched parentheses + return (-1, None) + elif char == '>': + # Found potential end of template argument list. + + # Ignore "->" and operator functions + if (i > 0 and + (line[i - 1] == '-' or Search(r'\boperator\s*$', line[0:i - 1]))): + continue + + # Pop the stack if there is a matching '<'. Otherwise, ignore + # this '>' since it must be an operator. 
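+      # Illustrative example (not from upstream cpplint): scanning
+      #   set<map<int, int>> x;
+      # from the first '<', each '>' pops one pending '<', and the position
+      # just past the second '>' is returned.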
+      if stack:
+        if stack[-1] == '<':
+          stack.pop()
+          if not stack:
+            return (i + 1, None)
+    elif char == ';':
+      # Found something that looks like the end of a statement. If we are
+      # currently expecting a '>', the matching '<' must have been an
+      # operator, since template argument lists should not contain statements.
+      while stack and stack[-1] == '<':
+        stack.pop()
+      if not stack:
+        return (-1, None)
+
+  # Did not find end of expression or unbalanced parentheses on this line
+  return (-1, stack)
+
+
+def CloseExpression(clean_lines, linenum, pos):
+  """If input points to ( or { or [ or <, finds the position that closes it.
+
+  If lines[linenum][pos] points to a '(' or '{' or '[' or '<', finds the
+  linenum/pos that correspond to the closing of the expression.
+
+  TODO(unknown): cpplint spends a fair bit of time matching parentheses.
+  Ideally we would want to index all opening and closing parentheses once
+  and have CloseExpression be just a simple lookup, but due to preprocessor
+  tricks, this is not so easy.
+
+  Args:
+    clean_lines: A CleansedLines instance containing the file.
+    linenum: The number of the line to check.
+    pos: A position on the line.
+
+  Returns:
+    A tuple (line, linenum, pos) pointer *past* the closing brace, or
+    (line, len(lines), -1) if we never find a close. Note we ignore
+    strings and comments when matching; and the line we return is the
+    'cleansed' line at linenum.
+  """
+
+  line = clean_lines.elided[linenum]
+  if (line[pos] not in '({[<') or Match(r'<[<=]', line[pos:]):
+    return (line, clean_lines.NumLines(), -1)
+
+  # Check first line
+  (end_pos, stack) = FindEndOfExpressionInLine(line, pos, [])
+  if end_pos > -1:
+    return (line, linenum, end_pos)
+
+  # Continue scanning forward
+  while stack and linenum < clean_lines.NumLines() - 1:
+    linenum += 1
+    line = clean_lines.elided[linenum]
+    (end_pos, stack) = FindEndOfExpressionInLine(line, 0, stack)
+    if end_pos > -1:
+      return (line, linenum, end_pos)
+
+  # Did not find end of expression before end of file, give up
+  return (line, clean_lines.NumLines(), -1)
+
+
+def FindStartOfExpressionInLine(line, endpos, stack):
+  """Find position at the matching start of current expression.
+
+  This is almost the reverse of FindEndOfExpressionInLine, but note
+  that the input position and returned position differ by 1.
+
+  Args:
+    line: a CleansedLines line.
+    endpos: start searching at this position.
+    stack: nesting stack at endpos.
+
+  Returns:
+    On finding matching start: (index at matching start, None)
+    On finding an unclosed expression: (-1, None)
+    Otherwise: (-1, new stack at beginning of this line)
+  """
+  i = endpos
+  while i >= 0:
+    char = line[i]
+    if char in ')]}':
+      # Found end of expression, push to expression stack
+      stack.append(char)
+    elif char == '>':
+      # Found potential end of template argument list.
+      #
+      # Ignore it if it's a "->" or ">=" or "operator>"
+      if (i > 0 and
+          (line[i - 1] == '-' or
+           Match(r'\s>=\s', line[i - 1:]) or
+           Search(r'\boperator\s*$', line[0:i]))):
+        i -= 1
+      else:
+        stack.append('>')
+    elif char == '<':
+      # Found potential start of template argument list
+      if i > 0 and line[i - 1] == '<':
+        # Left shift operator
+        i -= 1
+      else:
+        # If there is a matching '>', we can pop the expression stack.
+        # Otherwise, ignore this '<' since it must be an operator.
+        if stack and stack[-1] == '>':
+          stack.pop()
+          if not stack:
+            return (i, None)
+    elif char in '([{':
+      # Found start of expression.
+      #
+      # If there are any unmatched '>' on the stack, they must be
+      # operators. Remove those.
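+      # Illustrative example (not from upstream cpplint): scanning backwards
+      # through 'if (a > b)' pushes ')' and then '>'; the '>' is discarded
+      # here once the matching '(' is found, since it must be an operator.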
+      while stack and stack[-1] == '>':
+        stack.pop()
+      if not stack:
+        return (-1, None)
+      if ((char == '(' and stack[-1] == ')') or
+          (char == '[' and stack[-1] == ']') or
+          (char == '{' and stack[-1] == '}')):
+        stack.pop()
+        if not stack:
+          return (i, None)
+      else:
+        # Mismatched parentheses
+        return (-1, None)
+    elif char == ';':
+      # Found something that looks like the end of a statement. If we are
+      # currently expecting a '<', the matching '>' must have been an
+      # operator, since template argument lists should not contain statements.
+      while stack and stack[-1] == '>':
+        stack.pop()
+      if not stack:
+        return (-1, None)
+
+    i -= 1
+
+  return (-1, stack)
+
+
+def ReverseCloseExpression(clean_lines, linenum, pos):
+  """If input points to ) or } or ] or >, finds the position that opens it.
+
+  If lines[linenum][pos] points to a ')' or '}' or ']' or '>', finds the
+  linenum/pos that correspond to the opening of the expression.
+
+  Args:
+    clean_lines: A CleansedLines instance containing the file.
+    linenum: The number of the line to check.
+    pos: A position on the line.
+
+  Returns:
+    A tuple (line, linenum, pos) pointer *at* the opening brace, or
+    (line, 0, -1) if we never find the matching opening brace. Note
+    we ignore strings and comments when matching; and the line we
+    return is the 'cleansed' line at linenum.
+  """
+  line = clean_lines.elided[linenum]
+  if line[pos] not in ')}]>':
+    return (line, 0, -1)
+
+  # Check last line
+  (start_pos, stack) = FindStartOfExpressionInLine(line, pos, [])
+  if start_pos > -1:
+    return (line, linenum, start_pos)
+
+  # Continue scanning backward
+  while stack and linenum > 0:
+    linenum -= 1
+    line = clean_lines.elided[linenum]
+    (start_pos, stack) = FindStartOfExpressionInLine(line, len(line) - 1, stack)
+    if start_pos > -1:
+      return (line, linenum, start_pos)
+
+  # Did not find start of expression before beginning of file, give up
+  return (line, 0, -1)
+
+
+def CheckForCopyright(filename, lines, error):
+  """Logs an error if no Copyright message appears at the top of the file."""
+
+  # We'll say it should occur by line 10. Don't forget there's a
+  # placeholder line at the front.
+  for line in xrange(1, min(len(lines), 11)):
+    if re.search(r'Copyright', lines[line], re.I): break
+  else:                       # means no copyright line was found
+    error(filename, 0, 'legal/copyright', 5,
+          'No copyright message found. '
+          'You should have a line: "Copyright [year] <Copyright Owner>"')
+
+
+def GetIndentLevel(line):
+  """Return the number of leading spaces in line.
+
+  Args:
+    line: A string to check.
+
+  Returns:
+    An integer count of leading spaces, possibly zero.
+  """
+  indent = Match(r'^( *)\S', line)
+  if indent:
+    return len(indent.group(1))
+  else:
+    return 0
+
+def PathSplitToList(path):
+  """Returns the path split into a list by the separator.
+
+  Args:
+    path: An absolute or relative path (e.g. '/a/b/c/' or '../a')
+
+  Returns:
+    A list of path components (e.g. ['a', 'b', 'c']).
+  """
+  lst = []
+  while True:
+    (head, tail) = os.path.split(path)
+    if head == path:  # absolute paths end
+      lst.append(head)
+      break
+    if tail == path:  # relative paths end
+      lst.append(tail)
+      break
+
+    path = head
+    lst.append(tail)
+
+  lst.reverse()
+  return lst
+
+def GetHeaderGuardCPPVariable(filename):
+  """Returns the CPP variable that should be used as a header guard.
+
+  Args:
+    filename: The name of a C++ header file.
+
+  Returns:
+    The CPP variable that should be used as a header guard in the
+    named file.
+
+  """
+
+  # Restores the original filename in case cpplint is invoked from Emacs's
+  # flymake.
+  filename = re.sub(r'_flymake\.h$', '.h', filename)
+  filename = re.sub(r'/\.flymake/([^/]*)$', r'/\1', filename)
+  # Replace 'c++' with 'cpp'.
+  filename = filename.replace('C++', 'cpp').replace('c++', 'cpp')
+
+  fileinfo = FileInfo(filename)
+  file_path_from_root = fileinfo.RepositoryName()
+
+  def FixupPathFromRoot():
+    if _root_debug:
+      sys.stderr.write("\n_root fixup, _root = '%s', repository name = '%s'\n"
+                       % (_root, fileinfo.RepositoryName()))
+
+    # Process the file path with the --root flag if it was set.
+    if not _root:
+      if _root_debug:
+        sys.stderr.write("_root unspecified\n")
+      return file_path_from_root
+
+    def StripListPrefix(lst, prefix):
+      # f(['x', 'y'], ['w', 'z']) -> None  (not a valid prefix)
+      if lst[:len(prefix)] != prefix:
+        return None
+      # f(['a', 'b', 'c', 'd'], ['a', 'b']) -> ['c', 'd']
+      return lst[(len(prefix)):]
+
+    # root behavior:
+    #   --root=subdir , lstrips subdir from the header guard
+    maybe_path = StripListPrefix(PathSplitToList(file_path_from_root),
+                                 PathSplitToList(_root))
+
+    if _root_debug:
+      sys.stderr.write(("_root lstrip (maybe_path=%s, file_path_from_root=%s," +
+                        " _root=%s)\n") % (maybe_path, file_path_from_root, _root))
+
+    if maybe_path:
+      return os.path.join(*maybe_path)
+
+    # --root=.. , will prepend the outer directory to the header guard
+    full_path = fileinfo.FullName()
+    # adapt slashes for windows
+    root_abspath = os.path.abspath(_root).replace('\\', '/')
+
+    maybe_path = StripListPrefix(PathSplitToList(full_path),
+                                 PathSplitToList(root_abspath))
+
+    if _root_debug:
+      sys.stderr.write(("_root prepend (maybe_path=%s, full_path=%s, " +
+                        "root_abspath=%s)\n") % (maybe_path, full_path, root_abspath))
+
+    if maybe_path:
+      return os.path.join(*maybe_path)
+
+    if _root_debug:
+      sys.stderr.write("_root ignore, returning %s\n" % (file_path_from_root))
+
+    # --root=FAKE_DIR is ignored
+    return file_path_from_root
+
+  file_path_from_root = FixupPathFromRoot()
+  return re.sub(r'[^a-zA-Z0-9]', '_', file_path_from_root).upper() + '_'
+
+
+def CheckForHeaderGuard(filename, clean_lines, error):
+  """Checks that the file contains a header guard.
+
+  Logs an error if no #ifndef header guard is present. For other
+  headers, checks that the full pathname is used.
+
+  Args:
+    filename: The name of the C++ header file.
+    clean_lines: A CleansedLines instance containing the file.
+    error: The function to call with any errors found.
+  """
+
+  # Don't check for header guards if there are error suppression
+  # comments somewhere in this file.
+  #
+  # Because this is silencing a warning for a nonexistent line, we
+  # only support the very specific NOLINT(build/header_guard) syntax,
+  # and not the general NOLINT or NOLINT(*) syntax.
+  raw_lines = clean_lines.lines_without_raw_strings
+  for i in raw_lines:
+    if Search(r'//\s*NOLINT\(build/header_guard\)', i):
+      return
+
+  # Allow pragma once instead of header guards
+  for i in raw_lines:
+    if Search(r'^\s*#pragma\s+once', i):
+      return
+
+  cppvar = GetHeaderGuardCPPVariable(filename)
+
+  ifndef = ''
+  ifndef_linenum = 0
+  define = ''
+  endif = ''
+  endif_linenum = 0
+  for linenum, line in enumerate(raw_lines):
+    linesplit = line.split()
+    if len(linesplit) >= 2:
+      # find the first occurrence of #ifndef and #define, save arg
+      if not ifndef and linesplit[0] == '#ifndef':
+        # set ifndef to the header guard presented on the #ifndef line.
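+        # Illustrative example (not from upstream cpplint): for a file at the
+        # hypothetical repository path 'pax/comm/guard.h' (and no --root),
+        # GetHeaderGuardCPPVariable returns 'PAX_COMM_GUARD_H_', so the
+        # expected line here is '#ifndef PAX_COMM_GUARD_H_'.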
+ ifndef = linesplit[1] + ifndef_linenum = linenum + if not define and linesplit[0] == '#define': + define = linesplit[1] + # find the last occurrence of #endif, save entire line + if line.startswith('#endif'): + endif = line + endif_linenum = linenum + + if not ifndef or not define or ifndef != define: + error(filename, 0, 'build/header_guard', 5, + 'No #ifndef header guard found, suggested CPP variable is: %s' % + cppvar) + return + + # The guard should be PATH_FILE_H_, but we also allow PATH_FILE_H__ + # for backward compatibility. + if ifndef != cppvar: + error_level = 0 + if ifndef != cppvar + '_': + error_level = 5 + + ParseNolintSuppressions(filename, raw_lines[ifndef_linenum], ifndef_linenum, + error) + error(filename, ifndef_linenum, 'build/header_guard', error_level, + '#ifndef header guard has wrong style, please use: %s' % cppvar) + + # Check for "//" comments on endif line. + ParseNolintSuppressions(filename, raw_lines[endif_linenum], endif_linenum, + error) + match = Match(r'#endif\s*//\s*' + cppvar + r'(_)?\b', endif) + if match: + if match.group(1) == '_': + # Issue low severity warning for deprecated double trailing underscore + error(filename, endif_linenum, 'build/header_guard', 0, + '#endif line should be "#endif // %s"' % cppvar) + return + + # Didn't find the corresponding "//" comment. If this file does not + # contain any "//" comments at all, it could be that the compiler + # only wants "/**/" comments, look for those instead. + no_single_line_comments = True + for i in xrange(1, len(raw_lines) - 1): + line = raw_lines[i] + if Match(r'^(?:(?:\'(?:\.|[^\'])*\')|(?:"(?:\.|[^"])*")|[^\'"])*//', line): + no_single_line_comments = False + break + + if no_single_line_comments: + match = Match(r'#endif\s*/\*\s*' + cppvar + r'(_)?\s*\*/', endif) + if match: + if match.group(1) == '_': + # Low severity warning for double trailing underscore + error(filename, endif_linenum, 'build/header_guard', 0, + '#endif line should be "#endif /* %s */"' % cppvar) + return + + # Didn't find anything + error(filename, endif_linenum, 'build/header_guard', 5, + '#endif line should be "#endif // %s"' % cppvar) + + +def CheckHeaderFileIncluded(filename, include_state, error): + """Logs an error if a source file does not include its header.""" + + # Do not check test files + fileinfo = FileInfo(filename) + if Search(_TEST_FILE_SUFFIX, fileinfo.BaseName()): + return + + for ext in GetHeaderExtensions(): + basefilename = filename[0:len(filename) - len(fileinfo.Extension())] + headerfile = basefilename + '.' + ext + if not os.path.exists(headerfile): + continue + headername = FileInfo(headerfile).RepositoryName() + first_include = None + include_uses_unix_dir_aliases = False + for section_list in include_state.include_list: + for f in section_list: + include_text = f[0] + if "./" in include_text: + include_uses_unix_dir_aliases = True + if headername in include_text or include_text in headername: + return + if not first_include: + first_include = f[1] + + message = '%s should include its header file %s' % (fileinfo.RepositoryName(), headername) + if include_uses_unix_dir_aliases: + message += ". Relative paths like . and .. are not allowed." + + error(filename, first_include, 'build/include', 5, message) + + +def CheckForBadCharacters(filename, lines, error): + """Logs an error for each line containing bad characters. + + Two kinds of bad characters: + + 1. 
Unicode replacement characters: These indicate that either the file + contained invalid UTF-8 (likely) or Unicode replacement characters (which + it shouldn't). Note that it's possible for this to throw off line + numbering if the invalid UTF-8 occurred adjacent to a newline. + + 2. NUL bytes. These are problematic for some tools. + + Args: + filename: The name of the current file. + lines: An array of strings, each representing a line of the file. + error: The function to call with any errors found. + """ + for linenum, line in enumerate(lines): + if unicode_escape_decode('\ufffd') in line: + error(filename, linenum, 'readability/utf8', 5, + 'Line contains invalid UTF-8 (or Unicode replacement character).') + if '\0' in line: + error(filename, linenum, 'readability/nul', 5, 'Line contains NUL byte.') + + +def CheckForNewlineAtEOF(filename, lines, error): + """Logs an error if there is no newline char at the end of the file. + + Args: + filename: The name of the current file. + lines: An array of strings, each representing a line of the file. + error: The function to call with any errors found. + """ + + # The array lines() was created by adding two newlines to the + # original file (go figure), then splitting on \n. + # To verify that the file ends in \n, we just have to make sure the + # last-but-two element of lines() exists and is empty. + if len(lines) < 3 or lines[-2]: + error(filename, len(lines) - 2, 'whitespace/ending_newline', 5, + 'Could not find a newline character at the end of the file.') + + +def CheckForMultilineCommentsAndStrings(filename, clean_lines, linenum, error): + """Logs an error if we see /* ... */ or "..." that extend past one line. + + /* ... */ comments are legit inside macros, for one line. + Otherwise, we prefer // comments, so it's ok to warn about the + other. Likewise, it's ok for strings to extend across multiple + lines, as long as a line continuation character (backslash) + terminates each line. Although not currently prohibited by the C++ + style guide, it's ugly and unnecessary. We don't do well with either + in this lint program, so we warn about both. + + Args: + filename: The name of the current file. + clean_lines: A CleansedLines instance containing the file. + linenum: The number of the line to check. + error: The function to call with any errors found. + """ + line = clean_lines.elided[linenum] + + # Remove all \\ (escaped backslashes) from the line. They are OK, and the + # second (escaped) slash may trigger later \" detection erroneously. + line = line.replace('\\\\', '') + + if line.count('/*') > line.count('*/'): + error(filename, linenum, 'readability/multiline_comment', 5, + 'Complex multi-line /*...*/-style comment found. ' + 'Lint may give bogus warnings. ' + 'Consider replacing these with //-style comments, ' + 'with #if 0...#endif, ' + 'or with more clearly structured multi-line comments.') + + if (line.count('"') - line.count('\\"')) % 2: + error(filename, linenum, 'readability/multiline_string', 5, + 'Multi-line string ("...") found. This lint script doesn\'t ' + 'do well with such strings, and may give bogus warnings. ' + 'Use C++11 raw strings or concatenation instead.') + + +# (non-threadsafe name, thread-safe alternative, validation pattern) +# +# The validation pattern is used to eliminate false positives such as: +# _rand(); // false positive due to substring match. +# ->rand(); // some member function rand(). +# ACMRandom rand(seed); // some variable named rand. +# ISAACRandom rand(); // another variable named rand. 
+# +# Basically we require the return value of these functions to be used +# in some expression context on the same line by matching on some +# operator before the function name. This eliminates constructors and +# member function calls. +_UNSAFE_FUNC_PREFIX = r'(?:[-+*/=%^&|(<]\s*|>\s+)' +_THREADING_LIST = ( + ('asctime(', 'asctime_r(', _UNSAFE_FUNC_PREFIX + r'asctime\([^)]+\)'), + ('ctime(', 'ctime_r(', _UNSAFE_FUNC_PREFIX + r'ctime\([^)]+\)'), + ('getgrgid(', 'getgrgid_r(', _UNSAFE_FUNC_PREFIX + r'getgrgid\([^)]+\)'), + ('getgrnam(', 'getgrnam_r(', _UNSAFE_FUNC_PREFIX + r'getgrnam\([^)]+\)'), + ('getlogin(', 'getlogin_r(', _UNSAFE_FUNC_PREFIX + r'getlogin\(\)'), + ('getpwnam(', 'getpwnam_r(', _UNSAFE_FUNC_PREFIX + r'getpwnam\([^)]+\)'), + ('getpwuid(', 'getpwuid_r(', _UNSAFE_FUNC_PREFIX + r'getpwuid\([^)]+\)'), + ('gmtime(', 'gmtime_r(', _UNSAFE_FUNC_PREFIX + r'gmtime\([^)]+\)'), + ('localtime(', 'localtime_r(', _UNSAFE_FUNC_PREFIX + r'localtime\([^)]+\)'), + ('rand(', 'rand_r(', _UNSAFE_FUNC_PREFIX + r'rand\(\)'), + ('strtok(', 'strtok_r(', + _UNSAFE_FUNC_PREFIX + r'strtok\([^)]+\)'), + ('ttyname(', 'ttyname_r(', _UNSAFE_FUNC_PREFIX + r'ttyname\([^)]+\)'), + ) + + +def CheckPosixThreading(filename, clean_lines, linenum, error): + """Checks for calls to thread-unsafe functions. + + Much code has been originally written without consideration of + multi-threading. Also, engineers are relying on their old experience; + they have learned posix before threading extensions were added. These + tests guide the engineers to use thread-safe functions (when using + posix directly). + + Args: + filename: The name of the current file. + clean_lines: A CleansedLines instance containing the file. + linenum: The number of the line to check. + error: The function to call with any errors found. + """ + line = clean_lines.elided[linenum] + for single_thread_func, multithread_safe_func, pattern in _THREADING_LIST: + # Additional pattern matching check to confirm that this is the + # function we are looking for + if Search(pattern, line): + error(filename, linenum, 'runtime/threadsafe_fn', 2, + 'Consider using ' + multithread_safe_func + + '...) instead of ' + single_thread_func + + '...) for improved thread safety.') + + +def CheckVlogArguments(filename, clean_lines, linenum, error): + """Checks that VLOG() is only used for defining a logging level. + + For example, VLOG(2) is correct. VLOG(INFO), VLOG(WARNING), VLOG(ERROR), and + VLOG(FATAL) are not. + + Args: + filename: The name of the current file. + clean_lines: A CleansedLines instance containing the file. + linenum: The number of the line to check. + error: The function to call with any errors found. + """ + line = clean_lines.elided[linenum] + if Search(r'\bVLOG\((INFO|ERROR|WARNING|DFATAL|FATAL)\)', line): + error(filename, linenum, 'runtime/vlog', 5, + 'VLOG() should be used with numeric verbosity level. ' + 'Use LOG() if you want symbolic severity levels.') + +# Matches invalid increment: *count++, which moves pointer instead of +# incrementing a value. +_RE_PATTERN_INVALID_INCREMENT = re.compile( + r'^\s*\*\w+(\+\+|--);') + + +def CheckInvalidIncrement(filename, clean_lines, linenum, error): + """Checks for invalid increment *count++. + + For example following function: + void increment_counter(int* count) { + *count++; + } + is invalid, because it effectively does count++, moving pointer, and should + be replaced with ++*count, (*count)++ or *count += 1. + + Args: + filename: The name of the current file. 
+    clean_lines: A CleansedLines instance containing the file.
+    linenum: The number of the line to check.
+    error: The function to call with any errors found.
+  """
+  line = clean_lines.elided[linenum]
+  if _RE_PATTERN_INVALID_INCREMENT.match(line):
+    error(filename, linenum, 'runtime/invalid_increment', 5,
+          'Changing pointer instead of value (or unused value of operator*).')
+
+
+def IsMacroDefinition(clean_lines, linenum):
+  if Search(r'^#define', clean_lines[linenum]):
+    return True
+
+  if linenum > 0 and Search(r'\\$', clean_lines[linenum - 1]):
+    return True
+
+  return False
+
+
+def IsForwardClassDeclaration(clean_lines, linenum):
+  return Match(r'^\s*(\btemplate\b)*.*class\s+\w+;\s*$', clean_lines[linenum])
+
+
+class _BlockInfo(object):
+  """Stores information about a generic block of code."""
+
+  def __init__(self, linenum, seen_open_brace):
+    self.starting_linenum = linenum
+    self.seen_open_brace = seen_open_brace
+    self.open_parentheses = 0
+    self.inline_asm = _NO_ASM
+    self.check_namespace_indentation = False
+
+  def CheckBegin(self, filename, clean_lines, linenum, error):
+    """Run checks that apply to text up to the opening brace.
+
+    This is mostly for checking the text after the class identifier
+    and the "{", usually where the base class is specified. For other
+    blocks, there isn't much to check, so we always pass.
+
+    Args:
+      filename: The name of the current file.
+      clean_lines: A CleansedLines instance containing the file.
+      linenum: The number of the line to check.
+      error: The function to call with any errors found.
+    """
+    pass
+
+  def CheckEnd(self, filename, clean_lines, linenum, error):
+    """Run checks that apply to text after the closing brace.
+
+    This is mostly used for checking end of namespace comments.
+
+    Args:
+      filename: The name of the current file.
+      clean_lines: A CleansedLines instance containing the file.
+      linenum: The number of the line to check.
+      error: The function to call with any errors found.
+    """
+    pass
+
+  def IsBlockInfo(self):
+    """Returns true if this block is a _BlockInfo.
+
+    This is convenient for verifying that an object is an instance of
+    a _BlockInfo, but not an instance of any of the derived classes.
+
+    Returns:
+      True for this class, False for derived classes.
+    """
+    return self.__class__ == _BlockInfo
+
+
+class _ExternCInfo(_BlockInfo):
+  """Stores information about an 'extern "C"' block."""
+
+  def __init__(self, linenum):
+    _BlockInfo.__init__(self, linenum, True)
+
+
+class _ClassInfo(_BlockInfo):
+  """Stores information about a class."""
+
+  def __init__(self, name, class_or_struct, clean_lines, linenum):
+    _BlockInfo.__init__(self, linenum, False)
+    self.name = name
+    self.is_derived = False
+    self.check_namespace_indentation = True
+    if class_or_struct == 'struct':
+      self.access = 'public'
+      self.is_struct = True
+    else:
+      self.access = 'private'
+      self.is_struct = False
+
+    # Remember initial indentation level for this class. Using raw_lines here
+    # instead of elided to account for leading comments.
+    self.class_indent = GetIndentLevel(clean_lines.raw_lines[linenum])
+
+    # Try to find the end of the class. This will be confused by things like:
+    #   class A {
+    #   } *x = { ...
+    #
+    # But it's still good enough for CheckSectionSpacing.
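+    # Illustrative example (not from upstream cpplint): for
+    #   class A {
+    #    public:
+    #   };
+    # the scan below sets last_line to the line of '};', where the running
+    # brace depth first returns to zero.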
+    self.last_line = 0
+    depth = 0
+    for i in range(linenum, clean_lines.NumLines()):
+      line = clean_lines.elided[i]
+      depth += line.count('{') - line.count('}')
+      if not depth:
+        self.last_line = i
+        break
+
+  def CheckBegin(self, filename, clean_lines, linenum, error):
+    # Look for a bare ':'
+    if Search('(^|[^:]):($|[^:])', clean_lines.elided[linenum]):
+      self.is_derived = True
+
+  def CheckEnd(self, filename, clean_lines, linenum, error):
+    # If there is a DISALLOW macro, it should appear near the end of
+    # the class.
+    seen_last_thing_in_class = False
+    for i in xrange(linenum - 1, self.starting_linenum, -1):
+      match = Search(
+          r'\b(DISALLOW_COPY_AND_ASSIGN|DISALLOW_IMPLICIT_CONSTRUCTORS)\(' +
+          self.name + r'\)',
+          clean_lines.elided[i])
+      if match:
+        if seen_last_thing_in_class:
+          error(filename, i, 'readability/constructors', 3,
+                match.group(1) + ' should be the last thing in the class')
+        break
+
+      if not Match(r'^\s*$', clean_lines.elided[i]):
+        seen_last_thing_in_class = True
+
+    # Check that closing brace is aligned with beginning of the class.
+    # Only do this if the closing brace is indented by only whitespaces.
+    # This means we will not check single-line class definitions.
+    indent = Match(r'^( *)\}', clean_lines.elided[linenum])
+    if indent and len(indent.group(1)) != self.class_indent:
+      if self.is_struct:
+        parent = 'struct ' + self.name
+      else:
+        parent = 'class ' + self.name
+      error(filename, linenum, 'whitespace/indent', 3,
+            'Closing brace should be aligned with beginning of %s' % parent)
+
+
+class _NamespaceInfo(_BlockInfo):
+  """Stores information about a namespace."""
+
+  def __init__(self, name, linenum):
+    _BlockInfo.__init__(self, linenum, False)
+    self.name = name or ''
+    self.check_namespace_indentation = True
+
+  def CheckEnd(self, filename, clean_lines, linenum, error):
+    """Check end of namespace comments."""
+    line = clean_lines.raw_lines[linenum]
+
+    # Check how many lines are enclosed in this namespace. Don't issue
+    # a warning for missing namespace comments if there aren't enough
+    # lines. However, do apply checks if there is already an end of
+    # namespace comment and it's incorrect.
+    #
+    # TODO(unknown): We always want to check end of namespace comments
+    # if a namespace is large, but sometimes we also want to apply the
+    # check if a short namespace contained nontrivial things (something
+    # other than forward declarations). There is currently no logic on
+    # deciding what these nontrivial things are, so this check is
+    # triggered by namespace size only, which works most of the time.
+    if (linenum - self.starting_linenum < 10
+        and not Match(r'^\s*};*\s*(//|/\*).*\bnamespace\b', line)):
+      return
+
+    # Look for matching comment at end of namespace.
+    #
+    # Note that we accept C style "/* */" comments for terminating
+    # namespaces, so that code that terminates namespaces inside
+    # preprocessor macros can be cpplint clean.
+    #
+    # We also accept stuff like "// end of namespace <name>." with the
+    # period at the end.
+    #
+    # Besides these, we don't accept anything else, otherwise we might
+    # get false negatives when existing comment is a substring of the
+    # expected namespace.
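+    # Illustrative examples (not from upstream cpplint): for 'namespace pax'
+    # both '}  // namespace pax' and '}  /* namespace pax */' are accepted
+    # below, while a bare '}' ending a long namespace is flagged.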
+ if self.name: + # Named namespace + if not Match((r'^\s*};*\s*(//|/\*).*\bnamespace\s+' + + re.escape(self.name) + r'[\*/\.\\\s]*$'), + line): + error(filename, linenum, 'readability/namespace', 5, + 'Namespace should be terminated with "// namespace %s"' % + self.name) + else: + # Anonymous namespace + if not Match(r'^\s*};*\s*(//|/\*).*\bnamespace[\*/\.\\\s]*$', line): + # If "// namespace anonymous" or "// anonymous namespace (more text)", + # mention "// anonymous namespace" as an acceptable form + if Match(r'^\s*}.*\b(namespace anonymous|anonymous namespace)\b', line): + error(filename, linenum, 'readability/namespace', 5, + 'Anonymous namespace should be terminated with "// namespace"' + ' or "// anonymous namespace"') + else: + error(filename, linenum, 'readability/namespace', 5, + 'Anonymous namespace should be terminated with "// namespace"') + + +class _PreprocessorInfo(object): + """Stores checkpoints of nesting stacks when #if/#else is seen.""" + + def __init__(self, stack_before_if): + # The entire nesting stack before #if + self.stack_before_if = stack_before_if + + # The entire nesting stack up to #else + self.stack_before_else = [] + + # Whether we have already seen #else or #elif + self.seen_else = False + + +class NestingState(object): + """Holds states related to parsing braces.""" + + def __init__(self): + # Stack for tracking all braces. An object is pushed whenever we + # see a "{", and popped when we see a "}". Only 3 types of + # objects are possible: + # - _ClassInfo: a class or struct. + # - _NamespaceInfo: a namespace. + # - _BlockInfo: some other type of block. + self.stack = [] + + # Top of the previous stack before each Update(). + # + # Because the nesting_stack is updated at the end of each line, we + # had to do some convoluted checks to find out what is the current + # scope at the beginning of the line. This check is simplified by + # saving the previous top of nesting stack. + # + # We could save the full stack, but we only need the top. Copying + # the full nesting stack would slow down cpplint by ~10%. + self.previous_stack_top = [] + + # Stack of _PreprocessorInfo objects. + self.pp_stack = [] + + def SeenOpenBrace(self): + """Check if we have seen the opening brace for the innermost block. + + Returns: + True if we have seen the opening brace, False if the innermost + block is still expecting an opening brace. + """ + return (not self.stack) or self.stack[-1].seen_open_brace + + def InNamespaceBody(self): + """Check if we are currently one level inside a namespace body. + + Returns: + True if top of the stack is a namespace block, False otherwise. + """ + return self.stack and isinstance(self.stack[-1], _NamespaceInfo) + + def InExternC(self): + """Check if we are currently one level inside an 'extern "C"' block. + + Returns: + True if top of the stack is an extern block, False otherwise. + """ + return self.stack and isinstance(self.stack[-1], _ExternCInfo) + + def InClassDeclaration(self): + """Check if we are currently one level inside a class or struct declaration. + + Returns: + True if top of the stack is a class/struct, False otherwise. + """ + return self.stack and isinstance(self.stack[-1], _ClassInfo) + + def InAsmBlock(self): + """Check if we are currently one level inside an inline ASM block. + + Returns: + True if the top of the stack is a block containing inline ASM. 
+    """
+    return self.stack and self.stack[-1].inline_asm != _NO_ASM
+
+  def InTemplateArgumentList(self, clean_lines, linenum, pos):
+    """Check if current position is inside template argument list.
+
+    Args:
+      clean_lines: A CleansedLines instance containing the file.
+      linenum: The number of the line to check.
+      pos: position just after the suspected template argument.
+    Returns:
+      True if (linenum, pos) is inside template arguments.
+    """
+    while linenum < clean_lines.NumLines():
+      # Find the earliest character that might indicate a template argument
+      line = clean_lines.elided[linenum]
+      match = Match(r'^[^{};=\[\]\.<>]*(.)', line[pos:])
+      if not match:
+        linenum += 1
+        pos = 0
+        continue
+      token = match.group(1)
+      pos += len(match.group(0))
+
+      # These things do not look like template argument list:
+      #   class Suspect {
+      #   class Suspect x; }
+      if token in ('{', '}', ';'): return False
+
+      # These things look like template argument list:
+      #   template <class Suspect>
+      #   template <class Suspect = default_value>
+      #   template <class Suspect[]>
+      #   template <class Suspect...>
+      if token in ('>', '=', '[', ']', '.'): return True
+
+      # Check if token is an unmatched '<'.
+      # If not, move on to the next character.
+      if token != '<':
+        pos += 1
+        if pos >= len(line):
+          linenum += 1
+          pos = 0
+        continue
+
+      # We can't be sure if we just find a single '<', and need to
+      # find the matching '>'.
+      (_, end_line, end_pos) = CloseExpression(clean_lines, linenum, pos - 1)
+      if end_pos < 0:
+        # Not sure if template argument list or syntax error in file
+        return False
+      linenum = end_line
+      pos = end_pos
+    return False
+
+  def UpdatePreprocessor(self, line):
+    """Update preprocessor stack.
+
+    We need to handle preprocessors due to classes like this:
+      #ifdef SWIG
+      struct ResultDetailsPageElementExtensionPoint {
+      #else
+      struct ResultDetailsPageElementExtensionPoint : public Extension {
+      #endif
+
+    We make the following assumptions (good enough for most files):
+    - Preprocessor condition evaluates to true from #if up to first
+      #else/#elif/#endif.
+
+    - Preprocessor condition evaluates to false from #else/#elif up
+      to #endif. We still perform lint checks on these lines, but
+      these do not affect nesting stack.
+
+    Args:
+      line: current line to check.
+    """
+    if Match(r'^\s*#\s*(if|ifdef|ifndef)\b', line):
+      # Beginning of #if block, save the nesting stack here. The saved
+      # stack will allow us to restore the parsing state in the #else case.
+      self.pp_stack.append(_PreprocessorInfo(copy.deepcopy(self.stack)))
+    elif Match(r'^\s*#\s*(else|elif)\b', line):
+      # Beginning of #else block
+      if self.pp_stack:
+        if not self.pp_stack[-1].seen_else:
+          # This is the first #else or #elif block. Remember the
+          # whole nesting stack up to this point. This is what we
+          # keep after the #endif.
+          self.pp_stack[-1].seen_else = True
+          self.pp_stack[-1].stack_before_else = copy.deepcopy(self.stack)
+
+        # Restore the stack to how it was before the #if
+        self.stack = copy.deepcopy(self.pp_stack[-1].stack_before_if)
+      else:
+        # TODO(unknown): unexpected #else, issue warning?
+        pass
+    elif Match(r'^\s*#\s*endif\b', line):
+      # End of #if or #else blocks.
+      if self.pp_stack:
+        # If we saw an #else, we will need to restore the nesting
+        # stack to its former state before the #else, otherwise we
+        # will just continue from where we left off.
+        if self.pp_stack[-1].seen_else:
+          # Here we can just use a shallow copy since we are the last
+          # reference to it.
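+          # Illustrative example (not from upstream cpplint): in
+          #   #if A
+          #   namespace a {
+          #   #else
+          #   namespace b {
+          #   #endif
+          # the stack saved just before '#else' (containing 'a') is restored
+          # here, so the brace opened in the dead '#else' branch is dropped.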
+          self.stack = self.pp_stack[-1].stack_before_else
+        # Drop the corresponding #if
+        self.pp_stack.pop()
+      else:
+        # TODO(unknown): unexpected #endif, issue warning?
+        pass
+
+  # TODO(unknown): Update() is too long, but we will refactor later.
+  def Update(self, filename, clean_lines, linenum, error):
+    """Update nesting state with current line.
+
+    Args:
+      filename: The name of the current file.
+      clean_lines: A CleansedLines instance containing the file.
+      linenum: The number of the line to check.
+      error: The function to call with any errors found.
+    """
+    line = clean_lines.elided[linenum]
+
+    # Remember top of the previous nesting stack.
+    #
+    # The stack is always pushed/popped and not modified in place, so
+    # we can just do a shallow copy instead of copy.deepcopy. Using
+    # deepcopy would slow down cpplint by ~28%.
+    if self.stack:
+      self.previous_stack_top = self.stack[-1]
+    else:
+      self.previous_stack_top = None
+
+    # Update pp_stack
+    self.UpdatePreprocessor(line)
+
+    # Count parentheses. This is to avoid adding struct arguments to
+    # the nesting stack.
+    if self.stack:
+      inner_block = self.stack[-1]
+      depth_change = line.count('(') - line.count(')')
+      inner_block.open_parentheses += depth_change
+
+      # Also check if we are starting or ending an inline assembly block.
+      if inner_block.inline_asm in (_NO_ASM, _END_ASM):
+        if (depth_change != 0 and
+            inner_block.open_parentheses == 1 and
+            _MATCH_ASM.match(line)):
+          # Enter assembly block
+          inner_block.inline_asm = _INSIDE_ASM
+        else:
+          # Not entering assembly block. If previous line was _END_ASM,
+          # we will now shift to _NO_ASM state.
+          inner_block.inline_asm = _NO_ASM
+      elif (inner_block.inline_asm == _INSIDE_ASM and
+            inner_block.open_parentheses == 0):
+        # Exit assembly block
+        inner_block.inline_asm = _END_ASM
+
+    # Consume namespace declaration at the beginning of the line. Do
+    # this in a loop so that we catch same line declarations like this:
+    #   namespace proto2 { namespace bridge { class MessageSet; } }
+    while True:
+      # Match start of namespace. The "\b\s*" below catches namespace
+      # declarations even if it isn't followed by a whitespace, this
+      # is so that we don't confuse our namespace checker. The
+      # missing spaces will be flagged by CheckSpacing.
+      namespace_decl_match = Match(r'^\s*namespace\b\s*([:\w]+)?(.*)$', line)
+      if not namespace_decl_match:
+        break
+
+      new_namespace = _NamespaceInfo(namespace_decl_match.group(1), linenum)
+      self.stack.append(new_namespace)
+
+      line = namespace_decl_match.group(2)
+      if line.find('{') != -1:
+        new_namespace.seen_open_brace = True
+        line = line[line.find('{') + 1:]
+
+    # Look for a class declaration in whatever is left of the line
+    # after parsing namespaces. The regexp accounts for decorated classes
+    # such as in:
+    #   class LOCKABLE API Object {
+    #   };
+    class_decl_match = Match(
+        r'^(\s*(?:template\s*<[\w\s<>,:=]*>\s*)?'
+        r'(class|struct)\s+(?:[a-zA-Z0-9_]+\s+)*(\w+(?:::\w+)*))'
+        r'(.*)$', line)
+    if (class_decl_match and
+        (not self.stack or self.stack[-1].open_parentheses == 0)):
+      # We do not want to accept classes that are actually template arguments:
+      #   template <class Ignore1,
+      #             class Ignore2 = Default<Args>,
+      #             template <typename> class Ignore3>
+      #   void Function() {};
+      #
+      # To avoid template argument cases, we scan forward and look for
+      # an unmatched '>'. If we see one, assume we are inside a
+      # template argument list.
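+      # Illustrative example (not from upstream cpplint): in
+      #   template <template <typename> class Ignore3> void F();
+      # the scan after 'class Ignore3' hits the unmatched '>', so no
+      # _ClassInfo is pushed for it.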
+      end_declaration = len(class_decl_match.group(1))
+      if not self.InTemplateArgumentList(clean_lines, linenum, end_declaration):
+        self.stack.append(_ClassInfo(
+            class_decl_match.group(3), class_decl_match.group(2),
+            clean_lines, linenum))
+        line = class_decl_match.group(4)
+
+    # If we have not yet seen the opening brace for the innermost block,
+    # run checks here.
+    if not self.SeenOpenBrace():
+      self.stack[-1].CheckBegin(filename, clean_lines, linenum, error)
+
+    # Update access control if we are inside a class/struct
+    if self.stack and isinstance(self.stack[-1], _ClassInfo):
+      classinfo = self.stack[-1]
+      access_match = Match(
+          r'^(.*)\b(public|private|protected|signals)(\s+(?:slots\s*)?)?'
+          r':(?:[^:]|$)',
+          line)
+      if access_match:
+        classinfo.access = access_match.group(2)
+
+        # Check that access keywords are indented +1 space. Skip this
+        # check if the keywords are not preceded by whitespaces.
+        indent = access_match.group(1)
+        if (len(indent) != classinfo.class_indent + 1 and
+            Match(r'^\s*$', indent)):
+          if classinfo.is_struct:
+            parent = 'struct ' + classinfo.name
+          else:
+            parent = 'class ' + classinfo.name
+          slots = ''
+          if access_match.group(3):
+            slots = access_match.group(3)
+          error(filename, linenum, 'whitespace/indent', 3,
+                '%s%s: should be indented +1 space inside %s' % (
+                    access_match.group(2), slots, parent))
+
+    # Consume braces or semicolons from what's left of the line
+    while True:
+      # Match first brace, semicolon, or closed parenthesis.
+      matched = Match(r'^[^{;)}]*([{;)}])(.*)$', line)
+      if not matched:
+        break
+
+      token = matched.group(1)
+      if token == '{':
+        # If namespace or class hasn't seen an opening brace yet, mark
+        # namespace/class head as complete. Push a new block onto the
+        # stack otherwise.
+        if not self.SeenOpenBrace():
+          self.stack[-1].seen_open_brace = True
+        elif Match(r'^extern\s*"[^"]*"\s*\{', line):
+          self.stack.append(_ExternCInfo(linenum))
+        else:
+          self.stack.append(_BlockInfo(linenum, True))
+          if _MATCH_ASM.match(line):
+            self.stack[-1].inline_asm = _BLOCK_ASM
+
+      elif token == ';' or token == ')':
+        # If we haven't seen an opening brace yet, but we already saw
+        # a semicolon, this is probably a forward declaration. Pop
+        # the stack for these.
+        #
+        # Similarly, if we haven't seen an opening brace yet, but we
+        # already saw a closing parenthesis, then these are probably
+        # function arguments with extra "class" or "struct" keywords.
+        # Also pop the stack for these.
+        if not self.SeenOpenBrace():
+          self.stack.pop()
+      else:  # token == '}'
+        # Perform end of block checks and pop the stack.
+        if self.stack:
+          self.stack[-1].CheckEnd(filename, clean_lines, linenum, error)
+          self.stack.pop()
+      line = matched.group(2)
+
+  def InnermostClass(self):
+    """Get class info on the top of the stack.
+
+    Returns:
+      A _ClassInfo object if we are inside a class, or None otherwise.
+    """
+    for i in range(len(self.stack), 0, -1):
+      classinfo = self.stack[i - 1]
+      if isinstance(classinfo, _ClassInfo):
+        return classinfo
+    return None
+
+  def CheckCompletedBlocks(self, filename, error):
+    """Checks that all classes and namespaces have been completely parsed.
+
+    Call this when all lines in a file have been processed.
+    Args:
+      filename: The name of the current file.
+      error: The function to call with any errors found.
+    """
+    # Note: This test can result in false positives if #ifdef constructs
+    # get in the way of brace matching. See the testBuildClass test in
+    # cpplint_unittest.py for an example of this.
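+    # Illustrative example (not from upstream cpplint): a file that ends
+    # while 'namespace pax {' is still open leaves a _NamespaceInfo on the
+    # stack, producing the build/namespaces error below.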
+    for obj in self.stack:
+      if isinstance(obj, _ClassInfo):
+        error(filename, obj.starting_linenum, 'build/class', 5,
+              'Failed to find complete declaration of class %s' %
+              obj.name)
+      elif isinstance(obj, _NamespaceInfo):
+        error(filename, obj.starting_linenum, 'build/namespaces', 5,
+              'Failed to find complete declaration of namespace %s' %
+              obj.name)
+
+
+def CheckForNonStandardConstructs(filename, clean_lines, linenum,
+                                  nesting_state, error):
+  r"""Logs an error if we see certain non-ANSI constructs ignored by gcc-2.
+
+  Complain about several constructs which gcc-2 accepts, but which are
+  not standard C++. Warning about these in lint is one way to ease the
+  transition to new compilers.
+  - put storage class first (e.g. "static const" instead of "const static").
+  - "%lld" instead of "%qd" in printf-type functions.
+  - "%1$d" is non-standard in printf-type functions.
+  - "\%" is an undefined character escape sequence.
+  - text after #endif is not allowed.
+  - invalid inner-style forward declaration.
+  - >? and <? operators, and their >?= and <?= cousins.
+
+  Additionally, check for constructor/destructor style violations and
+  reference members, as it is very convenient to do so while checking for
+  gcc-2 compliance.
+
+  Args:
+    filename: The name of the current file.
+    clean_lines: A CleansedLines instance containing the file.
+    linenum: The number of the line to check.
+    nesting_state: A NestingState instance which maintains information about
+                   the current stack of nested blocks being parsed.
+    error: A callable to which errors are reported, which takes 4 arguments:
+           filename, line number, error level, and message
+  """
+
+  # Remove comments from the line, but leave in strings for now.
+  line = clean_lines.lines[linenum]
+
+  if Search(r'printf\s*\(.*".*%[-+ ]?\d*q', line):
+    error(filename, linenum, 'runtime/printf_format', 3,
+          '%q in format strings is deprecated. Use %ll instead.')
+
+  if Search(r'printf\s*\(.*".*%\d+\$', line):
+    error(filename, linenum, 'runtime/printf_format', 2,
+          '%N$ formats are unconventional. Try rewriting to avoid them.')
+
+  # Remove escaped backslashes before looking for undefined escape sequences.
+  line = line.replace('\\\\', '')
+
+  if Search(r'("|\').*\\(%|\[|\(|{)', line):
+    error(filename, linenum, 'build/printf_format', 3,
+          '%, [, (, and { are undefined character escapes. Unescape them.')
+
+  # For the rest, work with both comments and strings removed.
+  line = clean_lines.elided[linenum]
+
+  if Search(r'\b(const|volatile|void|char|short|int|long'
+            r'|float|double|signed|unsigned'
+            r'|schar|u?int8|u?int16|u?int32|u?int64)'
+            r'\s+(register|static|extern|typedef)\b',
+            line):
+    error(filename, linenum, 'build/storage_class', 5,
+          'Storage-class specifier (static, extern, typedef, etc) should be '
+          'at the beginning of the declaration.')
+
+  if Match(r'\s*#\s*endif\s*[^/\s]+', line):
+    error(filename, linenum, 'build/endif_comment', 5,
+          'Uncommented text after #endif is non-standard. Use a comment.')
+
+  if Match(r'\s*class\s+(\w+\s*::\s*)+\w+\s*;', line):
+    error(filename, linenum, 'build/forward_decl', 5,
+          'Inner-style forward declarations are invalid. Remove this line.')
+
+  if Search(r'(\w+|[+-]?\d+(\.\d*)?)\s*(<|>)\?=?\s*(\w+|[+-]?\d+)(\.\d*)?',
+            line):
+    error(filename, linenum, 'build/deprecated', 3,
+          '>? and <? (max and min) operators are non-standard and deprecated.')
+
+  if Search(r'^\s*const\s*string\s*&\s*\w+\s*;', line):
+    # TODO(unknown): Generalize this check to other const reference members,
+    # e.g. with a pattern like:
+    #   r'\s*const\s*' + type_name + '\s*&\s*\w+\s*;'
+    error(filename, linenum, 'runtime/member_string_references', 2,
+          'const string& members are dangerous. It is much better to use '
+          'alternatives, such as pointers or simple constants.')
+
+  # Everything else in this function operates on class declarations.
+  # Return early if the top of the nesting stack is not a class, or if
+  # the class head is not completed yet.
+  classinfo = nesting_state.InnermostClass()
+  if not classinfo or not classinfo.seen_open_brace:
+    return
+
+  # The class may have been declared with namespace or classname qualifiers.
+  # The constructor and destructor will not have those qualifiers.
+  base_classname = classinfo.name.split('::')[-1]
+
+  # Look for single-argument constructors that aren't marked explicit.
+  # Technically a valid construct, but against style.
+  explicit_constructor_match = Match(
+      r'\s+(?:(?:inline|constexpr)\s+)*(explicit\s+)?'
+      r'(?:(?:inline|constexpr)\s+)*%s\s*'
+      r'\(((?:[^()]|\([^()]*\))*)\)'
+      % re.escape(base_classname),
+      line)
+
+  if explicit_constructor_match:
+    is_marked_explicit = explicit_constructor_match.group(1)
+
+    if not explicit_constructor_match.group(2):
+      constructor_args = []
+    else:
+      constructor_args = explicit_constructor_match.group(2).split(',')
+
+    # collapse arguments so that commas in template parameter lists and
+    # function argument parameter lists don't split arguments in two
+    i = 0
+    while i < len(constructor_args):
+      constructor_arg = constructor_args[i]
+      while (constructor_arg.count('<') > constructor_arg.count('>') or
+             constructor_arg.count('(') > constructor_arg.count(')')):
+        constructor_arg += ',' + constructor_args[i + 1]
+        del constructor_args[i + 1]
+      constructor_args[i] = constructor_arg
+      i += 1
+
+    variadic_args = [arg for arg in constructor_args if '&&...'
in arg] + defaulted_args = [arg for arg in constructor_args if '=' in arg] + noarg_constructor = (not constructor_args or # empty arg list + # 'void' arg specifier + (len(constructor_args) == 1 and + constructor_args[0].strip() == 'void')) + onearg_constructor = ((len(constructor_args) == 1 and # exactly one arg + not noarg_constructor) or + # all but at most one arg defaulted + (len(constructor_args) >= 1 and + not noarg_constructor and + len(defaulted_args) >= len(constructor_args) - 1) or + # variadic arguments with zero or one argument + (len(constructor_args) <= 2 and + len(variadic_args) >= 1)) + initializer_list_constructor = bool( + onearg_constructor and + Search(r'\bstd\s*::\s*initializer_list\b', constructor_args[0])) + copy_constructor = bool( + onearg_constructor and + Match(r'((const\s+(volatile\s+)?)?|(volatile\s+(const\s+)?))?' + r'%s(\s*<[^>]*>)?(\s+const)?\s*(?:<\w+>\s*)?&' + % re.escape(base_classname), constructor_args[0].strip())) + + if (not is_marked_explicit and + onearg_constructor and + not initializer_list_constructor and + not copy_constructor): + if defaulted_args or variadic_args: + error(filename, linenum, 'runtime/explicit', 5, + 'Constructors callable with one argument ' + 'should be marked explicit.') + else: + error(filename, linenum, 'runtime/explicit', 5, + 'Single-parameter constructors should be marked explicit.') + elif is_marked_explicit and not onearg_constructor: + if noarg_constructor: + error(filename, linenum, 'runtime/explicit', 5, + 'Zero-parameter constructors should not be marked explicit.') + + +def CheckSpacingForFunctionCall(filename, clean_lines, linenum, error): + """Checks for the correctness of various spacing around function calls. + + Args: + filename: The name of the current file. + clean_lines: A CleansedLines instance containing the file. + linenum: The number of the line to check. + error: The function to call with any errors found. + """ + line = clean_lines.elided[linenum] + + # Since function calls often occur inside if/for/while/switch + # expressions - which have their own, more liberal conventions - we + # first see if we should be looking inside such an expression for a + # function call, to which we can apply more strict standards. + fncall = line # if there's no control flow construct, look at whole line + for pattern in (r'\bif\s*\((.*)\)\s*{', + r'\bfor\s*\((.*)\)\s*{', + r'\bwhile\s*\((.*)\)\s*[{;]', + r'\bswitch\s*\((.*)\)\s*{'): + match = Search(pattern, line) + if match: + fncall = match.group(1) # look inside the parens for function calls + break + + # Except in if/for/while/switch, there should never be space + # immediately inside parens (eg "f( 3, 4 )"). We make an exception + # for nested parens ( (a+b) + c ). Likewise, there should never be + # a space before a ( when it's a function argument. I assume it's a + # function argument when the char before the whitespace is legal in + # a function name (alnum + _) and we're not starting a macro. Also ignore + # pointers and references to arrays and functions coz they're too tricky: + # we use a very simple way to recognize these: + # " (something)(maybe-something)" or + # " (something)(maybe-something," or + # " (something)[something]" + # Note that we assume the contents of [] to be short enough that + # they'll never need to wrap. + if ( # Ignore control structures. + not Search(r'\b(if|elif|for|while|switch|return|new|delete|catch|sizeof)\b', + fncall) and + # Ignore pointers/references to functions. 
+      not Search(r' \([^)]+\)\([^)]*(\)|,$)', fncall) and
+      # Ignore pointers/references to arrays.
+      not Search(r' \([^)]+\)\[[^\]]+\]', fncall)):
+    if Search(r'\w\s*\(\s(?!\s*\\$)', fncall):      # a ( used for a fn call
+      error(filename, linenum, 'whitespace/parens', 4,
+            'Extra space after ( in function call')
+    elif Search(r'\(\s+(?!(\s*\\)|\()', fncall):
+      error(filename, linenum, 'whitespace/parens', 2,
+            'Extra space after (')
+    if (Search(r'\w\s+\(', fncall) and
+        not Search(r'_{0,2}asm_{0,2}\s+_{0,2}volatile_{0,2}\s+\(', fncall) and
+        not Search(r'#\s*define|typedef|using\s+\w+\s*=', fncall) and
+        not Search(r'\w\s+\((\w+::)*\*\w+\)\(', fncall) and
+        not Search(r'\bcase\s+\(', fncall)):
+      # TODO(unknown): Space after an operator function seems to be a common
+      # error, silence those for now by restricting them to highest verbosity.
+      if Search(r'\boperator_*\b', line):
+        error(filename, linenum, 'whitespace/parens', 0,
+              'Extra space before ( in function call')
+      else:
+        error(filename, linenum, 'whitespace/parens', 4,
+              'Extra space before ( in function call')
+  # If the ) is followed only by a newline or a { + newline, assume it's
+  # part of a control statement (if/while/etc), and don't complain
+  if Search(r'[^)]\s+\)\s*[^{\s]', fncall):
+    # If the closing parenthesis is preceded by only whitespaces,
+    # try to give a more descriptive error message.
+    if Search(r'^\s+\)', fncall):
+      error(filename, linenum, 'whitespace/parens', 2,
+            'Closing ) should be moved to the previous line')
+    else:
+      error(filename, linenum, 'whitespace/parens', 2,
+            'Extra space before )')
+
+
+def IsBlankLine(line):
+  """Returns true if the given line is blank.
+
+  We consider a line to be blank if the line is empty or consists of
+  only white spaces.
+
+  Args:
+    line: A line of a string.
+
+  Returns:
+    True, if the given line is blank.
+  """
+  return not line or line.isspace()
+
+
+def CheckForNamespaceIndentation(filename, nesting_state, clean_lines, line,
+                                 error):
+  is_namespace_indent_item = (
+      len(nesting_state.stack) > 1 and
+      nesting_state.stack[-1].check_namespace_indentation and
+      isinstance(nesting_state.previous_stack_top, _NamespaceInfo) and
+      nesting_state.previous_stack_top == nesting_state.stack[-2])
+
+  if ShouldCheckNamespaceIndentation(nesting_state, is_namespace_indent_item,
+                                     clean_lines.elided, line):
+    CheckItemIndentationInNamespace(filename, clean_lines.elided,
+                                    line, error)
+
+
+def CheckForFunctionLengths(filename, clean_lines, linenum,
+                            function_state, error):
+  """Reports for long function bodies.
+
+  For an overview why this is done, see:
+  https://google-styleguide.googlecode.com/svn/trunk/cppguide.xml#Write_Short_Functions
+
+  Uses a simplistic algorithm assuming other style guidelines
+  (especially spacing) are followed.
+  Only checks unindented functions, so class members are unchecked.
+  Trivial bodies are unchecked, so constructors with huge initializer lists
+  may be missed.
+  Blank/comment lines are not counted so as to avoid encouraging the removal
+  of vertical space and comments just to get through a lint check.
+  NOLINT *on the last line of a function* disables this check.
+
+  Args:
+    filename: The name of the current file.
+    clean_lines: A CleansedLines instance containing the file.
+    linenum: The number of the line to check.
+    function_state: Current function name and lines in body so far.
+    error: The function to call with any errors found.
+ """ + lines = clean_lines.lines + line = lines[linenum] + joined_line = '' + + starting_func = False + regexp = r'(\w(\w|::|\*|\&|\s)*)\(' # decls * & space::name( ... + match_result = Match(regexp, line) + if match_result: + # If the name is all caps and underscores, figure it's a macro and + # ignore it, unless it's TEST or TEST_F. + function_name = match_result.group(1).split()[-1] + if function_name == 'TEST' or function_name == 'TEST_F' or ( + not Match(r'[A-Z_]+$', function_name)): + starting_func = True + + if starting_func: + body_found = False + for start_linenum in xrange(linenum, clean_lines.NumLines()): + start_line = lines[start_linenum] + joined_line += ' ' + start_line.lstrip() + if Search(r'(;|})', start_line): # Declarations and trivial functions + body_found = True + break # ... ignore + if Search(r'{', start_line): + body_found = True + function = Search(r'((\w|:)*)\(', line).group(1) + if Match(r'TEST', function): # Handle TEST... macros + parameter_regexp = Search(r'(\(.*\))', joined_line) + if parameter_regexp: # Ignore bad syntax + function += parameter_regexp.group(1) + else: + function += '()' + function_state.Begin(function) + break + if not body_found: + # No body for the function (or evidence of a non-function) was found. + error(filename, linenum, 'readability/fn_size', 5, + 'Lint failed to find start of function body.') + elif Match(r'^\}\s*$', line): # function end + function_state.Check(error, filename, linenum) + function_state.End() + elif not Match(r'^\s*$', line): + function_state.Count() # Count non-blank/non-comment lines. + + +_RE_PATTERN_TODO = re.compile(r'^//(\s*)TODO(\(.+?\))?:?(\s|$)?') + + +def CheckComment(line, filename, linenum, next_line_start, error): + """Checks for common mistakes in comments. + + Args: + line: The line in question. + filename: The name of the current file. + linenum: The number of the line to check. + next_line_start: The first non-whitespace column of the next line. + error: The function to call with any errors found. + """ + commentpos = line.find('//') + if commentpos != -1: + # Check if the // may be in quotes. If so, ignore it + if re.sub(r'\\.', '', line[0:commentpos]).count('"') % 2 == 0: + # Allow one space for new scopes, two spaces otherwise: + if (not (Match(r'^.*{ *//', line) and next_line_start == commentpos) and + ((commentpos >= 1 and + line[commentpos-1] not in string.whitespace) or + (commentpos >= 2 and + line[commentpos-2] not in string.whitespace))): + error(filename, linenum, 'whitespace/comments', 2, + 'At least two spaces is best between code and comments') + + # Checks for common mistakes in TODO comments. + comment = line[commentpos:] + match = _RE_PATTERN_TODO.match(comment) + if match: + # One whitespace is correct; zero whitespace is handled elsewhere. 
+ leading_whitespace = match.group(1) + if len(leading_whitespace) > 1: + error(filename, linenum, 'whitespace/todo', 2, + 'Too many spaces before TODO') + + username = match.group(2) + if not username: + error(filename, linenum, 'readability/todo', 2, + 'Missing username in TODO; it should look like ' + '"// TODO(my_username): Stuff."') + + middle_whitespace = match.group(3) + # Comparisons made explicit for correctness -- pylint: disable=g-explicit-bool-comparison + if middle_whitespace != ' ' and middle_whitespace != '': + error(filename, linenum, 'whitespace/todo', 2, + 'TODO(my_username) should be followed by a space') + + # If the comment contains an alphanumeric character, there + # should be a space somewhere between it and the // unless + # it's a /// or //! Doxygen comment. + if (Match(r'//[^ ]*\w', comment) and + not Match(r'(///|//\!)(\s+|$)', comment)): + error(filename, linenum, 'whitespace/comments', 4, + 'Should have a space between // and comment') + + +def CheckSpacing(filename, clean_lines, linenum, nesting_state, error): + """Checks for the correctness of various spacing issues in the code. + + Things we check for: spaces around operators, spaces after + if/for/while/switch, no spaces around parens in function calls, two + spaces between code and comment, don't start a block with a blank + line, don't end a function with a blank line, don't add a blank line + after public/protected/private, don't have too many blank lines in a row. + + Args: + filename: The name of the current file. + clean_lines: A CleansedLines instance containing the file. + linenum: The number of the line to check. + nesting_state: A NestingState instance which maintains information about + the current stack of nested blocks being parsed. + error: The function to call with any errors found. + """ + + # Don't use "elided" lines here, otherwise we can't check commented lines. + # Don't want to use "raw" either, because we don't want to check inside C++11 + # raw strings, + raw = clean_lines.lines_without_raw_strings + line = raw[linenum] + + # Before nixing comments, check if the line is blank for no good + # reason. This includes the first line after a block is opened, and + # blank lines at the end of a function (ie, right before a line like '}' + # + # Skip all the blank line checks if we are immediately inside a + # namespace body. In other words, don't issue blank line warnings + # for this block: + # namespace { + # + # } + # + # A warning about missing end of namespace comments will be issued instead. + # + # Also skip blank line checks for 'extern "C"' blocks, which are formatted + # like namespaces. + if (IsBlankLine(line) and + not nesting_state.InNamespaceBody() and + not nesting_state.InExternC()): + elided = clean_lines.elided + prev_line = elided[linenum - 1] + prevbrace = prev_line.rfind('{') + # TODO(unknown): Don't complain if line before blank line, and line after, + # both start with alnums and are indented the same amount. + # This ignores whitespace at the start of a namespace block + # because those are not usually indented. + if prevbrace != -1 and prev_line[prevbrace:].find('}') == -1: + # OK, we have a blank line at the start of a code block. Before we + # complain, we check if it is an exception to the rule: The previous + # non-empty line has the parameters of a function header that are indented + # 4 spaces (because they did not fit in a 80 column line when placed on + # the same line as the function name). 
We also check for the case where + # the previous line is indented 6 spaces, which may happen when the + # initializers of a constructor do not fit into a 80 column line. + exception = False + if Match(r' {6}\w', prev_line): # Initializer list? + # We are looking for the opening column of initializer list, which + # should be indented 4 spaces to cause 6 space indentation afterwards. + search_position = linenum-2 + while (search_position >= 0 + and Match(r' {6}\w', elided[search_position])): + search_position -= 1 + exception = (search_position >= 0 + and elided[search_position][:5] == ' :') + else: + # Search for the function arguments or an initializer list. We use a + # simple heuristic here: If the line is indented 4 spaces; and we have a + # closing paren, without the opening paren, followed by an opening brace + # or colon (for initializer lists) we assume that it is the last line of + # a function header. If we have a colon indented 4 spaces, it is an + # initializer list. + exception = (Match(r' {4}\w[^\(]*\)\s*(const\s*)?(\{\s*$|:)', + prev_line) + or Match(r' {4}:', prev_line)) + + if not exception: + error(filename, linenum, 'whitespace/blank_line', 2, + 'Redundant blank line at the start of a code block ' + 'should be deleted.') + # Ignore blank lines at the end of a block in a long if-else + # chain, like this: + # if (condition1) { + # // Something followed by a blank line + # + # } else if (condition2) { + # // Something else + # } + if linenum + 1 < clean_lines.NumLines(): + next_line = raw[linenum + 1] + if (next_line + and Match(r'\s*}', next_line) + and next_line.find('} else ') == -1): + error(filename, linenum, 'whitespace/blank_line', 3, + 'Redundant blank line at the end of a code block ' + 'should be deleted.') + + matched = Match(r'\s*(public|protected|private):', prev_line) + if matched: + error(filename, linenum, 'whitespace/blank_line', 3, + 'Do not leave a blank line after "%s:"' % matched.group(1)) + + # Next, check comments + next_line_start = 0 + if linenum + 1 < clean_lines.NumLines(): + next_line = raw[linenum + 1] + next_line_start = len(next_line) - len(next_line.lstrip()) + CheckComment(line, filename, linenum, next_line_start, error) + + # get rid of comments and strings + line = clean_lines.elided[linenum] + + # You shouldn't have spaces before your brackets, except for C++11 attributes + # or maybe after 'delete []', 'return []() {};', or 'auto [abc, ...] = ...;'. + if (Search(r'\w\s+\[(?!\[)', line) and + not Search(r'(?:auto&?|delete|return)\s+\[', line)): + error(filename, linenum, 'whitespace/braces', 5, + 'Extra space before [') + + # In range-based for, we wanted spaces before and after the colon, but + # not around "::" tokens that might appear. + if (Search(r'for *\(.*[^:]:[^: ]', line) or + Search(r'for *\(.*[^: ]:[^:]', line)): + error(filename, linenum, 'whitespace/forcolon', 2, + 'Missing space around colon in range-based for loop') + + +def CheckOperatorSpacing(filename, clean_lines, linenum, error): + """Checks for horizontal spacing around operators. + + Args: + filename: The name of the current file. + clean_lines: A CleansedLines instance containing the file. + linenum: The number of the line to check. + error: The function to call with any errors found. + """ + line = clean_lines.elided[linenum] + + # Don't try to do spacing checks for operator methods. Do this by + # replacing the troublesome characters with something else, + # preserving column position for all other characters. 
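+  #
+  # Editor's note -- an illustrative (assumed) example of the rewrite,
+  # not from upstream cpplint:
+  #   "bool operator==(const Foo& a)"   is checked as
+  #   "bool operator__(const Foo& a)"
+  # so the '==' itself cannot trigger the operator-spacing checks below.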
+  #
+  # The replacement is done repeatedly to avoid false positives from
+  # operators that call operators.
+  while True:
+    match = Match(r'^(.*\boperator\b)(\S+)(\s*\(.*)$', line)
+    if match:
+      line = match.group(1) + ('_' * len(match.group(2))) + match.group(3)
+    else:
+      break
+
+  # We allow no-spaces around = within an if: "if ( (a=Foo()) == 0 )".
+  # Otherwise not.  Note we only check for non-spaces on *both* sides;
+  # sometimes people put non-spaces on one side when aligning ='s among
+  # many lines (not that this is behavior that I approve of...)
+  if ((Search(r'[\w.]=', line) or
+       Search(r'=[\w.]', line))
+      and not Search(r'\b(if|while|for) ', line)
+      # Operators taken from [lex.operators] in C++11 standard.
+      and not Search(r'(>=|<=|==|!=|&=|\^=|\|=|\+=|\*=|\/=|\%=)', line)
+      and not Search(r'operator=', line)):
+    error(filename, linenum, 'whitespace/operators', 4,
+          'Missing spaces around =')
+
+  # It's ok not to have spaces around binary operators like + - * /, but if
+  # there's too little whitespace, we get concerned.  It's hard to tell,
+  # though, so we punt on this one for now.  TODO.
+
+  # You should always have whitespace around binary operators.
+  #
+  # Check <= and >= first to avoid false positives with < and >, then
+  # check non-include lines for spacing around < and >.
+  #
+  # If the operator is followed by a comma, assume it's being used in a
+  # macro context and don't do any checks.  This avoids false
+  # positives.
+  #
+  # Note that && is not included here.  This is because there are too
+  # many false positives due to RValue references.
+  match = Search(r'[^<>=!\s](==|!=|<=|>=|\|\|)[^<>=!\s,;\)]', line)
+  if match:
+    error(filename, linenum, 'whitespace/operators', 3,
+          'Missing spaces around %s' % match.group(1))
+  elif not Match(r'#.*include', line):
+    # Look for < that is not surrounded by spaces.  This is only
+    # triggered if both sides are missing spaces, even though technically
+    # we should flag it if at least one side is missing a space.  This is
+    # done to avoid some false positives with shifts.
+    match = Match(r'^(.*[^\s<])<[^\s=<,]', line)
+    if match:
+      (_, _, end_pos) = CloseExpression(
+          clean_lines, linenum, len(match.group(1)))
+      if end_pos <= -1:
+        error(filename, linenum, 'whitespace/operators', 3,
+              'Missing spaces around <')
+
+    # Look for > that is not surrounded by spaces.  Similar to the
+    # above, we only trigger if both sides are missing spaces to avoid
+    # false positives with shifts.
+    match = Match(r'^(.*[^-\s>])>[^\s=>,]', line)
+    if match:
+      (_, _, start_pos) = ReverseCloseExpression(
+          clean_lines, linenum, len(match.group(1)))
+      if start_pos <= -1:
+        error(filename, linenum, 'whitespace/operators', 3,
+              'Missing spaces around >')
+
+  # We allow no-spaces around << when used like this: 10<<20, but
+  # not otherwise (particularly, not when used as streams)
+  #
+  # We also allow operators following an opening parenthesis, since
+  # those tend to be macros that deal with operators.
+  match = Search(r'(operator|[^\s(<])(?:L|UL|LL|ULL|l|ul|ll|ull)?<<([^\s,=<])',
+                 line)
+  if (match and not (match.group(1).isdigit() and match.group(2).isdigit()) and
+      not (match.group(1) == 'operator' and match.group(2) == ';')):
+    error(filename, linenum, 'whitespace/operators', 3,
+          'Missing spaces around <<')
+
+  # We allow no-spaces around >> for almost anything.  This is because
+  # C++11 allows ">>" to close nested templates, which accounts for
+  # most cases when ">>" is not followed by a space.
+ # + # We still warn on ">>" followed by alpha character, because that is + # likely due to ">>" being used for right shifts, e.g.: + # value >> alpha + # + # When ">>" is used to close templates, the alphanumeric letter that + # follows would be part of an identifier, and there should still be + # a space separating the template type and the identifier. + # type> alpha + match = Search(r'>>[a-zA-Z_]', line) + if match: + error(filename, linenum, 'whitespace/operators', 3, + 'Missing spaces around >>') + + # There shouldn't be space around unary operators + match = Search(r'(!\s|~\s|[\s]--[\s;]|[\s]\+\+[\s;])', line) + if match: + error(filename, linenum, 'whitespace/operators', 4, + 'Extra space for operator %s' % match.group(1)) + + +def CheckParenthesisSpacing(filename, clean_lines, linenum, error): + """Checks for horizontal spacing around parentheses. + + Args: + filename: The name of the current file. + clean_lines: A CleansedLines instance containing the file. + linenum: The number of the line to check. + error: The function to call with any errors found. + """ + line = clean_lines.elided[linenum] + + # No spaces after an if, while, switch, or for + match = Search(r' (if\(|for\(|while\(|switch\()', line) + if match: + error(filename, linenum, 'whitespace/parens', 5, + 'Missing space before ( in %s' % match.group(1)) + + # For if/for/while/switch, the left and right parens should be + # consistent about how many spaces are inside the parens, and + # there should either be zero or one spaces inside the parens. + # We don't want: "if ( foo)" or "if ( foo )". + # Exception: "for ( ; foo; bar)" and "for (foo; bar; )" are allowed. + match = Search(r'\b(if|for|while|switch)\s*' + r'\(([ ]*)(.).*[^ ]+([ ]*)\)\s*{\s*$', + line) + if match: + if len(match.group(2)) != len(match.group(4)): + if not (match.group(3) == ';' and + len(match.group(2)) == 1 + len(match.group(4)) or + not match.group(2) and Search(r'\bfor\s*\(.*; \)', line)): + error(filename, linenum, 'whitespace/parens', 5, + 'Mismatching spaces inside () in %s' % match.group(1)) + if len(match.group(2)) not in [0, 1]: + error(filename, linenum, 'whitespace/parens', 5, + 'Should have zero or one spaces inside ( and ) in %s' % + match.group(1)) + + +def CheckCommaSpacing(filename, clean_lines, linenum, error): + """Checks for horizontal spacing near commas and semicolons. + + Args: + filename: The name of the current file. + clean_lines: A CleansedLines instance containing the file. + linenum: The number of the line to check. + error: The function to call with any errors found. + """ + raw = clean_lines.lines_without_raw_strings + line = clean_lines.elided[linenum] + + # You should always have a space after a comma (either as fn arg or operator) + # + # This does not apply when the non-space character following the + # comma is another comma, since the only time when that happens is + # for empty macro arguments. + # + # We run this check in two passes: first pass on elided lines to + # verify that lines contain missing whitespaces, second pass on raw + # lines to confirm that those missing whitespaces are not due to + # elided comments. 
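+  #
+  # Editor's note -- illustrative (assumed) examples, not upstream text:
+  #   "f(a,b)"        -> warns: missing space after comma
+  #   "f(a,, b)"      -> no warning: the comma is followed by another comma
+  #   "operator,("    -> no warning: rewritten to "F(" before the check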
+  if (Search(r',[^,\s]', ReplaceAll(r'\boperator\s*,\s*\(', 'F(', line)) and
+      Search(r',[^,\s]', raw[linenum])):
+    error(filename, linenum, 'whitespace/comma', 3,
+          'Missing space after ,')
+
+  # You should always have a space after a semicolon
+  # except for few corner cases
+  # TODO(unknown): clarify if 'if (1) { return 1;}' requires one more
+  # space after ;
+  if Search(r';[^\s};\\)/]', line):
+    error(filename, linenum, 'whitespace/semicolon', 3,
+          'Missing space after ;')
+
+
+def _IsType(clean_lines, nesting_state, expr):
+  """Check if expression looks like a type name, returns true if so.
+
+  Args:
+    clean_lines: A CleansedLines instance containing the file.
+    nesting_state: A NestingState instance which maintains information about
+      the current stack of nested blocks being parsed.
+    expr: The expression to check.
+  Returns:
+    True, if token looks like a type.
+  """
+  # Keep only the last token in the expression
+  last_word = Match(r'^.*(\b\S+)$', expr)
+  if last_word:
+    token = last_word.group(1)
+  else:
+    token = expr
+
+  # Match native types and stdint types
+  if _TYPES.match(token):
+    return True
+
+  # Try a bit harder to match templated types.  Walk up the nesting
+  # stack until we find something that resembles a typename
+  # declaration for what we are looking for.
+  typename_pattern = (r'\b(?:typename|class|struct)\s+' + re.escape(token) +
+                      r'\b')
+  block_index = len(nesting_state.stack) - 1
+  while block_index >= 0:
+    if isinstance(nesting_state.stack[block_index], _NamespaceInfo):
+      return False
+
+    # Found where the opening brace is.  We want to scan from this
+    # line up to the beginning of the function, minus a few lines.
+    #   template <typename Type1,  // stop scanning here
+    #             ...>
+    #   class C
+    #     : public ... {  // start scanning here
+    last_line = nesting_state.stack[block_index].starting_linenum
+
+    next_block_start = 0
+    if block_index > 0:
+      next_block_start = nesting_state.stack[block_index - 1].starting_linenum
+    first_line = last_line
+    while first_line >= next_block_start:
+      if clean_lines.elided[first_line].find('template') >= 0:
+        break
+      first_line -= 1
+    if first_line < next_block_start:
+      # Didn't find any "template" keyword before reaching the next block,
+      # there are probably no template things to check for this block
+      block_index -= 1
+      continue
+
+    # Look for typename in the specified range
+    for i in xrange(first_line, last_line + 1, 1):
+      if Search(typename_pattern, clean_lines.elided[i]):
+        return True
+    block_index -= 1
+
+  return False
+
+
+def CheckBracesSpacing(filename, clean_lines, linenum, nesting_state, error):
+  """Checks for horizontal spacing near braces.
+
+  Args:
+    filename: The name of the current file.
+    clean_lines: A CleansedLines instance containing the file.
+    linenum: The number of the line to check.
+    nesting_state: A NestingState instance which maintains information about
+      the current stack of nested blocks being parsed.
+    error: The function to call with any errors found.
+  """
+  line = clean_lines.elided[linenum]
+
+  # Except after an opening paren, or after another opening brace (in case of
+  # an initializer list, for instance), you should have spaces before your
+  # braces when they are delimiting blocks, classes, namespaces etc.
+  # And since you should never have braces at the beginning of a line,
+  # this is an easy test.  Except that braces used for initialization don't
+  # follow the same rule; we often don't want spaces before those.
+  match = Match(r'^(.*[^ ({>]){', line)
+
+  if match:
+    # Try a bit harder to check for brace initialization.  This
+    # happens in one of the following forms:
+    #   Constructor() : initializer_list_{} { ... }
+    #   Constructor{}.MemberFunction()
+    #   Type variable{};
+    #   FunctionCall(type{}, ...);
+    #   LastArgument(..., type{});
+    #   LOG(INFO) << type{} << " ...";
+    #   map_of_type[{...}] = ...;
+    #   ternary = expr ? new type{} : nullptr;
+    #   OuterTemplate<InnerTemplateConstructor<Type>{}>
+    #
+    # We check for the character following the closing brace, and
+    # silence the warning if it's one of those listed above, i.e.
+    # "{.;,)<>]:".
+    #
+    # To account for nested initializer list, we allow any number of
+    # closing braces up to "{;,)<".  We can't simply silence the
+    # warning on first sight of closing brace, because that would
+    # cause false negatives for things that are not initializer lists.
+    #   Silence this:         But not this:
+    #     Outer{                if (...) {
+    #       Inner{...}            if (...){  // Missing space before {
+    #     };                    }
+    #
+    # There is a false negative with this approach if people inserted
+    # spurious semicolons, e.g. "if (cond){};", but we will catch the
+    # spurious semicolon with a separate check.
+    leading_text = match.group(1)
+    (endline, endlinenum, endpos) = CloseExpression(
+        clean_lines, linenum, len(match.group(1)))
+    trailing_text = ''
+    if endpos > -1:
+      trailing_text = endline[endpos:]
+    for offset in xrange(endlinenum + 1,
+                         min(endlinenum + 3, clean_lines.NumLines() - 1)):
+      trailing_text += clean_lines.elided[offset]
+    # We also suppress warnings for `uint64_t{expression}` etc., as the style
+    # guide recommends brace initialization for integral types to avoid
+    # overflow/truncation.
+    if (not Match(r'^[\s}]*[{.;,)<>\]:]', trailing_text)
+        and not _IsType(clean_lines, nesting_state, leading_text)):
+      error(filename, linenum, 'whitespace/braces', 5,
+            'Missing space before {')
+
+  # Make sure '} else {' has spaces.
+  if Search(r'}else', line):
+    error(filename, linenum, 'whitespace/braces', 5,
+          'Missing space before else')
+
+  # You shouldn't have a space before a semicolon at the end of the line.
+  # There's a special case for "for" since the style guide allows space before
+  # the semicolon there.
+  if Search(r':\s*;\s*$', line):
+    error(filename, linenum, 'whitespace/semicolon', 5,
+          'Semicolon defining empty statement. Use {} instead.')
+  elif Search(r'^\s*;\s*$', line):
+    error(filename, linenum, 'whitespace/semicolon', 5,
+          'Line contains only semicolon. If this should be an empty '
+          'statement, use {} instead.')
+  elif (Search(r'\s+;\s*$', line) and
+        not Search(r'\bfor\b', line)):
+    error(filename, linenum, 'whitespace/semicolon', 5,
+          'Extra space before last semicolon. If this should be an empty '
+          'statement, use {} instead.')
+
+
+def IsDecltype(clean_lines, linenum, column):
+  """Check if the token ending on (linenum, column) is decltype().
+
+  Args:
+    clean_lines: A CleansedLines instance containing the file.
+    linenum: the number of the line to check.
+    column: end column of the token to check.
+  Returns:
+    True if this token is decltype() expression, False otherwise.
+  """
+  (text, _, start_col) = ReverseCloseExpression(clean_lines, linenum, column)
+  if start_col < 0:
+    return False
+  if Search(r'\bdecltype\s*$', text[0:start_col]):
+    return True
+  return False
+
+def CheckSectionSpacing(filename, clean_lines, class_info, linenum, error):
+  """Checks for additional blank line issues related to sections.
+
+  Currently the only thing checked here is blank line before protected/private.
+
+  Args:
+    filename: The name of the current file.
+    clean_lines: A CleansedLines instance containing the file.
+    class_info: A _ClassInfo object.
+    linenum: The number of the line to check.
+    error: The function to call with any errors found.
+  """
+  # Skip checks if the class is small, where small means 25 lines or less.
+  # 25 lines seems like a good cutoff since that's the usual height of
+  # terminals, and any class that can't fit in one screen can't really
+  # be considered "small".
+  #
+  # Also skip checks if we are on the first line.  This accounts for
+  # classes that look like
+  #   class Foo { public: ... };
+  #
+  # If we didn't find the end of the class, last_line would be zero,
+  # and the check will be skipped by the first condition.
+  if (class_info.last_line - class_info.starting_linenum <= 24 or
+      linenum <= class_info.starting_linenum):
+    return
+
+  matched = Match(r'\s*(public|protected|private):', clean_lines.lines[linenum])
+  if matched:
+    # Issue warning if the line before public/protected/private was
+    # not a blank line, but don't do this if the previous line contains
+    # "class" or "struct".  This can happen two ways:
+    #  - We are at the beginning of the class.
+    #  - We are forward-declaring an inner class that is semantically
+    #    private, but needed to be public for implementation reasons.
+    # Also ignores cases where the previous line ends with a backslash as can be
+    # common when defining classes in C macros.
+    prev_line = clean_lines.lines[linenum - 1]
+    if (not IsBlankLine(prev_line) and
+        not Search(r'\b(class|struct)\b', prev_line) and
+        not Search(r'\\$', prev_line)):
+      # Try a bit harder to find the beginning of the class.  This is to
+      # account for multi-line base-specifier lists, e.g.:
+      #   class Derived
+      #       : public Base {
+      end_class_head = class_info.starting_linenum
+      for i in range(class_info.starting_linenum, linenum):
+        if Search(r'\{\s*$', clean_lines.lines[i]):
+          end_class_head = i
+          break
+      if end_class_head < linenum - 1:
+        error(filename, linenum, 'whitespace/blank_line', 3,
+              '"%s:" should be preceded by a blank line' % matched.group(1))
+
+
+def GetPreviousNonBlankLine(clean_lines, linenum):
+  """Return the most recent non-blank line and its line number.
+
+  Args:
+    clean_lines: A CleansedLines instance containing the file contents.
+    linenum: The number of the line to check.
+
+  Returns:
+    A tuple with two elements.  The first element is the contents of the last
+    non-blank line before the current line, or the empty string if this is the
+    first non-blank line.  The second is the line number of that line, or -1
+    if this is the first non-blank line.
+  """
+
+  prevlinenum = linenum - 1
+  while prevlinenum >= 0:
+    prevline = clean_lines.elided[prevlinenum]
+    if not IsBlankLine(prevline):     # if not a blank line...
+      return (prevline, prevlinenum)
+    prevlinenum -= 1
+  return ('', -1)
+
+
+def CheckBraces(filename, clean_lines, linenum, error):
+  """Looks for misplaced braces (e.g. at the end of line).
+
+  Args:
+    filename: The name of the current file.
+    clean_lines: A CleansedLines instance containing the file.
+    linenum: The number of the line to check.
+    error: The function to call with any errors found.
+  """
+
+  line = clean_lines.elided[linenum]        # get rid of comments and strings
+
+  if Match(r'\s*{\s*$', line):
+    # We allow an open brace to start a line in the case where someone is using
+    # braces in a block to explicitly create a new scope, which is commonly used
+    # to control the lifetime of stack-allocated variables.  Braces are also
+    # used for brace initializers inside function calls.
We don't detect this + # perfectly: we just don't complain if the last non-whitespace character on + # the previous non-blank line is ',', ';', ':', '(', '{', or '}', or if the + # previous line starts a preprocessor block. We also allow a brace on the + # following line if it is part of an array initialization and would not fit + # within the 80 character limit of the preceding line. + prevline = GetPreviousNonBlankLine(clean_lines, linenum)[0] + if (not Search(r'[,;:}{(]\s*$', prevline) and + not Match(r'\s*#', prevline) and + not (GetLineWidth(prevline) > _line_length - 2 and '[]' in prevline)): + error(filename, linenum, 'whitespace/braces', 4, + '{ should almost always be at the end of the previous line') + + # An else clause should be on the same line as the preceding closing brace. + if Match(r'\s*else\b\s*(?:if\b|\{|$)', line): + prevline = GetPreviousNonBlankLine(clean_lines, linenum)[0] + if Match(r'\s*}\s*$', prevline): + error(filename, linenum, 'whitespace/newline', 4, + 'An else should appear on the same line as the preceding }') + + # If braces come on one side of an else, they should be on both. + # However, we have to worry about "else if" that spans multiple lines! + if Search(r'else if\s*\(', line): # could be multi-line if + brace_on_left = bool(Search(r'}\s*else if\s*\(', line)) + # find the ( after the if + pos = line.find('else if') + pos = line.find('(', pos) + if pos > 0: + (endline, _, endpos) = CloseExpression(clean_lines, linenum, pos) + brace_on_right = endline[endpos:].find('{') != -1 + if brace_on_left != brace_on_right: # must be brace after if + error(filename, linenum, 'readability/braces', 5, + 'If an else has a brace on one side, it should have it on both') + elif Search(r'}\s*else[^{]*$', line) or Match(r'[^}]*else\s*{', line): + error(filename, linenum, 'readability/braces', 5, + 'If an else has a brace on one side, it should have it on both') + + # Likewise, an else should never have the else clause on the same line + if Search(r'\belse [^\s{]', line) and not Search(r'\belse if\b', line): + error(filename, linenum, 'whitespace/newline', 4, + 'Else clause should never be on same line as else (use 2 lines)') + + # In the same way, a do/while should never be on one line + if Match(r'\s*do [^\s{]', line): + error(filename, linenum, 'whitespace/newline', 4, + 'do/while clauses should not be on a single line') + + # Check single-line if/else bodies. The style guide says 'curly braces are not + # required for single-line statements'. We additionally allow multi-line, + # single statements, but we reject anything with more than one semicolon in + # it. This means that the first semicolon after the if should be at the end of + # its line, and the line after that should have an indent level equal to or + # lower than the if. We also check for ambiguous if/else nesting without + # braces. + if_else_match = Search(r'\b(if\s*(|constexpr)\s*\(|else\b)', line) + if if_else_match and not Match(r'\s*#', line): + if_indent = GetIndentLevel(line) + endline, endlinenum, endpos = line, linenum, if_else_match.end() + if_match = Search(r'\bif\s*(|constexpr)\s*\(', line) + if if_match: + # This could be a multiline if condition, so find the end first. + pos = if_match.end() - 1 + (endline, endlinenum, endpos) = CloseExpression(clean_lines, linenum, pos) + # Check for an opening brace, either directly after the if or on the next + # line. If found, this isn't a single-statement conditional. 
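+    #
+    # Editor's note -- rough illustrative (assumed) cases, not upstream:
+    #   if (x) { a(); b(); }   -> opening brace found, skipped here
+    #   if (x) a(); b();       -> warns: multiple statements need braces
+    #   if (x) return;         -> single statement, normally no warning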
+ if (not Match(r'\s*{', endline[endpos:]) + and not (Match(r'\s*$', endline[endpos:]) + and endlinenum < (len(clean_lines.elided) - 1) + and Match(r'\s*{', clean_lines.elided[endlinenum + 1]))): + while (endlinenum < len(clean_lines.elided) + and ';' not in clean_lines.elided[endlinenum][endpos:]): + endlinenum += 1 + endpos = 0 + if endlinenum < len(clean_lines.elided): + endline = clean_lines.elided[endlinenum] + # We allow a mix of whitespace and closing braces (e.g. for one-liner + # methods) and a single \ after the semicolon (for macros) + endpos = endline.find(';') + if not Match(r';[\s}]*(\\?)$', endline[endpos:]): + # Semicolon isn't the last character, there's something trailing. + # Output a warning if the semicolon is not contained inside + # a lambda expression. + if not Match(r'^[^{};]*\[[^\[\]]*\][^{}]*\{[^{}]*\}\s*\)*[;,]\s*$', + endline): + error(filename, linenum, 'readability/braces', 4, + 'If/else bodies with multiple statements require braces') + elif endlinenum < len(clean_lines.elided) - 1: + # Make sure the next line is dedented + next_line = clean_lines.elided[endlinenum + 1] + next_indent = GetIndentLevel(next_line) + # With ambiguous nested if statements, this will error out on the + # if that *doesn't* match the else, regardless of whether it's the + # inner one or outer one. + if (if_match and Match(r'\s*else\b', next_line) + and next_indent != if_indent): + error(filename, linenum, 'readability/braces', 4, + 'Else clause should be indented at the same level as if. ' + 'Ambiguous nested if/else chains require braces.') + elif next_indent > if_indent: + error(filename, linenum, 'readability/braces', 4, + 'If/else bodies with multiple statements require braces') + + +def CheckTrailingSemicolon(filename, clean_lines, linenum, error): + """Looks for redundant trailing semicolon. + + Args: + filename: The name of the current file. + clean_lines: A CleansedLines instance containing the file. + linenum: The number of the line to check. + error: The function to call with any errors found. + """ + + line = clean_lines.elided[linenum] + + # Block bodies should not be followed by a semicolon. Due to C++11 + # brace initialization, there are more places where semicolons are + # required than not, so we explicitly list the allowed rules rather + # than listing the disallowed ones. These are the places where "};" + # should be replaced by just "}": + # 1. Some flavor of block following closing parenthesis: + # for (;;) {}; + # while (...) {}; + # switch (...) {}; + # Function(...) {}; + # if (...) {}; + # if (...) else if (...) {}; + # + # 2. else block: + # if (...) else {}; + # + # 3. const member function: + # Function(...) const {}; + # + # 4. Block following some statement: + # x = 42; + # {}; + # + # 5. Block at the beginning of a function: + # Function(...) { + # {}; + # } + # + # Note that naively checking for the preceding "{" will also match + # braces inside multi-dimensional arrays, but this is fine since + # that expression will not contain semicolons. + # + # 6. Block following another block: + # while (true) {} + # {}; + # + # 7. End of namespaces: + # namespace {}; + # + # These semicolons seems far more common than other kinds of + # redundant semicolons, possibly due to people converting classes + # to namespaces. For now we do not warn for this case. + # + # Try matching case 1 first. + match = Match(r'^(.*\)\s*)\{', line) + if match: + # Matched closing parenthesis (case 1). 
Check the token before the + # matching opening parenthesis, and don't warn if it looks like a + # macro. This avoids these false positives: + # - macro that defines a base class + # - multi-line macro that defines a base class + # - macro that defines the whole class-head + # + # But we still issue warnings for macros that we know are safe to + # warn, specifically: + # - TEST, TEST_F, TEST_P, MATCHER, MATCHER_P + # - TYPED_TEST + # - INTERFACE_DEF + # - EXCLUSIVE_LOCKS_REQUIRED, SHARED_LOCKS_REQUIRED, LOCKS_EXCLUDED: + # + # We implement a list of safe macros instead of a list of + # unsafe macros, even though the latter appears less frequently in + # google code and would have been easier to implement. This is because + # the downside for getting the allowed checks wrong means some extra + # semicolons, while the downside for getting disallowed checks wrong + # would result in compile errors. + # + # In addition to macros, we also don't want to warn on + # - Compound literals + # - Lambdas + # - alignas specifier with anonymous structs + # - decltype + closing_brace_pos = match.group(1).rfind(')') + opening_parenthesis = ReverseCloseExpression( + clean_lines, linenum, closing_brace_pos) + if opening_parenthesis[2] > -1: + line_prefix = opening_parenthesis[0][0:opening_parenthesis[2]] + macro = Search(r'\b([A-Z_][A-Z0-9_]*)\s*$', line_prefix) + func = Match(r'^(.*\])\s*$', line_prefix) + if ((macro and + macro.group(1) not in ( + 'TEST', 'TEST_F', 'MATCHER', 'MATCHER_P', 'TYPED_TEST', + 'EXCLUSIVE_LOCKS_REQUIRED', 'SHARED_LOCKS_REQUIRED', + 'LOCKS_EXCLUDED', 'INTERFACE_DEF')) or + (func and not Search(r'\boperator\s*\[\s*\]', func.group(1))) or + Search(r'\b(?:struct|union)\s+alignas\s*$', line_prefix) or + Search(r'\bdecltype$', line_prefix) or + Search(r'\s+=\s*$', line_prefix)): + match = None + if (match and + opening_parenthesis[1] > 1 and + Search(r'\]\s*$', clean_lines.elided[opening_parenthesis[1] - 1])): + # Multi-line lambda-expression + match = None + + else: + # Try matching cases 2-3. + match = Match(r'^(.*(?:else|\)\s*const)\s*)\{', line) + if not match: + # Try matching cases 4-6. These are always matched on separate lines. + # + # Note that we can't simply concatenate the previous line to the + # current line and do a single match, otherwise we may output + # duplicate warnings for the blank line case: + # if (cond) { + # // blank line + # } + prevline = GetPreviousNonBlankLine(clean_lines, linenum)[0] + if prevline and Search(r'[;{}]\s*$', prevline): + match = Match(r'^(\s*)\{', line) + + # Check matching closing brace + if match: + (endline, endlinenum, endpos) = CloseExpression( + clean_lines, linenum, len(match.group(1))) + if endpos > -1 and Match(r'^\s*;', endline[endpos:]): + # Current {} pair is eligible for semicolon check, and we have found + # the redundant semicolon, output warning here. + # + # Note: because we are scanning forward for opening braces, and + # outputting warnings for the matching closing brace, if there are + # nested blocks with trailing semicolons, we will get the error + # messages in reversed order. 
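+      #
+      # Editor's note -- illustrative (assumed) examples, not upstream:
+      #   while (true) {};   -> warns: "You don't need a ; after a }"
+      #   struct Foo {};     -> no warning: that semicolon is required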
+ + # We need to check the line forward for NOLINT + raw_lines = clean_lines.raw_lines + ParseNolintSuppressions(filename, raw_lines[endlinenum-1], endlinenum-1, + error) + ParseNolintSuppressions(filename, raw_lines[endlinenum], endlinenum, + error) + + error(filename, endlinenum, 'readability/braces', 4, + "You don't need a ; after a }") + + +def CheckEmptyBlockBody(filename, clean_lines, linenum, error): + """Look for empty loop/conditional body with only a single semicolon. + + Args: + filename: The name of the current file. + clean_lines: A CleansedLines instance containing the file. + linenum: The number of the line to check. + error: The function to call with any errors found. + """ + + # Search for loop keywords at the beginning of the line. Because only + # whitespaces are allowed before the keywords, this will also ignore most + # do-while-loops, since those lines should start with closing brace. + # + # We also check "if" blocks here, since an empty conditional block + # is likely an error. + line = clean_lines.elided[linenum] + matched = Match(r'\s*(for|while|if)\s*\(', line) + if matched: + # Find the end of the conditional expression. + (end_line, end_linenum, end_pos) = CloseExpression( + clean_lines, linenum, line.find('(')) + + # Output warning if what follows the condition expression is a semicolon. + # No warning for all other cases, including whitespace or newline, since we + # have a separate check for semicolons preceded by whitespace. + if end_pos >= 0 and Match(r';', end_line[end_pos:]): + if matched.group(1) == 'if': + error(filename, end_linenum, 'whitespace/empty_conditional_body', 5, + 'Empty conditional bodies should use {}') + else: + error(filename, end_linenum, 'whitespace/empty_loop_body', 5, + 'Empty loop bodies should use {} or continue') + + # Check for if statements that have completely empty bodies (no comments) + # and no else clauses. + if end_pos >= 0 and matched.group(1) == 'if': + # Find the position of the opening { for the if statement. + # Return without logging an error if it has no brackets. + opening_linenum = end_linenum + opening_line_fragment = end_line[end_pos:] + # Loop until EOF or find anything that's not whitespace or opening {. + while not Search(r'^\s*\{', opening_line_fragment): + if Search(r'^(?!\s*$)', opening_line_fragment): + # Conditional has no brackets. + return + opening_linenum += 1 + if opening_linenum == len(clean_lines.elided): + # Couldn't find conditional's opening { or any code before EOF. + return + opening_line_fragment = clean_lines.elided[opening_linenum] + # Set opening_line (opening_line_fragment may not be entire opening line). + opening_line = clean_lines.elided[opening_linenum] + + # Find the position of the closing }. + opening_pos = opening_line_fragment.find('{') + if opening_linenum == end_linenum: + # We need to make opening_pos relative to the start of the entire line. + opening_pos += end_pos + (closing_line, closing_linenum, closing_pos) = CloseExpression( + clean_lines, opening_linenum, opening_pos) + if closing_pos < 0: + return + + # Now construct the body of the conditional. This consists of the portion + # of the opening line after the {, all lines until the closing line, + # and the portion of the closing line before the }. + if (clean_lines.raw_lines[opening_linenum] != + CleanseComments(clean_lines.raw_lines[opening_linenum])): + # Opening line ends with a comment, so conditional isn't empty. + return + if closing_linenum > opening_linenum: + # Opening line after the {. 
Ignore comments here since we checked above. + bodylist = list(opening_line[opening_pos+1:]) + # All lines until closing line, excluding closing line, with comments. + bodylist.extend(clean_lines.raw_lines[opening_linenum+1:closing_linenum]) + # Closing line before the }. Won't (and can't) have comments. + bodylist.append(clean_lines.elided[closing_linenum][:closing_pos-1]) + body = '\n'.join(bodylist) + else: + # If statement has brackets and fits on a single line. + body = opening_line[opening_pos+1:closing_pos-1] + + # Check if the body is empty + if not _EMPTY_CONDITIONAL_BODY_PATTERN.search(body): + return + # The body is empty. Now make sure there's not an else clause. + current_linenum = closing_linenum + current_line_fragment = closing_line[closing_pos:] + # Loop until EOF or find anything that's not whitespace or else clause. + while Search(r'^\s*$|^(?=\s*else)', current_line_fragment): + if Search(r'^(?=\s*else)', current_line_fragment): + # Found an else clause, so don't log an error. + return + current_linenum += 1 + if current_linenum == len(clean_lines.elided): + break + current_line_fragment = clean_lines.elided[current_linenum] + + # The body is empty and there's no else clause until EOF or other code. + error(filename, end_linenum, 'whitespace/empty_if_body', 4, + ('If statement had no body and no else clause')) + + +def FindCheckMacro(line): + """Find a replaceable CHECK-like macro. + + Args: + line: line to search on. + Returns: + (macro name, start position), or (None, -1) if no replaceable + macro is found. + """ + for macro in _CHECK_MACROS: + i = line.find(macro) + if i >= 0: + # Find opening parenthesis. Do a regular expression match here + # to make sure that we are matching the expected CHECK macro, as + # opposed to some other macro that happens to contain the CHECK + # substring. + matched = Match(r'^(.*\b' + macro + r'\s*)\(', line) + if not matched: + continue + return (macro, len(matched.group(1))) + return (None, -1) + + +def CheckCheck(filename, clean_lines, linenum, error): + """Checks the use of CHECK and EXPECT macros. + + Args: + filename: The name of the current file. + clean_lines: A CleansedLines instance containing the file. + linenum: The number of the line to check. + error: The function to call with any errors found. + """ + + # Decide the set of replacement macros that should be suggested + lines = clean_lines.elided + (check_macro, start_pos) = FindCheckMacro(lines[linenum]) + if not check_macro: + return + + # Find end of the boolean expression by matching parentheses + (last_line, end_line, end_pos) = CloseExpression( + clean_lines, linenum, start_pos) + if end_pos < 0: + return + + # If the check macro is followed by something other than a + # semicolon, assume users will log their own custom error messages + # and don't suggest any replacements. + if not Match(r'\s*;', last_line[end_pos:]): + return + + if linenum == end_line: + expression = lines[linenum][start_pos + 1:end_pos - 1] + else: + expression = lines[linenum][start_pos + 1:] + for i in xrange(linenum + 1, end_line): + expression += lines[i] + expression += last_line[0:end_pos - 1] + + # Parse expression so that we can take parentheses into account. + # This avoids false positives for inputs like "CHECK((a < 4) == b)", + # which is not replaceable by CHECK_LE. 
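+  #
+  # Editor's note -- illustrative (assumed) outcomes, not upstream text:
+  #   CHECK(x == 42)      -> suggests CHECK_EQ(a, b)
+  #   CHECK(a < b)        -> no suggestion; neither operand is a literal
+  #   CHECK(x < 4 && y)   -> no suggestion; more than one term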
+ lhs = '' + rhs = '' + operator = None + while expression: + matched = Match(r'^\s*(<<|<<=|>>|>>=|->\*|->|&&|\|\||' + r'==|!=|>=|>|<=|<|\()(.*)$', expression) + if matched: + token = matched.group(1) + if token == '(': + # Parenthesized operand + expression = matched.group(2) + (end, _) = FindEndOfExpressionInLine(expression, 0, ['(']) + if end < 0: + return # Unmatched parenthesis + lhs += '(' + expression[0:end] + expression = expression[end:] + elif token in ('&&', '||'): + # Logical and/or operators. This means the expression + # contains more than one term, for example: + # CHECK(42 < a && a < b); + # + # These are not replaceable with CHECK_LE, so bail out early. + return + elif token in ('<<', '<<=', '>>', '>>=', '->*', '->'): + # Non-relational operator + lhs += token + expression = matched.group(2) + else: + # Relational operator + operator = token + rhs = matched.group(2) + break + else: + # Unparenthesized operand. Instead of appending to lhs one character + # at a time, we do another regular expression match to consume several + # characters at once if possible. Trivial benchmark shows that this + # is more efficient when the operands are longer than a single + # character, which is generally the case. + matched = Match(r'^([^-=!<>()&|]+)(.*)$', expression) + if not matched: + matched = Match(r'^(\s*\S)(.*)$', expression) + if not matched: + break + lhs += matched.group(1) + expression = matched.group(2) + + # Only apply checks if we got all parts of the boolean expression + if not (lhs and operator and rhs): + return + + # Check that rhs do not contain logical operators. We already know + # that lhs is fine since the loop above parses out && and ||. + if rhs.find('&&') > -1 or rhs.find('||') > -1: + return + + # At least one of the operands must be a constant literal. This is + # to avoid suggesting replacements for unprintable things like + # CHECK(variable != iterator) + # + # The following pattern matches decimal, hex integers, strings, and + # characters (in that order). + lhs = lhs.strip() + rhs = rhs.strip() + match_constant = r'^([-+]?(\d+|0[xX][0-9a-fA-F]+)[lLuU]{0,3}|".*"|\'.*\')$' + if Match(match_constant, lhs) or Match(match_constant, rhs): + # Note: since we know both lhs and rhs, we can provide a more + # descriptive error message like: + # Consider using CHECK_EQ(x, 42) instead of CHECK(x == 42) + # Instead of: + # Consider using CHECK_EQ instead of CHECK(a == b) + # + # We are still keeping the less descriptive message because if lhs + # or rhs gets long, the error message might become unreadable. + error(filename, linenum, 'readability/check', 2, + 'Consider using %s instead of %s(a %s b)' % ( + _CHECK_REPLACEMENT[check_macro][operator], + check_macro, operator)) + + +def CheckAltTokens(filename, clean_lines, linenum, error): + """Check alternative keywords being used in boolean expressions. + + Args: + filename: The name of the current file. + clean_lines: A CleansedLines instance containing the file. + linenum: The number of the line to check. + error: The function to call with any errors found. + """ + line = clean_lines.elided[linenum] + + # Avoid preprocessor lines + if Match(r'^\s*#', line): + return + + # Last ditch effort to avoid multi-line comments. This will not help + # if the comment started before the current line or ended after the + # current line, but it catches most of the false positives. At least, + # it provides a way to workaround this warning for people who use + # multi-line comments in preprocessor macros. 
+ # + # TODO(unknown): remove this once cpplint has better support for + # multi-line comments. + if line.find('/*') >= 0 or line.find('*/') >= 0: + return + + for match in _ALT_TOKEN_REPLACEMENT_PATTERN.finditer(line): + error(filename, linenum, 'readability/alt_tokens', 2, + 'Use operator %s instead of %s' % ( + _ALT_TOKEN_REPLACEMENT[match.group(1)], match.group(1))) + + +def GetLineWidth(line): + """Determines the width of the line in column positions. + + Args: + line: A string, which may be a Unicode string. + + Returns: + The width of the line in column positions, accounting for Unicode + combining characters and wide characters. + """ + if isinstance(line, unicode): + width = 0 + for uc in unicodedata.normalize('NFC', line): + if unicodedata.east_asian_width(uc) in ('W', 'F'): + width += 2 + elif not unicodedata.combining(uc): + # Issue 337 + # https://mail.python.org/pipermail/python-list/2012-August/628809.html + if (sys.version_info.major, sys.version_info.minor) <= (3, 2): + # https://github.com/python/cpython/blob/2.7/Include/unicodeobject.h#L81 + is_wide_build = sysconfig.get_config_var("Py_UNICODE_SIZE") >= 4 + # https://github.com/python/cpython/blob/2.7/Objects/unicodeobject.c#L564 + is_low_surrogate = 0xDC00 <= ord(uc) <= 0xDFFF + if not is_wide_build and is_low_surrogate: + width -= 1 + + width += 1 + return width + else: + return len(line) + + +def CheckStyle(filename, clean_lines, linenum, file_extension, nesting_state, + error): + """Checks rules from the 'C++ style rules' section of cppguide.html. + + Most of these rules are hard to test (naming, comment style), but we + do what we can. In particular we check for 2-space indents, line lengths, + tab usage, spaces inside code, etc. + + Args: + filename: The name of the current file. + clean_lines: A CleansedLines instance containing the file. + linenum: The number of the line to check. + file_extension: The extension (without the dot) of the filename. + nesting_state: A NestingState instance which maintains information about + the current stack of nested blocks being parsed. + error: The function to call with any errors found. + """ + + # Don't use "elided" lines here, otherwise we can't check commented lines. + # Don't want to use "raw" either, because we don't want to check inside C++11 + # raw strings, + raw_lines = clean_lines.lines_without_raw_strings + line = raw_lines[linenum] + prev = raw_lines[linenum - 1] if linenum > 0 else '' + + if line.find('\t') != -1: + error(filename, linenum, 'whitespace/tab', 1, + 'Tab found; better to use spaces') + + # One or three blank spaces at the beginning of the line is weird; it's + # hard to reconcile that with 2-space indents. + # NOTE: here are the conditions rob pike used for his tests. 
Mine aren't + # as sophisticated, but it may be worth becoming so: RLENGTH==initial_spaces + # if(RLENGTH > 20) complain = 0; + # if(match($0, " +(error|private|public|protected):")) complain = 0; + # if(match(prev, "&& *$")) complain = 0; + # if(match(prev, "\\|\\| *$")) complain = 0; + # if(match(prev, "[\",=><] *$")) complain = 0; + # if(match($0, " <<")) complain = 0; + # if(match(prev, " +for \\(")) complain = 0; + # if(prevodd && match(prevprev, " +for \\(")) complain = 0; + scope_or_label_pattern = r'\s*(?:public|private|protected|signals)(?:\s+(?:slots\s*)?)?:\s*\\?$' + classinfo = nesting_state.InnermostClass() + initial_spaces = 0 + cleansed_line = clean_lines.elided[linenum] + while initial_spaces < len(line) and line[initial_spaces] == ' ': + initial_spaces += 1 + # There are certain situations we allow one space, notably for + # section labels, and also lines containing multi-line raw strings. + # We also don't check for lines that look like continuation lines + # (of lines ending in double quotes, commas, equals, or angle brackets) + # because the rules for how to indent those are non-trivial. + if (not Search(r'[",=><] *$', prev) and + (initial_spaces == 1 or initial_spaces == 3) and + not Match(scope_or_label_pattern, cleansed_line) and + not (clean_lines.raw_lines[linenum] != line and + Match(r'^\s*""', line))): + error(filename, linenum, 'whitespace/indent', 3, + 'Weird number of spaces at line-start. ' + 'Are you using a 2-space indent?') + + if line and line[-1].isspace(): + error(filename, linenum, 'whitespace/end_of_line', 4, + 'Line ends in whitespace. Consider deleting these extra spaces.') + + # Check if the line is a header guard. + is_header_guard = False + if IsHeaderExtension(file_extension): + cppvar = GetHeaderGuardCPPVariable(filename) + if (line.startswith('#ifndef %s' % cppvar) or + line.startswith('#define %s' % cppvar) or + line.startswith('#endif // %s' % cppvar)): + is_header_guard = True + # #include lines and header guards can be long, since there's no clean way to + # split them. + # + # URLs can be long too. It's possible to split these, but it makes them + # harder to cut&paste. + # + # The "$Id:...$" comment may also get very long without it being the + # developers fault. + # + # Doxygen documentation copying can get pretty long when using an overloaded + # function declaration + if (not line.startswith('#include') and not is_header_guard and + not Match(r'^\s*//.*http(s?)://\S*$', line) and + not Match(r'^\s*//\s*[^\s]*$', line) and + not Match(r'^// \$Id:.*#[0-9]+ \$$', line) and + not Match(r'^\s*/// [@\\](copydoc|copydetails|copybrief) .*$', line)): + line_width = GetLineWidth(line) + if line_width > _line_length: + error(filename, linenum, 'whitespace/line_length', 2, + 'Lines should be <= %i characters long' % _line_length) + + if (cleansed_line.count(';') > 1 and + # allow simple single line lambdas + not Match(r'^[^{};]*\[[^\[\]]*\][^{}]*\{[^{}\n\r]*\}', + line) and + # for loops are allowed two ;'s (and may run over two lines). 
+ cleansed_line.find('for') == -1 and + (GetPreviousNonBlankLine(clean_lines, linenum)[0].find('for') == -1 or + GetPreviousNonBlankLine(clean_lines, linenum)[0].find(';') != -1) and + # It's ok to have many commands in a switch case that fits in 1 line + not ((cleansed_line.find('case ') != -1 or + cleansed_line.find('default:') != -1) and + cleansed_line.find('break;') != -1)): + error(filename, linenum, 'whitespace/newline', 0, + 'More than one command on the same line') + + # Some more style checks + CheckBraces(filename, clean_lines, linenum, error) + CheckTrailingSemicolon(filename, clean_lines, linenum, error) + CheckEmptyBlockBody(filename, clean_lines, linenum, error) + CheckSpacing(filename, clean_lines, linenum, nesting_state, error) + CheckOperatorSpacing(filename, clean_lines, linenum, error) + CheckParenthesisSpacing(filename, clean_lines, linenum, error) + CheckCommaSpacing(filename, clean_lines, linenum, error) + CheckBracesSpacing(filename, clean_lines, linenum, nesting_state, error) + CheckSpacingForFunctionCall(filename, clean_lines, linenum, error) + CheckCheck(filename, clean_lines, linenum, error) + CheckAltTokens(filename, clean_lines, linenum, error) + classinfo = nesting_state.InnermostClass() + if classinfo: + CheckSectionSpacing(filename, clean_lines, classinfo, linenum, error) + + +_RE_PATTERN_INCLUDE = re.compile(r'^\s*#\s*include\s*([<"])([^>"]*)[>"].*$') +# Matches the first component of a filename delimited by -s and _s. That is: +# _RE_FIRST_COMPONENT.match('foo').group(0) == 'foo' +# _RE_FIRST_COMPONENT.match('foo.cc').group(0) == 'foo' +# _RE_FIRST_COMPONENT.match('foo-bar_baz.cc').group(0) == 'foo' +# _RE_FIRST_COMPONENT.match('foo_bar-baz.cc').group(0) == 'foo' +_RE_FIRST_COMPONENT = re.compile(r'^[^-_.]+') + + +def _DropCommonSuffixes(filename): + """Drops common suffixes like _test.cc or -inl.h from filename. + + For example: + >>> _DropCommonSuffixes('foo/foo-inl.h') + 'foo/foo' + >>> _DropCommonSuffixes('foo/bar/foo.cc') + 'foo/bar/foo' + >>> _DropCommonSuffixes('foo/foo_internal.h') + 'foo/foo' + >>> _DropCommonSuffixes('foo/foo_unusualinternal.h') + 'foo/foo_unusualinternal' + + Args: + filename: The input filename. + + Returns: + The filename with the common suffix removed. + """ + for suffix in itertools.chain( + ('%s.%s' % (test_suffix.lstrip('_'), ext) + for test_suffix, ext in itertools.product(_test_suffixes, GetNonHeaderExtensions())), + ('%s.%s' % (suffix, ext) + for suffix, ext in itertools.product(['inl', 'imp', 'internal'], GetHeaderExtensions()))): + if (filename.endswith(suffix) and len(filename) > len(suffix) and + filename[-len(suffix) - 1] in ('-', '_')): + return filename[:-len(suffix) - 1] + return os.path.splitext(filename)[0] + + +def _ClassifyInclude(fileinfo, include, used_angle_brackets, include_order="default"): + """Figures out what kind of header 'include' is. + + Args: + fileinfo: The current file cpplint is running over. A FileInfo instance. + include: The path to a #included file. + used_angle_brackets: True if the #include used <> rather than "". + include_order: "default" or other value allowed in program arguments + + Returns: + One of the _XXX_HEADER constants. 
+ + For example: + >>> _ClassifyInclude(FileInfo('foo/foo.cc'), 'stdio.h', True) + _C_SYS_HEADER + >>> _ClassifyInclude(FileInfo('foo/foo.cc'), 'string', True) + _CPP_SYS_HEADER + >>> _ClassifyInclude(FileInfo('foo/foo.cc'), 'foo/foo.h', True, "standardcfirst") + _OTHER_SYS_HEADER + >>> _ClassifyInclude(FileInfo('foo/foo.cc'), 'foo/foo.h', False) + _LIKELY_MY_HEADER + >>> _ClassifyInclude(FileInfo('foo/foo_unknown_extension.cc'), + ... 'bar/foo_other_ext.h', False) + _POSSIBLE_MY_HEADER + >>> _ClassifyInclude(FileInfo('foo/foo.cc'), 'foo/bar.h', False) + _OTHER_HEADER + """ + # This is a list of all standard c++ header files, except + # those already checked for above. + is_cpp_header = include in _CPP_HEADERS + + # Mark include as C header if in list or in a known folder for standard-ish C headers. + is_std_c_header = (include_order == "default") or (include in _C_HEADERS + # additional linux glibc header folders + or Search(r'(?:%s)\/.*\.h' % "|".join(C_STANDARD_HEADER_FOLDERS), include)) + + # Headers with C++ extensions shouldn't be considered C system headers + include_ext = os.path.splitext(include)[1] + is_system = used_angle_brackets and not include_ext in ['.hh', '.hpp', '.hxx', '.h++'] + + if is_system: + if is_cpp_header: + return _CPP_SYS_HEADER + if is_std_c_header: + return _C_SYS_HEADER + else: + return _OTHER_SYS_HEADER + + # If the target file and the include we're checking share a + # basename when we drop common extensions, and the include + # lives in . , then it's likely to be owned by the target file. + target_dir, target_base = ( + os.path.split(_DropCommonSuffixes(fileinfo.RepositoryName()))) + include_dir, include_base = os.path.split(_DropCommonSuffixes(include)) + target_dir_pub = os.path.normpath(target_dir + '/../public') + target_dir_pub = target_dir_pub.replace('\\', '/') + if target_base == include_base and ( + include_dir == target_dir or + include_dir == target_dir_pub): + return _LIKELY_MY_HEADER + + # If the target and include share some initial basename + # component, it's possible the target is implementing the + # include, so it's allowed to be first, but we'll never + # complain if it's not there. + target_first_component = _RE_FIRST_COMPONENT.match(target_base) + include_first_component = _RE_FIRST_COMPONENT.match(include_base) + if (target_first_component and include_first_component and + target_first_component.group(0) == + include_first_component.group(0)): + return _POSSIBLE_MY_HEADER + + return _OTHER_HEADER + + + +def CheckIncludeLine(filename, clean_lines, linenum, include_state, error): + """Check rules that are applicable to #include lines. + + Strings on #include lines are NOT removed from elided line, to make + certain tasks easier. However, to prevent false positives, checks + applicable to #include lines in CheckLanguage must be put here. + + Args: + filename: The name of the current file. + clean_lines: A CleansedLines instance containing the file. + linenum: The number of the line to check. + include_state: An _IncludeState instance in which the headers are inserted. + error: The function to call with any errors found. + """ + fileinfo = FileInfo(filename) + line = clean_lines.lines[linenum] + + # "include" should use the new style "foo/bar.h" instead of just "bar.h" + # Only do this check if the included header follows google naming + # conventions. If not, assume that it's a 3rd party API that + # requires special include conventions. 
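+  #
+  # For illustration (not an upstream comment), the cases the check below
+  # distinguishes:
+  #   #include "util.h"       -> build/include_subdir warning
+  #   #include "base/util.h"  -> OK, the directory is named
+  #   #include "FooBar.h"     -> skipped, assumed third-party naming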
+ # + # We also make an exception for Lua headers, which follow google + # naming convention but not the include convention. + match = Match(r'#include\s*"([^/]+\.(.*))"', line) + if match: + if (IsHeaderExtension(match.group(2)) and + not _THIRD_PARTY_HEADERS_PATTERN.match(match.group(1))): + error(filename, linenum, 'build/include_subdir', 4, + 'Include the directory when naming header files') + + # we shouldn't include a file more than once. actually, there are a + # handful of instances where doing so is okay, but in general it's + # not. + match = _RE_PATTERN_INCLUDE.search(line) + if match: + include = match.group(2) + used_angle_brackets = match.group(1) == '<' + duplicate_line = include_state.FindHeader(include) + if duplicate_line >= 0: + error(filename, linenum, 'build/include', 4, + '"%s" already included at %s:%s' % + (include, filename, duplicate_line)) + return + + for extension in GetNonHeaderExtensions(): + if (include.endswith('.' + extension) and + os.path.dirname(fileinfo.RepositoryName()) != os.path.dirname(include)): + error(filename, linenum, 'build/include', 4, + 'Do not include .' + extension + ' files from other packages') + return + + # We DO want to include a 3rd party looking header if it matches the + # filename. Otherwise we get an erroneous error "...should include its + # header" error later. + third_src_header = False + for ext in GetHeaderExtensions(): + basefilename = filename[0:len(filename) - len(fileinfo.Extension())] + headerfile = basefilename + '.' + ext + headername = FileInfo(headerfile).RepositoryName() + if headername in include or include in headername: + third_src_header = True + break + + if third_src_header or not _THIRD_PARTY_HEADERS_PATTERN.match(include): + include_state.include_list[-1].append((include, linenum)) + + # We want to ensure that headers appear in the right order: + # 1) for foo.cc, foo.h (preferred location) + # 2) c system files + # 3) cpp system files + # 4) for foo.cc, foo.h (deprecated location) + # 5) other google headers + # + # We classify each include statement as one of those 5 types + # using a number of techniques. The include_state object keeps + # track of the highest type seen, and complains if we see a + # lower type after that. + error_message = include_state.CheckNextIncludeOrder( + _ClassifyInclude(fileinfo, include, used_angle_brackets, _include_order)) + if error_message: + error(filename, linenum, 'build/include_order', 4, + '%s. Should be: %s.h, c system, c++ system, other.' % + (error_message, fileinfo.BaseName())) + canonical_include = include_state.CanonicalizeAlphabeticalOrder(include) + if not include_state.IsInAlphabeticalOrder( + clean_lines, linenum, canonical_include): + error(filename, linenum, 'build/include_alpha', 4, + 'Include "%s" not in alphabetical order' % include) + include_state.SetLastHeader(canonical_include) + + + +def _GetTextInside(text, start_pattern): + r"""Retrieves all the text between matching open and close parentheses. + + Given a string of lines and a regular expression string, retrieve all the text + following the expression and between opening punctuation symbols like + (, [, or {, and the matching close-punctuation symbol. This properly nested + occurrences of the punctuations, so for the text like + printf(a(), b(c())); + a call to _GetTextInside(text, r'printf\(') will return 'a(), b(c())'. + start_pattern must match string having an open punctuation symbol at the end. + + Args: + text: The lines to extract text. Its comments and strings must be elided. 
+ It can be single line and can span multiple lines. + start_pattern: The regexp string indicating where to start extracting + the text. + Returns: + The extracted text. + None if either the opening string or ending punctuation could not be found. + """ + # TODO(unknown): Audit cpplint.py to see what places could be profitably + # rewritten to use _GetTextInside (and use inferior regexp matching today). + + # Give opening punctuations to get the matching close-punctuations. + matching_punctuation = {'(': ')', '{': '}', '[': ']'} + closing_punctuation = set(itervalues(matching_punctuation)) + + # Find the position to start extracting text. + match = re.search(start_pattern, text, re.M) + if not match: # start_pattern not found in text. + return None + start_position = match.end(0) + + assert start_position > 0, ( + 'start_pattern must ends with an opening punctuation.') + assert text[start_position - 1] in matching_punctuation, ( + 'start_pattern must ends with an opening punctuation.') + # Stack of closing punctuations we expect to have in text after position. + punctuation_stack = [matching_punctuation[text[start_position - 1]]] + position = start_position + while punctuation_stack and position < len(text): + if text[position] == punctuation_stack[-1]: + punctuation_stack.pop() + elif text[position] in closing_punctuation: + # A closing punctuation without matching opening punctuations. + return None + elif text[position] in matching_punctuation: + punctuation_stack.append(matching_punctuation[text[position]]) + position += 1 + if punctuation_stack: + # Opening punctuations left without matching close-punctuations. + return None + # punctuations match. + return text[start_position:position - 1] + + +# Patterns for matching call-by-reference parameters. +# +# Supports nested templates up to 2 levels deep using this messy pattern: +# < (?: < (?: < [^<>]* +# > +# | [^<>] )* +# > +# | [^<>] )* +# > +_RE_PATTERN_IDENT = r'[_a-zA-Z]\w*' # =~ [[:alpha:]][[:alnum:]]* +_RE_PATTERN_TYPE = ( + r'(?:const\s+)?(?:typename\s+|class\s+|struct\s+|union\s+|enum\s+)?' + r'(?:\w|' + r'\s*<(?:<(?:<[^<>]*>|[^<>])*>|[^<>])*>|' + r'::)+') +# A call-by-reference parameter ends with '& identifier'. +_RE_PATTERN_REF_PARAM = re.compile( + r'(' + _RE_PATTERN_TYPE + r'(?:\s*(?:\bconst\b|[*]))*\s*' + r'&\s*' + _RE_PATTERN_IDENT + r')\s*(?:=[^,()]+)?[,)]') +# A call-by-const-reference parameter either ends with 'const& identifier' +# or looks like 'const type& identifier' when 'type' is atomic. +_RE_PATTERN_CONST_REF_PARAM = ( + r'(?:.*\s*\bconst\s*&\s*' + _RE_PATTERN_IDENT + + r'|const\s+' + _RE_PATTERN_TYPE + r'\s*&\s*' + _RE_PATTERN_IDENT + r')') +# Stream types. +_RE_PATTERN_REF_STREAM_PARAM = ( + r'(?:.*stream\s*&\s*' + _RE_PATTERN_IDENT + r')') + + +def CheckLanguage(filename, clean_lines, linenum, file_extension, + include_state, nesting_state, error): + """Checks rules from the 'C++ language rules' section of cppguide.html. + + Some of these rules are hard to test (function overloading, using + uint32 inappropriately), but we do the best we can. + + Args: + filename: The name of the current file. + clean_lines: A CleansedLines instance containing the file. + linenum: The number of the line to check. + file_extension: The extension (without the dot) of the filename. + include_state: An _IncludeState instance in which the headers are inserted. + nesting_state: A NestingState instance which maintains information about + the current stack of nested blocks being parsed. 
+ error: The function to call with any errors found. + """ + # If the line is empty or consists of entirely a comment, no need to + # check it. + line = clean_lines.elided[linenum] + if not line: + return + + match = _RE_PATTERN_INCLUDE.search(line) + if match: + CheckIncludeLine(filename, clean_lines, linenum, include_state, error) + return + + # Reset include state across preprocessor directives. This is meant + # to silence warnings for conditional includes. + match = Match(r'^\s*#\s*(if|ifdef|ifndef|elif|else|endif)\b', line) + if match: + include_state.ResetSection(match.group(1)) + + + # Perform other checks now that we are sure that this is not an include line + CheckCasts(filename, clean_lines, linenum, error) + CheckGlobalStatic(filename, clean_lines, linenum, error) + CheckPrintf(filename, clean_lines, linenum, error) + + if IsHeaderExtension(file_extension): + # TODO(unknown): check that 1-arg constructors are explicit. + # How to tell it's a constructor? + # (handled in CheckForNonStandardConstructs for now) + # TODO(unknown): check that classes declare or disable copy/assign + # (level 1 error) + pass + + # Check if people are using the verboten C basic types. The only exception + # we regularly allow is "unsigned short port" for port. + if Search(r'\bshort port\b', line): + if not Search(r'\bunsigned short port\b', line): + error(filename, linenum, 'runtime/int', 4, + 'Use "unsigned short" for ports, not "short"') + else: + match = Search(r'\b(short|long(?! +double)|long long)\b', line) + if match: + error(filename, linenum, 'runtime/int', 4, + 'Use int16/int64/etc, rather than the C type %s' % match.group(1)) + + # Check if some verboten operator overloading is going on + # TODO(unknown): catch out-of-line unary operator&: + # class X {}; + # int operator&(const X& x) { return 42; } // unary operator& + # The trick is it's hard to tell apart from binary operator&: + # class Y { int operator&(const Y& x) { return 23; } }; // binary operator& + if Search(r'\boperator\s*&\s*\(\s*\)', line): + error(filename, linenum, 'runtime/operator', 4, + 'Unary operator& is dangerous. Do not use it.') + + # Check for suspicious usage of "if" like + # } if (a == b) { + if Search(r'\}\s*if\s*\(', line): + error(filename, linenum, 'readability/braces', 4, + 'Did you mean "else if"? If not, start a new line for "if".') + + # Check for potential format string bugs like printf(foo). + # We constrain the pattern not to pick things like DocidForPrintf(foo). + # Not perfect but it can catch printf(foo.c_str()) and printf(foo->c_str()) + # TODO(unknown): Catch the following case. Need to change the calling + # convention of the whole function to process multiple line to handle it. + # printf( + # boy_this_is_a_really_long_variable_that_cannot_fit_on_the_prev_line); + printf_args = _GetTextInside(line, r'(?i)\b(string)?printf\s*\(') + if printf_args: + match = Match(r'([\w.\->()]+)$', printf_args) + if match and match.group(1) != '__VA_ARGS__': + function_name = re.search(r'\b((?:string)?printf)\s*\(', + line, re.I).group(1) + error(filename, linenum, 'runtime/printf', 4, + 'Potential format string bug. Do %s("%%s", %s) instead.' + % (function_name, match.group(1))) + + # Check for potential memset bugs like memset(buf, sizeof(buf), 0). + match = Search(r'memset\s*\(([^,]*),\s*([^,]*),\s*0\s*\)', line) + if match and not Match(r"^''|-?[0-9]+|0x[0-9A-Fa-f]$", match.group(2)): + error(filename, linenum, 'runtime/memset', 4, + 'Did you mean "memset(%s, 0, %s)"?' 
+ % (match.group(1), match.group(2))) + + if Search(r'\busing namespace\b', line): + if Search(r'\bliterals\b', line): + error(filename, linenum, 'build/namespaces_literals', 5, + 'Do not use namespace using-directives. ' + 'Use using-declarations instead.') + else: + error(filename, linenum, 'build/namespaces', 5, + 'Do not use namespace using-directives. ' + 'Use using-declarations instead.') + + # Detect variable-length arrays. + match = Match(r'\s*(.+::)?(\w+) [a-z]\w*\[(.+)];', line) + if (match and match.group(2) != 'return' and match.group(2) != 'delete' and + match.group(3).find(']') == -1): + # Split the size using space and arithmetic operators as delimiters. + # If any of the resulting tokens are not compile time constants then + # report the error. + tokens = re.split(r'\s|\+|\-|\*|\/|<<|>>]', match.group(3)) + is_const = True + skip_next = False + for tok in tokens: + if skip_next: + skip_next = False + continue + + if Search(r'sizeof\(.+\)', tok): continue + if Search(r'arraysize\(\w+\)', tok): continue + + tok = tok.lstrip('(') + tok = tok.rstrip(')') + if not tok: continue + if Match(r'\d+', tok): continue + if Match(r'0[xX][0-9a-fA-F]+', tok): continue + if Match(r'k[A-Z0-9]\w*', tok): continue + if Match(r'(.+::)?k[A-Z0-9]\w*', tok): continue + if Match(r'(.+::)?[A-Z][A-Z0-9_]*', tok): continue + # A catch all for tricky sizeof cases, including 'sizeof expression', + # 'sizeof(*type)', 'sizeof(const type)', 'sizeof(struct StructName)' + # requires skipping the next token because we split on ' ' and '*'. + if tok.startswith('sizeof'): + skip_next = True + continue + is_const = False + break + if not is_const: + error(filename, linenum, 'runtime/arrays', 1, + 'Do not use variable-length arrays. Use an appropriately named ' + "('k' followed by CamelCase) compile-time constant for the size.") + + # Check for use of unnamed namespaces in header files. Registration + # macros are typically OK, so we allow use of "namespace {" on lines + # that end with backslashes. + if (IsHeaderExtension(file_extension) + and Search(r'\bnamespace\s*{', line) + and line[-1] != '\\'): + error(filename, linenum, 'build/namespaces_headers', 4, + 'Do not use unnamed namespaces in header files. See ' + 'https://google-styleguide.googlecode.com/svn/trunk/cppguide.xml#Namespaces' + ' for more information.') + + +def CheckGlobalStatic(filename, clean_lines, linenum, error): + """Check for unsafe global or static objects. + + Args: + filename: The name of the current file. + clean_lines: A CleansedLines instance containing the file. + linenum: The number of the line to check. + error: The function to call with any errors found. + """ + line = clean_lines.elided[linenum] + + # Match two lines at a time to support multiline declarations + if linenum + 1 < clean_lines.NumLines() and not Search(r'[;({]', line): + line += clean_lines.elided[linenum + 1].strip() + + # Check for people declaring static/global STL strings at the top level. + # This is dangerous because the C++ language does not guarantee that + # globals with constructors are initialized before the first access, and + # also because globals can be destroyed when some threads are still running. + # TODO(unknown): Generalize this to also find static unique_ptr instances. + # TODO(unknown): File bugs for clang-tidy to find these. + match = Match( + r'((?:|static +)(?:|const +))(?::*std::)?string( +const)? +' + r'([a-zA-Z0-9_:]+)\b(.*)', + line) + + # Remove false positives: + # - String pointers (as opposed to values). 
+ # string *pointer + # const string *pointer + # string const *pointer + # string *const pointer + # + # - Functions and template specializations. + # string Function(... + # string Class::Method(... + # + # - Operators. These are matched separately because operator names + # cross non-word boundaries, and trying to match both operators + # and functions at the same time would decrease accuracy of + # matching identifiers. + # string Class::operator*() + if (match and + not Search(r'\bstring\b(\s+const)?\s*[\*\&]\s*(const\s+)?\w', line) and + not Search(r'\boperator\W', line) and + not Match(r'\s*(<.*>)?(::[a-zA-Z0-9_]+)*\s*\(([^"]|$)', match.group(4))): + if Search(r'\bconst\b', line): + error(filename, linenum, 'runtime/string', 4, + 'For a static/global string constant, use a C style string ' + 'instead: "%schar%s %s[]".' % + (match.group(1), match.group(2) or '', match.group(3))) + else: + error(filename, linenum, 'runtime/string', 4, + 'Static/global string variables are not permitted.') + + if (Search(r'\b([A-Za-z0-9_]*_)\(\1\)', line) or + Search(r'\b([A-Za-z0-9_]*_)\(CHECK_NOTNULL\(\1\)\)', line)): + error(filename, linenum, 'runtime/init', 4, + 'You seem to be initializing a member variable with itself.') + + +def CheckPrintf(filename, clean_lines, linenum, error): + """Check for printf related issues. + + Args: + filename: The name of the current file. + clean_lines: A CleansedLines instance containing the file. + linenum: The number of the line to check. + error: The function to call with any errors found. + """ + line = clean_lines.elided[linenum] + + # When snprintf is used, the second argument shouldn't be a literal. + match = Search(r'snprintf\s*\(([^,]*),\s*([0-9]*)\s*,', line) + if match and match.group(2) != '0': + # If 2nd arg is zero, snprintf is used to calculate size. + error(filename, linenum, 'runtime/printf', 3, + 'If you can, use sizeof(%s) instead of %s as the 2nd arg ' + 'to snprintf.' % (match.group(1), match.group(2))) + + # Check if some verboten C functions are being used. + if Search(r'\bsprintf\s*\(', line): + error(filename, linenum, 'runtime/printf', 5, + 'Never use sprintf. Use snprintf instead.') + match = Search(r'\b(strcpy|strcat)\s*\(', line) + if match: + error(filename, linenum, 'runtime/printf', 4, + 'Almost always, snprintf is better than %s' % match.group(1)) + + +def IsDerivedFunction(clean_lines, linenum): + """Check if current line contains an inherited function. + + Args: + clean_lines: A CleansedLines instance containing the file. + linenum: The number of the line to check. + Returns: + True if current line contains a function with "override" + virt-specifier. + """ + # Scan back a few lines for start of current function + for i in xrange(linenum, max(-1, linenum - 10), -1): + match = Match(r'^([^()]*\w+)\(', clean_lines.elided[i]) + if match: + # Look for "override" after the matching closing parenthesis + line, _, closing_paren = CloseExpression( + clean_lines, i, len(match.group(1))) + return (closing_paren >= 0 and + Search(r'\boverride\b', line[closing_paren:])) + return False + + +def IsOutOfLineMethodDefinition(clean_lines, linenum): + """Check if current line contains an out-of-line method definition. + + Args: + clean_lines: A CleansedLines instance containing the file. + linenum: The number of the line to check. + Returns: + True if current line contains an out-of-line method definition. 
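+
+  For example (illustrative, not part of the upstream docstring),
+  'void Foo::Bar() {' makes this return True, while a free function
+  like 'void Bar() {' does not.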
+ """ + # Scan back a few lines for start of current function + for i in xrange(linenum, max(-1, linenum - 10), -1): + if Match(r'^([^()]*\w+)\(', clean_lines.elided[i]): + return Match(r'^[^()]*\w+::\w+\(', clean_lines.elided[i]) is not None + return False + + +def IsInitializerList(clean_lines, linenum): + """Check if current line is inside constructor initializer list. + + Args: + clean_lines: A CleansedLines instance containing the file. + linenum: The number of the line to check. + Returns: + True if current line appears to be inside constructor initializer + list, False otherwise. + """ + for i in xrange(linenum, 1, -1): + line = clean_lines.elided[i] + if i == linenum: + remove_function_body = Match(r'^(.*)\{\s*$', line) + if remove_function_body: + line = remove_function_body.group(1) + + if Search(r'\s:\s*\w+[({]', line): + # A lone colon tend to indicate the start of a constructor + # initializer list. It could also be a ternary operator, which + # also tend to appear in constructor initializer lists as + # opposed to parameter lists. + return True + if Search(r'\}\s*,\s*$', line): + # A closing brace followed by a comma is probably the end of a + # brace-initialized member in constructor initializer list. + return True + if Search(r'[{};]\s*$', line): + # Found one of the following: + # - A closing brace or semicolon, probably the end of the previous + # function. + # - An opening brace, probably the start of current class or namespace. + # + # Current line is probably not inside an initializer list since + # we saw one of those things without seeing the starting colon. + return False + + # Got to the beginning of the file without seeing the start of + # constructor initializer list. + return False + + +def CheckForNonConstReference(filename, clean_lines, linenum, + nesting_state, error): + """Check for non-const references. + + Separate from CheckLanguage since it scans backwards from current + line, instead of scanning forward. + + Args: + filename: The name of the current file. + clean_lines: A CleansedLines instance containing the file. + linenum: The number of the line to check. + nesting_state: A NestingState instance which maintains information about + the current stack of nested blocks being parsed. + error: The function to call with any errors found. + """ + # Do nothing if there is no '&' on current line. + line = clean_lines.elided[linenum] + if '&' not in line: + return + + # If a function is inherited, current function doesn't have much of + # a choice, so any non-const references should not be blamed on + # derived function. + if IsDerivedFunction(clean_lines, linenum): + return + + # Don't warn on out-of-line method definitions, as we would warn on the + # in-line declaration, if it isn't marked with 'override'. + if IsOutOfLineMethodDefinition(clean_lines, linenum): + return + + # Long type names may be broken across multiple lines, usually in one + # of these forms: + # LongType + # ::LongTypeContinued &identifier + # LongType:: + # LongTypeContinued &identifier + # LongType< + # ...>::LongTypeContinued &identifier + # + # If we detected a type split across two lines, join the previous + # line to current line so that we can match const references + # accordingly. + # + # Note that this only scans back one line, since scanning back + # arbitrary number of lines would be expensive. If you have a type + # that spans more than 2 lines, please use a typedef. 
+ if linenum > 1: + previous = None + if Match(r'\s*::(?:[\w<>]|::)+\s*&\s*\S', line): + # previous_line\n + ::current_line + previous = Search(r'\b((?:const\s*)?(?:[\w<>]|::)+[\w<>])\s*$', + clean_lines.elided[linenum - 1]) + elif Match(r'\s*[a-zA-Z_]([\w<>]|::)+\s*&\s*\S', line): + # previous_line::\n + current_line + previous = Search(r'\b((?:const\s*)?(?:[\w<>]|::)+::)\s*$', + clean_lines.elided[linenum - 1]) + if previous: + line = previous.group(1) + line.lstrip() + else: + # Check for templated parameter that is split across multiple lines + endpos = line.rfind('>') + if endpos > -1: + (_, startline, startpos) = ReverseCloseExpression( + clean_lines, linenum, endpos) + if startpos > -1 and startline < linenum: + # Found the matching < on an earlier line, collect all + # pieces up to current line. + line = '' + for i in xrange(startline, linenum + 1): + line += clean_lines.elided[i].strip() + + # Check for non-const references in function parameters. A single '&' may + # found in the following places: + # inside expression: binary & for bitwise AND + # inside expression: unary & for taking the address of something + # inside declarators: reference parameter + # We will exclude the first two cases by checking that we are not inside a + # function body, including one that was just introduced by a trailing '{'. + # TODO(unknown): Doesn't account for 'catch(Exception& e)' [rare]. + if (nesting_state.previous_stack_top and + not (isinstance(nesting_state.previous_stack_top, _ClassInfo) or + isinstance(nesting_state.previous_stack_top, _NamespaceInfo))): + # Not at toplevel, not within a class, and not within a namespace + return + + # Avoid initializer lists. We only need to scan back from the + # current line for something that starts with ':'. + # + # We don't need to check the current line, since the '&' would + # appear inside the second set of parentheses on the current line as + # opposed to the first set. + if linenum > 0: + for i in xrange(linenum - 1, max(0, linenum - 10), -1): + previous_line = clean_lines.elided[i] + if not Search(r'[),]\s*$', previous_line): + break + if Match(r'^\s*:\s+\S', previous_line): + return + + # Avoid preprocessors + if Search(r'\\\s*$', line): + return + + # Avoid constructor initializer lists + if IsInitializerList(clean_lines, linenum): + return + + # We allow non-const references in a few standard places, like functions + # called "swap()" or iostream operators like "<<" or ">>". Do not check + # those function parameters. + # + # We also accept & in static_assert, which looks like a function but + # it's actually a declaration expression. + allowed_functions = (r'(?:[sS]wap(?:<\w:+>)?|' + r'operator\s*[<>][<>]|' + r'static_assert|COMPILE_ASSERT' + r')\s*\(') + if Search(allowed_functions, line): + return + elif not Search(r'\S+\([^)]*$', line): + # Don't see an allowed function on this line. Actually we + # didn't see any function name on this line, so this is likely a + # multi-line parameter list. Try a bit harder to catch this case. + for i in xrange(2): + if (linenum > i and + Search(allowed_functions, clean_lines.elided[linenum - i - 1])): + return + + decls = ReplaceAll(r'{[^}]*}', ' ', line) # exclude function body + for parameter in re.findall(_RE_PATTERN_REF_PARAM, decls): + if (not Match(_RE_PATTERN_CONST_REF_PARAM, parameter) and + not Match(_RE_PATTERN_REF_STREAM_PARAM, parameter)): + error(filename, linenum, 'runtime/references', 2, + 'Is this a non-const reference? 
' + 'If so, make const or use a pointer: ' + + ReplaceAll(' *<', '<', parameter)) + + +def CheckCasts(filename, clean_lines, linenum, error): + """Various cast related checks. + + Args: + filename: The name of the current file. + clean_lines: A CleansedLines instance containing the file. + linenum: The number of the line to check. + error: The function to call with any errors found. + """ + line = clean_lines.elided[linenum] + + # Check to see if they're using an conversion function cast. + # I just try to capture the most common basic types, though there are more. + # Parameterless conversion functions, such as bool(), are allowed as they are + # probably a member operator declaration or default constructor. + match = Search( + r'(\bnew\s+(?:const\s+)?|\S<\s*(?:const\s+)?)?\b' + r'(int|float|double|bool|char|int32|uint32|int64|uint64)' + r'(\([^)].*)', line) + expecting_function = ExpectingFunctionArgs(clean_lines, linenum) + if match and not expecting_function: + matched_type = match.group(2) + + # matched_new_or_template is used to silence two false positives: + # - New operators + # - Template arguments with function types + # + # For template arguments, we match on types immediately following + # an opening bracket without any spaces. This is a fast way to + # silence the common case where the function type is the first + # template argument. False negative with less-than comparison is + # avoided because those operators are usually followed by a space. + # + # function // bracket + no space = false positive + # value < double(42) // bracket + space = true positive + matched_new_or_template = match.group(1) + + # Avoid arrays by looking for brackets that come after the closing + # parenthesis. + if Match(r'\([^()]+\)\s*\[', match.group(3)): + return + + # Other things to ignore: + # - Function pointers + # - Casts to pointer types + # - Placement new + # - Alias declarations + matched_funcptr = match.group(3) + if (matched_new_or_template is None and + not (matched_funcptr and + (Match(r'\((?:[^() ]+::\s*\*\s*)?[^() ]+\)\s*\(', + matched_funcptr) or + matched_funcptr.startswith('(*)'))) and + not Match(r'\s*using\s+\S+\s*=\s*' + matched_type, line) and + not Search(r'new\(\S+\)\s*' + matched_type, line)): + error(filename, linenum, 'readability/casting', 4, + 'Using deprecated casting style. ' + 'Use static_cast<%s>(...) instead' % + matched_type) + + if not expecting_function: + CheckCStyleCast(filename, clean_lines, linenum, 'static_cast', + r'\((int|float|double|bool|char|u?int(16|32|64)|size_t)\)', error) + + # This doesn't catch all cases. Consider (const char * const)"hello". + # + # (char *) "foo" should always be a const_cast (reinterpret_cast won't + # compile). + if CheckCStyleCast(filename, clean_lines, linenum, 'const_cast', + r'\((char\s?\*+\s?)\)\s*"', error): + pass + else: + # Check pointer casts for other than string constants + CheckCStyleCast(filename, clean_lines, linenum, 'reinterpret_cast', + r'\((\w+\s?\*+\s?)\)', error) + + # In addition, we look for people taking the address of a cast. This + # is dangerous -- casts can assign to temporaries, so the pointer doesn't + # point where you think. + # + # Some non-identifier character is required before the '&' for the + # expression to be recognized as a cast. 
These are casts: + # expression = &static_cast(temporary()); + # function(&(int*)(temporary())); + # + # This is not a cast: + # reference_type&(int* function_param); + match = Search( + r'(?:[^\w]&\(([^)*][^)]*)\)[\w(])|' + r'(?:[^\w]&(static|dynamic|down|reinterpret)_cast\b)', line) + if match: + # Try a better error message when the & is bound to something + # dereferenced by the casted pointer, as opposed to the casted + # pointer itself. + parenthesis_error = False + match = Match(r'^(.*&(?:static|dynamic|down|reinterpret)_cast\b)<', line) + if match: + _, y1, x1 = CloseExpression(clean_lines, linenum, len(match.group(1))) + if x1 >= 0 and clean_lines.elided[y1][x1] == '(': + _, y2, x2 = CloseExpression(clean_lines, y1, x1) + if x2 >= 0: + extended_line = clean_lines.elided[y2][x2:] + if y2 < clean_lines.NumLines() - 1: + extended_line += clean_lines.elided[y2 + 1] + if Match(r'\s*(?:->|\[)', extended_line): + parenthesis_error = True + + if parenthesis_error: + error(filename, linenum, 'readability/casting', 4, + ('Are you taking an address of something dereferenced ' + 'from a cast? Wrapping the dereferenced expression in ' + 'parentheses will make the binding more obvious')) + else: + error(filename, linenum, 'runtime/casting', 4, + ('Are you taking an address of a cast? ' + 'This is dangerous: could be a temp var. ' + 'Take the address before doing the cast, rather than after')) + + +def CheckCStyleCast(filename, clean_lines, linenum, cast_type, pattern, error): + """Checks for a C-style cast by looking for the pattern. + + Args: + filename: The name of the current file. + clean_lines: A CleansedLines instance containing the file. + linenum: The number of the line to check. + cast_type: The string for the C++ cast to recommend. This is either + reinterpret_cast, static_cast, or const_cast, depending. + pattern: The regular expression used to find C-style casts. + error: The function to call with any errors found. + + Returns: + True if an error was emitted. + False otherwise. + """ + line = clean_lines.elided[linenum] + match = Search(pattern, line) + if not match: + return False + + # Exclude lines with keywords that tend to look like casts + context = line[0:match.start(1) - 1] + if Match(r'.*\b(?:sizeof|alignof|alignas|[_A-Z][_A-Z0-9]*)\s*$', context): + return False + + # Try expanding current context to see if we one level of + # parentheses inside a macro. + if linenum > 0: + for i in xrange(linenum - 1, max(0, linenum - 5), -1): + context = clean_lines.elided[i] + context + if Match(r'.*\b[_A-Z][_A-Z0-9]*\s*\((?:\([^()]*\)|[^()])*$', context): + return False + + # operator++(int) and operator--(int) + if (context.endswith(' operator++') or context.endswith(' operator--') or + context.endswith('::operator++') or context.endswith('::operator--')): + return False + + # A single unnamed argument for a function tends to look like old style cast. + # If we see those, don't issue warnings for deprecated casts. + remainder = line[match.end(0):] + if Match(r'^\s*(?:;|const\b|throw\b|final\b|override\b|[=>{),]|->)', + remainder): + return False + + # At this point, all that should be left is actual casts. + error(filename, linenum, 'readability/casting', 4, + 'Using C-style cast. Use %s<%s>(...) instead' % + (cast_type, match.group(1))) + + return True + + +def ExpectingFunctionArgs(clean_lines, linenum): + """Checks whether where function type arguments are expected. + + Args: + clean_lines: A CleansedLines instance containing the file. + linenum: The number of the line to check. 
+
+  Returns:
+    True if the line at 'linenum' is inside something that expects arguments
+    of function types.
+  """
+  line = clean_lines.elided[linenum]
+  return (Match(r'^\s*MOCK_(CONST_)?METHOD\d+(_T)?\(', line) or
+          (linenum >= 2 and
+           (Match(r'^\s*MOCK_(?:CONST_)?METHOD\d+(?:_T)?\((?:\S+,)?\s*$',
+                  clean_lines.elided[linenum - 1]) or
+            Match(r'^\s*MOCK_(?:CONST_)?METHOD\d+(?:_T)?\(\s*$',
+                  clean_lines.elided[linenum - 2]) or
+            Search(r'\bstd::m?function\s*\<\s*$',
+                   clean_lines.elided[linenum - 1]))))
+
+
+_HEADERS_CONTAINING_TEMPLATES = (
+    ('<deque>', ('deque',)),
+    ('<functional>', ('unary_function', 'binary_function',
+                      'plus', 'minus', 'multiplies', 'divides', 'modulus',
+                      'negate',
+                      'equal_to', 'not_equal_to', 'greater', 'less',
+                      'greater_equal', 'less_equal',
+                      'logical_and', 'logical_or', 'logical_not',
+                      'unary_negate', 'not1', 'binary_negate', 'not2',
+                      'bind1st', 'bind2nd',
+                      'pointer_to_unary_function',
+                      'pointer_to_binary_function',
+                      'ptr_fun',
+                      'mem_fun_t', 'mem_fun', 'mem_fun1_t', 'mem_fun1_ref_t',
+                      'mem_fun_ref_t',
+                      'const_mem_fun_t', 'const_mem_fun1_t',
+                      'const_mem_fun_ref_t', 'const_mem_fun1_ref_t',
+                      'mem_fun_ref',
+                      )),
+    ('<limits>', ('numeric_limits',)),
+    ('<list>', ('list',)),
+    ('<map>', ('multimap',)),
+    ('<memory>', ('allocator', 'make_shared', 'make_unique', 'shared_ptr',
+                  'unique_ptr', 'weak_ptr')),
+    ('<queue>', ('queue', 'priority_queue',)),
+    ('<set>', ('multiset',)),
+    ('<stack>', ('stack',)),
+    ('<string>', ('char_traits', 'basic_string',)),
+    ('<tuple>', ('tuple',)),
+    ('<unordered_map>', ('unordered_map', 'unordered_multimap')),
+    ('<unordered_set>', ('unordered_set', 'unordered_multiset')),
+    ('<utility>', ('pair',)),
+    ('<vector>', ('vector',)),
+
+    # gcc extensions.
+    # Note: std::hash is their hash, ::hash is our hash
+    ('<hash_map>', ('hash_map', 'hash_multimap',)),
+    ('<hash_set>', ('hash_set', 'hash_multiset',)),
+    ('<slist>', ('slist',)),
+    )
+
+_HEADERS_MAYBE_TEMPLATES = (
+    ('<algorithm>', ('copy', 'max', 'min', 'min_element', 'sort',
+                     'transform',
+                     )),
+    ('<utility>', ('forward', 'make_pair', 'move', 'swap')),
+    )
+
+_RE_PATTERN_STRING = re.compile(r'\bstring\b')
+
+_re_pattern_headers_maybe_templates = []
+for _header, _templates in _HEADERS_MAYBE_TEMPLATES:
+  for _template in _templates:
+    # Match max<type>(..., ...), max(..., ...), but not foo->max, foo.max or
+    # 'type::max()'.
+    _re_pattern_headers_maybe_templates.append(
+        (re.compile(r'[^>.]\b' + _template + r'(<.*?>)?\([^\)]'),
+         _template,
+         _header))
+# Match set<type>, but not foo->set<type>, foo.set<type>
+_re_pattern_headers_maybe_templates.append(
+    (re.compile(r'[^>.]\bset\s*\<'),
+     'set<>',
+     '<set>'))
+# Match 'map<type> var' and 'std::map<type>(...)', but not 'map<type>(...)'
+_re_pattern_headers_maybe_templates.append(
+    (re.compile(r'(std\b::\bmap\s*\<)|(^(std\b::\b)map\b\(\s*\<)'),
+     'map<>',
+     '<map>'))
+
+# Other scripts may reach in and modify this pattern.
+_re_pattern_templates = []
+for _header, _templates in _HEADERS_CONTAINING_TEMPLATES:
+  for _template in _templates:
+    _re_pattern_templates.append(
+        (re.compile(r'(\<|\b)' + _template + r'\s*\<'),
+         _template + '<>',
+         _header))
+
+
+def FilesBelongToSameModule(filename_cc, filename_h):
+  """Check if these two filenames belong to the same module.
+
+  The concept of a 'module' here is as follows:
+  foo.h, foo-inl.h, foo.cc, foo_test.cc and foo_unittest.cc belong to the
+  same 'module' if they are in the same directory.
+  some/path/public/xyzzy and some/path/internal/xyzzy are also considered
+  to belong to the same module here.
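+
+  For example (illustrative, not part of the upstream docstring),
+  FilesBelongToSameModule('a/b/foo_test.cc', 'a/b/foo.h') returns
+  (True, ''): stripping the extension and the '_test' suffix leaves the
+  same 'a/b/foo' stem on both sides.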
+
+  If the filename_cc contains a longer path than the filename_h, for example,
+  '/absolute/path/to/base/sysinfo.cc', and this file would include
+  'base/sysinfo.h', this function also produces the prefix needed to open the
+  header. This is used by the caller of this function to more robustly open the
+  header file. We don't have access to the real include paths in this context,
+  so we need this guesswork here.
+
+  Known bugs: tools/base/bar.cc and base/bar.h belong to the same module
+  according to this implementation. Because of this, this function gives
+  some false positives. This should be sufficiently rare in practice.
+
+  Args:
+    filename_cc: is the path for the source (e.g. .cc) file
+    filename_h: is the path for the header path
+
+  Returns:
+    Tuple with a bool and a string:
+    bool: True if filename_cc and filename_h belong to the same module.
+    string: the additional prefix needed to open the header file.
+  """
+  fileinfo_cc = FileInfo(filename_cc)
+  if not fileinfo_cc.Extension().lstrip('.') in GetNonHeaderExtensions():
+    return (False, '')
+
+  fileinfo_h = FileInfo(filename_h)
+  if not IsHeaderExtension(fileinfo_h.Extension().lstrip('.')):
+    return (False, '')
+
+  filename_cc = filename_cc[:-(len(fileinfo_cc.Extension()))]
+  matched_test_suffix = Search(_TEST_FILE_SUFFIX, fileinfo_cc.BaseName())
+  if matched_test_suffix:
+    filename_cc = filename_cc[:-len(matched_test_suffix.group(1))]
+
+  filename_cc = filename_cc.replace('/public/', '/')
+  filename_cc = filename_cc.replace('/internal/', '/')
+
+  filename_h = filename_h[:-(len(fileinfo_h.Extension()))]
+  if filename_h.endswith('-inl'):
+    filename_h = filename_h[:-len('-inl')]
+  filename_h = filename_h.replace('/public/', '/')
+  filename_h = filename_h.replace('/internal/', '/')
+
+  files_belong_to_same_module = filename_cc.endswith(filename_h)
+  common_path = ''
+  if files_belong_to_same_module:
+    common_path = filename_cc[:-len(filename_h)]
+  return files_belong_to_same_module, common_path
+
+
+def UpdateIncludeState(filename, include_dict, io=codecs):
+  """Fill up the include_dict with new includes found from the file.
+
+  Args:
+    filename: the name of the header to read.
+    include_dict: a dictionary in which the headers are inserted.
+    io: The io factory to use to read the file. Provided for testability.
+
+  Returns:
+    True if a header was successfully added. False otherwise.
+  """
+  headerfile = None
+  try:
+    with io.open(filename, 'r', 'utf8', 'replace') as headerfile:
+      linenum = 0
+      for line in headerfile:
+        linenum += 1
+        clean_line = CleanseComments(line)
+        match = _RE_PATTERN_INCLUDE.search(clean_line)
+        if match:
+          include = match.group(2)
+          include_dict.setdefault(include, linenum)
+    return True
+  except IOError:
+    return False
+
+
+def CheckForIncludeWhatYouUse(filename, clean_lines, include_state, error,
+                              io=codecs):
+  """Reports missing STL includes.
+
+  This function will output warnings to make sure you are including the headers
+  necessary for the stl containers and functions that you use. We only give one
+  reason to include a header. For example, if you use both equal_to<> and
+  less<> in a .h file, only one (the latter in the file) of these will be
+  reported as a reason to include the <functional> header.
+
+  Args:
+    filename: The name of the current file.
+    clean_lines: A CleansedLines instance containing the file.
+    include_state: An _IncludeState instance.
+    error: The function to call with any errors found.
+    io: The IO factory to use to read the header file. Provided for unittest
+        injection.
+ """ + required = {} # A map of header name to linenumber and the template entity. + # Example of required: { '': (1219, 'less<>') } + + for linenum in xrange(clean_lines.NumLines()): + line = clean_lines.elided[linenum] + if not line or line[0] == '#': + continue + + # String is special -- it is a non-templatized type in STL. + matched = _RE_PATTERN_STRING.search(line) + if matched: + # Don't warn about strings in non-STL namespaces: + # (We check only the first match per line; good enough.) + prefix = line[:matched.start()] + if prefix.endswith('std::') or not prefix.endswith('::'): + required[''] = (linenum, 'string') + + for pattern, template, header in _re_pattern_headers_maybe_templates: + if pattern.search(line): + required[header] = (linenum, template) + + # The following function is just a speed up, no semantics are changed. + if not '<' in line: # Reduces the cpu time usage by skipping lines. + continue + + for pattern, template, header in _re_pattern_templates: + matched = pattern.search(line) + if matched: + # Don't warn about IWYU in non-STL namespaces: + # (We check only the first match per line; good enough.) + prefix = line[:matched.start()] + if prefix.endswith('std::') or not prefix.endswith('::'): + required[header] = (linenum, template) + + # The policy is that if you #include something in foo.h you don't need to + # include it again in foo.cc. Here, we will look at possible includes. + # Let's flatten the include_state include_list and copy it into a dictionary. + include_dict = dict([item for sublist in include_state.include_list + for item in sublist]) + + # Did we find the header for this file (if any) and successfully load it? + header_found = False + + # Use the absolute path so that matching works properly. + abs_filename = FileInfo(filename).FullName() + + # For Emacs's flymake. + # If cpplint is invoked from Emacs's flymake, a temporary file is generated + # by flymake and that file name might end with '_flymake.cc'. In that case, + # restore original file name here so that the corresponding header file can be + # found. + # e.g. If the file name is 'foo_flymake.cc', we should search for 'foo.h' + # instead of 'foo_flymake.h' + abs_filename = re.sub(r'_flymake\.cc$', '.cc', abs_filename) + + # include_dict is modified during iteration, so we iterate over a copy of + # the keys. + header_keys = list(include_dict.keys()) + for header in header_keys: + (same_module, common_path) = FilesBelongToSameModule(abs_filename, header) + fullpath = common_path + header + if same_module and UpdateIncludeState(fullpath, include_dict, io): + header_found = True + + # If we can't find the header file for a .cc, assume it's because we don't + # know where to look. In that case we'll give up as we're not sure they + # didn't include it in the .h file. + # TODO(unknown): Do a better job of finding .h files so we are confident that + # not having the .h file means there isn't one. + if not header_found: + for extension in GetNonHeaderExtensions(): + if filename.endswith('.' + extension): + return + + # All the lines have been processed, report the errors found. 
+ for required_header_unstripped in sorted(required, key=required.__getitem__): + template = required[required_header_unstripped][1] + if required_header_unstripped.strip('<>"') not in include_dict: + error(filename, required[required_header_unstripped][0], + 'build/include_what_you_use', 4, + 'Add #include ' + required_header_unstripped + ' for ' + template) + + +_RE_PATTERN_EXPLICIT_MAKEPAIR = re.compile(r'\bmake_pair\s*<') + + +def CheckMakePairUsesDeduction(filename, clean_lines, linenum, error): + """Check that make_pair's template arguments are deduced. + + G++ 4.6 in C++11 mode fails badly if make_pair's template arguments are + specified explicitly, and such use isn't intended in any case. + + Args: + filename: The name of the current file. + clean_lines: A CleansedLines instance containing the file. + linenum: The number of the line to check. + error: The function to call with any errors found. + """ + line = clean_lines.elided[linenum] + match = _RE_PATTERN_EXPLICIT_MAKEPAIR.search(line) + if match: + error(filename, linenum, 'build/explicit_make_pair', + 4, # 4 = high confidence + 'For C++11-compatibility, omit template arguments from make_pair' + ' OR use pair directly OR if appropriate, construct a pair directly') + + +def CheckRedundantVirtual(filename, clean_lines, linenum, error): + """Check if line contains a redundant "virtual" function-specifier. + + Args: + filename: The name of the current file. + clean_lines: A CleansedLines instance containing the file. + linenum: The number of the line to check. + error: The function to call with any errors found. + """ + # Look for "virtual" on current line. + line = clean_lines.elided[linenum] + virtual = Match(r'^(.*)(\bvirtual\b)(.*)$', line) + if not virtual: return + + # Ignore "virtual" keywords that are near access-specifiers. These + # are only used in class base-specifier and do not apply to member + # functions. + if (Search(r'\b(public|protected|private)\s+$', virtual.group(1)) or + Match(r'^\s+(public|protected|private)\b', virtual.group(3))): + return + + # Ignore the "virtual" keyword from virtual base classes. Usually + # there is a column on the same line in these cases (virtual base + # classes are rare in google3 because multiple inheritance is rare). + if Match(r'^.*[^:]:[^:].*$', line): return + + # Look for the next opening parenthesis. This is the start of the + # parameter list (possibly on the next line shortly after virtual). + # TODO(unknown): doesn't work if there are virtual functions with + # decltype() or other things that use parentheses, but csearch suggests + # that this is rare. + end_col = -1 + end_line = -1 + start_col = len(virtual.group(2)) + for start_line in xrange(linenum, min(linenum + 3, clean_lines.NumLines())): + line = clean_lines.elided[start_line][start_col:] + parameter_list = Match(r'^([^(]*)\(', line) + if parameter_list: + # Match parentheses to find the end of the parameter list + (_, end_line, end_col) = CloseExpression( + clean_lines, start_line, start_col + len(parameter_list.group(1))) + break + start_col = 0 + + if end_col < 0: + return # Couldn't find end of parameter list, give up + + # Look for "override" or "final" after the parameter list + # (possibly on the next few lines). 
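+  #
+  # For illustration (not an upstream comment), the declarations the loop
+  # below flags:
+  #   virtual void Draw() override;  // 'virtual' is redundant
+  #   virtual void Draw() final;     // 'virtual' is redundant
+  # A plain 'virtual void Draw();' is left alone.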
+ for i in xrange(end_line, min(end_line + 3, clean_lines.NumLines())): + line = clean_lines.elided[i][end_col:] + match = Search(r'\b(override|final)\b', line) + if match: + error(filename, linenum, 'readability/inheritance', 4, + ('"virtual" is redundant since function is ' + 'already declared as "%s"' % match.group(1))) + + # Set end_col to check whole lines after we are done with the + # first line. + end_col = 0 + if Search(r'[^\w]\s*$', line): + break + + +def CheckRedundantOverrideOrFinal(filename, clean_lines, linenum, error): + """Check if line contains a redundant "override" or "final" virt-specifier. + + Args: + filename: The name of the current file. + clean_lines: A CleansedLines instance containing the file. + linenum: The number of the line to check. + error: The function to call with any errors found. + """ + # Look for closing parenthesis nearby. We need one to confirm where + # the declarator ends and where the virt-specifier starts to avoid + # false positives. + line = clean_lines.elided[linenum] + declarator_end = line.rfind(')') + if declarator_end >= 0: + fragment = line[declarator_end:] + else: + if linenum > 1 and clean_lines.elided[linenum - 1].rfind(')') >= 0: + fragment = line + else: + return + + # Check that at most one of "override" or "final" is present, not both + if Search(r'\boverride\b', fragment) and Search(r'\bfinal\b', fragment): + error(filename, linenum, 'readability/inheritance', 4, + ('"override" is redundant since function is ' + 'already declared as "final"')) + + + + +# Returns true if we are at a new block, and it is directly +# inside of a namespace. +def IsBlockInNameSpace(nesting_state, is_forward_declaration): + """Checks that the new block is directly in a namespace. + + Args: + nesting_state: The _NestingState object that contains info about our state. + is_forward_declaration: If the class is a forward declared class. + Returns: + Whether or not the new block is directly in a namespace. + """ + if is_forward_declaration: + return len(nesting_state.stack) >= 1 and ( + isinstance(nesting_state.stack[-1], _NamespaceInfo)) + + + return (len(nesting_state.stack) > 1 and + nesting_state.stack[-1].check_namespace_indentation and + isinstance(nesting_state.stack[-2], _NamespaceInfo)) + + +def ShouldCheckNamespaceIndentation(nesting_state, is_namespace_indent_item, + raw_lines_no_comments, linenum): + """This method determines if we should apply our namespace indentation check. + + Args: + nesting_state: The current nesting state. + is_namespace_indent_item: If we just put a new class on the stack, True. + If the top of the stack is not a class, or we did not recently + add the class, False. + raw_lines_no_comments: The lines without the comments. + linenum: The current line number we are processing. + + Returns: + True if we should apply our namespace indentation check. Currently, it + only works for classes and namespaces inside of a namespace. + """ + + is_forward_declaration = IsForwardClassDeclaration(raw_lines_no_comments, + linenum) + + if not (is_namespace_indent_item or is_forward_declaration): + return False + + # If we are in a macro, we do not want to check the namespace indentation. + if IsMacroDefinition(raw_lines_no_comments, linenum): + return False + + return IsBlockInNameSpace(nesting_state, is_forward_declaration) + + +# Call this method if the line is directly inside of a namespace. +# If the line above is blank (excluding comments) or the start of +# an inner namespace, it cannot be indented. 
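+#
+# For illustration (not an upstream comment), the indentation this check
+# rejects:
+#   namespace foo {
+#     class Bar;  // indented directly inside a namespace -> warned
+#   class Baz;    // flush left -> OK
+#   }  // namespace foo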
+def CheckItemIndentationInNamespace(filename, raw_lines_no_comments, linenum, + error): + line = raw_lines_no_comments[linenum] + if Match(r'^\s+', line): + error(filename, linenum, 'runtime/indentation_namespace', 4, + 'Do not indent within a namespace') + + +def ProcessLine(filename, file_extension, clean_lines, line, + include_state, function_state, nesting_state, error, + extra_check_functions=None): + """Processes a single line in the file. + + Args: + filename: Filename of the file that is being processed. + file_extension: The extension (dot not included) of the file. + clean_lines: An array of strings, each representing a line of the file, + with comments stripped. + line: Number of line being processed. + include_state: An _IncludeState instance in which the headers are inserted. + function_state: A _FunctionState instance which counts function lines, etc. + nesting_state: A NestingState instance which maintains information about + the current stack of nested blocks being parsed. + error: A callable to which errors are reported, which takes 4 arguments: + filename, line number, error level, and message + extra_check_functions: An array of additional check functions that will be + run on each source line. Each function takes 4 + arguments: filename, clean_lines, line, error + """ + raw_lines = clean_lines.raw_lines + ParseNolintSuppressions(filename, raw_lines[line], line, error) + nesting_state.Update(filename, clean_lines, line, error) + CheckForNamespaceIndentation(filename, nesting_state, clean_lines, line, + error) + if nesting_state.InAsmBlock(): return + CheckForFunctionLengths(filename, clean_lines, line, function_state, error) + CheckForMultilineCommentsAndStrings(filename, clean_lines, line, error) + CheckStyle(filename, clean_lines, line, file_extension, nesting_state, error) + CheckLanguage(filename, clean_lines, line, file_extension, include_state, + nesting_state, error) + CheckForNonConstReference(filename, clean_lines, line, nesting_state, error) + CheckForNonStandardConstructs(filename, clean_lines, line, + nesting_state, error) + CheckVlogArguments(filename, clean_lines, line, error) + CheckPosixThreading(filename, clean_lines, line, error) + CheckInvalidIncrement(filename, clean_lines, line, error) + CheckMakePairUsesDeduction(filename, clean_lines, line, error) + CheckRedundantVirtual(filename, clean_lines, line, error) + CheckRedundantOverrideOrFinal(filename, clean_lines, line, error) + if extra_check_functions: + for check_fn in extra_check_functions: + check_fn(filename, clean_lines, line, error) + +def FlagCxx11Features(filename, clean_lines, linenum, error): + """Flag those c++11 features that we only allow in certain places. + + Args: + filename: The name of the current file. + clean_lines: A CleansedLines instance containing the file. + linenum: The number of the line to check. + error: The function to call with any errors found. + """ + line = clean_lines.elided[linenum] + + include = Match(r'\s*#\s*include\s+[<"]([^<"]+)[">]', line) + + # Flag unapproved C++ TR1 headers. + if include and include.group(1).startswith('tr1/'): + error(filename, linenum, 'build/c++tr1', 5, + ('C++ TR1 headers such as <%s> are unapproved.') % include.group(1)) + +# # Flag unapproved C++11 headers. 
+# if include and include.group(1) in ('cfenv', +# 'condition_variable', +# 'fenv.h', +# 'future', +# 'mutex', +# 'thread', +# 'chrono', +# 'ratio', +# 'regex', +# 'system_error', +# ): +# error(filename, linenum, 'build/c++11', 5, +# ('<%s> is an unapproved C++11 header.') % include.group(1)) + + # The only place where we need to worry about C++11 keywords and library + # features in preprocessor directives is in macro definitions. + if Match(r'\s*#', line) and not Match(r'\s*#\s*define\b', line): return + + # These are classes and free functions. The classes are always + # mentioned as std::*, but we only catch the free functions if + # they're not found by ADL. They're alphabetical by header. + for top_name in ( + # type_traits + 'alignment_of', + 'aligned_union', + ): + if Search(r'\bstd::%s\b' % top_name, line): + error(filename, linenum, 'build/c++11', 5, + ('std::%s is an unapproved C++11 class or function. Send c-style ' + 'an example of where it would make your code more readable, and ' + 'they may let you use it.') % top_name) + + +def FlagCxx14Features(filename, clean_lines, linenum, error): + """Flag those C++14 features that we restrict. + + Args: + filename: The name of the current file. + clean_lines: A CleansedLines instance containing the file. + linenum: The number of the line to check. + error: The function to call with any errors found. + """ + line = clean_lines.elided[linenum] + + include = Match(r'\s*#\s*include\s+[<"]([^<"]+)[">]', line) + + # Flag unapproved C++14 headers. + if include and include.group(1) in ('scoped_allocator', 'shared_mutex'): + error(filename, linenum, 'build/c++14', 5, + ('<%s> is an unapproved C++14 header.') % include.group(1)) + + +def ProcessFileData(filename, file_extension, lines, error, + extra_check_functions=None): + """Performs lint checks and reports any errors to the given error function. + + Args: + filename: Filename of the file that is being processed. + file_extension: The extension (dot not included) of the file. + lines: An array of strings, each representing a line of the file, with the + last element being empty if the file is terminated with a newline. + error: A callable to which errors are reported, which takes 4 arguments: + filename, line number, error level, and message + extra_check_functions: An array of additional check functions that will be + run on each source line. Each function takes 4 + arguments: filename, clean_lines, line, error + """ + lines = (['// marker so line numbers and indices both start at 1'] + lines + + ['// marker so line numbers end in a known way']) + + include_state = _IncludeState() + function_state = _FunctionState() + nesting_state = NestingState() + + ResetNolintSuppressions() + + CheckForCopyright(filename, lines, error) + ProcessGlobalSuppresions(lines) + RemoveMultiLineComments(filename, lines, error) + clean_lines = CleansedLines(lines) + + if IsHeaderExtension(file_extension): + CheckForHeaderGuard(filename, clean_lines, error) + + for line in xrange(clean_lines.NumLines()): + ProcessLine(filename, file_extension, clean_lines, line, + include_state, function_state, nesting_state, error, + extra_check_functions) + FlagCxx11Features(filename, clean_lines, line, error) + nesting_state.CheckCompletedBlocks(filename, error) + + CheckForIncludeWhatYouUse(filename, clean_lines, include_state, error) + + # Check that the .cc file has included its header if it exists. 
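+  # (Illustrative: for a hypothetical foo.cc, an error is reported when no
+  # matching foo.h was #included anywhere in the file.)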
+ if _IsSourceExtension(file_extension): + CheckHeaderFileIncluded(filename, include_state, error) + + # We check here rather than inside ProcessLine so that we see raw + # lines rather than "cleaned" lines. + CheckForBadCharacters(filename, lines, error) + + CheckForNewlineAtEOF(filename, lines, error) + +def ProcessConfigOverrides(filename): + """ Loads the configuration files and processes the config overrides. + + Args: + filename: The name of the file being processed by the linter. + + Returns: + False if the current |filename| should not be processed further. + """ + + abs_filename = os.path.abspath(filename) + cfg_filters = [] + keep_looking = True + while keep_looking: + abs_path, base_name = os.path.split(abs_filename) + if not base_name: + break # Reached the root directory. + + cfg_file = os.path.join(abs_path, "CPPLINT.cfg") + abs_filename = abs_path + if not os.path.isfile(cfg_file): + continue + + try: + with codecs.open(cfg_file, 'r', 'utf8', 'replace') as file_handle: + for line in file_handle: + line, _, _ = line.partition('#') # Remove comments. + if not line.strip(): + continue + + name, _, val = line.partition('=') + name = name.strip() + val = val.strip() + if name == 'set noparent': + keep_looking = False + elif name == 'filter': + cfg_filters.append(val) + elif name == 'exclude_files': + # When matching exclude_files pattern, use the base_name of + # the current file name or the directory name we are processing. + # For example, if we are checking for lint errors in /foo/bar/baz.cc + # and we found the .cfg file at /foo/CPPLINT.cfg, then the config + # file's "exclude_files" filter is meant to be checked against "bar" + # and not "baz" nor "bar/baz.cc". + if base_name: + pattern = re.compile(val) + if pattern.match(base_name): + if _cpplint_state.quiet: + # Suppress "Ignoring file" warning when using --quiet. + return False + _cpplint_state.PrintInfo('Ignoring "%s": file excluded by "%s". ' + 'File path component "%s" matches ' + 'pattern "%s"\n' % + (filename, cfg_file, base_name, val)) + return False + elif name == 'linelength': + global _line_length + try: + _line_length = int(val) + except ValueError: + _cpplint_state.PrintError('Line length must be numeric.') + elif name == 'extensions': + ProcessExtensionsOption(val) + elif name == 'root': + global _root + # root directories are specified relative to CPPLINT.cfg dir. + _root = os.path.join(os.path.dirname(cfg_file), val) + elif name == 'headers': + ProcessHppHeadersOption(val) + elif name == 'includeorder': + ProcessIncludeOrderOption(val) + else: + _cpplint_state.PrintError( + 'Invalid configuration option (%s) in file %s\n' % + (name, cfg_file)) + + except IOError: + _cpplint_state.PrintError( + "Skipping config file '%s': Can't open for reading\n" % cfg_file) + keep_looking = False + + # Apply all the accumulated filters in reverse order (top-level directory + # config options having the least priority). + for cfg_filter in reversed(cfg_filters): + _AddFilters(cfg_filter) + + return True + + +def ProcessFile(filename, vlevel, extra_check_functions=None): + """Does google-lint on a single file. + + Args: + filename: The name of the file to parse. + + vlevel: The level of errors to report. Every error of confidence + >= verbose_level will be reported. 0 is a good default. + + extra_check_functions: An array of additional check functions that will be + run on each source line. 
Each function takes 4 + arguments: filename, clean_lines, line, error + """ + + _SetVerboseLevel(vlevel) + _BackupFilters() + old_errors = _cpplint_state.error_count + + if not ProcessConfigOverrides(filename): + _RestoreFilters() + return + + lf_lines = [] + crlf_lines = [] + try: + # Support the UNIX convention of using "-" for stdin. Note that + # we are not opening the file with universal newline support + # (which codecs doesn't support anyway), so the resulting lines do + # contain trailing '\r' characters if we are reading a file that + # has CRLF endings. + # If after the split a trailing '\r' is present, it is removed + # below. + if filename == '-': + lines = codecs.StreamReaderWriter(sys.stdin, + codecs.getreader('utf8'), + codecs.getwriter('utf8'), + 'replace').read().split('\n') + else: + with codecs.open(filename, 'r', 'utf8', 'replace') as target_file: + lines = target_file.read().split('\n') + + # Remove trailing '\r'. + # The -1 accounts for the extra trailing blank line we get from split() + for linenum in range(len(lines) - 1): + if lines[linenum].endswith('\r'): + lines[linenum] = lines[linenum].rstrip('\r') + crlf_lines.append(linenum + 1) + else: + lf_lines.append(linenum + 1) + + except IOError: + _cpplint_state.PrintError( + "Skipping input '%s': Can't open for reading\n" % filename) + _RestoreFilters() + return + + # Note, if no dot is found, this will give the entire filename as the ext. + file_extension = filename[filename.rfind('.') + 1:] + + # When reading from stdin, the extension is unknown, so no cpplint tests + # should rely on the extension. + if filename != '-' and file_extension not in GetAllExtensions(): + _cpplint_state.PrintError('Ignoring %s; not a valid file name ' + '(%s)\n' % (filename, ', '.join(GetAllExtensions()))) + else: + ProcessFileData(filename, file_extension, lines, Error, + extra_check_functions) + + # If end-of-line sequences are a mix of LF and CR-LF, issue + # warnings on the lines with CR. + # + # Don't issue any warnings if all lines are uniformly LF or CR-LF, + # since critique can handle these just fine, and the style guide + # doesn't dictate a particular end of line sequence. + # + # We can't depend on os.linesep to determine what the desired + # end-of-line sequence should be, since that will return the + # server-side end-of-line sequence. + if lf_lines and crlf_lines: + # Warn on every line with CR. An alternative approach might be to + # check whether the file is mostly CRLF or just LF, and warn on the + # minority, we bias toward LF here since most tools prefer LF. + for linenum in crlf_lines: + Error(filename, linenum, 'whitespace/newline', 1, + 'Unexpected \\r (^M) found; better to use only \\n') + + # Suppress printing anything if --quiet was passed unless the error + # count has increased after processing this file. + if not _cpplint_state.quiet or old_errors != _cpplint_state.error_count: + _cpplint_state.PrintInfo('Done processing %s\n' % filename) + _RestoreFilters() + + +def PrintUsage(message): + """Prints a brief usage string and exits, optionally with an error message. + + Args: + message: The optional error message. 
+ """ + sys.stderr.write(_USAGE % (sorted(list(GetAllExtensions())), + ','.join(sorted(list(GetAllExtensions()))), + sorted(GetHeaderExtensions()), + ','.join(sorted(GetHeaderExtensions())))) + + if message: + sys.exit('\nFATAL ERROR: ' + message) + else: + sys.exit(0) + +def PrintVersion(): + sys.stdout.write('Cpplint fork (https://github.com/cpplint/cpplint)\n') + sys.stdout.write('cpplint ' + __VERSION__ + '\n') + sys.stdout.write('Python ' + sys.version + '\n') + sys.exit(0) + +def PrintCategories(): + """Prints a list of all the error-categories used by error messages. + + These are the categories used to filter messages via --filter. + """ + sys.stderr.write(''.join(' %s\n' % cat for cat in _ERROR_CATEGORIES)) + sys.exit(0) + + +def ParseArguments(args): + """Parses the command line arguments. + + This may set the output format and verbosity level as side-effects. + + Args: + args: The command line arguments: + + Returns: + The list of filenames to lint. + """ + try: + (opts, filenames) = getopt.getopt(args, '', ['help', 'output=', 'verbose=', + 'v=', + 'version', + 'counting=', + 'filter=', + 'root=', + 'repository=', + 'linelength=', + 'extensions=', + 'exclude=', + 'recursive', + 'headers=', + 'includeorder=', + 'quiet']) + except getopt.GetoptError: + PrintUsage('Invalid arguments.') + + verbosity = _VerboseLevel() + output_format = _OutputFormat() + filters = '' + quiet = _Quiet() + counting_style = '' + recursive = False + + for (opt, val) in opts: + if opt == '--help': + PrintUsage(None) + if opt == '--version': + PrintVersion() + elif opt == '--output': + if val not in ('emacs', 'vs7', 'eclipse', 'junit', 'sed', 'gsed'): + PrintUsage('The only allowed output formats are emacs, vs7, eclipse ' + 'sed, gsed and junit.') + output_format = val + elif opt == '--quiet': + quiet = True + elif opt == '--verbose' or opt == '--v': + verbosity = int(val) + elif opt == '--filter': + filters = val + if not filters: + PrintCategories() + elif opt == '--counting': + if val not in ('total', 'toplevel', 'detailed'): + PrintUsage('Valid counting options are total, toplevel, and detailed') + counting_style = val + elif opt == '--root': + global _root + _root = val + elif opt == '--repository': + global _repository + _repository = val + elif opt == '--linelength': + global _line_length + try: + _line_length = int(val) + except ValueError: + PrintUsage('Line length must be digits.') + elif opt == '--exclude': + global _excludes + if not _excludes: + _excludes = set() + _excludes.update(glob.glob(val)) + elif opt == '--extensions': + ProcessExtensionsOption(val) + elif opt == '--headers': + ProcessHppHeadersOption(val) + elif opt == '--recursive': + recursive = True + elif opt == '--includeorder': + ProcessIncludeOrderOption(val) + + if not filenames: + PrintUsage('No files were specified.') + + if recursive: + filenames = _ExpandDirectories(filenames) + + if _excludes: + filenames = _FilterExcludedFiles(filenames) + + _SetOutputFormat(output_format) + _SetQuiet(quiet) + _SetVerboseLevel(verbosity) + _SetFilters(filters) + _SetCountingStyle(counting_style) + + filenames.sort() + return filenames + +def _ExpandDirectories(filenames): + """Searches a list of filenames and replaces directories in the list with + all files descending from those directories. Files with extensions not in + the valid extensions list are excluded. 
+ + Args: + filenames: A list of files or directories + + Returns: + A list of all files that are members of filenames or descended from a + directory in filenames + """ + expanded = set() + for filename in filenames: + if not os.path.isdir(filename): + expanded.add(filename) + continue + + for root, _, files in os.walk(filename): + for loopfile in files: + fullname = os.path.join(root, loopfile) + if fullname.startswith('.' + os.path.sep): + fullname = fullname[len('.' + os.path.sep):] + expanded.add(fullname) + + filtered = [] + for filename in expanded: + if os.path.splitext(filename)[1][1:] in GetAllExtensions(): + filtered.append(filename) + return filtered + +def _FilterExcludedFiles(fnames): + """Filters out files listed in the --exclude command line switch. File paths + in the switch are evaluated relative to the current working directory + """ + exclude_paths = [os.path.abspath(f) for f in _excludes] + # because globbing does not work recursively, exclude all subpath of all excluded entries + return [f for f in fnames + if not any(e for e in exclude_paths + if _IsParentOrSame(e, os.path.abspath(f)))] + +def _IsParentOrSame(parent, child): + """Return true if child is subdirectory of parent. + Assumes both paths are absolute and don't contain symlinks. + """ + parent = os.path.normpath(parent) + child = os.path.normpath(child) + if parent == child: + return True + + prefix = os.path.commonprefix([parent, child]) + if prefix != parent: + return False + # Note: os.path.commonprefix operates on character basis, so + # take extra care of situations like '/foo/ba' and '/foo/bar/baz' + child_suffix = child[len(prefix):] + child_suffix = child_suffix.lstrip(os.sep) + return child == os.path.join(prefix, child_suffix) + +def main(): + filenames = ParseArguments(sys.argv[1:]) + backup_err = sys.stderr + try: + # Change stderr to write with replacement characters so we don't die + # if we try to print something containing non-ASCII characters. + sys.stderr = codecs.StreamReader(sys.stderr, 'replace') + + _cpplint_state.ResetErrorCounts() + for filename in filenames: + ProcessFile(filename, _cpplint_state.verbose_level) + # If --quiet is passed, suppress printing error count unless there are errors. 
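+    # (Illustrative: `cpplint.py --quiet foo.cc` prints nothing for a clean
+    # file, but the process still exits with status 1 below when any file
+    # produced errors.)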
+    if not _cpplint_state.quiet or _cpplint_state.error_count > 0:
+      _cpplint_state.PrintErrorCounts()
+
+    if _cpplint_state.output_format == 'junit':
+      sys.stderr.write(_cpplint_state.FormatJUnitXML())
+
+  finally:
+    sys.stderr = backup_err
+
+  sys.exit(_cpplint_state.error_count > 0)
+
+
+if __name__ == '__main__':
+  main()
diff --git a/contrib/pax_storage/tools/gen_sql.c b/contrib/pax_storage/tools/gen_sql.c
new file mode 100644
index 00000000000..f7a01fe2fae
--- /dev/null
+++ b/contrib/pax_storage/tools/gen_sql.c
@@ -0,0 +1,198 @@
+#include "postgres.h" // NOLINT
+
+#include <stdio.h>
+
+#define USE_PAX_MACRO
+
+#if defined(USE_PAX_MACRO)
+/* define these values in pax header file */
+#include "comm/cbdb_api.h"
+#else
+// for tests only; normally you should use the macros defined in cbdb_api.h
+
+#define PAX_TABLE_AM_OID 7014
+#define PAX_AMNAME "pax"
+#define PAX_AM_HANDLER_OID 7600
+#define PAX_AM_HANDLER_NAME "pax_tableam_handler"
+
+#define PAX_AUX_STATS_IN_OID 7601
+#define PAX_AUX_STATS_OUT_OID 7602
+#define PAX_AUX_STATS_TYPE_OID 7603
+#define PAX_AUX_STATS_TYPE_NAME "paxauxstats"
+#endif
+
+#include "catalog/pg_am.h"
+#include "catalog/pg_authid.h"
+#include "catalog/pg_language.h"
+#include "catalog/pg_namespace.h"
+#include "catalog/pg_type.h"
+
+#ifdef printf
+#undef printf
+#endif
+
+#define PAX_COMMENT "column-optimized PAX table access method handler"
+int main() {
+  printf("-- insert pax catalog values\n");
+  printf(
+      "INSERT INTO pg_proc "
+      "VALUES(%u,'%s',%u,%u,%u,%u,%u,%u,%u,'%c','%c','%c','%c','%c','%c','%c',"
+      "1,0,%u,'%u',null,null,null,null,null,'%s','%s',null,null,null,'%c','%c')"
+      ";\n",
+      PAX_AM_HANDLER_OID,    /* oid: pg_proc.oid */
+      PAX_AM_HANDLER_NAME,   /* proname */
+      PG_CATALOG_NAMESPACE,  /* pronamespace: pg_namespace.oid: pg_catalog */
+      BOOTSTRAP_SUPERUSERID, /* proowner: pg_authid.oid */
+      ClanguageId,           /* prolang: pg_language.oid */
+      1,                     /* procost: 1 */
+      0,                     /* prorows: 0 */
+      0,                     /* provariadic: pg_type.oid */
+      0,                     /* prosupport: pg_proc.oid */
+      'f',                   /* prokind: 'f' normal function */
+      'f',                   /* prosecdef */
+      'f',                   /* proleakproof */
+      't',                   /* proisstrict */
+      'f',                   /* proretset */
+      's',                   /* provolatile */
+      'u',                   /* proparallel */
+      /* pronargs: 1 */
+      /* pronargdefaults: 0 */
+      TABLE_AM_HANDLEROID, /* prorettype: pg_type.oid */
+      INTERNALOID,         /* proargtypes: pg_type.oid, internal */
+      /* proallargtypes: null */
+      /* proargmodes: null */
+      /* proargnames: null */
+      /* proargdefaults: null */
+      /* protrftypes: null */
+      PAX_AM_HANDLER_NAME, /* prosrc */
+      "$libdir/pax",       /* probin */
+      /* prosqlbody: null */
+      /* proconfig: null */
+      /* proacl: null */
+      'n', /* prodataaccess */
+      'a' /* proexeclocation: all */);
+
+  printf("INSERT INTO pg_am VALUES(%u,'%s',%u,'%c');\n",
+         PAX_TABLE_AM_OID,   /* pg_am.oid */
+         PAX_AMNAME,         /* pg_am.amname */
+         PAX_AM_HANDLER_OID, /* pg_am.amhandler: pg_proc.oid */
+         't' /* pg_am.amtype: TABLE */);
+
+  printf("COMMENT ON FUNCTION %s IS '%s';\n", PAX_AM_HANDLER_NAME, PAX_COMMENT);
+
+  /* create type for micropartition stats */
+  printf(
+      "INSERT INTO pg_proc "
+      "VALUES(%u,'%s',%u,%u,%u,%u,%u,%u,%u,'%c','%c','%c','%c','%c','%c','%c',"
+      "1,0,%u,'%u',null,null,null,null,null,'%s','%s',null,null,null,'%c','%c')"
+      ";\n",
+      PAX_AUX_STATS_IN_OID,  /* oid: pg_proc.oid */
+      "paxauxstats_in",      /* proname */
+      PG_CATALOG_NAMESPACE,  /* pronamespace: pg_namespace.oid: pg_catalog */
+      BOOTSTRAP_SUPERUSERID, /* proowner: pg_authid.oid */
+      ClanguageId,           /* prolang: pg_language.oid */
+      1,                     /* procost: 1 */
+      0,                     /* prorows: 0 */
+      0,                     /* provariadic: pg_type.oid */
+      0,                     /* prosupport: pg_proc.oid */
+      'f',                   /* prokind: 'f' normal function */
+      'f',                   /* prosecdef */
+      'f',                   /* proleakproof */
+      't',                   /* proisstrict */
+      'f',                   /* proretset */
+      'i',                   /* provolatile */
+      'u',                   /* proparallel */
+      /* pronargs: 1 */
+      /* pronargdefaults: 0 */
+      PAX_AUX_STATS_TYPE_OID, /* prorettype: pg_type.oid */
+      CSTRINGOID,             /* proargtypes: pg_type.oid, cstring */
+      /* proallargtypes: null */
+      /* proargmodes: null */
+      /* proargnames: null */
+      /* proargdefaults: null */
+      /* protrftypes: null */
+      "MicroPartitionStatsInput", /* prosrc */
+      "$libdir/pax",              /* probin */
+      /* prosqlbody: null */
+      /* proconfig: null */
+      /* proacl: null */
+      'n', /* prodataaccess */
+      'a' /* proexeclocation: all */);
+
+  printf(
+      "INSERT INTO pg_proc "
+      "VALUES(%u,'%s',%u,%u,%u,%u,%u,%u,%u,'%c','%c','%c','%c','%c','%c','%c',"
+      "1,0,%u,'%u',null,null,null,null,null,'%s','%s',null,null,null,'%c','%c')"
+      ";\n",
+      PAX_AUX_STATS_OUT_OID, /* oid: pg_proc.oid */
+      "paxauxstats_out",     /* proname */
+      PG_CATALOG_NAMESPACE,  /* pronamespace: pg_namespace.oid: pg_catalog */
+      BOOTSTRAP_SUPERUSERID, /* proowner: pg_authid.oid */
+      ClanguageId,           /* prolang: pg_language.oid */
+      1,                     /* procost: 1 */
+      0,                     /* prorows: 0 */
+      0,                     /* provariadic: pg_type.oid */
+      0,                     /* prosupport: pg_proc.oid */
+      'f',                   /* prokind: 'f' normal function */
+      'f',                   /* prosecdef */
+      'f',                   /* proleakproof */
+      't',                   /* proisstrict */
+      'f',                   /* proretset */
+      'i',                   /* provolatile */
+      'u',                   /* proparallel */
+      /* pronargs: 1 */
+      /* pronargdefaults: 0 */
+      CSTRINGOID,             /* prorettype: pg_type.oid, cstring */
+      PAX_AUX_STATS_TYPE_OID, /* proargtypes: pg_type.oid */
+      /* proallargtypes: null */
+      /* proargmodes: null */
+      /* proargnames: null */
+      /* proargdefaults: null */
+      /* protrftypes: null */
+      "MicroPartitionStatsOutput", /* prosrc */
+      "$libdir/pax",               /* probin */
+      /* prosqlbody: null */
+      /* proconfig: null */
+      /* proacl: null */
+      'n', /* prodataaccess */
+      'a' /* proexeclocation: all */);
+
+  printf(
+      "INSERT INTO pg_type "
+      "VALUES(%u,'%s',%u,%u,%d,'%c','%c','%c','%c','%c','%c',"
+      "%u,%u,%u,%u,%u,%u,%u,%u,%u,%u,%u,'%c','%c','%c',"
+      "%u,%d,%d,%u,null,null,null);\n",
+      PAX_AUX_STATS_TYPE_OID,  /* pg_type.oid */
+      PAX_AUX_STATS_TYPE_NAME, /* pg_type.typname */
+      PG_CATALOG_NAMESPACE,    /* pg_type.typnamespace: pg_namespace.oid:
+                                  pg_catalog */
+      BOOTSTRAP_SUPERUSERID,   /* pg_type.typowner: pg_authid.oid */
+      -1,                      /* pg_type.typlen: -1 variable length */
+      'f',                     /* pg_type.typbyval */
+      'b',                     /* pg_type.typtype */
+      'U',                     /* pg_type.typcategory */
+      'f',                     /* pg_type.typispreferred */
+      't',                     /* pg_type.typisdefined */
+      ',',                     /* pg_type.typdelim */
+      InvalidOid,              /* pg_type.typrelid */
+      InvalidOid,              /* pg_type.typsubscript */
+      InvalidOid,              /* pg_type.typelem */
+      InvalidOid,              /* pg_type.typarray */
+      PAX_AUX_STATS_IN_OID,    /* pg_type.typinput */
+      PAX_AUX_STATS_OUT_OID,   /* pg_type.typoutput */
+      InvalidOid,              /* pg_type.typreceive */
+      InvalidOid,              /* pg_type.typsend */
+      InvalidOid,              /* pg_type.typmodin */
+      InvalidOid,              /* pg_type.typmodout */
+      InvalidOid,              /* pg_type.typanalyze */
+      'i',                     /* pg_type.typalign */
+      'x',                     /* pg_type.typstorage */
+      't',                     /* pg_type.typnotnull */
+      InvalidOid,              /* pg_type.typbasetype */
+      -1,                      /* pg_type.typtypmod */
+      0,                       /* pg_type.typndims */
+      InvalidOid               /* pg_type.typcollation */
+  );
+
+  return 0;
+}