Skip to content

Commit 2a7a37e

Browse files
authored
Merge branch 'main' into AddJammyBuild
2 parents 0a71816 + cf9a6ea commit 2a7a37e

34 files changed

Lines changed: 596 additions & 169 deletions

File tree

.github/workflows/sonarqube.yml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -74,7 +74,7 @@ jobs:
7474
fi
7575
7676
- name: Install Build Wrapper
77-
uses: SonarSource/sonarqube-scan-action/install-build-wrapper@v5
77+
uses: SonarSource/sonarqube-scan-action/install-build-wrapper@v6
7878

7979
- name: Run Build Wrapper
8080
run: |
@@ -112,7 +112,7 @@ jobs:
112112
build-wrapper-linux-x86-64 --out-dir ${{ env.BUILD_WRAPPER_OUT_DIR }} make -j$(nproc)
113113
114114
- name: SonarQube Scan
115-
uses: SonarSource/sonarqube-scan-action@v5
115+
uses: SonarSource/sonarqube-scan-action@v6
116116
env:
117117
SONAR_TOKEN: ${{ secrets.SONARCLOUD_TOKEN }}
118118
with:

contrib/pax_storage/src/cpp/access/pax_dml_state.cc

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -104,7 +104,7 @@ void CPaxDmlStateLocal::Reset() { cbdb::pax_memory_context = nullptr; }
104104
CPaxDmlStateLocal::CPaxDmlStateLocal()
105105
: last_oid_(InvalidOid), cb_{.func = DmlStateResetCallback, .arg = NULL} {}
106106

107-
std::shared_ptr<CPaxDmlStateLocal::DmlStateValue>
107+
pg_attribute_always_inline std::shared_ptr<CPaxDmlStateLocal::DmlStateValue>
108108
CPaxDmlStateLocal::RemoveDmlState(const Oid &oid) {
109109
std::shared_ptr<CPaxDmlStateLocal::DmlStateValue> value;
110110

@@ -121,7 +121,7 @@ CPaxDmlStateLocal::RemoveDmlState(const Oid &oid) {
121121
return value;
122122
}
123123

124-
std::shared_ptr<CPaxDmlStateLocal::DmlStateValue>
124+
pg_attribute_always_inline std::shared_ptr<CPaxDmlStateLocal::DmlStateValue>
125125
CPaxDmlStateLocal::FindDmlState(const Oid &oid) {
126126
Assert(OidIsValid(oid));
127127

contrib/pax_storage/src/cpp/comm/bitmap.h

Lines changed: 44 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -134,12 +134,28 @@ struct BitmapRaw final {
134134
static_assert(BM_WORD_BITS == (1 << BM_WORD_SHIFTS));
135135
return (index >> BM_WORD_SHIFTS) < size;
136136
}
137-
inline bool Empty() const {
137+
138+
inline bool Empty(uint32 end_index) const {
138139
if (!bitmap) return true;
139-
for (size_t i = 0; i < size; i++)
140-
if (bitmap[i]) return false;
140+
141+
uint32 end_word = BM_INDEX_WORD_OFF(end_index);
142+
uint32 end_bit_offset = BM_INDEX_BIT_OFF(end_index);
143+
144+
for (uint32 i = 0; i < end_word && i < size; i++) {
145+
if (bitmap[i] != 0) return false;
146+
}
147+
148+
// Check partial word at end
149+
if (end_word < size && end_bit_offset > 0) {
150+
T mask = (T(1) << end_bit_offset) - 1;
151+
if (bitmap[end_word] & mask) return false;
152+
}
153+
141154
return true;
142155
}
156+
157+
inline bool Empty() const { return Empty(size * sizeof(T) * 8ULL); }
158+
143159
BitmapRaw() = default;
144160
BitmapRaw(T *buffer, size_t size) : bitmap(buffer), size(size) {}
145161
BitmapRaw(const BitmapRaw &) = delete;
@@ -160,13 +176,14 @@ struct BitmapRaw final {
160176
template <typename T>
161177
class BitmapTpl final {
162178
public:
163-
using BitmapMemoryPolicy = void (*)(BitmapRaw<T> &, uint32);
164-
explicit BitmapTpl(uint32 initial_size = 16) {
179+
using BitmapMemoryPolicy = void (*)(BitmapRaw<T> &, uint32, uint8);
180+
explicit BitmapTpl(uint32 initial_size = 16, uint8 init_value = 0) {
165181
static_assert(sizeof(T) == 1 || sizeof(T) == 2 || sizeof(T) == 4 ||
166182
sizeof(T) == 8);
167183
static_assert(BM_WORD_BITS == (1 << BM_WORD_SHIFTS));
168184
policy_ = DefaultBitmapMemoryPolicy;
169-
policy_(raw_, Max(initial_size, 16));
185+
policy_(raw_, Max(initial_size, 16), init_value);
186+
init_value_ = init_value;
170187
}
171188
explicit BitmapTpl(const BitmapRaw<T> &raw) {
172189
static_assert(sizeof(T) == 1 || sizeof(T) == 2 || sizeof(T) == 4 ||
@@ -177,8 +194,7 @@ class BitmapTpl final {
177194
raw_.size = raw.size;
178195
}
179196
BitmapTpl(const BitmapTpl &tpl) = delete;
180-
BitmapTpl(BitmapTpl &&tpl)
181-
: raw_(std::move(tpl.raw_)), policy_(tpl.policy_) {
197+
BitmapTpl(BitmapTpl &&tpl) : raw_(std::move(tpl.raw_)), policy_(tpl.policy_) {
182198
tpl.raw_.bitmap = nullptr;
183199
tpl.policy_ = ReadOnlyRefBitmap;
184200
}
@@ -188,8 +204,7 @@ class BitmapTpl final {
188204
BitmapTpl &operator=(BitmapTpl &&tpl) = delete;
189205
~BitmapTpl() {
190206
// Reference doesn't free the memory
191-
if (policy_ == DefaultBitmapMemoryPolicy)
192-
PAX_DELETE_ARRAY(raw_.bitmap);
207+
if (policy_ == DefaultBitmapMemoryPolicy) PAX_DELETE_ARRAY(raw_.bitmap);
193208
raw_.bitmap = nullptr;
194209
}
195210

@@ -205,11 +220,13 @@ class BitmapTpl final {
205220

206221
inline size_t WordBits() const { return BM_WORD_BITS; }
207222
inline void Set(uint32 index) {
208-
if (unlikely(!raw_.HasEnoughSpace(index))) policy_(raw_, index);
223+
if (unlikely(!raw_.HasEnoughSpace(index)))
224+
policy_(raw_, index, init_value_);
209225
raw_.Set(index);
210226
}
211227
inline void SetN(uint32 index) {
212-
if (unlikely(!raw_.HasEnoughSpace(index))) policy_(raw_, index);
228+
if (unlikely(!raw_.HasEnoughSpace(index)))
229+
policy_(raw_, index, init_value_);
213230
raw_.SetN(index);
214231
}
215232
inline void Clear(uint32 index) {
@@ -228,7 +245,8 @@ class BitmapTpl final {
228245
}
229246
// invert the bit and return the old value.
230247
inline bool Toggle(uint32 index) {
231-
if (unlikely(!raw_.HasEnoughSpace(index))) policy_(raw_, index);
248+
if (unlikely(!raw_.HasEnoughSpace(index)))
249+
policy_(raw_, index, init_value_);
232250
return raw_.Toggle(index);
233251
}
234252
// count bits in range [0, index]
@@ -248,23 +266,28 @@ class BitmapTpl final {
248266

249267
inline bool Empty() const { return raw_.Empty(); }
250268

269+
// check if the bitmap is empty in the range [0, end_index)
270+
inline bool Empty(uint32 end_index) const { return raw_.Empty(end_index); }
271+
251272
BitmapMemoryPolicy Policy() const { return policy_; }
252273

253274
const BitmapRaw<T> &Raw() const { return raw_; }
254275
BitmapRaw<T> &Raw() { return raw_; }
255276

256-
static void DefaultBitmapMemoryPolicy(BitmapRaw<T> &raw, uint32 index) {
277+
static void DefaultBitmapMemoryPolicy(BitmapRaw<T> &raw, uint32 index,
278+
uint8 init_value = 0) {
257279
auto old_bitmap = raw.bitmap;
258280
auto old_size = raw.size;
259281
auto size = Max(BM_INDEX_WORD_OFF(index) + 1, old_size * 2);
260282
auto p = PAX_NEW_ARRAY<T>(size);
261283
if (old_size > 0) memcpy(p, old_bitmap, sizeof(T) * old_size);
262-
memset(&p[old_size], 0, sizeof(T) * (size - old_size));
284+
memset(&p[old_size], init_value, sizeof(T) * (size - old_size));
263285
raw.bitmap = p;
264286
raw.size = size;
265287
PAX_DELETE_ARRAY(old_bitmap);
266288
}
267-
static void ReadOnlyRefBitmap(BitmapRaw<T> & /*raw*/, uint32 /*index*/) {
289+
static void ReadOnlyRefBitmap(BitmapRaw<T> & /*raw*/, uint32 /*index*/,
290+
uint8 /*init_value*/) {
268291
// raise
269292
CBDB_RAISE(cbdb::CException::kExTypeInvalidMemoryOperation);
270293
}
@@ -280,12 +303,14 @@ class BitmapTpl final {
280303
return nwords * sizeof(T);
281304
}
282305

283-
static std::unique_ptr<BitmapTpl<T>> BitmapTplCopy(const BitmapTpl<T> *bitmap) {
306+
static std::unique_ptr<BitmapTpl<T>> BitmapTplCopy(
307+
const BitmapTpl<T> *bitmap) {
284308
if (bitmap == nullptr) return nullptr;
285309
return bitmap->Clone();
286310
}
287311

288-
static std::unique_ptr<BitmapTpl<T>> Union(const BitmapTpl<T> *a, const BitmapTpl<T> *b) {
312+
static std::unique_ptr<BitmapTpl<T>> Union(const BitmapTpl<T> *a,
313+
const BitmapTpl<T> *b) {
289314
std::unique_ptr<BitmapTpl<T>> result;
290315
const BitmapTpl<T> *large;
291316
const BitmapTpl<T> *small;
@@ -315,6 +340,7 @@ class BitmapTpl final {
315340

316341
BitmapRaw<T> raw_;
317342
BitmapMemoryPolicy policy_;
343+
uint8 init_value_ = 0;
318344
};
319345

320346
using Bitmap8 = BitmapTpl<uint8>;

contrib/pax_storage/src/cpp/comm/cbdb_wrappers.cc

Lines changed: 0 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -124,12 +124,6 @@ void cbdb::MemoryCtxRegisterResetCallback(MemoryContext context,
124124
CBDB_WRAP_END;
125125
}
126126

127-
Oid cbdb::RelationGetRelationId(Relation rel) {
128-
CBDB_WRAP_START;
129-
{ return RelationGetRelid(rel); }
130-
CBDB_WRAP_END;
131-
}
132-
133127
#ifdef RUN_GTEST
134128
Datum cbdb::DatumFromCString(const char *src, size_t length) {
135129
CBDB_WRAP_START;

contrib/pax_storage/src/cpp/comm/cbdb_wrappers.h

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -114,8 +114,6 @@ void MemoryCtxDelete(MemoryContext memory_context);
114114
void MemoryCtxRegisterResetCallback(MemoryContext context,
115115
MemoryContextCallback *cb);
116116

117-
Oid RelationGetRelationId(Relation rel);
118-
119117
static inline void *DatumToPointer(Datum d) noexcept {
120118
return DatumGetPointer(d);
121119
}
@@ -164,6 +162,10 @@ static inline float8 DatumToFloat8(Datum d) noexcept {
164162
return DatumGetFloat8(d);
165163
}
166164

165+
static pg_attribute_always_inline Oid RelationGetRelationId(Relation rel) noexcept {
166+
return RelationGetRelid(rel);
167+
}
168+
167169
BpChar *BpcharInput(const char *s, size_t len, int32 atttypmod);
168170
VarChar *VarcharInput(const char *s, size_t len, int32 atttypmod);
169171
text *CstringToText(const char *s, size_t len);

contrib/pax_storage/src/cpp/storage/columns/pax_column.cc

Lines changed: 11 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -83,13 +83,21 @@ size_t PaxColumn::GetRangeNonNullRows(size_t start_pos, size_t len) {
8383

8484
void PaxColumn::CreateNulls(size_t cap) {
8585
Assert(!null_bitmap_);
86-
null_bitmap_ = std::make_unique<Bitmap8>(cap);
87-
null_bitmap_->SetN(total_rows_);
86+
// By default, initialize every bit in the null bitmap to 1.
87+
// This is based on the assumption that null values are much less frequent
88+
// than non-null values in most datasets. As a result, when appending non-null
89+
// values, we can simply skip setting the bit to 1, since it is already set.
90+
// Only when appending a null value do we need to explicitly clear the
91+
// corresponding bit.
92+
null_bitmap_ = std::make_unique<Bitmap8>(cap, 0xff);
8893
}
8994

9095
void PaxColumn::AppendNull() {
9196
if (!null_bitmap_) {
92-
CreateNulls(DEFAULT_CAPACITY);
97+
// Ensure that the capacity of null_bitmap_ is pax_max_tuples_per_group.
98+
// This design allows the use of raw_bitmap in normal cases without
99+
// incurring the overhead of checking the bitmap's capacity.
100+
CreateNulls(pax::pax_max_tuples_per_group);
93101
}
94102
null_bitmap_->Clear(total_rows_);
95103
++total_rows_;
@@ -111,7 +119,6 @@ void PaxColumn::AppendToast(char *buffer, size_t size) {
111119
}
112120

113121
void PaxColumn::Append(char * /*buffer*/, size_t /*size*/) {
114-
if (null_bitmap_) null_bitmap_->Set(total_rows_);
115122
++total_rows_;
116123
++non_null_rows_;
117124
}

contrib/pax_storage/src/cpp/storage/columns/pax_column.h

Lines changed: 10 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -46,7 +46,8 @@
4646

4747
namespace pax {
4848

49-
#define DEFAULT_CAPACITY MIN(2048, MAX(16, MAXALIGN(pax::pax_max_tuples_per_group)))
49+
#define DEFAULT_CAPACITY \
50+
MIN(2048, MAX(16, MAXALIGN(pax::pax_max_tuples_per_group)))
5051

5152
// Used to mapping pg_type
5253
enum PaxColumnTypeInMem {
@@ -230,7 +231,14 @@ class PaxColumn {
230231
inline bool HasNull() { return null_bitmap_ != nullptr; }
231232

232233
// Are all values null?
233-
inline bool AllNull() const { return null_bitmap_ && null_bitmap_->Empty(); }
234+
// Check whether all bits in the specified range are zero.
235+
// In pax_column, to avoid checking the capacity of the null bitmap, we
236+
// allocate memory based on pax_max_tuples_per_group. As a result, the last
237+
// group may contain fewer tuples than pax_max_tuples_per_group, so we need to
238+
// check whether all bits in the range [0, total_rows_) are zero.
239+
inline bool AllNull() const {
240+
return null_bitmap_ && null_bitmap_->Empty(total_rows_);
241+
}
234242

235243
// Set the null bitmap
236244
inline void SetBitmap(std::unique_ptr<Bitmap8> null_bitmap) {

contrib/pax_storage/src/cpp/storage/orc/orc_writer.cc

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -367,7 +367,11 @@ std::vector<std::pair<int, Datum>> OrcWriter::PrepareWriteTuple(
367367
// We always need to ensure that Numeric has the 4B header, otherwise it will
368368
// be converted twice in the vectorization path.
369369
if (required_stats_cols[i] || VARATT_IS_COMPRESSED(tts_value_vl) ||
370-
VARATT_IS_EXTERNAL(tts_value_vl) || attrs->atttypid == NUMERICOID) {
370+
VARATT_IS_EXTERNAL(tts_value_vl)
371+
#ifdef VEC_BUILD
372+
|| attrs->atttypid == NUMERICOID
373+
#endif
374+
) {
371375
// still detoast the original toast
372376
detoast_vl = cbdb::PgDeToastDatum(tts_value_vl);
373377
Assert(detoast_vl != nullptr);

contrib/pax_storage/src/cpp/storage/pax.cc

Lines changed: 16 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -49,6 +49,8 @@
4949
#include "storage/vec/pax_vec_reader.h"
5050
#endif
5151

52+
#define PAX_SPLIT_STRATEGY_CHECK_INTERVAL (16)
53+
5254
namespace paxc {
5355
class IndexUpdaterInternal {
5456
public:
@@ -280,14 +282,25 @@ void TableWriter::Open() {
280282
// insert tuple into the aux table before inserting any tuples.
281283
cbdb::InsertMicroPartitionPlaceHolder(RelationGetRelid(relation_),
282284
current_blockno_);
285+
cur_physical_size_ = 0;
283286
}
284287

285288
void TableWriter::WriteTuple(TupleTableSlot *slot) {
286289
Assert(writer_);
287290
Assert(strategy_);
288-
// should check split strategy before write tuple
289-
// otherwise, may got a empty file in the disk
290-
if (strategy_->ShouldSplit(writer_->PhysicalSize(), num_tuples_)) {
291+
// Because of the CTID constraint, we have to strictly enforce the accuracy of
292+
// the tuple count and make sure it doesn't exceed
293+
// PAX_MAX_NUM_TUPLES_PER_FILE. That's why we kept this precise check here.
294+
295+
// On the other hand, the biggest performance hit here is the PhysicalSize()
296+
// function. So to reduce the overhead of calling it so often,
297+
// we only update the file size every PAX_SPLIT_STRATEGY_CHECK_INTERVAL
298+
// tuples.
299+
if ((num_tuples_ % PAX_SPLIT_STRATEGY_CHECK_INTERVAL) == 0) {
300+
cur_physical_size_ = writer_->PhysicalSize();
301+
}
302+
303+
if (strategy_->ShouldSplit(cur_physical_size_, num_tuples_)) {
291304
writer_->Close();
292305
writer_ = nullptr;
293306
Open();

contrib/pax_storage/src/cpp/storage/pax.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -131,6 +131,7 @@ class TableWriter {
131131
std::vector<std::tuple<ColumnEncoding_Kind, int>> encoding_opts_;
132132

133133
bool is_dfs_table_space_;
134+
size_t cur_physical_size_ = 0;
134135
};
135136

136137
class TableReader final {

0 commit comments

Comments
 (0)