Skip to content

Commit f1d6691

Browse files
committed
Optimize OrcWriter: precompute varlena column indices to skip unnecessary loop iterations
Reduces per-tuple branching and cache touches, especially on schemas with many fixed-length/byval columns. Changes: (1) Add an OrcWriter member, std::vector<int> varlena_slowpath_indices_. (2) Precompute the indices of non-byval, typlen == -1 (varlena) columns in the constructor from tuple_desc. (3) Update PrepareWriteTuple to iterate only over the precomputed indices, skipping fixed-length and byval columns.
1 parent 0cd5c60 commit f1d6691

2 files changed

Lines changed: 15 additions & 6 deletions

File tree

contrib/pax_storage/src/cpp/storage/orc/orc_writer.cc

Lines changed: 12 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -249,6 +249,16 @@ OrcWriter::OrcWriter(
249249

250250
group_stats_.Initialize(writer_options.enable_min_max_col_idxs,
251251
writer_options.enable_bf_col_idxs);
252+
253+
// Precompute slowpath indices for varlena columns (non-byval and typlen == -1)
254+
varlena_slowpath_indices_.clear();
255+
varlena_slowpath_indices_.reserve(writer_options.rel_tuple_desc->natts);
256+
for (int i = 0; i < writer_options.rel_tuple_desc->natts; ++i) {
257+
auto attrs = TupleDescAttr(writer_options.rel_tuple_desc, i);
258+
if (!attrs->attbyval && attrs->attlen == -1) {
259+
varlena_slowpath_indices_.push_back(i);
260+
}
261+
}
252262
}
253263

254264
OrcWriter::~OrcWriter() {}
@@ -311,8 +321,6 @@ void OrcWriter::Flush() {
311321
std::vector<std::pair<int, Datum>> OrcWriter::PrepareWriteTuple(
312322
TupleTableSlot *table_slot) {
313323
TupleDesc tuple_desc;
314-
int16 type_len;
315-
bool type_by_val;
316324
bool is_null;
317325
Datum tts_value;
318326
char type_storage;
@@ -323,18 +331,16 @@ std::vector<std::pair<int, Datum>> OrcWriter::PrepareWriteTuple(
323331
Assert(tuple_desc);
324332
const auto &required_stats_cols = group_stats_.GetRequiredStatsColsMask();
325333

326-
for (int i = 0; i < tuple_desc->natts; i++) {
334+
for (int i : varlena_slowpath_indices_) {
327335
bool save_origin_datum;
328336
auto attrs = TupleDescAttr(tuple_desc, i);
329-
type_len = attrs->attlen;
330-
type_by_val = attrs->attbyval;
331337
is_null = table_slot->tts_isnull[i];
332338
tts_value = table_slot->tts_values[i];
333339
type_storage = attrs->attstorage;
334340

335341
AssertImply(attrs->attisdropped, is_null);
336342

337-
if (is_null || type_by_val || type_len != -1) {
343+
if (is_null) {
338344
continue;
339345
}
340346

contrib/pax_storage/src/cpp/storage/orc/porc.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -138,6 +138,9 @@ class OrcWriter : public MicroPartitionWriter {
138138
::pax::porc::proto::Footer file_footer_;
139139
::pax::porc::proto::PostScript post_script_;
140140
::pax::MicroPartitionStats group_stats_;
141+
142+
// indices of columns that are non-byval and have typlen == -1 (varlena)
143+
std::vector<int> varlena_slowpath_indices_;
141144
};
142145

143146
class OrcReader : public MicroPartitionReader {

0 commit comments

Comments
 (0)