-
Notifications
You must be signed in to change notification settings - Fork 111
feat(schema): represent, serialize and validate v3 column default values (1/4) #746
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -24,7 +24,9 @@ | |
| /// type (e.g. a struct). | ||
|
|
||
| #include <cstdint> | ||
| #include <functional> | ||
| #include <memory> | ||
| #include <optional> | ||
| #include <string> | ||
| #include <string_view> | ||
|
|
||
|
|
@@ -46,8 +48,14 @@ class ICEBERG_EXPORT SchemaField : public iceberg::util::Formattable { | |
| /// \param[in] type The field type. | ||
| /// \param[in] optional Whether values of this field are required or nullable. | ||
| /// \param[in] doc Optional documentation string for the field. | ||
| /// \param[in] initial_default The v3 `initial-default` value, or null if absent. The | ||
| /// field shares ownership of the (immutable) value. | ||
| /// \param[in] write_default The v3 `write-default` value, or null if absent. The field | ||
| /// shares ownership of the (immutable) value. | ||
| SchemaField(int32_t field_id, std::string_view name, std::shared_ptr<Type> type, | ||
| bool optional, std::string_view doc = {}); | ||
| bool optional, std::string_view doc = {}, | ||
| std::shared_ptr<const Literal> initial_default = nullptr, | ||
| std::shared_ptr<const Literal> write_default = nullptr); | ||
|
|
||
| /// \brief Construct an optional (nullable) field. | ||
| static SchemaField MakeOptional(int32_t field_id, std::string_view name, | ||
|
|
@@ -71,6 +79,32 @@ class ICEBERG_EXPORT SchemaField : public iceberg::util::Formattable { | |
| /// \brief Get the field documentation. | ||
| std::string_view doc() const; | ||
|
|
||
| /// \brief Get the default value for this field used when reading rows written | ||
| /// before the field existed (v3 `initial-default`). Empty if absent. | ||
| /// | ||
| /// The returned reference is a non-owning view into a value owned by this field; | ||
| /// it remains valid for the lifetime of this SchemaField. | ||
| [[nodiscard]] std::optional<std::reference_wrapper<const Literal>> initial_default() | ||
| const; | ||
|
|
||
| /// \brief Get the default value for this field used when a writer does not | ||
| /// supply a value (v3 `write-default`). Empty if absent. | ||
| /// | ||
| /// The returned reference is a non-owning view into a value owned by this field; | ||
| /// it remains valid for the lifetime of this SchemaField. | ||
| [[nodiscard]] std::optional<std::reference_wrapper<const Literal>> write_default() | ||
| const; | ||
|
|
||
| /// \brief Get the shared owning pointer to the `initial-default` value, or null if | ||
| /// absent. Prefer initial_default() for reading; this exists so a rebuilt field can | ||
| /// share the (immutable) value rather than copy it. | ||
| [[nodiscard]] const std::shared_ptr<const Literal>& initial_default_ptr() const; | ||
|
|
||
| /// \brief Get the shared owning pointer to the `write-default` value, or null if | ||
| /// absent. Prefer write_default() for reading; this exists so a rebuilt field can | ||
| /// share the (immutable) value rather than copy it. | ||
| [[nodiscard]] const std::shared_ptr<const Literal>& write_default_ptr() const; | ||
|
|
||
| [[nodiscard]] std::string ToString() const override; | ||
|
|
||
| Status Validate() const; | ||
|
|
@@ -100,6 +134,11 @@ class ICEBERG_EXPORT SchemaField : public iceberg::util::Formattable { | |
| std::shared_ptr<Type> type_; | ||
| bool optional_; | ||
| std::string doc_; | ||
| // Default values are owned by this field and never mutated after being set; copies | ||
| // of the field share the same payload (reference-counted) instead of deep-copying, | ||
| // like `type_` above. Sharing is unobservable because the payload is immutable. | ||
| std::shared_ptr<const Literal> initial_default_; | ||
| std::shared_ptr<const Literal> write_default_; | ||
|
Comment on lines
+140
to
+141
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Good catch, confirmed. Defaults are now constructor args, and |
||
| }; | ||
|
|
||
| } // namespace iceberg | ||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The deserialization first constructs a bare `SchemaField`, then conditionally calls `WithInitialDefault`/`WithWriteDefault`, each of which copies the entire field (including the `shared_ptr<Type>`). This is an unnecessary intermediate copy.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Agreed — `FieldFromJson` now parses the defaults first and builds the field in one construction. The intermediate copy is gone.