From 1eb96812475d0684865f8195f6f7fbc248e9571d Mon Sep 17 00:00:00 2001 From: Xin Huang Date: Mon, 29 Jun 2026 15:06:49 -0700 Subject: [PATCH 1/4] feat(evolution): support v3 column default values in UpdateSchema (4/4) AddColumn / AddRequiredColumn now accept an optional default value, used as both the initial-default and write-default of the new column; a non-null default also lets a required column be added (or an added column be made required) without AllowIncompatibleChanges(). UpdateColumnDefault sets or clears the write-default. Defaults are cast to the column type (rejecting uncastable or out-of-range values) and preserved across rename / doc / type updates and nested field-id reassignment. Part 4 of the v3 column-default-values work (POC #731), built on #746. --- .../test/resources/TableMetadataV3Valid.json | 123 ++++++++ src/iceberg/test/update_schema_test.cc | 279 ++++++++++++++++++ src/iceberg/update/update_schema.cc | 171 +++++++++-- src/iceberg/update/update_schema.h | 56 +++- src/iceberg/util/type_util.cc | 19 +- 5 files changed, 598 insertions(+), 50 deletions(-) create mode 100644 src/iceberg/test/resources/TableMetadataV3Valid.json diff --git a/src/iceberg/test/resources/TableMetadataV3Valid.json b/src/iceberg/test/resources/TableMetadataV3Valid.json new file mode 100644 index 000000000..712d4b5bd --- /dev/null +++ b/src/iceberg/test/resources/TableMetadataV3Valid.json @@ -0,0 +1,123 @@ +{ + "format-version": 3, + "table-uuid": "9c12d441-03fe-4693-9a96-a0705ddf69c1", + "location": "s3://bucket/test/location", + "last-sequence-number": 34, + "next-row-id": 0, + "last-updated-ms": 1602638573590, + "last-column-id": 3, + "current-schema-id": 1, + "schemas": [ + { + "type": "struct", + "schema-id": 0, + "fields": [ + { + "id": 1, + "name": "x", + "required": true, + "type": "long" + } + ] + }, + { + "type": "struct", + "schema-id": 1, + "identifier-field-ids": [ + 1, + 2 + ], + "fields": [ + { + "id": 1, + "name": "x", + "required": true, + "type": "long" + }, + { + "id": 2, + "name": "y", + "required": true, + "type": "long", + "doc": "comment" + }, + { + "id": 3, + "name": "z", + "required": true, + "type": "long" + } + ] + } + ], + "default-spec-id": 0, + "partition-specs": [ + { + "spec-id": 0, + "fields": [ + { + "name": "x", + "transform": "identity", + "source-id": 1, + "field-id": 1000 + } + ] + } + ], + "last-partition-id": 1000, + "default-sort-order-id": 3, + "sort-orders": [ + { + "order-id": 3, + "fields": [ + { + "transform": "identity", + "source-id": 2, + "direction": "asc", + "null-order": "nulls-first" + }, + { + "transform": "bucket[4]", + "source-id": 3, + "direction": "desc", + "null-order": "nulls-last" + } + ] + } + ], + "properties": {}, + "current-snapshot-id": 3055729675574597004, + "snapshots": [ + { + "snapshot-id": 3051729675574597004, + "timestamp-ms": 1515100955770, + "sequence-number": 0, + "summary": { + "operation": "append" + }, + "manifest-list": "s3://a/b/1.avro" + }, + { + "snapshot-id": 3055729675574597004, + "parent-snapshot-id": 3051729675574597004, + "timestamp-ms": 1555100955770, + "sequence-number": 1, + "summary": { + "operation": "append" + }, + "manifest-list": "s3://a/b/2.avro", + "schema-id": 1 + } + ], + "snapshot-log": [ + { + "snapshot-id": 3051729675574597004, + "timestamp-ms": 1515100955770 + }, + { + "snapshot-id": 3055729675574597004, + "timestamp-ms": 1555100955770 + } + ], + "metadata-log": [] +} diff --git a/src/iceberg/test/update_schema_test.cc b/src/iceberg/test/update_schema_test.cc index 07872e69a..23102594a 100644 --- a/src/iceberg/test/update_schema_test.cc +++ b/src/iceberg/test/update_schema_test.cc @@ -19,11 +19,13 @@ #include "iceberg/update/update_schema.h" +#include #include #include #include +#include "iceberg/expression/literal.h" #include "iceberg/schema.h" #include "iceberg/schema_field.h" #include "iceberg/test/matchers.h" @@ -82,6 +84,283 @@ TEST_F(UpdateSchemaTest, AddRequiredColumnWithAllowIncompatible) { EXPECT_EQ(new_field.doc(), "A required string column"); } +/// Default values require a v3 table for Apply() to validate successfully. +class UpdateSchemaDefaultValueTest : public UpdateSchemaTest { + protected: + std::string MetadataResource() const override { return "TableMetadataV3Valid.json"; } +}; + +TEST_F(UpdateSchemaTest, AddColumnWithDefaultValueRequiresV3) { + ICEBERG_UNWRAP_OR_FAIL(auto update, table_->NewUpdateSchema()); + update->AddColumn("new_col", int32(), "An integer column", Literal::Int(42)); + + auto result = update->Apply(); + EXPECT_THAT(result, IsError(ErrorKind::kInvalidSchema)); + EXPECT_THAT(result, HasErrorMessage("is not supported until v3")); +} + +TEST_F(UpdateSchemaDefaultValueTest, AddColumnWithDefaultValue) { + ICEBERG_UNWRAP_OR_FAIL(auto update, table_->NewUpdateSchema()); + update->AddColumn("new_col", int32(), "An integer column", Literal::Int(42)); + + ICEBERG_UNWRAP_OR_FAIL(auto result, update->Apply()); + ICEBERG_UNWRAP_OR_FAIL(auto new_field_opt, result.schema->FindFieldByName("new_col")); + ASSERT_TRUE(new_field_opt.has_value()); + + const auto& new_field = new_field_opt->get(); + ASSERT_NE(new_field.initial_default(), nullptr); + EXPECT_EQ(*new_field.initial_default(), Literal::Int(42)); + ASSERT_NE(new_field.write_default(), nullptr); + EXPECT_EQ(*new_field.write_default(), Literal::Int(42)); +} + +TEST_F(UpdateSchemaDefaultValueTest, AddRequiredColumnWithDefaultValue) { + // A required column with a default does not need AllowIncompatibleChanges(): + // old rows read the initial-default instead of null. + ICEBERG_UNWRAP_OR_FAIL(auto update, table_->NewUpdateSchema()); + update->AddRequiredColumn("required_col", string(), "A required string column", + Literal::String("n/a")); + + ICEBERG_UNWRAP_OR_FAIL(auto result, update->Apply()); + ICEBERG_UNWRAP_OR_FAIL(auto new_field_opt, + result.schema->FindFieldByName("required_col")); + ASSERT_TRUE(new_field_opt.has_value()); + + const auto& new_field = new_field_opt->get(); + EXPECT_FALSE(new_field.optional()); + ASSERT_NE(new_field.initial_default(), nullptr); + EXPECT_EQ(*new_field.initial_default(), Literal::String("n/a")); + ASSERT_NE(new_field.write_default(), nullptr); + EXPECT_EQ(*new_field.write_default(), Literal::String("n/a")); +} + +TEST_F(UpdateSchemaDefaultValueTest, AddColumnWithMismatchedDefaultValueFails) { + ICEBERG_UNWRAP_OR_FAIL(auto update, table_->NewUpdateSchema()); + update->AddColumn("new_col", int32(), "An integer column", Literal::String("oops")); + + auto result = update->Apply(); + EXPECT_THAT(result, IsError(ErrorKind::kValidationFailed)); + EXPECT_THAT(result, HasErrorMessage("Cannot cast default value")); +} + +TEST_F(UpdateSchemaDefaultValueTest, AddColumnWithNarrowingDefaultValueFails) { + // CastTo signals narrowing with AboveMax/BelowMin sentinels; they must not be + // stored as defaults. + ICEBERG_UNWRAP_OR_FAIL(auto update, table_->NewUpdateSchema()); + update->AddColumn("new_col", int32(), "An integer column", + Literal::Long(std::numeric_limits::max())); + + auto result = update->Apply(); + EXPECT_THAT(result, IsError(ErrorKind::kValidationFailed)); + EXPECT_THAT(result, HasErrorMessage("Cannot cast default value")); +} + +TEST_F(UpdateSchemaDefaultValueTest, UpdateColumnDefault) { + ICEBERG_UNWRAP_OR_FAIL(auto update, table_->NewUpdateSchema()); + update->AddColumn("new_col", int32(), "An integer column", Literal::Int(42)) + .UpdateColumnDefault("new_col", Literal::Int(7)); + + ICEBERG_UNWRAP_OR_FAIL(auto result, update->Apply()); + ICEBERG_UNWRAP_OR_FAIL(auto new_field_opt, result.schema->FindFieldByName("new_col")); + ASSERT_TRUE(new_field_opt.has_value()); + + const auto& new_field = new_field_opt->get(); + // initial-default is fixed at column addition; write-default is updated. + ASSERT_NE(new_field.initial_default(), nullptr); + EXPECT_EQ(*new_field.initial_default(), Literal::Int(42)); + ASSERT_NE(new_field.write_default(), nullptr); + EXPECT_EQ(*new_field.write_default(), Literal::Int(7)); +} + +TEST_F(UpdateSchemaDefaultValueTest, UpdateColumnDefaultOnExistingColumn) { + // Updating the write-default of a pre-existing column must survive Apply(). + ICEBERG_UNWRAP_OR_FAIL(auto update, table_->NewUpdateSchema()); + update->UpdateColumnDefault("x", Literal::Long(0)); + + ICEBERG_UNWRAP_OR_FAIL(auto result, update->Apply()); + ICEBERG_UNWRAP_OR_FAIL(auto field_opt, result.schema->FindFieldByName("x")); + ASSERT_TRUE(field_opt.has_value()); + + const auto& field = field_opt->get(); + EXPECT_EQ(field.initial_default(), nullptr); + ASSERT_NE(field.write_default(), nullptr); + EXPECT_EQ(*field.write_default(), Literal::Long(0)); +} + +TEST_F(UpdateSchemaDefaultValueTest, UpdateColumnDefaultClearsWithNullopt) { + // Passing std::nullopt removes the write-default (Java parity with null). + ICEBERG_UNWRAP_OR_FAIL(auto update, table_->NewUpdateSchema()); + update->AddColumn("new_col", int32(), "An integer column", Literal::Int(42)) + .UpdateColumnDefault("new_col", std::nullopt); + + ICEBERG_UNWRAP_OR_FAIL(auto result, update->Apply()); + ICEBERG_UNWRAP_OR_FAIL(auto field_opt, result.schema->FindFieldByName("new_col")); + ASSERT_TRUE(field_opt.has_value()); + + const auto& field = field_opt->get(); + // initial-default stays; write-default is cleared. + ASSERT_NE(field.initial_default(), nullptr); + EXPECT_EQ(*field.initial_default(), Literal::Int(42)); + EXPECT_EQ(field.write_default(), nullptr); +} + +TEST_F(UpdateSchemaDefaultValueTest, AddNestedColumnPreservesNestedDefaults) { + // The added column's type gets fresh field ids; defaults on its nested fields must + // survive the reassignment. + auto nested_type = std::make_shared(std::vector{ + SchemaField(/*field_id=*/100, "inner", int32(), /*optional=*/false, /*doc=*/{}, + std::make_shared(Literal::Int(5)), + std::make_shared(Literal::Int(9)))}); + + ICEBERG_UNWRAP_OR_FAIL(auto update, table_->NewUpdateSchema()); + update->AddColumn("outer", nested_type, "A nested column"); + + ICEBERG_UNWRAP_OR_FAIL(auto result, update->Apply()); + ICEBERG_UNWRAP_OR_FAIL(auto outer_opt, result.schema->FindFieldByName("outer")); + ASSERT_TRUE(outer_opt.has_value()); + + const auto& outer_struct = + internal::checked_cast(*outer_opt->get().type()); + ASSERT_EQ(outer_struct.fields().size(), 1); + const SchemaField& inner = outer_struct.fields()[0]; + ASSERT_NE(inner.initial_default(), nullptr); + EXPECT_EQ(*inner.initial_default(), Literal::Int(5)); + ASSERT_NE(inner.write_default(), nullptr); + EXPECT_EQ(*inner.write_default(), Literal::Int(9)); +} + +TEST_F(UpdateSchemaDefaultValueTest, UpdateColumnDefaultCastsToColumnType) { + // An int default for a long column is cast to the column type, not rejected. + ICEBERG_UNWRAP_OR_FAIL(auto update, table_->NewUpdateSchema()); + update->UpdateColumnDefault("x", Literal::Int(5)); + + ICEBERG_UNWRAP_OR_FAIL(auto result, update->Apply()); + ICEBERG_UNWRAP_OR_FAIL(auto field_opt, result.schema->FindFieldByName("x")); + ASSERT_TRUE(field_opt.has_value()); + + const auto& field = field_opt->get(); + ASSERT_NE(field.write_default(), nullptr); + EXPECT_EQ(*field.write_default(), Literal::Long(5)); +} + +TEST_F(UpdateSchemaDefaultValueTest, RequireColumnAddedWithDefault) { + ICEBERG_UNWRAP_OR_FAIL(auto update, table_->NewUpdateSchema()); + update->AddColumn("new_col", int32(), "An integer column", Literal::Int(42)) + .RequireColumn("new_col"); + + ICEBERG_UNWRAP_OR_FAIL(auto result, update->Apply()); + ICEBERG_UNWRAP_OR_FAIL(auto new_field_opt, result.schema->FindFieldByName("new_col")); + ASSERT_TRUE(new_field_opt.has_value()); + EXPECT_FALSE(new_field_opt->get().optional()); +} + +TEST_F(UpdateSchemaDefaultValueTest, RequireNestedMapListColumnAddedWithDefault) { + // A field added into a map value-struct or list element-struct is addressed by its + // user-facing short name ("locations.alt", "points.z"), which drops the synthetic + // "value"/"element" segment. Making such a just-added, defaulted column required must + // resolve that short name and not require AllowIncompatibleChanges(). + auto map_key_struct = std::make_shared( + std::vector{SchemaField(20, "address", string(), false)}); + auto map_value_struct = std::make_shared( + std::vector{SchemaField(12, "lat", float32(), false), + SchemaField(13, "long", float32(), false)}); + auto map_type = + std::make_shared(SchemaField(10, "key", map_key_struct, false), + SchemaField(11, "value", map_value_struct, false)); + auto list_element_struct = std::make_shared(std::vector{ + SchemaField(15, "x", int64(), false), SchemaField(16, "y", int64(), false)}); + auto list_type = + std::make_shared(SchemaField(14, "element", list_element_struct, true)); + + ICEBERG_UNWRAP_OR_FAIL(auto setup_update, table_->NewUpdateSchema()); + setup_update->AddColumn("locations", map_type, "map of address to coordinate") + .AddColumn("points", list_type, "2-D cartesian points"); + EXPECT_THAT(setup_update->Commit(), IsOk()); + + ICEBERG_UNWRAP_OR_FAIL(auto reloaded, catalog_->LoadTable(table_ident_)); + ICEBERG_UNWRAP_OR_FAIL(auto update, reloaded->NewUpdateSchema()); + update->AddColumn("locations", "alt", float32(), "altitude", Literal::Float(0.0f)) + .RequireColumn("locations.alt") + .AddColumn("points", "z", int64(), "z coordinate", Literal::Long(0)) + .RequireColumn("points.z"); + + ICEBERG_UNWRAP_OR_FAIL(auto result, update->Apply()); + + ICEBERG_UNWRAP_OR_FAIL(auto locations_opt, result.schema->FindFieldByName("locations")); + ASSERT_TRUE(locations_opt.has_value()); + const auto& map = checked_cast(*locations_opt->get().type()); + const auto& value_struct = checked_cast(*map.value().type()); + ICEBERG_UNWRAP_OR_FAIL(auto alt_opt, value_struct.GetFieldByName("alt")); + ASSERT_TRUE(alt_opt.has_value()); + EXPECT_FALSE(alt_opt->get().optional()); + ASSERT_NE(alt_opt->get().initial_default(), nullptr); + + ICEBERG_UNWRAP_OR_FAIL(auto points_opt, result.schema->FindFieldByName("points")); + ASSERT_TRUE(points_opt.has_value()); + const auto& list = checked_cast(*points_opt->get().type()); + const auto& element_struct = checked_cast(*list.element().type()); + ICEBERG_UNWRAP_OR_FAIL(auto z_opt, element_struct.GetFieldByName("z")); + ASSERT_TRUE(z_opt.has_value()); + EXPECT_FALSE(z_opt->get().optional()); + ASSERT_NE(z_opt->get().initial_default(), nullptr); +} + +TEST_F(UpdateSchemaDefaultValueTest, UpdateColumnDocPreservesDefaultValues) { + ICEBERG_UNWRAP_OR_FAIL(auto update, table_->NewUpdateSchema()); + update->AddColumn("new_col", int32(), "An integer column", Literal::Int(42)) + .UpdateColumnDoc("new_col", "updated doc"); + + ICEBERG_UNWRAP_OR_FAIL(auto result, update->Apply()); + ICEBERG_UNWRAP_OR_FAIL(auto field_opt, result.schema->FindFieldByName("new_col")); + ASSERT_TRUE(field_opt.has_value()); + + const auto& field = field_opt->get(); + EXPECT_EQ(field.doc(), "updated doc"); + ASSERT_NE(field.initial_default(), nullptr); + EXPECT_EQ(*field.initial_default(), Literal::Int(42)); + ASSERT_NE(field.write_default(), nullptr); + EXPECT_EQ(*field.write_default(), Literal::Int(42)); +} + +TEST_F(UpdateSchemaDefaultValueTest, UpdateColumnTypePromotesDefaultValues) { + ICEBERG_UNWRAP_OR_FAIL(auto update, table_->NewUpdateSchema()); + update->AddColumn("new_col", int32(), "An integer column", Literal::Int(42)) + .UpdateColumn("new_col", int64()); + + ICEBERG_UNWRAP_OR_FAIL(auto result, update->Apply()); + ICEBERG_UNWRAP_OR_FAIL(auto field_opt, result.schema->FindFieldByName("new_col")); + ASSERT_TRUE(field_opt.has_value()); + + const auto& field = field_opt->get(); + EXPECT_EQ(field.type(), int64()); + ASSERT_NE(field.initial_default(), nullptr); + EXPECT_EQ(*field.initial_default(), Literal::Long(42)); + ASSERT_NE(field.write_default(), nullptr); + EXPECT_EQ(*field.write_default(), Literal::Long(42)); +} + +TEST_F(UpdateSchemaDefaultValueTest, UpdateColumnTypePromotesDecimalDefault) { + // decimal(9,2) -> decimal(18,2) is an allowed precision widening. Literal::CastTo + // does not cast between decimal types, so the default must still be promoted (the + // unscaled value is unchanged). + ICEBERG_UNWRAP_OR_FAIL(auto update, table_->NewUpdateSchema()); + update + ->AddColumn("new_col", decimal(9, 2), "A decimal column", + Literal::Decimal(1234, 9, 2)) + .UpdateColumn("new_col", decimal(18, 2)); + + ICEBERG_UNWRAP_OR_FAIL(auto result, update->Apply()); + ICEBERG_UNWRAP_OR_FAIL(auto field_opt, result.schema->FindFieldByName("new_col")); + ASSERT_TRUE(field_opt.has_value()); + + const auto& field = field_opt->get(); + EXPECT_EQ(field.type()->ToString(), decimal(18, 2)->ToString()); + ASSERT_NE(field.initial_default(), nullptr); + EXPECT_EQ(*field.initial_default(), Literal::Decimal(1234, 18, 2)); + ASSERT_NE(field.write_default(), nullptr); + EXPECT_EQ(*field.write_default(), Literal::Decimal(1234, 18, 2)); +} + TEST_F(UpdateSchemaTest, AddMultipleColumns) { ICEBERG_UNWRAP_OR_FAIL(auto update, table_->NewUpdateSchema()); update->AddColumn("col1", int32(), "First column") diff --git a/src/iceberg/update/update_schema.cc b/src/iceberg/update/update_schema.cc index 5c50ee41d..836a4f2f2 100644 --- a/src/iceberg/update/update_schema.cc +++ b/src/iceberg/update/update_schema.cc @@ -29,6 +29,7 @@ #include #include +#include "iceberg/expression/literal.h" #include "iceberg/json_serde_internal.h" #include "iceberg/name_mapping.h" #include "iceberg/schema.h" @@ -39,6 +40,7 @@ #include "iceberg/type.h" #include "iceberg/util/checked_cast.h" #include "iceberg/util/error_collector.h" +#include "iceberg/util/formatter.h" // IWYU pragma: keep #include "iceberg/util/macros.h" #include "iceberg/util/string_util.h" #include "iceberg/util/type_util.h" @@ -213,12 +215,17 @@ class ApplyChangesVisitor { // any child fields that were added. if (update_it != updates_.end()) { + // The updated field already carries the new name/doc/nullability/defaults; only + // its id and (recursively rewritten) type need to be applied. const auto& update_field = update_it->second; return SchemaField(field_id, update_field->name(), std::move(result_type), - update_field->optional(), update_field->doc()); + update_field->optional(), update_field->doc(), + update_field->initial_default(), update_field->write_default()); } else if (result_type != field.type()) { + // A nested type was rewritten by recursion; preserve every other attribute, + // including any default values. return SchemaField(field_id, field.name(), std::move(result_type), field.optional(), - field.doc()); + field.doc(), field.initial_default(), field.write_default()); } else { return field; } @@ -345,39 +352,43 @@ UpdateSchema& UpdateSchema::CaseSensitive(bool case_sensitive) { } UpdateSchema& UpdateSchema::AddColumn(std::string_view name, std::shared_ptr type, - std::string_view doc) { + std::string_view doc, + std::optional default_value) { ICEBERG_BUILDER_CHECK(!name.contains('.'), "Cannot add column with ambiguous name: {}, use " "AddColumn(parent, name, type, doc)", name); - return AddColumnInternal(std::nullopt, name, /*is_optional=*/true, std::move(type), - doc); + return AddColumnInternal(std::nullopt, name, /*is_optional=*/true, std::move(type), doc, + std::move(default_value)); } UpdateSchema& UpdateSchema::AddColumn(std::optional parent, std::string_view name, std::shared_ptr type, - std::string_view doc) { + std::string_view doc, + std::optional default_value) { return AddColumnInternal(std::move(parent), name, /*is_optional=*/true, std::move(type), - doc); + doc, std::move(default_value)); } UpdateSchema& UpdateSchema::AddRequiredColumn(std::string_view name, std::shared_ptr type, - std::string_view doc) { + std::string_view doc, + std::optional default_value) { ICEBERG_BUILDER_CHECK(!name.contains('.'), "Cannot add column with ambiguous name: {}, use " "AddRequiredColumn(parent, name, type, doc)", name); return AddColumnInternal(std::nullopt, name, /*is_optional=*/false, std::move(type), - doc); + doc, std::move(default_value)); } UpdateSchema& UpdateSchema::AddRequiredColumn(std::optional parent, std::string_view name, std::shared_ptr type, - std::string_view doc) { + std::string_view doc, + std::optional default_value) { return AddColumnInternal(std::move(parent), name, /*is_optional=*/false, - std::move(type), doc); + std::move(type), doc, std::move(default_value)); } UpdateSchema& UpdateSchema::UpdateColumn(std::string_view name, @@ -399,8 +410,36 @@ UpdateSchema& UpdateSchema::UpdateColumn(std::string_view name, "Cannot change column type: {}: {} -> {}", name, field.type()->ToString(), new_type->ToString()); + // The SchemaField ctor stores defaults verbatim without coercing them to the field + // type, so an existing default must be promoted to the new type here; otherwise it + // would keep the old type and be rejected when the schema is validated. + auto promote_default = [&](const std::shared_ptr& value) + -> Result> { + // A null pointer means the column has no such default; nothing to promote. + if (value == nullptr) { + return nullptr; + } + // IsPromotionAllowed permits widening a decimal to a larger precision at the same + // scale, but Literal::CastTo only handles identical decimal types. Such a promotion + // leaves the unscaled value unchanged, so rebuild the literal with the new type. + if (value->type()->type_id() == TypeId::kDecimal && + new_type->type_id() == TypeId::kDecimal) { + const auto& decimal_type = internal::checked_cast(*new_type); + return std::make_shared( + Literal::Decimal(std::get(value->value()).value(), + decimal_type.precision(), decimal_type.scale())); + } + ICEBERG_ASSIGN_OR_RAISE(Literal promoted, value->CastTo(new_type)); + return std::make_shared(std::move(promoted)); + }; + ICEBERG_BUILDER_ASSIGN_OR_RETURN(std::shared_ptr initial_default, + promote_default(field.initial_default())); + ICEBERG_BUILDER_ASSIGN_OR_RETURN(std::shared_ptr write_default, + promote_default(field.write_default())); + updates_[field_id] = std::make_shared( - field.field_id(), field.name(), new_type, field.optional(), field.doc()); + field.field_id(), field.name(), new_type, field.optional(), field.doc(), + std::move(initial_default), std::move(write_default)); return *this; } @@ -420,9 +459,52 @@ UpdateSchema& UpdateSchema::UpdateColumnDoc(std::string_view name, return *this; } - updates_[field_id] = - std::make_shared(field.field_id(), field.name(), field.type(), - field.optional(), std::string(new_doc)); + updates_[field_id] = std::make_shared( + field.field_id(), field.name(), field.type(), field.optional(), new_doc, + field.initial_default(), field.write_default()); + + return *this; +} + +UpdateSchema& UpdateSchema::UpdateColumnDefault(std::string_view name, + std::optional new_default) { + ICEBERG_BUILDER_ASSIGN_OR_RETURN(auto field_opt, FindFieldForUpdate(name)); + ICEBERG_BUILDER_CHECK(field_opt.has_value(), "Cannot update missing column: {}", name); + + const auto& field = field_opt->get(); + int32_t field_id = field.field_id(); + + ICEBERG_BUILDER_CHECK(!deletes_.contains(field_id), + "Cannot update a column that will be deleted: {}", field.name()); + + // Only the write-default is replaced; the initial-default is fixed when the column is + // added and is preserved here. + auto rebuild = [&](std::shared_ptr write_default) { + return std::make_shared( + field.field_id(), field.name(), field.type(), field.optional(), field.doc(), + field.initial_default(), std::move(write_default)); + }; + + // An empty default clears the column's write-default. + if (!new_default.has_value()) { + updates_[field_id] = rebuild(nullptr); + return *this; + } + + ICEBERG_BUILDER_CHECK(field.type()->is_primitive(), + "Invalid default value for {}: {} (must be null)", *field.type(), + *new_default); + ICEBERG_BUILDER_ASSIGN_OR_RETURN_WITH_ERROR( + Literal typed_default, + new_default->CastTo(internal::checked_pointer_cast(field.type())), + "Cannot cast default value to {}: {}", *field.type(), *new_default); + // CastTo reports narrowing by returning sentinel values instead of failing. + ICEBERG_BUILDER_CHECK(!typed_default.IsNull() && !typed_default.IsAboveMax() && + !typed_default.IsBelowMin(), + "Cannot cast default value to {}: {}", *field.type(), + *new_default); + + updates_[field_id] = rebuild(std::make_shared(std::move(typed_default))); return *this; } @@ -444,8 +526,8 @@ UpdateSchema& UpdateSchema::RenameColumn(std::string_view name, update_it != updates_.end() ? *update_it->second : field; updates_[field_id] = std::make_shared( - base_field.field_id(), std::string(new_name), base_field.type(), - base_field.optional(), base_field.doc()); + base_field.field_id(), new_name, base_field.type(), base_field.optional(), + base_field.doc(), base_field.initial_default(), base_field.write_default()); auto it = std::ranges::find(identifier_field_names_, name); if (it != identifier_field_names_.end()) { @@ -474,9 +556,10 @@ UpdateSchema& UpdateSchema::UpdateColumnRequirementInternal(std::string_view nam return *this; } - // TODO(GuotaoYu): support added column with default value - // bool is_defaulted_add = IsAdded(name) && field.initial_default() != null; - bool is_defaulted_add = false; + // A column added in this update with a default value can be made required: rows + // written before the change read the initial-default instead of null. + bool is_defaulted_add = added_name_to_id_.contains(CaseSensitivityAwareName(name)) && + field.initial_default() != nullptr; ICEBERG_BUILDER_CHECK(is_optional || is_defaulted_add || allow_incompatible_changes_, "Cannot change column nullability: {}: optional -> required", @@ -625,13 +708,17 @@ Result UpdateSchema::Apply() { .updated_props = std::move(updated_props)}; } -// TODO(Guotao Yu): v3 default value is not yet supported UpdateSchema& UpdateSchema::AddColumnInternal(std::optional parent, std::string_view name, bool is_optional, std::shared_ptr type, - std::string_view doc) { + std::string_view doc, + std::optional default_value) { int32_t parent_id = kTableRootId; std::string full_name; + // User-facing name for the added column. For map/list parents this drops the + // synthetic "value"/"element" segment that full_name carries, so lookups keyed on + // the spelling a caller uses (e.g. RequireColumn("locations.alt")) can resolve it. + std::string short_name; if (parent.has_value()) { ICEBERG_BUILDER_CHECK(!parent->empty(), "Parent name cannot be empty"); @@ -661,8 +748,8 @@ UpdateSchema& UpdateSchema::AddColumnInternal(std::optional pa ICEBERG_BUILDER_CHECK(!deletes_.contains(parent_id), "Cannot add to a column that will be deleted: {}", *parent); - auto current_name = std::format("{}.{}", *parent, name); - ICEBERG_BUILDER_ASSIGN_OR_RETURN(auto current_field, FindField(current_name)); + short_name = std::format("{}.{}", *parent, name); + ICEBERG_BUILDER_ASSIGN_OR_RETURN(auto current_field, FindField(short_name)); ICEBERG_BUILDER_CHECK( !current_field.has_value() || deletes_.contains(current_field->get().field_id()), "Cannot add column, name already exists: {}.{}", *parent, name); @@ -680,16 +767,21 @@ UpdateSchema& UpdateSchema::AddColumnInternal(std::optional pa "Cannot add column, name already exists: {}", name); full_name = std::string(name); + short_name = full_name; } ICEBERG_BUILDER_CHECK( - is_optional || allow_incompatible_changes_, + is_optional || default_value.has_value() || allow_incompatible_changes_, "Incompatible change: cannot add required column without a default value: {}", full_name); int32_t new_id = AssignNewColumnId(); added_name_to_id_[CaseSensitivityAwareName(full_name)] = new_id; + // Also index the user-facing short name (differs from full_name only for map/list + // parents) so later operations in this update that address the column by its + // shorthand path resolve it. + added_name_to_id_[CaseSensitivityAwareName(short_name)] = new_id; if (parent_id != kTableRootId) { id_to_parent_[new_id] = parent_id; } @@ -697,11 +789,32 @@ UpdateSchema& UpdateSchema::AddColumnInternal(std::optional pa AssignFreshIdVisitor id_assigner([this]() { return AssignNewColumnId(); }); auto type_with_fresh_ids = id_assigner.Visit(type); - auto new_field = std::make_shared(new_id, std::string(name), - std::move(type_with_fresh_ids), - is_optional, std::string(doc)); + std::shared_ptr initial_default; + std::shared_ptr write_default; + if (default_value.has_value()) { + ICEBERG_BUILDER_CHECK(type_with_fresh_ids->is_primitive(), + "Invalid default value for {}: {} (must be null)", + *type_with_fresh_ids, *default_value); + ICEBERG_BUILDER_ASSIGN_OR_RETURN_WITH_ERROR( + Literal typed_default, + default_value->CastTo( + internal::checked_pointer_cast(type_with_fresh_ids)), + "Cannot cast default value to {}: {}", *type_with_fresh_ids, *default_value); + // CastTo reports narrowing by returning sentinel values instead of failing. + ICEBERG_BUILDER_CHECK(!typed_default.IsNull() && !typed_default.IsAboveMax() && + !typed_default.IsBelowMin(), + "Cannot cast default value to {}: {}", *type_with_fresh_ids, + *default_value); + // A new column's default applies both to rows written before it existed + // (initial-default) and to writers that omit a value (write-default). + auto shared_default = std::make_shared(std::move(typed_default)); + initial_default = shared_default; + write_default = std::move(shared_default); + } - updates_[new_id] = std::move(new_field); + updates_[new_id] = std::make_shared( + new_id, name, std::move(type_with_fresh_ids), is_optional, doc, + std::move(initial_default), std::move(write_default)); parent_to_added_ids_[parent_id].push_back(new_id); return *this; diff --git a/src/iceberg/update/update_schema.h b/src/iceberg/update/update_schema.h index 2be3732a0..e2e7fce1f 100644 --- a/src/iceberg/update/update_schema.h +++ b/src/iceberg/update/update_schema.h @@ -30,6 +30,7 @@ #include #include +#include "iceberg/expression/literal.h" #include "iceberg/iceberg_export.h" #include "iceberg/result.h" #include "iceberg/type_fwd.h" @@ -41,10 +42,6 @@ namespace iceberg { /// /// When committing, these changes will be applied to the current table metadata. /// Commit conflicts will not be resolved and will result in a CommitFailed error. -/// -/// TODO(Guotao Yu): Add support for V3 default values when adding columns. Currently, all -/// added columns use null as the default value, but Iceberg V3 supports custom -/// default values for new columns. class ICEBERG_EXPORT UpdateSchema : public PendingUpdate { public: static Result> Make( @@ -78,15 +75,19 @@ class ICEBERG_EXPORT UpdateSchema : public PendingUpdate { /// If type is a nested type, its field IDs are reassigned when added to the /// existing schema. /// - /// The added column will be optional with a null default value. + /// Without a default value, the added column is optional with a null default. + /// With a default value (v3+), it is used as both the `initial-default` and the + /// `write-default` of the new column. /// /// \param name Name for the new column. /// \param type Type for the new column. /// \param doc Documentation string for the new column. + /// \param default_value Optional default value for the new column (v3+). /// \return Reference to this for method chaining. /// \note InvalidArgument will be reported if name contains ".". UpdateSchema& AddColumn(std::string_view name, std::shared_ptr type, - std::string_view doc = ""); + std::string_view doc = "", + std::optional default_value = std::nullopt); /// \brief Add a new optional column to a nested struct with documentation. /// @@ -102,22 +103,28 @@ class ICEBERG_EXPORT UpdateSchema : public PendingUpdate { /// If type is a nested type, its field IDs are reassigned when added to the /// existing schema. /// - /// The added column will be optional with a null default value. + /// Without a default value, the added column is optional with a null default. + /// With a default value (v3+), it is used as both the `initial-default` and the + /// `write-default` of the new column. /// /// \param parent Name of the parent struct to which the column will be added. /// \param name Name for the new column. /// \param type Type for the new column. /// \param doc Documentation string for the new column. + /// \param default_value Optional default value for the new column (v3+). /// \return Reference to this for method chaining. /// \note InvalidArgument will be reported if parent doesn't identify a struct. UpdateSchema& AddColumn(std::optional parent, std::string_view name, - std::shared_ptr type, std::string_view doc = ""); + std::shared_ptr type, std::string_view doc = "", + std::optional default_value = std::nullopt); /// \brief Add a new required top-level column with documentation. /// /// Adding a required column without a default is an incompatible change that can /// break reading older data. To suppress exceptions thrown when an incompatible - /// change is detected, call AllowIncompatibleChanges(). + /// change is detected, call AllowIncompatibleChanges() or provide a default value + /// (v3+), which is used as both the `initial-default` and the `write-default` of + /// the new column. /// /// Because "." may be interpreted as a column path separator or may be used in /// field names, it is not allowed in names passed to this method. To add to nested @@ -130,16 +137,20 @@ class ICEBERG_EXPORT UpdateSchema : public PendingUpdate { /// \param name Name for the new column. /// \param type Type for the new column. /// \param doc Documentation string for the new column. + /// \param default_value Optional default value for the new column (v3+). /// \return Reference to this for method chaining. /// \note InvalidArgument will be reported if name contains ".". UpdateSchema& AddRequiredColumn(std::string_view name, std::shared_ptr type, - std::string_view doc = ""); + std::string_view doc = "", + std::optional default_value = std::nullopt); /// \brief Add a new required column to a nested struct with documentation. /// /// Adding a required column without a default is an incompatible change that can /// break reading older data. To suppress exceptions thrown when an incompatible - /// change is detected, call AllowIncompatibleChanges(). + /// change is detected, call AllowIncompatibleChanges() or provide a default value + /// (v3+), which is used as both the `initial-default` and the `write-default` of + /// the new column. /// /// The parent name is used to find the parent using Schema::FindFieldByName(). If /// the parent name is null or empty, the new column will be added to the root as a @@ -157,11 +168,13 @@ class ICEBERG_EXPORT UpdateSchema : public PendingUpdate { /// \param name Name for the new column. /// \param type Type for the new column. /// \param doc Documentation string for the new column. + /// \param default_value Optional default value for the new column (v3+). /// \return Reference to this for method chaining. /// \note InvalidArgument will be reported if parent doesn't identify a struct. UpdateSchema& AddRequiredColumn(std::optional parent, std::string_view name, std::shared_ptr type, - std::string_view doc = ""); + std::string_view doc = "", + std::optional default_value = std::nullopt); /// \brief Rename a column in the schema. /// @@ -210,6 +223,21 @@ class ICEBERG_EXPORT UpdateSchema : public PendingUpdate { /// the column will be deleted. UpdateSchema& UpdateColumnDoc(std::string_view name, std::string_view new_doc); + /// \brief Update the `write-default` value for a column (v3+). + /// + /// The name is used to find the column to update using Schema::FindFieldByName(). + /// The column's `initial-default` is not changed: it is fixed when the column is + /// added and applies to rows that predate the column. + /// + /// \param name Name of the column to update the default value for. + /// \param new_default Replacement `write-default` value for the column, or + /// `std::nullopt` to clear it. + /// \return Reference to this for method chaining. + /// \note InvalidArgument will be reported if name doesn't identify a column in the + /// schema or if the column will be deleted. + UpdateSchema& UpdateColumnDefault(std::string_view name, + std::optional new_default); + /// \brief Update a column to be optional. /// /// \param name Name of the column to mark optional. @@ -364,10 +392,12 @@ class ICEBERG_EXPORT UpdateSchema : public PendingUpdate { /// \param is_optional Whether the column is optional. /// \param type Type for the new column. /// \param doc Optional documentation string. + /// \param default_value Optional default value for the new column (v3+). /// \return Reference to this for method chaining. UpdateSchema& AddColumnInternal(std::optional parent, std::string_view name, bool is_optional, - std::shared_ptr type, std::string_view doc); + std::shared_ptr type, std::string_view doc, + std::optional default_value); /// \brief Internal implementation for updating column requirement (optional/required). /// diff --git a/src/iceberg/util/type_util.cc b/src/iceberg/util/type_util.cc index 1f1e02747..6656c5636 100644 --- a/src/iceberg/util/type_util.cc +++ b/src/iceberg/util/type_util.cc @@ -385,9 +385,9 @@ std::shared_ptr AssignFreshIdVisitor::Visit(const StructType& type) std::vector fresh_fields; for (size_t i = 0; i < type.fields().size(); ++i) { const auto& field = type.fields()[i]; - fresh_fields.emplace_back(fresh_ids[i], std::string(field.name()), - Visit(field.type()), field.optional(), - std::string(field.doc())); + fresh_fields.emplace_back( + fresh_ids[i], std::string(field.name()), Visit(field.type()), field.optional(), + std::string(field.doc()), field.initial_default(), field.write_default()); } return std::make_shared(std::move(fresh_fields)); } @@ -397,7 +397,8 @@ std::shared_ptr AssignFreshIdVisitor::Visit(const ListType& type) cons int32_t fresh_id = next_id_(); SchemaField fresh_elem_field(fresh_id, std::string(elem_field.name()), Visit(elem_field.type()), elem_field.optional(), - std::string(elem_field.doc())); + std::string(elem_field.doc()), + elem_field.initial_default(), elem_field.write_default()); return std::make_shared(std::move(fresh_elem_field)); } @@ -410,10 +411,12 @@ std::shared_ptr AssignFreshIdVisitor::Visit(const MapType& type) const SchemaField fresh_key_field(fresh_key_id, std::string(key_field.name()), Visit(key_field.type()), key_field.optional(), - std::string(key_field.doc())); - SchemaField fresh_value_field(fresh_value_id, std::string(value_field.name()), - Visit(value_field.type()), value_field.optional(), - std::string(value_field.doc())); + std::string(key_field.doc()), key_field.initial_default(), + key_field.write_default()); + SchemaField fresh_value_field( + fresh_value_id, std::string(value_field.name()), Visit(value_field.type()), + value_field.optional(), std::string(value_field.doc()), + value_field.initial_default(), value_field.write_default()); return std::make_shared(std::move(fresh_key_field), std::move(fresh_value_field)); } From 1de257929269d2c5e641f51bf2fddd00c9542cd6 Mon Sep 17 00:00:00 2001 From: Xin Huang Date: Wed, 1 Jul 2026 20:42:04 -0700 Subject: [PATCH 2/4] share decimal default promotion across add/update column default Extract the decimal same-scale precision-widening logic into a shared CastDefaultToType helper and use it in UpdateColumn, UpdateColumnDefault, and AddColumnInternal. Previously only UpdateColumn handled it, so setting or adding a decimal default whose precision differs from the column (e.g. Decimal(_,9,2) into a decimal(18,2) column) was wrongly rejected. --- src/iceberg/test/update_schema_test.cc | 35 ++++++++++++++++++++++++ src/iceberg/update/update_schema.cc | 37 ++++++++++++++++---------- 2 files changed, 58 insertions(+), 14 deletions(-) diff --git a/src/iceberg/test/update_schema_test.cc b/src/iceberg/test/update_schema_test.cc index 23102594a..94b46ec26 100644 --- a/src/iceberg/test/update_schema_test.cc +++ b/src/iceberg/test/update_schema_test.cc @@ -361,6 +361,41 @@ TEST_F(UpdateSchemaDefaultValueTest, UpdateColumnTypePromotesDecimalDefault) { EXPECT_EQ(*field.write_default(), Literal::Decimal(1234, 18, 2)); } +TEST_F(UpdateSchemaDefaultValueTest, AddColumnWithWiderPrecisionDecimalDefault) { + // A decimal default whose precision differs from the column (same scale) is accepted: + // Literal::CastTo does not cast between decimal types, so the default is rebuilt at the + // column's precision instead of being rejected. + ICEBERG_UNWRAP_OR_FAIL(auto update, table_->NewUpdateSchema()); + update->AddColumn("new_col", decimal(18, 2), "A decimal column", + Literal::Decimal(1234, 9, 2)); + + ICEBERG_UNWRAP_OR_FAIL(auto result, update->Apply()); + ICEBERG_UNWRAP_OR_FAIL(auto field_opt, result.schema->FindFieldByName("new_col")); + ASSERT_TRUE(field_opt.has_value()); + + const auto& field = field_opt->get(); + ASSERT_NE(field.initial_default(), nullptr); + EXPECT_EQ(*field.initial_default(), Literal::Decimal(1234, 18, 2)); + ASSERT_NE(field.write_default(), nullptr); + EXPECT_EQ(*field.write_default(), Literal::Decimal(1234, 18, 2)); +} + +TEST_F(UpdateSchemaDefaultValueTest, UpdateColumnDefaultWiderPrecisionDecimal) { + // UpdateColumnDefault accepts a decimal default whose precision differs from the + // column (same scale), rebuilding it at the column's precision. + ICEBERG_UNWRAP_OR_FAIL(auto update, table_->NewUpdateSchema()); + update->AddColumn("new_col", decimal(18, 2), "A decimal column") + .UpdateColumnDefault("new_col", Literal::Decimal(1234, 9, 2)); + + ICEBERG_UNWRAP_OR_FAIL(auto result, update->Apply()); + ICEBERG_UNWRAP_OR_FAIL(auto field_opt, result.schema->FindFieldByName("new_col")); + ASSERT_TRUE(field_opt.has_value()); + + const auto& field = field_opt->get(); + ASSERT_NE(field.write_default(), nullptr); + EXPECT_EQ(*field.write_default(), Literal::Decimal(1234, 18, 2)); +} + TEST_F(UpdateSchemaTest, AddMultipleColumns) { ICEBERG_UNWRAP_OR_FAIL(auto update, table_->NewUpdateSchema()); update->AddColumn("col1", int32(), "First column") diff --git a/src/iceberg/update/update_schema.cc b/src/iceberg/update/update_schema.cc index 836a4f2f2..a5f378ddb 100644 --- a/src/iceberg/update/update_schema.cc +++ b/src/iceberg/update/update_schema.cc @@ -289,6 +289,24 @@ std::vector ApplyChangesVisitor::MoveFields( return reordered; } +/// \brief Cast a default value literal to a target primitive type. +/// +/// `Literal::CastTo` only returns a value for identical decimal types, but schema +/// evolution permits widening a decimal to a larger precision at the same scale. Such a +/// promotion leaves the unscaled value unchanged, so the literal is rebuilt with the +/// target type instead of going through `CastTo`. All other conversions delegate to +/// `CastTo`, which reports narrowing via AboveMax/BelowMin sentinels that callers reject. +Result CastDefaultToType(const Literal& value, + const std::shared_ptr& target_type) { + if (value.type()->type_id() == TypeId::kDecimal && + target_type->type_id() == TypeId::kDecimal) { + const auto& decimal_type = internal::checked_cast(*target_type); + return Literal::Decimal(std::get(value.value()).value(), + decimal_type.precision(), decimal_type.scale()); + } + return value.CastTo(target_type); +} + } // namespace Result> UpdateSchema::Make( @@ -419,17 +437,7 @@ UpdateSchema& UpdateSchema::UpdateColumn(std::string_view name, if (value == nullptr) { return nullptr; } - // IsPromotionAllowed permits widening a decimal to a larger precision at the same - // scale, but Literal::CastTo only handles identical decimal types. Such a promotion - // leaves the unscaled value unchanged, so rebuild the literal with the new type. - if (value->type()->type_id() == TypeId::kDecimal && - new_type->type_id() == TypeId::kDecimal) { - const auto& decimal_type = internal::checked_cast(*new_type); - return std::make_shared( - Literal::Decimal(std::get(value->value()).value(), - decimal_type.precision(), decimal_type.scale())); - } - ICEBERG_ASSIGN_OR_RAISE(Literal promoted, value->CastTo(new_type)); + ICEBERG_ASSIGN_OR_RAISE(Literal promoted, CastDefaultToType(*value, new_type)); return std::make_shared(std::move(promoted)); }; ICEBERG_BUILDER_ASSIGN_OR_RETURN(std::shared_ptr initial_default, @@ -496,7 +504,8 @@ UpdateSchema& UpdateSchema::UpdateColumnDefault(std::string_view name, *new_default); ICEBERG_BUILDER_ASSIGN_OR_RETURN_WITH_ERROR( Literal typed_default, - new_default->CastTo(internal::checked_pointer_cast(field.type())), + CastDefaultToType(*new_default, + internal::checked_pointer_cast(field.type())), "Cannot cast default value to {}: {}", *field.type(), *new_default); // CastTo reports narrowing by returning sentinel values instead of failing. ICEBERG_BUILDER_CHECK(!typed_default.IsNull() && !typed_default.IsAboveMax() && @@ -797,8 +806,8 @@ UpdateSchema& UpdateSchema::AddColumnInternal(std::optional pa *type_with_fresh_ids, *default_value); ICEBERG_BUILDER_ASSIGN_OR_RETURN_WITH_ERROR( Literal typed_default, - default_value->CastTo( - internal::checked_pointer_cast(type_with_fresh_ids)), + CastDefaultToType(*default_value, internal::checked_pointer_cast( + type_with_fresh_ids)), "Cannot cast default value to {}: {}", *type_with_fresh_ids, *default_value); // CastTo reports narrowing by returning sentinel values instead of failing. ICEBERG_BUILDER_CHECK(!typed_default.IsNull() && !typed_default.IsAboveMax() && From 635c290a97bb5e91803cf451686dcd1d6740e20f Mon Sep 17 00:00:00 2001 From: Xin Huang Date: Thu, 2 Jul 2026 00:48:52 -0700 Subject: [PATCH 3/4] reject decimal default with mismatched scale Guard the decimal default fast path in CastDefaultToType with a same-scale check. Decimal stores only the unscaled value, so rebuilding a decimal(9,3) default as decimal(18,2) would reinterpret 1234 from 1.234 to 12.34. Only same-scale precision widening is accepted; a differing scale falls through to CastTo and is rejected, matching Java's requirement that a decimal default's scale match the column type. --- src/iceberg/test/update_schema_test.cc | 22 ++++++++++++++++++++++ src/iceberg/update/update_schema.cc | 16 +++++++++++----- 2 files changed, 33 insertions(+), 5 deletions(-) diff --git a/src/iceberg/test/update_schema_test.cc b/src/iceberg/test/update_schema_test.cc index 94b46ec26..6071353d5 100644 --- a/src/iceberg/test/update_schema_test.cc +++ b/src/iceberg/test/update_schema_test.cc @@ -396,6 +396,28 @@ TEST_F(UpdateSchemaDefaultValueTest, UpdateColumnDefaultWiderPrecisionDecimal) { EXPECT_EQ(*field.write_default(), Literal::Decimal(1234, 18, 2)); } +TEST_F(UpdateSchemaDefaultValueTest, AddColumnWithDifferentScaleDecimalDefaultFails) { + // A decimal default whose scale differs from the column is rejected, not silently + // reinterpreted: the unscaled value only keeps its meaning at the same scale. + ICEBERG_UNWRAP_OR_FAIL(auto update, table_->NewUpdateSchema()); + update->AddColumn("new_col", decimal(18, 2), "A decimal column", + Literal::Decimal(1234, 9, 3)); + + auto result = update->Apply(); + EXPECT_THAT(result, IsError(ErrorKind::kValidationFailed)); + EXPECT_THAT(result, HasErrorMessage("Cannot cast default value")); +} + +TEST_F(UpdateSchemaDefaultValueTest, UpdateColumnDefaultDifferentScaleDecimalFails) { + ICEBERG_UNWRAP_OR_FAIL(auto update, table_->NewUpdateSchema()); + update->AddColumn("new_col", decimal(18, 2), "A decimal column") + .UpdateColumnDefault("new_col", Literal::Decimal(1234, 9, 3)); + + auto result = update->Apply(); + EXPECT_THAT(result, IsError(ErrorKind::kValidationFailed)); + EXPECT_THAT(result, HasErrorMessage("Cannot cast default value")); +} + TEST_F(UpdateSchemaTest, AddMultipleColumns) { ICEBERG_UNWRAP_OR_FAIL(auto update, table_->NewUpdateSchema()); update->AddColumn("col1", int32(), "First column") diff --git a/src/iceberg/update/update_schema.cc b/src/iceberg/update/update_schema.cc index a5f378ddb..c23c7b9e2 100644 --- a/src/iceberg/update/update_schema.cc +++ b/src/iceberg/update/update_schema.cc @@ -292,17 +292,23 @@ std::vector ApplyChangesVisitor::MoveFields( /// \brief Cast a default value literal to a target primitive type. /// /// `Literal::CastTo` only returns a value for identical decimal types, but schema -/// evolution permits widening a decimal to a larger precision at the same scale. Such a +/// evolution permits widening a decimal to a larger precision at the SAME scale. Such a /// promotion leaves the unscaled value unchanged, so the literal is rebuilt with the -/// target type instead of going through `CastTo`. All other conversions delegate to -/// `CastTo`, which reports narrowing via AboveMax/BelowMin sentinels that callers reject. +/// target type instead of going through `CastTo`. A scale change is NOT handled here: the +/// unscaled value only keeps its meaning at the same scale (a decimal default must match +/// the column scale), so differing scales fall through to `CastTo` and are rejected. All +/// other conversions delegate to `CastTo`, which reports narrowing via AboveMax/BelowMin +/// sentinels that callers reject. Result CastDefaultToType(const Literal& value, const std::shared_ptr& target_type) { if (value.type()->type_id() == TypeId::kDecimal && target_type->type_id() == TypeId::kDecimal) { + const auto& source_type = internal::checked_cast(*value.type()); const auto& decimal_type = internal::checked_cast(*target_type); - return Literal::Decimal(std::get(value.value()).value(), - decimal_type.precision(), decimal_type.scale()); + if (source_type.scale() == decimal_type.scale()) { + return Literal::Decimal(std::get(value.value()).value(), + decimal_type.precision(), decimal_type.scale()); + } } return value.CastTo(target_type); } From 4ff0974151ab73443c456801e8dc7ff4b1b441c2 Mon Sep 17 00:00:00 2001 From: Xin Huang Date: Thu, 2 Jul 2026 08:17:12 -0700 Subject: [PATCH 4/4] Document CastDefaultToType null-pointer precondition --- src/iceberg/update/update_schema.cc | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/iceberg/update/update_schema.cc b/src/iceberg/update/update_schema.cc index c23c7b9e2..dfded51fd 100644 --- a/src/iceberg/update/update_schema.cc +++ b/src/iceberg/update/update_schema.cc @@ -299,6 +299,10 @@ std::vector ApplyChangesVisitor::MoveFields( /// the column scale), so differing scales fall through to `CastTo` and are rejected. All /// other conversions delegate to `CastTo`, which reports narrowing via AboveMax/BelowMin /// sentinels that callers reject. +/// +/// \pre `target_type` is non-null; every caller passes a resolved field type. `value` is +/// the caller's dereferenced default, so a missing default must be filtered out before +/// this is reached. Result CastDefaultToType(const Literal& value, const std::shared_ptr& target_type) { if (value.type()->type_id() == TypeId::kDecimal &&