Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 1 addition & 4 deletions src/iceberg/partition_spec.cc
Original file line number Diff line number Diff line change
Expand Up @@ -192,10 +192,7 @@ Status PartitionSpec::Validate(const Schema& schema, bool allow_missing_fields)
partition_field);
}
const auto& source_type = source_field.value().get().type();
if (!field_transform->CanTransform(*source_type)) {
return InvalidArgument("Invalid source type {} for transform {}",
source_type->ToString(), field_transform->ToString());
}
ICEBERG_RETURN_UNEXPECTED(field_transform->Validate(source_type));

// The only valid parent types for a PartitionField are StructTypes. This must be
// checked recursively.
Expand Down
5 changes: 1 addition & 4 deletions src/iceberg/sort_order.cc
Original file line number Diff line number Diff line change
Expand Up @@ -96,10 +96,7 @@ Status SortOrder::Validate(const Schema& schema) const {

const auto& source_type = schema_field.value().get().type();

if (!field.transform()->CanTransform(*source_type)) {
return InvalidArgument("Invalid source type {} for transform {}",
source_type->ToString(), field.transform()->ToString());
}
ICEBERG_RETURN_UNEXPECTED(field.transform()->Validate(source_type));
}
return {};
}
Expand Down
16 changes: 16 additions & 0 deletions src/iceberg/test/partition_spec_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -219,6 +219,22 @@ TEST(PartitionSpecTest, InvalidTransformForType) {
EXPECT_THAT(result_void, IsOk());
}

TEST(PartitionSpecTest, InvalidParameterizedTransform) {
auto field_id = SchemaField::MakeRequired(1, "id", int32());
auto field_name = SchemaField::MakeRequired(2, "name", string());
Schema schema({field_id, field_name}, Schema::kInitialSchemaId);

PartitionField bucket_field(1, 1000, "id_bucket", Transform::Bucket(0));
auto bucket_result = PartitionSpec::Make(schema, 1, {bucket_field}, false);
EXPECT_THAT(bucket_result, IsError(ErrorKind::kInvalidArgument));
EXPECT_THAT(bucket_result, HasErrorMessage("Number of buckets must be positive"));

PartitionField truncate_field(2, 1000, "name_trunc", Transform::Truncate(0));
auto truncate_result = PartitionSpec::Make(schema, 1, {truncate_field}, false);
EXPECT_THAT(truncate_result, IsError(ErrorKind::kInvalidArgument));
EXPECT_THAT(truncate_result, HasErrorMessage("Width must be positive"));
}

TEST(PartitionSpecTest, SourceIdNotFound) {
auto field1 = SchemaField::MakeRequired(1, "id", int64());
auto field2 = SchemaField::MakeRequired(2, "ts", timestamp());
Expand Down
15 changes: 15 additions & 0 deletions src/iceberg/test/sort_order_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -225,6 +225,21 @@ TEST_F(SortOrderTest, MakeInvalidSortOrderTransformCannotApply) {
EXPECT_THAT(sort_order, HasErrorMessage("Invalid source type"));
}

TEST_F(SortOrderTest, MakeInvalidParameterizedTransform) {
SortField bucket_field(1, Transform::Bucket(0), SortDirection::kAscending,
NullOrder::kFirst);
auto bucket_order = SortOrder::Make(*schema_, 1, std::vector<SortField>{bucket_field});
EXPECT_THAT(bucket_order, IsError(ErrorKind::kInvalidArgument));
EXPECT_THAT(bucket_order, HasErrorMessage("Number of buckets must be positive"));

SortField truncate_field(2, Transform::Truncate(0), SortDirection::kAscending,
NullOrder::kFirst);
auto truncate_order =
SortOrder::Make(*schema_, 1, std::vector<SortField>{truncate_field});
EXPECT_THAT(truncate_order, IsError(ErrorKind::kInvalidArgument));
EXPECT_THAT(truncate_order, HasErrorMessage("Width must be positive"));
}

TEST_F(SortOrderTest, MakeInvalidSortOrderNonPrimitiveField) {
auto struct_field = std::make_unique<SchemaField>(
4, "struct_field",
Expand Down
4 changes: 3 additions & 1 deletion src/iceberg/test/transform_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -154,10 +154,12 @@ TEST(TransformFromStringTest, PositiveCases) {
}

TEST(TransformFromStringTest, NegativeCases) {
constexpr std::array<std::string_view, 6> invalid_cases = {
constexpr std::array<std::string_view, 8> invalid_cases = {
"bucket", // missing param
"bucket[]", // empty param
"bucket[abc]", // invalid number
"bucket[0]", // invalid number of buckets
"truncate[0]", // invalid width
"unknown", // unsupported transform
"bucket[16", // missing closing bracket
"truncate[1]extra" // extra characters
Expand Down
16 changes: 16 additions & 0 deletions src/iceberg/transform.cc
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@
#include "iceberg/expression/term.h"
#include "iceberg/result.h"
#include "iceberg/transform_function.h"
#include "iceberg/transform_internal.h"
#include "iceberg/type.h"
#include "iceberg/util/base64.h"
#include "iceberg/util/checked_cast.h"
Expand Down Expand Up @@ -232,6 +233,19 @@ bool Transform::CanTransform(const Type& source_type) const {
std::unreachable();
}

Status Transform::Validate(const std::shared_ptr<Type>& source_type) const {
if (!source_type) {
return InvalidArgument("Source type cannot be null for transform {}", ToString());
}
// Keep type mismatches as InvalidArgument; Bind reports them as NotSupported.
if (!CanTransform(*source_type)) {
return InvalidArgument("Invalid source type {} for transform {}",
source_type->ToString(), ToString());
}
ICEBERG_RETURN_UNEXPECTED(Bind(source_type));

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Worth a one-line comment on why both CanTransform and Bind are called here: CanTransform keeps type mismatches as InvalidArgument/"Invalid source type", whereas BindMake would report them as NotSupported. Otherwise a future cleanup may drop CanTransform as redundant and silently change the error kind.

return {};
}

bool Transform::PreservesOrder() const {
switch (transform_type_) {
case TransformType::kUnknown:
Expand Down Expand Up @@ -554,9 +568,11 @@ Result<std::shared_ptr<Transform>> TransformFromString(std::string_view transfor
StringUtils::ParseNumber<int32_t>(match[2].str()));

if (type_str == kBucketName) {
ICEBERG_RETURN_UNEXPECTED(internal::ValidateBucketTransformParameter(param));
return Transform::Bucket(param);
}
if (type_str == kTruncateName) {
ICEBERG_RETURN_UNEXPECTED(internal::ValidateTruncateTransformParameter(param));
return Transform::Truncate(param);
}
}
Expand Down
5 changes: 5 additions & 0 deletions src/iceberg/transform.h
Original file line number Diff line number Diff line change
Expand Up @@ -159,6 +159,11 @@ class ICEBERG_EXPORT Transform : public util::Formattable {
/// \return true if this transform can be applied to the type, false otherwise
bool CanTransform(const Type& source_type) const;

/// \brief Validates whether this transform can bind to the given source type.
/// \param source_type The source type to validate against.
/// \return Error status if the transform cannot bind to the source type.
Status Validate(const std::shared_ptr<Type>& source_type) const;

/// \brief Whether the transform preserves the order of values (is monotonic).
bool PreservesOrder() const;

Expand Down
9 changes: 3 additions & 6 deletions src/iceberg/transform_function.cc
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
#include <cassert>

#include "iceberg/expression/literal.h"
#include "iceberg/transform_internal.h"
#include "iceberg/type.h"
#include "iceberg/type_fwd.h"
#include "iceberg/util/bucket_util.h"
Expand Down Expand Up @@ -90,9 +91,7 @@ Result<std::unique_ptr<TransformFunction>> BucketTransform::Make(
return NotSupported("{} is not a valid input type for bucket transform",
source_type->ToString());
}
if (num_buckets <= 0) {
return InvalidArgument("Number of buckets must be positive, got {}", num_buckets);
}
ICEBERG_RETURN_UNEXPECTED(internal::ValidateBucketTransformParameter(num_buckets));
return std::make_unique<BucketTransform>(source_type, num_buckets);
}

Expand Down Expand Up @@ -124,9 +123,7 @@ Result<std::unique_ptr<TransformFunction>> TruncateTransform::Make(
return NotSupported("{} is not a valid input type for truncate transform",
source_type->ToString());
}
if (width <= 0) {
return InvalidArgument("Width must be positive, got {}", width);
}
ICEBERG_RETURN_UNEXPECTED(internal::ValidateTruncateTransformParameter(width));
return std::make_unique<TruncateTransform>(source_type, width);
}

Expand Down
42 changes: 42 additions & 0 deletions src/iceberg/transform_internal.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/

#pragma once

#include <cstdint>

#include "iceberg/result.h"

namespace iceberg::internal {

inline Status ValidateBucketTransformParameter(int32_t num_buckets) {
if (num_buckets <= 0) {
return InvalidArgument("Number of buckets must be positive, got {}", num_buckets);
}
return {};
}

inline Status ValidateTruncateTransformParameter(int32_t width) {
if (width <= 0) {
return InvalidArgument("Width must be positive, got {}", width);
}
return {};
}

} // namespace iceberg::internal
Loading