2020#include " iceberg/table_metadata.h"
2121
2222#include < algorithm>
23+ #include < atomic>
2324#include < charconv>
2425#include < chrono>
2526#include < cstdint>
3637#include " iceberg/exception.h"
3738#include " iceberg/file_io.h"
3839#include " iceberg/json_internal.h"
40+ #include " iceberg/metrics_config.h"
3941#include " iceberg/partition_spec.h"
4042#include " iceberg/result.h"
4143#include " iceberg/schema.h"
4749#include " iceberg/util/gzip_internal.h"
4850#include " iceberg/util/location_util.h"
4951#include " iceberg/util/macros.h"
52+ #include " iceberg/util/property_util.h"
53+ #include " iceberg/util/type_util.h"
5054#include " iceberg/util/uuid.h"
5155namespace iceberg {
5256namespace {
5357const TimePointMs kInvalidLastUpdatedMs = TimePointMs::min();
5458constexpr int32_t kLastAdded = -1 ;
5559constexpr std::string_view kMetadataFolderName = " metadata" ;
60+
61+ // TableMetadata private static methods
62+ Result<std::shared_ptr<PartitionSpec>> FreshPartitionSpec (
63+ int32_t spec_id, const PartitionSpec& spec, const Schema& base_schema,
64+ const Schema& fresh_schema, std::function<int32_t ()> next_id) {
65+ std::vector<PartitionField> partition_fields;
66+ for (auto & field : spec.fields ()) {
67+ ICEBERG_ASSIGN_OR_RAISE (auto source_name,
68+ base_schema.FindColumnNameById (field.source_id ()));
69+ int32_t source_id;
70+ if (!source_name.has_value ()) {
71+ // In the case of a source field not found, the column has been deleted.
72+ // This only happens in V1 tables where the reference is still around as a void
73+ // transform
74+ source_id = field.source_id ();
75+ } else {
76+ ICEBERG_ASSIGN_OR_RAISE (auto fresh_field,
77+ fresh_schema.FindFieldByName (source_name.value ()));
78+ if (!fresh_field.has_value ()) [[unlikely]] {
79+ return InvalidSchema (" Partition field {} does not exist in the schema" ,
80+ source_name.value ());
81+ }
82+ source_id = fresh_field.value ().get ().field_id ();
83+ }
84+ partition_fields.emplace_back (source_id, next_id ? next_id () : field.field_id (),
85+ std::string (field.name ()), field.transform ());
86+ }
87+ return PartitionSpec::Make (fresh_schema, spec_id, std::move (partition_fields), false );
88+ }
89+
90+ Result<std::shared_ptr<SortOrder>> FreshSortOrder (int32_t order_id, const Schema& schema,
91+ const SortOrder& order) {
92+ if (order.is_unsorted ()) {
93+ return SortOrder::Unsorted ();
94+ }
95+
96+ std::vector<SortField> fresh_fields;
97+ for (const auto & field : order.fields ()) {
98+ ICEBERG_ASSIGN_OR_RAISE (auto source_name,
99+ schema.FindColumnNameById (field.source_id ()));
100+ if (!source_name.has_value ()) {
101+ return InvalidSchema (" Unable to find source field with ID {} in the old schema" ,
102+ field.source_id ());
103+ }
104+
105+ ICEBERG_ASSIGN_OR_RAISE (auto fresh_field,
106+ schema.FindFieldByName (source_name.value ()));
107+ if (!fresh_field.has_value ()) {
108+ return InvalidSchema (" Unable to find field '{}' in the new schema" ,
109+ source_name.value ());
110+ }
111+
112+ int32_t new_source_id = fresh_field.value ().get ().field_id ();
113+ fresh_fields.emplace_back (new_source_id, field.transform (), field.direction (),
114+ field.null_order ());
115+ }
116+
117+ return SortOrder::Make (order_id, std::move (fresh_fields));
118+ }
56119} // namespace
57120
58121std::string ToString (const SnapshotLogEntry& entry) {
@@ -65,6 +128,53 @@ std::string ToString(const MetadataLogEntry& entry) {
65128 entry.metadata_file );
66129}
67130
131+ Result<std::unique_ptr<TableMetadata>> TableMetadata::Make (
132+ const iceberg::Schema& schema, const iceberg::PartitionSpec& spec,
133+ const iceberg::SortOrder& sort_order, const std::string& location,
134+ const std::unordered_map<std::string, std::string>& properties, int format_version) {
135+ for (const auto & [key, _] : properties) {
136+ if (TableProperties::reserved_properties ().contains (key)) {
137+ return InvalidArgument (
138+ " Table properties should not contain reserved properties, but got {}" , key);
139+ }
140+ }
141+
142+ // Reassign all column ids to ensure consistency
143+ std::atomic<int32_t > last_column_id = 0 ;
144+ auto next_id = [&last_column_id]() -> int32_t { return ++last_column_id; };
145+ ICEBERG_ASSIGN_OR_RAISE (auto fresh_schema,
146+ AssignFreshIds (Schema::kInitialSchemaId , schema, next_id));
147+
148+ // Rebuild the partition spec using the new column ids
149+ std::atomic<int32_t > last_partition_field_id = PartitionSpec::kInvalidPartitionFieldId ;
150+ auto next_partition_field_id = [&last_partition_field_id]() -> int32_t {
151+ return ++last_partition_field_id;
152+ };
153+ ICEBERG_ASSIGN_OR_RAISE (auto fresh_spec,
154+ FreshPartitionSpec (PartitionSpec::kInitialSpecId , spec, schema,
155+ *fresh_schema, next_partition_field_id));
156+
157+ // rebuild the sort order using the new column ids
158+ int32_t fresh_order_id =
159+ sort_order.is_unsorted () ? sort_order.order_id () : SortOrder::kInitialSortOrderId ;
160+ ICEBERG_ASSIGN_OR_RAISE (auto fresh_order,
161+ FreshSortOrder (fresh_order_id, *fresh_schema, sort_order))
162+
163+ // Validata the metrics configuration.
164+ ICEBERG_RETURN_UNEXPECTED (
165+ MetricsConfig::VerifyReferencedColumns (properties, *fresh_schema));
166+
167+ PropertyUtil::ValidateCommitProperties (properties);
168+
169+ return TableMetadataBuilder::BuildFromEmpty (format_version)
170+ ->SetLocation (location)
171+ .SetCurrentSchema (std::move (fresh_schema), last_column_id.load ())
172+ .SetDefaultPartitionSpec (std::move (fresh_spec))
173+ .SetDefaultSortOrder (std::move (fresh_order))
174+ .SetProperties (properties)
175+ .Build ();
176+ }
177+
68178Result<std::shared_ptr<Schema>> TableMetadata::Schema () const {
69179 return SchemaById (current_schema_id);
70180}
@@ -405,6 +515,10 @@ class TableMetadataBuilder::Impl {
405515 const TableMetadata* base () const { return base_; }
406516 const TableMetadata& metadata () const { return metadata_; }
407517
518+ void SetLocation (std::string_view location) {
519+ metadata_.location = std::string (location);
520+ }
521+
408522 void SetMetadataLocation (std::string_view metadata_location) {
409523 metadata_location_ = std::string (metadata_location);
410524 if (base_ != nullptr ) {
@@ -826,7 +940,8 @@ TableMetadataBuilder& TableMetadataBuilder::RemoveProperties(
826940}
827941
828942TableMetadataBuilder& TableMetadataBuilder::SetLocation (std::string_view location) {
829- throw IcebergError (std::format (" {} not implemented" , __FUNCTION__));
943+ impl_->SetLocation (location);
944+ return *this ;
830945}
831946
832947TableMetadataBuilder& TableMetadataBuilder::AddEncryptionKey (
0 commit comments