diff --git a/cpp/src/arrow/type.cc b/cpp/src/arrow/type.cc index 2e9d860a8d6..cba4a0ecd3a 100644 --- a/cpp/src/arrow/type.cc +++ b/cpp/src/arrow/type.cc @@ -3270,7 +3270,7 @@ std::shared_ptr run_end_encoded(std::shared_ptr run_end_type std::shared_ptr sparse_union(FieldVector child_fields, std::vector type_codes) { if (type_codes.empty()) { - type_codes = internal::Iota(static_cast(child_fields.size())); + type_codes = internal::Iota(0, child_fields.size()); } return std::make_shared(std::move(child_fields), std::move(type_codes)); @@ -3278,7 +3278,7 @@ std::shared_ptr sparse_union(FieldVector child_fields, std::shared_ptr dense_union(FieldVector child_fields, std::vector type_codes) { if (type_codes.empty()) { - type_codes = internal::Iota(static_cast(child_fields.size())); + type_codes = internal::Iota(0, child_fields.size()); } return std::make_shared(std::move(child_fields), std::move(type_codes)); } @@ -3310,7 +3310,7 @@ std::shared_ptr sparse_union(const ArrayVector& children, std::vector field_names, std::vector type_codes) { if (type_codes.empty()) { - type_codes = internal::Iota(static_cast(children.size())); + type_codes = internal::Iota(0, children.size()); } auto fields = FieldsFromArraysAndNames(std::move(field_names), children); return sparse_union(std::move(fields), std::move(type_codes)); @@ -3320,7 +3320,7 @@ std::shared_ptr dense_union(const ArrayVector& children, std::vector field_names, std::vector type_codes) { if (type_codes.empty()) { - type_codes = internal::Iota(static_cast(children.size())); + type_codes = internal::Iota(0, children.size()); } auto fields = FieldsFromArraysAndNames(std::move(field_names), children); return dense_union(std::move(fields), std::move(type_codes)); diff --git a/cpp/src/arrow/type_test.cc b/cpp/src/arrow/type_test.cc index cb17f1ac3fc..e9b1d30e6e7 100644 --- a/cpp/src/arrow/type_test.cc +++ b/cpp/src/arrow/type_test.cc @@ -22,6 +22,7 @@ #include #include #include +#include #include #include #include @@ -2191,6 +2192,36 @@ TEST(TestUnionType, Basics) { ASSERT_EQ(ty6->child_ids(), child_ids2); } +TEST(TestUnionType, MaxTypeCode) { + std::vector> fields; + for (int32_t i = 0; i <= UnionType::kMaxTypeCode; i++) { + fields.push_back(field(std::to_string(i), int32())); + } + + std::vector type_codes(fields.size()); + std::iota(type_codes.begin(), type_codes.end(), 0); + + auto t1 = checked_pointer_cast(dense_union(fields, type_codes)); + ASSERT_EQ(t1->type_codes().size(), UnionType::kMaxTypeCode + 1); + ASSERT_EQ(t1->child_ids().size(), UnionType::kMaxTypeCode + 1); + + auto t2 = checked_pointer_cast(dense_union(fields)); + ASSERT_EQ(t2->type_codes().size(), UnionType::kMaxTypeCode + 1); + ASSERT_EQ(t2->child_ids().size(), UnionType::kMaxTypeCode + 1); + + AssertTypeEqual(*t1, *t2); + + auto t3 = checked_pointer_cast(sparse_union(fields, type_codes)); + ASSERT_EQ(t3->type_codes().size(), UnionType::kMaxTypeCode + 1); + ASSERT_EQ(t3->child_ids().size(), UnionType::kMaxTypeCode + 1); + + auto t4 = checked_pointer_cast(sparse_union(fields)); + ASSERT_EQ(t4->type_codes().size(), UnionType::kMaxTypeCode + 1); + ASSERT_EQ(t4->child_ids().size(), UnionType::kMaxTypeCode + 1); + + AssertTypeEqual(*t3, *t4); +} + TEST(TestDictionaryType, Basics) { auto value_type = int32(); diff --git a/cpp/src/arrow/util/range.h b/cpp/src/arrow/util/range.h index 20553287985..55155b7e34d 100644 --- a/cpp/src/arrow/util/range.h +++ b/cpp/src/arrow/util/range.h @@ -27,15 +27,21 @@ namespace arrow::internal { +/// Create a vector containing the values from start with length elements +template +std::vector Iota(T start, size_t length) { + std::vector result(length); + std::iota(result.begin(), result.end(), start); + return result; +} + /// Create a vector containing the values from start up to stop template std::vector Iota(T start, T stop) { if (start > stop) { return {}; } - std::vector result(static_cast(stop - start)); - std::iota(result.begin(), result.end(), start); - return result; + return Iota(start, static_cast(stop - start)); } /// Create a vector containing the values from 0 up to length diff --git a/docs/source/format/Columnar.rst b/docs/source/format/Columnar.rst index 5002e805247..cf7ff0c3dec 100644 --- a/docs/source/format/Columnar.rst +++ b/docs/source/format/Columnar.rst @@ -880,7 +880,7 @@ each value. Its physical layout is as follows: * One child array for each type * Types buffer: A buffer of 8-bit signed integers. Each type in the union has a corresponding type id whose values are found in this - buffer. A union with more than 127 possible types can be modeled as + buffer. A union with more than 128 possible types can be modeled as a union of unions. * Offsets buffer: A buffer of signed Int32 values indicating the relative offset into the respective child array for the type in a