Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 4 additions & 4 deletions cpp/src/arrow/type.cc
Original file line number Diff line number Diff line change
Expand Up @@ -3270,15 +3270,15 @@ std::shared_ptr<DataType> run_end_encoded(std::shared_ptr<DataType> run_end_type
/// \brief Create a SparseUnionType from child fields and optional type codes
///
/// \param child_fields the fields of the union's children
/// \param type_codes the type code of each child; when empty, consecutive
///        codes starting at 0 are generated, one per child field
/// \return the new SparseUnionType, as a DataType pointer
std::shared_ptr<DataType> sparse_union(FieldVector child_fields,
                                       std::vector<int8_t> type_codes) {
  if (type_codes.empty()) {
    // Hand the child count to Iota as a length rather than narrowing it to
    // int8_t first: the count itself may not be representable as an int8_t
    // even when every generated code is.
    type_codes = internal::Iota<int8_t>(0, child_fields.size());
  }
  return std::make_shared<SparseUnionType>(std::move(child_fields),
                                           std::move(type_codes));
}
/// \brief Create a DenseUnionType from child fields and optional type codes
///
/// \param child_fields the fields of the union's children
/// \param type_codes the type code of each child; when empty, consecutive
///        codes starting at 0 are generated, one per child field
/// \return the new DenseUnionType, as a DataType pointer
std::shared_ptr<DataType> dense_union(FieldVector child_fields,
                                      std::vector<int8_t> type_codes) {
  if (type_codes.empty()) {
    // Hand the child count to Iota as a length rather than narrowing it to
    // int8_t first: the count itself may not be representable as an int8_t
    // even when every generated code is.
    type_codes = internal::Iota<int8_t>(0, child_fields.size());
  }
  return std::make_shared<DenseUnionType>(std::move(child_fields), std::move(type_codes));
}
Expand Down Expand Up @@ -3310,7 +3310,7 @@ std::shared_ptr<DataType> sparse_union(const ArrayVector& children,
std::vector<std::string> field_names,
std::vector<int8_t> type_codes) {
if (type_codes.empty()) {
type_codes = internal::Iota(static_cast<int8_t>(children.size()));
type_codes = internal::Iota<int8_t>(0, children.size());
}
auto fields = FieldsFromArraysAndNames(std::move(field_names), children);
return sparse_union(std::move(fields), std::move(type_codes));
Expand All @@ -3320,7 +3320,7 @@ std::shared_ptr<DataType> dense_union(const ArrayVector& children,
std::vector<std::string> field_names,
std::vector<int8_t> type_codes) {
if (type_codes.empty()) {
type_codes = internal::Iota(static_cast<int8_t>(children.size()));
type_codes = internal::Iota<int8_t>(0, children.size());
}
auto fields = FieldsFromArraysAndNames(std::move(field_names), children);
return dense_union(std::move(fields), std::move(type_codes));
Expand Down
31 changes: 31 additions & 0 deletions cpp/src/arrow/type_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
#include <cstdint>
#include <functional>
#include <memory>
#include <numeric>
#include <string>
#include <unordered_set>
#include <vector>
Expand Down Expand Up @@ -2191,6 +2192,36 @@ TEST(TestUnionType, Basics) {
ASSERT_EQ(ty6->child_ids(), child_ids2);
}

// Unions built with one child for every code from 0 through
// UnionType::kMaxTypeCode must report that many type codes and child ids,
// whether the codes are passed explicitly or left for the factory to generate.
TEST(TestUnionType, MaxTypeCode) {
  // One int32 child per code, named after the code it will receive.
  std::vector<std::shared_ptr<Field>> children;
  for (int32_t code = 0; code <= UnionType::kMaxTypeCode; ++code) {
    children.push_back(field(std::to_string(code), int32()));
  }

  // Explicit codes 0, 1, 2, ... matching the children one-to-one.
  std::vector<int8_t> explicit_codes(children.size());
  std::iota(explicit_codes.begin(), explicit_codes.end(), 0);

  const auto expected_count = UnionType::kMaxTypeCode + 1;

  // Dense union: explicit codes first, then generated codes.
  auto dense_explicit =
      checked_pointer_cast<UnionType>(dense_union(children, explicit_codes));
  ASSERT_EQ(dense_explicit->type_codes().size(), expected_count);
  ASSERT_EQ(dense_explicit->child_ids().size(), expected_count);

  auto dense_generated = checked_pointer_cast<UnionType>(dense_union(children));
  ASSERT_EQ(dense_generated->type_codes().size(), expected_count);
  ASSERT_EQ(dense_generated->child_ids().size(), expected_count);

  AssertTypeEqual(*dense_explicit, *dense_generated);

  // Sparse union: same two checks.
  auto sparse_explicit =
      checked_pointer_cast<UnionType>(sparse_union(children, explicit_codes));
  ASSERT_EQ(sparse_explicit->type_codes().size(), expected_count);
  ASSERT_EQ(sparse_explicit->child_ids().size(), expected_count);

  auto sparse_generated = checked_pointer_cast<UnionType>(sparse_union(children));
  ASSERT_EQ(sparse_generated->type_codes().size(), expected_count);
  ASSERT_EQ(sparse_generated->child_ids().size(), expected_count);

  AssertTypeEqual(*sparse_explicit, *sparse_generated);
}

TEST(TestDictionaryType, Basics) {
auto value_type = int32();

Expand Down
12 changes: 9 additions & 3 deletions cpp/src/arrow/util/range.h
Original file line number Diff line number Diff line change
Expand Up @@ -27,15 +27,21 @@

namespace arrow::internal {

/// Create a vector containing the values from start with length elements
///
/// The result holds `length` consecutive values: start, start + 1, ...
/// Taking the length as a size_t (rather than as a T) lets callers request
/// more elements than T itself can count.
template <typename T>
std::vector<T> Iota(T start, size_t length) {
  std::vector<T> result(length);
  std::iota(result.begin(), result.end(), start);
  return result;
}

/// Create a vector containing the values from start up to stop
///
/// stop is exclusive: the result holds start, start + 1, ..., stop - 1.
/// An empty vector is returned when start >= stop.
template <typename T>
std::vector<T> Iota(T start, T stop) {
  if (start > stop) {
    return {};
  }
  // Delegate to the (start, length) overload so the fill logic lives in
  // exactly one place.
  return Iota<T>(start, static_cast<size_t>(stop - start));
}

/// Create a vector containing the values from 0 up to length
Expand Down
2 changes: 1 addition & 1 deletion docs/source/format/Columnar.rst
Original file line number Diff line number Diff line change
Expand Up @@ -880,7 +880,7 @@ each value. Its physical layout is as follows:
* One child array for each type
* Types buffer: A buffer of 8-bit signed integers. Each type in the
union has a corresponding type id whose values are found in this
buffer. A union with more than 128 possible types can be modeled as
a union of unions.
* Offsets buffer: A buffer of signed Int32 values indicating the
relative offset into the respective child array for the type in a
Expand Down
Loading