From 91cdef8aa2dddc8b429d00c8a71731a3f15fa30d Mon Sep 17 00:00:00 2001 From: Logan Riggs Date: Tue, 28 Oct 2025 13:57:18 -0700 Subject: [PATCH 1/8] mac vcpkg --- ci/vcpkg/vcpkg.json | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/ci/vcpkg/vcpkg.json b/ci/vcpkg/vcpkg.json index 5a46da815c3..eefe0098ba2 100644 --- a/ci/vcpkg/vcpkg.json +++ b/ci/vcpkg/vcpkg.json @@ -90,6 +90,27 @@ } ] }, + "gandiva-llvm": { + "description": "Gandiva support", + "dependencies": [ + { + "name": "llvm", + "default-features": false, + "version>=": "21.1.1", + "features": [ + "clang", + "default-targets", + "enable-bindings", + "enable-terminfo", + "enable-zlib", + "enable-zstd", + "enable-rtti", + "lld", + "tools" + ] + } + ] + }, "gcs": { "description": "GCS support", "dependencies": [ From 4003cba24f57794b9b0342a623fab099c5589329 Mon Sep 17 00:00:00 2001 From: Logan Riggs Date: Mon, 17 Nov 2025 14:55:22 -0800 Subject: [PATCH 2/8] Add cpu attrs to llvm code generation. --- cpp/src/gandiva/engine.cc | 1 + 1 file changed, 1 insertion(+) diff --git a/cpp/src/gandiva/engine.cc b/cpp/src/gandiva/engine.cc index c91afd7c4b7..5806413919d 100644 --- a/cpp/src/gandiva/engine.cc +++ b/cpp/src/gandiva/engine.cc @@ -305,6 +305,7 @@ Engine::Engine(const std::shared_ptr& conf, // LLVM 10 doesn't like the expr function name to be the same as the module name auto module_id = "gdv_module_" + std::to_string(reinterpret_cast(this)); module_ = std::make_unique(module_id, *context_); + module_->setDataLayout(target_machine_->createDataLayout()); } Engine::~Engine() {} From bdb46f5b639bf0dae097bd8425b3962584d4e3d9 Mon Sep 17 00:00:00 2001 From: Logan Riggs Date: Mon, 17 Nov 2025 16:48:15 -0800 Subject: [PATCH 3/8] unit test --- cpp/src/gandiva/CMakeLists.txt | 1 + cpp/src/gandiva/target_datalayout_test.cc | 205 ++++++++++++++++++++++ 2 files changed, 206 insertions(+) create mode 100644 cpp/src/gandiva/target_datalayout_test.cc diff --git a/cpp/src/gandiva/CMakeLists.txt b/cpp/src/gandiva/CMakeLists.txt index a7f5f9dacff..44ebfb2c413 100644 --- a/cpp/src/gandiva/CMakeLists.txt +++ b/cpp/src/gandiva/CMakeLists.txt @@ -276,6 +276,7 @@ add_gandiva_test(internals-test hash_utils_test.cc gdv_function_stubs_test.cc interval_holder_test.cc + target_datalayout_test.cc tests/test_util.cc EXTRA_LINK_LIBS re2::re2 diff --git a/cpp/src/gandiva/target_datalayout_test.cc b/cpp/src/gandiva/target_datalayout_test.cc new file mode 100644 index 00000000000..b5421ca25ea --- /dev/null +++ b/cpp/src/gandiva/target_datalayout_test.cc @@ -0,0 +1,205 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#include +#include +#include + +#include "gandiva/llvm_generator.h" +#include "gandiva/tests/test_util.h" + +namespace gandiva { + +class TestTargetDataLayout : public ::testing::Test { + protected: + void SetUp() override {} +}; + +// Test that verifies the target data layout string representation +// is consistent with the CPU architecture. This test is portable +// across different architectures. +TEST_F(TestTargetDataLayout, VerifyDataLayoutForArchitecture) { + // Create an LLVM generator with default configuration + ASSERT_OK_AND_ASSIGN(auto generator, LLVMGenerator::Make(TestConfiguration(), false)); + + // Get the module from the generator + llvm::Module* module = generator->module(); + ASSERT_NE(module, nullptr); + + // Get the data layout from the module + const llvm::DataLayout& data_layout = module->getDataLayout(); + std::string data_layout_str = data_layout.getStringRepresentation(); + + // Verify that the data layout string is not empty + EXPECT_FALSE(data_layout_str.empty()); + + // Get the host CPU architecture information + std::string host_cpu = llvm::sys::getHostCPUName().str(); + std::string triple = llvm::sys::getDefaultTargetTriple(); + + // Log the information for debugging + std::cout << "Host CPU: " << host_cpu << std::endl; + std::cout << "Target Triple: " << triple << std::endl; + std::cout << "Data Layout: " << data_layout_str << std::endl; + + // Verify basic data layout properties based on architecture + // The data layout string should contain endianness information + // 'e' for little-endian, 'E' for big-endian + EXPECT_TRUE(data_layout_str[0] == 'e' || data_layout_str[0] == 'E') + << "Data layout should start with endianness specifier"; + + // Check for pointer size information in the data layout + // Most modern architectures use 64-bit pointers + if (triple.find("x86_64") != std::string::npos || + triple.find("aarch64") != std::string::npos || + triple.find("arm64") != std::string::npos) { + // For 64-bit architectures, expect pointer size to be 64 bits + EXPECT_NE(data_layout_str.find("p:64:64"), std::string::npos) + << "Expected 64-bit pointer size for 64-bit architecture"; + } else if (triple.find("i386") != std::string::npos || + triple.find("i686") != std::string::npos || + triple.find("arm") != std::string::npos) { + // For 32-bit architectures, expect pointer size to be 32 bits + EXPECT_NE(data_layout_str.find("p:32:32"), std::string::npos) + << "Expected 32-bit pointer size for 32-bit architecture"; + } + + // Verify that integer alignments are present + EXPECT_NE(data_layout_str.find("i"), std::string::npos) + << "Data layout should contain integer type information"; + + // Verify endianness consistency + if (data_layout_str[0] == 'e') { + std::cout << "Architecture is little-endian" << std::endl; + } else if (data_layout_str[0] == 'E') { + std::cout << "Architecture is big-endian" << std::endl; + } + + // Additional verification: Check that the data layout is valid + // by querying specific properties + EXPECT_GT(data_layout.getPointerSize(), 0u) + << "Pointer size should be greater than 0"; + EXPECT_TRUE(data_layout.isLittleEndian() || data_layout.isBigEndian()) + << "Data layout should have a defined endianness"; +} + +// Test that verifies data layout consistency across multiple generator instances +TEST_F(TestTargetDataLayout, VerifyDataLayoutConsistency) { + // Create two LLVM generators with the same configuration + ASSERT_OK_AND_ASSIGN(auto generator1, LLVMGenerator::Make(TestConfiguration(), false)); + ASSERT_OK_AND_ASSIGN(auto generator2, LLVMGenerator::Make(TestConfiguration(), false)); + + // Get the data layout from both modules + const llvm::DataLayout& data_layout1 = generator1->module()->getDataLayout(); + const llvm::DataLayout& data_layout2 = generator2->module()->getDataLayout(); + + std::string data_layout_str1 = data_layout1.getStringRepresentation(); + std::string data_layout_str2 = data_layout2.getStringRepresentation(); + + // Verify that both generators produce the same data layout + EXPECT_EQ(data_layout_str1, data_layout_str2) + << "Data layout should be consistent across generator instances"; + + // Verify that pointer sizes match + EXPECT_EQ(data_layout1.getPointerSize(), data_layout2.getPointerSize()) + << "Pointer sizes should match"; + + // Verify that endianness matches + EXPECT_EQ(data_layout1.isLittleEndian(), data_layout2.isLittleEndian()) + << "Endianness should match"; +} + +// Test that verifies specific data layout properties for common types +TEST_F(TestTargetDataLayout, VerifyTypeAlignments) { + ASSERT_OK_AND_ASSIGN(auto generator, LLVMGenerator::Make(TestConfiguration(), false)); + + llvm::Module* module = generator->module(); + const llvm::DataLayout& data_layout = module->getDataLayout(); + + // Get the LLVM context and types + llvm::LLVMContext* context = generator->context(); + LLVMTypes* types = generator->types(); + + // Verify alignment for common integer types + EXPECT_GT(data_layout.getABITypeAlign(types->i8_type()).value(), 0u) + << "i8 type should have non-zero alignment"; + EXPECT_GT(data_layout.getABITypeAlign(types->i32_type()).value(), 0u) + << "i32 type should have non-zero alignment"; + EXPECT_GT(data_layout.getABITypeAlign(types->i64_type()).value(), 0u) + << "i64 type should have non-zero alignment"; + + // Verify alignment for pointer types + EXPECT_GT(data_layout.getABITypeAlign(types->i8_ptr_type()).value(), 0u) + << "i8* type should have non-zero alignment"; + + // Verify type sizes + EXPECT_EQ(data_layout.getTypeAllocSize(types->i8_type()), 1u) + << "i8 type should be 1 byte"; + EXPECT_EQ(data_layout.getTypeAllocSize(types->i32_type()), 4u) + << "i32 type should be 4 bytes"; + EXPECT_EQ(data_layout.getTypeAllocSize(types->i64_type()), 8u) + << "i64 type should be 8 bytes"; + + // Log the data layout string for reference + std::cout << "Data Layout: " << data_layout.getStringRepresentation() << std::endl; +} + +// Test that verifies data layout for different CPU configurations +TEST_F(TestTargetDataLayout, VerifyDataLayoutForHostCPU) { + // Create a configuration that targets the host CPU + auto config = TestConfiguration(); + + ASSERT_OK_AND_ASSIGN(auto generator, LLVMGenerator::Make(config, false)); + + llvm::Module* module = generator->module(); + const llvm::DataLayout& data_layout = module->getDataLayout(); + std::string data_layout_str = data_layout.getStringRepresentation(); + + // Get host information + std::string host_cpu = llvm::sys::getHostCPUName().str(); + std::string triple = llvm::sys::getDefaultTargetTriple(); + + std::cout << "Testing with Host CPU: " << host_cpu << std::endl; + std::cout << "Target Triple: " << triple << std::endl; + std::cout << "Data Layout: " << data_layout_str << std::endl; + + // Verify that the data layout is appropriate for the host + EXPECT_FALSE(data_layout_str.empty()); + + // Check architecture-specific properties + if (triple.find("x86_64") != std::string::npos) { + // x86_64 should be little-endian + EXPECT_EQ(data_layout_str[0], 'e') << "x86_64 should be little-endian"; + // x86_64 should have 64-bit pointers + EXPECT_EQ(data_layout.getPointerSizeInBits(), 64u) + << "x86_64 should have 64-bit pointers"; + } else if (triple.find("aarch64") != std::string::npos || + triple.find("arm64") != std::string::npos) { + // ARM64 should be little-endian (most common configuration) + EXPECT_EQ(data_layout_str[0], 'e') << "ARM64 should be little-endian"; + // ARM64 should have 64-bit pointers + EXPECT_EQ(data_layout.getPointerSizeInBits(), 64u) + << "ARM64 should have 64-bit pointers"; + } + + // Verify that the data layout is valid by checking basic properties + EXPECT_TRUE(data_layout.isLittleEndian() || data_layout.isBigEndian()); + EXPECT_GT(data_layout.getPointerSize(), 0u); +} + +} // namespace gandiva + From a3833204e96f7c7b4f208eaebf01a46059378611 Mon Sep 17 00:00:00 2001 From: "logan.riggs@gmail.com" Date: Tue, 18 Nov 2025 01:25:08 +0000 Subject: [PATCH 4/8] Update test --- cpp/src/gandiva/target_datalayout_test.cc | 145 +--------------------- 1 file changed, 2 insertions(+), 143 deletions(-) diff --git a/cpp/src/gandiva/target_datalayout_test.cc b/cpp/src/gandiva/target_datalayout_test.cc index b5421ca25ea..f5c1c8b9262 100644 --- a/cpp/src/gandiva/target_datalayout_test.cc +++ b/cpp/src/gandiva/target_datalayout_test.cc @@ -17,7 +17,7 @@ #include #include -#include +#include #include "gandiva/llvm_generator.h" #include "gandiva/tests/test_util.h" @@ -56,150 +56,9 @@ TEST_F(TestTargetDataLayout, VerifyDataLayoutForArchitecture) { std::cout << "Target Triple: " << triple << std::endl; std::cout << "Data Layout: " << data_layout_str << std::endl; - // Verify basic data layout properties based on architecture - // The data layout string should contain endianness information - // 'e' for little-endian, 'E' for big-endian - EXPECT_TRUE(data_layout_str[0] == 'e' || data_layout_str[0] == 'E') - << "Data layout should start with endianness specifier"; - - // Check for pointer size information in the data layout - // Most modern architectures use 64-bit pointers - if (triple.find("x86_64") != std::string::npos || - triple.find("aarch64") != std::string::npos || - triple.find("arm64") != std::string::npos) { - // For 64-bit architectures, expect pointer size to be 64 bits - EXPECT_NE(data_layout_str.find("p:64:64"), std::string::npos) - << "Expected 64-bit pointer size for 64-bit architecture"; - } else if (triple.find("i386") != std::string::npos || - triple.find("i686") != std::string::npos || - triple.find("arm") != std::string::npos) { - // For 32-bit architectures, expect pointer size to be 32 bits - EXPECT_NE(data_layout_str.find("p:32:32"), std::string::npos) - << "Expected 32-bit pointer size for 32-bit architecture"; - } - - // Verify that integer alignments are present - EXPECT_NE(data_layout_str.find("i"), std::string::npos) - << "Data layout should contain integer type information"; - - // Verify endianness consistency - if (data_layout_str[0] == 'e') { - std::cout << "Architecture is little-endian" << std::endl; - } else if (data_layout_str[0] == 'E') { - std::cout << "Architecture is big-endian" << std::endl; - } - - // Additional verification: Check that the data layout is valid - // by querying specific properties - EXPECT_GT(data_layout.getPointerSize(), 0u) - << "Pointer size should be greater than 0"; - EXPECT_TRUE(data_layout.isLittleEndian() || data_layout.isBigEndian()) - << "Data layout should have a defined endianness"; -} - -// Test that verifies data layout consistency across multiple generator instances -TEST_F(TestTargetDataLayout, VerifyDataLayoutConsistency) { - // Create two LLVM generators with the same configuration - ASSERT_OK_AND_ASSIGN(auto generator1, LLVMGenerator::Make(TestConfiguration(), false)); - ASSERT_OK_AND_ASSIGN(auto generator2, LLVMGenerator::Make(TestConfiguration(), false)); - - // Get the data layout from both modules - const llvm::DataLayout& data_layout1 = generator1->module()->getDataLayout(); - const llvm::DataLayout& data_layout2 = generator2->module()->getDataLayout(); - - std::string data_layout_str1 = data_layout1.getStringRepresentation(); - std::string data_layout_str2 = data_layout2.getStringRepresentation(); - - // Verify that both generators produce the same data layout - EXPECT_EQ(data_layout_str1, data_layout_str2) - << "Data layout should be consistent across generator instances"; - - // Verify that pointer sizes match - EXPECT_EQ(data_layout1.getPointerSize(), data_layout2.getPointerSize()) - << "Pointer sizes should match"; - - // Verify that endianness matches - EXPECT_EQ(data_layout1.isLittleEndian(), data_layout2.isLittleEndian()) - << "Endianness should match"; -} - -// Test that verifies specific data layout properties for common types -TEST_F(TestTargetDataLayout, VerifyTypeAlignments) { - ASSERT_OK_AND_ASSIGN(auto generator, LLVMGenerator::Make(TestConfiguration(), false)); - - llvm::Module* module = generator->module(); - const llvm::DataLayout& data_layout = module->getDataLayout(); - - // Get the LLVM context and types - llvm::LLVMContext* context = generator->context(); - LLVMTypes* types = generator->types(); - - // Verify alignment for common integer types - EXPECT_GT(data_layout.getABITypeAlign(types->i8_type()).value(), 0u) - << "i8 type should have non-zero alignment"; - EXPECT_GT(data_layout.getABITypeAlign(types->i32_type()).value(), 0u) - << "i32 type should have non-zero alignment"; - EXPECT_GT(data_layout.getABITypeAlign(types->i64_type()).value(), 0u) - << "i64 type should have non-zero alignment"; - - // Verify alignment for pointer types - EXPECT_GT(data_layout.getABITypeAlign(types->i8_ptr_type()).value(), 0u) - << "i8* type should have non-zero alignment"; - - // Verify type sizes - EXPECT_EQ(data_layout.getTypeAllocSize(types->i8_type()), 1u) - << "i8 type should be 1 byte"; - EXPECT_EQ(data_layout.getTypeAllocSize(types->i32_type()), 4u) - << "i32 type should be 4 bytes"; - EXPECT_EQ(data_layout.getTypeAllocSize(types->i64_type()), 8u) - << "i64 type should be 8 bytes"; - - // Log the data layout string for reference - std::cout << "Data Layout: " << data_layout.getStringRepresentation() << std::endl; -} - -// Test that verifies data layout for different CPU configurations -TEST_F(TestTargetDataLayout, VerifyDataLayoutForHostCPU) { - // Create a configuration that targets the host CPU - auto config = TestConfiguration(); - - ASSERT_OK_AND_ASSIGN(auto generator, LLVMGenerator::Make(config, false)); - - llvm::Module* module = generator->module(); - const llvm::DataLayout& data_layout = module->getDataLayout(); - std::string data_layout_str = data_layout.getStringRepresentation(); - - // Get host information - std::string host_cpu = llvm::sys::getHostCPUName().str(); - std::string triple = llvm::sys::getDefaultTargetTriple(); - - std::cout << "Testing with Host CPU: " << host_cpu << std::endl; - std::cout << "Target Triple: " << triple << std::endl; - std::cout << "Data Layout: " << data_layout_str << std::endl; - - // Verify that the data layout is appropriate for the host + // Verify that the data layout string is not empty EXPECT_FALSE(data_layout_str.empty()); - // Check architecture-specific properties - if (triple.find("x86_64") != std::string::npos) { - // x86_64 should be little-endian - EXPECT_EQ(data_layout_str[0], 'e') << "x86_64 should be little-endian"; - // x86_64 should have 64-bit pointers - EXPECT_EQ(data_layout.getPointerSizeInBits(), 64u) - << "x86_64 should have 64-bit pointers"; - } else if (triple.find("aarch64") != std::string::npos || - triple.find("arm64") != std::string::npos) { - // ARM64 should be little-endian (most common configuration) - EXPECT_EQ(data_layout_str[0], 'e') << "ARM64 should be little-endian"; - // ARM64 should have 64-bit pointers - EXPECT_EQ(data_layout.getPointerSizeInBits(), 64u) - << "ARM64 should have 64-bit pointers"; } - - // Verify that the data layout is valid by checking basic properties - EXPECT_TRUE(data_layout.isLittleEndian() || data_layout.isBigEndian()); - EXPECT_GT(data_layout.getPointerSize(), 0u); -} - } // namespace gandiva From f41b1fba5328ed31396c143441ab9014a74cc78d Mon Sep 17 00:00:00 2001 From: Logan Riggs Date: Wed, 10 Dec 2025 15:28:50 -0800 Subject: [PATCH 5/8] Only build llvm in release mode --- ci/vcpkg/overlay/llvm/portfile.cmake | 2 ++ 1 file changed, 2 insertions(+) diff --git a/ci/vcpkg/overlay/llvm/portfile.cmake b/ci/vcpkg/overlay/llvm/portfile.cmake index b6d5bdacd02..71f8f6d710f 100644 --- a/ci/vcpkg/overlay/llvm/portfile.cmake +++ b/ci/vcpkg/overlay/llvm/portfile.cmake @@ -2,6 +2,8 @@ set(VCPKG_POLICY_ALLOW_EMPTY_FOLDERS enabled) vcpkg_check_linkage(ONLY_STATIC_LIBRARY) +# Only build release configuration to speed up build time +set(VCPKG_BUILD_TYPE release) # [BOLT] Allow to compile with MSVC (#151189) vcpkg_download_distfile( From e0900961c23fc7967c0349a0e331f9022e8da351 Mon Sep 17 00:00:00 2001 From: Logan Riggs Date: Fri, 12 Dec 2025 18:32:33 -0800 Subject: [PATCH 6/8] Revert "Update test" This reverts commit a3833204e96f7c7b4f208eaebf01a46059378611. --- cpp/src/gandiva/target_datalayout_test.cc | 145 +++++++++++++++++++++- 1 file changed, 143 insertions(+), 2 deletions(-) diff --git a/cpp/src/gandiva/target_datalayout_test.cc b/cpp/src/gandiva/target_datalayout_test.cc index f5c1c8b9262..b5421ca25ea 100644 --- a/cpp/src/gandiva/target_datalayout_test.cc +++ b/cpp/src/gandiva/target_datalayout_test.cc @@ -17,7 +17,7 @@ #include #include -#include +#include #include "gandiva/llvm_generator.h" #include "gandiva/tests/test_util.h" @@ -56,9 +56,150 @@ TEST_F(TestTargetDataLayout, VerifyDataLayoutForArchitecture) { std::cout << "Target Triple: " << triple << std::endl; std::cout << "Data Layout: " << data_layout_str << std::endl; - // Verify that the data layout string is not empty + // Verify basic data layout properties based on architecture + // The data layout string should contain endianness information + // 'e' for little-endian, 'E' for big-endian + EXPECT_TRUE(data_layout_str[0] == 'e' || data_layout_str[0] == 'E') + << "Data layout should start with endianness specifier"; + + // Check for pointer size information in the data layout + // Most modern architectures use 64-bit pointers + if (triple.find("x86_64") != std::string::npos || + triple.find("aarch64") != std::string::npos || + triple.find("arm64") != std::string::npos) { + // For 64-bit architectures, expect pointer size to be 64 bits + EXPECT_NE(data_layout_str.find("p:64:64"), std::string::npos) + << "Expected 64-bit pointer size for 64-bit architecture"; + } else if (triple.find("i386") != std::string::npos || + triple.find("i686") != std::string::npos || + triple.find("arm") != std::string::npos) { + // For 32-bit architectures, expect pointer size to be 32 bits + EXPECT_NE(data_layout_str.find("p:32:32"), std::string::npos) + << "Expected 32-bit pointer size for 32-bit architecture"; + } + + // Verify that integer alignments are present + EXPECT_NE(data_layout_str.find("i"), std::string::npos) + << "Data layout should contain integer type information"; + + // Verify endianness consistency + if (data_layout_str[0] == 'e') { + std::cout << "Architecture is little-endian" << std::endl; + } else if (data_layout_str[0] == 'E') { + std::cout << "Architecture is big-endian" << std::endl; + } + + // Additional verification: Check that the data layout is valid + // by querying specific properties + EXPECT_GT(data_layout.getPointerSize(), 0u) + << "Pointer size should be greater than 0"; + EXPECT_TRUE(data_layout.isLittleEndian() || data_layout.isBigEndian()) + << "Data layout should have a defined endianness"; +} + +// Test that verifies data layout consistency across multiple generator instances +TEST_F(TestTargetDataLayout, VerifyDataLayoutConsistency) { + // Create two LLVM generators with the same configuration + ASSERT_OK_AND_ASSIGN(auto generator1, LLVMGenerator::Make(TestConfiguration(), false)); + ASSERT_OK_AND_ASSIGN(auto generator2, LLVMGenerator::Make(TestConfiguration(), false)); + + // Get the data layout from both modules + const llvm::DataLayout& data_layout1 = generator1->module()->getDataLayout(); + const llvm::DataLayout& data_layout2 = generator2->module()->getDataLayout(); + + std::string data_layout_str1 = data_layout1.getStringRepresentation(); + std::string data_layout_str2 = data_layout2.getStringRepresentation(); + + // Verify that both generators produce the same data layout + EXPECT_EQ(data_layout_str1, data_layout_str2) + << "Data layout should be consistent across generator instances"; + + // Verify that pointer sizes match + EXPECT_EQ(data_layout1.getPointerSize(), data_layout2.getPointerSize()) + << "Pointer sizes should match"; + + // Verify that endianness matches + EXPECT_EQ(data_layout1.isLittleEndian(), data_layout2.isLittleEndian()) + << "Endianness should match"; +} + +// Test that verifies specific data layout properties for common types +TEST_F(TestTargetDataLayout, VerifyTypeAlignments) { + ASSERT_OK_AND_ASSIGN(auto generator, LLVMGenerator::Make(TestConfiguration(), false)); + + llvm::Module* module = generator->module(); + const llvm::DataLayout& data_layout = module->getDataLayout(); + + // Get the LLVM context and types + llvm::LLVMContext* context = generator->context(); + LLVMTypes* types = generator->types(); + + // Verify alignment for common integer types + EXPECT_GT(data_layout.getABITypeAlign(types->i8_type()).value(), 0u) + << "i8 type should have non-zero alignment"; + EXPECT_GT(data_layout.getABITypeAlign(types->i32_type()).value(), 0u) + << "i32 type should have non-zero alignment"; + EXPECT_GT(data_layout.getABITypeAlign(types->i64_type()).value(), 0u) + << "i64 type should have non-zero alignment"; + + // Verify alignment for pointer types + EXPECT_GT(data_layout.getABITypeAlign(types->i8_ptr_type()).value(), 0u) + << "i8* type should have non-zero alignment"; + + // Verify type sizes + EXPECT_EQ(data_layout.getTypeAllocSize(types->i8_type()), 1u) + << "i8 type should be 1 byte"; + EXPECT_EQ(data_layout.getTypeAllocSize(types->i32_type()), 4u) + << "i32 type should be 4 bytes"; + EXPECT_EQ(data_layout.getTypeAllocSize(types->i64_type()), 8u) + << "i64 type should be 8 bytes"; + + // Log the data layout string for reference + std::cout << "Data Layout: " << data_layout.getStringRepresentation() << std::endl; +} + +// Test that verifies data layout for different CPU configurations +TEST_F(TestTargetDataLayout, VerifyDataLayoutForHostCPU) { + // Create a configuration that targets the host CPU + auto config = TestConfiguration(); + + ASSERT_OK_AND_ASSIGN(auto generator, LLVMGenerator::Make(config, false)); + + llvm::Module* module = generator->module(); + const llvm::DataLayout& data_layout = module->getDataLayout(); + std::string data_layout_str = data_layout.getStringRepresentation(); + + // Get host information + std::string host_cpu = llvm::sys::getHostCPUName().str(); + std::string triple = llvm::sys::getDefaultTargetTriple(); + + std::cout << "Testing with Host CPU: " << host_cpu << std::endl; + std::cout << "Target Triple: " << triple << std::endl; + std::cout << "Data Layout: " << data_layout_str << std::endl; + + // Verify that the data layout is appropriate for the host EXPECT_FALSE(data_layout_str.empty()); + // Check architecture-specific properties + if (triple.find("x86_64") != std::string::npos) { + // x86_64 should be little-endian + EXPECT_EQ(data_layout_str[0], 'e') << "x86_64 should be little-endian"; + // x86_64 should have 64-bit pointers + EXPECT_EQ(data_layout.getPointerSizeInBits(), 64u) + << "x86_64 should have 64-bit pointers"; + } else if (triple.find("aarch64") != std::string::npos || + triple.find("arm64") != std::string::npos) { + // ARM64 should be little-endian (most common configuration) + EXPECT_EQ(data_layout_str[0], 'e') << "ARM64 should be little-endian"; + // ARM64 should have 64-bit pointers + EXPECT_EQ(data_layout.getPointerSizeInBits(), 64u) + << "ARM64 should have 64-bit pointers"; } + + // Verify that the data layout is valid by checking basic properties + EXPECT_TRUE(data_layout.isLittleEndian() || data_layout.isBigEndian()); + EXPECT_GT(data_layout.getPointerSize(), 0u); +} + } // namespace gandiva From 4fea9b09059d806946ecfce781f7706ec136b246 Mon Sep 17 00:00:00 2001 From: Logan Riggs Date: Fri, 12 Dec 2025 18:32:44 -0800 Subject: [PATCH 7/8] Revert "unit test" This reverts commit bdb46f5b639bf0dae097bd8425b3962584d4e3d9. --- cpp/src/gandiva/CMakeLists.txt | 1 - cpp/src/gandiva/target_datalayout_test.cc | 205 ---------------------- 2 files changed, 206 deletions(-) delete mode 100644 cpp/src/gandiva/target_datalayout_test.cc diff --git a/cpp/src/gandiva/CMakeLists.txt b/cpp/src/gandiva/CMakeLists.txt index 44ebfb2c413..a7f5f9dacff 100644 --- a/cpp/src/gandiva/CMakeLists.txt +++ b/cpp/src/gandiva/CMakeLists.txt @@ -276,7 +276,6 @@ add_gandiva_test(internals-test hash_utils_test.cc gdv_function_stubs_test.cc interval_holder_test.cc - target_datalayout_test.cc tests/test_util.cc EXTRA_LINK_LIBS re2::re2 diff --git a/cpp/src/gandiva/target_datalayout_test.cc b/cpp/src/gandiva/target_datalayout_test.cc deleted file mode 100644 index b5421ca25ea..00000000000 --- a/cpp/src/gandiva/target_datalayout_test.cc +++ /dev/null @@ -1,205 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -#include -#include -#include - -#include "gandiva/llvm_generator.h" -#include "gandiva/tests/test_util.h" - -namespace gandiva { - -class TestTargetDataLayout : public ::testing::Test { - protected: - void SetUp() override {} -}; - -// Test that verifies the target data layout string representation -// is consistent with the CPU architecture. This test is portable -// across different architectures. -TEST_F(TestTargetDataLayout, VerifyDataLayoutForArchitecture) { - // Create an LLVM generator with default configuration - ASSERT_OK_AND_ASSIGN(auto generator, LLVMGenerator::Make(TestConfiguration(), false)); - - // Get the module from the generator - llvm::Module* module = generator->module(); - ASSERT_NE(module, nullptr); - - // Get the data layout from the module - const llvm::DataLayout& data_layout = module->getDataLayout(); - std::string data_layout_str = data_layout.getStringRepresentation(); - - // Verify that the data layout string is not empty - EXPECT_FALSE(data_layout_str.empty()); - - // Get the host CPU architecture information - std::string host_cpu = llvm::sys::getHostCPUName().str(); - std::string triple = llvm::sys::getDefaultTargetTriple(); - - // Log the information for debugging - std::cout << "Host CPU: " << host_cpu << std::endl; - std::cout << "Target Triple: " << triple << std::endl; - std::cout << "Data Layout: " << data_layout_str << std::endl; - - // Verify basic data layout properties based on architecture - // The data layout string should contain endianness information - // 'e' for little-endian, 'E' for big-endian - EXPECT_TRUE(data_layout_str[0] == 'e' || data_layout_str[0] == 'E') - << "Data layout should start with endianness specifier"; - - // Check for pointer size information in the data layout - // Most modern architectures use 64-bit pointers - if (triple.find("x86_64") != std::string::npos || - triple.find("aarch64") != std::string::npos || - triple.find("arm64") != std::string::npos) { - // For 64-bit architectures, expect pointer size to be 64 bits - EXPECT_NE(data_layout_str.find("p:64:64"), std::string::npos) - << "Expected 64-bit pointer size for 64-bit architecture"; - } else if (triple.find("i386") != std::string::npos || - triple.find("i686") != std::string::npos || - triple.find("arm") != std::string::npos) { - // For 32-bit architectures, expect pointer size to be 32 bits - EXPECT_NE(data_layout_str.find("p:32:32"), std::string::npos) - << "Expected 32-bit pointer size for 32-bit architecture"; - } - - // Verify that integer alignments are present - EXPECT_NE(data_layout_str.find("i"), std::string::npos) - << "Data layout should contain integer type information"; - - // Verify endianness consistency - if (data_layout_str[0] == 'e') { - std::cout << "Architecture is little-endian" << std::endl; - } else if (data_layout_str[0] == 'E') { - std::cout << "Architecture is big-endian" << std::endl; - } - - // Additional verification: Check that the data layout is valid - // by querying specific properties - EXPECT_GT(data_layout.getPointerSize(), 0u) - << "Pointer size should be greater than 0"; - EXPECT_TRUE(data_layout.isLittleEndian() || data_layout.isBigEndian()) - << "Data layout should have a defined endianness"; -} - -// Test that verifies data layout consistency across multiple generator instances -TEST_F(TestTargetDataLayout, VerifyDataLayoutConsistency) { - // Create two LLVM generators with the same configuration - ASSERT_OK_AND_ASSIGN(auto generator1, LLVMGenerator::Make(TestConfiguration(), false)); - ASSERT_OK_AND_ASSIGN(auto generator2, LLVMGenerator::Make(TestConfiguration(), false)); - - // Get the data layout from both modules - const llvm::DataLayout& data_layout1 = generator1->module()->getDataLayout(); - const llvm::DataLayout& data_layout2 = generator2->module()->getDataLayout(); - - std::string data_layout_str1 = data_layout1.getStringRepresentation(); - std::string data_layout_str2 = data_layout2.getStringRepresentation(); - - // Verify that both generators produce the same data layout - EXPECT_EQ(data_layout_str1, data_layout_str2) - << "Data layout should be consistent across generator instances"; - - // Verify that pointer sizes match - EXPECT_EQ(data_layout1.getPointerSize(), data_layout2.getPointerSize()) - << "Pointer sizes should match"; - - // Verify that endianness matches - EXPECT_EQ(data_layout1.isLittleEndian(), data_layout2.isLittleEndian()) - << "Endianness should match"; -} - -// Test that verifies specific data layout properties for common types -TEST_F(TestTargetDataLayout, VerifyTypeAlignments) { - ASSERT_OK_AND_ASSIGN(auto generator, LLVMGenerator::Make(TestConfiguration(), false)); - - llvm::Module* module = generator->module(); - const llvm::DataLayout& data_layout = module->getDataLayout(); - - // Get the LLVM context and types - llvm::LLVMContext* context = generator->context(); - LLVMTypes* types = generator->types(); - - // Verify alignment for common integer types - EXPECT_GT(data_layout.getABITypeAlign(types->i8_type()).value(), 0u) - << "i8 type should have non-zero alignment"; - EXPECT_GT(data_layout.getABITypeAlign(types->i32_type()).value(), 0u) - << "i32 type should have non-zero alignment"; - EXPECT_GT(data_layout.getABITypeAlign(types->i64_type()).value(), 0u) - << "i64 type should have non-zero alignment"; - - // Verify alignment for pointer types - EXPECT_GT(data_layout.getABITypeAlign(types->i8_ptr_type()).value(), 0u) - << "i8* type should have non-zero alignment"; - - // Verify type sizes - EXPECT_EQ(data_layout.getTypeAllocSize(types->i8_type()), 1u) - << "i8 type should be 1 byte"; - EXPECT_EQ(data_layout.getTypeAllocSize(types->i32_type()), 4u) - << "i32 type should be 4 bytes"; - EXPECT_EQ(data_layout.getTypeAllocSize(types->i64_type()), 8u) - << "i64 type should be 8 bytes"; - - // Log the data layout string for reference - std::cout << "Data Layout: " << data_layout.getStringRepresentation() << std::endl; -} - -// Test that verifies data layout for different CPU configurations -TEST_F(TestTargetDataLayout, VerifyDataLayoutForHostCPU) { - // Create a configuration that targets the host CPU - auto config = TestConfiguration(); - - ASSERT_OK_AND_ASSIGN(auto generator, LLVMGenerator::Make(config, false)); - - llvm::Module* module = generator->module(); - const llvm::DataLayout& data_layout = module->getDataLayout(); - std::string data_layout_str = data_layout.getStringRepresentation(); - - // Get host information - std::string host_cpu = llvm::sys::getHostCPUName().str(); - std::string triple = llvm::sys::getDefaultTargetTriple(); - - std::cout << "Testing with Host CPU: " << host_cpu << std::endl; - std::cout << "Target Triple: " << triple << std::endl; - std::cout << "Data Layout: " << data_layout_str << std::endl; - - // Verify that the data layout is appropriate for the host - EXPECT_FALSE(data_layout_str.empty()); - - // Check architecture-specific properties - if (triple.find("x86_64") != std::string::npos) { - // x86_64 should be little-endian - EXPECT_EQ(data_layout_str[0], 'e') << "x86_64 should be little-endian"; - // x86_64 should have 64-bit pointers - EXPECT_EQ(data_layout.getPointerSizeInBits(), 64u) - << "x86_64 should have 64-bit pointers"; - } else if (triple.find("aarch64") != std::string::npos || - triple.find("arm64") != std::string::npos) { - // ARM64 should be little-endian (most common configuration) - EXPECT_EQ(data_layout_str[0], 'e') << "ARM64 should be little-endian"; - // ARM64 should have 64-bit pointers - EXPECT_EQ(data_layout.getPointerSizeInBits(), 64u) - << "ARM64 should have 64-bit pointers"; - } - - // Verify that the data layout is valid by checking basic properties - EXPECT_TRUE(data_layout.isLittleEndian() || data_layout.isBigEndian()); - EXPECT_GT(data_layout.getPointerSize(), 0u); -} - -} // namespace gandiva - From efb1f9ba4b49baee88be6a22961808051c260114 Mon Sep 17 00:00:00 2001 From: Logan Riggs Date: Fri, 12 Dec 2025 18:32:54 -0800 Subject: [PATCH 8/8] Revert "Add cpu attrs to llvm code generation." This reverts commit 4003cba24f57794b9b0342a623fab099c5589329. --- cpp/src/gandiva/engine.cc | 1 - 1 file changed, 1 deletion(-) diff --git a/cpp/src/gandiva/engine.cc b/cpp/src/gandiva/engine.cc index 5806413919d..c91afd7c4b7 100644 --- a/cpp/src/gandiva/engine.cc +++ b/cpp/src/gandiva/engine.cc @@ -305,7 +305,6 @@ Engine::Engine(const std::shared_ptr& conf, // LLVM 10 doesn't like the expr function name to be the same as the module name auto module_id = "gdv_module_" + std::to_string(reinterpret_cast(this)); module_ = std::make_unique(module_id, *context_); - module_->setDataLayout(target_machine_->createDataLayout()); } Engine::~Engine() {}