From 1f3c6c7a0558339d4d8c52c4661fedf431c27455 Mon Sep 17 00:00:00 2001 From: Tim Hurski Date: Fri, 26 Dec 2025 10:11:22 -0500 Subject: [PATCH 01/14] DX-108149: Add IV generation and extraction utility functions --- cpp/src/gandiva/CMakeLists.txt | 2 + cpp/src/gandiva/encrypt_utils_iv.cc | 87 +++++++++++++ cpp/src/gandiva/encrypt_utils_iv.h | 62 ++++++++++ cpp/src/gandiva/encrypt_utils_iv_test.cc | 150 +++++++++++++++++++++++ 4 files changed, 301 insertions(+) create mode 100644 cpp/src/gandiva/encrypt_utils_iv.cc create mode 100644 cpp/src/gandiva/encrypt_utils_iv.h create mode 100644 cpp/src/gandiva/encrypt_utils_iv_test.cc diff --git a/cpp/src/gandiva/CMakeLists.txt b/cpp/src/gandiva/CMakeLists.txt index 836fecec960..4b77f0fc296 100644 --- a/cpp/src/gandiva/CMakeLists.txt +++ b/cpp/src/gandiva/CMakeLists.txt @@ -65,6 +65,7 @@ set(SRC_FILES engine.cc date_utils.cc encrypt_utils_common.cc + encrypt_utils_iv.cc encrypt_utils_ecb.cc encrypt_utils_cbc.cc encrypt_utils_gcm.cc @@ -269,6 +270,7 @@ add_gandiva_test(internals-test encrypt_utils_ecb_test.cc encrypt_utils_cbc_test.cc encrypt_utils_gcm_test.cc + encrypt_utils_iv_test.cc encrypt_utils_common_test.cc expr_decomposer_test.cc exported_funcs_registry_test.cc diff --git a/cpp/src/gandiva/encrypt_utils_iv.cc b/cpp/src/gandiva/encrypt_utils_iv.cc new file mode 100644 index 00000000000..338ece1b8c2 --- /dev/null +++ b/cpp/src/gandiva/encrypt_utils_iv.cc @@ -0,0 +1,87 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. 
You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#include "gandiva/encrypt_utils_iv.h" +#include +#include +#include +#include + +namespace gandiva { + +void generate_random_iv(unsigned char* iv_buffer, int32_t iv_length) { + if (iv_buffer == nullptr) { + throw std::runtime_error("IV buffer cannot be null"); + } + + if (iv_length <= 0) { + std::ostringstream oss; + oss << "Invalid IV length: " << iv_length << ". IV length must be positive"; + throw std::runtime_error(oss.str()); + } + + // Generate cryptographically secure random bytes using OpenSSL + int result = RAND_bytes(iv_buffer, iv_length); + if (result != 1) { + throw std::runtime_error( + "Failed to generate random IV: OpenSSL RAND_bytes failed"); + } +} + +void extract_iv_from_ciphertext(const char* ciphertext_with_iv, int32_t ciphertext_len, + int32_t iv_length, unsigned char* extracted_iv, + const char** actual_ciphertext, + int32_t* actual_ciphertext_len) { + if (ciphertext_with_iv == nullptr) { + throw std::runtime_error("Ciphertext cannot be null"); + } + + if (extracted_iv == nullptr) { + throw std::runtime_error("Extracted IV buffer cannot be null"); + } + + if (actual_ciphertext == nullptr) { + throw std::runtime_error("Actual ciphertext output pointer cannot be null"); + } + + if (actual_ciphertext_len == nullptr) { + throw std::runtime_error("Actual ciphertext length output pointer cannot be null"); + } + + if (iv_length <= 0) { + std::ostringstream oss; + oss << "Invalid IV length: " << iv_length << ". 
IV length must be positive"; + throw std::runtime_error(oss.str()); + } + + if (ciphertext_len < iv_length) { + std::ostringstream oss; + oss << "Ciphertext too short to contain IV: ciphertext is " << ciphertext_len + << " bytes but IV requires " << iv_length << " bytes"; + throw std::runtime_error(oss.str()); + } + + // Extract IV from the beginning of ciphertext + std::memcpy(extracted_iv, ciphertext_with_iv, iv_length); + + // Set pointer to actual ciphertext (after IV) + *actual_ciphertext = ciphertext_with_iv + iv_length; + *actual_ciphertext_len = ciphertext_len - iv_length; +} + +} // namespace gandiva + diff --git a/cpp/src/gandiva/encrypt_utils_iv.h b/cpp/src/gandiva/encrypt_utils_iv.h new file mode 100644 index 00000000000..11a381d1702 --- /dev/null +++ b/cpp/src/gandiva/encrypt_utils_iv.h @@ -0,0 +1,62 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +#ifndef GANDIVA_ENCRYPT_UTILS_IV_H +#define GANDIVA_ENCRYPT_UTILS_IV_H + +#include +#include "gandiva/visibility.h" + +namespace gandiva { + +// IV length constants for different encryption modes +constexpr int32_t GCM_IV_LENGTH = 12; // 12 bytes (96 bits) - recommended for GCM +constexpr int32_t CBC_IV_LENGTH = 16; // 16 bytes (128 bits) - required for CBC + +/** + * Generate a cryptographically secure random initialization vector (IV) + * using OpenSSL's RAND_bytes. + * + * @param iv_buffer Output buffer to store the generated IV + * @param iv_length Length of IV to generate in bytes (typically 12 for GCM, 16 for CBC) + * @throws std::runtime_error if iv_buffer is null, iv_length is not positive, or random number generation fails + */ +GANDIVA_EXPORT +void generate_random_iv(unsigned char* iv_buffer, int32_t iv_length); + +/** + * Extract IV from the beginning of ciphertext and return pointer to actual ciphertext. + * This is a helper function for decrypt operations when IV is embedded in the ciphertext. + * + * @param ciphertext_with_iv Pointer to ciphertext with IV prepended + * @param ciphertext_len Total length including IV + * @param iv_length Expected IV length (12 for GCM, 16 for CBC) + * @param extracted_iv Output buffer to store extracted IV (must be at least iv_length bytes) + * @param actual_ciphertext Output pointer to the actual ciphertext (after IV) + * @param actual_ciphertext_len Output length of actual ciphertext (without IV) + * @throws std::runtime_error if any pointer argument is null, iv_length is not positive, or ciphertext is too short to contain IV + */ +GANDIVA_EXPORT +void extract_iv_from_ciphertext(const char* ciphertext_with_iv, int32_t ciphertext_len, + int32_t iv_length, unsigned char* extracted_iv, + const char** actual_ciphertext, + int32_t* actual_ciphertext_len); + +} // namespace gandiva + +#endif // GANDIVA_ENCRYPT_UTILS_IV_H + diff --git a/cpp/src/gandiva/encrypt_utils_iv_test.cc b/cpp/src/gandiva/encrypt_utils_iv_test.cc new file mode 100644 index 00000000000..7aa858a6761 --- /dev/null +++ 
b/cpp/src/gandiva/encrypt_utils_iv_test.cc @@ -0,0 +1,150 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#include "gandiva/encrypt_utils_iv.h" + +#include +#include +#include + +// Test that multiple IV generations produce different values +TEST(TestIVUtils, TestGenerateRandomIvUniqueness) { + unsigned char iv1[gandiva::GCM_IV_LENGTH]; + unsigned char iv2[gandiva::GCM_IV_LENGTH]; + + gandiva::generate_random_iv(iv1, gandiva::GCM_IV_LENGTH); + gandiva::generate_random_iv(iv2, gandiva::GCM_IV_LENGTH); + + EXPECT_NE(0, std::memcmp(iv1, iv2, gandiva::GCM_IV_LENGTH)); +} + +// Test that generated IV has the correct length +TEST(TestIVUtils, TestGenerateRandomIvLength) { + unsigned char iv[gandiva::GCM_IV_LENGTH]; + + // Generate IV with GCM_IV_LENGTH (12 bytes) + ASSERT_NO_THROW(gandiva::generate_random_iv(iv, gandiva::GCM_IV_LENGTH)); + + // Verify that the function accepts the correct length without throwing + // The actual length verification is implicit - if the buffer is correctly + // filled, no out-of-bounds access occurs + + // Also test with CBC_IV_LENGTH (16 bytes) + unsigned char iv_cbc[gandiva::CBC_IV_LENGTH]; + ASSERT_NO_THROW(gandiva::generate_random_iv(iv_cbc, gandiva::CBC_IV_LENGTH)); +} + +// Test 
error handling for null buffer +TEST(TestIVUtils, TestGenerateRandomIvNullBuffer) { + EXPECT_THROW(gandiva::generate_random_iv(nullptr, gandiva::GCM_IV_LENGTH), + std::runtime_error); +} + +// Test error handling for invalid length +TEST(TestIVUtils, TestGenerateRandomIvInvalidLength) { + unsigned char iv[16]; + EXPECT_THROW(gandiva::generate_random_iv(iv, 0), std::runtime_error); + EXPECT_THROW(gandiva::generate_random_iv(iv, -1), std::runtime_error); +} + +// Test extracting GCM IV from ciphertext +TEST(TestIVUtils, TestExtractIvFromCiphertextGcm) { + // Create test data: [12-byte IV][ciphertext] + const char test_data[] = "123456789012CIPHERTEXT_DATA"; + const int32_t total_len = 28; // 12 + 16 + + unsigned char extracted_iv[gandiva::GCM_IV_LENGTH]; + const char* actual_ciphertext = nullptr; + int32_t actual_ciphertext_len = 0; + + ASSERT_NO_THROW(gandiva::extract_iv_from_ciphertext( + test_data, total_len, gandiva::GCM_IV_LENGTH, extracted_iv, + &actual_ciphertext, &actual_ciphertext_len)); + + // Verify IV was extracted correctly + EXPECT_EQ(0, std::memcmp(extracted_iv, "123456789012", gandiva::GCM_IV_LENGTH)); + + // Verify ciphertext pointer and length + EXPECT_EQ(actual_ciphertext, test_data + gandiva::GCM_IV_LENGTH); + EXPECT_EQ(actual_ciphertext_len, 16); + EXPECT_EQ(0, std::memcmp(actual_ciphertext, "CIPHERTEXT_DATA", 15)); +} + +// Test extracting CBC IV from ciphertext +TEST(TestIVUtils, TestExtractIvFromCiphertextCbc) { + // Create test data: [16-byte IV][ciphertext] + const char test_data[] = "1234567890123456CIPHERTEXT_DATA_HERE"; + const int32_t total_len = 37; // 16 + 21 + + unsigned char extracted_iv[gandiva::CBC_IV_LENGTH]; + const char* actual_ciphertext = nullptr; + int32_t actual_ciphertext_len = 0; + + ASSERT_NO_THROW(gandiva::extract_iv_from_ciphertext( + test_data, total_len, gandiva::CBC_IV_LENGTH, extracted_iv, + &actual_ciphertext, &actual_ciphertext_len)); + + // Verify IV was extracted correctly + EXPECT_EQ(0, 
std::memcmp(extracted_iv, "1234567890123456", gandiva::CBC_IV_LENGTH)); + + // Verify ciphertext pointer and length + EXPECT_EQ(actual_ciphertext, test_data + gandiva::CBC_IV_LENGTH); + EXPECT_EQ(actual_ciphertext_len, 21); + EXPECT_EQ(0, std::memcmp(actual_ciphertext, "CIPHERTEXT_DATA_HERE", 20)); +} + +// Test error handling for ciphertext too short +TEST(TestIVUtils, TestExtractIvFromCiphertextTooShort) { + const char test_data[] = "SHORT"; + unsigned char extracted_iv[gandiva::GCM_IV_LENGTH]; + const char* actual_ciphertext = nullptr; + int32_t actual_ciphertext_len = 0; + + EXPECT_THROW(gandiva::extract_iv_from_ciphertext( + test_data, 5, gandiva::GCM_IV_LENGTH, extracted_iv, + &actual_ciphertext, &actual_ciphertext_len), + std::runtime_error); +} + +// Test error handling for null inputs +TEST(TestIVUtils, TestExtractIvFromCiphertextNullInputs) { + const char test_data[] = "1234567890123456CIPHERTEXT"; + unsigned char extracted_iv[16]; + const char* actual_ciphertext = nullptr; + int32_t actual_ciphertext_len = 0; + + // Null ciphertext + EXPECT_THROW(gandiva::extract_iv_from_ciphertext( + nullptr, 27, 16, extracted_iv, &actual_ciphertext, &actual_ciphertext_len), + std::runtime_error); + + // Null extracted_iv buffer + EXPECT_THROW(gandiva::extract_iv_from_ciphertext( + test_data, 27, 16, nullptr, &actual_ciphertext, &actual_ciphertext_len), + std::runtime_error); + + // Null actual_ciphertext pointer + EXPECT_THROW(gandiva::extract_iv_from_ciphertext( + test_data, 27, 16, extracted_iv, nullptr, &actual_ciphertext_len), + std::runtime_error); + + // Null actual_ciphertext_len pointer + EXPECT_THROW(gandiva::extract_iv_from_ciphertext( + test_data, 27, 16, extracted_iv, &actual_ciphertext, nullptr), + std::runtime_error); +} + From d7d993179e38a065b177ebf239040592f18c550f Mon Sep 17 00:00:00 2001 From: Tim Hurski Date: Fri, 26 Dec 2025 11:13:10 -0500 Subject: [PATCH 02/14] DX-108149: Use IV generation and extraction in CBC and GCM modes --- 
cpp/src/gandiva/encrypt_utils_cbc.cc | 78 +++++++++++++++----- cpp/src/gandiva/encrypt_utils_cbc.h | 33 +++++++-- cpp/src/gandiva/encrypt_utils_gcm.cc | 93 ++++++++++++++++++------ cpp/src/gandiva/encrypt_utils_gcm.h | 37 +++++++--- cpp/src/gandiva/encrypt_utils_iv.h | 4 - cpp/src/gandiva/encrypt_utils_iv_test.cc | 2 + 6 files changed, 187 insertions(+), 60 deletions(-) diff --git a/cpp/src/gandiva/encrypt_utils_cbc.cc b/cpp/src/gandiva/encrypt_utils_cbc.cc index 04eb60c96a7..0d0b5dcfa63 100644 --- a/cpp/src/gandiva/encrypt_utils_cbc.cc +++ b/cpp/src/gandiva/encrypt_utils_cbc.cc @@ -17,6 +17,7 @@ #include "gandiva/encrypt_utils_cbc.h" #include "gandiva/encrypt_utils_common.h" +#include "gandiva/encrypt_utils_iv.h" #include #include #include @@ -51,12 +52,23 @@ GANDIVA_EXPORT int32_t aes_encrypt_cbc(const char* plaintext, int32_t plaintext_len, const char* key, int32_t key_len, const char* iv, int32_t iv_len, bool use_padding, unsigned char* cipher) { - // Validate IV length - if (iv_len != 16) { - std::ostringstream oss; - oss << "Invalid IV length for AES-CBC: " << iv_len - << " bytes. IV must be exactly 16 bytes"; - throw std::runtime_error(oss.str()); + // Buffer for IV (either user-supplied or auto-generated) + unsigned char iv_buffer[CBC_IV_LENGTH]; + const unsigned char* actual_iv = nullptr; + + // Handle NULL IV: generate random IV + if (iv == nullptr || iv_len == 0) { + generate_random_iv(iv_buffer, CBC_IV_LENGTH); + actual_iv = iv_buffer; + } else { + // Validate user-supplied IV length - CBC requires exactly 16 bytes + if (iv_len != CBC_IV_LENGTH) { + std::ostringstream oss; + oss << "Invalid IV length for AES-CBC: " << iv_len + << " bytes. 
IV must be exactly " << CBC_IV_LENGTH << " bytes"; + throw std::runtime_error(oss.str()); + } + actual_iv = reinterpret_cast(iv); } int32_t cipher_len = 0; @@ -69,9 +81,13 @@ int32_t aes_encrypt_cbc(const char* plaintext, int32_t plaintext_len, const char get_openssl_error_string()); } + // Prepend IV to output: [16-byte IV][ciphertext] + std::memcpy(cipher, actual_iv, CBC_IV_LENGTH); + cipher_len = CBC_IV_LENGTH; + if (!EVP_EncryptInit_ex(en_ctx, cipher_algo, nullptr, reinterpret_cast(key), - reinterpret_cast(iv))) { + actual_iv)) { EVP_CIPHER_CTX_free(en_ctx); throw std::runtime_error("Could not initialize EVP cipher context for encryption: " + get_openssl_error_string()); @@ -84,7 +100,8 @@ int32_t aes_encrypt_cbc(const char* plaintext, int32_t plaintext_len, const char get_openssl_error_string()); } - if (!EVP_EncryptUpdate(en_ctx, cipher, &len, + // Encrypt plaintext (write after IV) + if (!EVP_EncryptUpdate(en_ctx, cipher + cipher_len, &len, reinterpret_cast(plaintext), plaintext_len)) { EVP_CIPHER_CTX_free(en_ctx); @@ -94,7 +111,7 @@ int32_t aes_encrypt_cbc(const char* plaintext, int32_t plaintext_len, const char cipher_len += len; - if (!EVP_EncryptFinal_ex(en_ctx, cipher + len, &len)) { + if (!EVP_EncryptFinal_ex(en_ctx, cipher + cipher_len, &len)) { EVP_CIPHER_CTX_free(en_ctx); throw std::runtime_error("Could not finalize EVP cipher context for encryption: " + get_openssl_error_string()); @@ -110,12 +127,37 @@ GANDIVA_EXPORT int32_t aes_decrypt_cbc(const char* ciphertext, int32_t ciphertext_len, const char* key, int32_t key_len, const char* iv, int32_t iv_len, bool use_padding, unsigned char* plaintext) { - // Validate IV length - if (iv_len != 16) { - std::ostringstream oss; - oss << "Invalid IV length for AES-CBC: " << iv_len - << " bytes. 
IV must be exactly 16 bytes"; - throw std::runtime_error(oss.str()); + // Buffer for extracted IV (if needed) + unsigned char iv_buffer[CBC_IV_LENGTH]; + const unsigned char* actual_iv = nullptr; + const char* actual_ciphertext = ciphertext; + int32_t actual_ciphertext_len = ciphertext_len; + + // Handle NULL IV: extract from beginning of ciphertext + if (iv == nullptr || iv_len == 0) { + // Validate ciphertext length: must have IV (16) + at least one block (16) = 32 bytes minimum + if (ciphertext_len < CBC_IV_LENGTH + 16) { + std::ostringstream oss; + oss << "Ciphertext too short for AES-CBC with embedded IV: " << ciphertext_len + << " bytes. Must be at least " << (CBC_IV_LENGTH + 16) + << " bytes (16-byte IV + minimum 16-byte block)"; + throw std::runtime_error(oss.str()); + } + + // Extract IV from beginning of ciphertext + extract_iv_from_ciphertext(ciphertext, ciphertext_len, CBC_IV_LENGTH, + iv_buffer, &actual_ciphertext, + &actual_ciphertext_len); + actual_iv = iv_buffer; + } else { + // Validate user-supplied IV length + if (iv_len != CBC_IV_LENGTH) { + std::ostringstream oss; + oss << "Invalid IV length for AES-CBC: " << iv_len + << " bytes. 
IV must be exactly " << CBC_IV_LENGTH << " bytes"; + throw std::runtime_error(oss.str()); + } + actual_iv = reinterpret_cast(iv); } int32_t plaintext_len = 0; @@ -130,7 +172,7 @@ int32_t aes_decrypt_cbc(const char* ciphertext, int32_t ciphertext_len, const ch if (!EVP_DecryptInit_ex(de_ctx, cipher_algo, nullptr, reinterpret_cast(key), - reinterpret_cast(iv))) { + actual_iv)) { EVP_CIPHER_CTX_free(de_ctx); throw std::runtime_error("Could not initialize EVP cipher context for decryption: " + get_openssl_error_string()); @@ -144,8 +186,8 @@ int32_t aes_decrypt_cbc(const char* ciphertext, int32_t ciphertext_len, const ch } if (!EVP_DecryptUpdate(de_ctx, plaintext, &len, - reinterpret_cast(ciphertext), - ciphertext_len)) { + reinterpret_cast(actual_ciphertext), + actual_ciphertext_len)) { EVP_CIPHER_CTX_free(de_ctx); throw std::runtime_error("Could not update EVP cipher context for decryption: " + get_openssl_error_string()); diff --git a/cpp/src/gandiva/encrypt_utils_cbc.h b/cpp/src/gandiva/encrypt_utils_cbc.h index b083d6f0a2d..8352c5be7ec 100644 --- a/cpp/src/gandiva/encrypt_utils_cbc.h +++ b/cpp/src/gandiva/encrypt_utils_cbc.h @@ -28,18 +28,29 @@ constexpr const char* AES_CBC_MODE = "AES-CBC"; constexpr const char* AES_CBC_PKCS7_MODE = "AES-CBC-PKCS7"; constexpr const char* AES_CBC_NONE_MODE = "AES-CBC-NONE"; +// CBC IV length in bytes +constexpr int32_t CBC_IV_LENGTH = 16; // 16 bytes (128 bits) - required for CBC + /** * Encrypt data using AES-CBC algorithm with explicit padding mode * + * Output format: [16-byte IV][ciphertext] + * The IV is always prepended to the output. 
+ * + * IV Handling: + * - If iv is NULL or iv_len is 0: A cryptographically secure random 16-byte IV + * is automatically generated using OpenSSL RAND_bytes + * - If iv is provided: It must be exactly 16 bytes, and will be used as-is + * * @param plaintext The data to encrypt * @param plaintext_len Length of plaintext in bytes * @param key The encryption key (16, 24, or 32 bytes for 128, 192, 256-bit keys) * @param key_len Length of key in bytes - * @param iv The initialization vector (must be exactly 16 bytes) - * @param iv_len Length of IV in bytes (must be 16) + * @param iv The initialization vector (NULL for auto-generation, or exactly 16 bytes) + * @param iv_len Length of IV in bytes (0 for auto-generation, or 16) * @param use_padding Whether to use PKCS7 padding (true) or no padding (false) - * @param cipher Output buffer for encrypted data - * @return Length of encrypted data in bytes + * @param cipher Output buffer for encrypted data (must be at least plaintext_len + 32 bytes) + * @return Length of encrypted data in bytes (16 + ciphertext_len) * @throws std::runtime_error on encryption failure or invalid parameters */ GANDIVA_EXPORT @@ -50,12 +61,20 @@ int32_t aes_encrypt_cbc(const char* plaintext, int32_t plaintext_len, const char /** * Decrypt data using AES-CBC algorithm with explicit padding mode * + * IV Handling: + * - If iv is NULL or iv_len is 0: IV is extracted from the first 16 bytes of ciphertext + * (expects format: [16-byte IV][ciphertext]) + * - If iv is provided: It must be exactly 16 bytes, and ciphertext should be + * [ciphertext] without embedded IV (aes_encrypt_cbc always prepends the IV, so strip its first 16 bytes first) + * * @param ciphertext The data to decrypt - * @param ciphertext_len Length of ciphertext in bytes + * - With NULL IV: [16-byte IV][ciphertext] (min 32 bytes) + * - With provided IV: [ciphertext] + * @param ciphertext_len Length of ciphertext in bytes (includes IV if embedded) * @param key The decryption key (16, 24, or 32 bytes for 128, 192, 256-bit keys) * @param key_len Length of key in 
bytes - * @param iv The initialization vector (must be exactly 16 bytes) - * @param iv_len Length of IV in bytes (must be 16) + * @param iv The initialization vector (NULL for extraction, or exactly 16 bytes) + * @param iv_len Length of IV in bytes (0 for extraction, or 16) * @param use_padding Whether to use PKCS7 padding (true) or no padding (false) * @param plaintext Output buffer for decrypted data * @return Length of decrypted data in bytes diff --git a/cpp/src/gandiva/encrypt_utils_gcm.cc b/cpp/src/gandiva/encrypt_utils_gcm.cc index f028243da59..085e02b695a 100644 --- a/cpp/src/gandiva/encrypt_utils_gcm.cc +++ b/cpp/src/gandiva/encrypt_utils_gcm.cc @@ -17,6 +17,7 @@ #include "gandiva/encrypt_utils_gcm.h" #include "gandiva/encrypt_utils_common.h" +#include "gandiva/encrypt_utils_iv.h" #include #include #include @@ -51,9 +52,23 @@ int32_t aes_encrypt_gcm(const char* plaintext, int32_t plaintext_len, const char* key, int32_t key_len, const char* iv, int32_t iv_len, const char* aad, int32_t aad_len, unsigned char* cipher) { - if (iv_len <= 0) { - throw std::runtime_error( - "Invalid IV length for AES-GCM: IV length must be greater than 0"); + // Buffer for IV (either user-supplied or auto-generated) + unsigned char iv_buffer[GCM_IV_LENGTH]; + const unsigned char* actual_iv = nullptr; + + // Handle NULL IV: generate random IV + if (iv == nullptr || iv_len == 0) { + generate_random_iv(iv_buffer, GCM_IV_LENGTH); + actual_iv = iv_buffer; + } else { + // Validate user-supplied IV length - GCM requires exactly 12 bytes + if (iv_len != GCM_IV_LENGTH) { + std::ostringstream oss; + oss << "Invalid IV length for AES-GCM: " << iv_len + << " bytes. 
IV must be exactly " << GCM_IV_LENGTH << " bytes"; + throw std::runtime_error(oss.str()); + } + actual_iv = reinterpret_cast(iv); } int32_t cipher_len = 0; @@ -67,16 +82,20 @@ int32_t aes_encrypt_gcm(const char* plaintext, int32_t plaintext_len, } try { + // Prepend IV to output: [12-byte IV][ciphertext][16-byte tag] + std::memcpy(cipher, actual_iv, GCM_IV_LENGTH); + cipher_len = GCM_IV_LENGTH; + if (!EVP_EncryptInit_ex(en_ctx, cipher_algo, nullptr, reinterpret_cast(key), - reinterpret_cast(iv))) { + actual_iv)) { throw std::runtime_error( "Could not initialize EVP cipher context for encryption: " + get_openssl_error_string()); } // Set IV length for GCM mode - if (!EVP_CIPHER_CTX_ctrl(en_ctx, EVP_CTRL_GCM_SET_IVLEN, iv_len, nullptr)) { + if (!EVP_CIPHER_CTX_ctrl(en_ctx, EVP_CTRL_GCM_SET_IVLEN, GCM_IV_LENGTH, nullptr)) { throw std::runtime_error("Could not set GCM IV length: " + get_openssl_error_string()); } @@ -90,8 +109,8 @@ int32_t aes_encrypt_gcm(const char* plaintext, int32_t plaintext_len, } } - // Encrypt plaintext - if (!EVP_EncryptUpdate(en_ctx, cipher, &len, + // Encrypt plaintext (write after IV) + if (!EVP_EncryptUpdate(en_ctx, cipher + cipher_len, &len, reinterpret_cast(plaintext), plaintext_len)) { throw std::runtime_error("Could not update EVP cipher context for encryption: " + @@ -101,7 +120,7 @@ int32_t aes_encrypt_gcm(const char* plaintext, int32_t plaintext_len, cipher_len += len; // Finalize encryption - if (!EVP_EncryptFinal_ex(en_ctx, cipher + len, &len)) { + if (!EVP_EncryptFinal_ex(en_ctx, cipher + cipher_len, &len)) { throw std::runtime_error("Could not finalize EVP cipher context for encryption: " + get_openssl_error_string()); } @@ -129,14 +148,44 @@ int32_t aes_decrypt_gcm(const char* ciphertext, int32_t ciphertext_len, const char* key, int32_t key_len, const char* iv, int32_t iv_len, const char* aad, int32_t aad_len, unsigned char* plaintext) { - if (iv_len <= 0) { - throw std::runtime_error( - "Invalid IV length for AES-GCM: IV length 
must be greater than 0"); - } + // Buffer for extracted IV (if needed) + unsigned char iv_buffer[GCM_IV_LENGTH]; + const unsigned char* actual_iv = nullptr; + const char* actual_ciphertext = ciphertext; + int32_t actual_ciphertext_with_tag_len = ciphertext_len; + + // Handle NULL IV: extract from beginning of ciphertext + if (iv == nullptr || iv_len == 0) { + // Validate ciphertext length: must have IV (12) + tag (16) = 28 bytes minimum + if (ciphertext_len < GCM_IV_LENGTH + GCM_TAG_LENGTH) { + std::ostringstream oss; + oss << "Ciphertext too short for AES-GCM with embedded IV: " << ciphertext_len + << " bytes. Must be at least " << (GCM_IV_LENGTH + GCM_TAG_LENGTH) + << " bytes (12-byte IV + 16-byte tag)"; + throw std::runtime_error(oss.str()); + } + + // Extract IV from beginning of ciphertext + extract_iv_from_ciphertext(ciphertext, ciphertext_len, GCM_IV_LENGTH, + iv_buffer, &actual_ciphertext, + &actual_ciphertext_with_tag_len); + actual_iv = iv_buffer; + } else { + // Validate user-supplied IV length + if (iv_len != GCM_IV_LENGTH) { + std::ostringstream oss; + oss << "Invalid IV length for AES-GCM: " << iv_len + << " bytes. 
IV must be exactly " << GCM_IV_LENGTH << " bytes"; + throw std::runtime_error(oss.str()); + } + + // Validate ciphertext length for user-supplied IV case + if (ciphertext_len < GCM_TAG_LENGTH) { + throw std::runtime_error( + "Ciphertext too short for AES-GCM: must be at least 16 bytes for tag"); + } - if (ciphertext_len < GCM_TAG_LENGTH) { - throw std::runtime_error( - "Ciphertext too short for AES-GCM: must be at least 16 bytes for tag"); + actual_iv = reinterpret_cast(iv); } int32_t plaintext_len = 0; @@ -152,14 +201,14 @@ int32_t aes_decrypt_gcm(const char* ciphertext, int32_t ciphertext_len, try { if (!EVP_DecryptInit_ex(de_ctx, cipher_algo, nullptr, reinterpret_cast(key), - reinterpret_cast(iv))) { + actual_iv)) { throw std::runtime_error( "Could not initialize EVP cipher context for decryption: " + get_openssl_error_string()); } // Set IV length for GCM mode - if (!EVP_CIPHER_CTX_ctrl(de_ctx, EVP_CTRL_GCM_SET_IVLEN, iv_len, nullptr)) { + if (!EVP_CIPHER_CTX_ctrl(de_ctx, EVP_CTRL_GCM_SET_IVLEN, GCM_IV_LENGTH, nullptr)) { throw std::runtime_error("Could not set GCM IV length: " + get_openssl_error_string()); } @@ -173,10 +222,10 @@ int32_t aes_decrypt_gcm(const char* ciphertext, int32_t ciphertext_len, } } - // Extract tag from end of ciphertext - int32_t actual_ciphertext_len = ciphertext_len - GCM_TAG_LENGTH; + // Extract tag from end of actual ciphertext (after IV if it was embedded) + int32_t ciphertext_without_tag_len = actual_ciphertext_with_tag_len - GCM_TAG_LENGTH; const unsigned char* tag = - reinterpret_cast(ciphertext + actual_ciphertext_len); + reinterpret_cast(actual_ciphertext + ciphertext_without_tag_len); // Set the authentication tag if (!EVP_CIPHER_CTX_ctrl(de_ctx, EVP_CTRL_GCM_SET_TAG, GCM_TAG_LENGTH, @@ -187,8 +236,8 @@ int32_t aes_decrypt_gcm(const char* ciphertext, int32_t ciphertext_len, // Decrypt ciphertext if (!EVP_DecryptUpdate(de_ctx, plaintext, &len, - reinterpret_cast(ciphertext), - actual_ciphertext_len)) { + 
reinterpret_cast(actual_ciphertext), + ciphertext_without_tag_len)) { throw std::runtime_error("Could not update EVP cipher context for decryption: " + get_openssl_error_string()); } diff --git a/cpp/src/gandiva/encrypt_utils_gcm.h b/cpp/src/gandiva/encrypt_utils_gcm.h index 07a597af0b6..5bcb2b8e930 100644 --- a/cpp/src/gandiva/encrypt_utils_gcm.h +++ b/cpp/src/gandiva/encrypt_utils_gcm.h @@ -26,22 +26,33 @@ namespace gandiva { // GCM mode identifier constexpr const char* AES_GCM_MODE = "AES-GCM"; +// GCM IV length in bytes +constexpr int32_t GCM_IV_LENGTH = 12; // 12 bytes (96 bits) - the recommended GCM IV length; enforced here as the only accepted length + // GCM authentication tag length in bytes constexpr int32_t GCM_TAG_LENGTH = 16; /** * Encrypt data using AES-GCM algorithm * + * Output format: [12-byte IV][ciphertext][16-byte authentication tag] + * The IV is always prepended to the output. + * + * IV Handling: + * - If iv is NULL or iv_len is 0: A cryptographically secure random 12-byte IV + * is automatically generated using OpenSSL RAND_bytes + * - If iv is provided: It must be exactly 12 bytes, and will be used as-is + * * @param plaintext The data to encrypt * @param plaintext_len Length of plaintext in bytes * @param key The encryption key (16, 24, or 32 bytes for 128, 192, 256-bit keys) * @param key_len Length of key in bytes - * @param iv The initialization vector (variable length, typically 12 bytes) - * @param iv_len Length of IV in bytes + * @param iv The initialization vector (NULL for auto-generation, or exactly 12 bytes) + * @param iv_len Length of IV in bytes (0 for auto-generation, or 12) * @param aad Optional additional authenticated data (can be null) * @param aad_len Length of AAD in bytes (0 if aad is null) - * @param cipher Output buffer for encrypted data (must be at least plaintext_len + 16 bytes) - * @return Length of encrypted data in bytes (plaintext_len + 16 for the tag) + * @param cipher Output buffer for encrypted data (must be at least plaintext_len + 28 
bytes) + * @return Length of encrypted data in bytes (12 + plaintext_len + 16) * @throws std::runtime_error on encryption failure or invalid parameters */ GANDIVA_EXPORT @@ -52,16 +63,24 @@ int32_t aes_encrypt_gcm(const char* plaintext, int32_t plaintext_len, const char /** * Decrypt data using AES-GCM algorithm * - * @param ciphertext The data to decrypt (includes 16-byte authentication tag at the end) - * @param ciphertext_len Length of ciphertext in bytes (includes tag) + * IV Handling: + * - If iv is NULL or iv_len is 0: IV is extracted from the first 12 bytes of ciphertext + * (expects format: [12-byte IV][ciphertext][16-byte tag]) + * - If iv is provided: It must be exactly 12 bytes, and ciphertext should be + * [ciphertext][16-byte tag] without embedded IV (aes_encrypt_gcm always prepends the IV, so strip its first 12 bytes first) + * + * @param ciphertext The data to decrypt + * - With NULL IV: [12-byte IV][ciphertext][16-byte tag] (min 28 bytes) + * - With provided IV: [ciphertext][16-byte tag] (min 16 bytes) + * @param ciphertext_len Length of ciphertext in bytes (includes IV if embedded, and tag) * @param key The decryption key (16, 24, or 32 bytes for 128, 192, 256-bit keys) * @param key_len Length of key in bytes - * @param iv The initialization vector (variable length, typically 12 bytes) - * @param iv_len Length of IV in bytes + * @param iv The initialization vector (NULL for extraction, or exactly 12 bytes) + * @param iv_len Length of IV in bytes (0 for extraction, or 12) * @param aad Optional additional authenticated data (can be null) * @param aad_len Length of AAD in bytes (0 if aad is null) * @param plaintext Output buffer for decrypted data - * @return Length of decrypted data in bytes (ciphertext_len - 16) + * @return Length of decrypted data in bytes * @throws std::runtime_error on decryption failure, invalid parameters, or tag verification failure */ GANDIVA_EXPORT diff --git a/cpp/src/gandiva/encrypt_utils_iv.h b/cpp/src/gandiva/encrypt_utils_iv.h index 11a381d1702..c3bdcf0e13a 100644 --- 
a/cpp/src/gandiva/encrypt_utils_iv.h +++ b/cpp/src/gandiva/encrypt_utils_iv.h @@ -23,10 +23,6 @@ namespace gandiva { -// IV length constants for different encryption modes -constexpr int32_t GCM_IV_LENGTH = 12; // 12 bytes (96 bits) - recommended for GCM -constexpr int32_t CBC_IV_LENGTH = 16; // 16 bytes (128 bits) - required for CBC - /** * Generate a cryptographically secure random initialization vector (IV) * using OpenSSL's RAND_bytes. diff --git a/cpp/src/gandiva/encrypt_utils_iv_test.cc b/cpp/src/gandiva/encrypt_utils_iv_test.cc index 7aa858a6761..0eb1af9a110 100644 --- a/cpp/src/gandiva/encrypt_utils_iv_test.cc +++ b/cpp/src/gandiva/encrypt_utils_iv_test.cc @@ -16,6 +16,8 @@ // under the License. #include "gandiva/encrypt_utils_iv.h" +#include "gandiva/encrypt_utils_gcm.h" +#include "gandiva/encrypt_utils_cbc.h" #include #include From e35573b1a766ecc20b536ccb32765639303ebbdf Mon Sep 17 00:00:00 2001 From: Tim Hurski Date: Fri, 26 Dec 2025 11:16:56 -0500 Subject: [PATCH 03/14] DX-108149: Update memory allocation --- cpp/src/gandiva/gdv_function_stubs.cc | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/cpp/src/gandiva/gdv_function_stubs.cc b/cpp/src/gandiva/gdv_function_stubs.cc index 55f6dd43cd1..1c9083751da 100644 --- a/cpp/src/gandiva/gdv_function_stubs.cc +++ b/cpp/src/gandiva/gdv_function_stubs.cc @@ -866,11 +866,15 @@ const char* gdv_fn_encrypt_dispatcher_5args( const char* iv_data, int32_t iv_data_len, const char* fifth_argument, int32_t fifth_argument_len, int32_t* out_len) { try { - // Allocate extra 16 bytes for AES block padding (PKCS7 padding can add - // up to 16 bytes for a 128-bit block cipher) - // In cases of no-padding modes, this extra space is not used + // Calculate buffer size based on mode: + // - ECB: data_len + 16 (padding only, no IV) + // - CBC: data_len + 16 (IV) + 16 (padding) = data_len + 32 + // - GCM: data_len + 12 (IV) + 16 (tag) = data_len + 28 + // Use maximum to handle all modes safely + 
int32_t buffer_size = data_len + 32; + auto* output = reinterpret_cast( - gdv_fn_context_arena_malloc(context, data_len + 16)); + gdv_fn_context_arena_malloc(context, buffer_size)); if (output == nullptr) { throw std::runtime_error( "Memory allocation failed for encryption output"); @@ -896,6 +900,10 @@ const char* gdv_fn_decrypt_dispatcher_5args( const char* iv_data, int32_t iv_data_len, const char* fifth_argument, int32_t fifth_argument_len, int32_t* out_len) { try { + // Buffer size for decryption output is data_len: + // - Input may contain IV + ciphertext + tag/padding + // - Output is plaintext only (IV and tag/padding are removed) + // - Plaintext is always <= input size, so data_len is sufficient auto* output = reinterpret_cast( gdv_fn_context_arena_malloc(context, data_len)); if (output == nullptr) { From 145e0cd5354e5f40758a4b19506ddb68f7324869 Mon Sep 17 00:00:00 2001 From: Tim Hurski Date: Fri, 26 Dec 2025 11:22:20 -0500 Subject: [PATCH 04/14] DX-108149: Update tests --- cpp/src/gandiva/encrypt_utils_cbc_test.cc | 142 ++++++++++++++++++++-- cpp/src/gandiva/encrypt_utils_gcm_test.cc | 133 ++++++++++++++++++-- 2 files changed, 253 insertions(+), 22 deletions(-) diff --git a/cpp/src/gandiva/encrypt_utils_cbc_test.cc b/cpp/src/gandiva/encrypt_utils_cbc_test.cc index 8bf9227d65b..531297b0329 100644 --- a/cpp/src/gandiva/encrypt_utils_cbc_test.cc +++ b/cpp/src/gandiva/encrypt_utils_cbc_test.cc @@ -21,7 +21,7 @@ #include #include -// Test PKCS#7 padding with 16-byte key +// Test PKCS#7 padding with 16-byte key (user-supplied IV) TEST(TestAesCbcEncryptUtils, TestAesEncryptDecryptPkcs7_16) { auto* key = "12345678abcdefgh"; auto* iv = "1234567890123456"; @@ -30,12 +30,19 @@ TEST(TestAesCbcEncryptUtils, TestAesEncryptDecryptPkcs7_16) { auto key_len = static_cast(strlen(key)); auto iv_len = static_cast(strlen(iv)); auto to_encrypt_len = static_cast(strlen(to_encrypt)); - unsigned char cipher[64]; + unsigned char cipher[128]; int32_t cipher_len = 
gandiva::aes_encrypt_cbc(to_encrypt, to_encrypt_len, key, key_len, iv, iv_len, true, cipher); - unsigned char decrypted[64]; + // Output format: [16-byte IV][ciphertext] + // Ciphertext includes padding, so it's rounded up to next 16-byte block + EXPECT_GE(cipher_len, to_encrypt_len + 16); // At least IV + plaintext + + // Verify IV is prepended + EXPECT_EQ(0, std::memcmp(cipher, iv, 16)); + + unsigned char decrypted[128]; int32_t decrypted_len = gandiva::aes_decrypt_cbc(reinterpret_cast(cipher), cipher_len, key, key_len, iv, iv_len, true, decrypted); @@ -44,7 +51,7 @@ TEST(TestAesCbcEncryptUtils, TestAesEncryptDecryptPkcs7_16) { std::string(reinterpret_cast(decrypted), decrypted_len)); } -// Test PKCS#7 padding with 24-byte key +// Test PKCS#7 padding with 24-byte key (user-supplied IV) TEST(TestAesCbcEncryptUtils, TestAesEncryptDecryptPkcs7_24) { auto* key = "12345678abcdefgh12345678"; auto* iv = "1234567890123456"; @@ -53,12 +60,18 @@ TEST(TestAesCbcEncryptUtils, TestAesEncryptDecryptPkcs7_24) { auto key_len = static_cast(strlen(key)); auto iv_len = static_cast(strlen(iv)); auto to_encrypt_len = static_cast(strlen(to_encrypt)); - unsigned char cipher[64]; + unsigned char cipher[128]; int32_t cipher_len = gandiva::aes_encrypt_cbc(to_encrypt, to_encrypt_len, key, key_len, iv, iv_len, true, cipher); - unsigned char decrypted[64]; + // Output format: [16-byte IV][ciphertext] + EXPECT_GE(cipher_len, to_encrypt_len + 16); + + // Verify IV is prepended + EXPECT_EQ(0, std::memcmp(cipher, iv, 16)); + + unsigned char decrypted[128]; int32_t decrypted_len = gandiva::aes_decrypt_cbc(reinterpret_cast(cipher), cipher_len, key, key_len, iv, iv_len, true, decrypted); @@ -67,7 +80,7 @@ TEST(TestAesCbcEncryptUtils, TestAesEncryptDecryptPkcs7_24) { std::string(reinterpret_cast(decrypted), decrypted_len)); } -// Test PKCS#7 padding with 32-byte key +// Test PKCS#7 padding with 32-byte key (user-supplied IV) TEST(TestAesCbcEncryptUtils, TestAesEncryptDecryptPkcs7_32) { auto* key = 
"12345678abcdefgh12345678abcdefgh"; auto* iv = "1234567890123456"; @@ -76,12 +89,18 @@ TEST(TestAesCbcEncryptUtils, TestAesEncryptDecryptPkcs7_32) { auto key_len = static_cast(strlen(key)); auto iv_len = static_cast(strlen(iv)); auto to_encrypt_len = static_cast(strlen(to_encrypt)); - unsigned char cipher[64]; + unsigned char cipher[128]; int32_t cipher_len = gandiva::aes_encrypt_cbc(to_encrypt, to_encrypt_len, key, key_len, iv, iv_len, true, cipher); - unsigned char decrypted[64]; + // Output format: [16-byte IV][ciphertext] + EXPECT_GE(cipher_len, to_encrypt_len + 16); + + // Verify IV is prepended + EXPECT_EQ(0, std::memcmp(cipher, iv, 16)); + + unsigned char decrypted[128]; int32_t decrypted_len = gandiva::aes_decrypt_cbc(reinterpret_cast(cipher), cipher_len, key, key_len, iv, iv_len, true, decrypted); @@ -90,7 +109,7 @@ TEST(TestAesCbcEncryptUtils, TestAesEncryptDecryptPkcs7_32) { std::string(reinterpret_cast(decrypted), decrypted_len)); } -// Test no-padding mode with block-aligned data (16 bytes) +// Test no-padding mode with block-aligned data (16 bytes, user-supplied IV) TEST(TestAesCbcEncryptUtils, TestAesEncryptDecryptNoPadding_16) { auto* key = "12345678abcdefgh"; auto* iv = "1234567890123456"; @@ -99,12 +118,19 @@ TEST(TestAesCbcEncryptUtils, TestAesEncryptDecryptNoPadding_16) { auto key_len = static_cast(strlen(key)); auto iv_len = static_cast(strlen(iv)); auto to_encrypt_len = static_cast(strlen(to_encrypt)); - unsigned char cipher[64]; + unsigned char cipher[128]; int32_t cipher_len = gandiva::aes_encrypt_cbc(to_encrypt, to_encrypt_len, key, key_len, iv, iv_len, false, cipher); - unsigned char decrypted[64]; + // Output format: [16-byte IV][ciphertext] + // No padding, so ciphertext is exactly 16 bytes + EXPECT_EQ(cipher_len, 16 + 16); + + // Verify IV is prepended + EXPECT_EQ(0, std::memcmp(cipher, iv, 16)); + + unsigned char decrypted[128]; int32_t decrypted_len = gandiva::aes_decrypt_cbc(reinterpret_cast(cipher), cipher_len, key, key_len, iv, 
iv_len, false, decrypted); @@ -153,5 +179,97 @@ TEST(TestAesCbcEncryptUtils, TestInvalidKeyLength) { } } +// Test NULL IV with auto-generation (encrypt and decrypt round-trip) +TEST(TestAesCbcEncryptUtils, TestNullIvAutoGeneration) { + auto* key = "12345678abcdefgh"; + auto* to_encrypt = "some test string"; + + auto key_len = static_cast(strlen(key)); + auto to_encrypt_len = static_cast(strlen(to_encrypt)); + unsigned char cipher[128]; + + // Encrypt with NULL IV (auto-generate) + int32_t cipher_len = gandiva::aes_encrypt_cbc(to_encrypt, to_encrypt_len, key, key_len, + nullptr, 0, true, cipher); + + // Output format: [16-byte IV][ciphertext with padding] + EXPECT_GE(cipher_len, to_encrypt_len + 16); + + // Decrypt with NULL IV (extract from ciphertext) + unsigned char decrypted[128]; + int32_t decrypted_len = gandiva::aes_decrypt_cbc(reinterpret_cast(cipher), + cipher_len, key, key_len, nullptr, 0, + true, decrypted); + + EXPECT_EQ(std::string(to_encrypt, to_encrypt_len), + std::string(reinterpret_cast(decrypted), decrypted_len)); +} + +// Test NULL IV with no padding +TEST(TestAesCbcEncryptUtils, TestNullIvNoPadding) { + auto* key = "12345678abcdefgh"; + auto* to_encrypt = "1234567890123456"; // Exactly 16 bytes + + auto key_len = static_cast(strlen(key)); + auto to_encrypt_len = static_cast(strlen(to_encrypt)); + unsigned char cipher[128]; + + // Encrypt with NULL IV and no padding + int32_t cipher_len = gandiva::aes_encrypt_cbc(to_encrypt, to_encrypt_len, key, key_len, + nullptr, 0, false, cipher); + + // Output format: [16-byte IV][16-byte ciphertext] + EXPECT_EQ(cipher_len, 16 + 16); + + // Decrypt with NULL IV and no padding + unsigned char decrypted[128]; + int32_t decrypted_len = gandiva::aes_decrypt_cbc(reinterpret_cast(cipher), + cipher_len, key, key_len, nullptr, 0, + false, decrypted); + + EXPECT_EQ(std::string(to_encrypt, to_encrypt_len), + std::string(reinterpret_cast(decrypted), decrypted_len)); +} + +// Test NULL IV decrypt with user-supplied IV 
encrypt (backward compatibility) +TEST(TestAesCbcEncryptUtils, TestNullIvDecryptWithSuppliedIvEncrypt) { + auto* key = "12345678abcdefgh"; + auto* iv = "1234567890123456"; + auto* to_encrypt = "some test string"; + + auto key_len = static_cast(strlen(key)); + auto iv_len = static_cast(strlen(iv)); + auto to_encrypt_len = static_cast(strlen(to_encrypt)); + unsigned char cipher[128]; + + // Encrypt with user-supplied IV (IV will be prepended) + int32_t cipher_len = gandiva::aes_encrypt_cbc(to_encrypt, to_encrypt_len, key, key_len, + iv, iv_len, true, cipher); + + // Decrypt with NULL IV (extract IV from ciphertext) + unsigned char decrypted[128]; + int32_t decrypted_len = gandiva::aes_decrypt_cbc(reinterpret_cast(cipher), + cipher_len, key, key_len, nullptr, 0, + true, decrypted); + + EXPECT_EQ(std::string(to_encrypt, to_encrypt_len), + std::string(reinterpret_cast(decrypted), decrypted_len)); +} + +// Test decrypt with too-short ciphertext (NULL IV case) +TEST(TestAesCbcEncryptUtils, TestDecryptTooShortCiphertext) { + auto* key = "12345678abcdefgh"; + auto key_len = static_cast(strlen(key)); + + // Ciphertext too short: only 20 bytes (needs at least 32: 16 IV + 16 min block) + unsigned char short_cipher[20] = {0}; + unsigned char decrypted[128]; + + EXPECT_THROW(gandiva::aes_decrypt_cbc(reinterpret_cast(short_cipher), + 20, key, key_len, nullptr, 0, + true, decrypted), + std::runtime_error); +} + diff --git a/cpp/src/gandiva/encrypt_utils_gcm_test.cc b/cpp/src/gandiva/encrypt_utils_gcm_test.cc index 2156132bc62..a6cc3cc4636 100644 --- a/cpp/src/gandiva/encrypt_utils_gcm_test.cc +++ b/cpp/src/gandiva/encrypt_utils_gcm_test.cc @@ -21,7 +21,7 @@ #include #include -// Test IV-only GCM with 16-byte key +// Test IV-only GCM with 16-byte key (user-supplied IV) TEST(TestAesGcmEncryptUtils, TestAesEncryptDecryptIvOnly_16) { auto* key = "12345678abcdefgh"; auto* iv = "123456789012"; // 12-byte IV @@ -35,8 +35,11 @@ TEST(TestAesGcmEncryptUtils, TestAesEncryptDecryptIvOnly_16) 
{ int32_t cipher_len = gandiva::aes_encrypt_gcm(to_encrypt, to_encrypt_len, key, key_len, iv, iv_len, nullptr, 0, cipher); - // Ciphertext should be plaintext_len + 16 (tag) - EXPECT_EQ(cipher_len, to_encrypt_len + 16); + // Output format: [12-byte IV][ciphertext][16-byte tag] + EXPECT_EQ(cipher_len, to_encrypt_len + 12 + 16); + + // Verify IV is prepended + EXPECT_EQ(0, std::memcmp(cipher, iv, 12)); unsigned char decrypted[128]; int32_t decrypted_len = gandiva::aes_decrypt_gcm(reinterpret_cast(cipher), @@ -47,7 +50,7 @@ TEST(TestAesGcmEncryptUtils, TestAesEncryptDecryptIvOnly_16) { std::string(reinterpret_cast(decrypted), decrypted_len)); } -// Test IV + AAD GCM with 16-byte key +// Test IV + AAD GCM with 16-byte key (user-supplied IV) TEST(TestAesGcmEncryptUtils, TestAesEncryptDecryptWithAad_16) { auto* key = "12345678abcdefgh"; auto* iv = "123456789012"; @@ -63,7 +66,11 @@ TEST(TestAesGcmEncryptUtils, TestAesEncryptDecryptWithAad_16) { int32_t cipher_len = gandiva::aes_encrypt_gcm(to_encrypt, to_encrypt_len, key, key_len, iv, iv_len, aad, aad_len, cipher); - EXPECT_EQ(cipher_len, to_encrypt_len + 16); + // Output format: [12-byte IV][ciphertext][16-byte tag] + EXPECT_EQ(cipher_len, to_encrypt_len + 12 + 16); + + // Verify IV is prepended + EXPECT_EQ(0, std::memcmp(cipher, iv, 12)); unsigned char decrypted[128]; int32_t decrypted_len = gandiva::aes_decrypt_gcm(reinterpret_cast(cipher), @@ -74,7 +81,7 @@ TEST(TestAesGcmEncryptUtils, TestAesEncryptDecryptWithAad_16) { std::string(reinterpret_cast(decrypted), decrypted_len)); } -// Test IV-only GCM with 24-byte key +// Test IV-only GCM with 24-byte key (user-supplied IV) TEST(TestAesGcmEncryptUtils, TestAesEncryptDecryptIvOnly_24) { auto* key = "12345678abcdefgh12345678"; auto* iv = "123456789012"; @@ -88,6 +95,12 @@ TEST(TestAesGcmEncryptUtils, TestAesEncryptDecryptIvOnly_24) { int32_t cipher_len = gandiva::aes_encrypt_gcm(to_encrypt, to_encrypt_len, key, key_len, iv, iv_len, nullptr, 0, cipher); + // Output 
format: [12-byte IV][ciphertext][16-byte tag] + EXPECT_EQ(cipher_len, to_encrypt_len + 12 + 16); + + // Verify IV is prepended + EXPECT_EQ(0, std::memcmp(cipher, iv, 12)); + unsigned char decrypted[128]; int32_t decrypted_len = gandiva::aes_decrypt_gcm(reinterpret_cast(cipher), cipher_len, key, key_len, iv, iv_len, @@ -97,7 +110,7 @@ TEST(TestAesGcmEncryptUtils, TestAesEncryptDecryptIvOnly_24) { std::string(reinterpret_cast(decrypted), decrypted_len)); } -// Test IV-only GCM with 32-byte key +// Test IV-only GCM with 32-byte key (user-supplied IV) TEST(TestAesGcmEncryptUtils, TestAesEncryptDecryptIvOnly_32) { auto* key = "12345678abcdefgh12345678abcdefgh"; auto* iv = "123456789012"; @@ -111,6 +124,12 @@ TEST(TestAesGcmEncryptUtils, TestAesEncryptDecryptIvOnly_32) { int32_t cipher_len = gandiva::aes_encrypt_gcm(to_encrypt, to_encrypt_len, key, key_len, iv, iv_len, nullptr, 0, cipher); + // Output format: [12-byte IV][ciphertext][16-byte tag] + EXPECT_EQ(cipher_len, to_encrypt_len + 12 + 16); + + // Verify IV is prepended + EXPECT_EQ(0, std::memcmp(cipher, iv, 12)); + unsigned char decrypted[128]; int32_t decrypted_len = gandiva::aes_decrypt_gcm(reinterpret_cast(cipher), cipher_len, key, key_len, iv, iv_len, @@ -120,7 +139,7 @@ TEST(TestAesGcmEncryptUtils, TestAesEncryptDecryptIvOnly_32) { std::string(reinterpret_cast(decrypted), decrypted_len)); } -// Test tag verification failure +// Test tag verification failure (user-supplied IV) TEST(TestAesGcmEncryptUtils, TestTagVerificationFailure) { auto* key = "12345678abcdefgh"; auto* iv = "123456789012"; @@ -144,10 +163,10 @@ TEST(TestAesGcmEncryptUtils, TestTagVerificationFailure) { std::runtime_error); } -// Test invalid IV length +// Test invalid IV length (non-12-byte IV should fail) TEST(TestAesGcmEncryptUtils, TestInvalidIvLength) { auto* key = "12345678abcdefgh"; - auto* iv = ""; // Empty IV + auto* iv = "1234567890"; // 10-byte IV (invalid, must be exactly 12) auto* to_encrypt = "some test string"; auto key_len = 
static_cast(strlen(key)); @@ -160,3 +179,97 @@ TEST(TestAesGcmEncryptUtils, TestInvalidIvLength) { std::runtime_error); } +// Test NULL IV with auto-generation (encrypt and decrypt round-trip) +TEST(TestAesGcmEncryptUtils, TestNullIvAutoGeneration) { + auto* key = "12345678abcdefgh"; + auto* to_encrypt = "some test string"; + + auto key_len = static_cast(strlen(key)); + auto to_encrypt_len = static_cast(strlen(to_encrypt)); + unsigned char cipher[128]; + + // Encrypt with NULL IV (auto-generate) + int32_t cipher_len = gandiva::aes_encrypt_gcm(to_encrypt, to_encrypt_len, key, key_len, + nullptr, 0, nullptr, 0, cipher); + + // Output format: [12-byte IV][ciphertext][16-byte tag] + EXPECT_EQ(cipher_len, to_encrypt_len + 12 + 16); + + // Decrypt with NULL IV (extract from ciphertext) + unsigned char decrypted[128]; + int32_t decrypted_len = gandiva::aes_decrypt_gcm(reinterpret_cast(cipher), + cipher_len, key, key_len, nullptr, 0, + nullptr, 0, decrypted); + + EXPECT_EQ(std::string(to_encrypt, to_encrypt_len), + std::string(reinterpret_cast(decrypted), decrypted_len)); +} + +// Test NULL IV with AAD +TEST(TestAesGcmEncryptUtils, TestNullIvWithAad) { + auto* key = "12345678abcdefgh"; + auto* to_encrypt = "some test string"; + auto* aad = "additional authenticated data"; + + auto key_len = static_cast(strlen(key)); + auto to_encrypt_len = static_cast(strlen(to_encrypt)); + auto aad_len = static_cast(strlen(aad)); + unsigned char cipher[128]; + + // Encrypt with NULL IV and AAD + int32_t cipher_len = gandiva::aes_encrypt_gcm(to_encrypt, to_encrypt_len, key, key_len, + nullptr, 0, aad, aad_len, cipher); + + // Output format: [12-byte IV][ciphertext][16-byte tag] + EXPECT_EQ(cipher_len, to_encrypt_len + 12 + 16); + + // Decrypt with NULL IV and AAD + unsigned char decrypted[128]; + int32_t decrypted_len = gandiva::aes_decrypt_gcm(reinterpret_cast(cipher), + cipher_len, key, key_len, nullptr, 0, + aad, aad_len, decrypted); + + EXPECT_EQ(std::string(to_encrypt, 
to_encrypt_len), + std::string(reinterpret_cast(decrypted), decrypted_len)); +} + +// Test NULL IV decrypt with user-supplied IV encrypt (backward compatibility) +TEST(TestAesGcmEncryptUtils, TestNullIvDecryptWithSuppliedIvEncrypt) { + auto* key = "12345678abcdefgh"; + auto* iv = "123456789012"; + auto* to_encrypt = "some test string"; + + auto key_len = static_cast(strlen(key)); + auto iv_len = static_cast(strlen(iv)); + auto to_encrypt_len = static_cast(strlen(to_encrypt)); + unsigned char cipher[128]; + + // Encrypt with user-supplied IV (IV will be prepended) + int32_t cipher_len = gandiva::aes_encrypt_gcm(to_encrypt, to_encrypt_len, key, key_len, + iv, iv_len, nullptr, 0, cipher); + + // Decrypt with NULL IV (extract IV from ciphertext) + unsigned char decrypted[128]; + int32_t decrypted_len = gandiva::aes_decrypt_gcm(reinterpret_cast(cipher), + cipher_len, key, key_len, nullptr, 0, + nullptr, 0, decrypted); + + EXPECT_EQ(std::string(to_encrypt, to_encrypt_len), + std::string(reinterpret_cast(decrypted), decrypted_len)); +} + +// Test decrypt with too-short ciphertext (NULL IV case) +TEST(TestAesGcmEncryptUtils, TestDecryptTooShortCiphertext) { + auto* key = "12345678abcdefgh"; + auto key_len = static_cast(strlen(key)); + + // Ciphertext too short: only 20 bytes (needs at least 28: 12 IV + 16 tag) + unsigned char short_cipher[20] = {0}; + unsigned char decrypted[128]; + + EXPECT_THROW(gandiva::aes_decrypt_gcm(reinterpret_cast(short_cipher), + 20, key, key_len, nullptr, 0, + nullptr, 0, decrypted), + std::runtime_error); +} + From 1f23d0bd0a78493d6313e3d5d2315775f6dd1e1f Mon Sep 17 00:00:00 2001 From: Tim Hurski Date: Fri, 26 Dec 2025 11:25:56 -0500 Subject: [PATCH 05/14] DX-108149: Update comments --- cpp/src/gandiva/encrypt_mode_dispatcher.cc | 28 +++++++-- cpp/src/gandiva/encrypt_mode_dispatcher.h | 67 ++++++++++++++++------ 2 files changed, 72 insertions(+), 23 deletions(-) diff --git a/cpp/src/gandiva/encrypt_mode_dispatcher.cc 
b/cpp/src/gandiva/encrypt_mode_dispatcher.cc index fad1c54ba9f..416f0ea0636 100644 --- a/cpp/src/gandiva/encrypt_mode_dispatcher.cc +++ b/cpp/src/gandiva/encrypt_mode_dispatcher.cc @@ -61,27 +61,34 @@ int32_t EncryptModeDispatcher::encrypt( int32_t key_len, const char* mode, int32_t mode_len, const char* iv, int32_t iv_len, const char* fifth_argument, int32_t fifth_argument_len, unsigned char* cipher) { + // Convert mode string to uppercase for case-insensitive comparison std::string mode_str = arrow::internal::AsciiToUpper(std::string_view(mode, mode_len)); switch (ParseEncryptionMode(mode_str)) { case EncryptionMode::ECB: case EncryptionMode::ECB_PKCS7: + // ECB mode: No IV used, output is [ciphertext] // Shorthand AES-ECB and explicit AES-ECB-PKCS7 both use ECB with PKCS7 padding return aes_encrypt_ecb(plaintext, plaintext_len, key, key_len, true, cipher); case EncryptionMode::ECB_NONE: - // ECB without padding + // ECB mode without padding return aes_encrypt_ecb(plaintext, plaintext_len, key, key_len, false, cipher); case EncryptionMode::CBC: case EncryptionMode::CBC_PKCS7: - // Shorthand AES-CBC and explicit AES-CBC-PKCS7 both use CBC with PKCS7 + // CBC mode: IV is prepended to output, format is [16-byte IV][ciphertext] + // If iv is NULL, a random IV is auto-generated + // Shorthand AES-CBC and explicit AES-CBC-PKCS7 both use CBC with PKCS7 padding return aes_encrypt_cbc(plaintext, plaintext_len, key, key_len, iv, iv_len, true, cipher); case EncryptionMode::CBC_NONE: - // CBC without padding + // CBC mode without padding return aes_encrypt_cbc(plaintext, plaintext_len, key, key_len, iv, iv_len, false, cipher); case EncryptionMode::GCM: + // GCM mode: IV is prepended to output, format is [12-byte IV][ciphertext][16-byte tag] + // If iv is NULL, a random IV is auto-generated + // fifth_argument is AAD (Additional Authenticated Data) return aes_encrypt_gcm(plaintext, plaintext_len, key, key_len, iv, iv_len, fifth_argument, fifth_argument_len, cipher); case 
EncryptionMode::UNKNOWN: @@ -100,27 +107,36 @@ int32_t EncryptModeDispatcher::decrypt( int32_t key_len, const char* mode, int32_t mode_len, const char* iv, int32_t iv_len, const char* fifth_argument, int32_t fifth_argument_len, unsigned char* plaintext) { + // Convert mode string to uppercase for case-insensitive comparison std::string mode_str = arrow::internal::AsciiToUpper(std::string_view(mode, mode_len)); switch (ParseEncryptionMode(mode_str)) { case EncryptionMode::ECB: case EncryptionMode::ECB_PKCS7: + // ECB mode: No IV used, input is [ciphertext] // Shorthand AES-ECB and explicit AES-ECB-PKCS7 both use ECB with PKCS7 padding return aes_decrypt_ecb(ciphertext, ciphertext_len, key, key_len, true, plaintext); case EncryptionMode::ECB_NONE: - // ECB without padding + // ECB mode without padding return aes_decrypt_ecb(ciphertext, ciphertext_len, key, key_len, false, plaintext); case EncryptionMode::CBC: case EncryptionMode::CBC_PKCS7: - // Shorthand AES-CBC and explicit AES-CBC-PKCS7 both use CBC with PKCS7 + // CBC mode: If iv is NULL, IV is extracted from first 16 bytes of ciphertext + // Expected format with NULL IV: [16-byte IV][ciphertext] + // Expected format with provided IV: [ciphertext] + // Shorthand AES-CBC and explicit AES-CBC-PKCS7 both use CBC with PKCS7 padding return aes_decrypt_cbc(ciphertext, ciphertext_len, key, key_len, iv, iv_len, true, plaintext); case EncryptionMode::CBC_NONE: - // CBC without padding + // CBC mode without padding return aes_decrypt_cbc(ciphertext, ciphertext_len, key, key_len, iv, iv_len, false, plaintext); case EncryptionMode::GCM: + // GCM mode: If iv is NULL, IV is extracted from first 12 bytes of ciphertext + // Expected format with NULL IV: [12-byte IV][ciphertext][16-byte tag] + // Expected format with provided IV: [ciphertext][16-byte tag] + // fifth_argument is AAD (Additional Authenticated Data) return aes_decrypt_gcm(ciphertext, ciphertext_len, key, key_len, iv, iv_len, fifth_argument, fifth_argument_len, 
plaintext); case EncryptionMode::UNKNOWN: diff --git a/cpp/src/gandiva/encrypt_mode_dispatcher.h b/cpp/src/gandiva/encrypt_mode_dispatcher.h index 20326845bd0..7ae787fcdc3 100644 --- a/cpp/src/gandiva/encrypt_mode_dispatcher.h +++ b/cpp/src/gandiva/encrypt_mode_dispatcher.h @@ -31,19 +31,35 @@ class EncryptModeDispatcher { /** * Encrypt data using the specified mode * + * Supported modes: + * - AES-ECB, AES-ECB-PKCS7, AES-ECB-NONE: ECB mode (no IV) + * - AES-CBC, AES-CBC-PKCS7, AES-CBC-NONE: CBC mode (requires 16-byte IV) + * - AES-GCM: GCM mode (requires 12-byte IV) + * + * Output format: + * - ECB: [ciphertext] + * - CBC: [16-byte IV][ciphertext] + * - GCM: [12-byte IV][ciphertext][16-byte authentication tag] + * + * IV Handling (CBC and GCM modes): + * - If iv is NULL or iv_len is 0: A cryptographically secure random IV is + * automatically generated and prepended to the output + * - If iv is provided: It must be the exact required length (12 for GCM, 16 for CBC), + * and will be prepended to the output + * * @param plaintext The data to encrypt * @param plaintext_len Length of plaintext in bytes - * @param key The encryption key + * @param key The encryption key (16, 24, or 32 bytes for AES-128/192/256) * @param key_len Length of key in bytes - * @param mode Mode string + * @param mode Mode string (case-insensitive) * @param mode_len Length of mode string in bytes - * @param iv The initialization vector (optional, only for modes that support it) - * @param iv_len Length of the IV in bytes - * @param fifth_argument Additional parameter (optional, only for modes that support it) + * @param iv The initialization vector (NULL for auto-generation in CBC/GCM, ignored for ECB) + * @param iv_len Length of the IV in bytes (0 for auto-generation, 12 for GCM, 16 for CBC) + * @param fifth_argument Additional parameter (AAD for GCM mode, ignored for others) * @param fifth_argument_len Length of fifth_argument in bytes - * @param cipher Output buffer for encrypted data - * 
@return Length of encrypted data in bytes - * @throws std::runtime_error on encryption failure or unsupported mode + * @param cipher Output buffer for encrypted data (must be large enough for output format) + * @return Length of encrypted data in bytes (includes prepended IV for CBC/GCM) + * @throws std::runtime_error on encryption failure, unsupported mode, or invalid parameters */ static int32_t encrypt(const char* plaintext, int32_t plaintext_len, const char* key, int32_t key_len, @@ -55,19 +71,36 @@ class EncryptModeDispatcher { /** * Decrypt data using the specified mode * - * @param ciphertext The data to decrypt - * @param ciphertext_len Length of ciphertext in bytes - * @param key The decryption key + * Supported modes: + * - AES-ECB, AES-ECB-PKCS7, AES-ECB-NONE: ECB mode (no IV) + * - AES-CBC, AES-CBC-PKCS7, AES-CBC-NONE: CBC mode (requires 16-byte IV) + * - AES-GCM: GCM mode (requires 12-byte IV) + * + * Expected input format: + * - ECB: [ciphertext] + * - CBC with NULL IV: [16-byte IV][ciphertext] + * - CBC with provided IV: [ciphertext] (IV provided separately) + * - GCM with NULL IV: [12-byte IV][ciphertext][16-byte authentication tag] + * - GCM with provided IV: [ciphertext][16-byte authentication tag] (IV provided separately) + * + * IV Handling (CBC and GCM modes): + * - If iv is NULL or iv_len is 0: IV is extracted from the beginning of ciphertext + * - If iv is provided: It must be the exact required length (12 for GCM, 16 for CBC), + * and ciphertext should not include the IV + * + * @param ciphertext The data to decrypt (format depends on mode and IV parameter) + * @param ciphertext_len Length of ciphertext in bytes (includes IV if embedded) + * @param key The decryption key (16, 24, or 32 bytes for AES-128/192/256) * @param key_len Length of key in bytes - * @param mode Mode string + * @param mode Mode string (case-insensitive) * @param mode_len Length of mode string in bytes - * @param iv The initialization vector (optional, only for modes 
that support it) - * @param iv_len Length of the IV in bytes - * @param fifth_argument Additional parameter (optional, only for modes that support it) + * @param iv The initialization vector (NULL for extraction from ciphertext, ignored for ECB) + * @param iv_len Length of the IV in bytes (0 for extraction, 12 for GCM, 16 for CBC) + * @param fifth_argument Additional parameter (AAD for GCM mode, ignored for others) * @param fifth_argument_len Length of fifth_argument in bytes * @param plaintext Output buffer for decrypted data - * @return Length of decrypted data in bytes - * @throws std::runtime_error on decryption failure or unsupported mode + * @return Length of decrypted data in bytes (plaintext only, IV and tag removed) + * @throws std::runtime_error on decryption failure, unsupported mode, invalid parameters, or authentication failure */ static int32_t decrypt(const char* ciphertext, int32_t ciphertext_len, const char* key, int32_t key_len, From 785db394e8a0f28dcfebf5da6a8676f306741108 Mon Sep 17 00:00:00 2001 From: Tim Hurski Date: Fri, 26 Dec 2025 12:26:59 -0500 Subject: [PATCH 06/14] DX-108149: Refactor and update tests --- cpp/src/gandiva/encrypt_utils_cbc.cc | 45 ++++++++------ cpp/src/gandiva/encrypt_utils_cbc_test.cc | 12 ++-- cpp/src/gandiva/encrypt_utils_gcm.cc | 72 ++++++++++++---------- cpp/src/gandiva/encrypt_utils_gcm_test.cc | 12 ++-- cpp/src/gandiva/gdv_function_stubs_test.cc | 15 +++-- 5 files changed, 90 insertions(+), 66 deletions(-) diff --git a/cpp/src/gandiva/encrypt_utils_cbc.cc b/cpp/src/gandiva/encrypt_utils_cbc.cc index 0d0b5dcfa63..cb2a719c1a8 100644 --- a/cpp/src/gandiva/encrypt_utils_cbc.cc +++ b/cpp/src/gandiva/encrypt_utils_cbc.cc @@ -46,6 +46,26 @@ const EVP_CIPHER* get_cbc_cipher_algo(int32_t key_length) { } } +void validate_iv_length_cbc(int32_t iv_len) { + if (iv_len != CBC_IV_LENGTH) { + std::ostringstream oss; + oss << "Invalid IV length for AES-CBC: " << iv_len + << " bytes. 
IV must be exactly " << CBC_IV_LENGTH << " bytes"; + throw std::runtime_error(oss.str()); + } +} + +void validate_ciphertext_with_embedded_iv_cbc(int32_t ciphertext_len) { + constexpr int32_t MIN_CIPHERTEXT_LEN = CBC_IV_LENGTH + 16; // IV + minimum one block + if (ciphertext_len < MIN_CIPHERTEXT_LEN) { + std::ostringstream oss; + oss << "Ciphertext too short for AES-CBC with embedded IV: " << ciphertext_len + << " bytes. Must be at least " << MIN_CIPHERTEXT_LEN + << " bytes (16-byte IV + minimum 16-byte block)"; + throw std::runtime_error(oss.str()); + } +} + } // namespace GANDIVA_EXPORT @@ -133,30 +153,17 @@ int32_t aes_decrypt_cbc(const char* ciphertext, int32_t ciphertext_len, const ch const char* actual_ciphertext = ciphertext; int32_t actual_ciphertext_len = ciphertext_len; - // Handle NULL IV: extract from beginning of ciphertext - if (iv == nullptr || iv_len == 0) { - // Validate ciphertext length: must have IV (16) + at least one block (16) = 32 bytes minimum - if (ciphertext_len < CBC_IV_LENGTH + 16) { - std::ostringstream oss; - oss << "Ciphertext too short for AES-CBC with embedded IV: " << ciphertext_len - << " bytes. Must be at least " << (CBC_IV_LENGTH + 16) - << " bytes (16-byte IV + minimum 16-byte block)"; - throw std::runtime_error(oss.str()); - } - - // Extract IV from beginning of ciphertext + // Handle IV: either extract from ciphertext or use user-supplied IV + if (iv == nullptr) { + // Extract IV from beginning of ciphertext: [16-byte IV][ciphertext] + validate_ciphertext_with_embedded_iv_cbc(ciphertext_len); extract_iv_from_ciphertext(ciphertext, ciphertext_len, CBC_IV_LENGTH, iv_buffer, &actual_ciphertext, &actual_ciphertext_len); actual_iv = iv_buffer; } else { - // Validate user-supplied IV length - if (iv_len != CBC_IV_LENGTH) { - std::ostringstream oss; - oss << "Invalid IV length for AES-CBC: " << iv_len - << " bytes. 
IV must be exactly " << CBC_IV_LENGTH << " bytes"; - throw std::runtime_error(oss.str()); - } + // Use user-supplied IV + validate_iv_length_cbc(iv_len); actual_iv = reinterpret_cast(iv); } diff --git a/cpp/src/gandiva/encrypt_utils_cbc_test.cc b/cpp/src/gandiva/encrypt_utils_cbc_test.cc index 531297b0329..f52d9c3682f 100644 --- a/cpp/src/gandiva/encrypt_utils_cbc_test.cc +++ b/cpp/src/gandiva/encrypt_utils_cbc_test.cc @@ -43,8 +43,9 @@ TEST(TestAesCbcEncryptUtils, TestAesEncryptDecryptPkcs7_16) { EXPECT_EQ(0, std::memcmp(cipher, iv, 16)); unsigned char decrypted[128]; + // Pass NULL IV to extract from ciphertext (since encrypt prepended it) int32_t decrypted_len = gandiva::aes_decrypt_cbc(reinterpret_cast(cipher), - cipher_len, key, key_len, iv, iv_len, + cipher_len, key, key_len, nullptr, 0, true, decrypted); EXPECT_EQ(std::string(to_encrypt, to_encrypt_len), @@ -72,8 +73,9 @@ TEST(TestAesCbcEncryptUtils, TestAesEncryptDecryptPkcs7_24) { EXPECT_EQ(0, std::memcmp(cipher, iv, 16)); unsigned char decrypted[128]; + // Pass NULL IV to extract from ciphertext (since encrypt prepended it) int32_t decrypted_len = gandiva::aes_decrypt_cbc(reinterpret_cast(cipher), - cipher_len, key, key_len, iv, iv_len, + cipher_len, key, key_len, nullptr, 0, true, decrypted); EXPECT_EQ(std::string(to_encrypt, to_encrypt_len), @@ -101,8 +103,9 @@ TEST(TestAesCbcEncryptUtils, TestAesEncryptDecryptPkcs7_32) { EXPECT_EQ(0, std::memcmp(cipher, iv, 16)); unsigned char decrypted[128]; + // Pass NULL IV to extract from ciphertext (since encrypt prepended it) int32_t decrypted_len = gandiva::aes_decrypt_cbc(reinterpret_cast(cipher), - cipher_len, key, key_len, iv, iv_len, + cipher_len, key, key_len, nullptr, 0, true, decrypted); EXPECT_EQ(std::string(to_encrypt, to_encrypt_len), @@ -131,8 +134,9 @@ TEST(TestAesCbcEncryptUtils, TestAesEncryptDecryptNoPadding_16) { EXPECT_EQ(0, std::memcmp(cipher, iv, 16)); unsigned char decrypted[128]; + // Pass NULL IV to extract from ciphertext (since encrypt 
prepended it) int32_t decrypted_len = gandiva::aes_decrypt_cbc(reinterpret_cast(cipher), - cipher_len, key, key_len, iv, iv_len, + cipher_len, key, key_len, nullptr, 0, false, decrypted); EXPECT_EQ(std::string(to_encrypt, to_encrypt_len), diff --git a/cpp/src/gandiva/encrypt_utils_gcm.cc b/cpp/src/gandiva/encrypt_utils_gcm.cc index 085e02b695a..9f98840981d 100644 --- a/cpp/src/gandiva/encrypt_utils_gcm.cc +++ b/cpp/src/gandiva/encrypt_utils_gcm.cc @@ -45,6 +45,33 @@ const EVP_CIPHER* get_gcm_cipher_algo(int32_t key_length) { } } +void validate_iv_length_gcm(int32_t iv_len) { + if (iv_len != GCM_IV_LENGTH) { + std::ostringstream oss; + oss << "Invalid IV length for AES-GCM: " << iv_len + << " bytes. IV must be exactly " << GCM_IV_LENGTH << " bytes"; + throw std::runtime_error(oss.str()); + } +} + +void validate_ciphertext_with_embedded_iv_gcm(int32_t ciphertext_len) { + constexpr int32_t MIN_CIPHERTEXT_LEN = GCM_IV_LENGTH + GCM_TAG_LENGTH; // IV + tag + if (ciphertext_len < MIN_CIPHERTEXT_LEN) { + std::ostringstream oss; + oss << "Ciphertext too short for AES-GCM with embedded IV: " << ciphertext_len + << " bytes. 
Must be at least " << MIN_CIPHERTEXT_LEN + << " bytes (12-byte IV + 16-byte tag)"; + throw std::runtime_error(oss.str()); + } +} + +void validate_ciphertext_with_tag(int32_t ciphertext_len) { + if (ciphertext_len < GCM_TAG_LENGTH) { + throw std::runtime_error( + "Ciphertext too short for AES-GCM: must be at least 16 bytes for tag"); + } +} + } // namespace GANDIVA_EXPORT @@ -56,18 +83,14 @@ int32_t aes_encrypt_gcm(const char* plaintext, int32_t plaintext_len, unsigned char iv_buffer[GCM_IV_LENGTH]; const unsigned char* actual_iv = nullptr; - // Handle NULL IV: generate random IV + // Handle IV: either generate random IV or use user-supplied IV if (iv == nullptr || iv_len == 0) { + // Generate random IV generate_random_iv(iv_buffer, GCM_IV_LENGTH); actual_iv = iv_buffer; } else { - // Validate user-supplied IV length - GCM requires exactly 12 bytes - if (iv_len != GCM_IV_LENGTH) { - std::ostringstream oss; - oss << "Invalid IV length for AES-GCM: " << iv_len - << " bytes. IV must be exactly " << GCM_IV_LENGTH << " bytes"; - throw std::runtime_error(oss.str()); - } + // Use user-supplied IV + validate_iv_length_gcm(iv_len); actual_iv = reinterpret_cast(iv); } @@ -154,37 +177,18 @@ int32_t aes_decrypt_gcm(const char* ciphertext, int32_t ciphertext_len, const char* actual_ciphertext = ciphertext; int32_t actual_ciphertext_with_tag_len = ciphertext_len; - // Handle NULL IV: extract from beginning of ciphertext - if (iv == nullptr || iv_len == 0) { - // Validate ciphertext length: must have IV (12) + tag (16) = 28 bytes minimum - if (ciphertext_len < GCM_IV_LENGTH + GCM_TAG_LENGTH) { - std::ostringstream oss; - oss << "Ciphertext too short for AES-GCM with embedded IV: " << ciphertext_len - << " bytes. 
Must be at least " << (GCM_IV_LENGTH + GCM_TAG_LENGTH) - << " bytes (12-byte IV + 16-byte tag)"; - throw std::runtime_error(oss.str()); - } - - // Extract IV from beginning of ciphertext + // Handle IV: either extract from ciphertext or use user-supplied IV + if (iv == nullptr) { + // Extract IV from beginning of ciphertext: [12-byte IV][ciphertext][16-byte tag] + validate_ciphertext_with_embedded_iv_gcm(ciphertext_len); extract_iv_from_ciphertext(ciphertext, ciphertext_len, GCM_IV_LENGTH, iv_buffer, &actual_ciphertext, &actual_ciphertext_with_tag_len); actual_iv = iv_buffer; } else { - // Validate user-supplied IV length - if (iv_len != GCM_IV_LENGTH) { - std::ostringstream oss; - oss << "Invalid IV length for AES-GCM: " << iv_len - << " bytes. IV must be exactly " << GCM_IV_LENGTH << " bytes"; - throw std::runtime_error(oss.str()); - } - - // Validate ciphertext length for user-supplied IV case - if (ciphertext_len < GCM_TAG_LENGTH) { - throw std::runtime_error( - "Ciphertext too short for AES-GCM: must be at least 16 bytes for tag"); - } - + // Use user-supplied IV + validate_iv_length_gcm(iv_len); + validate_ciphertext_with_tag(ciphertext_len); actual_iv = reinterpret_cast(iv); } diff --git a/cpp/src/gandiva/encrypt_utils_gcm_test.cc b/cpp/src/gandiva/encrypt_utils_gcm_test.cc index a6cc3cc4636..ffd14da4dd4 100644 --- a/cpp/src/gandiva/encrypt_utils_gcm_test.cc +++ b/cpp/src/gandiva/encrypt_utils_gcm_test.cc @@ -42,8 +42,9 @@ TEST(TestAesGcmEncryptUtils, TestAesEncryptDecryptIvOnly_16) { EXPECT_EQ(0, std::memcmp(cipher, iv, 12)); unsigned char decrypted[128]; + // Pass NULL IV to extract from ciphertext (since encrypt prepended it) int32_t decrypted_len = gandiva::aes_decrypt_gcm(reinterpret_cast(cipher), - cipher_len, key, key_len, iv, iv_len, + cipher_len, key, key_len, nullptr, 0, nullptr, 0, decrypted); EXPECT_EQ(std::string(to_encrypt, to_encrypt_len), @@ -73,8 +74,9 @@ TEST(TestAesGcmEncryptUtils, TestAesEncryptDecryptWithAad_16) { EXPECT_EQ(0, 
std::memcmp(cipher, iv, 12)); unsigned char decrypted[128]; + // Pass NULL IV to extract from ciphertext (since encrypt prepended it) int32_t decrypted_len = gandiva::aes_decrypt_gcm(reinterpret_cast(cipher), - cipher_len, key, key_len, iv, iv_len, + cipher_len, key, key_len, nullptr, 0, aad, aad_len, decrypted); EXPECT_EQ(std::string(to_encrypt, to_encrypt_len), @@ -102,8 +104,9 @@ TEST(TestAesGcmEncryptUtils, TestAesEncryptDecryptIvOnly_24) { EXPECT_EQ(0, std::memcmp(cipher, iv, 12)); unsigned char decrypted[128]; + // Pass NULL IV to extract from ciphertext (since encrypt prepended it) int32_t decrypted_len = gandiva::aes_decrypt_gcm(reinterpret_cast(cipher), - cipher_len, key, key_len, iv, iv_len, + cipher_len, key, key_len, nullptr, 0, nullptr, 0, decrypted); EXPECT_EQ(std::string(to_encrypt, to_encrypt_len), @@ -131,8 +134,9 @@ TEST(TestAesGcmEncryptUtils, TestAesEncryptDecryptIvOnly_32) { EXPECT_EQ(0, std::memcmp(cipher, iv, 12)); unsigned char decrypted[128]; + // Pass NULL IV to extract from ciphertext (since encrypt prepended it) int32_t decrypted_len = gandiva::aes_decrypt_gcm(reinterpret_cast(cipher), - cipher_len, key, key_len, iv, iv_len, + cipher_len, key, key_len, nullptr, 0, nullptr, 0, decrypted); EXPECT_EQ(std::string(to_encrypt, to_encrypt_len), diff --git a/cpp/src/gandiva/gdv_function_stubs_test.cc b/cpp/src/gandiva/gdv_function_stubs_test.cc index bfb34eeb31d..d428be5b75c 100644 --- a/cpp/src/gandiva/gdv_function_stubs_test.cc +++ b/cpp/src/gandiva/gdv_function_stubs_test.cc @@ -1529,9 +1529,10 @@ TEST(TestGdvFnStubs, TestAesEncryptDecryptGcmIvOnly) { mode_len, iv.c_str(), iv_len, nullptr, 0, &cipher_len); EXPECT_GT(cipher_len, 0); + // Pass NULL IV to extract from ciphertext (since encrypt prepended it) const char* decrypted_value = gdv_fn_decrypt_dispatcher_5args( ctx_ptr, cipher, cipher_len, key16.c_str(), key16_len, mode.c_str(), - mode_len, iv.c_str(), iv_len, nullptr, 0, &decrypted_len); + mode_len, nullptr, 0, nullptr, 0, 
&decrypted_len); EXPECT_EQ(data, std::string(reinterpret_cast(decrypted_value), @@ -1559,9 +1560,10 @@ TEST(TestGdvFnStubs, TestAesEncryptDecryptGcmWithAad) { mode_len, iv.c_str(), iv_len, aad.c_str(), aad_len, &cipher_len); EXPECT_GT(cipher_len, 0); + // Pass NULL IV to extract from ciphertext (since encrypt prepended it) const char* decrypted_value = gdv_fn_decrypt_dispatcher_5args( ctx_ptr, cipher, cipher_len, key16.c_str(), key16_len, mode.c_str(), - mode_len, iv.c_str(), iv_len, aad.c_str(), aad_len, &decrypted_len); + mode_len, nullptr, 0, aad.c_str(), aad_len, &decrypted_len); EXPECT_EQ(data, std::string(reinterpret_cast(decrypted_value), @@ -1642,9 +1644,10 @@ TEST(TestGdvFnStubs, TestAesEncryptDecryptShorthandCbc) { mode_len, iv.c_str(), iv_len, &cipher_len); EXPECT_GT(cipher_len, 0); + // Pass NULL IV to extract from ciphertext (since encrypt prepended it) const char* decrypted_value = gdv_fn_decrypt_dispatcher_4args( ctx_ptr, cipher, cipher_len, key16.c_str(), key16_len, mode.c_str(), - mode_len, iv.c_str(), iv_len, &decrypted_len); + mode_len, nullptr, 0, &decrypted_len); EXPECT_EQ(data, std::string(reinterpret_cast(decrypted_value), @@ -1671,9 +1674,10 @@ TEST(TestGdvFnStubs, TestAesEncryptDecryptExplicitCbcPkcs7) { mode_len, iv.c_str(), iv_len, &cipher_len); EXPECT_GT(cipher_len, 0); + // Pass NULL IV to extract from ciphertext (since encrypt prepended it) const char* decrypted_value = gdv_fn_decrypt_dispatcher_4args( ctx_ptr, cipher, cipher_len, key16.c_str(), key16_len, mode.c_str(), - mode_len, iv.c_str(), iv_len, &decrypted_len); + mode_len, nullptr, 0, &decrypted_len); EXPECT_EQ(data, std::string(reinterpret_cast(decrypted_value), @@ -1701,9 +1705,10 @@ TEST(TestGdvFnStubs, TestAesEncryptDecryptCbcNone) { mode_len, iv.c_str(), iv_len, &cipher_len); EXPECT_GT(cipher_len, 0); + // Pass NULL IV to extract from ciphertext (since encrypt prepended it) const char* decrypted_value = gdv_fn_decrypt_dispatcher_4args( ctx_ptr, cipher, cipher_len, 
key16.c_str(), key16_len, mode.c_str(), - mode_len, iv.c_str(), iv_len, &decrypted_len); + mode_len, nullptr, 0, &decrypted_len); EXPECT_EQ(data, std::string(reinterpret_cast(decrypted_value), From 4b771f00ce5e58226564ffdc8b82a961786730d4 Mon Sep 17 00:00:00 2001 From: Tim Hurski Date: Thu, 1 Jan 2026 11:53:43 -0500 Subject: [PATCH 07/14] Add support for NULL IV values --- cpp/src/gandiva/function_registry_string.cc | 10 +- cpp/src/gandiva/gdv_function_stubs_test.cc | 122 ++++++++++++++++++++ 2 files changed, 128 insertions(+), 4 deletions(-) diff --git a/cpp/src/gandiva/function_registry_string.cc b/cpp/src/gandiva/function_registry_string.cc index 7750421360e..27177bb3282 100644 --- a/cpp/src/gandiva/function_registry_string.cc +++ b/cpp/src/gandiva/function_registry_string.cc @@ -514,21 +514,23 @@ std::vector GetStringFunctionRegistry() { NativeFunction::kNeedsContext | NativeFunction::kCanReturnErrors), // Parameters: data, key, mode, iv (e.g. CBC mode) + // Note: IV can be NULL for CBC/GCM modes (auto-generates random IV) NativeFunction("encrypt", {}, DataTypeVector{binary(), binary(), utf8(), binary()}, binary(), - kResultNullIfNull, "gdv_fn_encrypt_dispatcher_4args", + kResultNullInternal, "gdv_fn_encrypt_dispatcher_4args", NativeFunction::kNeedsContext | NativeFunction::kCanReturnErrors), NativeFunction("decrypt", {}, DataTypeVector{binary(), binary(), utf8(), binary()}, binary(), - kResultNullIfNull, "gdv_fn_decrypt_dispatcher_4args", + kResultNullInternal, "gdv_fn_decrypt_dispatcher_4args", NativeFunction::kNeedsContext | NativeFunction::kCanReturnErrors), // Parameters: data, key, mode, iv, fifth_argument (e.g. 
GCM mode) + // Note: IV and AAD can be NULL (auto-generates random IV, no AAD) NativeFunction("encrypt", {}, DataTypeVector{binary(), binary(), utf8(), binary(), binary()}, binary(), - kResultNullIfNull, "gdv_fn_encrypt_dispatcher_5args", + kResultNullInternal, "gdv_fn_encrypt_dispatcher_5args", NativeFunction::kNeedsContext | NativeFunction::kCanReturnErrors), NativeFunction("decrypt", {}, DataTypeVector{binary(), binary(), utf8(), binary(), binary()}, binary(), - kResultNullIfNull, "gdv_fn_decrypt_dispatcher_5args", + kResultNullInternal, "gdv_fn_decrypt_dispatcher_5args", NativeFunction::kNeedsContext | NativeFunction::kCanReturnErrors), NativeFunction("mask_first_n", {}, DataTypeVector{utf8(), int32()}, utf8(), diff --git a/cpp/src/gandiva/gdv_function_stubs_test.cc b/cpp/src/gandiva/gdv_function_stubs_test.cc index d428be5b75c..3bda0d9abbf 100644 --- a/cpp/src/gandiva/gdv_function_stubs_test.cc +++ b/cpp/src/gandiva/gdv_function_stubs_test.cc @@ -1715,4 +1715,126 @@ TEST(TestGdvFnStubs, TestAesEncryptDecryptCbcNone) { decrypted_len)); } +// Test that ENCRYPT(plaintext, key, 'AES-GCM', NULL) works (NULL IV should auto-generate) +TEST(TestGdvFnStubs, TestAesEncryptGcmWithNullIv4Args) { + gandiva::ExecutionContext ctx; + std::string key16 = "12345678abcdefgh"; + auto key16_len = static_cast(key16.length()); + int32_t cipher_len = 0; + int32_t decrypted_len = 0; + std::string data = "test string"; + auto data_len = static_cast(data.length()); + std::string mode = AES_GCM_MODE; + auto mode_len = static_cast(mode.length()); + int64_t ctx_ptr = reinterpret_cast(&ctx); + + // Test 4-arg version with NULL IV (should auto-generate IV) + const char* cipher = gdv_fn_encrypt_dispatcher_4args( + ctx_ptr, data.c_str(), data_len, key16.c_str(), key16_len, mode.c_str(), + mode_len, nullptr, 0, &cipher_len); + EXPECT_GT(cipher_len, 0); + EXPECT_TRUE(cipher != nullptr); + + // Decrypt with NULL IV (should extract from ciphertext) + const char* decrypted_value = 
gdv_fn_decrypt_dispatcher_4args( + ctx_ptr, cipher, cipher_len, key16.c_str(), key16_len, mode.c_str(), + mode_len, nullptr, 0, &decrypted_len); + + EXPECT_EQ(data, + std::string(reinterpret_cast(decrypted_value), + decrypted_len)); +} + +// Test that ENCRYPT(plaintext, key, 'AES-GCM', NULL, NULL) works (NULL IV and NULL AAD) +TEST(TestGdvFnStubs, TestAesEncryptGcmWithNullIvAndNullAad) { + gandiva::ExecutionContext ctx; + std::string key16 = "12345678abcdefgh"; + auto key16_len = static_cast(key16.length()); + int32_t cipher_len = 0; + int32_t decrypted_len = 0; + std::string data = "test string"; + auto data_len = static_cast(data.length()); + std::string mode = AES_GCM_MODE; + auto mode_len = static_cast(mode.length()); + int64_t ctx_ptr = reinterpret_cast(&ctx); + + // Test 5-arg version with NULL IV and NULL AAD + const char* cipher = gdv_fn_encrypt_dispatcher_5args( + ctx_ptr, data.c_str(), data_len, key16.c_str(), key16_len, mode.c_str(), + mode_len, nullptr, 0, nullptr, 0, &cipher_len); + EXPECT_GT(cipher_len, 0); + EXPECT_TRUE(cipher != nullptr); + + // Decrypt with NULL IV and NULL AAD + const char* decrypted_value = gdv_fn_decrypt_dispatcher_5args( + ctx_ptr, cipher, cipher_len, key16.c_str(), key16_len, mode.c_str(), + mode_len, nullptr, 0, nullptr, 0, &decrypted_len); + + EXPECT_EQ(data, + std::string(reinterpret_cast(decrypted_value), + decrypted_len)); +} + +// Test that ENCRYPT(plaintext, key, 'AES-CBC', NULL) works (NULL IV should auto-generate) +TEST(TestGdvFnStubs, TestAesEncryptCbcWithNullIv4Args) { + gandiva::ExecutionContext ctx; + std::string key16 = "12345678abcdefgh"; + auto key16_len = static_cast(key16.length()); + int32_t cipher_len = 0; + int32_t decrypted_len = 0; + std::string data = "test string"; + auto data_len = static_cast(data.length()); + std::string mode = AES_CBC_MODE; + auto mode_len = static_cast(mode.length()); + int64_t ctx_ptr = reinterpret_cast(&ctx); + + // Test 4-arg version with NULL IV (should auto-generate IV) + 
const char* cipher = gdv_fn_encrypt_dispatcher_4args( + ctx_ptr, data.c_str(), data_len, key16.c_str(), key16_len, mode.c_str(), + mode_len, nullptr, 0, &cipher_len); + EXPECT_GT(cipher_len, 0); + EXPECT_TRUE(cipher != nullptr); + + // Decrypt with NULL IV (should extract from ciphertext) + const char* decrypted_value = gdv_fn_decrypt_dispatcher_4args( + ctx_ptr, cipher, cipher_len, key16.c_str(), key16_len, mode.c_str(), + mode_len, nullptr, 0, &decrypted_len); + + EXPECT_EQ(data, + std::string(reinterpret_cast(decrypted_value), + decrypted_len)); +} + +// Test that ENCRYPT(plaintext, key, 'AES-GCM', NULL, aad) works (NULL IV with non-NULL AAD) +TEST(TestGdvFnStubs, TestAesEncryptGcmWithNullIvButWithAad) { + gandiva::ExecutionContext ctx; + std::string key16 = "12345678abcdefgh"; + auto key16_len = static_cast(key16.length()); + int32_t cipher_len = 0; + int32_t decrypted_len = 0; + std::string data = "test string"; + auto data_len = static_cast(data.length()); + std::string mode = AES_GCM_MODE; + auto mode_len = static_cast(mode.length()); + std::string aad = "additional authenticated data"; + auto aad_len = static_cast(aad.length()); + int64_t ctx_ptr = reinterpret_cast(&ctx); + + // Test 5-arg version with NULL IV but non-NULL AAD + const char* cipher = gdv_fn_encrypt_dispatcher_5args( + ctx_ptr, data.c_str(), data_len, key16.c_str(), key16_len, mode.c_str(), + mode_len, nullptr, 0, aad.c_str(), aad_len, &cipher_len); + EXPECT_GT(cipher_len, 0); + EXPECT_TRUE(cipher != nullptr); + + // Decrypt with NULL IV and same AAD + const char* decrypted_value = gdv_fn_decrypt_dispatcher_5args( + ctx_ptr, cipher, cipher_len, key16.c_str(), key16_len, mode.c_str(), + mode_len, nullptr, 0, aad.c_str(), aad_len, &decrypted_len); + + EXPECT_EQ(data, + std::string(reinterpret_cast(decrypted_value), + decrypted_len)); +} + } // namespace gandiva From e887e22ec10c7da8490a45674a3f4f48eded49b2 Mon Sep 17 00:00:00 2001 From: Tim Hurski Date: Tue, 6 Jan 2026 17:16:05 -0500 Subject: 
[PATCH 08/14] Add handling of NULL values for IV --- cpp/src/gandiva/function_registry_string.cc | 7 ++- cpp/src/gandiva/gdv_function_stubs.cc | 54 +++++++++++++++++---- 2 files changed, 49 insertions(+), 12 deletions(-) diff --git a/cpp/src/gandiva/function_registry_string.cc b/cpp/src/gandiva/function_registry_string.cc index 27177bb3282..6bab661ccb2 100644 --- a/cpp/src/gandiva/function_registry_string.cc +++ b/cpp/src/gandiva/function_registry_string.cc @@ -505,16 +505,18 @@ std::vector GetStringFunctionRegistry() { NativeFunction::kNeedsContext | NativeFunction::kCanReturnErrors), // Parameters: data, key, mode (e.g. ECB mode) + // Uses kResultNullInternal to allow NULL data while failing on NULL key/mode NativeFunction("encrypt", {}, DataTypeVector{binary(), binary(), utf8()}, binary(), - kResultNullIfNull, "gdv_fn_encrypt_dispatcher_3args", + kResultNullInternal, "gdv_fn_encrypt_dispatcher_3args", NativeFunction::kNeedsContext | NativeFunction::kCanReturnErrors), NativeFunction("decrypt", {}, DataTypeVector{binary(), binary(), utf8()}, binary(), - kResultNullIfNull, "gdv_fn_decrypt_dispatcher_3args", + kResultNullInternal, "gdv_fn_decrypt_dispatcher_3args", NativeFunction::kNeedsContext | NativeFunction::kCanReturnErrors), // Parameters: data, key, mode, iv (e.g. CBC mode) // Note: IV can be NULL for CBC/GCM modes (auto-generates random IV) + // Uses kResultNullInternal to allow NULL IV while failing on NULL key/mode NativeFunction("encrypt", {}, DataTypeVector{binary(), binary(), utf8(), binary()}, binary(), kResultNullInternal, "gdv_fn_encrypt_dispatcher_4args", NativeFunction::kNeedsContext | NativeFunction::kCanReturnErrors), @@ -525,6 +527,7 @@ std::vector GetStringFunctionRegistry() { // Parameters: data, key, mode, iv, fifth_argument (e.g. 
GCM mode) // Note: IV and AAD can be NULL (auto-generates random IV, no AAD) + // Uses kResultNullInternal to allow NULL IV/AAD while failing on NULL key/mode NativeFunction("encrypt", {}, DataTypeVector{binary(), binary(), utf8(), binary(), binary()}, binary(), kResultNullInternal, "gdv_fn_encrypt_dispatcher_5args", NativeFunction::kNeedsContext | NativeFunction::kCanReturnErrors), diff --git a/cpp/src/gandiva/gdv_function_stubs.cc b/cpp/src/gandiva/gdv_function_stubs.cc index 1c9083751da..18edee203c8 100644 --- a/cpp/src/gandiva/gdv_function_stubs.cc +++ b/cpp/src/gandiva/gdv_function_stubs.cc @@ -823,40 +823,40 @@ extern "C" GANDIVA_EXPORT const char* gdv_fn_encrypt_dispatcher_3args( int64_t context, const char* data, int32_t data_len, const char* key_data, int32_t key_data_len, const char* mode, int32_t mode_len, - int32_t* out_len) { + bool* out_valid, int32_t* out_len) { return gdv_fn_encrypt_dispatcher_5args( context, data, data_len, key_data, key_data_len, mode, mode_len, nullptr, - 0, nullptr, 0, out_len); + 0, nullptr, 0, out_valid, out_len); } extern "C" GANDIVA_EXPORT const char* gdv_fn_decrypt_dispatcher_3args( int64_t context, const char* data, int32_t data_len, const char* key_data, int32_t key_data_len, const char* mode, int32_t mode_len, - int32_t* out_len) { + bool* out_valid, int32_t* out_len) { return gdv_fn_decrypt_dispatcher_5args( context, data, data_len, key_data, key_data_len, mode, mode_len, nullptr, - 0, nullptr, 0, out_len); + 0, nullptr, 0, out_valid, out_len); } extern "C" GANDIVA_EXPORT const char* gdv_fn_encrypt_dispatcher_4args( int64_t context, const char* data, int32_t data_len, const char* key_data, int32_t key_data_len, const char* mode, int32_t mode_len, - const char* iv_data, int32_t iv_data_len, int32_t* out_len) { + const char* iv_data, int32_t iv_data_len, bool* out_valid, int32_t* out_len) { return gdv_fn_encrypt_dispatcher_5args( context, data, data_len, key_data, key_data_len, mode, mode_len, iv_data, - iv_data_len, 
nullptr, 0, out_len); + iv_data_len, nullptr, 0, out_valid, out_len); } extern "C" GANDIVA_EXPORT const char* gdv_fn_decrypt_dispatcher_4args( int64_t context, const char* data, int32_t data_len, const char* key_data, int32_t key_data_len, const char* mode, int32_t mode_len, - const char* iv_data, int32_t iv_data_len, int32_t* out_len) { + const char* iv_data, int32_t iv_data_len, bool* out_valid, int32_t* out_len) { return gdv_fn_decrypt_dispatcher_5args( context, data, data_len, key_data, key_data_len, mode, mode_len, iv_data, - iv_data_len, nullptr, 0, out_len); + iv_data_len, nullptr, 0, out_valid, out_len); } extern "C" GANDIVA_EXPORT @@ -864,7 +864,23 @@ const char* gdv_fn_encrypt_dispatcher_5args( int64_t context, const char* data, int32_t data_len, const char* key_data, int32_t key_data_len, const char* mode, int32_t mode_len, const char* iv_data, int32_t iv_data_len, const char* fifth_argument, - int32_t fifth_argument_len, int32_t* out_len) { + int32_t fifth_argument_len, bool* out_valid, int32_t* out_len) { + // We use kResultNullInternal to handle NULL inputs selectively: + // - NULL plaintext → return NULL (set out_valid = false) + // - NULL key → call function, it will throw validation error + // - NULL mode → call function, it will throw validation error + // - NULL IV → call function, auto-generates IV + // - NULL AAD → call function, no AAD used + + // Check if plaintext is NULL - this is the only case where we return NULL + if (data == nullptr) { + *out_valid = false; + *out_len = 0; + return nullptr; + } + + *out_valid = true; + try { // Calculate buffer size based on mode: // - ECB: data_len + 16 (padding only, no IV) @@ -898,7 +914,23 @@ const char* gdv_fn_decrypt_dispatcher_5args( int64_t context, const char* data, int32_t data_len, const char* key_data, int32_t key_data_len, const char* mode, int32_t mode_len, const char* iv_data, int32_t iv_data_len, const char* fifth_argument, - int32_t fifth_argument_len, int32_t* out_len) { + int32_t 
fifth_argument_len, bool* out_valid, int32_t* out_len) { + // We use kResultNullInternal to handle NULL inputs selectively: + // - NULL ciphertext → return NULL (set out_valid = false) + // - NULL key → call function, it will throw validation error + // - NULL mode → call function, it will throw validation error + // - NULL IV → call function, auto-extracts IV from ciphertext + // - NULL AAD → call function, no AAD used + + // Check if ciphertext is NULL - this is the only case where we return NULL + if (data == nullptr) { + *out_valid = false; + *out_len = 0; + return nullptr; + } + + *out_valid = true; + try { // Buffer size for decryption output is data_len: // - Input may contain IV + ciphertext + tag/padding @@ -1168,6 +1200,7 @@ arrow::Status ExportedStubFunctions::AddMappings(Engine* engine) const { types->i32_type(), // key_data_length types->i8_ptr_type(), // mode (binary string) types->i32_type(), // mode_length + types->i32_ptr_type(), // out_valid types->i32_ptr_type() // out_length }; @@ -1185,6 +1218,7 @@ arrow::Status ExportedStubFunctions::AddMappings(Engine* engine) const { types->i32_type(), // key_data_length types->i8_ptr_type(), // mode (binary string) types->i32_type(), // mode_length + types->i32_ptr_type(), // out_valid types->i32_ptr_type() // out_length }; From 4f526f32d13048a697d00ab6ab5db3e65ac3c0ce Mon Sep 17 00:00:00 2001 From: Tim Hurski Date: Wed, 7 Jan 2026 12:43:40 -0500 Subject: [PATCH 09/14] Prepend the IV only when it was auto-generated --- cpp/src/gandiva/encrypt_mode_dispatcher.cc | 10 +- cpp/src/gandiva/encrypt_mode_dispatcher.h | 10 +- cpp/src/gandiva/encrypt_utils_cbc.cc | 13 ++- cpp/src/gandiva/encrypt_utils_cbc.h | 9 +- cpp/src/gandiva/encrypt_utils_cbc_test.cc | 61 ++++++----- cpp/src/gandiva/encrypt_utils_gcm.cc | 13 ++- cpp/src/gandiva/encrypt_utils_gcm.h | 9 +- cpp/src/gandiva/encrypt_utils_gcm_test.cc | 58 +++++----- cpp/src/gandiva/gdv_function_stubs.cc | 14 ++- cpp/src/gandiva/gdv_function_stubs.h | 12 +-- 
cpp/src/gandiva/gdv_function_stubs_test.cc | 117 ++++++++++++++------- 11 files changed, 197 insertions(+), 129 deletions(-) diff --git a/cpp/src/gandiva/encrypt_mode_dispatcher.cc b/cpp/src/gandiva/encrypt_mode_dispatcher.cc index 416f0ea0636..5e0e796951c 100644 --- a/cpp/src/gandiva/encrypt_mode_dispatcher.cc +++ b/cpp/src/gandiva/encrypt_mode_dispatcher.cc @@ -76,8 +76,9 @@ int32_t EncryptModeDispatcher::encrypt( return aes_encrypt_ecb(plaintext, plaintext_len, key, key_len, false, cipher); case EncryptionMode::CBC: case EncryptionMode::CBC_PKCS7: - // CBC mode: IV is prepended to output, format is [16-byte IV][ciphertext] - // If iv is NULL, a random IV is auto-generated + // CBC mode: If iv is NULL, a random IV is auto-generated and prepended to output + // Auto-generated IV format: [16-byte IV][ciphertext] + // User-supplied IV format: [ciphertext] // Shorthand AES-CBC and explicit AES-CBC-PKCS7 both use CBC with PKCS7 padding return aes_encrypt_cbc(plaintext, plaintext_len, key, key_len, iv, iv_len, true, cipher); @@ -86,8 +87,9 @@ int32_t EncryptModeDispatcher::encrypt( return aes_encrypt_cbc(plaintext, plaintext_len, key, key_len, iv, iv_len, false, cipher); case EncryptionMode::GCM: - // GCM mode: IV is prepended to output, format is [12-byte IV][ciphertext][16-byte tag] - // If iv is NULL, a random IV is auto-generated + // GCM mode: If iv is NULL, a random IV is auto-generated and prepended to output + // Auto-generated IV format: [12-byte IV][ciphertext][16-byte tag] + // User-supplied IV format: [ciphertext][16-byte tag] // fifth_argument is AAD (Additional Authenticated Data) return aes_encrypt_gcm(plaintext, plaintext_len, key, key_len, iv, iv_len, fifth_argument, fifth_argument_len, cipher); diff --git a/cpp/src/gandiva/encrypt_mode_dispatcher.h b/cpp/src/gandiva/encrypt_mode_dispatcher.h index 7ae787fcdc3..a7d7b1863fb 100644 --- a/cpp/src/gandiva/encrypt_mode_dispatcher.h +++ b/cpp/src/gandiva/encrypt_mode_dispatcher.h @@ -38,14 +38,16 @@ class 
EncryptModeDispatcher { * * Output format: * - ECB: [ciphertext] - * - CBC: [16-byte IV][ciphertext] - * - GCM: [12-byte IV][ciphertext][16-byte authentication tag] + * - CBC with auto-generated IV: [16-byte IV][ciphertext] + * - CBC with user-supplied IV: [ciphertext] + * - GCM with auto-generated IV: [12-byte IV][ciphertext][16-byte authentication tag] + * - GCM with user-supplied IV: [ciphertext][16-byte authentication tag] * * IV Handling (CBC and GCM modes): * - If iv is NULL or iv_len is 0: A cryptographically secure random IV is * automatically generated and prepended to the output * - If iv is provided: It must be the exact required length (12 for GCM, 16 for CBC), - * and will be prepended to the output + * and will NOT be prepended to the output (only ciphertext is returned) * * @param plaintext The data to encrypt * @param plaintext_len Length of plaintext in bytes @@ -58,7 +60,7 @@ class EncryptModeDispatcher { * @param fifth_argument Additional parameter (AAD for GCM mode, ignored for others) * @param fifth_argument_len Length of fifth_argument in bytes * @param cipher Output buffer for encrypted data (must be large enough for output format) - * @return Length of encrypted data in bytes (includes prepended IV for CBC/GCM) + * @return Length of encrypted data in bytes (includes prepended IV only if auto-generated) * @throws std::runtime_error on encryption failure, unsupported mode, or invalid parameters */ static int32_t encrypt(const char* plaintext, int32_t plaintext_len, diff --git a/cpp/src/gandiva/encrypt_utils_cbc.cc b/cpp/src/gandiva/encrypt_utils_cbc.cc index cb2a719c1a8..84459c9a96f 100644 --- a/cpp/src/gandiva/encrypt_utils_cbc.cc +++ b/cpp/src/gandiva/encrypt_utils_cbc.cc @@ -75,11 +75,13 @@ int32_t aes_encrypt_cbc(const char* plaintext, int32_t plaintext_len, const char // Buffer for IV (either user-supplied or auto-generated) unsigned char iv_buffer[CBC_IV_LENGTH]; const unsigned char* actual_iv = nullptr; + bool iv_auto_generated = false; 
// Handle NULL IV: generate random IV if (iv == nullptr || iv_len == 0) { generate_random_iv(iv_buffer, CBC_IV_LENGTH); actual_iv = iv_buffer; + iv_auto_generated = true; } else { // Validate user-supplied IV length - CBC requires exactly 16 bytes if (iv_len != CBC_IV_LENGTH) { @@ -89,6 +91,7 @@ int32_t aes_encrypt_cbc(const char* plaintext, int32_t plaintext_len, const char throw std::runtime_error(oss.str()); } actual_iv = reinterpret_cast(iv); + iv_auto_generated = false; } int32_t cipher_len = 0; @@ -101,9 +104,13 @@ int32_t aes_encrypt_cbc(const char* plaintext, int32_t plaintext_len, const char get_openssl_error_string()); } - // Prepend IV to output: [16-byte IV][ciphertext] - std::memcpy(cipher, actual_iv, CBC_IV_LENGTH); - cipher_len = CBC_IV_LENGTH; + // Only prepend IV to output if it was auto-generated + // Auto-generated IV: [16-byte IV][ciphertext] + // User-supplied IV: [ciphertext] + if (iv_auto_generated) { + std::memcpy(cipher, actual_iv, CBC_IV_LENGTH); + cipher_len = CBC_IV_LENGTH; + } if (!EVP_EncryptInit_ex(en_ctx, cipher_algo, nullptr, reinterpret_cast(key), diff --git a/cpp/src/gandiva/encrypt_utils_cbc.h b/cpp/src/gandiva/encrypt_utils_cbc.h index 8352c5be7ec..a25122f2557 100644 --- a/cpp/src/gandiva/encrypt_utils_cbc.h +++ b/cpp/src/gandiva/encrypt_utils_cbc.h @@ -34,13 +34,14 @@ constexpr int32_t CBC_IV_LENGTH = 16; // 16 bytes (128 bits) - required for CBC /** * Encrypt data using AES-CBC algorithm with explicit padding mode * - * Output format: [16-byte IV][ciphertext] - * The IV is always prepended to the output. 
+ * Output format: + * - With NULL IV (auto-generated): [16-byte IV][ciphertext] + * - With user-supplied IV: [ciphertext] * * IV Handling: * - If iv is NULL or iv_len is 0: A cryptographically secure random 16-byte IV - * is automatically generated using OpenSSL RAND_bytes - * - If iv is provided: It must be exactly 16 bytes, and will be used as-is + * is automatically generated using OpenSSL RAND_bytes and prepended to output + * - If iv is provided: It must be exactly 16 bytes, will be used as-is, and NOT prepended * * @param plaintext The data to encrypt * @param plaintext_len Length of plaintext in bytes diff --git a/cpp/src/gandiva/encrypt_utils_cbc_test.cc b/cpp/src/gandiva/encrypt_utils_cbc_test.cc index f52d9c3682f..de6e1914a51 100644 --- a/cpp/src/gandiva/encrypt_utils_cbc_test.cc +++ b/cpp/src/gandiva/encrypt_utils_cbc_test.cc @@ -35,17 +35,18 @@ TEST(TestAesCbcEncryptUtils, TestAesEncryptDecryptPkcs7_16) { int32_t cipher_len = gandiva::aes_encrypt_cbc(to_encrypt, to_encrypt_len, key, key_len, iv, iv_len, true, cipher); - // Output format: [16-byte IV][ciphertext] + // Output format with user-supplied IV: [ciphertext] (IV NOT prepended) // Ciphertext includes padding, so it's rounded up to next 16-byte block - EXPECT_GE(cipher_len, to_encrypt_len + 16); // At least IV + plaintext + EXPECT_GE(cipher_len, to_encrypt_len); // At least plaintext length + EXPECT_LE(cipher_len, to_encrypt_len + 16); // Should NOT include IV (at most one block of padding) - // Verify IV is prepended - EXPECT_EQ(0, std::memcmp(cipher, iv, 16)); + // Verify IV is NOT prepended (ciphertext should not match IV) + EXPECT_NE(0, std::memcmp(cipher, iv, 16)); unsigned char decrypted[128]; - // Pass NULL IV to extract from ciphertext (since encrypt prepended it) + // Pass the same IV to decrypt (since encrypt did NOT prepend it) int32_t decrypted_len = gandiva::aes_decrypt_cbc(reinterpret_cast(cipher), - cipher_len, key, key_len, nullptr, 0, + cipher_len, key, key_len, iv, iv_len, true, 
decrypted); EXPECT_EQ(std::string(to_encrypt, to_encrypt_len), @@ -66,16 +67,17 @@ TEST(TestAesCbcEncryptUtils, TestAesEncryptDecryptPkcs7_24) { int32_t cipher_len = gandiva::aes_encrypt_cbc(to_encrypt, to_encrypt_len, key, key_len, iv, iv_len, true, cipher); - // Output format: [16-byte IV][ciphertext] - EXPECT_GE(cipher_len, to_encrypt_len + 16); + // Output format with user-supplied IV: [ciphertext] (IV NOT prepended) + EXPECT_GE(cipher_len, to_encrypt_len); + EXPECT_LE(cipher_len, to_encrypt_len + 16); // Should NOT include IV (at most one block of padding) - // Verify IV is prepended - EXPECT_EQ(0, std::memcmp(cipher, iv, 16)); + // Verify IV is NOT prepended + EXPECT_NE(0, std::memcmp(cipher, iv, 16)); unsigned char decrypted[128]; - // Pass NULL IV to extract from ciphertext (since encrypt prepended it) + // Pass the same IV to decrypt (since encrypt did NOT prepend it) int32_t decrypted_len = gandiva::aes_decrypt_cbc(reinterpret_cast(cipher), - cipher_len, key, key_len, nullptr, 0, + cipher_len, key, key_len, iv, iv_len, true, decrypted); EXPECT_EQ(std::string(to_encrypt, to_encrypt_len), @@ -96,16 +98,17 @@ TEST(TestAesCbcEncryptUtils, TestAesEncryptDecryptPkcs7_32) { int32_t cipher_len = gandiva::aes_encrypt_cbc(to_encrypt, to_encrypt_len, key, key_len, iv, iv_len, true, cipher); - // Output format: [16-byte IV][ciphertext] - EXPECT_GE(cipher_len, to_encrypt_len + 16); + // Output format with user-supplied IV: [ciphertext] (IV NOT prepended) + EXPECT_GE(cipher_len, to_encrypt_len); + EXPECT_LT(cipher_len, to_encrypt_len + 16); // Should NOT include IV - // Verify IV is prepended - EXPECT_EQ(0, std::memcmp(cipher, iv, 16)); + // Verify IV is NOT prepended + EXPECT_NE(0, std::memcmp(cipher, iv, 16)); unsigned char decrypted[128]; - // Pass NULL IV to extract from ciphertext (since encrypt prepended it) + // Pass the same IV to decrypt (since encrypt did NOT prepend it) int32_t decrypted_len = gandiva::aes_decrypt_cbc(reinterpret_cast(cipher), - cipher_len, 
key, key_len, nullptr, 0, + cipher_len, key, key_len, iv, iv_len, true, decrypted); EXPECT_EQ(std::string(to_encrypt, to_encrypt_len), @@ -126,17 +129,17 @@ TEST(TestAesCbcEncryptUtils, TestAesEncryptDecryptNoPadding_16) { int32_t cipher_len = gandiva::aes_encrypt_cbc(to_encrypt, to_encrypt_len, key, key_len, iv, iv_len, false, cipher); - // Output format: [16-byte IV][ciphertext] + // Output format with user-supplied IV: [ciphertext] (IV NOT prepended) // No padding, so ciphertext is exactly 16 bytes - EXPECT_EQ(cipher_len, 16 + 16); + EXPECT_EQ(cipher_len, 16); - // Verify IV is prepended - EXPECT_EQ(0, std::memcmp(cipher, iv, 16)); + // Verify IV is NOT prepended + EXPECT_NE(0, std::memcmp(cipher, iv, 16)); unsigned char decrypted[128]; - // Pass NULL IV to extract from ciphertext (since encrypt prepended it) + // Pass the same IV to decrypt (since encrypt did NOT prepend it) int32_t decrypted_len = gandiva::aes_decrypt_cbc(reinterpret_cast(cipher), - cipher_len, key, key_len, nullptr, 0, + cipher_len, key, key_len, iv, iv_len, false, decrypted); EXPECT_EQ(std::string(to_encrypt, to_encrypt_len), @@ -235,8 +238,8 @@ TEST(TestAesCbcEncryptUtils, TestNullIvNoPadding) { std::string(reinterpret_cast(decrypted), decrypted_len)); } -// Test NULL IV decrypt with user-supplied IV encrypt (backward compatibility) -TEST(TestAesCbcEncryptUtils, TestNullIvDecryptWithSuppliedIvEncrypt) { +// Test that user-supplied IV encrypt requires same IV for decrypt +TEST(TestAesCbcEncryptUtils, TestSuppliedIvEncryptRequiresSameIvDecrypt) { auto* key = "12345678abcdefgh"; auto* iv = "1234567890123456"; auto* to_encrypt = "some test string"; @@ -246,14 +249,14 @@ TEST(TestAesCbcEncryptUtils, TestNullIvDecryptWithSuppliedIvEncrypt) { auto to_encrypt_len = static_cast(strlen(to_encrypt)); unsigned char cipher[128]; - // Encrypt with user-supplied IV (IV will be prepended) + // Encrypt with user-supplied IV (IV will NOT be prepended) int32_t cipher_len = gandiva::aes_encrypt_cbc(to_encrypt, 
to_encrypt_len, key, key_len, iv, iv_len, true, cipher); - // Decrypt with NULL IV (extract IV from ciphertext) + // Decrypt with the same IV (required since IV was not prepended) unsigned char decrypted[128]; int32_t decrypted_len = gandiva::aes_decrypt_cbc(reinterpret_cast(cipher), - cipher_len, key, key_len, nullptr, 0, + cipher_len, key, key_len, iv, iv_len, true, decrypted); EXPECT_EQ(std::string(to_encrypt, to_encrypt_len), diff --git a/cpp/src/gandiva/encrypt_utils_gcm.cc b/cpp/src/gandiva/encrypt_utils_gcm.cc index 9f98840981d..881d9d58036 100644 --- a/cpp/src/gandiva/encrypt_utils_gcm.cc +++ b/cpp/src/gandiva/encrypt_utils_gcm.cc @@ -82,16 +82,19 @@ int32_t aes_encrypt_gcm(const char* plaintext, int32_t plaintext_len, // Buffer for IV (either user-supplied or auto-generated) unsigned char iv_buffer[GCM_IV_LENGTH]; const unsigned char* actual_iv = nullptr; + bool iv_auto_generated = false; // Handle IV: either generate random IV or use user-supplied IV if (iv == nullptr || iv_len == 0) { // Generate random IV generate_random_iv(iv_buffer, GCM_IV_LENGTH); actual_iv = iv_buffer; + iv_auto_generated = true; } else { // Use user-supplied IV validate_iv_length_gcm(iv_len); actual_iv = reinterpret_cast(iv); + iv_auto_generated = false; } int32_t cipher_len = 0; @@ -105,9 +108,13 @@ int32_t aes_encrypt_gcm(const char* plaintext, int32_t plaintext_len, } try { - // Prepend IV to output: [12-byte IV][ciphertext][16-byte tag] - std::memcpy(cipher, actual_iv, GCM_IV_LENGTH); - cipher_len = GCM_IV_LENGTH; + // Only prepend IV to output if it was auto-generated + // Auto-generated IV: [12-byte IV][ciphertext][16-byte tag] + // User-supplied IV: [ciphertext][16-byte tag] + if (iv_auto_generated) { + std::memcpy(cipher, actual_iv, GCM_IV_LENGTH); + cipher_len = GCM_IV_LENGTH; + } if (!EVP_EncryptInit_ex(en_ctx, cipher_algo, nullptr, reinterpret_cast(key), diff --git a/cpp/src/gandiva/encrypt_utils_gcm.h b/cpp/src/gandiva/encrypt_utils_gcm.h index 5bcb2b8e930..1c4d815c79f 
100644 --- a/cpp/src/gandiva/encrypt_utils_gcm.h +++ b/cpp/src/gandiva/encrypt_utils_gcm.h @@ -35,13 +35,14 @@ constexpr int32_t GCM_TAG_LENGTH = 16; /** * Encrypt data using AES-GCM algorithm * - * Output format: [12-byte IV][ciphertext][16-byte authentication tag] - * The IV is always prepended to the output. + * Output format: + * - With NULL IV (auto-generated): [12-byte IV][ciphertext][16-byte authentication tag] + * - With user-supplied IV: [ciphertext][16-byte authentication tag] * * IV Handling: * - If iv is NULL or iv_len is 0: A cryptographically secure random 12-byte IV - * is automatically generated using OpenSSL RAND_bytes - * - If iv is provided: It must be exactly 12 bytes, and will be used as-is + * is automatically generated using OpenSSL RAND_bytes and prepended to output + * - If iv is provided: It must be exactly 12 bytes, will be used as-is, and NOT prepended * * @param plaintext The data to encrypt * @param plaintext_len Length of plaintext in bytes diff --git a/cpp/src/gandiva/encrypt_utils_gcm_test.cc b/cpp/src/gandiva/encrypt_utils_gcm_test.cc index ffd14da4dd4..8325b24c16d 100644 --- a/cpp/src/gandiva/encrypt_utils_gcm_test.cc +++ b/cpp/src/gandiva/encrypt_utils_gcm_test.cc @@ -35,16 +35,16 @@ TEST(TestAesGcmEncryptUtils, TestAesEncryptDecryptIvOnly_16) { int32_t cipher_len = gandiva::aes_encrypt_gcm(to_encrypt, to_encrypt_len, key, key_len, iv, iv_len, nullptr, 0, cipher); - // Output format: [12-byte IV][ciphertext][16-byte tag] - EXPECT_EQ(cipher_len, to_encrypt_len + 12 + 16); + // Output format with user-supplied IV: [ciphertext][16-byte tag] (IV NOT prepended) + EXPECT_EQ(cipher_len, to_encrypt_len + 16); - // Verify IV is prepended - EXPECT_EQ(0, std::memcmp(cipher, iv, 12)); + // Verify IV is NOT prepended (ciphertext should not match IV) + EXPECT_NE(0, std::memcmp(cipher, iv, 12)); unsigned char decrypted[128]; - // Pass NULL IV to extract from ciphertext (since encrypt prepended it) + // Pass the same IV to decrypt (since encrypt 
did NOT prepend it) int32_t decrypted_len = gandiva::aes_decrypt_gcm(reinterpret_cast(cipher), - cipher_len, key, key_len, nullptr, 0, + cipher_len, key, key_len, iv, iv_len, nullptr, 0, decrypted); EXPECT_EQ(std::string(to_encrypt, to_encrypt_len), @@ -67,16 +67,16 @@ TEST(TestAesGcmEncryptUtils, TestAesEncryptDecryptWithAad_16) { int32_t cipher_len = gandiva::aes_encrypt_gcm(to_encrypt, to_encrypt_len, key, key_len, iv, iv_len, aad, aad_len, cipher); - // Output format: [12-byte IV][ciphertext][16-byte tag] - EXPECT_EQ(cipher_len, to_encrypt_len + 12 + 16); + // Output format with user-supplied IV: [ciphertext][16-byte tag] (IV NOT prepended) + EXPECT_EQ(cipher_len, to_encrypt_len + 16); - // Verify IV is prepended - EXPECT_EQ(0, std::memcmp(cipher, iv, 12)); + // Verify IV is NOT prepended + EXPECT_NE(0, std::memcmp(cipher, iv, 12)); unsigned char decrypted[128]; - // Pass NULL IV to extract from ciphertext (since encrypt prepended it) + // Pass the same IV to decrypt (since encrypt did NOT prepend it) int32_t decrypted_len = gandiva::aes_decrypt_gcm(reinterpret_cast(cipher), - cipher_len, key, key_len, nullptr, 0, + cipher_len, key, key_len, iv, iv_len, aad, aad_len, decrypted); EXPECT_EQ(std::string(to_encrypt, to_encrypt_len), @@ -97,16 +97,16 @@ TEST(TestAesGcmEncryptUtils, TestAesEncryptDecryptIvOnly_24) { int32_t cipher_len = gandiva::aes_encrypt_gcm(to_encrypt, to_encrypt_len, key, key_len, iv, iv_len, nullptr, 0, cipher); - // Output format: [12-byte IV][ciphertext][16-byte tag] - EXPECT_EQ(cipher_len, to_encrypt_len + 12 + 16); + // Output format with user-supplied IV: [ciphertext][16-byte tag] (IV NOT prepended) + EXPECT_EQ(cipher_len, to_encrypt_len + 16); - // Verify IV is prepended - EXPECT_EQ(0, std::memcmp(cipher, iv, 12)); + // Verify IV is NOT prepended + EXPECT_NE(0, std::memcmp(cipher, iv, 12)); unsigned char decrypted[128]; - // Pass NULL IV to extract from ciphertext (since encrypt prepended it) + // Pass the same IV to decrypt (since 
encrypt did NOT prepend it) int32_t decrypted_len = gandiva::aes_decrypt_gcm(reinterpret_cast(cipher), - cipher_len, key, key_len, nullptr, 0, + cipher_len, key, key_len, iv, iv_len, nullptr, 0, decrypted); EXPECT_EQ(std::string(to_encrypt, to_encrypt_len), @@ -127,16 +127,16 @@ TEST(TestAesGcmEncryptUtils, TestAesEncryptDecryptIvOnly_32) { int32_t cipher_len = gandiva::aes_encrypt_gcm(to_encrypt, to_encrypt_len, key, key_len, iv, iv_len, nullptr, 0, cipher); - // Output format: [12-byte IV][ciphertext][16-byte tag] - EXPECT_EQ(cipher_len, to_encrypt_len + 12 + 16); + // Output format with user-supplied IV: [ciphertext][16-byte tag] (IV NOT prepended) + EXPECT_EQ(cipher_len, to_encrypt_len + 16); - // Verify IV is prepended - EXPECT_EQ(0, std::memcmp(cipher, iv, 12)); + // Verify IV is NOT prepended + EXPECT_NE(0, std::memcmp(cipher, iv, 12)); unsigned char decrypted[128]; - // Pass NULL IV to extract from ciphertext (since encrypt prepended it) + // Pass the same IV to decrypt (since encrypt did NOT prepend it) int32_t decrypted_len = gandiva::aes_decrypt_gcm(reinterpret_cast(cipher), - cipher_len, key, key_len, nullptr, 0, + cipher_len, key, key_len, iv, iv_len, nullptr, 0, decrypted); EXPECT_EQ(std::string(to_encrypt, to_encrypt_len), @@ -237,8 +237,8 @@ TEST(TestAesGcmEncryptUtils, TestNullIvWithAad) { std::string(reinterpret_cast(decrypted), decrypted_len)); } -// Test NULL IV decrypt with user-supplied IV encrypt (backward compatibility) -TEST(TestAesGcmEncryptUtils, TestNullIvDecryptWithSuppliedIvEncrypt) { +// Test that user-supplied IV encrypt requires same IV for decrypt +TEST(TestAesGcmEncryptUtils, TestSuppliedIvEncryptRequiresSameIvDecrypt) { auto* key = "12345678abcdefgh"; auto* iv = "123456789012"; auto* to_encrypt = "some test string"; @@ -248,14 +248,14 @@ TEST(TestAesGcmEncryptUtils, TestNullIvDecryptWithSuppliedIvEncrypt) { auto to_encrypt_len = static_cast(strlen(to_encrypt)); unsigned char cipher[128]; - // Encrypt with user-supplied IV (IV 
will be prepended) + // Encrypt with user-supplied IV (IV will NOT be prepended) int32_t cipher_len = gandiva::aes_encrypt_gcm(to_encrypt, to_encrypt_len, key, key_len, iv, iv_len, nullptr, 0, cipher); - // Decrypt with NULL IV (extract IV from ciphertext) + // Decrypt with the same IV (required since IV was not prepended) unsigned char decrypted[128]; int32_t decrypted_len = gandiva::aes_decrypt_gcm(reinterpret_cast(cipher), - cipher_len, key, key_len, nullptr, 0, + cipher_len, key, key_len, iv, iv_len, nullptr, 0, decrypted); EXPECT_EQ(std::string(to_encrypt, to_encrypt_len), diff --git a/cpp/src/gandiva/gdv_function_stubs.cc b/cpp/src/gandiva/gdv_function_stubs.cc index 18edee203c8..04fcdc50c2d 100644 --- a/cpp/src/gandiva/gdv_function_stubs.cc +++ b/cpp/src/gandiva/gdv_function_stubs.cc @@ -778,12 +778,13 @@ const char* gdv_fn_aes_encrypt_ecb_legacy(int64_t context, const char* data, // This function is ECB-only, so we enforce the mode const char* mode = "AES-ECB"; int32_t mode_len = 7; + bool out_valid = true; const char* result = gdv_fn_encrypt_dispatcher_3args( - context, data, data_len, key_data, key_data_len, mode, mode_len, out_len); + context, data, data_len, key_data, key_data_len, mode, mode_len, &out_valid, out_len); // Add null terminator for string compatibility // Note: This may not be valid UTF-8, but it's needed for string handling - if (result != nullptr) { + if (result != nullptr && out_valid) { char* mutable_result = const_cast(result); mutable_result[*out_len] = '\0'; } @@ -805,12 +806,13 @@ const char* gdv_fn_aes_decrypt_ecb_legacy(int64_t context, const char* data, // This function is ECB-only, so we enforce the mode const char* mode = "AES-ECB"; int32_t mode_len = 7; + bool out_valid = true; const char* result = gdv_fn_decrypt_dispatcher_3args( - context, data, data_len, key_data, key_data_len, mode, mode_len, out_len); + context, data, data_len, key_data, key_data_len, mode, mode_len, &out_valid, out_len); // Add null terminator for 
string compatibility // Note: This may not be valid UTF-8, but it's needed for string handling - if (result != nullptr) { + if (result != nullptr && out_valid) { char* mutable_result = const_cast(result); mutable_result[*out_len] = '\0'; } @@ -1238,6 +1240,7 @@ arrow::Status ExportedStubFunctions::AddMappings(Engine* engine) const { types->i32_type(), // mode_length types->i8_ptr_type(), // iv (binary string) types->i32_type(), // iv_length + types->i32_ptr_type(), // out_valid types->i32_ptr_type() // out_length }; @@ -1257,6 +1260,7 @@ arrow::Status ExportedStubFunctions::AddMappings(Engine* engine) const { types->i32_type(), // mode_length types->i8_ptr_type(), // iv (binary string) types->i32_type(), // iv_length + types->i32_ptr_type(), // out_valid types->i32_ptr_type() // out_length }; @@ -1279,6 +1283,7 @@ arrow::Status ExportedStubFunctions::AddMappings(Engine* engine) const { types->i32_type(), // iv_length types->i8_ptr_type(), // fifth_argument (binary string) types->i32_type(), // fifth_argument_length + types->i32_ptr_type(), // out_valid types->i32_ptr_type() // out_length }; @@ -1301,6 +1306,7 @@ arrow::Status ExportedStubFunctions::AddMappings(Engine* engine) const { types->i32_type(), // iv_length types->i8_ptr_type(), // fifth_argument (binary string) types->i32_type(), // fifth_argument_length + types->i32_ptr_type(), // out_valid types->i32_ptr_type() // out_length }; diff --git a/cpp/src/gandiva/gdv_function_stubs.h b/cpp/src/gandiva/gdv_function_stubs.h index 54480ac7f6f..7c26b9cbf0f 100644 --- a/cpp/src/gandiva/gdv_function_stubs.h +++ b/cpp/src/gandiva/gdv_function_stubs.h @@ -209,13 +209,13 @@ GANDIVA_EXPORT const char* gdv_fn_encrypt_dispatcher_3args( int64_t context, const char* data, int32_t data_len, const char* key_data, int32_t key_data_len, const char* mode, - int32_t mode_len, int32_t* out_len); + int32_t mode_len, bool* out_valid, int32_t* out_len); GANDIVA_EXPORT const char* gdv_fn_decrypt_dispatcher_3args( int64_t context, const 
char* data, int32_t data_len, const char* key_data, int32_t key_data_len, const char* mode, - int32_t mode_len, int32_t* out_len); + int32_t mode_len, bool* out_valid, int32_t* out_len); // 4-argument dispatcher: (data, key, mode, iv) GANDIVA_EXPORT @@ -223,14 +223,14 @@ const char* gdv_fn_encrypt_dispatcher_4args( int64_t context, const char* data, int32_t data_len, const char* key_data, int32_t key_data_len, const char* mode, int32_t mode_len, const char* iv_data, int32_t iv_data_len, - int32_t* out_len); + bool* out_valid, int32_t* out_len); GANDIVA_EXPORT const char* gdv_fn_decrypt_dispatcher_4args( int64_t context, const char* data, int32_t data_len, const char* key_data, int32_t key_data_len, const char* mode, int32_t mode_len, const char* iv_data, int32_t iv_data_len, - int32_t* out_len); + bool* out_valid, int32_t* out_len); // 5-argument dispatcher: (data, key, mode, iv, fifth_argument) GANDIVA_EXPORT @@ -239,7 +239,7 @@ const char* gdv_fn_encrypt_dispatcher_5args( const char* key_data, int32_t key_data_len, const char* mode, int32_t mode_len, const char* iv_data, int32_t iv_data_len, const char* fifth_argument, int32_t fifth_argument_len, - int32_t* out_len); + bool* out_valid, int32_t* out_len); GANDIVA_EXPORT const char* gdv_fn_decrypt_dispatcher_5args( @@ -247,7 +247,7 @@ const char* gdv_fn_decrypt_dispatcher_5args( const char* key_data, int32_t key_data_len, const char* mode, int32_t mode_len, const char* iv_data, int32_t iv_data_len, const char* fifth_argument, int32_t fifth_argument_len, - int32_t* out_len); + bool* out_valid, int32_t* out_len); GANDIVA_EXPORT const char* gdv_mask_first_n_utf8_int32(int64_t context, const char* data, diff --git a/cpp/src/gandiva/gdv_function_stubs_test.cc b/cpp/src/gandiva/gdv_function_stubs_test.cc index 3bda0d9abbf..e46f3aabc64 100644 --- a/cpp/src/gandiva/gdv_function_stubs_test.cc +++ b/cpp/src/gandiva/gdv_function_stubs_test.cc @@ -1360,12 +1360,14 @@ TEST(TestGdvFnStubs, TestAesEncryptDecrypt16) { auto 
mode_len = static_cast(mode.length()); int64_t ctx_ptr = reinterpret_cast(&ctx); + bool encrypt_valid = true; const char* cipher = gdv_fn_encrypt_dispatcher_3args( ctx_ptr, data.c_str(), data_len, key16.c_str(), key16_len, mode.c_str(), - mode_len, &cipher_len); + mode_len, &encrypt_valid, &cipher_len); + bool decrypt_valid = true; const char* decrypted_value = gdv_fn_decrypt_dispatcher_3args( ctx_ptr, cipher, cipher_len, key16.c_str(), key16_len, mode.c_str(), - mode_len, &decrypted_len); + mode_len, &decrypt_valid, &decrypted_len); EXPECT_EQ(data, std::string(reinterpret_cast(decrypted_value), @@ -1384,13 +1386,15 @@ TEST(TestGdvFnStubs, TestAesEncryptDecrypt24) { auto mode_len = static_cast(mode.length()); int64_t ctx_ptr = reinterpret_cast(&ctx); + bool encrypt_valid = true; const char* cipher = gdv_fn_encrypt_dispatcher_3args( ctx_ptr, data.c_str(), data_len, key24.c_str(), key24_len, mode.c_str(), - mode_len, &cipher_len); + mode_len, &encrypt_valid, &cipher_len); + bool decrypt_valid = true; const char* decrypted_value = gdv_fn_decrypt_dispatcher_3args( ctx_ptr, cipher, cipher_len, key24.c_str(), key24_len, mode.c_str(), - mode_len, &decrypted_len); + mode_len, &decrypt_valid, &decrypted_len); EXPECT_EQ(data, std::string(reinterpret_cast(decrypted_value), @@ -1409,13 +1413,15 @@ TEST(TestGdvFnStubs, TestAesEncryptDecrypt32) { auto mode_len = static_cast(mode.length()); int64_t ctx_ptr = reinterpret_cast(&ctx); + bool encrypt_valid = true; const char* cipher = gdv_fn_encrypt_dispatcher_3args( ctx_ptr, data.c_str(), data_len, key32.c_str(), key32_len, mode.c_str(), - mode_len, &cipher_len); + mode_len, &encrypt_valid, &cipher_len); + bool decrypt_valid = true; const char* decrypted_value = gdv_fn_decrypt_dispatcher_3args( ctx_ptr, cipher, cipher_len, key32.c_str(), key32_len, mode.c_str(), - mode_len, &decrypted_len); + mode_len, &decrypt_valid, &decrypted_len); EXPECT_EQ(data, std::string(reinterpret_cast(decrypted_value), @@ -1435,16 +1441,18 @@ 
TEST(TestGdvFnStubs, TestAesEncryptDecryptValidation) { std::string cipher = "12345678abcdefgh12345678abcdefghb"; auto cipher_len = static_cast(cipher.length()); + bool encrypt_valid = true; gdv_fn_encrypt_dispatcher_3args(ctx_ptr, data.c_str(), data_len, key33.c_str(), key33_len, mode.c_str(), - mode_len, &cipher_len); + mode_len, &encrypt_valid, &cipher_len); EXPECT_THAT(ctx.get_error(), ::testing::HasSubstr("Unsupported key length for AES-ECB")); ctx.Reset(); + bool decrypt_valid = true; gdv_fn_decrypt_dispatcher_3args(ctx_ptr, cipher.c_str(), cipher_len, key33.c_str(), key33_len, mode.c_str(), - mode_len, &decrypted_len); + mode_len, &decrypt_valid, &decrypted_len); EXPECT_THAT(ctx.get_error(), ::testing::HasSubstr("Unsupported key length for AES-ECB")); ctx.Reset(); @@ -1463,14 +1471,16 @@ TEST(TestGdvFnStubs, TestAesEncryptDecryptModeEcb) { auto mode_len = static_cast(mode.length()); int64_t ctx_ptr = reinterpret_cast(&ctx); + bool encrypt_valid = true; const char* cipher = gdv_fn_encrypt_dispatcher_3args( ctx_ptr, data.c_str(), data_len, key16.c_str(), key16_len, mode.c_str(), - mode_len, &cipher_len); + mode_len, &encrypt_valid, &cipher_len); EXPECT_GT(cipher_len, 0); + bool decrypt_valid = true; const char* decrypted_value = gdv_fn_decrypt_dispatcher_3args( ctx_ptr, cipher, cipher_len, key16.c_str(), key16_len, mode.c_str(), - mode_len, &decrypted_len); + mode_len, &decrypt_valid, &decrypted_len); EXPECT_EQ(data, std::string(reinterpret_cast(decrypted_value), decrypted_len)); @@ -1489,10 +1499,11 @@ TEST(TestGdvFnStubs, TestAesEncryptDecryptModeValidation) { int64_t ctx_ptr = reinterpret_cast(&ctx); // Test encrypt with invalid mode + bool encrypt_valid = true; gdv_fn_encrypt_dispatcher_3args(ctx_ptr, data.c_str(), data_len, key16.c_str(), key16_len, invalid_mode.c_str(), invalid_mode_len, - &cipher_len); + &encrypt_valid, &cipher_len); EXPECT_THAT(ctx.get_error(), ::testing::HasSubstr("Unsupported encryption mode")); ctx.Reset(); @@ -1500,10 +1511,11 @@ 
TEST(TestGdvFnStubs, TestAesEncryptDecryptModeValidation) { // Test decrypt with invalid mode std::string cipher = "12345678abcdefgh12345678abcdefgh"; auto cipher_len_val = static_cast(cipher.length()); + bool decrypt_valid = true; gdv_fn_decrypt_dispatcher_3args(ctx_ptr, cipher.c_str(), cipher_len_val, key16.c_str(), key16_len, invalid_mode.c_str(), invalid_mode_len, - &decrypted_len); + &decrypt_valid, &decrypted_len); EXPECT_THAT(ctx.get_error(), ::testing::HasSubstr("Unsupported decryption mode")); ctx.Reset(); @@ -1524,15 +1536,18 @@ TEST(TestGdvFnStubs, TestAesEncryptDecryptGcmIvOnly) { auto iv_len = static_cast(iv.length()); int64_t ctx_ptr = reinterpret_cast(&ctx); + bool encrypt_valid = true; const char* cipher = gdv_fn_encrypt_dispatcher_5args( ctx_ptr, data.c_str(), data_len, key16.c_str(), key16_len, mode.c_str(), - mode_len, iv.c_str(), iv_len, nullptr, 0, &cipher_len); + mode_len, iv.c_str(), iv_len, nullptr, 0, &encrypt_valid, &cipher_len); EXPECT_GT(cipher_len, 0); - // Pass NULL IV to extract from ciphertext (since encrypt prepended it) + // When IV is supplied to encrypt, it must also be supplied to decrypt + // (IV is only prepended when auto-generated) + bool decrypt_valid = true; const char* decrypted_value = gdv_fn_decrypt_dispatcher_5args( ctx_ptr, cipher, cipher_len, key16.c_str(), key16_len, mode.c_str(), - mode_len, nullptr, 0, nullptr, 0, &decrypted_len); + mode_len, iv.c_str(), iv_len, nullptr, 0, &decrypt_valid, &decrypted_len); EXPECT_EQ(data, std::string(reinterpret_cast(decrypted_value), @@ -1555,15 +1570,18 @@ TEST(TestGdvFnStubs, TestAesEncryptDecryptGcmWithAad) { auto aad_len = static_cast(aad.length()); int64_t ctx_ptr = reinterpret_cast(&ctx); + bool encrypt_valid = true; const char* cipher = gdv_fn_encrypt_dispatcher_5args( ctx_ptr, data.c_str(), data_len, key16.c_str(), key16_len, mode.c_str(), - mode_len, iv.c_str(), iv_len, aad.c_str(), aad_len, &cipher_len); + mode_len, iv.c_str(), iv_len, aad.c_str(), aad_len, 
&encrypt_valid, &cipher_len); EXPECT_GT(cipher_len, 0); - // Pass NULL IV to extract from ciphertext (since encrypt prepended it) + // When IV is supplied to encrypt, it must also be supplied to decrypt + // (IV is only prepended when auto-generated) + bool decrypt_valid = true; const char* decrypted_value = gdv_fn_decrypt_dispatcher_5args( ctx_ptr, cipher, cipher_len, key16.c_str(), key16_len, mode.c_str(), - mode_len, nullptr, 0, aad.c_str(), aad_len, &decrypted_len); + mode_len, iv.c_str(), iv_len, aad.c_str(), aad_len, &decrypt_valid, &decrypted_len); EXPECT_EQ(data, std::string(reinterpret_cast(decrypted_value), @@ -1583,14 +1601,16 @@ TEST(TestGdvFnStubs, TestAesEncryptDecryptShorthandEcb) { auto mode_len = static_cast(mode.length()); int64_t ctx_ptr = reinterpret_cast(&ctx); + bool encrypt_valid = true; const char* cipher = gdv_fn_encrypt_dispatcher_3args( ctx_ptr, data.c_str(), data_len, key16.c_str(), key16_len, mode.c_str(), - mode_len, &cipher_len); + mode_len, &encrypt_valid, &cipher_len); EXPECT_GT(cipher_len, 0); + bool decrypt_valid = true; const char* decrypted_value = gdv_fn_decrypt_dispatcher_3args( ctx_ptr, cipher, cipher_len, key16.c_str(), key16_len, mode.c_str(), - mode_len, &decrypted_len); + mode_len, &decrypt_valid, &decrypted_len); EXPECT_EQ(data, std::string(reinterpret_cast(decrypted_value), @@ -1610,14 +1630,16 @@ TEST(TestGdvFnStubs, TestAesEncryptDecryptExplicitEcbPkcs7) { auto mode_len = static_cast(mode.length()); int64_t ctx_ptr = reinterpret_cast(&ctx); + bool encrypt_valid2 = true; const char* cipher = gdv_fn_encrypt_dispatcher_3args( ctx_ptr, data.c_str(), data_len, key16.c_str(), key16_len, mode.c_str(), - mode_len, &cipher_len); + mode_len, &encrypt_valid2, &cipher_len); EXPECT_GT(cipher_len, 0); + bool decrypt_valid2 = true; const char* decrypted_value = gdv_fn_decrypt_dispatcher_3args( ctx_ptr, cipher, cipher_len, key16.c_str(), key16_len, mode.c_str(), - mode_len, &decrypted_len); + mode_len, &decrypt_valid2, 
&decrypted_len); EXPECT_EQ(data, std::string(reinterpret_cast(decrypted_value), @@ -1639,15 +1661,18 @@ TEST(TestGdvFnStubs, TestAesEncryptDecryptShorthandCbc) { auto iv_len = static_cast(iv.length()); int64_t ctx_ptr = reinterpret_cast(&ctx); + bool encrypt_valid = true; const char* cipher = gdv_fn_encrypt_dispatcher_4args( ctx_ptr, data.c_str(), data_len, key16.c_str(), key16_len, mode.c_str(), - mode_len, iv.c_str(), iv_len, &cipher_len); + mode_len, iv.c_str(), iv_len, &encrypt_valid, &cipher_len); EXPECT_GT(cipher_len, 0); - // Pass NULL IV to extract from ciphertext (since encrypt prepended it) + // When IV is supplied to encrypt, it must also be supplied to decrypt + // (IV is only prepended when auto-generated) + bool decrypt_valid = true; const char* decrypted_value = gdv_fn_decrypt_dispatcher_4args( ctx_ptr, cipher, cipher_len, key16.c_str(), key16_len, mode.c_str(), - mode_len, nullptr, 0, &decrypted_len); + mode_len, iv.c_str(), iv_len, &decrypt_valid, &decrypted_len); EXPECT_EQ(data, std::string(reinterpret_cast(decrypted_value), @@ -1669,15 +1694,18 @@ TEST(TestGdvFnStubs, TestAesEncryptDecryptExplicitCbcPkcs7) { auto iv_len = static_cast(iv.length()); int64_t ctx_ptr = reinterpret_cast(&ctx); + bool encrypt_valid = true; const char* cipher = gdv_fn_encrypt_dispatcher_4args( ctx_ptr, data.c_str(), data_len, key16.c_str(), key16_len, mode.c_str(), - mode_len, iv.c_str(), iv_len, &cipher_len); + mode_len, iv.c_str(), iv_len, &encrypt_valid, &cipher_len); EXPECT_GT(cipher_len, 0); - // Pass NULL IV to extract from ciphertext (since encrypt prepended it) + // When IV is supplied to encrypt, it must also be supplied to decrypt + // (IV is only prepended when auto-generated) + bool decrypt_valid = true; const char* decrypted_value = gdv_fn_decrypt_dispatcher_4args( ctx_ptr, cipher, cipher_len, key16.c_str(), key16_len, mode.c_str(), - mode_len, nullptr, 0, &decrypted_len); + mode_len, iv.c_str(), iv_len, &decrypt_valid, &decrypted_len); EXPECT_EQ(data, 
std::string(reinterpret_cast(decrypted_value), @@ -1700,15 +1728,18 @@ TEST(TestGdvFnStubs, TestAesEncryptDecryptCbcNone) { auto iv_len = static_cast(iv.length()); int64_t ctx_ptr = reinterpret_cast(&ctx); + bool encrypt_valid = true; const char* cipher = gdv_fn_encrypt_dispatcher_4args( ctx_ptr, data.c_str(), data_len, key16.c_str(), key16_len, mode.c_str(), - mode_len, iv.c_str(), iv_len, &cipher_len); + mode_len, iv.c_str(), iv_len, &encrypt_valid, &cipher_len); EXPECT_GT(cipher_len, 0); - // Pass NULL IV to extract from ciphertext (since encrypt prepended it) + // When IV is supplied to encrypt, it must also be supplied to decrypt + // (IV is only prepended when auto-generated) + bool decrypt_valid = true; const char* decrypted_value = gdv_fn_decrypt_dispatcher_4args( ctx_ptr, cipher, cipher_len, key16.c_str(), key16_len, mode.c_str(), - mode_len, nullptr, 0, &decrypted_len); + mode_len, iv.c_str(), iv_len, &decrypt_valid, &decrypted_len); EXPECT_EQ(data, std::string(reinterpret_cast(decrypted_value), @@ -1729,16 +1760,18 @@ TEST(TestGdvFnStubs, TestAesEncryptGcmWithNullIv4Args) { int64_t ctx_ptr = reinterpret_cast(&ctx); // Test 4-arg version with NULL IV (should auto-generate IV) + bool encrypt_valid = true; const char* cipher = gdv_fn_encrypt_dispatcher_4args( ctx_ptr, data.c_str(), data_len, key16.c_str(), key16_len, mode.c_str(), - mode_len, nullptr, 0, &cipher_len); + mode_len, nullptr, 0, &encrypt_valid, &cipher_len); EXPECT_GT(cipher_len, 0); EXPECT_TRUE(cipher != nullptr); // Decrypt with NULL IV (should extract from ciphertext) + bool decrypt_valid = true; const char* decrypted_value = gdv_fn_decrypt_dispatcher_4args( ctx_ptr, cipher, cipher_len, key16.c_str(), key16_len, mode.c_str(), - mode_len, nullptr, 0, &decrypted_len); + mode_len, nullptr, 0, &decrypt_valid, &decrypted_len); EXPECT_EQ(data, std::string(reinterpret_cast(decrypted_value), @@ -1759,16 +1792,18 @@ TEST(TestGdvFnStubs, TestAesEncryptGcmWithNullIvAndNullAad) { int64_t ctx_ptr = 
reinterpret_cast(&ctx); // Test 5-arg version with NULL IV and NULL AAD + bool encrypt_valid = true; const char* cipher = gdv_fn_encrypt_dispatcher_5args( ctx_ptr, data.c_str(), data_len, key16.c_str(), key16_len, mode.c_str(), - mode_len, nullptr, 0, nullptr, 0, &cipher_len); + mode_len, nullptr, 0, nullptr, 0, &encrypt_valid, &cipher_len); EXPECT_GT(cipher_len, 0); EXPECT_TRUE(cipher != nullptr); // Decrypt with NULL IV and NULL AAD + bool decrypt_valid = true; const char* decrypted_value = gdv_fn_decrypt_dispatcher_5args( ctx_ptr, cipher, cipher_len, key16.c_str(), key16_len, mode.c_str(), - mode_len, nullptr, 0, nullptr, 0, &decrypted_len); + mode_len, nullptr, 0, nullptr, 0, &decrypt_valid, &decrypted_len); EXPECT_EQ(data, std::string(reinterpret_cast(decrypted_value), @@ -1789,16 +1824,18 @@ TEST(TestGdvFnStubs, TestAesEncryptCbcWithNullIv4Args) { int64_t ctx_ptr = reinterpret_cast(&ctx); // Test 4-arg version with NULL IV (should auto-generate IV) + bool encrypt_valid = true; const char* cipher = gdv_fn_encrypt_dispatcher_4args( ctx_ptr, data.c_str(), data_len, key16.c_str(), key16_len, mode.c_str(), - mode_len, nullptr, 0, &cipher_len); + mode_len, nullptr, 0, &encrypt_valid, &cipher_len); EXPECT_GT(cipher_len, 0); EXPECT_TRUE(cipher != nullptr); // Decrypt with NULL IV (should extract from ciphertext) + bool decrypt_valid = true; const char* decrypted_value = gdv_fn_decrypt_dispatcher_4args( ctx_ptr, cipher, cipher_len, key16.c_str(), key16_len, mode.c_str(), - mode_len, nullptr, 0, &decrypted_len); + mode_len, nullptr, 0, &decrypt_valid, &decrypted_len); EXPECT_EQ(data, std::string(reinterpret_cast(decrypted_value), @@ -1821,16 +1858,18 @@ TEST(TestGdvFnStubs, TestAesEncryptGcmWithNullIvButWithAad) { int64_t ctx_ptr = reinterpret_cast(&ctx); // Test 5-arg version with NULL IV but non-NULL AAD + bool encrypt_valid = true; const char* cipher = gdv_fn_encrypt_dispatcher_5args( ctx_ptr, data.c_str(), data_len, key16.c_str(), key16_len, mode.c_str(), - 
mode_len, nullptr, 0, aad.c_str(), aad_len, &cipher_len); + mode_len, nullptr, 0, aad.c_str(), aad_len, &encrypt_valid, &cipher_len); EXPECT_GT(cipher_len, 0); EXPECT_TRUE(cipher != nullptr); // Decrypt with NULL IV and same AAD + bool decrypt_valid = true; const char* decrypted_value = gdv_fn_decrypt_dispatcher_5args( ctx_ptr, cipher, cipher_len, key16.c_str(), key16_len, mode.c_str(), - mode_len, nullptr, 0, aad.c_str(), aad_len, &decrypted_len); + mode_len, nullptr, 0, aad.c_str(), aad_len, &decrypt_valid, &decrypted_len); EXPECT_EQ(data, std::string(reinterpret_cast(decrypted_value), From 14f6f3f8f783c00566b33776d3cac9fc35db5428 Mon Sep 17 00:00:00 2001 From: Tim Hurski Date: Wed, 21 Jan 2026 11:35:41 -0500 Subject: [PATCH 10/14] WIP --- cpp/src/gandiva/encrypt_mode_dispatcher.cc | 24 --- cpp/src/gandiva/encrypt_utils_gcm.cc | 39 +++-- cpp/src/gandiva/gdv_function_stubs.cc | 124 +++++++++++----- cpp/src/gandiva/gdv_function_stubs.h | 49 ++++--- cpp/src/gandiva/gdv_function_stubs_test.cc | 162 ++++++++++++--------- 5 files changed, 226 insertions(+), 172 deletions(-) diff --git a/cpp/src/gandiva/encrypt_mode_dispatcher.cc b/cpp/src/gandiva/encrypt_mode_dispatcher.cc index 5e0e796951c..cdda4395938 100644 --- a/cpp/src/gandiva/encrypt_mode_dispatcher.cc +++ b/cpp/src/gandiva/encrypt_mode_dispatcher.cc @@ -27,7 +27,6 @@ namespace gandiva { -// Supported encryption modes static const std::vector SUPPORTED_MODES = { AES_ECB_MODE, AES_ECB_PKCS7_MODE, AES_ECB_NONE_MODE, AES_CBC_MODE, AES_CBC_PKCS7_MODE, AES_CBC_NONE_MODE, @@ -68,29 +67,17 @@ int32_t EncryptModeDispatcher::encrypt( switch (ParseEncryptionMode(mode_str)) { case EncryptionMode::ECB: case EncryptionMode::ECB_PKCS7: - // ECB mode: No IV used, output is [ciphertext] - // Shorthand AES-ECB and explicit AES-ECB-PKCS7 both use ECB with PKCS7 padding return aes_encrypt_ecb(plaintext, plaintext_len, key, key_len, true, cipher); case EncryptionMode::ECB_NONE: - // ECB mode without padding return 
aes_encrypt_ecb(plaintext, plaintext_len, key, key_len, false, cipher); case EncryptionMode::CBC: case EncryptionMode::CBC_PKCS7: - // CBC mode: If iv is NULL, a random IV is auto-generated and prepended to output - // Auto-generated IV format: [16-byte IV][ciphertext] - // User-supplied IV format: [ciphertext] - // Shorthand AES-CBC and explicit AES-CBC-PKCS7 both use CBC with PKCS7 padding return aes_encrypt_cbc(plaintext, plaintext_len, key, key_len, iv, iv_len, true, cipher); case EncryptionMode::CBC_NONE: - // CBC mode without padding return aes_encrypt_cbc(plaintext, plaintext_len, key, key_len, iv, iv_len, false, cipher); case EncryptionMode::GCM: - // GCM mode: If iv is NULL, a random IV is auto-generated and prepended to output - // Auto-generated IV format: [12-byte IV][ciphertext][16-byte tag] - // User-supplied IV format: [ciphertext][16-byte tag] - // fifth_argument is AAD (Additional Authenticated Data) return aes_encrypt_gcm(plaintext, plaintext_len, key, key_len, iv, iv_len, fifth_argument, fifth_argument_len, cipher); case EncryptionMode::UNKNOWN: @@ -116,18 +103,11 @@ int32_t EncryptModeDispatcher::decrypt( switch (ParseEncryptionMode(mode_str)) { case EncryptionMode::ECB: case EncryptionMode::ECB_PKCS7: - // ECB mode: No IV used, input is [ciphertext] - // Shorthand AES-ECB and explicit AES-ECB-PKCS7 both use ECB with PKCS7 padding return aes_decrypt_ecb(ciphertext, ciphertext_len, key, key_len, true, plaintext); case EncryptionMode::ECB_NONE: - // ECB mode without padding return aes_decrypt_ecb(ciphertext, ciphertext_len, key, key_len, false, plaintext); case EncryptionMode::CBC: case EncryptionMode::CBC_PKCS7: - // CBC mode: If iv is NULL, IV is extracted from first 16 bytes of ciphertext - // Expected format with NULL IV: [16-byte IV][ciphertext] - // Expected format with provided IV: [ciphertext] - // Shorthand AES-CBC and explicit AES-CBC-PKCS7 both use CBC with PKCS7 padding return aes_decrypt_cbc(ciphertext, ciphertext_len, key, key_len, 
iv, iv_len, true, plaintext); case EncryptionMode::CBC_NONE: @@ -135,10 +115,6 @@ int32_t EncryptModeDispatcher::decrypt( return aes_decrypt_cbc(ciphertext, ciphertext_len, key, key_len, iv, iv_len, false, plaintext); case EncryptionMode::GCM: - // GCM mode: If iv is NULL, IV is extracted from first 12 bytes of ciphertext - // Expected format with NULL IV: [12-byte IV][ciphertext][16-byte tag] - // Expected format with provided IV: [ciphertext][16-byte tag] - // fifth_argument is AAD (Additional Authenticated Data) return aes_decrypt_gcm(ciphertext, ciphertext_len, key, key_len, iv, iv_len, fifth_argument, fifth_argument_len, plaintext); case EncryptionMode::UNKNOWN: diff --git a/cpp/src/gandiva/encrypt_utils_gcm.cc b/cpp/src/gandiva/encrypt_utils_gcm.cc index 881d9d58036..dfb964282c1 100644 --- a/cpp/src/gandiva/encrypt_utils_gcm.cc +++ b/cpp/src/gandiva/encrypt_utils_gcm.cc @@ -79,22 +79,16 @@ int32_t aes_encrypt_gcm(const char* plaintext, int32_t plaintext_len, const char* key, int32_t key_len, const char* iv, int32_t iv_len, const char* aad, int32_t aad_len, unsigned char* cipher) { - // Buffer for IV (either user-supplied or auto-generated) unsigned char iv_buffer[GCM_IV_LENGTH]; const unsigned char* actual_iv = nullptr; - bool iv_auto_generated = false; + bool iv_auto_generated = iv == nullptr || iv_len == 0; - // Handle IV: either generate random IV or use user-supplied IV - if (iv == nullptr || iv_len == 0) { - // Generate random IV + if (iv_auto_generated) { generate_random_iv(iv_buffer, GCM_IV_LENGTH); actual_iv = iv_buffer; - iv_auto_generated = true; } else { - // Use user-supplied IV validate_iv_length_gcm(iv_len); actual_iv = reinterpret_cast(iv); - iv_auto_generated = false; } int32_t cipher_len = 0; @@ -178,22 +172,18 @@ int32_t aes_decrypt_gcm(const char* ciphertext, int32_t ciphertext_len, const char* key, int32_t key_len, const char* iv, int32_t iv_len, const char* aad, int32_t aad_len, unsigned char* plaintext) { - // Buffer for extracted IV (if 
needed) unsigned char iv_buffer[GCM_IV_LENGTH]; const unsigned char* actual_iv = nullptr; const char* actual_ciphertext = ciphertext; int32_t actual_ciphertext_with_tag_len = ciphertext_len; - // Handle IV: either extract from ciphertext or use user-supplied IV if (iv == nullptr) { - // Extract IV from beginning of ciphertext: [12-byte IV][ciphertext][16-byte tag] validate_ciphertext_with_embedded_iv_gcm(ciphertext_len); extract_iv_from_ciphertext(ciphertext, ciphertext_len, GCM_IV_LENGTH, iv_buffer, &actual_ciphertext, &actual_ciphertext_with_tag_len); actual_iv = iv_buffer; } else { - // Use user-supplied IV validate_iv_length_gcm(iv_len); validate_ciphertext_with_tag(ciphertext_len); actual_iv = reinterpret_cast(iv); @@ -233,16 +223,23 @@ int32_t aes_decrypt_gcm(const char* ciphertext, int32_t ciphertext_len, } } - // Extract tag from end of actual ciphertext (after IV if it was embedded) - int32_t ciphertext_without_tag_len = actual_ciphertext_with_tag_len - GCM_TAG_LENGTH; - const unsigned char* tag = - reinterpret_cast(actual_ciphertext + ciphertext_without_tag_len); + int32_t ciphertext_without_tag_len; + const unsigned char* tag = nullptr; - // Set the authentication tag - if (!EVP_CIPHER_CTX_ctrl(de_ctx, EVP_CTRL_GCM_SET_TAG, GCM_TAG_LENGTH, - const_cast(tag))) { - throw std::runtime_error("Could not set GCM authentication tag: " + - get_openssl_error_string()); + // Extract and set the authentication tag only if AAD is provided + if (aad != nullptr && aad_len > 0) { + ciphertext_without_tag_len = actual_ciphertext_with_tag_len - GCM_TAG_LENGTH; + tag = reinterpret_cast(actual_ciphertext + ciphertext_without_tag_len); + + // Set the authentication tag + if (!EVP_CIPHER_CTX_ctrl(de_ctx, EVP_CTRL_GCM_SET_TAG, GCM_TAG_LENGTH, + const_cast(tag))) { + throw std::runtime_error("Could not set GCM authentication tag: " + + get_openssl_error_string()); + } + } else { + // No AAD means no tag appended + ciphertext_without_tag_len = actual_ciphertext_with_tag_len; } 
// Decrypt ciphertext diff --git a/cpp/src/gandiva/gdv_function_stubs.cc b/cpp/src/gandiva/gdv_function_stubs.cc index 04fcdc50c2d..1453ea8b8d1 100644 --- a/cpp/src/gandiva/gdv_function_stubs.cc +++ b/cpp/src/gandiva/gdv_function_stubs.cc @@ -779,8 +779,12 @@ const char* gdv_fn_aes_encrypt_ecb_legacy(int64_t context, const char* data, const char* mode = "AES-ECB"; int32_t mode_len = 7; bool out_valid = true; + + // Passing `true` for validity parameters because this function is marked as kResultNullIfNull, + // so if we're here the inputs are guaranteed to be non-NULL const char* result = gdv_fn_encrypt_dispatcher_3args( - context, data, data_len, key_data, key_data_len, mode, mode_len, &out_valid, out_len); + context, data, data_len, true, key_data, key_data_len, true, mode, mode_len, true, + &out_valid, out_len); // Add null terminator for string compatibility // Note: This may not be valid UTF-8, but it's needed for string handling @@ -807,8 +811,12 @@ const char* gdv_fn_aes_decrypt_ecb_legacy(int64_t context, const char* data, const char* mode = "AES-ECB"; int32_t mode_len = 7; bool out_valid = true; + + // Passing `true` for validity parameters because this function is marked as kResultNullIfNull, + // so if we're here the inputs are guaranteed to be non-NULL const char* result = gdv_fn_decrypt_dispatcher_3args( - context, data, data_len, key_data, key_data_len, mode, mode_len, &out_valid, out_len); + context, data, data_len, true, key_data, key_data_len, true, mode, mode_len, true, + &out_valid, out_len); // Add null terminator for string compatibility // Note: This may not be valid UTF-8, but it's needed for string handling @@ -821,52 +829,65 @@ const char* gdv_fn_aes_decrypt_ecb_legacy(int64_t context, const char* data, } // The 3- and 4-arg signatures exist to support optional IV and other arguments +// Note: kResultNullInternal functions receive validity for each argument extern "C" GANDIVA_EXPORT const char* gdv_fn_encrypt_dispatcher_3args( - int64_t 
context, const char* data, int32_t data_len, const char* key_data, - int32_t key_data_len, const char* mode, int32_t mode_len, + int64_t context, const char* data, int32_t data_len, bool data_validity, + const char* key_data, int32_t key_data_len, bool key_validity, + const char* mode, int32_t mode_len, bool mode_validity, bool* out_valid, int32_t* out_len) { return gdv_fn_encrypt_dispatcher_5args( - context, data, data_len, key_data, key_data_len, mode, mode_len, nullptr, - 0, nullptr, 0, out_valid, out_len); + context, data, data_len, data_validity, key_data, key_data_len, key_validity, + mode, mode_len, mode_validity, nullptr, 0, false, nullptr, 0, false, + out_valid, out_len); } extern "C" GANDIVA_EXPORT const char* gdv_fn_decrypt_dispatcher_3args( - int64_t context, const char* data, int32_t data_len, const char* key_data, - int32_t key_data_len, const char* mode, int32_t mode_len, + int64_t context, const char* data, int32_t data_len, bool data_validity, + const char* key_data, int32_t key_data_len, bool key_validity, + const char* mode, int32_t mode_len, bool mode_validity, bool* out_valid, int32_t* out_len) { return gdv_fn_decrypt_dispatcher_5args( - context, data, data_len, key_data, key_data_len, mode, mode_len, nullptr, - 0, nullptr, 0, out_valid, out_len); + context, data, data_len, data_validity, key_data, key_data_len, key_validity, + mode, mode_len, mode_validity, nullptr, 0, false, nullptr, 0, false, + out_valid, out_len); } extern "C" GANDIVA_EXPORT const char* gdv_fn_encrypt_dispatcher_4args( - int64_t context, const char* data, int32_t data_len, const char* key_data, - int32_t key_data_len, const char* mode, int32_t mode_len, - const char* iv_data, int32_t iv_data_len, bool* out_valid, int32_t* out_len) { + int64_t context, const char* data, int32_t data_len, bool data_validity, + const char* key_data, int32_t key_data_len, bool key_validity, + const char* mode, int32_t mode_len, bool mode_validity, + const char* iv_data, int32_t iv_data_len, 
bool iv_validity, + bool* out_valid, int32_t* out_len) { return gdv_fn_encrypt_dispatcher_5args( - context, data, data_len, key_data, key_data_len, mode, mode_len, iv_data, - iv_data_len, nullptr, 0, out_valid, out_len); + context, data, data_len, data_validity, key_data, key_data_len, key_validity, + mode, mode_len, mode_validity, iv_data, iv_data_len, iv_validity, + nullptr, 0, false, out_valid, out_len); } extern "C" GANDIVA_EXPORT const char* gdv_fn_decrypt_dispatcher_4args( - int64_t context, const char* data, int32_t data_len, const char* key_data, - int32_t key_data_len, const char* mode, int32_t mode_len, - const char* iv_data, int32_t iv_data_len, bool* out_valid, int32_t* out_len) { + int64_t context, const char* data, int32_t data_len, bool data_validity, + const char* key_data, int32_t key_data_len, bool key_validity, + const char* mode, int32_t mode_len, bool mode_validity, + const char* iv_data, int32_t iv_data_len, bool iv_validity, + bool* out_valid, int32_t* out_len) { return gdv_fn_decrypt_dispatcher_5args( - context, data, data_len, key_data, key_data_len, mode, mode_len, iv_data, - iv_data_len, nullptr, 0, out_valid, out_len); + context, data, data_len, data_validity, key_data, key_data_len, key_validity, + mode, mode_len, mode_validity, iv_data, iv_data_len, iv_validity, + nullptr, 0, false, out_valid, out_len); } extern "C" GANDIVA_EXPORT const char* gdv_fn_encrypt_dispatcher_5args( - int64_t context, const char* data, int32_t data_len, const char* key_data, - int32_t key_data_len, const char* mode, int32_t mode_len, - const char* iv_data, int32_t iv_data_len, const char* fifth_argument, - int32_t fifth_argument_len, bool* out_valid, int32_t* out_len) { + int64_t context, const char* data, int32_t data_len, bool data_validity, + const char* key_data, int32_t key_data_len, bool key_validity, + const char* mode, int32_t mode_len, bool mode_validity, + const char* iv_data, int32_t iv_data_len, bool iv_validity, + const char* fifth_argument, 
int32_t fifth_argument_len, bool fifth_argument_validity, + bool* out_valid, int32_t* out_len) { // We use kResultNullInternal to handle NULL inputs selectively: // - NULL plaintext → return NULL (set out_valid = false) // - NULL key → call function, it will throw validation error @@ -875,7 +896,7 @@ const char* gdv_fn_encrypt_dispatcher_5args( // - NULL AAD → call function, no AAD used // Check if plaintext is NULL - this is the only case where we return NULL - if (data == nullptr) { + if (!data_validity) { *out_valid = false; *out_len = 0; return nullptr; @@ -913,16 +934,19 @@ const char* gdv_fn_encrypt_dispatcher_5args( extern "C" GANDIVA_EXPORT const char* gdv_fn_decrypt_dispatcher_5args( - int64_t context, const char* data, int32_t data_len, const char* key_data, - int32_t key_data_len, const char* mode, int32_t mode_len, - const char* iv_data, int32_t iv_data_len, const char* fifth_argument, - int32_t fifth_argument_len, bool* out_valid, int32_t* out_len) { + int64_t context, const char* data, int32_t data_len, bool data_validity, + const char* key_data, int32_t key_data_len, bool key_validity, + const char* mode, int32_t mode_len, bool mode_validity, + const char* iv_data, int32_t iv_data_len, bool iv_validity, + const char* fifth_argument, int32_t fifth_argument_len, bool fifth_argument_validity, + bool* out_valid, int32_t* out_len) { // We use kResultNullInternal to handle NULL inputs selectively: // - NULL ciphertext → return NULL (set out_valid = false) // - NULL key → call function, it will throw validation error // - NULL mode → call function, it will throw validation error // - NULL IV → call function, auto-extracts IV from ciphertext // - NULL AAD → call function, no AAD used + // Note: validity parameters are ignored here - NULL handling is done by kResultNullInternal logic // Check if ciphertext is NULL - this is the only case where we return NULL if (data == nullptr) { @@ -1194,15 +1218,19 @@ arrow::Status 
ExportedStubFunctions::AddMappings(Engine* engine) const { }; // gdv_fn_encrypt_dispatcher_3args (data, key, mode) + // Note: kResultNullInternal functions receive validity for each argument args = { types->i64_type(), // context types->i8_ptr_type(), // data types->i32_type(), // data_length + types->i1_type(), // data_validity types->i8_ptr_type(), // key_data types->i32_type(), // key_data_length + types->i1_type(), // key_validity types->i8_ptr_type(), // mode (binary string) types->i32_type(), // mode_length - types->i32_ptr_type(), // out_valid + types->i1_type(), // mode_validity + types->ptr_type(types->i1_type()), // out_valid types->i32_ptr_type() // out_length }; @@ -1212,15 +1240,19 @@ arrow::Status ExportedStubFunctions::AddMappings(Engine* engine) const { reinterpret_cast(gdv_fn_encrypt_dispatcher_3args)); // gdv_fn_decrypt_dispatcher_3args (data, key, mode) + // Note: kResultNullInternal functions receive validity for each argument args = { types->i64_type(), // context types->i8_ptr_type(), // data types->i32_type(), // data_length + types->i1_type(), // data_validity types->i8_ptr_type(), // key_data types->i32_type(), // key_data_length + types->i1_type(), // key_validity types->i8_ptr_type(), // mode (binary string) types->i32_type(), // mode_length - types->i32_ptr_type(), // out_valid + types->i1_type(), // mode_validity + types->ptr_type(types->i1_type()), // out_valid types->i32_ptr_type() // out_length }; @@ -1230,17 +1262,22 @@ arrow::Status ExportedStubFunctions::AddMappings(Engine* engine) const { reinterpret_cast(gdv_fn_decrypt_dispatcher_3args)); // gdv_fn_encrypt_dispatcher_4args (data, key, mode, iv) + // Note: kResultNullInternal functions receive validity for each argument args = { types->i64_type(), // context types->i8_ptr_type(), // data types->i32_type(), // data_length + types->i1_type(), // data_validity types->i8_ptr_type(), // key_data types->i32_type(), // key_data_length + types->i1_type(), // key_validity 
types->i8_ptr_type(), // mode (binary string) types->i32_type(), // mode_length + types->i1_type(), // mode_validity types->i8_ptr_type(), // iv (binary string) types->i32_type(), // iv_length - types->i32_ptr_type(), // out_valid + types->i1_type(), // iv_validity + types->ptr_type(types->i1_type()), // out_valid types->i32_ptr_type() // out_length }; @@ -1250,17 +1287,22 @@ arrow::Status ExportedStubFunctions::AddMappings(Engine* engine) const { reinterpret_cast(gdv_fn_encrypt_dispatcher_4args)); // gdv_fn_decrypt_dispatcher_4args (data, key, mode, iv) + // Note: kResultNullInternal functions receive validity for each argument args = { types->i64_type(), // context types->i8_ptr_type(), // data types->i32_type(), // data_length + types->i1_type(), // data_validity types->i8_ptr_type(), // key_data types->i32_type(), // key_data_length + types->i1_type(), // key_validity types->i8_ptr_type(), // mode (binary string) types->i32_type(), // mode_length + types->i1_type(), // mode_validity types->i8_ptr_type(), // iv (binary string) types->i32_type(), // iv_length - types->i32_ptr_type(), // out_valid + types->i1_type(), // iv_validity + types->ptr_type(types->i1_type()), // out_valid types->i32_ptr_type() // out_length }; @@ -1271,19 +1313,25 @@ arrow::Status ExportedStubFunctions::AddMappings(Engine* engine) const { // gdv_fn_encrypt_dispatcher_5args (data, key, mode, iv, // fifth_argument) + // Note: kResultNullInternal functions receive validity for each argument args = { types->i64_type(), // context types->i8_ptr_type(), // data types->i32_type(), // data_length + types->i1_type(), // data_validity types->i8_ptr_type(), // key_data types->i32_type(), // key_data_length + types->i1_type(), // key_validity types->i8_ptr_type(), // mode (binary string) types->i32_type(), // mode_length + types->i1_type(), // mode_validity types->i8_ptr_type(), // iv (binary string) types->i32_type(), // iv_length + types->i1_type(), // iv_validity types->i8_ptr_type(), // 
fifth_argument (binary string) types->i32_type(), // fifth_argument_length - types->i32_ptr_type(), // out_valid + types->i1_type(), // fifth_argument_validity + types->ptr_type(types->i1_type()), // out_valid types->i32_ptr_type() // out_length }; @@ -1294,19 +1342,25 @@ arrow::Status ExportedStubFunctions::AddMappings(Engine* engine) const { // gdv_fn_decrypt_dispatcher_5args (data, key, mode, iv, // fifth_argument) + // Note: kResultNullInternal functions receive validity for each argument args = { types->i64_type(), // context types->i8_ptr_type(), // data types->i32_type(), // data_length + types->i1_type(), // data_validity types->i8_ptr_type(), // key_data types->i32_type(), // key_data_length + types->i1_type(), // key_validity types->i8_ptr_type(), // mode (binary string) types->i32_type(), // mode_length + types->i1_type(), // mode_validity types->i8_ptr_type(), // iv (binary string) types->i32_type(), // iv_length + types->i1_type(), // iv_validity types->i8_ptr_type(), // fifth_argument (binary string) types->i32_type(), // fifth_argument_length - types->i32_ptr_type(), // out_valid + types->i1_type(), // fifth_argument_validity + types->ptr_type(types->i1_type()), // out_valid types->i32_ptr_type() // out_length }; diff --git a/cpp/src/gandiva/gdv_function_stubs.h b/cpp/src/gandiva/gdv_function_stubs.h index 7c26b9cbf0f..3d9daf7cd71 100644 --- a/cpp/src/gandiva/gdv_function_stubs.h +++ b/cpp/src/gandiva/gdv_function_stubs.h @@ -205,48 +205,57 @@ const char* gdv_fn_aes_decrypt_ecb_legacy(int64_t context, const char* data, int32_t* out_len); // 3-argument dispatcher: (data, key, mode) +// Note: kResultNullInternal functions receive validity for each argument GANDIVA_EXPORT const char* gdv_fn_encrypt_dispatcher_3args( - int64_t context, const char* data, int32_t data_len, - const char* key_data, int32_t key_data_len, const char* mode, - int32_t mode_len, bool* out_valid, int32_t* out_len); + int64_t context, const char* data, int32_t data_len, bool 
data_validity, + const char* key_data, int32_t key_data_len, bool key_validity, + const char* mode, int32_t mode_len, bool mode_validity, + bool* out_valid, int32_t* out_len); GANDIVA_EXPORT const char* gdv_fn_decrypt_dispatcher_3args( - int64_t context, const char* data, int32_t data_len, - const char* key_data, int32_t key_data_len, const char* mode, - int32_t mode_len, bool* out_valid, int32_t* out_len); + int64_t context, const char* data, int32_t data_len, bool data_validity, + const char* key_data, int32_t key_data_len, bool key_validity, + const char* mode, int32_t mode_len, bool mode_validity, + bool* out_valid, int32_t* out_len); // 4-argument dispatcher: (data, key, mode, iv) +// Note: kResultNullInternal functions receive validity for each argument GANDIVA_EXPORT const char* gdv_fn_encrypt_dispatcher_4args( - int64_t context, const char* data, int32_t data_len, - const char* key_data, int32_t key_data_len, const char* mode, - int32_t mode_len, const char* iv_data, int32_t iv_data_len, + int64_t context, const char* data, int32_t data_len, bool data_validity, + const char* key_data, int32_t key_data_len, bool key_validity, + const char* mode, int32_t mode_len, bool mode_validity, + const char* iv_data, int32_t iv_data_len, bool iv_validity, bool* out_valid, int32_t* out_len); GANDIVA_EXPORT const char* gdv_fn_decrypt_dispatcher_4args( - int64_t context, const char* data, int32_t data_len, - const char* key_data, int32_t key_data_len, const char* mode, - int32_t mode_len, const char* iv_data, int32_t iv_data_len, + int64_t context, const char* data, int32_t data_len, bool data_validity, + const char* key_data, int32_t key_data_len, bool key_validity, + const char* mode, int32_t mode_len, bool mode_validity, + const char* iv_data, int32_t iv_data_len, bool iv_validity, bool* out_valid, int32_t* out_len); // 5-argument dispatcher: (data, key, mode, iv, fifth_argument) +// Note: kResultNullInternal functions receive validity for each argument GANDIVA_EXPORT 
const char* gdv_fn_encrypt_dispatcher_5args( - int64_t context, const char* data, int32_t data_len, - const char* key_data, int32_t key_data_len, const char* mode, - int32_t mode_len, const char* iv_data, int32_t iv_data_len, - const char* fifth_argument, int32_t fifth_argument_len, + int64_t context, const char* data, int32_t data_len, bool data_validity, + const char* key_data, int32_t key_data_len, bool key_validity, + const char* mode, int32_t mode_len, bool mode_validity, + const char* iv_data, int32_t iv_data_len, bool iv_validity, + const char* fifth_argument, int32_t fifth_argument_len, bool fifth_argument_validity, bool* out_valid, int32_t* out_len); GANDIVA_EXPORT const char* gdv_fn_decrypt_dispatcher_5args( - int64_t context, const char* data, int32_t data_len, - const char* key_data, int32_t key_data_len, const char* mode, - int32_t mode_len, const char* iv_data, int32_t iv_data_len, - const char* fifth_argument, int32_t fifth_argument_len, + int64_t context, const char* data, int32_t data_len, bool data_validity, + const char* key_data, int32_t key_data_len, bool key_validity, + const char* mode, int32_t mode_len, bool mode_validity, + const char* iv_data, int32_t iv_data_len, bool iv_validity, + const char* fifth_argument, int32_t fifth_argument_len, bool fifth_argument_validity, bool* out_valid, int32_t* out_len); GANDIVA_EXPORT diff --git a/cpp/src/gandiva/gdv_function_stubs_test.cc b/cpp/src/gandiva/gdv_function_stubs_test.cc index e46f3aabc64..b6b1fdf390b 100644 --- a/cpp/src/gandiva/gdv_function_stubs_test.cc +++ b/cpp/src/gandiva/gdv_function_stubs_test.cc @@ -1362,12 +1362,12 @@ TEST(TestGdvFnStubs, TestAesEncryptDecrypt16) { bool encrypt_valid = true; const char* cipher = gdv_fn_encrypt_dispatcher_3args( - ctx_ptr, data.c_str(), data_len, key16.c_str(), key16_len, mode.c_str(), - mode_len, &encrypt_valid, &cipher_len); + ctx_ptr, data.c_str(), data_len, true, key16.c_str(), key16_len, true, + mode.c_str(), mode_len, true, &encrypt_valid, 
&cipher_len); bool decrypt_valid = true; const char* decrypted_value = gdv_fn_decrypt_dispatcher_3args( - ctx_ptr, cipher, cipher_len, key16.c_str(), key16_len, mode.c_str(), - mode_len, &decrypt_valid, &decrypted_len); + ctx_ptr, cipher, cipher_len, true, key16.c_str(), key16_len, true, + mode.c_str(), mode_len, true, &decrypt_valid, &decrypted_len); EXPECT_EQ(data, std::string(reinterpret_cast(decrypted_value), @@ -1388,13 +1388,13 @@ TEST(TestGdvFnStubs, TestAesEncryptDecrypt24) { bool encrypt_valid = true; const char* cipher = gdv_fn_encrypt_dispatcher_3args( - ctx_ptr, data.c_str(), data_len, key24.c_str(), key24_len, mode.c_str(), - mode_len, &encrypt_valid, &cipher_len); + ctx_ptr, data.c_str(), data_len, true, key24.c_str(), key24_len, true, + mode.c_str(), mode_len, true, &encrypt_valid, &cipher_len); bool decrypt_valid = true; const char* decrypted_value = gdv_fn_decrypt_dispatcher_3args( - ctx_ptr, cipher, cipher_len, key24.c_str(), key24_len, mode.c_str(), - mode_len, &decrypt_valid, &decrypted_len); + ctx_ptr, cipher, cipher_len, true, key24.c_str(), key24_len, true, + mode.c_str(), mode_len, true, &decrypt_valid, &decrypted_len); EXPECT_EQ(data, std::string(reinterpret_cast(decrypted_value), @@ -1415,13 +1415,13 @@ TEST(TestGdvFnStubs, TestAesEncryptDecrypt32) { bool encrypt_valid = true; const char* cipher = gdv_fn_encrypt_dispatcher_3args( - ctx_ptr, data.c_str(), data_len, key32.c_str(), key32_len, mode.c_str(), - mode_len, &encrypt_valid, &cipher_len); + ctx_ptr, data.c_str(), data_len, true, key32.c_str(), key32_len, true, + mode.c_str(), mode_len, true, &encrypt_valid, &cipher_len); bool decrypt_valid = true; const char* decrypted_value = gdv_fn_decrypt_dispatcher_3args( - ctx_ptr, cipher, cipher_len, key32.c_str(), key32_len, mode.c_str(), - mode_len, &decrypt_valid, &decrypted_len); + ctx_ptr, cipher, cipher_len, true, key32.c_str(), key32_len, true, + mode.c_str(), mode_len, true, &decrypt_valid, &decrypted_len); EXPECT_EQ(data, 
std::string(reinterpret_cast(decrypted_value), @@ -1442,17 +1442,17 @@ TEST(TestGdvFnStubs, TestAesEncryptDecryptValidation) { auto cipher_len = static_cast(cipher.length()); bool encrypt_valid = true; - gdv_fn_encrypt_dispatcher_3args(ctx_ptr, data.c_str(), data_len, - key33.c_str(), key33_len, mode.c_str(), - mode_len, &encrypt_valid, &cipher_len); + gdv_fn_encrypt_dispatcher_3args(ctx_ptr, data.c_str(), data_len, true, + key33.c_str(), key33_len, true, mode.c_str(), + mode_len, true, &encrypt_valid, &cipher_len); EXPECT_THAT(ctx.get_error(), ::testing::HasSubstr("Unsupported key length for AES-ECB")); ctx.Reset(); bool decrypt_valid = true; - gdv_fn_decrypt_dispatcher_3args(ctx_ptr, cipher.c_str(), cipher_len, - key33.c_str(), key33_len, mode.c_str(), - mode_len, &decrypt_valid, &decrypted_len); + gdv_fn_decrypt_dispatcher_3args(ctx_ptr, cipher.c_str(), cipher_len, true, + key33.c_str(), key33_len, true, mode.c_str(), + mode_len, true, &decrypt_valid, &decrypted_len); EXPECT_THAT(ctx.get_error(), ::testing::HasSubstr("Unsupported key length for AES-ECB")); ctx.Reset(); @@ -1473,14 +1473,14 @@ TEST(TestGdvFnStubs, TestAesEncryptDecryptModeEcb) { bool encrypt_valid = true; const char* cipher = gdv_fn_encrypt_dispatcher_3args( - ctx_ptr, data.c_str(), data_len, key16.c_str(), key16_len, mode.c_str(), - mode_len, &encrypt_valid, &cipher_len); + ctx_ptr, data.c_str(), data_len, true, key16.c_str(), key16_len, true, + mode.c_str(), mode_len, true, &encrypt_valid, &cipher_len); EXPECT_GT(cipher_len, 0); bool decrypt_valid = true; const char* decrypted_value = gdv_fn_decrypt_dispatcher_3args( - ctx_ptr, cipher, cipher_len, key16.c_str(), key16_len, mode.c_str(), - mode_len, &decrypt_valid, &decrypted_len); + ctx_ptr, cipher, cipher_len, true, key16.c_str(), key16_len, true, + mode.c_str(), mode_len, true, &decrypt_valid, &decrypted_len); EXPECT_EQ(data, std::string(reinterpret_cast(decrypted_value), decrypted_len)); @@ -1500,9 +1500,9 @@ TEST(TestGdvFnStubs, 
TestAesEncryptDecryptModeValidation) { // Test encrypt with invalid mode bool encrypt_valid = true; - gdv_fn_encrypt_dispatcher_3args(ctx_ptr, data.c_str(), data_len, - key16.c_str(), key16_len, - invalid_mode.c_str(), invalid_mode_len, + gdv_fn_encrypt_dispatcher_3args(ctx_ptr, data.c_str(), data_len, true, + key16.c_str(), key16_len, true, + invalid_mode.c_str(), invalid_mode_len, true, &encrypt_valid, &cipher_len); EXPECT_THAT(ctx.get_error(), ::testing::HasSubstr("Unsupported encryption mode")); @@ -1512,9 +1512,9 @@ TEST(TestGdvFnStubs, TestAesEncryptDecryptModeValidation) { std::string cipher = "12345678abcdefgh12345678abcdefgh"; auto cipher_len_val = static_cast(cipher.length()); bool decrypt_valid = true; - gdv_fn_decrypt_dispatcher_3args(ctx_ptr, cipher.c_str(), cipher_len_val, - key16.c_str(), key16_len, - invalid_mode.c_str(), invalid_mode_len, + gdv_fn_decrypt_dispatcher_3args(ctx_ptr, cipher.c_str(), cipher_len_val, true, + key16.c_str(), key16_len, true, + invalid_mode.c_str(), invalid_mode_len, true, &decrypt_valid, &decrypted_len); EXPECT_THAT(ctx.get_error(), ::testing::HasSubstr("Unsupported decryption mode")); @@ -1538,16 +1538,18 @@ TEST(TestGdvFnStubs, TestAesEncryptDecryptGcmIvOnly) { bool encrypt_valid = true; const char* cipher = gdv_fn_encrypt_dispatcher_5args( - ctx_ptr, data.c_str(), data_len, key16.c_str(), key16_len, mode.c_str(), - mode_len, iv.c_str(), iv_len, nullptr, 0, &encrypt_valid, &cipher_len); + ctx_ptr, data.c_str(), data_len, true, key16.c_str(), key16_len, true, + mode.c_str(), mode_len, true, iv.c_str(), iv_len, true, + nullptr, 0, false, &encrypt_valid, &cipher_len); EXPECT_GT(cipher_len, 0); // When IV is supplied to encrypt, it must also be supplied to decrypt // (IV is only prepended when auto-generated) bool decrypt_valid = true; const char* decrypted_value = gdv_fn_decrypt_dispatcher_5args( - ctx_ptr, cipher, cipher_len, key16.c_str(), key16_len, mode.c_str(), - mode_len, iv.c_str(), iv_len, nullptr, 0, 
&decrypt_valid, &decrypted_len); + ctx_ptr, cipher, cipher_len, true, key16.c_str(), key16_len, true, + mode.c_str(), mode_len, true, iv.c_str(), iv_len, true, + nullptr, 0, false, &decrypt_valid, &decrypted_len); EXPECT_EQ(data, std::string(reinterpret_cast(decrypted_value), @@ -1572,16 +1574,18 @@ TEST(TestGdvFnStubs, TestAesEncryptDecryptGcmWithAad) { bool encrypt_valid = true; const char* cipher = gdv_fn_encrypt_dispatcher_5args( - ctx_ptr, data.c_str(), data_len, key16.c_str(), key16_len, mode.c_str(), - mode_len, iv.c_str(), iv_len, aad.c_str(), aad_len, &encrypt_valid, &cipher_len); + ctx_ptr, data.c_str(), data_len, true, key16.c_str(), key16_len, true, + mode.c_str(), mode_len, true, iv.c_str(), iv_len, true, + aad.c_str(), aad_len, true, &encrypt_valid, &cipher_len); EXPECT_GT(cipher_len, 0); // When IV is supplied to encrypt, it must also be supplied to decrypt // (IV is only prepended when auto-generated) bool decrypt_valid = true; const char* decrypted_value = gdv_fn_decrypt_dispatcher_5args( - ctx_ptr, cipher, cipher_len, key16.c_str(), key16_len, mode.c_str(), - mode_len, iv.c_str(), iv_len, aad.c_str(), aad_len, &decrypt_valid, &decrypted_len); + ctx_ptr, cipher, cipher_len, true, key16.c_str(), key16_len, true, + mode.c_str(), mode_len, true, iv.c_str(), iv_len, true, + aad.c_str(), aad_len, true, &decrypt_valid, &decrypted_len); EXPECT_EQ(data, std::string(reinterpret_cast(decrypted_value), @@ -1603,14 +1607,14 @@ TEST(TestGdvFnStubs, TestAesEncryptDecryptShorthandEcb) { bool encrypt_valid = true; const char* cipher = gdv_fn_encrypt_dispatcher_3args( - ctx_ptr, data.c_str(), data_len, key16.c_str(), key16_len, mode.c_str(), - mode_len, &encrypt_valid, &cipher_len); + ctx_ptr, data.c_str(), data_len, true, key16.c_str(), key16_len, true, + mode.c_str(), mode_len, true, &encrypt_valid, &cipher_len); EXPECT_GT(cipher_len, 0); bool decrypt_valid = true; const char* decrypted_value = gdv_fn_decrypt_dispatcher_3args( - ctx_ptr, cipher, cipher_len, 
key16.c_str(), key16_len, mode.c_str(), - mode_len, &decrypt_valid, &decrypted_len); + ctx_ptr, cipher, cipher_len, true, key16.c_str(), key16_len, true, + mode.c_str(), mode_len, true, &decrypt_valid, &decrypted_len); EXPECT_EQ(data, std::string(reinterpret_cast(decrypted_value), @@ -1632,14 +1636,14 @@ TEST(TestGdvFnStubs, TestAesEncryptDecryptExplicitEcbPkcs7) { bool encrypt_valid2 = true; const char* cipher = gdv_fn_encrypt_dispatcher_3args( - ctx_ptr, data.c_str(), data_len, key16.c_str(), key16_len, mode.c_str(), - mode_len, &encrypt_valid2, &cipher_len); + ctx_ptr, data.c_str(), data_len, true, key16.c_str(), key16_len, true, + mode.c_str(), mode_len, true, &encrypt_valid2, &cipher_len); EXPECT_GT(cipher_len, 0); bool decrypt_valid2 = true; const char* decrypted_value = gdv_fn_decrypt_dispatcher_3args( - ctx_ptr, cipher, cipher_len, key16.c_str(), key16_len, mode.c_str(), - mode_len, &decrypt_valid2, &decrypted_len); + ctx_ptr, cipher, cipher_len, true, key16.c_str(), key16_len, true, + mode.c_str(), mode_len, true, &decrypt_valid2, &decrypted_len); EXPECT_EQ(data, std::string(reinterpret_cast(decrypted_value), @@ -1663,16 +1667,18 @@ TEST(TestGdvFnStubs, TestAesEncryptDecryptShorthandCbc) { bool encrypt_valid = true; const char* cipher = gdv_fn_encrypt_dispatcher_4args( - ctx_ptr, data.c_str(), data_len, key16.c_str(), key16_len, mode.c_str(), - mode_len, iv.c_str(), iv_len, &encrypt_valid, &cipher_len); + ctx_ptr, data.c_str(), data_len, true, key16.c_str(), key16_len, true, + mode.c_str(), mode_len, true, iv.c_str(), iv_len, true, + &encrypt_valid, &cipher_len); EXPECT_GT(cipher_len, 0); // When IV is supplied to encrypt, it must also be supplied to decrypt // (IV is only prepended when auto-generated) bool decrypt_valid = true; const char* decrypted_value = gdv_fn_decrypt_dispatcher_4args( - ctx_ptr, cipher, cipher_len, key16.c_str(), key16_len, mode.c_str(), - mode_len, iv.c_str(), iv_len, &decrypt_valid, &decrypted_len); + ctx_ptr, cipher, cipher_len, 
true, key16.c_str(), key16_len, true, + mode.c_str(), mode_len, true, iv.c_str(), iv_len, true, + &decrypt_valid, &decrypted_len); EXPECT_EQ(data, std::string(reinterpret_cast(decrypted_value), @@ -1696,16 +1702,18 @@ TEST(TestGdvFnStubs, TestAesEncryptDecryptExplicitCbcPkcs7) { bool encrypt_valid = true; const char* cipher = gdv_fn_encrypt_dispatcher_4args( - ctx_ptr, data.c_str(), data_len, key16.c_str(), key16_len, mode.c_str(), - mode_len, iv.c_str(), iv_len, &encrypt_valid, &cipher_len); + ctx_ptr, data.c_str(), data_len, true, key16.c_str(), key16_len, true, + mode.c_str(), mode_len, true, iv.c_str(), iv_len, true, + &encrypt_valid, &cipher_len); EXPECT_GT(cipher_len, 0); // When IV is supplied to encrypt, it must also be supplied to decrypt // (IV is only prepended when auto-generated) bool decrypt_valid = true; const char* decrypted_value = gdv_fn_decrypt_dispatcher_4args( - ctx_ptr, cipher, cipher_len, key16.c_str(), key16_len, mode.c_str(), - mode_len, iv.c_str(), iv_len, &decrypt_valid, &decrypted_len); + ctx_ptr, cipher, cipher_len, true, key16.c_str(), key16_len, true, + mode.c_str(), mode_len, true, iv.c_str(), iv_len, true, + &decrypt_valid, &decrypted_len); EXPECT_EQ(data, std::string(reinterpret_cast(decrypted_value), @@ -1730,16 +1738,18 @@ TEST(TestGdvFnStubs, TestAesEncryptDecryptCbcNone) { bool encrypt_valid = true; const char* cipher = gdv_fn_encrypt_dispatcher_4args( - ctx_ptr, data.c_str(), data_len, key16.c_str(), key16_len, mode.c_str(), - mode_len, iv.c_str(), iv_len, &encrypt_valid, &cipher_len); + ctx_ptr, data.c_str(), data_len, true, key16.c_str(), key16_len, true, + mode.c_str(), mode_len, true, iv.c_str(), iv_len, true, + &encrypt_valid, &cipher_len); EXPECT_GT(cipher_len, 0); // When IV is supplied to encrypt, it must also be supplied to decrypt // (IV is only prepended when auto-generated) bool decrypt_valid = true; const char* decrypted_value = gdv_fn_decrypt_dispatcher_4args( - ctx_ptr, cipher, cipher_len, key16.c_str(), 
key16_len, mode.c_str(), - mode_len, iv.c_str(), iv_len, &decrypt_valid, &decrypted_len); + ctx_ptr, cipher, cipher_len, true, key16.c_str(), key16_len, true, + mode.c_str(), mode_len, true, iv.c_str(), iv_len, true, + &decrypt_valid, &decrypted_len); EXPECT_EQ(data, std::string(reinterpret_cast(decrypted_value), @@ -1762,16 +1772,18 @@ TEST(TestGdvFnStubs, TestAesEncryptGcmWithNullIv4Args) { // Test 4-arg version with NULL IV (should auto-generate IV) bool encrypt_valid = true; const char* cipher = gdv_fn_encrypt_dispatcher_4args( - ctx_ptr, data.c_str(), data_len, key16.c_str(), key16_len, mode.c_str(), - mode_len, nullptr, 0, &encrypt_valid, &cipher_len); + ctx_ptr, data.c_str(), data_len, true, key16.c_str(), key16_len, true, + mode.c_str(), mode_len, true, nullptr, 0, false, + &encrypt_valid, &cipher_len); EXPECT_GT(cipher_len, 0); EXPECT_TRUE(cipher != nullptr); // Decrypt with NULL IV (should extract from ciphertext) bool decrypt_valid = true; const char* decrypted_value = gdv_fn_decrypt_dispatcher_4args( - ctx_ptr, cipher, cipher_len, key16.c_str(), key16_len, mode.c_str(), - mode_len, nullptr, 0, &decrypt_valid, &decrypted_len); + ctx_ptr, cipher, cipher_len, true, key16.c_str(), key16_len, true, + mode.c_str(), mode_len, true, nullptr, 0, false, + &decrypt_valid, &decrypted_len); EXPECT_EQ(data, std::string(reinterpret_cast(decrypted_value), @@ -1794,16 +1806,18 @@ TEST(TestGdvFnStubs, TestAesEncryptGcmWithNullIvAndNullAad) { // Test 5-arg version with NULL IV and NULL AAD bool encrypt_valid = true; const char* cipher = gdv_fn_encrypt_dispatcher_5args( - ctx_ptr, data.c_str(), data_len, key16.c_str(), key16_len, mode.c_str(), - mode_len, nullptr, 0, nullptr, 0, &encrypt_valid, &cipher_len); + ctx_ptr, data.c_str(), data_len, true, key16.c_str(), key16_len, true, + mode.c_str(), mode_len, true, nullptr, 0, false, nullptr, 0, false, + &encrypt_valid, &cipher_len); EXPECT_GT(cipher_len, 0); EXPECT_TRUE(cipher != nullptr); // Decrypt with NULL IV and NULL AAD 
bool decrypt_valid = true; const char* decrypted_value = gdv_fn_decrypt_dispatcher_5args( - ctx_ptr, cipher, cipher_len, key16.c_str(), key16_len, mode.c_str(), - mode_len, nullptr, 0, nullptr, 0, &decrypt_valid, &decrypted_len); + ctx_ptr, cipher, cipher_len, true, key16.c_str(), key16_len, true, + mode.c_str(), mode_len, true, nullptr, 0, false, nullptr, 0, false, + &decrypt_valid, &decrypted_len); EXPECT_EQ(data, std::string(reinterpret_cast(decrypted_value), @@ -1826,16 +1840,18 @@ TEST(TestGdvFnStubs, TestAesEncryptCbcWithNullIv4Args) { // Test 4-arg version with NULL IV (should auto-generate IV) bool encrypt_valid = true; const char* cipher = gdv_fn_encrypt_dispatcher_4args( - ctx_ptr, data.c_str(), data_len, key16.c_str(), key16_len, mode.c_str(), - mode_len, nullptr, 0, &encrypt_valid, &cipher_len); + ctx_ptr, data.c_str(), data_len, true, key16.c_str(), key16_len, true, + mode.c_str(), mode_len, true, nullptr, 0, false, + &encrypt_valid, &cipher_len); EXPECT_GT(cipher_len, 0); EXPECT_TRUE(cipher != nullptr); // Decrypt with NULL IV (should extract from ciphertext) bool decrypt_valid = true; const char* decrypted_value = gdv_fn_decrypt_dispatcher_4args( - ctx_ptr, cipher, cipher_len, key16.c_str(), key16_len, mode.c_str(), - mode_len, nullptr, 0, &decrypt_valid, &decrypted_len); + ctx_ptr, cipher, cipher_len, true, key16.c_str(), key16_len, true, + mode.c_str(), mode_len, true, nullptr, 0, false, + &decrypt_valid, &decrypted_len); EXPECT_EQ(data, std::string(reinterpret_cast(decrypted_value), @@ -1860,16 +1876,18 @@ TEST(TestGdvFnStubs, TestAesEncryptGcmWithNullIvButWithAad) { // Test 5-arg version with NULL IV but non-NULL AAD bool encrypt_valid = true; const char* cipher = gdv_fn_encrypt_dispatcher_5args( - ctx_ptr, data.c_str(), data_len, key16.c_str(), key16_len, mode.c_str(), - mode_len, nullptr, 0, aad.c_str(), aad_len, &encrypt_valid, &cipher_len); + ctx_ptr, data.c_str(), data_len, true, key16.c_str(), key16_len, true, + mode.c_str(), mode_len, 
true, nullptr, 0, false, aad.c_str(), aad_len, true, + &encrypt_valid, &cipher_len); EXPECT_GT(cipher_len, 0); EXPECT_TRUE(cipher != nullptr); // Decrypt with NULL IV and same AAD bool decrypt_valid = true; const char* decrypted_value = gdv_fn_decrypt_dispatcher_5args( - ctx_ptr, cipher, cipher_len, key16.c_str(), key16_len, mode.c_str(), - mode_len, nullptr, 0, aad.c_str(), aad_len, &decrypt_valid, &decrypted_len); + ctx_ptr, cipher, cipher_len, true, key16.c_str(), key16_len, true, + mode.c_str(), mode_len, true, nullptr, 0, false, aad.c_str(), aad_len, true, + &decrypt_valid, &decrypted_len); EXPECT_EQ(data, std::string(reinterpret_cast(decrypted_value), From 9cba0e45b9943b87c77452c4f318447ea64c2121 Mon Sep 17 00:00:00 2001 From: Tim Hurski Date: Wed, 21 Jan 2026 12:27:42 -0500 Subject: [PATCH 11/14] Checkpoint --- cpp/src/gandiva/encrypt_mode_dispatcher.cc | 74 ++++++---- cpp/src/gandiva/encrypt_mode_dispatcher.h | 30 ++-- cpp/src/gandiva/encrypt_utils_gcm.cc | 1 - cpp/src/gandiva/gdv_function_stubs.cc | 33 ++--- cpp/src/gandiva/gdv_function_stubs_test.cc | 158 +++++++++++++++++++++ 5 files changed, 238 insertions(+), 58 deletions(-) diff --git a/cpp/src/gandiva/encrypt_mode_dispatcher.cc b/cpp/src/gandiva/encrypt_mode_dispatcher.cc index cdda4395938..b1afa4afbae 100644 --- a/cpp/src/gandiva/encrypt_mode_dispatcher.cc +++ b/cpp/src/gandiva/encrypt_mode_dispatcher.cc @@ -44,7 +44,15 @@ enum class EncryptionMode { UNKNOWN }; -EncryptionMode ParseEncryptionMode(std::string_view mode_str) { +EncryptionMode ParseEncryptionMode(const char* mode, int32_t mode_len, bool mode_validity) { + if (!mode_validity) { + return EncryptionMode::UNKNOWN; + } + + // Convert mode string to uppercase for case-insensitive comparison + std::string mode_str = + arrow::internal::AsciiToUpper(std::string_view(mode, mode_len)); + if (mode_str == AES_ECB_MODE) return EncryptionMode::ECB; if (mode_str == AES_ECB_PKCS7_MODE) return EncryptionMode::ECB_PKCS7; if (mode_str == 
AES_ECB_NONE_MODE) return EncryptionMode::ECB_NONE; @@ -52,19 +60,30 @@ EncryptionMode ParseEncryptionMode(std::string_view mode_str) { if (mode_str == AES_CBC_PKCS7_MODE) return EncryptionMode::CBC_PKCS7; if (mode_str == AES_CBC_NONE_MODE) return EncryptionMode::CBC_NONE; if (mode_str == AES_GCM_MODE) return EncryptionMode::GCM; + return EncryptionMode::UNKNOWN; } +std::string BuildUnsupportedModeError(const char* operation, const char* mode, int32_t mode_len) { + std::string modes_str = arrow::internal::JoinStrings(SUPPORTED_MODES, ", "); + std::ostringstream oss; + oss << "Unsupported " << operation << " mode: " << std::string_view(mode, mode_len) + << ". Supported modes: " << modes_str; + return oss.str(); +} + int32_t EncryptModeDispatcher::encrypt( - const char* plaintext, int32_t plaintext_len, const char* key, - int32_t key_len, const char* mode, int32_t mode_len, const char* iv, - int32_t iv_len, const char* fifth_argument, int32_t fifth_argument_len, - unsigned char* cipher) { - // Convert mode string to uppercase for case-insensitive comparison - std::string mode_str = - arrow::internal::AsciiToUpper(std::string_view(mode, mode_len)); + const char* plaintext, int32_t plaintext_len, + const char* key, int32_t key_len, bool key_validity, + const char* mode, int32_t mode_len, bool mode_validity, + const char* iv, int32_t iv_len, bool iv_validity, + const char* fifth_argument, int32_t fifth_argument_len, + bool fifth_argument_validity, unsigned char* cipher) { + if (!key_validity) { + throw std::runtime_error("Encryption key cannot be NULL"); + } - switch (ParseEncryptionMode(mode_str)) { + switch (ParseEncryptionMode(mode, mode_len, mode_validity)) { case EncryptionMode::ECB: case EncryptionMode::ECB_PKCS7: return aes_encrypt_ecb(plaintext, plaintext_len, key, key_len, true, cipher); @@ -81,26 +100,24 @@ int32_t EncryptModeDispatcher::encrypt( return aes_encrypt_gcm(plaintext, plaintext_len, key, key_len, iv, iv_len, fifth_argument, fifth_argument_len, 
cipher); case EncryptionMode::UNKNOWN: - default: { - std::string modes_str = arrow::internal::JoinStrings(SUPPORTED_MODES, ", "); - std::ostringstream oss; - oss << "Unsupported encryption mode: " << mode_str - << ". Supported modes: " << modes_str; - throw std::runtime_error(oss.str()); - } + default: + throw std::runtime_error(BuildUnsupportedModeError("encryption", mode, mode_len)); } } int32_t EncryptModeDispatcher::decrypt( - const char* ciphertext, int32_t ciphertext_len, const char* key, - int32_t key_len, const char* mode, int32_t mode_len, const char* iv, - int32_t iv_len, const char* fifth_argument, int32_t fifth_argument_len, - unsigned char* plaintext) { - // Convert mode string to uppercase for case-insensitive comparison - std::string mode_str = - arrow::internal::AsciiToUpper(std::string_view(mode, mode_len)); + const char* ciphertext, int32_t ciphertext_len, + const char* key, int32_t key_len, bool key_validity, + const char* mode, int32_t mode_len, bool mode_validity, + const char* iv, int32_t iv_len, bool iv_validity, + const char* fifth_argument, int32_t fifth_argument_len, + bool fifth_argument_validity, unsigned char* plaintext) { + // If key is NULL (validity flag is false), throw error + if (!key_validity) { + throw std::runtime_error("Decryption key cannot be NULL"); + } - switch (ParseEncryptionMode(mode_str)) { + switch (ParseEncryptionMode(mode, mode_len, mode_validity)) { case EncryptionMode::ECB: case EncryptionMode::ECB_PKCS7: return aes_decrypt_ecb(ciphertext, ciphertext_len, key, key_len, true, plaintext); @@ -118,13 +135,8 @@ int32_t EncryptModeDispatcher::decrypt( return aes_decrypt_gcm(ciphertext, ciphertext_len, key, key_len, iv, iv_len, fifth_argument, fifth_argument_len, plaintext); case EncryptionMode::UNKNOWN: - default: { - std::string modes_str = arrow::internal::JoinStrings(SUPPORTED_MODES, ", "); - std::ostringstream oss; - oss << "Unsupported decryption mode: " << mode_str - << ". 
Supported modes: " << modes_str; - throw std::runtime_error(oss.str()); - } + default: + throw std::runtime_error(BuildUnsupportedModeError("decryption", mode, mode_len)); } } diff --git a/cpp/src/gandiva/encrypt_mode_dispatcher.h b/cpp/src/gandiva/encrypt_mode_dispatcher.h index a7d7b1863fb..ee8aef2ddcb 100644 --- a/cpp/src/gandiva/encrypt_mode_dispatcher.h +++ b/cpp/src/gandiva/encrypt_mode_dispatcher.h @@ -44,30 +44,35 @@ class EncryptModeDispatcher { * - GCM with user-supplied IV: [ciphertext][16-byte authentication tag] * * IV Handling (CBC and GCM modes): - * - If iv is NULL or iv_len is 0: A cryptographically secure random IV is + * - If iv is NULL or iv_len is 0 or iv_validity is false: A cryptographically secure random IV is * automatically generated and prepended to the output - * - If iv is provided: It must be the exact required length (12 for GCM, 16 for CBC), + * - If iv is provided and iv_validity is true: It must be the exact required length (12 for GCM, 16 for CBC), * and will NOT be prepended to the output (only ciphertext is returned) * * @param plaintext The data to encrypt * @param plaintext_len Length of plaintext in bytes * @param key The encryption key (16, 24, or 32 bytes for AES-128/192/256) * @param key_len Length of key in bytes + * @param key_validity Whether key is valid (if false, throws error) * @param mode Mode string (case-insensitive) * @param mode_len Length of mode string in bytes + * @param mode_validity Whether mode is valid (if false, treated as NULL/UNKNOWN) * @param iv The initialization vector (NULL for auto-generation in CBC/GCM, ignored for ECB) * @param iv_len Length of the IV in bytes (0 for auto-generation, 12 for GCM, 16 for CBC) + * @param iv_validity Whether IV is valid (if false, treated as NULL) * @param fifth_argument Additional parameter (AAD for GCM mode, ignored for others) * @param fifth_argument_len Length of fifth_argument in bytes + * @param fifth_argument_validity Whether fifth_argument is valid (if 
false, treated as NULL) * @param cipher Output buffer for encrypted data (must be large enough for output format) * @return Length of encrypted data in bytes (includes prepended IV only if auto-generated) * @throws std::runtime_error on encryption failure, unsupported mode, or invalid parameters */ static int32_t encrypt(const char* plaintext, int32_t plaintext_len, - const char* key, int32_t key_len, - const char* mode, int32_t mode_len, - const char* iv, int32_t iv_len, + const char* key, int32_t key_len, bool key_validity, + const char* mode, int32_t mode_len, bool mode_validity, + const char* iv, int32_t iv_len, bool iv_validity, const char* fifth_argument, int32_t fifth_argument_len, + bool fifth_argument_validity, unsigned char* cipher); /** @@ -86,29 +91,34 @@ class EncryptModeDispatcher { * - GCM with provided IV: [ciphertext][16-byte authentication tag] (IV provided separately) * * IV Handling (CBC and GCM modes): - * - If iv is NULL or iv_len is 0: IV is extracted from the beginning of ciphertext - * - If iv is provided: It must be the exact required length (12 for GCM, 16 for CBC), + * - If iv is NULL or iv_len is 0 or iv_validity is false: IV is extracted from the beginning of ciphertext + * - If iv is provided and iv_validity is true: It must be the exact required length (12 for GCM, 16 for CBC), * and ciphertext should not include the IV * * @param ciphertext The data to decrypt (format depends on mode and IV parameter) * @param ciphertext_len Length of ciphertext in bytes (includes IV if embedded) * @param key The decryption key (16, 24, or 32 bytes for AES-128/192/256) * @param key_len Length of key in bytes + * @param key_validity Whether key is valid (if false, throws error) * @param mode Mode string (case-insensitive) * @param mode_len Length of mode string in bytes + * @param mode_validity Whether mode is valid (if false, treated as NULL/UNKNOWN) * @param iv The initialization vector (NULL for extraction from ciphertext, ignored for ECB) * 
@param iv_len Length of the IV in bytes (0 for extraction, 12 for GCM, 16 for CBC) + * @param iv_validity Whether IV is valid (if false, treated as NULL) * @param fifth_argument Additional parameter (AAD for GCM mode, ignored for others) * @param fifth_argument_len Length of fifth_argument in bytes + * @param fifth_argument_validity Whether fifth_argument is valid (if false, treated as NULL) * @param plaintext Output buffer for decrypted data * @return Length of decrypted data in bytes (plaintext only, IV and tag removed) * @throws std::runtime_error on decryption failure, unsupported mode, invalid parameters, or authentication failure */ static int32_t decrypt(const char* ciphertext, int32_t ciphertext_len, - const char* key, int32_t key_len, - const char* mode, int32_t mode_len, - const char* iv, int32_t iv_len, + const char* key, int32_t key_len, bool key_validity, + const char* mode, int32_t mode_len, bool mode_validity, + const char* iv, int32_t iv_len, bool iv_validity, const char* fifth_argument, int32_t fifth_argument_len, + bool fifth_argument_validity, unsigned char* plaintext); }; diff --git a/cpp/src/gandiva/encrypt_utils_gcm.cc b/cpp/src/gandiva/encrypt_utils_gcm.cc index dfb964282c1..10034ac0446 100644 --- a/cpp/src/gandiva/encrypt_utils_gcm.cc +++ b/cpp/src/gandiva/encrypt_utils_gcm.cc @@ -226,7 +226,6 @@ int32_t aes_decrypt_gcm(const char* ciphertext, int32_t ciphertext_len, int32_t ciphertext_without_tag_len; const unsigned char* tag = nullptr; - // Extract and set the authentication tag only if AAD is provided if (aad != nullptr && aad_len > 0) { ciphertext_without_tag_len = actual_ciphertext_with_tag_len - GCM_TAG_LENGTH; tag = reinterpret_cast(actual_ciphertext + ciphertext_without_tag_len); diff --git a/cpp/src/gandiva/gdv_function_stubs.cc b/cpp/src/gandiva/gdv_function_stubs.cc index 1453ea8b8d1..3d5116d2c58 100644 --- a/cpp/src/gandiva/gdv_function_stubs.cc +++ b/cpp/src/gandiva/gdv_function_stubs.cc @@ -889,11 +889,11 @@ const char* 
gdv_fn_encrypt_dispatcher_5args( const char* fifth_argument, int32_t fifth_argument_len, bool fifth_argument_validity, bool* out_valid, int32_t* out_len) { // We use kResultNullInternal to handle NULL inputs selectively: - // - NULL plaintext → return NULL (set out_valid = false) - // - NULL key → call function, it will throw validation error - // - NULL mode → call function, it will throw validation error - // - NULL IV → call function, auto-generates IV - // - NULL AAD → call function, no AAD used + // - NULL plaintext → return NULL (set out_valid = false) - handled in stub + // - NULL key → dispatcher throws validation error + // - NULL mode → dispatcher throws validation error + // - NULL IV → dispatcher auto-generates IV + // - NULL AAD → dispatcher treats as no AAD // Check if plaintext is NULL - this is the only case where we return NULL if (!data_validity) { @@ -920,8 +920,9 @@ const char* gdv_fn_encrypt_dispatcher_5args( } int32_t cipher_len = EncryptModeDispatcher::encrypt( - data, data_len, key_data, key_data_len, mode, mode_len, iv_data, - iv_data_len, fifth_argument, fifth_argument_len, output); + data, data_len, key_data, key_data_len, key_validity, + mode, mode_len, mode_validity, iv_data, iv_data_len, iv_validity, + fifth_argument, fifth_argument_len, fifth_argument_validity, output); *out_len = cipher_len; return reinterpret_cast(output); @@ -941,15 +942,14 @@ const char* gdv_fn_decrypt_dispatcher_5args( const char* fifth_argument, int32_t fifth_argument_len, bool fifth_argument_validity, bool* out_valid, int32_t* out_len) { // We use kResultNullInternal to handle NULL inputs selectively: - // - NULL ciphertext → return NULL (set out_valid = false) - // - NULL key → call function, it will throw validation error - // - NULL mode → call function, it will throw validation error - // - NULL IV → call function, auto-extracts IV from ciphertext - // - NULL AAD → call function, no AAD used - // Note: validity parameters are ignored here - NULL handling is 
done by kResultNullInternal logic + // - NULL ciphertext → return NULL (set out_valid = false) - handled in stub + // - NULL key → dispatcher throws validation error + // - NULL mode → dispatcher throws validation error + // - NULL IV → dispatcher auto-extracts IV from ciphertext + // - NULL AAD → dispatcher treats as no AAD // Check if ciphertext is NULL - this is the only case where we return NULL - if (data == nullptr) { + if (!data_validity) { *out_valid = false; *out_len = 0; return nullptr; @@ -970,8 +970,9 @@ const char* gdv_fn_decrypt_dispatcher_5args( } int32_t plaintext_len = EncryptModeDispatcher::decrypt( - data, data_len, key_data, key_data_len, mode, mode_len, iv_data, - iv_data_len, fifth_argument, fifth_argument_len, output); + data, data_len, key_data, key_data_len, key_validity, + mode, mode_len, mode_validity, iv_data, iv_data_len, iv_validity, + fifth_argument, fifth_argument_len, fifth_argument_validity, output); *out_len = plaintext_len; return reinterpret_cast(output); diff --git a/cpp/src/gandiva/gdv_function_stubs_test.cc b/cpp/src/gandiva/gdv_function_stubs_test.cc index b6b1fdf390b..a55c9f024ca 100644 --- a/cpp/src/gandiva/gdv_function_stubs_test.cc +++ b/cpp/src/gandiva/gdv_function_stubs_test.cc @@ -1894,4 +1894,162 @@ TEST(TestGdvFnStubs, TestAesEncryptGcmWithNullIvButWithAad) { decrypted_len)); } +// Test that NULL mode (mode_validity = false) throws an error +TEST(TestGdvFnStubs, TestAesEncryptWithNullMode) { + gandiva::ExecutionContext ctx; + std::string key16 = "12345678abcdefgh"; + auto key16_len = static_cast(key16.length()); + int32_t cipher_len = 0; + std::string data = "test string"; + auto data_len = static_cast(data.length()); + // Mode has garbage data but validity is false + std::string mode = "GARBAGE_MODE"; + auto mode_len = static_cast(mode.length()); + int64_t ctx_ptr = reinterpret_cast(&ctx); + + bool encrypt_valid = true; + const char* cipher = gdv_fn_encrypt_dispatcher_3args( + ctx_ptr, data.c_str(), data_len, true, 
key16.c_str(), key16_len, true, + mode.c_str(), mode_len, false, // mode_validity = false (NULL mode) + &encrypt_valid, &cipher_len); + + // Should fail with error message about NULL mode + EXPECT_FALSE(encrypt_valid); + EXPECT_EQ(cipher, nullptr); + EXPECT_TRUE(ctx.has_error()); + EXPECT_NE(std::string(ctx.get_error()).find("Unsupported encryption mode: NULL"), + std::string::npos); +} + +// Test that NULL mode (mode_validity = false) throws an error for decrypt +TEST(TestGdvFnStubs, TestAesDecryptWithNullMode) { + gandiva::ExecutionContext ctx; + std::string key16 = "12345678abcdefgh"; + auto key16_len = static_cast(key16.length()); + int32_t decrypted_len = 0; + std::string data = "some ciphertext"; + auto data_len = static_cast(data.length()); + // Mode has garbage data but validity is false + std::string mode = "GARBAGE_MODE"; + auto mode_len = static_cast(mode.length()); + int64_t ctx_ptr = reinterpret_cast(&ctx); + + bool decrypt_valid = true; + const char* plaintext = gdv_fn_decrypt_dispatcher_3args( + ctx_ptr, data.c_str(), data_len, true, key16.c_str(), key16_len, true, + mode.c_str(), mode_len, false, // mode_validity = false (NULL mode) + &decrypt_valid, &decrypted_len); + + // Should fail with error message about NULL mode + EXPECT_FALSE(decrypt_valid); + EXPECT_EQ(plaintext, nullptr); + EXPECT_TRUE(ctx.has_error()); + EXPECT_NE(std::string(ctx.get_error()).find("Unsupported decryption mode: NULL"), + std::string::npos); +} + +// Test that NULL plaintext (plaintext_validity = false) returns NULL +TEST(TestGdvFnStubs, TestAesEncryptWithNullPlaintext) { + gandiva::ExecutionContext ctx; + std::string key16 = "12345678abcdefgh"; + auto key16_len = static_cast(key16.length()); + int32_t cipher_len = 0; + std::string data = "test string"; + auto data_len = static_cast(data.length()); + std::string mode = AES_ECB_MODE; + auto mode_len = static_cast(mode.length()); + int64_t ctx_ptr = reinterpret_cast(&ctx); + + bool encrypt_valid = true; + const char* cipher 
= gdv_fn_encrypt_dispatcher_3args( + ctx_ptr, data.c_str(), data_len, false, // plaintext_validity = false (NULL plaintext) + key16.c_str(), key16_len, true, mode.c_str(), mode_len, true, + &encrypt_valid, &cipher_len); + + // Should return NULL without error + EXPECT_FALSE(encrypt_valid); + EXPECT_EQ(cipher, nullptr); + EXPECT_EQ(cipher_len, 0); + EXPECT_FALSE(ctx.has_error()); +} + +// Test that NULL ciphertext (ciphertext_validity = false) returns NULL +TEST(TestGdvFnStubs, TestAesDecryptWithNullCiphertext) { + gandiva::ExecutionContext ctx; + std::string key16 = "12345678abcdefgh"; + auto key16_len = static_cast(key16.length()); + int32_t decrypted_len = 0; + std::string data = "some ciphertext"; + auto data_len = static_cast(data.length()); + std::string mode = AES_ECB_MODE; + auto mode_len = static_cast(mode.length()); + int64_t ctx_ptr = reinterpret_cast(&ctx); + + bool decrypt_valid = true; + const char* plaintext = gdv_fn_decrypt_dispatcher_3args( + ctx_ptr, data.c_str(), data_len, false, // ciphertext_validity = false (NULL ciphertext) + key16.c_str(), key16_len, true, mode.c_str(), mode_len, true, + &decrypt_valid, &decrypted_len); + + // Should return NULL without error + EXPECT_FALSE(decrypt_valid); + EXPECT_EQ(plaintext, nullptr); + EXPECT_EQ(decrypted_len, 0); + EXPECT_FALSE(ctx.has_error()); +} + +// Test that NULL key (key_validity = false) throws an error for encrypt +TEST(TestGdvFnStubs, TestAesEncryptWithNullKey) { + gandiva::ExecutionContext ctx; + std::string key16 = "12345678abcdefgh"; + auto key16_len = static_cast(key16.length()); + int32_t cipher_len = 0; + std::string data = "test string"; + auto data_len = static_cast(data.length()); + std::string mode = AES_ECB_MODE; + auto mode_len = static_cast(mode.length()); + int64_t ctx_ptr = reinterpret_cast(&ctx); + + bool encrypt_valid = true; + const char* cipher = gdv_fn_encrypt_dispatcher_3args( + ctx_ptr, data.c_str(), data_len, true, + key16.c_str(), key16_len, false, // key_validity = 
false (NULL key) + mode.c_str(), mode_len, true, + &encrypt_valid, &cipher_len); + + // Should fail with error message about NULL key + EXPECT_FALSE(encrypt_valid); + EXPECT_EQ(cipher, nullptr); + EXPECT_TRUE(ctx.has_error()); + EXPECT_NE(std::string(ctx.get_error()).find("Encryption key cannot be NULL"), + std::string::npos); +} + +// Test that NULL key (key_validity = false) throws an error for decrypt +TEST(TestGdvFnStubs, TestAesDecryptWithNullKey) { + gandiva::ExecutionContext ctx; + std::string key16 = "12345678abcdefgh"; + auto key16_len = static_cast(key16.length()); + int32_t decrypted_len = 0; + std::string data = "some ciphertext"; + auto data_len = static_cast(data.length()); + std::string mode = AES_ECB_MODE; + auto mode_len = static_cast(mode.length()); + int64_t ctx_ptr = reinterpret_cast(&ctx); + + bool decrypt_valid = true; + const char* plaintext = gdv_fn_decrypt_dispatcher_3args( + ctx_ptr, data.c_str(), data_len, true, + key16.c_str(), key16_len, false, // key_validity = false (NULL key) + mode.c_str(), mode_len, true, + &decrypt_valid, &decrypted_len); + + // Should fail with error message about NULL key + EXPECT_FALSE(decrypt_valid); + EXPECT_EQ(plaintext, nullptr); + EXPECT_TRUE(ctx.has_error()); + EXPECT_NE(std::string(ctx.get_error()).find("Decryption key cannot be NULL"), + std::string::npos); +} + } // namespace gandiva From ac1e2458b057a2945c6dc41bdc20b42bdb696a09 Mon Sep 17 00:00:00 2001 From: Tim Hurski Date: Wed, 21 Jan 2026 15:41:52 -0500 Subject: [PATCH 12/14] Checkpoint --- cpp/src/gandiva/encrypt_mode_dispatcher.h | 69 ++++++----------------- cpp/src/gandiva/encrypt_utils_cbc.cc | 12 +--- cpp/src/gandiva/encrypt_utils_cbc.h | 12 ++-- cpp/src/gandiva/encrypt_utils_gcm.cc | 43 +++++++------- cpp/src/gandiva/encrypt_utils_gcm.h | 8 +-- cpp/src/gandiva/gdv_function_stubs.cc | 17 ------ 6 files changed, 52 insertions(+), 109 deletions(-) diff --git a/cpp/src/gandiva/encrypt_mode_dispatcher.h b/cpp/src/gandiva/encrypt_mode_dispatcher.h 
index ee8aef2ddcb..d19c76422fb 100644 --- a/cpp/src/gandiva/encrypt_mode_dispatcher.h +++ b/cpp/src/gandiva/encrypt_mode_dispatcher.h @@ -31,40 +31,22 @@ class EncryptModeDispatcher { /** * Encrypt data using the specified mode * - * Supported modes: - * - AES-ECB, AES-ECB-PKCS7, AES-ECB-NONE: ECB mode (no IV) - * - AES-CBC, AES-CBC-PKCS7, AES-CBC-NONE: CBC mode (requires 16-byte IV) - * - AES-GCM: GCM mode (requires 12-byte IV) - * - * Output format: - * - ECB: [ciphertext] - * - CBC with auto-generated IV: [16-byte IV][ciphertext] - * - CBC with user-supplied IV: [ciphertext] - * - GCM with auto-generated IV: [12-byte IV][ciphertext][16-byte authentication tag] - * - GCM with user-supplied IV: [ciphertext][16-byte authentication tag] - * - * IV Handling (CBC and GCM modes): - * - If iv is NULL or iv_len is 0 or iv_validity is false: A cryptographically secure random IV is - * automatically generated and prepended to the output - * - If iv is provided and iv_validity is true: It must be the exact required length (12 for GCM, 16 for CBC), - * and will NOT be prepended to the output (only ciphertext is returned) - * * @param plaintext The data to encrypt * @param plaintext_len Length of plaintext in bytes * @param key The encryption key (16, 24, or 32 bytes for AES-128/192/256) * @param key_len Length of key in bytes - * @param key_validity Whether key is valid (if false, throws error) + * @param key_validity Whether key is valid * @param mode Mode string (case-insensitive) * @param mode_len Length of mode string in bytes - * @param mode_validity Whether mode is valid (if false, treated as NULL/UNKNOWN) - * @param iv The initialization vector (NULL for auto-generation in CBC/GCM, ignored for ECB) - * @param iv_len Length of the IV in bytes (0 for auto-generation, 12 for GCM, 16 for CBC) - * @param iv_validity Whether IV is valid (if false, treated as NULL) - * @param fifth_argument Additional parameter (AAD for GCM mode, ignored for others) + * @param mode_validity 
Whether mode is valid + * @param iv The initialization vector + * @param iv_len Length of the IV in bytes + * @param iv_validity Whether IV is valid + * @param fifth_argument Additional parameter (e.g. AAD for the GCM mode) * @param fifth_argument_len Length of fifth_argument in bytes - * @param fifth_argument_validity Whether fifth_argument is valid (if false, treated as NULL) - * @param cipher Output buffer for encrypted data (must be large enough for output format) - * @return Length of encrypted data in bytes (includes prepended IV only if auto-generated) + * @param fifth_argument_validity Whether fifth_argument is valid + * @param cipher Output buffer for encrypted data + * @return Length of encrypted data in bytes * @throws std::runtime_error on encryption failure, unsupported mode, or invalid parameters */ static int32_t encrypt(const char* plaintext, int32_t plaintext_len, @@ -78,37 +60,20 @@ class EncryptModeDispatcher { /** * Decrypt data using the specified mode * - * Supported modes: - * - AES-ECB, AES-ECB-PKCS7, AES-ECB-NONE: ECB mode (no IV) - * - AES-CBC, AES-CBC-PKCS7, AES-CBC-NONE: CBC mode (requires 16-byte IV) - * - AES-GCM: GCM mode (requires 12-byte IV) - * - * Expected input format: - * - ECB: [ciphertext] - * - CBC with NULL IV: [16-byte IV][ciphertext] - * - CBC with provided IV: [ciphertext] (IV provided separately) - * - GCM with NULL IV: [12-byte IV][ciphertext][16-byte authentication tag] - * - GCM with provided IV: [ciphertext][16-byte authentication tag] (IV provided separately) - * - * IV Handling (CBC and GCM modes): - * - If iv is NULL or iv_len is 0 or iv_validity is false: IV is extracted from the beginning of ciphertext - * - If iv is provided and iv_validity is true: It must be the exact required length (12 for GCM, 16 for CBC), - * and ciphertext should not include the IV - * * @param ciphertext The data to decrypt (format depends on mode and IV parameter) - * @param ciphertext_len Length of ciphertext in bytes (includes IV if 
embedded) + * @param ciphertext_len Length of ciphertext in bytes * @param key The decryption key (16, 24, or 32 bytes for AES-128/192/256) * @param key_len Length of key in bytes - * @param key_validity Whether key is valid (if false, throws error) + * @param key_validity Whether key is valid * @param mode Mode string (case-insensitive) * @param mode_len Length of mode string in bytes - * @param mode_validity Whether mode is valid (if false, treated as NULL/UNKNOWN) - * @param iv The initialization vector (NULL for extraction from ciphertext, ignored for ECB) - * @param iv_len Length of the IV in bytes (0 for extraction, 12 for GCM, 16 for CBC) - * @param iv_validity Whether IV is valid (if false, treated as NULL) - * @param fifth_argument Additional parameter (AAD for GCM mode, ignored for others) + * @param mode_validity Whether mode is valid + * @param iv The initialization vector + * @param iv_len Length of the IV in bytes + * @param iv_validity Whether IV is valid + * @param fifth_argument Additional parameter (e.g. 
AAD for the GCM mode) * @param fifth_argument_len Length of fifth_argument in bytes - * @param fifth_argument_validity Whether fifth_argument is valid (if false, treated as NULL) + * @param fifth_argument_validity Whether fifth_argument is valid * @param plaintext Output buffer for decrypted data * @return Length of decrypted data in bytes (plaintext only, IV and tag removed) * @throws std::runtime_error on decryption failure, unsupported mode, invalid parameters, or authentication failure diff --git a/cpp/src/gandiva/encrypt_utils_cbc.cc b/cpp/src/gandiva/encrypt_utils_cbc.cc index 84459c9a96f..3c4bf14993e 100644 --- a/cpp/src/gandiva/encrypt_utils_cbc.cc +++ b/cpp/src/gandiva/encrypt_utils_cbc.cc @@ -77,19 +77,12 @@ int32_t aes_encrypt_cbc(const char* plaintext, int32_t plaintext_len, const char const unsigned char* actual_iv = nullptr; bool iv_auto_generated = false; - // Handle NULL IV: generate random IV if (iv == nullptr || iv_len == 0) { generate_random_iv(iv_buffer, CBC_IV_LENGTH); actual_iv = iv_buffer; iv_auto_generated = true; } else { - // Validate user-supplied IV length - CBC requires exactly 16 bytes - if (iv_len != CBC_IV_LENGTH) { - std::ostringstream oss; - oss << "Invalid IV length for AES-CBC: " << iv_len - << " bytes. 
IV must be exactly " << CBC_IV_LENGTH << " bytes"; - throw std::runtime_error(oss.str()); - } + validate_iv_length_cbc(iv_len); actual_iv = reinterpret_cast(iv); iv_auto_generated = false; } @@ -160,16 +153,13 @@ int32_t aes_decrypt_cbc(const char* ciphertext, int32_t ciphertext_len, const ch const char* actual_ciphertext = ciphertext; int32_t actual_ciphertext_len = ciphertext_len; - // Handle IV: either extract from ciphertext or use user-supplied IV if (iv == nullptr) { - // Extract IV from beginning of ciphertext: [16-byte IV][ciphertext] validate_ciphertext_with_embedded_iv_cbc(ciphertext_len); extract_iv_from_ciphertext(ciphertext, ciphertext_len, CBC_IV_LENGTH, iv_buffer, &actual_ciphertext, &actual_ciphertext_len); actual_iv = iv_buffer; } else { - // Use user-supplied IV validate_iv_length_cbc(iv_len); actual_iv = reinterpret_cast(iv); } diff --git a/cpp/src/gandiva/encrypt_utils_cbc.h b/cpp/src/gandiva/encrypt_utils_cbc.h index a25122f2557..41acdab0cfb 100644 --- a/cpp/src/gandiva/encrypt_utils_cbc.h +++ b/cpp/src/gandiva/encrypt_utils_cbc.h @@ -39,19 +39,19 @@ constexpr int32_t CBC_IV_LENGTH = 16; // 16 bytes (128 bits) - required for CBC * - With user-supplied IV: [ciphertext] * * IV Handling: - * - If iv is NULL or iv_len is 0: A cryptographically secure random 16-byte IV + * - If iv is NULL: A cryptographically secure random 16-byte IV * is automatically generated using OpenSSL RAND_bytes and prepended to output - * - If iv is provided: It must be exactly 16 bytes, will be used as-is, and NOT prepended + * - If iv is provided: It must be exactly 16 bytes, will be used as-is, and not prepended * * @param plaintext The data to encrypt * @param plaintext_len Length of plaintext in bytes * @param key The encryption key (16, 24, or 32 bytes for 128, 192, 256-bit keys) * @param key_len Length of key in bytes * @param iv The initialization vector (NULL for auto-generation, or exactly 16 bytes) - * @param iv_len Length of IV in bytes (0 for auto-generation, 
or 16) + * @param iv_len Length of IV in bytes * @param use_padding Whether to use PKCS7 padding (true) or no padding (false) * @param cipher Output buffer for encrypted data (must be at least plaintext_len + 32 bytes) - * @return Length of encrypted data in bytes (16 + ciphertext_len) + * @return Length of encrypted data in bytes * @throws std::runtime_error on encryption failure or invalid parameters */ GANDIVA_EXPORT @@ -63,7 +63,7 @@ int32_t aes_encrypt_cbc(const char* plaintext, int32_t plaintext_len, const char * Decrypt data using AES-CBC algorithm with explicit padding mode * * IV Handling: - * - If iv is NULL or iv_len is 0: IV is extracted from the first 16 bytes of ciphertext + * - If iv is NULL: IV is extracted from the first 16 bytes of ciphertext * (expects format: [16-byte IV][ciphertext]) * - If iv is provided: It must be exactly 16 bytes, and ciphertext should be * [ciphertext] without embedded IV @@ -75,7 +75,7 @@ int32_t aes_encrypt_cbc(const char* plaintext, int32_t plaintext_len, const char * @param key The decryption key (16, 24, or 32 bytes for 128, 192, 256-bit keys) * @param key_len Length of key in bytes * @param iv The initialization vector (NULL for extraction, or exactly 16 bytes) - * @param iv_len Length of IV in bytes (0 for extraction, or 16) + * @param iv_len Length of IV in bytes * @param use_padding Whether to use PKCS7 padding (true) or no padding (false) * @param plaintext Output buffer for decrypted data * @return Length of decrypted data in bytes diff --git a/cpp/src/gandiva/encrypt_utils_gcm.cc b/cpp/src/gandiva/encrypt_utils_gcm.cc index 10034ac0446..05e8d656e90 100644 --- a/cpp/src/gandiva/encrypt_utils_gcm.cc +++ b/cpp/src/gandiva/encrypt_utils_gcm.cc @@ -54,8 +54,15 @@ void validate_iv_length_gcm(int32_t iv_len) { } } +void validate_aad_length_gcm(int32_t aad_len) { + if (aad_len <= 0) { + throw std::runtime_error("AAD length must be positive when AAD is provided"); + } +} + void 
validate_ciphertext_with_embedded_iv_gcm(int32_t ciphertext_len) { - constexpr int32_t MIN_CIPHERTEXT_LEN = GCM_IV_LENGTH + GCM_TAG_LENGTH; // IV + tag + constexpr int32_t MIN_CIPHERTEXT_LEN = GCM_IV_LENGTH + GCM_TAG_LENGTH; + if (ciphertext_len < MIN_CIPHERTEXT_LEN) { std::ostringstream oss; oss << "Ciphertext too short for AES-GCM with embedded IV: " << ciphertext_len @@ -81,7 +88,7 @@ int32_t aes_encrypt_gcm(const char* plaintext, int32_t plaintext_len, unsigned char* cipher) { unsigned char iv_buffer[GCM_IV_LENGTH]; const unsigned char* actual_iv = nullptr; - bool iv_auto_generated = iv == nullptr || iv_len == 0; + bool iv_auto_generated = iv == nullptr; if (iv_auto_generated) { generate_random_iv(iv_buffer, GCM_IV_LENGTH); @@ -125,7 +132,9 @@ int32_t aes_encrypt_gcm(const char* plaintext, int32_t plaintext_len, } // Process AAD if provided - if (aad != nullptr && aad_len > 0) { + if (aad != nullptr) { + validate_aad_length_gcm(aad_len); + if (!EVP_EncryptUpdate(en_ctx, nullptr, &len, reinterpret_cast(aad), aad_len)) { throw std::runtime_error("Could not process AAD for encryption: " + @@ -215,7 +224,9 @@ int32_t aes_decrypt_gcm(const char* ciphertext, int32_t ciphertext_len, } // Process AAD if provided - if (aad != nullptr && aad_len > 0) { + if (aad != nullptr) { + validate_aad_length_gcm(aad_len); + if (!EVP_DecryptUpdate(de_ctx, nullptr, &len, reinterpret_cast(aad), aad_len)) { throw std::runtime_error("Could not process AAD for decryption: " + @@ -223,22 +234,16 @@ int32_t aes_decrypt_gcm(const char* ciphertext, int32_t ciphertext_len, } } - int32_t ciphertext_without_tag_len; - const unsigned char* tag = nullptr; - - if (aad != nullptr && aad_len > 0) { - ciphertext_without_tag_len = actual_ciphertext_with_tag_len - GCM_TAG_LENGTH; - tag = reinterpret_cast(actual_ciphertext + ciphertext_without_tag_len); + // GCM always has a tag appended, regardless of whether AAD was used + int32_t ciphertext_without_tag_len = actual_ciphertext_with_tag_len - 
GCM_TAG_LENGTH; + const unsigned char* tag = reinterpret_cast( + actual_ciphertext + ciphertext_without_tag_len); - // Set the authentication tag - if (!EVP_CIPHER_CTX_ctrl(de_ctx, EVP_CTRL_GCM_SET_TAG, GCM_TAG_LENGTH, - const_cast(tag))) { - throw std::runtime_error("Could not set GCM authentication tag: " + - get_openssl_error_string()); - } - } else { - // No AAD means no tag appended - ciphertext_without_tag_len = actual_ciphertext_with_tag_len; + // Set the authentication tag + if (!EVP_CIPHER_CTX_ctrl(de_ctx, EVP_CTRL_GCM_SET_TAG, GCM_TAG_LENGTH, + const_cast(tag))) { + throw std::runtime_error("Could not set GCM authentication tag: " + + get_openssl_error_string()); } // Decrypt ciphertext diff --git a/cpp/src/gandiva/encrypt_utils_gcm.h b/cpp/src/gandiva/encrypt_utils_gcm.h index 1c4d815c79f..6b117251681 100644 --- a/cpp/src/gandiva/encrypt_utils_gcm.h +++ b/cpp/src/gandiva/encrypt_utils_gcm.h @@ -40,18 +40,18 @@ constexpr int32_t GCM_TAG_LENGTH = 16; * - With user-supplied IV: [ciphertext][16-byte authentication tag] * * IV Handling: - * - If iv is NULL or iv_len is 0: A cryptographically secure random 12-byte IV + * - If iv is NULL: A cryptographically secure random 12-byte IV * is automatically generated using OpenSSL RAND_bytes and prepended to output - * - If iv is provided: It must be exactly 12 bytes, will be used as-is, and NOT prepended + * - If iv is provided: It must be exactly 12 bytes, will be used as-is, and not prepended * * @param plaintext The data to encrypt * @param plaintext_len Length of plaintext in bytes * @param key The encryption key (16, 24, or 32 bytes for 128, 192, 256-bit keys) * @param key_len Length of key in bytes * @param iv The initialization vector (NULL for auto-generation, or exactly 12 bytes) - * @param iv_len Length of IV in bytes (0 for auto-generation, or 12) + * @param iv_len Length of IV in bytes * @param aad Optional additional authenticated data (can be null) - * @param aad_len Length of AAD in bytes (0 if aad is 
null) + * @param aad_len Length of AAD in bytes * @param cipher Output buffer for encrypted data (must be at least plaintext_len + 28 bytes) * @return Length of encrypted data in bytes (12 + plaintext_len + 16) * @throws std::runtime_error on encryption failure or invalid parameters diff --git a/cpp/src/gandiva/gdv_function_stubs.cc b/cpp/src/gandiva/gdv_function_stubs.cc index 3d5116d2c58..609afb58c0b 100644 --- a/cpp/src/gandiva/gdv_function_stubs.cc +++ b/cpp/src/gandiva/gdv_function_stubs.cc @@ -787,7 +787,6 @@ const char* gdv_fn_aes_encrypt_ecb_legacy(int64_t context, const char* data, &out_valid, out_len); // Add null terminator for string compatibility - // Note: This may not be valid UTF-8, but it's needed for string handling if (result != nullptr && out_valid) { char* mutable_result = const_cast(result); mutable_result[*out_len] = '\0'; @@ -819,7 +818,6 @@ const char* gdv_fn_aes_decrypt_ecb_legacy(int64_t context, const char* data, &out_valid, out_len); // Add null terminator for string compatibility - // Note: This may not be valid UTF-8, but it's needed for string handling if (result != nullptr && out_valid) { char* mutable_result = const_cast(result); mutable_result[*out_len] = '\0'; @@ -829,7 +827,6 @@ const char* gdv_fn_aes_decrypt_ecb_legacy(int64_t context, const char* data, } // The 3- and 4-arg signatures exist to support optional IV and other arguments -// Note: kResultNullInternal functions receive validity for each argument extern "C" GANDIVA_EXPORT const char* gdv_fn_encrypt_dispatcher_3args( int64_t context, const char* data, int32_t data_len, bool data_validity, @@ -888,13 +885,6 @@ const char* gdv_fn_encrypt_dispatcher_5args( const char* iv_data, int32_t iv_data_len, bool iv_validity, const char* fifth_argument, int32_t fifth_argument_len, bool fifth_argument_validity, bool* out_valid, int32_t* out_len) { - // We use kResultNullInternal to handle NULL inputs selectively: - // - NULL plaintext → return NULL (set out_valid = false) - handled 
in stub - // - NULL key → dispatcher throws validation error - // - NULL mode → dispatcher throws validation error - // - NULL IV → dispatcher auto-generates IV - // - NULL AAD → dispatcher treats as no AAD - // Check if plaintext is NULL - this is the only case where we return NULL if (!data_validity) { *out_valid = false; @@ -941,13 +931,6 @@ const char* gdv_fn_decrypt_dispatcher_5args( const char* iv_data, int32_t iv_data_len, bool iv_validity, const char* fifth_argument, int32_t fifth_argument_len, bool fifth_argument_validity, bool* out_valid, int32_t* out_len) { - // We use kResultNullInternal to handle NULL inputs selectively: - // - NULL ciphertext → return NULL (set out_valid = false) - handled in stub - // - NULL key → dispatcher throws validation error - // - NULL mode → dispatcher throws validation error - // - NULL IV → dispatcher auto-extracts IV from ciphertext - // - NULL AAD → dispatcher treats as no AAD - // Check if ciphertext is NULL - this is the only case where we return NULL if (!data_validity) { *out_valid = false; From 862be211725235c5408ff7c6c6ec3a3288af1c0d Mon Sep 17 00:00:00 2001 From: Tim Hurski Date: Wed, 21 Jan 2026 17:39:38 -0500 Subject: [PATCH 13/14] Checkpoint --- cpp/src/gandiva/encrypt_mode_dispatcher.cc | 8 +++++++- cpp/src/gandiva/gdv_function_stubs.cc | 2 ++ 2 files changed, 9 insertions(+), 1 deletion(-) diff --git a/cpp/src/gandiva/encrypt_mode_dispatcher.cc b/cpp/src/gandiva/encrypt_mode_dispatcher.cc index b1afa4afbae..7a0dcd7cde1 100644 --- a/cpp/src/gandiva/encrypt_mode_dispatcher.cc +++ b/cpp/src/gandiva/encrypt_mode_dispatcher.cc @@ -41,12 +41,13 @@ enum class EncryptionMode { CBC_PKCS7, CBC_NONE, GCM, + NULL_VALUE, UNKNOWN }; EncryptionMode ParseEncryptionMode(const char* mode, int32_t mode_len, bool mode_validity) { if (!mode_validity) { - return EncryptionMode::UNKNOWN; + return EncryptionMode::NULL_VALUE; } // Convert mode string to uppercase for case-insensitive comparison @@ -99,6 +100,8 @@ int32_t 
EncryptModeDispatcher::encrypt( case EncryptionMode::GCM: return aes_encrypt_gcm(plaintext, plaintext_len, key, key_len, iv, iv_len, fifth_argument, fifth_argument_len, cipher); + case EncryptionMode::NULL_VALUE: + throw std::runtime_error(BuildUnsupportedModeError("encryption", "NULL", 4)); case EncryptionMode::UNKNOWN: default: throw std::runtime_error(BuildUnsupportedModeError("encryption", mode, mode_len)); @@ -136,6 +139,9 @@ int32_t EncryptModeDispatcher::decrypt( iv, iv_len, fifth_argument, fifth_argument_len, plaintext); case EncryptionMode::UNKNOWN: default: + if (!mode_validity) { + throw std::runtime_error(BuildUnsupportedModeError("decryption", "NULL", 4)); + } throw std::runtime_error(BuildUnsupportedModeError("decryption", mode, mode_len)); } } diff --git a/cpp/src/gandiva/gdv_function_stubs.cc b/cpp/src/gandiva/gdv_function_stubs.cc index 609afb58c0b..2fc940e3f6f 100644 --- a/cpp/src/gandiva/gdv_function_stubs.cc +++ b/cpp/src/gandiva/gdv_function_stubs.cc @@ -918,6 +918,7 @@ const char* gdv_fn_encrypt_dispatcher_5args( return reinterpret_cast(output); } catch (const std::runtime_error& e) { gdv_fn_context_set_error_msg(context, e.what()); + *out_valid = false; *out_len = 0; return nullptr; } @@ -961,6 +962,7 @@ const char* gdv_fn_decrypt_dispatcher_5args( return reinterpret_cast(output); } catch (const std::runtime_error& e) { gdv_fn_context_set_error_msg(context, e.what()); + *out_valid = false; *out_len = 0; return nullptr; } From fb170107033286623e3a8c1490c946c542f6714c Mon Sep 17 00:00:00 2001 From: Tim Hurski Date: Thu, 22 Jan 2026 12:31:03 -0500 Subject: [PATCH 14/14] Checkpoint --- cpp/src/gandiva/gdv_function_stubs_test.cc | 1001 ++++++++++++-------- 1 file changed, 607 insertions(+), 394 deletions(-) diff --git a/cpp/src/gandiva/gdv_function_stubs_test.cc b/cpp/src/gandiva/gdv_function_stubs_test.cc index a55c9f024ca..7e33c0e41d0 100644 --- a/cpp/src/gandiva/gdv_function_stubs_test.cc +++ b/cpp/src/gandiva/gdv_function_stubs_test.cc @@ 
-1356,22 +1356,15 @@ TEST(TestGdvFnStubs, TestAesEncryptDecrypt16) { int32_t decrypted_len = 0; std::string data = "test string"; auto data_len = static_cast(data.length()); - std::string mode = AES_ECB_MODE; - auto mode_len = static_cast(mode.length()); int64_t ctx_ptr = reinterpret_cast(&ctx); - bool encrypt_valid = true; - const char* cipher = gdv_fn_encrypt_dispatcher_3args( - ctx_ptr, data.c_str(), data_len, true, key16.c_str(), key16_len, true, - mode.c_str(), mode_len, true, &encrypt_valid, &cipher_len); - bool decrypt_valid = true; - const char* decrypted_value = gdv_fn_decrypt_dispatcher_3args( - ctx_ptr, cipher, cipher_len, true, key16.c_str(), key16_len, true, - mode.c_str(), mode_len, true, &decrypt_valid, &decrypted_len); + const char* cipher = gdv_fn_aes_encrypt_ecb_legacy(ctx_ptr, data.c_str(), data_len, key16.c_str(), + key16_len, &cipher_len); + const char* decrypted_value = gdv_fn_aes_decrypt_ecb_legacy( + ctx_ptr, cipher, cipher_len, key16.c_str(), key16_len, &decrypted_len); EXPECT_EQ(data, - std::string(reinterpret_cast(decrypted_value), - decrypted_len)); + std::string(reinterpret_cast(decrypted_value), decrypted_len)); } TEST(TestGdvFnStubs, TestAesEncryptDecrypt24) { @@ -1382,23 +1375,16 @@ TEST(TestGdvFnStubs, TestAesEncryptDecrypt24) { int32_t decrypted_len = 0; std::string data = "test string"; auto data_len = static_cast(data.length()); - std::string mode = AES_ECB_MODE; - auto mode_len = static_cast(mode.length()); int64_t ctx_ptr = reinterpret_cast(&ctx); - bool encrypt_valid = true; - const char* cipher = gdv_fn_encrypt_dispatcher_3args( - ctx_ptr, data.c_str(), data_len, true, key24.c_str(), key24_len, true, - mode.c_str(), mode_len, true, &encrypt_valid, &cipher_len); + const char* cipher = gdv_fn_aes_encrypt_ecb_legacy(ctx_ptr, data.c_str(), data_len, key24.c_str(), + key24_len, &cipher_len); - bool decrypt_valid = true; - const char* decrypted_value = gdv_fn_decrypt_dispatcher_3args( - ctx_ptr, cipher, cipher_len, true, 
key24.c_str(), key24_len, true, - mode.c_str(), mode_len, true, &decrypt_valid, &decrypted_len); + const char* decrypted_value = gdv_fn_aes_decrypt_ecb_legacy( + ctx_ptr, cipher, cipher_len, key24.c_str(), key24_len, &decrypted_len); EXPECT_EQ(data, - std::string(reinterpret_cast(decrypted_value), - decrypted_len)); + std::string(reinterpret_cast(decrypted_value), decrypted_len)); } TEST(TestGdvFnStubs, TestAesEncryptDecrypt32) { @@ -1409,23 +1395,16 @@ TEST(TestGdvFnStubs, TestAesEncryptDecrypt32) { int32_t decrypted_len = 0; std::string data = "test string"; auto data_len = static_cast(data.length()); - std::string mode = AES_ECB_MODE; - auto mode_len = static_cast(mode.length()); int64_t ctx_ptr = reinterpret_cast(&ctx); - bool encrypt_valid = true; - const char* cipher = gdv_fn_encrypt_dispatcher_3args( - ctx_ptr, data.c_str(), data_len, true, key32.c_str(), key32_len, true, - mode.c_str(), mode_len, true, &encrypt_valid, &cipher_len); + const char* cipher = gdv_fn_aes_encrypt_ecb_legacy(ctx_ptr, data.c_str(), data_len, key32.c_str(), + key32_len, &cipher_len); - bool decrypt_valid = true; - const char* decrypted_value = gdv_fn_decrypt_dispatcher_3args( - ctx_ptr, cipher, cipher_len, true, key32.c_str(), key32_len, true, - mode.c_str(), mode_len, true, &decrypt_valid, &decrypted_len); + const char* decrypted_value = gdv_fn_aes_decrypt_ecb_legacy( + ctx_ptr, cipher, cipher_len, key32.c_str(), key32_len, &decrypted_len); EXPECT_EQ(data, - std::string(reinterpret_cast(decrypted_value), - decrypted_len)); + std::string(reinterpret_cast(decrypted_value), decrypted_len)); } TEST(TestGdvFnStubs, TestAesEncryptDecryptValidation) { @@ -1435,52 +1414,50 @@ TEST(TestGdvFnStubs, TestAesEncryptDecryptValidation) { int32_t decrypted_len = 0; std::string data = "test string"; auto data_len = static_cast(data.length()); - std::string mode = AES_ECB_MODE; - auto mode_len = static_cast(mode.length()); int64_t ctx_ptr = reinterpret_cast(&ctx); std::string cipher = 
"12345678abcdefgh12345678abcdefghb"; auto cipher_len = static_cast(cipher.length()); - bool encrypt_valid = true; - gdv_fn_encrypt_dispatcher_3args(ctx_ptr, data.c_str(), data_len, true, - key33.c_str(), key33_len, true, mode.c_str(), - mode_len, true, &encrypt_valid, &cipher_len); - EXPECT_THAT(ctx.get_error(), - ::testing::HasSubstr("Unsupported key length for AES-ECB")); + gdv_fn_aes_encrypt_ecb_legacy(ctx_ptr, data.c_str(), data_len, key33.c_str(), key33_len, + &cipher_len); + EXPECT_THAT(ctx.get_error(), ::testing::HasSubstr("Unsupported key length for AES-ECB: 33 bytes. Supported lengths: 16, 24, 32 bytes")); ctx.Reset(); - bool decrypt_valid = true; - gdv_fn_decrypt_dispatcher_3args(ctx_ptr, cipher.c_str(), cipher_len, true, - key33.c_str(), key33_len, true, mode.c_str(), - mode_len, true, &decrypt_valid, &decrypted_len); - EXPECT_THAT(ctx.get_error(), - ::testing::HasSubstr("Unsupported key length for AES-ECB")); + gdv_fn_aes_decrypt_ecb_legacy(ctx_ptr, cipher.c_str(), cipher_len, key33.c_str(), key33_len, + &decrypted_len); + EXPECT_THAT(ctx.get_error(), ::testing::HasSubstr("Unsupported key length for AES-ECB: 33 bytes. 
Supported lengths: 16, 24, 32 bytes")); ctx.Reset(); } // Tests for new mode-aware AES functions TEST(TestGdvFnStubs, TestAesEncryptDecryptModeEcb) { gandiva::ExecutionContext ctx; - std::string key16 = "12345678abcdefgh"; - auto key16_len = static_cast(key16.length()); - int32_t cipher_len = 0; - int32_t decrypted_len = 0; + int64_t ctx_ptr = reinterpret_cast(&ctx); + std::string data = "test string"; auto data_len = static_cast(data.length()); + + std::string key16 = "12345678abcdefgh"; + auto key16_len = static_cast(key16.length()); + std::string mode = AES_ECB_MODE; auto mode_len = static_cast(mode.length()); - int64_t ctx_ptr = reinterpret_cast(&ctx); + + int32_t cipher_len = 0; + int32_t decrypted_len = 0; bool encrypt_valid = true; const char* cipher = gdv_fn_encrypt_dispatcher_3args( ctx_ptr, data.c_str(), data_len, true, key16.c_str(), key16_len, true, mode.c_str(), mode_len, true, &encrypt_valid, &cipher_len); + EXPECT_TRUE(encrypt_valid); EXPECT_GT(cipher_len, 0); bool decrypt_valid = true; const char* decrypted_value = gdv_fn_decrypt_dispatcher_3args( ctx_ptr, cipher, cipher_len, true, key16.c_str(), key16_len, true, mode.c_str(), mode_len, true, &decrypt_valid, &decrypted_len); + EXPECT_TRUE(decrypt_valid); EXPECT_EQ(data, std::string(reinterpret_cast(decrypted_value), decrypted_len)); @@ -1488,15 +1465,19 @@ TEST(TestGdvFnStubs, TestAesEncryptDecryptModeEcb) { TEST(TestGdvFnStubs, TestAesEncryptDecryptModeValidation) { gandiva::ExecutionContext ctx; - std::string key16 = "12345678abcdefgh"; - auto key16_len = static_cast(key16.length()); - int32_t cipher_len = 0; - int32_t decrypted_len = 0; + int64_t ctx_ptr = reinterpret_cast(&ctx); + std::string data = "test string"; auto data_len = static_cast(data.length()); + + std::string key16 = "12345678abcdefgh"; + auto key16_len = static_cast(key16.length()); + std::string invalid_mode = "AES-INVALID"; auto invalid_mode_len = static_cast(invalid_mode.length()); - int64_t ctx_ptr = reinterpret_cast(&ctx); + 
+ int32_t cipher_len = 0; + int32_t decrypted_len = 0; // Test encrypt with invalid mode bool encrypt_valid = true; @@ -1522,534 +1503,766 @@ TEST(TestGdvFnStubs, TestAesEncryptDecryptModeValidation) { } // Tests for AES-GCM mode -TEST(TestGdvFnStubs, TestAesEncryptDecryptGcmIvOnly) { +TEST(TestGdvFnStubs, TestAesEncryptDecryptGcmWithUserSuppliedIv) { gandiva::ExecutionContext ctx; + int64_t ctx_ptr = reinterpret_cast(&ctx); + + std::string data = "A long-ish test string to make sure the ciphertext is long enough for GCM"; + auto data_len = static_cast(data.length()); + std::string key16 = "12345678abcdefgh"; auto key16_len = static_cast(key16.length()); - int32_t cipher_len = 0; - int32_t decrypted_len = 0; - std::string data = "test string"; - auto data_len = static_cast(data.length()); + std::string mode = AES_GCM_MODE; auto mode_len = static_cast(mode.length()); + std::string iv = "123456789012"; auto iv_len = static_cast(iv.length()); - int64_t ctx_ptr = reinterpret_cast(&ctx); + + int32_t cipher_len = 0; + int32_t decrypted_len = 0; bool encrypt_valid = true; const char* cipher = gdv_fn_encrypt_dispatcher_5args( ctx_ptr, data.c_str(), data_len, true, key16.c_str(), key16_len, true, mode.c_str(), mode_len, true, iv.c_str(), iv_len, true, nullptr, 0, false, &encrypt_valid, &cipher_len); + EXPECT_TRUE(encrypt_valid); EXPECT_GT(cipher_len, 0); - // When IV is supplied to encrypt, it must also be supplied to decrypt - // (IV is only prepended when auto-generated) + // Positive test bool decrypt_valid = true; const char* decrypted_value = gdv_fn_decrypt_dispatcher_5args( ctx_ptr, cipher, cipher_len, true, key16.c_str(), key16_len, true, mode.c_str(), mode_len, true, iv.c_str(), iv_len, true, nullptr, 0, false, &decrypt_valid, &decrypted_len); - + EXPECT_TRUE(decrypt_valid); EXPECT_EQ(data, std::string(reinterpret_cast(decrypted_value), decrypted_len)); + + // Negative test: IV not supplied to decrypt + ctx.Reset(); + decrypt_valid = true; + 
gdv_fn_decrypt_dispatcher_5args( + ctx_ptr, cipher, cipher_len, true, key16.c_str(), key16_len, true, + mode.c_str(), mode_len, true, nullptr, 0, false, + nullptr, 0, false, &decrypt_valid, &decrypted_len); + EXPECT_THAT(ctx.get_error(), ::testing::HasSubstr("GCM tag verification failed or decryption error: Unknown OpenSSL error")); + ctx.Reset(); } -TEST(TestGdvFnStubs, TestAesEncryptDecryptGcmWithAad) { +TEST(TestGdvFnStubs, TestAesEncryptDecryptGcmWithAutoGeneratedIv) { gandiva::ExecutionContext ctx; + int64_t ctx_ptr = reinterpret_cast(&ctx); + + std::string data = "test string"; + auto data_len = static_cast(data.length()); + std::string key16 = "12345678abcdefgh"; auto key16_len = static_cast(key16.length()); - int32_t cipher_len = 0; - int32_t decrypted_len = 0; - std::string data = "test string"; + + std::string mode = AES_GCM_MODE; + auto mode_len = static_cast(mode.length()); + + std::string iv = "123456789012"; + auto iv_len = static_cast(iv.length()); + + bool encrypt_valid = true; + bool decrypt_valid = true; + + // Ideally, we would test every combination of encrypt/decrypt variants, but that + // would be 3*3 = 9 combinations. Instead, we assume each variant is well tested on + // its own and only pair each encrypt variant with a different decrypt variant. 
+ + // Encrypting with the 3-args variant and decrypting with the 4-args variant + int32_t cipher_from_3args_len = 0; + const char* cipher_from_3args = gdv_fn_encrypt_dispatcher_3args( + ctx_ptr, data.c_str(), data_len, true, key16.c_str(), key16_len, true, + mode.c_str(), mode_len, true, &encrypt_valid, &cipher_from_3args_len); + + int32_t decrypted_from_4args_len = 0; + const char* decrypted_from_4args = gdv_fn_decrypt_dispatcher_4args( + ctx_ptr, cipher_from_3args, cipher_from_3args_len, true, key16.c_str(), key16_len, true, + mode.c_str(), mode_len, true, nullptr, 0, false, &decrypt_valid, &decrypted_from_4args_len); + + EXPECT_EQ(data, + std::string(reinterpret_cast(decrypted_from_4args), + decrypted_from_4args_len)); + + // Encrypting with the 4-args variant and decrypting with the 5-args variant + int32_t cipher_from_4args_len = 0; + const char* cipher_from_4args = gdv_fn_encrypt_dispatcher_4args( + ctx_ptr, data.c_str(), data_len, true, key16.c_str(), key16_len, true, + mode.c_str(), mode_len, true, nullptr, 0, false, &encrypt_valid, &cipher_from_4args_len); + + int32_t decrypted_from_5args_len = 0; + const char* decrypted_from_5args = gdv_fn_decrypt_dispatcher_5args( + ctx_ptr, cipher_from_4args, cipher_from_4args_len, true, key16.c_str(), key16_len, true, + mode.c_str(), mode_len, true, nullptr, 0, false, + nullptr, 0, false, &decrypt_valid, &decrypted_from_5args_len); + + EXPECT_EQ(data, + std::string(reinterpret_cast(decrypted_from_5args), + decrypted_from_5args_len)); + + // Encrypting with the 5-args variant and decrypting with the 3-args variant + int32_t cipher_from_5args_len = 0; + const char* cipher_from_5args = gdv_fn_encrypt_dispatcher_5args( + ctx_ptr, data.c_str(), data_len, true, key16.c_str(), key16_len, true, + mode.c_str(), mode_len, true, nullptr, 0, false, + nullptr, 0, false, &encrypt_valid, &cipher_from_5args_len); + + int32_t decrypted_from_3args_len = 0; + const char* decrypted_from_3args = gdv_fn_decrypt_dispatcher_3args( + ctx_ptr, 
cipher_from_5args, cipher_from_5args_len, true, key16.c_str(), key16_len, true, + mode.c_str(), mode_len, true, &decrypt_valid, &decrypted_from_3args_len); + + EXPECT_EQ(data, + std::string(reinterpret_cast(decrypted_from_3args), + decrypted_from_3args_len)); +} + +TEST(TestGdvFnStubs, TestAesEncryptDecryptGcmWithAad) { + gandiva::ExecutionContext ctx; + int64_t ctx_ptr = reinterpret_cast(&ctx); + + std::string data = "A long-ish test string to make sure the ciphertext is long enough for GCM"; auto data_len = static_cast(data.length()); + + std::string key16 = "12345678abcdefgh"; + auto key16_len = static_cast(key16.length()); + std::string mode = AES_GCM_MODE; auto mode_len = static_cast(mode.length()); + std::string iv = "123456789012"; auto iv_len = static_cast(iv.length()); + std::string aad = "additional authenticated data"; auto aad_len = static_cast(aad.length()); - int64_t ctx_ptr = reinterpret_cast(&ctx); + + int32_t cipher_len = 0; + int32_t decrypted_len = 0; bool encrypt_valid = true; const char* cipher = gdv_fn_encrypt_dispatcher_5args( ctx_ptr, data.c_str(), data_len, true, key16.c_str(), key16_len, true, mode.c_str(), mode_len, true, iv.c_str(), iv_len, true, aad.c_str(), aad_len, true, &encrypt_valid, &cipher_len); + EXPECT_TRUE(encrypt_valid); EXPECT_GT(cipher_len, 0); - // When IV is supplied to encrypt, it must also be supplied to decrypt - // (IV is only prepended when auto-generated) + // Positive test bool decrypt_valid = true; const char* decrypted_value = gdv_fn_decrypt_dispatcher_5args( ctx_ptr, cipher, cipher_len, true, key16.c_str(), key16_len, true, mode.c_str(), mode_len, true, iv.c_str(), iv_len, true, aad.c_str(), aad_len, true, &decrypt_valid, &decrypted_len); - + EXPECT_TRUE(decrypt_valid); EXPECT_EQ(data, std::string(reinterpret_cast(decrypted_value), decrypted_len)); + + // Negative test: AAD not supplied to decrypt + ctx.Reset(); + decrypt_valid = true; + gdv_fn_decrypt_dispatcher_5args( + ctx_ptr, cipher, cipher_len, true, 
key16.c_str(), key16_len, true, + mode.c_str(), mode_len, true, iv.c_str(), iv_len, true, + nullptr, 0, false, &decrypt_valid, &decrypted_len); + EXPECT_THAT(ctx.get_error(), ::testing::HasSubstr("GCM tag verification failed or decryption error: Unknown OpenSSL error")); + ctx.Reset(); + + // Negative test: a different AAD is supplied to decrypt + ctx.Reset(); + decrypt_valid = true; + std::string different_aad = "different aad"; + auto different_aad_len = static_cast(different_aad.length()); + gdv_fn_decrypt_dispatcher_5args( + ctx_ptr, cipher, cipher_len, true, key16.c_str(), key16_len, true, + mode.c_str(), mode_len, true, iv.c_str(), iv_len, true, + different_aad.c_str(), different_aad_len, true, &decrypt_valid, &decrypted_len); + EXPECT_THAT(ctx.get_error(), ::testing::HasSubstr("GCM tag verification failed or decryption error: Unknown OpenSSL error")); } -// Tests for shorthand mode: AES-ECB (defaults to PKCS7) -TEST(TestGdvFnStubs, TestAesEncryptDecryptShorthandEcb) { +TEST(TestGdvFnStubs, TestAesEncryptDecryptCbcWithAutoGeneratedIv) { gandiva::ExecutionContext ctx; - std::string key16 = "12345678abcdefgh"; - auto key16_len = static_cast(key16.length()); - int32_t cipher_len = 0; - int32_t decrypted_len = 0; + int64_t ctx_ptr = reinterpret_cast(&ctx); + std::string data = "test string"; auto data_len = static_cast(data.length()); - std::string mode = AES_ECB_MODE; // Shorthand mode + + std::string key16 = "12345678abcdefgh"; + auto key16_len = static_cast(key16.length()); + + std::string mode = AES_CBC_MODE; auto mode_len = static_cast(mode.length()); - int64_t ctx_ptr = reinterpret_cast(&ctx); bool encrypt_valid = true; - const char* cipher = gdv_fn_encrypt_dispatcher_3args( - ctx_ptr, data.c_str(), data_len, true, key16.c_str(), key16_len, true, - mode.c_str(), mode_len, true, &encrypt_valid, &cipher_len); - EXPECT_GT(cipher_len, 0); - bool decrypt_valid = true; - const char* decrypted_value = gdv_fn_decrypt_dispatcher_3args( - ctx_ptr, cipher, cipher_len, 
true, key16.c_str(), key16_len, true, - mode.c_str(), mode_len, true, &decrypt_valid, &decrypted_len); - EXPECT_EQ(data, - std::string(reinterpret_cast(decrypted_value), - decrypted_len)); -} + // Ideally, we would want to test all combinations of encrypt/decrypt variants, but + // that would result in 3*3 = 9 combinations. Instead, we assume the respective variants + // are well tested elsewhere and only test the encrypt/decrypt functions out of pairs. -// Tests for explicit mode: AES-ECB-PKCS7 -TEST(TestGdvFnStubs, TestAesEncryptDecryptExplicitEcbPkcs7) { - gandiva::ExecutionContext ctx; - std::string key16 = "12345678abcdefgh"; - auto key16_len = static_cast(key16.length()); - int32_t cipher_len = 0; - int32_t decrypted_len = 0; - std::string data = "test string"; - auto data_len = static_cast(data.length()); - std::string mode = AES_ECB_PKCS7_MODE; // Explicit mode - auto mode_len = static_cast(mode.length()); - int64_t ctx_ptr = reinterpret_cast(&ctx); + // Encrypting with the 3-args variant and decrypting with the 4-args variant + // Note: 3-args doesn't support IV, so this won't work for CBC. Skip this combination. 
- bool encrypt_valid2 = true; - const char* cipher = gdv_fn_encrypt_dispatcher_3args( + // Encrypting with the 4-args variant (NULL IV) and decrypting with the 5-args variant (NULL IV) + int32_t cipher_from_4args_len = 0; + const char* cipher_from_4args = gdv_fn_encrypt_dispatcher_4args( ctx_ptr, data.c_str(), data_len, true, key16.c_str(), key16_len, true, - mode.c_str(), mode_len, true, &encrypt_valid2, &cipher_len); - EXPECT_GT(cipher_len, 0); + mode.c_str(), mode_len, true, nullptr, 0, false, &encrypt_valid, &cipher_from_4args_len); + EXPECT_TRUE(encrypt_valid); + EXPECT_GT(cipher_from_4args_len, 0); - bool decrypt_valid2 = true; - const char* decrypted_value = gdv_fn_decrypt_dispatcher_3args( - ctx_ptr, cipher, cipher_len, true, key16.c_str(), key16_len, true, - mode.c_str(), mode_len, true, &decrypt_valid2, &decrypted_len); + int32_t decrypted_from_5args_len = 0; + const char* decrypted_from_5args = gdv_fn_decrypt_dispatcher_5args( + ctx_ptr, cipher_from_4args, cipher_from_4args_len, true, key16.c_str(), key16_len, true, + mode.c_str(), mode_len, true, nullptr, 0, false, + nullptr, 0, false, &decrypt_valid, &decrypted_from_5args_len); + EXPECT_TRUE(decrypt_valid); + EXPECT_EQ(data, + std::string(reinterpret_cast(decrypted_from_5args), + decrypted_from_5args_len)); + // Encrypting with the 5-args variant (NULL IV) and decrypting with the 4-args variant (NULL IV) + int32_t cipher_from_5args_len = 0; + const char* cipher_from_5args = gdv_fn_encrypt_dispatcher_5args( + ctx_ptr, data.c_str(), data_len, true, key16.c_str(), key16_len, true, + mode.c_str(), mode_len, true, nullptr, 0, false, + nullptr, 0, false, &encrypt_valid, &cipher_from_5args_len); + EXPECT_TRUE(encrypt_valid); + EXPECT_GT(cipher_from_5args_len, 0); + + int32_t decrypted_from_4args_len = 0; + const char* decrypted_from_4args = gdv_fn_decrypt_dispatcher_4args( + ctx_ptr, cipher_from_5args, cipher_from_5args_len, true, key16.c_str(), key16_len, true, + mode.c_str(), mode_len, true, nullptr, 0, 
false, &decrypt_valid, &decrypted_from_4args_len); + EXPECT_TRUE(decrypt_valid); EXPECT_EQ(data, - std::string(reinterpret_cast(decrypted_value), - decrypted_len)); + std::string(reinterpret_cast(decrypted_from_4args), + decrypted_from_4args_len)); } -// Tests for shorthand mode: AES-CBC (defaults to PKCS7) -TEST(TestGdvFnStubs, TestAesEncryptDecryptShorthandCbc) { +TEST(TestGdvFnStubs, TestAesEncryptEcbWithBlockAlignedData) { gandiva::ExecutionContext ctx; + int64_t ctx_ptr = reinterpret_cast(&ctx); + + // Data with length that is a multiple of 16 + std::string data = "12345678901234561234567890123456"; + auto data_len = static_cast(data.length()); + std::string key16 = "12345678abcdefgh"; auto key16_len = static_cast(key16.length()); - int32_t cipher_len = 0; - int32_t decrypted_len = 0; - std::string data = "test string"; - auto data_len = static_cast(data.length()); - std::string mode = AES_CBC_MODE; // Shorthand mode - auto mode_len = static_cast(mode.length()); - std::string iv = "1234567890123456"; - auto iv_len = static_cast(iv.length()); - int64_t ctx_ptr = reinterpret_cast(&ctx); + int32_t cipher_len = 0; bool encrypt_valid = true; - const char* cipher = gdv_fn_encrypt_dispatcher_4args( + + // Test AES-ECB (shorthand, with PKCS7 padding) + std::string mode_ecb = AES_ECB_MODE; + auto mode_ecb_len = static_cast(mode_ecb.length()); + const char* cipher_ecb = gdv_fn_encrypt_dispatcher_3args( ctx_ptr, data.c_str(), data_len, true, key16.c_str(), key16_len, true, - mode.c_str(), mode_len, true, iv.c_str(), iv_len, true, - &encrypt_valid, &cipher_len); + mode_ecb.c_str(), mode_ecb_len, true, &encrypt_valid, &cipher_len); + EXPECT_TRUE(encrypt_valid); EXPECT_GT(cipher_len, 0); - // When IV is supplied to encrypt, it must also be supplied to decrypt - // (IV is only prepended when auto-generated) - bool decrypt_valid = true; - const char* decrypted_value = gdv_fn_decrypt_dispatcher_4args( - ctx_ptr, cipher, cipher_len, true, key16.c_str(), key16_len, true, - 
mode.c_str(), mode_len, true, iv.c_str(), iv_len, true, - &decrypt_valid, &decrypted_len); + // Test AES-ECB-PKCS7 (explicit PKCS7 padding) + std::string mode_ecb_pkcs7 = AES_ECB_PKCS7_MODE; + auto mode_ecb_pkcs7_len = static_cast(mode_ecb_pkcs7.length()); + encrypt_valid = true; + cipher_len = 0; + const char* cipher_ecb_pkcs7 = gdv_fn_encrypt_dispatcher_3args( + ctx_ptr, data.c_str(), data_len, true, key16.c_str(), key16_len, true, + mode_ecb_pkcs7.c_str(), mode_ecb_pkcs7_len, true, &encrypt_valid, &cipher_len); + EXPECT_TRUE(encrypt_valid); + EXPECT_GT(cipher_len, 0); - EXPECT_EQ(data, - std::string(reinterpret_cast(decrypted_value), - decrypted_len)); + // Test AES-ECB-NONE (no padding) + std::string mode_ecb_none = AES_ECB_NONE_MODE; + auto mode_ecb_none_len = static_cast(mode_ecb_none.length()); + encrypt_valid = true; + cipher_len = 0; + const char* cipher_ecb_none = gdv_fn_encrypt_dispatcher_3args( + ctx_ptr, data.c_str(), data_len, true, key16.c_str(), key16_len, true, + mode_ecb_none.c_str(), mode_ecb_none_len, true, &encrypt_valid, &cipher_len); + EXPECT_TRUE(encrypt_valid); + EXPECT_GT(cipher_len, 0); } -// Tests for explicit mode: AES-CBC-PKCS7 -TEST(TestGdvFnStubs, TestAesEncryptDecryptExplicitCbcPkcs7) { +TEST(TestGdvFnStubs, TestAesDecryptEcbWithBlockAlignedData) { gandiva::ExecutionContext ctx; + int64_t ctx_ptr = reinterpret_cast(&ctx); + + // Data with length that is a multiple of 16 + std::string data = "12345678901234561234567890123456"; + auto data_len = static_cast(data.length()); + std::string key16 = "12345678abcdefgh"; auto key16_len = static_cast(key16.length()); - int32_t cipher_len = 0; - int32_t decrypted_len = 0; - std::string data = "test string"; - auto data_len = static_cast(data.length()); - std::string mode = AES_CBC_PKCS7_MODE; // Explicit mode - auto mode_len = static_cast(mode.length()); - std::string iv = "1234567890123456"; - auto iv_len = static_cast(iv.length()); - int64_t ctx_ptr = reinterpret_cast(&ctx); + // Encrypt 
once with AES-ECB-PKCS7 to get ciphertext + std::string mode_ecb_pkcs7 = AES_ECB_PKCS7_MODE; + auto mode_ecb_pkcs7_len = static_cast(mode_ecb_pkcs7.length()); + int32_t cipher_len = 0; bool encrypt_valid = true; - const char* cipher = gdv_fn_encrypt_dispatcher_4args( + const char* cipher = gdv_fn_encrypt_dispatcher_3args( ctx_ptr, data.c_str(), data_len, true, key16.c_str(), key16_len, true, - mode.c_str(), mode_len, true, iv.c_str(), iv_len, true, - &encrypt_valid, &cipher_len); + mode_ecb_pkcs7.c_str(), mode_ecb_pkcs7_len, true, &encrypt_valid, &cipher_len); + EXPECT_TRUE(encrypt_valid); EXPECT_GT(cipher_len, 0); - // When IV is supplied to encrypt, it must also be supplied to decrypt - // (IV is only prepended when auto-generated) + int32_t decrypted_len = 0; bool decrypt_valid = true; - const char* decrypted_value = gdv_fn_decrypt_dispatcher_4args( - ctx_ptr, cipher, cipher_len, true, key16.c_str(), key16_len, true, - mode.c_str(), mode_len, true, iv.c_str(), iv_len, true, - &decrypt_valid, &decrypted_len); - EXPECT_EQ(data, - std::string(reinterpret_cast(decrypted_value), - decrypted_len)); + // Test AES-ECB (shorthand, with PKCS7 padding) + std::string mode_ecb = AES_ECB_MODE; + auto mode_ecb_len = static_cast(mode_ecb.length()); + const char* decrypted_ecb = gdv_fn_decrypt_dispatcher_3args( + ctx_ptr, cipher, cipher_len, true, key16.c_str(), key16_len, true, + mode_ecb.c_str(), mode_ecb_len, true, &decrypt_valid, &decrypted_len); + EXPECT_TRUE(decrypt_valid); + EXPECT_GT(decrypted_len, 0); + + // Test AES-ECB-PKCS7 (explicit PKCS7 padding) + decrypt_valid = true; + decrypted_len = 0; + const char* decrypted_ecb_pkcs7 = gdv_fn_decrypt_dispatcher_3args( + ctx_ptr, cipher, cipher_len, true, key16.c_str(), key16_len, true, + mode_ecb_pkcs7.c_str(), mode_ecb_pkcs7_len, true, &decrypt_valid, &decrypted_len); + EXPECT_TRUE(decrypt_valid); + EXPECT_GT(decrypted_len, 0); + + // Test AES-ECB-NONE (no padding) + std::string mode_ecb_none = AES_ECB_NONE_MODE; + auto 
mode_ecb_none_len = static_cast(mode_ecb_none.length()); + decrypt_valid = true; + decrypted_len = 0; + const char* decrypted_ecb_none = gdv_fn_decrypt_dispatcher_3args( + ctx_ptr, cipher, cipher_len, true, key16.c_str(), key16_len, true, + mode_ecb_none.c_str(), mode_ecb_none_len, true, &decrypt_valid, &decrypted_len); + EXPECT_TRUE(decrypt_valid); + EXPECT_GT(decrypted_len, 0); } -// Tests for explicit mode: AES-CBC-NONE (no padding) -TEST(TestGdvFnStubs, TestAesEncryptDecryptCbcNone) { +TEST(TestGdvFnStubs, TestAesEncryptEcbWithNonBlockAlignedData) { gandiva::ExecutionContext ctx; + int64_t ctx_ptr = reinterpret_cast(&ctx); + + // Data with length that is not a multiple of 16 + std::string data = "test string"; + auto data_len = static_cast(data.length()); + std::string key16 = "12345678abcdefgh"; auto key16_len = static_cast(key16.length()); - int32_t cipher_len = 0; - int32_t decrypted_len = 0; - // Use exactly 16 bytes (one block) for no-padding mode - std::string data = "1234567890123456"; - auto data_len = static_cast(data.length()); - std::string mode = AES_CBC_NONE_MODE; // No padding mode - auto mode_len = static_cast(mode.length()); - std::string iv = "1234567890123456"; - auto iv_len = static_cast(iv.length()); - int64_t ctx_ptr = reinterpret_cast(&ctx); + int32_t cipher_len = 0; bool encrypt_valid = true; - const char* cipher = gdv_fn_encrypt_dispatcher_4args( + + // Test AES-ECB (shorthand, with PKCS7 padding) - should succeed + std::string mode_ecb = AES_ECB_MODE; + auto mode_ecb_len = static_cast(mode_ecb.length()); + const char* cipher_ecb = gdv_fn_encrypt_dispatcher_3args( ctx_ptr, data.c_str(), data_len, true, key16.c_str(), key16_len, true, - mode.c_str(), mode_len, true, iv.c_str(), iv_len, true, - &encrypt_valid, &cipher_len); + mode_ecb.c_str(), mode_ecb_len, true, &encrypt_valid, &cipher_len); + EXPECT_TRUE(encrypt_valid); EXPECT_GT(cipher_len, 0); - // When IV is supplied to encrypt, it must also be supplied to decrypt - // (IV is only 
prepended when auto-generated) - bool decrypt_valid = true; - const char* decrypted_value = gdv_fn_decrypt_dispatcher_4args( - ctx_ptr, cipher, cipher_len, true, key16.c_str(), key16_len, true, - mode.c_str(), mode_len, true, iv.c_str(), iv_len, true, - &decrypt_valid, &decrypted_len); + // Test AES-ECB-PKCS7 (explicit PKCS7 padding) - should succeed + std::string mode_ecb_pkcs7 = AES_ECB_PKCS7_MODE; + auto mode_ecb_pkcs7_len = static_cast(mode_ecb_pkcs7.length()); + encrypt_valid = true; + cipher_len = 0; + const char* cipher_ecb_pkcs7 = gdv_fn_encrypt_dispatcher_3args( + ctx_ptr, data.c_str(), data_len, true, key16.c_str(), key16_len, true, + mode_ecb_pkcs7.c_str(), mode_ecb_pkcs7_len, true, &encrypt_valid, &cipher_len); + EXPECT_TRUE(encrypt_valid); + EXPECT_GT(cipher_len, 0); - EXPECT_EQ(data, - std::string(reinterpret_cast(decrypted_value), - decrypted_len)); + // Test AES-ECB-NONE (no padding) - should fail because data is not block-aligned + std::string mode_ecb_none = AES_ECB_NONE_MODE; + auto mode_ecb_none_len = static_cast(mode_ecb_none.length()); + encrypt_valid = true; + cipher_len = 0; + const char* cipher_ecb_none = gdv_fn_encrypt_dispatcher_3args( + ctx_ptr, data.c_str(), data_len, true, key16.c_str(), key16_len, true, + mode_ecb_none.c_str(), mode_ecb_none_len, true, &encrypt_valid, &cipher_len); + EXPECT_FALSE(encrypt_valid); + EXPECT_TRUE(ctx.has_error()); + EXPECT_THAT(ctx.get_error(), ::testing::HasSubstr("Could not finalize EVP cipher context for encryption")); } -// Test that ENCRYPT(plaintext, key, 'AES-GCM', NULL) works (NULL IV should auto-generate) -TEST(TestGdvFnStubs, TestAesEncryptGcmWithNullIv4Args) { +TEST(TestGdvFnStubs, TestAesDecryptEcbWithNonBlockAlignedData) { gandiva::ExecutionContext ctx; - std::string key16 = "12345678abcdefgh"; - auto key16_len = static_cast(key16.length()); - int32_t cipher_len = 0; - int32_t decrypted_len = 0; + int64_t ctx_ptr = reinterpret_cast(&ctx); + + // Data with length that is not a multiple of 16 
std::string data = "test string"; auto data_len = static_cast(data.length()); - std::string mode = AES_GCM_MODE; - auto mode_len = static_cast(mode.length()); - int64_t ctx_ptr = reinterpret_cast(&ctx); - // Test 4-arg version with NULL IV (should auto-generate IV) + std::string key16 = "12345678abcdefgh"; + auto key16_len = static_cast(key16.length()); + + // Encrypt once with AES-ECB-PKCS7 to get ciphertext + std::string mode_ecb_pkcs7 = AES_ECB_PKCS7_MODE; + auto mode_ecb_pkcs7_len = static_cast(mode_ecb_pkcs7.length()); + int32_t cipher_len = 0; bool encrypt_valid = true; - const char* cipher = gdv_fn_encrypt_dispatcher_4args( + const char* cipher = gdv_fn_encrypt_dispatcher_3args( ctx_ptr, data.c_str(), data_len, true, key16.c_str(), key16_len, true, - mode.c_str(), mode_len, true, nullptr, 0, false, - &encrypt_valid, &cipher_len); + mode_ecb_pkcs7.c_str(), mode_ecb_pkcs7_len, true, &encrypt_valid, &cipher_len); + EXPECT_TRUE(encrypt_valid); EXPECT_GT(cipher_len, 0); - EXPECT_TRUE(cipher != nullptr); - // Decrypt with NULL IV (should extract from ciphertext) + int32_t decrypted_len = 0; bool decrypt_valid = true; - const char* decrypted_value = gdv_fn_decrypt_dispatcher_4args( - ctx_ptr, cipher, cipher_len, true, key16.c_str(), key16_len, true, - mode.c_str(), mode_len, true, nullptr, 0, false, - &decrypt_valid, &decrypted_len); - EXPECT_EQ(data, - std::string(reinterpret_cast(decrypted_value), - decrypted_len)); + // Test AES-ECB (shorthand, with PKCS7 padding) - should succeed + std::string mode_ecb = AES_ECB_MODE; + auto mode_ecb_len = static_cast(mode_ecb.length()); + const char* decrypted_ecb = gdv_fn_decrypt_dispatcher_3args( + ctx_ptr, cipher, cipher_len, true, key16.c_str(), key16_len, true, + mode_ecb.c_str(), mode_ecb_len, true, &decrypt_valid, &decrypted_len); + EXPECT_TRUE(decrypt_valid); + EXPECT_EQ(decrypted_len, data_len); // Returns original data length (padding removed) + + // Test AES-ECB-PKCS7 (explicit PKCS7 padding) - should succeed + 
decrypt_valid = true; + decrypted_len = 0; + const char* decrypted_ecb_pkcs7 = gdv_fn_decrypt_dispatcher_3args( + ctx_ptr, cipher, cipher_len, true, key16.c_str(), key16_len, true, + mode_ecb_pkcs7.c_str(), mode_ecb_pkcs7_len, true, &decrypt_valid, &decrypted_len); + EXPECT_TRUE(decrypt_valid); + EXPECT_EQ(decrypted_len, data_len); // Returns original data length (padding removed) + + // Test AES-ECB-NONE (no padding) - should succeed but return padded data + std::string mode_ecb_none = AES_ECB_NONE_MODE; + auto mode_ecb_none_len = static_cast(mode_ecb_none.length()); + decrypt_valid = true; + decrypted_len = 0; + const char* decrypted_ecb_none = gdv_fn_decrypt_dispatcher_3args( + ctx_ptr, cipher, cipher_len, true, key16.c_str(), key16_len, true, + mode_ecb_none.c_str(), mode_ecb_none_len, true, &decrypt_valid, &decrypted_len); + EXPECT_TRUE(decrypt_valid); + EXPECT_EQ(decrypted_len, cipher_len); // Returns full block including padding } -// Test that ENCRYPT(plaintext, key, 'AES-GCM', NULL, NULL) works (NULL IV and NULL AAD) -TEST(TestGdvFnStubs, TestAesEncryptGcmWithNullIvAndNullAad) { +// CBC mode tests with block-aligned and non-block-aligned data + +TEST(TestGdvFnStubs, TestAesEncryptCbcWithBlockAlignedData) { gandiva::ExecutionContext ctx; + int64_t ctx_ptr = reinterpret_cast(&ctx); + + // Data with length that is a multiple of 16 (32 bytes) + std::string data = "12345678901234561234567890123456"; + auto data_len = static_cast(data.length()); + std::string key16 = "12345678abcdefgh"; auto key16_len = static_cast(key16.length()); - int32_t cipher_len = 0; - int32_t decrypted_len = 0; - std::string data = "test string"; - auto data_len = static_cast(data.length()); - std::string mode = AES_GCM_MODE; - auto mode_len = static_cast(mode.length()); - int64_t ctx_ptr = reinterpret_cast(&ctx); - // Test 5-arg version with NULL IV and NULL AAD + std::string iv = "1234567890123456"; + auto iv_len = static_cast(iv.length()); + + int32_t cipher_len = 0; bool 
encrypt_valid = true; - const char* cipher = gdv_fn_encrypt_dispatcher_5args( + + // Test AES-CBC (shorthand, with PKCS7 padding) + std::string mode_cbc = AES_CBC_MODE; + auto mode_cbc_len = static_cast(mode_cbc.length()); + const char* cipher_cbc = gdv_fn_encrypt_dispatcher_4args( ctx_ptr, data.c_str(), data_len, true, key16.c_str(), key16_len, true, - mode.c_str(), mode_len, true, nullptr, 0, false, nullptr, 0, false, + mode_cbc.c_str(), mode_cbc_len, true, iv.c_str(), iv_len, true, &encrypt_valid, &cipher_len); + EXPECT_TRUE(encrypt_valid); EXPECT_GT(cipher_len, 0); - EXPECT_TRUE(cipher != nullptr); - // Decrypt with NULL IV and NULL AAD - bool decrypt_valid = true; - const char* decrypted_value = gdv_fn_decrypt_dispatcher_5args( - ctx_ptr, cipher, cipher_len, true, key16.c_str(), key16_len, true, - mode.c_str(), mode_len, true, nullptr, 0, false, nullptr, 0, false, - &decrypt_valid, &decrypted_len); + // Test AES-CBC-PKCS7 (explicit PKCS7 padding) + std::string mode_cbc_pkcs7 = AES_CBC_PKCS7_MODE; + auto mode_cbc_pkcs7_len = static_cast(mode_cbc_pkcs7.length()); + encrypt_valid = true; + cipher_len = 0; + const char* cipher_cbc_pkcs7 = gdv_fn_encrypt_dispatcher_4args( + ctx_ptr, data.c_str(), data_len, true, key16.c_str(), key16_len, true, + mode_cbc_pkcs7.c_str(), mode_cbc_pkcs7_len, true, iv.c_str(), iv_len, true, + &encrypt_valid, &cipher_len); + EXPECT_TRUE(encrypt_valid); + EXPECT_GT(cipher_len, 0); - EXPECT_EQ(data, - std::string(reinterpret_cast(decrypted_value), - decrypted_len)); + // Test AES-CBC-NONE (no padding) + std::string mode_cbc_none = AES_CBC_NONE_MODE; + auto mode_cbc_none_len = static_cast(mode_cbc_none.length()); + encrypt_valid = true; + cipher_len = 0; + const char* cipher_cbc_none = gdv_fn_encrypt_dispatcher_4args( + ctx_ptr, data.c_str(), data_len, true, key16.c_str(), key16_len, true, + mode_cbc_none.c_str(), mode_cbc_none_len, true, iv.c_str(), iv_len, true, + &encrypt_valid, &cipher_len); + EXPECT_TRUE(encrypt_valid); + 
EXPECT_GT(cipher_len, 0); } -// Test that ENCRYPT(plaintext, key, 'AES-CBC', NULL) works (NULL IV should auto-generate) -TEST(TestGdvFnStubs, TestAesEncryptCbcWithNullIv4Args) { +TEST(TestGdvFnStubs, TestAesDecryptCbcWithBlockAlignedData) { gandiva::ExecutionContext ctx; + int64_t ctx_ptr = reinterpret_cast(&ctx); + + // Data with length that is a multiple of 16 (32 bytes) + std::string data = "12345678901234561234567890123456"; + auto data_len = static_cast(data.length()); + std::string key16 = "12345678abcdefgh"; auto key16_len = static_cast(key16.length()); - int32_t cipher_len = 0; - int32_t decrypted_len = 0; - std::string data = "test string"; - auto data_len = static_cast(data.length()); - std::string mode = AES_CBC_MODE; - auto mode_len = static_cast(mode.length()); - int64_t ctx_ptr = reinterpret_cast(&ctx); - // Test 4-arg version with NULL IV (should auto-generate IV) + std::string iv = "1234567890123456"; + auto iv_len = static_cast(iv.length()); + + // Encrypt once with AES-CBC-PKCS7 to get ciphertext + std::string mode_cbc_pkcs7 = AES_CBC_PKCS7_MODE; + auto mode_cbc_pkcs7_len = static_cast(mode_cbc_pkcs7.length()); + int32_t cipher_len = 0; bool encrypt_valid = true; const char* cipher = gdv_fn_encrypt_dispatcher_4args( ctx_ptr, data.c_str(), data_len, true, key16.c_str(), key16_len, true, - mode.c_str(), mode_len, true, nullptr, 0, false, + mode_cbc_pkcs7.c_str(), mode_cbc_pkcs7_len, true, iv.c_str(), iv_len, true, &encrypt_valid, &cipher_len); + EXPECT_TRUE(encrypt_valid); EXPECT_GT(cipher_len, 0); - EXPECT_TRUE(cipher != nullptr); - // Decrypt with NULL IV (should extract from ciphertext) + int32_t decrypted_len = 0; bool decrypt_valid = true; - const char* decrypted_value = gdv_fn_decrypt_dispatcher_4args( + + // Test AES-CBC (shorthand, with PKCS7 padding) + std::string mode_cbc = AES_CBC_MODE; + auto mode_cbc_len = static_cast(mode_cbc.length()); + const char* decrypted_cbc = gdv_fn_decrypt_dispatcher_4args( ctx_ptr, cipher, cipher_len, true, 
key16.c_str(), key16_len, true, - mode.c_str(), mode_len, true, nullptr, 0, false, + mode_cbc.c_str(), mode_cbc_len, true, iv.c_str(), iv_len, true, &decrypt_valid, &decrypted_len); + EXPECT_TRUE(decrypt_valid); + EXPECT_GT(decrypted_len, 0); - EXPECT_EQ(data, - std::string(reinterpret_cast(decrypted_value), - decrypted_len)); + // Test AES-CBC-PKCS7 (explicit PKCS7 padding) + decrypt_valid = true; + decrypted_len = 0; + const char* decrypted_cbc_pkcs7 = gdv_fn_decrypt_dispatcher_4args( + ctx_ptr, cipher, cipher_len, true, key16.c_str(), key16_len, true, + mode_cbc_pkcs7.c_str(), mode_cbc_pkcs7_len, true, iv.c_str(), iv_len, true, + &decrypt_valid, &decrypted_len); + EXPECT_TRUE(decrypt_valid); + EXPECT_GT(decrypted_len, 0); + + // Test AES-CBC-NONE (no padding) + std::string mode_cbc_none = AES_CBC_NONE_MODE; + auto mode_cbc_none_len = static_cast(mode_cbc_none.length()); + decrypt_valid = true; + decrypted_len = 0; + const char* decrypted_cbc_none = gdv_fn_decrypt_dispatcher_4args( + ctx_ptr, cipher, cipher_len, true, key16.c_str(), key16_len, true, + mode_cbc_none.c_str(), mode_cbc_none_len, true, iv.c_str(), iv_len, true, + &decrypt_valid, &decrypted_len); + EXPECT_TRUE(decrypt_valid); + EXPECT_GT(decrypted_len, 0); } -// Test that ENCRYPT(plaintext, key, 'AES-GCM', NULL, aad) works (NULL IV with non-NULL AAD) -TEST(TestGdvFnStubs, TestAesEncryptGcmWithNullIvButWithAad) { +TEST(TestGdvFnStubs, TestAesEncryptCbcWithNonBlockAlignedData) { gandiva::ExecutionContext ctx; - std::string key16 = "12345678abcdefgh"; - auto key16_len = static_cast(key16.length()); - int32_t cipher_len = 0; - int32_t decrypted_len = 0; + int64_t ctx_ptr = reinterpret_cast(&ctx); + + // Data with length that is NOT a multiple of 16 (11 bytes) std::string data = "test string"; auto data_len = static_cast(data.length()); - std::string mode = AES_GCM_MODE; - auto mode_len = static_cast(mode.length()); - std::string aad = "additional authenticated data"; - auto aad_len = 
static_cast(aad.length()); - int64_t ctx_ptr = reinterpret_cast(&ctx); - // Test 5-arg version with NULL IV but non-NULL AAD + std::string key16 = "12345678abcdefgh"; + auto key16_len = static_cast(key16.length()); + + std::string iv = "1234567890123456"; + auto iv_len = static_cast(iv.length()); + + int32_t cipher_len = 0; bool encrypt_valid = true; - const char* cipher = gdv_fn_encrypt_dispatcher_5args( + + // Test AES-CBC (shorthand, with PKCS7 padding) - should succeed + std::string mode_cbc = AES_CBC_MODE; + auto mode_cbc_len = static_cast(mode_cbc.length()); + const char* cipher_cbc = gdv_fn_encrypt_dispatcher_4args( ctx_ptr, data.c_str(), data_len, true, key16.c_str(), key16_len, true, - mode.c_str(), mode_len, true, nullptr, 0, false, aad.c_str(), aad_len, true, + mode_cbc.c_str(), mode_cbc_len, true, iv.c_str(), iv_len, true, &encrypt_valid, &cipher_len); + EXPECT_TRUE(encrypt_valid); EXPECT_GT(cipher_len, 0); - EXPECT_TRUE(cipher != nullptr); - - // Decrypt with NULL IV and same AAD - bool decrypt_valid = true; - const char* decrypted_value = gdv_fn_decrypt_dispatcher_5args( - ctx_ptr, cipher, cipher_len, true, key16.c_str(), key16_len, true, - mode.c_str(), mode_len, true, nullptr, 0, false, aad.c_str(), aad_len, true, - &decrypt_valid, &decrypted_len); - - EXPECT_EQ(data, - std::string(reinterpret_cast(decrypted_value), - decrypted_len)); -} - -// Test that NULL mode (mode_validity = false) throws an error -TEST(TestGdvFnStubs, TestAesEncryptWithNullMode) { - gandiva::ExecutionContext ctx; - std::string key16 = "12345678abcdefgh"; - auto key16_len = static_cast(key16.length()); - int32_t cipher_len = 0; - std::string data = "test string"; - auto data_len = static_cast(data.length()); - // Mode has garbage data but validity is false - std::string mode = "GARBAGE_MODE"; - auto mode_len = static_cast(mode.length()); - int64_t ctx_ptr = reinterpret_cast(&ctx); - bool encrypt_valid = true; - const char* cipher = gdv_fn_encrypt_dispatcher_3args( + // Test 
AES-CBC-PKCS7 (explicit PKCS7 padding) - should succeed + std::string mode_cbc_pkcs7 = AES_CBC_PKCS7_MODE; + auto mode_cbc_pkcs7_len = static_cast(mode_cbc_pkcs7.length()); + encrypt_valid = true; + cipher_len = 0; + const char* cipher_cbc_pkcs7 = gdv_fn_encrypt_dispatcher_4args( ctx_ptr, data.c_str(), data_len, true, key16.c_str(), key16_len, true, - mode.c_str(), mode_len, false, // mode_validity = false (NULL mode) + mode_cbc_pkcs7.c_str(), mode_cbc_pkcs7_len, true, iv.c_str(), iv_len, true, &encrypt_valid, &cipher_len); + EXPECT_TRUE(encrypt_valid); + EXPECT_GT(cipher_len, 0); - // Should fail with error message about NULL mode + // Test AES-CBC-NONE (no padding) - should fail because data is not block-aligned + std::string mode_cbc_none = AES_CBC_NONE_MODE; + auto mode_cbc_none_len = static_cast(mode_cbc_none.length()); + encrypt_valid = true; + cipher_len = 0; + const char* cipher_cbc_none = gdv_fn_encrypt_dispatcher_4args( + ctx_ptr, data.c_str(), data_len, true, key16.c_str(), key16_len, true, + mode_cbc_none.c_str(), mode_cbc_none_len, true, iv.c_str(), iv_len, true, + &encrypt_valid, &cipher_len); EXPECT_FALSE(encrypt_valid); - EXPECT_EQ(cipher, nullptr); EXPECT_TRUE(ctx.has_error()); - EXPECT_NE(std::string(ctx.get_error()).find("Unsupported encryption mode: NULL"), - std::string::npos); + EXPECT_THAT(ctx.get_error(), ::testing::HasSubstr("Could not finalize EVP cipher context for encryption")); } -// Test that NULL mode (mode_validity = false) throws an error for decrypt -TEST(TestGdvFnStubs, TestAesDecryptWithNullMode) { +TEST(TestGdvFnStubs, TestAesDecryptCbcWithNonBlockAlignedData) { gandiva::ExecutionContext ctx; - std::string key16 = "12345678abcdefgh"; - auto key16_len = static_cast(key16.length()); - int32_t decrypted_len = 0; - std::string data = "some ciphertext"; - auto data_len = static_cast(data.length()); - // Mode has garbage data but validity is false - std::string mode = "GARBAGE_MODE"; - auto mode_len = static_cast(mode.length()); 
int64_t ctx_ptr = reinterpret_cast(&ctx); - bool decrypt_valid = true; - const char* plaintext = gdv_fn_decrypt_dispatcher_3args( - ctx_ptr, data.c_str(), data_len, true, key16.c_str(), key16_len, true, - mode.c_str(), mode_len, false, // mode_validity = false (NULL mode) - &decrypt_valid, &decrypted_len); - - // Should fail with error message about NULL mode - EXPECT_FALSE(decrypt_valid); - EXPECT_EQ(plaintext, nullptr); - EXPECT_TRUE(ctx.has_error()); - EXPECT_NE(std::string(ctx.get_error()).find("Unsupported decryption mode: NULL"), - std::string::npos); -} + // Data with length that is not a multiple of 16 + std::string data = "test string"; + auto data_len = static_cast(data.length()); -// Test that NULL plaintext (plaintext_validity = false) returns NULL -TEST(TestGdvFnStubs, TestAesEncryptWithNullPlaintext) { - gandiva::ExecutionContext ctx; std::string key16 = "12345678abcdefgh"; auto key16_len = static_cast(key16.length()); - int32_t cipher_len = 0; - std::string data = "test string"; - auto data_len = static_cast(data.length()); - std::string mode = AES_ECB_MODE; - auto mode_len = static_cast(mode.length()); - int64_t ctx_ptr = reinterpret_cast(&ctx); + std::string iv = "1234567890123456"; + auto iv_len = static_cast(iv.length()); + + // Encrypt once with AES-CBC-PKCS7 to get ciphertext + std::string mode_cbc_pkcs7 = AES_CBC_PKCS7_MODE; + auto mode_cbc_pkcs7_len = static_cast(mode_cbc_pkcs7.length()); + int32_t cipher_len = 0; bool encrypt_valid = true; - const char* cipher = gdv_fn_encrypt_dispatcher_3args( - ctx_ptr, data.c_str(), data_len, false, // plaintext_validity = false (NULL plaintext) - key16.c_str(), key16_len, true, mode.c_str(), mode_len, true, + const char* cipher = gdv_fn_encrypt_dispatcher_4args( + ctx_ptr, data.c_str(), data_len, true, key16.c_str(), key16_len, true, + mode_cbc_pkcs7.c_str(), mode_cbc_pkcs7_len, true, iv.c_str(), iv_len, true, &encrypt_valid, &cipher_len); + EXPECT_TRUE(encrypt_valid); + EXPECT_GT(cipher_len, 0); - // 
Should return NULL without error - EXPECT_FALSE(encrypt_valid); - EXPECT_EQ(cipher, nullptr); - EXPECT_EQ(cipher_len, 0); - EXPECT_FALSE(ctx.has_error()); -} - -// Test that NULL ciphertext (ciphertext_validity = false) returns NULL -TEST(TestGdvFnStubs, TestAesDecryptWithNullCiphertext) { - gandiva::ExecutionContext ctx; - std::string key16 = "12345678abcdefgh"; - auto key16_len = static_cast(key16.length()); int32_t decrypted_len = 0; - std::string data = "some ciphertext"; - auto data_len = static_cast(data.length()); - std::string mode = AES_ECB_MODE; - auto mode_len = static_cast(mode.length()); - int64_t ctx_ptr = reinterpret_cast(&ctx); - bool decrypt_valid = true; - const char* plaintext = gdv_fn_decrypt_dispatcher_3args( - ctx_ptr, data.c_str(), data_len, false, // ciphertext_validity = false (NULL ciphertext) - key16.c_str(), key16_len, true, mode.c_str(), mode_len, true, + + // Test AES-CBC (shorthand, with PKCS7 padding) - should succeed + std::string mode_cbc = AES_CBC_MODE; + auto mode_cbc_len = static_cast(mode_cbc.length()); + const char* decrypted_cbc = gdv_fn_decrypt_dispatcher_4args( + ctx_ptr, cipher, cipher_len, true, key16.c_str(), key16_len, true, + mode_cbc.c_str(), mode_cbc_len, true, iv.c_str(), iv_len, true, &decrypt_valid, &decrypted_len); + EXPECT_TRUE(decrypt_valid); + EXPECT_EQ(decrypted_len, data_len); // Returns original data length (padding removed) - // Should return NULL without error - EXPECT_FALSE(decrypt_valid); - EXPECT_EQ(plaintext, nullptr); - EXPECT_EQ(decrypted_len, 0); - EXPECT_FALSE(ctx.has_error()); + // Test AES-CBC-PKCS7 (explicit PKCS7 padding) - should succeed + decrypt_valid = true; + decrypted_len = 0; + const char* decrypted_cbc_pkcs7 = gdv_fn_decrypt_dispatcher_4args( + ctx_ptr, cipher, cipher_len, true, key16.c_str(), key16_len, true, + mode_cbc_pkcs7.c_str(), mode_cbc_pkcs7_len, true, iv.c_str(), iv_len, true, + &decrypt_valid, &decrypted_len); + EXPECT_TRUE(decrypt_valid); + EXPECT_EQ(decrypted_len, 
data_len); // Returns original data length (padding removed) + + // Test AES-CBC-NONE (no padding) - should succeed but return padded data + std::string mode_cbc_none = AES_CBC_NONE_MODE; + auto mode_cbc_none_len = static_cast(mode_cbc_none.length()); + decrypt_valid = true; + decrypted_len = 0; + const char* decrypted_cbc_none = gdv_fn_decrypt_dispatcher_4args( + ctx_ptr, cipher, cipher_len, true, key16.c_str(), key16_len, true, + mode_cbc_none.c_str(), mode_cbc_none_len, true, iv.c_str(), iv_len, true, + &decrypt_valid, &decrypted_len); + EXPECT_TRUE(decrypt_valid); + EXPECT_EQ(decrypted_len, cipher_len); // Returns full block including padding } -// Test that NULL key (key_validity = false) throws an error for encrypt -TEST(TestGdvFnStubs, TestAesEncryptWithNullKey) { +// Validation tests + +TEST(TestGdvFnStubs, TestAesEncrypt3ArgsValidation) { gandiva::ExecutionContext ctx; - std::string key16 = "12345678abcdefgh"; - auto key16_len = static_cast(key16.length()); - int32_t cipher_len = 0; + int64_t ctx_ptr = reinterpret_cast(&ctx); + std::string data = "test string"; auto data_len = static_cast(data.length()); + + std::string key16 = "12345678abcdefgh"; + auto key16_len = static_cast(key16.length()); + std::string mode = AES_ECB_MODE; auto mode_len = static_cast(mode.length()); - int64_t ctx_ptr = reinterpret_cast(&ctx); + int32_t cipher_len = 0; bool encrypt_valid = true; - const char* cipher = gdv_fn_encrypt_dispatcher_3args( + + // Test 1: NULL plaintext should return NULL ciphertext + const char* result = gdv_fn_encrypt_dispatcher_3args( + ctx_ptr, data.c_str(), data_len, false, // data_validity = false (NULL plaintext) + key16.c_str(), key16_len, true, + mode.c_str(), mode_len, true, + &encrypt_valid, &cipher_len); + EXPECT_FALSE(encrypt_valid); + EXPECT_EQ(result, nullptr); + EXPECT_EQ(cipher_len, 0); + + // Test 2: NULL key should fail with error + ctx.Reset(); + encrypt_valid = true; + cipher_len = 0; + result = gdv_fn_encrypt_dispatcher_3args( ctx_ptr, 
data.c_str(), data_len, true, key16.c_str(), key16_len, false, // key_validity = false (NULL key) mode.c_str(), mode_len, true, &encrypt_valid, &cipher_len); + EXPECT_FALSE(encrypt_valid); + EXPECT_TRUE(ctx.has_error()); + EXPECT_THAT(ctx.get_error(), ::testing::HasSubstr("key cannot be NULL")); - // Should fail with error message about NULL key + // Test 3: NULL mode should fail with error + ctx.Reset(); + encrypt_valid = true; + cipher_len = 0; + result = gdv_fn_encrypt_dispatcher_3args( + ctx_ptr, data.c_str(), data_len, true, + key16.c_str(), key16_len, true, + mode.c_str(), mode_len, false, // mode_validity = false (NULL mode) + &encrypt_valid, &cipher_len); EXPECT_FALSE(encrypt_valid); - EXPECT_EQ(cipher, nullptr); EXPECT_TRUE(ctx.has_error()); - EXPECT_NE(std::string(ctx.get_error()).find("Encryption key cannot be NULL"), - std::string::npos); -} + EXPECT_THAT(ctx.get_error(), ::testing::HasSubstr("Unsupported encryption mode: NULL. Supported modes: AES-ECB, AES-ECB-PKCS7, AES-ECB-NONE, AES-CBC, AES-CBC-PKCS7, AES-CBC-NONE, AES-GCM")); -// Test that NULL key (key_validity = false) throws an error for decrypt -TEST(TestGdvFnStubs, TestAesDecryptWithNullKey) { - gandiva::ExecutionContext ctx; - std::string key16 = "12345678abcdefgh"; - auto key16_len = static_cast(key16.length()); - int32_t decrypted_len = 0; - std::string data = "some ciphertext"; - auto data_len = static_cast(data.length()); - std::string mode = AES_ECB_MODE; - auto mode_len = static_cast(mode.length()); - int64_t ctx_ptr = reinterpret_cast(&ctx); + // Test 4: Invalid mode string should fail with error + ctx.Reset(); + encrypt_valid = true; + cipher_len = 0; + std::string invalid_mode = "AES-INVALID"; + auto invalid_mode_len = static_cast(invalid_mode.length()); + result = gdv_fn_encrypt_dispatcher_3args( + ctx_ptr, data.c_str(), data_len, true, + key16.c_str(), key16_len, true, + invalid_mode.c_str(), invalid_mode_len, true, + &encrypt_valid, &cipher_len); + EXPECT_FALSE(encrypt_valid); + 
EXPECT_TRUE(ctx.has_error()); + EXPECT_THAT(ctx.get_error(), ::testing::HasSubstr("Unsupported encryption mode: AES-INVALID. Supported modes: AES-ECB, AES-ECB-PKCS7, AES-ECB-NONE, AES-CBC, AES-CBC-PKCS7, AES-CBC-NONE, AES-GCM")); - bool decrypt_valid = true; - const char* plaintext = gdv_fn_decrypt_dispatcher_3args( + // Test 5: Invalid key length should fail with error + ctx.Reset(); + encrypt_valid = true; + cipher_len = 0; + std::string short_key = "short"; + auto short_key_len = static_cast(short_key.length()); + result = gdv_fn_encrypt_dispatcher_3args( ctx_ptr, data.c_str(), data_len, true, - key16.c_str(), key16_len, false, // key_validity = false (NULL key) + short_key.c_str(), short_key_len, true, mode.c_str(), mode_len, true, - &decrypt_valid, &decrypted_len); - - // Should fail with error message about NULL key - EXPECT_FALSE(decrypt_valid); - EXPECT_EQ(plaintext, nullptr); + &encrypt_valid, &cipher_len); + EXPECT_FALSE(encrypt_valid); EXPECT_TRUE(ctx.has_error()); - EXPECT_NE(std::string(ctx.get_error()).find("Decryption key cannot be NULL"), - std::string::npos); + EXPECT_THAT(ctx.get_error(), ::testing::HasSubstr("Unsupported key length for AES-ECB")); } } // namespace gandiva