From 100ab26c48ddc2f8adf021152f3bc18b6c75072f Mon Sep 17 00:00:00 2001 From: Marcin Hajder Date: Tue, 18 Nov 2025 13:52:02 +0100 Subject: [PATCH 1/3] Added test to verify reqd_work_group_size attribute with cl_khr_command_buffer extension --- .../api/test_kernel_attributes.cpp | 2 + .../cl_khr_command_buffer/CMakeLists.txt | 1 + .../command_buffer_kernel_attributes.cpp | 227 ++++++++++++++++++ 3 files changed, 230 insertions(+) create mode 100644 test_conformance/extensions/cl_khr_command_buffer/command_buffer_kernel_attributes.cpp diff --git a/test_conformance/api/test_kernel_attributes.cpp b/test_conformance/api/test_kernel_attributes.cpp index 86b3595c6e..5bf8cdf443 100644 --- a/test_conformance/api/test_kernel_attributes.cpp +++ b/test_conformance/api/test_kernel_attributes.cpp @@ -382,6 +382,7 @@ REGISTER_TEST(null_required_work_group_size) clMemWrapper dst; dst = clCreateBuffer(context, CL_MEM_READ_WRITE, 3 * sizeof(cl_int), nullptr, &error); + test_error(error, "clCreateBuffer failed"); struct KernelAttribInfo { @@ -446,6 +447,7 @@ REGISTER_TEST(null_required_work_group_size) const cl_int zero = 0; error = clEnqueueFillBuffer(queue, dst, &zero, sizeof(zero), 0, sizeof(expected), 0, nullptr, nullptr); + test_error(error, "clEnqueueFillBuffer failed"); const size_t global_work_size[3] = { 2 * 32, 3 * 32, 4 * 32 }; error = clEnqueueNDRangeKernel(queue, kernel, work_dim, nullptr, diff --git a/test_conformance/extensions/cl_khr_command_buffer/CMakeLists.txt b/test_conformance/extensions/cl_khr_command_buffer/CMakeLists.txt index 9e54feccca..1a50db93e1 100644 --- a/test_conformance/extensions/cl_khr_command_buffer/CMakeLists.txt +++ b/test_conformance/extensions/cl_khr_command_buffer/CMakeLists.txt @@ -18,6 +18,7 @@ set(${MODULE_NAME}_SOURCES command_buffer_test_event_info.cpp command_buffer_finalize.cpp command_buffer_pipelined_enqueue.cpp + command_buffer_kernel_attributes.cpp negative_command_buffer_finalize.cpp negative_command_buffer_svm_mem.cpp negative_command_buffer_copy_image.cpp diff --git a/test_conformance/extensions/cl_khr_command_buffer/command_buffer_kernel_attributes.cpp b/test_conformance/extensions/cl_khr_command_buffer/command_buffer_kernel_attributes.cpp new file mode 100644 index 0000000000..972859cbea --- /dev/null +++ b/test_conformance/extensions/cl_khr_command_buffer/command_buffer_kernel_attributes.cpp @@ -0,0 +1,227 @@ +// +// Copyright (c) 2025 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// + +#include "basic_command_buffer.h" + +#include + +namespace { + +//////////////////////////////////////////////////////////////////////////////// +// Tests for cl_khr_command_buffer while enqueueing a kernel with a +// reqd_work_group_size with a NULL local_work_size. + +struct KernelAttributesReqGroupSizeTest : public BasicCommandBufferTest +{ + KernelAttributesReqGroupSizeTest(cl_device_id device, cl_context context, + cl_command_queue queue) + : BasicCommandBufferTest(device, context, queue), dst(nullptr), + clGetKernelSuggestedLocalWorkSizeKHR(nullptr) + {} + + cl_int SetUp(int elements) override + { + cl_int error = BasicCommandBufferTest::SetUp(elements); + test_error(error, "BasicCommandBufferTest::SetUp failed"); + + if (is_extension_available(device, "cl_khr_suggested_local_work_size")) + { + cl_platform_id platform = nullptr; + error = clGetDeviceInfo(device, CL_DEVICE_PLATFORM, + sizeof(platform), &platform, NULL); + test_error(error, "clGetDeviceInfo for platform failed"); + + clGetKernelSuggestedLocalWorkSizeKHR = + (clGetKernelSuggestedLocalWorkSizeKHR_fn) + clGetExtensionFunctionAddressForPlatform( + platform, "clGetKernelSuggestedLocalWorkSizeKHR"); + test_assert_error(clGetKernelSuggestedLocalWorkSizeKHR != nullptr, + "Couldn't get function pointer for " + "clGetKernelSuggestedLocalWorkSizeKHR"); + } + + dst = clCreateBuffer(context, CL_MEM_READ_WRITE, 3 * sizeof(cl_int), + nullptr, &error); + test_error(error, "clCreateBuffer failed"); + + return CL_SUCCESS; + } + + cl_int Run() override + { + cl_uint device_max_dim = 0; + cl_int error = + clGetDeviceInfo(device, CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS, + sizeof(device_max_dim), &device_max_dim, nullptr); + test_error( + error, + "clGetDeviceInfo for CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS failed"); + test_assert_error( + device_max_dim >= 3, + "CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS must be at least 3!"); + + std::vector device_max_work_item_sizes(device_max_dim); + error = clGetDeviceInfo(device, CL_DEVICE_MAX_WORK_ITEM_SIZES, + sizeof(size_t) * device_max_dim, + device_max_work_item_sizes.data(), nullptr); + + size_t device_max_work_group_size = 0; + error = clGetDeviceInfo(device, CL_DEVICE_MAX_WORK_GROUP_SIZE, + sizeof(device_max_work_group_size), + &device_max_work_group_size, nullptr); + test_error(error, + "clGetDeviceInfo for CL_DEVICE_MAX_WORK_GROUP_SIZE failed"); + + + std::vector> attribs = { + { "__attribute__((reqd_work_group_size(2,1,1)))", 1 }, + { "__attribute__((reqd_work_group_size(2,3,1)))", 2 }, + { "__attribute__((reqd_work_group_size(2,3,4)))", 3 } + }; + + const std::string body_str = R"( + __kernel void wg_size(__global int* dst) + { + if (get_global_id(0) == 0 && + get_global_id(1) == 0 && + get_global_id(2) == 0) { + dst[0] = get_local_size(0); + dst[1] = get_local_size(1); + dst[2] = get_local_size(2); + } + } + )"; + + + for (auto& attrib : attribs) + { + const std::string source_str = attrib.first + body_str; + const char* source = source_str.c_str(); + + clProgramWrapper program; + clKernelWrapper kernel; + error = create_single_kernel_helper(context, &program, &kernel, 1, + &source, "wg_size"); + test_error(error, "Unable to create test kernel"); + + error = clSetKernelArg(kernel, 0, sizeof(cl_mem), &dst); + test_error(error, "clSetKernelArg failed"); + + for (cl_uint work_dim = 1; work_dim <= attrib.second; work_dim++) + { + const cl_int expected[3] = { 2, work_dim >= 2 ? 3 : 1, + work_dim >= 3 ? 4 : 1 }; + const size_t test_work_group_size = + expected[0] * expected[1] * expected[2]; + if ((size_t)expected[0] > device_max_work_item_sizes[0] + || (size_t)expected[1] > device_max_work_item_sizes[1] + || (size_t)expected[2] > device_max_work_item_sizes[2] + || test_work_group_size > device_max_work_group_size) + { + log_info( + "Skipping test for work_dim = %u: required work group " + "size (%i, %i, %i) (total %zu) exceeds device max " + "work group size (%zu, %zu, %zu) (total %zu)\n", + work_dim, expected[0], expected[1], expected[2], + test_work_group_size, device_max_work_item_sizes[0], + device_max_work_item_sizes[1], + device_max_work_item_sizes[2], + device_max_work_group_size); + continue; + } + + const cl_int zero = 0; + error = clCommandFillBufferKHR( + command_buffer, nullptr, nullptr, dst, &zero, sizeof(zero), + 0, sizeof(expected), 0, nullptr, nullptr, nullptr); + test_error(error, "clCommandFillBufferKHR failed"); + + const size_t global_work_size[3] = { 2 * 32, 3 * 32, 4 * 32 }; + error = clCommandNDRangeKernelKHR( + command_buffer, nullptr, nullptr, kernel, work_dim, nullptr, + global_work_size, nullptr, 0, nullptr, nullptr, nullptr); + test_error(error, "clCommandNDRangeKernelKHR failed"); + + error = clFinalizeCommandBufferKHR(command_buffer); + test_error(error, "clFinalizeCommandBufferKHR failed"); + + error = clEnqueueCommandBufferKHR(0, nullptr, command_buffer, 0, + nullptr, nullptr); + test_error(error, "clEnqueueCommandBufferKHR failed"); + + cl_int results[3] = { -1, -1, -1 }; + error = + clEnqueueReadBuffer(queue, dst, CL_TRUE, 0, sizeof(results), + results, 0, nullptr, nullptr); + test_error(error, "clEnqueueReadBuffer failed"); + + // Verify the result + if (results[0] != expected[0] || results[1] != expected[1] + || results[2] != expected[2]) + { + log_error( + "Executed local size mismatch with work_dim = %u: " + "Expected (%d,%d,%d) got (%d,%d,%d)\n", + work_dim, expected[0], expected[1], expected[2], + results[0], results[1], results[2]); + return TEST_FAIL; + } + + if (clGetKernelSuggestedLocalWorkSizeKHR != nullptr) + { + size_t suggested[3] = { 1, 1, 1 }; + error = clGetKernelSuggestedLocalWorkSizeKHR( + queue, kernel, work_dim, nullptr, global_work_size, + suggested); + test_error(error, + "clGetKernelSuggestedLocalWorkSizeKHR failed"); + + if ((cl_int)suggested[0] != expected[0] + || (cl_int)suggested[1] != expected[1] + || (cl_int)suggested[2] != expected[2]) + { + log_error( + "Suggested local size mismatch with work_dim = " + "%u: Expected (%d,%d,%d) got (%d,%d,%d)\n", + work_dim, expected[0], expected[1], expected[2], + (cl_int)suggested[0], (cl_int)suggested[1], + (cl_int)suggested[2]); + return TEST_FAIL; + } + } + + // create new command buffer + command_buffer = + clCreateCommandBufferKHR(1, &queue, nullptr, &error); + test_error(error, "clCreateCommandBufferKHR failed"); + } + } + + return CL_SUCCESS; + } + + clMemWrapper dst; + clGetKernelSuggestedLocalWorkSizeKHR_fn + clGetKernelSuggestedLocalWorkSizeKHR; +}; + +} // anonymous namespace + +REGISTER_TEST(command_null_required_work_group_size) +{ + return MakeAndRunTest( + device, context, queue, num_elements); +} From 8d7241de0200a4cea6e1685806969d004d627094 Mon Sep 17 00:00:00 2001 From: Marcin Hajder Date: Thu, 20 Nov 2025 12:35:00 +0100 Subject: [PATCH 2/3] Added correction related to #2576 --- .../command_buffer_kernel_attributes.cpp | 55 +++++++++++-------- 1 file changed, 31 insertions(+), 24 deletions(-) diff --git a/test_conformance/extensions/cl_khr_command_buffer/command_buffer_kernel_attributes.cpp b/test_conformance/extensions/cl_khr_command_buffer/command_buffer_kernel_attributes.cpp index 972859cbea..3191ee8b84 100644 --- a/test_conformance/extensions/cl_khr_command_buffer/command_buffer_kernel_attributes.cpp +++ b/test_conformance/extensions/cl_khr_command_buffer/command_buffer_kernel_attributes.cpp @@ -86,12 +86,16 @@ struct KernelAttributesReqGroupSizeTest : public BasicCommandBufferTest "clGetDeviceInfo for CL_DEVICE_MAX_WORK_GROUP_SIZE failed"); - std::vector> attribs = { - { "__attribute__((reqd_work_group_size(2,1,1)))", 1 }, - { "__attribute__((reqd_work_group_size(2,3,1)))", 2 }, - { "__attribute__((reqd_work_group_size(2,3,4)))", 3 } + struct KernelAttribInfo + { + cl_int wgs[3]; + cl_uint min_dim; }; + std::vector attribs = { { { 2, 1, 1 }, 1 }, + { { 2, 3, 1 }, 2 }, + { { 2, 3, 4 }, 3 } }; + const std::string body_str = R"( __kernel void wg_size(__global int* dst) { @@ -108,7 +112,12 @@ struct KernelAttributesReqGroupSizeTest : public BasicCommandBufferTest for (auto& attrib : attribs) { - const std::string source_str = attrib.first + body_str; + const std::string attrib_str = + "__attribute__((reqd_work_group_size(" + + std::to_string(attrib.wgs[0]) + "," + + std::to_string(attrib.wgs[1]) + "," + + std::to_string(attrib.wgs[2]) + ")))"; + const std::string source_str = attrib_str + body_str; const char* source = source_str.c_str(); clProgramWrapper program; @@ -120,22 +129,20 @@ struct KernelAttributesReqGroupSizeTest : public BasicCommandBufferTest error = clSetKernelArg(kernel, 0, sizeof(cl_mem), &dst); test_error(error, "clSetKernelArg failed"); - for (cl_uint work_dim = 1; work_dim <= attrib.second; work_dim++) + for (cl_uint work_dim = attrib.min_dim; work_dim <= 3; work_dim++) { - const cl_int expected[3] = { 2, work_dim >= 2 ? 3 : 1, - work_dim >= 3 ? 4 : 1 }; const size_t test_work_group_size = - expected[0] * expected[1] * expected[2]; - if ((size_t)expected[0] > device_max_work_item_sizes[0] - || (size_t)expected[1] > device_max_work_item_sizes[1] - || (size_t)expected[2] > device_max_work_item_sizes[2] + attrib.wgs[0] * attrib.wgs[1] * attrib.wgs[2]; + if ((size_t)attrib.wgs[0] > device_max_work_item_sizes[0] + || (size_t)attrib.wgs[1] > device_max_work_item_sizes[1] + || (size_t)attrib.wgs[2] > device_max_work_item_sizes[2] || test_work_group_size > device_max_work_group_size) { log_info( "Skipping test for work_dim = %u: required work group " "size (%i, %i, %i) (total %zu) exceeds device max " "work group size (%zu, %zu, %zu) (total %zu)\n", - work_dim, expected[0], expected[1], expected[2], + work_dim, attrib.wgs[0], attrib.wgs[1], attrib.wgs[2], test_work_group_size, device_max_work_item_sizes[0], device_max_work_item_sizes[1], device_max_work_item_sizes[2], @@ -146,7 +153,7 @@ struct KernelAttributesReqGroupSizeTest : public BasicCommandBufferTest const cl_int zero = 0; error = clCommandFillBufferKHR( command_buffer, nullptr, nullptr, dst, &zero, sizeof(zero), - 0, sizeof(expected), 0, nullptr, nullptr, nullptr); + 0, sizeof(attrib.wgs), 0, nullptr, nullptr, nullptr); test_error(error, "clCommandFillBufferKHR failed"); const size_t global_work_size[3] = { 2 * 32, 3 * 32, 4 * 32 }; @@ -169,13 +176,13 @@ struct KernelAttributesReqGroupSizeTest : public BasicCommandBufferTest test_error(error, "clEnqueueReadBuffer failed"); // Verify the result - if (results[0] != expected[0] || results[1] != expected[1] - || results[2] != expected[2]) + if (results[0] != attrib.wgs[0] || results[1] != attrib.wgs[1] + || results[2] != attrib.wgs[2]) { log_error( "Executed local size mismatch with work_dim = %u: " "Expected (%d,%d,%d) got (%d,%d,%d)\n", - work_dim, expected[0], expected[1], expected[2], + work_dim, attrib.wgs[0], attrib.wgs[1], attrib.wgs[2], results[0], results[1], results[2]); return TEST_FAIL; } @@ -189,16 +196,16 @@ struct KernelAttributesReqGroupSizeTest : public BasicCommandBufferTest test_error(error, "clGetKernelSuggestedLocalWorkSizeKHR failed"); - if ((cl_int)suggested[0] != expected[0] - || (cl_int)suggested[1] != expected[1] - || (cl_int)suggested[2] != expected[2]) + if (suggested[0] != (size_t)attrib.wgs[0] + || suggested[1] != (size_t)attrib.wgs[1] + || suggested[2] != (size_t)attrib.wgs[2]) { log_error( "Suggested local size mismatch with work_dim = " - "%u: Expected (%d,%d,%d) got (%d,%d,%d)\n", - work_dim, expected[0], expected[1], expected[2], - (cl_int)suggested[0], (cl_int)suggested[1], - (cl_int)suggested[2]); + "%u: Expected (%d,%d,%d) got (%zu,%zu,%zu)\n", + work_dim, attrib.wgs[0], attrib.wgs[1], + attrib.wgs[2], suggested[0], suggested[1], + suggested[2]); return TEST_FAIL; } } From 82a181cdcebe39fb589dc3f14f1e78305551fa9a Mon Sep 17 00:00:00 2001 From: Marcin Hajder Date: Wed, 3 Dec 2025 13:27:32 +0100 Subject: [PATCH 3/3] Corrections due to code review --- .../command_buffer_kernel_attributes.cpp | 49 ++++++++++--------- 1 file changed, 25 insertions(+), 24 deletions(-) diff --git a/test_conformance/extensions/cl_khr_command_buffer/command_buffer_kernel_attributes.cpp b/test_conformance/extensions/cl_khr_command_buffer/command_buffer_kernel_attributes.cpp index 3191ee8b84..4eddd1d821 100644 --- a/test_conformance/extensions/cl_khr_command_buffer/command_buffer_kernel_attributes.cpp +++ b/test_conformance/extensions/cl_khr_command_buffer/command_buffer_kernel_attributes.cpp @@ -16,8 +16,6 @@ #include "basic_command_buffer.h" -#include - namespace { //////////////////////////////////////////////////////////////////////////////// @@ -26,10 +24,24 @@ namespace { struct KernelAttributesReqGroupSizeTest : public BasicCommandBufferTest { + inline static const std::string body_str = R"( + __kernel void wg_size(__global int* dst) + { + if (get_global_id(0) == 0 && + get_global_id(1) == 0 && + get_global_id(2) == 0) { + dst[0] = get_local_size(0); + dst[1] = get_local_size(1); + dst[2] = get_local_size(2); + } + } + )"; + KernelAttributesReqGroupSizeTest(cl_device_id device, cl_context context, cl_command_queue queue) : BasicCommandBufferTest(device, context, queue), dst(nullptr), - clGetKernelSuggestedLocalWorkSizeKHR(nullptr) + clGetKernelSuggestedLocalWorkSizeKHR(nullptr), + device_max_work_group_size(0) {} cl_int SetUp(int elements) override @@ -57,13 +69,9 @@ struct KernelAttributesReqGroupSizeTest : public BasicCommandBufferTest nullptr, &error); test_error(error, "clCreateBuffer failed"); - return CL_SUCCESS; - } - cl_int Run() override - { cl_uint device_max_dim = 0; - cl_int error = + error = clGetDeviceInfo(device, CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS, sizeof(device_max_dim), &device_max_dim, nullptr); test_error( @@ -73,19 +81,23 @@ struct KernelAttributesReqGroupSizeTest : public BasicCommandBufferTest device_max_dim >= 3, "CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS must be at least 3!"); - std::vector device_max_work_item_sizes(device_max_dim); + device_max_work_item_sizes.resize(device_max_dim); error = clGetDeviceInfo(device, CL_DEVICE_MAX_WORK_ITEM_SIZES, sizeof(size_t) * device_max_dim, device_max_work_item_sizes.data(), nullptr); - size_t device_max_work_group_size = 0; error = clGetDeviceInfo(device, CL_DEVICE_MAX_WORK_GROUP_SIZE, sizeof(device_max_work_group_size), &device_max_work_group_size, nullptr); test_error(error, "clGetDeviceInfo for CL_DEVICE_MAX_WORK_GROUP_SIZE failed"); + return CL_SUCCESS; + } + cl_int Run() override + { + cl_int error = CL_SUCCESS; struct KernelAttribInfo { cl_int wgs[3]; @@ -96,20 +108,6 @@ struct KernelAttributesReqGroupSizeTest : public BasicCommandBufferTest { { 2, 3, 1 }, 2 }, { { 2, 3, 4 }, 3 } }; - const std::string body_str = R"( - __kernel void wg_size(__global int* dst) - { - if (get_global_id(0) == 0 && - get_global_id(1) == 0 && - get_global_id(2) == 0) { - dst[0] = get_local_size(0); - dst[1] = get_local_size(1); - dst[2] = get_local_size(2); - } - } - )"; - - for (auto& attrib : attribs) { const std::string attrib_str = @@ -223,6 +221,9 @@ struct KernelAttributesReqGroupSizeTest : public BasicCommandBufferTest clMemWrapper dst; clGetKernelSuggestedLocalWorkSizeKHR_fn clGetKernelSuggestedLocalWorkSizeKHR; + + size_t device_max_work_group_size; + std::vector device_max_work_item_sizes; }; } // anonymous namespace