@@ -817,6 +817,7 @@ struct SVMMemFill : Command
817817struct NDRangeKernel : Command
818818{
819819 static std::unique_ptr<NDRangeKernel> create (
820+ const bool isMutable,
820821 const cl_command_properties_khr* properties,
821822 cl_command_buffer_khr cmdbuf,
822823 cl_command_queue queue,
@@ -1235,6 +1236,11 @@ typedef struct _cl_command_buffer_khr
12351236 cmdbuf->TestQueues .reserve (num_queues);
12361237 cmdbuf->BlockingEvents .reserve (num_queues);
12371238
1239+ if ( cmdbuf->Queues .size () == 1 )
1240+ {
1241+ cmdbuf->setupSuggestedLocalWorkSize ();
1242+ }
1243+
12381244 for ( auto queue : cmdbuf->Queues )
12391245 {
12401246 g_pNextDispatch->clRetainCommandQueue (queue);
@@ -1683,6 +1689,32 @@ typedef struct _cl_command_buffer_khr
16831689 return CL_SUCCESS;
16841690 }
16851691
1692+ cl_int clGetKernelSuggestedLocalWorkSize (
1693+ cl_command_queue queue,
1694+ cl_kernel kernel,
1695+ cl_uint work_dim,
1696+ const size_t * global_work_offset,
1697+ const size_t * global_work_size,
1698+ size_t * suggested_local_work_size )
1699+ {
1700+ if ( ptrGetKernelSuggestedLocalWorkSizeKHR == nullptr )
1701+ {
1702+ return CL_INVALID_OPERATION;
1703+ }
1704+ if ( queue != nullptr && queue != Queues[0 ] )
1705+ {
1706+ return CL_INVALID_COMMAND_QUEUE;
1707+ }
1708+
1709+ return ptrGetKernelSuggestedLocalWorkSizeKHR (
1710+ Queues[0 ],
1711+ kernel,
1712+ work_dim,
1713+ global_work_offset,
1714+ global_work_size,
1715+ suggested_local_work_size );
1716+ }
1717+
16861718private:
16871719 static constexpr cl_uint cMagic = 0x434d4442 ; // "CMDB"
16881720
@@ -1703,6 +1735,32 @@ typedef struct _cl_command_buffer_khr
17031735 std::vector<std::unique_ptr<Command>> Commands;
17041736 std::atomic<uint32_t > NextSyncPoint;
17051737
1738+ clGetKernelSuggestedLocalWorkSizeKHR_fn ptrGetKernelSuggestedLocalWorkSizeKHR = nullptr ;
1739+
1740+ void setupSuggestedLocalWorkSize ()
1741+ {
1742+ cl_device_id device = nullptr ;
1743+ g_pNextDispatch->clGetCommandQueueInfo (
1744+ Queues[0 ],
1745+ CL_QUEUE_DEVICE,
1746+ sizeof (device),
1747+ &device,
1748+ nullptr );
1749+
1750+ cl_platform_id platform = nullptr ;
1751+ g_pNextDispatch->clGetDeviceInfo (
1752+ device,
1753+ CL_DEVICE_PLATFORM,
1754+ sizeof (platform),
1755+ &platform,
1756+ nullptr );
1757+
1758+ ptrGetKernelSuggestedLocalWorkSizeKHR = (clGetKernelSuggestedLocalWorkSizeKHR_fn)
1759+ g_pNextDispatch->clGetExtensionFunctionAddressForPlatform (
1760+ platform,
1761+ " clGetKernelSuggestedLocalWorkSizeKHR" );
1762+ }
1763+
17061764 void setupTestQueue (cl_command_queue src)
17071765 {
17081766 if ( g_EnhancedErrorChecking )
@@ -1847,6 +1905,7 @@ _cl_mutable_command_khr::_cl_mutable_command_khr(
18471905 Queue(queue ? queue : cmdbuf->getQueue ()) {}
18481906
18491907std::unique_ptr<NDRangeKernel> NDRangeKernel::create (
1908+ const bool isMutable,
18501909 const cl_command_properties_khr* properties,
18511910 cl_command_buffer_khr cmdbuf,
18521911 cl_command_queue queue,
@@ -1964,6 +2023,21 @@ std::unique_ptr<NDRangeKernel> NDRangeKernel::create(
19642023 local_work_size,
19652024 local_work_size + work_dim);
19662025 }
2026+ else if ( isMutable == false )
2027+ {
2028+ command->local_work_size .resize (work_dim);
2029+ cl_int checkError = cmdbuf->clGetKernelSuggestedLocalWorkSize (
2030+ queue,
2031+ kernel,
2032+ work_dim,
2033+ global_work_offset,
2034+ global_work_size,
2035+ command->local_work_size .data () );
2036+ if ( checkError != CL_SUCCESS )
2037+ {
2038+ command->local_work_size .clear ();
2039+ }
2040+ }
19672041
19682042 g_pNextDispatch->clRetainKernel (command->original_kernel );
19692043
@@ -2838,8 +2912,11 @@ cl_int CL_API_CALL clCommandNDRangeKernelKHR_EMU(
28382912 }
28392913 }
28402914
2915+ const bool isMutable = mutable_handle != nullptr ;
2916+
28412917 cl_int errorCode = CL_SUCCESS;
28422918 auto command = NDRangeKernel::create (
2919+ isMutable,
28432920 properties,
28442921 cmdbuf,
28452922 command_queue,
0 commit comments