-
Notifications
You must be signed in to change notification settings - Fork 109
Open
Description
By referencing here, wrote the following inline assembly code:
inline __device__ void sts(uint32_t ptr, uint4 val) {
asm volatile("DS_WRITE_B128 %0, %1;\n" : : "v"(ptr) , "v"(val));
}
But when compile it reports the following error:
XXXXXXXXXXXX: error: Don't know how to handle indirect register inputs yet for constraint 'v'
asm volatile("DS_WRITE_B128 %0, %1;\n" : : "v"(ptr) , "v"(val));
^
The hip file is something as
#pragma once
#include <assert.h>
#include <stdint.h>
#include <stdlib.h>
#include "hip/hip_runtime.h"
#define HIP_ENABLE_PRINTF
inline __device__ void sts_uint4(uint32_t ptr, uint4 val) {
asm volatile("DS_WRITE_B128 %0, %1;\n" : : "v"(ptr) , "v"(val));
}
inline __device__ void lds_uint4(uint4 &dst, uint32_t ptr) {
uint4 temp;
asm volatile("DS_READ_B128 %0, %1;\n"
: "=v"(temp)
: "r"(ptr));
dst.x = temp.x;
dst.y = temp.y;
dst.z = temp.z;
dst.w = temp.w;
}
static inline __device__ void v_pk_sts_uint4(uint32_t ptr, uint4 val) {
printf("Store value: %d and %d and %d and %d into local shared memory address: %d.\n", val.x , val.y , val.z , val.w , ptr );
sts_uint4( ptr , val );
}
static inline __device__ void v_pk_lds_uint4(uint4 &dst, uint32_t ptr) {
uint4 tmp_loaded;
lds_uint4( tmp_loaded , ptr );
printf("Load value: %d and %d and %d and %d from local shared memory address: %d.\n", tmp_loaded.x, tmp_loaded.y , tmp_loaded.z , tmp_loaded.w , ptr );
dst = tmp_loaded;
}
The cpp file is something as
#include <assert.h>
#include <stdint.h>
#include <stdlib.h>
#include "test_cpp_inline_asm_sts_lds_uint32_t.h"
#include "hip/hip_hcc.h"
#include "test_simple_kernel.hip"
int main(int argc,char**vargs)
{
uint4* to_be_stored = new uint4[1];
uint4* to_be_loaded = new uint4[1];
uint32_t* local_share_memory_address = new uint32_t[1];
uint4* to_be_stored_d;
uint4* to_be_loaded_d;
uint32_t* local_share_memory_address_d;
to_be_stored[ 0 ].x = 1;
to_be_stored[ 0 ].y = 2;
to_be_stored[ 0 ].z = 19;
to_be_stored[ 0 ].w = 20;
local_share_memory_address[ 0 ] = 0;
hipSetDevice(0);
hipMalloc(&to_be_stored_d, 16);
hipMalloc(&to_be_loaded_d, 16);
hipMalloc(&local_share_memory_address_d, 4);
hipMemcpy(to_be_stored_d, to_be_stored, 16, hipMemcpyHostToDevice);
hipMemcpy(local_share_memory_address_d, local_share_memory_address, 4, hipMemcpyHostToDevice);
printf("Stored value: %d and %d and %d and %d at local share memory address: %d.\n" , to_be_stored[ 0 ].x , to_be_stored[ 0 ].y , to_be_stored[ 0 ].z , to_be_stored[ 0 ].w , local_share_memory_address[ 0 ] );
hipLaunchKernelGGL( halfVec_v_pk_sts_then_lds_uint4 , dim3( 1 ) , dim3( 1 ) , sizeof(uint4)*5 , 0, (uint4*)(to_be_loaded_d), (uint32_t*)(local_share_memory_address_d) , (uint4*)(to_be_stored_d) , 1 );
hipMemcpy(to_be_loaded, to_be_loaded_d, 16, hipMemcpyDeviceToHost);
hipDeviceSynchronize();
printf("And then load value: %d and %d and %d and %d from local share memory address: %d.\n", to_be_loaded[ 0 ].x, to_be_loaded[ 0 ].y , to_be_loaded[ 0 ].z , to_be_loaded[ 0 ].w , local_share_memory_address[ 0 ] );
}
Thanks in advance!
Metadata
Metadata
Assignees
Labels
No labels