Skip to content

Commit a570963

Browse files
committed
Quality enhancement: Immediately interrupt execution when memory allocation (malloc) fails due to OOM
Signed-off-by: leo-pony <nengjunma@outlook.com>
1 parent 8a71540 commit a570963

File tree

1 file changed

+35
-26
lines changed

1 file changed

+35
-26
lines changed

csrc/camem_allocator.cpp

Lines changed: 35 additions & 26 deletions
Original file line number | Diff line number | Diff line change
@@ -15,6 +15,8 @@
1515
*/
1616

1717
#include <iostream>
18+
#include <stdexcept>
19+
#include <string>
1820

1921
extern "C" {
2022

@@ -49,7 +51,7 @@ void create_and_map(unsigned long long device, ssize_t size, void* d_mem,
4951
ensure_context(device);
5052
// Define memory allocation properties
5153
aclrtPhysicalMemProp prop = {};
52-
prop.handleType = ACL_MEM_HANDLE_TYPE_NONE ;
54+
prop.handleType = ACL_MEM_HANDLE_TYPE_NONE;
5355
prop.allocationType = ACL_MEM_ALLOCATION_TYPE_PINNED;
5456
prop.memAttr = ACL_HBM_MEM_HUGE;
5557
prop.location.id = device;
@@ -59,15 +61,21 @@ void create_and_map(unsigned long long device, ssize_t size, void* d_mem,
5961
// Allocate memory using aclrtMallocPhysical
6062
aclError error_code = aclrtMallocPhysical(p_memHandle, size, &prop, 0);
6163
if (error_code != 0) {
62-
std::cerr << "acl Error, code: " << error_code << " at " << __FILE__ << ":" \
63-
<< __LINE__ << std::endl;
64-
return;
64+
if (error_code == ACL_ERROR_RT_MEMORY_ALLOCATION) {
65+
throw std::runtime_error("aclrtMallocPhysical failed with acl error code: " +
66+
std::to_string(error_code) + "(OOM: Out of Memory, allocation failed) " +
67+
__FILE__ + ":" + std::to_string(__LINE__));
68+
} else {
69+
throw std::runtime_error("aclrtMallocPhysical failed with acl error code: " +
70+
std::to_string(error_code) + " " + __FILE__ + ":" + std::to_string(__LINE__));
71+
}
6572
}
73+
74+
// Map memory
6675
error_code = aclrtMapMem(d_mem, size, 0, *p_memHandle, 0);
6776
if (error_code != 0) {
68-
std::cerr << "acl Error, code: " << error_code << " at " << __FILE__ << ":" \
69-
<< __LINE__ << std::endl;
70-
return;
77+
throw std::runtime_error("aclrtMapMem failed with acl error code: " +
78+
std::to_string(error_code) + " " + __FILE__ + ":" + std::to_string(__LINE__));
7179
}
7280
}
7381

@@ -79,15 +87,13 @@ void unmap_and_release(unsigned long long device, ssize_t size,
7987
ensure_context(device);
8088
aclError error_code = aclrtUnmapMem(d_mem);
8189
if (error_code != 0) {
82-
std::cerr << "acl Error, code: " << error_code << " at " << __FILE__ << ":" \
83-
<< __LINE__ << std::endl;
84-
return;
90+
throw std::runtime_error("aclrtUnmapMem failed with acl error code: " +
91+
std::to_string(error_code) + " " + __FILE__ + ":" + std::to_string(__LINE__));
8592
}
8693
error_code = aclrtFreePhysical(*p_memHandle);
8794
if (error_code != 0) {
88-
std::cerr << "acl Error, code: " << error_code << " at " << __FILE__ << ":" \
89-
<< __LINE__ << std::endl;
90-
return;
95+
throw std::runtime_error("aclrtFreePhysical failed with acl error code: " +
96+
std::to_string(error_code) + " " + __FILE__ + ":" + std::to_string(__LINE__));
9197
}
9298
}
9399

@@ -139,25 +145,29 @@ __attribute__ ((visibility("default"))) void* my_malloc(ssize_t size, int device
139145
ACL_RT_MEM_ALLOC_GRANULARITY_MINIMUM,
140146
&granularity);
141147
if (error_code != 0) {
142-
std::cerr << "acl Error, code: " << error_code << " at " << __FILE__ << ":" \
143-
<< __LINE__ << std::endl;
144-
return nullptr;
148+
throw std::runtime_error("aclrtMemGetAllocationGranularity failed with acl error code: " +
149+
std::to_string(error_code) + " " + __FILE__ + ":" + std::to_string(__LINE__));
145150
}
146151
size_t alignedSize = ((size + granularity - 1) / granularity) * granularity;
147152
void *d_mem;
148153
error_code = aclrtReserveMemAddress(&d_mem, alignedSize, 0, nullptr, 0);
149154
if (error_code != 0) {
150-
std::cerr << "acl Error, code: " << error_code << " at " << __FILE__ << ":" \
151-
<< __LINE__ << std::endl;
152-
return nullptr;
155+
if (error_code == ACL_ERROR_RT_MEMORY_ALLOCATION) {
156+
throw std::runtime_error("aclrtReserveMemAddress failed with acl error code: " +
157+
std::to_string(error_code) + "(OOM: Out of Memory, allocation failed) " +
158+
__FILE__ + ":" + std::to_string(__LINE__));
159+
} else {
160+
throw std::runtime_error("aclrtReserveMemAddress failed with acl error code: " +
161+
std::to_string(error_code) + " " + __FILE__ + ":" + std::to_string(__LINE__));
162+
}
153163
}
154164
// allocate the aclrtDrvMemHandle
155165
aclrtDrvMemHandle* p_memHandle =
156166
(aclrtDrvMemHandle*)malloc(sizeof(aclrtDrvMemHandle));
157167

158168
if (!g_python_malloc_callback) {
159-
std::cerr << "ERROR: g_python_malloc_callback not set.\n";
160-
return nullptr;
169+
throw std::runtime_error("my_malloc ERROR: g_python_malloc_callback not set." +
170+
std::string(" ") + __FILE__ + ":" + std::to_string(__LINE__));
161171
}
162172

163173
// Acquire GIL (not in stable ABI officially, but often works)
@@ -189,8 +199,8 @@ __attribute__ ((visibility("default"))) void* my_malloc(ssize_t size, int device
189199
__attribute__ ((visibility("default"))) void my_free(void* ptr, ssize_t size, int device, aclrtStream stream) {
190200
// get memory handle from the pointer
191201
if (!g_python_free_callback) {
192-
std::cerr << "ERROR: g_python_free_callback not set.\n";
193-
return;
202+
throw std::runtime_error("aclrtDrvMemHandle ERROR: g_python_malloc_callback not set." +
203+
std::string(" ") + __FILE__ + ":" + std::to_string(__LINE__));
194204
}
195205

196206
// Acquire GIL (not in stable ABI officially, but often works)
@@ -232,9 +242,8 @@ __attribute__ ((visibility("default"))) void my_free(void* ptr, ssize_t size, in
232242
// free address and the handle
233243
aclError error_code = aclrtReleaseMemAddress(d_mem);
234244
if (error_code != 0) {
235-
std::cerr << "acl Error, code: " << error_code << " at " << __FILE__ << ":" \
236-
<< __LINE__ << std::endl;
237-
return;
245+
throw std::runtime_error("aclrtReleaseMemAddress failed with acl error code: " +
246+
std::to_string(error_code) + " " + __FILE__ + ":" + std::to_string(__LINE__));
238247
}
239248
free(p_memHandle);
240249
}

0 commit comments

Comments
 (0)