Skip to content

Commit 39d4ccf

Browse files
committed
[slimtensor] Storage and SharedPtr for CPU owning mode
This diff introduces the foundation for tensor storage management in the SlimTensor migration:
- util/SharedPtr.h - A lightweight, non-thread-safe shared pointer optimized for single-threaded tensor operations.
- core/Storage.h - The MaybeOwningStorage class that manages tensor data memory:
  - DeviceTraits<CPU> specialization with allocate(), free(), memcpy() using malloc/free
  - Owning mode for CPU device (CUDA and non-owning mode added later)
  - Storage type alias as SharedPtr<MaybeOwningStorage>
  - Move semantics for efficient resource transfer
  - clone() and copy_() methods for data management

Differential Revision: [D89747980](https://our.internmc.facebook.com/intern/diff/D89747980/)
[ghstack-poisoned]
1 parent e4e1a49 commit 39d4ccf

File tree

9 files changed

+754
-0
lines changed

9 files changed

+754
-0
lines changed

backends/aoti/slim/core/Storage.h

Lines changed: 245 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,245 @@
1+
/*
2+
* Copyright (c) Meta Platforms, Inc. and affiliates.
3+
* All rights reserved.
4+
*
5+
* This source code is licensed under the BSD-style license found in the
6+
* LICENSE file in the root directory of this source tree.
7+
*/
8+
9+
#pragma once
10+
11+
#include <cstddef>
#include <cstdint>
#include <cstdlib>
#include <cstring>

#include <executorch/backends/aoti/slim/c10/core/Device.h>
#include <executorch/backends/aoti/slim/c10/core/ScalarType.h>
#include <executorch/backends/aoti/slim/util/SharedPtr.h>
#include <executorch/runtime/platform/assert.h>
18+
19+
namespace executorch::backends::aoti::slim {
20+
21+
/// Type alias for deleter function pointer.
22+
using DeleterFn = void (*)(void*);
23+
24+
namespace detail {
25+
/// No-op deleter for non-owning storage.
26+
inline void noop(void*) {}
27+
} // namespace detail
28+
29+
/// Default CPU device constant.
30+
const c10::Device CPU_DEVICE = c10::Device(c10::DeviceType::CPU, 0);
31+
32+
/// DeviceTraits template for device-specific operations.
33+
/// Device-specific implementations provide allocate(), free(), and memcpy().
34+
template <c10::DeviceType D>
35+
struct DeviceTraits;
36+
37+
/// CPU specialization of DeviceTraits.
38+
/// Provides CPU memory allocation and copy operations using malloc/free/memcpy.
39+
template <>
40+
struct DeviceTraits<c10::DeviceType::CPU> {
41+
/// Allocates CPU memory using malloc.
42+
/// @param nbytes Number of bytes to allocate.
43+
/// @param device The target device (unused for CPU).
44+
/// @return Pointer to allocated memory.
45+
static void* allocate(size_t nbytes, const c10::Device& device = CPU_DEVICE) {
46+
(void)device;
47+
// NOLINTNEXTLINE(cppcoreguidelines-no-malloc)
48+
return malloc(nbytes);
49+
}
50+
51+
/// Frees CPU memory using free.
52+
/// @param ptr Pointer to memory to free.
53+
static void free(void* ptr) {
54+
// NOLINTNEXTLINE(cppcoreguidelines-no-malloc)
55+
std::free(ptr);
56+
}
57+
58+
/// Copies memory between CPU locations.
59+
/// @param dst Destination pointer.
60+
/// @param src Source pointer.
61+
/// @param nbytes Number of bytes to copy.
62+
/// @param dst_device Destination device (unused for CPU-to-CPU).
63+
/// @param src_device Source device (unused for CPU-to-CPU).
64+
static void memcpy(
65+
void* dst,
66+
const void* src,
67+
size_t nbytes,
68+
const c10::Device& dst_device,
69+
const c10::Device& src_device) {
70+
(void)dst_device;
71+
(void)src_device;
72+
std::memcpy(dst, src, nbytes);
73+
}
74+
};
75+
76+
/**
77+
* MaybeOwningStorage - A storage class that manages tensor data memory.
78+
*
79+
* This class provides owning memory storage for tensor data on CPU.
80+
* Owning storage allocates and manages its own memory, freeing it upon
81+
* destruction.
82+
*
83+
* Current limitations:
84+
* - CPU device only
85+
* - Owning mode only
86+
* The future diffs will add support for non-owning storage and other devices.
87+
*
88+
* Thread Safety: NOT THREAD-SAFE
89+
* - Uses NonAtomicSharedPtr for reference counting
90+
* - Must only be used in single-threaded contexts
91+
*/
92+
class MaybeOwningStorage {
93+
public:
94+
/// Constructs owning storage with allocated memory.
95+
/// @param device The device for storage (must be CPU).
96+
/// @param nbytes Number of bytes to allocate.
97+
MaybeOwningStorage(const c10::Device& device, size_t nbytes)
98+
: device_(device), capacity_(nbytes), is_owning_(true) {
99+
ET_CHECK_MSG(
100+
device.is_cpu(),
101+
"Only CPU device is currently supported, got: %s",
102+
device.str().c_str());
103+
104+
data_ = DeviceTraits<c10::DeviceType::CPU>::allocate(nbytes, device);
105+
deleter_ = DeviceTraits<c10::DeviceType::CPU>::free;
106+
}
107+
108+
/// Default constructor is deleted - storage must have a device.
109+
MaybeOwningStorage() = delete;
110+
111+
/// Copy constructor is deleted - use SharedPtr for shared ownership.
112+
MaybeOwningStorage(const MaybeOwningStorage&) = delete;
113+
114+
/// Copy assignment is deleted - use SharedPtr for shared ownership.
115+
MaybeOwningStorage& operator=(const MaybeOwningStorage&) = delete;
116+
117+
/// Move constructor.
118+
MaybeOwningStorage(MaybeOwningStorage&& other) noexcept
119+
: device_(other.device_),
120+
data_(other.data_),
121+
capacity_(other.capacity_),
122+
deleter_(other.deleter_),
123+
is_owning_(other.is_owning_) {
124+
other.data_ = nullptr;
125+
other.capacity_ = 0;
126+
other.deleter_ = detail::noop;
127+
other.is_owning_ = false;
128+
}
129+
130+
/// Move assignment operator.
131+
MaybeOwningStorage& operator=(MaybeOwningStorage&& other) noexcept {
132+
if (this != &other) {
133+
free_data();
134+
135+
device_ = other.device_;
136+
data_ = other.data_;
137+
capacity_ = other.capacity_;
138+
deleter_ = other.deleter_;
139+
is_owning_ = other.is_owning_;
140+
141+
other.data_ = nullptr;
142+
other.capacity_ = 0;
143+
other.deleter_ = detail::noop;
144+
other.is_owning_ = false;
145+
}
146+
return *this;
147+
}
148+
149+
/// Destructor - frees owned memory.
150+
~MaybeOwningStorage() {
151+
free_data();
152+
}
153+
154+
/// Copies data between storage locations.
155+
/// @param dst_data_ptr Destination data pointer.
156+
/// @param src_data_ptr Source data pointer.
157+
/// @param nbytes Number of bytes to copy.
158+
/// @param src_device Source device.
159+
void copy_(
160+
void* dst_data_ptr,
161+
void* src_data_ptr,
162+
size_t nbytes,
163+
const c10::Device& src_device) {
164+
ET_CHECK_MSG(
165+
dst_data_ptr, "Storage copy failed: dst_data_ptr cannot be nullptr");
166+
ET_CHECK_MSG(
167+
src_data_ptr, "Storage copy failed: src_data_ptr cannot be nullptr");
168+
169+
if (dst_data_ptr == src_data_ptr) {
170+
return;
171+
}
172+
173+
ET_CHECK_MSG(
174+
device_.is_cpu() && src_device.is_cpu(),
175+
"Only CPU-to-CPU copy is currently supported");
176+
177+
DeviceTraits<c10::DeviceType::CPU>::memcpy(
178+
dst_data_ptr, src_data_ptr, nbytes, device_, src_device);
179+
}
180+
181+
/// Creates a clone of this storage on the specified device.
182+
/// @param device Target device for the clone (must be CPU).
183+
/// @return A new MaybeOwningStorage with copied data.
184+
MaybeOwningStorage clone(const c10::Device& device) const {
185+
ET_CHECK_MSG(data_, "Storage clone failed: source data cannot be nullptr");
186+
ET_CHECK_MSG(
187+
device.is_cpu(), "Only CPU device is currently supported for clone");
188+
189+
MaybeOwningStorage cloned_storage(device, capacity_);
190+
191+
DeviceTraits<c10::DeviceType::CPU>::memcpy(
192+
cloned_storage.data_, data_, capacity_, device, device_);
193+
194+
return cloned_storage;
195+
}
196+
197+
/// Returns the data pointer, or nullptr for zero-sized storage.
198+
void* data() const {
199+
if (capacity_ == 0) {
200+
return nullptr;
201+
}
202+
return data_;
203+
}
204+
205+
/// Returns the device this storage is on.
206+
const c10::Device& device() const {
207+
return device_;
208+
}
209+
210+
/// Returns the capacity in bytes.
211+
size_t nbytes() const {
212+
return capacity_;
213+
}
214+
215+
/// Returns true if this storage owns its memory.
216+
bool is_owning() const {
217+
return is_owning_;
218+
}
219+
220+
/// Returns true if the storage can be resized (must be owning).
221+
bool is_resizable() const {
222+
return is_owning_;
223+
}
224+
225+
private:
226+
c10::Device device_ = CPU_DEVICE;
227+
void* data_ = nullptr;
228+
size_t capacity_ = 0;
229+
DeleterFn deleter_ = detail::noop;
230+
bool is_owning_ = false;
231+
232+
/// Frees the data if non-null.
233+
void free_data() {
234+
if (data_ != nullptr) {
235+
deleter_(data_);
236+
data_ = nullptr;
237+
}
238+
}
239+
};
240+
241+
/// Storage is a shared pointer to MaybeOwningStorage.
242+
/// Multiple tensors can share the same underlying storage.
243+
using Storage = SharedPtr<MaybeOwningStorage>;
244+
245+
} // namespace executorch::backends::aoti::slim

backends/aoti/slim/core/TARGETS

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
load("targets.bzl", "define_common_targets")
2+
3+
define_common_targets()
Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,19 @@
1+
load("@fbsource//xplat/executorch/build:runtime_wrapper.bzl", "runtime")
2+
3+
def define_common_targets():
    """Declare the build targets of the SlimTensor core module.

    Registers one header-only library, `storage`, which exposes `Storage.h`
    and re-exports the targets that header includes.
    """

    # Targets providing the headers that Storage.h itself includes.
    storage_exported_deps = [
        "//executorch/backends/aoti/slim/c10/core:device",
        "//executorch/backends/aoti/slim/c10/core:scalar_type",
        "//executorch/backends/aoti/slim/util:shared_ptr",
        "//executorch/runtime/platform:platform",
    ]

    runtime.cxx_library(
        name = "storage",
        headers = ["Storage.h"],
        visibility = ["@EXECUTORCH_CLIENTS"],
        exported_deps = storage_exported_deps,
    )
Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
load("targets.bzl", "define_common_targets")
2+
3+
define_common_targets()
Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,14 @@
1+
load("@fbsource//xplat/executorch/build:runtime_wrapper.bzl", "runtime")
2+
3+
def define_common_targets():
    """Declare the test targets of the SlimTensor core module.

    Registers one C++ test, `test_storage_cpu`, built from
    `test_storage_cpu.cpp` against the core `storage` library.
    """

    runtime.cxx_test(
        name = "test_storage_cpu",
        srcs = ["test_storage_cpu.cpp"],
        deps = ["//executorch/backends/aoti/slim/core:storage"],
    )

0 commit comments

Comments
 (0)