Skip to content
This repository was archived by the owner on Nov 15, 2022. It is now read-only.

Commit 7a07b06

Browse files
cpuhrschfacebook-github-bot
authored andcommitted
20210519(1) nestedtensor import
Reviewed By: parmeet Differential Revision: D28552249 fbshipit-source-id: ce8ad5e3c873b27cc77bf4aba707db74e0f5a7e4
1 parent d91f32c commit 7a07b06

File tree

13 files changed

+291
-73
lines changed

13 files changed

+291
-73
lines changed

nestedtensor/csrc/cuda/padding.cu

Lines changed: 57 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,57 @@
1+
#include <cuda_runtime.h>
2+
#include <cuda_fp16.h>
3+
#include <cmath>
4+
#include <nestedtensor/csrc/cuda/attention.h>
5+
#include <stdio.h>
6+
7+
namespace nested_tensor {
8+
namespace cuda {
9+
10+
template<typename T>
11+
__global__
12+
void add_padding(
13+
const T* input,
14+
T* output,
15+
const int* offsets,
16+
const int batch_size,
17+
const int output_stride,
18+
const int inner_size)
19+
{
20+
const int batch_id = blockIdx.x;
21+
for (int i = 0; i < (offsets[batch_id + 1] - offsets[batch_id]) * inner_size; i++) {
22+
output[batch_id * output_stride + i] = input[offsets[batch_id] * inner_size + i];
23+
}
24+
}
25+
26+
template<typename T>
27+
void add_padding_kernelLauncher(
28+
T* input, // [batch_size x None]
29+
T* output, // [batch_size x max(input.nested_size(1))]
30+
const int* offsets, // [batch_size]
31+
const int batch_size,
32+
const int output_stride,
33+
const int inner_size,
34+
const cudaStream_t stream)
35+
{
36+
dim3 grid;
37+
grid.x = batch_size;
38+
39+
add_padding<float><<<grid, 1, 0, stream>>>(
40+
input,
41+
output,
42+
offsets,
43+
batch_size,
44+
output_stride,
45+
inner_size);
46+
}
47+
48+
template void add_padding_kernelLauncher<float>(
49+
float* input,
50+
float* output,
51+
const int* offsets,
52+
const int batch_size,
53+
const int output_stride,
54+
const int inner_size,
55+
const cudaStream_t stream);
56+
}
57+
}

nestedtensor/csrc/cuda/padding.h

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,19 @@
1+
#pragma once
2+
3+
#include <assert.h>
4+
#include <cuda_fp16.h>
5+
#include <cuda_runtime.h>
6+
namespace nested_tensor {
7+
namespace cuda {
8+
9+
template <typename T>
10+
void add_padding_kernelLauncher(
11+
T* input,
12+
T* output,
13+
const int* lengths,
14+
const int batch_size,
15+
const int output_stride,
16+
const int inner_size,
17+
const cudaStream_t stream);
18+
}
19+
} // namespace nested_tensor

nestedtensor/csrc/functions.cpp

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,33 @@ Tensor NestedTensor_embedding(
2626
weight,
2727
indices);
2828
}
29+
if (is_nested_tensor_impl(indices) && get_is_contiguous(indices) &&
30+
!is_nested_tensor_impl(weight) && get_dim(indices) == 2 &&
31+
get_nested_dim(indices) == 1) {
32+
Tensor indices_buffer = get_buffer(indices);
33+
Tensor result_buffer = at::embedding(
34+
weight, indices_buffer, padding_idx, scale_grad_by_freq, sparse);
35+
EfficientSizeNode new_nested_size = get_efficient_nested_size(indices);
36+
EfficientSizeNode new_nested_stride = get_efficient_nested_stride(indices);
37+
auto new_nested_size_sizes = new_nested_size.sizes();
38+
auto new_nested_stride_sizes = new_nested_stride.sizes();
39+
auto tmp = torch::empty(
40+
{new_nested_size_sizes.size(0)}, new_nested_size_sizes.options());
41+
tmp.fill_(weight.size(1));
42+
tmp = tmp.reshape({new_nested_size_sizes.size(0), 1});
43+
new_nested_size_sizes = at::cat({new_nested_size_sizes, tmp}, 1);
44+
new_nested_stride_sizes = at::cat({tmp, new_nested_stride_sizes}, 1);
45+
return wrap_buffer(
46+
std::move(result_buffer),
47+
EfficientSizeNode(
48+
new_nested_size.height(),
49+
new_nested_size.structure(),
50+
new_nested_size_sizes),
51+
EfficientSizeNode(
52+
new_nested_stride.height(),
53+
new_nested_stride.structure(),
54+
new_nested_stride_sizes));
55+
}
2956
return map_nested_tensor(
3057
[&](at::Tensor i) {
3158
return at::embedding(

nestedtensor/csrc/masking.cpp

Lines changed: 74 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,9 @@
11
#include <nestedtensor/csrc/masking.h>
22
#include <chrono>
3+
#ifdef WITH_CUDA
4+
#include <c10/cuda/CUDAStream.h>
5+
#include <nestedtensor/csrc/cuda/padding.h>
6+
#endif
37

48
using namespace torch::nested_tensor;
59
using namespace at;
@@ -40,7 +44,7 @@ std::tuple<Tensor, Tensor> merge_tensor_mask(
4044
Tensor pad_tensor_to_shape(Tensor t, std::vector<int64_t> goal_shape) {
4145
std::vector<int64_t> padd;
4246
auto tup = t.sizes();
43-
if (get_dim(t) != goal_shape.size()) {
47+
if (get_dim(t) != (int64_t)(goal_shape.size())) {
4448
throw std::runtime_error("dimension doesn't match length of goal shape.");
4549
}
4650
for (int64_t i = tup.size() - 1; i >= 0; i--) {
@@ -182,7 +186,7 @@ c10::optional<Tensor> nt_from_tensor_mask(
182186
}
183187
}
184188
std::vector<TensorNode> inner_tensor_nodes;
185-
for (int64_t i = 0; i < inner_tensors.size(); i++) {
189+
for (size_t i = 0; i < inner_tensors.size(); i++) {
186190
if (inner_tensors[i]) {
187191
TensorNode node = get_nested_tensor_structure(*inner_tensors[i]);
188192
inner_tensor_nodes.push_back(node);
@@ -194,15 +198,68 @@ c10::optional<Tensor> nt_from_tensor_mask(
194198
std::tuple<Tensor, Tensor> to_tensor_mask(
195199
Tensor nt,
196200
c10::optional<int64_t> mask_dim) {
197-
// TODO: Cover if not isinstance(nt, list) and nt.size() == (1,):
198-
// TODO: Move to_tensor_mask entirely into C++
199-
200-
std::vector<int64_t> max_size = get_max_size(nt);
201-
Tensor tensor;
202-
Tensor mask;
203-
std::tie(tensor, mask) = pad_nt(nt, max_size);
204-
std::tie(tensor, mask) = merge_tensor_mask(tensor, mask, mask_dim);
205-
return std::make_tuple(tensor, mask);
201+
TORCH_CHECK(
202+
!mask_dim || *mask_dim <= get_dim(nt),
203+
"Requested mask dimension ",
204+
*mask_dim,
205+
" is bigger than dimension ",
206+
get_dim(nt),
207+
" of given NestedTensor.");
208+
209+
auto opt_sizes = get_opt_sizes(nt);
210+
if (opt_sizes.size() == 1 && *opt_sizes[0] == 1) {
211+
nt = NestedTensor_contiguous(nt);
212+
Tensor nt_buffer = get_buffer(nt);
213+
nt_buffer = nt_buffer.reshape({-1});
214+
Tensor result_mask = !mask_dim || *mask_dim == 0 ? torch::tensor(true)
215+
: torch::tensor({true});
216+
return std::make_tuple(nt_buffer, result_mask);
217+
}
218+
219+
auto max_size = get_max_size(nt);
220+
at::Tensor res_tensor;
221+
at::Tensor res_mask;
222+
std::tie(res_tensor, res_mask) = pad_nt(nt, max_size);
223+
return merge_tensor_mask(res_tensor, res_mask, mask_dim);
224+
}
225+
226+
Tensor to_padded_tensor(Tensor nt, double padding) {
227+
#ifdef WITH_CUDA
228+
if (get_dim(nt) == 3) {
229+
auto nt_opt_size = get_opt_sizes(nt);
230+
if (nt_opt_size[2]) {
231+
Tensor nt_buffer = get_buffer(nt);
232+
Tensor nt_sizes_ =
233+
get_efficient_nested_size(nt).sizes().to(torch::kInt32);
234+
TORCH_CHECK(nt_sizes_.dim() == 2, "NestedTensor must be of nested_dim 2.")
235+
Tensor nt_sizes = at::native::narrow(nt_sizes_, 1, 0, 1);
236+
int max_size_1 = nt_sizes.max().item<int>();
237+
nt_sizes =
238+
at::native::cumsum(nt_sizes, 0).to(torch::kInt32).reshape({-1});
239+
nt_sizes = at::cat({torch::tensor({0}, torch::kInt32), nt_sizes});
240+
Tensor output = torch::empty(
241+
{*nt_opt_size[0], max_size_1, *nt_opt_size[2]}, nt_buffer.options());
242+
output.fill_(padding);
243+
nt_sizes = nt_sizes.to(torch::kCUDA);
244+
at::cuda::CUDAStream defaultStream = at::cuda::getDefaultCUDAStream();
245+
nested_tensor::cuda::add_padding_kernelLauncher(
246+
nt_buffer.data_ptr<float>(),
247+
output.data_ptr<float>(),
248+
nt_sizes.data_ptr<int>(),
249+
*nt_opt_size[0],
250+
output.stride(0),
251+
*nt_opt_size[2],
252+
defaultStream);
253+
return output;
254+
}
255+
}
256+
#endif
257+
at::Tensor tensor;
258+
at::Tensor mask;
259+
std::tie(tensor, mask) = to_tensor_mask(nt, get_dim(nt));
260+
mask = mask.to(torch::kBool);
261+
tensor.masked_fill_(at::logical_not(mask), padding);
262+
return tensor;
206263
}
207264

208265
TORCH_LIBRARY_FRAGMENT(nestedtensor, m) {
@@ -219,4 +276,10 @@ TORCH_LIBRARY_FRAGMENT(nestedtensor, m) {
219276

220277
m.def("get_max_size(Tensor nt) -> int[]");
221278
m.impl("get_max_size", NestedTensorKey, TORCH_FN(get_max_size));
279+
280+
m.def("to_tensor_mask(Tensor nt, int? mask_dim) -> (Tensor, Tensor)");
281+
m.impl("to_tensor_mask", NestedTensorKey, to_tensor_mask);
282+
283+
m.def("to_padded_tensor(Tensor nt, float padding) -> Tensor");
284+
m.impl("to_padded_tensor", NestedTensorKey, to_padded_tensor);
222285
}

nestedtensor/csrc/nested_tensor_impl.h

Lines changed: 27 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -173,15 +173,13 @@ inline const std::vector<c10::optional<int64_t>> get_opt_sizes(
173173
return get_nested_tensor_impl(tensor)->opt_sizes();
174174
}
175175

176-
inline const EfficientSizeNode get_efficient_nested_size(
177-
at::Tensor tensor) {
176+
inline const EfficientSizeNode get_efficient_nested_size(at::Tensor tensor) {
178177
TORCH_CHECK(
179178
is_nested_tensor_impl(tensor), "Given tensor must be NestedTensor.");
180179
return get_nested_tensor_impl(tensor)->get_storage()->nested_size();
181180
}
182181

183-
inline const EfficientSizeNode get_efficient_nested_stride(
184-
at::Tensor tensor) {
182+
inline const EfficientSizeNode get_efficient_nested_stride(at::Tensor tensor) {
185183
TORCH_CHECK(
186184
is_nested_tensor_impl(tensor), "Given tensor must be NestedTensor.");
187185
return get_nested_tensor_impl(tensor)->get_storage()->nested_stride();
@@ -285,6 +283,31 @@ inline bool is_tensor_shape(const at::Tensor tensor) {
285283

286284
Tensor NestedTensor_to_tensor(Tensor tensor, c10::optional<int64_t> dim_);
287285

286+
inline Tensor NestedTensor_to_sparse_csr(Tensor tensor) {
287+
TORCH_CHECK(
288+
get_dim(tensor) == 2,
289+
"Given tensor must be of dimension 2, got dimension ",
290+
get_dim(tensor));
291+
Tensor values;
292+
if (get_is_contiguous(tensor)) {
293+
values = get_buffer(tensor).reshape({-1});
294+
} else {
295+
values = at::cat(flatten(get_nested_tensor_structure(tensor)));
296+
}
297+
auto tensor_sizes = get_efficient_nested_size(tensor).sizes();
298+
tensor_sizes = tensor_sizes.reshape({-1});
299+
int64_t* tensor_sizes_ptr = tensor_sizes.data_ptr<int64_t>();
300+
at::Tensor crow_indices =
301+
at::cat({torch::tensor({0}), at::cumsum(tensor_sizes, 0)});
302+
std::vector<at::Tensor> col_indices_;
303+
for (int64_t i = 0; i < tensor_sizes.size(0); i++) {
304+
col_indices_.push_back(torch::arange({tensor_sizes_ptr[i]}));
305+
}
306+
at::Tensor col_indices = at::cat(col_indices_);
307+
return at::native::sparse_csr_tensor(crow_indices, col_indices, values,
308+
c10::nullopt, torch::kSparseCsr);
309+
}
310+
288311
inline std::ostream& operator<<(
289312
std::ostream& out,
290313
const NestedTensorImpl& batch_tensor) {

nestedtensor/csrc/py_init.cpp

Lines changed: 14 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -179,18 +179,29 @@ TORCH_LIBRARY(nestedtensor, m) {
179179
m.impl("get_dim", NestedTensorKey, [](Tensor self) { return get_dim(self); });
180180

181181
m.def("get_numel(Tensor self) -> int");
182-
m.impl("get_numel", NestedTensorKey, [](Tensor self) { return get_numel(self); });
182+
m.impl("get_numel", NestedTensorKey, [](Tensor self) {
183+
return get_numel(self);
184+
});
183185

184186
m.def("get_is_contiguous(Tensor self) -> int");
185-
m.impl("get_is_contiguous", NestedTensorKey, [](Tensor self) { return get_is_contiguous(self); });
187+
m.impl("get_is_contiguous", NestedTensorKey, [](Tensor self) {
188+
return get_is_contiguous(self);
189+
});
186190

187191
m.def("make_contiguous(Tensor self) -> Tensor");
188-
m.impl("make_contiguous", NestedTensorKey, [](Tensor self) { return NestedTensor_contiguous(self); });
192+
m.impl("make_contiguous", NestedTensorKey, [](Tensor self) {
193+
return NestedTensor_contiguous(self);
194+
});
189195

190196
m.def("to_tensor_list(Tensor tensor) -> Tensor[]");
191197
m.impl("to_tensor_list", NestedTensorKey, [](Tensor tensor) {
192198
return flatten_nested_tensor(tensor);
193199
});
200+
201+
m.def("to_sparse_csr(Tensor tensor) -> Tensor");
202+
m.impl("to_sparse_csr", NestedTensorKey, [](Tensor tensor) {
203+
return NestedTensor_to_sparse_csr(tensor);
204+
});
194205
}
195206

196207
PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) {

nestedtensor/csrc/storage/Packed.h

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,8 @@ inline std::tuple<TensorNode, at::Tensor> build_structure(
99
const at::Tensor& buffer,
1010
const SizeNode& nested_size,
1111
const SizeNode& nested_stride) {
12+
TORCH_CHECK(
13+
buffer.dim() == 1, "Given buffer must be vector, i.e. dim 1 Tensor.");
1214
std::vector<int64_t> split_sizes = flatten(
1315
map([](std::vector<int64_t> a,
1416
std::vector<int64_t> b) { return num_memory(a, b); },
@@ -121,7 +123,7 @@ struct PackedStorage : public NestedTensorStorage {
121123
}
122124
TensorNode get_structure() const override {
123125
return std::get<0>(impl::build_structure(
124-
_buffer, _nested_size.to_size_node(), _nested_stride.to_size_node()));
126+
_buffer.reshape({-1}), _nested_size.to_size_node(), _nested_stride.to_size_node()));
125127
}
126128
at::Tensor& get_buffer() {
127129
return _buffer;

nestedtensor/nested/masking.py

Lines changed: 0 additions & 39 deletions
Original file line numberDiff line numberDiff line change
@@ -48,42 +48,3 @@ def nt_from_tensor_mask(tensor, mask, nested_dim):
4848
tensor, mask, nested_dim)
4949
assert result is not None
5050
return nestedtensor.NestedTensor(result).contiguous()
51-
52-
53-
def get_tensor_mask(nt, shape):
54-
return torch.ops.nestedtensor.pad_nt(nt, shape)
55-
56-
57-
# Return a tuple of a tensor and a mask that represent the given tensor list
58-
# Returned tensor is always the same no matter what mask_dim was passed.
59-
# If mask_dim was not passed, a mask with the smallest dimensionality would be returned.
60-
# if passed mask_dim is lower than the minimal dimensionality of the mask that can represent
61-
# the data tensor, an error is thrown.
62-
def to_tensor_mask(nt, mask_dim):
63-
if mask_dim is not None and mask_dim > nt.dim():
64-
raise RuntimeError(
65-
"Mask dimension is bigger than nested dimension of a nested tensor.")
66-
67-
# Check if scalar was passed
68-
if not isinstance(nt, list) and nt.size() == (1,):
69-
res_scalar = torch.tensor(
70-
[nt[0].item()], dtype=nt.dtype, device=nt.device, requires_grad=nt.requires_grad)
71-
mask = torch.tensor(
72-
True) if mask_dim == 0 or mask_dim is None else torch.tensor([True])
73-
return res_scalar, mask
74-
75-
max_size = torch.ops.nestedtensor.get_max_size(nt)
76-
res_tensor, res_mask = get_tensor_mask(nt, max_size)
77-
tensor_mask_tuple = merge_tensor_mask(
78-
TensorMask(res_tensor, res_mask), mask_dim)
79-
80-
return tensor_mask_tuple.tensor, tensor_mask_tuple.mask
81-
82-
83-
# Merge mask to a given dimension if possible.
84-
def merge_tensor_mask(tensor_mask, mask_dim):
85-
tensor = tensor_mask.tensor
86-
mask = tensor_mask.mask
87-
tensor, mask = torch.ops.nestedtensor.merge_tensor_mask(
88-
tensor, mask, mask_dim)
89-
return TensorMask(tensor=tensor, mask=mask)

0 commit comments

Comments
 (0)