From 8f92bfc94cd72fe0374ab3be197d7364342bd912 Mon Sep 17 00:00:00 2001 From: zhangqiu Date: Thu, 8 Aug 2024 11:13:36 +0800 Subject: [PATCH 1/5] fix index_put --- impl/ascend/aclnn/adaptor.hpp | 12 +- impl/ascend/ascend_tensor.cpp | 104 ++++++++- impl/ascend/ascend_tensor.hpp | 6 +- impl/ascend/device_configs.py | 5 + impl/ascend/functions/index.cpp | 320 ++++++++++++++++++++++++++++ impl/ascend/functions/index_put.cpp | 291 ++++++++++++++++++++++++- impl/ascend_npu/CMakeLists.txt | 1 + impl/ascend_npu/ascend_config.yaml | 4 +- 8 files changed, 733 insertions(+), 10 deletions(-) create mode 100644 impl/ascend/functions/index.cpp diff --git a/impl/ascend/aclnn/adaptor.hpp b/impl/ascend/aclnn/adaptor.hpp index 117423c78..f4881be30 100644 --- a/impl/ascend/aclnn/adaptor.hpp +++ b/impl/ascend/aclnn/adaptor.hpp @@ -149,6 +149,10 @@ struct IsBoolStdArray> : std::true_type {}; inline aclIntArray* createAclIntArrayFromIntVector(const std::vector& vec) { return ::aclCreateIntArray(vec.data(), vec.size()); } +inline aclTensorList* createAclTensorListFromAclTensorVector(const std::vector& tensorsVec) { + return ::aclCreateTensorList(tensorsVec.data(), tensorsVec.size()); +} + inline aclTensorList* createAclTensorListFromAscendTensorVector(const std::vector& tensorsVec) { std::vector tList(tensorsVec.size()); for (size_t i = 0; i < tensorsVec.size(); i++) { @@ -175,7 +179,11 @@ inline aclTensorList* createAclTensorListFromConstDiopiTensorVector(const std::v template >> decltype(auto) convertType(T&& param) { - if constexpr (std::is_same_v) { + if constexpr (std::is_same_v) { + return std::forward(param); + } else if constexpr (std::is_same_v>) { + return createAclTensorListFromAclTensorVector(std::forward(param)); + } else if constexpr (std::is_same_v) { return createAclTensorFromAscendTensor(std::forward(param)); } else if constexpr (std::is_same_v || std::is_same_v) { return createAclTensorFromDiopiTensor(std::forward(param)); @@ -385,4 +393,4 @@ void callAclnnImpl(diopiContextHandle_t ctx, const std::tuple& tuple) { DIOPI_ASECND_CALL_ACLNN_TYPE_SYNC(api, ctx, convertedParams.params()) \ } while (false); -#endif // IMPL_ASCEND_ACLNN_ADAPTOR_HPP_ +#endif // IMPL_ASCEND_ACLNN_ADAPTOR_HPP_ \ No newline at end of file diff --git a/impl/ascend/ascend_tensor.cpp b/impl/ascend/ascend_tensor.cpp index e966bc5f4..d71d5b941 100644 --- a/impl/ascend/ascend_tensor.cpp +++ b/impl/ascend/ascend_tensor.cpp @@ -6,9 +6,11 @@ #include "ascend_tensor.hpp" +// #include #include #include #include +#include #include #include "common/debug.hpp" @@ -82,6 +84,106 @@ AscendTensor& AscendTensor::asStrided(const std::vector& shape, const s return *this; } +AscendTensor& AscendTensor::permute(std::vector dims) { + ASCEND_CHECK_ABORT(this->dim() == dims.size(), "permute dims does not match the tensor dims."); + + std::vector newShape(dims.size(), 0); + std::vector newStride(dims.size(), 0); + + for (size_t i = 0; i < dims.size(); i++) { + newShape[i] = this->shape(dims[i]); + newStride[i] = this->stride(dims[i]); + } + + this->shape_ = newShape; + this->stride_ = newStride; + + return *this; +} + +AscendTensor& AscendTensor::expand(std::vector shape) { + ASCEND_CHECK_ABORT(shape.size() >= this->dim(), + "the number of sizes provided[% ld] must be greater or eaqual to the number of dimensions of the tensor[% ld].", + shape.size(), + this->dim()); + + // todo: dim() == 0 + int64_t expandDims = shape.size() - this->shape().size(); + std::vector tShapeExp(expandDims, 0); + auto tShape = this->shape(); + tShapeExp.insert(tShapeExp.end(), tShape.begin(), tShape.end()); + std::vector newShape = shape; + + for (int64_t i = 0; i < newShape.size(); i++) { + if (newShape[i] < 0 && i < expandDims) { + ASCEND_CHECK_ABORT(false, "The expanded size of the tensor (%ld) isn't allowed in a leading, non-existing dimension %ld", newShape[i], i); + } + + if (i >= expandDims) { + if (newShape[i] == -1) { + newShape[i] = tShapeExp[i]; + } else { + ASCEND_CHECK_ABORT(tShapeExp[i] == 1 || newShape[i] == tShapeExp[i], + "The expanded size of the tensor (%ld) must match the existing size (%ld) at non-singleton dimension %ld.", + newShape[i], + tShapeExp[i], + i); + } + } + } + + int64_t numElem = std::accumulate(newShape.begin(), newShape.end(), 1, std::multiplies<>()); + std::vector newStride(expandDims, 0); + auto tStride = this->stride(); + newStride.insert(newStride.end(), tStride.begin(), tStride.end()); + for (int64_t i = expandDims; i < shape.size(); i++) { + if (shape[i] == -1 || shape[i] == tShapeExp[i]) { + continue; + } else { + newStride[i] = 0; + } + } + + this->numel_ = numElem; + this->shape_ = newShape; + this->stride_ = newStride; + + return *this; +} + +AscendTensor& AscendTensor::resize(const std::vector& shape) { + int64_t numElem = std::accumulate(shape.begin(), shape.end(), 1, std::multiplies<>()); + std::vector stride(shape.size(), 1); + for (int64_t j = shape.size() - 2; j >= 0; j--) { + stride[j] = stride[j + 1] * shape[j + 1]; + } + + this->numel_ = numElem; + this->shape_ = shape; + this->stride_ = stride; + + return *this; +} +AscendTensor& AscendTensor::select(int64_t dim, int64_t index) { + auto shape = this->shape(); + auto stride = this->stride(); + + ASCEND_CHECK_ABORT(dim >= 0 && dim < shape.size(), "selected dim [%ld] execeed the tensor dims [%ld].", dim, shape.size()); + + if (dim < shape.size() - 1) { + int64_t offset = dim * shape[dim] * stride[dim]; + this->storageOffset_ = offset; + } + this->numel_ /= shape[dim]; + + shape.erase(shape.begin() + dim); + stride.erase(stride.begin() + dim); + this->shape_ = shape; + this->stride_ = stride; + + return *this; +} + AscendTensor& AscendTensor::unsqueeze(int dim) { // Note: `channels_last` tensor uses this will become uncontiguous // which is same with pytorch @@ -240,4 +342,4 @@ aclFormat inferAclDataFormat(int64_t dim, const int64_t* shape, const int64_t* s return ACL_FORMAT_ND; } } // namespace ascend -} // namespace impl +} // namespace impl \ No newline at end of file diff --git a/impl/ascend/ascend_tensor.hpp b/impl/ascend/ascend_tensor.hpp index 5c20faab4..20b29b6f5 100644 --- a/impl/ascend/ascend_tensor.hpp +++ b/impl/ascend/ascend_tensor.hpp @@ -245,6 +245,10 @@ class AscendTensor final { AscendTensor& asStrided(const std::vector& shape, const std::vector& stride); AscendTensor& unsqueeze(int dim); AscendTensor& view(const std::vector& shape); + AscendTensor& resize(const std::vector& shape); + AscendTensor& select(int64_t dim, int64_t index); + AscendTensor& permute(std::vector dims); + AscendTensor& expand(std::vector shape); private: // diopi origin tensor @@ -262,4 +266,4 @@ class AscendTensor final { } // namespace ascend } // namespace impl -#endif // IMPL_ASCEND_ASCEND_TENSOR_HPP_ +#endif // IMPL_ASCEND_ASCEND_TENSOR_HPP_ \ No newline at end of file diff --git a/impl/ascend/device_configs.py b/impl/ascend/device_configs.py index 1377c420e..9df743ff6 100755 --- a/impl/ascend/device_configs.py +++ b/impl/ascend/device_configs.py @@ -1177,6 +1177,11 @@ skip_all=True ), + 'index_put_acc_bool_indices_zeros': dict( + name=['index_put'], + skip_all=True + ), + # TODO(zhangqiu) Due to a bug in the software stack, this test will be skipped for now. 'embedding': dict( name=['embedding'], diff --git a/impl/ascend/functions/index.cpp b/impl/ascend/functions/index.cpp new file mode 100644 index 000000000..b9cf3c81b --- /dev/null +++ b/impl/ascend/functions/index.cpp @@ -0,0 +1,320 @@ +/** + * @file + * @author DeepLink + * @copyright (c) 2024, DeepLink. + */ + +#include + +#include "../aclnn/acl_scalar.hpp" +#include "../aclnn/adaptor.hpp" + +namespace impl { +namespace ascend { + +static std::vector castIntIndicesToLongIndices(diopiContextHandle_t ctx, std::vector& indices) { + std::vector result; + for (auto& t : indices) { + if (!t.defined()) { + result.emplace_back(nullptr); + continue; + } + if (t.dtype() == diopi_dtype_int32) { + diopiTensorHandle_t indexHandle = nullptr; + auto shape = t.shape(); + diopiSize_t size = vectorToDiopiSize(shape); + diopiRequireTensor(ctx, &indexHandle, &size, nullptr, diopi_dtype_int64, diopi_device); + DIOPI_ASCEND_CALL_ACLNN(aclnnCast, ctx, t, diopi_dtype_int64, indexHandle); + result.emplace_back(indexHandle); + } else { + if (t.device() == diopi_host) { + result.emplace_back(hostToDevice(ctx, t.tensorHandle())); + } else { + result.emplace_back(t); + } + } + } + return result; +} + +static void checkIndexTensorTypes(const std::vector& indices) { + for (const auto& t : indices) { + if (t.defined()) { + diopiDtype_t type = t.dtype(); + ASCEND_CHECK_ABORT(type == diopi_dtype_int64 || type == diopi_dtype_bool || type == diopi_dtype_uint8, + "tensors used as indices must be long, byte or bool tensors"); + } + } +} + +static AscendTensor nonZeroTensor(diopiContextHandle_t ctx, const AscendTensor& self) { + int64_t numELem = self.numel() * self.dim(); + std::vector nShape{self.numel(), self.dim()}; + std::vector nStride(nShape.size(), 1); + for (int64_t i = nShape.size() - 2; i >= 0; i--) { + nStride[i] = nStride[i + 1] * nShape[i + 1]; + } + + diopiTensorHandle_t nzBuff = nullptr; + diopiSize_t nzBuffSize = vectorToDiopiSize(nShape); + diopiRequireTensor(ctx, &nzBuff, &nzBuffSize, nullptr, diopi_dtype_int64, diopi_device); + AscendTensor nzTensor(nzBuff); + + auto aclNZTensor = ::aclCreateTensor( + nShape.data(), nShape.size(), aclDataType::ACL_INT64, nStride.data(), 0, aclFormat::ACL_FORMAT_ND, &numELem, 1, const_cast(nzTensor.data())); + DIOPI_ASCEND_CALL_ACLNN(aclnnNonzero, ctx, self, aclNZTensor); + + int64_t* vDims = nullptr; + uint64_t vDimsNum = 0; + auto ret = aclGetViewShape(aclNZTensor, &vDims, &vDimsNum); + ASCEND_CHECK_ABORT(ret == 0, "NonZero aclGetViewShape failed."); + + std::vector nzShape(vDims, vDims + vDimsNum); + nzTensor = nzTensor.resize(nzShape); + + delete vDims; + vDims = nullptr; + + diopiTensorHandle_t nzTrans = nullptr; + std::vector nzTransShape{nzShape[1], nzShape[0]}; + diopiSize_t nzTransSize = vectorToDiopiSize(nzTransShape); + diopiRequireTensor(ctx, &nzTrans, &nzTransSize, nullptr, diopi_dtype_int64, diopi_device); + std::vector transDims{1, 0}; + diopiSize_t permuteDims = vectorToDiopiSize(transDims); + DIOPI_ASCEND_CALL_ACLNN(aclnnPermute, ctx, nzTensor, permuteDims, nzTrans); + + return AscendTensor(nzTrans); +} + +static std::vector expandIndicesTensors(diopiContextHandle_t ctx, const AscendTensor& self, const std::vector& indices) { + std::vector result; + for (auto& t : indices) { + if (!t.defined()) { + result.push_back(t); + } else { + if (t.dtype() == diopi_dtype_uint8 || t.dtype() == diopi_dtype_bool) { + ASCEND_CHECK(t.dtype() == diopi_dtype_uint8, + "indexing with dtype torch.uint8 is now deprecated," + " please use a dtype torch.bool instead."); + for (uint64_t j = 0; j < static_cast(t.dim()); j++) { + uint64_t srcIdx = result.size() + j; + ASCEND_CHECK_ABORT(t.shape(j) == self.shape(srcIdx), + "The shape of the mask %ld at index %ld does not match the shape of the indexed tensor %ld at index %ld", + t.dim(), + j, + self.dim(), + srcIdx); + } + AscendTensor non = nonZeroTensor(ctx, t); + for (int64_t j = 0; j < t.dim(); j++) { + result.push_back(non.select(0, j)); + } + } else { + result.push_back(t); + } + } + } + return result; +} + +static aclTensor* createEmptyAclTensor() { + std::vector nShape{0}; + std::vector nStride{1}; + int64_t storageSize = 0; + void* storage = nullptr; + + return ::aclCreateTensor(nShape.data(), nShape.size(), aclDataType::ACL_FLOAT16, nStride.data(), 0, aclFormat::ACL_FORMAT_ND, &storageSize, 0, storage); +} + +static std::vector indicesExpandedOutplace(std::vector indices) { + bool first = true; + std::vector sizes; + + for (auto& idx : indices) { + if (!idx.defined()) { + continue; + } else if (first) { + sizes = idx.shape(); + first = false; + } else { + sizes = inferSize(sizes, idx.shape()); + } + } + + std::vector result; + for (auto& idx : indices) { + if (!idx.defined() || (idx.shape() == sizes)) { + result.push_back(idx); + } else { + result.push_back(idx.expand(sizes)); + } + } + return result; +} + +static bool hasContiguousSubspace(std::vector indices) { // true if all the non-null tensors are adjacent + auto isDefined = [](const AscendTensor& tensor) { return tensor.defined(); }; + auto isNull = [](const AscendTensor& tensor) { return !tensor.defined(); }; + auto start = std::find_if(indices.begin(), indices.end(), isDefined); + auto stop = std::find_if(indices.rbegin(), indices.rend(), isDefined); + auto it = std::find_if(start, stop.base(), isNull); + return it == stop.base(); +} + +static std::tuple> transposeToFront(AscendTensor self, std::vector indices) { + std::vector dims; + std::vector transposedIndices; + + dims.reserve(self.dim()); + for (int64_t i = 0; i < self.dim(); i++) { + if (indices[i].defined()) { + dims.push_back(i); + transposedIndices.push_back(indices[i]); + } + } + + for (int64_t i = 0; i < self.dim(); i++) { + if (!indices[i].defined()) { + dims.push_back(i); + transposedIndices.push_back(indices[i]); + } + } + + return std::make_tuple(self.permute(dims), transposedIndices); +} + +static std::vector indexReshape(std::vector endIndices, int64_t dimsBefore, int64_t dimsAfter) { + std::vector indexShape; + for (auto& idx : endIndices) { + if (idx.defined()) { + std::vector shape; + shape.insert(shape.end(), dimsBefore, 1); + shape.insert(shape.end(), idx.shape().begin(), idx.shape().end()); + shape.insert(shape.end(), dimsAfter, 1); + if (indexShape.empty()) { + indexShape = shape; + } else { + indexShape = inferSize(indexShape, shape); + } + } + } + return indexShape; +} + +static std::vector indexOutputSize(const AscendTensor& self, std::vector& indices) { + std::vector midIndices = indicesExpandedOutplace(indices); + while (midIndices.size() < (size_t)self.dim()) { + midIndices.emplace_back(nullptr); + } + + AscendTensor src = self; + std::vector endIndices = midIndices; + if (!hasContiguousSubspace(midIndices)) { + endIndices.clear(); + std::tie(src, endIndices) = transposeToFront(self, midIndices); + } + + int64_t dimsBefore = 0; + int64_t dimsAfter = 0; + int64_t dimsIndexed = 0; + + std::vector replaceShape; + std::vector indexedSizes; + + for (size_t dim = 0; dim < endIndices.size(); dim++) { + if (!endIndices[dim].defined()) { + if (dimsIndexed == 0) { + dimsBefore++; + } else { + dimsAfter++; + } + } else { + dimsIndexed++; + replaceShape = endIndices[dim].shape(); + indexedSizes.push_back(src.shape(dim)); + } + } + + if (std::find(indexedSizes.begin(), indexedSizes.end(), 0) != indexedSizes.end() && + std::find(replaceShape.begin(), replaceShape.end(), 0) == replaceShape.end()) { + ASCEND_CHECK_ABORT(false, "index is out of bounds for dimension with size 0"); + } + + auto selfShape = src.shape(); + int64_t end = dimsBefore + dimsIndexed; + selfShape.erase(selfShape.begin() + dimsBefore, selfShape.begin() + end); + selfShape.insert(selfShape.begin() + dimsBefore, replaceShape.begin(), replaceShape.end()); + + std::vector indexShape = indexReshape(endIndices, dimsBefore, dimsAfter); + std::vector outputSize = indexShape; + if (indexShape != selfShape) { + outputSize = inferSize(indexShape, selfShape); + } + + return outputSize; +} + +diopiError_t diopiIndex(diopiContextHandle_t ctx, diopiTensorHandle_t* out, diopiConstTensorHandle_t input, diopiConstTensorHandle_t* indices, int64_t nums) { + AscendTensor inputAt(input); + std::vector indicesOrigin(nums); + for (int64_t i = 0; i < nums; i++) { + if (indices[i] != nullptr) { + indicesOrigin[i] = AscendTensor(indices[i]); + } + } + + std::vector indicesList = castIntIndicesToLongIndices(ctx, indicesOrigin); + checkIndexTensorTypes(indicesList); + + auto indicesExpanded = expandIndicesTensors(ctx, inputAt, indicesList); + + std::vector allDefinedIndices; + auto emptyTensor = createEmptyAclTensor(); + for (const auto& idx : indicesExpanded) { + if (idx.defined()) { + allDefinedIndices.push_back(aclnn_adaptor::createAclTensorFromAscendTensor(idx)); + } else { + allDefinedIndices.push_back(emptyTensor); + } + } + + std::vector outShape = indexOutputSize(inputAt, indicesExpanded); + + diopiSize_t outSize = vectorToDiopiSize(outShape); + diopiRequireTensor(ctx, out, &outSize, nullptr, inputAt.dtype(), diopi_device); + + DIOPI_ASCEND_CALL_ACLNN(aclnnIndex, ctx, inputAt, allDefinedIndices, *out); + return diopiSuccess; +} + +diopiError_t diopiIndexBackward(diopiContextHandle_t ctx, diopiTensorHandle_t gradInput, diopiTensorHandle_t zerosLikeInput, diopiConstTensorHandle_t* indices, + int64_t nums, diopiConstTensorHandle_t gradOutput) { + AscendTensor gradInputTensor(gradInput); + AscendTensor gradOutputTensor(gradOutput); + if (gradInputTensor.numel() == 0 || gradOutputTensor.numel() == 0) { + return diopiSuccess; + } + + std::vector indicesVec; + indicesVec.reserve(nums); + + for (int i = 0; i < nums; i++) { + if (indices[i] != nullptr) { + indicesVec.emplace_back(indices[i]); + } else { + int64_t array[1] = {0}; + diopiSize_t size = {array, 1}; + diopiTensorHandle_t emptyTensor = nullptr; + diopiRequireTensor(ctx, &emptyTensor, &size, nullptr, gradOutputTensor.dtype(), diopi_device); + indicesVec.emplace_back(emptyTensor); + } + } + + DIOPI_ASCEND_CALL_ACLNN(aclnnInplaceCopy, ctx, gradInput, zerosLikeInput); + DIOPI_ASCEND_CALL_ACLNN(aclnnIndexPutImpl, ctx, gradInput, indicesVec, gradOutput, true, false); + + return diopiSuccess; +} + +} // namespace ascend +} // namespace impl \ No newline at end of file diff --git a/impl/ascend/functions/index_put.cpp b/impl/ascend/functions/index_put.cpp index 3b01d6cfd..d3d0565c9 100755 --- a/impl/ascend/functions/index_put.cpp +++ b/impl/ascend/functions/index_put.cpp @@ -9,18 +9,301 @@ namespace impl { namespace ascend { + +static std::vector castIntIndicesToLongIndices(diopiContextHandle_t ctx, std::vector& indices) { + std::vector result; + for (auto& t : indices) { + if (!t.defined()) { + result.emplace_back(nullptr); + continue; + } + if (t.dtype() == diopi_dtype_int32) { + diopiTensorHandle_t indexHandle = nullptr; + auto shape = t.shape(); + diopiSize_t size = vectorToDiopiSize(shape); + diopiRequireTensor(ctx, &indexHandle, &size, nullptr, diopi_dtype_int64, diopi_device); + DIOPI_ASCEND_CALL_ACLNN(aclnnCast, ctx, t, diopi_dtype_int64, indexHandle); + result.emplace_back(indexHandle); + } else { + if (t.device() == diopi_host) { + result.emplace_back(hostToDevice(ctx, t.tensorHandle())); + } else { + result.emplace_back(t); + } + } + } + return result; +} + +static void checkIndexTensorTypes(const std::vector& indices) { + for (const auto& t : indices) { + if (t.defined()) { + diopiDtype_t type = t.dtype(); + ASCEND_CHECK_ABORT(type == diopi_dtype_int64 || type == diopi_dtype_bool || type == diopi_dtype_uint8, + "tensors used as indices must be long, byte or bool tensors"); + } + } +} + +static AscendTensor nonZeroTensor(diopiContextHandle_t ctx, const AscendTensor& self) { + int64_t numELem = self.numel() * self.dim(); + std::vector nShape{self.numel(), self.dim()}; + std::vector nStride(nShape.size(), 1); + for (int64_t i = nShape.size() - 2; i >= 0; i--) { + nStride[i] = nStride[i + 1] * nShape[i + 1]; + } + + diopiTensorHandle_t nzBuff = nullptr; + diopiSize_t nzBuffSize = vectorToDiopiSize(nShape); + diopiRequireTensor(ctx, &nzBuff, &nzBuffSize, nullptr, diopi_dtype_int64, diopi_device); + AscendTensor nzTensor(nzBuff); + + auto aclNZTensor = ::aclCreateTensor( + nShape.data(), nShape.size(), aclDataType::ACL_INT64, nStride.data(), 0, aclFormat::ACL_FORMAT_ND, &numELem, 1, const_cast(nzTensor.data())); + DIOPI_ASCEND_CALL_ACLNN(aclnnNonzero, ctx, self, aclNZTensor); + + int64_t* vDims = nullptr; + uint64_t vDimsNum = 0; + auto ret = aclGetViewShape(aclNZTensor, &vDims, &vDimsNum); + ASCEND_CHECK_ABORT(ret == 0, "NonZero aclGetViewShape failed."); + + std::vector nzShape(vDims, vDims + vDimsNum); + nzTensor = nzTensor.resize(nzShape); + + delete vDims; + vDims = nullptr; + + diopiTensorHandle_t nzTrans = nullptr; + std::vector nzTransShape{nzShape[1], nzShape[0]}; + diopiSize_t nzTransSize = vectorToDiopiSize(nzTransShape); + diopiRequireTensor(ctx, &nzTrans, &nzTransSize, nullptr, diopi_dtype_int64, diopi_device); + std::vector transDims{1, 0}; + diopiSize_t permuteDims = vectorToDiopiSize(transDims); + DIOPI_ASCEND_CALL_ACLNN(aclnnPermute, ctx, nzTensor, permuteDims, nzTrans); + + return AscendTensor(nzTrans); +} + +static std::vector expandIndicesTensors(diopiContextHandle_t ctx, const AscendTensor& self, const std::vector& indices) { + std::vector result; + for (auto& t : indices) { + if (!t.defined()) { + result.push_back(t); + } else { + if (t.dtype() == diopi_dtype_uint8 || t.dtype() == diopi_dtype_bool) { + ASCEND_CHECK(t.dtype() == diopi_dtype_uint8, + "indexing with dtype torch.uint8 is now deprecated," + " please use a dtype torch.bool instead."); + for (uint64_t j = 0; j < static_cast(t.dim()); j++) { + uint64_t srcIdx = result.size() + j; + ASCEND_CHECK_ABORT(t.shape(j) == self.shape(srcIdx), + "The shape of the mask %ld at index %ld does not match the shape of the indexed tensor %ld at index %ld", + t.dim(), + j, + self.dim(), + srcIdx); + } + AscendTensor non = nonZeroTensor(ctx, t); + for (int64_t j = 0; j < t.dim(); j++) { + result.push_back(non.select(0, j)); + } + } else { + result.push_back(t); + } + } + } + return result; +} + + +static aclTensor* createEmptyAclTensor() { + std::vector nShape{0}; + std::vector nStride{1}; + int64_t storageSize = 0; + void* storage = nullptr; + + return ::aclCreateTensor(nShape.data(), nShape.size(), aclDataType::ACL_FLOAT16, nStride.data(), 0, aclFormat::ACL_FORMAT_ND, &storageSize, 0, storage); +} + +static std::vector indicesExpandedOutplace(std::vector indices) { + bool first = true; + std::vector sizes; + + for (auto& idx : indices) { + if (!idx.defined()) { + continue; + } else if (first) { + sizes = idx.shape(); + first = false; + } else { + sizes = inferSize(sizes, idx.shape()); + } + } + + std::vector result; + for (auto& idx : indices) { + if (!idx.defined() || (idx.shape() == sizes)) { + result.push_back(idx); + } else { + result.push_back(idx.expand(sizes)); + } + } + return result; +} + +static bool hasContiguousSubspace(std::vector indices) { // true if all the non-null tensors are adjacent + auto isDefined = [](const AscendTensor& tensor) { return tensor.defined(); }; + auto isNull = [](const AscendTensor& tensor) { return !tensor.defined(); }; + auto start = std::find_if(indices.begin(), indices.end(), isDefined); + auto stop = std::find_if(indices.rbegin(), indices.rend(), isDefined); + auto it = std::find_if(start, stop.base(), isNull); + return it == stop.base(); +} + +static std::tuple> transposeToFront(AscendTensor self, std::vector indices) { + std::vector dims; + std::vector transposedIndices; + + dims.reserve(self.dim()); + for (int64_t i = 0; i < self.dim(); i++) { + if (indices[i].defined()) { + dims.push_back(i); + transposedIndices.push_back(indices[i]); + } + } + + for (int64_t i = 0; i < self.dim(); i++) { + if (!indices[i].defined()) { + dims.push_back(i); + transposedIndices.push_back(indices[i]); + } + } + + return std::make_tuple(self.permute(dims), transposedIndices); +} + +static std::vector indexReshape(std::vector endIndices, int64_t dimsBefore, int64_t dimsAfter) { + std::vector indexShape; + for (auto& idx : endIndices) { + if (idx.defined()) { + std::vector shape; + shape.insert(shape.end(), dimsBefore, 1); + shape.insert(shape.end(), idx.shape().begin(), idx.shape().end()); + shape.insert(shape.end(), dimsAfter, 1); + if (indexShape.empty()) { + indexShape = shape; + } else { + indexShape = inferSize(indexShape, shape); + } + } + } + return indexShape; +} + +static std::vector indexOutputSize(const AscendTensor& self, std::vector& indices) { + std::vector midIndices = indicesExpandedOutplace(indices); + while (midIndices.size() < (size_t)self.dim()) { + midIndices.emplace_back(nullptr); + } + + AscendTensor src = self; + std::vector endIndices = midIndices; + if (!hasContiguousSubspace(midIndices)) { + endIndices.clear(); + std::tie(src, endIndices) = transposeToFront(self, midIndices); + } + + int64_t dimsBefore = 0; + int64_t dimsAfter = 0; + int64_t dimsIndexed = 0; + + std::vector replaceShape; + std::vector indexedSizes; + + for (size_t dim = 0; dim < endIndices.size(); dim++) { + if (!endIndices[dim].defined()) { + if (dimsIndexed == 0) { + dimsBefore++; + } else { + dimsAfter++; + } + } else { + dimsIndexed++; + replaceShape = endIndices[dim].shape(); + indexedSizes.push_back(src.shape(dim)); + } + } + + if (std::find(indexedSizes.begin(), indexedSizes.end(), 0) != indexedSizes.end() && + std::find(replaceShape.begin(), replaceShape.end(), 0) == replaceShape.end()) { + ASCEND_CHECK_ABORT(false, "index is out of bounds for dimension with size 0"); + } + + auto selfShape = src.shape(); + int64_t end = dimsBefore + dimsIndexed; + selfShape.erase(selfShape.begin() + dimsBefore, selfShape.begin() + end); + selfShape.insert(selfShape.begin() + dimsBefore, replaceShape.begin(), replaceShape.end()); + + std::vector indexShape = indexReshape(endIndices, dimsBefore, dimsAfter); + std::vector outputSize = indexShape; + if (indexShape != selfShape) { + outputSize = inferSize(indexShape, selfShape); + } + + return outputSize; +} + diopiError_t diopiIndexPut(diopiContextHandle_t ctx, diopiTensorHandle_t out, diopiConstTensorHandle_t input, diopiConstTensorHandle_t values, diopiConstTensorHandle_t* indices, int64_t indicesCounts, bool accumulate) { diopiCopyInp(ctx, input, out); - std::vector indicesVec(indices, indices + indicesCounts); - DIOPI_ASCEND_CALL_ACLNN(aclnnIndexPutImpl, ctx, out, indicesVec, values, accumulate, false); + AscendTensor inputAt(input); + std::vector indicesOrigin(indicesCounts); + for (int64_t i = 0; i < indicesCounts; i++) { + if (indices[i] != nullptr) { + indicesOrigin[i] = AscendTensor(indices[i]); + } + } + std::vector indicesList = castIntIndicesToLongIndices(ctx, indicesOrigin); + checkIndexTensorTypes(indicesList); + auto indicesExpanded = expandIndicesTensors(ctx, inputAt, indicesList); + std::vector allDefinedIndices; + auto emptyTensor = createEmptyAclTensor(); + for (const auto& idx : indicesExpanded) { + if (idx.defined()) { + allDefinedIndices.push_back(aclnn_adaptor::createAclTensorFromAscendTensor(idx)); + } else { + allDefinedIndices.push_back(emptyTensor); + } + } + + DIOPI_ASCEND_CALL_ACLNN(aclnnIndexPutImpl, ctx, out, allDefinedIndices, values, accumulate, false); return diopiSuccess; + } diopiError_t diopiIndexPutInp(diopiContextHandle_t ctx, diopiTensorHandle_t input, diopiConstTensorHandle_t values, diopiConstTensorHandle_t* indices, int64_t indicesCounts, bool accumulate) { - std::vector indicesVec(indices, indices + indicesCounts); - DIOPI_ASCEND_CALL_ACLNN(aclnnIndexPutImpl, ctx, input, indicesVec, values, accumulate, false); + AscendTensor inputAt(input); + std::vector indicesOrigin(indicesCounts); + for (int64_t i = 0; i < indicesCounts; i++) { + if (indices[i] != nullptr) { + indicesOrigin[i] = AscendTensor(indices[i]); + } + } + std::vector indicesList = castIntIndicesToLongIndices(ctx, indicesOrigin); + checkIndexTensorTypes(indicesList); + auto indicesExpanded = expandIndicesTensors(ctx, inputAt, indicesList); + std::vector allDefinedIndices; + auto emptyTensor = createEmptyAclTensor(); + for (const auto& idx : indicesExpanded) { + if (idx.defined()) { + allDefinedIndices.push_back(aclnn_adaptor::createAclTensorFromAscendTensor(idx)); + } else { + allDefinedIndices.push_back(emptyTensor); + } + } + + DIOPI_ASCEND_CALL_ACLNN(aclnnIndexPutImpl, ctx, input, allDefinedIndices, values, accumulate, false); return diopiSuccess; } diff --git a/impl/ascend_npu/CMakeLists.txt b/impl/ascend_npu/CMakeLists.txt index ba7701105..84f285f0b 100755 --- a/impl/ascend_npu/CMakeLists.txt +++ b/impl/ascend_npu/CMakeLists.txt @@ -167,6 +167,7 @@ set(OLD_IMPL_SRC ${OLD_IMPL_DIR}/functions/arange.cpp ${OLD_IMPL_DIR}/functions/gather.cpp ${OLD_IMPL_DIR}/functions/layer_norm.cpp + ${OLD_IMPL_DIR}/functions/index.cpp ${OLD_IMPL_DIR}/functions/index_put.cpp ${OLD_IMPL_DIR}/functions/index_select.cpp ${OLD_IMPL_DIR}/functions/repeat.cpp diff --git a/impl/ascend_npu/ascend_config.yaml b/impl/ascend_npu/ascend_config.yaml index 9dbdec336..7def339c0 100755 --- a/impl/ascend_npu/ascend_config.yaml +++ b/impl/ascend_npu/ascend_config.yaml @@ -112,6 +112,8 @@ ascend: - diopiHardtanh - diopiHardtanhBackward - diopiHardtanhInp +- diopiIndex +- diopiIndexBackward - diopiIndexPut - diopiIndexPutInp - diopiIndexSelect @@ -265,8 +267,6 @@ ascend_npu: - diopiApplyPenalty - diopiContextAttentionInference - diopiGetNativeMemoryFormat -- diopiIndex -- diopiIndexBackward - diopiNLLLoss - diopiNLLLossBackward - diopiNLLLossV2 From f713c69ce7fdbd61874f358e8578a95cda36c9ca Mon Sep 17 00:00:00 2001 From: zhangqiu Date: Thu, 8 Aug 2024 17:05:38 +0800 Subject: [PATCH 2/5] fix --- adaptor/codegen/gen.py | 2 +- impl/ascend/aclnn/adaptor.hpp | 2 +- impl/ascend/ascend_tensor.cpp | 2 +- impl/ascend/ascend_tensor.hpp | 2 +- impl/ascend/functions/index.cpp | 32 ++-- impl/ascend/functions/index_put.cpp | 265 ++-------------------------- 6 files changed, 35 insertions(+), 270 deletions(-) diff --git a/adaptor/codegen/gen.py b/adaptor/codegen/gen.py index af8727d8b..1f9319b12 100644 --- a/adaptor/codegen/gen.py +++ b/adaptor/codegen/gen.py @@ -196,7 +196,7 @@ def prepare() -> Tuple[dict, str]: impl_plugin = options.impl_plugin base_device = options.base_device - assert(base_device is None or base_device == "" or base_device == "torch", f"invalid base_device:{base_device}") + assert base_device is None or base_device == "" or base_device == "torch", f"invalid base_device:{base_device}" if base_device == "": base_device = None def create_if_not_exist(name): diff --git a/impl/ascend/aclnn/adaptor.hpp b/impl/ascend/aclnn/adaptor.hpp index f4881be30..f0c4ff953 100644 --- a/impl/ascend/aclnn/adaptor.hpp +++ b/impl/ascend/aclnn/adaptor.hpp @@ -393,4 +393,4 @@ void callAclnnImpl(diopiContextHandle_t ctx, const std::tuple& tuple) { DIOPI_ASECND_CALL_ACLNN_TYPE_SYNC(api, ctx, convertedParams.params()) \ } while (false); -#endif // IMPL_ASCEND_ACLNN_ADAPTOR_HPP_ \ No newline at end of file +#endif // IMPL_ASCEND_ACLNN_ADAPTOR_HPP_ diff --git a/impl/ascend/ascend_tensor.cpp b/impl/ascend/ascend_tensor.cpp index d71d5b941..f39f87902 100644 --- a/impl/ascend/ascend_tensor.cpp +++ b/impl/ascend/ascend_tensor.cpp @@ -342,4 +342,4 @@ aclFormat inferAclDataFormat(int64_t dim, const int64_t* shape, const int64_t* s return ACL_FORMAT_ND; } } // namespace ascend -} // namespace impl \ No newline at end of file +} // namespace impl diff --git a/impl/ascend/ascend_tensor.hpp b/impl/ascend/ascend_tensor.hpp index 20b29b6f5..cf295e87b 100644 --- a/impl/ascend/ascend_tensor.hpp +++ b/impl/ascend/ascend_tensor.hpp @@ -266,4 +266,4 @@ class AscendTensor final { } // namespace ascend } // namespace impl -#endif // IMPL_ASCEND_ASCEND_TENSOR_HPP_ \ No newline at end of file +#endif // IMPL_ASCEND_ASCEND_TENSOR_HPP_ diff --git a/impl/ascend/functions/index.cpp b/impl/ascend/functions/index.cpp index b9cf3c81b..ac2cd78c3 100644 --- a/impl/ascend/functions/index.cpp +++ b/impl/ascend/functions/index.cpp @@ -12,7 +12,8 @@ namespace impl { namespace ascend { -static std::vector castIntIndicesToLongIndices(diopiContextHandle_t ctx, std::vector& indices) { +namespace indexProcess { +std::vector castIntIndicesToLongIndices(diopiContextHandle_t ctx, std::vector& indices) { std::vector result; for (auto& t : indices) { if (!t.defined()) { @@ -37,7 +38,7 @@ static std::vector castIntIndicesToLongIndices(diopiContextHandle_ return result; } -static void checkIndexTensorTypes(const std::vector& indices) { +void checkIndexTensorTypes(const std::vector& indices) { for (const auto& t : indices) { if (t.defined()) { diopiDtype_t type = t.dtype(); @@ -47,7 +48,7 @@ static void checkIndexTensorTypes(const std::vector& indices) { } } -static AscendTensor nonZeroTensor(diopiContextHandle_t ctx, const AscendTensor& self) { +AscendTensor nonZeroTensor(diopiContextHandle_t ctx, const AscendTensor& self) { int64_t numELem = self.numel() * self.dim(); std::vector nShape{self.numel(), self.dim()}; std::vector nStride(nShape.size(), 1); @@ -86,7 +87,7 @@ static AscendTensor nonZeroTensor(diopiContextHandle_t ctx, const AscendTensor& return AscendTensor(nzTrans); } -static std::vector expandIndicesTensors(diopiContextHandle_t ctx, const AscendTensor& self, const std::vector& indices) { +std::vector expandIndicesTensors(diopiContextHandle_t ctx, const AscendTensor& self, const std::vector& indices) { std::vector result; for (auto& t : indices) { if (!t.defined()) { @@ -117,7 +118,7 @@ static std::vector expandIndicesTensors(diopiContextHandle_t ctx, return result; } -static aclTensor* createEmptyAclTensor() { +aclTensor* createEmptyAclTensor() { std::vector nShape{0}; std::vector nStride{1}; int64_t storageSize = 0; @@ -152,7 +153,7 @@ static std::vector indicesExpandedOutplace(std::vector indices) { // true if all the non-null tensors are adjacent +bool hasContiguousSubspace(std::vector indices) { // true if all the non-null tensors are adjacent auto isDefined = [](const AscendTensor& tensor) { return tensor.defined(); }; auto isNull = [](const AscendTensor& tensor) { return !tensor.defined(); }; auto start = std::find_if(indices.begin(), indices.end(), isDefined); @@ -161,7 +162,7 @@ static bool hasContiguousSubspace(std::vector indices) { // true return it == stop.base(); } -static std::tuple> transposeToFront(AscendTensor self, std::vector indices) { +std::tuple> transposeToFront(AscendTensor self, std::vector indices) { std::vector dims; std::vector transposedIndices; @@ -183,7 +184,7 @@ static std::tuple> transposeToFront(Asce return std::make_tuple(self.permute(dims), transposedIndices); } -static std::vector indexReshape(std::vector endIndices, int64_t dimsBefore, int64_t dimsAfter) { +std::vector indexReshape(std::vector endIndices, int64_t dimsBefore, int64_t dimsAfter) { std::vector indexShape; for (auto& idx : endIndices) { if (idx.defined()) { @@ -201,7 +202,7 @@ static std::vector indexReshape(std::vector endIndices, i return indexShape; } -static std::vector indexOutputSize(const AscendTensor& self, std::vector& indices) { +std::vector indexOutputSize(const AscendTensor& self, std::vector& indices) { std::vector midIndices = indicesExpandedOutplace(indices); while (midIndices.size() < (size_t)self.dim()) { midIndices.emplace_back(nullptr); @@ -253,6 +254,7 @@ static std::vector indexOutputSize(const AscendTensor& self, std::vecto return outputSize; } +} // namespace indexProcess diopiError_t diopiIndex(diopiContextHandle_t ctx, diopiTensorHandle_t* out, diopiConstTensorHandle_t input, diopiConstTensorHandle_t* indices, int64_t nums) { AscendTensor inputAt(input); @@ -263,13 +265,13 @@ diopiError_t diopiIndex(diopiContextHandle_t ctx, diopiTensorHandle_t* out, diop } } - std::vector indicesList = castIntIndicesToLongIndices(ctx, indicesOrigin); - checkIndexTensorTypes(indicesList); + std::vector indicesList = indexProcess::castIntIndicesToLongIndices(ctx, indicesOrigin); + indexProcess::checkIndexTensorTypes(indicesList); - auto indicesExpanded = expandIndicesTensors(ctx, inputAt, indicesList); + auto indicesExpanded = indexProcess::expandIndicesTensors(ctx, inputAt, indicesList); std::vector allDefinedIndices; - auto emptyTensor = createEmptyAclTensor(); + auto emptyTensor = indexProcess::createEmptyAclTensor(); for (const auto& idx : indicesExpanded) { if (idx.defined()) { allDefinedIndices.push_back(aclnn_adaptor::createAclTensorFromAscendTensor(idx)); @@ -278,7 +280,7 @@ diopiError_t diopiIndex(diopiContextHandle_t ctx, diopiTensorHandle_t* out, diop } } - std::vector outShape = indexOutputSize(inputAt, indicesExpanded); + std::vector outShape = indexProcess::indexOutputSize(inputAt, indicesExpanded); diopiSize_t outSize = vectorToDiopiSize(outShape); diopiRequireTensor(ctx, out, &outSize, nullptr, inputAt.dtype(), diopi_device); @@ -317,4 +319,4 @@ diopiError_t diopiIndexBackward(diopiContextHandle_t ctx, diopiTensorHandle_t gr } } // namespace ascend -} // namespace impl \ No newline at end of file +} // namespace impl diff --git a/impl/ascend/functions/index_put.cpp b/impl/ascend/functions/index_put.cpp index d3d0565c9..383e133b2 100755 --- a/impl/ascend/functions/index_put.cpp +++ b/impl/ascend/functions/index_put.cpp @@ -10,248 +10,12 @@ namespace impl { namespace ascend { -static std::vector castIntIndicesToLongIndices(diopiContextHandle_t ctx, std::vector& indices) { - std::vector result; - for (auto& t : indices) { - if (!t.defined()) { - result.emplace_back(nullptr); - continue; - } - if (t.dtype() == diopi_dtype_int32) { - diopiTensorHandle_t indexHandle = nullptr; - auto shape = t.shape(); - diopiSize_t size = vectorToDiopiSize(shape); - diopiRequireTensor(ctx, &indexHandle, &size, nullptr, diopi_dtype_int64, diopi_device); - DIOPI_ASCEND_CALL_ACLNN(aclnnCast, ctx, t, diopi_dtype_int64, indexHandle); - result.emplace_back(indexHandle); - } else { - if (t.device() == diopi_host) { - result.emplace_back(hostToDevice(ctx, t.tensorHandle())); - } else { - result.emplace_back(t); - } - } - } - return result; -} - -static void checkIndexTensorTypes(const std::vector& indices) { - for (const auto& t : indices) { - if (t.defined()) { - diopiDtype_t type = t.dtype(); - ASCEND_CHECK_ABORT(type == diopi_dtype_int64 || type == diopi_dtype_bool || type == diopi_dtype_uint8, - "tensors used as indices must be long, byte or bool tensors"); - } - } -} - -static AscendTensor nonZeroTensor(diopiContextHandle_t ctx, const AscendTensor& self) { - int64_t numELem = self.numel() * self.dim(); - std::vector nShape{self.numel(), self.dim()}; - std::vector nStride(nShape.size(), 1); - for (int64_t i = nShape.size() - 2; i >= 0; i--) { - nStride[i] = nStride[i + 1] * nShape[i + 1]; - } - - diopiTensorHandle_t nzBuff = nullptr; - diopiSize_t nzBuffSize = vectorToDiopiSize(nShape); - diopiRequireTensor(ctx, &nzBuff, &nzBuffSize, nullptr, diopi_dtype_int64, diopi_device); - AscendTensor nzTensor(nzBuff); - - auto aclNZTensor = ::aclCreateTensor( - nShape.data(), nShape.size(), aclDataType::ACL_INT64, nStride.data(), 0, aclFormat::ACL_FORMAT_ND, &numELem, 1, const_cast(nzTensor.data())); - DIOPI_ASCEND_CALL_ACLNN(aclnnNonzero, ctx, self, aclNZTensor); - - int64_t* vDims = nullptr; - uint64_t vDimsNum = 0; - auto ret = aclGetViewShape(aclNZTensor, &vDims, &vDimsNum); - ASCEND_CHECK_ABORT(ret == 0, "NonZero aclGetViewShape failed."); - - std::vector nzShape(vDims, vDims + vDimsNum); - nzTensor = nzTensor.resize(nzShape); - - delete vDims; - vDims = nullptr; - - diopiTensorHandle_t nzTrans = nullptr; - std::vector nzTransShape{nzShape[1], nzShape[0]}; - diopiSize_t nzTransSize = vectorToDiopiSize(nzTransShape); - diopiRequireTensor(ctx, &nzTrans, &nzTransSize, nullptr, diopi_dtype_int64, diopi_device); - std::vector transDims{1, 0}; - diopiSize_t permuteDims = vectorToDiopiSize(transDims); - DIOPI_ASCEND_CALL_ACLNN(aclnnPermute, ctx, nzTensor, permuteDims, nzTrans); - - return AscendTensor(nzTrans); -} - -static std::vector expandIndicesTensors(diopiContextHandle_t ctx, const AscendTensor& self, const std::vector& indices) { - std::vector result; - for (auto& t : indices) { - if (!t.defined()) { - result.push_back(t); - } else { - if (t.dtype() == diopi_dtype_uint8 || t.dtype() == diopi_dtype_bool) { - ASCEND_CHECK(t.dtype() == diopi_dtype_uint8, - "indexing with dtype torch.uint8 is now deprecated," - " please use a dtype torch.bool instead."); - for (uint64_t j = 0; j < static_cast(t.dim()); j++) { - uint64_t srcIdx = result.size() + j; - ASCEND_CHECK_ABORT(t.shape(j) == self.shape(srcIdx), - "The shape of the mask %ld at index %ld does not match the shape of the indexed tensor %ld at index %ld", - t.dim(), - j, - self.dim(), - srcIdx); - } - AscendTensor non = nonZeroTensor(ctx, t); - for (int64_t j = 0; j < t.dim(); j++) { - result.push_back(non.select(0, j)); - } - } else { - result.push_back(t); - } - } - } - return result; -} - - -static aclTensor* createEmptyAclTensor() { - std::vector nShape{0}; - std::vector nStride{1}; - int64_t storageSize = 0; - void* storage = nullptr; - - return ::aclCreateTensor(nShape.data(), nShape.size(), aclDataType::ACL_FLOAT16, nStride.data(), 0, aclFormat::ACL_FORMAT_ND, &storageSize, 0, storage); -} - -static std::vector indicesExpandedOutplace(std::vector indices) { - bool first = true; - std::vector sizes; - - for (auto& idx : indices) { - if (!idx.defined()) { - continue; - } else if (first) { - sizes = idx.shape(); - first = false; - } else { - sizes = inferSize(sizes, idx.shape()); - } - } - - std::vector result; - for (auto& idx : indices) { - if (!idx.defined() || (idx.shape() == sizes)) { - result.push_back(idx); - } else { - result.push_back(idx.expand(sizes)); - } - } - return result; -} - -static bool hasContiguousSubspace(std::vector indices) { // true if all the non-null tensors are adjacent - auto isDefined = [](const AscendTensor& tensor) { return tensor.defined(); }; - auto isNull = [](const AscendTensor& tensor) { return !tensor.defined(); }; - auto start = std::find_if(indices.begin(), indices.end(), isDefined); - auto stop = std::find_if(indices.rbegin(), indices.rend(), isDefined); - auto it = std::find_if(start, stop.base(), isNull); - return it == stop.base(); -} - -static std::tuple> transposeToFront(AscendTensor self, std::vector indices) { - std::vector dims; - std::vector transposedIndices; - - dims.reserve(self.dim()); - for (int64_t i = 0; i < self.dim(); i++) { - if (indices[i].defined()) { - dims.push_back(i); - transposedIndices.push_back(indices[i]); - } - } - - for (int64_t i = 0; i < self.dim(); i++) { - if (!indices[i].defined()) { - dims.push_back(i); - transposedIndices.push_back(indices[i]); - } - } - - return std::make_tuple(self.permute(dims), transposedIndices); -} - -static std::vector indexReshape(std::vector endIndices, int64_t dimsBefore, int64_t dimsAfter) { - std::vector indexShape; - for (auto& idx : endIndices) { - if (idx.defined()) { - std::vector shape; - shape.insert(shape.end(), dimsBefore, 1); - shape.insert(shape.end(), idx.shape().begin(), idx.shape().end()); - shape.insert(shape.end(), dimsAfter, 1); - if (indexShape.empty()) { - indexShape = shape; - } else { - indexShape = inferSize(indexShape, shape); - } - } - } - return indexShape; -} - -static std::vector indexOutputSize(const AscendTensor& self, std::vector& indices) { - std::vector midIndices = indicesExpandedOutplace(indices); - while (midIndices.size() < (size_t)self.dim()) { - midIndices.emplace_back(nullptr); - } - - AscendTensor src = self; - std::vector endIndices = midIndices; - if (!hasContiguousSubspace(midIndices)) { - endIndices.clear(); - std::tie(src, endIndices) = transposeToFront(self, midIndices); - } - - int64_t dimsBefore = 0; - int64_t dimsAfter = 0; - int64_t dimsIndexed = 0; - - std::vector replaceShape; - std::vector indexedSizes; - - for (size_t dim = 0; dim < endIndices.size(); dim++) { - if (!endIndices[dim].defined()) { - if (dimsIndexed == 0) { - dimsBefore++; - } else { - dimsAfter++; - } - } else { - dimsIndexed++; - replaceShape = endIndices[dim].shape(); - indexedSizes.push_back(src.shape(dim)); - } - } - - if (std::find(indexedSizes.begin(), indexedSizes.end(), 0) != indexedSizes.end() && - std::find(replaceShape.begin(), replaceShape.end(), 0) == replaceShape.end()) { - ASCEND_CHECK_ABORT(false, "index is out of bounds for dimension with size 0"); - } - - auto selfShape = src.shape(); - int64_t end = dimsBefore + dimsIndexed; - selfShape.erase(selfShape.begin() + dimsBefore, selfShape.begin() + end); - selfShape.insert(selfShape.begin() + dimsBefore, replaceShape.begin(), replaceShape.end()); - - std::vector indexShape = indexReshape(endIndices, dimsBefore, dimsAfter); - std::vector outputSize = indexShape; - if (indexShape != selfShape) { - outputSize = inferSize(indexShape, selfShape); - } - - return outputSize; -} +namespace indexProcess { +extern std::vector castIntIndicesToLongIndices(diopiContextHandle_t ctx, std::vector& indices); +extern void checkIndexTensorTypes(const std::vector& indices); +extern std::vector expandIndicesTensors(diopiContextHandle_t ctx, const AscendTensor& self, const std::vector& indices); +extern aclTensor* createEmptyAclTensor(); +} // namespace indexProcess diopiError_t diopiIndexPut(diopiContextHandle_t ctx, diopiTensorHandle_t out, diopiConstTensorHandle_t input, diopiConstTensorHandle_t values, diopiConstTensorHandle_t* indices, int64_t indicesCounts, bool accumulate) { @@ -263,11 +27,11 @@ diopiError_t diopiIndexPut(diopiContextHandle_t ctx, diopiTensorHandle_t out, di indicesOrigin[i] = AscendTensor(indices[i]); } } - std::vector indicesList = castIntIndicesToLongIndices(ctx, indicesOrigin); - checkIndexTensorTypes(indicesList); - auto indicesExpanded = expandIndicesTensors(ctx, inputAt, indicesList); + std::vector indicesList = indexProcess::castIntIndicesToLongIndices(ctx, indicesOrigin); + indexProcess::checkIndexTensorTypes(indicesList); + auto indicesExpanded = indexProcess::expandIndicesTensors(ctx, inputAt, indicesList); std::vector allDefinedIndices; - auto emptyTensor = createEmptyAclTensor(); + auto emptyTensor = indexProcess::createEmptyAclTensor(); for (const auto& idx : indicesExpanded) { if (idx.defined()) { allDefinedIndices.push_back(aclnn_adaptor::createAclTensorFromAscendTensor(idx)); @@ -278,7 +42,6 @@ diopiError_t diopiIndexPut(diopiContextHandle_t ctx, diopiTensorHandle_t out, di DIOPI_ASCEND_CALL_ACLNN(aclnnIndexPutImpl, ctx, out, allDefinedIndices, values, accumulate, false); return diopiSuccess; - } diopiError_t diopiIndexPutInp(diopiContextHandle_t ctx, diopiTensorHandle_t input, diopiConstTensorHandle_t values, diopiConstTensorHandle_t* indices, @@ -290,11 +53,11 @@ diopiError_t diopiIndexPutInp(diopiContextHandle_t ctx, diopiTensorHandle_t inpu indicesOrigin[i] = AscendTensor(indices[i]); } } - std::vector indicesList = castIntIndicesToLongIndices(ctx, indicesOrigin); - checkIndexTensorTypes(indicesList); - auto indicesExpanded = expandIndicesTensors(ctx, inputAt, indicesList); + std::vector indicesList = indexProcess::castIntIndicesToLongIndices(ctx, indicesOrigin); + indexProcess::checkIndexTensorTypes(indicesList); + auto indicesExpanded = indexProcess::expandIndicesTensors(ctx, inputAt, indicesList); std::vector allDefinedIndices; - auto emptyTensor = createEmptyAclTensor(); + auto emptyTensor = indexProcess::createEmptyAclTensor(); for (const auto& idx : indicesExpanded) { if (idx.defined()) { allDefinedIndices.push_back(aclnn_adaptor::createAclTensorFromAscendTensor(idx)); From 903378a696b30fc975b5e065fcea0d94e42bd53d Mon Sep 17 00:00:00 2001 From: zhangqiu Date: Thu, 8 Aug 2024 20:48:58 +0800 Subject: [PATCH 3/5] fix device_config --- impl/ascend/device_configs.py | 19 +++++++++++++------ 1 file changed, 13 insertions(+), 6 deletions(-) diff --git a/impl/ascend/device_configs.py b/impl/ascend/device_configs.py index 9df743ff6..7858623c5 100755 --- a/impl/ascend/device_configs.py +++ b/impl/ascend/device_configs.py @@ -901,6 +901,7 @@ 'index_put_acc_bool_indices_zeros': dict( # llm used name=['index_put'], + skip_all=True, para=dict( accumulate=[Skip(False),], ), @@ -911,15 +912,26 @@ para=dict( accumulate=[Skip(False),], ), + tensor_para=dict( + args=[ + { + "ins": ['input'], + "shape": [Skip((16, 4, 4)),], + }, + ] + ), ), 'index_put_bool_indices_value': dict( # llm used name=['index_put'], + para=dict( + accumulate=[Skip(False),], + ), tensor_para=dict( args=[ { "ins": ['input'], - "shape": [Skip((3, 2, 2, 20)),], + "shape": [Skip((3, 2, 2, 20)), Skip((4, 2, 2, 6, 2))], }, ] ), @@ -1177,11 +1189,6 @@ skip_all=True ), - 'index_put_acc_bool_indices_zeros': dict( - name=['index_put'], - skip_all=True - ), - # TODO(zhangqiu) Due to a bug in the software stack, this test will be skipped for now. 'embedding': dict( name=['embedding'], From d1ea96b6bf5953989a5935c0f305e43d35231bd6 Mon Sep 17 00:00:00 2001 From: zhangqiu Date: Fri, 9 Aug 2024 15:04:35 +0800 Subject: [PATCH 4/5] fix check for uint8 --- impl/ascend/functions/index.cpp | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/impl/ascend/functions/index.cpp b/impl/ascend/functions/index.cpp index ac2cd78c3..708ae4a0d 100644 --- a/impl/ascend/functions/index.cpp +++ b/impl/ascend/functions/index.cpp @@ -94,7 +94,7 @@ std::vector expandIndicesTensors(diopiContextHandle_t ctx, const A result.push_back(t); } else { if (t.dtype() == diopi_dtype_uint8 || t.dtype() == diopi_dtype_bool) { - ASCEND_CHECK(t.dtype() == diopi_dtype_uint8, + ASCEND_CHECK(t.dtype() == diopi_dtype_bool, "indexing with dtype torch.uint8 is now deprecated," " please use a dtype torch.bool instead."); for (uint64_t j = 0; j < static_cast(t.dim()); j++) { @@ -281,7 +281,6 @@ diopiError_t diopiIndex(diopiContextHandle_t ctx, diopiTensorHandle_t* out, diop } std::vector outShape = indexProcess::indexOutputSize(inputAt, indicesExpanded); - diopiSize_t outSize = vectorToDiopiSize(outShape); diopiRequireTensor(ctx, out, &outSize, nullptr, inputAt.dtype(), diopi_device); From 288dd2d80c363dd7ad698c97a3e54afbee58d9dd Mon Sep 17 00:00:00 2001 From: zhangqiu Date: Fri, 9 Aug 2024 16:49:36 +0800 Subject: [PATCH 5/5] fix --- impl/ascend/device_configs.py | 1 - impl/ascend/functions/index_put.cpp | 8 ++++++++ 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/impl/ascend/device_configs.py b/impl/ascend/device_configs.py index 7858623c5..c4cc9b31f 100755 --- a/impl/ascend/device_configs.py +++ b/impl/ascend/device_configs.py @@ -901,7 +901,6 @@ 'index_put_acc_bool_indices_zeros': dict( # llm used name=['index_put'], - skip_all=True, para=dict( accumulate=[Skip(False),], ), diff --git a/impl/ascend/functions/index_put.cpp b/impl/ascend/functions/index_put.cpp index 383e133b2..1354d8d0e 100755 --- a/impl/ascend/functions/index_put.cpp +++ b/impl/ascend/functions/index_put.cpp @@ -21,6 +21,10 @@ diopiError_t diopiIndexPut(diopiContextHandle_t ctx, diopiTensorHandle_t out, di diopiConstTensorHandle_t* indices, int64_t indicesCounts, bool accumulate) { diopiCopyInp(ctx, input, out); AscendTensor inputAt(input); + AscendTensor valuesAt(values); + if (inputAt.numel() == 0 || valuesAt.numel() == 0) { + return diopiSuccess; + } std::vector indicesOrigin(indicesCounts); for (int64_t i = 0; i < indicesCounts; i++) { if (indices[i] != nullptr) { @@ -47,6 +51,10 @@ diopiError_t diopiIndexPut(diopiContextHandle_t ctx, diopiTensorHandle_t out, di diopiError_t diopiIndexPutInp(diopiContextHandle_t ctx, diopiTensorHandle_t input, diopiConstTensorHandle_t values, diopiConstTensorHandle_t* indices, int64_t indicesCounts, bool accumulate) { AscendTensor inputAt(input); + AscendTensor valuesAt(values); + if (inputAt.numel() == 0 || valuesAt.numel() == 0) { + return diopiSuccess; + } std::vector indicesOrigin(indicesCounts); for (int64_t i = 0; i < indicesCounts; i++) { if (indices[i] != nullptr) {