12 changes: 8 additions & 4 deletions BasePreparedModel.cpp
@@ -121,6 +121,9 @@ bool BasePreparedModel::loadRemoteModel(const std::string& ir_xml, const std::st
if(mDetectionClient) {
auto reply = mDetectionClient->sendIRs(is_success, ir_xml, ir_bin);
ALOGI("sendIRs response GRPC %d %s", is_success, reply.c_str());
if (reply == "status False") {
ALOGE("%s Model Load Failed",__func__);
}
}
else {
ALOGE("%s mDetectionClient is null",__func__);
@@ -334,7 +337,7 @@ void asyncExecute(const Request& request, MeasureTiming measure, BasePreparedMod

if (mRemoteCheck && mDetectionClient && mDetectionClient->get_status()) {
mDetectionClient->get_output_data(std::to_string(i), (uint8_t*)destPtr,
ngraphNw->getOutputShape(outIndex));
ngraphNw->getOutputShape(outIndex), expectedLength);
} else {
switch (operandType) {
case OperandType::TENSOR_INT32:
@@ -428,7 +431,8 @@ static std::tuple<ErrorStatus, hidl_vec<V1_2::OutputShape>, Timing> executeSynch
//check if remote infer is available
//TODO: Need to add FLOAT16 support for remote inferencing
if(mRemoteCheck && mDetectionClient) {
mDetectionClient->add_input_data(std::to_string(i), (uint8_t*)srcPtr, ngraphNw->getOutputShape(inIndex), len);
auto inOperandType = modelInfo->getOperandType(inIndex);
mDetectionClient->add_input_data(std::to_string(i), (uint8_t*)srcPtr, ngraphNw->getOutputShape(inIndex), len, inOperandType);
} else {
ov::Tensor destTensor;
try {
@@ -557,7 +561,7 @@ static std::tuple<ErrorStatus, hidl_vec<V1_2::OutputShape>, Timing> executeSynch
//TODO: Add support for other OperandType
if (mRemoteCheck && mDetectionClient && mDetectionClient->get_status()) {
mDetectionClient->get_output_data(std::to_string(i), (uint8_t*)destPtr,
ngraphNw->getOutputShape(outIndex));
ngraphNw->getOutputShape(outIndex), expectedLength);
} else {
switch (operandType) {
case OperandType::TENSOR_INT32:
@@ -872,7 +876,7 @@ Return<void> BasePreparedModel::executeFenced(const V1_3::Request& request1_3,

if (mRemoteCheck && mDetectionClient && mDetectionClient->get_status()) {
mDetectionClient->get_output_data(std::to_string(i), (uint8_t*)destPtr,
mNgraphNetCreator->getOutputShape(outIndex));
mNgraphNetCreator->getOutputShape(outIndex), expectedLength);
} else {
switch (operandType) {
case OperandType::TENSOR_INT32:
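For context: every get_output_data() call site above now passes an expectedLength so the client can flag a short or oversized reply before copying it back. A minimal sketch of how such a byte count could be derived; the helper name and element-size argument are assumptions, not the driver's actual code:

// Illustrative only: expected reply size in bytes for an output shape.
static size_t expectedByteLength(const std::vector<size_t>& shape, size_t elementSize) {
    size_t count = 1;
    for (size_t dim : shape) count *= dim;  // total element count
    return count * elementSize;             // bytes the remote reply should carry
}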
77 changes: 73 additions & 4 deletions DetectionClient.cpp
@@ -47,6 +47,22 @@ Status DetectionClient::sendFile(std::string fileName,
return writer->Finish();
}

bool DetectionClient::isModelLoaded(std::string fileName) {
ReplyStatus reply;
ClientContext context;
RequestString request;
request.set_value(fileName);
time_point deadline = std::chrono::system_clock::now() + std::chrono::milliseconds(20000);
context.set_deadline(deadline);
status = stub_->loadModel(&context, request, &reply);
if(status.ok()) {
return reply.status();
} else {
ALOGE("Model Load failure: %s", status.error_message().c_str());
}
return false;
}

std::string DetectionClient::sendIRs(bool& flag, const std::string& ir_xml, const std::string& ir_bin) {
ReplyStatus reply;
ClientContext context;
@@ -62,25 +78,75 @@ std::string DetectionClient::sendIRs(bool& flag, const std::string& ir_xml, cons
status = sendFile(ir_bin, writerBin);
if (status.ok()) {
flag = reply.status();
return (flag ? "status True" : "status False");
// if model is sent successfully, trigger model loading
if (flag && isModelLoaded(ir_xml)) {
flag = true;
return ("status True");
} else {
flag = false;
ALOGE("Model Loading Failed!!!");
return ("status False");
}
} else {
return ("status False");
}
}
return std::string(status.error_message());
}

void DetectionClient::add_input_data(std::string label, const uint8_t* buffer, std::vector<size_t> shape, uint32_t size) {
void DetectionClient::add_input_data(std::string label, const uint8_t* buffer, std::vector<size_t> shape, uint32_t size, android::hardware::neuralnetworks::nnhal::OperandType operandType) {
const float* src;
size_t index;

DataTensor* input = request.add_data_tensors();
input->set_node_name(label);
switch(operandType) {
case android::hardware::neuralnetworks::nnhal::OperandType::TENSOR_INT32: {
input->set_data_type(DataTensor::i32);
break;
}
case android::hardware::neuralnetworks::nnhal::OperandType::TENSOR_FLOAT16: {
input->set_data_type(DataTensor::f16);
break;
}
case android::hardware::neuralnetworks::nnhal::OperandType::TENSOR_FLOAT32: {
input->set_data_type(DataTensor::f32);
break;
}
case android::hardware::neuralnetworks::nnhal::OperandType::TENSOR_BOOL8: {
input->set_data_type(DataTensor::boolean);
break;
}
case android::hardware::neuralnetworks::nnhal::OperandType::TENSOR_QUANT8_ASYMM: {
input->set_data_type(DataTensor::u8);
break;
}
case android::hardware::neuralnetworks::nnhal::OperandType::TENSOR_QUANT8_SYMM:
case android::hardware::neuralnetworks::nnhal::OperandType::TENSOR_QUANT8_SYMM_PER_CHANNEL:
case android::hardware::neuralnetworks::nnhal::OperandType::TENSOR_QUANT8_ASYMM_SIGNED: {
input->set_data_type(DataTensor::i8);
break;
}
case android::hardware::neuralnetworks::nnhal::OperandType::TENSOR_QUANT16_SYMM: {
input->set_data_type(DataTensor::i16);
break;
}
case android::hardware::neuralnetworks::nnhal::OperandType::TENSOR_QUANT16_ASYMM: {
input->set_data_type(DataTensor::u16);
break;
}
default: {
input->set_data_type(DataTensor::u8);
break;
}
}
for (index = 0; index < shape.size(); index++) {
input->add_tensor_shape(shape[index]);
}
input->set_data(buffer, size);
}

void DetectionClient::get_output_data(std::string label, uint8_t* buffer, std::vector<size_t> shape) {
void DetectionClient::get_output_data(std::string label, uint8_t* buffer, std::vector<size_t> shape, uint32_t expectedLength) {
std::string src;
size_t index;
size_t size = 1;
@@ -91,6 +157,9 @@
for (index = 0; index < reply.data_tensors_size(); index++) {
if (label.compare(reply.data_tensors(index).node_name()) == 0) {
src = reply.data_tensors(index).data();
if(expectedLength != src.length()) {
ALOGE("Length mismatch: expected length %u, actual length %zu", expectedLength, src.length());
}
memcpy(buffer, src.data(), src.length());
break;
}
@@ -104,7 +173,7 @@ void DetectionClient::clear_data() {

std::string DetectionClient::remote_infer() {
ClientContext context;
time_point deadline = std::chrono::system_clock::now() + std::chrono::milliseconds(20000);
time_point deadline = std::chrono::system_clock::now() + std::chrono::milliseconds(5000);
context.set_deadline(deadline);

status = stub_->getInferResult(&context, request, &reply);
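Both isModelLoaded() and remote_infer() bound their RPCs with an absolute deadline (20 s for loadModel, 5 s for getInferResult after this change). The pattern in isolation, a sketch using standard gRPC C++ APIs:

// An absolute deadline makes the RPC fail with DEADLINE_EXCEEDED
// instead of blocking indefinitely if the remote service stalls.
grpc::ClientContext context;
auto deadline = std::chrono::system_clock::now() + std::chrono::milliseconds(5000);
context.set_deadline(deadline);
// status = stub_->getInferResult(&context, request, &reply);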
6 changes: 4 additions & 2 deletions DetectionClient.h
@@ -8,6 +8,7 @@
#include <log/log.h>
#include <android-base/logging.h>
#include "nnhal_object_detection.grpc.pb.h"
#include "Driver.h"

using grpc::Channel;
using grpc::ClientContext;
@@ -32,9 +33,10 @@ class DetectionClient {
std::unique_ptr<ClientWriter<RequestDataChunks> >& writer);

std::string sendIRs(bool& flag, const std::string& ir_xml, const std::string& ir_bin);
bool isModelLoaded(std::string fileName);

void add_input_data(std::string label, const uint8_t* buffer, std::vector<size_t> shape, uint32_t size);
void get_output_data(std::string label, uint8_t* buffer, std::vector<size_t> shape);
void add_input_data(std::string label, const uint8_t* buffer, std::vector<size_t> shape, uint32_t size, android::hardware::neuralnetworks::nnhal::OperandType operandType);
void get_output_data(std::string label, uint8_t* buffer, std::vector<size_t> shape, uint32_t expectedLength);
void clear_data();
std::string remote_infer();
bool get_status();
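Taken together, the widened interface implies a caller sequence like the following. This is a hypothetical sketch, not driver code: the label "0", the float32 operand type, and the shape arithmetic are placeholders, and error handling is elided.

// Hypothetical usage of the updated DetectionClient API.
using android::hardware::neuralnetworks::nnhal::OperandType;

bool runRemoteInfer(DetectionClient& client, const std::string& xml,
                    const std::string& bin, const uint8_t* in, uint32_t inLen,
                    uint8_t* out, uint32_t outLen) {
    bool sent = false;
    client.sendIRs(sent, xml, bin);   // streams the IR, then triggers loadModel
    if (!sent) return false;          // covers both transfer and load failure
    client.add_input_data("0", in, {1, inLen / sizeof(float)}, inLen,
                          OperandType::TENSOR_FLOAT32);
    client.remote_infer();            // bounded by the 5 s deadline
    client.get_output_data("0", out, {1, outLen / sizeof(float)}, outLen);
    client.clear_data();
    return true;
}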
16 changes: 0 additions & 16 deletions ngraph_creator/src/OperationsFactory.cpp
@@ -44,8 +44,6 @@ std::shared_ptr<OperationsBase> OperationsFactory::getOperation(
return std::make_shared<DepthToSpace>(operationIndex);
case OperationType::DEPTHWISE_CONV_2D:
return std::make_shared<DepthwiseConv2d>(operationIndex);
case OperationType::DEQUANTIZE:
return std::make_shared<Dequantize>(operationIndex);
case OperationType::DIV:
return std::make_shared<Div>(operationIndex);
case OperationType::EMBEDDING_LOOKUP:
@@ -72,8 +70,6 @@ std::shared_ptr<OperationsBase> OperationsFactory::getOperation(
return std::make_shared<InstanceNormalization>(operationIndex);
case OperationType::L2_POOL_2D:
return std::make_shared<L2Pooling2D>(operationIndex);
case OperationType::L2_NORMALIZATION:
return std::make_shared<L2Normalization>(operationIndex);
case OperationType::LSTM:
return std::make_shared<LSTM>(operationIndex);
case OperationType::LESS:
@@ -114,8 +110,6 @@ std::shared_ptr<OperationsBase> OperationsFactory::getOperation(
return std::make_shared<Pow>(operationIndex);
case OperationType::PRELU:
return std::make_shared<PRelu>(operationIndex);
case OperationType::QUANTIZE:
return std::make_shared<Quantize>(operationIndex);
case OperationType::REDUCE_ALL:
return std::make_shared<ReduceAll>(operationIndex);
case OperationType::REDUCE_ANY:
@@ -144,10 +138,6 @@ std::shared_ptr<OperationsBase> OperationsFactory::getOperation(
return std::make_shared<ROIPooling>(operationIndex);
case OperationType::RSQRT:
return std::make_shared<RSQRT>(operationIndex);
case OperationType::RESIZE_BILINEAR:
return std::make_shared<ResizeBilinear>(operationIndex);
case OperationType::RESIZE_NEAREST_NEIGHBOR:
return std::make_shared<ResizeNearestNeighbor>(operationIndex);
case OperationType::SELECT:
return std::make_shared<Select>(operationIndex);
case OperationType::SOFTMAX:
@@ -160,8 +150,6 @@
return std::make_shared<SQRT>(operationIndex);
case OperationType::SIN:
return std::make_shared<Sin>(operationIndex);
case OperationType::SPLIT:
return std::make_shared<Split>(operationIndex);
case OperationType::STRIDED_SLICE:
return std::make_shared<StridedSlice>(operationIndex);
case OperationType::SQUEEZE:
@@ -172,10 +160,6 @@
return std::make_shared<Tanh>(operationIndex);
case OperationType::TOPK_V2:
return std::make_shared<TopkV2>(operationIndex);
case OperationType::TRANSPOSE_CONV_2D:
return std::make_shared<TransposeConv2D>(operationIndex);
case OperationType::TRANSPOSE:
return std::make_shared<Transpose>(operationIndex);
case OperationType::UNIDIRECTIONAL_SEQUENCE_RNN:
return std::make_shared<UnidirectionalSequenceRNN>(operationIndex);
default:
20 changes: 20 additions & 0 deletions proto/nnhal_object_detection.proto
@@ -27,6 +27,7 @@ service Detection {
rpc getInferResult (RequestDataTensors) returns (ReplyDataTensors) {}
rpc sendXml (stream RequestDataChunks) returns (ReplyStatus) {}
rpc sendBin (stream RequestDataChunks) returns (ReplyStatus) {}
rpc loadModel(RequestString) returns (ReplyStatus) {}
rpc prepare (RequestString) returns (ReplyStatus) {} //Placeholder for any future support : RequestString
}

@@ -47,6 +48,25 @@ message DataTensor {
bytes data = 1;
string node_name = 2;
repeated int32 tensor_shape = 3;
enum DATA_TYPE {
boolean = 0;
bf16 = 1;
f16 = 2;
f32 = 3;
f64 = 4;
i4 = 5;
i8 = 6;
i16 = 7;
i32 = 8;
i64 = 9;
u1 = 10;
u4 = 11;
u8 = 12;
u16 = 13;
u32 = 14;
u64 = 15;
}
DATA_TYPE data_type = 4;
}

// Reply message containing the Output Data Tensors(blobs)