diff --git a/BasePreparedModel.cpp b/BasePreparedModel.cpp
index 31c04b49c..a91c168f7 100644
--- a/BasePreparedModel.cpp
+++ b/BasePreparedModel.cpp
@@ -121,6 +121,9 @@ bool BasePreparedModel::loadRemoteModel(const std::string& ir_xml, const std::st
     if(mDetectionClient) {
         auto reply = mDetectionClient->sendIRs(is_success, ir_xml, ir_bin);
         ALOGI("sendIRs response GRPC %d %s", is_success, reply.c_str());
+        if (reply == "status False") {
+            ALOGE("%s Model Load Failed",__func__);
+        }
     } else {
         ALOGE("%s mDetectionClient is null",__func__);
@@ -334,7 +337,7 @@ void asyncExecute(const Request& request, MeasureTiming measure, BasePreparedMod
     if (mRemoteCheck && mDetectionClient && mDetectionClient->get_status()) {
         mDetectionClient->get_output_data(std::to_string(i), (uint8_t*)destPtr,
-                                          ngraphNw->getOutputShape(outIndex));
+                                          ngraphNw->getOutputShape(outIndex), expectedLength);
     } else {
         switch (operandType) {
             case OperandType::TENSOR_INT32:
@@ -428,7 +431,8 @@ static std::tuple<ErrorStatus, hidl_vec<V1_2::OutputShape>, Timing> executeSynch
     //check if remote infer is available
     //TODO: Need to add FLOAT16 support for remote inferencing
     if(mRemoteCheck && mDetectionClient) {
-        mDetectionClient->add_input_data(std::to_string(i), (uint8_t*)srcPtr, ngraphNw->getOutputShape(inIndex), len);
+        auto inOperandType = modelInfo->getOperandType(inIndex);
+        mDetectionClient->add_input_data(std::to_string(i), (uint8_t*)srcPtr, ngraphNw->getOutputShape(inIndex), len, inOperandType);
     } else {
         ov::Tensor destTensor;
         try {
@@ -557,7 +561,7 @@ static std::tuple<ErrorStatus, hidl_vec<V1_2::OutputShape>, Timing> executeSynch
     //TODO: Add support for other OperandType
     if (mRemoteCheck && mDetectionClient && mDetectionClient->get_status()) {
         mDetectionClient->get_output_data(std::to_string(i), (uint8_t*)destPtr,
-                                          ngraphNw->getOutputShape(outIndex));
+                                          ngraphNw->getOutputShape(outIndex), expectedLength);
     } else {
         switch (operandType) {
             case OperandType::TENSOR_INT32:
@@ -872,7 +876,7 @@ Return<void> BasePreparedModel::executeFenced(const V1_3::Request& request1_3,
     if (mRemoteCheck && mDetectionClient && mDetectionClient->get_status()) {
         mDetectionClient->get_output_data(std::to_string(i), (uint8_t*)destPtr,
-                                          mNgraphNetCreator->getOutputShape(outIndex));
+                                          mNgraphNetCreator->getOutputShape(outIndex), expectedLength);
     } else {
         switch (operandType) {
             case OperandType::TENSOR_INT32:
diff --git a/DetectionClient.cpp b/DetectionClient.cpp
index 4d0716180..bcf0fc6d2 100644
--- a/DetectionClient.cpp
+++ b/DetectionClient.cpp
@@ -47,6 +47,22 @@ Status DetectionClient::sendFile(std::string fileName,
     return writer->Finish();
 }
 
+bool DetectionClient::isModelLoaded(std::string fileName) {
+    ReplyStatus reply;
+    ClientContext context;
+    RequestString request;
+    request.set_value(fileName);
+    time_point deadline = std::chrono::system_clock::now() + std::chrono::milliseconds(20000);
+    context.set_deadline(deadline);
+    status = stub_->loadModel(&context, request, &reply);
+    if(status.ok()) {
+        return reply.status();
+    } else {
+        ALOGE("Model Load failure: %s", status.error_message().c_str());
+    }
+    return false;
+}
+
 std::string DetectionClient::sendIRs(bool& flag, const std::string& ir_xml, const std::string& ir_bin) {
     ReplyStatus reply;
     ClientContext context;
@@ -62,25 +78,75 @@ std::string DetectionClient::sendIRs(bool& flag, const std::string& ir_xml, cons
         status = sendFile(ir_bin, writerBin);
         if (status.ok()) {
             flag = reply.status();
-            return (flag ? "status True" : "status False");
+            //if model is sent successfully trigger model loading
+            if (flag && isModelLoaded(ir_xml)) {
+                flag = true;
+                return ("status True");
+            } else {
+                flag = false;
+                ALOGE("Model Loading Failed!!!");
+                return ("status False");
+            }
+        } else {
+            return ("status False");
         }
     }
     return std::string(status.error_message());
 }
 
-void DetectionClient::add_input_data(std::string label, const uint8_t* buffer, std::vector<size_t> shape, uint32_t size) {
+void DetectionClient::add_input_data(std::string label, const uint8_t* buffer, std::vector<size_t> shape, uint32_t size, android::hardware::neuralnetworks::nnhal::OperandType operandType) {
     const float* src;
     size_t index;
     DataTensor* input = request.add_data_tensors();
     input->set_node_name(label);
+    switch(operandType) {
+        case android::hardware::neuralnetworks::nnhal::OperandType::TENSOR_INT32: {
+            input->set_data_type(DataTensor::i32);
+            break;
+        }
+        case android::hardware::neuralnetworks::nnhal::OperandType::TENSOR_FLOAT16: {
+            input->set_data_type(DataTensor::f16);
+            break;
+        }
+        case android::hardware::neuralnetworks::nnhal::OperandType::TENSOR_FLOAT32: {
+            input->set_data_type(DataTensor::f32);
+            break;
+        }
+        case android::hardware::neuralnetworks::nnhal::OperandType::TENSOR_BOOL8: {
+            input->set_data_type(DataTensor::boolean);
+            break;
+        }
+        case android::hardware::neuralnetworks::nnhal::OperandType::TENSOR_QUANT8_ASYMM: {
+            input->set_data_type(DataTensor::u8);
+            break;
+        }
+        case android::hardware::neuralnetworks::nnhal::OperandType::TENSOR_QUANT8_SYMM:
+        case android::hardware::neuralnetworks::nnhal::OperandType::TENSOR_QUANT8_SYMM_PER_CHANNEL:
+        case android::hardware::neuralnetworks::nnhal::OperandType::TENSOR_QUANT8_ASYMM_SIGNED: {
+            input->set_data_type(DataTensor::i8);
+            break;
+        }
+        case android::hardware::neuralnetworks::nnhal::OperandType::TENSOR_QUANT16_SYMM: {
+            input->set_data_type(DataTensor::i16);
+            break;
+        }
+        case android::hardware::neuralnetworks::nnhal::OperandType::TENSOR_QUANT16_ASYMM: {
+            input->set_data_type(DataTensor::u16);
+            break;
+        }
+        default: {
+            input->set_data_type(DataTensor::u8);
+            break;
+        }
+    }
     for (index = 0; index < shape.size(); index++) {
         input->add_tensor_shape(shape[index]);
     }
     input->set_data(buffer, size);
 }
 
-void DetectionClient::get_output_data(std::string label, uint8_t* buffer, std::vector<size_t> shape) {
+void DetectionClient::get_output_data(std::string label, uint8_t* buffer, std::vector<size_t> shape, uint32_t expectedLength) {
     std::string src;
     size_t index;
     size_t size = 1;
@@ -91,6 +157,9 @@ void DetectionClient::get_output_data(std::string label, uint8_t* buffer, std::v
     for (index = 0; index < reply.data_tensors_size(); index++) {
         if (label.compare(reply.data_tensors(index).node_name()) == 0) {
             src = reply.data_tensors(index).data();
+            if(expectedLength != src.length()) {
+                ALOGE("Length Mismatch error: expected length %u, actual length %zu", expectedLength, src.length());
+            }
             memcpy(buffer, src.data(), src.length());
             break;
         }
@@ -104,7 +173,7 @@ void DetectionClient::clear_data() {
 
 std::string DetectionClient::remote_infer() {
     ClientContext context;
-    time_point deadline = std::chrono::system_clock::now() + std::chrono::milliseconds(20000);
+    time_point deadline = std::chrono::system_clock::now() + std::chrono::milliseconds(5000);
     context.set_deadline(deadline);
     status = stub_->getInferResult(&context, request, &reply);
diff --git a/DetectionClient.h b/DetectionClient.h
index dece36ae4..f26306b6e 100644
--- a/DetectionClient.h
+++ b/DetectionClient.h
@@ -8,6 +8,7 @@
 #include
 #include
 #include "nnhal_object_detection.grpc.pb.h"
+#include "Driver.h"
 
 using grpc::Channel;
 using grpc::ClientContext;
@@ -32,9 +33,10 @@ class DetectionClient {
                     std::unique_ptr<ClientWriter<RequestDataChunks> >& writer);
     std::string sendIRs(bool& flag, const std::string& ir_xml, const std::string& ir_bin);
+    bool isModelLoaded(std::string fileName);
 
-    void add_input_data(std::string label, const uint8_t* buffer, std::vector<size_t> shape, uint32_t size);
-    void get_output_data(std::string label, uint8_t* buffer, std::vector<size_t> shape);
+    void add_input_data(std::string label, const uint8_t* buffer, std::vector<size_t> shape, uint32_t size, android::hardware::neuralnetworks::nnhal::OperandType operandType);
+    void get_output_data(std::string label, uint8_t* buffer, std::vector<size_t> shape, uint32_t expectedLength);
     void clear_data();
     std::string remote_infer();
     bool get_status();
diff --git a/ngraph_creator/src/OperationsFactory.cpp b/ngraph_creator/src/OperationsFactory.cpp
index f31deb796..b48ede226 100755
--- a/ngraph_creator/src/OperationsFactory.cpp
+++ b/ngraph_creator/src/OperationsFactory.cpp
@@ -44,8 +44,6 @@ std::shared_ptr<OperationsBase> OperationsFactory::getOperation(
         return std::make_shared<DepthToSpace>(operationIndex);
     case OperationType::DEPTHWISE_CONV_2D:
         return std::make_shared<DepthwiseConv2d>(operationIndex);
-    case OperationType::DEQUANTIZE:
-        return std::make_shared<Dequantize>(operationIndex);
     case OperationType::DIV:
         return std::make_shared<Div>(operationIndex);
     case OperationType::EMBEDDING_LOOKUP:
@@ -72,8 +70,6 @@ std::shared_ptr<OperationsBase> OperationsFactory::getOperation(
         return std::make_shared<InstanceNormalization>(operationIndex);
     case OperationType::L2_POOL_2D:
         return std::make_shared<L2Pooling2D>(operationIndex);
-    case OperationType::L2_NORMALIZATION:
-        return std::make_shared<L2Normalization>(operationIndex);
     case OperationType::LSTM:
         return std::make_shared<LSTM>(operationIndex);
     case OperationType::LESS:
@@ -114,8 +110,6 @@ std::shared_ptr<OperationsBase> OperationsFactory::getOperation(
         return std::make_shared<Pow>(operationIndex);
     case OperationType::PRELU:
         return std::make_shared<PRelu>(operationIndex);
-    case OperationType::QUANTIZE:
-        return std::make_shared<Quantize>(operationIndex);
     case OperationType::REDUCE_ALL:
         return std::make_shared<ReduceAll>(operationIndex);
     case OperationType::REDUCE_ANY:
@@ -144,10 +138,6 @@ std::shared_ptr<OperationsBase> OperationsFactory::getOperation(
         return std::make_shared<RNN>(operationIndex);
     case OperationType::RSQRT:
         return std::make_shared<RSQRT>(operationIndex);
-    case OperationType::RESIZE_BILINEAR:
-        return std::make_shared<ResizeBilinear>(operationIndex);
-    case OperationType::RESIZE_NEAREST_NEIGHBOR:
-        return std::make_shared<ResizeNearestNeighbor>(operationIndex);
     case OperationType::SELECT:
         return std::make_shared<Select>