From bd7b249195aa4bbff4e559bffd62d124f19c01cf Mon Sep 17 00:00:00 2001 From: Jan Marvin Garbuszus Date: Sun, 5 Apr 2020 00:49:54 +0200 Subject: [PATCH 01/10] addaption of https://github.com/opencv/opencv/blob/master/samples/dnn/text_detection.cpp --- DESCRIPTION | 2 +- NAMESPACE | 1 + R/RcppExports.R | 4 + R/text.R | 27 +++++++ man/opencv.Rd | 5 +- src/RcppExports.cpp | 18 +++++ src/text.cpp | 185 ++++++++++++++++++++++++++++++++++++++++++++ 7 files changed, 240 insertions(+), 2 deletions(-) create mode 100644 R/text.R create mode 100644 src/text.cpp diff --git a/DESCRIPTION b/DESCRIPTION index eb6990b..cacf82f 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -16,5 +16,5 @@ LinkingTo: Rcpp Imports: Rcpp, magrittr LazyData: true Encoding: UTF-8 -RoxygenNote: 6.1.1 +RoxygenNote: 7.1.0 Roxygen: list(markdown = TRUE) diff --git a/NAMESPACE b/NAMESPACE index 0d9dab4..281b878 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -20,6 +20,7 @@ export(ocv_read) export(ocv_resize) export(ocv_sketch) export(ocv_stylize) +export(ocv_text) export(ocv_video) export(ocv_write) importFrom(Rcpp,sourceCpp) diff --git a/R/RcppExports.R b/R/RcppExports.R index 84d20c9..f14a751 100644 --- a/R/RcppExports.R +++ b/R/RcppExports.R @@ -105,3 +105,7 @@ cvmat_markers <- function(ptr) { .Call('_opencv_cvmat_markers', PACKAGE = 'opencv', ptr) } +text_detection <- function(input, confThreshold, nmsThreshold, inpWidth, inpHeight, model, draw) { + .Call('_opencv_text_detection', PACKAGE = 'opencv', input, confThreshold, nmsThreshold, inpWidth, inpHeight, model, draw) +} + diff --git a/R/text.R b/R/text.R new file mode 100644 index 0000000..8c12976 --- /dev/null +++ b/R/text.R @@ -0,0 +1,27 @@ +#' @export +#' @rdname opencv +#' @param image Path to input image or video file. Skip this argument to capture frames from a camera. +#' @param width Preprocess input image by resizing to a specific width. It should be multiple by 32. +#' @param height Preprocess input image by resizing to a specific height. 
It should be multiple by 32. +#' @param thrs Confidence threshold. +#' @param nms Non-maximum suppression threshold. +#' +#' @param model Path to a binary .pb file contains trained network. +ocv_text <- function(image, thrs = 0.5, nms = 20, width = 320, height = 320, + model, draw = FALSE){ + + if(missing(model)) + stop("requires a model pb-file") + + image <- path.expand(image) + model <- path.expand(model) + + text_detection(input = image, + confThreshold = thrs, + nmsThreshold = nms, + inpWidth = width, + inpHeight = height, + model = model, + draw = draw) +} + diff --git a/man/opencv.Rd b/man/opencv.Rd index f88d0f5..3daaf0b 100644 --- a/man/opencv.Rd +++ b/man/opencv.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/face.R, R/opencv.R +% Please edit documentation in R/face.R, R/opencv.R, R/text.R \name{ocv_face} \alias{ocv_face} \alias{ocv_facemask} @@ -21,6 +21,7 @@ \alias{ocv_copyto} \alias{ocv_display} \alias{ocv_video} +\alias{ocv_text} \title{OpenCV Computer Vision} \usage{ ocv_face(image) @@ -62,6 +63,8 @@ ocv_copyto(image, target, mask) ocv_display(image) ocv_video(filter) + +ocv_text(image, thrs = 0.5, nms = 20, width = 320, height = 320, model) } \arguments{ \item{image}{a ocv image object} diff --git a/src/RcppExports.cpp b/src/RcppExports.cpp index 4fe270b..42c7754 100644 --- a/src/RcppExports.cpp +++ b/src/RcppExports.cpp @@ -296,6 +296,23 @@ BEGIN_RCPP return rcpp_result_gen; END_RCPP } +// text_detection +Rcpp::DataFrame text_detection(std::string input, float confThreshold, float nmsThreshold, int inpWidth, int inpHeight, std::string model, bool draw); +RcppExport SEXP _opencv_text_detection(SEXP inputSEXP, SEXP confThresholdSEXP, SEXP nmsThresholdSEXP, SEXP inpWidthSEXP, SEXP inpHeightSEXP, SEXP modelSEXP, SEXP drawSEXP) { +BEGIN_RCPP + Rcpp::RObject rcpp_result_gen; + Rcpp::RNGScope rcpp_rngScope_gen; + Rcpp::traits::input_parameter< std::string >::type input(inputSEXP); + 
Rcpp::traits::input_parameter< float >::type confThreshold(confThresholdSEXP); + Rcpp::traits::input_parameter< float >::type nmsThreshold(nmsThresholdSEXP); + Rcpp::traits::input_parameter< int >::type inpWidth(inpWidthSEXP); + Rcpp::traits::input_parameter< int >::type inpHeight(inpHeightSEXP); + Rcpp::traits::input_parameter< std::string >::type model(modelSEXP); + Rcpp::traits::input_parameter< bool >::type draw(drawSEXP); + rcpp_result_gen = Rcpp::wrap(text_detection(input, confThreshold, nmsThreshold, inpWidth, inpHeight, model, draw)); + return rcpp_result_gen; +END_RCPP +} static const R_CallMethodDef CallEntries[] = { {"_opencv_cvmat_destroy", (DL_FUNC) &_opencv_cvmat_destroy, 1}, @@ -324,6 +341,7 @@ static const R_CallMethodDef CallEntries[] = { {"_opencv_cvmat_edges", (DL_FUNC) &_opencv_cvmat_edges, 1}, {"_opencv_cvmat_hog", (DL_FUNC) &_opencv_cvmat_hog, 1}, {"_opencv_cvmat_markers", (DL_FUNC) &_opencv_cvmat_markers, 1}, + {"_opencv_text_detection", (DL_FUNC) &_opencv_text_detection, 7}, {NULL, NULL, 0} }; diff --git a/src/text.cpp b/src/text.cpp new file mode 100644 index 0000000..a9c879c --- /dev/null +++ b/src/text.cpp @@ -0,0 +1,185 @@ +#include "util.hpp" +#include +#include +#include + +using namespace cv; +using namespace cv::dnn; + +void decode(const Mat& scores, const Mat& geometry, float scoreThresh, + std::vector& detections, + std::vector& confidences) +{ + detections.clear(); + CV_Assert(scores.dims == 4); CV_Assert(geometry.dims == 4); + CV_Assert(scores.size[0] == 1); CV_Assert(geometry.size[0] == 1); + CV_Assert(scores.size[1] == 1); CV_Assert(geometry.size[1] == 5); + CV_Assert(scores.size[2] == geometry.size[2]); + CV_Assert(scores.size[3] == geometry.size[3]); + + const int height = scores.size[2]; + const int width = scores.size[3]; + for (int y = 0; y < height; ++y) + { + const float* scoresData = scores.ptr(0, 0, y); + const float* x0_data = geometry.ptr(0, 0, y); + const float* x1_data = geometry.ptr(0, 1, y); + const float* 
x2_data = geometry.ptr(0, 2, y); + const float* x3_data = geometry.ptr(0, 3, y); + const float* anglesData = geometry.ptr(0, 4, y); + for (int x = 0; x < width; ++x) + { + float score = scoresData[x]; + if (score < scoreThresh) + continue; + + // Decode a prediction. + // Multiple by 4 because feature maps are 4 time less than input image. + float offsetX = x * 4.0f, offsetY = y * 4.0f; + float angle = anglesData[x]; + float cosA = std::cos(angle); + float sinA = std::sin(angle); + float h = x0_data[x] + x2_data[x]; + float w = x1_data[x] + x3_data[x]; + + Point2f offset(offsetX + cosA * x1_data[x] + sinA * x2_data[x], + offsetY - sinA * x1_data[x] + cosA * x2_data[x]); + Point2f p1 = Point2f(-sinA * h, -cosA * h) + offset; + Point2f p3 = Point2f(-cosA * w, sinA * w) + offset; + RotatedRect r(0.5f * (p1 + p3), Size2f(w, h), + -angle * 180.0f / (float)CV_PI); + detections.push_back(r); + confidences.push_back(score); + } + } +} + + +// [[Rcpp::export]] +Rcpp::DataFrame + text_detection(std::string input, float confThreshold,float nmsThreshold, + int inpWidth, int inpHeight, std::string model, bool draw) +{ + if (model.empty()) + Rcpp::stop("No model defined"); + + // Load network. + Net net = readNet(model); + + // Open a video file or an image file or a camera stream. 
+ VideoCapture cap; + if (!input.empty()) + cap.open(input); + else + cap.open(0); + + static const std::string kWinName = + "EAST: An Efficient and Accurate Scene Text Detector"; + + if (draw) { + namedWindow(kWinName, WINDOW_NORMAL); + } + + std::vector outs; + std::vector outNames(2); + outNames[0] = "feature_fusion/Conv_7/Sigmoid"; + outNames[1] = "feature_fusion/concat_3"; + + Mat frame, blob; + std::vector indices; + + Rcpp::IntegerVector x1vec(indices.size()); + Rcpp::IntegerVector y1vec(indices.size()); + Rcpp::IntegerVector x2vec(indices.size()); + Rcpp::IntegerVector y2vec(indices.size()); + Rcpp::IntegerVector x3vec(indices.size()); + Rcpp::IntegerVector y3vec(indices.size()); + Rcpp::IntegerVector x4vec(indices.size()); + Rcpp::IntegerVector y4vec(indices.size()); + + // if window is drawn, push super key to exit + while (waitKey(1) < 0) + { + cap >> frame; + if (frame.empty()) + { + waitKey(); + break; + } + + blobFromImage(frame, blob, 1.0, Size(inpWidth, inpHeight), + Scalar(123.68, 116.78, 103.94), true, false); + net.setInput(blob); + net.forward(outs, outNames); + + Mat scores = outs[0]; + Mat geometry = outs[1]; + + // Decode predicted bounding boxes. + std::vector boxes; + std::vector confidences; + decode(scores, geometry, confThreshold, boxes, confidences); + + // Apply non-maximum suppression procedure. + NMSBoxes(boxes, confidences, confThreshold, nmsThreshold, indices); + + + // Render detections. 
+ Point2f ratio((float)frame.cols / inpWidth, (float)frame.rows / inpHeight); + for (size_t i = 0; i < indices.size(); ++i) + { + RotatedRect& box = boxes[indices[i]]; + + Point2f vertices[4]; + box.points(vertices); + for (int j = 0; j < 4; ++j) + { + vertices[j].x *= ratio.x; + vertices[j].y *= ratio.y; + } + + // not the fastes way + x1vec.push_back(vertices[0].x); + y1vec.push_back(vertices[0].y); + x2vec.push_back(vertices[1].x); + y2vec.push_back(vertices[1].y); + x3vec.push_back(vertices[2].x); + y3vec.push_back(vertices[2].y); + x4vec.push_back(vertices[3].x); + y4vec.push_back(vertices[3].y); + + if (draw) + for (int j = 0; j < 4; ++j) + line(frame, vertices[j], vertices[(j + 1) % 4], Scalar(0, 255, 0), 1); + } + + if (draw) { + // Put efficiency information. + std::vector layersTimes; + double freq = getTickFrequency() / 1000; + double t = net.getPerfProfile(layersTimes) / freq; + std::string label = format("Inference time: %.2f ms", t); + putText(frame, label, Point(0, 15), + FONT_HERSHEY_SIMPLEX, 0.5, Scalar(0, 255, 0)); + } + + if (draw) + imshow(kWinName, frame); + else + break; + } + + // XPtrMat out = cvmat_xptr(box); + // out.attr("indices") = ; + + return Rcpp::DataFrame::create( + Rcpp::_["x1"] = x1vec, + Rcpp::_["y1"] = y1vec, + Rcpp::_["x2"] = x2vec, + Rcpp::_["y2"] = y2vec, + Rcpp::_["x3"] = x3vec, + Rcpp::_["y3"] = y3vec, + Rcpp::_["x4"] = x4vec, + Rcpp::_["y4"] = y4vec + ); +} From 1b356e41274512d7d1af4e7b7925cd26c8f3d68f Mon Sep 17 00:00:00 2001 From: Jan Marvin Garbuszus Date: Sun, 5 Apr 2020 03:39:25 +0200 Subject: [PATCH 02/10] use xptrmat --- R/RcppExports.R | 4 ++-- R/text.R | 3 +-- src/RcppExports.cpp | 8 ++++---- src/text.cpp | 48 +++++++++++++-------------------------------- 4 files changed, 21 insertions(+), 42 deletions(-) diff --git a/R/RcppExports.R b/R/RcppExports.R index f14a751..7897015 100644 --- a/R/RcppExports.R +++ b/R/RcppExports.R @@ -105,7 +105,7 @@ cvmat_markers <- function(ptr) { .Call('_opencv_cvmat_markers', 
PACKAGE = 'opencv', ptr) } -text_detection <- function(input, confThreshold, nmsThreshold, inpWidth, inpHeight, model, draw) { - .Call('_opencv_text_detection', PACKAGE = 'opencv', input, confThreshold, nmsThreshold, inpWidth, inpHeight, model, draw) +text_detection <- function(ptr, confThreshold, nmsThreshold, inpWidth, inpHeight, model, draw) { + .Call('_opencv_text_detection', PACKAGE = 'opencv', ptr, confThreshold, nmsThreshold, inpWidth, inpHeight, model, draw) } diff --git a/R/text.R b/R/text.R index 8c12976..3a18a74 100644 --- a/R/text.R +++ b/R/text.R @@ -13,10 +13,9 @@ ocv_text <- function(image, thrs = 0.5, nms = 20, width = 320, height = 320, if(missing(model)) stop("requires a model pb-file") - image <- path.expand(image) model <- path.expand(model) - text_detection(input = image, + text_detection(ptr = image, confThreshold = thrs, nmsThreshold = nms, inpWidth = width, diff --git a/src/RcppExports.cpp b/src/RcppExports.cpp index 42c7754..9829f59 100644 --- a/src/RcppExports.cpp +++ b/src/RcppExports.cpp @@ -297,19 +297,19 @@ BEGIN_RCPP END_RCPP } // text_detection -Rcpp::DataFrame text_detection(std::string input, float confThreshold, float nmsThreshold, int inpWidth, int inpHeight, std::string model, bool draw); -RcppExport SEXP _opencv_text_detection(SEXP inputSEXP, SEXP confThresholdSEXP, SEXP nmsThresholdSEXP, SEXP inpWidthSEXP, SEXP inpHeightSEXP, SEXP modelSEXP, SEXP drawSEXP) { +XPtrMat text_detection(XPtrMat ptr, float confThreshold, float nmsThreshold, int inpWidth, int inpHeight, std::string model, bool draw); +RcppExport SEXP _opencv_text_detection(SEXP ptrSEXP, SEXP confThresholdSEXP, SEXP nmsThresholdSEXP, SEXP inpWidthSEXP, SEXP inpHeightSEXP, SEXP modelSEXP, SEXP drawSEXP) { BEGIN_RCPP Rcpp::RObject rcpp_result_gen; Rcpp::RNGScope rcpp_rngScope_gen; - Rcpp::traits::input_parameter< std::string >::type input(inputSEXP); + Rcpp::traits::input_parameter< XPtrMat >::type ptr(ptrSEXP); Rcpp::traits::input_parameter< float >::type 
confThreshold(confThresholdSEXP); Rcpp::traits::input_parameter< float >::type nmsThreshold(nmsThresholdSEXP); Rcpp::traits::input_parameter< int >::type inpWidth(inpWidthSEXP); Rcpp::traits::input_parameter< int >::type inpHeight(inpHeightSEXP); Rcpp::traits::input_parameter< std::string >::type model(modelSEXP); Rcpp::traits::input_parameter< bool >::type draw(drawSEXP); - rcpp_result_gen = Rcpp::wrap(text_detection(input, confThreshold, nmsThreshold, inpWidth, inpHeight, model, draw)); + rcpp_result_gen = Rcpp::wrap(text_detection(ptr, confThreshold, nmsThreshold, inpWidth, inpHeight, model, draw)); return rcpp_result_gen; END_RCPP } diff --git a/src/text.cpp b/src/text.cpp index a9c879c..0c26f4e 100644 --- a/src/text.cpp +++ b/src/text.cpp @@ -56,36 +56,25 @@ void decode(const Mat& scores, const Mat& geometry, float scoreThresh, // [[Rcpp::export]] -Rcpp::DataFrame - text_detection(std::string input, float confThreshold,float nmsThreshold, +XPtrMat + text_detection(XPtrMat ptr, float confThreshold,float nmsThreshold, int inpWidth, int inpHeight, std::string model, bool draw) { if (model.empty()) Rcpp::stop("No model defined"); + Mat frame = get_mat(ptr); + // Load network. Net net = readNet(model); - // Open a video file or an image file or a camera stream. 
- VideoCapture cap; - if (!input.empty()) - cap.open(input); - else - cap.open(0); - - static const std::string kWinName = - "EAST: An Efficient and Accurate Scene Text Detector"; - - if (draw) { - namedWindow(kWinName, WINDOW_NORMAL); - } - std::vector outs; std::vector outNames(2); outNames[0] = "feature_fusion/Conv_7/Sigmoid"; outNames[1] = "feature_fusion/concat_3"; - Mat frame, blob; + Mat blob; + // Mat frame, blob; std::vector indices; Rcpp::IntegerVector x1vec(indices.size()); @@ -98,14 +87,8 @@ Rcpp::DataFrame Rcpp::IntegerVector y4vec(indices.size()); // if window is drawn, push super key to exit - while (waitKey(1) < 0) - { - cap >> frame; - if (frame.empty()) - { - waitKey(); - break; - } + // while (waitKey(1) < 0) + // { blobFromImage(frame, blob, 1.0, Size(inpWidth, inpHeight), Scalar(123.68, 116.78, 103.94), true, false); @@ -163,16 +146,11 @@ Rcpp::DataFrame FONT_HERSHEY_SIMPLEX, 0.5, Scalar(0, 255, 0)); } - if (draw) - imshow(kWinName, frame); - else - break; - } + // break; + // } - // XPtrMat out = cvmat_xptr(box); - // out.attr("indices") = ; - - return Rcpp::DataFrame::create( + XPtrMat out = cvmat_xptr(frame); + out.attr("indices") = Rcpp::DataFrame::create( Rcpp::_["x1"] = x1vec, Rcpp::_["y1"] = y1vec, Rcpp::_["x2"] = x2vec, @@ -182,4 +160,6 @@ Rcpp::DataFrame Rcpp::_["x4"] = x4vec, Rcpp::_["y4"] = y4vec ); + + return out; } From d2b847c0d7fe090abd911114bd432c7f0b738c6a Mon Sep 17 00:00:00 2001 From: Jan Marvin Garbuszus Date: Sun, 5 Apr 2020 04:19:44 +0200 Subject: [PATCH 03/10] cleanup --- src/text.cpp | 96 ++++++++++++++++++++++++---------------------------- 1 file changed, 45 insertions(+), 51 deletions(-) diff --git a/src/text.cpp b/src/text.cpp index 0c26f4e..d2b151e 100644 --- a/src/text.cpp +++ b/src/text.cpp @@ -59,36 +59,24 @@ void decode(const Mat& scores, const Mat& geometry, float scoreThresh, XPtrMat text_detection(XPtrMat ptr, float confThreshold,float nmsThreshold, int inpWidth, int inpHeight, std::string model, bool draw) -{ 
- if (model.empty()) - Rcpp::stop("No model defined"); - - Mat frame = get_mat(ptr); + { + if (model.empty()) + Rcpp::stop("No model defined"); - // Load network. - Net net = readNet(model); + Mat frame = get_mat(ptr); - std::vector outs; - std::vector outNames(2); - outNames[0] = "feature_fusion/Conv_7/Sigmoid"; - outNames[1] = "feature_fusion/concat_3"; + // Load network. + Net net = readNet(model); - Mat blob; - // Mat frame, blob; - std::vector indices; + std::vector outs; + std::vector outNames(2); + outNames[0] = "feature_fusion/Conv_7/Sigmoid"; + outNames[1] = "feature_fusion/concat_3"; - Rcpp::IntegerVector x1vec(indices.size()); - Rcpp::IntegerVector y1vec(indices.size()); - Rcpp::IntegerVector x2vec(indices.size()); - Rcpp::IntegerVector y2vec(indices.size()); - Rcpp::IntegerVector x3vec(indices.size()); - Rcpp::IntegerVector y3vec(indices.size()); - Rcpp::IntegerVector x4vec(indices.size()); - Rcpp::IntegerVector y4vec(indices.size()); + Mat blob; + // Mat frame, blob; + std::vector indices; - // if window is drawn, push super key to exit - // while (waitKey(1) < 0) - // { blobFromImage(frame, blob, 1.0, Size(inpWidth, inpHeight), Scalar(123.68, 116.78, 103.94), true, false); @@ -106,6 +94,15 @@ XPtrMat // Apply non-maximum suppression procedure. NMSBoxes(boxes, confidences, confThreshold, nmsThreshold, indices); + Rcpp::IntegerVector x1vec(indices.size()); + Rcpp::IntegerVector y1vec(indices.size()); + Rcpp::IntegerVector x2vec(indices.size()); + Rcpp::IntegerVector y2vec(indices.size()); + Rcpp::IntegerVector x3vec(indices.size()); + Rcpp::IntegerVector y3vec(indices.size()); + Rcpp::IntegerVector x4vec(indices.size()); + Rcpp::IntegerVector y4vec(indices.size()); + // Render detections. 
Point2f ratio((float)frame.cols / inpWidth, (float)frame.rows / inpHeight); @@ -121,15 +118,14 @@ XPtrMat vertices[j].y *= ratio.y; } - // not the fastes way - x1vec.push_back(vertices[0].x); - y1vec.push_back(vertices[0].y); - x2vec.push_back(vertices[1].x); - y2vec.push_back(vertices[1].y); - x3vec.push_back(vertices[2].x); - y3vec.push_back(vertices[2].y); - x4vec.push_back(vertices[3].x); - y4vec.push_back(vertices[3].y); + x1vec.at(i) = vertices[0].x; + y1vec.at(i) = vertices[0].y; + x2vec.at(i) = vertices[1].x; + y2vec.at(i) = vertices[1].y; + x3vec.at(i) = vertices[2].x; + y3vec.at(i) = vertices[2].y; + x4vec.at(i) = vertices[3].x; + y4vec.at(i) = vertices[3].y; if (draw) for (int j = 0; j < 4; ++j) @@ -146,20 +142,18 @@ XPtrMat FONT_HERSHEY_SIMPLEX, 0.5, Scalar(0, 255, 0)); } - // break; - // } - - XPtrMat out = cvmat_xptr(frame); - out.attr("indices") = Rcpp::DataFrame::create( - Rcpp::_["x1"] = x1vec, - Rcpp::_["y1"] = y1vec, - Rcpp::_["x2"] = x2vec, - Rcpp::_["y2"] = y2vec, - Rcpp::_["x3"] = x3vec, - Rcpp::_["y3"] = y3vec, - Rcpp::_["x4"] = x4vec, - Rcpp::_["y4"] = y4vec - ); - - return out; -} + + XPtrMat out = cvmat_xptr(frame); + out.attr("indices") = Rcpp::DataFrame::create( + Rcpp::_["x1"] = x1vec, + Rcpp::_["y1"] = y1vec, + Rcpp::_["x2"] = x2vec, + Rcpp::_["y2"] = y2vec, + Rcpp::_["x3"] = x3vec, + Rcpp::_["y3"] = y3vec, + Rcpp::_["x4"] = x4vec, + Rcpp::_["y4"] = y4vec + ); + + return out; + } From 7040d7461cf389b1918b8a9684c4f2bb1042d148 Mon Sep 17 00:00:00 2001 From: Jan Marvin Garbuszus Date: Sun, 5 Apr 2020 13:49:03 +0200 Subject: [PATCH 04/10] cleanups --- R/RcppExports.R | 4 ++-- R/text.R | 2 +- src/RcppExports.cpp | 8 ++++---- src/text.cpp | 10 +++++----- 4 files changed, 12 insertions(+), 12 deletions(-) diff --git a/R/RcppExports.R b/R/RcppExports.R index 7897015..f14a751 100644 --- a/R/RcppExports.R +++ b/R/RcppExports.R @@ -105,7 +105,7 @@ cvmat_markers <- function(ptr) { .Call('_opencv_cvmat_markers', PACKAGE = 'opencv', ptr) } 
-text_detection <- function(ptr, confThreshold, nmsThreshold, inpWidth, inpHeight, model, draw) { - .Call('_opencv_text_detection', PACKAGE = 'opencv', ptr, confThreshold, nmsThreshold, inpWidth, inpHeight, model, draw) +text_detection <- function(input, confThreshold, nmsThreshold, inpWidth, inpHeight, model, draw) { + .Call('_opencv_text_detection', PACKAGE = 'opencv', input, confThreshold, nmsThreshold, inpWidth, inpHeight, model, draw) } diff --git a/R/text.R b/R/text.R index 3a18a74..869018e 100644 --- a/R/text.R +++ b/R/text.R @@ -15,7 +15,7 @@ ocv_text <- function(image, thrs = 0.5, nms = 20, width = 320, height = 320, model <- path.expand(model) - text_detection(ptr = image, + text_detection(input = image, confThreshold = thrs, nmsThreshold = nms, inpWidth = width, diff --git a/src/RcppExports.cpp b/src/RcppExports.cpp index 9829f59..e606488 100644 --- a/src/RcppExports.cpp +++ b/src/RcppExports.cpp @@ -297,19 +297,19 @@ BEGIN_RCPP END_RCPP } // text_detection -XPtrMat text_detection(XPtrMat ptr, float confThreshold, float nmsThreshold, int inpWidth, int inpHeight, std::string model, bool draw); -RcppExport SEXP _opencv_text_detection(SEXP ptrSEXP, SEXP confThresholdSEXP, SEXP nmsThresholdSEXP, SEXP inpWidthSEXP, SEXP inpHeightSEXP, SEXP modelSEXP, SEXP drawSEXP) { +XPtrMat text_detection(XPtrMat input, float confThreshold, float nmsThreshold, int inpWidth, int inpHeight, std::string model, bool draw); +RcppExport SEXP _opencv_text_detection(SEXP inputSEXP, SEXP confThresholdSEXP, SEXP nmsThresholdSEXP, SEXP inpWidthSEXP, SEXP inpHeightSEXP, SEXP modelSEXP, SEXP drawSEXP) { BEGIN_RCPP Rcpp::RObject rcpp_result_gen; Rcpp::RNGScope rcpp_rngScope_gen; - Rcpp::traits::input_parameter< XPtrMat >::type ptr(ptrSEXP); + Rcpp::traits::input_parameter< XPtrMat >::type input(inputSEXP); Rcpp::traits::input_parameter< float >::type confThreshold(confThresholdSEXP); Rcpp::traits::input_parameter< float >::type nmsThreshold(nmsThresholdSEXP); 
Rcpp::traits::input_parameter< int >::type inpWidth(inpWidthSEXP); Rcpp::traits::input_parameter< int >::type inpHeight(inpHeightSEXP); Rcpp::traits::input_parameter< std::string >::type model(modelSEXP); Rcpp::traits::input_parameter< bool >::type draw(drawSEXP); - rcpp_result_gen = Rcpp::wrap(text_detection(ptr, confThreshold, nmsThreshold, inpWidth, inpHeight, model, draw)); + rcpp_result_gen = Rcpp::wrap(text_detection(input, confThreshold, nmsThreshold, inpWidth, inpHeight, model, draw)); return rcpp_result_gen; END_RCPP } diff --git a/src/text.cpp b/src/text.cpp index d2b151e..05f0540 100644 --- a/src/text.cpp +++ b/src/text.cpp @@ -1,6 +1,5 @@ #include "util.hpp" #include -#include #include using namespace cv; @@ -57,13 +56,14 @@ void decode(const Mat& scores, const Mat& geometry, float scoreThresh, // [[Rcpp::export]] XPtrMat - text_detection(XPtrMat ptr, float confThreshold,float nmsThreshold, + text_detection(XPtrMat input, float confThreshold,float nmsThreshold, int inpWidth, int inpHeight, std::string model, bool draw) { if (model.empty()) Rcpp::stop("No model defined"); - Mat frame = get_mat(ptr); + Mat inp = get_mat(input); + // Load network. 
Net net = readNet(model); @@ -73,10 +73,10 @@ XPtrMat outNames[0] = "feature_fusion/Conv_7/Sigmoid"; outNames[1] = "feature_fusion/concat_3"; - Mat blob; - // Mat frame, blob; + Mat frame, blob; std::vector indices; + frame = inp.clone(); blobFromImage(frame, blob, 1.0, Size(inpWidth, inpHeight), Scalar(123.68, 116.78, 103.94), true, false); From 3cf07e14aac84b1a1dcc7e6842c5157334994de8 Mon Sep 17 00:00:00 2001 From: Jan Marvin Garbuszus Date: Sun, 5 Apr 2020 14:26:14 +0200 Subject: [PATCH 05/10] cleanup --- R/text.R | 34 +++++++++++++++++++++++++------ man/ocv_text.Rd | 53 +++++++++++++++++++++++++++++++++++++++++++++++++ man/opencv.Rd | 5 +---- 3 files changed, 82 insertions(+), 10 deletions(-) create mode 100644 man/ocv_text.Rd diff --git a/R/text.R b/R/text.R index 869018e..902883b 100644 --- a/R/text.R +++ b/R/text.R @@ -1,12 +1,34 @@ -#' @export -#' @rdname opencv -#' @param image Path to input image or video file. Skip this argument to capture frames from a camera. -#' @param width Preprocess input image by resizing to a specific width. It should be multiple by 32. -#' @param height Preprocess input image by resizing to a specific height. It should be multiple by 32. +#' ocv_text text detection in images using EAST +#' +#' @description OpenCV sample using EAST (Efficient and Accurate Scene Text +#' Detector) to detect text in images. Requires the EAST pb-model not included +#' in the package. +#' +#' @param image Path to input image or video file. Skip this argument to capture +#' frames from a camera. +#' @param width Preprocess input image by resizing to a specific width. It +#' should be multiple by 32. +#' @param height Preprocess input image by resizing to a specific height. It +#' should be multiple by 32. #' @param thrs Confidence threshold. #' @param nms Non-maximum suppression threshold. -#' #' @param model Path to a binary .pb file contains trained network. +#' @param draw Draws visual output to the image. 
+#' +#' @examples +#' \dontrun{ +#' url <- paste0('https://upload.wikimedia.org/wikipedia/commons/6/6f/', +#' 'Keep-calm-and-carry-on-scan.jpg') +#' fl <- ocv_read(url) +#' +#' ocv_text(image = fl, thrs = 0.7, nms = 0.3, +#' model = "frozen_east_text_detection.pb") +#' } +#' +#' @references +#' https://github.com/opencv/opencv/blob/master/samples/dnn/text_detection.cpp +#' +#' @export ocv_text <- function(image, thrs = 0.5, nms = 20, width = 320, height = 320, model, draw = FALSE){ diff --git a/man/ocv_text.Rd b/man/ocv_text.Rd new file mode 100644 index 0000000..c9b193c --- /dev/null +++ b/man/ocv_text.Rd @@ -0,0 +1,53 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/text.R +\name{ocv_text} +\alias{ocv_text} +\title{ocv_text text detection in images using EAST} +\usage{ +ocv_text( + image, + thrs = 0.5, + nms = 20, + width = 320, + height = 320, + model, + draw = FALSE +) +} +\arguments{ +\item{image}{Path to input image or video file. Skip this argument to capture +frames from a camera.} + +\item{thrs}{Confidence threshold.} + +\item{nms}{Non-maximum suppression threshold.} + +\item{width}{Preprocess input image by resizing to a specific width. It +should be multiple by 32.} + +\item{height}{Preprocess input image by resizing to a specific height. It +should be multiple by 32.} + +\item{model}{Path to a binary .pb file contains trained network.} + +\item{draw}{Draws visual output to the image.} +} +\description{ +OpenCV sample using EAST (Efficient and Accurate Scene Text +Detector) to detect text in images. Requires the EAST pb-model not included +in the package. 
+} +\examples{ +\dontrun{ +url <- paste0('https://upload.wikimedia.org/wikipedia/commons/6/6f/', + 'Keep-calm-and-carry-on-scan.jpg') +fl <- ocv_read(url) + +ocv_text(image = fl, thrs = 0.7, nms = 0.3, + model = "frozen_east_text_detection.pb") +} + +} +\references{ +https://github.com/opencv/opencv/blob/master/samples/dnn/text_detection.cpp +} diff --git a/man/opencv.Rd b/man/opencv.Rd index 3daaf0b..f88d0f5 100644 --- a/man/opencv.Rd +++ b/man/opencv.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/face.R, R/opencv.R, R/text.R +% Please edit documentation in R/face.R, R/opencv.R \name{ocv_face} \alias{ocv_face} \alias{ocv_facemask} @@ -21,7 +21,6 @@ \alias{ocv_copyto} \alias{ocv_display} \alias{ocv_video} -\alias{ocv_text} \title{OpenCV Computer Vision} \usage{ ocv_face(image) @@ -63,8 +62,6 @@ ocv_copyto(image, target, mask) ocv_display(image) ocv_video(filter) - -ocv_text(image, thrs = 0.5, nms = 20, width = 320, height = 320, model) } \arguments{ \item{image}{a ocv image object} From 422f7c0802900a5dd4825506be7fce356c08b6ff Mon Sep 17 00:00:00 2001 From: Jan Marvin Garbuszus Date: Mon, 6 Apr 2020 20:10:57 +0200 Subject: [PATCH 06/10] dnn requires 3.4.3 --- src/text.cpp | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/src/text.cpp b/src/text.cpp index 05f0540..e285afb 100644 --- a/src/text.cpp +++ b/src/text.cpp @@ -9,6 +9,9 @@ void decode(const Mat& scores, const Mat& geometry, float scoreThresh, std::vector& detections, std::vector& confidences) { +#if CV_VERSION_MAJOR < 3 and CV_VERSION_MINOR < 4 and CV_VERSION_REVISION < 3 + throw std::runtime_error("createBackgroundSubtractorMOG2 requires OpenCV 3 or newer"); +#else detections.clear(); CV_Assert(scores.dims == 4); CV_Assert(geometry.dims == 4); CV_Assert(scores.size[0] == 1); CV_Assert(geometry.size[0] == 1); @@ -51,6 +54,7 @@ void decode(const Mat& scores, const Mat& geometry, float scoreThresh, confidences.push_back(score); } } +#endif } @@ -59,6 
+63,9 @@ XPtrMat text_detection(XPtrMat input, float confThreshold,float nmsThreshold, int inpWidth, int inpHeight, std::string model, bool draw) { +#if CV_VERSION_MAJOR < 3 and CV_VERSION_MINOR < 4 and CV_VERSION_REVISION < 3 + throw std::runtime_error("createBackgroundSubtractorMOG2 requires OpenCV 3 or newer"); +#else if (model.empty()) Rcpp::stop("No model defined"); @@ -156,4 +163,5 @@ XPtrMat ); return out; +#endif } From fb28f9e5fc24e6a26a818a36c3a01a3b48856e62 Mon Sep 17 00:00:00 2001 From: Jan Marvin Garbuszus Date: Mon, 6 Apr 2020 20:48:41 +0200 Subject: [PATCH 07/10] dnn requires 3.4.3 --- src/text.cpp | 22 +++++++++++++--------- 1 file changed, 13 insertions(+), 9 deletions(-) diff --git a/src/text.cpp b/src/text.cpp index e285afb..7cfd654 100644 --- a/src/text.cpp +++ b/src/text.cpp @@ -1,16 +1,19 @@ #include "util.hpp" #include + +#if CV_VERSION_MAJOR >= 3 && CV_VERSION_MINOR >= 4 && CV_VERSION_REVISION >= 3 #include +#endif using namespace cv; -using namespace cv::dnn; +// using namespace cv::dnn; void decode(const Mat& scores, const Mat& geometry, float scoreThresh, std::vector& detections, std::vector& confidences) { -#if CV_VERSION_MAJOR < 3 and CV_VERSION_MINOR < 4 and CV_VERSION_REVISION < 3 - throw std::runtime_error("createBackgroundSubtractorMOG2 requires OpenCV 3 or newer"); +#if CV_VERSION_MAJOR < 3 && CV_VERSION_MINOR < 4 && CV_VERSION_REVISION < 3 + throw std::runtime_error("ocv_text requires OpenCV 3.4.3 or newer"); #else detections.clear(); CV_Assert(scores.dims == 4); CV_Assert(geometry.dims == 4); @@ -63,8 +66,8 @@ XPtrMat text_detection(XPtrMat input, float confThreshold,float nmsThreshold, int inpWidth, int inpHeight, std::string model, bool draw) { -#if CV_VERSION_MAJOR < 3 and CV_VERSION_MINOR < 4 and CV_VERSION_REVISION < 3 - throw std::runtime_error("createBackgroundSubtractorMOG2 requires OpenCV 3 or newer"); +#if CV_VERSION_MAJOR < 3 && CV_VERSION_MINOR < 4 && CV_VERSION_REVISION < 3 + throw std::runtime_error("ocv_text 
requires OpenCV 3.4.3 or newer"); #else if (model.empty()) Rcpp::stop("No model defined"); @@ -73,7 +76,7 @@ XPtrMat // Load network. - Net net = readNet(model); + cv::dnn::Net net = cv::dnn::readNet(model); std::vector outs; std::vector outNames(2); @@ -85,8 +88,8 @@ XPtrMat frame = inp.clone(); - blobFromImage(frame, blob, 1.0, Size(inpWidth, inpHeight), - Scalar(123.68, 116.78, 103.94), true, false); + cv::dnn::blobFromImage(frame, blob, 1.0, Size(inpWidth, inpHeight), + Scalar(123.68, 116.78, 103.94), true, false); net.setInput(blob); net.forward(outs, outNames); @@ -99,7 +102,8 @@ XPtrMat decode(scores, geometry, confThreshold, boxes, confidences); // Apply non-maximum suppression procedure. - NMSBoxes(boxes, confidences, confThreshold, nmsThreshold, indices); + + cv::dnn::NMSBoxes(boxes, confidences, confThreshold, nmsThreshold, indices); Rcpp::IntegerVector x1vec(indices.size()); Rcpp::IntegerVector y1vec(indices.size()); From 0c986eee126d2003bf4e35f5cbe92178cf288421 Mon Sep 17 00:00:00 2001 From: Jan Marvin Garbuszus Date: Fri, 10 Apr 2020 13:59:30 +0200 Subject: [PATCH 08/10] dnn requires 3.4.3 or newer with DNN --- src/text.cpp | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/src/text.cpp b/src/text.cpp index 7cfd654..cd519c0 100644 --- a/src/text.cpp +++ b/src/text.cpp @@ -1,19 +1,18 @@ #include "util.hpp" #include -#if CV_VERSION_MAJOR >= 3 && CV_VERSION_MINOR >= 4 && CV_VERSION_REVISION >= 3 -#include +#ifdef HAVE_OPENCV_DNN +#include "opencv2/dnn.hpp" #endif using namespace cv; -// using namespace cv::dnn; void decode(const Mat& scores, const Mat& geometry, float scoreThresh, std::vector& detections, std::vector& confidences) { -#if CV_VERSION_MAJOR < 3 && CV_VERSION_MINOR < 4 && CV_VERSION_REVISION < 3 - throw std::runtime_error("ocv_text requires OpenCV 3.4.3 or newer"); +#ifndef HAVE_OPENCV_DNN + throw std::runtime_error("ocv_text req. 
OpenCV 3.4.3 or newer with DNN"); #else detections.clear(); CV_Assert(scores.dims == 4); CV_Assert(geometry.dims == 4); @@ -66,8 +65,8 @@ XPtrMat text_detection(XPtrMat input, float confThreshold,float nmsThreshold, int inpWidth, int inpHeight, std::string model, bool draw) { -#if CV_VERSION_MAJOR < 3 && CV_VERSION_MINOR < 4 && CV_VERSION_REVISION < 3 - throw std::runtime_error("ocv_text requires OpenCV 3.4.3 or newer"); +#ifndef HAVE_OPENCV_DNN + throw std::runtime_error("ocv_text req. OpenCV 3.4.3 or newer with DNN"); #else if (model.empty()) Rcpp::stop("No model defined"); From bf708c03d3c6be472a08035f900a27b08652a865 Mon Sep 17 00:00:00 2001 From: Jan Marvin Garbuszus Date: Fri, 10 Apr 2020 13:59:30 +0200 Subject: [PATCH 09/10] dnn requires 3.4.3 or newer with DNN --- src/text.cpp | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/src/text.cpp b/src/text.cpp index 7cfd654..33bb6f8 100644 --- a/src/text.cpp +++ b/src/text.cpp @@ -1,19 +1,22 @@ #include "util.hpp" #include -#if CV_VERSION_MAJOR >= 3 && CV_VERSION_MINOR >= 4 && CV_VERSION_REVISION >= 3 -#include +#define OPENCV_VERSION (CV_VERSION_MAJOR * 10000 \ ++ CV_VERSION_MINOR * 100 \ ++ CV_VERSION_REVISION) + +#ifdef HAVE_OPENCV_DNN +#include "opencv2/dnn.hpp" #endif using namespace cv; -// using namespace cv::dnn; void decode(const Mat& scores, const Mat& geometry, float scoreThresh, std::vector& detections, std::vector& confidences) { -#if CV_VERSION_MAJOR < 3 && CV_VERSION_MINOR < 4 && CV_VERSION_REVISION < 3 - throw std::runtime_error("ocv_text requires OpenCV 3.4.3 or newer"); +#if !defined(HAVE_OPENCV_DNN) || OPENCV_VERSION < 30403 + throw std::runtime_error("ocv_text req. 
OpenCV 3.4.3 or newer with DNN"); #else detections.clear(); CV_Assert(scores.dims == 4); CV_Assert(geometry.dims == 4); @@ -66,8 +69,8 @@ XPtrMat text_detection(XPtrMat input, float confThreshold,float nmsThreshold, int inpWidth, int inpHeight, std::string model, bool draw) { -#if CV_VERSION_MAJOR < 3 && CV_VERSION_MINOR < 4 && CV_VERSION_REVISION < 3 - throw std::runtime_error("ocv_text requires OpenCV 3.4.3 or newer"); +#if !defined(HAVE_OPENCV_DNN) || OPENCV_VERSION < 30403 + throw std::runtime_error("ocv_text req. OpenCV 3.4.3 or newer with DNN"); #else if (model.empty()) Rcpp::stop("No model defined"); From 653ee42e65ab0d5a3d61165f2558d9eb9cb05ad6 Mon Sep 17 00:00:00 2001 From: Jan Marvin Garbuszus Date: Fri, 10 Apr 2020 15:09:01 +0200 Subject: [PATCH 10/10] clean up --- R/text.R | 5 ++--- man/ocv_text.Rd | 5 ++--- src/text.cpp | 23 ++++++++++------------- 3 files changed, 14 insertions(+), 19 deletions(-) diff --git a/R/text.R b/R/text.R index 902883b..b12a3b2 100644 --- a/R/text.R +++ b/R/text.R @@ -4,13 +4,12 @@ #' Detector) to detect text in images. Requires the EAST pb-model not included #' in the package. #' -#' @param image Path to input image or video file. Skip this argument to capture -#' frames from a camera. +#' @param image opencv image to be processed. #' @param width Preprocess input image by resizing to a specific width. It #' should be multiple by 32. #' @param height Preprocess input image by resizing to a specific height. It #' should be multiple by 32. -#' @param thrs Confidence threshold. +#' @param thrs Confidence threshold between 0 and 1. #' @param nms Non-maximum suppression threshold. #' @param model Path to a binary .pb file contains trained network. #' @param draw Draws visual output to the image. diff --git a/man/ocv_text.Rd b/man/ocv_text.Rd index c9b193c..475bf78 100644 --- a/man/ocv_text.Rd +++ b/man/ocv_text.Rd @@ -15,10 +15,9 @@ ocv_text( ) } \arguments{ -\item{image}{Path to input image or video file. 
Skip this argument to capture -frames from a camera.} +\item{image}{opencv image to be processed.} -\item{thrs}{Confidence threshold.} +\item{thrs}{Confidence threshold between 0 and 1.} \item{nms}{Non-maximum suppression threshold.} diff --git a/src/text.cpp b/src/text.cpp index 33bb6f8..a25c7e0 100644 --- a/src/text.cpp +++ b/src/text.cpp @@ -72,24 +72,21 @@ XPtrMat #if !defined(HAVE_OPENCV_DNN) || OPENCV_VERSION < 30403 throw std::runtime_error("ocv_text req. OpenCV 3.4.3 or newer with DNN"); #else - if (model.empty()) - Rcpp::stop("No model defined"); - Mat inp = get_mat(input); + Mat frame, blob; + Mat inp = get_mat(input); + frame = inp.clone(); - // Load network. - cv::dnn::Net net = cv::dnn::readNet(model); + std::vector indices; std::vector outs; std::vector outNames(2); outNames[0] = "feature_fusion/Conv_7/Sigmoid"; outNames[1] = "feature_fusion/concat_3"; - Mat frame, blob; - std::vector indices; - - frame = inp.clone(); + // Load network. + cv::dnn::Net net = cv::dnn::readNet(model); cv::dnn::blobFromImage(frame, blob, 1.0, Size(inpWidth, inpHeight), Scalar(123.68, 116.78, 103.94), true, false); @@ -105,9 +102,9 @@ XPtrMat decode(scores, geometry, confThreshold, boxes, confidences); // Apply non-maximum suppression procedure. - cv::dnn::NMSBoxes(boxes, confidences, confThreshold, nmsThreshold, indices); + // initialize output vectors Rcpp::IntegerVector x1vec(indices.size()); Rcpp::IntegerVector y1vec(indices.size()); Rcpp::IntegerVector x2vec(indices.size()); @@ -117,7 +114,6 @@ XPtrMat Rcpp::IntegerVector x4vec(indices.size()); Rcpp::IntegerVector y4vec(indices.size()); - // Render detections. 
Point2f ratio((float)frame.cols / inpWidth, (float)frame.rows / inpHeight); for (size_t i = 0; i < indices.size(); ++i) @@ -141,13 +137,14 @@ XPtrMat x4vec.at(i) = vertices[3].x; y4vec.at(i) = vertices[3].y; + // draws boxes to image if (draw) for (int j = 0; j < 4; ++j) line(frame, vertices[j], vertices[(j + 1) % 4], Scalar(0, 255, 0), 1); } if (draw) { - // Put efficiency information. + // Draws efficiency information to the image. std::vector layersTimes; double freq = getTickFrequency() / 1000; double t = net.getPerfProfile(layersTimes) / freq; @@ -156,7 +153,7 @@ XPtrMat FONT_HERSHEY_SIMPLEX, 0.5, Scalar(0, 255, 0)); } - + // prepare output XPtrMat out = cvmat_xptr(frame); out.attr("indices") = Rcpp::DataFrame::create( Rcpp::_["x1"] = x1vec,