From 16a341653e27267450e69dc0122d0e5df3be8fd8 Mon Sep 17 00:00:00 2001 From: vortex-orin Date: Fri, 14 Feb 2025 19:09:45 +0100 Subject: [PATCH 01/29] now its stable and fixed so the angle is calculated from the line, and x vector chosen to be the vector 90 degrees on z vector and rotated by angle about z vector --- src/valve_detection.cpp | 47 ++++++++++++++++++++++++++++++++--------- 1 file changed, 37 insertions(+), 10 deletions(-) diff --git a/src/valve_detection.cpp b/src/valve_detection.cpp index 9448161..48e0334 100644 --- a/src/valve_detection.cpp +++ b/src/valve_detection.cpp @@ -348,18 +348,37 @@ void ValveDetectionNode::process_and_publish_image( double angle = std::atan2(longest_line[3] - longest_line[1], longest_line[2] - longest_line[0]); - // ANGLE TO QUATERNION WITH RESPECT TO PLANE NORMAL. + Eigen::Vector3f plane_normal(coefficients->values[0], coefficients->values[1], coefficients->values[2]); + plane_normal.normalize(); - // Publish detected line - geometry_msgs::msg::PoseStamped line_pose_msg; - line_pose_msg.header.stamp = depth_image->header.stamp; - line_pose_msg.header.frame_id = "zed_left_camera_frame"; - line_pose_msg.pose.position.x = centroid.x(); - line_pose_msg.pose.position.y = centroid.y(); - line_pose_msg.pose.position.z = centroid.z(); + // Find an arbitrary vector perpendicular to the plane normal + Eigen::Vector3f perp_vector = (std::abs(plane_normal.z()) < 0.99) + ? 
Eigen::Vector3f(0, 0, 1).cross(plane_normal).normalized() + : Eigen::Vector3f(1, 0, 0).cross(plane_normal).normalized(); + + // Rotate perp_vector by `angle` around `plane_normal` to get x-axis + Eigen::AngleAxisf rotation(angle, plane_normal); + Eigen::Vector3f x_axis = rotation * perp_vector; // Rotated vector in-plane + + // Filter the x-axis direction to maintain consistency + if (filter_direction_.dot(x_axis) < 0) { + x_axis = -x_axis; // Flip to maintain consistency + } + + // Update filter_direction_ + filter_direction_ = x_axis; + + // Compute y-axis (perpendicular to both x and z) + Eigen::Vector3f y_axis = plane_normal.cross(x_axis).normalized(); + + // Construct rotation matrix + Eigen::Matrix3f rotation_matrix; + rotation_matrix.col(0) = x_axis; + rotation_matrix.col(1) = y_axis; + rotation_matrix.col(2) = plane_normal; // z-axis is the plane normal - Eigen::Quaternionf quaternion; - quaternion.setFromTwoVectors(Eigen::Vector3f(1, 0, 0), line_direction.normalized()); + // Convert rotation matrix to quaternion + Eigen::Quaternionf quaternion(rotation_matrix); if (std::isnan(quaternion.x()) || std::isnan(quaternion.y()) || std::isnan(quaternion.z()) || std::isnan(quaternion.w())) { @@ -367,6 +386,14 @@ void ValveDetectionNode::process_and_publish_image( return; } + geometry_msgs::msg::PoseStamped line_pose_msg; + line_pose_msg.header.stamp = color_image->header.stamp; + line_pose_msg.header.frame_id = "zed_left_camera_frame"; + + line_pose_msg.pose.position.x = centroid[0]; + line_pose_msg.pose.position.y = centroid[1]; + line_pose_msg.pose.position.z = centroid[2]; + line_pose_msg.pose.orientation.x = quaternion.x(); line_pose_msg.pose.orientation.y = quaternion.y(); line_pose_msg.pose.orientation.z = quaternion.z(); From 69a8a4205023744290742f29302a80ca808fe07c Mon Sep 17 00:00:00 2001 From: ole Date: Fri, 7 Mar 2025 17:06:08 +0100 Subject: [PATCH 02/29] added docstrings --- include/valve_detection/valve_detection.hpp | 58 +++++ src/valve_detection.cpp 
| 275 +------------------- 2 files changed, 62 insertions(+), 271 deletions(-) diff --git a/include/valve_detection/valve_detection.hpp b/include/valve_detection/valve_detection.hpp index 78f830c..8818100 100644 --- a/include/valve_detection/valve_detection.hpp +++ b/include/valve_detection/valve_detection.hpp @@ -91,6 +91,11 @@ class ValveDetectionNode : public rclcpp::Node double height_scalar_; double width_scalar_; + /** + * @brief Callback function for synchronized depth image, color image, and 2D detections. + * + * This function is triggered when synchronized messages for a depth image, a color image, and a 2D detection array + */ void synchronized_callback(const sensor_msgs::msg::Image::ConstSharedPtr & depth_image, const sensor_msgs::msg::Image::ConstSharedPtr & color_image, const vision_msgs::msg::Detection2DArray::ConstSharedPtr & detections); @@ -100,11 +105,64 @@ class ValveDetectionNode : public rclcpp::Node // Utility functions + /** + * @brief Computes the height and width scalars based on the camera information. + * + * This function calculates the scaling factors (`height_scalar_` and `width_scalar_`) + * by comparing the dimensions of two camera frames: + * - The YOLO color camera frame (`camera_info_yolo_color_`). + * - The reference camera frame (`camera_info_`). + * + */ void compute_height_width_scalars(); + + /** + * @brief Main logic executed when two images are synchronized. + * + * This function processes synchronized depth and color images along with 2D detections to calculate the angle of the valve handle + * and its orientation in 3D space. It uses RANSAC line detection to determine the handle's angle and RANSAC plane detection + * to find the normal vector of the plane in which the valve handle lies. + * + * The function performs the following steps: + * 1. Converts the depth and color images from ROS messages to OpenCV matrices. + * 2. Selects the most centered detection from the 2D detection array. + * 3. 
Projects the detected bounding box into 3D space using the depth image and camera intrinsics. + * 4. Uses RANSAC plane detection to find the plane normal vector and filters points near the plane. + * 5. Detects the valve handle's orientation using Hough line detection on the color image. + * 6. Computes the 3D pose of the valve handle and publishes it as a `geometry_msgs::msg::PoseStamped` message. + * + * @param[in] depth_image A shared pointer to the depth image message. + * @param[in] color_image A shared pointer to the color image message. + * @param[in] detections A shared pointer to the 2D detection array message. + * + * @pre The `camera_info_` and `camera_info_yolo_color_` member variables must be initialized with valid camera intrinsic parameters. + * @pre The input images and detections must be synchronized (i.e., they correspond to the same timestamp or frame). + * @post If successful, the function publishes the following: + * - A point cloud of the detected valve handle region. + * - The normal vector of the detected plane. + * - The 3D pose of the valve handle. + * @post If unsuccessful, the function logs warnings or errors and returns early. + * + * @note The function assumes that the input images and detections are synchronized. + * @note The function uses RANSAC for robust plane and line detection, which is suitable for noisy sensor data. + * @note The function handles edge cases such as invalid depth values, empty detections, and failed RANSAC fits. + */ void process_and_publish_image(const sensor_msgs::msg::Image::ConstSharedPtr & depth_image, const sensor_msgs::msg::Image::ConstSharedPtr & color_image, const vision_msgs::msg::Detection2DArray::ConstSharedPtr & detections); + + /** + * @brief Projects a 2D pixel coordinate and depth value into a 3D point in the camera frame. 
+ * + * This function converts a pixel coordinate `(u, v)` and its corresponding depth value into a 3D point + */ void project_pixel_to_3d(int u, int v, float depth, pcl::PointXYZ &point); + + /** + * @brief Projects a 3D point in the camera frame to 2D pixel coordinates. + * + * This function converts a 3D point `(x, y, z)` in the camera frame into 2D pixel coordinates `(u, v)` + */ void project_3d_to_pixel(float x, float y, float z, int &u, int &v); }; diff --git a/src/valve_detection.cpp b/src/valve_detection.cpp index 48e0334..0df9a5f 100644 --- a/src/valve_detection.cpp +++ b/src/valve_detection.cpp @@ -130,7 +130,6 @@ void ValveDetectionNode::synchronized_callback( } - void ValveDetectionNode::process_and_publish_image( const sensor_msgs::msg::Image::ConstSharedPtr & depth_image, const sensor_msgs::msg::Image::ConstSharedPtr & color_image, @@ -153,8 +152,8 @@ void ValveDetectionNode::process_and_publish_image( // The most centred of the two most likely variables are chosen as a boundingbox. 
// Calculate distance to center (cx, cy) - double cx = camera_info_->k[2]; // k[2] = cx - double cy = camera_info_->k[5]; // k[5] = cy + double cx = camera_info_->k[2]; + double cy = camera_info_->k[5]; if (detections->detections.size() > 1) { float dist_first = std::sqrt(std::pow(detections->detections[0].bbox.center.position.x - cx, 2) + @@ -190,11 +189,7 @@ void ValveDetectionNode::process_and_publish_image( float depth_value = cv_depth_image.at(y, x); pcl::PointXYZ point; project_pixel_to_3d(x, y, depth_value, point); - // if (y == center_y && x == center_x) - // { - // RCLCPP_INFO(this->get_logger(), "center 2d point: (%d, %d, %f)", x, y, depth_value); - // RCLCPP_INFO(this->get_logger(), "Center 3d point: (%.3f, %.3f, %.3f)", point.x, point.y, point.z); - // } + if (depth_value > 0) // Check for valid depth { cloud->points.push_back(point); @@ -421,266 +416,4 @@ void ValveDetectionNode::process_and_publish_image( } } -// void ValveDetectionNode::process_and_publish_image(const sensor_msgs::msg::Image::ConstSharedPtr & image,const vision_msgs::msg::Detection2DArray::ConstSharedPtr & detections) -// { -// std::string target_frame = "zed_left_camera_frame"; -// if (!camera_info_received_ || !camera_info_received_yolo_color_) -// { -// return; -// } - -// try -// { -// cv::Mat cv_image = cv_bridge::toCvCopy(image, "32FC1")->image; - -// if (!detections->detections.empty()) -// { -// const auto* first_detection = &detections->detections[0]; -// // The most centred of the two most likely variables are chosen as a boundingbox. 
-// // Calculate distance to center (cx, cy) -// double cx = camera_info_->k[2]; // k[2] = cx -// double cy = camera_info_->k[5]; // k[5] = cy -// if (detections->detections.size() > 1) -// { -// float dist_first = std::sqrt(std::pow(detections->detections[0].bbox.center.position.x - cx, 2) + -// std::pow(detections->detections[0].bbox.center.position.y - cy, 2)); - -// float dist_second = std::sqrt(std::pow(detections->detections[1].bbox.center.position.x - cx, 2) + -// std::pow(detections->detections[1].bbox.center.position.y - cy, 2)); - -// // Select the detection that is closer to the center. -// if (dist_first > dist_second) { -// first_detection = &detections->detections[1]; -// } -// } - -// int center_x = static_cast(first_detection->bbox.center.position.x / this->width_scalar_); -// int center_y = static_cast((first_detection->bbox.center.position.y -140) / this->height_scalar_); -// int width = static_cast(first_detection->bbox.size_x / this->width_scalar_); -// int height = static_cast(first_detection->bbox.size_y / this->height_scalar_); - - - -// pcl::PointCloud::Ptr cloud(new pcl::PointCloud); -// bool points_added = false; - -// for (int y = center_y - (height / 2); y <= center_y + (height / 2); ++y) -// { -// for (int x = center_x - (width / 2); x <= center_x + (width / 2); ++x) -// { -// int dx = x - center_x; -// int dy = y - center_y; -// if (dx * dx / (float)((width / 2) * (width / 2)) + dy * dy / (float)((height / 2) * (height / 2)) <= 1.0) -// { -// if (y >= 0 && y < cv_image.rows && x >= 0 && x < cv_image.cols) -// { -// float depth_value = cv_image.at(y, x); -// pcl::PointXYZ point; -// project_pixel_to_3d(x, y, depth_value, point); -// // if (y == center_y && x == center_x) -// // { -// // RCLCPP_INFO(this->get_logger(), "center 2d point: (%d, %d, %f)", x, y, depth_value); -// // RCLCPP_INFO(this->get_logger(), "Center 3d point: (%.3f, %.3f, %.3f)", point.x, point.y, point.z); -// // } -// if (depth_value > 0) // Check for valid depth -// 
{ -// cloud->points.push_back(point); -// points_added = true; -// } -// } -// } -// } -// } - - -// if (points_added && cloud->points.size() > 0) -// { - - - - -// sensor_msgs::msg::PointCloud2 cloud_msg; -// pcl::toROSMsg(*cloud, cloud_msg); -// cloud_msg.header.stamp = image->header.stamp; -// cloud_msg.header.frame_id = target_frame; - -// pointcloud_pub_->publish(cloud_msg); - -// pcl::SACSegmentation seg; -// seg.setOptimizeCoefficients(true); -// seg.setModelType(pcl::SACMODEL_PLANE); -// seg.setMethodType(pcl::SAC_RANSAC); -// seg.setDistanceThreshold(0.01); - -// pcl::ModelCoefficients::Ptr coefficients(new pcl::ModelCoefficients); -// pcl::PointIndices::Ptr inliers(new pcl::PointIndices); -// seg.setInputCloud(cloud); -// seg.segment(*inliers, *coefficients); - -// if (inliers->indices.size() > 0) -// { -// geometry_msgs::msg::Vector3 normal_msg; -// normal_msg.x = coefficients->values[0]; -// normal_msg.y = coefficients->values[1]; -// normal_msg.z = coefficients->values[2]; -// plane_normal_pub_->publish(normal_msg); - -// RCLCPP_INFO(this->get_logger(), "RANSAC successful: Plane normal (%.3f, %.3f, %.3f)", -// coefficients->values[0], coefficients->values[1], coefficients->values[2]); - -// pcl::PointCloud::Ptr near_plane_cloud(new pcl::PointCloud); - -// // Select points within a distance threshold of the plane model -// pcl::ExtractIndices extract; -// extract.setInputCloud(cloud); -// extract.setIndices(inliers); -// extract.setNegative(false); // Select the inliers -// extract.filter(*near_plane_cloud); - - -// // Now we can use `selectWithinDistance` to select inliers that are within the distance threshold -// pcl::PointIndices::Ptr selected_inliers(new pcl::PointIndices()); -// float threshold_distance = 0.02; // Example threshold (3 cm) - -// // Logic for selecting points close to the threshold_distance of the plane. 
-// for (size_t i = 0; i < cloud->points.size(); ++i) { -// const auto& point = cloud->points[i]; -// float distance = std::abs(coefficients->values[0] * point.x + -// coefficients->values[1] * point.y + -// coefficients->values[2] * point.z + -// coefficients->values[3]) / -// std::sqrt(coefficients->values[0] * coefficients->values[0] + -// coefficients->values[1] * coefficients->values[1] + -// coefficients->values[2] * coefficients->values[2]); -// if (distance < threshold_distance) { -// selected_inliers->indices.push_back(i); -// } -// } -// RCLCPP_INFO(this->get_logger(), "Selected inlier size: %zu", selected_inliers->indices.size()); - -// // Extract the selected inliers -// pcl::PointCloud::Ptr final_inliers_cloud(new pcl::PointCloud); -// pcl::PointCloud::Ptr near_plane_cloud_copy(new pcl::PointCloud); - -// for (const auto& idx : selected_inliers->indices) -// { -// // Convert 3D point (x, y) to image space (u, v) -// int u, v; -// const auto& point = cloud->points[idx]; -// project_3d_to_pixel(point.x, point.y, point.z, u, v); - - -// // Check if the point (u, v) is inside the smaller ellipse -// int dx = u - center_x; -// int dy = v - center_y; - -// // Check if point is inside the smaller ellipse in image space -// float a = static_cast(width) / 6.0f; // Semi-major axis -// float b = static_cast(height) / 6.0f; // Semi-minor axis - -// if ((dx * dx) / (a * a) + (dy * dy) / (b * b) <= 1.0f) { -// // Point is inside the ellipse, add it to the near_plane_cloud -// final_inliers_cloud->points.push_back(cloud->points[idx]); -// } - - -// } - -// near_plane_cloud_copy = near_plane_cloud; -// near_plane_cloud = final_inliers_cloud; - -// RCLCPP_INFO(this->get_logger(), "near_plane_points size: %zu", near_plane_cloud->points.size()); - -// if (near_plane_cloud->points.size() > 0) -// { -// sensor_msgs::msg::PointCloud2 near_plane_cloud_msg; -// pcl::toROSMsg(*near_plane_cloud, near_plane_cloud_msg); -// near_plane_cloud_msg.header.stamp = image->header.stamp; 
-// near_plane_cloud_msg.header.frame_id = target_frame; -// near_plane_cloud_pub_->publish(near_plane_cloud_msg); // Publish near plane cloud - - -// Eigen::Vector4f centroid; -// pcl::compute3DCentroid(*near_plane_cloud, centroid); - -// Eigen::Matrix3f covariance; -// pcl::computeCovarianceMatrixNormalized(*near_plane_cloud, centroid, covariance); - -// Eigen::SelfAdjointEigenSolver eigen_solver(covariance, Eigen::ComputeEigenvectors); - -// Eigen::Vector3f line_direction = eigen_solver.eigenvectors().col(2); - -// // Filter the line direction to maintain consistency -// if (filter_direction_.dot(line_direction) < 0) { -// line_direction = -line_direction; // Flip to maintain consistency -// } - -// filter_direction_ = line_direction; - -// geometry_msgs::msg::PoseStamped line_pose_msg; -// line_pose_msg.header.stamp = image->header.stamp; -// line_pose_msg.header.frame_id = target_frame; - -// line_pose_msg.pose.position.x = centroid[0]; -// line_pose_msg.pose.position.y = centroid[1]; -// line_pose_msg.pose.position.z = centroid[2]; - -// Eigen::Quaternionf quaternion; -// quaternion.setFromTwoVectors(Eigen::Vector3f(1, 0, 0), line_direction); - -// line_pose_msg.pose.orientation.x = quaternion.x(); -// line_pose_msg.pose.orientation.y = quaternion.y(); -// line_pose_msg.pose.orientation.z = quaternion.z(); -// line_pose_msg.pose.orientation.w = quaternion.w(); - -// line_pose_pub_->publish(line_pose_msg); -// RCLCPP_INFO(this->get_logger(), "Line fitted and published."); - -// // Publish line points as a PointCloud2 message -// pcl::PointCloud::Ptr line_cloud(new pcl::PointCloud); -// const int num_points = 100; // Number of points along the line - -// for (int i = -num_points / 2; i <= num_points / 2; ++i) -// { -// pcl::PointXYZ line_point; -// float scale = i * 0.01; // Adjust the scale for length - -// line_point.x = centroid[0] + scale * line_direction[0]; -// line_point.y = centroid[1] + scale * line_direction[1]; -// line_point.z = centroid[2] + scale 
* line_direction[2]; -// line_cloud->points.push_back(line_point); -// } - -// // Convert to ROS message and publish -// sensor_msgs::msg::PointCloud2 line_cloud_msg; -// pcl::toROSMsg(*line_cloud, line_cloud_msg); -// line_cloud_msg.header.stamp = image->header.stamp; -// line_cloud_msg.header.frame_id = target_frame; -// line_points_pub_->publish(line_cloud_msg); - -// // RCLCPP_INFO(this->get_logger(), "Line points published as PointCloud2."); -// } -// } -// else -// { -// RCLCPP_WARN(this->get_logger(), "No plane inliers found."); -// } -// } -// else -// { -// RCLCPP_WARN(this->get_logger(), "No valid points found for RANSAC."); -// } -// } - -// auto processed_msg = cv_bridge::CvImage(image->header, "32FC1", cv_image).toImageMsg(); -// processed_image_pub_->publish(*processed_msg); -// } -// catch (cv_bridge::Exception &e) -// { -// RCLCPP_ERROR(this->get_logger(), "CvBridge Error: %s", e.what()); -// } -// } - - -RCLCPP_COMPONENTS_REGISTER_NODE(ValveDetectionNode) - +RCLCPP_COMPONENTS_REGISTER_NODE(ValveDetectionNode) \ No newline at end of file From d95465b9344d9a2171d563fc914590336391d440 Mon Sep 17 00:00:00 2001 From: vortex-orin Date: Fri, 7 Mar 2025 18:45:07 +0100 Subject: [PATCH 03/29] fixed so the image is bublished always, with less lag by placing the publisher outside the sync checks --- package.xml | 3 +-- src/valve_detection.cpp | 39 ++++++++++++++++++++++++++++++++++++++- 2 files changed, 39 insertions(+), 3 deletions(-) diff --git a/package.xml b/package.xml index 7247b06..ea94fd4 100644 --- a/package.xml +++ b/package.xml @@ -16,9 +16,8 @@ vision_msgs geometry_msgs std_msgs - PCL pcl_conversions - opencv + ament_lint_auto ament_lint_common diff --git a/src/valve_detection.cpp b/src/valve_detection.cpp index 0df9a5f..55910ba 100644 --- a/src/valve_detection.cpp +++ b/src/valve_detection.cpp @@ -402,13 +402,50 @@ void ValveDetectionNode::process_and_publish_image( { RCLCPP_WARN(this->get_logger(), "No lines detected in bounding box."); } + if 
(!cv_color_image.empty()) { + // Draw the bounding box on the color image + int x1 = std::max(center_x - width / 2, 0); + int y1 = std::max(center_y - height / 2, 0); + int x2 = std::min(center_x + width / 2, cv_color_image.cols - 1); + int y2 = std::min(center_y + height / 2, cv_color_image.rows - 1); + + cv::rectangle(cv_color_image, cv::Point(x1, y1), cv::Point(x2, y2), cv::Scalar(0, 255, 0), 2); + + // Draw the detected line on the color image + if (!lines.empty()) { + cv::Vec4i longest_line = lines[0]; + double max_length = 0; + + for (const auto &line : lines) { + double length = std::hypot(line[2] - line[0], line[3] - line[1]); + if (length > max_length) { + max_length = length; + longest_line = line; + } + } + + // Adjust line coordinates to the full image size + cv::Point pt1(longest_line[0] + x1, longest_line[1] + y1); + cv::Point pt2(longest_line[2] + x1, longest_line[3] + y1); + + // Draw the line on the color image + cv::line(cv_color_image, pt1, pt2, cv::Scalar(0, 0, 255), 2); + } + } + } else { RCLCPP_WARN(this->get_logger(), "No plane inliers found."); } - } } + } + // Convert the edited OpenCV image back to a ROS message + sensor_msgs::msg::Image::SharedPtr edited_image_msg = cv_bridge::CvImage(color_image->header, "bgr8", cv_color_image).toImageMsg(); + + // Publish the edited image + processed_image_pub_->publish(*edited_image_msg); + } catch (cv_bridge::Exception &e) { From b54fb3b63bdc0b613258d8690328d6e2323ff8e9 Mon Sep 17 00:00:00 2001 From: vortex-orin Date: Sun, 16 Mar 2025 12:20:12 +0100 Subject: [PATCH 04/29] fixed offset in line plot. 
TODO: roi is square, should be circle --- config/valve_detection_params.yaml | 3 +- include/valve_detection/valve_detection.hpp | 2 + src/valve_detection.cpp | 42 ++++++++++++++------- 3 files changed, 33 insertions(+), 14 deletions(-) diff --git a/config/valve_detection_params.yaml b/config/valve_detection_params.yaml index aba82f4..d5de105 100644 --- a/config/valve_detection_params.yaml +++ b/config/valve_detection_params.yaml @@ -3,5 +3,6 @@ depth_image_sub_topic: "/zed_node/depth/depth_registered" color_image_sub_topic: "/image_rect" detections_sub_topic: "/detections_output" - + + line_detection_area: 4 diff --git a/include/valve_detection/valve_detection.hpp b/include/valve_detection/valve_detection.hpp index 8818100..089ad03 100644 --- a/include/valve_detection/valve_detection.hpp +++ b/include/valve_detection/valve_detection.hpp @@ -91,6 +91,8 @@ class ValveDetectionNode : public rclcpp::Node double height_scalar_; double width_scalar_; + int line_detection_area_; + /** * @brief Callback function for synchronized depth image, color image, and 2D detections.
* diff --git a/src/valve_detection.cpp b/src/valve_detection.cpp index 55910ba..449d8c7 100644 --- a/src/valve_detection.cpp +++ b/src/valve_detection.cpp @@ -11,6 +11,8 @@ ValveDetectionNode::ValveDetectionNode(const rclcpp::NodeOptions & options) auto color_image_sub_topic = this->declare_parameter("color_image_sub_topic"); auto detections_sub_topic = this->declare_parameter("detections_sub_topic"); + line_detection_area_ = this->declare_parameter("line_detection_area", 5); + rclcpp::QoS qos = rclcpp::QoS(rclcpp::KeepLast(10)).reliability(RMW_QOS_POLICY_RELIABILITY_BEST_EFFORT); depth_image_sub_.subscribe(this, depth_image_sub_topic, qos.get_rmw_qos_profile()); color_image_sub_.subscribe(this, color_image_sub_topic, qos.get_rmw_qos_profile()); @@ -32,7 +34,7 @@ ValveDetectionNode::ValveDetectionNode(const rclcpp::NodeOptions & options) processed_image_pub_ = this->create_publisher("yolov8_valve_detection_image", 10); plane_normal_pub_ = this->create_publisher("plane_normal", 10); pointcloud_pub_ = this->create_publisher("point_cloud", 10); - line_pose_pub_ = this->create_publisher("line_pose", 10); + line_pose_pub_ = this->create_publisher("valve_pose", 10); line_points_pub_ = this->create_publisher("line_points", 10); near_plane_cloud_pub_ = this->create_publisher("near_plane_cloud", 10); } @@ -306,10 +308,12 @@ void ValveDetectionNode::process_and_publish_image( return; } - int x1 = std::max(center_x - width / 2, 0); - int y1 = std::max(center_y - height / 2, 0); - int x2 = std::min(center_x + width / 2, cv_color_image.cols - 1); - int y2 = std::min(center_y + height / 2, cv_color_image.rows - 1); + int x1 = std::max(center_x - width * line_detection_area_/10 , 0); + int y1 = std::max(center_y - height * line_detection_area_/10 , 0); + int x2 = std::min(center_x + width * line_detection_area_/10 , cv_color_image.cols - 1); + int y2 = std::min(center_y + height * line_detection_area_/10 , cv_color_image.rows - 1); + // Draw roi for debugging + 
cv::rectangle(cv_color_image, cv::Point(x1, y1), cv::Point(x2, y2), cv::Scalar(0, 255, 0), 2); // Extract ROI (region of interest) from the color image cv::Mat roi = cv_color_image(cv::Rect(x1, y1, x2 - x1, y2 - y1)); @@ -324,7 +328,7 @@ void ValveDetectionNode::process_and_publish_image( // Detect lines using Hough Transform std::vector lines; - cv::HoughLinesP(edges, lines, 1, CV_PI / 180, 50, 50, 10); + cv::HoughLinesP(edges, lines, 1, CV_PI / 180, 20, 40, 10); if (!lines.empty()) { @@ -415,7 +419,8 @@ void ValveDetectionNode::process_and_publish_image( if (!lines.empty()) { cv::Vec4i longest_line = lines[0]; double max_length = 0; - + + // Find the longest line in the ROI for (const auto &line : lines) { double length = std::hypot(line[2] - line[0], line[3] - line[1]); if (length > max_length) { @@ -423,13 +428,24 @@ void ValveDetectionNode::process_and_publish_image( longest_line = line; } } - - // Adjust line coordinates to the full image size - cv::Point pt1(longest_line[0] + x1, longest_line[1] + y1); - cv::Point pt2(longest_line[2] + x1, longest_line[3] + y1); - - // Draw the line on the color image + + // Adjust line coordinates to the full image size using the same x1 ... 
as roi calculation + int x1 = std::max(center_x - width * line_detection_area_/10 , 0); + int y1 = std::max(center_y - height * line_detection_area_/10 , 0); + int x2 = std::min(center_x + width * line_detection_area_/10 , cv_color_image.cols - 1); + int y2 = std::min(center_y + height * line_detection_area_/10 , cv_color_image.rows - 1); + cv::Point pt1(longest_line[0] + x1, longest_line[1] + y1); // Add ROI offset + cv::Point pt2(longest_line[2] + x1, longest_line[3] + y1); // Add ROI offset + + // Draw the line on the full color image cv::line(cv_color_image, pt1, pt2, cv::Scalar(0, 0, 255), 2); + + // Debugging output + RCLCPP_INFO(this->get_logger(), "ROI Coordinates: x1=%d, y1=%d, x2=%d, y2=%d", x1, y1, x2, y2); + RCLCPP_INFO(this->get_logger(), "Longest Line in ROI: (%d, %d) -> (%d, %d)", + longest_line[0], longest_line[1], longest_line[2], longest_line[3]); + RCLCPP_INFO(this->get_logger(), "Adjusted Line in Full Image: (%d, %d) -> (%d, %d)", + pt1.x, pt1.y, pt2.x, pt2.y); } } From e8babaf75e9b9481d44b228c2846fdbc896793b9 Mon Sep 17 00:00:00 2001 From: vortex-orin Date: Sun, 16 Mar 2025 12:57:37 +0100 Subject: [PATCH 05/29] sphere roi added, could have bad offset, has not ben tested :/ --- src/valve_detection.cpp | 25 ++++++++++++++++++++++--- 1 file changed, 22 insertions(+), 3 deletions(-) diff --git a/src/valve_detection.cpp b/src/valve_detection.cpp index 449d8c7..f51fb80 100644 --- a/src/valve_detection.cpp +++ b/src/valve_detection.cpp @@ -312,12 +312,31 @@ void ValveDetectionNode::process_and_publish_image( int y1 = std::max(center_y - height * line_detection_area_/10 , 0); int x2 = std::min(center_x + width * line_detection_area_/10 , cv_color_image.cols - 1); int y2 = std::min(center_y + height * line_detection_area_/10 , cv_color_image.rows - 1); - // Draw roi for debugging - cv::rectangle(cv_color_image, cv::Point(x1, y1), cv::Point(x2, y2), cv::Scalar(0, 255, 0), 2); + // // Draw roi for debugging + // cv::rectangle(cv_color_image, cv::Point(x1, 
y1), cv::Point(x2, y2), cv::Scalar(0, 255, 0), 2); - // Extract ROI (region of interest) from the color image + // // Extract ROI (region of interest) from the color image + // cv::Mat roi = cv_color_image(cv::Rect(x1, y1, x2 - x1, y2 - y1)); + + + // Define the ellipse parameters + cv::Point center((x1 + x2) / 2, (y1 + y2) / 2); // Center of the ellipse + cv::Size axes((x2 - x1) / 2, (y2 - y1) / 2); // Semi-major and semi-minor axes + + // Create a mask for the elliptical ROI + cv::Mat mask = cv::Mat::zeros(cv_color_image.size(), CV_8UC1); + cv::ellipse(mask, center, axes, 0, 0, 360, cv::Scalar(255), -1); // Draw a filled ellipse + + // Apply the mask to the ROI cv::Mat roi = cv_color_image(cv::Rect(x1, y1, x2 - x1, y2 - y1)); + cv::Mat masked_roi; + cv::bitwise_and(roi, roi, masked_roi, mask(cv::Rect(x1, y1, x2 - x1, y2 - y1))); + + // For debugging: Draw the ellipse on the original image + cv::ellipse(cv_color_image, center, axes, 0, 0, 360, cv::Scalar(0, 255, 0), 2); + + // Convert to grayscale for edge detection cv::Mat gray; cv::cvtColor(roi, gray, cv::COLOR_BGR2GRAY); From 90099b0067dcba0174eb093d709a7f33d64bc1f3 Mon Sep 17 00:00:00 2001 From: vortex-orin Date: Thu, 3 Apr 2025 19:38:37 +0200 Subject: [PATCH 06/29] commit stuff --- src/valve_detection.cpp | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/src/valve_detection.cpp b/src/valve_detection.cpp index f51fb80..f953b05 100644 --- a/src/valve_detection.cpp +++ b/src/valve_detection.cpp @@ -308,10 +308,10 @@ void ValveDetectionNode::process_and_publish_image( return; } - int x1 = std::max(center_x - width * line_detection_area_/10 , 0); - int y1 = std::max(center_y - height * line_detection_area_/10 , 0); - int x2 = std::min(center_x + width * line_detection_area_/10 , cv_color_image.cols - 1); - int y2 = std::min(center_y + height * line_detection_area_/10 , cv_color_image.rows - 1); + int x1 = std::max(center_x - width * line_detection_area_/100 , 0); + int y1 = 
std::max(center_y - height * line_detection_area_/100 , 0); + int x2 = std::min(center_x + width * line_detection_area_/100 , cv_color_image.cols - 1); + int y2 = std::min(center_y + height * line_detection_area_/100 , cv_color_image.rows - 1); // // Draw roi for debugging // cv::rectangle(cv_color_image, cv::Point(x1, y1), cv::Point(x2, y2), cv::Scalar(0, 255, 0), 2); @@ -449,10 +449,10 @@ void ValveDetectionNode::process_and_publish_image( } // Adjust line coordinates to the full image size using the same x1 ... as roi calculation - int x1 = std::max(center_x - width * line_detection_area_/10 , 0); - int y1 = std::max(center_y - height * line_detection_area_/10 , 0); - int x2 = std::min(center_x + width * line_detection_area_/10 , cv_color_image.cols - 1); - int y2 = std::min(center_y + height * line_detection_area_/10 , cv_color_image.rows - 1); + int x1 = std::max(center_x - width * line_detection_area_/100 , 0); + int y1 = std::max(center_y - height * line_detection_area_/100 , 0); + int x2 = std::min(center_x + width * line_detection_area_/100 , cv_color_image.cols - 1); + int y2 = std::min(center_y + height * line_detection_area_/100 , cv_color_image.rows - 1); cv::Point pt1(longest_line[0] + x1, longest_line[1] + y1); // Add ROI offset cv::Point pt2(longest_line[2] + x1, longest_line[3] + y1); // Add ROI offset From 1e93a624dc5d74e4b1e1d2c82b4044075730eea3 Mon Sep 17 00:00:00 2001 From: vortex-orin Date: Fri, 4 Apr 2025 18:12:27 +0200 Subject: [PATCH 07/29] canny edge params update --- config/valve_detection_params.yaml | 2 +- src/valve_detection.cpp | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/config/valve_detection_params.yaml b/config/valve_detection_params.yaml index d5de105..3cfa44e 100644 --- a/config/valve_detection_params.yaml +++ b/config/valve_detection_params.yaml @@ -4,5 +4,5 @@ color_image_sub_topic: "/image_rect" detections_sub_topic: "/detections_output" - line_detection_area: 4 + line_detection_area: 25 diff --git 
a/src/valve_detection.cpp b/src/valve_detection.cpp index f953b05..e5d461f 100644 --- a/src/valve_detection.cpp +++ b/src/valve_detection.cpp @@ -343,7 +343,7 @@ void ValveDetectionNode::process_and_publish_image( // Apply Canny edge detection cv::Mat edges; - cv::Canny(gray, edges, 50, 150, 3); + cv::Canny(gray, edges, 50, 250, 3); // Detect lines using Hough Transform std::vector lines; From d50261d3f57f276cd8205e32f6843282cec5275b Mon Sep 17 00:00:00 2001 From: vortex-orin Date: Sun, 6 Apr 2025 13:21:11 +0200 Subject: [PATCH 08/29] visualisation of canny edge detection, tuned to work at the office. Works well in good lighting, but when it is covered by a shadow the edge detection fails to detect any edges --- include/valve_detection/valve_detection.hpp | 1 + src/valve_detection.cpp | 7 +++++-- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/include/valve_detection/valve_detection.hpp b/include/valve_detection/valve_detection.hpp index 089ad03..f2124eb 100644 --- a/include/valve_detection/valve_detection.hpp +++ b/include/valve_detection/valve_detection.hpp @@ -72,6 +72,7 @@ class ValveDetectionNode : public rclcpp::Node rclcpp::Publisher::SharedPtr line_pose_pub_; rclcpp::Publisher::SharedPtr line_points_pub_; rclcpp::Publisher::SharedPtr near_plane_cloud_pub_; + rclcpp::Publisher::SharedPtr canny_debug_image_pub_; // Stored messages sensor_msgs::msg::CameraInfo::SharedPtr camera_info_; diff --git a/src/valve_detection.cpp b/src/valve_detection.cpp index e5d461f..e23cdcd 100644 --- a/src/valve_detection.cpp +++ b/src/valve_detection.cpp @@ -37,6 +37,8 @@ ValveDetectionNode::ValveDetectionNode(const rclcpp::NodeOptions & options) line_pose_pub_ = this->create_publisher("valve_pose", 10); line_points_pub_ = this->create_publisher("line_points", 10); near_plane_cloud_pub_ = this->create_publisher("near_plane_cloud", 10); + canny_debug_image_pub_ = this->create_publisher("canny_valve_detection_image", 10); + } Eigen::Vector3f
ValveDetectionNode::filter_direction_ = Eigen::Vector3f(1, 0, 0); @@ -343,11 +345,12 @@ void ValveDetectionNode::process_and_publish_image( // Apply Canny edge detection cv::Mat edges; - cv::Canny(gray, edges, 50, 250, 3); + cv::Canny(gray, edges, 100, 250, 3); + canny_debug_image_pub_->publish(*cv_bridge::CvImage(color_image->header, "mono8", edges).toImageMsg()); // Detect lines using Hough Transform std::vector lines; - cv::HoughLinesP(edges, lines, 1, CV_PI / 180, 20, 40, 10); + cv::HoughLinesP(edges, lines, 1, CV_PI / 180, 20, 40, 5); if (!lines.empty()) { From fe49ebe8ca4bbce7ec5b674d8082466630793434 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Sun, 6 Apr 2025 12:16:45 +0000 Subject: [PATCH 09/29] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- config/valve_detection_params.yaml | 2 +- include/valve_detection/valve_detection.hpp | 99 +-- src/valve_detection.cpp | 674 +++++++++++--------- 3 files changed, 450 insertions(+), 325 deletions(-) diff --git a/config/valve_detection_params.yaml b/config/valve_detection_params.yaml index 87e22bd..41186aa 100644 --- a/config/valve_detection_params.yaml +++ b/config/valve_detection_params.yaml @@ -3,5 +3,5 @@ depth_image_sub_topic: "/zed_node/depth/depth_registered" color_image_sub_topic: "/image_rect" detections_sub_topic: "/detections_output" - + line_detection_area: 25 diff --git a/include/valve_detection/valve_detection.hpp b/include/valve_detection/valve_detection.hpp index aa062b1..06b324f 100644 --- a/include/valve_detection/valve_detection.hpp +++ b/include/valve_detection/valve_detection.hpp @@ -72,10 +72,14 @@ class ValveDetectionNode : public rclcpp::Node { rclcpp::Publisher::SharedPtr processed_image_pub_; rclcpp::Publisher::SharedPtr plane_normal_pub_; rclcpp::Publisher::SharedPtr pointcloud_pub_; - rclcpp::Publisher::SharedPtr line_pose_pub_; - rclcpp::Publisher::SharedPtr 
line_points_pub_; - rclcpp::Publisher::SharedPtr near_plane_cloud_pub_; - rclcpp::Publisher::SharedPtr canny_debug_image_pub_; + rclcpp::Publisher::SharedPtr + line_pose_pub_; + rclcpp::Publisher::SharedPtr + line_points_pub_; + rclcpp::Publisher::SharedPtr + near_plane_cloud_pub_; + rclcpp::Publisher::SharedPtr + canny_debug_image_pub_; // Stored messages sensor_msgs::msg::CameraInfo::SharedPtr camera_info_; @@ -98,13 +102,16 @@ class ValveDetectionNode : public rclcpp::Node { int line_detection_area_; /** - * @brief Callback function for synchronized depth image, color image, and 2D detections. + * @brief Callback function for synchronized depth image, color image, and + * 2D detections. * - * This function is triggered when synchronized messages for a depth image, a color image, and a 2D detection array + * This function is triggered when synchronized messages for a depth image, + * a color image, and a 2D detection array */ - void synchronized_callback(const sensor_msgs::msg::Image::ConstSharedPtr & depth_image, - const sensor_msgs::msg::Image::ConstSharedPtr & color_image, - const vision_msgs::msg::Detection2DArray::ConstSharedPtr & detections); + void synchronized_callback( + const sensor_msgs::msg::Image::ConstSharedPtr& depth_image, + const sensor_msgs::msg::Image::ConstSharedPtr& color_image, + const vision_msgs::msg::Detection2DArray::ConstSharedPtr& detections); // Callback functions void camera_info_callback( const sensor_msgs::msg::CameraInfo::SharedPtr camera_info_msg); @@ -113,64 +120,82 @@ class ValveDetectionNode : public rclcpp::Node { // Utility functions /** - * @brief Computes the height and width scalars based on the camera information. + * @brief Computes the height and width scalars based on the camera + * information. 
* - * This function calculates the scaling factors (`height_scalar_` and `width_scalar_`) - * by comparing the dimensions of two camera frames: + * This function calculates the scaling factors (`height_scalar_` and + * `width_scalar_`) by comparing the dimensions of two camera frames: * - The YOLO color camera frame (`camera_info_yolo_color_`). * - The reference camera frame (`camera_info_`). * - */ + */ void compute_height_width_scalars(); /** * @brief Main logic executed when two images are synchronized. * - * This function processes synchronized depth and color images along with 2D detections to calculate the angle of the valve handle - * and its orientation in 3D space. It uses RANSAC line detection to determine the handle's angle and RANSAC plane detection - * to find the normal vector of the plane in which the valve handle lies. + * This function processes synchronized depth and color images along with 2D + * detections to calculate the angle of the valve handle and its orientation + * in 3D space. It uses RANSAC line detection to determine the handle's + * angle and RANSAC plane detection to find the normal vector of the plane + * in which the valve handle lies. * * The function performs the following steps: - * 1. Converts the depth and color images from ROS messages to OpenCV matrices. + * 1. Converts the depth and color images from ROS messages to OpenCV + * matrices. * 2. Selects the most centered detection from the 2D detection array. - * 3. Projects the detected bounding box into 3D space using the depth image and camera intrinsics. - * 4. Uses RANSAC plane detection to find the plane normal vector and filters points near the plane. - * 5. Detects the valve handle's orientation using Hough line detection on the color image. - * 6. Computes the 3D pose of the valve handle and publishes it as a `geometry_msgs::msg::PoseStamped` message. + * 3. Projects the detected bounding box into 3D space using the depth image + * and camera intrinsics. + * 4. 
Uses RANSAC plane detection to find the plane normal vector and + * filters points near the plane. + * 5. Detects the valve handle's orientation using Hough line detection on + * the color image. + * 6. Computes the 3D pose of the valve handle and publishes it as a + * `geometry_msgs::msg::PoseStamped` message. * * @param[in] depth_image A shared pointer to the depth image message. * @param[in] color_image A shared pointer to the color image message. * @param[in] detections A shared pointer to the 2D detection array message. * - * @pre The `camera_info_` and `camera_info_yolo_color_` member variables must be initialized with valid camera intrinsic parameters. - * @pre The input images and detections must be synchronized (i.e., they correspond to the same timestamp or frame). + * @pre The `camera_info_` and `camera_info_yolo_color_` member variables + * must be initialized with valid camera intrinsic parameters. + * @pre The input images and detections must be synchronized (i.e., they + * correspond to the same timestamp or frame). * @post If successful, the function publishes the following: * - A point cloud of the detected valve handle region. * - The normal vector of the detected plane. * - The 3D pose of the valve handle. - * @post If unsuccessful, the function logs warnings or errors and returns early. + * @post If unsuccessful, the function logs warnings or errors and returns + * early. * - * @note The function assumes that the input images and detections are synchronized. - * @note The function uses RANSAC for robust plane and line detection, which is suitable for noisy sensor data. - * @note The function handles edge cases such as invalid depth values, empty detections, and failed RANSAC fits. + * @note The function assumes that the input images and detections are + * synchronized. + * @note The function uses RANSAC for robust plane and line detection, which + * is suitable for noisy sensor data. 
+ * @note The function handles edge cases such as invalid depth values, empty + * detections, and failed RANSAC fits. */ - void process_and_publish_image(const sensor_msgs::msg::Image::ConstSharedPtr & depth_image, - const sensor_msgs::msg::Image::ConstSharedPtr & color_image, - const vision_msgs::msg::Detection2DArray::ConstSharedPtr & detections); + void process_and_publish_image( + const sensor_msgs::msg::Image::ConstSharedPtr& depth_image, + const sensor_msgs::msg::Image::ConstSharedPtr& color_image, + const vision_msgs::msg::Detection2DArray::ConstSharedPtr& detections); /** - * @brief Projects a 2D pixel coordinate and depth value into a 3D point in the camera frame. + * @brief Projects a 2D pixel coordinate and depth value into a 3D point in + * the camera frame. * - * This function converts a pixel coordinate `(u, v)` and its corresponding depth value into a 3D point - */ - void project_pixel_to_3d(int u, int v, float depth, pcl::PointXYZ &point); + * This function converts a pixel coordinate `(u, v)` and its corresponding + * depth value into a 3D point + */ + void project_pixel_to_3d(int u, int v, float depth, pcl::PointXYZ& point); /** * @brief Projects a 3D point in the camera frame to 2D pixel coordinates. 
* - * This function converts a 3D point `(x, y, z)` in the camera frame into 2D pixel coordinates `(u, v)` - */ - void project_3d_to_pixel(float x, float y, float z, int &u, int &v); + * This function converts a 3D point `(x, y, z)` in the camera frame into 2D + * pixel coordinates `(u, v)` + */ + void project_3d_to_pixel(float x, float y, float z, int& u, int& v); }; #endif // DETECTION_IMAGE_PROCESSOR_HPP diff --git a/src/valve_detection.cpp b/src/valve_detection.cpp index c30fcba..b8142b2 100644 --- a/src/valve_detection.cpp +++ b/src/valve_detection.cpp @@ -4,22 +4,31 @@ using std::placeholders::_1; using std::placeholders::_2; using std::placeholders::_3; -ValveDetectionNode::ValveDetectionNode(const rclcpp::NodeOptions & options) - : Node("valve_detection_node", options) -{ - auto depth_image_sub_topic = this->declare_parameter("depth_image_sub_topic"); - auto color_image_sub_topic = this->declare_parameter("color_image_sub_topic"); - auto detections_sub_topic = this->declare_parameter("detections_sub_topic"); - - line_detection_area_ = this->declare_parameter("line_detection_area", 5); - - rclcpp::QoS qos = rclcpp::QoS(rclcpp::KeepLast(10)).reliability(RMW_QOS_POLICY_RELIABILITY_BEST_EFFORT); - depth_image_sub_.subscribe(this, depth_image_sub_topic, qos.get_rmw_qos_profile()); - color_image_sub_.subscribe(this, color_image_sub_topic, qos.get_rmw_qos_profile()); - detections_sub_.subscribe(this, detections_sub_topic, qos.get_rmw_qos_profile()); - - sync_ = std::make_shared>(MySyncPolicy(10), depth_image_sub_, color_image_sub_, detections_sub_); - sync_->registerCallback(std::bind(&ValveDetectionNode::synchronized_callback, this, _1, _2, _3)); +ValveDetectionNode::ValveDetectionNode(const rclcpp::NodeOptions& options) + : Node("valve_detection_node", options) { + auto depth_image_sub_topic = + this->declare_parameter("depth_image_sub_topic"); + auto color_image_sub_topic = + this->declare_parameter("color_image_sub_topic"); + auto detections_sub_topic = + 
this->declare_parameter("detections_sub_topic"); + + line_detection_area_ = + this->declare_parameter("line_detection_area", 5); + + rclcpp::QoS qos = rclcpp::QoS(rclcpp::KeepLast(10)) + .reliability(RMW_QOS_POLICY_RELIABILITY_BEST_EFFORT); + depth_image_sub_.subscribe(this, depth_image_sub_topic, + qos.get_rmw_qos_profile()); + color_image_sub_.subscribe(this, color_image_sub_topic, + qos.get_rmw_qos_profile()); + detections_sub_.subscribe(this, detections_sub_topic, + qos.get_rmw_qos_profile()); + + sync_ = std::make_shared>( + MySyncPolicy(10), depth_image_sub_, color_image_sub_, detections_sub_); + sync_->registerCallback(std::bind( + &ValveDetectionNode::synchronized_callback, this, _1, _2, _3)); // Create subscriptions with synchronized callbacks camera_info_subscription_yolo_color_ = @@ -29,18 +38,27 @@ ValveDetectionNode::ValveDetectionNode(const rclcpp::NodeOptions & options) this, std::placeholders::_1)); // Create subscriptions with synchronized callbacks - camera_info_subscription_ = this->create_subscription( - "/zed_node/depth/camera_info", 10, - std::bind(&ValveDetectionNode::camera_info_callback, this, std::placeholders::_1)); - - processed_image_pub_ = this->create_publisher("yolov8_valve_detection_image", 10); - plane_normal_pub_ = this->create_publisher("plane_normal", 10); - pointcloud_pub_ = this->create_publisher("point_cloud", 10); - line_pose_pub_ = this->create_publisher("valve_pose", 10); - line_points_pub_ = this->create_publisher("line_points", 10); - near_plane_cloud_pub_ = this->create_publisher("near_plane_cloud", 10); - canny_debug_image_pub_ = this->create_publisher("canny_valve_detection_image", 10); - + camera_info_subscription_ = + this->create_subscription( + "/zed_node/depth/camera_info", 10, + std::bind(&ValveDetectionNode::camera_info_callback, this, + std::placeholders::_1)); + + processed_image_pub_ = this->create_publisher( + "yolov8_valve_detection_image", 10); + plane_normal_pub_ = + 
this->create_publisher("plane_normal", 10); + pointcloud_pub_ = this->create_publisher( + "point_cloud", 10); + line_pose_pub_ = this->create_publisher( + "valve_pose", 10); + line_points_pub_ = this->create_publisher( + "line_points", 10); + near_plane_cloud_pub_ = + this->create_publisher( + "near_plane_cloud", 10); + canny_debug_image_pub_ = this->create_publisher( + "canny_valve_detection_image", 10); } Eigen::Vector3f ValveDetectionNode::filter_direction_ = @@ -141,36 +159,43 @@ void ValveDetectionNode::synchronized_callback( } void ValveDetectionNode::process_and_publish_image( - const sensor_msgs::msg::Image::ConstSharedPtr & depth_image, - const sensor_msgs::msg::Image::ConstSharedPtr & color_image, - const vision_msgs::msg::Detection2DArray::ConstSharedPtr & detections) -{ - if (!camera_info_received_ || !camera_info_received_yolo_color_) - { + const sensor_msgs::msg::Image::ConstSharedPtr& depth_image, + const sensor_msgs::msg::Image::ConstSharedPtr& color_image, + const vision_msgs::msg::Detection2DArray::ConstSharedPtr& detections) { + if (!camera_info_received_ || !camera_info_received_yolo_color_) { return; } - try - { + try { // Convert depth and color images - cv::Mat cv_depth_image = cv_bridge::toCvCopy(depth_image, "32FC1")->image; - cv::Mat cv_color_image = cv_bridge::toCvCopy(color_image, "bgr8")->image; - - if (!detections->detections.empty()) - { - const auto *first_detection = &detections->detections[0]; - - // The most centred of the two most likely variables are chosen as a boundingbox. 
- // Calculate distance to center (cx, cy) - double cx = camera_info_->k[2]; - double cy = camera_info_->k[5]; - if (detections->detections.size() > 1) - { - float dist_first = std::sqrt(std::pow(detections->detections[0].bbox.center.position.x - cx, 2) + - std::pow(detections->detections[0].bbox.center.position.y - cy, 2)); - - float dist_second = std::sqrt(std::pow(detections->detections[1].bbox.center.position.x - cx, 2) + - std::pow(detections->detections[1].bbox.center.position.y - cy, 2)); + cv::Mat cv_depth_image = + cv_bridge::toCvCopy(depth_image, "32FC1")->image; + cv::Mat cv_color_image = + cv_bridge::toCvCopy(color_image, "bgr8")->image; + + if (!detections->detections.empty()) { + const auto* first_detection = &detections->detections[0]; + + // The most centred of the two most likely variables are chosen as a + // boundingbox. Calculate distance to center (cx, cy) + double cx = camera_info_->k[2]; + double cy = camera_info_->k[5]; + if (detections->detections.size() > 1) { + float dist_first = std::sqrt( + std::pow( + detections->detections[0].bbox.center.position.x - cx, + 2) + + std::pow( + detections->detections[0].bbox.center.position.y - cy, + 2)); + + float dist_second = std::sqrt( + std::pow( + detections->detections[1].bbox.center.position.x - cx, + 2) + + std::pow( + detections->detections[1].bbox.center.position.y - cy, + 2)); // Select the detection that is closer to the center. 
if (dist_first > dist_second) { @@ -178,29 +203,36 @@ void ValveDetectionNode::process_and_publish_image( } } - int center_x = static_cast(first_detection->bbox.center.position.x / this->width_scalar_); - int center_y = static_cast((first_detection->bbox.center.position.y - 140) / this->height_scalar_); - int width = static_cast(first_detection->bbox.size_x / this->width_scalar_); - int height = static_cast(first_detection->bbox.size_y / this->height_scalar_); - - pcl::PointCloud::Ptr cloud(new pcl::PointCloud); + int center_x = static_cast( + first_detection->bbox.center.position.x / this->width_scalar_); + int center_y = static_cast( + (first_detection->bbox.center.position.y - 140) / + this->height_scalar_); + int width = static_cast(first_detection->bbox.size_x / + this->width_scalar_); + int height = static_cast(first_detection->bbox.size_y / + this->height_scalar_); + + pcl::PointCloud::Ptr cloud( + new pcl::PointCloud); bool points_added = false; - for (int y = center_y - (height / 2); y <= center_y + (height / 2); ++y) - { - for (int x = center_x - (width / 2); x <= center_x + (width / 2); ++x) - { + for (int y = center_y - (height / 2); y <= center_y + (height / 2); + ++y) { + for (int x = center_x - (width / 2); + x <= center_x + (width / 2); ++x) { int dx = x - center_x; int dy = y - center_y; - if (dx * dx / (float)((width / 2) * (width / 2)) + dy * dy / (float)((height / 2) * (height / 2)) <= 1.0) - { - if (y >= 0 && y < cv_depth_image.rows && x >= 0 && x < cv_depth_image.cols) - { + if (dx * dx / (float)((width / 2) * (width / 2)) + + dy * dy / (float)((height / 2) * (height / 2)) <= + 1.0) { + if (y >= 0 && y < cv_depth_image.rows && x >= 0 && + x < cv_depth_image.cols) { float depth_value = cv_depth_image.at(y, x); pcl::PointXYZ point; project_pixel_to_3d(x, y, depth_value, point); - if (depth_value > 0) // Check for valid depth + if (depth_value > 0) // Check for valid depth { cloud->points.push_back(point); points_added = true; @@ -210,12 +242,7 
@@ void ValveDetectionNode::process_and_publish_image( } } - if (points_added && cloud->points.size() > 0) - { - - - - + if (points_added && cloud->points.size() > 0) { sensor_msgs::msg::PointCloud2 cloud_msg; pcl::toROSMsg(*cloud, cloud_msg); cloud_msg.header.stamp = depth_image->header.stamp; @@ -229,270 +256,343 @@ void ValveDetectionNode::process_and_publish_image( seg.setMethodType(pcl::SAC_RANSAC); seg.setDistanceThreshold(0.01); - pcl::ModelCoefficients::Ptr coefficients(new pcl::ModelCoefficients); + pcl::ModelCoefficients::Ptr coefficients( + new pcl::ModelCoefficients); pcl::PointIndices::Ptr inliers(new pcl::PointIndices); seg.setInputCloud(cloud); seg.segment(*inliers, *coefficients); - if (inliers->indices.size() > 0) - { + if (inliers->indices.size() > 0) { geometry_msgs::msg::Vector3 normal_msg; normal_msg.x = coefficients->values[0]; normal_msg.y = coefficients->values[1]; normal_msg.z = coefficients->values[2]; plane_normal_pub_->publish(normal_msg); - RCLCPP_INFO(this->get_logger(), "RANSAC successful: Plane normal (%.3f, %.3f, %.3f)", - coefficients->values[0], coefficients->values[1], coefficients->values[2]); + RCLCPP_INFO( + this->get_logger(), + "RANSAC successful: Plane normal (%.3f, %.3f, %.3f)", + coefficients->values[0], coefficients->values[1], + coefficients->values[2]); - pcl::PointCloud::Ptr near_plane_cloud(new pcl::PointCloud); + pcl::PointCloud::Ptr near_plane_cloud( + new pcl::PointCloud); - // Select points within a distance threshold of the plane model + // Select points within a distance threshold of the plane + // model pcl::ExtractIndices extract; extract.setInputCloud(cloud); extract.setIndices(inliers); extract.setNegative(false); // Select the inliers extract.filter(*near_plane_cloud); + // Now we can use `selectWithinDistance` to select inliers + // that are within the distance threshold + pcl::PointIndices::Ptr selected_inliers( + new pcl::PointIndices()); + float threshold_distance = + 0.02; // Example threshold (3 cm) 
- // Now we can use `selectWithinDistance` to select inliers that are within the distance threshold - pcl::PointIndices::Ptr selected_inliers(new pcl::PointIndices()); - float threshold_distance = 0.02; // Example threshold (3 cm) - - // Logic for selecting points close to the threshold_distance of the plane. + // Logic for selecting points close to the + // threshold_distance of the plane. for (size_t i = 0; i < cloud->points.size(); ++i) { const auto& point = cloud->points[i]; - float distance = std::abs(coefficients->values[0] * point.x + - coefficients->values[1] * point.y + - coefficients->values[2] * point.z + - coefficients->values[3]) / - std::sqrt(coefficients->values[0] * coefficients->values[0] + - coefficients->values[1] * coefficients->values[1] + - coefficients->values[2] * coefficients->values[2]); + float distance = + std::abs(coefficients->values[0] * point.x + + coefficients->values[1] * point.y + + coefficients->values[2] * point.z + + coefficients->values[3]) / + std::sqrt(coefficients->values[0] * + coefficients->values[0] + + coefficients->values[1] * + coefficients->values[1] + + coefficients->values[2] * + coefficients->values[2]); if (distance < threshold_distance) { selected_inliers->indices.push_back(i); } } - RCLCPP_INFO(this->get_logger(), "Selected inlier size: %zu", selected_inliers->indices.size()); + RCLCPP_INFO(this->get_logger(), "Selected inlier size: %zu", + selected_inliers->indices.size()); // Extract the selected inliers - pcl::PointCloud::Ptr final_inliers_cloud(new pcl::PointCloud); - - for (const auto& idx : selected_inliers->indices) - { - // Convert 3D point (x, y) to image space (u, v) - int u, v; - const auto& point = cloud->points[idx]; - project_3d_to_pixel(point.x, point.y, point.z, u, v); - - - // Check if the point (u, v) is inside the smaller ellipse - int dx = u - center_x; - int dy = v - center_y; - - // Check if point is inside the smaller ellipse in image space - float a = static_cast(width) / 6.0f; // 
Semi-major axis - float b = static_cast(height) / 6.0f; // Semi-minor axis - - if ((dx * dx) / (a * a) + (dy * dy) / (b * b) <= 1.0f) { - // Point is inside the ellipse, add it to the near_plane_cloud - final_inliers_cloud->points.push_back(cloud->points[idx]); - } - - + pcl::PointCloud::Ptr final_inliers_cloud( + new pcl::PointCloud); + + for (const auto& idx : selected_inliers->indices) { + // Convert 3D point (x, y) to image space (u, v) + int u, v; + const auto& point = cloud->points[idx]; + project_3d_to_pixel(point.x, point.y, point.z, u, v); + + // Check if the point (u, v) is inside the smaller + // ellipse + int dx = u - center_x; + int dy = v - center_y; + + // Check if point is inside the smaller ellipse in image + // space + float a = static_cast(width) / + 6.0f; // Semi-major axis + float b = static_cast(height) / + 6.0f; // Semi-minor axis + + if ((dx * dx) / (a * a) + (dy * dy) / (b * b) <= 1.0f) { + // Point is inside the ellipse, add it to the + // near_plane_cloud + final_inliers_cloud->points.push_back( + cloud->points[idx]); + } } near_plane_cloud = final_inliers_cloud; - // Calculate centroid position from near_plane_cloud - Eigen::Vector3f centroid(0, 0, 0); - if (!near_plane_cloud->empty()) { - for (const auto& point : near_plane_cloud->points) { - centroid += Eigen::Vector3f(point.x, point.y, point.z); + // Calculate centroid position from near_plane_cloud + Eigen::Vector3f centroid(0, 0, 0); + if (!near_plane_cloud->empty()) { + for (const auto& point : near_plane_cloud->points) { + centroid += + Eigen::Vector3f(point.x, point.y, point.z); + } + centroid /= near_plane_cloud->points.size(); + } else { + RCLCPP_WARN(this->get_logger(), + "No valid near plane points found."); + return; } - centroid /= near_plane_cloud->points.size(); - } else { - RCLCPP_WARN(this->get_logger(), "No valid near plane points found."); - return; - } - - int x1 = std::max(center_x - width * line_detection_area_/100 , 0); - int y1 = std::max(center_y - height * 
line_detection_area_/100 , 0); - int x2 = std::min(center_x + width * line_detection_area_/100 , cv_color_image.cols - 1); - int y2 = std::min(center_y + height * line_detection_area_/100 , cv_color_image.rows - 1); - // // Draw roi for debugging - // cv::rectangle(cv_color_image, cv::Point(x1, y1), cv::Point(x2, y2), cv::Scalar(0, 255, 0), 2); - - // // Extract ROI (region of interest) from the color image - // cv::Mat roi = cv_color_image(cv::Rect(x1, y1, x2 - x1, y2 - y1)); - - - // Define the ellipse parameters - cv::Point center((x1 + x2) / 2, (y1 + y2) / 2); // Center of the ellipse - cv::Size axes((x2 - x1) / 2, (y2 - y1) / 2); // Semi-major and semi-minor axes - - // Create a mask for the elliptical ROI - cv::Mat mask = cv::Mat::zeros(cv_color_image.size(), CV_8UC1); - cv::ellipse(mask, center, axes, 0, 0, 360, cv::Scalar(255), -1); // Draw a filled ellipse - - // Apply the mask to the ROI - cv::Mat roi = cv_color_image(cv::Rect(x1, y1, x2 - x1, y2 - y1)); - cv::Mat masked_roi; - cv::bitwise_and(roi, roi, masked_roi, mask(cv::Rect(x1, y1, x2 - x1, y2 - y1))); - - // For debugging: Draw the ellipse on the original image - cv::ellipse(cv_color_image, center, axes, 0, 0, 360, cv::Scalar(0, 255, 0), 2); + int x1 = std::max( + center_x - width * line_detection_area_ / 100, 0); + int y1 = std::max( + center_y - height * line_detection_area_ / 100, 0); + int x2 = + std::min(center_x + width * line_detection_area_ / 100, + cv_color_image.cols - 1); + int y2 = + std::min(center_y + height * line_detection_area_ / 100, + cv_color_image.rows - 1); + // // Draw roi for debugging + // cv::rectangle(cv_color_image, cv::Point(x1, y1), + // cv::Point(x2, y2), cv::Scalar(0, 255, 0), 2); + + // // Extract ROI (region of interest) from the color image + // cv::Mat roi = cv_color_image(cv::Rect(x1, y1, x2 - x1, y2 + // - y1)); + + // Define the ellipse parameters + cv::Point center((x1 + x2) / 2, + (y1 + y2) / 2); // Center of the ellipse + cv::Size axes( + (x2 - x1) / 2, + 
(y2 - y1) / 2); // Semi-major and semi-minor axes + + // Create a mask for the elliptical ROI + cv::Mat mask = + cv::Mat::zeros(cv_color_image.size(), CV_8UC1); + cv::ellipse(mask, center, axes, 0, 0, 360, cv::Scalar(255), + -1); // Draw a filled ellipse + + // Apply the mask to the ROI + cv::Mat roi = + cv_color_image(cv::Rect(x1, y1, x2 - x1, y2 - y1)); + cv::Mat masked_roi; + cv::bitwise_and(roi, roi, masked_roi, + mask(cv::Rect(x1, y1, x2 - x1, y2 - y1))); + + // For debugging: Draw the ellipse on the original image + cv::ellipse(cv_color_image, center, axes, 0, 0, 360, + cv::Scalar(0, 255, 0), 2); + + // Convert to grayscale for edge detection + cv::Mat gray; + cv::cvtColor(roi, gray, cv::COLOR_BGR2GRAY); + + // Apply Canny edge detection + cv::Mat edges; + cv::Canny(gray, edges, 100, 250, 3); + canny_debug_image_pub_->publish( + *cv_bridge::CvImage(color_image->header, "mono8", edges) + .toImageMsg()); + + // Detect lines using Hough Transform + std::vector lines; + cv::HoughLinesP(edges, lines, 1, CV_PI / 180, 20, 40, 5); - - // Convert to grayscale for edge detection - cv::Mat gray; - cv::cvtColor(roi, gray, cv::COLOR_BGR2GRAY); - - // Apply Canny edge detection - cv::Mat edges; - cv::Canny(gray, edges, 100, 250, 3); - canny_debug_image_pub_->publish(*cv_bridge::CvImage(color_image->header, "mono8", edges).toImageMsg()); - - // Detect lines using Hough Transform - std::vector lines; - cv::HoughLinesP(edges, lines, 1, CV_PI / 180, 20, 40, 5); - - if (!lines.empty()) - { - cv::Vec4i longest_line = lines[0]; - double max_length = 0; + if (!lines.empty()) { + cv::Vec4i longest_line = lines[0]; + double max_length = 0; - for (const auto &line : lines) - { - double length = std::hypot(line[2] - line[0], line[3] - line[1]); - if (length > max_length) - { - max_length = length; - longest_line = line; + for (const auto& line : lines) { + double length = std::hypot(line[2] - line[0], + line[3] - line[1]); + if (length > max_length) { + max_length = length; + 
longest_line = line; + } } - } - - double angle = std::atan2(longest_line[3] - longest_line[1], longest_line[2] - longest_line[0]); - Eigen::Vector3f plane_normal(coefficients->values[0], coefficients->values[1], coefficients->values[2]); - plane_normal.normalize(); - - // Find an arbitrary vector perpendicular to the plane normal - Eigen::Vector3f perp_vector = (std::abs(plane_normal.z()) < 0.99) - ? Eigen::Vector3f(0, 0, 1).cross(plane_normal).normalized() - : Eigen::Vector3f(1, 0, 0).cross(plane_normal).normalized(); + double angle = + std::atan2(longest_line[3] - longest_line[1], + longest_line[2] - longest_line[0]); + + Eigen::Vector3f plane_normal(coefficients->values[0], + coefficients->values[1], + coefficients->values[2]); + plane_normal.normalize(); + + // Find an arbitrary vector perpendicular to the plane + // normal + Eigen::Vector3f perp_vector = + (std::abs(plane_normal.z()) < 0.99) + ? Eigen::Vector3f(0, 0, 1) + .cross(plane_normal) + .normalized() + : Eigen::Vector3f(1, 0, 0) + .cross(plane_normal) + .normalized(); + + // Rotate perp_vector by `angle` around `plane_normal` + // to get x-axis + Eigen::AngleAxisf rotation(angle, plane_normal); + Eigen::Vector3f x_axis = + rotation * perp_vector; // Rotated vector in-plane + + // Filter the x-axis direction to maintain consistency + if (filter_direction_.dot(x_axis) < 0) { + x_axis = -x_axis; // Flip to maintain consistency + } - // Rotate perp_vector by `angle` around `plane_normal` to get x-axis - Eigen::AngleAxisf rotation(angle, plane_normal); - Eigen::Vector3f x_axis = rotation * perp_vector; // Rotated vector in-plane + // Update filter_direction_ + filter_direction_ = x_axis; + + // Compute y-axis (perpendicular to both x and z) + Eigen::Vector3f y_axis = + plane_normal.cross(x_axis).normalized(); + + // Construct rotation matrix + Eigen::Matrix3f rotation_matrix; + rotation_matrix.col(0) = x_axis; + rotation_matrix.col(1) = y_axis; + rotation_matrix.col(2) = + plane_normal; // z-axis is the 
plane normal + + // Convert rotation matrix to quaternion + Eigen::Quaternionf quaternion(rotation_matrix); + + if (std::isnan(quaternion.x()) || + std::isnan(quaternion.y()) || + std::isnan(quaternion.z()) || + std::isnan(quaternion.w())) { + RCLCPP_WARN(this->get_logger(), + "Invalid quaternion computed, skipping " + "pose publishing."); + return; + } - // Filter the x-axis direction to maintain consistency - if (filter_direction_.dot(x_axis) < 0) { - x_axis = -x_axis; // Flip to maintain consistency - } + geometry_msgs::msg::PoseStamped line_pose_msg; + line_pose_msg.header.stamp = color_image->header.stamp; + line_pose_msg.header.frame_id = "zed_left_camera_frame"; - // Update filter_direction_ - filter_direction_ = x_axis; - - // Compute y-axis (perpendicular to both x and z) - Eigen::Vector3f y_axis = plane_normal.cross(x_axis).normalized(); + line_pose_msg.pose.position.x = centroid[0]; + line_pose_msg.pose.position.y = centroid[1]; + line_pose_msg.pose.position.z = centroid[2]; - // Construct rotation matrix - Eigen::Matrix3f rotation_matrix; - rotation_matrix.col(0) = x_axis; - rotation_matrix.col(1) = y_axis; - rotation_matrix.col(2) = plane_normal; // z-axis is the plane normal + line_pose_msg.pose.orientation.x = quaternion.x(); + line_pose_msg.pose.orientation.y = quaternion.y(); + line_pose_msg.pose.orientation.z = quaternion.z(); + line_pose_msg.pose.orientation.w = quaternion.w(); - // Convert rotation matrix to quaternion - Eigen::Quaternionf quaternion(rotation_matrix); + line_pose_pub_->publish(line_pose_msg); - if (std::isnan(quaternion.x()) || std::isnan(quaternion.y()) || - std::isnan(quaternion.z()) || std::isnan(quaternion.w())) { - RCLCPP_WARN(this->get_logger(), "Invalid quaternion computed, skipping pose publishing."); - return; + RCLCPP_INFO(this->get_logger(), + "Detected line published."); + } else { + RCLCPP_WARN(this->get_logger(), + "No lines detected in bounding box."); } - - geometry_msgs::msg::PoseStamped line_pose_msg; - 
line_pose_msg.header.stamp = color_image->header.stamp; - line_pose_msg.header.frame_id = "zed_left_camera_frame"; - - line_pose_msg.pose.position.x = centroid[0]; - line_pose_msg.pose.position.y = centroid[1]; - line_pose_msg.pose.position.z = centroid[2]; - - line_pose_msg.pose.orientation.x = quaternion.x(); - line_pose_msg.pose.orientation.y = quaternion.y(); - line_pose_msg.pose.orientation.z = quaternion.z(); - line_pose_msg.pose.orientation.w = quaternion.w(); - - line_pose_pub_->publish(line_pose_msg); - - RCLCPP_INFO(this->get_logger(), "Detected line published."); - } - else - { - RCLCPP_WARN(this->get_logger(), "No lines detected in bounding box."); - } - if (!cv_color_image.empty()) { - // Draw the bounding box on the color image - int x1 = std::max(center_x - width / 2, 0); - int y1 = std::max(center_y - height / 2, 0); - int x2 = std::min(center_x + width / 2, cv_color_image.cols - 1); - int y2 = std::min(center_y + height / 2, cv_color_image.rows - 1); - - cv::rectangle(cv_color_image, cv::Point(x1, y1), cv::Point(x2, y2), cv::Scalar(0, 255, 0), 2); - - // Draw the detected line on the color image - if (!lines.empty()) { - cv::Vec4i longest_line = lines[0]; - double max_length = 0; - - // Find the longest line in the ROI - for (const auto &line : lines) { - double length = std::hypot(line[2] - line[0], line[3] - line[1]); - if (length > max_length) { - max_length = length; - longest_line = line; + if (!cv_color_image.empty()) { + // Draw the bounding box on the color image + int x1 = std::max(center_x - width / 2, 0); + int y1 = std::max(center_y - height / 2, 0); + int x2 = std::min(center_x + width / 2, + cv_color_image.cols - 1); + int y2 = std::min(center_y + height / 2, + cv_color_image.rows - 1); + + cv::rectangle(cv_color_image, cv::Point(x1, y1), + cv::Point(x2, y2), cv::Scalar(0, 255, 0), + 2); + + // Draw the detected line on the color image + if (!lines.empty()) { + cv::Vec4i longest_line = lines[0]; + double max_length = 0; + + // Find 
the longest line in the ROI + for (const auto& line : lines) { + double length = std::hypot(line[2] - line[0], + line[3] - line[1]); + if (length > max_length) { + max_length = length; + longest_line = line; + } } + + // Adjust line coordinates to the full image size + // using the same x1 ... as roi calculation + int x1 = std::max( + center_x - width * line_detection_area_ / 100, + 0); + int y1 = std::max( + center_y - height * line_detection_area_ / 100, + 0); + int x2 = std::min( + center_x + width * line_detection_area_ / 100, + cv_color_image.cols - 1); + int y2 = std::min( + center_y + height * line_detection_area_ / 100, + cv_color_image.rows - 1); + cv::Point pt1( + longest_line[0] + x1, + longest_line[1] + y1); // Add ROI offset + cv::Point pt2( + longest_line[2] + x1, + longest_line[3] + y1); // Add ROI offset + + // Draw the line on the full color image + cv::line(cv_color_image, pt1, pt2, + cv::Scalar(0, 0, 255), 2); + + // Debugging output + RCLCPP_INFO( + this->get_logger(), + "ROI Coordinates: x1=%d, y1=%d, x2=%d, y2=%d", + x1, y1, x2, y2); + RCLCPP_INFO( + this->get_logger(), + "Longest Line in ROI: (%d, %d) -> (%d, %d)", + longest_line[0], longest_line[1], + longest_line[2], longest_line[3]); + RCLCPP_INFO(this->get_logger(), + "Adjusted Line in Full Image: (%d, %d) " + "-> (%d, %d)", + pt1.x, pt1.y, pt2.x, pt2.y); } - - // Adjust line coordinates to the full image size using the same x1 ... 
as roi calculation - int x1 = std::max(center_x - width * line_detection_area_/100 , 0); - int y1 = std::max(center_y - height * line_detection_area_/100 , 0); - int x2 = std::min(center_x + width * line_detection_area_/100 , cv_color_image.cols - 1); - int y2 = std::min(center_y + height * line_detection_area_/100 , cv_color_image.rows - 1); - cv::Point pt1(longest_line[0] + x1, longest_line[1] + y1); // Add ROI offset - cv::Point pt2(longest_line[2] + x1, longest_line[3] + y1); // Add ROI offset - - // Draw the line on the full color image - cv::line(cv_color_image, pt1, pt2, cv::Scalar(0, 0, 255), 2); - - // Debugging output - RCLCPP_INFO(this->get_logger(), "ROI Coordinates: x1=%d, y1=%d, x2=%d, y2=%d", x1, y1, x2, y2); - RCLCPP_INFO(this->get_logger(), "Longest Line in ROI: (%d, %d) -> (%d, %d)", - longest_line[0], longest_line[1], longest_line[2], longest_line[3]); - RCLCPP_INFO(this->get_logger(), "Adjusted Line in Full Image: (%d, %d) -> (%d, %d)", - pt1.x, pt1.y, pt2.x, pt2.y); } - + } else { + RCLCPP_WARN(this->get_logger(), "No plane inliers found."); } } - else - { - RCLCPP_WARN(this->get_logger(), "No plane inliers found."); - } } - } - // Convert the edited OpenCV image back to a ROS message - sensor_msgs::msg::Image::SharedPtr edited_image_msg = cv_bridge::CvImage(color_image->header, "bgr8", cv_color_image).toImageMsg(); + // Convert the edited OpenCV image back to a ROS message + sensor_msgs::msg::Image::SharedPtr edited_image_msg = + cv_bridge::CvImage(color_image->header, "bgr8", cv_color_image) + .toImageMsg(); - // Publish the edited image - processed_image_pub_->publish(*edited_image_msg); + // Publish the edited image + processed_image_pub_->publish(*edited_image_msg); - } - catch (cv_bridge::Exception &e) - { + } catch (cv_bridge::Exception& e) { RCLCPP_ERROR(this->get_logger(), "CvBridge Error: %s", e.what()); } } From 164af43d944599322fddfb76f2d7f15a00db5197 Mon Sep 17 00:00:00 2001 From: vortex-orin Date: Sun, 13 Apr 2025 14:16:32 +0200 
Subject: [PATCH 10/29] fixed compile error --- src/valve_detection.cpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/valve_detection.cpp b/src/valve_detection.cpp index b8142b2..b14388b 100644 --- a/src/valve_detection.cpp +++ b/src/valve_detection.cpp @@ -154,8 +154,8 @@ void ValveDetectionNode::synchronized_callback( RCLCPP_INFO(this->get_logger(), "Synchronized image and detection messages received."); - // process_and_publish_image(depth_image, color_image, detections); - process_and_publish_image(depth_image, detections); + process_and_publish_image(depth_image, color_image, detections); + // process_and_publish_image(depth_image, detections); } void ValveDetectionNode::process_and_publish_image( @@ -409,14 +409,14 @@ void ValveDetectionNode::process_and_publish_image( // Apply Canny edge detection cv::Mat edges; - cv::Canny(gray, edges, 100, 250, 3); + cv::Canny(gray, edges, 50, 200, 3); canny_debug_image_pub_->publish( *cv_bridge::CvImage(color_image->header, "mono8", edges) .toImageMsg()); // Detect lines using Hough Transform std::vector lines; - cv::HoughLinesP(edges, lines, 1, CV_PI / 180, 20, 40, 5); + cv::HoughLinesP(edges, lines, 1, CV_PI / 180, 20, 50, 5); if (!lines.empty()) { cv::Vec4i longest_line = lines[0]; From 08de1b7dcf35807333ccc79f7c33f0c1a31d8b2c Mon Sep 17 00:00:00 2001 From: ole Date: Sun, 13 Apr 2025 15:40:54 +0200 Subject: [PATCH 11/29] tuned canny --- config/valve_detection_params.yaml | 2 +- launch/valve_detection.launch.py | 3 ++- src/valve_detection.cpp | 4 ++-- 3 files changed, 5 insertions(+), 4 deletions(-) diff --git a/config/valve_detection_params.yaml b/config/valve_detection_params.yaml index 41186aa..d0d4299 100644 --- a/config/valve_detection_params.yaml +++ b/config/valve_detection_params.yaml @@ -4,4 +4,4 @@ color_image_sub_topic: "/image_rect" detections_sub_topic: "/detections_output" - line_detection_area: 25 + line_detection_area: 22 diff --git a/launch/valve_detection.launch.py 
b/launch/valve_detection.launch.py index 1bacfd0..0ea4591 100644 --- a/launch/valve_detection.launch.py +++ b/launch/valve_detection.launch.py @@ -16,7 +16,8 @@ def generate_launch_description(): get_package_share_directory('valve_detection'), 'config', 'valve_detection_params.yaml', - ) + ), + {'use_sim_time': True}, ], output='screen', ) diff --git a/src/valve_detection.cpp b/src/valve_detection.cpp index b14388b..482751f 100644 --- a/src/valve_detection.cpp +++ b/src/valve_detection.cpp @@ -409,14 +409,14 @@ void ValveDetectionNode::process_and_publish_image( // Apply Canny edge detection cv::Mat edges; - cv::Canny(gray, edges, 50, 200, 3); + cv::Canny(gray, edges, 50, 100, 3); canny_debug_image_pub_->publish( *cv_bridge::CvImage(color_image->header, "mono8", edges) .toImageMsg()); // Detect lines using Hough Transform std::vector lines; - cv::HoughLinesP(edges, lines, 1, CV_PI / 180, 20, 50, 5); + cv::HoughLinesP(edges, lines, 1, CV_PI / 180, 20, 45, 5); if (!lines.empty()) { cv::Vec4i longest_line = lines[0]; From 410219db319916d41ff645f485171bf959594bf3 Mon Sep 17 00:00:00 2001 From: Jorgen Fjermedal Date: Sun, 13 Apr 2025 22:14:28 +0200 Subject: [PATCH 12/29] subscribe zed color image directly --- config/valve_detection_params.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/config/valve_detection_params.yaml b/config/valve_detection_params.yaml index d0d4299..185d440 100644 --- a/config/valve_detection_params.yaml +++ b/config/valve_detection_params.yaml @@ -1,7 +1,7 @@ /**: ros__parameters: depth_image_sub_topic: "/zed_node/depth/depth_registered" - color_image_sub_topic: "/image_rect" + color_image_sub_topic: "/zed_node/left/image_rect_color" detections_sub_topic: "/detections_output" line_detection_area: 22 From 04e1d7a1dfebe9eedc8e723aa96b52f2088470f7 Mon Sep 17 00:00:00 2001 From: Jorgen Fjermedal Date: Wed, 20 Aug 2025 22:13:07 +0200 Subject: [PATCH 13/29] base and depth classes --- CMakeLists.txt | 1 - 
config/depth_image_angle_params.yaml | 27 +++ include/valve_detection/types.hpp | 38 ++++ include/valve_detection/valve_pose_base.hpp | 103 +++++++++ include/valve_detection/valve_pose_depth.hpp | 35 +++ .../valve_pose_depth_ros.hpp | 76 +++++++ src/valve_pose_base.cpp | 181 +++++++++++++++ src/valve_pose_depth.cpp | 82 +++++++ src/valve_pose_depth_ros.cpp | 209 ++++++++++++++++++ src/valve_pose_pcl.cpp | 0 10 files changed, 751 insertions(+), 1 deletion(-) create mode 100644 config/depth_image_angle_params.yaml create mode 100644 include/valve_detection/types.hpp create mode 100644 include/valve_detection/valve_pose_base.hpp create mode 100644 include/valve_detection/valve_pose_depth.hpp create mode 100644 include/valve_detection_ros/valve_pose_depth_ros.hpp create mode 100644 src/valve_pose_base.cpp create mode 100644 src/valve_pose_depth.cpp create mode 100644 src/valve_pose_depth_ros.cpp create mode 100644 src/valve_pose_pcl.cpp diff --git a/CMakeLists.txt b/CMakeLists.txt index 587d063..de59d4f 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -49,7 +49,6 @@ ament_target_dependencies(${LIB_NAME} PUBLIC vision_msgs geometry_msgs std_msgs - PCL pcl_conversions ) diff --git a/config/depth_image_angle_params.yaml b/config/depth_image_angle_params.yaml new file mode 100644 index 0000000..bc3d5f3 --- /dev/null +++ b/config/depth_image_angle_params.yaml @@ -0,0 +1,27 @@ +/**: + ros__parameters: + depth_image_sub_topic: "/zed_node/depth/depth_registered" + color_image_sub_topic: "/zed_node/left/image_rect_color" + detections_sub_topic: "/detections_output" + + color_image_info_topic: "/zed_node/left/camera_info" + depth_image_info_topic: "/zed_node/depth/camera_info" + + valve_pose_pub_topic: "/valve_pose" + + yolo_img_width: 640 + yolo_img_height: 640 + + annulus_radius_ratio: 0.8 + + plane_ransac_threshold: 0.01 + plane_ransac_max_iterations: 20 + + valve_handle_offset: 0.05 + + calculate_angle: true + + visualize_detections: true + debug_visualize: false + 
annulus_pub_topic: "/bbx_annulus_pcl" + plane_normal_pub_topic: "/plane_normal" diff --git a/include/valve_detection/types.hpp b/include/valve_detection/types.hpp new file mode 100644 index 0000000..b42082c --- /dev/null +++ b/include/valve_detection/types.hpp @@ -0,0 +1,38 @@ +#ifndef TYPES_HPP +#define TYPES_HPP + +namespace valve_detection { + +struct CameraIntrinsics { + double fx; + double fy; + double cx; + double cy; +}; + +struct ImageDimensions { + int width; + int height; +}; + +struct ImageProperties { + CameraIntrinsics intr; + ImageDimensions dim; +}; + +struct BoundingBox { + float center_x; + float center_y; + float size_x; + float size_y; + float theta; // Orientation in radians positive counter-clockwise +} + +enum class AngleDetection { + ENABLED, + DISABLED +}; + +} // namespace valve_detection + +#endif // TYPES_HPP \ No newline at end of file diff --git a/include/valve_detection/valve_pose_base.hpp b/include/valve_detection/valve_pose_base.hpp new file mode 100644 index 0000000..f40e1a1 --- /dev/null +++ b/include/valve_detection/valve_pose_base.hpp @@ -0,0 +1,103 @@ +#ifndef VALVE_POSE_BASE_HPP +#define VALVE_POSE_BASE_HPP + +#include "valve_detection/types.hpp" +#include + +namespace valve_detection { + +class ValvePoseBase { +public: + ValvePoseBase(int yolo_img_width, int yolo_img_height_, + float annulus_radius_ratio, + float plane_ransac_threshold, int plane_ransac_max_iterations, + float valve_handle_offset); + virtual ~ValvePoseBase() = default; + + void project_point_to_pixel( + float x, float y, float z, int& u, int& v) const; + + void calculate_letterbox_properties(); + + BoundingBox transform_bounding_box( + const BoundingBox& bbox) const; + + void set_color_image_properties( + const ImageProperties& properties) { + color_image_properties_ = properties; + } + + /** + * @brief Segments the dominant plane from a point cloud using RANSAC. + * @param cloud The input point cloud. 
+ * @param coefficients The output model coefficients (ax + by + cz + d = 0). + * @return True if a plane with inliers was found, false otherwise. + */ + bool segment_plane(const pcl::PointCloud::Ptr& cloud, + pcl::ModelCoefficients::Ptr& coefficients, + pcl::PointIndices::Ptr& inliers) const; + + Eigen::Vector3f get_ray_direction(const BoundingBox& bbox) const; + + /** + * @brief Creates a consistent, normalized Eigen::Vector3f from PCL plane coefficients + * and a reference ray. + * + * This function extracts the normal vector (A, B, C) from the plane equation + * Ax + By + Cz + D = 0, normalizes it, and flips its direction if necessary + * to ensure it always points in the same general direction as the reference ray. + * + * @param coefficients A shared pointer to the pcl::ModelCoefficients object. + * @param ray_direction A reference to the Eigen::Vector3f representing the ray. + * @return A normalized Eigen::Vector3f representing the consistent plane normal. + */ + Eigen::Vector3f create_consistent_plane_normal_with_ray( + const pcl::ModelCoefficients::Ptr& coefficients, + const Eigen::Vector3f& ray_direction) const; + + + /** + * @brief Finds the intersection point of a ray and a plane. + * + * The ray is defined by its origin (assumed to be the camera's optical center, + * i.e., [0, 0, 0]) and a direction vector. The plane is defined by its coefficients + * Ax + By + Cz + D = 0. + * + * @param coefficients A shared pointer to the pcl::ModelCoefficients object. + * @param ray_direction The normalized Eigen::Vector3f ray direction. + * @return An Eigen::Vector3f representing the 3D intersection point. + */ + Eigen::Vector3f find_ray_plane_intersection( + const pcl::ModelCoefficients::Ptr& coefficients, + const Eigen::Vector3f& ray_direction) const; + + /** + * @brief Shifts a point along a plane normal by a given distance. 
+ * + * This function is useful for moving a point from the detected plane's + * surface to a new location, such as the center of a valve knob. + * The shift is performed along the vector of the consistent plane normal. + * + * @param intersection_point The 3D intersection point on the plane. + * @param plane_normal The consistent, normalized plane normal vector. + * @return The new Eigen::Vector3f point after the shift. + */ + Eigen::Vector3f shift_point_along_normal( + const Eigen::Vector3f& intersection_point, + const Eigen::Vector3f& plane_normal) const; + +protected: + ImageProperties color_image_properties_; + int yolo_img_width_; + int yolo_img_height_; + float annulus_radius_ratio_; + float plane_ransac_threshold_; + int plane_ransac_max_iterations_; + float valve_handle_offset_; + float letterbox_scale_factor_; + int letterbox_pad_x_; + int letterbox_pad_y_; +} +} // namespace valve_detection + +#endif // VALVE_POSE_BASE_HPP \ No newline at end of file diff --git a/include/valve_detection/valve_pose_depth.hpp b/include/valve_detection/valve_pose_depth.hpp new file mode 100644 index 0000000..f826a4c --- /dev/null +++ b/include/valve_detection/valve_pose_depth.hpp @@ -0,0 +1,35 @@ +#ifndef VALVE_POSE_DEPTH_HPP +#define VALVE_POSE_DEPTH_HPP + +#include "valve_detection/valve_pose_base.hpp" +#include +#include +#include + +namespace valve_detection { + +class ValvePoseDepth : public ValvePoseBase { +public: + ValvePoseDepth(int yolo_img_width, int yolo_img_height_, float annulus_radius_ratio, + float plane_ransac_threshold, int plane_ransac_max_iterations); + virtual ~ValvePoseDepth() = default; + + void project_pixel_to_point(int u, + int v, + float depth, + pcl::PointXYZ& point) const; + + void extract_annulus_pcl( + const cv::Mat& depth_image, + const BoundingBox& bbox, + pcl::PointCloud::Ptr& cloud) const; + +private: + ImageProperties depth_image_properties_; + double height_scalar_; + double width_scalar_; +}; + +} // namespace valve_detection + +#endif 
// VALVE_POSE_DEPTH_HPP \ No newline at end of file diff --git a/include/valve_detection_ros/valve_pose_depth_ros.hpp b/include/valve_detection_ros/valve_pose_depth_ros.hpp new file mode 100644 index 0000000..8e5ce9f --- /dev/null +++ b/include/valve_detection_ros/valve_pose_depth_ros.hpp @@ -0,0 +1,76 @@ +#ifndef VALVE_POSE_DEPTH_ROS_HPP +#define VALVE_POSE_DEPTH_ROS_HPP + +#include "valve_detection/valve_pose_depth.hpp" +#include +#include +#include +#include + +#include +#include +#include + + +namespace valve_detection { + +class ValvePoseDepthNode { +public: + explicit ValvePoseDepthNode(const rclcpp::NodeOptions& options); + virtual ~ValvePoseDepthNode() = default; + + +private: + void color_image_info_callback( + const sensor_msgs::msg::CameraInfo::SharedPtr camera_info_msg); + + BoundingBox to_bounding_box(const vision_msgs::msg::BoundingBox2D& bbox) const; + + void publish_annulus_pcl(const pcl::PointCloud::Ptr& cloud) const; + + /** + * @brief Callback function for synchronized depth image, color image, and + * 2D detections. + * + * This function is triggered when synchronized messages for a depth image, + * a color image, and a 2D detection array are received. 
+ */ + void synchronized_callback( + const sensor_msgs::msg::Image::ConstSharedPtr& depth_image, + const sensor_msgs::msg::Image::ConstSharedPtr& color_image, + const vision_msgs::msg::Detection2DArray::ConstSharedPtr& detections); + + + rclcpp::Subscription::SharedPtr color_image_info_sub_; + bool color_image_info_received_ = false; + + message_filters::Subscriber depth_image_sub_; + message_filters::Subscriber color_image_sub_; + message_filters::Subscriber + detections_sub_; + + typedef message_filters::sync_policies::ExactTime< + sensor_msgs::msg::Image, + sensor_msgs::msg::Image, + vision_msgs::msg::Detection2DArray> + MySyncPolicy; + + std::shared_ptr> sync_; + + rclcpp::Publisher::SharedPtr + valve_pose_pub_; + + std::unique_ptr valve_detector_; + + std::string color_image_frame_id_; + + bool visualize_detections_ = true; + bool debug_visualize_ = false; + bool calculate_angle_ = true; + rclcpp::Publisher::SharedPtr annulus_pcl_pub_; + rclcpp::Publisher::SharedPtr plane_normal_pub_; +} + +} // namespace valve_detection + +#endif // VALVE_POSE_DEPTH_ROS_HPP \ No newline at end of file diff --git a/src/valve_pose_base.cpp b/src/valve_pose_base.cpp new file mode 100644 index 0000000..b780d0d --- /dev/null +++ b/src/valve_pose_base.cpp @@ -0,0 +1,181 @@ +#include "valve_detection/valve_pose_base.hpp" + +namespace valve_detection { + +ValvePoseBase::ValvePoseBase(int yolo_img_width_, int yolo_img_height_, + float annulus_radius_ratio, + float plane_ransac_threshold, int plane_ransac_max_iterations, + float valve_handle_offset) + : yolo_img_width_(yolo_img_width_), yolo_img_height_(yolo_img_height_), + annulus_radius_ratio_(annulus_radius_ratio), + plane_ransac_threshold_(plane_ransac_threshold), + plane_ransac_max_iterations_(plane_ransac_max_iterations), + valve_handle_offset_(valve_handle_offset) { +} + +void ValvePoseBase::project_point_to_pixel(float x, float y, float z, int& u, int& v) const { + if (z <= 0) { + u = v = -1; // Invalid pixel coordinates + 
return; + } + + double fx = color_image_properties_.intr.fx; + double fy = color_image_properties_.intr.fy; + double cx = color_image_properties_.intr.cx; + double cy = color_image_properties_.intr.cy; + + u = static_cast((x * fx / z) + cx); + v = static_cast((y * fy / z) + cy); +} + +void ValvePoseBase::calculate_letterbox_padding() { + int org_image_width = color_image_properties_.dim.x; + int org_image_height = color_image_properties_.dim.y; + + letterbox_scale_factor_ = std::min( + static_cast(yolo_input_width) / original_width, + static_cast(yolo_input_height) / original_height); + + double resized_width = original_width * letterbox_scale_factor_; + double resized_height = original_height * letterbox_scale_factor_; + + letterbox_pad_x_ = (yolo_input_width - resized_width) / 2.0; + letterbox_pad_y_ = (yolo_input_height - resized_height) / 2.0; +} + +BoundingBox ValvePoseBase::transform_bounding_box(const BoundingBox& bbox) const { + BoundingBox transformed_bbox = bbox; + + transformed_bbox.center.position.x = (bbox.center_x - letterbox_pad_x_) / letterbox_scale_factor_; + transformed_bbox.center.position.y = (bbox.center_y - letterbox_pad_y_) / letterbox_scale_factor_; + transformed_bbox.size_x /= letterbox_scale_factor_; + transformed_bbox.size_y /= letterbox_scale_factor_; + + return transformed_bbox; +} + +bool ValvePoseBase::segment_plane(const pcl::PointCloud::Ptr& cloud, + pcl::ModelCoefficients::Ptr& coefficients, + pcl::PointIndices::Ptr& inliers) const { + + if (cloud->points.empty()) { + return false; + } + + pcl::SACSegmentation seg; + seg.setOptimizeCoefficients(true); + seg.setModelType(pcl::SACMODEL_PLANE); + seg.setMethodType(pcl::SAC_RANSAC); + seg.setDistanceThreshold(plane_ransac_threshold_); + seg.setMaxIterations(plane_ransac_max_iterations_); + seg.setInputCloud(cloud); + seg.segment(*inliers, *coefficients); + + return !inliers->indices.empty(); +} + +Eigen::Vector3f ValvePoseBase::get_ray_direction(const BoundingBox& bbox) const { + float 
u = bbox.center_x; + float v = bbox.center_y; + + float fx = color_image_properties_.intr.fx; + float fy = color_image_properties_.intr.fy; + float cx = color_image_properties_.intr.cx; + float cy = color_image_properties_.intr.cy; + + float xc = (u - cx) / fx; + float yc = (v - cy) / fy; + + Eigen::Vector3f ray_direction; + ray_direction << xc, yc, 1.0f; + + return ray_direction.normalized(); +} + +Eigen::Vector3f ValvePoseBase::compute_plane_normal( + const pcl::ModelCoefficients::Ptr& coefficients, + const Eigen::Vector3f& ray_direction) { + + Eigen::Vector3f normal_vector; + + if (!coefficients || coefficients->values.size() < 3) { + return Eigen::Vector3f::Zero(); + } + + normal_vector << coefficients->values[0], coefficients->values[1], coefficients->values[2]; + + normal_vector.normalize(); + + if (normal_vector.dot(ray_direction) > 0.0) { + normal_vector = -normal_vector; + } + + return normal_vector; +} + +/** + * @brief Finds the intersection point of a ray and a plane. + * + * The ray is defined by its origin (The camera's optical center, + * i.e., [0, 0, 0]) and a direction vector. The plane is defined by its coefficients + * Ax + By + Cz + D = 0. + * + * @param coefficients A shared pointer to the pcl::ModelCoefficients object. + * @param ray_direction The normalized Eigen::Vector3f ray direction. + * @return An Eigen::Vector3f representing the 3D intersection point. + */ + Eigen::Vector3f ValvePoseBase::find_ray_plane_intersection( + const pcl::ModelCoefficients::Ptr& coefficients, + const Eigen::Vector3f& ray_direction) const { + + // Plane equation: Ax + By + Cz + D = 0 + // Ray equation: P = O + t * d, where O is origin [0,0,0], P is point, t is scalar, d is direction. 
+ // Substituting the ray equation into the plane equation: + // A*(O.x + t*d.x) + B*(O.y + t*d.y) + C*(O.z + t*d.z) + D = 0 + // Since O is [0,0,0], this simplifies to: + // A*t*d.x + B*t*d.y + C*t*d.z + D = 0 + // t * (A*d.x + B*d.y + C*d.z) = -D + // t = -D / (A*d.x + B*d.y + C*d.z) + + if (!coefficients || coefficients->values.size() < 4) { + return Eigen::Vector3f::Zero(); + } + + Eigen::Vector3f plane_normal; + plane_normal << coefficients->values[0], coefficients->values[1], coefficients->values[2]; + float D = coefficients->values[3]; + + float normal_dot_ray = plane_normal.dot(ray_direction); + + if (std::abs(normal_dot_ray) < 1e-6) { + // The ray is parallel or contained within the plane. + return Eigen::Vector3f::Zero(); + } + + float t = -D / normal_dot_ray; + + Eigen::Vector3f intersection_point = t * ray_direction; + + return intersection_point; +} + +/** + * @brief Shifts a point along a plane normal by a given distance. + * + * This function is useful for moving a point from the detected plane's + * surface to a new location, such as the center of a valve knob. + * The shift is performed along the vector of the consistent plane normal. + * + * @param intersection_point The 3D intersection point on the plane. + * @param plane_normal The consistent, normalized plane normal vector. + * @return The new Eigen::Vector3f point after the shift. 
+ */ + Eigen::Vector3f ValvePoseBase::shift_point_along_normal( + const Eigen::Vector3f& intersection_point, + const Eigen::Vector3f& plane_normal) const { + + return intersection_point + (plane_normal * valve_handle_offset_); +} + + +} // namespace valve_detection \ No newline at end of file diff --git a/src/valve_pose_depth.cpp b/src/valve_pose_depth.cpp new file mode 100644 index 0000000..96de2f3 --- /dev/null +++ b/src/valve_pose_depth.cpp @@ -0,0 +1,82 @@ +#include "valve_detection/valve_pose_depth.hpp" + +namespace valve_detection { + +ValvePoseDepth::ValvePoseDepth(int yolo_img_width, int yolo_img_height_, + float plane_ransac_threshold, + int plane_ransac_max_iterations, + float annulus_radius_ratio, + float valve_handle_offset) + : ValvePoseBase(yolo_img_width, yolo_img_height_, + plane_ransac_threshold, plane_ransac_max_iterations, + annulus_radius_ratio, valve_handle_offset){} + +void ValvePoseDepth::project_pixel_to_point(int u, int v, float depth, pcl::PointXYZ& point) { + if (depth <= 0 || depth == std::numeric_limits::infinity() || std::isnan(depth)) { + point.x = point.y = point.z = std::numeric_limits::quiet_NaN(); + return; + } + + double fx = depth_image_properties_.intr.fx; + double fy = depth_image_properties_.intr.fy; + double cx = depth_image_properties_.intr.cx; + double cy = depth_image_properties_.intr.cy; + + point.x = (u - cx) * depth / fx; + point.y = (v - cy) * depth / fy; + point.z = depth; +} + +void ValvePoseDepth::extract_annulus_pcl( + const cv::Mat& depth_image, + const BoundingBox& bbox, + pcl::PointCloud::Ptr& cloud) const { + + float center_x = bbox.center_x; + float center_y = bbox.center_y; + float outer_radius_x = bbox.size_x / 2.0f; + float outer_radius_y = bbox.size_y / 2.0f; + + float inner_radius_x = outer_radius_x * annulus_radius_ratio_; + float inner_radius_y = outer_radius_y * annulus_radius_ratio_; + + cloud->clear(); + + int upper_bound_size = static_cast(M_PI * ((outer_radius_x * outer_radius_y - inner_radius_x * 
inner_radius_y) + 1)); + cloud->reserve(upper_bound_size); + + for (int y = static_cast(-outer_radius_y); y <= static_cast(outer_radius_y); ++y) { + for (int x = static_cast(-outer_radius_x); x <= static_cast(outer_radius_x); ++x) { + float dx_scaled_outer = x / outer_radius_x; + float dy_scaled_outer = y / outer_radius_y; + bool is_inside_outer_ellipse = (dx_scaled_outer * dx_scaled_outer + dy_scaled_outer * dy_scaled_outer) <= 1.0f; + + float dx_scaled_inner = x / inner_radius_x; + float dy_scaled_inner = y / inner_radius_y; + bool is_outside_inner_ellipse = (dx_scaled_inner * dx_scaled_inner + dy_scaled_inner * dy_scaled_inner) > 1.0f; + + if (is_inside_outer_ellipse && is_outside_inner_ellipse) { + int u = static_cast(center_x + x); + int v = static_cast(center_y + y); + + if (u >= 0 && u < depth_image.cols && v >= 0 && v < depth_image.rows) { + float depth_value = depth_image.at(v, u); + pcl::PointXYZ point; + project_pixel_to_point(u, v, depth_value, point); + + if (!std::isnan(point.x) && !std::isnan(point.y) && !std::isnan(point.z)) { + cloud->points.push_back(point); + } + } + } + } + } + + cloud->width = static_cast(cloud->points.size()); + cloud->height = 1; + cloud->is_dense = false; +} + + +} // namespace valve_detection + \ No newline at end of file diff --git a/src/valve_pose_depth_ros.cpp b/src/valve_pose_depth_ros.cpp new file mode 100644 index 0000000..8c8388c --- /dev/null +++ b/src/valve_pose_depth_ros.cpp @@ -0,0 +1,209 @@ +#include "valve_detection/valve_pose_depth_ros.hpp" + +ValvePoseDepthNode::ValvePoseDepthNode(const rclcpp::NodeOptions& options) +: Node("valve_detection_node", options) { + std::string color_image_sub_topic = + this->declare_parameter("color_image_sub_topic"); + std::string depth_image_sub_topic = + this->declare_parameter("depth_image_sub_topic"); + std::string detections_sub_topic = + this->declare_parameter("detections_sub_topic"); + visualize_detections_ = + this->declare_parameter("visualize_detections"); + 
debug_visualize_ = + this->declare_parameter("debug_visualize"); + calculate_angle_ = + this->declare_parameter("calculate_angle"); + + int yolo_img_width = + this->declare_parameter("yolo_img_width"); + int yolo_img_height = + this->declare_parameter("yolo_img_height"); + + float annulus_radius_ratio = + this->declare_parameter("annulus_radius_ratio"); + + float plane_ransac_threshold = + this->declare_parameter("plane_ransac_threshold"); + int plane_ransac_max_iterations = + this->declare_parameter("plane_ransac_max_iterations"); + + float valve_handle_offset = + this->declare_parameter("valve_handle_offset"); + + valve_detector_ = std::make_shared(yolo_img_width, yolo_img_height, annulus_radius_ratio, + plane_ransac_threshold, plane_ransac_max_iterations, valve_handle_offset); + + + rclcpp::QoS qos = rclcpp::QoS(rclcpp::KeepLast(10)) + .reliability(RMW_QOS_POLICY_RELIABILITY_BEST_EFFORT); + depth_image_sub_.subscribe(this, depth_image_sub_topic, + qos.get_rmw_qos_profile()); + color_image_sub_.subscribe(this, color_image_sub_topic, + qos.get_rmw_qos_profile()); + detections_sub_.subscribe(this, detections_sub_topic, + qos.get_rmw_qos_profile()); + + sync_ = std::make_shared>( + MySyncPolicy(10), depth_image_sub_, color_image_sub_, detections_sub_); + sync_->registerCallback(std::bind( + &ValvePoseDepthNode::synchronized_callback, this, _1, _2, _3)); + + std::string color_image_info_topic = + this->declare_parameter("color_image_info_topic"); + + qos.history = rclcpp::KeepLast(1); + color_image_info_sub_ = this->create_subscription( + color_image_info_topic, 10, + std::bind(&ValvePoseDepthNode::color_image_info_callback, + this, std::placeholders::_1)); + + std::string valve_pose_pub_topic = + this->declare_parameter("valve_pose_pub_topic"); + + valve_pose_pub_ = this->create_publisher( + valve_pose_pub_topic, qos); + + if (debug_visualize_) { + std::string annulus_pub_topic = + this->declare_parameter("annulus_pub_topic"); + annulus_pcl_pub_ = 
this->create_publisher( + annulus_pub_topic, qos); + } +} + +void ValvePoseDepthNode::color_image_info_callback( + const sensor_msgs::msg::CameraInfo::SharedPtr camera_info_msg) { + if (!color_image_info_received_) { + ImageProperties img_props; + img_props.intr.fx = camera_info_msg->k[0]; + img_props.intr.fy = camera_info_msg->k[4]; + img_props.intr.cx = camera_info_msg->k[2]; + img_props.intr.cy = camera_info_msg->k[5]; + img_props.dim.x = camera_info_msg->width; + img_props.dim.y = camera_info_msg->height; + + valve_detector_->set_color_image_properties(img_props); + valve_detector_->calculate_letterbox_padding(); + + color_image_frame_id_ = camera_info_msg->header.frame_id; + color_image_info_received_ = true; + color_image_info_sub_.reset(); + } +} + +BoundingBox ValvePoseDepthNode::to_bounding_box( + const vision_msgs::msg::BoundingBox2D& bbox) const { + BoundingBox box; + box.center_x = bbox.center.x; + box.center_y = bbox.center.y; + box.size_x = bbox.size_x; + box.size_y = bbox.size_y; + box.theta = bbox.center.theta; + return box; +} + +void ValvePoseDepthNode::publish_annulus_pcl( + const pcl::PointCloud::Ptr& cloud) const { + sensor_msgs::msg::PointCloud2 cloud_msg; + pcl::toROSMsg(*cloud, cloud_msg); + cloud_msg.header.frame_id = color_image_frame_id_; + cloud_msg.header.stamp = this->now(); + cloud_msg.is_dense = false; + cloud_msg.width = static_cast(cloud->points.size()); + cloud_msg.height = 1; + + annulus_pcl_pub_->publish(cloud_msg); +} + +void ValvePoseDepthNode::synchronized_callback( + const sensor_msgs::msg::Image::ConstSharedPtr& depth_image, + const sensor_msgs::msg::Image::ConstSharedPtr& color_image, + const vision_msgs::msg::Detection2DArray::ConstSharedPtr& detections) { + if (!color_image_info_received_) { + return; + } + cv_bridge::CvImageConstPtr cv_ptr = + cv_bridge::toCvShare(color_image, "bgr8"); + + cv:brudge::CvImageConstPtr cv_depth_ptr = + cv_bridge::toCvShare(depth_image, "32FC1"); + + cv::Mat cv_color_image = cv_ptr->image; 
+ + if (visualize_detections_) { + cv_color_image = cv_color_image.clone(); + } + std::vector boxes = std::vector().reserve(detections->detections.size()); + for (const auto& detection : detections->detections) { + boxes.push_back(to_bounding_box(detection.bbox)); + } + + for (const auto& box : boxes) { + BoundingBox org_image_box = + valve_detector_->transform_bounding_box(box); + + pcl::PointCloud::Ptr cloud( + new pcl::PointCloud); + + valve_detector_->extract_annulus_pcl( + cv_depth_ptr->image, + org_image_box, + cloud); + + if (cloud->empty()) { + continue; + } + if (debug_visualize_) { + publish_annulus_pcl(cloud); + } + + pcl::ModelCoefficients::Ptr coefficients(new pcl::ModelCoefficients); + pcl::PointIndices::Ptr inliers(new pcl::PointIndices); + if (!valve_detector_->segment_plane(cloud, coefficients, inliers)) { + continue; + } + + Eigen::Vector3f bb_centre_ray = + valve_detector_->get_ray_direction(org_image_box); + + Eigen::Vector3f ray_plane_intersection = + valve_detector_->find_ray_plane_intersection(coefficients, bb_centre_ray); + + if (ray_plane_intersection.isZero()) { + continue; + } + + Eigen::Vector3f plane_normal = + valve_detector_->compute_plane_normal( + coefficients, ray_direction); + + if (plane_normal.isZero()) { + continue; + } + + Eigen::Vector3f shifted_position = + valve_detector_->shift_point_along_normal( + ray_plane_intersection, plane_normal); + + float angle = org_image_box.theta; + if (calculate_angle_) { + // angle = calculate_angle() + } + geometry_msgs::msg::PoseStamped pose_msg; + pose_msg.header.stamp = color_image->header.stamp; + pose_msg.header.frame_id = color_image_frame_id_; + + pose_msg.pose.position.x = shifted_position[0]; + pose_msg.pose.position.y = shifted_position[1]; + pose_msg.pose.position.z = shifted_position[2]; + + } + + + + + +} + +RCLCPP_COMPONENTS_REGISTER_NODE(ValvePoseDepthNode) \ No newline at end of file diff --git a/src/valve_pose_pcl.cpp b/src/valve_pose_pcl.cpp new file mode 100644 index 
0000000..e69de29 From ef8c4bc8d5608c1b6d3008416397237814b6f0d1 Mon Sep 17 00:00:00 2001 From: Jorgen Fjermedal Date: Mon, 25 Aug 2025 20:36:12 +0200 Subject: [PATCH 14/29] small refactor --- CMakeLists.txt | 12 +- config/depth_image_angle_params.yaml | 27 - config/valve_detection_params.yaml | 35 +- include/valve_detection/angle_detector.hpp | 52 ++ .../depth_image_processing.hpp | 61 ++ .../valve_detection/pointcloud_processing.hpp | 39 ++ include/valve_detection/types.hpp | 87 ++- include/valve_detection/valve_detection.hpp | 201 ------ include/valve_detection/valve_detector.hpp | 293 +++++++++ include/valve_detection/valve_pose_base.hpp | 103 --- include/valve_detection/valve_pose_depth.hpp | 35 - .../valve_pose_depth_ros.hpp | 76 --- .../valve_detection_ros/valve_pose_ros.hpp | 310 +++++++++ launch/valve_detection.launch.py | 33 +- package.xml | 1 + src/angle_detector.cpp | 81 +++ src/depth_image_processing.cpp | 88 +++ src/pointcloud_processing.cpp | 59 ++ src/valve_detection.cpp | 600 ------------------ src/valve_detector.cpp | 241 +++++++ src/valve_pose_base.cpp | 181 ------ src/valve_pose_depth.cpp | 82 --- src/valve_pose_depth_ros.cpp | 209 ------ src/valve_pose_pcl.cpp | 0 src/valve_pose_ros.cpp | 289 +++++++++ 25 files changed, 1646 insertions(+), 1549 deletions(-) delete mode 100644 config/depth_image_angle_params.yaml create mode 100644 include/valve_detection/angle_detector.hpp create mode 100644 include/valve_detection/depth_image_processing.hpp create mode 100644 include/valve_detection/pointcloud_processing.hpp delete mode 100644 include/valve_detection/valve_detection.hpp create mode 100644 include/valve_detection/valve_detector.hpp delete mode 100644 include/valve_detection/valve_pose_base.hpp delete mode 100644 include/valve_detection/valve_pose_depth.hpp delete mode 100644 include/valve_detection_ros/valve_pose_depth_ros.hpp create mode 100644 include/valve_detection_ros/valve_pose_ros.hpp create mode 100644 src/angle_detector.cpp create mode 
100644 src/depth_image_processing.cpp create mode 100644 src/pointcloud_processing.cpp delete mode 100644 src/valve_detection.cpp create mode 100644 src/valve_detector.cpp delete mode 100644 src/valve_pose_base.cpp delete mode 100644 src/valve_pose_depth.cpp delete mode 100644 src/valve_pose_depth_ros.cpp delete mode 100644 src/valve_pose_pcl.cpp create mode 100644 src/valve_pose_ros.cpp diff --git a/CMakeLists.txt b/CMakeLists.txt index de59d4f..1c5783a 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -20,13 +20,18 @@ find_package(std_msgs REQUIRED) find_package(OpenCV 4.5.4 REQUIRED) find_package(PCL REQUIRED) find_package(pcl_conversions REQUIRED) +find_package(tf2_geometry_msgs REQUIRED) include_directories(include) set(LIB_NAME "${PROJECT_NAME}_component") add_library(${LIB_NAME} SHARED - src/valve_detection.cpp + src/valve_detector.cpp + src/depth_image_processing.cpp + src/pointcloud_processing.cpp + src/angle_detector.cpp + src/valve_pose_ros.cpp ) target_link_libraries(${LIB_NAME} PUBLIC @@ -50,11 +55,14 @@ ament_target_dependencies(${LIB_NAME} PUBLIC geometry_msgs std_msgs pcl_conversions + tf2 + tf2_ros + tf2_geometry_msgs ) rclcpp_components_register_node( ${LIB_NAME} - PLUGIN "ValveDetectionNode" + PLUGIN "valve_detection::ValvePoseNode" EXECUTABLE ${PROJECT_NAME}_node ) diff --git a/config/depth_image_angle_params.yaml b/config/depth_image_angle_params.yaml deleted file mode 100644 index bc3d5f3..0000000 --- a/config/depth_image_angle_params.yaml +++ /dev/null @@ -1,27 +0,0 @@ -/**: - ros__parameters: - depth_image_sub_topic: "/zed_node/depth/depth_registered" - color_image_sub_topic: "/zed_node/left/image_rect_color" - detections_sub_topic: "/detections_output" - - color_image_info_topic: "/zed_node/left/camera_info" - depth_image_info_topic: "/zed_node/depth/camera_info" - - valve_pose_pub_topic: "/valve_pose" - - yolo_img_width: 640 - yolo_img_height: 640 - - annulus_radius_ratio: 0.8 - - plane_ransac_threshold: 0.01 - 
plane_ransac_max_iterations: 20 - - valve_handle_offset: 0.05 - - calculate_angle: true - - visualize_detections: true - debug_visualize: false - annulus_pub_topic: "/bbx_annulus_pcl" - plane_normal_pub_topic: "/plane_normal" diff --git a/config/valve_detection_params.yaml b/config/valve_detection_params.yaml index 185d440..2b9f093 100644 --- a/config/valve_detection_params.yaml +++ b/config/valve_detection_params.yaml @@ -2,6 +2,39 @@ ros__parameters: depth_image_sub_topic: "/zed_node/depth/depth_registered" color_image_sub_topic: "/zed_node/left/image_rect_color" + pcl_sub_topic: "/zed_node/depth/point_cloud" detections_sub_topic: "/detections_output" - line_detection_area: 22 + color_image_info_topic: "/zed_node/left/camera_info" + depth_image_info_topic: "/zed_node/depth/camera_info" + + valve_poses_pub_topic: "/valve_poses" + + yolo_img_width: 640 + yolo_img_height: 640 + + annulus_radius_ratio: 0.8 + + plane_ransac_threshold: 0.01 + plane_ransac_max_iterations: 20 + + valve_handle_offset: 0.05 + + use_depth_image: true # if false, use pcl and tf2 to transform pointcloud to color frame + + visualize_detections: true + processed_image_pub_topic: "/valve_detection_image" + pcl_visualize: false + annulus_pub_topic: "/bbx_annulus_pcl" + plane_pub_topic: "/annulus_plane_pcl" + + calculate_angle: true + angle.detection_area_ratio: 0.05 # percentage of bb size + angle.canny_low_threshold: 50 + angle.canny_high_threshold: 150 + angle.canny_aperture_size: 3 # must be odd + angle.hough_rho_res: 1.0 + angle.hough_theta_res: 0.0174533 # 1 degree in radians + angle.hough_threshold: 20 + angle.hough_min_line_length: 45.0 + angle.hough_max_line_gap: 5.0 diff --git a/include/valve_detection/angle_detector.hpp b/include/valve_detection/angle_detector.hpp new file mode 100644 index 0000000..188d563 --- /dev/null +++ b/include/valve_detection/angle_detector.hpp @@ -0,0 +1,52 @@ +#ifndef VALVE_POSE_ANGLE_HPP +#define VALVE_POSE_ANGLE_HPP + +#include +#include +#include +#include 
"valve_detection/types.hpp" + +namespace valve_detection { + +class AngleDetector { + public: + /** + * @brief Constructs an AngleDetector with a set of parameters. + * @param params The parameters for the detector. + */ + explicit AngleDetector(const AngleDetectorParams& params) + : params_(params) {} + + /** + * @brief Calculates the angle of the longest line within a detected valve. + * @param color_image The input color image. + * @param bbox The bounding box of the detected valve. + * @return The angle in radians, or NaN if no lines are detected. + */ + double calculate_angle(const cv::Mat& color_image, + const BoundingBox& bbox) const; + + private: + /** + * @brief Finds the longest line among a set of lines detected in an image. + * @param lines A vector of lines, each represented as cv::Vec4i (x1, y1, + * x2, y2). + * @return The cv::Vec4i representing the longest line. + * If the input vector is empty, returns a zero-length line {0, 0, 0, 0}. + */ + cv::Vec4i find_longest_line(const std::vector& lines) const; + + /** + * @brief Applies Canny edge detection to a grayscale image. + * @param gray_image The input grayscale image. + * @return A binary image (CV_8UC1) with edges detected. + */ + cv::Mat apply_canny_edge_detection(const cv::Mat& gray_image) const; + + /** @brief Parameters for the angle detector. */ + AngleDetectorParams params_; +}; + +} // namespace valve_detection + +#endif // VALVE_POSE_ANGLE_HPP diff --git a/include/valve_detection/depth_image_processing.hpp b/include/valve_detection/depth_image_processing.hpp new file mode 100644 index 0000000..2ddea69 --- /dev/null +++ b/include/valve_detection/depth_image_processing.hpp @@ -0,0 +1,61 @@ +#ifndef VALVE_POSE_DEPTH_HPP +#define VALVE_POSE_DEPTH_HPP + +#include "valve_detection/types.hpp" + +#include +#include +#include +#include +#include +#include +#include + +namespace valve_detection { + +/** + * @brief Projects a 2D pixel with depth into 3D camera coordinates. 
+ * + * @param u The x-coordinate of the pixel in the image. + * @param v The y-coordinate of the pixel in the image. + * @param depth The depth value at the pixel (in meters or the same units as + * intrinsics). + * @param fx Focal length along x-axis. + * @param fy Focal length along y-axis. + * @param cx Principal point x-coordinate. + * @param cy Principal point y-coordinate. + * @param[out] point The resulting 3D point in camera coordinates. + */ +void project_pixel_to_point(int u, + int v, + float depth, + double fx, + double fy, + double cx, + double cy, + pcl::PointXYZ& point); + +/** + * @brief Extracts a 3D point cloud representing the annulus (valve rim) from a + * depth image. + * + * The function selects pixels within the annulus region of a bounding box and + * projects them into 3D points using the camera intrinsics. + * + * @param depth_image The input depth image (CV_32FC1 or similar). + * @param bbox The bounding box around the valve in the image. + * @param image_properties Camera intrinsics and image dimensions. + * @param annulus_radius_ratio Fraction of the bounding box considered as the + * annulus (0.0–1.0). + * @param[out] cloud The resulting point cloud containing the 3D points of the + * annulus. 
+ */ +void extract_annulus_pcl(const cv::Mat& depth_image, + const BoundingBox& bbox, + const ImageProperties& image_properties, + float annulus_radius_ratio, + pcl::PointCloud::Ptr& cloud); + +} // namespace valve_detection + +#endif // VALVE_POSE_DEPTH_HPP diff --git a/include/valve_detection/pointcloud_processing.hpp b/include/valve_detection/pointcloud_processing.hpp new file mode 100644 index 0000000..588496f --- /dev/null +++ b/include/valve_detection/pointcloud_processing.hpp @@ -0,0 +1,39 @@ +#ifndef VALVE_POSE_PCL_HPP +#define VALVE_POSE_PCL_HPP + +#include "valve_detection/types.hpp" + +#include +#include +#include +#include + +namespace valve_detection { + +/** + * @brief Extracts a 3D point cloud representing the annulus (valve rim) from an + * input point cloud. + * + * This function selects points that correspond to the annulus region of a + * bounding box in image space, based on the camera intrinsics and the specified + * radius ratio. + * + * @param input_cloud The input point cloud (typically from a depth camera or + * LIDAR). + * @param bbox The bounding box around the valve in image coordinates. + * @param image_properties Camera intrinsics and image dimensions for + * projection. + * @param annulus_radius_ratio Fraction of the bounding box considered as the + * annulus (0.0–1.0). + * @param[out] cloud The resulting point cloud containing the 3D points of the + * annulus. 
+ */ +void extract_annulus_pcl(const pcl::PointCloud::Ptr& input_cloud, + const BoundingBox& bbox, + const ImageProperties& image_properties, + float annulus_radius_ratio, + pcl::PointCloud::Ptr& cloud); + +} // namespace valve_detection + +#endif // VALVE_POSE_PCL_HPP diff --git a/include/valve_detection/types.hpp b/include/valve_detection/types.hpp index b42082c..248452b 100644 --- a/include/valve_detection/types.hpp +++ b/include/valve_detection/types.hpp @@ -1,38 +1,85 @@ #ifndef TYPES_HPP #define TYPES_HPP +#include + namespace valve_detection { +/** + * @struct CameraIntrinsics + * @brief Stores intrinsic parameters of a pinhole camera model. + */ struct CameraIntrinsics { - double fx; - double fy; - double cx; - double cy; + double fx; ///< Focal length in pixels along x-axis + double fy; ///< Focal length in pixels along y-axis + double cx; ///< Principal point x-coordinate (in pixels) + double cy; ///< Principal point y-coordinate (in pixels) }; +/** + * @struct ImageDimensions + * @brief Represents the width and height of an image. + */ struct ImageDimensions { - int width; - int height; + int width; ///< Image width in pixels + int height; ///< Image height in pixels }; +/** + * @struct ImageProperties + * @brief Combines camera intrinsics and image dimensions. + */ struct ImageProperties { - CameraIntrinsics intr; - ImageDimensions dim; + CameraIntrinsics intr; ///< Camera intrinsic parameters + ImageDimensions dim; ///< Image width and height }; +/** + * @struct BoundingBox + * @brief Represents a 2D bounding box with center, size, and orientation. 
+ */ struct BoundingBox { - float center_x; - float center_y; - float size_x; - float size_y; - float theta; // Orientation in radians positive counter-clockwise -} - -enum class AngleDetection { - ENABLED, - DISABLED + float center_x; ///< X-coordinate of the bounding box center + float center_y; ///< Y-coordinate of the bounding box center + float size_x; ///< Width of the bounding box + float size_y; ///< Height of the bounding box + float theta; ///< Orientation in radians (positive counter-clockwise) +}; + +/** + * @struct Pose + * @brief Represents a 3D pose with position and orientation. + */ +struct Pose { + Eigen::Vector3f position; ///< 3D position (x, y, z) + Eigen::Quaternionf orientation; ///< Orientation as a quaternion +}; + +/** + * @struct AngleDetectorParams + * @brief Parameters for Canny edge detection and Hough line transform used in + * angle detection. + */ +struct AngleDetectorParams { + float line_detection_area; ///< Fraction of the bounding box to use as ROI + ///< (0.0–1.0) + + // Canny edge detection parameters + int canny_low_threshold; ///< Low threshold for Canny edge detector + int canny_high_threshold; ///< High threshold for Canny edge detector + int canny_aperture_size; ///< Aperture size for the Sobel operator in Canny + + // Hough line transform parameters + double hough_rho_res; ///< Distance resolution in pixels + double hough_theta_res; ///< Angle resolution in radians + int hough_threshold; ///< Minimum number of votes (intersections) to detect + ///< a line + double + hough_min_line_length; ///< Minimum line length to accept (in pixels) + double hough_max_line_gap; ///< Maximum allowed gap between line segments + ///< to treat them as a single line }; -} // namespace valve_detection +} // namespace valve_detection -#endif // TYPES_HPP \ No newline at end of file +#endif // TYPES_HPP diff --git a/include/valve_detection/valve_detection.hpp b/include/valve_detection/valve_detection.hpp deleted file mode 100644 index 
06b324f..0000000 --- a/include/valve_detection/valve_detection.hpp +++ /dev/null @@ -1,201 +0,0 @@ -#ifndef DETECTION_IMAGE_PROCESSOR_HPP -#define DETECTION_IMAGE_PROCESSOR_HPP - -#include -#include -#include -#include -#include -#include -#include - -#include -#include "cv_bridge/cv_bridge.h" -#include "geometry_msgs/msg/pose_stamped.hpp" -#include "geometry_msgs/msg/vector3.hpp" -#include "rclcpp/rclcpp.hpp" -#include "sensor_msgs/msg/camera_info.hpp" -#include "sensor_msgs/msg/image.hpp" -#include "sensor_msgs/msg/point_cloud2.hpp" -#include "std_msgs/msg/header.hpp" -#include "vision_msgs/msg/detection2_d_array.hpp" - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include - -#include -#include -#include - -class ValveDetectionNode : public rclcpp::Node { - public: - explicit ValveDetectionNode(const rclcpp::NodeOptions& options); - ~ValveDetectionNode() {}; - - private: - message_filters::Subscriber depth_image_sub_; - message_filters::Subscriber color_image_sub_; - message_filters::Subscriber - detections_sub_; - - typedef message_filters::sync_policies::ExactTime< - sensor_msgs::msg::Image, - sensor_msgs::msg::Image, - vision_msgs::msg::Detection2DArray> - MySyncPolicy; - - std::shared_ptr> sync_; - - // Subscriptions - rclcpp::Subscription::SharedPtr - camera_info_subscription_; - rclcpp::Subscription::SharedPtr - camera_info_subscription_yolo_color_; - rclcpp::Subscription::SharedPtr - detections_subscription_; - rclcpp::Subscription::SharedPtr - image_subscription_; - - // Publishers - rclcpp::Publisher::SharedPtr processed_image_pub_; - rclcpp::Publisher::SharedPtr plane_normal_pub_; - rclcpp::Publisher::SharedPtr pointcloud_pub_; - rclcpp::Publisher::SharedPtr - line_pose_pub_; - rclcpp::Publisher::SharedPtr - line_points_pub_; - rclcpp::Publisher::SharedPtr - near_plane_cloud_pub_; - rclcpp::Publisher::SharedPtr - canny_debug_image_pub_; - - // Stored messages - 
sensor_msgs::msg::CameraInfo::SharedPtr camera_info_; - bool camera_info_received_ = false; - sensor_msgs::msg::CameraInfo::SharedPtr camera_info_yolo_color_; - bool camera_info_received_yolo_color_ = false; - // vision_msgs::msg::Detection2DArray::SharedPtr latest_detections_; - // sensor_msgs::msg::Image::SharedPtr latest_image_; - - // Mutex for thread safety - std::mutex mutex_; - - // Used for line fitting direction filtering - static Eigen::Vector3f filter_direction_; - - // Scalars for image resizing - double height_scalar_; - double width_scalar_; - - int line_detection_area_; - - /** - * @brief Callback function for synchronized depth image, color image, and - * 2D detections. - * - * This function is triggered when synchronized messages for a depth image, - * a color image, and a 2D detection array - */ - void synchronized_callback( - const sensor_msgs::msg::Image::ConstSharedPtr& depth_image, - const sensor_msgs::msg::Image::ConstSharedPtr& color_image, - const vision_msgs::msg::Detection2DArray::ConstSharedPtr& detections); - // Callback functions - void camera_info_callback( - const sensor_msgs::msg::CameraInfo::SharedPtr camera_info_msg); - void camera_info_callback_yolo_colored( - const sensor_msgs::msg::CameraInfo::SharedPtr camera_info_msg); - - // Utility functions - /** - * @brief Computes the height and width scalars based on the camera - * information. - * - * This function calculates the scaling factors (`height_scalar_` and - * `width_scalar_`) by comparing the dimensions of two camera frames: - * - The YOLO color camera frame (`camera_info_yolo_color_`). - * - The reference camera frame (`camera_info_`). - * - */ - void compute_height_width_scalars(); - - /** - * @brief Main logic executed when two images are synchronized. - * - * This function processes synchronized depth and color images along with 2D - * detections to calculate the angle of the valve handle and its orientation - * in 3D space. 
It uses RANSAC line detection to determine the handle's - * angle and RANSAC plane detection to find the normal vector of the plane - * in which the valve handle lies. - * - * The function performs the following steps: - * 1. Converts the depth and color images from ROS messages to OpenCV - * matrices. - * 2. Selects the most centered detection from the 2D detection array. - * 3. Projects the detected bounding box into 3D space using the depth image - * and camera intrinsics. - * 4. Uses RANSAC plane detection to find the plane normal vector and - * filters points near the plane. - * 5. Detects the valve handle's orientation using Hough line detection on - * the color image. - * 6. Computes the 3D pose of the valve handle and publishes it as a - * `geometry_msgs::msg::PoseStamped` message. - * - * @param[in] depth_image A shared pointer to the depth image message. - * @param[in] color_image A shared pointer to the color image message. - * @param[in] detections A shared pointer to the 2D detection array message. - * - * @pre The `camera_info_` and `camera_info_yolo_color_` member variables - * must be initialized with valid camera intrinsic parameters. - * @pre The input images and detections must be synchronized (i.e., they - * correspond to the same timestamp or frame). - * @post If successful, the function publishes the following: - * - A point cloud of the detected valve handle region. - * - The normal vector of the detected plane. - * - The 3D pose of the valve handle. - * @post If unsuccessful, the function logs warnings or errors and returns - * early. - * - * @note The function assumes that the input images and detections are - * synchronized. - * @note The function uses RANSAC for robust plane and line detection, which - * is suitable for noisy sensor data. - * @note The function handles edge cases such as invalid depth values, empty - * detections, and failed RANSAC fits. 
- */ - void process_and_publish_image( - const sensor_msgs::msg::Image::ConstSharedPtr& depth_image, - const sensor_msgs::msg::Image::ConstSharedPtr& color_image, - const vision_msgs::msg::Detection2DArray::ConstSharedPtr& detections); - - /** - * @brief Projects a 2D pixel coordinate and depth value into a 3D point in - * the camera frame. - * - * This function converts a pixel coordinate `(u, v)` and its corresponding - * depth value into a 3D point - */ - void project_pixel_to_3d(int u, int v, float depth, pcl::PointXYZ& point); - - /** - * @brief Projects a 3D point in the camera frame to 2D pixel coordinates. - * - * This function converts a 3D point `(x, y, z)` in the camera frame into 2D - * pixel coordinates `(u, v)` - */ - void project_3d_to_pixel(float x, float y, float z, int& u, int& v); -}; - -#endif // DETECTION_IMAGE_PROCESSOR_HPP diff --git a/include/valve_detection/valve_detector.hpp b/include/valve_detection/valve_detector.hpp new file mode 100644 index 0000000..3d83432 --- /dev/null +++ b/include/valve_detection/valve_detector.hpp @@ -0,0 +1,293 @@ +#ifndef VALVE_POSE_BASE_HPP +#define VALVE_POSE_BASE_HPP + +#include +#include +#include +#include +#include +#include +#include +#include "valve_detection/angle_detector.hpp" +#include "valve_detection/depth_image_processing.hpp" +#include "valve_detection/pointcloud_processing.hpp" +#include "valve_detection/types.hpp" + +namespace valve_detection { + +/** + * @class ValveDetector + * @brief Detects valve poses from color and depth images or point clouds. + * + * This class extracts valve annulus point clouds, segments planes, + * computes ray-plane intersections, estimates valve poses, and can calculate + * the valve rotation angle using the AngleDetector. + */ +class ValveDetector { + public: + /** + * @brief Constructs a ValveDetector with YOLO image dimensions and + * detection parameters. + * @param yolo_img_width Width of the YOLO input image. 
+ * @param yolo_img_height Height of the YOLO input image. + * @param annulus_radius_ratio Ratio of bounding box used to define the + * annulus. + * @param plane_ransac_threshold Distance threshold for plane segmentation. + * @param plane_ransac_max_iterations Max iterations for RANSAC plane + * segmentation. + * @param valve_handle_offset Distance to shift the valve handle along the + * plane normal. + */ + ValveDetector(int yolo_img_width, + int yolo_img_height_, + float annulus_radius_ratio, + float plane_ransac_threshold, + int plane_ransac_max_iterations, + float valve_handle_offset); + + virtual ~ValveDetector() = default; + + /** + * @brief Computes valve poses from depth input and bounding boxes. + * + * This template function supports depth input as either a PCL point cloud + * or an OpenCV depth image. It extracts the annulus points, segments + * planes, computes ray-plane intersections, shifts points along normals, + * and optionally calculates valve rotation angles. + * + * @tparam T Depth input type (cv::Mat or + * pcl::PointCloud::Ptr). + * @param depth_input The input depth data. + * @param color_image Corresponding color image for angle calculation. + * @param boxes Bounding boxes of detected valves. + * @param poses Output vector of computed valve poses. + * @param all_annulus_cloud Optional point cloud for visualizing all annuli. + * @param all_annulus_plane_cloud Optional point cloud for visualizing + * segmented planes. + * @param pcl_visualize Whether to populate visualization point clouds. + * @param calculate_angle Whether to compute the valve rotation angle. 
+ */ + template + void Compute_valve_poses( + const T& depth_input, + const cv::Mat& color_image, + const std::vector& boxes, + std::vector& poses, + pcl::PointCloud::Ptr& all_annulus_cloud, + pcl::PointCloud::Ptr& all_annulus_plane_cloud, + bool pcl_visualize, + bool calculate_angle) { + if (pcl_visualize) { + all_annulus_cloud->clear(); + all_annulus_plane_cloud->clear(); + } + + for (const auto& box : boxes) { + BoundingBox org_image_box = transform_bounding_box(box); + pcl::PointCloud::Ptr annulus_cloud( + new pcl::PointCloud); + + if constexpr (std::is_same< + T, pcl::PointCloud::Ptr>::value) { + extract_annulus_pcl(depth_input, org_image_box, + color_image_properties_, + annulus_radius_ratio_, annulus_cloud); + } else if constexpr (std::is_same::value) { + extract_annulus_pcl(depth_input, org_image_box, + color_image_properties_, + annulus_radius_ratio_, annulus_cloud); + } + + if (annulus_cloud->empty()) + continue; + + if (pcl_visualize) { + *all_annulus_cloud += *annulus_cloud; + } + + pcl::ModelCoefficients::Ptr coefficients( + new pcl::ModelCoefficients); + pcl::PointIndices::Ptr inliers(new pcl::PointIndices); + + if (!segment_plane(annulus_cloud, coefficients, inliers)) + continue; + + if (pcl_visualize) { + pcl::PointCloud::Ptr annulus_plane_cloud( + new pcl::PointCloud); + pcl::copyPointCloud(*annulus_cloud, inliers->indices, + *annulus_plane_cloud); + *all_annulus_plane_cloud += *annulus_plane_cloud; + } + + Eigen::Vector3f bb_centre_ray = get_ray_direction(org_image_box); + Eigen::Vector3f ray_plane_intersection = + find_ray_plane_intersection(coefficients, bb_centre_ray); + if (ray_plane_intersection.isZero()) + continue; + + Eigen::Vector3f plane_normal = + compute_plane_normal(coefficients, bb_centre_ray); + if (plane_normal.isZero()) + continue; + + Eigen::Vector3f shifted_position = + shift_point_along_normal(ray_plane_intersection, plane_normal); + + float angle = org_image_box.theta; + if (calculate_angle) { + angle = 
angle_detector_->calculate_angle(color_image, + org_image_box); + } + + Eigen::Matrix3f rotation_matrix = + create_rotation_matrix(plane_normal, angle); + Eigen::Quaternionf rotation_quat; + rmat_to_quat(rotation_matrix, rotation_quat); + + Pose pose; + pose.position = shifted_position; + pose.orientation = rotation_quat; + poses.emplace_back(pose); + } + } + + /** + * @brief Projects a 3D point to pixel coordinates using the camera + * intrinsics. + * @param x X coordinate in 3D space. + * @param y Y coordinate in 3D space. + * @param z Z coordinate in 3D space. + * @param[out] u Output pixel x-coordinate. + * @param[out] v Output pixel y-coordinate. + */ + void project_point_to_pixel(float x, + float y, + float z, + int& u, + int& v) const; + + /** + * @brief Calculates the padding and scaling for letterbox resizing. + */ + void calculate_letterbox_padding(); + + /** + * @brief Transforms bounding box coordinates from letterboxed YOLO input to + * original image coordinates. + * @param bbox Bounding box in YOLO image coordinates. + * @return Bounding box transformed to original image coordinates. + */ + BoundingBox transform_bounding_box(const BoundingBox& bbox) const; + + /** + * @brief Sets the color image properties (intrinsics and dimensions). + */ + void set_color_image_properties(const ImageProperties& properties) { + color_image_properties_ = properties; + } + + /** + * @brief Segments the dominant plane from a point cloud using RANSAC. + * @param cloud Input point cloud. + * @param coefficients Output model coefficients (Ax + By + Cz + D = 0). + * @param inliers Output inlier indices of the plane. + * @return True if a plane with inliers was found, false otherwise. + */ + bool segment_plane(const pcl::PointCloud::Ptr& cloud, + pcl::ModelCoefficients::Ptr& coefficients, + pcl::PointIndices::Ptr& inliers) const; + + /** + * @brief Computes the ray direction vector through the bounding box center + * in camera frame. 
+ * @param bbox Bounding box for the valve. + * @return Normalized Eigen::Vector3f representing the ray direction. + */ + Eigen::Vector3f get_ray_direction(const BoundingBox& bbox) const; + + /** + * @brief Computes the plane normal from plane coefficients, ensuring it + * points against the ray. + * @param coefficients Plane coefficients (Ax + By + Cz + D = 0). + * @param ray_direction Ray direction vector from camera. + * @return Normalized Eigen::Vector3f plane normal, or zero if invalid. + */ + Eigen::Vector3f compute_plane_normal( + const pcl::ModelCoefficients::Ptr& coefficients, + const Eigen::Vector3f& ray_direction) const; + + /** + * @brief Finds the intersection point of a ray and a plane. + * @param coefficients Plane coefficients. + * @param ray_direction Normalized ray direction. + * @return Intersection point in 3D space, or zero vector if no + * intersection. + */ + Eigen::Vector3f find_ray_plane_intersection( + const pcl::ModelCoefficients::Ptr& coefficients, + const Eigen::Vector3f& ray_direction) const; + + /** + * @brief Shifts a point along a plane normal by the valve handle offset. + * @param intersection_point Point on the plane. + * @param plane_normal Normalized plane normal vector. + * @return Shifted 3D point representing valve handle position. + */ + Eigen::Vector3f shift_point_along_normal( + const Eigen::Vector3f& intersection_point, + const Eigen::Vector3f& plane_normal) const; + + /** + * @brief Creates a rotation matrix with the plane normal as z-axis and + * valve angle applied. + * @param plane_normal Normalized plane normal vector. + * @param angle Valve rotation angle in radians. + * @return Rotation matrix (Eigen::Matrix3f) for the valve pose. + */ + Eigen::Matrix3f create_rotation_matrix(const Eigen::Vector3f& plane_normal, + float angle); + + /** + * @brief Initializes the angle detector with given parameters. + * @param params Parameters for Canny and Hough-based angle detection. 
+ */ + void init_angle_detector(const AngleDetectorParams& params); + + /** + * @brief Converts a 3x3 rotation matrix to a normalized quaternion. + * @param rotation_matrix Input rotation matrix. + * @param quat Output quaternion. + */ + void rmat_to_quat(const Eigen::Matrix3f& rotation_matrix, + Eigen::Quaternionf& quat) const; + + /** + * @brief Draws bounding boxes and 3D axes for valve poses on an image. + * @param image Input color image. + * @param boxes Detected valve bounding boxes. + * @param poses Computed valve poses. + * @return Image with drawn detections. + */ + cv::Mat draw_detections(const cv::Mat& image, + const std::vector& boxes, + const std::vector& poses) const; + + protected: + ImageProperties color_image_properties_; + int yolo_img_width_; + int yolo_img_height_; + float annulus_radius_ratio_; + float plane_ransac_threshold_; + int plane_ransac_max_iterations_; + float valve_handle_offset_; + float letterbox_scale_factor_; + int letterbox_pad_x_; + int letterbox_pad_y_; + Eigen::Vector3f filter_direction_ = Eigen::Vector3f(1, 0, 0); + std::unique_ptr angle_detector_; +}; + +} // namespace valve_detection + +#endif // VALVE_POSE_BASE_HPP diff --git a/include/valve_detection/valve_pose_base.hpp b/include/valve_detection/valve_pose_base.hpp deleted file mode 100644 index f40e1a1..0000000 --- a/include/valve_detection/valve_pose_base.hpp +++ /dev/null @@ -1,103 +0,0 @@ -#ifndef VALVE_POSE_BASE_HPP -#define VALVE_POSE_BASE_HPP - -#include "valve_detection/types.hpp" -#include - -namespace valve_detection { - -class ValvePoseBase { -public: - ValvePoseBase(int yolo_img_width, int yolo_img_height_, - float annulus_radius_ratio, - float plane_ransac_threshold, int plane_ransac_max_iterations, - float valve_handle_offset); - virtual ~ValvePoseBase() = default; - - void project_point_to_pixel( - float x, float y, float z, int& u, int& v) const; - - void calculate_letterbox_properties(); - - BoundingBox transform_bounding_box( - const BoundingBox& bbox) 
const; - - void set_color_image_properties( - const ImageProperties& properties) { - color_image_properties_ = properties; - } - - /** - * @brief Segments the dominant plane from a point cloud using RANSAC. - * @param cloud The input point cloud. - * @param coefficients The output model coefficients (ax + by + cz + d = 0). - * @return True if a plane with inliers was found, false otherwise. - */ - bool segment_plane(const pcl::PointCloud::Ptr& cloud, - pcl::ModelCoefficients::Ptr& coefficients, - pcl::PointIndices::Ptr& inliers) const; - - Eigen::Vector3f get_ray_direction(const BoundingBox& bbox) const; - - /** - * @brief Creates a consistent, normalized Eigen::Vector3f from PCL plane coefficients - * and a reference ray. - * - * This function extracts the normal vector (A, B, C) from the plane equation - * Ax + By + Cz + D = 0, normalizes it, and flips its direction if necessary - * to ensure it always points in the same general direction as the reference ray. - * - * @param coefficients A shared pointer to the pcl::ModelCoefficients object. - * @param ray_direction A reference to the Eigen::Vector3f representing the ray. - * @return A normalized Eigen::Vector3f representing the consistent plane normal. - */ - Eigen::Vector3f create_consistent_plane_normal_with_ray( - const pcl::ModelCoefficients::Ptr& coefficients, - const Eigen::Vector3f& ray_direction) const; - - - /** - * @brief Finds the intersection point of a ray and a plane. - * - * The ray is defined by its origin (assumed to be the camera's optical center, - * i.e., [0, 0, 0]) and a direction vector. The plane is defined by its coefficients - * Ax + By + Cz + D = 0. - * - * @param coefficients A shared pointer to the pcl::ModelCoefficients object. - * @param ray_direction The normalized Eigen::Vector3f ray direction. - * @return An Eigen::Vector3f representing the 3D intersection point. 
- */ - Eigen::Vector3f find_ray_plane_intersection( - const pcl::ModelCoefficients::Ptr& coefficients, - const Eigen::Vector3f& ray_direction) const; - - /** - * @brief Shifts a point along a plane normal by a given distance. - * - * This function is useful for moving a point from the detected plane's - * surface to a new location, such as the center of a valve knob. - * The shift is performed along the vector of the consistent plane normal. - * - * @param intersection_point The 3D intersection point on the plane. - * @param plane_normal The consistent, normalized plane normal vector. - * @return The new Eigen::Vector3f point after the shift. - */ - Eigen::Vector3f shift_point_along_normal( - const Eigen::Vector3f& intersection_point, - const Eigen::Vector3f& plane_normal) const; - -protected: - ImageProperties color_image_properties_; - int yolo_img_width_; - int yolo_img_height_; - float annulus_radius_ratio_; - float plane_ransac_threshold_; - int plane_ransac_max_iterations_; - float valve_handle_offset_; - float letterbox_scale_factor_; - int letterbox_pad_x_; - int letterbox_pad_y_; -} -} // namespace valve_detection - -#endif // VALVE_POSE_BASE_HPP \ No newline at end of file diff --git a/include/valve_detection/valve_pose_depth.hpp b/include/valve_detection/valve_pose_depth.hpp deleted file mode 100644 index f826a4c..0000000 --- a/include/valve_detection/valve_pose_depth.hpp +++ /dev/null @@ -1,35 +0,0 @@ -#ifndef VALVE_POSE_DEPTH_HPP -#define VALVE_POSE_DEPTH_HPP - -#include "valve_detection/valve_pose_base.hpp" -#include -#include -#include - -namespace valve_detection { - -class ValvePoseDepth : public ValvePoseBase { -public: - ValvePoseDepth(int yolo_img_width, int yolo_img_height_, float annulus_radius_ratio, - float plane_ransac_threshold, int plane_ransac_max_iterations); - virtual ~ValvePoseDepth() = default; - - void project_pixel_to_point(int u, - int v, - float depth, - pcl::PointXYZ& point) const; - - void extract_annulus_pcl( - const cv::Mat& 
depth_image, - const BoundingBox& bbox, - pcl::PointCloud::Ptr& cloud) const; - -private: - ImageProperties depth_image_properties_; - double height_scalar_; - double width_scalar_; -}; - -} // namespace valve_detection - -#endif // VALVE_POSE_DEPTH_HPP \ No newline at end of file diff --git a/include/valve_detection_ros/valve_pose_depth_ros.hpp b/include/valve_detection_ros/valve_pose_depth_ros.hpp deleted file mode 100644 index 8e5ce9f..0000000 --- a/include/valve_detection_ros/valve_pose_depth_ros.hpp +++ /dev/null @@ -1,76 +0,0 @@ -#ifndef VALVE_POSE_DEPTH_ROS_HPP -#define VALVE_POSE_DEPTH_ROS_HPP - -#include "valve_detection/valve_pose_depth.hpp" -#include -#include -#include -#include - -#include -#include -#include - - -namespace valve_detection { - -class ValvePoseDepthNode { -public: - explicit ValvePoseDepthNode(const rclcpp::NodeOptions& options); - virtual ~ValvePoseDepthNode() = default; - - -private: - void color_image_info_callback( - const sensor_msgs::msg::CameraInfo::SharedPtr camera_info_msg); - - BoundingBox to_bounding_box(const vision_msgs::msg::BoundingBox2D& bbox) const; - - void publish_annulus_pcl(const pcl::PointCloud::Ptr& cloud) const; - - /** - * @brief Callback function for synchronized depth image, color image, and - * 2D detections. - * - * This function is triggered when synchronized messages for a depth image, - * a color image, and a 2D detection array are received. 
- */ - void synchronized_callback( - const sensor_msgs::msg::Image::ConstSharedPtr& depth_image, - const sensor_msgs::msg::Image::ConstSharedPtr& color_image, - const vision_msgs::msg::Detection2DArray::ConstSharedPtr& detections); - - - rclcpp::Subscription::SharedPtr color_image_info_sub_; - bool color_image_info_received_ = false; - - message_filters::Subscriber depth_image_sub_; - message_filters::Subscriber color_image_sub_; - message_filters::Subscriber - detections_sub_; - - typedef message_filters::sync_policies::ExactTime< - sensor_msgs::msg::Image, - sensor_msgs::msg::Image, - vision_msgs::msg::Detection2DArray> - MySyncPolicy; - - std::shared_ptr> sync_; - - rclcpp::Publisher::SharedPtr - valve_pose_pub_; - - std::unique_ptr valve_detector_; - - std::string color_image_frame_id_; - - bool visualize_detections_ = true; - bool debug_visualize_ = false; - bool calculate_angle_ = true; - rclcpp::Publisher::SharedPtr annulus_pcl_pub_; - rclcpp::Publisher::SharedPtr plane_normal_pub_; -} - -} // namespace valve_detection - -#endif // VALVE_POSE_DEPTH_ROS_HPP \ No newline at end of file diff --git a/include/valve_detection_ros/valve_pose_ros.hpp b/include/valve_detection_ros/valve_pose_ros.hpp new file mode 100644 index 0000000..51d645a --- /dev/null +++ b/include/valve_detection_ros/valve_pose_ros.hpp @@ -0,0 +1,310 @@ +#ifndef VALVE_POSE_DEPTH_ROS_HPP +#define VALVE_POSE_DEPTH_ROS_HPP + +#include +#include +#include +#include +#include +#include +#include +#include +#include "valve_detection/valve_detector.hpp" + +#include +#include +#include +#include + +#include +#include +#include +#include + +namespace valve_detection { + +/** + * @class ValvePoseNode + * @brief ROS 2 node for computing valve poses from depth images or point clouds + * and 2D detections. + * + * This node subscribes to depth images or point clouds, color images + * (optional), and 2D detection messages. 
It synchronizes the inputs, computes + * valve poses using ValveDetector, and publishes results as PoseArray and + * optionally as visualized point clouds or images. + */ +class ValvePoseNode : public rclcpp::Node { + public: + /** + * @brief Constructs the ValvePoseNode with ROS node options. + * @param options Node options. + */ + explicit ValvePoseNode(const rclcpp::NodeOptions& options); + + virtual ~ValvePoseNode() = default; + + private: + /** + * @brief Callback for camera info subscription to obtain intrinsics and + * image size. + * @param camera_info_msg Shared pointer to the CameraInfo message. + */ + void color_image_info_callback( + const sensor_msgs::msg::CameraInfo::SharedPtr camera_info_msg); + + /** + * @brief Initializes the ValveDetector with the specified ros parameters. + */ + void init_valve_detector(); + + /** + * @brief Initializes the angle detector inside the ValveDetector if angle + * calculation is enabled. + */ + void init_angle_detector(); + + /** + * @brief Converts a vision_msgs::BoundingBox2D to local BoundingBox type. + * @param bbox The input 2D bounding box message. + * @return Converted BoundingBox. + */ + BoundingBox to_bounding_box( + const vision_msgs::msg::BoundingBox2D& bbox) const; + + /** + * @brief Publishes a point cloud containing annulus points. + * @param cloud The point cloud to publish. + * @param header ROS message header to use for publication. + */ + void publish_annulus_pcl(const pcl::PointCloud::Ptr& cloud, + const std_msgs::msg::Header& header) const; + + /** + * @brief Publishes a point cloud containing segmented plane points. + * @param cloud The point cloud to publish. + * @param header ROS message header to use for publication. + */ + void publish_annulus_plane(const pcl::PointCloud::Ptr& cloud, + const std_msgs::msg::Header& header) const; + + /** + * @brief Publishes computed valve poses as a PoseArray message. + * @param poses Vector of computed valve poses. 
+ * @param header ROS message header to use for publication. + */ + void publish_valve_poses(const std::vector& poses, + const std_msgs::msg::Header& header) const; + + /** + * @brief Configures message_filters synchronizers based on available + * inputs. + */ + void setup_sync(); + + /** + * @brief Callback for Depth Image + Color Image + Detections + * synchronization. + */ + void di_ci_d_callback( + const sensor_msgs::msg::Image::ConstSharedPtr& depth_image, + const sensor_msgs::msg::Image::ConstSharedPtr& color_image, + const vision_msgs::msg::Detection2DArray::ConstSharedPtr& detections); + + /** + * @brief Callback for Depth Image + Detections synchronization (no color + * image). + */ + void di_d_callback( + const sensor_msgs::msg::Image::ConstSharedPtr& depth_image, + const vision_msgs::msg::Detection2DArray::ConstSharedPtr& detections); + + /** + * @brief Callback for Point Cloud + Color Image + Detections + * synchronization. + */ + void pc_ci_d_callback( + const sensor_msgs::msg::PointCloud2::ConstSharedPtr& pcl, + const sensor_msgs::msg::Image::ConstSharedPtr& color_image, + const vision_msgs::msg::Detection2DArray::ConstSharedPtr& detections); + + /** + * @brief Callback for Point Cloud + Detections synchronization (no color + * image). + */ + void pc_d_callback( + const sensor_msgs::msg::PointCloud2::ConstSharedPtr& pcl, + const vision_msgs::msg::Detection2DArray::ConstSharedPtr& detections); + + /** + * @brief Generic template callback for synchronized inputs. + * + * Supports T = sensor_msgs::msg::Image (depth image) or PointCloud2. + * Handles optional color image and converts messages to OpenCV/PCL formats. + * + * @tparam T Depth input message type. + * @param depth_msg Depth image or point cloud message. + * @param color_image_msg Optional color image message. + * @param detections 2D detection message. 
+ */ + template + void synchronized_callback( + const std::shared_ptr& depth_msg, + const sensor_msgs::msg::Image::ConstSharedPtr& color_image_msg, + const vision_msgs::msg::Detection2DArray::ConstSharedPtr& detections) { + if (!color_image_info_received_) { + return; + } + if (detections->detections.empty() && !visualize_detections_) { + return; + } + + cv::Mat cv_depth_image; + sensor_msgs::msg::PointCloud2 pcl_tf; + cv::Mat cv_color_image; + + if constexpr (std::is_same::value) { + std::string source_frame = depth_msg->header.frame_id; + std::string target_frame = color_image_frame_id_; + + try { + geometry_msgs::msg::TransformStamped transform = + tf_buffer_->lookupTransform( + target_frame, source_frame, + rclcpp::Time(depth_msg->header.stamp)); + + tf2::doTransform(*depth_msg, pcl_tf, transform); + } catch (tf2::TransformException& ex) { + RCLCPP_ERROR(this->get_logger(), + "Failed to transform point cloud: %s", ex.what()); + return; + } + pcl::PointCloud::Ptr cloud( + new pcl::PointCloud); + pcl::fromROSMsg(pcl_tf, *cloud); + + } else if constexpr (std::is_same::value) { + cv_bridge::CvImageConstPtr cv_depth_ptr = + cv_bridge::toCvShare(depth_msg, "32FC1"); + cv_depth_image = cv_depth_ptr->image; + } else { + RCLCPP_ERROR(this->get_logger(), + "Unsupported message type in synchronized_callback."); + return; + } + + if (use_color_image_) { + if (!color_image_msg) { + RCLCPP_ERROR(this->get_logger(), + "Color image message is null."); + return; + } + cv_bridge::CvImageConstPtr cv_ptr = + cv_bridge::toCvShare(color_image_msg, "bgr8"); + + cv_color_image = cv_ptr->image; + } + + std::vector boxes; + boxes.reserve(detections->detections.size()); + for (const auto& detection : detections->detections) { + boxes.push_back(to_bounding_box(detection.bbox)); + } + + std::vector poses; + poses.reserve(boxes.size()); + + pcl::PointCloud::Ptr all_annulus_cloud; + pcl::PointCloud::Ptr all_annulus_plane_cloud; + + if constexpr (std::is_same::value) { + 
valve_detector_->Compute_valve_poses( + pcl_tf, cv_color_image, boxes, poses, all_annulus_cloud, + all_annulus_plane_cloud, pcl_visualize_, calculate_angle_); + } else if constexpr (std::is_same::value) { + valve_detector_->Compute_valve_poses( + cv_depth_image, cv_color_image, boxes, poses, all_annulus_cloud, + all_annulus_plane_cloud, pcl_visualize_, calculate_angle_); + } + if (pcl_visualize_ && all_annulus_cloud && all_annulus_plane_cloud) { + publish_annulus_pcl(all_annulus_cloud, depth_msg->header); + publish_annulus_plane(all_annulus_plane_cloud, depth_msg->header); + } + if (visualize_detections_) { + cv::Mat visualized_image = + valve_detector_->draw_detections(cv_color_image, boxes, poses); + sensor_msgs::msg::Image::SharedPtr output_msg = + cv_bridge::CvImage(depth_msg->header, "bgr8", visualized_image) + .toImageMsg(); + processed_image_pub_->publish(*output_msg); + } + publish_valve_poses(poses, depth_msg->header); + } + + rclcpp::Subscription::SharedPtr + color_image_info_sub_; + message_filters::Subscriber color_image_sub_; + message_filters::Subscriber pcl_sub_; + message_filters::Subscriber depth_image_sub_; + message_filters::Subscriber + detections_sub_; + + rclcpp::Publisher::SharedPtr + valve_poses_pub_; + rclcpp::Publisher::SharedPtr + annulus_pcl_pub_; + rclcpp::Publisher::SharedPtr + annulus_plane_pub_; + rclcpp::Publisher::SharedPtr processed_image_pub_; + + // Depth image sync policies uses ExactTime with the assumption + // that the depth image and color image are from the same camera + // and are synchronized. 
+ + // Policy for Depth Image + Color Image + Detections + typedef message_filters::sync_policies::ExactTime< + sensor_msgs::msg::Image, + sensor_msgs::msg::Image, + vision_msgs::msg::Detection2DArray> + SyncPolicy_DI_CI_D; + + // Policy for Depth Image + Detections (no color image) + typedef message_filters::sync_policies:: + ExactTime + SyncPolicy_DI_D; + + // Policy for Point Cloud + Color Image + Detections + typedef message_filters::sync_policies::ApproximateTime< + sensor_msgs::msg::PointCloud2, + sensor_msgs::msg::Image, + vision_msgs::msg::Detection2DArray> + SyncPolicy_PC_CI_D; + + // Policy for Point Cloud + Detections (no color image) + typedef message_filters::sync_policies::ApproximateTime< + sensor_msgs::msg::PointCloud2, + vision_msgs::msg::Detection2DArray> + SyncPolicy_PC_D; + + std::shared_ptr> + sync_di_ci_d_; + std::shared_ptr> sync_di_d_; + std::shared_ptr> + sync_pc_ci_d_; + std::shared_ptr> sync_pc_d_; + + bool color_image_info_received_ = false; + std::unique_ptr valve_detector_; + std::string color_image_frame_id_; + std::shared_ptr tf_buffer_; + std::shared_ptr tf_listener_; + + bool visualize_detections_ = true; + bool pcl_visualize_ = false; + bool calculate_angle_ = true; + bool use_color_image_ = true; + bool use_depth_image_ = true; +}; + +} // namespace valve_detection + +#endif // VALVE_POSE_DEPTH_ROS_HPP diff --git a/launch/valve_detection.launch.py b/launch/valve_detection.launch.py index 0ea4591..28ee6b0 100644 --- a/launch/valve_detection.launch.py +++ b/launch/valve_detection.launch.py @@ -1,24 +1,33 @@ import os from ament_index_python.packages import get_package_share_directory -from launch_ros.actions import Node +from launch_ros.actions import ComposableNodeContainer +from launch_ros.descriptions import ComposableNode from launch import LaunchDescription +config_file = os.path.join( + get_package_share_directory("valve_detection"), + "config", + "valve_detection_params.yaml", +) + def generate_launch_description(): - 
valve_detection_node = Node( - package='valve_detection', - executable='valve_detection_node', - name='valve_detection_node', - parameters=[ - os.path.join( - get_package_share_directory('valve_detection'), - 'config', - 'valve_detection_params.yaml', + container = ComposableNodeContainer( + name='valve_detection_container', + namespace='', + package='rclcpp_components', + executable='component_container_mt', + composable_node_descriptions=[ + ComposableNode( + package='valve_detection', + plugin='valve_detection::ValvePoseNode', + name='valve_pose_node', + parameters=[config_file], ), - {'use_sim_time': True}, ], output='screen', ) - return LaunchDescription([valve_detection_node]) + + return LaunchDescription([container]) diff --git a/package.xml b/package.xml index 19b40b4..7c4730e 100644 --- a/package.xml +++ b/package.xml @@ -17,6 +17,7 @@ geometry_msgs std_msgs pcl_conversions + tf2_geometry_msgs ament_lint_auto ament_lint_common diff --git a/src/angle_detector.cpp b/src/angle_detector.cpp new file mode 100644 index 0000000..649fc69 --- /dev/null +++ b/src/angle_detector.cpp @@ -0,0 +1,81 @@ +#include "valve_detection/angle_detector.hpp" +#include +#include "valve_detection/types.hpp" + +namespace valve_detection { + +double AngleDetector::calculate_angle(const cv::Mat& color_image, + const BoundingBox& bbox) const { + double width = bbox.size_x; + double height = bbox.size_y; + double center_x = bbox.center_x; + double center_y = bbox.center_y; + + int x1 = std::max( + static_cast(center_x - width * params_.line_detection_area), 0); + int y1 = std::max( + static_cast(center_y - height * params_.line_detection_area), 0); + int x2 = std::min( + static_cast(center_x + width * params_.line_detection_area), + color_image.cols - 1); + int y2 = std::min( + static_cast(center_y + height * params_.line_detection_area), + color_image.rows - 1); + + cv::Mat roi = color_image(cv::Rect(x1, y1, x2 - x1, y2 - y1)); + + cv::Mat mask = cv::Mat::zeros(roi.size(), CV_8UC1); + 
cv::Point center((roi.cols / 2), (roi.rows / 2)); + cv::Size axes(roi.cols / 2, roi.rows / 2); + cv::ellipse(mask, center, axes, 0, 0, 360, cv::Scalar(255), -1); + + cv::Mat masked_roi; + roi.copyTo(masked_roi, mask); + + cv::Mat gray; + cv::cvtColor(masked_roi, gray, cv::COLOR_BGR2GRAY); + + cv::Mat edges = apply_canny_edge_detection(gray); + + std::vector lines; + cv::HoughLinesP(edges, lines, params_.hough_rho_res, + params_.hough_theta_res, params_.hough_threshold, + params_.hough_min_line_length, params_.hough_max_line_gap); + + if (lines.empty()) { + return std::numeric_limits::quiet_NaN(); + } + + cv::Vec4i longest_line = find_longest_line(lines); + return std::atan2(longest_line[3] - longest_line[1], + longest_line[2] - longest_line[0]); +} + +cv::Vec4i AngleDetector::find_longest_line( + const std::vector& lines) const { + if (lines.empty()) { + return cv::Vec4i(0, 0, 0, 0); + } + + cv::Vec4i longest_line = lines[0]; + double max_length = 0; + + for (const auto& line : lines) { + double length = std::hypot(line[2] - line[0], line[3] - line[1]); + if (length > max_length) { + max_length = length; + longest_line = line; + } + } + return longest_line; +} + +cv::Mat AngleDetector::apply_canny_edge_detection( + const cv::Mat& gray_image) const { + cv::Mat edges; + cv::Canny(gray_image, edges, params_.canny_low_threshold, + params_.canny_high_threshold, params_.canny_aperture_size); + return edges; +} + +} // namespace valve_detection diff --git a/src/depth_image_processing.cpp b/src/depth_image_processing.cpp new file mode 100644 index 0000000..c76fee4 --- /dev/null +++ b/src/depth_image_processing.cpp @@ -0,0 +1,88 @@ +#include "valve_detection/depth_image_processing.hpp" + +namespace valve_detection { + +void project_pixel_to_point(int u, + int v, + float depth, + double fx, + double fy, + double cx, + double cy, + pcl::PointXYZ& point) { + if (depth <= 0 || depth == std::numeric_limits::infinity() || + std::isnan(depth)) { + point.x = point.y = point.z = 
std::numeric_limits::quiet_NaN(); + return; + } + + point.x = (u - cx) * depth / fx; + point.y = (v - cy) * depth / fy; + point.z = depth; +} + +void extract_annulus_pcl(const cv::Mat& depth_image, + const BoundingBox& bbox, + const ImageProperties& image_properties, + float annulus_radius_ratio, + pcl::PointCloud::Ptr& cloud) { + float center_x = bbox.center_x; + float center_y = bbox.center_y; + float outer_radius_x = bbox.size_x / 2.0f; + float outer_radius_y = bbox.size_y / 2.0f; + + float inner_radius_x = outer_radius_x * annulus_radius_ratio; + float inner_radius_y = outer_radius_y * annulus_radius_ratio; + + int upper_bound_size = static_cast( + M_PI * + ((outer_radius_x * outer_radius_y - inner_radius_x * inner_radius_y) + + 1)); + + cloud->clear(); + cloud->reserve(upper_bound_size); + + for (int y = static_cast(-outer_radius_y); + y <= static_cast(outer_radius_y); ++y) { + for (int x = static_cast(-outer_radius_x); + x <= static_cast(outer_radius_x); ++x) { + float dx_scaled_outer = x / outer_radius_x; + float dy_scaled_outer = y / outer_radius_y; + bool is_inside_outer_ellipse = + (dx_scaled_outer * dx_scaled_outer + + dy_scaled_outer * dy_scaled_outer) <= 1.0f; + + float dx_scaled_inner = x / inner_radius_x; + float dy_scaled_inner = y / inner_radius_y; + bool is_outside_inner_ellipse = + (dx_scaled_inner * dx_scaled_inner + + dy_scaled_inner * dy_scaled_inner) > 1.0f; + + if (is_inside_outer_ellipse && is_outside_inner_ellipse) { + int u = static_cast(center_x + x); + int v = static_cast(center_y + y); + + if (u >= 0 && u < depth_image.cols && v >= 0 && + v < depth_image.rows) { + float depth_value = depth_image.at(v, u); + pcl::PointXYZ point; + project_pixel_to_point( + u, v, depth_value, image_properties.intr.fx, + image_properties.intr.fy, image_properties.intr.cx, + image_properties.intr.cy, point); + + if (!std::isnan(point.x) && !std::isnan(point.y) && + !std::isnan(point.z)) { + cloud->points.push_back(point); + } + } + } + } + } + + 
cloud->width = static_cast(cloud->points.size()); + cloud->height = 1; + cloud->is_dense = false; +} + +} // namespace valve_detection diff --git a/src/pointcloud_processing.cpp b/src/pointcloud_processing.cpp new file mode 100644 index 0000000..707aa69 --- /dev/null +++ b/src/pointcloud_processing.cpp @@ -0,0 +1,59 @@ +#include "valve_detection/pointcloud_processing.hpp" + +namespace valve_detection { + +void extract_annulus_pcl(const pcl::PointCloud::Ptr& input_cloud, + const BoundingBox& bbox, + const ImageProperties& image_properties, + float annulus_radius_ratio, + pcl::PointCloud::Ptr& cloud) { + float center_x = bbox.center_x; + float center_y = bbox.center_y; + float outer_radius_x = bbox.size_x / 2.0f; + float outer_radius_y = bbox.size_y / 2.0f; + + float inner_radius_x = outer_radius_x * annulus_radius_ratio; + float inner_radius_y = outer_radius_y * annulus_radius_ratio; + + double fx = image_properties.intr.fx; + double fy = image_properties.intr.fy; + double cx = image_properties.intr.cx; + double cy = image_properties.intr.cy; + + cloud->clear(); + cloud->points.reserve(input_cloud->points.size()); + + for (const auto& point_in : input_cloud->points) { + if (std::isnan(point_in.z)) { + continue; + } + + int u = static_cast((point_in.x * fx / point_in.z) + cx); + int v = static_cast((point_in.y * fy / point_in.z) + cy); + + if (u >= center_x - outer_radius_x && u <= center_x + outer_radius_x && + v >= center_y - outer_radius_y && v <= center_y + outer_radius_y) { + float dx_scaled_outer = (u - center_x) / outer_radius_x; + float dy_scaled_outer = (v - center_y) / outer_radius_y; + bool is_inside_outer_ellipse = + (dx_scaled_outer * dx_scaled_outer + + dy_scaled_outer * dy_scaled_outer) <= 1.0f; + + float dx_scaled_inner = (u - center_x) / inner_radius_x; + float dy_scaled_inner = (v - center_y) / inner_radius_y; + bool is_outside_inner_ellipse = + (dx_scaled_inner * dx_scaled_inner + + dy_scaled_inner * dy_scaled_inner) > 1.0f; + + if 
(is_inside_outer_ellipse && is_outside_inner_ellipse) { + cloud->points.push_back(point_in); + } + } + } + + cloud->width = static_cast(cloud->points.size()); + cloud->height = 1; + cloud->is_dense = false; +} + +} // namespace valve_detection diff --git a/src/valve_detection.cpp b/src/valve_detection.cpp deleted file mode 100644 index 482751f..0000000 --- a/src/valve_detection.cpp +++ /dev/null @@ -1,600 +0,0 @@ -#include - -using std::placeholders::_1; -using std::placeholders::_2; -using std::placeholders::_3; - -ValveDetectionNode::ValveDetectionNode(const rclcpp::NodeOptions& options) - : Node("valve_detection_node", options) { - auto depth_image_sub_topic = - this->declare_parameter("depth_image_sub_topic"); - auto color_image_sub_topic = - this->declare_parameter("color_image_sub_topic"); - auto detections_sub_topic = - this->declare_parameter("detections_sub_topic"); - - line_detection_area_ = - this->declare_parameter("line_detection_area", 5); - - rclcpp::QoS qos = rclcpp::QoS(rclcpp::KeepLast(10)) - .reliability(RMW_QOS_POLICY_RELIABILITY_BEST_EFFORT); - depth_image_sub_.subscribe(this, depth_image_sub_topic, - qos.get_rmw_qos_profile()); - color_image_sub_.subscribe(this, color_image_sub_topic, - qos.get_rmw_qos_profile()); - detections_sub_.subscribe(this, detections_sub_topic, - qos.get_rmw_qos_profile()); - - sync_ = std::make_shared>( - MySyncPolicy(10), depth_image_sub_, color_image_sub_, detections_sub_); - sync_->registerCallback(std::bind( - &ValveDetectionNode::synchronized_callback, this, _1, _2, _3)); - - // Create subscriptions with synchronized callbacks - camera_info_subscription_yolo_color_ = - this->create_subscription( - "/yolov8_encoder/resize/camera_info", 10, - std::bind(&ValveDetectionNode::camera_info_callback_yolo_colored, - this, std::placeholders::_1)); - - // Create subscriptions with synchronized callbacks - camera_info_subscription_ = - this->create_subscription( - "/zed_node/depth/camera_info", 10, - 
std::bind(&ValveDetectionNode::camera_info_callback, this, - std::placeholders::_1)); - - processed_image_pub_ = this->create_publisher( - "yolov8_valve_detection_image", 10); - plane_normal_pub_ = - this->create_publisher("plane_normal", 10); - pointcloud_pub_ = this->create_publisher( - "point_cloud", 10); - line_pose_pub_ = this->create_publisher( - "valve_pose", 10); - line_points_pub_ = this->create_publisher( - "line_points", 10); - near_plane_cloud_pub_ = - this->create_publisher( - "near_plane_cloud", 10); - canny_debug_image_pub_ = this->create_publisher( - "canny_valve_detection_image", 10); -} - -Eigen::Vector3f ValveDetectionNode::filter_direction_ = - Eigen::Vector3f(1, 0, 0); - -void ValveDetectionNode::camera_info_callback( - const sensor_msgs::msg::CameraInfo::SharedPtr camera_info_msg) { - if (!camera_info_received_) { - camera_info_ = camera_info_msg; - camera_info_received_ = true; - RCLCPP_INFO(this->get_logger(), - "Camera resize info received and stored."); - - compute_height_width_scalars(); - // unsubscribe to avoid further callback executions - camera_info_subscription_.reset(); - } -} - -void ValveDetectionNode::camera_info_callback_yolo_colored( - const sensor_msgs::msg::CameraInfo::SharedPtr camera_info_msg) { - if (!camera_info_received_yolo_color_) { - camera_info_yolo_color_ = camera_info_msg; - camera_info_received_yolo_color_ = true; - RCLCPP_INFO(this->get_logger(), - "Camera yolo color info received and stored."); - - compute_height_width_scalars(); - // unsubscribe to avoid further callback executions - camera_info_subscription_yolo_color_.reset(); - } -} - -void ValveDetectionNode::compute_height_width_scalars() { - if (camera_info_received_ && camera_info_received_yolo_color_) { - height_scalar_ = - static_cast(camera_info_yolo_color_->height - 280) / - camera_info_->height; - width_scalar_ = static_cast(camera_info_yolo_color_->width) / - camera_info_->width; - - RCLCPP_INFO(this->get_logger(), "Height scalar: %.3f", 
height_scalar_); - RCLCPP_INFO(this->get_logger(), "Width scalar: %.3f", width_scalar_); - } -} - -void ValveDetectionNode::project_pixel_to_3d(int u, - int v, - float depth, - pcl::PointXYZ& point) { - if (!camera_info_ || depth <= 0) { - return; // Skip invalid depth or missing camera info - } - - // Extract intrinsic parameters - double fx = camera_info_->k[0]; // k[0] = fx - double fy = camera_info_->k[4]; // k[4] = fy - double cx = camera_info_->k[2]; // k[2] = cx - double cy = camera_info_->k[5]; // k[5] = cy - - // Calculate 3D point coordinates - // transform from optical to camera frame - point.x = depth; - point.y = -(u - cx) * depth / fx; - point.z = -(v - cy) * depth / fy; -} - -void ValveDetectionNode::project_3d_to_pixel(float x, - float y, - float z, - int& u, - int& v) { - if (!camera_info_ || z <= 0) { - return; // Ensure camera info is available and z is positive - } - - // Extract intrinsic parameters - double fx = camera_info_->k[0]; // Focal length in x direction - double fy = camera_info_->k[4]; // Focal length in y direction - double cx = camera_info_->k[2]; // Principal point in x direction - double cy = camera_info_->k[5]; // Principal point in y direction - - // Project the 3D point to 2D pixel coordinates - // transform from camera to optical frame - u = static_cast((-y * fx) / x + cx); - v = static_cast((-z * fy) / x + cy); -} - -void ValveDetectionNode::synchronized_callback( - const sensor_msgs::msg::Image::ConstSharedPtr& depth_image, - const sensor_msgs::msg::Image::ConstSharedPtr& color_image, - const vision_msgs::msg::Detection2DArray::ConstSharedPtr& detections) { - RCLCPP_INFO(this->get_logger(), - "Synchronized image and detection messages received."); - - process_and_publish_image(depth_image, color_image, detections); - // process_and_publish_image(depth_image, detections); -} - -void ValveDetectionNode::process_and_publish_image( - const sensor_msgs::msg::Image::ConstSharedPtr& depth_image, - const 
sensor_msgs::msg::Image::ConstSharedPtr& color_image, - const vision_msgs::msg::Detection2DArray::ConstSharedPtr& detections) { - if (!camera_info_received_ || !camera_info_received_yolo_color_) { - return; - } - - try { - // Convert depth and color images - cv::Mat cv_depth_image = - cv_bridge::toCvCopy(depth_image, "32FC1")->image; - cv::Mat cv_color_image = - cv_bridge::toCvCopy(color_image, "bgr8")->image; - - if (!detections->detections.empty()) { - const auto* first_detection = &detections->detections[0]; - - // The most centred of the two most likely variables are chosen as a - // boundingbox. Calculate distance to center (cx, cy) - double cx = camera_info_->k[2]; - double cy = camera_info_->k[5]; - if (detections->detections.size() > 1) { - float dist_first = std::sqrt( - std::pow( - detections->detections[0].bbox.center.position.x - cx, - 2) + - std::pow( - detections->detections[0].bbox.center.position.y - cy, - 2)); - - float dist_second = std::sqrt( - std::pow( - detections->detections[1].bbox.center.position.x - cx, - 2) + - std::pow( - detections->detections[1].bbox.center.position.y - cy, - 2)); - - // Select the detection that is closer to the center. 
- if (dist_first > dist_second) { - first_detection = &detections->detections[1]; - } - } - - int center_x = static_cast( - first_detection->bbox.center.position.x / this->width_scalar_); - int center_y = static_cast( - (first_detection->bbox.center.position.y - 140) / - this->height_scalar_); - int width = static_cast(first_detection->bbox.size_x / - this->width_scalar_); - int height = static_cast(first_detection->bbox.size_y / - this->height_scalar_); - - pcl::PointCloud::Ptr cloud( - new pcl::PointCloud); - bool points_added = false; - - for (int y = center_y - (height / 2); y <= center_y + (height / 2); - ++y) { - for (int x = center_x - (width / 2); - x <= center_x + (width / 2); ++x) { - int dx = x - center_x; - int dy = y - center_y; - if (dx * dx / (float)((width / 2) * (width / 2)) + - dy * dy / (float)((height / 2) * (height / 2)) <= - 1.0) { - if (y >= 0 && y < cv_depth_image.rows && x >= 0 && - x < cv_depth_image.cols) { - float depth_value = cv_depth_image.at(y, x); - pcl::PointXYZ point; - project_pixel_to_3d(x, y, depth_value, point); - - if (depth_value > 0) // Check for valid depth - { - cloud->points.push_back(point); - points_added = true; - } - } - } - } - } - - if (points_added && cloud->points.size() > 0) { - sensor_msgs::msg::PointCloud2 cloud_msg; - pcl::toROSMsg(*cloud, cloud_msg); - cloud_msg.header.stamp = depth_image->header.stamp; - cloud_msg.header.frame_id = "zed_left_camera_frame"; - - pointcloud_pub_->publish(cloud_msg); - - pcl::SACSegmentation seg; - seg.setOptimizeCoefficients(true); - seg.setModelType(pcl::SACMODEL_PLANE); - seg.setMethodType(pcl::SAC_RANSAC); - seg.setDistanceThreshold(0.01); - - pcl::ModelCoefficients::Ptr coefficients( - new pcl::ModelCoefficients); - pcl::PointIndices::Ptr inliers(new pcl::PointIndices); - seg.setInputCloud(cloud); - seg.segment(*inliers, *coefficients); - - if (inliers->indices.size() > 0) { - geometry_msgs::msg::Vector3 normal_msg; - normal_msg.x = coefficients->values[0]; - normal_msg.y 
= coefficients->values[1]; - normal_msg.z = coefficients->values[2]; - plane_normal_pub_->publish(normal_msg); - - RCLCPP_INFO( - this->get_logger(), - "RANSAC successful: Plane normal (%.3f, %.3f, %.3f)", - coefficients->values[0], coefficients->values[1], - coefficients->values[2]); - - pcl::PointCloud::Ptr near_plane_cloud( - new pcl::PointCloud); - - // Select points within a distance threshold of the plane - // model - pcl::ExtractIndices extract; - extract.setInputCloud(cloud); - extract.setIndices(inliers); - extract.setNegative(false); // Select the inliers - extract.filter(*near_plane_cloud); - - // Now we can use `selectWithinDistance` to select inliers - // that are within the distance threshold - pcl::PointIndices::Ptr selected_inliers( - new pcl::PointIndices()); - float threshold_distance = - 0.02; // Example threshold (3 cm) - - // Logic for selecting points close to the - // threshold_distance of the plane. - for (size_t i = 0; i < cloud->points.size(); ++i) { - const auto& point = cloud->points[i]; - float distance = - std::abs(coefficients->values[0] * point.x + - coefficients->values[1] * point.y + - coefficients->values[2] * point.z + - coefficients->values[3]) / - std::sqrt(coefficients->values[0] * - coefficients->values[0] + - coefficients->values[1] * - coefficients->values[1] + - coefficients->values[2] * - coefficients->values[2]); - if (distance < threshold_distance) { - selected_inliers->indices.push_back(i); - } - } - RCLCPP_INFO(this->get_logger(), "Selected inlier size: %zu", - selected_inliers->indices.size()); - - // Extract the selected inliers - pcl::PointCloud::Ptr final_inliers_cloud( - new pcl::PointCloud); - - for (const auto& idx : selected_inliers->indices) { - // Convert 3D point (x, y) to image space (u, v) - int u, v; - const auto& point = cloud->points[idx]; - project_3d_to_pixel(point.x, point.y, point.z, u, v); - - // Check if the point (u, v) is inside the smaller - // ellipse - int dx = u - center_x; - int dy = v - 
center_y; - - // Check if point is inside the smaller ellipse in image - // space - float a = static_cast(width) / - 6.0f; // Semi-major axis - float b = static_cast(height) / - 6.0f; // Semi-minor axis - - if ((dx * dx) / (a * a) + (dy * dy) / (b * b) <= 1.0f) { - // Point is inside the ellipse, add it to the - // near_plane_cloud - final_inliers_cloud->points.push_back( - cloud->points[idx]); - } - } - - near_plane_cloud = final_inliers_cloud; - - // Calculate centroid position from near_plane_cloud - Eigen::Vector3f centroid(0, 0, 0); - if (!near_plane_cloud->empty()) { - for (const auto& point : near_plane_cloud->points) { - centroid += - Eigen::Vector3f(point.x, point.y, point.z); - } - centroid /= near_plane_cloud->points.size(); - } else { - RCLCPP_WARN(this->get_logger(), - "No valid near plane points found."); - return; - } - - int x1 = std::max( - center_x - width * line_detection_area_ / 100, 0); - int y1 = std::max( - center_y - height * line_detection_area_ / 100, 0); - int x2 = - std::min(center_x + width * line_detection_area_ / 100, - cv_color_image.cols - 1); - int y2 = - std::min(center_y + height * line_detection_area_ / 100, - cv_color_image.rows - 1); - // // Draw roi for debugging - // cv::rectangle(cv_color_image, cv::Point(x1, y1), - // cv::Point(x2, y2), cv::Scalar(0, 255, 0), 2); - - // // Extract ROI (region of interest) from the color image - // cv::Mat roi = cv_color_image(cv::Rect(x1, y1, x2 - x1, y2 - // - y1)); - - // Define the ellipse parameters - cv::Point center((x1 + x2) / 2, - (y1 + y2) / 2); // Center of the ellipse - cv::Size axes( - (x2 - x1) / 2, - (y2 - y1) / 2); // Semi-major and semi-minor axes - - // Create a mask for the elliptical ROI - cv::Mat mask = - cv::Mat::zeros(cv_color_image.size(), CV_8UC1); - cv::ellipse(mask, center, axes, 0, 0, 360, cv::Scalar(255), - -1); // Draw a filled ellipse - - // Apply the mask to the ROI - cv::Mat roi = - cv_color_image(cv::Rect(x1, y1, x2 - x1, y2 - y1)); - cv::Mat masked_roi; - 
cv::bitwise_and(roi, roi, masked_roi, - mask(cv::Rect(x1, y1, x2 - x1, y2 - y1))); - - // For debugging: Draw the ellipse on the original image - cv::ellipse(cv_color_image, center, axes, 0, 0, 360, - cv::Scalar(0, 255, 0), 2); - - // Convert to grayscale for edge detection - cv::Mat gray; - cv::cvtColor(roi, gray, cv::COLOR_BGR2GRAY); - - // Apply Canny edge detection - cv::Mat edges; - cv::Canny(gray, edges, 50, 100, 3); - canny_debug_image_pub_->publish( - *cv_bridge::CvImage(color_image->header, "mono8", edges) - .toImageMsg()); - - // Detect lines using Hough Transform - std::vector lines; - cv::HoughLinesP(edges, lines, 1, CV_PI / 180, 20, 45, 5); - - if (!lines.empty()) { - cv::Vec4i longest_line = lines[0]; - double max_length = 0; - - for (const auto& line : lines) { - double length = std::hypot(line[2] - line[0], - line[3] - line[1]); - if (length > max_length) { - max_length = length; - longest_line = line; - } - } - - double angle = - std::atan2(longest_line[3] - longest_line[1], - longest_line[2] - longest_line[0]); - - Eigen::Vector3f plane_normal(coefficients->values[0], - coefficients->values[1], - coefficients->values[2]); - plane_normal.normalize(); - - // Find an arbitrary vector perpendicular to the plane - // normal - Eigen::Vector3f perp_vector = - (std::abs(plane_normal.z()) < 0.99) - ? 
Eigen::Vector3f(0, 0, 1) - .cross(plane_normal) - .normalized() - : Eigen::Vector3f(1, 0, 0) - .cross(plane_normal) - .normalized(); - - // Rotate perp_vector by `angle` around `plane_normal` - // to get x-axis - Eigen::AngleAxisf rotation(angle, plane_normal); - Eigen::Vector3f x_axis = - rotation * perp_vector; // Rotated vector in-plane - - // Filter the x-axis direction to maintain consistency - if (filter_direction_.dot(x_axis) < 0) { - x_axis = -x_axis; // Flip to maintain consistency - } - - // Update filter_direction_ - filter_direction_ = x_axis; - - // Compute y-axis (perpendicular to both x and z) - Eigen::Vector3f y_axis = - plane_normal.cross(x_axis).normalized(); - - // Construct rotation matrix - Eigen::Matrix3f rotation_matrix; - rotation_matrix.col(0) = x_axis; - rotation_matrix.col(1) = y_axis; - rotation_matrix.col(2) = - plane_normal; // z-axis is the plane normal - - // Convert rotation matrix to quaternion - Eigen::Quaternionf quaternion(rotation_matrix); - - if (std::isnan(quaternion.x()) || - std::isnan(quaternion.y()) || - std::isnan(quaternion.z()) || - std::isnan(quaternion.w())) { - RCLCPP_WARN(this->get_logger(), - "Invalid quaternion computed, skipping " - "pose publishing."); - return; - } - - geometry_msgs::msg::PoseStamped line_pose_msg; - line_pose_msg.header.stamp = color_image->header.stamp; - line_pose_msg.header.frame_id = "zed_left_camera_frame"; - - line_pose_msg.pose.position.x = centroid[0]; - line_pose_msg.pose.position.y = centroid[1]; - line_pose_msg.pose.position.z = centroid[2]; - - line_pose_msg.pose.orientation.x = quaternion.x(); - line_pose_msg.pose.orientation.y = quaternion.y(); - line_pose_msg.pose.orientation.z = quaternion.z(); - line_pose_msg.pose.orientation.w = quaternion.w(); - - line_pose_pub_->publish(line_pose_msg); - - RCLCPP_INFO(this->get_logger(), - "Detected line published."); - } else { - RCLCPP_WARN(this->get_logger(), - "No lines detected in bounding box."); - } - if (!cv_color_image.empty()) { 
- // Draw the bounding box on the color image - int x1 = std::max(center_x - width / 2, 0); - int y1 = std::max(center_y - height / 2, 0); - int x2 = std::min(center_x + width / 2, - cv_color_image.cols - 1); - int y2 = std::min(center_y + height / 2, - cv_color_image.rows - 1); - - cv::rectangle(cv_color_image, cv::Point(x1, y1), - cv::Point(x2, y2), cv::Scalar(0, 255, 0), - 2); - - // Draw the detected line on the color image - if (!lines.empty()) { - cv::Vec4i longest_line = lines[0]; - double max_length = 0; - - // Find the longest line in the ROI - for (const auto& line : lines) { - double length = std::hypot(line[2] - line[0], - line[3] - line[1]); - if (length > max_length) { - max_length = length; - longest_line = line; - } - } - - // Adjust line coordinates to the full image size - // using the same x1 ... as roi calculation - int x1 = std::max( - center_x - width * line_detection_area_ / 100, - 0); - int y1 = std::max( - center_y - height * line_detection_area_ / 100, - 0); - int x2 = std::min( - center_x + width * line_detection_area_ / 100, - cv_color_image.cols - 1); - int y2 = std::min( - center_y + height * line_detection_area_ / 100, - cv_color_image.rows - 1); - cv::Point pt1( - longest_line[0] + x1, - longest_line[1] + y1); // Add ROI offset - cv::Point pt2( - longest_line[2] + x1, - longest_line[3] + y1); // Add ROI offset - - // Draw the line on the full color image - cv::line(cv_color_image, pt1, pt2, - cv::Scalar(0, 0, 255), 2); - - // Debugging output - RCLCPP_INFO( - this->get_logger(), - "ROI Coordinates: x1=%d, y1=%d, x2=%d, y2=%d", - x1, y1, x2, y2); - RCLCPP_INFO( - this->get_logger(), - "Longest Line in ROI: (%d, %d) -> (%d, %d)", - longest_line[0], longest_line[1], - longest_line[2], longest_line[3]); - RCLCPP_INFO(this->get_logger(), - "Adjusted Line in Full Image: (%d, %d) " - "-> (%d, %d)", - pt1.x, pt1.y, pt2.x, pt2.y); - } - } - } else { - RCLCPP_WARN(this->get_logger(), "No plane inliers found."); - } - } - } - // Convert the 
edited OpenCV image back to a ROS message - sensor_msgs::msg::Image::SharedPtr edited_image_msg = - cv_bridge::CvImage(color_image->header, "bgr8", cv_color_image) - .toImageMsg(); - - // Publish the edited image - processed_image_pub_->publish(*edited_image_msg); - - } catch (cv_bridge::Exception& e) { - RCLCPP_ERROR(this->get_logger(), "CvBridge Error: %s", e.what()); - } -} - -RCLCPP_COMPONENTS_REGISTER_NODE(ValveDetectionNode) diff --git a/src/valve_detector.cpp b/src/valve_detector.cpp new file mode 100644 index 0000000..b35cd81 --- /dev/null +++ b/src/valve_detector.cpp @@ -0,0 +1,241 @@ +#include "valve_detection/valve_detector.hpp" + +namespace valve_detection { + +ValveDetector::ValveDetector(int yolo_img_width_, + int yolo_img_height_, + float annulus_radius_ratio, + float plane_ransac_threshold, + int plane_ransac_max_iterations, + float valve_handle_offset) + : yolo_img_width_(yolo_img_width_), + yolo_img_height_(yolo_img_height_), + annulus_radius_ratio_(annulus_radius_ratio), + plane_ransac_threshold_(plane_ransac_threshold), + plane_ransac_max_iterations_(plane_ransac_max_iterations), + valve_handle_offset_(valve_handle_offset) {} + +void ValveDetector::init_angle_detector(const AngleDetectorParams& params) { + angle_detector_ = std::make_unique(params); +} + +cv::Mat ValveDetector::draw_detections(const cv::Mat& image, + const std::vector& boxes, + const std::vector& poses) const { + if (boxes.size() != poses.size()) { + return cv::Mat(); + } + + cv::Mat visualized_image = image.clone(); + + cv::Mat camera_matrix = + (cv::Mat_(3, 3) << color_image_properties_.intr.fx, 0, + color_image_properties_.intr.cx, 0, color_image_properties_.intr.fy, + color_image_properties_.intr.cy, 0, 0, 1); + cv::Mat dist_coeffs = cv::Mat::zeros(4, 1, CV_64F); + + for (size_t i = 0; i < boxes.size(); ++i) { + const auto& box = boxes[i]; + const auto& pose = poses[i]; + + int x1 = box.center_x - box.size_x / 2; + int y1 = box.center_y - box.size_y / 2; + int x2 = 
box.center_x + box.size_x / 2; + int y2 = box.center_y + box.size_y / 2; + cv::rectangle(visualized_image, cv::Point(x1, y1), cv::Point(x2, y2), + cv::Scalar(0, 255, 0), 2); + + cv::Mat tvec = (cv::Mat_(3, 1) << pose.position[0], + pose.position[1], pose.position[2]); + + Eigen::Matrix3f rotation_matrix = pose.orientation.toRotationMatrix(); + cv::Mat rmat = (cv::Mat_(3, 3) << rotation_matrix(0, 0), + rotation_matrix(0, 1), rotation_matrix(0, 2), + rotation_matrix(1, 0), rotation_matrix(1, 1), + rotation_matrix(1, 2), rotation_matrix(2, 0), + rotation_matrix(2, 1), rotation_matrix(2, 2)); + cv::Mat rvec; + cv::Rodrigues(rmat, rvec); + + cv::drawFrameAxes(visualized_image, camera_matrix, dist_coeffs, rvec, + tvec, 0.1); + } + return visualized_image; +} + +void ValveDetector::rmat_to_quat(const Eigen::Matrix3f& rotation_matrix, + Eigen::Quaternionf& quat) const { + quat = Eigen::Quaternionf(rotation_matrix); + quat.normalize(); +} + +void ValveDetector::project_point_to_pixel(float x, + float y, + float z, + int& u, + int& v) const { + if (z <= 0) { + u = v = -1; // Invalid pixel coordinates + return; + } + + double fx = color_image_properties_.intr.fx; + double fy = color_image_properties_.intr.fy; + double cx = color_image_properties_.intr.cx; + double cy = color_image_properties_.intr.cy; + + u = static_cast((x * fx / z) + cx); + v = static_cast((y * fy / z) + cy); +} + +void ValveDetector::calculate_letterbox_padding() { + int org_image_width = color_image_properties_.dim.width; + int org_image_height = color_image_properties_.dim.height; + + letterbox_scale_factor_ = + std::min(static_cast(yolo_img_width_) / org_image_width, + static_cast(yolo_img_height_) / org_image_height); + + double resized_width = org_image_width * letterbox_scale_factor_; + double resized_height = org_image_height * letterbox_scale_factor_; + + letterbox_pad_x_ = (yolo_img_width_ - resized_width) / 2.0; + letterbox_pad_y_ = (yolo_img_height_ - resized_height) / 2.0; +} + +BoundingBox 
ValveDetector::transform_bounding_box( + const BoundingBox& bbox) const { + BoundingBox transformed_bbox = bbox; + + transformed_bbox.center_x = + (bbox.center_x - letterbox_pad_x_) / letterbox_scale_factor_; + transformed_bbox.center_y = + (bbox.center_y - letterbox_pad_y_) / letterbox_scale_factor_; + transformed_bbox.size_x /= letterbox_scale_factor_; + transformed_bbox.size_y /= letterbox_scale_factor_; + + return transformed_bbox; +} + +bool ValveDetector::segment_plane( + const pcl::PointCloud::Ptr& cloud, + pcl::ModelCoefficients::Ptr& coefficients, + pcl::PointIndices::Ptr& inliers) const { + if (cloud->points.empty()) { + return false; + } + + pcl::SACSegmentation seg; + seg.setOptimizeCoefficients(true); + seg.setModelType(pcl::SACMODEL_PLANE); + seg.setMethodType(pcl::SAC_RANSAC); + seg.setDistanceThreshold(plane_ransac_threshold_); + seg.setMaxIterations(plane_ransac_max_iterations_); + seg.setInputCloud(cloud); + seg.segment(*inliers, *coefficients); + + return !inliers->indices.empty(); +} + +Eigen::Vector3f ValveDetector::get_ray_direction( + const BoundingBox& bbox) const { + float u = bbox.center_x; + float v = bbox.center_y; + + float fx = color_image_properties_.intr.fx; + float fy = color_image_properties_.intr.fy; + float cx = color_image_properties_.intr.cx; + float cy = color_image_properties_.intr.cy; + + float xc = (u - cx) / fx; + float yc = (v - cy) / fy; + + Eigen::Vector3f ray_direction; + ray_direction << xc, yc, 1.0f; + + return ray_direction.normalized(); +} + +Eigen::Vector3f ValveDetector::compute_plane_normal( + const pcl::ModelCoefficients::Ptr& coefficients, + const Eigen::Vector3f& ray_direction) const { + Eigen::Vector3f normal_vector; + + if (!coefficients || coefficients->values.size() < 3) { + return Eigen::Vector3f::Zero(); + } + + normal_vector << coefficients->values[0], coefficients->values[1], + coefficients->values[2]; + + normal_vector.normalize(); + + if (normal_vector.dot(ray_direction) > 0.0) { + normal_vector = 
-normal_vector; + } + + return normal_vector; +} + +Eigen::Vector3f ValveDetector::find_ray_plane_intersection( + const pcl::ModelCoefficients::Ptr& coefficients, + const Eigen::Vector3f& ray_direction) const { + if (!coefficients || coefficients->values.size() < 4) { + return Eigen::Vector3f::Zero(); + } + + Eigen::Vector3f plane_normal; + plane_normal << coefficients->values[0], coefficients->values[1], + coefficients->values[2]; + float D = coefficients->values[3]; + + float normal_dot_ray = plane_normal.dot(ray_direction); + + if (std::abs(normal_dot_ray) < 1e-6) { + // The ray is parallel or contained within the plane. + return Eigen::Vector3f::Zero(); + } + + float t = -D / normal_dot_ray; + + Eigen::Vector3f intersection_point = t * ray_direction; + + return intersection_point; +} + +Eigen::Vector3f ValveDetector::shift_point_along_normal( + const Eigen::Vector3f& intersection_point, + const Eigen::Vector3f& plane_normal) const { + return intersection_point + (plane_normal * valve_handle_offset_); +} + +Eigen::Matrix3f ValveDetector::create_rotation_matrix( + const Eigen::Vector3f& plane_normal, + float angle) { + Eigen::Vector3f z_axis = plane_normal; + + Eigen::Vector3f temp_vec(1, 0, 0); + if (std::abs(z_axis.dot(temp_vec)) > 0.99f) { + temp_vec = Eigen::Vector3f(0, 1, 0); + } + Eigen::Vector3f base_x_axis = z_axis.cross(temp_vec).normalized(); + + Eigen::AngleAxisf rotation(angle, z_axis); + Eigen::Vector3f x_axis = rotation * base_x_axis; + + if (filter_direction_.dot(x_axis) < 0) { + x_axis = -x_axis; + } + filter_direction_ = x_axis; + + Eigen::Vector3f y_axis = z_axis.cross(x_axis).normalized(); + + Eigen::Matrix3f rotation_matrix; + rotation_matrix.col(0) = x_axis; + rotation_matrix.col(1) = y_axis; + rotation_matrix.col(2) = z_axis; + + return rotation_matrix; +} + +} // namespace valve_detection diff --git a/src/valve_pose_base.cpp b/src/valve_pose_base.cpp deleted file mode 100644 index b780d0d..0000000 --- a/src/valve_pose_base.cpp +++ /dev/null 
@@ -1,181 +0,0 @@ -#include "valve_detection/valve_pose_base.hpp" - -namespace valve_detection { - -ValvePoseBase::ValvePoseBase(int yolo_img_width_, int yolo_img_height_, - float annulus_radius_ratio, - float plane_ransac_threshold, int plane_ransac_max_iterations, - float valve_handle_offset) - : yolo_img_width_(yolo_img_width_), yolo_img_height_(yolo_img_height_), - annulus_radius_ratio_(annulus_radius_ratio), - plane_ransac_threshold_(plane_ransac_threshold), - plane_ransac_max_iterations_(plane_ransac_max_iterations), - valve_handle_offset_(valve_handle_offset) { -} - -void ValvePoseBase::project_point_to_pixel(float x, float y, float z, int& u, int& v) const { - if (z <= 0) { - u = v = -1; // Invalid pixel coordinates - return; - } - - double fx = color_image_properties_.intr.fx; - double fy = color_image_properties_.intr.fy; - double cx = color_image_properties_.intr.cx; - double cy = color_image_properties_.intr.cy; - - u = static_cast((x * fx / z) + cx); - v = static_cast((y * fy / z) + cy); -} - -void ValvePoseBase::calculate_letterbox_padding() { - int org_image_width = color_image_properties_.dim.x; - int org_image_height = color_image_properties_.dim.y; - - letterbox_scale_factor_ = std::min( - static_cast(yolo_input_width) / original_width, - static_cast(yolo_input_height) / original_height); - - double resized_width = original_width * letterbox_scale_factor_; - double resized_height = original_height * letterbox_scale_factor_; - - letterbox_pad_x_ = (yolo_input_width - resized_width) / 2.0; - letterbox_pad_y_ = (yolo_input_height - resized_height) / 2.0; -} - -BoundingBox ValvePoseBase::transform_bounding_box(const BoundingBox& bbox) const { - BoundingBox transformed_bbox = bbox; - - transformed_bbox.center.position.x = (bbox.center_x - letterbox_pad_x_) / letterbox_scale_factor_; - transformed_bbox.center.position.y = (bbox.center_y - letterbox_pad_y_) / letterbox_scale_factor_; - transformed_bbox.size_x /= letterbox_scale_factor_; - 
transformed_bbox.size_y /= letterbox_scale_factor_; - - return transformed_bbox; -} - -bool ValvePoseBase::segment_plane(const pcl::PointCloud::Ptr& cloud, - pcl::ModelCoefficients::Ptr& coefficients, - pcl::PointIndices::Ptr& inliers) const { - - if (cloud->points.empty()) { - return false; - } - - pcl::SACSegmentation seg; - seg.setOptimizeCoefficients(true); - seg.setModelType(pcl::SACMODEL_PLANE); - seg.setMethodType(pcl::SAC_RANSAC); - seg.setDistanceThreshold(plane_ransac_threshold_); - seg.setMaxIterations(plane_ransac_max_iterations_); - seg.setInputCloud(cloud); - seg.segment(*inliers, *coefficients); - - return !inliers->indices.empty(); -} - -Eigen::Vector3f ValvePoseBase::get_ray_direction(const BoundingBox& bbox) const { - float u = bbox.center_x; - float v = bbox.center_y; - - float fx = color_image_properties_.intr.fx; - float fy = color_image_properties_.intr.fy; - float cx = color_image_properties_.intr.cx; - float cy = color_image_properties_.intr.cy; - - float xc = (u - cx) / fx; - float yc = (v - cy) / fy; - - Eigen::Vector3f ray_direction; - ray_direction << xc, yc, 1.0f; - - return ray_direction.normalized(); -} - -Eigen::Vector3f ValvePoseBase::compute_plane_normal( - const pcl::ModelCoefficients::Ptr& coefficients, - const Eigen::Vector3f& ray_direction) { - - Eigen::Vector3f normal_vector; - - if (!coefficients || coefficients->values.size() < 3) { - return Eigen::Vector3f::Zero(); - } - - normal_vector << coefficients->values[0], coefficients->values[1], coefficients->values[2]; - - normal_vector.normalize(); - - if (normal_vector.dot(ray_direction) > 0.0) { - normal_vector = -normal_vector; - } - - return normal_vector; -} - -/** - * @brief Finds the intersection point of a ray and a plane. - * - * The ray is defined by its origin (The camera's optical center, - * i.e., [0, 0, 0]) and a direction vector. The plane is defined by its coefficients - * Ax + By + Cz + D = 0. 
- * - * @param coefficients A shared pointer to the pcl::ModelCoefficients object. - * @param ray_direction The normalized Eigen::Vector3f ray direction. - * @return An Eigen::Vector3f representing the 3D intersection point. - */ - Eigen::Vector3f ValvePoseBase::find_ray_plane_intersection( - const pcl::ModelCoefficients::Ptr& coefficients, - const Eigen::Vector3f& ray_direction) const { - - // Plane equation: Ax + By + Cz + D = 0 - // Ray equation: P = O + t * d, where O is origin [0,0,0], P is point, t is scalar, d is direction. - // Substituting the ray equation into the plane equation: - // A*(O.x + t*d.x) + B*(O.y + t*d.y) + C*(O.z + t*d.z) + D = 0 - // Since O is [0,0,0], this simplifies to: - // A*t*d.x + B*t*d.y + C*t*d.z + D = 0 - // t * (A*d.x + B*d.y + C*d.z) = -D - // t = -D / (A*d.x + B*d.y + C*d.z) - - if (!coefficients || coefficients->values.size() < 4) { - return Eigen::Vector3f::Zero(); - } - - Eigen::Vector3f plane_normal; - plane_normal << coefficients->values[0], coefficients->values[1], coefficients->values[2]; - float D = coefficients->values[3]; - - float normal_dot_ray = plane_normal.dot(ray_direction); - - if (std::abs(normal_dot_ray) < 1e-6) { - // The ray is parallel or contained within the plane. - return Eigen::Vector3f::Zero(); - } - - float t = -D / normal_dot_ray; - - Eigen::Vector3f intersection_point = t * ray_direction; - - return intersection_point; -} - -/** - * @brief Shifts a point along a plane normal by a given distance. - * - * This function is useful for moving a point from the detected plane's - * surface to a new location, such as the center of a valve knob. - * The shift is performed along the vector of the consistent plane normal. - * - * @param intersection_point The 3D intersection point on the plane. - * @param plane_normal The consistent, normalized plane normal vector. - * @return The new Eigen::Vector3f point after the shift. 
- */ - Eigen::Vector3f ValvePoseBase::shift_point_along_normal( - const Eigen::Vector3f& intersection_point, - const Eigen::Vector3f& plane_normal) const { - - return intersection_point + (plane_normal * valve_handle_offset_); -} - - -} // namespace valve_detection \ No newline at end of file diff --git a/src/valve_pose_depth.cpp b/src/valve_pose_depth.cpp deleted file mode 100644 index 96de2f3..0000000 --- a/src/valve_pose_depth.cpp +++ /dev/null @@ -1,82 +0,0 @@ -#include "valve_detection/valve_pose_depth.hpp" - -namespace valve_detection { - -ValvePoseDepth::ValvePoseDepth(int yolo_img_width, int yolo_img_height_, - float plane_ransac_threshold, - int plane_ransac_max_iterations, - float annulus_radius_ratio, - float valve_handle_offset) - : ValvePoseBase(yolo_img_width, yolo_img_height_, - plane_ransac_threshold, plane_ransac_max_iterations, - annulus_radius_ratio, valve_handle_offset){} - -void ValvePoseDepth::project_pixel_to_point(int u, int v, float depth, pcl::PointXYZ& point) { - if (depth <= 0 || depth == std::numeric_limits::infinity() || std::isnan(depth)) { - point.x = point.y = point.z = std::numeric_limits::quiet_NaN(); - return; - } - - double fx = depth_image_properties_.intr.fx; - double fy = depth_image_properties_.intr.fy; - double cx = depth_image_properties_.intr.cx; - double cy = depth_image_properties_.intr.cy; - - point.x = (u - cx) * depth / fx; - point.y = (v - cy) * depth / fy; - point.z = depth; -} - -void ValvePoseDepth::extract_annulus_pcl( - const cv::Mat& depth_image, - const BoundingBox& bbox, - pcl::PointCloud::Ptr& cloud) const { - - float center_x = bbox.center_x; - float center_y = bbox.center_y; - float outer_radius_x = bbox.size_x / 2.0f; - float outer_radius_y = bbox.size_y / 2.0f; - - float inner_radius_x = outer_radius_x * annulus_radius_ratio_; - float inner_radius_y = outer_radius_y * annulus_radius_ratio_; - - cloud->clear(); - - int upper_bound_size = static_cast(M_PI * ((outer_radius_x * outer_radius_y - 
inner_radius_x * inner_radius_y) + 1)); - cloud->reserve(upper_bound_size); - - for (int y = static_cast(-outer_radius_y); y <= static_cast(outer_radius_y); ++y) { - for (int x = static_cast(-outer_radius_x); x <= static_cast(outer_radius_x); ++x) { - float dx_scaled_outer = x / outer_radius_x; - float dy_scaled_outer = y / outer_radius_y; - bool is_inside_outer_ellipse = (dx_scaled_outer * dx_scaled_outer + dy_scaled_outer * dy_scaled_outer) <= 1.0f; - - float dx_scaled_inner = x / inner_radius_x; - float dy_scaled_inner = y / inner_radius_y; - bool is_outside_inner_ellipse = (dx_scaled_inner * dx_scaled_inner + dy_scaled_inner * dy_scaled_inner) > 1.0f; - - if (is_inside_outer_ellipse && is_outside_inner_ellipse) { - int u = static_cast(center_x + x); - int v = static_cast(center_y + y); - - if (u >= 0 && u < depth_image.cols && v >= 0 && v < depth_image.rows) { - float depth_value = depth_image.at(v, u); - pcl::PointXYZ point; - project_pixel_to_point(u, v, depth_value, point); - - if (!std::isnan(point.x) && !std::isnan(point.y) && !std::isnan(point.z)) { - cloud->points.push_back(point); - } - } - } - } - } - - cloud->width = static_cast(cloud->points.size()); - cloud->height = 1; - cloud->is_dense = false; -} - - -} // namespace valve_detection - \ No newline at end of file diff --git a/src/valve_pose_depth_ros.cpp b/src/valve_pose_depth_ros.cpp deleted file mode 100644 index 8c8388c..0000000 --- a/src/valve_pose_depth_ros.cpp +++ /dev/null @@ -1,209 +0,0 @@ -#include "valve_detection/valve_pose_depth_ros.hpp" - -ValvePoseDepthNode::ValvePoseDepthNode(const rclcpp::NodeOptions& options) -: Node("valve_detection_node", options) { - std::string color_image_sub_topic = - this->declare_parameter("color_image_sub_topic"); - std::string depth_image_sub_topic = - this->declare_parameter("depth_image_sub_topic"); - std::string detections_sub_topic = - this->declare_parameter("detections_sub_topic"); - visualize_detections_ = - 
this->declare_parameter("visualize_detections"); - debug_visualize_ = - this->declare_parameter("debug_visualize"); - calculate_angle_ = - this->declare_parameter("calculate_angle"); - - int yolo_img_width = - this->declare_parameter("yolo_img_width"); - int yolo_img_height = - this->declare_parameter("yolo_img_height"); - - float annulus_radius_ratio = - this->declare_parameter("annulus_radius_ratio"); - - float plane_ransac_threshold = - this->declare_parameter("plane_ransac_threshold"); - int plane_ransac_max_iterations = - this->declare_parameter("plane_ransac_max_iterations"); - - float valve_handle_offset = - this->declare_parameter("valve_handle_offset"); - - valve_detector_ = std::make_shared(yolo_img_width, yolo_img_height, annulus_radius_ratio, - plane_ransac_threshold, plane_ransac_max_iterations, valve_handle_offset); - - - rclcpp::QoS qos = rclcpp::QoS(rclcpp::KeepLast(10)) - .reliability(RMW_QOS_POLICY_RELIABILITY_BEST_EFFORT); - depth_image_sub_.subscribe(this, depth_image_sub_topic, - qos.get_rmw_qos_profile()); - color_image_sub_.subscribe(this, color_image_sub_topic, - qos.get_rmw_qos_profile()); - detections_sub_.subscribe(this, detections_sub_topic, - qos.get_rmw_qos_profile()); - - sync_ = std::make_shared>( - MySyncPolicy(10), depth_image_sub_, color_image_sub_, detections_sub_); - sync_->registerCallback(std::bind( - &ValvePoseDepthNode::synchronized_callback, this, _1, _2, _3)); - - std::string color_image_info_topic = - this->declare_parameter("color_image_info_topic"); - - qos.history = rclcpp::KeepLast(1); - color_image_info_sub_ = this->create_subscription( - color_image_info_topic, 10, - std::bind(&ValvePoseDepthNode::color_image_info_callback, - this, std::placeholders::_1)); - - std::string valve_pose_pub_topic = - this->declare_parameter("valve_pose_pub_topic"); - - valve_pose_pub_ = this->create_publisher( - valve_pose_pub_topic, qos); - - if (debug_visualize_) { - std::string annulus_pub_topic = - 
this->declare_parameter("annulus_pub_topic"); - annulus_pcl_pub_ = this->create_publisher( - annulus_pub_topic, qos); - } -} - -void ValvePoseDepthNode::color_image_info_callback( - const sensor_msgs::msg::CameraInfo::SharedPtr camera_info_msg) { - if (!color_image_info_received_) { - ImageProperties img_props; - img_props.intr.fx = camera_info_msg->k[0]; - img_props.intr.fy = camera_info_msg->k[4]; - img_props.intr.cx = camera_info_msg->k[2]; - img_props.intr.cy = camera_info_msg->k[5]; - img_props.dim.x = camera_info_msg->width; - img_props.dim.y = camera_info_msg->height; - - valve_detector_->set_color_image_properties(img_props); - valve_detector_->calculate_letterbox_padding(); - - color_image_frame_id_ = camera_info_msg->header.frame_id; - color_image_info_received_ = true; - color_image_info_sub_.reset(); - } -} - -BoundingBox ValvePoseDepthNode::to_bounding_box( - const vision_msgs::msg::BoundingBox2D& bbox) const { - BoundingBox box; - box.center_x = bbox.center.x; - box.center_y = bbox.center.y; - box.size_x = bbox.size_x; - box.size_y = bbox.size_y; - box.theta = bbox.center.theta; - return box; -} - -void ValvePoseDepthNode::publish_annulus_pcl( - const pcl::PointCloud::Ptr& cloud) const { - sensor_msgs::msg::PointCloud2 cloud_msg; - pcl::toROSMsg(*cloud, cloud_msg); - cloud_msg.header.frame_id = color_image_frame_id_; - cloud_msg.header.stamp = this->now(); - cloud_msg.is_dense = false; - cloud_msg.width = static_cast(cloud->points.size()); - cloud_msg.height = 1; - - annulus_pcl_pub_->publish(cloud_msg); -} - -void ValvePoseDepthNode::synchronized_callback( - const sensor_msgs::msg::Image::ConstSharedPtr& depth_image, - const sensor_msgs::msg::Image::ConstSharedPtr& color_image, - const vision_msgs::msg::Detection2DArray::ConstSharedPtr& detections) { - if (!color_image_info_received_) { - return; - } - cv_bridge::CvImageConstPtr cv_ptr = - cv_bridge::toCvShare(color_image, "bgr8"); - - cv:brudge::CvImageConstPtr cv_depth_ptr = - 
cv_bridge::toCvShare(depth_image, "32FC1"); - - cv::Mat cv_color_image = cv_ptr->image; - - if (visualize_detections_) { - cv_color_image = cv_color_image.clone(); - } - std::vector boxes = std::vector().reserve(detections->detections.size()); - for (const auto& detection : detections->detections) { - boxes.push_back(to_bounding_box(detection.bbox)); - } - - for (const auto& box : boxes) { - BoundingBox org_image_box = - valve_detector_->transform_bounding_box(box); - - pcl::PointCloud::Ptr cloud( - new pcl::PointCloud); - - valve_detector_->extract_annulus_pcl( - cv_depth_ptr->image, - org_image_box, - cloud); - - if (cloud->empty()) { - continue; - } - if (debug_visualize_) { - publish_annulus_pcl(cloud); - } - - pcl::ModelCoefficients::Ptr coefficients(new pcl::ModelCoefficients); - pcl::PointIndices::Ptr inliers(new pcl::PointIndices); - if (!valve_detector_->segment_plane(cloud, coefficients, inliers)) { - continue; - } - - Eigen::Vector3f bb_centre_ray = - valve_detector_->get_ray_direction(org_image_box); - - Eigen::Vector3f ray_plane_intersection = - valve_detector_->find_ray_plane_intersection(coefficients, bb_centre_ray); - - if (ray_plane_intersection.isZero()) { - continue; - } - - Eigen::Vector3f plane_normal = - valve_detector_->compute_plane_normal( - coefficients, ray_direction); - - if (plane_normal.isZero()) { - continue; - } - - Eigen::Vector3f shifted_position = - valve_detector_->shift_point_along_normal( - ray_plane_intersection, plane_normal); - - float angle = org_image_box.theta; - if (calculate_angle_) { - // angle = calculate_angle() - } - geometry_msgs::msg::PoseStamped pose_msg; - pose_msg.header.stamp = color_image->header.stamp; - pose_msg.header.frame_id = color_image_frame_id_; - - pose_msg.pose.position.x = shifted_position[0]; - pose_msg.pose.position.y = shifted_position[1]; - pose_msg.pose.position.z = shifted_position[2]; - - } - - - - - -} - -RCLCPP_COMPONENTS_REGISTER_NODE(ValvePoseDepthNode) \ No newline at end of file diff 
--git a/src/valve_pose_pcl.cpp b/src/valve_pose_pcl.cpp deleted file mode 100644 index e69de29..0000000 diff --git a/src/valve_pose_ros.cpp b/src/valve_pose_ros.cpp new file mode 100644 index 0000000..82c2e3f --- /dev/null +++ b/src/valve_pose_ros.cpp @@ -0,0 +1,289 @@ +#include "valve_detection_ros/valve_pose_ros.hpp" + +namespace valve_detection { + +using namespace std::placeholders; + +ValvePoseNode::ValvePoseNode(const rclcpp::NodeOptions& options) + : Node("valve_detection_node", options) { + init_valve_detector(); + + setup_sync(); + + if (calculate_angle_) { + init_angle_detector(); + } + + std::string color_image_info_topic = + this->declare_parameter("color_image_info_topic"); + + rclcpp::QoS qos = rclcpp::QoS(rclcpp::KeepLast(1)) + .reliability(RMW_QOS_POLICY_RELIABILITY_BEST_EFFORT); + + color_image_info_sub_ = + this->create_subscription( + color_image_info_topic, qos, + std::bind(&ValvePoseNode::color_image_info_callback, this, + std::placeholders::_1)); + + std::string valve_poses_pub_topic = + this->declare_parameter("valve_poses_pub_topic"); + + valve_poses_pub_ = this->create_publisher( + valve_poses_pub_topic, qos); + + pcl_visualize_ = this->declare_parameter("pcl_visualize"); + + if (pcl_visualize_) { + std::string annulus_pub_topic = + this->declare_parameter("annulus_pub_topic"); + annulus_pcl_pub_ = + this->create_publisher( + annulus_pub_topic, qos); + std::string plane_pub_topic = + this->declare_parameter("plane_pub_topic"); + annulus_plane_pub_ = + this->create_publisher( + plane_pub_topic, qos); + } + if (visualize_detections_) { + std::string processed_image_pub_topic = + this->declare_parameter("processed_image_pub_topic"); + processed_image_pub_ = this->create_publisher( + processed_image_pub_topic, qos); + } + if (!use_depth_image_) { + tf_buffer_ = std::make_shared(this->get_clock()); + tf_listener_ = + std::make_shared(*tf_buffer_); + } +} + +void ValvePoseNode::init_valve_detector() { + int yolo_img_width = 
this->declare_parameter("yolo_img_width"); + int yolo_img_height = this->declare_parameter("yolo_img_height"); + + float annulus_radius_ratio = + this->declare_parameter("annulus_radius_ratio"); + + float plane_ransac_threshold = + this->declare_parameter("plane_ransac_threshold"); + int plane_ransac_max_iterations = + this->declare_parameter("plane_ransac_max_iterations"); + + float valve_handle_offset = + this->declare_parameter("valve_handle_offset"); + + valve_detector_ = std::make_unique( + yolo_img_width, yolo_img_height, annulus_radius_ratio, + plane_ransac_threshold, plane_ransac_max_iterations, + valve_handle_offset); +} + +void ValvePoseNode::setup_sync() { + visualize_detections_ = + this->declare_parameter("visualize_detections"); + calculate_angle_ = this->declare_parameter("calculate_angle"); + use_depth_image_ = this->declare_parameter("use_depth_image"); + + use_color_image_ = (calculate_angle_ || visualize_detections_); + + std::string color_image_sub_topic = + this->declare_parameter("color_image_sub_topic"); + std::string depth_image_sub_topic = + this->declare_parameter("depth_image_sub_topic"); + std::string pcl_sub_topic = + this->declare_parameter("pcl_sub_topic"); + std::string detections_sub_topic = + this->declare_parameter("detections_sub_topic"); + + rclcpp::QoS qos = rclcpp::QoS(rclcpp::KeepLast(10)) + .reliability(RMW_QOS_POLICY_RELIABILITY_BEST_EFFORT); + + if (use_color_image_ && use_depth_image_) { + color_image_sub_.subscribe(this, color_image_sub_topic, + qos.get_rmw_qos_profile()); + depth_image_sub_.subscribe(this, depth_image_sub_topic, + qos.get_rmw_qos_profile()); + detections_sub_.subscribe(this, detections_sub_topic, + qos.get_rmw_qos_profile()); + sync_di_ci_d_ = + std::make_shared>( + SyncPolicy_DI_CI_D(10), depth_image_sub_, color_image_sub_, + detections_sub_); + sync_di_ci_d_->registerCallback( + std::bind(&ValvePoseNode::di_ci_d_callback, this, _1, _2, _3)); + + } else if (use_depth_image_) { + 
depth_image_sub_.subscribe(this, depth_image_sub_topic, + qos.get_rmw_qos_profile()); + detections_sub_.subscribe(this, detections_sub_topic, + qos.get_rmw_qos_profile()); + sync_di_d_ = + std::make_shared>( + SyncPolicy_DI_D(10), depth_image_sub_, detections_sub_); + sync_di_d_->registerCallback( + std::bind(&ValvePoseNode::di_d_callback, this, _1, _2)); + + } else if (use_color_image_ && !use_depth_image_) { + pcl_sub_.subscribe(this, pcl_sub_topic, qos.get_rmw_qos_profile()); + color_image_sub_.subscribe(this, color_image_sub_topic, + qos.get_rmw_qos_profile()); + detections_sub_.subscribe(this, detections_sub_topic, + qos.get_rmw_qos_profile()); + sync_pc_ci_d_ = + std::make_shared>( + SyncPolicy_PC_CI_D(10), pcl_sub_, color_image_sub_, + detections_sub_); + sync_pc_ci_d_->registerCallback( + std::bind(&ValvePoseNode::pc_ci_d_callback, this, _1, _2, _3)); + + } else { + pcl_sub_.subscribe(this, pcl_sub_topic, qos.get_rmw_qos_profile()); + detections_sub_.subscribe(this, detections_sub_topic, + qos.get_rmw_qos_profile()); + sync_pc_d_ = + std::make_shared>( + SyncPolicy_PC_D(10), pcl_sub_, detections_sub_); + sync_pc_d_->registerCallback( + std::bind(&ValvePoseNode::pc_d_callback, this, _1, _2)); + } +} + +void ValvePoseNode::di_ci_d_callback( + const sensor_msgs::msg::Image::ConstSharedPtr& depth_image, + const sensor_msgs::msg::Image::ConstSharedPtr& color_image, + const vision_msgs::msg::Detection2DArray::ConstSharedPtr& detections) { + synchronized_callback(depth_image, color_image, + detections); +} + +void ValvePoseNode::di_d_callback( + const sensor_msgs::msg::Image::ConstSharedPtr& depth_image, + const vision_msgs::msg::Detection2DArray::ConstSharedPtr& detections) { + synchronized_callback(depth_image, nullptr, + detections); +} + +void ValvePoseNode::pc_ci_d_callback( + const sensor_msgs::msg::PointCloud2::ConstSharedPtr& pcl, + const sensor_msgs::msg::Image::ConstSharedPtr& color_image, + const vision_msgs::msg::Detection2DArray::ConstSharedPtr& 
detections) { + synchronized_callback(pcl, color_image, + detections); +} + +void ValvePoseNode::pc_d_callback( + const sensor_msgs::msg::PointCloud2::ConstSharedPtr& pcl, + const vision_msgs::msg::Detection2DArray::ConstSharedPtr& detections) { + synchronized_callback(pcl, nullptr, + detections); +} + +void ValvePoseNode::init_angle_detector() { + if (calculate_angle_) { + AngleDetectorParams angle_params; + angle_params.line_detection_area = + this->declare_parameter("angle.detection_area_ratio"); + angle_params.canny_low_threshold = + this->declare_parameter("angle.canny_low_threshold"); + angle_params.canny_high_threshold = + this->declare_parameter("angle.canny_high_threshold"); + angle_params.canny_aperture_size = + this->declare_parameter("angle.canny_aperture_size"); + angle_params.hough_rho_res = + this->declare_parameter("angle.hough_rho_res"); + angle_params.hough_theta_res = + this->declare_parameter("angle.hough_theta_res"); + angle_params.hough_threshold = + this->declare_parameter("angle.hough_threshold"); + angle_params.hough_min_line_length = + this->declare_parameter("angle.hough_min_line_length"); + angle_params.hough_max_line_gap = + this->declare_parameter("angle.hough_max_line_gap"); + + valve_detector_->init_angle_detector(angle_params); + } +} + +void ValvePoseNode::color_image_info_callback( + const sensor_msgs::msg::CameraInfo::SharedPtr camera_info_msg) { + if (!color_image_info_received_) { + ImageProperties img_props; + img_props.intr.fx = camera_info_msg->k[0]; + img_props.intr.fy = camera_info_msg->k[4]; + img_props.intr.cx = camera_info_msg->k[2]; + img_props.intr.cy = camera_info_msg->k[5]; + img_props.dim.width = camera_info_msg->width; + img_props.dim.height = camera_info_msg->height; + + valve_detector_->set_color_image_properties(img_props); + valve_detector_->calculate_letterbox_padding(); + + color_image_frame_id_ = camera_info_msg->header.frame_id; + color_image_info_received_ = true; + color_image_info_sub_.reset(); + } +} + 
+BoundingBox ValvePoseNode::to_bounding_box( + const vision_msgs::msg::BoundingBox2D& bbox) const { + BoundingBox box; + box.center_x = bbox.center.position.x; + box.center_y = bbox.center.position.y; + box.size_x = bbox.size_x; + box.size_y = bbox.size_y; + box.theta = bbox.center.theta; + return box; +} + +void ValvePoseNode::publish_annulus_pcl( + const pcl::PointCloud::Ptr& cloud, + const std_msgs::msg::Header& header) const { + sensor_msgs::msg::PointCloud2 cloud_msg; + pcl::toROSMsg(*cloud, cloud_msg); + cloud_msg.header = header; + cloud_msg.is_dense = false; + cloud_msg.width = static_cast(cloud->points.size()); + cloud_msg.height = 1; + + annulus_pcl_pub_->publish(cloud_msg); +} + +void ValvePoseNode::publish_annulus_plane( + const pcl::PointCloud::Ptr& cloud, + const std_msgs::msg::Header& header) const { + sensor_msgs::msg::PointCloud2 cloud_msg; + pcl::toROSMsg(*cloud, cloud_msg); + cloud_msg.header = header; + cloud_msg.is_dense = false; + cloud_msg.width = static_cast(cloud->points.size()); + cloud_msg.height = 1; + + annulus_plane_pub_->publish(cloud_msg); +} + +void ValvePoseNode::publish_valve_poses( + const std::vector& poses, + const std_msgs::msg::Header& header) const { + geometry_msgs::msg::PoseArray pose_array_msg; + pose_array_msg.header = header; + + for (const auto& pose : poses) { + geometry_msgs::msg::Pose pose_msg; + pose_msg.position.x = pose.position.x(); + pose_msg.position.y = pose.position.y(); + pose_msg.position.z = pose.position.z(); + pose_msg.orientation.x = pose.orientation.x(); + pose_msg.orientation.y = pose.orientation.y(); + pose_msg.orientation.z = pose.orientation.z(); + pose_msg.orientation.w = pose.orientation.w(); + + pose_array_msg.poses.push_back(pose_msg); + } + valve_poses_pub_->publish(pose_array_msg); +} + +} // namespace valve_detection + +RCLCPP_COMPONENTS_REGISTER_NODE(valve_detection::ValvePoseNode) From fa8b0555ab1ef0e2124f2fae6e765789b8bd484d Mon Sep 17 00:00:00 2001 From: Jorgen Fjermedal Date: Tue, 26 
Aug 2025 21:47:35 +0200 Subject: [PATCH 15/29] added transform to visualized bb --- config/valve_detection_params.yaml | 5 ++--- src/valve_detector.cpp | 9 +++++++-- 2 files changed, 9 insertions(+), 5 deletions(-) diff --git a/config/valve_detection_params.yaml b/config/valve_detection_params.yaml index 2b9f093..9374831 100644 --- a/config/valve_detection_params.yaml +++ b/config/valve_detection_params.yaml @@ -5,8 +5,7 @@ pcl_sub_topic: "/zed_node/depth/point_cloud" detections_sub_topic: "/detections_output" - color_image_info_topic: "/zed_node/left/camera_info" - depth_image_info_topic: "/zed_node/depth/camera_info" + color_image_info_topic: "/zed_node/depth/camera_info" valve_poses_pub_topic: "/valve_poses" @@ -24,7 +23,7 @@ visualize_detections: true processed_image_pub_topic: "/valve_detection_image" - pcl_visualize: false + pcl_visualize: true annulus_pub_topic: "/bbx_annulus_pcl" plane_pub_topic: "/annulus_plane_pcl" diff --git a/src/valve_detector.cpp b/src/valve_detector.cpp index b35cd81..5d7effd 100644 --- a/src/valve_detector.cpp +++ b/src/valve_detector.cpp @@ -25,6 +25,11 @@ cv::Mat ValveDetector::draw_detections(const cv::Mat& image, if (boxes.size() != poses.size()) { return cv::Mat(); } + std::vector transformed_boxes; + transformed_boxes.reserve(boxes.size()); + for (const auto& box : boxes) { + transformed_boxes.push_back(transform_bounding_box(box)); + } cv::Mat visualized_image = image.clone(); @@ -34,8 +39,8 @@ cv::Mat ValveDetector::draw_detections(const cv::Mat& image, color_image_properties_.intr.cy, 0, 0, 1); cv::Mat dist_coeffs = cv::Mat::zeros(4, 1, CV_64F); - for (size_t i = 0; i < boxes.size(); ++i) { - const auto& box = boxes[i]; + for (size_t i = 0; i < transformed_boxes.size(); ++i) { + const auto& box = transformed_boxes[i]; const auto& pose = poses[i]; int x1 = box.center_x - box.size_x / 2; From 7d334804f8b15dbea3e6df5cdb43698fce4185fe Mon Sep 17 00:00:00 2001 From: Jorgen Fjermedal Date: Tue, 26 Aug 2025 21:49:12 +0200 Subject: 
[PATCH 16/29] proper pcl init, fixed nullptr segfault --- include/valve_detection_ros/valve_pose_ros.hpp | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/include/valve_detection_ros/valve_pose_ros.hpp b/include/valve_detection_ros/valve_pose_ros.hpp index 51d645a..a33418a 100644 --- a/include/valve_detection_ros/valve_pose_ros.hpp +++ b/include/valve_detection_ros/valve_pose_ros.hpp @@ -213,8 +213,10 @@ class ValvePoseNode : public rclcpp::Node { std::vector poses; poses.reserve(boxes.size()); - pcl::PointCloud::Ptr all_annulus_cloud; - pcl::PointCloud::Ptr all_annulus_plane_cloud; + pcl::PointCloud::Ptr all_annulus_cloud( + new pcl::PointCloud); + pcl::PointCloud::Ptr all_annulus_plane_cloud( + new pcl::PointCloud); if constexpr (std::is_same::value) { valve_detector_->Compute_valve_poses( From fac4a8805ad37a970f06958fec84a12832af8de0 Mon Sep 17 00:00:00 2001 From: Jorgen Fjermedal Date: Wed, 27 Aug 2025 22:40:31 +0200 Subject: [PATCH 17/29] moved angle detector as member of ros node for easier debug visualization --- config/valve_detection_params.yaml | 9 +- include/valve_detection/angle_detector.hpp | 32 ++- include/valve_detection/valve_detector.hpp | 37 +--- .../valve_detection_ros/valve_pose_ros.hpp | 32 ++- src/angle_detector.cpp | 184 ++++++++++++++---- src/valve_detector.cpp | 13 +- src/valve_pose_ros.cpp | 10 +- 7 files changed, 217 insertions(+), 100 deletions(-) diff --git a/config/valve_detection_params.yaml b/config/valve_detection_params.yaml index 9374831..f59c6ad 100644 --- a/config/valve_detection_params.yaml +++ b/config/valve_detection_params.yaml @@ -23,14 +23,15 @@ visualize_detections: true processed_image_pub_topic: "/valve_detection_image" - pcl_visualize: true + debug_visualize: true annulus_pub_topic: "/bbx_annulus_pcl" plane_pub_topic: "/annulus_plane_pcl" + angle_detection_image_pub_topic: "/valve_angle_image" calculate_angle: true - angle.detection_area_ratio: 0.05 # percentage of bb size - 
angle.canny_low_threshold: 50 - angle.canny_high_threshold: 150 + angle.detection_area_ratio: 0.22 # percentage of bb size + angle.canny_low_threshold: 40 + angle.canny_high_threshold: 80 angle.canny_aperture_size: 3 # must be odd angle.hough_rho_res: 1.0 angle.hough_theta_res: 0.0174533 # 1 degree in radians diff --git a/include/valve_detection/angle_detector.hpp b/include/valve_detection/angle_detector.hpp index 188d563..e565ade 100644 --- a/include/valve_detection/angle_detector.hpp +++ b/include/valve_detection/angle_detector.hpp @@ -18,13 +18,33 @@ class AngleDetector { : params_(params) {} /** - * @brief Calculates the angle of the longest line within a detected valve. - * @param color_image The input color image. - * @param bbox The bounding box of the detected valve. - * @return The angle in radians, or NaN if no lines are detected. + * @brief Computes the angle for each bounding box in a vector. + * + * This function processes each bounding box to define a region of interest + * (ROI) in the input image, applies Canny edge detection and Hough line + * transform, and then calculates the dominant line angle within each box's + * ROI, and updates the 'theta' member of each box in-place. + * + * @param color_image The input image to be processed. + * @param boxes A vector of BoundingBox objects passed by reference. The + * 'theta' member of each box will be updated. */ - double calculate_angle(const cv::Mat& color_image, - const BoundingBox& bbox) const; + void compute_angles(const cv::Mat& color_image, + std::vector& boxes) const; + + /** + * @brief Processes a vector of bounding boxes to calculate the angle for + * each, updating the boxes in-place and drawing all visualizations on a + * single image. + * + * @param image_to_draw_on The input/output image. This cv::Mat will be + * modified to include visualizations for all processed boxes. + * @param boxes A vector of BoundingBox objects. 
The theta member of each + * box will be updated with the calculated angle (or NaN if no line is + * found). + */ + void compute_angles_debug(cv::Mat& image_to_draw_on, + std::vector& boxes) const; private: /** diff --git a/include/valve_detection/valve_detector.hpp b/include/valve_detection/valve_detector.hpp index 3d83432..b733c8e 100644 --- a/include/valve_detection/valve_detector.hpp +++ b/include/valve_detection/valve_detector.hpp @@ -7,8 +7,7 @@ #include #include #include -#include -#include "valve_detection/angle_detector.hpp" +#include #include "valve_detection/depth_image_processing.hpp" #include "valve_detection/pointcloud_processing.hpp" #include "valve_detection/types.hpp" @@ -64,44 +63,39 @@ class ValveDetector { * @param all_annulus_cloud Optional point cloud for visualizing all annuli. * @param all_annulus_plane_cloud Optional point cloud for visualizing * segmented planes. - * @param pcl_visualize Whether to populate visualization point clouds. + * @param debug_visualize Whether to populate visualization point clouds. * @param calculate_angle Whether to compute the valve rotation angle. 
*/ template void Compute_valve_poses( const T& depth_input, - const cv::Mat& color_image, const std::vector& boxes, std::vector& poses, pcl::PointCloud::Ptr& all_annulus_cloud, pcl::PointCloud::Ptr& all_annulus_plane_cloud, - bool pcl_visualize, - bool calculate_angle) { - if (pcl_visualize) { + bool debug_visualize) { + if (debug_visualize) { all_annulus_cloud->clear(); all_annulus_plane_cloud->clear(); } for (const auto& box : boxes) { - BoundingBox org_image_box = transform_bounding_box(box); pcl::PointCloud::Ptr annulus_cloud( new pcl::PointCloud); if constexpr (std::is_same< T, pcl::PointCloud::Ptr>::value) { - extract_annulus_pcl(depth_input, org_image_box, - color_image_properties_, + extract_annulus_pcl(depth_input, box, color_image_properties_, annulus_radius_ratio_, annulus_cloud); } else if constexpr (std::is_same::value) { - extract_annulus_pcl(depth_input, org_image_box, - color_image_properties_, + extract_annulus_pcl(depth_input, box, color_image_properties_, annulus_radius_ratio_, annulus_cloud); } if (annulus_cloud->empty()) continue; - if (pcl_visualize) { + if (debug_visualize) { *all_annulus_cloud += *annulus_cloud; } @@ -112,7 +106,7 @@ class ValveDetector { if (!segment_plane(annulus_cloud, coefficients, inliers)) continue; - if (pcl_visualize) { + if (debug_visualize) { pcl::PointCloud::Ptr annulus_plane_cloud( new pcl::PointCloud); pcl::copyPointCloud(*annulus_cloud, inliers->indices, @@ -120,7 +114,7 @@ class ValveDetector { *all_annulus_plane_cloud += *annulus_plane_cloud; } - Eigen::Vector3f bb_centre_ray = get_ray_direction(org_image_box); + Eigen::Vector3f bb_centre_ray = get_ray_direction(box); Eigen::Vector3f ray_plane_intersection = find_ray_plane_intersection(coefficients, bb_centre_ray); if (ray_plane_intersection.isZero()) @@ -134,11 +128,7 @@ class ValveDetector { Eigen::Vector3f shifted_position = shift_point_along_normal(ray_plane_intersection, plane_normal); - float angle = org_image_box.theta; - if (calculate_angle) { - angle 
= angle_detector_->calculate_angle(color_image, - org_image_box); - } + float angle = box.theta; Eigen::Matrix3f rotation_matrix = create_rotation_matrix(plane_normal, angle); @@ -248,12 +238,6 @@ class ValveDetector { Eigen::Matrix3f create_rotation_matrix(const Eigen::Vector3f& plane_normal, float angle); - /** - * @brief Initializes the angle detector with given parameters. - * @param params Parameters for Canny and Hough-based angle detection. - */ - void init_angle_detector(const AngleDetectorParams& params); - /** * @brief Converts a 3x3 rotation matrix to a normalized quaternion. * @param rotation_matrix Input rotation matrix. @@ -285,7 +269,6 @@ class ValveDetector { int letterbox_pad_x_; int letterbox_pad_y_; Eigen::Vector3f filter_direction_ = Eigen::Vector3f(1, 0, 0); - std::unique_ptr angle_detector_; }; } // namespace valve_detection diff --git a/include/valve_detection_ros/valve_pose_ros.hpp b/include/valve_detection_ros/valve_pose_ros.hpp index a33418a..26dee16 100644 --- a/include/valve_detection_ros/valve_pose_ros.hpp +++ b/include/valve_detection_ros/valve_pose_ros.hpp @@ -9,6 +9,7 @@ #include #include #include +#include "valve_detection/angle_detector.hpp" #include "valve_detection/valve_detector.hpp" #include @@ -207,7 +208,10 @@ class ValvePoseNode : public rclcpp::Node { std::vector boxes; boxes.reserve(detections->detections.size()); for (const auto& detection : detections->detections) { - boxes.push_back(to_bounding_box(detection.bbox)); + BoundingBox box = to_bounding_box(detection.bbox); + BoundingBox org_image_box = + valve_detector_->transform_bounding_box(box); + boxes.push_back(org_image_box); } std::vector poses; @@ -218,16 +222,28 @@ class ValvePoseNode : public rclcpp::Node { pcl::PointCloud::Ptr all_annulus_plane_cloud( new pcl::PointCloud); + if (calculate_angle_ && debug_visualize_) { + cv::Mat angle_debug_image = cv_color_image.clone(); + + angle_detector_->compute_angles_debug(angle_debug_image, boxes); + 
sensor_msgs::msg::Image::SharedPtr output_msg = + cv_bridge::CvImage(depth_msg->header, "bgr8", angle_debug_image) + .toImageMsg(); + angle_image_pub_->publish(*output_msg); + } else if (calculate_angle_) { + angle_detector_->compute_angles(cv_color_image, boxes); + } + if constexpr (std::is_same::value) { valve_detector_->Compute_valve_poses( - pcl_tf, cv_color_image, boxes, poses, all_annulus_cloud, - all_annulus_plane_cloud, pcl_visualize_, calculate_angle_); + pcl_tf, boxes, poses, all_annulus_cloud, + all_annulus_plane_cloud, debug_visualize_); } else if constexpr (std::is_same::value) { valve_detector_->Compute_valve_poses( - cv_depth_image, cv_color_image, boxes, poses, all_annulus_cloud, - all_annulus_plane_cloud, pcl_visualize_, calculate_angle_); + cv_depth_image, boxes, poses, all_annulus_cloud, + all_annulus_plane_cloud, debug_visualize_); } - if (pcl_visualize_ && all_annulus_cloud && all_annulus_plane_cloud) { + if (debug_visualize_ && all_annulus_cloud && all_annulus_plane_cloud) { publish_annulus_pcl(all_annulus_cloud, depth_msg->header); publish_annulus_plane(all_annulus_plane_cloud, depth_msg->header); } @@ -257,6 +273,7 @@ class ValvePoseNode : public rclcpp::Node { rclcpp::Publisher::SharedPtr annulus_plane_pub_; rclcpp::Publisher::SharedPtr processed_image_pub_; + rclcpp::Publisher::SharedPtr angle_image_pub_; // Depth image sync policies uses ExactTime with the assumption // that the depth image and color image are from the same camera @@ -296,12 +313,13 @@ class ValvePoseNode : public rclcpp::Node { bool color_image_info_received_ = false; std::unique_ptr valve_detector_; + std::unique_ptr angle_detector_; std::string color_image_frame_id_; std::shared_ptr tf_buffer_; std::shared_ptr tf_listener_; bool visualize_detections_ = true; - bool pcl_visualize_ = false; + bool debug_visualize_ = false; bool calculate_angle_ = true; bool use_color_image_ = true; bool use_depth_image_ = true; diff --git a/src/angle_detector.cpp b/src/angle_detector.cpp 
index 649fc69..ec750ef 100644 --- a/src/angle_detector.cpp +++ b/src/angle_detector.cpp @@ -4,51 +4,151 @@ namespace valve_detection { -double AngleDetector::calculate_angle(const cv::Mat& color_image, - const BoundingBox& bbox) const { - double width = bbox.size_x; - double height = bbox.size_y; - double center_x = bbox.center_x; - double center_y = bbox.center_y; - - int x1 = std::max( - static_cast(center_x - width * params_.line_detection_area), 0); - int y1 = std::max( - static_cast(center_y - height * params_.line_detection_area), 0); - int x2 = std::min( - static_cast(center_x + width * params_.line_detection_area), - color_image.cols - 1); - int y2 = std::min( - static_cast(center_y + height * params_.line_detection_area), - color_image.rows - 1); - - cv::Mat roi = color_image(cv::Rect(x1, y1, x2 - x1, y2 - y1)); - - cv::Mat mask = cv::Mat::zeros(roi.size(), CV_8UC1); - cv::Point center((roi.cols / 2), (roi.rows / 2)); - cv::Size axes(roi.cols / 2, roi.rows / 2); - cv::ellipse(mask, center, axes, 0, 0, 360, cv::Scalar(255), -1); - - cv::Mat masked_roi; - roi.copyTo(masked_roi, mask); - - cv::Mat gray; - cv::cvtColor(masked_roi, gray, cv::COLOR_BGR2GRAY); - - cv::Mat edges = apply_canny_edge_detection(gray); - - std::vector lines; - cv::HoughLinesP(edges, lines, params_.hough_rho_res, - params_.hough_theta_res, params_.hough_threshold, - params_.hough_min_line_length, params_.hough_max_line_gap); +void AngleDetector::compute_angles(const cv::Mat& color_image, + std::vector& boxes) const { + for (auto& box : boxes) { + double width = box.size_x; + double height = box.size_y; + double center_x = box.center_x; + double center_y = box.center_y; + + int x1 = std::max( + static_cast(center_x - width * params_.line_detection_area), + 0); + int y1 = std::max( + static_cast(center_y - height * params_.line_detection_area), + 0); + int x2 = std::min( + static_cast(center_x + width * params_.line_detection_area), + color_image.cols - 1); + int y2 = std::min( + 
static_cast(center_y + height * params_.line_detection_area), + color_image.rows - 1); + + if (x1 >= x2 || y1 >= y2) { + box.theta = std::numeric_limits::quiet_NaN(); + continue; + } - if (lines.empty()) { - return std::numeric_limits::quiet_NaN(); + cv::Mat roi = color_image(cv::Rect(x1, y1, x2 - x1, y2 - y1)); + + cv::Mat mask = cv::Mat::zeros(roi.size(), CV_8UC1); + cv::Point center((roi.cols / 2), (roi.rows / 2)); + cv::Size axes(roi.cols / 2, roi.rows / 2); + cv::ellipse(mask, center, axes, 0, 0, 360, cv::Scalar(255), -1); + + cv::Mat masked_roi; + roi.copyTo(masked_roi, mask); + + cv::Mat gray; + cv::cvtColor(masked_roi, gray, cv::COLOR_BGR2GRAY); + cv::Mat edges = apply_canny_edge_detection(gray); + + std::vector lines; + cv::HoughLinesP(edges, lines, params_.hough_rho_res, + params_.hough_theta_res, params_.hough_threshold, + params_.hough_min_line_length, + params_.hough_max_line_gap); + + if (lines.empty()) { + box.theta = std::numeric_limits::quiet_NaN(); + continue; + } + + cv::Vec4i longest_line = find_longest_line(lines); + box.theta = std::atan2(longest_line[3] - longest_line[1], + longest_line[2] - longest_line[0]); + } +} + +void AngleDetector::compute_angles_debug( + cv::Mat& image_to_draw_on, + std::vector& boxes) const { + const cv::Scalar ALL_LINES_COLOR(255, 0, 0); + const cv::Scalar LONGEST_LINE_COLOR(0, 255, 0); + const int LINE_THICKNESS = 2; + + cv::Mat output_image = cv::Mat::zeros(image_to_draw_on.size(), CV_8UC3); + + for (auto& box : boxes) { + double width = box.size_x; + double height = box.size_y; + double center_x = box.center_x; + double center_y = box.center_y; + + int x1 = std::max( + static_cast(center_x - width * params_.line_detection_area), + 0); + int y1 = std::max( + static_cast(center_y - height * params_.line_detection_area), + 0); + int x2 = std::min( + static_cast(center_x + width * params_.line_detection_area), + image_to_draw_on.cols - 1); + int y2 = std::min( + static_cast(center_y + height * 
params_.line_detection_area), + image_to_draw_on.rows - 1); + + if (x1 >= x2 || y1 >= y2) { + box.theta = std::numeric_limits::quiet_NaN(); + continue; + } + + cv::Mat roi = image_to_draw_on(cv::Rect(x1, y1, x2 - x1, y2 - y1)); + + cv::Mat mask = cv::Mat::zeros(roi.size(), CV_8UC1); + cv::Point center((roi.cols / 2), (roi.rows / 2)); + cv::Size axes(roi.cols / 2, roi.rows / 2); + cv::ellipse(mask, center, axes, 0, 0, 360, cv::Scalar(255), -1); + + cv::Mat masked_roi; + roi.copyTo(masked_roi, mask); + + cv::Mat gray; + cv::cvtColor(masked_roi, gray, cv::COLOR_BGR2GRAY); + cv::Mat edges = apply_canny_edge_detection(gray); + + std::vector lines; + cv::HoughLinesP(edges, lines, params_.hough_rho_res, + params_.hough_theta_res, params_.hough_threshold, + params_.hough_min_line_length, + params_.hough_max_line_gap); + + if (lines.empty()) { + box.theta = std::numeric_limits::quiet_NaN(); + + cv::Mat output_roi_gray = + output_image(cv::Rect(x1, y1, x2 - x1, y2 - y1)); + cv::Mat edges_color; + cv::cvtColor(edges, edges_color, cv::COLOR_GRAY2BGR); + edges_color.copyTo(output_roi_gray, mask); + + continue; + } + + cv::Mat output_roi = output_image(cv::Rect(x1, y1, x2 - x1, y2 - y1)); + cv::Mat edges_color; + cv::cvtColor(edges, edges_color, cv::COLOR_GRAY2BGR); + edges_color.copyTo(output_roi, mask); + + const cv::Point offset(x1, y1); + for (const auto& line : lines) { + cv::line(output_image, cv::Point(line[0], line[1]) + offset, + cv::Point(line[2], line[3]) + offset, ALL_LINES_COLOR, + LINE_THICKNESS); + } + + cv::Vec4i longest_line = find_longest_line(lines); + cv::line(output_image, + cv::Point(longest_line[0], longest_line[1]) + offset, + cv::Point(longest_line[2], longest_line[3]) + offset, + LONGEST_LINE_COLOR, LINE_THICKNESS); + + box.theta = std::atan2(longest_line[3] - longest_line[1], + longest_line[2] - longest_line[0]); } - cv::Vec4i longest_line = find_longest_line(lines); - return std::atan2(longest_line[3] - longest_line[1], - longest_line[2] - 
longest_line[0]); + image_to_draw_on = output_image; } cv::Vec4i AngleDetector::find_longest_line( diff --git a/src/valve_detector.cpp b/src/valve_detector.cpp index 5d7effd..566023c 100644 --- a/src/valve_detector.cpp +++ b/src/valve_detector.cpp @@ -15,21 +15,12 @@ ValveDetector::ValveDetector(int yolo_img_width_, plane_ransac_max_iterations_(plane_ransac_max_iterations), valve_handle_offset_(valve_handle_offset) {} -void ValveDetector::init_angle_detector(const AngleDetectorParams& params) { - angle_detector_ = std::make_unique(params); -} - cv::Mat ValveDetector::draw_detections(const cv::Mat& image, const std::vector& boxes, const std::vector& poses) const { if (boxes.size() != poses.size()) { return cv::Mat(); } - std::vector transformed_boxes; - transformed_boxes.reserve(boxes.size()); - for (const auto& box : boxes) { - transformed_boxes.push_back(transform_bounding_box(box)); - } cv::Mat visualized_image = image.clone(); @@ -39,8 +30,8 @@ cv::Mat ValveDetector::draw_detections(const cv::Mat& image, color_image_properties_.intr.cy, 0, 0, 1); cv::Mat dist_coeffs = cv::Mat::zeros(4, 1, CV_64F); - for (size_t i = 0; i < transformed_boxes.size(); ++i) { - const auto& box = transformed_boxes[i]; + for (size_t i = 0; i < boxes.size(); ++i) { + const auto& box = boxes[i]; const auto& pose = poses[i]; int x1 = box.center_x - box.size_x / 2; diff --git a/src/valve_pose_ros.cpp b/src/valve_pose_ros.cpp index 82c2e3f..54b28c9 100644 --- a/src/valve_pose_ros.cpp +++ b/src/valve_pose_ros.cpp @@ -32,9 +32,9 @@ ValvePoseNode::ValvePoseNode(const rclcpp::NodeOptions& options) valve_poses_pub_ = this->create_publisher( valve_poses_pub_topic, qos); - pcl_visualize_ = this->declare_parameter("pcl_visualize"); + debug_visualize_ = this->declare_parameter("debug_visualize"); - if (pcl_visualize_) { + if (debug_visualize_) { std::string annulus_pub_topic = this->declare_parameter("annulus_pub_topic"); annulus_pcl_pub_ = @@ -45,6 +45,10 @@ ValvePoseNode::ValvePoseNode(const 
rclcpp::NodeOptions& options) annulus_plane_pub_ = this->create_publisher( plane_pub_topic, qos); + angle_image_pub_ = this->create_publisher( + this->declare_parameter( + "angle_detection_image_pub_topic"), + qos); } if (visualize_detections_) { std::string processed_image_pub_topic = @@ -202,7 +206,7 @@ void ValvePoseNode::init_angle_detector() { angle_params.hough_max_line_gap = this->declare_parameter("angle.hough_max_line_gap"); - valve_detector_->init_angle_detector(angle_params); + angle_detector_ = std::make_unique(angle_params); } } From 08e25362a6f1b42cb961adad8fc5d78c30013494 Mon Sep 17 00:00:00 2001 From: Jorgen Fjermedal Date: Fri, 17 Oct 2025 21:06:30 +0200 Subject: [PATCH 18/29] fixed orientation calculation --- include/valve_detection/valve_detector.hpp | 9 ++-- src/valve_detector.cpp | 62 ++++++++++++++++++---- 2 files changed, 57 insertions(+), 14 deletions(-) diff --git a/include/valve_detection/valve_detector.hpp b/include/valve_detection/valve_detector.hpp index b733c8e..b60259e 100644 --- a/include/valve_detection/valve_detector.hpp +++ b/include/valve_detection/valve_detector.hpp @@ -131,7 +131,7 @@ class ValveDetector { float angle = box.theta; Eigen::Matrix3f rotation_matrix = - create_rotation_matrix(plane_normal, angle); + create_rotation_matrix(coefficients, plane_normal, angle); Eigen::Quaternionf rotation_quat; rmat_to_quat(rotation_matrix, rotation_quat); @@ -231,12 +231,15 @@ class ValveDetector { /** * @brief Creates a rotation matrix with the plane normal as z-axis and * valve angle applied. + * @param coefficients Plane coefficients. * @param plane_normal Normalized plane normal vector. * @param angle Valve rotation angle in radians. * @return Rotation matrix (Eigen::Matrix3f) for the valve pose. 
*/ - Eigen::Matrix3f create_rotation_matrix(const Eigen::Vector3f& plane_normal, - float angle); + Eigen::Matrix3f create_rotation_matrix( + const pcl::ModelCoefficients::Ptr& coefficients, + const Eigen::Vector3f& plane_normal, + float angle); /** * @brief Converts a 3x3 rotation matrix to a normalized quaternion. diff --git a/src/valve_detector.cpp b/src/valve_detector.cpp index 566023c..924054a 100644 --- a/src/valve_detector.cpp +++ b/src/valve_detector.cpp @@ -206,30 +206,70 @@ Eigen::Vector3f ValveDetector::shift_point_along_normal( } Eigen::Matrix3f ValveDetector::create_rotation_matrix( + const pcl::ModelCoefficients::Ptr& coefficients, const Eigen::Vector3f& plane_normal, float angle) { + if (!coefficients || coefficients->values.size() < 4) { + return Eigen::Matrix3f::Identity(); + } + Eigen::Vector3f z_axis = plane_normal; + float D = coefficients->values[3]; + + float fx = color_image_properties_.intr.fx; + float fy = color_image_properties_.intr.fy; + float cx = color_image_properties_.intr.cx; + float cy = color_image_properties_.intr.cy; + + Eigen::Matrix3f K; + K << fx, 0, cx, 0, fy, cy, 0, 0, 1; + Eigen::Matrix3f Kinv = K.inverse(); + + // Define two image points along the given angle (through principal point) + // --- + float len = 50.0f; // arbitrary pixel length (only direction matters) + Eigen::Vector3f p1(cx, cy, 1.f); + Eigen::Vector3f p2(cx + len * std::cos(angle), cy + len * std::sin(angle), + 1.f); + + // Back project points to rays + Eigen::Vector3f r1 = (Kinv * p1).normalized(); + Eigen::Vector3f r2 = (Kinv * p2).normalized(); + + // Compute intersections of rays with the plane + float denom1 = z_axis.dot(r1); + float denom2 = z_axis.dot(r2); - Eigen::Vector3f temp_vec(1, 0, 0); - if (std::abs(z_axis.dot(temp_vec)) > 0.99f) { - temp_vec = Eigen::Vector3f(0, 1, 0); + if (std::abs(denom1) < 1e-6f || std::abs(denom2) < 1e-6f) { + // rays parallel to plane — return identity fallback + return Eigen::Matrix3f::Identity(); } - Eigen::Vector3f 
base_x_axis = z_axis.cross(temp_vec).normalized(); - Eigen::AngleAxisf rotation(angle, z_axis); - Eigen::Vector3f x_axis = rotation * base_x_axis; + float t1 = -D / denom1; + float t2 = -D / denom2; - if (filter_direction_.dot(x_axis) < 0) { + Eigen::Vector3f X1 = t1 * r1; + Eigen::Vector3f X2 = t2 * r2; + + // Compute in-plane direction corresponding to the image line angle + Eigen::Vector3f x_axis = (X2 - X1).normalized(); + + // Project onto the plane (for numerical stability) + x_axis = (x_axis - x_axis.dot(z_axis) * z_axis).normalized(); + + // Ensure consistent direction (optional: avoid flipping frame between + // frames) + if (filter_direction_.dot(x_axis) < 0) x_axis = -x_axis; - } filter_direction_ = x_axis; Eigen::Vector3f y_axis = z_axis.cross(x_axis).normalized(); + x_axis = y_axis.cross(z_axis).normalized(); Eigen::Matrix3f rotation_matrix; - rotation_matrix.col(0) = x_axis; - rotation_matrix.col(1) = y_axis; - rotation_matrix.col(2) = z_axis; + rotation_matrix.col(0) = x_axis; // X_obj: direction of the image line + rotation_matrix.col(1) = y_axis; // Y_obj: perpendicular in-plane + rotation_matrix.col(2) = z_axis; // Z_obj: plane normal return rotation_matrix; } From f6c4a0bc85965ae7891abc8a266e3deddf56285e Mon Sep 17 00:00:00 2001 From: Jorgen Fjermedal Date: Fri, 17 Oct 2025 21:09:03 +0200 Subject: [PATCH 19/29] improved visualization --- src/angle_detector.cpp | 20 +++++++++++++++-- src/valve_detector.cpp | 51 ++++++++++++++++++++++++++---------------- src/valve_pose_ros.cpp | 27 +++++++++++++--------- 3 files changed, 67 insertions(+), 31 deletions(-) diff --git a/src/angle_detector.cpp b/src/angle_detector.cpp index ec750ef..203dc1f 100644 --- a/src/angle_detector.cpp +++ b/src/angle_detector.cpp @@ -56,8 +56,24 @@ void AngleDetector::compute_angles(const cv::Mat& color_image, } cv::Vec4i longest_line = find_longest_line(lines); - box.theta = std::atan2(longest_line[3] - longest_line[1], - longest_line[2] - longest_line[0]); + + if 
(longest_line[0] >= longest_line[2]) { + int temp_x = longest_line[0]; + int temp_y = longest_line[1]; + longest_line[0] = longest_line[2]; + longest_line[1] = longest_line[3]; + } + + float theta = std::atan2(longest_line[3] - longest_line[1], + longest_line[2] - longest_line[0]); + + if (theta < 0) { + theta += CV_PI; + } + if (theta > CV_PI) { + theta -= CV_PI; + } + box.theta = theta; } } diff --git a/src/valve_detector.cpp b/src/valve_detector.cpp index 924054a..3ad858a 100644 --- a/src/valve_detector.cpp +++ b/src/valve_detector.cpp @@ -18,10 +18,6 @@ ValveDetector::ValveDetector(int yolo_img_width_, cv::Mat ValveDetector::draw_detections(const cv::Mat& image, const std::vector& boxes, const std::vector& poses) const { - if (boxes.size() != poses.size()) { - return cv::Mat(); - } - cv::Mat visualized_image = image.clone(); cv::Mat camera_matrix = @@ -32,7 +28,6 @@ cv::Mat ValveDetector::draw_detections(const cv::Mat& image, for (size_t i = 0; i < boxes.size(); ++i) { const auto& box = boxes[i]; - const auto& pose = poses[i]; int x1 = box.center_x - box.size_x / 2; int y1 = box.center_y - box.size_y / 2; @@ -41,20 +36,38 @@ cv::Mat ValveDetector::draw_detections(const cv::Mat& image, cv::rectangle(visualized_image, cv::Point(x1, y1), cv::Point(x2, y2), cv::Scalar(0, 255, 0), 2); - cv::Mat tvec = (cv::Mat_(3, 1) << pose.position[0], - pose.position[1], pose.position[2]); - - Eigen::Matrix3f rotation_matrix = pose.orientation.toRotationMatrix(); - cv::Mat rmat = (cv::Mat_(3, 3) << rotation_matrix(0, 0), - rotation_matrix(0, 1), rotation_matrix(0, 2), - rotation_matrix(1, 0), rotation_matrix(1, 1), - rotation_matrix(1, 2), rotation_matrix(2, 0), - rotation_matrix(2, 1), rotation_matrix(2, 2)); - cv::Mat rvec; - cv::Rodrigues(rmat, rvec); - - cv::drawFrameAxes(visualized_image, camera_matrix, dist_coeffs, rvec, - tvec, 0.1); + std::ostringstream label; + label << "id:" << i; + if (!std::isnan(box.theta)) { + label << "Valve θ=" << std::fixed << 
std::setprecision(2) + << (box.theta * 180.0 / M_PI) << "°"; + cv::putText(visualized_image, label.str(), cv::Point(x1, y1 - 5), + cv::FONT_HERSHEY_SIMPLEX, 0.5, cv::Scalar(0, 255, 0), + 1); + } + // Draw pose axes only if valid pose exists + if (i < poses.size()) { + const Pose& pose = poses[i]; + if (!std::isnan(pose.position[0]) && + !std::isnan(pose.position[1]) && + !std::isnan(pose.position[2]) && !std::isnan(box.theta)) { + cv::Mat tvec = (cv::Mat_(3, 1) << pose.position[0], + pose.position[1], pose.position[2]); + + Eigen::Matrix3f rotation_matrix = + pose.orientation.toRotationMatrix(); + cv::Mat rmat = (cv::Mat_(3, 3) << rotation_matrix(0, 0), + rotation_matrix(0, 1), rotation_matrix(0, 2), + rotation_matrix(1, 0), rotation_matrix(1, 1), + rotation_matrix(1, 2), rotation_matrix(2, 0), + rotation_matrix(2, 1), rotation_matrix(2, 2)); + cv::Mat rvec; + cv::Rodrigues(rmat, rvec); + + cv::drawFrameAxes(visualized_image, camera_matrix, dist_coeffs, + rvec, tvec, 0.1); + } + } } return visualized_image; } diff --git a/src/valve_pose_ros.cpp b/src/valve_pose_ros.cpp index 54b28c9..8a07168 100644 --- a/src/valve_pose_ros.cpp +++ b/src/valve_pose_ros.cpp @@ -274,16 +274,23 @@ void ValvePoseNode::publish_valve_poses( pose_array_msg.header = header; for (const auto& pose : poses) { - geometry_msgs::msg::Pose pose_msg; - pose_msg.position.x = pose.position.x(); - pose_msg.position.y = pose.position.y(); - pose_msg.position.z = pose.position.z(); - pose_msg.orientation.x = pose.orientation.x(); - pose_msg.orientation.y = pose.orientation.y(); - pose_msg.orientation.z = pose.orientation.z(); - pose_msg.orientation.w = pose.orientation.w(); - - pose_array_msg.poses.push_back(pose_msg); + if (!std::isnan(pose.orientation.x()) && + !std::isnan(pose.orientation.y()) && + !std::isnan(pose.orientation.z()) && + !std::isnan(pose.orientation.w()) && + !std::isnan(pose.position.x()) && !std::isnan(pose.position.y()) && + !std::isnan(pose.position.z())) { + 
geometry_msgs::msg::Pose pose_msg; + pose_msg.position.x = pose.position.x(); + pose_msg.position.y = pose.position.y(); + pose_msg.position.z = pose.position.z(); + pose_msg.orientation.x = pose.orientation.x(); + pose_msg.orientation.y = pose.orientation.y(); + pose_msg.orientation.z = pose.orientation.z(); + pose_msg.orientation.w = pose.orientation.w(); + + pose_array_msg.poses.push_back(pose_msg); + } } valve_poses_pub_->publish(pose_array_msg); } From 01743588d17c12ef6cb763e134566886d22b8128 Mon Sep 17 00:00:00 2001 From: Jorgen Fjermedal Date: Fri, 17 Oct 2025 21:19:03 +0200 Subject: [PATCH 20/29] added readme --- README.md | 92 ++++++++++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 91 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index 1cb4209..4c3b264 100644 --- a/README.md +++ b/README.md @@ -1,4 +1,94 @@ -# valve-detection +# Valve Detection [![Industrial CI](https://github.com/vortexntnu/valve-detection/actions/workflows/industrial-ci.yml/badge.svg)](https://github.com/vortexntnu/valve-detection/actions/workflows/industrial-ci.yml) [![pre-commit.ci status](https://results.pre-commit.ci/badge/github/vortexntnu/valve-detection/main.svg)](https://results.pre-commit.ci/latest/github/vortexntnu/valve-detection/main) [![codecov](https://codecov.io/github/vortexntnu/valve-detection/graph/badge.svg?token=6csTwaozlh)](https://codecov.io/github/vortexntnu/valve-detection) + +--- + +### Overview + +The **Valve Detection Node** estimates the **3D pose and orientation** of industrial valves from: + +* A **depth image** or **point cloud**, +* An optional **color image**, and +* **2D detections** (e.g., YOLO bounding boxes). + +It fits a **plane** to the valve annulus, computes the **3D intersection** of the valve center ray with that plane, and estimates the **rotation** of the valve handle using image-based line detection. 
+ +It can output: + +* A `geometry_msgs::msg::PoseArray` of all valve poses, +* Debug point clouds (annulus points and segmented planes), +* Annotated color images showing detections, plane fits, and axes, +* Optional angle detection debug images. + +--- + + + + + +## Launching the Node + + +```bash +ros2 launch valve_detection valve_detection.launch.py +``` + +## How it works + +### 1. Input Synchronization + +The node synchronizes: + +* **Depth (or point cloud)** +* **Color image** (optional) +* **2D detections** + +It supports multiple input modes via `use_depth_image` and `use_color_image`. + +### 2. Annulus Extraction + +For each bounding box, a ring-shaped region (annulus) around the center is extracted from the depth image or point cloud. + +### 3. Plane Segmentation + +RANSAC is used to fit a plane through the annulus points. + +### 4. Ray–Plane Intersection + +The center ray from the camera through the bounding box center is intersected with the plane to find the valve’s 3D position. + +### 5. Orientation Estimation + +* The **plane normal** defines one orientation axis. +* The **valve handle angle** (from image) defines the in-plane rotation. +* Intrinsics are used to back-project that angle into 3D. +* The result is a full 3×3 rotation matrix and quaternion. + +### 6. Pose Publishing + +A `PoseArray` message is published with one `Pose` per detected valve. If orientation computation failed no pose is published. 
+ +--- + +## Visualization Outputs + +| Topic | Description | | +| ------------------------ | ------------------------------------------- | ---------------------------------------- | +| `/valve_detection_image` | Color image with bounding boxes and 3D axes | | +| `/valve_angle_image` | Debug overlay showing detected Hough lines | | +| `/bbx_annulus_pcl` | Ring-shaped depth points used for plane fit | +| `/annulus_plane_pcl` | Segmented plane points | + +--- + + +## Common Issues + +| Issue | Possible Cause | Fix | +| ---------------------------- | ---------------------------------------------------------------------------- | --- | +| No poses published | Missing plane segmentation (adjust `plane_ransac_threshold` or annulus size) | | | | +| Angle NaN / no handle lines | Tune Hough and Canny thresholds | | + +--- From b25f100f184c0c057f5281716f315ead06403835 Mon Sep 17 00:00:00 2001 From: Jorgen Fjermedal Date: Sat, 18 Oct 2025 10:34:33 +0200 Subject: [PATCH 21/29] added todo section in readme --- README.md | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/README.md b/README.md index 4c3b264..546aab5 100644 --- a/README.md +++ b/README.md @@ -92,3 +92,7 @@ A `PoseArray` message is published with one `Pose` per detected valve. If orient | Angle NaN / no handle lines | Tune Hough and Canny thresholds | | --- + +## Future work +* Use actual endpoints of line for backprojection to retrieve the perspective-correct plane angle. Now we just use rotation around optical axis. +TODO: For OBB, estimate line segment from BB size. 
From 239b6f9cd897125a86330cc1cce9660a3b87bac3 Mon Sep 17 00:00:00 2001 From: Jorgen Fjermedal Date: Sat, 18 Oct 2025 12:34:04 +0200 Subject: [PATCH 22/29] fixed line endpoint swapping --- README.md | 9 +++++---- src/angle_detector.cpp | 2 ++ 2 files changed, 7 insertions(+), 4 deletions(-) diff --git a/README.md b/README.md index 546aab5..7b0612c 100644 --- a/README.md +++ b/README.md @@ -86,10 +86,11 @@ A `PoseArray` message is published with one `Pose` per detected valve. If orient ## Common Issues -| Issue | Possible Cause | Fix | -| ---------------------------- | ---------------------------------------------------------------------------- | --- | -| No poses published | Missing plane segmentation (adjust `plane_ransac_threshold` or annulus size) | | | | -| Angle NaN / no handle lines | Tune Hough and Canny thresholds | | +| Issue | Possible Cause | +|----------------------------|-----------------------------------------------------------------------------| +| No poses published | Missing plane segmentation (adjust `plane_ransac_threshold` or annulus size) | +| Angle NaN / no handle lines| Tune Hough and Canny thresholds | + --- diff --git a/src/angle_detector.cpp b/src/angle_detector.cpp index 203dc1f..33bf776 100644 --- a/src/angle_detector.cpp +++ b/src/angle_detector.cpp @@ -62,6 +62,8 @@ void AngleDetector::compute_angles(const cv::Mat& color_image, int temp_y = longest_line[1]; longest_line[0] = longest_line[2]; longest_line[1] = longest_line[3]; + longest_line[2] = temp_x; + longest_line[3] = temp_y; } float theta = std::atan2(longest_line[3] - longest_line[1], From 95c3f48afb57db86b702ea986929115668e27e1a Mon Sep 17 00:00:00 2001 From: jens Date: Wed, 18 Mar 2026 20:49:31 +0100 Subject: [PATCH 23/29] Refactor valve detection and pose estimation: remove ValveDetector class, enhance ValvePoseNode --- CMakeLists.txt | 49 +- README.md | 124 +-- config/valve_detection_params.yaml | 93 ++- include/valve_detection/angle_detector.hpp | 72 -- 
.../depth_image_processing.hpp | 104 +-- .../valve_detection/pointcloud_processing.hpp | 39 - include/valve_detection/pose_estimator.hpp | 78 ++ include/valve_detection/types.hpp | 83 +- include/valve_detection/valve_detector.hpp | 279 ------- .../valve_detection_ros/valve_pose_ros.hpp | 412 +++------- launch/valve_detection.launch.py | 37 +- package.xml | 17 +- src/angle_detector.cpp | 199 ----- src/depth_image_processing.cpp | 343 ++++++-- src/pointcloud_processing.cpp | 59 -- src/pose_estimator.cpp | 249 ++++++ src/valve_detector.cpp | 290 ------- src/valve_pose_ros.cpp | 738 +++++++++++------- 18 files changed, 1422 insertions(+), 1843 deletions(-) delete mode 100644 include/valve_detection/angle_detector.hpp delete mode 100644 include/valve_detection/pointcloud_processing.hpp create mode 100644 include/valve_detection/pose_estimator.hpp delete mode 100644 include/valve_detection/valve_detector.hpp delete mode 100644 src/angle_detector.cpp delete mode 100644 src/pointcloud_processing.cpp create mode 100644 src/pose_estimator.cpp delete mode 100644 src/valve_detector.cpp diff --git a/CMakeLists.txt b/CMakeLists.txt index 1c5783a..eb9881a 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -5,32 +5,34 @@ if(NOT CMAKE_CXX_STANDARD) set(CMAKE_CXX_STANDARD 17) endif() -if(CMAKE_COMPILER_IS_GNUCXX OR CMAKE_CXX_COMPILER_ID MATCHES "Clang") - add_compile_options(-Wall -Wextra -Wpedantic) -endif() +add_compile_options(-Wall -Wextra -Wpedantic) find_package(ament_cmake REQUIRED) -find_package(cv_bridge REQUIRED) + find_package(rclcpp REQUIRED) find_package(rclcpp_components REQUIRED) +find_package(message_filters REQUIRED) + find_package(sensor_msgs REQUIRED) find_package(vision_msgs REQUIRED) find_package(geometry_msgs REQUIRED) find_package(std_msgs REQUIRED) -find_package(OpenCV 4.5.4 REQUIRED) +find_package(cv_bridge REQUIRED) +find_package(OpenCV REQUIRED) + find_package(PCL REQUIRED) find_package(pcl_conversions REQUIRED) -find_package(tf2_geometry_msgs REQUIRED) + 
+# <<< IMPORTANT >>> +find_package(vortex_msgs REQUIRED) include_directories(include) set(LIB_NAME "${PROJECT_NAME}_component") add_library(${LIB_NAME} SHARED - src/valve_detector.cpp src/depth_image_processing.cpp - src/pointcloud_processing.cpp - src/angle_detector.cpp + src/pose_estimator.cpp src/valve_pose_ros.cpp ) @@ -49,41 +51,30 @@ target_include_directories(${LIB_NAME} PUBLIC ament_target_dependencies(${LIB_NAME} PUBLIC rclcpp rclcpp_components - cv_bridge + message_filters sensor_msgs vision_msgs geometry_msgs std_msgs + cv_bridge + OpenCV pcl_conversions - tf2 - tf2_ros - tf2_geometry_msgs + vortex_msgs ) rclcpp_components_register_node( - ${LIB_NAME} - PLUGIN "valve_detection::ValvePoseNode" - EXECUTABLE ${PROJECT_NAME}_node + ${LIB_NAME} + PLUGIN "valve_detection::ValvePoseNode" + EXECUTABLE ${PROJECT_NAME}_node ) -ament_export_targets(export_${LIB_NAME}) - install(TARGETS ${LIB_NAME} - EXPORT export_${LIB_NAME} ARCHIVE DESTINATION lib LIBRARY DESTINATION lib RUNTIME DESTINATION bin ) -install( - DIRECTORY include/ - DESTINATION include -) - -install(DIRECTORY - launch - config - DESTINATION share/${PROJECT_NAME}/ -) +install(DIRECTORY include/ DESTINATION include) +install(DIRECTORY config launch DESTINATION share/${PROJECT_NAME}) ament_package() diff --git a/README.md b/README.md index 7b0612c..e77fa74 100644 --- a/README.md +++ b/README.md @@ -5,95 +5,119 @@ --- -### Overview +## Overview -The **Valve Detection Node** estimates the **3D pose and orientation** of industrial valves from: +The **Valve Detection Node** estimates the **3D pose and orientation** of industrial valves from a synchronized depth image, color image, and 2D oriented bounding box detections (e.g., from a YOLO OBB model). -* A **depth image** or **point cloud**, -* An optional **color image**, and -* **2D detections** (e.g., YOLO bounding boxes). 
- -It fits a **plane** to the valve annulus, computes the **3D intersection** of the valve center ray with that plane, and estimates the **rotation** of the valve handle using image-based line detection. - -It can output: - -* A `geometry_msgs::msg::PoseArray` of all valve poses, -* Debug point clouds (annulus points and segmented planes), -* Annotated color images showing detections, plane fits, and axes, -* Optional angle detection debug images. +For each detection it: +1. Extracts a point cloud from the depth image aligned to the color camera frame. +2. Fits a plane to those points using RANSAC. +3. Intersects the viewing ray with the plane to find the 3D position. +4. Derives the orientation from the plane normal and the bounding box angle. --- +## File Layout +| File | Responsibility | +|------|---------------| +| `valve_pose_ros.hpp/.cpp` | ROS node — subscriptions, publishing, NMS, camera data ownership | +| `depth_image_processing.hpp/.cpp` | Depth transforms — back-projection, point cloud extraction, depth-to-color pixel mapping | +| `pose_estimator.hpp/.cpp` | Normal/plane estimation — RANSAC plane fit, ray–plane intersection, rotation matrix, pose | +| `types.hpp` | Shared data types (`BoundingBox`, `Pose`, `ImageProperties`, `DepthColorExtrinsic`) | - +--- ## Launching the Node - ```bash ros2 launch valve_detection valve_detection.launch.py ``` -## How it works +--- + +## How It Works ### 1. Input Synchronization -The node synchronizes: +The node subscribes to three topics using `message_filters::ApproximateTime`: -* **Depth (or point cloud)** -* **Color image** (optional) -* **2D detections** +| Topic (default) | Type | +|---|---| +| `/realsense/D555_409122300281_Depth` | `sensor_msgs/Image` (16UC1 mm) | +| `/realsense/D555_409122300281_Color` | `sensor_msgs/Image` (BGR8) | +| `/yolo_obb_object_detection/detections` | `vision_msgs/Detection2DArray` | -It supports multiple input modes via `use_depth_image` and `use_color_image`. +### 2. 
Duplicate Suppression (NMS) -### 2. Annulus Extraction +Detections are filtered with greedy NMS. Two boxes are considered duplicates when their IoU or intersection-over-minimum exceeds the configured threshold. At most 2 detections are kept per frame. -For each bounding box, a ring-shaped region (annulus) around the center is extracted from the depth image or point cloud. +### 3. Point Cloud Extraction -### 3. Plane Segmentation +For each kept bounding box, all depth pixels whose reprojection into the color frame falls inside the oriented bounding box are back-projected to 3D (in the color camera frame). The depth-to-color extrinsic is applied to correctly handle the baseline offset between the two cameras. -RANSAC is used to fit a plane through the annulus points. +### 4. Plane Segmentation -### 4. Ray–Plane Intersection +RANSAC fits a plane through the extracted point cloud. -The center ray from the camera through the bounding box center is intersected with the plane to find the valve’s 3D position. +### 5. Ray–Plane Intersection -### 5. Orientation Estimation +The viewing ray through the bounding box center (using color intrinsics) is intersected with the fitted plane to find the valve's 3D position. The position is optionally shifted along the plane normal by `valve_handle_offset` to account for the valve handle protrusion. -* The **plane normal** defines one orientation axis. -* The **valve handle angle** (from image) defines the in-plane rotation. -* Intrinsics are used to back-project that angle into 3D. -* The result is a full 3×3 rotation matrix and quaternion. +### 6. Orientation Estimation -### 6. Pose Publishing +The plane normal defines the Z-axis. The bounding box angle is back-projected onto the plane using the color intrinsics to determine the in-plane X-axis, giving a full 3×3 rotation matrix converted to a quaternion. -A `PoseArray` message is published with one `Pose` per detected valve. If orientation computation failed no pose is published. 
+### 7. Depth Colormap Visualization + +The bounding box is reprojected from color image space to depth image space using the full intrinsic + extrinsic pipeline (`project_color_pixel_to_depth`) so the overlay is correctly aligned on the depth colormap. --- -## Visualization Outputs +## Published Topics -| Topic | Description | | -| ------------------------ | ------------------------------------------- | ---------------------------------------- | -| `/valve_detection_image` | Color image with bounding boxes and 3D axes | | -| `/valve_angle_image` | Debug overlay showing detected Hough lines | | -| `/bbx_annulus_pcl` | Ring-shaped depth points used for plane fit | -| `/annulus_plane_pcl` | Segmented plane points | +| Topic | Type | Description | +|-------|------|-------------| +| `/valve_pose` | `geometry_msgs/PoseStamped` | Best detection pose | +| `/valve_poses` | `geometry_msgs/PoseArray` | All detection poses | +| `/valve_landmarks` | `vortex_msgs/LandmarkArray` | Poses with landmark type/subtype | +| `/valve_detection_image` | `sensor_msgs/Image` | Color image with OBB overlays | +| `/valve_detection_depth_colormap` | `sensor_msgs/Image` | Depth colormap with OBB overlays | +| `/valve_points` | `sensor_msgs/PointCloud2` | Valve center positions | +| `/valve_depth_cloud` | `sensor_msgs/PointCloud2` | Points used for plane fit | +| `/bbx_annulus_pcl` | `sensor_msgs/PointCloud2` | Debug: extracted annulus points | +| `/annulus_plane_pcl` | `sensor_msgs/PointCloud2` | Debug: RANSAC plane inliers | --- +## Key Parameters -## Common Issues +| Parameter | Default | Description | +|-----------|---------|-------------| +| `annulus_radius_ratio` | `0.8` | Inner radius of the extraction ring as fraction of outer radius | +| `plane_ransac_threshold` | `0.01` | RANSAC inlier distance threshold (m) | +| `plane_ransac_max_iterations` | `50` | RANSAC iteration limit | +| `valve_handle_offset` | `0.05` | Shift along plane normal to reach handle (m) | +| 
`iou_duplicate_threshold` | `0.5` | IoU threshold for NMS | +| `yolo_img_width/height` | `640` | YOLO letterbox reference size for bbox remapping | +| `debug_visualize` | `false` | Publish annulus and plane point clouds | +| `output_frame_id` | `camera_color_optical_frame` | TF frame for published poses | -| Issue | Possible Cause | -|----------------------------|-----------------------------------------------------------------------------| -| No poses published | Missing plane segmentation (adjust `plane_ransac_threshold` or annulus size) | -| Angle NaN / no handle lines| Tune Hough and Canny thresholds | +Camera intrinsics (`color_fx/fy/cx/cy`, `depth_fx/fy/cx/cy`) and distortion coefficients are set in `config/valve_detection_params.yaml`. Depth intrinsics can alternatively be received from a `CameraInfo` topic. +--- + +## Common Issues + +| Issue | Possible Cause | +|-------|---------------| +| No poses published | Too few plane inliers — lower `plane_ransac_threshold` or increase `annulus_radius_ratio` | +| Pose position offset | Wrong `valve_handle_offset` or incorrect camera intrinsics/extrinsic | +| OBB misaligned on depth colormap | Incorrect depth-to-color extrinsic in `d555_depth_to_color_extrinsic()` | +| Duplicate poses | Lower `iou_duplicate_threshold` | --- -## Future work -* Use actual endpoints of line for backprojection to retrieve the perspective-correct plane angle. Now we just use rotation around optical axis. -TODO: For OBB, estimate line segment from BB size. +## Future Work + +- Use actual OBB edge endpoints for back-projection to get the perspective-correct in-plane angle instead of rotating around the optical axis. 
diff --git a/config/valve_detection_params.yaml b/config/valve_detection_params.yaml index f59c6ad..9190d4c 100644 --- a/config/valve_detection_params.yaml +++ b/config/valve_detection_params.yaml @@ -1,40 +1,83 @@ /**: ros__parameters: - depth_image_sub_topic: "/zed_node/depth/depth_registered" - color_image_sub_topic: "/zed_node/left/image_rect_color" - pcl_sub_topic: "/zed_node/depth/point_cloud" - detections_sub_topic: "/detections_output" + # ── Inputs ──────────────────────────────────────────────────────────────── + depth_image_sub_topic: "/realsense/D555_409122300281_Depth" + color_image_sub_topic: "/realsense/D555_409122300281_Color" + detections_sub_topic: "/yolo_obb_object_detection/detections" - color_image_info_topic: "/zed_node/depth/camera_info" + # Depth camera-info topic (optional – used to load depth intrinsics if published). + # Color intrinsics are always taken from config params below. + depth_image_info_topic: "/camera/camera/depth/camera_info" + # Color camera intrinsics (always used – not overridden by camera_info) + # Source: /camera/camera/color/camera_info + color_fx: 452.121521 + color_fy: 451.737976 + color_cx: 438.254059 + color_cy: 248.703217 + color_image_width: 896 + color_image_height: 504 + + # Distortion coefficients [k1, k2, p1, p2, k3] (plumb_bob model) + color_d1: -0.0548105500638485 + color_d2: 0.0597584918141365 + color_d3: 0.000486430013552308 + color_d4: 0.00031599000794813 + color_d5: -0.0192314591258764 + + # Depth camera intrinsics (fallback when depth camera_info is not published) + # Source: /camera/camera/depth/camera_info + depth_fx: 449.449707 + depth_fy: 449.449707 + depth_cx: 444.819946 + depth_cy: 248.226578 + depth_image_width: 896 + depth_image_height: 504 + + # ── Outputs ─────────────────────────────────────────────────────────────── + valve_pose_stamped_pub_topic: "/valve_pose" valve_poses_pub_topic: "/valve_poses" + landmarks_pub_topic: "/valve_landmarks" + processed_image_pub_topic: 
"/valve_detection_image" + depth_colormap_pub_topic: "/valve_detection_depth_colormap" + valve_points_pub_topic: "/valve_points" + depth_cloud_pub_topic: "/valve_depth_cloud" + depth_colormap_value_min: 0.1 + depth_colormap_value_max: 1125.5 + debug_annulus_pub_topic: "/bbx_annulus_pcl" + debug_plane_pub_topic: "/annulus_plane_pcl" + # ── YOLO letterbox reference size ───────────────────────────────────────── yolo_img_width: 640 yolo_img_height: 640 + # ── Annulus and plane fit ───────────────────────────────────────────────── annulus_radius_ratio: 0.8 - plane_ransac_threshold: 0.01 - plane_ransac_max_iterations: 20 + plane_ransac_max_iterations: 50 + + # ── Duplicate-detection suppression ────────────────────────────────────── + # Two bounding boxes are considered the same valve when their + # axis-aligned IoU exceeds this threshold OR when the smaller box + # overlaps the larger by more than 70 % (intersection-over-minimum). + # Maximum 2 landmarks are published per frame. + iou_duplicate_threshold: 0.5 + # ── Pose offset ─────────────────────────────────────────────────────────── + # Shift the estimated position along the plane normal by this amount (m). 
valve_handle_offset: 0.05 - use_depth_image: true # if false, use pcl and tf2 to transform pointcloud to color frame + # ── Behaviour toggles ───────────────────────────────────────────────────── + use_color_image: true + visualize_detections: true # publish annotated color image + debug_visualize: false # publish annulus / plane point clouds - visualize_detections: true - processed_image_pub_topic: "/valve_detection_image" - debug_visualize: true - annulus_pub_topic: "/bbx_annulus_pcl" - plane_pub_topic: "/annulus_plane_pcl" - angle_detection_image_pub_topic: "/valve_angle_image" - - calculate_angle: true - angle.detection_area_ratio: 0.22 # percentage of bb size - angle.canny_low_threshold: 40 - angle.canny_high_threshold: 80 - angle.canny_aperture_size: 3 # must be odd - angle.hough_rho_res: 1.0 - angle.hough_theta_res: 0.0174533 # 1 degree in radians - angle.hough_threshold: 20 - angle.hough_min_line_length: 45.0 - angle.hough_max_line_gap: 5.0 + # ── Landmark fields ─────────────────────────────────────────────────────── + landmark_type: 1 # VALVE + landmark_subtype: 0 # UNKNOWN + + # ── Output frame ────────────────────────────────────────────────────────── + # frame_id used for all published poses. Set to the TF frame that matches + # your robot's TF tree so Foxglove can display the pose automatically. + # Leave empty ("") to inherit the frame_id from the depth image header. + output_frame_id: "camera_color_optical_frame" diff --git a/include/valve_detection/angle_detector.hpp b/include/valve_detection/angle_detector.hpp deleted file mode 100644 index e565ade..0000000 --- a/include/valve_detection/angle_detector.hpp +++ /dev/null @@ -1,72 +0,0 @@ -#ifndef VALVE_POSE_ANGLE_HPP -#define VALVE_POSE_ANGLE_HPP - -#include -#include -#include -#include "valve_detection/types.hpp" - -namespace valve_detection { - -class AngleDetector { - public: - /** - * @brief Constructs an AngleDetector with a set of parameters. - * @param params The parameters for the detector. 
- */ - explicit AngleDetector(const AngleDetectorParams& params) - : params_(params) {} - - /** - * @brief Computes the angle for each bounding box in a vector. - * - * This function processes each bounding box to define a region of interest - * (ROI) in the input image, applies Canny edge detection and Hough line - * transform, and then calculates the dominant line angle within each box's - * ROI, and updates the 'theta' member of each box in-place. - * - * @param color_image The input image to be processed. - * @param boxes A vector of BoundingBox objects passed by reference. The - * 'theta' member of each box will be updated. - */ - void compute_angles(const cv::Mat& color_image, - std::vector& boxes) const; - - /** - * @brief Processes a vector of bounding boxes to calculate the angle for - * each, updating the boxes in-place and drawing all visualizations on a - * single image. - * - * @param image_to_draw_on The input/output image. This cv::Mat will be - * modified to include visualizations for all processed boxes. - * @param boxes A vector of BoundingBox objects. The theta member of each - * box will be updated with the calculated angle (or NaN if no line is - * found). - */ - void compute_angles_debug(cv::Mat& image_to_draw_on, - std::vector& boxes) const; - - private: - /** - * @brief Finds the longest line among a set of lines detected in an image. - * @param lines A vector of lines, each represented as cv::Vec4i (x1, y1, - * x2, y2). - * @return The cv::Vec4i representing the longest line. - * If the input vector is empty, returns a zero-length line {0, 0, 0, 0}. - */ - cv::Vec4i find_longest_line(const std::vector& lines) const; - - /** - * @brief Applies Canny edge detection to a grayscale image. - * @param gray_image The input grayscale image. - * @return A binary image (CV_8UC1) with edges detected. - */ - cv::Mat apply_canny_edge_detection(const cv::Mat& gray_image) const; - - /** @brief Parameters for the angle detector. 
*/ - AngleDetectorParams params_; -}; - -} // namespace valve_detection - -#endif // VALVE_POSE_ANGLE_HPP diff --git a/include/valve_detection/depth_image_processing.hpp b/include/valve_detection/depth_image_processing.hpp index 2ddea69..3552851 100644 --- a/include/valve_detection/depth_image_processing.hpp +++ b/include/valve_detection/depth_image_processing.hpp @@ -1,61 +1,61 @@ -#ifndef VALVE_POSE_DEPTH_HPP -#define VALVE_POSE_DEPTH_HPP +#pragma once #include "valve_detection/types.hpp" - -#include -#include +#include #include #include -#include -#include -#include namespace valve_detection { -/** - * @brief Projects a 2D pixel with depth into 3D camera coordinates. - * - * @param u The x-coordinate of the pixel in the image. - * @param v The y-coordinate of the pixel in the image. - * @param depth The depth value at the pixel (in meters or the same units as - * intrinsics). - * @param fx Focal length along x-axis. - * @param fy Focal length along y-axis. - * @param cx Principal point x-coordinate. - * @param cy Principal point y-coordinate. - * @param[out] point The resulting 3D point in camera coordinates. - */ -void project_pixel_to_point(int u, - int v, - float depth, - double fx, - double fy, - double cx, - double cy, - pcl::PointXYZ& point); - -/** - * @brief Extracts a 3D point cloud representing the annulus (valve rim) from a - * depth image. - * - * The function selects pixels within the annulus region of a bounding box and - * projects them into 3D points using the camera intrinsics. - * - * @param depth_image The input depth image (CV_32FC1 or similar). - * @param bbox The bounding box around the valve in the image. - * @param image_properties Camera intrinsics and image dimensions. - * @param annulus_radius_ratio Fraction of the bounding box considered as the - * annulus (0.0–1.0). - * @param[out] cloud The resulting point cloud containing the 3D points of the - * annulus. 
- */ -void extract_annulus_pcl(const cv::Mat& depth_image, - const BoundingBox& bbox, - const ImageProperties& image_properties, - float annulus_radius_ratio, - pcl::PointCloud::Ptr& cloud); +void project_pixel_to_point( + int u, int v, float depth, + double fx, double fy, double cx, double cy, + pcl::PointXYZ& out); -} // namespace valve_detection +void extract_annulus_pcl( + const cv::Mat& depth_image, // CV_32FC1 meters + const BoundingBox& bbox, // in ORIGINAL image pixels + const ImageProperties& img_props, + float annulus_radius_ratio, // inner radius = outer*ratio + pcl::PointCloud::Ptr& out); + +// Hardcoded depth-to-color extrinsic for Intel RealSense D555. +// Values from the camera URDF / factory calibration. +// Verify with: ros2 topic echo /realsense/extrinsics/depth_to_color +// or the URDF at https://github.com/IntelRealSense/librealsense/issues/14577 +DepthColorExtrinsic d555_depth_to_color_extrinsic(); + +// Like extract_annulus_pcl but with proper depth-to-color alignment. +// Iterates depth pixels, back-projects with depth intrinsics, applies the +// extrinsic transform, then checks whether the resulting color-frame +// projection falls inside the annulus. Output points are in the color +// camera frame. +void extract_annulus_pcl_aligned( + const cv::Mat& depth_image, // CV_32FC1 meters, depth frame + const BoundingBox& color_bbox, // annulus defined in color pixels + const ImageProperties& color_props, + const ImageProperties& depth_props, + const DepthColorExtrinsic& extrinsic, + float annulus_radius_ratio, + pcl::PointCloud::Ptr& out); -#endif // VALVE_POSE_DEPTH_HPP +// Extracts all valid depth points whose color-frame projection falls inside +// the oriented bounding box. Output points are in the color camera frame. 
+void extract_bbox_pcl_aligned( + const cv::Mat& depth_image, // CV_32FC1 meters, depth frame + const BoundingBox& color_bbox, // OBB defined in color pixels + const ImageProperties& color_props, + const ImageProperties& depth_props, + const DepthColorExtrinsic& extrinsic, + pcl::PointCloud::Ptr& out); + +// Project a color image pixel to depth image coordinates. +// u_c, v_c: pixel coordinates in the color image. +// Z: depth of the point in the color camera frame (metres). +cv::Point2f project_color_pixel_to_depth( + float u_c, float v_c, float Z, + const ImageProperties& color_props, + const ImageProperties& depth_props, + const DepthColorExtrinsic& extr); + +} // namespace valve_detection diff --git a/include/valve_detection/pointcloud_processing.hpp b/include/valve_detection/pointcloud_processing.hpp deleted file mode 100644 index 588496f..0000000 --- a/include/valve_detection/pointcloud_processing.hpp +++ /dev/null @@ -1,39 +0,0 @@ -#ifndef VALVE_POSE_PCL_HPP -#define VALVE_POSE_PCL_HPP - -#include "valve_detection/types.hpp" - -#include -#include -#include -#include - -namespace valve_detection { - -/** - * @brief Extracts a 3D point cloud representing the annulus (valve rim) from an - * input point cloud. - * - * This function selects points that correspond to the annulus region of a - * bounding box in image space, based on the camera intrinsics and the specified - * radius ratio. - * - * @param input_cloud The input point cloud (typically from a depth camera or - * LIDAR). - * @param bbox The bounding box around the valve in image coordinates. - * @param image_properties Camera intrinsics and image dimensions for - * projection. - * @param annulus_radius_ratio Fraction of the bounding box considered as the - * annulus (0.0–1.0). - * @param[out] cloud The resulting point cloud containing the 3D points of the - * annulus. 
- */ -void extract_annulus_pcl(const pcl::PointCloud::Ptr& input_cloud, - const BoundingBox& bbox, - const ImageProperties& image_properties, - float annulus_radius_ratio, - pcl::PointCloud::Ptr& cloud); - -} // namespace valve_detection - -#endif // VALVE_POSE_PCL_HPP diff --git a/include/valve_detection/pose_estimator.hpp b/include/valve_detection/pose_estimator.hpp new file mode 100644 index 0000000..dad26cb --- /dev/null +++ b/include/valve_detection/pose_estimator.hpp @@ -0,0 +1,78 @@ +#pragma once + +#include "valve_detection/types.hpp" +#include "valve_detection/depth_image_processing.hpp" + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace valve_detection { + +class PoseEstimator { +public: + PoseEstimator(int yolo_img_width, + int yolo_img_height, + float annulus_radius_ratio, + float plane_ransac_threshold, + int plane_ransac_max_iterations, + float valve_handle_offset); + + void set_color_image_properties(const ImageProperties& props); + void set_depth_image_properties(const ImageProperties& props); + void set_depth_color_extrinsic(const DepthColorExtrinsic& extr); + + void calculate_letterbox_padding(); + BoundingBox transform_bounding_box(const BoundingBox& bbox) const; + + bool compute_pose_from_depth( + const cv::Mat& depth_image, // CV_32FC1 meters + const BoundingBox& bbox_org, // in original image pixels + Pose& out_pose, + pcl::PointCloud::Ptr annulus_dbg, + pcl::PointCloud::Ptr plane_dbg, + bool debug_visualize) const; + +private: + bool segment_plane(const pcl::PointCloud::Ptr& cloud, + pcl::ModelCoefficients::Ptr& coeff, + pcl::PointIndices::Ptr& inliers) const; + + Eigen::Vector3f get_ray_direction(const BoundingBox& bbox) const; + Eigen::Vector3f compute_plane_normal(const pcl::ModelCoefficients::Ptr& coeff, + const Eigen::Vector3f& ray_direction) const; + Eigen::Vector3f find_ray_plane_intersection(const pcl::ModelCoefficients::Ptr& coeff, + const Eigen::Vector3f& ray_direction) 
const; + Eigen::Vector3f shift_point_along_normal(const Eigen::Vector3f& intersection_point, + const Eigen::Vector3f& plane_normal) const; + Eigen::Matrix3f create_rotation_matrix(const pcl::ModelCoefficients::Ptr& coefficients, + const Eigen::Vector3f& plane_normal, + float angle) const; + + ImageProperties color_image_properties_{}; + ImageProperties depth_image_properties_{}; + DepthColorExtrinsic depth_color_extrinsic_{}; + bool has_depth_props_{false}; + + int yolo_img_width_{640}; + int yolo_img_height_{640}; + float annulus_radius_ratio_{0.8f}; + float plane_ransac_threshold_{0.01f}; + int plane_ransac_max_iterations_{50}; + float valve_handle_offset_{0.05f}; + + double letterbox_scale_factor_{1.0}; + double letterbox_pad_x_{0}; + double letterbox_pad_y_{0}; + + mutable Eigen::Vector3f filter_direction_{1, 0, 0}; +}; + +} // namespace valve_detection diff --git a/include/valve_detection/types.hpp b/include/valve_detection/types.hpp index 248452b..5ae25a4 100644 --- a/include/valve_detection/types.hpp +++ b/include/valve_detection/types.hpp @@ -1,85 +1,40 @@ -#ifndef TYPES_HPP -#define TYPES_HPP - +#pragma once #include +#include namespace valve_detection { -/** - * @struct CameraIntrinsics - * @brief Stores intrinsic parameters of a pinhole camera model. - */ struct CameraIntrinsics { - double fx; ///< Focal length in pixels along x-axis - double fy; ///< Focal length in pixels along y-axis - double cx; ///< Principal point x-coordinate (in pixels) - double cy; ///< Principal point y-coordinate (in pixels) + double fx{0}, fy{0}, cx{0}, cy{0}; }; -/** - * @struct ImageDimensions - * @brief Represents the width and height of an image. - */ struct ImageDimensions { - int width; ///< Image width in pixels - int height; ///< Image height in pixels + int width{0}, height{0}; }; -/** - * @struct ImageProperties - * @brief Combines camera intrinsics and image dimensions. 
- */ struct ImageProperties { - CameraIntrinsics intr; ///< Camera intrinsic parameters - ImageDimensions dim; ///< Image width and height + CameraIntrinsics intr; + ImageDimensions dim; }; -/** - * @struct BoundingBox - * @brief Represents a 2D bounding box with center, size, and orientation. - */ struct BoundingBox { - float center_x; ///< X-coordinate of the bounding box center - float center_y; ///< Y-coordinate of the bounding box center - float size_x; ///< Width of the bounding box - float size_y; ///< Height of the bounding box - float theta; ///< Orientation in radians (positive counter-clockwise) + float center_x{0}; + float center_y{0}; + float size_x{0}; + float size_y{0}; + float theta{0}; // radians }; -/** - * @struct Pose - * @brief Represents a 3D pose with position and orientation. - */ struct Pose { - Eigen::Vector3f position; ///< 3D position (x, y, z) - Eigen::Quaternionf orientation; ///< Orientation as a quaternion + Eigen::Vector3f position{Eigen::Vector3f::Zero()}; + Eigen::Quaternionf orientation{Eigen::Quaternionf::Identity()}; }; -/** - * @struct AngleDetectorParams - * @brief Parameters for Canny edge detection and Hough line transform used in - * angle detection. 
- */ -struct AngleDetectorParams { - float line_detection_area; ///< Fraction of the bounding box to use as ROI - ///< (0.0–1.0) - - // Canny edge detection parameters - int canny_low_threshold; ///< Low threshold for Canny edge detector - int canny_high_threshold; ///< High threshold for Canny edge detector - int canny_aperture_size; ///< Aperture size for the Sobel operator in Canny - - // Hough line transform parameters - double hough_rho_res; ///< Distance resolution in pixels - double hough_theta_res; ///< Angle resolution in radians - int hough_threshold; ///< Minimum number of votes (intersections) to detect - ///< a line - double - hough_min_line_length; ///< Minimum line length to accept (in pixels) - double hough_max_line_gap; ///< Maximum allowed gap between line segments - ///< to treat them as a single line +// Rigid transform from depth camera frame to color camera frame. +// Rotation R and translation t satisfy: P_color = R * P_depth + t +struct DepthColorExtrinsic { + Eigen::Matrix3f R{Eigen::Matrix3f::Identity()}; + Eigen::Vector3f t{Eigen::Vector3f::Zero()}; }; -} // namespace valve_detection - -#endif // TYPES_HPP +} // namespace valve_detection \ No newline at end of file diff --git a/include/valve_detection/valve_detector.hpp b/include/valve_detection/valve_detector.hpp deleted file mode 100644 index b60259e..0000000 --- a/include/valve_detection/valve_detector.hpp +++ /dev/null @@ -1,279 +0,0 @@ -#ifndef VALVE_POSE_BASE_HPP -#define VALVE_POSE_BASE_HPP - -#include -#include -#include -#include -#include -#include -#include -#include "valve_detection/depth_image_processing.hpp" -#include "valve_detection/pointcloud_processing.hpp" -#include "valve_detection/types.hpp" - -namespace valve_detection { - -/** - * @class ValveDetector - * @brief Detects valve poses from color and depth images or point clouds. 
- * - * This class extracts valve annulus point clouds, segments planes, - * computes ray-plane intersections, estimates valve poses, and can calculate - * the valve rotation angle using the AngleDetector. - */ -class ValveDetector { - public: - /** - * @brief Constructs a ValveDetector with YOLO image dimensions and - * detection parameters. - * @param yolo_img_width Width of the YOLO input image. - * @param yolo_img_height Height of the YOLO input image. - * @param annulus_radius_ratio Ratio of bounding box used to define the - * annulus. - * @param plane_ransac_threshold Distance threshold for plane segmentation. - * @param plane_ransac_max_iterations Max iterations for RANSAC plane - * segmentation. - * @param valve_handle_offset Distance to shift the valve handle along the - * plane normal. - */ - ValveDetector(int yolo_img_width, - int yolo_img_height_, - float annulus_radius_ratio, - float plane_ransac_threshold, - int plane_ransac_max_iterations, - float valve_handle_offset); - - virtual ~ValveDetector() = default; - - /** - * @brief Computes valve poses from depth input and bounding boxes. - * - * This template function supports depth input as either a PCL point cloud - * or an OpenCV depth image. It extracts the annulus points, segments - * planes, computes ray-plane intersections, shifts points along normals, - * and optionally calculates valve rotation angles. - * - * @tparam T Depth input type (cv::Mat or - * pcl::PointCloud::Ptr). - * @param depth_input The input depth data. - * @param color_image Corresponding color image for angle calculation. - * @param boxes Bounding boxes of detected valves. - * @param poses Output vector of computed valve poses. - * @param all_annulus_cloud Optional point cloud for visualizing all annuli. - * @param all_annulus_plane_cloud Optional point cloud for visualizing - * segmented planes. - * @param debug_visualize Whether to populate visualization point clouds. 
- * @param calculate_angle Whether to compute the valve rotation angle. - */ - template - void Compute_valve_poses( - const T& depth_input, - const std::vector& boxes, - std::vector& poses, - pcl::PointCloud::Ptr& all_annulus_cloud, - pcl::PointCloud::Ptr& all_annulus_plane_cloud, - bool debug_visualize) { - if (debug_visualize) { - all_annulus_cloud->clear(); - all_annulus_plane_cloud->clear(); - } - - for (const auto& box : boxes) { - pcl::PointCloud::Ptr annulus_cloud( - new pcl::PointCloud); - - if constexpr (std::is_same< - T, pcl::PointCloud::Ptr>::value) { - extract_annulus_pcl(depth_input, box, color_image_properties_, - annulus_radius_ratio_, annulus_cloud); - } else if constexpr (std::is_same::value) { - extract_annulus_pcl(depth_input, box, color_image_properties_, - annulus_radius_ratio_, annulus_cloud); - } - - if (annulus_cloud->empty()) - continue; - - if (debug_visualize) { - *all_annulus_cloud += *annulus_cloud; - } - - pcl::ModelCoefficients::Ptr coefficients( - new pcl::ModelCoefficients); - pcl::PointIndices::Ptr inliers(new pcl::PointIndices); - - if (!segment_plane(annulus_cloud, coefficients, inliers)) - continue; - - if (debug_visualize) { - pcl::PointCloud::Ptr annulus_plane_cloud( - new pcl::PointCloud); - pcl::copyPointCloud(*annulus_cloud, inliers->indices, - *annulus_plane_cloud); - *all_annulus_plane_cloud += *annulus_plane_cloud; - } - - Eigen::Vector3f bb_centre_ray = get_ray_direction(box); - Eigen::Vector3f ray_plane_intersection = - find_ray_plane_intersection(coefficients, bb_centre_ray); - if (ray_plane_intersection.isZero()) - continue; - - Eigen::Vector3f plane_normal = - compute_plane_normal(coefficients, bb_centre_ray); - if (plane_normal.isZero()) - continue; - - Eigen::Vector3f shifted_position = - shift_point_along_normal(ray_plane_intersection, plane_normal); - - float angle = box.theta; - - Eigen::Matrix3f rotation_matrix = - create_rotation_matrix(coefficients, plane_normal, angle); - Eigen::Quaternionf rotation_quat; 
- rmat_to_quat(rotation_matrix, rotation_quat); - - Pose pose; - pose.position = shifted_position; - pose.orientation = rotation_quat; - poses.emplace_back(pose); - } - } - - /** - * @brief Projects a 3D point to pixel coordinates using the camera - * intrinsics. - * @param x X coordinate in 3D space. - * @param y Y coordinate in 3D space. - * @param z Z coordinate in 3D space. - * @param[out] u Output pixel x-coordinate. - * @param[out] v Output pixel y-coordinate. - */ - void project_point_to_pixel(float x, - float y, - float z, - int& u, - int& v) const; - - /** - * @brief Calculates the padding and scaling for letterbox resizing. - */ - void calculate_letterbox_padding(); - - /** - * @brief Transforms bounding box coordinates from letterboxed YOLO input to - * original image coordinates. - * @param bbox Bounding box in YOLO image coordinates. - * @return Bounding box transformed to original image coordinates. - */ - BoundingBox transform_bounding_box(const BoundingBox& bbox) const; - - /** - * @brief Sets the color image properties (intrinsics and dimensions). - */ - void set_color_image_properties(const ImageProperties& properties) { - color_image_properties_ = properties; - } - - /** - * @brief Segments the dominant plane from a point cloud using RANSAC. - * @param cloud Input point cloud. - * @param coefficients Output model coefficients (Ax + By + Cz + D = 0). - * @param inliers Output inlier indices of the plane. - * @return True if a plane with inliers was found, false otherwise. - */ - bool segment_plane(const pcl::PointCloud::Ptr& cloud, - pcl::ModelCoefficients::Ptr& coefficients, - pcl::PointIndices::Ptr& inliers) const; - - /** - * @brief Computes the ray direction vector through the bounding box center - * in camera frame. - * @param bbox Bounding box for the valve. - * @return Normalized Eigen::Vector3f representing the ray direction. 
- */ - Eigen::Vector3f get_ray_direction(const BoundingBox& bbox) const; - - /** - * @brief Computes the plane normal from plane coefficients, ensuring it - * points against the ray. - * @param coefficients Plane coefficients (Ax + By + Cz + D = 0). - * @param ray_direction Ray direction vector from camera. - * @return Normalized Eigen::Vector3f plane normal, or zero if invalid. - */ - Eigen::Vector3f compute_plane_normal( - const pcl::ModelCoefficients::Ptr& coefficients, - const Eigen::Vector3f& ray_direction) const; - - /** - * @brief Finds the intersection point of a ray and a plane. - * @param coefficients Plane coefficients. - * @param ray_direction Normalized ray direction. - * @return Intersection point in 3D space, or zero vector if no - * intersection. - */ - Eigen::Vector3f find_ray_plane_intersection( - const pcl::ModelCoefficients::Ptr& coefficients, - const Eigen::Vector3f& ray_direction) const; - - /** - * @brief Shifts a point along a plane normal by the valve handle offset. - * @param intersection_point Point on the plane. - * @param plane_normal Normalized plane normal vector. - * @return Shifted 3D point representing valve handle position. - */ - Eigen::Vector3f shift_point_along_normal( - const Eigen::Vector3f& intersection_point, - const Eigen::Vector3f& plane_normal) const; - - /** - * @brief Creates a rotation matrix with the plane normal as z-axis and - * valve angle applied. - * @param coefficients Plane coefficients. - * @param plane_normal Normalized plane normal vector. - * @param angle Valve rotation angle in radians. - * @return Rotation matrix (Eigen::Matrix3f) for the valve pose. - */ - Eigen::Matrix3f create_rotation_matrix( - const pcl::ModelCoefficients::Ptr& coefficients, - const Eigen::Vector3f& plane_normal, - float angle); - - /** - * @brief Converts a 3x3 rotation matrix to a normalized quaternion. - * @param rotation_matrix Input rotation matrix. - * @param quat Output quaternion. 
- */ - void rmat_to_quat(const Eigen::Matrix3f& rotation_matrix, - Eigen::Quaternionf& quat) const; - - /** - * @brief Draws bounding boxes and 3D axes for valve poses on an image. - * @param image Input color image. - * @param boxes Detected valve bounding boxes. - * @param poses Computed valve poses. - * @return Image with drawn detections. - */ - cv::Mat draw_detections(const cv::Mat& image, - const std::vector& boxes, - const std::vector& poses) const; - - protected: - ImageProperties color_image_properties_; - int yolo_img_width_; - int yolo_img_height_; - float annulus_radius_ratio_; - float plane_ransac_threshold_; - int plane_ransac_max_iterations_; - float valve_handle_offset_; - float letterbox_scale_factor_; - int letterbox_pad_x_; - int letterbox_pad_y_; - Eigen::Vector3f filter_direction_ = Eigen::Vector3f(1, 0, 0); -}; - -} // namespace valve_detection - -#endif // VALVE_POSE_BASE_HPP diff --git a/include/valve_detection_ros/valve_pose_ros.hpp b/include/valve_detection_ros/valve_pose_ros.hpp index 26dee16..49d96a0 100644 --- a/include/valve_detection_ros/valve_pose_ros.hpp +++ b/include/valve_detection_ros/valve_pose_ros.hpp @@ -1,330 +1,114 @@ -#ifndef VALVE_POSE_DEPTH_ROS_HPP -#define VALVE_POSE_DEPTH_ROS_HPP +#pragma once -#include -#include -#include +#include #include + +#include +#include #include #include -#include #include -#include "valve_detection/angle_detector.hpp" -#include "valve_detection/valve_detector.hpp" +#include +#include + +#include +#include #include -#include -#include #include +#include + +#include "valve_detection/pose_estimator.hpp" +#include "valve_detection/depth_image_processing.hpp" +#include "valve_detection/types.hpp" -#include -#include -#include -#include +#include "vortex_msgs/msg/landmark.hpp" +#include "vortex_msgs/msg/landmark_array.hpp" +#include "vortex_msgs/msg/landmark_type.hpp" +#include "vortex_msgs/msg/landmark_subtype.hpp" namespace valve_detection { -/** - * @class ValvePoseNode - * @brief ROS 2 node for 
computing valve poses from depth images or point clouds - * and 2D detections. - * - * This node subscribes to depth images or point clouds, color images - * (optional), and 2D detection messages. It synchronizes the inputs, computes - * valve poses using ValveDetector, and publishes results as PoseArray and - * optionally as visualized point clouds or images. - */ class ValvePoseNode : public rclcpp::Node { - public: - /** - * @brief Constructs the ValvePoseNode with ROS node options. - * @param options Node options. - */ - explicit ValvePoseNode(const rclcpp::NodeOptions& options); - - virtual ~ValvePoseNode() = default; - - private: - /** - * @brief Callback for camera info subscription to obtain intrinsics and - * image size. - * @param camera_info_msg Shared pointer to the CameraInfo message. - */ - void color_image_info_callback( - const sensor_msgs::msg::CameraInfo::SharedPtr camera_info_msg); - - /** - * @brief Initializes the ValveDetector with the specified ros parameters. - */ - void init_valve_detector(); - - /** - * @brief Initializes the angle detector inside the ValveDetector if angle - * calculation is enabled. - */ - void init_angle_detector(); - - /** - * @brief Converts a vision_msgs::BoundingBox2D to local BoundingBox type. - * @param bbox The input 2D bounding box message. - * @return Converted BoundingBox. - */ - BoundingBox to_bounding_box( - const vision_msgs::msg::BoundingBox2D& bbox) const; - - /** - * @brief Publishes a point cloud containing annulus points. - * @param cloud The point cloud to publish. - * @param header ROS message header to use for publication. - */ - void publish_annulus_pcl(const pcl::PointCloud::Ptr& cloud, - const std_msgs::msg::Header& header) const; - - /** - * @brief Publishes a point cloud containing segmented plane points. - * @param cloud The point cloud to publish. - * @param header ROS message header to use for publication. 
- */ - void publish_annulus_plane(const pcl::PointCloud::Ptr& cloud, - const std_msgs::msg::Header& header) const; - - /** - * @brief Publishes computed valve poses as a PoseArray message. - * @param poses Vector of computed valve poses. - * @param header ROS message header to use for publication. - */ - void publish_valve_poses(const std::vector& poses, - const std_msgs::msg::Header& header) const; - - /** - * @brief Configures message_filters synchronizers based on available - * inputs. - */ - void setup_sync(); - - /** - * @brief Callback for Depth Image + Color Image + Detections - * synchronization. - */ - void di_ci_d_callback( - const sensor_msgs::msg::Image::ConstSharedPtr& depth_image, - const sensor_msgs::msg::Image::ConstSharedPtr& color_image, - const vision_msgs::msg::Detection2DArray::ConstSharedPtr& detections); - - /** - * @brief Callback for Depth Image + Detections synchronization (no color - * image). - */ - void di_d_callback( - const sensor_msgs::msg::Image::ConstSharedPtr& depth_image, - const vision_msgs::msg::Detection2DArray::ConstSharedPtr& detections); - - /** - * @brief Callback for Point Cloud + Color Image + Detections - * synchronization. - */ - void pc_ci_d_callback( - const sensor_msgs::msg::PointCloud2::ConstSharedPtr& pcl, - const sensor_msgs::msg::Image::ConstSharedPtr& color_image, - const vision_msgs::msg::Detection2DArray::ConstSharedPtr& detections); - - /** - * @brief Callback for Point Cloud + Detections synchronization (no color - * image). - */ - void pc_d_callback( - const sensor_msgs::msg::PointCloud2::ConstSharedPtr& pcl, - const vision_msgs::msg::Detection2DArray::ConstSharedPtr& detections); - - /** - * @brief Generic template callback for synchronized inputs. - * - * Supports T = sensor_msgs::msg::Image (depth image) or PointCloud2. - * Handles optional color image and converts messages to OpenCV/PCL formats. - * - * @tparam T Depth input message type. - * @param depth_msg Depth image or point cloud message. 
- * @param color_image_msg Optional color image message. - * @param detections 2D detection message. - */ - template - void synchronized_callback( - const std::shared_ptr& depth_msg, - const sensor_msgs::msg::Image::ConstSharedPtr& color_image_msg, - const vision_msgs::msg::Detection2DArray::ConstSharedPtr& detections) { - if (!color_image_info_received_) { - return; - } - if (detections->detections.empty() && !visualize_detections_) { - return; - } - - cv::Mat cv_depth_image; - sensor_msgs::msg::PointCloud2 pcl_tf; - cv::Mat cv_color_image; - - if constexpr (std::is_same::value) { - std::string source_frame = depth_msg->header.frame_id; - std::string target_frame = color_image_frame_id_; - - try { - geometry_msgs::msg::TransformStamped transform = - tf_buffer_->lookupTransform( - target_frame, source_frame, - rclcpp::Time(depth_msg->header.stamp)); - - tf2::doTransform(*depth_msg, pcl_tf, transform); - } catch (tf2::TransformException& ex) { - RCLCPP_ERROR(this->get_logger(), - "Failed to transform point cloud: %s", ex.what()); - return; - } - pcl::PointCloud::Ptr cloud( - new pcl::PointCloud); - pcl::fromROSMsg(pcl_tf, *cloud); - - } else if constexpr (std::is_same::value) { - cv_bridge::CvImageConstPtr cv_depth_ptr = - cv_bridge::toCvShare(depth_msg, "32FC1"); - cv_depth_image = cv_depth_ptr->image; - } else { - RCLCPP_ERROR(this->get_logger(), - "Unsupported message type in synchronized_callback."); - return; - } - - if (use_color_image_) { - if (!color_image_msg) { - RCLCPP_ERROR(this->get_logger(), - "Color image message is null."); - return; - } - cv_bridge::CvImageConstPtr cv_ptr = - cv_bridge::toCvShare(color_image_msg, "bgr8"); - - cv_color_image = cv_ptr->image; - } - - std::vector boxes; - boxes.reserve(detections->detections.size()); - for (const auto& detection : detections->detections) { - BoundingBox box = to_bounding_box(detection.bbox); - BoundingBox org_image_box = - valve_detector_->transform_bounding_box(box); - boxes.push_back(org_image_box); 
- } - - std::vector poses; - poses.reserve(boxes.size()); - - pcl::PointCloud::Ptr all_annulus_cloud( - new pcl::PointCloud); - pcl::PointCloud::Ptr all_annulus_plane_cloud( - new pcl::PointCloud); - - if (calculate_angle_ && debug_visualize_) { - cv::Mat angle_debug_image = cv_color_image.clone(); - - angle_detector_->compute_angles_debug(angle_debug_image, boxes); - sensor_msgs::msg::Image::SharedPtr output_msg = - cv_bridge::CvImage(depth_msg->header, "bgr8", angle_debug_image) - .toImageMsg(); - angle_image_pub_->publish(*output_msg); - } else if (calculate_angle_) { - angle_detector_->compute_angles(cv_color_image, boxes); - } - - if constexpr (std::is_same::value) { - valve_detector_->Compute_valve_poses( - pcl_tf, boxes, poses, all_annulus_cloud, - all_annulus_plane_cloud, debug_visualize_); - } else if constexpr (std::is_same::value) { - valve_detector_->Compute_valve_poses( - cv_depth_image, boxes, poses, all_annulus_cloud, - all_annulus_plane_cloud, debug_visualize_); - } - if (debug_visualize_ && all_annulus_cloud && all_annulus_plane_cloud) { - publish_annulus_pcl(all_annulus_cloud, depth_msg->header); - publish_annulus_plane(all_annulus_plane_cloud, depth_msg->header); - } - if (visualize_detections_) { - cv::Mat visualized_image = - valve_detector_->draw_detections(cv_color_image, boxes, poses); - sensor_msgs::msg::Image::SharedPtr output_msg = - cv_bridge::CvImage(depth_msg->header, "bgr8", visualized_image) - .toImageMsg(); - processed_image_pub_->publish(*output_msg); - } - publish_valve_poses(poses, depth_msg->header); - } - - rclcpp::Subscription::SharedPtr - color_image_info_sub_; - message_filters::Subscriber color_image_sub_; - message_filters::Subscriber pcl_sub_; - message_filters::Subscriber depth_image_sub_; - message_filters::Subscriber - detections_sub_; - - rclcpp::Publisher::SharedPtr - valve_poses_pub_; - rclcpp::Publisher::SharedPtr - annulus_pcl_pub_; - rclcpp::Publisher::SharedPtr - annulus_plane_pub_; - 
rclcpp::Publisher::SharedPtr processed_image_pub_; - rclcpp::Publisher::SharedPtr angle_image_pub_; - - // Depth image sync policies uses ExactTime with the assumption - // that the depth image and color image are from the same camera - // and are synchronized. - - // Policy for Depth Image + Color Image + Detections - typedef message_filters::sync_policies::ExactTime< - sensor_msgs::msg::Image, - sensor_msgs::msg::Image, - vision_msgs::msg::Detection2DArray> - SyncPolicy_DI_CI_D; - - // Policy for Depth Image + Detections (no color image) - typedef message_filters::sync_policies:: - ExactTime - SyncPolicy_DI_D; - - // Policy for Point Cloud + Color Image + Detections - typedef message_filters::sync_policies::ApproximateTime< - sensor_msgs::msg::PointCloud2, - sensor_msgs::msg::Image, - vision_msgs::msg::Detection2DArray> - SyncPolicy_PC_CI_D; - - // Policy for Point Cloud + Detections (no color image) - typedef message_filters::sync_policies::ApproximateTime< - sensor_msgs::msg::PointCloud2, - vision_msgs::msg::Detection2DArray> - SyncPolicy_PC_D; - - std::shared_ptr> - sync_di_ci_d_; - std::shared_ptr> sync_di_d_; - std::shared_ptr> - sync_pc_ci_d_; - std::shared_ptr> sync_pc_d_; - - bool color_image_info_received_ = false; - std::unique_ptr valve_detector_; - std::unique_ptr angle_detector_; - std::string color_image_frame_id_; - std::shared_ptr tf_buffer_; - std::shared_ptr tf_listener_; - - bool visualize_detections_ = true; - bool debug_visualize_ = false; - bool calculate_angle_ = true; - bool use_color_image_ = true; - bool use_depth_image_ = true; +public: + explicit ValvePoseNode(const rclcpp::NodeOptions& options); + +private: + void depth_camera_info_cb(const sensor_msgs::msg::CameraInfo::SharedPtr msg); + + BoundingBox to_bbox(const vision_msgs::msg::BoundingBox2D& bbox) const; + + // Returns undistorted copy of bbox (center_x/y corrected for lens distortion). 
+ BoundingBox undistort_bbox(const BoundingBox& bbox) const; + + // Non-maximum suppression: returns indices of kept detections (max 2). + // Two boxes are duplicates when IoMin (intersection / min-area) or IoU + // exceeds iou_duplicate_threshold_. + std::vector filter_duplicate_detections( + const vision_msgs::msg::Detection2DArray& det) const; + + void publish_pose_array(const std::vector& poses, + const std_msgs::msg::Header& header); + + void publish_landmarks(const std::vector& poses, + const std_msgs::msg::Header& header); + + // Sync callback: depth + color + det + void sync_cb(const sensor_msgs::msg::Image::ConstSharedPtr& depth, + const sensor_msgs::msg::Image::ConstSharedPtr& color, + const vision_msgs::msg::Detection2DArray::ConstSharedPtr& det); + + using SyncPolicy = message_filters::sync_policies::ApproximateTime< + sensor_msgs::msg::Image, + sensor_msgs::msg::Image, + vision_msgs::msg::Detection2DArray>; + + bool got_info_{false}; + bool got_depth_info_{false}; + + // params + bool use_color_image_{true}; + bool visualize_detections_{true}; + bool debug_visualize_{false}; + float iou_duplicate_threshold_{0.5f}; + std::string output_frame_id_{}; + + int landmark_type_{1}; + int landmark_subtype_{0}; + + // distortion (populated from color camera_info) + cv::Mat cv_camera_matrix_; + cv::Mat cv_dist_coeffs_; + + // camera data (owned by ROS node, passed to estimator and depth functions) + ImageProperties color_props_{}; + ImageProperties depth_props_{}; + DepthColorExtrinsic depth_color_extrinsic_{}; + + // estimator + std::unique_ptr detector_; + + // subs + rclcpp::Subscription::SharedPtr depth_cam_info_sub_; + message_filters::Subscriber depth_sub_; + message_filters::Subscriber color_sub_; + message_filters::Subscriber det_sub_; + std::shared_ptr> sync_; + + // pubs + rclcpp::Publisher::SharedPtr pose_pub_; + rclcpp::Publisher::SharedPtr pose_stamped_pub_; + rclcpp::Publisher::SharedPtr landmark_pub_; + rclcpp::Publisher::SharedPtr image_pub_; + 
rclcpp::Publisher<sensor_msgs::msg::Image>::SharedPtr depth_colormap_pub_;
+
+    float depth_colormap_vmin_{0.1f};
+    float depth_colormap_vmax_{1125.5f};
+
+    rclcpp::Publisher<sensor_msgs::msg::PointCloud2>::SharedPtr annulus_pub_;
+    rclcpp::Publisher<sensor_msgs::msg::PointCloud2>::SharedPtr plane_pub_;
+    rclcpp::Publisher<sensor_msgs::msg::PointCloud2>::SharedPtr valve_points_pub_;
+    rclcpp::Publisher<sensor_msgs::msg::PointCloud2>::SharedPtr depth_cloud_pub_;
+ Valve pose estimation from YOLOv26 OBB detections + depth image. + you Apache-2.0 ament_cmake + rosidl_default_generators - rclcpp_components - cv_bridge rclcpp + rclcpp_components + message_filters sensor_msgs vision_msgs geometry_msgs std_msgs + cv_bridge pcl_conversions - tf2_geometry_msgs + vortex_msgs - ament_lint_auto - ament_lint_common + rosidl_default_runtime ament_cmake diff --git a/src/angle_detector.cpp b/src/angle_detector.cpp deleted file mode 100644 index 33bf776..0000000 --- a/src/angle_detector.cpp +++ /dev/null @@ -1,199 +0,0 @@ -#include "valve_detection/angle_detector.hpp" -#include -#include "valve_detection/types.hpp" - -namespace valve_detection { - -void AngleDetector::compute_angles(const cv::Mat& color_image, - std::vector& boxes) const { - for (auto& box : boxes) { - double width = box.size_x; - double height = box.size_y; - double center_x = box.center_x; - double center_y = box.center_y; - - int x1 = std::max( - static_cast(center_x - width * params_.line_detection_area), - 0); - int y1 = std::max( - static_cast(center_y - height * params_.line_detection_area), - 0); - int x2 = std::min( - static_cast(center_x + width * params_.line_detection_area), - color_image.cols - 1); - int y2 = std::min( - static_cast(center_y + height * params_.line_detection_area), - color_image.rows - 1); - - if (x1 >= x2 || y1 >= y2) { - box.theta = std::numeric_limits::quiet_NaN(); - continue; - } - - cv::Mat roi = color_image(cv::Rect(x1, y1, x2 - x1, y2 - y1)); - - cv::Mat mask = cv::Mat::zeros(roi.size(), CV_8UC1); - cv::Point center((roi.cols / 2), (roi.rows / 2)); - cv::Size axes(roi.cols / 2, roi.rows / 2); - cv::ellipse(mask, center, axes, 0, 0, 360, cv::Scalar(255), -1); - - cv::Mat masked_roi; - roi.copyTo(masked_roi, mask); - - cv::Mat gray; - cv::cvtColor(masked_roi, gray, cv::COLOR_BGR2GRAY); - cv::Mat edges = apply_canny_edge_detection(gray); - - std::vector lines; - cv::HoughLinesP(edges, lines, params_.hough_rho_res, - 
params_.hough_theta_res, params_.hough_threshold, - params_.hough_min_line_length, - params_.hough_max_line_gap); - - if (lines.empty()) { - box.theta = std::numeric_limits::quiet_NaN(); - continue; - } - - cv::Vec4i longest_line = find_longest_line(lines); - - if (longest_line[0] >= longest_line[2]) { - int temp_x = longest_line[0]; - int temp_y = longest_line[1]; - longest_line[0] = longest_line[2]; - longest_line[1] = longest_line[3]; - longest_line[2] = temp_x; - longest_line[3] = temp_y; - } - - float theta = std::atan2(longest_line[3] - longest_line[1], - longest_line[2] - longest_line[0]); - - if (theta < 0) { - theta += CV_PI; - } - if (theta > CV_PI) { - theta -= CV_PI; - } - box.theta = theta; - } -} - -void AngleDetector::compute_angles_debug( - cv::Mat& image_to_draw_on, - std::vector& boxes) const { - const cv::Scalar ALL_LINES_COLOR(255, 0, 0); - const cv::Scalar LONGEST_LINE_COLOR(0, 255, 0); - const int LINE_THICKNESS = 2; - - cv::Mat output_image = cv::Mat::zeros(image_to_draw_on.size(), CV_8UC3); - - for (auto& box : boxes) { - double width = box.size_x; - double height = box.size_y; - double center_x = box.center_x; - double center_y = box.center_y; - - int x1 = std::max( - static_cast(center_x - width * params_.line_detection_area), - 0); - int y1 = std::max( - static_cast(center_y - height * params_.line_detection_area), - 0); - int x2 = std::min( - static_cast(center_x + width * params_.line_detection_area), - image_to_draw_on.cols - 1); - int y2 = std::min( - static_cast(center_y + height * params_.line_detection_area), - image_to_draw_on.rows - 1); - - if (x1 >= x2 || y1 >= y2) { - box.theta = std::numeric_limits::quiet_NaN(); - continue; - } - - cv::Mat roi = image_to_draw_on(cv::Rect(x1, y1, x2 - x1, y2 - y1)); - - cv::Mat mask = cv::Mat::zeros(roi.size(), CV_8UC1); - cv::Point center((roi.cols / 2), (roi.rows / 2)); - cv::Size axes(roi.cols / 2, roi.rows / 2); - cv::ellipse(mask, center, axes, 0, 0, 360, cv::Scalar(255), -1); - - cv::Mat 
masked_roi; - roi.copyTo(masked_roi, mask); - - cv::Mat gray; - cv::cvtColor(masked_roi, gray, cv::COLOR_BGR2GRAY); - cv::Mat edges = apply_canny_edge_detection(gray); - - std::vector lines; - cv::HoughLinesP(edges, lines, params_.hough_rho_res, - params_.hough_theta_res, params_.hough_threshold, - params_.hough_min_line_length, - params_.hough_max_line_gap); - - if (lines.empty()) { - box.theta = std::numeric_limits::quiet_NaN(); - - cv::Mat output_roi_gray = - output_image(cv::Rect(x1, y1, x2 - x1, y2 - y1)); - cv::Mat edges_color; - cv::cvtColor(edges, edges_color, cv::COLOR_GRAY2BGR); - edges_color.copyTo(output_roi_gray, mask); - - continue; - } - - cv::Mat output_roi = output_image(cv::Rect(x1, y1, x2 - x1, y2 - y1)); - cv::Mat edges_color; - cv::cvtColor(edges, edges_color, cv::COLOR_GRAY2BGR); - edges_color.copyTo(output_roi, mask); - - const cv::Point offset(x1, y1); - for (const auto& line : lines) { - cv::line(output_image, cv::Point(line[0], line[1]) + offset, - cv::Point(line[2], line[3]) + offset, ALL_LINES_COLOR, - LINE_THICKNESS); - } - - cv::Vec4i longest_line = find_longest_line(lines); - cv::line(output_image, - cv::Point(longest_line[0], longest_line[1]) + offset, - cv::Point(longest_line[2], longest_line[3]) + offset, - LONGEST_LINE_COLOR, LINE_THICKNESS); - - box.theta = std::atan2(longest_line[3] - longest_line[1], - longest_line[2] - longest_line[0]); - } - - image_to_draw_on = output_image; -} - -cv::Vec4i AngleDetector::find_longest_line( - const std::vector& lines) const { - if (lines.empty()) { - return cv::Vec4i(0, 0, 0, 0); - } - - cv::Vec4i longest_line = lines[0]; - double max_length = 0; - - for (const auto& line : lines) { - double length = std::hypot(line[2] - line[0], line[3] - line[1]); - if (length > max_length) { - max_length = length; - longest_line = line; - } - } - return longest_line; -} - -cv::Mat AngleDetector::apply_canny_edge_detection( - const cv::Mat& gray_image) const { - cv::Mat edges; - cv::Canny(gray_image, 
edges, params_.canny_low_threshold, - params_.canny_high_threshold, params_.canny_aperture_size); - return edges; -} - -} // namespace valve_detection diff --git a/src/depth_image_processing.cpp b/src/depth_image_processing.cpp index c76fee4..80db73e 100644 --- a/src/depth_image_processing.cpp +++ b/src/depth_image_processing.cpp @@ -1,88 +1,281 @@ +// depth_image_processing.cpp +// Depth-to-3D back-projection, point cloud extraction, and color-to-depth pixel projection. #include "valve_detection/depth_image_processing.hpp" +#include +#include +#include namespace valve_detection { -void project_pixel_to_point(int u, - int v, - float depth, - double fx, - double fy, - double cx, - double cy, - pcl::PointXYZ& point) { - if (depth <= 0 || depth == std::numeric_limits::infinity() || - std::isnan(depth)) { - point.x = point.y = point.z = std::numeric_limits::quiet_NaN(); - return; +// Back-projects a depth pixel (u, v, depth) to a 3D point using camera intrinsics. +void project_pixel_to_point( + int u, int v, float depth, + double fx, double fy, double cx, double cy, + pcl::PointXYZ& out) +{ + if (depth <= 0.0f || std::isnan(depth) || std::isinf(depth)) { + out.x = out.y = out.z = std::numeric_limits::quiet_NaN(); + return; + } + out.x = static_cast((u - cx) * depth / fx); + out.y = static_cast((v - cy) * depth / fy); + out.z = depth; +} + +// Extracts depth points that fall inside an elliptic annulus around the bbox center (no alignment). 
+void extract_annulus_pcl(
+    const cv::Mat& depth_image,
+    const BoundingBox& bbox,
+    const ImageProperties& img_props,
+    float annulus_radius_ratio,
+    pcl::PointCloud<pcl::PointXYZ>::Ptr& out)
+{
+  out->clear();
+
+  const float cx = bbox.center_x;
+  const float cy = bbox.center_y;
+  const float outer_rx = bbox.size_x * 0.5f;
+  const float outer_ry = bbox.size_y * 0.5f;
+
+  if (outer_rx < 2.0f || outer_ry < 2.0f) return;
+
+  const float inner_rx = outer_rx * annulus_radius_ratio;
+  const float inner_ry = outer_ry * annulus_radius_ratio;
+
+  const int u0 = static_cast<int>(std::floor(cx - outer_rx));
+  const int u1 = static_cast<int>(std::ceil(cx + outer_rx));
+  const int v0 = static_cast<int>(std::floor(cy - outer_ry));
+  const int v1 = static_cast<int>(std::ceil(cy + outer_ry));
outer_radius_x = bbox.size_x / 2.0f; - float outer_radius_y = bbox.size_y / 2.0f; - - float inner_radius_x = outer_radius_x * annulus_radius_ratio; - float inner_radius_y = outer_radius_y * annulus_radius_ratio; - - int upper_bound_size = static_cast( - M_PI * - ((outer_radius_x * outer_radius_y - inner_radius_x * inner_radius_y) + - 1)); - - cloud->clear(); - cloud->reserve(upper_bound_size); - - for (int y = static_cast(-outer_radius_y); - y <= static_cast(outer_radius_y); ++y) { - for (int x = static_cast(-outer_radius_x); - x <= static_cast(outer_radius_x); ++x) { - float dx_scaled_outer = x / outer_radius_x; - float dy_scaled_outer = y / outer_radius_y; - bool is_inside_outer_ellipse = - (dx_scaled_outer * dx_scaled_outer + - dy_scaled_outer * dy_scaled_outer) <= 1.0f; - - float dx_scaled_inner = x / inner_radius_x; - float dy_scaled_inner = y / inner_radius_y; - bool is_outside_inner_ellipse = - (dx_scaled_inner * dx_scaled_inner + - dy_scaled_inner * dy_scaled_inner) > 1.0f; - - if (is_inside_outer_ellipse && is_outside_inner_ellipse) { - int u = static_cast(center_x + x); - int v = static_cast(center_y + y); - - if (u >= 0 && u < depth_image.cols && v >= 0 && - v < depth_image.rows) { - float depth_value = depth_image.at(v, u); - pcl::PointXYZ point; - project_pixel_to_point( - u, v, depth_value, image_properties.intr.fx, - image_properties.intr.fy, image_properties.intr.cx, - image_properties.intr.cy, point); - - if (!std::isnan(point.x) && !std::isnan(point.y) && - !std::isnan(point.z)) { - cloud->points.push_back(point); - } - } - } - } +// Returns hardcoded depth-to-color extrinsic for Intel RealSense D555. +// Verify with: ros2 topic echo /realsense/extrinsics/depth_to_color +DepthColorExtrinsic d555_depth_to_color_extrinsic() { + DepthColorExtrinsic extr; + // Both depth and color optical frames share the same body-to-optical + // rotation, so R is identity. 
+ extr.R = Eigen::Matrix3f::Identity(); + // t = depth origin expressed in color optical frame. + // Color is 0.059 m in -y (rightward) of depth in body frame, so depth is + // +0.059 m in y (leftward) of color. Converting to optical frame: + // t_optical = R_body_to_optical * [0, +0.059, 0] + // = [[0,-1,0],[0,0,-1],[1,0,0]] * [0, 0.059, 0] + // = [-0.059, 0, 0] + extr.t = Eigen::Vector3f(-0.059f, 0.0f, 0.0f); + return extr; +} + +// Like extract_annulus_pcl but transforms depth pixels into the color frame before the annulus test. +void extract_annulus_pcl_aligned( + const cv::Mat& depth_image, + const BoundingBox& color_bbox, + const ImageProperties& color_props, + const ImageProperties& depth_props, + const DepthColorExtrinsic& extr, + float annulus_radius_ratio, + pcl::PointCloud::Ptr& out) +{ + out->clear(); + + const float cx_c = color_bbox.center_x; + const float cy_c = color_bbox.center_y; + const float outer_rx = color_bbox.size_x * 0.5f; + const float outer_ry = color_bbox.size_y * 0.5f; + + if (outer_rx < 2.0f || outer_ry < 2.0f) return; + + const float inner_rx = outer_rx * annulus_radius_ratio; + const float inner_ry = outer_ry * annulus_radius_ratio; + + // Approximate scale between depth and color focal lengths to define a + // coarse search region in depth-image coordinates. A 30-pixel margin + // accounts for the lateral offset introduced by the extrinsic. + const float scale = (depth_props.intr.fx > 0.0 && color_props.intr.fx > 0.0) + ? 
static_cast<float>(depth_props.intr.fx / color_props.intr.fx)
+          : 1.0f;
+  constexpr int kMargin = 30;
+
+  const int u0_d = std::max(0,
+      static_cast<int>((cx_c - outer_rx) * scale) - kMargin);
+  const int u1_d = std::min(depth_image.cols - 1,
+      static_cast<int>((cx_c + outer_rx) * scale) + kMargin);
+  const int v0_d = std::max(0,
+      static_cast<int>((cy_c - outer_ry) * scale) - kMargin);
+  const int v1_d = std::min(depth_image.rows - 1,
+      static_cast<int>((cy_c + outer_ry) * scale) + kMargin);
+void extract_bbox_pcl_aligned(
+    const cv::Mat& depth_image,
+    const BoundingBox& color_bbox,
+    const ImageProperties& color_props,
+    const ImageProperties& depth_props,
+    const DepthColorExtrinsic& extr,
+    pcl::PointCloud<pcl::PointXYZ>::Ptr& out)
+ const float u_c = static_cast( + color_props.intr.fx * P_c.x() / P_c.z() + color_props.intr.cx); + const float v_c = static_cast( + color_props.intr.fy * P_c.y() / P_c.z() + color_props.intr.cy); + + // Rotate into the OBB local frame and test against the half-extents. + const float dx = u_c - color_bbox.center_x; + const float dy = v_c - color_bbox.center_y; + const float local_x = cos_t * dx + sin_t * dy; + const float local_y = -sin_t * dx + cos_t * dy; + + if (std::abs(local_x) > half_w || std::abs(local_y) > half_h) continue; + + out->points.push_back({P_c.x(), P_c.y(), P_c.z()}); } + } + + out->width = static_cast(out->points.size()); + out->height = 1; + out->is_dense = false; +} + +// Projects a color image pixel to depth image coordinates using the full intrinsic + extrinsic pipeline. +cv::Point2f project_color_pixel_to_depth( + float u_c, float v_c, float Z, + const ImageProperties& color_props, + const ImageProperties& depth_props, + const DepthColorExtrinsic& extr) +{ + // Back-project color pixel → 3-D point in color camera frame. + const float Xc = (u_c - static_cast(color_props.intr.cx)) * Z + / static_cast(color_props.intr.fx); + const float Yc = (v_c - static_cast(color_props.intr.cy)) * Z + / static_cast(color_props.intr.fy); + const Eigen::Vector3f Pc(Xc, Yc, Z); + + // Transform to depth camera frame: P_depth = R^T * (P_color - t) + const Eigen::Vector3f Pd = extr.R.transpose() * (Pc - extr.t); + + if (Pd.z() <= 0.0f) return {u_c, v_c}; // degenerate – return original - cloud->width = static_cast(cloud->points.size()); - cloud->height = 1; - cloud->is_dense = false; + // Project into depth image. 
+  const float u_d = static_cast<float>(depth_props.intr.fx) * Pd.x() / Pd.z()
+                    + static_cast<float>(depth_props.intr.cx);
+  const float v_d = static_cast<float>(depth_props.intr.fy) * Pd.y() / Pd.z()
+                    + static_cast<float>(depth_props.intr.cy);
- bool is_outside_inner_ellipse = - (dx_scaled_inner * dx_scaled_inner + - dy_scaled_inner * dy_scaled_inner) > 1.0f; - - if (is_inside_outer_ellipse && is_outside_inner_ellipse) { - cloud->points.push_back(point_in); - } - } - } - - cloud->width = static_cast(cloud->points.size()); - cloud->height = 1; - cloud->is_dense = false; -} - -} // namespace valve_detection diff --git a/src/pose_estimator.cpp b/src/pose_estimator.cpp new file mode 100644 index 0000000..ad7e2fc --- /dev/null +++ b/src/pose_estimator.cpp @@ -0,0 +1,249 @@ +// pose_estimator.cpp +// RANSAC plane fitting, normal/ray-plane intersection, and 3D pose computation. +#include "valve_detection/pose_estimator.hpp" + +namespace valve_detection { + +// Initializes YOLO dimensions, annulus ratio, RANSAC parameters, and handle offset. +PoseEstimator::PoseEstimator(int yolo_img_width, + int yolo_img_height, + float annulus_radius_ratio, + float plane_ransac_threshold, + int plane_ransac_max_iterations, + float valve_handle_offset) + : yolo_img_width_(yolo_img_width), + yolo_img_height_(yolo_img_height), + annulus_radius_ratio_(annulus_radius_ratio), + plane_ransac_threshold_(plane_ransac_threshold), + plane_ransac_max_iterations_(plane_ransac_max_iterations), + valve_handle_offset_(valve_handle_offset) {} + +// Stores color camera intrinsics and image dimensions. +void PoseEstimator::set_color_image_properties(const ImageProperties& props) { + color_image_properties_ = props; +} + +// Stores depth camera intrinsics and image dimensions. +void PoseEstimator::set_depth_image_properties(const ImageProperties& props) { + depth_image_properties_ = props; + has_depth_props_ = (props.intr.fx > 0.0); +} + +// Stores the depth-to-color extrinsic transform. +void PoseEstimator::set_depth_color_extrinsic(const DepthColorExtrinsic& extr) { + depth_color_extrinsic_ = extr; +} + +// Computes letterbox scale factor and padding from the color image and YOLO dimensions. 
+void PoseEstimator::calculate_letterbox_padding() { + int org_image_width = color_image_properties_.dim.width; + int org_image_height = color_image_properties_.dim.height; + + letterbox_scale_factor_ = + std::min(static_cast(yolo_img_width_) / org_image_width, + static_cast(yolo_img_height_) / org_image_height); + + double resized_width = org_image_width * letterbox_scale_factor_; + double resized_height = org_image_height * letterbox_scale_factor_; + + letterbox_pad_x_ = (yolo_img_width_ - resized_width) / 2.0; + letterbox_pad_y_ = (yolo_img_height_ - resized_height) / 2.0; +} + +// Remaps a bounding box from YOLO letterbox space back to original image coordinates. +BoundingBox PoseEstimator::transform_bounding_box(const BoundingBox& bbox) const { + BoundingBox transformed = bbox; + transformed.center_x = (bbox.center_x - letterbox_pad_x_) / letterbox_scale_factor_; + transformed.center_y = (bbox.center_y - letterbox_pad_y_) / letterbox_scale_factor_; + transformed.size_x /= letterbox_scale_factor_; + transformed.size_y /= letterbox_scale_factor_; + return transformed; +} + +// Fits a plane to the point cloud using RANSAC; returns false if no inliers are found. +bool PoseEstimator::segment_plane( + const pcl::PointCloud::Ptr& cloud, + pcl::ModelCoefficients::Ptr& coefficients, + pcl::PointIndices::Ptr& inliers) const +{ + if (cloud->points.empty()) return false; + + pcl::SACSegmentation seg; + seg.setOptimizeCoefficients(true); + seg.setModelType(pcl::SACMODEL_PLANE); + seg.setMethodType(pcl::SAC_RANSAC); + seg.setDistanceThreshold(plane_ransac_threshold_); + seg.setMaxIterations(plane_ransac_max_iterations_); + seg.setInputCloud(cloud); + seg.segment(*inliers, *coefficients); + + return !inliers->indices.empty(); +} + +// Returns the normalized 3D ray direction from the camera origin through the bbox center. 
+Eigen::Vector3f PoseEstimator::get_ray_direction(const BoundingBox& bbox) const {
+    const float xc = (bbox.center_x - static_cast<float>(color_image_properties_.intr.cx))
+        / static_cast<float>(color_image_properties_.intr.fx);
+    const float yc = (bbox.center_y - static_cast<float>(color_image_properties_.intr.cy))
+        / static_cast<float>(color_image_properties_.intr.fy);
+    return Eigen::Vector3f(xc, yc, 1.0f).normalized();
+}
+Eigen::Matrix3f PoseEstimator::create_rotation_matrix( + const pcl::ModelCoefficients::Ptr& coefficients, + const Eigen::Vector3f& plane_normal, + float angle) const +{ + if (!coefficients || coefficients->values.size() < 4) + return Eigen::Matrix3f::Identity(); + + const Eigen::Vector3f z_axis = plane_normal; + const float D = coefficients->values[3]; + const float fx = static_cast(color_image_properties_.intr.fx); + const float fy = static_cast(color_image_properties_.intr.fy); + const float cx = static_cast(color_image_properties_.intr.cx); + const float cy = static_cast(color_image_properties_.intr.cy); + + Eigen::Matrix3f K; + K << fx, 0, cx, 0, fy, cy, 0, 0, 1; + const Eigen::Matrix3f Kinv = K.inverse(); + + // Two image points along the bbox angle through the principal point. + const float len = 50.0f; + const Eigen::Vector3f p1(cx, cy, 1.f); + const Eigen::Vector3f p2(cx + len * std::cos(angle), + cy + len * std::sin(angle), 1.f); + + // Back project points to rays. + const Eigen::Vector3f r1 = (Kinv * p1).normalized(); + const Eigen::Vector3f r2 = (Kinv * p2).normalized(); + + // Compute intersections of rays with the plane. + const float denom1 = z_axis.dot(r1); + const float denom2 = z_axis.dot(r2); + if (std::abs(denom1) < 1e-6f || std::abs(denom2) < 1e-6f) + return Eigen::Matrix3f::Identity(); + + const Eigen::Vector3f X1 = (-D / denom1) * r1; + const Eigen::Vector3f X2 = (-D / denom2) * r2; + + // Compute in-plane direction corresponding to the image line angle. + Eigen::Vector3f x_axis = (X2 - X1).normalized(); + + // Project onto the plane (for numerical stability). + x_axis = (x_axis - x_axis.dot(z_axis) * z_axis).normalized(); + + // Ensure consistent direction (avoid flipping frame between frames). 
+ if (filter_direction_.dot(x_axis) < 0) x_axis = -x_axis; + filter_direction_ = x_axis; + + const Eigen::Vector3f y_axis = z_axis.cross(x_axis).normalized(); + x_axis = y_axis.cross(z_axis).normalized(); + + Eigen::Matrix3f rot; + rot.col(0) = x_axis; // X_obj: direction of the image line + rot.col(1) = y_axis; // Y_obj: perpendicular in-plane + rot.col(2) = z_axis; // Z_obj: plane normal + return rot; +} + +// Extracts a point cloud from the depth image, fits a plane, and returns the valve pose. +bool PoseEstimator::compute_pose_from_depth( + const cv::Mat& depth_image, + const BoundingBox& bbox_org, + Pose& out_pose, + pcl::PointCloud::Ptr annulus_dbg, + pcl::PointCloud::Ptr plane_dbg, + bool debug_visualize) const +{ + pcl::PointCloud::Ptr cloud(new pcl::PointCloud); + + if (has_depth_props_) { + extract_bbox_pcl_aligned( + depth_image, bbox_org, + color_image_properties_, depth_image_properties_, + depth_color_extrinsic_, + cloud); + } else { + extract_annulus_pcl( + depth_image, bbox_org, + color_image_properties_, + annulus_radius_ratio_, cloud); + } + + if (cloud->points.size() < 4) return false; + + if (debug_visualize && annulus_dbg) *annulus_dbg += *cloud; + + pcl::ModelCoefficients::Ptr coeff(new pcl::ModelCoefficients); + pcl::PointIndices::Ptr inliers(new pcl::PointIndices); + if (!segment_plane(cloud, coeff, inliers)) return false; + + if (debug_visualize && plane_dbg) { + for (int idx : inliers->indices) + plane_dbg->points.push_back(cloud->points[idx]); + plane_dbg->width = static_cast(plane_dbg->points.size()); + plane_dbg->height = 1; + plane_dbg->is_dense = false; + } + + const Eigen::Vector3f ray = get_ray_direction(bbox_org); + const Eigen::Vector3f normal = compute_plane_normal(coeff, ray); + if (normal.isZero()) return false; + + const Eigen::Vector3f pos = find_ray_plane_intersection(coeff, ray); + if (pos.isZero()) return false; + + out_pose.position = shift_point_along_normal(pos, normal); + const Eigen::Matrix3f rot = 
create_rotation_matrix(coeff, normal, bbox_org.theta); + out_pose.orientation = Eigen::Quaternionf(rot).normalized(); + + return true; +} + +} // namespace valve_detection diff --git a/src/valve_detector.cpp b/src/valve_detector.cpp deleted file mode 100644 index 3ad858a..0000000 --- a/src/valve_detector.cpp +++ /dev/null @@ -1,290 +0,0 @@ -#include "valve_detection/valve_detector.hpp" - -namespace valve_detection { - -ValveDetector::ValveDetector(int yolo_img_width_, - int yolo_img_height_, - float annulus_radius_ratio, - float plane_ransac_threshold, - int plane_ransac_max_iterations, - float valve_handle_offset) - : yolo_img_width_(yolo_img_width_), - yolo_img_height_(yolo_img_height_), - annulus_radius_ratio_(annulus_radius_ratio), - plane_ransac_threshold_(plane_ransac_threshold), - plane_ransac_max_iterations_(plane_ransac_max_iterations), - valve_handle_offset_(valve_handle_offset) {} - -cv::Mat ValveDetector::draw_detections(const cv::Mat& image, - const std::vector& boxes, - const std::vector& poses) const { - cv::Mat visualized_image = image.clone(); - - cv::Mat camera_matrix = - (cv::Mat_(3, 3) << color_image_properties_.intr.fx, 0, - color_image_properties_.intr.cx, 0, color_image_properties_.intr.fy, - color_image_properties_.intr.cy, 0, 0, 1); - cv::Mat dist_coeffs = cv::Mat::zeros(4, 1, CV_64F); - - for (size_t i = 0; i < boxes.size(); ++i) { - const auto& box = boxes[i]; - - int x1 = box.center_x - box.size_x / 2; - int y1 = box.center_y - box.size_y / 2; - int x2 = box.center_x + box.size_x / 2; - int y2 = box.center_y + box.size_y / 2; - cv::rectangle(visualized_image, cv::Point(x1, y1), cv::Point(x2, y2), - cv::Scalar(0, 255, 0), 2); - - std::ostringstream label; - label << "id:" << i; - if (!std::isnan(box.theta)) { - label << "Valve θ=" << std::fixed << std::setprecision(2) - << (box.theta * 180.0 / M_PI) << "°"; - cv::putText(visualized_image, label.str(), cv::Point(x1, y1 - 5), - cv::FONT_HERSHEY_SIMPLEX, 0.5, cv::Scalar(0, 255, 0), - 1); - } 
- // Draw pose axes only if valid pose exists - if (i < poses.size()) { - const Pose& pose = poses[i]; - if (!std::isnan(pose.position[0]) && - !std::isnan(pose.position[1]) && - !std::isnan(pose.position[2]) && !std::isnan(box.theta)) { - cv::Mat tvec = (cv::Mat_(3, 1) << pose.position[0], - pose.position[1], pose.position[2]); - - Eigen::Matrix3f rotation_matrix = - pose.orientation.toRotationMatrix(); - cv::Mat rmat = (cv::Mat_(3, 3) << rotation_matrix(0, 0), - rotation_matrix(0, 1), rotation_matrix(0, 2), - rotation_matrix(1, 0), rotation_matrix(1, 1), - rotation_matrix(1, 2), rotation_matrix(2, 0), - rotation_matrix(2, 1), rotation_matrix(2, 2)); - cv::Mat rvec; - cv::Rodrigues(rmat, rvec); - - cv::drawFrameAxes(visualized_image, camera_matrix, dist_coeffs, - rvec, tvec, 0.1); - } - } - } - return visualized_image; -} - -void ValveDetector::rmat_to_quat(const Eigen::Matrix3f& rotation_matrix, - Eigen::Quaternionf& quat) const { - quat = Eigen::Quaternionf(rotation_matrix); - quat.normalize(); -} - -void ValveDetector::project_point_to_pixel(float x, - float y, - float z, - int& u, - int& v) const { - if (z <= 0) { - u = v = -1; // Invalid pixel coordinates - return; - } - - double fx = color_image_properties_.intr.fx; - double fy = color_image_properties_.intr.fy; - double cx = color_image_properties_.intr.cx; - double cy = color_image_properties_.intr.cy; - - u = static_cast((x * fx / z) + cx); - v = static_cast((y * fy / z) + cy); -} - -void ValveDetector::calculate_letterbox_padding() { - int org_image_width = color_image_properties_.dim.width; - int org_image_height = color_image_properties_.dim.height; - - letterbox_scale_factor_ = - std::min(static_cast(yolo_img_width_) / org_image_width, - static_cast(yolo_img_height_) / org_image_height); - - double resized_width = org_image_width * letterbox_scale_factor_; - double resized_height = org_image_height * letterbox_scale_factor_; - - letterbox_pad_x_ = (yolo_img_width_ - resized_width) / 2.0; - 
letterbox_pad_y_ = (yolo_img_height_ - resized_height) / 2.0; -} - -BoundingBox ValveDetector::transform_bounding_box( - const BoundingBox& bbox) const { - BoundingBox transformed_bbox = bbox; - - transformed_bbox.center_x = - (bbox.center_x - letterbox_pad_x_) / letterbox_scale_factor_; - transformed_bbox.center_y = - (bbox.center_y - letterbox_pad_y_) / letterbox_scale_factor_; - transformed_bbox.size_x /= letterbox_scale_factor_; - transformed_bbox.size_y /= letterbox_scale_factor_; - - return transformed_bbox; -} - -bool ValveDetector::segment_plane( - const pcl::PointCloud::Ptr& cloud, - pcl::ModelCoefficients::Ptr& coefficients, - pcl::PointIndices::Ptr& inliers) const { - if (cloud->points.empty()) { - return false; - } - - pcl::SACSegmentation seg; - seg.setOptimizeCoefficients(true); - seg.setModelType(pcl::SACMODEL_PLANE); - seg.setMethodType(pcl::SAC_RANSAC); - seg.setDistanceThreshold(plane_ransac_threshold_); - seg.setMaxIterations(plane_ransac_max_iterations_); - seg.setInputCloud(cloud); - seg.segment(*inliers, *coefficients); - - return !inliers->indices.empty(); -} - -Eigen::Vector3f ValveDetector::get_ray_direction( - const BoundingBox& bbox) const { - float u = bbox.center_x; - float v = bbox.center_y; - - float fx = color_image_properties_.intr.fx; - float fy = color_image_properties_.intr.fy; - float cx = color_image_properties_.intr.cx; - float cy = color_image_properties_.intr.cy; - - float xc = (u - cx) / fx; - float yc = (v - cy) / fy; - - Eigen::Vector3f ray_direction; - ray_direction << xc, yc, 1.0f; - - return ray_direction.normalized(); -} - -Eigen::Vector3f ValveDetector::compute_plane_normal( - const pcl::ModelCoefficients::Ptr& coefficients, - const Eigen::Vector3f& ray_direction) const { - Eigen::Vector3f normal_vector; - - if (!coefficients || coefficients->values.size() < 3) { - return Eigen::Vector3f::Zero(); - } - - normal_vector << coefficients->values[0], coefficients->values[1], - coefficients->values[2]; - - 
normal_vector.normalize(); - - if (normal_vector.dot(ray_direction) > 0.0) { - normal_vector = -normal_vector; - } - - return normal_vector; -} - -Eigen::Vector3f ValveDetector::find_ray_plane_intersection( - const pcl::ModelCoefficients::Ptr& coefficients, - const Eigen::Vector3f& ray_direction) const { - if (!coefficients || coefficients->values.size() < 4) { - return Eigen::Vector3f::Zero(); - } - - Eigen::Vector3f plane_normal; - plane_normal << coefficients->values[0], coefficients->values[1], - coefficients->values[2]; - float D = coefficients->values[3]; - - float normal_dot_ray = plane_normal.dot(ray_direction); - - if (std::abs(normal_dot_ray) < 1e-6) { - // The ray is parallel or contained within the plane. - return Eigen::Vector3f::Zero(); - } - - float t = -D / normal_dot_ray; - - Eigen::Vector3f intersection_point = t * ray_direction; - - return intersection_point; -} - -Eigen::Vector3f ValveDetector::shift_point_along_normal( - const Eigen::Vector3f& intersection_point, - const Eigen::Vector3f& plane_normal) const { - return intersection_point + (plane_normal * valve_handle_offset_); -} - -Eigen::Matrix3f ValveDetector::create_rotation_matrix( - const pcl::ModelCoefficients::Ptr& coefficients, - const Eigen::Vector3f& plane_normal, - float angle) { - if (!coefficients || coefficients->values.size() < 4) { - return Eigen::Matrix3f::Identity(); - } - - Eigen::Vector3f z_axis = plane_normal; - float D = coefficients->values[3]; - - float fx = color_image_properties_.intr.fx; - float fy = color_image_properties_.intr.fy; - float cx = color_image_properties_.intr.cx; - float cy = color_image_properties_.intr.cy; - - Eigen::Matrix3f K; - K << fx, 0, cx, 0, fy, cy, 0, 0, 1; - Eigen::Matrix3f Kinv = K.inverse(); - - // Define two image points along the given angle (through principal point) - // --- - float len = 50.0f; // arbitrary pixel length (only direction matters) - Eigen::Vector3f p1(cx, cy, 1.f); - Eigen::Vector3f p2(cx + len * std::cos(angle), cy + 
len * std::sin(angle), - 1.f); - - // Back project points to rays - Eigen::Vector3f r1 = (Kinv * p1).normalized(); - Eigen::Vector3f r2 = (Kinv * p2).normalized(); - - // Compute intersections of rays with the plane - float denom1 = z_axis.dot(r1); - float denom2 = z_axis.dot(r2); - - if (std::abs(denom1) < 1e-6f || std::abs(denom2) < 1e-6f) { - // rays parallel to plane — return identity fallback - return Eigen::Matrix3f::Identity(); - } - - float t1 = -D / denom1; - float t2 = -D / denom2; - - Eigen::Vector3f X1 = t1 * r1; - Eigen::Vector3f X2 = t2 * r2; - - // Compute in-plane direction corresponding to the image line angle - Eigen::Vector3f x_axis = (X2 - X1).normalized(); - - // Project onto the plane (for numerical stability) - x_axis = (x_axis - x_axis.dot(z_axis) * z_axis).normalized(); - - // Ensure consistent direction (optional: avoid flipping frame between - // frames) - if (filter_direction_.dot(x_axis) < 0) - x_axis = -x_axis; - filter_direction_ = x_axis; - - Eigen::Vector3f y_axis = z_axis.cross(x_axis).normalized(); - x_axis = y_axis.cross(z_axis).normalized(); - - Eigen::Matrix3f rotation_matrix; - rotation_matrix.col(0) = x_axis; // X_obj: direction of the image line - rotation_matrix.col(1) = y_axis; // Y_obj: perpendicular in-plane - rotation_matrix.col(2) = z_axis; // Z_obj: plane normal - - return rotation_matrix; -} - -} // namespace valve_detection diff --git a/src/valve_pose_ros.cpp b/src/valve_pose_ros.cpp index 8a07168..62b784e 100644 --- a/src/valve_pose_ros.cpp +++ b/src/valve_pose_ros.cpp @@ -1,298 +1,500 @@ +// valve_pose_ros.cpp +// ROS node: subscribes to depth, color, and detections; publishes valve poses and visualizations. 
#include "valve_detection_ros/valve_pose_ros.hpp" -namespace valve_detection { - -using namespace std::placeholders; +#include +#include +#include +#include -ValvePoseNode::ValvePoseNode(const rclcpp::NodeOptions& options) - : Node("valve_detection_node", options) { - init_valve_detector(); +#include +#include - setup_sync(); +namespace valve_detection { - if (calculate_angle_) { - init_angle_detector(); - } +using std::placeholders::_1; +using std::placeholders::_2; +using std::placeholders::_3; - std::string color_image_info_topic = - this->declare_parameter("color_image_info_topic"); - - rclcpp::QoS qos = rclcpp::QoS(rclcpp::KeepLast(1)) - .reliability(RMW_QOS_POLICY_RELIABILITY_BEST_EFFORT); - - color_image_info_sub_ = - this->create_subscription( - color_image_info_topic, qos, - std::bind(&ValvePoseNode::color_image_info_callback, this, - std::placeholders::_1)); - - std::string valve_poses_pub_topic = - this->declare_parameter("valve_poses_pub_topic"); - - valve_poses_pub_ = this->create_publisher( - valve_poses_pub_topic, qos); - - debug_visualize_ = this->declare_parameter("debug_visualize"); - - if (debug_visualize_) { - std::string annulus_pub_topic = - this->declare_parameter("annulus_pub_topic"); - annulus_pcl_pub_ = - this->create_publisher( - annulus_pub_topic, qos); - std::string plane_pub_topic = - this->declare_parameter("plane_pub_topic"); - annulus_plane_pub_ = - this->create_publisher( - plane_pub_topic, qos); - angle_image_pub_ = this->create_publisher( - this->declare_parameter( - "angle_detection_image_pub_topic"), - qos); - } - if (visualize_detections_) { - std::string processed_image_pub_topic = - this->declare_parameter("processed_image_pub_topic"); - processed_image_pub_ = this->create_publisher( - processed_image_pub_topic, qos); - } - if (!use_depth_image_) { - tf_buffer_ = std::make_shared(this->get_clock()); - tf_listener_ = - std::make_shared(*tf_buffer_); - } +// Declares all ROS parameters, initializes the pose estimator, and sets 
up subscribers and publishers. +ValvePoseNode::ValvePoseNode(const rclcpp::NodeOptions& options) +: Node("valve_pose_node", options) +{ + const auto depth_topic = declare_parameter("depth_image_sub_topic"); + const auto color_topic = declare_parameter("color_image_sub_topic"); + const auto det_topic = declare_parameter("detections_sub_topic"); + const auto depth_info_topic = declare_parameter("depth_image_info_topic"); + + const auto pose_stamped_topic = declare_parameter("valve_pose_stamped_pub_topic"); + const auto pose_topic = declare_parameter("valve_poses_pub_topic"); + const auto lm_topic = declare_parameter("landmarks_pub_topic"); + const auto img_topic = declare_parameter("processed_image_pub_topic"); + const auto depth_color_topic = declare_parameter("depth_colormap_pub_topic"); + const auto valve_points_topic = declare_parameter("valve_points_pub_topic"); + const auto depth_cloud_topic = declare_parameter("depth_cloud_pub_topic"); + + depth_colormap_vmin_ = static_cast(declare_parameter("depth_colormap_value_min")); + depth_colormap_vmax_ = static_cast(declare_parameter("depth_colormap_value_max")); + + const int yolo_w = declare_parameter("yolo_img_width"); + const int yolo_h = declare_parameter("yolo_img_height"); + const float annulus_ratio = declare_parameter("annulus_radius_ratio"); + const float ransac_thresh = declare_parameter("plane_ransac_threshold"); + const int ransac_iters = declare_parameter("plane_ransac_max_iterations"); + const float handle_off = declare_parameter("valve_handle_offset"); + + use_color_image_ = declare_parameter("use_color_image"); + visualize_detections_ = declare_parameter("visualize_detections"); + debug_visualize_ = declare_parameter("debug_visualize"); + iou_duplicate_threshold_ = static_cast( + declare_parameter("iou_duplicate_threshold")); + output_frame_id_ = declare_parameter("output_frame_id"); + + landmark_type_ = declare_parameter("landmark_type"); + landmark_subtype_ = declare_parameter("landmark_subtype"); + 
+ detector_ = std::make_unique( + yolo_w, yolo_h, annulus_ratio, ransac_thresh, ransac_iters, handle_off); + + depth_color_extrinsic_ = d555_depth_to_color_extrinsic(); + detector_->set_depth_color_extrinsic(depth_color_extrinsic_); + + color_props_.intr.fx = declare_parameter("color_fx"); + color_props_.intr.fy = declare_parameter("color_fy"); + color_props_.intr.cx = declare_parameter("color_cx"); + color_props_.intr.cy = declare_parameter("color_cy"); + color_props_.dim.width = declare_parameter("color_image_width"); + color_props_.dim.height = declare_parameter("color_image_height"); + + detector_->set_color_image_properties(color_props_); + detector_->calculate_letterbox_padding(); + + cv_camera_matrix_ = (cv::Mat_(3, 3) + << color_props_.intr.fx, 0, color_props_.intr.cx, + 0, color_props_.intr.fy, color_props_.intr.cy, + 0, 0, 1); + + cv_dist_coeffs_ = (cv::Mat_(5, 1) + << declare_parameter("color_d1"), + declare_parameter("color_d2"), + declare_parameter("color_d3"), + declare_parameter("color_d4"), + declare_parameter("color_d5")); + + got_info_ = true; + RCLCPP_INFO(get_logger(), + "Color intrinsics loaded from config (fx=%.2f fy=%.2f cx=%.2f cy=%.2f)", + color_props_.intr.fx, color_props_.intr.fy, + color_props_.intr.cx, color_props_.intr.cy); + + depth_props_.intr.fx = declare_parameter("depth_fx"); + depth_props_.intr.fy = declare_parameter("depth_fy"); + depth_props_.intr.cx = declare_parameter("depth_cx"); + depth_props_.intr.cy = declare_parameter("depth_cy"); + depth_props_.dim.width = declare_parameter("depth_image_width"); + depth_props_.dim.height = declare_parameter("depth_image_height"); + + detector_->set_depth_image_properties(depth_props_); + got_depth_info_ = true; + RCLCPP_INFO(get_logger(), + "Depth intrinsics loaded from config (fx=%.2f fy=%.2f cx=%.2f cy=%.2f)", + depth_props_.intr.fx, depth_props_.intr.fy, + depth_props_.intr.cx, depth_props_.intr.cy); + + const auto qos = rclcpp::QoS(rclcpp::KeepLast(10)) + 
.reliability(RMW_QOS_POLICY_RELIABILITY_BEST_EFFORT); + + const auto info_qos = rclcpp::QoS(rclcpp::KeepLast(1)) + .reliability(RMW_QOS_POLICY_RELIABILITY_RELIABLE) + .durability(RMW_QOS_POLICY_DURABILITY_TRANSIENT_LOCAL); + + // Color camera_info is not subscribed — intrinsics and distortion are + // always taken from config params above. + depth_cam_info_sub_ = create_subscription( + depth_info_topic, info_qos, + std::bind(&ValvePoseNode::depth_camera_info_cb, this, _1)); + + pose_stamped_pub_ = create_publisher(pose_stamped_topic, qos); + pose_pub_ = create_publisher(pose_topic, qos); + landmark_pub_ = create_publisher(lm_topic, qos); + image_pub_ = create_publisher(img_topic, qos); + depth_colormap_pub_ = create_publisher(depth_color_topic, qos); + valve_points_pub_ = create_publisher(valve_points_topic, qos); + depth_cloud_pub_ = create_publisher(depth_cloud_topic, qos); + + if (debug_visualize_) { + const auto ann_topic = declare_parameter("debug_annulus_pub_topic"); + const auto pln_topic = declare_parameter("debug_plane_pub_topic"); + annulus_pub_ = create_publisher(ann_topic, qos); + plane_pub_ = create_publisher(pln_topic, qos); + } + + depth_sub_.subscribe(this, depth_topic, qos.get_rmw_qos_profile()); + color_sub_.subscribe(this, color_topic, qos.get_rmw_qos_profile()); + det_sub_.subscribe(this, det_topic, qos.get_rmw_qos_profile()); + + sync_ = std::make_shared>( + SyncPolicy(10), depth_sub_, color_sub_, det_sub_); + sync_->registerCallback( + std::bind(&ValvePoseNode::sync_cb, this, _1, _2, _3)); } -void ValvePoseNode::init_valve_detector() { - int yolo_img_width = this->declare_parameter("yolo_img_width"); - int yolo_img_height = this->declare_parameter("yolo_img_height"); - - float annulus_radius_ratio = - this->declare_parameter("annulus_radius_ratio"); - - float plane_ransac_threshold = - this->declare_parameter("plane_ransac_threshold"); - int plane_ransac_max_iterations = - this->declare_parameter("plane_ransac_max_iterations"); - - float 
valve_handle_offset = - this->declare_parameter("valve_handle_offset"); +// One-shot callback that overrides depth intrinsics from the camera_info topic. +void ValvePoseNode::depth_camera_info_cb( + const sensor_msgs::msg::CameraInfo::SharedPtr msg) +{ + if (got_depth_info_) return; + + depth_props_.intr.fx = msg->k[0]; + depth_props_.intr.fy = msg->k[4]; + depth_props_.intr.cx = msg->k[2]; + depth_props_.intr.cy = msg->k[5]; + depth_props_.dim.width = static_cast(msg->width); + depth_props_.dim.height = static_cast(msg->height); + + detector_->set_depth_image_properties(depth_props_); + + got_depth_info_ = true; + depth_cam_info_sub_.reset(); // one-shot + RCLCPP_INFO(get_logger(), "Depth camera_info received (fx=%.1f fy=%.1f)", + depth_props_.intr.fx, depth_props_.intr.fy); +} - valve_detector_ = std::make_unique( - yolo_img_width, yolo_img_height, annulus_radius_ratio, - plane_ransac_threshold, plane_ransac_max_iterations, - valve_handle_offset); +// Converts a ROS BoundingBox2D message to the internal BoundingBox struct. 
+BoundingBox ValvePoseNode::to_bbox(const vision_msgs::msg::BoundingBox2D& b) const +{ + BoundingBox o; + o.center_x = static_cast(b.center.position.x); + o.center_y = static_cast(b.center.position.y); + o.size_x = static_cast(b.size_x); + o.size_y = static_cast(b.size_y); + o.theta = static_cast(b.center.theta); // radians + return o; } -void ValvePoseNode::setup_sync() { - visualize_detections_ = - this->declare_parameter("visualize_detections"); - calculate_angle_ = this->declare_parameter("calculate_angle"); - use_depth_image_ = this->declare_parameter("use_depth_image"); - - use_color_image_ = (calculate_angle_ || visualize_detections_); - - std::string color_image_sub_topic = - this->declare_parameter("color_image_sub_topic"); - std::string depth_image_sub_topic = - this->declare_parameter("depth_image_sub_topic"); - std::string pcl_sub_topic = - this->declare_parameter("pcl_sub_topic"); - std::string detections_sub_topic = - this->declare_parameter("detections_sub_topic"); - - rclcpp::QoS qos = rclcpp::QoS(rclcpp::KeepLast(10)) - .reliability(RMW_QOS_POLICY_RELIABILITY_BEST_EFFORT); - - if (use_color_image_ && use_depth_image_) { - color_image_sub_.subscribe(this, color_image_sub_topic, - qos.get_rmw_qos_profile()); - depth_image_sub_.subscribe(this, depth_image_sub_topic, - qos.get_rmw_qos_profile()); - detections_sub_.subscribe(this, detections_sub_topic, - qos.get_rmw_qos_profile()); - sync_di_ci_d_ = - std::make_shared>( - SyncPolicy_DI_CI_D(10), depth_image_sub_, color_image_sub_, - detections_sub_); - sync_di_ci_d_->registerCallback( - std::bind(&ValvePoseNode::di_ci_d_callback, this, _1, _2, _3)); - - } else if (use_depth_image_) { - depth_image_sub_.subscribe(this, depth_image_sub_topic, - qos.get_rmw_qos_profile()); - detections_sub_.subscribe(this, detections_sub_topic, - qos.get_rmw_qos_profile()); - sync_di_d_ = - std::make_shared>( - SyncPolicy_DI_D(10), depth_image_sub_, detections_sub_); - sync_di_d_->registerCallback( - 
std::bind(&ValvePoseNode::di_d_callback, this, _1, _2)); - - } else if (use_color_image_ && !use_depth_image_) { - pcl_sub_.subscribe(this, pcl_sub_topic, qos.get_rmw_qos_profile()); - color_image_sub_.subscribe(this, color_image_sub_topic, - qos.get_rmw_qos_profile()); - detections_sub_.subscribe(this, detections_sub_topic, - qos.get_rmw_qos_profile()); - sync_pc_ci_d_ = - std::make_shared>( - SyncPolicy_PC_CI_D(10), pcl_sub_, color_image_sub_, - detections_sub_); - sync_pc_ci_d_->registerCallback( - std::bind(&ValvePoseNode::pc_ci_d_callback, this, _1, _2, _3)); +// Corrects the bbox center for lens distortion using the color camera matrix. +BoundingBox ValvePoseNode::undistort_bbox(const BoundingBox& bbox) const +{ + if (cv_camera_matrix_.empty() || cv_dist_coeffs_.empty()) return bbox; + + std::vector pts{{bbox.center_x, bbox.center_y}}; + std::vector undistorted; + cv::undistortPoints(pts, undistorted, + cv_camera_matrix_, cv_dist_coeffs_, + cv::noArray(), cv_camera_matrix_); + + BoundingBox result = bbox; + result.center_x = undistorted[0].x; + result.center_y = undistorted[0].y; + return result; +} +// Greedy NMS: sorts detections by score, keeps at most 2 non-overlapping boxes. +std::vector ValvePoseNode::filter_duplicate_detections( + const vision_msgs::msg::Detection2DArray& det) const +{ + const size_t n = det.detections.size(); + if (n == 0) return {}; + + // Collect (score, index) pairs – use bbox area when no score is available. 
+ std::vector> scored; + scored.reserve(n); + for (size_t i = 0; i < n; ++i) { + float score = 0.0f; + if (!det.detections[i].results.empty()) { + score = static_cast( + det.detections[i].results[0].hypothesis.score); } else { - pcl_sub_.subscribe(this, pcl_sub_topic, qos.get_rmw_qos_profile()); - detections_sub_.subscribe(this, detections_sub_topic, - qos.get_rmw_qos_profile()); - sync_pc_d_ = - std::make_shared>( - SyncPolicy_PC_D(10), pcl_sub_, detections_sub_); - sync_pc_d_->registerCallback( - std::bind(&ValvePoseNode::pc_d_callback, this, _1, _2)); + const auto& b = det.detections[i].bbox; + score = static_cast(b.size_x * b.size_y); } -} - -void ValvePoseNode::di_ci_d_callback( - const sensor_msgs::msg::Image::ConstSharedPtr& depth_image, - const sensor_msgs::msg::Image::ConstSharedPtr& color_image, - const vision_msgs::msg::Detection2DArray::ConstSharedPtr& detections) { - synchronized_callback(depth_image, color_image, - detections); -} + scored.emplace_back(score, i); + } + std::sort(scored.begin(), scored.end(), + [](const auto& a, const auto& b) { return a.first > b.first; }); + + // Greedy NMS – keep at most 2. 
+ std::vector kept; + std::vector suppressed(n, false); + + for (size_t si = 0; si < scored.size() && kept.size() < 2; ++si) { + const size_t i = scored[si].second; + if (suppressed[i]) continue; + + kept.push_back(i); + + const BoundingBox bi = to_bbox(det.detections[i].bbox); + const float ai = bi.size_x * bi.size_y; + const float bx1i = bi.center_x - bi.size_x * 0.5f; + const float by1i = bi.center_y - bi.size_y * 0.5f; + const float bx2i = bi.center_x + bi.size_x * 0.5f; + const float by2i = bi.center_y + bi.size_y * 0.5f; + + for (size_t sj = si + 1; sj < scored.size(); ++sj) { + const size_t j = scored[sj].second; + if (suppressed[j]) continue; + + const BoundingBox bj = to_bbox(det.detections[j].bbox); + const float aj = bj.size_x * bj.size_y; + const float bx1j = bj.center_x - bj.size_x * 0.5f; + const float by1j = bj.center_y - bj.size_y * 0.5f; + const float bx2j = bj.center_x + bj.size_x * 0.5f; + const float by2j = bj.center_y + bj.size_y * 0.5f; + + const float ix1 = std::max(bx1i, bx1j); + const float iy1 = std::max(by1i, by1j); + const float ix2 = std::min(bx2i, bx2j); + const float iy2 = std::min(by2i, by2j); + + if (ix2 <= ix1 || iy2 <= iy1) continue; // no overlap + + const float inter = (ix2 - ix1) * (iy2 - iy1); + const float iou = inter / (ai + aj - inter); + const float iom = inter / std::min(ai, aj); // intersection over minimum + + if (iou > iou_duplicate_threshold_ || iom > 0.7f) { + suppressed[j] = true; + } + } + } -void ValvePoseNode::di_d_callback( - const sensor_msgs::msg::Image::ConstSharedPtr& depth_image, - const vision_msgs::msg::Detection2DArray::ConstSharedPtr& detections) { - synchronized_callback(depth_image, nullptr, - detections); + return kept; } -void ValvePoseNode::pc_ci_d_callback( - const sensor_msgs::msg::PointCloud2::ConstSharedPtr& pcl, - const sensor_msgs::msg::Image::ConstSharedPtr& color_image, - const vision_msgs::msg::Detection2DArray::ConstSharedPtr& detections) { - synchronized_callback(pcl, color_image, - 
detections); +// Packs poses into a PoseArray message and publishes it. +void ValvePoseNode::publish_pose_array(const std::vector& poses, + const std_msgs::msg::Header& header) +{ + geometry_msgs::msg::PoseArray msg; + msg.header = header; + msg.poses.reserve(poses.size()); + for (const auto& p : poses) { + geometry_msgs::msg::Pose po; + po.position.x = p.position.x(); + po.position.y = p.position.y(); + po.position.z = p.position.z(); + po.orientation.x = p.orientation.x(); + po.orientation.y = p.orientation.y(); + po.orientation.z = p.orientation.z(); + po.orientation.w = p.orientation.w(); + msg.poses.push_back(po); + } + pose_pub_->publish(msg); } -void ValvePoseNode::pc_d_callback( - const sensor_msgs::msg::PointCloud2::ConstSharedPtr& pcl, - const vision_msgs::msg::Detection2DArray::ConstSharedPtr& detections) { - synchronized_callback(pcl, nullptr, - detections); +// Packs poses into a LandmarkArray message with type/subtype fields and publishes it. +void ValvePoseNode::publish_landmarks(const std::vector& poses, + const std_msgs::msg::Header& header) +{ + vortex_msgs::msg::LandmarkArray out; + out.header = header; + out.landmarks.reserve(poses.size()); + + for (size_t i = 0; i < poses.size(); ++i) { + vortex_msgs::msg::Landmark lm; + lm.header = header; + lm.id = static_cast(i); + lm.type.value = landmark_type_; + lm.subtype.value = landmark_subtype_; + + lm.pose.pose.position.x = poses[i].position.x(); + lm.pose.pose.position.y = poses[i].position.y(); + lm.pose.pose.position.z = poses[i].position.z(); + lm.pose.pose.orientation.x = poses[i].orientation.x(); + lm.pose.pose.orientation.y = poses[i].orientation.y(); + lm.pose.pose.orientation.z = poses[i].orientation.z(); + lm.pose.pose.orientation.w = poses[i].orientation.w(); + + out.landmarks.push_back(lm); + } + landmark_pub_->publish(out); } -void ValvePoseNode::init_angle_detector() { - if (calculate_angle_) { - AngleDetectorParams angle_params; - angle_params.line_detection_area = - 
this->declare_parameter("angle.detection_area_ratio"); - angle_params.canny_low_threshold = - this->declare_parameter("angle.canny_low_threshold"); - angle_params.canny_high_threshold = - this->declare_parameter("angle.canny_high_threshold"); - angle_params.canny_aperture_size = - this->declare_parameter("angle.canny_aperture_size"); - angle_params.hough_rho_res = - this->declare_parameter("angle.hough_rho_res"); - angle_params.hough_theta_res = - this->declare_parameter("angle.hough_theta_res"); - angle_params.hough_threshold = - this->declare_parameter("angle.hough_threshold"); - angle_params.hough_min_line_length = - this->declare_parameter("angle.hough_min_line_length"); - angle_params.hough_max_line_gap = - this->declare_parameter("angle.hough_max_line_gap"); - - angle_detector_ = std::make_unique(angle_params); +// Main synchronized callback: runs NMS, estimates poses, and publishes all outputs. +void ValvePoseNode::sync_cb( + const sensor_msgs::msg::Image::ConstSharedPtr& depth, + const sensor_msgs::msg::Image::ConstSharedPtr& color, + const vision_msgs::msg::Detection2DArray::ConstSharedPtr& det) +{ + if (!got_info_) return; + if (!depth || !det) return; + + cv::Mat depth_color; + const bool publish_colormap = depth_colormap_pub_->get_subscription_count() > 0; + if (publish_colormap) { + cv_bridge::CvImageConstPtr cv_raw = cv_bridge::toCvShare(depth, "16UC1"); + cv::Mat depth_f; + cv_raw->image.convertTo(depth_f, CV_32FC1); + const float scale = 255.0f / (depth_colormap_vmax_ - depth_colormap_vmin_); + cv::Mat depth_8u; + depth_f.convertTo(depth_8u, CV_8UC1, scale, -depth_colormap_vmin_ * scale); + cv::applyColorMap(depth_8u, depth_color, cv::COLORMAP_TURBO); + } + + cv::Mat color_vis; + const bool publish_image = color && image_pub_->get_subscription_count() > 0; + if (publish_image) { + cv_bridge::CvImageConstPtr cv_color = cv_bridge::toCvShare(color, "bgr8"); + color_vis = cv_color->image.clone(); + } + + if (det->detections.empty()) { + 
publish_pose_array({}, depth->header); + publish_landmarks({}, depth->header); + if (publish_colormap) { + depth_colormap_pub_->publish( + *cv_bridge::CvImage(depth->header, "bgr8", depth_color).toImageMsg()); } -} - -void ValvePoseNode::color_image_info_callback( - const sensor_msgs::msg::CameraInfo::SharedPtr camera_info_msg) { - if (!color_image_info_received_) { - ImageProperties img_props; - img_props.intr.fx = camera_info_msg->k[0]; - img_props.intr.fy = camera_info_msg->k[4]; - img_props.intr.cx = camera_info_msg->k[2]; - img_props.intr.cy = camera_info_msg->k[5]; - img_props.dim.width = camera_info_msg->width; - img_props.dim.height = camera_info_msg->height; - - valve_detector_->set_color_image_properties(img_props); - valve_detector_->calculate_letterbox_padding(); - - color_image_frame_id_ = camera_info_msg->header.frame_id; - color_image_info_received_ = true; - color_image_info_sub_.reset(); + if (publish_image) { + image_pub_->publish( + *cv_bridge::CvImage(color->header, "bgr8", color_vis).toImageMsg()); } -} - -BoundingBox ValvePoseNode::to_bounding_box( - const vision_msgs::msg::BoundingBox2D& bbox) const { - BoundingBox box; - box.center_x = bbox.center.position.x; - box.center_y = bbox.center.position.y; - box.size_x = bbox.size_x; - box.size_y = bbox.size_y; - box.theta = bbox.center.theta; - return box; -} - -void ValvePoseNode::publish_annulus_pcl( - const pcl::PointCloud::Ptr& cloud, - const std_msgs::msg::Header& header) const { - sensor_msgs::msg::PointCloud2 cloud_msg; - pcl::toROSMsg(*cloud, cloud_msg); - cloud_msg.header = header; - cloud_msg.is_dense = false; - cloud_msg.width = static_cast(cloud->points.size()); - cloud_msg.height = 1; - - annulus_pcl_pub_->publish(cloud_msg); -} + return; + } + + const std::vector kept = filter_duplicate_detections(*det); + + // RealSense publishes depth as 16UC1 (uint16 millimetres). + // cv_bridge type-casts without scaling, so we must divide by 1000. 
+ cv::Mat depth_img; + if (depth->encoding == sensor_msgs::image_encodings::TYPE_16UC1 || + depth->encoding == "16UC1") { + cv_bridge::CvImageConstPtr cv_depth = cv_bridge::toCvShare(depth, "16UC1"); + cv_depth->image.convertTo(depth_img, CV_32FC1, 0.001); + } else { + cv_bridge::CvImageConstPtr cv_depth = cv_bridge::toCvShare(depth, "32FC1"); + depth_img = cv_depth->image.clone(); + } + + pcl::PointCloud::Ptr ann_dbg(new pcl::PointCloud); + pcl::PointCloud::Ptr pln_dbg(new pcl::PointCloud); + + std::vector kept_boxes; + std::vector raw_boxes; // raw detection coords for depth colormap + std::vector poses; + + for (size_t idx : kept) { + // Parse bounding box (YOLO-space), map to original image, then undistort. + BoundingBox yolo_box = to_bbox(det->detections[idx].bbox); + BoundingBox org_box = yolo_box; + org_box = undistort_bbox(org_box); + + raw_boxes.push_back(yolo_box); + kept_boxes.push_back(org_box); + + Pose pose; + if (detector_->compute_pose_from_depth( + depth_img, yolo_box, pose, ann_dbg, pln_dbg, true)) { + poses.push_back(pose); + } + } -void ValvePoseNode::publish_annulus_plane( - const pcl::PointCloud::Ptr& cloud, - const std_msgs::msg::Header& header) const { + if (depth_cloud_pub_->get_subscription_count() > 0) { sensor_msgs::msg::PointCloud2 cloud_msg; - pcl::toROSMsg(*cloud, cloud_msg); - cloud_msg.header = header; - cloud_msg.is_dense = false; - cloud_msg.width = static_cast(cloud->points.size()); - cloud_msg.height = 1; - - annulus_plane_pub_->publish(cloud_msg); -} - -void ValvePoseNode::publish_valve_poses( - const std::vector& poses, - const std_msgs::msg::Header& header) const { - geometry_msgs::msg::PoseArray pose_array_msg; - pose_array_msg.header = header; - - for (const auto& pose : poses) { - if (!std::isnan(pose.orientation.x()) && - !std::isnan(pose.orientation.y()) && - !std::isnan(pose.orientation.z()) && - !std::isnan(pose.orientation.w()) && - !std::isnan(pose.position.x()) && !std::isnan(pose.position.y()) && - 
!std::isnan(pose.position.z())) { - geometry_msgs::msg::Pose pose_msg; - pose_msg.position.x = pose.position.x(); - pose_msg.position.y = pose.position.y(); - pose_msg.position.z = pose.position.z(); - pose_msg.orientation.x = pose.orientation.x(); - pose_msg.orientation.y = pose.orientation.y(); - pose_msg.orientation.z = pose.orientation.z(); - pose_msg.orientation.w = pose.orientation.w(); - - pose_array_msg.poses.push_back(pose_msg); + pcl::toROSMsg(*ann_dbg, cloud_msg); + std_msgs::msg::Header cloud_header = depth->header; + if (!output_frame_id_.empty()) cloud_header.frame_id = output_frame_id_; + cloud_msg.header = cloud_header; + depth_cloud_pub_->publish(cloud_msg); + } + + if (publish_image) { + for (size_t i = 0; i < kept_boxes.size(); ++i) { + const auto& box = kept_boxes[i]; + const float angle_deg = box.theta * 180.0f / static_cast(M_PI); + cv::RotatedRect rrect( + cv::Point2f(box.center_x, box.center_y), + cv::Size2f(box.size_x, box.size_y), + angle_deg); + cv::Point2f corners[4]; + rrect.points(corners); + for (int j = 0; j < 4; ++j) { + cv::line(color_vis, corners[j], corners[(j + 1) % 4], + cv::Scalar(0, 255, 0), 2); + } + } + image_pub_->publish( + *cv_bridge::CvImage(color->header, "bgr8", color_vis).toImageMsg()); + } + + if (publish_colormap) { + for (size_t i = 0; i < raw_boxes.size(); ++i) { + const auto& box = raw_boxes[i]; + + // Project each OBB corner from color image space to depth image space + // using the full intrinsic + extrinsic pipeline. + // Fall back to the raw color pixel when no valid pose depth is available. + const float Z = (i < poses.size() && poses[i].position.z() > 0.0f) + ? 
poses[i].position.z() : 0.0f; + + const float angle_deg = box.theta * 180.0f / static_cast(M_PI); + cv::RotatedRect rrect( + cv::Point2f(box.center_x, box.center_y), + cv::Size2f(box.size_x, box.size_y), + angle_deg); + cv::Point2f corners[4]; + rrect.points(corners); + + if (Z > 0.0f && got_depth_info_) { + for (auto& c : corners) { + c = project_color_pixel_to_depth( + c.x, c.y, Z, color_props_, depth_props_, depth_color_extrinsic_); } + } + + for (int j = 0; j < 4; ++j) { + cv::line(depth_color, corners[j], corners[(j + 1) % 4], + cv::Scalar(0, 255, 0), 2); + } + } + depth_colormap_pub_->publish( + *cv_bridge::CvImage(depth->header, "bgr8", depth_color).toImageMsg()); + } + + if (debug_visualize_ && annulus_pub_ && plane_pub_) { + sensor_msgs::msg::PointCloud2 ann_msg, pln_msg; + pcl::toROSMsg(*ann_dbg, ann_msg); + pcl::toROSMsg(*pln_dbg, pln_msg); + ann_msg.header = depth->header; + pln_msg.header = depth->header; + annulus_pub_->publish(ann_msg); + plane_pub_->publish(pln_msg); + } + + std_msgs::msg::Header pose_header = depth->header; + if (!output_frame_id_.empty()) pose_header.frame_id = output_frame_id_; + + publish_pose_array(poses, pose_header); + publish_landmarks(poses, pose_header); + + // Publish valve center positions as a PointCloud2 for Foxglove 3D view. + if (valve_points_pub_->get_subscription_count() > 0) { + pcl::PointCloud pts; + for (const auto& p : poses) { + pts.points.push_back({p.position.x(), p.position.y(), p.position.z()}); } - valve_poses_pub_->publish(pose_array_msg); + pts.width = static_cast(pts.points.size()); + pts.height = 1; + pts.is_dense = true; + sensor_msgs::msg::PointCloud2 pc_msg; + pcl::toROSMsg(pts, pc_msg); + pc_msg.header = pose_header; + valve_points_pub_->publish(pc_msg); + } + + // Publish best (first) detection as PoseStamped for easy Foxglove 3D view. 
+ if (!poses.empty()) { + geometry_msgs::msg::PoseStamped ps; + ps.header = pose_header; + ps.pose.position.x = poses[0].position.x(); + ps.pose.position.y = poses[0].position.y(); + ps.pose.position.z = poses[0].position.z(); + ps.pose.orientation.x = poses[0].orientation.x(); + ps.pose.orientation.y = poses[0].orientation.y(); + ps.pose.orientation.z = poses[0].orientation.z(); + ps.pose.orientation.w = poses[0].orientation.w(); + pose_stamped_pub_->publish(ps); + } } } // namespace valve_detection From 5ffe8df9a32e5c913e307d54191a53ecd4309386 Mon Sep 17 00:00:00 2001 From: Andreas Kluge Svendsrud <89779148+kluge7@users.noreply.github.com> Date: Thu, 19 Mar 2026 12:30:22 +0100 Subject: [PATCH 24/29] ci: update CI workflows and pre-commit configuration: enhance triggers, adjust hook stages, and add static analysis tools --- .github/workflows/code-coverage.yml | 7 ++--- .github/workflows/industrial-ci.yml | 8 ++++-- .github/workflows/pre-commit.yml | 14 +++++++++ .github/workflows/semantic-release.yml | 2 +- .gitignore | 1 + .pre-commit-config.yaml | 40 ++++++++++++++++++++++---- 6 files changed, 60 insertions(+), 12 deletions(-) create mode 100644 .github/workflows/pre-commit.yml create mode 100644 .gitignore diff --git a/.github/workflows/code-coverage.yml b/.github/workflows/code-coverage.yml index a4e68bc..8bd9d77 100644 --- a/.github/workflows/code-coverage.yml +++ b/.github/workflows/code-coverage.yml @@ -4,10 +4,9 @@ on: branches: - main pull_request: - branches: - - main + types: [ opened, synchronize, reopened, ready_for_review ] jobs: - call_reusable_workflow: + code-coverage: uses: vortexntnu/vortex-ci/.github/workflows/reusable-code-coverage.yml@main secrets: - CODECOV_TOKEN: ${{ secrets.CODECOV_TOKEN }} + CODECOV_TOKEN: ${{ secrets.CODECOV_TOKEN }} # Set in the repository secrets \ No newline at end of file diff --git a/.github/workflows/industrial-ci.yml b/.github/workflows/industrial-ci.yml index 67a4efe..23b5750 100644 --- 
a/.github/workflows/industrial-ci.yml +++ b/.github/workflows/industrial-ci.yml @@ -2,11 +2,15 @@ name: Industrial CI on: push: + branches: + - main + pull_request: + types: [ opened, synchronize, reopened, ready_for_review ] workflow_dispatch: schedule: - cron: '0 1 * * *' # Runs daily to check for dependency issues or flaking tests jobs: - call_reusable_workflow: + industrial-ci: uses: vortexntnu/vortex-ci/.github/workflows/reusable-industrial-ci.yml@main with: - ros_repo: '["testing", "main"]' + ros_repo: '["testing", "main"]' diff --git a/.github/workflows/pre-commit.yml b/.github/workflows/pre-commit.yml new file mode 100644 index 0000000..aaf534b --- /dev/null +++ b/.github/workflows/pre-commit.yml @@ -0,0 +1,14 @@ +name: pre-commit +on: + push: + branches: + - main + pull_request: + types: [ opened, synchronize, reopened, ready_for_review ] + workflow_dispatch: +jobs: + pre-commit: + uses: vortexntnu/vortex-ci/.github/workflows/reusable-pre-commit.yml@main + with: + ros_distro: 'humble' + config_path: '.pre-commit-config.yaml' diff --git a/.github/workflows/semantic-release.yml b/.github/workflows/semantic-release.yml index 70779d4..765aa2c 100644 --- a/.github/workflows/semantic-release.yml +++ b/.github/workflows/semantic-release.yml @@ -5,5 +5,5 @@ on: branches: - main jobs: - call_reusable_workflow: + semantic-release: uses: vortexntnu/vortex-ci/.github/workflows/reusable-semantic-release.yml@main diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..dbe9c82 --- /dev/null +++ b/.gitignore @@ -0,0 +1 @@ +.vscode/ \ No newline at end of file diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index d145b28..025948c 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -35,7 +35,7 @@ repos: - id: requirements-txt-fixer # Python hooks - repo: https://github.com/astral-sh/ruff-pre-commit - rev: v0.12.2 + rev: v0.11.4 hooks: - id: ruff-format - id: ruff @@ -57,7 +57,7 @@ repos: 
"--ignore=D100,D101,D102,D103,D104,D105,D106,D107,D401", "--fix", ] - stages: [pre-commit] + stages: [commit] pass_filenames: true - id: ruff name: ruff-check @@ -66,20 +66,50 @@ repos: "--ignore=T201,N812,B006,S101,S311,S607,S603", "--fix" ] - stages: [pre-commit] + stages: [commit] pass_filenames: true # C++ hooks - repo: https://github.com/pre-commit/mirrors-clang-format - rev: v20.1.7 + rev: v20.1.0 hooks: - id: clang-format args: [--style=file] + - repo: local + hooks: + - id: ament_cppcheck + name: ament_cppcheck + description: Static code analysis of C/C++ files. + entry: env AMENT_CPPCHECK_ALLOW_SLOW_VERSIONS=1 ament_cppcheck + language: system + files: \.(h\+\+|h|hh|hxx|hpp|cuh|c|cc|cpp|cu|c\+\+|cxx|tpp|txx)$ + exclude: (^|/)(test|tests)/.* # exclude test dirs since ament_cppcheck misparses GTest macros and throws false syntax errors + - repo: local + hooks: + - id: ament_cpplint + name: ament_cpplint + description: Static code analysis of C/C++ files. + entry: ament_cpplint + language: system + files: \.(h\+\+|h|hh|hxx|hpp|cuh|c|cc|cpp|cu|c\+\+|cxx|tpp|txx)$ + args: [ + "--linelength=120", + "--filter=-whitespace/newline,-legal/copyright,-build/include_order" # ignore build/include_order since it conflicts with .clang-format + ] + # CMake hooks + - repo: local + hooks: + - id: ament_lint_cmake + name: ament_lint_cmake + description: Check format of CMakeLists.txt files. 
+ entry: ament_lint_cmake + language: system + files: CMakeLists\.txt$ # Spellcheck in comments and docs - repo: https://github.com/codespell-project/codespell rev: v2.4.1 hooks: - id: codespell - args: ['--write-changes', '--ignore-words-list=theses,fom'] + args: ['--write-changes', '--ignore-words-list=theses,fom,NED,ENU'] ci: autoupdate_schedule: quarterly From de54ca0f9104f09116cccf97463769477adf25e7 Mon Sep 17 00:00:00 2001 From: Andreas Kluge Svendsrud <89779148+kluge7@users.noreply.github.com> Date: Thu, 19 Mar 2026 12:34:28 +0100 Subject: [PATCH 25/29] refactor: apply pre-commit changes --- .github/workflows/code-coverage.yml | 2 +- .gitignore | 2 +- .pre-commit-config.yaml | 4 +- .../depth_image_processing.hpp | 41 +- include/valve_detection/pose_estimator.hpp | 108 +- include/valve_detection/types.hpp | 28 +- .../valve_detection_ros/valve_pose_ros.hpp | 177 ++-- launch/valve_detection.launch.py | 5 +- src/depth_image_processing.cpp | 530 +++++----- src/pose_estimator.cpp | 355 +++---- src/valve_pose_ros.cpp | 924 +++++++++--------- 11 files changed, 1145 insertions(+), 1031 deletions(-) diff --git a/.github/workflows/code-coverage.yml b/.github/workflows/code-coverage.yml index 8bd9d77..7b9b6db 100644 --- a/.github/workflows/code-coverage.yml +++ b/.github/workflows/code-coverage.yml @@ -9,4 +9,4 @@ jobs: code-coverage: uses: vortexntnu/vortex-ci/.github/workflows/reusable-code-coverage.yml@main secrets: - CODECOV_TOKEN: ${{ secrets.CODECOV_TOKEN }} # Set in the repository secrets \ No newline at end of file + CODECOV_TOKEN: ${{ secrets.CODECOV_TOKEN }} # Set in the repository secrets diff --git a/.gitignore b/.gitignore index dbe9c82..1d74e21 100644 --- a/.gitignore +++ b/.gitignore @@ -1 +1 @@ -.vscode/ \ No newline at end of file +.vscode/ diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 025948c..6609d6c 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -57,7 +57,7 @@ repos: 
"--ignore=D100,D101,D102,D103,D104,D105,D106,D107,D401", "--fix", ] - stages: [commit] + stages: [pre-commit] pass_filenames: true - id: ruff name: ruff-check @@ -66,7 +66,7 @@ repos: "--ignore=T201,N812,B006,S101,S311,S607,S603", "--fix" ] - stages: [commit] + stages: [pre-commit] pass_filenames: true # C++ hooks - repo: https://github.com/pre-commit/mirrors-clang-format diff --git a/include/valve_detection/depth_image_processing.hpp b/include/valve_detection/depth_image_processing.hpp index 3552851..4b03fa1 100644 --- a/include/valve_detection/depth_image_processing.hpp +++ b/include/valve_detection/depth_image_processing.hpp @@ -1,22 +1,26 @@ #pragma once -#include "valve_detection/types.hpp" -#include #include #include +#include +#include "valve_detection/types.hpp" namespace valve_detection { -void project_pixel_to_point( - int u, int v, float depth, - double fx, double fy, double cx, double cy, - pcl::PointXYZ& out); +void project_pixel_to_point(int u, + int v, + float depth, + double fx, + double fy, + double cx, + double cy, + pcl::PointXYZ& out); void extract_annulus_pcl( - const cv::Mat& depth_image, // CV_32FC1 meters - const BoundingBox& bbox, // in ORIGINAL image pixels + const cv::Mat& depth_image, // CV_32FC1 meters + const BoundingBox& bbox, // in ORIGINAL image pixels const ImageProperties& img_props, - float annulus_radius_ratio, // inner radius = outer*ratio + float annulus_radius_ratio, // inner radius = outer*ratio pcl::PointCloud::Ptr& out); // Hardcoded depth-to-color extrinsic for Intel RealSense D555. @@ -31,8 +35,8 @@ DepthColorExtrinsic d555_depth_to_color_extrinsic(); // projection falls inside the annulus. Output points are in the color // camera frame. 
void extract_annulus_pcl_aligned( - const cv::Mat& depth_image, // CV_32FC1 meters, depth frame - const BoundingBox& color_bbox, // annulus defined in color pixels + const cv::Mat& depth_image, // CV_32FC1 meters, depth frame + const BoundingBox& color_bbox, // annulus defined in color pixels const ImageProperties& color_props, const ImageProperties& depth_props, const DepthColorExtrinsic& extrinsic, @@ -42,8 +46,8 @@ void extract_annulus_pcl_aligned( // Extracts all valid depth points whose color-frame projection falls inside // the oriented bounding box. Output points are in the color camera frame. void extract_bbox_pcl_aligned( - const cv::Mat& depth_image, // CV_32FC1 meters, depth frame - const BoundingBox& color_bbox, // OBB defined in color pixels + const cv::Mat& depth_image, // CV_32FC1 meters, depth frame + const BoundingBox& color_bbox, // OBB defined in color pixels const ImageProperties& color_props, const ImageProperties& depth_props, const DepthColorExtrinsic& extrinsic, @@ -52,10 +56,11 @@ void extract_bbox_pcl_aligned( // Project a color image pixel to depth image coordinates. // u_c, v_c: pixel coordinates in the color image. // Z: depth of the point in the color camera frame (metres). 
-cv::Point2f project_color_pixel_to_depth( - float u_c, float v_c, float Z, - const ImageProperties& color_props, - const ImageProperties& depth_props, - const DepthColorExtrinsic& extr); +cv::Point2f project_color_pixel_to_depth(float u_c, + float v_c, + float Z, + const ImageProperties& color_props, + const ImageProperties& depth_props, + const DepthColorExtrinsic& extr); } // namespace valve_detection diff --git a/include/valve_detection/pose_estimator.hpp b/include/valve_detection/pose_estimator.hpp index dad26cb..18b620e 100644 --- a/include/valve_detection/pose_estimator.hpp +++ b/include/valve_detection/pose_estimator.hpp @@ -1,78 +1,82 @@ #pragma once -#include "valve_detection/types.hpp" #include "valve_detection/depth_image_processing.hpp" +#include "valve_detection/types.hpp" -#include -#include -#include #include #include -#include #include #include +#include #include #include +#include +#include +#include namespace valve_detection { class PoseEstimator { -public: - PoseEstimator(int yolo_img_width, - int yolo_img_height, - float annulus_radius_ratio, - float plane_ransac_threshold, - int plane_ransac_max_iterations, - float valve_handle_offset); + public: + PoseEstimator(int yolo_img_width, + int yolo_img_height, + float annulus_radius_ratio, + float plane_ransac_threshold, + int plane_ransac_max_iterations, + float valve_handle_offset); - void set_color_image_properties(const ImageProperties& props); - void set_depth_image_properties(const ImageProperties& props); - void set_depth_color_extrinsic(const DepthColorExtrinsic& extr); + void set_color_image_properties(const ImageProperties& props); + void set_depth_image_properties(const ImageProperties& props); + void set_depth_color_extrinsic(const DepthColorExtrinsic& extr); - void calculate_letterbox_padding(); - BoundingBox transform_bounding_box(const BoundingBox& bbox) const; + void calculate_letterbox_padding(); + BoundingBox transform_bounding_box(const BoundingBox& bbox) const; - bool 
compute_pose_from_depth( - const cv::Mat& depth_image, // CV_32FC1 meters - const BoundingBox& bbox_org, // in original image pixels - Pose& out_pose, - pcl::PointCloud::Ptr annulus_dbg, - pcl::PointCloud::Ptr plane_dbg, - bool debug_visualize) const; + bool compute_pose_from_depth( + const cv::Mat& depth_image, // CV_32FC1 meters + const BoundingBox& bbox_org, // in original image pixels + Pose& out_pose, + pcl::PointCloud::Ptr annulus_dbg, + pcl::PointCloud::Ptr plane_dbg, + bool debug_visualize) const; -private: - bool segment_plane(const pcl::PointCloud::Ptr& cloud, - pcl::ModelCoefficients::Ptr& coeff, - pcl::PointIndices::Ptr& inliers) const; + private: + bool segment_plane(const pcl::PointCloud::Ptr& cloud, + pcl::ModelCoefficients::Ptr& coeff, + pcl::PointIndices::Ptr& inliers) const; - Eigen::Vector3f get_ray_direction(const BoundingBox& bbox) const; - Eigen::Vector3f compute_plane_normal(const pcl::ModelCoefficients::Ptr& coeff, - const Eigen::Vector3f& ray_direction) const; - Eigen::Vector3f find_ray_plane_intersection(const pcl::ModelCoefficients::Ptr& coeff, - const Eigen::Vector3f& ray_direction) const; - Eigen::Vector3f shift_point_along_normal(const Eigen::Vector3f& intersection_point, - const Eigen::Vector3f& plane_normal) const; - Eigen::Matrix3f create_rotation_matrix(const pcl::ModelCoefficients::Ptr& coefficients, - const Eigen::Vector3f& plane_normal, - float angle) const; + Eigen::Vector3f get_ray_direction(const BoundingBox& bbox) const; + Eigen::Vector3f compute_plane_normal( + const pcl::ModelCoefficients::Ptr& coeff, + const Eigen::Vector3f& ray_direction) const; + Eigen::Vector3f find_ray_plane_intersection( + const pcl::ModelCoefficients::Ptr& coeff, + const Eigen::Vector3f& ray_direction) const; + Eigen::Vector3f shift_point_along_normal( + const Eigen::Vector3f& intersection_point, + const Eigen::Vector3f& plane_normal) const; + Eigen::Matrix3f create_rotation_matrix( + const pcl::ModelCoefficients::Ptr& coefficients, + const 
Eigen::Vector3f& plane_normal, + float angle) const; - ImageProperties color_image_properties_{}; - ImageProperties depth_image_properties_{}; - DepthColorExtrinsic depth_color_extrinsic_{}; - bool has_depth_props_{false}; + ImageProperties color_image_properties_{}; + ImageProperties depth_image_properties_{}; + DepthColorExtrinsic depth_color_extrinsic_{}; + bool has_depth_props_{false}; - int yolo_img_width_{640}; - int yolo_img_height_{640}; - float annulus_radius_ratio_{0.8f}; - float plane_ransac_threshold_{0.01f}; - int plane_ransac_max_iterations_{50}; - float valve_handle_offset_{0.05f}; + int yolo_img_width_{640}; + int yolo_img_height_{640}; + float annulus_radius_ratio_{0.8f}; + float plane_ransac_threshold_{0.01f}; + int plane_ransac_max_iterations_{50}; + float valve_handle_offset_{0.05f}; - double letterbox_scale_factor_{1.0}; - double letterbox_pad_x_{0}; - double letterbox_pad_y_{0}; + double letterbox_scale_factor_{1.0}; + double letterbox_pad_x_{0}; + double letterbox_pad_y_{0}; - mutable Eigen::Vector3f filter_direction_{1, 0, 0}; + mutable Eigen::Vector3f filter_direction_{1, 0, 0}; }; } // namespace valve_detection diff --git a/include/valve_detection/types.hpp b/include/valve_detection/types.hpp index 5ae25a4..214b0f5 100644 --- a/include/valve_detection/types.hpp +++ b/include/valve_detection/types.hpp @@ -5,36 +5,36 @@ namespace valve_detection { struct CameraIntrinsics { - double fx{0}, fy{0}, cx{0}, cy{0}; + double fx{0}, fy{0}, cx{0}, cy{0}; }; struct ImageDimensions { - int width{0}, height{0}; + int width{0}, height{0}; }; struct ImageProperties { - CameraIntrinsics intr; - ImageDimensions dim; + CameraIntrinsics intr; + ImageDimensions dim; }; struct BoundingBox { - float center_x{0}; - float center_y{0}; - float size_x{0}; - float size_y{0}; - float theta{0}; // radians + float center_x{0}; + float center_y{0}; + float size_x{0}; + float size_y{0}; + float theta{0}; // radians }; struct Pose { - Eigen::Vector3f 
position{Eigen::Vector3f::Zero()}; - Eigen::Quaternionf orientation{Eigen::Quaternionf::Identity()}; + Eigen::Vector3f position{Eigen::Vector3f::Zero()}; + Eigen::Quaternionf orientation{Eigen::Quaternionf::Identity()}; }; // Rigid transform from depth camera frame to color camera frame. // Rotation R and translation t satisfy: P_color = R * P_depth + t struct DepthColorExtrinsic { - Eigen::Matrix3f R{Eigen::Matrix3f::Identity()}; - Eigen::Vector3f t{Eigen::Vector3f::Zero()}; + Eigen::Matrix3f R{Eigen::Matrix3f::Identity()}; + Eigen::Vector3f t{Eigen::Vector3f::Zero()}; }; -} // namespace valve_detection \ No newline at end of file +} // namespace valve_detection diff --git a/include/valve_detection_ros/valve_pose_ros.hpp b/include/valve_detection_ros/valve_pose_ros.hpp index 49d96a0..2a843c6 100644 --- a/include/valve_detection_ros/valve_pose_ros.hpp +++ b/include/valve_detection_ros/valve_pose_ros.hpp @@ -4,111 +4,120 @@ #include #include +#include +#include #include #include #include #include -#include -#include #include #include #include -#include #include +#include -#include "valve_detection/pose_estimator.hpp" #include "valve_detection/depth_image_processing.hpp" +#include "valve_detection/pose_estimator.hpp" #include "valve_detection/types.hpp" +#include +#include +#include #include "vortex_msgs/msg/landmark.hpp" #include "vortex_msgs/msg/landmark_array.hpp" -#include "vortex_msgs/msg/landmark_type.hpp" #include "vortex_msgs/msg/landmark_subtype.hpp" +#include "vortex_msgs/msg/landmark_type.hpp" namespace valve_detection { class ValvePoseNode : public rclcpp::Node { -public: - explicit ValvePoseNode(const rclcpp::NodeOptions& options); - -private: - void depth_camera_info_cb(const sensor_msgs::msg::CameraInfo::SharedPtr msg); - - BoundingBox to_bbox(const vision_msgs::msg::BoundingBox2D& bbox) const; - - // Returns undistorted copy of bbox (center_x/y corrected for lens distortion). 
- BoundingBox undistort_bbox(const BoundingBox& bbox) const; - - // Non-maximum suppression: returns indices of kept detections (max 2). - // Two boxes are duplicates when IoMin (intersection / min-area) or IoU - // exceeds iou_duplicate_threshold_. - std::vector filter_duplicate_detections( - const vision_msgs::msg::Detection2DArray& det) const; - - void publish_pose_array(const std::vector& poses, - const std_msgs::msg::Header& header); - - void publish_landmarks(const std::vector& poses, - const std_msgs::msg::Header& header); - - // Sync callback: depth + color + det - void sync_cb(const sensor_msgs::msg::Image::ConstSharedPtr& depth, - const sensor_msgs::msg::Image::ConstSharedPtr& color, - const vision_msgs::msg::Detection2DArray::ConstSharedPtr& det); - - using SyncPolicy = message_filters::sync_policies::ApproximateTime< - sensor_msgs::msg::Image, - sensor_msgs::msg::Image, - vision_msgs::msg::Detection2DArray>; - - bool got_info_{false}; - bool got_depth_info_{false}; - - // params - bool use_color_image_{true}; - bool visualize_detections_{true}; - bool debug_visualize_{false}; - float iou_duplicate_threshold_{0.5f}; - std::string output_frame_id_{}; - - int landmark_type_{1}; - int landmark_subtype_{0}; - - // distortion (populated from color camera_info) - cv::Mat cv_camera_matrix_; - cv::Mat cv_dist_coeffs_; - - // camera data (owned by ROS node, passed to estimator and depth functions) - ImageProperties color_props_{}; - ImageProperties depth_props_{}; - DepthColorExtrinsic depth_color_extrinsic_{}; - - // estimator - std::unique_ptr detector_; - - // subs - rclcpp::Subscription::SharedPtr depth_cam_info_sub_; - message_filters::Subscriber depth_sub_; - message_filters::Subscriber color_sub_; - message_filters::Subscriber det_sub_; - std::shared_ptr> sync_; - - // pubs - rclcpp::Publisher::SharedPtr pose_pub_; - rclcpp::Publisher::SharedPtr pose_stamped_pub_; - rclcpp::Publisher::SharedPtr landmark_pub_; - rclcpp::Publisher::SharedPtr image_pub_; - 
rclcpp::Publisher::SharedPtr depth_colormap_pub_; - - float depth_colormap_vmin_{0.1f}; - float depth_colormap_vmax_{1125.5f}; - - rclcpp::Publisher::SharedPtr annulus_pub_; - rclcpp::Publisher::SharedPtr plane_pub_; - rclcpp::Publisher::SharedPtr valve_points_pub_; - rclcpp::Publisher::SharedPtr depth_cloud_pub_; + public: + explicit ValvePoseNode(const rclcpp::NodeOptions& options); + + private: + void depth_camera_info_cb( + const sensor_msgs::msg::CameraInfo::SharedPtr msg); + + BoundingBox to_bbox(const vision_msgs::msg::BoundingBox2D& bbox) const; + + // Returns undistorted copy of bbox (center_x/y corrected for lens + // distortion). + BoundingBox undistort_bbox(const BoundingBox& bbox) const; + + // Non-maximum suppression: returns indices of kept detections (max 2). + // Two boxes are duplicates when IoMin (intersection / min-area) or IoU + // exceeds iou_duplicate_threshold_. + std::vector filter_duplicate_detections( + const vision_msgs::msg::Detection2DArray& det) const; + + void publish_pose_array(const std::vector& poses, + const std_msgs::msg::Header& header); + + void publish_landmarks(const std::vector& poses, + const std_msgs::msg::Header& header); + + // Sync callback: depth + color + det + void sync_cb(const sensor_msgs::msg::Image::ConstSharedPtr& depth, + const sensor_msgs::msg::Image::ConstSharedPtr& color, + const vision_msgs::msg::Detection2DArray::ConstSharedPtr& det); + + using SyncPolicy = message_filters::sync_policies::ApproximateTime< + sensor_msgs::msg::Image, + sensor_msgs::msg::Image, + vision_msgs::msg::Detection2DArray>; + + bool got_info_{false}; + bool got_depth_info_{false}; + + // params + bool use_color_image_{true}; + bool visualize_detections_{true}; + bool debug_visualize_{false}; + float iou_duplicate_threshold_{0.5f}; + std::string output_frame_id_{}; + + int landmark_type_{1}; + int landmark_subtype_{0}; + + // distortion (populated from color camera_info) + cv::Mat cv_camera_matrix_; + cv::Mat cv_dist_coeffs_; + + // 
camera data (owned by ROS node, passed to estimator and depth functions) + ImageProperties color_props_{}; + ImageProperties depth_props_{}; + DepthColorExtrinsic depth_color_extrinsic_{}; + + // estimator + std::unique_ptr detector_; + + // subs + rclcpp::Subscription::SharedPtr + depth_cam_info_sub_; + message_filters::Subscriber depth_sub_; + message_filters::Subscriber color_sub_; + message_filters::Subscriber det_sub_; + std::shared_ptr> sync_; + + // pubs + rclcpp::Publisher::SharedPtr pose_pub_; + rclcpp::Publisher::SharedPtr + pose_stamped_pub_; + rclcpp::Publisher::SharedPtr landmark_pub_; + rclcpp::Publisher::SharedPtr image_pub_; + rclcpp::Publisher::SharedPtr depth_colormap_pub_; + + float depth_colormap_vmin_{0.1f}; + float depth_colormap_vmax_{1125.5f}; + + rclcpp::Publisher::SharedPtr annulus_pub_; + rclcpp::Publisher::SharedPtr plane_pub_; + rclcpp::Publisher::SharedPtr + valve_points_pub_; + rclcpp::Publisher::SharedPtr + depth_cloud_pub_; }; } // namespace valve_detection diff --git a/launch/valve_detection.launch.py b/launch/valve_detection.launch.py index b66d262..d385332 100644 --- a/launch/valve_detection.launch.py +++ b/launch/valve_detection.launch.py @@ -1,9 +1,12 @@ import os + from ament_index_python.packages import get_package_share_directory -from launch import LaunchDescription from launch_ros.actions import ComposableNodeContainer from launch_ros.descriptions import ComposableNode +from launch import LaunchDescription + + def generate_launch_description(): cfg = os.path.join( get_package_share_directory("valve_detection"), diff --git a/src/depth_image_processing.cpp b/src/depth_image_processing.cpp index 80db73e..ba2b6ca 100644 --- a/src/depth_image_processing.cpp +++ b/src/depth_image_processing.cpp @@ -1,5 +1,6 @@ // depth_image_processing.cpp -// Depth-to-3D back-projection, point cloud extraction, and color-to-depth pixel projection. +// Depth-to-3D back-projection, point cloud extraction, and color-to-depth pixel +// projection. 
#include "valve_detection/depth_image_processing.hpp" #include #include @@ -7,275 +8,302 @@ namespace valve_detection { -// Back-projects a depth pixel (u, v, depth) to a 3D point using camera intrinsics. -void project_pixel_to_point( - int u, int v, float depth, - double fx, double fy, double cx, double cy, - pcl::PointXYZ& out) -{ - if (depth <= 0.0f || std::isnan(depth) || std::isinf(depth)) { - out.x = out.y = out.z = std::numeric_limits::quiet_NaN(); - return; - } - out.x = static_cast((u - cx) * depth / fx); - out.y = static_cast((v - cy) * depth / fy); - out.z = depth; +// Back-projects a depth pixel (u, v, depth) to a 3D point using camera +// intrinsics. +void project_pixel_to_point(int u, + int v, + float depth, + double fx, + double fy, + double cx, + double cy, + pcl::PointXYZ& out) { + if (depth <= 0.0f || std::isnan(depth) || std::isinf(depth)) { + out.x = out.y = out.z = std::numeric_limits::quiet_NaN(); + return; + } + out.x = static_cast((u - cx) * depth / fx); + out.y = static_cast((v - cy) * depth / fy); + out.z = depth; } -// Extracts depth points that fall inside an elliptic annulus around the bbox center (no alignment). 
-void extract_annulus_pcl( - const cv::Mat& depth_image, - const BoundingBox& bbox, - const ImageProperties& img_props, - float annulus_radius_ratio, - pcl::PointCloud::Ptr& out) -{ - out->clear(); - - const float cx = bbox.center_x; - const float cy = bbox.center_y; - const float outer_rx = bbox.size_x * 0.5f; - const float outer_ry = bbox.size_y * 0.5f; - - if (outer_rx < 2.0f || outer_ry < 2.0f) return; - - const float inner_rx = outer_rx * annulus_radius_ratio; - const float inner_ry = outer_ry * annulus_radius_ratio; - - const int u0 = static_cast(std::floor(cx - outer_rx)); - const int u1 = static_cast(std::ceil (cx + outer_rx)); - const int v0 = static_cast(std::floor(cy - outer_ry)); - const int v1 = static_cast(std::ceil (cy + outer_ry)); - - for (int v = v0; v <= v1; ++v) { - if (v < 0 || v >= depth_image.rows) continue; - for (int u = u0; u <= u1; ++u) { - if (u < 0 || u >= depth_image.cols) continue; - - const float dxo = (u - cx) / outer_rx; - const float dyo = (v - cy) / outer_ry; - const bool inside_outer = (dxo*dxo + dyo*dyo) <= 1.0f; - - const float dxi = (u - cx) / inner_rx; - const float dyi = (v - cy) / inner_ry; - const bool outside_inner = (dxi*dxi + dyi*dyi) > 1.0f; - - if (!inside_outer || !outside_inner) continue; - - const float z = depth_image.at(v, u); - pcl::PointXYZ p; - project_pixel_to_point( - u, v, z, - img_props.intr.fx, img_props.intr.fy, - img_props.intr.cx, img_props.intr.cy, - p); - - if (!std::isnan(p.x) && !std::isnan(p.y) && !std::isnan(p.z)) { - out->points.push_back(p); - } +// Extracts depth points that fall inside an elliptic annulus around the bbox +// center (no alignment). 
+void extract_annulus_pcl(const cv::Mat& depth_image, + const BoundingBox& bbox, + const ImageProperties& img_props, + float annulus_radius_ratio, + pcl::PointCloud::Ptr& out) { + out->clear(); + + const float cx = bbox.center_x; + const float cy = bbox.center_y; + const float outer_rx = bbox.size_x * 0.5f; + const float outer_ry = bbox.size_y * 0.5f; + + if (outer_rx < 2.0f || outer_ry < 2.0f) + return; + + const float inner_rx = outer_rx * annulus_radius_ratio; + const float inner_ry = outer_ry * annulus_radius_ratio; + + const int u0 = static_cast(std::floor(cx - outer_rx)); + const int u1 = static_cast(std::ceil(cx + outer_rx)); + const int v0 = static_cast(std::floor(cy - outer_ry)); + const int v1 = static_cast(std::ceil(cy + outer_ry)); + + for (int v = v0; v <= v1; ++v) { + if (v < 0 || v >= depth_image.rows) + continue; + for (int u = u0; u <= u1; ++u) { + if (u < 0 || u >= depth_image.cols) + continue; + + const float dxo = (u - cx) / outer_rx; + const float dyo = (v - cy) / outer_ry; + const bool inside_outer = (dxo * dxo + dyo * dyo) <= 1.0f; + + const float dxi = (u - cx) / inner_rx; + const float dyi = (v - cy) / inner_ry; + const bool outside_inner = (dxi * dxi + dyi * dyi) > 1.0f; + + if (!inside_outer || !outside_inner) + continue; + + const float z = depth_image.at(v, u); + pcl::PointXYZ p; + project_pixel_to_point(u, v, z, img_props.intr.fx, + img_props.intr.fy, img_props.intr.cx, + img_props.intr.cy, p); + + if (!std::isnan(p.x) && !std::isnan(p.y) && !std::isnan(p.z)) { + out->points.push_back(p); + } + } } - } - out->width = static_cast(out->points.size()); - out->height = 1; - out->is_dense = false; + out->width = static_cast(out->points.size()); + out->height = 1; + out->is_dense = false; } // Returns hardcoded depth-to-color extrinsic for Intel RealSense D555. 
// Verify with: ros2 topic echo /realsense/extrinsics/depth_to_color DepthColorExtrinsic d555_depth_to_color_extrinsic() { - DepthColorExtrinsic extr; - // Both depth and color optical frames share the same body-to-optical - // rotation, so R is identity. - extr.R = Eigen::Matrix3f::Identity(); - // t = depth origin expressed in color optical frame. - // Color is 0.059 m in -y (rightward) of depth in body frame, so depth is - // +0.059 m in y (leftward) of color. Converting to optical frame: - // t_optical = R_body_to_optical * [0, +0.059, 0] - // = [[0,-1,0],[0,0,-1],[1,0,0]] * [0, 0.059, 0] - // = [-0.059, 0, 0] - extr.t = Eigen::Vector3f(-0.059f, 0.0f, 0.0f); - return extr; + DepthColorExtrinsic extr; + // Both depth and color optical frames share the same body-to-optical + // rotation, so R is identity. + extr.R = Eigen::Matrix3f::Identity(); + // t = depth origin expressed in color optical frame. + // Color is 0.059 m in -y (rightward) of depth in body frame, so depth is + // +0.059 m in y (leftward) of color. Converting to optical frame: + // t_optical = R_body_to_optical * [0, +0.059, 0] + // = [[0,-1,0],[0,0,-1],[1,0,0]] * [0, 0.059, 0] + // = [-0.059, 0, 0] + extr.t = Eigen::Vector3f(-0.059f, 0.0f, 0.0f); + return extr; } -// Like extract_annulus_pcl but transforms depth pixels into the color frame before the annulus test. 
-void extract_annulus_pcl_aligned( - const cv::Mat& depth_image, - const BoundingBox& color_bbox, - const ImageProperties& color_props, - const ImageProperties& depth_props, - const DepthColorExtrinsic& extr, - float annulus_radius_ratio, - pcl::PointCloud::Ptr& out) -{ - out->clear(); - - const float cx_c = color_bbox.center_x; - const float cy_c = color_bbox.center_y; - const float outer_rx = color_bbox.size_x * 0.5f; - const float outer_ry = color_bbox.size_y * 0.5f; - - if (outer_rx < 2.0f || outer_ry < 2.0f) return; - - const float inner_rx = outer_rx * annulus_radius_ratio; - const float inner_ry = outer_ry * annulus_radius_ratio; - - // Approximate scale between depth and color focal lengths to define a - // coarse search region in depth-image coordinates. A 30-pixel margin - // accounts for the lateral offset introduced by the extrinsic. - const float scale = (depth_props.intr.fx > 0.0 && color_props.intr.fx > 0.0) - ? static_cast(depth_props.intr.fx / color_props.intr.fx) - : 1.0f; - constexpr int kMargin = 30; - - const int u0_d = std::max(0, - static_cast((cx_c - outer_rx) * scale) - kMargin); - const int u1_d = std::min(depth_image.cols - 1, - static_cast((cx_c + outer_rx) * scale) + kMargin); - const int v0_d = std::max(0, - static_cast((cy_c - outer_ry) * scale) - kMargin); - const int v1_d = std::min(depth_image.rows - 1, - static_cast((cy_c + outer_ry) * scale) + kMargin); - - for (int v_d = v0_d; v_d <= v1_d; ++v_d) { - for (int u_d = u0_d; u_d <= u1_d; ++u_d) { - const float z_d = depth_image.at(v_d, u_d); - if (z_d <= 0.0f || std::isnan(z_d) || std::isinf(z_d)) continue; - - // Back-project using depth intrinsics. - Eigen::Vector3f P_d; - P_d.x() = static_cast((u_d - depth_props.intr.cx) * z_d / depth_props.intr.fx); - P_d.y() = static_cast((v_d - depth_props.intr.cy) * z_d / depth_props.intr.fy); - P_d.z() = z_d; - - // Transform into color camera frame. 
- const Eigen::Vector3f P_c = extr.R * P_d + extr.t; - if (P_c.z() <= 0.0f) continue; - - // Project onto color image plane. - const float u_c = static_cast( - color_props.intr.fx * P_c.x() / P_c.z() + color_props.intr.cx); - const float v_c = static_cast( - color_props.intr.fy * P_c.y() / P_c.z() + color_props.intr.cy); - - // Elliptic annulus test in color-image space. - const float dxo = (u_c - cx_c) / outer_rx; - const float dyo = (v_c - cy_c) / outer_ry; - if (dxo*dxo + dyo*dyo > 1.0f) continue; // outside outer ellipse - - const float dxi = (u_c - cx_c) / inner_rx; - const float dyi = (v_c - cy_c) / inner_ry; - if (dxi*dxi + dyi*dyi <= 1.0f) continue; // inside inner ellipse - - pcl::PointXYZ p; - p.x = P_c.x(); - p.y = P_c.y(); - p.z = P_c.z(); - out->points.push_back(p); +// Like extract_annulus_pcl but transforms depth pixels into the color frame +// before the annulus test. +void extract_annulus_pcl_aligned(const cv::Mat& depth_image, + const BoundingBox& color_bbox, + const ImageProperties& color_props, + const ImageProperties& depth_props, + const DepthColorExtrinsic& extr, + float annulus_radius_ratio, + pcl::PointCloud::Ptr& out) { + out->clear(); + + const float cx_c = color_bbox.center_x; + const float cy_c = color_bbox.center_y; + const float outer_rx = color_bbox.size_x * 0.5f; + const float outer_ry = color_bbox.size_y * 0.5f; + + if (outer_rx < 2.0f || outer_ry < 2.0f) + return; + + const float inner_rx = outer_rx * annulus_radius_ratio; + const float inner_ry = outer_ry * annulus_radius_ratio; + + // Approximate scale between depth and color focal lengths to define a + // coarse search region in depth-image coordinates. A 30-pixel margin + // accounts for the lateral offset introduced by the extrinsic. + const float scale = + (depth_props.intr.fx > 0.0 && color_props.intr.fx > 0.0) + ? 
static_cast(depth_props.intr.fx / color_props.intr.fx) + : 1.0f; + constexpr int kMargin = 30; + + const int u0_d = + std::max(0, static_cast((cx_c - outer_rx) * scale) - kMargin); + const int u1_d = + std::min(depth_image.cols - 1, + static_cast((cx_c + outer_rx) * scale) + kMargin); + const int v0_d = + std::max(0, static_cast((cy_c - outer_ry) * scale) - kMargin); + const int v1_d = + std::min(depth_image.rows - 1, + static_cast((cy_c + outer_ry) * scale) + kMargin); + + for (int v_d = v0_d; v_d <= v1_d; ++v_d) { + for (int u_d = u0_d; u_d <= u1_d; ++u_d) { + const float z_d = depth_image.at(v_d, u_d); + if (z_d <= 0.0f || std::isnan(z_d) || std::isinf(z_d)) + continue; + + // Back-project using depth intrinsics. + Eigen::Vector3f P_d; + P_d.x() = static_cast((u_d - depth_props.intr.cx) * z_d / + depth_props.intr.fx); + P_d.y() = static_cast((v_d - depth_props.intr.cy) * z_d / + depth_props.intr.fy); + P_d.z() = z_d; + + // Transform into color camera frame. + const Eigen::Vector3f P_c = extr.R * P_d + extr.t; + if (P_c.z() <= 0.0f) + continue; + + // Project onto color image plane. + const float u_c = static_cast( + color_props.intr.fx * P_c.x() / P_c.z() + color_props.intr.cx); + const float v_c = static_cast( + color_props.intr.fy * P_c.y() / P_c.z() + color_props.intr.cy); + + // Elliptic annulus test in color-image space. 
+ const float dxo = (u_c - cx_c) / outer_rx; + const float dyo = (v_c - cy_c) / outer_ry; + if (dxo * dxo + dyo * dyo > 1.0f) + continue; // outside outer ellipse + + const float dxi = (u_c - cx_c) / inner_rx; + const float dyi = (v_c - cy_c) / inner_ry; + if (dxi * dxi + dyi * dyi <= 1.0f) + continue; // inside inner ellipse + + pcl::PointXYZ p; + p.x = P_c.x(); + p.y = P_c.y(); + p.z = P_c.z(); + out->points.push_back(p); + } } - } - out->width = static_cast(out->points.size()); - out->height = 1; - out->is_dense = false; + out->width = static_cast(out->points.size()); + out->height = 1; + out->is_dense = false; } -// Extracts depth points whose color-frame projection falls inside the oriented bounding box. -void extract_bbox_pcl_aligned( - const cv::Mat& depth_image, - const BoundingBox& color_bbox, - const ImageProperties& color_props, - const ImageProperties& depth_props, - const DepthColorExtrinsic& extr, - pcl::PointCloud::Ptr& out) -{ - out->clear(); - - // Pre-compute the OBB axes in color-image space for a fast rotated-rect test. - const float cos_t = std::cos(color_bbox.theta); - const float sin_t = std::sin(color_bbox.theta); - const float half_w = color_bbox.size_x * 0.5f; - const float half_h = color_bbox.size_y * 0.5f; - - // Approximate search region in depth-image space. - const float scale = (depth_props.intr.fx > 0.0 && color_props.intr.fx > 0.0) - ? 
static_cast(depth_props.intr.fx / color_props.intr.fx) - : 1.0f; - const float r = std::sqrt(half_w * half_w + half_h * half_h); - constexpr int kMargin = 30; - - const int u0_d = std::max(0, - static_cast((color_bbox.center_x - r) * scale) - kMargin); - const int u1_d = std::min(depth_image.cols - 1, - static_cast((color_bbox.center_x + r) * scale) + kMargin); - const int v0_d = std::max(0, - static_cast((color_bbox.center_y - r) * scale) - kMargin); - const int v1_d = std::min(depth_image.rows - 1, - static_cast((color_bbox.center_y + r) * scale) + kMargin); - - for (int v_d = v0_d; v_d <= v1_d; ++v_d) { - for (int u_d = u0_d; u_d <= u1_d; ++u_d) { - const float z_d = depth_image.at(v_d, u_d); - if (z_d <= 0.0f || std::isnan(z_d) || std::isinf(z_d)) continue; - - // Back-project to depth camera frame. - Eigen::Vector3f P_d; - P_d.x() = static_cast((u_d - depth_props.intr.cx) * z_d / depth_props.intr.fx); - P_d.y() = static_cast((v_d - depth_props.intr.cy) * z_d / depth_props.intr.fy); - P_d.z() = z_d; - - // Transform to color camera frame. - const Eigen::Vector3f P_c = extr.R * P_d + extr.t; - if (P_c.z() <= 0.0f) continue; - - // Project onto color image plane. - const float u_c = static_cast( - color_props.intr.fx * P_c.x() / P_c.z() + color_props.intr.cx); - const float v_c = static_cast( - color_props.intr.fy * P_c.y() / P_c.z() + color_props.intr.cy); - - // Rotate into the OBB local frame and test against the half-extents. - const float dx = u_c - color_bbox.center_x; - const float dy = v_c - color_bbox.center_y; - const float local_x = cos_t * dx + sin_t * dy; - const float local_y = -sin_t * dx + cos_t * dy; - - if (std::abs(local_x) > half_w || std::abs(local_y) > half_h) continue; - - out->points.push_back({P_c.x(), P_c.y(), P_c.z()}); +// Extracts depth points whose color-frame projection falls inside the oriented +// bounding box. 
+void extract_bbox_pcl_aligned(const cv::Mat& depth_image, + const BoundingBox& color_bbox, + const ImageProperties& color_props, + const ImageProperties& depth_props, + const DepthColorExtrinsic& extr, + pcl::PointCloud::Ptr& out) { + out->clear(); + + // Pre-compute the OBB axes in color-image space for a fast rotated-rect + // test. + const float cos_t = std::cos(color_bbox.theta); + const float sin_t = std::sin(color_bbox.theta); + const float half_w = color_bbox.size_x * 0.5f; + const float half_h = color_bbox.size_y * 0.5f; + + // Approximate search region in depth-image space. + const float scale = + (depth_props.intr.fx > 0.0 && color_props.intr.fx > 0.0) + ? static_cast(depth_props.intr.fx / color_props.intr.fx) + : 1.0f; + const float r = std::sqrt(half_w * half_w + half_h * half_h); + constexpr int kMargin = 30; + + const int u0_d = std::max( + 0, static_cast((color_bbox.center_x - r) * scale) - kMargin); + const int u1_d = + std::min(depth_image.cols - 1, + static_cast((color_bbox.center_x + r) * scale) + kMargin); + const int v0_d = std::max( + 0, static_cast((color_bbox.center_y - r) * scale) - kMargin); + const int v1_d = + std::min(depth_image.rows - 1, + static_cast((color_bbox.center_y + r) * scale) + kMargin); + + for (int v_d = v0_d; v_d <= v1_d; ++v_d) { + for (int u_d = u0_d; u_d <= u1_d; ++u_d) { + const float z_d = depth_image.at(v_d, u_d); + if (z_d <= 0.0f || std::isnan(z_d) || std::isinf(z_d)) + continue; + + // Back-project to depth camera frame. + Eigen::Vector3f P_d; + P_d.x() = static_cast((u_d - depth_props.intr.cx) * z_d / + depth_props.intr.fx); + P_d.y() = static_cast((v_d - depth_props.intr.cy) * z_d / + depth_props.intr.fy); + P_d.z() = z_d; + + // Transform to color camera frame. + const Eigen::Vector3f P_c = extr.R * P_d + extr.t; + if (P_c.z() <= 0.0f) + continue; + + // Project onto color image plane. 
+ const float u_c = static_cast( + color_props.intr.fx * P_c.x() / P_c.z() + color_props.intr.cx); + const float v_c = static_cast( + color_props.intr.fy * P_c.y() / P_c.z() + color_props.intr.cy); + + // Rotate into the OBB local frame and test against the + // half-extents. + const float dx = u_c - color_bbox.center_x; + const float dy = v_c - color_bbox.center_y; + const float local_x = cos_t * dx + sin_t * dy; + const float local_y = -sin_t * dx + cos_t * dy; + + if (std::abs(local_x) > half_w || std::abs(local_y) > half_h) + continue; + + out->points.push_back({P_c.x(), P_c.y(), P_c.z()}); + } } - } - out->width = static_cast(out->points.size()); - out->height = 1; - out->is_dense = false; + out->width = static_cast(out->points.size()); + out->height = 1; + out->is_dense = false; } -// Projects a color image pixel to depth image coordinates using the full intrinsic + extrinsic pipeline. -cv::Point2f project_color_pixel_to_depth( - float u_c, float v_c, float Z, - const ImageProperties& color_props, - const ImageProperties& depth_props, - const DepthColorExtrinsic& extr) -{ - // Back-project color pixel → 3-D point in color camera frame. - const float Xc = (u_c - static_cast(color_props.intr.cx)) * Z - / static_cast(color_props.intr.fx); - const float Yc = (v_c - static_cast(color_props.intr.cy)) * Z - / static_cast(color_props.intr.fy); - const Eigen::Vector3f Pc(Xc, Yc, Z); - - // Transform to depth camera frame: P_depth = R^T * (P_color - t) - const Eigen::Vector3f Pd = extr.R.transpose() * (Pc - extr.t); - - if (Pd.z() <= 0.0f) return {u_c, v_c}; // degenerate – return original - - // Project into depth image. - const float u_d = static_cast(depth_props.intr.fx) * Pd.x() / Pd.z() - + static_cast(depth_props.intr.cx); - const float v_d = static_cast(depth_props.intr.fy) * Pd.y() / Pd.z() - + static_cast(depth_props.intr.cy); - return {u_d, v_d}; +// Projects a color image pixel to depth image coordinates using the full +// intrinsic + extrinsic pipeline. 
+cv::Point2f project_color_pixel_to_depth(float u_c, + float v_c, + float Z, + const ImageProperties& color_props, + const ImageProperties& depth_props, + const DepthColorExtrinsic& extr) { + // Back-project color pixel → 3-D point in color camera frame. + const float Xc = (u_c - static_cast(color_props.intr.cx)) * Z / + static_cast(color_props.intr.fx); + const float Yc = (v_c - static_cast(color_props.intr.cy)) * Z / + static_cast(color_props.intr.fy); + const Eigen::Vector3f Pc(Xc, Yc, Z); + + // Transform to depth camera frame: P_depth = R^T * (P_color - t) + const Eigen::Vector3f Pd = extr.R.transpose() * (Pc - extr.t); + + if (Pd.z() <= 0.0f) + return {u_c, v_c}; // degenerate – return original + + // Project into depth image. + const float u_d = + static_cast(depth_props.intr.fx) * Pd.x() / Pd.z() + + static_cast(depth_props.intr.cx); + const float v_d = + static_cast(depth_props.intr.fy) * Pd.y() / Pd.z() + + static_cast(depth_props.intr.cy); + return {u_d, v_d}; } } // namespace valve_detection diff --git a/src/pose_estimator.cpp b/src/pose_estimator.cpp index ad7e2fc..d899857 100644 --- a/src/pose_estimator.cpp +++ b/src/pose_estimator.cpp @@ -4,7 +4,8 @@ namespace valve_detection { -// Initializes YOLO dimensions, annulus ratio, RANSAC parameters, and handle offset. +// Initializes YOLO dimensions, annulus ratio, RANSAC parameters, and handle +// offset. PoseEstimator::PoseEstimator(int yolo_img_width, int yolo_img_height, float annulus_radius_ratio, @@ -20,230 +21,242 @@ PoseEstimator::PoseEstimator(int yolo_img_width, // Stores color camera intrinsics and image dimensions. void PoseEstimator::set_color_image_properties(const ImageProperties& props) { - color_image_properties_ = props; + color_image_properties_ = props; } // Stores depth camera intrinsics and image dimensions. 
void PoseEstimator::set_depth_image_properties(const ImageProperties& props) { - depth_image_properties_ = props; - has_depth_props_ = (props.intr.fx > 0.0); + depth_image_properties_ = props; + has_depth_props_ = (props.intr.fx > 0.0); } // Stores the depth-to-color extrinsic transform. void PoseEstimator::set_depth_color_extrinsic(const DepthColorExtrinsic& extr) { - depth_color_extrinsic_ = extr; + depth_color_extrinsic_ = extr; } -// Computes letterbox scale factor and padding from the color image and YOLO dimensions. +// Computes letterbox scale factor and padding from the color image and YOLO +// dimensions. void PoseEstimator::calculate_letterbox_padding() { - int org_image_width = color_image_properties_.dim.width; - int org_image_height = color_image_properties_.dim.height; + int org_image_width = color_image_properties_.dim.width; + int org_image_height = color_image_properties_.dim.height; - letterbox_scale_factor_ = - std::min(static_cast(yolo_img_width_) / org_image_width, - static_cast(yolo_img_height_) / org_image_height); + letterbox_scale_factor_ = + std::min(static_cast(yolo_img_width_) / org_image_width, + static_cast(yolo_img_height_) / org_image_height); - double resized_width = org_image_width * letterbox_scale_factor_; - double resized_height = org_image_height * letterbox_scale_factor_; + double resized_width = org_image_width * letterbox_scale_factor_; + double resized_height = org_image_height * letterbox_scale_factor_; - letterbox_pad_x_ = (yolo_img_width_ - resized_width) / 2.0; - letterbox_pad_y_ = (yolo_img_height_ - resized_height) / 2.0; + letterbox_pad_x_ = (yolo_img_width_ - resized_width) / 2.0; + letterbox_pad_y_ = (yolo_img_height_ - resized_height) / 2.0; } -// Remaps a bounding box from YOLO letterbox space back to original image coordinates. 
-BoundingBox PoseEstimator::transform_bounding_box(const BoundingBox& bbox) const { - BoundingBox transformed = bbox; - transformed.center_x = (bbox.center_x - letterbox_pad_x_) / letterbox_scale_factor_; - transformed.center_y = (bbox.center_y - letterbox_pad_y_) / letterbox_scale_factor_; - transformed.size_x /= letterbox_scale_factor_; - transformed.size_y /= letterbox_scale_factor_; - return transformed; +// Remaps a bounding box from YOLO letterbox space back to original image +// coordinates. +BoundingBox PoseEstimator::transform_bounding_box( + const BoundingBox& bbox) const { + BoundingBox transformed = bbox; + transformed.center_x = + (bbox.center_x - letterbox_pad_x_) / letterbox_scale_factor_; + transformed.center_y = + (bbox.center_y - letterbox_pad_y_) / letterbox_scale_factor_; + transformed.size_x /= letterbox_scale_factor_; + transformed.size_y /= letterbox_scale_factor_; + return transformed; } -// Fits a plane to the point cloud using RANSAC; returns false if no inliers are found. +// Fits a plane to the point cloud using RANSAC; returns false if no inliers are +// found. 
bool PoseEstimator::segment_plane( const pcl::PointCloud::Ptr& cloud, pcl::ModelCoefficients::Ptr& coefficients, - pcl::PointIndices::Ptr& inliers) const -{ - if (cloud->points.empty()) return false; - - pcl::SACSegmentation seg; - seg.setOptimizeCoefficients(true); - seg.setModelType(pcl::SACMODEL_PLANE); - seg.setMethodType(pcl::SAC_RANSAC); - seg.setDistanceThreshold(plane_ransac_threshold_); - seg.setMaxIterations(plane_ransac_max_iterations_); - seg.setInputCloud(cloud); - seg.segment(*inliers, *coefficients); - - return !inliers->indices.empty(); + pcl::PointIndices::Ptr& inliers) const { + if (cloud->points.empty()) + return false; + + pcl::SACSegmentation seg; + seg.setOptimizeCoefficients(true); + seg.setModelType(pcl::SACMODEL_PLANE); + seg.setMethodType(pcl::SAC_RANSAC); + seg.setDistanceThreshold(plane_ransac_threshold_); + seg.setMaxIterations(plane_ransac_max_iterations_); + seg.setInputCloud(cloud); + seg.segment(*inliers, *coefficients); + + return !inliers->indices.empty(); } -// Returns the normalized 3D ray direction from the camera origin through the bbox center. -Eigen::Vector3f PoseEstimator::get_ray_direction(const BoundingBox& bbox) const { - const float xc = (bbox.center_x - static_cast(color_image_properties_.intr.cx)) - / static_cast(color_image_properties_.intr.fx); - const float yc = (bbox.center_y - static_cast(color_image_properties_.intr.cy)) - / static_cast(color_image_properties_.intr.fy); - return Eigen::Vector3f(xc, yc, 1.0f).normalized(); +// Returns the normalized 3D ray direction from the camera origin through the +// bbox center. 
+Eigen::Vector3f PoseEstimator::get_ray_direction( + const BoundingBox& bbox) const { + const float xc = + (bbox.center_x - static_cast(color_image_properties_.intr.cx)) / + static_cast(color_image_properties_.intr.fx); + const float yc = + (bbox.center_y - static_cast(color_image_properties_.intr.cy)) / + static_cast(color_image_properties_.intr.fy); + return Eigen::Vector3f(xc, yc, 1.0f).normalized(); } // Extracts and normalizes the plane normal, flipping it to face the camera. Eigen::Vector3f PoseEstimator::compute_plane_normal( const pcl::ModelCoefficients::Ptr& coefficients, - const Eigen::Vector3f& ray_direction) const -{ - if (!coefficients || coefficients->values.size() < 3) - return Eigen::Vector3f::Zero(); - - Eigen::Vector3f normal(coefficients->values[0], - coefficients->values[1], - coefficients->values[2]); - normal.normalize(); - - if (normal.dot(ray_direction) > 0.0f) normal = -normal; - return normal; + const Eigen::Vector3f& ray_direction) const { + if (!coefficients || coefficients->values.size() < 3) + return Eigen::Vector3f::Zero(); + + Eigen::Vector3f normal(coefficients->values[0], coefficients->values[1], + coefficients->values[2]); + normal.normalize(); + + if (normal.dot(ray_direction) > 0.0f) + normal = -normal; + return normal; } // Finds the 3D point where the viewing ray intersects the fitted plane. 
Eigen::Vector3f PoseEstimator::find_ray_plane_intersection( const pcl::ModelCoefficients::Ptr& coefficients, - const Eigen::Vector3f& ray_direction) const -{ - if (!coefficients || coefficients->values.size() < 4) - return Eigen::Vector3f::Zero(); + const Eigen::Vector3f& ray_direction) const { + if (!coefficients || coefficients->values.size() < 4) + return Eigen::Vector3f::Zero(); - const Eigen::Vector3f plane_normal(coefficients->values[0], - coefficients->values[1], - coefficients->values[2]); - const float D = coefficients->values[3]; - const float denom = plane_normal.dot(ray_direction); + const Eigen::Vector3f plane_normal(coefficients->values[0], + coefficients->values[1], + coefficients->values[2]); + const float D = coefficients->values[3]; + const float denom = plane_normal.dot(ray_direction); - if (std::abs(denom) < 1e-6f) return Eigen::Vector3f::Zero(); + if (std::abs(denom) < 1e-6f) + return Eigen::Vector3f::Zero(); - return (-D / denom) * ray_direction; + return (-D / denom) * ray_direction; } // Shifts a 3D point along the plane normal by the valve handle offset. Eigen::Vector3f PoseEstimator::shift_point_along_normal( const Eigen::Vector3f& intersection_point, - const Eigen::Vector3f& plane_normal) const -{ - return intersection_point + (plane_normal * valve_handle_offset_); + const Eigen::Vector3f& plane_normal) const { + return intersection_point + (plane_normal * valve_handle_offset_); } -// Builds a 3×3 rotation matrix from the plane normal (Z) and the projected bbox angle (X). +// Builds a 3×3 rotation matrix from the plane normal (Z) and the projected bbox +// angle (X). 
Eigen::Matrix3f PoseEstimator::create_rotation_matrix( const pcl::ModelCoefficients::Ptr& coefficients, const Eigen::Vector3f& plane_normal, - float angle) const -{ - if (!coefficients || coefficients->values.size() < 4) - return Eigen::Matrix3f::Identity(); - - const Eigen::Vector3f z_axis = plane_normal; - const float D = coefficients->values[3]; - const float fx = static_cast(color_image_properties_.intr.fx); - const float fy = static_cast(color_image_properties_.intr.fy); - const float cx = static_cast(color_image_properties_.intr.cx); - const float cy = static_cast(color_image_properties_.intr.cy); - - Eigen::Matrix3f K; - K << fx, 0, cx, 0, fy, cy, 0, 0, 1; - const Eigen::Matrix3f Kinv = K.inverse(); - - // Two image points along the bbox angle through the principal point. - const float len = 50.0f; - const Eigen::Vector3f p1(cx, cy, 1.f); - const Eigen::Vector3f p2(cx + len * std::cos(angle), - cy + len * std::sin(angle), 1.f); - - // Back project points to rays. - const Eigen::Vector3f r1 = (Kinv * p1).normalized(); - const Eigen::Vector3f r2 = (Kinv * p2).normalized(); - - // Compute intersections of rays with the plane. - const float denom1 = z_axis.dot(r1); - const float denom2 = z_axis.dot(r2); - if (std::abs(denom1) < 1e-6f || std::abs(denom2) < 1e-6f) - return Eigen::Matrix3f::Identity(); - - const Eigen::Vector3f X1 = (-D / denom1) * r1; - const Eigen::Vector3f X2 = (-D / denom2) * r2; - - // Compute in-plane direction corresponding to the image line angle. - Eigen::Vector3f x_axis = (X2 - X1).normalized(); - - // Project onto the plane (for numerical stability). - x_axis = (x_axis - x_axis.dot(z_axis) * z_axis).normalized(); - - // Ensure consistent direction (avoid flipping frame between frames). 
- if (filter_direction_.dot(x_axis) < 0) x_axis = -x_axis; - filter_direction_ = x_axis; - - const Eigen::Vector3f y_axis = z_axis.cross(x_axis).normalized(); - x_axis = y_axis.cross(z_axis).normalized(); - - Eigen::Matrix3f rot; - rot.col(0) = x_axis; // X_obj: direction of the image line - rot.col(1) = y_axis; // Y_obj: perpendicular in-plane - rot.col(2) = z_axis; // Z_obj: plane normal - return rot; + float angle) const { + if (!coefficients || coefficients->values.size() < 4) + return Eigen::Matrix3f::Identity(); + + const Eigen::Vector3f z_axis = plane_normal; + const float D = coefficients->values[3]; + const float fx = static_cast(color_image_properties_.intr.fx); + const float fy = static_cast(color_image_properties_.intr.fy); + const float cx = static_cast(color_image_properties_.intr.cx); + const float cy = static_cast(color_image_properties_.intr.cy); + + Eigen::Matrix3f K; + K << fx, 0, cx, 0, fy, cy, 0, 0, 1; + const Eigen::Matrix3f Kinv = K.inverse(); + + // Two image points along the bbox angle through the principal point. + const float len = 50.0f; + const Eigen::Vector3f p1(cx, cy, 1.f); + const Eigen::Vector3f p2(cx + len * std::cos(angle), + cy + len * std::sin(angle), 1.f); + + // Back project points to rays. + const Eigen::Vector3f r1 = (Kinv * p1).normalized(); + const Eigen::Vector3f r2 = (Kinv * p2).normalized(); + + // Compute intersections of rays with the plane. + const float denom1 = z_axis.dot(r1); + const float denom2 = z_axis.dot(r2); + if (std::abs(denom1) < 1e-6f || std::abs(denom2) < 1e-6f) + return Eigen::Matrix3f::Identity(); + + const Eigen::Vector3f X1 = (-D / denom1) * r1; + const Eigen::Vector3f X2 = (-D / denom2) * r2; + + // Compute in-plane direction corresponding to the image line angle. + Eigen::Vector3f x_axis = (X2 - X1).normalized(); + + // Project onto the plane (for numerical stability). 
+ x_axis = (x_axis - x_axis.dot(z_axis) * z_axis).normalized(); + + // Ensure consistent direction (avoid flipping frame between frames). + if (filter_direction_.dot(x_axis) < 0) + x_axis = -x_axis; + filter_direction_ = x_axis; + + const Eigen::Vector3f y_axis = z_axis.cross(x_axis).normalized(); + x_axis = y_axis.cross(z_axis).normalized(); + + Eigen::Matrix3f rot; + rot.col(0) = x_axis; // X_obj: direction of the image line + rot.col(1) = y_axis; // Y_obj: perpendicular in-plane + rot.col(2) = z_axis; // Z_obj: plane normal + return rot; } -// Extracts a point cloud from the depth image, fits a plane, and returns the valve pose. +// Extracts a point cloud from the depth image, fits a plane, and returns the +// valve pose. bool PoseEstimator::compute_pose_from_depth( const cv::Mat& depth_image, const BoundingBox& bbox_org, Pose& out_pose, pcl::PointCloud::Ptr annulus_dbg, pcl::PointCloud::Ptr plane_dbg, - bool debug_visualize) const -{ - pcl::PointCloud::Ptr cloud(new pcl::PointCloud); - - if (has_depth_props_) { - extract_bbox_pcl_aligned( - depth_image, bbox_org, - color_image_properties_, depth_image_properties_, - depth_color_extrinsic_, - cloud); - } else { - extract_annulus_pcl( - depth_image, bbox_org, - color_image_properties_, - annulus_radius_ratio_, cloud); - } - - if (cloud->points.size() < 4) return false; - - if (debug_visualize && annulus_dbg) *annulus_dbg += *cloud; - - pcl::ModelCoefficients::Ptr coeff(new pcl::ModelCoefficients); - pcl::PointIndices::Ptr inliers(new pcl::PointIndices); - if (!segment_plane(cloud, coeff, inliers)) return false; - - if (debug_visualize && plane_dbg) { - for (int idx : inliers->indices) - plane_dbg->points.push_back(cloud->points[idx]); - plane_dbg->width = static_cast(plane_dbg->points.size()); - plane_dbg->height = 1; - plane_dbg->is_dense = false; - } - - const Eigen::Vector3f ray = get_ray_direction(bbox_org); - const Eigen::Vector3f normal = compute_plane_normal(coeff, ray); - if (normal.isZero()) return 
false; - - const Eigen::Vector3f pos = find_ray_plane_intersection(coeff, ray); - if (pos.isZero()) return false; - - out_pose.position = shift_point_along_normal(pos, normal); - const Eigen::Matrix3f rot = create_rotation_matrix(coeff, normal, bbox_org.theta); - out_pose.orientation = Eigen::Quaternionf(rot).normalized(); - - return true; + bool debug_visualize) const { + pcl::PointCloud::Ptr cloud( + new pcl::PointCloud); + + if (has_depth_props_) { + extract_bbox_pcl_aligned(depth_image, bbox_org, color_image_properties_, + depth_image_properties_, + depth_color_extrinsic_, cloud); + } else { + extract_annulus_pcl(depth_image, bbox_org, color_image_properties_, + annulus_radius_ratio_, cloud); + } + + if (cloud->points.size() < 4) + return false; + + if (debug_visualize && annulus_dbg) + *annulus_dbg += *cloud; + + pcl::ModelCoefficients::Ptr coeff(new pcl::ModelCoefficients); + pcl::PointIndices::Ptr inliers(new pcl::PointIndices); + if (!segment_plane(cloud, coeff, inliers)) + return false; + + if (debug_visualize && plane_dbg) { + for (int idx : inliers->indices) + plane_dbg->points.push_back(cloud->points[idx]); + plane_dbg->width = static_cast(plane_dbg->points.size()); + plane_dbg->height = 1; + plane_dbg->is_dense = false; + } + + const Eigen::Vector3f ray = get_ray_direction(bbox_org); + const Eigen::Vector3f normal = compute_plane_normal(coeff, ray); + if (normal.isZero()) + return false; + + const Eigen::Vector3f pos = find_ray_plane_intersection(coeff, ray); + if (pos.isZero()) + return false; + + out_pose.position = shift_point_along_normal(pos, normal); + const Eigen::Matrix3f rot = + create_rotation_matrix(coeff, normal, bbox_org.theta); + out_pose.orientation = Eigen::Quaternionf(rot).normalized(); + + return true; } } // namespace valve_detection diff --git a/src/valve_pose_ros.cpp b/src/valve_pose_ros.cpp index 62b784e..d3a7788 100644 --- a/src/valve_pose_ros.cpp +++ b/src/valve_pose_ros.cpp @@ -1,11 +1,12 @@ // valve_pose_ros.cpp -// ROS node: 
subscribes to depth, color, and detections; publishes valve poses and visualizations. +// ROS node: subscribes to depth, color, and detections; publishes valve poses +// and visualizations. #include "valve_detection_ros/valve_pose_ros.hpp" -#include #include #include #include +#include #include #include @@ -16,485 +17,536 @@ using std::placeholders::_1; using std::placeholders::_2; using std::placeholders::_3; -// Declares all ROS parameters, initializes the pose estimator, and sets up subscribers and publishers. +// Declares all ROS parameters, initializes the pose estimator, and sets up +// subscribers and publishers. ValvePoseNode::ValvePoseNode(const rclcpp::NodeOptions& options) -: Node("valve_pose_node", options) -{ - const auto depth_topic = declare_parameter("depth_image_sub_topic"); - const auto color_topic = declare_parameter("color_image_sub_topic"); - const auto det_topic = declare_parameter("detections_sub_topic"); - const auto depth_info_topic = declare_parameter("depth_image_info_topic"); - - const auto pose_stamped_topic = declare_parameter("valve_pose_stamped_pub_topic"); - const auto pose_topic = declare_parameter("valve_poses_pub_topic"); - const auto lm_topic = declare_parameter("landmarks_pub_topic"); - const auto img_topic = declare_parameter("processed_image_pub_topic"); - const auto depth_color_topic = declare_parameter("depth_colormap_pub_topic"); - const auto valve_points_topic = declare_parameter("valve_points_pub_topic"); - const auto depth_cloud_topic = declare_parameter("depth_cloud_pub_topic"); - - depth_colormap_vmin_ = static_cast(declare_parameter("depth_colormap_value_min")); - depth_colormap_vmax_ = static_cast(declare_parameter("depth_colormap_value_max")); - - const int yolo_w = declare_parameter("yolo_img_width"); - const int yolo_h = declare_parameter("yolo_img_height"); - const float annulus_ratio = declare_parameter("annulus_radius_ratio"); - const float ransac_thresh = declare_parameter("plane_ransac_threshold"); - const 
int ransac_iters = declare_parameter("plane_ransac_max_iterations"); - const float handle_off = declare_parameter("valve_handle_offset"); - - use_color_image_ = declare_parameter("use_color_image"); - visualize_detections_ = declare_parameter("visualize_detections"); - debug_visualize_ = declare_parameter("debug_visualize"); - iou_duplicate_threshold_ = static_cast( - declare_parameter("iou_duplicate_threshold")); - output_frame_id_ = declare_parameter("output_frame_id"); - - landmark_type_ = declare_parameter("landmark_type"); - landmark_subtype_ = declare_parameter("landmark_subtype"); - - detector_ = std::make_unique( - yolo_w, yolo_h, annulus_ratio, ransac_thresh, ransac_iters, handle_off); - - depth_color_extrinsic_ = d555_depth_to_color_extrinsic(); - detector_->set_depth_color_extrinsic(depth_color_extrinsic_); - - color_props_.intr.fx = declare_parameter("color_fx"); - color_props_.intr.fy = declare_parameter("color_fy"); - color_props_.intr.cx = declare_parameter("color_cx"); - color_props_.intr.cy = declare_parameter("color_cy"); - color_props_.dim.width = declare_parameter("color_image_width"); - color_props_.dim.height = declare_parameter("color_image_height"); - - detector_->set_color_image_properties(color_props_); - detector_->calculate_letterbox_padding(); - - cv_camera_matrix_ = (cv::Mat_(3, 3) - << color_props_.intr.fx, 0, color_props_.intr.cx, - 0, color_props_.intr.fy, color_props_.intr.cy, - 0, 0, 1); - - cv_dist_coeffs_ = (cv::Mat_(5, 1) - << declare_parameter("color_d1"), + : Node("valve_pose_node", options) { + const auto depth_topic = + declare_parameter("depth_image_sub_topic"); + const auto color_topic = + declare_parameter("color_image_sub_topic"); + const auto det_topic = + declare_parameter("detections_sub_topic"); + const auto depth_info_topic = + declare_parameter("depth_image_info_topic"); + + const auto pose_stamped_topic = + declare_parameter("valve_pose_stamped_pub_topic"); + const auto pose_topic = + 
declare_parameter("valve_poses_pub_topic"); + const auto lm_topic = declare_parameter("landmarks_pub_topic"); + const auto img_topic = + declare_parameter("processed_image_pub_topic"); + const auto depth_color_topic = + declare_parameter("depth_colormap_pub_topic"); + const auto valve_points_topic = + declare_parameter("valve_points_pub_topic"); + const auto depth_cloud_topic = + declare_parameter("depth_cloud_pub_topic"); + + depth_colormap_vmin_ = static_cast( + declare_parameter("depth_colormap_value_min")); + depth_colormap_vmax_ = static_cast( + declare_parameter("depth_colormap_value_max")); + + const int yolo_w = declare_parameter("yolo_img_width"); + const int yolo_h = declare_parameter("yolo_img_height"); + const float annulus_ratio = + declare_parameter("annulus_radius_ratio"); + const float ransac_thresh = + declare_parameter("plane_ransac_threshold"); + const int ransac_iters = + declare_parameter("plane_ransac_max_iterations"); + const float handle_off = declare_parameter("valve_handle_offset"); + + use_color_image_ = declare_parameter("use_color_image"); + visualize_detections_ = declare_parameter("visualize_detections"); + debug_visualize_ = declare_parameter("debug_visualize"); + iou_duplicate_threshold_ = static_cast( + declare_parameter("iou_duplicate_threshold")); + output_frame_id_ = declare_parameter("output_frame_id"); + + landmark_type_ = declare_parameter("landmark_type"); + landmark_subtype_ = declare_parameter("landmark_subtype"); + + detector_ = std::make_unique( + yolo_w, yolo_h, annulus_ratio, ransac_thresh, ransac_iters, handle_off); + + depth_color_extrinsic_ = d555_depth_to_color_extrinsic(); + detector_->set_depth_color_extrinsic(depth_color_extrinsic_); + + color_props_.intr.fx = declare_parameter("color_fx"); + color_props_.intr.fy = declare_parameter("color_fy"); + color_props_.intr.cx = declare_parameter("color_cx"); + color_props_.intr.cy = declare_parameter("color_cy"); + color_props_.dim.width = 
declare_parameter("color_image_width"); + color_props_.dim.height = declare_parameter("color_image_height"); + + detector_->set_color_image_properties(color_props_); + detector_->calculate_letterbox_padding(); + + cv_camera_matrix_ = (cv::Mat_(3, 3) << color_props_.intr.fx, 0, + color_props_.intr.cx, 0, color_props_.intr.fy, + color_props_.intr.cy, 0, 0, 1); + + cv_dist_coeffs_ = + (cv::Mat_(5, 1) << declare_parameter("color_d1"), declare_parameter("color_d2"), declare_parameter("color_d3"), declare_parameter("color_d4"), declare_parameter("color_d5")); - got_info_ = true; - RCLCPP_INFO(get_logger(), - "Color intrinsics loaded from config (fx=%.2f fy=%.2f cx=%.2f cy=%.2f)", - color_props_.intr.fx, color_props_.intr.fy, - color_props_.intr.cx, color_props_.intr.cy); - - depth_props_.intr.fx = declare_parameter("depth_fx"); - depth_props_.intr.fy = declare_parameter("depth_fy"); - depth_props_.intr.cx = declare_parameter("depth_cx"); - depth_props_.intr.cy = declare_parameter("depth_cy"); - depth_props_.dim.width = declare_parameter("depth_image_width"); - depth_props_.dim.height = declare_parameter("depth_image_height"); - - detector_->set_depth_image_properties(depth_props_); - got_depth_info_ = true; - RCLCPP_INFO(get_logger(), - "Depth intrinsics loaded from config (fx=%.2f fy=%.2f cx=%.2f cy=%.2f)", - depth_props_.intr.fx, depth_props_.intr.fy, - depth_props_.intr.cx, depth_props_.intr.cy); - - const auto qos = rclcpp::QoS(rclcpp::KeepLast(10)) - .reliability(RMW_QOS_POLICY_RELIABILITY_BEST_EFFORT); - - const auto info_qos = rclcpp::QoS(rclcpp::KeepLast(1)) - .reliability(RMW_QOS_POLICY_RELIABILITY_RELIABLE) - .durability(RMW_QOS_POLICY_DURABILITY_TRANSIENT_LOCAL); - - // Color camera_info is not subscribed — intrinsics and distortion are - // always taken from config params above. 
- depth_cam_info_sub_ = create_subscription( - depth_info_topic, info_qos, - std::bind(&ValvePoseNode::depth_camera_info_cb, this, _1)); - - pose_stamped_pub_ = create_publisher(pose_stamped_topic, qos); - pose_pub_ = create_publisher(pose_topic, qos); - landmark_pub_ = create_publisher(lm_topic, qos); - image_pub_ = create_publisher(img_topic, qos); - depth_colormap_pub_ = create_publisher(depth_color_topic, qos); - valve_points_pub_ = create_publisher(valve_points_topic, qos); - depth_cloud_pub_ = create_publisher(depth_cloud_topic, qos); - - if (debug_visualize_) { - const auto ann_topic = declare_parameter("debug_annulus_pub_topic"); - const auto pln_topic = declare_parameter("debug_plane_pub_topic"); - annulus_pub_ = create_publisher(ann_topic, qos); - plane_pub_ = create_publisher(pln_topic, qos); - } - - depth_sub_.subscribe(this, depth_topic, qos.get_rmw_qos_profile()); - color_sub_.subscribe(this, color_topic, qos.get_rmw_qos_profile()); - det_sub_.subscribe(this, det_topic, qos.get_rmw_qos_profile()); - - sync_ = std::make_shared>( - SyncPolicy(10), depth_sub_, color_sub_, det_sub_); - sync_->registerCallback( - std::bind(&ValvePoseNode::sync_cb, this, _1, _2, _3)); + got_info_ = true; + RCLCPP_INFO( + get_logger(), + "Color intrinsics loaded from config (fx=%.2f fy=%.2f cx=%.2f cy=%.2f)", + color_props_.intr.fx, color_props_.intr.fy, color_props_.intr.cx, + color_props_.intr.cy); + + depth_props_.intr.fx = declare_parameter("depth_fx"); + depth_props_.intr.fy = declare_parameter("depth_fy"); + depth_props_.intr.cx = declare_parameter("depth_cx"); + depth_props_.intr.cy = declare_parameter("depth_cy"); + depth_props_.dim.width = declare_parameter("depth_image_width"); + depth_props_.dim.height = declare_parameter("depth_image_height"); + + detector_->set_depth_image_properties(depth_props_); + got_depth_info_ = true; + RCLCPP_INFO( + get_logger(), + "Depth intrinsics loaded from config (fx=%.2f fy=%.2f cx=%.2f cy=%.2f)", + depth_props_.intr.fx, 
depth_props_.intr.fy, depth_props_.intr.cx, + depth_props_.intr.cy); + + const auto qos = rclcpp::QoS(rclcpp::KeepLast(10)) + .reliability(RMW_QOS_POLICY_RELIABILITY_BEST_EFFORT); + + const auto info_qos = + rclcpp::QoS(rclcpp::KeepLast(1)) + .reliability(RMW_QOS_POLICY_RELIABILITY_RELIABLE) + .durability(RMW_QOS_POLICY_DURABILITY_TRANSIENT_LOCAL); + + // Color camera_info is not subscribed — intrinsics and distortion are + // always taken from config params above. + depth_cam_info_sub_ = create_subscription( + depth_info_topic, info_qos, + std::bind(&ValvePoseNode::depth_camera_info_cb, this, _1)); + + pose_stamped_pub_ = create_publisher( + pose_stamped_topic, qos); + pose_pub_ = + create_publisher(pose_topic, qos); + landmark_pub_ = + create_publisher(lm_topic, qos); + image_pub_ = create_publisher(img_topic, qos); + depth_colormap_pub_ = + create_publisher(depth_color_topic, qos); + valve_points_pub_ = create_publisher( + valve_points_topic, qos); + depth_cloud_pub_ = + create_publisher(depth_cloud_topic, qos); + + if (debug_visualize_) { + const auto ann_topic = + declare_parameter("debug_annulus_pub_topic"); + const auto pln_topic = + declare_parameter("debug_plane_pub_topic"); + annulus_pub_ = + create_publisher(ann_topic, qos); + plane_pub_ = + create_publisher(pln_topic, qos); + } + + depth_sub_.subscribe(this, depth_topic, qos.get_rmw_qos_profile()); + color_sub_.subscribe(this, color_topic, qos.get_rmw_qos_profile()); + det_sub_.subscribe(this, det_topic, qos.get_rmw_qos_profile()); + + sync_ = std::make_shared>( + SyncPolicy(10), depth_sub_, color_sub_, det_sub_); + sync_->registerCallback( + std::bind(&ValvePoseNode::sync_cb, this, _1, _2, _3)); } // One-shot callback that overrides depth intrinsics from the camera_info topic. 
void ValvePoseNode::depth_camera_info_cb( - const sensor_msgs::msg::CameraInfo::SharedPtr msg) -{ - if (got_depth_info_) return; - - depth_props_.intr.fx = msg->k[0]; - depth_props_.intr.fy = msg->k[4]; - depth_props_.intr.cx = msg->k[2]; - depth_props_.intr.cy = msg->k[5]; - depth_props_.dim.width = static_cast(msg->width); - depth_props_.dim.height = static_cast(msg->height); - - detector_->set_depth_image_properties(depth_props_); - - got_depth_info_ = true; - depth_cam_info_sub_.reset(); // one-shot - RCLCPP_INFO(get_logger(), "Depth camera_info received (fx=%.1f fy=%.1f)", - depth_props_.intr.fx, depth_props_.intr.fy); + const sensor_msgs::msg::CameraInfo::SharedPtr msg) { + if (got_depth_info_) + return; + + depth_props_.intr.fx = msg->k[0]; + depth_props_.intr.fy = msg->k[4]; + depth_props_.intr.cx = msg->k[2]; + depth_props_.intr.cy = msg->k[5]; + depth_props_.dim.width = static_cast(msg->width); + depth_props_.dim.height = static_cast(msg->height); + + detector_->set_depth_image_properties(depth_props_); + + got_depth_info_ = true; + depth_cam_info_sub_.reset(); // one-shot + RCLCPP_INFO(get_logger(), "Depth camera_info received (fx=%.1f fy=%.1f)", + depth_props_.intr.fx, depth_props_.intr.fy); } // Converts a ROS BoundingBox2D message to the internal BoundingBox struct. 
-BoundingBox ValvePoseNode::to_bbox(const vision_msgs::msg::BoundingBox2D& b) const -{ - BoundingBox o; - o.center_x = static_cast(b.center.position.x); - o.center_y = static_cast(b.center.position.y); - o.size_x = static_cast(b.size_x); - o.size_y = static_cast(b.size_y); - o.theta = static_cast(b.center.theta); // radians - return o; +BoundingBox ValvePoseNode::to_bbox( + const vision_msgs::msg::BoundingBox2D& b) const { + BoundingBox o; + o.center_x = static_cast(b.center.position.x); + o.center_y = static_cast(b.center.position.y); + o.size_x = static_cast(b.size_x); + o.size_y = static_cast(b.size_y); + o.theta = static_cast(b.center.theta); // radians + return o; } // Corrects the bbox center for lens distortion using the color camera matrix. -BoundingBox ValvePoseNode::undistort_bbox(const BoundingBox& bbox) const -{ - if (cv_camera_matrix_.empty() || cv_dist_coeffs_.empty()) return bbox; - - std::vector pts{{bbox.center_x, bbox.center_y}}; - std::vector undistorted; - cv::undistortPoints(pts, undistorted, - cv_camera_matrix_, cv_dist_coeffs_, - cv::noArray(), cv_camera_matrix_); - - BoundingBox result = bbox; - result.center_x = undistorted[0].x; - result.center_y = undistorted[0].y; - return result; +BoundingBox ValvePoseNode::undistort_bbox(const BoundingBox& bbox) const { + if (cv_camera_matrix_.empty() || cv_dist_coeffs_.empty()) + return bbox; + + std::vector pts{{bbox.center_x, bbox.center_y}}; + std::vector undistorted; + cv::undistortPoints(pts, undistorted, cv_camera_matrix_, cv_dist_coeffs_, + cv::noArray(), cv_camera_matrix_); + + BoundingBox result = bbox; + result.center_x = undistorted[0].x; + result.center_y = undistorted[0].y; + return result; } // Greedy NMS: sorts detections by score, keeps at most 2 non-overlapping boxes. 
std::vector ValvePoseNode::filter_duplicate_detections( - const vision_msgs::msg::Detection2DArray& det) const -{ - const size_t n = det.detections.size(); - if (n == 0) return {}; - - // Collect (score, index) pairs – use bbox area when no score is available. - std::vector> scored; - scored.reserve(n); - for (size_t i = 0; i < n; ++i) { - float score = 0.0f; - if (!det.detections[i].results.empty()) { - score = static_cast( - det.detections[i].results[0].hypothesis.score); - } else { - const auto& b = det.detections[i].bbox; - score = static_cast(b.size_x * b.size_y); + const vision_msgs::msg::Detection2DArray& det) const { + const size_t n = det.detections.size(); + if (n == 0) + return {}; + + // Collect (score, index) pairs – use bbox area when no score is available. + std::vector> scored; + scored.reserve(n); + for (size_t i = 0; i < n; ++i) { + float score = 0.0f; + if (!det.detections[i].results.empty()) { + score = static_cast( + det.detections[i].results[0].hypothesis.score); + } else { + const auto& b = det.detections[i].bbox; + score = static_cast(b.size_x * b.size_y); + } + scored.emplace_back(score, i); } - scored.emplace_back(score, i); - } - std::sort(scored.begin(), scored.end(), - [](const auto& a, const auto& b) { return a.first > b.first; }); - - // Greedy NMS – keep at most 2. 
- std::vector kept; - std::vector suppressed(n, false); - - for (size_t si = 0; si < scored.size() && kept.size() < 2; ++si) { - const size_t i = scored[si].second; - if (suppressed[i]) continue; - - kept.push_back(i); - - const BoundingBox bi = to_bbox(det.detections[i].bbox); - const float ai = bi.size_x * bi.size_y; - const float bx1i = bi.center_x - bi.size_x * 0.5f; - const float by1i = bi.center_y - bi.size_y * 0.5f; - const float bx2i = bi.center_x + bi.size_x * 0.5f; - const float by2i = bi.center_y + bi.size_y * 0.5f; - - for (size_t sj = si + 1; sj < scored.size(); ++sj) { - const size_t j = scored[sj].second; - if (suppressed[j]) continue; - - const BoundingBox bj = to_bbox(det.detections[j].bbox); - const float aj = bj.size_x * bj.size_y; - const float bx1j = bj.center_x - bj.size_x * 0.5f; - const float by1j = bj.center_y - bj.size_y * 0.5f; - const float bx2j = bj.center_x + bj.size_x * 0.5f; - const float by2j = bj.center_y + bj.size_y * 0.5f; - - const float ix1 = std::max(bx1i, bx1j); - const float iy1 = std::max(by1i, by1j); - const float ix2 = std::min(bx2i, bx2j); - const float iy2 = std::min(by2i, by2j); - - if (ix2 <= ix1 || iy2 <= iy1) continue; // no overlap - - const float inter = (ix2 - ix1) * (iy2 - iy1); - const float iou = inter / (ai + aj - inter); - const float iom = inter / std::min(ai, aj); // intersection over minimum - - if (iou > iou_duplicate_threshold_ || iom > 0.7f) { - suppressed[j] = true; - } + std::sort(scored.begin(), scored.end(), + [](const auto& a, const auto& b) { return a.first > b.first; }); + + // Greedy NMS – keep at most 2. 
+ std::vector kept; + std::vector suppressed(n, false); + + for (size_t si = 0; si < scored.size() && kept.size() < 2; ++si) { + const size_t i = scored[si].second; + if (suppressed[i]) + continue; + + kept.push_back(i); + + const BoundingBox bi = to_bbox(det.detections[i].bbox); + const float ai = bi.size_x * bi.size_y; + const float bx1i = bi.center_x - bi.size_x * 0.5f; + const float by1i = bi.center_y - bi.size_y * 0.5f; + const float bx2i = bi.center_x + bi.size_x * 0.5f; + const float by2i = bi.center_y + bi.size_y * 0.5f; + + for (size_t sj = si + 1; sj < scored.size(); ++sj) { + const size_t j = scored[sj].second; + if (suppressed[j]) + continue; + + const BoundingBox bj = to_bbox(det.detections[j].bbox); + const float aj = bj.size_x * bj.size_y; + const float bx1j = bj.center_x - bj.size_x * 0.5f; + const float by1j = bj.center_y - bj.size_y * 0.5f; + const float bx2j = bj.center_x + bj.size_x * 0.5f; + const float by2j = bj.center_y + bj.size_y * 0.5f; + + const float ix1 = std::max(bx1i, bx1j); + const float iy1 = std::max(by1i, by1j); + const float ix2 = std::min(bx2i, bx2j); + const float iy2 = std::min(by2i, by2j); + + if (ix2 <= ix1 || iy2 <= iy1) + continue; // no overlap + + const float inter = (ix2 - ix1) * (iy2 - iy1); + const float iou = inter / (ai + aj - inter); + const float iom = + inter / std::min(ai, aj); // intersection over minimum + + if (iou > iou_duplicate_threshold_ || iom > 0.7f) { + suppressed[j] = true; + } + } } - } - return kept; + return kept; } // Packs poses into a PoseArray message and publishes it. 
void ValvePoseNode::publish_pose_array(const std::vector& poses, - const std_msgs::msg::Header& header) -{ - geometry_msgs::msg::PoseArray msg; - msg.header = header; - msg.poses.reserve(poses.size()); - for (const auto& p : poses) { - geometry_msgs::msg::Pose po; - po.position.x = p.position.x(); - po.position.y = p.position.y(); - po.position.z = p.position.z(); - po.orientation.x = p.orientation.x(); - po.orientation.y = p.orientation.y(); - po.orientation.z = p.orientation.z(); - po.orientation.w = p.orientation.w(); - msg.poses.push_back(po); - } - pose_pub_->publish(msg); + const std_msgs::msg::Header& header) { + geometry_msgs::msg::PoseArray msg; + msg.header = header; + msg.poses.reserve(poses.size()); + for (const auto& p : poses) { + geometry_msgs::msg::Pose po; + po.position.x = p.position.x(); + po.position.y = p.position.y(); + po.position.z = p.position.z(); + po.orientation.x = p.orientation.x(); + po.orientation.y = p.orientation.y(); + po.orientation.z = p.orientation.z(); + po.orientation.w = p.orientation.w(); + msg.poses.push_back(po); + } + pose_pub_->publish(msg); } -// Packs poses into a LandmarkArray message with type/subtype fields and publishes it. +// Packs poses into a LandmarkArray message with type/subtype fields and +// publishes it. 
void ValvePoseNode::publish_landmarks(const std::vector& poses, - const std_msgs::msg::Header& header) -{ - vortex_msgs::msg::LandmarkArray out; - out.header = header; - out.landmarks.reserve(poses.size()); - - for (size_t i = 0; i < poses.size(); ++i) { - vortex_msgs::msg::Landmark lm; - lm.header = header; - lm.id = static_cast(i); - lm.type.value = landmark_type_; - lm.subtype.value = landmark_subtype_; - - lm.pose.pose.position.x = poses[i].position.x(); - lm.pose.pose.position.y = poses[i].position.y(); - lm.pose.pose.position.z = poses[i].position.z(); - lm.pose.pose.orientation.x = poses[i].orientation.x(); - lm.pose.pose.orientation.y = poses[i].orientation.y(); - lm.pose.pose.orientation.z = poses[i].orientation.z(); - lm.pose.pose.orientation.w = poses[i].orientation.w(); - - out.landmarks.push_back(lm); - } - landmark_pub_->publish(out); + const std_msgs::msg::Header& header) { + vortex_msgs::msg::LandmarkArray out; + out.header = header; + out.landmarks.reserve(poses.size()); + + for (size_t i = 0; i < poses.size(); ++i) { + vortex_msgs::msg::Landmark lm; + lm.header = header; + lm.id = static_cast(i); + lm.type.value = landmark_type_; + lm.subtype.value = landmark_subtype_; + + lm.pose.pose.position.x = poses[i].position.x(); + lm.pose.pose.position.y = poses[i].position.y(); + lm.pose.pose.position.z = poses[i].position.z(); + lm.pose.pose.orientation.x = poses[i].orientation.x(); + lm.pose.pose.orientation.y = poses[i].orientation.y(); + lm.pose.pose.orientation.z = poses[i].orientation.z(); + lm.pose.pose.orientation.w = poses[i].orientation.w(); + + out.landmarks.push_back(lm); + } + landmark_pub_->publish(out); } -// Main synchronized callback: runs NMS, estimates poses, and publishes all outputs. +// Main synchronized callback: runs NMS, estimates poses, and publishes all +// outputs. 
void ValvePoseNode::sync_cb( const sensor_msgs::msg::Image::ConstSharedPtr& depth, const sensor_msgs::msg::Image::ConstSharedPtr& color, - const vision_msgs::msg::Detection2DArray::ConstSharedPtr& det) -{ - if (!got_info_) return; - if (!depth || !det) return; - - cv::Mat depth_color; - const bool publish_colormap = depth_colormap_pub_->get_subscription_count() > 0; - if (publish_colormap) { - cv_bridge::CvImageConstPtr cv_raw = cv_bridge::toCvShare(depth, "16UC1"); - cv::Mat depth_f; - cv_raw->image.convertTo(depth_f, CV_32FC1); - const float scale = 255.0f / (depth_colormap_vmax_ - depth_colormap_vmin_); - cv::Mat depth_8u; - depth_f.convertTo(depth_8u, CV_8UC1, scale, -depth_colormap_vmin_ * scale); - cv::applyColorMap(depth_8u, depth_color, cv::COLORMAP_TURBO); - } - - cv::Mat color_vis; - const bool publish_image = color && image_pub_->get_subscription_count() > 0; - if (publish_image) { - cv_bridge::CvImageConstPtr cv_color = cv_bridge::toCvShare(color, "bgr8"); - color_vis = cv_color->image.clone(); - } - - if (det->detections.empty()) { - publish_pose_array({}, depth->header); - publish_landmarks({}, depth->header); + const vision_msgs::msg::Detection2DArray::ConstSharedPtr& det) { + if (!got_info_) + return; + if (!depth || !det) + return; + + cv::Mat depth_color; + const bool publish_colormap = + depth_colormap_pub_->get_subscription_count() > 0; if (publish_colormap) { - depth_colormap_pub_->publish( - *cv_bridge::CvImage(depth->header, "bgr8", depth_color).toImageMsg()); + cv_bridge::CvImageConstPtr cv_raw = + cv_bridge::toCvShare(depth, "16UC1"); + cv::Mat depth_f; + cv_raw->image.convertTo(depth_f, CV_32FC1); + const float scale = + 255.0f / (depth_colormap_vmax_ - depth_colormap_vmin_); + cv::Mat depth_8u; + depth_f.convertTo(depth_8u, CV_8UC1, scale, + -depth_colormap_vmin_ * scale); + cv::applyColorMap(depth_8u, depth_color, cv::COLORMAP_TURBO); } + + cv::Mat color_vis; + const bool publish_image = + color && image_pub_->get_subscription_count() > 
0; if (publish_image) { - image_pub_->publish( - *cv_bridge::CvImage(color->header, "bgr8", color_vis).toImageMsg()); + cv_bridge::CvImageConstPtr cv_color = + cv_bridge::toCvShare(color, "bgr8"); + color_vis = cv_color->image.clone(); } - return; - } - - const std::vector kept = filter_duplicate_detections(*det); - - // RealSense publishes depth as 16UC1 (uint16 millimetres). - // cv_bridge type-casts without scaling, so we must divide by 1000. - cv::Mat depth_img; - if (depth->encoding == sensor_msgs::image_encodings::TYPE_16UC1 || - depth->encoding == "16UC1") { - cv_bridge::CvImageConstPtr cv_depth = cv_bridge::toCvShare(depth, "16UC1"); - cv_depth->image.convertTo(depth_img, CV_32FC1, 0.001); - } else { - cv_bridge::CvImageConstPtr cv_depth = cv_bridge::toCvShare(depth, "32FC1"); - depth_img = cv_depth->image.clone(); - } - - pcl::PointCloud::Ptr ann_dbg(new pcl::PointCloud); - pcl::PointCloud::Ptr pln_dbg(new pcl::PointCloud); - - std::vector kept_boxes; - std::vector raw_boxes; // raw detection coords for depth colormap - std::vector poses; - - for (size_t idx : kept) { - // Parse bounding box (YOLO-space), map to original image, then undistort. 
- BoundingBox yolo_box = to_bbox(det->detections[idx].bbox); - BoundingBox org_box = yolo_box; - org_box = undistort_bbox(org_box); - - raw_boxes.push_back(yolo_box); - kept_boxes.push_back(org_box); - - Pose pose; - if (detector_->compute_pose_from_depth( - depth_img, yolo_box, pose, ann_dbg, pln_dbg, true)) { - poses.push_back(pose); + + if (det->detections.empty()) { + publish_pose_array({}, depth->header); + publish_landmarks({}, depth->header); + if (publish_colormap) { + depth_colormap_pub_->publish( + *cv_bridge::CvImage(depth->header, "bgr8", depth_color) + .toImageMsg()); + } + if (publish_image) { + image_pub_->publish( + *cv_bridge::CvImage(color->header, "bgr8", color_vis) + .toImageMsg()); + } + return; } - } - - if (depth_cloud_pub_->get_subscription_count() > 0) { - sensor_msgs::msg::PointCloud2 cloud_msg; - pcl::toROSMsg(*ann_dbg, cloud_msg); - std_msgs::msg::Header cloud_header = depth->header; - if (!output_frame_id_.empty()) cloud_header.frame_id = output_frame_id_; - cloud_msg.header = cloud_header; - depth_cloud_pub_->publish(cloud_msg); - } - - if (publish_image) { - for (size_t i = 0; i < kept_boxes.size(); ++i) { - const auto& box = kept_boxes[i]; - const float angle_deg = box.theta * 180.0f / static_cast(M_PI); - cv::RotatedRect rrect( - cv::Point2f(box.center_x, box.center_y), - cv::Size2f(box.size_x, box.size_y), - angle_deg); - cv::Point2f corners[4]; - rrect.points(corners); - for (int j = 0; j < 4; ++j) { - cv::line(color_vis, corners[j], corners[(j + 1) % 4], - cv::Scalar(0, 255, 0), 2); - } + + const std::vector kept = filter_duplicate_detections(*det); + + // RealSense publishes depth as 16UC1 (uint16 millimetres). + // cv_bridge type-casts without scaling, so we must divide by 1000. 
+ cv::Mat depth_img; + if (depth->encoding == sensor_msgs::image_encodings::TYPE_16UC1 || + depth->encoding == "16UC1") { + cv_bridge::CvImageConstPtr cv_depth = + cv_bridge::toCvShare(depth, "16UC1"); + cv_depth->image.convertTo(depth_img, CV_32FC1, 0.001); + } else { + cv_bridge::CvImageConstPtr cv_depth = + cv_bridge::toCvShare(depth, "32FC1"); + depth_img = cv_depth->image.clone(); } - image_pub_->publish( - *cv_bridge::CvImage(color->header, "bgr8", color_vis).toImageMsg()); - } - - if (publish_colormap) { - for (size_t i = 0; i < raw_boxes.size(); ++i) { - const auto& box = raw_boxes[i]; - - // Project each OBB corner from color image space to depth image space - // using the full intrinsic + extrinsic pipeline. - // Fall back to the raw color pixel when no valid pose depth is available. - const float Z = (i < poses.size() && poses[i].position.z() > 0.0f) - ? poses[i].position.z() : 0.0f; - - const float angle_deg = box.theta * 180.0f / static_cast(M_PI); - cv::RotatedRect rrect( - cv::Point2f(box.center_x, box.center_y), - cv::Size2f(box.size_x, box.size_y), - angle_deg); - cv::Point2f corners[4]; - rrect.points(corners); - - if (Z > 0.0f && got_depth_info_) { - for (auto& c : corners) { - c = project_color_pixel_to_depth( - c.x, c.y, Z, color_props_, depth_props_, depth_color_extrinsic_); + + pcl::PointCloud::Ptr ann_dbg( + new pcl::PointCloud); + pcl::PointCloud::Ptr pln_dbg( + new pcl::PointCloud); + + std::vector kept_boxes; + std::vector + raw_boxes; // raw detection coords for depth colormap + std::vector poses; + + for (size_t idx : kept) { + // Parse bounding box (YOLO-space), map to original image, then + // undistort. 
+ BoundingBox yolo_box = to_bbox(det->detections[idx].bbox); + BoundingBox org_box = yolo_box; + org_box = undistort_bbox(org_box); + + raw_boxes.push_back(yolo_box); + kept_boxes.push_back(org_box); + + Pose pose; + if (detector_->compute_pose_from_depth(depth_img, yolo_box, pose, + ann_dbg, pln_dbg, true)) { + poses.push_back(pose); } - } + } - for (int j = 0; j < 4; ++j) { - cv::line(depth_color, corners[j], corners[(j + 1) % 4], - cv::Scalar(0, 255, 0), 2); - } + if (depth_cloud_pub_->get_subscription_count() > 0) { + sensor_msgs::msg::PointCloud2 cloud_msg; + pcl::toROSMsg(*ann_dbg, cloud_msg); + std_msgs::msg::Header cloud_header = depth->header; + if (!output_frame_id_.empty()) + cloud_header.frame_id = output_frame_id_; + cloud_msg.header = cloud_header; + depth_cloud_pub_->publish(cloud_msg); } - depth_colormap_pub_->publish( - *cv_bridge::CvImage(depth->header, "bgr8", depth_color).toImageMsg()); - } - - if (debug_visualize_ && annulus_pub_ && plane_pub_) { - sensor_msgs::msg::PointCloud2 ann_msg, pln_msg; - pcl::toROSMsg(*ann_dbg, ann_msg); - pcl::toROSMsg(*pln_dbg, pln_msg); - ann_msg.header = depth->header; - pln_msg.header = depth->header; - annulus_pub_->publish(ann_msg); - plane_pub_->publish(pln_msg); - } - - std_msgs::msg::Header pose_header = depth->header; - if (!output_frame_id_.empty()) pose_header.frame_id = output_frame_id_; - - publish_pose_array(poses, pose_header); - publish_landmarks(poses, pose_header); - - // Publish valve center positions as a PointCloud2 for Foxglove 3D view. 
- if (valve_points_pub_->get_subscription_count() > 0) { - pcl::PointCloud pts; - for (const auto& p : poses) { - pts.points.push_back({p.position.x(), p.position.y(), p.position.z()}); + + if (publish_image) { + for (size_t i = 0; i < kept_boxes.size(); ++i) { + const auto& box = kept_boxes[i]; + const float angle_deg = + box.theta * 180.0f / static_cast(M_PI); + cv::RotatedRect rrect(cv::Point2f(box.center_x, box.center_y), + cv::Size2f(box.size_x, box.size_y), + angle_deg); + cv::Point2f corners[4]; + rrect.points(corners); + for (int j = 0; j < 4; ++j) { + cv::line(color_vis, corners[j], corners[(j + 1) % 4], + cv::Scalar(0, 255, 0), 2); + } + } + image_pub_->publish( + *cv_bridge::CvImage(color->header, "bgr8", color_vis).toImageMsg()); + } + + if (publish_colormap) { + for (size_t i = 0; i < raw_boxes.size(); ++i) { + const auto& box = raw_boxes[i]; + + // Project each OBB corner from color image space to depth image + // space using the full intrinsic + extrinsic pipeline. Fall back to + // the raw color pixel when no valid pose depth is available. + const float Z = (i < poses.size() && poses[i].position.z() > 0.0f) + ? 
poses[i].position.z() + : 0.0f; + + const float angle_deg = + box.theta * 180.0f / static_cast(M_PI); + cv::RotatedRect rrect(cv::Point2f(box.center_x, box.center_y), + cv::Size2f(box.size_x, box.size_y), + angle_deg); + cv::Point2f corners[4]; + rrect.points(corners); + + if (Z > 0.0f && got_depth_info_) { + for (auto& c : corners) { + c = project_color_pixel_to_depth(c.x, c.y, Z, color_props_, + depth_props_, + depth_color_extrinsic_); + } + } + + for (int j = 0; j < 4; ++j) { + cv::line(depth_color, corners[j], corners[(j + 1) % 4], + cv::Scalar(0, 255, 0), 2); + } + } + depth_colormap_pub_->publish( + *cv_bridge::CvImage(depth->header, "bgr8", depth_color) + .toImageMsg()); + } + + if (debug_visualize_ && annulus_pub_ && plane_pub_) { + sensor_msgs::msg::PointCloud2 ann_msg, pln_msg; + pcl::toROSMsg(*ann_dbg, ann_msg); + pcl::toROSMsg(*pln_dbg, pln_msg); + ann_msg.header = depth->header; + pln_msg.header = depth->header; + annulus_pub_->publish(ann_msg); + plane_pub_->publish(pln_msg); + } + + std_msgs::msg::Header pose_header = depth->header; + if (!output_frame_id_.empty()) + pose_header.frame_id = output_frame_id_; + + publish_pose_array(poses, pose_header); + publish_landmarks(poses, pose_header); + + // Publish valve center positions as a PointCloud2 for Foxglove 3D view. + if (valve_points_pub_->get_subscription_count() > 0) { + pcl::PointCloud pts; + for (const auto& p : poses) { + pts.points.push_back( + {p.position.x(), p.position.y(), p.position.z()}); + } + pts.width = static_cast(pts.points.size()); + pts.height = 1; + pts.is_dense = true; + sensor_msgs::msg::PointCloud2 pc_msg; + pcl::toROSMsg(pts, pc_msg); + pc_msg.header = pose_header; + valve_points_pub_->publish(pc_msg); + } + + // Publish best (first) detection as PoseStamped for easy Foxglove 3D view. 
+ if (!poses.empty()) { + geometry_msgs::msg::PoseStamped ps; + ps.header = pose_header; + ps.pose.position.x = poses[0].position.x(); + ps.pose.position.y = poses[0].position.y(); + ps.pose.position.z = poses[0].position.z(); + ps.pose.orientation.x = poses[0].orientation.x(); + ps.pose.orientation.y = poses[0].orientation.y(); + ps.pose.orientation.z = poses[0].orientation.z(); + ps.pose.orientation.w = poses[0].orientation.w(); + pose_stamped_pub_->publish(ps); } - pts.width = static_cast(pts.points.size()); - pts.height = 1; - pts.is_dense = true; - sensor_msgs::msg::PointCloud2 pc_msg; - pcl::toROSMsg(pts, pc_msg); - pc_msg.header = pose_header; - valve_points_pub_->publish(pc_msg); - } - - // Publish best (first) detection as PoseStamped for easy Foxglove 3D view. - if (!poses.empty()) { - geometry_msgs::msg::PoseStamped ps; - ps.header = pose_header; - ps.pose.position.x = poses[0].position.x(); - ps.pose.position.y = poses[0].position.y(); - ps.pose.position.z = poses[0].position.z(); - ps.pose.orientation.x = poses[0].orientation.x(); - ps.pose.orientation.y = poses[0].orientation.y(); - ps.pose.orientation.z = poses[0].orientation.z(); - ps.pose.orientation.w = poses[0].orientation.w(); - pose_stamped_pub_->publish(ps); - } } } // namespace valve_detection From f65b7d351bdea42b4e081585c7f62a04f48cda60 Mon Sep 17 00:00:00 2001 From: jens Date: Thu, 19 Mar 2026 12:38:07 +0100 Subject: [PATCH 26/29] ci: add vortex-msgs dependencies --- .github/workflows/code-coverage.yml | 2 ++ .github/workflows/industrial-ci.yml | 1 + dependencies.repos | 4 ++++ 3 files changed, 7 insertions(+) create mode 100644 dependencies.repos diff --git a/.github/workflows/code-coverage.yml b/.github/workflows/code-coverage.yml index 7b9b6db..f3e9fe6 100644 --- a/.github/workflows/code-coverage.yml +++ b/.github/workflows/code-coverage.yml @@ -8,5 +8,7 @@ on: jobs: code-coverage: uses: vortexntnu/vortex-ci/.github/workflows/reusable-code-coverage.yml@main + with: + upstream_workspace: 
'./dependencies.repos' secrets: CODECOV_TOKEN: ${{ secrets.CODECOV_TOKEN }} # Set in the repository secrets diff --git a/.github/workflows/industrial-ci.yml b/.github/workflows/industrial-ci.yml index 23b5750..bbb65d8 100644 --- a/.github/workflows/industrial-ci.yml +++ b/.github/workflows/industrial-ci.yml @@ -14,3 +14,4 @@ jobs: uses: vortexntnu/vortex-ci/.github/workflows/reusable-industrial-ci.yml@main with: ros_repo: '["testing", "main"]' + upstream_workspace: './dependencies.repos' diff --git a/dependencies.repos b/dependencies.repos new file mode 100644 index 0000000..4475316 --- /dev/null +++ b/dependencies.repos @@ -0,0 +1,4 @@ +repositories: + vortex-msgs: + type: git + url: https://github.com/vortexntnu/vortex-msgs.git From 35ce0193424cc49e22a6d68f4504b60fa51a2239 Mon Sep 17 00:00:00 2001 From: jens Date: Sat, 21 Mar 2026 19:27:32 +0100 Subject: [PATCH 27/29] Refactor pose estimation and ROS integration --- CMakeLists.txt | 2 +- README.md | 37 +- config/valve_detection_params.yaml | 89 ++- .../depth_image_processing.hpp | 42 +- include/valve_detection/pose_estimator.hpp | 15 +- include/valve_detection/types.hpp | 8 + include/valve_detection_ros/ros_utils.hpp | 32 ++ .../valve_detection_ros/valve_pose_ros.hpp | 74 +-- launch/valve_detection.launch.py | 40 +- package.xml | 2 +- src/depth_image_processing.cpp | 175 ++++-- src/pose_estimator.cpp | 43 +- src/ros_utils.cpp | 80 +++ src/valve_pose_ros.cpp | 519 +++++------------- 14 files changed, 613 insertions(+), 545 deletions(-) create mode 100644 include/valve_detection_ros/ros_utils.hpp create mode 100644 src/ros_utils.cpp diff --git a/CMakeLists.txt b/CMakeLists.txt index eb9881a..95727f0 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -23,7 +23,6 @@ find_package(OpenCV REQUIRED) find_package(PCL REQUIRED) find_package(pcl_conversions REQUIRED) -# <<< IMPORTANT >>> find_package(vortex_msgs REQUIRED) include_directories(include) @@ -33,6 +32,7 @@ set(LIB_NAME "${PROJECT_NAME}_component") 
add_library(${LIB_NAME} SHARED src/depth_image_processing.cpp src/pose_estimator.cpp + src/ros_utils.cpp src/valve_pose_ros.cpp ) diff --git a/README.md b/README.md index e77fa74..e0178d0 100644 --- a/README.md +++ b/README.md @@ -22,6 +22,7 @@ For each detection it: | File | Responsibility | |------|---------------| | `valve_pose_ros.hpp/.cpp` | ROS node — subscriptions, publishing, NMS, camera data ownership | +| `ros_utils.hpp/.cpp` | ROS message conversions — `to_bbox`, `make_pose_array`, `make_landmark_array`, `decode_depth_to_float` | | `depth_image_processing.hpp/.cpp` | Depth transforms — back-projection, point cloud extraction, depth-to-color pixel mapping | | `pose_estimator.hpp/.cpp` | Normal/plane estimation — RANSAC plane fit, ray–plane intersection, rotation matrix, pose | | `types.hpp` | Shared data types (`BoundingBox`, `Pose`, `ImageProperties`, `DepthColorExtrinsic`) | @@ -34,6 +35,14 @@ For each detection it: ros2 launch valve_detection valve_detection.launch.py ``` +### With debug visualization + +```bash +ros2 launch valve_detection valve_detection.launch.py debug_visualize:=true +``` + +This enables the additional debug topics listed below (`/valve_poses`, `/valve_detection_depth_colormap`, `/valve_depth_cloud`, `/bbx_annulus_pcl`, `/annulus_plane_pcl`). 
+ --- ## How It Works @@ -76,17 +85,16 @@ The bounding box is reprojected from color image space to depth image space usin ## Published Topics -| Topic | Type | Description | -|-------|------|-------------| -| `/valve_pose` | `geometry_msgs/PoseStamped` | Best detection pose | -| `/valve_poses` | `geometry_msgs/PoseArray` | All detection poses | -| `/valve_landmarks` | `vortex_msgs/LandmarkArray` | Poses with landmark type/subtype | -| `/valve_detection_image` | `sensor_msgs/Image` | Color image with OBB overlays | -| `/valve_detection_depth_colormap` | `sensor_msgs/Image` | Depth colormap with OBB overlays | -| `/valve_points` | `sensor_msgs/PointCloud2` | Valve center positions | -| `/valve_depth_cloud` | `sensor_msgs/PointCloud2` | Points used for plane fit | -| `/bbx_annulus_pcl` | `sensor_msgs/PointCloud2` | Debug: extracted annulus points | -| `/annulus_plane_pcl` | `sensor_msgs/PointCloud2` | Debug: RANSAC plane inliers | +| Topic | Type | Always | Description | +|-------|------|--------|-------------| +| `/valve_landmarks` | `vortex_msgs/LandmarkArray` | Yes | All detection poses with landmark type/subtype | +| `/valve_poses` | `geometry_msgs/PoseArray` | Debug | All detection poses | +| `/valve_detection_depth_colormap` | `sensor_msgs/Image` | Debug | Depth colormap with OBB overlays | +| `/valve_depth_cloud` | `sensor_msgs/PointCloud2` | Debug | Points used for plane fit | +| `/bbx_annulus_pcl` | `sensor_msgs/PointCloud2` | Debug | Extracted annulus points | +| `/annulus_plane_pcl` | `sensor_msgs/PointCloud2` | Debug | RANSAC plane inliers | + +Debug topics are only published when `debug_visualize:=true`. 
--- @@ -100,10 +108,11 @@ The bounding box is reprojected from color image space to depth image space usin | `valve_handle_offset` | `0.05` | Shift along plane normal to reach handle (m) | | `iou_duplicate_threshold` | `0.5` | IoU threshold for NMS | | `yolo_img_width/height` | `640` | YOLO letterbox reference size for bbox remapping | -| `debug_visualize` | `false` | Publish annulus and plane point clouds | +| `debug_visualize` | `false` | Enable debug visualization topics | | `output_frame_id` | `camera_color_optical_frame` | TF frame for published poses | +| `depth_to_color_tx/ty/tz` | `-0.059, 0, 0` | Depth-to-color extrinsic translation (m) | -Camera intrinsics (`color_fx/fy/cx/cy`, `depth_fx/fy/cx/cy`) and distortion coefficients are set in `config/valve_detection_params.yaml`. Depth intrinsics can alternatively be received from a `CameraInfo` topic. +Camera intrinsics (`color_fx/fy/cx/cy`, `depth_fx/fy/cx/cy`) and distortion coefficients are set in `config/valve_detection_params.yaml`. Both color and depth intrinsics can alternatively be received from `CameraInfo` topics and will override the config values. 
--- @@ -113,7 +122,7 @@ Camera intrinsics (`color_fx/fy/cx/cy`, `depth_fx/fy/cx/cy`) and distortion coef |-------|---------------| | No poses published | Too few plane inliers — lower `plane_ransac_threshold` or increase `annulus_radius_ratio` | | Pose position offset | Wrong `valve_handle_offset` or incorrect camera intrinsics/extrinsic | -| OBB misaligned on depth colormap | Incorrect depth-to-color extrinsic in `d555_depth_to_color_extrinsic()` | +| OBB misaligned on depth colormap | Incorrect depth-to-color extrinsic — check `depth_to_color_tx/ty/tz` in the config | | Duplicate poses | Lower `iou_duplicate_threshold` | --- diff --git a/config/valve_detection_params.yaml b/config/valve_detection_params.yaml index 9190d4c..28fb9a4 100644 --- a/config/valve_detection_params.yaml +++ b/config/valve_detection_params.yaml @@ -2,14 +2,15 @@ ros__parameters: # ── Inputs ──────────────────────────────────────────────────────────────── depth_image_sub_topic: "/realsense/D555_409122300281_Depth" - color_image_sub_topic: "/realsense/D555_409122300281_Color" detections_sub_topic: "/yolo_obb_object_detection/detections" # Depth camera-info topic (optional – used to load depth intrinsics if published). - # Color intrinsics are always taken from config params below. depth_image_info_topic: "/camera/camera/depth/camera_info" - # Color camera intrinsics (always used – not overridden by camera_info) + # Color camera-info topic (optional – overrides the fallback params below if published). + color_image_info_topic: "/camera/camera/color/camera_info" + + # Color camera intrinsics – fallback values used when color_image_info_topic is not published. # Source: /camera/camera/color/camera_info color_fx: 452.121521 color_fy: 451.737976 @@ -34,26 +35,37 @@ depth_image_width: 896 depth_image_height: 504 + # ── Depth-to-color extrinsic ────────────────────────────────────────────── + # Translation (metres) of the depth origin expressed in the color camera + # frame. 
Rotation is assumed to be identity (both optical frames share the + # same orientation for the RealSense D555). + # Source: ros2 topic echo /realsense/extrinsics/depth_to_color + depth_to_color_tx: -0.059 + depth_to_color_ty: 0.0 + depth_to_color_tz: 0.0 + # ── Outputs ─────────────────────────────────────────────────────────────── - valve_pose_stamped_pub_topic: "/valve_pose" - valve_poses_pub_topic: "/valve_poses" + # Primary output consumed by downstream nodes (e.g. EKF, mission planner). landmarks_pub_topic: "/valve_landmarks" - processed_image_pub_topic: "/valve_detection_image" - depth_colormap_pub_topic: "/valve_detection_depth_colormap" - valve_points_pub_topic: "/valve_points" - depth_cloud_pub_topic: "/valve_depth_cloud" - depth_colormap_value_min: 0.1 - depth_colormap_value_max: 1125.5 - debug_annulus_pub_topic: "/bbx_annulus_pcl" - debug_plane_pub_topic: "/annulus_plane_pcl" # ── YOLO letterbox reference size ───────────────────────────────────────── yolo_img_width: 640 yolo_img_height: 640 # ── Annulus and plane fit ───────────────────────────────────────────────── + # Fraction of the bounding-box half-size used as the annulus ring radius + # (unitless, 0–1). At 0.8 the ring samples the outer 80 % of the box, + # avoiding the central hub while staying inside the valve rim. + # Tuned empirically on recorded valve footage. annulus_radius_ratio: 0.8 + + # Maximum point-to-plane distance (metres) for a depth point to be counted + # as a RANSAC inlier. 0.01 m (1 cm) tolerates typical RealSense depth noise + # at ~0.5–1 m range without accepting gross outliers. plane_ransac_threshold: 0.01 + + # Number of RANSAC iterations. 50 gives >99 % probability of finding a + # good plane when the inlier ratio is ≥ 50 %, while keeping CPU load low. 
plane_ransac_max_iterations: 50 # ── Duplicate-detection suppression ────────────────────────────────────── @@ -64,20 +76,51 @@ iou_duplicate_threshold: 0.5 # ── Pose offset ─────────────────────────────────────────────────────────── - # Shift the estimated position along the plane normal by this amount (m). - valve_handle_offset: 0.05 + # Shift the estimated position along the plane normal by this amount (metres). + # Value taken from the official valve CAD file: distance between the valve + # face frame and the inside length of the handle is 0.065 m. + valve_handle_offset: 0.065 # ── Behaviour toggles ───────────────────────────────────────────────────── - use_color_image: true - visualize_detections: true # publish annotated color image - debug_visualize: false # publish annulus / plane point clouds + debug_visualize: false # enable all debug visualization topics + + # ── Debug visualization (only active when debug_visualize: true) ────────── + debug: + # All detected valve poses in one PoseArray – useful for visualising + # multiple simultaneous detections in Foxglove. + valve_poses_pub_topic: "/valve_poses" + + # Annulus point cloud (points sampled inside the bounding-box ring used + # for plane fitting) projected into 3D – useful for verifying the depth + # extraction region. + depth_cloud_pub_topic: "/valve_depth_cloud" + + # Depth image false-coloured with COLORMAP_TURBO and OBB outlines drawn + # on top – useful for checking detection alignment on the depth frame. + depth_colormap_pub_topic: "/valve_detection_depth_colormap" + # Colormap scaling range in raw 16UC1 depth units (millimetres). + # Min ~0 mm clips near-zero noise; max ~1125 mm (≈1.1 m) covers the + # expected close-range operating distance for valve interaction. 
+ depth_colormap_value_min: 0.1 + depth_colormap_value_max: 1125.5 + + # Points sampled from the annulus region of the bounding box, collected + # as a byproduct of pose estimation – useful for verifying the depth + # extraction region in 3D. + annulus_pub_topic: "/bbx_annulus_pcl" + + # RANSAC plane inlier points, collected as a byproduct of pose estimation + # – useful for checking that the fitted plane normal (and therefore the + # final pose orientation) is correct. + plane_pub_topic: "/annulus_plane_pcl" # ── Landmark fields ─────────────────────────────────────────────────────── - landmark_type: 1 # VALVE + landmark_type: 5 # VALVE landmark_subtype: 0 # UNKNOWN # ── Output frame ────────────────────────────────────────────────────────── - # frame_id used for all published poses. Set to the TF frame that matches - # your robot's TF tree so Foxglove can display the pose automatically. - # Leave empty ("") to inherit the frame_id from the depth image header. - output_frame_id: "camera_color_optical_frame" + # TF frame used as the frame_id for all published poses and landmarks. + # Must match the depth optical frame in the robot's TF tree. 
+ # See: auv_setup/description/moby.urdf.xacro + output_frame_id: "moby/front_camera_depth_optical" + diff --git a/include/valve_detection/depth_image_processing.hpp b/include/valve_detection/depth_image_processing.hpp index 4b03fa1..c045dff 100644 --- a/include/valve_detection/depth_image_processing.hpp +++ b/include/valve_detection/depth_image_processing.hpp @@ -3,32 +3,28 @@ #include #include #include +#include +#include #include "valve_detection/types.hpp" namespace valve_detection { -void project_pixel_to_point(int u, - int v, - float depth, - double fx, - double fy, - double cx, - double cy, +void project_pixel_to_point(const int u, + const int v, + const float depth, + const double fx, + const double fy, + const double cx, + const double cy, pcl::PointXYZ& out); void extract_annulus_pcl( const cv::Mat& depth_image, // CV_32FC1 meters const BoundingBox& bbox, // in ORIGINAL image pixels const ImageProperties& img_props, - float annulus_radius_ratio, // inner radius = outer*ratio + const float annulus_radius_ratio, // inner radius = outer*ratio pcl::PointCloud::Ptr& out); -// Hardcoded depth-to-color extrinsic for Intel RealSense D555. -// Values from the camera URDF / factory calibration. -// Verify with: ros2 topic echo /realsense/extrinsics/depth_to_color -// or the URDF at https://github.com/IntelRealSense/librealsense/issues/14577 -DepthColorExtrinsic d555_depth_to_color_extrinsic(); - // Like extract_annulus_pcl but with proper depth-to-color alignment. 
// Iterates depth pixels, back-projects with depth intrinsics, applies the // extrinsic transform, then checks whether the resulting color-frame @@ -40,7 +36,7 @@ void extract_annulus_pcl_aligned( const ImageProperties& color_props, const ImageProperties& depth_props, const DepthColorExtrinsic& extrinsic, - float annulus_radius_ratio, + const float annulus_radius_ratio, pcl::PointCloud::Ptr& out); // Extracts all valid depth points whose color-frame projection falls inside @@ -56,11 +52,21 @@ void extract_bbox_pcl_aligned( // Project a color image pixel to depth image coordinates. // u_c, v_c: pixel coordinates in the color image. // Z: depth of the point in the color camera frame (metres). -cv::Point2f project_color_pixel_to_depth(float u_c, - float v_c, - float Z, +cv::Point2f project_color_pixel_to_depth(const float u_c, + const float v_c, + const float Z, const ImageProperties& color_props, const ImageProperties& depth_props, const DepthColorExtrinsic& extr); +// Returns undistorted copy of bbox (center_x/y corrected for lens distortion). +BoundingBox undistort_bbox(const BoundingBox& bbox, const CameraIntrinsics& intr); + +// Greedy NMS: returns indices of kept detections (max 2). +// scored_boxes: (score, bbox) pairs. Two boxes are duplicates when IoMin +// (intersection / min-area) or IoU exceeds iou_duplicate_threshold. 
+std::vector filter_duplicate_detections( + const std::vector>& scored_boxes, + float iou_duplicate_threshold); + } // namespace valve_detection diff --git a/include/valve_detection/pose_estimator.hpp b/include/valve_detection/pose_estimator.hpp index 18b620e..bdc48e8 100644 --- a/include/valve_detection/pose_estimator.hpp +++ b/include/valve_detection/pose_estimator.hpp @@ -32,10 +32,9 @@ class PoseEstimator { void calculate_letterbox_padding(); BoundingBox transform_bounding_box(const BoundingBox& bbox) const; - bool compute_pose_from_depth( + PoseResult compute_pose_from_depth( const cv::Mat& depth_image, // CV_32FC1 meters const BoundingBox& bbox_org, // in original image pixels - Pose& out_pose, pcl::PointCloud::Ptr annulus_dbg, pcl::PointCloud::Ptr plane_dbg, bool debug_visualize) const; @@ -65,12 +64,12 @@ class PoseEstimator { DepthColorExtrinsic depth_color_extrinsic_{}; bool has_depth_props_{false}; - int yolo_img_width_{640}; - int yolo_img_height_{640}; - float annulus_radius_ratio_{0.8f}; - float plane_ransac_threshold_{0.01f}; - int plane_ransac_max_iterations_{50}; - float valve_handle_offset_{0.05f}; + int yolo_img_width_; + int yolo_img_height_; + float annulus_radius_ratio_; + float plane_ransac_threshold_; + int plane_ransac_max_iterations_; + float valve_handle_offset_; double letterbox_scale_factor_{1.0}; double letterbox_pad_x_{0}; diff --git a/include/valve_detection/types.hpp b/include/valve_detection/types.hpp index 214b0f5..3e1fd27 100644 --- a/include/valve_detection/types.hpp +++ b/include/valve_detection/types.hpp @@ -1,11 +1,14 @@ #pragma once #include +#include #include namespace valve_detection { struct CameraIntrinsics { double fx{0}, fy{0}, cx{0}, cy{0}; + // Plumb-bob distortion coefficients [k1, k2, p1, p2, k3]. 
+ std::array dist{0, 0, 0, 0, 0}; }; struct ImageDimensions { @@ -30,6 +33,11 @@ struct Pose { Eigen::Quaternionf orientation{Eigen::Quaternionf::Identity()}; }; +struct PoseResult { + Pose result; + bool result_valid{false}; +}; + // Rigid transform from depth camera frame to color camera frame. // Rotation R and translation t satisfy: P_color = R * P_depth + t struct DepthColorExtrinsic { diff --git a/include/valve_detection_ros/ros_utils.hpp b/include/valve_detection_ros/ros_utils.hpp new file mode 100644 index 0000000..5e23770 --- /dev/null +++ b/include/valve_detection_ros/ros_utils.hpp @@ -0,0 +1,32 @@ +#pragma once + +#include +#include +#include +#include +#include + +#include "valve_detection/types.hpp" +#include "vortex_msgs/msg/landmark_array.hpp" + +#include + +namespace valve_detection { + +// Converts a ROS BoundingBox2D message to the internal BoundingBox type. +BoundingBox to_bbox(const vision_msgs::msg::BoundingBox2D& b); + +// Builds a PoseArray message from a list of poses. +geometry_msgs::msg::PoseArray make_pose_array( + const std::vector& poses, const std_msgs::msg::Header& header); + +// Builds a LandmarkArray message from a list of poses. +vortex_msgs::msg::LandmarkArray make_landmark_array( + const std::vector& poses, const std_msgs::msg::Header& header, + int type, int subtype); + +// Decodes a ROS depth image to a CV_32FC1 mat in metres. 
+cv::Mat decode_depth_to_float( + const sensor_msgs::msg::Image::ConstSharedPtr& depth); + +} // namespace valve_detection diff --git a/include/valve_detection_ros/valve_pose_ros.hpp b/include/valve_detection_ros/valve_pose_ros.hpp index 2a843c6..86acc83 100644 --- a/include/valve_detection_ros/valve_pose_ros.hpp +++ b/include/valve_detection_ros/valve_pose_ros.hpp @@ -5,7 +5,6 @@ #include #include -#include #include #include #include @@ -25,10 +24,9 @@ #include #include #include -#include "vortex_msgs/msg/landmark.hpp" #include "vortex_msgs/msg/landmark_array.hpp" -#include "vortex_msgs/msg/landmark_subtype.hpp" #include "vortex_msgs/msg/landmark_type.hpp" +#include "vortex_msgs/msg/landmark_subtype.hpp" namespace valve_detection { @@ -37,55 +35,33 @@ class ValvePoseNode : public rclcpp::Node { explicit ValvePoseNode(const rclcpp::NodeOptions& options); private: + // Node setup — called from constructor. + void setup_estimator(); + void setup_publishers(const rclcpp::QoS& qos); + void setup_subscribers(const rclcpp::QoS& qos); + + // Camera info callbacks (one-shot, override config fallback). + void color_camera_info_cb( + const sensor_msgs::msg::CameraInfo::SharedPtr msg); void depth_camera_info_cb( const sensor_msgs::msg::CameraInfo::SharedPtr msg); - BoundingBox to_bbox(const vision_msgs::msg::BoundingBox2D& bbox) const; - - // Returns undistorted copy of bbox (center_x/y corrected for lens - // distortion). - BoundingBox undistort_bbox(const BoundingBox& bbox) const; - - // Non-maximum suppression: returns indices of kept detections (max 2). - // Two boxes are duplicates when IoMin (intersection / min-area) or IoU - // exceeds iou_duplicate_threshold_. 
- std::vector filter_duplicate_detections( - const vision_msgs::msg::Detection2DArray& det) const; - - void publish_pose_array(const std::vector& poses, - const std_msgs::msg::Header& header); - - void publish_landmarks(const std::vector& poses, - const std_msgs::msg::Header& header); - - // Sync callback: depth + color + det + // Main synchronized callback: depth + detections. void sync_cb(const sensor_msgs::msg::Image::ConstSharedPtr& depth, - const sensor_msgs::msg::Image::ConstSharedPtr& color, const vision_msgs::msg::Detection2DArray::ConstSharedPtr& det); using SyncPolicy = message_filters::sync_policies::ApproximateTime< - sensor_msgs::msg::Image, sensor_msgs::msg::Image, vision_msgs::msg::Detection2DArray>; - bool got_info_{false}; - bool got_depth_info_{false}; - // params - bool use_color_image_{true}; - bool visualize_detections_{true}; - bool debug_visualize_{false}; - float iou_duplicate_threshold_{0.5f}; - std::string output_frame_id_{}; - - int landmark_type_{1}; - int landmark_subtype_{0}; - - // distortion (populated from color camera_info) - cv::Mat cv_camera_matrix_; - cv::Mat cv_dist_coeffs_; + bool debug_visualize_; + float iou_duplicate_threshold_; + std::string output_frame_id_; + int landmark_type_; + int landmark_subtype_; - // camera data (owned by ROS node, passed to estimator and depth functions) + // camera data (owned by node, passed to estimator and depth functions) ImageProperties color_props_{}; ImageProperties depth_props_{}; DepthColorExtrinsic depth_color_extrinsic_{}; @@ -94,30 +70,24 @@ class ValvePoseNode : public rclcpp::Node { std::unique_ptr detector_; // subs + rclcpp::Subscription::SharedPtr + color_cam_info_sub_; rclcpp::Subscription::SharedPtr depth_cam_info_sub_; message_filters::Subscriber depth_sub_; - message_filters::Subscriber color_sub_; message_filters::Subscriber det_sub_; std::shared_ptr> sync_; // pubs rclcpp::Publisher::SharedPtr pose_pub_; - rclcpp::Publisher::SharedPtr - pose_stamped_pub_; 
rclcpp::Publisher::SharedPtr landmark_pub_; - rclcpp::Publisher::SharedPtr image_pub_; rclcpp::Publisher::SharedPtr depth_colormap_pub_; - - float depth_colormap_vmin_{0.1f}; - float depth_colormap_vmax_{1125.5f}; - rclcpp::Publisher::SharedPtr annulus_pub_; rclcpp::Publisher::SharedPtr plane_pub_; - rclcpp::Publisher::SharedPtr - valve_points_pub_; - rclcpp::Publisher::SharedPtr - depth_cloud_pub_; + rclcpp::Publisher::SharedPtr depth_cloud_pub_; + + float depth_colormap_vmin_; + float depth_colormap_vmax_; }; } // namespace valve_detection diff --git a/launch/valve_detection.launch.py b/launch/valve_detection.launch.py index d385332..ec247a0 100644 --- a/launch/valve_detection.launch.py +++ b/launch/valve_detection.launch.py @@ -1,33 +1,43 @@ import os from ament_index_python.packages import get_package_share_directory +from launch import LaunchDescription +from launch.actions import DeclareLaunchArgument +from launch.substitutions import LaunchConfiguration from launch_ros.actions import ComposableNodeContainer from launch_ros.descriptions import ComposableNode -from launch import LaunchDescription - def generate_launch_description(): cfg = os.path.join( - get_package_share_directory("valve_detection"), - "config", - "valve_detection_params.yaml", + get_package_share_directory('valve_detection'), + 'config', + 'valve_detection_params.yaml', + ) + + debug_visualize_arg = DeclareLaunchArgument( + 'debug_visualize', + default_value='false', + description='Enable debug visualization topics (valve_pose, valve_poses, annulus, plane)', ) container = ComposableNodeContainer( - name="valve_detection_container", - namespace="", - package="rclcpp_components", - executable="component_container_mt", + name='valve_detection_container', + namespace='', + package='rclcpp_components', + executable='component_container_mt', composable_node_descriptions=[ ComposableNode( - package="valve_detection", - plugin="valve_detection::ValvePoseNode", - name="valve_pose_node", - 
parameters=[cfg], + package='valve_detection', + plugin='valve_detection::ValvePoseNode', + name='valve_pose_node', + parameters=[ + cfg, + {'debug_visualize': LaunchConfiguration('debug_visualize')}, + ], ) ], - output="screen", + output='screen', ) - return LaunchDescription([container]) + return LaunchDescription([debug_visualize_arg, container]) diff --git a/package.xml b/package.xml index 889ca49..b4f6cd1 100644 --- a/package.xml +++ b/package.xml @@ -4,7 +4,7 @@ valve_detection 0.1.0 Valve pose estimation from YOLOv26 OBB detections + depth image. - you + you Apache-2.0 ament_cmake diff --git a/src/depth_image_processing.cpp b/src/depth_image_processing.cpp index ba2b6ca..2cabd6e 100644 --- a/src/depth_image_processing.cpp +++ b/src/depth_image_processing.cpp @@ -3,20 +3,23 @@ // projection. #include "valve_detection/depth_image_processing.hpp" #include +#include #include #include +#include +#include namespace valve_detection { // Back-projects a depth pixel (u, v, depth) to a 3D point using camera // intrinsics. -void project_pixel_to_point(int u, - int v, - float depth, - double fx, - double fy, - double cx, - double cy, +void project_pixel_to_point(const int u, + const int v, + const float depth, + const double fx, + const double fy, + const double cx, + const double cy, pcl::PointXYZ& out) { if (depth <= 0.0f || std::isnan(depth) || std::isinf(depth)) { out.x = out.y = out.z = std::numeric_limits::quiet_NaN(); @@ -32,21 +35,27 @@ void project_pixel_to_point(int u, void extract_annulus_pcl(const cv::Mat& depth_image, const BoundingBox& bbox, const ImageProperties& img_props, - float annulus_radius_ratio, + const float annulus_radius_ratio, pcl::PointCloud::Ptr& out) { out->clear(); const float cx = bbox.center_x; const float cy = bbox.center_y; + // Outer ellipse half-extents match the bounding box. 
const float outer_rx = bbox.size_x * 0.5f; const float outer_ry = bbox.size_y * 0.5f; + // Require at least 4 pixels in each dimension (2 px half-extent) before + // sampling — smaller boxes contain no usable depth points. if (outer_rx < 2.0f || outer_ry < 2.0f) return; + // Inner ellipse is scaled down by annulus_radius_ratio, creating a ring + // that avoids the central hub and samples only the valve rim. const float inner_rx = outer_rx * annulus_radius_ratio; const float inner_ry = outer_ry * annulus_radius_ratio; + // Bounding rectangle of the outer ellipse in pixel space. const int u0 = static_cast(std::floor(cx - outer_rx)); const int u1 = static_cast(std::ceil(cx + outer_rx)); const int v0 = static_cast(std::floor(cy - outer_ry)); @@ -59,10 +68,14 @@ void extract_annulus_pcl(const cv::Mat& depth_image, if (u < 0 || u >= depth_image.cols) continue; + // Normalise pixel offset by the outer half-extents; the point is + // inside the outer ellipse when the sum of squares ≤ 1. const float dxo = (u - cx) / outer_rx; const float dyo = (v - cy) / outer_ry; const bool inside_outer = (dxo * dxo + dyo * dyo) <= 1.0f; + // Same test for the inner ellipse; keep only points outside it + // to form the annular ring. const float dxi = (u - cx) / inner_rx; const float dyi = (v - cy) / inner_ry; const bool outside_inner = (dxi * dxi + dyi * dyi) > 1.0f; @@ -70,12 +83,14 @@ void extract_annulus_pcl(const cv::Mat& depth_image, if (!inside_outer || !outside_inner) continue; + // Back-project the depth pixel to a 3-D point in camera space. const float z = depth_image.at(v, u); pcl::PointXYZ p; project_pixel_to_point(u, v, z, img_props.intr.fx, img_props.intr.fy, img_props.intr.cx, img_props.intr.cy, p); + // Discard invalid points (zero/NaN depth produces NaN coords). 
if (!std::isnan(p.x) && !std::isnan(p.y) && !std::isnan(p.z)) { out->points.push_back(p); } @@ -87,23 +102,6 @@ void extract_annulus_pcl(const cv::Mat& depth_image, out->is_dense = false; } -// Returns hardcoded depth-to-color extrinsic for Intel RealSense D555. -// Verify with: ros2 topic echo /realsense/extrinsics/depth_to_color -DepthColorExtrinsic d555_depth_to_color_extrinsic() { - DepthColorExtrinsic extr; - // Both depth and color optical frames share the same body-to-optical - // rotation, so R is identity. - extr.R = Eigen::Matrix3f::Identity(); - // t = depth origin expressed in color optical frame. - // Color is 0.059 m in -y (rightward) of depth in body frame, so depth is - // +0.059 m in y (leftward) of color. Converting to optical frame: - // t_optical = R_body_to_optical * [0, +0.059, 0] - // = [[0,-1,0],[0,0,-1],[1,0,0]] * [0, 0.059, 0] - // = [-0.059, 0, 0] - extr.t = Eigen::Vector3f(-0.059f, 0.0f, 0.0f); - return extr; -} - // Like extract_annulus_pcl but transforms depth pixels into the color frame // before the annulus test. void extract_annulus_pcl_aligned(const cv::Mat& depth_image, @@ -111,24 +109,29 @@ void extract_annulus_pcl_aligned(const cv::Mat& depth_image, const ImageProperties& color_props, const ImageProperties& depth_props, const DepthColorExtrinsic& extr, - float annulus_radius_ratio, + const float annulus_radius_ratio, pcl::PointCloud::Ptr& out) { out->clear(); + // Annulus defined in color-image coordinates. const float cx_c = color_bbox.center_x; const float cy_c = color_bbox.center_y; const float outer_rx = color_bbox.size_x * 0.5f; const float outer_ry = color_bbox.size_y * 0.5f; + // Require at least 4 pixels in each dimension (2 px half-extent) before + // sampling — smaller boxes contain no usable depth points. if (outer_rx < 2.0f || outer_ry < 2.0f) return; + // Inner ellipse scaled down by annulus_radius_ratio, forming the ring. 
const float inner_rx = outer_rx * annulus_radius_ratio; const float inner_ry = outer_ry * annulus_radius_ratio; - // Approximate scale between depth and color focal lengths to define a - // coarse search region in depth-image coordinates. A 30-pixel margin - // accounts for the lateral offset introduced by the extrinsic. + // The depth and color images have different resolutions and focal lengths. + // Scale color-space bbox bounds into depth-image coordinates to get a + // coarse candidate region, then add a margin to cover the lateral shift + // introduced by the depth-to-color translation. const float scale = (depth_props.intr.fx > 0.0 && color_props.intr.fx > 0.0) ? static_cast(depth_props.intr.fx / color_props.intr.fx) @@ -152,7 +155,7 @@ void extract_annulus_pcl_aligned(const cv::Mat& depth_image, if (z_d <= 0.0f || std::isnan(z_d) || std::isinf(z_d)) continue; - // Back-project using depth intrinsics. + // Back-project depth pixel to 3-D point in depth camera frame. Eigen::Vector3f P_d; P_d.x() = static_cast((u_d - depth_props.intr.cx) * z_d / depth_props.intr.fx); @@ -160,28 +163,32 @@ void extract_annulus_pcl_aligned(const cv::Mat& depth_image, depth_props.intr.fy); P_d.z() = z_d; - // Transform into color camera frame. + // Apply extrinsic to move the point into the color camera frame, + // where the annulus is defined. const Eigen::Vector3f P_c = extr.R * P_d + extr.t; if (P_c.z() <= 0.0f) continue; - // Project onto color image plane. + // Project the color-frame point onto the color image plane. const float u_c = static_cast( color_props.intr.fx * P_c.x() / P_c.z() + color_props.intr.cx); const float v_c = static_cast( color_props.intr.fy * P_c.y() / P_c.z() + color_props.intr.cy); - // Elliptic annulus test in color-image space. + // Normalise offset by outer half-extents; keep only points inside + // the outer ellipse (sum of squares ≤ 1). 
const float dxo = (u_c - cx_c) / outer_rx; const float dyo = (v_c - cy_c) / outer_ry; if (dxo * dxo + dyo * dyo > 1.0f) continue; // outside outer ellipse + // Discard points inside the inner ellipse to form the ring. const float dxi = (u_c - cx_c) / inner_rx; const float dyi = (v_c - cy_c) / inner_ry; if (dxi * dxi + dyi * dyi <= 1.0f) continue; // inside inner ellipse + // Store the point in the color camera frame. pcl::PointXYZ p; p.x = P_c.x(); p.y = P_c.y(); @@ -277,9 +284,9 @@ void extract_bbox_pcl_aligned(const cv::Mat& depth_image, // Projects a color image pixel to depth image coordinates using the full // intrinsic + extrinsic pipeline. -cv::Point2f project_color_pixel_to_depth(float u_c, - float v_c, - float Z, +cv::Point2f project_color_pixel_to_depth(const float u_c, + const float v_c, + const float Z, const ImageProperties& color_props, const ImageProperties& depth_props, const DepthColorExtrinsic& extr) { @@ -306,4 +313,100 @@ cv::Point2f project_color_pixel_to_depth(float u_c, return {u_d, v_d}; } +// Corrects the bbox center for lens distortion using the given intrinsics. +BoundingBox undistort_bbox(const BoundingBox& bbox, const CameraIntrinsics& intr) { + const cv::Mat K = (cv::Mat_(3, 3) << intr.fx, 0, intr.cx, + 0, intr.fy, intr.cy, + 0, 0, 1); + const cv::Mat D = (cv::Mat_(5, 1) << intr.dist[0], intr.dist[1], + intr.dist[2], intr.dist[3], + intr.dist[4]); + // Build a RotatedRect and extract all 4 corners. + const float angle_deg = bbox.theta * 180.0f / static_cast(M_PI); + cv::RotatedRect rrect(cv::Point2f(bbox.center_x, bbox.center_y), + cv::Size2f(bbox.size_x, bbox.size_y), angle_deg); + cv::Point2f corners[4]; + rrect.points(corners); + + // Undistort all 4 corners. + std::vector pts(corners, corners + 4); + std::vector undistorted; + cv::undistortPoints(pts, undistorted, K, D, cv::noArray(), K); + + // Refit an OBB to the undistorted corners. 
+ cv::RotatedRect fitted = cv::minAreaRect(undistorted); + + BoundingBox result = bbox; + result.center_x = fitted.center.x; + result.center_y = fitted.center.y; + result.size_x = fitted.size.width; + result.size_y = fitted.size.height; + result.theta = fitted.angle * static_cast(M_PI) / 180.0f; + return result; +} + +// Greedy NMS: sorts by score descending, keeps at most 2 non-overlapping boxes. +std::vector filter_duplicate_detections( + const std::vector>& scored_boxes, + float iou_duplicate_threshold) { + const size_t n = scored_boxes.size(); + if (n == 0) + return {}; + + std::vector> order; + order.reserve(n); + for (size_t i = 0; i < n; ++i) + order.emplace_back(scored_boxes[i].first, i); + std::sort(order.begin(), order.end(), + [](const auto& a, const auto& b) { return a.first > b.first; }); + + std::vector kept; + std::vector suppressed(n, false); + + for (size_t si = 0; si < order.size() && kept.size() < 2; ++si) { + const size_t i = order[si].second; + if (suppressed[i]) + continue; + + kept.push_back(i); + + const BoundingBox& bi = scored_boxes[i].second; + const float ai = bi.size_x * bi.size_y; + const float bx1i = bi.center_x - bi.size_x * 0.5f; + const float by1i = bi.center_y - bi.size_y * 0.5f; + const float bx2i = bi.center_x + bi.size_x * 0.5f; + const float by2i = bi.center_y + bi.size_y * 0.5f; + + for (size_t sj = si + 1; sj < order.size(); ++sj) { + const size_t j = order[sj].second; + if (suppressed[j]) + continue; + + const BoundingBox& bj = scored_boxes[j].second; + const float aj = bj.size_x * bj.size_y; + const float bx1j = bj.center_x - bj.size_x * 0.5f; + const float by1j = bj.center_y - bj.size_y * 0.5f; + const float bx2j = bj.center_x + bj.size_x * 0.5f; + const float by2j = bj.center_y + bj.size_y * 0.5f; + + const float ix1 = std::max(bx1i, bx1j); + const float iy1 = std::max(by1i, by1j); + const float ix2 = std::min(bx2i, bx2j); + const float iy2 = std::min(by2i, by2j); + + if (ix2 <= ix1 || iy2 <= iy1) + continue; + + const 
float inter = (ix2 - ix1) * (iy2 - iy1); + const float iou = inter / (ai + aj - inter); + const float iom = inter / std::min(ai, aj); + + if (iou > iou_duplicate_threshold || iom > 0.7f) + suppressed[j] = true; + } + } + + return kept; +} + } // namespace valve_detection diff --git a/src/pose_estimator.cpp b/src/pose_estimator.cpp index d899857..112fc78 100644 --- a/src/pose_estimator.cpp +++ b/src/pose_estimator.cpp @@ -120,6 +120,8 @@ Eigen::Vector3f PoseEstimator::compute_plane_normal( Eigen::Vector3f PoseEstimator::find_ray_plane_intersection( const pcl::ModelCoefficients::Ptr& coefficients, const Eigen::Vector3f& ray_direction) const { + // A plane ax+by+cz+d=0 has 4 coefficients: [a, b, c, d]. + // We need all 4 here: [a,b,c] for the normal and d for the offset. if (!coefficients || coefficients->values.size() < 4) return Eigen::Vector3f::Zero(); @@ -204,10 +206,9 @@ Eigen::Matrix3f PoseEstimator::create_rotation_matrix( // Extracts a point cloud from the depth image, fits a plane, and returns the // valve pose. 
-bool PoseEstimator::compute_pose_from_depth( +PoseResult PoseEstimator::compute_pose_from_depth( const cv::Mat& depth_image, const BoundingBox& bbox_org, - Pose& out_pose, pcl::PointCloud::Ptr annulus_dbg, pcl::PointCloud::Ptr plane_dbg, bool debug_visualize) const { @@ -224,7 +225,7 @@ bool PoseEstimator::compute_pose_from_depth( } if (cloud->points.size() < 4) - return false; + return {}; if (debug_visualize && annulus_dbg) *annulus_dbg += *cloud; @@ -232,7 +233,7 @@ bool PoseEstimator::compute_pose_from_depth( pcl::ModelCoefficients::Ptr coeff(new pcl::ModelCoefficients); pcl::PointIndices::Ptr inliers(new pcl::PointIndices); if (!segment_plane(cloud, coeff, inliers)) - return false; + return {}; if (debug_visualize && plane_dbg) { for (int idx : inliers->indices) @@ -245,18 +246,42 @@ bool PoseEstimator::compute_pose_from_depth( const Eigen::Vector3f ray = get_ray_direction(bbox_org); const Eigen::Vector3f normal = compute_plane_normal(coeff, ray); if (normal.isZero()) - return false; + return {}; const Eigen::Vector3f pos = find_ray_plane_intersection(coeff, ray); if (pos.isZero()) - return false; + return {}; - out_pose.position = shift_point_along_normal(pos, normal); + PoseResult out; + out.result.position = shift_point_along_normal(pos, normal); const Eigen::Matrix3f rot = create_rotation_matrix(coeff, normal, bbox_org.theta); - out_pose.orientation = Eigen::Quaternionf(rot).normalized(); + out.result.orientation = Eigen::Quaternionf(rot).normalized(); + + // The aligned path (has_depth_props_) produces the pose in the color camera + // frame. Transform it into the depth camera frame so the published frame_id + // matches the depth optical frame. 
+ // P_depth = R^T * (P_color - t) + // R_depth = R^T * R_color + if (has_depth_props_) { + const Eigen::Matrix3f R_dc = depth_color_extrinsic_.R.transpose(); + out.result.position = + R_dc * (out.result.position - depth_color_extrinsic_.t); + out.result.orientation = Eigen::Quaternionf(R_dc * rot).normalized(); + } + + out.result_valid = true; + return out; + + // Currently code: + // 1. Transform N depth points into color frame (in extract_bbox_pcl_aligned) + // 2. Fit the plane in color frame + // 3. Transform 1 result point back to depth frame - return true; + // The more optimal approach would be to work entirely in depth frame from the start: + // 1. Map the bbox from color → depth (a single transform of a few corners) + // 2. Extract points and fit the plane directly in depth frame + // 3. Cast the ray using depth intrinsics — no back-and-forth needed } } // namespace valve_detection diff --git a/src/ros_utils.cpp b/src/ros_utils.cpp new file mode 100644 index 0000000..04cdc03 --- /dev/null +++ b/src/ros_utils.cpp @@ -0,0 +1,80 @@ +#include "valve_detection_ros/ros_utils.hpp" + +#include +#include +#include "vortex_msgs/msg/landmark.hpp" + +namespace valve_detection { + +BoundingBox to_bbox(const vision_msgs::msg::BoundingBox2D& b) { + BoundingBox o; + o.center_x = static_cast(b.center.position.x); + o.center_y = static_cast(b.center.position.y); + o.size_x = static_cast(b.size_x); + o.size_y = static_cast(b.size_y); + o.theta = static_cast(b.center.theta); // radians + return o; +} + +geometry_msgs::msg::PoseArray make_pose_array( + const std::vector& poses, const std_msgs::msg::Header& header) { + geometry_msgs::msg::PoseArray msg; + msg.header = header; + msg.poses.reserve(poses.size()); + for (const auto& p : poses) { + geometry_msgs::msg::Pose po; + po.position.x = p.position.x(); + po.position.y = p.position.y(); + po.position.z = p.position.z(); + po.orientation.x = p.orientation.x(); + po.orientation.y = p.orientation.y(); + po.orientation.z = 
p.orientation.z(); + po.orientation.w = p.orientation.w(); + msg.poses.push_back(po); + } + return msg; +} + +vortex_msgs::msg::LandmarkArray make_landmark_array( + const std::vector& poses, const std_msgs::msg::Header& header, + int type, int subtype) { + vortex_msgs::msg::LandmarkArray out; + out.header = header; + out.landmarks.reserve(poses.size()); + for (size_t i = 0; i < poses.size(); ++i) { + vortex_msgs::msg::Landmark lm; + lm.header = header; + lm.id = static_cast(i); + lm.type.value = type; + lm.subtype.value = subtype; + lm.pose.pose.position.x = poses[i].position.x(); + lm.pose.pose.position.y = poses[i].position.y(); + lm.pose.pose.position.z = poses[i].position.z(); + lm.pose.pose.orientation.x = poses[i].orientation.x(); + lm.pose.pose.orientation.y = poses[i].orientation.y(); + lm.pose.pose.orientation.z = poses[i].orientation.z(); + lm.pose.pose.orientation.w = poses[i].orientation.w(); + out.landmarks.push_back(lm); + } + return out; +} + +cv::Mat decode_depth_to_float( + const sensor_msgs::msg::Image::ConstSharedPtr& depth) { + cv::Mat depth_img; + // RealSense publishes depth as 16UC1 (uint16 millimetres). + // cv_bridge type-casts without scaling, so we must divide by 1000. 
+ if (depth->encoding == sensor_msgs::image_encodings::TYPE_16UC1 || + depth->encoding == "16UC1") { + cv_bridge::CvImageConstPtr cv_depth = + cv_bridge::toCvShare(depth, "16UC1"); + cv_depth->image.convertTo(depth_img, CV_32FC1, 0.001); + } else { + cv_bridge::CvImageConstPtr cv_depth = + cv_bridge::toCvShare(depth, "32FC1"); + depth_img = cv_depth->image.clone(); + } + return depth_img; +} + +} // namespace valve_detection diff --git a/src/valve_pose_ros.cpp b/src/valve_pose_ros.cpp index d3a7788..c3f731f 100644 --- a/src/valve_pose_ros.cpp +++ b/src/valve_pose_ros.cpp @@ -1,54 +1,36 @@ // valve_pose_ros.cpp -// ROS node: subscribes to depth, color, and detections; publishes valve poses +// ROS node: subscribes to depth and detections; publishes valve poses // and visualizations. #include "valve_detection_ros/valve_pose_ros.hpp" +#include "valve_detection_ros/ros_utils.hpp" -#include -#include #include -#include -#include #include namespace valve_detection { using std::placeholders::_1; using std::placeholders::_2; -using std::placeholders::_3; -// Declares all ROS parameters, initializes the pose estimator, and sets up -// subscribers and publishers. 
ValvePoseNode::ValvePoseNode(const rclcpp::NodeOptions& options) : Node("valve_pose_node", options) { - const auto depth_topic = - declare_parameter("depth_image_sub_topic"); - const auto color_topic = - declare_parameter("color_image_sub_topic"); - const auto det_topic = - declare_parameter("detections_sub_topic"); - const auto depth_info_topic = - declare_parameter("depth_image_info_topic"); + debug_visualize_ = declare_parameter("debug_visualize"); + iou_duplicate_threshold_ = static_cast( + declare_parameter("iou_duplicate_threshold")); + output_frame_id_ = declare_parameter("output_frame_id"); + landmark_type_ = declare_parameter("landmark_type"); + landmark_subtype_ = declare_parameter("landmark_subtype"); - const auto pose_stamped_topic = - declare_parameter("valve_pose_stamped_pub_topic"); - const auto pose_topic = - declare_parameter("valve_poses_pub_topic"); - const auto lm_topic = declare_parameter("landmarks_pub_topic"); - const auto img_topic = - declare_parameter("processed_image_pub_topic"); - const auto depth_color_topic = - declare_parameter("depth_colormap_pub_topic"); - const auto valve_points_topic = - declare_parameter("valve_points_pub_topic"); - const auto depth_cloud_topic = - declare_parameter("depth_cloud_pub_topic"); + setup_estimator(); - depth_colormap_vmin_ = static_cast( - declare_parameter("depth_colormap_value_min")); - depth_colormap_vmax_ = static_cast( - declare_parameter("depth_colormap_value_max")); + const auto qos = rclcpp::QoS(rclcpp::KeepLast(10)) + .reliability(RMW_QOS_POLICY_RELIABILITY_BEST_EFFORT); + setup_publishers(qos); + setup_subscribers(qos); +} +void ValvePoseNode::setup_estimator() { const int yolo_w = declare_parameter("yolo_img_width"); const int yolo_h = declare_parameter("yolo_img_height"); const float annulus_ratio = @@ -59,20 +41,16 @@ ValvePoseNode::ValvePoseNode(const rclcpp::NodeOptions& options) declare_parameter("plane_ransac_max_iterations"); const float handle_off = 
declare_parameter("valve_handle_offset"); - use_color_image_ = declare_parameter("use_color_image"); - visualize_detections_ = declare_parameter("visualize_detections"); - debug_visualize_ = declare_parameter("debug_visualize"); - iou_duplicate_threshold_ = static_cast( - declare_parameter("iou_duplicate_threshold")); - output_frame_id_ = declare_parameter("output_frame_id"); - - landmark_type_ = declare_parameter("landmark_type"); - landmark_subtype_ = declare_parameter("landmark_subtype"); - detector_ = std::make_unique( yolo_w, yolo_h, annulus_ratio, ransac_thresh, ransac_iters, handle_off); - depth_color_extrinsic_ = d555_depth_to_color_extrinsic(); + depth_color_extrinsic_.R = Eigen::Matrix3f::Identity(); + depth_color_extrinsic_.t.x() = + static_cast(declare_parameter("depth_to_color_tx")); + depth_color_extrinsic_.t.y() = + static_cast(declare_parameter("depth_to_color_ty")); + depth_color_extrinsic_.t.z() = + static_cast(declare_parameter("depth_to_color_tz")); detector_->set_depth_color_extrinsic(depth_color_extrinsic_); color_props_.intr.fx = declare_parameter("color_fx"); @@ -81,27 +59,18 @@ ValvePoseNode::ValvePoseNode(const rclcpp::NodeOptions& options) color_props_.intr.cy = declare_parameter("color_cy"); color_props_.dim.width = declare_parameter("color_image_width"); color_props_.dim.height = declare_parameter("color_image_height"); - + color_props_.intr.dist[0] = declare_parameter("color_d1"); + color_props_.intr.dist[1] = declare_parameter("color_d2"); + color_props_.intr.dist[2] = declare_parameter("color_d3"); + color_props_.intr.dist[3] = declare_parameter("color_d4"); + color_props_.intr.dist[4] = declare_parameter("color_d5"); detector_->set_color_image_properties(color_props_); detector_->calculate_letterbox_padding(); - - cv_camera_matrix_ = (cv::Mat_(3, 3) << color_props_.intr.fx, 0, - color_props_.intr.cx, 0, color_props_.intr.fy, - color_props_.intr.cy, 0, 0, 1); - - cv_dist_coeffs_ = - (cv::Mat_(5, 1) << declare_parameter("color_d1"), - 
declare_parameter("color_d2"), - declare_parameter("color_d3"), - declare_parameter("color_d4"), - declare_parameter("color_d5")); - - got_info_ = true; - RCLCPP_INFO( - get_logger(), - "Color intrinsics loaded from config (fx=%.2f fy=%.2f cx=%.2f cy=%.2f)", - color_props_.intr.fx, color_props_.intr.fy, color_props_.intr.cx, - color_props_.intr.cy); + RCLCPP_INFO(get_logger(), + "Color intrinsics loaded from config as fallback " + "(fx=%.2f fy=%.2f cx=%.2f cy=%.2f)", + color_props_.intr.fx, color_props_.intr.fy, + color_props_.intr.cx, color_props_.intr.cy); depth_props_.intr.fx = declare_parameter("depth_fx"); depth_props_.intr.fy = declare_parameter("depth_fy"); @@ -109,70 +78,110 @@ ValvePoseNode::ValvePoseNode(const rclcpp::NodeOptions& options) depth_props_.intr.cy = declare_parameter("depth_cy"); depth_props_.dim.width = declare_parameter("depth_image_width"); depth_props_.dim.height = declare_parameter("depth_image_height"); - detector_->set_depth_image_properties(depth_props_); - got_depth_info_ = true; - RCLCPP_INFO( - get_logger(), - "Depth intrinsics loaded from config (fx=%.2f fy=%.2f cx=%.2f cy=%.2f)", - depth_props_.intr.fx, depth_props_.intr.fy, depth_props_.intr.cx, - depth_props_.intr.cy); + RCLCPP_INFO(get_logger(), + "Depth intrinsics loaded from config as fallback " + "(fx=%.2f fy=%.2f cx=%.2f cy=%.2f)", + depth_props_.intr.fx, depth_props_.intr.fy, + depth_props_.intr.cx, depth_props_.intr.cy); +} - const auto qos = rclcpp::QoS(rclcpp::KeepLast(10)) - .reliability(RMW_QOS_POLICY_RELIABILITY_BEST_EFFORT); +void ValvePoseNode::setup_publishers(const rclcpp::QoS& qos) { + const auto lm_topic = declare_parameter("landmarks_pub_topic"); + landmark_pub_ = + create_publisher(lm_topic, qos); + + if (!debug_visualize_) + return; + + const auto pose_topic = + declare_parameter("debug.valve_poses_pub_topic"); + const auto depth_cloud_topic = + declare_parameter("debug.depth_cloud_pub_topic"); + const auto depth_color_topic = + 
declare_parameter("debug.depth_colormap_pub_topic"); + const auto ann_topic = + declare_parameter("debug.annulus_pub_topic"); + const auto pln_topic = + declare_parameter("debug.plane_pub_topic"); + depth_colormap_vmin_ = static_cast( + declare_parameter("debug.depth_colormap_value_min")); + depth_colormap_vmax_ = static_cast( + declare_parameter("debug.depth_colormap_value_max")); + + pose_pub_ = + create_publisher(pose_topic, qos); + depth_cloud_pub_ = create_publisher( + depth_cloud_topic, qos); + depth_colormap_pub_ = + create_publisher(depth_color_topic, qos); + annulus_pub_ = + create_publisher(ann_topic, qos); + plane_pub_ = + create_publisher(pln_topic, qos); +} + +void ValvePoseNode::setup_subscribers(const rclcpp::QoS& qos) { + const auto depth_topic = + declare_parameter("depth_image_sub_topic"); + const auto det_topic = + declare_parameter("detections_sub_topic"); + const auto depth_info_topic = + declare_parameter("depth_image_info_topic"); + const auto color_info_topic = + declare_parameter("color_image_info_topic"); const auto info_qos = rclcpp::QoS(rclcpp::KeepLast(1)) .reliability(RMW_QOS_POLICY_RELIABILITY_RELIABLE) .durability(RMW_QOS_POLICY_DURABILITY_TRANSIENT_LOCAL); - // Color camera_info is not subscribed — intrinsics and distortion are - // always taken from config params above. 
+ color_cam_info_sub_ = create_subscription( + color_info_topic, info_qos, + std::bind(&ValvePoseNode::color_camera_info_cb, this, _1)); depth_cam_info_sub_ = create_subscription( depth_info_topic, info_qos, std::bind(&ValvePoseNode::depth_camera_info_cb, this, _1)); - pose_stamped_pub_ = create_publisher( - pose_stamped_topic, qos); - pose_pub_ = - create_publisher(pose_topic, qos); - landmark_pub_ = - create_publisher(lm_topic, qos); - image_pub_ = create_publisher(img_topic, qos); - depth_colormap_pub_ = - create_publisher(depth_color_topic, qos); - valve_points_pub_ = create_publisher( - valve_points_topic, qos); - depth_cloud_pub_ = - create_publisher(depth_cloud_topic, qos); - - if (debug_visualize_) { - const auto ann_topic = - declare_parameter("debug_annulus_pub_topic"); - const auto pln_topic = - declare_parameter("debug_plane_pub_topic"); - annulus_pub_ = - create_publisher(ann_topic, qos); - plane_pub_ = - create_publisher(pln_topic, qos); - } - depth_sub_.subscribe(this, depth_topic, qos.get_rmw_qos_profile()); - color_sub_.subscribe(this, color_topic, qos.get_rmw_qos_profile()); det_sub_.subscribe(this, det_topic, qos.get_rmw_qos_profile()); sync_ = std::make_shared>( - SyncPolicy(10), depth_sub_, color_sub_, det_sub_); + SyncPolicy(10), depth_sub_, det_sub_); sync_->registerCallback( - std::bind(&ValvePoseNode::sync_cb, this, _1, _2, _3)); + std::bind(&ValvePoseNode::sync_cb, this, _1, _2)); +} + +// One-shot callback that overrides color intrinsics and distortion from the +// camera_info topic. 
+void ValvePoseNode::color_camera_info_cb( + const sensor_msgs::msg::CameraInfo::SharedPtr msg) { + color_props_.intr.fx = msg->k[0]; + color_props_.intr.fy = msg->k[4]; + color_props_.intr.cx = msg->k[2]; + color_props_.intr.cy = msg->k[5]; + color_props_.dim.width = static_cast(msg->width); + color_props_.dim.height = static_cast(msg->height); + + if (msg->d.size() >= 5) { + for (size_t i = 0; i < 5; ++i) + color_props_.intr.dist[i] = msg->d[i]; + } + + detector_->set_color_image_properties(color_props_); + detector_->calculate_letterbox_padding(); + + color_cam_info_sub_.reset(); // one-shot + RCLCPP_INFO(get_logger(), + "Color camera_info received, overriding config fallback " + "(fx=%.2f fy=%.2f cx=%.2f cy=%.2f)", + color_props_.intr.fx, color_props_.intr.fy, + color_props_.intr.cx, color_props_.intr.cy); } // One-shot callback that overrides depth intrinsics from the camera_info topic. void ValvePoseNode::depth_camera_info_cb( const sensor_msgs::msg::CameraInfo::SharedPtr msg) { - if (got_depth_info_) - return; - depth_props_.intr.fx = msg->k[0]; depth_props_.intr.fy = msg->k[4]; depth_props_.intr.cx = msg->k[2]; @@ -182,177 +191,22 @@ void ValvePoseNode::depth_camera_info_cb( detector_->set_depth_image_properties(depth_props_); - got_depth_info_ = true; depth_cam_info_sub_.reset(); // one-shot RCLCPP_INFO(get_logger(), "Depth camera_info received (fx=%.1f fy=%.1f)", depth_props_.intr.fx, depth_props_.intr.fy); } -// Converts a ROS BoundingBox2D message to the internal BoundingBox struct. -BoundingBox ValvePoseNode::to_bbox( - const vision_msgs::msg::BoundingBox2D& b) const { - BoundingBox o; - o.center_x = static_cast(b.center.position.x); - o.center_y = static_cast(b.center.position.y); - o.size_x = static_cast(b.size_x); - o.size_y = static_cast(b.size_y); - o.theta = static_cast(b.center.theta); // radians - return o; -} - -// Corrects the bbox center for lens distortion using the color camera matrix. 
-BoundingBox ValvePoseNode::undistort_bbox(const BoundingBox& bbox) const { - if (cv_camera_matrix_.empty() || cv_dist_coeffs_.empty()) - return bbox; - - std::vector pts{{bbox.center_x, bbox.center_y}}; - std::vector undistorted; - cv::undistortPoints(pts, undistorted, cv_camera_matrix_, cv_dist_coeffs_, - cv::noArray(), cv_camera_matrix_); - - BoundingBox result = bbox; - result.center_x = undistorted[0].x; - result.center_y = undistorted[0].y; - return result; -} - -// Greedy NMS: sorts detections by score, keeps at most 2 non-overlapping boxes. -std::vector ValvePoseNode::filter_duplicate_detections( - const vision_msgs::msg::Detection2DArray& det) const { - const size_t n = det.detections.size(); - if (n == 0) - return {}; - - // Collect (score, index) pairs – use bbox area when no score is available. - std::vector> scored; - scored.reserve(n); - for (size_t i = 0; i < n; ++i) { - float score = 0.0f; - if (!det.detections[i].results.empty()) { - score = static_cast( - det.detections[i].results[0].hypothesis.score); - } else { - const auto& b = det.detections[i].bbox; - score = static_cast(b.size_x * b.size_y); - } - scored.emplace_back(score, i); - } - std::sort(scored.begin(), scored.end(), - [](const auto& a, const auto& b) { return a.first > b.first; }); - - // Greedy NMS – keep at most 2. 
- std::vector kept; - std::vector suppressed(n, false); - - for (size_t si = 0; si < scored.size() && kept.size() < 2; ++si) { - const size_t i = scored[si].second; - if (suppressed[i]) - continue; - - kept.push_back(i); - - const BoundingBox bi = to_bbox(det.detections[i].bbox); - const float ai = bi.size_x * bi.size_y; - const float bx1i = bi.center_x - bi.size_x * 0.5f; - const float by1i = bi.center_y - bi.size_y * 0.5f; - const float bx2i = bi.center_x + bi.size_x * 0.5f; - const float by2i = bi.center_y + bi.size_y * 0.5f; - - for (size_t sj = si + 1; sj < scored.size(); ++sj) { - const size_t j = scored[sj].second; - if (suppressed[j]) - continue; - - const BoundingBox bj = to_bbox(det.detections[j].bbox); - const float aj = bj.size_x * bj.size_y; - const float bx1j = bj.center_x - bj.size_x * 0.5f; - const float by1j = bj.center_y - bj.size_y * 0.5f; - const float bx2j = bj.center_x + bj.size_x * 0.5f; - const float by2j = bj.center_y + bj.size_y * 0.5f; - - const float ix1 = std::max(bx1i, bx1j); - const float iy1 = std::max(by1i, by1j); - const float ix2 = std::min(bx2i, bx2j); - const float iy2 = std::min(by2i, by2j); - - if (ix2 <= ix1 || iy2 <= iy1) - continue; // no overlap - - const float inter = (ix2 - ix1) * (iy2 - iy1); - const float iou = inter / (ai + aj - inter); - const float iom = - inter / std::min(ai, aj); // intersection over minimum - - if (iou > iou_duplicate_threshold_ || iom > 0.7f) { - suppressed[j] = true; - } - } - } - - return kept; -} - -// Packs poses into a PoseArray message and publishes it. 
-void ValvePoseNode::publish_pose_array(const std::vector& poses, - const std_msgs::msg::Header& header) { - geometry_msgs::msg::PoseArray msg; - msg.header = header; - msg.poses.reserve(poses.size()); - for (const auto& p : poses) { - geometry_msgs::msg::Pose po; - po.position.x = p.position.x(); - po.position.y = p.position.y(); - po.position.z = p.position.z(); - po.orientation.x = p.orientation.x(); - po.orientation.y = p.orientation.y(); - po.orientation.z = p.orientation.z(); - po.orientation.w = p.orientation.w(); - msg.poses.push_back(po); - } - pose_pub_->publish(msg); -} - -// Packs poses into a LandmarkArray message with type/subtype fields and -// publishes it. -void ValvePoseNode::publish_landmarks(const std::vector& poses, - const std_msgs::msg::Header& header) { - vortex_msgs::msg::LandmarkArray out; - out.header = header; - out.landmarks.reserve(poses.size()); - - for (size_t i = 0; i < poses.size(); ++i) { - vortex_msgs::msg::Landmark lm; - lm.header = header; - lm.id = static_cast(i); - lm.type.value = landmark_type_; - lm.subtype.value = landmark_subtype_; - - lm.pose.pose.position.x = poses[i].position.x(); - lm.pose.pose.position.y = poses[i].position.y(); - lm.pose.pose.position.z = poses[i].position.z(); - lm.pose.pose.orientation.x = poses[i].orientation.x(); - lm.pose.pose.orientation.y = poses[i].orientation.y(); - lm.pose.pose.orientation.z = poses[i].orientation.z(); - lm.pose.pose.orientation.w = poses[i].orientation.w(); - - out.landmarks.push_back(lm); - } - landmark_pub_->publish(out); -} - // Main synchronized callback: runs NMS, estimates poses, and publishes all // outputs. 
void ValvePoseNode::sync_cb( const sensor_msgs::msg::Image::ConstSharedPtr& depth, - const sensor_msgs::msg::Image::ConstSharedPtr& color, const vision_msgs::msg::Detection2DArray::ConstSharedPtr& det) { - if (!got_info_) - return; if (!depth || !det) return; cv::Mat depth_color; const bool publish_colormap = + debug_visualize_ && depth_colormap_pub_ && depth_colormap_pub_->get_subscription_count() > 0; if (publish_colormap) { cv_bridge::CvImageConstPtr cv_raw = @@ -367,110 +221,67 @@ void ValvePoseNode::sync_cb( cv::applyColorMap(depth_8u, depth_color, cv::COLORMAP_TURBO); } - cv::Mat color_vis; - const bool publish_image = - color && image_pub_->get_subscription_count() > 0; - if (publish_image) { - cv_bridge::CvImageConstPtr cv_color = - cv_bridge::toCvShare(color, "bgr8"); - color_vis = cv_color->image.clone(); - } - if (det->detections.empty()) { - publish_pose_array({}, depth->header); - publish_landmarks({}, depth->header); + // Publish empty arrays to clear stale data from previous detections. + if (debug_visualize_ && pose_pub_) + pose_pub_->publish(make_pose_array({}, depth->header)); + landmark_pub_->publish( + make_landmark_array({}, depth->header, landmark_type_, + landmark_subtype_)); if (publish_colormap) { depth_colormap_pub_->publish( *cv_bridge::CvImage(depth->header, "bgr8", depth_color) .toImageMsg()); } - if (publish_image) { - image_pub_->publish( - *cv_bridge::CvImage(color->header, "bgr8", color_vis) - .toImageMsg()); - } return; } - const std::vector kept = filter_duplicate_detections(*det); - - // RealSense publishes depth as 16UC1 (uint16 millimetres). - // cv_bridge type-casts without scaling, so we must divide by 1000. 
- cv::Mat depth_img; - if (depth->encoding == sensor_msgs::image_encodings::TYPE_16UC1 || - depth->encoding == "16UC1") { - cv_bridge::CvImageConstPtr cv_depth = - cv_bridge::toCvShare(depth, "16UC1"); - cv_depth->image.convertTo(depth_img, CV_32FC1, 0.001); - } else { - cv_bridge::CvImageConstPtr cv_depth = - cv_bridge::toCvShare(depth, "32FC1"); - depth_img = cv_depth->image.clone(); + std::vector> scored_boxes; + scored_boxes.reserve(det->detections.size()); + for (const auto& d : det->detections) { + float score = 0.0f; + if (!d.results.empty()) { + score = static_cast(d.results[0].hypothesis.score); + } else { + score = static_cast(d.bbox.size_x * d.bbox.size_y); + } + scored_boxes.emplace_back(score, to_bbox(d.bbox)); } + const std::vector kept = + filter_duplicate_detections(scored_boxes, iou_duplicate_threshold_); + + const cv::Mat depth_img = decode_depth_to_float(depth); pcl::PointCloud::Ptr ann_dbg( new pcl::PointCloud); pcl::PointCloud::Ptr pln_dbg( new pcl::PointCloud); - std::vector kept_boxes; - std::vector - raw_boxes; // raw detection coords for depth colormap + std::vector raw_boxes; std::vector poses; for (size_t idx : kept) { - // Parse bounding box (YOLO-space), map to original image, then - // undistort. 
- BoundingBox yolo_box = to_bbox(det->detections[idx].bbox); - BoundingBox org_box = yolo_box; - org_box = undistort_bbox(org_box); - + const BoundingBox& yolo_box = scored_boxes[idx].second; + BoundingBox org_box = undistort_bbox(yolo_box, color_props_.intr); raw_boxes.push_back(yolo_box); - kept_boxes.push_back(org_box); - Pose pose; - if (detector_->compute_pose_from_depth(depth_img, yolo_box, pose, - ann_dbg, pln_dbg, true)) { - poses.push_back(pose); - } + const auto pose_result = detector_->compute_pose_from_depth( + depth_img, org_box, ann_dbg, pln_dbg, true); + if (pose_result.result_valid) + poses.push_back(pose_result.result); } - if (depth_cloud_pub_->get_subscription_count() > 0) { + if (debug_visualize_ && depth_cloud_pub_ && + depth_cloud_pub_->get_subscription_count() > 0) { sensor_msgs::msg::PointCloud2 cloud_msg; pcl::toROSMsg(*ann_dbg, cloud_msg); - std_msgs::msg::Header cloud_header = depth->header; - if (!output_frame_id_.empty()) - cloud_header.frame_id = output_frame_id_; - cloud_msg.header = cloud_header; + cloud_msg.header = depth->header; depth_cloud_pub_->publish(cloud_msg); } - if (publish_image) { - for (size_t i = 0; i < kept_boxes.size(); ++i) { - const auto& box = kept_boxes[i]; - const float angle_deg = - box.theta * 180.0f / static_cast(M_PI); - cv::RotatedRect rrect(cv::Point2f(box.center_x, box.center_y), - cv::Size2f(box.size_x, box.size_y), - angle_deg); - cv::Point2f corners[4]; - rrect.points(corners); - for (int j = 0; j < 4; ++j) { - cv::line(color_vis, corners[j], corners[(j + 1) % 4], - cv::Scalar(0, 255, 0), 2); - } - } - image_pub_->publish( - *cv_bridge::CvImage(color->header, "bgr8", color_vis).toImageMsg()); - } - if (publish_colormap) { for (size_t i = 0; i < raw_boxes.size(); ++i) { const auto& box = raw_boxes[i]; - - // Project each OBB corner from color image space to depth image - // space using the full intrinsic + extrinsic pipeline. Fall back to - // the raw color pixel when no valid pose depth is available. 
const float Z = (i < poses.size() && poses[i].position.z() > 0.0f) ? poses[i].position.z() : 0.0f; @@ -483,14 +294,13 @@ void ValvePoseNode::sync_cb( cv::Point2f corners[4]; rrect.points(corners); - if (Z > 0.0f && got_depth_info_) { + if (Z > 0.0f) { for (auto& c : corners) { c = project_color_pixel_to_depth(c.x, c.y, Z, color_props_, depth_props_, depth_color_extrinsic_); } } - for (int j = 0; j < 4; ++j) { cv::line(depth_color, corners[j], corners[(j + 1) % 4], cv::Scalar(0, 255, 0), 2); @@ -515,38 +325,11 @@ void ValvePoseNode::sync_cb( if (!output_frame_id_.empty()) pose_header.frame_id = output_frame_id_; - publish_pose_array(poses, pose_header); - publish_landmarks(poses, pose_header); - - // Publish valve center positions as a PointCloud2 for Foxglove 3D view. - if (valve_points_pub_->get_subscription_count() > 0) { - pcl::PointCloud pts; - for (const auto& p : poses) { - pts.points.push_back( - {p.position.x(), p.position.y(), p.position.z()}); - } - pts.width = static_cast(pts.points.size()); - pts.height = 1; - pts.is_dense = true; - sensor_msgs::msg::PointCloud2 pc_msg; - pcl::toROSMsg(pts, pc_msg); - pc_msg.header = pose_header; - valve_points_pub_->publish(pc_msg); - } - - // Publish best (first) detection as PoseStamped for easy Foxglove 3D view. 
- if (!poses.empty()) { - geometry_msgs::msg::PoseStamped ps; - ps.header = pose_header; - ps.pose.position.x = poses[0].position.x(); - ps.pose.position.y = poses[0].position.y(); - ps.pose.position.z = poses[0].position.z(); - ps.pose.orientation.x = poses[0].orientation.x(); - ps.pose.orientation.y = poses[0].orientation.y(); - ps.pose.orientation.z = poses[0].orientation.z(); - ps.pose.orientation.w = poses[0].orientation.w(); - pose_stamped_pub_->publish(ps); - } + if (debug_visualize_ && pose_pub_) + pose_pub_->publish(make_pose_array(poses, pose_header)); + landmark_pub_->publish( + make_landmark_array(poses, pose_header, landmark_type_, + landmark_subtype_)); } } // namespace valve_detection From 19bd2e7c8af7a2d39af61de876dee18da7adef89 Mon Sep 17 00:00:00 2001 From: jens Date: Sat, 21 Mar 2026 19:28:49 +0100 Subject: [PATCH 28/29] Fix pre-commit hooks --- config/valve_detection_params.yaml | 1 - .../valve_detection/depth_image_processing.hpp | 3 ++- include/valve_detection_ros/ros_utils.hpp | 9 ++++++--- include/valve_detection_ros/valve_pose_ros.hpp | 5 +++-- launch/valve_detection.launch.py | 5 +++-- src/depth_image_processing.cpp | 11 +++++------ src/pose_estimator.cpp | 13 ++++++++----- src/ros_utils.cpp | 9 ++++++--- src/valve_pose_ros.cpp | 17 +++++++---------- 9 files changed, 40 insertions(+), 33 deletions(-) diff --git a/config/valve_detection_params.yaml b/config/valve_detection_params.yaml index 28fb9a4..296e0c8 100644 --- a/config/valve_detection_params.yaml +++ b/config/valve_detection_params.yaml @@ -123,4 +123,3 @@ # Must match the depth optical frame in the robot's TF tree. 
# See: auv_setup/description/moby.urdf.xacro output_frame_id: "moby/front_camera_depth_optical" - diff --git a/include/valve_detection/depth_image_processing.hpp b/include/valve_detection/depth_image_processing.hpp index c045dff..4c11deb 100644 --- a/include/valve_detection/depth_image_processing.hpp +++ b/include/valve_detection/depth_image_processing.hpp @@ -60,7 +60,8 @@ cv::Point2f project_color_pixel_to_depth(const float u_c, const DepthColorExtrinsic& extr); // Returns undistorted copy of bbox (center_x/y corrected for lens distortion). -BoundingBox undistort_bbox(const BoundingBox& bbox, const CameraIntrinsics& intr); +BoundingBox undistort_bbox(const BoundingBox& bbox, + const CameraIntrinsics& intr); // Greedy NMS: returns indices of kept detections (max 2). // scored_boxes: (score, bbox) pairs. Two boxes are duplicates when IoMin diff --git a/include/valve_detection_ros/ros_utils.hpp b/include/valve_detection_ros/ros_utils.hpp index 5e23770..370f972 100644 --- a/include/valve_detection_ros/ros_utils.hpp +++ b/include/valve_detection_ros/ros_utils.hpp @@ -18,12 +18,15 @@ BoundingBox to_bbox(const vision_msgs::msg::BoundingBox2D& b); // Builds a PoseArray message from a list of poses. geometry_msgs::msg::PoseArray make_pose_array( - const std::vector& poses, const std_msgs::msg::Header& header); + const std::vector& poses, + const std_msgs::msg::Header& header); // Builds a LandmarkArray message from a list of poses. vortex_msgs::msg::LandmarkArray make_landmark_array( - const std::vector& poses, const std_msgs::msg::Header& header, - int type, int subtype); + const std::vector& poses, + const std_msgs::msg::Header& header, + int type, + int subtype); // Decodes a ROS depth image to a CV_32FC1 mat in metres. 
cv::Mat decode_depth_to_float( diff --git a/include/valve_detection_ros/valve_pose_ros.hpp b/include/valve_detection_ros/valve_pose_ros.hpp index 86acc83..8026267 100644 --- a/include/valve_detection_ros/valve_pose_ros.hpp +++ b/include/valve_detection_ros/valve_pose_ros.hpp @@ -25,8 +25,8 @@ #include #include #include "vortex_msgs/msg/landmark_array.hpp" -#include "vortex_msgs/msg/landmark_type.hpp" #include "vortex_msgs/msg/landmark_subtype.hpp" +#include "vortex_msgs/msg/landmark_type.hpp" namespace valve_detection { @@ -84,7 +84,8 @@ class ValvePoseNode : public rclcpp::Node { rclcpp::Publisher::SharedPtr depth_colormap_pub_; rclcpp::Publisher::SharedPtr annulus_pub_; rclcpp::Publisher::SharedPtr plane_pub_; - rclcpp::Publisher::SharedPtr depth_cloud_pub_; + rclcpp::Publisher::SharedPtr + depth_cloud_pub_; float depth_colormap_vmin_; float depth_colormap_vmax_; diff --git a/launch/valve_detection.launch.py b/launch/valve_detection.launch.py index ec247a0..06da9fa 100644 --- a/launch/valve_detection.launch.py +++ b/launch/valve_detection.launch.py @@ -1,11 +1,12 @@ import os from ament_index_python.packages import get_package_share_directory +from launch_ros.actions import ComposableNodeContainer +from launch_ros.descriptions import ComposableNode + from launch import LaunchDescription from launch.actions import DeclareLaunchArgument from launch.substitutions import LaunchConfiguration -from launch_ros.actions import ComposableNodeContainer -from launch_ros.descriptions import ComposableNode def generate_launch_description(): diff --git a/src/depth_image_processing.cpp b/src/depth_image_processing.cpp index 2cabd6e..65068a7 100644 --- a/src/depth_image_processing.cpp +++ b/src/depth_image_processing.cpp @@ -314,13 +314,12 @@ cv::Point2f project_color_pixel_to_depth(const float u_c, } // Corrects the bbox center for lens distortion using the given intrinsics. 
-BoundingBox undistort_bbox(const BoundingBox& bbox, const CameraIntrinsics& intr) { - const cv::Mat K = (cv::Mat_(3, 3) << intr.fx, 0, intr.cx, - 0, intr.fy, intr.cy, - 0, 0, 1); +BoundingBox undistort_bbox(const BoundingBox& bbox, + const CameraIntrinsics& intr) { + const cv::Mat K = (cv::Mat_(3, 3) << intr.fx, 0, intr.cx, 0, + intr.fy, intr.cy, 0, 0, 1); const cv::Mat D = (cv::Mat_(5, 1) << intr.dist[0], intr.dist[1], - intr.dist[2], intr.dist[3], - intr.dist[4]); + intr.dist[2], intr.dist[3], intr.dist[4]); // Build a RotatedRect and extract all 4 corners. const float angle_deg = bbox.theta * 180.0f / static_cast(M_PI); cv::RotatedRect rrect(cv::Point2f(bbox.center_x, bbox.center_y), diff --git a/src/pose_estimator.cpp b/src/pose_estimator.cpp index 112fc78..3640b20 100644 --- a/src/pose_estimator.cpp +++ b/src/pose_estimator.cpp @@ -259,8 +259,8 @@ PoseResult PoseEstimator::compute_pose_from_depth( out.result.orientation = Eigen::Quaternionf(rot).normalized(); // The aligned path (has_depth_props_) produces the pose in the color camera - // frame. Transform it into the depth camera frame so the published frame_id - // matches the depth optical frame. + // frame. Transform it into the depth camera frame so the published + // frame_id matches the depth optical frame. // P_depth = R^T * (P_color - t) // R_depth = R^T * R_color if (has_depth_props_) { @@ -274,12 +274,15 @@ PoseResult PoseEstimator::compute_pose_from_depth( return out; // Currently code: - // 1. Transform N depth points into color frame (in extract_bbox_pcl_aligned) + // 1. Transform N depth points into color frame (in + // extract_bbox_pcl_aligned) // 2. Fit the plane in color frame // 3. Transform 1 result point back to depth frame - // The more optimal approach would be to work entirely in depth frame from the start: - // 1. Map the bbox from color → depth (a single transform of a few corners) + // The more optimal approach would be to work entirely in depth frame from + // the start: + // 1. 
Map the bbox from color → depth (a single transform of a few + // corners) // 2. Extract points and fit the plane directly in depth frame // 3. Cast the ray using depth intrinsics — no back-and-forth needed } diff --git a/src/ros_utils.cpp b/src/ros_utils.cpp index 04cdc03..9f23681 100644 --- a/src/ros_utils.cpp +++ b/src/ros_utils.cpp @@ -17,7 +17,8 @@ BoundingBox to_bbox(const vision_msgs::msg::BoundingBox2D& b) { } geometry_msgs::msg::PoseArray make_pose_array( - const std::vector& poses, const std_msgs::msg::Header& header) { + const std::vector& poses, + const std_msgs::msg::Header& header) { geometry_msgs::msg::PoseArray msg; msg.header = header; msg.poses.reserve(poses.size()); @@ -36,8 +37,10 @@ geometry_msgs::msg::PoseArray make_pose_array( } vortex_msgs::msg::LandmarkArray make_landmark_array( - const std::vector& poses, const std_msgs::msg::Header& header, - int type, int subtype) { + const std::vector& poses, + const std_msgs::msg::Header& header, + int type, + int subtype) { vortex_msgs::msg::LandmarkArray out; out.header = header; out.landmarks.reserve(poses.size()); diff --git a/src/valve_pose_ros.cpp b/src/valve_pose_ros.cpp index c3f731f..4379d3e 100644 --- a/src/valve_pose_ros.cpp +++ b/src/valve_pose_ros.cpp @@ -111,8 +111,8 @@ void ValvePoseNode::setup_publishers(const rclcpp::QoS& qos) { pose_pub_ = create_publisher(pose_topic, qos); - depth_cloud_pub_ = create_publisher( - depth_cloud_topic, qos); + depth_cloud_pub_ = + create_publisher(depth_cloud_topic, qos); depth_colormap_pub_ = create_publisher(depth_color_topic, qos); annulus_pub_ = @@ -148,8 +148,7 @@ void ValvePoseNode::setup_subscribers(const rclcpp::QoS& qos) { sync_ = std::make_shared>( SyncPolicy(10), depth_sub_, det_sub_); - sync_->registerCallback( - std::bind(&ValvePoseNode::sync_cb, this, _1, _2)); + sync_->registerCallback(std::bind(&ValvePoseNode::sync_cb, this, _1, _2)); } // One-shot callback that overrides color intrinsics and distortion from the @@ -225,9 +224,8 @@ void 
ValvePoseNode::sync_cb( // Publish empty arrays to clear stale data from previous detections. if (debug_visualize_ && pose_pub_) pose_pub_->publish(make_pose_array({}, depth->header)); - landmark_pub_->publish( - make_landmark_array({}, depth->header, landmark_type_, - landmark_subtype_)); + landmark_pub_->publish(make_landmark_array( + {}, depth->header, landmark_type_, landmark_subtype_)); if (publish_colormap) { depth_colormap_pub_->publish( *cv_bridge::CvImage(depth->header, "bgr8", depth_color) @@ -327,9 +325,8 @@ void ValvePoseNode::sync_cb( if (debug_visualize_ && pose_pub_) pose_pub_->publish(make_pose_array(poses, pose_header)); - landmark_pub_->publish( - make_landmark_array(poses, pose_header, landmark_type_, - landmark_subtype_)); + landmark_pub_->publish(make_landmark_array( + poses, pose_header, landmark_type_, landmark_subtype_)); } } // namespace valve_detection From ee51daf8295a70459c618236c9b08a522d0ea179 Mon Sep 17 00:00:00 2001 From: jens Date: Mon, 23 Mar 2026 09:31:32 +0100 Subject: [PATCH 29/29] Refactor OBB extraction and depth frame processing for better optimization --- .../depth_image_processing.hpp | 21 ++- include/valve_detection/pose_estimator.hpp | 11 +- src/depth_image_processing.cpp | 108 +++++++++++- src/pose_estimator.cpp | 162 +++++++++++++----- 4 files changed, 252 insertions(+), 50 deletions(-) diff --git a/include/valve_detection/depth_image_processing.hpp b/include/valve_detection/depth_image_processing.hpp index 4c11deb..ae6e582 100644 --- a/include/valve_detection/depth_image_processing.hpp +++ b/include/valve_detection/depth_image_processing.hpp @@ -25,11 +25,10 @@ void extract_annulus_pcl( const float annulus_radius_ratio, // inner radius = outer*ratio pcl::PointCloud::Ptr& out); -// Like extract_annulus_pcl but with proper depth-to-color alignment. 
-// Iterates depth pixels, back-projects with depth intrinsics, applies the -// extrinsic transform, then checks whether the resulting color-frame -// projection falls inside the annulus. Output points are in the color -// camera frame. +// Iterates depth pixels, back-projects each with depth intrinsics, applies +// the extrinsic transform, then checks whether the resulting color-frame +// projection falls inside the elliptic annulus defined by color_bbox. +// Output points are in the color camera frame. void extract_annulus_pcl_aligned( const cv::Mat& depth_image, // CV_32FC1 meters, depth frame const BoundingBox& color_bbox, // annulus defined in color pixels @@ -49,6 +48,18 @@ void extract_bbox_pcl_aligned( const DepthColorExtrinsic& extrinsic, pcl::PointCloud::Ptr& out); +// Projects the 4 corners of the color OBB into depth image space once, fits +// an OBB to those projected corners, then tests depth pixels directly against +// that depth-image OBB — no per-pixel matrix multiply needed. Output points +// are stored in the depth camera frame. +void extract_bbox_pcl_depth( + const cv::Mat& depth_image, // CV_32FC1 meters, depth frame + const BoundingBox& color_bbox, // OBB defined in color pixels + const ImageProperties& color_props, + const ImageProperties& depth_props, + const DepthColorExtrinsic& extrinsic, + pcl::PointCloud::Ptr& out); + // Project a color image pixel to depth image coordinates. // u_c, v_c: pixel coordinates in the color image. // Z: depth of the point in the color camera frame (metres). 
diff --git a/include/valve_detection/pose_estimator.hpp b/include/valve_detection/pose_estimator.hpp index bdc48e8..d734a3d 100644 --- a/include/valve_detection/pose_estimator.hpp +++ b/include/valve_detection/pose_estimator.hpp @@ -50,7 +50,8 @@ class PoseEstimator { const Eigen::Vector3f& ray_direction) const; Eigen::Vector3f find_ray_plane_intersection( const pcl::ModelCoefficients::Ptr& coeff, - const Eigen::Vector3f& ray_direction) const; + const Eigen::Vector3f& ray_direction, + const Eigen::Vector3f& ray_origin = Eigen::Vector3f::Zero()) const; Eigen::Vector3f shift_point_along_normal( const Eigen::Vector3f& intersection_point, const Eigen::Vector3f& plane_normal) const; @@ -58,6 +59,14 @@ class PoseEstimator { const pcl::ModelCoefficients::Ptr& coefficients, const Eigen::Vector3f& plane_normal, float angle) const; + // Variant that works entirely in depth frame: color rays are rotated by + // R_dc = R^T before intersecting the depth-frame plane. + Eigen::Matrix3f create_rotation_matrix_depth( + const pcl::ModelCoefficients::Ptr& coefficients, + const Eigen::Vector3f& plane_normal, + float angle, + const Eigen::Vector3f& ray_origin, + const Eigen::Matrix3f& R_dc) const; ImageProperties color_image_properties_{}; ImageProperties depth_image_properties_{}; diff --git a/src/depth_image_processing.cpp b/src/depth_image_processing.cpp index 65068a7..2c3feae 100644 --- a/src/depth_image_processing.cpp +++ b/src/depth_image_processing.cpp @@ -102,8 +102,10 @@ void extract_annulus_pcl(const cv::Mat& depth_image, out->is_dense = false; } -// Like extract_annulus_pcl but transforms depth pixels into the color frame -// before the annulus test. +// Iterates depth pixels, back-projects each with depth intrinsics, applies +// the extrinsic transform, and checks whether the resulting color-frame +// projection falls inside the elliptic annulus defined by color_bbox. +// Output points are in the color camera frame. 
void extract_annulus_pcl_aligned(const cv::Mat& depth_image, const BoundingBox& color_bbox, const ImageProperties& color_props, @@ -282,6 +284,108 @@ void extract_bbox_pcl_aligned(const cv::Mat& depth_image, out->is_dense = false; } +// Projects the 4 corners of the color OBB into depth image space once, fits +// an OBB to those projected corners, then tests depth pixels directly against +// that depth-image OBB — no per-pixel matrix multiply needed. Output points +// are stored in the depth camera frame. +void extract_bbox_pcl_depth(const cv::Mat& depth_image, + const BoundingBox& color_bbox, + const ImageProperties& color_props, + const ImageProperties& depth_props, + const DepthColorExtrinsic& extr, + pcl::PointCloud::Ptr& out) { + out->clear(); + + // Sample a representative depth near the bbox center to project the color + // OBB corners into depth image space. The valve is roughly flat, so a + // single Z gives a good approximation for all 4 corners. + const float scale = + (depth_props.intr.fx > 0.0 && color_props.intr.fx > 0.0) + ? static_cast(depth_props.intr.fx / color_props.intr.fx) + : 1.0f; + const int u_c_d = std::clamp(static_cast(color_bbox.center_x * scale), + 0, depth_image.cols - 1); + const int v_c_d = std::clamp(static_cast(color_bbox.center_y * scale), + 0, depth_image.rows - 1); + + float Z_est = 0.0f; + constexpr int kSampleRadius = 5; + for (int dv = -kSampleRadius; dv <= kSampleRadius && Z_est <= 0.0f; ++dv) { + for (int du = -kSampleRadius; du <= kSampleRadius && Z_est <= 0.0f; + ++du) { + const int u = std::clamp(u_c_d + du, 0, depth_image.cols - 1); + const int v = std::clamp(v_c_d + dv, 0, depth_image.rows - 1); + const float z = depth_image.at(v, u); + if (z > 0.0f && !std::isnan(z) && !std::isinf(z)) + Z_est = z; + } + } + if (Z_est <= 0.0f) + return; + + // Project the 4 color OBB corners into depth image coordinates. 
+ const float angle_deg = + color_bbox.theta * 180.0f / static_cast(M_PI); + cv::RotatedRect color_rrect( + cv::Point2f(color_bbox.center_x, color_bbox.center_y), + cv::Size2f(color_bbox.size_x, color_bbox.size_y), angle_deg); + cv::Point2f color_corners[4]; + color_rrect.points(color_corners); + + std::vector depth_corners(4); + for (int i = 0; i < 4; ++i) { + depth_corners[i] = + project_color_pixel_to_depth(color_corners[i].x, color_corners[i].y, + Z_est, color_props, depth_props, extr); + } + + // Fit a rotated rect to the 4 projected corners in depth image space. + const cv::RotatedRect depth_rrect = cv::minAreaRect(depth_corners); + const float angle_d = depth_rrect.angle * static_cast(M_PI) / 180.0f; + const float cos_d = std::cos(angle_d); + const float sin_d = std::sin(angle_d); + const float half_w_d = depth_rrect.size.width * 0.5f; + const float half_h_d = depth_rrect.size.height * 0.5f; + const float cx_d = depth_rrect.center.x; + const float cy_d = depth_rrect.center.y; + + // Bounding rectangle of the depth OBB. + const float r_d = std::sqrt(half_w_d * half_w_d + half_h_d * half_h_d); + const int u0_d = std::max(0, static_cast(cx_d - r_d) - 1); + const int u1_d = + std::min(depth_image.cols - 1, static_cast(cx_d + r_d) + 1); + const int v0_d = std::max(0, static_cast(cy_d - r_d) - 1); + const int v1_d = + std::min(depth_image.rows - 1, static_cast(cy_d + r_d) + 1); + + for (int v_d = v0_d; v_d <= v1_d; ++v_d) { + for (int u_d = u0_d; u_d <= u1_d; ++u_d) { + // Test against the depth-image OBB — no matrix multiply. + const float dx = u_d - cx_d; + const float dy = v_d - cy_d; + if (std::abs(cos_d * dx + sin_d * dy) > half_w_d || + std::abs(-sin_d * dx + cos_d * dy) > half_h_d) + continue; + + const float z_d = depth_image.at(v_d, u_d); + if (z_d <= 0.0f || std::isnan(z_d) || std::isinf(z_d)) + continue; + + // Back-project to 3D depth frame. 
+ out->points.push_back( + {static_cast((u_d - depth_props.intr.cx) * z_d / + depth_props.intr.fx), + static_cast((v_d - depth_props.intr.cy) * z_d / + depth_props.intr.fy), + z_d}); + } + } + + out->width = static_cast(out->points.size()); + out->height = 1; + out->is_dense = false; +} + // Projects a color image pixel to depth image coordinates using the full // intrinsic + extrinsic pipeline. cv::Point2f project_color_pixel_to_depth(const float u_c, diff --git a/src/pose_estimator.cpp b/src/pose_estimator.cpp index 3640b20..82fcbdb 100644 --- a/src/pose_estimator.cpp +++ b/src/pose_estimator.cpp @@ -116,12 +116,15 @@ Eigen::Vector3f PoseEstimator::compute_plane_normal( return normal; } -// Finds the 3D point where the viewing ray intersects the fitted plane. +// Finds the 3D point where a ray intersects the fitted plane. +// ray_origin defaults to the camera origin (zero), which is correct for the +// color-frame path. Pass the actual origin when the ray does not start at +// the frame origin (e.g. the color camera origin expressed in depth frame). Eigen::Vector3f PoseEstimator::find_ray_plane_intersection( const pcl::ModelCoefficients::Ptr& coefficients, - const Eigen::Vector3f& ray_direction) const { + const Eigen::Vector3f& ray_direction, + const Eigen::Vector3f& ray_origin) const { // A plane ax+by+cz+d=0 has 4 coefficients: [a, b, c, d]. - // We need all 4 here: [a,b,c] for the normal and d for the offset. if (!coefficients || coefficients->values.size() < 4) return Eigen::Vector3f::Zero(); @@ -134,7 +137,8 @@ Eigen::Vector3f PoseEstimator::find_ray_plane_intersection( if (std::abs(denom) < 1e-6f) return Eigen::Vector3f::Zero(); - return (-D / denom) * ray_direction; + const float lambda = -(plane_normal.dot(ray_origin) + D) / denom; + return ray_origin + lambda * ray_direction; } // Shifts a 3D point along the plane normal by the valve handle offset. 
@@ -204,6 +208,69 @@ Eigen::Matrix3f PoseEstimator::create_rotation_matrix( return rot; } +// Builds a 3×3 rotation matrix from the plane normal (Z) and the projected +// bbox angle (X), working entirely in the depth camera frame. Color-image +// rays are rotated into depth frame before intersecting the plane, and +// ray_origin is the color camera origin expressed in depth frame. +Eigen::Matrix3f PoseEstimator::create_rotation_matrix_depth( + const pcl::ModelCoefficients::Ptr& coefficients, + const Eigen::Vector3f& plane_normal, + float angle, + const Eigen::Vector3f& ray_origin, + const Eigen::Matrix3f& R_dc) const { + if (!coefficients || coefficients->values.size() < 4) + return Eigen::Matrix3f::Identity(); + + const Eigen::Vector3f z_axis = plane_normal; + const float D = coefficients->values[3]; + const float fx = static_cast(color_image_properties_.intr.fx); + const float fy = static_cast(color_image_properties_.intr.fy); + const float cx = static_cast(color_image_properties_.intr.cx); + const float cy = static_cast(color_image_properties_.intr.cy); + + Eigen::Matrix3f K; + K << fx, 0, cx, 0, fy, cy, 0, 0, 1; + const Eigen::Matrix3f Kinv = K.inverse(); + + // Two image points along the bbox angle through the principal point. + const float len = 50.0f; + const Eigen::Vector3f p1(cx, cy, 1.f); + const Eigen::Vector3f p2(cx + len * std::cos(angle), + cy + len * std::sin(angle), 1.f); + + // Back-project color pixels to rays, then rotate into depth frame. + const Eigen::Vector3f r1 = (R_dc * (Kinv * p1)).normalized(); + const Eigen::Vector3f r2 = (R_dc * (Kinv * p2)).normalized(); + + // Intersect each ray (from color origin in depth frame) with the plane. 
+    const float denom1 = z_axis.dot(r1);
+    const float denom2 = z_axis.dot(r2);
+    if (std::abs(denom1) < 1e-6f || std::abs(denom2) < 1e-6f)
+        return Eigen::Matrix3f::Identity();
+
+    const float n_dot_O = z_axis.dot(ray_origin);
+    const Eigen::Vector3f X1 = ray_origin + (-(n_dot_O + D) / denom1) * r1;
+    const Eigen::Vector3f X2 = ray_origin + (-(n_dot_O + D) / denom2) * r2;
+
+    // Compute in-plane direction corresponding to the image line angle.
+    Eigen::Vector3f x_axis = (X2 - X1).normalized();
+    x_axis = (x_axis - x_axis.dot(z_axis) * z_axis).normalized();
+
+    // Ensure consistent direction (avoid flipping between frames).
+    if (filter_direction_.dot(x_axis) < 0)
+        x_axis = -x_axis;
+    filter_direction_ = x_axis;
+
+    const Eigen::Vector3f y_axis = z_axis.cross(x_axis).normalized();
+    x_axis = y_axis.cross(z_axis).normalized();
+
+    Eigen::Matrix3f rot;
+    rot.col(0) = x_axis;
+    rot.col(1) = y_axis;
+    rot.col(2) = z_axis;
+    return rot;
+}
+
 // Extracts a point cloud from the depth image, fits a plane, and returns the
 // valve pose.
 PoseResult PoseEstimator::compute_pose_from_depth(
@@ -216,9 +283,13 @@ PoseResult PoseEstimator::compute_pose_from_depth(
         new pcl::PointCloud<pcl::PointXYZ>);
 
     if (has_depth_props_) {
-        extract_bbox_pcl_aligned(depth_image, bbox_org, color_image_properties_,
-                                 depth_image_properties_,
-                                 depth_color_extrinsic_, cloud);
+        // Extract points directly in the depth camera frame. The color OBB
+        // is approximated in depth image space by projecting its 4 corners
+        // once, so the per-pixel membership test runs against a depth-image
+        // OBB with no per-pixel color-frame transform.
+ extract_bbox_pcl_depth(depth_image, bbox_org, color_image_properties_, + depth_image_properties_, depth_color_extrinsic_, + cloud); } else { extract_annulus_pcl(depth_image, bbox_org, color_image_properties_, annulus_radius_ratio_, cloud); @@ -243,48 +314,55 @@ PoseResult PoseEstimator::compute_pose_from_depth( plane_dbg->is_dense = false; } - const Eigen::Vector3f ray = get_ray_direction(bbox_org); - const Eigen::Vector3f normal = compute_plane_normal(coeff, ray); - if (normal.isZero()) - return {}; - - const Eigen::Vector3f pos = find_ray_plane_intersection(coeff, ray); - if (pos.isZero()) - return {}; - PoseResult out; - out.result.position = shift_point_along_normal(pos, normal); - const Eigen::Matrix3f rot = - create_rotation_matrix(coeff, normal, bbox_org.theta); - out.result.orientation = Eigen::Quaternionf(rot).normalized(); - - // The aligned path (has_depth_props_) produces the pose in the color camera - // frame. Transform it into the depth camera frame so the published - // frame_id matches the depth optical frame. - // P_depth = R^T * (P_color - t) - // R_depth = R^T * R_color + if (has_depth_props_) { + // The color bbox center defines a ray that originates at the color + // camera, not the depth camera. 
Express that ray in depth frame: + // origin = -R^T * t (color camera origin in depth frame) + // direction = R^T * K_c⁻¹ * [cx, cy, 1]ᵀ (normalized) const Eigen::Matrix3f R_dc = depth_color_extrinsic_.R.transpose(); - out.result.position = - R_dc * (out.result.position - depth_color_extrinsic_.t); - out.result.orientation = Eigen::Quaternionf(R_dc * rot).normalized(); + const Eigen::Vector3f O_d = -(R_dc * depth_color_extrinsic_.t); + + const float fx_c = static_cast(color_image_properties_.intr.fx); + const float fy_c = static_cast(color_image_properties_.intr.fy); + const float cx_c = static_cast(color_image_properties_.intr.cx); + const float cy_c = static_cast(color_image_properties_.intr.cy); + const Eigen::Vector3f r_c((bbox_org.center_x - cx_c) / fx_c, + (bbox_org.center_y - cy_c) / fy_c, 1.0f); + const Eigen::Vector3f ray = (R_dc * r_c).normalized(); + + const Eigen::Vector3f normal = compute_plane_normal(coeff, ray); + if (normal.isZero()) + return {}; + + const Eigen::Vector3f pos = + find_ray_plane_intersection(coeff, ray, O_d); + if (pos.isZero()) + return {}; + + out.result.position = shift_point_along_normal(pos, normal); + const Eigen::Matrix3f rot = create_rotation_matrix_depth( + coeff, normal, bbox_org.theta, O_d, R_dc); + out.result.orientation = Eigen::Quaternionf(rot).normalized(); + } else { + const Eigen::Vector3f ray = get_ray_direction(bbox_org); + const Eigen::Vector3f normal = compute_plane_normal(coeff, ray); + if (normal.isZero()) + return {}; + + const Eigen::Vector3f pos = find_ray_plane_intersection(coeff, ray); + if (pos.isZero()) + return {}; + + out.result.position = shift_point_along_normal(pos, normal); + const Eigen::Matrix3f rot = + create_rotation_matrix(coeff, normal, bbox_org.theta); + out.result.orientation = Eigen::Quaternionf(rot).normalized(); } out.result_valid = true; return out; - - // Currently code: - // 1. Transform N depth points into color frame (in - // extract_bbox_pcl_aligned) - // 2. 
Fit the plane in color frame - // 3. Transform 1 result point back to depth frame - - // The more optimal approach would be to work entirely in depth frame from - // the start: - // 1. Map the bbox from color → depth (a single transform of a few - // corners) - // 2. Extract points and fit the plane directly in depth frame - // 3. Cast the ray using depth intrinsics — no back-and-forth needed } } // namespace valve_detection