From 9d340d1e40231472b62fa07097b78e7bf286a0f4 Mon Sep 17 00:00:00 2001
From: minibot-1 <paddy-hofmann@web.de>
Date: Fri, 24 Mar 2023 13:41:48 +0000
Subject: [PATCH] Improve accuracy of the sign distance estimate

---
 minibot_vision/scripts/SegmentSign.py  | 48 ++++++++++++++------------
 minibot_vision/scripts/SignDetector.py | 30 +++++++++++-----
 2 files changed, 46 insertions(+), 32 deletions(-)

diff --git a/minibot_vision/scripts/SegmentSign.py b/minibot_vision/scripts/SegmentSign.py
index 30ef2d0..1cb6fc3 100644
--- a/minibot_vision/scripts/SegmentSign.py
+++ b/minibot_vision/scripts/SegmentSign.py
@@ -10,6 +10,7 @@ import yaml
 import rospkg
 import std_srvs.srv
 from minibot_msgs.srv import segment_sign_command, segment_sign_commandRequest, segment_sign_commandResponse
+import time
 
 # *** hyper params ***
 IMG_RES = (480, 640)
@@ -107,38 +108,39 @@ def circular_mean(p, r, arr : np.array):
 
 def do_hough_circle_detection(img_rgb, img_depth, VISUALIZE=False):
     global canny, accum_thresh
 
-    gray = img_rgb
     gray = cv2.cvtColor(img_rgb, cv2.COLOR_BGR2GRAY)
     #gray = cv2.medianBlur(gray, 5)    # reduce noise
+    # TODO try (per the OpenCV docs):
+    # It also helps to smooth the image a bit unless it's already soft. For example,
+    # GaussianBlur() with a 7x7 kernel and 1.5x1.5 sigma or similar blurring may help.
 
     circles = cv2.HoughCircles(gray, cv2.HOUGH_GRADIENT, 1, gray.shape[0] / 4,
                                param1=canny,         # First method-specific parameter. In case of HOUGH_GRADIENT , it is the higher threshold of the two passed to the Canny edge detector (the lower one is twice smaller).
                                param2=accum_thresh,  # Second method-specific parameter. In case of HOUGH_GRADIENT , it is the accumulator threshold for the circle centers at the detection stage. The smaller it is, the more false circles may be detected. Circles, corresponding to the larger accumulator values, will be returned first.
                                minRadius=MIN_RADIUS, maxRadius=MAX_RADIUS)
 
-    keypoints = []
+    keypoint = []
     if circles is not None:
         circles = np.uint16(np.around(circles))
-        for k, (i) in enumerate(circles[0, :]):
-            center = (i[0], i[1])
-            radius = i[2]
-
-            # get depth in [m] (was radius * 0.4 in real world)
-            d = circular_mean(center, radius * 0.2, copy(img_depth)) / 1000
-            # filter if sign to close (circle detector will struggle) or to far (background)
-            # was 0.2 and 1.0
-            if d < MIN_DEPTH or d > MAX_DEPTH:
-                continue
-            keypoints.append({"center": center, "radius": radius, "depth": d})
-
-            # circle center
-            if VISUALIZE:
-                cv2.putText(img_rgb, "d:{:1.3f} r:{:1.0f} num:{}".format(d, radius, k), (center[0], center[1] - radius - 5), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255, 255, 255), thickness=1)
-                cv2.circle(img_rgb, center, 1, (0, 100, 100), 3)
-                # circle outline
-                cv2.circle(img_rgb, center, radius, (255, 0, 255), 3)
-
-
-    return keypoints
+        i = circles[0, 0]
+        center = (i[0], i[1])
+        radius = i[2]
+
+        # get depth in [m] (was radius * 0.4 in real world)
+        d = circular_mean(center, radius * 0.2, copy(img_depth)) / 1000  # TODO: this is a slow implementation; consider speeding it up
+        # filter if sign too close (circle detector will struggle) or too far (background)
+        # was 0.2 and 1.0
+        if d < MIN_DEPTH or d > MAX_DEPTH:
+            return []
+        keypoint.append({"center": center, "radius": radius, "depth": d})
+
+        # circle center
+        if VISUALIZE:
+            cv2.putText(img_rgb, "d:{:1.3f} r:{:1.0f}".format(d, radius), (center[0], center[1] - radius - 5), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255, 255, 255), thickness=1)
+            cv2.circle(img_rgb, center, 1, (0, 100, 100), 3)
+            # circle outline
+            cv2.circle(img_rgb, center, radius, (255, 0, 255), 3)
+
+    return keypoint
 
 
 def crop_to_bounds(crop_bounds, max_val):
diff --git a/minibot_vision/scripts/SignDetector.py b/minibot_vision/scripts/SignDetector.py
index c77cd71..9004d51 100755
--- a/minibot_vision/scripts/SignDetector.py
+++ b/minibot_vision/scripts/SignDetector.py
@@ -14,6 +14,7 @@ import cv2
 from copy import copy
 from vision_msgs.msg import Detection2D, ObjectHypothesisWithPose
 from minibot_msgs.srv import set_url
+import time
 
 # *** CONSTANTS ***
 visualize = True
@@ -100,16 +101,11 @@ def publish_img_patch(img_patch):
     pub_cmpr_img.publish(cmprsmsg)
 
 
-def detect_sign(img_rgb_stream, image_timestamp):
-    global img_depth_stream, pub_result_img, toggle_hough_detection, enable_sign_detection
-
-    img_orig = np.copy(img_rgb_stream)
-    # TODO the ratio between img_depth_stream and img_rgb_stream might be different!
-
+def hough_pipeline(img_color, img_depth):
     # get sign location in img
     if toggle_hough_detection:
-        keypoints = SegmentSign.do_hough_circle_detection(copy(img_orig), copy(img_depth_stream))
-        keypoints += ShapeDetector.do_shape_detection(copy(img_orig), copy(img_depth_stream))
+        keypoints = SegmentSign.do_hough_circle_detection(np.copy(img_color), np.copy(img_depth))
+        keypoints += ShapeDetector.do_shape_detection(np.copy(img_color), np.copy(img_depth))
         keypoints = SegmentSign.filter_duplicate_keypoints(keypoints)
         # only use first keypoint (this should be the most accurate guess)
         if len(keypoints) >= 1:
@@ -118,7 +114,17 @@ def detect_sign(img_rgb_stream, image_timestamp):
         # use center of image
         keypoints = [{"center": (IMG_RES[1]//2, IMG_RES[0]//2), "radius": TF_RES[0] // 2, "depth": -1}]
 
-    patches = SegmentSign.get_tensor_patches(copy(img_orig), keypoints, zoom=toggle_hough_detection)
+    return keypoints
+
+
+def detect_sign(img_rgb_stream, image_timestamp):
+    global img_depth_stream, pub_result_img, toggle_hough_detection, enable_sign_detection
+
+    img_orig = np.copy(img_rgb_stream)
+    # TODO: the ratio between img_depth_stream and img_rgb_stream might be different!
+
+    keypoints = hough_pipeline(img_orig, img_depth_stream)
+    patches = SegmentSign.get_tensor_patches(img_orig, keypoints, zoom=toggle_hough_detection)
 
     # publish patch for capture images
     if enable_capture_images:
@@ -127,6 +133,7 @@ def detect_sign(img_rgb_stream, image_timestamp):
 
     # cut to multiple images at keypoints
     text = []
+    # TODO: remove this loop (only one keypoint is returned now anyway)
     for i in range(len(keypoints)):
         k = keypoints[i]
         p = patches[i]
@@ -139,6 +146,11 @@ def detect_sign(img_rgb_stream, image_timestamp):
 
         if enable_sign_detection:
             label, precision = sign_classifier.predictImage(p)      # returns tupel (label, precision), if no model / error is set up label= -1
+            # TODO: get the most recent depth data
+            new_keypoints = hough_pipeline(img_rgb_stream, img_depth_stream)
+            if len(new_keypoints) >= 1:
+                d = new_keypoints[0]["depth"]
+
             # publish results
             publish_results(center, r, d, label, precision, image_timestamp)
             text.append("c: {} p: {:1.3f} d:{:1.3f}".format(sign_classifier.labelOfClass(label), precision, d))
-- 
GitLab
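
Review sketches for the TODOs this patch leaves in the code. These are
illustrative only and not part of the diff; helper names and default values
below are hypothetical.

The "TODO try" comment in do_hough_circle_detection() quotes the OpenCV
HoughCircles documentation. A minimal sketch of that pre-smoothing step,
assuming a BGR input frame and placeholder thresholds:

import cv2

def detect_circles_smoothed(img_bgr, canny=100, accum_thresh=30,
                            min_radius=10, max_radius=100):
    gray = cv2.cvtColor(img_bgr, cv2.COLOR_BGR2GRAY)
    # 7x7 kernel with sigma 1.5, as the OpenCV docs suggest; suppresses
    # sensor noise that would otherwise produce spurious Canny edges
    gray = cv2.GaussianBlur(gray, (7, 7), 1.5)
    return cv2.HoughCircles(gray, cv2.HOUGH_GRADIENT, 1, gray.shape[0] / 4,
                            param1=canny, param2=accum_thresh,
                            minRadius=min_radius, maxRadius=max_radius)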
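
The "slow implementation" note at the circular_mean() call can likely be
addressed by building the disc mask with NumPy broadcasting instead of a
per-pixel Python loop. A sketch with the same (p, r, arr) signature as the
existing helper, assuming the depth image stores millimetres with 0 marking
invalid pixels (typical for RealSense-style sensors):

import numpy as np

def circular_mean_fast(p, r, arr):
    # cast up front so squaring uint16 circle coordinates cannot overflow
    cx, cy, r = float(p[0]), float(p[1]), float(r)
    h, w = arr.shape[:2]
    y, x = np.ogrid[:h, :w]                 # index grids, no full copies
    mask = (x - cx) ** 2 + (y - cy) ** 2 <= r ** 2
    vals = arr[mask].astype(np.float64)     # depth values inside the disc
    vals = vals[vals > 0]                   # drop invalid (zero) readings
    # 0.0 falls below MIN_DEPTH, so an all-invalid disc gets filtered out
    return vals.mean() if vals.size else 0.0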
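
The re-added "TODO: the ratio between img_depth_stream and img_rgb_stream
might be different" can be mitigated by resampling the depth frame to the
RGB resolution before any pixel lookup. This sketch (match_depth_to_rgb is
a hypothetical helper) only fixes a resolution mismatch; the robust fix is
proper depth-to-color registration using the camera extrinsics, e.g. the
RealSense alignment feature:

import cv2

def match_depth_to_rgb(img_depth, img_rgb):
    h, w = img_rgb.shape[:2]
    if img_depth.shape[:2] == (h, w):
        return img_depth
    # nearest-neighbour keeps real measurements and avoids interpolating
    # fake depths between a foreground sign and the background
    return cv2.resize(img_depth, (w, h), interpolation=cv2.INTER_NEAREST)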