比如,在yolov5里面,推理时的预处理是直接把图像缩放到输入大小(比如640*640),这样会使很多图像扭曲变形,导致识别不准确:
// Preprocess a BGR frame into an ONNX Runtime input tensor.
// NOTE(review): resizes directly to the network input size with no
// aspect-ratio-preserving letterbox, so non-square inputs are distorted
// (this is the problem the surrounding text discusses).
Ort::Value YoloV5::transform(const cv::Mat &mat)
{
// Work on a copy so the caller's frame is left untouched.
cv::Mat canva = mat.clone();
cv::cvtColor(canva, canva, cv::COLOR_BGR2RGB);
// input_node_dims is (1,C,H,W): .at(3) = width, .at(2) = height.
cv::resize(canva, canva, cv::Size(input_node_dims.at(3),
input_node_dims.at(2)));
// (1,3,640,640) 1xCXHXW
// Normalize pixels in place with the model's mean/scale; converts to float32.
ortcv::utils::transform::normalize_inplace(canva, mean_val, scale_val); // float32
// Pack the HWC image into a CHW tensor backed by input_values_handler.
return ortcv::utils::transform::create_tensor(
canva, input_node_dims, memory_info_handler,
input_values_handler, ortcv::utils::transform::CHW);
}
而在python里面的代码,却是先求出一个最小缩放比例,然后把原图按照该比例缩放,再对不足640的部分进行补边处理,这样就不会对图像做不等比例缩放,图像不会扭曲;还原识别框的时候,进行反向操作,代码如下:
def letterbox(img, new_shape=(416, 416), color=(114, 114, 114), auto=False, scaleFill=False, scaleup=True):
    """Resize *img* to fit inside *new_shape* keeping aspect ratio, padding the rest.

    Returns the padded image, the (width, height) scale ratios actually applied,
    and the (dw, dh) per-side padding — everything needed to undo the transform.
    """
    h, w = img.shape[:2]  # current height, width
    if isinstance(new_shape, int):
        new_shape = (new_shape, new_shape)

    # Smallest ratio that fits the image inside the target on both axes.
    scale = min(new_shape[0] / h, new_shape[1] / w)
    if not scaleup:
        # Only shrink, never enlarge (enlarging can hurt mAP at test time).
        scale = min(scale, 1.0)

    ratio = scale, scale  # width, height ratios
    unpadded = int(round(w * scale)), int(round(h * scale))
    pad_w = new_shape[1] - unpadded[0]  # total horizontal padding
    pad_h = new_shape[0] - unpadded[1]  # total vertical padding

    if auto:
        # Minimum rectangle: pad only up to the next multiple of the stride.
        pad_w, pad_h = np.mod(pad_w, 64), np.mod(pad_h, 64)
    elif scaleFill:
        # Stretch to fill exactly — no padding, independent per-axis ratios.
        pad_w, pad_h = 0.0, 0.0
        unpadded = (new_shape[1], new_shape[0])
        ratio = new_shape[1] / w, new_shape[0] / h  # width, height ratios

    # Split the padding evenly between the two sides.
    pad_w /= 2
    pad_h /= 2

    if (w, h) != unpadded:
        img = cv2.resize(img, unpadded, interpolation=cv2.INTER_LINEAR)

    # The +/-0.1 rounding assigns the odd pixel (if any) to bottom/right.
    top, bottom = int(round(pad_h - 0.1)), int(round(pad_h + 0.1))
    left, right = int(round(pad_w - 0.1)), int(round(pad_w + 0.1))
    img = cv2.copyMakeBorder(img, top, bottom, left, right, cv2.BORDER_CONSTANT, value=color)  # add border
    return img, ratio, (pad_w, pad_h)
def scale_coords(img1_shape, coords, img0_shape, ratio_pad=None):
    """Map xyxy boxes from the letterboxed image (img1_shape) back onto the
    original image (img0_shape), modifying *coords* in place.

    If *ratio_pad* is None, the gain and padding are recomputed exactly as
    ``letterbox`` produced them; otherwise ratio_pad = ((gain, gain), (dw, dh)).
    """
    if ratio_pad is None:
        # Reconstruct the letterbox parameters from the two shapes.
        gain = min(img1_shape[0] / img0_shape[0], img1_shape[1] / img0_shape[1])
        pad_x = (img1_shape[1] - img0_shape[1] * gain) / 2
        pad_y = (img1_shape[0] - img0_shape[0] * gain) / 2
    else:
        gain = ratio_pad[0][0]
        pad_x, pad_y = ratio_pad[1]

    # Undo the letterbox: remove padding, then divide out the scale.
    coords[:, [0, 2]] -= pad_x  # x padding
    coords[:, [1, 3]] -= pad_y  # y padding
    coords[:, :4] /= gain
    clip_coords(coords, img0_shape)  # keep boxes inside the original image
    return coords