1. Export the trained model to ONNX
from ultralytics import YOLO

model_path = "yolov10/runs/train8/weights/best.pt"
model = YOLO(model_path)  # Load the trained weights

# Export the model to ONNX with FP16 weights
model.export(format='onnx', half=True)
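Before quantizing, it is worth a quick sanity check that the exported model loads and that its input shape matches expectations. A minimal sketch, assuming the export above wrote best.onnx next to best.pt:

import onnxruntime

# Open the exported model on CPU and inspect its declared inputs
session = onnxruntime.InferenceSession("yolov10/runs/train8/weights/best.onnx",
                                       providers=['CPUExecutionProvider'])
for inp in session.get_inputs():
    print(inp.name, inp.shape, inp.type)  # e.g. images [1, 3, 640, 640] tensor(float)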
2. Quantization: shrinks the model to roughly a third of its size, from about 100 MB to about 30 MB
from onnxruntime.quantization import quantize_dynamic, QuantType

# Dynamically quantize the model weights to uint8
quantize_dynamic("/train8/weights/best.onnx", "best_quantized.onnx", weight_type=QuantType.QUInt8)
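To verify the size reduction on your own files, compare the two models on disk. A quick sketch, with the paths assumed to match the call above:

import os

fp_mb = os.path.getsize("/train8/weights/best.onnx") / 1e6
q_mb = os.path.getsize("best_quantized.onnx") / 1e6
print(f"original: {fp_mb:.1f} MB, quantized: {q_mb:.1f} MB ({q_mb / fp_mb:.0%} of original)")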
3. Inference with the ONNX model
import math
import time
import cv2
import numpy as np
import onnxruntime

class_names = ['person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus', 'train', 'truck', 'boat',
               'traffic light', 'fire hydrant', 'stop sign', 'parking meter', 'bench', 'bird', 'cat',
               'dog', 'horse', 'sheep', 'cow', 'elephant', 'bear', 'zebra', 'giraffe', 'backpack',
               'umbrella', 'handbag', 'tie', 'suitcase', 'frisbee', 'skis', 'snowboard', 'sports ball',
               'kite', 'baseball bat', 'baseball glove', 'skateboard', 'surfboard', 'tennis racket',
               'bottle', 'wine glass', 'cup', 'fork', 'knife', 'spoon', 'bowl', 'banana', 'apple',
               'sandwich', 'orange', 'broccoli', 'carrot', 'hot dog', 'pizza', 'donut', 'cake', 'chair',
               'couch', 'potted plant', 'bed', 'dining table', 'toilet', 'tv', 'laptop', 'mouse',
               'remote', 'keyboard', 'cell phone', 'microwave', 'oven', 'toaster', 'sink',
               'refrigerator', 'book', 'clock', 'vase', 'scissors', 'teddy bear', 'hair drier',
               'toothbrush']

# Create a list of colors for each class where each color is a tuple of 3 integer values
rng = np.random.default_rng(3)
colors = rng.uniform(0, 255, size=(len(class_names), 3))


def nms(boxes, scores, iou_threshold):
    # Sort by score
    sorted_indices = np.argsort(scores)[::-1]

    keep_boxes = []
    while sorted_indices.size > 0:
        # Pick the box with the highest remaining score
        box_id = sorted_indices[0]
        keep_boxes.append(box_id)

        # Compute IoU of the picked box with the rest
        ious = compute_iou(boxes[box_id, :], boxes[sorted_indices[1:], :])

        # Remove boxes with IoU over the threshold
        keep_indices = np.where(ious < iou_threshold)[0]
        # print(keep_indices.shape, sorted_indices.shape)
        sorted_indices = sorted_indices[keep_indices + 1]

    return keep_boxes


def compute_iou(box, boxes):
    # Compute xmin, ymin, xmax, ymax for both boxes
    xmin = np.maximum(box[0], boxes[:, 0])
    ymin = np.maximum(box[1], boxes[:, 1])
    xmax = np.minimum(box[2], boxes[:, 2])
    ymax = np.minimum(box[3], boxes[:, 3])

    # Compute intersection area
    intersection_area = np.maximum(0, xmax - xmin) * np.maximum(0, ymax - ymin)

    # Compute union area
    box_area = (box[2] - box[0]) * (box[3] - box[1])
    boxes_area = (boxes[:, 2] - boxes[:, 0]) * (boxes[:, 3] - boxes[:, 1])
    union_area = box_area + boxes_area - intersection_area

    # Compute IoU
    iou = intersection_area / union_area
    return iou


def xywh2xyxy(x):
    # Convert bounding box (x, y, w, h) to bounding box (x1, y1, x2, y2)
    y = np.copy(x)
    y[..., 0] = x[..., 0] - x[..., 2] / 2
    y[..., 1] = x[..., 1] - x[..., 3] / 2
    y[..., 2] = x[..., 0] + x[..., 2] / 2
    y[..., 3] = x[..., 1] + x[..., 3] / 2
    return y


def sigmoid(x):
    return 1 / (1 + np.exp(-x))


def draw_detections(image, boxes, scores, class_ids, mask_alpha=0.3, mask_maps=None):
    img_height, img_width = image.shape[:2]
    size = min([img_height, img_width]) * 0.0006
    text_thickness = int(min([img_height, img_width]) * 0.001)

    mask_img = draw_masks(image, boxes, class_ids, mask_alpha, mask_maps)

    # Draw bounding boxes and labels of detections
    for box, score, class_id in zip(boxes, scores, class_ids):
        color = colors[class_id]
        x1, y1, x2, y2 = box.astype(int)

        # Draw rectangle
        cv2.rectangle(mask_img, (x1, y1), (x2, y2), color, 2)

        label = class_names[class_id]
        caption = f'{label} {int(score * 100)}%'
        (tw, th), _ = cv2.getTextSize(text=caption, fontFace=cv2.FONT_HERSHEY_SIMPLEX,
                                      fontScale=size, thickness=text_thickness)
        th = int(th * 1.2)
        cv2.rectangle(mask_img, (x1, y1), (x1 + tw, y1 - th), color, -1)
        cv2.putText(mask_img, caption, (x1, y1),
                    cv2.FONT_HERSHEY_SIMPLEX, size, (255, 255, 255), text_thickness, cv2.LINE_AA)

    return mask_img


def draw_masks(image, boxes, class_ids, mask_alpha=0.3, mask_maps=None):
    mask_img = image.copy()

    # Draw bounding boxes and labels of detections
    for i, (box, class_id) in enumerate(zip(boxes, class_ids)):
        color = colors[class_id]
        x1, y1, x2, y2 = box.astype(int)

        # Draw fill mask image
        if mask_maps is None:
            cv2.rectangle(mask_img, (x1, y1), (x2, y2), color, -1)
        else:
            crop_mask = mask_maps[i][y1:y2, x1:x2, np.newaxis]
            crop_mask_img = mask_img[y1:y2, x1:x2]
            crop_mask_img = crop_mask_img * (1 - crop_mask) + crop_mask * color
            mask_img[y1:y2, x1:x2] = crop_mask_img

    return cv2.addWeighted(mask_img, mask_alpha, image, 1 - mask_alpha, 0)


def draw_comparison(img1, img2, name1, name2, fontsize=2.6, text_thickness=3):
    (tw, th), _ = cv2.getTextSize(text=name1, fontFace=cv2.FONT_HERSHEY_DUPLEX,
                                  fontScale=fontsize, thickness=text_thickness)
    x1 = img1.shape[1] // 3
    y1 = th
    offset = th // 5
    cv2.rectangle(img1, (x1 - offset * 2, y1 + offset),
                  (x1 + tw + offset * 2, y1 - th - offset), (0, 115, 255), -1)
    cv2.putText(img1, name1, (x1, y1),
                cv2.FONT_HERSHEY_DUPLEX, fontsize, (255, 255, 255), text_thickness)

    (tw, th), _ = cv2.getTextSize(text=name2, fontFace=cv2.FONT_HERSHEY_DUPLEX,
                                  fontScale=fontsize, thickness=text_thickness)
    x1 = img2.shape[1] // 3
    y1 = th
    offset = th // 5
    cv2.rectangle(img2, (x1 - offset * 2, y1 + offset),
                  (x1 + tw + offset * 2, y1 - th - offset), (94, 23, 235), -1)
    cv2.putText(img2, name2, (x1, y1),
                cv2.FONT_HERSHEY_DUPLEX, fontsize, (255, 255, 255), text_thickness)

    combined_img = cv2.hconcat([img1, img2])
    if combined_img.shape[1] > 3840:
        combined_img = cv2.resize(combined_img, (3840, 2160))
    return combined_img


class YOLOSeg:

    def __init__(self, path, conf_thres=0.7, iou_thres=0.5, num_masks=32):
        self.conf_threshold = conf_thres
        self.iou_threshold = iou_thres
        self.num_masks = num_masks

        # Initialize model
        self.initialize_model(path)

    def __call__(self, image):
        return self.segment_objects(image)

    def initialize_model(self, path):
        self.session = onnxruntime.InferenceSession(path,
                                                    providers=['CUDAExecutionProvider',
                                                               'CPUExecutionProvider'])
        # Get model info
        self.get_input_details()
        self.get_output_details()

    def segment_objects(self, image):
        input_tensor = self.prepare_input(image)

        # Perform inference on the image.
        # Note: if the boxes were regressed with a DFL head, the box branch would be 80 + 16*4 + 32 channels.
        # outputs[0]: (1, 4+80+32=116, 80*80+40*40+20*20=8400); outputs[1]: (1, 32, 160, 160)
        outputs = self.inference(input_tensor)

        # Process output 0: returns the boxes and their corresponding mask coefficients
        self.boxes, self.scores, self.class_ids, mask_pred = self.process_box_output(outputs[0])
        self.mask_maps = self.process_mask_output(mask_pred, outputs[1])

        return self.boxes, self.scores, self.class_ids, self.mask_maps

    def prepare_input(self, image):
        self.img_height, self.img_width = image.shape[:2]

        input_img = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

        # Resize input image
        input_img = cv2.resize(input_img, (self.input_width, self.input_height))

        # Scale input pixel values to 0 to 1
        input_img = input_img / 255.0
        input_img = input_img.transpose(2, 0, 1)
        input_tensor = input_img[np.newaxis, :, :, :].astype(np.float32)

        return input_tensor

    def inference(self, input_tensor):
        start = time.perf_counter()
        outputs = self.session.run(self.output_names, {self.input_names[0]: input_tensor})
        # print(f"Inference time: {(time.perf_counter() - start)*1000:.2f} ms")
        return outputs

    def process_box_output(self, box_output):
        predictions = np.squeeze(box_output).T
        num_classes = box_output.shape[1] - self.num_masks - 4

        # Filter out object confidence scores below threshold
        scores = np.max(predictions[:, 4:4 + num_classes], axis=1)
        predictions = predictions[scores > self.conf_threshold, :]
        scores = scores[scores > self.conf_threshold]

        if len(scores) == 0:
            return [], [], [], np.array([])

        box_predictions = predictions[..., :num_classes + 4]
        mask_predictions = predictions[..., num_classes + 4:]

        # Get the class with the highest confidence
        class_ids = np.argmax(box_predictions[:, 4:], axis=1)

        # Get bounding boxes for each object
        boxes = self.extract_boxes(box_predictions)

        # Apply non-maxima suppression to suppress weak, overlapping bounding boxes
        indices = nms(boxes, scores, self.iou_threshold)

        return boxes[indices], scores[indices], class_ids[indices], mask_predictions[indices]

    def process_mask_output(self, mask_predictions, mask_output):
        if mask_predictions.shape[0] == 0:
            return []

        mask_output = np.squeeze(mask_output)

        # Calculate the mask maps for each box
        num_mask, mask_height, mask_width = mask_output.shape  # CHW
        masks = sigmoid(mask_predictions @ mask_output.reshape((num_mask, -1)))
        masks = masks.reshape((-1, mask_height, mask_width))

        # Downscale the boxes to match the mask size
        scale_boxes = self.rescale_boxes(self.boxes,
                                         (self.img_height, self.img_width),
                                         (mask_height, mask_width))

        # For every box/mask pair, get the mask map
        mask_maps = np.zeros((len(scale_boxes), self.img_height, self.img_width))
        blur_size = (int(self.img_width / mask_width), int(self.img_height / mask_height))
        for i in range(len(scale_boxes)):
            scale_x1 = int(math.floor(scale_boxes[i][0]))
            scale_y1 = int(math.floor(scale_boxes[i][1]))
            scale_x2 = int(math.ceil(scale_boxes[i][2]))
            scale_y2 = int(math.ceil(scale_boxes[i][3]))

            x1 = int(math.floor(self.boxes[i][0]))
            y1 = int(math.floor(self.boxes[i][1]))
            x2 = int(math.ceil(self.boxes[i][2]))
            y2 = int(math.ceil(self.boxes[i][3]))

            scale_crop_mask = masks[i][scale_y1:scale_y2, scale_x1:scale_x2]
            crop_mask = cv2.resize(scale_crop_mask,
                                   (x2 - x1, y2 - y1),
                                   interpolation=cv2.INTER_CUBIC)

            crop_mask = cv2.blur(crop_mask, blur_size)
            crop_mask = (crop_mask > 0.5).astype(np.uint8)
            mask_maps[i, y1:y2, x1:x2] = crop_mask

        return mask_maps

    def extract_boxes(self, box_predictions):
        # Extract boxes from predictions
        boxes = box_predictions[:, :4]

        # Scale boxes to original image dimensions
        boxes = self.rescale_boxes(boxes,
                                   (self.input_height, self.input_width),
                                   (self.img_height, self.img_width))

        # Convert boxes to xyxy format
        boxes = xywh2xyxy(boxes)

        # Check the boxes are within the image
        boxes[:, 0] = np.clip(boxes[:, 0], 0, self.img_width)
        boxes[:, 1] = np.clip(boxes[:, 1], 0, self.img_height)
        boxes[:, 2] = np.clip(boxes[:, 2], 0, self.img_width)
        boxes[:, 3] = np.clip(boxes[:, 3], 0, self.img_height)

        return boxes

    def draw_detections(self, image, draw_scores=True, mask_alpha=0.4):
        return draw_detections(image, self.boxes, self.scores,
                               self.class_ids, mask_alpha)

    def draw_masks(self, image, draw_scores=True, mask_alpha=0.5):
        return draw_detections(image, self.boxes, self.scores,
                               self.class_ids, mask_alpha, mask_maps=self.mask_maps)

    def get_input_details(self):
        model_inputs = self.session.get_inputs()
        self.input_names = [model_inputs[i].name for i in range(len(model_inputs))]

        self.input_shape = model_inputs[0].shape
        self.input_height = self.input_shape[2]
        self.input_width = self.input_shape[3]

    def get_output_details(self):
        model_outputs = self.session.get_outputs()
        self.output_names = [model_outputs[i].name for i in range(len(model_outputs))]

    @staticmethod
    def rescale_boxes(boxes, input_shape, image_shape):
        # Rescale boxes to original image dimensions
        input_shape = np.array([input_shape[1], input_shape[0], input_shape[1], input_shape[0]])
        boxes = np.divide(boxes, input_shape, dtype=np.float32)
        boxes *= np.array([image_shape[1], image_shape[0], image_shape[1], image_shape[0]])
        return boxes


if __name__ == '__main__':
    # model_path = "yolov8n-seg.onnx"
    model_path = "best_quantized.onnx"
    # model_path = "train8/weights/best.onnx"

    # Initialize the instance segmentation model
    yoloseg = YOLOSeg(model_path, conf_thres=0.3, iou_thres=0.5)

    # img = cv2.imread('bus.jpg')
    img = cv2.imread('street.jpeg')

    # Detect objects
    yoloseg(img)

    # Draw detections
    combined_img = yoloseg.draw_masks(img)
    # cv2.imwrite("Output_yolov10_seg_quantized.jpg", combined_img)
    cv2.imwrite("street_Output_yolov10_seg_quantized.jpg", combined_img)
    # cv2.namedWindow("Output", cv2.WINDOW_NORMAL)
    # cv2.imshow("Output", combined_img)
    # cv2.waitKey(0)
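To see what quantization costs or saves at run time, the YOLOSeg class above can be timed against both models. A rough single-image latency sketch (not a rigorous benchmark), meant to run inside the same main block, assuming both ONNX files exist at the paths shown; the first call is a warm-up so session initialization is excluded:

    for path in ["train8/weights/best.onnx", "best_quantized.onnx"]:
        seg = YOLOSeg(path, conf_thres=0.3, iou_thres=0.5)
        seg(img)  # warm-up run
        start = time.perf_counter()
        for _ in range(10):
            seg(img)
        print(f"{path}: {(time.perf_counter() - start) / 10 * 1000:.1f} ms/image")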
Input image: