安全帽目标检测-CSDN博客

安全帽数据集

这里我们使用的安全帽数据集是 HelmetDetection，这是一个公开数据集，里面包含 5000 张 VOC 标注格式的图像，分为三个类别，分别是 0: head、1: helmet、2: person。

我们将数据集下载后，首先需要将其转换为YOLO标注格式：

import os
import xml.etree.ElementTree as ET
import random
import shutil

def collect_classes(voc_annotations_dir):
    """
    Gather every class name that occurs in a directory of VOC annotations.

    Only directory entries whose name ends with ``.xml`` are parsed; for each
    of them the text of the ``<name>`` child of every ``<object>`` element is
    recorded.

    :param voc_annotations_dir: directory holding the VOC-format XML files
    :return: sorted list of the distinct class names that were found
    """
    annotation_names = [
        entry for entry in os.listdir(voc_annotations_dir) if entry.endswith('.xml')
    ]

    found = set()
    for annotation_name in annotation_names:
        document = ET.parse(os.path.join(voc_annotations_dir, annotation_name))
        # One <name> per <object>; the set removes duplicates across files.
        found.update(
            node.find('name').text for node in document.getroot().findall('object')
        )

    return sorted(found)


def create_class_mapping(classes):
    """
    Build a lookup table from class name to integer index.

    Indices follow the iteration order of ``classes``, starting at 0. If a
    name occurs more than once, the index of its last occurrence is kept.

    :param classes: iterable of class names
    :return: dict mapping each class name to its index
    """
    mapping = {}
    for position, label in enumerate(classes):
        mapping[label] = position
    return mapping


def convert_voc_to_yolo(voc_annotation_path, yolo_annotation_path, class_mapping):
    """
    Convert one VOC XML annotation file into a YOLO-format TXT label file.

    Every ``<object>`` whose class is present in ``class_mapping`` becomes one
    output line ``class_index x_center y_center width height``. The four box
    values are divided by the image size read from the XML ``<size>`` element
    and written with six decimals. Objects with an unmapped class are reported
    on stdout and skipped. The output file is always written, even when no
    object was kept.

    Box corners are put in ascending order and clamped to the image bounds
    before normalisation, so a box that sticks out of the image (or whose
    min/max corners are swapped) still yields values inside [0, 1]. Boxes that
    already lie inside the image are written exactly as before.

    :param voc_annotation_path: path of the VOC-format XML file to read
    :param yolo_annotation_path: path of the YOLO-format TXT file to write
    :param class_mapping: dict mapping class names to integer indices
    :raises ZeroDivisionError: if the XML declares an image width or height of 0
    """
    root = ET.parse(voc_annotation_path).getroot()

    # Image size, used to normalise the pixel coordinates.
    size = root.find('size')
    image_width = int(size.find('width').text)
    image_height = int(size.find('height').text)

    def clamp(value, upper):
        """Limit a pixel coordinate to the closed range [0, upper]."""
        return min(max(value, 0.0), upper)

    yolo_lines = []
    for obj in root.findall('object'):
        class_name = obj.find('name').text
        if class_name not in class_mapping:
            print(f"类别 '{class_name}' 不在映射中，跳过该目标。")
            continue
        class_index = class_mapping[class_name]

        bbox = obj.find('bndbox')
        xmin = float(bbox.find('xmin').text)
        ymin = float(bbox.find('ymin').text)
        xmax = float(bbox.find('xmax').text)
        ymax = float(bbox.find('ymax').text)

        # Clamp first, then order, so width/height can never be negative
        # and no coordinate can exceed the image.
        left, right = sorted((clamp(xmin, image_width), clamp(xmax, image_width)))
        top, bottom = sorted((clamp(ymin, image_height), clamp(ymax, image_height)))

        # Normalised YOLO box: centre point plus width and height.
        x_center = (left + right) / 2.0 / image_width
        y_center = (top + bottom) / 2.0 / image_height
        width = (right - left) / image_width
        height = (bottom - top) / image_height

        yolo_lines.append(
            f"{class_index} {x_center:.6f} {y_center:.6f} {width:.6f} {height:.6f}"
        )

    # Lines are newline-separated with no trailing newline.
    with open(yolo_annotation_path, 'w', encoding='utf-8') as f:
        f.write("\n".join(yolo_lines))


def split_dataset(xml_files, train_ratio=0.8, val_ratio=0.2, test_ratio=0.1):
    """
    Randomly split a collection of file names into train / val / test lists.

    The input is copied before shuffling, so the caller's sequence keeps its
    order and any sequence type (list, tuple, ...) is accepted.

    The train and validation sizes are ``int(total * ratio)``; the test set is
    whatever remains after those two. ``test_ratio`` does not take part in the
    computation and is kept only for interface compatibility. With the default
    ratios (0.8 + 0.2) the test set therefore only receives files left over by
    the integer truncation, which is often none at all.

    :param xml_files: sequence of all XML file names
    :param train_ratio: fraction of files for the training set, default 0.8
    :param val_ratio: fraction of files for the validation set, default 0.2
    :param test_ratio: unused; the test set is the remainder, default 0.1
    :return: tuple ``(train_set, val_set, test_set)`` of lists
    """
    shuffled = list(xml_files)  # copy: do not reorder the caller's data
    random.shuffle(shuffled)
    total_count = len(shuffled)

    # Cut points inside the shuffled list.
    train_end = int(total_count * train_ratio)
    val_end = train_end + int(total_count * val_ratio)

    train_set = shuffled[:train_end]
    val_set = shuffled[train_end:val_end]
    test_set = shuffled[val_end:]

    return train_set, val_set, test_set


def organize_files(voc_images_dir, voc_annotations_dir, output_dir, train_set, val_set, test_set):
    """
    Copy images and label files into ``images/<subset>`` and ``labels/<subset>``.

    For every file name in the three subsets the extension is stripped and the
    matching image and ``.txt`` label are copied below ``output_dir``. The
    image is looked up with several common extensions (``.png`` first, as
    before, then ``.jpg``, ``.jpeg``, ``.bmp``) and keeps its extension in the
    destination. Missing images or labels only produce a warning on stdout.

    :param voc_images_dir: directory holding the original images
    :param voc_annotations_dir: directory holding the ``.txt`` label files to copy
    :param output_dir: root directory of the organised dataset
    :param train_set: file names belonging to the training set
    :param val_set: file names belonging to the validation set
    :param test_set: file names belonging to the test set
    """
    # Probed in this order; ".png" stays first so existing setups behave the same.
    image_extensions = (".png", ".jpg", ".jpeg", ".bmp")

    # Create images/{train,val,test} and labels/{train,val,test}.
    images_dir = os.path.join(output_dir, "images")
    labels_dir = os.path.join(output_dir, "labels")
    for parent in (images_dir, labels_dir):
        for subset_name in ("train", "val", "test"):
            os.makedirs(os.path.join(parent, subset_name), exist_ok=True)

    def copy_files(file_list, subset):
        """Copy the image and label of every entry in file_list into subset."""
        for file_name in file_list:
            base_name = os.path.splitext(file_name)[0]

            # Image: take the first extension that exists on disk.
            for extension in image_extensions:
                src_image_path = os.path.join(voc_images_dir, base_name + extension)
                if os.path.exists(src_image_path):
                    dst_image_path = os.path.join(images_dir, subset, base_name + extension)
                    shutil.copy(src_image_path, dst_image_path)
                    break
            else:
                # Report the primary (.png) candidate, as the message always did.
                missing_image_path = os.path.join(voc_images_dir, base_name + ".png")
                print(f"警告：未找到图片文件 {missing_image_path}")

            # Label file.
            src_label_path = os.path.join(voc_annotations_dir, base_name + ".txt")
            dst_label_path = os.path.join(labels_dir, subset, base_name + ".txt")
            if os.path.exists(src_label_path):
                shutil.copy(src_label_path, dst_label_path)
            else:
                print(f"警告：未找到标注文件 {src_label_path}")

    copy_files(train_set, "train")
    copy_files(val_set, "val")
    copy_files(test_set, "test")

    print("图片和标注文件已成功组织到对应的文件夹中！")


def batch_convert_voc_to_yolo(voc_images_dir, voc_annotations_dir, output_dir):
    """
    Convert a whole VOC annotation directory to YOLO format and split it.

    Steps: collect the class names and build the name-to-index mapping, split
    the XML file names into train / val / test, convert every XML file into a
    ``.txt`` label inside a temporary ``temp_labels`` directory below
    ``output_dir``, copy images and labels into the final layout, and finally
    delete the temporary directory. The temporary directory is removed even
    when conversion or copying raises, so a failed run leaves no stray labels.

    :param voc_images_dir: directory holding the original images
    :param voc_annotations_dir: directory holding the VOC-format XML files
    :param output_dir: root directory of the organised dataset
    """
    # Class names and their indices.
    classes = collect_classes(voc_annotations_dir)
    class_mapping = create_class_mapping(classes)
    print("发现以下类别：", classes)

    xml_files = [f for f in os.listdir(voc_annotations_dir) if f.endswith('.xml')]

    train_set, val_set, test_set = split_dataset(xml_files)
    print(f"训练集数量：{len(train_set)}，验证集数量：{len(val_set)}，测试集数量：{len(test_set)}")

    # Converted labels are staged here before being sorted into subsets.
    temp_labels_dir = os.path.join(output_dir, "temp_labels")
    os.makedirs(temp_labels_dir, exist_ok=True)

    try:
        for xml_file in xml_files:
            voc_annotation_path = os.path.join(voc_annotations_dir, xml_file)
            yolo_annotation_path = os.path.join(
                temp_labels_dir, os.path.splitext(xml_file)[0] + ".txt"
            )
            convert_voc_to_yolo(voc_annotation_path, yolo_annotation_path, class_mapping)

        organize_files(voc_images_dir, temp_labels_dir, output_dir, train_set, val_set, test_set)
    finally:
        # Always clean up the staging directory, also on failure.
        shutil.rmtree(temp_labels_dir)

    print("数据集转换与组织完成！")

# Example usage
if __name__ == "__main__":
    # Convert and split in one call: source images, source VOC annotations,
    # and the root directory of the resulting YOLO dataset.
    batch_convert_voc_to_yolo(
        voc_images_dir="D:/project_mine/detection/datasets/HelmetDetection/train/JPEGImages",
        voc_annotations_dir="D:/project_mine/detection/datasets/HelmetDetection/train/Annotations",
        output_dir="D:/project_mine/detection/datasets/anquanmao",
    )

模型训练

模型训练很简单，我们只需要修改一下数据集配置文件即可


path: ../datasets/anquanmao # dataset root dir
train: images/train # train images (relative to 'path')
val: images/val # val images (relative to 'path')
test: # test images (optional)
# Classes
names:
  0: head
  1: helmet
  2: person

随后，即可开启模型训练

from ultralytics import YOLO

# Build the model from the YAML definition only: the chained .load(...) calls
# that would transfer pretrained weights are commented out on this line.
model=YOLO("yolo11.yaml")#.load("yolo11n.pt")#.load("yolo11s.pt")
# Train the model on the dataset described by anquanmao.yaml
results = model.train(data="anquanmao.yaml",
                      epochs=60,
                      batch=16,       # adjust to the available GPU memory (batch=8 suggested for a T4)
                      imgsz=640,
                      device="0",     # GPU ID to train on
                      optimizer="AdamW",
                      lr0=1e-4,
                      warmup_epochs=4,
                      label_smoothing=0.1,
                      amp=True)

结果如下：

在这里插入图片描述
这里的 mAP 值不高，是因为“人”（person）这个类别似乎缺失了，导致该类别的 AP 值为 0；单独来看，头（head）和安全帽（helmet）的 AP 值均在 0.5 以上。

ONNX模型推理

为使其能够具有更好的扩展性，我们将原本的pt模型文件转换为ONNX格式，随后进行推理，代码如下：

import time
import cv2
import numpy as np
import onnxruntime
#from my_utils.detect.utils import detections_dog
from utils import xywh2xyxy, multiclass_nms

class SafeHat:
    """
    Helmet detector that runs a YOLO model exported to ONNX via ONNX Runtime.

    Calling an instance with an image runs preprocessing, inference and
    postprocessing and returns ``(boxes, scores, class_ids)``.
    """

    def __init__(self, path, conf_thres=0.7, iou_thres=0.5):
        """Store the score / IoU thresholds and load the model at ``path``."""
        self.conf_threshold = conf_thres
        self.iou_threshold = iou_thres
        self.initialize_model(path)

    def __call__(self, image):
        """Shorthand for :meth:`detect_objects`."""
        return self.detect_objects(image)

    def initialize_model(self, path):
        """Create the inference session and cache its input / output details."""
        available_providers = onnxruntime.get_available_providers()
        self.session = onnxruntime.InferenceSession(path, providers=available_providers)
        self.get_input_details()
        self.get_output_details()

    def detect_objects(self, image):
        """Run the full pipeline on one image and keep the results on ``self``."""
        raw_outputs = self.inference(self.prepare_input(image))
        self.boxes, self.scores, self.class_ids = self.process_output(raw_outputs)
        return self.boxes, self.scores, self.class_ids

    def prepare_input(self, image):
        """
        Turn an image into the float32 tensor that is fed to the model.

        Records the original image size, converts BGR to RGB, resizes to the
        model input size, scales pixel values to 0..1, moves the channel axis
        to the front and adds a leading batch axis.
        """
        self.img_height, self.img_width = image.shape[:2]

        rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        resized = cv2.resize(rgb, (self.input_width, self.input_height))

        # Scale to 0..1, then HWC -> CHW.
        channels_first = (resized / 255.0).transpose(2, 0, 1)
        return np.expand_dims(channels_first, axis=0).astype(np.float32)

    def inference(self, input_tensor):
        """Run the session on ``input_tensor`` bound to the first model input."""
        feed = {self.input_names[0]: input_tensor}
        return self.session.run(self.output_names, feed)

    def process_output(self, output):
        """
        Filter raw predictions by score and apply per-class NMS.

        After squeezing and transposing the first output, each row is treated
        as four box values followed by the class scores. Returns three empty
        lists when no row passes the confidence threshold, otherwise the
        selected boxes, scores and class ids.
        """
        predictions = np.squeeze(output[0]).T

        # Best class score per row decides whether the row is kept.
        scores = predictions[:, 4:].max(axis=1)
        keep = scores > self.conf_threshold
        predictions = predictions[keep, :]
        scores = scores[keep]

        if not len(scores):
            return [], [], []

        class_ids = predictions[:, 4:].argmax(axis=1)
        boxes = self.extract_boxes(predictions)

        indices = multiclass_nms(boxes, scores, class_ids, self.iou_threshold)
        return boxes[indices], scores[indices], class_ids[indices]

    def extract_boxes(self, predictions):
        """Rescale the four box columns to the image, then apply ``xywh2xyxy``."""
        scaled = self.rescale_boxes(predictions[:, :4])
        return xywh2xyxy(scaled)

    def rescale_boxes(self, boxes):
        """Map boxes from model-input coordinates to original-image coordinates."""
        model_size = np.array(
            [self.input_width, self.input_height, self.input_width, self.input_height]
        )
        image_size = np.array(
            [self.img_width, self.img_height, self.img_width, self.img_height]
        )
        scaled = np.divide(boxes, model_size, dtype=np.float32)
        scaled *= image_size
        return scaled

    def get_input_details(self):
        """Cache the input names and the input shape / height / width."""
        model_inputs = self.session.get_inputs()
        self.input_names = [model_input.name for model_input in model_inputs]

        # Height and width are read from positions 2 and 3 of the first input's shape.
        self.input_shape = model_inputs[0].shape
        self.input_height = self.input_shape[2]
        self.input_width = self.input_shape[3]

    def get_output_details(self):
        """Cache the names of all model outputs."""
        self.output_names = [model_output.name for model_output in self.session.get_outputs()]


#     def draw_detections(self, image, draw_scores=True, mask_alpha=0.4):
#
#         return detections_dog(image, self.boxes, self.scores,
#                               self.class_ids, mask_alpha)
#
#
# if __name__ == "__main__":
#     model_path = "anquanmao.onnx"  # 替换为你的 ONNX 模型路径
#     image_path = "down_head.png"          # 替换为你的测试图像路径
#
#     # 初始化模型
#     detector = SafeHat(model_path)
#
#     # 加载图像
#     image = cv2.imread(image_path)
#
#     # 检测对象
#     boxes, scores, class_ids = detector(image)
#
#     # 绘制检测结果
#     result_image = detector.draw_detections(image)
#
#     # 显示结果
#     cv2.imshow("Detections", result_image)
#     cv2.waitKey(0)
#     cv2.destroyAllWindows()