# Source: like-code/PYTHON/autoAnnotation.py (branch: master)

from ultralytics import YOLO
import os
import cv2
import numpy as np

# Load the pretrained YOLOv8 face-detection weights (a small-receptive-field model is preferred).
# Author's note: if the custom face_yolov8n.pt is unavailable, an official lightweight face model can be used instead.
face_model = YOLO('./face_yolov8n.pt')  # author's suggestion: consider yolov8n-p2.pt (better suited to small inputs)

# Dataset paths: source images and the directory that receives the generated labels.
data_path = "D:\\yolo\\biaoqing\\FER2013\\images\\train"
save_label_path = "D:\\yolo\\biaoqing\\FER2013\\labels\\train"
os.makedirs(save_label_path, exist_ok=True)

# ========== Key change 1: canvas size chosen for 48x48 inputs (96 instead of 640, to avoid over-scaling) ==========
TARGET_SIZE = 96  # 96 is a multiple of 32 and exactly 2x the 48-pixel input side, so upscaling blur stays low
# TARGET_SIZE = 640  # previous value: enlarges a 48x48 image roughly 13x; not recommended by the author

def preprocess_image(img):
    """Upscale an image and centre it on a square TARGET_SIZE canvas.

    Any input whose height/width is not exactly 48x48 is first resized to
    48x48 (cubic interpolation, aspect ratio not preserved). The 48x48 image
    is then scaled by ``TARGET_SIZE / 48`` and pasted in the middle of a
    ``TARGET_SIZE x TARGET_SIZE`` 3-channel uint8 canvas filled with 114.

    Args:
        img: Image array indexed as (height, width, ...).
            NOTE(review): the paste into the 3-channel canvas presumably
            relies on ``img`` having 3 channels — confirm against callers.

    Returns:
        A ``(padded, padding_info)`` pair where ``padding_info`` is
        ``(scale, left, top, new_w, new_h)``: the scale factor applied, the
        left/top offsets of the pasted image inside the canvas, and the
        pasted image's width and height.
    """
    src_h, src_w = img.shape[:2]

    # Normalise every input to the expected 48x48 size before scaling.
    if (src_h, src_w) != (48, 48):
        img = cv2.resize(img, (48, 48), interpolation=cv2.INTER_CUBIC)
        src_h = src_w = 48

    # Scale so the longer side exactly fills the canvas.
    scale = TARGET_SIZE / max(src_h, src_w)
    new_w = int(src_w * scale)
    new_h = int(src_h * scale)
    resized = cv2.resize(img, (new_w, new_h), interpolation=cv2.INTER_CUBIC)

    # Offsets that centre the resized image on the canvas.
    top = (TARGET_SIZE - new_h) // 2
    left = (TARGET_SIZE - new_w) // 2

    canvas = np.full((TARGET_SIZE, TARGET_SIZE, 3), 114, dtype=np.uint8)
    canvas[top:top + new_h, left:left + new_w] = resized
    return canvas, (scale, left, top, new_w, new_h)

def convert_coords_back(box, padding_info, original_size=(48, 48)):
    """Map a box from the padded canvas back to normalised YOLO coordinates.

    Args:
        box: Four values ``(x1, y1, x2, y2)`` in padded-canvas pixels.
        padding_info: ``(scale, left, top, new_w, new_h)`` as returned by
            ``preprocess_image``; only ``scale``, ``left`` and ``top`` are used.
        original_size: ``(height, width)`` of the image the result is
            normalised against.

    Returns:
        ``(x_center, y_center, width, height)``, each divided by the
        corresponding side of ``original_size``.
    """
    scale, left, top, _new_w, _new_h = padding_info
    x1_pad, y1_pad, x2_pad, y2_pad = box
    img_h = original_size[0]
    img_w = original_size[1]

    def _to_original(value, offset, limit):
        # Remove the padding offset, undo the scaling, then clamp to the
        # image bounds so rounding errors cannot push a corner outside.
        return np.clip((value - offset) / scale, 0, limit)

    x_min = _to_original(x1_pad, left, img_w)
    y_min = _to_original(y1_pad, top, img_h)
    x_max = _to_original(x2_pad, left, img_w)
    y_max = _to_original(y2_pad, top, img_h)

    # YOLO format: box centre and size, normalised by the image dimensions.
    return (
        (x_min + x_max) / 2 / img_w,
        (y_min + y_max) / 2 / img_h,
        (x_max - x_min) / img_w,
        (y_max - y_min) / img_h,
    )

# ---------------------------------------------------------------------------
# Auto-annotation driver.
#
# Every sub-directory of data_path is treated as one class. For each image in
# a class directory a YOLO-format label file ("<class_id> xc yc w h", all
# normalised) is written to the mirrored sub-directory under save_label_path.
# A class-id -> folder-name mapping is saved to classes.txt at the end.
# ---------------------------------------------------------------------------

# Accepted image extensions; matched case-insensitively so files such as
# "IMG_0001.JPG" are not silently skipped.
IMAGE_EXTENSIONS = ('.jpg', '.jpeg', '.png', '.bmp', '.tiff')

# Normalised (xc, yc, w, h) of a box that covers the whole image.
FULL_IMAGE_BOX = (0.5, 0.5, 1.0, 1.0)

# ========== Key change 2: strategy selection ==========
# Strategy A (True): label the whole image as the face. Per the original
# author, FER2013 images are already cropped faces, so detection is unnecessary.
# Strategy B (False): run the YOLO face detector and convert its boxes back.
# The flag is loop-invariant, so it is set once here rather than per image.
use_full_image_as_face = True

# Discover the class folders and build the name -> id mapping.
# Sorting keeps the ids stable between runs.
category_folders = sorted(
    f for f in os.listdir(data_path) if os.path.isdir(os.path.join(data_path, f))
)
category_to_id = {category: idx for idx, category in enumerate(category_folders)}

print("发现以下类别文件夹:")
for category, class_id in category_to_id.items():
    print(f"  {category} -> 类别ID: {class_id}")

# Running total across all classes.
total_images = 0

for category_name, class_id in category_to_id.items():
    category_path = os.path.join(data_path, category_name)
    category_save_path = os.path.join(save_label_path, category_name)
    os.makedirs(category_save_path, exist_ok=True)

    print(f"\n处理类别: {category_name} (ID: {class_id})")
    print(f"源路径: {category_path}")
    print(f"标签保存路径: {category_save_path}")

    category_images = 0

    for img_name in os.listdir(category_path):
        if not img_name.lower().endswith(IMAGE_EXTENSIONS):
            continue

        img_path = os.path.join(category_path, img_name)
        img = cv2.imread(img_path)

        if img is None:
            print(f"  警告: 无法读取图片 {img_name}")
            continue

        # Work out every box BEFORE opening the label file, so a failure
        # during detection cannot leave a truncated/empty label behind.
        if use_full_image_as_face:
            label_boxes = [FULL_IMAGE_BOX]
        else:
            # Strategy B: YOLO detection on the padded TARGET_SIZE canvas.
            processed_img, padding_info = preprocess_image(img)
            # ========== Key change 3: detection parameters tuned for small inputs ==========
            results = face_model(
                processed_img,
                imgsz=TARGET_SIZE,  # run at the canvas size rather than the default
                conf=0.2,  # lower confidence threshold (small targets score low)
                iou=0.3,   # lower IoU threshold, as chosen by the original author
                device="cpu",  # explicit device for machines without a GPU
                verbose=False  # suppress per-call logging
            )
            detected_boxes = results[0].boxes.xyxy.cpu().numpy() if len(results[0].boxes) > 0 else []

            label_boxes = []
            for box in detected_boxes:
                xc, yc, bw, bh = convert_coords_back(box, padding_info, (48, 48))
                # Drop boxes that collapse to zero area after clipping.
                if bw > 0 and bh > 0:
                    label_boxes.append((xc, yc, bw, bh))

            if not label_boxes:
                # No usable box (nothing detected, or every box was degenerate):
                # fall back to strategy A so the label file is never empty.
                label_boxes = [FULL_IMAGE_BOX]
                print(f"  警告: {img_name} 检测不到人脸框，已标注整张图")

        label_filename = os.path.splitext(img_name)[0] + '.txt'
        label_path = os.path.join(category_save_path, label_filename)

        with open(label_path, 'w', encoding='utf-8') as f:
            f.writelines(
                f"{class_id} {xc:.6f} {yc:.6f} {bw:.6f} {bh:.6f}\n"
                for xc, yc, bw, bh in label_boxes
            )

        category_images += 1
        total_images += 1

        if category_images % 100 == 0:
            print(f"  已处理 {category_images} 张图片...")

    print(f"完成! 类别 {category_name} 共处理 {category_images} 张图片")

print(f"\n{'='*50}")
print("标注完成!")
print(f"总处理图片数: {total_images}")
print(f"类别数量: {len(category_folders)}")
print(f"标签保存路径: {save_label_path}")
print(f"{'='*50}")

# Save the class-id -> folder-name mapping. UTF-8 is requested explicitly
# because folder names may contain non-ASCII characters.
mapping_path = os.path.join(save_label_path, "classes.txt")
with open(mapping_path, 'w', encoding='utf-8') as f:
    for category, class_id in category_to_id.items():
        f.write(f"{class_id}: {category}\n")
print(f"类别映射已保存至: {mapping_path}")