可以对目标检测文件夹的导入进行优化吗

lhj5426 commented 1 month ago

https://github.com/user-attachments/assets/118a1281-a5db-4946-bb0a-d81d9ab16b8e

如视频所示我是使用yolo 代码直接进行的推理并生成txt的代码如下

from ultralytics import YOLO
import os
import sys
import time
from queue import Queue
from threading import Thread

# Load the detection model from a hard-coded local weights file.
model = YOLO(r'J:\G\Desktop\V11\runs\detect\train11\weights\best.pt', task='detect')
# Queue of folder paths: filled by process_paths, drained by folder_processor.
folders_queue = Queue()
# Intended cap on the queue size.
# NOTE(review): this constant is not referenced anywhere in the code shown, and
# folders_queue is created without a maxsize, so the queue is unbounded.
MAX_QUEUE_SIZE = 10

def process_image_folder(folder_path):
    """Run detection on every image under *folder_path* and write label files.

    The folder is walked recursively; files ending in .png/.jpg/.jpeg/.bmp
    (case-insensitive) are treated as images. For each image the module-level
    ``model`` is asked for a prediction and a ``<image name>.txt`` file is
    written into ``<folder_path>/biaoqianTXT`` with one line per detected box:
    ``class x_center y_center width height`` (values taken from ``xywhn``).

    Errors are reported with ``print`` and never propagated: a failure on one
    image does not stop the remaining images, and a failure outside the
    per-image loop aborts only this folder.

    Args:
        folder_path: Directory to scan for images.

    Returns:
        None.
    """
    image_suffixes = ('.png', '.jpg', '.jpeg', '.bmp')
    try:
        # Collect the path of every image below the folder.
        image_paths = [
            os.path.join(current_dir, filename)
            for current_dir, _, filenames in os.walk(folder_path)
            for filename in filenames
            if filename.lower().endswith(image_suffixes)
        ]

        total_images = len(image_paths)
        if total_images == 0:
            print(f"跳过文件夹 {folder_path}: 未找到图片")
            return

        print(f"\n开始处理文件夹: {folder_path}，共找到 {total_images} 张图片。")

        # All label files for this folder go into one biaoqianTXT directory.
        label_dir = os.path.join(folder_path, 'biaoqianTXT')
        os.makedirs(label_dir, exist_ok=True)

        for index, image_path in enumerate(image_paths, 1):
            try:
                started_at = time.time()
                result = model.predict(source=image_path, save=True, show=False, device="cuda", verbose=False)[0]
                elapsed_ms = (time.time() - started_at) * 1000

                # Tally detections per class name, in first-seen order.
                counts_by_name = {}
                for box in result.boxes:
                    class_name = result.names[int(box.cls)]
                    if class_name in counts_by_name:
                        counts_by_name[class_name] += 1
                    else:
                        counts_by_name[class_name] = 1

                # Summary such as "2 balloons, 1 qipao", or "no objects".
                summary_parts = []
                for class_name, count in counts_by_name.items():
                    plural = 's' if count > 1 else ''
                    summary_parts.append(f"{count} {class_name}{plural}")
                detection_str = ", ".join(summary_parts) or "no objects"

                # orig_shape is indexed [1] then [0] to print "<width>x<height>".
                print(f"image {index}/{total_images} {image_path}: {result.orig_shape[1]}x{result.orig_shape[0]} {detection_str}, {elapsed_ms:.1f}ms", flush=True)

                # Write the label file named after the image (extension dropped).
                stem, _ext = os.path.splitext(os.path.basename(image_path))
                txt_path = os.path.join(label_dir, f"{stem}.txt")
                with open(txt_path, 'w') as label_file:
                    for box in result.boxes:
                        class_id = int(box.cls)
                        x_center, y_center, width, height = box.xywhn[0].tolist()
                        label_file.write(f"{class_id} {x_center} {y_center} {width} {height}\n")

            except Exception as e:
                print(f"处理图片出错 {image_path}: {str(e)}")

        print(f"文件夹处理完成：{folder_path}")
    except Exception as e:
        print(f"处理文件夹时出错 {folder_path}: {str(e)}")

def folder_processor():
    """Worker loop: take folder paths off ``folders_queue`` and process them.

    Blocks on the queue with a 5-second timeout. A ``None`` item is the stop
    marker and ends the loop; any other item is passed to
    ``process_image_folder``. When the queue stays empty for the whole
    timeout, an idle message is printed and the loop keeps waiting.

    ``task_done()`` is called exactly once for every item actually fetched
    (including the ``None`` marker) and never after a timeout, because calling
    it more times than there were items raises ``ValueError``.
    """
    # The timeout exception lives in the ``queue`` module, not on the Queue
    # class; import it locally so this function does not rely on the file's
    # top-level imports being changed.
    from queue import Empty

    while True:
        try:
            folder_path = folders_queue.get(timeout=5)  # 5-second timeout
        except Empty:
            print("队列空闲，等待新的文件夹...")
            continue

        try:
            if folder_path is None:
                break
            process_image_folder(folder_path)
        finally:
            # Runs for the stop marker too, so the queue's unfinished-task
            # count stays balanced.
            folders_queue.task_done()

def process_paths(paths):
    """Turn the given paths into a folder list and process them on a worker thread.

    A path that is a file contributes its parent directory; a path that is a
    directory contributes itself; anything else is ignored. Duplicates are
    dropped while keeping first-seen order. The folders are pushed onto
    ``folders_queue`` followed by a ``None`` stop marker, and the call returns
    once the worker thread running ``folder_processor`` has finished.

    Args:
        paths: Iterable of file or directory paths.

    Returns:
        None.
    """
    # Keep only usable folder paths.
    valid_folders = []
    for path in paths:
        if os.path.isfile(path):
            # A bare file name such as "a.jpg" has an empty dirname; use the
            # current directory so the file's folder is still scanned.
            folder = os.path.dirname(path) or os.curdir
        elif os.path.isdir(path):
            folder = path
        else:
            continue
        if folder not in valid_folders:
            valid_folders.append(folder)

    total_folders = len(valid_folders)
    print(f"共找到 {total_folders} 个文件夹待处理")

    # Create and start the worker thread.
    processor_thread = Thread(target=folder_processor)
    processor_thread.start()

    # Feed the folders to the worker.
    for i, folder in enumerate(valid_folders, 1):
        print(f"添加到队列 ({i}/{total_folders}): {folder}")
        folders_queue.put(folder)
        if i % 10 == 0:  # brief pause after every 10 folders
            time.sleep(0.1)

    # Add the stop marker and wait until every folder has been processed.
    folders_queue.put(None)
    processor_thread.join()
    print("所有文件夹处理完成！")

if __name__ == "__main__":
    # Every argument after the script name is a file or folder path to process.
    dropped_paths = sys.argv[1:]
    if not dropped_paths:
        print("请将图片文件或包含图片的文件夹拖放到此脚本上运行。")
    else:
        process_paths(dropped_paths)

生成之后会在文件夹下生成一个这样的文件夹这样推理的TXT和图片数量是对应的比如我的父级路径是 D:\Ddown\训练用下载非保存到硬盘[N／A] タカスケ - 2月~3月まとめ (3082209) 2个文件夹的路径是 D:\Ddown\训练用下载非保存到硬盘[N／A] タカスケ - 2月~3月まとめ (3082209)\biaoqianTXT D:\Ddown\训练用下载非保存到硬盘[N／A] タカスケ - 2月~3月まとめ (3082209)\本篇可以优化成在导入的时候直接打开当直接打开这个父级路径的时候自动遍历子目录下图片和TXT吗

或者导入的时候先选择图片文件夹然后再弹出询问选择标签文件夹

因为现在的导入是

https://github.com/user-attachments/assets/6df9dab4-0ffa-4c11-ac76-16b63a04b266

必须额外生成按照标准训练文件夹格式

的文件夹才能导入

导出使用标准文件夹格式是为了方便直接开始训练，但是导入是为了标注，或者说是为了OCR和翻译，是不是可以不必要求导入也使用标准训练格式呢？

就是这个导入能请您优化一下吗？因为那个我电脑不用GPU跑推理的话跑yolo11X yolo8X 模型要好几秒才一张所以我想出了这种直接用yolo 在推理验证的时候直接生成对应的 TXT数据的方法让AI写了脚本现在可以批量推理然后现在推理完了导入有点小小麻烦就过来问问老哥您了

xulihang commented 1 month ago

一种方法是提供单纯导入文本框的功能，需要有一种数据交换规范。

一种是关闭离线气泡检测，改用在线检测，然后本地跑一个python服务器去调用。

lhj5426 commented 1 month ago

方法是2的话…那个绝大多数用户应该是不会搭这样的在线或者本地服务给it调用吧不代表大众仅说我自己我是没这个能力的所以方法1呢…… 可以这样【创建项目文件→打开图片文件夹→】←到这里还是正常都打开图片流程然后变动的是目标检测→导入→『选择包含标签数据txt的文件夹的单个文件夹』→导入成功这样可以吗？简化下不需要必须一个文件夹里包2个子文件夹的标准yolov11训练用格式……

这样的好处是每一本的文件夹可以减少一些工作量

比如现在我是用Python脚本自动划分并生成2个yolo训练用的标准训练用文件夹格式才能导入，等导入之后，回头我还得反手删了这个文件夹……这样有点不太方便。改成上面的方式就能省掉【先创建标准训练格式文件夹导入然后再删除这个生成的标准文件夹的步骤】。请您思量下，有没有可优化的空间

题外话…说是11比8精度又提升了可我11最近一直卡在 map50 0.73 map50-95 0.6左右的精度徘徊哈哈愁死了

虽然ocr无法识别的拟音是过滤掉了但正常文字识别会丢哈哈这个训练还真搞人心态

lhj5426 commented 1 month ago

话分2头说如果哥您能写一个可以调用本地YOLO 来给IT进行推理识别文字区域的话那应该是最好的解决方法了吧推理代码就是楼上发的那种基本上也就是换下模型地址和识别参数之类的没什么必要的改动而且本身运行IT的时候就需要开2个黑框一个OCR 一个涂抹也不介意再开一个

虚拟环境不知方法2 老哥您可以帮忙下写一个让 ImageTrans 使用在线模型来调用本地模型进行推理么这样就是【创建项目文件→打开图片文件夹→】←到这里还是正常都打开图片流程然后正常执行识别文字气泡就可以了连 2次导入都不用了不知老哥可否成为一下黄风大圣我来助你

xulihang commented 1 month ago

需要添加一个通过本地服务器调用的方式，然后可以当离线模型一样选择调用

lhj5426 commented 1 month ago

满城百姓感谢不尽

xulihang commented 3 weeks ago

v3.11.0增加了使用本地Python去用YOLO的方法：

运行这个本地服务项目：https://github.com/xulihang/YOLO-Server 可以改自己里面的模型名称
ImageTrans里关闭离线气泡检测，然后本地服务地址用默认的就行

lhj5426 commented 3 weeks ago

v3.11.0增加了使用本地Python去用YOLO的方法：

运行这个本地服务项目：https://github.com/xulihang/YOLO-Server 可以改自己里面的模型名称

ImageTrans里关闭离线气泡检测，然后本地服务地址用默认的就行

非常感谢现在速度飞快了

https://github.com/user-attachments/assets/cb34ee27-d20e-401f-8b66-9d98730633f8

话说这里选择模型应该和服务器就没关系了吧? 选哪个用的都是服务器里是这的对吧老哥

lhj5426 commented 3 weeks ago

更提升了继续训练的信心现在效果还不是理想

https://github.com/user-attachments/assets/5c4457db-de0a-423e-80a6-8f69838cb507

xulihang commented 3 weeks ago

噢，启用在线的时候就不用选择了，这个我忘记关了。

xulihang commented 3 weeks ago

这个模式目前不支持添加分类，之后再完善了

lhj5426 commented 3 days ago

v3.11.0增加了使用本地Python去用YOLO的方法：

运行这个本地服务项目：https://github.com/xulihang/YOLO-Server 可以改自己里面的模型名称

ImageTrans里关闭离线气泡检测，然后本地服务地址用默认的就行

我把这个代码改成了这样：可以精确控制每一个类别在每一个方向上的扩展像素，从而更精确地调整生成的检测框，弥补有时候某一个类在某一个方向上框选不全或者框选太靠近文字的问题

#!/usr/bin/env python3

import os
from PIL import Image
from io import BytesIO
from bottle import BaseRequest, route, run, request, static_file
import base64
from ultralytics import YOLO

# Set bottle's MEMFILE_MAX to 10 MiB (1024 * 1024 * 10).
# NOTE(review): presumably raised so that large base64-encoded images posted
# to /detect are accepted — confirm against the bottle documentation.
BaseRequest.MEMFILE_MAX = 1024 * 1024 * 10  # (or whatever you want)

# Per-class box expansion, keyed by class id. Each tuple is ordered
# (top, bottom, left, right), matching the unpacking in adjust_bbox; the
# amounts are added to boxes taken from box.xywh in detect().
# NOTE(review): the previous inline comments listed numbers that did not match
# the tuples (e.g. "top 5, bottom 20" for class 0 and "bottom 150" for class 1).
# The comments below describe the values the code actually uses — confirm that
# these values, and not the old comments, are what was intended.
EXPAND_VALUES = {
    0: (15, 0, 0, 0),   # balloon: top 15, bottom 0, left 0, right 0
    1: (0, 00, 30, 0),  # qipao: top 0, bottom 0, left 30, right 0
    2: (0, 0, 0, 0),    # fangkuai: no expansion
    3: (0, 0, 0, 0),    # changfangtiao: no expansion
    4: (0, 0, 0, 0)     # kuangwai: no expansion
}

def adjust_bbox(x_center, y_center, w, h, expand_values):
    """Convert a centre-based box to a top-left box grown on each side.

    Args:
        x_center: X coordinate of the box centre.
        y_center: Y coordinate of the box centre.
        w: Box width.
        h: Box height.
        expand_values: ``(top, bottom, left, right)`` amounts to add on each
            side, in the same units as the box.

    Returns:
        ``(x, y, width, height)`` where ``(x, y)`` is the top-left corner of
        the expanded box.
    """
    pad_top, pad_bottom, pad_left, pad_right = expand_values
    # Move from the centre to the original edge, then push it outwards.
    left_edge = x_center - 0.5 * w - pad_left
    top_edge = y_center - 0.5 * h - pad_top
    return left_edge, top_edge, w + pad_left + pad_right, h + pad_top + pad_bottom

@route('/detect', method='POST')
def detect():
    """Handle POST /detect: run the model on a base64 image and return boxes.

    Reads the ``"image"`` field of the JSON request body, base64-decodes it,
    opens it as an image and runs ``model.predict`` with ``conf=0.5``.

    Returns:
        ``{"results": [...]}`` where each entry is ``{"location": {...}}``.
        When the prediction has ``boxes``, each location holds
        ``left``/``top``/``width``/``height`` after the per-class expansion
        from ``EXPAND_VALUES`` (no expansion for unlisted classes). Otherwise,
        when it has ``obb``, each location holds the unexpanded
        ``left``/``top``/``width``/``height`` plus ``rotation``.
    """
    print("detect")
    payload = request.json
    raw_image = base64.b64decode(payload["image"])
    net_img = Image.open(BytesIO(raw_image))

    prediction = model.predict(source=net_img, conf=0.5)[0]
    results = []
    if prediction.boxes is not None:
        for box in prediction.boxes:
            class_id = int(box.cls)
            cx, cy, box_w, box_h = box.xywh[0].tolist()
            padding = EXPAND_VALUES.get(class_id, (0, 0, 0, 0))
            left, top, width, height = adjust_bbox(cx, cy, box_w, box_h, padding)
            results.append({
                "location": {
                    "left": left,
                    "top": top,
                    "width": width,
                    "height": height,
                },
            })
    elif prediction.obb is not None:
        for obb in prediction.obb:
            cx, cy, box_w, box_h, angle = obb.xywhr[0].tolist()
            # Oriented boxes are reported as-is: centre converted to the
            # top-left corner, with the rotation passed through.
            results.append({
                "location": {
                    "left": cx - 0.5 * box_w,
                    "top": cy - 0.5 * box_h,
                    "width": box_w,
                    "height": box_h,
                    "rotation": angle,
                },
            })
    return {"results": results}

@route('/<filepath:path>')
def server_static(filepath):
    """Serve any other requested path as a file from the ``www`` directory."""
    response = static_file(filename=filepath, root='www')
    return response

# Load the model used by detect() from a hard-coded local weights file; change
# this path to point at your own weights.
model = YOLO(r'J:\G\Desktop\V11\runs\detect\train11\weights\best.pt')

# Start the bottle server on 127.0.0.1:8085 using the "paste" server backend.
run(server="paste", host='127.0.0.1', port=8085)

比如这种识别太贴边了就可以只扩展下和右边来重新生成

xulihang / ImageTrans-docs

可以对目标检测文件夹的导入进行优化吗 #771