XML 文件数据处理:删除指定类别、替换类别名称、去除文件名中的空格以及路径中的中文目录

#!/usr/bin/env python
# -*- coding: utf-8 -*-
# @Time        :2023/8/8 9:08
# @Author      :weiz
# @ProjectName :jiangling_new
# @File        :xml_process.py
# @Description :
# Copyright (C) 2021-2025 Jiangxi Institute Of Intelligent Industry Technology Innovation
import os
import xml.dom.minidom

try:
    # Removed in Python 3.9; kept so older interpreters behave as before.
    import xml.etree.cElementTree as ET
except ImportError:
    import xml.etree.ElementTree as ET


def delete_specified_cls(xmlFolderPath, xmlSavePath, delete_cls):
    """
    Remove every ``<object>`` whose ``<name>`` text is listed in ``delete_cls``
    from each XML file of a folder and write the result to ``xmlSavePath``.

    Directory entries that do not end in ``.xml`` (case-insensitive) are
    skipped. ``<object>`` elements without a ``<name>`` child are kept.

    :param xmlFolderPath: folder containing the source XML files
    :param xmlSavePath: folder the processed files are written to; created
        when missing, may be the same as ``xmlFolderPath``
    :param delete_cls: collection of class names to delete
    :return: None
    """
    os.makedirs(xmlSavePath, exist_ok=True)

    for xml_name in os.listdir(xmlFolderPath):
        if not xml_name.lower().endswith(".xml"):
            continue

        xml_tree = ET.parse(os.path.join(xmlFolderPath, xml_name))
        xml_root = xml_tree.getroot()

        # findall returns a list, so removing from the root while looping is safe.
        objects = xml_root.findall("object")
        class_del_num = 0
        for xml_child in objects:
            name_node = xml_child.find("name")
            cls_name = name_node.text if name_node is not None else None
            if cls_name is not None and cls_name in delete_cls:
                xml_root.remove(xml_child)
                class_del_num += 1

        xml_tree.write(os.path.join(xmlSavePath, xml_name))

        # Only warn when the file really had objects and all of them were
        # removed; a file that never had objects is not reported as emptied.
        if objects and class_del_num == len(objects):
            print("类别全部被删除了,请删除该文件及对应的图片:", xml_name)
        else:
            print(xml_name, "删除指定类别成功")


def change_class_name(xmlFolderPath, xmlSavePath, old_class, new_class):
    """
    Rename class labels in every XML file of a folder.

    The text of each ``<name>`` element that equals ``old_class[i]`` is
    replaced by ``new_class[i]``. Each label is renamed at most once, so a
    mapping such as ``a -> b, b -> c`` turns ``a`` into ``b`` (not ``c``).
    Directory entries that do not end in ``.xml`` (case-insensitive) are
    skipped. Output files are written as UTF-8.

    :param xmlFolderPath: folder containing the source XML files
    :param xmlSavePath: folder the processed files are written to; created
        when missing, may be the same as ``xmlFolderPath``
    :param old_class: sequence of class names to replace
    :param new_class: sequence of replacement names, same length as
        ``old_class``
    :return: None; prints a message and returns early when the two
        sequences differ in length
    """
    if len(old_class) != len(new_class):
        print("len(old_class) != len(new_class)")
        return

    # When an old name is listed more than once, the first pairing wins.
    rename = {}
    for old_name, new_name in zip(old_class, new_class):
        rename.setdefault(old_name, new_name)

    os.makedirs(xmlSavePath, exist_ok=True)

    for xml_name in os.listdir(xmlFolderPath):
        if not xml_name.lower().endswith(".xml"):
            continue

        xml_dom = xml.dom.minidom.parse(os.path.join(xmlFolderPath, xml_name))
        root = xml_dom.documentElement

        for node in root.getElementsByTagName('name'):
            first = node.firstChild
            # None for an empty <name/> or when the first child carries no text.
            current = getattr(first, "data", None)
            if current is not None and current in rename:
                first.data = rename[current]

        # Write UTF-8 explicitly and declare it, so non-ASCII content survives
        # regardless of the platform's default encoding.
        with open(os.path.join(xmlSavePath, xml_name), 'w', encoding='utf-8') as fh:
            xml_dom.writexml(fh, encoding='utf-8')
        print(xml_name, "文件类别替换成功")


def change_chinese(xmlFolderPath, xmlSavePath):
    """
    Clean the ``<filename>`` and ``<path>`` entries of every XML file in a
    folder.

    Spaces are removed from ``<filename>``. ``<path>`` is reduced to its last
    component (splitting on both ``\\`` and ``/``), which drops any directory
    names, and spaces are removed from that component as well. Characters in
    the last component itself are otherwise left unchanged. Missing or empty
    elements are left as they are. Directory entries that do not end in
    ``.xml`` (case-insensitive) are skipped.

    :param xmlFolderPath: folder containing the source XML files
    :param xmlSavePath: folder the processed files are written to; created
        when missing, may be the same as ``xmlFolderPath``
    :return: None
    """
    os.makedirs(xmlSavePath, exist_ok=True)

    for xml_name in os.listdir(xmlFolderPath):
        if not xml_name.lower().endswith(".xml"):
            continue

        xml_tree = ET.parse(os.path.join(xmlFolderPath, xml_name))
        xml_root = xml_tree.getroot()

        filename_node = xml_root.find("filename")
        if filename_node is not None and filename_node.text:
            filename_node.text = filename_node.text.replace(' ', '')

        path_node = xml_root.find("path")
        if path_node is not None and path_node.text:
            # Normalise separators first so Windows and POSIX style paths are
            # both reduced to the bare file name.
            base_name = path_node.text.replace('\\', '/').rsplit('/', 1)[-1]
            path_node.text = base_name.replace(' ', '')

        xml_tree.write(os.path.join(xmlSavePath, xml_name))
        print(xml_name, "修改成功!")

# Class names passed to delete_specified_cls.
g_delete_cls = ["LK29-9A095-BA"]

# Mappings from earlier runs, kept commented out for reference:
# g_old_class = ["EKS_18_19", "EKS_20寸", "743出口", "743国内", "743BEV", "756"]
# g_new_class = ["NS1-1532-EA", "NS1-1352-DB", "MS1-1532-BA", "FS1-1532-AC", "KS2-1352-AA", "KS1-1532-AA"]
# g_old_class = ["fuel_92", "fuel_91", "fuel_87", "gasolina", "fuel_91_ron"]
# g_new_class = ["FS1-9A095-AB", "LK29-9A095-BA", "NS1-9A095-CA", "MS1-9A095-AA", "NS1-9A095-AA"]
# g_old_class = ["KS2-1352-AA", "NS1-1352-DB"]
# g_new_class = ["KS2-1532-AA", "NS1-1532-DB"]

# Active rename table as (old label, new label) pairs. The two parallel lists
# expected by change_class_name are derived from it below.
# NOTE(review): "FS1-10846AB33" has no hyphen before "AB33", unlike its
# sibling labels — confirm it matches the labels actually present in the data.
_CLASS_RENAMES = [
    ("NS1-10846-JA", "10846"),
    ("NS1-10846-MA", "10846"),
    ("KS1-10846-AA30", "10846_30or31"),
    ("FS1-10846-AB30", "10846_30or31"),
    ("KS2-10846-BA30", "10846_30or31"),
    ("NS1-10846-BC30", "10846_30or31"),
    ("KS1-10846-AA31", "10846_30or31"),
    ("FS1-10846-AB31", "10846_30or31"),
    ("KS2-10846-BA31", "10846_30or31"),
    ("NS1-10846-BC31", "10846_30or31"),
    ("KS1-10846-AA33", "10846_33or80"),
    ("FS1-10846AB33", "10846_33or80"),
    ("KS2-10846-BA33", "10846_33or80"),
    ("NS1-10846-BC33", "10846_33or80"),
    ("KS2-10846-BA52", "10846_52"),
    ("NS1-10846-BC52", "10846_52"),
    ("KS1-10846-AA77", "10846_77"),
    ("FS1-10846-AB77", "10846_77"),
    ("KS2-10846-BA77", "10846_77"),
    ("NS1-10846-BC77", "10846_77"),
    ("KS1-10846-AA80", "10846_33or80"),
    ("NS1-10846-BC80", "10846_33or80"),
    ("FS1-10846-AB92", "10846_92"),
    ("KS2-10846-BA92", "10846_92"),
    ("NS1-10846-BC92", "10846_92"),
    ("NS1-10846-BCAA", "10846_AA"),
    ("FS1-10846-ABB7", "10846_B7"),
    ("KS2-10846-BAB7", "10846_B7"),
    ("KS1-10846-AAGS", "10846_GS"),
]
g_old_class = [old_label for old_label, _ in _CLASS_RENAMES]
g_new_class = [new_label for _, new_label in _CLASS_RENAMES]

# Source and destination folders all point at the same directory, so files
# are rewritten in place.
g_xmlFolderPath = r"C:\Users\weiz\Desktop\Annotations"
g_xmlSavePath = g_xmlFolderPath
g_chinese_path = g_xmlFolderPath
g_save_path = g_xmlFolderPath

if __name__ == "__main__":
    # Alternative entry points; uncomment the one to run.
    # change_class_name(g_xmlFolderPath, g_xmlSavePath, g_old_class, g_new_class)
    # delete_specified_cls(r"E:\江铃工业相机及手机采集数据\FL09\铭牌\Annotations", r"E:\江铃工业相机及手机采集数据\FL09\铭牌\Annotations", g_delete_cls)
    change_chinese(g_chinese_path, g_save_path)

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值