python xml文件读写使用之xml.dom.minidom

XML文件读写：xml结构、xml和dom、读取指定字符串、写xml文件。xml结构：xml的结构性关系包括节点关系及属性内容；xml的基本单位是元素，元素由开始标记、属性、结束标记组成。〈?xml version="1.0" encoding="gb2312" ?〉　　〈参考资料〉　　〈书籍〉　　〈名称〉xml入门精解〈/名称〉　　〈作者〉张三〈/作者〉　　〈价格 货币单位="人民币"〉20.00〈/价格〉　　〈

乔qiao

9928人浏览 · 2020-11-13 17:15:00

乔qiao · 2020-11-13 17:15:00 发布

XML文件读写

- xml结构
- xml读写模块
  - DOM(Document Object Model)
    - 写xml
    - 读xml
- 读取指定属性值
  - 获取属性值--iso信息
  - 设置属性值并写入文件

xml结构

在使用python读写xml文件之前，我们先来了解一下xml的文件结构。
xml的结构性关系包括节点关系及属性内容
xml的基本单位是元素，元素由开始标记、属性、内容和结束标记组成。

<?xml version="1.0" encoding="gb2312" ?>
<参考资料>
    <书籍>
        <名称>xml入门精解</名称>
        <作者>张三</作者>
        <价格 货币单位="人民币">20.00</价格>
    </书籍>
    <书籍>
        <名称>xml语法</名称>
        <!--此书即将出版-->
        <作者>李四</作者>
        <价格 货币单位="人民币">18.00</价格>
    </书籍>
</参考资料>

第一行是文件序言（XML声明），告诉xml解析器如何工作。其中，version标明此xml文件使用的标准版本号；encoding指明xml文件所使用的字符编码，可以省略，省略此声明时，文件内容必须采用XML的默认编码，即UTF-8（或带BOM的UTF-16）。
其余部分是文件主体，主体信息放置在“参考资料”的开始标记与结束标记之间。“参考资料”是控制标记，也是根元素；“书籍”是直属于根元素的子元素，在“书籍”下又有“名称”、“作者”、“价格”这些子元素。“货币单位”是“价格”元素的属性，“人民币”则是对应的属性值。
注释：〈!-- xxx --〉

xml读写模块

DOM(Document Object Model)

dom是一个跨平台的标准模型，它将xml文件解析成树形结构，树中的每个组成部分（元素、属性、文本等）都被定义为节点。python的xml.dom.minidom模块实现了dom。

写xml

import os
from xml.dom import minidom

def write_xml():
    """Build a fixed image-annotation DOM tree and save it as ``0000001.xml``.

    The tree has an ``annotation`` root holding ``folder``, ``filename``,
    ``path``, ``source``, ``size``, ``segmented`` and ``object`` children.
    The file is written to the current working directory, tab-indented,
    one element per line, with a UTF-8 XML declaration.
    """
    # 1. The Document object owns every node and acts as their factory.
    document = minidom.Document()

    def leaf(tag, text):
        """Return ``<tag>text</tag>``: an element whose only child is a text node."""
        element = document.createElement(tag)
        element.appendChild(document.createTextNode(str(text)))
        return element

    # 2. Create the root element and attach it to the document.
    annotation = document.createElement("annotation")
    document.appendChild(annotation)

    # 3. Simple leaves directly under the root.
    for tag, text in (
        ("folder", "user"),
        ("filename", "0000001.jpg"),
        ("path", "/home"),
    ):
        annotation.appendChild(leaf(tag, text))

    source = document.createElement("source")
    source.appendChild(leaf("database", "Unknown"))
    annotation.appendChild(source)

    size = document.createElement("size")
    for tag, number in (("width", 1920), ("height", 1080), ("depth", 3)):
        size.appendChild(leaf(tag, number))
    annotation.appendChild(size)

    annotation.appendChild(leaf("segmented", 0))

    # The <object> subtree: four leaves followed by the bounding box.
    obj = document.createElement("object")
    for tag, text in (
        ("name", "boat"),
        ("pose", "Unspecified"),
        ("truncated", 1),
        ("difficult", 0),
    ):
        obj.appendChild(leaf(tag, text))

    bndbox = document.createElement("bndbox")
    for tag, number in (("xmin", 103), ("ymin", 1), ("xmax", 634), ("ymax", 402)):
        bndbox.appendChild(leaf(tag, number))
    obj.appendChild(bndbox)
    annotation.appendChild(obj)

    # 4. writexml(writer, indent, addindent, newl, encoding):
    #    indent    - indentation of the root node,
    #    addindent - extra indentation added per nesting level,
    #    newl      - line terminator written after each node,
    #    encoding  - value placed in the XML declaration.
    with open("0000001.xml", "w", encoding="utf-8") as out:
        document.writexml(out, indent="", addindent="\t", newl="\n", encoding="utf-8")


if __name__ == "__main__":
    write_xml()

执行一下上述代码，生成“0000001.xml”，对照此文件，再看一下代码，so easy!

读xml

了解了怎么写入一个xml文件之后，我们下一步看一下如何读取一个xml文件。
通过dom将xml解析为树形结构，获取根节点，通过根节点获取子节点及子节点的文本属性等。

from xml.dom import minidom

def read_xml(xml_path):
    """Parse an XML file, print facts about its root, and set the first ``xmin`` to 300.

    The document is parsed into a DOM tree, the root's name, type and child
    list are printed, the text of the first ``xmin`` element is printed,
    replaced by ``"300"`` and printed again, and the modified tree is then
    written back over ``xml_path``.

    Args:
        xml_path: Path of the XML file to read and then overwrite.

    Returns:
        The text content of the first ``filename`` element.

    Raises:
        IndexError: If the document has no ``filename`` or ``xmin`` element,
            or if either of them has no child node.
    """
    # minidom.parse() accepts a path (or an open file object) and reads the
    # file itself, so no separate open() is needed here.
    doc = minidom.parse(xml_path)
    # doc = minidom.parseString(...) would parse XML held in a string instead.
    root_node = doc.documentElement  # the root element of the tree

    print(root_node.nodeName)    # node name
    print(root_node.nodeType)    # node type (element / text / attribute ...)
    print(root_node.childNodes)  # list of all direct children

    # getElementsByTagName() returns a list of matches; take the first one.
    filename_node = root_node.getElementsByTagName("filename")[0]
    # The element's child is a text node whose ``data`` holds the text value.
    filename = filename_node.childNodes[0].data

    # Locate the first xmin element and update its text value.
    xmin_text = root_node.getElementsByTagName("xmin")[0].childNodes[0]
    print(xmin_text.data)
    xmin_text.data = str(300)
    print(xmin_text.data)

    with open(xml_path, "w", encoding="utf-8") as f:
        doc.writexml(f)

    return filename


if __name__ == "__main__":
    read_xml("0000001.xml")

读取指定属性值

xml文件文本信息。

<?xml version="1.0" encoding="UTF-8"?>
<RawReadBackConfig cameraScene="23" sceneName="portrait" count="2" lensFacing="0" masterId="1" previewId="0" zoomRatio="1" vcmcode="0" fps="0">
    <RawReadInfo captureMode="4" modeName="edof">
        <SensorInfo id="1">
            <name>C681FUV_M060</name>
            <fuseid>323330363034333036323039483030385744373030333139303030303030</fuseid>
        </SensorInfo>
        <CaptureSteps count="1">
            <Step index="0" count="1"/>
        </CaptureSteps>
        <previews count="1">
            <raw index="0" width="2304" height="1728" size="7962624" frameType="0">
                <filename>[1]C681FUV_M060_f[182]_w[2304]_h[1728].raw</filename>
                <algoInfo minExpo="111" expo="30000" iso="377" gain="1932" aelv="59" distance="0" seamlessMode="0"/>
                <cropRegion x="0" y="0" width="4864" height="3648"/>
            </raw>
        </previews>
        <captures count="1">
            <raw index="0" width="2304" height="1728" size="7962624" frameType="0">
                <filename>[1]C681FUV_M060_f[187]_w[2304]_h[1728].raw</filename>
                <algoInfo minExpo="111" expo="30000" iso="377" gain="1932" aelv="58" distance="0" seamlessMode="0"/>
                <cropRegion x="0" y="0" width="4864" height="3648"/>
            </raw>
        </captures>
    </RawReadInfo>
    <SensorCluster count="1">
        <Sensor type="3" count="1">
            <Data index="0">
                <data index="0" value="0.021903882"/>
                <data index="1" value="-0.029443704"/>
                <data index="2" value="0.022113321"/>
            </Data>
        </Sensor>
    </SensorCluster>
</RawReadBackConfig>

获取属性值–iso信息

import os
from xml.dom import minidom

def getiso(rawreadbackxml_path, filename):
    """Read the ``iso`` attribute of the first ``algoInfo`` element in an XML file.

    Args:
        rawreadbackxml_path: Directory that contains the XML file.
        filename: Name of the XML file, e.g. "RawReadBackConfig.xml".

    Returns:
        The value of the ``iso`` attribute as a string (``getAttribute``
        yields an empty string when the attribute is absent).

    Raises:
        IndexError: If the document contains no ``algoInfo`` element.
    """
    xml_file = os.path.join(rawreadbackxml_path, filename)
    dom = minidom.parse(xml_file)  # load the XML file into a DOM tree
    algo_infos = dom.getElementsByTagName("algoInfo")  # every <algoInfo> element
    # Only the first match is used; iterate over ``algo_infos`` and call
    # getAttribute("iso") on each item to read the value of every element.
    return algo_infos[0].getAttribute("iso")

设置属性值并写入文件

def get_seeemlessmode(rawreadbackxml_path, filename, index=1, value="1"):
    """Parse an XML file and set ``seamlessMode`` on one ``algoInfo`` element.

    Despite the "get" in its name (kept so existing callers still work), this
    function modifies the parsed tree in memory and returns it; nothing is
    written to disk here.

    Args:
        rawreadbackxml_path: Directory that contains the XML file.
        filename: Name of the XML file, e.g. "RawReadBackConfig.xml".
        index: Position, in document order, of the ``algoInfo`` element to
            update. Defaults to 1, i.e. the second ``algoInfo`` element.
        value: New value for the ``seamlessMode`` attribute. Defaults to "1".

    Returns:
        The modified DOM document, or ``None`` if the file does not exist.

    Raises:
        IndexError: If the document has no ``algoInfo`` element at ``index``.
    """
    xml_file = os.path.join(rawreadbackxml_path, filename)
    if not os.path.exists(xml_file):
        return None
    dom = minidom.parse(xml_file)  # load the XML file into a DOM tree
    algo_infos = dom.getElementsByTagName("algoInfo")
    algo_infos[index].setAttribute("seamlessMode", value)  # update the attribute
    return dom

def writexmlfile(domfile, rawreadbackxml_path, filename):
    """Overwrite an existing XML file with the contents of a DOM document.

    The document is written without added indentation or newlines and with a
    UTF-8 XML declaration. If the target file does not already exist, nothing
    is written.

    Args:
        domfile: DOM document to serialize (it must provide ``writexml``).
        rawreadbackxml_path: Directory that contains the target file.
        filename: Name of the target file, e.g. "RawReadBackConfig.xml".

    Returns:
        None.
    """
    xml_file = os.path.join(rawreadbackxml_path, filename)
    if not os.path.exists(xml_file):
        return None
    # ``with`` guarantees the handle is closed even if writexml() raises.
    with open(xml_file, "w", encoding="utf-8") as f:
        domfile.writexml(writer=f, indent="", addindent="", newl="", encoding="utf-8")

除了dom模块可以读写xml文件之外，还有其他模块可用，比如xml.etree.ElementTree和SAX（Simple API for XML，对应python的xml.sax模块）。这两个模块等以后需要的时候再去了解吧。

参考：
【1】https://www.cnblogs.com/klb561/p/9196515.html
【2】https://blog.csdn.net/weixin_30267691/article/details/96400458?utm_medium=distribute.pc_relevant.none-task-blog-BlogCommendFromMachineLearnPai2-6.channel_param&depth_1-utm_source=distribute.pc_relevant.none-task-blog-BlogCommendFromMachineLearnPai2-6.channel_param
【3】https://blog.csdn.net/ddnxh60840/article/details/102410344?utm_medium=distribute.pc_relevant.none-task-blog-BlogCommendFromMachineLearnPai2-2.channel_param&depth_1-utm_source=distribute.pc_relevant.none-task-blog-BlogCommendFromMachineLearnPai2-2.channel_param （不改变写入xml文件的属性顺序）
【4】https://www.cnblogs.com/silence-cho/p/12542747.html

2048 AI社区

有“AI”的1024 = 2048，欢迎大家加入2048 AI社区

更多推荐