python解析xml模块封装代码

后端开发|Python教程 python解析xml模块封装代码

python,解析xml

后端开发-Python教程

有如下的xml文件：

多用户建站源码,以太坊ubuntu出租,爬虫ul找不到,goAuth php,镇赉网页seolzw

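（原文中的XML标签在转载时丢失，以下内容根据下文的运行结果还原，其中叶子节点名child为推测。）

<root>
<childs>
<child>1</child>
<child>2</child>
</childs>
</root>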

用户管理系统源码,ubuntu 文件大小,Python爬虫怎么停止,php svm,seo术语英文lzw

下面介绍python解析xml文件的几种方法，使用python模块实现。

方式1，python模块实现自动遍历所有节点：

定时器源码,ubuntu管理scp,Tomcat是轻量级的,设备巡检爬虫,php字符串截取替换,河北抖音seo收益怎么样lzw

#!/usr/bin/env python
# -*- coding: utf-8 -*-
from xml.sax.handler import ContentHandler
from xml.sax import parse
class TestHandle(ContentHandler):
    """SAX handler that traces every parse event and collects text chunks.

    Each start tag, end tag and run of character data is printed as it is
    encountered; every character-data chunk is also appended to ``inlist``.

    :param inlist: caller-owned list that receives the character-data
        chunks in document order (it is mutated in place).
    """

    def __init__(self, inlist):
        # Let the base class set up its own state before adding ours.
        ContentHandler.__init__(self)
        self.inlist = inlist

    def startElement(self, name, attrs):
        """Print the element name and the names of its attributes."""
        # Single-argument print() behaves the same on Python 2 and 3.
        print('name: %s attrs: %s' % (name, attrs.keys()))

    def endElement(self, name):
        """Print the name of the element that just closed."""
        print('endname %s' % name)

    def characters(self, chars):
        """Print a chunk of character data and store it in ``inlist``.

        The parser may deliver one text run in several chunks, and the
        whitespace between elements is reported as well.
        """
        print('chars %s' % chars)
        self.inlist.append(chars)

# Demo entry point: parse test.xml from the current directory, tracing every
# SAX event, then show all collected character-data chunks.
if __name__ == '__main__':
    lt = []
    parse('test.xml', TestHandle(lt))
    print(lt)

结果：
[html] view plaincopy
name: root attrs: []
chars

endname childs
chars

endname root
[u'\n', u'\n', u'1', u'\n', u'2', u'\n', u'\n']

方式2，python模块实现获取根节点，按需查找指定节点：

#!/usr/bin/env python
# -*- coding: utf-8 -*-
from xml.dom import minidom
# Sample document fed to doxml().
# NOTE(review): the XML markup of this literal was lost when the article was
# extracted; only the text content survived. The <request name="first">
# element is reconstructed from the sample output shown after the script
# ("child node attribute name: first", "child node name: request"). The root
# tag and the <error_code>/<error> tag names are assumptions -- confirm
# against the original article.
xmlstr = '''<hash>
<request name="first">/2/photos/square/type.xml</request>
<error_code>21301</error_code>
<error>auth faild!</error>
</hash>
'''
def doxml(xmlstr):
    """Parse ``xmlstr`` with minidom and print a walk over the root's children.

    Prints the whole document, the root element, and then every direct child
    of the root. For each child that is an element it also prints its
    ``name`` attribute, tag name, number of child nodes, leading text, and
    the built-in help for each of the node's callable members.

    :param xmlstr: XML document as a string.
    :raises xml.parsers.expat.ExpatError: if ``xmlstr`` is not well-formed.
    """
    dom = minidom.parseString(xmlstr)
    print('Dom:')
    print(dom.toxml())

    # documentElement is the root element even when the document starts with
    # a comment or doctype, which dom.firstChild would return instead.
    root = dom.documentElement
    print('root:')
    print(root.toxml())

    for child in root.childNodes:
        print(child.toxml())
        # Only elements have attributes; skip whitespace text, comments, etc.
        if child.nodeType != child.ELEMENT_NODE:
            continue

        print('child node attribute name: %s' % child.getAttribute('name'))
        print('child node name: %s' % child.nodeName)
        print('child node len: %s' % len(child.childNodes))

        # An element may be empty or start with a nested element, neither of
        # which has a .data attribute; report an empty string in that case.
        first = child.firstChild
        has_text = first is not None and first.nodeType in (
            first.TEXT_NODE, first.CDATA_SECTION_NODE)
        print('child data: %s' % (first.data if has_text else ''))

        # NOTE(review): indentation was lost in the source; this section is
        # placed inside the per-element branch because the sample output
        # shows the separator right after the first element's details.
        print('=======================================')
        print('more help info to see:')
        for member_name in dir(child):
            # Pass the member itself to help(): given the bare name string,
            # help() searches for a topic/module of that name instead of
            # documenting the node's member. Data attributes are skipped
            # because string values would trigger that same lookup.
            member = getattr(child, member_name, None)
            if callable(member):
                help(member)

# Demo entry point: walk the sample document defined above.
if __name__ == '__main__':
    doxml(xmlstr)

结果：
[html] view plaincopy
Dom:

/2/photos/square/type.xml
21301
auth faild!

root:

/2/photos/square/type.xml
21301
auth faild!

/2/photos/square/type.xml
child node attribute name: first
child node name: request
child node len: 1
child data: /2/photos/square/type.xml
=======================================
more help info to see:
两种方法各有其优点，python的xml处理模块太多，目前只用到这2个。

=====补充分割线================
实际工作中发现python的minidom无法解析其它编码的xml，只能解析utf-8的编码，而其xml文件的头部声明也必须是utf-8，为其它编码会报错误。
网上的解决办法都是替换xml文件头部的编码申明，然后转换编码为utf-8再用minidom解码，实际测试为可行，不过有点累赘的感觉。

本节是 python解析xml模块封装代码的第二部分。
====写xml内容的分割线=========

#!/usr/bin/env python
#encoding: utf-8
from xml.dom import minidom

class xmlwrite:
    """Build a small XML report with minidom and save it to a file.

    The document root is an ``<api>`` element. Nodes are added with
    :meth:`write_node`, whose ``parent`` argument is a slash-separated path
    resolved beneath the root element. A path segment may carry a zero-based
    index in square brackets (``Case[1]`` is the second ``Case`` child); a
    segment without an index selects the first match. Attribute predicates
    are not supported.

    :param resultfile: path of the file written by :meth:`save_xml`.
    """

    def __init__(self, resultfile):
        self.resultfile = resultfile
        self.rootname = 'api'
        self.__create_xml_dom()

    def __create_xml_dom(self):
        """Create the empty document and remember its root element."""
        xmlimpl = minidom.getDOMImplementation()
        self.dom = xmlimpl.createDocument(None, self.rootname, None)
        self.root = self.dom.documentElement

    def __get_spec_node(self, xpath):
        """Return the node addressed by ``xpath``, or ``None`` if absent.

        Empty segments (leading/trailing/double slashes) are ignored, so
        ``'/'`` resolves to the root element itself.

        :raises ValueError: if a bracketed index is not an integer.
        """
        current = self.root
        for segment in xpath.split('/'):
            segment = segment.strip()
            if not segment:
                continue
            bracket = segment.find('[')
            if bracket > -1:
                tag = segment[:bracket]
                index = int(segment[bracket + 1:-1])
            else:
                # No index given: use the whole segment as the tag name.
                # (Slicing with the -1 "not found" position would drop the
                # last character of the name.)
                tag = segment
                index = 0
            matches = [child for child in current.childNodes
                       if child.nodeName == tag]
            if not 0 <= index < len(matches):
                return None
            current = matches[index]
        return current

    def write_node(self, parent, nodename, value, attribute=None, CDATA=False):
        """Append a new ``nodename`` element under the node at ``parent``.

        :param parent: path of the parent node, resolved beneath the root.
            If it cannot be resolved a message is printed and the element
            is appended to the root instead.
        :param nodename: tag name of the new element.
        :param value: text content. ``None`` or an empty string gives an
            empty element; other non-string values (e.g. integer counts)
            are converted to text.
        :param attribute: optional dict of attribute names to values.
        :param CDATA: when true, store the text as a CDATA section.
        """
        node = self.dom.createElement(nodename)
        if value is not None:
            # minidom only accepts strings as node contents; %-formatting
            # converts numbers and leaves existing strings unchanged.
            text = '%s' % (value,)
            if text:
                if CDATA:
                    nodedata = self.dom.createCDATASection(text)
                else:
                    nodedata = self.dom.createTextNode(text)
                node.appendChild(nodedata)
        if isinstance(attribute, dict):
            for key, attr_value in attribute.items():
                node.setAttribute(key, '%s' % (attr_value,))
        try:
            parentnode = self.__get_spec_node(parent)
        except (AttributeError, TypeError, ValueError):
            # Non-string path or malformed index: treat as "not found".
            parentnode = None
        if parentnode is None:
            print('Get parent Node Fail, Use the Root as parent Node')
            parentnode = self.root
        parentnode.appendChild(node)

    def write_start_time(self, time):
        """Append a ``StartTime`` element to the root."""
        self.write_node('/', 'StartTime', time)

    def write_end_time(self, time):
        """Append an ``EndTime`` element to the root."""
        self.write_node('/', 'EndTime', time)

    def write_pass_count(self, count):
        """Append a ``PassCount`` element to the root."""
        self.write_node('/', 'PassCount', count)

    def write_fail_count(self, count):
        """Append a ``FailCount`` element to the root."""
        self.write_node('/', 'FailCount', count)

    def write_case(self):
        """Append a new, empty ``Case`` element to the root."""
        self.write_node('/', 'Case', None)

    def write_case_no(self, index, value):
        """Append a ``No`` element to the ``index``-th ``Case``."""
        self.write_node('/Case[%s]/' % index, 'No', value)

    def write_case_url(self, index, value):
        """Append a ``URL`` element to the ``index``-th ``Case``."""
        self.write_node('/Case[%s]/' % index, 'URL', value)

    def write_case_dbdata(self, index, value):
        """Append a ``DBData`` element to the ``index``-th ``Case``."""
        self.write_node('/Case[%s]/' % index, 'DBData', value)

    def write_case_apidata(self, index, value):
        """Append an ``APIData`` element to the ``index``-th ``Case``."""
        self.write_node('/Case[%s]/' % index, 'APIData', value)

    def write_case_dbsql(self, index, value):
        """Append a ``DBSQL`` element (as CDATA) to the ``index``-th ``Case``."""
        self.write_node('/Case[%s]/' % index, 'DBSQL', value, CDATA=True)

    def write_case_apixpath(self, index, value):
        """Append an ``APIXPath`` element to the ``index``-th ``Case``."""
        self.write_node('/Case[%s]/' % index, 'APIXPath', value)

    def save_xml(self):
        """Write the document to ``self.resultfile`` as UTF-8.

        The document is serialized to bytes first so the bytes on disk
        always match the ``encoding="utf-8"`` declaration, regardless of
        the platform's default text encoding.
        """
        payload = self.dom.toxml(encoding='utf-8')
        # 'with' closes the file even if the write fails.
        with open(self.resultfile, 'wb') as outfile:
            outfile.write(payload)

# Demo entry point: build a report with two Case entries and save it.
if __name__ == '__main__':
    xr = xmlwrite(r'D:\test.xml')
    xr.write_start_time('2223')
    xr.write_end_time('444')
    xr.write_pass_count('22')
    xr.write_fail_count('33')
    # Create both Case elements first so they can be addressed by index.
    xr.write_case()
    xr.write_case()
    xr.write_case_no(0, '0')
    # NOTE(review): the URL is written twice per case, which yields two URL
    # elements; kept as in the original -- confirm whether this is intended.
    xr.write_case_url(0, 'http://www.google.com')
    xr.write_case_url(0, 'http://www.google.com')
    xr.write_case_dbsql(0, 'select * from ')
    xr.write_case_dbdata(0, 'dbtata')
    xr.write_case_apixpath(0, '/xpath')
    xr.write_case_apidata(0, 'apidata')
    xr.write_case_no(1, '1')
    xr.write_case_url(1, 'http://www.baidu.com')
    xr.write_case_url(1, 'http://www.baidu.com')
    xr.write_case_dbsql(1, 'select 1 from ')
    xr.write_case_dbdata(1, 'dbtata1')
    xr.write_case_apixpath(1, '/xpath1')
    xr.write_case_apidata(1, 'apidata1')
    xr.save_xml()

以上封装了minidom，支持通过xpath来写节点，不支持xpath带属性的匹配，但支持带索引的匹配。
比如：/child[1]（路径从根节点之下开始解析，不包含根节点自身，索引从0开始），表示根节点下的第2个child节点。

解析XML