在使用dexml进行Python XML序列化时,需要在生成的XML中为某些标记添加属性,但是文档中没有找到如何实现这一目标的清晰说明。
以下是涉及的代码。
import dexml
import urllib2
from dexml import fields
from bs4 import BeautifulSoup
class Section(dexml.Model):
    """One section of the A-to-Z listing: a label plus its entry strings."""

    # Rendered as the ``section`` attribute on the <Section> tag.
    section = fields.String()
    # Each item in the list is rendered as its own <Entry> child tag.
    entries = fields.List(fields.String(tagname="Entry"))
    # Add something for href here, maybe?
class AtoZ(dexml.Model):
    """Root model of the document; holds the list of Section models."""

    list = fields.List(Section)
def makeSoup(url):
    """Fetch the page at ``url`` and return it parsed by BeautifulSoup."""
    response = urllib2.urlopen(url)
    markup = response.read()
    return BeautifulSoup(markup)
def main():
    """Scrape the navigation menu and write it to an XML file.

    Every ``MenuLevel_0`` item of the ``Nav_fm`` list becomes a Section
    whose label is the item's link text; the link text of each nested
    ``MenuLevel_1`` item is appended to that section's entries. The
    resulting AtoZ model is rendered and written to ``C:\\atoz.xml``.
    """
    soup = makeSoup("http://www.somewebsite.com")
    sectionList = []
    navList = soup.find('ul', {'class': "Nav_fm"})
    for li in navList.find_all('li', {'class': "MenuLevel_0"}):
        atzSection = Section()
        atzSection.section = li.a.string
        for innerLi in li.find_all('li', {'class': "MenuLevel_1"}):
            atzSection.entries.append(innerLi.a.string)
            # Somehow store innerLi.a['href'] in atzSection
        sectionList.append(atzSection)
    atzList = AtoZ(list=sectionList)
    # Raw string: in an ordinary literal "\a" is the BEL control character,
    # so "C:\atoz.xml" would not name the intended file.
    # The with-block closes the file even if rendering or writing fails.
    with open(r"C:\atoz.xml", "w") as f:
        f.write(atzList.render(pretty=True))
# Run the scraper only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
如上所示,生成的XML是这样的:
<?xml version="1.0" ?>
<AtoZ>
<Section section="#">
<Entry>...</Entry>
<Entry>...</Entry>
<Entry>...</Entry>
<Entry>...</Entry>
</Section>
...
<Section section="Z">
<Entry>...</Entry>
<Entry>...</Entry>
<Entry>...</Entry>
<Entry>...</Entry>
</Section>
</AtoZ>
现在想要让每个<Entry>标签都带有一个href属性。
2. 解决方案
注释1中给出的解决方案是重新定义Section.entries,使其成为一个由Entry项组成的列表,如下所示:
class Entry(dexml.Model):
    """A single entry, modelled as its own dexml model so it can carry an href."""

    href = fields.String()
    ...  # placeholder for any further fields
class Section(dexml.Model):
    """One section of the A-to-Z listing: a label plus its Entry models."""

    # Rendered as the ``section`` attribute on the <Section> tag.
    section = fields.String()
    # Pass the Entry model directly so each item becomes an <Entry> child
    # of <Section>. A ``tagname`` on the List itself would instead add a
    # wrapper tag around the whole list.
    entries = fields.List(Entry)
现在,就可以在渲染XML时为每个<Entry>标签添加一个href属性。
以下是完整的代码。
import dexml
import urllib2
from dexml import fields
from bs4 import BeautifulSoup
class Entry(dexml.Model):
    """A single menu entry: a link target plus its display text."""

    # Rendered as the ``href`` attribute on the <Entry> tag.
    href = fields.String()
    # tagname="." maps this field to the text content of the <Entry> tag
    # itself; a plain String() would be rendered as a second attribute
    # (content="...") instead of as the element's text.
    content = fields.String(tagname=".")
class Section(dexml.Model):
    """One section of the A-to-Z listing: a label plus its Entry models."""

    # Rendered as the ``section`` attribute on the <Section> tag.
    section = fields.String()
    # Pass the Entry model directly so each item becomes an <Entry> child
    # of <Section>. A ``tagname`` on the List itself would instead add a
    # wrapper tag around the whole list.
    entries = fields.List(Entry)
class AtoZ(dexml.Model):
    """Root model of the document; holds the list of Section models."""

    list = fields.List(Section)
def makeSoup(url):
    """Fetch the page at ``url`` and return it parsed by BeautifulSoup."""
    response = urllib2.urlopen(url)
    markup = response.read()
    return BeautifulSoup(markup)
def main():
    """Scrape the navigation menu and write it to an XML file.

    Every ``MenuLevel_0`` item of the ``Nav_fm`` list becomes a Section
    whose label is the item's link text. Each nested ``MenuLevel_1`` item
    becomes an Entry holding the link's ``href`` and its text. The
    resulting AtoZ model is rendered and written to ``C:\\atoz.xml``.
    """
    soup = makeSoup("http://www.somewebsite.com")
    sectionList = []
    navList = soup.find('ul', {'class': "Nav_fm"})
    for li in navList.find_all('li', {'class': "MenuLevel_0"}):
        atzSection = Section()
        atzSection.section = li.a.string
        for innerLi in li.find_all('li', {'class': "MenuLevel_1"}):
            link = innerLi.a
            atzEntry = Entry()
            atzEntry.href = link['href']
            atzEntry.content = link.string
            atzSection.entries.append(atzEntry)
        sectionList.append(atzSection)
    atzList = AtoZ(list=sectionList)
    # Raw string: in an ordinary literal "\a" is the BEL control character,
    # so "C:\atoz.xml" would not name the intended file.
    # The with-block closes the file even if rendering or writing fails.
    with open(r"C:\atoz.xml", "w") as f:
        f.write(atzList.render(pretty=True))
# Run the scraper only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
生成的XML现在如下所示:
<?xml version="1.0" ?>
<AtoZ>
<Section section="#">
<Entry href="...">...</Entry>
<Entry href="...">...</Entry>
<Entry href="...">...</Entry>
<Entry href="...">...</Entry>
</Section>
...
<Section section="Z">
<Entry href="...">...</Entry>
<Entry href="...">...</Entry>
<Entry href="...">...</Entry>
<Entry href="...">...</Entry>
</Section>
</AtoZ>
这就是利用dexml为标记添加属性的方法。