谓言挂席度沧海下一句,360yun,碰撞传感器
def _first_child_value(parent, tag):
    """Return the value of the first child node of the first ``tag`` element.

    Looks up the first descendant of ``parent`` named ``tag`` and returns
    the ``nodeValue`` of its first child (normally its text node).

    An element with no children at all (e.g. ``<crawl_cycle></crawl_cycle>``)
    yields ``''`` instead of failing.  A missing element still raises
    ``IndexError`` so that malformed seed files are not silently accepted.
    """
    element = parent.getElementsByTagName(tag)[0]
    if not element.childNodes:
        return ''
    return element.childNodes[0].nodeValue


def get_seed_data(filename):
    """Parse a seed XML file and return a flat list of seed dictionaries.

    The document is expected to contain ``<system>`` elements (attributes
    ``id`` and ``name``), each holding ``<section>`` elements (attributes
    ``id`` and ``name``).  Every section provides one ``<crawl_cycle>``
    element and any number of ``<seed id="...">`` elements, each with a
    ``<userblog_url>`` child.

    Args:
        filename: Path of the XML file (anything ``minidom.parse`` accepts).

    Returns:
        A list with one dict per ``<seed>``, in document order, with keys
        ``crawl_cycle``, ``system_id`` (int), ``system_name``,
        ``section_id`` (int), ``section_name``, ``seed_id`` (int) and
        ``userblog_url``.

    Raises:
        IndexError: if a section has no ``<crawl_cycle>`` element or a seed
            has no ``<userblog_url>`` element.
        ValueError: if an ``id`` attribute used by a seed is not an integer.

    Each seed is also echoed to stdout as it is collected.
    """
    dom = minidom.parse(filename)
    # minidom attribute and method names are case-sensitive (camelCase).
    root = dom.documentElement
    seed_list = []
    for system_node in root.getElementsByTagName("system"):
        system_id = system_node.getAttribute("id")
        system_name = system_node.getAttribute("name")
        for section_node in system_node.getElementsByTagName("section"):
            section_id = section_node.getAttribute('id')
            section_name = section_node.getAttribute('name')
            crawl_cycle = _first_child_value(section_node, "crawl_cycle")
            for seed_node in section_node.getElementsByTagName('seed'):
                seed = {}
                seed['crawl_cycle'] = crawl_cycle
                # The ids are converted per seed, so a system or section
                # without seeds never has its id validated.
                seed['system_id'] = int(system_id)
                seed['system_name'] = system_name
                seed['section_id'] = int(section_id)
                seed['section_name'] = section_name
                seed['seed_id'] = int(seed_node.getAttribute('id'))
                seed['userblog_url'] = _first_child_value(seed_node,
                                                          'userblog_url')
                # Single-argument print(...) produces the same output on
                # Python 2 and Python 3.
                print('-------------------------------------------')
                print('system_id:%d' % seed['system_id'])
                print('system_name:%s' % seed['system_name'])
                print(' section_id:%d' % seed['section_id'])
                print(' section_name:%s' % seed['section_name'])
                print(' seed_id:%d' % seed['seed_id'])
                print(' userblog_url:%s' % seed['userblog_url'])
                print('=========================')
                seed_list.append(seed)
                print(seed)
    # NOTE(review): 'pause' is a Windows console command; elsewhere this
    # only spawns a shell that fails.  Kept for backward compatibility --
    # consider removing it or moving it to the calling script.
    os.system('pause')
    return seed_list
<?xml version="1.0" encoding="utf-8" ?>
<!-- Sample seed file in the layout read by get_seed_data:
     seeds > system (id, name) > section (id, name) > crawl_cycle + seed (id) > userblog_url. -->
<seeds>
<system id="1" name="新浪">
<section id="1" name="娱乐">
<!-- crawl_cycle holds only a single space in this sample. -->
<crawl_cycle> </crawl_cycle>
<seed id="1">
<userblog_url>http://aaa.com.cn/loveissuuny</userblog_url>
</seed>
<seed id="2">
<userblog_url>http://aaa.com.cn/loveissuuny</userblog_url>
</seed>
<seed id="3">
<userblog_url>http://aaa.com.cn/sanxiazaixian</userblog_url>
</seed>
</section>
<section id="2" name="读书">
<!-- crawl_cycle holds only a single space in this sample. -->
<crawl_cycle> </crawl_cycle>
<seed id="11">
<userblog_url>http://aaa.com.cn/twocold</userblog_url>
</seed>
<seed id="12">
<userblog_url>http://aaa.com.cn/u/1233526741</userblog_url>
</seed>
</section>
</system>
</seeds>
如对本文有疑问,请在下面留言讨论,广大热心网友会与你互动!
点击进行留言回复
相关文章:
-
-
-
-
-
python中def是做什么的
Python 使用 def 开始函数定义,紧接着是函数名,括号内部为函数的参数,函数体内部为函数的具体功能实现代码;如果想要函数有返回值,在 expressions 中的逻...
[阅读全文]
-
-
-
-
-
-
网友评论