当前位置: 移动技术网 > 科技>操作系统>windows > 记一次 爬取LOL全皮肤原画保存到本地的实例

记一次 爬取LOL全皮肤原画保存到本地的实例

2020年01月05日  | 移动技术网科技  | 我要评论

影讯网,elkelake,宇智波鼬是好人吗

# Scrape the skin artwork of every LoL hero.
import os
import re
import traceback  # exception tracing

import requests
from bs4 import BeautifulSoup
# Fetch the HTML of a page.
def get_url(url, hander):
    """Download ``url`` and return the response body as text.

    Args:
        url: Address of the page to fetch.
        hander: Mapping of HTTP request headers sent with the request.

    Returns:
        The decoded page text, or ``""`` when the request fails (the
        traceback is printed, the error is not propagated).
    """
    try:
        r = requests.get(url, headers=hander, timeout=30)
        r.raise_for_status()
        # Decode with the charset detected from the body instead of the
        # one announced in the HTTP headers.
        r.encoding = r.apparent_encoding
        return r.text
    except requests.RequestException:
        # Only network/HTTP failures are expected here; anything else
        # (e.g. KeyboardInterrupt) must not be swallowed.
        traceback.print_exc()  # print the exception details
        return ""


# Parse the HTML.
def prasing_page(lst, html):
    """Append the link targets found in a list page to ``lst``.

    Every ``<a>`` located inside an ``<li>`` whose class matches
    ``boxshadow`` contributes its ``href`` value.

    Args:
        lst: List the hrefs are appended to; it is modified in place.
        html: Markup of the page. An empty value (what ``get_url`` returns
            on failure) is treated as "no links".

    Returns:
        The same ``lst`` object, also when parsing fails, so the return
        type no longer depends on whether an error occurred.
    """
    if not html:
        return lst
    try:
        soup = BeautifulSoup(html, "html.parser")
        for item in soup.find_all('li', class_=re.compile('boxshadow')):
            for anchor in item.find_all('a'):
                # An anchor without href is skipped instead of aborting
                # the whole page with a KeyError.
                href = anchor.get('href')
                if href:
                    lst.append(href)
    except Exception:
        # Best-effort parse: report the problem and keep what was found.
        traceback.print_exc()
    return lst

def _safe_filename(name):
    """Return ``name`` with characters that are illegal in file names replaced."""
    return re.sub(r'[\\/:*?"<>|]', '_', name).strip()


def _download_image(img_url, path, hander):
    """Fetch ``img_url`` and write its bytes to ``path``."""
    r = requests.get(img_url, headers=hander, timeout=30)
    r.raise_for_status()
    # The file is opened only after a successful download, so a failed
    # request leaves no empty .jpg behind, and ``with`` always closes it.
    with open(path, 'wb') as f:
        f.write(r.content)


# Parse every fetched hero page, pick out the skins and download them.
def geturl_prasingpag(lst, hander, save_dir='o:/lol_hero_jpg/'):
    """Crawl the hero pages in ``lst`` and save every skin image found.

    For each page, every ``<div>`` whose class matches ``otherspifubox`` is
    inspected: the text of its ``<p>`` tags is used as skin name, the text
    of its ``<span>`` tags as hero name and the ``src`` of its ``<img>``
    tags as image address. Each image is stored as
    ``<skin name>--<hero name>.jpg`` inside ``save_dir``.

    Args:
        lst: Iterable of hero page URLs.
        hander: Mapping of HTTP request headers sent with every request.
        save_dir: Target directory; created when missing. Defaults to the
            location that used to be hard-coded.

    Returns:
        None. Pages or images that fail are reported with a traceback and
        skipped.
    """
    hero_img_url = []
    hero_skin_name = []
    hero_name = []
    for u in lst:
        try:
            r = requests.get(u, headers=hander, timeout=30)
            r.raise_for_status()
            r.encoding = r.apparent_encoding
        except requests.RequestException:
            traceback.print_exc()  # print the exception details
            continue

        # Second-level parse.
        soup = BeautifulSoup(r.text, "html.parser")
        for box in soup.find_all('div', class_=re.compile('otherspifubox')):
            hero_skin_name.extend(p.string for p in box.find_all('p'))
            # .get() keeps the three lists aligned when an <img> has no
            # src; such entries are skipped at download time.
            hero_img_url.extend(img.get('src') for img in box.find_all('img'))
            hero_name.extend(span.string for span in box.find_all('span'))

    # Download to the local disk. zip() stops at the shortest list, which
    # covers exactly the indexes that are valid in all three lists.
    jobs = list(zip(hero_skin_name, hero_name, hero_img_url))
    if not jobs:
        return
    os.makedirs(save_dir, exist_ok=True)
    total = len(jobs)
    for done, (skin, hero, img_url) in enumerate(jobs, start=1):
        # .string is None for tags with nested markup; nothing sensible
        # can be named or fetched in that case.
        if skin is not None and hero is not None and img_url:
            file_name = _safe_filename(skin + '--' + hero) + '.jpg'
            try:
                _download_image(img_url, os.path.join(save_dir, file_name), hander)
            except (requests.RequestException, OSError):
                traceback.print_exc()  # print the exception details
        # Progress is measured against the number of images, not pages.
        print("\r当前进度>>>>>>>>>>>>>>>>>>{:.0f}%>>>>>>>>>>>>>>>>>>".format(done * 100 / total), end="")


def main():
    """Crawl all hero list pages and download the skins they link to.

    NOTE(review): the host in the URL is masked with asterisks in this
    listing and presumably has to be filled in before running.
    """
    hander = {"user-agent":"mozilla/5.0"}
    deep = 43  # number of list pages to crawl
    for i in range(deep):
        try:
            url = "http://********/hero_"+str(1+i)+".shtml"
            html = get_url(url, hander)
            # A fresh list per page: reusing one growing list would make
            # every iteration re-crawl all previously seen hero pages.
            page_links = []
            prasing_page(page_links, html)
            geturl_prasingpag(page_links, hander)
        except Exception:
            # Top-level boundary: report the failure instead of hiding
            # it, then carry on with the next page.
            traceback.print_exc()
            continue


if __name__ == "__main__":
    main()

如对本文有疑问,请在下面进行留言讨论,广大热心网友会与你互动!! 点击进行留言回复

相关文章:

验证码:
移动技术网