
import os
import re
import urllib.request
# Scrape the hero list page on pvp.qq.com, then download every skin image of
# every hero into a directory chosen interactively by the user.

HERO_LIST_URL = 'https://pvp.qq.com/web201605/herolist.shtml'
HERO_DETAIL_URL = 'https://pvp.qq.com/web201605/herodetail/{hero_id}.shtml'
SKIN_IMAGE_URL = ('https://game.gtimg.cn/images/yxzj/img201606/skin/hero-info/'
                  '{hero_id}/{hero_id}-bigskin-{index}.jpg')

# Text between these two markers is the part of the list page that holds the
# hero entries.
# NOTE(review): the comma is accepted in both ASCII and full-width form because
# the punctuation on the live page could not be confirmed -- verify and narrow.
HERO_SECTION_RE = re.compile('输入英雄不存在[,，]请重新输入(.*?)英雄介绍', re.S)
HERO_ID_RE = re.compile(r'<li><a href="herodetail/(.*?)\.shtml"', re.S)
HERO_NAME_RE = re.compile('height="91" alt="(.*?)">')
SKIN_NAMES_RE = re.compile('data-imgname="(.*?)">', re.S)

# Characters stripped from the raw skin-name attribute before splitting it.
SKIN_NOISE = str.maketrans('', '', '&0123456789')


def fetch_page(url):
    """Download *url* and return its body decoded as GBK text."""
    # The context manager closes the HTTP response even if decoding fails.
    with urllib.request.urlopen(url) as response:
        return response.read().decode('gbk')


def parse_hero_list(page):
    """Return ``[(hero_id, hero_name), ...]`` extracted from the list page.

    Raises:
        ValueError: if the section delimited by the two marker strings is not
            present in *page*.
    """
    section = HERO_SECTION_RE.search(page)
    if section is None:
        raise ValueError('hero list section not found in page')
    body = section.group(1)
    # Ids and names are matched independently and paired by position; zip()
    # stops at the shorter list, so a mismatch cannot index out of range.
    return list(zip(HERO_ID_RE.findall(body), HERO_NAME_RE.findall(body)))


def clean_skin_names(raw):
    """Strip every '&' and ASCII digit from *raw*, then split it on '|'."""
    return raw.translate(SKIN_NOISE).split('|')


def parse_skin_names(page):
    """Return the skin names found on a hero detail page.

    Only the first ``data-imgname`` attribute is used. An empty list is
    returned when the page has no such attribute.
    """
    found = SKIN_NAMES_RE.search(page)
    if found is None:
        return []
    return clean_skin_names(found.group(1))


def ask_save_dir():
    """Prompt on stdin until the user enters a non-empty path; return it."""
    while True:
        path = input('请输入想要保存的路径:')
        if path != '':
            return path
        print('不能为空')


def main():
    """Fetch the hero list, ask for a target directory, download all skins."""
    heroes = parse_hero_list(fetch_page(HERO_LIST_URL))
    print('本次共抓取到' + str(len(heroes)) + '个英雄数据')

    save_dir = ask_save_dir()
    # urlretrieve does not create missing directories, so make sure it exists.
    os.makedirs(save_dir, exist_ok=True)

    # Iterate the pairs directly; the original index loop ran one position
    # past the end of the list and always finished with an IndexError.
    for hero_id, hero_name in heroes:
        print('正在下载' + hero_name)
        detail_page = fetch_page(HERO_DETAIL_URL.format(hero_id=hero_id))
        skins = parse_skin_names(detail_page)
        print(skins)
        # Skin image files are numbered from 1 in the same order as the names.
        for index, skin in enumerate(skins, start=1):
            print(hero_name + '-' + skin)
            image_url = SKIN_IMAGE_URL.format(hero_id=hero_id, index=index)
            target = os.path.join(save_dir, hero_name + '-' + skin + '.jpg')
            urllib.request.urlretrieve(image_url, target)


# Guarded so that importing this file (e.g. to test the parsers) does not
# start network requests or prompt for input; running it as a script behaves
# as before.
if __name__ == '__main__':
    main()



