今天给大家爬取一个王者荣耀的详情,让大家更好更快了解,说白了,就是想练练手
点个赞留个关注吧!!
今天爬取的内容相对简单,我也不知道难不难,可能对于新手来说会有一点难,难就难着吧,不下功夫当然过不了,熬三个夜晚就会了
今天爬取的主要内容分别是:英雄名称、详情链接、英雄属性(生存、伤害、技能、难度) 、英雄皮肤(非图片) 、铭文搭配推荐、召唤师技能推荐(针对不同英雄给予不同的推荐) 、出装搭配推荐(两种方案) 、最佳的搭档、压制英雄、被压制英雄
爬取的不包含图片/文件,只有可视文字字幕,用了一个晚上做出来的,闲得睡不着觉才想到做这个爬虫的,希望能帮助到大家,不妨先点个关注,给个赞也可以
因为这个网站的一些问题,爬取不到前几位的英雄,我试了很多次都无济于事,也懒得搞了就暂时先不管这个了,如果有哪一位博主知道怎么回事,请在评论区或私信说一下,谢谢
废话不多说,由于代码较多,原本我也想分三个文件写入,但是我太懒了,直接写到一个py文件内了,直接上代码:
# -*- coding: utf-8 -*-
import requests
import re
from bs4 import BeautifulSoup
# Download the hero roster page and locate the <ul class="herolist clearfix">
# element; the per-hero loop iterates over the <li> items of this element.
roster_response = requests.get('https://pvp.qq.com/web201605/herolist.shtml')
soup = BeautifulSoup(
    roster_response.content,
    'html.parser',
    from_encoding="utf-8",
)
div_people_list = soup.find('ul', attrs={'class': 'herolist clearfix'})
# Visit every hero <li> found on the roster page.
# NOTE(review): this listing has lost its indentation. The statements that
# follow appear to form the body of this loop, and the if/else chain on T
# below is only closed by the trailing `else:` lines at the very end of the
# script. Restore the nesting before running.
for a in div_people_list.find_all('li'):
# Build the absolute detail-page URL from the href of the item's first <a>.
text_1 = a.find('a')
URL_2 = ('https://pvp.qq.com/web201605/'+text_1['href']) # detail-page link
# Fetch the detail page and read the hero name from <h2 class="cover-name">.
URL_3 = requests.get(f'{URL_2}')
html_2 = URL_3.content # raw bytes of the detail page
soup_2 = BeautifulSoup(html_2,'html.parser',from_encoding="utf-8") # parser
text_2 = soup_2.find('h2', attrs={'class': 'cover-name'}).get_text() # hero name
#print(text_2+'\t\t'+URL_2)
# Attribute bars: each <i class="ibar"> inside <ul class="cover-list"> carries
# its value in the inline style; the text after "width:" is what gets printed.
text_3 = soup_2.find('ul', attrs={'class': 'cover-list'}) # attribute list
# T counts the bars seen so far and selects which label is printed.
T = 0
for text_4 in text_3.find_all('i', attrs={'class': 'ibar'}):
r = text_4['style']
T += 1
# Bar 1 also prints the banner line with the hero name and detail link.
# re.findall returns a list, so each value is printed in list form.
if T == 1:
print(f'------------------------------>>{text_2}<<------------------------------\n'+text_2 + '\t人物详情链接:' + URL_2)
print('生存能力:', end='')
print(re.findall('width:(.*)', r), end='')
else:
# Bar 2 ("attack damage") ends the first output line.
if T == 2:
print('\t\t攻击伤害:', end='')
print(re.findall('width:(.*)', r))
else:
# Bar 3 ("skill effect") starts the second output line.
if T == 3:
print('技能效果:', end='')
print(re.findall('width:(.*)', r), end='')
else:
# Bar 4 ("difficulty") ends the second output line.
if T == 4:
print('\t\t上手难度:', end='')
print(re.findall('width:(.*)', r))
# Skins: the data-imgname attribute is a '|'-separated list whose entries end
# in an '&<number>' suffix (presumably "name&index" — confirm against the page).
text_5 = soup_2.find('ul', attrs={'class': 'pic-pf-list pic-pf-list3'})['data-imgname'] # raw attribute
# Remove only the '&<digits>' suffixes. The previous version deleted every
# digit in the string, which also damaged skin names that contain digits.
data = re.sub(r'&\d+', '', text_5)
new_string = data.replace("|", "] [")
print(f'{text_2}的皮肤:['+new_string+']') # print the skin list
# Recommended inscriptions: raw '|'-separated ID list from data-ming.
text_6 = soup_2.find('ul', attrs={'class': 'sugg-u1'})['data-ming']
# Bracketed form of the same list; later code slices it by character offset.
data_2 = text_6.replace("|", "] [")
# Inscription ID -> inscription name. One entry per distinct ID tested by the
# original if/else chain; repeated tests of the same ID were unreachable there
# and are listed once here.
_MWTJ_NAMES = {
    1514: '梦魇',
    3531: '心眼',
    2520: '狩猎',
    1504: '异变',
    3514: '鹰眼',
    2517: '隐匿',
    1510: '无双',
    2503: '贪婪',
    3515: '心眼',
    1512: '宿命',
    2515: '调和',
    3509: '虚空',
    3516: '怜悯',
    1501: '圣人',
    3511: '献祭',
    2512: '轮回',
    2506: '兽痕',
    1517: '凶兆',
    1519: '祸源',
    1503: '传承',
    3503: '均衡',
    2501: '长生',
    1520: '红月',
    2504: '夺萃',
    1505: '纷争',
    3518: '回声',
}


def mwtj(f):
    """Return the inscription name for the numeric inscription ID *f*.

    Args:
        f: Inscription ID as an integer.

    Returns:
        The name string for a known ID, or ``None`` for an ID that is not
        in the table. ``None`` matches what the original nested if/else
        chain produced by falling through without a ``return``, so callers
        that concatenate the result into a string must handle it.
    """
    return _MWTJ_NAMES.get(f)
# Read the recommended inscription IDs from the '|'-separated data-ming value.
# Splitting on the separator replaces fixed character offsets (0, 8 and 16
# into data_2), which only line up for one exact separator width.
ming_ids = [int(part) for part in text_6.split('|') if part.strip()]
data_3, data_4, data_5 = ming_ids[:3]
# Print the three inscription names. An ID that mwtj() has no name for is
# shown as the raw number instead of raising TypeError on `str + None`.
print('铭文搭配推荐:['+(mwtj(data_3) or str(data_3))+'] ['+(mwtj(data_4) or str(data_4))+'] ['+(mwtj(data_5) or str(data_5))+']')
# Summoner-skill ID -> skill name, one entry per ID tested by the original
# if/else chain.
_Z_C_NAMES = {
    80115: '闪现',
    80107: '净化',
    80102: '治疗',
    80108: '终结',
    80121: '弱化',
    80103: '眩晕',
    80104: '惩击',
    80109: '疾跑',
    80110: '狂暴',
}


def Z_C(L):
    """Return the summoner-skill name for the numeric skill ID *L*.

    Args:
        L: Summoner-skill ID as an integer.

    Returns:
        The name string for a known ID, or ``None`` for an ID that is not
        in the table (the original chain fell through without a
        ``return`` in that case).
    """
    return _Z_C_NAMES.get(L)
# Locate the summoner-skill recommendation; the IDs are stored '|'-separated
# in the data-skill attribute of <p id="skill3">.
skill = soup_2.find('div', attrs={'class': 'sugg-info2 info'})
skill_list = skill.find('p', attrs={'id': 'skill3'})['data-skill']
# Split on the separator instead of slicing fixed windows: the old windows
# [0:8] and [7:15] overlapped and only parsed for one exact separator width.
skill_ids = [int(part) for part in skill_list.split('|') if part.strip()]
Z_1, Z_2 = skill_ids[:2]
# An ID that Z_C() has no name for is shown as the raw number instead of
# raising TypeError on `str + None`.
print('召唤师技能推荐:['+(Z_C(Z_1) or str(Z_1))+'] 或 ['+(Z_C(Z_2) or str(Z_2))+']')
# Recommended builds: each build's <ul class="equip-list fl"> keeps its raw
# item list in the data-item attribute.
# Build one comes from the first <div class="equip-info l"> on the page.
first_build_box = soup_2.find('div', attrs={'class': 'equip-info l'})
first_build_list = first_build_box.find('ul', attrs={'class': 'equip-list fl'})
cz_text = first_build_list['data-item']
# Build two comes from the second such <div> inside <div class="equip-bd">.
equip_container = soup_2.find('div', attrs={'class': 'equip-bd'})
build_boxes = equip_container.find_all('div', attrs={'class': 'equip-info l'})
second_build_list = build_boxes[1].find('ul', attrs={'class': 'equip-list fl'})
cz_2 = second_build_list['data-item']
# Equipment ID -> equipment name. One entry per distinct ID tested by the
# original if/else chain; repeated tests of the same ID were unreachable there
# and are listed once here.
_CZTJ_NAMES = {
    1425: '急速战靴',
    1126: '末世',
    1137: '暗影战斧',
    1138: '破军',
    1155: '破晓',
    1127: '名刀·司命',
    1132: '泣血之刃',
    1423: '冷静之靴',
    1233: '回响之杖',
    1235: '痛苦面具',
    1232: '博学者之怒',
    1727: '日暮之流',
    1238: '贤者之书',
    1240: '噬神之书',
    1234: '凝冰之息',
    1533: '贪婪之噬',
    1422: '抵抗之靴',
    13310: '冰痕之握',
    1335: '魔女斗篷',
    1129: '速击之枪',
    1728: '金色圣剑',
    1222: '血族之书',
    1231: '虚无法杖',
    1223: '光辉之剑',
    1236: '巫术法杖',
    1134: '宗师之力',
    1131: '碎星锤',
    1336: '极寒风暴',
    1334: '不死鸟之眼',
    1532: '巨人之握',
    1426: '疾步之靴',
    1332: '霸者重装',
    1327: '反伤刺甲',
    1421: '影忍之足',
    1333: '不详征兆',
    1337: '贤者的庇护',
    1522: '巡守利斧',
    1133: '无尽战刃',
    1136: '影刃',
    1721: '极影',
    1722: '救赎之翼',
    1331: '红莲斗篷',
    1239: '辉月',
    1523: '追击刀锋',
    1424: '秘法之靴',
    1135: '闪电匕首',
    1724: '近卫荣耀',
    1226: '圣杯',
    1225: '进化水晶',
    1531: '符文大剑',
    1227: '炽热支配者',
    1237: '时之预言',
    11311: '纯净苍穹',
    1521: '游击弯刀',
    1328: '血魔之怒',
    12211: '梦魇之牙',
    1723: '奔狼纹章',
    1338: '爆裂之甲',
}


def cztj(G):
    """Return the equipment name for the numeric equipment ID *G*.

    Args:
        G: Equipment ID as an integer.

    Returns:
        The name string for a known ID, or ``None`` for an ID that is not
        in the table (the original chain fell through without a
        ``return`` in that case).
    """
    return _CZTJ_NAMES.get(G)
# Convert both recommended builds from their '|'-separated data-item values
# into lists of item IDs (at most six each). Splitting on the separator
# replaces fixed character windows, which only parsed for one exact separator
# width.
build_one_ids = [int(part) for part in cz_text.split('|') if part.strip()][:6]
build_two_ids = [int(part) for part in cz_2.split('|') if part.strip()][:6]
# Print build one. An ID that cztj() has no name for is shown as the raw
# number instead of raising TypeError on `str + None`.
print('推荐出装一:[' + '] ['.join(cztj(item) or str(item) for item in build_one_ids) + ']')
# Print build two. The previous version used a bare `except:` to fall back to
# five items when printing six failed; that happens when the sixth item has
# no name, so the case is now checked explicitly and the item is omitted.
if len(build_two_ids) == 6 and cztj(build_two_ids[5]) is None:
    build_two_ids = build_two_ids[:5]
print('推荐出装二:[' + '] ['.join(cztj(item) or str(item) for item in build_two_ids) + ']')
# Related heroes: the <div class="hero-info-box"> holds several
# <div class="hero-info l info"> groups; the first three are read in order and
# labelled below as best partners, suppressed heroes, and heroes that
# suppress this one.
# NOTE(review): this listing has lost its indentation, so the nesting of the
# statements below is not recoverable from the text alone — confirm it.
dp = soup_2.find('div', attrs={'class': 'hero-info-box'})
# H is the index of the group being read; it also selects the printed label.
H = 0
for TTT in range(3):
# First linked hero of group H: follow the href and read the hero name from
# the linked page's <h2 class="cover-name">.
dp_list_1 = dp.find_all('div', attrs={'class': 'hero-info l info'})[H].find('a')['href']
URL_4 = ('https://pvp.qq.com/web201605/herodetail/' + dp_list_1) # link
URL_5 = requests.get(f'{URL_4}')
html_3 = URL_5.content # raw bytes of the linked page
soup_3 = BeautifulSoup(html_3, 'html.parser', from_encoding="utf-8") # parser
text_7 = soup_3.find('h2', attrs={'class': 'cover-name'}).get_text() # first hero's name
# Second linked hero of group H, resolved the same way. URL_5 is reused:
# first as the URL string, then as the response object.
dp_list_2 = dp.find_all('div', attrs={'class': 'hero-info l info'})[H].find_all('a')[1]['href']
URL_5 = ('https://pvp.qq.com/web201605/herodetail/' + dp_list_2) # link
URL_5 = requests.get(f'{URL_5}')
html_4 = URL_5.content # raw bytes of the linked page
soup_4 = BeautifulSoup(html_4, 'html.parser', from_encoding="utf-8") # parser
text_8 = soup_4.find('h2', attrs={'class': 'cover-name'}).get_text() # second hero's name
H += 1
# After the increment H is 1, 2 or 3 and picks the label for this group.
if H == 1: # best partners
print('最佳搭档:['+text_7+'] ['+text_8+']')
else:
if H == 2: # suppressed heroes
print('压制英雄:[' + text_7 + '] [' + text_8 + ']')
else:
if H == 3: # heroes that suppress this one
print('被压制英雄:[' + text_7 + '] [' + text_8 + ']')
# Blank separator after the last group, then reset the group index.
# NOTE(review): may belong inside the `H == 3` branch or after the loop.
print('\n')
H = 0
# NOTE(review): H starts at 0 and is incremented once per pass of a
# three-pass loop, so `H == 4` looks unreachable here — confirm and remove.
if H == 4:
H = 0
pass
# NOTE(review): this `else:` presumably closes the if/else chain on T opened
# in the attribute-bar loop earlier in the script; it resets the bar counter
# T when a fifth bar is seen.
else:
if T == 5:
T = 0
else:
pass
可以看到已经爬取出来了
不妨点个赞再走呗!!!