selenium模拟登录b站
工具链接:
chromedriver镜像
stealth.min.js抹掉机器特征的脚本
selenium-wire获取http请求头信息
# 安装
pip install selenium-wire
部分参考链接:
b站滑动登陆 ----- こころ~
selenium重复执行move by offset时位移值自动累加的问题 ---- vansl
【python】selenium获取http请求头信息
大佬的模拟轨迹算法很强,但比较看脸
登录成功次数不算多,但至少也算是可以糊弄一下b站了
本来想获取下自己账号的cookies的,但模拟登录实在有些麻烦,以后有时间再优化优化吧
手动登录写的差不多了,selenium-wire能获取完整的cookie就很强
建议
结合Python爬取b站视频(api真实地址)下载某b视频属实香
完整代码:
import random
from PIL import Image, ImageChops
# from selenium import webdriver
from selenium.webdriver import ActionChains
from selenium.webdriver.chrome.options import Options
import time
import base64
import json
# pip install selenium-wire
# https://github.com/wkeeling/selenium-wire
from seleniumwire import webdriver
def _save_canvas_png(bro, class_name, png_path):
    """Export the first element with *class_name* to *png_path* as PNG.

    :param bro: WebDriver whose current page contains the canvas.
    :param class_name: value passed to ``document.getElementsByClassName``.
    :param png_path: file that receives the decoded PNG bytes.
    """
    script = (
        'return document.getElementsByClassName("'
        + class_name
        + '")[0].toDataURL("image/png");'
    )
    data_url = bro.execute_script(script)
    # toDataURL yields "<header>,<base64 payload>"; only the payload is image data.
    payload = data_url.split(',')[1]
    with open(png_path, "wb") as f:
        f.write(base64.b64decode(payload))


def get(bro):
    """Open the bilibili login page, submit the form and save the captcha images.

    Types the literal strings ``'username'`` and ``'password'`` into the login
    form (replace them with real credentials), clicks the login link and then
    dumps two geetest canvases to disk:

    * ``.\\test1.png`` - the ``geetest_canvas_bg`` canvas
      (presumably the picture with the gap - confirm against the page).
    * ``.\\test2.png`` - the ``geetest_canvas_fullbg`` canvas
      (presumably the complete picture - confirm against the page).

    :param bro: WebDriver instance used to drive the browser.
    :return: None
    """
    # Imported locally so the block does not rely on a new file-level import.
    # find_element(By...) replaces the find_element_by_* helpers, which were
    # removed in Selenium 4.3 and are unavailable on newer installations.
    from selenium.webdriver.common.by import By

    bro.get('https://passport.bilibili.com/login')
    time.sleep(2)

    # Fill in the form, pausing between actions as the original script did.
    bro.find_element(By.ID, 'login-username').send_keys('username')
    time.sleep(1)
    bro.find_element(By.ID, 'login-passwd').send_keys('password')
    time.sleep(1)
    bro.find_element(By.XPATH, '//*[@id="geetest-wrap"]/div/div[5]/a[1]').click()
    # Wait before reading the captcha canvases.
    time.sleep(2)

    _save_canvas_png(bro, "geetest_canvas_bg geetest_absolute", r".\test1.png")
    time.sleep(1)
    _save_canvas_png(
        bro,
        "geetest_canvas_fullbg geetest_fade geetest_absolute",
        r".\test2.png",
    )
def dif():
    """Find the first pixel at which the two saved captcha images differ.

    Reads ``.\\test1.png`` and ``.\\test2.png``, thresholds their absolute
    difference into a 1-bit mask (also written to ``.\\out.png``) and scans the
    mask row by row, left to right.

    :return: ``(x, y)`` of the first mask pixel equal to 1. When no pixel
        matches, the scan falls off the end and the image size
        ``(width, height)`` is returned.
    """
    first = Image.open(r".\test1.png")
    second = Image.open(r".\test2.png")
    gray = ImageChops.difference(second, first).convert("L")

    # Difference levels below the threshold map to 0, everything else to 1.
    threshold = 30
    lookup = [0 if level < threshold else 1 for level in range(256)]
    mask = gray.point(lookup, "1")
    mask.save(r".\out.png")

    width, height = mask.size
    for row in range(height):
        for col in range(width):
            if mask.getpixel((col, row)) == 1:
                return col, row
    # No hit: mirror the counters the scan ends with (x stays 0 when there
    # are no rows to scan at all).
    return (width if height else 0), height
def get_track(x):
    """Build a list of per-step offsets that imitates an accelerating drag.

    The simulated pointer speeds up with a random acceleration until it has
    covered 5/8 of ``x`` and slows down with a random deceleration afterwards.
    The simulation stops once the un-rounded distance reaches ``x + 10``.
    Each step's displacement is rounded individually, so the sum of the
    returned values is not guaranteed to equal ``x``.

    :param x: distance to cover.
    :return: list of ints - one rounded displacement per simulated step,
        followed by four random correction values, each between -3 and 1.
    """
    step_time = 0.2
    speed = 0
    travelled = 0
    offsets = []

    # Deceleration starts once this distance has been covered.
    switch_point = x * 5 / 8
    target = x + 10

    while travelled < target:
        accel = (
            random.randint(1, 3)
            if travelled < switch_point
            else -random.randint(2, 4)
        )
        # Displacement under constant acceleration during one time step.
        step = speed * step_time + 0.5 * accel * (step_time ** 2)
        travelled += step
        offsets.append(round(step))
        speed = speed + accel * step_time

    # Trailing jitter after the main movement.
    offsets.extend(-random.randint(-1, 3) for _ in range(4))
    return offsets
def move(bro, long):
    """Drag the geetest slider button along a generated track.

    Builds a track for ``long * 0.8`` with ``get_track``, presses and holds
    the slider button, replays every non-zero offset as a separate relative
    mouse move, pauses briefly and releases the button.

    :param bro: WebDriver instance showing the captcha.
    :param long: horizontal distance measured on the captcha image.
    :return: None
    """
    # Imported locally so the block does not rely on a new file-level import.
    # find_element(By.XPATH, ...) replaces find_element_by_xpath, which was
    # removed in Selenium 4.3.
    from selenium.webdriver.common.by import By

    # NOTE(review): the original comment mentions a Chrome zoom of 1.25, so
    # the 0.8 factor presumably converts image pixels to page pixels - confirm.
    t = get_track(long * 0.8)
    print(t)

    move_btn = bro.find_element(By.XPATH, '//*[@class="geetest_slider_button"]')
    ActionChains(bro).click_and_hold(move_btn).perform()

    for move_l in t:
        if move_l == 0:
            continue
        # A reused ActionChains keeps accumulating earlier offsets unless it is
        # reset, so a fresh chain is created for every step. See
        # https://github.com/SeleniumHQ/selenium/issues/5747#issuecomment-379949052
        ActionChains(bro).move_by_offset(xoffset=move_l, yoffset=0).perform()
        print(move_l, move_btn.location['x'])

    # Short pause before letting go of the button.
    time.sleep(0.3)
    ActionChains(bro).release(move_btn).perform()
    time.sleep(2)
def _auto_login(br):
    """Menu option 1: solve the slider captcha automatically and print cookies."""
    # Save the captcha images, then locate the differing region.
    get(br)
    gap_x, gap_y = dif()
    print(gap_x, gap_y)
    time.sleep(1)
    move(br, gap_x)
    cookie = br.get_cookies()
    print(cookie)


def _manual_login(br):
    """Menu option 2: let the user log in by hand, then save the cookies.

    Writes ``document.cookie`` to ``cookie.txt`` and the WebDriver cookie list
    (as JSON) to ``jsoncookies.txt``.
    """
    get(br)
    input("等待手动登录...(完成请回车继续)")
    br.get('https://www.bilibili.com')
    time.sleep(2)

    cookies = br.execute_script('return document.cookie')
    print(cookies)
    # Cookies as reported by the browser after login, serialised for reuse.
    jsoncookies = json.dumps(br.get_cookies())
    with open('cookie.txt', 'w') as f:
        f.write(cookies)
    with open('jsoncookies.txt', 'w') as f:
        f.write(jsoncookies)
    print('cookies is ok')


def _replay_saved_cookies(br):
    """Menu option 3: load ``jsoncookies.txt`` and capture a request cookie.

    After the saved cookies are added and the page is refreshed, the
    ``cookie`` header of every answered request recorded by selenium-wire is
    collected and the longest one is written to ``cookie.txt``.
    """
    br.get('https://www.bilibili.com/video/BV1V44y1q7RQ')
    # The context manager closes the file, which the original never did.
    with open('jsoncookies.txt', 'r') as f:
        listcookie = json.load(f)
    for cookie in listcookie:
        br.add_cookie(cookie)
    br.refresh()
    time.sleep(2)

    cookies = br.execute_script('return document.cookie')
    print(cookies)

    # selenium-wire exposes the captured traffic through `requests`.
    cookie_list = [
        request.headers["cookie"]
        for request in br.requests
        if request.response and "cookie" in request.headers
    ]
    if not cookie_list:
        # max() on an empty list would raise ValueError; report it instead.
        print("\n未捕获到带cookie的请求,cookie.txt未更新")
        return
    # A plain max() compares strings alphabetically; the longest header is
    # taken as the most complete one.
    # NOTE(review): confirm "longest" is the intended selection rule.
    full_cookie = max(cookie_list, key=len)
    print("\n下载cookie:" + full_cookie)
    with open('cookie.txt', 'w') as f:
        f.write(full_cookie)


def main():
    """Show the menu, start Chrome for the chosen mode and always shut it down."""
    path = r'C:\Program Files (x86)\Google\Chrome\Application\chromedriver'
    choice = input("1.自动登录(待优化)\n2.手动验证码登录(建议)\n3.登录测试并获取cookie\n0.退出\n输入:")

    handlers = {
        "1": _auto_login,
        "2": _manual_login,
        "3": _replay_saved_cookies,
    }
    if choice not in handlers:
        # "0" or anything unrecognised: exit without starting a browser.
        return

    if choice == "3":
        br = webdriver.Chrome()
    else:
        # Options that hide the usual Selenium automation markers.
        opts = Options()
        opts.add_experimental_option("excludeSwitches", ["enable-automation"])
        opts.add_argument("--disable-blink-features=AutomationControlled")
        # Chrome parses this switch as "width,height".
        opts.add_argument('--window-size=1920,1080')
        # Read only when needed, and as UTF-8 so the result does not depend
        # on the platform's default encoding.
        with open('./stealth.min.js', encoding='utf-8') as fp:
            js = fp.read()
        br = webdriver.Chrome(path, options=opts)

    try:
        if choice != "3":
            # Inject the stealth script into every new document.
            br.execute_cdp_cmd("Page.addScriptToEvaluateOnNewDocument", {"source": js})
        br.maximize_window()
        handlers[choice](br)
        time.sleep(1)
    finally:
        # Quit even when a step above raised, so no browser process is left.
        br.quit()


if __name__ == '__main__':
    main()