Selenium 模拟登录 B 站

649 阅读4分钟

Selenium 模拟登录 B 站

工具链接:
chromedriver镜像
stealth.min.js抹掉机器特征的脚本
selenium-wire获取http请求头信息

# 安装
pip install selenium-wire

部分参考链接:
b站滑动登陆 ----- こころ~
selenium重复执行move by offset时位移值自动累加的问题 ---- vansl
【python】selenium获取http请求头信息
大佬的模拟轨迹算法很强,但比较看脸
登录成功的次数不算多,但至少也算是可以糊弄一下 B 站了

本来想获取一下自己账号的 cookies,但模拟登录实在有些麻烦,以后有时间再优化优化吧
手动登录写得差不多了,selenium-wire 能获取完整的 cookie,这一点就很强
建议
结合Python爬取b站视频(api真实地址)下载某b视频属实香
完整代码:

import random
from PIL import Image, ImageChops
# from selenium import webdriver
from selenium.webdriver import ActionChains
from selenium.webdriver.chrome.options import Options
import time
import base64
import json
# pip install selenium-wire
# https://github.com/wkeeling/selenium-wire
from seleniumwire import webdriver


def _save_canvas_png(bro, class_names, out_path):
    """Write the first canvas carrying *class_names* to *out_path* as PNG bytes.

    :param bro: WebDriver whose current page contains the canvas.
    :param class_names: value passed to ``document.getElementsByClassName``.
    :param out_path: file the decoded image bytes are written to.
    """
    script = (
        'return document.getElementsByClassName("'
        + class_names
        + '")[0].toDataURL("image/png");'
    )
    data_url = bro.execute_script(script)
    # Everything after the first comma is the base64 payload of the data URL.
    payload = data_url.split(',')[1]
    with open(out_path, "wb") as f:
        f.write(base64.b64decode(payload))


def get(bro):
    """Open the bilibili login page, submit the form and dump the captcha canvases.

    Types the placeholder credentials ``'username'`` / ``'password'`` into the
    login form, clicks the submit link, and then saves two canvases to disk:

    * ``geetest_canvas_bg``     -> ``.\\test1.png``
    * ``geetest_canvas_fullbg`` -> ``.\\test2.png``

    (Presumably the first is the puzzle image with the gap and the second the
    complete image -- confirm against the page.)

    :param bro: WebDriver instance used to drive the browser.
    :return: None
    """
    # find_element(By, value) exists in Selenium 3 and 4, whereas the
    # find_element_by_* shortcuts were removed in Selenium 4.3.
    from selenium.webdriver.common.by import By

    url = 'https://passport.bilibili.com/login'
    bro.get(url)
    time.sleep(2)

    # Fill in the form, pausing between actions.
    user_box = bro.find_element(By.ID, 'login-username')
    user_box.send_keys('username')
    time.sleep(1)
    password_box = bro.find_element(By.ID, 'login-passwd')
    password_box.send_keys('password')
    time.sleep(1)
    # Presumably the login button; clicking it brings up the slider captcha.
    btn = bro.find_element(By.XPATH, '//*[@id="geetest-wrap"]/div/div[5]/a[1]')
    btn.click()
    time.sleep(2)

    # Read both captcha canvases through JavaScript and store them as files.
    _save_canvas_png(bro, "geetest_canvas_bg geetest_absolute", r".\test1.png")
    time.sleep(1)
    _save_canvas_png(
        bro,
        "geetest_canvas_fullbg geetest_fade geetest_absolute",
        r".\test2.png",
    )


def dif():
    """Locate the first pixel where the two saved captcha images differ.

    Loads ``.\\test1.png`` and ``.\\test2.png``, takes their per-pixel
    difference, converts it to greyscale and thresholds it at 30 into a
    two-level mask, which is also saved as ``.\\out.png``.  The mask is then
    scanned row by row (top to bottom, left to right inside a row).

    :return: ``(x, y)`` of the first mask pixel equal to 1.  When no pixel
        matches, the scan runs off the end and ``(width, height)`` is returned.
    """
    background = Image.open(r".\test1.png")
    full_background = Image.open(r".\test2.png")
    grey_diff = ImageChops.difference(full_background, background).convert("L")

    # Difference threshold: grey levels below it map to 0, the rest to 1.
    threshold = 30
    lookup = [0 if level < threshold else 1 for level in range(256)]
    mask = grey_diff.point(lookup, "1")
    mask.save(r".\out.png")

    width, height = mask.size
    for row in range(height):
        for col in range(width):
            if mask.getpixel((col, row)) == 1:
                return col, row
    # Nothing matched: report the position the scan counters end on.
    return (width if height > 0 else 0), height


def get_track(x):
    """Build a list of per-step horizontal offsets for dragging the slider.

    Motion is simulated in time steps of 0.2: a random acceleration of 1..3 is
    applied while the distance covered is below ``x * 5 / 8``, and a random
    deceleration of 2..4 afterwards.  Steps are generated until the simulated
    distance reaches ``x + 10``; each step is stored rounded to an integer.
    Four extra random offsets in the range -3..1 are appended at the end.

    :param x: distance the track is based on.
    :return: list of integer offsets, one per simulated step.
    """
    step_time = 0.2
    velocity = 0
    travelled = 0
    # Past this point the acceleration turns negative.
    switch_point = x * 5 / 8
    target = x + 10
    steps = []
    while travelled < target:
        if travelled < switch_point:
            accel = random.randint(1, 3)
        else:
            accel = -random.randint(2, 4)
        # Distance covered under constant acceleration during one step.
        distance = velocity * step_time + 0.5 * accel * (step_time ** 2)
        travelled += distance
        steps.append(round(distance))
        velocity = velocity + accel * step_time
    # Trailing jitter after the target has been passed.
    steps.extend(-random.randint(-1, 3) for _ in range(4))
    return steps


def move(bro, long):
    """Drag the Geetest slider button along a simulated track.

    A track is generated for ``long * 0.8``, the slider button is pressed and
    held, every non-zero offset of the track is applied as a separate relative
    mouse move, and the button is released after a short pause.  The track and
    each step (with the button's current x location) are printed.

    :param bro: WebDriver showing the captcha.
    :param long: horizontal gap position measured on the captcha image.
    :return: None
    """
    # find_element(By, value) exists in Selenium 3 and 4, whereas the
    # find_element_by_* shortcuts were removed in Selenium 4.3.
    from selenium.webdriver.common.by import By

    # NOTE(review): 0.8 presumably compensates for a Chrome zoom of 1.25
    # (1 / 1.25 == 0.8) -- confirm on the target machine.
    offsets = get_track(long * 0.8)
    print(offsets)
    move_btn = bro.find_element(By.XPATH, '//*[@class="geetest_slider_button"]')
    ActionChains(bro).click_and_hold(move_btn).perform()
    for step in offsets:
        if step == 0:
            continue
        # An ActionChains object accumulates its queued offsets unless it is
        # reset, so a fresh chain is created for every step
        # (https://github.com/SeleniumHQ/selenium/issues/5747#issuecomment-379949052).
        ActionChains(bro).move_by_offset(xoffset=step, yoffset=0).perform()
        print(step, move_btn.location['x'])
    time.sleep(0.3)
    ActionChains(bro).release(move_btn).perform()
    time.sleep(2)


if __name__ == '__main__':
    # Interactive entry point.  Menu:
    #   1 - automatic login (captcha solved by the script),
    #   2 - manual captcha, then save the cookies to disk,
    #   3 - reload the saved cookies and capture the Cookie request header,
    #   0 - quit.

    # Location of the chromedriver executable (machine-specific).
    path = r'C:\Program Files (x86)\Google\Chrome\Application\chromedriver'
    # Browser options that hide the usual Selenium automation markers.
    chrome_options = Options()
    chrome_options.add_experimental_option("excludeSwitches", ["enable-automation"])
    chrome_options.add_argument("--disable-blink-features=AutomationControlled")
    chrome_options.add_argument('window-size=1920x1080')  # requested window size
    # Read explicitly as UTF-8 so the result does not depend on the locale.
    with open('./stealth.min.js', encoding='utf-8') as fp:
        js = fp.read()

    choice = input("1.自动登录(待优化)\n2.手动验证码登录(建议)\n3.登录测试并获取cookie\n0.退出\n输入:")
    br = None
    try:
        if choice in ("1", "2"):
            br = webdriver.Chrome(path, options=chrome_options)
            # Inject the stealth script into every new document before page code runs.
            br.execute_cdp_cmd("Page.addScriptToEvaluateOnNewDocument", {"source": js})
            br.maximize_window()
        elif choice == "3":
            br = webdriver.Chrome()
            br.maximize_window()

        if choice == "1":
            # Automatic login: grab the captcha images, find the gap, drag the slider.
            get(br)
            gap_x, gap_y = dif()
            print(gap_x, gap_y)
            time.sleep(1)
            move(br, gap_x)
            cookie = br.get_cookies()
            print(cookie)
        elif choice == "2":
            # Manual login: the form is pre-filled, the user solves the captcha.
            get(br)
            input("等待手动登录...(完成请回车继续)")
            br.get('https://www.bilibili.com')
            time.sleep(2)
            cookies = br.execute_script('return document.cookie')
            print(cookies)

            # Cookies as reported by the driver, serialised for option 3.
            jsoncookies = json.dumps(br.get_cookies())

            with open('cookie.txt', 'w') as f:
                f.write(cookies)
            with open('jsoncookies.txt', 'w') as f:
                f.write(jsoncookies)
            print('cookies is ok')
        elif choice == "3":
            # A page of the site must be open before its cookies can be added.
            br.get('https://www.bilibili.com/video/BV1V44y1q7RQ')
            with open('jsoncookies.txt', 'r') as f:
                listcookie = json.load(f)
            for cookie in listcookie:
                br.add_cookie(cookie)
            br.refresh()
            time.sleep(2)
            cookies = br.execute_script('return document.cookie')
            print(cookies)

            # selenium-wire exposes the captured traffic through `requests`.
            cookie_list = [
                request.headers["cookie"]
                for request in br.requests
                if request.response and "cookie" in request.headers
            ]
            if cookie_list:
                # Keep the longest header, i.e. the one carrying the most
                # cookie data, rather than the alphabetically greatest one.
                full_cookie = max(cookie_list, key=len)
                print("\n下载cookie:" + full_cookie)
                with open('cookie.txt', 'w') as f:
                    f.write(full_cookie)
            else:
                # No captured request carried a Cookie header; leave cookie.txt untouched.
                print("\n未捕获到带cookie的请求")
    finally:
        # Always close the browser, even when a step above raised.
        if br is not None:
            time.sleep(1)
            br.quit()