B站从2023年三月起,Web端部分接口开始使用 Wbi 鉴权方式,即一种独立于 APP 鉴权 与其他 Cookie 鉴权的方式,
表现在 REST API 请求时在 query 中添加了w_rid和wts字段,为一种 Web 端的风控手段
这些接口涵盖"用户投稿视频"、"用户投稿专栏"、"首页推送"、"推广信息"、"热搜"、"视频信息"、"视频取流"、"搜索"等主要查询性业务接口,
如果请求这些 REST API 缺失w_rid和wts字段,则会在数次请求后返回-403:非法访问这样的风控错误.
from functools import reduce
from hashlib import md5
import urllib.parse
import time
import requests
# Index table consumed by getMixinKey: entry n names the position in
# img_key + sub_key whose character becomes character n of the shuffled key.
# It holds each of the indices 0..63 exactly once.
mixinKeyEncTab = [
    46, 47, 18, 2, 53, 8, 23, 32, 15, 50, 10, 31, 58, 3, 45, 35, 27, 43, 5, 49,
    33, 9, 42, 19, 29, 28, 14, 39, 12, 38, 41, 13, 37, 48, 7, 16, 24, 55, 40,
    61, 26, 17, 0, 1, 60, 51, 30, 4, 22, 25, 54, 21, 56, 59, 6, 63, 57, 62, 11,
    36, 20, 34, 44, 52
]


def getMixinKey(orig: str) -> str:
    """Shuffle the characters of ``orig`` and keep the first 32 of them.

    Args:
        orig: The concatenation ``img_key + sub_key``. It has to be at least
            64 characters long because the table indexes positions 0..63.

    Returns:
        The first 32 characters of ``orig`` rearranged according to
        ``mixinKeyEncTab``.

    Raises:
        IndexError: If ``orig`` is shorter than 64 characters.
    """
    # str.join assembles the result in a single pass; folding with ``+``
    # would copy the growing string on every step.
    return ''.join(orig[i] for i in mixinKeyEncTab)[:32]
def encWbi(params: dict, img_key: str, sub_key: str) -> dict:
    """Sign request parameters with the Wbi scheme.

    The current Unix timestamp is added as ``wts``, the parameters are
    serialised in key order, and the MD5 hex digest of that query string
    followed by the mixin key is stored as ``w_rid``.

    Args:
        params: Query parameters to sign. The dict is not modified.
        img_key: First half of the key material; concatenated with
            ``sub_key`` and passed to ``getMixinKey``.
        sub_key: Second half of the key material.

    Returns:
        A new dict, ordered by key, in which every value has been converted
        to ``str`` with the characters ``!'()*`` removed, plus the ``wts``
        and ``w_rid`` entries.
    """
    mixin_key = getMixinKey(img_key + sub_key)

    # Work on a copy so the caller's dict does not silently gain a 'wts' key.
    unsigned = dict(params)
    unsigned['wts'] = round(time.time())

    # Sort by key, stringify every value and drop the characters "!'()*".
    strip_reserved = str.maketrans('', '', "!'()*")
    cleaned = {
        key: str(value).translate(strip_reserved)
        for key, value in sorted(unsigned.items())
    }

    # NOTE(review): quote (rather than the default quote_plus) encodes a space
    # as %20 instead of '+', matching JavaScript's encodeURIComponent, which
    # this signing scheme appears to follow - confirm against the upstream
    # signing specification.
    query = urllib.parse.urlencode(cleaned, quote_via=urllib.parse.quote)

    # w_rid is the MD5 of the serialised query with the mixin key appended.
    cleaned['w_rid'] = md5((query + mixin_key).encode()).hexdigest()
    return cleaned
def getWbiKeys(timeout: float = 10.0) -> tuple[str, str]:
    """Fetch the current ``img_key`` and ``sub_key`` from the nav endpoint.

    Args:
        timeout: Seconds to wait for the HTTP request before giving up.
            Without a timeout ``requests`` may block indefinitely.

    Returns:
        ``(img_key, sub_key)``, each being the file name (without extension)
        of the corresponding URL under ``data.wbi_img`` in the response.

    Raises:
        requests.HTTPError: If the endpoint answers with an error status.
        requests.Timeout: If no response arrives within ``timeout`` seconds.
        KeyError: If the JSON payload lacks the ``data.wbi_img`` fields.
    """
    resp = requests.get(
        'https://api.bilibili.com/x/web-interface/nav',
        timeout=timeout,
    )
    resp.raise_for_status()
    wbi_img = resp.json()['data']['wbi_img']

    def key_of(url: str) -> str:
        # Keep the last path segment and cut it at its first dot.
        return url.rsplit('/', 1)[1].split('.')[0]

    return key_of(wbi_img['img_url']), key_of(wbi_img['sub_url'])
# Fetch the key pair once; it is used for the signing call below.
img_key, sub_key = getWbiKeys()
# Request parameters assembled as a dict.
params = {
    'pn': '8',
    'mid': '20165629',
    'ps': '30',
    'tid': '0',
    'keyword': '',
    # Was misspelled 'orde'; the sample query below uses 'order'.
    'order': 'pubdate',
    'platform': 'web',
    'web_location': '1550101',
    'order_avoided': 'true'
}
# Build the signed query for one page of the video list. Sample signed query:
# mid=20165629&ps=30&tid=0&pn=144&keyword=&order=pubdate&platform=web&web_location=1550101&order_avoided=true&w_rid=1dda1e4a5d5a7102c7e820d6edbdfe96&wts=1685644770
signed_params = encWbi(
    params,
    img_key=img_key,
    sub_key=sub_key
)
query = urllib.parse.urlencode(signed_params)
print(signed_params)
print(query)
if '__main__' == __name__:
    # Show the (img_key, sub_key) pair returned by a fresh request.
    print(getWbiKeys())
    # Echo the page numbers 1 through 144, one per line.
    for pn in range(1, 144 + 1):
        print(pn)