Python爬虫b站视频(api真实地址)
声明:==代码只用于学习交流,侵删==
api地址
https://api.bilibili.com/x/player/playurl
参数
cid:根据bv号得到
bvid:bv号
qn:清晰度参数
cookie
请将自己的cookie保存在当前目录下的cookie.txt中
代码运行时会打印一个可用于获取cookie的建议链接(需先在浏览器中正常登录)
完整代码(可选清晰度)
from lxml import etree
import requests
import os
# Fetch the video title
def name(bv, headers):
    """Return the title scraped from the web page of video ``bv``.

    The page https://www.bilibili.com/video/<bv> is requested with
    ``headers``, parsed with lxml, and the first text node matched by the
    title XPath is returned. Raises IndexError when the XPath matches
    nothing.
    """
    page_html = requests.get('https://www.bilibili.com/video/' + bv, headers=headers).text
    title_nodes = etree.HTML(page_html).xpath('//*[@id="viewbox_report"]/h1/span/text()')
    return title_nodes[0]
# Fetch the cid
def cid(bv, headers):
    """Return the ``cid`` of the first entry in the page list of video ``bv``.

    Queries the ``x/player/pagelist`` endpoint with ``headers`` and reads
    ``data[0]['cid']`` from the JSON reply.
    """
    reply = requests.get(
        'https://api.bilibili.com/x/player/pagelist',
        params={'bvid': '%s' % bv, 'jsonp': 'jsonp'},
        headers=headers,
    ).json()
    first_entry = reply['data'][0]
    return first_entry['cid']
# Fetch the play-URL information of the video
def flv(cid, bv, headers, quality):
    """Return the decoded JSON reply of the ``x/player/playurl`` endpoint.

    ``cid``, ``bv`` and ``quality`` are sent as the ``cid``, ``bvid`` and
    ``qn`` query parameters; ``headers`` are the HTTP request headers.
    """
    query = {}
    query['cid'] = '%s' % cid
    query['bvid'] = '%s' % bv
    query['qn'] = '%s' % quality
    reply = requests.get('https://api.bilibili.com/x/player/playurl', params=query, headers=headers)
    return reply.json()
# Request the video and save it
def get_flv(name, flv_url, headers):
    """Download ``flv_url`` and save it as ``./<name>.flv``.

    On HTTP 200 the body is streamed to disk in chunks, so the whole video
    is never held in memory, and the title plus the BV id are appended to
    ``./list.txt``. On any other status nothing is written and a failure
    message is printed.

    Note: the BV id written to ``list.txt`` is read from the module-level
    variable ``bv``, which the main script sets before calling this.
    """
    print("\n等待响应数据(需要的时间较长)...")
    # stream=True defers the body download until iter_content() is consumed;
    # the context manager releases the connection in every case.
    with requests.get(flv_url, headers=headers, stream=True) as response:
        code = response.status_code
        print('\n响应码:', code)
        if code != 200:
            print("视频获取失败")
            return
        with open("./%s.flv" % name, 'wb') as fp1:
            for chunk in response.iter_content(chunk_size=1024 * 1024):
                if chunk:
                    fp1.write(chunk)
    with open("./list.txt", 'a') as fp2:
        fp2.write(name + "\n" + bv + "\n\n")
    print("视频获取成功\n(若视频清晰度不符,请及时更新cookie值)")
# main
bv = input("输入BV号:")

# Load the cookie (first line only) from the local cookie.txt, if present
cookie = ""
if os.path.exists("./cookie.txt"):
    with open('./cookie.txt') as fp:
        cookie = fp.read().split('\n')[0]
    print('\ncookie:', cookie)

headers = {
    'Referer': 'https://www.bilibili.com/',
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.141 Safari/537.36',
    'cookie': "%s" % cookie,
}

# From here on the names ``cid`` and ``name`` hold the fetched values
# rather than the helper functions.
cid = cid(bv, headers)
print('\n可获取cookie值的链接(需先在浏览器正常登录)\nhttps://api.bilibili.com/x/player/playurl?cid=%s&bvid=%s' % (cid, bv))
name = name(bv, headers)
print('\n标题:', name)
print('\ncid:', cid)

# First request (empty quality) only serves to list the supported formats
quality = ''
text = flv(cid, bv, headers, quality)
qn = text['data']['support_formats']
print("\n可选择的清晰度(部分清晰度可能获取失败):")
for qu in qn:
    columns = (
        '清晰度:%s' % qu['new_description'],
        '视频质量参数:%d' % qu['quality'],
        '格式参数:%s' % qu['format'],
    )
    print(''.join(column.ljust(15) for column in columns))

# An empty answer falls back to quality parameter 80
quality = input("输入清晰度对应的视频质量参数(默认1080p):") or '80'

# Second request with the chosen quality yields the download URL
text = flv(cid, bv, headers, quality)
flv_url = text['data']['durl'][0]['url']
print('\nflv_url:', flv_url)
get_flv(name, flv_url, headers)
==更新== 抓包得到新的api地址
https://api.bilibili.com/x/web-interface/view/detail?bvid=BV1CX4y1P7M8&aid=713833694&need_operation_card=1&web_rm_repeat=&need_elec=1&out_referer=https%3A%2F%2Fspace.bilibili.com%2F
可获取分页视频的cid值,以及视频封面链接pic
更新完整代码(清晰度,分页,视频封面)
from lxml import etree
import requests
import os
import re
from sys import stdout
import time
# Extract the BV id with a regular expression
def bvre(bv):
    """Return the BV id contained in ``bv`` (a bare id or a video page URL).

    The id is taken to be ``BV`` followed by a run of ASCII letters and
    digits, so a trailing ``/``, a ``?query`` or a ``#fragment`` is never
    part of the result. The id is also printed.

    Raises:
        ValueError: if ``bv`` contains no BV id.
    """
    found = re.search(r'BV[0-9A-Za-z]+', bv)
    if found is None:
        raise ValueError('no BV id found in %r' % (bv,))
    r_bv = found.group(0)
    print('\nBV:', r_bv)
    return r_bv
# Fetch the video title
def name(bv, headers):
    """Return the title text found on the web page of video ``bv``.

    Requests https://www.bilibili.com/video/<bv> using ``headers``, parses
    the HTML with lxml and returns the first text node selected by the
    title XPath. An IndexError is raised when the XPath selects nothing.
    """
    video_page = 'https://www.bilibili.com/video/' + bv
    document = etree.HTML(requests.get(video_page, headers=headers).text)
    matches = document.xpath('//*[@id="viewbox_report"]/h1/span/text()')
    return matches[0]
'''
# =========备用api=========
# 获取(默认第一页的)cid
def cid(bv,headers):
    url = 'https://api.bilibili.com/x/player/pagelist'
    param = {
        'bvid':'%s'%bv,
        'jsonp':'jsonp'
    }
    text = requests.get(url,params = param,headers = headers).json()
    cid = text['data'][0]['cid']
    return cid
'''
# Fetch the paging information and the cid of every page
# https://api.bilibili.com/x/web-interface/view/detail?bvid=BV1CX4y1P7M8&aid=713833694&need_operation_card=1&web_rm_repeat=&need_elec=1&out_referer=https%3A%2F%2Fspace.bilibili.com%2F
def _select_page(pages, choice):
    """Resolve the user's answer ``choice`` to a ``[cid, page_number]`` pair."""
    if choice == "":
        # Default: the entry numbered 1, else the first entry that is listed.
        for entry in pages:
            if entry['page'] == 1:
                return [entry['cid'], entry['page']]
        return [pages[0]['cid'], pages[0]['page']]
    wanted = int(choice)
    # The answer may be either a page number or a cid.
    for entry in pages:
        if entry['page'] == wanted or entry['cid'] == wanted:
            return [entry['cid'], entry['page']]
    raise ValueError('no page number or cid matches %r' % (choice,))


def page(bv):
    """Let the user pick one page of video ``bv``; return ``[cid, page_number]``.

    Queries the ``x/web-interface/view/detail`` endpoint, prints the cover
    picture URL, the AV number and every page with its cid, then prompts for
    a page number or cid. An empty answer selects page 1.

    Note: the request uses the module-level ``headers`` dict, which the main
    script defines before calling this function.

    Raises:
        ValueError: if the answer is not an integer, or matches neither a
            page number nor a cid of this video.
    """
    url = 'https://api.bilibili.com/x/web-interface/view/detail'
    param = {
        'bvid': '%s' % bv,
    }
    text = requests.get(url, params=param, headers=headers).json()
    view = text['data']['View']
    print("\n视频封面链接pic:", view['pic'])
    print("AV号:", "av" + str(view['aid']))
    pages = view['pages']
    print("\n分页信息以及对应的cid:")
    for entry in pages:
        print('第 %d 页 cid:%s' % (entry['page'], entry['cid']))
    choice = input("输入分页或cid值(默认第一页):")
    return _select_page(pages, choice)
# Fetch the play-URL information of the video
def flv(cid, bv, headers, quality):
    """Query ``x/player/playurl`` and return the decoded JSON reply.

    The query parameters are ``cid``, ``bvid`` (from ``bv``) and ``qn``
    (from ``quality``); ``headers`` are passed as HTTP request headers.
    """
    endpoint = 'https://api.bilibili.com/x/player/playurl'
    query = dict(
        cid='%s' % cid,
        bvid='%s' % bv,
        qn='%s' % quality,
    )
    return requests.get(endpoint, params=query, headers=headers).json()
# Request the video and save it
def get_flv(name, flv_url, headers, page_num):
    """Stream ``flv_url`` to ``./<name>-<page_num>.flv`` with a progress line.

    ``/`` and ``|`` are removed from ``name``. If a file with that name
    cannot be created, a ``YYYYmmdd-HHMMSS`` timestamp is used as the file
    name instead. After a successful download the title (or timestamp) and
    the video URL are appended to ``./list.txt``. On a status other than
    200 nothing is written and a failure message is printed.

    Note: the video URL written to ``list.txt`` is built from the
    module-level variable ``bv``, which the main script sets beforehand.
    """
    def MB(byte):
        return byte / 1024 / 1024

    print("\n等待响应数据(需要的时间较长)...")
    # With stream=True the body is not downloaded immediately; the transfer
    # happens while iter_content() is being consumed.
    response = requests.get(flv_url, stream=True, headers=headers)
    try:
        code = response.status_code
        print('\n响应码:', code)
        # The header may be absent; 0 then stands for "size unknown".
        file_size = int(response.headers.get('content-length', 0))  # bytes
        print("文件大小: {:.2f} MB\n".format(MB(file_size)))
        if code != 200:
            print("视频获取失败")
            return

        # Drop characters such as / from the title so the path stays valid
        name = name.replace('/', '').replace('|', '')
        path1 = r"./%s-%d.flv" % (name, page_num)
        path2 = name + "\nhttps://www.bilibili.com/video/" + bv + "?p=%d\n\n" % page_num
        try:
            # Probe whether the title can be used as a file name
            with open(path1, 'wb'):
                pass
        except OSError:
            # The title cannot be used as a path: use the current time instead
            tname = time.strftime('%Y%m%d-%H%M%S', time.localtime())
            path1 = r"./%s-%d.flv" % (tname, page_num)
            path2 = tname + "\nhttps://www.bilibili.com/video/" + bv + "?p=%d\n\n" % page_num

        down_size = 0      # bytes downloaded so far
        old_down_size = 0  # bytes downloaded at the previous progress update
        interval = 0.5     # minimum seconds between progress updates
        time_ = time.time()
        with open(path1, 'wb') as fp1:
            for chunk in response.iter_content(chunk_size=64 * 1024):
                if not chunk:
                    continue
                fp1.write(chunk)
                down_size += len(chunk)
                now = time.time()
                elapsed = now - time_
                if elapsed > interval:
                    # Divide by the real elapsed time, not the nominal interval
                    speed = (down_size - old_down_size) / elapsed  # bytes/s
                    old_down_size = down_size
                    time_ = now
                    if speed > 0 and file_size:
                        remaining = max(file_size - down_size, 0) / speed
                    else:
                        remaining = float('inf')
                    print_params = [MB(speed), MB(down_size), MB(file_size), remaining]
                    # \r moves the cursor back to the start of the line
                    print('\r{:.1f}MB/s - {:.2f}MB,共 {:.2f}MB,还剩 {:.0f} 秒 '.format(*print_params), end='', flush=True)
        print('\r下载成功' + " " * 50)
        with open("./list.txt", 'a') as fp2:
            fp2.write(path2)
        print("视频获取成功\n(若视频清晰度不符,请及时更新cookie值)")
    finally:
        # Release the connection whether or not the download succeeded
        response.close()
# main
bv = bvre(input("输入BV号(网页链接):"))

# Load the cookie (first line only) from the local cookie.txt, if present
cookie = ""
if os.path.exists("./cookie.txt"):
    with open('./cookie.txt') as fp:
        cookie = fp.read().split('\n')[0]
    print('\ncookie:', cookie)

headers = {
    'Referer': 'https://www.bilibili.com/',
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.141 Safari/537.36',
    'cookie': "%s" % cookie,
}

# page() returns [cid, page number] for the page chosen by the user
cid_get = page(bv)
cid, page_num = cid_get[0], cid_get[1]
# cid = cid(bv,headers)
print('\ncid:', cid)
print('page:', page_num)
print('\n可获取cookie值的链接(需先在浏览器正常登录)\nhttps://api.bilibili.com/x/player/playurl?cid=%s&bvid=%s' % (cid, bv))

# From here on the name ``name`` holds the title rather than the helper function
name = name(bv, headers)
print('\n标题:', name)

# First request (empty quality) only serves to list the supported formats
quality = ''
text = flv(cid, bv, headers, quality)
qn = text['data']['support_formats']
print("\n可选择的清晰度(部分清晰度可能获取失败):")
for qu in qn:
    columns = (
        '清晰度:%s' % qu['new_description'],
        '视频质量参数:%d' % qu['quality'],
        '格式参数:%s' % qu['format'],
    )
    print(''.join(column.ljust(15) for column in columns))

# An empty answer falls back to quality parameter 80
quality = input("输入清晰度对应的视频质量参数(默认1080p):") or '80'

# Second request with the chosen quality yields the download URL
text = flv(cid, bv, headers, quality)
flv_url = text['data']['durl'][0]['url']
print('\nflv_url:', flv_url)
get_flv(name, flv_url, headers, page_num)
==av号bv号转换代码(转载)==
# Digit alphabet: the character at index i encodes the base-58 digit i
table = 'fZodR9XQDSUm21yCkr6zBqiveYah8bt4xsWpHnJE7jL5VG3guMTKNPAwcF'
# Reverse lookup: character -> digit value
tr = {ch: i for i, ch in enumerate(table)}
# Positions inside the 12-character BV id that hold digits 0..5
# (least significant digit first)
s = [11, 10, 3, 8, 4, 6]
xor = 177451812
add = 8728348608


def dec(x):
    """Convert the BV id string ``x`` to its integer AV number."""
    r = 0
    for i in range(6):
        r += tr[x[s[i]]] * 58 ** i
    return (r - add) ^ xor


def enc(x):
    """Convert the integer AV number ``x`` to its BV id string."""
    x = (x ^ xor) + add
    # 12-character template: the blanks sit exactly at the positions listed
    # in ``s`` and are overwritten below; 'BV', '1', '4', '1', '7' are fixed.
    r = list('BV1  4 1 7  ')
    for i in range(6):
        r[s[i]] = table[x // 58 ** i % 58]
    return ''.join(r)


print(dec('BV17x411w7KC'))
print(dec('BV1Q541167Qg'))
print(enc(170001))
print(enc(455017605))
==互动视频==
api链接需要自己抓包去找
抓包时不容易确定哪个链接才是需要的api地址,所以比较麻烦;相比之下,直接用油猴脚本更方便
用cid搜索比较方便,有个 playurl?cid=******** 的包,从中获取api地址
获取最后一个视频成功了,合并就可以了,happy!!!
完整代码(更新:获取标题,创建文件夹)
from lxml import etree
import requests
import os
import re
'''
例子包名:
playurl?cid=285809643&bvid=BV1jy4y117H2&qn=80&type…fnval=80&session=f4050613906412f01562d3e7208f59fe
对应url:
https://api.bilibili.com/x/player/playurl?cid=285809643&bvid=BV1jy4y117H2&qn=80&type=&otype=json&fourk=1&fnver=0&fnval=80&session=f4050613906412f01562d3e7208f59fe
'''
# Extract the BV id with a regular expression
def bvre(bv):
    """Return the BV id contained in ``bv`` (here: a playurl API address).

    The id is taken to be ``BV`` followed by a run of ASCII letters and
    digits, so a following ``&param``, ``#fragment`` or ``/`` is never part
    of the result. The id is also printed.

    Raises:
        ValueError: if ``bv`` contains no BV id.
    """
    found = re.search(r'BV[0-9A-Za-z]+', bv)
    if found is None:
        raise ValueError('no BV id found in %r' % (bv,))
    r_bv = found.group(0)
    print('\nBV:', r_bv)
    return r_bv
# Fetch the video title
def name(bv, headers):
    """Return the title text found on the web page of video ``bv``.

    Requests https://www.bilibili.com/video/<bv> using ``headers``, parses
    the HTML with lxml and returns the first text node of the second
    ``span`` under the title heading. An IndexError is raised when the
    XPath selects nothing.
    """
    video_page = 'https://www.bilibili.com/video/' + bv
    document = etree.HTML(requests.get(video_page, headers=headers).text)
    matches = document.xpath('//*[@id="viewbox_report"]/h1/span[2]/text()')
    return matches[0]
# main
url = input("api地址:")
bv = bvre(url)

# Read the cookie BEFORE building the headers; otherwise the headers are
# created with an empty cookie and the stored one is never sent.
cookie = ''
if os.path.exists("./cookie.txt"):
    with open('./cookie.txt') as fp:
        cookie = fp.read().split('\n')[0]
    print('\ncookie:', cookie)
headers = {
    'Referer': 'https://www.bilibili.com/',
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.141 Safari/537.36',
    'cookie': "%s" % cookie,
}

# From here on the name ``name`` holds the title rather than the helper function
name = name(bv, headers)
# Create the output folder named after the title
if not os.path.exists("./%s" % name):
    os.mkdir("./%s" % name)
print('\n标题:', name)
print("\n等待响应数据(需要的时间较长)...")
text = requests.get(url, headers=headers).json()

# Video stream: first entry of data.dash.video
video_url = text['data']['dash']['video'][0]['baseUrl']
print('\nvideo_url:', video_url)
v_response = requests.get(video_url, headers=headers)
print("\nvideo_url响应码:", v_response.status_code)
if v_response.status_code == 200:
    with open("./%s/video.mp4" % name, 'wb') as fp1:
        fp1.write(v_response.content)
    print("视频获取成功\n(若视频清晰度不符,请及时更新cookie值)")
else:
    print("视频获取失败")

# Audio stream: first entry of data.dash.audio
audio_url = text['data']['dash']['audio'][0]['baseUrl']
print('\naudio_url:', audio_url)
a_response = requests.get(audio_url, headers=headers)
print("\naudio_url响应码:", a_response.status_code)
if a_response.status_code == 200:
    with open("./%s/audio.mp4" % name, 'wb') as fp2:
        fp2.write(a_response.content)
    print("音频获取成功")
else:
    print("音频获取失败")

# Merge hint and log entry, only when both downloads succeeded
if v_response.status_code == 200 and a_response.status_code == 200:
    print("\n请使用格式工厂合并\n1.audio.mp4 -> audio.mp3\n2.audio.mp3和video.mp4混流")
    with open("./list_hudong.txt", 'a') as fp2:
        fp2.write('%s\n%s\n\n' % (name, url))
==附录==
json格式化网站
www.bejson.com/
av号bv号转换
www.zhihu.com/question/381784377