Python爬取b站视频(api真实地址)

1,896 阅读6分钟

Python爬虫b站视频(api真实地址)

声明:==代码只用于学习交流,侵删==

api地址

 https://api.bilibili.com/x/player/playurl

参数

cid:根据bv号得到
bvid:bv号
qn:清晰度参数

cookie

请将自己的cookie保存在当前目录下的cookie.txt中(程序只读取该文件的第一行)
代码运行时会输出一个可用于获取cookie的建议链接(需先在浏览器中正常登录)

完整代码(可选清晰度)

from lxml import etree
import requests
import os
# 获取视频名称
def name(bv,headers):
    """Return the title of the video page for ``bv``.

    Downloads ``https://www.bilibili.com/video/<bv>`` with the given request
    headers, parses the HTML and returns the first text node matched under
    the ``viewbox_report`` element's ``h1/span``.
    """
    page_url = 'https://www.bilibili.com/video/' + bv
    html = requests.get(page_url, headers=headers).text
    titles = etree.HTML(html).xpath('//*[@id="viewbox_report"]/h1/span/text()')
    # Only the first match is used as the title
    return titles[0]
# 获取cid
def cid(bv,headers):
    """Return the cid of the first entry reported by the pagelist API for ``bv``."""
    response = requests.get(
        'https://api.bilibili.com/x/player/pagelist',
        params={'bvid': '%s' % bv, 'jsonp': 'jsonp'},
        headers=headers,
    )
    entries = response.json()['data']
    # The first entry of ``data`` is the one whose cid is returned
    return entries[0]['cid']
# 获取视频url
def flv(cid,bv,headers,quality):
    """Call the playurl API and return its decoded JSON body.

    ``cid``, ``bv`` and ``quality`` are sent (as strings) in the ``cid``,
    ``bvid`` and ``qn`` query parameters respectively.
    """
    query = {
        key: '%s' % value
        for key, value in (('cid', cid), ('bvid', bv), ('qn', quality))
    }
    response = requests.get(
        'https://api.bilibili.com/x/player/playurl',
        params=query,
        headers=headers,
    )
    return response.json()
# 请求视频并保存
def get_flv(name,flv_url,headers):
    """Download the video at ``flv_url`` to ``./<name>.flv`` and log it.

    The body is streamed to disk chunk by chunk instead of being held in
    memory as a whole, and the HTTP response is closed when done.

    Args:
        name: Video title, used as the output file name.
        flv_url: Direct url of the video stream.
        headers: Request headers sent with the download.

    On success (HTTP 200) the title and the module-level ``bv`` are appended
    to ``./list.txt``; any other status only prints a failure message.
    """
    print("\n等待响应数据(需要的时间较长)...")
    # stream=True defers the body download until iter_content() is consumed
    with requests.get(flv_url, headers=headers, stream=True) as response:
        code = response.status_code
        print('\n响应码:',code)
        if code != 200:
            print("视频获取失败")
            return
        with open("./%s.flv"%name,'wb') as fp1:
            for chunk in response.iter_content(chunk_size=1024 * 1024):
                if chunk:
                    fp1.write(chunk)
    # NOTE: ``bv`` is read from module scope, it is not a parameter
    with open("./list.txt",'a') as fp2:
        fp2.write(name+"\n"+bv+"\n\n")
    print("视频获取成功\n(若视频清晰度不符,请及时更新cookie值)")
# main
bv = input("输入BV号:")
# Cookie: first line of ./cookie.txt when the file exists, otherwise empty
cookie = ""
if os.path.exists("./cookie.txt"):
    with open('./cookie.txt') as fp:
        cookie = fp.read().split('\n')[0]
print('\ncookie:',cookie)
headers = {
    'Referer': 'https://www.bilibili.com/',
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.141 Safari/537.36',
    'cookie': cookie,
}
# From here on ``cid`` and ``name`` hold the fetched values, not the functions
cid = cid(bv,headers)
cookie_hint = '\n可获取cookie值的链接(需先在浏览器正常登录)\nhttps://api.bilibili.com/x/player/playurl?cid=%s&bvid=%s'%(cid,bv)
print(cookie_hint)
name = name(bv,headers)
print('\n标题:',name)
print('\ncid:',cid)
# First playurl request uses an empty qn, only to list the supported formats
formats = flv(cid,bv,headers,'')['data']['support_formats']
print("\n可选择的清晰度(部分清晰度可能获取失败):")
for fmt in formats:
    columns = (
        '清晰度:%s'%fmt['new_description'],
        '视频质量参数:%d'%fmt['quality'],
        '格式参数:%s'%fmt['format'],
    )
    print(''.join(column.ljust(15) for column in columns))
# An empty answer falls back to quality parameter '80'
quality = input("输入清晰度对应的视频质量参数(默认1080p):") or '80'
flv_url = flv(cid,bv,headers,quality)['data']['durl'][0]['url']
print('\nflv_url:',flv_url)
get_flv(name,flv_url,headers)

==更新== 抓包得到新的api地址

https://api.bilibili.com/x/web-interface/view/detail?bvid=BV1CX4y1P7M8&aid=713833694&need_operation_card=1&web_rm_repeat=&need_elec=1&out_referer=https%3A%2F%2Fspace.bilibili.com%2F

可获取分页视频的cid值,以及视频封面链接pic
更新完整代码(清晰度,分页,视频封面)

from lxml import etree
import requests
import os
import re
from sys import stdout
import time

# 正则提取bv号
def bvre(bv):
    """Extract the BV id from a bare id or a video page link.

    The id is everything from the first ``BV`` up to (not including) the
    first ``?`` that follows it, with any ``/`` characters removed. The
    extracted id is printed and returned.

    Args:
        bv: A BV id or a url containing one.

    Returns:
        The extracted BV id string.

    Raises:
        ValueError: If ``bv`` contains no ``BV`` marker at all.
    """
    # A trailing '?' guarantees the lazy match terminates even when the
    # input has no query string; the raw string keeps ``\S`` a regex escape.
    found = re.search(r'(BV[\S]*?)\?', bv + '?')
    if found is None:
        raise ValueError('no BV id found in %r' % bv)
    r_bv = found.group(1).replace('/','')
    print('\nBV:',r_bv)
    return r_bv
# 获取视频名称
def name(bv,headers):
    """Return the title of the video page for ``bv``.

    Downloads ``https://www.bilibili.com/video/<bv>`` with the given request
    headers, parses the HTML and returns the first text node matched under
    the ``viewbox_report`` element's ``h1/span``.
    """
    page_url = 'https://www.bilibili.com/video/' + bv
    html = requests.get(page_url, headers=headers).text
    titles = etree.HTML(html).xpath('//*[@id="viewbox_report"]/h1/span/text()')
    # Only the first match is used as the title
    return titles[0]
'''
# =========备用api=========
# 获取(默认第一页的)cid
def cid(bv,headers):
    url = 'https://api.bilibili.com/x/player/pagelist'
    param = {
        'bvid':'%s'%bv,
        'jsonp':'jsonp'
    }
    text = requests.get(url,params = param,headers = headers).json()
    cid = text['data'][0]['cid']
    return cid
'''
# 获取分页信息以及对应的cid
# https://api.bilibili.com/x/web-interface/view/detail?bvid=BV1CX4y1P7M8&aid=713833694&need_operation_card=1&web_rm_repeat=&need_elec=1&out_referer=https%3A%2F%2Fspace.bilibili.com%2F
def page(bv):
    """Show a video's cover, av number and parts, then let the user pick a part.

    Queries the ``view/detail`` API for ``bv`` (using the module-level
    ``headers``), prints the cover url, the av number and every part with
    its cid, then prompts for a part number or a cid. An empty answer
    selects the part numbered 1.

    Args:
        bv: BV id of the video.

    Returns:
        A two-item list ``[cid, page_number]`` for the selected part.

    Raises:
        ValueError: If the answer is not an integer, or matches neither a
            part number nor a cid of this video.
    """
    url = 'https://api.bilibili.com/x/web-interface/view/detail'
    param = {
        'bvid':'%s'%bv,
    }
    # NOTE: ``headers`` comes from module scope, it is not a parameter
    text = requests.get(url,params = param,headers = headers).json()
    view = text['data']['View']
    print("\n视频封面链接pic:",view['pic'])
    print("AV号:","av" + str(view['aid']))
    parts = view['pages']
    print("\n分页信息以及对应的cid:")
    for part in parts:
        print('第 %d 页 cid:%s'%(part['page'],part['cid']))
    answer = input("输入分页或cid值(默认第一页):")
    if answer == "":
        candidates = [part for part in parts if part['page'] == 1]
    else:
        # The answer may be either a part number or a cid; both are integers
        wanted = int(answer)
        candidates = [part for part in parts if wanted in (part['page'], part['cid'])]
    if not candidates:
        # Previously this fell through and crashed on an unbound local
        raise ValueError('no page or cid matches %r' % answer)
    chosen = candidates[0]
    return [chosen['cid'], chosen['page']]
# 获取视频url
def flv(cid,bv,headers,quality):
    """Call the playurl API and return its decoded JSON body.

    ``cid``, ``bv`` and ``quality`` are sent (as strings) in the ``cid``,
    ``bvid`` and ``qn`` query parameters respectively.
    """
    query = {
        key: '%s' % value
        for key, value in (('cid', cid), ('bvid', bv), ('qn', quality))
    }
    response = requests.get(
        'https://api.bilibili.com/x/player/playurl',
        params=query,
        headers=headers,
    )
    return response.json()
# 请求视频并保存
def get_flv(name,flv_url,headers,page_num):
    """Stream the video at ``flv_url`` to disk with a live progress line.

    The file is saved as ``./<title>-<page_num>.flv``; when that path cannot
    be opened, a ``YYYYmmdd-HHMMSS`` timestamp replaces the title. On success
    the saved name and the video link are appended to ``./list.txt``.

    Args:
        name: Video title; ``/`` and ``|`` are stripped before use as a
            file name.
        flv_url: Direct url of the video stream.
        headers: Request headers sent with the download.
        page_num: Part number, used in the file name and in the logged link.

    Reads the module-level ``bv`` to build the link written to list.txt.
    A non-200 response only prints a failure message.
    """
    def MB(byte):
        return byte / 1024 / 1024
    print("\n等待响应数据(需要的时间较长)...")
    # With stream=True the body is only fetched while iter_content() is consumed
    with requests.get(flv_url, stream=True, headers = headers) as response:
        code = response.status_code
        print('\n响应码:',code)
        # The header may be absent; treat the size as 0 instead of raising KeyError
        file_size = int(response.headers.get('content-length', 0))  # bytes
        print("文件大小: {:.2f} MB\n".format(MB(file_size)))
        if code != 200:
            print("视频获取失败")
            return
        # Drop characters that would break the file path
        name = name.replace('/','').replace('|','')
        interval = 0.5  # seconds between progress updates
        # Timestamp used as the file name when the title is not a valid path
        tname = time.strftime('%Y%m%d-%H%M%S', time.localtime())
        try:
            label = name
            fp1 = open(r"./%s-%d.flv"%(label,page_num),'wb')
        except (OSError, ValueError):
            label = tname
            fp1 = open(r"./%s-%d.flv"%(label,page_num),'wb')
        path2 = label+"\nhttps://www.bilibili.com/video/"+bv+"?p=%d\n\n"%page_num
        down_size = 0  # bytes written so far
        old_down_size = 0  # bytes written at the previous progress update
        time_ = time.time()
        with fp1:
            for chunk in response.iter_content(chunk_size=64 * 1024):
                if not chunk:
                    continue
                fp1.write(chunk)
                down_size += len(chunk)
                elapsed = time.time() - time_
                if elapsed > interval:
                    # Use the real elapsed time, not the nominal interval
                    speed = (down_size - old_down_size) / elapsed  # bytes/s
                    old_down_size = down_size
                    time_ = time.time()
                    # Clamp so an unknown or understated total never goes negative
                    remaining = max(file_size - down_size, 0) / speed
                    print_params = [MB(speed), MB(down_size), MB(file_size), remaining]
                    # \r rewinds to the line start; flush so the line shows up at once
                    print('\r{:.1f}MB/s - {:.2f}MB,共 {:.2f}MB,还剩 {:.0f} 秒   '.format(*print_params), end='', flush=True)
            print('\r下载成功' + " "*50)
    with open("./list.txt",'a') as fp2:
        fp2.write(path2)
    print("视频获取成功\n(若视频清晰度不符,请及时更新cookie值)")
# main
bv = bvre(input("输入BV号(网页链接):"))
# Cookie: first line of ./cookie.txt when the file exists, otherwise empty
cookie = ""
if os.path.exists("./cookie.txt"):
    with open('./cookie.txt') as fp:
        cookie = fp.read().split('\n')[0]
print('\ncookie:',cookie)
headers = {
    'Referer': 'https://www.bilibili.com/',
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.141 Safari/537.36',
    'cookie': cookie,
}
# page() returns [cid, page number] for the part chosen by the user
cid, page_num = page(bv)
print('\ncid:',cid)
print('page:',page_num)
cookie_hint = '\n可获取cookie值的链接(需先在浏览器正常登录)\nhttps://api.bilibili.com/x/player/playurl?cid=%s&bvid=%s'%(cid,bv)
print(cookie_hint)
# From here on ``name`` holds the fetched title, not the function
name = name(bv,headers)
print('\n标题:',name)
# First playurl request uses an empty qn, only to list the supported formats
formats = flv(cid,bv,headers,'')['data']['support_formats']
print("\n可选择的清晰度(部分清晰度可能获取失败):")
for fmt in formats:
    columns = (
        '清晰度:%s'%fmt['new_description'],
        '视频质量参数:%d'%fmt['quality'],
        '格式参数:%s'%fmt['format'],
    )
    print(''.join(column.ljust(15) for column in columns))
# An empty answer falls back to quality parameter '80'
quality = input("输入清晰度对应的视频质量参数(默认1080p):") or '80'
flv_url = flv(cid,bv,headers,quality)['data']['durl'][0]['url']
print('\nflv_url:',flv_url)
get_flv(name,flv_url,headers,page_num)

==av号bv号转换代码(转载)==

# Base-58 alphabet used by BV ids
table = 'fZodR9XQDSUm21yCkr6zBqiveYah8bt4xsWpHnJE7jL5VG3guMTKNPAwcF'
# Reverse lookup: alphabet character -> its index in ``table``
tr = {char: index for index, char in enumerate(table)}
# Positions inside the 12-character BV string that carry the base-58 digits,
# listed from the least significant digit upwards
s = [11, 10, 3, 8, 4, 6]
xor = 177451812
add = 8728348608


def dec(x):
    """Convert a BV id string ``x`` to its numeric av id."""
    r = sum(tr[x[position]] * 58 ** power for power, position in enumerate(s))
    return (r - add) ^ xor


def enc(x):
    """Convert a numeric av id ``x`` to its BV id string."""
    x = (x ^ xor) + add
    # Template with the fixed characters in place; blanks are filled below
    r = list('BV1  4 1 7  ')
    for power, position in enumerate(s):
        r[position] = table[x // 58 ** power % 58]
    return ''.join(r)


# Demo: round-trip two known av/bv pairs
for bv_id in ('BV17x411w7KC', 'BV1Q541167Qg'):
    print(dec(bv_id))
for av_id in (170001, 455017605):
    print(enc(av_id))

==互动视频== api链接需要自己抓包去找
不容易确定哪个链接才是需要的api链接,所以比较麻烦,还是油猴脚本更省事
在抓包结果中用cid搜索比较方便:找到名为 playurl?cid=******** 的包,它对应的完整url就是api地址
在这里插入图片描述

获取最后一个视频成功了,合并就可以了,happy!!! 在这里插入图片描述

完整代码(更新:获取标题,创建文件夹)

from lxml import etree
import requests
import os
import re
'''
例子包名:
playurl?cid=285809643&bvid=BV1jy4y117H2&qn=80&type…fnval=80&session=f4050613906412f01562d3e7208f59fe
对应url:
https://api.bilibili.com/x/player/playurl?cid=285809643&bvid=BV1jy4y117H2&qn=80&type=&otype=json&fourk=1&fnver=0&fnval=80&session=f4050613906412f01562d3e7208f59fe
'''
# 正则提取bv号
def bvre(bv):
    """Extract the BV id from a playurl api address.

    The id is everything from the first ``BV`` up to (not including) the
    first ``&`` that follows it. The extracted id is printed and returned.

    Args:
        bv: An api url (or any string) containing a BV id.

    Returns:
        The extracted BV id string.

    Raises:
        ValueError: If ``bv`` contains no ``BV`` marker at all.
    """
    # A trailing '&' guarantees the lazy match terminates even when the id is
    # the last query parameter; the raw string keeps ``\S`` a regex escape.
    found = re.search(r'(BV[\S]*?)\&', bv + '&')
    if found is None:
        raise ValueError('no BV id found in %r' % bv)
    r_bv = found.group(1)
    print('\nBV:',r_bv)
    return r_bv
# 获取视频名称
def name(bv,headers):
    """Return the title of the video page for ``bv``.

    Downloads ``https://www.bilibili.com/video/<bv>`` with the given request
    headers, parses the HTML and returns the first text node matched under
    the second ``span`` of the ``viewbox_report`` element's ``h1``.
    """
    page_url = 'https://www.bilibili.com/video/' + bv
    html = requests.get(page_url, headers=headers).text
    titles = etree.HTML(html).xpath('//*[@id="viewbox_report"]/h1/span[2]/text()')
    # Only the first match is used as the title
    return titles[0]
# main
url = input("api地址:")
bv = bvre(url)
# Read the cookie BEFORE building the headers. Previously the headers were
# created from an empty string first, so the saved cookie was never sent.
cookie = ''
if os.path.exists("./cookie.txt"):
    with open('./cookie.txt') as fp:
        cookie = fp.read()
    # Only the first line of the file is used
    cookie = cookie.split('\n')[0]
print('\ncookie:',cookie)
headers = {
    'Referer':'https://www.bilibili.com/',
    'User-Agent':'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.141 Safari/537.36',
    'cookie':"%s"%cookie,
}
# From here on ``name`` holds the fetched title, not the function
name = name(bv,headers)
# Output folder named after the video title
if not os.path.exists("./%s"%name):
    os.mkdir("./%s"%name)
print('\n标题:',name)
print("\n等待响应数据(需要的时间较长)...")
text = requests.get(url,headers = headers).json()
# Video track: first entry of the DASH video list
video_url = text['data']['dash']['video'][0]['baseUrl']
print('\nvideo_url:',video_url)
v_response = requests.get(video_url,headers = headers)
print("\nvideo_url响应码:",v_response.status_code)
if v_response.status_code == 200:
    with open("./%s/video.mp4"%name,'wb') as fp1:
        fp1.write(v_response.content)
    print("视频获取成功\n(若视频清晰度不符,请及时更新cookie值)")
else:
    print("视频获取失败")
# Audio track: first entry of the DASH audio list
audio_url = text['data']['dash']['audio'][0]['baseUrl']
print('\naudio_url:',audio_url)
a_response = requests.get(audio_url,headers = headers)
print("\naudio_url响应码:",a_response.status_code)
if a_response.status_code == 200:
    with open("./%s/audio.mp4"%name,'wb') as fp2:
        fp2.write(a_response.content)
    print("音频获取成功")
else:
    print("音频获取失败")
# When both tracks were saved, print the merge hint and log the download
if v_response.status_code == 200 and a_response.status_code == 200:
    print("\n请使用格式工厂合并\n1.audio.mp4 -> audio.mp3\n2.audio.mp3和video.mp4混流")
    with open("./list_hudong.txt",'a') as fp2:
        fp2.write('%s\n%s\n\n'%(name,url))

==附录== json格式化网站
www.bejson.com/
av号bv号转换
www.zhihu.com/question/38…