自动化下载某短视频平台的视频

221 阅读1分钟

导入需要的模块


import os.path
from DrissionPage import ChromiumPage
from DrissionPage.easy_set import set_paths

# Install the third-party packages imported below. Running pip as
# "<this interpreter> -m pip" guarantees the packages land in the environment
# that is executing this script; a bare "pip" on PATH may belong to another
# Python installation. "requests" is included because it is imported below.
# check=False keeps the original best-effort behavior: a failed install does
# not abort the script here.
import subprocess
import sys

subprocess.run(
    [sys.executable, "-m", "pip", "install", "loguru", "lxml", "pandas", "requests"],
    check=False,
)
import time
import pandas as pd
from loguru import logger
import requests
from lxml import etree

下载视频

def download_video(url_list, save_name):
    """Download the first URL in ``url_list`` and write it to ``save_name``.

    Args:
        url_list: Candidate video URLs; only the first entry is used. An
            empty (or otherwise falsy) value makes the call a no-op.
        save_name: Path of the file the response body is written to.

    Returns:
        None. Request failures are logged and no file is written.
    """
    if not url_list:
        return
    video_url = url_list[0]
    # Only a protocol-relative URL ("//host/path") is missing its scheme.
    # A blanket replace("//", "http://") would also rewrite the "//" inside
    # an absolute "https://..." URL and corrupt it.
    if video_url.startswith("//"):
        video_url = "http:" + video_url
    logger.info({"Downloading video": save_name})
    try:
        # A timeout prevents a stalled connection from hanging the script.
        req = requests.get(video_url, timeout=30)
        # Do not save an HTTP error page under a ".mp4" name.
        req.raise_for_status()
    except requests.RequestException as exc:
        logger.error({"download failed": video_url, "error": str(exc)})
        return
    with open(save_name, "wb") as f:
        f.write(req.content)

获取下载链接进行下载

def get_download_link(url):
    """Open a video page, locate its ``<video>`` source and download it.

    The target file is ``<save_download_video_path>/<name>.mp4`` where
    ``<name>`` is the first 15 characters of the last path segment of the
    URL. If that file already exists the page is not opened again.

    Args:
        url: Link to a video page. May be absolute, protocol-relative
            ("//host/path") or relative to ``https://www.xxx.com``.

    Returns:
        None.
    """
    from urllib.parse import urljoin, urlparse

    # urljoin resolves relative and protocol-relative links against the site
    # root and leaves absolute URLs untouched. The previous two-step string
    # prefixing turned "//www.xxx.com/..." into
    # "https://www.xxx.comhttp://www.xxx.com/...".
    url = urljoin("https://www.xxx.com", url)
    logger.info({"get url": url})
    # Use the URL path only, so a query string or a trailing slash does not
    # end up in (or empty out) the file name.
    last_segment = urlparse(url).path.rstrip("/").rsplit("/", 1)[-1]
    file_name = last_segment[:15]
    save_name = os.path.join(save_download_video_path, "{}.mp4".format(file_name))
    if os.path.exists(save_name):
        logger.info({"already save": save_name})
        return
    page.get(url)
    time.sleep(1)
    logger.info({"run": url})
    logger.info({"wait for loading": "10s"})
    # Fixed wait before reading the HTML; presumably gives the player time
    # to insert its <source> element.
    time.sleep(10)
    root = etree.HTML(page.html)
    res = root.xpath("//video//source/@src")
    logger.info({"start downloading url": res})
    download_video(res, save_name)

打开网页,等待3秒后向下滚动n次,然后提取页面中的视频链接并逐个下载

# Open a listing page, wait 3 seconds, scroll down `scroll_times` times, then
# download every video linked from the page.
def create_selenium_driver(url):
    """Collect the video links on a listing page and download each one.

    The function drives the module-level ``page`` object: it loads ``url``,
    scrolls down ``scroll_times`` times (200 pixels per step, one second
    apart), then passes every ``<a href>`` containing "video" to
    ``get_download_link``.

    Args:
        url: Address of the page to open and scan for links.

    Returns:
        None.
    """
    page.get(url)
    logger.info({"wait for loading": "3s", "msg": "等待加载网页信息"})
    time.sleep(3)
    for _ in range(scroll_times):
        page.scroll.down(200)
        time.sleep(1)

    # Take the HTML before downloading: get_download_link navigates `page`
    # to other URLs.
    root = etree.HTML(page.html)
    hrefs = root.xpath("//a/@href")
    # The same href can appear several times on a page. dict.fromkeys drops
    # duplicates while keeping first-seen order, so a link is not reopened
    # (with its fixed waits) more than once per scan.
    for href in dict.fromkeys(hrefs):
        if "video" in href:
            logger.info({"get a video url": href})
            get_download_link(href)

其他基本参数

# Path to the Edge browser executable that DrissionPage is pointed at.
driver_path = r"C:\Program Files (x86)\Microsoft\Edge\Application\msedge.exe"
# Register the browser path with DrissionPage.
set_paths(browser_path=driver_path)
# Directory where downloaded videos are saved.
save_download_video_path = "./video"
os.makedirs(save_download_video_path, exist_ok=True)
# Create the browser page instance used by the functions above.
page = ChromiumPage()
try:
    page.get("https://www.xxx.com/")
    # Number of scroll steps per listing page; fall back to 10 when the
    # answer is not a valid integer.
    try:
        scroll_times = int(input("往下拉多少次,每次会自动拉200像素 输入数字即可 例如:10>>>"))
    except ValueError as e:
        logger.error({"输入有误":str(e)})
        scroll_times = 10
    # The prompt loop runs whether or not the default was used. It used to
    # sit in the try's `else:` branch, so it was skipped after a bad answer.
    while True:
        res = input("please input a url")
        if res == "q":
            break
        if "video" in res:
            # A URL containing "video" is treated as a single video page.
            # The entered URL is `res`; the old code used the undefined
            # name `i` here and raised NameError.
            logger.info({"get a video url": res})
            get_download_link(res)
        else:
            # Anything else is treated as a listing page to scan for links.
            create_selenium_driver(res)
finally:
    # Close the browser instance even if an error interrupted the loop.
    page.quit()