Python 爬虫:按关键词批量爬取百度图片

267 阅读1分钟
import urllib.request
from urllib.parse import quote
import re
import os

# HTTP headers passed to every urllib Request built by this script: a
# desktop-Chrome User-Agent string plus a Baidu-image referer.
headers = dict(
    (
        (
            "User-Agent",
            "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
            "AppleWebKit/537.36 (KHTML, like Gecko) "
            "Chrome/101.0.4951.67 Safari/537.36",
        ),
        ("referer", "https://image.baidu.com"),
    )
)
print("****************************************************************************************")
keyword = input("请输入要下载的图片:")
# Where the downloaded files are saved: one sub-folder per keyword beneath the
# project directory.
# NOTE: `dir` shadows the builtin of the same name; the name is kept because
# the download loop later in this script reads the folder path from it.
last_dir = "C://Users//Administrator//PycharmProjects//pythonProject//"
dir = last_dir + keyword
if os.path.exists(dir):
    print("文件夹已经存在")
else:
    # makedirs also creates last_dir and any missing ancestor, so a missing
    # base folder needs no separate branch and a missing grandparent no longer
    # raises FileNotFoundError.  exist_ok guards against the folder appearing
    # between the check above and this call.
    os.makedirs(dir, exist_ok=True)
    print(dir + "已经创建成功")
# Percent-encode the keyword so non-ASCII input is legal inside the query
# string of the Baidu image-search URL.
keyword1 = quote(keyword, encoding="utf-8")
url = 'http://image.baidu.com/search/flip?tn=baiduimage&ie=utf-8&word=' + keyword1 + '&ct=201326592&v=flip'

# Seconds to wait on any single request; without a timeout one stalled
# connection would hang the script indefinitely.
request_timeout = 15

# Fetch the result page.  The response is closed by the `with` block, and
# undecodable bytes are replaced instead of aborting the whole run.
req = urllib.request.Request(url, headers=headers)
with urllib.request.urlopen(req, timeout=request_timeout) as page_response:
    page_html = page_response.read().decode("utf-8", errors="replace")

# Every thumbnail address appears in the page as  thumbURL":"<address>"
thumb_pattern = re.compile(r'thumbURL":"(.+?)"')

# num counts the images saved so far; it is only advanced after a successful
# save, so file names stay contiguous (keyword0.jpg, keyword1.jpg, ...).
num = 0
for thumb_url in thumb_pattern.findall(page_html):
    print("正在下载" + thumb_url)
    try:
        image_request = urllib.request.Request(thumb_url, headers=headers)
        with urllib.request.urlopen(image_request, timeout=request_timeout) as image_response:
            image_bytes = image_response.read()
    except (OSError, ValueError) as err:
        # OSError covers URLError/HTTPError and socket timeouts; ValueError is
        # raised for a malformed address.  Skip this image rather than abort
        # the rest of the batch.
        print(thumb_url + "下载失败,已跳过: " + str(err))
        continue
    # The file is opened only after the download succeeded, so a failed
    # request never leaves an empty .jpg behind; `with` closes the file even
    # if the write raises.
    with open(os.path.join(dir, keyword + str(num) + ".jpg"), "wb") as image_file:
        image_file.write(image_bytes)
    num += 1
    print(thumb_url + "已下载成功")
input("按任意键结束程序:")

微信图片_20220515224426.png

4afbb4e23f1ce61ad8de1beca413cb7.png

37e380a9ac08a7e119a7d82a9610c7e.png

将代码中的文件保存路径(last_dir 和 dir)改为自己电脑上的目录后即可使用。