1、首先安装所需依赖:
pip install requests
pip install beautifulsoup4
2、新建一个 getWebsiteImage.py 文件,代码如下:
# 爬取网站图片
import requests
from bs4 import BeautifulSoup
import os
def download_image(url, save_dir, timeout=10):
    """Download the image at *url* and store it inside *save_dir*.

    The file name is taken from the last segment of the URL's path, so a
    query string or fragment (``pic.jpg?v=2#top``) never ends up in the
    name on disk. URLs whose path has no final segment (for example ones
    ending in ``/``) are skipped because no file name can be derived.

    Args:
        url: Absolute URL of the image.
        save_dir: Existing directory that receives the file.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        None. Nothing is written when the server does not answer with
        HTTP 200 or when no file name can be derived from *url*.

    Raises:
        requests.exceptions.RequestException: If the request fails
            (connection error, timeout, ...).
        OSError: If the target file cannot be written.
    """
    from urllib.parse import urlsplit

    # Use only the path component; splitting the raw URL on "/" would keep
    # "?query" / "#fragment" in the name and break on "/" inside a query.
    filename = os.path.basename(urlsplit(url).path)
    if not filename:
        return
    filepath = os.path.join(save_dir, filename)

    # The context manager releases the streamed connection even when the
    # status is not 200 or writing the file fails.
    with requests.get(url, stream=True, timeout=timeout) as response:
        if response.status_code != 200:
            return
        with open(filepath, 'wb') as file:
            for chunk in response.iter_content(1024):
                file.write(chunk)
def scrape_images(url, save_dir, timeout=10):
    """Download every ``<img>`` found on the page at *url* into *save_dir*.

    Each tag is printed, its ``src`` is resolved against *url* (so relative
    and protocol-relative sources work too), and the resulting http(s) URL
    is handed to ``download_image``. Tags without a ``src`` and sources
    with a non-http scheme (e.g. ``data:`` URIs) are skipped.

    Args:
        url: Address of the page to scan.
        save_dir: Existing directory that receives the images.
        timeout: Seconds to wait for the page before giving up.

    Returns:
        None. Nothing happens when the page does not answer with HTTP 200.

    Raises:
        requests.exceptions.RequestException: If fetching the page fails.
            Errors raised by ``download_image`` propagate unchanged.
    """
    from urllib.parse import urljoin

    response = requests.get(url, timeout=timeout)
    if response.status_code != 200:
        return

    soup = BeautifulSoup(response.content, 'html.parser')
    for img_tag in soup.find_all('img'):
        print(img_tag)
        img_src = img_tag.get('src')
        if not img_src:
            # .get() returns None when the attribute is absent; calling
            # .startswith on it would raise AttributeError.
            continue
        # Turn "/a.jpg", "a.jpg" and "//cdn/a.jpg" into absolute URLs.
        img_url = urljoin(url, img_src)
        if img_url.startswith(('http://', 'https://')):
            download_image(img_url, save_dir)
# Page to scrape and the directory that receives the downloaded images.
url = 'http://www.netbian.com/'
save_directory = 'images'

# Make sure the output directory exists (no error if it is already there).
os.makedirs(name=save_directory, exist_ok=True)

# Run the scraper.
scrape_images(url=url, save_dir=save_directory)
3、在控制台运行 python getWebsiteImage.py,即可下载该网站上的图片。代码中的 url 可以修改为自己需要爬取的网站地址,以上仅作为示例。