只需把以下代码中的存储目录换成自己本地的存储目录即可。
import random
import time
from six.moves import urllib
import re
# Running number of images saved so far; getImg uses it as the output file
# name and increments it after each successful save.
count = 0
def getHtml(url, timeout=30):
    """Fetch *url* and return the response body decoded as UTF-8 text.

    The request is sent with a desktop-browser User-Agent header.

    Args:
        url: Address of the page to download.
        timeout: Seconds passed to ``urlopen`` as its blocking-operation
            timeout, so a stalled server cannot hang the caller forever.

    Returns:
        The page body as a text string.

    Raises:
        UnicodeDecodeError: If the body is not valid UTF-8.
        Any error raised by ``urlopen`` propagates unchanged.
    """
    headers = {'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_3) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/75.0.3770.142 Safari/537.36'}
    req = urllib.request.Request(url, headers=headers)
    response = urllib.request.urlopen(req, timeout=timeout)
    # try/finally instead of ``with``: the file imports urllib through six,
    # and the Python 2 response object is not a context manager.
    try:
        return response.read().decode('utf-8')
    finally:
        response.close()
def getImg(html, save_dir='/Users/wys/Desktop/intern/pictures', timeout=30):
    """Download every ``.jpg`` referenced by a ``src="..."`` attribute in *html*.

    Each image is written to ``<save_dir>/<count>.jpg``, where ``count`` is the
    module-level counter; the counter is incremented after each successful
    save. A URL that cannot be fetched or saved is reported with ``print`` and
    skipped, so one bad link does not stop the rest.

    Args:
        html: Page source to scan for image URLs.
        save_dir: Directory the images are written into. It is not created
            here; if it does not exist every save fails and is reported.
        timeout: Seconds passed to ``urlopen`` as its blocking-operation
            timeout for each image download.
    """
    # Function-scope import: the file's top-level imports do not include os.
    import os

    global count
    # Raw string so that ``\.`` reaches the regex engine as an escaped dot
    # without triggering Python's invalid-escape-sequence warning.
    for imgurl in re.findall(r'src="(.+?\.jpg)"', html):
        print(imgurl)
        # Some of the matched URLs are invalid; catch the failure and move on
        # to the next image.
        try:
            response = urllib.request.urlopen(imgurl, timeout=timeout)
            try:
                # Raw image bytes: they must not be decoded as UTF-8, which is
                # why getHtml cannot be reused for this download.
                image = response.read()
            finally:
                response.close()
            with open(os.path.join(save_dir, '%s.jpg' % count), 'wb') as fp:
                fp.write(image)
            print("正在下载第%s张图片" % count)
            count += 1
        except Exception as e:
            print(e)
# There are 70 listing pages in total; fetch each one and download its images.
for i in range(1, 71):
    print("------------" + str(i) + "------------")
    try:
        html = getHtml("http://www.umei.cc/bizhitupian/meinvbizhi/%s.htm" % i)
    except Exception as e:
        # One unreachable or undecodable page should not abort the remaining
        # pages; report it and carry on, the same way getImg skips bad images.
        print(e)
    else:
        getImg(html)
    # Random 1-3 second pause between pages to avoid hammering the site.
    time.sleep(random.randint(1, 2) + random.random())
成果:爬到很多张美图,keke~
每日格言:离群索居者,不是野兽,便是神灵 --亚里士多德
请作者吃樱桃(支付宝)