import os

import parsel
import requests
# Scrape the gallery listing pages of jdlingyu.com and save every image of
# each gallery under img1/<gallery title>/.

# Browser-like user agent sent with every request (loop-invariant, so it is
# built once instead of once per listing page).
HEADERS = {
    "user-agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/100.0.4896.60 Safari/537.36"
}
# Seconds to wait for a server; without a timeout requests can block forever.
REQUEST_TIMEOUT = 30
# Characters that are path separators or illegal in file names on common
# file systems; they are replaced so a gallery title is a single directory.
_UNSAFE_CHARS = str.maketrans({char: '_' for char in '\\/:*?"<>|'})


def _fetch(url):
    """Return the successful response for *url*, or None if the request fails.

    Connection errors, timeouts, malformed URLs and HTTP error statuses are
    reported on stdout instead of aborting the whole crawl.
    """
    try:
        response = requests.get(url, headers=HEADERS, timeout=REQUEST_TIMEOUT)
        response.raise_for_status()
    except requests.RequestException as exc:
        print('下载失败', url, exc)
        return None
    return response


for page in range(1, 3):
    print(f'正在下载第{page}页')
    # makedirs also creates the parent "img1" directory on the first run and
    # does not fail when the directory already exists.
    os.makedirs(f'img1/{page}', exist_ok=True)

    listing = _fetch(f'https://www.jdlingyu.com/tuji/page/{page}')
    if listing is None:
        continue
    listing.encoding = 'UTF-8'
    items = parsel.Selector(listing.text).xpath('//div[@id="post-list"]/ul/li')

    for li in items:
        title = li.xpath('.//h2/a/text()').get()  # gallery title
        pic_url = li.xpath('.//h2/a/@href').get()  # gallery detail page URL
        if not title or not pic_url:
            # List entries without a title link cannot be stored or followed.
            continue

        gallery_dir = os.path.join('img1', title.translate(_UNSAFE_CHARS))
        os.makedirs(gallery_dir, exist_ok=True)

        gallery = _fetch(pic_url)
        if gallery is None:
            continue
        image_urls = parsel.Selector(gallery.text).xpath(
            '//*[@id="primary-home"]/article/div[2]/p/img/@src'
        ).getall()

        for item_url in image_urls:
            file_name = item_url.split('/')[-1]
            if not file_name:
                # A URL ending in "/" has no usable file name.
                continue
            image = _fetch(item_url)
            if image is None:
                continue
            # .content is the raw bytes of the body, so write in binary mode.
            with open(os.path.join(gallery_dir, file_name), mode='wb') as f:
                f.write(image.content)
            print('下载完成', file_name)