Python 爬虫之采集妹子图片

97 阅读1分钟
import os

import parsel
import requests
# Root folder that receives one sub-folder per gallery.
BASE_DIR = 'img1'
# Listing page template; ``page`` is the 1-based page number.
LIST_URL = 'https://www.jdlingyu.com/tuji/page/{page}'
HEADERS = {
    "user-agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/100.0.4896.60 Safari/537.36"
}
# Seconds to wait for a server before giving up; without a timeout
# ``requests`` can block forever on a stalled connection.
REQUEST_TIMEOUT = 15
# Characters that are not allowed in file names on common file systems.
_FORBIDDEN_CHARS = '\\/:*?"<>|'


def safe_name(text):
    """Return *text* made safe for use as a single path component.

    Path separators, other reserved characters and control characters are
    replaced by ``_``; surrounding whitespace and trailing dots/spaces are
    removed.  ``None`` or an input that sanitises to nothing (for example
    ``'..'``) yields ``''`` so callers can skip the entry.
    """
    if not text:
        return ''
    cleaned = ''.join(
        '_' if ch in _FORBIDDEN_CHARS or ord(ch) < 32 else ch
        for ch in text
    )
    return cleaned.strip().rstrip('. ')


def fetch(url):
    """GET *url* with the shared headers and timeout.

    Raises ``requests.RequestException`` on connection problems and on
    HTTP error statuses, so an error page is never mistaken for content.
    """
    response = requests.get(url, headers=HEADERS, timeout=REQUEST_TIMEOUT)
    response.raise_for_status()
    return response


def download_gallery(title, gallery_url):
    """Download every image of one gallery into ``img1/<title>``.

    A failure on a single image is reported and skipped; a failure to load
    the gallery page itself propagates as ``requests.RequestException``.
    """
    folder = os.path.join(BASE_DIR, title)
    # makedirs also creates BASE_DIR itself on the very first run.
    os.makedirs(folder, exist_ok=True)

    gallery_html = fetch(gallery_url).text
    image_urls = parsel.Selector(gallery_html).xpath(
        '//*[@id="primary-home"]/article/div[2]/p/img/@src'
    ).getall()

    for image_url in image_urls:
        # The last URL segment is the file name; sanitise it because it may
        # carry a query string or other characters invalid in a file name.
        file_name = safe_name(image_url.split('/')[-1])
        if not file_name:
            continue
        try:
            image_bytes = fetch(image_url).content  # raw binary payload
        except requests.RequestException as exc:
            print('下载失败', image_url, exc)
            continue
        with open(os.path.join(folder, file_name), mode='wb') as f:
            f.write(image_bytes)
        print('下载完成', file_name)


def download_page(page):
    """Download all galleries linked from listing page number *page*."""
    print(f'正在下载第{page}页')
    # The original script creates this per-page folder but never writes into
    # it; it is still created so the on-disk layout stays the same.
    os.makedirs(os.path.join(BASE_DIR, str(page)), exist_ok=True)

    response = fetch(LIST_URL.format(page=page))
    response.encoding = 'UTF-8'
    selector = parsel.Selector(response.text)

    for item in selector.xpath('//div[@id="post-list"]/ul/li'):
        title = safe_name(item.xpath('.//h2/a/text()').get())  # gallery title
        gallery_url = item.xpath('.//h2/a/@href').get()  # gallery page URL
        # Entries without a usable title or link cannot be stored or fetched.
        if not title or not gallery_url:
            continue
        try:
            download_gallery(title, gallery_url)
        except requests.RequestException as exc:
            print('下载失败', gallery_url, exc)


def main():
    """Scrape listing pages 1 and 2, continuing past pages that fail."""
    for page in range(1, 3):
        try:
            download_page(page)
        except requests.RequestException as exc:
            print('下载失败', page, exc)


if __name__ == '__main__':
    main()