# -*- coding: utf-8 -*-
# @Time : 2021/1/24 16:56
# @Author : 老七疯狂吸氧
# @File : spider.sougou.py
# @Software : PyCharm
import json
import os
import re
import shutil
import time
import urllib

import requests
def main():
    """Download every image linked from the pic.sogou.com result page.

    For each result page, the page text is fetched with ``get_html``, the
    image URLs are extracted with ``saveurl``, and each image is written to
    the current working directory as ``科比<n>.jpg``, where ``n`` counts up
    from 1 across all pages.
    """
    image_no = 1
    offset = 1
    for _ in range(0, 1):  # a single result page is requested
        # requests refuses URLs without a scheme (MissingSchema), so the
        # scheme is spelled out explicitly.
        # NOTE(review): the query string looks truncated ("…") -- confirm
        # the full URL before relying on this.
        url = "https://pic.sogou.com/d?query=%E7…" + str(offset)
        offset += 60  # the number appended to the URL grows by 60 per page
        page_text = get_html(url)
        image_urls = saveurl(page_text)
        for image_url in image_urls:
            picture = requests.get(image_url)
            print("爬取完成:", image_no)
            time.sleep(1)  # wait one second between downloads
            # The with-block closes the file; no explicit close() is needed.
            with open("科比" + str(image_no) + ".jpg", "wb") as file:
                file.write(picture.content)
            image_no += 1
def get_html(url):
    """Issue one GET request for ``url`` and return the response body as text.

    The request carries a fixed desktop-browser User-Agent header.
    """
    # Replace this with your own User-Agent if desired.
    user_agent = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/56.0.2924.87 Safari/537.36'
    reply = requests.get(url, headers={'User-Agent': user_agent})
    return reply.text
def saveurl(baseurl):
    """Return every substring of ``baseurl`` found between ``&url=`` and ``" alt="``.

    ``baseurl`` is the page text to scan; the matches are returned as a list
    of strings in the order they occur (an empty list when nothing matches).
    """
    link_pattern = re.compile(r'&url=(.*?)" alt="')
    return link_pattern.findall(baseurl)
# Run the scraper only when this file is executed as a script, not on import.
if __name__ == '__main__':
    main()