# Python爬虫获取图片并下载保存至本地 (Python crawler: fetch images and save them to local disk)

import urllib.request

import os

#to open the url

def url_open(url):
    """Fetch ``url`` and return the raw response body.

    The request is sent with a desktop Firefox ``User-Agent`` header
    (presumably because the target site rejects urllib's default agent --
    TODO confirm).

    Args:
        url: Address to fetch, as a string.

    Returns:
        The undecoded response body as ``bytes``.

    Raises:
        urllib.error.URLError: If the request cannot be completed.
    """
    req = urllib.request.Request(url)
    req.add_header(
        'User-Agent',
        'Mozilla/5.0 (Windows NT 6.3; WOW64; rv:51.0) '
        'Gecko/20100101 Firefox/51.0',
    )
    # Open the Request object rather than the bare URL string; otherwise the
    # header added above is never sent. The ``with`` block closes the
    # response once the body has been read.
    with urllib.request.urlopen(req) as response:
        return response.read()

# Get the current page number (e.g. 1, 2, 3, 4, ...)

def get_page(url):
    """Return the current comment-page number shown on the page at ``url``.

    The number is taken from the first ``current-comment-page`` marker in the
    HTML: everything from 23 characters after the start of the marker up to
    the next ``]``.

    Args:
        url: Address of the page to inspect.

    Returns:
        The page number as a string (e.g. ``'2356'``); callers convert it.

    Raises:
        ValueError: If the marker or the closing ``]`` is not present, i.e.
            the page layout is not the one this scraper expects.
    """
    html = url_open(url).decode('utf-8')

    marker_pos = html.find('current-comment-page')
    if marker_pos == -1:
        # Without this check find() returns -1 and an arbitrary slice of the
        # document would be returned as the "page number".
        raise ValueError("'current-comment-page' marker not found in " + url)

    # 23 = 20 characters of marker text + 3 separator characters
    # (presumably `">[` -- TODO confirm against the live markup).
    start = marker_pos + 23
    end = html.find(']', start)
    if end == -1:
        raise ValueError("closing ']' after page number not found in " + url)

    return html[start:end]

# Find the image URLs on the page and return them as a list

def find_imgs(url):
    """Collect the addresses of the ``.jpg`` images referenced by a page.

    The HTML is scanned for ``img src=`` occurrences. For each one, a
    ``.jpg`` extension is looked for within the next 255 characters, and the
    text between the opening quote and the end of the extension is taken as
    the image address.

    Args:
        url: Address of the page to scan.

    Returns:
        A list of image URLs in document order. Protocol-relative addresses
        (``//host/path.jpg``) get an ``http:`` prefix; any other address is
        returned as found.
    """
    html = url_open(url).decode('utf-8')
    img_addrs = []

    tag_pos = html.find('img src=')
    while tag_pos != -1:
        # Skip 'img src=' (8 characters) plus the opening quote character.
        start = tag_pos + 9
        ext_pos = html.find('.jpg', start, tag_pos + 255)
        resume = start
        if ext_pos != -1:
            src = html[start:ext_pos + 4]
            # A quote inside the candidate means the '.jpg' belongs to a later
            # attribute or tag (e.g. this image is a .gif). Skip it and keep
            # scanning right after this tag so the next image is not lost.
            if '"' not in src and "'" not in src:
                # Only protocol-relative addresses lack a scheme; prefixing
                # an already absolute URL would yield 'http:http://...'.
                if src.startswith('//'):
                    src = 'http:' + src
                img_addrs.append(src)
                resume = ext_pos
        tag_pos = html.find('img src=', resume)

    return img_addrs

#save the imgs

def save_imgs(folder, img_addrs):
    """Download every address in ``img_addrs`` into the working directory.

    Each file is named after the last ``/``-separated component of its
    address and is written relative to the current working directory.

    Args:
        folder: Not used by this function; kept so existing callers keep
            working. ``download_mm`` changes into the target folder before
            calling, which is what places the files there.
        img_addrs: Iterable of image URLs to download.
    """
    for addr in img_addrs:
        filename = addr.split('/')[-1]  # last path component is the file name
        # Fetch first, then open the file: if the download fails, no empty
        # file is left behind.
        data = url_open(addr)
        with open(filename, 'wb') as f:
            f.write(data)

def download_mm(folder='mm', pages=10):
    """Download the images from ``pages`` consecutive comment pages.

    Reads the current page number from ``http://jandan.net/ooxx/`` and walks
    backwards from it one page at a time, saving every image found.

    Side effect: ``folder`` is created if missing and becomes the process's
    working directory; the previous working directory is not restored.

    Args:
        folder: Directory the images are saved into.
        pages: Maximum number of pages to process, counting down from the
            current one.
    """
    # exist_ok lets the script be re-run without failing on the folder that
    # a previous run created.
    os.makedirs(folder, exist_ok=True)
    os.chdir(folder)

    url = 'http://jandan.net/ooxx/'
    current_page = int(get_page(url))

    for i in range(pages):
        # Subtract the offset from the fixed starting page. An in-place
        # `page_num -= i` accumulates and visits N, N-1, N-3, N-6, ...
        page_num = current_page - i
        if page_num < 1:
            # Presumably pages are numbered from 1 -- nothing older to fetch.
            break
        page_url = url + 'page-' + str(page_num) + '#comments'
        img_addrs = find_imgs(page_url)
        save_imgs(folder, img_addrs)

# Run the downloader with its defaults when executed as a script.
if __name__ == '__main__':
    download_mm()