import urllib.request
import os
# Fetch a URL and return the raw response body.
def url_open(url):
    """Fetch url and return the raw response body as bytes.

    A browser-like User-Agent header is attached to the request.

    Args:
        url: The address to fetch.

    Returns:
        The undecoded response body (bytes).

    Raises:
        urllib.error.URLError: If the request fails.
    """
    req = urllib.request.Request(url)
    req.add_header(
        'User-Agent',
        'Mozilla/5.0 (Windows NT 6.3; WOW64; rv:51.0) Gecko/20100101 Firefox/51.0',
    )
    # Open the Request object, not the bare URL; otherwise the header added
    # above is never sent. The context manager closes the connection.
    with urllib.request.urlopen(req) as response:
        return response.read()
# Get the current page number (e.g. 1, 2, 3, 4, ...) from the page.
def get_page(url):
    """Return the current comment-page number found on the page at url.

    The page is searched for the text 'current-comment-page'; the number is
    taken from 23 characters after the start of that text up to the next ']'.

    Args:
        url: Address of the page to inspect.

    Returns:
        The page number as a string (the caller converts it to int).

    Raises:
        ValueError: If the marker text or the closing ']' is not present.
    """
    html = url_open(url).decode('utf-8')
    marker = html.find('current-comment-page')
    if marker == -1:
        # Without this check, find()'s -1 would silently produce a bogus slice.
        raise ValueError("'current-comment-page' marker not found in " + url)
    # The marker is 20 characters long; the remaining 3 skip the characters
    # between it and the number (presumably '">[' - confirm against the page).
    start = marker + 23
    end = html.find(']', start)
    if end == -1:
        raise ValueError("closing ']' after page number not found in " + url)
    return html[start:end]
# Find the image URLs on a page and return them as a list.
def find_imgs(url):
    """Collect the addresses of the .jpg images referenced on a page.

    The page text is scanned for each occurrence of 'img src='. An address is
    recorded only when '.jpg' appears within 255 characters of that
    occurrence.

    Args:
        url: Address of the page to scan.

    Returns:
        A list of image address strings, in page order. Protocol-relative
        addresses ('//host/...') are given an 'http:' scheme; addresses that
        do not start with '//' are returned as found.
    """
    html = url_open(url).decode('utf-8')
    img_addrs = []
    tag = html.find('img src=')
    while tag != -1:
        # Bounded look-ahead: an image without '.jpg' must not pick up the
        # extension of a later one.
        ext = html.find('.jpg', tag, tag + 255)
        if ext != -1:
            # +9 skips 'img src=' plus the opening quote; +4 keeps '.jpg'.
            src = html[tag + 9:ext + 4]
            # Only add a scheme when one is missing; prepending 'http:' to an
            # already absolute address would corrupt it.
            img_addrs.append('http:' + src if src.startswith('//') else src)
            resume = ext
        else:
            resume = tag + 9
        tag = html.find('img src=', resume)
    return img_addrs
# Download the images and save them to disk.
def save_imgs(folder, img_addrs):
    """Download every address in img_addrs and write it to a local file.

    Each file is named after the last '/'-separated component of its address
    and is created relative to the current working directory.

    Args:
        folder: Not used by this function; kept so existing callers keep
            working. download_mm changes into the target folder before
            calling it.
        img_addrs: Iterable of image address strings.
    """
    for each in img_addrs:
        # The last path component of the address is the file name.
        filename = each.split('/')[-1]
        # Download first so a failed request does not leave an empty file.
        img = url_open(each)
        with open(filename, 'wb') as f:
            f.write(img)
def download_mm(folder='mm', pages=10):
    """Download the images from the most recent pages into folder.

    Creates folder if needed, makes it the working directory, reads the
    current page number from the site, then walks backwards one page at a
    time, saving every image found.

    Args:
        folder: Directory to store the images in (created if missing).
        pages: Maximum number of pages to process, starting at the newest.

    Raises:
        ValueError: If the current page number cannot be parsed.
    """
    # exist_ok lets the script be re-run into an existing folder.
    os.makedirs(folder, exist_ok=True)
    os.chdir(folder)
    url = 'http://jandan.net/ooxx/'
    latest = int(get_page(url))
    for i in range(pages):
        # Offset from the newest page each time. Decrementing a running
        # counter by i would skip pages (n, n-1, n-3, n-6, ...).
        page_num = latest - i
        if page_num < 1:
            break  # ran out of pages
        page_url = url + 'page-' + str(page_num) + '#comments'
        img_addrs = find_imgs(page_url)
        save_imgs(folder, img_addrs)
# Run the downloader with its defaults when executed as a script.
if __name__ == '__main__':
    download_mm()