'''
Batch-scrape Qixinbao (qixin.com) search-result pages (roughly the first 200 pages).
'''
import urllib.request,socket,re,sys,os
import ssl
import fileinput
import time
import random
# Swap the default HTTPS context factory for the unverified one, so HTTPS
# requests made through urllib skip certificate verification (insecure).
ssl._create_default_https_context = ssl._create_unverified_context
# NOTE(review): targetPath is not referenced anywhere in the visible code —
# confirm whether it is still needed.
targetPath = "//Users//wangleilei//Documents//03__douban_Images"
def saveFile(data, path="//Users//wangleilei//Documents//007_企信宝.html"):
    """Append raw bytes to the output HTML file.

    Args:
        data: Bytes to append; the file is opened in binary append mode, so
            successive calls accumulate in the same file.
        path: Destination file. Defaults to the location this script has
            always written to.
    """
    # The context manager closes the handle even if write() raises.
    with open(path, 'ab') as f:
        f.write(data)
def getData(index1):
    """Fetch one search-result page and append its raw body to the output file.

    The response body is passed to saveFile() and also printed to stdout.

    Args:
        index1: Page number to request. A str, or any value str() can
            convert (for example an int).
    """
    # The `key` query parameter is the percent-encoded UTF-8 form of "旅游".
    url = "http://www.qixin.com/search?key=%E6%97%85%E6%B8%B8&page=" + str(index1) + "&status[]=1"
    print(url)
    # NOTE(review): the Cookie value looks like a session captured from a
    # browser; presumably it expires and must be refreshed — confirm.
    headers = {'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10.12; rv:57.0) Gecko/20100101 Firefox/57.0',
               'Cookie': 'channel=baidu; _zg=%7B%22uuid%22%3A%20%221604427941638e-0e3eb0a6fa80588-49566e-13c680-160442794174ec%22%2C%22sid%22%3A%201512971932.697%2C%22updated%22%3A%201512972319.613%2C%22info%22%3A%201512971932703%2C%22cuid%22%3A%20%228449f8dd-5c6a-4768-b489-f34053c20d77%22%7D; showsale=1; cookieShowLoginTip=1; responseTimeline=85; Hm_lvt_52d64b8d3f6d42a2e416d59635df3f71=1512971936; Hm_lpvt_52d64b8d3f6d42a2e416d59635df3f71=1512972313; sid=s%3ATF94-C8QhbiJVOwC2ZRAKYUXPJBXVJFn.dpf832kO3Fdn66716KAquegeH6LtIHYMCab5u9bINwU'}
    request = urllib.request.Request(url=url, headers=headers)
    # Close the HTTP response (and its socket) as soon as the body is read.
    with urllib.request.urlopen(request) as response:
        data = response.read()
    saveFile(data)
    print(data)
# Crawl pages 195 through 299 one at a time, pausing a random 0 or 1 second
# after each request.
for i in range(195, 300):
    print(i)
    getData(str(i))
    a = random.randrange(0, 2)
    time.sleep(a)
    print("随机数")
    print(a)
# My Python 3 web-crawler series