import threading
from concurrent.futures import ThreadPoolExecutor  # thread-pool support

import requests
# Serializes appends to the shared output file across worker threads.
_WRITE_LOCK = threading.Lock()

# Time-of-day suffix removed from ``pubDate`` values before they are written.
_MIDNIGHT_SUFFIX = ' 00:00:00'


def _format_row(record):
    """Return the four output fields of one price record as cleaned strings.

    The fields are ``prodName``, ``avgPrice``, ``place`` and ``pubDate``, in
    that order. A missing key raises ``KeyError``.
    """
    pub_date = record['pubDate']
    # Remove the exact suffix only. ``str.strip(' 00:00:00')`` treats its
    # argument as a set of characters and would also eat trailing zeros of the
    # date itself ('2021-10-10' -> '2021-10-1').
    if isinstance(pub_date, str) and pub_date.endswith(_MIDNIGHT_SUFFIX):
        pub_date = pub_date[:-len(_MIDNIGHT_SUFFIX)]

    fields = (record['prodName'], record['avgPrice'], record['place'], pub_date)
    cleaned = []
    for value in fields:
        # Coerce to text so non-string values do not break ``.replace``.
        text = '' if value is None else str(value)
        # Backslashes and forward slashes are replaced with underscores.
        cleaned.append(text.replace('\\', '_').replace('/', '_'))
    return cleaned


def downloadurl(url, page=1):
    """Fetch one page of price data and append its rows to ``today_price.csv``.

    Args:
        url: Endpoint that receives the POSTed form and answers with JSON
            holding the records under the ``list`` key.
        page: Page number sent as the ``current`` form field. It is an explicit
            argument so that each worker thread fetches the page it was
            submitted with, instead of reading a global that the submitting
            loop keeps rebinding.

    Raises:
        requests.RequestException: on network failure, timeout or a non-2xx
            HTTP status.
        KeyError: if a record lacks one of the expected fields.
    """
    form = {
        'limit': '20',
        'current': str(page),
        'pubDateStartTime': '',
        'pubDateEndTime': '',
        'prodPcatid': '',
        'prodCatid': '',
        'prodName': '',
    }
    # A timeout keeps a stalled server from blocking a pool worker forever.
    rsp = requests.post(url, data=form, timeout=10)
    rsp.raise_for_status()
    records = rsp.json().get('list') or []

    # Each row is written as the ``str()`` of a Python list, one per line.
    # NOTE(review): this is not real CSV despite the file name; the format is
    # kept as-is because existing consumers of the file may depend on it.
    lines = ''.join(str(_format_row(record)) + '\n' for record in records)
    if lines:
        # Open the file once per page, under a lock, so rows written by
        # concurrent workers cannot interleave.
        with _WRITE_LOCK:
            with open('today_price.csv', mode='a', encoding='utf-8') as f:
                f.write(lines)
    print(url, '提取完毕')


if __name__ == '__main__':
    price_url = 'http://www.xinfadi.com.cn/getPriceData.html'
    with ThreadPoolExecutor(2) as t:  # pool of two worker threads
        # Keep the futures: an exception raised in a worker is stored on its
        # future and would otherwise vanish without a trace.
        futures = {
            page: t.submit(downloadurl, price_url, page)
            for page in range(1, 10)
        }
        for page, future in futures.items():
            try:
                future.result()
            except Exception as exc:  # top-level boundary: report and go on
                print('page', page, 'failed:', repr(exc))