本文正在参加「Python主题月」,详情查看juejin.cn/post/697953…
最近,老婆老是问我晚上吃啥饭,关键我也很蒙圈啊,除了平时吃的那些,还有啥?忽然有一天发现了下厨房这个神器,里面有好多菜谱,而且各种分类都有:不论你是爱吃肉菜、素菜还是凉菜,只要你想吃,下厨房里基本都有。虽然下厨房的 App 很方便,但是老婆问我吃啥的时候,我一般都在公司加班,不方便打开下厨房的网站或者 App。那么怎么办呢?我把他们的菜谱爬下来一部分不就可以了?整理成 Excel 表格,老婆问我晚上吃啥的时候,我打开 Excel,一看就知道了,哈哈哈。
import requests
from pyquery import PyQuery as pq
import time
import xlsxwriter as xw
# HTTP headers sent with every request; they present the crawler as a
# Firefox 89 browser arriving from the site's category index page.
header = {
    "Host": "www.xiachufang.com",
    "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10.15; rv:89.0) Gecko/20100101 Firefox/89.0",
    "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8",
    "Accept-Language": "zh-CN,zh;q=0.8,zh-TW;q=0.7,zh-HK;q=0.5,en-US;q=0.3,en;q=0.2",
    # "br" is deliberately not advertised: requests only decodes Brotli
    # bodies when an optional Brotli package is installed, and this file
    # does not depend on one, so a Brotli response could reach the HTML
    # parser undecoded.
    "Accept-Encoding": "gzip, deflate",
    "Referer": "https://www.xiachufang.com/category/",
    "Upgrade-Insecure-Requests": "1",
}

# Worksheet row counter for recipe data. It is initialised to 2 because
# row 1 holds the column titles; get_page updates it as recipes are written.
rowi = 2
# Extract the ingredient list of a recipe.
def get_meterial(div):
    """Flatten the ingredient table of a recipe into a single string.

    Args:
        div: PyQuery selection of the ingredients section (the caller
            passes the ``.ings`` element); its ``<table>`` rows are read.

    Returns:
        The ``<td>`` text of every ``<tr>``, with a ``;`` appended after
        each row, so a non-empty result always ends with ``;``. An empty
        string is returned when no rows are found.
    """
    rows = div.find('table').find("tr")
    # Each row keeps its own trailing ';' so the output format is unchanged;
    # join avoids rebuilding the string on every iteration.
    return "".join(row.find("td").text() + ';' for row in rows.items())
# Extract the cooking steps of a recipe.
def get_steps(div, sep=""):
    """Concatenate the text of every step container of a recipe.

    Args:
        div: PyQuery selection of the steps section (the caller passes the
            ``.steps`` element); its ``.container`` descendants are read.
        sep: String inserted between consecutive steps. The default ``""``
            keeps the historical output, where steps are joined directly;
            pass e.g. ``"\\n"`` to keep the steps visually separated.

    Returns:
        The step texts joined by ``sep``; an empty string when no
        ``.container`` element is found.
    """
    return sep.join(step.text() for step in div('.container').items())
# Write recipe records to the Excel worksheet (xlsxwriter).
def xw_toExcel(worksheet1, data, fileName, rowi):
    """Write recipe records to consecutive worksheet rows.

    Args:
        worksheet1: Worksheet object exposing ``write_row(cell, values)``.
        data: Sequence of dicts, each with the keys ``name``, ``author``,
            ``desc``, ``steps``, ``ings`` and ``url``.
        fileName: Unused; kept so existing callers keep working.
        rowi: 1-based worksheet row that receives the first record.

    The columns are written in the order name, author, desc, steps, ings,
    url, starting at column A.
    """
    columns = ("name", "author", "desc", "steps", "ings", "url")
    for offset, record in enumerate(data):
        # Each record gets its own row; previously every record was written
        # to the same row, so only the last one in ``data`` survived.
        cell = 'A' + str(rowi + offset)
        worksheet1.write_row(cell, [record[key] for key in columns])
# Fetch a recipe detail page and store it in the worksheet.
def get_page(worksheet1, fileName, name, url):
    """Download one recipe page and write it as a single worksheet row.

    The author, description, ingredients and steps are read from the
    ``.author``, ``.desc``, ``.ings`` and ``.steps`` elements inside the
    page's ``.main-panel`` element.

    Args:
        worksheet1: Worksheet object, passed through to ``xw_toExcel``.
        fileName: Workbook file name, passed through to ``xw_toExcel``.
        name: Recipe title stored in the first column.
        url: Absolute URL of the recipe page; also stored in the row.

    Side effects:
        Writes the record at the row held by the module-level ``rowi``,
        then advances ``rowi`` by one, and prints the row number together
        with the record.

    Raises:
        requests.exceptions.RequestException: If the page cannot be fetched
            (including when the timeout expires).
    """
    global rowi
    # Bounded wait so one stalled connection cannot hang the whole crawl.
    res = requests.get(url, headers=header, timeout=10)
    panel = pq(res.text)('.main-panel')
    testData = [
        {
            "name": name,
            "author": panel('.author').text(),
            "desc": panel('.desc').text(),
            "steps": get_steps(panel('.steps')),
            "ings": get_meterial(panel('.ings')),
            "url": url,
        },
    ]
    # Write first, then advance: incrementing before the write skipped the
    # initial row, leaving the row right under the titles empty.
    row = rowi
    xw_toExcel(worksheet1, testData, fileName, row)
    rowi = row + 1
    print(row, testData)
def get_menu():
    """Crawl the recipe listing pages and save the recipes to ``caipu.xlsx``.

    For each category id in ``namelist`` the first ``count`` listing pages
    are requested. Every ``<li>`` of the ``.normal-recipe-list`` element
    that carries a recipe link is handed to ``get_page``, which writes one
    worksheet row per recipe.

    The workbook is opened as a context manager so it is closed, and the
    file therefore written, even when a request or parse step raises;
    the exception still propagates to the caller.

    Raises:
        requests.exceptions.RequestException: If a page cannot be fetched
            (including when the timeout expires).
    """
    count = 30  # number of listing pages requested per category
    fileName = 'caipu.xlsx'
    title = ['标题', '作者', '描述', '步骤', '用料', "url"]  # column titles
    namelist = ["40076", "52354", "51848", "52351", "20137"]  # category ids used in the URL

    with xw.Workbook(fileName) as workbook:  # create the workbook
        worksheet1 = workbook.add_worksheet("sheet1")  # create the sheet
        worksheet1.activate()  # make it the active sheet
        worksheet1.write_row('A1', title)  # titles go into row 1
        for nam in namelist:
            for i in range(count):
                url = "https://www.xiachufang.com/category/{}/?page={}".format(nam, str(i + 1))
                # Bounded wait so one stalled connection cannot hang the crawl.
                res = requests.get(url, headers=header, timeout=10)
                html = pq(res.text)
                normal_list = html('.normal-recipe-list').find("ul").find("li")
                for lists in normal_list.items():
                    a_info = lists('.info')('.name').find('a')
                    name = a_info.text()
                    href = a_info.attr("href")
                    if not href:
                        # Entry without a recipe link: nothing to fetch, and
                        # concatenating None to the base URL would raise.
                        continue
                    get_page(worksheet1, fileName, name, "https://www.xiachufang.com" + href)
                    # NOTE(review): the original indentation was lost, so the
                    # intended nesting of this delay is unknown; it is applied
                    # after every recipe request here - confirm.
                    time.sleep(1)
# Start the crawl only when this file is executed as a script, not when it
# is imported as a module.
if __name__ == "__main__":
    get_menu()