用python获取下厨房的菜谱,并保存为excel |Python 主题月

552 阅读2分钟

本文正在参加「Python主题月」,详情查看juejin.cn/post/697953…

最近,老婆老是问我晚上吃啥饭,关键我也很蒙圈啊:除了平时吃的那些,还有啥?忽然有一天发现了下厨房这个神器,里面有好多菜谱,而且各种分类都有,不论你爱吃肉菜、素菜还是凉菜,只要你想吃,下厨房里基本都有。虽然下厨房的 App 很方便,但奈何老婆问我吃啥的时候,我一般都在公司加班,不方便打开下厨房的网站或者 App。那么怎么办呢?我把它的菜谱抓下来一部分不就可以了:整理成 Excel 表格,老婆问我晚上吃啥的时候,我打开 Excel 一看就知道了,哈哈哈。

import requests
from pyquery import PyQuery as pq
import time
import xlsxwriter as xw

# Browser-like HTTP headers sent with every request to www.xiachufang.com.
header = {
    "Host": "www.xiachufang.com",
    "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10.15; rv:89.0) Gecko/20100101 Firefox/89.0",
    "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8",
    "Accept-Language": "zh-CN,zh;q=0.8,zh-TW;q=0.7,zh-HK;q=0.5,en-US;q=0.3,en;q=0.2",
    # "br" is intentionally not advertised: requests can only decode Brotli
    # bodies when the optional brotli/brotlicffi package is installed, and
    # this script does not depend on it. gzip/deflate are always supported.
    "Accept-Encoding": "gzip, deflate",
    "Referer": "https://www.xiachufang.com/category/",
    "Upgrade-Insecure-Requests": "1"
}


rowi = 2  # 1-based worksheet row counter for recipe rows (row 1 holds the column titles); updated by get_page

# Collect the recipe's ingredient list
def get_meterial(div):
    """Return the ingredient rows found in *div* as a single string.

    Looks up the ``table`` inside *div*, then every ``tr`` inside that
    table. For each row the text of its ``td`` cells is taken and a ``;``
    is appended, so every entry (including the last one) ends with ``;``.
    Returns an empty string when there are no rows.
    """
    rows = div.find('table').find("tr")
    return "".join(row.find("td").text() + ';' for row in rows.items())

# Collect the cooking steps
def get_steps(div):
    """Return the text of every ``.container`` element in *div*, one per line.

    *div* is called with the ``.container`` selector and the text of each
    matched element is collected. The texts are joined with a newline so
    that consecutive steps stay distinguishable; previously they were
    concatenated with no delimiter at all. Returns an empty string when
    nothing matches.
    """
    return "\n".join(step.text() for step in div('.container').items())

# Write recipe records into the worksheet
def xw_toExcel(worksheet1,data, fileName,rowi):
    """Write each record of *data* as one worksheet row, starting at *rowi*.

    Args:
        worksheet1: worksheet object providing ``write_row(cell, values)``.
        data: sequence of dicts with the keys ``name``, ``author``,
            ``desc``, ``steps``, ``ings`` and ``url``.
        fileName: unused; kept so existing callers keep working.
        rowi: 1-based row number that receives the first record.

    Raises:
        KeyError: if a record lacks one of the expected keys.
    """
    for offset, record in enumerate(data):
        values = [record["name"], record["author"], record["desc"],
                  record["steps"], record["ings"], record["url"]]
        # Each record gets its own row; writing all of them to `rowi`
        # would make later records overwrite earlier ones.
        worksheet1.write_row('A' + str(rowi + offset), values)

# Fetch a single recipe page and store it in the worksheet
def get_page(worksheet1,fileName,name, url):
    """Download the recipe at *url* and write it to the next free row.

    The author, description, ingredients and steps are read from the
    ``.main-panel`` element of the page and written, together with *name*
    and *url*, to row ``rowi`` of *worksheet1*. The module-level ``rowi``
    is then advanced by one.

    A page that cannot be fetched (network error, timeout, HTTP error
    status) is reported with ``print`` and skipped without consuming a row,
    so one bad page does not abort the whole crawl.

    Args:
        worksheet1: worksheet the row is written to.
        fileName: passed through to ``xw_toExcel``.
        name: recipe title taken from the listing page.
        url: absolute URL of the recipe page.
    """
    global rowi
    try:
        # Without a timeout requests may block indefinitely on a stalled
        # connection.
        res = requests.get(url, headers=header, timeout=10)
        res.raise_for_status()
    except requests.RequestException as err:
        print("skip", url, err)
        return
    html = pq(res.text)
    panel = html('.main-panel')
    author = panel('.author').text()
    desc = panel('.desc').text()
    ings = get_meterial(panel('.ings'))
    steps = get_steps(panel('.steps'))
    testData = [
        {"name": name, "author": author,"desc":desc,"steps":steps,"ings":ings,"url":url},
    ]
    # Write first, then advance: incrementing before the write left row 2
    # empty and put the first recipe on row 3.
    xw_toExcel(worksheet1,testData, fileName,rowi)
    print(rowi,testData)
    rowi = rowi + len(testData)


def get_menu():
    """Crawl the configured category listings and save them to caipu.xlsx.

    For each category id in ``namelist`` the first ``count`` listing pages
    are requested. Every list item that carries a recipe link is handed to
    ``get_page``, with a one-second pause between recipes.

    The workbook is used as a context manager because xlsxwriter only
    writes the file when the workbook is closed; this way the rows
    collected so far are still saved if a request or parsing step raises.
    """
    count = 30  # number of listing pages fetched per category
    fileName = 'caipu.xlsx'
    title = ['标题','作者','描述','步骤','用料',"url"]  # column titles
    namelist = ["40076","52354","51848","52351","20137"]  # category ids used in the listing URL
    with xw.Workbook(fileName) as workbook:
        worksheet1 = workbook.add_worksheet("sheet1")
        worksheet1.activate()
        worksheet1.write_row('A1', title)  # titles go in the first row
        for nam in namelist:
            for page in range(1, count + 1):
                url = "https://www.xiachufang.com/category/{}/?page={}".format(nam, page)
                # Bound the wait so a stalled connection cannot hang the crawl.
                res = requests.get(url, headers=header, timeout=10)
                html = pq(res.text)
                normal_list = html('.normal-recipe-list').find("ul").find("li")
                for lists in normal_list.items():
                    a_info = lists('.info')('.name').find('a')
                    href = a_info.attr("href")
                    if not href:
                        # No recipe link in this item; concatenating None
                        # with the base URL would raise TypeError.
                        continue
                    get_page(worksheet1, fileName, a_info.text(), "https://www.xiachufang.com" + href)
                    time.sleep(1)



# Start the crawl only when this file is executed as a script, not when it is imported.
if __name__ == "__main__":
    get_menu()