Python爬虫并导出Excel（001）：安装依赖 openpyxl 和 pandas

pip install openpyxl

pip install pandas

也可以在 PyCharm 中通过 File | Settings | Project: pythonpcyf01 | Python Interpreter，点击 “+” 添加 pandas、openpyxl（脚本还用到了 requests 和 beautifulsoup4，同样需要安装）。

import requests
from bs4 import BeautifulSoup
import pandas as pd


# Script: download one web page, collect the text of every
# <h1 class="post_title"> element, save the titles to output.xlsx and
# print them to stdout.

# Browser-like User-Agent sent with the request.
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3'}
# Target page URL (placeholder; replace with the real page).
url = 'https://www.xxx.com/xxxxx'

# Send the HTTP request. Without an explicit timeout, requests waits
# indefinitely on a stalled server, so bound the wait to 10 seconds.
response = requests.get(url, headers=headers, timeout=10)

# Only continue when the page was retrieved successfully.
if response.status_code == 200:
    # Parse the page content with BeautifulSoup.
    soup = BeautifulSoup(response.text, 'html.parser')

    # Locate the elements holding the news titles; this assumes the titles
    # live in <h1 class="post_title"> tags.
    news_titles = soup.find_all('h1', class_='post_title')

    # Build the table from the titles' text, not from the Tag objects:
    # a Tag is iterable over its child nodes, so passing Tags directly to
    # DataFrame spreads each title's children across columns instead of
    # producing one clean text column.
    title_texts = [title.get_text(strip=True) for title in news_titles]
    df = pd.DataFrame({'title': title_texts})

    # Store the DataFrame in an Excel file (no index column).
    df.to_excel('output.xlsx', index=False)

    # Print the news titles.
    for title in news_titles:
        print(title.text)
else:
    print('Failed to retrieve the webpage')