(1)
import requests
from bs4 import BeautifulSoup

# Hypothetical news page URL
url = 'https://example.com/news'

# Send the HTTP request
response = requests.get(url)

# Make sure the request succeeded
if response.status_code == 200:
    # Parse the page content with BeautifulSoup
    soup = BeautifulSoup(response.text, 'html.parser')
    # Find all news titles; adjust the selector to the actual page structure
    news_titles = soup.find_all('h2', class_='news-title')

    # Find all news bodies; likewise adjust the selector to the page structure
    news_contents = soup.find_all('div', class_='news-content')

    # Iterate over the title/content pairs and print them (or process them further)
    for title, content in zip(news_titles, news_contents):
        print(f"Title: {title.text.strip()}")
        print(f"Content: {content.text.strip()}")
        print()
else:
    print("Failed to retrieve the webpage")
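If the scraped pairs are to be reused in tabular form (as in example (2) below), one option is to collect them into a list of dictionaries first. This is a minimal sketch under the same hypothetical URL and selectors as above; records and df_news are names introduced here purely for illustration:

import pandas as pd

# Sketch: collect the scraped title/content pairs into records
# (assumes the request succeeded and the hypothetical selectors matched)
records = [
    {'Title': t.text.strip(), 'Content': c.text.strip()}
    for t, c in zip(news_titles, news_contents)
]
df_news = pd.DataFrame(records)
print(df_news)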
(2)
import pandas as pd
# Assume we already have the data; below is simulated data
data = {
    'Column1': [1, 2, 3, 4],
    'Column2': ['A', 'B', 'C', 'D'],
    'Column3': [5.5, 6.5, 7.5, 8.5]
}

# Create a DataFrame from the dictionary
df = pd.DataFrame(data)

# Write the DataFrame to a worksheet (sheet) of an Excel file
df.to_excel('output.xlsx', sheet_name='Sheet1', index=False)

print(data)
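To store several tables in one Excel file, each on its own sheet, a minimal sketch using pandas' ExcelWriter follows. The second table and the file name are made up for illustration, and writing .xlsx assumes an engine such as openpyxl is installed:

import pandas as pd

df = pd.DataFrame(data)                       # 'data' as defined above
df_extra = pd.DataFrame({'Other': [10, 20]})  # hypothetical second table

# Write each DataFrame to its own sheet of the same workbook
with pd.ExcelWriter('output_multi.xlsx') as writer:
    df.to_excel(writer, sheet_name='Sheet1', index=False)
    df_extra.to_excel(writer, sheet_name='Sheet2', index=False)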