Python Crawler 001

import requests
from bs4 import BeautifulSoup


# Browser-like User-Agent header so the request is less likely to be rejected as a bot
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3'
}
# Target web page
url = 'https://www.xxx.cn/xxxxx'

# Send the HTTP request
response = requests.get(url, headers=headers)

# Make sure the page request succeeded
if response.status_code == 200:
    # Parse the page content
    soup = BeautifulSoup(response.text, 'html.parser')

    # Find all <a> tags, i.e. the links
    for link in soup.find_all('a'):
        # Get the link's href attribute
        href = link.get('href')

        # Print the link (printing the whole soup here was a leftover debug statement and is removed)
        if href is not None:
            print(href)
else:
    print(f'Failed to retrieve the webpage: Status code - {response.status_code}')
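A common follow-up step is to turn relative hrefs (such as /path/page.html) into absolute URLs before storing or crawling them. The sketch below is one possible extension, not part of the original script: it assumes the same url and soup variables as above and uses urljoin from the standard library's urllib.parse.

from urllib.parse import urljoin

# Collect unique absolute URLs resolved against the page URL
absolute_links = set()
for link in soup.find_all('a'):
    href = link.get('href')
    if href:
        # Relative paths are resolved against url; absolute hrefs pass through unchanged
        absolute_links.add(urljoin(url, href))

for abs_url in sorted(absolute_links):
    print(abs_url)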