Python爬虫体验

100 阅读1分钟

使用 Python 爬取豆瓣 Top250 电影名称。使用到的库:requests、bs4。

import  requests
from bs4 import BeautifulSoup

# Browser-like User-Agent header sent with every request.
headers = {
    "User-Agent":"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/111.0.0.0 Safari/537.36 Edg/111.0.1661.41",
}

# Walk the list through the `start` query parameter in steps of 25
# (0, 25, ..., 225), i.e. ten requests in total.
for start_num in range(0, 250, 25):

    response = requests.get(
        f'https://movie.douban.com/top250?start={start_num}',
        headers=headers,
        # Without a timeout a stalled connection would block the script forever.
        timeout=10,
    )
    # Fail loudly on an HTTP error status instead of parsing an error page
    # and silently printing nothing.
    response.raise_for_status()
    context = response.text
    soup = BeautifulSoup(context, 'html.parser')

    # find_all is the current name of the legacy findAll alias.
    all_titles = soup.find_all('span', attrs={"class": "title"})

    for title in all_titles:
        # get_text() always returns a str, whereas `.string` is None when the
        # span has more than one child and would break the `in` test below.
        title_str = title.get_text()
        # Skip titles containing '/' — presumably the alternate-language
        # titles the page renders alongside the primary one; verify against
        # the live markup.
        if '/' not in title_str:
            print(title_str)

image.png