python爬虫爬取豆瓣电影top250

330 阅读1分钟

采用 requests 库搭配正则表达式，获取豆瓣电影 Top250 的电影名称、导演、评分和引用： `

import requests
import re

headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/102.0.5005.63 Safari/537.36 Edg/102.0.1245.33'
}

# The list is paged through the ?start=N query parameter, 25 movies per page.
PAGE_SIZE = 25
TOTAL_MOVIES = 250
# Seconds; without a timeout requests.get() may block indefinitely.
REQUEST_TIMEOUT = 10

# Fields are extracted per movie entry rather than per page, so an entry that
# lacks a field (e.g. no quote) cannot shift the fields of the entries after it.
ITEM_MARKER = '<div class="item">'
# Title spans beginning with "&nbsp;" are skipped; the previous pattern also
# rejected them (it required a leading word character).
# NOTE(review): presumably those are the alternate-language titles - confirm
# against the live markup.
TITLE_RE = re.compile(r'<span class="title">(?!&nbsp;)(.+?)</span>')
# Non-greedy, so the capture stops at the first "&nbsp;" instead of the last.
DIRECTOR_RE = re.compile(r'导演:\s*(.+?)\s*(?:&nbsp;|<br|$)', re.MULTILINE)
RATING_RE = re.compile(
    r'<span class="rating_num" property="v:average">(.+?)</span>'
)
QUOTE_RE = re.compile(r'<span class="inq">(.+?)</span>')


def _first_match(pattern, text):
    """Return the stripped first capture group of *pattern* in *text*, or ''."""
    match = pattern.search(text)
    return match.group(1).strip() if match else ''


def parse_movies(page_html):
    """Extract the movies found in one page of list HTML.

    Args:
        page_html: HTML text of a single list page.

    Returns:
        A list of dicts with the keys ``name``, ``director``, ``rating`` and
        ``quote``, in page order. A field that is absent from an entry is
        returned as an empty string; entries without a title are skipped.
    """
    movies = []
    # The first piece is everything before the first movie entry, so drop it.
    for chunk in page_html.split(ITEM_MARKER)[1:]:
        name = _first_match(TITLE_RE, chunk)
        if not name:
            continue
        movies.append({
            'name': name,
            'director': _first_match(DIRECTOR_RE, chunk),
            'rating': _first_match(RATING_RE, chunk),
            'quote': _first_match(QUOTE_RE, chunk),
        })
    return movies


def main():
    """Fetch every page of the Top 250 list and print each movie's details.

    Raises:
        requests.HTTPError: if a page request returns an error status, so a
            blocked or failed request is reported directly instead of
            surfacing later as missing data.
    """
    # Walk the pages by advancing the start offset one page at a time.
    for start in range(0, TOTAL_MOVIES, PAGE_SIZE):
        url = f'https://movie.douban.com/top250?start={start}'
        response = requests.get(
            url=url, headers=headers, timeout=REQUEST_TIMEOUT
        )
        response.raise_for_status()

        for movie in parse_movies(response.text):
            print('电影名称: ' + movie['name'])
            print('导    演: ' + movie['director'])
            print('评    分: ' + movie['rating'])
            print('引    用: ' + movie['quote'])
            print("----------------------")


if __name__ == '__main__':
    main()
        

`