采用 requests 库搭配正则表达式,获取豆瓣电影 Top 250 榜单中前 100 部电影的名称、导演、评分和引用 `
import requests
import re
# Scrape the first 100 entries of the Douban movie Top 250 list and print
# each movie's title, director line, rating and one-line quote.

# Request headers sent with every page fetch (a desktop browser User-Agent).
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/102.0.5005.63 Safari/537.36 Edg/102.0.1245.33'
}

# Main title: must start with a word character, so alternate-title spans whose
# text begins with "&nbsp;" are skipped. Unlike the former ``\w+.+`` this also
# accepts one-character titles and cannot run past the closing tag.
TITLE_RE = re.compile(r'<span class="title">(\w[^<]*)</span>')
# NOTE(review): greedy -- captures everything up to the last space on the
# line, which may include text after the director's name. Kept as-is; confirm
# against the live markup before tightening.
DIRECTOR_RE = re.compile(r'导演: (.+) ')
QUOTE_RE = re.compile(r'<span class="inq">(.+)</span>')
RATING_RE = re.compile(r'<span class="rating_num" property="v:average">(.+)</span>')


def _first_group(pattern, text, default=''):
    """Return group 1 of the first match of *pattern* in *text*, else *default*."""
    found = pattern.search(text)
    return found.group(1) if found else default


def extract_movies(html: str) -> list:
    """Return one dict per movie found in *html*, in page order.

    The page is cut into per-movie slices at each main-title match, and the
    director, rating and quote are looked up inside that slice only. A movie
    that lacks a field (for example, no quote) gets an empty string for it
    instead of shifting every later value onto the wrong movie.

    Each dict has the keys ``name``, ``director``, ``star`` and ``quote``.
    """
    titles = list(TITLE_RE.finditer(html))
    # Each movie's slice ends where the next main title begins.
    slice_ends = [m.start() for m in titles[1:]] + [len(html)]

    movies = []
    for title, end in zip(titles, slice_ends):
        chunk = html[title.end():end]
        movies.append({
            'name': title.group(1),
            'director': _first_group(DIRECTOR_RE, chunk),
            'star': _first_group(RATING_RE, chunk),
            'quote': _first_group(QUOTE_RE, chunk),
        })
    return movies


def main():
    """Fetch four pages of 25 entries each and print every movie found."""
    # Step through the list 25 entries at a time to emulate paging.
    for start in range(0, 100, 25):
        url = f'https://movie.douban.com/top250?start={start}'
        # A timeout keeps a stalled connection from hanging the script, and
        # raise_for_status() reports an HTTP error directly instead of letting
        # it show up later as "no movies parsed".
        response = requests.get(url=url, headers=headers, timeout=10)
        response.raise_for_status()

        for movie in extract_movies(response.text):
            print('电影名称: ' + movie['name'])
            print('导 演: ' + movie['director'])
            print('评 分: ' + movie['star'])
            print('引 用: ' + movie['quote'])
            print("----------------------")


if __name__ == "__main__":
    main()
`