import re
import requests
import csv
# Scrape the first page of Douban's Top 250 movie list and save each film's
# title, detail link, release year and rating to douban.csv (one row per
# film), echoing every row to stdout as it is written.

# Send a desktop-browser User-Agent header with the request.
agent = {
    "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/92.0.4515.159 Safari/537.36"
}
url = "https://movie.douban.com/top250"

# A timeout keeps the script from hanging forever on a stalled connection.
res = requests.get(url, headers=agent, timeout=10)
try:
    # Fail loudly on an HTTP error instead of silently writing an empty CSV.
    res.raise_for_status()
    page_content = res.text
finally:
    # The body has been read (or the request failed); release the connection.
    res.close()

# One match per <li> movie entry. Named groups:
#   url_link   - href of the movie's detail page
#   movie_name - the poster image's alt text
#   year       - text between <br> and the first " / " in the info paragraph
#   averge     - the rating_num span (spelling kept: it is the group name)
# re.S lets "." span newlines, since each entry covers many lines of HTML.
obj = re.compile(
    r'<li>.*?<div class="item">.*?<div class="pic">.*?<em class="">.*?</em>.*?<a href="(?P<url_link>.*?)">.*?'
    r'<img width="100" alt="(?P<movie_name>.*?)" src=".*?" class="">.*?</a>.*?</div>.*?<div class="info">.*?'
    r'<div class="hd">.*?<a href=".*?" class="">.*?<span class="title">.*?</span>.*?<span class=".*?"> / .*?'
    r'</span>.*?<span class=".*?"> / .*?</span>.*?</a>.*?<span class=".*?">.*?</span>.*?</div>.*?<div class="bd">'
    r'.*?<p class="">.*?<br>(?P<year>.*?) / .*? / .*?</p>.*?<div class="star">.*?<span class=".*?'
    r'"></span>.*?<span class="rating_num" property="v:average">(?P<averge>.*?)</span>.*?<span property="v:best" content="10.0"></span>'
    r'.*?<span>.*?</span>.*?</div>.*?<p class="quote">.*?<span class="inq">.*?</span>.*?</p>.*?</div>.*?</div>.*?</div>',
    re.S,
)
result = obj.finditer(page_content)

# newline="" is what the csv module asks for, so the writer controls line
# endings itself (no blank rows on Windows). The explicit UTF-8 encoding keeps
# the Chinese labels and titles writable regardless of the system locale.
# The with-block closes the file even if a row fails to parse.
with open("douban.csv", mode="w", newline="", encoding="utf-8") as movie_csv:
    writer_info = csv.writer(movie_csv)
    for match in result:
        movie_name = match.group("movie_name")
        url_link = match.group("url_link")
        # Strip the captured text (not the group name) before converting;
        # int() raises ValueError if the capture is not a plain number.
        year = int(match.group("year").strip())
        averge = match.group("averge")

        # Format the values themselves rather than single-element lists, so
        # the output reads "1994" / "9.7" instead of "[1994]" / "['9.7']".
        name_field = '电影名字:{}'.format(movie_name)
        link_field = '播放链接:{}'.format(url_link)
        year_field = '上映年份:{}'.format(year)
        rating_field = '电影评分:{}'.format(averge)

        writer_info.writerow((name_field, link_field, year_field, rating_field))
        print(name_field, '\t', link_field, '\t', year_field, '\t', rating_field)