# Import modules
import pymysql
import requests
# Douban chart endpoint to scrape.
# NOTE(review): this literal looks truncated (it ends in an ellipsis) and has
# no scheme; requests rejects scheme-less URLs, so replace it with the complete
# https:// endpoint before running.
url = 'movie.douban.com/j/chart/top…'
# Send a browser-like User-Agent header with the request.
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/142.0.0.0 Safari/537.36 Edg/142.0.0.0',
}
# Fetch the chart. The timeout keeps the script from hanging forever on a
# stalled connection, and raise_for_status() surfaces HTTP errors directly
# instead of letting them show up as a JSON decoding failure.
http_response = requests.get(url=url, headers=headers, timeout=10)
http_response.raise_for_status()
# `response` holds the decoded JSON payload (iterated below as a sequence of
# movie records), not the HTTP response object.
response = http_response.json()
# Keep only the fields that are stored later: rank, title and url.
dict_list = [
    {
        'rank': item['rank'],
        'title': item['title'],
        'url': item['url'],
    }
    for item in response
]
print(f'爬取成功,共爬取了{len(dict_list)}条数据')
# SQL statements used below, defined up front so the database steps read
# top to bottom.
CreateDataBase = "CREATE DATABASE IF NOT EXISTS my_spider1 DEFAULT CHARACTER SET utf8mb4 "
# `rank` is backtick-quoted so the statement stays valid on MySQL versions
# where RANK is a reserved word.
Create_Table = """
CREATE TABLE IF NOT EXISTS douban_listranking(
id INT AUTO_INCREMENT PRIMARY KEY,
`rank` INT NOT NULL,
title VARCHAR(255) NOT NULL,
url VARCHAR(500) NOT NULL
)ENGINE=innoDB DEFAULT CHARSET=utf8mb4;"""
insert_list = "INSERT INTO douban_listranking (`rank` ,title,url) VALUES (%s,%s,%s)"

# Bind the connection name before the try block so the except/finally clauses
# can tell whether pymysql.connect() ever succeeded. Calling db.rollback()
# unconditionally raised NameError when the connection attempt itself failed,
# which hid the real error.
db = None
try:
    # Connect to the MySQL server; no default database is selected yet.
    db = pymysql.connect(
        host='localhost',
        user='root',
        password='',
        port=3306,
        charset='utf8mb4',
    )
    print('连接成功')
    # Run the MySQL statements through a cursor.
    with db.cursor() as cursor:
        cursor.execute(CreateDataBase)
        print('数据库my_spider1创建成功')
        # Switch to the database.
        cursor.execute('USE my_spider1')
        # Create the table.
        cursor.execute(Create_Table)
        print('数据表douban_listranking创建成功')
        # Insert the scraped rows; dict_list holds the rank/title/url records
        # built earlier in the script.
        cursor.executemany(
            insert_list,
            [(item['rank'], item['title'], item['url']) for item in dict_list],
        )
    db.commit()
    print('数据插入成功')
except Exception as e:
    # On failure, roll back the transaction, but only if a live connection
    # exists. MySQL commits DDL implicitly, so this can only undo the INSERT
    # batch, not the CREATE statements.
    if db is not None and db.open:
        db.rollback()
    print('出错', e)
finally:
    # Close the database connection if it was opened.
    if db is not None and db.open:
        db.close()
        print('数据库连接已关闭')
# 注意:MySQL 的 user 和 password 需要根据自己的环境自行设置。
# 以上是一个简单的爬虫学习案例,如果对您有帮助,请点个小赞吧!