爬虫简单案例:获取豆瓣排名数据并利用MySQL存储

123 阅读1分钟

#导入模块

import pymysql
import requests

# Douban chart URL whose JSON payload is parsed below.
# The scheme is mandatory: requests rejects a scheme-less URL (MissingSchema).
# NOTE(review): the tail of this URL is elided ("…") in this copy of the
# script -- presumably the chart endpoint plus its query string. Restore the
# full URL before running; as written the request cannot succeed.
url = 'https://movie.douban.com/j/chart/top…'

# Simulate a desktop browser User-Agent.
headers = {
    'User-Agent': (
        'Mozilla/5.0 (Windows NT 10.0; Win64; x64) '
        'AppleWebKit/537.36 (KHTML, like Gecko) '
        'Chrome/142.0.0.0 Safari/537.36 Edg/142.0.0.0'
    ),
}

# Send the request. The timeout keeps the script from hanging forever on a
# stalled connection, and raise_for_status() surfaces HTTP errors (4xx/5xx)
# directly instead of as a confusing JSON decoding failure.
http_response = requests.get(url=url, headers=headers, timeout=10)
http_response.raise_for_status()

# Parsed JSON body; the rest of the script iterates over it as a sequence of
# dicts carrying 'rank', 'title' and 'url' keys.
response = http_response.json()

# Extract the rank, title and link of every entry in the parsed response.
dict_list = [
    {
        'rank': entry['rank'],
        'title': entry['title'],
        'url': entry['url'],
    }
    for entry in response
]
print(f'爬取成功,共爬取了{len(dict_list)}条数据')

# Start with no connection so the except/finally clauses can tell whether
# pymysql.connect() succeeded. Without this, a failed connect left `db`
# unbound and the rollback in the handler raised NameError, hiding the real
# error.
db = None
try:
    # Connect to the MySQL server (no database is selected at this point).
    db = pymysql.connect(
        host='localhost',
        user='root',
        password='',
        port=3306,
        charset='utf8mb4',
    )
    print('连接成功')

    # Use a cursor to execute the MySQL statements.
    with db.cursor() as cursor:
        CreateDataBase = "CREATE DATABASE IF NOT EXISTS my_spider1 DEFAULT CHARACTER SET utf8mb4 "
        cursor.execute(CreateDataBase)
        print('数据库my_spider1创建成功')

        # Switch to the database.
        cursor.execute('USE my_spider1')

        # Create the table. The `rank` column is backtick-quoted in the SQL,
        # presumably because RANK is a reserved word in recent MySQL versions.
        Create_Table = """
    CREATE TABLE IF NOT EXISTS douban_listranking(
        id INT AUTO_INCREMENT PRIMARY KEY,
        `rank` INT NOT NULL,
        title VARCHAR(255) NOT NULL,
        url VARCHAR(500) NOT NULL
    )ENGINE=innoDB DEFAULT CHARSET=utf8mb4;"""
        cursor.execute(Create_Table)
        print('数据表douban_listranking创建成功')

        # Insert the scraped rows with one parameterized batch statement.
        insert_list = "INSERT INTO douban_listranking (`rank` ,title,url) VALUES (%s,%s,%s)"
        cursor.executemany(
            insert_list,
            [(item['rank'], item['title'], item['url']) for item in response],
        )
        db.commit()
        print('数据插入成功')

except Exception as e:
    # On failure, roll back the transaction -- but only when a live
    # connection exists; if connect() itself failed there is nothing to undo.
    if db is not None and db.open:
        db.rollback()
    print('出错', e)

finally:
    # Close the database connection if it was opened and is still open.
    if db is not None and db.open:
        db.close()
        print('数据库连接已关闭')

注意:MySQL 的 user 和 password 需要替换为你自己的数据库账号和密码。

以上是一个简单的爬虫学习案例,如果对您有帮助的话,请点个小赞吧!