摘要:SQLite是Python内置的数据库,零配置、零依赖,适合中小型项目和数据分析。本文从基础CRUD到性能优化、并发处理、与Pandas集成,全面讲解SQLite在Python中的最佳实践。
为什么选SQLite?
- 零配置:不需要安装数据库服务器
- Python内置:
import sqlite3直接用 - 单文件存储:一个
.db文件就是整个数据库,方便备份和迁移 - 性能够用:百万级数据毫无压力
基础操作
import sqlite3
from contextlib import contextmanager
# 推荐:用上下文管理器自动处理连接
@contextmanager
def get_db(db_path='app.db'):
conn = sqlite3.connect(db_path)
conn.row_factory = sqlite3.Row # 返回字典风格的行
conn.execute('PRAGMA journal_mode=WAL') # WAL模式,提升并发性能
conn.execute('PRAGMA foreign_keys=ON') # 启用外键约束
try:
yield conn
conn.commit()
except Exception:
conn.rollback()
raise
finally:
conn.close()
建表
with get_db() as db:
db.execute('''
CREATE TABLE IF NOT EXISTS users (
id INTEGER PRIMARY KEY AUTOINCREMENT,
name TEXT NOT NULL,
email TEXT UNIQUE NOT NULL,
age INTEGER CHECK(age >= 0 AND age <= 150),
created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
)
''')
db.execute('''
CREATE TABLE IF NOT EXISTS posts (
id INTEGER PRIMARY KEY AUTOINCREMENT,
user_id INTEGER NOT NULL,
title TEXT NOT NULL,
content TEXT,
created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
FOREIGN KEY (user_id) REFERENCES users(id) ON DELETE CASCADE
)
''')
# 创建索引
db.execute('CREATE INDEX IF NOT EXISTS idx_posts_user ON posts(user_id)')
db.execute('CREATE INDEX IF NOT EXISTS idx_users_email ON users(email)')
CRUD操作
with get_db() as db:
# 插入
db.execute(
'INSERT INTO users (name, email, age) VALUES (?, ?, ?)',
('张三', 'zhang@test.com', 25)
)
# 批量插入(比循环insert快10倍+)
users = [
('李四', 'li@test.com', 30),
('王五', 'wang@test.com', 28),
('赵六', 'zhao@test.com', 35),
]
db.executemany(
'INSERT INTO users (name, email, age) VALUES (?, ?, ?)',
users
)
# 查询
cursor = db.execute('SELECT * FROM users WHERE age > ?', (25,))
for row in cursor:
print(dict(row)) # {'id': 2, 'name': '李四', ...}
# 更新
db.execute('UPDATE users SET age = ? WHERE name = ?', (26, '张三'))
# 删除
db.execute('DELETE FROM users WHERE id = ?', (1,))
参数化查询(防SQL注入)
# ❌ 危险:字符串拼接
name = "'; DROP TABLE users; --"
db.execute(f"SELECT * FROM users WHERE name = '{name}'")
# ✅ 安全:参数化查询
db.execute("SELECT * FROM users WHERE name = ?", (name,))
# ✅ 命名参数
db.execute(
"SELECT * FROM users WHERE name = :name AND age > :age",
{'name': '张三', 'age': 20}
)
高级查询
分页
def get_users_page(db, page=1, per_page=20):
offset = (page - 1) * per_page
# 总数
total = db.execute('SELECT COUNT(*) FROM users').fetchone()[0]
# 当前页数据
rows = db.execute(
'SELECT * FROM users ORDER BY id DESC LIMIT ? OFFSET ?',
(per_page, offset)
).fetchall()
return {
'data': [dict(r) for r in rows],
'total': total,
'page': page,
'pages': (total + per_page - 1) // per_page,
}
聚合和分组
with get_db() as db:
# 每个用户的文章数
rows = db.execute('''
SELECT u.name, COUNT(p.id) as post_count
FROM users u
LEFT JOIN posts p ON u.id = p.user_id
GROUP BY u.id
ORDER BY post_count DESC
''').fetchall()
# 年龄分布统计
rows = db.execute('''
SELECT
CASE
WHEN age < 20 THEN '20以下'
WHEN age < 30 THEN '20-29'
WHEN age < 40 THEN '30-39'
ELSE '40以上'
END as age_group,
COUNT(*) as count
FROM users
GROUP BY age_group
''').fetchall()
全文搜索(FTS5)
with get_db() as db:
# 创建全文搜索虚拟表
db.execute('''
CREATE VIRTUAL TABLE IF NOT EXISTS posts_fts
USING fts5(title, content, content=posts, content_rowid=id)
''')
# 同步数据
db.execute('''
INSERT INTO posts_fts(rowid, title, content)
SELECT id, title, content FROM posts
''')
# 全文搜索
results = db.execute('''
SELECT rowid, title, snippet(posts_fts, 1, '<b>', '</b>', '...', 20) as excerpt
FROM posts_fts
WHERE posts_fts MATCH ?
ORDER BY rank
LIMIT 10
''', ('Python 爬虫',)).fetchall()
与Pandas集成
import pandas as pd
# 读取到DataFrame
with get_db() as db:
df = pd.read_sql_query('SELECT * FROM users', db)
# 带参数的查询
df = pd.read_sql_query(
'SELECT * FROM users WHERE age > ?',
db,
params=(25,)
)
# DataFrame写入SQLite
df = pd.DataFrame({
'name': ['A', 'B', 'C'],
'value': [1, 2, 3],
})
with get_db() as db:
df.to_sql('metrics', db, if_exists='replace', index=False)
# 追加模式
df.to_sql('metrics', db, if_exists='append', index=False)
性能优化
批量插入优化
import time
def benchmark_insert(db, n=100000):
data = [(f'user_{i}', f'user_{i}@test.com', i % 60 + 18) for i in range(n)]
# 方法1:逐条插入(最慢)
start = time.time()
for name, email, age in data:
db.execute('INSERT INTO users (name, email, age) VALUES (?, ?, ?)', (name, email, age))
db.commit()
print(f'逐条插入: {time.time() - start:.2f}s')
# 方法2:executemany(快5-10倍)
start = time.time()
db.executemany('INSERT INTO users (name, email, age) VALUES (?, ?, ?)', data)
db.commit()
print(f'executemany: {time.time() - start:.2f}s')
# 方法3:事务 + executemany + PRAGMA优化(最快)
start = time.time()
db.execute('PRAGMA synchronous=OFF')
db.execute('PRAGMA temp_store=MEMORY')
db.executemany('INSERT INTO users (name, email, age) VALUES (?, ?, ?)', data)
db.commit()
db.execute('PRAGMA synchronous=FULL')
print(f'优化后: {time.time() - start:.2f}s')
# 10万条数据大约:
# 逐条插入: 25s
# executemany: 3s
# 优化后: 0.5s
查询优化
with get_db() as db:
# 查看查询计划
plan = db.execute('EXPLAIN QUERY PLAN SELECT * FROM users WHERE email = ?', ('test@test.com',)).fetchall()
for row in plan:
print(dict(row))
# 如果显示 SCAN TABLE 说明没用到索引
# 如果显示 SEARCH TABLE ... USING INDEX 说明用到了索引
# 分析数据库统计信息
db.execute('ANALYZE')
数据库维护
with get_db() as db:
# 压缩数据库(删除数据后回收空间)
db.execute('VACUUM')
# 完整性检查
result = db.execute('PRAGMA integrity_check').fetchone()
print(result[0]) # 'ok' 表示正常
# 查看数据库大小
page_count = db.execute('PRAGMA page_count').fetchone()[0]
page_size = db.execute('PRAGMA page_size').fetchone()[0]
print(f'数据库大小: {page_count * page_size / 1024 / 1024:.1f} MB')
实战:简易KV存储
class KVStore:
"""基于SQLite的键值存储,支持过期时间"""
def __init__(self, db_path='kv.db'):
self.conn = sqlite3.connect(db_path)
self.conn.execute('PRAGMA journal_mode=WAL')
self.conn.execute('''
CREATE TABLE IF NOT EXISTS kv (
key TEXT PRIMARY KEY,
value TEXT,
expires_at REAL
)
''')
self.conn.commit()
def set(self, key, value, ttl=None):
import json
expires = time.time() + ttl if ttl else None
self.conn.execute(
'INSERT OR REPLACE INTO kv (key, value, expires_at) VALUES (?, ?, ?)',
(key, json.dumps(value), expires)
)
self.conn.commit()
def get(self, key, default=None):
import json
row = self.conn.execute(
'SELECT value, expires_at FROM kv WHERE key = ?', (key,)
).fetchone()
if row is None:
return default
value, expires = row
if expires and time.time() > expires:
self.delete(key)
return default
return json.loads(value)
def delete(self, key):
self.conn.execute('DELETE FROM kv WHERE key = ?', (key,))
self.conn.commit()
def cleanup(self):
self.conn.execute('DELETE FROM kv WHERE expires_at IS NOT NULL AND expires_at < ?', (time.time(),))
self.conn.commit()
# 使用
store = KVStore()
store.set('user:1', {'name': '张三', 'age': 25}, ttl=3600)
print(store.get('user:1'))
总结
SQLite的最佳使用场景:
- 单机应用、桌面软件、移动端
- 数据分析和原型开发
- 缓存和本地存储
- 测试环境替代MySQL/PostgreSQL
不适合的场景:
- 高并发写入(>100次/秒)
- 多机器共享数据库
- 数据量超过100GB
用好SQLite,很多场景根本不需要部署MySQL或PostgreSQL,开发效率直接翻倍。