文件操作是Python编程中的必备技能,就像你会用记事本记录重要信息一样,Python程序也需要能够读取和写入文件。今天,我们将学习如何让Python程序成为文件的"管理员",轻松处理各种文件操作!
引入:为什么需要文件操作?
想象一下,你开发了一个学生管理系统,需要:
- 将学生的成绩保存到文件中,下次打开程序还能看到
- 从CSV文件中导入大量的学生数据
- 将分析结果导出为报告文件
- 读取配置文件来调整程序行为
没有文件操作,你的程序就像失忆的人,每次运行都从头开始。有了文件操作,程序就能"记住"之前的数据,真正变得实用!
一、文件操作基础:打开、读写、关闭
1.1 最基本的文件操作流程
# The three steps of basic file handling: open -> operate -> close.
print("=== 文件操作基础 ===\n")

# Step 1: create the file and write to it.
print("1. 创建并写入文件:")
# Mode "w" creates the file if it is missing and overwrites existing content.
out_file = open("test.txt", "w", encoding="utf-8")
for text in ("这是第一行内容\n", "这是第二行内容\n", "Python文件操作很简单!\n"):
    out_file.write(text)
# Always close explicitly when not using a `with` block.
out_file.close()
print("文件写入完成!\n")

# Step 2: reopen the file in read mode ("r") and load everything at once.
print("2. 读取文件内容:")
in_file = open("test.txt", "r", encoding="utf-8")
content = in_file.read()
in_file.close()
print("整个文件内容:")
print(content)

# Step 3: mode "a" appends at the end and keeps the existing content.
print("3. 向文件追加内容:")
log_file = open("test.txt", "a", encoding="utf-8")
log_file.write("这是追加的内容\n")
log_file.close()
print("追加完成!")

# Read the file once more to confirm the appended line is there.
check_file = open("test.txt", "r", encoding="utf-8")
appended = check_file.read()
check_file.close()
print("\n追加后的文件内容:")
print(appended)
1.2 文件打开模式详解
print("=== 文件打开模式详解 ===\n")

# Mode characters accepted by open(), each with a short description.
modes = {
    'r': '读取模式(默认)',
    'w': '写入模式(覆盖)',
    'a': '追加模式',
    'x': '创建模式(文件存在则失败)',
    'b': '二进制模式',
    't': '文本模式(默认)',
    '+': '读写模式'
}
print("常用文件模式:")
for flag, meaning in modes.items():
    print(f" '{flag}': {meaning}")

print("\n模式组合示例:")
for combo, meaning in (
    ("rb", "以二进制模式读取"),
    ("w+", "读写模式(覆盖)"),
    ("a+", "读写模式(追加)"),
):
    print(f" '{combo}': {meaning}")

# Hands-on examples.
print("\n=== 实际应用示例 ===\n")

# Example 1: "x" creates a new file and fails if it already exists.
try:
    created = open("new_file.txt", "x", encoding="utf-8")
except FileExistsError:
    print("1. 文件已存在,创建失败")
else:
    created.write("这是新创建的文件\n")
    created.close()
    print("1. 成功创建新文件")

# Example 2: "w+" truncates the file and allows both writing and reading.
scratch = open("test_w+.txt", "w+", encoding="utf-8")
scratch.write("第一行内容\n")
scratch.seek(0)  # rewind so the read starts at the beginning
content = scratch.read()
scratch.close()
print(f"2. w+模式写入后读取:{content.strip()}")

# Example 3: "a+" appends on write and allows reading after a rewind.
journal = open("test_a+.txt", "a+", encoding="utf-8")
journal.write("初始内容\n")
journal.seek(0)
before = journal.read().strip()
print(f"3. a+模式初始内容:{before}")
journal.write("追加的内容\n")
journal.seek(0)
after = journal.read().strip()
print(f"3. a+模式追加后:{after}")
journal.close()
二、安全的文件操作:使用with语句
2.1 with语句:自动管理文件资源
print("=== 使用with语句安全操作文件 ===\n")

# With manual open()/close(), an exception raised between the two calls would
# skip close() and leave the file open. A `with` block closes the file on exit
# whether or not an exception occurred.
print("1. 使用with读取文件:")
with open("test.txt", "r", encoding="utf-8") as reader:
    content = reader.read()
print("文件内容:")
print(content)
# The file was closed when the `with` block ended.
print("文件已自动关闭\n")

print("2. 使用with写入文件:")
with open("output.txt", "w", encoding="utf-8") as writer:
    for text in ("使用with语句写入内容\n", "无需手动关闭文件!\n"):
        writer.write(text)
print("文件已保存并自动关闭\n")

print("3. 同时操作多个文件:")
with open("source.txt", "w", encoding="utf-8") as src:
    src.write("这是源文件的内容\n")
    src.write("包含多行文本\n")

# Hold two files open together: read from one, write to the other.
with open("source.txt", "r", encoding="utf-8") as src:
    with open("copy.txt", "w", encoding="utf-8") as dst:
        dst.write("复制的内容:\n")
        dst.write(src.read())
print("文件复制完成!")

# Read the copy back to verify the result.
with open("copy.txt", "r", encoding="utf-8") as copied:
    copied_text = copied.read()
print("\n复制后的文件内容:")
print(copied_text)
2.2 异常处理:处理文件操作中的错误
print("=== 文件操作中的异常处理 ===\n")

# Typical reasons a file operation fails:
#   1. the file does not exist
#   2. insufficient permissions
#   3. the disk is full
#   4. the file is locked by another program
print("1. 处理文件不存在的情况:")
try:
    with open("不存在的文件.txt", "r", encoding="utf-8") as file:
        content = file.read()
except FileNotFoundError as e:
    print(f"错误:文件不存在 - {e}")
except Exception as e:
    print(f"其他错误:{e}")
else:
    # Runs only when the try body raised nothing.
    print("文件读取成功")
finally:
    # Runs in every case.
    print("异常处理完成\n")

print("2. 处理权限问题:")
import os
import platform

# Create the file that is about to be made read-only.
with open("readonly.txt", "w", encoding="utf-8") as file:
    file.write("这是一个只读文件\n")

# The permission change is applied on non-Windows systems only.
restrict_permissions = platform.system() != "Windows"
if restrict_permissions:
    os.chmod("readonly.txt", 0o444)  # read-only for everyone
try:
    with open("readonly.txt", "w", encoding="utf-8") as file:
        file.write("尝试写入只读文件\n")
except PermissionError as e:
    print(f"权限错误:{e}")
except Exception as e:
    print(f"其他错误:{e}")
else:
    # Reached on Windows (no chmod applied) or when the OS does not enforce
    # the permission bits for the current user.
    print("写入成功:当前环境未阻止对该文件的写入")
finally:
    # Restore write permission. Otherwise the file stays read-only and the
    # unguarded open(..., "w") above raises PermissionError on the next run.
    if restrict_permissions:
        os.chmod("readonly.txt", 0o644)

print("\n3. 完整的文件操作异常处理:")
def safe_file_operation(filename, mode="r", content=None, encoding="utf-8"):
    """Read or write a file and report the outcome instead of raising.

    Args:
        filename: Path of the file to open.
        mode: Mode string for ``open()``. Modes containing ``w``, ``a`` or
            ``x`` are treated as writes; otherwise a mode containing ``r``
            is treated as a read.
        content: Data to write (``str`` for text modes, ``bytes`` for binary
            modes). ``None`` opens the file without writing anything.
        encoding: Text encoding used for text modes; ignored for binary modes.

    Returns:
        A ``(success, payload)`` tuple. On success the payload is the file
        content for reads or a confirmation message for writes; on failure
        it is a human-readable error message.
    """
    # open() rejects an encoding argument in binary mode, so pass it only
    # for text modes.
    open_kwargs = {} if "b" in mode else {"encoding": encoding}
    try:
        if any(flag in mode for flag in "wax"):
            # Write operation: overwrite, append, or exclusive creation.
            with open(filename, mode, **open_kwargs) as file:
                if content is not None:
                    file.write(content)
            return True, "操作成功"
        if "r" in mode:
            # Read operation.
            with open(filename, mode, **open_kwargs) as file:
                return True, file.read()
        return False, "不支持的模式"
    except FileNotFoundError:
        return False, f"文件 '{filename}' 不存在"
    except FileExistsError:
        # Raised by "x" mode when the target already exists.
        return False, f"文件 '{filename}' 已存在"
    except PermissionError:
        return False, f"没有权限访问文件 '{filename}'"
    except OSError as e:
        return False, f"IO错误:{e}"
    except Exception as e:
        # Last-resort guard so the function never raises to its caller.
        return False, f"未知错误:{e}"
# Exercise safe_file_operation: a failing read, a write, then a successful read.
demo_calls = (
    ("测试读取不存在的文件:", ("nonexistent.txt", "r")),
    ("\n测试写入文件:", ("test_output.txt", "w", "测试内容")),
    ("\n测试读取文件:", ("test_output.txt", "r")),
)
for heading, call_args in demo_calls:
    print(heading)
    success, result = safe_file_operation(*call_args)
    print(f"成功: {success}, 结果: {result}")
三、文件读取的高级技巧
3.1 多种读取方式
print("=== 文件读取的高级技巧 ===\n")

# Build a five-line fixture file used by every example below.
with open("multi_line.txt", "w", encoding="utf-8") as file:
    for i in range(1, 6):
        file.write(f"这是第{i}行,包含一些测试数据,用于演示不同的读取方法。\n")

print("1. 逐行读取(推荐方法):")
with open("multi_line.txt", "r", encoding="utf-8") as file:
    print("方法1:直接遍历文件对象")
    # Iterating the file object yields one line at a time.
    for line_number, line in enumerate(file, 1):
        print(f"第{line_number}行: {line.strip()}")

print("\n2. 一次性读取所有行:")
with open("multi_line.txt", "r", encoding="utf-8") as file:
    lines = file.readlines()  # list with one element per line
print(f"总行数: {len(lines)}")
for i, line in enumerate(lines, 1):
    print(f"第{i}行: {line.strip()}")

print("\n3. 控制读取位置(seek和tell):")
with open("multi_line.txt", "r", encoding="utf-8") as file:
    # tell() reports the current position.
    position = file.tell()
    print(f"初始位置: {position}")
    # Read the first 10 characters.
    first_part = file.read(10)
    print(f"前10个字符: '{first_part}'")
    # In text mode tell() returns an opaque number, not a character count.
    position = file.tell()
    print(f"读取后位置: {position}")
    # seek(0) always rewinds to the start.
    file.seek(0)
    print(f"回到开头后位置: {file.tell()}")
    # In text mode the only valid seek targets are 0 and values returned by
    # tell(). An arbitrary offset such as seek(50) can land in the middle of
    # a multi-byte UTF-8 character and raise UnicodeDecodeError. To start at
    # character 50, read past the first 50 characters, then bookmark the
    # spot with tell().
    file.read(50)
    bookmark = file.tell()
    from_position = file.read(20)
    print(f"从位置50读取20字符: '{from_position}'")
    # The bookmark is a valid seek target: returning to it gives the same text.
    file.seek(bookmark)
    assert file.read(20) == from_position

print("\n4. 使用迭代器读取大文件:")
# Line-by-line iteration avoids loading the whole file into memory.
with open("multi_line.txt", "r", encoding="utf-8") as file:
    line_count = 0
    char_count = 0
    for line in file:
        line_count += 1
        char_count += len(line)
        # Each line could be processed here.
print(f"文件统计: {line_count}行, {char_count}字符")

print("\n5. 读取特定数量的字符:")
with open("multi_line.txt", "r", encoding="utf-8") as file:
    # First 50 characters.
    chunk = file.read(50)
    print(f"前50个字符: '{chunk}'")
    # The next read continues where the previous one stopped.
    chunk = file.read(50)
    print(f"接下来50个字符: '{chunk}'")
3.2 编码处理
print("=== 文件编码处理 ===\n")

# Reading a file with the wrong encoding produces garbled text or an error.
print("1. 不同编码的写入和读取:")
# UTF-8: the most common encoding; covers Chinese text.
with open("utf8_file.txt", "w", encoding="utf-8") as file:
    file.write("UTF-8编码:Hello 世界!\n")
    file.write("中文测试\n")
# GBK: common on Simplified-Chinese Windows systems.
with open("gbk_file.txt", "w", encoding="gbk") as file:
    file.write("GBK编码:Hello 世界!\n")
    file.write("中文测试\n")

print("2. 正确读取不同编码的文件:")
# Read each file back with the encoding it was written in.
with open("utf8_file.txt", "r", encoding="utf-8") as file:
    print("UTF-8文件内容:", file.read().strip())
with open("gbk_file.txt", "r", encoding="gbk") as file:
    print("GBK文件内容:", file.read().strip())

print("\n3. 编码错误的处理:")
try:
    # Decoding GBK bytes as UTF-8 raises UnicodeDecodeError.
    with open("gbk_file.txt", "r", encoding="utf-8") as file:
        content = file.read()
except UnicodeDecodeError as e:
    print(f"编码错误:{e}")
    print("错误原因:用UTF-8编码读取了GBK编码的文件")

print("\n4. 自动检测编码(需要chardet库):")
# Install first with: pip install chardet
try:
    import chardet
except ImportError:
    print("未安装chardet库,运行 pip install chardet 来安装")
else:
    # Detection works on raw bytes, so open the file in binary mode.
    with open("gbk_file.txt", "rb") as file:
        raw_data = file.read()
    result = chardet.detect(raw_data)
    detected = result['encoding']
    print(f"检测到的编码:{detected},置信度:{result['confidence']:.2%}")
    if detected is None:
        # NOTE(review): chardet is understood to report None when it cannot
        # decide; decode(None) would raise TypeError, so guard against it.
        print("无法检测文件编码")
    else:
        try:
            # Detection is a statistical guess and may be wrong for short
            # inputs, so the decode itself can still fail.
            content = raw_data.decode(detected)
        except (UnicodeDecodeError, LookupError) as e:
            print(f"按检测到的编码解码失败:{e}")
        else:
            print(f"文件内容:{content.strip()}")

print("\n5. 处理不同操作系统的换行符:")
# Windows: \r\n
# Unix/Linux and modern macOS: \n
# Classic Mac OS (before OS X): \r
with open("newlines.txt", "w", encoding="utf-8", newline="") as file:
    # newline="" disables newline translation on write.
    file.write("第一行\n")
    file.write("第二行\r\n")  # Windows line ending
    file.write("第三行\r")  # classic Mac OS line ending
print("文件写入完成,原始换行符被保留")

# With the default newline handling, every line ending is read back as \n.
with open("newlines.txt", "r", encoding="utf-8") as file:
    print("\n读取内容(统一换行符):")
    for i, line in enumerate(file, 1):
        print(f"第{i}行: {repr(line)}")  # repr() makes escape characters visible
四、CSV和JSON文件处理
4.1 CSV文件操作
print("=== CSV文件处理 ===\n")
import csv

# CSV (comma-separated values) is a widely used data-exchange format.
# The csv module documentation asks for newline="" on every file handed to
# csv.reader / csv.writer, for reading as well as writing. Without it,
# newlines embedded in quoted fields are not interpreted correctly.
print("1. 写入CSV文件:")
# Student records; the first row is the header.
students = [
    ["学号", "姓名", "年龄", "语文", "数学", "英语"],
    ["001", "张三", 18, 85, 92, 78],
    ["002", "李四", 17, 92, 88, 95],
    ["003", "王五", 19, 76, 85, 90]
]
# Approach 1: csv.writer.
with open("students.csv", "w", encoding="utf-8", newline="") as file:
    writer = csv.writer(file)
    writer.writerows(students)  # write all rows at once
print("CSV文件写入完成\n")

print("2. 读取CSV文件:")
print("方法1:使用reader逐行读取")
with open("students.csv", "r", encoding="utf-8", newline="") as file:
    reader = csv.reader(file)
    for row in reader:
        print(row)

print("\n方法2:读取为字典列表")
with open("students.csv", "r", encoding="utf-8", newline="") as file:
    reader = csv.DictReader(file)
    for row in reader:
        # Fields are accessible by header name, e.g. row["姓名"].
        print(row)

print("\n3. 处理包含特殊字符的CSV:")
# Values containing commas or quotes must be quoted/escaped in the output.
data = [
    ["产品", "描述", "价格"],
    ["苹果", "新鲜的红苹果", 5.99],
    ["香蕉", "进口香蕉,非常甜", 3.99],
    ['西瓜', '大西瓜,重量约"10kg"', 25.99]
]
with open("products.csv", "w", encoding="utf-8", newline="") as file:
    writer = csv.writer(file, quoting=csv.QUOTE_ALL)  # quote every field
    writer.writerows(data)
print("包含特殊字符的CSV写入完成")

print("\n4. 读取并处理CSV数据:")
subjects = ("语文", "数学", "英语")
totals = dict.fromkeys(subjects, 0)
count = 0
with open("students.csv", "r", encoding="utf-8", newline="") as file:
    # DictReader returns every field as a string; convert before summing.
    for row in csv.DictReader(file):
        for subject in subjects:
            totals[subject] += int(row[subject])
        count += 1
# Guard against division by zero when the file has no data rows.
if count > 0:
    for subject in subjects:
        print(f"{subject}平均分: {totals[subject]/count:.2f}")

print("\n5. 使用不同的分隔符:")
# "CSV" data is not always comma-separated; tabs and others are common.
data = [
    ["月份", "收入", "支出"],
    ["一月", 10000, 8000],
    ["二月", 12000, 8500],
    ["三月", 11000, 8200]
]
# Tab-separated output.
with open("finance.tsv", "w", encoding="utf-8", newline="") as file:
    writer = csv.writer(file, delimiter="\t")
    writer.writerows(data)
print("TSV(制表符分隔)文件写入完成")
4.2 JSON文件操作
print("=== JSON文件处理 ===\n")
import json

# JSON (JavaScript Object Notation) is a widely used data-exchange format.
print("1. 将Python对象写入JSON文件:")
# Nested Python structure to serialize.
staff_rows = (
    ("张三", 28, "工程师"),
    ("李四", 32, "经理"),
    ("王五", 25, "设计师"),
)
company = {
    "name": "科技公司",
    "year": 2020,
    "employees": [
        {"name": person, "age": age, "position": role}
        for person, age, role in staff_rows
    ],
    "departments": ["技术部", "市场部", "人力资源部"],
    "location": {
        "city": "北京",
        "address": "中关村大街1号"
    }
}
# Serialize to text first, then write it out.
# indent=2 pretty-prints; ensure_ascii=False keeps Chinese characters as-is.
company_text = json.dumps(company, ensure_ascii=False, indent=2)
with open("company.json", "w", encoding="utf-8") as file:
    file.write(company_text)
print("JSON文件写入完成\n")

print("2. 从JSON文件读取数据:")
with open("company.json", "r", encoding="utf-8") as file:
    data = json.loads(file.read())
employees = data["employees"]
print("公司名称:", data["name"])
print("成立年份:", data["year"])
print("员工数量:", len(employees))
print("第一个员工:", employees[0]["name"])

print("\n3. JSON字符串与Python对象的转换:")
# Sometimes the JSON lives in a string rather than a file.
# Python object -> JSON string.
python_dict = {"name": "张三", "age": 25, "city": "北京"}
json_string = json.dumps(python_dict, ensure_ascii=False)
print("JSON字符串:", json_string)
print("类型:", type(json_string))
# JSON string -> Python object.
parsed_dict = json.loads(json_string)
print("\n解析后的Python对象:", parsed_dict)
print("类型:", type(parsed_dict))

print("\n4. 处理复杂的JSON数据:")
# Stand-in for a response body received from a web API.
api_response = '''
{
"status": "success",
"data": {
"users": [
{"id": 1, "name": "张三", "email": "zhangsan@example.com"},
{"id": 2, "name": "李四", "email": "lisi@example.com"},
{"id": 3, "name": "王五", "email": "wangwu@example.com"}
],
"total": 3,
"page": 1
},
"timestamp": "2024-01-15T10:30:00Z"
}
'''
# Parse the JSON string and drill into the nested payload.
response_data = json.loads(api_response)
payload = response_data["data"]
print("API响应状态:", response_data["status"])
print("用户总数:", payload["total"])
print("第一个用户:", payload["users"][0]["name"])

print("\n5. 处理JSON中的日期和自定义对象:")
import datetime
# Custom JSON encoder.
class CustomEncoder(json.JSONEncoder):
    """JSON encoder that also serializes date/time values and sets.

    ``datetime.datetime``, ``datetime.date`` and ``datetime.time`` objects
    become ISO 8601 strings. ``set`` and ``frozenset`` become lists, sorted
    when their elements are mutually orderable so the output is stable
    between runs.
    """

    def default(self, obj):
        """Return a JSON-serializable stand-in for *obj*.

        Raises:
            TypeError: via the base class, for any unsupported type.
        """
        # datetime.datetime is a subclass of datetime.date, so this tuple
        # covers all three date/time types.
        if isinstance(obj, (datetime.date, datetime.time)):
            return obj.isoformat()
        if isinstance(obj, (set, frozenset)):
            try:
                return sorted(obj)
            except TypeError:
                # Elements cannot be compared with each other; fall back to
                # the set's own iteration order.
                return list(obj)
        return super().default(obj)
# Sample payload mixing JSON-native values with a datetime and a set, which
# the default encoder cannot serialize.
custom_data = dict(
    name="项目",
    start_date=datetime.datetime(2024, 1, 15, 10, 30, 0),
    tags={"python", "json", "tutorial"},
    participants=["张三", "李四"],
)
# Route the unsupported types through the custom encoder.
encoder_options = {"cls": CustomEncoder, "ensure_ascii": False, "indent": 2}
json_output = json.dumps(custom_data, **encoder_options)
print("编码后的JSON:", json_output, sep="\n")
五、实战应用:文件管理器
# Banner for the file-manager walkthrough.
print("=== 文件管理器实战 ===\n")
import os
import shutil
# NOTE(review): this rebinds the name `datetime` from the module (bound by the
# earlier `import datetime` in this file) to the `datetime.datetime` class.
# Code that runs after this line and still expects the module (for example
# `datetime.datetime(...)`) would fail if all sections share one namespace.
from datetime import datetime
class FileManager:
    """Simple console file manager.

    Creating an instance changes the process's current working directory to
    ``base_dir``. The methods resolve relative names against the current
    working directory. The file-handling methods print a status message and
    return a success or failure value instead of raising.
    """

    def __init__(self, base_dir="."):
        """Store the absolute base directory and make it the working directory."""
        self.base_dir = os.path.abspath(base_dir)
        os.chdir(self.base_dir)

    def list_files(self, show_details=False):
        """Print the entries of the current working directory.

        Args:
            show_details: When true, also print size, kind and modification
                time for every entry.
        """
        print(f"当前目录: {os.getcwd()}")
        print("-" * 50)
        items = os.listdir()
        if not items:
            print("目录为空")
            return
        if not show_details:
            print("目录列表:")
            for item in items:
                print(f" - {item}")
            return

        print(f"{'名称':<30} {'大小(字节)':<15} {'类型':<10} {'修改时间':<20}")
        print("-" * 80)
        for item in items:
            full_path = os.path.join(os.getcwd(), item)
            size = "-"
            mtime_str = "-"
            if os.path.isfile(full_path):
                file_type = "文件"
            elif os.path.isdir(full_path):
                file_type = "目录"
            else:
                file_type = "其他"
            try:
                if file_type == "文件":
                    size = os.path.getsize(full_path)
                mtime = datetime.fromtimestamp(os.path.getmtime(full_path))
                mtime_str = mtime.strftime("%Y-%m-%d %H:%M:%S")
            except OSError:
                # The entry cannot be stat'ed (for example a broken symlink
                # or a file removed after listdir()); show placeholders
                # instead of aborting the whole listing.
                pass
            print(f"{item:<30} {str(size):<15} {file_type:<10} {mtime_str:<20}")

    def create_file(self, filename, content=""):
        """Create (or overwrite) *filename* with *content*; return True on success."""
        try:
            with open(filename, "w", encoding="utf-8") as file:
                file.write(content)
        except Exception as e:
            print(f"✗ 创建文件失败: {e}")
            return False
        print(f"✓ 文件 '{filename}' 创建成功")
        return True

    def read_file(self, filename):
        """Print the UTF-8 content of *filename*; return True on success."""
        try:
            with open(filename, "r", encoding="utf-8") as file:
                content = file.read()
        except FileNotFoundError:
            print(f"✗ 文件 '{filename}' 不存在")
            return False
        except Exception as e:
            print(f"✗ 读取文件失败: {e}")
            return False
        print(f"文件 '{filename}' 内容:")
        print("-" * 40)
        print(content)
        print("-" * 40)
        return True

    def copy_file(self, source, destination):
        """Copy *source* to *destination* with shutil.copy2; return True on success."""
        try:
            # Checked up front because copy2 raises FileNotFoundError for a
            # missing destination directory as well, which would make the
            # "source missing" message ambiguous.
            if not os.path.exists(source):
                print(f"✗ 源文件 '{source}' 不存在")
                return False
            shutil.copy2(source, destination)
        except Exception as e:
            print(f"✗ 复制文件失败: {e}")
            return False
        print(f"✓ 文件 '{source}' 已复制到 '{destination}'")
        return True

    def find_files(self, pattern):
        """Recursively find files under the working directory matching *pattern*.

        Args:
            pattern: Shell-style wildcard matched against file names only
                (not directory parts), e.g. ``*.txt``.

        Returns:
            List of matching paths relative to the working directory.
        """
        import fnmatch

        found_files = [
            os.path.join(root, name)
            for root, _dirs, files in os.walk(".")
            for name in files
            if fnmatch.fnmatch(name, pattern)
        ]
        if found_files:
            print(f"找到 {len(found_files)} 个匹配 '{pattern}' 的文件:")
            for path in found_files:
                print(f" - {path}")
        else:
            print(f"没有找到匹配 '{pattern}' 的文件")
        return found_files

    def count_words(self, filename):
        """Print character, word and line counts for *filename*.

        Words are whitespace-separated tokens.

        Returns:
            The word count, or 0 when the file is missing or unreadable.
        """
        try:
            with open(filename, "r", encoding="utf-8") as file:
                content = file.read()
        except FileNotFoundError:
            print(f"✗ 文件 '{filename}' 不存在")
            return 0
        except Exception as e:
            print(f"✗ 统计文件失败: {e}")
            return 0

        word_count = len(content.split())
        char_count = len(content)
        if not content:
            line_count = 0
        else:
            # A trailing newline terminates the last line rather than
            # starting a new, empty one, so it must not add to the count.
            line_count = content.count("\n") + (0 if content.endswith("\n") else 1)
        print(f"文件 '{filename}' 统计:")
        print(f" 字符数: {char_count}")
        print(f" 单词数: {word_count}")
        print(f" 行数: {line_count}")
        return word_count

    def export_to_csv(self, data, filename):
        """Write *data* to *filename* as CSV; return True on success.

        Args:
            data: Non-empty sequence of rows, either dicts (the keys of the
                first row become the header) or sequences of values.
            filename: Destination path.
        """
        import csv

        # Validate before opening: opening with "w" truncates the target, so
        # failing afterwards would destroy an existing file.
        if not data:
            print("✗ 导出失败: 没有可导出的数据")
            return False
        try:
            with open(filename, "w", encoding="utf-8", newline="") as file:
                if isinstance(data[0], dict):
                    # List of dicts.
                    writer = csv.DictWriter(file, fieldnames=list(data[0].keys()))
                    writer.writeheader()
                else:
                    # List of row sequences.
                    writer = csv.writer(file)
                writer.writerows(data)
        except Exception as e:
            print(f"✗ 导出失败: {e}")
            return False
        print(f"✓ 数据已导出到 '{filename}'")
        return True
def _read_multiline_content(terminator="END"):
    """Collect console lines until *terminator* or end of input; return them joined."""
    lines = []
    while True:
        try:
            line = input()
        except EOFError:
            # Treat end of input like the terminator rather than crashing.
            break
        if line == terminator:
            break
        lines.append(line)
    return "\n".join(lines)


def main():
    """Run the interactive file-manager menu until the user chooses to exit.

    Creates three sample files in the working directory first, then loops
    over a numbered menu read from standard input.
    """
    manager = FileManager()
    # Create a few sample files to work with.
    print("创建示例文件...")
    manager.create_file("示例.txt", "这是一个示例文件\n包含两行文本\n用于演示文件操作")
    manager.create_file("报告.docx", "这是模拟的Word文档内容")
    manager.create_file("数据.json", '{"name": "示例", "value": 123}')
    print("\n" + "="*60)
    print("文件管理器")
    print("="*60)
    try:
        while True:
            print("\n选项:")
            print("1. 列出文件")
            print("2. 创建文件")
            print("3. 读取文件")
            print("4. 复制文件")
            print("5. 查找文件")
            print("6. 统计文件")
            print("7. 导出数据")
            print("8. 退出")
            choice = input("请选择操作 (1-8): ").strip()
            if choice == "1":
                detailed = input("显示详细信息? (y/n): ").strip().lower() == "y"
                manager.list_files(detailed)
            elif choice == "2":
                filename = input("文件名: ").strip()
                # Show the prompt with print(), not input(): reading the
                # prompt with input() would consume the user's first content
                # line and lose it.
                print("文件内容 (多行,输入END结束):")
                content = _read_multiline_content("END")
                manager.create_file(filename, content)
            elif choice == "3":
                filename = input("文件名: ").strip()
                manager.read_file(filename)
            elif choice == "4":
                source = input("源文件: ").strip()
                destination = input("目标文件: ").strip()
                manager.copy_file(source, destination)
            elif choice == "5":
                pattern = input("查找模式 (如: *.txt, *.json): ").strip()
                manager.find_files(pattern)
            elif choice == "6":
                filename = input("文件名: ").strip()
                manager.count_words(filename)
            elif choice == "7":
                # Sample data to export.
                data = [
                    ["姓名", "年龄", "城市"],
                    ["张三", 25, "北京"],
                    ["李四", 30, "上海"],
                    ["王五", 28, "广州"]
                ]
                filename = input("导出文件名 (如: data.csv): ").strip()
                manager.export_to_csv(data, filename)
            elif choice == "8":
                print("退出文件管理器")
                break
            else:
                print("无效选择,请重新输入")
    except EOFError:
        # Standard input was closed (e.g. Ctrl-D or piped input ran out).
        print("\n退出文件管理器")


if __name__ == "__main__":
    main()
六、总结:文件操作的核心要点
🎯 核心要点总结:
- 基本文件操作三步曲:
  # 打开 → 操作 → 关闭
  file = open("file.txt", "r")  # 打开
  content = file.read()         # 操作
  file.close()                  # 关闭
- 使用with语句更安全:
  # 自动管理资源,确保文件被正确关闭
  with open("file.txt", "r") as file:
      content = file.read()
- 重要文件模式:
  - 'r' - 读取(默认)
  - 'w' - 写入(覆盖)
  - 'a' - 追加
  - 'b' - 二进制模式
  - '+' - 读写模式
- 编码处理:
  # 指定编码,避免乱码
  with open("file.txt", "r", encoding="utf-8") as file:
      content = file.read()
💡 最佳实践:
- 总是使用with语句:避免忘记关闭文件
- 明确指定编码:特别是处理中文文本时
- 处理异常:文件操作容易出错,要有异常处理
- 使用合适的数据格式:
  - 文本数据:普通文本文件
  - 结构化数据:CSV、JSON
  - 配置数据:JSON、INI文件