简述
本文介绍了 python 解压多层嵌套的压缩包的一种方式。目前支持的压缩格式有:
zip, tar, gz, rar, 7z
如果仅需要代码可直接访问:源码地址
实现细节
-
用队列代替递归
压缩包的嵌套层级无法预估,递归调用栈保存中间变量过多可能导致解压时内存溢出,故使用队列一层一层地解压压缩文件。
-
压缩包中重名文件处理
当压缩文件的解压路径与同层级已存在的文件或文件夹重名时,会自动重命名解压路径(在名称前添加 (1)、(2)……)。
-
加密或解压失败的压缩文件
加密的压缩文件和解压失败的压缩文件不会阻塞解压流程,其状态和错误信息会保存在子文件记录(filelist)中。
-
中文文件名异常处理
python 标准库的 zipfile 模块对跨系统的中文文件名识别不友好,该方法通过修改
Python/lib/zipfile.py源码优化了该问题。
zipfile.py 修改说明
如果不想修改标准库下的源文件,可将 Python/lib/zipfile.py 复制一份,修改后放到项目下使用(下文源码中以 zipfile_modify 为模块名导入)。
修改步骤如下:
- 增加函数(依赖第三方库 chardet,需要在 zipfile.py 顶部加入 import chardet)
def get_decode_name(name):
    '''
    Decode a raw archive member name using a detected encoding.

    Args:
        name: The member name as bytes.

    Returns:
        The decoded name. Bytes that cannot be decoded are substituted
        rather than raising, because decoding uses the 'replace' handler.
    '''
    # Fall back to cp1252 when detection yields no encoding at all.
    guessed_encoding = chardet.detect(name)['encoding'] or 'cp1252'
    # Names detected as ISO-8859-9 are decoded as GBK instead.
    # NOTE(review): presumably GBK names get misdetected as ISO-8859-9 -- confirm.
    if guessed_encoding == 'ISO-8859-9':
        guessed_encoding = 'gbk'
    try:
        return name.decode(guessed_encoding, 'replace')
    except LookupError:
        # chardet may report an encoding name Python has no codec for;
        # use the same default as the "nothing detected" case instead of
        # letting the whole archive fail on one member name.
        return name.decode('cp1252', 'replace')
- 将 filename = filename.decode('cp437') 这行代码改成 filename = get_decode_name(filename)
- 将 fname_str = fname.decode("cp437") 这行代码改成 fname_str = get_decode_name(fname)
解压嵌套压缩文件完整源码
import os
import zipfile_modify as zipfile
import tarfile
import gzip
import rarfile
import py7zr
class CompressStatus:
    '''
    Outcome codes returned by the extract functions.

    SUCCESS (0): the archive was extracted.
    ENCRYPTED (1): the archive is password protected and was skipped.
    FAIL (2): extraction raised an error.
    '''
    SUCCESS, ENCRYPTED, FAIL = range(3)
def zipfile_extract_all_to(file_path, target_path):
    '''
    Extract every member of a zip archive into target_path.

    Args:
        file_path: Path of the zip archive to read.
        target_path: Directory that receives the extracted members.

    Returns:
        A (status, message) tuple. status is a CompressStatus value;
        message is None on success, "加密文件" when any member is
        encrypted, or the text of the exception when extraction fails.
    '''
    status = CompressStatus.SUCCESS
    message = None
    try:
        # The context manager closes the archive on every path; when
        # ZipFile() itself raises there is nothing to close.
        with zipfile.ZipFile(file_path) as zip_file:
            # Bit 0 of a member's flag_bits marks it as encrypted.
            if any(zinfo.flag_bits & 0x1 for zinfo in zip_file.infolist()):
                status = CompressStatus.ENCRYPTED
                message = "加密文件"
            else:
                zip_file.extractall(target_path)
    except Exception as e:
        # Deliberately broad: a failure is reported through the return
        # value instead of propagating to the caller.
        status = CompressStatus.FAIL
        message = str(e)
    return status, message
def tarfile_extract_all_to(file_path, target_path):
    '''
    Extract every member of a tar archive into target_path.

    Args:
        file_path: Path of the tar archive to read.
        target_path: Directory that receives the extracted members.

    Returns:
        A (status, message) tuple. status is a CompressStatus value;
        message is None on success or the text of the exception when
        extraction fails.
    '''
    status = CompressStatus.SUCCESS
    message = None
    try:
        # The context manager closes the archive on every path; when
        # tarfile.open() itself raises there is nothing to close.
        with tarfile.open(file_path) as tar_file:
            # NOTE(review): extractall() writes wherever member names
            # point. For untrusted archives, consider the extraction
            # filters offered by newer Python versions.
            tar_file.extractall(target_path)
    except Exception as e:
        # Deliberately broad: a failure is reported through the return
        # value instead of propagating to the caller.
        status = CompressStatus.FAIL
        message = str(e)
    return status, message
def gzfile_extract_all_to(file_path, target_path):
    '''
    Decompress a single-file .gz archive into the file target_path.

    Archives ending in .tar.gz are handled by the tarfile extractor, not
    by this function.

    Args:
        file_path: Path of the .gz file to read.
        target_path: Path of the output file (a file, not a directory).

    Returns:
        A (status, message) tuple. status is a CompressStatus value;
        message is None on success or the text of the exception when
        decompression fails.
    '''
    status = CompressStatus.SUCCESS
    message = None
    try:
        # Both handles are closed on every path, including the one where
        # opening the second file fails after the first succeeded.
        with gzip.open(file_path, 'rb') as gz_file, \
                open(target_path, 'wb') as target_file:
            # Copy in fixed-size chunks so large files are never held in
            # memory as a whole.
            while True:
                block = gz_file.read(65536)
                if not block:
                    break
                target_file.write(block)
    except Exception as e:
        # Deliberately broad: a failure is reported through the return
        # value instead of propagating to the caller.
        status = CompressStatus.FAIL
        message = str(e)
    return status, message
def rarfile_extract_all_to(file_path, target_path):
    '''
    Extract every member of a rar archive into target_path.

    Args:
        file_path: Path of the rar archive to read.
        target_path: Directory that receives the extracted members.

    Returns:
        A (status, message) tuple. status is a CompressStatus value;
        message is None on success, "加密文件" when the archive needs a
        password, or the text of the exception when extraction fails.
    '''
    status = CompressStatus.SUCCESS
    message = None
    try:
        # The context manager closes the archive on every path; when
        # RarFile() itself raises there is nothing to close.
        with rarfile.RarFile(file_path) as rar_file:
            if rar_file.needs_password():
                # Password-protected archives are reported, not extracted.
                status = CompressStatus.ENCRYPTED
                message = "加密文件"
            else:
                rar_file.extractall(target_path)
    except Exception as e:
        # Deliberately broad: a failure is reported through the return
        # value instead of propagating to the caller.
        status = CompressStatus.FAIL
        message = str(e)
    return status, message
def py7zr_extract_all_to(file_path, target_path):
    '''
    Extract every member of a 7z archive into target_path.

    Args:
        file_path: Path of the 7z archive to read.
        target_path: Directory that receives the extracted members.

    Returns:
        A (status, message) tuple. status is a CompressStatus value;
        message is None on success, "加密文件" when py7zr reports that a
        password is required, or the text of the exception when
        extraction fails.
    '''
    status = CompressStatus.SUCCESS
    message = None
    try:
        # The context manager closes the archive on every path; when
        # SevenZipFile() itself raises there is nothing to close.
        with py7zr.SevenZipFile(file_path) as py7zr_file:
            py7zr_file.extractall(target_path)
    except py7zr.exceptions.PasswordRequired:
        # Password-protected archives are reported, not treated as errors.
        status = CompressStatus.ENCRYPTED
        message = "加密文件"
    except Exception as e:
        # Deliberately broad: a failure is reported through the return
        # value instead of propagating to the caller.
        status = CompressStatus.FAIL
        message = str(e)
    return status, message
class FileInfo:
    '''
    Record describing one extracted file.

    Attributes are plain values stored as given:
    path -- the first constructor argument.
    compressed_path -- the second constructor argument.
    '''

    def __init__(self, path, compressed_path):
        self.path, self.compressed_path = path, compressed_path
class CompressedFileInfo(FileInfo):
    '''
    Record describing one archive and the outcome of extracting it.

    Adds four attributes to FileInfo, all starting at neutral values:
    to_path ('') -- where the archive was extracted to.
    encrypted (False) -- set when the archive is password protected.
    is_error (False) -- set when extraction failed.
    msg ('') -- detail text accompanying encrypted / is_error.
    '''

    def __init__(self, path, compressed_path):
        super(CompressedFileInfo, self).__init__(path, compressed_path)
        # Assigned in one statement; order matches the field list above.
        self.to_path, self.encrypted, self.is_error, self.msg = '', False, False, ''
class CompressedFile():
    '''
    A single archive together with the function that can extract it.

    The extract function is chosen from the file extension. Matching is
    case-insensitive, so ".ZIP" is handled like ".zip".
    '''

    # Maps an extension (lowercase, without the dot) to its extract
    # function. The attribute name keeps its original spelling because
    # it is part of the class's visible interface.
    uncomprees_func_dict = {
        'zip': zipfile_extract_all_to,
        'tar': tarfile_extract_all_to,
        'gz': gzfile_extract_all_to,
        'rar': rarfile_extract_all_to,
        '7z': py7zr_extract_all_to
    }

    @classmethod
    def is_compressed_file(cls, path):
        '''
        Return True when the extension of path is a supported archive type.
        '''
        _, ext = os.path.splitext(path)
        return ext[1:].lower() in cls.uncomprees_func_dict

    def __init__(self, path, compressed_path, target_path_default=None):
        '''
        Args:
            path: Location of the archive on disk.
            compressed_path: Stored on the info record as compressed_path.
            target_path_default: Extraction target used when extract_all()
                is called without one. When None, the archive path with
                its extension removed is used.
        '''
        self.info = CompressedFileInfo(path, compressed_path)
        if path.lower().endswith('.tar.gz'):
            # splitext() would only strip ".gz", so handle the double
            # extension explicitly and route it to the tar extractor.
            self.uncompress_func = tarfile_extract_all_to
            self.target_path_default = path[:-len('.tar.gz')]
        else:
            path_without_ext, ext = os.path.splitext(path)
            # None when the extension is unsupported; extract_all()
            # reports that case as a failure.
            self.uncompress_func = self.uncomprees_func_dict.get(ext[1:].lower())
            self.target_path_default = path_without_ext
        if target_path_default is not None:
            self.target_path_default = target_path_default

    def extract_all(self, target_path=None):
        '''
        Extract the archive and return its info record.

        Args:
            target_path: Where to extract to. Falls back to the default
                chosen in __init__ when empty or None.

        Returns:
            The CompressedFileInfo of this archive, with to_path set and
            encrypted / is_error / msg filled in when extraction did not
            succeed.
        '''
        if not target_path:
            target_path = self.target_path_default
        self.info.to_path = self.check_uncompress_path(target_path)
        if self.uncompress_func is None:
            # Unsupported extension: record a failure instead of crashing
            # on a call to None.
            self.info.is_error = True
            self.info.msg = "不支持的压缩格式"
            return self.info
        status, message = self.uncompress_func(self.info.path, self.info.to_path)
        if status == CompressStatus.ENCRYPTED:
            self.info.encrypted = True
            self.info.msg = message
        elif status == CompressStatus.FAIL:
            self.info.is_error = True
            self.info.msg = message
        return self.info

    def check_uncompress_path(self, to_path):
        '''
        Return to_path if nothing exists there, otherwise a free variant.

        The variant prefixes the last path component with "(i)" for
        i = 1, 2, 3, ... and the first path that does not exist wins.
        '''
        suffix = 1
        file_dir, filename = os.path.split(to_path)
        while os.path.exists(to_path):
            to_path = os.path.join(file_dir, f"({suffix}){filename}")
            suffix += 1
        return to_path
class NestedCompressedFile():
    '''
    Extract an archive and every archive nested inside it.

    Pending archives are kept in a first-in-first-out list and processed
    one at a time, so no recursion is involved. The result is collected
    in filelist: FileInfo entries for plain files, and the info record
    of any archive that was encrypted or failed to extract.
    '''

    def __init__(self, file_path):
        '''
        Args:
            file_path: Location of the outermost archive.
        '''
        self.file_path = file_path
        self.compressed_file_queue = []
        self.filelist = []
        self.origin_compress_file = None

    def deep_extract_all_to(self, target_path):
        '''
        Extract the outermost archive to target_path, then keep
        extracting every archive found inside until none are left.

        Nested archives are extracted next to themselves, using their
        own path without the extension as the default target. Results
        are stored in self.filelist, which is reset on each call.
        '''
        self.filelist = []
        _, filename = os.path.split(self.file_path)
        self.origin_compress_file = CompressedFile(self.file_path, filename, target_path_default=target_path)
        self.compressed_file_queue.append(self.origin_compress_file)
        while self.compressed_file_queue:
            compressed_file = self.compressed_file_queue.pop(0)
            compressed_file_info = compressed_file.extract_all()
            if compressed_file_info.encrypted or compressed_file_info.is_error:
                # Nothing was extracted; keep the record so the caller
                # can see why, and move on to the next archive.
                self.filelist.append(compressed_file_info)
            else:
                self.uncompress_subfile_walk(compressed_file_info.to_path, compressed_file_info.compressed_path)

    def uncompress_subfile_walk(self, walk_path, path_in_compressed):
        '''
        Register everything that an extraction produced at walk_path.

        Args:
            walk_path: The extraction result; a directory, or a single
                file.
            path_in_compressed: The logical path that stands for
                walk_path in the recorded compressed_path values.

        Archives are appended to compressed_file_queue; all other files
        are appended to filelist.
        '''
        if not os.path.isdir(walk_path):
            self._collect(walk_path, path_in_compressed)
            return
        for current_dir, _, sub_file_list in os.walk(walk_path):
            for sub_file_fullname in sub_file_list:
                file_save_path = os.path.join(current_dir, sub_file_fullname)
                # Swap only the leading walk_path for the logical path.
                # A plain str.replace() would also rewrite later
                # occurrences of walk_path inside the file's own path.
                relative_path = os.path.relpath(file_save_path, walk_path)
                file_path_in_compressed = os.path.join(path_in_compressed, relative_path)
                self._collect(file_save_path, file_path_in_compressed)

    def _collect(self, file_save_path, file_path_in_compressed):
        '''
        Queue the file for extraction if it is an archive, else list it.
        '''
        if CompressedFile.is_compressed_file(file_save_path):
            self.compressed_file_queue.append(CompressedFile(file_save_path, file_path_in_compressed))
        else:
            self.filelist.append(FileInfo(file_save_path, file_path_in_compressed))