需求分析
这个脚本支持扫描并统计某个文件夹(或 zip、7z 压缩包)内以指定后缀结尾的文件,比如 sql、java(以实际传入的参数为准),并且可以看到每个文件的大小和相对于扫描根目录的路径,还支持导出为 CSV、XLSX 格式。它适用于统计完本地代码行数之后,需要进一步查看某种编程语言(比如 Java)写了多少个文件、每个文件有多大、文件路径在哪里等场景。
使用教程
使用脚本时,通过参数指定要扫描的路径(-tp)、文件后缀(-t,或用 -a 列出全部文件),以及导出文件名(-o,可选,文件数量过多时建议使用)即可。
比如 python list_zip_contents.py -tp "C:\Users\your_name\Pictures" -a -o rpng.csv(演示时导出文件名为 best.csv)。
最后,用编辑器打开即可。
详细代码
# -*- coding: utf-8 -*-
import zipfile
import os
import argparse
# Maps a type name accepted by -t/--type to the file suffix used for
# filtering. Insertion order matters: the --type help text lists the keys in
# this order. (The duplicated trailing 'sql' entry has been removed.)
EXTENSIONS = {
    'sql': '.sql',
    'java': '.java',
    'c': '.c',
    'cpp': '.cpp',
    'h': '.h',
    'py': '.py',
    'js': '.js',
    'html': '.html',
    'css': '.css',
    'markdown': '.md',
    'txt': '.txt',
    'xml': '.xml',
    'yaml': '.yaml',
    'yml': '.yml',
    'json': '.json',
    'properties': '.properties',
    'go': '.go',
    'rs': '.rs',
    'rb': '.rb',
    'php': '.php',
    'swift': '.swift',
    'kt': '.kt',
    'cs': '.cs',
    'lua': '.lua',
    'sh': '.sh',
    'bat': '.bat',
    'ps1': '.ps1',
}
def split_archive_path(path):
    """Split *path* into an archive file path and a path inside that archive.

    The path is searched case-insensitively for a known archive extension
    that ends a path component, i.e. is followed by a path separator or by
    the end of the string. For each extension the last such occurrence is
    considered; among those, the leftmost match wins and ties are broken in
    favour of the longer extension (so ``.tar.gz`` beats ``.tar``/``.gz``).

    Args:
        path: A filesystem path that may point inside an archive, e.g.
            ``C:\\data\\src.zip\\pkg\\sub``.

    Returns:
        A ``(archive_path, sub_path)`` tuple. ``sub_path`` has its leading
        ``/`` and ``\\`` separators removed and is ``''`` when nothing
        follows the archive name. When no archive extension is found the
        result is ``(path, '')``.
    """
    lower_path = path.lower()
    all_extensions = (
        '.tar.gz', '.tar.bz2', '.tar.zst', '.tgz', '.zip',
        '.7z', '.rar', '.tar', '.gz', '.bz2',
    )
    separators = '/\\'

    best = None  # (start index, -extension length); smaller sorts first
    for ext in all_extensions:
        search_end = len(lower_path)
        while True:
            idx = lower_path.rfind(ext, 0, search_end)
            if idx < 0:
                break
            stop = idx + len(ext)
            # Only accept the match when the extension really ends a path
            # component; otherwise "my.zipper/x" would be cut at "my.zip".
            if stop == len(lower_path) or lower_path[stop] in separators:
                key = (idx, -len(ext))
                if best is None or key < best:
                    best = key
                break
            # Mid-component hit: keep looking for an earlier occurrence.
            search_end = stop - 1

    if best is None:
        return path, ''

    cut = best[0] - best[1]  # start index + extension length
    return path[:cut], path[cut:].lstrip(separators)
def is_archive(path):
    """Return True when *path* ends, ignoring case, with a known archive suffix."""
    known_suffixes = (
        '.zip', '.7z', '.rar', '.tar', '.tar.gz',
        '.tar.bz2', '.tgz', '.gz', '.bz2',
    )
    lowered = path.lower()
    return any(lowered.endswith(suffix) for suffix in known_suffixes)
def list_files_in_path(target_path, filter_exts, show_all):
    """Dispatch *target_path* to the matching lister.

    A directory goes to ``list_files_in_dir`` and an archive file to
    ``list_files_in_archive``. Anything else is split with
    ``split_archive_path`` and, when it names a location inside an existing
    archive file, handled by ``list_files_in_archive_subdir``.

    Returns:
        The result dict of the chosen lister, or a dict with an ``'error'``
        message, an empty ``'files'`` list and an empty ``'header'``.
    """
    if os.path.isdir(target_path):
        return list_files_in_dir(target_path, filter_exts, show_all)

    if os.path.isfile(target_path) and is_archive(target_path):
        return list_files_in_archive(target_path, filter_exts, show_all)

    # Neither a directory nor an archive file: maybe "<archive>/<inner path>".
    archive_path, sub_path = split_archive_path(target_path)
    inside_archive = os.path.isfile(archive_path) and is_archive(archive_path) and sub_path
    if not inside_archive:
        return {'error': f'Not a valid directory or zip file - {target_path}', 'files': [], 'header': ''}

    return list_files_in_archive_subdir(archive_path, sub_path, filter_exts, show_all)
def list_files_in_archive_subdir(zip_path, sub_path, filter_exts, show_all):
    """List the files below *sub_path* inside a .zip or .7z archive.

    The relevant part of the archive is extracted into a temporary
    directory, scanned with ``list_files_recursive`` and removed again.

    Args:
        zip_path: Path of the archive file (``.zip`` or ``.7z``).
        sub_path: Location inside the archive; ``/`` and ``\\`` are both
            accepted as separators.
        filter_exts: Collection of lower-case suffixes (e.g. ``{'.py'}``)
            to keep when *show_all* is false.
        show_all: When true, every file is reported regardless of suffix.

    Returns:
        The dict produced by ``list_files_recursive``, or a dict with an
        ``'error'`` message when the archive format is unsupported or (for
        .7z) the sub-directory does not exist in the archive.
    """
    import shutil
    import tempfile

    lowered = zip_path.lower()
    is_zip = lowered.endswith('.zip')
    if not is_zip and not lowered.endswith('.7z'):
        # Checked up front so no temp directory is created for nothing.
        return {'error': f'Unsupported archive format - {zip_path}', 'files': [], 'header': ''}

    # Normalised once; a trailing separator would otherwise defeat the
    # prefix comparison against member names.
    wanted = sub_path.replace('/', os.sep).replace('\\', os.sep).strip(os.sep)

    temp_dir = tempfile.mkdtemp()
    try:
        if is_zip:
            with zipfile.ZipFile(zip_path, 'r') as z:
                for member in z.namelist():
                    normalized = member.replace('/', os.sep).replace('\\', os.sep)
                    if normalized == wanted or normalized.startswith(wanted + os.sep):
                        z.extract(member, temp_dir)
        else:
            import py7zr
            with py7zr.SevenZipFile(zip_path, 'r') as sz:
                sz.extractall(temp_dir)

        # Resolve the requested sub-directory by its full path instead of
        # guessing from top-level entry names, so multi-level sub-paths and
        # .7z archives are scoped correctly.
        candidate = os.path.join(temp_dir, wanted)
        if os.path.isdir(candidate):
            scan_root = candidate
        elif is_zip:
            # Only matching members were extracted (possibly a single file,
            # possibly nothing), so scanning the whole temp dir is still
            # limited to the requested location.
            scan_root = temp_dir
        else:
            # The whole .7z was extracted; scanning it all would report
            # files outside the requested location.
            return {'error': f'Subdirectory not found in archive - {sub_path}', 'files': [], 'header': ''}

        return list_files_recursive(scan_root, filter_exts, show_all, zip_path, sub_path)
    finally:
        # Best-effort cleanup; a leftover temp dir must not mask the result.
        shutil.rmtree(temp_dir, ignore_errors=True)
def list_files_recursive(dir_path, filter_exts, show_all, archive_path=None, sub_path=None):
    """Walk *dir_path* and collect ``(relative_path, size)`` for its files.

    Files whose name ends with an archive suffix are handed to
    ``try_extract_nested``; when that succeeds, the archive's contents are
    reported instead of the archive file itself, each path prefixed with the
    nested archive's own relative path so entries stay locatable. Archives
    that cannot be expanded are treated as ordinary files.

    Args:
        dir_path: Directory to scan.
        filter_exts: Collection of lower-case suffixes to keep when
            *show_all* is false.
        show_all: When true, every file is reported regardless of suffix.
        archive_path: Optional archive the directory was extracted from;
            only used for the header text.
        sub_path: Optional location inside *archive_path*; only used for
            the header text.

    Returns:
        ``{'files': sorted list of (path, size), 'header': str}``.
    """
    nested_suffixes = ('.zip', '.7z', '.rar', '.tar', '.tar.gz', '.tgz', '.gz', '.bz2')
    files_found = []

    # os.walk already descends into sub-directories, so no worklist is needed.
    for root, _dirs, files in os.walk(dir_path):
        for filename in files:
            filepath = os.path.join(root, filename)
            try:
                size = os.path.getsize(filepath)
            except OSError:
                # Unreadable or vanished file: still list it, with size 0.
                size = 0
            rel_path = os.path.relpath(filepath, dir_path)

            if filename.lower().endswith(nested_suffixes):
                nested_result = try_extract_nested(filepath, filter_exts, show_all)
                if nested_result:
                    files_found.extend(
                        (os.path.join(rel_path, inner_path), inner_size)
                        for inner_path, inner_size in nested_result['files']
                    )
                    continue

            if show_all or os.path.splitext(filename)[1].lower() in filter_exts:
                files_found.append((rel_path, size))

    if archive_path and sub_path:
        header = f'Archive: {archive_path} / {sub_path}'
    elif archive_path:
        header = f'Archive: {archive_path}'
    else:
        header = f'Directory: {dir_path}'
    header += f'\nTotal files: {len(files_found)}\n' + '=' * 80
    return {'files': sorted(files_found), 'header': header}
def try_extract_nested(archive_file, filter_exts, show_all):
    """Best-effort listing of a .zip or .7z archive found during a scan.

    The archive is extracted into a temporary directory, its files are
    collected and the directory is removed again.

    Args:
        archive_file: Path of the archive on disk.
        filter_exts: Collection of lower-case suffixes to keep when
            *show_all* is false.
        show_all: When true, every file is reported regardless of suffix.

    Returns:
        ``{'files': sorted list of (path relative to the archive root,
        size), 'header': 'Nested: <archive_file>'}``, or ``None`` when the
        format is not .zip/.7z or extraction fails for any reason.
    """
    import shutil
    import tempfile

    lowered = archive_file.lower()
    is_zip = lowered.endswith('.zip')
    if not is_zip and not lowered.endswith('.7z'):
        # Decide before touching the filesystem: no temp dir for formats
        # that cannot be expanded anyway.
        return None

    temp_dir = tempfile.mkdtemp()
    try:
        if is_zip:
            with zipfile.ZipFile(archive_file, 'r') as z:
                z.extractall(temp_dir)
        else:
            import py7zr
            with py7zr.SevenZipFile(archive_file, 'r') as sz:
                sz.extractall(temp_dir)

        files_found = []
        for root, _dirs, files in os.walk(temp_dir):
            for filename in files:
                filepath = os.path.join(root, filename)
                try:
                    size = os.path.getsize(filepath)
                except OSError:
                    size = 0
                rel_path = os.path.relpath(filepath, temp_dir)
                if show_all or os.path.splitext(filename)[1].lower() in filter_exts:
                    files_found.append((rel_path, size))
        return {'files': sorted(files_found), 'header': f'Nested: {archive_file}'}
    except Exception:
        # Deliberately broad: a corrupt archive, a missing py7zr or an I/O
        # error all mean "treat it as an ordinary file". KeyboardInterrupt
        # and SystemExit are no longer swallowed.
        return None
    finally:
        shutil.rmtree(temp_dir, ignore_errors=True)
def list_files_in_archive(zip_path, filter_exts, show_all):
    """Route an archive to the lister for its format (.zip or .7z).

    Returns:
        The chosen lister's result dict, or a dict with an ``'error'``
        message, an empty ``'files'`` list and an empty ``'header'`` for any
        other suffix.
    """
    lowered = zip_path.lower()

    if lowered.endswith('.zip'):
        return list_files_in_zip(zip_path, filter_exts, show_all)

    if lowered.endswith('.7z'):
        return list_files_in_7z(zip_path, filter_exts, show_all)

    return {'error': f'Unsupported archive format - {zip_path}', 'files': [], 'header': ''}
def list_files_in_7z(archive_path, filter_exts, show_all):
    """List the files of a .7z archive as ``(relative_path, size)`` pairs.

    The archive is fully extracted into a temporary directory, which is
    walked and then removed again. Requires the third-party ``py7zr``
    package, imported lazily so the rest of the script works without it.

    Args:
        archive_path: Path of the .7z file.
        filter_exts: Collection of lower-case suffixes to keep when
            *show_all* is false.
        show_all: When true, every file is reported regardless of suffix.

    Returns:
        ``{'files': sorted list of (path, size), 'header': str}``.

    Raises:
        ImportError: If ``py7zr`` is not installed.
    """
    import shutil
    import tempfile

    import py7zr

    temp_dir = tempfile.mkdtemp()
    try:
        with py7zr.SevenZipFile(archive_path, 'r') as sz:
            sz.extractall(temp_dir)

        files_found = []
        for root, _dirs, files in os.walk(temp_dir):
            for filename in files:
                filepath = os.path.join(root, filename)
                try:
                    size = os.path.getsize(filepath)
                except OSError:
                    # Unreadable entry: still list it, with size 0.
                    size = 0
                rel_path = os.path.relpath(filepath, temp_dir)
                if show_all or os.path.splitext(filename)[1].lower() in filter_exts:
                    files_found.append((rel_path, size))

        return {'files': sorted(files_found), 'header': f'Archive: {archive_path}\nTotal files: {len(files_found)}\n' + '=' * 80}
    finally:
        # Best-effort cleanup without a bare except.
        shutil.rmtree(temp_dir, ignore_errors=True)
def list_files_in_zip_subdir(zip_path, sub_path, filter_exts, show_all):
    """Print the files below *sub_path* inside a ZIP archive.

    Matching members are extracted into a temporary directory, listed with
    paths relative to the sub-directory, and the directory is removed
    again. Output goes to stdout; nothing is returned.

    Args:
        zip_path: Path of the .zip file.
        sub_path: Directory inside the archive. ``/`` and ``\\`` are both
            accepted and surrounding separators are ignored, because ZIP
            member names always use ``/``.
        filter_exts: Collection of lower-case suffixes to keep when
            *show_all* is false.
        show_all: When true, every file is reported regardless of suffix.
    """
    import shutil
    import tempfile

    # ZIP member names use forward slashes; a Windows-style or
    # slash-terminated sub_path would otherwise never match.
    member_prefix = sub_path.replace('\\', '/').strip('/')

    temp_dir = tempfile.mkdtemp()
    try:
        with zipfile.ZipFile(zip_path, 'r') as z:
            for member in z.namelist():
                if member == member_prefix or member.startswith(member_prefix + '/'):
                    z.extract(member, temp_dir)

        extract_subdir = os.path.join(temp_dir, *member_prefix.split('/'))
        if not os.path.exists(extract_subdir):
            print(f'Error: Subdirectory not found in archive - {sub_path}')
            return

        files_found = []
        for root, _dirs, files in os.walk(extract_subdir):
            for filename in files:
                filepath = os.path.join(root, filename)
                try:
                    size = os.path.getsize(filepath)
                except OSError:
                    size = 0
                rel_path = os.path.relpath(filepath, extract_subdir)
                if show_all or os.path.splitext(filename)[1].lower() in filter_exts:
                    files_found.append((rel_path, size))

        print(f'Archive: {zip_path} / {sub_path}')
        print(f'Total files: {len(files_found)}')
        print('=' * 80)
        for path, size in sorted(files_found):
            print(f'{size:>12,} bytes {path}')
    finally:
        shutil.rmtree(temp_dir, ignore_errors=True)
def list_files_in_dir(dir_path, filter_exts, show_all):
    """Recursively list the files below *dir_path*.

    Args:
        dir_path: Directory to scan.
        filter_exts: Collection of lower-case suffixes (e.g. ``{'.py'}``)
            to keep when *show_all* is false.
        show_all: When true, every file is reported regardless of suffix.

    Returns:
        ``{'files': sorted list of (path relative to dir_path, size in
        bytes), 'header': str}``. A file whose size cannot be read is
        listed with size 0.
    """
    files_found = []
    for root, _dirs, files in os.walk(dir_path):
        for filename in files:
            filepath = os.path.join(root, filename)
            try:
                size = os.path.getsize(filepath)
            except OSError:
                # e.g. broken symlink or permission problem; keep the entry.
                size = 0
            rel_path = os.path.relpath(filepath, dir_path)
            if show_all or os.path.splitext(filename)[1].lower() in filter_exts:
                files_found.append((rel_path, size))
    return {'files': sorted(files_found), 'header': f'Directory: {dir_path}\nTotal files: {len(files_found)}\n' + '=' * 80}
def list_files_in_zip(zip_path, filter_exts, show_all):
    """List the files of a ZIP archive from its directory, without extracting.

    Directory entries are skipped. Empty files are reported with size 0,
    matching how the directory lister treats them.

    Args:
        zip_path: Path of the .zip file.
        filter_exts: Collection of lower-case suffixes (e.g. ``{'.py'}``)
            to keep when *show_all* is false.
        show_all: When true, every file is reported regardless of suffix.

    Returns:
        ``{'files': sorted list of (member name, uncompressed size),
        'header': str}``, or a dict with an ``'error'`` message when
        *zip_path* does not exist.

    Raises:
        zipfile.BadZipFile: If the file exists but is not a valid ZIP.
    """
    if not os.path.exists(zip_path):
        return {'error': f'File not found - {zip_path}', 'files': [], 'header': ''}

    files_found = []
    with zipfile.ZipFile(zip_path, 'r') as z:
        for info in z.infolist():
            # Skip directories by what they are, not by size: a zero-byte
            # file such as __init__.py is still a file worth listing.
            if info.is_dir():
                continue
            name = info.filename
            if show_all or os.path.splitext(name)[1].lower() in filter_exts:
                files_found.append((name, info.file_size))

    return {'files': sorted(files_found), 'header': f'Archive: {zip_path}\nTotal files: {len(files_found)}\n' + '=' * 80}
def main():
    """Command-line entry point: scan a target and print or export the result.

    The target (a directory, an archive, or a path inside an archive) comes
    from ``-tp/--target_path`` or the positional argument. Files are
    filtered by the suffixes selected with ``-t/--type`` (all entries of
    ``EXTENSIONS`` by default) unless ``-a/--all`` is given. With
    ``-o/--output`` the listing is written as CSV or XLSX; otherwise it is
    printed to stdout. Errors are reported by printing a message.
    """
    parser = argparse.ArgumentParser(
        description='List files in a zip archive with filtering by type',
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog='''
Examples:
python list_zip_contents.py --target_path archive.zip --all
python list_zip_contents.py --target_path archive.zip --type java py js
python list_zip_contents.py --target_path archive.zip --type sql
python list_zip_contents.py archive.zip --type c cpp h
'''
    )
    parser.add_argument('positional_path', nargs='?', help='Path to the zip archive')
    parser.add_argument('-tp', '--target_path', '--target', dest='target_path', help='Path to the zip archive')
    parser.add_argument('-t', '--type', '--types', dest='types', nargs='+',
                        help=f'File types to filter: {" ".join(EXTENSIONS.keys())}')
    parser.add_argument('-a', '--all', dest='show_all', action='store_true',
                        help='Show all files instead of code files only')
    parser.add_argument('-o', '--output', '--out', dest='output',
                        help='Output file path (e.g., result.csv, result.xlsx)')
    args = parser.parse_args()

    # The explicit option wins over the positional argument.
    zip_path = args.target_path or args.positional_path
    if not zip_path:
        parser.print_help()
        return

    # Relative output paths are resolved against the current directory,
    # falling back to the script's own directory if the cwd has vanished.
    run_dir = os.getcwd()
    script_dir = os.path.dirname(os.path.abspath(__file__))
    output_dir = run_dir if os.path.exists(run_dir) else script_dir

    # Unknown type names are accepted and treated as a raw suffix ("-t exe").
    if args.types:
        filter_exts = {EXTENSIONS.get(t, '.' + t) for t in args.types}
    else:
        filter_exts = set(EXTENSIONS.values())

    # Scan exactly once (the pasted-in second scan has been removed).
    result = list_files_in_path(zip_path, filter_exts, args.show_all)
    if result.get('error'):
        print(result['error'])
        return

    if not args.output:
        if result['header']:
            print(result['header'])
        for path, size in result['files']:
            print(f'{size:>12,} bytes {path}')
        return

    supported_exts = ('.csv', '.xlsx', '.xls')
    output_path = args.output
    ext = os.path.splitext(output_path)[1].lower()
    target_stem = os.path.splitext(os.path.basename(zip_path))[0]
    if not os.path.isabs(output_path):
        output_path = os.path.join(output_dir, output_path)

    if os.path.exists(output_path):
        print(f'File "{output_path}" already exists. Overwrite? (Yes/No): ')
        choice = input().strip().lower()
        if choice not in ('yes', 'y'):
            default_name = f'{target_stem}_{ext[1:]}' if ext else target_stem
            print(f'Please modify the file name to: {default_name}')
            new_name = input().strip()
            if new_name:
                if not os.path.isabs(new_name):
                    new_name = os.path.join(output_dir, new_name)
                output_path = new_name
                # Follow the new name's format when it is a supported one, so
                # "out.xlsx" is not filled with CSV text. Any other name keeps
                # the format that was originally requested.
                new_ext = os.path.splitext(output_path)[1].lower()
                if new_ext in supported_exts:
                    ext = new_ext
            else:
                output_path = os.path.join(output_dir, default_name)

    abs_path = os.path.abspath(output_path)
    if ext == '.csv':
        import csv
        with open(output_path, 'w', newline='', encoding='utf-8') as f:
            writer = csv.writer(f, lineterminator='\n')
            writer.writerow(['Size (bytes)', 'Path'])
            for path, size in result['files']:
                writer.writerow([size, path])
        print(f'CSV saved to: {abs_path}')
    elif ext in ('.xlsx', '.xls'):
        # openpyxl is imported lazily so CSV/console use works without it.
        from openpyxl import Workbook
        wb = Workbook()
        ws = wb.active
        ws.title = 'Files'
        ws.append(['Size (bytes)', 'Path'])
        for path, size in result['files']:
            ws.append([size, path])
        # Size each column to its longest value, capped at 80 characters.
        for col in ws.columns:
            max_length = 0
            col_letter = col[0].column_letter
            for cell in col:
                if cell.value:
                    max_length = max(max_length, len(str(cell.value)))
            ws.column_dimensions[col_letter].width = min(max_length + 2, 80)
        wb.save(output_path)
        print(f'Excel saved: {abs_path}')
    else:
        print(f'Error: Unsupported file extension - {ext}')
        return


# Run the CLI only when executed as a script, not when imported.
if __name__ == '__main__':
    main()
优化建议
仅支持csv、xlsx,建议添加md格式,或者结合格式转换等在线网站。
仅支持按后缀过滤,建议添加正则匹配,可以像 Everything 那样支持任意关键字搜索。
潜在影响,不支持exe格式等文件格式扫描,可能会被防火墙、EDR等产品或服务阻止。