使用 Python 一键统计本地文件夹或压缩包的代码行数,可处理嵌套的压缩包和文件夹;统计结果包含代码行数、空白行数和注释行数。
# -*- coding: utf-8 -*-
import os
import sys
import argparse
import urllib.parse
import tempfile
import shutil
import zipfile
import tarfile
# Lower-cased file extension -> label under which the file is grouped in the report.
EXTENSIONS = {
    # Scripting, web and shell
    '.py': 'python',
    '.js': 'javascript',
    '.ts': 'typescript',
    '.jsx': 'javascript',
    '.tsx': 'typescript',
    '.html': 'html',
    '.css': 'css',
    '.sql': 'sql',
    '.sh': 'shell',
    '.bat': 'batch',
    '.ps1': 'powershell',
    # Documents, data and configuration
    '.md': 'markdown',
    '.json': 'json',
    '.yaml': 'yaml',
    '.yml': 'yaml',
    '.xml': 'xml',
    '.txt': 'text',
    '.conf': 'config',
    '.ini': 'config',
    '.env': 'env',
    '.db': 'database',
    # C family
    '.c': 'c',
    '.h': 'c',
    '.cpp': 'cpp',
    '.cc': 'cpp',
    '.cxx': 'cpp',
    '.hpp': 'cpp',
    # Other compiled / general-purpose languages
    '.java': 'java',
    '.go': 'go',
    '.rs': 'rust',
    '.swift': 'swift',
    '.kt': 'kotlin',
    '.scala': 'scala',
    '.rb': 'ruby',
    '.php': 'php',
    '.pl': 'perl',
    '.lua': 'lua',
    '.r': 'r',
    '.m': 'objective-c',
    '.mm': 'objective-c',
}

# Suffixes that mark a file as an archive to be unpacked rather than counted.
ARCHIVE_EXTENSIONS = {
    '.zip',
    '.tar',
    '.tar.gz',
    '.tar.bz2',
    '.tar.zst',
    '.tgz',
    '.gz',
    '.bz2',
    '.7z',
    '.rar',
}

# Directory names that the directory walk does not descend into.
SKIP_DIRS = {
    '__pycache__',
    '.git',
    '.venv',
    'venv',
    'node_modules',
    '.pytest_cache',
    '.mypy_cache',
    'dist',
    'build',
    '.egg-info',
}
def url_to_path(url_or_path):
    """Convert a path or URL given on the command line into a local path.

    Plain paths only get their separators normalised to ``os.sep``.  For
    ``file://`` URLs the scheme is removed and percent-escapes are decoded;
    the slash in front of a Windows drive letter (``file:///C:/dir``) is
    dropped while a POSIX absolute path (``file:///home/u``) keeps its
    leading slash.  For any other ``scheme://`` URL the URL path (or, if that
    is empty, the network location) is used.

    Args:
        url_or_path: Path or URL string; surrounding whitespace is ignored.

    Returns:
        The path using ``os.sep`` as separator, without a trailing separator
        (unless removing it would leave an empty string).
    """
    path = url_or_path.strip()
    if path.startswith('file://'):
        # Percent-escapes are only meaningful inside URLs; a plain path may
        # legitimately contain '%', so decoding is limited to this branch.
        path = urllib.parse.unquote(path[len('file://'):])
        # 'file:///C:/dir' leaves '/C:/dir': remove the slash before the drive.
        if len(path) >= 3 and path[0] == '/' and path[1].isalpha() and path[2] == ':':
            path = path[1:]
    elif '://' in path:
        parsed = urllib.parse.urlparse(path)
        path = parsed.path if parsed.path else parsed.netloc
    path = path.replace('/', os.sep).replace('\\', os.sep)
    # Only reachable where os.sep is a backslash: a rooted path without a
    # drive letter is made relative, as before.
    if path.startswith('\\') and ':' not in path:
        path = path[1:]
    trimmed = path.rstrip(os.sep)
    # Keep a bare root such as '/' instead of collapsing it to ''.
    return trimmed or path
def split_archive_path(path):
    """Split a path that points inside an archive into archive and inner path.

    If *path* exists on disk it is returned unchanged with an empty inner
    path.  Otherwise every position where a known archive suffix ends a path
    component is tried from left to right, and the first prefix that is an
    existing file is taken as the archive.

    Args:
        path: Path such as ``dir/a.zip/src/pkg`` (either separator style).

    Returns:
        ``(archive_path, sub_path)``; ``(path, '')`` when no existing archive
        prefix is found.
    """
    if os.path.exists(path):
        return path, ''
    lower_path = path.lower()
    suffixes = ('.tar.gz', '.tar.bz2', '.tar.zst', '.tgz', '.zip', '.7z', '.rar', '.tar', '.gz', '.bz2')
    candidates = []
    for suffix in suffixes:
        start = 0
        while True:
            idx = lower_path.find(suffix, start)
            if idx == -1:
                break
            start = idx + 1
            end = idx + len(suffix)
            # The suffix must finish a path component; otherwise '.tar' inside
            # 'x.tar.gz' or '.zip' inside 'x.zipped' would produce a bogus split.
            if end < len(path) and path[end] not in '\\/':
                continue
            candidates.append((idx, path[:end], path[end:].lstrip('\\/')))
    # Leftmost match first, so the outermost archive wins for nested archives.
    candidates.sort(key=lambda item: item[0])
    for _, archive_path, sub_path in candidates:
        # A directory that merely looks like an archive cannot be unpacked.
        if os.path.isfile(archive_path):
            return archive_path, sub_path
    return path, ''
def get_archive_ext(filename):
    """Return the archive suffix of *filename*, or else its ordinary extension.

    The comparison is case-insensitive and compound suffixes such as
    ``.tar.gz`` are checked before their shorter tails (``.gz``).  When no
    archive suffix matches, the lower-cased result of ``os.path.splitext`` is
    returned, which is ``''`` for names without an extension.
    """
    lowered = filename.lower()
    # Order matters: longer compound suffixes come before '.tar', '.gz', '.bz2'.
    known_suffixes = (
        '.tar.gz', '.tar.bz2', '.tar.zst', '.tgz', '.zip',
        '.tar', '.gz', '.bz2', '.7z', '.rar',
    )
    matched = next((suffix for suffix in known_suffixes if lowered.endswith(suffix)), None)
    if matched is not None:
        return matched
    _, plain_ext = os.path.splitext(filename)
    return plain_ext.lower()
def extract_archive(archive_path, extract_dir):
    """Unpack *archive_path* into a freshly created temporary directory.

    Supported: zip, tar (plain, gzip, bzip2), single-file ``.gz``/``.bz2``,
    and, when the optional modules are installed, ``.tar.zst`` (zstandard),
    ``.7z`` (py7zr) and ``.rar`` (rarfile, falling back to an ``unrar``
    executable).

    Args:
        archive_path: Path of the archive file.
        extract_dir: Not used; accepted so existing callers keep working.

    Returns:
        The temporary directory containing the extracted files (the caller is
        responsible for deleting it), or ``None`` if the format is not
        supported, a required module/tool is missing, or extraction failed.
        The temporary directory is removed again in every ``None`` case.
    """
    ext = get_archive_ext(archive_path)
    tar_modes = {'.tar': 'r', '.tar.gz': 'r:gz', '.tgz': 'r:gz', '.tar.bz2': 'r:bz2'}
    temp_dir = tempfile.mkdtemp()
    try:
        if ext == '.zip':
            with zipfile.ZipFile(archive_path, 'r') as zip_ref:
                zip_ref.extractall(temp_dir)
            return temp_dir
        elif ext in tar_modes:
            with tarfile.open(archive_path, tar_modes[ext]) as tar:
                tar.extractall(temp_dir, filter='data')
            return temp_dir
        elif ext == '.tar.zst':
            try:
                import zstandard
            except ImportError:
                print('Warning: Failed to extract ' + archive_path + ': the zstandard module is not installed')
            else:
                # Decompress on the fly and read the tar as a forward-only stream.
                with open(archive_path, 'rb') as raw:
                    with zstandard.ZstdDecompressor().stream_reader(raw) as reader:
                        with tarfile.open(fileobj=reader, mode='r|') as tar:
                            tar.extractall(temp_dir, filter='data')
                return temp_dir
        elif ext in ('.gz', '.bz2'):
            if ext == '.gz':
                import gzip as codec
            else:
                import bz2 as codec
            # A bare .gz/.bz2 holds a single file named like the archive minus its suffix.
            inner_name = os.path.basename(os.path.splitext(archive_path)[0])
            output_file = os.path.join(temp_dir, inner_name)
            with codec.open(archive_path, 'rb') as f_in, open(output_file, 'wb') as f_out:
                shutil.copyfileobj(f_in, f_out)
            return temp_dir
        elif ext == '.7z':
            import py7zr
            with py7zr.SevenZipFile(archive_path, 'r') as sz:
                sz.extractall(temp_dir)
            return temp_dir
        elif ext == '.rar':
            try:
                import rarfile
            except ImportError:
                import subprocess
                # Prefer an unrar found on PATH; fall back to the default WinRAR location.
                unrar_path = shutil.which('unrar') or r'C:\Program Files\WinRAR\unrar.exe'
                try:
                    # The destination ends with a separator so unrar treats it
                    # as a folder and not as a name of a file to extract.
                    result = subprocess.run(
                        [unrar_path, 'x', '-o+', archive_path, temp_dir + os.sep],
                        capture_output=True,
                    )
                except OSError:
                    result = None
                if result is not None and result.returncode == 0:
                    return temp_dir
                print('Warning: Failed to extract ' + archive_path + ': rarfile is not installed and unrar failed')
            else:
                with rarfile.RarFile(archive_path, 'r') as rf:
                    rf.extractall(temp_dir)
                return temp_dir
    except Exception as e:
        # Best effort: any extraction problem is reported and turned into None.
        print('Warning: Failed to extract ' + archive_path + ': ' + str(e))
    shutil.rmtree(temp_dir, ignore_errors=True)
    return None
def extract_zip_with_subdir(zip_path, sub_path, extract_dir):
    """Extract one sub-tree (or everything) of a zip file into a temp directory.

    Args:
        zip_path: Path of the zip archive.
        sub_path: Directory or file inside the archive; ``/`` and ``\\`` are
            both accepted as separators.  Empty means the whole archive.
        extract_dir: Not used; accepted so existing callers keep working.

    Returns:
        A temporary directory that directly contains the content of
        *sub_path* (or the single file, if *sub_path* names a file).  Removing
        the returned directory removes everything this function created.
        ``None`` if the archive cannot be read or *sub_path* is not in it.
    """
    # Zip member names always use '/', whatever separator the caller used.
    wanted = sub_path.replace('\\', '/').strip('/') if sub_path else ''
    out_dir = tempfile.mkdtemp()
    staging = None
    try:
        with zipfile.ZipFile(zip_path, 'r') as zip_ref:
            if not wanted:
                zip_ref.extractall(out_dir)
                return out_dir
            members = [
                name for name in zip_ref.namelist()
                if name == wanted or name.startswith(wanted + '/')
            ]
            # Extract with the archive's own layout first, then lift the
            # requested sub-tree into out_dir so that the returned directory
            # is the root of everything left on disk.
            staging = tempfile.mkdtemp()
            zip_ref.extractall(staging, members)
        extracted = os.path.join(staging, *wanted.split('/'))
        if os.path.isdir(extracted):
            for entry in os.listdir(extracted):
                shutil.move(os.path.join(extracted, entry), os.path.join(out_dir, entry))
            return out_dir
        if os.path.isfile(extracted):
            shutil.move(extracted, os.path.join(out_dir, os.path.basename(extracted)))
            return out_dir
        print('Warning: ' + wanted + ' not found in ' + zip_path)
    except Exception as e:
        # Best effort: report the problem and signal failure with None.
        print('Warning: Failed to extract ' + zip_path + ' subdir ' + str(sub_path) + ': ' + str(e))
    finally:
        if staging is not None:
            shutil.rmtree(staging, ignore_errors=True)
    shutil.rmtree(out_dir, ignore_errors=True)
    return None
# Comment syntax per extension: (line-comment prefixes, block opener, block closer).
_COMMENT_SYNTAX = {
    ext: syntax
    for exts, syntax in (
        (('.js', '.ts', '.jsx', '.tsx', '.css', '.java', '.c', '.h', '.cpp', '.cc',
          '.cxx', '.hpp', '.m', '.mm', '.swift', '.kt', '.scala', '.go', '.rs', '.php'),
         (('//',), '/*', '*/')),
        (('.sh', '.rb', '.pl'), (('#',), None, None)),
        (('.ps1',), (('#',), '<#', '#>')),
        (('.lua',), (('--',), '--[[', ']]')),
        (('.html',), ((), '<!--', '-->')),
    )
    for ext in exts
}


def _count_python_lines(lines):
    """Return (code, blank, comment) counts for lines of Python source."""
    code = blank = comment = 0
    open_quote = None      # delimiter of the triple-quoted string we are inside
    is_docstring = False   # True when that string began at the start of a line
    for line in lines:
        stripped = line.strip()
        if open_quote is not None:
            if is_docstring:
                comment += 1
            elif stripped:
                code += 1
            else:
                blank += 1
            # An odd number of delimiters on the line closes the string.
            if stripped.count(open_quote) % 2 == 1:
                open_quote = None
            continue
        if not stripped:
            blank += 1
            continue
        if stripped.startswith('#'):
            comment += 1
            continue
        found = [(stripped.find(q), q) for q in ('"""', "'''") if q in stripped]
        if not found:
            code += 1
            continue
        first_at, quote = min(found)
        # A triple-quoted string that starts the line is counted as a comment
        # (docstring); one that starts after other code is an ordinary literal.
        if first_at == 0:
            comment += 1
        else:
            code += 1
        if stripped.count(quote) % 2 == 1:
            open_quote = quote
            is_docstring = first_at == 0
    return code, blank, comment


def _count_marked_lines(lines, line_markers, block_open, block_close):
    """Return (code, blank, comment) counts using prefix/block comment markers."""
    code = blank = comment = 0
    in_block = False
    for line in lines:
        stripped = line.strip()
        if in_block:
            comment += 1
            if block_close in stripped:
                in_block = False
            continue
        if not stripped:
            blank += 1
            continue
        starts_block = bool(block_open) and stripped.startswith(block_open)
        if starts_block or stripped.startswith(line_markers):
            comment += 1
            opens_here = starts_block
        else:
            # Code followed by a comment on the same line still counts as code.
            code += 1
            opens_here = bool(block_open) and block_open in stripped
        if opens_here:
            # The block stays open if the last opener on the line is not closed.
            last_open = stripped.rfind(block_open)
            in_block = block_close not in stripped[last_open + len(block_open):]
    return code, blank, comment


def _count_batch_lines(lines):
    """Return (code, blank, comment) counts for a Windows batch file."""
    code = blank = comment = 0
    for line in lines:
        # '@' only suppresses echo; batch keywords are case-insensitive.
        text = line.strip().lstrip('@').upper()
        if not line.strip():
            blank += 1
        elif text.startswith('::') or text == 'REM' or text.startswith(('REM ', 'REM\t')):
            comment += 1
        else:
            code += 1
    return code, blank, comment


def count_lines(file_path):
    """Count total, code, blank and comment lines of one text file.

    The file is read as UTF-8 with undecodable bytes ignored.  Comment
    detection is a line-based heuristic chosen by file extension: ``#`` and
    triple-quoted strings for Python, ``//`` and ``/* */`` for C-like
    languages, ``#`` for shell/Ruby/Perl, ``#`` and ``<# #>`` for PowerShell,
    ``--`` and ``--[[ ]]`` for Lua, ``<!-- -->`` for HTML, ``REM``/``::`` for
    batch files.  Other extensions have no comment detection.  Lines inside a
    block comment or docstring count as comments even when empty.

    Args:
        file_path: Path of the file to analyse.

    Returns:
        ``(total, code, blank, comment)``; all zeros if the file cannot be read.
    """
    try:
        with open(file_path, 'r', encoding='utf-8', errors='ignore') as f:
            lines = f.readlines()
    except (OSError, ValueError):
        return 0, 0, 0, 0
    ext = os.path.splitext(file_path)[1].lower()
    if ext == '.py':
        code, blank, comment = _count_python_lines(lines)
    elif ext == '.bat':
        code, blank, comment = _count_batch_lines(lines)
    else:
        markers, opener, closer = _COMMENT_SYNTAX.get(ext, ((), None, None))
        code, blank, comment = _count_marked_lines(lines, markers, opener, closer)
    return len(lines), code, blank, comment
def scan_directory(target_path, is_root=True):
    """Walk a directory tree and accumulate line statistics.

    Directories listed in ``SKIP_DIRS`` (and any ``*.egg-info`` directory) are
    not entered.  Files with an extension from ``EXTENSIONS`` are counted with
    ``count_lines``; spreadsheet/CSV files are only counted as files under the
    ``'excel'`` label; other files are ignored.  Archives found on the way are
    unpacked to a temporary directory, scanned recursively, merged into the
    result and deleted again.

    Args:
        target_path: Directory to scan.
        is_root: Not used; accepted so existing callers keep working.

    Returns:
        A dict with ``total_files``, ``total_lines``, ``code_lines``,
        ``blank_lines``, ``comment_lines`` and ``file_stats`` (label -> dict
        with ``files``, ``lines``, ``code``, ``comment``).
    """
    total_files = 0
    total_lines = 0
    code_lines = 0
    blank_lines = 0
    comment_lines = 0
    file_stats = {}
    nested_archives = []
    for dirpath, dirnames, filenames in os.walk(target_path):
        # Prune in place so os.walk does not descend into skipped directories.
        # Real egg-info directories are named '<package>.egg-info', so the
        # '.egg-info' entry of SKIP_DIRS alone would never match them.
        dirnames[:] = [
            d for d in dirnames
            if d not in SKIP_DIRS and not d.endswith('.egg-info')
        ]
        for filename in filenames:
            file_path = os.path.join(dirpath, filename)
            ext = get_archive_ext(filename)
            if ext in ARCHIVE_EXTENSIONS:
                nested_archives.append(file_path)
                continue
            if ext not in EXTENSIONS:
                if ext in ('.xlsx', '.xls', '.xlsb', '.xlsm', '.csv'):
                    # Counted as a file only; no line statistics are collected.
                    total_files += 1
                    excel = file_stats.setdefault(
                        'excel', {'files': 0, 'lines': 0, 'code': 0, 'comment': 0})
                    excel['files'] += 1
                continue
            total_files += 1
            total, code, blank, comment = count_lines(file_path)
            total_lines += total
            code_lines += code
            blank_lines += blank
            comment_lines += comment
            per_lang = file_stats.setdefault(
                EXTENSIONS[ext], {'files': 0, 'lines': 0, 'code': 0, 'comment': 0})
            per_lang['files'] += 1
            per_lang['lines'] += total
            per_lang['code'] += code
            per_lang['comment'] += comment
    for archive_path in nested_archives:
        print(' -> Found nested archive: ' + os.path.basename(archive_path))
        nested_dir = extract_archive(archive_path, None)
        if not nested_dir:
            continue
        try:
            nested_results = scan_directory(nested_dir, False)
        finally:
            # Remove the extracted copy even if the nested scan raised.
            shutil.rmtree(nested_dir, ignore_errors=True)
        if not nested_results:
            continue
        total_files += nested_results['total_files']
        total_lines += nested_results['total_lines']
        code_lines += nested_results['code_lines']
        blank_lines += nested_results['blank_lines']
        comment_lines += nested_results['comment_lines']
        for lang, stats in nested_results['file_stats'].items():
            merged = file_stats.setdefault(
                lang, {'files': 0, 'lines': 0, 'code': 0, 'comment': 0})
            for key in ('files', 'lines', 'code', 'comment'):
                merged[key] += stats[key]
    return {
        'total_files': total_files,
        'total_lines': total_lines,
        'code_lines': code_lines,
        'blank_lines': blank_lines,
        'comment_lines': comment_lines,
        'file_stats': file_stats,
    }
def _remove_temp_dirs(temp_dirs):
    """Delete every directory in *temp_dirs*, ignoring errors."""
    for temp_dir in temp_dirs:
        shutil.rmtree(temp_dir, ignore_errors=True)


def process_nested_path(archive_path, sub_path, temp_dirs=None):
    """Resolve a path inside (possibly nested) archives to a path on disk.

    *sub_path* is followed one component at a time.  Whenever the current
    location is an archive it is extracted to a temporary directory and the
    next component is looked up there, first at the top level and then
    anywhere below it.

    Args:
        archive_path: Existing archive file to start from.
        sub_path: Path inside the archive; both separator styles are accepted.
        temp_dirs: Optional list that receives every temporary directory
            created; a new list is used when omitted.

    Returns:
        ``(resolved_path, temp_dirs)`` on success; the caller must delete the
        directories in ``temp_dirs`` when done.  ``(None, None)`` when a
        component cannot be found or an archive cannot be extracted, in which
        case all directories in ``temp_dirs`` have already been removed.
        ``(archive_path, '')`` when *sub_path* is empty.
    """
    if temp_dirs is None:
        temp_dirs = []
    if not sub_path:
        return archive_path, ''
    # Empty components (doubled or trailing separators) carry no information.
    parts = [part for part in sub_path.replace('\\', '/').split('/') if part]
    current_path = archive_path
    for part in parts:
        if get_archive_ext(current_path) in ARCHIVE_EXTENSIONS:
            print('Extracting archive: ' + current_path)
            temp_dir = extract_archive(current_path, None)
            if not temp_dir:
                # Do not leave earlier extractions behind on failure.
                _remove_temp_dirs(temp_dirs)
                return None, None
            temp_dirs.append(temp_dir)
            target = os.path.join(temp_dir, part)
            if not os.path.exists(target):
                # Fall back to the first entry with that name anywhere in the tree.
                for root, dirs, files in os.walk(temp_dir):
                    if part in files or part in dirs:
                        target = os.path.join(root, part)
                        break
            if not os.path.exists(target):
                print('Warning: ' + part + ' not found in archive')
                _remove_temp_dirs(temp_dirs)
                return None, None
        else:
            target = os.path.join(current_path, part)
            if not os.path.exists(target):
                _remove_temp_dirs(temp_dirs)
                return None, None
        current_path = target
    return current_path, temp_dirs
def _discard_temp_dirs(temp_dirs):
    """Delete the given temporary directories; accepts None and ignores errors."""
    for temp_dir in temp_dirs or []:
        shutil.rmtree(temp_dir, ignore_errors=True)


def _tag_archive_results(results, archive_name):
    """Mark *results* as coming from an archive and return it."""
    results['is_archive'] = True
    results['archive_name'] = archive_name
    return results


def scan_path(target_path):
    """Collect line statistics for a directory, file, archive or archive member.

    Handled cases, in order: a non-existent path that points inside an
    existing archive (``a.zip/src``), an archive file, a single source file
    with a known extension, and a directory.

    Args:
        target_path: Path to scan.

    Returns:
        The statistics dict produced by ``scan_directory`` (for archives
        extended with ``is_archive`` and ``archive_name``), or ``None`` after
        printing an error when the path cannot be scanned.
    """
    if not os.path.exists(target_path):
        archive_path, sub_path = split_archive_path(target_path)
        if os.path.exists(archive_path) and sub_path:
            label = os.path.basename(archive_path) + ' / ' + sub_path
            if archive_path.lower().endswith('.zip'):
                print('Extracting from archive: ' + archive_path)
                print('Subdirectory: ' + sub_path)
                scan_base = extract_zip_with_subdir(archive_path, sub_path, None)
                if scan_base and os.path.exists(scan_base):
                    print('Scanning subdirectory: ' + sub_path)
                    try:
                        results = scan_directory(scan_base, None)
                    finally:
                        shutil.rmtree(scan_base, ignore_errors=True)
                    if results:
                        return _tag_archive_results(results, label)
            elif get_archive_ext(archive_path) in ARCHIVE_EXTENSIONS:
                final_path, temp_dirs = process_nested_path(archive_path, sub_path)
                # On failure process_nested_path returns (None, None); in that
                # case there is nothing to clean up and the error below is printed.
                if final_path:
                    try:
                        if os.path.isdir(final_path):
                            print('Scanning subdirectory: ' + sub_path)
                            results = scan_directory(final_path, None)
                        else:
                            # A plain file or a nested archive: handled by the
                            # regular branches of this function.
                            results = scan_path(final_path)
                    finally:
                        _discard_temp_dirs(temp_dirs)
                    if results:
                        return _tag_archive_results(results, label)
                    return None
        print('Error: Path does not exist - ' + target_path)
        return None
    ext = get_archive_ext(target_path)
    # A directory whose name merely ends in an archive suffix is scanned as a directory.
    if ext in ARCHIVE_EXTENSIONS and os.path.isfile(target_path):
        print('Extracting archive: ' + target_path)
        nested_dir = extract_archive(target_path, None)
        if not nested_dir:
            print('Error: Unsupported archive format - ' + target_path)
            return None
        print('Scanning files (including nested archives)...')
        try:
            results = scan_directory(nested_dir, None)
        finally:
            shutil.rmtree(nested_dir, ignore_errors=True)
        if results:
            _tag_archive_results(results, os.path.basename(target_path))
        return results
    if os.path.isfile(target_path):
        if ext not in EXTENSIONS:
            print('Error: Unsupported file type - ' + target_path)
            return None
        total, code, blank, comment = count_lines(target_path)
        lang = EXTENSIONS[ext]
        return {
            'total_files': 1,
            'total_lines': total,
            'code_lines': code,
            'blank_lines': blank,
            'comment_lines': comment,
            'file_stats': {lang: {'files': 1, 'lines': total, 'code': code, 'comment': comment}},
        }
    if not os.path.isdir(target_path):
        print('Error: Path is not a valid file or directory - ' + target_path)
        return None
    return scan_directory(target_path)
def print_results(results, root_path):
    """Write the scan report to standard output.

    Prints a header naming the scanned archive or directory, the overall
    totals, and (if any per-type statistics exist) a table sorted by code
    lines in descending order.  The blank-line column of the table is derived
    as lines minus code minus comment.

    Args:
        results: Statistics dict with the totals and ``file_stats``; the
            optional keys ``is_archive`` and ``archive_name`` control the header.
        root_path: Name shown in the header when ``archive_name`` is absent.
    """
    heavy_rule = '=' * 60
    light_rule = '-' * 65
    row_template = '{:<15} {:>6} {:>10} {:>10} {:>10} {:>10}'

    heading = 'Archive: ' if results.get('is_archive') else 'Directory: '
    print('\n' + heavy_rule)
    print(heading + results.get('archive_name', root_path))
    print(heavy_rule)

    print('\nTotal Statistics:')
    summary_rows = (
        (' Total files: ', 'total_files'),
        (' Code lines: ', 'code_lines'),
        (' Comment lines: ', 'comment_lines'),
        (' Blank lines: ', 'blank_lines'),
        (' Total lines: ', 'total_lines'),
    )
    for caption, key in summary_rows:
        print(caption + str(results[key]))

    per_type = results['file_stats']
    if per_type:
        print('\nStatistics by file type:')
        print(light_rule)
        print(row_template.format('Type', 'Files', 'Code', 'Comment', 'Blank', 'Total'))
        print(light_rule)
        ranked = list(per_type.items())
        ranked.sort(key=lambda entry: entry[1]['code'], reverse=True)
        for label, counts in ranked:
            blank_count = counts['lines'] - counts['code'] - counts['comment']
            print(row_template.format(
                label, counts['files'], counts['code'], counts['comment'],
                blank_count, counts['lines']))
    print(heavy_rule)
def main():
    """Command-line entry point: scan the given path and print the report.

    The optional positional argument may be a directory, a file, an archive,
    a path inside an archive, or a URL; it defaults to the current working
    directory.  Exits with status 1 when the scan produces no result.
    """
    parser = argparse.ArgumentParser(
        description='Count files and lines of code in a directory, file, or archive (including nested)',
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog='''
Examples:
python counter_code_Ultra.py "C:\\path\\to\\directory"
python counter_code_Ultra.py "C:\\path\\to\\file.py"
python counter_code_Ultra.py "C:\\path\\to\\archive.zip"
python counter_code_Ultra.py "C:\\path\\to\\archive.zip\\subdir"
python counter_code_Ultra.py "C:\\path\\to\\nested.tar.gz"
'''
    )
    parser.add_argument(
        'path',
        nargs='?',
        # Scan the current directory when no path is given, rather than a
        # machine-specific placeholder path that does not exist.
        default=os.getcwd(),
        help='Directory, file, or archive path to scan (default: current directory)'
    )
    args = parser.parse_args()
    target_path = url_to_path(args.path)
    print('Scanning: ' + target_path)
    results = scan_path(target_path)
    if results:
        print_results(results, target_path)
    else:
        sys.exit(1)


if __name__ == '__main__':
    main()
运行结果图如下: