Python一键统计你的本地文件、文件夹或压缩包的代码行数

0 阅读7分钟

使用 Python 一键统计本地文件、文件夹或压缩包的代码行数,可处理嵌套的压缩包和文件夹;统计结果包含代码行数、空白行数和注释行数。

# -*- coding: utf-8 -*-
import os
import sys
import argparse
import urllib.parse
import tempfile
import shutil
import zipfile
import tarfile

# Maps a lower-case file suffix to the language label under which the file is
# reported. Only files whose suffix appears here have their lines counted.
EXTENSIONS = {
    '.py': 'python',
    '.js': 'javascript',
    '.ts': 'typescript',
    '.jsx': 'javascript',
    '.tsx': 'typescript',
    '.html': 'html',
    '.css': 'css',
    '.sql': 'sql',
    '.sh': 'shell',
    '.bat': 'batch',
    '.ps1': 'powershell',
    '.md': 'markdown',
    '.json': 'json',
    '.yaml': 'yaml',
    '.yml': 'yaml',
    '.xml': 'xml',
    '.txt': 'text',
    '.conf': 'config',
    '.ini': 'config',
    '.env': 'env',
    '.db': 'database',
    '.c': 'c',
    '.h': 'c',
    '.cpp': 'cpp',
    '.cc': 'cpp',
    '.cxx': 'cpp',
    '.hpp': 'cpp',
    '.java': 'java',
    '.go': 'go',
    '.rs': 'rust',
    '.swift': 'swift',
    '.kt': 'kotlin',
    '.scala': 'scala',
    '.rb': 'ruby',
    '.php': 'php',
    '.pl': 'perl',
    '.lua': 'lua',
    '.r': 'r',
    '.m': 'objective-c',
    '.mm': 'objective-c',
}

# Suffixes that mark a file as an archive to be unpacked rather than counted.
ARCHIVE_EXTENSIONS = {'.zip', '.tar', '.tar.gz', '.tar.bz2', '.tar.zst', '.tgz', '.gz', '.bz2', '.7z', '.rar'}

# Directory names pruned while walking a tree.
# NOTE: '.egg-info' is normally the *suffix* of a directory name
# (e.g. "mypkg.egg-info"), so an exact-name comparison alone will not catch
# those directories.
SKIP_DIRS = {'__pycache__', '.git', '.venv', 'venv', 'node_modules', '.pytest_cache', '.mypy_cache', 'dist', 'build', '.egg-info'}

def url_to_path(url_or_path):
    """Convert a user-supplied path or URL into a local filesystem path.

    Handles three kinds of input:

    * ``file://`` URLs: the path component is percent-decoded, a leading
      slash in front of a Windows drive letter is dropped, and on POSIX the
      leading slash of an absolute path is preserved.
    * Other ``scheme://`` URLs: the URL path (or, if empty, the host) is used.
    * Plain paths: returned with both slash styles mapped to ``os.sep``.

    Args:
        url_or_path: Path or URL text; surrounding whitespace is ignored.

    Returns:
        The path using ``os.sep`` as separator, without trailing backslashes.
    """
    path = url_or_path.strip()
    if path.startswith('file://'):
        parsed = urllib.parse.urlparse(path)
        # Percent-escapes (e.g. %20) belong to the URL encoding, not to the
        # file name on disk, so decode them.
        path = urllib.parse.unquote(parsed.path)
        if parsed.netloc and parsed.netloc.lower() != 'localhost':
            # "file://C:/dir" or "file://server/share": keep the authority as
            # the first path component.
            path = urllib.parse.unquote(parsed.netloc) + path
        elif len(path) >= 3 and path[0] == '/' and path[1].isalpha() and path[2] == ':':
            # "file:///C:/dir" parses to "/C:/dir"; the slash before the drive
            # letter is URL syntax only.
            path = path[1:]
    elif '://' in path:
        parsed = urllib.parse.urlparse(path)
        path = parsed.path if parsed.path else parsed.netloc
    path = path.replace('/', os.sep).replace('\\', os.sep)
    # Only reachable where os.sep is a backslash: drop a leading separator
    # from drive-less paths.
    if path.startswith('\\') and ':' not in path:
        path = path[1:]
    return path.rstrip('\\')

def split_archive_path(path):
    """Split ``path`` into an existing archive file and the path inside it.

    If ``path`` itself exists it is returned unchanged with an empty inner
    path. Otherwise every position where a known archive suffix occurs is
    tried from left to right, and the first prefix that exists on disk is
    taken as the archive.

    Returns:
        ``(archive_path, sub_path)``; ``(path, '')`` when nothing matches.
    """
    if os.path.exists(path):
        return path, ''

    # Order matters: for equal start positions the earlier suffix is tried
    # first, so '.tar.gz' wins over '.tar'.
    suffixes = ('.tar.gz', '.tar.bz2', '.tar.zst', '.tgz', '.zip', '.7z', '.rar', '.tar', '.gz', '.bz2')
    haystack = path.lower()

    def occurrences(suffix):
        # Yield every index at which `suffix` occurs in the lower-cased path.
        pos = haystack.find(suffix)
        while pos != -1:
            yield pos
            pos = haystack.find(suffix, pos + 1)

    cut_points = [
        (pos, pos + len(suffix))
        for suffix in suffixes
        for pos in occurrences(suffix)
    ]
    # list.sort is stable, so ties keep the suffix order above.
    cut_points.sort(key=lambda pair: pair[0])

    for _, end in cut_points:
        head = path[:end]
        if os.path.exists(head):
            return head, path[end:].lstrip('\\').lstrip('/')

    return path, ''

def get_archive_ext(filename):
    """Return the lower-case extension of ``filename``.

    Compound archive suffixes such as ``.tar.gz`` are returned whole; for any
    other name the result is the lower-cased ``os.path.splitext`` extension
    (possibly the empty string).
    """
    lowered = filename.lower()
    # Longer suffixes come first so '.tar.gz' is not reported as '.gz'.
    known = ('.tar.gz', '.tar.bz2', '.tar.zst', '.tgz', '.zip', '.tar', '.gz', '.bz2', '.7z', '.rar')
    match = next((suffix for suffix in known if lowered.endswith(suffix)), None)
    if match is not None:
        return match
    _, plain_ext = os.path.splitext(filename)
    return plain_ext.lower()

def extract_archive(archive_path, extract_dir):
    """Unpack ``archive_path`` into a fresh temporary directory.

    Supported formats: zip, tar (plain, gzip, bzip2, zstd), single-file
    ``.gz`` / ``.bz2``, 7z (needs ``py7zr``) and rar (needs ``rarfile`` or an
    ``unrar`` executable). ``.tar.zst`` needs the ``zstandard`` package.

    Args:
        archive_path: Path of the archive file.
        extract_dir: Unused; kept so existing callers keep working.

    Returns:
        The temporary directory holding the extracted content, which the
        caller must delete, or ``None`` when extraction failed or the format
        is unsupported (the temporary directory is removed in that case).
    """
    ext = get_archive_ext(archive_path)
    temp_dir = tempfile.mkdtemp()

    try:
        if ext == '.zip':
            with zipfile.ZipFile(archive_path, 'r') as zip_ref:
                zip_ref.extractall(temp_dir)
            return temp_dir

        tar_modes = {'.tar': 'r', '.tar.gz': 'r:gz', '.tgz': 'r:gz', '.tar.bz2': 'r:bz2'}
        if ext in tar_modes:
            with tarfile.open(archive_path, tar_modes[ext]) as tar:
                # filter='data' rejects members that would escape temp_dir.
                tar.extractall(temp_dir, filter='data')
            return temp_dir

        if ext == '.tar.zst':
            try:
                import zstandard
            except ImportError:
                zstandard = None
                print('Warning: the zstandard package is required to extract ' + archive_path)
            if zstandard is not None:
                # Decompress on the fly and let tarfile read the resulting
                # stream ('r|' = non-seekable, uncompressed tar stream).
                with open(archive_path, 'rb') as raw, \
                        zstandard.ZstdDecompressor().stream_reader(raw) as reader, \
                        tarfile.open(fileobj=reader, mode='r|') as tar:
                    tar.extractall(temp_dir, filter='data')
                return temp_dir

        elif ext in ('.gz', '.bz2'):
            # Single compressed file: the output name is the archive name
            # without its compression suffix.
            if ext == '.gz':
                import gzip
                opener = gzip.open
            else:
                import bz2
                opener = bz2.open
            inner_name = os.path.basename(os.path.splitext(archive_path)[0])
            output_file = os.path.join(temp_dir, inner_name)
            with opener(archive_path, 'rb') as f_in, open(output_file, 'wb') as f_out:
                shutil.copyfileobj(f_in, f_out)
            return temp_dir

        elif ext == '.7z':
            import py7zr
            with py7zr.SevenZipFile(archive_path, 'r') as sz:
                sz.extractall(temp_dir)
            return temp_dir

        elif ext == '.rar':
            try:
                import rarfile
            except ImportError:
                rarfile = None
            if rarfile is not None:
                with rarfile.RarFile(archive_path, 'r') as rf:
                    rf.extractall(temp_dir)
                return temp_dir
            # No rarfile module: fall back to an unrar executable, preferring
            # one found on PATH over the default WinRAR install location.
            import subprocess
            unrar_path = shutil.which('unrar') or r'C:\Program Files\WinRAR\unrar.exe'
            # unrar only treats the last argument as a destination directory
            # when it ends with a path separator.
            result = subprocess.run(
                [unrar_path, 'x', '-o+', archive_path, temp_dir + os.sep],
                capture_output=True,
            )
            if result.returncode == 0:
                return temp_dir
    except Exception as e:
        # Best effort: any failure (corrupt archive, missing optional
        # package, missing unrar, ...) is reported and turned into None.
        print('Warning: Failed to extract ' + archive_path + ': ' + str(e))

    shutil.rmtree(temp_dir, ignore_errors=True)
    return None

def extract_zip_with_subdir(zip_path, sub_path, extract_dir):
    """Extract one entry (file or directory) of a zip into a temp directory.

    Args:
        zip_path: Path of the zip file.
        sub_path: Entry inside the zip; forward or backward slashes are both
            accepted. Empty means "extract everything".
        extract_dir: Unused; kept so existing callers keep working.

    Returns:
        A self-contained temporary directory the caller must delete. For a
        directory entry it holds that directory's content; for a file entry
        it holds just that file; if nothing in the zip matches it is empty.
        ``None`` is returned when extraction fails or ``sub_path`` contains
        a ``..`` component.
    """
    staging_dir = tempfile.mkdtemp()
    result_dir = None

    try:
        # Zip member names always use '/', whatever the host OS uses.
        parts = [p for p in (sub_path or '').replace('\\', '/').split('/') if p and p != '.']
        if '..' in parts:
            # Would resolve outside the staging directory, which the caller
            # later deletes.
            raise ValueError('sub path must not contain ".."')
        wanted = '/'.join(parts)

        with zipfile.ZipFile(zip_path, 'r') as zip_ref:
            if not wanted:
                zip_ref.extractall(staging_dir)
                return staging_dir
            prefix = wanted + '/'
            members = [m for m in zip_ref.namelist() if m == wanted or m.startswith(prefix)]
            zip_ref.extractall(staging_dir, members=members)

        extracted = os.path.join(staging_dir, *parts)
        if not os.path.exists(extracted):
            return staging_dir

        # Hand back a top-level temp directory so that the caller's single
        # rmtree removes everything that was created here.
        result_dir = tempfile.mkdtemp()
        if os.path.isdir(extracted):
            os.rmdir(result_dir)
            shutil.move(extracted, result_dir)
        else:
            shutil.move(extracted, os.path.join(result_dir, os.path.basename(extracted)))
        shutil.rmtree(staging_dir, ignore_errors=True)
        return result_dir
    except Exception as e:
        print('Warning: Failed to extract ' + zip_path + ' subdir ' + (sub_path or '') + ': ' + str(e))

    for leftover in (staging_dir, result_dir):
        if leftover:
            shutil.rmtree(leftover, ignore_errors=True)

    return None

# Comment syntax per file extension, used only by count_lines. Each value is
#   (line-comment prefixes,
#    (block opener, block closer) pairs,
#    whether a block opener that follows code on the same line is tracked)
_C_FAMILY_COMMENTS = (('//',), (('/*', '*/'),), True)
_COMMENT_SYNTAX = {
    '.py': (('#',), (('"""', '"""'), ("'''", "'''")), False),
    '.sh': (('#',), (), False),
    '.pl': (('#',), (), False),
    '.rb': (('#',), (('=begin', '=end'),), False),
    '.ps1': (('#',), (('<#', '#>'),), True),
    '.lua': (('--',), (('--[[', ']]'),), True),
    '.html': ((), (('<!--', '-->'),), True),
}
_COMMENT_SYNTAX.update(dict.fromkeys(
    ('.js', '.ts', '.jsx', '.tsx', '.css', '.java', '.c', '.h', '.cpp', '.cc',
     '.cxx', '.hpp', '.m', '.mm', '.swift', '.kt', '.scala', '.go', '.rs', '.php'),
    _C_FAMILY_COMMENTS,
))


def _is_batch_comment(stripped):
    """Return True for a non-empty batch line that is a REM or '::' comment."""
    if stripped.startswith('::'):
        return True
    first_word = stripped.split(None, 1)[0]
    return first_word.lstrip('@').upper() == 'REM'


def count_lines(file_path):
    """Count total, code, blank and comment lines of one source file.

    Classification is a line-based heuristic driven by the file extension;
    string literals are not parsed. A line counts as a comment when it starts
    with a comment marker or lies inside a block comment (blank lines inside
    a block comment count as comment lines). A line that has code before a
    comment counts as code. Files with an extension that has no known comment
    syntax only get code and blank lines.

    The file is read as UTF-8 (a BOM is skipped); if that fails to decode it
    is re-read as GBK with undecodable bytes dropped.

    Returns:
        ``(total_lines, code_lines, blank_lines, comment_lines)``, or
        ``(0, 0, 0, 0)`` when the file cannot be read.
    """
    try:
        with open(file_path, 'r', encoding='utf-8-sig') as f:
            lines = f.readlines()
    except UnicodeDecodeError:
        try:
            with open(file_path, 'r', encoding='gbk', errors='ignore') as f:
                lines = f.readlines()
        except (OSError, ValueError):
            return 0, 0, 0, 0
    except (OSError, ValueError):
        return 0, 0, 0, 0

    ext = os.path.splitext(file_path)[1].lower()
    prefixes, blocks, track_inline = _COMMENT_SYNTAX.get(ext, ((), (), False))
    is_batch = ext == '.bat'

    code_lines = 0
    blank_lines = 0
    comment_lines = 0
    awaiting = None  # closer of the block comment we are inside, if any

    for line in lines:
        stripped = line.strip()

        if awaiting is not None:
            comment_lines += 1
            # The closer may sit anywhere on the line (e.g. a docstring whose
            # last line ends with the closing quotes).
            if awaiting in stripped:
                awaiting = None
            continue

        if not stripped:
            blank_lines += 1
            continue

        opened = next((pair for pair in blocks if stripped.startswith(pair[0])), None)
        if opened is not None:
            comment_lines += 1
            opener, closer = opened
            if closer not in stripped[len(opener):]:
                awaiting = closer
            continue

        if stripped.startswith(prefixes) or (is_batch and _is_batch_comment(stripped)):
            comment_lines += 1
            continue

        code_lines += 1
        if track_inline:
            # Code followed by a block comment that stays open: the following
            # lines belong to that comment.
            for opener, closer in blocks:
                at = stripped.find(opener)
                if at != -1 and closer not in stripped[at + len(opener):]:
                    awaiting = closer
                    break

    return len(lines), code_lines, blank_lines, comment_lines

def scan_directory(target_path, is_root=True):
    """Walk ``target_path`` and aggregate line statistics for every known file.

    Directories listed in ``SKIP_DIRS`` and any ``*.egg-info`` directory are
    not entered. Archives found on the way are extracted to a temporary
    directory, scanned recursively, merged into the result and deleted.
    Spreadsheet/CSV files are counted as files under the ``excel`` label
    without line counts.

    Args:
        target_path: Directory to scan.
        is_root: Unused; kept so existing callers keep working.

    Returns:
        A dict with ``total_files``, ``total_lines``, ``code_lines``,
        ``blank_lines``, ``comment_lines`` and ``file_stats`` (per-language
        dicts with ``files``, ``lines``, ``code`` and ``comment``).
    """
    totals = {
        'total_files': 0,
        'total_lines': 0,
        'code_lines': 0,
        'blank_lines': 0,
        'comment_lines': 0,
    }
    file_stats = {}
    nested_archives = []

    def stats_for(lang):
        # Per-language bucket, created on first use.
        return file_stats.setdefault(lang, {'files': 0, 'lines': 0, 'code': 0, 'comment': 0})

    for dirpath, dirnames, filenames in os.walk(target_path):
        # Prune in place so os.walk does not descend into skipped folders.
        # '.egg-info' is a name suffix ("pkg.egg-info"), so it needs a suffix
        # test in addition to the exact-name lookup.
        dirnames[:] = [
            d for d in dirnames
            if d not in SKIP_DIRS and not d.endswith('.egg-info')
        ]

        for filename in filenames:
            file_path = os.path.join(dirpath, filename)
            ext = get_archive_ext(filename)

            if ext in ARCHIVE_EXTENSIONS:
                nested_archives.append(file_path)
                continue

            if ext not in EXTENSIONS:
                if ext in ('.xlsx', '.xls', '.xlsb', '.xlsm', '.csv'):
                    totals['total_files'] += 1
                    stats_for('excel')['files'] += 1
                continue

            total, code, blank, comment = count_lines(file_path)
            totals['total_files'] += 1
            totals['total_lines'] += total
            totals['code_lines'] += code
            totals['blank_lines'] += blank
            totals['comment_lines'] += comment

            bucket = stats_for(EXTENSIONS.get(ext, 'other'))
            bucket['files'] += 1
            bucket['lines'] += total
            bucket['code'] += code
            bucket['comment'] += comment

    for archive_path in nested_archives:
        print('  -> Found nested archive: ' + os.path.basename(archive_path))
        nested_dir = extract_archive(archive_path, None)
        if not nested_dir:
            continue
        try:
            nested = scan_directory(nested_dir, False)
        finally:
            # Remove the extracted copy even if the nested scan raised.
            shutil.rmtree(nested_dir, ignore_errors=True)

        for key in totals:
            totals[key] += nested[key]
        for lang, stats in nested['file_stats'].items():
            bucket = stats_for(lang)
            for field in ('files', 'lines', 'code', 'comment'):
                bucket[field] += stats[field]

    return {
        'total_files': totals['total_files'],
        'total_lines': totals['total_lines'],
        'code_lines': totals['code_lines'],
        'blank_lines': totals['blank_lines'],
        'comment_lines': totals['comment_lines'],
        'file_stats': file_stats
    }

def process_nested_path(archive_path, sub_path, temp_dirs=None):
    """Resolve ``sub_path`` inside ``archive_path``, unpacking archives on the way.

    Each component of ``sub_path`` is looked up in turn. Whenever the current
    location is an archive file it is extracted to a temporary directory
    first; if the component is not at the top of the extracted tree, the
    first file or directory with that name anywhere in the tree is used.

    Args:
        archive_path: Existing archive file to start from.
        sub_path: Path inside the archive, using either slash style.
        temp_dirs: Optional list that receives every temporary directory
            created; a new list is used when omitted.

    Returns:
        ``(resolved_path, temp_dirs)`` on success; the caller must delete the
        listed directories. On failure every directory in ``temp_dirs`` is
        deleted and ``(None, [])`` is returned, so the second element can
        always be iterated.
    """
    if temp_dirs is None:
        temp_dirs = []

    if not sub_path:
        return archive_path, temp_dirs

    def fail(message=None):
        # Report (optionally), drop everything extracted so far, signal failure.
        if message:
            print(message)
        for td in temp_dirs:
            shutil.rmtree(td, ignore_errors=True)
        return None, []

    parts = [p for p in sub_path.replace('\\', '/').split('/') if p and p != '.']
    current_path = archive_path

    for part in parts:
        if part == '..':
            # Would step outside the extracted tree.
            return fail('Warning: ' + part + ' not found in archive')

        if os.path.isfile(current_path) and get_archive_ext(current_path) in ARCHIVE_EXTENSIONS:
            print('Extracting archive: ' + current_path)
            temp_dir = extract_archive(current_path, None)
            if not temp_dir:
                return fail()
            temp_dirs.append(temp_dir)

            target = os.path.join(temp_dir, part)
            if not os.path.exists(target):
                # Not at the top level: take the first entry with that name.
                for root, dirs, files in os.walk(temp_dir):
                    if part in files or part in dirs:
                        target = os.path.join(root, part)
                        break

            if not os.path.exists(target):
                return fail('Warning: ' + part + ' not found in archive')
        else:
            target = os.path.join(current_path, part)
            if not os.path.exists(target):
                return fail()

        current_path = target

    return current_path, temp_dirs

def _mark_as_archive(results, archive_name):
    """Tag a non-empty result dict as coming from an archive and return it."""
    if results:
        results['is_archive'] = True
        results['archive_name'] = archive_name
    return results


def scan_path(target_path):
    """Scan a directory, a single source file, an archive, or a path inside an archive.

    A path that does not exist on disk is interpreted as
    ``<archive file>/<path inside the archive>``; the archive part is
    extracted to temporary directories that are removed again before
    returning.

    Returns:
        The statistics dict described in ``scan_directory`` (with
        ``is_archive`` / ``archive_name`` added for archive content), or
        ``None`` after printing an error when the path cannot be scanned.
    """
    if not os.path.exists(target_path):
        archive_path, sub_path = split_archive_path(target_path)
        # A '..' component would resolve outside the extracted tree, so such
        # paths are treated as non-existent.
        escapes = '..' in sub_path.replace('\\', '/').split('/')
        if os.path.exists(archive_path) and sub_path and not escapes:
            display_name = os.path.basename(archive_path) + ' / ' + sub_path
            lowered = archive_path.lower()
            if lowered.endswith('.zip'):
                print('Extracting from archive: ' + archive_path)
                print('Subdirectory: ' + sub_path)
                scan_base = extract_zip_with_subdir(archive_path, sub_path, None)
                if scan_base and os.path.exists(scan_base):
                    print('Scanning subdirectory: ' + sub_path)
                    try:
                        results = scan_directory(scan_base, None)
                    finally:
                        shutil.rmtree(scan_base, ignore_errors=True)
                    return _mark_as_archive(results, display_name)
            elif lowered.endswith(('.7z', '.rar', '.tar', '.tar.gz', '.tar.bz2', '.tar.zst',
                                   '.tgz', '.gz', '.bz2')):
                final_path, temp_dirs = process_nested_path(archive_path, sub_path)
                try:
                    if final_path and os.path.isdir(final_path):
                        print('Scanning subdirectory: ' + sub_path)
                        return _mark_as_archive(scan_directory(final_path, None), display_name)
                    if final_path and os.path.isfile(final_path):
                        # Source file or nested archive: scan_path handles both.
                        return _mark_as_archive(scan_path(final_path), display_name)
                    return None
                finally:
                    # temp_dirs may be None when the lookup failed.
                    for td in temp_dirs or ():
                        shutil.rmtree(td, ignore_errors=True)
        print('Error: Path does not exist - ' + target_path)
        return None

    ext = get_archive_ext(target_path)

    # Require a regular file so a directory that merely has an archive-like
    # name is scanned as a directory.
    if ext in ARCHIVE_EXTENSIONS and os.path.isfile(target_path):
        print('Extracting archive: ' + target_path)
        nested_dir = extract_archive(target_path, None)
        if not nested_dir:
            print('Error: Unsupported archive format - ' + target_path)
            return None
        print('Scanning files (including nested archives)...')
        try:
            results = scan_directory(nested_dir, None)
        finally:
            shutil.rmtree(nested_dir, ignore_errors=True)
        return _mark_as_archive(results, os.path.basename(target_path))

    if os.path.isfile(target_path):
        if ext not in EXTENSIONS:
            print('Error: Unsupported file type - ' + target_path)
            return None

        total, code, blank, comment = count_lines(target_path)
        lang = EXTENSIONS.get(ext, 'other')

        return {
            'total_files': 1,
            'total_lines': total,
            'code_lines': code,
            'blank_lines': blank,
            'comment_lines': comment,
            'file_stats': {lang: {'files': 1, 'lines': total, 'code': code, 'comment': comment}}
        }

    if not os.path.isdir(target_path):
        print('Error: Path is not a valid file or directory - ' + target_path)
        return None

    return scan_directory(target_path)

def print_results(results, root_path):
    """Print the overall totals and the per-language table to stdout.

    Args:
        results: Statistics dict as produced by the scan functions.
        root_path: Name shown in the heading when ``results`` carries no
            ``archive_name``.
    """
    heavy_rule = '=' * 60
    light_rule = '-' * 65
    row_template = '{:<15} {:>6} {:>10} {:>10} {:>10} {:>10}'

    heading = 'Archive: ' if results.get('is_archive') else 'Directory: '
    print('\n' + heavy_rule)
    print(heading + results.get('archive_name', root_path))
    print(heavy_rule)

    print('\nTotal Statistics:')
    summary_rows = (
        ('   Total files:     ', 'total_files'),
        ('   Code lines:     ', 'code_lines'),
        ('   Comment lines:  ', 'comment_lines'),
        ('   Blank lines:    ', 'blank_lines'),
        ('   Total lines:    ', 'total_lines'),
    )
    for label, key in summary_rows:
        print(label + str(results[key]))

    per_type = results['file_stats']
    if per_type:
        print('\nStatistics by file type:')
        print(light_rule)
        print(row_template.format('Type', 'Files', 'Code', 'Comment', 'Blank', 'Total'))
        print(light_rule)

        # Languages with the most code lines come first.
        ranked = sorted(per_type.items(), key=lambda item: item[1]['code'], reverse=True)
        for lang, stats in ranked:
            # Blank lines are not stored per language; derive them.
            blank = stats['lines'] - stats['code'] - stats['comment']
            print(row_template.format(
                lang, stats['files'], stats['code'], stats['comment'], blank, stats['lines']))

    print(heavy_rule)

def main():
    """Command-line entry point: parse the path argument, scan it, print the report.

    Exits with status 1 when the scan yields no result.
    """
    usage_examples = '''
Examples:
  python counter_code_Ultra.py "C:\\path\\to\\directory"
  python counter_code_Ultra.py  "C:\\path\\to\\file.py"
  python counter_code_Ultra.py  "C:\\path\\to\\archive.zip"
  python counter_code_Ultra.py  "C:\\path\\to\\archive.zip\\subdir"
  python counter_code_Ultra.py "C:\\path\\to\\nested.tar.gz"
        '''

    cli = argparse.ArgumentParser(
        description='Count files and lines of code in a directory, file, or archive (including nested)',
        # Raw formatter keeps the line breaks of the examples above.
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog=usage_examples,
    )
    cli.add_argument(
        'path',
        nargs='?',
        default='C:\\Users\\your_username\\Documents\\Vscode',
        help='Directory, file, or archive path to scan',
    )
    options = cli.parse_args()

    resolved = url_to_path(options.path)
    print('Scanning: ' + resolved)

    outcome = scan_path(resolved)
    if not outcome:
        sys.exit(1)
    print_results(outcome, resolved)


if __name__ == '__main__':
    main()

运行结果图如下:

Snipaste_2026-04-22_23-23-54.png