Python数据处理实战:让数据开口说话

42 阅读4分钟

在信息爆炸的今天,数据就像新时代的石油。但原始数据本身没有价值,只有通过处理和分析,我们才能从中提取出有用的信息。Python正是处理数据的利器,它能帮你把枯燥的数字变成有意义的洞察,让数据真正"开口说话"。

实战代码:销售数据分析系统

基础数据结构操作

# Build the sample sales dataset: one dict per product holding the product
# name, unit price, units sold, and category.
sales_data = [
    {"product": "笔记本电脑", "price": 5999, "quantity": 3, "category": "电子产品"},
    {"product": "智能手机", "price": 3999, "quantity": 5, "category": "电子产品"},
    {"product": "书籍", "price": 49, "quantity": 20, "category": "文教用品"},
    {"product": "咖啡", "price": 35, "quantity": 15, "category": "食品饮料"},
    {"product": "鼠标", "price": 89, "quantity": 8, "category": "电子产品"},
    {"product": "笔记本", "price": 15, "quantity": 30, "category": "文教用品"}
]

# Echo the raw records, one line per product (name, quantity, unit price).
print("原始销售数据:")
for sale in sales_data:
    print(f"{sale['product']}: {sale['quantity']}件, 单价{sale['price']}元")

数据处理函数

def calculate_revenue(sales):
    """Return the total revenue across all sales records.

    The revenue of a single record is its ``price`` multiplied by its
    ``quantity``.

    Args:
        sales: Iterable of dicts that each provide "price" and "quantity".

    Returns:
        The sum of ``price * quantity`` over every record, or 0 when
        ``sales`` is empty.

    Raises:
        KeyError: If a record lacks "price" or "quantity".
    """
    # The built-in sum() replaces the hand-written accumulator loop.
    return sum(record["price"] * record["quantity"] for record in sales)

def filter_by_category(sales, category):
    """Return the sales records whose "category" equals ``category``.

    Args:
        sales: Iterable of dicts that each provide a "category" key.
        category: Category value to match by equality.

    Returns:
        A new list with the matching records, in their original order.
        The records themselves are not copied.
    """
    matching = []
    for record in sales:
        if record["category"] == category:
            matching.append(record)
    return matching

def get_top_products(sales, n=3):
    """Return the ``n`` products with the highest revenue, best first.

    Revenue is ``price * quantity``. Products with equal revenue keep the
    relative order they have in ``sales``.

    Args:
        sales: Iterable of dicts with "product", "price", "quantity" and
            "category" keys.
        n: Maximum number of entries to return. Zero or a negative value
            yields an empty list.

    Returns:
        A list of at most ``n`` new dicts with the keys "product",
        "revenue" and "category", sorted by descending revenue.
    """
    ranked = sorted(
        (
            {
                "product": sale["product"],
                "revenue": sale["price"] * sale["quantity"],
                "category": sale["category"],
            }
            for sale in sales
        ),
        key=lambda entry: entry["revenue"],
        reverse=True,
    )
    # Clamp n at zero: a negative n would otherwise slice from the end and
    # silently return "everything except the last |n| items".
    return ranked[:max(n, 0)]

def analyze_by_category(sales):
    """Aggregate revenue, quantity and product names per category.

    Args:
        sales: Iterable of dicts with "product", "price", "quantity" and
            "category" keys.

    Returns:
        A dict keyed by category, in first-seen order. Each value is a dict
        with "total_revenue" (sum of price * quantity), "total_quantity"
        (sum of quantity) and "products" (list of product names in input
        order).
    """
    summary = {}
    for record in sales:
        # setdefault creates the bucket the first time a category is seen.
        bucket = summary.setdefault(
            record["category"],
            {"total_revenue": 0, "total_quantity": 0, "products": []},
        )
        bucket["total_revenue"] += record["price"] * record["quantity"]
        bucket["total_quantity"] += record["quantity"]
        bucket["products"].append(record["product"])
    return summary

数据分析和报告

def generate_sales_report(sales_data):
    """Print a sales analysis report and return its key figures.

    The report is written to stdout in four parts: the overall revenue, a
    per-category breakdown, the top three products by revenue, and the
    revenue of the hard-coded "电子产品" category as a filtering example.
    The figures come from calculate_revenue, analyze_by_category,
    get_top_products and filter_by_category.

    Args:
        sales_data: Sales records with "product", "price", "quantity" and
            "category" keys.

    Returns:
        A dict with the keys "total_revenue", "category_analysis" and
        "top_products".
    """
    rule = "=" * 50
    for header_line in (rule, "销售数据分析报告", rule):
        print(header_line)

    # Overall figure.
    total_revenue = calculate_revenue(sales_data)
    print(f"总销售额: {total_revenue}元")

    # Per-category breakdown.
    category_analysis = analyze_by_category(sales_data)
    print("\n按类别分析:")
    for category in category_analysis:
        stats = category_analysis[category]
        print(f"{category}:")
        print(f"  销售额: {stats['total_revenue']}元")
        print(f"  销量: {stats['total_quantity']}件")
        print(f"  商品数量: {len(stats['products'])}种")

    # Best sellers, ranked from 1.
    top_products = get_top_products(sales_data, n=3)
    print(f"\n热销商品TOP{len(top_products)}:")
    for i, product in enumerate(top_products, start=1):
        print(f"  第{i}名: {product['product']} - {product['revenue']}元")

    # Example of filtering on a single category.
    electronic_products = filter_by_category(sales_data, category="电子产品")
    print(f"\n电子产品销售额: {calculate_revenue(electronic_products)}元")

    summary = {
        "total_revenue": total_revenue,
        "category_analysis": category_analysis,
        "top_products": top_products,
    }
    return summary

# Generate the report and keep the summary dict it returns
report = generate_sales_report(sales_data)

高级数据处理技巧

def find_best_selling_combination(sales):
    """Print the high-value and high-volume products (simplified analysis).

    Records are ordered by descending revenue (price * quantity). Those with
    a price above 1000 are listed as high-value products, and those with a
    quantity above 10 as volume products. A record may appear in both lists.

    Args:
        sales: Iterable of dicts with "product", "price" and "quantity" keys.

    Returns:
        None. The analysis is written to stdout.
    """
    def revenue_of(record):
        return record["price"] * record["quantity"]

    by_revenue = list(sales)
    by_revenue.sort(key=revenue_of, reverse=True)

    # Split into the two groups in one pass; both keep the revenue ordering.
    high_value = []
    volume_products = []
    for record in by_revenue:
        if record["price"] > 1000:
            high_value.append(record)
        if record["quantity"] > 10:
            volume_products.append(record)

    print("\n销售策略分析:")

    print("高价值商品:")
    for product in high_value:
        print(f"  {product['product']} (单价: {product['price']}元)")

    print("走量商品:")
    for product in volume_products:
        print(f"  {product['product']} (销量: {product['quantity']}件)")

def calculate_discount_impact(sales, discount_rate=0.1):
    """Print the estimated revenue change per product under a discount.

    For every record the discounted unit price is
    ``price * (1 - discount_rate)`` and the quantity sold is assumed to grow
    by 20% (truncated to an integer). The resulting revenue is compared
    against the original ``price * quantity``.

    Args:
        sales: Iterable of dicts with "product", "price" and "quantity" keys.
        discount_rate: Fraction taken off the unit price (0.1 means 10% off).

    Returns:
        None. A header and one line per product are written to stdout. For
        a product whose original revenue is zero the relative change is
        undefined and is printed as "N/A".
    """
    # round() rather than int(): 0.29 * 100 evaluates to 28.999999999999996
    # in binary floating point, which int() would truncate to 28.
    print(f"\n打折影响分析 ({round(discount_rate * 100)}%折扣):")

    for sale in sales:
        original_revenue = sale["price"] * sale["quantity"]
        discounted_price = sale["price"] * (1 - discount_rate)

        # Assumption: the discount lifts the quantity sold by 20%.
        estimated_quantity = int(sale["quantity"] * 1.2)
        discounted_revenue = discounted_price * estimated_quantity

        revenue_change = discounted_revenue - original_revenue
        if original_revenue:
            change_percent = (revenue_change / original_revenue) * 100
            percent_text = f"{change_percent:+.1f}%"
        else:
            # A zero baseline (price or quantity of 0) has no meaningful
            # percentage change; avoid dividing by zero.
            percent_text = "N/A"

        print(f"{sale['product']}: {revenue_change:+.1f}元 ({percent_text})")

# Run the advanced analyses on the sample data
find_best_selling_combination(sales_data)
calculate_discount_impact(sales_data, 0.1)  # 10% discount

数据导出功能

def export_to_csv(sales_data, filename="sales_report.csv"):
    """Write the sales records to a UTF-8 CSV file and report the path.

    The file starts with a header row, followed by one row per record with
    the product name, price, quantity, category and the computed revenue
    (price * quantity). An existing file at ``filename`` is overwritten.

    Args:
        sales_data: Iterable of dicts with "product", "price", "quantity"
            and "category" keys.
        filename: Destination path of the CSV file.

    Returns:
        None. A confirmation line is written to stdout.
    """
    import csv

    header = ["商品名称", "价格", "销量", "类别", "销售额"]

    # newline="" lets the csv module control line endings itself.
    with open(filename, "w", newline="", encoding="utf-8") as csv_file:
        out = csv.writer(csv_file)
        out.writerow(header)
        out.writerows(
            [
                record["product"],
                record["price"],
                record["quantity"],
                record["category"],
                record["price"] * record["quantity"],
            ]
            for record in sales_data
        )

    print(f"\n数据已导出到: {filename}")

def create_summary_statistics(sales_data):
    """Print summary statistics for the sales records.

    Reports the number of records, the total quantity, the mean unit price
    and the distinct categories.

    Args:
        sales_data: Sized collection (it is iterated several times and
            passed to ``len``) of dicts with "price", "quantity" and
            "category" keys.

    Returns:
        None. The summary is written to stdout. For empty input the average
        price is reported as 0.0 and the category list is empty.
    """
    total_products = len(sales_data)
    total_quantity = sum(sale["quantity"] for sale in sales_data)

    # Guard the mean against empty input instead of dividing by zero.
    if total_products:
        average_price = sum(sale["price"] for sale in sales_data) / total_products
    else:
        average_price = 0.0

    # dict.fromkeys de-duplicates while keeping first-seen order; a set's
    # iteration order for strings varies between interpreter runs, which
    # made the printed category list non-deterministic.
    categories = list(dict.fromkeys(sale["category"] for sale in sales_data))

    print("\n数据摘要:")
    print(f"商品种类: {total_products}种")
    print(f"总销量: {total_quantity}件")
    print(f"平均价格: {average_price:.1f}元")
    print(f"商品类别: {', '.join(categories)}")

# Export the sample data to CSV and print the summary statistics
export_to_csv(sales_data)
create_summary_statistics(sales_data)

数据处理的核心原则

  1. 数据质量优先
  • 处理缺失值和异常值
  • 保持数据的一致性
  2. 代码可读性
  • 使用有意义的变量名
  • 将复杂操作拆分成小函数
  3. 性能考虑
  • 选择合适的数据结构
  • 避免不必要的循环嵌套
  • 使用内置函数和推导式
  4. 错误处理