在信息爆炸的今天,数据就像新时代的石油。但原始数据本身没有价值,只有通过处理和分析,我们才能从中提取出有用的信息。Python正是处理数据的利器,它能帮你把枯燥的数字变成有意义的洞察,让数据真正"开口说话"。
实战代码:销售数据分析系统
基础数据结构操作
# 创建销售数据
sales_data = [
{"product": "笔记本电脑", "price": 5999, "quantity": 3, "category": "电子产品"},
{"product": "智能手机", "price": 3999, "quantity": 5, "category": "电子产品"},
{"product": "书籍", "price": 49, "quantity": 20, "category": "文教用品"},
{"product": "咖啡", "price": 35, "quantity": 15, "category": "食品饮料"},
{"product": "鼠标", "price": 89, "quantity": 8, "category": "电子产品"},
{"product": "笔记本", "price": 15, "quantity": 30, "category": "文教用品"}
]
print("原始销售数据:")
for sale in sales_data:
print(f"{sale['product']}: {sale['quantity']}件, 单价{sale['price']}元")
数据处理函数
def calculate_revenue(sales):
"""计算总销售额"""
total = 0
for sale in sales:
total += sale["price"] * sale["quantity"]
return total
def filter_by_category(sales, category):
"""按类别筛选商品"""
return [sale for sale in sales if sale["category"] == category]
def get_top_products(sales, n=3):
"""获取销售额最高的商品"""
# 计算每个商品的销售额
products_with_revenue = []
for sale in sales:
revenue = sale["price"] * sale["quantity"]
products_with_revenue.append({
"product": sale["product"],
"revenue": revenue,
"category": sale["category"]
})
# 按销售额排序并返回前n名
sorted_products = sorted(products_with_revenue,
key=lambda x: x["revenue"],
reverse=True)
return sorted_products[:n]
def analyze_by_category(sales):
"""按类别分析销售数据"""
category_stats = {}
for sale in sales:
category = sale["category"]
revenue = sale["price"] * sale["quantity"]
if category not in category_stats:
category_stats[category] = {
"total_revenue": 0,
"total_quantity": 0,
"products": []
}
category_stats[category]["total_revenue"] += revenue
category_stats[category]["total_quantity"] += sale["quantity"]
category_stats[category]["products"].append(sale["product"])
return category_stats
数据分析和报告
def generate_sales_report(sales_data):
"""生成销售分析报告"""
print("=" * 50)
print("销售数据分析报告")
print("=" * 50)
# 基本统计
total_revenue = calculate_revenue(sales_data)
print(f"总销售额: {total_revenue}元")
# 按类别分析
category_analysis = analyze_by_category(sales_data)
print("\n按类别分析:")
for category, stats in category_analysis.items():
print(f"{category}:")
print(f" 销售额: {stats['total_revenue']}元")
print(f" 销量: {stats['total_quantity']}件")
print(f" 商品数量: {len(stats['products'])}种")
# 热销商品
top_products = get_top_products(sales_data, 3)
print(f"\n热销商品TOP{len(top_products)}:")
for i, product in enumerate(top_products, 1):
print(f" 第{i}名: {product['product']} - {product['revenue']}元")
# 类别筛选示例
electronic_products = filter_by_category(sales_data, "电子产品")
print(f"\n电子产品销售额: {calculate_revenue(electronic_products)}元")
return {
"total_revenue": total_revenue,
"category_analysis": category_analysis,
"top_products": top_products
}
# 生成报告
report = generate_sales_report(sales_data)
高级数据处理技巧
def find_best_selling_combination(sales):
"""寻找最佳销售组合(简化版)"""
# 按销售额排序
sorted_sales = sorted(sales, key=lambda x: x["price"] * x["quantity"], reverse=True)
# 找出高价值商品和走量商品
high_value = [s for s in sorted_sales if s["price"] > 1000]
volume_products = [s for s in sorted_sales if s["quantity"] > 10]
print("\n销售策略分析:")
print("高价值商品:")
for product in high_value:
print(f" {product['product']} (单价: {product['price']}元)")
print("走量商品:")
for product in volume_products:
print(f" {product['product']} (销量: {product['quantity']}件)")
def calculate_discount_impact(sales, discount_rate=0.1):
"""计算打折对销售额的影响"""
print(f"\n打折影响分析 ({int(discount_rate * 100)}%折扣):")
for sale in sales:
original_revenue = sale["price"] * sale["quantity"]
discounted_price = sale["price"] * (1 - discount_rate)
# 假设打折后销量增加20%
estimated_quantity = int(sale["quantity"] * 1.2)
discounted_revenue = discounted_price * estimated_quantity
revenue_change = discounted_revenue - original_revenue
change_percent = (revenue_change / original_revenue) * 100
print(f"{sale['product']}: {revenue_change:+.1f}元 ({change_percent:+.1f}%)")
# 使用高级分析功能
find_best_selling_combination(sales_data)
calculate_discount_impact(sales_data, 0.1) # 10%折扣
数据导出功能
def export_to_csv(sales_data, filename="sales_report.csv"):
"""将销售数据导出为CSV文件"""
import csv
with open(filename, 'w', newline='', encoding='utf-8') as file:
writer = csv.writer(file)
# 写入表头
writer.writerow(["商品名称", "价格", "销量", "类别", "销售额"])
# 写入数据
for sale in sales_data:
revenue = sale["price"] * sale["quantity"]
writer.writerow([
sale["product"],
sale["price"],
sale["quantity"],
sale["category"],
revenue
])
print(f"\n数据已导出到: {filename}")
def create_summary_statistics(sales_data):
"""创建数据摘要统计"""
total_products = len(sales_data)
total_quantity = sum(sale["quantity"] for sale in sales_data)
average_price = sum(sale["price"] for sale in sales_data) / total_products
categories = set(sale["category"] for sale in sales_data)
print(f"\n数据摘要:")
print(f"商品种类: {total_products}种")
print(f"总销量: {total_quantity}件")
print(f"平均价格: {average_price:.1f}元")
print(f"商品类别: {', '.join(categories)}")
# 使用导出和统计功能
export_to_csv(sales_data)
create_summary_statistics(sales_data)
数据处理的核心原则
- 数据质量优先
- 处理缺失值和异常值
- 保持数据的一致性
- 代码可读性
- 使用有意义的变量名
- 将复杂操作拆分成小函数
- 性能考虑
- 选择合适的数据结构
- 避免不必要的循环嵌套
- 使用内置函数和推导式
- 错误处理