功能介绍
这是一个强大的数据可视化和图表生成工具,用于将复杂的数据集转换为直观的图表和可视化图形。该工具具备以下核心功能:
- 多类型图表支持:
  - 折线图、柱状图、饼图、散点图
  - 热力图、箱线图、直方图
  - 3D图表和地理图表
  - 自定义图表组合和布局
- 智能数据处理:
  - 自动数据类型识别和转换
  - 缺失值处理和异常值检测
  - 数据聚合和分组统计
  - 时间序列数据处理
- 灵活样式配置:
  - 主题和颜色方案自定义
  - 字体、标签、标题设置
  - 图例、网格、注释添加
  - 导出格式和分辨率控制
- 批量处理能力:
  - 批量图表生成
  - 模板化配置支持
  - 自动化报告生成
  - 并行处理优化
- 交互式可视化:
  - 动态图表交互
  - 缩放和平移功能
  - 数据点悬停信息
  - 图表导出和分享
场景应用
1. 业务数据分析
- 销售数据趋势可视化
- 用户行为分析图表
- 财务报表图形化展示
- KPI指标监控面板
2. 科学研究
- 实验数据可视化分析
- 统计结果图表展示
- 研究报告图形支持
- 数据模式识别和发现
3. 教育培训
- 教学数据图表制作
- 学生表现可视化
- 课程统计分析展示
- 教育研究报告图表
4. 媒体传播
- 新闻数据图表制作
- 信息图设计和生成
- 社交媒体内容可视化
- 报告和演示图表支持
报错处理
1. 数据读取异常
# Read the CSV with the default encoding first; every failure is reported
# as DataVisualizationError with the original exception chained as its cause.
try:
    data = pd.read_csv(file_path)
except FileNotFoundError as e:
    logger.error(f"数据文件不存在: {file_path}")
    raise DataVisualizationError(f"文件未找到: {file_path}") from e
except pd.errors.ParserError as e:
    logger.error(f"数据解析错误: {str(e)}")
    raise DataVisualizationError(f"数据格式错误: {str(e)}") from e
except UnicodeDecodeError:
    # Retry with other encodings. latin1 accepts any byte sequence, so the
    # last resort cannot fail on decoding (the decoded text may still be
    # wrong). Any other failure of the retries is wrapped like the rest
    # instead of escaping as a raw exception.
    try:
        try:
            data = pd.read_csv(file_path, encoding='gbk')
        except Exception:
            data = pd.read_csv(file_path, encoding='latin1')
    except Exception as e:
        logger.error(f"读取数据文件异常: {str(e)}")
        raise DataVisualizationError(f"读取文件失败: {str(e)}") from e
except Exception as e:
    logger.error(f"读取数据文件异常: {str(e)}")
    raise DataVisualizationError(f"读取文件失败: {str(e)}") from e
2. 图表生成异常
# Draw and save a simple line plot. The figure is closed in ``finally`` so
# that neither the success path nor a failure leaves it open.
fig = None
try:
    fig, ax = plt.subplots(figsize=(10, 6))
    ax.plot(x_data, y_data)
    fig.savefig(output_path, dpi=300, bbox_inches='tight')
except ValueError as e:
    logger.error(f"数据值错误: {str(e)}")
    raise ChartGenerationError(f"图表数据错误: {str(e)}") from e
except RuntimeError as e:
    logger.error(f"图表渲染错误: {str(e)}")
    raise ChartGenerationError(f"图表渲染失败: {str(e)}") from e
except Exception as e:
    logger.error(f"图表生成异常: {str(e)}")
    raise ChartGenerationError(f"生成图表失败: {str(e)}") from e
finally:
    if fig is not None:
        plt.close(fig)
3. 配置文件异常
# Load and validate the configuration file. Every failure is re-raised as
# ConfigError with the original exception chained as its cause.
try:
    with open(config_file, 'r', encoding='utf-8') as f:
        config = yaml.safe_load(f)
    validate_config(config)
except yaml.YAMLError as e:
    logger.error(f"配置文件YAML格式错误: {str(e)}")
    raise ConfigError(f"配置文件格式无效: {str(e)}") from e
except json.JSONDecodeError as e:
    logger.error(f"配置文件JSON格式错误: {str(e)}")
    raise ConfigError(f"配置文件格式无效: {str(e)}") from e
except ValidationError as e:
    logger.error(f"配置验证失败: {str(e)}")
    raise ConfigError(f"配置无效: {str(e)}") from e
except Exception as e:
    logger.error(f"加载配置异常: {str(e)}")
    raise ConfigError(f"加载配置失败: {str(e)}") from e
4. 导出文件异常
# Save the current figure. PermissionError is handled before the more
# general OSError (it is a subclass of OSError); every failure is re-raised
# as ExportError with the original exception chained as its cause.
try:
    plt.savefig(output_path, **save_params)
except PermissionError as e:
    logger.error(f"无权限写入文件: {output_path}")
    raise ExportError(f"文件写入被拒绝: {output_path}") from e
except OSError as e:
    logger.error(f"文件系统错误: {str(e)}")
    raise ExportError(f"文件保存失败: {str(e)}") from e
except Exception as e:
    logger.error(f"导出文件异常: {str(e)}")
    raise ExportError(f"导出失败: {str(e)}") from e
代码实现
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
数据可视化和图表生成工具
功能:将数据转换为直观的图表和可视化图形
作者:Cline
版本:1.0
"""
import argparse
import sys
import json
import yaml
import logging
import os
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from datetime import datetime
from typing import Dict, List, Any, Optional
import warnings
# NOTE: this silences every warning process-wide, including deprecation
# warnings from pandas/matplotlib.
warnings.filterwarnings('ignore')

# Font fallback list so that Chinese labels can be rendered.
plt.rcParams['font.sans-serif'] = ['SimHei', 'Arial Unicode MS', 'DejaVu Sans']
# Draw the minus sign as an ASCII hyphen instead of U+2212.
plt.rcParams['axes.unicode_minus'] = False

# Logging: messages go both to a log file and to stdout.
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(levelname)s - %(message)s',
    handlers=[
        # Explicit UTF-8 so the Chinese log messages are written correctly
        # regardless of the platform's default encoding.
        logging.FileHandler('chart_generator.log', encoding='utf-8'),
        logging.StreamHandler(sys.stdout),
    ],
)
logger = logging.getLogger(__name__)
class DataVisualizationError(Exception):
    """Raised when loading, preprocessing or visualizing data fails."""


class ChartGenerationError(Exception):
    """Raised when a chart cannot be drawn or saved."""


class ConfigError(Exception):
    """Raised when a configuration file cannot be loaded or is invalid."""


class ExportError(Exception):
    """Raised when exporting a chart to a file fails."""
class DataProcessor:
    """Stateless helpers that load tabular data files and preprocess them."""

    # Maps the operator names accepted in filter configs to the pandas
    # Series method that builds the corresponding boolean mask.
    _FILTER_METHODS = {
        'eq': 'eq',
        'ne': 'ne',
        'gt': 'gt',
        'lt': 'lt',
        'ge': 'ge',
        'le': 'le',
        'in': 'isin',
    }

    @staticmethod
    def load_data(file_path: str, **kwargs) -> pd.DataFrame:
        """Load a data file into a DataFrame, picking the reader by extension.

        Supported extensions (matched case-insensitively): ``.csv``,
        ``.tsv``, ``.xlsx``/``.xls`` and ``.json``.

        Args:
            file_path: Path of the file to read.
            **kwargs: Passed through to the pandas reader. For ``.tsv`` files
                the separator defaults to a tab but may be overridden.

        Returns:
            The loaded DataFrame.

        Raises:
            DataVisualizationError: If the extension is not supported or the
                reader fails; the original exception is chained as the cause.
        """
        lowered_path = file_path.lower()
        try:
            if lowered_path.endswith('.csv'):
                data = pd.read_csv(file_path, **kwargs)
            elif lowered_path.endswith(('.xlsx', '.xls')):
                data = pd.read_excel(file_path, **kwargs)
            elif lowered_path.endswith('.json'):
                data = pd.read_json(file_path, **kwargs)
            elif lowered_path.endswith('.tsv'):
                # Caller-supplied ``sep`` wins over the tab default instead of
                # colliding with it as a duplicate keyword argument.
                data = pd.read_csv(file_path, **{'sep': '\t', **kwargs})
            else:
                raise DataVisualizationError(f"不支持的文件格式: {file_path}")
        except DataVisualizationError as e:
            # Already a domain error: log it and re-raise without wrapping it
            # in a second DataVisualizationError.
            logger.error(f"加载数据文件失败: {str(e)}")
            raise
        except Exception as e:
            logger.error(f"加载数据文件失败: {str(e)}")
            raise DataVisualizationError(f"加载数据失败: {str(e)}") from e
        logger.info(f"成功加载数据文件: {file_path} ({len(data)} 行, {len(data.columns)} 列)")
        return data

    @staticmethod
    def preprocess_data(data: pd.DataFrame, config: Dict) -> pd.DataFrame:
        """Return a preprocessed copy of ``data``; the input is not modified.

        Steps, in order:
          1. Missing values, per ``config['missing_values']``: ``'drop'``
             (default), ``'fill_mean'``, ``'fill_median'`` or ``'fill_zero'``.
             Mean/median are computed over numeric columns only, so
             non-numeric columns keep their missing values.
          2. Type conversion: ``config['type_conversions']`` maps column
             names to dtypes given to ``Series.astype``; unknown columns are
             skipped.
          3. Row filters: each entry of ``config['filters']`` has ``column``,
             ``operator`` (eq/ne/gt/lt/ge/le/in) and ``value``. Filters that
             name an unknown column or operator are skipped with a warning.

        Args:
            data: Source DataFrame.
            config: Data settings; ``None`` is treated as an empty dict.

        Returns:
            The processed DataFrame.

        Raises:
            DataVisualizationError: If any step fails.
        """
        try:
            config = config or {}
            processed = data.copy()

            # Missing values
            strategy = config.get('missing_values', 'drop')
            if strategy == 'drop':
                processed = processed.dropna()
            elif strategy == 'fill_mean':
                # numeric_only=True: recent pandas raises on mixed-dtype
                # frames when asked for the mean of non-numeric columns.
                processed = processed.fillna(processed.mean(numeric_only=True))
            elif strategy == 'fill_median':
                processed = processed.fillna(processed.median(numeric_only=True))
            elif strategy == 'fill_zero':
                processed = processed.fillna(0)
            else:
                logger.warning(f"未知的缺失值处理策略,已忽略: {strategy}")

            # Type conversion (a null entry in the config counts as empty)
            for column, dtype in (config.get('type_conversions') or {}).items():
                if column in processed.columns:
                    processed[column] = processed[column].astype(dtype)

            # Row filters
            for filter_config in config.get('filters') or []:
                column = filter_config.get('column')
                operator_name = filter_config.get('operator')
                value = filter_config.get('value')
                if column not in processed.columns:
                    logger.warning(f"筛选列不存在,已跳过: {column}")
                    continue
                method_name = DataProcessor._FILTER_METHODS.get(operator_name)
                if method_name is None:
                    logger.warning(f"不支持的筛选操作符,已跳过: {operator_name}")
                    continue
                mask = getattr(processed[column], method_name)(value)
                processed = processed[mask]

            logger.info(f"数据预处理完成: {len(processed)} 行, {len(processed.columns)} 列")
            return processed
        except Exception as e:
            logger.error(f"数据预处理失败: {str(e)}")
            raise DataVisualizationError(f"数据预处理失败: {str(e)}") from e
class ChartGenerator:
    """Render DataFrame columns as static chart images (matplotlib/seaborn).

    Every ``generate_*`` method draws one chart, saves it to ``output_path``
    (or to a timestamped default file name in the working directory), always
    closes the figure, and returns the path that was written. Any failure is
    re-raised as ``ChartGenerationError``.

    Recognised ``config`` keys:
        theme: ``'dark'``, ``'ggplot'`` or ``'seaborn'``; any other value
            selects matplotlib's default style.
        dpi: Resolution used when saving figures (default 300).
    """

    # Font fallback list for Chinese labels. It is re-applied after the theme
    # is set because applying a style can overwrite font rcParams.
    _CJK_FONTS = ['SimHei', 'Arial Unicode MS', 'DejaVu Sans']

    def __init__(self, config: Dict = None):
        """Store the chart settings and apply the configured theme."""
        self.config = config or {}
        self.theme = self.config.get('theme', 'default')
        self.setup_theme()

    def setup_theme(self):
        """Apply the selected style globally, then restore the font setup."""
        if self.theme == 'dark':
            plt.style.use('dark_background')
        elif self.theme == 'ggplot':
            plt.style.use('ggplot')
        elif self.theme == 'seaborn':
            sns.set_style("whitegrid")
        else:
            plt.style.use('default')
        # The style calls above may reset these values, which would break
        # rendering of Chinese titles and labels.
        plt.rcParams['font.sans-serif'] = list(self._CJK_FONTS)
        plt.rcParams['axes.unicode_minus'] = False

    def _save_figure(self, fig, output_path, default_prefix: str) -> str:
        """Save ``fig`` and return the path it was written to.

        Falls back to ``<default_prefix>_<timestamp>.png`` when no path is
        given and creates the target directory if it does not exist yet.
        """
        if not output_path:
            timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')
            output_path = f'{default_prefix}_{timestamp}.png'
        target_dir = os.path.dirname(output_path)
        if target_dir:
            os.makedirs(target_dir, exist_ok=True)
        fig.tight_layout()
        fig.savefig(output_path, dpi=self.config.get('dpi', 300), bbox_inches='tight')
        return output_path

    @staticmethod
    def _add_trend_line(ax, x_values, y_values):
        """Overlay a least-squares line; do nothing if the data cannot be fitted.

        The fit is skipped for non-numeric columns and when fewer than two
        distinct x values remain after dropping rows with missing values.
        """
        if not (pd.api.types.is_numeric_dtype(x_values)
                and pd.api.types.is_numeric_dtype(y_values)):
            return
        xs = x_values.to_numpy(dtype=float)
        ys = y_values.to_numpy(dtype=float)
        valid = ~(np.isnan(xs) | np.isnan(ys))
        xs, ys = xs[valid], ys[valid]
        if np.unique(xs).size < 2:
            return
        trend = np.poly1d(np.polyfit(xs, ys, 1))
        ordered_xs = np.sort(xs)
        ax.plot(ordered_xs, trend(ordered_xs), "r--", alpha=0.8)

    def generate_line_chart(self, data: pd.DataFrame, x_column: str, y_columns: List[str],
                            title: str = None, output_path: str = None) -> str:
        """Draw one line per entry of ``y_columns`` against ``x_column``.

        Columns missing from ``data`` are skipped with a warning.

        Returns:
            Path of the saved image.

        Raises:
            ChartGenerationError: If drawing or saving fails.
        """
        fig = None
        try:
            fig, ax = plt.subplots(figsize=(12, 8))
            plotted = 0
            for y_column in y_columns:
                if y_column in data.columns and x_column in data.columns:
                    ax.plot(data[x_column], data[y_column], marker='o', label=y_column, linewidth=2)
                    plotted += 1
                else:
                    logger.warning(f"折线图列不存在,已跳过: {x_column} / {y_column}")
            ax.set_xlabel(x_column)
            ax.set_ylabel(', '.join(y_columns))
            ax.set_title(title or f'折线图 - {", ".join(y_columns)} vs {x_column}')
            if plotted:
                # A legend without any plotted series only triggers a warning.
                ax.legend()
            ax.grid(True, alpha=0.3)
            # Rotate x tick labels so long values do not overlap.
            plt.setp(ax.get_xticklabels(), rotation=45, ha="right")
            output_path = self._save_figure(fig, output_path, 'line_chart')
            logger.info(f"折线图已生成: {output_path}")
            return output_path
        except Exception as e:
            logger.error(f"生成折线图失败: {str(e)}")
            raise ChartGenerationError(f"生成折线图失败: {str(e)}") from e
        finally:
            if fig is not None:
                plt.close(fig)

    def generate_bar_chart(self, data: pd.DataFrame, x_column: str, y_column: str,
                           title: str = None, output_path: str = None, horizontal: bool = False) -> str:
        """Draw a bar chart of ``y_column`` by ``x_column`` with value labels.

        Args:
            horizontal: Draw horizontal bars (categories on the y axis).

        Returns:
            Path of the saved image.

        Raises:
            ChartGenerationError: If drawing or saving fails.
        """
        fig = None
        try:
            fig, ax = plt.subplots(figsize=(12, 8))
            if horizontal:
                bars = ax.barh(data[x_column], data[y_column], color='skyblue')
                ax.set_xlabel(y_column)
                ax.set_ylabel(x_column)
            else:
                bars = ax.bar(data[x_column], data[y_column], color='lightcoral')
                ax.set_xlabel(x_column)
                ax.set_ylabel(y_column)
            ax.set_title(title or f'柱状图 - {y_column} by {x_column}')
            # Annotate each bar with its value.
            for bar in bars:
                if horizontal:
                    width = bar.get_width()
                    ax.text(width, bar.get_y() + bar.get_height() / 2,
                            f'{width:.1f}', ha='left', va='center')
                else:
                    height = bar.get_height()
                    ax.text(bar.get_x() + bar.get_width() / 2, height,
                            f'{height:.1f}', ha='center', va='bottom')
            if not horizontal:
                # Rotate x tick labels so long category names do not overlap.
                plt.setp(ax.get_xticklabels(), rotation=45, ha="right")
            output_path = self._save_figure(fig, output_path, 'bar_chart')
            logger.info(f"柱状图已生成: {output_path}")
            return output_path
        except Exception as e:
            logger.error(f"生成柱状图失败: {str(e)}")
            raise ChartGenerationError(f"生成柱状图失败: {str(e)}") from e
        finally:
            if fig is not None:
                plt.close(fig)

    def generate_pie_chart(self, data: pd.DataFrame, labels_column: str, values_column: str,
                           title: str = None, output_path: str = None) -> str:
        """Draw a pie chart of ``values_column`` labelled by ``labels_column``.

        Only rows whose value is greater than zero are included.

        Returns:
            Path of the saved image.

        Raises:
            ChartGenerationError: If drawing or saving fails.
        """
        fig = None
        try:
            fig, ax = plt.subplots(figsize=(10, 10))
            # Keep strictly positive values only.
            filtered_data = data[data[values_column] > 0]
            _, _, autotexts = ax.pie(
                filtered_data[values_column],
                labels=filtered_data[labels_column],
                autopct='%1.1f%%',
                startangle=90,
                colors=plt.cm.Set3(np.linspace(0, 1, len(filtered_data))),
            )
            ax.set_title(title or f'饼图 - {values_column} distribution')
            # Style the percentage labels.
            for autotext in autotexts:
                autotext.set_color('white')
                autotext.set_fontweight('bold')
            output_path = self._save_figure(fig, output_path, 'pie_chart')
            logger.info(f"饼图已生成: {output_path}")
            return output_path
        except Exception as e:
            logger.error(f"生成饼图失败: {str(e)}")
            raise ChartGenerationError(f"生成饼图失败: {str(e)}") from e
        finally:
            if fig is not None:
                plt.close(fig)

    def generate_scatter_plot(self, data: pd.DataFrame, x_column: str, y_column: str,
                              title: str = None, output_path: str = None, color_column: str = None) -> str:
        """Draw a scatter plot of ``y_column`` against ``x_column``.

        Args:
            color_column: Optional column used to colour the points. Numeric
                columns are mapped directly onto the colormap; any other
                column is treated as categorical and each distinct value gets
                its own colour, labelled on the colorbar.

        A dashed linear trend line is added when both axes are numeric and
        there is enough data to fit one.

        Returns:
            Path of the saved image.

        Raises:
            ChartGenerationError: If drawing or saving fails.
        """
        fig = None
        try:
            fig, ax = plt.subplots(figsize=(12, 8))
            x_values = data[x_column]
            y_values = data[y_column]
            if color_column and color_column in data.columns:
                color_values = data[color_column]
                if pd.api.types.is_numeric_dtype(color_values):
                    scatter = ax.scatter(x_values, y_values, c=color_values,
                                         cmap='viridis', alpha=0.7)
                    fig.colorbar(scatter, ax=ax, label=color_column)
                else:
                    # Strings cannot be passed as colormap values: encode each
                    # category as an integer and label the colorbar instead.
                    codes, categories = pd.factorize(color_values)
                    scatter = ax.scatter(x_values, y_values, c=codes,
                                         cmap='viridis', alpha=0.7)
                    colorbar = fig.colorbar(scatter, ax=ax, label=color_column,
                                            ticks=list(range(len(categories))))
                    colorbar.ax.set_yticklabels([str(category) for category in categories])
            else:
                ax.scatter(x_values, y_values, alpha=0.7, color='blue')
            ax.set_xlabel(x_column)
            ax.set_ylabel(y_column)
            ax.set_title(title or f'散点图 - {y_column} vs {x_column}')
            ax.grid(True, alpha=0.3)
            self._add_trend_line(ax, x_values, y_values)
            output_path = self._save_figure(fig, output_path, 'scatter_plot')
            logger.info(f"散点图已生成: {output_path}")
            return output_path
        except Exception as e:
            logger.error(f"生成散点图失败: {str(e)}")
            raise ChartGenerationError(f"生成散点图失败: {str(e)}") from e
        finally:
            if fig is not None:
                plt.close(fig)

    def generate_heatmap(self, data: pd.DataFrame, x_column: str, y_column: str,
                         value_column: str, title: str = None, output_path: str = None) -> str:
        """Draw a heatmap of the mean ``value_column`` per (y, x) combination.

        The data is pivoted with ``y_column`` as rows and ``x_column`` as
        columns; duplicate combinations are averaged.

        Returns:
            Path of the saved image.

        Raises:
            ChartGenerationError: If pivoting, drawing or saving fails.
        """
        fig = None
        try:
            pivot_table = data.pivot_table(
                values=value_column,
                index=y_column,
                columns=x_column,
                aggfunc='mean',
            )
            fig, ax = plt.subplots(figsize=(12, 10))
            sns.heatmap(
                pivot_table,
                annot=True,
                fmt='.1f',
                cmap='coolwarm',
                ax=ax,
                cbar_kws={'label': value_column},
            )
            ax.set_title(title or f'热力图 - {value_column} by {x_column} and {y_column}')
            plt.setp(ax.get_xticklabels(), rotation=45, ha="right")
            plt.setp(ax.get_yticklabels(), rotation=0)
            output_path = self._save_figure(fig, output_path, 'heatmap')
            logger.info(f"热力图已生成: {output_path}")
            return output_path
        except Exception as e:
            logger.error(f"生成热力图失败: {str(e)}")
            raise ChartGenerationError(f"生成热力图失败: {str(e)}") from e
        finally:
            if fig is not None:
                plt.close(fig)
class VisualizationManager:
    """Load a configuration and generate the charts it describes."""

    # Config file extensions tried, in order, when looking up the
    # per-data-file configuration during batch processing.
    _CONFIG_EXTENSIONS = ('.json', '.yaml', '.yml')

    def __init__(self, config_file: str = None):
        """Load ``config_file`` (or the defaults) and build the chart generator.

        Raises:
            ConfigError: If the configuration file exists but cannot be loaded.
        """
        self.config_file = config_file
        self.config = {}
        self.chart_generator = None
        self.load_config()
        self.chart_generator = ChartGenerator(self.config.get('chart_settings', {}))

    def load_config(self):
        """Populate ``self.config`` from ``self.config_file``.

        Uses the default configuration when no file was given or the file
        does not exist. Files ending in ``.yaml``/``.yml`` are parsed as
        YAML, everything else as JSON. An empty YAML document yields an empty
        configuration.

        Raises:
            ConfigError: If the file cannot be read or parsed, or if its top
                level is not a mapping.
        """
        if not self.config_file or not os.path.exists(self.config_file):
            logger.info("未指定配置文件或文件不存在,使用默认配置")
            self.config = self._create_default_config()
            return
        try:
            with open(self.config_file, 'r', encoding='utf-8') as f:
                if self.config_file.lower().endswith(('.yaml', '.yml')):
                    loaded = yaml.safe_load(f)
                else:
                    loaded = json.load(f)
        except Exception as e:
            logger.error(f"加载配置文件失败: {str(e)}")
            raise ConfigError(f"配置加载失败: {str(e)}") from e
        if loaded is None:
            # yaml.safe_load returns None for an empty document.
            loaded = {}
        if not isinstance(loaded, dict):
            message = f"配置文件顶层必须是对象: {self.config_file}"
            logger.error(f"加载配置文件失败: {message}")
            raise ConfigError(f"配置加载失败: {message}")
        self.config = loaded
        logger.info(f"成功加载配置文件: {self.config_file}")

    def _create_default_config(self) -> Dict:
        """Return the built-in configuration (it defines no charts)."""
        return {
            "data_settings": {
                "missing_values": "drop",
                "type_conversions": {}
            },
            "chart_settings": {
                "theme": "default",
                "figure_size": [12, 8],
                "dpi": 300
            },
            "export_settings": {
                "formats": ["png", "pdf", "svg"],
                "output_dir": "./charts"
            }
        }

    def _render_chart(self, data: pd.DataFrame, chart_config: Dict) -> Optional[str]:
        """Draw the single chart described by ``chart_config``.

        Returns:
            The path of the generated image, or ``None`` when the chart type
            is not recognised.
        """
        chart_type = chart_config.get('type')
        title = chart_config.get('title')
        output_file = chart_config.get('output_file')
        generator = self.chart_generator
        if chart_type == 'line':
            return generator.generate_line_chart(
                data=data,
                x_column=chart_config.get('x_column'),
                y_columns=chart_config.get('y_columns', []),
                title=title,
                output_path=output_file,
            )
        if chart_type == 'bar':
            return generator.generate_bar_chart(
                data=data,
                x_column=chart_config.get('x_column'),
                y_column=chart_config.get('y_column'),
                title=title,
                output_path=output_file,
                horizontal=chart_config.get('horizontal', False),
            )
        if chart_type == 'pie':
            return generator.generate_pie_chart(
                data=data,
                labels_column=chart_config.get('labels_column'),
                values_column=chart_config.get('values_column'),
                title=title,
                output_path=output_file,
            )
        if chart_type == 'scatter':
            return generator.generate_scatter_plot(
                data=data,
                x_column=chart_config.get('x_column'),
                y_column=chart_config.get('y_column'),
                title=title,
                output_path=output_file,
                color_column=chart_config.get('color_column'),
            )
        if chart_type == 'heatmap':
            return generator.generate_heatmap(
                data=data,
                x_column=chart_config.get('x_column'),
                y_column=chart_config.get('y_column'),
                value_column=chart_config.get('value_column'),
                title=title,
                output_path=output_file,
            )
        logger.warning(f"不支持的图表类型,已跳过: {chart_type}")
        return None

    def generate_charts_from_config(self, data_file: str) -> List[str]:
        """Load ``data_file``, preprocess it and draw every configured chart.

        A chart that fails is logged and skipped; the remaining charts are
        still generated.

        Returns:
            Paths of the charts that were generated successfully.

        Raises:
            DataVisualizationError: If loading or preprocessing the data fails.
        """
        try:
            data = DataProcessor.load_data(data_file)
            data = DataProcessor.preprocess_data(data, self.config.get('data_settings', {}))
            chart_configs = self.config.get('charts') or []
            if not chart_configs:
                logger.warning("配置中未定义任何图表")
            generated_charts = []
            for chart_config in chart_configs:
                try:
                    chart_path = self._render_chart(data, chart_config)
                except Exception as e:
                    logger.error(f"生成图表失败: {str(e)}")
                    continue
                if chart_path is not None:
                    generated_charts.append(chart_path)
            logger.info(f"图表生成完成,共生成 {len(generated_charts)} 个图表")
            return generated_charts
        except Exception as e:
            logger.error(f"根据配置生成图表失败: {str(e)}")
            raise DataVisualizationError(f"图表生成失败: {str(e)}") from e

    def batch_generate_charts(self, data_files: List[str], config_dir: str) -> Dict[str, List[str]]:
        """Generate charts for several data files.

        For each data file, a configuration named after the file's base name
        (``.json``, then ``.yaml``, then ``.yml``) is looked up in
        ``config_dir``. If one exists it is used through a fresh manager;
        otherwise this manager's own configuration is used.

        Returns:
            Mapping of data file to the list of generated chart paths; a file
            whose processing failed maps to an empty list.
        """
        results = {}
        for data_file in data_files:
            try:
                stem = os.path.splitext(os.path.basename(data_file))[0]
                config_file = None
                for extension in self._CONFIG_EXTENSIONS:
                    candidate = os.path.join(config_dir, f"{stem}{extension}")
                    if os.path.exists(candidate):
                        config_file = candidate
                        break
                if config_file is not None:
                    # A dedicated configuration exists for this data file.
                    manager = VisualizationManager(config_file)
                    results[data_file] = manager.generate_charts_from_config(data_file)
                else:
                    # Fall back to this manager's configuration.
                    results[data_file] = self.generate_charts_from_config(data_file)
            except Exception as e:
                logger.error(f"处理文件 {data_file} 失败: {str(e)}")
                results[data_file] = []
        return results
def create_sample_config():
    """Write a sample configuration and a matching sample data set.

    Creates ``visualization_sample_config.json`` and ``sample_data.csv`` in
    the current working directory, overwriting existing files.
    """
    sample_config = {
        "data_settings": {
            "missing_values": "drop",
            "type_conversions": {
                "date": "datetime64[ns]",
                "category": "category"
            },
            "filters": [
                {
                    "column": "value",
                    "operator": "gt",
                    "value": 0
                }
            ]
        },
        "chart_settings": {
            "theme": "default",
            "figure_size": [12, 8],
            "dpi": 300
        },
        "export_settings": {
            "formats": ["png", "pdf", "svg"],
            "output_dir": "./charts"
        },
        "charts": [
            {
                "type": "line",
                "x_column": "date",
                "y_columns": ["sales", "profit"],
                "title": "销售和利润趋势图",
                "output_file": "sales_trend.png"
            },
            {
                "type": "bar",
                "x_column": "product",
                "y_column": "quantity",
                "title": "产品销量柱状图",
                "output_file": "product_sales.png"
            },
            {
                "type": "pie",
                "labels_column": "category",
                "values_column": "revenue",
                "title": "收入分类饼图",
                "output_file": "revenue_distribution.png"
            },
            {
                "type": "scatter",
                "x_column": "advertising",
                "y_column": "sales",
                "color_column": "region",
                "title": "广告投入与销售额散点图",
                "output_file": "ad_sales_scatter.png"
            },
            {
                "type": "heatmap",
                "x_column": "month",
                "y_column": "product",
                "value_column": "sales",
                "title": "月度产品销售热力图",
                "output_file": "monthly_sales_heatmap.png"
            }
        ]
    }
    with open('visualization_sample_config.json', 'w', encoding='utf-8') as f:
        json.dump(sample_config, f, indent=2, ensure_ascii=False)
    logger.info("示例配置文件已创建: visualization_sample_config.json")

    # Sample data: twelve month-end rows. The MonthEnd offset object is used
    # instead of the 'M' alias, which recent pandas versions deprecate; the
    # generated dates are the same.
    sample_data = pd.DataFrame({
        'date': pd.date_range('2023-01-01', periods=12, freq=pd.offsets.MonthEnd()),
        'sales': [1000, 1200, 1100, 1300, 1400, 1500, 1600, 1700, 1800, 1900, 2000, 2100],
        'profit': [200, 240, 220, 260, 280, 300, 320, 340, 360, 380, 400, 420],
        'product': ['A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L'],
        'quantity': [100, 120, 110, 130, 140, 150, 160, 170, 180, 190, 200, 210],
        'category': ['Electronics', 'Clothing', 'Books', 'Electronics', 'Clothing', 'Books',
                     'Electronics', 'Clothing', 'Books', 'Electronics', 'Clothing', 'Books'],
        'revenue': [50000, 30000, 20000, 55000, 32000, 22000, 58000, 34000, 24000, 60000, 36000, 26000],
        'advertising': [1000, 800, 600, 1200, 900, 700, 1300, 1000, 800, 1400, 1100, 900],
        'region': ['North', 'South', 'East', 'West', 'North', 'South', 'East', 'West', 'North', 'South', 'East', 'West'],
        'month': ['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec']
    })
    sample_data.to_csv('sample_data.csv', index=False)
    logger.info("示例数据文件已创建: sample_data.csv")
def main():
    """Command-line entry point.

    Modes, checked in this order:
      * ``--sample-config``: write the sample config and data files.
      * ``--batch FILES... --config-dir DIR``: process several data files.
      * ``--data FILE``: process a single data file.

    With none of these the help text is printed. ``--batch`` without
    ``--config-dir`` (and without ``--data``) is reported as a usage error.
    Domain errors raised while processing are printed to stderr and the
    process exits with status 1.
    """
    parser = argparse.ArgumentParser(description='数据可视化和图表生成工具')
    parser.add_argument('-c', '--config', help='配置文件路径')
    parser.add_argument('-d', '--data', help='数据文件路径')
    parser.add_argument('--sample-config', action='store_true', help='创建示例配置文件')
    parser.add_argument('--batch', nargs='+', help='批量处理数据文件')
    parser.add_argument('--config-dir', help='批量处理配置文件目录')
    args = parser.parse_args()

    if args.sample_config:
        create_sample_config()
        return

    try:
        if args.batch and args.config_dir:
            # Batch mode
            manager = VisualizationManager(args.config)
            results = manager.batch_generate_charts(args.batch, args.config_dir)
            print("批量图表生成结果:")
            for data_file, charts in results.items():
                print(f" {data_file}: 生成 {len(charts)} 个图表")
                for chart in charts:
                    print(f" - {chart}")
            return
        if args.data:
            # Single-file mode
            manager = VisualizationManager(args.config)
            charts = manager.generate_charts_from_config(args.data)
            print(f"图表生成完成,共生成 {len(charts)} 个图表:")
            for chart in charts:
                print(f" - {chart}")
            return
    except (DataVisualizationError, ChartGenerationError, ConfigError, ExportError) as e:
        # The details were already logged where the error occurred.
        print(f"错误: {e}", file=sys.stderr)
        sys.exit(1)

    if args.batch:
        # Batch files were given but there is no directory to search for
        # their configurations; say so instead of only printing the help.
        parser.error('--batch 需要同时指定 --config-dir')
    parser.print_help()


if __name__ == '__main__':
    main()
使用说明
1. 安装依赖
pip install pandas numpy matplotlib seaborn pyyaml openpyxl
2. 创建示例配置文件和数据
python chart_generator.py --sample-config
3. 生成单个图表
python chart_generator.py --config visualization_sample_config.json --data sample_data.csv
4. 批量生成图表
python chart_generator.py --config visualization_sample_config.json --batch *.csv --config-dir ./configs
配置文件示例
JSON配置文件
{
"data_settings": {
"missing_values": "drop",
"type_conversions": {
"date": "datetime64[ns]",
"category": "category"
},
"filters": [
{
"column": "value",
"operator": "gt",
"value": 0
}
]
},
"chart_settings": {
"theme": "default",
"figure_size": [12, 8],
"dpi": 300
},
"export_settings": {
"formats": ["png", "pdf", "svg"],
"output_dir": "./charts"
},
"charts": [
{
"type": "line",
"x_column": "date",
"y_columns": ["sales", "profit"],
"title": "销售和利润趋势图",
"output_file": "sales_trend.png"
},
{
"type": "bar",
"x_column": "product",
"y_column": "quantity",
"title": "产品销量柱状图",
"output_file": "product_sales.png"
},
{
"type": "pie",
"labels_column": "category",
"values_column": "revenue",
"title": "收入分类饼图",
"output_file": "revenue_distribution.png"
},
{
"type": "scatter",
"x_column": "advertising",
"y_column": "sales",
"color_column": "region",
"title": "广告投入与销售额散点图",
"output_file": "ad_sales_scatter.png"
},
{
"type": "heatmap",
"x_column": "month",
"y_column": "product",
"value_column": "sales",
"title": "月度产品销售热力图",
"output_file": "monthly_sales_heatmap.png"
}
]
}
高级特性
1. 多样化图表类型
支持折线图、柱状图、饼图、散点图、热力图等多种图表类型,满足不同数据可视化需求。
2. 智能数据处理
自动处理缺失值、数据类型转换和数据筛选,确保数据质量和图表准确性。
3. 灵活配置管理
通过配置文件控制数据处理、图表样式和导出设置,实现高度定制化。
4. 批量处理能力
支持批量处理多个数据文件,提高工作效率。
最佳实践
1. 数据准备
- 确保数据格式规范和一致性
- 处理缺失值和异常值
- 选择合适的图表类型展示数据
2. 图表设计
- 选择清晰易懂的图表类型
- 合理设置颜色和样式
- 添加必要的标题和标签
3. 性能优化
- 对大数据集进行采样处理
- 合理设置图表分辨率
- 批量处理时注意内存管理
总结
这个数据可视化和图表生成工具提供了一个功能强大、易于使用的数据可视化解决方案。通过支持多种图表类型和灵活的配置选项,可以帮助用户快速将数据转换为直观的图表,提升数据分析和展示的效果。