题目1:滑动窗口最大值
代码
def get_nums_k(nums, k):
    """Return the maximum of every length-``k`` sliding window over ``nums``.

    Args:
        nums: Sequence of mutually comparable values.
        k: Window size.

    Returns:
        A list holding one maximum per window, ordered left to right
        (``len(nums) - k + 1`` entries). An empty list is returned when
        ``nums`` is empty, ``k <= 0``, or ``k`` exceeds ``len(nums)``.
    """
    if len(nums) == 0 or k <= 0:
        return []
    res = []  # window maxima, left to right
    # The last full window starts at index len(nums) - k, hence the "+ 1".
    # When k > len(nums) the range is empty and no window is produced.
    for i in range(len(nums) - k + 1):
        # Slice by k so the window size follows the argument instead of
        # being fixed at three elements.
        res.append(max(nums[i:i + k]))
    return res
# Demo: print the sliding-window maxima for the sample input.
nums = [1, 3, -1, -3, 5, 3, 6, 7]
k = 3
window_maxima = get_nums_k(nums, k)
print(window_maxima)
运行结果
[3, 3, 5, 5, 6]
题目2:外卖平台订单数据分析
代码
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
# Task 2: food-delivery order analysis.

# Load the orders and derive per-order revenue as order amount plus delivery fee.
df = pd.read_csv("外卖订单数据.csv")
df['实际收入'] = df['订单金额 (元)'].add(df['配送费 (元)'])
print(df)

# Per merchant category: order count, mean delivery duration, total revenue.
category_aggs = {
    "订单ID": "count",
    "配送时长 (分钟)": "mean",
    "实际收入": "sum",
}
states = df.groupby("商家类别").agg(category_aggs)
print(states)

# Mean order amount per day of week (1 = Monday ... 7 = Sunday), as a line chart.
order_time = pd.to_datetime(df['下单时间'])
df['下单时间'] = order_time
df['星期'] = order_time.dt.dayofweek + 1
df_mean = df.groupby("星期")['订单金额 (元)'].mean()
print(df_mean)
plt.plot(df_mean.index, df_mean)
plt.show()

# Correlation between delivery distance and delivery duration.
distance_km = df['配送距离 (km)']
duration_min = df['配送时长 (分钟)']
corr = distance_km.corr(duration_min)
print(corr)

# Revenue share of the four highest-earning categories, as a pie chart.
states = states.sort_values(by="实际收入", ascending=False)
top4 = states.head(4)
# SimHei is selected so the Chinese category labels can be rendered.
plt.rcParams["font.sans-serif"] = ['SimHei']
plt.pie(top4['实际收入'], labels=top4.index, autopct="%1.1f%%")
plt.show()
题目3:酒店预订数据分析
代码
# Task 3: hotel booking analysis.

# Load the bookings; order total = price per person * guests * nights.
data = pd.read_csv("酒店预订数据.csv")
data['订单总金额'] = data['人均房价 (元)'].mul(data['入住人数']).mul(data['入住天数'])
print(data)

# Snapshot of the bookings that were not cancelled (taken before the
# date-derived columns below are added).
not_cancelled_mask = data['是否取消'] == '否'
data_not_cancel = data[not_cancelled_mask]

# Per star rating: total bookings, non-cancelled bookings, and their ratio.
star_groups = data.groupby("酒店星级 (1-5星)")
group_data = star_groups.agg({
    '预订ID': "count",
    "是否取消": lambda flags: flags.eq('否').sum(),
})
group_data = group_data.rename(columns={"预订ID": "总预定数", "是否取消": "未取消数"})
group_data['预订成功率'] = group_data['未取消数'].div(group_data['总预定数'])
print(group_data)

# Per star rating, over non-cancelled bookings only: mean nights and mean order total.
mean_data_not_cancel = data_not_cancel.groupby("酒店星级 (1-5星)").agg({
    "入住天数": "mean",
    "订单总金额": "mean",
})
mean_data_not_cancel = mean_data_not_cancel.rename(
    columns={"入住天数": "平均入住天数", "订单总金额": "平均订单总金额"}
)
print(mean_data_not_cancel)

# Bookings per calendar month, as a bar chart. Each row carries a constant 1
# so that summing the column counts the rows in each month.
booking_date = pd.to_datetime(data['预订日期'])
data['预订日期'] = booking_date
data['月份'] = booking_date.dt.month
data['预定量'] = [1] * len(data)
month_data = data.groupby('月份')['预定量'].sum()
print(month_data)
plt.bar(month_data.index, month_data)
plt.show()

# Correlation between nights stayed and order total.
nights = data['入住天数']
corr = nights.corr(data['订单总金额'])
print(corr)

# Export non-cancelled stays of three nights or more.
long_stay_mask = data['入住天数'] >= 3
kept_mask = data['是否取消'] == '否'
new_Data = data[long_stay_mask & kept_mask]
new_Data.to_csv("长期入住订单.csv")
题目4:员工考勤数据分析
代码
# Task 4: employee attendance analysis.

# Load the attendance records and compute the daily working time in hours
# from the sign-in / sign-out timestamps.
df = pd.read_csv("考勤数据.csv")
df['签退时间'] = pd.to_datetime(df['签退时间'])
df['签到时间'] = pd.to_datetime(df['签到时间'])
df['每日工作时长'] = (df['签退时间'] - df['签到时间']).dt.total_seconds() / 3600
print(df)

# Per department: mean working hours and attendance rate, where the rate is
# the share of records whose leave type is '无' (no leave).
dept_states = df.groupby('部门').agg({
    '每日工作时长': "mean",
    '请假类型 (无/事假/病假)': lambda x: (x == '无').sum() / len(x)
}).rename(columns={'每日工作时长': "平均工作时长", '请假类型 (无/事假/病假)': "出勤率"})
print(dept_states)

# Number of leave records per weekday, as a bar chart.
df['日期'] = pd.to_datetime(df['日期'])
df['星期'] = df['日期'].dt.day_name()
df['是否请假'] = df['请假类型 (无/事假/病假)'] != '无'
new_df = df[df['请假类型 (无/事假/病假)'] != '无']
# groupby sorts the English day names alphabetically (Friday, Monday, ...)
# and omits weekdays without any leave. Reindex to Monday..Sunday and fill
# the gaps with 0 so that bar i really belongs to the i-th tick label below.
weekday_order = ['Monday', 'Tuesday', 'Wednesday', 'Thursday',
                 'Friday', 'Saturday', 'Sunday']
df_sum = (
    new_df.groupby('星期')['是否请假'].sum()
    .reindex(weekday_order, fill_value=0)
)
# SimHei is selected so the Chinese tick labels can be rendered.
plt.rcParams['font.sans-serif'] = ['SimHei']
# Plot at explicit positions 0..6 so the bars line up with the xticks.
plt.bar(range(7), df_sum)
plt.xticks(range(7), ['周一', '周二', '周三', '周四', '周五', '周六', '周天'])
plt.show()

# Summary statistics of the daily working time.
states = df['每日工作时长'].agg(['mean', 'median', 'std'])
print(states)

# Export employees with two or more leave records within a single month.
df['月份'] = df['日期'].dt.month
df_month = df.groupby(['员工ID', '月份'])['是否请假'].sum().reset_index()
high_employ = df_month[df_month['是否请假'] >= 2]
high_employ.to_csv("高频请假员工.csv")
题目5:企业销售业绩数据分析
代码
# Task 5: sales performance analysis.

# Load the deals; deal total = quantity * unit price.
df = pd.read_csv("销售业绩数据.csv")
df['成交总金额'] = df['成交数量'].mul(df['产品单价 (元)'])
print(df)

# Mean deal total for every (region, customer grade) pair.
region_grade_groups = df.groupby(['区域', '客户等级 (A/B/C/D)'])
mean_df = region_grade_groups['成交总金额'].mean().reset_index(name='平均成交金额')
print(mean_df)

# Total deal amount per quarter (labelled Q1..Q4), as a bar chart.
deal_date = pd.to_datetime(df['成交日期'])
df['成交日期'] = deal_date
quarter_labels = {q: f"Q{q}" for q in range(1, 5)}
df['季度'] = deal_date.dt.quarter.map(quarter_labels)
df_sum = df.groupby('季度')['成交总金额'].sum()
plt.bar(df_sum.index, df_sum)
plt.show()

# Five sales reps with the largest total, shown in ascending order as a bar chart.
totals_by_rep = df.groupby('销售代表ID')['成交总金额'].sum()
top5_sales = totals_by_rep.nlargest(5).sort_values()
plt.bar(top5_sales.index, top5_sales)
plt.show()

# Correlation between quantity sold and deal total.
quantity = df['成交数量']
corr = quantity.corr(df['成交总金额'])
print(corr)
总结
- 算法基础:第一题用逐窗口求最大值的方式实现了基础的滑动窗口算法;这种实现较为朴素(数据量大时可改用单调队列优化到 O(n)),但为后续复杂问题打下了基础
- 数据处理能力:掌握了pandas的核心操作,包括:
  - 数据读取与清洗
  - 列计算与转换
  - 分组聚合统计
  - 时间序列处理
- 可视化技能:使用matplotlib实现多种图表:
  - 折线图(趋势分析)
  - 饼图(占比分析)
  - 柱状图(比较分析)
  - 中文显示处理