# Remove an isolated maximum ("spike") price from each item's price history.
#
# Rule implemented below: when an item has exactly two distinct prices and its
# highest price occurs on exactly one row, that single row is dropped. Every
# other row is kept unchanged.
import pandas as pd
import numpy as np

# `display` is only provided by IPython/Jupyter sessions. Fall back to print()
# so the script does not raise NameError under a plain Python interpreter.
try:
    display
except NameError:
    display = print

# Sample data: 3 items, 7 consecutive days each.
item_ids = [1, 2, 3]
dates = pd.date_range(start='2023-01-01', periods=7)
# Rows are collected in this list before the DataFrame is built.
data = []
prices = [167, 193, 1, 41, 194, 163, 22, 100, 114, 160, 123, 1, 1, 1, 199, 1, 1, 1, 1, 1, 1]
# Fill the rows: prices are consumed in order, item by item and day by day.
price_index = 0
for item in item_ids:
    for date in dates:
        price = prices[price_index]  # take the next predefined price
        data.append([item, date, price])
        price_index += 1
# Build the DataFrame.
df = pd.DataFrame(data, columns=['itemid', 'date', 'price'])
# Highest price of each item, broadcast to every row of that item.
df['max_price'] = df.groupby('itemid')['price'].transform('max')
# True on the rows whose price equals the item's highest price.
df['max_price_true'] = df['price'] == df['max_price']
# Number of rows per item that sit at the highest price. The count is computed
# on the max rows only, so after index alignment every non-max row holds NaN.
df['max_price_true_count'] = (
    df[df['max_price_true']].groupby('itemid')['max_price_true'].transform('count')
)
# Number of distinct prices per item.
df['count_price'] = df.groupby('itemid')['price'].transform('nunique')
# Items that do not have exactly two distinct prices are kept as they are.
df_normal = df[df['count_price'] != 2]
display(df_normal)
# Items with exactly two distinct prices: drop the row that is the one and only
# occurrence of the maximum. Non-max rows carry NaN in the count column, and
# NaN != 1 evaluates to True, so they are kept.
df_deal = df[(df['count_price'] == 2) & (df['max_price_true_count'] != 1)]
display(df_deal)
# Recombine both parts and keep only the original columns.
df = pd.concat([df_normal, df_deal])
df = df[['itemid', 'date', 'price']]
display(df)
你可以通过以下方式优化和简化代码,使其更简洁:
import pandas as pd

# `display` is only provided by IPython/Jupyter sessions. Fall back to print()
# so the script does not raise NameError under a plain Python interpreter.
try:
    display
except NameError:
    display = print

# Sample data: 3 items, 7 consecutive days each.
item_ids = [1, 2, 3]
dates = pd.date_range(start='2023-01-01', periods=7)
prices = [167, 193, 1, 41, 194, 163, 22, 100, 114, 160, 123, 1, 1, 1, 199, 1, 1, 1, 1, 1, 1]
# Build the DataFrame: prices are consumed in order, item by item, day by day.
data = []
price_index = 0
for item in item_ids:
    for date in dates:
        price = prices[price_index]  # use the given price
        data.append([item, date, price])
        price_index += 1
df = pd.DataFrame(data, columns=['itemid', 'date', 'price'])
# Helper columns, each broadcast to every row of the item:
#   max_price            - highest price of the item
#   count_price          - number of distinct prices of the item
#   max_price_true_count - how many rows of the item sit at the highest price
df['max_price'] = df.groupby('itemid')['price'].transform('max')
df['count_price'] = df.groupby('itemid')['price'].transform('nunique')
df['max_price_true_count'] = df.groupby('itemid')['price'].transform(lambda x: (x == x.max()).sum())
# Items that do not have exactly two distinct prices are kept as they are.
df_normal = df[df['count_price'] != 2]
# Items with exactly two distinct prices: drop only the row that is the single
# occurrence of the maximum. Because max_price_true_count is broadcast to the
# whole group, the exclusion must also require that the row itself is the
# maximum; otherwise every row of such an item would be discarded.
is_lone_max = (df['price'] == df['max_price']) & (df['max_price_true_count'] == 1)
df_deal = df[(df['count_price'] == 2) & ~is_lone_max]
# Recombine both parts and keep only the original columns.
df_result = pd.concat([df_normal, df_deal])[['itemid', 'date', 'price']]
# Show the result.
display(df_result)
简化后的代码:
- 合并了添加计算列的步骤。
- 使用 lambda 函数简化 max_price_true_count 列的计算。
- 直接筛选并合并符合条件的行。