去极值代码优化

43 阅读2分钟
#去极大值
import pandas as pd
import numpy as np

# Sample data: three items, each observed on seven consecutive days.
item_ids = [1, 2, 3]
dates = pd.date_range(start='2023-01-01', periods=7)

# Fixed prices, one per (item, date) pair in item-major order.
# Item 3 is the case of interest: a single spike (199) among constant prices.
prices = [167, 193, 1, 41, 194, 163, 22, 100, 114, 160, 123, 1, 1, 1, 199, 1, 1, 1, 1, 1, 1]

# `display` is only provided by IPython/Jupyter; fall back to `print` so the
# script also runs as a plain Python file instead of raising NameError.
try:
    display
except NameError:
    display = print

# Build the long-format frame from the item x date cross product.
# Assigning `prices` raises ValueError if its length does not match the
# number of (item, date) pairs, so a mis-sized price list fails loudly.
df = pd.MultiIndex.from_product(
    [item_ids, dates], names=['itemid', 'date']
).to_frame(index=False)
df['price'] = prices

# Per-item helper columns.
price_by_item = df.groupby('itemid')['price']
df['max_price'] = price_by_item.transform('max')
df['max_price_true'] = df['price'] == df['max_price']

# Number of rows tied at the item's maximum. It is computed on the max rows
# only, so after index alignment every non-max row holds NaN in this column.
rows_at_max = df[df['max_price_true']]
df['max_price_true_count'] = rows_at_max.groupby('itemid')['price'].transform('count')
df['count_price'] = price_by_item.transform('nunique')

# Items whose price takes anything other than exactly two distinct values
# are kept untouched.
df_normal = df[df['count_price'] != 2]
display(df_normal)

# Items with exactly two distinct prices: drop the maximum when it occurs
# only once. Non-max rows carry NaN in max_price_true_count, and NaN != 1
# evaluates to True, so they are kept.
df_deal = df[(df['count_price'] == 2) & (df['max_price_true_count'] != 1)]
display(df_deal)

# Recombine and drop the helper columns.
df = pd.concat([df_normal, df_deal])[['itemid', 'date', 'price']]
display(df)

你可以通过以下方式优化和简化代码，使其更简洁：

import pandas as pd

# Sample data: three items, each observed on seven consecutive days.
item_ids = [1, 2, 3]
dates = pd.date_range(start='2023-01-01', periods=7)
prices = [167, 193, 1, 41, 194, 163, 22, 100, 114, 160, 123, 1, 1, 1, 199, 1, 1, 1, 1, 1, 1]

# `display` is only provided by IPython/Jupyter; fall back to `print` so the
# script also runs as a plain Python file instead of raising NameError.
try:
    display
except NameError:
    display = print

# Build the long-format frame from the item x date cross product.
# Assigning `prices` raises ValueError if its length does not match the
# number of (item, date) pairs.
df = pd.MultiIndex.from_product(
    [item_ids, dates], names=['itemid', 'date']
).to_frame(index=False)
df['price'] = prices

# Per-item helper columns.
price_by_item = df.groupby('itemid')['price']
df['max_price'] = price_by_item.transform('max')
df['count_price'] = price_by_item.transform('nunique')
# Number of rows tied at the item's maximum, broadcast to EVERY row of the
# item (not only to the max rows).
df['max_price_true_count'] = price_by_item.transform(lambda x: (x == x.max()).sum())

# A row is an outlier to remove only if it IS the item's maximum and that
# maximum occurs exactly once. Because the count above is present on every
# row, testing the count alone would discard the whole item.
is_lone_max = (df['price'] == df['max_price']) & (df['max_price_true_count'] == 1)

# Items whose price takes anything other than exactly two distinct values
# are kept untouched.
df_normal = df[df['count_price'] != 2]
# Items with exactly two distinct prices: keep everything except a lone maximum.
df_deal = df[(df['count_price'] == 2) & ~is_lone_max]

# Recombine and drop the helper columns.
df_result = pd.concat([df_normal, df_deal])[['itemid', 'date', 'price']]

# Show the result.
display(df_result)

简化后的代码:

  1. 合并了添加计算列的步骤。
  2. 使用 lambda 函数简化 max_price_true_count 列的计算。注意：原代码只给取到组内最大值的行写入计数（其余行为 NaN），而 lambda 版本会给组内每一行都写入计数，因此筛选时必须同时判断该行是否为组内最大值，否则结果会与原代码不一致。
  3. 直接筛选并合并符合条件的行。