每个C值对应的第一个A值

39 阅读1分钟
import numpy as np  # NOTE(review): unused in the visible script; kept in case later code relies on it
import pandas as pd

# Sample data: two C groups ('068' and '069'), each containing several
# different A values. B is constant and D alternates between two codes.
data = {
    'A': [18, 18, 17, 17, 16, 16, 17, 17, 17, 17, 16, 16],
    'B': ['00004539', '00004539', '00004539', '00004539', '00004539', '00004539', '00004539', '00004539', '00004539', '00004539', '00004539', '00004539'],
    'C': ['068', '068', '068', '068', '068', '068', '069', '069', '069', '069', '069', '069'],
    'D': ['00370961', '00370966', '00370961', '00370966', '00370961', '00370966', '00370961', '00370966', '00370961', '00370966', '00370961', '00370966'],
}
df = pd.DataFrame(data)
print(df)

# Keep only the C groups that contain more than one distinct A value.
grouped = df.groupby('C').filter(lambda x: x['A'].nunique() > 1)
print(grouped)

# For each C value, take the A value of its first row. drop_duplicates keeps
# the first occurrence, so "first" means first in the DataFrame's row order.
# (This assignment was previously swallowed by the comment line above it,
# which left first_a_values undefined.)
first_a_values = grouped.drop_duplicates(subset=['C'])[['C', 'A']]
print(first_a_values)

# Keep the rows of df whose (C, A) pair matches a first-A pair found above;
# merge defaults to an inner join, so every other row is dropped.
result = df.merge(first_a_values, on=['C', 'A'])
print(result)

1720083301275.png