import pandas as pd
import numpy as np  # not used by the visible code; kept in case later code needs it

# Build the sample DataFrame. Columns B, C and D are zero-padded digit
# strings, so they are stored as text to preserve the leading zeros.
data = {
    'A': [18, 18, 17, 17, 16, 16, 17, 17, 17, 17, 16, 16],
    'B': ['00004539'] * 12,
    'C': ['068'] * 6 + ['069'] * 6,
    'D': ['00370961', '00370966'] * 6,
}
df = pd.DataFrame(data)
print(df)

# Keep only the rows whose C value appears together with more than one
# distinct A value (row order and index of df are preserved).
distinct_a_per_row = df.groupby('C')['A'].transform('nunique')
grouped = df[distinct_a_per_row > 1]
print(grouped)

# For each remaining C value, take the A value found on its first row.
first_a_values = grouped[['C', 'A']].drop_duplicates(subset='C')
print(first_a_values)

# Select the rows of df whose (C, A) pair equals one of those first pairs.
# An inner merge on both keys acts as the filter; the result gets a fresh
# 0..n-1 index.
result = pd.merge(df, first_a_values, how='inner', on=['C', 'A'])
print(result)
