import copy
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import math
import time
import warnings
warnings.filterwarnings("ignore")
class MetricDetect(object):
    """Detect abrupt level shifts in metric time series.

    Input frames are expected to carry a numeric ``timestamp`` column and a
    numeric ``value`` column. ``multi_run`` additionally requires ``cmdb_id``
    and ``kpi_name`` columns, which it uses as grouping keys.
    """

    # A change point is only reported when it lies at most this many
    # timestamp units before the newest sample of the inspected window.
    CHANGE_RECENCY_WINDOW = 5

    # Added to the denominator when the median absolute deviation is zero,
    # so the deviation score stays finite.
    MAD_EPSILON = 0.1

    def __init__(self, change_scale=2, change_value=0.07, miss_rate=0.3, change_weight=0.1,
                 abn_threshold=0.9, score_threshold=0.9):
        """Store the detection parameters.

        Args:
            change_scale: Number of standard deviations a first difference
                must be away from the median difference to count as a jump.
            change_value: Minimum absolute level change; smaller changes get
                an abnormality of 0.0.
            miss_rate: Stored as ``miss_rate_threshold``; not read by any
                method of this class.
            change_weight: Weight of the deviation likelihood in the final
                abnormality; the correlation term gets ``1 - change_weight``.
            abn_threshold: Minimum abnormality for ``multi_run`` to report a
                group.
            score_threshold: Stored only; not read by any method of this
                class.
        """
        self.change_scale = change_scale
        self.change_value = change_value
        self.change_weight = change_weight
        self.abn_threshold = abn_threshold
        self.score_threshold = score_threshold
        self.miss_rate_threshold = miss_rate

    def calCorr(self, array1, array2):
        """Return the absolute Pearson correlation of two equal-length sequences.

        Returns 0.0 when either input is constant or when the coefficient is
        undefined (for example with fewer than two samples).
        """
        data_matrix = pd.DataFrame({'array1': array1, 'array2': array2})
        first = data_matrix['array1']
        second = data_matrix['array2']
        if first.std() == 0 or second.std() == 0:
            return 0.0
        # Series.corr avoids the positional lookup on a label-indexed Series
        # that the previous ``corr(...)['array1'][1]`` expression relied on.
        coefficient = first.corr(second, method='pearson')
        if pd.isna(coefficient):
            return 0.0
        return abs(float(coefficient))

    def missCount(self, v):
        """Return the fraction of samples missing between first and last timestamp.

        The expected sample count is ``last - first + 1``, i.e. one sample per
        timestamp unit. Frames with fewer than two rows yield 0.
        """
        if len(v) < 2:
            return 0
        timestamps = v['timestamp']
        data_len = timestamps.max() - timestamps.min() + 1
        return (data_len - len(v)) / data_len

    def changeTime(self, df, change_scale):
        """Return the timestamp of the first outlying jump, or 0 if there is none.

        Rows containing any NaN are dropped and the rest is ordered by
        timestamp. A first difference is an outlier when it is at least
        ``change_scale`` standard deviations away from the median difference.
        The result is 0 when no outlier exists, when the first outlier has a
        zero difference (which happens for a constant series), or when the
        outlier is more than ``CHANGE_RECENCY_WINDOW`` timestamp units older
        than the newest sample.
        """
        cleaned = df.dropna(axis=0, how='any')
        if cleaned.empty:
            return 0
        ordered = cleaned.sort_values(by=['timestamp'], ascending=True)
        last_timestamp = ordered['timestamp'].iloc[-1]

        # Kept as a separate Series instead of a new column so that no
        # derived frame is written to.
        diffs = ordered['value'].diff().fillna(0)
        diff_median = diffs.median()
        diff_sigma = diffs.std()
        diff_upper = diff_median + change_scale * diff_sigma
        diff_lower = diff_median - change_scale * diff_sigma

        outliers = ((diffs >= diff_upper) | (diffs <= diff_lower)).to_numpy()
        if not outliers.any():
            return 0
        first_pos = int(np.argmax(outliers))
        if diffs.iloc[first_pos] == 0:
            return 0
        # Read from the column directly so the timestamp keeps its dtype.
        Tc = ordered['timestamp'].iloc[first_pos]
        if abs(int(last_timestamp) - int(Tc)) > self.CHANGE_RECENCY_WINDOW:
            return 0
        return Tc

    def changeDegree(self, df, tc):
        """Score how strongly the series level changes at timestamp ``tc``.

        Samples with ``timestamp < tc`` form the reference segment, the rest
        the changed segment.

        Returns:
            ``(abnormality, change)``. ``change`` is the mean of the changed
            segment minus the median of the reference segment. ``abnormality``
            is 0.0 when either segment is empty or ``abs(change)`` is below
            ``change_value``; otherwise it blends a deviation likelihood with
            the best correlation against a step or a single-spike template.
        """
        if df.empty:
            return 0.0, 0.0

        # Order by time first: the templates below assume that every
        # reference sample precedes every changed sample.
        ordered = df.sort_values(by=['timestamp'], ascending=True)
        Xi = ordered.loc[ordered['timestamp'] < tc, 'value'].to_numpy()
        Xj = ordered.loc[ordered['timestamp'] >= tc, 'value'].to_numpy()
        if len(Xi) == 0 or len(Xj) == 0:
            return 0.0, 0.0

        median = np.median(Xi)
        tc_value = Xj.mean()
        change = tc_value - median
        if abs(change) < self.change_value:
            return 0.0, change

        # Building the observed series from the two segments guarantees it
        # has the same length and ordering as the templates, even when some
        # rows carry a NaN timestamp and belong to neither segment.
        values = np.concatenate([Xi, Xj])
        step_template = np.concatenate([np.zeros(len(Xi)), np.ones(len(Xj))])
        spike_template = np.concatenate([np.zeros(len(Xi)), np.ones(1), np.zeros(len(Xj) - 1)])
        cor = max(self.calCorr(values, step_template), self.calCorr(values, spike_template))

        mad_median = np.median(abs(Xi - median))
        # NOTE(review): the numerator subtracts the MAD rather than the
        # reference median; kept as-is to preserve existing scores - confirm
        # whether ``tc_value - median`` was intended.
        if mad_median != 0:
            abn = (tc_value - mad_median) / mad_median
        else:
            abn = (tc_value - mad_median) / (mad_median + self.MAD_EPSILON)
        # 1 - 0.5 * erfc(|z| / sqrt(2)) is the standard normal CDF at |z|.
        error_likelihood = 1 - 0.5 * math.erfc(abs(abn) / math.sqrt(2.0))
        abnormality = self.change_weight * error_likelihood + (1 - self.change_weight) * cor
        return abnormality, change

    def run(self, df):
        """Detect and score a change point in a single series.

        If ``df`` has a ``change_scale`` column, its first value overrides the
        instance-level ``change_scale``.

        Returns:
            ``(tc, change, abnormality)``; ``(0, 0.0, 0.0)`` when ``df`` is
            empty or no change point is found. ``tc`` is a local value, so an
            empty frame no longer depends on a module-level ``tc`` left behind
            by an earlier call.
        """
        if df.empty:
            return 0, 0.0, 0.0
        if 'change_scale' in df.columns:
            scale = df['change_scale'].iloc[0]
        else:
            scale = self.change_scale
        tc = self.changeTime(df, scale)
        if tc == 0:
            return tc, 0.0, 0.0
        abnormality, change = self.changeDegree(df, tc)
        return tc, change, abnormality

    def multi_run(self, df):
        """Run detection for every ``(cmdb_id, kpi_name)`` group of ``df``.

        Returns:
            A DataFrame with columns ``cmdb_id``, ``kpi_name``, ``change``,
            ``tc`` (local-time string, or 0 when no change time exists) and
            ``abnormality``, holding one row per group whose abnormality
            reaches ``abn_threshold``. An empty input yields an empty
            DataFrame without columns.
        """
        if df.empty:
            return pd.DataFrame()

        records = []
        for k, v in df.groupby(['cmdb_id', 'kpi_name']):
            tc, change, abnormality = self.run(v)
            # Positive comparison on purpose: a NaN abnormality is skipped.
            if abnormality >= self.abn_threshold:
                if tc != 0:
                    tc_text = str(time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(int(tc))))
                else:
                    tc_text = 0
                records.append((str(k[0]), str(k[1]), change, tc_text, abnormality))

        return pd.DataFrame(
            records,
            columns=['cmdb_id', 'kpi_name', 'change', 'tc', 'abnormality'],
        )
def _first_value_at(series, when):
    """Return the value of ``series`` at index label ``when``.

    When the label occurs more than once the lookup yields a Series; the
    first entry is used so the caller always gets a single value.
    """
    hit = series.loc[when]
    return hit.iloc[0] if isinstance(hit, pd.Series) else hit


def main(path="/Users/zyl/Desktop/Python/PythonFile/Web_coding/plotly/metric.csv", window=600):
    """Replay a metric CSV through ``MetricDetect`` and plot detected anomalies.

    For every distinct timestamp after the first, the samples of the
    preceding ``window`` timestamp units (the current timestamp excluded)
    are passed to ``MetricDetect.run``. Results whose abnormality reaches
    the detector's ``abn_threshold`` are collected, printed, and marked on
    a plot of the ``value`` column.

    Args:
        path: CSV file with ``timestamp`` and ``value`` columns.
        window: Length of the look-back window in timestamp units.

    Returns:
        The list of ``[time_string, change, abnormality]`` entries found.
    """
    df = pd.read_csv(path)
    df = df.sort_values(by=['timestamp'], ascending=True)
    df = df.dropna(how='any')

    alg = MetricDetect()
    output = []
    if df.empty:
        # Nothing to replay; the first-timestamp lookup below would fail.
        print("未检测出异常 程序结束!")
        return output

    last_detect_timestamp = int(df['timestamp'].values[0])
    for dt in df['timestamp'].unique():
        dtimestamp = int(dt)
        if dtimestamp - last_detect_timestamp > 0:
            in_window = (df['timestamp'] >= dtimestamp - window) & (df['timestamp'] < dtimestamp)
            tc, change, abnormality = alg.run(df[in_window])
            # Use the detector's own threshold rather than repeating it here.
            if abnormality >= alg.abn_threshold:
                if tc != 0:
                    TC = str(time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(int(tc))))
                else:
                    TC = 0
                output.append([TC, change, abnormality])
        last_detect_timestamp = dtimestamp

    print("异常信息: ---------", output)
    if len(output) > 0:
        df['time'] = df['timestamp'].apply(
            lambda x: time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(x))
        )
        df.index = pd.to_datetime(df['time'])
        plot_df = df['value']
        # Convert the time strings to datetimes so the markers share the
        # datetime axis of the line plot and the lookup is an exact match.
        anomaly_times = pd.to_datetime([entry[0] for entry in output])
        anomaly_value = [_first_value_at(plot_df, t) for t in anomaly_times]
        plt.figure(figsize=(12, 3))
        plt.plot(plot_df, label='ooooo')
        plt.scatter(anomaly_times, anomaly_value, color='r', s=14)
        plt.legend()
        plt.show()
    else:
        print("未检测出异常 程序结束!")
    return output


if __name__ == '__main__':
    import sys

    # An optional first command-line argument overrides the default CSV path.
    main(*sys.argv[1:2])