# Python

# 19 阅读1分钟
import pandas as pd
import matplotlib.pyplot as plt
import scipy.interpolate as itp
# 1. 读取数据
# Load the raw readings. The file is decoded as GBK, and header=0 makes the
# first row supply the column names (it does not set an index column).
ug_data = pd.read_csv("ug_detect.csv", header=0, encoding="gbk")
print("读取的数据:\n", ug_data)

# Pull out the four measured columns by their header text.
# NOTE(review): the "?" in these labels looks like a mis-decoded unit symbol;
# the labels are kept verbatim because they must match the CSV header - confirm
# against the actual file.
temperature, humidity, gas, co = (
    ug_data[column]
    for column in ('温度(?C)', '相对湿度', '瓦斯(m?/min)', '一氧化碳(m?/min)')
)
# 2. 异常值处理
def defectsCop(data, threshold):
    """Blank out readings at or above ``threshold`` so they can be re-filled.

    Every value satisfying ``value >= threshold`` is treated as an outlier and
    replaced in place with a missing-value marker (NaN in a pandas Series,
    ``None`` in any other mutable sequence).

    Args:
        data: Mutable sequence of numeric readings; modified in place. A
            pandas Series is processed with a boolean mask, so it works for
            any index (not only the default 0..n-1 labels).
        threshold: Smallest value that counts as an outlier.

    Returns:
        None. ``data`` is updated in place.
    """
    if isinstance(data, pd.Series):
        # Comparisons against NaN are False, so readings that are already
        # missing are left untouched.
        data.mask(data >= threshold, inplace=True)
        return

    for position, value in enumerate(data):
        # Skip entries that were already blanked so the call can be repeated.
        if value is not None and value >= threshold:
            data[position] = None
# Blank out readings at or above each channel's outlier threshold.
for readings, limit in ((temperature, 60), (humidity, 200), (gas, 100), (co, 100)):
    defectsCop(readings, limit)
# 3. 插值处理
def seriesItp(data):
    """Fill missing readings in place using two-point Lagrange interpolation.

    Each run of missing values is filled from the nearest valid reading on
    either side: the Lagrange polynomial through those two points is evaluated
    at every missing position. For an isolated gap this is the polynomial
    through positions ``i - 1`` and ``i + 1``.

    A run that touches the start or end of the data has only one valid
    neighbour, so it is filled with that neighbour's value. If the data holds
    no valid reading at all, it is left unchanged.

    Args:
        data: Mutable sequence of numeric readings (pandas Series or list);
            modified in place. Positions are used rather than index labels,
            so a Series with any index is supported.

    Returns:
        None. ``data`` is updated in place.
    """
    # Snapshot the readings so neighbours are always original, valid values.
    values = list(data)
    total = len(values)
    # A Series must be written by position; plain sequences already are.
    target = data.iloc if isinstance(data, pd.Series) else data

    previous_valid = None
    position = 0
    while position < total:
        if not pd.isnull(values[position]):
            previous_valid = position
            position += 1
            continue

        # Find the end of this run of missing values.
        gap_end = position
        while gap_end < total and pd.isnull(values[gap_end]):
            gap_end += 1
        next_valid = gap_end if gap_end < total else None

        if previous_valid is not None and next_valid is not None:
            lagrange_poly = itp.lagrange(
                [previous_valid, next_valid],
                [values[previous_valid], values[next_valid]],
            )
            for gap_position in range(position, gap_end):
                target[gap_position] = lagrange_poly(gap_position)
        elif previous_valid is not None or next_valid is not None:
            # Only one side has data: repeat the nearest valid reading.
            anchor = previous_valid if previous_valid is not None else next_valid
            for gap_position in range(position, gap_end):
                target[gap_position] = values[anchor]

        position = gap_end
# Re-fill the blanked readings of every channel by interpolation.
for channel in (temperature, humidity, gas, co):
    seriesItp(channel)
# 4. 可视化
def plot_data(array):
    """Plot ``array`` against its sample positions and display the figure.

    The data is drawn twice on the same axes: once as a line with the default
    style and once with the ``'pr'`` format (red pentagon markers).

    Args:
        array: Sized sequence of values to plot.
    """
    sample_positions = range(len(array))  # x axis: 0 .. len(array) - 1
    # First pass uses the default line style, second pass adds the markers.
    for format_args in ((), ('pr',)):
        plt.plot(sample_positions, array, *format_args)
    plt.show()  # display the figure
# Show one figure per channel, in the same order as before.
for series_to_plot in (temperature, humidity, gas, co):
    plot_data(series_to_plot)
# 5. 写入文件
# Collect the cleaned series under their output column names.
# The humidity column is labelled "相对湿度" (relative humidity), matching the
# source column it was read from.
all_data = pd.DataFrame(
    {
        "温度": temperature,
        "相对湿度": humidity,
        "瓦斯浓度": gas,
        "一氧化碳浓度": co,
    }
)
# Write the result as UTF-8 CSV without the index column, floats formatted
# to two decimals.
all_data.to_csv(
    "all_data.csv",
    index=False,
    float_format="%0.2f",
    encoding="utf-8",
)