Python 井下环境监测数据处理

35 阅读3分钟

情景描述

有一家专业从事煤矿安全监控和预防的公司,最近承接了某煤井的井下监控工作,需要通过采集井下温度、井下湿度以及气体涌出量,来评估和监控井下的环境状况。

任务实施

1.读取数据

import numpy as np
# Read the four measurement columns (CSV columns 1-4) in a single pass
# instead of re-parsing the file once per column.
# dtype=bytes keeps every field as raw text, so the conversion step can
# still tell an empty field (b'') from a number.
# ndmin=2 keeps the result two-dimensional even when the file holds a single
# data row, so every extracted column is a 1-D array.
raw_columns = np.loadtxt(
    "ug_detect.csv",
    delimiter=",",
    skiprows=1,  # skip the header row
    usecols=(1, 2, 3, 4),
    dtype=bytes,
    ndmin=2,
)
# Transposing turns the columns into rows so they can be unpacked by name.
temperature_str, humidity_str, gas_str, co_str = raw_columns.T

print(temperature_str)
print(humidity_str)
print(gas_str)
print(co_str)

2.缺失值和异常值处理

缺失值处理

def conver_data(array_str, flag):
    """Convert raw text fields to floats, replacing bad readings with NaN.

    A field becomes NaN when it is missing (empty or whitespace-only text)
    or when its value exceeds the upper limit selected by *flag*.

    Args:
        array_str: Sequence of raw fields (bytes or str), one per sample.
        flag: Selects the upper limit: 1 -> 50, 2 -> 200, 3 or 4 -> 100.
            Any other flag disables the limit check.

    Returns:
        A float ndarray with the same length as *array_str*.

    Raises:
        ValueError: If a non-blank field cannot be parsed as a float.
    """
    upper_limits = {1: 50, 2: 200, 3: 100, 4: 100}
    limit = upper_limits.get(flag)

    # Start from all-NaN so only accepted readings need to be written.
    array = np.full(len(array_str), np.nan)
    for i, item in enumerate(array_str):
        # Blank text means the sample is missing; leave the slot as NaN.
        if isinstance(item, (bytes, str)) and not item.strip():
            continue
        value = float(item)
        # Values above the limit are treated as abnormal and stay NaN.
        if limit is None or value <= limit:
            array[i] = value
    return array
# Convert each raw column with its own limit flag (1..4, in column order).
temperature, humidity, gas, co = (
    conver_data(raw_column, flag)
    for flag, raw_column in enumerate(
        (temperature_str, humidity_str, gas_str, co_str), start=1
    )
)

处理 NaN 值(用相邻数据填充)

def bisec(array):
    """Fill NaN entries of *array* in place from the surrounding valid samples.

    Interior gaps are filled by linear interpolation between the nearest
    valid samples on each side; for an isolated NaN this is the mean of its
    two neighbours. Leading and trailing NaNs take the nearest valid value.
    Runs of consecutive NaNs are filled as well.

    Args:
        array: 1-D sequence of floats (normally a float ndarray).

    Returns:
        The same *array* object, after filling. An input with no valid
        sample at all is returned unchanged.
    """
    values = np.asarray(array, dtype=float)
    missing = np.isnan(values)
    # Nothing to fill, or nothing to fill from.
    if not missing.any() or missing.all():
        return array

    positions = np.arange(len(values))
    # np.interp clamps outside the known range, which gives the
    # "copy the nearest neighbour" behaviour at both ends.
    filled = np.interp(positions[missing], positions[~missing], values[~missing])
    for position, value in zip(positions[missing], filled):
        array[position] = value
    return array
# Fill the NaN entries of every column, in column order.
temperature, humidity, gas, co = map(bisec, (temperature, humidity, gas, co))

3.绘制曲线

import matplotlib.pyplot as plt
def plot_data(array):
    """Plot *array* against its sample index and display the figure.

    The data is drawn twice on the same axes: once with the default line
    style and once with the 'pr' format string.
    """
    sample_index = np.arange(len(array))  # one x position per sample
    plt.plot(sample_index, array)
    plt.plot(sample_index, array, 'pr')
    plt.show()
# Show one figure per column, in column order.
for _column in (temperature, humidity, gas, co):
    plot_data(_column)

4.写入数据

def write_data(filename, array):
    """Write *array* to *filename* as UTF-8 text with two decimal places.

    Args:
        filename: Path of the file to create or overwrite.
        array: 1-D or 2-D numeric data; columns are separated by commas.
    """
    # Only filename and data are passed positionally; an extra positional
    # argument would collide with the fmt keyword.
    np.savetxt(
        filename,
        array,
        fmt="%.2f",
        delimiter=",",
        encoding="utf-8",
    )
# Write every cleaned column to its own CSV file.
for _filename, _values in (
    ("ug_temperature.csv", temperature),
    ("ug_humidity.csv", humidity),
    ("ug_gas.csv", gas),
    ("ug_co.csv", co),
):
    write_data(_filename, _values)

5.处理其余井下环境指标数据

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import scipy.interpolate as itp

# 1. Read the data
# header=0 takes the first row of the file as the column names.
ug_data = pd.read_csv("ug_detect.csv", header=0, encoding="gbk")
print("读取的数据:\n ", ug_data)

# Pull out one column per monitored quantity.
# NOTE(review): the '?' in these column names looks like a mis-encoded
# character — confirm against the actual CSV header.
temperature, humidity, gas, co = (
    ug_data[column_name]
    for column_name in ('温度(?C)', '相对湿度', '瓦斯(m?/min)', '一氧化碳(m?/min)')
)

# Inspect the whole table: rows 11-20 (positions 10..19; the slice end is
# exclusive), then its type, number of dimensions and shape.
print("ug_data第11行~第20行的值是:\n",ug_data[10:20])
print("ug_data的类型是:\n",type(ug_data))
print("ug_data的维度是:\n",ug_data.ndim)
print("ug_data的形状是:\n",ug_data.shape)

# Same inspection for the carbon-monoxide column.
print("co前10个元素是:\n",co[0:10])
print("co_data的类型是:\n",type(co))
print("co_data的维度是:\n",co.ndim)
print("co_data的形状是:\n",co.shape)
# 2. Outlier handling
def defectsCop(data, threshold):
    """Replace every value in *data* that is >= *threshold* with NaN, in place.

    Args:
        data: Numeric pandas Series (or ndarray) to clean; modified in place.
        threshold: Readings at or above this value are treated as abnormal.

    Returns:
        None.
    """
    # A boolean mask selects by position, so this works for any index,
    # not only the default 0..n-1 labels. NaN compares False and is untouched.
    data[data >= threshold] = np.nan
# Apply each column's own abnormal-value threshold.
for _column, _threshold in (
    (temperature, 60),
    (humidity, 200),
    (gas, 100),
    (co, 100),
):
    defectsCop(_column, _threshold)

# 3. Interpolation
def seriesItp(data):
    """Fill the null entries of *data* in place by Lagrange interpolation.

    Each interior gap is filled from the two-point Lagrange polynomial through
    the nearest valid sample on each side; for an isolated gap this is the
    mean of its two neighbours. A gap before the first or after the last valid
    sample copies the nearest valid value. Data with no valid sample is left
    unchanged.

    Args:
        data: Sequence indexable by 0..len(data)-1, e.g. a pandas Series
            with the default index; modified in place.

    Returns:
        None.
    """
    size = len(data)
    # Record the valid positions up front so filled values are never used as
    # interpolation anchors.
    valid = [i for i in range(size) if not pd.isnull(data[i])]
    if not valid:
        return

    next_valid = 0  # index into `valid` of the first valid position >= i
    for i in range(size):
        if next_valid < len(valid) and valid[next_valid] == i:
            next_valid += 1
            continue
        left = valid[next_valid - 1] if next_valid > 0 else None
        right = valid[next_valid] if next_valid < len(valid) else None
        if left is None:
            data[i] = data[right]
        elif right is None:
            data[i] = data[left]
        else:
            lagrange_poly = itp.lagrange([left, right], [data[left], data[right]])
            data[i] = lagrange_poly(i)
# Fill the gaps of every column, in column order.
for _column in (temperature, humidity, gas, co):
    seriesItp(_column)
# 4. Visualisation
def plot_data(array):
    """Plot *array* against its sample index and display the figure.

    The data is drawn twice on the same axes: once with the default line
    style and once with the 'pr' format string.
    """
    sample_index = range(len(array))  # one x position per sample
    plt.plot(sample_index, array)
    plt.plot(sample_index, array, 'pr')
    plt.show()
# Show one figure per cleaned column, in column order.
for _column in (temperature, humidity, gas, co):
    plot_data(_column)

# 5. Write the results to a file
# Assemble the four cleaned columns into one table, one column per quantity.
all_data = pd.DataFrame(
    dict(
        zip(
            ("温度", "相对湿度", "瓦斯浓度", "一氧化碳浓度"),
            (temperature, humidity, gas, co),
        )
    )
)
# Save without the row index, formatting floats with two decimal places.
all_data.to_csv("all_data.csv", index=False, float_format="%0.2f", encoding="utf-8")