鸢尾花数据集可视化

189 阅读1分钟

通过鸢尾花的花瓣长度(PetalLength)和花瓣宽度(PetalWidth)观察它们的类别

import tensorflow as tf
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

# Fetch the iris training CSV through Keras' get_file helper; the local file
# name is the last path component of the URL.
TRAIN_URL = "http://download.tensorflow.org/data/iris_training.csv"
csv_name = TRAIN_URL.rsplit('/', 1)[-1]
train_path = tf.keras.utils.get_file(csv_name, TRAIN_URL)

# Column labels applied to the CSV; header=0 makes pandas replace the file's
# own first row with these names.
COLUMN_NAMES = [
    'SepalLength',
    'SepalWidth',
    'PetalLength',
    'PetalWidth',
    'Species',
]
df_iris = pd.read_csv(train_path, header=0, names=COLUMN_NAMES)

# Work on a plain 2-D array so columns can be selected by position.
iris = np.array(df_iris)
petal_length = iris[:, 2]
petal_width = iris[:, 3]
species = iris[:, 4]

# Petal length (x) against petal width (y), coloured by the Species column
# through the 'brg' colormap.
plt.scatter(petal_length, petal_width, c=species, cmap='brg')
plt.title("Anderson's iris data set\n(blue->setosa | red->versicolor | green->virginica)")
plt.xlabel(COLUMN_NAMES[2])
plt.ylabel(COLUMN_NAMES[3])
plt.show()

image.png

通过花萼长度(SepalLength)与其它几个属性的组合观察鸢尾花的种类

import tensorflow as tf
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
# Fetch the iris training CSV through Keras' get_file helper; the local file
# name is the last path component of the URL.
TRAIN_URL = "http://download.tensorflow.org/data/iris_training.csv"
csv_name = TRAIN_URL.rsplit('/', 1)[-1]
train_path = tf.keras.utils.get_file(csv_name, TRAIN_URL)

COLUMN_NAMES = [
    'SepalLength',
    'SepalWidth',
    'PetalLength',
    'PetalWidth',
    'Species',
]

# One wide figure holding a single row of four panels.
fig = plt.figure('Iris Data', figsize=(15, 3))

# header=0 makes pandas replace the file's own first row with COLUMN_NAMES.
df_iris = pd.read_csv(train_path, header=0, names=COLUMN_NAMES)
iris = np.array(df_iris)

fig.suptitle("Anderson's iris data set\n(blue->setosa | red->versicolor | green->virginica)")

sepal_length = iris[:, 0]
species = iris[:, 4]

# Panel k plots attribute k (x) against SepalLength (y).  Panel 0 would plot
# SepalLength against itself, so it only shows the attribute name instead.
for col, col_name in enumerate(COLUMN_NAMES[:4]):
    ax = plt.subplot(1, 4, col + 1)
    if col != 0:
        ax.scatter(iris[:, col], sepal_length, c=species, cmap='brg')
    else:
        ax.text(0.3, 0.5, COLUMN_NAMES[0], fontsize=15)

    ax.set_title(col_name)
    ax.set_ylabel(COLUMN_NAMES[0])

# Reserve the top 10% of the figure for the two-line suptitle.
plt.tight_layout(rect=[0, 0, 1, 0.9])
plt.show()

image.png

将鸢尾花的 4 种属性两两组合,可以得到 16 幅子图,但真正有效的其实只有 6 种:对角线上是属性与自身的组合,不提供有用信息(这里只显示属性名);对角线上方与下方的子图彼此对称,只是横、纵坐标轴互换。

import tensorflow as tf
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
# Fetch the iris training CSV through Keras' get_file helper; the local file
# name is the last path component of the URL.
TRAIN_URL = "http://download.tensorflow.org/data/iris_training.csv"
train_path = tf.keras.utils.get_file(TRAIN_URL.split('/')[-1], TRAIN_URL)

COLUMN_NAMES = ['SepalLength', 'SepalWidth', 'PetalLength', 'PetalWidth', 'Species']

# Square figure holding a 4x4 grid: row i uses attribute i on the y-axis,
# column j uses attribute j on the x-axis.
fig = plt.figure('Iris Data', figsize=(15, 15))

# header=0 makes pandas replace the file's own first row with COLUMN_NAMES.
df_iris = pd.read_csv(train_path, names=COLUMN_NAMES, header=0)

iris = np.array(df_iris)
fig.suptitle("Anderson's iris data set\n(blue->setosa | red->versicolor | green->virginica)")

for i in range(4):
    for j in range(4):
        plt.subplot(4, 4, 4 * i + (j + 1))
        if i == j:
            # An attribute plotted against itself carries no information, so
            # the diagonal cell only shows the attribute name.
            plt.text(0.3, 0.4, COLUMN_NAMES[i], fontsize=15)
        else:
            plt.scatter(iris[:, j], iris[:, i], c=iris[:, 4], cmap='brg')
        if i == 0:
            # Top row: column header naming the x attribute.
            plt.title(COLUMN_NAMES[j])
        if j == 0:
            # First column: label the row with its y attribute.  This was
            # plt.title, which overwrote the column header in cell (0, 0) and
            # put misleading titles above the other first-column plots.
            plt.ylabel(COLUMN_NAMES[i])

# Reserve the top 7% of the figure for the two-line suptitle.
plt.tight_layout(rect=[0, 0, 1, 0.93])

plt.show()

image.png