Kaggle 猫狗识别

155 阅读3分钟

个人学习记录

数据集链接

反正是一路暴力卷积过来了……

准确率在 90% 左右波动。

import warnings
import torch
import torch.nn as nn
from torch.utils.data import DataLoader, random_split
from torchvision import transforms, datasets
from PIL import Image
import matplotlib.pyplot as plt


# Pick the GPU when one is visible to PyTorch, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"

# ---- hyper-parameters ----
channel_size = 3                       # channels of every image tensor fed to the network
batch_size = 8                         # samples per mini-batch
print_iter = batch_size * 20           # log every 20 mini-batches (counter is in samples)
train_size, test_size = 20_000, 5_000  # subset lengths passed to random_split
epochs = 15

# ---- running state shared with the training loop ----
sum_loss = 0    # loss accumulated since the previous log line
plt_loss = []   # one accumulated-loss value per log line, plotted at the end
acc_list = []   # one test accuracy per epoch, plotted at the end


def to_device(batch):
    """Collate ``(image, label)`` samples into two float tensors on ``device``.

    Used as the ``collate_fn`` of the data loaders: the image tensors are
    stacked along a new leading dimension and the labels are packed into a
    1-D tensor; both are moved to ``device`` and cast to float.
    """
    images, targets = zip(*batch)
    image_batch = torch.stack(images)
    target_batch = torch.tensor(targets)
    return image_batch.to(device).float(), target_batch.to(device).float()


def loader(path):
    """Load the image at ``path`` as an RGB ``PIL.Image``.

    Passed to ``ImageFolder`` as its ``loader``.  Unreadable files do not
    abort the run; they are silently replaced by a black placeholder.

    Args:
        path: Path of the image file to read.

    Returns:
        The decoded image converted to RGB, or a black 256x256 RGB image
        when opening/decoding raises ``IOError``/``OSError``.
    """
    # Install the filter before decoding so that warnings raised while
    # opening/converting the very first image are suppressed as well.
    warnings.simplefilter("ignore", category=UserWarning)
    try:
        # The context manager must own the *opened file*; convert() returns a
        # new, fully decoded image that stays valid after the file is closed.
        with Image.open(path) as raw:
            return raw.convert("RGB")
    except (IOError, OSError):
        return Image.new('RGB', (256, 256), 'black')


# Per-sample preprocessing: fixed 256x256 size, light random augmentation,
# tensor conversion, then grayscale with `channel_size` output channels.
transform = transforms.Compose(
    [
        transforms.Resize(size=(256, 256)),
        transforms.RandomRotation((-15, 15)),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Grayscale(channel_size),
    ]
)

# One sub-folder of "PetImages" per class; files are read through `loader`.
ImgData = datasets.ImageFolder(root="PetImages", loader=loader, transform=transform)

# NOTE(review): both subsets share `ImgData`, so the random augmentation above
# is applied to the test samples too.
train_set, test_set = random_split(ImgData, lengths=[train_size, test_size])

train_loader = DataLoader(
    train_set, batch_size=batch_size, shuffle=True, collate_fn=to_device
)
test_loader = DataLoader(
    test_set, batch_size=batch_size, shuffle=True, collate_fn=to_device
)


# Convolutional trunk as (in_channels, out_channels, add_batch_norm).
# Each entry expands to Conv2d(3x3, stride 1) -> MaxPool2d(2x2, stride 2),
# optionally followed by BatchNorm2d over the conv's output channels.
_trunk_spec = (
    (channel_size, 32, False),
    (32, 64, False),
    (64, 64, True),
    (64, 64, False),
    (64, 128, False),
    (128, 128, True),
)

# The network starts by batch-normalising the raw input channels.
_layers = [nn.BatchNorm2d(channel_size)]
for _in_ch, _out_ch, _with_norm in _trunk_spec:
    _layers.append(nn.Conv2d(_in_ch, _out_ch, kernel_size=3, stride=1))
    _layers.append(nn.MaxPool2d(kernel_size=2, stride=2))
    if _with_norm:
        _layers.append(nn.BatchNorm2d(_out_ch))

# Classifier head.  For the 256x256 inputs used by the training loop the
# trunk leaves 128 channels of 2x2, i.e. the 512 features expected here.
_layers.extend(
    [
        nn.Flatten(),
        nn.Linear(512, 16),
        nn.LeakyReLU(),
        nn.Linear(16, 1),
        nn.Sigmoid(),  # single output in (0, 1), matched with BCELoss below
    ]
)

model = nn.Sequential(*_layers).to(device)


optimizer = torch.optim.Adam(model.parameters())
criterion = torch.nn.BCELoss()


for epoch in range(epochs):
    # ---- training pass ------------------------------------------------
    # Training mode: BatchNorm normalises with the statistics of the current
    # batch and updates its running estimates.
    model.train()
    it = 0
    for img, label in train_loader:
        # Take the size from the tensor itself so that a smaller final batch
        # (dataset size not divisible by batch_size) does not break view().
        n_samples = img.size(0)
        img = img.view(n_samples, channel_size, 256, 256)
        label = label.view(n_samples, 1)  # match the (N, 1) model output
        optimizer.zero_grad()
        output = model(img)
        loss = criterion(output, label)
        sum_loss += loss.item()
        loss.backward()
        optimizer.step()
        # `it` counts samples; log whenever it reaches a multiple of print_iter.
        if not it % print_iter:
            print(f"epoch{epoch:>3} train iter {it:>5}/{train_size} "
                  f"loss {sum_loss:>.3f}")
            plt_loss.append(sum_loss)
            sum_loss = 0
        it += n_samples

    # ---- evaluation pass ----------------------------------------------
    # Evaluation mode: BatchNorm uses its running statistics and no longer
    # updates them, so test batches neither depend on each other nor leak
    # into the model.
    model.eval()
    it, correct, total = 0, 0, 0
    with torch.no_grad():
        for img, label in test_loader:
            n_samples = img.size(0)
            img = img.view(n_samples, channel_size, 256, 256)
            label = label.view(n_samples, -1)
            outputs = model(img)
            # A prediction is a hit when the sigmoid output lies on the same
            # side of 0.5 as the 0/1 label.
            hit = (outputs - label).abs() < 0.5
            correct += hit.sum().item()
            total += n_samples
            if not it % print_iter:
                print(f"test iter {it:>5}/{test_size}")
            it += n_samples

    accuracy = correct / total
    acc_list.append(accuracy)
    print(accuracy)


# Persist the whole model object (architecture + weights) so it can be
# restored with torch.load('DogCatRcg_model.pth').
torch.save(model, 'DogCatRcg_model.pth')

# Figure 1: accumulated training loss per log line.
plt.figure()
plt.plot(range(len(plt_loss)), plt_loss, label='loss')
plt.legend()  # without this call the `label` above is never drawn

# Figure 2: test accuracy per epoch.
plt.figure()
plt.plot(range(len(acc_list)), acc_list, label='accuracy')
plt.legend()
plt.show()

loss: D0EF0884B18BAA03C524281EF7CEBBE3.png

acc:

9668309E42E089C77ADAB3E317F18DDC.png

模型调用所需的代码

import warnings
import torch
from PIL import Image
from torch.utils.data import DataLoader, random_split
from torchvision import transforms, datasets

# Pick the GPU when one is visible to PyTorch, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"

channel_size = 3                       # channels of every image tensor fed to the network
batch_size = 100                       # samples per evaluation batch
print_iter = 200                       # progress line every 200 samples
train_size, test_size = 20_000, 5_000  # subset lengths passed to random_split


def to_device(batch):
    """Collate ``(image, label)`` samples into two float tensors on ``device``.

    Used as the ``collate_fn`` of the data loaders: the image tensors are
    stacked along a new leading dimension and the labels are packed into a
    1-D tensor; both are moved to ``device`` and cast to float.
    """
    samples, targets = zip(*batch)
    sample_batch = torch.stack(samples).to(device)
    target_batch = torch.tensor(targets).to(device)
    return sample_batch.float(), target_batch.float()


def loader(path):
    """Load the image at ``path`` as an RGB ``PIL.Image``.

    Passed to ``ImageFolder`` as its ``loader``.  Unreadable files do not
    abort the run; a warning is printed and a black placeholder is used.

    Args:
        path: Path of the image file to read.

    Returns:
        The decoded image converted to RGB, or a black 256x256 RGB image
        when opening/decoding raises ``IOError``/``OSError``.
    """
    # Install the filter before decoding so that warnings raised while
    # opening/converting the very first image are suppressed as well.
    warnings.simplefilter("ignore", category=UserWarning)
    try:
        # The context manager must own the *opened file*; convert() returns a
        # new, fully decoded image that stays valid after the file is closed.
        with Image.open(path) as raw:
            return raw.convert("RGB")
    except (IOError, OSError):
        print(f"Warning: Could not load image {path}")
        return Image.new('RGB', (256, 256), 'black')


# Per-sample preprocessing: fixed 256x256 size, light random augmentation,
# tensor conversion, then grayscale with `channel_size` output channels.
transform = transforms.Compose(
    [
        transforms.Resize(size=(256, 256)),
        transforms.RandomRotation((-15, 15)),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Grayscale(channel_size),
    ]
)

# One sub-folder of "PetImages" per class; files are read through `loader`.
ImgData = datasets.ImageFolder(root="PetImages", loader=loader, transform=transform)

# NOTE(review): no generator/seed is passed, so this split is drawn afresh on
# every run and is not guaranteed to match the hold-out set used in training.
train_set, test_set = random_split(ImgData, lengths=[train_size, test_size])

train_loader = DataLoader(
    train_set, batch_size=batch_size, shuffle=True, collate_fn=to_device
)
test_loader = DataLoader(
    test_set, batch_size=batch_size, shuffle=True, collate_fn=to_device
)

# map_location lets a checkpoint that was saved on a GPU machine be restored
# on a CPU-only one; without it torch.load fails before .to(device) runs.
# NOTE: torch.load unpickles arbitrary objects - only load files you trust.
# NOTE(review): PyTorch >= 2.6 defaults to weights_only=True, which rejects a
# fully pickled model like this one; pass weights_only=False there.
model = torch.load(r"DogCatRcg_model.pth", map_location=device).to(device)
# Evaluation mode: BatchNorm uses its stored running statistics instead of
# the statistics of whatever batch is being scored.
model.eval()
print(model)
# Not used by the evaluation below; kept so the script exposes the same names
# as the training script.
optimizer = torch.optim.Adam(model.parameters())
criterion = torch.nn.BCELoss()


it, correct, total = 0, 0, 0
with torch.no_grad():
    for img, label in test_loader:
        # Take the size from the tensor itself so that a smaller final batch
        # (dataset size not divisible by batch_size) does not break view().
        n_samples = img.size(0)
        img = img.view(n_samples, channel_size, 256, 256)
        label = label.view(n_samples, -1)
        outputs = model(img)
        # A prediction is a hit when the sigmoid output lies on the same side
        # of 0.5 as the 0/1 label.
        hit = (outputs - label).abs() < 0.5
        correct += hit.sum().item()
        total += n_samples
        if not it % print_iter:
            print(f"test iter {it:>5}/{test_size}")
        it += n_samples

accuracy = correct / total
print(accuracy)

单张图片的加载


import warnings
import torch
from PIL import Image
from torchvision import transforms

# Pick the GPU when one is visible to PyTorch, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"

channel_size = 3  # channels of the image tensor fed to the network

# Deterministic preprocessing for inference: resize, convert to a tensor,
# then grayscale with `channel_size` output channels (no augmentation).
transform = transforms.Compose(
    [
        transforms.Resize(size=(256, 256)),
        transforms.ToTensor(),
        transforms.Grayscale(channel_size),
    ]
)


def load_img(img_path):
    """Read one image file and return it as a preprocessed tensor on ``device``.

    Args:
        img_path: Path of the image file to read.

    Returns:
        ``transform`` applied to the RGB image, moved to ``device``.  When the
        file cannot be read, a warning is printed and the transformed black
        256x256 placeholder is returned instead, so the caller always
        receives a tensor (never a PIL image).
    """
    # Install the filter before decoding so that warnings raised while
    # opening/converting the image are suppressed.
    warnings.simplefilter("ignore", category=UserWarning)
    try:
        # The context manager must own the *opened file*; convert() returns a
        # new, fully decoded image that stays valid after the file is closed.
        with Image.open(img_path) as raw:
            image = raw.convert("RGB")
    except (IOError, OSError):
        # Make the substitution visible: a prediction on the placeholder says
        # nothing about the requested file.
        print(f"Warning: Could not load image {img_path}")
        image = Image.new('RGB', (256, 256), 'black')
    return transform(image).to(device)


path = r'E:\PycharmProjects\torchlibs\PetImages\Cat\10028.jpg'
# map_location lets a checkpoint that was saved on a GPU machine be restored
# on a CPU-only one; without it torch.load fails before .to(device) runs.
# NOTE: torch.load unpickles arbitrary objects - only load files you trust.
# NOTE(review): PyTorch >= 2.6 defaults to weights_only=True, which rejects a
# fully pickled model like this one; pass weights_only=False there.
model = torch.load('DogCatRcg_model.pth', map_location=device).to(device)
# Evaluation mode is essential for a single image: in training mode BatchNorm
# would normalise with the statistics of this one sample.
model.eval()
with torch.no_grad():  # inference only, no autograd graph needed
    z = model(load_img(path).view(1, channel_size, 256, 256))
# z is the sigmoid output in (0, 1).
# NOTE(review): presumably 0 = Cat and 1 = Dog (ImageFolder orders classes
# alphabetically) - confirm with ImgData.class_to_idx from the training script.
print(z)