卷积操作

87 阅读3分钟

持续创作,加速成长!这是我参与「掘金日新计划 · 10 月更文挑战」的第7天,点击查看活动详情


theme: condensed-night-purple

官方文档:

PyTorch documentation — PyTorch 1.12 documentation

一个标准的卷积神经网络(Convolutional Neural Networks,简称CNN)架构主要由卷积层、池化层和全连接层等核心层次构成。

1 卷积层

卷积层(Convolution Layer)的主要作用是对输入的数据进行特征提取,而完成该功能的是卷积层中的卷积核(Filter)。可以将卷积核看作一个指定窗口大小的扫描器,扫描器通过一次又一次地扫描输入的数据,来提取数据中的特征。

Conv2d

image.png

import torch
import torchvision
from torch import nn
from torch.nn import Conv2d
from torch.utils.data import DataLoader
from torch.utils.tensorboard import SummaryWriter

# CIFAR-10 with train=False, each image converted to a tensor; stored under ../data.
dataset = torchvision.datasets.CIFAR10(
    root="../data",
    train=False,
    transform=torchvision.transforms.ToTensor(),
    download=True,
)
# Iterate over the dataset in batches of 64 images.
dataloader = DataLoader(dataset=dataset, batch_size=64)

class Anke(nn.Module):
    """Minimal network holding one 2-D convolution (3 -> 6 channels, 3x3 kernel)."""

    def __init__(self):
        super().__init__()
        # Stride 1 with no padding: a 32x32 input comes out as 30x30.
        self.conv1 = Conv2d(3, 6, kernel_size=3, stride=1, padding=0)

    def forward(self, x):
        """Run ``x`` through the convolution and return the feature maps."""
        return self.conv1(x)

anke = Anke()

writer = SummaryWriter("logs")

# Log every input batch and its convolved counterpart under the same step.
for step, (imgs, _targets) in enumerate(dataloader):
    output = anke(imgs)
    print(imgs.shape)    # torch.Size([64, 3, 32, 32]) for a full batch
    print(output.shape)  # torch.Size([64, 6, 30, 30]) for a full batch
    writer.add_images("input", imgs, step)

    # Fold the 6 output channels into 3-channel images so they can be logged:
    # [64, 6, 30, 30] -> [xxx, 3, 30, 30]
    output = torch.reshape(output, (-1, 3, 30, 30))
    writer.add_images("output", output, step)

# Close the writer so buffered events are flushed to the "logs" directory.
writer.close()

2 池化层

池化层可以看作卷积神经网络中提取输入数据核心特征的一种方式,它不仅实现了对原始数据的压缩,还大量减少了参与模型计算的参数,提升了计算效率。其中,最常用的池化方法是平均池化和最大池化;池化层处理的输入数据一般是经过卷积操作之后生成的特征图。

MaxPool2d

image.png

import torch
from torch.nn import Conv2d, MaxPool2d
from torch import nn
import torchvision
from torch.utils.data import DataLoader
from torch.utils.tensorboard import SummaryWriter

# CIFAR-10 with train=False read from ./cifar10; download=False, so the files must already exist.
dataset = torchvision.datasets.CIFAR10(
    root='./cifar10',
    train=False,
    transform=torchvision.transforms.ToTensor(),
    download=False,
)
# Batches of 64 images, served in dataset order (no shuffling).
dataloader = DataLoader(dataset, batch_size=64, shuffle=False)


# 5x5 sample matrix, stored as a (1, 1, 5, 5) float32 tensor.
input = torch.tensor(
    [
        [1, 2, 0, 3, 1],
        [0, 1, 2, 3, 1],
        [1, 2, 1, 0, 0],
        [5, 2, 3, 1, 1],
        [2, 1, 0, 1, 1],
    ],
    dtype=torch.float32,
).reshape(1, 1, 5, 5)

# 3x3 integer kernel, left as a plain 2-D tensor.
kernal = torch.tensor(
    [
        [1, 2, 1],
        [0, 1, 0],
        [2, 1, 0],
    ]
)

class Anke(nn.Module):
    """Network made of a single 3x3 max-pooling layer."""

    def __init__(self):
        super().__init__()
        # ceil_mode=True also keeps windows that only partly cover the input edge.
        self.maxpool = MaxPool2d(3, ceil_mode=True)

    def forward(self, x):
        """Return ``x`` after max pooling."""
        return self.maxpool(x)

anke = Anke()

writer = SummaryWriter("logs")
step = 0
# Record each batch before and after pooling under the same step index.
for imgs, _labels in dataloader:
    writer.add_images("input", imgs, step)
    writer.add_images("output", anke(imgs), step)
    step += 1

writer.close()

3 非线性激活

非线性激活函数接收输入,对其施加非线性的数学变换后生成输出。

ReLU & Sigmoid

import torch
import torchvision.datasets
from torch import nn
from torch.nn import ReLU, Sigmoid
from torch.utils.data import DataLoader
from torch.utils.tensorboard import SummaryWriter

# 5x5 sample matrix, reshaped to (batch, 1, 5, 5) with the batch size inferred.
input = torch.tensor(
    [
        [1, 2, 0, 3, 1],
        [0, 1, 2, 3, 1],
        [1, 2, 1, 0, 0],
        [5, 2, 3, 1, 1],
        [2, 1, 0, 1, 1],
    ],
    dtype=torch.float32,
).reshape(-1, 1, 5, 5)

# CIFAR-10 with train=False read from ./cifar10, each image converted to a tensor.
dataset = torchvision.datasets.CIFAR10(
    root="./cifar10",
    train=False,
    transform=torchvision.transforms.ToTensor(),
)
# Batches of 64 images.
dataloader = DataLoader(dataset, batch_size=64)

class Anke(nn.Module):
    """Activation demo: owns a ReLU and a Sigmoid, but forward applies only the Sigmoid."""

    def __init__(self):
        super().__init__()
        self.relu1 = ReLU()  # registered, not used by forward
        self.sigmoid = Sigmoid()

    def forward(self, input):
        """Return the element-wise sigmoid of ``input``."""
        activated = self.sigmoid(input)
        return activated

anke = Anke()

writer = SummaryWriter("logs")
step = 0

# Record each batch before and after the activation under the same step index.
for imgs, _labels in dataloader:
    writer.add_images("input", imgs, step)
    activated = anke(imgs)
    writer.add_images("output", activated, step)
    step = step + 1

writer.close()
print("end")

4 线性层(全连接层)

全连接层的主要作用是将输入图像在经过卷积和池化操作后提取的特征进行压缩,并且根据压缩的特征完成模型的分类功能。

import torch
import torchvision
from torch import nn
from torch.nn import Linear
from torch.utils.data import DataLoader

# CIFAR-10 with train=False, each image converted to a tensor; stored under ./data.
dataset = torchvision.datasets.CIFAR10("./data", train=False, transform=torchvision.transforms.ToTensor(),
                                       download=True)

# The script below flattens each whole batch into one vector and feeds it to
# Linear(196608, 10); 196608 = 64 * 3 * 32 * 32, so every batch must hold
# exactly 64 images. The CIFAR-10 test set has 10,000 images, which is not a
# multiple of 64, so drop_last=True discards the short final batch that would
# otherwise raise a shape-mismatch error.
dataloader = DataLoader(dataset, batch_size=64, drop_last=True)

class Anke(nn.Module):
    """Single fully connected layer mapping 196608 input features to 10 outputs."""

    def __init__(self):
        super().__init__()
        self.linear1 = Linear(in_features=196608, out_features=10)

    def forward(self, input):
        """Return ``linear1`` applied to ``input``."""
        return self.linear1(input)

anke = Anke()

for imgs, targets in dataloader:
    print(imgs.shape)
    # Collapse the whole batch (not each image separately) into one 1-D vector.
    flat = torch.flatten(imgs)
    print(flat.shape)
    output = anke(flat)
    print(output.shape)