Neural Network Architecture Reference: 2-1 Convolutional Networks


Prompt used to generate each table:

Give the network structure of {xxx} as a table, including layer name, type, input size (HWC), output size (HWC), kernel size, stride, and parameter count.

AlexNet

| Layer | Type | Input (HWC) | Output (HWC) | Kernel | Stride | Params |
| --- | --- | --- | --- | --- | --- | --- |
| Input | Input | 227x227x3 | 227x227x3 | - | - | 0 |
| Conv1 | Convolution | 227x227x3 | 55x55x96 | 11x11 | 4 | 11x11x3x96 + 96 = 34944 |
| MaxPool1 | Max pooling | 55x55x96 | 27x27x96 | 3x3 | 2 | 0 |
| LRN1 | Local response norm | 27x27x96 | 27x27x96 | - | - | 0 |
| Conv2 | Convolution | 27x27x96 | 27x27x256 | 5x5 | 1 | 5x5x96x256 + 256 = 614656 |
| MaxPool2 | Max pooling | 27x27x256 | 13x13x256 | 3x3 | 2 | 0 |
| LRN2 | Local response norm | 13x13x256 | 13x13x256 | - | - | 0 |
| Conv3 | Convolution | 13x13x256 | 13x13x384 | 3x3 | 1 | 3x3x256x384 + 384 = 885120 |
| Conv4 | Convolution | 13x13x384 | 13x13x384 | 3x3 | 1 | 3x3x384x384 + 384 = 1327488 |
| Conv5 | Convolution | 13x13x384 | 13x13x256 | 3x3 | 1 | 3x3x384x256 + 256 = 884992 |
| MaxPool3 | Max pooling | 13x13x256 | 6x6x256 | 3x3 | 2 | 0 |
| FC6 | Fully connected | 6x6x256 | 4096 | - | - | 6x6x256x4096 + 4096 = 37752832 |
| FC7 | Fully connected | 4096 | 4096 | - | - | 4096x4096 + 4096 = 16781312 |
| FC8 | Fully connected | 4096 | 1000 | - | - | 4096x1000 + 1000 = 4097000 |
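
The convolution rows all follow params = kernel_h x kernel_w x C_in x C_out + C_out (weights plus biases). A quick sanity check of the Conv1 entry against PyTorch:

```python
import torch.nn as nn

# Conv1 of AlexNet: 3 -> 96 channels, 11x11 kernel, stride 4, bias included
conv1 = nn.Conv2d(3, 96, kernel_size=11, stride=4)
print(sum(p.numel() for p in conv1.parameters()))  # 11*11*3*96 + 96 = 34944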

PyTorch Code

```python
import torch
import torch.nn as nn
import torch.nn.functional as F
class AlexNet(nn.Module):
    def __init__(self, num_classes=1000):
        super(AlexNet, self).__init__()
        self.features = nn.Sequential(
            nn.Conv2d(3, 96, kernel_size=11, stride=4, padding=2),  # Conv1
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2),  # MaxPool1
            nn.LocalResponseNorm(size=5, alpha=0.0001, beta=0.75, k=2),  # LRN1
            nn.Conv2d(96, 256, kernel_size=5, padding=2),  # Conv2
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2),  # MaxPool2
            nn.LocalResponseNorm(size=5, alpha=0.0001, beta=0.75, k=2),  # LRN2
            nn.Conv2d(256, 384, kernel_size=3, padding=1),  # Conv3
            nn.ReLU(inplace=True),
            nn.Conv2d(384, 384, kernel_size=3, padding=1),  # Conv4
            nn.ReLU(inplace=True),
            nn.Conv2d(384, 256, kernel_size=3, padding=1),  # Conv5
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2),  # MaxPool3
        )
        self.classifier = nn.Sequential(
            nn.Dropout(),
            nn.Linear(256 * 6 * 6, 4096),  # FC6
            nn.ReLU(inplace=True),
            nn.Dropout(),
            nn.Linear(4096, 4096),  # FC7
            nn.ReLU(inplace=True),
            nn.Linear(4096, num_classes),  # FC8
        )
    def forward(self, x):
        x = self.features(x)
        x = x.view(x.size(0), 256 * 6 * 6)
        x = self.classifier(x)
        return x
# Create an AlexNet instance
model = AlexNet(num_classes=1000)
print(model)
```
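
A quick smoke test of the model above. Note that Conv1 here uses padding=2 in the torchvision style, so its feature map is 56x56 rather than the table's 55x55; both variants still reach 6x6x256 before the classifier:

```python
x = torch.randn(1, 3, 227, 227)
print(model.features(x).shape)  # torch.Size([1, 256, 6, 6])
print(model(x).shape)           # torch.Size([1, 1000])
```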

LeNet-5

Network Structure

| Layer | Type | Input (HWC) | Output (HWC) | Kernel | Stride | Params |
| --- | --- | --- | --- | --- | --- | --- |
| Input | Input | 32x32x1 | 32x32x1 | - | - | 0 |
| C1 | Convolution | 32x32x1 | 28x28x6 | 5x5 | 1 | (5x5x1+1)x6 = 156 |
| S2 | Subsampling (avg pool) | 28x28x6 | 14x14x6 | 2x2 | 2 | 0 |
| C3 | Convolution | 14x14x6 | 10x10x16 | 5x5 | 1 | (5x5x6+1)x16 = 2416 |
| S4 | Subsampling (avg pool) | 10x10x16 | 5x5x16 | 2x2 | 2 | 0 |
| C5 | Convolution | 5x5x16 | 1x1x120 | 5x5 | 1 | (5x5x16+1)x120 = 48120 |
| F6 | Fully connected | 1x1x120 | 1x1x84 | - | - | 120x84 + 84 = 10164 |
| Output | Fully connected | 1x1x84 | 1x1x10 | - | - | 84x10 + 10 = 850 |
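
Every spatial size in the table can be re-derived from the standard formula out = (in + 2*padding - kernel) // stride + 1; a small helper makes the column easy to check:

```python
def out_size(n, k, s=1, p=0):
    """Output edge length of a convolution or pooling layer."""
    return (n + 2 * p - k) // s + 1

print(out_size(32, 5))        # C1: 28
print(out_size(28, 2, s=2))   # S2: 14
print(out_size(14, 5))        # C3: 10
print(out_size(10, 2, s=2))   # S4: 5
print(out_size(5, 5))         # C5: 1
```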

PyTorch Code

```python
import torch
import torch.nn as nn
import torch.nn.functional as F
class LeNet5(nn.Module):
    def __init__(self, num_classes=10):
        super(LeNet5, self).__init__()
        # Convolutional layer (C1): 32x32x1 -> 28x28x6, no padding, matching the table
        # (with padding=2 the network would end at 2x2x120 and break fc1's in_features=120)
        self.conv1 = nn.Conv2d(in_channels=1, out_channels=6, kernel_size=5)
        # Subsampling layer (S2)
        self.pool1 = nn.AvgPool2d(kernel_size=2, stride=2)
        # Convolutional layer (C3)
        self.conv2 = nn.Conv2d(in_channels=6, out_channels=16, kernel_size=5)
        # Subsampling layer (S4)
        self.pool2 = nn.AvgPool2d(kernel_size=2, stride=2)
        # Convolutional layer (C5)
        self.conv3 = nn.Conv2d(in_channels=16, out_channels=120, kernel_size=5)
        # Fully connected layer (F6)
        self.fc1 = nn.Linear(in_features=120, out_features=84)
        # Output layer
        self.fc2 = nn.Linear(in_features=84, out_features=num_classes)
    def forward(self, x):
        # C1
        x = self.conv1(x)
        x = F.relu(x)
        # S2
        x = self.pool1(x)
        # C3
        x = self.conv2(x)
        x = F.relu(x)
        # S4
        x = self.pool2(x)
        # C5
        x = self.conv3(x)
        x = F.relu(x)
        # Flatten the output for the fully connected layer
        x = x.view(-1, self.num_flat_features(x))
        # F6
        x = self.fc1(x)
        x = F.relu(x)
        # Output layer
        x = self.fc2(x)
        return x
    def num_flat_features(self, x):
        size = x.size()[1:]  # all dimensions except the batch dimension
        num_features = 1
        for s in size:
            num_features *= s
        return num_features
# Example of creating the LeNet5 model
model = LeNet5(num_classes=10)
print(model)
# Example input tensor (batch size of 1, 1 channel, 32x32 image)
input_tensor = torch.randn(1, 1, 32, 32)
# Forward pass through the model
output = model(input_tensor)
print(output)
```

VGG16

| Layer | Type | Input (HWC) | Output (HWC) | Kernel | Stride | Params |
| --- | --- | --- | --- | --- | --- | --- |
| Input | Input | 224x224x3 | 224x224x3 | - | - | 0 |
| Conv1_1 | Conv2D | 224x224x3 | 224x224x64 | 3x3 | 1 | 1792 |
| Conv1_2 | Conv2D | 224x224x64 | 224x224x64 | 3x3 | 1 | 36928 |
| MaxPool1 | MaxPooling2D | 224x224x64 | 112x112x64 | 2x2 | 2 | 0 |
| Conv2_1 | Conv2D | 112x112x64 | 112x112x128 | 3x3 | 1 | 73856 |
| Conv2_2 | Conv2D | 112x112x128 | 112x112x128 | 3x3 | 1 | 147584 |
| MaxPool2 | MaxPooling2D | 112x112x128 | 56x56x128 | 2x2 | 2 | 0 |
| Conv3_1 | Conv2D | 56x56x128 | 56x56x256 | 3x3 | 1 | 295168 |
| Conv3_2 | Conv2D | 56x56x256 | 56x56x256 | 3x3 | 1 | 590080 |
| Conv3_3 | Conv2D | 56x56x256 | 56x56x256 | 3x3 | 1 | 590080 |
| MaxPool3 | MaxPooling2D | 56x56x256 | 28x28x256 | 2x2 | 2 | 0 |
| Conv4_1 | Conv2D | 28x28x256 | 28x28x512 | 3x3 | 1 | 1180160 |
| Conv4_2 | Conv2D | 28x28x512 | 28x28x512 | 3x3 | 1 | 2359808 |
| Conv4_3 | Conv2D | 28x28x512 | 28x28x512 | 3x3 | 1 | 2359808 |
| MaxPool4 | MaxPooling2D | 28x28x512 | 14x14x512 | 2x2 | 2 | 0 |
| Conv5_1 | Conv2D | 14x14x512 | 14x14x512 | 3x3 | 1 | 2359808 |
| Conv5_2 | Conv2D | 14x14x512 | 14x14x512 | 3x3 | 1 | 2359808 |
| Conv5_3 | Conv2D | 14x14x512 | 14x14x512 | 3x3 | 1 | 2359808 |
| MaxPool5 | MaxPooling2D | 14x14x512 | 7x7x512 | 2x2 | 2 | 0 |
| Flatten | Flatten | 7x7x512 | 25088 | - | - | 0 |
| FC6 | Dense | 25088 | 4096 | - | - | 25088x4096 + 4096 = 102764544 |
| FC7 | Dense | 4096 | 4096 | - | - | 4096x4096 + 4096 = 16781312 |
| FC8 | Dense | 4096 | 1000 | - | - | 4096x1000 + 1000 = 4097000 |
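
Nearly all of VGG16's parameters sit in the three dense layers; checking the arithmetic of the last three rows:

```python
fc6 = 25088 * 4096 + 4096  # 102764544
fc7 = 4096 * 4096 + 4096   # 16781312
fc8 = 4096 * 1000 + 1000   # 4097000
print(fc6 + fc7 + fc8)     # 123642856 of the 138357544 total, i.e. ~89%
```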

PyTorch Code

```python
import torch
import torch.nn as nn
class VGG16(nn.Module):
    def __init__(self, num_classes=1000):
        super(VGG16, self).__init__()
        self.features = nn.Sequential(
            # Conv1
            nn.Conv2d(3, 64, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(64, 64, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=2, stride=2),
            
            # Conv2
            nn.Conv2d(64, 128, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(128, 128, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=2, stride=2),
            
            # Conv3
            nn.Conv2d(128, 256, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(256, 256, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(256, 256, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=2, stride=2),
            
            # Conv4
            nn.Conv2d(256, 512, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(512, 512, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(512, 512, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=2, stride=2),
            
            # Conv5
            nn.Conv2d(512, 512, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(512, 512, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(512, 512, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=2, stride=2)
        )
        
        self.classifier = nn.Sequential(
            nn.Linear(512 * 7 * 7, 4096),
            nn.ReLU(inplace=True),
            nn.Dropout(),
            nn.Linear(4096, 4096),
            nn.ReLU(inplace=True),
            nn.Dropout(),
            nn.Linear(4096, num_classes)
        )
        
    def forward(self, x):
        x = self.features(x)
        x = torch.flatten(x, 1)
        x = self.classifier(x)
        return x
# Instantiate the model
model = VGG16(num_classes=1000)
print(model)
```
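
As a cross-check against the table, the instantiated model should contain 138,357,544 trainable parameters in total:

```python
print(sum(p.numel() for p in model.parameters()))  # 138357544
```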

Inception v3

| Layer | Type | Input (HWC) | Output (HWC) | Kernel | Stride | Params |
| --- | --- | --- | --- | --- | --- | --- |
| Conv2d_1a_3x3 | Convolution | 299x299x3 | 149x149x32 | 3x3 | 2 | 864 |
| Conv2d_2a_3x3 | Convolution | 149x149x32 | 147x147x32 | 3x3 | 1 | 9216 |
| Conv2d_2b_3x3 | Convolution | 147x147x32 | 147x147x64 | 3x3 | 1 | 18432 |
| MaxPool_3a_3x3 | Max pooling | 147x147x64 | 73x73x64 | 3x3 | 2 | 0 |
| Conv2d_3b_1x1 | Convolution | 73x73x64 | 73x73x80 | 1x1 | 1 | 5120 |
| Conv2d_4a_3x3 | Convolution | 73x73x80 | 71x71x192 | 3x3 | 1 | 138240 |
| MaxPool_5a_3x3 | Max pooling | 71x71x192 | 35x35x192 | 3x3 | 2 | 0 |
| Mixed_5b | Inception module | 35x35x192 | 35x35x256 | - | - | - |
| Mixed_5c | Inception module | 35x35x256 | 35x35x288 | - | - | - |
| Mixed_5d | Inception module | 35x35x288 | 35x35x288 | - | - | - |
| Mixed_6a | Inception module | 35x35x288 | 17x17x768 | - | 2 | - |
| Mixed_6b | Inception module | 17x17x768 | 17x17x768 | - | - | - |
| Mixed_6c | Inception module | 17x17x768 | 17x17x768 | - | - | - |
| Mixed_6d | Inception module | 17x17x768 | 17x17x768 | - | - | - |
| Mixed_6e | Inception module | 17x17x768 | 17x17x768 | - | - | - |
| Mixed_7a | Inception module | 17x17x768 | 8x8x1280 | - | 2 | - |
| Mixed_7b | Inception module | 8x8x1280 | 8x8x2048 | - | - | - |
| Mixed_7c | Inception module | 8x8x2048 | 8x8x2048 | - | - | - |

Taking Mixed_5b as an example, its internal structure is listed below (convolution weights only; each convolution is bias-free and followed by BatchNorm, as in the code that follows):

| Layer | Type | Input (HWC) | Output (HWC) | Kernel | Stride | Params |
| --- | --- | --- | --- | --- | --- | --- |
| Mixed_5b/branch1x1 | Convolution | 35x35x192 | 35x35x64 | 1x1 | 1 | 192x64 = 12288 |
| Mixed_5b/branch5x5_1 | Convolution | 35x35x192 | 35x35x48 | 1x1 | 1 | 192x48 = 9216 |
| Mixed_5b/branch5x5_2 | Convolution | 35x35x48 | 35x35x64 | 5x5 | 1 | 5x5x48x64 = 76800 |
| Mixed_5b/branch3x3dbl_1 | Convolution | 35x35x192 | 35x35x64 | 1x1 | 1 | 192x64 = 12288 |
| Mixed_5b/branch3x3dbl_2 | Convolution | 35x35x64 | 35x35x96 | 3x3 | 1 | 3x3x64x96 = 55296 |
| Mixed_5b/branch3x3dbl_3 | Convolution | 35x35x96 | 35x35x96 | 3x3 | 1 | 3x3x96x96 = 82944 |
| Mixed_5b/branch_pool (avg) | Avg pooling | 35x35x192 | 35x35x192 | 3x3 | 1 | 0 |
| Mixed_5b/branch_pool (1x1) | Convolution | 35x35x192 | 35x35x32 | 1x1 | 1 | 192x32 = 6144 |
| Mixed_5b/output | Concatenate | - | 35x35x256 (64+64+96+32) | - | - | 0 |
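
The counts above can be verified directly with bias-free nn.Conv2d layers; for example, the 5x5 branch:

```python
import torch.nn as nn

branch5x5_1 = nn.Conv2d(192, 48, kernel_size=1, bias=False)
branch5x5_2 = nn.Conv2d(48, 64, kernel_size=5, padding=2, bias=False)
print(sum(p.numel() for p in branch5x5_1.parameters()))  # 192*48 = 9216
print(sum(p.numel() for p in branch5x5_2.parameters()))  # 5*5*48*64 = 76800
```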

PyTorch Code

Below is part of the PyTorch source for an Inception v3 model. It shows how to define an Inception module and a helper convolution block, but omits many details of the full network; a complete Inception v3 definition is considerably longer, so only the core pieces appear here.

```python
import torch
import torch.nn as nn
import torch.nn.functional as F
class InceptionA(nn.Module):
    def __init__(self, in_channels):
        super(InceptionA, self).__init__()
        self.branch1x1 = BasicConv2d(in_channels, 64, kernel_size=1)
        self.branch5x5_1 = BasicConv2d(in_channels, 48, kernel_size=1)
        self.branch5x5_2 = BasicConv2d(48, 64, kernel_size=5, padding=2)
        self.branch3x3dbl_1 = BasicConv2d(in_channels, 64, kernel_size=1)
        self.branch3x3dbl_2 = BasicConv2d(64, 96, kernel_size=3, padding=1)
        self.branch3x3dbl_3 = BasicConv2d(96, 96, kernel_size=3, padding=1)
        self.branch_pool = BasicConv2d(in_channels, 32, kernel_size=1)
    def forward(self, x):
        branch1x1 = self.branch1x1(x)
        branch5x5 = self.branch5x5_1(x)
        branch5x5 = self.branch5x5_2(branch5x5)
        branch3x3dbl = self.branch3x3dbl_1(x)
        branch3x3dbl = self.branch3x3dbl_2(branch3x3dbl)
        branch3x3dbl = self.branch3x3dbl_3(branch3x3dbl)
        branch_pool = F.avg_pool2d(x, kernel_size=3, stride=1, padding=1)
        branch_pool = self.branch_pool(branch_pool)
        outputs = [branch1x1, branch5x5, branch3x3dbl, branch_pool]
        return torch.cat(outputs, 1)
class BasicConv2d(nn.Module):
    def __init__(self, in_channels, out_channels, **kwargs):
        super(BasicConv2d, self).__init__()
        self.conv = nn.Conv2d(in_channels, out_channels, bias=False, **kwargs)
        self.bn = nn.BatchNorm2d(out_channels, eps=0.001)
    def forward(self, x):
        x = self.conv(x)
        x = self.bn(x)
        return F.relu(x, inplace=True)
class InceptionV3(nn.Module):
    def __init__(self, num_classes=1000):
        super(InceptionV3, self).__init__()
        # Stem
        self.Conv2d_1a_3x3 = BasicConv2d(3, 32, kernel_size=3, stride=2)
        self.Conv2d_2a_3x3 = BasicConv2d(32, 32, kernel_size=3)
        self.Conv2d_2b_3x3 = BasicConv2d(32, 64, kernel_size=3, padding=1)
        # ... additional stem layers ...
        # Inception modules
        self.Mixed_5b = InceptionA(192)  # 35x35x192 -> 35x35x256, matching the table
        self.Mixed_5c = InceptionA(256)  # simplified: the real Mixed_5c outputs 288 channels
        # ... additional Inception modules ...
        # Auxiliary Logits
        self.AuxLogits = None
        # ... auxiliary logits layers ...
        # Final Logits
        self.Mixed_7c = InceptionA(768)  # placeholder: the real Mixed_7c is a different module type (InceptionE, 2048 channels)
        # ... additional final layers ...
        self.fc = nn.Linear(2048, num_classes)
    def forward(self, x):
        # Stem
        x = self.Conv2d_1a_3x3(x)
        x = self.Conv2d_2a_3x3(x)
        x = self.Conv2d_2b_3x3(x)
        # ... additional stem layers ...
        # Inception modules
        x = self.Mixed_5b(x)
        x = self.Mixed_5c(x)
        # ... additional Inception modules ...
        # Auxiliary Logits
        if self.AuxLogits is not None:
            aux = self.AuxLogits(x)
        else:
            aux = None
        # Final Logits
        x = self.Mixed_7c(x)
        # ... additional final layers ...
        x = F.adaptive_avg_pool2d(x, (1, 1))
        x = torch.flatten(x, 1)
        x = self.fc(x)
        return x, aux
# Example usage:
# model = InceptionV3(num_classes=1000)
```
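
A minimal shape check of the InceptionA block, using the Mixed_5b input size from the table:

```python
block = InceptionA(192)
x = torch.randn(1, 192, 35, 35)
print(block(x).shape)  # torch.Size([1, 256, 35, 35]): 64 + 64 + 96 + 32 channels
```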

ResNet-18

Parameter counts below assume bias-free convolutions, each followed by BatchNorm (weight and bias counted), matching the code; residual-block rows include all convolutions, BatchNorms, and the 1x1 downsample path where the stride is 2.

| Layer | Type | Input (HWC) | Output (HWC) | Kernel | Stride | Params |
| --- | --- | --- | --- | --- | --- | --- |
| Conv1 | Convolution | 224x224x3 | 112x112x64 | 7x7 | 2 | 7x7x3x64 = 9408 |
| BatchNorm1 | Batch norm | 112x112x64 | 112x112x64 | - | - | 128 |
| ReLU1 | Activation | 112x112x64 | 112x112x64 | - | - | 0 |
| MaxPool1 | Max pooling | 112x112x64 | 56x56x64 | 3x3 | 2 | 0 |
| ResidualBlock1_1 | Residual block | 56x56x64 | 56x56x64 | - | 1 | 73984 |
| ResidualBlock1_2 | Residual block | 56x56x64 | 56x56x64 | - | 1 | 73984 |
| ResidualBlock2_1 | Residual block | 56x56x64 | 28x28x128 | - | 2 | 230144 |
| ResidualBlock2_2 | Residual block | 28x28x128 | 28x28x128 | - | 1 | 295424 |
| ResidualBlock3_1 | Residual block | 28x28x128 | 14x14x256 | - | 2 | 919040 |
| ResidualBlock3_2 | Residual block | 14x14x256 | 14x14x256 | - | 1 | 1180672 |
| ResidualBlock4_1 | Residual block | 14x14x256 | 7x7x512 | - | 2 | 3673088 |
| ResidualBlock4_2 | Residual block | 7x7x512 | 7x7x512 | - | 1 | 4720640 |
| AvgPool | Global average pooling | 7x7x512 | 1x1x512 | 7x7 | - | 0 |
| Flatten | Flatten | 1x1x512 | 512 | - | - | 0 |
| FC | Fully connected | 512 | 1000 | - | - | 512x1000 + 1000 = 513000 |
| Softmax | Softmax | 1000 | 1000 | - | - | 0 |

Structure of each residual block (convolution weights only; BatchNorm parameters are excluded here):

| Stage | Block | Layer | Type | Input (HWC) | Output (HWC) | Kernel | Stride | Params |
| --- | --- | --- | --- | --- | --- | --- | --- | --- |
| 1 | 1 | conv1 | Convolution | 56x56x64 | 56x56x64 | 3x3 | 1 | 3x3x64x64 = 36864 |
| 1 | 1 | conv2 | Convolution | 56x56x64 | 56x56x64 | 3x3 | 1 | 36864 |
| 1 | 2 | conv1 | Convolution | 56x56x64 | 56x56x64 | 3x3 | 1 | 36864 |
| 1 | 2 | conv2 | Convolution | 56x56x64 | 56x56x64 | 3x3 | 1 | 36864 |
| 2 | 1 | conv1 | Convolution | 56x56x64 | 28x28x128 | 3x3 | 2 | 3x3x64x128 = 73728 |
| 2 | 1 | conv2 | Convolution | 28x28x128 | 28x28x128 | 3x3 | 1 | 3x3x128x128 = 147456 |
| 2 | 1 | skip | Convolution | 56x56x64 | 28x28x128 | 1x1 | 2 | 64x128 = 8192 |
| 2 | 2 | conv1 | Convolution | 28x28x128 | 28x28x128 | 3x3 | 1 | 147456 |
| 2 | 2 | conv2 | Convolution | 28x28x128 | 28x28x128 | 3x3 | 1 | 147456 |
| 3 | 1 | conv1 | Convolution | 28x28x128 | 14x14x256 | 3x3 | 2 | 3x3x128x256 = 294912 |
| 3 | 1 | conv2 | Convolution | 14x14x256 | 14x14x256 | 3x3 | 1 | 3x3x256x256 = 589824 |
| 3 | 1 | skip | Convolution | 28x28x128 | 14x14x256 | 1x1 | 2 | 128x256 = 32768 |
| 3 | 2 | conv1 | Convolution | 14x14x256 | 14x14x256 | 3x3 | 1 | 589824 |
| 3 | 2 | conv2 | Convolution | 14x14x256 | 14x14x256 | 3x3 | 1 | 589824 |
| 4 | 1 | conv1 | Convolution | 14x14x256 | 7x7x512 | 3x3 | 2 | 3x3x256x512 = 1179648 |
| 4 | 1 | conv2 | Convolution | 7x7x512 | 7x7x512 | 3x3 | 1 | 3x3x512x512 = 2359296 |
| 4 | 1 | skip | Convolution | 14x14x256 | 7x7x512 | 1x1 | 2 | 256x512 = 131072 |
| 4 | 2 | conv1 | Convolution | 7x7x512 | 7x7x512 | 3x3 | 1 | 2359296 |
| 4 | 2 | conv2 | Convolution | 7x7x512 | 7x7x512 | 3x3 | 1 | 2359296 |
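
These per-layer counts are plain bias-free convolutions, so they can be checked one by one; for example, the first block of stage 2:

```python
import torch.nn as nn

conv1 = nn.Conv2d(64, 128, kernel_size=3, stride=2, padding=1, bias=False)
conv2 = nn.Conv2d(128, 128, kernel_size=3, padding=1, bias=False)
skip = nn.Conv2d(64, 128, kernel_size=1, stride=2, bias=False)
for m in (conv1, conv2, skip):
    print(sum(p.numel() for p in m.parameters()))  # 73728, 147456, 8192
```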

PyTorch Code

```python
import torch
import torch.nn as nn
import torch.nn.functional as F
# Basic residual block: two 3x3 convolutions with an identity (or 1x1 downsample) shortcut
class BasicBlock(nn.Module):
    expansion = 1
    def __init__(self, in_channels, out_channels, stride=1, downsample=None):
        super(BasicBlock, self).__init__()
        self.conv1 = nn.Conv2d(in_channels, out_channels, kernel_size=3, 
                               stride=stride, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(out_channels)
        self.relu = nn.ReLU(inplace=True)
        self.conv2 = nn.Conv2d(out_channels, out_channels, kernel_size=3, 
                               stride=1, padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(out_channels)
        self.downsample = downsample
    def forward(self, x):
        identity = x
        out = self.conv1(x)
        out = self.bn1(out)
        out = self.relu(out)
        out = self.conv2(out)
        out = self.bn2(out)
        if self.downsample is not None:
            identity = self.downsample(x)
        out += identity
        out = self.relu(out)
        return out
# ResNet backbone
class ResNet(nn.Module):
    def __init__(self, block, layers, num_classes=1000):
        super(ResNet, self).__init__()
        self.in_channels = 64
        self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3,
                               bias=False)
        self.bn1 = nn.BatchNorm2d(64)
        self.relu = nn.ReLU(inplace=True)
        self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
        self.layer1 = self._make_layer(block, 64, layers[0])
        self.layer2 = self._make_layer(block, 128, layers[1], stride=2)
        self.layer3 = self._make_layer(block, 256, layers[2], stride=2)
        self.layer4 = self._make_layer(block, 512, layers[3], stride=2)
        self.avgpool = nn.AdaptiveAvgPool2d((1, 1))
        self.fc = nn.Linear(512 * block.expansion, num_classes)
        # Initialize weights
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')
            elif isinstance(m, nn.BatchNorm2d):
                nn.init.constant_(m.weight, 1)
                nn.init.constant_(m.bias, 0)
    def _make_layer(self, block, out_channels, blocks, stride=1):
        downsample = None
        if stride != 1 or self.in_channels != out_channels * block.expansion:
            downsample = nn.Sequential(
                nn.Conv2d(self.in_channels, out_channels * block.expansion,
                          kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(out_channels * block.expansion),
            )
        layers = []
        layers.append(block(self.in_channels, out_channels, stride, downsample))
        self.in_channels = out_channels * block.expansion
        for _ in range(1, blocks):
            layers.append(block(self.in_channels, out_channels))
        return nn.Sequential(*layers)
    def forward(self, x):
        x = self.conv1(x)
        x = self.bn1(x)
        x = self.relu(x)
        x = self.maxpool(x)
        x = self.layer1(x)
        x = self.layer2(x)
        x = self.layer3(x)
        x = self.layer4(x)
        x = self.avgpool(x)
        x = torch.flatten(x, 1)
        x = self.fc(x)
        return x
# Build ResNet-18: two BasicBlocks in each of the four stages
def resnet18(pretrained=False, **kwargs):
    model = ResNet(BasicBlock, [2, 2, 2, 2], **kwargs)
    if pretrained:
        # No pretrained weights are bundled here; load them at this point if needed
        pass
    return model
# Create a model instance
model = resnet18()
print(model)
```
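
Summing every parameter (convolutions, BatchNorms, and the final linear layer) should reproduce ResNet-18's well-known total:

```python
print(sum(p.numel() for p in model.parameters()))  # 11689512, i.e. ~11.69M
```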