GoogLeNet - Implementation

Conv_Block 클래스 생성

class Conv_Block(nn.Module):
    """Convolution followed by batch normalisation and ReLU.

    Args:
        input_ch: Number of channels of the incoming feature map.
        output_ch: Number of channels produced by the convolution.
        **kwargs: Forwarded unchanged to ``nn.Conv2d`` (kernel size, stride,
            padding, ...).
    """

    def __init__(self, input_ch, output_ch, **kwargs) -> None:
        super().__init__()
        # Attribute names are part of the state_dict keys, so they stay fixed.
        self.conv = nn.Conv2d(input_ch, output_ch, **kwargs)
        self.batchnorm = nn.BatchNorm2d(output_ch)
        self.relu = nn.ReLU()

    def forward(self, x: Tensor) -> Tensor:
        """Apply convolution, then batch norm, then ReLU to ``x``."""
        normalised = self.batchnorm(self.conv(x))
        return self.relu(normalised)

Convolutional Layer → BatchNorm Layer → ReLU

Inception 클래스 생성

<aside> 💡 Inception module 구현

</aside>

class Inception(nn.Module):
    """Inception module: four parallel branches concatenated along channels.

    Every branch uses stride 1 with padding chosen so the spatial size is
    unchanged; the output has ``n1x1 + n3x3 + n5x5 + pool_proj`` channels.

    Args:
        input_ch: Channels of the incoming feature map.
        n1x1: Output channels of the plain 1x1 branch.
        nReduced_3x3: Channels of the 1x1 reduction before the 3x3 conv.
        n3x3: Output channels of the 3x3 branch.
        nReduced_5x5: Channels of the 1x1 reduction before the 5x5 conv.
        n5x5: Output channels of the 5x5 branch.
        pool_proj: Output channels of the 1x1 conv after the max-pool.
    """

    def __init__(self, input_ch, n1x1, nReduced_3x3, n3x3, nReduced_5x5, n5x5, pool_proj) -> None:
        super().__init__()

        def pointwise(out_ch):
            # 1x1 convolution applied directly to the module input.
            return Conv_Block(input_ch, out_ch, kernel_size=1, stride=1, padding=0)

        # Branches are created in the same order as before so that seeded
        # parameter initialisation is unchanged.
        self.branch1 = pointwise(n1x1)

        self.branch2 = nn.Sequential(
            pointwise(nReduced_3x3),
            Conv_Block(nReduced_3x3, n3x3, kernel_size=3, stride=1, padding=1),
        )

        self.branch3 = nn.Sequential(
            pointwise(nReduced_5x5),
            Conv_Block(nReduced_5x5, n5x5, kernel_size=5, stride=1, padding=2),
        )

        self.branch4 = nn.Sequential(
            nn.MaxPool2d(kernel_size=3, stride=1, padding=1),
            pointwise(pool_proj),
        )

    def forward(self, x: Tensor) -> Tensor:
        """Run all four branches on ``x`` and concatenate them on dim 1."""
        branches = (self.branch1, self.branch2, self.branch3, self.branch4)
        return torch.cat([branch(x) for branch in branches], dim=1)

Untitled

Inception module 내 4개의 분기를 지정함
- branch1 : 1x1 convolutions
- branch2 : 1x1 convolutions → 3x3 convolutions
- branch3 : 1x1 convolutions → 5x5 convolutions
- branch4 : 3x3 max pooling → 1x1 convolutions

InceptionAux 클래스 생성

<aside> 💡 Auxiliary Classification 과정

</aside>

class InceptionAux(nn.Module):
    """Auxiliary classifier head: avg-pool, 1x1 conv, two linear layers.

    Args:
        input_ch: Channels of the feature map fed to the head.
        num_classes: Number of logits produced.

    Note:
        ``fc1`` expects 2048 input features, i.e. 128 channels times 16
        spatial positions, so the pooled map must have 16 positions
        (for example 4x4) for ``forward`` to succeed.
    """

    def __init__(self, input_ch, num_classes) -> None:
        super().__init__()
        # 5x5 average pooling with stride 3.
        self.avgpool = nn.AvgPool2d(kernel_size=5, stride=3)
        # Project the input down to 128 channels.
        self.conv = Conv_Block(input_ch, 128, kernel_size=1, stride=1, padding=0)
        self.fc1 = nn.Linear(2048, 1024)
        # Final linear layer producing the class logits.
        self.fc2 = nn.Linear(1024, num_classes)
        # Dropout rate of 0.7 (noted in the original comment as the paper's value).
        self.dropout = nn.Dropout(p=0.7)
        self.relu = nn.ReLU()

    def forward(self, x: Tensor) -> Tensor:
        """Return auxiliary logits of shape ``(batch, num_classes)``."""
        reduced = self.conv(self.avgpool(x))
        flat = reduced.view(reduced.size(0), -1)
        hidden = self.dropout(self.relu(self.fc1(flat)))
        return self.fc2(hidden)

Untitled

GoogLeNet 클래스 생성

<aside> 💡 위의 클래스들을 이용하여 GoogLeNet 구현

</aside>

class GoogLeNet(nn.Module):
    """GoogLeNet (Inception v1) classifier with optional auxiliary heads.

    Args:
        aux_logits: When true, two ``InceptionAux`` heads are built (fed by
            the outputs of ``a4`` and ``d4``) and evaluated in training mode.
        num_classes: Number of logits produced by every classifier head.

    Raises:
        ValueError: If ``aux_logits`` is not ``True``/``False``.
    """

    def __init__(self, aux_logits=True, num_classes=1000) -> None:
        super().__init__()
        # Explicit validation instead of ``assert`` (asserts vanish under -O).
        if aux_logits not in (True, False):
            raise ValueError(f"aux_logits must be a bool, got {aux_logits!r}")
        self.aux_logits = bool(aux_logits)

        # Stem.
        self.conv1 = Conv_Block(input_ch=3, output_ch=64, kernel_size=7, stride=2, padding=3)
        self.maxpool1 = nn.MaxPool2d(kernel_size=3, stride=2, padding=1, ceil_mode=True)
        self.conv2 = Conv_Block(input_ch=64, output_ch=64, kernel_size=1, stride=1, padding=0)
        self.conv3 = Conv_Block(input_ch=64, output_ch=192, kernel_size=3, stride=1, padding=1)
        self.maxpool2 = nn.MaxPool2d(kernel_size=3, stride=2, ceil_mode=True)

        # Inception stages; each module's input_ch equals the sum of the
        # previous module's four branch widths.
        self.a3 = Inception(192, 64, 96, 128, 16, 32, 32)
        self.b3 = Inception(256, 128, 128, 192, 32, 96, 64)
        self.maxpool3 = nn.MaxPool2d(kernel_size=3, stride=2, padding=1, ceil_mode=True)
        self.a4 = Inception(480, 192, 96, 208, 16, 48, 64)
        self.b4 = Inception(512, 160, 112, 224, 24, 64, 64)
        self.c4 = Inception(512, 128, 128, 256, 24, 64, 64)
        self.d4 = Inception(512, 112, 144, 288, 32, 64, 64)
        self.e4 = Inception(528, 256, 160, 320, 32, 128, 128)
        self.maxpool4 = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
        self.a5 = Inception(832, 256, 160, 320, 32, 128, 128)
        self.b5 = Inception(832, 384, 192, 384, 48, 128, 128)

        # Global average pooling. A fixed 7x7 window only works when the last
        # feature map is exactly 7x7; the adaptive variant yields the same
        # result in that case and also accepts other input resolutions
        # (e.g. 32x32 CIFAR-10 images, whose final map is 2x2).
        self.avgpool = nn.AdaptiveAvgPool2d((1, 1))
        self.dropout = nn.Dropout(p=0.4)
        self.linear = nn.Linear(1024, num_classes)

        if self.aux_logits:
            self.aux1 = InceptionAux(512, num_classes)
            self.aux2 = InceptionAux(528, num_classes)
        else:
            self.aux1 = None
            self.aux2 = None

    def transform_input(self, x: Tensor) -> Tensor:
        """Re-scale each of the three channels with fixed constants.

        Algebraically this maps a tensor normalised with mean
        (0.485, 0.456, 0.406) / std (0.229, 0.224, 0.225) onto one normalised
        with mean 0.5 / std 0.5.
        NOTE(review): ``forward`` applies this unconditionally; confirm that
        callers really feed input normalised with the former statistics.
        """
        x_R = torch.unsqueeze(x[:, 0], 1) * (0.229 / 0.5) + (0.485 - 0.5) / 0.5
        x_G = torch.unsqueeze(x[:, 1], 1) * (0.224 / 0.5) + (0.456 - 0.5) / 0.5
        x_B = torch.unsqueeze(x[:, 2], 1) * (0.225 / 0.5) + (0.406 - 0.5) / 0.5
        x = torch.cat([x_R, x_G, x_B], 1)
        return x

    def forward(self, x: Tensor) -> "Tensor | tuple[Tensor, Tensor, Tensor]":
        """Forward pass.

        Returns:
            ``(aux1, aux2, logits)`` when the model has auxiliary heads and is
            in training mode; otherwise only the main ``logits`` tensor. The
            auxiliary outputs keep positions 0 and 1 of the tuple.
        """
        use_aux = self.aux_logits and self.training

        x = self.transform_input(x)

        x = self.conv1(x)
        x = self.maxpool1(x)
        x = self.conv2(x)
        x = self.conv3(x)
        x = self.maxpool2(x)
        x = self.a3(x)
        x = self.b3(x)
        x = self.maxpool3(x)
        x = self.a4(x)
        aux1 = self.aux1(x) if use_aux else None

        x = self.b4(x)
        x = self.c4(x)
        x = self.d4(x)
        aux2 = self.aux2(x) if use_aux else None

        x = self.e4(x)
        x = self.maxpool4(x)
        x = self.a5(x)
        x = self.b5(x)
        x = self.avgpool(x)
        x = torch.flatten(x, 1)
        # Dropout regularises the pooled features; it must run before the
        # classifier so that logits themselves are never zeroed out.
        x = self.dropout(x)
        x = self.linear(x)

        if use_aux:
            # The main logits are needed for the loss as well; previously they
            # were dropped and only the auxiliary outputs were returned.
            return aux1, aux2, x
        return x

Untitled

import os
import numpy as np 
import torch
import torchvision.datasets as datasets
import torchvision.transforms as transforms
from torch.utils.data import DataLoader
import matplotlib.pyplot as plt
import argparse

def load_dataset(batch_size=None, root="../data"):
    """Build CIFAR-10 train and test ``DataLoader`` objects.

    Images are converted to tensors and normalised per channel with mean 0.5
    and std 0.5. The dataset is downloaded into ``root`` if necessary.

    Args:
        batch_size: Mini-batch size for both loaders. When ``None`` the value
            is read from the module-level ``args.batch_size`` (the behaviour
            of the original zero-argument call).
        root: Directory in which the dataset is stored / downloaded.

    Returns:
        ``(train_loader, test_loader)``; only the training loader shuffles.
    """
    if batch_size is None:
        # Backward-compatible fallback for callers that rely on the parsed
        # command-line namespace being available as a global.
        batch_size = args.batch_size

    # preprocess
    transform = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
    ])

    # load train, test data
    train = datasets.CIFAR10(root=root, train=True, transform=transform, download=True)
    test = datasets.CIFAR10(root=root, train=False, transform=transform, download=True)
    train_loader = DataLoader(train, batch_size=batch_size, shuffle=True)
    test_loader = DataLoader(test, batch_size=batch_size, shuffle=False)
    return train_loader, test_loader

if __name__ == "__main__":
    # Training script: fits GoogLeNet (without auxiliary heads) and reports
    # running accuracy / mean loss on the train and test loaders each epoch.

    # set hyperparameters
    parser = argparse.ArgumentParser()
    parser.add_argument('--batch_size', action='store', type=int, default=100)
    parser.add_argument('--learning_rate', action='store', type=float, default=0.0002)
    parser.add_argument('--n_epochs', action='store', type=int, default=100)
    # ``type=bool`` would turn every non-empty string (including "False")
    # into True, so the usual spellings are parsed explicitly instead.
    parser.add_argument(
        '--plot',
        action='store',
        type=lambda s: str(s).strip().lower() in ('1', 'true', 't', 'yes', 'y'),
        default=True,
    )
    args = parser.parse_args(args=[])  # Jupyter environment: ignore sys.argv

    np.random.seed(1)
    seed = torch.manual_seed(1)

    # load dataset
    train_loader, test_loader = load_dataset()

    # model, loss, optimizer
    losses = []
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    model = GoogLeNet(aux_logits=False, num_classes=10).to(device)
    criterion = torch.nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=args.learning_rate)

    # train
    for epoch in range(args.n_epochs):
        model.train()
        train_loss = 0.0
        correct, count = 0, 0
        for batch_idx, (images, labels) in enumerate(train_loader, start=1):
            images, labels = images.to(device), labels.to(device)
            optimizer.zero_grad()
            # Call the module itself (not .forward) so registered hooks run.
            output = model(images)
            loss = criterion(output, labels)
            loss.backward()
            optimizer.step()

            # criterion returns the batch mean; weight it by the batch size so
            # train_loss / count is the per-sample mean for any batch size.
            n_samples = labels.size(0)
            train_loss += loss.item() * n_samples
            preds = output.argmax(dim=1)
            count += n_samples
            correct += preds.eq(labels).sum().item()

            if batch_idx % 100 == 0:
                print(f"[*] Epoch: {epoch} \tStep: {batch_idx}/{len(train_loader)}\tTrain accuracy: {round((correct/count), 4)} \tTrain Loss: {round(train_loss/count, 4)}")

        # valid
        model.eval()
        correct, count = 0, 0
        valid_loss = 0.0
        with torch.no_grad():
            for batch_idx, (images, labels) in enumerate(test_loader, start=1):
                images, labels = images.to(device), labels.to(device)
                output = model(images)
                loss = criterion(output, labels)
                n_samples = labels.size(0)
                valid_loss += loss.item() * n_samples
                preds = output.argmax(dim=1)
                count += n_samples
                correct += preds.eq(labels).sum().item()
                if batch_idx % 100 == 0:
                    print(f"[*] Step: {batch_idx}/{len(test_loader)}\tValid accuracy: {round((correct/count), 4)} \tValid Loss: {round(valid_loss/count, 4)}")