In [ ]:
import torch
import torch.nn as nn

Stacking layer with nn.Sequential¶

In [ ]:
class SimpleNN(nn.Module):
    def __init__(self):
        super(SimpleNN, self).__init__()
        self.flatten = nn.Flatten()
        self.linear_stack = nn.Sequential(nn.Linear(28*28, 512),
                                          nn.ReLU(),
                                          nn.Linear(512, 512),
                                          nn.ReLU(),
                                          nn.Linear(512, 10))

    def forward(self, x):
        x = self.flatten(x)
        logits = self.linear_stack(x)
        return logits


model = SimpleNN()

Model's prediction of an instance¶

In [ ]:
x = torch.rand(28, 28, 1)
model(x.unsqueeze(axis=0))
Out[ ]:
tensor([[-0.0907,  0.0114, -0.0487,  0.0036, -0.0181, -0.0989,  0.0320, -0.0028,
         -0.1364, -0.0212]], grad_fn=<AddmmBackward0>)

Image classification with LeNet-5¶

lenet5

In [ ]:
import torch
import torch.nn as nn
from torch.utils.data import DataLoader

from torchvision import datasets
from torchvision import transforms

import matplotlib.pyplot as plt
In [ ]:
# Hyperparameters
LEARNING_RATE = 0.001
BATCH_SIZE = 128
NUM_EPOCHS = 5

# Classification
NUM_CLASSES = 10

CIFAR10 dataset¶

Classify (32x32x3) images into 10 classes

Here, train_dataset and val_dataset are (image, label) generators

In [ ]:
train_dataset = datasets.CIFAR10(root='data', 
                                 train=True, 
                                 transform=transforms.ToTensor(),
                                 download=True)

val_dataset = datasets.CIFAR10(root='data', 
                                train=False, 
                                transform=transforms.ToTensor())
Downloading https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz to data/cifar-10-python.tar.gz
  0%|          | 0/170498071 [00:00<?, ?it/s]
Extracting data/cifar-10-python.tar.gz to data

Look at an image in the dataset

In [ ]:
image, label = train_dataset[1]

image.shape

plt.imshow(image.permute(1, 2, 0))
Out[ ]:
<matplotlib.image.AxesImage at 0x7f360c2df410>

Data augmentation¶

augmentation

In [ ]:
train_transform = transforms.Compose([transforms.RandomHorizontalFlip(),
                                      transforms.RandomResizedCrop((32, 32), 
                                                                   scale=(0.7,0.8),
                                                                   ratio=(0.6,0.7)),
                                     transforms.ToTensor()
                                     ])

train_dataset = datasets.CIFAR10(root='data', 
                                 train=True, 
                                 transform=train_transform,
                                 download=True)

val_dataset = datasets.CIFAR10(root='data', 
                                train=False, 
                                transform=transforms.ToTensor())
Files already downloaded and verified
In [ ]:
image, labels = train_dataset[0]
plt.imshow(image.permute(1,2,0))
Out[ ]:
<matplotlib.image.AxesImage at 0x7f360c28e250>

Use DataLoader to split the dataset into minibatches.

In [ ]:
train_loader = DataLoader(dataset=train_dataset, 
                          batch_size=BATCH_SIZE, 
                          num_workers=2,
                          shuffle=True)

val_loader = DataLoader(dataset=val_dataset, 
                         batch_size=BATCH_SIZE,
                         num_workers=2,
                         shuffle=False)

Check the size of each minibatch

In [ ]:
for images, labels in train_loader:
  print(images.shape)
  break
torch.Size([128, 3, 32, 32])

LeNet5¶

lenet5

In [ ]:
class LeNet5(nn.Module):

    def __init__(self, num_classes):
        super(LeNet5, self).__init__()

        self.num_classes = num_classes

        self.features = nn.Sequential(
            nn.Conv2d(in_channels=3, out_channels=6*3, kernel_size=5),
            nn.Dropout(0.2),
            nn.BatchNorm2d(6*3),
            nn.Tanh(),
            nn.MaxPool2d(kernel_size=2),
            nn.Conv2d(in_channels=6*3, out_channels=16*3, kernel_size=5),
            nn.Tanh(),
            nn.MaxPool2d(kernel_size=2)
        )

        self.classifier = nn.Sequential(
            nn.Flatten(),
            nn.Linear(16*5*5*3, 3*120),
            nn.Tanh(),
            nn.Linear(3*120, 3*84),
            nn.Tanh(),
            nn.Linear(3*84, self.num_classes)            
        )


    def forward(self, x):
      x = self.features(x)
      logits = self.classifier(x)
      return logits

model = LeNet5(NUM_CLASSES)

Apply the model on an image

In [ ]:
image, label = train_dataset[0]


model(image.unsqueeze(axis=0))
Out[ ]:
tensor([[-0.0706, -0.0291, -0.0192, -0.0557, -0.0180, -0.0098,  0.1197, -0.0266,
          0.0345,  0.0621]], grad_fn=<AddmmBackward0>)
In [ ]:
loss_fn = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=LEARNING_RATE)  
In [ ]:
def train_loop(dataloader, model, loss_fn, optimizer):
    size = len(dataloader.dataset)
    for batch, (X, y) in enumerate(dataloader):
        # Compute prediction and loss
        pred = model(X)
        loss = loss_fn(pred, y)

        # Backpropagation
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        if batch % 100 == 0:
            loss, current = loss.item(), batch * len(X)
            print(f"loss: {loss:>7f}  [{current:>5d}/{size:>5d}]")


def test_loop(dataloader, model, loss_fn):
    size = len(dataloader.dataset)
    num_batches = len(dataloader)
    test_loss, correct = 0, 0

    with torch.no_grad():
        for X, y in dataloader:
            pred = model(X)
            test_loss += loss_fn(pred, y).item()
            correct += (pred.argmax(1) == y).sum().item()

    test_loss /= num_batches
    correct /= size
    print(f"Test Error: \n Accuracy: {(100*correct):>0.1f}%, Avg loss: {test_loss:>8f} \n")
In [ ]:
for t in range(NUM_EPOCHS):
    print(f"Epoch {t+1}\n-------------------------------")
    train_loop(train_loader, model, loss_fn, optimizer)
    test_loop(val_loader, model, loss_fn)
print("Done!")
Epoch 1
-------------------------------
loss: 2.304136  [    0/50000]
loss: 1.645977  [12800/50000]
loss: 1.456666  [25600/50000]
loss: 1.367008  [38400/50000]
Test Error: 
 Accuracy: 49.2%, Avg loss: 1.393618 

Epoch 2
-------------------------------
loss: 1.412076  [    0/50000]
loss: 1.526403  [12800/50000]
loss: 1.273957  [25600/50000]
loss: 1.282038  [38400/50000]
Test Error: 
 Accuracy: 54.1%, Avg loss: 1.274829 

Epoch 3
-------------------------------
loss: 1.173802  [    0/50000]
loss: 1.162728  [12800/50000]
loss: 1.113539  [25600/50000]
loss: 1.081153  [38400/50000]
Test Error: 
 Accuracy: 55.9%, Avg loss: 1.235138 

Epoch 4
-------------------------------
loss: 0.921830  [    0/50000]
loss: 1.000973  [12800/50000]
loss: 0.991180  [25600/50000]
loss: 1.028961  [38400/50000]
Test Error: 
 Accuracy: 59.3%, Avg loss: 1.157810 

Epoch 5
-------------------------------
loss: 1.029751  [    0/50000]
loss: 1.001973  [12800/50000]
loss: 0.852175  [25600/50000]
loss: 0.875668  [38400/50000]
Test Error: 
 Accuracy: 61.0%, Avg loss: 1.106099 

Done!

Exercise 1¶

Make any model that can do more than 58% accuracy on validation set.

Also, add some dropout layers. Train for 5 epochs. Do you see some improvement over LeNet5?

In [ ]:
 

Exercise 2: Transfer Learning¶

There are many CNNs that have been trained on a bigger dataset than CIFAR10.

For example, here's a ResNet18 model, which has been trained on ImageNet dataset, which has 1000 classes.

resnet18

We can use model to train images in CIFAR10, but we have to

  1. Freeze the weights of all but the final layer.
  2. Change the output dimension of the final layer to 10.

Task: fill out the code in TODO below and train the model for 5 epochs. Do you see some improvement over LeNet5 and AlexNet?¶

In [ ]:
from torchvision.models import resnet18, ResNet18_Weights

# Load the pretrained model
# The model is already trained on a bigger dataset
# so there is no need to retrain the weights on our dataset
resnet = resnet18(weights=ResNet18_Weights.DEFAULT)

# We will freeze all the model's parameters
for param in resnet.parameters():
    param.requires_grad = False

 
Downloading: "https://download.pytorch.org/models/resnet18-f37072fd.pth" to /root/.cache/torch/hub/checkpoints/resnet18-f37072fd.pth
  0%|          | 0.00/44.7M [00:00<?, ?B/s]
In [ ]:
resnet.fc
Out[ ]:
Linear(in_features=512, out_features=1000, bias=True)
In [ ]:
# input_dim is the input dimension in the last layer
input_dim = resnet.fc.in_features

# Originally, the output dimension of resnet18 is 1000
# Change the output dimension to 10
resnet.fc = # TODO: your code here

loss_fn = nn.CrossEntropyLoss()
# Only optimize the parameters in the last layer
optimizer = torch.optim.Adam(resnet.fc.parameters(), lr=LEARNING_RATE)
In [ ]:
for t in range(NUM_EPOCHS):
    print(f"Epoch {t+1}\n-------------------------------")
    train_loop(train_loader, resnet, loss_fn, optimizer)
    test_loop(val_loader, resnet, loss_fn)
print("Done!")
In [ ]: