import torch
import torch.nn as nn
Build a simple fully connected network by stacking layers with nn.Sequential.
class SimpleNN(nn.Module):
    def __init__(self):
        super(SimpleNN, self).__init__()
        self.flatten = nn.Flatten()
        self.linear_stack = nn.Sequential(nn.Linear(28*28, 512),
                                          nn.ReLU(),
                                          nn.Linear(512, 512),
                                          nn.ReLU(),
                                          nn.Linear(512, 10))

    def forward(self, x):
        x = self.flatten(x)
        logits = self.linear_stack(x)
        return logits
model = SimpleNN()
x = torch.rand(28, 28, 1)
model(x.unsqueeze(dim=0))  # add a batch dimension before the forward pass
tensor([[-0.0907, 0.0114, -0.0487, 0.0036, -0.0181, -0.0989, 0.0320, -0.0028, -0.1364, -0.0212]], grad_fn=<AddmmBackward0>)
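The model returns one raw logit per class. To read these as class probabilities, you can apply a softmax over the class dimension, for example:
probs = torch.softmax(model(x.unsqueeze(dim=0)), dim=1)
probs.sum()  # the 10 probabilities sum to 1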
import torch
import torch.nn as nn
from torch.utils.data import DataLoader
from torchvision import datasets
from torchvision import transforms
import matplotlib.pyplot as plt
# Hyperparameters
LEARNING_RATE = 0.001
BATCH_SIZE = 128
NUM_EPOCHS = 5
# Classification
NUM_CLASSES = 10
Classify (32x32x3) images into 10 classes
Here, train_dataset and val_dataset are datasets that yield (image, label) pairs.
train_dataset = datasets.CIFAR10(root='data',
                                 train=True,
                                 transform=transforms.ToTensor(),
                                 download=True)

val_dataset = datasets.CIFAR10(root='data',
                               train=False,
                               transform=transforms.ToTensor())
Downloading https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz to data/cifar-10-python.tar.gz
Extracting data/cifar-10-python.tar.gz to data
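CIFAR10 comes with 50,000 training images and 10,000 validation images; we can confirm the split sizes directly:
len(train_dataset), len(val_dataset)  # (50000, 10000)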
Look at an image in the dataset
image, label = train_dataset[1]
image.shape
# ToTensor gives a (channels, height, width) tensor; matplotlib expects (height, width, channels)
plt.imshow(image.permute(1, 2, 0))
Add data augmentation to the training images: random horizontal flips and random resized crops.
train_transform = transforms.Compose([transforms.RandomHorizontalFlip(),
                                      transforms.RandomResizedCrop((32, 32),
                                                                   scale=(0.7, 0.8),
                                                                   ratio=(0.6, 0.7)),
                                      transforms.ToTensor()
                                      ])
train_dataset = datasets.CIFAR10(root='data',
                                 train=True,
                                 transform=train_transform,
                                 download=True)

val_dataset = datasets.CIFAR10(root='data',
                               train=False,
                               transform=transforms.ToTensor())
Files already downloaded and verified
image, label = train_dataset[0]
plt.imshow(image.permute(1, 2, 0))
Use DataLoader to split the dataset into minibatches.
train_loader = DataLoader(dataset=train_dataset,
                          batch_size=BATCH_SIZE,
                          num_workers=2,
                          shuffle=True)

val_loader = DataLoader(dataset=val_dataset,
                        batch_size=BATCH_SIZE,
                        num_workers=2,
                        shuffle=False)
Check the size of each minibatch
for images, labels in train_loader:
    print(images.shape)
    break
torch.Size([128, 3, 32, 32])
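Each minibatch holds 128 RGB images of size 32x32. The number of minibatches per epoch is the length of the loader:
len(train_loader)  # ceil(50000 / 128) = 391 minibatches per epoch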
class LeNet5(nn.Module):
    def __init__(self, num_classes):
        super(LeNet5, self).__init__()
        self.num_classes = num_classes
        self.features = nn.Sequential(
            nn.Conv2d(in_channels=3, out_channels=6*3, kernel_size=5),
            nn.Dropout(0.2),
            nn.BatchNorm2d(6*3),
            nn.Tanh(),
            nn.MaxPool2d(kernel_size=2),
            nn.Conv2d(in_channels=6*3, out_channels=16*3, kernel_size=5),
            nn.Tanh(),
            nn.MaxPool2d(kernel_size=2)
        )
        self.classifier = nn.Sequential(
            nn.Flatten(),
            nn.Linear(16*5*5*3, 3*120),
            nn.Tanh(),
            nn.Linear(3*120, 3*84),
            nn.Tanh(),
            nn.Linear(3*84, self.num_classes)
        )

    def forward(self, x):
        x = self.features(x)
        logits = self.classifier(x)
        return logits
model = LeNet5(NUM_CLASSES)
Apply the model to an image.
image, label = train_dataset[0]
model(image.unsqueeze(dim=0))
tensor([[-0.0706, -0.0291, -0.0192, -0.0557, -0.0180, -0.0098, 0.1197, -0.0266, 0.0345, 0.0621]], grad_fn=<AddmmBackward0>)
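Again the output is one logit per class; the predicted class is the index of the largest logit:
model(image.unsqueeze(dim=0)).argmax(dim=1)  # highest-scoring class index (meaningless until the model is trained)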
loss_fn = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=LEARNING_RATE)
def train_loop(dataloader, model, loss_fn, optimizer):
    size = len(dataloader.dataset)
    for batch, (X, y) in enumerate(dataloader):
        # Compute prediction and loss
        pred = model(X)
        loss = loss_fn(pred, y)

        # Backpropagation
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        if batch % 100 == 0:
            loss, current = loss.item(), batch * len(X)
            print(f"loss: {loss:>7f} [{current:>5d}/{size:>5d}]")
def test_loop(dataloader, model, loss_fn):
    size = len(dataloader.dataset)
    num_batches = len(dataloader)
    test_loss, correct = 0, 0
    with torch.no_grad():
        for X, y in dataloader:
            pred = model(X)
            test_loss += loss_fn(pred, y).item()
            correct += (pred.argmax(1) == y).sum().item()
    test_loss /= num_batches
    correct /= size
    print(f"Test Error: \n Accuracy: {(100*correct):>0.1f}%, Avg loss: {test_loss:>8f} \n")
for t in range(NUM_EPOCHS):
    print(f"Epoch {t+1}\n-------------------------------")
    train_loop(train_loader, model, loss_fn, optimizer)
    test_loop(val_loader, model, loss_fn)
print("Done!")
Epoch 1
-------------------------------
loss: 2.304136 [    0/50000]
loss: 1.645977 [12800/50000]
loss: 1.456666 [25600/50000]
loss: 1.367008 [38400/50000]
Test Error:
 Accuracy: 49.2%, Avg loss: 1.393618

Epoch 2
-------------------------------
loss: 1.412076 [    0/50000]
loss: 1.526403 [12800/50000]
loss: 1.273957 [25600/50000]
loss: 1.282038 [38400/50000]
Test Error:
 Accuracy: 54.1%, Avg loss: 1.274829

Epoch 3
-------------------------------
loss: 1.173802 [    0/50000]
loss: 1.162728 [12800/50000]
loss: 1.113539 [25600/50000]
loss: 1.081153 [38400/50000]
Test Error:
 Accuracy: 55.9%, Avg loss: 1.235138

Epoch 4
-------------------------------
loss: 0.921830 [    0/50000]
loss: 1.000973 [12800/50000]
loss: 0.991180 [25600/50000]
loss: 1.028961 [38400/50000]
Test Error:
 Accuracy: 59.3%, Avg loss: 1.157810

Epoch 5
-------------------------------
loss: 1.029751 [    0/50000]
loss: 1.001973 [12800/50000]
loss: 0.852175 [25600/50000]
loss: 0.875668 [38400/50000]
Test Error:
 Accuracy: 61.0%, Avg loss: 1.106099

Done!
Build a model that achieves more than 58% accuracy on the validation set.
Also, add some dropout layers. Train for 5 epochs. Do you see any improvement over LeNet5? One possible starting point is sketched below.
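As one possible starting point (a sketch only, with illustrative layer sizes, not the required solution), a small CNN with dropout could look like this; it can be trained with the same train_loop and test_loop used for LeNet5.
class SmallCNN(nn.Module):
    def __init__(self, num_classes):
        super(SmallCNN, self).__init__()
        self.features = nn.Sequential(
            nn.Conv2d(3, 32, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(2),   # 32x32 -> 16x16
            nn.Conv2d(32, 64, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(2),   # 16x16 -> 8x8
            nn.Dropout(0.25)
        )
        self.classifier = nn.Sequential(
            nn.Flatten(),
            nn.Linear(64 * 8 * 8, 256),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(256, num_classes)
        )

    def forward(self, x):
        return self.classifier(self.features(x))

# Reuse the same training setup as above:
# model = SmallCNN(NUM_CLASSES)
# optimizer = torch.optim.Adam(model.parameters(), lr=LEARNING_RATE)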
There are many CNNs that have been trained on a bigger dataset than CIFAR10.
For example, here's a ResNet18 model that has been trained on the ImageNet dataset, which has 1000 classes.
We can use this model to classify CIFAR10 images, but we have to replace its final layer so that it predicts 10 classes instead of 1000. Complete the TODO below and train the model for 5 epochs. Do you see any improvement over LeNet5 and AlexNet?
from torchvision.models import resnet18, ResNet18_Weights
# Load the pretrained model
# The model is already trained on a bigger dataset
# so there is no need to retrain the weights on our dataset
resnet = resnet18(weights=ResNet18_Weights.DEFAULT)
# We will freeze all the model's parameters
for param in resnet.parameters():
    param.requires_grad = False
Downloading: "https://download.pytorch.org/models/resnet18-f37072fd.pth" to /root/.cache/torch/hub/checkpoints/resnet18-f37072fd.pth
resnet.fc
Linear(in_features=512, out_features=1000, bias=True)
# input_dim is the input dimension in the last layer
input_dim = resnet.fc.in_features
# Originally, the output dimension of resnet18 is 1000
# Change the output dimension to 10
resnet.fc = # TODO: your code here
loss_fn = nn.CrossEntropyLoss()
# Only optimize the parameters in the last layer
optimizer = torch.optim.Adam(resnet.fc.parameters(), lr=LEARNING_RATE)
for t in range(NUM_EPOCHS):
    print(f"Epoch {t+1}\n-------------------------------")
    train_loop(train_loader, resnet, loss_fn, optimizer)
    test_loop(val_loader, resnet, loss_fn)
print("Done!")