First working status.
This commit is contained in:
3
.gitignore
vendored
3
.gitignore
vendored
@@ -6,4 +6,5 @@ __pycache__/
|
|||||||
*.synctex.gz
|
*.synctex.gz
|
||||||
*.synctex.gz(buzy)
|
*.synctex.gz(buzy)
|
||||||
*.out
|
*.out
|
||||||
*.pdf
|
*.pdf
|
||||||
|
.DS_Store
|
||||||
@@ -2,8 +2,8 @@
|
|||||||
# Media and Cognition
|
# Media and Cognition
|
||||||
# Homework 2 Convolutional Neural Network
|
# Homework 2 Convolutional Neural Network
|
||||||
# datasets.py - Define the data loader for the traffic sign classification dataset
|
# datasets.py - Define the data loader for the traffic sign classification dataset
|
||||||
# Student ID:
|
# Student ID: 2022010639
|
||||||
# Name:
|
# Name: Gao Yixuan
|
||||||
# Tsinghua University
|
# Tsinghua University
|
||||||
# (C) Copyright 2024
|
# (C) Copyright 2024
|
||||||
# ========================================================
|
# ========================================================
|
||||||
@@ -11,7 +11,8 @@
|
|||||||
import os
|
import os
|
||||||
|
|
||||||
import numpy as np
|
import numpy as np
|
||||||
import torchvision.transforms as transforms
|
import torchvision.transforms.v2 as transforms
|
||||||
|
import torch
|
||||||
from torch.utils.data import DataLoader
|
from torch.utils.data import DataLoader
|
||||||
from torchvision.datasets import ImageFolder
|
from torchvision.datasets import ImageFolder
|
||||||
|
|
||||||
@@ -39,31 +40,38 @@ def get_data_loader(
|
|||||||
# (2) convert the images to PyTorch tensors
|
# (2) convert the images to PyTorch tensors
|
||||||
# (3) normalize the pixel values to [-1, 1]
|
# (3) normalize the pixel values to [-1, 1]
|
||||||
data_transforms = [
|
data_transforms = [
|
||||||
|
# transforms.ToImage(),
|
||||||
|
transforms.Resize(image_size),
|
||||||
|
transforms.ToImage(),
|
||||||
|
transforms.ToDtype(torch.float32, scale=True),
|
||||||
|
transforms.Normalize(mean=[-127.0, -127.0, -127.0], std=[128.0, 128.0, 128.0])
|
||||||
]
|
]
|
||||||
|
|
||||||
# You should insert some data augmentation techniques to `data_transforms` when `augment` is True
|
# You should insert some data augmentation techniques to `data_transforms` when `augment` is True
|
||||||
# for the training dataset.
|
# for the training dataset.
|
||||||
# Consider what is an appropriate data augmentation technique for traffic sign classification.
|
# Consider what is an appropriate data augmentation technique for traffic sign classification.
|
||||||
if mode == "train" and augment:
|
if mode == "train" and augment:
|
||||||
pass # TODO
|
# pass # TODO
|
||||||
|
data_transforms.append(transforms.AutoAugment())
|
||||||
# Else, the `data_transforms` should be left unchanged
|
# Else, the `data_transforms` should be left unchanged
|
||||||
# <<< TODO 1.1
|
# <<< TODO 1.1
|
||||||
# Use `transforms.Compose` to compose the list of transforms into a single transform
|
# Use `transforms.Compose` to compose the list of transforms into a single transform
|
||||||
data_transforms = transforms.Compose(data_transforms)
|
data_transforms = transforms.Compose(data_transforms)
|
||||||
|
|
||||||
|
print(type(data_transforms))
|
||||||
|
|
||||||
# >>> TODO 1.2: Define the dataset.
|
# >>> TODO 1.2: Define the dataset.
|
||||||
# You should build the path to the selected dataset according to the `mode` parameter,
|
# You should build the path to the selected dataset according to the `mode` parameter,
|
||||||
# and use the `ImageFolder` class from `torchvision.datasets` to load the datasets.
|
# and use the `ImageFolder` class from `torchvision.datasets` to load the datasets.
|
||||||
# Docs: https://pytorch.org/vision/stable/generated/torchvision.datasets.ImageFolder.html
|
# Docs: https://pytorch.org/vision/stable/generated/torchvision.datasets.ImageFolder.html
|
||||||
# The `ImageFolder` class takes in the path to the dataset and the transform to apply to the images.
|
# The `ImageFolder` class takes in the path to the dataset and the transform to apply to the images.
|
||||||
# The `ImageFolder` class will automatically load the images and labels for you.
|
# The `ImageFolder` class will automatically load the images and labels for you.
|
||||||
dataset = ?
|
dataset = ImageFolder(root=data_root + "/" + mode, transform=data_transforms)
|
||||||
# <<< TODO 1.2
|
# <<< TODO 1.2
|
||||||
|
|
||||||
# >>> TODO 1.3: Define the data loader.
|
# >>> TODO 1.3: Define the data loader.
|
||||||
# You should set the `shuffle` parameter to `True` when `mode=='train'`, and `False` otherwise.
|
# You should set the `shuffle` parameter to `True` when `mode=='train'`, and `False` otherwise.
|
||||||
loader = ?
|
loader = DataLoader(dataset, batch_size=batch_size, num_workers=num_workers, shuffle=(mode=='train'))
|
||||||
# <<< TODO 1.3
|
# <<< TODO 1.3
|
||||||
|
|
||||||
return loader
|
return loader
|
||||||
|
|||||||
@@ -2,8 +2,8 @@
|
|||||||
# Media and Cognition
|
# Media and Cognition
|
||||||
# Homework 2 Convolutional Neural Network
|
# Homework 2 Convolutional Neural Network
|
||||||
# networks.py - Network definition
|
# networks.py - Network definition
|
||||||
# Student ID:
|
# Student ID: 2022010639
|
||||||
# Name:
|
# Name: Gao Yixuan
|
||||||
# Tsinghua University
|
# Tsinghua University
|
||||||
# (C) Copyright 2024
|
# (C) Copyright 2024
|
||||||
# ========================================================
|
# ========================================================
|
||||||
@@ -49,14 +49,22 @@ class ConvBlock(nn.Module):
|
|||||||
# Hint: use the `bn2d` defined above for batch normalization to adapt to the input parameter `use_batch_norm`
|
# Hint: use the `bn2d` defined above for batch normalization to adapt to the input parameter `use_batch_norm`
|
||||||
# Network structure:
|
# Network structure:
|
||||||
# conv -> batchnorm -> relu
|
# conv -> batchnorm -> relu
|
||||||
self.conv = ?
|
self.conv = nn.Conv2d(
|
||||||
self.bn = ?
|
in_channels, out_channels, kernel_size, stride=stride, padding=padding
|
||||||
self.relu = ?
|
)
|
||||||
|
self.bn = bn2d(out_channels)
|
||||||
|
self.relu = nn.ReLU()
|
||||||
# <<< TODO 2.1
|
# <<< TODO 2.1
|
||||||
|
|
||||||
def forward(self, x):
|
def forward(self, x):
|
||||||
# >>> TODO 2.2: forward process
|
# >>> TODO 2.2: forward process
|
||||||
# Hint: apply residual connection if `self.use_residual` is True
|
# Hint: apply residual connection if `self.use_residual` is True
|
||||||
|
fx = self.relu(self.bn(self.conv(x)))
|
||||||
|
# out = self.relu(self.bn(self.conv(x)))
|
||||||
|
if self.use_residual:
|
||||||
|
out = fx + x
|
||||||
|
else:
|
||||||
|
out = fx
|
||||||
|
|
||||||
# <<< TODO 2.2
|
# <<< TODO 2.2
|
||||||
return out
|
return out
|
||||||
@@ -108,7 +116,38 @@ class Classifier(nn.Module):
|
|||||||
# dropout(p), where p is input parameter of dropout ratio
|
# dropout(p), where p is input parameter of dropout ratio
|
||||||
|
|
||||||
self.conv_net = nn.Sequential(
|
self.conv_net = nn.Sequential(
|
||||||
|
ConvBlock(
|
||||||
|
in_channels=in_channels,
|
||||||
|
out_channels=32,
|
||||||
|
kernel_size=5,
|
||||||
|
stride=1,
|
||||||
|
padding=2,
|
||||||
|
),
|
||||||
|
ConvBlock(
|
||||||
|
in_channels=32, out_channels=64, kernel_size=5, stride=2, padding=2
|
||||||
|
),
|
||||||
|
nn.MaxPool2d(kernel_size=2, stride=2, padding=0),
|
||||||
|
ConvBlock(
|
||||||
|
in_channels=64,
|
||||||
|
out_channels=64,
|
||||||
|
kernel_size=3,
|
||||||
|
stride=1,
|
||||||
|
padding=1,
|
||||||
|
use_residual=True,
|
||||||
|
),
|
||||||
|
ConvBlock(
|
||||||
|
in_channels=64, out_channels=128, kernel_size=3, stride=1, padding=1
|
||||||
|
),
|
||||||
|
nn.MaxPool2d(kernel_size=2, stride=2, padding=0),
|
||||||
|
ConvBlock(
|
||||||
|
in_channels=128,
|
||||||
|
out_channels=128,
|
||||||
|
kernel_size=3,
|
||||||
|
stride=1,
|
||||||
|
padding=1,
|
||||||
|
use_residual=True,
|
||||||
|
),
|
||||||
|
nn.Dropout2d(p=dropout_prob),
|
||||||
)
|
)
|
||||||
# <<< TODO 3.1
|
# <<< TODO 3.1
|
||||||
|
|
||||||
@@ -125,7 +164,11 @@ class Classifier(nn.Module):
|
|||||||
# dropout(p), where p is input parameter of dropout ratio
|
# dropout(p), where p is input parameter of dropout ratio
|
||||||
# linear num_classes
|
# linear num_classes
|
||||||
self.fc_net = nn.Sequential(
|
self.fc_net = nn.Sequential(
|
||||||
|
nn.Linear(2048, 256),
|
||||||
|
nn.ReLU(),
|
||||||
|
bn1d(256),
|
||||||
|
nn.Dropout1d(dropout_prob),
|
||||||
|
nn.Linear(256, num_classes),
|
||||||
)
|
)
|
||||||
# <<< TODO 3.2
|
# <<< TODO 3.2
|
||||||
|
|
||||||
@@ -140,12 +183,14 @@ class Classifier(nn.Module):
|
|||||||
|
|
||||||
# >>> TODO 3.3: forward process
|
# >>> TODO 3.3: forward process
|
||||||
# Step 2: forward process for the convolutional network
|
# Step 2: forward process for the convolutional network
|
||||||
|
x = self.conv_net(x)
|
||||||
|
|
||||||
# Step 3: use `Tensor.view()` to flatten the tensor to match the size of the input of the
|
# Step 3: use `Tensor.view()` to flatten the tensor to match the size of the input of the
|
||||||
# fully connected layers.
|
# fully connected layers.
|
||||||
|
x = x.view(-1, 2048)
|
||||||
|
|
||||||
# Step 4: forward process for the fully connected network
|
# Step 4: forward process for the fully connected network
|
||||||
|
out = self.fc_net(x)
|
||||||
# <<< TODO 3.3
|
# <<< TODO 3.3
|
||||||
|
|
||||||
return out
|
return out
|
||||||
@@ -184,7 +229,10 @@ class STN(nn.Module):
|
|||||||
# this network.
|
# this network.
|
||||||
# Suggested structure: 3 down-sampling convolutional layers with doubling output channels, using BN and ReLU.
|
# Suggested structure: 3 down-sampling convolutional layers with doubling output channels, using BN and ReLU.
|
||||||
self.localization_conv = nn.Sequential(
|
self.localization_conv = nn.Sequential(
|
||||||
|
ConvBlock(in_channels=in_channels, out_channels=8, kernel_size=3, stride=2, padding=1, use_batch_norm=True),
|
||||||
|
ConvBlock(in_channels=8, out_channels=16, kernel_size=3, stride=2, padding=1, use_batch_norm=True),
|
||||||
|
ConvBlock(in_channels=16, out_channels=32, kernel_size=3, stride=2, padding=1, use_batch_norm=True),
|
||||||
|
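# Output feature map: 32 channels x 4 x 4 = 512 values per sample (assuming 32x32 inputs, halved by each of the three stride-2 convs; TODO confirm). NOTE(review): the first layer of `localization_fc` below takes 16 input features, which does not match 512; verify.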
|
||||||
)
|
)
|
||||||
|
|
||||||
# Step 2: Build a fully connected network to predict the parameters of affine transformation from
|
# Step 2: Build a fully connected network to predict the parameters of affine transformation from
|
||||||
@@ -192,7 +240,10 @@ class STN(nn.Module):
|
|||||||
# Hint: Combine linear layers and ReLU activation functions to build this network.
|
# Hint: Combine linear layers and ReLU activation functions to build this network.
|
||||||
# Suggested structure: 2 linear layers with one BN and ReLU.
|
# Suggested structure: 2 linear layers with one BN and ReLU.
|
||||||
self.localization_fc = nn.Sequential(
|
self.localization_fc = nn.Sequential(
|
||||||
|
nn.Linear(16, 256),
|
||||||
|
nn.Linear(256, 360),
|
||||||
|
nn.BatchNorm1d(360),
|
||||||
|
nn.ReLU()
|
||||||
)
|
)
|
||||||
# <<< TODO 4.1
|
# <<< TODO 4.1
|
||||||
|
|
||||||
@@ -200,7 +251,7 @@ class STN(nn.Module):
|
|||||||
# Hint: The STN should generate the identity transformation by default before training.
|
# Hint: The STN should generate the identity transformation by default before training.
|
||||||
# How to initialize the weight/bias of the last linear layer of the fully connected network to
|
# How to initialize the weight/bias of the last linear layer of the fully connected network to
|
||||||
# achieve this goal?
|
# achieve this goal?
|
||||||
|
nn.init.zeros_(self.localization_fc[1].weight)
|
||||||
# <<< TODO 4.2
|
# <<< TODO 4.2
|
||||||
|
|
||||||
def forward(self, x):
|
def forward(self, x):
|
||||||
|
|||||||
Reference in New Issue
Block a user