Mac Sync
This commit is contained in:
@@ -1,41 +1,41 @@
|
||||
# ========================================================
|
||||
# Media and Cognition
|
||||
# Homework 3 Support Vector Machine
|
||||
# check.py - Check your implementation of several modules
|
||||
# Tsinghua University
|
||||
# (C) Copyright 2024
|
||||
# ========================================================
|
||||
|
||||
from svm_hw import SVM_HINGE, LinearFunction, Hinge
|
||||
import torch
|
||||
from torch.autograd import gradcheck
|
||||
|
||||
|
||||
def run():
    """
    Check the student implementations imported from svm_hw.

    Three checks are performed in order:
      1. LinearFunction: analytical vs. numerical gradients (gradcheck)
      2. Hinge: analytical vs. numerical gradients (gradcheck)
      3. SVM_HINGE: one forward pass with labels, and W / b must be trainable

    :return: None, the result of each check is printed
    """
    model = SVM_HINGE(2, C=1.0).double()

    # gradcheck compares against finite differences, so every input is created in double precision
    x = torch.randn(50, 2, dtype=torch.double)
    W = torch.randn(1, 2, dtype=torch.double, requires_grad=True)
    b = torch.zeros(1, dtype=torch.double, requires_grad=True)
    test = gradcheck(LinearFunction.apply, (x, W, b), eps=1e-6, atol=1e-4)
    if test:
        print('Linear successully tested!')

    output = torch.randn(50, 1, dtype=torch.double, requires_grad=True)
    W = torch.randn(1, 2, dtype=torch.double, requires_grad=True)
    # a single label that is broadcast against all 50 outputs
    labels = torch.ones(1, dtype=torch.double)
    C = torch.tensor([[1.0]], dtype=torch.double)
    test = gradcheck(Hinge.apply, (output, W, labels, C), eps=1e-6, atol=1e-5)
    if test:
        print('Hinge successfully tested!')

    x = torch.randn(50, 2, dtype=torch.double)
    labels = torch.ones(50, dtype=torch.double)
    try:
        output, loss = model(x, labels)
        assert model.W.requires_grad is True
        assert model.b.requires_grad is True
        print('SVM_HINGE successfully tested!')
    except Exception as err:
        # keep the original failure attached so the real cause stays visible
        raise Exception('Failed testing SVM_HINGE!') from err
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
# ========================================================
|
||||
# Media and Cognition
|
||||
# Homework 3 Support Vector Machine
|
||||
# check.py - Check your implementation of several modules
|
||||
# Tsinghua University
|
||||
# (C) Copyright 2024
|
||||
# ========================================================
|
||||
|
||||
from svm_hw import SVM_HINGE, LinearFunction, Hinge
|
||||
import torch
|
||||
from torch.autograd import gradcheck
|
||||
|
||||
|
||||
def run():
    """
    Check the student implementations imported from svm_hw.

    Three checks are performed in order:
      1. LinearFunction: analytical vs. numerical gradients (gradcheck)
      2. Hinge: analytical vs. numerical gradients (gradcheck)
      3. SVM_HINGE: one forward pass with labels, and W / b must be trainable

    :return: None, the result of each check is printed
    """
    model = SVM_HINGE(2, C=1.0).double()

    # gradcheck compares against finite differences, so every input is created in double precision
    x = torch.randn(50, 2, dtype=torch.double)
    W = torch.randn(1, 2, dtype=torch.double, requires_grad=True)
    b = torch.zeros(1, dtype=torch.double, requires_grad=True)
    test = gradcheck(LinearFunction.apply, (x, W, b), eps=1e-6, atol=1e-4)
    if test:
        print('Linear successully tested!')

    output = torch.randn(50, 1, dtype=torch.double, requires_grad=True)
    W = torch.randn(1, 2, dtype=torch.double, requires_grad=True)
    # a single label that is broadcast against all 50 outputs
    labels = torch.ones(1, dtype=torch.double)
    C = torch.tensor([[1.0]], dtype=torch.double)
    test = gradcheck(Hinge.apply, (output, W, labels, C), eps=1e-6, atol=1e-5)
    if test:
        print('Hinge successfully tested!')

    x = torch.randn(50, 2, dtype=torch.double)
    labels = torch.ones(50, dtype=torch.double)
    try:
        output, loss = model(x, labels)
        assert model.W.requires_grad is True
        assert model.b.requires_grad is True
        print('SVM_HINGE successfully tested!')
    except Exception as err:
        # keep the original failure attached so the real cause stays visible
        raise Exception('Failed testing SVM_HINGE!') from err
|
||||
|
||||
|
||||
# run the checks only when this file is executed as a script
if __name__ == '__main__':
    run()
|
||||
@@ -1,178 +1,178 @@
|
||||
# ========================================================
|
||||
# Media and Cognition
|
||||
# Homework 3 Support Vector Machine
|
||||
# data_preprocess.py - Using pretrained convolutional layers to extract feature,
|
||||
# and using PCA for dimensionality reduction
|
||||
# Student ID:
|
||||
# Name:
|
||||
# Tsinghua University
|
||||
# (C) Copyright 2024
|
||||
# ========================================================
|
||||
|
||||
import os
|
||||
import torchvision.transforms as transforms
|
||||
import torch
|
||||
from PIL import Image
|
||||
from networks import Classifier
|
||||
import matplotlib.pyplot as plt
|
||||
import argparse
|
||||
|
||||
|
||||
def preprocess(pre_conv, data_root, image_size, classes):
    """
    Extract features of the train / val / test splits, reduce them to 2 dimensions with PCA,
    then display and save each split as data_root/{train,val,test}.pt
    -------------------------------
    :param pre_conv: pretrained network used as the feature extractor
    :param data_root: the path of all datasets
    :param image_size: the preset size that each image try to zoom to
    :param classes: two classes that need to be classified
    :return:
    """
    # =============== process training dataset ======================
    print("Start preprocessing the training dataset !!!")
    train_data, train_label = loaddata(pre_conv, data_root, 'train', image_size, classes)

    # calculate the mean and PCA projection matrix (on the training split only)
    data_mean, u = PCA(train_data, 2)

    # center with the training mean, then project onto the principal directions
    train_data_pca = torch.matmul(train_data - data_mean, u)

    visualize(train_data_pca, train_label, "train")
    savedata(train_data_pca, train_label, data_root+"/train.pt")
    print("training dataset saved !!!")

    # =============== process validation dataset ======================
    print("Start preprocessing the validation dataset!!!")
    val_data, val_label = loaddata(pre_conv, data_root, 'val', image_size, classes)

    # the validation split reuses the mean and projection estimated on the training split
    val_data_pca = torch.matmul(val_data - data_mean, u)

    visualize(val_data_pca, val_label, "val")
    savedata(val_data_pca, val_label, data_root+"/val.pt")
    print("validation dataset saved !!!")

    # =============== process testing dataset ======================
    print("Start preprocessing the testing dataset!!!")
    test_data, test_label = loaddata(pre_conv, data_root, 'test', image_size, classes)

    # the testing split reuses the mean and projection estimated on the training split
    test_data_pca = torch.matmul(test_data - data_mean, u)

    visualize(test_data_pca, test_label, "test")
    savedata(test_data_pca, test_label, data_root+"/test.pt")
    print("testing dataset saved !!!")
|
||||
|
||||
|
||||
def savedata(data, label, save_path):
    """
    Save samples and labels into one file with torch.save
    -------------------------------
    :param data: the samples to be stored under the key 'data'
    :param label: the labels to be stored under the key 'label'
    :param save_path: the path of the output file
    :return:
    """
    save_dict = {
        'data': data,
        'label': label,
    }
    torch.save(save_dict, save_path)
|
||||
|
||||
|
||||
def visualize(datas, labels, mode):
    """
    Display feature points after dimensionality reduction
    -------------------------------
    :param datas: the samples after dimensionality reduction, with the shape of [N, 2]
    :param labels: the labels (chosen from {-1, +1}) corresponding to the samples
    :param mode: chosen from {'train', 'val', 'test'}, used as the title of the figure
    :return:
    """
    plt.figure()
    # one scatter group per class label; the groups are defined by the label set {-1, +1},
    # not by the number of feature columns
    for class_label in (-1, 1):
        selected = labels == class_label
        plt.scatter(datas[selected, 0], datas[selected, 1], label=class_label)
    plt.legend()
    plt.title(mode)
    plt.show()
|
||||
|
||||
|
||||
def PCA(data, dim=2):
    """
    calculate the mean value of the data and the projection matrix for PCA
    :param data: the sample features extracted by the pretrained network in homework2, with the shape of [N, 2048]
    :param dim: the data dimension after projection
    :return:
        data_mean: the mean value of the data
        u: the projection matrix for PCA, with the shape of [2048, dim]
    """
    # mean over the sample axis, one value per feature
    data_mean = data.mean(dim=0)

    # covariance matrix of the centered data, with the shape of [features, features];
    # max(..., 1) avoids a division by zero when only one sample is given
    centered = data - data_mean
    data_cov = torch.matmul(centered.T, centered) / max(data.shape[0] - 1, 1)

    # reference: https://pytorch.org/docs/1.11/generated/torch.linalg.svd.html
    # singular values are returned in descending order, so the leading columns of u
    # are the directions of largest variance
    u, _, _ = torch.linalg.svd(data_cov)

    return data_mean, u[:, :dim]
|
||||
|
||||
|
||||
def loaddata(pre_conv, data_root, mode, image_size, classes):
    """
    load one dataset, and use pretrained network in homework 2 to extract feature
    :param pre_conv: pretrained network in homework 2
    :param data_root: the path of the dataset
    :param mode: chosen from {'train', 'val', 'test'}
    :param image_size: the preset size that each image try to zoom to
    :param classes: two classes that need to be classified
    :return:
        datas: the samples of extracted features, stacked along a new first axis
        labels: the corresponding labels for each sample (chosen from {-1, +1}), with the shape of [N]
    """
    assert len(classes) == 2
    datas = []
    labels = []
    for idx, class_name in enumerate(classes):
        class_dir = os.path.join(data_root, mode, class_name)
        # the first class is labeled -1 and the second class +1
        label = 2 * idx - 1
        # os.listdir returns entries in arbitrary order; sorting makes the sample order reproducible
        for img in sorted(os.listdir(class_dir)):
            datas.append(readimg(pre_conv, os.path.join(class_dir, img), image_size))
            labels.append(label)
    if not datas:
        # torch.stack on an empty list fails with an unhelpful message
        raise RuntimeError(
            "no images found in '{}' for classes {}".format(os.path.join(data_root, mode), list(classes)))
    return torch.stack(datas), torch.tensor(labels)
|
||||
|
||||
|
||||
def readimg(pre_conv, filepath, image_size):
    """
    Read one image and use pretrained network to extract the feature
    --------------------------
    :param pre_conv: pretrained network in homework 2
    :param filepath: the file path of one image
    :param image_size: the preset size that each image try to zoom to
    :return:
        data: the extracted feature, flattened into a 1-D tensor
    """
    # the context manager closes the image file as soon as the pixels are converted
    with Image.open(filepath) as img_file:
        img_pil = img_file.convert('RGB')
    img_pil = img_pil.resize(image_size)

    img_transform = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize(0.5, 0.5),
    ])
    img_tensor = img_transform(img_pil)

    # add a batch axis for the network, then flatten its output
    data = pre_conv(img_tensor.unsqueeze(0)).reshape(-1)

    return data
|
||||
|
||||
|
||||
# script entry: load the pretrained classifier and preprocess all three dataset splits
if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument("--pretrained_net", type=str, default="checkpoints/bn/ckpt_epoch_15.pth",
                        help="the filepath of the pretrained network in homework 2")
    parser.add_argument("--data_root", type=str, default="data", help="the path of all datasets")
    # two integers; 'type=tuple' would split a command line value such as "32" into ('3', '2')
    parser.add_argument("--image_size", type=int, nargs=2, default=(32, 32),
                        help="the preset size that each image try to zoom to")
    # exactly two class names; without nargs only a single string could be passed
    parser.add_argument("--classes", type=str, nargs=2, default=["B", "C"],
                        help="two classes that need to be classified")

    args = parser.parse_args()

    pretrained_checkpoint = torch.load(args.pretrained_net, map_location="cpu")
    configs = pretrained_checkpoint["configs"]
    cls = Classifier(
        configs["in_channels"],
        configs["num_classes"],
        configs["use_batch_norm"],
        configs["use_stn"],
        configs["dropout_prob"],
    )
    cls.load_state_dict(pretrained_checkpoint["model_state"])
    # the network is only used as a fixed feature extractor
    for param in cls.parameters():
        param.requires_grad = False
    # NOTE(review): eval mode assumes Classifier is an nn.Module with batch norm / dropout layers,
    # so that features do not depend on the processing order - confirm against networks.py
    cls.eval()
    conv = cls.conv_net

    preprocess(conv, args.data_root, tuple(args.image_size), args.classes)
|
||||
# ========================================================
|
||||
# Media and Cognition
|
||||
# Homework 3 Support Vector Machine
|
||||
# data_preprocess.py - Using pretrained convolutional layers to extract feature,
|
||||
# and using PCA for dimensionality reduction
|
||||
# Student ID:
|
||||
# Name:
|
||||
# Tsinghua University
|
||||
# (C) Copyright 2024
|
||||
# ========================================================
|
||||
|
||||
import os
|
||||
import torchvision.transforms as transforms
|
||||
import torch
|
||||
from PIL import Image
|
||||
from networks import Classifier
|
||||
import matplotlib.pyplot as plt
|
||||
import argparse
|
||||
|
||||
|
||||
def preprocess(pre_conv, data_root, image_size, classes):
    """
    Extract features of the train / val / test splits, reduce them to 2 dimensions with PCA,
    then display and save each split as data_root/{train,val,test}.pt
    -------------------------------
    :param pre_conv: pretrained network used as the feature extractor
    :param data_root: the path of all datasets
    :param image_size: the preset size that each image try to zoom to
    :param classes: two classes that need to be classified
    :return:
    """
    # =============== process training dataset ======================
    print("Start preprocessing the training dataset !!!")
    train_data, train_label = loaddata(pre_conv, data_root, 'train', image_size, classes)

    # calculate the mean and PCA projection matrix (on the training split only)
    data_mean, u = PCA(train_data, 2)

    # center with the training mean, then project onto the principal directions
    train_data_pca = torch.matmul(train_data - data_mean, u)

    visualize(train_data_pca, train_label, "train")
    savedata(train_data_pca, train_label, data_root+"/train.pt")
    print("training dataset saved !!!")

    # =============== process validation dataset ======================
    print("Start preprocessing the validation dataset!!!")
    val_data, val_label = loaddata(pre_conv, data_root, 'val', image_size, classes)

    # the validation split reuses the mean and projection estimated on the training split
    val_data_pca = torch.matmul(val_data - data_mean, u)

    visualize(val_data_pca, val_label, "val")
    savedata(val_data_pca, val_label, data_root+"/val.pt")
    print("validation dataset saved !!!")

    # =============== process testing dataset ======================
    print("Start preprocessing the testing dataset!!!")
    test_data, test_label = loaddata(pre_conv, data_root, 'test', image_size, classes)

    # the testing split reuses the mean and projection estimated on the training split
    test_data_pca = torch.matmul(test_data - data_mean, u)

    visualize(test_data_pca, test_label, "test")
    savedata(test_data_pca, test_label, data_root+"/test.pt")
    print("testing dataset saved !!!")
|
||||
|
||||
|
||||
def savedata(data, label, save_path):
    """
    Save samples and labels into one file with torch.save
    -------------------------------
    :param data: the samples to be stored under the key 'data'
    :param label: the labels to be stored under the key 'label'
    :param save_path: the path of the output file
    :return:
    """
    save_dict = {
        'data': data,
        'label': label,
    }
    torch.save(save_dict, save_path)
|
||||
|
||||
|
||||
def visualize(datas, labels, mode):
    """
    Display feature points after dimensionality reduction
    -------------------------------
    :param datas: the samples after dimensionality reduction, with the shape of [N, 2]
    :param labels: the labels (chosen from {-1, +1}) corresponding to the samples
    :param mode: chosen from {'train', 'val', 'test'}, used as the title of the figure
    :return:
    """
    plt.figure()
    # one scatter group per class label; the groups are defined by the label set {-1, +1},
    # not by the number of feature columns
    for class_label in (-1, 1):
        selected = labels == class_label
        plt.scatter(datas[selected, 0], datas[selected, 1], label=class_label)
    plt.legend()
    plt.title(mode)
    plt.show()
|
||||
|
||||
|
||||
def PCA(data, dim=2):
    """
    calculate the mean value of the data and the projection matrix for PCA
    :param data: the sample features extracted by the pretrained network in homework2, with the shape of [N, 2048]
    :param dim: the data dimension after projection
    :return:
        data_mean: the mean value of the data
        u: the projection matrix for PCA, with the shape of [2048, dim]
    """
    # mean over the sample axis, one value per feature
    data_mean = data.mean(dim=0)

    # covariance matrix of the centered data, with the shape of [features, features];
    # max(..., 1) avoids a division by zero when only one sample is given
    centered = data - data_mean
    data_cov = torch.matmul(centered.T, centered) / max(data.shape[0] - 1, 1)

    # reference: https://pytorch.org/docs/1.11/generated/torch.linalg.svd.html
    # singular values are returned in descending order, so the leading columns of u
    # are the directions of largest variance
    u, _, _ = torch.linalg.svd(data_cov)

    return data_mean, u[:, :dim]
|
||||
|
||||
|
||||
def loaddata(pre_conv, data_root, mode, image_size, classes):
    """
    load one dataset, and use pretrained network in homework 2 to extract feature
    :param pre_conv: pretrained network in homework 2
    :param data_root: the path of the dataset
    :param mode: chosen from {'train', 'val', 'test'}
    :param image_size: the preset size that each image try to zoom to
    :param classes: two classes that need to be classified
    :return:
        datas: the samples of extracted features, stacked along a new first axis
        labels: the corresponding labels for each sample (chosen from {-1, +1}), with the shape of [N]
    """
    assert len(classes) == 2
    datas = []
    labels = []
    for idx, class_name in enumerate(classes):
        class_dir = os.path.join(data_root, mode, class_name)
        # the first class is labeled -1 and the second class +1
        label = 2 * idx - 1
        # os.listdir returns entries in arbitrary order; sorting makes the sample order reproducible
        for img in sorted(os.listdir(class_dir)):
            datas.append(readimg(pre_conv, os.path.join(class_dir, img), image_size))
            labels.append(label)
    if not datas:
        # torch.stack on an empty list fails with an unhelpful message
        raise RuntimeError(
            "no images found in '{}' for classes {}".format(os.path.join(data_root, mode), list(classes)))
    return torch.stack(datas), torch.tensor(labels)
|
||||
|
||||
|
||||
def readimg(pre_conv, filepath, image_size):
    """
    Read one image and use pretrained network to extract the feature
    --------------------------
    :param pre_conv: pretrained network in homework 2
    :param filepath: the file path of one image
    :param image_size: the preset size that each image try to zoom to
    :return:
        data: the extracted feature, flattened into a 1-D tensor
    """
    # the context manager closes the image file as soon as the pixels are converted
    with Image.open(filepath) as img_file:
        img_pil = img_file.convert('RGB')
    img_pil = img_pil.resize(image_size)

    img_transform = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize(0.5, 0.5),
    ])
    img_tensor = img_transform(img_pil)

    # add a batch axis for the network, then flatten its output
    data = pre_conv(img_tensor.unsqueeze(0)).reshape(-1)

    return data
|
||||
|
||||
|
||||
# script entry: load the pretrained classifier and preprocess all three dataset splits
if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument("--pretrained_net", type=str, default="checkpoints/bn/ckpt_epoch_15.pth",
                        help="the filepath of the pretrained network in homework 2")
    parser.add_argument("--data_root", type=str, default="data", help="the path of all datasets")
    # two integers; 'type=tuple' would split a command line value such as "32" into ('3', '2')
    parser.add_argument("--image_size", type=int, nargs=2, default=(32, 32),
                        help="the preset size that each image try to zoom to")
    # exactly two class names; without nargs only a single string could be passed
    parser.add_argument("--classes", type=str, nargs=2, default=["B", "C"],
                        help="two classes that need to be classified")

    args = parser.parse_args()

    pretrained_checkpoint = torch.load(args.pretrained_net, map_location="cpu")
    configs = pretrained_checkpoint["configs"]
    cls = Classifier(
        configs["in_channels"],
        configs["num_classes"],
        configs["use_batch_norm"],
        configs["use_stn"],
        configs["dropout_prob"],
    )
    cls.load_state_dict(pretrained_checkpoint["model_state"])
    # the network is only used as a fixed feature extractor
    for param in cls.parameters():
        param.requires_grad = False
    # NOTE(review): eval mode assumes Classifier is an nn.Module with batch norm / dropout layers,
    # so that features do not depend on the processing order - confirm against networks.py
    cls.eval()
    conv = cls.conv_net

    preprocess(conv, args.data_root, tuple(args.image_size), args.classes)
|
||||
|
||||
@@ -1,139 +1,139 @@
|
||||
# ========================================================
|
||||
# Media and Cognition
|
||||
# Homework 3 Support Vector Machine
|
||||
# svm_hw.py - The implementation of SVM using hinge loss
|
||||
# Student ID:
|
||||
# Name:
|
||||
# Tsinghua University
|
||||
# (C) Copyright 2024
|
||||
# ========================================================
|
||||
|
||||
import torch
|
||||
import torch.nn as nn
|
||||
import torch.nn.functional as F
|
||||
|
||||
|
||||
# TODO 1: complete the forward and backward propagation processes of the linear layer
|
||||
class LinearFunction(torch.autograd.Function):
    """
    we will implement the linear function:
    y = xW^T + b
    as well as its gradient computation process
    """

    @staticmethod
    def forward(ctx, x, W, b):
        """
        Input:
        :param ctx: a context object that can be used to stash information for backward computation
        :param x: input features with size [batch_size, input_size]
        :param W: weight matrix with size [output_size, input_size]
        :param b: bias with size [output_size]
        Return:
        y :output features with size [batch_size, output_size]
        """
        # b is broadcast over the batch axis
        y = torch.matmul(x, W.t()) + b
        ctx.save_for_backward(x, W)

        return y

    @staticmethod
    def backward(ctx, grad_output):
        """
        Input:
        :param ctx: a context object with saved variables
        :param grad_output: dL/dy, with size [batch_size, output_size]
        Return:
        grad_input: dL/dx, with size [batch_size, input_size]
        grad_W: dL/dW, with size [output_size, input_size], summed for data in the batch
        grad_b: dL/db, with size [output_size], summed for data in the batch
        """
        # saved_tensors replaces the deprecated saved_variables attribute
        x, W = ctx.saved_tensors

        # dL/dx = dL/dy * W
        grad_input = torch.matmul(grad_output, W)
        # dL/dW = (dL/dy)^T * x, the matrix product already sums over the batch
        grad_W = torch.matmul(grad_output.t(), x)
        # dL/db = sum of dL/dy over the batch
        grad_b = grad_output.sum(dim=0)

        return grad_input, grad_W, grad_b
|
||||
|
||||
|
||||
# TODO 2: complete the forward and backward propagation processes of the hinge loss
|
||||
class Hinge(torch.autograd.Function):
    """
    The hinge loss of SVM together with the L2 norm of the weights:
    loss = 0.5*||w||^2 + C*\\sum_i{max(0, 1 - y_i*output_i)}
    as well as its gradient computation process
    """

    @staticmethod
    def forward(ctx, output, W, label, C):
        """
        Compute the hinge loss
        --------------------------------------
        :param ctx: a context object that can be used to stash information for backward computation
        :param output: the output of the linear layer with size [batch_size, 1], i.e. output = W^T*x + b
        :param W: weight matrix with size [1, input_size]
        :param label: the ground truth y in the equation for loss calculation, with size [batch_size]
        :param C: the regularization coefficient of hinge loss with size [1, 1]
        :return: the hinge loss with size [1, 1]
        """
        C = C.type_as(W)

        # labels as a column so that they line up with 'output'; a single label is broadcast
        y = label.reshape(-1, 1).type_as(output)
        # F.relu implements max(0, .)
        hinge = F.relu(1.0 - y * output)
        # C has size [1, 1], which gives the loss its [1, 1] shape
        loss = 0.5 * torch.sum(W ** 2) + C * torch.sum(hinge)
        ctx.save_for_backward(output, W, label, C)

        return loss

    @staticmethod
    def backward(ctx, grad_loss):
        """
        Compute the gradient of hinge loss
        :param ctx: a context object with saved variables
        :param grad_loss: dL/dloss, with size [1, 1], the gradient of the final target loss with respect to the output (variable 'loss') of the forward function
        :return:
            grad_output: dL/doutput, with size [batch_size, 1]
            grad_W: dL/dW, with size [1, channels]
            None, None: 'label' and 'C' do not receive gradients
        """
        output, W, label, C = ctx.saved_tensors

        y = label.reshape(-1, 1).type_as(output)
        # only samples inside the margin (1 - y*output > 0) contribute to the gradient
        inside_margin = ((1.0 - y * output) > 0).type_as(output)
        grad_output = -C * y * inside_margin * grad_loss
        # gradient of the regularization term 0.5*||w||^2
        grad_W = W * grad_loss

        return grad_output, grad_W, None, None
|
||||
|
||||
|
||||
# TODO 3: complete the structure of SVM model
|
||||
class SVM_HINGE(nn.Module):
    """
    Linear SVM trained with the hinge loss: a linear layer (LinearFunction) followed by Hinge
    """

    def __init__(self, in_channels, C):
        """
        :param in_channels: number of feature channels for SVM input
        :param C: regularization coefficient of hinge loss, stored as a tensor with size [1, 1]
        """
        super().__init__()

        # trainable parameters: W with shape [1, channels] and b with shape [1, ]
        self.W = nn.Parameter(torch.randn(1, in_channels), requires_grad=True)
        self.b = nn.Parameter(torch.randn(1), requires_grad=True)
        # a plain tensor, so it is not part of the state dict; Hinge casts it to the type of W
        self.C = torch.tensor([[C]], requires_grad=False)

    def forward(self, x, label=None):
        """
        :param x: input features, passed to LinearFunction together with W and b
        :param label: optional ground truth labels; the loss is only computed when they are given
        :return:
            output: the predicted class of each sample, +1 where the linear output is positive, otherwise -1
            loss: the hinge loss, or None if 'label' is None
        """
        # SVM calculation
        output = LinearFunction.apply(x, self.W, self.b)
        if label is not None:
            loss = Hinge.apply(output, self.W, label, self.C)
        else:
            loss = None
        # map the sign of the linear output to {-1, +1}
        output = (output > 0.0).type_as(x) * 2.0 - 1.0
        return output, loss
|
||||
# ========================================================
|
||||
# Media and Cognition
|
||||
# Homework 3 Support Vector Machine
|
||||
# svm_hw.py - The implementation of SVM using hinge loss
|
||||
# Student ID:
|
||||
# Name:
|
||||
# Tsinghua University
|
||||
# (C) Copyright 2024
|
||||
# ========================================================
|
||||
|
||||
import torch
|
||||
import torch.nn as nn
|
||||
import torch.nn.functional as F
|
||||
|
||||
|
||||
# TODO 1: complete the forward and backward propagation processes of the linear layer
|
||||
class LinearFunction(torch.autograd.Function):
    """
    we will implement the linear function:
    y = xW^T + b
    as well as its gradient computation process
    """

    @staticmethod
    def forward(ctx, x, W, b):
        """
        Input:
        :param ctx: a context object that can be used to stash information for backward computation
        :param x: input features with size [batch_size, input_size]
        :param W: weight matrix with size [output_size, input_size]
        :param b: bias with size [output_size]
        Return:
        y :output features with size [batch_size, output_size]
        """
        # b is broadcast over the batch axis
        y = torch.matmul(x, W.t()) + b
        ctx.save_for_backward(x, W)

        return y

    @staticmethod
    def backward(ctx, grad_output):
        """
        Input:
        :param ctx: a context object with saved variables
        :param grad_output: dL/dy, with size [batch_size, output_size]
        Return:
        grad_input: dL/dx, with size [batch_size, input_size]
        grad_W: dL/dW, with size [output_size, input_size], summed for data in the batch
        grad_b: dL/db, with size [output_size], summed for data in the batch
        """
        # saved_tensors replaces the deprecated saved_variables attribute
        x, W = ctx.saved_tensors

        # dL/dx = dL/dy * W
        grad_input = torch.matmul(grad_output, W)
        # dL/dW = (dL/dy)^T * x, the matrix product already sums over the batch
        grad_W = torch.matmul(grad_output.t(), x)
        # dL/db = sum of dL/dy over the batch
        grad_b = grad_output.sum(dim=0)

        return grad_input, grad_W, grad_b
|
||||
|
||||
|
||||
# TODO 2: complete the forward and backward propagation processes of the hinge loss
|
||||
class Hinge(torch.autograd.Function):
    """
    The hinge loss of SVM together with the L2 norm of the weights:
    loss = 0.5*||w||^2 + C*\\sum_i{max(0, 1 - y_i*output_i)}
    as well as its gradient computation process
    """

    @staticmethod
    def forward(ctx, output, W, label, C):
        """
        Compute the hinge loss
        --------------------------------------
        :param ctx: a context object that can be used to stash information for backward computation
        :param output: the output of the linear layer with size [batch_size, 1], i.e. output = W^T*x + b
        :param W: weight matrix with size [1, input_size]
        :param label: the ground truth y in the equation for loss calculation, with size [batch_size]
        :param C: the regularization coefficient of hinge loss with size [1, 1]
        :return: the hinge loss with size [1, 1]
        """
        C = C.type_as(W)

        # labels as a column so that they line up with 'output'; a single label is broadcast
        y = label.reshape(-1, 1).type_as(output)
        # F.relu implements max(0, .)
        hinge = F.relu(1.0 - y * output)
        # C has size [1, 1], which gives the loss its [1, 1] shape
        loss = 0.5 * torch.sum(W ** 2) + C * torch.sum(hinge)
        ctx.save_for_backward(output, W, label, C)

        return loss

    @staticmethod
    def backward(ctx, grad_loss):
        """
        Compute the gradient of hinge loss
        :param ctx: a context object with saved variables
        :param grad_loss: dL/dloss, with size [1, 1], the gradient of the final target loss with respect to the output (variable 'loss') of the forward function
        :return:
            grad_output: dL/doutput, with size [batch_size, 1]
            grad_W: dL/dW, with size [1, channels]
            None, None: 'label' and 'C' do not receive gradients
        """
        output, W, label, C = ctx.saved_tensors

        y = label.reshape(-1, 1).type_as(output)
        # only samples inside the margin (1 - y*output > 0) contribute to the gradient
        inside_margin = ((1.0 - y * output) > 0).type_as(output)
        grad_output = -C * y * inside_margin * grad_loss
        # gradient of the regularization term 0.5*||w||^2
        grad_W = W * grad_loss

        return grad_output, grad_W, None, None
|
||||
|
||||
|
||||
# TODO 3: complete the structure of SVM model
|
||||
class SVM_HINGE(nn.Module):
    """
    Linear SVM trained with the hinge loss: a linear layer (LinearFunction) followed by Hinge
    """

    def __init__(self, in_channels, C):
        """
        :param in_channels: number of feature channels for SVM input
        :param C: regularization coefficient of hinge loss, stored as a tensor with size [1, 1]
        """
        super().__init__()

        # trainable parameters: W with shape [1, channels] and b with shape [1, ]
        self.W = nn.Parameter(torch.randn(1, in_channels), requires_grad=True)
        self.b = nn.Parameter(torch.randn(1), requires_grad=True)
        # a plain tensor, so it is not part of the state dict; Hinge casts it to the type of W
        self.C = torch.tensor([[C]], requires_grad=False)

    def forward(self, x, label=None):
        """
        :param x: input features, passed to LinearFunction together with W and b
        :param label: optional ground truth labels; the loss is only computed when they are given
        :return:
            output: the predicted class of each sample, +1 where the linear output is positive, otherwise -1
            loss: the hinge loss, or None if 'label' is None
        """
        # SVM calculation
        output = LinearFunction.apply(x, self.W, self.b)
        if label is not None:
            loss = Hinge.apply(output, self.W, label, self.C)
        else:
            loss = None
        # map the sign of the linear output to {-1, +1}
        output = (output > 0.0).type_as(x) * 2.0 - 1.0
        return output, loss
|
||||
|
||||
@@ -1,106 +1,106 @@
|
||||
# ========================================================
|
||||
# Media and Cognition
|
||||
# Homework 3 Support Vector Machine
|
||||
# test_svm.py - Test svm model for traffic sign
|
||||
# Student ID:
|
||||
# Name:
|
||||
# Tsinghua University
|
||||
# (C) Copyright 2024
|
||||
# ========================================================
|
||||
|
||||
# ==== Part 1: import libs
|
||||
import argparse
|
||||
import torch
|
||||
from datasets import Traffic_Dataset
|
||||
from svm_hw import SVM_HINGE
|
||||
from torch.utils.data import DataLoader
|
||||
|
||||
|
||||
# ==== Part 2: testing
|
||||
def test(
    data_root,
    model_save_path,
    device,
):
    """
    The main testing procedure of SVM model
    ----------------------------
    :param data_root: path to the root directory of dataset
    :param model_save_path: path to pretrained SVM model
    :param device: device: 'cpu' or 'cuda', we can use 'cpu' for our homework if GPU with cuda support is not available
    """

    # =================== load the pretrained SVM model ==================================

    # testing data loader with 'batch_size=1' and 'shuffle=False'
    # NOTE(review): assumes the constructor is Traffic_Dataset(data_root, mode) and that each item
    # is a (feature, label) pair - confirm against datasets.py
    test_data = Traffic_Dataset(data_root, "test")
    test_loader = DataLoader(test_data, batch_size=1, shuffle=False)

    # load the checkpoint (configurations and state dictionary) of the pretrained SVM model
    model_svm = torch.load(model_save_path, map_location=device)

    # initialize the SVM model with the configurations stored in the checkpoint
    svm = SVM_HINGE(model_svm["configs"]["feature_channel"], model_svm["configs"]["C"])

    # load model parameters
    svm.load_state_dict(model_svm["state_dict"])

    # put the model on CPU or GPU
    svm = svm.to(device)

    # ================================ testing ==============================================

    # set the model in evaluation mode
    svm.eval()

    # to calculate and save the testing accuracy
    n_correct = 0.  # number of images that are correctly classified
    n_feas = 0.  # number of total images

    with torch.no_grad():  # we do not need to compute gradients during validation
        for feas, labels in test_loader:
            # set data type and device
            feas = feas.float().to(device)
            labels = labels.float().to(device)

            # without labels the model returns None as loss, so the second value is ignored
            out, _ = svm(feas)

            # 'out' has one more axis than 'labels', so both are flattened before the comparison
            n_correct += (out.reshape(-1) == labels.reshape(-1)).sum().item()

            # sum up the total image number
            n_feas += feas.size(0)

    # show prediction accuracy
    acc = 100 * n_correct / n_feas
    print('Test accuracy = {:.1f}%'.format(acc))
|
||||
|
||||
|
||||
# script entry: parse the command line and run the testing procedure
if __name__ == "__main__":
    # set configurations of the testing process
    parser = argparse.ArgumentParser()
    parser.add_argument("--data_root", type=str, default="data", help="file list of training image paths and labels")
    parser.add_argument("--device", type=str, help="cpu or cuda")
    parser.add_argument("--model_save_path", type=str, default="checkpoints/svm.pth", help="path to save SVM model")

    args = parser.parse_args()
    # use the GPU when no device is given and cuda is available
    if args.device is None:
        args.device = "cuda" if torch.cuda.is_available() else "cpu"

    # run the testing procedure
    test(
        data_root=args.data_root,
        model_save_path=args.model_save_path,
        device=args.device,
    )
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
# ========================================================
|
||||
# Media and Cognition
|
||||
# Homework 3 Support Vector Machine
|
||||
# test_svm.py - Test svm model for traffic sign
|
||||
# Student ID:
|
||||
# Name:
|
||||
# Tsinghua University
|
||||
# (C) Copyright 2024
|
||||
# ========================================================
|
||||
|
||||
# ==== Part 1: import libs
|
||||
import argparse
|
||||
import torch
|
||||
from datasets import Traffic_Dataset
|
||||
from svm_hw import SVM_HINGE
|
||||
from torch.utils.data import DataLoader
|
||||
|
||||
|
||||
# ==== Part 2: testing
|
||||
def test(
|
||||
data_root,
|
||||
model_save_path,
|
||||
device,
|
||||
):
|
||||
"""
|
||||
The main testing procedure of SVM model
|
||||
----------------------------
|
||||
:param data_root: path to the root directory of dataset
|
||||
:param model_save_path: path to pretrained SVM model
|
||||
:param device: device: 'cpu' or 'cuda', we can use 'cpu' for our homework if GPU with cuda support is not available
|
||||
"""
|
||||
|
||||
# TODO 1: =================== load the pretrained SVM model ==================================
|
||||
|
||||
# TODO: construct testing data loader with 'Traffic_Dataset' and DataLoader, and set 'batch_size=1' and 'shuffle=False'
|
||||
test_data = ???
|
||||
test_loader = ???
|
||||
|
||||
# TODO: load state dictionary of pretrained SVM model
|
||||
model_svm = ???
|
||||
|
||||
# TODO: initialize the SVM model using 'model_svm["configs"]["feature_channel"]' and 'model_svm["configs"]["C"]'
|
||||
svm = ???
|
||||
|
||||
# TODO: load model parameters (model_svm['state_dict']) we saved in model_path using svm.load_state_dict()
|
||||
???
|
||||
|
||||
# TODO: put the model on CPU or GPU
|
||||
???
|
||||
|
||||
# TODO 2 : ================================ testing ==============================================
|
||||
|
||||
# TODO: set the model in evaluation mode
|
||||
???
|
||||
|
||||
# to calculate and save the testing accuracy
|
||||
n_correct = 0. # number of images that are correctly classified
|
||||
n_feas = 0. # number of total images
|
||||
|
||||
with torch.no_grad(): # we do not need to compute gradients during validation
|
||||
# TODO: inference on the testing dataset, similar to the training stage but use 'test_loader'.
|
||||
for ??? in ???:
|
||||
# TODO: set data type (.float()) and device (.to())
|
||||
???
|
||||
|
||||
# TODO: run the model; at the validation step, the model only needs one input: feas
|
||||
# _ refers to a placeholder, which means we do not need the second returned value during validating
|
||||
???
|
||||
|
||||
# TODO: sum up the number of images correctly recognized. note the shapes of 'out' and 'labels' are different
|
||||
n_correct += ???
|
||||
|
||||
# TODO:sum up the total image number
|
||||
n_feas += ???
|
||||
|
||||
# show prediction accuracy
|
||||
acc = 100 * n_correct / n_feas
|
||||
print('Test accuracy = {:.1f}%'.format(acc))
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Command-line configuration of the testing process.
    parser = argparse.ArgumentParser()
    # The help texts below describe what test() actually does with each value:
    # data_root is a dataset directory and the model file is only read here.
    parser.add_argument("--data_root", type=str, default="data", help="path to the root directory of dataset")
    parser.add_argument("--device", type=str, help="cpu or cuda")
    parser.add_argument("--model_save_path", type=str, default="checkpoints/svm.pth", help="path to the pretrained SVM model")

    args = parser.parse_args()
    # Fall back to CUDA when available if the user did not choose a device.
    if args.device is None:
        args.device = "cuda" if torch.cuda.is_available() else "cpu"

    # run the testing procedure
    test(
        data_root=args.data_root,
        model_save_path=args.model_save_path,
        device=args.device,
    )
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
@@ -1,289 +1,289 @@
|
||||
# ========================================================
|
||||
# Media and Cognition
|
||||
# Homework 3 Support Vector Machine
|
||||
# train_svm.py - Train svm model for traffic sign
|
||||
# Student ID:
|
||||
# Name:
|
||||
# Tsinghua University
|
||||
# (C) Copyright 2024
|
||||
# ========================================================
|
||||
|
||||
# ==== Part 1: import libs
|
||||
import argparse
|
||||
import matplotlib.pyplot as plt
|
||||
import torch
|
||||
import numpy as np
|
||||
import random
|
||||
from datasets import Traffic_Dataset
|
||||
from svm_hw import SVM_HINGE
|
||||
from torch.utils.data import DataLoader
|
||||
|
||||
|
||||
# ==== Part 2: training and validation
|
||||
def train(
|
||||
data_root,
|
||||
feature_channel,
|
||||
batch_size,
|
||||
n_epoch,
|
||||
lr,
|
||||
C,
|
||||
model_save_path,
|
||||
device,
|
||||
):
|
||||
"""
|
||||
The main training procedure of SVM model
|
||||
----------------------------
|
||||
:param data_root: path to the root directory of dataset
|
||||
:param feature_channel: number of feature channels for SVM input
|
||||
:param batch_size: batch size of training
|
||||
:param n_epoch: number of training epochs
|
||||
:param lr: learning rate
|
||||
:param C: regularization coefficient in hinge loss
|
||||
:param model_save_path: path to save SVM model
|
||||
:param device: 'cpu' or 'cuda', we can use 'cpu' for our homework if GPU with cuda support is not available
|
||||
"""
|
||||
|
||||
# TODO 1: construct training and validation data loader with 'Traffic_Dataset' and DataLoader, and set proper values for 'batch_size' and 'shuffle'
|
||||
train_data = ???
|
||||
train_loader = ???
|
||||
val_data = ???
|
||||
val_loader = ???
|
||||
|
||||
# scale the regularization coefficient
|
||||
C = C * len(train_loader)
|
||||
|
||||
# TODO: initialize the SVM model
|
||||
svm = ???
|
||||
|
||||
# TODO: put the model on CPU or GPU
|
||||
???
|
||||
|
||||
# TODO: define the Adam optimizer
|
||||
optimizer = ???
|
||||
|
||||
# to save the training loss, training accuracy, validation accuracy, and the epoch index of each training epoch
|
||||
train_loss = []
|
||||
train_acc = []
|
||||
val_acc = []
|
||||
epochs = []
|
||||
|
||||
for epoch in range(n_epoch):
|
||||
# TODO: save the index of current epoch in the array 'epochs'
|
||||
???
|
||||
|
||||
# TODO 2: ========================= training =======================
|
||||
# TODO: set the model in training mode
|
||||
???
|
||||
|
||||
# to calculate and save the training loss and training accuracy
|
||||
total_loss = 0. # to save total training loss in one epoch
|
||||
n_correct = 0. # number of images that are correctly classified
|
||||
n_feas = 0. # number of total images
|
||||
|
||||
# TODO: get a batch of data; you may need enumerate() to iteratively get data from 'train_loader'.
|
||||
# you can refer to previous homework, for example hw2
|
||||
for ??? in ???:
|
||||
# TODO: set data type (.float()) and device (.to())
|
||||
???
|
||||
|
||||
# TODO: clear gradients in the optimizer
|
||||
???
|
||||
|
||||
# TODO: run the model with hinge loss; the model needs two inputs: feas and labels
|
||||
???
|
||||
|
||||
# TODO: back-propagation on the computation graph
|
||||
???
|
||||
|
||||
# TODO: sum up of total loss, loss.item() return the value of the tensor as a standard python number
|
||||
total_loss += ???
|
||||
|
||||
# TODO: call a function to update the parameters of the models
|
||||
???
|
||||
|
||||
# TODO: sum up the number of images correctly recognized. note the shapes of 'out' and 'labels' are different
|
||||
n_correct += ???
|
||||
|
||||
# TODO: sum up the total image number
|
||||
n_feas += ???
|
||||
|
||||
# average of the total loss for iterations
|
||||
acc = 100 * n_correct / n_feas
|
||||
avg_loss = total_loss / len(train_loader)
|
||||
train_acc.append(acc.cpu().numpy())
|
||||
train_loss.append(avg_loss)
|
||||
print('Epoch {:02d}: loss = {:.3f}, training accuracy = {:.1f}%'.format(epoch + 1, avg_loss, acc))
|
||||
|
||||
# TODO 3: ========================== Validation ======================================
|
||||
|
||||
# TODO: set the model in evaluation mode
|
||||
???
|
||||
|
||||
# to calculate and save the validation accuracy
|
||||
n_correct = 0. # number of images that are correctly classified
|
||||
n_feas = 0. # number of total images
|
||||
|
||||
with torch.no_grad(): # we do not need to compute gradients during validation
|
||||
# TODO: inference on the validation dataset, similar to the training stage but use 'val_loader'.
|
||||
for ??? in ???:
|
||||
# TODO: set data type (.float()) and device (.to())
|
||||
???
|
||||
|
||||
# TODO: run the model; at the validation step, the model only needs one input: feas
|
||||
# _ refers to a placeholder, which means we do not need the second returned value during validating
|
||||
???
|
||||
|
||||
# TODO: sum up the number of images correctly recognized. note the shapes of 'out' and 'labels' are different
|
||||
n_correct += ???
|
||||
|
||||
# TODO: sum up the total image number
|
||||
n_feas += ???
|
||||
|
||||
# show prediction accuracy
|
||||
acc = 100 * n_correct / n_feas
|
||||
print('Epoch {:02d}: validation accuracy = {:.1f}%'.format(epoch + 1, acc))
|
||||
val_acc.append(acc.cpu().numpy())
|
||||
|
||||
# save model parameters in a file
|
||||
torch.save({'state_dict': svm.state_dict(),
|
||||
'configs': {
|
||||
'feature_channel': feature_channel,
|
||||
'C': C}
|
||||
}, model_save_path)
|
||||
print('Model saved in {}\n'.format(model_save_path))
|
||||
|
||||
W = svm.W.data.cpu()
|
||||
b = svm.b.data.cpu()
|
||||
|
||||
# TODO 4: calculate the index of support vectors in training samples using 'train_data.datas' and 'train_data.labels'
|
||||
# 'sv' should be a list in python structure with the shape of [K], where K is the number of support vectors.
|
||||
sv = ???
|
||||
|
||||
plot(train_loss, train_acc, val_acc, epochs)
|
||||
plot_feature(train_features=train_data.datas, val_features=val_data.datas, train_labels=train_data.labels,
|
||||
val_labels=val_data.labels, sv=sv, W=W, b=b)
|
||||
|
||||
|
||||
def _draw_decision_boundary(ax, W, b):
    """Draw the line W[0, 0] * x + W[0, 1] * y + b = 0 on *ax* in yellow."""
    if W[0, 1] != 0:
        x = np.linspace(-20, 20, 100)
        ax.plot(x, -W[0, 0] / W[0, 1] * x - b / W[0, 1], c='y')
    elif W[0, 0] != 0:
        # W[0, 1] == 0 means the boundary is the vertical line x = -b / W[0, 0];
        # the slope form above would divide by zero and plot inf/nan.
        ax.axvline(float((-b / W[0, 0]).reshape(-1)[0]), c='y')


def plot_feature(train_features, val_features, train_labels, val_labels, sv, W, b):
    """
    Draw the samples, SVM decision boundary, and support vectors

    Two figures are shown one after the other: the training set (with the
    support vectors highlighted) and the validation set. Class membership is
    decided by position, not by label value: the first half of each set is
    drawn as class "-1" and the second half as class "+1". Only the length
    (shape[0]) of the label arrays is used.
    ---------------------
    :param train_features: training samples with the shape of [B, 2]
    :param val_features: validation samples with the shape of [B, 2]
    :param train_labels: the labels (chosen from{-1, +1}) corresponding to training samples, with the shape of [B, 1]
    :param val_labels: the labels (chosen from{-1, +1}) corresponding to validation samples, with the shape of [B, 1]
    :param sv: a list with the index of support vectors in training samples, with the shape of [K] (K is the number of support vectors)
    :param W: the weight vector of SVM decision boundary (W^Tx + b), with the shape of [1, feature_channel]
    :param b: the bias of SVM decision boundary (W^Tx + b), with the shape of [1,]
    """
    sv_set = set(sv)  # O(1) membership tests while splitting the indices

    # ---- training set: split each half into ordinary samples and support vectors
    half = train_labels.shape[0] // 2
    foreground = [i for i in range(half) if i not in sv_set]
    foreground_sv = [i for i in range(half) if i in sv_set]
    background = [i for i in range(half, 2 * half) if i not in sv_set]
    background_sv = [i for i in range(half, 2 * half) if i in sv_set]

    f, ax = plt.subplots()
    plt.title("training dataset")
    ax.scatter(train_features[foreground, 0], train_features[foreground, 1], marker='.', c='r', label="-1")
    ax.scatter(train_features[foreground_sv, 0], train_features[foreground_sv, 1], marker='.', c='darkorange',
               label="-1 (support vector)")
    ax.scatter(train_features[background, 0], train_features[background, 1], marker='x', c='b', label="+1")
    ax.scatter(train_features[background_sv, 0], train_features[background_sv, 1], marker='x', c='c',
               label="+1 (support vector)")
    _draw_decision_boundary(ax, W, b)
    ax.legend(loc="best")
    plt.ylim([-30, 30])
    plt.show()

    # ---- validation set: same layout, no support-vector highlighting
    half_val = val_labels.shape[0] // 2
    foreground_val = list(range(half_val))
    background_val = list(range(half_val, 2 * half_val))

    f, ax = plt.subplots()
    plt.title("validation dataset")
    ax.scatter(val_features[foreground_val, 0], val_features[foreground_val, 1], marker='.', c='r', label="-1")
    ax.scatter(val_features[background_val, 0], val_features[background_val, 1], marker='x', c='b', label="+1")
    _draw_decision_boundary(ax, W, b)
    ax.legend(loc="best")
    plt.ylim([-30, 30])
    plt.show()
|
||||
|
||||
|
||||
def plot(train_loss, train_acc, val_acc, epochs):
    """
    Show the training-loss curve, then the training/validation accuracy curves
    ------------------
    :param train_loss: a list with loss of each training epoch
    :param train_acc: a list with accuracy on training dataset of each training epoch
    :param val_acc: a list with accuracy on validation dataset of each training epoch
    :param epochs: a list with the index of all training epochs
    """

    # figure 1: loss per epoch
    _, loss_ax = plt.subplots()
    loss_ax.set_title("Training Loss")
    loss_ax.plot(epochs, train_loss, color="tab:blue")
    loss_ax.set_xlabel("Training epoch")
    loss_ax.set_ylabel("Loss")
    loss_ax.legend(["training loss"], loc="best")
    plt.show()

    # figure 2: both accuracy curves on one axis; plotting order must match
    # the order of the legend entries
    _, acc_ax = plt.subplots()
    acc_ax.set_title("Training and Validation Accuracy")
    for series, colour in ((train_acc, "tab:orange"), (val_acc, "tab:green")):
        acc_ax.plot(epochs, series, color=colour)
    acc_ax.legend(["training accuracy","validation accuracy"], loc="best")
    acc_ax.set_xlabel("Training epoch")
    acc_ax.set_ylabel("Accuracy")
    acc_ax.set_ylim(0, 101)
    plt.show()
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # set random seed for reproducibility (Python, NumPy and PyTorch CPU/GPU)
    seed = 2024
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)
    torch.backends.cudnn.deterministic = True

    # set configurations of the model and training process
    parser = argparse.ArgumentParser()
    # data_root is a dataset directory (see the train() docstring), not a file list
    parser.add_argument("--data_root", type=str, default="data", help="path to the root directory of dataset")
    parser.add_argument("--n_epoch", type=int, default=50, help="number of training epochs")
    parser.add_argument("--batch_size", type=int, default=20, help="training batch size")
    parser.add_argument("--lr", type=float, default=1e-2, help="learning rate")
    parser.add_argument("--C", type=float, default=1e-3, help="regularization coefficient in hinge loss")
    parser.add_argument("--device", type=str, help="cpu or cuda")
    parser.add_argument("--feature_channel", type=int, default=2, help="number of pre-extracted feature channel by pretrained network")
    parser.add_argument("--model_save_path", type=str, default="checkpoints/svm.pth", help="path to save SVM model")

    args = parser.parse_args()
    # Fall back to CUDA when available if the user did not choose a device.
    if args.device is None:
        args.device = "cuda" if torch.cuda.is_available() else "cpu"

    # run the training procedure
    train(
        data_root=args.data_root,
        feature_channel=args.feature_channel,
        batch_size=args.batch_size,
        n_epoch=args.n_epoch,
        lr=args.lr,
        C=args.C,
        model_save_path=args.model_save_path,
        device=args.device,
    )
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
# ========================================================
|
||||
# Media and Cognition
|
||||
# Homework 3 Support Vector Machine
|
||||
# train_svm.py - Train svm model for traffic sign
|
||||
# Student ID:
|
||||
# Name:
|
||||
# Tsinghua University
|
||||
# (C) Copyright 2024
|
||||
# ========================================================
|
||||
|
||||
# ==== Part 1: import libs
|
||||
import argparse
|
||||
import matplotlib.pyplot as plt
|
||||
import torch
|
||||
import numpy as np
|
||||
import random
|
||||
from datasets import Traffic_Dataset
|
||||
from svm_hw import SVM_HINGE
|
||||
from torch.utils.data import DataLoader
|
||||
|
||||
|
||||
# ==== Part 2: training and validation
|
||||
def train(
|
||||
data_root,
|
||||
feature_channel,
|
||||
batch_size,
|
||||
n_epoch,
|
||||
lr,
|
||||
C,
|
||||
model_save_path,
|
||||
device,
|
||||
):
|
||||
"""
|
||||
The main training procedure of SVM model
|
||||
----------------------------
|
||||
:param data_root: path to the root directory of dataset
|
||||
:param feature_channel: number of feature channels for SVM input
|
||||
:param batch_size: batch size of training
|
||||
:param n_epoch: number of training epochs
|
||||
:param lr: learning rate
|
||||
:param C: regularization coefficient in hinge loss
|
||||
:param model_save_path: path to save SVM model
|
||||
:param device: 'cpu' or 'cuda', we can use 'cpu' for our homework if GPU with cuda support is not available
|
||||
"""
|
||||
|
||||
# TODO 1: construct training and validation data loader with 'Traffic_Dataset' and DataLoader, and set proper values for 'batch_size' and 'shuffle'
|
||||
train_data = ???
|
||||
train_loader = ???
|
||||
val_data = ???
|
||||
val_loader = ???
|
||||
|
||||
# scale the regularization coefficient
|
||||
C = C * len(train_loader)
|
||||
|
||||
# TODO: initialize the SVM model
|
||||
svm = ???
|
||||
|
||||
# TODO: put the model on CPU or GPU
|
||||
???
|
||||
|
||||
# TODO: define the Adam optimizer
|
||||
optimizer = ???
|
||||
|
||||
# to save the training loss, training accuracy, validation accuracy, and the epoch index of each training epoch
|
||||
train_loss = []
|
||||
train_acc = []
|
||||
val_acc = []
|
||||
epochs = []
|
||||
|
||||
for epoch in range(n_epoch):
|
||||
# TODO: save the index of current epoch in the array 'epochs'
|
||||
???
|
||||
|
||||
# TODO 2: ========================= training =======================
|
||||
# TODO: set the model in training mode
|
||||
???
|
||||
|
||||
# to calculate and save the training loss and training accuracy
|
||||
total_loss = 0. # to save total training loss in one epoch
|
||||
n_correct = 0. # number of images that are correctly classified
|
||||
n_feas = 0. # number of total images
|
||||
|
||||
# TODO: get a batch of data; you may need enumerate() to iteratively get data from 'train_loader'.
|
||||
# you can refer to previous homework, for example hw2
|
||||
for ??? in ???:
|
||||
# TODO: set data type (.float()) and device (.to())
|
||||
???
|
||||
|
||||
# TODO: clear gradients in the optimizer
|
||||
???
|
||||
|
||||
# TODO: run the model with hinge loss; the model needs two inputs: feas and labels
|
||||
???
|
||||
|
||||
# TODO: back-propagation on the computation graph
|
||||
???
|
||||
|
||||
# TODO: sum up of total loss, loss.item() return the value of the tensor as a standard python number
|
||||
total_loss += ???
|
||||
|
||||
# TODO: call a function to update the parameters of the models
|
||||
???
|
||||
|
||||
# TODO: sum up the number of images correctly recognized. note the shapes of 'out' and 'labels' are different
|
||||
n_correct += ???
|
||||
|
||||
# TODO: sum up the total image number
|
||||
n_feas += ???
|
||||
|
||||
# average of the total loss for iterations
|
||||
acc = 100 * n_correct / n_feas
|
||||
avg_loss = total_loss / len(train_loader)
|
||||
train_acc.append(acc.cpu().numpy())
|
||||
train_loss.append(avg_loss)
|
||||
print('Epoch {:02d}: loss = {:.3f}, training accuracy = {:.1f}%'.format(epoch + 1, avg_loss, acc))
|
||||
|
||||
# TODO 3: ========================== Validation ======================================
|
||||
|
||||
# TODO: set the model in evaluation mode
|
||||
???
|
||||
|
||||
# to calculate and save the validation accuracy
|
||||
n_correct = 0. # number of images that are correctly classified
|
||||
n_feas = 0. # number of total images
|
||||
|
||||
with torch.no_grad(): # we do not need to compute gradients during validation
|
||||
# TODO: inference on the validation dataset, similar to the training stage but use 'val_loader'.
|
||||
for ??? in ???:
|
||||
# TODO: set data type (.float()) and device (.to())
|
||||
???
|
||||
|
||||
# TODO: run the model; at the validation step, the model only needs one input: feas
|
||||
# _ refers to a placeholder, which means we do not need the second returned value during validating
|
||||
???
|
||||
|
||||
# TODO: sum up the number of images correctly recognized. note the shapes of 'out' and 'labels' are different
|
||||
n_correct += ???
|
||||
|
||||
# TODO: sum up the total image number
|
||||
n_feas += ???
|
||||
|
||||
# show prediction accuracy
|
||||
acc = 100 * n_correct / n_feas
|
||||
print('Epoch {:02d}: validation accuracy = {:.1f}%'.format(epoch + 1, acc))
|
||||
val_acc.append(acc.cpu().numpy())
|
||||
|
||||
# save model parameters in a file
|
||||
torch.save({'state_dict': svm.state_dict(),
|
||||
'configs': {
|
||||
'feature_channel': feature_channel,
|
||||
'C': C}
|
||||
}, model_save_path)
|
||||
print('Model saved in {}\n'.format(model_save_path))
|
||||
|
||||
W = svm.W.data.cpu()
|
||||
b = svm.b.data.cpu()
|
||||
|
||||
# TODO 4: calculate the index of support vectors in training samples using 'train_data.datas' and 'train_data.labels'
|
||||
# 'sv' should be a list in python structure with the shape of [K], where K is the number of support vectors.
|
||||
sv = ???
|
||||
|
||||
plot(train_loss, train_acc, val_acc, epochs)
|
||||
plot_feature(train_features=train_data.datas, val_features=val_data.datas, train_labels=train_data.labels,
|
||||
val_labels=val_data.labels, sv=sv, W=W, b=b)
|
||||
|
||||
|
||||
def _draw_decision_boundary(ax, W, b):
    """Draw the line W[0, 0] * x + W[0, 1] * y + b = 0 on *ax* in yellow."""
    if W[0, 1] != 0:
        x = np.linspace(-20, 20, 100)
        ax.plot(x, -W[0, 0] / W[0, 1] * x - b / W[0, 1], c='y')
    elif W[0, 0] != 0:
        # W[0, 1] == 0 means the boundary is the vertical line x = -b / W[0, 0];
        # the slope form above would divide by zero and plot inf/nan.
        ax.axvline(float((-b / W[0, 0]).reshape(-1)[0]), c='y')


def plot_feature(train_features, val_features, train_labels, val_labels, sv, W, b):
    """
    Draw the samples, SVM decision boundary, and support vectors

    Two figures are shown one after the other: the training set (with the
    support vectors highlighted) and the validation set. Class membership is
    decided by position, not by label value: the first half of each set is
    drawn as class "-1" and the second half as class "+1". Only the length
    (shape[0]) of the label arrays is used.
    ---------------------
    :param train_features: training samples with the shape of [B, 2]
    :param val_features: validation samples with the shape of [B, 2]
    :param train_labels: the labels (chosen from{-1, +1}) corresponding to training samples, with the shape of [B, 1]
    :param val_labels: the labels (chosen from{-1, +1}) corresponding to validation samples, with the shape of [B, 1]
    :param sv: a list with the index of support vectors in training samples, with the shape of [K] (K is the number of support vectors)
    :param W: the weight vector of SVM decision boundary (W^Tx + b), with the shape of [1, feature_channel]
    :param b: the bias of SVM decision boundary (W^Tx + b), with the shape of [1,]
    """
    sv_set = set(sv)  # O(1) membership tests while splitting the indices

    # ---- training set: split each half into ordinary samples and support vectors
    half = train_labels.shape[0] // 2
    foreground = [i for i in range(half) if i not in sv_set]
    foreground_sv = [i for i in range(half) if i in sv_set]
    background = [i for i in range(half, 2 * half) if i not in sv_set]
    background_sv = [i for i in range(half, 2 * half) if i in sv_set]

    f, ax = plt.subplots()
    plt.title("training dataset")
    ax.scatter(train_features[foreground, 0], train_features[foreground, 1], marker='.', c='r', label="-1")
    ax.scatter(train_features[foreground_sv, 0], train_features[foreground_sv, 1], marker='.', c='darkorange',
               label="-1 (support vector)")
    ax.scatter(train_features[background, 0], train_features[background, 1], marker='x', c='b', label="+1")
    ax.scatter(train_features[background_sv, 0], train_features[background_sv, 1], marker='x', c='c',
               label="+1 (support vector)")
    _draw_decision_boundary(ax, W, b)
    ax.legend(loc="best")
    plt.ylim([-30, 30])
    plt.show()

    # ---- validation set: same layout, no support-vector highlighting
    half_val = val_labels.shape[0] // 2
    foreground_val = list(range(half_val))
    background_val = list(range(half_val, 2 * half_val))

    f, ax = plt.subplots()
    plt.title("validation dataset")
    ax.scatter(val_features[foreground_val, 0], val_features[foreground_val, 1], marker='.', c='r', label="-1")
    ax.scatter(val_features[background_val, 0], val_features[background_val, 1], marker='x', c='b', label="+1")
    _draw_decision_boundary(ax, W, b)
    ax.legend(loc="best")
    plt.ylim([-30, 30])
    plt.show()
|
||||
|
||||
|
||||
def plot(train_loss, train_acc, val_acc, epochs):
    """
    Show the training-loss curve, then the training/validation accuracy curves
    ------------------
    :param train_loss: a list with loss of each training epoch
    :param train_acc: a list with accuracy on training dataset of each training epoch
    :param val_acc: a list with accuracy on validation dataset of each training epoch
    :param epochs: a list with the index of all training epochs
    """

    # figure 1: loss per epoch
    _, loss_ax = plt.subplots()
    loss_ax.set_title("Training Loss")
    loss_ax.plot(epochs, train_loss, color="tab:blue")
    loss_ax.set_xlabel("Training epoch")
    loss_ax.set_ylabel("Loss")
    loss_ax.legend(["training loss"], loc="best")
    plt.show()

    # figure 2: both accuracy curves on one axis; plotting order must match
    # the order of the legend entries
    _, acc_ax = plt.subplots()
    acc_ax.set_title("Training and Validation Accuracy")
    for series, colour in ((train_acc, "tab:orange"), (val_acc, "tab:green")):
        acc_ax.plot(epochs, series, color=colour)
    acc_ax.legend(["training accuracy","validation accuracy"], loc="best")
    acc_ax.set_xlabel("Training epoch")
    acc_ax.set_ylabel("Accuracy")
    acc_ax.set_ylim(0, 101)
    plt.show()
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # set random seed for reproducibility (Python, NumPy and PyTorch CPU/GPU)
    seed = 2024
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)
    torch.backends.cudnn.deterministic = True

    # set configurations of the model and training process
    parser = argparse.ArgumentParser()
    # data_root is a dataset directory (see the train() docstring), not a file list
    parser.add_argument("--data_root", type=str, default="data", help="path to the root directory of dataset")
    parser.add_argument("--n_epoch", type=int, default=50, help="number of training epochs")
    parser.add_argument("--batch_size", type=int, default=20, help="training batch size")
    parser.add_argument("--lr", type=float, default=1e-2, help="learning rate")
    parser.add_argument("--C", type=float, default=1e-3, help="regularization coefficient in hinge loss")
    parser.add_argument("--device", type=str, help="cpu or cuda")
    parser.add_argument("--feature_channel", type=int, default=2, help="number of pre-extracted feature channel by pretrained network")
    parser.add_argument("--model_save_path", type=str, default="checkpoints/svm.pth", help="path to save SVM model")

    args = parser.parse_args()
    # Fall back to CUDA when available if the user did not choose a device.
    if args.device is None:
        args.device = "cuda" if torch.cuda.is_available() else "cpu"

    # run the training procedure
    train(
        data_root=args.data_root,
        feature_channel=args.feature_channel,
        batch_size=args.batch_size,
        n_epoch=args.n_epoch,
        lr=args.lr,
        C=args.C,
        model_save_path=args.model_save_path,
        device=args.device,
    )
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
@@ -20,21 +20,21 @@
|
||||
\begin{document}
|
||||
\courseheader
|
||||
% 请在YOUR NAME处填写自己的姓名
|
||||
\name{YOUR NAME}
|
||||
\name{高艺轩}
|
||||
\vspace{3mm}
|
||||
\centerline{\textbf{\Large{理论部分}}}
|
||||
|
||||
\section{单选题(15分)}
|
||||
% 请在?处填写答案
|
||||
\subsection{\underline{?}}
|
||||
\subsection{\underline{D}}
|
||||
|
||||
\subsection{\underline{?}}
|
||||
\subsection{\underline{C}}
|
||||
|
||||
\subsection{\underline{?}}
|
||||
\subsection{\underline{D}}
|
||||
|
||||
\subsection{\underline{?}}
|
||||
\subsection{\underline{D}}
|
||||
|
||||
\subsection{\underline{?}}
|
||||
\subsection{\underline{B}}
|
||||
|
||||
\section{计算题(15 分)}
|
||||
|
||||
@@ -47,17 +47,117 @@
|
||||
试利用LDA,将样本特征维数压缩为一维。
|
||||
}
|
||||
|
||||
\begin{proof}[解]
|
||||
首先计算$\mu_1 = (3, 2), \mu_2 = (0, 2), \mu = (1.5, 2)$。因此
|
||||
\[S_1 = \frac{1}{4}
|
||||
\left(
|
||||
\begin{bmatrix}
|
||||
0 & 0\\
|
||||
0 & 1
|
||||
\end{bmatrix}
|
||||
+
|
||||
\begin{bmatrix}
|
||||
1 & 0\\
|
||||
0 & 0
|
||||
\end{bmatrix}
|
||||
+
|
||||
\begin{bmatrix}
|
||||
1 & 1\\
|
||||
1 & 1
|
||||
\end{bmatrix}
|
||||
+
|
||||
\begin{bmatrix}
|
||||
0 & 0\\
|
||||
0 & 0
|
||||
\end{bmatrix}
|
||||
\right)
|
||||
=
|
||||
\begin{bmatrix}
|
||||
0.5 & 0.25\\
|
||||
0.25 & 0.5
|
||||
\end{bmatrix}\]
|
||||
\[S_2 = \frac{1}{4}
|
||||
\left(
|
||||
\begin{bmatrix}
|
||||
0 & 0\\
|
||||
0 & 1
|
||||
\end{bmatrix}
|
||||
+
|
||||
\begin{bmatrix}
|
||||
1 & 0\\
|
||||
0 & 0
|
||||
\end{bmatrix}
|
||||
+
|
||||
\begin{bmatrix}
|
||||
1 & 1\\
|
||||
1 & 1
|
||||
\end{bmatrix}
|
||||
+
|
||||
\begin{bmatrix}
|
||||
1 & 0\\
|
||||
0 & 0
|
||||
\end{bmatrix}
|
||||
\right)
|
||||
=
|
||||
\begin{bmatrix}
|
||||
0.75 & 0.25\\
|
||||
0.25 & 0.5
|
||||
\end{bmatrix}\]
|
||||
进一步地,
|
||||
\[S_w = \frac{1}{2} (S_1 + S_2) =
|
||||
\begin{bmatrix}
|
||||
0.625 & 0.25\\
|
||||
0.25 & 0.5
|
||||
\end{bmatrix}\]
|
||||
\[S_b = \frac{1}{2} \left(
|
||||
\begin{bmatrix}
|
||||
2.25 & 0\\
|
||||
0 & 0
|
||||
\end{bmatrix}
|
||||
+
|
||||
\begin{bmatrix}
|
||||
2.25 & 0\\
|
||||
0 & 0
|
||||
\end{bmatrix}
|
||||
\right)
|
||||
=
|
||||
\begin{bmatrix}
|
||||
2.25 & 0\\
|
||||
0 & 0
|
||||
\end{bmatrix}\]
|
||||
广义特征值分解得到$\lambda = 4.5$,$v = (0.8944, -0.4472)$。投影后的样本为
|
||||
\[\omega_1: \left\{2.2360, 0.8944, 2.2360, 1.7888\right\}\]
|
||||
\[\omega_2: \left\{-0.4472, 0, -1.3416, -1.7888\right\}\]
|
||||
\end{proof}
|
||||
|
||||
|
||||
|
||||
\vspace{3mm}
|
||||
\subsection{模型训练通常需要大量的数据,假设某采集的数据集包含80\%的有效数据和20\%的无效数据。采用一种算法判断数据是否有效,其中无效数据被成功判别为无效数据的概率为90\%,而有效数据被误判为无效数据的概率为5\%。如果某条数据经过该算法被判别为无效数据,则根据贝叶斯定理,这条数据是无效数据的概率是多少?(提示:全概率公式$P(Y)=\sum^{N}_{i=1}P(Y|X_i)P(X_i)$)\\}
|
||||
|
||||
\begin{proof}[解]
|
||||
\begin{align*}
|
||||
& P(\text{无效数据} \mid \text{判定无效})\\
|
||||
= & \frac{P(\text{判定无效} \mid \text{无效数据})P(\text{无效数据})}{P(\text{判定无效} \mid \text{无效数据})P(\text{无效数据}) + P(\text{判定无效} \mid \text{有效数据})P(\text{有效数据})}\\
|
||||
= & \frac{0.9 \times 0.2}{0.9 \times 0.2 + 0.05 \times 0.8}\\
|
||||
= & \frac{0.18}{0.18 + 0.04}\\
|
||||
= & \frac{9}{11}
|
||||
\end{align*}
|
||||
\end{proof}
|
||||
|
||||
\vspace{3mm}
|
||||
\subsection{设有两类正态分布的样本集,第一类均值为$\mu_1=[2,-1]^T$,第二类均值为$\mu_2=[1,1]^T$。两类样本集的协方差矩阵和出现的先验概率都相等:$\Sigma_1=\Sigma_2=\Sigma=\left[ \begin{array}{cc}
|
||||
4 & 2 \\
|
||||
2 & \frac{4}{3}
|
||||
\end{array} \right]$,$p(\omega_1)=p(\omega_2)$。试计算分类界面,并对特征向量$x=[6,2]^T$分类。}
|
||||
|
||||
\begin{proof}[解]
|
||||
\[g_1(\boldsymbol{x}) = -\frac{1}{2}(\boldsymbol{x} - \boldsymbol{\mu}_1)^\mathrm{T} \Sigma^{-1} (\boldsymbol{x} - \boldsymbol{\mu}_1) + \ln p(\omega_1)\]
|
||||
\[g_2(\boldsymbol{x}) = -\frac{1}{2}(\boldsymbol{x} - \boldsymbol{\mu}_2)^\mathrm{T} \Sigma^{-1} (\boldsymbol{x} - \boldsymbol{\mu}_2) + \ln p(\omega_2)\]
|
||||
决策方程
|
||||
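\[g_1(\boldsymbol{x}) - g_2(\boldsymbol{x}) = (\boldsymbol{\mu}_1 - \boldsymbol{\mu}_2)^\mathrm{T} \Sigma^{-1} \left(\boldsymbol{x} - \frac{\boldsymbol{\mu}_1 + \boldsymbol{\mu}_2}{2}\right) = 4x_1 - 7.5x_2 - 6 = 0\]
其中$\Sigma^{-1} = \begin{bmatrix} 1 & -1.5\\ -1.5 & 3 \end{bmatrix}$,且由$p(\omega_1)=p(\omega_2)$,先验项相互抵消。对$\boldsymbol{x} = [6, 2]^\mathrm{T}$,有$g_1(\boldsymbol{x}) - g_2(\boldsymbol{x}) = 4 \times 6 - 7.5 \times 2 - 6 = 3 > 0$,故$\boldsymbol{x}$属于$\omega_1$类。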
|
||||
\end{proof}
|
||||
|
||||
\vspace{3mm}
|
||||
\subsection{给定异或的样本集$D=\left\{\left((0,0)^T,-1\right),\left((0,1)^T,1\right),\left((1,0)^T,1\right),\left((1,1)^T,-1\right)\right\}$该样本集是线性不可分的,可采用如下所示的多项式函数$\phi(\mathbf{x})$将样本$D=\left\{(\mathbf{x}_n,y_n)\right\}$映射为$D_\phi=\left\{(\phi(\mathbf{x}_n),y_n)\right\}$,其中$\phi(\mathbf{x})$满足
|
||||
\begin{equation*}
|
||||
|
||||
Reference in New Issue
Block a user