Mac Sync

2024-05-15 20:05:18 +08:00
parent 4bc3f77879
commit 8b657be441
20 changed files with 2174 additions and 2074 deletions
--- a/hw3/code/check.py
+++ b/hw3/code/check.py
@@ -1,41 +1,41 @@
-# ========================================================
-#             Media and Cognition
-#             Homework 3 Support Vector Machine
-#             check.py - Check your implementation of several modules
-#             Tsinghua University
-#             (C) Copyright 2024
-# ========================================================
-
-from svm_hw import SVM_HINGE, LinearFunction, Hinge
-import torch
-from torch.autograd import gradcheck
-
-
-def run():
-    model = SVM_HINGE(2, C=1.0).double()
-    x = torch.randn(50, 2, requires_grad=False).double()
-    W = torch.randn(1, 2, requires_grad=True).double()
-    b = torch.zeros(1, requires_grad=True).double()
-    test = gradcheck(LinearFunction.apply, (x, W, b), eps=1e-6, atol=1e-4)
-    if test:
-        print('Linear successully tested!')
-    output = torch.randn(50, 1, requires_grad=True).double()
-    W = torch.randn(1, 2, requires_grad=True).double()
-    labels = torch.ones(1, requires_grad=False).double()
-    C = torch.tensor([[1.0]], requires_grad=False).double()
-    test = gradcheck(Hinge.apply, (output, W, labels, C), eps=1e-6, atol=1e-5)
-    if test:
-        print('Hinge successfully tested！')
-    x = torch.randn(50, 2, requires_grad=False).double()
-    labels = torch.ones(50, requires_grad=False).double()
-    try:
-        output, loss = model(x, labels)
-        assert model.W.requires_grad is True
-        assert model.b.requires_grad is True
-        print('SVM_HINGE successfully tested！')
-    except:
-        raise Exception('Failed testing SVM_HINGE!')
-
-
-if __name__ == '__main__':
+# ========================================================
+#             Media and Cognition
+#             Homework 3 Support Vector Machine
+#             check.py - Check your implementation of several modules
+#             Tsinghua University
+#             (C) Copyright 2024
+# ========================================================
+
+from svm_hw import SVM_HINGE, LinearFunction, Hinge
+import torch
+from torch.autograd import gradcheck
+
+
+def run():
+    model = SVM_HINGE(2, C=1.0).double()
+    x = torch.randn(50, 2, requires_grad=False).double()
+    W = torch.randn(1, 2, requires_grad=True).double()
+    b = torch.zeros(1, requires_grad=True).double()
+    test = gradcheck(LinearFunction.apply, (x, W, b), eps=1e-6, atol=1e-4)
+    if test:
+        print('Linear successully tested!')
+    output = torch.randn(50, 1, requires_grad=True).double()
+    W = torch.randn(1, 2, requires_grad=True).double()
+    labels = torch.ones(1, requires_grad=False).double()
+    C = torch.tensor([[1.0]], requires_grad=False).double()
+    test = gradcheck(Hinge.apply, (output, W, labels, C), eps=1e-6, atol=1e-5)
+    if test:
+        print('Hinge successfully tested！')
+    x = torch.randn(50, 2, requires_grad=False).double()
+    labels = torch.ones(50, requires_grad=False).double()
+    try:
+        output, loss = model(x, labels)
+        assert model.W.requires_grad is True
+        assert model.b.requires_grad is True
+        print('SVM_HINGE successfully tested！')
+    except:
+        raise Exception('Failed testing SVM_HINGE!')
+
+
+if __name__ == '__main__':
    run()
--- a/hw3/code/data_preprocess.py
+++ b/hw3/code/data_preprocess.py
@@ -1,178 +1,178 @@
-# ========================================================
-#             Media and Cognition
-#             Homework 3 Support Vector Machine
-#             data_preprocess.py - Using pretrained convolutional layers to extract feature,
-#                                   and using PCA for dimensionality reduction
-#             Student ID:
-#             Name:
-#             Tsinghua University
-#             (C) Copyright 2024
-# ========================================================
-
-import os
-import torchvision.transforms as transforms
-import torch
-from PIL import Image
-from networks import Classifier
-import matplotlib.pyplot as plt
-import argparse
-
-
-def preprocess(pre_conv, data_root, image_size, classes):
-    # TODO 1: Using PCA to reduce the dimensionality of 2048 point features extracted by convolution
-
-    # ===============  process training dataset ======================
-    print("Start preprocessing the training dataset !!!")
-    train_data, train_label = loaddata(pre_conv, data_root, 'train', image_size, classes)
-
-    # calculate the mean and PCA projection matrix
-    data_mean, u = PCA(train_data, 2)
-
-    # TODO: using PCA to compress the dimensionality of the train_data after subtracting the mean vector
-    train_data_pca = ???
-
-    visualize(train_data_pca, train_label, "train")
-    savedata(train_data_pca, train_label, data_root+"/train.pt")
-    print("training dataset saved !!!")
-
-    # ===============  process validation dataset ======================
-    print("Start preprocessing the validation dataset!!!")
-    val_data, val_label = loaddata(pre_conv, data_root, 'val', image_size, classes)
-
-    # TODO: using PCA to compress the dimensionality of the val_data after subtracting the mean vector
-    val_data_pca = ???
-
-    visualize(val_data_pca, val_label, "val")
-    savedata(val_data_pca, val_label, data_root+"/val.pt")
-    print("validation dataset saved !!!")
-
-    # ===============  process testing dataset ======================
-    print("Start preprocessing the testing dataset!!!")
-    test_data, test_label = loaddata(pre_conv, data_root, 'test', image_size, classes)
-
-    # TODO: using PCA to compress the dimensionality of the test_data after subtracting the mean vector
-    test_data_pca = ???
-
-    visualize(test_data_pca, test_label, "test")
-    savedata(test_data_pca, test_label, data_root+"/test.pt")
-    print("testing dataset saved !!!")
-
-
-def savedata(data, label, save_path):
-    save_dict = {
-        'data': data,
-        'label': label
-    }
-    torch.save(save_dict, save_path)
-
-
-def visualize(datas, labels, mode):
-    """
-    Display feature points after dimensionality reduction
-    -------------------------------
-    :param datas: the samples after dimensionality reduction, with the shape of [N, 2]
-    :param labels: the labels (chosen from {-1, +1}) corresponding to the samples
-    :param mode: chosen from {'train', 'val', 'test'}
-    :return:
-    """
-    plt.figure()
-    for idx in range(datas.shape[1]):
-        plt.scatter(datas[labels == 2*idx-1, 0], datas[labels == 2*idx-1, 1], label=(2*idx-1))
-    plt.legend()
-    plt.title(mode)
-    plt.show()
-
-
-def PCA(data, dim=2):
-    """
-    calculate the mean value of the data and the projection matrix for PCA
-    :param data: the sample features extracted by the pretrained network in homework2, with the shape of [N, 2048]
-    :param dim: the data dimension after projection
-    :return:
-        data_mean: the mean value of the data
-        u: the projection matrix for PCA, with the shape of [2048, dim]
-    """
-    # TODO 2: complete the algorithm of PCA, calculate the mean value of the data and the projection matrix
-
-    # TODO: compute the mean of train_data
-    data_mean = ???
-    # TODO: compute the covariance matrix of train_data
-    data_cov = ???
-    # TODO: compute the SVD decompositon of data_cov using torch.linalg.svd
-    # reference: https://pytorch.org/docs/1.11/generated/torch.linalg.svd.html
-    ???
-    # TODO: return the proper 'data_mean' and 'u[]'
-    return ???
-
-
-def loaddata(pre_conv, data_root, mode, image_size, classes):
-    """
-    load one dataset, and use pretrained network in homework 2 to extract feature
-    :param pre_conv: pretrained network in homework 2
-    :param data_root: the path of the dataset
-    :param mode: chosen from {'train', 'val', 'test'}
-    :param image_size: the preset size that each image try to zoom to
-    :param classes: two classes that need to be classified
-    :return:
-        datas: the samples of extracted features with the shape of [N, 2048]
-        labels: the corresponding labels for each sample (chosen from {-1, +1}), with the shape of [N]
-    """
-    assert len(classes) == 2
-    datas = []
-    labels = []
-    for idx in range(len(classes)):
-        for img in os.listdir(data_root + '/' + mode + '/' + classes[idx]):
-            data = readimg(pre_conv, data_root + '/' + mode + '/' + classes[idx] + '/' + img, image_size)
-            label = 2 * idx - 1
-            datas.append(data)
-            labels.append(label)
-    return torch.stack(datas), torch.tensor(labels)
-
-
-def readimg(pre_conv, filepath, image_size):
-    """
-    Read one image and use pretrained network to extract the feature
-    --------------------------
-    :param pre_conv: pretrained network in homework 2
-    :param filepath: the file path of one image
-    :param image_size: the preset size that each image try to zoom to
-    :return:
-        data: the extracted feature with the length of 2048
-    """
-    img_pil = Image.open(filepath).convert('RGB')
-    img_pil = img_pil.resize(image_size)
-    img_transform = transforms.Compose([transforms.ToTensor(),
-                                        transforms.Normalize(0.5, 0.5),
-                                        ])
-    img_tensor = img_transform(img_pil)
-    data = pre_conv(img_tensor.unsqueeze(0)).reshape(-1)
-
-    return data
-
-
-if __name__ == "__main__":
-    parser = argparse.ArgumentParser()
-    parser.add_argument("--pretrained_net", type=str, default="checkpoints/bn/ckpt_epoch_15.pth",
-                        help="the filepath of the pretrained network in homework 2")
-    parser.add_argument("--data_root", type=str, default="data", help="the path of all datasets")
-    parser.add_argument("--image_size", type=tuple, default=(32, 32),
-                        help="the preset size that each image try to zoom to")
-    parser.add_argument("--classes", default=["B", "C"], help="two classes that need to be classified")
-
-    args = parser.parse_args()
-
-    pretrained_checkpoint = torch.load(args.pretrained_net, map_location="cpu")
-    configs = pretrained_checkpoint["configs"]
-    cls = Classifier(
-        configs["in_channels"],
-        configs["num_classes"],
-        configs["use_batch_norm"],
-        configs["use_stn"],
-        configs["dropout_prob"],
-    )
-    cls.load_state_dict(pretrained_checkpoint["model_state"])
-    for param in cls.parameters():
-        param.requires_grad = False
-    conv = cls.conv_net
-
-    preprocess(conv, args.data_root, args.image_size, args.classes)
+# ========================================================
+#             Media and Cognition
+#             Homework 3 Support Vector Machine
+#             data_preprocess.py - Using pretrained convolutional layers to extract feature,
+#                                   and using PCA for dimensionality reduction
+#             Student ID:
+#             Name:
+#             Tsinghua University
+#             (C) Copyright 2024
+# ========================================================
+
+import os
+import torchvision.transforms as transforms
+import torch
+from PIL import Image
+from networks import Classifier
+import matplotlib.pyplot as plt
+import argparse
+
+
+def preprocess(pre_conv, data_root, image_size, classes):
+    # TODO 1: Using PCA to reduce the dimensionality of 2048 point features extracted by convolution
+
+    # ===============  process training dataset ======================
+    print("Start preprocessing the training dataset !!!")
+    train_data, train_label = loaddata(pre_conv, data_root, 'train', image_size, classes)
+
+    # calculate the mean and PCA projection matrix
+    data_mean, u = PCA(train_data, 2)
+
+    # TODO: using PCA to compress the dimensionality of the train_data after subtracting the mean vector
+    train_data_pca = ???
+
+    visualize(train_data_pca, train_label, "train")
+    savedata(train_data_pca, train_label, data_root+"/train.pt")
+    print("training dataset saved !!!")
+
+    # ===============  process validation dataset ======================
+    print("Start preprocessing the validation dataset!!!")
+    val_data, val_label = loaddata(pre_conv, data_root, 'val', image_size, classes)
+
+    # TODO: using PCA to compress the dimensionality of the val_data after subtracting the mean vector
+    val_data_pca = ???
+
+    visualize(val_data_pca, val_label, "val")
+    savedata(val_data_pca, val_label, data_root+"/val.pt")
+    print("validation dataset saved !!!")
+
+    # ===============  process testing dataset ======================
+    print("Start preprocessing the testing dataset!!!")
+    test_data, test_label = loaddata(pre_conv, data_root, 'test', image_size, classes)
+
+    # TODO: using PCA to compress the dimensionality of the test_data after subtracting the mean vector
+    test_data_pca = ???
+
+    visualize(test_data_pca, test_label, "test")
+    savedata(test_data_pca, test_label, data_root+"/test.pt")
+    print("testing dataset saved !!!")
+
+
+def savedata(data, label, save_path):
+    save_dict = {
+        'data': data,
+        'label': label
+    }
+    torch.save(save_dict, save_path)
+
+
+def visualize(datas, labels, mode):
+    """
+    Display feature points after dimensionality reduction
+    -------------------------------
+    :param datas: the samples after dimensionality reduction, with the shape of [N, 2]
+    :param labels: the labels (chosen from {-1, +1}) corresponding to the samples
+    :param mode: chosen from {'train', 'val', 'test'}
+    :return:
+    """
+    plt.figure()
+    for idx in range(datas.shape[1]):
+        plt.scatter(datas[labels == 2*idx-1, 0], datas[labels == 2*idx-1, 1], label=(2*idx-1))
+    plt.legend()
+    plt.title(mode)
+    plt.show()
+
+
+def PCA(data, dim=2):
+    """
+    calculate the mean value of the data and the projection matrix for PCA
+    :param data: the sample features extracted by the pretrained network in homework2, with the shape of [N, 2048]
+    :param dim: the data dimension after projection
+    :return:
+        data_mean: the mean value of the data
+        u: the projection matrix for PCA, with the shape of [2048, dim]
+    """
+    # TODO 2: complete the algorithm of PCA, calculate the mean value of the data and the projection matrix
+
+    # TODO: compute the mean of train_data
+    data_mean = ???
+    # TODO: compute the covariance matrix of train_data
+    data_cov = ???
+    # TODO: compute the SVD decomposition of data_cov using torch.linalg.svd
+    # reference: https://pytorch.org/docs/1.11/generated/torch.linalg.svd.html
+    ???
+    # TODO: return the proper 'data_mean' and 'u[]'
+    return ???
+
+
+def loaddata(pre_conv, data_root, mode, image_size, classes):
+    """
+    load one dataset, and use pretrained network in homework 2 to extract feature
+    :param pre_conv: pretrained network in homework 2
+    :param data_root: the path of the dataset
+    :param mode: chosen from {'train', 'val', 'test'}
+    :param image_size: the preset size that each image is resized to
+    :param classes: two classes that need to be classified
+    :return:
+        datas: the samples of extracted features with the shape of [N, 2048]
+        labels: the corresponding labels for each sample (chosen from {-1, +1}), with the shape of [N]
+    """
+    assert len(classes) == 2
+    datas = []
+    labels = []
+    for idx in range(len(classes)):
+        for img in os.listdir(data_root + '/' + mode + '/' + classes[idx]):
+            data = readimg(pre_conv, data_root + '/' + mode + '/' + classes[idx] + '/' + img, image_size)
+            label = 2 * idx - 1
+            datas.append(data)
+            labels.append(label)
+    return torch.stack(datas), torch.tensor(labels)
+
+
+def readimg(pre_conv, filepath, image_size):
+    """
+    Read one image and use pretrained network to extract the feature
+    --------------------------
+    :param pre_conv: pretrained network in homework 2
+    :param filepath: the file path of one image
+    :param image_size: the preset size that each image is resized to
+    :return:
+        data: the extracted feature with the length of 2048
+    """
+    img_pil = Image.open(filepath).convert('RGB')
+    img_pil = img_pil.resize(image_size)
+    img_transform = transforms.Compose([transforms.ToTensor(),
+                                        transforms.Normalize(0.5, 0.5),
+                                        ])
+    img_tensor = img_transform(img_pil)
+    data = pre_conv(img_tensor.unsqueeze(0)).reshape(-1)
+
+    return data
+
+
+if __name__ == "__main__":
+    parser = argparse.ArgumentParser()
+    parser.add_argument("--pretrained_net", type=str, default="checkpoints/bn/ckpt_epoch_15.pth",
+                        help="the filepath of the pretrained network in homework 2")
+    parser.add_argument("--data_root", type=str, default="data", help="the path of all datasets")
+    parser.add_argument("--image_size", type=tuple, default=(32, 32),
+                        help="the preset size that each image try to zoom to")
+    parser.add_argument("--classes", default=["B", "C"], help="two classes that need to be classified")
+
+    args = parser.parse_args()
+
+    pretrained_checkpoint = torch.load(args.pretrained_net, map_location="cpu")
+    configs = pretrained_checkpoint["configs"]
+    cls = Classifier(
+        configs["in_channels"],
+        configs["num_classes"],
+        configs["use_batch_norm"],
+        configs["use_stn"],
+        configs["dropout_prob"],
+    )
+    cls.load_state_dict(pretrained_checkpoint["model_state"])
+    for param in cls.parameters():
+        param.requires_grad = False
+    conv = cls.conv_net
+
+    preprocess(conv, args.data_root, args.image_size, args.classes)
--- a/hw3/code/svm_hw.py
+++ b/hw3/code/svm_hw.py
@@ -1,139 +1,139 @@
-# ========================================================
-#             Media and Cognition
-#             Homework 3 Support Vector Machine
-#             svm_hw.py - The implementation of SVM using hinge loss
-#             Student ID:
-#             Name:
-#             Tsinghua University
-#             (C) Copyright 2024
-# ========================================================
-
-import torch
-import torch.nn as nn
-import torch.nn.functional as F
-
-
-# TODO 1: complete the forward and backward propagation processes of the linear layer
-class LinearFunction(torch.autograd.Function):
-    '''
-    we will implement the linear function:
-    y = xW^T + b
-    as well as its gradient computation process
-    '''
-
-    @staticmethod
-    def forward(ctx, x, W, b):
-        '''
-        Input:
-        :param ctx: a context object that can be used to stash information for backward computation
-        :param x: input features with size [batch_size, input_size]
-        :param W: weight matrix with size [output_size, input_size]
-        :param b: bias with size [output_size]
-        Return:
-        y :output features with size [batch_size, output_size]
-        '''
-
-        # TODO
-        y = ???
-        ctx.save_for_backward(x, W)
-
-        return y
-
-    @staticmethod
-    def backward(ctx, grad_output):
-        '''
-        Input:
-        :param ctx: a context object with saved variables
-        :param grad_output: dL/dy, with size [batch_size, output_size]
-        Return:
-        grad_input: dL/dx, with size [batch_size, input_size]
-        grad_W: dL/dW, with size [output_size, input_size], summed for data in the batch
-        grad_b: dL/db, with size [output_size], summed for data in the batch
-        '''
-
-        x, W = ctx.saved_variables
-
-        # calculate dL/dx by using dL/dy (grad_output) and W, e.g., dL/dx = dL/dy*W
-        # calculate dL/dW by using dL/dy (grad_output) and x
-        # calculate dL/db using dL/dy (grad_output)
-        # you can use torch.matmul(A, B) to compute matrix product of A and B
-
-        # TODO
-        grad_input = ???
-        grad_W = ???
-        grad_b = ???
-
-        return grad_input, grad_W, grad_b
-
-
-# TODO 2: complete the forward and backward propagation processes of the hinge loss
-class Hinge(torch.autograd.Function):
-
-    @staticmethod
-    def forward(ctx, output, W, label, C):
-        """
-        Compute the hinge loss
-        --------------------------------------
-        :param ctx: a context object that can be used to stash information for backward computation
-        :param output: the output of the linear layer with size [batch_size, 1], i.e. output = W^T*x + b
-        :param W: weight matrix with size [1, input_size]
-        :param label: the ground truth y in the equation for loss calculation, with size [batch_size]
-        :param C: the regularization coefficient of hinge loss with size [1, 1]
-        :return: the hinge loss with size [1, 1]
-        """
-        C = C.type_as(W)
-
-        # TODO: compute the hinge loss (together with L2 norm for SVM): loss = 0.5*||w||^2 + C*\sum_i{max(0, 1 - y_i*output_i)}
-        # you may need F.relu() to implement the max() function.
-        loss = ???
-        ctx.save_for_backward(output, W, label, C)
-
-        return loss
-
-    @staticmethod
-    def backward(ctx, grad_loss):
-        """
-        Compute the gradient of hinge loss
-        :param ctx: a context object with saved variables
-        :param grad_loss: dL/dloss, with size [1, 1], the gradient of the final target loss with respect to the output (variable 'loss') of the forward function
-        :return:
-            grad_output: dL/doutput, with size [batch_size, 1]
-            grad_W: dL/dW, with size [1, channels]
-        """
-        output, W, label, C = ctx.saved_tensors
-        # TODO: compute the grad with respect to the output of the linear function and W: dL/doutput, dL/dW
-        grad_output = ???
-        grad_W = ???
-        return grad_output, grad_W, None, None
-
-
-# TODO 3: complete the structure of SVM model
-class SVM_HINGE(nn.Module):
-
-    def __init__(self, in_channels, C):
-        """
-        :param in_channels: number of feature channels for SVM input
-        :param C: regularization coefficient of hinge loss with size [1, 1]
-        """
-        super().__init__()
-
-        # TODO: define the parameters W and b
-        """
-            the shape of W should be [1, channels] and the shape of b should be [1, ]
-            you need to use nn.Parameter() to make W and b be trainable parameters, don't forget to set requires_grad=True for self.W and self.b
-            please use torch.randn() to initialize W and b
-        """
-
-        self.W = ???
-        self.b = ???
-        self.C = torch.tensor([[C]], requires_grad=False)
-
-    def forward(self, x, label=None):
-        # SVM calculation
-        output = LinearFunction.apply(x, self.W, self.b)
-        if label is not None:
-            loss = Hinge.apply(output, self.W, label, self.C)
-        else:
-            loss = None
-        output = (output > 0.0).type_as(x) * 2.0 - 1.0
-        return output, loss
+# ========================================================
+#             Media and Cognition
+#             Homework 3 Support Vector Machine
+#             svm_hw.py - The implementation of SVM using hinge loss
+#             Student ID:
+#             Name:
+#             Tsinghua University
+#             (C) Copyright 2024
+# ========================================================
+
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+
+
+# TODO 1: complete the forward and backward propagation processes of the linear layer
+class LinearFunction(torch.autograd.Function):
+    '''
+    we will implement the linear function:
+    y = xW^T + b
+    as well as its gradient computation process
+    '''
+
+    @staticmethod
+    def forward(ctx, x, W, b):
+        '''
+        Input:
+        :param ctx: a context object that can be used to stash information for backward computation
+        :param x: input features with size [batch_size, input_size]
+        :param W: weight matrix with size [output_size, input_size]
+        :param b: bias with size [output_size]
+        Return:
+        y :output features with size [batch_size, output_size]
+        '''
+
+        # TODO
+        y = ???
+        ctx.save_for_backward(x, W)
+
+        return y
+
+    @staticmethod
+    def backward(ctx, grad_output):
+        '''
+        Input:
+        :param ctx: a context object with saved variables
+        :param grad_output: dL/dy, with size [batch_size, output_size]
+        Return:
+        grad_input: dL/dx, with size [batch_size, input_size]
+        grad_W: dL/dW, with size [output_size, input_size], summed for data in the batch
+        grad_b: dL/db, with size [output_size], summed for data in the batch
+        '''
+
+        x, W = ctx.saved_variables
+
+        # calculate dL/dx by using dL/dy (grad_output) and W, e.g., dL/dx = dL/dy*W
+        # calculate dL/dW by using dL/dy (grad_output) and x
+        # calculate dL/db using dL/dy (grad_output)
+        # you can use torch.matmul(A, B) to compute matrix product of A and B
+
+        # TODO
+        grad_input = ???
+        grad_W = ???
+        grad_b = ???
+
+        return grad_input, grad_W, grad_b
+
+
+# TODO 2: complete the forward and backward propagation processes of the hinge loss
+class Hinge(torch.autograd.Function):
+
+    @staticmethod
+    def forward(ctx, output, W, label, C):
+        """
+        Compute the hinge loss
+        --------------------------------------
+        :param ctx: a context object that can be used to stash information for backward computation
+        :param output: the output of the linear layer with size [batch_size, 1], i.e. output = xW^T + b
+        :param W: weight matrix with size [1, input_size]
+        :param label: the ground truth y in the equation for loss calculation, with size [batch_size]
+        :param C: the regularization coefficient of hinge loss with size [1, 1]
+        :return: the hinge loss with size [1, 1]
+        """
+        C = C.type_as(W)
+
+        # TODO: compute the hinge loss (together with L2 norm for SVM): loss = 0.5*||w||^2 + C*\sum_i{max(0, 1 - y_i*output_i)}
+        # you may need F.relu() to implement the max() function.
+        loss = ???
+        ctx.save_for_backward(output, W, label, C)
+
+        return loss
+
+    @staticmethod
+    def backward(ctx, grad_loss):
+        """
+        Compute the gradient of hinge loss
+        :param ctx: a context object with saved variables
+        :param grad_loss: dL/dloss, with size [1, 1], the gradient of the final target loss with respect to the output (variable 'loss') of the forward function
+        :return:
+            grad_output: dL/doutput, with size [batch_size, 1]
+            grad_W: dL/dW, with size [1, channels]
+        """
+        output, W, label, C = ctx.saved_tensors
+        # TODO: compute the grad with respect to the output of the linear function and W: dL/doutput, dL/dW
+        grad_output = ???
+        grad_W = ???
+        return grad_output, grad_W, None, None
+
+
+# TODO 3: complete the structure of SVM model
+class SVM_HINGE(nn.Module):
+
+    def __init__(self, in_channels, C):
+        """
+        :param in_channels: number of feature channels for SVM input
+        :param C: regularization coefficient of hinge loss, a scalar that is stored in self.C as a tensor with size [1, 1]
+        """
+        super().__init__()
+
+        # TODO: define the parameters W and b
+        """
+            the shape of W should be [1, channels] and the shape of b should be [1, ]
+            you need to use nn.Parameter() to make W and b be trainable parameters, don't forget to set requires_grad=True for self.W and self.b
+            please use torch.randn() to initialize W and b
+        """
+
+        self.W = ???
+        self.b = ???
+        self.C = torch.tensor([[C]], requires_grad=False)
+
+    def forward(self, x, label=None):
+        # SVM calculation
+        output = LinearFunction.apply(x, self.W, self.b)
+        if label is not None:
+            loss = Hinge.apply(output, self.W, label, self.C)
+        else:
+            loss = None
+        output = (output > 0.0).type_as(x) * 2.0 - 1.0
+        return output, loss
--- a/hw3/code/test_svm.py
+++ b/hw3/code/test_svm.py
@@ -1,106 +1,106 @@
-# ========================================================
-#             Media and Cognition
-#             Homework 3 Support Vector Machine
-#             test_svm.py - Test svm model for traffic sign
-#             Student ID:
-#             Name:
-#             Tsinghua University
-#             (C) Copyright 2024
-# ========================================================
-
-# ==== Part 1: import libs
-import argparse
-import torch
-from datasets import Traffic_Dataset
-from svm_hw import SVM_HINGE
-from torch.utils.data import DataLoader
-
-
-# ==== Part 2: testing
-def test(
-    data_root,
-    model_save_path,
-    device,
-):
-    """
-    The main testing procedure of SVM model
-    ----------------------------
-    :param data_root: path to the root directory of dataset
-    :param model_save_path: path to pretrained SVM model
-    :param device: device: 'cpu' or 'cuda', we can use 'cpu' for our homework if GPU with cuda support is not available
-    """
-
-    # TODO 1: =================== load the pretrained SVM model ==================================
-
-    # TODO: construct testing data loader with 'Traffic_Dataset' and DataLoader, and set 'batch_size=1' and 'shuffle=False'
-    test_data = ???
-    test_loader = ???
-
-    # TODO: load state dictionary of pretrained SVM model
-    model_svm = ???
-
-    # TODO: initialize the SVM model using 'model_svm["configs"]["feature_channel"]' and 'model_svm["configs"]["C"]'
-    svm = ???
-
-    # TODO: load model parameters (model_svm['state_dict']) we saved in model_path using svm.load_state_dict()
-    ???
-
-    # TODO: put the model on CPU or GPU
-    ???
-
-    # TODO 2 : ================================ testing ==============================================
-
-    # TODO: set the model in evaluation mode
-    ???
-
-    # to calculate and save the testing accuracy
-    n_correct = 0.  # number of images that are correctly classified
-    n_feas = 0.  # number of total images
-
-    with torch.no_grad():  # we do not need to compute gradients during validation
-        # TODO: inference on the testing dataset, similar to the training stage but use 'test_loader'.
-        for ??? in ???:
-            # TODO: set data type (.float()) and device (.to())
-            ???
-
-            # TODO: run the model; at the validation step, the model only needs one input: feas
-            # _ refers to a placeholder, which means we do not need the second returned value during validating
-            ???
-
-            # TODO: sum up the number of images correctly recognized. note the shapes of 'out' and 'labels' are different
-            n_correct += ???
-
-            # TODO:sum up the total image number
-            n_feas += ???
-
-    # show prediction accuracy
-    acc = 100 * n_correct / n_feas
-    print('Test accuracy = {:.1f}%'.format(acc))
-
-
-if __name__ == "__main__":
-    # set configurations of the testing process
-    parser = argparse.ArgumentParser()
-    parser.add_argument("--data_root", type=str, default="data", help="file list of training image paths and labels")
-    parser.add_argument("--device", type=str, help="cpu or cuda")
-    parser.add_argument("--model_save_path", type=str, default="checkpoints/svm.pth", help="path to save SVM model")
-
-    args = parser.parse_args()
-    if args.device is None:
-        args.device = "cuda" if torch.cuda.is_available() else "cpu"
-
-    # run the testing procedure
-    test(
-        data_root=args.data_root,
-        model_save_path=args.model_save_path,
-        device=args.device,
-    )
-
-
-
-
-
-
-
-
-
+# ========================================================
+#             Media and Cognition
+#             Homework 3 Support Vector Machine
+#             test_svm.py - Test svm model for traffic sign
+#             Student ID:
+#             Name:
+#             Tsinghua University
+#             (C) Copyright 2024
+# ========================================================
+
+# ==== Part 1: import libs
+import argparse
+import torch
+from datasets import Traffic_Dataset
+from svm_hw import SVM_HINGE
+from torch.utils.data import DataLoader
+
+
+# ==== Part 2: testing
+def test(
+    data_root,
+    model_save_path,
+    device,
+):
+    """
+    The main testing procedure of SVM model
+    ----------------------------
+    :param data_root: path to the root directory of dataset
+    :param model_save_path: path to pretrained SVM model
+    :param device: 'cpu' or 'cuda', we can use 'cpu' for our homework if GPU with cuda support is not available
+    """
+
+    # TODO 1: =================== load the pretrained SVM model ==================================
+
+    # TODO: construct testing data loader with 'Traffic_Dataset' and DataLoader, and set 'batch_size=1' and 'shuffle=False'
+    test_data = ???
+    test_loader = ???
+
+    # TODO: load state dictionary of pretrained SVM model
+    model_svm = ???
+
+    # TODO: initialize the SVM model using 'model_svm["configs"]["feature_channel"]' and 'model_svm["configs"]["C"]'
+    svm = ???
+
+    # TODO: load model parameters (model_svm['state_dict']) we saved in model_path using svm.load_state_dict()
+    ???
+
+    # TODO: put the model on CPU or GPU
+    ???
+
+    # TODO 2 : ================================ testing ==============================================
+
+    # TODO: set the model in evaluation mode
+    ???
+
+    # to calculate and save the testing accuracy
+    n_correct = 0.  # number of images that are correctly classified
+    n_feas = 0.  # number of total images
+
+    with torch.no_grad():  # we do not need to compute gradients during validation
+        # TODO: inference on the testing dataset, similar to the training stage but use 'test_loader'.
+        for ??? in ???:
+            # TODO: set data type (.float()) and device (.to())
+            ???
+
+            # TODO: run the model; at the testing step, the model only needs one input: feas
+            # _ refers to a placeholder, which means we do not need the second returned value during testing
+            ???
+
+            # TODO: sum up the number of images correctly recognized. note the shapes of 'out' and 'labels' are different
+            n_correct += ???
+
+            # TODO: sum up the total number of images
+            n_feas += ???
+
+    # show prediction accuracy
+    acc = 100 * n_correct / n_feas
+    print('Test accuracy = {:.1f}%'.format(acc))
+
+
+if __name__ == "__main__":
+    # set configurations of the testing process
+    parser = argparse.ArgumentParser()
+    parser.add_argument("--data_root", type=str, default="data", help="file list of training image paths and labels")
+    parser.add_argument("--device", type=str, help="cpu or cuda")
+    parser.add_argument("--model_save_path", type=str, default="checkpoints/svm.pth", help="path to save SVM model")
+
+    args = parser.parse_args()
+    if args.device is None:
+        args.device = "cuda" if torch.cuda.is_available() else "cpu"
+
+    # run the testing procedure
+    test(
+        data_root=args.data_root,
+        model_save_path=args.model_save_path,
+        device=args.device,
+    )
+
+
+
+
+
+
+
+
+
--- a/hw3/code/train_svm.py
+++ b/hw3/code/train_svm.py
@@ -1,289 +1,289 @@
-# ========================================================
-#             Media and Cognition
-#             Homework 3 Support Vector Machine
-#             train_svm.py - Train svm model for traffic sign
-#             Student ID:
-#             Name:
-#             Tsinghua University
-#             (C) Copyright 2024
-# ========================================================
-
-# ==== Part 1: import libs
-import argparse
-import matplotlib.pyplot as plt
-import torch
-import numpy as np
-import random
-from datasets import Traffic_Dataset
-from svm_hw import SVM_HINGE
-from torch.utils.data import DataLoader
-
-
-# ==== Part 2: training and validation
-def train(
-    data_root,
-    feature_channel,
-    batch_size,
-    n_epoch,
-    lr,
-    C,
-    model_save_path,
-    device,
-):
-    """
-    The main training procedure of SVM model
-    ----------------------------
-    :param data_root: path to the root directory of dataset
-    :param feature_channel: number of feature channels for SVM input
-    :param batch_size: batch size of training
-    :param n_epoch: number of training epochs
-    :param lr: learning rate
-    :param C: regularization coefficient in hinge loss
-    :param model_save_path: path to save SVM model
-    :param device: 'cpu' or 'cuda', we can use 'cpu' for our homework if GPU with cuda support is not available
-    """
-
-    # TODO 1: construct training and validation data loader with 'Traffic_Dataset' and DataLoader, and set proper values for 'batch_size' and 'shuffle'
-    train_data = ???
-    train_loader = ???
-    val_data = ???
-    val_loader = ???
-
-    # scale the regularization coefficient
-    C = C * len(train_loader)
-
-    # TODO: initialize the SVM model
-    svm = ???
-
-    # TODO: put the model on CPU or GPU
-    ???
-
-    # TODO: define the Adam optimizer
-    optimizer = ???
-
-    # to save the training loss, training accuracy, validation accuracy, and the epoch index of each training epoch
-    train_loss = []
-    train_acc = []
-    val_acc = []
-    epochs = []
-
-    for epoch in range(n_epoch):
-        # TODO: save the index of current epoch in the array 'epochs'
-        ???
-
-        # TODO 2: ========================= training =======================
-        # TODO: set the model in training mode
-        ???
-
-        # to calculate and save the training loss and training accuracy
-        total_loss = 0.  # to save total training loss in one epoch
-        n_correct = 0.  # number of images that are correctly classified
-        n_feas = 0.  # number of total images
-
-        # TODO: get a batch of data; you may need enumerate() to iteratively get data from 'train_loader'.
-        # you can refer to previous homework, for example hw2
-        for ??? in ???:
-            # TODO: set data type (.float()) and device (.to())
-            ???
-
-            # TODO: clear gradients in the optimizer
-            ???
-
-            # TODO: run the model with hinge loss; the model needs two inputs: feas and labels
-            ???
-
-            # TODO: back-propagation on the computation graph
-            ???
-
-            # TODO: sum up of total loss, loss.item() return the value of the tensor as a standard python number
-            total_loss += ???
-
-            # TODO: call a function to update the parameters of the models
-            ???
-
-            # TODO: sum up the number of images correctly recognized. note the shapes of 'out' and 'labels' are different
-            n_correct += ???
-
-            # TODO: sum up the total image number
-            n_feas += ???
-
-        # average of the total loss for iterations
-        acc = 100 * n_correct / n_feas
-        avg_loss = total_loss / len(train_loader)
-        train_acc.append(acc.cpu().numpy())
-        train_loss.append(avg_loss)
-        print('Epoch {:02d}: loss = {:.3f}, training accuracy = {:.1f}%'.format(epoch + 1, avg_loss, acc))
-
-        # TODO 3: ========================== Validation ======================================
-
-        # TODO: set the model in evaluation mode
-        ???
-
-        # to calculate and save the validation accuracy
-        n_correct = 0.  # number of images that are correctly classified
-        n_feas = 0.  # number of total images
-
-        with torch.no_grad():  # we do not need to compute gradients during validation
-            # TODO: inference on the validation dataset, similar to the training stage but use 'val_loader'.
-            for ??? in ???:
-                # TODO: set data type (.float()) and device (.to())
-                ???
-
-                # TODO: run the model; at the validation step, the model only needs one input: feas
-                # _ refers to a placeholder, which means we do not need the second returned value during validating
-                ???
-
-                # TODO: sum up the number of images correctly recognized. note the shapes of 'out' and 'labels' are different
-                n_correct += ???
-
-                # TODO: sum up the total image number
-                n_feas += ???
-
-        # show prediction accuracy
-        acc = 100 * n_correct / n_feas
-        print('Epoch {:02d}: validation accuracy = {:.1f}%'.format(epoch + 1, acc))
-        val_acc.append(acc.cpu().numpy())
-
-    # save model parameters in a file
-    torch.save({'state_dict': svm.state_dict(),
-                'configs': {
-                    'feature_channel': feature_channel,
-                    'C': C}
-                }, model_save_path)
-    print('Model saved in {}\n'.format(model_save_path))
-
-    W = svm.W.data.cpu()
-    b = svm.b.data.cpu()
-
-    # TODO 4: calculate the index of support vectors in training samples using 'train_data.datas' and 'train_data.labels'
-    # 'sv' should be a list in python structure with the shape of [K], where K is the number of support vectors.
-    sv = ???
-
-    plot(train_loss, train_acc, val_acc, epochs)
-    plot_feature(train_features=train_data.datas, val_features=val_data.datas, train_labels=train_data.labels,
-                 val_labels=val_data.labels, sv=sv, W=W, b=b)
-
-
-def plot_feature(train_features, val_features, train_labels, val_labels, sv, W, b):
-    """
-    Draw the samples,SVM decision boundary, and support vectors
-    ---------------------
-    :param train_features: training samples with the shape of [B, 2]
-    :param val_features: validation samples with the shape of [B, 2]
-    :param train_labels: the labels (chosen from{-1, +1}) corresponding to training samples, with the shape of [B, 1]
-    :param val_labels: the labels (chosen from{-1, +1}) corresponding to validation samples, with the shape of [B, 1]
-    :param sv: a list with the index of support vectors in training samples, with the shape of [K] (K is the number of support vectors)
-    :param W: the weight vector of SVM decision boundary (W^Tx + b), with the shape of [1, feature_channel]
-    :param b: the bias of SVM decision boundary (W^Tx + b), with the shape of [1,]
-    """
-    train_labels = (train_labels > 0.0).int()
-    val_labels = (val_labels > 0.0).int()
-    train_labels[sv] = 2
-    foreground = list(set([i for i in range(train_labels.shape[0] // 2)]) - set(sv))
-    foreground_sv = list(set([i for i in range(train_labels.shape[0] // 2)]) - set(foreground))
-    background = list(set([i + train_labels.shape[0] // 2 for i in range(train_labels.shape[0] // 2)]) - set(sv))
-    background_sv = list(set([i + train_labels.shape[0] // 2 for i in range(train_labels.shape[0] // 2)]) - set(background))
-    f, ax = plt.subplots()
-    plt.title("training dataset")
-    ax.scatter(train_features[foreground, 0], train_features[foreground, 1], marker='.', c='r', label="-1")
-    ax.scatter(train_features[foreground_sv, 0], train_features[foreground_sv, 1], marker='.', c='darkorange',
-               label="-1 (support vector)")
-    ax.scatter(train_features[background, 0], train_features[background, 1], marker='x', c='b', label="+1")
-    ax.scatter(train_features[background_sv, 0], train_features[background_sv, 1], marker='x', c='c',
-               label="+1 (support vector)")
-    x = np.linspace(-20, 20, 100)
-    ax.plot(x, -W[0, 0] / W[0, 1] * x - b / W[0, 1], c='y')
-    ax.legend(loc="best")
-    plt.ylim([-30, 30])
-    plt.show()
-    f, ax = plt.subplots()
-    plt.title("validation dataset")
-    foreground_val = [i for i in range(val_labels.shape[0] // 2)]
-    background_val = [i + val_labels.shape[0] // 2 for i in range(val_labels.shape[0] // 2)]
-    ax.scatter(val_features[foreground_val, 0], val_features[foreground_val, 1], marker='.', c='r', label="-1")
-    ax.scatter(val_features[background_val, 0], val_features[background_val, 1], marker='x', c='b', label="+1")
-    x = np.linspace(-20, 20, 100)
-    ax.plot(x, -W[0, 0] / W[0, 1] * x - b / W[0, 1], c='y')
-    ax.legend(loc="best")
-    plt.ylim([-30, 30])
-    plt.show()
-
-
-def plot(train_loss, train_acc, val_acc, epochs):
-    """
-    Draw loss and accuracy curve
-    ------------------
-    :param train_loss: a list with loss of each training epoch
-    :param train_acc: a list with accuracy on training dataset of each training epoch
-    :param val_acc: a list with accuracy on validation dataset of each training epoch
-    :param epochs: a list with the index of all training epochs
-    """
-
-    # draw the training loss curve
-    f, ax = plt.subplots()
-    plt.title("Training Loss")
-    ax.plot(epochs, train_loss, color="tab:blue")
-    ax.set_xlabel("Training epoch")
-    ax.set_ylabel("Loss")
-    ax.legend(["training loss"], loc="best")
-    plt.show()
-
-    # draw the accuracy curve
-    f, ax = plt.subplots()
-    plt.title("Training and Validation Accuracy")
-    ax.plot(epochs, train_acc, color="tab:orange")
-    ax.plot(epochs, val_acc, color="tab:green")
-    ax.legend(["training accuracy","validation accuracy"], loc="best")
-    ax.set_xlabel("Training epoch")
-    ax.set_ylabel("Accuracy")
-    ax.set_ylim(0, 101)
-    plt.show()
-
-
-if __name__ == "__main__":
-    # set random seed for reproducibility
-    seed = 2024
-    random.seed(seed)
-    np.random.seed(seed)
-    torch.manual_seed(seed)
-    torch.cuda.manual_seed(seed)
-    torch.cuda.manual_seed_all(seed)
-    torch.backends.cudnn.deterministic = True
-
-    # set configurations of the model and training process
-    parser = argparse.ArgumentParser()
-    parser.add_argument("--data_root", type=str, default="data", help="file list of training image paths and labels",)
-    parser.add_argument("--n_epoch", type=int, default=50, help="number of training epochs")
-    parser.add_argument("--batch_size", type=int, default=20, help="training batch size")
-    parser.add_argument("--lr", type=float, default=1e-2, help="learning rate")
-    parser.add_argument("--C", type=float, default=1e-3, help="regularization coefficient in hinge loss")
-    parser.add_argument("--device", type=str, help="cpu or cuda")
-    parser.add_argument("--feature_channel", type=int, default=2, help="number of pre-extracted feature channel by pretrained network")
-    parser.add_argument("--model_save_path", type=str, default="checkpoints/svm.pth", help="path to save SVM model")
-
-    args = parser.parse_args()
-    if args.device is None:
-        args.device = "cuda" if torch.cuda.is_available() else "cpu"
-
-    # run the training procedure
-    train(
-        data_root=args.data_root,
-        feature_channel=args.feature_channel,
-        batch_size=args.batch_size,
-        n_epoch=args.n_epoch,
-        lr=args.lr,
-        C=args.C,
-        model_save_path=args.model_save_path,
-        device=args.device,
-    )
-
-
-
-
-
-
-
-
-
-
-
+# ========================================================
+#             Media and Cognition
+#             Homework 3 Support Vector Machine
+#             train_svm.py - Train svm model for traffic sign
+#             Student ID:
+#             Name:
+#             Tsinghua University
+#             (C) Copyright 2024
+# ========================================================
+
+# ==== Part 1: import libs
+import argparse
+import matplotlib.pyplot as plt
+import torch
+import numpy as np
+import random
+from datasets import Traffic_Dataset
+from svm_hw import SVM_HINGE
+from torch.utils.data import DataLoader
+
+
+# ==== Part 2: training and validation
+def train(
+    data_root,
+    feature_channel,
+    batch_size,
+    n_epoch,
+    lr,
+    C,
+    model_save_path,
+    device,
+):
+    """
+    The main training procedure of SVM model
+    ----------------------------
+    :param data_root: path to the root directory of dataset
+    :param feature_channel: number of feature channels for SVM input
+    :param batch_size: batch size of training
+    :param n_epoch: number of training epochs
+    :param lr: learning rate
+    :param C: regularization coefficient in hinge loss
+    :param model_save_path: path to save SVM model
+    :param device: 'cpu' or 'cuda', we can use 'cpu' for our homework if GPU with cuda support is not available
+    """
+
+    # TODO 1: construct training and validation data loader with 'Traffic_Dataset' and DataLoader, and set proper values for 'batch_size' and 'shuffle'
+    train_data = ???
+    train_loader = ???
+    val_data = ???
+    val_loader = ???
+
+    # scale the regularization coefficient
+    C = C * len(train_loader)
+
+    # TODO: initialize the SVM model
+    svm = ???
+
+    # TODO: put the model on CPU or GPU
+    ???
+
+    # TODO: define the Adam optimizer
+    optimizer = ???
+
+    # to save the training loss, training accuracy, validation accuracy, and the epoch index of each training epoch
+    train_loss = []
+    train_acc = []
+    val_acc = []
+    epochs = []
+
+    for epoch in range(n_epoch):
+        # TODO: save the index of current epoch in the array 'epochs'
+        ???
+
+        # TODO 2: ========================= training =======================
+        # TODO: set the model in training mode
+        ???
+
+        # to calculate and save the training loss and training accuracy
+        total_loss = 0.  # to save total training loss in one epoch
+        n_correct = 0.  # number of images that are correctly classified
+        n_feas = 0.  # number of total images
+
+        # TODO: get a batch of data; you may need enumerate() to iteratively get data from 'train_loader'.
+        # you can refer to previous homework, for example hw2
+        for ??? in ???:
+            # TODO: set data type (.float()) and device (.to())
+            ???
+
+            # TODO: clear gradients in the optimizer
+            ???
+
+            # TODO: run the model with hinge loss; the model needs two inputs: feas and labels
+            ???
+
+            # TODO: back-propagation on the computation graph
+            ???
+
+            # TODO: accumulate the total loss; loss.item() returns the value of the tensor as a standard Python number
+            total_loss += ???
+
+            # TODO: call a function to update the parameters of the models
+            ???
+
+            # TODO: sum up the number of images correctly recognized. note the shapes of 'out' and 'labels' are different
+            n_correct += ???
+
+            # TODO: sum up the total image number
+            n_feas += ???
+
+        # training accuracy of this epoch and the total loss averaged over its iterations
+        acc = 100 * n_correct / n_feas
+        avg_loss = total_loss / len(train_loader)
+        train_acc.append(acc.cpu().numpy())
+        train_loss.append(avg_loss)
+        print('Epoch {:02d}: loss = {:.3f}, training accuracy = {:.1f}%'.format(epoch + 1, avg_loss, acc))
+
+        # TODO 3: ========================== Validation ======================================
+
+        # TODO: set the model in evaluation mode
+        ???
+
+        # to calculate and save the validation accuracy
+        n_correct = 0.  # number of images that are correctly classified
+        n_feas = 0.  # number of total images
+
+        with torch.no_grad():  # we do not need to compute gradients during validation
+            # TODO: inference on the validation dataset, similar to the training stage but use 'val_loader'.
+            for ??? in ???:
+                # TODO: set data type (.float()) and device (.to())
+                ???
+
+                # TODO: run the model; at the validation step, the model only needs one input: feas
+                # _ refers to a placeholder, which means we do not need the second returned value during validating
+                ???
+
+                # TODO: sum up the number of images correctly recognized. note the shapes of 'out' and 'labels' are different
+                n_correct += ???
+
+                # TODO: sum up the total image number
+                n_feas += ???
+
+        # show prediction accuracy
+        acc = 100 * n_correct / n_feas
+        print('Epoch {:02d}: validation accuracy = {:.1f}%'.format(epoch + 1, acc))
+        val_acc.append(acc.cpu().numpy())
+
+    # save model parameters in a file
+    torch.save({'state_dict': svm.state_dict(),
+                'configs': {
+                    'feature_channel': feature_channel,
+                    'C': C}
+                }, model_save_path)
+    print('Model saved in {}\n'.format(model_save_path))
+
+    W = svm.W.data.cpu()
+    b = svm.b.data.cpu()
+
+    # TODO 4: calculate the index of support vectors in training samples using 'train_data.datas' and 'train_data.labels'
+    # 'sv' should be a Python list of length K, where K is the number of support vectors.
+    sv = ???
+
+    plot(train_loss, train_acc, val_acc, epochs)
+    plot_feature(train_features=train_data.datas, val_features=val_data.datas, train_labels=train_data.labels,
+                 val_labels=val_data.labels, sv=sv, W=W, b=b)
+
+
+def plot_feature(train_features, val_features, train_labels, val_labels, sv, W, b):
+    """
+    Draw the samples, SVM decision boundary, and support vectors
+    ---------------------
+    :param train_features: training samples with the shape of [B, 2]
+    :param val_features: validation samples with the shape of [B, 2]
+    :param train_labels: the labels (chosen from {-1, +1}) corresponding to training samples, with the shape of [B, 1]
+    :param val_labels: the labels (chosen from {-1, +1}) corresponding to validation samples, with the shape of [B, 1]
+    :param sv: a list with the index of support vectors in training samples, with the shape of [K] (K is the number of support vectors)
+    :param W: the weight vector of SVM decision boundary (W^Tx + b), with the shape of [1, feature_channel]
+    :param b: the bias of SVM decision boundary (W^Tx + b), with the shape of [1,]
+    """
+    train_labels = (train_labels > 0.0).int()
+    val_labels = (val_labels > 0.0).int()
+    train_labels[sv] = 2
+    foreground = list(set([i for i in range(train_labels.shape[0] // 2)]) - set(sv))
+    foreground_sv = list(set([i for i in range(train_labels.shape[0] // 2)]) - set(foreground))
+    background = list(set([i + train_labels.shape[0] // 2 for i in range(train_labels.shape[0] // 2)]) - set(sv))
+    background_sv = list(set([i + train_labels.shape[0] // 2 for i in range(train_labels.shape[0] // 2)]) - set(background))
+    f, ax = plt.subplots()
+    plt.title("training dataset")
+    ax.scatter(train_features[foreground, 0], train_features[foreground, 1], marker='.', c='r', label="-1")
+    ax.scatter(train_features[foreground_sv, 0], train_features[foreground_sv, 1], marker='.', c='darkorange',
+               label="-1 (support vector)")
+    ax.scatter(train_features[background, 0], train_features[background, 1], marker='x', c='b', label="+1")
+    ax.scatter(train_features[background_sv, 0], train_features[background_sv, 1], marker='x', c='c',
+               label="+1 (support vector)")
+    x = np.linspace(-20, 20, 100)
+    ax.plot(x, -W[0, 0] / W[0, 1] * x - b / W[0, 1], c='y')
+    ax.legend(loc="best")
+    plt.ylim([-30, 30])
+    plt.show()
+    f, ax = plt.subplots()
+    plt.title("validation dataset")
+    foreground_val = [i for i in range(val_labels.shape[0] // 2)]
+    background_val = [i + val_labels.shape[0] // 2 for i in range(val_labels.shape[0] // 2)]
+    ax.scatter(val_features[foreground_val, 0], val_features[foreground_val, 1], marker='.', c='r', label="-1")
+    ax.scatter(val_features[background_val, 0], val_features[background_val, 1], marker='x', c='b', label="+1")
+    x = np.linspace(-20, 20, 100)
+    ax.plot(x, -W[0, 0] / W[0, 1] * x - b / W[0, 1], c='y')
+    ax.legend(loc="best")
+    plt.ylim([-30, 30])
+    plt.show()
+
+
+def plot(train_loss, train_acc, val_acc, epochs):
+    """
+    Draw loss and accuracy curve
+    ------------------
+    :param train_loss: a list with loss of each training epoch
+    :param train_acc: a list with accuracy on training dataset of each training epoch
+    :param val_acc: a list with accuracy on validation dataset of each training epoch
+    :param epochs: a list with the index of all training epochs
+    """
+
+    # draw the training loss curve
+    f, ax = plt.subplots()
+    plt.title("Training Loss")
+    ax.plot(epochs, train_loss, color="tab:blue")
+    ax.set_xlabel("Training epoch")
+    ax.set_ylabel("Loss")
+    ax.legend(["training loss"], loc="best")
+    plt.show()
+
+    # draw the accuracy curve
+    f, ax = plt.subplots()
+    plt.title("Training and Validation Accuracy")
+    ax.plot(epochs, train_acc, color="tab:orange")
+    ax.plot(epochs, val_acc, color="tab:green")
+    ax.legend(["training accuracy","validation accuracy"], loc="best")
+    ax.set_xlabel("Training epoch")
+    ax.set_ylabel("Accuracy")
+    ax.set_ylim(0, 101)
+    plt.show()
+
+
+if __name__ == "__main__":
+    # set random seed for reproducibility
+    seed = 2024
+    random.seed(seed)
+    np.random.seed(seed)
+    torch.manual_seed(seed)
+    torch.cuda.manual_seed(seed)
+    torch.cuda.manual_seed_all(seed)
+    torch.backends.cudnn.deterministic = True
+
+    # set configurations of the model and training process
+    parser = argparse.ArgumentParser()
+    parser.add_argument("--data_root", type=str, default="data", help="file list of training image paths and labels",)
+    parser.add_argument("--n_epoch", type=int, default=50, help="number of training epochs")
+    parser.add_argument("--batch_size", type=int, default=20, help="training batch size")
+    parser.add_argument("--lr", type=float, default=1e-2, help="learning rate")
+    parser.add_argument("--C", type=float, default=1e-3, help="regularization coefficient in hinge loss")
+    parser.add_argument("--device", type=str, help="cpu or cuda")
+    parser.add_argument("--feature_channel", type=int, default=2, help="number of pre-extracted feature channel by pretrained network")
+    parser.add_argument("--model_save_path", type=str, default="checkpoints/svm.pth", help="path to save SVM model")
+
+    args = parser.parse_args()
+    if args.device is None:
+        args.device = "cuda" if torch.cuda.is_available() else "cpu"
+
+    # run the training procedure
+    train(
+        data_root=args.data_root,
+        feature_channel=args.feature_channel,
+        batch_size=args.batch_size,
+        n_epoch=args.n_epoch,
+        lr=args.lr,
+        C=args.C,
+        model_save_path=args.model_save_path,
+        device=args.device,
+    )
+
+
+
+
+
+
+
+
+
+
+
--- a/hw3/report/main.tex
+++ b/hw3/report/main.tex
@@ -20,21 +20,21 @@
 \begin{document}
 \courseheader
 % 请在YOUR NAME处填写自己的姓名
-\name{YOUR NAME}
+\name{高艺轩}
 \vspace{3mm}
 \centerline{\textbf{\Large{理论部分}}}

 \section{单选题（15分）}
 % 请在？处填写答案
-\subsection{\underline{?}}
+\subsection{\underline{D}}

-\subsection{\underline{?}}
+\subsection{\underline{C}}

-\subsection{\underline{?}}
+\subsection{\underline{D}}

-\subsection{\underline{?}}
+\subsection{\underline{D}}

-\subsection{\underline{?}}
+\subsection{\underline{B}}

 \section{计算题（15 分）}

@@ -47,17 +47,117 @@
 试利用LDA，将样本特征维数压缩为一维。
 }

+\begin{proof}[解]
+    首先计算$\mu_1 = (3, 2), \mu_2 = (0, 2), \mu = (1.5, 2)$。因此
+    \[S_1 = \frac{1}{4}
+    \left(
+        \begin{bmatrix}
+            0 & 0\\
+            0 & 1
+        \end{bmatrix}
+        +
+        \begin{bmatrix}
+            1 & 0\\
+            0 & 0
+        \end{bmatrix}
+        +
+        \begin{bmatrix}
+            1 & 1\\
+            1 & 1
+        \end{bmatrix}
+        +
+        \begin{bmatrix}
+            0 & 0\\
+            0 & 0
+        \end{bmatrix}
+    \right)
+    =
+    \begin{bmatrix}
+        0.5 & 0.25\\
+        0.25 & 0.5
+    \end{bmatrix}\]
+    \[S_2 = \frac{1}{4}
+    \left(
+        \begin{bmatrix}
+            0 & 0\\
+            0 & 1
+        \end{bmatrix}
+        +
+        \begin{bmatrix}
+            1 & 0\\
+            0 & 0
+        \end{bmatrix}
+        +
+        \begin{bmatrix}
+            1 & 1\\
+            1 & 1
+        \end{bmatrix}
+        +
+        \begin{bmatrix}
+            1 & 0\\
+            0 & 0
+        \end{bmatrix}
+    \right)
+    =
+    \begin{bmatrix}
+        0.75 & 0.25\\
+        0.25 & 0.5
+    \end{bmatrix}\]
+    进一步地，
+    \[S_w = \frac{1}{2} (S_1 + S_2) = 
+    \begin{bmatrix}
+        0.625 & 0.25\\
+        0.25 & 0.5
+    \end{bmatrix}\]
+    \[S_b = \frac{1}{2} \left(
+        \begin{bmatrix}
+            2.25 & 0\\
+            0 & 0
+        \end{bmatrix}
+        +
+        \begin{bmatrix}
+            2.25 & 0\\
+            0 & 0
+        \end{bmatrix}
+    \right)
+    =
+    \begin{bmatrix}
+        2.25 & 0\\
+        0 & 0
+    \end{bmatrix}\]
+    对$S_w^{-1} S_b$进行特征值分解，得到非零特征值$\lambda = 4.5$，对应的单位特征向量为$v = (0.8944, -0.4472)$。将各样本投影到$v$上，投影后的样本为
+    \[\omega_1: \left\{2.2360, 0.8944, 2.2360, 1.7888\right\}\]
+    \[\omega_2: \left\{-0.4472, 0, -1.3416, -1.7888\right\}\]
+\end{proof}
+


 \vspace{3mm}
 \subsection{模型训练通常需要大量的数据，假设某采集的数据集包含80\%的有效数据和20\%的无效数据。采用一种算法判断数据是否有效，其中无效数据被成功判别为无效数据的概率为90\%，而有效数据被误判为无效数据的概率为5\%。如果某条数据经过该算法被判别为无效数据，则根据贝叶斯定理，这条数据是无效数据的概率是多少？(提示：全概率公式$P(Y)=\sum^{N}_{i=1}P(Y|X_i)P(X_i)$)\\}

+\begin{proof}[解]
+    \begin{align*}
+        & P(\text{无效数据} \mid \text{判定无效})\\
+        = & \frac{P(\text{判定无效} \mid \text{无效数据})P(\text{无效数据})}{P(\text{判定无效} \mid \text{无效数据})P(\text{无效数据}) + P(\text{判定无效} \mid \text{有效数据})P(\text{有效数据})}\\
+        = & \frac{0.9 \times 0.2}{0.9 \times 0.2 + 0.05 \times 0.8}\\
+        = & \frac{0.18}{0.18 + 0.04}\\
+        = & \frac{9}{11} 
+    \end{align*}
+\end{proof}
+
 \vspace{3mm}
 \subsection{设有两类正态分布的样本集，第一类均值为$\mu_1=[2,-1]^T$，第二类均值为$\mu_2=[1,1]^T$。两类样本集的协方差矩阵和出现的先验概率都相等：$\Sigma_1=\Sigma_2=\Sigma=\left[ \begin{array}{cc}
    4 & 2 \\
    2 & \frac{4}{3}
 \end{array} \right]$，$p(\omega_1)=p(\omega_2)$。试计算分类界面，并对特征向量$x=[6,2]^T$分类。}

+\begin{proof}[解]
+    \[g_1(\boldsymbol{x}) = -\frac{1}{2}(\boldsymbol{x} - \boldsymbol{\mu}_1)^\mathrm{T} \Sigma^{-1} (\boldsymbol{x} - \boldsymbol{\mu}_1) + \ln p(\omega_1)\]
+    \[g_2(\boldsymbol{x}) = -\frac{1}{2}(\boldsymbol{x} - \boldsymbol{\mu}_2)^\mathrm{T} \Sigma^{-1} (\boldsymbol{x} - \boldsymbol{\mu}_2) + \ln p(\omega_2)\]
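+    决策方程为$g_1(\boldsymbol{x}) = g_2(\boldsymbol{x})$。由$\Sigma^{-1} = \begin{bmatrix} 1 & -1.5\\ -1.5 & 3 \end{bmatrix}$及$p(\omega_1) = p(\omega_2)$化简，得分类界面
+    \[g_1(\boldsymbol{x}) - g_2(\boldsymbol{x}) = 4x_1 - 7.5x_2 - 6 = 0\] 对$\boldsymbol{x} = [6, 2]^\mathrm{T}$，有$g_1(\boldsymbol{x}) - g_2(\boldsymbol{x}) = 24 - 15 - 6 = 3 > 0$，故$\boldsymbol{x}$属于$\omega_1$类。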
+\end{proof}
+
 \vspace{3mm}
 \subsection{给定异或的样本集$D=\left\{\left((0,0)^T,-1\right),\left((0,1)^T,1\right),\left((1,0)^T,1\right),\left((1,1)^T,-1\right)\right\}$该样本集是线性不可分的，可采用如下所示的多项式函数$\phi(\mathbf{x})$将样本$D=\left\{(\mathbf{x}_n,y_n)\right\}$映射为$D_\phi=\left\{(\phi(\mathbf{x}_n),y_n)\right\}$，其中$\phi(\mathbf{x})$满足
 \begin{equation*}