TA release homework3.

2024-05-01 17:13:51 +08:00
parent 8fc38ca6c5
commit 4bc3f77879
9 changed files with 1165 additions and 0 deletions
--- a/hw3/code/check.py
+++ b/hw3/code/check.py
@@ -0,0 +1,41 @@
+# ========================================================
+#             Media and Cognition
+#             Homework 3 Support Vector Machine
+#             check.py - Check your implementation of several modules
+#             Tsinghua University
+#             (C) Copyright 2024
+# ========================================================
+
+from svm_hw import SVM_HINGE, LinearFunction, Hinge
+import torch
+from torch.autograd import gradcheck
+
+
+def run():  # sanity-check LinearFunction and Hinge with gradcheck, then one SVM_HINGE forward pass
+    model = SVM_HINGE(2, C=1.0).double()
+    x = torch.randn(50, 2, requires_grad=False).double()  # gradcheck inputs are converted to double
+    W = torch.randn(1, 2, requires_grad=True).double()
+    b = torch.zeros(1, requires_grad=True).double()
+    test = gradcheck(LinearFunction.apply, (x, W, b), eps=1e-6, atol=1e-4)
+    if test:
+        print('Linear successfully tested!')
+    output = torch.randn(50, 1, requires_grad=True).double()
+    W = torch.randn(1, 2, requires_grad=True).double()
+    labels = torch.ones(1, requires_grad=False).double()
+    C = torch.tensor([[1.0]], requires_grad=False).double()
+    test = gradcheck(Hinge.apply, (output, W, labels, C), eps=1e-6, atol=1e-5)
+    if test:
+        print('Hinge successfully tested！')
+    x = torch.randn(50, 2, requires_grad=False).double()
+    labels = torch.ones(50, requires_grad=False).double()
+    try:
+        output, loss = model(x, labels)
+        assert model.W.requires_grad is True
+        assert model.b.requires_grad is True
+        print('SVM_HINGE successfully tested！')
+    except Exception as err:  # chain the root cause instead of hiding it behind a generic message
+        raise Exception('Failed testing SVM_HINGE!') from err
+
+
+if __name__ == '__main__':  # run the checks only when this file is executed as a script
+    run()
--- a/hw3/code/data_preprocess.py
+++ b/hw3/code/data_preprocess.py
@@ -0,0 +1,178 @@
+# ========================================================
+#             Media and Cognition
+#             Homework 3 Support Vector Machine
+#             data_preprocess.py - Using pretrained convolutional layers to extract feature,
+#                                   and using PCA for dimensionality reduction
+#             Student ID:
+#             Name:
+#             Tsinghua University
+#             (C) Copyright 2024
+# ========================================================
+
+import os
+import torchvision.transforms as transforms
+import torch
+from PIL import Image
+from networks import Classifier
+import matplotlib.pyplot as plt
+import argparse
+
+
+def preprocess(pre_conv, data_root, image_size, classes):
+    # TODO 1: Using PCA to reduce the dimensionality of the 2048-dimensional features extracted by the convolutional layers
+    # For each split (train/val/test): load features, project them (TODO), visualize, save to <data_root>/<split>.pt
+    # ===============  process training dataset ======================
+    print("Start preprocessing the training dataset !!!")
+    train_data, train_label = loaddata(pre_conv, data_root, 'train', image_size, classes)
+
+    # calculate the mean and the PCA projection matrix (2 output dimensions) from the training data
+    data_mean, u = PCA(train_data, 2)
+
+    # TODO: using PCA to compress the dimensionality of the train_data after subtracting the mean vector
+    train_data_pca = ???
+
+    visualize(train_data_pca, train_label, "train")
+    savedata(train_data_pca, train_label, data_root+"/train.pt")
+    print("training dataset saved !!!")
+
+    # ===============  process validation dataset ======================
+    print("Start preprocessing the validation dataset!!!")
+    val_data, val_label = loaddata(pre_conv, data_root, 'val', image_size, classes)
+
+    # TODO: using PCA to compress the dimensionality of the val_data after subtracting the mean vector
+    val_data_pca = ???
+
+    visualize(val_data_pca, val_label, "val")
+    savedata(val_data_pca, val_label, data_root+"/val.pt")
+    print("validation dataset saved !!!")
+
+    # ===============  process testing dataset ======================
+    print("Start preprocessing the testing dataset!!!")
+    test_data, test_label = loaddata(pre_conv, data_root, 'test', image_size, classes)
+
+    # TODO: using PCA to compress the dimensionality of the test_data after subtracting the mean vector
+    test_data_pca = ???
+
+    visualize(test_data_pca, test_label, "test")
+    savedata(test_data_pca, test_label, data_root+"/test.pt")
+    print("testing dataset saved !!!")
+
+
+def savedata(data, label, save_path):
+    # Bundle the samples and their labels into one dict and write it to save_path.
+    torch.save(
+        dict(data=data, label=label),
+        save_path,
+    )
+
+
+def visualize(datas, labels, mode):
+    """
+    Display feature points after dimensionality reduction
+    -------------------------------
+    :param datas: the samples after dimensionality reduction, with the shape of [N, 2]
+    :param labels: the labels (chosen from {-1, +1}) corresponding to the samples
+    :param mode: chosen from {'train', 'val', 'test'}; used as the figure title
+    :return: None, the figure is shown with plt.show()
+    """
+    plt.figure()
+    for cls_label in (-1, 1):  # iterate over the two class labels, not over the feature columns
+        plt.scatter(datas[labels == cls_label, 0], datas[labels == cls_label, 1], label=cls_label)
+    plt.legend()
+    plt.title(mode)
+    plt.show()
+
+
+def PCA(data, dim=2):
+    """
+    calculate the mean value of the data and the projection matrix for PCA
+    :param data: the sample features extracted by the pretrained network in homework2, with the shape of [N, 2048]
+    :param dim: the data dimension after projection
+    :return:
+        data_mean: the mean value of the data
+        u: the projection matrix for PCA, with the shape of [2048, dim]
+    """
+    # TODO 2: complete the algorithm of PCA, calculate the mean value of the data and the projection matrix
+
+    # TODO: compute the mean of 'data'
+    data_mean = ???
+    # TODO: compute the covariance matrix of 'data'
+    data_cov = ???
+    # TODO: compute the SVD decomposition of data_cov using torch.linalg.svd
+    # reference: https://pytorch.org/docs/1.11/generated/torch.linalg.svd.html
+    ???
+    # TODO: return the proper 'data_mean' and 'u[]'
+    return ???
+
+
+def loaddata(pre_conv, data_root, mode, image_size, classes):
+    """
+    load one dataset, and use pretrained network in homework 2 to extract feature
+    :param pre_conv: pretrained network in homework 2
+    :param data_root: the path of the dataset
+    :param mode: chosen from {'train', 'val', 'test'}
+    :param image_size: the preset size that each image try to zoom to
+    :param classes: two classes that need to be classified
+    :return:
+        datas: the samples of extracted features with the shape of [N, 2048]
+        labels: the corresponding labels for each sample (chosen from {-1, +1}), with the shape of [N]
+    """
+    assert len(classes) == 2
+    datas = []
+    labels = []
+    for idx, cls_name in enumerate(classes):
+        cls_dir = os.path.join(data_root, mode, cls_name)
+        # os.listdir returns entries in arbitrary order; sort them so the saved dataset is reproducible
+        for img in sorted(os.listdir(cls_dir)):
+            datas.append(readimg(pre_conv, os.path.join(cls_dir, img), image_size))
+            labels.append(2 * idx - 1)  # first class -> -1, second class -> +1
+    return torch.stack(datas), torch.tensor(labels)
+
+
+def readimg(pre_conv, filepath, image_size):
+    """
+    Read one image and use pretrained network to extract the feature
+    --------------------------
+    :param pre_conv: pretrained network in homework 2
+    :param filepath: the file path of one image
+    :param image_size: the preset size that each image try to zoom to
+    :return:
+        data: the extracted feature with the length of 2048
+    """
+    # the context manager closes the underlying file as soon as the pixels are converted
+    with Image.open(filepath) as raw:
+        img_pil = raw.convert('RGB').resize(image_size)
+    img_transform = transforms.Compose([transforms.ToTensor(), transforms.Normalize(0.5, 0.5)])
+    # add a leading batch axis for the network, then flatten its output to a 1-D feature vector
+    img_batch = img_transform(img_pil).unsqueeze(0)
+    data = pre_conv(img_batch).reshape(-1)
+
+    return data
+
+
+if __name__ == "__main__":
+    parser = argparse.ArgumentParser()
+    parser.add_argument("--pretrained_net", type=str, default="checkpoints/bn/ckpt_epoch_15.pth",
+                        help="the filepath of the pretrained network in homework 2")
+    parser.add_argument("--data_root", type=str, default="data", help="the path of all datasets")
+    parser.add_argument("--image_size", type=int, nargs=2, default=(32, 32),
+                        help="the preset size that each image try to zoom to")
+    parser.add_argument("--classes", nargs=2, default=["B", "C"], help="two classes that need to be classified")
+    # nargs=2 makes '--image_size 32 32' / '--classes B C' parse into two values (type=tuple split "32" into chars)
+    args = parser.parse_args()
+
+    pretrained_checkpoint = torch.load(args.pretrained_net, map_location="cpu")
+    configs = pretrained_checkpoint["configs"]
+    cls = Classifier(
+        configs["in_channels"],
+        configs["num_classes"],
+        configs["use_batch_norm"],
+        configs["use_stn"],
+        configs["dropout_prob"],
+    )
+    cls.load_state_dict(pretrained_checkpoint["model_state"])
+    for param in cls.parameters():
+        param.requires_grad = False
+    conv = cls.conv_net.eval()  # inference mode, so any BatchNorm/Dropout layers behave deterministically
+
+    preprocess(conv, args.data_root, tuple(args.image_size), args.classes)
--- a/hw3/code/datasets.py
+++ b/hw3/code/datasets.py
@@ -0,0 +1,26 @@
+# ========================================================
+#             Media and Cognition
+#             Homework 3 Support Vector Machine
+#             datasets.py - Define the data loader for the traffic sign classification dataset
+#             Student ID:
+#             Name:
+#             Tsinghua University
+#             (C) Copyright 2024
+# ========================================================
+
+
+import torch
+import torch.utils.data as data
+
+
+class Traffic_Dataset(data.Dataset):
+    # Dataset over the "data" / "label" entries of the dict that torch.load reads from data_root.
+    def __init__(self, data_root):
+        saved = torch.load(data_root)
+        self.datas, self.labels = saved["data"], saved["label"]
+
+    def __getitem__(self, index):
+        return (self.datas[index], self.labels[index])
+
+    def __len__(self):
+        return len(self.datas)
--- a/hw3/code/svm_hw.py
+++ b/hw3/code/svm_hw.py
@@ -0,0 +1,139 @@
+# ========================================================
+#             Media and Cognition
+#             Homework 3 Support Vector Machine
+#             svm_hw.py - The implementation of SVM using hinge loss
+#             Student ID:
+#             Name:
+#             Tsinghua University
+#             (C) Copyright 2024
+# ========================================================
+
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+
+
+# TODO 1: complete the forward and backward propagation processes of the linear layer
+class LinearFunction(torch.autograd.Function):
+    '''
+    we will implement the linear function:
+    y = xW^T + b
+    as well as its gradient computation process
+    '''
+
+    @staticmethod
+    def forward(ctx, x, W, b):
+        '''
+        Input:
+        :param ctx: a context object that can be used to stash information for backward computation
+        :param x: input features with size [batch_size, input_size]
+        :param W: weight matrix with size [output_size, input_size]
+        :param b: bias with size [output_size]
+        Return:
+        y :output features with size [batch_size, output_size]
+        '''
+
+        # TODO
+        y = ???
+        ctx.save_for_backward(x, W)
+
+        return y
+
+    @staticmethod
+    def backward(ctx, grad_output):
+        '''
+        Input:
+        :param ctx: a context object with saved tensors
+        :param grad_output: dL/dy, with size [batch_size, output_size]
+        Return:
+        grad_input: dL/dx, with size [batch_size, input_size]
+        grad_W: dL/dW, with size [output_size, input_size], summed for data in the batch
+        grad_b: dL/db, with size [output_size], summed for data in the batch
+        '''
+        # 'saved_tensors' replaces the deprecated 'saved_variables' (same accessor as used in Hinge.backward)
+        x, W = ctx.saved_tensors
+
+        # calculate dL/dx by using dL/dy (grad_output) and W, e.g., dL/dx = dL/dy*W
+        # calculate dL/dW by using dL/dy (grad_output) and x
+        # calculate dL/db using dL/dy (grad_output)
+        # you can use torch.matmul(A, B) to compute matrix product of A and B
+
+        # TODO
+        grad_input = ???
+        grad_W = ???
+        grad_b = ???
+
+        return grad_input, grad_W, grad_b
+
+
+# TODO 2: complete the forward and backward propagation processes of the hinge loss
+class Hinge(torch.autograd.Function):
+
+    @staticmethod
+    def forward(ctx, output, W, label, C):
+        """
+        Compute the hinge loss
+        --------------------------------------
+        :param ctx: a context object that can be used to stash information for backward computation
+        :param output: the output of the linear layer with size [batch_size, 1], i.e. output = x*W^T + b
+        :param W: weight matrix with size [1, input_size]
+        :param label: the ground truth y in the equation for loss calculation, with size [batch_size]
+        :param C: the regularization coefficient of hinge loss with size [1, 1]
+        :return: the hinge loss with size [1, 1]
+        """
+        C = C.type_as(W)  # convert C to the same tensor type as W before it is used in the loss
+
+        # TODO: compute the hinge loss (together with L2 norm for SVM): loss = 0.5*||w||^2 + C*\sum_i{max(0, 1 - y_i*output_i)}
+        # you may need F.relu() to implement the max() function.
+        loss = ???
+        ctx.save_for_backward(output, W, label, C)
+
+        return loss
+
+    @staticmethod
+    def backward(ctx, grad_loss):
+        """
+        Compute the gradient of hinge loss
+        :param ctx: a context object with saved tensors
+        :param grad_loss: dL/dloss, with size [1, 1], the gradient of the final target loss with respect to the output (variable 'loss') of the forward function
+        :return:
+            grad_output: dL/doutput, with size [batch_size, 1]
+            grad_W: dL/dW, with size [1, input_size]; 'label' and 'C' receive None (no gradient)
+        """
+        output, W, label, C = ctx.saved_tensors
+        # TODO: compute the grad with respect to the output of the linear function and W: dL/doutput, dL/dW
+        grad_output = ???
+        grad_W = ???
+        return grad_output, grad_W, None, None  # one entry per forward input: output, W, label, C
+
+
+# TODO 3: complete the structure of SVM model
+class SVM_HINGE(nn.Module):
+
+    def __init__(self, in_channels, C):
+        """
+        :param in_channels: number of feature channels for SVM input
+        :param C: scalar regularization coefficient of hinge loss; stored in self.C as a tensor with size [1, 1]
+        """
+        super().__init__()
+
+        # TODO: define the parameters W and b
+        """
+            the shape of W should be [1, channels] and the shape of b should be [1, ]
+            you need to use nn.Parameter() to make W and b be trainable parameters, don't forget to set requires_grad=True for self.W and self.b
+            please use torch.randn() to initialize W and b
+        """
+
+        self.W = ???
+        self.b = ???
+        self.C = torch.tensor([[C]], requires_grad=False)
+
+    def forward(self, x, label=None):
+        # linear score, optional hinge loss (only when 'label' is given), then scores binarized to {-1, +1}
+        output = LinearFunction.apply(x, self.W, self.b)
+        if label is not None:
+            loss = Hinge.apply(output, self.W, label, self.C)
+        else:
+            loss = None
+        output = (output > 0.0).type_as(x) * 2.0 - 1.0  # score > 0 -> +1, otherwise -1
+        return output, loss
--- a/hw3/code/test_svm.py
+++ b/hw3/code/test_svm.py
@@ -0,0 +1,106 @@
+# ========================================================
+#             Media and Cognition
+#             Homework 3 Support Vector Machine
+#             test_svm.py - Test svm model for traffic sign
+#             Student ID:
+#             Name:
+#             Tsinghua University
+#             (C) Copyright 2024
+# ========================================================
+
+# ==== Part 1: import libs
+import argparse
+import torch
+from datasets import Traffic_Dataset
+from svm_hw import SVM_HINGE
+from torch.utils.data import DataLoader
+
+
+# ==== Part 2: testing
+def test(
+    data_root,
+    model_save_path,
+    device,
+):
+    """
+    The main testing procedure of SVM model
+    ----------------------------
+    :param data_root: path to the root directory of dataset
+    :param model_save_path: path to pretrained SVM model
+    :param device: device: 'cpu' or 'cuda', we can use 'cpu' for our homework if GPU with cuda support is not available
+    """
+
+    # TODO 1: =================== load the pretrained SVM model ==================================
+
+    # TODO: construct testing data loader with 'Traffic_Dataset' and DataLoader, and set 'batch_size=1' and 'shuffle=False'
+    test_data = ???
+    test_loader = ???
+
+    # TODO: load state dictionary of pretrained SVM model
+    model_svm = ???
+
+    # TODO: initialize the SVM model using 'model_svm["configs"]["feature_channel"]' and 'model_svm["configs"]["C"]'
+    svm = ???
+
+    # TODO: load model parameters (model_svm['state_dict']) we saved in model_save_path using svm.load_state_dict()
+    ???
+
+    # TODO: put the model on CPU or GPU
+    ???
+
+    # TODO 2 : ================================ testing ==============================================
+
+    # TODO: set the model in evaluation mode
+    ???
+
+    # to calculate and save the testing accuracy
+    n_correct = 0.  # number of images that are correctly classified
+    n_feas = 0.  # number of total images
+
+    with torch.no_grad():  # we do not need to compute gradients during testing
+        # TODO: inference on the testing dataset, similar to the training stage but use 'test_loader'.
+        for ??? in ???:
+            # TODO: set data type (.float()) and device (.to())
+            ???
+
+            # TODO: run the model; at the testing step, the model only needs one input: feas
+            # _ refers to a placeholder, which means we do not need the second returned value during testing
+            ???
+
+            # TODO: sum up the number of images correctly recognized. note the shapes of 'out' and 'labels' are different
+            n_correct += ???
+
+            # TODO:sum up the total image number
+            n_feas += ???
+
+    # show prediction accuracy (in percent)
+    acc = 100 * n_correct / n_feas
+    print('Test accuracy = {:.1f}%'.format(acc))
+
+
+if __name__ == "__main__":
+    # set configurations of the testing process
+    parser = argparse.ArgumentParser()
+    parser.add_argument("--data_root", type=str, default="data", help="path to the root directory of dataset")
+    parser.add_argument("--device", type=str, help="cpu or cuda")
+    parser.add_argument("--model_save_path", type=str, default="checkpoints/svm.pth", help="path to pretrained SVM model")
+
+    args = parser.parse_args()
+    if args.device is None:  # no explicit choice: prefer the GPU when one is available
+        args.device = "cuda" if torch.cuda.is_available() else "cpu"
+
+    # run the testing procedure
+    test(
+        data_root=args.data_root,
+        model_save_path=args.model_save_path,
+        device=args.device,
+    )
+
+
+
+
+
+
+
+
+
--- a/hw3/code/train_svm.py
+++ b/hw3/code/train_svm.py
@@ -0,0 +1,289 @@
+# ========================================================
+#             Media and Cognition
+#             Homework 3 Support Vector Machine
+#             train_svm.py - Train svm model for traffic sign
+#             Student ID:
+#             Name:
+#             Tsinghua University
+#             (C) Copyright 2024
+# ========================================================
+
+# ==== Part 1: import libs
+import argparse
+import matplotlib.pyplot as plt
+import torch
+import numpy as np
+import random
+from datasets import Traffic_Dataset
+from svm_hw import SVM_HINGE
+from torch.utils.data import DataLoader
+
+
+# ==== Part 2: training and validation
+def train(
+    data_root,
+    feature_channel,
+    batch_size,
+    n_epoch,
+    lr,
+    C,
+    model_save_path,
+    device,
+):
+    """
+    The main training procedure of SVM model
+    ----------------------------
+    :param data_root: path to the root directory of dataset
+    :param feature_channel: number of feature channels for SVM input
+    :param batch_size: batch size of training
+    :param n_epoch: number of training epochs
+    :param lr: learning rate
+    :param C: regularization coefficient in hinge loss
+    :param model_save_path: path to save SVM model
+    :param device: 'cpu' or 'cuda', we can use 'cpu' for our homework if GPU with cuda support is not available
+    """
+
+    # TODO 1: construct training and validation data loader with 'Traffic_Dataset' and DataLoader, and set proper values for 'batch_size' and 'shuffle'
+    train_data = ???
+    train_loader = ???
+    val_data = ???
+    val_loader = ???
+
+    # scale the regularization coefficient by the number of batches in the training loader
+    C = C * len(train_loader)
+
+    # TODO: initialize the SVM model
+    svm = ???
+
+    # TODO: put the model on CPU or GPU
+    ???
+
+    # TODO: define the Adam optimizer
+    optimizer = ???
+
+    # to save the training loss, training accuracy, validation accuracy, and the epoch index of each training epoch
+    train_loss = []
+    train_acc = []
+    val_acc = []
+    epochs = []
+
+    for epoch in range(n_epoch):
+        # TODO: save the index of current epoch in the array 'epochs'
+        ???
+
+        # TODO 2: ========================= training =======================
+        # TODO: set the model in training mode
+        ???
+
+        # to calculate and save the training loss and training accuracy
+        total_loss = 0.  # to save total training loss in one epoch
+        n_correct = 0.  # number of images that are correctly classified
+        n_feas = 0.  # number of total images
+
+        # TODO: get a batch of data; you may need enumerate() to iteratively get data from 'train_loader'.
+        # you can refer to previous homework, for example hw2
+        for ??? in ???:
+            # TODO: set data type (.float()) and device (.to())
+            ???
+
+            # TODO: clear gradients in the optimizer
+            ???
+
+            # TODO: run the model with hinge loss; the model needs two inputs: feas and labels
+            ???
+
+            # TODO: back-propagation on the computation graph
+            ???
+
+            # TODO: sum up of total loss, loss.item() return the value of the tensor as a standard python number
+            total_loss += ???
+
+            # TODO: call a function to update the parameters of the models
+            ???
+
+            # TODO: sum up the number of images correctly recognized. note the shapes of 'out' and 'labels' are different
+            n_correct += ???
+
+            # TODO: sum up the total image number
+            n_feas += ???
+
+        # training accuracy (in percent) and the loss averaged over the iterations of this epoch
+        acc = 100 * n_correct / n_feas
+        avg_loss = total_loss / len(train_loader)
+        train_acc.append(acc.cpu().numpy())  # 'acc' must be a tensor here, i.e. n_correct is accumulated as a tensor
+        train_loss.append(avg_loss)
+        print('Epoch {:02d}: loss = {:.3f}, training accuracy = {:.1f}%'.format(epoch + 1, avg_loss, acc))
+
+        # TODO 3: ========================== Validation ======================================
+
+        # TODO: set the model in evaluation mode
+        ???
+
+        # to calculate and save the validation accuracy
+        n_correct = 0.  # number of images that are correctly classified
+        n_feas = 0.  # number of total images
+
+        with torch.no_grad():  # we do not need to compute gradients during validation
+            # TODO: inference on the validation dataset, similar to the training stage but use 'val_loader'.
+            for ??? in ???:
+                # TODO: set data type (.float()) and device (.to())
+                ???
+
+                # TODO: run the model; at the validation step, the model only needs one input: feas
+                # _ refers to a placeholder, which means we do not need the second returned value during validating
+                ???
+
+                # TODO: sum up the number of images correctly recognized. note the shapes of 'out' and 'labels' are different
+                n_correct += ???
+
+                # TODO: sum up the total image number
+                n_feas += ???
+
+        # show prediction accuracy (in percent)
+        acc = 100 * n_correct / n_feas
+        print('Epoch {:02d}: validation accuracy = {:.1f}%'.format(epoch + 1, acc))
+        val_acc.append(acc.cpu().numpy())
+
+    # save model parameters in a file; note that the stored 'C' is the scaled coefficient computed above
+    torch.save({'state_dict': svm.state_dict(),
+                'configs': {
+                    'feature_channel': feature_channel,
+                    'C': C}
+                }, model_save_path)
+    print('Model saved in {}\n'.format(model_save_path))
+
+    W = svm.W.data.cpu()
+    b = svm.b.data.cpu()
+
+    # TODO 4: calculate the indices of support vectors in training samples using 'train_data.datas' and 'train_data.labels'
+    # 'sv' should be a list in python structure with the shape of [K], where K is the number of support vectors.
+    sv = ???
+
+    plot(train_loss, train_acc, val_acc, epochs)
+    plot_feature(train_features=train_data.datas, val_features=val_data.datas, train_labels=train_data.labels,
+                 val_labels=val_data.labels, sv=sv, W=W, b=b)
+
+
+def plot_feature(train_features, val_features, train_labels, val_labels, sv, W, b):
+    """
+    Draw the samples, SVM decision boundary, and support vectors (one figure for training, one for validation)
+    ---------------------
+    :param train_features: training samples with the shape of [N, 2]
+    :param val_features: validation samples with the shape of [M, 2]
+    :param train_labels: the labels (chosen from{-1, +1}) corresponding to training samples, with the shape of [N] or [N, 1]
+    :param val_labels: the labels (chosen from{-1, +1}) corresponding to validation samples, with the shape of [M] or [M, 1]
+    :param sv: a list with the index of support vectors in training samples, with the shape of [K] (K is the number of support vectors)
+    :param W: the weight vector of SVM decision boundary (W^Tx + b), with the shape of [1, feature_channel]
+    :param b: the bias of SVM decision boundary (W^Tx + b), with the shape of [1,]
+    """
+    # select classes by label sign (not by sample position), so unbalanced or unordered data is drawn correctly
+    train_pos = train_labels.reshape(-1) > 0.0
+    val_pos = val_labels.reshape(-1) > 0.0
+    is_sv = torch.zeros_like(train_pos)
+    is_sv[torch.as_tensor(sv, dtype=torch.long)] = True
+    # the boundary W^T x + b = 0 solved for the second coordinate; shared by both figures
+    x = np.linspace(-20, 20, 100)
+    boundary = -W[0, 0] / W[0, 1] * x - b / W[0, 1]
+
+    f, ax = plt.subplots()
+    plt.title("training dataset")
+    for mask, marker, color, name in (
+        (~train_pos & ~is_sv, '.', 'r', "-1"),
+        (~train_pos & is_sv, '.', 'darkorange', "-1 (support vector)"),
+        (train_pos & ~is_sv, 'x', 'b', "+1"),
+        (train_pos & is_sv, 'x', 'c', "+1 (support vector)"),
+    ):
+        ax.scatter(train_features[mask, 0], train_features[mask, 1], marker=marker, c=color, label=name)
+    ax.plot(x, boundary, c='y')
+    ax.legend(loc="best")
+    plt.ylim([-30, 30])
+    plt.show()
+
+    f, ax = plt.subplots()
+    plt.title("validation dataset")
+    ax.scatter(val_features[~val_pos, 0], val_features[~val_pos, 1], marker='.', c='r', label="-1")
+    ax.scatter(val_features[val_pos, 0], val_features[val_pos, 1], marker='x', c='b', label="+1")
+    ax.plot(x, boundary, c='y')
+    ax.legend(loc="best")
+    plt.ylim([-30, 30])
+    plt.show()
+
+
+def plot(train_loss, train_acc, val_acc, epochs):
+    """
+    Show two figures: the training loss curve and the training/validation accuracy curves
+    ------------------
+    :param train_loss: a list with loss of each training epoch
+    :param train_acc: a list with accuracy on training dataset of each training epoch
+    :param val_acc: a list with accuracy on validation dataset of each training epoch
+    :param epochs: a list with the index of all training epochs
+    """
+
+    # figure 1: training loss per epoch
+    _, loss_ax = plt.subplots()
+    loss_ax.set_title("Training Loss")
+    loss_ax.plot(epochs, train_loss, color="tab:blue")
+    loss_ax.set_xlabel("Training epoch")
+    loss_ax.set_ylabel("Loss")
+    loss_ax.legend(["training loss"], loc="best")
+    plt.show()
+
+    # figure 2: training and validation accuracy per epoch
+    _, acc_ax = plt.subplots()
+    acc_ax.set_title("Training and Validation Accuracy")
+    for curve, curve_color in ((train_acc, "tab:orange"), (val_acc, "tab:green")):
+        acc_ax.plot(epochs, curve, color=curve_color)
+    acc_ax.legend(["training accuracy", "validation accuracy"], loc="best")
+    acc_ax.set_xlabel("Training epoch")
+    acc_ax.set_ylabel("Accuracy")
+    acc_ax.set_ylim(0, 101)
+    plt.show()
+
+
+if __name__ == "__main__":
+    # set random seed for reproducibility (python, numpy and torch CPU/GPU generators)
+    seed = 2024
+    random.seed(seed)
+    np.random.seed(seed)
+    torch.manual_seed(seed)
+    torch.cuda.manual_seed(seed)
+    torch.cuda.manual_seed_all(seed)
+    torch.backends.cudnn.deterministic = True
+
+    # set configurations of the model and training process
+    parser = argparse.ArgumentParser()
+    parser.add_argument("--data_root", type=str, default="data", help="path to the root directory of dataset",)
+    parser.add_argument("--n_epoch", type=int, default=50, help="number of training epochs")
+    parser.add_argument("--batch_size", type=int, default=20, help="training batch size")
+    parser.add_argument("--lr", type=float, default=1e-2, help="learning rate")
+    parser.add_argument("--C", type=float, default=1e-3, help="regularization coefficient in hinge loss")
+    parser.add_argument("--device", type=str, help="cpu or cuda")
+    parser.add_argument("--feature_channel", type=int, default=2, help="number of pre-extracted feature channel by pretrained network")
+    parser.add_argument("--model_save_path", type=str, default="checkpoints/svm.pth", help="path to save SVM model")
+
+    args = parser.parse_args()
+    if args.device is None:  # no explicit choice: prefer the GPU when one is available
+        args.device = "cuda" if torch.cuda.is_available() else "cpu"
+
+    # run the training procedure
+    train(
+        data_root=args.data_root,
+        feature_channel=args.feature_channel,
+        batch_size=args.batch_size,
+        n_epoch=args.n_epoch,
+        lr=args.lr,
+        C=args.C,
+        model_save_path=args.model_save_path,
+        device=args.device,
+    )
+
+
+
+
+
+
+
+
+
+
+
--- a/hw3/report/dtx-style.sty
+++ b/hw3/report/dtx-style.sty
@@ -0,0 +1,132 @@
+%%
+%% This is file `dtx-style.sty',
+%% generated with the docstrip utility.
+%%
+%% The original source files were:
+%%
+%% thucoursework.dtx  (with options: `dtx-style')
+%% 
+%% This is a generated file.
+%% 
+%% Copyright (C) 2021 by zhaofeng-shu33 <616545598@qq.com>
+%% 
+%% This work may be distributed and/or modified under the
+%% conditions of the LaTeX Project Public License, either version 1.3
+%% of this license or (at your option) any later version.
+%% The latest version of this license is in
+%%   http://www.latex-project.org/lppl.txt
+%% and version 1.3 or later is part of all distributions of LaTeX
+%% version 2005/12/01 or later.
+%% 
+%% To produce the documentation run the original source files ending with `.dtx'
+%% through LaTeX.
+%% 
+
+\ProvidesPackage{dtx-style}
+\RequirePackage{hypdoc}
+\RequirePackage[UTF8,scheme=chinese]{ctex}
+\RequirePackage{newpxtext}
+\RequirePackage{newpxmath}
+\RequirePackage[
+  top=2.5cm, bottom=2.5cm,
+  left=4cm, right=2cm,
+  headsep=3mm]{geometry}
+\RequirePackage{array,longtable,booktabs}
+\RequirePackage{listings}
+\RequirePackage{fancyhdr}
+\RequirePackage{xcolor}
+\RequirePackage{enumitem}
+\RequirePackage{etoolbox}
+\RequirePackage{metalogo}
+
+% Named colours used below when printing macro, environment and option names.
+\colorlet{thu@macro}{blue!60!black}
+\colorlet{thu@env}{blue!70!black}
+\colorlet{thu@option}{purple}
+% Patch the four name-printing commands: the \MacroFont found in each of them
+% is replaced by \MacroFont followed by bold face and the matching colour.
+% The two empty trailing arguments mean that nothing extra is executed,
+% whether the patch succeeds or fails.
+% NOTE(review): the \Print... commands are presumably those of the doc
+% package (loaded through hypdoc) -- confirm.
+\patchcmd{\PrintMacroName}{\MacroFont}{\MacroFont\bfseries\color{thu@macro}}{}{}
+\patchcmd{\PrintDescribeMacro}{\MacroFont}{\MacroFont\bfseries\color{thu@macro}}{}{}
+\patchcmd{\PrintDescribeEnv}{\MacroFont}{\MacroFont\bfseries\color{thu@env}}{}{}
+\patchcmd{\PrintEnvName}{\MacroFont}{\MacroFont\bfseries\color{thu@env}}{}{}
+
+% \DescribeOption{<name>}: print <name> in the margin and add index entries
+% for it in the `option' category.
+% The work is split over two macros so that the argument is only read (by
+% \Describe@Option) after \MakePrivateLetters has been executed inside a
+% group; \Describe@Option closes that group as its first action.
+\def\DescribeOption{%
+  \leavevmode\@bsphack\begingroup\MakePrivateLetters%
+  \Describe@Option}
+\def\Describe@Option#1{\endgroup
+  \marginpar{\raggedleft\PrintDescribeOption{#1}}%
+  \thu@special@index{option}{#1}\@esphack\ignorespaces}
+% Formatting of the option name in the margin: bold sans-serif in the
+% thu@option colour, followed by an explicit space.
+\def\PrintDescribeOption#1{\strut \MacroFont\bfseries\sffamily\color{thu@option} #1\ }
+% \thu@special@index{<category>}{<name>}: write two index entries for <name>:
+%   1. one sorted under <name>, printed in typewriter type and followed by
+%      `(<category>)';
+%   2. one under the heading `<category>:' with <name> as sub-entry.
+% Everything happens inside a group.  There \encapchar is redefined as a
+% macro that must be followed by the literal text `usage' and expands to the
+% saved encap character plus `hdclindex{<current HD@hypercount>}{usage}'.
+% NOTE(review): \HD@target and the HD@hypercount counter are hypdoc
+% internals; presumably they create the hyperlink anchor the index entries
+% point to -- confirm against hypdoc.
+\def\thu@special@index#1#2{\@bsphack
+  \begingroup
+    \HD@target
+    \let\HDorg@encapchar\encapchar
+    \edef\encapchar usage{%
+      \HDorg@encapchar hdclindex{\the\c@HD@hypercount}{usage}%
+    }%
+    \index{#2\actualchar{\string\ttfamily\space#2}
+           (#1)\encapchar usage}%
+    \index{#1:\levelchar#2\actualchar
+           {\string\ttfamily\space#2}\encapchar usage}%
+  \endgroup
+  \@esphack}
+
+% Base listings style inherited by lstStyleShell and lstStyleLaTeX below:
+% small typewriter text on a light grey background, no line numbers, long
+% lines broken automatically, and coloured strings, keywords and comments.
+% Note gobble=2: the first two characters of every listing line are
+% stripped, so listing bodies are expected to be indented by two characters.
+\lstdefinestyle{lstStyleBase}{%
+   basicstyle=\small\ttfamily,
+   aboveskip=\medskipamount,
+   belowskip=\medskipamount,
+   lineskip=0pt,
+   boxpos=c,
+   showlines=false,
+   extendedchars=true,
+   upquote=true,
+   tabsize=2,
+   showtabs=false,
+   showspaces=false,
+   showstringspaces=false,
+   numbers=none,
+   linewidth=\linewidth,
+   xleftmargin=4pt,
+   xrightmargin=0pt,
+   resetmargins=false,
+   breaklines=true,
+   breakatwhitespace=false,
+   breakindent=0pt,
+   breakautoindent=true,
+   columns=flexible,
+   keepspaces=true,
+   gobble=2,
+   framesep=3pt,
+   rulesep=1pt,
+   framerule=1pt,
+   backgroundcolor=\color{gray!5},
+   stringstyle=\color{green!40!black!100},
+   keywordstyle=\bfseries\color{blue!50!black},
+   commentstyle=\slshape\color{black!60}}
+
+% Style for shell listings: the base style plus a purple rule on the left
+% and bash syntax highlighting.
+\lstdefinestyle{lstStyleShell}{%
+   style=lstStyleBase,
+   frame=l,
+   rulecolor=\color{purple},
+   language=bash}
+
+% Style for LaTeX listings: the base style plus a violet rule on the left
+% and highlighting for the LaTeX dialect of TeX.
+\lstdefinestyle{lstStyleLaTeX}{%
+   style=lstStyleBase,
+   frame=l,
+   rulecolor=\color{violet},
+   language=[LaTeX]TeX}
+
+% Listing environments `latex' and `shell' that select the styles above.
+\lstnewenvironment{latex}{\lstset{style=lstStyleLaTeX}}{}
+\lstnewenvironment{shell}{\lstset{style=lstStyleShell}}{}
+
+% Apply the enumitem key `nosep' to all lists.
+\setlist{nosep}
+
+% Inline markup commands.
+% \option{<name>} prints <name> in sans-serif, \env{<name>} in typewriter.
+\DeclareDocumentCommand{\option}{m}{\textsf{#1}}
+\DeclareDocumentCommand{\env}{m}{\texttt{#1}}
+% \pkg{<name>} and \file{<name>} print <name> in typewriter type and add
+% index entries in the `package' / `file' category; the starred forms
+% (\pkg*, \file*) only print the name and skip the index entries.
+\DeclareDocumentCommand{\pkg}{s m}{%
+  \texttt{#2}\IfBooleanF#1{\thu@special@index{package}{#2}}}
+\DeclareDocumentCommand{\file}{s m}{%
+  \texttt{#2}\IfBooleanF#1{\thu@special@index{file}{#2}}}
+% \myentry{<text>}: bold purple margin note.
+\newcommand{\myentry}[1]{%
+  \marginpar{\raggedleft\color{purple}\bfseries\strut #1}}
+% \note[<title>]{<text>}: magenta remark; <title> (default `Note') is set in
+% bold and <text> is emphasised.
+\newcommand{\note}[2][Note]{{%
+  \color{magenta}{\bfseries #1}\emph{#2}}}
+
+% Logo: `Thu' and `Coursework' in small caps, with a discretionary hyphen
+% between the two parts.
+\def\thucoursework{\textsc{Thu}\-\textsc{Coursework}}
--- a/hw3/report/iidef.sty
+++ b/hw3/report/iidef.sty
@@ -0,0 +1,153 @@
+%%
+%% This is file `iidef.sty',
+%% generated with the docstrip utility.
+%%
+%% The original source files were:
+%%
+%% thucoursework.dtx  (with options: `sty')
+%% 
+%% This is a generated file.
+%% 
+%% Copyright (C) 2021 by zhaofeng-shu33 <616545598@qq.com>
+%% 
+%% This work may be distributed and/or modified under the
+%% conditions of the LaTeX Project Public License, either version 1.3
+%% of this license or (at your option) any later version.
+%% The latest version of this license is in
+%%   http://www.latex-project.org/lppl.txt
+%% and version 1.3 or later is part of all distributions of LaTeX
+%% version 2005/12/01 or later.
+%% 
+%% To produce the documentation run the original source files ending with `.dtx'
+%% through LaTeX.
+%% 
+
+\NeedsTeXFormat{LaTeX2e}[1999/12/01]
+%% This file is a package (a .sty file loaded with \usepackage), so it
+%% identifies itself with \ProvidesPackage rather than \ProvidesClass.
+\ProvidesPackage{iidef}
+[2020/09/09 2.6 Tsinghua University Coursework Template]
+%% configuration of nested enumerate env
+\RequirePackage{enumitem}
+%% set hwcount key-value option
+\RequirePackage{kvoptions}
+%% required by macro DeclareMathOperator
+\RequirePackage{amsmath}
+%% Set up page headers with fancyhdr.
+%% If \lhead is still undefined, fancyhdr is loaded here.  Otherwise the
+%% marker \@thulhead is defined, and the later test on this marker makes the
+%% package skip the definition of its own `runningpage' page style.
+\@ifundefined{lhead}{\RequirePackage{fancyhdr}}
+{\def\@thulhead{thulhead}}
+%% provides the proof environment on which the solution environment is built
+\RequirePackage{amsthm}
+%% semester: \theterm{<text>} sets the term line printed by \courseheader
+\def\@term{term}
+\newcommand{\theterm}[1]{\renewcommand\@term{#1}}
+%% institute: \thecourseinstitute{<text>} sets the institute line.
+%% The placeholder takes no argument, exactly like the definition installed
+%% by \thecourseinstitute; a placeholder declared with an argument would
+%% swallow the token that follows it (for instance the \\ in \courseheader)
+%% whenever the setter has not been called.
+\newcommand{\@courseinstitute}{institute}
+\newcommand{\thecourseinstitute}[1]{\renewcommand\@courseinstitute{#1}}
+%% coursename: \thecoursename{<text>} sets the course name, stored in small
+%% caps; the placeholder takes no argument for the same reason as above
+\newcommand{\@coursename}{coursename}
+\newcommand{\thecoursename}[1]{\renewcommand\@coursename{\textsc{#1}}}
+%% user can rewrite homework name
+\def\@hwname{Homework}
+\def\hwname#1{\renewcommand\@hwname{#1}}
+%% package option thehwcnt=<value> (default 1); the value is stored in
+%% \iidef@thehwcnt and exposed to the document as \thehwcnt
+\DeclareStringOption[1]{thehwcnt}
+\ProcessKeyvalOptions*
+\def\thehwcnt{\iidef@thehwcnt}
+%% page header setup, distinguish between first page(plain style)
+%% and second page on (runningpage style)
+%%***************************************************************************
+%% \courseheader: title block of the first page -- institute, course name and
+%% term centred above a horizontal rule, followed by the underlined homework
+%% title (\@hwname and \thehwcnt).
+%% The title uses \normalfont\bfseries, the LaTeX2e equivalent of the
+%% obsolete \bf, which is only available when the document class defines it.
+\newcommand{\courseheader}{
+\thispagestyle{plain}%first page use native plain style to suppress header
+\vspace*{-1in}
+\begin{center}
+\@courseinstitute\\
+\@coursename\\
+\@term
+\vspace*{0.1in}
+\hrule
+\end{center}
+\begin{center}
+  \underline{\normalfont\bfseries \@hwname\;\thehwcnt} \\
+\end{center}
+}
+%% The `runningpage' style (course name in the left header, institute in the
+%% right header) is defined and activated only when the marker \@thulhead
+%% is undefined.
+\@ifundefined{@thulhead}{
+\fancypagestyle{runningpage}
+{
+  \fancyhead[L]{\small\@coursename}
+  \fancyhead[R]{\small\@courseinstitute}
+}
+%% use runningpage style from second page on
+\pagestyle{runningpage}
+}{}
+%% *********************************************************************************************
+%%name command macro
+%%*************************
+%% \name{<author>}: print <author> flush left and today's date flush right
+%% on one line, then a horizontal rule and 2em of vertical space.
+%% The macro ends with the bare declaration \flushleft, which is not closed
+%% inside the macro, so it stays in effect for the text that follows.
+\newcommand{\name}[1]{
+\begin{flushleft}
+  #1\hfill
+  \today
+\end{flushleft}
+\hrule
+
+\vspace{2em}
+
+\flushleft
+}
+%%*************************
+%% enumitem related configuration
+%% Labels of nested enumerate levels:
+%%   level 1: <homework number>.<arabic>.   level 2: (<lowercase letter>)
+%%   level 3: <lowercase roman>.            level 4: <greek letter>
+%% NOTE(review): \greek is not defined by any package required in this file;
+%% main.tex loads moreenum before this package, which presumably supplies
+%% it -- confirm before using a fourth enumerate level elsewhere.
+\setlist[enumerate,1]{label=\thehwcnt.\arabic*.}
+\setlist[enumerate,2]{label=(\alph*)}
+\setlist[enumerate,3]{label=\roman*.}
+\setlist[enumerate,4]{label=\greek*}
+%%******************************
+%% Heading of the solution environment; \slname{<text>} replaces it.
+\def\@slname{Solution}
+\def\slname#1{\renewcommand\@slname{#1}}
+
+%% The solution environment is defined only if no `solution' command exists
+%% yet.  It is a proof environment whose heading is \@slname; the QED symbol
+%% is emptied just before the proof is closed.
+\@ifundefined{solution}{
+\newenvironment{solution}
+{
+\proof[\@slname]
+}
+{
+%% no qed symbol in solution env
+\renewcommand{\qedsymbol}{}
+\endproof
+}
+}{}
+%%******************************
+%%common math symbols go here
+%%*************************************************
+%% \v{<symbol>}: underlined (vector) form of an arbitrary symbol.
+%% NOTE(review): \def silently replaces any earlier meaning of \v; LaTeX
+%% normally defines \v as the caron text accent, which is therefore no
+%% longer available once this package is loaded.
+\def\v#1{\underline{#1}}
+%% underlined letters denote vectors
+\newcommand{\uc}{\underline{c}}    % c, vec
+\newcommand{\uv}{\underline{v}}    % v, vec
+\newcommand{\uw}{\underline{w}}    % w, vec
+\newcommand{\ux}{\underline{x}}    % x, vec
+\newcommand{\uy}{\underline{y}}    % y, vec
+\newcommand{\uz}{\underline{z}}    % z, vec
+\newcommand{\um}{\underline{m}}    % m, vec
+%% sans-serif letters denote random variables (r.v.)
+\newcommand{\rvx}{\mathsf{x}}    % x, r.v.
+\newcommand{\rvy}{\mathsf{y}}    % y, r.v.
+\newcommand{\rvz}{\mathsf{z}}    % z, r.v.
+\newcommand{\rvw}{\mathsf{w}}    % w, r.v.
+\newcommand{\rvH}{\mathsf{H}}    % H, r.v.
+%% underlined sans-serif letters denote random vectors
+\newcommand{\urvx}{\underline{\mathsf{x}}}    % x, r.v. vec
+\newcommand{\urvy}{\underline{\mathsf{y}}}    % y, r.v. vec
+\newcommand{\urvz}{\underline{\mathsf{z}}}    % z, r.v. vec
+\newcommand{\urvw}{\underline{\mathsf{w}}}    % w, r.v. vec
+
+%% NOTE(review): \triangleq and \mathbb are not provided by any package
+%% required in this file; main.tex loads amssymb, which presumably supplies
+%% them -- confirm when using this package in another document.
+\newcommand{\defas}{\triangleq} %\coloneqq
+\newcommand{\reals}{\mathbb{R}}
+\newcommand{\TT}{\mathrm{T}}    % transpose
+%% starred declarations: operators that take limits above/below in displays
+\DeclareMathOperator*{\argmax}{arg\,max}
+\DeclareMathOperator*{\argmin}{arg\,min}
+\DeclareMathOperator*{\argsup}{arg\,sup}
+\DeclareMathOperator*{\arginf}{arg\,inf}
+\DeclareMathOperator{\diag}{diag}
+\DeclareMathOperator{\Var}{Var}
+\DeclareMathOperator{\Cov}{Cov}
+\DeclareMathOperator{\MSE}{MSE}
+%% NOTE(review): \mathds is not defined by any package loaded in this file
+%% or in main.tex (it is presumably expected from dsfont) -- confirm before
+%% using \1.
+\DeclareMathOperator{\1}{\mathds{1}}
+\DeclareMathOperator{\In}{\mathbb{I}}
+\DeclareMathOperator{\E}{\mathbb{E}}
+\DeclareMathOperator{\Prob}{\mathbb{P}}
+%% \independent: relation symbol built from two \perp signs, the second one
+%% shifted right by 2mu; \mathpalette passes the current math style on to
+%% the helper \independenT.
+\newcommand\independent{\protect\mathpalette{\protect\independenT}{\perp}}
+\def\independenT#1#2{\mathrel{\rlap{$#1#2$}\mkern2mu{#1#2}}}
+%%************************************************************************************
--- a/hw3/report/main.tex
+++ b/hw3/report/main.tex
@@ -0,0 +1,101 @@
+% Homework Template
+\documentclass[a4paper]{article}
+\usepackage{ctex}
+\usepackage{amsmath, amssymb, amsthm}
+\usepackage{moreenum}
+\usepackage{mathtools}
+\usepackage{url}
+\usepackage{bm}
+\usepackage{enumitem}
+\usepackage{graphicx}
+\usepackage{subcaption}
+\usepackage{booktabs} % toprule
+\usepackage[mathcal]{eucal}
+\usepackage[thehwcnt = 3]{iidef}
+
+\thecourseinstitute{清华大学电子工程系}
+\thecoursename{\textbf{媒体与认知}}
+\theterm{2023-2024学年春季学期}
+\hwname{作业}
+\begin{document}
+\courseheader
+% 请在YOUR NAME处填写自己的姓名
+\name{YOUR NAME}
+\vspace{3mm}
+\centerline{\textbf{\Large{理论部分}}}
+
+\section{单选题（15分）}
+% 请在？处填写答案
+\subsection{\underline{?}}
+
+\subsection{\underline{?}}
+
+\subsection{\underline{?}}
+
+\subsection{\underline{?}}
+
+\subsection{\underline{?}}
+
+\section{计算题（15 分）}
+
+
+\subsection{给定两个类别的样本分别为:
+\begin{align*}
+     &\omega_1:\{(3,1),(2,2),(4,3),(3,2)\} \\
+   &\omega_2:\{(1,3),(1,2),(-1,1),(-1,2)\}
+\end{align*}
+试利用LDA，将样本特征维数压缩为一维。
+}
+
+
+
+\vspace{3mm}
+\subsection{模型训练通常需要大量的数据，假设某采集的数据集包含80\%的有效数据和20\%的无效数据。采用一种算法判断数据是否有效，其中无效数据被成功判别为无效数据的概率为90\%，而有效数据被误判为无效数据的概率为5\%。如果某条数据经过该算法被判别为无效数据，则根据贝叶斯定理，这条数据是无效数据的概率是多少？(提示：全概率公式$P(Y)=\sum^{N}_{i=1}P(Y|X_i)P(X_i)$)\\}
+
+\vspace{3mm}
+\subsection{设有两类正态分布的样本集，第一类均值为$\mu_1=[2,-1]^T$，第二类均值为$\mu_2=[1,1]^T$。两类样本集的协方差矩阵和出现的先验概率都相等：$\Sigma_1=\Sigma_2=\Sigma=\left[ \begin{array}{cc}
+    4 & 2 \\
+    2 & \frac{4}{3}
+\end{array} \right]$，$p(\omega_1)=p(\omega_2)$。试计算分类界面，并对特征向量$x=[6,2]^T$分类。}
+
+\vspace{3mm}
+\subsection{给定异或的样本集$D=\left\{\left((0,0)^T,-1\right),\left((0,1)^T,1\right),\left((1,0)^T,1\right),\left((1,1)^T,-1\right)\right\}$，该样本集是线性不可分的，可采用如下所示的多项式函数$\phi(\mathbf{x})$将样本$D=\left\{(\mathbf{x}_n,y_n)\right\}$映射为$D_\phi=\left\{(\phi(\mathbf{x}_n),y_n)\right\}$，其中$\phi(\mathbf{x})$满足
+\begin{equation*}
+\begin{aligned}
+    \phi_1(\mathbf{x})&=2(x_1-0.5) \\
+    \phi_2(\mathbf{x})&=4(x_1-0.5)(x_2-0.5)
+\end{aligned}
+\end{equation*}
+\\
+\qquad(1) 给出映射后的样本集；\\
+\qquad(2) 在映射后的样本集中，设计一个线性SVM分类器，给出支持向量及分类界面。
+}
+
+
+
+\vspace{3mm}
+\subsection{使用KMeans算法对2维空间中的6个点$(0,2)$,$(2,0)$,$(2,3)$,$(3,2)$,$(4,0)$,$(5,4)$进行聚类，距离函数选择欧氏距离$d=\sqrt{(x_1-x_2)^2+(y_1-y_2)^2}$。\\
+\qquad (1)起始聚类中心选择(0,0)和(4,3)，计算聚类中心；\\
+\qquad (2)起始聚类中心选择(1,4)和(3,1)，计算聚类中心。\\
+}
+
+\vspace{3mm}
+\centerline{\textbf{\Large{编程部分}}}
+
+
+\vspace{3mm}
+% 请根据是否选择自选课题的情况选择“编程作业报告”或“自选课题进度汇报”中的一项完成
+\section{编程作业报告}
+% 请在此处完成编程作业报告
+
+\section{自选课题进度汇报}
+% 请在此处介绍自选课题
+
+\end{document}
+
+
+
+%%% Local Variables:
+%%% mode: latex
+%%% TeX-master: t
+%%% End: