SVM and PCA not working

2024-05-18 00:12:06 +08:00
parent 81de7b1d58
commit 820f679067
8 changed files with 85 additions and 64 deletions
--- a/hw3/code/data_preprocess.py
+++ b/hw3/code/data_preprocess.py
@@ -3,8 +3,8 @@
 #             Homework 3 Support Vector Machine
 #             data_preprocess.py - Using pretrained convolutional layers to extract features,
 #                                   and using PCA for dimensionality reduction
-#             Student ID:
-#             Name:
+#             Student ID: 2022010639
+#             Name: Yixuan Gao
 #             Tsinghua University
 #             (C) Copyright 2024
 # ========================================================
@@ -29,7 +29,9 @@ def preprocess(pre_conv, data_root, image_size, classes):
    data_mean, u = PCA(train_data, 2)

    # TODO: using PCA to compress the dimensionality of the train_data after subtracting the mean vector
-    train_data_pca = ???
+    print(train_data)
+    print(data_mean)
+    train_data_pca = (train_data - data_mean) @ u

    visualize(train_data_pca, train_label, "train")
    savedata(train_data_pca, train_label, data_root+"/train.pt")
@@ -40,7 +42,7 @@ def preprocess(pre_conv, data_root, image_size, classes):
    val_data, val_label = loaddata(pre_conv, data_root, 'val', image_size, classes)

    # TODO: using PCA to compress the dimensionality of the val_data after subtracting the mean vector
-    val_data_pca = ???
+    val_data_pca = (val_data - data_mean) @ u

    visualize(val_data_pca, val_label, "val")
    savedata(val_data_pca, val_label, data_root+"/val.pt")
@@ -51,7 +53,7 @@ def preprocess(pre_conv, data_root, image_size, classes):
    test_data, test_label = loaddata(pre_conv, data_root, 'test', image_size, classes)

    # TODO: using PCA to compress the dimensionality of the test_data after subtracting the mean vector
-    test_data_pca = ???
+    test_data_pca = (test_data - data_mean) @ u

    visualize(test_data_pca, test_label, "test")
    savedata(test_data_pca, test_label, data_root+"/test.pt")
@@ -95,14 +97,15 @@ def PCA(data, dim=2):
    # TODO 2: complete the algorithm of PCA, calculate the mean value of the data and the projection matrix

    # TODO: compute the mean of train_data
-    data_mean = ???
+    data_mean = data.mean(dim=0)
    # TODO: compute the covariance matrix of train_data
-    data_cov = ???
+    diff = data - data_mean
+    data_cov = diff.T @ diff
    # TODO: compute the SVD decomposition of data_cov using torch.linalg.svd
    # reference: https://pytorch.org/docs/1.11/generated/torch.linalg.svd.html
-    ???
+    u, s, v = torch.linalg.svd(data_cov)
    # TODO: return the proper 'data_mean' and 'u[]'
-    return ???
+    return data_mean, u[:, :dim]


 def loaddata(pre_conv, data_root, mode, image_size, classes):
--- a/hw3/code/svm_hw.py
+++ b/hw3/code/svm_hw.py
@@ -2,8 +2,8 @@
 #             Media and Cognition
 #             Homework 3 Support Vector Machine
 #             svm_hw.py - The implementation of SVM using hinge loss
-#             Student ID:
-#             Name:
+#             Student ID: 2022010639
+#             Name: Yixuan Gao
 #             Tsinghua University
 #             (C) Copyright 2024
 # ========================================================
@@ -34,7 +34,7 @@ class LinearFunction(torch.autograd.Function):
        '''

        # TODO
-        y = ???
+        y = torch.matmul(x, W.T) + b
        ctx.save_for_backward(x, W)

        return y
@@ -59,9 +59,9 @@ class LinearFunction(torch.autograd.Function):
        # you can use torch.matmul(A, B) to compute matrix product of A and B

        # TODO
-        grad_input = ???
-        grad_W = ???
-        grad_b = ???
+        grad_input = torch.matmul(grad_output, W)
+        grad_W = torch.matmul(grad_output.T, x)
+        grad_b = grad_output.sum(0)

        return grad_input, grad_W, grad_b

@@ -85,7 +85,11 @@ class Hinge(torch.autograd.Function):

        # TODO: compute the hinge loss (together with L2 norm for SVM): loss = 0.5*||w||^2 + C*\sum_i{max(0, 1 - y_i*output_i)}
        # you may need F.relu() to implement the max() function.
-        loss = ???
+        # print("product", label * output.reshape_as(label))
+        # print("minus", 1 - label * output.reshape_as(label))
+        # print("relu", F.relu(1 - label * output.reshape_as(label)))
+        # print("sum", (F.relu(1 - label * output.reshape_as(label))).sum())
+        loss = 1/2 * (W @ W.T) + C * (F.relu(1 - label * output.reshape_as(label))).sum()
        ctx.save_for_backward(output, W, label, C)

        return loss
@@ -102,8 +106,11 @@ class Hinge(torch.autograd.Function):
        """
        output, W, label, C = ctx.saved_tensors
        # TODO: compute the grad with respect to the output of the linear function and W: dL/doutput, dL/dW
-        grad_output = ???
-        grad_W = ???
+        # print("output", output, "label", label, "product", (1 - label.reshape_as(output) * output))
+        # print("grad_loss size", grad_loss.size())
+        # print("sizeof l / output", (C * torch.heaviside(1 - label.reshape_as(output) * output, torch.tensor(0).type_as(output)) * (-label.reshape_as(output))).size())
+        grad_output = grad_loss * C * (torch.heaviside(1 - label.reshape_as(output) * output, torch.tensor(1).type_as(output)) * (-label.reshape_as(output)))
+        grad_W = grad_loss * W
        return grad_output, grad_W, None, None


@@ -124,8 +131,8 @@ class SVM_HINGE(nn.Module):
            please use torch.randn() to initialize W and b
        """

-        self.W = ???
-        self.b = ???
+        self.W = nn.Parameter(torch.rand(1, in_channels), requires_grad=True)
+        self.b = nn.Parameter(torch.rand(1, ), requires_grad=True)
        self.C = torch.tensor([[C]], requires_grad=False)

    def forward(self, x, label=None):
--- a/hw3/code/test_svm.py
+++ b/hw3/code/test_svm.py
@@ -2,8 +2,8 @@
 #             Media and Cognition
 #             Homework 3 Support Vector Machine
 #             test_svm.py - Test svm model for traffic sign
-#             Student ID:
-#             Name:
+#             Student ID: 2022010639
+#             Name: Yixuan Gao
 #             Tsinghua University
 #             (C) Copyright 2024
 # ========================================================
@@ -14,6 +14,7 @@ import torch
 from datasets import Traffic_Dataset
 from svm_hw import SVM_HINGE
 from torch.utils.data import DataLoader
+import os.path


 # ==== Part 2: testing
@@ -33,25 +34,25 @@ def test(
    # TODO 1: =================== load the pretrained SVM model ==================================

    # TODO: construct testing data loader with 'Traffic_Dataset' and DataLoader, and set 'batch_size=1' and 'shuffle=False'
-    test_data = ???
-    test_loader = ???
+    test_data = Traffic_Dataset(os.path.join(data_root, 'test.pt'))
+    test_loader = DataLoader(test_data, shuffle=False)

    # TODO: load state dictionary of pretrained SVM model
-    model_svm = ???
+    model_svm = torch.load(os.path.join(model_save_path))

    # TODO: initialize the SVM model using 'model_svm["configs"]["feature_channel"]' and 'model_svm["configs"]["C"]'
-    svm = ???
+    svm = SVM_HINGE(model_svm["configs"]["feature_channel"], model_svm["configs"]["C"])

    # TODO: load model parameters (model_svm['state_dict']) we saved in model_path using svm.load_state_dict()
-    ???
+    svm.load_state_dict(model_svm["model_state"])

    # TODO: put the model on CPU or GPU
-    ???
+    svm.to(device)

    # TODO 2 : ================================ testing ==============================================

    # TODO: set the model in evaluation mode
-    ???
+    svm.eval()

    # to calculate and save the testing accuracy
    n_correct = 0.  # number of images that are correctly classified
@@ -59,19 +60,22 @@ def test(

    with torch.no_grad():  # we do not need to compute gradients during validation
        # TODO: inference on the testing dataset, similar to the training stage but use 'test_loader'.
-        for ??? in ???:
+        for input, label in test_loader:
            # TODO: set data type (.float()) and device (.to())
-            ???
+            input, label = (
+                    input.type(torch.float).to(device),
+                    label.type(torch.float).to(device)
+            )

            # TODO: run the model; at the validation step, the model only needs one input: feas
            # _ refers to a placeholder, which means we do not need the second returned value during validating
-            ???
+            out, _ = svm(input)

            # TODO: sum up the number of images correctly recognized. note the shapes of 'out' and 'labels' are different
-            n_correct += ???
+            n_correct += (out.reshape_as(label) == label).sum().item()

            # TODO: sum up the total number of images
-            n_feas += ???
+            n_feas += label.numel()

    # show prediction accuracy
    acc = 100 * n_correct / n_feas
--- a/hw3/code/train_svm.py
+++ b/hw3/code/train_svm.py
@@ -2,8 +2,8 @@
 #             Media and Cognition
 #             Homework 3 Support Vector Machine
 #             train_svm.py - Train svm model for traffic sign
-#             Student ID:
-#             Name:
+#             Student ID: 2022010639
+#             Name: Yixuan Gao
 #             Tsinghua University
 #             (C) Copyright 2024
 # ========================================================
@@ -17,6 +17,7 @@ import random
 from datasets import Traffic_Dataset
 from svm_hw import SVM_HINGE
 from torch.utils.data import DataLoader
+import os.path


 # ==== Part 2: training and validation
@@ -44,22 +45,22 @@ def train(
    """

    # TODO 1: construct training and validation data loader with 'Traffic_Dataset' and DataLoader, and set proper values for 'batch_size' and 'shuffle'
-    train_data = ???
-    train_loader = ???
-    val_data = ???
-    val_loader = ???
+    train_data = Traffic_Dataset(os.path.join(data_root, 'train.pt'))
+    train_loader = DataLoader(train_data, batch_size=batch_size, shuffle=True)
+    val_data = Traffic_Dataset(os.path.join(data_root, 'val.pt'))
+    val_loader = DataLoader(val_data, batch_size=batch_size, shuffle=True)

    # scale the regularization coefficient
    C = C * len(train_loader)

    # TODO: initialize the SVM model
-    svm = ???
+    svm = SVM_HINGE(feature_channel, C)

    # TODO: put the model on CPU or GPU
-    ???
+    svm.to(device)

    # TODO: define the Adam optimizer
-    optimizer = ???
+    optimizer = torch.optim.Adam(svm.parameters(), lr)

    # to save the training loss, training accuracy, validation accuracy, and the epoch index of each training epoch
    train_loss = []
@@ -69,11 +70,11 @@ def train(

    for epoch in range(n_epoch):
        # TODO: save the index of current epoch in the array 'epochs'
-        ???
+        epochs.append(epoch + 1)

        # TODO 2: ========================= training =======================
-        # TODO: set the model in training mode
-        ???
+        # TODO: set the model in training mode
+        svm.train()

        # to calculate and save the training loss and training accuracy
        total_loss = 0.  # to save total training loss in one epoch
@@ -82,42 +83,45 @@ def train(

        # TODO: get a batch of data; you may need enumerate() to iteratively get data from 'train_loader'.
        # you can refer to previous homework, for example hw2
-        for ??? in ???:
+        for step, (input, label) in enumerate(train_loader):
            # TODO: set data type (.float()) and device (.to())
-            ???
+            input, label = (
+                input.type(torch.float).to(device),
+                label.type(torch.float).to(device)
+            )

            # TODO: clear gradients in the optimizer
-            ???
+            optimizer.zero_grad()

            # TODO: run the model with hinge loss; the model needs two inputs: feas and labels
-            ???
+            out, loss = svm(input, label)

            # TODO: back-propagation on the computation graph
-            ???
+            loss.backward()

            # TODO: sum up the total loss; loss.item() returns the value of the tensor as a standard Python number
-            total_loss += ???
+            total_loss += loss.item()

            # TODO: call a function to update the parameters of the models
-            ???
+            optimizer.step()

            # TODO: sum up the number of images correctly recognized. note the shapes of 'out' and 'labels' are different
-            n_correct += ???
+            n_correct += (out.reshape_as(label) == label).sum().item()

            # TODO: sum up the total image number
-            n_feas += ???
+            n_feas += label.numel()

        # average of the total loss for iterations
        acc = 100 * n_correct / n_feas
        avg_loss = total_loss / len(train_loader)
-        train_acc.append(acc.cpu().numpy())
+        train_acc.append(acc)
        train_loss.append(avg_loss)
        print('Epoch {:02d}: loss = {:.3f}, training accuracy = {:.1f}%'.format(epoch + 1, avg_loss, acc))

        # TODO 3: ========================== Validation ======================================

        # TODO: set the model in evaluation mode
-        ???
+        svm.eval()

        # to calculate and save the validation accuracy
        n_correct = 0.  # number of images that are correctly classified
@@ -125,24 +129,27 @@ def train(

        with torch.no_grad():  # we do not need to compute gradients during validation
            # TODO: inference on the validation dataset, similar to the training stage but use 'val_loader'.
-            for ??? in ???:
+            for input, label in val_loader:
                # TODO: set data type (.float()) and device (.to())
-                ???
+                input, label = (
+                    input.type(torch.float).to(device),
+                    label.type(torch.float).to(device)
+                )

                # TODO: run the model; at the validation step, the model only needs one input: feas
                # _ refers to a placeholder, which means we do not need the second returned value during validating
-                ???
+                out, _ = svm(input)

                # TODO: sum up the number of images correctly recognized. note the shapes of 'out' and 'labels' are different
-                n_correct += ???
+                n_correct += (out.reshape_as(label) == label).sum().item()

                # TODO: sum up the total image number
-                n_feas += ???
+                n_feas += label.numel()

        # show prediction accuracy
        acc = 100 * n_correct / n_feas
        print('Epoch {:02d}: validation accuracy = {:.1f}%'.format(epoch + 1, acc))
-        val_acc.append(acc.cpu().numpy())
+        val_acc.append(acc)

    # save model parameters in a file
    torch.save({'state_dict': svm.state_dict(),
@@ -157,7 +164,7 @@ def train(

    # TODO 4: calculate the index of support vectors in training samples using 'train_data.datas' and 'train_data.labels'
    # 'sv' should be a list in python structure with the shape of [K], where K is the number of support vectors.
-    sv = ???
+    sv = [idx for idx, (data, label) in enumerate(zip(train_data.datas, train_data.labels)) if label * ((W @ data) + b) <= 1]

    plot(train_loss, train_acc, val_acc, epochs)
    plot_feature(train_features=train_data.datas, val_features=val_data.datas, train_labels=train_data.labels,
--- a/hw3/report/img/check/check.png
+++ b/hw3/report/img/check/check.png
--- a/hw3/report/img/preprocess/preprocess_test.png
+++ b/hw3/report/img/preprocess/preprocess_test.png
--- a/hw3/report/img/preprocess/preprocess_train.png
+++ b/hw3/report/img/preprocess/preprocess_train.png
--- a/hw3/report/img/preprocess/preprocess_val.png
+++ b/hw3/report/img/preprocess/preprocess_val.png