diff --git a/hw3/code/data_preprocess.py b/hw3/code/data_preprocess.py index af110ed..318e3ae 100644 --- a/hw3/code/data_preprocess.py +++ b/hw3/code/data_preprocess.py @@ -28,9 +28,9 @@ def preprocess(pre_conv, data_root, image_size, classes): # calculate the mean and PCA projection matrix data_mean, u = PCA(train_data, 2) + u = u * 20 + # TODO: using PCA to compress the dimensionality of the train_data after subtracting the mean vector - print(train_data) - print(data_mean) train_data_pca = (train_data - data_mean) @ u visualize(train_data_pca, train_label, "train") @@ -100,14 +100,14 @@ def PCA(data, dim=2): data_mean = data.mean(dim=0) # TODO: compute the covariance matrix of train_data diff = data - data_mean - data_cov = diff.T @ diff + # data_cov = diff.T @ diff + data_cov = torch.cov(diff.T) # TODO: compute the SVD decompositon of data_cov using torch.linalg.svd # reference: https://pytorch.org/docs/1.11/generated/torch.linalg.svd.html u, s, v = torch.linalg.svd(data_cov) # TODO: return the proper 'data_mean' and 'u[]' return data_mean, u[:, :dim] - def loaddata(pre_conv, data_root, mode, image_size, classes): """ load one dataset, and use pretrained network in homework 2 to extract feature @@ -173,7 +173,7 @@ if __name__ == "__main__": configs["use_stn"], configs["dropout_prob"], ) - cls.load_state_dict(pretrained_checkpoint["model_state"]) + cls.load_state_dict(pretrained_checkpoint["model_state"], strict=False) for param in cls.parameters(): param.requires_grad = False conv = cls.conv_net diff --git a/hw3/code/svm_hw.py b/hw3/code/svm_hw.py index abfb47a..f23a92c 100644 --- a/hw3/code/svm_hw.py +++ b/hw3/code/svm_hw.py @@ -85,11 +85,13 @@ class Hinge(torch.autograd.Function): # TODO: compute the hinge loss (together with L2 norm for SVM): loss = 0.5*||w||^2 + C*\sum_i{max(0, 1 - y_i*output_i)} # you may need F.relu() to implement the max() function. + # print("output size", output.size()) + # print("label size", label.size()) # print("product", label * output.reshape_as(label)) # print("minus", 1 - label * output.reshape_as(label)) # print("relu", F.relu(1 - label * output.reshape_as(label))) # print("sum", (F.relu(1 - label * output.reshape_as(label))).sum()) - loss = 1/2 * (W @ W.T) + C * (F.relu(1 - label * output.reshape_as(label))).sum() + loss = 1/2 * (W @ W.T) + C * (F.relu(1 - (output.T * label).T)).sum() ctx.save_for_backward(output, W, label, C) return loss @@ -109,7 +111,7 @@ class Hinge(torch.autograd.Function): # print("output", output, "label", label, "product", (1 - label.reshape_as(output) * output)) # print("grad_loss size", grad_loss.size()) # print("sizeof l / output", (C * torch.heaviside(1 - label.reshape_as(output) * output, torch.tensor(0).type_as(output)) * (-label.reshape_as(output))).size()) - grad_output = grad_loss * C * (torch.heaviside(1 - label.reshape_as(output) * output, torch.tensor(1).type_as(output)) * (-label.reshape_as(output))) + grad_output = grad_loss * C * ((torch.heaviside(1 - (output.T * label).T, torch.tensor(1).type_as(output)).T * (-label))).T grad_W = grad_loss * W return grad_output, grad_W, None, None diff --git a/hw3/code/test_svm.py b/hw3/code/test_svm.py index 4aa1ceb..5f49cc0 100644 --- a/hw3/code/test_svm.py +++ b/hw3/code/test_svm.py @@ -44,7 +44,7 @@ def test( svm = SVM_HINGE(model_svm["configs"]["feature_channel"], model_svm["configs"]["C"]) # TODO: load model parameters (model_svm['state_dict']) we saved in model_path using svm.load_state_dict() - svm.load_state_dict(model_svm["model_state"]) + svm.load_state_dict(model_svm["state_dict"]) # TODO: put the model on CPU or GPU svm.to(device) diff --git a/hw3/report/img/check/check.png b/hw3/report/img/check/check.png index 2907b96..f6697cf 100644 Binary files a/hw3/report/img/check/check.png and b/hw3/report/img/check/check.png differ diff --git a/hw3/report/img/preprocess/preprocess_test.png b/hw3/report/img/preprocess/preprocess_test.png index 683961c..aae0ba9 100644 Binary files a/hw3/report/img/preprocess/preprocess_test.png and b/hw3/report/img/preprocess/preprocess_test.png differ diff --git a/hw3/report/img/preprocess/preprocess_train.png b/hw3/report/img/preprocess/preprocess_train.png index 99590ad..4d9f961 100644 Binary files a/hw3/report/img/preprocess/preprocess_train.png and b/hw3/report/img/preprocess/preprocess_train.png differ diff --git a/hw3/report/img/preprocess/preprocess_val.png b/hw3/report/img/preprocess/preprocess_val.png index f2a6dc8..34d4755 100644 Binary files a/hw3/report/img/preprocess/preprocess_val.png and b/hw3/report/img/preprocess/preprocess_val.png differ diff --git a/hw3/report/img/train/1/accu.png b/hw3/report/img/train/1/accu.png new file mode 100644 index 0000000..544625a Binary files /dev/null and b/hw3/report/img/train/1/accu.png differ diff --git a/hw3/report/img/train/1/loss.png b/hw3/report/img/train/1/loss.png new file mode 100644 index 0000000..350381d Binary files /dev/null and b/hw3/report/img/train/1/loss.png differ diff --git a/hw3/report/img/train/1/sv.png b/hw3/report/img/train/1/sv.png new file mode 100644 index 0000000..78eddb0 Binary files /dev/null and b/hw3/report/img/train/1/sv.png differ diff --git a/hw3/report/img/train/1/test.png b/hw3/report/img/train/1/test.png new file mode 100644 index 0000000..acd2d0b Binary files /dev/null and b/hw3/report/img/train/1/test.png differ diff --git a/hw3/report/img/train/1/val.png b/hw3/report/img/train/1/val.png new file mode 100644 index 0000000..3d608b5 Binary files /dev/null and b/hw3/report/img/train/1/val.png differ diff --git a/hw3/report/img/train/1e-6/accu.png b/hw3/report/img/train/1e-6/accu.png new file mode 100644 index 0000000..0e56056 Binary files /dev/null and b/hw3/report/img/train/1e-6/accu.png differ diff --git a/hw3/report/img/train/1e-6/loss.png b/hw3/report/img/train/1e-6/loss.png new file mode 100644 index 0000000..ebfcb83 Binary files /dev/null and b/hw3/report/img/train/1e-6/loss.png differ diff --git a/hw3/report/img/train/1e-6/sv.png b/hw3/report/img/train/1e-6/sv.png new file mode 100644 index 0000000..b1f7b94 Binary files /dev/null and b/hw3/report/img/train/1e-6/sv.png differ diff --git a/hw3/report/img/train/1e-6/test.png b/hw3/report/img/train/1e-6/test.png new file mode 100644 index 0000000..247c318 Binary files /dev/null and b/hw3/report/img/train/1e-6/test.png differ diff --git a/hw3/report/img/train/1e-6/val.png b/hw3/report/img/train/1e-6/val.png new file mode 100644 index 0000000..54d7c0b Binary files /dev/null and b/hw3/report/img/train/1e-6/val.png differ diff --git a/hw3/report/img/train/default/loss.png b/hw3/report/img/train/default/loss.png new file mode 100644 index 0000000..d5dcf20 Binary files /dev/null and b/hw3/report/img/train/default/loss.png differ diff --git a/hw3/report/img/train/default/sv.png b/hw3/report/img/train/default/sv.png new file mode 100644 index 0000000..84019d2 Binary files /dev/null and b/hw3/report/img/train/default/sv.png differ diff --git a/hw3/report/img/train/default/test.png b/hw3/report/img/train/default/test.png new file mode 100644 index 0000000..ac5acc2 Binary files /dev/null and b/hw3/report/img/train/default/test.png differ diff --git a/hw3/report/img/train/default/train_accu.png b/hw3/report/img/train/default/train_accu.png new file mode 100644 index 0000000..f705c04 Binary files /dev/null and b/hw3/report/img/train/default/train_accu.png differ diff --git a/hw3/report/img/train/default/val.png b/hw3/report/img/train/default/val.png new file mode 100644 index 0000000..a0f7ef0 Binary files /dev/null and b/hw3/report/img/train/default/val.png differ diff --git a/hw3/report/main.tex b/hw3/report/main.tex index 0604068..41187ac 100644 --- a/hw3/report/main.tex +++ b/hw3/report/main.tex @@ -12,6 +12,42 @@ \usepackage{booktabs} % toprule \usepackage[mathcal]{eucal} \usepackage[thehwcnt = 3]{iidef} +\usepackage{listings} +\usepackage{fontspec} +\usepackage{xcolor} +\usepackage{float} +\usepackage{siunitx} + +\newfontfamily\codefont[Ligatures=ResetAll]{Fira Code}[Contextuals={Alternate}] +\newfontfamily\cascadia{Cascadia Code} + +\lstset{ + basicstyle = \small\codefont, + % --- + tabsize = 4, + showstringspaces = false, + numbers = left, + numberstyle = \codefont, + % --- + breaklines = true, + captionpos = t, + % --- + frame = l, + flexiblecolumns, +} + +\lstdefinestyle{Python}{ + language = Python, % 语言选Python + keywordstyle = \color{blue}, + keywordstyle = [2] \color{teal}, + stringstyle = \color{orange!80!black}, + commentstyle = \color{red}, + identifierstyle = \color{blue!80!white}, +} + +\lstdefinestyle{Bash}{ + language = bash +} \thecourseinstitute{清华大学电子工程系} \thecoursename{\textbf{媒体与认知}} @@ -232,10 +268,106 @@ \vspace{3mm} % 请根据是否选择自选课题的情况选择“编程作业报告”或“自选课题进度汇报”中的一项完成 \section{编程作业报告} -% 请在此处完成编程作业报告 +\subsection{程序验证} +与助教给出的图片相比,我写出的程序PCA得到的结果的xy坐标都在$[-1, 1]$之间,不利于之后的分类。我将所有的PCA之后的坐标都扩大了20倍。 -\section{自选课题进度汇报} -% 请在此处介绍自选课题 +运行\lstinline{check.py}进行检查: +\begin{figure}[H] + \centering + \includegraphics[width=\linewidth]{img/check/check.png} +\end{figure} + +\subsection{数据预处理} +运行 +\begin{lstlisting}[style=Bash] +python data_preprocess.py +\end{lstlisting} +得到的输出为 +\begin{figure}[H] + \centering + \begin{subfigure}[t]{.45\linewidth} + \includegraphics[width=\textwidth]{img/preprocess/preprocess_train.png} + \caption{训练集preprocess结果} + \end{subfigure} + \hspace{0.5cm} + \begin{subfigure}[t]{.45\linewidth} + \includegraphics[width=\textwidth]{img/preprocess/preprocess_val.png} + \caption{验证集preprocess结果} + \end{subfigure}\\[2ex] + \begin{subfigure}[t]{.45\linewidth} + \includegraphics[width=\textwidth]{img/preprocess/preprocess_test.png} + \caption{测试集preprocess结果} + \end{subfigure} +\end{figure} + +\subsection{训练、验证及测试} +\begin{figure}[H] + \centering + \begin{subfigure}[t]{.45\linewidth} + \includegraphics[width=\textwidth]{img/train/default/loss.png} + \end{subfigure} + \hspace{0.5cm} + \begin{subfigure}[t]{.45\linewidth} + \includegraphics[width=\textwidth]{img/train/default/train_accu.png} + \end{subfigure}\\[2ex] + \begin{subfigure}[t]{.45\linewidth} + \includegraphics[width=\textwidth]{img/train/default/sv.png} + \end{subfigure} + \hspace{0.5cm} + \begin{subfigure}[t]{.45\linewidth} + \includegraphics[width=\textwidth]{img/train/default/val.png} + \end{subfigure}\\[2ex] + \begin{subfigure}[t]{.8\linewidth} + \includegraphics[width=\textwidth]{img/train/default/test.png} + \end{subfigure} +\end{figure} + +\subsection{调整正则化系数} +\subsubsection{C = \num{1e-6}} +\begin{figure}[H] + \centering + \begin{subfigure}[t]{.45\linewidth} + \includegraphics[width=\textwidth]{img/train/1e-6/loss.png} + \end{subfigure} + \hspace{0.5cm} + \begin{subfigure}[t]{.45\linewidth} + \includegraphics[width=\textwidth]{img/train/1e-6/accu.png} + \end{subfigure}\\[2ex] + \begin{subfigure}[t]{.45\linewidth} + \includegraphics[width=\textwidth]{img/train/1e-6/sv.png} + \end{subfigure} + \hspace{0.5cm} + \begin{subfigure}[t]{.45\linewidth} + \includegraphics[width=\textwidth]{img/train/1e-6/val.png} + \end{subfigure}\\[2ex] + \begin{subfigure}[t]{.8\linewidth} + \includegraphics[width=\textwidth]{img/train/1e-6/test.png} + \end{subfigure} +\end{figure} +可以看到出现了严重的欠拟合,分类界面超出了绘图的范围。这是因为C过小,导致不能正确地分辨合适的分类界面。 + +\subsubsection{C = 1} +\begin{figure}[H] + \centering + \begin{subfigure}[t]{.45\linewidth} + \includegraphics[width=\textwidth]{img/train/1/loss.png} + \end{subfigure} + \hspace{0.5cm} + \begin{subfigure}[t]{.45\linewidth} + \includegraphics[width=\textwidth]{img/train/1/accu.png} + \end{subfigure}\\[2ex] + \begin{subfigure}[t]{.45\linewidth} + \includegraphics[width=\textwidth]{img/train/1/sv.png} + \end{subfigure} + \hspace{0.5cm} + \begin{subfigure}[t]{.45\linewidth} + \includegraphics[width=\textwidth]{img/train/1/val.png} + \end{subfigure}\\[2ex] + \begin{subfigure}[t]{.8\linewidth} + \includegraphics[width=\textwidth]{img/train/1/test.png} + \end{subfigure} +\end{figure} +发生了过拟合,直线被交界面的点限制,斜率不是最优。 \end{document} diff --git a/testtorch.ipynb b/testtorch.ipynb index 37d57d5..aea17b3 100644 --- a/testtorch.ipynb +++ b/testtorch.ipynb @@ -2,7 +2,7 @@ "cells": [ { "cell_type": "code", - "execution_count": 1, + "execution_count": 2, "metadata": {}, "outputs": [], "source": [ @@ -152,6 +152,66 @@ "print(conv_1(a).size())\n", "print(conv_2(conv_1(a)).size())\n" ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "tensor([0., 1.])\n", + "1\n" + ] + } + ], + "source": [ + "a = torch.Tensor([1.0, 2.0])\n", + "b = torch.Tensor([1.0, 1.0])\n", + "print((a > b).type_as(a))\n", + "print((a == b).sum().item())" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "tensor(2.5000)\n" + ] + } + ], + "source": [ + "a = torch.Tensor([[1.0, 2.0], [3.0, 4.0]])\n", + "mu = a.mean(dim=0)\n", + "print(mu, a - mu)" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "tensor([[5.],\n", + " [4.]])\n" + ] + } + ], + "source": [ + "a = torch.Tensor([[5], [4]])\n", + "b = torch.Tensor([1])\n", + "print((a.T * b).T)" + ] } ], "metadata": {