380 lines
12 KiB
TeX
380 lines
12 KiB
TeX
% Homework Template
|
||
\documentclass[a4paper]{article}
|
||
\usepackage{ctex}
|
||
\usepackage{amsmath, amssymb, amsthm}
|
||
\usepackage{moreenum}
|
||
\usepackage{mathtools}
|
||
\usepackage{url}
|
||
\usepackage{bm}
|
||
\usepackage{enumitem}
|
||
\usepackage{graphicx}
|
||
\usepackage{subcaption}
|
||
\usepackage{booktabs} % toprule
|
||
\usepackage[mathcal]{eucal}
|
||
\usepackage[thehwcnt = 3]{iidef}
|
||
\usepackage{listings}
|
||
\usepackage{fontspec}
|
||
\usepackage{xcolor}
|
||
\usepackage{float}
|
||
\usepackage{siunitx}
|
||
|
||
\newfontfamily\codefont[Ligatures=ResetAll]{Fira Code}[Contextuals={Alternate}]
|
||
\newfontfamily\cascadia{Cascadia Code}
|
||
|
||
\lstset{
|
||
basicstyle = \small\codefont,
|
||
% ---
|
||
tabsize = 4,
|
||
showstringspaces = false,
|
||
numbers = left,
|
||
numberstyle = \codefont,
|
||
% ---
|
||
breaklines = true,
|
||
captionpos = t,
|
||
% ---
|
||
frame = l,
|
||
flexiblecolumns,
|
||
}
|
||
|
||
\lstdefinestyle{Python}{
|
||
language = Python, % 语言选Python
|
||
keywordstyle = \color{blue},
|
||
keywordstyle = [2] \color{teal},
|
||
stringstyle = \color{orange!80!black},
|
||
commentstyle = \color{red},
|
||
identifierstyle = \color{blue!80!white},
|
||
}
|
||
|
||
\lstdefinestyle{Bash}{
|
||
language = bash
|
||
}
|
||
|
||
\thecourseinstitute{清华大学电子工程系}
|
||
\thecoursename{\textbf{媒体与认知}}
|
||
\theterm{2023-2024学年春季学期}
|
||
\hwname{作业}
|
||
\begin{document}
|
||
\courseheader
|
||
% 请在YOUR NAME处填写自己的姓名
|
||
\name{高艺轩}
|
||
\vspace{3mm}
|
||
\centerline{\textbf{\Large{理论部分}}}
|
||
|
||
\section{单选题(15分)}
|
||
% 请在?处填写答案
|
||
\subsection{\underline{D}}
|
||
|
||
\subsection{\underline{C}}
|
||
|
||
\subsection{\underline{D}}
|
||
|
||
\subsection{\underline{D}}
|
||
|
||
\subsection{\underline{B}}
|
||
|
||
\section{计算题(15 分)}
|
||
|
||
|
||
\subsection{给定两个类别的样本分别为:
|
||
\begin{align*}
|
||
&\omega_1:\{(3,1),(2,2),(4,3),(3,2)\} \\
|
||
&\omega_2:\{(1,3),(1,2),(-1,1),(-1,2)\}
|
||
\end{align*}
|
||
试利用LDA,将样本特征维数压缩为一维。
|
||
}
|
||
|
||
\begin{proof}[解]
|
||
首先计算$\mu_1 = (3, 2), \mu_2 = (0, 2), \mu = (1.5, 2)$。因此
|
||
\[S_1 = \frac{1}{4}
|
||
\left(
|
||
\begin{bmatrix}
|
||
0 & 0\\
|
||
0 & 1
|
||
\end{bmatrix}
|
||
+
|
||
\begin{bmatrix}
|
||
1 & 0\\
|
||
0 & 0
|
||
\end{bmatrix}
|
||
+
|
||
\begin{bmatrix}
|
||
1 & 1\\
|
||
1 & 1
|
||
\end{bmatrix}
|
||
+
|
||
\begin{bmatrix}
|
||
0 & 0\\
|
||
0 & 0
|
||
\end{bmatrix}
|
||
\right)
|
||
=
|
||
\begin{bmatrix}
|
||
0.5 & 0.25\\
|
||
0.25 & 0.5
|
||
\end{bmatrix}\]
|
||
\[S_2 = \frac{1}{4}
\left(
\begin{bmatrix}
1 & 1\\
1 & 1
\end{bmatrix}
+
\begin{bmatrix}
1 & 0\\
0 & 0
\end{bmatrix}
+
\begin{bmatrix}
1 & 1\\
1 & 1
\end{bmatrix}
+
\begin{bmatrix}
1 & 0\\
0 & 0
\end{bmatrix}
\right)
=
\begin{bmatrix}
1 & 0.5\\
0.5 & 0.5
\end{bmatrix}\]
进一步地,
\[S_w = \frac{1}{2} (S_1 + S_2) =
\begin{bmatrix}
0.75 & 0.375\\
0.375 & 0.5
\end{bmatrix}\]
\[S_b = \frac{1}{2} \left(
\begin{bmatrix}
2.25 & 0\\
0 & 0
\end{bmatrix}
+
\begin{bmatrix}
2.25 & 0\\
0 & 0
\end{bmatrix}
\right)
=
\begin{bmatrix}
2.25 & 0\\
0 & 0
\end{bmatrix}\]
广义特征值分解得到$\lambda = 4.8$,$v = (0.8, -0.6)$。投影后的样本为
\[\omega_1: \left\{1.8, 0.4, 1.4, 1.2\right\}\]
\[\omega_2: \left\{-1.0, -0.4, -1.4, -2.0\right\}\]
|
||
\end{proof}
|
||
|
||
|
||
|
||
\vspace{3mm}
|
||
\subsection{模型训练通常需要大量的数据,假设某采集的数据集包含80\%的有效数据和20\%的无效数据。采用一种算法判断数据是否有效,其中无效数据被成功判别为无效数据的概率为90\%,而有效数据被误判为无效数据的概率为5\%。如果某条数据经过该算法被判别为无效数据,则根据贝叶斯定理,这条数据是无效数据的概率是多少?(提示:全概率公式$P(Y)=\sum^{N}_{i=1}P(Y|X_i)P(X_i)$)\\}
|
||
|
||
\begin{proof}[解]
|
||
\begin{align*}
|
||
& P(\text{无效数据} \mid \text{判定无效})\\
|
||
= & \frac{P(\text{判定无效} \mid \text{无效数据})P(\text{无效数据})}{P(\text{判定无效} \mid \text{无效数据})P(\text{无效数据}) + P(\text{判定无效} \mid \text{有效数据})P(\text{有效数据})}\\
|
||
= & \frac{0.9 \times 0.2}{0.9 \times 0.2 + 0.05 \times 0.8}\\
|
||
= & \frac{0.18}{0.18 + 0.04}\\
|
||
= & \frac{9}{11}
|
||
\end{align*}
|
||
\end{proof}
|
||
|
||
\vspace{3mm}
|
||
\subsection{设有两类正态分布的样本集,第一类均值为$\mu_1=[2,-1]^T$,第二类均值为$\mu_2=[1,1]^T$。两类样本集的协方差矩阵和出现的先验概率都相等:$\Sigma_1=\Sigma_2=\Sigma=\left[ \begin{array}{cc}
|
||
4 & 2 \\
|
||
2 & \frac{4}{3}
|
||
\end{array} \right]$,$p(\omega_1)=p(\omega_2)$。试计算分类界面,并对特征向量$x=[6,2]^T$分类。}
|
||
|
||
\begin{proof}[解]
|
||
\[\Sigma^{-1} = \begin{bmatrix}
|
||
1 & -1.5\\
|
||
-1.5 & 3
|
||
\end{bmatrix}\]
|
||
决策方程
|
||
\[g_{LDF1} = (\Sigma^{-1} \mu_1)^T \boldsymbol{x} - \frac{1}{2} \mu_1^T \Sigma^{-1} \mu_1 = (3.5, -6) \boldsymbol{x} - 6.5\]
类似地可以得到
\[g_{LDF2} = (-0.5, 1.5) \boldsymbol{x} - 0.5\]
因此分类界面为
\begin{align*}
(3.5, -6) \boldsymbol{x} - 6.5 & = (-0.5, 1.5) \boldsymbol{x} - 0.5\\
(4, -7.5) \boldsymbol{x} & = 6
\end{align*}
对于$(6, 2)$,计算$g_{LDF1}((6, 2)) = 2.5$,$g_{LDF2}((6, 2)) = -0.5$,因此属于第一类。
|
||
\end{proof}
|
||
|
||
\vspace{3mm}
|
||
\subsection{给定异或的样本集$D=\left\{\left((0,0)^T,-1\right),\left((0,1)^T,1\right),\left((1,0)^T,1\right),\left((1,1)^T,-1\right)\right\}$该样本集是线性不可分的,可采用如下所示的多项式函数$\phi(\mathbf{x})$将样本$D=\left\{(\mathbf{x}_n,y_n)\right\}$映射为$D_\phi=\left\{(\phi(\mathbf{x}_n),y_n)\right\}$,其中$\phi(\mathbf{x})$满足
|
||
\begin{equation*}
|
||
\begin{aligned}
|
||
\phi_1(\mathbf{x})&=2(x_1-0.5) \\
|
||
\phi_2(\mathbf{x})&=4(x_1-0.5)(x_2-0.5)
|
||
\end{aligned}
|
||
\end{equation*}
|
||
\\
|
||
\qquad(1) 给出映射后的样本集;\\
|
||
\qquad(2) 在映射后的样本集中,设计一个线性SVM分类器,给出支持向量及分类界面。
|
||
}
|
||
|
||
\begin{proof}[解]
|
||
映射后的样本集
|
||
\[D_{\phi} = \left\lbrace\left((-1, 1)^T, -1\right), \left((-1, -1)^T, 1\right), \left((1, -1)^T, 1\right), \left((1, 1)^T, -1\right)\right\rbrace\]
|
||
|
||
待优化的问题为
|
||
\[L(\boldsymbol{\alpha}) = \sum_{i = 1}^4 \alpha_i - \frac{1}{2} \sum_{i = 1}^4 \sum_{j = 1}^4 \alpha_i \alpha_j y_i y_j \boldsymbol{x}_i^T \boldsymbol{x}_j\]
|
||
约束条件为$\sum_{i = 1}^4 \alpha_i y_i = 0$,$\alpha_i \geq 0$。对各$\alpha_i$求偏导,
\begin{align*}
\frac{\partial L}{\partial \alpha_1} & = 1 - \sum_{i \neq 1} \alpha_i y_1 y_i \boldsymbol{x}_1^T \boldsymbol{x}_i - \alpha_1 y_1 y_1 \boldsymbol{x}_1^T \boldsymbol{x}_1\\
& = 1 - 2 \alpha_3 - 2 \alpha_1\\
\frac{\partial L}{\partial \alpha_2} & = 1 - 2 \alpha_4 - 2 \alpha_2\\
\frac{\partial L}{\partial \alpha_3} & = 1 - 2 \alpha_1 - 2 \alpha_3\\
\frac{\partial L}{\partial \alpha_4} & = 1 - 2 \alpha_2 - 2 \alpha_4
\end{align*}
令四个偏导数均为0,得到$\alpha_1 + \alpha_3 = \alpha_2 + \alpha_4 = \frac{1}{2}$;结合约束条件并由对称性,取$\alpha_1 = \alpha_2 = \alpha_3 = \alpha_4 = \frac{1}{4}$。全部的点均为支持向量。因此
\[\boldsymbol{w} = \sum_{i = 1}^4 \alpha_i y_i \boldsymbol{x}_i = \left(0, -1\right)\]

为求偏置量,代入$\boldsymbol{x}_1$:
\[(-1) (\boldsymbol{w}^T \boldsymbol{x}_1 + b) = 1\]
得到$b = 0$。

分类界面$\boldsymbol{w}^T \boldsymbol{x} + b = 0$,即
\[\begin{bmatrix}
0 & -1
\end{bmatrix} \boldsymbol{x} = 0\]
得到映射后空间中的分类界面为$\phi_2 = 0$,因此在原空间中,
\[4(x_1 - 0.5)(x_2 - 0.5) = 0\]
|
||
|
||
\end{proof}
|
||
|
||
|
||
|
||
\vspace{3mm}
|
||
\subsection{使用KMeans算法对2维空间中的6个点$(0,2)$,$(2,0)$,$(2,3)$,$(3,2)$,$(4,0)$,$(5,4)$进行聚类,距离函数选择欧氏距离$d=\sqrt{(x_1-x_2)^2+(y_1-y_2)^2}$。\\
|
||
\qquad (1)起始聚类中心选择(0,0)和(4,3),计算聚类中心;\\
|
||
\qquad (2)起始聚类中心选择(1,4)和(3,1),计算聚类中心。\\
|
||
}
|
||
|
||
\begin{proof}[解]
|
||
中心选择$(0, 0), (4, 3)$,第一次分为$(0, 2), (2,0)$与$(2, 3), (3, 2), (4, 0), (5, 4)$,更新后的中心为$(1, 1)$与$\left(\frac{7}{2}, \frac{9}{4}\right)$。收敛。
|
||
|
||
中心选择$(1, 4)$与$(3, 1)$,第一次分为$(0, 2), (2, 3)$与$(2, 0), (4, 0), (3, 2), (5, 4)$,更新后中心为$(1, \frac{5}{2})$与$(\frac{7}{2}, \frac{3}{2})$,收敛。
|
||
\end{proof}
|
||
|
||
\vspace{3mm}
|
||
\centerline{\textbf{\Large{编程部分}}}
|
||
|
||
|
||
\vspace{3mm}
|
||
% 请根据是否选择自选课题的情况选择“编程作业报告”或“自选课题进度汇报”中的一项完成
|
||
\section{编程作业报告}
|
||
\subsection{程序验证}
|
||
与助教给出的图片相比,我写出的程序PCA得到的结果的xy坐标都在$[-1, 1]$之间,不利于之后的分类。我将所有的PCA之后的坐标都扩大了20倍。
|
||
|
||
运行\lstinline{check.py}进行检查:
|
||
\begin{figure}[H]
|
||
\centering
|
||
\includegraphics[width=\linewidth]{img/check/check.png}
|
||
\end{figure}
|
||
|
||
\subsection{数据预处理}
|
||
运行
|
||
\begin{lstlisting}[style=Bash]
|
||
python data_preprocess.py
|
||
\end{lstlisting}
|
||
得到的输出为
|
||
\begin{figure}[H]
|
||
\centering
|
||
\begin{subfigure}[t]{.45\linewidth}
|
||
\includegraphics[width=\textwidth]{img/preprocess/preprocess_train.png}
|
||
\caption{训练集preprocess结果}
|
||
\end{subfigure}
|
||
\hspace{0.5cm}
|
||
\begin{subfigure}[t]{.45\linewidth}
|
||
\includegraphics[width=\textwidth]{img/preprocess/preprocess_val.png}
|
||
\caption{验证集preprocess结果}
|
||
\end{subfigure}\\[2ex]
|
||
\begin{subfigure}[t]{.45\linewidth}
|
||
\includegraphics[width=\textwidth]{img/preprocess/preprocess_test.png}
|
||
\caption{测试集preprocess结果}
|
||
\end{subfigure}
|
||
\end{figure}
|
||
|
||
\subsection{训练、验证及测试}
|
||
\begin{figure}[H]
|
||
\centering
|
||
\begin{subfigure}[t]{.45\linewidth}
|
||
\includegraphics[width=\textwidth]{img/train/default/loss.png}
|
||
\end{subfigure}
|
||
\hspace{0.5cm}
|
||
\begin{subfigure}[t]{.45\linewidth}
|
||
\includegraphics[width=\textwidth]{img/train/default/train_accu.png}
|
||
\end{subfigure}\\[2ex]
|
||
\begin{subfigure}[t]{.45\linewidth}
|
||
\includegraphics[width=\textwidth]{img/train/default/sv.png}
|
||
\end{subfigure}
|
||
\hspace{0.5cm}
|
||
\begin{subfigure}[t]{.45\linewidth}
|
||
\includegraphics[width=\textwidth]{img/train/default/val.png}
|
||
\end{subfigure}\\[2ex]
|
||
\begin{subfigure}[t]{.8\linewidth}
|
||
\includegraphics[width=\textwidth]{img/train/default/test.png}
|
||
\end{subfigure}
|
||
\end{figure}
|
||
|
||
\subsection{调整正则化系数}
|
||
\subsubsection{C = \num{1e-6}}
|
||
\begin{figure}[H]
|
||
\centering
|
||
\begin{subfigure}[t]{.45\linewidth}
|
||
\includegraphics[width=\textwidth]{img/train/1e-6/loss.png}
|
||
\end{subfigure}
|
||
\hspace{0.5cm}
|
||
\begin{subfigure}[t]{.45\linewidth}
|
||
\includegraphics[width=\textwidth]{img/train/1e-6/accu.png}
|
||
\end{subfigure}\\[2ex]
|
||
\begin{subfigure}[t]{.45\linewidth}
|
||
\includegraphics[width=\textwidth]{img/train/1e-6/sv.png}
|
||
\end{subfigure}
|
||
\hspace{0.5cm}
|
||
\begin{subfigure}[t]{.45\linewidth}
|
||
\includegraphics[width=\textwidth]{img/train/1e-6/val.png}
|
||
\end{subfigure}\\[2ex]
|
||
\begin{subfigure}[t]{.8\linewidth}
|
||
\includegraphics[width=\textwidth]{img/train/1e-6/test.png}
|
||
\end{subfigure}
|
||
\end{figure}
|
||
可以看到出现了严重的欠拟合,分类界面超出了绘图的范围。这是因为C过小,导致不能正确地分辨合适的分类界面。
|
||
|
||
\subsubsection{C = 1}
|
||
\begin{figure}[H]
|
||
\centering
|
||
\begin{subfigure}[t]{.45\linewidth}
|
||
\includegraphics[width=\textwidth]{img/train/1/loss.png}
|
||
\end{subfigure}
|
||
\hspace{0.5cm}
|
||
\begin{subfigure}[t]{.45\linewidth}
|
||
\includegraphics[width=\textwidth]{img/train/1/accu.png}
|
||
\end{subfigure}\\[2ex]
|
||
\begin{subfigure}[t]{.45\linewidth}
|
||
\includegraphics[width=\textwidth]{img/train/1/sv.png}
|
||
\end{subfigure}
|
||
\hspace{0.5cm}
|
||
\begin{subfigure}[t]{.45\linewidth}
|
||
\includegraphics[width=\textwidth]{img/train/1/val.png}
|
||
\end{subfigure}\\[2ex]
|
||
\begin{subfigure}[t]{.8\linewidth}
|
||
\includegraphics[width=\textwidth]{img/train/1/test.png}
|
||
\end{subfigure}
|
||
\end{figure}
|
||
发生了过拟合,直线被交界面的点限制,斜率不是最优。
|
||
|
||
\end{document}
|
||
|
||
|
||
|
||
%%% Local Variables:
|
||
%%% mode: latex
|
||
%%% TeX-master: t
|
||
%%% End:
|