% MediaNCognition/hw3/report/main.tex

% Homework Template
\documentclass[a4paper]{article}
\usepackage{ctex}
\usepackage{amsmath, amssymb, amsthm}
\usepackage{moreenum}
\usepackage{mathtools}
\usepackage{url}
\usepackage{bm}
\usepackage{enumitem}
\usepackage{graphicx}
\usepackage{subcaption}
\usepackage{booktabs} % toprule
\usepackage[mathcal]{eucal}
\usepackage[thehwcnt = 3]{iidef}
\usepackage{listings}
\usepackage{fontspec}
\usepackage{xcolor}
\usepackage{float}
\usepackage{siunitx}

\newfontfamily\codefont[Ligatures=ResetAll]{Fira Code}[Contextuals={Alternate}]
\newfontfamily\cascadia{Cascadia Code}

\lstset{
    basicstyle          =   \small\codefont,
    % ---
    tabsize             =   4,
    showstringspaces    =   false,
    numbers             =   left,
    numberstyle         =   \codefont,
    % ---
    breaklines          =   true,
    captionpos          =   t,
    % ---
    frame               =   l,
    flexiblecolumns,
}

\lstdefinestyle{Python}{
    language        =   Python, % 语言选Python
    keywordstyle    =   \color{blue},
    keywordstyle    =   [2] \color{teal},
    stringstyle     =   \color{orange!80!black},
    commentstyle    =   \color{red},
    identifierstyle =   \color{blue!80!white},
}

\lstdefinestyle{Bash}{
    language        =   bash
}

\thecourseinstitute{清华大学电子工程系}
\thecoursename{\textbf{媒体与认知}}
\theterm{2023-2024学年春季学期}
\hwname{作业}
\begin{document}
\courseheader
% 请在YOUR NAME处填写自己的姓名
\name{高艺轩}
\vspace{3mm}
\centerline{\textbf{\Large{理论部分}}}

\section{单选题（15分）}
% 请在？处填写答案
\subsection{\underline{D}}

\subsection{\underline{C}}

\subsection{\underline{D}}

\subsection{\underline{D}}

\subsection{\underline{B}}

\section{计算题（15 分）}


\subsection{给定两个类别的样本分别为:
\begin{align*}
     &\omega_1:\{(3,1),(2,2),(4,3),(3,2)\} \\
   &\omega_2:\{(1,3),(1,2),(-1,1),(-1,2)\}
\end{align*}
试利用LDA，将样本特征维数压缩为一维。
}

\begin{proof}[解]
    首先计算$\mu_1 = (3, 2), \mu_2 = (0, 2), \mu = (1.5, 2)$。因此
    \[S_1 = \frac{1}{4}
    \left(
        \begin{bmatrix}
            0 & 0\\
            0 & 1
        \end{bmatrix}
        +
        \begin{bmatrix}
            1 & 0\\
            0 & 0
        \end{bmatrix}
        +
        \begin{bmatrix}
            1 & 1\\
            1 & 1
        \end{bmatrix}
        +
        \begin{bmatrix}
            0 & 0\\
            0 & 0
        \end{bmatrix}
    \right)
    =
    \begin{bmatrix}
        0.5 & 0.25\\
        0.25 & 0.5
    \end{bmatrix}\]
    \[S_2 = \frac{1}{4}
    \left(
        \begin{bmatrix}
            1 & 1\\
            1 & 1
        \end{bmatrix}
        +
        \begin{bmatrix}
            1 & 0\\
            0 & 0
        \end{bmatrix}
        +
        \begin{bmatrix}
            1 & 1\\
            1 & 1
        \end{bmatrix}
        +
        \begin{bmatrix}
            1 & 0\\
            0 & 0
        \end{bmatrix}
    \right)
    =
    \begin{bmatrix}
        1 & 0.5\\
        0.5 & 0.5
    \end{bmatrix}\]
    进一步地，
    \[S_w = \frac{1}{2} (S_1 + S_2) =
    \begin{bmatrix}
        0.75 & 0.375\\
        0.375 & 0.5
    \end{bmatrix}\]
    \[S_b = \frac{1}{2} \left(
        \begin{bmatrix}
            2.25 & 0\\
            0 & 0
        \end{bmatrix}
        +
        \begin{bmatrix}
            2.25 & 0\\
            0 & 0
        \end{bmatrix}
    \right)
    =
    \begin{bmatrix}
        2.25 & 0\\
        0 & 0
    \end{bmatrix}\]
    广义特征值分解得到$\lambda = 4.8$，$v = (0.8, -0.6)$（即$v \propto S_w^{-1}(\mu_1 - \mu_2)$并归一化）。投影后的样本为
    \[\omega_1: \left\{1.8, 0.4, 1.4, 1.2\right\}\]
    \[\omega_2: \left\{-1.0, -0.4, -1.4, -2.0\right\}\]
\end{proof}


\vspace{3mm}
\subsection{模型训练通常需要大量的数据，假设某采集的数据集包含80\%的有效数据和20\%的无效数据。采用一种算法判断数据是否有效，其中无效数据被成功判别为无效数据的概率为90\%，而有效数据被误判为无效数据的概率为5\%。如果某条数据经过该算法被判别为无效数据，则根据贝叶斯定理，这条数据是无效数据的概率是多少？(提示：全概率公式$P(Y)=\sum^{N}_{i=1}P(Y|X_i)P(X_i)$)\\}

\begin{proof}[解]
    \begin{align*}
        & P(\text{无效数据} \mid \text{判定无效})\\
        = & \frac{P(\text{判定无效} \mid \text{无效数据})P(\text{无效数据})}{P(\text{判定无效} \mid \text{无效数据})P(\text{无效数据}) + P(\text{判定无效} \mid \text{有效数据})P(\text{有效数据})}\\
        = & \frac{0.9 \times 0.2}{0.9 \times 0.2 + 0.05 \times 0.8}\\
        = & \frac{0.18}{0.18 + 0.04}\\
        = & \frac{9}{11}
    \end{align*}
\end{proof}

\vspace{3mm}
\subsection{设有两类正态分布的样本集，第一类均值为$\mu_1=[2,-1]^T$，第二类均值为$\mu_2=[1,1]^T$。两类样本集的协方差矩阵和出现的先验概率都相等：$\Sigma_1=\Sigma_2=\Sigma=\left[ \begin{array}{cc}
    4 & 2 \\
    2 & \frac{4}{3}
\end{array} \right]$，$p(\omega_1)=p(\omega_2)$。试计算分类界面，并对特征向量$x=[6,2]^T$分类。}

\begin{proof}[解]
    \[\Sigma^{-1} = \begin{bmatrix}
        1 & -1.5\\
        -1.5 & 3
    \end{bmatrix}\]
    由于两类先验概率相等，先验项可以略去，决策方程为
    \[g_{\mathrm{LDF}1}(\boldsymbol{x}) = (\Sigma^{-1} \mu_1)^T \boldsymbol{x} - \frac{1}{2} \mu_1^T \Sigma^{-1} \mu_1 = (3.5, -6) \boldsymbol{x} - 6.5\]
    类似地可以得到
    \[g_{\mathrm{LDF}2}(\boldsymbol{x}) = (-0.5, 1.5) \boldsymbol{x} - 0.5\]
    因此分类界面为
    \begin{align*}
        (3.5, -6) \boldsymbol{x} - 6.5 & = (-0.5, 1.5) \boldsymbol{x} - 0.5\\
        (4, -7.5) \boldsymbol{x} & = 6
    \end{align*}
    对于$(6, 2)$，计算$g_{\mathrm{LDF}1}((6, 2)) = 2.5$，$g_{\mathrm{LDF}2}((6, 2)) = -0.5$，因此属于第一类。
\end{proof}

\vspace{3mm}
\subsection{给定异或的样本集$D=\left\{\left((0,0)^T,-1\right),\left((0,1)^T,1\right),\left((1,0)^T,1\right),\left((1,1)^T,-1\right)\right\}$该样本集是线性不可分的，可采用如下所示的多项式函数$\phi(\mathbf{x})$将样本$D=\left\{(\mathbf{x}_n,y_n)\right\}$映射为$D_\phi=\left\{(\phi(\mathbf{x}_n),y_n)\right\}$，其中$\phi(\mathbf{x})$满足
\begin{equation*}
\begin{aligned}
    \phi_1(\mathbf{x})&=2(x_1-0.5) \\
    \phi_2(\mathbf{x})&=4(x_1-0.5)(x_2-0.5)
\end{aligned}
\end{equation*}
\\
\qquad(1) 给出映射后的样本集；\\
\qquad(2) 在映射后的样本集中，设计一个线性SVM分类器，给出支持向量及分类界面。
}

\begin{proof}[解]
    映射后的样本集
    \[D_{\phi} = \left\lbrace\left((-1, 1)^T, -1\right), \left((-1, -1)^T, 1\right), \left((1, -1)^T, 1\right), \left((1, 1)^T, -1\right)\right\rbrace\]

    待优化的问题为
    \[L(\boldsymbol{\alpha}) = \sum_{i = 1}^4 \alpha_i - \frac{1}{2} \sum_{i = 1}^4 \sum_{j = 1}^4 \alpha_i \alpha_j y_i y_j \boldsymbol{x}_i^T \boldsymbol{x}_j\]
    其中$\boldsymbol{x}_i$表示映射后的样本，约束条件为$\sum_{i = 1}^4 \alpha_i y_i = 0$，$\alpha_i \geq 0$。因此
    \begin{align*}
        \frac{\partial L}{\partial \alpha_1} & = 1 - \sum_{j = 1}^4 \alpha_j y_1 y_j \boldsymbol{x}_1^T \boldsymbol{x}_j\\
        & = 1 - 2 \alpha_1 - 2 \alpha_3\\
        \frac{\partial L}{\partial \alpha_2} & = 1 - 2 \alpha_2 - 2 \alpha_4\\
        \frac{\partial L}{\partial \alpha_3} & = 1 - 2 \alpha_1 - 2 \alpha_3\\
        \frac{\partial L}{\partial \alpha_4} & = 1 - 2 \alpha_2 - 2 \alpha_4
    \end{align*}
    令四个偏导数均为0，并结合约束$\sum_{i = 1}^4 \alpha_i y_i = 0$，由对称性取$\alpha_1 = \alpha_2 = \alpha_3 = \alpha_4 = \frac{1}{4}$。全部的点均为支持向量。因此
    \[\boldsymbol{w} = \sum_{i = 1}^4 \alpha_i y_i \boldsymbol{x}_i = \left(0, -1\right)^T\]

    为求偏置量，代入$\boldsymbol{x}_1$：
    \[(-1) (\boldsymbol{w}^T \boldsymbol{x}_1 + b) = 1\]
    得到$b = 0$。

    分类界面$\boldsymbol{w}^T \boldsymbol{x} + b = 0$，即
    \[\begin{bmatrix}
        0 & -1
    \end{bmatrix} \boldsymbol{x} = 0\]
    得到映射空间中$\phi_2(\mathbf{x}) = 0$，因此在原空间中，
    \[4(x_1 - 0.5)(x_2 - 0.5) = 0\]
    即$x_1 = 0.5$或$x_2 = 0.5$。

\end{proof}


\vspace{3mm}
\subsection{使用KMeans算法对2维空间中的6个点$(0,2)$,$(2,0)$,$(2,3)$,$(3,2)$,$(4,0)$,$(5,4)$进行聚类，距离函数选择欧氏距离$d=\sqrt{(x_1-x_2)^2+(y_1-y_2)^2}$。\\
\qquad (1)起始聚类中心选择(0,0)和(4,3)，计算聚类中心；\\
\qquad (2)起始聚类中心选择(1,4)和(3,1)，计算聚类中心。\\
}

\begin{proof}[解]
    中心选择$(0, 0), (4, 3)$，第一次分为$(0, 2), (2,0)$与$(2, 3), (3, 2), (4, 0), (5, 4)$，更新后的中心为$(1, 1)$与$\left(\frac{7}{2}, \frac{9}{4}\right)$。收敛。

    中心选择$(1, 4)$与$(3, 1)$，第一次分为$(0, 2), (2, 3)$与$(2, 0), (4, 0), (3, 2), (5, 4)$，更新后中心为$(1, \frac{5}{2})$与$(\frac{7}{2}, \frac{3}{2})$，收敛。
\end{proof}

\vspace{3mm}
\centerline{\textbf{\Large{编程部分}}}


\vspace{3mm}
% 请根据是否选择自选课题的情况选择“编程作业报告”或“自选课题进度汇报”中的一项完成
\section{编程作业报告}
\subsection{程序验证}
与助教给出的图片相比，我的程序经PCA降维后得到的$x$、$y$坐标均落在$[-1, 1]$之间，不利于之后的分类，因此我将PCA降维后的所有坐标都放大了20倍。

运行\lstinline{check.py}进行检查：
\begin{figure}[H]
    \centering
    \includegraphics[width=\linewidth]{img/check/check.png}
\end{figure}

\subsection{数据预处理}
运行
\begin{lstlisting}[style=Bash]
python data_preprocess.py
\end{lstlisting}
得到的输出为
\begin{figure}[H]
    \centering
    \begin{subfigure}[t]{.45\linewidth}
        \includegraphics[width=\textwidth]{img/preprocess/preprocess_train.png}
        \caption{训练集preprocess结果}
    \end{subfigure}
    \hspace{0.5cm}
    \begin{subfigure}[t]{.45\linewidth}
        \includegraphics[width=\textwidth]{img/preprocess/preprocess_val.png}
        \caption{验证集preprocess结果}
    \end{subfigure}\\[2ex]
    \begin{subfigure}[t]{.45\linewidth}
        \includegraphics[width=\textwidth]{img/preprocess/preprocess_test.png}
        \caption{测试集preprocess结果}
    \end{subfigure}
\end{figure}

\subsection{训练、验证及测试}
\begin{figure}[H]
    \centering
    \begin{subfigure}[t]{.45\linewidth}
        \includegraphics[width=\textwidth]{img/train/default/loss.png}
    \end{subfigure}
    \hspace{0.5cm}
    \begin{subfigure}[t]{.45\linewidth}
        \includegraphics[width=\textwidth]{img/train/default/train_accu.png}
    \end{subfigure}\\[2ex]
    \begin{subfigure}[t]{.45\linewidth}
        \includegraphics[width=\textwidth]{img/train/default/sv.png}
    \end{subfigure}
    \hspace{0.5cm}
    \begin{subfigure}[t]{.45\linewidth}
        \includegraphics[width=\textwidth]{img/train/default/val.png}
    \end{subfigure}\\[2ex]
    \begin{subfigure}[t]{.8\linewidth}
        \includegraphics[width=\textwidth]{img/train/default/test.png}
    \end{subfigure}
\end{figure}

\subsection{调整正则化系数}
\subsubsection{C = \num{1e-6}}
\begin{figure}[H]
    \centering
    \begin{subfigure}[t]{.45\linewidth}
        \includegraphics[width=\textwidth]{img/train/1e-6/loss.png}
    \end{subfigure}
    \hspace{0.5cm}
    \begin{subfigure}[t]{.45\linewidth}
        \includegraphics[width=\textwidth]{img/train/1e-6/accu.png}
    \end{subfigure}\\[2ex]
    \begin{subfigure}[t]{.45\linewidth}
        \includegraphics[width=\textwidth]{img/train/1e-6/sv.png}
    \end{subfigure}
    \hspace{0.5cm}
    \begin{subfigure}[t]{.45\linewidth}
        \includegraphics[width=\textwidth]{img/train/1e-6/val.png}
    \end{subfigure}\\[2ex]
    \begin{subfigure}[t]{.8\linewidth}
        \includegraphics[width=\textwidth]{img/train/1e-6/test.png}
    \end{subfigure}
\end{figure}
可以看到出现了严重的欠拟合，分类界面超出了绘图的范围。这是因为C过小，导致不能正确地分辨合适的分类界面。

\subsubsection{C = 1}
\begin{figure}[H]
    \centering
    \begin{subfigure}[t]{.45\linewidth}
        \includegraphics[width=\textwidth]{img/train/1/loss.png}
    \end{subfigure}
    \hspace{0.5cm}
    \begin{subfigure}[t]{.45\linewidth}
        \includegraphics[width=\textwidth]{img/train/1/accu.png}
    \end{subfigure}\\[2ex]
    \begin{subfigure}[t]{.45\linewidth}
        \includegraphics[width=\textwidth]{img/train/1/sv.png}
    \end{subfigure}
    \hspace{0.5cm}
    \begin{subfigure}[t]{.45\linewidth}
        \includegraphics[width=\textwidth]{img/train/1/val.png}
    \end{subfigure}\\[2ex]
    \begin{subfigure}[t]{.8\linewidth}
        \includegraphics[width=\textwidth]{img/train/1/test.png}
    \end{subfigure}
\end{figure}
发生了过拟合，直线被交界面的点限制，斜率不是最优。

\end{document}


%%% Local Variables:
%%% mode: latex
%%% TeX-master: t
%%% End: