% MediaNCognition/hw2/report/main.tex

% Homework template for Inference and Information
% UPDATE: September 26, 2017 by Xiangxiang
\documentclass[a4paper]{article}
\usepackage{ctex}
\usepackage{amsmath, amssymb, amsthm}
\usepackage{moreenum}
\usepackage{mathtools}
\usepackage{url}
\usepackage{bm}
\usepackage{enumitem}
\usepackage{graphicx}
\usepackage{listings}
\usepackage{fontspec}
\usepackage{xcolor}
\usepackage{float}
% \usepackage{color}

% Monospaced font families. \newfontfamily is provided by fontspec (loaded
% above), so the document has to be compiled with XeLaTeX or LuaLaTeX.
% \codefont: Fira Code with the ligature features reset and contextual
% alternates enabled; \lstset below uses it for listing text and line numbers.
\newfontfamily\codefont[Ligatures=ResetAll]{Fira Code}[Contextuals={Alternate}]
% \cascadia: Cascadia Code; not referenced in the visible part of this file.
\newfontfamily\cascadia{Cascadia Code}

% Global defaults applied to every lstlisting environment in this document.
\lstset{
    basicstyle          =   \small\codefont,    % listing text: \small size, \codefont family
    % --- whitespace handling and line numbering
    tabsize             =   4,
    showstringspaces    =   false,              % do not mark spaces inside string literals
    numbers             =   left,               % line numbers on the left side
    numberstyle         =   \codefont,
    % --- line breaking and caption placement
    breaklines          =   true,               % wrap lines that are too long
    captionpos          =   t,                  % captions go above the listing
    % --- frame and column layout
    frame               =   l,                  % single rule on the left side only
    flexiblecolumns,
}

% Listing style for Python snippets; selected with \begin{lstlisting}[style=Python].
% The two keywordstyle keys are order-sensitive: the plain one sets the first
% keyword class, the [2] one sets the second keyword class.
\lstdefinestyle{Python}{
    language        =   Python, % use the Python language definition
    keywordstyle    =   \color{blue},
    keywordstyle    =   [2] \color{teal},
    stringstyle     =   \color{orange!80!black},    % string literals
    commentstyle    =   \color{red},                % comments
    identifierstyle =   \color{blue!80!white},      % all other identifiers
}

% Listing style for shell commands; selected with \begin{lstlisting}[style=Bash].
% It only selects the bash language definition; everything else comes from \lstset.
\lstdefinestyle{Bash}{
    language        =   bash
}
\usepackage{subcaption}
\usepackage{booktabs} % toprule
\usepackage[mathcal]{eucal}
\usepackage[thehwcnt = 2]{iidef}

\allowdisplaybreaks

\thecourseinstitute{清华大学电子工程系}
\thecoursename{\textbf{媒体与认知} \space 课堂2}
\theterm{2023-2024学年春季学期}
\hwname{作业}
\begin{document}
\courseheader
\name{高艺轩}
\vspace{3mm}
\centerline{\textbf{\Large{理论部分}}}

\section{单选题（15分）}
\subsection{\underline{C}}

\subsection{\underline{D}}

\subsection{\underline{D}}

\subsection{\underline{C}}

\subsection{\underline{B}}

\section{计算题（15 分）}
\subsection{
已知某卷积层的输入为$X$(该批量中样本数目为1，输入样本通道数为1)，采用一个卷积核$W$，即卷积输出通道数为1，卷积核尺寸为$2\times 2$，卷积的步长为1，无边界延拓，偏置量为$b$：
$$X=\left[ \begin{array}{ccc}
    0.5 & -0.2 & 0.3 \\
    0.6 & 0.4 & -0.1 \\
    -0.4 & 0.5 & 0.2
\end{array}\right],
W=\left[ \begin{array}{cc}
    0.1 & -0.2  \\
    -0.3 & 0.4
\end{array}\right], b=0.04$$
}
\subsubsection{请计算卷积层的输出$Y$。}
\[\begin{cases}
    Y_{11} = 0.5 \times 0.1 + (-0.2) \times (-0.2) + 0.6 \times (-0.3) +  0.4 \times 0.4 + 0.04 = 0.11\\
    Y_{12} = (-0.2) \times 0.1 + 0.3 \times (-0.2) + 0.4 \times (-0.3) + (-0.1) \times 0.4  + 0.04 = -0.2\\
    Y_{21} = 0.6 \times 0.1 + 0.4 \times (-0.2) + (-0.4) \times (-0.3) + 0.5 \times 0.4 + 0.04 = 0.34\\
    Y_{22} = 0.4 \times 0.1 + (-0.1) \times (-0.2) + 0.5 \times (-0.3) + 0.2 \times 0.4 + 0.04 = 0.03
\end{cases}\]

\subsubsection{若训练过程中的目标函数为$L$，且已知$\frac{\partial L}{\partial Y}=\left[ \begin{array}{cc}
    0.3 & 0.1 \\
    -0.4 & 0.2
\end{array} \right]$，请计算$\frac{\partial L}{\partial X}$。
}

注：本题的计算方式不限，但需要提供计算过程以及各步骤的结果。
\vspace{6mm}

\begin{proof}[解]
    首先，
    \[\frac{\partial L}{\partial Y} = \begin{bmatrix}
        \frac{\partial L}{\partial Y_{11}} & \frac{\partial L}{\partial Y_{12}}\\
        \frac{\partial L}{\partial Y_{21}} & \frac{\partial L}{\partial Y_{22}}
    \end{bmatrix}\]
    \[\frac{\partial L}{\partial X} = \begin{bmatrix}
        \frac{\partial L}{\partial X_{11}} & \frac{\partial L}{\partial X_{12}} & \frac{\partial L}{\partial X_{13}}\\
        \frac{\partial L}{\partial X_{21}} & \frac{\partial L}{\partial X_{22}} & \frac{\partial L}{\partial X_{23}}\\
        \frac{\partial L}{\partial X_{31}} & \frac{\partial L}{\partial X_{32}} & \frac{\partial L}{\partial X_{33}}
    \end{bmatrix}\]
    同时，根据链式法则，
    \[\frac{\partial L}{\partial X_{11}} = \frac{\partial Y_{11}}{\partial X_{11}} \frac{\partial L}{\partial Y_{11}} + \frac{\partial Y_{12}}{\partial X_{11}} \frac{\partial L}{\partial Y_{12}} + \frac{\partial Y_{21}}{\partial X_{11}} \frac{\partial L}{\partial Y_{21}} + \frac{\partial Y_{22}}{\partial X_{11}} \frac{\partial L}{\partial Y_{22}}\]
    其它的$\frac{\partial L}{\partial X_{12}}, \dots, \frac{\partial L}{\partial X_{33}}$的计算方式也是类似的。因此，
    \[\frac{\partial L}{\partial X} = \sum_{i = 1}^2 \sum_{j = 1}^2
    \begin{bmatrix}
        \frac{\partial Y_{ij}}{\partial X_{11}} & \cdots & \frac{\partial Y_{ij}}{\partial X_{13}}\\
        \vdots & \ddots & \vdots\\
        \frac{\partial Y_{ij}}{\partial X_{31}} & \cdots & \frac{\partial Y_{ij}}{\partial X_{33}}
    \end{bmatrix} \frac{\partial L}{\partial Y_{ij}} = \sum_{i = 1}^2 \sum_{j = 1}^2 \frac{\partial Y_{ij}}{\partial X} \frac{\partial L}{\partial Y_{ij}}\]
    式中的$\frac{\partial Y_{ij}}{\partial X}$取决于$Y_{ij}$是由$X$中的哪几个元素卷积得到的：它等于将$W$放置在$3 \times 3$零矩阵中与这些元素对应的位置，即$W$在$3 \times 3$矩阵中的平移。综合起来，有
    \begin{align*}
        \frac{\partial L}{\partial X} & =
        \begin{bmatrix}
            0.1 & -0.2 & 0\\
            -0.3 & 0.4 & 0\\
            0 & 0 & 0
        \end{bmatrix} \frac{\partial L}{\partial Y_{11}}
        +
        \begin{bmatrix}
            0 & 0.1 & -0.2\\
            0 & -0.3 & 0.4\\
            0 & 0 & 0
        \end{bmatrix} \frac{\partial L}{\partial Y_{12}}\\
        & \quad +
        \begin{bmatrix}
            0 & 0 & 0\\
            0.1 & -0.2 & 0\\
            -0.3 & 0.4 & 0
        \end{bmatrix} \frac{\partial L}{\partial Y_{21}}
        +
        \begin{bmatrix}
            0 & 0 & 0\\
            0 & 0.1 & -0.2\\
            0 & -0.3 & 0.4
        \end{bmatrix} \frac{\partial L}{\partial Y_{22}}\\
        & = \mathrm{zeropad}(W) \ast \frac{\partial L}{\partial Y}\\
        & =
        \begin{bmatrix}
            0.03 & -0.06 & 0\\
            -0.09 & 0.12 & 0\\
            0 & 0 & 0
        \end{bmatrix}
        +
        \begin{bmatrix}
            0 & 0.01 & -0.02\\
            0 & -0.03 & 0.04\\
            0 & 0 & 0
        \end{bmatrix}\\
        & \quad +
        \begin{bmatrix}
            0 & 0 & 0\\
            -0.04 & 0.08 & 0\\
            0.12 & -0.16 & 0
        \end{bmatrix}
        +
        \begin{bmatrix}
            0 & 0 & 0\\
            0 & 0.02 & -0.04\\
            0 & -0.06 & 0.08
        \end{bmatrix}\\
        & =
        \begin{bmatrix}
            0.03 & -0.05 & -0.02\\
            -0.13 & 0.19 & 0\\
            0.12 & -0.22 & 0.08
        \end{bmatrix} \qedhere
    \end{align*}
\end{proof}

\centerline{\textbf{\Large{编程部分}}}
\vspace{3mm}

% 请根据是否选择自选课题的情况选择“编程作业报告”或“自选课题开题报告”中的一项完成
\section{编程作业报告}
\subsection{探究batch normalization和dropout的作用}
\begin{enumerate}
    \item 使用默认配置训练模型：
    \begin{lstlisting}[style=Bash]
python train.py --ckpt_path checkpoints/default
    \end{lstlisting}
    \begin{figure}[H]
        \centering
        \includegraphics[width=\linewidth]{img/models/default/loss_and_acc.jpg}
    \end{figure}
    之后测试得到的正确率为90.8\%。
    \item 启用batch normalization：
    \begin{lstlisting}[style=Bash]
python train.py --ckpt_path checkpoints/bn --bn
    \end{lstlisting}
    \begin{figure}[H]
        \centering
        \includegraphics[width=\linewidth]{img/models/bn/loss_and_acc.jpg}
    \end{figure}
    测试得到的正确率为95.9\%。
    \item 启用dropout并设置概率为0.3：
    \begin{lstlisting}[style=Bash]
python train.py --ckpt_path checkpoints/dropout --dropout 0.3
    \end{lstlisting}
    \begin{figure}[H]
        \centering
        \includegraphics[width=\linewidth]{img/models/dropout/loss_and_acc.jpg}
    \end{figure}
    测试后得到的正确率为94.1\%。
\end{enumerate}

\subsection{探究数据增广的作用}
考虑到在不同的视角下，交通标志可能有旋转或者变形，因此使用
\begin{lstlisting}[style=Python]
transforms.RandomAffine(degrees=30,shear=10)
\end{lstlisting}
来对数据进行随机的形变与旋转；另外，考虑到可能在不同的光线条件下导致对比度变化，因此使用
\begin{lstlisting}[style=Python]
transforms.RandomAutocontrast()
\end{lstlisting}
来对数据进行随机的对比度调整。

执行
\begin{lstlisting}[style=Bash]
python unit_test.py data_loader
\end{lstlisting}
得到
\begin{figure}[H]
    \centering
    \includegraphics[width=\linewidth]{img/augmentation.jpg}
    \caption{数据增广后的结果}
\end{figure}

训练最优模型使用的命令为
\begin{lstlisting}[style=Bash]
python train.py --ckpt_path checkpoints/bn_aug --bn --augment --epoch 20
\end{lstlisting}
\begin{figure}[H]
    \centering
    \includegraphics[width=\linewidth]{img/models/bn_aug/loss_and_acc.jpg}
\end{figure}
测试得到的正确率为96.0\%，略微高于不使用数据增广时的结果。

\subsection{探究空间变换网络（STN）的作用}
运行
\begin{lstlisting}[style=Bash]
python train.py --ckpt_path checkpoints/stn --bn --stn
\end{lstlisting}
\begin{figure}[H]
    \centering
    \includegraphics[width=\linewidth]{img/models/stn/loss_and_acc.jpg}
\end{figure}
测试得到的正确率为94.6\%。正确率比不使用STN时反而有所降低，可能是设计的网络结构不够理想导致的。

\subsection{可视化}
\begin{enumerate}
    \item 可视化各层卷积核：
    \begin{figure}[H]
        \centering
        \includegraphics[width=\linewidth]{img/filter/filter_layer_0.jpg}
        \caption{第0层的卷积核}
    \end{figure}
    \begin{figure}[H]
        \centering
        \includegraphics[width=\linewidth]{img/filter/filter_layer_1.jpg}
        \caption{第1层的卷积核}
    \end{figure}
    \begin{figure}[H]
        \centering
        \includegraphics[width=\linewidth]{img/filter/filter_layer_2.jpg}
        \caption{第2层的卷积核}
    \end{figure}
    \begin{figure}[H]
        \centering
        \includegraphics[width=\linewidth]{img/filter/filter_layer_3.jpg}
        \caption{第3层的卷积核}
    \end{figure}
    \begin{figure}[H]
        \centering
        \includegraphics[width=\linewidth]{img/filter/filter_layer_4.jpg}
        \caption{第4层的卷积核}
    \end{figure}
    \item 可视化各层卷积层的输出特征图
    \begin{figure}[H]
        \centering
        \includegraphics[width=\linewidth]{img/feature/layer_0/feature_map.jpg}
        \caption{第0层卷积层的输出特征图}
    \end{figure}
    \begin{figure}[H]
        \centering
        \includegraphics[width=\linewidth]{img/feature/layer_1/feature_map.jpg}
        \caption{第1层卷积层的输出特征图}
    \end{figure}
    \begin{figure}[H]
        \centering
        \includegraphics[width=\linewidth]{img/feature/layer_2/feature_map.jpg}
        \caption{第2层卷积层的输出特征图}
    \end{figure}
    \begin{figure}[H]
        \centering
        \includegraphics[width=\linewidth]{img/feature/layer_3/feature_map.jpg}
        \caption{第3层卷积层的输出特征图}
    \end{figure}
    \begin{figure}[H]
        \centering
        \includegraphics[width=\linewidth]{img/feature/layer_4/feature_map.jpg}
        \caption{第4层卷积层的输出特征图}
    \end{figure}
    \item t-SNE可视化最后一层隐藏层的输出特征
    \begin{figure}[H]
        \centering
        \includegraphics[width=\linewidth]{img/tsne/tsne.jpg}
    \end{figure}
    对最后一层隐藏层输出的t-SNE可视化结果表明，不同类别的输入经过网络的非线性变换后，已经被映射到了彼此分离的不同聚类中。
    \item STN学习到的变换
    \begin{figure}[H]
        \centering
        \includegraphics[width=\linewidth]{img/stn/stn.jpg}
    \end{figure}
    网络尽可能将所有的路牌都变换到了同样的倾斜角度。
\end{enumerate}

\section{遇到的问题与解决办法}
在自定义STN网络的时候，我最开始使用了比较小的卷积核，导致STN的效果很差，启用STN之后正确率只有80\%；之后，我分析认为STN主要需要感知整幅图片的倾斜以及旋转情况，需要较大的感受野，因此改用了较大的卷积核，之后得到了比较理想的效果。

完成作业没有使用大模型。
% \section{自选课题工作进度汇报}

\end{document}


%%% Local Variables:
%%% mode: latex
%%% TeX-master: t
%%% End: