Files
MediaNCognition/hw2/report/main.tex
2024-04-11 14:20:28 +08:00

347 lines
12 KiB
TeX
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
% Homework template for Inference and Information
% UPDATE: September 26, 2017 by Xiangxiang
\documentclass[a4paper]{article}
\usepackage{ctex}
\usepackage{amsmath, amssymb, amsthm}
\usepackage{moreenum}
\usepackage{mathtools}
\usepackage{url}
\usepackage{bm}
\usepackage{enumitem}
\usepackage{graphicx}
\usepackage{listings}
\usepackage{fontspec}
\usepackage{xcolor}
\usepackage{float}
% \usepackage{color}
\newfontfamily\codefont[Ligatures=ResetAll]{Fira Code}[Contextuals={Alternate}]
\newfontfamily\cascadia{Cascadia Code}
\lstset{
basicstyle = \small\codefont,
% ---
tabsize = 4,
showstringspaces = false,
numbers = left,
numberstyle = \codefont,
% ---
breaklines = true,
captionpos = t,
% ---
frame = l,
flexiblecolumns,
}
\lstdefinestyle{Python}{
language = Python, % 语言选Python
keywordstyle = \color{blue},
keywordstyle = [2] \color{teal},
stringstyle = \color{orange!80!black},
commentstyle = \color{red},
identifierstyle = \color{blue!80!white},
}
\lstdefinestyle{Bash}{
language = bash
}
\usepackage{subcaption}
\usepackage{booktabs} % toprule
\usepackage[mathcal]{eucal}
\usepackage[thehwcnt = 2]{iidef}
\allowdisplaybreaks
\thecourseinstitute{清华大学电子工程系}
\thecoursename{\textbf{媒体与认知} \space 课堂2}
\theterm{2023-2024学年春季学期}
\hwname{作业}
\begin{document}
\courseheader
\name{高艺轩}
\vspace{3mm}
\centerline{\textbf{\Large{理论部分}}}
\section{单选题(15分)}
\subsection{\underline{C}}
\subsection{\underline{D}}
\subsection{\underline{D}}
\subsection{\underline{C}}
\subsection{\underline{B}}
\section{计算题(15分)}
\subsection{
已知某卷积层的输入为$X$(该批量中样本数目为1,输入样本通道数为1),采用一个卷积核$W$,即卷积输出通道数为1,卷积核尺寸为$2\times 2$,卷积的步长为1,无边界延拓,偏置量为$b$:
$$X=\left[ \begin{array}{ccc}
0.5 & -0.2 & 0.3 \\
0.6 & 0.4 & -0.1 \\
-0.4 & 0.5 & 0.2
\end{array}\right],
W=\left[ \begin{array}{cc}
0.1 & -0.2 \\
-0.3 & 0.4
\end{array}\right], b=0.04$$
}
\subsubsection{请计算卷积层的输出$Y$}
\[\begin{cases}
Y_{11} = 0.5 \times 0.1 + (-0.2) \times (-0.2) + 0.6 \times (-0.3) + 0.4 \times 0.4 + 0.04 = 0.11\\
Y_{12} = (-0.2) \times 0.1 + 0.3 \times (-0.2) + 0.4 \times (-0.3) + (-0.1) \times 0.4 + 0.04 = -0.2\\
Y_{21} = 0.6 \times 0.1 + 0.4 \times (-0.2) + (-0.4) \times (-0.3) + 0.5 \times 0.4 + 0.04 = 0.34\\
Y_{22} = 0.4 \times 0.1 + (-0.1) \times (-0.2) + 0.5 \times (-0.3) + 0.2 \times 0.4 + 0.04 = 0.03
\end{cases}\]
\subsubsection{若训练过程中的目标函数为$L$,且已知$\frac{\partial L}{\partial Y}=\left[ \begin{array}{cc}
0.3 & 0.1 \\
-0.4 & 0.2
\end{array} \right]$,请计算$\frac{\partial L}{\partial X}$
}
注:本题的计算方式不限,但需要提供计算过程以及各步骤的结果。
\vspace{6mm}
\begin{proof}[解]
首先,
\[\frac{\partial L}{\partial Y} = \begin{bmatrix}
\frac{\partial L}{\partial Y_{11}} & \frac{\partial L}{\partial Y_{12}}\\
\frac{\partial L}{\partial Y_{21}} & \frac{\partial L}{\partial Y_{22}}
\end{bmatrix}\]
\[\frac{\partial L}{\partial X} = \begin{bmatrix}
\frac{\partial L}{\partial X_{11}} & \frac{\partial L}{\partial X_{12}} & \frac{\partial L}{\partial X_{13}}\\
\frac{\partial L}{\partial X_{21}} & \frac{\partial L}{\partial X_{22}} & \frac{\partial L}{\partial X_{23}}\\
\frac{\partial L}{\partial X_{31}} & \frac{\partial L}{\partial X_{32}} & \frac{\partial L}{\partial X_{33}}
\end{bmatrix}\]
同时,根据链式法则,
\[\frac{\partial L}{\partial X_{11}} = \frac{\partial Y_{11}}{\partial X_{11}} \frac{\partial L}{\partial Y_{11}} + \frac{\partial Y_{12}}{\partial X_{11}} \frac{\partial L}{\partial Y_{12}} + \frac{\partial Y_{21}}{\partial X_{11}} \frac{\partial L}{\partial Y_{21}} + \frac{\partial Y_{22}}{\partial X_{11}} \frac{\partial L}{\partial Y_{22}}\]
其它的$\frac{\partial L}{\partial X_{12}}, \dots, \frac{\partial L}{\partial X_{33}}$的计算方式也是类似的。因此,
\[\frac{\partial L}{\partial X} = \sum_{i = 1}^2 \sum_{j = 1}^2
\begin{bmatrix}
\frac{\partial Y_{ij}}{\partial X_{11}} & \cdots & \frac{\partial Y_{ij}}{\partial X_{13}}\\
\vdots & \ddots & \vdots\\
\frac{\partial Y_{ij}}{\partial X_{31}} & \cdots & \frac{\partial Y_{ij}}{\partial X_{33}}
\end{bmatrix} \frac{\partial L}{\partial Y_{ij}} = \sum_{i = 1}^2 \sum_{j = 1}^2 \frac{\partial Y_{ij}}{\partial X} \frac{\partial L}{\partial Y_{ij}}\]
式中的$\frac{\partial Y_{ij}}{\partial X}$取决于$Y_{ij}$是由$X$中的哪几个元素卷积得到的,它们是$W$在$3 \times 3$矩阵中的平移。综合起来,有
\begin{align*}
\frac{\partial L}{\partial X} & =
\begin{bmatrix}
0.1 & -0.2 & 0\\
-0.3 & 0.4 & 0\\
0 & 0 & 0
\end{bmatrix} \frac{\partial L}{\partial Y_{11}}
+
\begin{bmatrix}
0 & 0.1 & -0.2\\
0 & -0.3 & 0.4\\
0 & 0 & 0
\end{bmatrix} \frac{\partial L}{\partial Y_{12}}\\
& \quad +
\begin{bmatrix}
0 & 0 & 0\\
0.1 & -0.2 & 0\\
-0.3 & 0.4 & 0
\end{bmatrix} \frac{\partial L}{\partial Y_{21}}
+
\begin{bmatrix}
0 & 0 & 0\\
0 & 0.1 & -0.2\\
0 & -0.3 & 0.4
\end{bmatrix} \frac{\partial L}{\partial Y_{22}}\\
& = \mathrm{zeropad}(W) \ast \frac{\partial L}{\partial Y}\\
& =
\begin{bmatrix}
0.03 & -0.06 & 0\\
-0.09 & 0.12 & 0\\
0 & 0 & 0
\end{bmatrix}
+
\begin{bmatrix}
0 & 0.01 & -0.02\\
0 & -0.03 & 0.04\\
0 & 0 & 0
\end{bmatrix}\\
& \quad +
\begin{bmatrix}
0 & 0 & 0\\
-0.04 & 0.08 & 0\\
0.12 & -0.16 & 0
\end{bmatrix}
+
\begin{bmatrix}
0 & 0 & 0\\
0 & 0.02 & -0.04\\
0 & -0.06 & 0.08
\end{bmatrix}\\
& =
\begin{bmatrix}
0.03 & -0.05 & -0.02\\
-0.13 & 0.19 & 0\\
0.12 & -0.22 & 0.08
\end{bmatrix} \qedhere
\end{align*}
\end{proof}
\centerline{\textbf{\Large{编程部分}}}
\vspace{3mm}
% 请根据是否选择自选课题的情况选择“编程作业报告”或“自选课题开题报告”中的一项完成
\section{编程作业报告}
\subsection{探究batch normalization和dropout的作用}
\begin{enumerate}
\item 使用默认配置训练模型:
\begin{lstlisting}[style=Bash]
python train.py --ckpt_path checkpoints/default
\end{lstlisting}
\begin{figure}[H]
\centering
\includegraphics[width=\linewidth]{img/models/default/loss_and_acc.jpg}
\end{figure}
之后测试得到的正确率为90.8\%
\item 启用batch normalization
\begin{lstlisting}[style=Bash]
python train.py --ckpt_path checkpoints/bn --bn
\end{lstlisting}
\begin{figure}[H]
\centering
\includegraphics[width=\linewidth]{img/models/bn/loss_and_acc.jpg}
\end{figure}
测试得到的正确率为95.9\%
\item 启用dropout并设置概率为0.3
\begin{lstlisting}[style=Bash]
python train.py --ckpt_path checkpoints/dropout --dropout 0.3
\end{lstlisting}
\begin{figure}[H]
\centering
\includegraphics[width=\linewidth]{img/models/dropout/loss_and_acc.jpg}
\end{figure}
测试后得到的正确率为94.1\%
\end{enumerate}
\subsection{探究数据增广的作用}
考虑到在不同的视角下,交通标志可能有旋转或者变形,因此使用
\begin{lstlisting}[style=Python]
transforms.RandomAffine(degrees=30,shear=10)
\end{lstlisting}
来对数据进行随机的形变与旋转;另外,考虑到可能在不同的光线条件下导致对比度变化,因此使用
\begin{lstlisting}[style=Python]
transforms.RandomAutocontrast()
\end{lstlisting}
来对数据进行随机的对比度调整。
执行
\begin{lstlisting}[style=Bash]
python unit_test.py data_loader
\end{lstlisting}
得到
\begin{figure}[H]
\centering
\includegraphics[width=\linewidth]{img/augmentation.jpg}
\caption{数据增广后的结果}
\end{figure}
训练最优模型使用的命令为
\begin{lstlisting}[style=Bash]
python train.py --ckpt_path checkpoints/bn_aug --bn --augment --epoch 20
\end{lstlisting}
\begin{figure}[H]
\centering
\includegraphics[width=\linewidth]{img/models/bn_aug/loss_and_acc.jpg}
\end{figure}
测试得到的正确率为96.0\%,略微高于不使用数据增广时的结果。
\subsection{探究空间变换网络STN的作用}
运行
\begin{lstlisting}[style=Bash]
python train.py --ckpt_path checkpoints/stn --bn --stn
\end{lstlisting}
\begin{figure}[H]
\centering
\includegraphics[width=\linewidth]{img/models/stn/loss_and_acc.jpg}
\end{figure}
测试得到的正确率为94.6\%。正确率比不使用STN时反而有所降低,可能是设计的网络结构不够理想导致的。
\subsection{可视化}
\begin{enumerate}
\item 可视化各层卷积核:
\begin{figure}[H]
\centering
\includegraphics[width=\linewidth]{img/filter/filter_layer_0.jpg}
\caption{第0层的卷积核}
\end{figure}
\begin{figure}[H]
\centering
\includegraphics[width=\linewidth]{img/filter/filter_layer_1.jpg}
\caption{第1层的卷积核}
\end{figure}
\begin{figure}[H]
\centering
\includegraphics[width=\linewidth]{img/filter/filter_layer_2.jpg}
\caption{第2层的卷积核}
\end{figure}
\begin{figure}[H]
\centering
\includegraphics[width=\linewidth]{img/filter/filter_layer_3.jpg}
\caption{第3层的卷积核}
\end{figure}
\begin{figure}[H]
\centering
\includegraphics[width=\linewidth]{img/filter/filter_layer_4.jpg}
\caption{第4层的卷积核}
\end{figure}
\item 可视化各层卷积层的输出特征图
\begin{figure}[H]
\centering
\includegraphics[width=\linewidth]{img/feature/layer_0/feature_map.jpg}
\caption{第0层的卷积核特征图}
\end{figure}
\begin{figure}[H]
\centering
\includegraphics[width=\linewidth]{img/feature/layer_1/feature_map.jpg}
\caption{第1层的卷积核特征图}
\end{figure}
\begin{figure}[H]
\centering
\includegraphics[width=\linewidth]{img/feature/layer_2/feature_map.jpg}
\caption{第2层的卷积核特征图}
\end{figure}
\begin{figure}[H]
\centering
\includegraphics[width=\linewidth]{img/feature/layer_3/feature_map.jpg}
\caption{第3层的卷积核特征图}
\end{figure}
\begin{figure}[H]
\centering
\includegraphics[width=\linewidth]{img/feature/layer_4/feature_map.jpg}
\caption{第4层的卷积核特征图}
\end{figure}
\item t-SNE可视化最后一层隐藏层的输出特征
\begin{figure}[H]
\centering
\includegraphics[width=\linewidth]{img/tsne/tsne.jpg}
\end{figure}
对最后一层隐藏层的输出进行t-SNE可视化,结果表明不同类别的输入已经通过非线性变换被映射到了不同的聚类中。
\item STN学习到的变换
\begin{figure}[H]
\centering
\includegraphics[width=\linewidth]{img/stn/stn.jpg}
\end{figure}
网络尽可能将所有的路牌都变换到了同样的倾斜角度。
\end{enumerate}
\section{遇到的问题与解决办法}
在自定义STN网络的时候,我最开始使用了比较小的卷积核,使得STN的效果很差,使用之后正确率只有80\%。之后我分析认为,STN主要要感知整个图片的倾斜以及旋转情况,需要较大的视野,因此选择了较大的卷积核,之后得到了比较理想的效果。
完成作业没有使用大模型。
% \section{自选课题工作进度汇报}
\end{document}
%%% Local Variables:
%%% mode: latex
%%% TeX-master: t
%%% End: