347 lines
12 KiB
TeX
347 lines
12 KiB
TeX
% Homework template for Inference and Information
|
||
% UPDATE: September 26, 2017 by Xiangxiang
|
||
\documentclass[a4paper]{article}
|
||
\usepackage{ctex}
|
||
\usepackage{amsmath, amssymb, amsthm}
|
||
\usepackage{moreenum}
|
||
\usepackage{mathtools}
|
||
\usepackage{url}
|
||
\usepackage{bm}
|
||
\usepackage{enumitem}
|
||
\usepackage{graphicx}
|
||
\usepackage{listings}
|
||
\usepackage{fontspec}
|
||
\usepackage{xcolor}
|
||
\usepackage{float}
|
||
% \usepackage{color}
|
||
|
||
\newfontfamily\codefont[Ligatures=ResetAll]{Fira Code}[Contextuals={Alternate}]
|
||
\newfontfamily\cascadia{Cascadia Code}
|
||
|
||
\lstset{
|
||
basicstyle = \small\codefont,
|
||
% ---
|
||
tabsize = 4,
|
||
showstringspaces = false,
|
||
numbers = left,
|
||
numberstyle = \codefont,
|
||
% ---
|
||
breaklines = true,
|
||
captionpos = t,
|
||
% ---
|
||
frame = l,
|
||
flexiblecolumns,
|
||
}
|
||
|
||
\lstdefinestyle{Python}{
|
||
language = Python, % 语言选Python
|
||
keywordstyle = \color{blue},
|
||
keywordstyle = [2] \color{teal},
|
||
stringstyle = \color{orange!80!black},
|
||
commentstyle = \color{red},
|
||
identifierstyle = \color{blue!80!white},
|
||
}
|
||
|
||
\lstdefinestyle{Bash}{
|
||
language = bash
|
||
}
|
||
\usepackage{subcaption}
|
||
\usepackage{booktabs} % toprule
|
||
\usepackage[mathcal]{eucal}
|
||
\usepackage[thehwcnt = 2]{iidef}
|
||
|
||
\allowdisplaybreaks
|
||
|
||
\thecourseinstitute{清华大学电子工程系}
|
||
\thecoursename{\textbf{媒体与认知} \space 课堂2}
|
||
\theterm{2023-2024学年春季学期}
|
||
\hwname{作业}
|
||
\begin{document}
|
||
\courseheader
|
||
\name{高艺轩}
|
||
\vspace{3mm}
|
||
\centerline{\textbf{\Large{理论部分}}}
|
||
|
||
\section{单选题(15分)}
|
||
\subsection{\underline{C}}
|
||
|
||
\subsection{\underline{D}}
|
||
|
||
\subsection{\underline{D}}
|
||
|
||
\subsection{\underline{C}}
|
||
|
||
\subsection{\underline{B}}
|
||
|
||
\section{计算题(15 分)}
|
||
\subsection{
|
||
已知某卷积层的输入为$X$(该批量中样本数目为1,输入样本通道数为1),采用一个卷积核$W$,即卷积输出通道数为1,卷积核尺寸为$2\times 2$,卷积的步长为1,无边界延拓,偏置量为$b$:
|
||
$$X=\left[ \begin{array}{ccc}
|
||
0.5 & -0.2 & 0.3 \\
|
||
0.6 & 0.4 & -0.1 \\
|
||
-0.4 & 0.5 & 0.2
|
||
\end{array}\right],
|
||
W=\left[ \begin{array}{cc}
|
||
0.1 & -0.2 \\
|
||
-0.3 & 0.4
|
||
\end{array}\right], b=0.04$$
|
||
}
|
||
\subsubsection{请计算卷积层的输出$Y$。}
|
||
\[\begin{cases}
|
||
Y_{11} = 0.5 \times 0.1 + (-0.2) \times (-0.2) + 0.6 \times (-0.3) + 0.4 \times 0.4 + 0.04 = 0.11\\
|
||
Y_{12} = (-0.2) \times 0.1 + 0.3 \times (-0.2) + 0.4 \times (-0.3) + (-0.1) \times 0.4 + 0.04 = -0.2\\
|
||
Y_{21} = 0.6 \times 0.1 + 0.4 \times (-0.2) + (-0.4) \times (-0.3) + 0.5 \times 0.4 + 0.04 = 0.34\\
|
||
Y_{22} = 0.4 \times 0.1 + (-0.1) \times (-0.2) + 0.5 \times (-0.3) + 0.2 \times 0.4 + 0.04 = 0.03
|
||
\end{cases}\]
|
||
|
||
\subsubsection{若训练过程中的目标函数为$L$,且已知$\frac{\partial L}{\partial Y}=\left[ \begin{array}{cc}
|
||
0.3 & 0.1 \\
|
||
-0.4 & 0.2
|
||
\end{array} \right]$,请计算$\frac{\partial L}{\partial X}$。
|
||
}
|
||
|
||
注:本题的计算方式不限,但需要提供计算过程以及各步骤的结果。
|
||
\vspace{6mm}
|
||
|
||
\begin{proof}[解]
|
||
首先,
|
||
\[\frac{\partial L}{\partial Y} = \begin{bmatrix}
|
||
\frac{\partial L}{\partial Y_{11}} & \frac{\partial L}{\partial Y_{12}}\\
|
||
\frac{\partial L}{\partial Y_{21}} & \frac{\partial L}{\partial Y_{22}}
|
||
\end{bmatrix}\]
|
||
\[\frac{\partial L}{\partial X} = \begin{bmatrix}
|
||
\frac{\partial L}{\partial X_{11}} & \frac{\partial L}{\partial X_{12}} & \frac{\partial L}{\partial X_{13}}\\
|
||
\frac{\partial L}{\partial X_{21}} & \frac{\partial L}{\partial X_{22}} & \frac{\partial L}{\partial X_{23}}\\
|
||
\frac{\partial L}{\partial X_{31}} & \frac{\partial L}{\partial X_{32}} & \frac{\partial L}{\partial X_{33}}
|
||
\end{bmatrix}\]
|
||
同时,根据链式法则,
|
||
\[\frac{\partial L}{\partial X_{11}} = \frac{\partial Y_{11}}{\partial X_{11}} \frac{\partial L}{\partial Y_{11}} + \frac{\partial Y_{12}}{\partial X_{11}} \frac{\partial L}{\partial Y_{12}} + \frac{\partial Y_{21}}{\partial X_{11}} \frac{\partial L}{\partial Y_{21}} + \frac{\partial Y_{22}}{\partial X_{11}} \frac{\partial L}{\partial Y_{22}}\]
|
||
其它的$\frac{\partial L}{\partial X_{12}}, \dots, \frac{\partial L}{\partial X_{33}}$的计算方式也是类似的。因此,
|
||
\[\frac{\partial L}{\partial X} = \sum_{i = 1}^2 \sum_{j = 1}^2
|
||
\begin{bmatrix}
|
||
\frac{\partial Y_{ij}}{\partial X_{11}} & \cdots & \frac{\partial Y_{ij}}{\partial X_{13}}\\
|
||
\vdots & \ddots & \vdots\\
|
||
\frac{\partial Y_{ij}}{\partial X_{31}} & \cdots & \frac{\partial Y_{ij}}{\partial X_{33}}
|
||
\end{bmatrix} \frac{\partial L}{\partial Y_{ij}} = \sum_{i = 1}^2 \sum_{j = 1}^2 \frac{\partial Y_{ij}}{\partial X} \frac{\partial L}{\partial Y_{ij}}\]
|
||
式中的$\frac{\partial Y_{ij}}{\partial X}$取决于$Y_{ij}$是由$X$中的哪几个元素卷积得到的,它们恰好是$W$在$3 \times 3$零矩阵中的平移。综合起来,有
|
||
\begin{align*}
|
||
\frac{\partial L}{\partial X} & =
|
||
\begin{bmatrix}
|
||
0.1 & -0.2 & 0\\
|
||
-0.3 & 0.4 & 0\\
|
||
0 & 0 & 0
|
||
\end{bmatrix} \frac{\partial L}{\partial Y_{11}}
|
||
+
|
||
\begin{bmatrix}
|
||
0 & 0.1 & -0.2\\
|
||
0 & -0.3 & 0.4\\
|
||
0 & 0 & 0
|
||
\end{bmatrix} \frac{\partial L}{\partial Y_{12}}\\
|
||
& \quad +
|
||
\begin{bmatrix}
|
||
0 & 0 & 0\\
|
||
0.1 & -0.2 & 0\\
|
||
-0.3 & 0.4 & 0
|
||
\end{bmatrix} \frac{\partial L}{\partial Y_{21}}
|
||
+
|
||
\begin{bmatrix}
|
||
0 & 0 & 0\\
|
||
0 & 0.1 & -0.2\\
|
||
0 & -0.3 & 0.4
|
||
\end{bmatrix} \frac{\partial L}{\partial Y_{22}}\\
|
||
& = \mathrm{zeropad}(W) \ast \frac{\partial L}{\partial Y}\\
|
||
& =
|
||
\begin{bmatrix}
|
||
0.03 & -0.06 & 0\\
|
||
-0.09 & 0.12 & 0\\
|
||
0 & 0 & 0
|
||
\end{bmatrix}
|
||
+
|
||
\begin{bmatrix}
|
||
0 & 0.01 & -0.02\\
|
||
0 & -0.03 & 0.04\\
|
||
0 & 0 & 0
|
||
\end{bmatrix}\\
|
||
& \quad +
|
||
\begin{bmatrix}
|
||
0 & 0 & 0\\
|
||
-0.04 & 0.08 & 0\\
|
||
0.12 & -0.16 & 0
|
||
\end{bmatrix}
|
||
+
|
||
\begin{bmatrix}
|
||
0 & 0 & 0\\
|
||
0 & 0.02 & -0.04\\
|
||
0 & -0.06 & 0.08
|
||
\end{bmatrix}\\
|
||
& =
|
||
\begin{bmatrix}
|
||
0.03 & -0.05 & -0.02\\
|
||
-0.13 & 0.19 & 0\\
|
||
0.12 & -0.22 & 0.08
|
||
\end{bmatrix} \qedhere
|
||
\end{align*}
|
||
\end{proof}
|
||
|
||
\centerline{\textbf{\Large{编程部分}}}
|
||
\vspace{3mm}
|
||
|
||
% 请根据是否选择自选课题的情况选择“编程作业报告”或“自选课题开题报告”中的一项完成
|
||
\section{编程作业报告}
|
||
\subsection{探究batch normalization和dropout的作用}
|
||
\begin{enumerate}
|
||
\item 使用默认配置训练模型:
|
||
\begin{lstlisting}[style=Bash]
|
||
python train.py --ckpt_path checkpoints/default
|
||
\end{lstlisting}
|
||
\begin{figure}[H]
|
||
\centering
|
||
\includegraphics[width=\linewidth]{img/models/default/loss_and_acc.jpg}
|
||
\end{figure}
|
||
之后测试得到的正确率为90.8\%。
|
||
\item 启用batch normalization:
|
||
\begin{lstlisting}[style=Bash]
|
||
python train.py --ckpt_path checkpoints/bn --bn
|
||
\end{lstlisting}
|
||
\begin{figure}[H]
|
||
\centering
|
||
\includegraphics[width=\linewidth]{img/models/bn/loss_and_acc.jpg}
|
||
\end{figure}
|
||
测试得到的正确率为95.9\%。
|
||
\item 启用dropout并设置概率为0.3:
|
||
\begin{lstlisting}[style=Bash]
|
||
python train.py --ckpt_path checkpoints/dropout --dropout 0.3
|
||
\end{lstlisting}
|
||
\begin{figure}[H]
|
||
\centering
|
||
\includegraphics[width=\linewidth]{img/models/dropout/loss_and_acc.jpg}
|
||
\end{figure}
|
||
测试后得到的正确率为94.1\%。
|
||
\end{enumerate}
|
||
|
||
\subsection{探究数据增广的作用}
|
||
考虑到在不同的视角下,交通标志可能有旋转或者变形,因此使用
|
||
\begin{lstlisting}[style=Python]
|
||
transforms.RandomAffine(degrees=30,shear=10)
|
||
\end{lstlisting}
|
||
来对数据进行随机的形变与旋转;另外,考虑到可能在不同的光线条件下导致对比度变化,因此使用
|
||
\begin{lstlisting}[style=Python]
|
||
transforms.RandomAutocontrast()
|
||
\end{lstlisting}
|
||
来对数据进行随机的对比度调整。
|
||
|
||
执行
|
||
\begin{lstlisting}[style=Bash]
|
||
python unit_test.py data_loader
|
||
\end{lstlisting}
|
||
得到
|
||
\begin{figure}[H]
|
||
\centering
|
||
\includegraphics[width=\linewidth]{img/augmentation.jpg}
|
||
\caption{数据增广后的结果}
|
||
\end{figure}
|
||
|
||
训练最优模型使用的命令为
|
||
\begin{lstlisting}[style=Bash]
|
||
python train.py --ckpt_path checkpoints/bn_aug --bn --augment --epoch 20
|
||
\end{lstlisting}
|
||
\begin{figure}[H]
|
||
\centering
|
||
\includegraphics[width=\linewidth]{img/models/bn_aug/loss_and_acc.jpg}
|
||
\end{figure}
|
||
测试得到的正确率为96.0\%,略微高于不使用数据增广时的结果。
|
||
|
||
\subsection{探究空间变换网络(STN)的作用}
|
||
运行
|
||
\begin{lstlisting}[style=Bash]
|
||
python train.py --ckpt_path checkpoints/stn --bn --stn
|
||
\end{lstlisting}
|
||
\begin{figure}[H]
|
||
\centering
|
||
\includegraphics[width=\linewidth]{img/models/stn/loss_and_acc.jpg}
|
||
\end{figure}
|
||
测试得到的正确率为94.6\%。正确率比不使用STN时反而有所降低,可能是设计的网络结构不够理想导致的。
|
||
|
||
\subsection{可视化}
|
||
\begin{enumerate}
|
||
\item 可视化各层卷积核:
|
||
\begin{figure}[H]
|
||
\centering
|
||
\includegraphics[width=\linewidth]{img/filter/filter_layer_0.jpg}
|
||
\caption{第0层的卷积核}
|
||
\end{figure}
|
||
\begin{figure}[H]
|
||
\centering
|
||
\includegraphics[width=\linewidth]{img/filter/filter_layer_1.jpg}
|
||
\caption{第1层的卷积核}
|
||
\end{figure}
|
||
\begin{figure}[H]
|
||
\centering
|
||
\includegraphics[width=\linewidth]{img/filter/filter_layer_2.jpg}
|
||
\caption{第2层的卷积核}
|
||
\end{figure}
|
||
\begin{figure}[H]
|
||
\centering
|
||
\includegraphics[width=\linewidth]{img/filter/filter_layer_3.jpg}
|
||
\caption{第3层的卷积核}
|
||
\end{figure}
|
||
\begin{figure}[H]
|
||
\centering
|
||
\includegraphics[width=\linewidth]{img/filter/filter_layer_4.jpg}
|
||
\caption{第4层的卷积核}
|
||
\end{figure}
|
||
\item 可视化各层卷积层的输出特征图
|
||
\begin{figure}[H]
|
||
\centering
|
||
\includegraphics[width=\linewidth]{img/feature/layer_0/feature_map.jpg}
|
||
\caption{第0层的卷积核特征图}
|
||
\end{figure}
|
||
\begin{figure}[H]
|
||
\centering
|
||
\includegraphics[width=\linewidth]{img/feature/layer_1/feature_map.jpg}
|
||
\caption{第1层的卷积核特征图}
|
||
\end{figure}
|
||
\begin{figure}[H]
|
||
\centering
|
||
\includegraphics[width=\linewidth]{img/feature/layer_2/feature_map.jpg}
|
||
\caption{第2层的卷积核特征图}
|
||
\end{figure}
|
||
\begin{figure}[H]
|
||
\centering
|
||
\includegraphics[width=\linewidth]{img/feature/layer_3/feature_map.jpg}
|
||
\caption{第3层的卷积核特征图}
|
||
\end{figure}
|
||
\begin{figure}[H]
|
||
\centering
|
||
\includegraphics[width=\linewidth]{img/feature/layer_4/feature_map.jpg}
|
||
\caption{第4层的卷积核特征图}
|
||
\end{figure}
|
||
\item t-SNE可视化最后一层隐藏层的输出特征
|
||
\begin{figure}[H]
|
||
\centering
|
||
\includegraphics[width=\linewidth]{img/tsne/tsne.jpg}
|
||
\end{figure}
|
||
对最后一层隐藏层输出的t-SNE可视化结果表明,不同类别的输入经过非线性变换后,已经被映射到了不同的聚类中。
|
||
\item STN学习到的变换
|
||
\begin{figure}[H]
|
||
\centering
|
||
\includegraphics[width=\linewidth]{img/stn/stn.jpg}
|
||
\end{figure}
|
||
网络尽可能将所有的路牌都变换到了同样的倾斜角度。
|
||
\end{enumerate}
|
||
|
||
\section{遇到的问题与解决办法}
|
||
在自定义STN网络的时候,我最开始使用了比较小的卷积核,使得STN的效果很差,使用之后会使得正确率只有80\%;之后,我分析认为STN主要要感知整个图片的倾斜以及旋转情况,需要较大的视野,因此选择了较大的卷积核,之后得到了比较理想的效果。
|
||
|
||
完成作业没有使用大模型。
|
||
% \section{自选课题工作进度汇报}
|
||
|
||
\end{document}
|
||
|
||
|
||
|
||
%%% Local Variables:
|
||
%%% mode: latex
|
||
%%% TeX-master: t
|
||
%%% End:
|