% Homework template for Inference and Information
% UPDATE: September 26, 2017 by Xiangxiang
%
% NOTE(review): this file loads fontspec, so it must be compiled with
% XeLaTeX or LuaLaTeX. The fonts "Fira Code" and "Cascadia Code" are
% requested by name and are assumed to be installed on the system.
\documentclass[a4paper]{article}

% --- Language, math and general-purpose packages ---
\usepackage{ctex}
\usepackage{amsmath, amssymb, amsthm}
\usepackage{moreenum}
\usepackage{mathtools}
\usepackage{url}
\usepackage{bm}
\usepackage{enumitem}
\usepackage{graphicx}
\usepackage{listings}
\usepackage{fontspec}
\usepackage{xcolor}
\usepackage{float}
% \usepackage{color}

% --- Fonts used for code listings ---
\newfontfamily\codefont[Ligatures=ResetAll]{Fira Code}[Contextuals={Alternate}]
\newfontfamily\cascadia{Cascadia Code}

% --- Global defaults for every lstlisting environment ---
\lstset{
    basicstyle = \small\codefont,
    % ---
    tabsize = 4,
    showstringspaces = false,
    numbers = left,
    numberstyle = \codefont,
    % ---
    breaklines = true,
    captionpos = t,
    % ---
    frame = l,
    flexiblecolumns,
}

% Listing style for Python snippets (selected with [style=Python]).
\lstdefinestyle{Python}{
    language = Python, % select the Python language definition
    keywordstyle = \color{blue},
    keywordstyle = [2] \color{teal},
    stringstyle = \color{orange!80!black},
    commentstyle = \color{red},
    identifierstyle = \color{blue!80!white},
}

% Listing style for shell commands (selected with [style=Bash]).
\lstdefinestyle{Bash}{
    language = bash
}

\usepackage{subcaption}
\usepackage{booktabs} % toprule
\usepackage[mathcal]{eucal}
% iidef is the course-provided homework style; it is assumed to supply
% \thecourseinstitute, \thecoursename, \theterm, \hwname, \courseheader
% and \name used below -- TODO confirm against iidef.sty.
\usepackage[thehwcnt = 2]{iidef}

% Allow long align environments to break across pages.
\allowdisplaybreaks

\thecourseinstitute{清华大学电子工程系}
\thecoursename{\textbf{媒体与认知} \space 课堂2}
\theterm{2023-2024学年春季学期}
\hwname{作业}

\begin{document}
\courseheader
\name{高艺轩}
\vspace{3mm}
\centerline{\textbf{\Large 理论部分}}

\section{单选题(15分)}
\subsection{\underline{C}}
\subsection{\underline{D}}
\subsection{\underline{D}}
\subsection{\underline{C}}
\subsection{\underline{B}}

\section{计算题(15 分)}
% The problem statement (including its display formula) is typeset as the
% subsection title, so no blank line may appear inside the argument.
\subsection{
已知某卷积层的输入为$X$(该批量中样本数目为1,输入样本通道数为1),采用一个卷积核$W$,即卷积输出通道数为1,卷积核尺寸为$2\times 2$,卷积的步长为1,无边界延拓,偏置量为$b$:
$$X=\left[
\begin{array}{ccc}
    0.5 & -0.2 & 0.3 \\
    0.6 & 0.4 & -0.1 \\
    -0.4 & 0.5 & 0.2
\end{array}\right],
W=\left[
\begin{array}{cc}
    0.1 & -0.2 \\
    -0.3 & 0.4
\end{array}\right],
b=0.04$$
}

\subsubsection{请计算卷积层的输出$Y$。}
% Each output element is the 2x2 window of X multiplied element-wise by W,
% summed, plus the bias b.
\[\begin{cases}
    Y_{11} = 0.5 \times 0.1 + (-0.2) \times (-0.2) + 0.6 \times (-0.3) + 0.4 \times 0.4 + 0.04 = 0.11\\
    Y_{12} = (-0.2) \times 0.1 + 0.3 \times (-0.2) + 0.4 \times (-0.3) + (-0.1) \times 0.4 + 0.04 = -0.2\\
    Y_{21} = 0.6 \times 0.1 + 0.4 \times (-0.2) + (-0.4) \times (-0.3) + 0.5 \times 0.4 + 0.04 = 0.34\\
    Y_{22} = 0.4 \times 0.1 + (-0.1) \times (-0.2) + 0.5 \times (-0.3) + 0.2 \times 0.4 + 0.04 = 0.03
\end{cases}\]

\subsubsection{若训练过程中的目标函数为$L$,且已知$\frac{\partial L}{\partial Y}=\left[
\begin{array}{cc}
    0.3 & 0.1 \\
    -0.4 & 0.2
\end{array}
\right]$,请计算$\frac{\partial L}{\partial X}$。
}
注:本题的计算方式不限,但需要提供计算过程以及各步骤的结果。

\vspace{6mm}

\begin{proof}[解]
首先,
\[\frac{\partial L}{\partial Y} =
\begin{bmatrix}
    \frac{\partial L}{\partial Y_{11}} & \frac{\partial L}{\partial Y_{12}}\\
    \frac{\partial L}{\partial Y_{21}} & \frac{\partial L}{\partial Y_{22}}
\end{bmatrix}\]
% Row 1, column 3 is X_{13} (the collapsed source repeated X_{12} here).
\[\frac{\partial L}{\partial X} =
\begin{bmatrix}
    \frac{\partial L}{\partial X_{11}} & \frac{\partial L}{\partial X_{12}} & \frac{\partial L}{\partial X_{13}}\\
    \frac{\partial L}{\partial X_{21}} & \frac{\partial L}{\partial X_{22}} & \frac{\partial L}{\partial X_{23}}\\
    \frac{\partial L}{\partial X_{31}} & \frac{\partial L}{\partial X_{32}} & \frac{\partial L}{\partial X_{33}}
\end{bmatrix}\]
同时,根据链式法则,
\[\frac{\partial L}{\partial X_{11}}
    = \frac{\partial Y_{11}}{\partial X_{11}} \frac{\partial L}{\partial Y_{11}}
    + \frac{\partial Y_{12}}{\partial X_{11}} \frac{\partial L}{\partial Y_{12}}
    + \frac{\partial Y_{21}}{\partial X_{11}} \frac{\partial L}{\partial Y_{21}}
    + \frac{\partial Y_{22}}{\partial X_{11}} \frac{\partial L}{\partial Y_{22}}\]
其它的$\frac{\partial L}{\partial X_{12}}, \dots, \frac{\partial L}{\partial X_{33}}$的计算方式也是类似的。因此,
\[\frac{\partial L}{\partial X}
    = \sum_{i = 1}^2 \sum_{j = 1}^2
    \begin{bmatrix}
        \frac{\partial Y_{ij}}{\partial X_{11}} & \cdots & \frac{\partial Y_{ij}}{\partial X_{13}}\\
        \vdots & \ddots & \vdots\\
        \frac{\partial Y_{ij}}{\partial X_{31}} & \cdots & \frac{\partial Y_{ij}}{\partial X_{33}}
    \end{bmatrix}
    \frac{\partial L}{\partial Y_{ij}}
    = \sum_{i = 1}^2 \sum_{j = 1}^2 \frac{\partial Y_{ij}}{\partial X} \frac{\partial L}{\partial Y_{ij}}\]
式中的$\frac{\partial Y_{ij}}{\partial X}$与对应元是由哪几个$X$中的元素卷积得到有关,它们是$W$在$3 \times 3$矩阵中的平移。综合起来,有
% The four 3x3 matrices are W placed at the four window positions; each is
% scaled by the matching entry of dL/dY (0.3, 0.1, -0.4, 0.2) and summed.
\begin{align*}
    \frac{\partial L}{\partial X}
    & = \begin{bmatrix}
        0.1 & -0.2 & 0\\
        -0.3 & 0.4 & 0\\
        0 & 0 & 0
    \end{bmatrix} \frac{\partial L}{\partial Y_{11}}
    + \begin{bmatrix}
        0 & 0.1 & -0.2\\
        0 & -0.3 & 0.4\\
        0 & 0 & 0
    \end{bmatrix} \frac{\partial L}{\partial Y_{12}}\\
    & \quad + \begin{bmatrix}
        0 & 0 & 0\\
        0.1 & -0.2 & 0\\
        -0.3 & 0.4 & 0
    \end{bmatrix} \frac{\partial L}{\partial Y_{21}}
    + \begin{bmatrix}
        0 & 0 & 0\\
        0 & 0.1 & -0.2\\
        0 & -0.3 & 0.4
    \end{bmatrix} \frac{\partial L}{\partial Y_{22}}\\
    & = \mathrm{zeropad}(W) \ast \frac{\partial L}{\partial Y}\\
    & = \begin{bmatrix}
        0.03 & -0.06 & 0\\
        -0.09 & 0.12 & 0\\
        0 & 0 & 0
    \end{bmatrix}
    + \begin{bmatrix}
        0 & 0.01 & -0.02\\
        0 & -0.03 & 0.04\\
        0 & 0 & 0
    \end{bmatrix}\\
    & \quad + \begin{bmatrix}
        0 & 0 & 0\\
        -0.04 & 0.08 & 0\\
        0.12 & -0.16 & 0
    \end{bmatrix}
    + \begin{bmatrix}
        0 & 0 & 0\\
        0 & 0.02 & -0.04\\
        0 & -0.06 & 0.08
    \end{bmatrix}\\
    & = \begin{bmatrix}
        0.03 & -0.05 & -0.02\\
        -0.13 & 0.19 & 0\\
        0.12 & -0.22 & 0.08
    \end{bmatrix} \qedhere
\end{align*}
\end{proof}

\centerline{\textbf{\Large 编程部分}}
\vspace{3mm}

% Complete exactly one of "programming assignment report" or "self-selected
% project proposal report", depending on whether a self-selected project
% was chosen.
\section{编程作业报告}
\subsection{探究batch normalization和dropout的作用}
% lstlisting bodies are kept flush-left so that no leading whitespace is
% printed as part of the listing.
\begin{enumerate}
\item 使用默认配置训练模型:
\begin{lstlisting}[style=Bash]
python train.py --ckpt_path checkpoints/default
\end{lstlisting}
\begin{figure}[H]
    \centering
    \includegraphics[width=\linewidth]{img/models/default/loss_and_acc.jpg}
\end{figure}
之后测试得到的正确率为90.8\%。
\item 启用batch normalization:
\begin{lstlisting}[style=Bash]
python train.py --ckpt_path checkpoints/bn --bn
\end{lstlisting}
\begin{figure}[H]
    \centering
    \includegraphics[width=\linewidth]{img/models/bn/loss_and_acc.jpg}
\end{figure}
测试得到的正确率为95.9\%。
\item 启用dropout并设置概率为0.3:
\begin{lstlisting}[style=Bash]
python train.py --ckpt_path checkpoints/dropout --dropout 0.3
\end{lstlisting}
\begin{figure}[H]
    \centering
    \includegraphics[width=\linewidth]{img/models/dropout/loss_and_acc.jpg}
\end{figure}
测试后得到的正确率为94.1\%。
\end{enumerate}

\subsection{探究数据增广的作用}
考虑到在不同的视角下,交通标志可能有旋转或者变形,因此使用
\begin{lstlisting}[style=Python]
transforms.RandomAffine(degrees=30,shear=10)
\end{lstlisting}
来对数据进行随机的形变与旋转;另外,考虑到可能在不同的光线条件下导致对比度变化,因此使用
\begin{lstlisting}[style=Python]
transforms.RandomAutocontrast()
\end{lstlisting}
来对数据进行随机的对比度调整。

执行
\begin{lstlisting}[style=Bash]
python unit_test.py data_loader
\end{lstlisting}
得到
\begin{figure}[H]
    \centering
    \includegraphics[width=\linewidth]{img/augmentation.jpg}
    \caption{数据增广后的结果}
\end{figure}
训练最优模型使用的命令为
\begin{lstlisting}[style=Bash]
python train.py --ckpt_path checkpoints/bn_aug --bn --augment --epoch 20
\end{lstlisting}
\begin{figure}[H]
    \centering
    \includegraphics[width=\linewidth]{img/models/bn_aug/loss_and_acc.jpg}
\end{figure}
测试得到的正确率为96.0\%,略微高于不使用数据增强时的结果。

\subsection{探究空间变换网络(STN)的作用}
运行
\begin{lstlisting}[style=Bash]
python train.py --ckpt_path checkpoints/stn --bn --stn
\end{lstlisting}
\begin{figure}[H]
    \centering
    \includegraphics[width=\linewidth]{img/models/stn/loss_and_acc.jpg}
\end{figure}
测试得到的正确率为94.6\%。正确率比不使用stn反而有所降低,可能是设计的网络结构不够理想导致的。

\subsection{可视化}
\begin{enumerate}
\item 可视化各层卷积核:
\begin{figure}[H]
    \centering
    \includegraphics[width=\linewidth]{img/filter/filter_layer_0.jpg}
    \caption{第0层的卷积核}
\end{figure}
\begin{figure}[H]
    \centering
    \includegraphics[width=\linewidth]{img/filter/filter_layer_1.jpg}
    \caption{第1层的卷积核}
\end{figure}
\begin{figure}[H]
    \centering
    \includegraphics[width=\linewidth]{img/filter/filter_layer_2.jpg}
    \caption{第2层的卷积核}
\end{figure}
\begin{figure}[H]
    \centering
    \includegraphics[width=\linewidth]{img/filter/filter_layer_3.jpg}
    \caption{第3层的卷积核}
\end{figure}
\begin{figure}[H]
    \centering
    \includegraphics[width=\linewidth]{img/filter/filter_layer_4.jpg}
    \caption{第4层的卷积核}
\end{figure}
\item 可视化各层卷积层的输出特征图
\begin{figure}[H]
    \centering
    \includegraphics[width=\linewidth]{img/feature/layer_0/feature_map.jpg}
    \caption{第0层的卷积核特征图}
\end{figure}
\begin{figure}[H]
    \centering
    \includegraphics[width=\linewidth]{img/feature/layer_1/feature_map.jpg}
    \caption{第1层的卷积核特征图}
\end{figure}
\begin{figure}[H]
    \centering
    \includegraphics[width=\linewidth]{img/feature/layer_2/feature_map.jpg}
    \caption{第2层的卷积核特征图}
\end{figure}
\begin{figure}[H]
    \centering
    \includegraphics[width=\linewidth]{img/feature/layer_3/feature_map.jpg}
    \caption{第3层的卷积核特征图}
\end{figure}
\begin{figure}[H]
    \centering
    \includegraphics[width=\linewidth]{img/feature/layer_4/feature_map.jpg}
    \caption{第4层的卷积核特征图}
\end{figure}
\item t-SNE可视化最后一层隐藏层的输出特征
\begin{figure}[H]
    \centering
    \includegraphics[width=\linewidth]{img/tsne/tsne.jpg}
\end{figure}
t-SNE最后一层的隐藏层的输出证明,不同类别的输入已经被通过非线性变换分类到了不同的聚类。
\item STN学习到的变换
\begin{figure}[H]
    \centering
    \includegraphics[width=\linewidth]{img/stn/stn.jpg}
\end{figure}
网络尽可能将所有的路牌都变换到了同样的倾斜角度。
\end{enumerate}

\section{遇到的问题与解决办法}
在自定义STN网络的时候,我最开始使用了比较小的卷积核,使得STN的效果很差,使用之后会使得正确率只有80\%;之后,我分析认为STN主要要感知整个图片的倾斜以及旋转情况,需要较大的视野,因此选择了较大的卷积核,之后得到了比较理想的效果。

完成作业没有使用大模型。

% \section{自选课题工作进度汇报}

\end{document}

%%% Local Variables:
%%% mode: latex
%%% TeX-master: t
%%% End: