245 lines
9.4 KiB
TeX
245 lines
9.4 KiB
TeX
% Homework Template
|
||
\documentclass[a4paper]{article}
|
||
\usepackage{ctex}
|
||
\usepackage{amsmath, amssymb, amsthm}
|
||
\usepackage{moreenum}
|
||
\usepackage{mathtools}
|
||
\usepackage{url}
|
||
\usepackage{bm}
|
||
\usepackage{enumitem}
|
||
\usepackage{graphicx}
|
||
\usepackage{subcaption}
|
||
\usepackage{booktabs} % toprule
|
||
\usepackage[mathcal]{eucal}
|
||
\usepackage[thehwcnt = 1]{iidef}
|
||
\usepackage{listings}
|
||
\usepackage[x11names]{xcolor}
|
||
\usepackage{float}
|
||
\usepackage[colorlinks, linkcolor=black, anchorcolor=green, citecolor=blue]{hyperref}
|
||
|
||
\DeclareMathOperator{\arctanh}{arctanh}
|
||
% \DeclareMathOperator{\diag}{diag}
|
||
|
||
\setenumerate[1]{label=(\arabic{*})}
|
||
\setenumerate[2]{label=\arabic{*})}
|
||
|
||
\definecolor{codekeyword}{RGB}{171, 0, 216}
|
||
\definecolor{codetypename}{RGB}{29, 37, 251}
|
||
\definecolor{codevariable}{RGB}{10, 23, 126}
|
||
\definecolor{codestring}{RGB}{157, 0, 25}
|
||
\definecolor{codecomment}{RGB}{31, 129, 19}
|
||
|
||
\newfontfamily\cascadia[Ligatures=ResetAll]{Cascadia Code}
|
||
% \newfontfamily\codefont[Ligatures=ResetAll]{Cascadia Code}
|
||
\newfontfamily\codefont[Ligatures=ResetAll]{Fira Code}[Contextuals={Alternate}]
|
||
% To enable ligature in listing, go check lstfiracode's github page and copy firacodestyle's settings.
|
||
|
||
\lstset{
|
||
basicstyle = \small\codefont,
|
||
% ---
|
||
tabsize = 4,
|
||
showstringspaces = false,
|
||
numbers = left,
|
||
numberstyle = \cascadia,
|
||
% ---
|
||
breaklines = true,
|
||
captionpos = t,
|
||
% ---
|
||
frame = l,
|
||
flexiblecolumns,
|
||
columns = fixed,
|
||
}
|
||
|
||
\thecourseinstitute{清华大学电子工程系}
|
||
\thecoursename{\textbf{媒体与认知} \space 课堂2}
|
||
\theterm{2023-2024学年春季学期}
|
||
\hwname{作业}
|
||
\begin{document}
|
||
\courseheader
|
||
% 请在YOUR NAME处填写自己的姓名
|
||
\name{高艺轩}
|
||
\vspace{3mm}
|
||
\centerline{\textbf{\Large{理论部分}}}
|
||
|
||
\section{单选题(15分)}
|
||
% 请在?处填写答案
|
||
\subsection{\underline{B}}
|
||
|
||
\subsection{\underline{A}}
|
||
|
||
\subsection{\underline{B}}
|
||
|
||
\subsection{\underline{A}}
|
||
|
||
\subsection{\underline{B}}
|
||
|
||
\section{计算题(15 分)}
|
||
\subsection{设隐含层为$\mathbf{z}=\mathbf{W}^T\mathbf{x}+\mathbf{b}$,其中$\mathbf{x}\in \mathbb{R}^{m \times 1}$,$\mathbf{z}\in \mathbb{R}^{n\times 1}$,$\mathbf{W}\in \mathbb{R}^{m\times n}$,$\mathbf{b} \in \mathbb{R}^{n\times 1}$均为已知,其激活函数如下:
|
||
$$\mathbf{y}=\delta(\mathbf{z})=\tanh(\mathbf{z})$$
|
||
$\tanh$表示双曲正切函数。若训练过程中的目标函数为$L$,且已知$L$对$\mathbf{y}$的导数$\frac{\partial L}{\partial \mathbf{y}}=[\frac{\partial L}{\partial y_1},\frac{\partial L}{\partial y_2},\dots,\frac{\partial L}{\partial y_n}]^T$和$\mathbf{y}=[y_1,y_2,\dots,y_n]^T$的值。
|
||
}
|
||
\subsubsection{请使用$\mathbf{y}$表示出$\frac{\partial \mathbf{y}^T}{\partial \mathbf{z}}$, 这里的$\mathbf{y}^T$ 为行向量。
|
||
}
|
||
|
||
\begin{proof}[解]
|
||
首先,对$i \neq j$,$\dfrac{\partial y_i}{\partial z_j} = 0$。
|
||
|
||
同时$y_i = \tanh(z_i)$,而$\dfrac{\mathrm{d}}{\mathrm{d}z}\tanh(z) = 1 - \tanh^2(z)$,因此
|
||
\[\frac{\partial y_i}{\partial z_i} = 1 - \tanh^2(z_i) = 1 - y_i^2\]
|
||
因此
|
||
\[\dfrac{\partial \boldsymbol{y}^\mathrm{T}}{\partial \boldsymbol{z}} = \diag\{1 - y_1^2, 1 - y_2^2, \dots, 1 - y_n^2\} \qedhere\]
|
||
\end{proof}
|
||
|
||
\subsubsection{请使用$\mathbf{y}$和$\frac{\partial L}{\partial \mathbf{y}}$表示$\frac{\partial L}{\partial \mathbf{x}}$,$\frac{\partial L}{\partial \mathbf{W}}$,$\frac{\partial L}{\partial \mathbf{b}}$。
|
||
}
|
||
提示:$\frac{\partial L}{\partial \mathbf{x}}$,$\frac{\partial L}{\partial \mathbf{W}}$,$\frac{\partial L}{\partial \mathbf{b}}$分别与$\mathbf{x}$,$\mathbf{W}$,$\mathbf{b}$具有相同维度。
|
||
|
||
\begin{proof}[解]
|
||
由链式法则
|
||
\[\frac{\partial L}{\partial \boldsymbol{x}} = \frac{\partial \boldsymbol{z}^\mathrm{T}}{\partial \boldsymbol{x}} \frac{\partial \boldsymbol{y}^\mathrm{T}}{\partial \boldsymbol{z}} \frac{\partial L}{\partial \boldsymbol{y}} = W \diag\{1 - y_1^2, 1 - y_2^2, \dots, 1 - y_n^2\} \frac{\partial L}{\partial \boldsymbol{y}}\]
|
||
|
||
对于$\dfrac{\partial L}{\partial W}$,由$z_j = \sum_{i=1}^{m} W_{ij} x_i + b_j$可得$\dfrac{\partial z_j}{\partial W_{ij}} = x_i$,因此逐元素地有
\[\frac{\partial L}{\partial W_{ij}} = \frac{\partial L}{\partial z_j} \frac{\partial z_j}{\partial W_{ij}} = x_i \frac{\partial L}{\partial z_j}, \qquad \text{其中} \quad \frac{\partial L}{\partial \boldsymbol{z}} = \frac{\partial \boldsymbol{y}^\mathrm{T}}{\partial \boldsymbol{z}} \frac{\partial L}{\partial \boldsymbol{y}}\]
写成矩阵形式(结果为$m \times n$矩阵,与$W$维度相同):
\begin{align*}
\frac{\partial L}{\partial W} & = \boldsymbol{x} \left(\frac{\partial L}{\partial \boldsymbol{z}}\right)^\mathrm{T}\\
& = \boldsymbol{x} \left(\diag\{1 - y_1^2, 1 - y_2^2, \dots, 1 - y_n^2\} \frac{\partial L}{\partial \boldsymbol{y}}\right)^\mathrm{T}\\
& = \boldsymbol{x} \left(\frac{\partial L}{\partial \boldsymbol{y}}\right)^\mathrm{T} \diag\{1 - y_1^2, 1 - y_2^2, \dots, 1 - y_n^2\}
\end{align*}
|
||
|
||
对于$\dfrac{\partial L}{\partial \boldsymbol{b}}$,由链式法则
|
||
\[\frac{\partial L}{\partial \boldsymbol{b}} = \frac{\partial \boldsymbol{z}^\mathrm{T}}{\partial \boldsymbol{b}} \frac{\partial \boldsymbol{y}^\mathrm{T}}{\partial \boldsymbol{z}} \frac{\partial L}{\partial \boldsymbol{y}} = I_n \frac{\partial \boldsymbol{y}^\mathrm{T}}{\partial \boldsymbol{z}} \frac{\partial L}{\partial \boldsymbol{y}} = \diag\{1 - y_1^2, 1 - y_2^2, \dots, 1 - y_n^2\} \frac{\partial L}{\partial \boldsymbol{y}} \qedhere\]
|
||
\end{proof}
|
||
\vspace{6mm}
|
||
\centerline{\textbf{\Large{编程部分}}}
|
||
|
||
|
||
\vspace{3mm}
|
||
% 请根据是否选择自选课题的情况选择“编程作业报告”或“自选课题开题报告”中的一项完成
|
||
\section{编程作业报告}
|
||
% 请在此处完成编程作业报告
|
||
完成后的代码也可以在\url{https://git.unlockableworld.com/unlockable/MediaNCognition}中找到。
|
||
\begin{enumerate}
|
||
\item 使用默认配置进行训练和测试。
|
||
\begin{enumerate}
|
||
\item 训练模型。
|
||
|
||
输入:
|
||
\lstinputlisting{codes/1.1.in.txt}
|
||
|
||
输出:
|
||
\lstinputlisting{codes/1.1.out.txt}
|
||
\begin{figure}[H]
|
||
\centering
|
||
\includegraphics[width=0.9\linewidth]{img/1default_train.png}
|
||
\end{figure}
|
||
|
||
\item 测试模型。
|
||
|
||
输入:
|
||
\lstinputlisting{codes/1.2.in.txt}
|
||
|
||
输出:
|
||
\lstinputlisting{codes/1.2.out.txt}
|
||
\end{enumerate}
|
||
\item 调整参数、使用Adam优化器训练并测试。
|
||
\begin{enumerate}
|
||
\item 训练模型。
|
||
|
||
输入:
|
||
\lstinputlisting{codes/2.1.in.txt}
|
||
|
||
输出:
|
||
\lstinputlisting{codes/2.1.out.txt}
|
||
\begin{figure}[H]
|
||
\centering
|
||
\includegraphics[width=0.9\linewidth]{img/2adam_optim.png}
|
||
\end{figure}
|
||
\item 测试性能。
|
||
|
||
输入:
|
||
\lstinputlisting{codes/2.2.in.txt}
|
||
|
||
输出:
|
||
\lstinputlisting{codes/2.2.out.txt}
|
||
\end{enumerate}
|
||
|
||
\item 使用效果最佳的模型测试。
|
||
经过简单的尝试,发现使用
|
||
\lstinputlisting{codes/self_train.in.txt}
|
||
可以使测试集准确率达到88.8\%,有略微的提升。训练的loss曲线:
|
||
\begin{figure}[H]
|
||
\centering
|
||
\includegraphics[width=.9\linewidth]{img/3found_best.png}
|
||
\end{figure}
|
||
使用它进行预测:
|
||
\begin{figure}[H]
|
||
\centering
|
||
\begin{subfigure}[b]{.3\linewidth}
|
||
\includegraphics[width=\linewidth]{img/predict/predict01.png}
|
||
\subcaption{预测:A}
|
||
\end{subfigure}
|
||
\hfill
|
||
\begin{subfigure}[b]{.3\linewidth}
|
||
\includegraphics[width=\linewidth]{img/predict/predict02.png}
|
||
\subcaption{预测:B}
|
||
\end{subfigure}
|
||
\hfill
|
||
\begin{subfigure}[b]{.3\linewidth}
|
||
\includegraphics[width=\linewidth]{img/predict/predict03.png}
|
||
\subcaption{预测:M}
|
||
\end{subfigure}
|
||
|
||
\begin{subfigure}[b]{.3\linewidth}
|
||
\includegraphics[width=\linewidth]{img/predict/predict04.png}
|
||
\subcaption{预测:R}
|
||
\end{subfigure}
|
||
\hfill
|
||
\begin{subfigure}[b]{.3\linewidth}
|
||
\includegraphics[width=\linewidth]{img/predict/predict05.png}
|
||
\subcaption{预测:M}
|
||
\end{subfigure}
|
||
\hfill
|
||
\begin{subfigure}[b]{.3\linewidth}
|
||
\includegraphics[width=\linewidth]{img/predict/predict06.png}
|
||
\subcaption{预测:O}
|
||
\end{subfigure}
|
||
|
||
\hfill
|
||
\begin{subfigure}[b]{.3\linewidth}
|
||
\includegraphics[width=\linewidth]{img/predict/predict07.png}
|
||
\subcaption{预测:B}
|
||
\end{subfigure}
|
||
\hfill
|
||
\begin{subfigure}[b]{.3\linewidth}
|
||
\includegraphics[width=\linewidth]{img/predict/predict08.png}
|
||
\subcaption{预测:W}
|
||
\end{subfigure}
|
||
\hfill
|
||
\end{figure}
|
||
\item 遇到的问题及解决方法
|
||
\begin{enumerate}
|
||
\item 代码中对灰度图像的矩阵进行标准化时,\lstinline{numpy}报错,提示不能对\lstinline{NumpyGenericArray}与\lstinline{float}进行\lstinline{/}运算。改用\lstinline{np.divide()}解决了这个问题。
|
||
\item 在利用训练好的模型进行预测时,发现模型对自己找到的大部分图片都预测错误;与训练集的图片进行对比后,发现主要问题是裁切字母时留下了过大的边距,导致模型不能正确识别输入。重新裁剪边框后,得到了正确的结果。
|
||
\end{enumerate}
|
||
\item 建议:希望下次发布作业代码可以利用清华的git。
|
||
\end{enumerate}
|
||
|
||
|
||
|
||
|
||
% \section{自选课题开题报告}
|
||
% 请在此处介绍自选课题
|
||
|
||
\end{document}
|
||
|
||
|
||
|
||
%%% Local Variables:
|
||
%%% mode: latex
|
||
%%% TeX-master: t
|
||
%%% End:
|