From 1a180f3c89ee9f61dac35376ec35f77f6e2f8f33 Mon Sep 17 00:00:00 2001 From: unlockable Date: Fri, 5 Apr 2024 17:43:17 +0800 Subject: [PATCH] theoretical part --- hw2/report/main.tex | 97 ++++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 91 insertions(+), 6 deletions(-) diff --git a/hw2/report/main.tex b/hw2/report/main.tex index f1f107d..d3801d4 100644 --- a/hw2/report/main.tex +++ b/hw2/report/main.tex @@ -49,20 +49,20 @@ \hwname{作业} \begin{document} \courseheader -\name{YOUR NAME} +\name{高艺轩} \vspace{3mm} \centerline{\textbf{\Large{理论部分}}} \section{单选题(15分)} -\subsection{\underline{?}} +\subsection{\underline{A}} -\subsection{\underline{?}} +\subsection{\underline{D}} -\subsection{\underline{?}} +\subsection{\underline{D}} -\subsection{\underline{?}} +\subsection{\underline{D}} -\subsection{\underline{?}} +\subsection{\underline{B}} \section{计算题(15 分)} \subsection{ @@ -78,6 +78,12 @@ W=\left[ \begin{array}{cc} \end{array}\right], b=0.04$$ } \subsubsection{请计算卷积层的输出$Y$。} +\[\begin{cases} + Y_{11} = 0.5 \times 0.1 + (-0.2) \times (-0.2) + 0.6 \times (-0.3) + 0.4 \times 0.4 + 0.04 = 0.11\\ + Y_{12} = (-0.2) \times 0.1 + 0.3 \times (-0.2) + 0.4 \times (-0.3) + (-0.1) \times 0.4 + 0.04 = -0.2\\ + Y_{21} = 0.6 \times 0.1 + 0.4 \times (-0.2) + (-0.4) \times (-0.3) + 0.5 \times 0.4 + 0.04 = 0.34\\ + Y_{22} = 0.4 \times 0.1 + (-0.1) \times (-0.2) + 0.5 \times (-0.3) + 0.2 \times 0.4 + 0.04 = 0.03 +\end{cases}\] \subsubsection{若训练过程中的目标函数为$L$,且已知$\frac{\partial L}{\partial Y}=\left[ \begin{array}{cc} 0.3 & 0.1 \\ @@ -88,6 +94,85 @@ W=\left[ \begin{array}{cc} 注:本题的计算方式不限,但需要提供计算过程以及各步骤的结果。 \vspace{6mm} +\begin{proof}[解] + 首先, + \[\frac{\partial L}{\partial Y} = \begin{bmatrix} + \frac{\partial L}{\partial Y_{11}} & \frac{\partial L}{\partial Y_{12}}\\ + \frac{\partial L}{\partial Y_{21}} & \frac{\partial L}{\partial Y_{22}} + \end{bmatrix}\] + \[\frac{\partial L}{\partial X} = \begin{bmatrix} + \frac{\partial L}{\partial X_{11}} & \frac{\partial L}{\partial 
X_{12}} & \frac{\partial L}{\partial X_{13}}\\ + \frac{\partial L}{\partial X_{21}} & \frac{\partial L}{\partial X_{22}} & \frac{\partial L}{\partial X_{23}}\\ + \frac{\partial L}{\partial X_{31}} & \frac{\partial L}{\partial X_{32}} & \frac{\partial L}{\partial X_{33}} + \end{bmatrix}\] + 同时,根据链式法则, + \[\frac{\partial L}{\partial X_{11}} = \frac{\partial Y_{11}}{\partial X_{11}} \frac{\partial L}{\partial Y_{11}} + \frac{\partial Y_{12}}{\partial X_{11}} \frac{\partial L}{\partial Y_{12}} + \frac{\partial Y_{21}}{\partial X_{11}} \frac{\partial L}{\partial Y_{21}} + \frac{\partial Y_{22}}{\partial X_{11}} \frac{\partial L}{\partial Y_{22}}\] + 其它的$\frac{\partial L}{\partial X_{12}}, \dots, \frac{\partial L}{\partial X_{33}}$的计算方式也是类似的。因此, + \[\frac{\partial L}{\partial X} = \sum_{i = 1}^2 \sum_{j = 1}^2 + \begin{bmatrix} + \frac{\partial Y_{ij}}{\partial X_{11}} & \cdots & \frac{\partial Y_{ij}}{\partial X_{13}}\\ + \vdots & \ddots & \vdots\\ + \frac{\partial Y_{ij}}{\partial X_{31}} & \cdots & \frac{\partial Y_{ij}}{\partial X_{33}} + \end{bmatrix} \frac{\partial L}{\partial Y_{ij}} = \sum_{i = 1}^2 \sum_{j = 1}^2 \frac{\partial Y_{ij}}{\partial X} \frac{\partial L}{\partial Y_{ij}}\] + 式中的$\frac{\partial Y_{ij}}{\partial X}$与对应元是由哪几个$X$中的元素卷积得到有关,它们是$W$在$3 \times 3$矩阵中的平移。综合起来,有 + \begin{align*} + \frac{\partial L}{\partial X} & = + \begin{bmatrix} + 0.1 & -0.2 & 0\\ + -0.3 & 0.4 & 0\\ + 0 & 0 & 0 + \end{bmatrix} \frac{\partial L}{\partial Y_{11}} + + + \begin{bmatrix} + 0 & 0.1 & -0.2\\ + 0 & -0.3 & 0.4\\ + 0 & 0 & 0 + \end{bmatrix} \frac{\partial L}{\partial Y_{12}}\\ + & \quad + + \begin{bmatrix} + 0 & 0 & 0\\ + 0.1 & -0.2 & 0\\ + -0.3 & 0.4 & 0 + \end{bmatrix} \frac{\partial L}{\partial Y_{21}} + + + \begin{bmatrix} + 0 & 0 & 0\\ + 0 & 0.1 & -0.2\\ + 0 & -0.3 & 0.4 + \end{bmatrix} \frac{\partial L}{\partial Y_{22}}\\ + & = + \begin{bmatrix} + 0.03 & -0.06 & 0\\ + -0.09 & 0.12 & 0\\ + 0 & 0 & 0 + \end{bmatrix} + + + \begin{bmatrix} + 0 & 0.01 & -0.02\\ + 0 & -0.03 & 0.04\\ + 0 & 0 & 
0 + \end{bmatrix}\\ + & \quad + + \begin{bmatrix} + 0 & 0 & 0\\ + -0.04 & 0.08 & 0\\ + 0.12 & -0.16 & 0 + \end{bmatrix} + + + \begin{bmatrix} + 0 & 0 & 0\\ + 0 & 0.02 & -0.04\\ + 0 & -0.06 & 0.08 + \end{bmatrix}\\ + & = + \begin{bmatrix} + 0.03 & -0.05 & -0.02\\ + -0.13 & 0.19 & 0\\ + 0.12 & -0.22 & 0.08 + \end{bmatrix} \qedhere + \end{align*} +\end{proof} + \centerline{\textbf{\Large{编程部分}}} \vspace{3mm}