6 Commits

Author SHA1 Message Date
unlockable
c850f38778 Homework3 Submit 2024-05-18 16:23:40 +08:00
unlockable
820f679067 SVM and PCA not working 2024-05-18 00:12:06 +08:00
unlockable
81de7b1d58 feat(hw3): Copy file from hw2 2024-05-16 17:41:27 +08:00
unlockable
b741c9d08e feat(hw3): Non program part of the homework 2024-05-16 17:38:56 +08:00
unlockable
8b657be441 Mac Sync 2024-05-15 20:05:18 +08:00
unlockable
4bc3f77879 TA release homework3. 2024-05-01 17:13:51 +08:00
44 changed files with 3117 additions and 1318 deletions

26
.gitignore vendored
View File

@@ -1,12 +1,14 @@
*.zip
__pycache__/
*.pth
*.log
*.aux
*.synctex.gz
*.synctex.gz(busy)
*.out
*.pdf
.DS_Store
hw2/code/checkpoints/
hw2/code/visualized/
*.zip
__pycache__/
*.pth
*.log
*.aux
*.synctex.gz
*.synctex.gz(busy)
*.out
*.pdf
.DS_Store
hw2/code/checkpoints/
hw2/code/visualized/
hw3/code/data/
hw3/code/checkpoints/

View File

@@ -1,4 +1,4 @@
{
"python.analysis.typeCheckingMode": "basic",
"python.analysis.autoImportCompletions": true
{
"python.analysis.typeCheckingMode": "basic",
"python.analysis.autoImportCompletions": true
}

View File

@@ -1,56 +1,56 @@
Epoch 01: loss = inf
Epoch 02: loss = inf
Epoch 03: loss = 6.678
Epoch 04: loss = 4.361
Epoch 05: loss = 3.110
Epoch 06: loss = 2.099
Epoch 07: loss = 1.698
Epoch 08: loss = 1.320
Epoch 09: loss = 0.970
Epoch 10: loss = 0.891
Epoch 10: validation accuracy = 66.0%
Epoch 11: loss = 0.817
Epoch 12: loss = 0.723
Epoch 13: loss = 0.512
Epoch 14: loss = 0.353
Epoch 15: loss = 0.202
Epoch 16: loss = 0.182
Epoch 17: loss = 0.184
Epoch 18: loss = 0.191
Epoch 19: loss = 0.175
Epoch 20: loss = 0.166
Epoch 20: validation accuracy = 68.0%
Epoch 21: loss = 0.146
Epoch 22: loss = 0.105
Epoch 23: loss = 0.109
Epoch 24: loss = 0.074
Epoch 25: loss = 0.097
Epoch 26: loss = 0.047
Epoch 27: loss = 0.038
Epoch 28: loss = 0.037
Epoch 29: loss = 0.024
Epoch 30: loss = 0.021
Epoch 30: validation accuracy = 68.8%
Epoch 31: loss = 0.019
Epoch 32: loss = 0.024
Epoch 33: loss = 0.023
Epoch 34: loss = 0.014
Epoch 35: loss = 0.013
Epoch 36: loss = 0.012
Epoch 37: loss = 0.011
Epoch 38: loss = 0.013
Epoch 39: loss = 0.013
Epoch 40: loss = 0.016
Epoch 40: validation accuracy = 70.5%
Epoch 41: loss = 0.015
Epoch 42: loss = 0.009
Epoch 43: loss = 0.011
Epoch 44: loss = 0.008
Epoch 45: loss = 0.008
Epoch 46: loss = 0.010
Epoch 47: loss = 0.009
Epoch 48: loss = 0.007
Epoch 49: loss = 0.007
Epoch 50: loss = 0.010
Epoch 50: validation accuracy = 70.5%
Epoch 01: loss = inf
Epoch 02: loss = inf
Epoch 03: loss = 6.678
Epoch 04: loss = 4.361
Epoch 05: loss = 3.110
Epoch 06: loss = 2.099
Epoch 07: loss = 1.698
Epoch 08: loss = 1.320
Epoch 09: loss = 0.970
Epoch 10: loss = 0.891
Epoch 10: validation accuracy = 66.0%
Epoch 11: loss = 0.817
Epoch 12: loss = 0.723
Epoch 13: loss = 0.512
Epoch 14: loss = 0.353
Epoch 15: loss = 0.202
Epoch 16: loss = 0.182
Epoch 17: loss = 0.184
Epoch 18: loss = 0.191
Epoch 19: loss = 0.175
Epoch 20: loss = 0.166
Epoch 20: validation accuracy = 68.0%
Epoch 21: loss = 0.146
Epoch 22: loss = 0.105
Epoch 23: loss = 0.109
Epoch 24: loss = 0.074
Epoch 25: loss = 0.097
Epoch 26: loss = 0.047
Epoch 27: loss = 0.038
Epoch 28: loss = 0.037
Epoch 29: loss = 0.024
Epoch 30: loss = 0.021
Epoch 30: validation accuracy = 68.8%
Epoch 31: loss = 0.019
Epoch 32: loss = 0.024
Epoch 33: loss = 0.023
Epoch 34: loss = 0.014
Epoch 35: loss = 0.013
Epoch 36: loss = 0.012
Epoch 37: loss = 0.011
Epoch 38: loss = 0.013
Epoch 39: loss = 0.013
Epoch 40: loss = 0.016
Epoch 40: validation accuracy = 70.5%
Epoch 41: loss = 0.015
Epoch 42: loss = 0.009
Epoch 43: loss = 0.011
Epoch 44: loss = 0.008
Epoch 45: loss = 0.008
Epoch 46: loss = 0.010
Epoch 47: loss = 0.009
Epoch 48: loss = 0.007
Epoch 49: loss = 0.007
Epoch 50: loss = 0.010
Epoch 50: validation accuracy = 70.5%
Model saved in ./saved_models/default.pth

View File

@@ -1,2 +1,2 @@
[Info] Load model from .\saved_models\default.pth
[Info] Load model from .\saved_models\default.pth
[Info] Test accuracy = 72.0%

View File

@@ -1,2 +1,2 @@
[Info] Load model from .\saved_models\adam_optim.pth
[Info] Load model from .\saved_models\adam_optim.pth
[Info] Test accuracy = 85.0%

View File

@@ -1,56 +1,56 @@
Epoch 01: loss = inf
Epoch 02: loss = inf
Epoch 03: loss = inf
Epoch 04: loss = inf
Epoch 05: loss = inf
Epoch 06: loss = inf
Epoch 07: loss = inf
Epoch 08: loss = inf
Epoch 09: loss = 3.250
Epoch 10: loss = 2.567
Epoch 10: validation accuracy = 59.0%
Epoch 11: loss = 1.963
Epoch 12: loss = 1.558
Epoch 13: loss = 1.320
Epoch 14: loss = 0.911
Epoch 15: loss = 0.808
Epoch 16: loss = 0.932
Epoch 17: loss = 0.861
Epoch 18: loss = 0.748
Epoch 19: loss = 0.783
Epoch 20: loss = 0.809
Epoch 20: validation accuracy = 65.5%
Epoch 21: loss = 0.678
Epoch 22: loss = 0.757
Epoch 23: loss = 0.747
Epoch 24: loss = 0.660
Epoch 25: loss = 0.536
Epoch 26: loss = 0.506
Epoch 27: loss = 0.577
Epoch 28: loss = 0.600
Epoch 29: loss = 0.681
Epoch 30: loss = 0.604
Epoch 30: validation accuracy = 68.0%
Epoch 31: loss = 0.552
Epoch 32: loss = 0.671
Epoch 33: loss = 0.604
Epoch 34: loss = 0.600
Epoch 35: loss = 0.818
Epoch 36: loss = 0.659
Epoch 37: loss = 0.375
Epoch 38: loss = 0.380
Epoch 39: loss = 0.418
Epoch 40: loss = 0.431
Epoch 40: validation accuracy = 73.5%
Epoch 41: loss = 0.551
Epoch 42: loss = 0.488
Epoch 43: loss = 0.350
Epoch 44: loss = 0.287
Epoch 45: loss = 0.294
Epoch 46: loss = 0.463
Epoch 47: loss = 0.438
Epoch 48: loss = 0.392
Epoch 49: loss = 0.325
Epoch 50: loss = 0.332
Epoch 50: validation accuracy = 80.8%
Epoch 01: loss = inf
Epoch 02: loss = inf
Epoch 03: loss = inf
Epoch 04: loss = inf
Epoch 05: loss = inf
Epoch 06: loss = inf
Epoch 07: loss = inf
Epoch 08: loss = inf
Epoch 09: loss = 3.250
Epoch 10: loss = 2.567
Epoch 10: validation accuracy = 59.0%
Epoch 11: loss = 1.963
Epoch 12: loss = 1.558
Epoch 13: loss = 1.320
Epoch 14: loss = 0.911
Epoch 15: loss = 0.808
Epoch 16: loss = 0.932
Epoch 17: loss = 0.861
Epoch 18: loss = 0.748
Epoch 19: loss = 0.783
Epoch 20: loss = 0.809
Epoch 20: validation accuracy = 65.5%
Epoch 21: loss = 0.678
Epoch 22: loss = 0.757
Epoch 23: loss = 0.747
Epoch 24: loss = 0.660
Epoch 25: loss = 0.536
Epoch 26: loss = 0.506
Epoch 27: loss = 0.577
Epoch 28: loss = 0.600
Epoch 29: loss = 0.681
Epoch 30: loss = 0.604
Epoch 30: validation accuracy = 68.0%
Epoch 31: loss = 0.552
Epoch 32: loss = 0.671
Epoch 33: loss = 0.604
Epoch 34: loss = 0.600
Epoch 35: loss = 0.818
Epoch 36: loss = 0.659
Epoch 37: loss = 0.375
Epoch 38: loss = 0.380
Epoch 39: loss = 0.418
Epoch 40: loss = 0.431
Epoch 40: validation accuracy = 73.5%
Epoch 41: loss = 0.551
Epoch 42: loss = 0.488
Epoch 43: loss = 0.350
Epoch 44: loss = 0.287
Epoch 45: loss = 0.294
Epoch 46: loss = 0.463
Epoch 47: loss = 0.438
Epoch 48: loss = 0.392
Epoch 49: loss = 0.325
Epoch 50: loss = 0.332
Epoch 50: validation accuracy = 80.8%
Model saved in .\saved_models\adam_optim_cuda.pth

View File

@@ -1,2 +1,2 @@
[Info] Load model from .\saved_models\adam_optim_lr1e-3_epoch100_momentum10.pth
[Info] Load model from .\saved_models\adam_optim_lr1e-3_epoch100_momentum10.pth
[Info] Test accuracy = 88.8%

View File

@@ -1,111 +1,111 @@
Epoch 01: loss = inf
Epoch 02: loss = inf
Epoch 03: loss = inf
Epoch 04: loss = inf
Epoch 05: loss = inf
Epoch 06: loss = inf
Epoch 07: loss = inf
Epoch 08: loss = inf
Epoch 09: loss = inf
Epoch 10: loss = inf
Epoch 10: validation accuracy = 40.2%
Epoch 11: loss = inf
Epoch 12: loss = inf
Epoch 13: loss = inf
Epoch 14: loss = inf
Epoch 15: loss = inf
Epoch 16: loss = inf
Epoch 17: loss = 2.360
Epoch 18: loss = 2.086
Epoch 19: loss = 1.684
Epoch 20: loss = 1.453
Epoch 20: validation accuracy = 53.0%
Epoch 21: loss = 1.174
Epoch 22: loss = 1.046
Epoch 23: loss = 0.859
Epoch 24: loss = 0.740
Epoch 25: loss = 0.663
Epoch 26: loss = 0.495
Epoch 27: loss = 0.566
Epoch 28: loss = 0.521
Epoch 29: loss = 0.470
Epoch 30: loss = 0.363
Epoch 30: validation accuracy = 59.0%
Epoch 31: loss = 0.365
Epoch 32: loss = 0.305
Epoch 33: loss = 0.333
Epoch 34: loss = 0.293
Epoch 35: loss = 0.191
Epoch 36: loss = 0.295
Epoch 37: loss = 0.275
Epoch 38: loss = 0.461
Epoch 39: loss = 0.509
Epoch 40: loss = 0.298
Epoch 40: validation accuracy = 65.2%
Epoch 41: loss = 0.186
Epoch 42: loss = 0.395
Epoch 43: loss = 0.323
Epoch 44: loss = 0.309
Epoch 45: loss = 0.199
Epoch 46: loss = 0.285
Epoch 47: loss = 0.290
Epoch 48: loss = 0.302
Epoch 49: loss = 0.235
Epoch 50: loss = 0.190
Epoch 50: validation accuracy = 71.2%
Epoch 51: loss = 0.294
Epoch 52: loss = 0.311
Epoch 53: loss = 0.254
Epoch 54: loss = 0.289
Epoch 55: loss = 0.264
Epoch 56: loss = 0.213
Epoch 57: loss = 0.166
Epoch 58: loss = 0.218
Epoch 59: loss = 0.231
Epoch 60: loss = 0.283
Epoch 60: validation accuracy = 74.8%
Epoch 61: loss = 0.324
Epoch 62: loss = 0.245
Epoch 63: loss = 0.277
Epoch 64: loss = 0.286
Epoch 65: loss = 0.255
Epoch 66: loss = 0.263
Epoch 67: loss = 0.272
Epoch 68: loss = 0.272
Epoch 69: loss = 0.260
Epoch 70: loss = 0.271
Epoch 70: validation accuracy = 79.0%
Epoch 71: loss = 0.310
Epoch 72: loss = 0.301
Epoch 73: loss = 0.305
Epoch 74: loss = 0.311
Epoch 75: loss = 0.329
Epoch 76: loss = 0.295
Epoch 77: loss = 0.300
Epoch 78: loss = 0.316
Epoch 79: loss = 0.326
Epoch 80: loss = 0.352
Epoch 80: validation accuracy = 77.5%
Epoch 81: loss = 0.344
Epoch 82: loss = 0.326
Epoch 83: loss = 0.326
Epoch 84: loss = 0.335
Epoch 85: loss = 0.342
Epoch 86: loss = 0.361
Epoch 87: loss = 0.337
Epoch 88: loss = 0.339
Epoch 89: loss = 0.339
Epoch 90: loss = 0.341
Epoch 90: validation accuracy = 82.8%
Epoch 91: loss = 0.350
Epoch 92: loss = 0.359
Epoch 93: loss = 0.352
Epoch 94: loss = 0.363
Epoch 95: loss = 0.347
Epoch 96: loss = 0.341
Epoch 97: loss = 0.336
Epoch 98: loss = 0.348
Epoch 99: loss = 0.365
Epoch 100: loss = 0.350
Epoch 100: validation accuracy = 85.2%
Epoch 01: loss = inf
Epoch 02: loss = inf
Epoch 03: loss = inf
Epoch 04: loss = inf
Epoch 05: loss = inf
Epoch 06: loss = inf
Epoch 07: loss = inf
Epoch 08: loss = inf
Epoch 09: loss = inf
Epoch 10: loss = inf
Epoch 10: validation accuracy = 40.2%
Epoch 11: loss = inf
Epoch 12: loss = inf
Epoch 13: loss = inf
Epoch 14: loss = inf
Epoch 15: loss = inf
Epoch 16: loss = inf
Epoch 17: loss = 2.360
Epoch 18: loss = 2.086
Epoch 19: loss = 1.684
Epoch 20: loss = 1.453
Epoch 20: validation accuracy = 53.0%
Epoch 21: loss = 1.174
Epoch 22: loss = 1.046
Epoch 23: loss = 0.859
Epoch 24: loss = 0.740
Epoch 25: loss = 0.663
Epoch 26: loss = 0.495
Epoch 27: loss = 0.566
Epoch 28: loss = 0.521
Epoch 29: loss = 0.470
Epoch 30: loss = 0.363
Epoch 30: validation accuracy = 59.0%
Epoch 31: loss = 0.365
Epoch 32: loss = 0.305
Epoch 33: loss = 0.333
Epoch 34: loss = 0.293
Epoch 35: loss = 0.191
Epoch 36: loss = 0.295
Epoch 37: loss = 0.275
Epoch 38: loss = 0.461
Epoch 39: loss = 0.509
Epoch 40: loss = 0.298
Epoch 40: validation accuracy = 65.2%
Epoch 41: loss = 0.186
Epoch 42: loss = 0.395
Epoch 43: loss = 0.323
Epoch 44: loss = 0.309
Epoch 45: loss = 0.199
Epoch 46: loss = 0.285
Epoch 47: loss = 0.290
Epoch 48: loss = 0.302
Epoch 49: loss = 0.235
Epoch 50: loss = 0.190
Epoch 50: validation accuracy = 71.2%
Epoch 51: loss = 0.294
Epoch 52: loss = 0.311
Epoch 53: loss = 0.254
Epoch 54: loss = 0.289
Epoch 55: loss = 0.264
Epoch 56: loss = 0.213
Epoch 57: loss = 0.166
Epoch 58: loss = 0.218
Epoch 59: loss = 0.231
Epoch 60: loss = 0.283
Epoch 60: validation accuracy = 74.8%
Epoch 61: loss = 0.324
Epoch 62: loss = 0.245
Epoch 63: loss = 0.277
Epoch 64: loss = 0.286
Epoch 65: loss = 0.255
Epoch 66: loss = 0.263
Epoch 67: loss = 0.272
Epoch 68: loss = 0.272
Epoch 69: loss = 0.260
Epoch 70: loss = 0.271
Epoch 70: validation accuracy = 79.0%
Epoch 71: loss = 0.310
Epoch 72: loss = 0.301
Epoch 73: loss = 0.305
Epoch 74: loss = 0.311
Epoch 75: loss = 0.329
Epoch 76: loss = 0.295
Epoch 77: loss = 0.300
Epoch 78: loss = 0.316
Epoch 79: loss = 0.326
Epoch 80: loss = 0.352
Epoch 80: validation accuracy = 77.5%
Epoch 81: loss = 0.344
Epoch 82: loss = 0.326
Epoch 83: loss = 0.326
Epoch 84: loss = 0.335
Epoch 85: loss = 0.342
Epoch 86: loss = 0.361
Epoch 87: loss = 0.337
Epoch 88: loss = 0.339
Epoch 89: loss = 0.339
Epoch 90: loss = 0.341
Epoch 90: validation accuracy = 82.8%
Epoch 91: loss = 0.350
Epoch 92: loss = 0.359
Epoch 93: loss = 0.352
Epoch 94: loss = 0.363
Epoch 95: loss = 0.347
Epoch 96: loss = 0.341
Epoch 97: loss = 0.336
Epoch 98: loss = 0.348
Epoch 99: loss = 0.365
Epoch 100: loss = 0.350
Epoch 100: validation accuracy = 85.2%
Model saved in .\saved_models\adam_optim_lr1e-3_epoch100_momentum10.pth

View File

@@ -1,244 +1,244 @@
% Homework Template
\documentclass[a4paper]{article}
\usepackage{ctex}
\usepackage{amsmath, amssymb, amsthm}
\usepackage{moreenum}
\usepackage{mathtools}
\usepackage{url}
\usepackage{bm}
\usepackage{enumitem}
\usepackage{graphicx}
\usepackage{subcaption}
\usepackage{booktabs} % toprule
\usepackage[mathcal]{eucal}
\usepackage[thehwcnt = 1]{iidef}
\usepackage{listings}
\usepackage[x11names]{xcolor}
\usepackage{float}
\usepackage[colorlinks, linkcolor=black, anchorcolor=green, citecolor=blue]{hyperref}
\DeclareMathOperator{\arctanh}{arctanh}
% \DeclareMathOperator{\diag}{diag}
\setenumerate[1]{label=(\arabic{*})}
\setenumerate[2]{label=\arabic{*})}
\definecolor{codekeyword}{RGB}{171, 0, 216}
\definecolor{codetypename}{RGB}{29, 37, 251}
\definecolor{codevariable}{RGB}{10, 23, 126}
\definecolor{codestring}{RGB}{157, 0, 25}
\definecolor{codecomment}{RGB}{31, 129, 19}
\newfontfamily\cascadia[Ligatures=ResetAll]{Cascadia Code}
% \newfontfamily\codefont[Ligatures=ResetAll]{Cascadia Code}
\newfontfamily\codefont[Ligatures=ResetAll]{Fira Code}[Contextuals={Alternate}]
% To enable ligature in listing, go check lstfiracode's github page and copy firacodestyle's settings.
\lstset{
basicstyle = \small\codefont,
% ---
tabsize = 4,
showstringspaces = false,
numbers = left,
numberstyle = \cascadia,
% ---
breaklines = true,
captionpos = t,
% ---
frame = l,
flexiblecolumns,
columns = fixed,
}
\thecourseinstitute{清华大学电子工程系}
\thecoursename{\textbf{媒体与认知} \space 课堂2}
\theterm{2023-2024学年春季学期}
\hwname{作业}
\begin{document}
\courseheader
% 请在YOUR NAME处填写自己的姓名
\name{高艺轩}
\vspace{3mm}
\centerline{\textbf{\Large{理论部分}}}
\section{单选题15分}
% 请在?处填写答案
\subsection{\underline{B}}
\subsection{\underline{A}}
\subsection{\underline{B}}
\subsection{\underline{A}}
\subsection{\underline{B}}
\section{计算题(15 分)}
\subsection{设隐含层为$\mathbf{z}=\mathbf{W}^T\mathbf{x}+\mathbf{b}$,其中$\mathbf{x}\in R^{(m \times 1)}$、$\mathbf{z}\in R^{(n\times 1)}$、$\mathbf{W}\in R^{(m\times n)}$、$\mathbf{b} \in R^{(n\times 1)}$均为已知,其激活函数如下:
$$\mathbf{y}=\delta(\mathbf{z})=tanh(\mathbf{z})$$
tanh表示双曲正切函数。若训练过程中的目标函数为L,且已知L对$\mathbf{y}$的导数 $\frac{\partial L}{\partial \mathbf{y}}=[\frac{\partial L}{\partial y_1},\frac{\partial L}{\partial y_2},...,\frac{\partial L}{\partial y_n}]^T$和$\mathbf{y}=[y_1,y_2,...,y_n]^T$的值。
}
\subsubsection{请使用$\mathbf{y}$表示出$\frac{\partial \mathbf{y}^T}{\partial \mathbf{z}}$, 这里的$\mathbf{y}^T$ 为行向量。
}
\begin{proof}[解]
首先,对$i \neq j$,有$\dfrac{\partial y_i}{\partial z_j} = 0$。
同时$y_i = \tanh(z_i) = \tanh(\arctanh(y_i))$,因此
\[\frac{\partial y_i}{\partial z_i} = 1 - \tanh^2(z_i) = 1 - y_i^2\]
因此
\[\dfrac{\partial \boldsymbol{y}^\mathrm{T}}{\partial \boldsymbol{z}} = \diag\{1 - y_1^2, 1 - y_2^2, \dots, 1 - y_n^2\} \qedhere\]
\end{proof}
\subsubsection{请使用$\mathbf{y}$和$\frac{\partial L}{\partial \mathbf{y}}$表示$\frac{\partial L}{\partial \mathbf{x}}$、$\frac{\partial L}{\partial \mathbf{W}}$、$\frac{\partial L}{\partial \mathbf{b}}$。
}
提示:$\frac{\partial L}{\partial \mathbf{x}}$、$\frac{\partial L}{\partial \mathbf{W}}$、$\frac{\partial L}{\partial \mathbf{b}}$与x,W,b具有相同维度。
\begin{proof}[解]
由链式法则
\[\frac{\partial L}{\partial \boldsymbol{x}} = \frac{\partial \boldsymbol{z}^\mathrm{T}}{\partial \boldsymbol{x}} \frac{\partial \boldsymbol{y}^\mathrm{T}}{\partial \boldsymbol{z}} \frac{\partial L}{\partial \boldsymbol{y}} = W \diag\{1 - y_1^2, 1 - y_2^2, \dots, 1 - y_n^2\} \frac{\partial L}{\partial \boldsymbol{y}}\]
对于$\dfrac{\partial L}{\partial W}$,由$z_j = \sum_{i} W_{ij} x_i + b_j$可得$\dfrac{\partial L}{\partial W_{ij}} = x_i \dfrac{\partial L}{\partial z_j}$,写成矩阵形式即
\begin{align*}
\frac{\partial L}{\partial W} & = \boldsymbol{x} \left(\frac{\partial L}{\partial \boldsymbol{z}}\right)^\mathrm{T} = \boldsymbol{x} \left(\frac{\partial \boldsymbol{y}^\mathrm{T}}{\partial \boldsymbol{z}} \frac{\partial L}{\partial \boldsymbol{y}}\right)^\mathrm{T}\\
& = \boldsymbol{x} \left(\diag\{1 - y_1^2, 1 - y_2^2, \dots, 1 - y_n^2\} \frac{\partial L}{\partial \boldsymbol{y}}\right)^\mathrm{T}
\end{align*}
其维度为$m \times n$,与$W$相同。
对于$\dfrac{\partial L}{\partial \boldsymbol{b}}$,由链式法则
\[\frac{\partial L}{\partial \boldsymbol{b}} = \frac{\partial \boldsymbol{z}^\mathrm{T}}{\partial \boldsymbol{b}} \frac{\partial \boldsymbol{y}^\mathrm{T}}{\partial \boldsymbol{z}} \frac{\partial L}{\partial \boldsymbol{y}} = I_n \frac{\partial \boldsymbol{y}^\mathrm{T}}{\partial \boldsymbol{z}} \frac{\partial L}{\partial \boldsymbol{y}} = \diag\{1 - y_1^2, 1 - y_2^2, \dots, 1 - y_n^2\} \frac{\partial L}{\partial \boldsymbol{y}} \qedhere\]
\end{proof}
\vspace{6mm}
\centerline{\textbf{\Large{编程部分}}}
\vspace{3mm}
% 请根据是否选择自选课题的情况选择“编程作业报告”或“自选课题开题报告”中的一项完成
\section{编程作业报告}
% 请在此处完成编程作业报告
完成后的代码也可以在 \href{https://git.unlockableworld.com/unlockable/MediaNCognition}{\url{https://git.unlockableworld.com/unlockable/MediaNCognition}}中找到。
\begin{enumerate}
\item 使用默认配置进行训练和测试。
\begin{enumerate}
\item 训练模型。
输入:
\lstinputlisting{codes/1.1.in.txt}
输出:
\lstinputlisting{codes/1.1.out.txt}
\begin{figure}[H]
\centering
\includegraphics[width=0.9\linewidth]{img/1default_train.png}
\end{figure}
\item 测试模型。
输入:
\lstinputlisting{codes/1.2.in.txt}
输出:
\lstinputlisting{codes/1.2.out.txt}
\end{enumerate}
\item 调整参数、使用Adam优化器训练并测试。
\begin{enumerate}
\item 训练模型。
输入:
\lstinputlisting{codes/2.1.in.txt}
输出:
\lstinputlisting{codes/2.1.out.txt}
\begin{figure}[H]
\centering
\includegraphics[width=0.9\linewidth]{img/2adam_optim.png}
\end{figure}
\item 测试性能。
输入:
\lstinputlisting{codes/2.2.in.txt}
输出:
\lstinputlisting{codes/2.2.out.txt}
\end{enumerate}
\item 使用效果最佳的模型测试。
经过简单的尝试,发现使用
\lstinputlisting{codes/self_train.in.txt}
可以使测试集准确率达到88.8\%有略微的提升。训练的loss曲线
\begin{figure}[H]
\centering
\includegraphics[width=.9\linewidth]{img/3found_best.png}
\end{figure}
使用它进行预测:
\begin{figure}[H]
\centering
\begin{subfigure}[b]{.3\linewidth}
\includegraphics[width=\linewidth]{img/predict/predict01.png}
\subcaption{预测A}
\end{subfigure}
\hfill
\begin{subfigure}[b]{.3\linewidth}
\includegraphics[width=\linewidth]{img/predict/predict02.png}
\subcaption{预测B}
\end{subfigure}
\hfill
\begin{subfigure}[b]{.3\linewidth}
\includegraphics[width=\linewidth]{img/predict/predict03.png}
\subcaption{预测M}
\end{subfigure}
\begin{subfigure}[b]{.3\linewidth}
\includegraphics[width=\linewidth]{img/predict/predict04.png}
\subcaption{预测R}
\end{subfigure}
\hfill
\begin{subfigure}[b]{.3\linewidth}
\includegraphics[width=\linewidth]{img/predict/predict05.png}
\subcaption{预测M}
\end{subfigure}
\hfill
\begin{subfigure}[b]{.3\linewidth}
\includegraphics[width=\linewidth]{img/predict/predict06.png}
\subcaption{预测O}
\end{subfigure}
\hfill
\begin{subfigure}[b]{.3\linewidth}
\includegraphics[width=\linewidth]{img/predict/predict07.png}
\subcaption{预测B}
\end{subfigure}
\hfill
\begin{subfigure}[b]{.3\linewidth}
\includegraphics[width=\linewidth]{img/predict/predict08.png}
\subcaption{预测W}
\end{subfigure}
\hfill
\end{figure}
\item 遇到的问题及解决方法
\begin{enumerate}
\item 代码中对灰度图像的矩阵进行标准化时,\lstinline{numpy}显示不能对\lstinline{NumpyGenericArray}进行对\lstinline{float}的\lstinline{/}操作。改用\lstinline{np.divide()}解决了这个问题。
\item 在利用训练好的模型进行预测时,发现自己找到的大部分模型都预测错误;最后与训练集的图片进行了对比,发现主要问题是裁切字母时留下了过大的边距,导致模型不能正确理解输入。重新裁剪边框后,得到正确的结果。
\end{enumerate}
\item 建议希望下次发布作业代码可以利用清华的git。
\end{enumerate}
% \section{自选课题开题报告}
% 请在此处介绍自选课题
\end{document}
%%% Local Variables:
%%% mode: latex
%%% TeX-master: t
%%% End:
% Homework Template
\documentclass[a4paper]{article}
\usepackage{ctex}
\usepackage{amsmath, amssymb, amsthm}
\usepackage{moreenum}
\usepackage{mathtools}
\usepackage{url}
\usepackage{bm}
\usepackage{enumitem}
\usepackage{graphicx}
\usepackage{subcaption}
\usepackage{booktabs} % toprule
\usepackage[mathcal]{eucal}
\usepackage[thehwcnt = 1]{iidef}
\usepackage{listings}
\usepackage[x11names]{xcolor}
\usepackage{float}
\usepackage[colorlinks, linkcolor=black, anchorcolor=green, citecolor=blue]{hyperref}
\DeclareMathOperator{\arctanh}{arctanh}
% \DeclareMathOperator{\diag}{diag}
\setenumerate[1]{label=(\arabic{*})}
\setenumerate[2]{label=\arabic{*})}
\definecolor{codekeyword}{RGB}{171, 0, 216}
\definecolor{codetypename}{RGB}{29, 37, 251}
\definecolor{codevariable}{RGB}{10, 23, 126}
\definecolor{codestring}{RGB}{157, 0, 25}
\definecolor{codecomment}{RGB}{31, 129, 19}
\newfontfamily\cascadia[Ligatures=ResetAll]{Cascadia Code}
% \newfontfamily\codefont[Ligatures=ResetAll]{Cascadia Code}
\newfontfamily\codefont[Ligatures=ResetAll]{Fira Code}[Contextuals={Alternate}]
% To enable ligature in listing, go check lstfiracode's github page and copy firacodestyle's settings.
\lstset{
basicstyle = \small\codefont,
% ---
tabsize = 4,
showstringspaces = false,
numbers = left,
numberstyle = \cascadia,
% ---
breaklines = true,
captionpos = t,
% ---
frame = l,
flexiblecolumns,
columns = fixed,
}
\thecourseinstitute{清华大学电子工程系}
\thecoursename{\textbf{媒体与认知} \space 课堂2}
\theterm{2023-2024学年春季学期}
\hwname{作业}
\begin{document}
\courseheader
% 请在YOUR NAME处填写自己的姓名
\name{高艺轩}
\vspace{3mm}
\centerline{\textbf{\Large{理论部分}}}
\section{单选题15分}
% 请在?处填写答案
\subsection{\underline{B}}
\subsection{\underline{A}}
\subsection{\underline{B}}
\subsection{\underline{A}}
\subsection{\underline{B}}
\section{计算题(15 分)}
\subsection{设隐含层为$\mathbf{z}=\mathbf{W}^T\mathbf{x}+\mathbf{b}$,其中$\mathbf{x}\in R^{(m \times 1)}$、$\mathbf{z}\in R^{(n\times 1)}$、$\mathbf{W}\in R^{(m\times n)}$、$\mathbf{b} \in R^{(n\times 1)}$均为已知,其激活函数如下:
$$\mathbf{y}=\delta(\mathbf{z})=tanh(\mathbf{z})$$
tanh表示双曲正切函数。若训练过程中的目标函数为L,且已知L对$\mathbf{y}$的导数 $\frac{\partial L}{\partial \mathbf{y}}=[\frac{\partial L}{\partial y_1},\frac{\partial L}{\partial y_2},...,\frac{\partial L}{\partial y_n}]^T$和$\mathbf{y}=[y_1,y_2,...,y_n]^T$的值。
}
\subsubsection{请使用$\mathbf{y}$表示出$\frac{\partial \mathbf{y}^T}{\partial \mathbf{z}}$, 这里的$\mathbf{y}^T$ 为行向量。
}
\begin{proof}[解]
首先,对$i \neq j$,有$\dfrac{\partial y_i}{\partial z_j} = 0$。
同时$y_i = \tanh(z_i) = \tanh(\arctanh(y_i))$,因此
\[\frac{\partial y_i}{\partial z_i} = 1 - \tanh^2(z_i) = 1 - y_i^2\]
因此
\[\dfrac{\partial \boldsymbol{y}^\mathrm{T}}{\partial \boldsymbol{z}} = \diag\{1 - y_1^2, 1 - y_2^2, \dots, 1 - y_n^2\} \qedhere\]
\end{proof}
\subsubsection{请使用$\mathbf{y}$和$\frac{\partial L}{\partial \mathbf{y}}$表示$\frac{\partial L}{\partial \mathbf{x}}$、$\frac{\partial L}{\partial \mathbf{W}}$、$\frac{\partial L}{\partial \mathbf{b}}$。
}
提示:$\frac{\partial L}{\partial \mathbf{x}}$、$\frac{\partial L}{\partial \mathbf{W}}$、$\frac{\partial L}{\partial \mathbf{b}}$与x,W,b具有相同维度。
\begin{proof}[解]
由链式法则
\[\frac{\partial L}{\partial \boldsymbol{x}} = \frac{\partial \boldsymbol{z}^\mathrm{T}}{\partial \boldsymbol{x}} \frac{\partial \boldsymbol{y}^\mathrm{T}}{\partial \boldsymbol{z}} \frac{\partial L}{\partial \boldsymbol{y}} = W \diag\{1 - y_1^2, 1 - y_2^2, \dots, 1 - y_n^2\} \frac{\partial L}{\partial \boldsymbol{y}}\]
对于$\dfrac{\partial L}{\partial W}$,由$z_j = \sum_{i} W_{ij} x_i + b_j$可得$\dfrac{\partial L}{\partial W_{ij}} = x_i \dfrac{\partial L}{\partial z_j}$,写成矩阵形式即
\begin{align*}
\frac{\partial L}{\partial W} & = \boldsymbol{x} \left(\frac{\partial L}{\partial \boldsymbol{z}}\right)^\mathrm{T} = \boldsymbol{x} \left(\frac{\partial \boldsymbol{y}^\mathrm{T}}{\partial \boldsymbol{z}} \frac{\partial L}{\partial \boldsymbol{y}}\right)^\mathrm{T}\\
& = \boldsymbol{x} \left(\diag\{1 - y_1^2, 1 - y_2^2, \dots, 1 - y_n^2\} \frac{\partial L}{\partial \boldsymbol{y}}\right)^\mathrm{T}
\end{align*}
其维度为$m \times n$,与$W$相同。
对于$\dfrac{\partial L}{\partial \boldsymbol{b}}$,由链式法则
\[\frac{\partial L}{\partial \boldsymbol{b}} = \frac{\partial \boldsymbol{z}^\mathrm{T}}{\partial \boldsymbol{b}} \frac{\partial \boldsymbol{y}^\mathrm{T}}{\partial \boldsymbol{z}} \frac{\partial L}{\partial \boldsymbol{y}} = I_n \frac{\partial \boldsymbol{y}^\mathrm{T}}{\partial \boldsymbol{z}} \frac{\partial L}{\partial \boldsymbol{y}} = \diag\{1 - y_1^2, 1 - y_2^2, \dots, 1 - y_n^2\} \frac{\partial L}{\partial \boldsymbol{y}} \qedhere\]
\end{proof}
\vspace{6mm}
\centerline{\textbf{\Large{编程部分}}}
\vspace{3mm}
% 请根据是否选择自选课题的情况选择“编程作业报告”或“自选课题开题报告”中的一项完成
\section{编程作业报告}
% 请在此处完成编程作业报告
完成后的代码也可以在 \href{https://git.unlockableworld.com/unlockable/MediaNCognition}{\url{https://git.unlockableworld.com/unlockable/MediaNCognition}}中找到。
\begin{enumerate}
\item 使用默认配置进行训练和测试。
\begin{enumerate}
\item 训练模型。
输入:
\lstinputlisting{codes/1.1.in.txt}
输出:
\lstinputlisting{codes/1.1.out.txt}
\begin{figure}[H]
\centering
\includegraphics[width=0.9\linewidth]{img/1default_train.png}
\end{figure}
\item 测试模型。
输入:
\lstinputlisting{codes/1.2.in.txt}
输出:
\lstinputlisting{codes/1.2.out.txt}
\end{enumerate}
\item 调整参数、使用Adam优化器训练并测试。
\begin{enumerate}
\item 训练模型。
输入:
\lstinputlisting{codes/2.1.in.txt}
输出:
\lstinputlisting{codes/2.1.out.txt}
\begin{figure}[H]
\centering
\includegraphics[width=0.9\linewidth]{img/2adam_optim.png}
\end{figure}
\item 测试性能。
输入:
\lstinputlisting{codes/2.2.in.txt}
输出:
\lstinputlisting{codes/2.2.out.txt}
\end{enumerate}
\item 使用效果最佳的模型测试。
经过简单的尝试,发现使用
\lstinputlisting{codes/self_train.in.txt}
可以使测试集准确率达到88.8\%有略微的提升。训练的loss曲线
\begin{figure}[H]
\centering
\includegraphics[width=.9\linewidth]{img/3found_best.png}
\end{figure}
使用它进行预测:
\begin{figure}[H]
\centering
\begin{subfigure}[b]{.3\linewidth}
\includegraphics[width=\linewidth]{img/predict/predict01.png}
\subcaption{预测A}
\end{subfigure}
\hfill
\begin{subfigure}[b]{.3\linewidth}
\includegraphics[width=\linewidth]{img/predict/predict02.png}
\subcaption{预测B}
\end{subfigure}
\hfill
\begin{subfigure}[b]{.3\linewidth}
\includegraphics[width=\linewidth]{img/predict/predict03.png}
\subcaption{预测M}
\end{subfigure}
\begin{subfigure}[b]{.3\linewidth}
\includegraphics[width=\linewidth]{img/predict/predict04.png}
\subcaption{预测R}
\end{subfigure}
\hfill
\begin{subfigure}[b]{.3\linewidth}
\includegraphics[width=\linewidth]{img/predict/predict05.png}
\subcaption{预测M}
\end{subfigure}
\hfill
\begin{subfigure}[b]{.3\linewidth}
\includegraphics[width=\linewidth]{img/predict/predict06.png}
\subcaption{预测O}
\end{subfigure}
\hfill
\begin{subfigure}[b]{.3\linewidth}
\includegraphics[width=\linewidth]{img/predict/predict07.png}
\subcaption{预测B}
\end{subfigure}
\hfill
\begin{subfigure}[b]{.3\linewidth}
\includegraphics[width=\linewidth]{img/predict/predict08.png}
\subcaption{预测W}
\end{subfigure}
\hfill
\end{figure}
\item 遇到的问题及解决方法
\begin{enumerate}
\item 代码中对灰度图像的矩阵进行标准化时,\lstinline{numpy}显示不能对\lstinline{NumpyGenericArray}进行对\lstinline{float}的\lstinline{/}操作。改用\lstinline{np.divide()}解决了这个问题。
\item 在利用训练好的模型进行预测时,发现自己找到的大部分模型都预测错误;最后与训练集的图片进行了对比,发现主要问题是裁切字母时留下了过大的边距,导致模型不能正确理解输入。重新裁剪边框后,得到正确的结果。
\end{enumerate}
\item 建议希望下次发布作业代码可以利用清华的git。
\end{enumerate}
% \section{自选课题开题报告}
% 请在此处介绍自选课题
\end{document}
%%% Local Variables:
%%% mode: latex
%%% TeX-master: t
%%% End:

View File

@@ -1,164 +1,164 @@
#========================================================
# Media and Cognition
# Homework 1 Neural network basics
# activations.py - activation functions
# Student ID: 2022010639
# Name: Gao Yixuan
# Tsinghua University
# (C) Copyright 2024
#========================================================
import torch
import torch.nn as nn
'''
In this script we will implement three activation functions, including both forward and backward processes.
More details about customizing a backward process in PyTorch can be found in:
https://pytorch.org/tutorials/beginner/examples_autograd/two_layer_net_custom_function.html
'''
## Here, Tanh is given as an example to show how to construct the activation function. Please finish the activation functions of Sigmoid and ReLU later.
class Tanh(torch.autograd.Function):
    '''
    Hyperbolic-tangent activation with a hand-written backward pass.
    y = tanh(x) = (exp(x) - exp(-x)) / (exp(x) + exp(-x))
    '''

    # Both hooks are static methods: they are invoked through Tanh.apply, never on an instance.
    @staticmethod
    def forward(ctx, x):
        '''
        Apply tanh element-wise to the input tensor.

        ctx: context object; tensors stored on it with ctx.save_for_backward
             are read back in backward() through ctx.saved_tensors
        x: input tensor with arbitrary shape
        returns: tensor holding tanh(x)
        '''
        # torch.tanh is used instead of spelling out the exp() quotient,
        # because the exp() terms overflow when x has a large absolute value.
        out = torch.tanh(x)
        # backward() only needs the output, so that is what gets saved
        ctx.save_for_backward(out)
        return out

    @staticmethod
    def backward(ctx, grad_output):
        """
        Turn the gradient w.r.t. the output into the gradient w.r.t. the input.

        grad_output: dL/dy
        returns: dL/dx = dL/dy * dy/dx, where y = forward(x)
        """
        # recover the forward output stored on the context
        (out,) = ctx.saved_tensors
        # derivative of tanh expressed through its own output: dy/dx = 1 - y^2
        local_grad = 1 - out ** 2
        # chain rule
        return grad_output * local_grad
#TODO 1: complete the forward and backward functions of the Sigmoid activation function.
#Note: You can refer to the activation function Tanh
class Sigmoid(torch.autograd.Function):
    '''
    Sigmoid activation function
    y = 1 / (1 + exp(-x))
    '''

    @staticmethod
    def forward(ctx, x):
        '''
        Apply the sigmoid element-wise.

        ctx: context object used to hand the output over to backward()
        x: input tensor with arbitrary shape
        returns: tensor holding 1 / (1 + exp(-x)), every element in [0, 1]
        '''
        # This has to be a reciprocal: "1 - (1 + exp(-x))" would evaluate to -exp(-x),
        # which is not a sigmoid and makes the y * (1 - y) gradient below wrong.
        # For very negative x, exp(-x) overflows to inf and 1 / inf gives 0, so no NaN appears.
        y = 1 / (1 + torch.exp(-x))
        # save y so backward() can express the derivative through the output
        ctx.save_for_backward(y)
        return y

    @staticmethod
    def backward(ctx, grad_output):
        """
        Turn the gradient w.r.t. the output into the gradient w.r.t. the input.

        grad_output: dL/dy
        returns: dL/dx = dL/dy * dy/dx
        """
        # get y from ctx
        y, = ctx.saved_tensors
        # chain rule: dL/dx = dL/dy * dy/dx
        # where dL/dy = grad_output, and dy/dx of the sigmoid is y * (1 - y)
        grad_input = grad_output * y * (1 - y)
        return grad_input
#TODO 2: complete the forward and backward functions of the ReLU activation function.
#Note: You can refer to the activation function Tanh
class ReLU(torch.autograd.Function):
    '''
    ReLU activation function
    y = max{x, 0}
    '''

    @staticmethod
    def forward(ctx, x):
        '''
        Clamp negative elements of the input to zero.

        ctx: context object used to hand the output over to backward()
        x: input tensor with arbitrary shape
        returns: a new tensor (x itself is not modified) with the same shape and dtype as x
        '''
        # clamp keeps the dtype, device and shape of x, so no separately
        # constructed zero tensor (with its own dtype/shape) is needed
        y = torch.clamp(x, min=0)
        # the output is enough for backward(): y > 0 exactly where x > 0
        ctx.save_for_backward(y)
        return y

    @staticmethod
    def backward(ctx, grad_output):
        """
        In the backward pass we receive a Tensor containing the gradient of the loss
        with respect to the output, and we need to compute the gradient of the loss
        with respect to the input.

        grad_output: dL/dy
        returns: dL/dx = dL/dy * dy/dx
        """
        # get the forward output from ctx
        y, = ctx.saved_tensors
        # dy/dx of ReLU is 1 if x > 0, and 0 if x <= 0;
        # the mask is cast to grad_output's dtype so the product never mixes dtypes
        mask = (y > 0).to(grad_output.dtype)
        # chain rule: dL/dx = dL/dy * dy/dx
        grad_input = grad_output * mask
        return grad_input
# activate function class according to the type
class Activation(nn.Module):
    '''
    Module wrapper that selects one of the custom activation functions by name.
    '''

    def __init__(self, type):
        '''
        :param type: 'sigmoid', 'tanh', or 'relu'
        '''
        super().__init__()
        # reject unknown names up front
        if type not in ('sigmoid', 'tanh', 'relu'):
            print('activation type should be one of [sigmoid, tanh, relu]')
            raise NotImplementedError
        # bind the matching autograd Function's apply as the callable
        if type == 'sigmoid':
            chosen = Sigmoid.apply
        elif type == 'tanh':
            chosen = Tanh.apply
        else:
            chosen = ReLU.apply
        self.act = chosen

    def forward(self, x):
        '''Run the selected activation on x and return the result.'''
        result = self.act(x)
        return result
#========================================================
# Media and Cognition
# Homework 1 Neural network basics
# activations.py - activation functions
# Student ID: 2022010639
# Name: Gao Yixuan
# Tsinghua University
# (C) Copyright 2024
#========================================================
import torch
import torch.nn as nn
'''
In this script we will implement three activation functions, including both forward and backward processes.
More details about customizing a backward process in PyTorch can be found in:
https://pytorch.org/tutorials/beginner/examples_autograd/two_layer_net_custom_function.html
'''
## Here, Tanh is given as an example to show how to construct the activation function. Please finish the activation functions of Sigmoid and ReLU later.
class Tanh(torch.autograd.Function):
    '''
    Tanh activation function
        y = (exp(x) - exp(-x)) / (exp(x) + exp(-x))
    '''

    # static methods can be called without creating an instance of the class
    @staticmethod
    def forward(ctx, x):
        '''
        Forward pass: map the input tensor to tanh(x).
            ctx: context object; tensors stored with ctx.save_for_backward are
                 available in backward through ctx.saved_tensors
            x: input with arbitrary shape
        '''
        # The textbook formula evaluates exp(x) and exp(-x) separately, which can
        # overflow when |x| is large; the built-in tanh is used instead.
        out = x.tanh()
        # the derivative can be written in terms of the output, so only the output is kept
        ctx.save_for_backward(out)
        return out

    @staticmethod
    def backward(ctx, grad_output):
        """
        Backward pass.
            grad_output: dL/dy
            return: dL/dx = dL/dy * dy/dx, with dy/dx = 1 - y^2 for tanh
        """
        (out,) = ctx.saved_tensors
        local_grad = 1 - out.pow(2)
        return grad_output * local_grad
#TODO 1: complete the forward and backward functions of the Sigmoid activation function.
#Note: You can refer to the activation function Tanh
class Sigmoid(torch.autograd.Function):
    '''
    Sigmoid activation function
        y = 1 / (1 + exp(-x))
    '''

    @staticmethod
    def forward(ctx, x):
        '''
        ctx: context object used to stash tensors for the backward pass
        x: input with arbitrary shape
        return: tensor of the same shape with values in [0, 1]
        '''
        # reciprocal, not subtraction: the previous "1 - (1 + exp(-x))" evaluated to -exp(-x)
        # for very negative x, exp(-x) overflows to inf and 1 / inf = 0, which is the correct limit
        y = 1 / (1 + torch.exp(-x))
        # the derivative is expressed through y, so only the output needs to be saved
        ctx.save_for_backward(y)
        return y

    @staticmethod
    def backward(ctx, grad_output):
        '''
        grad_output: dL/dy
        return: dL/dx = dL/dy * dy/dx, with dy/dx = y * (1 - y) for sigmoid
        '''
        y, = ctx.saved_tensors
        grad_input = grad_output * y * (1 - y)
        return grad_input
#TODO 2: complete the forward and backward functions of the ReLU activation function.
#Note: You can refer to the activation function Tanh
class ReLU(torch.autograd.Function):
    '''
    ReLU activation function
        y = max{x, 0}
    '''

    @staticmethod
    def forward(ctx, x):
        '''
        ctx: context object used to stash tensors for the backward pass
        x: input with arbitrary shape
        return: tensor with the same shape and dtype as x, negative entries set to 0
        '''
        # clamp() returns a new tensor (it is NOT in-place), so the caller's input is untouched
        y = x.clamp(min=0)
        # y > 0 exactly where x > 0, so the output alone is enough to rebuild the gradient mask
        ctx.save_for_backward(y)
        return y

    @staticmethod
    def backward(ctx, grad_output):
        """
        In the backward pass we receive a Tensor containing the gradient of the loss
        with respect to the output, and we need to compute the gradient of the loss
        with respect to the input.
            grad_output: dL/dy
            return: dL/dx = dL/dy * dy/dx, where dy/dx is 1 if x > 0 and 0 if x <= 0
        """
        y, = ctx.saved_tensors
        # build the 0/1 mask in grad_output's dtype; torch.heaviside with a float32
        # `values` tensor rejects inputs of any other dtype (e.g. float64)
        mask = (y > 0).to(grad_output.dtype)
        return grad_output * mask
# activate function class according to the type
class Activation(nn.Module):
    '''
    Thin nn.Module wrapper that dispatches to one of the custom autograd
    activation functions defined in this file.
    '''

    def __init__(self, type):
        '''
        :param type: 'sigmoid', 'tanh', or 'relu'
        :raises NotImplementedError: for any other value (a hint is printed first)
        '''
        super().__init__()
        # reject unknown names up front
        if type not in ('sigmoid', 'tanh', 'relu'):
            print('activation type should be one of [sigmoid, tanh, relu]')
            raise NotImplementedError
        # store the Function's apply callable; it is what runs forward/backward
        self.act = (
            Sigmoid.apply if type == 'sigmoid'
            else Tanh.apply if type == 'tanh'
            else ReLU.apply
        )

    def forward(self, x):
        '''Apply the selected activation element-wise to x.'''
        activated = self.act(x)
        return activated

View File

@@ -1,118 +1,118 @@
#========================================================
# Media and Cognition
# Homework 1 Neural network basics
# losses.py - loss functions
# Student ID: 2022010639
# Name: Gao Yixuan
# Tsinghua University
# (C) Copyright 2024
#========================================================
import torch
import torch.nn.functional as F
'''
In this script we will implement our MSE and Cross Entropy loss functions, including both the forward and backward processes.
More details about customizing a backward process can be found in:
https://pytorch.org/tutorials/beginner/examples_autograd/two_layer_net_custom_function.html
'''
# here is the sample code of MSELoss
# you can use this as reference to implement the CrossEntropyLoss
class MSELoss(torch.autograd.Function):
    '''
    MSE loss function
        loss = sum over all elements of (label - pred) ** 2
    '''

    @staticmethod
    def forward(ctx, pred, label):
        """
        :param pred: prediction with shape [batch_size, *], where * means additional dimensions
        :param label: groundtruth, same shape as the prediction
        :return: scalar tensor holding the squared error summed over every element
                 (no division by batch_size is performed here)
        """
        # keep both operands: the gradient needs their difference
        ctx.save_for_backward(pred, label)
        residual = pred - label
        return residual.pow(2).sum()

    @staticmethod
    def backward(ctx, grad_output):
        """
        :param grad_output: upstream gradient dL/dloss (1 when this loss is the final scalar)
        :return: (dL/dpred, None); label receives no gradient
        """
        pred, label = ctx.saved_tensors
        residual = pred - label
        # d/dpred of sum((pred - label)^2) is 2 * (pred - label)
        return (grad_output * 2) * residual, None
#TODO 1: Complete the CrossEntropyLoss loss function
class CrossEntropyLoss(torch.autograd.Function):
    '''
    Cross entropy loss function:
        loss = - log q_i
    where
        q_i = softmax(z_i) = exp(z_i) / (exp(z_0) + exp(z_1) + ...)
    When z_i is large, exp(z_i) can overflow, so the stable softmax is used:
        softmax(z_i) = softmax(z_i - z_max),  z_max = max{z_0, z_1, ...}
    The log is taken analytically,
        log q_i = (z_i - z_max) - log(sum_j exp(z_j - z_max)),
    so the loss stays finite even when q_i underflows to 0.
    '''

    @staticmethod
    def forward(ctx, logits, label):
        """
        :param logits: logits with shape [batch_size, n_classes], denoted by "z" in the above formula
        :param label: groundtruth class indices with shape [batch_size], where 0 <= label[i] <= n_classes - 1
        :return: scalar cross entropy loss, averaged over batch_size
        """
        # step 1: stable softmax; subtract the per-row maximum before exponentiating
        z_max = torch.max(logits, 1, keepdim=True).values  # [batch_size, 1]
        shifted = logits - z_max                            # [batch_size, n_classes]
        exps = torch.exp(shifted)
        sums = torch.sum(exps, 1, keepdim=True)             # [batch_size, 1], always >= 1
        q = exps / sums
        # log-softmax computed without ever evaluating log(0)
        log_q = shifted - torch.log(sums)

        # step 2: one-hot labels, e.g. label = [0, 2], n_classes = 4 -> [[1,0,0,0], [0,0,1,0]]
        one_hot_label = F.one_hot(label, logits.size(1)).to(logits.dtype)

        # step 3: pick -log q of the true class for each sample and average over the batch
        batch_size = logits.size(0)
        cross_entropy = -torch.sum(log_q * one_hot_label) / batch_size

        # softmax output and one-hot labels are all the backward pass needs
        ctx.save_for_backward(q, one_hot_label)
        return cross_entropy

    @staticmethod
    def backward(ctx, grad_output):
        """
        :param grad_output: upstream gradient dL/dloss (1 when this loss is the final scalar)
        :return: (dL/dz, None); label receives no gradient
        """
        q, one_hot_label = ctx.saved_tensors
        # d(mean CE)/dz = (softmax - one_hot) / batch_size; the 1/batch_size factor
        # mirrors the averaging done in forward
        grad_input = grad_output * (q - one_hot_label) / q.size(0)
        return grad_input, None
#========================================================
# Media and Cognition
# Homework 1 Neural network basics
# losses.py - loss functions
# Student ID: 2022010639
# Name: Gao Yixuan
# Tsinghua University
# (C) Copyright 2024
#========================================================
import torch
import torch.nn.functional as F
'''
In this script we will implement our MSE and Cross Entropy loss functions, including both the forward and backward processes.
More details about customizing a backward process can be found in:
https://pytorch.org/tutorials/beginner/examples_autograd/two_layer_net_custom_function.html
'''
# here is the sample code of MSELoss
# you can use this as reference to implement the CrossEntropyLoss
class MSELoss(torch.autograd.Function):
    '''
    MSE loss function
        loss = sum over all elements of (label - pred) ** 2
    '''

    @staticmethod
    def forward(ctx, pred, label):
        """
        :param pred: prediction with shape [batch_size, *], where * means additional dimensions
        :param label: groundtruth, same shape as the prediction
        :return: scalar tensor holding the squared error summed over every element
                 (no division by batch_size is performed here)
        """
        # keep both operands: the gradient needs their difference
        ctx.save_for_backward(pred, label)
        residual = pred - label
        return residual.pow(2).sum()

    @staticmethod
    def backward(ctx, grad_output):
        """
        :param grad_output: upstream gradient dL/dloss (1 when this loss is the final scalar)
        :return: (dL/dpred, None); label receives no gradient
        """
        pred, label = ctx.saved_tensors
        residual = pred - label
        # d/dpred of sum((pred - label)^2) is 2 * (pred - label)
        return (grad_output * 2) * residual, None
#TODO 1: Complete the CrossEntropyLoss loss function
class CrossEntropyLoss(torch.autograd.Function):
    '''
    Cross entropy loss function:
        loss = - log q_i
    where
        q_i = softmax(z_i) = exp(z_i) / (exp(z_0) + exp(z_1) + ...)
    When z_i is large, exp(z_i) can overflow, so the stable softmax is used:
        softmax(z_i) = softmax(z_i - z_max),  z_max = max{z_0, z_1, ...}
    The log is taken analytically,
        log q_i = (z_i - z_max) - log(sum_j exp(z_j - z_max)),
    so the loss stays finite even when q_i underflows to 0.
    '''

    @staticmethod
    def forward(ctx, logits, label):
        """
        :param logits: logits with shape [batch_size, n_classes], denoted by "z" in the above formula
        :param label: groundtruth class indices with shape [batch_size], where 0 <= label[i] <= n_classes - 1
        :return: scalar cross entropy loss, averaged over batch_size
        """
        # step 1: stable softmax; subtract the per-row maximum before exponentiating
        z_max = torch.max(logits, 1, keepdim=True).values  # [batch_size, 1]
        shifted = logits - z_max                            # [batch_size, n_classes]
        exps = torch.exp(shifted)
        sums = torch.sum(exps, 1, keepdim=True)             # [batch_size, 1], always >= 1
        q = exps / sums
        # log-softmax computed without ever evaluating log(0)
        log_q = shifted - torch.log(sums)

        # step 2: one-hot labels, e.g. label = [0, 2], n_classes = 4 -> [[1,0,0,0], [0,0,1,0]]
        one_hot_label = F.one_hot(label, logits.size(1)).to(logits.dtype)

        # step 3: pick -log q of the true class for each sample and average over the batch
        batch_size = logits.size(0)
        cross_entropy = -torch.sum(log_q * one_hot_label) / batch_size

        # softmax output and one-hot labels are all the backward pass needs
        ctx.save_for_backward(q, one_hot_label)
        return cross_entropy

    @staticmethod
    def backward(ctx, grad_output):
        """
        :param grad_output: upstream gradient dL/dloss (1 when this loss is the final scalar)
        :return: (dL/dz, None); label receives no gradient
        """
        q, one_hot_label = ctx.saved_tensors
        # d(mean CE)/dz = (softmax - one_hot) / batch_size; the 1/batch_size factor
        # mirrors the averaging done in forward
        grad_input = grad_output * (q - one_hot_label) / q.size(0)
        return grad_input, None

View File

@@ -1,156 +1,156 @@
#========================================================
# Media and Cognition
# Homework 1 Neural network basics
# network.py - linear layer and MLP network
# Student ID: 2022010639
# Name: Gao Yixuan
# Tsinghua University
# (C) Copyright 2024
#========================================================
import torch
import torch.nn as nn
from activations import Activation
'''
In this script we will implement our Linear layer and MLP network.
For the linear layer, we will provide a sample of codes which calculate both the forward and backward processes by our own.
More details about customizing a backward process can be found in:
https://pytorch.org/tutorials/beginner/examples_autograd/two_layer_net_custom_function.html
For the MLP network, you should cascade the linear layers and activation functions in a proper way in the __init__ function and implement the forward function.
'''
class LinearFunction(torch.autograd.Function):
    '''
    we will implement the linear function:
        y = xW^T + b
    as well as its gradient computation process
    '''

    @staticmethod
    def forward(ctx, x, W, b):
        '''
        Input:
        :param ctx: a context object that can be used to stash information for backward computation
        :param x: input features with size [batch_size, input_size]
                  (extra leading dimensions, [..., input_size], are also accepted)
        :param W: weight matrix with size [output_size, input_size]
        :param b: bias with size [output_size]
        Return:
        y: output features with size [batch_size, output_size] (or [..., output_size])
        '''
        y = torch.matmul(x, W.T) + b
        # the bias is not needed for any of the three gradients
        ctx.save_for_backward(x, W)
        return y

    @staticmethod
    def backward(ctx, grad_output):
        '''
        Input:
        :param ctx: a context object with saved tensors
        :param grad_output: dL/dy, with size [batch_size, output_size] (or [..., output_size])
        Return:
        grad_input: dL/dx, same size as x
        grad_W: dL/dW, with size [output_size, input_size], summed for data in the batch
        grad_b: dL/db, with size [output_size], summed for data in the batch
        '''
        # saved_tensors is the supported accessor; saved_variables is deprecated
        x, W = ctx.saved_tensors
        # dL/dx = dL/dy * W
        grad_input = torch.matmul(grad_output, W)
        # collapse every leading dimension into one "sample" axis so the sums below
        # run over all samples regardless of how many batch dimensions x has
        grad_2d = grad_output.reshape(-1, grad_output.shape[-1])
        x_2d = x.reshape(-1, x.shape[-1])
        # dL/dW = (dL/dy)^T * x
        grad_W = torch.matmul(grad_2d.T, x_2d)
        # dL/db = sum of dL/dy over samples
        grad_b = grad_2d.sum(0)
        return grad_input, grad_W, grad_b
class Linear(nn.Module):
    '''
    A linear layer backed by the hand-written LinearFunction autograd op.
    '''

    def __init__(self, input_size, output_size):
        '''
        :param input_size: dimension of input features
        :param output_size: dimension of output features
        '''
        super().__init__()
        # weights drawn from a standard normal, bias starts at zero
        self.W = nn.Parameter(torch.randn(output_size, input_size).float(), requires_grad=True)
        self.b = nn.Parameter(torch.zeros(output_size).float(), requires_grad=True)

    def forward(self, x):
        '''Compute x W^T + b through the custom autograd function.'''
        out = LinearFunction.apply(x, self.W, self.b)
        return out
class MLP(nn.Module):
    '''
    Multilayer Perceptron built from the custom Linear layers and Activation module.
    '''

    def __init__(self, input_size, output_size, hidden_size, n_layers, act_type):
        '''
        :param input_size: dimension of input features
        :param output_size: dimension of output features
        :param hidden_size: a list containing hidden size for each hidden layer
        :param n_layers: number of layers (hidden layers + 1 output layer)
        :param act_type: type of activation function for each hidden layer;
                         it is passed straight to Activation(act_type)
        '''
        super().__init__()

        # every hidden width needs one layer, plus the output layer
        assert len(hidden_size) + 1 == n_layers, 'total layer number should be hidden layer number + 1'

        # one activation module, reused after every hidden linear layer
        # (it is created even when there are no hidden layers)
        self.act = Activation(act_type)

        stack = []
        prev_size = input_size
        # hidden layers: Linear followed by the activation;
        # with n_layers == 1 the list is empty and the MLP is a single linear layer
        for width in hidden_size:
            stack.append(Linear(prev_size, width))
            stack.append(self.act)
            prev_size = width
        # output layer maps the last hidden width (or the input) to output_size
        stack.append(Linear(prev_size, output_size))

        # submodules are registered as "0", "1", ... in order
        self.network = nn.Sequential(*stack)

    def forward(self, x):
        '''
        :param x: input features with size [batch_size, input_size]
        :return: output features with size [batch_size, output_size]
        '''
        return self.network(x)
#========================================================
# Media and Cognition
# Homework 1 Neural network basics
# network.py - linear layer and MLP network
# Student ID: 2022010639
# Name: Gao Yixuan
# Tsinghua University
# (C) Copyright 2024
#========================================================
import torch
import torch.nn as nn
from activations import Activation
'''
In this script we will implement our Linear layer and MLP network.
For the linear layer, we will provide a sample of codes which calculate both the forward and backward processes by our own.
More details about customizing a backward process can be found in:
https://pytorch.org/tutorials/beginner/examples_autograd/two_layer_net_custom_function.html
For the MLP network, you should cascade the linear layers and activation functions in a proper way in the __init__ function and implement the forward function.
'''
class LinearFunction(torch.autograd.Function):
    '''
    we will implement the linear function:
        y = xW^T + b
    as well as its gradient computation process
    '''

    @staticmethod
    def forward(ctx, x, W, b):
        '''
        Input:
        :param ctx: a context object that can be used to stash information for backward computation
        :param x: input features with size [batch_size, input_size]
                  (extra leading dimensions, [..., input_size], are also accepted)
        :param W: weight matrix with size [output_size, input_size]
        :param b: bias with size [output_size]
        Return:
        y: output features with size [batch_size, output_size] (or [..., output_size])
        '''
        y = torch.matmul(x, W.T) + b
        # the bias is not needed for any of the three gradients
        ctx.save_for_backward(x, W)
        return y

    @staticmethod
    def backward(ctx, grad_output):
        '''
        Input:
        :param ctx: a context object with saved tensors
        :param grad_output: dL/dy, with size [batch_size, output_size] (or [..., output_size])
        Return:
        grad_input: dL/dx, same size as x
        grad_W: dL/dW, with size [output_size, input_size], summed for data in the batch
        grad_b: dL/db, with size [output_size], summed for data in the batch
        '''
        # saved_tensors is the supported accessor; saved_variables is deprecated
        x, W = ctx.saved_tensors
        # dL/dx = dL/dy * W
        grad_input = torch.matmul(grad_output, W)
        # collapse every leading dimension into one "sample" axis so the sums below
        # run over all samples regardless of how many batch dimensions x has
        grad_2d = grad_output.reshape(-1, grad_output.shape[-1])
        x_2d = x.reshape(-1, x.shape[-1])
        # dL/dW = (dL/dy)^T * x
        grad_W = torch.matmul(grad_2d.T, x_2d)
        # dL/db = sum of dL/dy over samples
        grad_b = grad_2d.sum(0)
        return grad_input, grad_W, grad_b
class Linear(nn.Module):
    '''
    A linear layer backed by the hand-written LinearFunction autograd op.
    '''

    def __init__(self, input_size, output_size):
        '''
        :param input_size: dimension of input features
        :param output_size: dimension of output features
        '''
        super().__init__()
        # weights drawn from a standard normal, bias starts at zero
        self.W = nn.Parameter(torch.randn(output_size, input_size).float(), requires_grad=True)
        self.b = nn.Parameter(torch.zeros(output_size).float(), requires_grad=True)

    def forward(self, x):
        '''Compute x W^T + b through the custom autograd function.'''
        out = LinearFunction.apply(x, self.W, self.b)
        return out
class MLP(nn.Module):
    '''
    Multilayer Perceptron built from the custom Linear layers and Activation module.
    '''

    def __init__(self, input_size, output_size, hidden_size, n_layers, act_type):
        '''
        :param input_size: dimension of input features
        :param output_size: dimension of output features
        :param hidden_size: a list containing hidden size for each hidden layer
        :param n_layers: number of layers (hidden layers + 1 output layer)
        :param act_type: type of activation function for each hidden layer;
                         it is passed straight to Activation(act_type)
        '''
        super().__init__()

        # every hidden width needs one layer, plus the output layer
        assert len(hidden_size) + 1 == n_layers, 'total layer number should be hidden layer number + 1'

        # one activation module, reused after every hidden linear layer
        # (it is created even when there are no hidden layers)
        self.act = Activation(act_type)

        stack = []
        prev_size = input_size
        # hidden layers: Linear followed by the activation;
        # with n_layers == 1 the list is empty and the MLP is a single linear layer
        for width in hidden_size:
            stack.append(Linear(prev_size, width))
            stack.append(self.act)
            prev_size = width
        # output layer maps the last hidden width (or the input) to output_size
        stack.append(Linear(prev_size, output_size))

        # submodules are registered as "0", "1", ... in order
        self.network = nn.Sequential(*stack)

    def forward(self, x):
        '''
        :param x: input features with size [batch_size, input_size]
        :return: output features with size [batch_size, output_size]
        '''
        return self.network(x)

View File

@@ -1,397 +1,397 @@
#========================================================
# Media and Cognition
# Homework 1 Neural network basics
# recognition.py - character classification
# Student ID: 2022010639
# Name: Gao Yixuan
# Tsinghua University
# (C) Copyright 2024
#========================================================
# ==== Part 0: import libs
import torch
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
import json, cv2, os, string
import matplotlib.pyplot as plt
import numpy as np
# this time we implement our networks and loss functions in other python script, and import them here
from network import MLP
from losses import CrossEntropyLoss
# argparse is used to conveniently set our configurations
import argparse
# ==== Part 1: data loader
# construct a dataset and a data loader, more details can be found in
# https://pytorch.org/tutorials/beginner/basics/data_tutorial.html?highlight=dataloader
class ListDataset(Dataset):
    '''
    Dataset of single-character images described by a json file that maps
    image file names to their letter labels.
    '''

    def __init__(self, im_dir, file_path, norm_size=(32, 32)):
        '''
        :param im_dir: path to directory with images
        :param file_path: json file containing image names and labels
        :param norm_size: image normalization size, (height, width)
        '''
        # this time we will try to recognize 26 English letters (case-insensitive)
        letters = string.ascii_letters[-26:]  # ABCD...XYZ
        # letter -> class index, 'A' -> 0 ... 'Z' -> 25
        self.alphabet = {letter: idx for idx, letter in enumerate(letters)}
        self.norm_size = norm_size

        with open(file_path, 'r') as f:
            imgs = json.load(f)
        # keys and values of the same dict iterate in matching order,
        # so im_paths[i] and labels[i] describe the same sample
        self.im_paths = [os.path.join(im_dir, im_name) for im_name in imgs.keys()]
        self.labels = list(imgs.values())

    def __len__(self):
        # the __len__() function should return the total number of samples in the dataset
        return len(self.im_paths)

    def __getitem__(self, index):
        '''
        :param index: sample index, must be smaller than len(self)
        :return: (im, label) where im is a float numpy array with values in [-1, 1]
                 and label is the integer class index of the letter
        :raises AssertionError: if index is not smaller than len(self)
        :raises FileNotFoundError: if the image file cannot be read
        '''
        # strict bound: index == len(self) is already out of range
        assert index < len(self), 'index range error'

        # read an image and convert it to grey scale
        im_path = self.im_paths[index]
        im = cv2.imread(im_path)
        # cv2.imread signals failure by returning None instead of raising
        if im is None:
            raise FileNotFoundError('cannot read image: {}'.format(im_path))
        im = cv2.cvtColor(im, cv2.COLOR_BGR2GRAY)

        # NOTE(review): cv2.resize takes dsize as (width, height) while norm_size is
        # documented as (height, width); harmless for square sizes, confirm for others
        im = cv2.resize(im, self.norm_size)
        # scale pixels from [0, 255] to [0, 1]; np.divide is used because "im / 255."
        # was reported not to work in the author's environment
        im = np.divide(im, 255.)
        # then shift and stretch to [-1, 1]
        im = (im - 0.5) * 2.0

        # labels are case-insensitive: upper-case the letter, then map it to its index
        label = self.labels[index].upper()
        label = self.alphabet[label]

        return im, label
def dataLoader(im_dir, file_path, norm_size, batch_size, workers=0):
    '''
    Build a DataLoader over a ListDataset.

    :param im_dir: path to directory with images
    :param file_path: file with image paths and labels
    :param norm_size: image normalization size, (height, width)
    :param batch_size: batch size
    :param workers: value forwarded to DataLoader's num_workers
    :return: a data loader
    '''
    samples = ListDataset(im_dir, file_path, norm_size)
    # samples are shuffled only when the list file path contains 'train'
    is_training = 'train' in file_path
    return DataLoader(samples,
                      batch_size=batch_size,
                      shuffle=is_training,
                      num_workers=workers)
# ==== Part 2: training, validation and testing
def train_val(model, trainloader, valloader, n_epochs,
              lr, optim_type, momentum, weight_decay,
              valInterval, device='cpu'):
    '''
    The main training procedure
    ----------------------------
    :param model: the MLP model
    :param trainloader: the dataloader of the train set
    :param valloader: the dataloader of the validation set
    :param n_epochs: number of training epochs
    :param lr: learning rate
    :param optim_type: optimizer, can be 'sgd', 'adagrad', 'rmsprop', 'adam', or 'adadelta'
    :param momentum: only used if optim_type == 'sgd'
    :param weight_decay: the factor of L2 penalty on network weights
    :param valInterval: the frequency of validation, e.g., if valInterval = 5, then do validation after each 5 training epochs
    :param device: 'cpu' or 'cuda', we can use 'cpu' for our homework if GPU with cuda support is not available
    :raises NotImplementedError: if optim_type is not one of the supported names

    NOTE(review): the checkpoint path is read from the module-level `opt` object
    (opt.model_path), so this function only works after `opt` has been defined.
    '''
    # the hand-written cross entropy loss
    criterion = CrossEntropyLoss.apply

    # optimizer constructors are wrapped in lambdas so only the selected one is built
    optimizer_factories = {
        'sgd': lambda: optim.SGD(model.parameters(), lr, momentum=momentum, weight_decay=weight_decay),
        'adagrad': lambda: optim.Adagrad(model.parameters(), lr, weight_decay=weight_decay),
        'rmsprop': lambda: optim.RMSprop(model.parameters(), lr, weight_decay=weight_decay),
        'adam': lambda: optim.Adam(model.parameters(), lr, weight_decay=weight_decay),
        'adadelta': lambda: optim.Adadelta(model.parameters(), lr, weight_decay=weight_decay),
    }
    if optim_type not in optimizer_factories:
        print('[Error] optim_type should be one of sgd, adagrad, rmsprop, adam, or adadelta')
        raise NotImplementedError
    optimizer = optimizer_factories[optim_type]()

    # average loss of every epoch, used for the loss curve at the end
    epoch_losses = []

    for epoch_idx in range(n_epochs):
        epoch_no = epoch_idx + 1

        # training mode
        model.train()
        running_loss = 0.

        for batch, targets in trainloader:
            # cast to float32 and move to the target device
            batch = batch.type(torch.float32).to(device)
            targets = targets.to(device)

            # each image becomes one flat vector, the input format of the MLP
            batch = torch.flatten(batch, start_dim=1)

            # gradients accumulate by default, so reset them for every batch
            optimizer.zero_grad()

            # forward pass, loss, backward pass
            logits = model(batch)
            batch_loss = criterion(logits, targets)
            batch_loss.backward()

            # .item() yields a plain Python number, detached from the graph
            running_loss += batch_loss.item()

            # parameter update
            optimizer.step()

        # mean loss over the iterations of this epoch
        mean_loss = running_loss / len(trainloader)
        epoch_losses.append(mean_loss)
        print('Epoch {:02d}: loss = {:.3f}'.format(epoch_no, mean_loss))

        # validation (and checkpointing) only every valInterval epochs
        # NOTE(review): assumes the checkpoint is saved together with validation — confirm
        if epoch_no % valInterval != 0:
            continue

        val_acc = test(model, valloader, device)
        print('Epoch {:02d}: validation accuracy = {:.1f}%'.format(epoch_no, 100 * val_acc))

        # save model parameters in a file
        checkpoint_path = opt.model_path
        torch.save({'state_dict': model.state_dict()}, checkpoint_path)
        print('Model saved in {}\n'.format(checkpoint_path))

    # draw the loss curve
    plot_loss(epoch_losses)
def test(model, testloader, device):
'''
The testing procedure
----------------------------
:param model: the MLP model
:param testloader: the dataloader to be tested/validated
:param device: 'cpu' or 'cuda', we can use 'cpu' for our homework if GPU with cuda support is not available
'''
# set the model in evaluation mode
model.eval()
n_correct = 0. # number of images that are correctly classified
n_imgs = 0. # number of total images
with torch.no_grad(): # we do not need to compute gradients during validation
#TODO 3: get the prediction of the data and calculate the accuracy
for imgs, labels in testloader:
# step 1: set data type and device
# imgs = torch.from_numpy(imgs)
imgs = imgs.type(torch.float32)
imgs = imgs.to(device)
labels = labels.to(device)
# step 2: convert an image to a vector as the input of the MLP
imgs = torch.flatten(imgs, start_dim=1)
# step 3: run the model which is the forward process
output = model(imgs)
# step 4: get the predicted value by the output using out.argmax(1)
pred = output.argmax(1)
# step 5: sum up the number of images correctly recognized and the total image number
for predict, label in zip(pred, labels):
if predict == label:
n_correct += 1
n_imgs += 1
accuracy = n_correct / n_imgs
return accuracy
# ==== Part 3: predict new images
def predict(model, im_path, norm_size, device):
    '''
    The predicting procedure: classify one image and print the predicted letter.
    ---------------
    :param model: the MLP model
    :param im_path: path of an image
    :param norm_size: image normalization size, (height, width)
    :param device: 'cpu' or 'cuda', we can use 'cpu' for our homework if GPU with cuda support is not available
    '''
    # evaluation mode
    model.eval()

    # same pre-processing as ListDataset: grey scale, resize, scale to [-1, 1]
    gray = cv2.cvtColor(cv2.imread(im_path), cv2.COLOR_BGR2GRAY)
    gray = cv2.resize(gray, norm_size)
    normalized = (np.divide(gray, 255.) - 0.5) * 2.0

    # numpy.ndarray -> torch.tensor
    im_tensor = torch.from_numpy(normalized)

    with torch.no_grad():
        # a single flattened float32 row, i.e. a batch of one image
        batch = im_tensor.view(1, -1).type(torch.float32).to(device)
        scores = model(batch)
        class_idx = scores.argmax(1)[0].item()

    # class index -> upper-case letter
    alphabet = string.ascii_letters[-26:]  # ABCD...XYZ
    print('Prediction: {}'.format(alphabet[class_idx]))
# ==== Part 4: draw the loss curve
def plot_loss(losses):
    '''
    Draw the training loss curve in a new figure and display it.
    :param losses: list of losses for each epoch
    :return:
    '''
    _, axes = plt.subplots()
    # one point per epoch, x axis is the epoch index
    axes.plot(losses)
    axes.set_xlabel('training epoch')
    axes.set_ylabel('loss')
    plt.show()
if __name__ == '__main__':
    # set random seed for reproducibility
    seed = 2023
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)
    torch.backends.cudnn.deterministic = True

    # set configurations
    parser = argparse.ArgumentParser()
    parser.add_argument('--mode', type=str, default='train', help='train, test or predict')
    parser.add_argument('--im_dir', type=str, default='data/character_classification/images',
                        help='path to directory with images')
    parser.add_argument('--train_file_path', type=str, default='data/character_classification/train.json',
                        help='file list of training image paths and labels')
    parser.add_argument('--val_file_path', type=str, default='data/character_classification/validation.json',
                        help='file list of validation image paths and labels')
    parser.add_argument('--test_file_path', type=str, default='data/character_classification/test.json',
                        help='file list of test image paths and labels')
    parser.add_argument('--batchsize', type=int, default=8, help='batch size')
    parser.add_argument('--device', type=str, default='cpu', help='cpu or cuda')

    # configurations for training
    parser.add_argument('--hsize', type=str, default='32', help='hidden size for each hidden layer, splitted by comma')
    parser.add_argument('--layer', type=int, default=2, help='number of layers in the MLP')
    parser.add_argument('--act', type=str, default='relu',
                        help='type of activation function, can be sigmoid, tanh, or relu')
    # two integers, e.g. `--norm_size 32 32`; `type=tuple` would split the raw string into single characters
    parser.add_argument('--norm_size', type=int, nargs=2, default=(32, 32), metavar=('H', 'W'),
                        help='image normalization size, (height, width)')
    parser.add_argument('--epoch', type=int, default=50, help='number of training epochs')
    parser.add_argument('--n_classes', type=int, default=26, help='number of classes')
    parser.add_argument('--valInterval', type=int, default=10, help='the frequency of validation')
    parser.add_argument('--lr', type=float, default=5e-4, help='learning rate')
    parser.add_argument('--optim_type', type=str, default='sgd', help='type of optimizer, can be sgd, adagrad, rmsprop, adam, or adadelta')
    parser.add_argument('--momentum', type=float, default=0.9, help='momentum of the SGD optimizer, only used if optim_type is sgd')
    parser.add_argument('--weight_decay', type=float, default=0., help='the factor of L2 penalty on network weights')

    # configurations for test and prediction
    parser.add_argument('--model_path', type=str, default='saved_models/recognition.pth', help='path of a saved model')
    parser.add_argument('--im_path', type=str, default='data/character_classification/new_images/predict01.png',
                        help='path of an image to be recognized')

    opt = parser.parse_args()
    # argparse yields a list when the option is given explicitly; the rest of the script expects a tuple
    opt.norm_size = tuple(opt.norm_size)

    # initialize the MLP model
    # each image of shape [norm_size[0], norm_size[1]] is flattened into one input vector;
    # the output size follows --n_classes (default 26) instead of a hard-coded constant
    model = MLP(opt.norm_size[0] * opt.norm_size[1], opt.n_classes,
                [int(num) for num in opt.hsize.split(',')], opt.layer, opt.act)

    # for the 'test' and 'predict' mode, we should load the saved checkpoint into the model
    if opt.mode == 'test' or opt.mode == 'predict':
        # load on CPU first; the model is moved to the requested device below
        checkpoint = torch.load(opt.model_path, map_location='cpu')
        # load model parameters we saved in model_path
        model.load_state_dict(checkpoint['state_dict'])
        print('[Info] Load model from {}'.format(opt.model_path))

    # put the model on CPU or GPU according to the device in args
    model = model.to(opt.device)

    # -- run the code for training and validation
    if opt.mode == 'train':
        # training and validation data loader
        trainloader = dataLoader(opt.im_dir, opt.train_file_path, opt.norm_size, opt.batchsize)
        valloader = dataLoader(opt.im_dir, opt.val_file_path, opt.norm_size, opt.batchsize)
        train_val(model, trainloader, valloader,
                  n_epochs=opt.epoch,
                  lr=opt.lr,
                  optim_type=opt.optim_type,
                  momentum=opt.momentum,
                  weight_decay=opt.weight_decay,
                  valInterval=opt.valInterval,
                  device=opt.device)

    # -- test the saved model
    elif opt.mode == 'test':
        testloader = dataLoader(opt.im_dir, opt.test_file_path, opt.norm_size, opt.batchsize)
        acc = test(model, testloader, opt.device)
        print('[Info] Test accuracy = {:.1f}%'.format(100 * acc))

    # -- predict a new image
    elif opt.mode == 'predict':
        predict(model, im_path=opt.im_path, norm_size=opt.norm_size, device=opt.device)

    else:
        print('mode should be train, test, or predict')
        raise NotImplementedError
#========================================================
# Media and Cognition
# Homework 1 Neural network basics
# recognition.py - character classification
# Student ID: 2022010639
# Name: Gao Yixuan
# Tsinghua University
# (C) Copyright 2024
#========================================================
# ==== Part 0: import libs
import torch
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
import json, cv2, os, string
import matplotlib.pyplot as plt
import numpy as np
# this time we implement our networks and loss functions in other python script, and import them here
from network import MLP
from losses import CrossEntropyLoss
# argparse is used to conveniently set our configurations
import argparse
# ==== Part 1: data loader
# construct a dataset and a data loader, more details can be found in
# https://pytorch.org/tutorials/beginner/basics/data_tutorial.html?highlight=dataloader
class ListDataset(Dataset):
    """Dataset of letter images listed in a json file that maps image names to letter labels."""

    def __init__(self, im_dir, file_path, norm_size=(32, 32)):
        '''
        :param im_dir: path to directory with images
        :param file_path: json file containing image names and labels
        :param norm_size: image normalization size, handed unchanged to cv2.resize
        '''
        # this time we will try to recognize 26 English letters (case-insensitive)
        letters = string.ascii_letters[-26:]  # ABCD...XYZ
        # map each uppercase letter to its class index: 'A' -> 0, ..., 'Z' -> 25
        self.alphabet = {letter: i for i, letter in enumerate(letters)}
        self.norm_size = norm_size

        with open(file_path, 'r') as f:
            imgs = json.load(f)

        # keys are image file names, values are the letter labels
        im_names = list(imgs.keys())
        self.im_paths = [os.path.join(im_dir, im_name) for im_name in im_names]
        self.labels = list(imgs.values())

    def __len__(self):
        # the __len__() function should return the total number of samples in the dataset
        return len(self.im_paths)

    def __getitem__(self, index):
        '''
        :param index: sample index
        :return: (im, label), im is a numpy array with values in [-1, 1], label is the class index
        :raises IndexError: if index is out of range
        :raises FileNotFoundError: if the image file cannot be read
        '''
        # an explicit check (instead of `assert index <= len(self)`) fixes the off-by-one
        # and still works when assertions are disabled with `python -O`
        if not -len(self) <= index < len(self):
            raise IndexError('index range error')

        # read an image and convert it to grey scale
        im_path = self.im_paths[index]
        im = cv2.imread(im_path)
        if im is None:
            # cv2.imread reports failure by returning None instead of raising
            raise FileNotFoundError('cannot read image: {}'.format(im_path))
        im = cv2.cvtColor(im, cv2.COLOR_BGR2GRAY)

        # image pre-processing: resize to norm_size and scale pixel values into [-1, 1]
        im = cv2.resize(im, self.norm_size)
        im = np.divide(im, 255.)
        im = (im - 0.5) * 2.0

        # get the label of the current image
        # upper() is used to convert a letter into uppercase
        label = self.labels[index].upper()
        # convert an English letter into a number index
        label = self.alphabet[label]

        return im, label
def dataLoader(im_dir, file_path, norm_size, batch_size, workers=0):
    '''
    Build a DataLoader on top of a ListDataset.
    :param im_dir: path to directory with images
    :param file_path: file with image paths and labels
    :param norm_size: image normalization size, (height, width)
    :param batch_size: batch size
    :param workers: number of workers for loading data in multiple threads
    :return: a data loader
    '''
    # shuffle images only when training, i.e. when the list file path contains 'train'
    is_train = 'train' in file_path
    return DataLoader(ListDataset(im_dir, file_path, norm_size),
                      batch_size=batch_size,
                      shuffle=is_train,
                      num_workers=workers)
# ==== Part 2: training, validation and testing
def train_val(model, trainloader, valloader, n_epochs,
              lr, optim_type, momentum, weight_decay,
              valInterval, device='cpu', model_path=None):
    '''
    The main training procedure
    ----------------------------
    :param model: the MLP model
    :param trainloader: the dataloader of the train set
    :param valloader: the dataloader of the validation set
    :param n_epochs: number of training epochs
    :param lr: learning rate
    :param optim_type: optimizer, can be 'sgd', 'adagrad', 'rmsprop', 'adam', or 'adadelta'
    :param momentum: only used if optim_type == 'sgd'
    :param weight_decay: the factor of L2 penalty on network weights
    :param valInterval: the frequency of validation, e.g., if valInterval = 5, then do validation after each 5 training epochs
    :param device: 'cpu' or 'cuda', we can use 'cpu' for our homework if GPU with cuda support is not available
    :param model_path: where to save the checkpoint; if None, falls back to the module-level
                       `opt.model_path` set by the script entry point (the previous behaviour)
    :raises NotImplementedError: if optim_type is not one of the supported optimizers
    '''
    # define the cross entropy loss function.
    ce_loss = CrossEntropyLoss.apply

    # optimizer
    if optim_type == 'sgd':
        optimizer = optim.SGD(model.parameters(), lr, momentum=momentum, weight_decay=weight_decay)
    elif optim_type == 'adagrad':
        optimizer = optim.Adagrad(model.parameters(), lr, weight_decay=weight_decay)
    elif optim_type == 'rmsprop':
        optimizer = optim.RMSprop(model.parameters(), lr, weight_decay=weight_decay)
    elif optim_type == 'adam':
        optimizer = optim.Adam(model.parameters(), lr, weight_decay=weight_decay)
    elif optim_type == 'adadelta':
        optimizer = optim.Adadelta(model.parameters(), lr, weight_decay=weight_decay)
    else:
        print('[Error] optim_type should be one of sgd, adagrad, rmsprop, adam, or adadelta')
        raise NotImplementedError

    # resolve the checkpoint path once, before training starts
    if model_path is None:
        # backward compatibility: rely on the global `opt` defined under `if __name__ == '__main__'`
        model_path = opt.model_path
    model_save_path = model_path

    # to save loss of each training epoch in a python "list" data structure
    losses = []

    for epoch in range(n_epochs):
        # set the model in training mode
        model.train()

        # to save total loss in one epoch
        total_loss = 0.

        for data, labels in trainloader:  # get a batch of data
            # step 1: set data type and device
            data = data.type(torch.float32).to(device)
            labels = labels.to(device)

            # step 2: convert an image to a vector as the input of the MLP
            data = torch.flatten(data, start_dim=1)

            # clear gradients left in the optimizer from the previous iteration
            optimizer.zero_grad()

            # step 3: run the model which is the forward process
            output = model(data)

            # step 4: compute the loss, and call backward propagation function
            loss = ce_loss(output, labels)
            loss.backward()

            # step 5: sum up of total loss, loss.item() return the value of the tensor as a standard python number
            total_loss += loss.item()

            # step 6: update the parameters of the model
            optimizer.step()

        # average of the total loss over iterations; max() avoids a division by zero on an empty loader
        avg_loss = total_loss / max(len(trainloader), 1)
        losses.append(avg_loss)
        print('Epoch {:02d}: loss = {:.3f}'.format(epoch + 1, avg_loss))

        # validation
        if (epoch + 1) % valInterval == 0:
            val_acc = test(model, valloader, device)

            # show prediction accuracy
            print('Epoch {:02d}: validation accuracy = {:.1f}%'.format(epoch + 1, 100 * val_acc))

            # save model parameters in a file; create the target directory first,
            # otherwise torch.save fails when it does not exist yet
            save_dir = os.path.dirname(model_save_path)
            if save_dir:
                os.makedirs(save_dir, exist_ok=True)
            torch.save({'state_dict': model.state_dict(),
                        }, model_save_path)
            print('Model saved in {}\n'.format(model_save_path))

    # draw the loss curve
    plot_loss(losses)
def test(model, testloader, device):
'''
The testing procedure
----------------------------
:param model: the MLP model
:param testloader: the dataloader to be tested/validated
:param device: 'cpu' or 'cuda', we can use 'cpu' for our homework if GPU with cuda support is not available
'''
# set the model in evaluation mode
model.eval()
n_correct = 0. # number of images that are correctly classified
n_imgs = 0. # number of total images
with torch.no_grad(): # we do not need to compute gradients during validation
#TODO 3: get the prediction of the data and calculate the accuracy
for imgs, labels in testloader:
# step 1: set data type and device
# imgs = torch.from_numpy(imgs)
imgs = imgs.type(torch.float32)
imgs = imgs.to(device)
labels = labels.to(device)
# step 2: convert an image to a vector as the input of the MLP
imgs = torch.flatten(imgs, start_dim=1)
# step 3: run the model which is the forward process
output = model(imgs)
# step 4: get the predicted value by the output using out.argmax(1)
pred = output.argmax(1)
# step 5: sum up the number of images correctly recognized and the total image number
for predict, label in zip(pred, labels):
if predict == label:
n_correct += 1
n_imgs += 1
accuracy = n_correct / n_imgs
return accuracy
# ==== Part 3: predict new images
def predict(model, im_path, norm_size, device):
    '''
    The predicting procedure
    ---------------
    :param model: the MLP model
    :param im_path: path of an image
    :param norm_size: image normalization size, handed unchanged to cv2.resize
    :param device: 'cpu' or 'cuda', we can use 'cpu' for our homework if GPU with cuda support is not available
    :return: the predicted uppercase letter (it is also printed)
    :raises FileNotFoundError: if the image at im_path cannot be read
    '''
    # enter the evaluation mode
    model.eval()

    # image pre-processing, the same steps as in ListDataset.__getitem__()
    im = cv2.imread(im_path)
    if im is None:
        # cv2.imread reports failure by returning None instead of raising
        raise FileNotFoundError('cannot read image: {}'.format(im_path))
    im = cv2.cvtColor(im, cv2.COLOR_BGR2GRAY)
    im = cv2.resize(im, norm_size)
    # scale pixel values from [0, 255] to [-1, 1]
    im = np.divide(im, 255.)
    im = (im - 0.5) * 2.0

    # convert im from numpy.ndarray to torch.tensor
    im = torch.from_numpy(im)

    # input im into the model
    with torch.no_grad():
        # flatten the image into a single-row batch
        inputs = im.view(1, -1).type(torch.float32).to(device)
        out = model(inputs)
        prediction = out.argmax(1)[0].item()

    # convert index of prediction to the corresponding character
    letters = string.ascii_letters[-26:]  # ABCD...XYZ
    prediction = letters[prediction]
    print('Prediction: {}'.format(prediction))
    return prediction
# ==== Part 4: draw the loss curve
def plot_loss(losses):
    '''
    Draw the training loss curve in a new figure and display it.
    :param losses: list of losses for each epoch
    :return:
    '''
    _, axes = plt.subplots()
    # one point per epoch, x axis is the epoch index
    axes.plot(losses)
    axes.set_xlabel('training epoch')
    axes.set_ylabel('loss')
    plt.show()
if __name__ == '__main__':
    # set random seed for reproducibility
    seed = 2023
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)
    torch.backends.cudnn.deterministic = True

    # set configurations
    parser = argparse.ArgumentParser()
    parser.add_argument('--mode', type=str, default='train', help='train, test or predict')
    parser.add_argument('--im_dir', type=str, default='data/character_classification/images',
                        help='path to directory with images')
    parser.add_argument('--train_file_path', type=str, default='data/character_classification/train.json',
                        help='file list of training image paths and labels')
    parser.add_argument('--val_file_path', type=str, default='data/character_classification/validation.json',
                        help='file list of validation image paths and labels')
    parser.add_argument('--test_file_path', type=str, default='data/character_classification/test.json',
                        help='file list of test image paths and labels')
    parser.add_argument('--batchsize', type=int, default=8, help='batch size')
    parser.add_argument('--device', type=str, default='cpu', help='cpu or cuda')

    # configurations for training
    parser.add_argument('--hsize', type=str, default='32', help='hidden size for each hidden layer, splitted by comma')
    parser.add_argument('--layer', type=int, default=2, help='number of layers in the MLP')
    parser.add_argument('--act', type=str, default='relu',
                        help='type of activation function, can be sigmoid, tanh, or relu')
    # two integers, e.g. `--norm_size 32 32`; `type=tuple` would split the raw string into single characters
    parser.add_argument('--norm_size', type=int, nargs=2, default=(32, 32), metavar=('H', 'W'),
                        help='image normalization size, (height, width)')
    parser.add_argument('--epoch', type=int, default=50, help='number of training epochs')
    parser.add_argument('--n_classes', type=int, default=26, help='number of classes')
    parser.add_argument('--valInterval', type=int, default=10, help='the frequency of validation')
    parser.add_argument('--lr', type=float, default=5e-4, help='learning rate')
    parser.add_argument('--optim_type', type=str, default='sgd', help='type of optimizer, can be sgd, adagrad, rmsprop, adam, or adadelta')
    parser.add_argument('--momentum', type=float, default=0.9, help='momentum of the SGD optimizer, only used if optim_type is sgd')
    parser.add_argument('--weight_decay', type=float, default=0., help='the factor of L2 penalty on network weights')

    # configurations for test and prediction
    parser.add_argument('--model_path', type=str, default='saved_models/recognition.pth', help='path of a saved model')
    parser.add_argument('--im_path', type=str, default='data/character_classification/new_images/predict01.png',
                        help='path of an image to be recognized')

    opt = parser.parse_args()
    # argparse yields a list when the option is given explicitly; the rest of the script expects a tuple
    opt.norm_size = tuple(opt.norm_size)

    # initialize the MLP model
    # each image of shape [norm_size[0], norm_size[1]] is flattened into one input vector;
    # the output size follows --n_classes (default 26) instead of a hard-coded constant
    model = MLP(opt.norm_size[0] * opt.norm_size[1], opt.n_classes,
                [int(num) for num in opt.hsize.split(',')], opt.layer, opt.act)

    # for the 'test' and 'predict' mode, we should load the saved checkpoint into the model
    if opt.mode == 'test' or opt.mode == 'predict':
        # load on CPU first; the model is moved to the requested device below
        checkpoint = torch.load(opt.model_path, map_location='cpu')
        # load model parameters we saved in model_path
        model.load_state_dict(checkpoint['state_dict'])
        print('[Info] Load model from {}'.format(opt.model_path))

    # put the model on CPU or GPU according to the device in args
    model = model.to(opt.device)

    # -- run the code for training and validation
    if opt.mode == 'train':
        # training and validation data loader
        trainloader = dataLoader(opt.im_dir, opt.train_file_path, opt.norm_size, opt.batchsize)
        valloader = dataLoader(opt.im_dir, opt.val_file_path, opt.norm_size, opt.batchsize)
        train_val(model, trainloader, valloader,
                  n_epochs=opt.epoch,
                  lr=opt.lr,
                  optim_type=opt.optim_type,
                  momentum=opt.momentum,
                  weight_decay=opt.weight_decay,
                  valInterval=opt.valInterval,
                  device=opt.device)

    # -- test the saved model
    elif opt.mode == 'test':
        testloader = dataLoader(opt.im_dir, opt.test_file_path, opt.norm_size, opt.batchsize)
        acc = test(model, testloader, opt.device)
        print('[Info] Test accuracy = {:.1f}%'.format(100 * acc))

    # -- predict a new image
    elif opt.mode == 'predict':
        predict(model, im_path=opt.im_path, norm_size=opt.norm_size, device=opt.device)

    else:
        print('mode should be train, test, or predict')
        raise NotImplementedError

41
hw3/code/check.py Normal file
View File

@@ -0,0 +1,41 @@
# ========================================================
# Media and Cognition
# Homework 3 Support Vector Machine
# check.py - Check your implementation of several modules
# Tsinghua University
# (C) Copyright 2024
# ========================================================
from svm_hw import SVM_HINGE, LinearFunction, Hinge
import torch
from torch.autograd import gradcheck
def run():
    """
    Sanity-check the student implementation of LinearFunction, Hinge and SVM_HINGE.

    The two autograd functions are verified numerically with torch.autograd.gradcheck
    on double-precision inputs; SVM_HINGE is checked by running one forward pass and
    asserting that its W and b attributes require gradients.

    :raises Exception: if the SVM_HINGE forward pass or its assertions fail
    """
    model = SVM_HINGE(2, C=1.0).double()

    # -- gradient check of the linear function
    x = torch.randn(50, 2, requires_grad=False).double()
    W = torch.randn(1, 2, requires_grad=True).double()
    b = torch.zeros(1, requires_grad=True).double()
    passed = gradcheck(LinearFunction.apply, (x, W, b), eps=1e-6, atol=1e-4)
    if passed:
        print('Linear successully tested!')

    # -- gradient check of the hinge loss
    output = torch.randn(50, 1, requires_grad=True).double()
    W = torch.randn(1, 2, requires_grad=True).double()
    labels = torch.ones(1, requires_grad=False).double()
    C = torch.tensor([[1.0]], requires_grad=False).double()
    passed = gradcheck(Hinge.apply, (output, W, labels, C), eps=1e-6, atol=1e-5)
    if passed:
        print('Hinge successfully tested')

    # -- forward pass of the whole model
    x = torch.randn(50, 2, requires_grad=False).double()
    labels = torch.ones(50, requires_grad=False).double()
    try:
        output, loss = model(x, labels)
        assert model.W.requires_grad is True
        assert model.b.requires_grad is True
        print('SVM_HINGE successfully tested')
    except Exception as err:
        # catch Exception only (a bare except would also trap KeyboardInterrupt/SystemExit)
        # and chain the original error so the real cause stays visible in the traceback
        raise Exception('Failed testing SVM_HINGE!') from err


if __name__ == '__main__':
    run()

181
hw3/code/data_preprocess.py Normal file
View File

@@ -0,0 +1,181 @@
# ========================================================
# Media and Cognition
# Homework 3 Support Vector Machine
# data_preprocess.py - Using pretrained convolutional layers to extract feature,
# and using PCA for dimensionality reduction
# Student ID: 2022010639
# Name: Yixuan Gao
# Tsinghua University
# (C) Copyright 2024
# ========================================================
import os
import torchvision.transforms as transforms
import torch
from PIL import Image
from networks import Classifier
import matplotlib.pyplot as plt
import argparse
def preprocess(pre_conv, data_root, image_size, classes):
    """
    Extract features for the train/val/test splits, reduce them to 2 dimensions with PCA
    fitted on the training split, then visualize and save every reduced split.
    :param pre_conv: pretrained network used by loaddata to extract features
    :param data_root: the path of all datasets; '<data_root>/<mode>.pt' files are written there
    :param image_size: the preset size that each image try to zoom to
    :param classes: two classes that need to be classified
    """
    # (mode, message printed before processing, message printed after saving)
    splits = (
        ('train', "Start preprocessing the training dataset !!!", "training dataset saved !!!"),
        ('val', "Start preprocessing the validation dataset!!!", "validation dataset saved !!!"),
        ('test', "Start preprocessing the testing dataset!!!", "testing dataset saved !!!"),
    )

    data_mean, u = None, None
    for mode, start_msg, done_msg in splits:
        print(start_msg)
        feats, feat_labels = loaddata(pre_conv, data_root, mode, image_size, classes)

        if mode == 'train':
            # the mean and the projection matrix come from the training split only
            data_mean, u = PCA(feats, 2)
            # constant factor applied to the projection matrix (kept from the original code)
            u = u * 20

        # subtract the training mean, then project onto the principal directions
        reduced = (feats - data_mean) @ u
        visualize(reduced, feat_labels, mode)
        savedata(reduced, feat_labels, data_root + "/" + mode + ".pt")
        print(done_msg)
def savedata(data, label, save_path):
    """
    Save samples and labels into one file with torch.save.
    :param data: the samples, stored under the key 'data'
    :param label: the labels, stored under the key 'label'
    :param save_path: destination file path
    """
    torch.save({'data': data, 'label': label}, save_path)
def visualize(datas, labels, mode):
    """
    Display feature points after dimensionality reduction
    -------------------------------
    :param datas: the samples after dimensionality reduction, with the shape of [N, 2]
    :param labels: the labels (chosen from {-1, +1}) corresponding to the samples
    :param mode: chosen from {'train', 'val', 'test'}, used as the figure title
    :return:
    """
    plt.figure()
    # iterate over the two class labels directly; the previous loop over datas.shape[1]
    # only produced -1 and +1 because the feature dimension happens to be 2
    for cls_label in (-1, 1):
        mask = labels == cls_label
        plt.scatter(datas[mask, 0], datas[mask, 1], label=cls_label)
    plt.legend()
    plt.title(mode)
    plt.show()
def PCA(data, dim=2):
    """
    calculate the mean value of the data and the projection matrix for PCA
    :param data: the sample features, with the shape of [N, D] (D = 2048 for the homework features)
    :param dim: the data dimension after projection
    :return:
        data_mean: the mean value of the data over the samples
        u: the projection matrix for PCA, with the shape of [D, dim]
    """
    # mean over the sample axis
    data_mean = data.mean(dim=0)

    # covariance matrix of the centered data; torch.cov expects variables in rows, hence the transpose
    centered = data - data_mean
    covariance = torch.cov(centered.T)

    # SVD of the covariance matrix; only the left singular vectors are needed
    left_vectors, _, _ = torch.linalg.svd(covariance)

    # keep the first `dim` columns as the projection matrix
    projection = left_vectors[:, :dim]
    return data_mean, projection
def loaddata(pre_conv, data_root, mode, image_size, classes):
    """
    load one dataset, and use pretrained network in homework 2 to extract feature
    :param pre_conv: pretrained network in homework 2
    :param data_root: the path of the dataset
    :param mode: chosen from {'train', 'val', 'test'}
    :param image_size: the preset size that each image try to zoom to
    :param classes: two classes that need to be classified
    :return:
        datas: the stacked feature vectors returned by readimg, one row per image
        labels: the corresponding labels for each sample (chosen from {-1, +1}), with the shape of [N]
    """
    assert len(classes) == 2

    features, targets = [], []
    for class_idx, class_name in enumerate(classes):
        # images of one class live in '<data_root>/<mode>/<class name>'
        class_dir = data_root + '/' + mode + '/' + class_name
        # the first class is labelled -1, the second +1
        target = 2 * class_idx - 1
        for img_name in os.listdir(class_dir):
            features.append(readimg(pre_conv, class_dir + '/' + img_name, image_size))
            targets.append(target)

    return torch.stack(features), torch.tensor(targets)
def readimg(pre_conv, filepath, image_size):
    """
    Read one image and use pretrained network to extract the feature
    --------------------------
    :param pre_conv: pretrained network in homework 2
    :param filepath: the file path of one image
    :param image_size: the preset size that each image try to zoom to
    :return:
        data: the network output for this image, flattened into a 1-D tensor
    """
    # load as RGB and resize to the preset size
    picture = Image.open(filepath).convert('RGB').resize(image_size)

    # convert to a tensor, then normalize with mean 0.5 and std 0.5
    to_normalized_tensor = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize(0.5, 0.5),
    ])
    tensor = to_normalized_tensor(picture)

    # add a batch dimension for the network, then flatten its output
    batch = tensor.unsqueeze(0)
    return pre_conv(batch).reshape(-1)
if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument("--pretrained_net", type=str, default="checkpoints/bn/ckpt_epoch_15.pth",
                        help="the filepath of the pretrained network in homework 2")
    parser.add_argument("--data_root", type=str, default="data", help="the path of all datasets")
    # two integers, e.g. `--image_size 32 32`; `type=tuple` would split the raw string into single characters
    parser.add_argument("--image_size", type=int, nargs=2, default=(32, 32),
                        help="the preset size that each image try to zoom to")
    parser.add_argument("--classes", default=["B", "C"], help="two classes that need to be classified")
    args = parser.parse_args()
    # argparse yields a list when the option is given explicitly; keep it a tuple for the image resize
    args.image_size = tuple(args.image_size)

    # rebuild the homework 2 classifier from the configuration stored in the checkpoint
    pretrained_checkpoint = torch.load(args.pretrained_net, map_location="cpu")
    configs = pretrained_checkpoint["configs"]
    cls = Classifier(
        configs["in_channels"],
        configs["num_classes"],
        configs["use_batch_norm"],
        configs["use_stn"],
        configs["dropout_prob"],
    )
    cls.load_state_dict(pretrained_checkpoint["model_state"], strict=False)

    # the network is a fixed feature extractor: no gradients are needed
    for param in cls.parameters():
        param.requires_grad = False
    # switch to evaluation mode; in training mode the dropout layer inside conv_net
    # would randomly zero feature channels, making the extracted features non-deterministic
    cls.eval()

    # only the convolutional part is used to extract features
    conv = cls.conv_net
    preprocess(conv, args.data_root, args.image_size, args.classes)

26
hw3/code/datasets.py Normal file
View File

@@ -0,0 +1,26 @@
# ========================================================
# Media and Cognition
# Homework 3 Support Vector Machine
# datasets.py - Define the data loader for the traffic sign classification dataset
# Student ID:
# Name:
# Tsinghua University
# (C) Copyright 2024
# ========================================================
import torch
import torch.utils.data as data
class Traffic_Dataset(data.Dataset):
    """Dataset backed by a file saved with torch.save that holds the keys 'data' and 'label'."""

    def __init__(self, data_root):
        """
        :param data_root: path of the saved file to load
        """
        loaded = torch.load(data_root)
        self.datas = loaded["data"]
        self.labels = loaded["label"]

    def __len__(self):
        # number of samples
        return len(self.datas)

    def __getitem__(self, index):
        # one (sample, label) pair
        sample = self.datas[index]
        target = self.labels[index]
        return sample, target

271
hw3/code/networks.py Normal file
View File

@@ -0,0 +1,271 @@
# ========================================================
# Media and Cognition
# Homework 2 Convolutional Neural Network
# networks.py - Network definition
# Student ID: 2022010639
# Name: Gao Yixuan
# Tsinghua University
# (C) Copyright 2024
# ========================================================
import torch
import torch.nn as nn
import torch.nn.functional as F
class ConvBlock(nn.Module):
    def __init__(
        self,
        in_channels,
        out_channels,
        kernel_size,
        stride,
        padding,
        use_batch_norm=False,
        use_residual=False,
    ):
        """
        Convolutional block: conv -> (optional) batch normalization -> ReLU,
        with an optional residual connection around the whole block
        ----------------------
        :param in_channels: channel number of input image
        :param out_channels: channel number of output image
        :param kernel_size: size of convolutional kernel
        :param stride: stride of convolutional operation
        :param padding: padding of convolutional operation
        :param use_batch_norm: whether to use batch normalization in convolutional layers
        :param use_residual: whether to use residual connection
        """
        super().__init__()

        # nn.Identity stands in for batch normalization when it is disabled
        norm_layer = nn.BatchNorm2d if use_batch_norm else nn.Identity

        self.use_residual = use_residual

        # Network structure: conv -> batchnorm -> relu
        self.conv = nn.Conv2d(
            in_channels,
            out_channels,
            kernel_size,
            stride=stride,
            padding=padding,
        )
        self.bn = norm_layer(out_channels)
        self.relu = nn.ReLU()

    def forward(self, x):
        """
        :param x: input feature map
        :return: relu(bn(conv(x))), plus x itself when the residual connection is enabled
        """
        activated = self.relu(self.bn(self.conv(x)))
        if not self.use_residual:
            return activated
        # residual connection: add the block input to its output
        return activated + x
class Classifier(nn.Module):
    def __init__(
        self,
        in_channels,
        num_classes,
        use_batch_norm=False,
        use_stn=False,
        dropout_prob=0,
    ):
        """
        Convolutional Neural Networks
        ----------------------
        :param in_channels: channel number of input image
        :param num_classes: number of classes for the classification task
        :param use_batch_norm: whether to use batch normalization in convolutional layers and linear layers
        :param use_stn: whether to use spatial transformer network
        :param dropout_prob: dropout ratio of dropout layer which ranges from 0 to 1
        """
        super().__init__()

        if use_batch_norm:
            bn1d = nn.BatchNorm1d
        else:
            # use identity function to replace batch normalization
            bn1d = nn.Identity

        if use_stn:
            self.stn = STN(in_channels)
        else:
            # use identity function to replace spatial transformer network
            self.stn = nn.Identity(in_channels)

        # Multilayer convolutional network.
        # input image with size [batch_size, in_channels, img_h, img_w]
        # Network structure:
        #            kernel_size  stride  padding  out_channels  use_residual
        # ConvBlock       5          1        2          32         False
        # ConvBlock       5          2        2          64         False
        # maxpool         2          2        0
        # ConvBlock       3          1        1          64         True
        # ConvBlock       3          1        1          128        False
        # maxpool         2          2        0
        # ConvBlock       3          1        1          128        True
        # dropout(p), where p is input parameter of dropout ratio
        #
        # NOTE(review): `use_batch_norm` is not forwarded to the ConvBlocks, so they are built
        # with their default (no batch normalization) although the docstring mentions
        # convolutional layers. Left unchanged because forwarding it would add parameters
        # that existing checkpoints do not contain -- confirm before changing.
        self.conv_net = nn.Sequential(
            ConvBlock(
                in_channels=in_channels,
                out_channels=32,
                kernel_size=5,
                stride=1,
                padding=2,
            ),
            ConvBlock(
                in_channels=32, out_channels=64, kernel_size=5, stride=2, padding=2
            ),
            nn.MaxPool2d(kernel_size=2, stride=2, padding=0),
            ConvBlock(
                in_channels=64,
                out_channels=64,
                kernel_size=3,
                stride=1,
                padding=1,
                use_residual=True,
            ),
            ConvBlock(
                in_channels=64, out_channels=128, kernel_size=3, stride=1, padding=1
            ),
            nn.MaxPool2d(kernel_size=2, stride=2, padding=0),
            ConvBlock(
                in_channels=128,
                out_channels=128,
                kernel_size=3,
                stride=1,
                padding=1,
                use_residual=True,
            ),
            nn.Dropout2d(p=dropout_prob),
        )

        # Fully connected sub-network with two linear layers.
        # Network structure:
        #            out_channels
        # linear          256
        # activation
        # batchnorm
        # dropout(p), where p is input parameter of dropout ratio
        # linear       num_classes
        #
        # The first linear layer expects 2048 flattened features, which matches the
        # conv output for the default (3, 32, 32) input: 128 channels * 4 * 4.
        self.fc_net = nn.Sequential(
            nn.Linear(2048, 256),
            nn.ReLU(),
            bn1d(256),
            # element-wise dropout: nn.Dropout1d would read the 2-D [batch_size, 256] input
            # as an unbatched (C, L) tensor and zero out entire samples
            nn.Dropout(dropout_prob),
            nn.Linear(256, num_classes),
        )

    def forward(self, x):
        """
        Define the forward function
        :param x: input features with size [batch_size, in_channels, img_h, img_w]
        :return: output features with size [batch_size, num_classes]
        """
        # Step 1: apply spatial transformer network if applicable
        x = self.stn(x)

        # Step 2: forward process for the convolutional network
        x = self.conv_net(x)

        # Step 3: flatten the tensor to match the input size of the fully connected layers
        x = x.view(x.shape[0], -1)

        # Step 4: forward process for the fully connected network
        out = self.fc_net(x)

        return out
class STN(nn.Module):
    def __init__(self, in_channels):
        """
        The spatial transformer network (STN) learns how to perform spatial transformations on the
        input image in order to enhance the geometric invariance of the model. For example, it can
        crop a region of interest, scale and correct the orientation of an image. It can be a useful
        mechanism because CNNs are not invariant to rotation and scale and more general affine
        transformations.
        The spatial transformer network boils down to three main components:
        - The localization network is a regular CNN which regresses the transformation parameters.
          The transformation is never learned explicitly from this dataset, instead the network
          learns automatically the spatial transformations that enhances the global accuracy.
        - The grid generator generates a grid of coordinates in the input image corresponding
          to each pixel from the output image.
        - The sampler uses the parameters of the transformation and applies it to the input image.
        Here, we are going to implement an STN that performs affine transformations on the input images.
        For more information, please refer to the slides and
        https://pytorch.org/tutorials/intermediate/spatial_transformer_tutorial.html .
        ----------------------
        :param in_channels: channel number of input image
        """
        super().__init__()
        # >>> TODO 4.1: Build your localization net
        # Step 1: convolutional feature extractor: three stride-2 blocks with doubling channels.
        # The spatial sizes below assume a 32 x 32 input and that ConvBlock applies a standard
        # convolution with the given kernel/stride/padding (ConvBlock is defined elsewhere).
        self.localization_conv = nn.Sequential(
            # in_channels x 32 x 32 -> 8 x 16 x 16
            ConvBlock(in_channels=in_channels, out_channels=8, kernel_size=9, stride=2, padding=4, use_batch_norm=True),
            # 8 x 16 x 16 -> 16 x 8 x 8
            ConvBlock(in_channels=8, out_channels=16, kernel_size=5, stride=2, padding=2, use_batch_norm=True),
            # 16 x 8 x 8 -> 32 x 4 x 4
            ConvBlock(in_channels=16, out_channels=32, kernel_size=3, stride=2, padding=1, use_batch_norm=True),
        )
        # Step 2: fully connected regressor producing the 6 affine parameters (a 2 x 3 matrix).
        # The 32 * 4 * 4 input size matches the flattened conv output for 32 x 32 images.
        self.localization_fc = nn.Sequential(
            nn.Linear(32 * 4 * 4, 256),
            nn.ReLU(),
            nn.BatchNorm1d(256),
            nn.Linear(256, 6)
        )
        # <<< TODO 4.1
        # >>> TODO 4.2: make the STN output the identity transformation before training.
        # With zero weights the last layer outputs exactly its bias, so the bias must hold the
        # flattened identity affine matrix [[1, 0, 0], [0, 1, 0]]. Zeroing the weights alone is
        # not enough: the default (random) bias would yield a random warp at initialization.
        last_linear = self.localization_fc[3]
        nn.init.zeros_(last_linear.weight)
        last_linear.bias.data.copy_(
            last_linear.bias.data.new_tensor([1.0, 0.0, 0.0, 0.0, 1.0, 0.0])
        )
        # <<< TODO 4.2

    def forward(self, x):
        """
        Warp the input batch with the affine transformation predicted from it.
        :param x: input images with size [batch_size, in_channels, img_h, img_w]
        :return: transformed images with the same size as x
        """
        # Extract the features from input images and flatten them
        features = self.localization_conv(x)
        features = features.view(features.shape[0], -1)
        # Predict the parameters of affine transformation from the extracted features
        theta = self.localization_fc(features)
        theta = theta.view(-1, 2, 3)
        # Build the sampling grid for the predicted transform and resample the input with it
        grid = F.affine_grid(theta, x.shape, align_corners=False)
        x = F.grid_sample(x, grid, align_corners=False)
        return x

148
hw3/code/svm_hw.py Normal file
View File

@@ -0,0 +1,148 @@
# ========================================================
# Media and Cognition
# Homework 3 Support Vector Machine
# svm_hw.py - The implementation of SVM using hinge loss
# Student ID: 2022010639
# Name: Yixuan Gao
# Tsinghua University
# (C) Copyright 2024
# ========================================================
import torch
import torch.nn as nn
import torch.nn.functional as F
# TODO 1: complete the forward and backward propagation processes of the linear layer
class LinearFunction(torch.autograd.Function):
    '''
    we will implement the linear function:
    y = xW^T + b
    as well as its gradient computation process
    '''

    @staticmethod
    def forward(ctx, x, W, b):
        '''
        Input:
        :param ctx: a context object that can be used to stash information for backward computation
        :param x: input features with size [batch_size, input_size]
        :param W: weight matrix with size [output_size, input_size]
        :param b: bias with size [output_size]
        Return:
        y :output features with size [batch_size, output_size]
        '''
        y = torch.matmul(x, W.T) + b
        # Only x and W are needed by backward; the bias gradient depends on grad_output alone.
        ctx.save_for_backward(x, W)
        return y

    @staticmethod
    def backward(ctx, grad_output):
        '''
        Input:
        :param ctx: a context object with saved variables
        :param grad_output: dL/dy, with size [batch_size, output_size]
        Return:
        grad_input: dL/dx, with size [batch_size, input_size]
        grad_W: dL/dW, with size [output_size, input_size], summed for data in the batch
        grad_b: dL/db, with size [output_size], summed for data in the batch
        '''
        # `saved_tensors` is the supported accessor (`saved_variables` is deprecated);
        # it is also what Hinge.backward in this file uses.
        x, W = ctx.saved_tensors
        # dL/dx = dL/dy * W
        grad_input = torch.matmul(grad_output, W)
        # dL/dW = (dL/dy)^T * x, which sums the per-sample contributions over the batch
        grad_W = torch.matmul(grad_output.T, x)
        # dL/db = sum over the batch of dL/dy
        grad_b = grad_output.sum(0)
        return grad_input, grad_W, grad_b
# TODO 2: complete the forward and backward propagation processes of the hinge loss
class Hinge(torch.autograd.Function):
    """
    Soft-margin SVM objective with a hand-written gradient:
    loss = 0.5*||w||^2 + C*\\sum_i{max(0, 1 - y_i*output_i)}
    """

    @staticmethod
    def forward(ctx, output, W, label, C):
        """
        Compute the hinge loss
        --------------------------------------
        :param ctx: a context object that can be used to stash information for backward computation
        :param output: the output of the linear layer with size [batch_size, 1], i.e. output = W^T*x + b
        :param W: weight matrix with size [1, input_size]
        :param label: the ground truth y in the equation for loss calculation, with size [batch_size]
        :param C: the regularization coefficient of hinge loss with size [1, 1]
        :return: the hinge loss with size [1, 1]
        """
        C = C.type_as(W)
        # output is [batch_size, 1] and label is [batch_size]; transposing around the product
        # lines the two up element-wise and yields a [batch_size, 1] margin term.
        slack = F.relu(1 - (output.T * label).T)
        l2_term = 1/2 * (W @ W.T)
        loss = l2_term + C * slack.sum()
        ctx.save_for_backward(output, W, label, C)
        return loss

    @staticmethod
    def backward(ctx, grad_loss):
        """
        Compute the gradient of hinge loss
        :param ctx: a context object with saved variables
        :param grad_loss: dL/dloss, with size [1, 1], the gradient of the final target loss with respect to the output (variable 'loss') of the forward function
        :return:
        grad_output: dL/doutput, with size [batch_size, 1]
        grad_W: dL/dW, with size [1, channels]
        (no gradient is returned for label and C)
        """
        output, W, label, C = ctx.saved_tensors
        # Indicator of samples with 1 - y_i*output_i >= 0; heaviside's second argument is the
        # value used exactly at 0, so samples on the margin count as active.
        at_zero = torch.tensor(1).type_as(output)
        active = torch.heaviside(1 - (output.T * label).T, at_zero)
        # d(hinge_i)/d(output_i) = -y_i for active samples, 0 otherwise.
        grad_output = grad_loss * C * ((active.T * (-label))).T
        # d(0.5*||w||^2)/dW = W
        grad_W = grad_loss * W
        return grad_output, grad_W, None, None
# TODO 3: complete the structure of SVM model
class SVM_HINGE(nn.Module):
    def __init__(self, in_channels, C):
        """
        Linear SVM trained with the hinge loss.
        :param in_channels: number of feature channels for SVM input
        :param C: regularization coefficient of hinge loss; stored as a [1, 1] tensor
        """
        super().__init__()
        # Trainable parameters: W has shape [1, in_channels] and b has shape [1, ].
        # Both are drawn with torch.rand (uniform on [0, 1)), W first and then b.
        initial_W = torch.rand(1, in_channels)
        initial_b = torch.rand(1, )
        self.W = nn.Parameter(initial_W, requires_grad=True)
        self.b = nn.Parameter(initial_b, requires_grad=True)
        # Plain tensor attribute (neither a parameter nor a buffer), so it is not in state_dict.
        self.C = torch.tensor([[C]], requires_grad=False)

    def forward(self, x, label=None):
        """
        :param x: input features, fed to the linear layer together with W and b
        :param label: optional ground-truth labels; when given, the hinge loss is also computed
        :return: (output, loss) where output holds -1.0 / +1.0 predictions and loss is None
                 when no label was supplied
        """
        # SVM calculation: raw decision values
        scores = LinearFunction.apply(x, self.W, self.b)
        loss = Hinge.apply(scores, self.W, label, self.C) if label is not None else None
        # Map the sign of the decision value to {-1, +1}; a score of exactly 0 maps to -1.
        output = (scores > 0.0).type_as(x) * 2.0 - 1.0
        return output, loss

110
hw3/code/test_svm.py Normal file
View File

@@ -0,0 +1,110 @@
# ========================================================
# Media and Cognition
# Homework 3 Support Vector Machine
# test_svm.py - Test svm model for traffic sign
# Student ID: 2022010639
# Name: Yixuan Gao
# Tsinghua University
# (C) Copyright 2024
# ========================================================
# ==== Part 1: import libs
import argparse
import torch
from datasets import Traffic_Dataset
from svm_hw import SVM_HINGE
from torch.utils.data import DataLoader
import os.path
# ==== Part 2: testing
def test(
    data_root,
    model_save_path,
    device,
):
    """
    The main testing procedure of SVM model
    ----------------------------
    :param data_root: path to the root directory of dataset
    :param model_save_path: path to pretrained SVM model
    :param device: device: 'cpu' or 'cuda', we can use 'cpu' for our homework if GPU with cuda support is not available
    """
    # =================== load the test data and the pretrained SVM model ===================
    # one sample per batch, in dataset order
    test_data = Traffic_Dataset(os.path.join(data_root, 'test.pt'))
    test_loader = DataLoader(test_data, batch_size=1, shuffle=False)

    # map_location remaps the stored tensors onto the requested device, so a checkpoint
    # written on a CUDA machine can still be loaded on a CPU-only one
    model_svm = torch.load(model_save_path, map_location=device)

    # rebuild the model from the saved configuration, then restore its parameters
    svm = SVM_HINGE(model_svm["configs"]["feature_channel"], model_svm["configs"]["C"])
    svm.load_state_dict(model_svm["state_dict"])
    svm.to(device)

    # ================================ testing ==============================================
    svm.eval()

    # to calculate and save the testing accuracy
    n_correct = 0.  # number of images that are correctly classified
    n_feas = 0.  # number of total images

    with torch.no_grad():  # we do not need to compute gradients during validation
        for input, label in test_loader:
            # set data type and device
            input, label = (
                input.type(torch.float).to(device),
                label.type(torch.float).to(device)
            )
            # inference only needs the features; the second returned value (loss) is unused
            out, _ = svm(input)
            # 'out' and 'label' have different shapes, so align them before comparing
            n_correct += (out.reshape_as(label) == label).sum().item()
            n_feas += label.numel()

    # show prediction accuracy
    acc = 100 * n_correct / n_feas
    print('Test accuracy = {:.1f}%'.format(acc))
if __name__ == "__main__":
    # command-line configuration of the testing process
    parser = argparse.ArgumentParser()
    for flag, spec in (
        ("--data_root", dict(type=str, default="data", help="file list of training image paths and labels")),
        ("--device", dict(type=str, help="cpu or cuda")),
        ("--model_save_path", dict(type=str, default="checkpoints/svm.pth", help="path to save SVM model")),
    ):
        parser.add_argument(flag, **spec)
    args = parser.parse_args()

    # fall back to CUDA when available if no device was requested
    if args.device is None:
        args.device = "cuda" if torch.cuda.is_available() else "cpu"

    # the parsed option names match test()'s keyword parameters one-to-one
    test(**vars(args))

296
hw3/code/train_svm.py Normal file
View File

@@ -0,0 +1,296 @@
# ========================================================
# Media and Cognition
# Homework 3 Support Vector Machine
# train_svm.py - Train svm model for traffic sign
# Student ID: 2022010639
# Name: Yixuan Gao
# Tsinghua University
# (C) Copyright 2024
# ========================================================
# ==== Part 1: import libs
import argparse
import matplotlib.pyplot as plt
import torch
import numpy as np
import random
from datasets import Traffic_Dataset
from svm_hw import SVM_HINGE
from torch.utils.data import DataLoader
import os.path
# ==== Part 2: training and validation
def train(
    data_root,
    feature_channel,
    batch_size,
    n_epoch,
    lr,
    C,
    model_save_path,
    device,
):
    """
    The main training procedure of SVM model
    ----------------------------
    :param data_root: path to the root directory of dataset
    :param feature_channel: number of feature channels for SVM input
    :param batch_size: batch size of training
    :param n_epoch: number of training epochs
    :param lr: learning rate
    :param C: regularization coefficient in hinge loss
    :param model_save_path: path to save SVM model
    :param device: 'cpu' or 'cuda', we can use 'cpu' for our homework if GPU with cuda support is not available
    """
    # construct training and validation data loaders
    train_data = Traffic_Dataset(os.path.join(data_root, 'train.pt'))
    train_loader = DataLoader(train_data, batch_size=batch_size, shuffle=True)
    val_data = Traffic_Dataset(os.path.join(data_root, 'val.pt'))
    val_loader = DataLoader(val_data, batch_size=batch_size, shuffle=True)

    # scale the regularization coefficient by the number of batches per epoch
    C = C * len(train_loader)

    # initialize the SVM model and put it on CPU or GPU
    svm = SVM_HINGE(feature_channel, C)
    svm.to(device)

    # Adam optimizer over W and b
    optimizer = torch.optim.Adam(svm.parameters(), lr)

    # to save the training loss, training accuracy, validation accuracy, and the epoch index of each training epoch
    train_loss = []
    train_acc = []
    val_acc = []
    epochs = []

    for epoch in range(n_epoch):
        # epochs are reported 1-based
        epochs.append(epoch + 1)

        # ========================= training =======================
        svm.train()

        # to calculate and save the training loss and training accuracy
        total_loss = 0.  # to save total training loss in one epoch
        n_correct = 0.  # number of images that are correctly classified
        n_feas = 0.  # number of total images

        for step, (input, label) in enumerate(train_loader):
            # set data type and device
            input, label = (
                input.type(torch.float).to(device),
                label.type(torch.float).to(device)
            )
            # clear gradients in the optimizer
            optimizer.zero_grad()
            # run the model with hinge loss; the model needs two inputs: feas and labels
            out, loss = svm(input, label)
            # back-propagation on the computation graph
            loss.backward()
            # loss.item() returns the value of the tensor as a standard python number
            total_loss += loss.item()
            # update the parameters of the model
            optimizer.step()
            # 'out' and 'label' have different shapes, so align them before comparing
            n_correct += (out.reshape_as(label) == label).sum().item()
            n_feas += label.numel()

        # accuracy over all samples, loss averaged over iterations
        acc = 100 * n_correct / n_feas
        avg_loss = total_loss / len(train_loader)
        train_acc.append(acc)
        train_loss.append(avg_loss)
        print('Epoch {:02d}: loss = {:.3f}, training accuracy = {:.1f}%'.format(epoch + 1, avg_loss, acc))

        # ========================== Validation ======================================
        svm.eval()

        # to calculate and save the validation accuracy
        n_correct = 0.  # number of images that are correctly classified
        n_feas = 0.  # number of total images

        with torch.no_grad():  # we do not need to compute gradients during validation
            for input, label in val_loader:
                input, label = (
                    input.type(torch.float).to(device),
                    label.type(torch.float).to(device)
                )
                # validation only needs the features; the second returned value (loss) is unused
                out, _ = svm(input)
                n_correct += (out.reshape_as(label) == label).sum().item()
                n_feas += label.numel()

        # show prediction accuracy
        acc = 100 * n_correct / n_feas
        print('Epoch {:02d}: validation accuracy = {:.1f}%'.format(epoch + 1, acc))
        val_acc.append(acc)

    # save model parameters in a file; create the target directory first so a fresh
    # checkout (where e.g. 'checkpoints/' does not exist yet) does not fail at the very end
    save_dir = os.path.dirname(model_save_path)
    if save_dir:
        os.makedirs(save_dir, exist_ok=True)
    torch.save({'state_dict': svm.state_dict(),
                'configs': {
                    'feature_channel': feature_channel,
                    'C': C}
                }, model_save_path)
    print('Model saved in {}\n'.format(model_save_path))

    W = svm.W.data.cpu()
    b = svm.b.data.cpu()

    # indices of the support vectors among the training samples: every sample on or inside
    # the margin, i.e. y_i * (W x_i + b) <= 1. 'sv' is a python list of length K.
    sv = [idx for idx, (data, label) in enumerate(zip(train_data.datas, train_data.labels)) if label * ((W @ data) + b) <= 1]

    plot(train_loss, train_acc, val_acc, epochs)
    plot_feature(train_features=train_data.datas, val_features=val_data.datas, train_labels=train_data.labels,
                 val_labels=val_data.labels, sv=sv, W=W, b=b)
def plot_feature(train_features, val_features, train_labels, val_labels, sv, W, b):
    """
    Draw the samples,SVM decision boundary, and support vectors
    ---------------------
    :param train_features: training samples with the shape of [B, 2]
    :param val_features: validation samples with the shape of [B, 2]
    :param train_labels: the labels (chosen from{-1, +1}) corresponding to training samples, with the shape of [B, 1]
    :param val_labels: the labels (chosen from{-1, +1}) corresponding to validation samples, with the shape of [B, 1]
    :param sv: a list with the index of support vectors in training samples, with the shape of [K] (K is the number of support vectors)
    :param W: the weight vector of SVM decision boundary (W^Tx + b), with the shape of [1, feature_channel]
    :param b: the bias of SVM decision boundary (W^Tx + b), with the shape of [1,]
    """

    def boundary(xs):
        # points on the line W[0,0]*x + W[0,1]*y + b = 0, solved for y
        return -W[0, 0] / W[0, 1] * xs - b / W[0, 1]

    train_labels = (train_labels > 0.0).int()
    val_labels = (val_labels > 0.0).int()
    train_labels[sv] = 2

    # The index split below treats the first half of the samples as class -1 and the
    # second half as class +1 (assumes the dataset is stored in that order).
    half_train = train_labels.shape[0] // 2
    first_half = set(range(half_train))
    second_half = set(range(half_train, 2 * half_train))
    sv_set = set(sv)
    foreground = list(first_half - sv_set)
    foreground_sv = list(first_half - set(foreground))
    background = list(second_half - sv_set)
    background_sv = list(second_half - set(background))

    # ---- training samples, with support vectors highlighted ----
    f, ax = plt.subplots()
    plt.title("training dataset")
    ax.scatter(train_features[foreground, 0], train_features[foreground, 1], marker='.', c='r', label="-1")
    ax.scatter(train_features[foreground_sv, 0], train_features[foreground_sv, 1], marker='.', c='darkorange',
               label="-1 (support vector)")
    ax.scatter(train_features[background, 0], train_features[background, 1], marker='x', c='b', label="+1")
    ax.scatter(train_features[background_sv, 0], train_features[background_sv, 1], marker='x', c='c',
               label="+1 (support vector)")
    x = np.linspace(-20, 20, 100)
    ax.plot(x, boundary(x), c='y')
    ax.legend(loc="best")
    plt.ylim([-30, 30])
    plt.show()

    # ---- validation samples ----
    f, ax = plt.subplots()
    plt.title("validation dataset")
    half_val = val_labels.shape[0] // 2
    foreground_val = list(range(half_val))
    background_val = list(range(half_val, 2 * half_val))
    ax.scatter(val_features[foreground_val, 0], val_features[foreground_val, 1], marker='.', c='r', label="-1")
    ax.scatter(val_features[background_val, 0], val_features[background_val, 1], marker='x', c='b', label="+1")
    x = np.linspace(-20, 20, 100)
    ax.plot(x, boundary(x), c='y')
    ax.legend(loc="best")
    plt.ylim([-30, 30])
    plt.show()
def plot(train_loss, train_acc, val_acc, epochs):
    """
    Draw loss and accuracy curve
    ------------------
    :param train_loss: a list with loss of each training epoch
    :param train_acc: a list with accuracy on training dataset of each training epoch
    :param val_acc: a list with accuracy on validation dataset of each training epoch
    :param epochs: a list with the index of all training epochs
    """
    # figure 1: training loss per epoch
    _, loss_ax = plt.subplots()
    plt.title("Training Loss")
    loss_ax.plot(epochs, train_loss, color="tab:blue")
    loss_ax.set_xlabel("Training epoch")
    loss_ax.set_ylabel("Loss")
    loss_ax.legend(["training loss"], loc="best")
    plt.show()

    # figure 2: training and validation accuracy per epoch
    _, acc_ax = plt.subplots()
    plt.title("Training and Validation Accuracy")
    for curve, colour in ((train_acc, "tab:orange"), (val_acc, "tab:green")):
        acc_ax.plot(epochs, curve, color=colour)
    acc_ax.legend(["training accuracy","validation accuracy"], loc="best")
    acc_ax.set_xlabel("Training epoch")
    acc_ax.set_ylabel("Accuracy")
    acc_ax.set_ylim(0, 101)
    plt.show()
if __name__ == "__main__":
    # set random seed for reproducibility: python, numpy and torch (CPU and CUDA) in turn
    seed = 2024
    for set_seed in (
        random.seed,
        np.random.seed,
        torch.manual_seed,
        torch.cuda.manual_seed,
        torch.cuda.manual_seed_all,
    ):
        set_seed(seed)
    torch.backends.cudnn.deterministic = True

    # command-line configuration of the model and training process
    parser = argparse.ArgumentParser()
    for flag, spec in (
        ("--data_root", dict(type=str, default="data", help="file list of training image paths and labels")),
        ("--n_epoch", dict(type=int, default=50, help="number of training epochs")),
        ("--batch_size", dict(type=int, default=20, help="training batch size")),
        ("--lr", dict(type=float, default=1e-2, help="learning rate")),
        ("--C", dict(type=float, default=1e-3, help="regularization coefficient in hinge loss")),
        ("--device", dict(type=str, help="cpu or cuda")),
        ("--feature_channel", dict(type=int, default=2, help="number of pre-extracted feature channel by pretrained network")),
        ("--model_save_path", dict(type=str, default="checkpoints/svm.pth", help="path to save SVM model")),
    ):
        parser.add_argument(flag, **spec)
    args = parser.parse_args()

    # fall back to CUDA when available if no device was requested
    if args.device is None:
        args.device = "cuda" if torch.cuda.is_available() else "cpu"

    # the parsed option names match train()'s keyword parameters one-to-one
    train(**vars(args))

132
hw3/report/dtx-style.sty Normal file
View File

@@ -0,0 +1,132 @@
%%
%% This is file `dtx-style.sty',
%% generated with the docstrip utility.
%%
%% The original source files were:
%%
%% thucoursework.dtx (with options: `dtx-style')
%%
%% This is a generated file.
%%
%% Copyright (C) 2021 by zhaofeng-shu33 <616545598@qq.com>
%%
%% This work may be distributed and/or modified under the
%% conditions of the LaTeX Project Public License, either version 1.3
%% of this license or (at your option) any later version.
%% The latest version of this license is in
%% http://www.latex-project.org/lppl.txt
%% and version 1.3 or later is part of all distributions of LaTeX
%% version 2005/12/01 or later.
%%
%% To produce the documentation run the original source files ending with `.dtx'
%% through LaTeX.
%%
\ProvidesPackage{dtx-style}
\RequirePackage{hypdoc}
\RequirePackage[UTF8,scheme=chinese]{ctex}
\RequirePackage{newpxtext}
\RequirePackage{newpxmath}
\RequirePackage[
top=2.5cm, bottom=2.5cm,
left=4cm, right=2cm,
headsep=3mm]{geometry}
\RequirePackage{array,longtable,booktabs}
\RequirePackage{listings}
\RequirePackage{fancyhdr}
\RequirePackage{xcolor}
\RequirePackage{enumitem}
\RequirePackage{etoolbox}
\RequirePackage{metalogo}
% Named colours used when printing macro, environment and option names.
\colorlet{thu@macro}{blue!60!black}
\colorlet{thu@env}{blue!70!black}
\colorlet{thu@option}{purple}
% Patch the name-printing commands (etoolbox \patchcmd) so that every use of
% \MacroFont inside them is followed by bold face and the matching colour.
% The two trailing empty groups are the (unused) success/failure hooks.
\patchcmd{\PrintMacroName}{\MacroFont}{\MacroFont\bfseries\color{thu@macro}}{}{}
\patchcmd{\PrintDescribeMacro}{\MacroFont}{\MacroFont\bfseries\color{thu@macro}}{}{}
\patchcmd{\PrintDescribeEnv}{\MacroFont}{\MacroFont\bfseries\color{thu@env}}{}{}
\patchcmd{\PrintEnvName}{\MacroFont}{\MacroFont\bfseries\color{thu@env}}{}{}
% \DescribeOption{<name>}: user-level command. It opens a group, calls
% \MakePrivateLetters, and hands over to \Describe@Option, which reads the argument.
\def\DescribeOption{%
\leavevmode\@bsphack\begingroup\MakePrivateLetters%
\Describe@Option}
% \Describe@Option{<name>}: closes the group opened above, prints the name in a
% margin paragraph via \PrintDescribeOption and indexes it under "option".
\def\Describe@Option#1{\endgroup
\marginpar{\raggedleft\PrintDescribeOption{#1}}%
\thu@special@index{option}{#1}\@esphack\ignorespaces}
% \PrintDescribeOption{<name>}: formatting of the option name (bold sans-serif, thu@option colour).
\def\PrintDescribeOption#1{\strut \MacroFont\bfseries\sffamily\color{thu@option} #1\ }
% \thu@special@index{<category>}{<name>}: writes two index entries for <name>,
% one as "<name> (<category>)" and one as a sub-entry "<category>: <name>".
% \encapchar is redefined locally so both entries carry the "usage" encapsulation
% together with the current \c@HD@hypercount counter value.
\def\thu@special@index#1#2{\@bsphack
\begingroup
\HD@target
\let\HDorg@encapchar\encapchar
\edef\encapchar usage{%
\HDorg@encapchar hdclindex{\the\c@HD@hypercount}{usage}%
}%
\index{#2\actualchar{\string\ttfamily\space#2}
(#1)\encapchar usage}%
\index{#1:\levelchar#2\actualchar
{\string\ttfamily\space#2}\encapchar usage}%
\endgroup
\@esphack}
% Base listings style shared by the shell and LaTeX styles below:
% small typewriter font, line breaking enabled, no line numbers, light grey background.
\lstdefinestyle{lstStyleBase}{%
basicstyle=\small\ttfamily,
aboveskip=\medskipamount,
belowskip=\medskipamount,
lineskip=0pt,
boxpos=c,
showlines=false,
extendedchars=true,
upquote=true,
tabsize=2,
showtabs=false,
showspaces=false,
showstringspaces=false,
numbers=none,
linewidth=\linewidth,
xleftmargin=4pt,
xrightmargin=0pt,
resetmargins=false,
breaklines=true,
breakatwhitespace=false,
breakindent=0pt,
breakautoindent=true,
columns=flexible,
keepspaces=true,
gobble=2,
framesep=3pt,
rulesep=1pt,
framerule=1pt,
backgroundcolor=\color{gray!5},
stringstyle=\color{green!40!black!100},
keywordstyle=\bfseries\color{blue!50!black},
commentstyle=\slshape\color{black!60}}
% Shell listings: base style plus a purple rule on the left, bash highlighting.
\lstdefinestyle{lstStyleShell}{%
style=lstStyleBase,
frame=l,
rulecolor=\color{purple},
language=bash}
% LaTeX listings: base style plus a violet rule on the left, LaTeX highlighting.
\lstdefinestyle{lstStyleLaTeX}{%
style=lstStyleBase,
frame=l,
rulecolor=\color{violet},
language=[LaTeX]TeX}
% Environments `latex' and `shell' typeset their body with the matching style.
\lstnewenvironment{latex}{\lstset{style=lstStyleLaTeX}}{}
\lstnewenvironment{shell}{\lstset{style=lstStyleShell}}{}
% Compact lists everywhere (enumitem).
\setlist{nosep}
% Inline markup: \option{..} in sans-serif, \env{..} in typewriter.
\DeclareDocumentCommand{\option}{m}{\textsf{#1}}
\DeclareDocumentCommand{\env}{m}{\texttt{#1}}
% \pkg{..} and \file{..} print their argument in typewriter and add an index entry;
% the starred forms (\pkg*{..}, \file*{..}) skip the index entry.
\DeclareDocumentCommand{\pkg}{s m}{%
\texttt{#2}\IfBooleanF#1{\thu@special@index{package}{#2}}}
\DeclareDocumentCommand{\file}{s m}{%
\texttt{#2}\IfBooleanF#1{\thu@special@index{file}{#2}}}
% \myentry{<text>}: bold purple note set in the margin.
\newcommand{\myentry}[1]{%
\marginpar{\raggedleft\color{purple}\bfseries\strut #1}}
% \note[<label>]{<text>}: magenta remark; the label defaults to "Note".
\newcommand{\note}[2][Note]{{%
\color{magenta}{\bfseries #1}\emph{#2}}}
% Logo-style name of the template.
\def\thucoursework{\textsc{Thu}\-\textsc{Coursework}}

153
hw3/report/iidef.sty Normal file
View File

@@ -0,0 +1,153 @@
%%
%% This is file `iidef.sty',
%% generated with the docstrip utility.
%%
%% The original source files were:
%%
%% thucoursework.dtx (with options: `sty')
%%
%% This is a generated file.
%%
%% Copyright (C) 2021 by zhaofeng-shu33 <616545598@qq.com>
%%
%% This work may be distributed and/or modified under the
%% conditions of the LaTeX Project Public License, either version 1.3
%% of this license or (at your option) any later version.
%% The latest version of this license is in
%% http://www.latex-project.org/lppl.txt
%% and version 1.3 or later is part of all distributions of LaTeX
%% version 2005/12/01 or later.
%%
%% To produce the documentation run the original source files ending with `.dtx'
%% through LaTeX.
%%
\NeedsTeXFormat{LaTeX2e}[1999/12/01]
\ProvidesClass{iidef}
[2020/09/09 2.6 Tsinghua University Coursework Template]
%% configuration of nested enumerate env
\RequirePackage{enumitem}
%% set hwcount key-value option
\RequirePackage{kvoptions}
%% required by macro DeclareMathOperator
\RequirePackage{amsmath}
%% Set up page headers using with fancyhdr
\@ifundefined{lhead}{\RequirePackage{fancyhdr}}
{\def\@thulhead{thulhead}}
\RequirePackage{amsthm}
%% Course metadata. Each \the... setter below overwrites the matching internal
%% \@... macro, which initially holds a placeholder word.
%% semester
\def\@term{term}
\newcommand{\theterm}[1]{\renewcommand\@term{#1}}
%% institute
\newcommand{\@courseinstitute}[1]{institute}
\newcommand{\thecourseinstitute}[1]{\renewcommand\@courseinstitute{#1}}
%% coursename (stored in small caps)
\newcommand{\@coursename}[1]{coursename}
\newcommand{\thecoursename}[1]{\renewcommand\@coursename{\textsc{#1}}}
%% user can rewrite homework name
\def\@hwname{Homework}
\def\hwname#1{\renewcommand\@hwname{#1}}
%% package option `thehwcnt' (homework number), default 1;
%% kvoptions stores it in \iidef@thehwcnt, exposed to users as \thehwcnt
\DeclareStringOption[1]{thehwcnt}
\ProcessKeyvalOptions*
\def\thehwcnt{\iidef@thehwcnt}
%% page header setup, distinguish between first page(plain style)
%% and second page on (runningpage style)
%%***************************************************************************
%% \courseheader prints the title block: institute, course name and term,
%% a horizontal rule, then the underlined homework name and number.
\newcommand{\courseheader}{
\thispagestyle{plain}%first page use native plain style to suppress header
\vspace*{-1in}
\begin{center}
\@courseinstitute\\
\@coursename\\
\@term
\vspace*{0.1in}
\hrule
\end{center}
\begin{center}
\underline{\bf \@hwname\;\thehwcnt} \\
\end{center}
}
%% The running header is only installed when \@thulhead is undefined
%% (it gets defined earlier in this file when \lhead already existed at load time).
\@ifundefined{@thulhead}{
\fancypagestyle{runningpage}
{
\fancyhead[L]{\small\@coursename}
\fancyhead[R]{\small\@courseinstitute}
}
%% use runningpage style from second page on
\pagestyle{runningpage}
}{}
%% *********************************************************************************************
%%name command macro
%% \name{<author>}: prints the author flush left with today's date at the right
%% margin, followed by a rule and 2em of space; ends by switching to \flushleft.
%%*************************
\newcommand{\name}[1]{
\begin{flushleft}
#1\hfill
\today
\end{flushleft}
\hrule
\vspace{2em}
\flushleft
}
%%*************************
%% enumitem related configuration
%% level 1: <homework number>.<n>.   level 2: (a)   level 3: i.   level 4: \greek
%% NOTE(review): \greek is not defined in this file -- presumably supplied by another
%% package (e.g. moreenum) loaded by the document; confirm.
\setlist[enumerate,1]{label=\thehwcnt.\arabic*.}
\setlist[enumerate,2]{label=(\alph*)}
\setlist[enumerate,3]{label=\roman*.}
\setlist[enumerate,4]{label=\greek*}
%%******************************
%% heading word of the solution environment; override with \slname{<text>}
\def\@slname{Solution}
\def\slname#1{\renewcommand\@slname{#1}}
%% `solution' environment: a proof environment titled \@slname,
%% defined only if no \solution command exists yet
\@ifundefined{solution}{
\newenvironment{solution}
{
\proof[\@slname]
}
{
%% no qed symbol in solution env
\renewcommand{\qedsymbol}{}
\endproof
}
}{}
%%******************************
%%common math symbols go here
%%*************************************************
%% \v{<x>}: underline notation for vectors (redefines the text accent \v)
\def\v#1{\underline{#1}}
\newcommand{\uc}{\underline{c}} % c, vec
\newcommand{\uv}{\underline{v}} % v, vec
\newcommand{\uw}{\underline{w}} % w, vec
\newcommand{\ux}{\underline{x}} % x, vec
\newcommand{\uy}{\underline{y}} % y, vec
\newcommand{\uz}{\underline{z}} % z, vec
\newcommand{\um}{\underline{m}} % m, vec
\newcommand{\rvx}{\mathsf{x}} % x, r.v.
\newcommand{\rvy}{\mathsf{y}} % y, r.v.
\newcommand{\rvz}{\mathsf{z}} % z, r.v.
\newcommand{\rvw}{\mathsf{w}} % w, r.v.
\newcommand{\rvH}{\mathsf{H}} % H, r.v.
\newcommand{\urvx}{\underline{\mathsf{x}}} % x, r.v. vec
\newcommand{\urvy}{\underline{\mathsf{y}}} % y, r.v. vec
\newcommand{\urvz}{\underline{\mathsf{z}}} % z, r.v. vec
\newcommand{\urvw}{\underline{\mathsf{w}}} % w, r.v. vec
\newcommand{\defas}{\triangleq} %\coloneqq
\newcommand{\reals}{\mathbb{R}}
\newcommand{\TT}{\mathrm{T}} % transpose
%% starred operators take limits below/above in display style
\DeclareMathOperator*{\argmax}{arg\,max}
\DeclareMathOperator*{\argmin}{arg\,min}
\DeclareMathOperator*{\argsup}{arg\,sup}
\DeclareMathOperator*{\arginf}{arg\,inf}
\DeclareMathOperator{\diag}{diag}
\DeclareMathOperator{\Var}{Var}
\DeclareMathOperator{\Cov}{Cov}
\DeclareMathOperator{\MSE}{MSE}
%% NOTE(review): \mathds and \mathbb are not provided by this file; the document
%% must load packages that define them before \1, \In, \E or \Prob are used -- confirm.
\DeclareMathOperator{\1}{\mathds{1}}
\DeclareMathOperator{\In}{\mathbb{I}}
\DeclareMathOperator{\E}{\mathbb{E}}
\DeclareMathOperator{\Prob}{\mathbb{P}}
%% \independent: independence relation, drawn as two overlapping \perp symbols
\newcommand\independent{\protect\mathpalette{\protect\independenT}{\perp}}
\def\independenT#1#2{\mathrel{\rlap{$#1#2$}\mkern2mu{#1#2}}}
%%************************************************************************************

Binary file not shown.

After

Width:  |  Height:  |  Size: 21 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 17 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 20 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 15 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 23 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 19 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 30 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 17 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 20 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 25 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 18 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 26 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 17 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 16 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 21 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 31 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 17 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 23 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 19 KiB

379
hw3/report/main.tex Normal file
View File

@@ -0,0 +1,379 @@
% Homework Template
\documentclass[a4paper]{article}
\usepackage{ctex}
\usepackage{amsmath, amssymb, amsthm}
\usepackage{moreenum}
\usepackage{mathtools}
\usepackage{url}
\usepackage{bm}
\usepackage{enumitem}
\usepackage{graphicx}
\usepackage{subcaption}
\usepackage{booktabs} % toprule
\usepackage[mathcal]{eucal}
\usepackage[thehwcnt = 3]{iidef}
\usepackage{listings}
\usepackage{fontspec}
\usepackage{xcolor}
\usepackage{float}
\usepackage{siunitx}
\newfontfamily\codefont[Ligatures=ResetAll]{Fira Code}[Contextuals={Alternate}]
\newfontfamily\cascadia{Cascadia Code}
\lstset{
basicstyle = \small\codefont,
% ---
tabsize = 4,
showstringspaces = false,
numbers = left,
numberstyle = \codefont,
% ---
breaklines = true,
captionpos = t,
% ---
frame = l,
flexiblecolumns,
}
\lstdefinestyle{Python}{
language = Python, % 语言选Python
keywordstyle = \color{blue},
keywordstyle = [2] \color{teal},
stringstyle = \color{orange!80!black},
commentstyle = \color{red},
identifierstyle = \color{blue!80!white},
}
\lstdefinestyle{Bash}{
language = bash
}
\thecourseinstitute{清华大学电子工程系}
\thecoursename{\textbf{媒体与认知}}
\theterm{2023-2024学年春季学期}
\hwname{作业}
\begin{document}
\courseheader
% 请在YOUR NAME处填写自己的姓名
\name{高艺轩}
\vspace{3mm}
\centerline{\textbf{\Large{理论部分}}}
\section{单选题（15分）}
% 请在?处填写答案
\subsection{\underline{D}}
\subsection{\underline{C}}
\subsection{\underline{D}}
\subsection{\underline{D}}
\subsection{\underline{B}}
\section{计算题（15 分）}
\subsection{给定两个类别的样本分别为:
\begin{align*}
&\omega_1:\{(3,1),(2,2),(4,3),(3,2)\} \\
&\omega_2:\{(1,3),(1,2),(-1,1),(-1,2)\}
\end{align*}
试利用LDA将样本特征维数压缩为一维。
}
\begin{proof}[解]
首先计算$\mu_1 = (3, 2), \mu_2 = (0, 2), \mu = (1.5, 2)$。因此
\[S_1 = \frac{1}{4}
\left(
\begin{bmatrix}
0 & 0\\
0 & 1
\end{bmatrix}
+
\begin{bmatrix}
1 & 0\\
0 & 0
\end{bmatrix}
+
\begin{bmatrix}
1 & 1\\
1 & 1
\end{bmatrix}
+
\begin{bmatrix}
0 & 0\\
0 & 0
\end{bmatrix}
\right)
=
\begin{bmatrix}
0.5 & 0.25\\
0.25 & 0.5
\end{bmatrix}\]
\[S_2 = \frac{1}{4}
\left(
\begin{bmatrix}
1 & 1\\
1 & 1
\end{bmatrix}
+
\begin{bmatrix}
1 & 0\\
0 & 0
\end{bmatrix}
+
\begin{bmatrix}
1 & 1\\
1 & 1
\end{bmatrix}
+
\begin{bmatrix}
1 & 0\\
0 & 0
\end{bmatrix}
\right)
=
\begin{bmatrix}
1 & 0.5\\
0.5 & 0.5
\end{bmatrix}\]
进一步地,
\[S_w = \frac{1}{2} (S_1 + S_2) =
\begin{bmatrix}
0.75 & 0.375\\
0.375 & 0.5
\end{bmatrix}\]
\[S_b = \frac{1}{2} \left(
\begin{bmatrix}
2.25 & 0\\
0 & 0
\end{bmatrix}
+
\begin{bmatrix}
2.25 & 0\\
0 & 0
\end{bmatrix}
\right)
=
\begin{bmatrix}
2.25 & 0\\
0 & 0
\end{bmatrix}\]
广义特征值分解（$S_b v = \lambda S_w v$）得到$\lambda = 4.8$，$v = (0.8, -0.6)$。投影后的样本为
\[\omega_1: \left\{1.8, 0.4, 1.4, 1.2\right\}\]
\[\omega_2: \left\{-1.0, -0.4, -1.4, -2.0\right\}\]
\end{proof}
\vspace{3mm}
\subsection{模型训练通常需要大量的数据假设某采集的数据集包含80\%的有效数据和20\%的无效数据。采用一种算法判断数据是否有效其中无效数据被成功判别为无效数据的概率为90\%而有效数据被误判为无效数据的概率为5\%。如果某条数据经过该算法被判别为无效数据,则根据贝叶斯定理,这条数据是无效数据的概率是多少?(提示:全概率公式$P(Y)=\sum^{N}_{i=1}P(Y|X_i)P(X_i)$)\\}
\begin{proof}[解]
\begin{align*}
& P(\text{无效数据} \mid \text{判定无效})\\
= & \frac{p(\text{判定无效} \mid \text{无效数据})p(\text{无效数据})}{p(\text{判定无效} \mid \text{无效数据})p(\text{无效数据}) + p(\text{判定无效} \mid \text{有效数据})p(\text{有效数据})}\\
= & \frac{0.9 \times 0.2}{0.9 \times 0.2 + 0.05 \times 0.8}\\
= & \frac{0.18}{0.18 + 0.04}\\
= & \frac{9}{11}
\end{align*}
\end{proof}
\vspace{3mm}
\subsection{设有两类正态分布的样本集,第一类均值为$\mu_1=[2,-1]^T$,第二类均值为$\mu_2=[1,1]^T$。两类样本集的协方差矩阵和出现的先验概率都相等:$\Sigma_1=\Sigma_2=\Sigma=\left[ \begin{array}{cc}
4 & 2 \\
2 & \frac{4}{3}
\end{array} \right]$，$p(\omega_1)=p(\omega_2)$。试计算分类界面，并对特征向量$x=[6,2]^T$分类。}
\begin{proof}[解]
\[\Sigma^{-1} = \begin{bmatrix}
1 & -1.5\\
-1.5 & 3
\end{bmatrix}\]
决策方程
\[g_{LDF1} = (\Sigma^{-1} \mu_1)^T \boldsymbol{x} - \frac{1}{2} \mu_1^T \Sigma^{-1} \mu_1 = (3.5, -6) \boldsymbol{x} - 6.5\]
类似地可以得到
\[g_{LDF2} = (-0.5, 1.5) \boldsymbol{x} - 0.5\]
因此分类界面为
\begin{align*}
(3.5, -6) \boldsymbol{x} - 6.5 & = (-0.5, 1.5) \boldsymbol{x} - 0.5\\
(4, -7.5) \boldsymbol{x} & = 6
\end{align*}
对于$(6, 2)$，计算$g_{LDF1}((6, 2)) = 2.5$，$g_{LDF2}((6, 2)) = -0.5$，因此属于第一类。
\end{proof}
\vspace{3mm}
\subsection{给定异或的样本集$D=\left\{\left((0,0)^T,-1\right),\left((0,1)^T,1\right),\left((1,0)^T,1\right),\left((1,1)^T,-1\right)\right\}$该样本集是线性不可分的,可采用如下所示的多项式函数$\phi(\mathbf{x})$将样本$D=\left\{(\mathbf{x}_n,y_n)\right\}$映射为$D_\phi=\left\{(\phi(\mathbf{x}_n),y_n)\right\}$,其中$\phi(\mathbf{x})$满足
\begin{equation*}
\begin{aligned}
\phi_1(\mathbf{x})&=2(x_1-0.5) \\
\phi_2(\mathbf{x})&=4(x_1-0.5)(x_2-0.5)
\end{aligned}
\end{equation*}
\\
\qquad(1) 给出映射后的样本集;\\
\qquad(2) 在映射后的样本集中设计一个线性SVM分类器给出支持向量及分类界面。
}
\begin{proof}[解]
映射后的样本集
\[D_{\phi} = \left\lbrace\left((-1, 1)^T, -1\right), \left((-1, -1)^T, 1\right), \left((1, -1)^T, 1\right), \left((1, 1)^T, -1\right)\right\rbrace\]
待优化的问题为
\[L(\boldsymbol{\alpha}) = \sum_{i = 1}^4 \alpha_i - \frac{1}{2} \sum_{i = 1}^4 \sum_{j = 1}^4 \alpha_i \alpha_j y_i y_j \boldsymbol{x}_i^T \boldsymbol{x}_j\]
约束条件为$\sum_{i = 1}^4 \alpha_i y_i = 0$，$\alpha_i \geq 0$，其中$\boldsymbol{x}_i$表示映射后的样本。由于$\boldsymbol{x}_i^T \boldsymbol{x}_i = 2$，$y_1 y_3 \boldsymbol{x}_1^T \boldsymbol{x}_3 = y_2 y_4 \boldsymbol{x}_2^T \boldsymbol{x}_4 = 2$，其余交叉项均为0，因此
\begin{align*}
\frac{\partial L}{\partial \alpha_1} & = 1 - \sum_{i = 1}^4 \alpha_i y_1 y_i \boldsymbol{x}_1^T \boldsymbol{x}_i\\
& = 1 - 2 \alpha_1 - 2 \alpha_3\\
\frac{\partial L}{\partial \alpha_2} & = 1 - 2 \alpha_2 - 2 \alpha_4\\
\frac{\partial L}{\partial \alpha_3} & = 1 - 2 \alpha_1 - 2 \alpha_3\\
\frac{\partial L}{\partial \alpha_4} & = 1 - 2 \alpha_2 - 2 \alpha_4
\end{align*}
令四个偏导数均为0，得到$\alpha_1 + \alpha_3 = \alpha_2 + \alpha_4 = \frac{1}{2}$；结合约束$\sum_{i = 1}^4 \alpha_i y_i = 0$并由对称性取$\alpha_1 = \alpha_2 = \alpha_3 = \alpha_4 = \frac{1}{4}$。全部的点均为支持向量。因此
\[\boldsymbol{w} = \sum_{i = 1}^4 \alpha_i y_i \boldsymbol{x}_i = \left(0, -1\right)^T\]
为求偏置量，代入$\boldsymbol{x}_1$
\[(-1) (\boldsymbol{w}^T \boldsymbol{x}_1 + b) = 1\]
得到$b = 0$。
分类界面$\boldsymbol{w}^T \boldsymbol{x} + b = 0$，即
\[\begin{bmatrix}
0 & -1
\end{bmatrix} \boldsymbol{x} = 0\]
得到$\phi_2(\mathbf{x}) = 0$，因此在原空间中，
\[4(x_1 - 0.5)(x_2 - 0.5) = 0\]
即$x_1 = 0.5$或$x_2 = 0.5$。
\end{proof}
\vspace{3mm}
\subsection{使用KMeans算法对2维空间中的6个点$(0,2)$,$(2,0)$,$(2,3)$,$(3,2)$,$(4,0)$,$(5,4)$进行聚类,距离函数选择欧氏距离$d=\sqrt{(x_1-x_2)^2+(y_1-y_2)^2}$\\
\qquad (1)起始聚类中心选择(0,0)和(4,3),计算聚类中心;\\
\qquad (2)起始聚类中心选择(1,4)和(3,1),计算聚类中心。\\
}
\begin{proof}[解]
（1）中心选择$(0, 0)$和$(4, 3)$，第一次分为$\{(0, 2), (2, 0)\}$和$\{(2, 3), (3, 2), (4, 0), (5, 4)\}$，更新后的中心为$(1, 1)$和$\left(\frac{7}{2}, \frac{9}{4}\right)$。再次分配后各点所属类别不变，收敛。

（2）中心选择$(1, 4)$和$(3, 1)$，第一次分为$\{(0, 2), (2, 3)\}$和$\{(2, 0), (4, 0), (3, 2), (5, 4)\}$，更新后的中心为$\left(1, \frac{5}{2}\right)$和$\left(\frac{7}{2}, \frac{3}{2}\right)$。再次分配后各点所属类别不变，收敛。
\end{proof}
\vspace{3mm}
\centerline{\textbf{\Large{编程部分}}}
\vspace{3mm}
% 请根据是否选择自选课题的情况选择“编程作业报告”或“自选课题进度汇报”中的一项完成
\section{编程作业报告}
\subsection{程序验证}
与助教给出的图片相比，我写出的程序PCA得到的结果的x、y坐标都在$[-1, 1]$之间，不利于之后的分类。因此我将所有PCA之后的坐标都扩大了20倍。
运行\lstinline{check.py}进行检查:
\begin{figure}[H]
\centering
\includegraphics[width=\linewidth]{img/check/check.png}
\end{figure}
\subsection{数据预处理}
运行
\begin{lstlisting}[style=Bash]
python data_preprocess.py
\end{lstlisting}
得到的输出为
\begin{figure}[H]
\centering
\begin{subfigure}[t]{.45\linewidth}
\includegraphics[width=\textwidth]{img/preprocess/preprocess_train.png}
\caption{训练集preprocess结果}
\end{subfigure}
\hspace{0.5cm}
\begin{subfigure}[t]{.45\linewidth}
\includegraphics[width=\textwidth]{img/preprocess/preprocess_val.png}
\caption{验证集preprocess结果}
\end{subfigure}\\[2ex]
\begin{subfigure}[t]{.45\linewidth}
\includegraphics[width=\textwidth]{img/preprocess/preprocess_test.png}
\caption{测试集preprocess结果}
\end{subfigure}
\end{figure}
\subsection{训练、验证及测试}
\begin{figure}[H]
\centering
\begin{subfigure}[t]{.45\linewidth}
\includegraphics[width=\textwidth]{img/train/default/loss.png}
\end{subfigure}
\hspace{0.5cm}
\begin{subfigure}[t]{.45\linewidth}
\includegraphics[width=\textwidth]{img/train/default/train_accu.png}
\end{subfigure}\\[2ex]
\begin{subfigure}[t]{.45\linewidth}
\includegraphics[width=\textwidth]{img/train/default/sv.png}
\end{subfigure}
\hspace{0.5cm}
\begin{subfigure}[t]{.45\linewidth}
\includegraphics[width=\textwidth]{img/train/default/val.png}
\end{subfigure}\\[2ex]
\begin{subfigure}[t]{.8\linewidth}
\includegraphics[width=\textwidth]{img/train/default/test.png}
\end{subfigure}
\end{figure}
\subsection{调整正则化系数}
\subsubsection{C = \num{1e-6}}
\begin{figure}[H]
\centering
\begin{subfigure}[t]{.45\linewidth}
\includegraphics[width=\textwidth]{img/train/1e-6/loss.png}
\end{subfigure}
\hspace{0.5cm}
\begin{subfigure}[t]{.45\linewidth}
\includegraphics[width=\textwidth]{img/train/1e-6/accu.png}
\end{subfigure}\\[2ex]
\begin{subfigure}[t]{.45\linewidth}
\includegraphics[width=\textwidth]{img/train/1e-6/sv.png}
\end{subfigure}
\hspace{0.5cm}
\begin{subfigure}[t]{.45\linewidth}
\includegraphics[width=\textwidth]{img/train/1e-6/val.png}
\end{subfigure}\\[2ex]
\begin{subfigure}[t]{.8\linewidth}
\includegraphics[width=\textwidth]{img/train/1e-6/test.png}
\end{subfigure}
\end{figure}
可以看到出现了严重的欠拟合，分类界面超出了绘图的范围。这是因为C过小，对误分类样本的惩罚几乎不起作用，导致不能正确地分辨合适的分类界面。
\subsubsection{C = 1}
\begin{figure}[H]
\centering
\begin{subfigure}[t]{.45\linewidth}
\includegraphics[width=\textwidth]{img/train/1/loss.png}
\end{subfigure}
\hspace{0.5cm}
\begin{subfigure}[t]{.45\linewidth}
\includegraphics[width=\textwidth]{img/train/1/accu.png}
\end{subfigure}\\[2ex]
\begin{subfigure}[t]{.45\linewidth}
\includegraphics[width=\textwidth]{img/train/1/sv.png}
\end{subfigure}
\hspace{0.5cm}
\begin{subfigure}[t]{.45\linewidth}
\includegraphics[width=\textwidth]{img/train/1/val.png}
\end{subfigure}\\[2ex]
\begin{subfigure}[t]{.8\linewidth}
\includegraphics[width=\textwidth]{img/train/1/test.png}
\end{subfigure}
\end{figure}
发生了过拟合,直线被交界面的点限制,斜率不是最优。
\end{document}
%%% Local Variables:
%%% mode: latex
%%% TeX-master: t
%%% End:

2
j.ps1
View File

@@ -1 +1 @@
cd ./hw2/code
cd ./hw3/code

View File

@@ -2,7 +2,7 @@
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
@@ -152,6 +152,66 @@
"print(conv_1(a).size())\n",
"print(conv_2(conv_1(a)).size())\n"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"tensor([0., 1.])\n",
"1\n"
]
}
],
"source": [
"a = torch.Tensor([1.0, 2.0])\n",
"b = torch.Tensor([1.0, 1.0])\n",
"print((a > b).type_as(a))\n",
"print((a == b).sum().item())"
]
},
{
"cell_type": "code",
"execution_count": 19,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"tensor(2.5000)\n"
]
}
],
"source": [
"a = torch.Tensor([[1.0, 2.0], [3.0, 4.0]])\n",
"mu = a.mean(dim=0)\n",
"print(mu, a - mu)"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"tensor([[5.],\n",
" [4.]])\n"
]
}
],
"source": [
"a = torch.Tensor([[5], [4]])\n",
"b = torch.Tensor([1])\n",
"print((a.T * b).T)"
]
}
],
"metadata": {