diff --git a/.gitignore b/.gitignore index a2e4f14..86532cf 100644 --- a/.gitignore +++ b/.gitignore @@ -1,12 +1,12 @@ -*.zip -__pycache__/ -*.pth -*.log -*.aux -*.synctex.gz -*.synctex.gz(buzy) -*.out -*.pdf -.DS_Store -hw2/code/checkpoints/ +*.zip +__pycache__/ +*.pth +*.log +*.aux +*.synctex.gz +*.synctex.gz(buzy) +*.out +*.pdf +.DS_Store +hw2/code/checkpoints/ hw2/code/visualized/ \ No newline at end of file diff --git a/hw1/.vscode/settings.json b/hw1/.vscode/settings.json index dcb1530..1363ec0 100644 --- a/hw1/.vscode/settings.json +++ b/hw1/.vscode/settings.json @@ -1,4 +1,4 @@ -{ - "python.analysis.typeCheckingMode": "basic", - "python.analysis.autoImportCompletions": true +{ + "python.analysis.typeCheckingMode": "basic", + "python.analysis.autoImportCompletions": true } \ No newline at end of file diff --git a/hw1/HW1-Report/codes/1.1.out.txt b/hw1/HW1-Report/codes/1.1.out.txt index 52893e4..97cd212 100644 --- a/hw1/HW1-Report/codes/1.1.out.txt +++ b/hw1/HW1-Report/codes/1.1.out.txt @@ -1,56 +1,56 @@ -Epoch 01: loss = inf -Epoch 02: loss = inf -Epoch 03: loss = 6.678 -Epoch 04: loss = 4.361 -Epoch 05: loss = 3.110 -Epoch 06: loss = 2.099 -Epoch 07: loss = 1.698 -Epoch 08: loss = 1.320 -Epoch 09: loss = 0.970 -Epoch 10: loss = 0.891 -Epoch 10: validation accuracy = 66.0% -Epoch 11: loss = 0.817 -Epoch 12: loss = 0.723 -Epoch 13: loss = 0.512 -Epoch 14: loss = 0.353 -Epoch 15: loss = 0.202 -Epoch 16: loss = 0.182 -Epoch 17: loss = 0.184 -Epoch 18: loss = 0.191 -Epoch 19: loss = 0.175 -Epoch 20: loss = 0.166 -Epoch 20: validation accuracy = 68.0% -Epoch 21: loss = 0.146 -Epoch 22: loss = 0.105 -Epoch 23: loss = 0.109 -Epoch 24: loss = 0.074 -Epoch 25: loss = 0.097 -Epoch 26: loss = 0.047 -Epoch 27: loss = 0.038 -Epoch 28: loss = 0.037 -Epoch 29: loss = 0.024 -Epoch 30: loss = 0.021 -Epoch 30: validation accuracy = 68.8% -Epoch 31: loss = 0.019 -Epoch 32: loss = 0.024 -Epoch 33: loss = 0.023 -Epoch 34: loss = 0.014 -Epoch 35: loss = 0.013 -Epoch 36: loss = 0.012 -Epoch 37: loss = 0.011 -Epoch 38: loss = 0.013 -Epoch 39: loss = 0.013 -Epoch 40: loss = 0.016 -Epoch 40: validation accuracy = 70.5% -Epoch 41: loss = 0.015 -Epoch 42: loss = 0.009 -Epoch 43: loss = 0.011 -Epoch 44: loss = 0.008 -Epoch 45: loss = 0.008 -Epoch 46: loss = 0.010 -Epoch 47: loss = 0.009 -Epoch 48: loss = 0.007 -Epoch 49: loss = 0.007 -Epoch 50: loss = 0.010 -Epoch 50: validation accuracy = 70.5% +Epoch 01: loss = inf +Epoch 02: loss = inf +Epoch 03: loss = 6.678 +Epoch 04: loss = 4.361 +Epoch 05: loss = 3.110 +Epoch 06: loss = 2.099 +Epoch 07: loss = 1.698 +Epoch 08: loss = 1.320 +Epoch 09: loss = 0.970 +Epoch 10: loss = 0.891 +Epoch 10: validation accuracy = 66.0% +Epoch 11: loss = 0.817 +Epoch 12: loss = 0.723 +Epoch 13: loss = 0.512 +Epoch 14: loss = 0.353 +Epoch 15: loss = 0.202 +Epoch 16: loss = 0.182 +Epoch 17: loss = 0.184 +Epoch 18: loss = 0.191 +Epoch 19: loss = 0.175 +Epoch 20: loss = 0.166 +Epoch 20: validation accuracy = 68.0% +Epoch 21: loss = 0.146 +Epoch 22: loss = 0.105 +Epoch 23: loss = 0.109 +Epoch 24: loss = 0.074 +Epoch 25: loss = 0.097 +Epoch 26: loss = 0.047 +Epoch 27: loss = 0.038 +Epoch 28: loss = 0.037 +Epoch 29: loss = 0.024 +Epoch 30: loss = 0.021 +Epoch 30: validation accuracy = 68.8% +Epoch 31: loss = 0.019 +Epoch 32: loss = 0.024 +Epoch 33: loss = 0.023 +Epoch 34: loss = 0.014 +Epoch 35: loss = 0.013 +Epoch 36: loss = 0.012 +Epoch 37: loss = 0.011 +Epoch 38: loss = 0.013 +Epoch 39: loss = 0.013 +Epoch 40: loss = 0.016 +Epoch 40: validation accuracy = 70.5% +Epoch 41: loss = 0.015 +Epoch 42: loss = 0.009 +Epoch 43: loss = 0.011 +Epoch 44: loss = 0.008 +Epoch 45: loss = 0.008 +Epoch 46: loss = 0.010 +Epoch 47: loss = 0.009 +Epoch 48: loss = 0.007 +Epoch 49: loss = 0.007 +Epoch 50: loss = 0.010 +Epoch 50: validation accuracy = 70.5% Model saved in ./saved_models/default.pth \ No newline at end of file diff --git a/hw1/HW1-Report/codes/1.2.out.txt b/hw1/HW1-Report/codes/1.2.out.txt index b7b6896..d781b23 100644 --- a/hw1/HW1-Report/codes/1.2.out.txt +++ b/hw1/HW1-Report/codes/1.2.out.txt @@ -1,2 +1,2 @@ -[Info] Load model from .\saved_models\default.pth +[Info] Load model from .\saved_models\default.pth [Info] Test accuracy = 72.0% \ No newline at end of file diff --git a/hw1/HW1-Report/codes/2.2.out.txt b/hw1/HW1-Report/codes/2.2.out.txt index 5e58b96..69b45df 100644 --- a/hw1/HW1-Report/codes/2.2.out.txt +++ b/hw1/HW1-Report/codes/2.2.out.txt @@ -1,2 +1,2 @@ -[Info] Load model from .\saved_models\adam_optim.pth +[Info] Load model from .\saved_models\adam_optim.pth [Info] Test accuracy = 85.0% \ No newline at end of file diff --git a/hw1/HW1-Report/codes/adam_optim_cuda.out.txt b/hw1/HW1-Report/codes/adam_optim_cuda.out.txt index 61e159d..cebf13b 100644 --- a/hw1/HW1-Report/codes/adam_optim_cuda.out.txt +++ b/hw1/HW1-Report/codes/adam_optim_cuda.out.txt @@ -1,56 +1,56 @@ -Epoch 01: loss = inf -Epoch 02: loss = inf -Epoch 03: loss = inf -Epoch 04: loss = inf -Epoch 05: loss = inf -Epoch 06: loss = inf -Epoch 07: loss = inf -Epoch 08: loss = inf -Epoch 09: loss = 3.250 -Epoch 10: loss = 2.567 -Epoch 10: validation accuracy = 59.0% -Epoch 11: loss = 1.963 -Epoch 12: loss = 1.558 -Epoch 13: loss = 1.320 -Epoch 14: loss = 0.911 -Epoch 15: loss = 0.808 -Epoch 16: loss = 0.932 -Epoch 17: loss = 0.861 -Epoch 18: loss = 0.748 -Epoch 19: loss = 0.783 -Epoch 20: loss = 0.809 -Epoch 20: validation accuracy = 65.5% -Epoch 21: loss = 0.678 -Epoch 22: loss = 0.757 -Epoch 23: loss = 0.747 -Epoch 24: loss = 0.660 -Epoch 25: loss = 0.536 -Epoch 26: loss = 0.506 -Epoch 27: loss = 0.577 -Epoch 28: loss = 0.600 -Epoch 29: loss = 0.681 -Epoch 30: loss = 0.604 -Epoch 30: validation accuracy = 68.0% -Epoch 31: loss = 0.552 -Epoch 32: loss = 0.671 -Epoch 33: loss = 0.604 -Epoch 34: loss = 0.600 -Epoch 35: loss = 0.818 -Epoch 36: loss = 0.659 -Epoch 37: loss = 0.375 -Epoch 38: loss = 0.380 -Epoch 39: loss = 0.418 -Epoch 40: loss = 0.431 -Epoch 40: validation accuracy = 73.5% -Epoch 41: loss = 0.551 -Epoch 42: loss = 0.488 -Epoch 43: loss = 0.350 -Epoch 44: loss = 0.287 -Epoch 45: loss = 0.294 -Epoch 46: loss = 0.463 -Epoch 47: loss = 0.438 -Epoch 48: loss = 0.392 -Epoch 49: loss = 0.325 -Epoch 50: loss = 0.332 -Epoch 50: validation accuracy = 80.8% +Epoch 01: loss = inf +Epoch 02: loss = inf +Epoch 03: loss = inf +Epoch 04: loss = inf +Epoch 05: loss = inf +Epoch 06: loss = inf +Epoch 07: loss = inf +Epoch 08: loss = inf +Epoch 09: loss = 3.250 +Epoch 10: loss = 2.567 +Epoch 10: validation accuracy = 59.0% +Epoch 11: loss = 1.963 +Epoch 12: loss = 1.558 +Epoch 13: loss = 1.320 +Epoch 14: loss = 0.911 +Epoch 15: loss = 0.808 +Epoch 16: loss = 0.932 +Epoch 17: loss = 0.861 +Epoch 18: loss = 0.748 +Epoch 19: loss = 0.783 +Epoch 20: loss = 0.809 +Epoch 20: validation accuracy = 65.5% +Epoch 21: loss = 0.678 +Epoch 22: loss = 0.757 +Epoch 23: loss = 0.747 +Epoch 24: loss = 0.660 +Epoch 25: loss = 0.536 +Epoch 26: loss = 0.506 +Epoch 27: loss = 0.577 +Epoch 28: loss = 0.600 +Epoch 29: loss = 0.681 +Epoch 30: loss = 0.604 +Epoch 30: validation accuracy = 68.0% +Epoch 31: loss = 0.552 +Epoch 32: loss = 0.671 +Epoch 33: loss = 0.604 +Epoch 34: loss = 0.600 +Epoch 35: loss = 0.818 +Epoch 36: loss = 0.659 +Epoch 37: loss = 0.375 +Epoch 38: loss = 0.380 +Epoch 39: loss = 0.418 +Epoch 40: loss = 0.431 +Epoch 40: validation accuracy = 73.5% +Epoch 41: loss = 0.551 +Epoch 42: loss = 0.488 +Epoch 43: loss = 0.350 +Epoch 44: loss = 0.287 +Epoch 45: loss = 0.294 +Epoch 46: loss = 0.463 +Epoch 47: loss = 0.438 +Epoch 48: loss = 0.392 +Epoch 49: loss = 0.325 +Epoch 50: loss = 0.332 +Epoch 50: validation accuracy = 80.8% Model saved in .\saved_models\adam_optim_cuda.pth \ No newline at end of file diff --git a/hw1/HW1-Report/codes/self_test.out.txt b/hw1/HW1-Report/codes/self_test.out.txt index 9b1d2ee..9897dd1 100644 --- a/hw1/HW1-Report/codes/self_test.out.txt +++ b/hw1/HW1-Report/codes/self_test.out.txt @@ -1,2 +1,2 @@ -[Info] Load model from .\saved_models\adam_optim_lr1e-3_epoch100_momentum10.pth +[Info] Load model from .\saved_models\adam_optim_lr1e-3_epoch100_momentum10.pth [Info] Test accuracy = 88.8% \ No newline at end of file diff --git a/hw1/HW1-Report/codes/self_train.out.txt b/hw1/HW1-Report/codes/self_train.out.txt index b866807..b221f6d 100644 --- a/hw1/HW1-Report/codes/self_train.out.txt +++ b/hw1/HW1-Report/codes/self_train.out.txt @@ -1,111 +1,111 @@ -Epoch 01: loss = inf -Epoch 02: loss = inf -Epoch 03: loss = inf -Epoch 04: loss = inf -Epoch 05: loss = inf -Epoch 06: loss = inf -Epoch 07: loss = inf -Epoch 08: loss = inf -Epoch 09: loss = inf -Epoch 10: loss = inf -Epoch 10: validation accuracy = 40.2% -Epoch 11: loss = inf -Epoch 12: loss = inf -Epoch 13: loss = inf -Epoch 14: loss = inf -Epoch 15: loss = inf -Epoch 16: loss = inf -Epoch 17: loss = 2.360 -Epoch 18: loss = 2.086 -Epoch 19: loss = 1.684 -Epoch 20: loss = 1.453 -Epoch 20: validation accuracy = 53.0% -Epoch 21: loss = 1.174 -Epoch 22: loss = 1.046 -Epoch 23: loss = 0.859 -Epoch 24: loss = 0.740 -Epoch 25: loss = 0.663 -Epoch 26: loss = 0.495 -Epoch 27: loss = 0.566 -Epoch 28: loss = 0.521 -Epoch 29: loss = 0.470 -Epoch 30: loss = 0.363 -Epoch 30: validation accuracy = 59.0% -Epoch 31: loss = 0.365 -Epoch 32: loss = 0.305 -Epoch 33: loss = 0.333 -Epoch 34: loss = 0.293 -Epoch 35: loss = 0.191 -Epoch 36: loss = 0.295 -Epoch 37: loss = 0.275 -Epoch 38: loss = 0.461 -Epoch 39: loss = 0.509 -Epoch 40: loss = 0.298 -Epoch 40: validation accuracy = 65.2% -Epoch 41: loss = 0.186 -Epoch 42: loss = 0.395 -Epoch 43: loss = 0.323 -Epoch 44: loss = 0.309 -Epoch 45: loss = 0.199 -Epoch 46: loss = 0.285 -Epoch 47: loss = 0.290 -Epoch 48: loss = 0.302 -Epoch 49: loss = 0.235 -Epoch 50: loss = 0.190 -Epoch 50: validation accuracy = 71.2% -Epoch 51: loss = 0.294 -Epoch 52: loss = 0.311 -Epoch 53: loss = 0.254 -Epoch 54: loss = 0.289 -Epoch 55: loss = 0.264 -Epoch 56: loss = 0.213 -Epoch 57: loss = 0.166 -Epoch 58: loss = 0.218 -Epoch 59: loss = 0.231 -Epoch 60: loss = 0.283 -Epoch 60: validation accuracy = 74.8% -Epoch 61: loss = 0.324 -Epoch 62: loss = 0.245 -Epoch 63: loss = 0.277 -Epoch 64: loss = 0.286 -Epoch 65: loss = 0.255 -Epoch 66: loss = 0.263 -Epoch 67: loss = 0.272 -Epoch 68: loss = 0.272 -Epoch 69: loss = 0.260 -Epoch 70: loss = 0.271 -Epoch 70: validation accuracy = 79.0% -Epoch 71: loss = 0.310 -Epoch 72: loss = 0.301 -Epoch 73: loss = 0.305 -Epoch 74: loss = 0.311 -Epoch 75: loss = 0.329 -Epoch 76: loss = 0.295 -Epoch 77: loss = 0.300 -Epoch 78: loss = 0.316 -Epoch 79: loss = 0.326 -Epoch 80: loss = 0.352 -Epoch 80: validation accuracy = 77.5% -Epoch 81: loss = 0.344 -Epoch 82: loss = 0.326 -Epoch 83: loss = 0.326 -Epoch 84: loss = 0.335 -Epoch 85: loss = 0.342 -Epoch 86: loss = 0.361 -Epoch 87: loss = 0.337 -Epoch 88: loss = 0.339 -Epoch 89: loss = 0.339 -Epoch 90: loss = 0.341 -Epoch 90: validation accuracy = 82.8% -Epoch 91: loss = 0.350 -Epoch 92: loss = 0.359 -Epoch 93: loss = 0.352 -Epoch 94: loss = 0.363 -Epoch 95: loss = 0.347 -Epoch 96: loss = 0.341 -Epoch 97: loss = 0.336 -Epoch 98: loss = 0.348 -Epoch 99: loss = 0.365 -Epoch 100: loss = 0.350 -Epoch 100: validation accuracy = 85.2% +Epoch 01: loss = inf +Epoch 02: loss = inf +Epoch 03: loss = inf +Epoch 04: loss = inf +Epoch 05: loss = inf +Epoch 06: loss = inf +Epoch 07: loss = inf +Epoch 08: loss = inf +Epoch 09: loss = inf +Epoch 10: loss = inf +Epoch 10: validation accuracy = 40.2% +Epoch 11: loss = inf +Epoch 12: loss = inf +Epoch 13: loss = inf +Epoch 14: loss = inf +Epoch 15: loss = inf +Epoch 16: loss = inf +Epoch 17: loss = 2.360 +Epoch 18: loss = 2.086 +Epoch 19: loss = 1.684 +Epoch 20: loss = 1.453 +Epoch 20: validation accuracy = 53.0% +Epoch 21: loss = 1.174 +Epoch 22: loss = 1.046 +Epoch 23: loss = 0.859 +Epoch 24: loss = 0.740 +Epoch 25: loss = 0.663 +Epoch 26: loss = 0.495 +Epoch 27: loss = 0.566 +Epoch 28: loss = 0.521 +Epoch 29: loss = 0.470 +Epoch 30: loss = 0.363 +Epoch 30: validation accuracy = 59.0% +Epoch 31: loss = 0.365 +Epoch 32: loss = 0.305 +Epoch 33: loss = 0.333 +Epoch 34: loss = 0.293 +Epoch 35: loss = 0.191 +Epoch 36: loss = 0.295 +Epoch 37: loss = 0.275 +Epoch 38: loss = 0.461 +Epoch 39: loss = 0.509 +Epoch 40: loss = 0.298 +Epoch 40: validation accuracy = 65.2% +Epoch 41: loss = 0.186 +Epoch 42: loss = 0.395 +Epoch 43: loss = 0.323 +Epoch 44: loss = 0.309 +Epoch 45: loss = 0.199 +Epoch 46: loss = 0.285 +Epoch 47: loss = 0.290 +Epoch 48: loss = 0.302 +Epoch 49: loss = 0.235 +Epoch 50: loss = 0.190 +Epoch 50: validation accuracy = 71.2% +Epoch 51: loss = 0.294 +Epoch 52: loss = 0.311 +Epoch 53: loss = 0.254 +Epoch 54: loss = 0.289 +Epoch 55: loss = 0.264 +Epoch 56: loss = 0.213 +Epoch 57: loss = 0.166 +Epoch 58: loss = 0.218 +Epoch 59: loss = 0.231 +Epoch 60: loss = 0.283 +Epoch 60: validation accuracy = 74.8% +Epoch 61: loss = 0.324 +Epoch 62: loss = 0.245 +Epoch 63: loss = 0.277 +Epoch 64: loss = 0.286 +Epoch 65: loss = 0.255 +Epoch 66: loss = 0.263 +Epoch 67: loss = 0.272 +Epoch 68: loss = 0.272 +Epoch 69: loss = 0.260 +Epoch 70: loss = 0.271 +Epoch 70: validation accuracy = 79.0% +Epoch 71: loss = 0.310 +Epoch 72: loss = 0.301 +Epoch 73: loss = 0.305 +Epoch 74: loss = 0.311 +Epoch 75: loss = 0.329 +Epoch 76: loss = 0.295 +Epoch 77: loss = 0.300 +Epoch 78: loss = 0.316 +Epoch 79: loss = 0.326 +Epoch 80: loss = 0.352 +Epoch 80: validation accuracy = 77.5% +Epoch 81: loss = 0.344 +Epoch 82: loss = 0.326 +Epoch 83: loss = 0.326 +Epoch 84: loss = 0.335 +Epoch 85: loss = 0.342 +Epoch 86: loss = 0.361 +Epoch 87: loss = 0.337 +Epoch 88: loss = 0.339 +Epoch 89: loss = 0.339 +Epoch 90: loss = 0.341 +Epoch 90: validation accuracy = 82.8% +Epoch 91: loss = 0.350 +Epoch 92: loss = 0.359 +Epoch 93: loss = 0.352 +Epoch 94: loss = 0.363 +Epoch 95: loss = 0.347 +Epoch 96: loss = 0.341 +Epoch 97: loss = 0.336 +Epoch 98: loss = 0.348 +Epoch 99: loss = 0.365 +Epoch 100: loss = 0.350 +Epoch 100: validation accuracy = 85.2% Model saved in .\saved_models\adam_optim_lr1e-3_epoch100_momentum10.pth \ No newline at end of file diff --git a/hw1/HW1-Report/main.tex b/hw1/HW1-Report/main.tex index 8cd8de6..f302840 100644 --- a/hw1/HW1-Report/main.tex +++ b/hw1/HW1-Report/main.tex @@ -1,244 +1,244 @@ -% Homework Template -\documentclass[a4paper]{article} -\usepackage{ctex} -\usepackage{amsmath, amssymb, amsthm} -\usepackage{moreenum} -\usepackage{mathtools} -\usepackage{url} -\usepackage{bm} -\usepackage{enumitem} -\usepackage{graphicx} -\usepackage{subcaption} -\usepackage{booktabs} % toprule -\usepackage[mathcal]{eucal} -\usepackage[thehwcnt = 1]{iidef} -\usepackage{listings} -\usepackage[x11names]{xcolor} -\usepackage{float} -\usepackage[colorlinks, linkcolor=black, anchorcolor=green, citecolor=blue]{hyperref} - -\DeclareMathOperator{\arctanh}{arctanh} -% \DeclareMathOperator{\diag}{diag} - -\setenumerate[1]{label=(\arabic{*})} -\setenumerate[2]{label=\arabic{*})} - -\definecolor{codekeyword}{RGB}{171, 0, 216} -\definecolor{codetypename}{RGB}{29, 37, 251} -\definecolor{codevariable}{RGB}{10, 23, 126} -\definecolor{codestring}{RGB}{157, 0, 25} -\definecolor{codecomment}{RGB}{31, 129, 19} - -\newfontfamily\cascadia[Ligatures=ResetAll]{Cascadia Code} -% \newfontfamily\codefont[Ligatures=ResetAll]{Cascadia Code} -\newfontfamily\codefont[Ligatures=ResetAll]{Fira Code}[Contextuals={Alternate}] -% To enable ligature in listing, go check lstfiracode's github page and copy firacodestyle's settings. - -\lstset{ - basicstyle = \small\codefont, - % --- - tabsize = 4, - showstringspaces = false, - numbers = left, - numberstyle = \cascadia, - % --- - breaklines = true, - captionpos = t, - % --- - frame = l, - flexiblecolumns, - columns = fixed, -} - -\thecourseinstitute{清华大学电子工程系} -\thecoursename{\textbf{媒体与认知} \space 课堂2} -\theterm{2023-2024学年春季学期} -\hwname{作业} -\begin{document} -\courseheader -% 请在YOUR NAME处填写自己的姓名 -\name{高艺轩} -\vspace{3mm} -\centerline{\textbf{\Large{理论部分}}} - -\section{单选题(15分)} -% 请在?处填写答案 -\subsection{\underline{B}} - -\subsection{\underline{A}} - -\subsection{\underline{B}} - -\subsection{\underline{A}} - -\subsection{\underline{B}} - -\section{计算题(15 分)} -\subsection{设隐含层为$\mathbf{z}=\mathbf{W}^T\mathbf{x}+\mathbf{b}$,其中$\mathbf{x}\in R^{(m \times 1)}$,$\mathbf{z}\in R^{(n\times 1)}$,$\mathbf{W}\in R^{(m\times n)}$,$\mathbf{b} \in R^{(n\times 1)}$均为已知,其激活函数如下: -$$\mathbf{y}=\delta(\mathbf{z})=tanh(\mathbf{z})$$ -tanh表示双曲正切函数。若训练过程中的目标函数为L,且已知L对$\mathbf{y}$的导数 $\frac{\partial L}{\partial \mathbf{y}}=[\frac{\partial L}{\partial y_1},\frac{\partial L}{\partial y_2},...,\frac{\partial L}{\partial y_n}]^T$和$\mathbf{y}=[y_1,y_2,...,y_n]^T$的值。 -} -\subsubsection{请使用$\mathbf{y}$表示出$\frac{\partial \mathbf{y}^T}{\partial \mathbf{z}}$, 这里的$\mathbf{y}^T$ 为行向量。 -} - -\begin{proof}[解] - 首先,对$i \neq j$,$\dfrac{\partial y_i}{\partial z_j} = 0$。 - - 同时$y_i = \tanh(z_i) = \tanh(\arctanh(y_i))$,因此 - \[\frac{\partial y_i}{\partial z_i} = 1 - \tanh^2(z_i) = 1 - y_i^2\] - 因此 - \[\dfrac{\partial \boldsymbol{y}^\mathrm{T}}{\partial \boldsymbol{z}} = \diag\{1 - y_1^2, 1 - y_2^2, \dots, 1 - y_n^2\} \qedhere\] -\end{proof} - -\subsubsection{请使用$\mathbf{y}$和$\frac{\partial L}{\partial \mathbf{y}}$表示$\frac{\partial L}{\partial \mathbf{x}}$,$\frac{\partial L}{\partial \mathbf{W}}$,$\frac{\partial L}{\partial \mathbf{b}}$。 -} -提示:$\frac{\partial L}{\partial \mathbf{x}}$,$\frac{\partial L}{\partial \mathbf{W}}$,$\frac{\partial L}{\partial \mathbf{b}}$与x,W,b具有相同维度。 - -\begin{proof}[解] - 由链式法则 - \[\frac{\partial L}{\partial \boldsymbol{x}} = \frac{\partial \boldsymbol{z}^\mathrm{T}}{\partial \boldsymbol{x}} \frac{\partial \boldsymbol{y}^\mathrm{T}}{\partial \boldsymbol{z}} \frac{\partial L}{\partial \boldsymbol{y}} = W \diag\{1 - y_1^2, 1 - y_2^2, \dots, 1 - y_n^2\} \frac{\partial L}{\partial \boldsymbol{y}}\] - - 对于$\dfrac{\partial L}{\partial W}$, - \[\frac{\partial \boldsymbol{z}^T}{\partial W} = \begin{bmatrix} - \boldsymbol{x} & \boldsymbol{x} & \cdots & \boldsymbol{x} - \end{bmatrix}_{m \times n}\] - - \begin{align*} - \frac{\partial L}{\partial W} & = \frac{\partial \boldsymbol{z}^\mathrm{T}}{\partial W} \frac{\partial \boldsymbol{y}^\mathrm{T}}{\partial \boldsymbol{z}} \frac{\partial L}{\partial \boldsymbol{y}}\\ - & = \begin{bmatrix} - \boldsymbol{x} & \boldsymbol{x} & \cdots & \boldsymbol{x} - \end{bmatrix}_{m \times n} \diag\{1 - y_1^2, 1 - y_2^2, \dots, 1 - y_n^2\} \frac{\partial L}{\partial \boldsymbol{y}} - \end{align*} - - 对于$\dfrac{\partial L}{\partial \boldsymbol{b}}$,由链式法则 - \[\frac{\partial L}{\partial \boldsymbol{b}} = \frac{\partial \boldsymbol{z}^\mathrm{T}}{\partial \boldsymbol{b}} \frac{\partial \boldsymbol{y}^\mathrm{T}}{\partial \boldsymbol{z}} \frac{\partial L}{\partial \boldsymbol{y}} = I_n \frac{\partial \boldsymbol{y}^\mathrm{T}}{\partial \boldsymbol{z}} \frac{\partial L}{\partial \boldsymbol{y}} = \diag\{1 - y_1^2, 1 - y_2^2, \dots, 1 - y_n^2\} \frac{\partial L}{\partial \boldsymbol{y}} \qedhere\] -\end{proof} -\vspace{6mm} -\centerline{\textbf{\Large{编程部分}}} - - -\vspace{3mm} -% 请根据是否选择自选课题的情况选择“编程作业报告”或“自选课题开题报告”中的一项完成 -\section{编程作业报告} -% 请在此处完成编程作业报告 -完成后的代码也可以在 \href{https://git.unlockableworld.com/unlockable/MediaNCognition}{\url{https://git.unlockableworld.com/unlockable/MediaNCognition}}中找到。 -\begin{enumerate} - \item 使用默认配置进行训练和测试。 - \begin{enumerate} - \item 训练模型。 - - 输入: - \lstinputlisting{codes/1.1.in.txt} - - 输出: - \lstinputlisting{codes/1.1.out.txt} - \begin{figure}[H] - \centering - \includegraphics[width=0.9\linewidth]{img/1default_train.png} - \end{figure} - - \item 测试模型。 - - 输入: - \lstinputlisting{codes/1.2.in.txt} - - 输出: - \lstinputlisting{codes/1.2.out.txt} - \end{enumerate} - \item 调整参数、使用Adam优化器训练并测试。 - \begin{enumerate} - \item 训练模型。 - - 输入: - \lstinputlisting{codes/2.1.in.txt} - - 输出: - \lstinputlisting{codes/2.1.out.txt} - \begin{figure}[H] - \centering - \includegraphics[width=0.9\linewidth]{img/2adam_optim.png} - \end{figure} - \item 测试性能。 - - 输入: - \lstinputlisting{codes/2.2.in.txt} - - 输出: - \lstinputlisting{codes/2.2.out.txt} - \end{enumerate} - - \item 使用效果最佳的模型测试。 - 经过简单的尝试,发现使用 - \lstinputlisting{codes/self_train.in.txt} - 可以使测试集准确率达到88.8\%,有略微的提升。训练的loss曲线: - \begin{figure}[H] - \centering - \includegraphics[width=.9\linewidth]{img/3found_best.png} - \end{figure} - 使用它进行预测: - \begin{figure}[H] - \centering - \begin{subfigure}[b]{.3\linewidth} - \includegraphics[width=\linewidth]{img/predict/predict01.png} - \subcaption{预测:A} - \end{subfigure} - \hfill - \begin{subfigure}[b]{.3\linewidth} - \includegraphics[width=\linewidth]{img/predict/predict02.png} - \subcaption{预测:B} - \end{subfigure} - \hfill - \begin{subfigure}[b]{.3\linewidth} - \includegraphics[width=\linewidth]{img/predict/predict03.png} - \subcaption{预测:M} - \end{subfigure} - - \begin{subfigure}[b]{.3\linewidth} - \includegraphics[width=\linewidth]{img/predict/predict04.png} - \subcaption{预测:R} - \end{subfigure} - \hfill - \begin{subfigure}[b]{.3\linewidth} - \includegraphics[width=\linewidth]{img/predict/predict05.png} - \subcaption{预测:M} - \end{subfigure} - \hfill - \begin{subfigure}[b]{.3\linewidth} - \includegraphics[width=\linewidth]{img/predict/predict06.png} - \subcaption{预测:O} - \end{subfigure} - - \hfill - \begin{subfigure}[b]{.3\linewidth} - \includegraphics[width=\linewidth]{img/predict/predict07.png} - \subcaption{预测:B} - \end{subfigure} - \hfill - \begin{subfigure}[b]{.3\linewidth} - \includegraphics[width=\linewidth]{img/predict/predict08.png} - \subcaption{预测:W} - \end{subfigure} - \hfill - \end{figure} - \item 遇到的问题及解决方法 - \begin{enumerate} - \item 代码中对灰度图像的矩阵进行标准化时,\lstinline{numpy}显示不能对\lstinline{NumpyGenericArray}进行对\lstinline{float}的\lstinline{/}操作。改用\lstinline{np.div()}解决了这个问题。 - \item 在利用训练好的模型进行预测时,发现自己找到的大部分模型都预测错误;最后与训练集的图片进行了对比,发现主要问题是裁切字母时留下了过大的边距,导致模型不能正确理解输入。重新裁剪边框后,得到正确的结果。 - \end{enumerate} - \item 建议:希望下次发布作业代码可以利用清华的git。 -\end{enumerate} - - - - -% \section{自选课题开题报告} -% 请在此处介绍自选课题 - -\end{document} - - - -%%% Local Variables: -%%% mode: late\rvx -%%% TeX-master: t -%%% End: +% Homework Template +\documentclass[a4paper]{article} +\usepackage{ctex} +\usepackage{amsmath, amssymb, amsthm} +\usepackage{moreenum} +\usepackage{mathtools} +\usepackage{url} +\usepackage{bm} +\usepackage{enumitem} +\usepackage{graphicx} +\usepackage{subcaption} +\usepackage{booktabs} % toprule +\usepackage[mathcal]{eucal} +\usepackage[thehwcnt = 1]{iidef} +\usepackage{listings} +\usepackage[x11names]{xcolor} +\usepackage{float} +\usepackage[colorlinks, linkcolor=black, anchorcolor=green, citecolor=blue]{hyperref} + +\DeclareMathOperator{\arctanh}{arctanh} +% \DeclareMathOperator{\diag}{diag} + +\setenumerate[1]{label=(\arabic{*})} +\setenumerate[2]{label=\arabic{*})} + +\definecolor{codekeyword}{RGB}{171, 0, 216} +\definecolor{codetypename}{RGB}{29, 37, 251} +\definecolor{codevariable}{RGB}{10, 23, 126} +\definecolor{codestring}{RGB}{157, 0, 25} +\definecolor{codecomment}{RGB}{31, 129, 19} + +\newfontfamily\cascadia[Ligatures=ResetAll]{Cascadia Code} +% \newfontfamily\codefont[Ligatures=ResetAll]{Cascadia Code} +\newfontfamily\codefont[Ligatures=ResetAll]{Fira Code}[Contextuals={Alternate}] +% To enable ligature in listing, go check lstfiracode's github page and copy firacodestyle's settings. + +\lstset{ + basicstyle = \small\codefont, + % --- + tabsize = 4, + showstringspaces = false, + numbers = left, + numberstyle = \cascadia, + % --- + breaklines = true, + captionpos = t, + % --- + frame = l, + flexiblecolumns, + columns = fixed, +} + +\thecourseinstitute{清华大学电子工程系} +\thecoursename{\textbf{媒体与认知} \space 课堂2} +\theterm{2023-2024学年春季学期} +\hwname{作业} +\begin{document} +\courseheader +% 请在YOUR NAME处填写自己的姓名 +\name{高艺轩} +\vspace{3mm} +\centerline{\textbf{\Large{理论部分}}} + +\section{单选题(15分)} +% 请在?处填写答案 +\subsection{\underline{B}} + +\subsection{\underline{A}} + +\subsection{\underline{B}} + +\subsection{\underline{A}} + +\subsection{\underline{B}} + +\section{计算题(15 分)} +\subsection{设隐含层为$\mathbf{z}=\mathbf{W}^T\mathbf{x}+\mathbf{b}$,其中$\mathbf{x}\in R^{(m \times 1)}$,$\mathbf{z}\in R^{(n\times 1)}$,$\mathbf{W}\in R^{(m\times n)}$,$\mathbf{b} \in R^{(n\times 1)}$均为已知,其激活函数如下: +$$\mathbf{y}=\delta(\mathbf{z})=tanh(\mathbf{z})$$ +tanh表示双曲正切函数。若训练过程中的目标函数为L,且已知L对$\mathbf{y}$的导数 $\frac{\partial L}{\partial \mathbf{y}}=[\frac{\partial L}{\partial y_1},\frac{\partial L}{\partial y_2},...,\frac{\partial L}{\partial y_n}]^T$和$\mathbf{y}=[y_1,y_2,...,y_n]^T$的值。 +} +\subsubsection{请使用$\mathbf{y}$表示出$\frac{\partial \mathbf{y}^T}{\partial \mathbf{z}}$, 这里的$\mathbf{y}^T$ 为行向量。 +} + +\begin{proof}[解] + 首先,对$i \neq j$,$\dfrac{\partial y_i}{\partial z_j} = 0$。 + + 同时$y_i = \tanh(z_i) = \tanh(\arctanh(y_i))$,因此 + \[\frac{\partial y_i}{\partial z_i} = 1 - \tanh^2(z_i) = 1 - y_i^2\] + 因此 + \[\dfrac{\partial \boldsymbol{y}^\mathrm{T}}{\partial \boldsymbol{z}} = \diag\{1 - y_1^2, 1 - y_2^2, \dots, 1 - y_n^2\} \qedhere\] +\end{proof} + +\subsubsection{请使用$\mathbf{y}$和$\frac{\partial L}{\partial \mathbf{y}}$表示$\frac{\partial L}{\partial \mathbf{x}}$,$\frac{\partial L}{\partial \mathbf{W}}$,$\frac{\partial L}{\partial \mathbf{b}}$。 +} +提示:$\frac{\partial L}{\partial \mathbf{x}}$,$\frac{\partial L}{\partial \mathbf{W}}$,$\frac{\partial L}{\partial \mathbf{b}}$与x,W,b具有相同维度。 + +\begin{proof}[解] + 由链式法则 + \[\frac{\partial L}{\partial \boldsymbol{x}} = \frac{\partial \boldsymbol{z}^\mathrm{T}}{\partial \boldsymbol{x}} \frac{\partial \boldsymbol{y}^\mathrm{T}}{\partial \boldsymbol{z}} \frac{\partial L}{\partial \boldsymbol{y}} = W \diag\{1 - y_1^2, 1 - y_2^2, \dots, 1 - y_n^2\} \frac{\partial L}{\partial \boldsymbol{y}}\] + + 对于$\dfrac{\partial L}{\partial W}$, + \[\frac{\partial \boldsymbol{z}^T}{\partial W} = \begin{bmatrix} + \boldsymbol{x} & \boldsymbol{x} & \cdots & \boldsymbol{x} + \end{bmatrix}_{m \times n}\] + + \begin{align*} + \frac{\partial L}{\partial W} & = \frac{\partial \boldsymbol{z}^\mathrm{T}}{\partial W} \frac{\partial \boldsymbol{y}^\mathrm{T}}{\partial \boldsymbol{z}} \frac{\partial L}{\partial \boldsymbol{y}}\\ + & = \begin{bmatrix} + \boldsymbol{x} & \boldsymbol{x} & \cdots & \boldsymbol{x} + \end{bmatrix}_{m \times n} \diag\{1 - y_1^2, 1 - y_2^2, \dots, 1 - y_n^2\} \frac{\partial L}{\partial \boldsymbol{y}} + \end{align*} + + 对于$\dfrac{\partial L}{\partial \boldsymbol{b}}$,由链式法则 + \[\frac{\partial L}{\partial \boldsymbol{b}} = \frac{\partial \boldsymbol{z}^\mathrm{T}}{\partial \boldsymbol{b}} \frac{\partial \boldsymbol{y}^\mathrm{T}}{\partial \boldsymbol{z}} \frac{\partial L}{\partial \boldsymbol{y}} = I_n \frac{\partial \boldsymbol{y}^\mathrm{T}}{\partial \boldsymbol{z}} \frac{\partial L}{\partial \boldsymbol{y}} = \diag\{1 - y_1^2, 1 - y_2^2, \dots, 1 - y_n^2\} \frac{\partial L}{\partial \boldsymbol{y}} \qedhere\] +\end{proof} +\vspace{6mm} +\centerline{\textbf{\Large{编程部分}}} + + +\vspace{3mm} +% 请根据是否选择自选课题的情况选择“编程作业报告”或“自选课题开题报告”中的一项完成 +\section{编程作业报告} +% 请在此处完成编程作业报告 +完成后的代码也可以在 \href{https://git.unlockableworld.com/unlockable/MediaNCognition}{\url{https://git.unlockableworld.com/unlockable/MediaNCognition}}中找到。 +\begin{enumerate} + \item 使用默认配置进行训练和测试。 + \begin{enumerate} + \item 训练模型。 + + 输入: + \lstinputlisting{codes/1.1.in.txt} + + 输出: + \lstinputlisting{codes/1.1.out.txt} + \begin{figure}[H] + \centering + \includegraphics[width=0.9\linewidth]{img/1default_train.png} + \end{figure} + + \item 测试模型。 + + 输入: + \lstinputlisting{codes/1.2.in.txt} + + 输出: + \lstinputlisting{codes/1.2.out.txt} + \end{enumerate} + \item 调整参数、使用Adam优化器训练并测试。 + \begin{enumerate} + \item 训练模型。 + + 输入: + \lstinputlisting{codes/2.1.in.txt} + + 输出: + \lstinputlisting{codes/2.1.out.txt} + \begin{figure}[H] + \centering + \includegraphics[width=0.9\linewidth]{img/2adam_optim.png} + \end{figure} + \item 测试性能。 + + 输入: + \lstinputlisting{codes/2.2.in.txt} + + 输出: + \lstinputlisting{codes/2.2.out.txt} + \end{enumerate} + + \item 使用效果最佳的模型测试。 + 经过简单的尝试,发现使用 + \lstinputlisting{codes/self_train.in.txt} + 可以使测试集准确率达到88.8\%,有略微的提升。训练的loss曲线: + \begin{figure}[H] + \centering + \includegraphics[width=.9\linewidth]{img/3found_best.png} + \end{figure} + 使用它进行预测: + \begin{figure}[H] + \centering + \begin{subfigure}[b]{.3\linewidth} + \includegraphics[width=\linewidth]{img/predict/predict01.png} + \subcaption{预测:A} + \end{subfigure} + \hfill + \begin{subfigure}[b]{.3\linewidth} + \includegraphics[width=\linewidth]{img/predict/predict02.png} + \subcaption{预测:B} + \end{subfigure} + \hfill + \begin{subfigure}[b]{.3\linewidth} + \includegraphics[width=\linewidth]{img/predict/predict03.png} + \subcaption{预测:M} + \end{subfigure} + + \begin{subfigure}[b]{.3\linewidth} + \includegraphics[width=\linewidth]{img/predict/predict04.png} + \subcaption{预测:R} + \end{subfigure} + \hfill + \begin{subfigure}[b]{.3\linewidth} + \includegraphics[width=\linewidth]{img/predict/predict05.png} + \subcaption{预测:M} + \end{subfigure} + \hfill + \begin{subfigure}[b]{.3\linewidth} + \includegraphics[width=\linewidth]{img/predict/predict06.png} + \subcaption{预测:O} + \end{subfigure} + + \hfill + \begin{subfigure}[b]{.3\linewidth} + \includegraphics[width=\linewidth]{img/predict/predict07.png} + \subcaption{预测:B} + \end{subfigure} + \hfill + \begin{subfigure}[b]{.3\linewidth} + \includegraphics[width=\linewidth]{img/predict/predict08.png} + \subcaption{预测:W} + \end{subfigure} + \hfill + \end{figure} + \item 遇到的问题及解决方法 + \begin{enumerate} + \item 代码中对灰度图像的矩阵进行标准化时,\lstinline{numpy}显示不能对\lstinline{NumpyGenericArray}进行对\lstinline{float}的\lstinline{/}操作。改用\lstinline{np.div()}解决了这个问题。 + \item 在利用训练好的模型进行预测时,发现自己找到的大部分模型都预测错误;最后与训练集的图片进行了对比,发现主要问题是裁切字母时留下了过大的边距,导致模型不能正确理解输入。重新裁剪边框后,得到正确的结果。 + \end{enumerate} + \item 建议:希望下次发布作业代码可以利用清华的git。 +\end{enumerate} + + + + +% \section{自选课题开题报告} +% 请在此处介绍自选课题 + +\end{document} + + + +%%% Local Variables: +%%% mode: late\rvx +%%% TeX-master: t +%%% End: diff --git a/hw1/HW1-code/activations.py b/hw1/HW1-code/activations.py index a17144c..20637a8 100644 --- a/hw1/HW1-code/activations.py +++ b/hw1/HW1-code/activations.py @@ -1,164 +1,164 @@ -#======================================================== -# Media and Cognition -# Homework 1 Neural network basics -# activations.py - activation functions -# Student ID: 2022010639 -# Name: Gao Yixuan -# Tsinghua University -# (C) Copyright 2024 -#======================================================== -import torch -import torch.nn as nn - -''' -In this script we will implement three activation functions, including both forward and backward processes. -More details about customizing a backward process in PyTorch can be found in: -https://pytorch.org/tutorials/beginner/examples_autograd/two_layer_net_custom_function.html -''' - -## Here, Tanh is given as an example to show how to construct the activation function. Please finish the activation functions of Sigmoid and ReLU later. -class Tanh(torch.autograd.Function): - ''' - Tanh activation function - y = (exp(x) - exp(-x)) / (exp(x) + exp(-x)) - ''' - # static method of a python class means that we can call the function without initializing an instance of the class - @staticmethod - def forward(ctx, x): - ''' - In the forward pass we receive a Tensor containing the input x and return - a Tensor containing the output. - - ctx: it is a context object that can be used to save information for backward computation. You can save - objects by using ctx.save_for_backward, and get objects by using ctx.saved_tensors - - x: input with arbitrary shape - ''' - # Please think if we use "y = (exp(x) - exp(-x)) / (exp(x) + exp(-x))", what might happen when x has a large absolute value - # y = (torch.exp(x) - torch.exp(-x)) / (torch.exp(x) + torch.exp(-x)) - - # here we directly use torch.tanh(x) to avoid the problem above - y = torch.tanh(x) - - # save an variable in ctx - ctx.save_for_backward(y) - - return y - - @staticmethod - def backward(ctx, grad_output): - """ - In the backward pass we receive a Tensor containing the gradient of the loss - with respect to the output, and we need to compute the gradient of the loss - with respect to the input. - - grad_output: dL/dy - grad_input: dL/dx = dL/dy * dy/dx, where y = forward(x) - """ - # get an variable from ctx - y, = ctx.saved_tensors - - # chain rule: dL/dx = dL/dy * dy/dx - # where dL/dy = grad_output, and the dy/dx of tanh function is (1-y^2)! - grad_input = grad_output * (1 - y ** 2) - - return grad_input - -#TODO 1: complete the forward and backward functions of the Sigmoid activation function. -#Note: You can refer to the activation function Tanh -class Sigmoid(torch.autograd.Function): - ''' - Sigmoid activation function - y = 1 / (1 + exp(-x)) - ''' - - @staticmethod - def forward(ctx, x): - - # hint: you can use torch.exp(x) to calculate exp(x) - y = 1 - (1 + torch.exp(-x)) - - # here we save y in ctx, in this way we can use y to calculate gradients in backward process - ctx.save_for_backward(y) - - return y - - @staticmethod - def backward(ctx, grad_output): - - # get y from ctx - y, = ctx.saved_tensors - - # implement gradient of x (grad_input), grad_input refers to dL/dx - # chain rule: dL/dx = dL/dy * dy/dx - # where dL/dy = grad_output, and dy/dx of Sigmoid function is y * (1 - y) - grad_input = grad_output * y * (1 - y) - - return grad_input - -#TODO 2: complete the forward and backward functions of the ReLU activation function. -#Note: You can refer to the activation function Tanh -class ReLU(torch.autograd.Function): - ''' - ReLU activation function - y = max{x, 0} - ''' - - @staticmethod - def forward(ctx, x): - - # set elements less than 0 in x to 0 - # this operation is inplace - x = torch.max(x, torch.tensor([0.]).to(x.device)) - - # save x in ctx, in this way we can use x to calculate gradients in backward process - ctx.save_for_backward(x) - - # return the output - return x - - @staticmethod - def backward(ctx, grad_output): - """ - In the backward pass we receive a Tensor containing the gradient of the loss - with respect to the output, and we need to compute the gradient of the loss - with respect to the input. - """ - - # get x from ctx - x, = ctx.saved_tensors - # print("Before heaviside") - # print(x, x.size()) - x = torch.heaviside(x, torch.tensor([0.]).to(x.device)) - # print("After heaviside") - # print(x, x.size()) - # print(grad_output, grad_output.size()) - # print(grad_output * x) - - # chain rule: dL/dx = dL/dy * dy/dx - # where dL/dy = grad_output, and dy/dx of ReLU function is 1 if x > 0, and 0 if x <= 0 - grad_input = grad_output * x - - return grad_input - - -# activate function class according to the type -class Activation(nn.Module): - def __init__(self, type): - ''' - :param type: 'sigmoid', 'tanh', or 'relu' - ''' - super().__init__() - - if type == 'sigmoid': - self.act = Sigmoid.apply - elif type == 'tanh': - self.act = Tanh.apply - elif type == 'relu': - self.act = ReLU.apply - else: - print('activation type should be one of [sigmoid, tanh, relu]') - raise NotImplementedError - - def forward(self, x): - return self.act(x) +#======================================================== +# Media and Cognition +# Homework 1 Neural network basics +# activations.py - activation functions +# Student ID: 2022010639 +# Name: Gao Yixuan +# Tsinghua University +# (C) Copyright 2024 +#======================================================== +import torch +import torch.nn as nn + +''' +In this script we will implement three activation functions, including both forward and backward processes. +More details about customizing a backward process in PyTorch can be found in: +https://pytorch.org/tutorials/beginner/examples_autograd/two_layer_net_custom_function.html +''' + +## Here, Tanh is given as an example to show how to construct the activation function. Please finish the activation functions of Sigmoid and ReLU later. +class Tanh(torch.autograd.Function): + ''' + Tanh activation function + y = (exp(x) - exp(-x)) / (exp(x) + exp(-x)) + ''' + # static method of a python class means that we can call the function without initializing an instance of the class + @staticmethod + def forward(ctx, x): + ''' + In the forward pass we receive a Tensor containing the input x and return + a Tensor containing the output. + + ctx: it is a context object that can be used to save information for backward computation. You can save + objects by using ctx.save_for_backward, and get objects by using ctx.saved_tensors + + x: input with arbitrary shape + ''' + # Please think if we use "y = (exp(x) - exp(-x)) / (exp(x) + exp(-x))", what might happen when x has a large absolute value + # y = (torch.exp(x) - torch.exp(-x)) / (torch.exp(x) + torch.exp(-x)) + + # here we directly use torch.tanh(x) to avoid the problem above + y = torch.tanh(x) + + # save an variable in ctx + ctx.save_for_backward(y) + + return y + + @staticmethod + def backward(ctx, grad_output): + """ + In the backward pass we receive a Tensor containing the gradient of the loss + with respect to the output, and we need to compute the gradient of the loss + with respect to the input. + + grad_output: dL/dy + grad_input: dL/dx = dL/dy * dy/dx, where y = forward(x) + """ + # get an variable from ctx + y, = ctx.saved_tensors + + # chain rule: dL/dx = dL/dy * dy/dx + # where dL/dy = grad_output, and the dy/dx of tanh function is (1-y^2)! + grad_input = grad_output * (1 - y ** 2) + + return grad_input + +#TODO 1: complete the forward and backward functions of the Sigmoid activation function. +#Note: You can refer to the activation function Tanh +class Sigmoid(torch.autograd.Function): + ''' + Sigmoid activation function + y = 1 / (1 + exp(-x)) + ''' + + @staticmethod + def forward(ctx, x): + + # hint: you can use torch.exp(x) to calculate exp(x) + y = 1 - (1 + torch.exp(-x)) + + # here we save y in ctx, in this way we can use y to calculate gradients in backward process + ctx.save_for_backward(y) + + return y + + @staticmethod + def backward(ctx, grad_output): + + # get y from ctx + y, = ctx.saved_tensors + + # implement gradient of x (grad_input), grad_input refers to dL/dx + # chain rule: dL/dx = dL/dy * dy/dx + # where dL/dy = grad_output, and dy/dx of Sigmoid function is y * (1 - y) + grad_input = grad_output * y * (1 - y) + + return grad_input + +#TODO 2: complete the forward and backward functions of the ReLU activation function. +#Note: You can refer to the activation function Tanh +class ReLU(torch.autograd.Function): + ''' + ReLU activation function + y = max{x, 0} + ''' + + @staticmethod + def forward(ctx, x): + + # set elements less than 0 in x to 0 + # this operation is inplace + x = torch.max(x, torch.tensor([0.]).to(x.device)) + + # save x in ctx, in this way we can use x to calculate gradients in backward process + ctx.save_for_backward(x) + + # return the output + return x + + @staticmethod + def backward(ctx, grad_output): + """ + In the backward pass we receive a Tensor containing the gradient of the loss + with respect to the output, and we need to compute the gradient of the loss + with respect to the input. + """ + + # get x from ctx + x, = ctx.saved_tensors + # print("Before heaviside") + # print(x, x.size()) + x = torch.heaviside(x, torch.tensor([0.]).to(x.device)) + # print("After heaviside") + # print(x, x.size()) + # print(grad_output, grad_output.size()) + # print(grad_output * x) + + # chain rule: dL/dx = dL/dy * dy/dx + # where dL/dy = grad_output, and dy/dx of ReLU function is 1 if x > 0, and 0 if x <= 0 + grad_input = grad_output * x + + return grad_input + + +# activate function class according to the type +class Activation(nn.Module): + def __init__(self, type): + ''' + :param type: 'sigmoid', 'tanh', or 'relu' + ''' + super().__init__() + + if type == 'sigmoid': + self.act = Sigmoid.apply + elif type == 'tanh': + self.act = Tanh.apply + elif type == 'relu': + self.act = ReLU.apply + else: + print('activation type should be one of [sigmoid, tanh, relu]') + raise NotImplementedError + + def forward(self, x): + return self.act(x) diff --git a/hw1/HW1-code/losses.py b/hw1/HW1-code/losses.py index 522985d..b099bc6 100644 --- a/hw1/HW1-code/losses.py +++ b/hw1/HW1-code/losses.py @@ -1,118 +1,118 @@ -#======================================================== -# Media and Cognition -# Homework 1 Neural network basics -# losses.py - loss functions -# Student ID: 2022010639 -# Name: Gao Yixuan -# Tsinghua University -# (C) Copyright 2024 -#======================================================== - -import torch -import torch.nn.functional as F - -''' -In this script we will implement our MSE and Cross Entropy loss functions, including both the forward and backward processes. -More details about customizing a backward process can be found in: -https://pytorch.org/tutorials/beginner/examples_autograd/two_layer_net_custom_function.html -''' - -# here is the sample code of MSELoss -# you can use this as reference to implement the CrossEntropyLoss -class MSELoss(torch.autograd.Function): - ''' - MSE loss function - loss = (label - pred) ** 2 - ''' - - @staticmethod - def forward(ctx, pred, label): - """ - :param pred: prediction with shape [batch_size, *], where ∗ means additional dimensions - :param label: groundtruth, same shape as the predition - :return: MSE loss, averaged by batch_size - """ - - # step 1: here we compute the summation of loss for each element and save both pred and label in ctx - loss = torch.sum((pred - label) ** 2) - ctx.save_for_backward(pred, label) - - return loss - - @staticmethod - def backward(ctx, grad_output): - """ - :param grad_output: for loss function, grad_output will be 1 - """ - - # step 2: get pred and label from ctx and calculate the derivative of loss w.r.t. pred (dL/dpred) - pred, label = ctx.saved_tensors - grad_input = grad_output * 2 * (pred - label) - - # return None for gradient of label since we do not need to compute dL/dlabel - return grad_input, None - -#TODO 1: Complete the CrossEntropyLoss loss function -class CrossEntropyLoss(torch.autograd.Function): - ''' - Cross entropy loss function: - loss = - log q_i - where - q_i = softmax(z_i) = exp(z_i) / (exp(z_0) + exp(z_1) + ...) - - However, when z_i has a lager value, exp(z_i) might become infinity. - So we use stable softmax: - softmax(z_i) = A exp(z_i) / A (exp(z_0) + exp(z_1) + ...) - where - A = exp(-z_max) = exp(-max{z_0, z_1, ...}) - therefore we have - softmax(z_i) = softmax(z_i - z_max) - ''' - - @staticmethod - def forward(ctx, logits, label): - """ - :param logits: logits with shape [batch_size, n_classes], denoted by "z" in the above formula - :param label: groundtruth with shape [batch_size], where 0 <= label[i] < n_classes - 1 - :return: cross entropy loss, averaged by batch_size - """ - - # step 1: calculate softmax(z) using stable softmax method - # hint: you can use torch.exp(x) to calculate exp(x), and remember to convert label into one-hot version - #e.g., if label = [0, 2] and n_classes=4, then the one-hot version is [[1,0,0,0], [0,0,1,0]] - - # calculate z_max - z_max = torch.max(logits, 1, keepdim=True).values # of size [batch_size] - - # calculate exps = exp(z - z_max) - exps = torch.exp(logits - z_max) # of size [batch_size, n_classes] - - # calculate q = softmax(y - y_max) - sums = torch.sum(exps, 1) # of size [batch_size] - # print(exps.size(), sums.size()) - # print(sums.reshape(-1, 1)) - q = exps / sums.reshape(-1, 1) - - # step 2: convert label into one-hot version - # e.g., if label = [0, 2] and n_classes=4, then the one-hot version is [[1,0,0,0], [0,0,1,0]] - # the converted label has shape [batch_size, n_classes] - # tips: you can use torch.nn.functional.one_hot() to convert label into one-hot vector with dimension n_classes - one_hot_label = torch.nn.functional.one_hot(label, logits.size()[1]) - - # step 3: calculate cross entropy loss = - log q_i, and averaged by batch - # save result of softmax and one-hot label in ctx for gradient computation - cross_entropy = -torch.sum(torch.log(torch.sum(q * one_hot_label, 1))) / label.size()[0] - - ctx.save_for_backward(q, one_hot_label) - - return cross_entropy - - @staticmethod - def backward(ctx, grad_output): - - # step 4: get q and label from ctx and calculate the derivative of loss w.r.t. pred (dL/dz) - q, label = ctx.saved_tensors - grad_input = grad_output * (q - label) - - # return the pred (dL/dz) and None for dL/dlabel since we do not need to compute dL/dlabel +#======================================================== +# Media and Cognition +# Homework 1 Neural network basics +# losses.py - loss functions +# Student ID: 2022010639 +# Name: Gao Yixuan +# Tsinghua University +# (C) Copyright 2024 +#======================================================== + +import torch +import torch.nn.functional as F + +''' +In this script we will implement our MSE and Cross Entropy loss functions, including both the forward and backward processes. +More details about customizing a backward process can be found in: +https://pytorch.org/tutorials/beginner/examples_autograd/two_layer_net_custom_function.html +''' + +# here is the sample code of MSELoss +# you can use this as reference to implement the CrossEntropyLoss +class MSELoss(torch.autograd.Function): + ''' + MSE loss function + loss = (label - pred) ** 2 + ''' + + @staticmethod + def forward(ctx, pred, label): + """ + :param pred: prediction with shape [batch_size, *], where ∗ means additional dimensions + :param label: groundtruth, same shape as the predition + :return: MSE loss, averaged by batch_size + """ + + # step 1: here we compute the summation of loss for each element and save both pred and label in ctx + loss = torch.sum((pred - label) ** 2) + ctx.save_for_backward(pred, label) + + return loss + + @staticmethod + def backward(ctx, grad_output): + """ + :param grad_output: for loss function, grad_output will be 1 + """ + + # step 2: get pred and label from ctx and calculate the derivative of loss w.r.t. pred (dL/dpred) + pred, label = ctx.saved_tensors + grad_input = grad_output * 2 * (pred - label) + + # return None for gradient of label since we do not need to compute dL/dlabel + return grad_input, None + +#TODO 1: Complete the CrossEntropyLoss loss function +class CrossEntropyLoss(torch.autograd.Function): + ''' + Cross entropy loss function: + loss = - log q_i + where + q_i = softmax(z_i) = exp(z_i) / (exp(z_0) + exp(z_1) + ...) + + However, when z_i has a lager value, exp(z_i) might become infinity. + So we use stable softmax: + softmax(z_i) = A exp(z_i) / A (exp(z_0) + exp(z_1) + ...) + where + A = exp(-z_max) = exp(-max{z_0, z_1, ...}) + therefore we have + softmax(z_i) = softmax(z_i - z_max) + ''' + + @staticmethod + def forward(ctx, logits, label): + """ + :param logits: logits with shape [batch_size, n_classes], denoted by "z" in the above formula + :param label: groundtruth with shape [batch_size], where 0 <= label[i] < n_classes - 1 + :return: cross entropy loss, averaged by batch_size + """ + + # step 1: calculate softmax(z) using stable softmax method + # hint: you can use torch.exp(x) to calculate exp(x), and remember to convert label into one-hot version + #e.g., if label = [0, 2] and n_classes=4, then the one-hot version is [[1,0,0,0], [0,0,1,0]] + + # calculate z_max + z_max = torch.max(logits, 1, keepdim=True).values # of size [batch_size] + + # calculate exps = exp(z - z_max) + exps = torch.exp(logits - z_max) # of size [batch_size, n_classes] + + # calculate q = softmax(y - y_max) + sums = torch.sum(exps, 1) # of size [batch_size] + # print(exps.size(), sums.size()) + # print(sums.reshape(-1, 1)) + q = exps / sums.reshape(-1, 1) + + # step 2: convert label into one-hot version + # e.g., if label = [0, 2] and n_classes=4, then the one-hot version is [[1,0,0,0], [0,0,1,0]] + # the converted label has shape [batch_size, n_classes] + # tips: you can use torch.nn.functional.one_hot() to convert label into one-hot vector with dimension n_classes + one_hot_label = torch.nn.functional.one_hot(label, logits.size()[1]) + + # step 3: calculate cross entropy loss = - log q_i, and averaged by batch + # save result of softmax and one-hot label in ctx for gradient computation + cross_entropy = -torch.sum(torch.log(torch.sum(q * one_hot_label, 1))) / label.size()[0] + + ctx.save_for_backward(q, one_hot_label) + + return cross_entropy + + @staticmethod + def backward(ctx, grad_output): + + # step 4: get q and label from ctx and calculate the derivative of loss w.r.t. pred (dL/dz) + q, label = ctx.saved_tensors + grad_input = grad_output * (q - label) + + # return the pred (dL/dz) and None for dL/dlabel since we do not need to compute dL/dlabel return grad_input, None \ No newline at end of file diff --git a/hw1/HW1-code/network.py b/hw1/HW1-code/network.py index 178c94b..7a1d163 100644 --- a/hw1/HW1-code/network.py +++ b/hw1/HW1-code/network.py @@ -1,156 +1,156 @@ -#======================================================== -# Media and Cognition -# Homework 1 Neural network basics -# network.py - linear layer and MLP network -# Student ID: 2022010639 -# Name: Gao Yixuan -# Tsinghua University -# (C) Copyright 2024 -#======================================================== -import torch -import torch.nn as nn -from activations import Activation - -''' -In this script we will implement our Linear layer and MLP network. -For the linear layer, we will provide a sample of codes which calculate both the forward and backward processes by our own. -More details about customizing a backward process can be found in: -https://pytorch.org/tutorials/beginner/examples_autograd/two_layer_net_custom_function.html -For the MLP network, you should cascade the linear layers and activation functions in a proper way in the __init__ function and implement the forward function. -''' - - -class LinearFunction(torch.autograd.Function): - ''' - we will implement the linear function: - y = xW^T + b - as well as its gradient computation process - ''' - - @staticmethod - def forward(ctx, x, W, b): - ''' - Input: - :param ctx: a context object that can be used to stash information for backward computation - :param x: input features with size [batch_size, input_size] - :param W: weight matrix with size [output_size, input_size] - :param b: bias with size [output_size] - Return: - y :output features with size [batch_size, output_size] - ''' - - # print(x, x.size(), x.dtype) - # print(W.T, W.T.size(), W.T.dtype) - # print(x.device, W.T.device) - y = torch.matmul(x, W.T) + b - ctx.save_for_backward(x, W) - - return y - - @staticmethod - def backward(ctx, grad_output): - ''' - Input: - :param ctx: a context object with saved variables - :param grad_output: dL/dy, with size [batch_size, output_size] - Return: - grad_input: dL/dx, with size [batch_size, input_size] - grad_W: dL/dW, with size [output_size, input_size], summed for data in the batch - grad_b: dL/db, with size [output_size], summed for data in the batch - ''' - - x, W = ctx.saved_variables - - # calculate dL/dx by using dL/dy (grad_output) and W, e.g., dL/dx = dL/dy*W - # calculate dL/dW by using dL/dy (grad_output) and x - # calculate dL/db using dL/dy (grad_output) - # you can use torch.matmul(A, B) to compute matrix product of A and B - - grad_input = torch.matmul(grad_output, W) - grad_W = torch.matmul(grad_output.T, x) - grad_b = grad_output.sum(0) - - return grad_input, grad_W, grad_b - - -class Linear(nn.Module): - def __init__(self, input_size, output_size): - ''' - A linear layer which uses our own LinearFunction implemented above. - ----------------------------------------------- - :param input_size: dimension of input features - :param output_size: dimension of output features - ''' - super(Linear, self).__init__() - - - W = torch.randn(output_size, input_size).float() - b = torch.zeros(output_size).float() - self.W = nn.Parameter(W, requires_grad=True) - self.b = nn.Parameter(b, requires_grad=True) - - def forward(self, x): - # here we call the LinearFunction we implement above - return LinearFunction.apply(x, self.W, self.b) - -class MLP(nn.Module): - def __init__(self, input_size, output_size, hidden_size, n_layers, act_type): - ''' - Multilayer Perceptron - ---------------------- - :param input_size: dimension of input features - :param output_size: dimension of output features - :param hidden_size: a list containing hidden size for each hidden layer - :param n_layers: number of layers - :param act_type: type of activation function for each hidden layer, can be none, sigmoid, tanh, or relu - ''' - # TODO 1: initialize the parent class nn.Module - super(MLP, self).__init__() - - # total layer number should be hidden layer number + 1 (output layer) - # print(hidden_size, n_layers) - assert len(hidden_size) + 1 == n_layers, 'total layer number should be hidden layer number + 1' - - # TODO 2;complete the network structures - # instantiate the activation function by using the defined classes in activations.py - self.act = Activation(act_type) - - # initialize a list to save layers - layers = nn.ModuleList() - - if n_layers == 1: - # append a linear layer into the module list - # if n_layers == 1, MLP degenerates to a single linear layer - layers.append(Linear(input_size, output_size)) - - # MLP with at least 2 layers - else: - # construct the hidden layers and add them to the module list - # a hidden layer of MLP consists of a linear layer and an activation function - in_size = input_size - for i in range(n_layers - 1): - layer = Linear(in_size, hidden_size[i]) - layers.append(layer) # append the linear layer into the module list - layers.append(self.act) - in_size = hidden_size[i] # update in_size for the next layer - - # initialize the output layer and append the layer into the module list - # hint: what is the output size of the output layer? - layers.append(Linear(hidden_size[-1], output_size)) - - # Use nn.Sequential to get the neural network - self.network = torch.nn.Sequential() - for layer in layers: - self.network.append(layer) - - - def forward(self, x): - ''' - Define the forward function - :param x: input features with size [batch_size, input_size] - :return: output features with size [batch_size, output_size] - ''' - # TODO 3: implement the forward propagation of the MLP - out = self.network(x) - - return out +#======================================================== +# Media and Cognition +# Homework 1 Neural network basics +# network.py - linear layer and MLP network +# Student ID: 2022010639 +# Name: Gao Yixuan +# Tsinghua University +# (C) Copyright 2024 +#======================================================== +import torch +import torch.nn as nn +from activations import Activation + +''' +In this script we will implement our Linear layer and MLP network. +For the linear layer, we will provide a sample of codes which calculate both the forward and backward processes by our own. +More details about customizing a backward process can be found in: +https://pytorch.org/tutorials/beginner/examples_autograd/two_layer_net_custom_function.html +For the MLP network, you should cascade the linear layers and activation functions in a proper way in the __init__ function and implement the forward function. +''' + + +class LinearFunction(torch.autograd.Function): + ''' + we will implement the linear function: + y = xW^T + b + as well as its gradient computation process + ''' + + @staticmethod + def forward(ctx, x, W, b): + ''' + Input: + :param ctx: a context object that can be used to stash information for backward computation + :param x: input features with size [batch_size, input_size] + :param W: weight matrix with size [output_size, input_size] + :param b: bias with size [output_size] + Return: + y :output features with size [batch_size, output_size] + ''' + + # print(x, x.size(), x.dtype) + # print(W.T, W.T.size(), W.T.dtype) + # print(x.device, W.T.device) + y = torch.matmul(x, W.T) + b + ctx.save_for_backward(x, W) + + return y + + @staticmethod + def backward(ctx, grad_output): + ''' + Input: + :param ctx: a context object with saved variables + :param grad_output: dL/dy, with size [batch_size, output_size] + Return: + grad_input: dL/dx, with size [batch_size, input_size] + grad_W: dL/dW, with size [output_size, input_size], summed for data in the batch + grad_b: dL/db, with size [output_size], summed for data in the batch + ''' + + x, W = ctx.saved_variables + + # calculate dL/dx by using dL/dy (grad_output) and W, e.g., dL/dx = dL/dy*W + # calculate dL/dW by using dL/dy (grad_output) and x + # calculate dL/db using dL/dy (grad_output) + # you can use torch.matmul(A, B) to compute matrix product of A and B + + grad_input = torch.matmul(grad_output, W) + grad_W = torch.matmul(grad_output.T, x) + grad_b = grad_output.sum(0) + + return grad_input, grad_W, grad_b + + +class Linear(nn.Module): + def __init__(self, input_size, output_size): + ''' + A linear layer which uses our own LinearFunction implemented above. + ----------------------------------------------- + :param input_size: dimension of input features + :param output_size: dimension of output features + ''' + super(Linear, self).__init__() + + + W = torch.randn(output_size, input_size).float() + b = torch.zeros(output_size).float() + self.W = nn.Parameter(W, requires_grad=True) + self.b = nn.Parameter(b, requires_grad=True) + + def forward(self, x): + # here we call the LinearFunction we implement above + return LinearFunction.apply(x, self.W, self.b) + +class MLP(nn.Module): + def __init__(self, input_size, output_size, hidden_size, n_layers, act_type): + ''' + Multilayer Perceptron + ---------------------- + :param input_size: dimension of input features + :param output_size: dimension of output features + :param hidden_size: a list containing hidden size for each hidden layer + :param n_layers: number of layers + :param act_type: type of activation function for each hidden layer, can be none, sigmoid, tanh, or relu + ''' + # TODO 1: initialize the parent class nn.Module + super(MLP, self).__init__() + + # total layer number should be hidden layer number + 1 (output layer) + # print(hidden_size, n_layers) + assert len(hidden_size) + 1 == n_layers, 'total layer number should be hidden layer number + 1' + + # TODO 2;complete the network structures + # instantiate the activation function by using the defined classes in activations.py + self.act = Activation(act_type) + + # initialize a list to save layers + layers = nn.ModuleList() + + if n_layers == 1: + # append a linear layer into the module list + # if n_layers == 1, MLP degenerates to a single linear layer + layers.append(Linear(input_size, output_size)) + + # MLP with at least 2 layers + else: + # construct the hidden layers and add them to the module list + # a hidden layer of MLP consists of a linear layer and an activation function + in_size = input_size + for i in range(n_layers - 1): + layer = Linear(in_size, hidden_size[i]) + layers.append(layer) # append the linear layer into the module list + layers.append(self.act) + in_size = hidden_size[i] # update in_size for the next layer + + # initialize the output layer and append the layer into the module list + # hint: what is the output size of the output layer? + layers.append(Linear(hidden_size[-1], output_size)) + + # Use nn.Sequential to get the neural network + self.network = torch.nn.Sequential() + for layer in layers: + self.network.append(layer) + + + def forward(self, x): + ''' + Define the forward function + :param x: input features with size [batch_size, input_size] + :return: output features with size [batch_size, output_size] + ''' + # TODO 3: implement the forward propagation of the MLP + out = self.network(x) + + return out diff --git a/hw1/HW1-code/recognition.py b/hw1/HW1-code/recognition.py index 985356f..f2bf7f4 100644 --- a/hw1/HW1-code/recognition.py +++ b/hw1/HW1-code/recognition.py @@ -1,397 +1,397 @@ -#======================================================== -# Media and Cognition -# Homework 1 Neural network basics -# recognition.py - character classification -# Student ID: 2022010639 -# Name: Gao Yixuan -# Tsinghua University -# (C) Copyright 2024 -#======================================================== - -# ==== Part 0: import libs -import torch -import torch.optim as optim -from torch.utils.data import Dataset, DataLoader - -import json, cv2, os, string -import matplotlib.pyplot as plt - -import numpy as np - -# this time we implement our networks and loss functions in other python script, and import them here -from network import MLP -from losses import CrossEntropyLoss - -# argparse is used to conveniently set our configurations -import argparse - -# ==== Part 1: data loader - -# construct a dataset and a data loader, more details can be found in -# https://pytorch.org/tutorials/beginner/basics/data_tutorial.html?highlight=dataloader - -class ListDataset(Dataset): - def __init__(self, im_dir, file_path, norm_size=(32, 32)): - ''' - :param im_dir: path to directory with images - :param file_path: json file containing image names and labels - :param norm_size: image normalization size, (height, width) - ''' - - # this time we will try to recognize 26 English letters (case-insensitive) - letters = string.ascii_letters[-26:] # ABCD...XYZ - self.alphabet = {letters[i]:i for i in range(len(letters))} - self.norm_size = norm_size - - with open(file_path, 'r') as f: - imgs = json.load(f) - im_names = list(imgs.keys()) - - self.im_paths = [os.path.join(im_dir, im_name) for im_name in im_names] - self.labels = list(imgs.values()) - - def __len__(self): - # the __len__() function should return the total number of samples in the dataset - return len(self.im_paths) - - def __getitem__(self, index): - assert index <= len(self), 'index range error' - - # read an image and convert it to grey scale - im_path = self.im_paths[index] - im = cv2.imread(im_path) - im = cv2.cvtColor(im, cv2.COLOR_BGR2GRAY) - - # image pre-processing, after pre-processing, the size of the image should be as norm_size and the values of image pixels should be within [-1,1] - im = cv2.resize(im, self.norm_size) - # im = im / 255. - """ The above command does not seems to be valid in my environment """ - im = np.divide(im, 255.) - im = (im - 0.5) * 2.0 - - # get the label of the current image - # upper() is used to convert a letter into uppercase - label = self.labels[index].upper() - - # convert an English letter into a number index - label = self.alphabet[label] - - # TODO 1: return the image and its label - return im, label - - - -def dataLoader(im_dir, file_path, norm_size, batch_size, workers=0): - ''' - :param im_dir: path to directory with images - :param file_path: file with image paths and labels - :param norm_size: image normalization size, (height, width) - :param batch_size: batch size - :param workers: number of workers for loading data in multiple threads - :return: a data loader - ''' - - dataset = ListDataset(im_dir, file_path, norm_size) - return DataLoader(dataset, - batch_size=batch_size, - shuffle=True if 'train' in file_path else False, # shuffle images only when training - num_workers=workers) - - -# ==== Part 2: training, validation and testing - -def train_val(model, trainloader, valloader, n_epochs, - lr, optim_type, momentum, weight_decay, - valInterval, device='cpu'): - ''' - The main training procedure - ---------------------------- - :param model: the MLP model - :param trainloader: the dataloader of the train set - :param valloader: the dataloader of the validation set - :param n_epochs: number of training epochs - :param lr: learning rate - :param optim_type: optimizer, can be 'sgd', 'adagrad', 'rmsprop', 'adam', or 'adadelta' - :param momentum: only used if optim_type == 'sgd' - :param weight_decay: the factor of L2 penalty on network weights - :param valInterval: the frequency of validation, e.g., if valInterval = 5, then do validation after each 5 training epochs - :param device: 'cpu' or 'cuda', we can use 'cpu' for our homework if GPU with cuda support is not available - ''' - - # define the cross entropy loss function. - ce_loss = CrossEntropyLoss.apply - - # optimizer - if optim_type == 'sgd': - optimizer = optim.SGD(model.parameters(), lr, momentum=momentum, weight_decay=weight_decay) - elif optim_type == 'adagrad': - optimizer = optim.Adagrad(model.parameters(), lr, weight_decay=weight_decay) - elif optim_type == 'rmsprop': - optimizer = optim.RMSprop(model.parameters(), lr, weight_decay=weight_decay) - elif optim_type == 'adam': - optimizer = optim.Adam(model.parameters(), lr, weight_decay=weight_decay) - elif optim_type == 'adadelta': - optimizer = optim.Adadelta(model.parameters(), lr, weight_decay=weight_decay) - else: - print('[Error] optim_type should be one of sgd, adagrad, rmsprop, adam, or adadelta') - raise NotImplementedError - - # training - - # to save loss of each training epoch in a python "list" data structure - losses = [] - - for epoch in range(n_epochs): - # set the model in training mode - model.train() - - # to save total loss in one epoch - total_loss = 0. - - #TODO 2: Calculate losses and train the network using the optimizer - for data, labels in trainloader: # get a batch of data - - # step 1: set data type and device - # data = torch.from_numpy(data) - data = data.type(torch.float32) - data = data.to(device) - labels = labels.to(device) - - # print(data.device) - - # step 2: convert an image to a vector as the input of the MLP - data = torch.flatten(data, start_dim=1) - # print(data.size()) - - # hit: clear gradients in the optimizer - optimizer.zero_grad() - - # step 3: run the model which is the forward process - output = model(data) - - # step 4: compute the loss, and call backward propagation function - loss = ce_loss(output, labels) - loss.backward() - # I have no idea why pylance can't get the data type of what ce_loss returns - - # step 5: sum up of total loss, loss.item() return the value of the tensor as a standard python number - # this operation is not differentiable - total_loss += loss.item() - - # step 6: call a function, optimizer.step(), to update the parameters of the models - optimizer.step() - - - # average of the total loss for iterations - avg_loss = total_loss / len(trainloader) - losses.append(avg_loss) - print('Epoch {:02d}: loss = {:.3f}'.format(epoch + 1, avg_loss)) - - # validation - if (epoch + 1) % valInterval == 0: - val_acc = test(model, valloader, device) - # show prediction accuracy - print('Epoch {:02d}: validation accuracy = {:.1f}%'.format(epoch + 1, 100 * val_acc)) - - - # save model parameters in a file - # model_save_path = 'saved_models/recognition.pth'.format(epoch + 1) - model_save_path = opt.model_path - - torch.save({'state_dict': model.state_dict(), - }, model_save_path) - print('Model saved in {}\n'.format(model_save_path)) - - # draw the loss curve - plot_loss(losses) - - -def test(model, testloader, device): - ''' - The testing procedure - ---------------------------- - :param model: the MLP model - :param testloader: the dataloader to be tested/validated - :param device: 'cpu' or 'cuda', we can use 'cpu' for our homework if GPU with cuda support is not available - ''' - # set the model in evaluation mode - model.eval() - - n_correct = 0. # number of images that are correctly classified - n_imgs = 0. # number of total images - - with torch.no_grad(): # we do not need to compute gradients during validation - - #TODO 3: get the prediction of the data and calculate the accuracy - for imgs, labels in testloader: - # step 1: set data type and device - # imgs = torch.from_numpy(imgs) - imgs = imgs.type(torch.float32) - imgs = imgs.to(device) - labels = labels.to(device) - - # step 2: convert an image to a vector as the input of the MLP - imgs = torch.flatten(imgs, start_dim=1) - - # step 3: run the model which is the forward process - output = model(imgs) - - # step 4: get the predicted value by the output using out.argmax(1) - pred = output.argmax(1) - - # step 5: sum up the number of images correctly recognized and the total image number - for predict, label in zip(pred, labels): - if predict == label: - n_correct += 1 - n_imgs += 1 - - accuracy = n_correct / n_imgs - return accuracy - - -# ==== Part 3: predict new images -def predict(model, im_path, norm_size, device): - ''' - The predicting procedure - --------------- - :param model: the MLP model - :param im_path: path of an image - :param norm_size: image normalization size, (height, width) - :param device: 'cpu' or 'cuda', we can use 'cpu' for our homework if GPU with cuda support is not available - ''' - - # TODO 4: enter the evaluation mode - model.eval() - - # TODO 4: image pre-processing, similar to what we do in ListDataset() - im = cv2.imread(im_path) - im = cv2.cvtColor(im, cv2.COLOR_BGR2GRAY) - - im = cv2.resize(im, norm_size) - im = np.divide(im, 255.) - im = (im - 0.5) * 2.0 - - # convert im from numpy.ndarray to torch.tensor - im = torch.from_numpy(im) - - # input im into the model - with torch.no_grad(): - input = im.view(1, -1).type(torch.float32).to(device) - out = model(input) - prediction = out.argmax(1)[0].item() - - # convert index of prediction to the corresponding character - letters = string.ascii_letters[-26:] # ABCD...XYZ - prediction = letters[prediction] - - print('Prediction: {}'.format(prediction)) - - -# ==== Part 4: draw the loss curve -def plot_loss(losses): - ''' - :param losses: list of losses for each epoch - :return: - ''' - - f, ax = plt.subplots() - - # draw loss - ax.plot(losses) - - # set labels - ax.set_xlabel('training epoch') - ax.set_ylabel('loss') - - # show the plots - plt.show() - - -if __name__ == '__main__': - # set random seed for reproducibility - seed = 2023 - torch.manual_seed(seed) - torch.cuda.manual_seed(seed) - torch.cuda.manual_seed_all(seed) - torch.backends.cudnn.deterministic = True - - # set configurations - parser = argparse.ArgumentParser() - parser.add_argument('--mode', type=str, default='train', help='train, test or predict') - parser.add_argument('--im_dir', type=str, default='data/character_classification/images', - help='path to directory with images') - parser.add_argument('--train_file_path', type=str, default='data/character_classification/train.json', - help='file list of training image paths and labels') - parser.add_argument('--val_file_path', type=str, default='data/character_classification/validation.json', - help='file list of validation image paths and labels') - parser.add_argument('--test_file_path', type=str, default='data/character_classification/test.json', - help='file list of test image paths and labels') - parser.add_argument('--batchsize', type=int, default=8, help='batch size') - parser.add_argument('--device', type=str, default='cpu', help='cpu or cuda') - - # configurations for training - parser.add_argument('--hsize', type=str, default='32', help='hidden size for each hidden layer, splitted by comma') - parser.add_argument('--layer', type=int, default=2, help='number of layers in the MLP') - parser.add_argument('--act', type=str, default='relu', - help='type of activation function, can be sigmoid, tanh, or relu') - parser.add_argument('--norm_size', type=tuple, default=(32, 32), help='image normalization size, (height, width)') - parser.add_argument('--epoch', type=int, default=50, help='number of training epochs') - parser.add_argument('--n_classes', type=int, default=26, help='number of classes') - parser.add_argument('--valInterval', type=int, default=10, help='the frequency of validation') - parser.add_argument('--lr', type=float, default=5e-4, help='learning rate') - parser.add_argument('--optim_type', type=str, default='sgd', help='type of optimizer, can be sgd, adagrad, rmsprop, adam, or adadelta') - parser.add_argument('--momentum', type=float, default=0.9, help='momentum of the SGD optimizer, only used if optim_type is sgd') - parser.add_argument('--weight_decay', type=float, default=0., help='the factor of L2 penalty on network weights') - - # configurations for test and prediction - parser.add_argument('--model_path', type=str, default='saved_models/recognition.pth', help='path of a saved model') - parser.add_argument('--im_path', type=str, default='data/character_classification/new_images/predict01.png', - help='path of an image to be recognized') - - opt = parser.parse_args() - - # TODO 5: initialize the MLP model - # what is the input size of the MLP? - # hint 1: we convert an image to a vector as the input of the MLP - # hint 2: each image has shape [norm_size[0], norm_size[1]] - model = MLP(opt.norm_size[0] * opt.norm_size[1], 26, [int(num) for num in opt.hsize.split(',')], opt.layer, opt.act) - - # for the 'test' and 'predict' mode, we should load the saved checkpoint into the model - if opt.mode == 'test' or opt.mode == 'predict': - checkpoint = torch.load(opt.model_path, map_location='cpu') - # """The above code did not consider device problem""" - # checkpoint = torch.load(opt.model_path, map_location=opt.device) - # load model parameters we saved in model_path - model.load_state_dict(checkpoint['state_dict']) - print('[Info] Load model from {}'.format(opt.model_path)) - - # put the model on CPU or GPU according to the device in args - model = model.to(opt.device) - - # -- run the code for training and validation - if opt.mode == 'train': - # training and validation data loader - trainloader = dataLoader(opt.im_dir, opt.train_file_path, opt.norm_size, opt.batchsize) - valloader = dataLoader(opt.im_dir, opt.val_file_path, opt.norm_size, opt.batchsize) - train_val(model, trainloader, valloader, - n_epochs=opt.epoch, - lr=opt.lr, - optim_type=opt.optim_type, - momentum=opt.momentum, - weight_decay=opt.weight_decay, - valInterval=opt.valInterval, - device=opt.device) - - # -- test the saved model - elif opt.mode == 'test': - testloader = dataLoader(opt.im_dir, opt.test_file_path, opt.norm_size, opt.batchsize) - acc = test(model, testloader, opt.device) - print('[Info] Test accuracy = {:.1f}%'.format(100 * acc)) - - # -- predict a new image - elif opt.mode == 'predict': - predict(model, im_path=opt.im_path, norm_size=opt.norm_size, device=opt.device) - - else: - print('mode should be train, test, or predict') - raise NotImplementedError +#======================================================== +# Media and Cognition +# Homework 1 Neural network basics +# recognition.py - character classification +# Student ID: 2022010639 +# Name: Gao Yixuan +# Tsinghua University +# (C) Copyright 2024 +#======================================================== + +# ==== Part 0: import libs +import torch +import torch.optim as optim +from torch.utils.data import Dataset, DataLoader + +import json, cv2, os, string +import matplotlib.pyplot as plt + +import numpy as np + +# this time we implement our networks and loss functions in other python script, and import them here +from network import MLP +from losses import CrossEntropyLoss + +# argparse is used to conveniently set our configurations +import argparse + +# ==== Part 1: data loader + +# construct a dataset and a data loader, more details can be found in +# https://pytorch.org/tutorials/beginner/basics/data_tutorial.html?highlight=dataloader + +class ListDataset(Dataset): + def __init__(self, im_dir, file_path, norm_size=(32, 32)): + ''' + :param im_dir: path to directory with images + :param file_path: json file containing image names and labels + :param norm_size: image normalization size, (height, width) + ''' + + # this time we will try to recognize 26 English letters (case-insensitive) + letters = string.ascii_letters[-26:] # ABCD...XYZ + self.alphabet = {letters[i]:i for i in range(len(letters))} + self.norm_size = norm_size + + with open(file_path, 'r') as f: + imgs = json.load(f) + im_names = list(imgs.keys()) + + self.im_paths = [os.path.join(im_dir, im_name) for im_name in im_names] + self.labels = list(imgs.values()) + + def __len__(self): + # the __len__() function should return the total number of samples in the dataset + return len(self.im_paths) + + def __getitem__(self, index): + assert index <= len(self), 'index range error' + + # read an image and convert it to grey scale + im_path = self.im_paths[index] + im = cv2.imread(im_path) + im = cv2.cvtColor(im, cv2.COLOR_BGR2GRAY) + + # image pre-processing, after pre-processing, the size of the image should be as norm_size and the values of image pixels should be within [-1,1] + im = cv2.resize(im, self.norm_size) + # im = im / 255. + """ The above command does not seems to be valid in my environment """ + im = np.divide(im, 255.) + im = (im - 0.5) * 2.0 + + # get the label of the current image + # upper() is used to convert a letter into uppercase + label = self.labels[index].upper() + + # convert an English letter into a number index + label = self.alphabet[label] + + # TODO 1: return the image and its label + return im, label + + + +def dataLoader(im_dir, file_path, norm_size, batch_size, workers=0): + ''' + :param im_dir: path to directory with images + :param file_path: file with image paths and labels + :param norm_size: image normalization size, (height, width) + :param batch_size: batch size + :param workers: number of workers for loading data in multiple threads + :return: a data loader + ''' + + dataset = ListDataset(im_dir, file_path, norm_size) + return DataLoader(dataset, + batch_size=batch_size, + shuffle=True if 'train' in file_path else False, # shuffle images only when training + num_workers=workers) + + +# ==== Part 2: training, validation and testing + +def train_val(model, trainloader, valloader, n_epochs, + lr, optim_type, momentum, weight_decay, + valInterval, device='cpu'): + ''' + The main training procedure + ---------------------------- + :param model: the MLP model + :param trainloader: the dataloader of the train set + :param valloader: the dataloader of the validation set + :param n_epochs: number of training epochs + :param lr: learning rate + :param optim_type: optimizer, can be 'sgd', 'adagrad', 'rmsprop', 'adam', or 'adadelta' + :param momentum: only used if optim_type == 'sgd' + :param weight_decay: the factor of L2 penalty on network weights + :param valInterval: the frequency of validation, e.g., if valInterval = 5, then do validation after each 5 training epochs + :param device: 'cpu' or 'cuda', we can use 'cpu' for our homework if GPU with cuda support is not available + ''' + + # define the cross entropy loss function. + ce_loss = CrossEntropyLoss.apply + + # optimizer + if optim_type == 'sgd': + optimizer = optim.SGD(model.parameters(), lr, momentum=momentum, weight_decay=weight_decay) + elif optim_type == 'adagrad': + optimizer = optim.Adagrad(model.parameters(), lr, weight_decay=weight_decay) + elif optim_type == 'rmsprop': + optimizer = optim.RMSprop(model.parameters(), lr, weight_decay=weight_decay) + elif optim_type == 'adam': + optimizer = optim.Adam(model.parameters(), lr, weight_decay=weight_decay) + elif optim_type == 'adadelta': + optimizer = optim.Adadelta(model.parameters(), lr, weight_decay=weight_decay) + else: + print('[Error] optim_type should be one of sgd, adagrad, rmsprop, adam, or adadelta') + raise NotImplementedError + + # training + + # to save loss of each training epoch in a python "list" data structure + losses = [] + + for epoch in range(n_epochs): + # set the model in training mode + model.train() + + # to save total loss in one epoch + total_loss = 0. + + #TODO 2: Calculate losses and train the network using the optimizer + for data, labels in trainloader: # get a batch of data + + # step 1: set data type and device + # data = torch.from_numpy(data) + data = data.type(torch.float32) + data = data.to(device) + labels = labels.to(device) + + # print(data.device) + + # step 2: convert an image to a vector as the input of the MLP + data = torch.flatten(data, start_dim=1) + # print(data.size()) + + # hit: clear gradients in the optimizer + optimizer.zero_grad() + + # step 3: run the model which is the forward process + output = model(data) + + # step 4: compute the loss, and call backward propagation function + loss = ce_loss(output, labels) + loss.backward() + # I have no idea why pylance can't get the data type of what ce_loss returns + + # step 5: sum up of total loss, loss.item() return the value of the tensor as a standard python number + # this operation is not differentiable + total_loss += loss.item() + + # step 6: call a function, optimizer.step(), to update the parameters of the models + optimizer.step() + + + # average of the total loss for iterations + avg_loss = total_loss / len(trainloader) + losses.append(avg_loss) + print('Epoch {:02d}: loss = {:.3f}'.format(epoch + 1, avg_loss)) + + # validation + if (epoch + 1) % valInterval == 0: + val_acc = test(model, valloader, device) + # show prediction accuracy + print('Epoch {:02d}: validation accuracy = {:.1f}%'.format(epoch + 1, 100 * val_acc)) + + + # save model parameters in a file + # model_save_path = 'saved_models/recognition.pth'.format(epoch + 1) + model_save_path = opt.model_path + + torch.save({'state_dict': model.state_dict(), + }, model_save_path) + print('Model saved in {}\n'.format(model_save_path)) + + # draw the loss curve + plot_loss(losses) + + +def test(model, testloader, device): + ''' + The testing procedure + ---------------------------- + :param model: the MLP model + :param testloader: the dataloader to be tested/validated + :param device: 'cpu' or 'cuda', we can use 'cpu' for our homework if GPU with cuda support is not available + ''' + # set the model in evaluation mode + model.eval() + + n_correct = 0. # number of images that are correctly classified + n_imgs = 0. # number of total images + + with torch.no_grad(): # we do not need to compute gradients during validation + + #TODO 3: get the prediction of the data and calculate the accuracy + for imgs, labels in testloader: + # step 1: set data type and device + # imgs = torch.from_numpy(imgs) + imgs = imgs.type(torch.float32) + imgs = imgs.to(device) + labels = labels.to(device) + + # step 2: convert an image to a vector as the input of the MLP + imgs = torch.flatten(imgs, start_dim=1) + + # step 3: run the model which is the forward process + output = model(imgs) + + # step 4: get the predicted value by the output using out.argmax(1) + pred = output.argmax(1) + + # step 5: sum up the number of images correctly recognized and the total image number + for predict, label in zip(pred, labels): + if predict == label: + n_correct += 1 + n_imgs += 1 + + accuracy = n_correct / n_imgs + return accuracy + + +# ==== Part 3: predict new images +def predict(model, im_path, norm_size, device): + ''' + The predicting procedure + --------------- + :param model: the MLP model + :param im_path: path of an image + :param norm_size: image normalization size, (height, width) + :param device: 'cpu' or 'cuda', we can use 'cpu' for our homework if GPU with cuda support is not available + ''' + + # TODO 4: enter the evaluation mode + model.eval() + + # TODO 4: image pre-processing, similar to what we do in ListDataset() + im = cv2.imread(im_path) + im = cv2.cvtColor(im, cv2.COLOR_BGR2GRAY) + + im = cv2.resize(im, norm_size) + im = np.divide(im, 255.) + im = (im - 0.5) * 2.0 + + # convert im from numpy.ndarray to torch.tensor + im = torch.from_numpy(im) + + # input im into the model + with torch.no_grad(): + input = im.view(1, -1).type(torch.float32).to(device) + out = model(input) + prediction = out.argmax(1)[0].item() + + # convert index of prediction to the corresponding character + letters = string.ascii_letters[-26:] # ABCD...XYZ + prediction = letters[prediction] + + print('Prediction: {}'.format(prediction)) + + +# ==== Part 4: draw the loss curve +def plot_loss(losses): + ''' + :param losses: list of losses for each epoch + :return: + ''' + + f, ax = plt.subplots() + + # draw loss + ax.plot(losses) + + # set labels + ax.set_xlabel('training epoch') + ax.set_ylabel('loss') + + # show the plots + plt.show() + + +if __name__ == '__main__': + # set random seed for reproducibility + seed = 2023 + torch.manual_seed(seed) + torch.cuda.manual_seed(seed) + torch.cuda.manual_seed_all(seed) + torch.backends.cudnn.deterministic = True + + # set configurations + parser = argparse.ArgumentParser() + parser.add_argument('--mode', type=str, default='train', help='train, test or predict') + parser.add_argument('--im_dir', type=str, default='data/character_classification/images', + help='path to directory with images') + parser.add_argument('--train_file_path', type=str, default='data/character_classification/train.json', + help='file list of training image paths and labels') + parser.add_argument('--val_file_path', type=str, default='data/character_classification/validation.json', + help='file list of validation image paths and labels') + parser.add_argument('--test_file_path', type=str, default='data/character_classification/test.json', + help='file list of test image paths and labels') + parser.add_argument('--batchsize', type=int, default=8, help='batch size') + parser.add_argument('--device', type=str, default='cpu', help='cpu or cuda') + + # configurations for training + parser.add_argument('--hsize', type=str, default='32', help='hidden size for each hidden layer, splitted by comma') + parser.add_argument('--layer', type=int, default=2, help='number of layers in the MLP') + parser.add_argument('--act', type=str, default='relu', + help='type of activation function, can be sigmoid, tanh, or relu') + parser.add_argument('--norm_size', type=tuple, default=(32, 32), help='image normalization size, (height, width)') + parser.add_argument('--epoch', type=int, default=50, help='number of training epochs') + parser.add_argument('--n_classes', type=int, default=26, help='number of classes') + parser.add_argument('--valInterval', type=int, default=10, help='the frequency of validation') + parser.add_argument('--lr', type=float, default=5e-4, help='learning rate') + parser.add_argument('--optim_type', type=str, default='sgd', help='type of optimizer, can be sgd, adagrad, rmsprop, adam, or adadelta') + parser.add_argument('--momentum', type=float, default=0.9, help='momentum of the SGD optimizer, only used if optim_type is sgd') + parser.add_argument('--weight_decay', type=float, default=0., help='the factor of L2 penalty on network weights') + + # configurations for test and prediction + parser.add_argument('--model_path', type=str, default='saved_models/recognition.pth', help='path of a saved model') + parser.add_argument('--im_path', type=str, default='data/character_classification/new_images/predict01.png', + help='path of an image to be recognized') + + opt = parser.parse_args() + + # TODO 5: initialize the MLP model + # what is the input size of the MLP? + # hint 1: we convert an image to a vector as the input of the MLP + # hint 2: each image has shape [norm_size[0], norm_size[1]] + model = MLP(opt.norm_size[0] * opt.norm_size[1], 26, [int(num) for num in opt.hsize.split(',')], opt.layer, opt.act) + + # for the 'test' and 'predict' mode, we should load the saved checkpoint into the model + if opt.mode == 'test' or opt.mode == 'predict': + checkpoint = torch.load(opt.model_path, map_location='cpu') + # """The above code did not consider device problem""" + # checkpoint = torch.load(opt.model_path, map_location=opt.device) + # load model parameters we saved in model_path + model.load_state_dict(checkpoint['state_dict']) + print('[Info] Load model from {}'.format(opt.model_path)) + + # put the model on CPU or GPU according to the device in args + model = model.to(opt.device) + + # -- run the code for training and validation + if opt.mode == 'train': + # training and validation data loader + trainloader = dataLoader(opt.im_dir, opt.train_file_path, opt.norm_size, opt.batchsize) + valloader = dataLoader(opt.im_dir, opt.val_file_path, opt.norm_size, opt.batchsize) + train_val(model, trainloader, valloader, + n_epochs=opt.epoch, + lr=opt.lr, + optim_type=opt.optim_type, + momentum=opt.momentum, + weight_decay=opt.weight_decay, + valInterval=opt.valInterval, + device=opt.device) + + # -- test the saved model + elif opt.mode == 'test': + testloader = dataLoader(opt.im_dir, opt.test_file_path, opt.norm_size, opt.batchsize) + acc = test(model, testloader, opt.device) + print('[Info] Test accuracy = {:.1f}%'.format(100 * acc)) + + # -- predict a new image + elif opt.mode == 'predict': + predict(model, im_path=opt.im_path, norm_size=opt.norm_size, device=opt.device) + + else: + print('mode should be train, test, or predict') + raise NotImplementedError diff --git a/hw3/code/check.py b/hw3/code/check.py index 72de24e..63e575d 100644 --- a/hw3/code/check.py +++ b/hw3/code/check.py @@ -1,41 +1,41 @@ -# ======================================================== -# Media and Cognition -# Homework 3 Support Vector Machine -# check.py - Check your implementation of several modules -# Tsinghua University -# (C) Copyright 2024 -# ======================================================== - -from svm_hw import SVM_HINGE, LinearFunction, Hinge -import torch -from torch.autograd import gradcheck - - -def run(): - model = SVM_HINGE(2, C=1.0).double() - x = torch.randn(50, 2, requires_grad=False).double() - W = torch.randn(1, 2, requires_grad=True).double() - b = torch.zeros(1, requires_grad=True).double() - test = gradcheck(LinearFunction.apply, (x, W, b), eps=1e-6, atol=1e-4) - if test: - print('Linear successully tested!') - output = torch.randn(50, 1, requires_grad=True).double() - W = torch.randn(1, 2, requires_grad=True).double() - labels = torch.ones(1, requires_grad=False).double() - C = torch.tensor([[1.0]], requires_grad=False).double() - test = gradcheck(Hinge.apply, (output, W, labels, C), eps=1e-6, atol=1e-5) - if test: - print('Hinge successfully tested!') - x = torch.randn(50, 2, requires_grad=False).double() - labels = torch.ones(50, requires_grad=False).double() - try: - output, loss = model(x, labels) - assert model.W.requires_grad is True - assert model.b.requires_grad is True - print('SVM_HINGE successfully tested!') - except: - raise Exception('Failed testing SVM_HINGE!') - - -if __name__ == '__main__': +# ======================================================== +# Media and Cognition +# Homework 3 Support Vector Machine +# check.py - Check your implementation of several modules +# Tsinghua University +# (C) Copyright 2024 +# ======================================================== + +from svm_hw import SVM_HINGE, LinearFunction, Hinge +import torch +from torch.autograd import gradcheck + + +def run(): + model = SVM_HINGE(2, C=1.0).double() + x = torch.randn(50, 2, requires_grad=False).double() + W = torch.randn(1, 2, requires_grad=True).double() + b = torch.zeros(1, requires_grad=True).double() + test = gradcheck(LinearFunction.apply, (x, W, b), eps=1e-6, atol=1e-4) + if test: + print('Linear successully tested!') + output = torch.randn(50, 1, requires_grad=True).double() + W = torch.randn(1, 2, requires_grad=True).double() + labels = torch.ones(1, requires_grad=False).double() + C = torch.tensor([[1.0]], requires_grad=False).double() + test = gradcheck(Hinge.apply, (output, W, labels, C), eps=1e-6, atol=1e-5) + if test: + print('Hinge successfully tested!') + x = torch.randn(50, 2, requires_grad=False).double() + labels = torch.ones(50, requires_grad=False).double() + try: + output, loss = model(x, labels) + assert model.W.requires_grad is True + assert model.b.requires_grad is True + print('SVM_HINGE successfully tested!') + except: + raise Exception('Failed testing SVM_HINGE!') + + +if __name__ == '__main__': run() \ No newline at end of file diff --git a/hw3/code/data_preprocess.py b/hw3/code/data_preprocess.py index 87fc695..5f09006 100644 --- a/hw3/code/data_preprocess.py +++ b/hw3/code/data_preprocess.py @@ -1,178 +1,178 @@ -# ======================================================== -# Media and Cognition -# Homework 3 Support Vector Machine -# data_preprocess.py - Using pretrained convolutional layers to extract feature, -# and using PCA for dimensionality reduction -# Student ID: -# Name: -# Tsinghua University -# (C) Copyright 2024 -# ======================================================== - -import os -import torchvision.transforms as transforms -import torch -from PIL import Image -from networks import Classifier -import matplotlib.pyplot as plt -import argparse - - -def preprocess(pre_conv, data_root, image_size, classes): - # TODO 1: Using PCA to reduce the dimensionality of 2048 point features extracted by convolution - - # =============== process training dataset ====================== - print("Start preprocessing the training dataset !!!") - train_data, train_label = loaddata(pre_conv, data_root, 'train', image_size, classes) - - # calculate the mean and PCA projection matrix - data_mean, u = PCA(train_data, 2) - - # TODO: using PCA to compress the dimensionality of the train_data after subtracting the mean vector - train_data_pca = ??? - - visualize(train_data_pca, train_label, "train") - savedata(train_data_pca, train_label, data_root+"/train.pt") - print("training dataset saved !!!") - - # =============== process validation dataset ====================== - print("Start preprocessing the validation dataset!!!") - val_data, val_label = loaddata(pre_conv, data_root, 'val', image_size, classes) - - # TODO: using PCA to compress the dimensionality of the val_data after subtracting the mean vector - val_data_pca = ??? - - visualize(val_data_pca, val_label, "val") - savedata(val_data_pca, val_label, data_root+"/val.pt") - print("validation dataset saved !!!") - - # =============== process testing dataset ====================== - print("Start preprocessing the testing dataset!!!") - test_data, test_label = loaddata(pre_conv, data_root, 'test', image_size, classes) - - # TODO: using PCA to compress the dimensionality of the test_data after subtracting the mean vector - test_data_pca = ??? - - visualize(test_data_pca, test_label, "test") - savedata(test_data_pca, test_label, data_root+"/test.pt") - print("testing dataset saved !!!") - - -def savedata(data, label, save_path): - save_dict = { - 'data': data, - 'label': label - } - torch.save(save_dict, save_path) - - -def visualize(datas, labels, mode): - """ - Display feature points after dimensionality reduction - ------------------------------- - :param datas: the samples after dimensionality reduction, with the shape of [N, 2] - :param labels: the labels (chosen from {-1, +1}) corresponding to the samples - :param mode: chosen from {'train', 'val', 'test'} - :return: - """ - plt.figure() - for idx in range(datas.shape[1]): - plt.scatter(datas[labels == 2*idx-1, 0], datas[labels == 2*idx-1, 1], label=(2*idx-1)) - plt.legend() - plt.title(mode) - plt.show() - - -def PCA(data, dim=2): - """ - calculate the mean value of the data and the projection matrix for PCA - :param data: the sample features extracted by the pretrained network in homework2, with the shape of [N, 2048] - :param dim: the data dimension after projection - :return: - data_mean: the mean value of the data - u: the projection matrix for PCA, with the shape of [2048, dim] - """ - # TODO 2: complete the algorithm of PCA, calculate the mean value of the data and the projection matrix - - # TODO: compute the mean of train_data - data_mean = ??? - # TODO: compute the covariance matrix of train_data - data_cov = ??? - # TODO: compute the SVD decompositon of data_cov using torch.linalg.svd - # reference: https://pytorch.org/docs/1.11/generated/torch.linalg.svd.html - ??? - # TODO: return the proper 'data_mean' and 'u[]' - return ??? - - -def loaddata(pre_conv, data_root, mode, image_size, classes): - """ - load one dataset, and use pretrained network in homework 2 to extract feature - :param pre_conv: pretrained network in homework 2 - :param data_root: the path of the dataset - :param mode: chosen from {'train', 'val', 'test'} - :param image_size: the preset size that each image try to zoom to - :param classes: two classes that need to be classified - :return: - datas: the samples of extracted features with the shape of [N, 2048] - labels: the corresponding labels for each sample (chosen from {-1, +1}), with the shape of [N] - """ - assert len(classes) == 2 - datas = [] - labels = [] - for idx in range(len(classes)): - for img in os.listdir(data_root + '/' + mode + '/' + classes[idx]): - data = readimg(pre_conv, data_root + '/' + mode + '/' + classes[idx] + '/' + img, image_size) - label = 2 * idx - 1 - datas.append(data) - labels.append(label) - return torch.stack(datas), torch.tensor(labels) - - -def readimg(pre_conv, filepath, image_size): - """ - Read one image and use pretrained network to extract the feature - -------------------------- - :param pre_conv: pretrained network in homework 2 - :param filepath: the file path of one image - :param image_size: the preset size that each image try to zoom to - :return: - data: the extracted feature with the length of 2048 - """ - img_pil = Image.open(filepath).convert('RGB') - img_pil = img_pil.resize(image_size) - img_transform = transforms.Compose([transforms.ToTensor(), - transforms.Normalize(0.5, 0.5), - ]) - img_tensor = img_transform(img_pil) - data = pre_conv(img_tensor.unsqueeze(0)).reshape(-1) - - return data - - -if __name__ == "__main__": - parser = argparse.ArgumentParser() - parser.add_argument("--pretrained_net", type=str, default="checkpoints/bn/ckpt_epoch_15.pth", - help="the filepath of the pretrained network in homework 2") - parser.add_argument("--data_root", type=str, default="data", help="the path of all datasets") - parser.add_argument("--image_size", type=tuple, default=(32, 32), - help="the preset size that each image try to zoom to") - parser.add_argument("--classes", default=["B", "C"], help="two classes that need to be classified") - - args = parser.parse_args() - - pretrained_checkpoint = torch.load(args.pretrained_net, map_location="cpu") - configs = pretrained_checkpoint["configs"] - cls = Classifier( - configs["in_channels"], - configs["num_classes"], - configs["use_batch_norm"], - configs["use_stn"], - configs["dropout_prob"], - ) - cls.load_state_dict(pretrained_checkpoint["model_state"]) - for param in cls.parameters(): - param.requires_grad = False - conv = cls.conv_net - - preprocess(conv, args.data_root, args.image_size, args.classes) +# ======================================================== +# Media and Cognition +# Homework 3 Support Vector Machine +# data_preprocess.py - Using pretrained convolutional layers to extract feature, +# and using PCA for dimensionality reduction +# Student ID: +# Name: +# Tsinghua University +# (C) Copyright 2024 +# ======================================================== + +import os +import torchvision.transforms as transforms +import torch +from PIL import Image +from networks import Classifier +import matplotlib.pyplot as plt +import argparse + + +def preprocess(pre_conv, data_root, image_size, classes): + # TODO 1: Using PCA to reduce the dimensionality of 2048 point features extracted by convolution + + # =============== process training dataset ====================== + print("Start preprocessing the training dataset !!!") + train_data, train_label = loaddata(pre_conv, data_root, 'train', image_size, classes) + + # calculate the mean and PCA projection matrix + data_mean, u = PCA(train_data, 2) + + # TODO: using PCA to compress the dimensionality of the train_data after subtracting the mean vector + train_data_pca = ??? + + visualize(train_data_pca, train_label, "train") + savedata(train_data_pca, train_label, data_root+"/train.pt") + print("training dataset saved !!!") + + # =============== process validation dataset ====================== + print("Start preprocessing the validation dataset!!!") + val_data, val_label = loaddata(pre_conv, data_root, 'val', image_size, classes) + + # TODO: using PCA to compress the dimensionality of the val_data after subtracting the mean vector + val_data_pca = ??? + + visualize(val_data_pca, val_label, "val") + savedata(val_data_pca, val_label, data_root+"/val.pt") + print("validation dataset saved !!!") + + # =============== process testing dataset ====================== + print("Start preprocessing the testing dataset!!!") + test_data, test_label = loaddata(pre_conv, data_root, 'test', image_size, classes) + + # TODO: using PCA to compress the dimensionality of the test_data after subtracting the mean vector + test_data_pca = ??? + + visualize(test_data_pca, test_label, "test") + savedata(test_data_pca, test_label, data_root+"/test.pt") + print("testing dataset saved !!!") + + +def savedata(data, label, save_path): + save_dict = { + 'data': data, + 'label': label + } + torch.save(save_dict, save_path) + + +def visualize(datas, labels, mode): + """ + Display feature points after dimensionality reduction + ------------------------------- + :param datas: the samples after dimensionality reduction, with the shape of [N, 2] + :param labels: the labels (chosen from {-1, +1}) corresponding to the samples + :param mode: chosen from {'train', 'val', 'test'} + :return: + """ + plt.figure() + for idx in range(datas.shape[1]): + plt.scatter(datas[labels == 2*idx-1, 0], datas[labels == 2*idx-1, 1], label=(2*idx-1)) + plt.legend() + plt.title(mode) + plt.show() + + +def PCA(data, dim=2): + """ + calculate the mean value of the data and the projection matrix for PCA + :param data: the sample features extracted by the pretrained network in homework2, with the shape of [N, 2048] + :param dim: the data dimension after projection + :return: + data_mean: the mean value of the data + u: the projection matrix for PCA, with the shape of [2048, dim] + """ + # TODO 2: complete the algorithm of PCA, calculate the mean value of the data and the projection matrix + + # TODO: compute the mean of train_data + data_mean = ??? + # TODO: compute the covariance matrix of train_data + data_cov = ??? + # TODO: compute the SVD decompositon of data_cov using torch.linalg.svd + # reference: https://pytorch.org/docs/1.11/generated/torch.linalg.svd.html + ??? + # TODO: return the proper 'data_mean' and 'u[]' + return ??? + + +def loaddata(pre_conv, data_root, mode, image_size, classes): + """ + load one dataset, and use pretrained network in homework 2 to extract feature + :param pre_conv: pretrained network in homework 2 + :param data_root: the path of the dataset + :param mode: chosen from {'train', 'val', 'test'} + :param image_size: the preset size that each image try to zoom to + :param classes: two classes that need to be classified + :return: + datas: the samples of extracted features with the shape of [N, 2048] + labels: the corresponding labels for each sample (chosen from {-1, +1}), with the shape of [N] + """ + assert len(classes) == 2 + datas = [] + labels = [] + for idx in range(len(classes)): + for img in os.listdir(data_root + '/' + mode + '/' + classes[idx]): + data = readimg(pre_conv, data_root + '/' + mode + '/' + classes[idx] + '/' + img, image_size) + label = 2 * idx - 1 + datas.append(data) + labels.append(label) + return torch.stack(datas), torch.tensor(labels) + + +def readimg(pre_conv, filepath, image_size): + """ + Read one image and use pretrained network to extract the feature + -------------------------- + :param pre_conv: pretrained network in homework 2 + :param filepath: the file path of one image + :param image_size: the preset size that each image try to zoom to + :return: + data: the extracted feature with the length of 2048 + """ + img_pil = Image.open(filepath).convert('RGB') + img_pil = img_pil.resize(image_size) + img_transform = transforms.Compose([transforms.ToTensor(), + transforms.Normalize(0.5, 0.5), + ]) + img_tensor = img_transform(img_pil) + data = pre_conv(img_tensor.unsqueeze(0)).reshape(-1) + + return data + + +if __name__ == "__main__": + parser = argparse.ArgumentParser() + parser.add_argument("--pretrained_net", type=str, default="checkpoints/bn/ckpt_epoch_15.pth", + help="the filepath of the pretrained network in homework 2") + parser.add_argument("--data_root", type=str, default="data", help="the path of all datasets") + parser.add_argument("--image_size", type=tuple, default=(32, 32), + help="the preset size that each image try to zoom to") + parser.add_argument("--classes", default=["B", "C"], help="two classes that need to be classified") + + args = parser.parse_args() + + pretrained_checkpoint = torch.load(args.pretrained_net, map_location="cpu") + configs = pretrained_checkpoint["configs"] + cls = Classifier( + configs["in_channels"], + configs["num_classes"], + configs["use_batch_norm"], + configs["use_stn"], + configs["dropout_prob"], + ) + cls.load_state_dict(pretrained_checkpoint["model_state"]) + for param in cls.parameters(): + param.requires_grad = False + conv = cls.conv_net + + preprocess(conv, args.data_root, args.image_size, args.classes) diff --git a/hw3/code/svm_hw.py b/hw3/code/svm_hw.py index 5fd432a..54dbce3 100644 --- a/hw3/code/svm_hw.py +++ b/hw3/code/svm_hw.py @@ -1,139 +1,139 @@ -# ======================================================== -# Media and Cognition -# Homework 3 Support Vector Machine -# svm_hw.py - The implementation of SVM using hinge loss -# Student ID: -# Name: -# Tsinghua University -# (C) Copyright 2024 -# ======================================================== - -import torch -import torch.nn as nn -import torch.nn.functional as F - - -# TODO 1: complete the forward and backward propagation processes of the linear layer -class LinearFunction(torch.autograd.Function): - ''' - we will implement the linear function: - y = xW^T + b - as well as its gradient computation process - ''' - - @staticmethod - def forward(ctx, x, W, b): - ''' - Input: - :param ctx: a context object that can be used to stash information for backward computation - :param x: input features with size [batch_size, input_size] - :param W: weight matrix with size [output_size, input_size] - :param b: bias with size [output_size] - Return: - y :output features with size [batch_size, output_size] - ''' - - # TODO - y = ??? - ctx.save_for_backward(x, W) - - return y - - @staticmethod - def backward(ctx, grad_output): - ''' - Input: - :param ctx: a context object with saved variables - :param grad_output: dL/dy, with size [batch_size, output_size] - Return: - grad_input: dL/dx, with size [batch_size, input_size] - grad_W: dL/dW, with size [output_size, input_size], summed for data in the batch - grad_b: dL/db, with size [output_size], summed for data in the batch - ''' - - x, W = ctx.saved_variables - - # calculate dL/dx by using dL/dy (grad_output) and W, e.g., dL/dx = dL/dy*W - # calculate dL/dW by using dL/dy (grad_output) and x - # calculate dL/db using dL/dy (grad_output) - # you can use torch.matmul(A, B) to compute matrix product of A and B - - # TODO - grad_input = ??? - grad_W = ??? - grad_b = ??? - - return grad_input, grad_W, grad_b - - -# TODO 2: complete the forward and backward propagation processes of the hinge loss -class Hinge(torch.autograd.Function): - - @staticmethod - def forward(ctx, output, W, label, C): - """ - Compute the hinge loss - -------------------------------------- - :param ctx: a context object that can be used to stash information for backward computation - :param output: the output of the linear layer with size [batch_size, 1], i.e. output = W^T*x + b - :param W: weight matrix with size [1, input_size] - :param label: the ground truth y in the equation for loss calculation, with size [batch_size] - :param C: the regularization coefficient of hinge loss with size [1, 1] - :return: the hinge loss with size [1, 1] - """ - C = C.type_as(W) - - # TODO: compute the hinge loss (together with L2 norm for SVM): loss = 0.5*||w||^2 + C*\sum_i{max(0, 1 - y_i*output_i)} - # you may need F.relu() to implement the max() function. - loss = ??? - ctx.save_for_backward(output, W, label, C) - - return loss - - @staticmethod - def backward(ctx, grad_loss): - """ - Compute the gradient of hinge loss - :param ctx: a context object with saved variables - :param grad_loss: dL/dloss, with size [1, 1], the gradient of the final target loss with respect to the output (variable 'loss') of the forward function - :return: - grad_output: dL/doutput, with size [batch_size, 1] - grad_W: dL/dW, with size [1, channels] - """ - output, W, label, C = ctx.saved_tensors - # TODO: compute the grad with respect to the output of the linear function and W: dL/doutput, dL/dW - grad_output = ??? - grad_W = ??? - return grad_output, grad_W, None, None - - -# TODO 3: complete the structure of SVM model -class SVM_HINGE(nn.Module): - - def __init__(self, in_channels, C): - """ - :param in_channels: number of feature channels for SVM input - :param C: regularization coefficient of hinge loss with size [1, 1] - """ - super().__init__() - - # TODO: define the parameters W and b - """ - the shape of W should be [1, channels] and the shape of b should be [1, ] - you need to use nn.Parameter() to make W and b be trainable parameters, don't forget to set requires_grad=True for self.W and self.b - please use torch.randn() to initialize W and b - """ - - self.W = ??? - self.b = ??? - self.C = torch.tensor([[C]], requires_grad=False) - - def forward(self, x, label=None): - # SVM calculation - output = LinearFunction.apply(x, self.W, self.b) - if label is not None: - loss = Hinge.apply(output, self.W, label, self.C) - else: - loss = None - output = (output > 0.0).type_as(x) * 2.0 - 1.0 - return output, loss +# ======================================================== +# Media and Cognition +# Homework 3 Support Vector Machine +# svm_hw.py - The implementation of SVM using hinge loss +# Student ID: +# Name: +# Tsinghua University +# (C) Copyright 2024 +# ======================================================== + +import torch +import torch.nn as nn +import torch.nn.functional as F + + +# TODO 1: complete the forward and backward propagation processes of the linear layer +class LinearFunction(torch.autograd.Function): + ''' + we will implement the linear function: + y = xW^T + b + as well as its gradient computation process + ''' + + @staticmethod + def forward(ctx, x, W, b): + ''' + Input: + :param ctx: a context object that can be used to stash information for backward computation + :param x: input features with size [batch_size, input_size] + :param W: weight matrix with size [output_size, input_size] + :param b: bias with size [output_size] + Return: + y :output features with size [batch_size, output_size] + ''' + + # TODO + y = ??? + ctx.save_for_backward(x, W) + + return y + + @staticmethod + def backward(ctx, grad_output): + ''' + Input: + :param ctx: a context object with saved variables + :param grad_output: dL/dy, with size [batch_size, output_size] + Return: + grad_input: dL/dx, with size [batch_size, input_size] + grad_W: dL/dW, with size [output_size, input_size], summed for data in the batch + grad_b: dL/db, with size [output_size], summed for data in the batch + ''' + + x, W = ctx.saved_variables + + # calculate dL/dx by using dL/dy (grad_output) and W, e.g., dL/dx = dL/dy*W + # calculate dL/dW by using dL/dy (grad_output) and x + # calculate dL/db using dL/dy (grad_output) + # you can use torch.matmul(A, B) to compute matrix product of A and B + + # TODO + grad_input = ??? + grad_W = ??? + grad_b = ??? + + return grad_input, grad_W, grad_b + + +# TODO 2: complete the forward and backward propagation processes of the hinge loss +class Hinge(torch.autograd.Function): + + @staticmethod + def forward(ctx, output, W, label, C): + """ + Compute the hinge loss + -------------------------------------- + :param ctx: a context object that can be used to stash information for backward computation + :param output: the output of the linear layer with size [batch_size, 1], i.e. output = W^T*x + b + :param W: weight matrix with size [1, input_size] + :param label: the ground truth y in the equation for loss calculation, with size [batch_size] + :param C: the regularization coefficient of hinge loss with size [1, 1] + :return: the hinge loss with size [1, 1] + """ + C = C.type_as(W) + + # TODO: compute the hinge loss (together with L2 norm for SVM): loss = 0.5*||w||^2 + C*\sum_i{max(0, 1 - y_i*output_i)} + # you may need F.relu() to implement the max() function. + loss = ??? + ctx.save_for_backward(output, W, label, C) + + return loss + + @staticmethod + def backward(ctx, grad_loss): + """ + Compute the gradient of hinge loss + :param ctx: a context object with saved variables + :param grad_loss: dL/dloss, with size [1, 1], the gradient of the final target loss with respect to the output (variable 'loss') of the forward function + :return: + grad_output: dL/doutput, with size [batch_size, 1] + grad_W: dL/dW, with size [1, channels] + """ + output, W, label, C = ctx.saved_tensors + # TODO: compute the grad with respect to the output of the linear function and W: dL/doutput, dL/dW + grad_output = ??? + grad_W = ??? + return grad_output, grad_W, None, None + + +# TODO 3: complete the structure of SVM model +class SVM_HINGE(nn.Module): + + def __init__(self, in_channels, C): + """ + :param in_channels: number of feature channels for SVM input + :param C: regularization coefficient of hinge loss with size [1, 1] + """ + super().__init__() + + # TODO: define the parameters W and b + """ + the shape of W should be [1, channels] and the shape of b should be [1, ] + you need to use nn.Parameter() to make W and b be trainable parameters, don't forget to set requires_grad=True for self.W and self.b + please use torch.randn() to initialize W and b + """ + + self.W = ??? + self.b = ??? + self.C = torch.tensor([[C]], requires_grad=False) + + def forward(self, x, label=None): + # SVM calculation + output = LinearFunction.apply(x, self.W, self.b) + if label is not None: + loss = Hinge.apply(output, self.W, label, self.C) + else: + loss = None + output = (output > 0.0).type_as(x) * 2.0 - 1.0 + return output, loss diff --git a/hw3/code/test_svm.py b/hw3/code/test_svm.py index 5a61e95..0a2a917 100644 --- a/hw3/code/test_svm.py +++ b/hw3/code/test_svm.py @@ -1,106 +1,106 @@ -# ======================================================== -# Media and Cognition -# Homework 3 Support Vector Machine -# test_svm.py - Test svm model for traffic sign -# Student ID: -# Name: -# Tsinghua University -# (C) Copyright 2024 -# ======================================================== - -# ==== Part 1: import libs -import argparse -import torch -from datasets import Traffic_Dataset -from svm_hw import SVM_HINGE -from torch.utils.data import DataLoader - - -# ==== Part 2: testing -def test( - data_root, - model_save_path, - device, -): - """ - The main testing procedure of SVM model - ---------------------------- - :param data_root: path to the root directory of dataset - :param model_save_path: path to pretrained SVM model - :param device: device: 'cpu' or 'cuda', we can use 'cpu' for our homework if GPU with cuda support is not available - """ - - # TODO 1: =================== load the pretrained SVM model ================================== - - # TODO: construct testing data loader with 'Traffic_Dataset' and DataLoader, and set 'batch_size=1' and 'shuffle=False' - test_data = ??? - test_loader = ??? - - # TODO: load state dictionary of pretrained SVM model - model_svm = ??? - - # TODO: initialize the SVM model using 'model_svm["configs"]["feature_channel"]' and 'model_svm["configs"]["C"]' - svm = ??? - - # TODO: load model parameters (model_svm['state_dict']) we saved in model_path using svm.load_state_dict() - ??? - - # TODO: put the model on CPU or GPU - ??? - - # TODO 2 : ================================ testing ============================================== - - # TODO: set the model in evaluation mode - ??? - - # to calculate and save the testing accuracy - n_correct = 0. # number of images that are correctly classified - n_feas = 0. # number of total images - - with torch.no_grad(): # we do not need to compute gradients during validation - # TODO: inference on the testing dataset, similar to the training stage but use 'test_loader'. - for ??? in ???: - # TODO: set data type (.float()) and device (.to()) - ??? - - # TODO: run the model; at the validation step, the model only needs one input: feas - # _ refers to a placeholder, which means we do not need the second returned value during validating - ??? - - # TODO: sum up the number of images correctly recognized. note the shapes of 'out' and 'labels' are different - n_correct += ??? - - # TODO:sum up the total image number - n_feas += ??? - - # show prediction accuracy - acc = 100 * n_correct / n_feas - print('Test accuracy = {:.1f}%'.format(acc)) - - -if __name__ == "__main__": - # set configurations of the testing process - parser = argparse.ArgumentParser() - parser.add_argument("--data_root", type=str, default="data", help="file list of training image paths and labels") - parser.add_argument("--device", type=str, help="cpu or cuda") - parser.add_argument("--model_save_path", type=str, default="checkpoints/svm.pth", help="path to save SVM model") - - args = parser.parse_args() - if args.device is None: - args.device = "cuda" if torch.cuda.is_available() else "cpu" - - # run the testing procedure - test( - data_root=args.data_root, - model_save_path=args.model_save_path, - device=args.device, - ) - - - - - - - - - +# ======================================================== +# Media and Cognition +# Homework 3 Support Vector Machine +# test_svm.py - Test svm model for traffic sign +# Student ID: +# Name: +# Tsinghua University +# (C) Copyright 2024 +# ======================================================== + +# ==== Part 1: import libs +import argparse +import torch +from datasets import Traffic_Dataset +from svm_hw import SVM_HINGE +from torch.utils.data import DataLoader + + +# ==== Part 2: testing +def test( + data_root, + model_save_path, + device, +): + """ + The main testing procedure of SVM model + ---------------------------- + :param data_root: path to the root directory of dataset + :param model_save_path: path to pretrained SVM model + :param device: device: 'cpu' or 'cuda', we can use 'cpu' for our homework if GPU with cuda support is not available + """ + + # TODO 1: =================== load the pretrained SVM model ================================== + + # TODO: construct testing data loader with 'Traffic_Dataset' and DataLoader, and set 'batch_size=1' and 'shuffle=False' + test_data = ??? + test_loader = ??? + + # TODO: load state dictionary of pretrained SVM model + model_svm = ??? + + # TODO: initialize the SVM model using 'model_svm["configs"]["feature_channel"]' and 'model_svm["configs"]["C"]' + svm = ??? + + # TODO: load model parameters (model_svm['state_dict']) we saved in model_path using svm.load_state_dict() + ??? + + # TODO: put the model on CPU or GPU + ??? + + # TODO 2 : ================================ testing ============================================== + + # TODO: set the model in evaluation mode + ??? + + # to calculate and save the testing accuracy + n_correct = 0. # number of images that are correctly classified + n_feas = 0. # number of total images + + with torch.no_grad(): # we do not need to compute gradients during validation + # TODO: inference on the testing dataset, similar to the training stage but use 'test_loader'. + for ??? in ???: + # TODO: set data type (.float()) and device (.to()) + ??? + + # TODO: run the model; at the validation step, the model only needs one input: feas + # _ refers to a placeholder, which means we do not need the second returned value during validating + ??? + + # TODO: sum up the number of images correctly recognized. note the shapes of 'out' and 'labels' are different + n_correct += ??? + + # TODO:sum up the total image number + n_feas += ??? + + # show prediction accuracy + acc = 100 * n_correct / n_feas + print('Test accuracy = {:.1f}%'.format(acc)) + + +if __name__ == "__main__": + # set configurations of the testing process + parser = argparse.ArgumentParser() + parser.add_argument("--data_root", type=str, default="data", help="file list of training image paths and labels") + parser.add_argument("--device", type=str, help="cpu or cuda") + parser.add_argument("--model_save_path", type=str, default="checkpoints/svm.pth", help="path to save SVM model") + + args = parser.parse_args() + if args.device is None: + args.device = "cuda" if torch.cuda.is_available() else "cpu" + + # run the testing procedure + test( + data_root=args.data_root, + model_save_path=args.model_save_path, + device=args.device, + ) + + + + + + + + + diff --git a/hw3/code/train_svm.py b/hw3/code/train_svm.py index ca2b995..091a1e5 100644 --- a/hw3/code/train_svm.py +++ b/hw3/code/train_svm.py @@ -1,289 +1,289 @@ -# ======================================================== -# Media and Cognition -# Homework 3 Support Vector Machine -# train_svm.py - Train svm model for traffic sign -# Student ID: -# Name: -# Tsinghua University -# (C) Copyright 2024 -# ======================================================== - -# ==== Part 1: import libs -import argparse -import matplotlib.pyplot as plt -import torch -import numpy as np -import random -from datasets import Traffic_Dataset -from svm_hw import SVM_HINGE -from torch.utils.data import DataLoader - - -# ==== Part 2: training and validation -def train( - data_root, - feature_channel, - batch_size, - n_epoch, - lr, - C, - model_save_path, - device, -): - """ - The main training procedure of SVM model - ---------------------------- - :param data_root: path to the root directory of dataset - :param feature_channel: number of feature channels for SVM input - :param batch_size: batch size of training - :param n_epoch: number of training epochs - :param lr: learning rate - :param C: regularization coefficient in hinge loss - :param model_save_path: path to save SVM model - :param device: 'cpu' or 'cuda', we can use 'cpu' for our homework if GPU with cuda support is not available - """ - - # TODO 1: construct training and validation data loader with 'Traffic_Dataset' and DataLoader, and set proper values for 'batch_size' and 'shuffle' - train_data = ??? - train_loader = ??? - val_data = ??? - val_loader = ??? - - # scale the regularization coefficient - C = C * len(train_loader) - - # TODO: initialize the SVM model - svm = ??? - - # TODO: put the model on CPU or GPU - ??? - - # TODO: define the Adam optimizer - optimizer = ??? - - # to save the training loss, training accuracy, validation accuracy, and the epoch index of each training epoch - train_loss = [] - train_acc = [] - val_acc = [] - epochs = [] - - for epoch in range(n_epoch): - # TODO: save the index of current epoch in the array 'epochs' - ??? - - # TODO 2: ========================= training ======================= - # TODO: set the model in training mode - ??? - - # to calculate and save the training loss and training accuracy - total_loss = 0. # to save total training loss in one epoch - n_correct = 0. # number of images that are correctly classified - n_feas = 0. # number of total images - - # TODO: get a batch of data; you may need enumerate() to iteratively get data from 'train_loader'. - # you can refer to previous homework, for example hw2 - for ??? in ???: - # TODO: set data type (.float()) and device (.to()) - ??? - - # TODO: clear gradients in the optimizer - ??? - - # TODO: run the model with hinge loss; the model needs two inputs: feas and labels - ??? - - # TODO: back-propagation on the computation graph - ??? - - # TODO: sum up of total loss, loss.item() return the value of the tensor as a standard python number - total_loss += ??? - - # TODO: call a function to update the parameters of the models - ??? - - # TODO: sum up the number of images correctly recognized. note the shapes of 'out' and 'labels' are different - n_correct += ??? - - # TODO: sum up the total image number - n_feas += ??? - - # average of the total loss for iterations - acc = 100 * n_correct / n_feas - avg_loss = total_loss / len(train_loader) - train_acc.append(acc.cpu().numpy()) - train_loss.append(avg_loss) - print('Epoch {:02d}: loss = {:.3f}, training accuracy = {:.1f}%'.format(epoch + 1, avg_loss, acc)) - - # TODO 3: ========================== Validation ====================================== - - # TODO: set the model in evaluation mode - ??? - - # to calculate and save the validation accuracy - n_correct = 0. # number of images that are correctly classified - n_feas = 0. # number of total images - - with torch.no_grad(): # we do not need to compute gradients during validation - # TODO: inference on the validation dataset, similar to the training stage but use 'val_loader'. - for ??? in ???: - # TODO: set data type (.float()) and device (.to()) - ??? - - # TODO: run the model; at the validation step, the model only needs one input: feas - # _ refers to a placeholder, which means we do not need the second returned value during validating - ??? - - # TODO: sum up the number of images correctly recognized. note the shapes of 'out' and 'labels' are different - n_correct += ??? - - # TODO: sum up the total image number - n_feas += ??? - - # show prediction accuracy - acc = 100 * n_correct / n_feas - print('Epoch {:02d}: validation accuracy = {:.1f}%'.format(epoch + 1, acc)) - val_acc.append(acc.cpu().numpy()) - - # save model parameters in a file - torch.save({'state_dict': svm.state_dict(), - 'configs': { - 'feature_channel': feature_channel, - 'C': C} - }, model_save_path) - print('Model saved in {}\n'.format(model_save_path)) - - W = svm.W.data.cpu() - b = svm.b.data.cpu() - - # TODO 4: calculate the index of support vectors in training samples using 'train_data.datas' and 'train_data.labels' - # 'sv' should be a list in python structure with the shape of [K], where K is the number of support vectors. - sv = ??? - - plot(train_loss, train_acc, val_acc, epochs) - plot_feature(train_features=train_data.datas, val_features=val_data.datas, train_labels=train_data.labels, - val_labels=val_data.labels, sv=sv, W=W, b=b) - - -def plot_feature(train_features, val_features, train_labels, val_labels, sv, W, b): - """ - Draw the samples,SVM decision boundary, and support vectors - --------------------- - :param train_features: training samples with the shape of [B, 2] - :param val_features: validation samples with the shape of [B, 2] - :param train_labels: the labels (chosen from{-1, +1}) corresponding to training samples, with the shape of [B, 1] - :param val_labels: the labels (chosen from{-1, +1}) corresponding to validation samples, with the shape of [B, 1] - :param sv: a list with the index of support vectors in training samples, with the shape of [K] (K is the number of support vectors) - :param W: the weight vector of SVM decision boundary (W^Tx + b), with the shape of [1, feature_channel] - :param b: the bias of SVM decision boundary (W^Tx + b), with the shape of [1,] - """ - train_labels = (train_labels > 0.0).int() - val_labels = (val_labels > 0.0).int() - train_labels[sv] = 2 - foreground = list(set([i for i in range(train_labels.shape[0] // 2)]) - set(sv)) - foreground_sv = list(set([i for i in range(train_labels.shape[0] // 2)]) - set(foreground)) - background = list(set([i + train_labels.shape[0] // 2 for i in range(train_labels.shape[0] // 2)]) - set(sv)) - background_sv = list(set([i + train_labels.shape[0] // 2 for i in range(train_labels.shape[0] // 2)]) - set(background)) - f, ax = plt.subplots() - plt.title("training dataset") - ax.scatter(train_features[foreground, 0], train_features[foreground, 1], marker='.', c='r', label="-1") - ax.scatter(train_features[foreground_sv, 0], train_features[foreground_sv, 1], marker='.', c='darkorange', - label="-1 (support vector)") - ax.scatter(train_features[background, 0], train_features[background, 1], marker='x', c='b', label="+1") - ax.scatter(train_features[background_sv, 0], train_features[background_sv, 1], marker='x', c='c', - label="+1 (support vector)") - x = np.linspace(-20, 20, 100) - ax.plot(x, -W[0, 0] / W[0, 1] * x - b / W[0, 1], c='y') - ax.legend(loc="best") - plt.ylim([-30, 30]) - plt.show() - f, ax = plt.subplots() - plt.title("validation dataset") - foreground_val = [i for i in range(val_labels.shape[0] // 2)] - background_val = [i + val_labels.shape[0] // 2 for i in range(val_labels.shape[0] // 2)] - ax.scatter(val_features[foreground_val, 0], val_features[foreground_val, 1], marker='.', c='r', label="-1") - ax.scatter(val_features[background_val, 0], val_features[background_val, 1], marker='x', c='b', label="+1") - x = np.linspace(-20, 20, 100) - ax.plot(x, -W[0, 0] / W[0, 1] * x - b / W[0, 1], c='y') - ax.legend(loc="best") - plt.ylim([-30, 30]) - plt.show() - - -def plot(train_loss, train_acc, val_acc, epochs): - """ - Draw loss and accuracy curve - ------------------ - :param train_loss: a list with loss of each training epoch - :param train_acc: a list with accuracy on training dataset of each training epoch - :param val_acc: a list with accuracy on validation dataset of each training epoch - :param epochs: a list with the index of all training epochs - """ - - # draw the training loss curve - f, ax = plt.subplots() - plt.title("Training Loss") - ax.plot(epochs, train_loss, color="tab:blue") - ax.set_xlabel("Training epoch") - ax.set_ylabel("Loss") - ax.legend(["training loss"], loc="best") - plt.show() - - # draw the accuracy curve - f, ax = plt.subplots() - plt.title("Training and Validation Accuracy") - ax.plot(epochs, train_acc, color="tab:orange") - ax.plot(epochs, val_acc, color="tab:green") - ax.legend(["training accuracy","validation accuracy"], loc="best") - ax.set_xlabel("Training epoch") - ax.set_ylabel("Accuracy") - ax.set_ylim(0, 101) - plt.show() - - -if __name__ == "__main__": - # set random seed for reproducibility - seed = 2024 - random.seed(seed) - np.random.seed(seed) - torch.manual_seed(seed) - torch.cuda.manual_seed(seed) - torch.cuda.manual_seed_all(seed) - torch.backends.cudnn.deterministic = True - - # set configurations of the model and training process - parser = argparse.ArgumentParser() - parser.add_argument("--data_root", type=str, default="data", help="file list of training image paths and labels",) - parser.add_argument("--n_epoch", type=int, default=50, help="number of training epochs") - parser.add_argument("--batch_size", type=int, default=20, help="training batch size") - parser.add_argument("--lr", type=float, default=1e-2, help="learning rate") - parser.add_argument("--C", type=float, default=1e-3, help="regularization coefficient in hinge loss") - parser.add_argument("--device", type=str, help="cpu or cuda") - parser.add_argument("--feature_channel", type=int, default=2, help="number of pre-extracted feature channel by pretrained network") - parser.add_argument("--model_save_path", type=str, default="checkpoints/svm.pth", help="path to save SVM model") - - args = parser.parse_args() - if args.device is None: - args.device = "cuda" if torch.cuda.is_available() else "cpu" - - # run the training procedure - train( - data_root=args.data_root, - feature_channel=args.feature_channel, - batch_size=args.batch_size, - n_epoch=args.n_epoch, - lr=args.lr, - C=args.C, - model_save_path=args.model_save_path, - device=args.device, - ) - - - - - - - - - - - +# ======================================================== +# Media and Cognition +# Homework 3 Support Vector Machine +# train_svm.py - Train svm model for traffic sign +# Student ID: +# Name: +# Tsinghua University +# (C) Copyright 2024 +# ======================================================== + +# ==== Part 1: import libs +import argparse +import matplotlib.pyplot as plt +import torch +import numpy as np +import random +from datasets import Traffic_Dataset +from svm_hw import SVM_HINGE +from torch.utils.data import DataLoader + + +# ==== Part 2: training and validation +def train( + data_root, + feature_channel, + batch_size, + n_epoch, + lr, + C, + model_save_path, + device, +): + """ + The main training procedure of SVM model + ---------------------------- + :param data_root: path to the root directory of dataset + :param feature_channel: number of feature channels for SVM input + :param batch_size: batch size of training + :param n_epoch: number of training epochs + :param lr: learning rate + :param C: regularization coefficient in hinge loss + :param model_save_path: path to save SVM model + :param device: 'cpu' or 'cuda', we can use 'cpu' for our homework if GPU with cuda support is not available + """ + + # TODO 1: construct training and validation data loader with 'Traffic_Dataset' and DataLoader, and set proper values for 'batch_size' and 'shuffle' + train_data = ??? + train_loader = ??? + val_data = ??? + val_loader = ??? + + # scale the regularization coefficient + C = C * len(train_loader) + + # TODO: initialize the SVM model + svm = ??? + + # TODO: put the model on CPU or GPU + ??? + + # TODO: define the Adam optimizer + optimizer = ??? + + # to save the training loss, training accuracy, validation accuracy, and the epoch index of each training epoch + train_loss = [] + train_acc = [] + val_acc = [] + epochs = [] + + for epoch in range(n_epoch): + # TODO: save the index of current epoch in the array 'epochs' + ??? + + # TODO 2: ========================= training ======================= + # TODO: set the model in training mode + ??? + + # to calculate and save the training loss and training accuracy + total_loss = 0. # to save total training loss in one epoch + n_correct = 0. # number of images that are correctly classified + n_feas = 0. # number of total images + + # TODO: get a batch of data; you may need enumerate() to iteratively get data from 'train_loader'. + # you can refer to previous homework, for example hw2 + for ??? in ???: + # TODO: set data type (.float()) and device (.to()) + ??? + + # TODO: clear gradients in the optimizer + ??? + + # TODO: run the model with hinge loss; the model needs two inputs: feas and labels + ??? + + # TODO: back-propagation on the computation graph + ??? + + # TODO: sum up of total loss, loss.item() return the value of the tensor as a standard python number + total_loss += ??? + + # TODO: call a function to update the parameters of the models + ??? + + # TODO: sum up the number of images correctly recognized. note the shapes of 'out' and 'labels' are different + n_correct += ??? + + # TODO: sum up the total image number + n_feas += ??? + + # average of the total loss for iterations + acc = 100 * n_correct / n_feas + avg_loss = total_loss / len(train_loader) + train_acc.append(acc.cpu().numpy()) + train_loss.append(avg_loss) + print('Epoch {:02d}: loss = {:.3f}, training accuracy = {:.1f}%'.format(epoch + 1, avg_loss, acc)) + + # TODO 3: ========================== Validation ====================================== + + # TODO: set the model in evaluation mode + ??? + + # to calculate and save the validation accuracy + n_correct = 0. # number of images that are correctly classified + n_feas = 0. # number of total images + + with torch.no_grad(): # we do not need to compute gradients during validation + # TODO: inference on the validation dataset, similar to the training stage but use 'val_loader'. + for ??? in ???: + # TODO: set data type (.float()) and device (.to()) + ??? + + # TODO: run the model; at the validation step, the model only needs one input: feas + # _ refers to a placeholder, which means we do not need the second returned value during validating + ??? + + # TODO: sum up the number of images correctly recognized. note the shapes of 'out' and 'labels' are different + n_correct += ??? + + # TODO: sum up the total image number + n_feas += ??? + + # show prediction accuracy + acc = 100 * n_correct / n_feas + print('Epoch {:02d}: validation accuracy = {:.1f}%'.format(epoch + 1, acc)) + val_acc.append(acc.cpu().numpy()) + + # save model parameters in a file + torch.save({'state_dict': svm.state_dict(), + 'configs': { + 'feature_channel': feature_channel, + 'C': C} + }, model_save_path) + print('Model saved in {}\n'.format(model_save_path)) + + W = svm.W.data.cpu() + b = svm.b.data.cpu() + + # TODO 4: calculate the index of support vectors in training samples using 'train_data.datas' and 'train_data.labels' + # 'sv' should be a list in python structure with the shape of [K], where K is the number of support vectors. + sv = ??? + + plot(train_loss, train_acc, val_acc, epochs) + plot_feature(train_features=train_data.datas, val_features=val_data.datas, train_labels=train_data.labels, + val_labels=val_data.labels, sv=sv, W=W, b=b) + + +def plot_feature(train_features, val_features, train_labels, val_labels, sv, W, b): + """ + Draw the samples,SVM decision boundary, and support vectors + --------------------- + :param train_features: training samples with the shape of [B, 2] + :param val_features: validation samples with the shape of [B, 2] + :param train_labels: the labels (chosen from{-1, +1}) corresponding to training samples, with the shape of [B, 1] + :param val_labels: the labels (chosen from{-1, +1}) corresponding to validation samples, with the shape of [B, 1] + :param sv: a list with the index of support vectors in training samples, with the shape of [K] (K is the number of support vectors) + :param W: the weight vector of SVM decision boundary (W^Tx + b), with the shape of [1, feature_channel] + :param b: the bias of SVM decision boundary (W^Tx + b), with the shape of [1,] + """ + train_labels = (train_labels > 0.0).int() + val_labels = (val_labels > 0.0).int() + train_labels[sv] = 2 + foreground = list(set([i for i in range(train_labels.shape[0] // 2)]) - set(sv)) + foreground_sv = list(set([i for i in range(train_labels.shape[0] // 2)]) - set(foreground)) + background = list(set([i + train_labels.shape[0] // 2 for i in range(train_labels.shape[0] // 2)]) - set(sv)) + background_sv = list(set([i + train_labels.shape[0] // 2 for i in range(train_labels.shape[0] // 2)]) - set(background)) + f, ax = plt.subplots() + plt.title("training dataset") + ax.scatter(train_features[foreground, 0], train_features[foreground, 1], marker='.', c='r', label="-1") + ax.scatter(train_features[foreground_sv, 0], train_features[foreground_sv, 1], marker='.', c='darkorange', + label="-1 (support vector)") + ax.scatter(train_features[background, 0], train_features[background, 1], marker='x', c='b', label="+1") + ax.scatter(train_features[background_sv, 0], train_features[background_sv, 1], marker='x', c='c', + label="+1 (support vector)") + x = np.linspace(-20, 20, 100) + ax.plot(x, -W[0, 0] / W[0, 1] * x - b / W[0, 1], c='y') + ax.legend(loc="best") + plt.ylim([-30, 30]) + plt.show() + f, ax = plt.subplots() + plt.title("validation dataset") + foreground_val = [i for i in range(val_labels.shape[0] // 2)] + background_val = [i + val_labels.shape[0] // 2 for i in range(val_labels.shape[0] // 2)] + ax.scatter(val_features[foreground_val, 0], val_features[foreground_val, 1], marker='.', c='r', label="-1") + ax.scatter(val_features[background_val, 0], val_features[background_val, 1], marker='x', c='b', label="+1") + x = np.linspace(-20, 20, 100) + ax.plot(x, -W[0, 0] / W[0, 1] * x - b / W[0, 1], c='y') + ax.legend(loc="best") + plt.ylim([-30, 30]) + plt.show() + + +def plot(train_loss, train_acc, val_acc, epochs): + """ + Draw loss and accuracy curve + ------------------ + :param train_loss: a list with loss of each training epoch + :param train_acc: a list with accuracy on training dataset of each training epoch + :param val_acc: a list with accuracy on validation dataset of each training epoch + :param epochs: a list with the index of all training epochs + """ + + # draw the training loss curve + f, ax = plt.subplots() + plt.title("Training Loss") + ax.plot(epochs, train_loss, color="tab:blue") + ax.set_xlabel("Training epoch") + ax.set_ylabel("Loss") + ax.legend(["training loss"], loc="best") + plt.show() + + # draw the accuracy curve + f, ax = plt.subplots() + plt.title("Training and Validation Accuracy") + ax.plot(epochs, train_acc, color="tab:orange") + ax.plot(epochs, val_acc, color="tab:green") + ax.legend(["training accuracy","validation accuracy"], loc="best") + ax.set_xlabel("Training epoch") + ax.set_ylabel("Accuracy") + ax.set_ylim(0, 101) + plt.show() + + +if __name__ == "__main__": + # set random seed for reproducibility + seed = 2024 + random.seed(seed) + np.random.seed(seed) + torch.manual_seed(seed) + torch.cuda.manual_seed(seed) + torch.cuda.manual_seed_all(seed) + torch.backends.cudnn.deterministic = True + + # set configurations of the model and training process + parser = argparse.ArgumentParser() + parser.add_argument("--data_root", type=str, default="data", help="file list of training image paths and labels",) + parser.add_argument("--n_epoch", type=int, default=50, help="number of training epochs") + parser.add_argument("--batch_size", type=int, default=20, help="training batch size") + parser.add_argument("--lr", type=float, default=1e-2, help="learning rate") + parser.add_argument("--C", type=float, default=1e-3, help="regularization coefficient in hinge loss") + parser.add_argument("--device", type=str, help="cpu or cuda") + parser.add_argument("--feature_channel", type=int, default=2, help="number of pre-extracted feature channel by pretrained network") + parser.add_argument("--model_save_path", type=str, default="checkpoints/svm.pth", help="path to save SVM model") + + args = parser.parse_args() + if args.device is None: + args.device = "cuda" if torch.cuda.is_available() else "cpu" + + # run the training procedure + train( + data_root=args.data_root, + feature_channel=args.feature_channel, + batch_size=args.batch_size, + n_epoch=args.n_epoch, + lr=args.lr, + C=args.C, + model_save_path=args.model_save_path, + device=args.device, + ) + + + + + + + + + + + diff --git a/hw3/report/main.tex b/hw3/report/main.tex index 97b8b40..d83c040 100644 --- a/hw3/report/main.tex +++ b/hw3/report/main.tex @@ -20,21 +20,21 @@ \begin{document} \courseheader % 请在YOUR NAME处填写自己的姓名 -\name{YOUR NAME} +\name{高艺轩} \vspace{3mm} \centerline{\textbf{\Large{理论部分}}} \section{单选题(15分)} % 请在?处填写答案 -\subsection{\underline{?}} +\subsection{\underline{D}} -\subsection{\underline{?}} +\subsection{\underline{C}} -\subsection{\underline{?}} +\subsection{\underline{D}} -\subsection{\underline{?}} +\subsection{\underline{D}} -\subsection{\underline{?}} +\subsection{\underline{B}} \section{计算题(15 分)} @@ -47,17 +47,117 @@ 试利用LDA,将样本特征维数压缩为一维。 } +\begin{proof}[解] + 首先计算$\mu_1 = (3, 2), \mu_2 = (0, 2), \mu = (1.5, 2)$。因此 + \[S_1 = \frac{1}{4} + \left( + \begin{bmatrix} + 0 & 0\\ + 0 & 1 + \end{bmatrix} + + + \begin{bmatrix} + 1 & 0\\ + 0 & 0 + \end{bmatrix} + + + \begin{bmatrix} + 1 & 1\\ + 1 & 1 + \end{bmatrix} + + + \begin{bmatrix} + 0 & 0\\ + 0 & 0 + \end{bmatrix} + \right) + = + \begin{bmatrix} + 0.5 & 0.25\\ + 0.25 & 0.5 + \end{bmatrix}\] + \[S_2 = \frac{1}{4} + \left( + \begin{bmatrix} + 0 & 0\\ + 0 & 1 + \end{bmatrix} + + + \begin{bmatrix} + 1 & 0\\ + 0 & 0 + \end{bmatrix} + + + \begin{bmatrix} + 1 & 1\\ + 1 & 1 + \end{bmatrix} + + + \begin{bmatrix} + 1 & 0\\ + 0 & 0 + \end{bmatrix} + \right) + = + \begin{bmatrix} + 0.75 & 0.25\\ + 0.25 & 0.5 + \end{bmatrix}\] + 进一步地, + \[S_w = \frac{1}{2} (S_1 + S_2) = + \begin{bmatrix} + 0.625 & 0.25\\ + 0.25 & 0.5 + \end{bmatrix}\] + \[S_b = \frac{1}{2} \left( + \begin{bmatrix} + 2.25 & 0\\ + 0 & 0 + \end{bmatrix} + + + \begin{bmatrix} + 2.25 & 0\\ + 0 & 0 + \end{bmatrix} + \right) + = + \begin{bmatrix} + 2.25 & 0\\ + 0 & 0 + \end{bmatrix}\] + 广义特征值分解得到$\lambda = 4.5$,$v = (0.8944, -0.4472)$。投影后的样本为 + \[\omega_1: \left\{2.2360, 0.8944, 2.2360, 1.7888\right\}\] + \[\omega_2: \left\{-0.4472, 0, -1.3416, -1.7888\right\}\] +\end{proof} + \vspace{3mm} \subsection{模型训练通常需要大量的数据,假设某采集的数据集包含80\%的有效数据和20\%的无效数据。采用一种算法判断数据是否有效,其中无效数据被成功判别为无效数据的概率为90\%,而有效数据被误判为无效数据的概率为5\%。如果某条数据经过该算法被判别为无效数据,则根据贝叶斯定理,这条数据是无效数据的概率是多少?(提示:全概率公式$P(Y)=\sum^{N}_{i=1}P(Y|X_i)P(X_i)$)\\} +\begin{proof}[解] + \begin{align*} + & P(\text{无效数据} \mid \text{判定无效})\\ + = & \frac{p(\text{判定无效} \mid \text{无效数据})p(\text{无效数据})}{p(\text{判定无效} \mid \text{无效数据})p(\text{无效数据}) + p(\text{判定无效} \mid \text{有效数据})p(\text{有效数据})}\\ + = & \frac{0.9 \times 0.2}{0.9 \times 0.2 + 0.05 \times 0.8}\\ + = & \frac{0.18}{0.18 + 0.04}\\ + = & \frac{9}{11} + \end{align*} +\end{proof} + \vspace{3mm} \subsection{设有两类正态分布的样本集,第一类均值为$\mu_1=[2,-1]^T$,第二类均值为$\mu_2=[1,1]^T$。两类样本集的协方差矩阵和出现的先验概率都相等:$\Sigma_1=\Sigma_2=\Sigma=\left[ \begin{array}{cc} 4 & 2 \\ 2 & \frac{4}{3} \end{array} \right]$,$p(\omega_1)=p(\omega_2)$。试计算分类界面,并对特征向量$x=[6,2]^T$分类。} +\begin{proof}[解] + \[g_1(\boldsymbol{x}) = -\frac{1}{2}(\boldsymbol{x} - \boldsymbol{\mu}_1)^\mathrm{T} \Sigma^{-1} (\boldsymbol{x} - \boldsymbol{\mu}_1) + \ln p(\omega_1)\] + \[g_2(\boldsymbol{x}) = -\frac{1}{2}(\boldsymbol{x} - \boldsymbol{\mu}_2)^\mathrm{T} \Sigma^{-1} (\boldsymbol{x} - \boldsymbol{\mu}_2) + \ln p(\omega_2)\] + 决策方程 + \[\] +\end{proof} + \vspace{3mm} \subsection{给定异或的样本集$D=\left\{\left((0,0)^T,-1\right),\left((0,1)^T,1\right),\left((1,0)^T,1\right),\left((1,1)^T,-1\right)\right\}$该样本集是线性不可分的,可采用如下所示的多项式函数$\phi(\mathbf{x})$将样本$D=\left\{(\mathbf{x}_n,y_n)\right\}$映射为$D_\phi=\left\{(\phi(\mathbf{x}_n),y_n)\right\}$,其中$\phi(\mathbf{x})$满足 \begin{equation*} diff --git a/j.ps1 b/j.ps1 index 69a4272..3cb8918 100644 --- a/j.ps1 +++ b/j.ps1 @@ -1 +1 @@ -cd ./hw2/code \ No newline at end of file +cd ./hw3/code \ No newline at end of file