Mac Sync
This commit is contained in:
22
.gitignore
vendored
22
.gitignore
vendored
@@ -1,12 +1,12 @@
|
|||||||
*.zip
|
*.zip
|
||||||
__pycache__/
|
__pycache__/
|
||||||
*.pth
|
*.pth
|
||||||
*.log
|
*.log
|
||||||
*.aux
|
*.aux
|
||||||
*.synctex.gz
|
*.synctex.gz
|
||||||
*.synctex.gz(busy)
|
*.synctex.gz(busy)
|
||||||
*.out
|
*.out
|
||||||
*.pdf
|
*.pdf
|
||||||
.DS_Store
|
.DS_Store
|
||||||
hw2/code/checkpoints/
|
hw2/code/checkpoints/
|
||||||
hw2/code/visualized/
|
hw2/code/visualized/
|
||||||
6
hw1/.vscode/settings.json
vendored
6
hw1/.vscode/settings.json
vendored
@@ -1,4 +1,4 @@
|
|||||||
{
|
{
|
||||||
"python.analysis.typeCheckingMode": "basic",
|
"python.analysis.typeCheckingMode": "basic",
|
||||||
"python.analysis.autoImportCompletions": true
|
"python.analysis.autoImportCompletions": true
|
||||||
}
|
}
|
||||||
@@ -1,56 +1,56 @@
|
|||||||
Epoch 01: loss = inf
|
Epoch 01: loss = inf
|
||||||
Epoch 02: loss = inf
|
Epoch 02: loss = inf
|
||||||
Epoch 03: loss = 6.678
|
Epoch 03: loss = 6.678
|
||||||
Epoch 04: loss = 4.361
|
Epoch 04: loss = 4.361
|
||||||
Epoch 05: loss = 3.110
|
Epoch 05: loss = 3.110
|
||||||
Epoch 06: loss = 2.099
|
Epoch 06: loss = 2.099
|
||||||
Epoch 07: loss = 1.698
|
Epoch 07: loss = 1.698
|
||||||
Epoch 08: loss = 1.320
|
Epoch 08: loss = 1.320
|
||||||
Epoch 09: loss = 0.970
|
Epoch 09: loss = 0.970
|
||||||
Epoch 10: loss = 0.891
|
Epoch 10: loss = 0.891
|
||||||
Epoch 10: validation accuracy = 66.0%
|
Epoch 10: validation accuracy = 66.0%
|
||||||
Epoch 11: loss = 0.817
|
Epoch 11: loss = 0.817
|
||||||
Epoch 12: loss = 0.723
|
Epoch 12: loss = 0.723
|
||||||
Epoch 13: loss = 0.512
|
Epoch 13: loss = 0.512
|
||||||
Epoch 14: loss = 0.353
|
Epoch 14: loss = 0.353
|
||||||
Epoch 15: loss = 0.202
|
Epoch 15: loss = 0.202
|
||||||
Epoch 16: loss = 0.182
|
Epoch 16: loss = 0.182
|
||||||
Epoch 17: loss = 0.184
|
Epoch 17: loss = 0.184
|
||||||
Epoch 18: loss = 0.191
|
Epoch 18: loss = 0.191
|
||||||
Epoch 19: loss = 0.175
|
Epoch 19: loss = 0.175
|
||||||
Epoch 20: loss = 0.166
|
Epoch 20: loss = 0.166
|
||||||
Epoch 20: validation accuracy = 68.0%
|
Epoch 20: validation accuracy = 68.0%
|
||||||
Epoch 21: loss = 0.146
|
Epoch 21: loss = 0.146
|
||||||
Epoch 22: loss = 0.105
|
Epoch 22: loss = 0.105
|
||||||
Epoch 23: loss = 0.109
|
Epoch 23: loss = 0.109
|
||||||
Epoch 24: loss = 0.074
|
Epoch 24: loss = 0.074
|
||||||
Epoch 25: loss = 0.097
|
Epoch 25: loss = 0.097
|
||||||
Epoch 26: loss = 0.047
|
Epoch 26: loss = 0.047
|
||||||
Epoch 27: loss = 0.038
|
Epoch 27: loss = 0.038
|
||||||
Epoch 28: loss = 0.037
|
Epoch 28: loss = 0.037
|
||||||
Epoch 29: loss = 0.024
|
Epoch 29: loss = 0.024
|
||||||
Epoch 30: loss = 0.021
|
Epoch 30: loss = 0.021
|
||||||
Epoch 30: validation accuracy = 68.8%
|
Epoch 30: validation accuracy = 68.8%
|
||||||
Epoch 31: loss = 0.019
|
Epoch 31: loss = 0.019
|
||||||
Epoch 32: loss = 0.024
|
Epoch 32: loss = 0.024
|
||||||
Epoch 33: loss = 0.023
|
Epoch 33: loss = 0.023
|
||||||
Epoch 34: loss = 0.014
|
Epoch 34: loss = 0.014
|
||||||
Epoch 35: loss = 0.013
|
Epoch 35: loss = 0.013
|
||||||
Epoch 36: loss = 0.012
|
Epoch 36: loss = 0.012
|
||||||
Epoch 37: loss = 0.011
|
Epoch 37: loss = 0.011
|
||||||
Epoch 38: loss = 0.013
|
Epoch 38: loss = 0.013
|
||||||
Epoch 39: loss = 0.013
|
Epoch 39: loss = 0.013
|
||||||
Epoch 40: loss = 0.016
|
Epoch 40: loss = 0.016
|
||||||
Epoch 40: validation accuracy = 70.5%
|
Epoch 40: validation accuracy = 70.5%
|
||||||
Epoch 41: loss = 0.015
|
Epoch 41: loss = 0.015
|
||||||
Epoch 42: loss = 0.009
|
Epoch 42: loss = 0.009
|
||||||
Epoch 43: loss = 0.011
|
Epoch 43: loss = 0.011
|
||||||
Epoch 44: loss = 0.008
|
Epoch 44: loss = 0.008
|
||||||
Epoch 45: loss = 0.008
|
Epoch 45: loss = 0.008
|
||||||
Epoch 46: loss = 0.010
|
Epoch 46: loss = 0.010
|
||||||
Epoch 47: loss = 0.009
|
Epoch 47: loss = 0.009
|
||||||
Epoch 48: loss = 0.007
|
Epoch 48: loss = 0.007
|
||||||
Epoch 49: loss = 0.007
|
Epoch 49: loss = 0.007
|
||||||
Epoch 50: loss = 0.010
|
Epoch 50: loss = 0.010
|
||||||
Epoch 50: validation accuracy = 70.5%
|
Epoch 50: validation accuracy = 70.5%
|
||||||
Model saved in ./saved_models/default.pth
|
Model saved in ./saved_models/default.pth
|
||||||
@@ -1,2 +1,2 @@
|
|||||||
[Info] Load model from .\saved_models\default.pth
|
[Info] Load model from .\saved_models\default.pth
|
||||||
[Info] Test accuracy = 72.0%
|
[Info] Test accuracy = 72.0%
|
||||||
@@ -1,2 +1,2 @@
|
|||||||
[Info] Load model from .\saved_models\adam_optim.pth
|
[Info] Load model from .\saved_models\adam_optim.pth
|
||||||
[Info] Test accuracy = 85.0%
|
[Info] Test accuracy = 85.0%
|
||||||
@@ -1,56 +1,56 @@
|
|||||||
Epoch 01: loss = inf
|
Epoch 01: loss = inf
|
||||||
Epoch 02: loss = inf
|
Epoch 02: loss = inf
|
||||||
Epoch 03: loss = inf
|
Epoch 03: loss = inf
|
||||||
Epoch 04: loss = inf
|
Epoch 04: loss = inf
|
||||||
Epoch 05: loss = inf
|
Epoch 05: loss = inf
|
||||||
Epoch 06: loss = inf
|
Epoch 06: loss = inf
|
||||||
Epoch 07: loss = inf
|
Epoch 07: loss = inf
|
||||||
Epoch 08: loss = inf
|
Epoch 08: loss = inf
|
||||||
Epoch 09: loss = 3.250
|
Epoch 09: loss = 3.250
|
||||||
Epoch 10: loss = 2.567
|
Epoch 10: loss = 2.567
|
||||||
Epoch 10: validation accuracy = 59.0%
|
Epoch 10: validation accuracy = 59.0%
|
||||||
Epoch 11: loss = 1.963
|
Epoch 11: loss = 1.963
|
||||||
Epoch 12: loss = 1.558
|
Epoch 12: loss = 1.558
|
||||||
Epoch 13: loss = 1.320
|
Epoch 13: loss = 1.320
|
||||||
Epoch 14: loss = 0.911
|
Epoch 14: loss = 0.911
|
||||||
Epoch 15: loss = 0.808
|
Epoch 15: loss = 0.808
|
||||||
Epoch 16: loss = 0.932
|
Epoch 16: loss = 0.932
|
||||||
Epoch 17: loss = 0.861
|
Epoch 17: loss = 0.861
|
||||||
Epoch 18: loss = 0.748
|
Epoch 18: loss = 0.748
|
||||||
Epoch 19: loss = 0.783
|
Epoch 19: loss = 0.783
|
||||||
Epoch 20: loss = 0.809
|
Epoch 20: loss = 0.809
|
||||||
Epoch 20: validation accuracy = 65.5%
|
Epoch 20: validation accuracy = 65.5%
|
||||||
Epoch 21: loss = 0.678
|
Epoch 21: loss = 0.678
|
||||||
Epoch 22: loss = 0.757
|
Epoch 22: loss = 0.757
|
||||||
Epoch 23: loss = 0.747
|
Epoch 23: loss = 0.747
|
||||||
Epoch 24: loss = 0.660
|
Epoch 24: loss = 0.660
|
||||||
Epoch 25: loss = 0.536
|
Epoch 25: loss = 0.536
|
||||||
Epoch 26: loss = 0.506
|
Epoch 26: loss = 0.506
|
||||||
Epoch 27: loss = 0.577
|
Epoch 27: loss = 0.577
|
||||||
Epoch 28: loss = 0.600
|
Epoch 28: loss = 0.600
|
||||||
Epoch 29: loss = 0.681
|
Epoch 29: loss = 0.681
|
||||||
Epoch 30: loss = 0.604
|
Epoch 30: loss = 0.604
|
||||||
Epoch 30: validation accuracy = 68.0%
|
Epoch 30: validation accuracy = 68.0%
|
||||||
Epoch 31: loss = 0.552
|
Epoch 31: loss = 0.552
|
||||||
Epoch 32: loss = 0.671
|
Epoch 32: loss = 0.671
|
||||||
Epoch 33: loss = 0.604
|
Epoch 33: loss = 0.604
|
||||||
Epoch 34: loss = 0.600
|
Epoch 34: loss = 0.600
|
||||||
Epoch 35: loss = 0.818
|
Epoch 35: loss = 0.818
|
||||||
Epoch 36: loss = 0.659
|
Epoch 36: loss = 0.659
|
||||||
Epoch 37: loss = 0.375
|
Epoch 37: loss = 0.375
|
||||||
Epoch 38: loss = 0.380
|
Epoch 38: loss = 0.380
|
||||||
Epoch 39: loss = 0.418
|
Epoch 39: loss = 0.418
|
||||||
Epoch 40: loss = 0.431
|
Epoch 40: loss = 0.431
|
||||||
Epoch 40: validation accuracy = 73.5%
|
Epoch 40: validation accuracy = 73.5%
|
||||||
Epoch 41: loss = 0.551
|
Epoch 41: loss = 0.551
|
||||||
Epoch 42: loss = 0.488
|
Epoch 42: loss = 0.488
|
||||||
Epoch 43: loss = 0.350
|
Epoch 43: loss = 0.350
|
||||||
Epoch 44: loss = 0.287
|
Epoch 44: loss = 0.287
|
||||||
Epoch 45: loss = 0.294
|
Epoch 45: loss = 0.294
|
||||||
Epoch 46: loss = 0.463
|
Epoch 46: loss = 0.463
|
||||||
Epoch 47: loss = 0.438
|
Epoch 47: loss = 0.438
|
||||||
Epoch 48: loss = 0.392
|
Epoch 48: loss = 0.392
|
||||||
Epoch 49: loss = 0.325
|
Epoch 49: loss = 0.325
|
||||||
Epoch 50: loss = 0.332
|
Epoch 50: loss = 0.332
|
||||||
Epoch 50: validation accuracy = 80.8%
|
Epoch 50: validation accuracy = 80.8%
|
||||||
Model saved in .\saved_models\adam_optim_cuda.pth
|
Model saved in .\saved_models\adam_optim_cuda.pth
|
||||||
@@ -1,2 +1,2 @@
|
|||||||
[Info] Load model from .\saved_models\adam_optim_lr1e-3_epoch100_momentum10.pth
|
[Info] Load model from .\saved_models\adam_optim_lr1e-3_epoch100_momentum10.pth
|
||||||
[Info] Test accuracy = 88.8%
|
[Info] Test accuracy = 88.8%
|
||||||
@@ -1,111 +1,111 @@
|
|||||||
Epoch 01: loss = inf
|
Epoch 01: loss = inf
|
||||||
Epoch 02: loss = inf
|
Epoch 02: loss = inf
|
||||||
Epoch 03: loss = inf
|
Epoch 03: loss = inf
|
||||||
Epoch 04: loss = inf
|
Epoch 04: loss = inf
|
||||||
Epoch 05: loss = inf
|
Epoch 05: loss = inf
|
||||||
Epoch 06: loss = inf
|
Epoch 06: loss = inf
|
||||||
Epoch 07: loss = inf
|
Epoch 07: loss = inf
|
||||||
Epoch 08: loss = inf
|
Epoch 08: loss = inf
|
||||||
Epoch 09: loss = inf
|
Epoch 09: loss = inf
|
||||||
Epoch 10: loss = inf
|
Epoch 10: loss = inf
|
||||||
Epoch 10: validation accuracy = 40.2%
|
Epoch 10: validation accuracy = 40.2%
|
||||||
Epoch 11: loss = inf
|
Epoch 11: loss = inf
|
||||||
Epoch 12: loss = inf
|
Epoch 12: loss = inf
|
||||||
Epoch 13: loss = inf
|
Epoch 13: loss = inf
|
||||||
Epoch 14: loss = inf
|
Epoch 14: loss = inf
|
||||||
Epoch 15: loss = inf
|
Epoch 15: loss = inf
|
||||||
Epoch 16: loss = inf
|
Epoch 16: loss = inf
|
||||||
Epoch 17: loss = 2.360
|
Epoch 17: loss = 2.360
|
||||||
Epoch 18: loss = 2.086
|
Epoch 18: loss = 2.086
|
||||||
Epoch 19: loss = 1.684
|
Epoch 19: loss = 1.684
|
||||||
Epoch 20: loss = 1.453
|
Epoch 20: loss = 1.453
|
||||||
Epoch 20: validation accuracy = 53.0%
|
Epoch 20: validation accuracy = 53.0%
|
||||||
Epoch 21: loss = 1.174
|
Epoch 21: loss = 1.174
|
||||||
Epoch 22: loss = 1.046
|
Epoch 22: loss = 1.046
|
||||||
Epoch 23: loss = 0.859
|
Epoch 23: loss = 0.859
|
||||||
Epoch 24: loss = 0.740
|
Epoch 24: loss = 0.740
|
||||||
Epoch 25: loss = 0.663
|
Epoch 25: loss = 0.663
|
||||||
Epoch 26: loss = 0.495
|
Epoch 26: loss = 0.495
|
||||||
Epoch 27: loss = 0.566
|
Epoch 27: loss = 0.566
|
||||||
Epoch 28: loss = 0.521
|
Epoch 28: loss = 0.521
|
||||||
Epoch 29: loss = 0.470
|
Epoch 29: loss = 0.470
|
||||||
Epoch 30: loss = 0.363
|
Epoch 30: loss = 0.363
|
||||||
Epoch 30: validation accuracy = 59.0%
|
Epoch 30: validation accuracy = 59.0%
|
||||||
Epoch 31: loss = 0.365
|
Epoch 31: loss = 0.365
|
||||||
Epoch 32: loss = 0.305
|
Epoch 32: loss = 0.305
|
||||||
Epoch 33: loss = 0.333
|
Epoch 33: loss = 0.333
|
||||||
Epoch 34: loss = 0.293
|
Epoch 34: loss = 0.293
|
||||||
Epoch 35: loss = 0.191
|
Epoch 35: loss = 0.191
|
||||||
Epoch 36: loss = 0.295
|
Epoch 36: loss = 0.295
|
||||||
Epoch 37: loss = 0.275
|
Epoch 37: loss = 0.275
|
||||||
Epoch 38: loss = 0.461
|
Epoch 38: loss = 0.461
|
||||||
Epoch 39: loss = 0.509
|
Epoch 39: loss = 0.509
|
||||||
Epoch 40: loss = 0.298
|
Epoch 40: loss = 0.298
|
||||||
Epoch 40: validation accuracy = 65.2%
|
Epoch 40: validation accuracy = 65.2%
|
||||||
Epoch 41: loss = 0.186
|
Epoch 41: loss = 0.186
|
||||||
Epoch 42: loss = 0.395
|
Epoch 42: loss = 0.395
|
||||||
Epoch 43: loss = 0.323
|
Epoch 43: loss = 0.323
|
||||||
Epoch 44: loss = 0.309
|
Epoch 44: loss = 0.309
|
||||||
Epoch 45: loss = 0.199
|
Epoch 45: loss = 0.199
|
||||||
Epoch 46: loss = 0.285
|
Epoch 46: loss = 0.285
|
||||||
Epoch 47: loss = 0.290
|
Epoch 47: loss = 0.290
|
||||||
Epoch 48: loss = 0.302
|
Epoch 48: loss = 0.302
|
||||||
Epoch 49: loss = 0.235
|
Epoch 49: loss = 0.235
|
||||||
Epoch 50: loss = 0.190
|
Epoch 50: loss = 0.190
|
||||||
Epoch 50: validation accuracy = 71.2%
|
Epoch 50: validation accuracy = 71.2%
|
||||||
Epoch 51: loss = 0.294
|
Epoch 51: loss = 0.294
|
||||||
Epoch 52: loss = 0.311
|
Epoch 52: loss = 0.311
|
||||||
Epoch 53: loss = 0.254
|
Epoch 53: loss = 0.254
|
||||||
Epoch 54: loss = 0.289
|
Epoch 54: loss = 0.289
|
||||||
Epoch 55: loss = 0.264
|
Epoch 55: loss = 0.264
|
||||||
Epoch 56: loss = 0.213
|
Epoch 56: loss = 0.213
|
||||||
Epoch 57: loss = 0.166
|
Epoch 57: loss = 0.166
|
||||||
Epoch 58: loss = 0.218
|
Epoch 58: loss = 0.218
|
||||||
Epoch 59: loss = 0.231
|
Epoch 59: loss = 0.231
|
||||||
Epoch 60: loss = 0.283
|
Epoch 60: loss = 0.283
|
||||||
Epoch 60: validation accuracy = 74.8%
|
Epoch 60: validation accuracy = 74.8%
|
||||||
Epoch 61: loss = 0.324
|
Epoch 61: loss = 0.324
|
||||||
Epoch 62: loss = 0.245
|
Epoch 62: loss = 0.245
|
||||||
Epoch 63: loss = 0.277
|
Epoch 63: loss = 0.277
|
||||||
Epoch 64: loss = 0.286
|
Epoch 64: loss = 0.286
|
||||||
Epoch 65: loss = 0.255
|
Epoch 65: loss = 0.255
|
||||||
Epoch 66: loss = 0.263
|
Epoch 66: loss = 0.263
|
||||||
Epoch 67: loss = 0.272
|
Epoch 67: loss = 0.272
|
||||||
Epoch 68: loss = 0.272
|
Epoch 68: loss = 0.272
|
||||||
Epoch 69: loss = 0.260
|
Epoch 69: loss = 0.260
|
||||||
Epoch 70: loss = 0.271
|
Epoch 70: loss = 0.271
|
||||||
Epoch 70: validation accuracy = 79.0%
|
Epoch 70: validation accuracy = 79.0%
|
||||||
Epoch 71: loss = 0.310
|
Epoch 71: loss = 0.310
|
||||||
Epoch 72: loss = 0.301
|
Epoch 72: loss = 0.301
|
||||||
Epoch 73: loss = 0.305
|
Epoch 73: loss = 0.305
|
||||||
Epoch 74: loss = 0.311
|
Epoch 74: loss = 0.311
|
||||||
Epoch 75: loss = 0.329
|
Epoch 75: loss = 0.329
|
||||||
Epoch 76: loss = 0.295
|
Epoch 76: loss = 0.295
|
||||||
Epoch 77: loss = 0.300
|
Epoch 77: loss = 0.300
|
||||||
Epoch 78: loss = 0.316
|
Epoch 78: loss = 0.316
|
||||||
Epoch 79: loss = 0.326
|
Epoch 79: loss = 0.326
|
||||||
Epoch 80: loss = 0.352
|
Epoch 80: loss = 0.352
|
||||||
Epoch 80: validation accuracy = 77.5%
|
Epoch 80: validation accuracy = 77.5%
|
||||||
Epoch 81: loss = 0.344
|
Epoch 81: loss = 0.344
|
||||||
Epoch 82: loss = 0.326
|
Epoch 82: loss = 0.326
|
||||||
Epoch 83: loss = 0.326
|
Epoch 83: loss = 0.326
|
||||||
Epoch 84: loss = 0.335
|
Epoch 84: loss = 0.335
|
||||||
Epoch 85: loss = 0.342
|
Epoch 85: loss = 0.342
|
||||||
Epoch 86: loss = 0.361
|
Epoch 86: loss = 0.361
|
||||||
Epoch 87: loss = 0.337
|
Epoch 87: loss = 0.337
|
||||||
Epoch 88: loss = 0.339
|
Epoch 88: loss = 0.339
|
||||||
Epoch 89: loss = 0.339
|
Epoch 89: loss = 0.339
|
||||||
Epoch 90: loss = 0.341
|
Epoch 90: loss = 0.341
|
||||||
Epoch 90: validation accuracy = 82.8%
|
Epoch 90: validation accuracy = 82.8%
|
||||||
Epoch 91: loss = 0.350
|
Epoch 91: loss = 0.350
|
||||||
Epoch 92: loss = 0.359
|
Epoch 92: loss = 0.359
|
||||||
Epoch 93: loss = 0.352
|
Epoch 93: loss = 0.352
|
||||||
Epoch 94: loss = 0.363
|
Epoch 94: loss = 0.363
|
||||||
Epoch 95: loss = 0.347
|
Epoch 95: loss = 0.347
|
||||||
Epoch 96: loss = 0.341
|
Epoch 96: loss = 0.341
|
||||||
Epoch 97: loss = 0.336
|
Epoch 97: loss = 0.336
|
||||||
Epoch 98: loss = 0.348
|
Epoch 98: loss = 0.348
|
||||||
Epoch 99: loss = 0.365
|
Epoch 99: loss = 0.365
|
||||||
Epoch 100: loss = 0.350
|
Epoch 100: loss = 0.350
|
||||||
Epoch 100: validation accuracy = 85.2%
|
Epoch 100: validation accuracy = 85.2%
|
||||||
Model saved in .\saved_models\adam_optim_lr1e-3_epoch100_momentum10.pth
|
Model saved in .\saved_models\adam_optim_lr1e-3_epoch100_momentum10.pth
|
||||||
@@ -1,244 +1,244 @@
|
|||||||
% Homework Template
|
% Homework Template
|
||||||
\documentclass[a4paper]{article}
|
\documentclass[a4paper]{article}
|
||||||
\usepackage{ctex}
|
\usepackage{ctex}
|
||||||
\usepackage{amsmath, amssymb, amsthm}
|
\usepackage{amsmath, amssymb, amsthm}
|
||||||
\usepackage{moreenum}
|
\usepackage{moreenum}
|
||||||
\usepackage{mathtools}
|
\usepackage{mathtools}
|
||||||
\usepackage{url}
|
\usepackage{url}
|
||||||
\usepackage{bm}
|
\usepackage{bm}
|
||||||
\usepackage{enumitem}
|
\usepackage{enumitem}
|
||||||
\usepackage{graphicx}
|
\usepackage{graphicx}
|
||||||
\usepackage{subcaption}
|
\usepackage{subcaption}
|
||||||
\usepackage{booktabs} % toprule
|
\usepackage{booktabs} % toprule
|
||||||
\usepackage[mathcal]{eucal}
|
\usepackage[mathcal]{eucal}
|
||||||
\usepackage[thehwcnt = 1]{iidef}
|
\usepackage[thehwcnt = 1]{iidef}
|
||||||
\usepackage{listings}
|
\usepackage{listings}
|
||||||
\usepackage[x11names]{xcolor}
|
\usepackage[x11names]{xcolor}
|
||||||
\usepackage{float}
|
\usepackage{float}
|
||||||
\usepackage[colorlinks, linkcolor=black, anchorcolor=green, citecolor=blue]{hyperref}
|
\usepackage[colorlinks, linkcolor=black, anchorcolor=green, citecolor=blue]{hyperref}
|
||||||
|
|
||||||
\DeclareMathOperator{\arctanh}{arctanh}
|
\DeclareMathOperator{\arctanh}{arctanh}
|
||||||
% \DeclareMathOperator{\diag}{diag}
|
% \DeclareMathOperator{\diag}{diag}
|
||||||
|
|
||||||
\setenumerate[1]{label=(\arabic{*})}
|
\setenumerate[1]{label=(\arabic{*})}
|
||||||
\setenumerate[2]{label=\arabic{*})}
|
\setenumerate[2]{label=\arabic{*})}
|
||||||
|
|
||||||
\definecolor{codekeyword}{RGB}{171, 0, 216}
|
\definecolor{codekeyword}{RGB}{171, 0, 216}
|
||||||
\definecolor{codetypename}{RGB}{29, 37, 251}
|
\definecolor{codetypename}{RGB}{29, 37, 251}
|
||||||
\definecolor{codevariable}{RGB}{10, 23, 126}
|
\definecolor{codevariable}{RGB}{10, 23, 126}
|
||||||
\definecolor{codestring}{RGB}{157, 0, 25}
|
\definecolor{codestring}{RGB}{157, 0, 25}
|
||||||
\definecolor{codecomment}{RGB}{31, 129, 19}
|
\definecolor{codecomment}{RGB}{31, 129, 19}
|
||||||
|
|
||||||
\newfontfamily\cascadia[Ligatures=ResetAll]{Cascadia Code}
|
\newfontfamily\cascadia[Ligatures=ResetAll]{Cascadia Code}
|
||||||
% \newfontfamily\codefont[Ligatures=ResetAll]{Cascadia Code}
|
% \newfontfamily\codefont[Ligatures=ResetAll]{Cascadia Code}
|
||||||
\newfontfamily\codefont[Ligatures=ResetAll]{Fira Code}[Contextuals={Alternate}]
|
\newfontfamily\codefont[Ligatures=ResetAll]{Fira Code}[Contextuals={Alternate}]
|
||||||
% To enable ligature in listing, go check lstfiracode's github page and copy firacodestyle's settings.
|
% To enable ligature in listing, go check lstfiracode's github page and copy firacodestyle's settings.
|
||||||
|
|
||||||
\lstset{
|
\lstset{
|
||||||
basicstyle = \small\codefont,
|
basicstyle = \small\codefont,
|
||||||
% ---
|
% ---
|
||||||
tabsize = 4,
|
tabsize = 4,
|
||||||
showstringspaces = false,
|
showstringspaces = false,
|
||||||
numbers = left,
|
numbers = left,
|
||||||
numberstyle = \cascadia,
|
numberstyle = \cascadia,
|
||||||
% ---
|
% ---
|
||||||
breaklines = true,
|
breaklines = true,
|
||||||
captionpos = t,
|
captionpos = t,
|
||||||
% ---
|
% ---
|
||||||
frame = l,
|
frame = l,
|
||||||
flexiblecolumns,
|
flexiblecolumns,
|
||||||
columns = fixed,
|
columns = fixed,
|
||||||
}
|
}
|
||||||
|
|
||||||
\thecourseinstitute{清华大学电子工程系}
|
\thecourseinstitute{清华大学电子工程系}
|
||||||
\thecoursename{\textbf{媒体与认知} \space 课堂2}
|
\thecoursename{\textbf{媒体与认知} \space 课堂2}
|
||||||
\theterm{2023-2024学年春季学期}
|
\theterm{2023-2024学年春季学期}
|
||||||
\hwname{作业}
|
\hwname{作业}
|
||||||
\begin{document}
|
\begin{document}
|
||||||
\courseheader
|
\courseheader
|
||||||
% 请在YOUR NAME处填写自己的姓名
|
% 请在YOUR NAME处填写自己的姓名
|
||||||
\name{高艺轩}
|
\name{高艺轩}
|
||||||
\vspace{3mm}
|
\vspace{3mm}
|
||||||
\centerline{\textbf{\Large{理论部分}}}
|
\centerline{\textbf{\Large{理论部分}}}
|
||||||
|
|
||||||
\section{单选题(15分)}
|
\section{单选题(15分)}
|
||||||
% 请在?处填写答案
|
% 请在?处填写答案
|
||||||
\subsection{\underline{B}}
|
\subsection{\underline{B}}
|
||||||
|
|
||||||
\subsection{\underline{A}}
|
\subsection{\underline{A}}
|
||||||
|
|
||||||
\subsection{\underline{B}}
|
\subsection{\underline{B}}
|
||||||
|
|
||||||
\subsection{\underline{A}}
|
\subsection{\underline{A}}
|
||||||
|
|
||||||
\subsection{\underline{B}}
|
\subsection{\underline{B}}
|
||||||
|
|
||||||
\section{计算题(15 分)}
|
\section{计算题(15 分)}
|
||||||
\subsection{设隐含层为$\mathbf{z}=\mathbf{W}^T\mathbf{x}+\mathbf{b}$,其中$\mathbf{x}\in R^{(m \times 1)}$,$\mathbf{z}\in R^{(n\times 1)}$,$\mathbf{W}\in R^{(m\times n)}$,$\mathbf{b} \in R^{(n\times 1)}$均为已知,其激活函数如下:
|
\subsection{设隐含层为$\mathbf{z}=\mathbf{W}^T\mathbf{x}+\mathbf{b}$,其中$\mathbf{x}\in R^{(m \times 1)}$,$\mathbf{z}\in R^{(n\times 1)}$,$\mathbf{W}\in R^{(m\times n)}$,$\mathbf{b} \in R^{(n\times 1)}$均为已知,其激活函数如下:
|
||||||
$$\mathbf{y}=\delta(\mathbf{z})=\tanh(\mathbf{z})$$
|
$$\mathbf{y}=\delta(\mathbf{z})=\tanh(\mathbf{z})$$
|
||||||
tanh表示双曲正切函数。若训练过程中的目标函数为L,且已知L对$\mathbf{y}$的导数 $\frac{\partial L}{\partial \mathbf{y}}=[\frac{\partial L}{\partial y_1},\frac{\partial L}{\partial y_2},...,\frac{\partial L}{\partial y_n}]^T$和$\mathbf{y}=[y_1,y_2,...,y_n]^T$的值。
|
tanh表示双曲正切函数。若训练过程中的目标函数为L,且已知L对$\mathbf{y}$的导数 $\frac{\partial L}{\partial \mathbf{y}}=[\frac{\partial L}{\partial y_1},\frac{\partial L}{\partial y_2},...,\frac{\partial L}{\partial y_n}]^T$和$\mathbf{y}=[y_1,y_2,...,y_n]^T$的值。
|
||||||
}
|
}
|
||||||
\subsubsection{请使用$\mathbf{y}$表示出$\frac{\partial \mathbf{y}^T}{\partial \mathbf{z}}$, 这里的$\mathbf{y}^T$ 为行向量。
|
\subsubsection{请使用$\mathbf{y}$表示出$\frac{\partial \mathbf{y}^T}{\partial \mathbf{z}}$, 这里的$\mathbf{y}^T$ 为行向量。
|
||||||
}
|
}
|
||||||
|
|
||||||
\begin{proof}[解]
|
\begin{proof}[解]
|
||||||
首先,对$i \neq j$,$\dfrac{\partial y_i}{\partial z_j} = 0$。
|
首先,对$i \neq j$,$\dfrac{\partial y_i}{\partial z_j} = 0$。
|
||||||
|
|
||||||
同时$y_i = \tanh(z_i) = \tanh(\arctanh(y_i))$,因此
|
同时$y_i = \tanh(z_i) = \tanh(\arctanh(y_i))$,因此
|
||||||
\[\frac{\partial y_i}{\partial z_i} = 1 - \tanh^2(z_i) = 1 - y_i^2\]
|
\[\frac{\partial y_i}{\partial z_i} = 1 - \tanh^2(z_i) = 1 - y_i^2\]
|
||||||
因此
|
因此
|
||||||
\[\dfrac{\partial \boldsymbol{y}^\mathrm{T}}{\partial \boldsymbol{z}} = \diag\{1 - y_1^2, 1 - y_2^2, \dots, 1 - y_n^2\} \qedhere\]
|
\[\dfrac{\partial \boldsymbol{y}^\mathrm{T}}{\partial \boldsymbol{z}} = \diag\{1 - y_1^2, 1 - y_2^2, \dots, 1 - y_n^2\} \qedhere\]
|
||||||
\end{proof}
|
\end{proof}
|
||||||
|
|
||||||
\subsubsection{请使用$\mathbf{y}$和$\frac{\partial L}{\partial \mathbf{y}}$表示$\frac{\partial L}{\partial \mathbf{x}}$,$\frac{\partial L}{\partial \mathbf{W}}$,$\frac{\partial L}{\partial \mathbf{b}}$。
|
\subsubsection{请使用$\mathbf{y}$和$\frac{\partial L}{\partial \mathbf{y}}$表示$\frac{\partial L}{\partial \mathbf{x}}$,$\frac{\partial L}{\partial \mathbf{W}}$,$\frac{\partial L}{\partial \mathbf{b}}$。
|
||||||
}
|
}
|
||||||
提示:$\frac{\partial L}{\partial \mathbf{x}}$,$\frac{\partial L}{\partial \mathbf{W}}$,$\frac{\partial L}{\partial \mathbf{b}}$与x,W,b具有相同维度。
|
提示:$\frac{\partial L}{\partial \mathbf{x}}$,$\frac{\partial L}{\partial \mathbf{W}}$,$\frac{\partial L}{\partial \mathbf{b}}$与x,W,b具有相同维度。
|
||||||
|
|
||||||
\begin{proof}[解]
|
\begin{proof}[解]
|
||||||
由链式法则
|
由链式法则
|
||||||
\[\frac{\partial L}{\partial \boldsymbol{x}} = \frac{\partial \boldsymbol{z}^\mathrm{T}}{\partial \boldsymbol{x}} \frac{\partial \boldsymbol{y}^\mathrm{T}}{\partial \boldsymbol{z}} \frac{\partial L}{\partial \boldsymbol{y}} = W \diag\{1 - y_1^2, 1 - y_2^2, \dots, 1 - y_n^2\} \frac{\partial L}{\partial \boldsymbol{y}}\]
|
\[\frac{\partial L}{\partial \boldsymbol{x}} = \frac{\partial \boldsymbol{z}^\mathrm{T}}{\partial \boldsymbol{x}} \frac{\partial \boldsymbol{y}^\mathrm{T}}{\partial \boldsymbol{z}} \frac{\partial L}{\partial \boldsymbol{y}} = W \diag\{1 - y_1^2, 1 - y_2^2, \dots, 1 - y_n^2\} \frac{\partial L}{\partial \boldsymbol{y}}\]
|
||||||
|
|
||||||
对于$\dfrac{\partial L}{\partial W}$,
|
对于$\dfrac{\partial L}{\partial W}$,
|
||||||
\[\frac{\partial \boldsymbol{z}^T}{\partial W} = \begin{bmatrix}
|
\[\frac{\partial \boldsymbol{z}^T}{\partial W} = \begin{bmatrix}
|
||||||
\boldsymbol{x} & \boldsymbol{x} & \cdots & \boldsymbol{x}
|
\boldsymbol{x} & \boldsymbol{x} & \cdots & \boldsymbol{x}
|
||||||
\end{bmatrix}_{m \times n}\]
|
\end{bmatrix}_{m \times n}\]
|
||||||
|
|
||||||
\begin{align*}
|
\begin{align*}
|
||||||
\frac{\partial L}{\partial W} & = \boldsymbol{x} \left(\frac{\partial \boldsymbol{y}^\mathrm{T}}{\partial \boldsymbol{z}} \frac{\partial L}{\partial \boldsymbol{y}}\right)^\mathrm{T}\\
|
\frac{\partial L}{\partial W} & = \boldsymbol{x} \left(\frac{\partial \boldsymbol{y}^\mathrm{T}}{\partial \boldsymbol{z}} \frac{\partial L}{\partial \boldsymbol{y}}\right)^\mathrm{T}\\
|
||||||
& = \begin{bmatrix}
|
& = \begin{bmatrix}
|
||||||
\boldsymbol{x} & \boldsymbol{x} & \cdots & \boldsymbol{x}
|
\boldsymbol{x} & \boldsymbol{x} & \cdots & \boldsymbol{x}
|
||||||
\end{bmatrix}_{m \times n} \diag\left\{(1 - y_1^2) \frac{\partial L}{\partial y_1}, (1 - y_2^2) \frac{\partial L}{\partial y_2}, \dots, (1 - y_n^2) \frac{\partial L}{\partial y_n}\right\}
|
\end{bmatrix}_{m \times n} \diag\left\{(1 - y_1^2) \frac{\partial L}{\partial y_1}, (1 - y_2^2) \frac{\partial L}{\partial y_2}, \dots, (1 - y_n^2) \frac{\partial L}{\partial y_n}\right\}
|
||||||
\end{align*}
|
\end{align*}
|
||||||
|
|
||||||
对于$\dfrac{\partial L}{\partial \boldsymbol{b}}$,由链式法则
|
对于$\dfrac{\partial L}{\partial \boldsymbol{b}}$,由链式法则
|
||||||
\[\frac{\partial L}{\partial \boldsymbol{b}} = \frac{\partial \boldsymbol{z}^\mathrm{T}}{\partial \boldsymbol{b}} \frac{\partial \boldsymbol{y}^\mathrm{T}}{\partial \boldsymbol{z}} \frac{\partial L}{\partial \boldsymbol{y}} = I_n \frac{\partial \boldsymbol{y}^\mathrm{T}}{\partial \boldsymbol{z}} \frac{\partial L}{\partial \boldsymbol{y}} = \diag\{1 - y_1^2, 1 - y_2^2, \dots, 1 - y_n^2\} \frac{\partial L}{\partial \boldsymbol{y}} \qedhere\]
|
\[\frac{\partial L}{\partial \boldsymbol{b}} = \frac{\partial \boldsymbol{z}^\mathrm{T}}{\partial \boldsymbol{b}} \frac{\partial \boldsymbol{y}^\mathrm{T}}{\partial \boldsymbol{z}} \frac{\partial L}{\partial \boldsymbol{y}} = I_n \frac{\partial \boldsymbol{y}^\mathrm{T}}{\partial \boldsymbol{z}} \frac{\partial L}{\partial \boldsymbol{y}} = \diag\{1 - y_1^2, 1 - y_2^2, \dots, 1 - y_n^2\} \frac{\partial L}{\partial \boldsymbol{y}} \qedhere\]
|
||||||
\end{proof}
|
\end{proof}
|
||||||
\vspace{6mm}
|
\vspace{6mm}
|
||||||
\centerline{\textbf{\Large{编程部分}}}
|
\centerline{\textbf{\Large{编程部分}}}
|
||||||
|
|
||||||
|
|
||||||
\vspace{3mm}
|
\vspace{3mm}
|
||||||
% 请根据是否选择自选课题的情况选择“编程作业报告”或“自选课题开题报告”中的一项完成
|
% 请根据是否选择自选课题的情况选择“编程作业报告”或“自选课题开题报告”中的一项完成
|
||||||
\section{编程作业报告}
|
\section{编程作业报告}
|
||||||
% 请在此处完成编程作业报告
|
% 请在此处完成编程作业报告
|
||||||
完成后的代码也可以在 \href{https://git.unlockableworld.com/unlockable/MediaNCognition}{\url{https://git.unlockableworld.com/unlockable/MediaNCognition}}中找到。
|
完成后的代码也可以在 \href{https://git.unlockableworld.com/unlockable/MediaNCognition}{\url{https://git.unlockableworld.com/unlockable/MediaNCognition}}中找到。
|
||||||
\begin{enumerate}
|
\begin{enumerate}
|
||||||
\item 使用默认配置进行训练和测试。
|
\item 使用默认配置进行训练和测试。
|
||||||
\begin{enumerate}
|
\begin{enumerate}
|
||||||
\item 训练模型。
|
\item 训练模型。
|
||||||
|
|
||||||
输入:
|
输入:
|
||||||
\lstinputlisting{codes/1.1.in.txt}
|
\lstinputlisting{codes/1.1.in.txt}
|
||||||
|
|
||||||
输出:
|
输出:
|
||||||
\lstinputlisting{codes/1.1.out.txt}
|
\lstinputlisting{codes/1.1.out.txt}
|
||||||
\begin{figure}[H]
|
\begin{figure}[H]
|
||||||
\centering
|
\centering
|
||||||
\includegraphics[width=0.9\linewidth]{img/1default_train.png}
|
\includegraphics[width=0.9\linewidth]{img/1default_train.png}
|
||||||
\end{figure}
|
\end{figure}
|
||||||
|
|
||||||
\item 测试模型。
|
\item 测试模型。
|
||||||
|
|
||||||
输入:
|
输入:
|
||||||
\lstinputlisting{codes/1.2.in.txt}
|
\lstinputlisting{codes/1.2.in.txt}
|
||||||
|
|
||||||
输出:
|
输出:
|
||||||
\lstinputlisting{codes/1.2.out.txt}
|
\lstinputlisting{codes/1.2.out.txt}
|
||||||
\end{enumerate}
|
\end{enumerate}
|
||||||
\item 调整参数、使用Adam优化器训练并测试。
|
\item 调整参数、使用Adam优化器训练并测试。
|
||||||
\begin{enumerate}
|
\begin{enumerate}
|
||||||
\item 训练模型。
|
\item 训练模型。
|
||||||
|
|
||||||
输入:
|
输入:
|
||||||
\lstinputlisting{codes/2.1.in.txt}
|
\lstinputlisting{codes/2.1.in.txt}
|
||||||
|
|
||||||
输出:
|
输出:
|
||||||
\lstinputlisting{codes/2.1.out.txt}
|
\lstinputlisting{codes/2.1.out.txt}
|
||||||
\begin{figure}[H]
|
\begin{figure}[H]
|
||||||
\centering
|
\centering
|
||||||
\includegraphics[width=0.9\linewidth]{img/2adam_optim.png}
|
\includegraphics[width=0.9\linewidth]{img/2adam_optim.png}
|
||||||
\end{figure}
|
\end{figure}
|
||||||
\item 测试性能。
|
\item 测试性能。
|
||||||
|
|
||||||
输入:
|
输入:
|
||||||
\lstinputlisting{codes/2.2.in.txt}
|
\lstinputlisting{codes/2.2.in.txt}
|
||||||
|
|
||||||
输出:
|
输出:
|
||||||
\lstinputlisting{codes/2.2.out.txt}
|
\lstinputlisting{codes/2.2.out.txt}
|
||||||
\end{enumerate}
|
\end{enumerate}
|
||||||
|
|
||||||
\item 使用效果最佳的模型测试。
|
\item 使用效果最佳的模型测试。
|
||||||
经过简单的尝试,发现使用
|
经过简单的尝试,发现使用
|
||||||
\lstinputlisting{codes/self_train.in.txt}
|
\lstinputlisting{codes/self_train.in.txt}
|
||||||
可以使测试集准确率达到88.8\%,有略微的提升。训练的loss曲线:
|
可以使测试集准确率达到88.8\%,有略微的提升。训练的loss曲线:
|
||||||
\begin{figure}[H]
|
\begin{figure}[H]
|
||||||
\centering
|
\centering
|
||||||
\includegraphics[width=.9\linewidth]{img/3found_best.png}
|
\includegraphics[width=.9\linewidth]{img/3found_best.png}
|
||||||
\end{figure}
|
\end{figure}
|
||||||
使用它进行预测:
|
使用它进行预测:
|
||||||
\begin{figure}[H]
|
\begin{figure}[H]
|
||||||
\centering
|
\centering
|
||||||
\begin{subfigure}[b]{.3\linewidth}
|
\begin{subfigure}[b]{.3\linewidth}
|
||||||
\includegraphics[width=\linewidth]{img/predict/predict01.png}
|
\includegraphics[width=\linewidth]{img/predict/predict01.png}
|
||||||
\subcaption{预测:A}
|
\subcaption{预测:A}
|
||||||
\end{subfigure}
|
\end{subfigure}
|
||||||
\hfill
|
\hfill
|
||||||
\begin{subfigure}[b]{.3\linewidth}
|
\begin{subfigure}[b]{.3\linewidth}
|
||||||
\includegraphics[width=\linewidth]{img/predict/predict02.png}
|
\includegraphics[width=\linewidth]{img/predict/predict02.png}
|
||||||
\subcaption{预测:B}
|
\subcaption{预测:B}
|
||||||
\end{subfigure}
|
\end{subfigure}
|
||||||
\hfill
|
\hfill
|
||||||
\begin{subfigure}[b]{.3\linewidth}
|
\begin{subfigure}[b]{.3\linewidth}
|
||||||
\includegraphics[width=\linewidth]{img/predict/predict03.png}
|
\includegraphics[width=\linewidth]{img/predict/predict03.png}
|
||||||
\subcaption{预测:M}
|
\subcaption{预测:M}
|
||||||
\end{subfigure}
|
\end{subfigure}
|
||||||
|
|
||||||
\begin{subfigure}[b]{.3\linewidth}
|
\begin{subfigure}[b]{.3\linewidth}
|
||||||
\includegraphics[width=\linewidth]{img/predict/predict04.png}
|
\includegraphics[width=\linewidth]{img/predict/predict04.png}
|
||||||
\subcaption{预测:R}
|
\subcaption{预测:R}
|
||||||
\end{subfigure}
|
\end{subfigure}
|
||||||
\hfill
|
\hfill
|
||||||
\begin{subfigure}[b]{.3\linewidth}
|
\begin{subfigure}[b]{.3\linewidth}
|
||||||
\includegraphics[width=\linewidth]{img/predict/predict05.png}
|
\includegraphics[width=\linewidth]{img/predict/predict05.png}
|
||||||
\subcaption{预测:M}
|
\subcaption{预测:M}
|
||||||
\end{subfigure}
|
\end{subfigure}
|
||||||
\hfill
|
\hfill
|
||||||
\begin{subfigure}[b]{.3\linewidth}
|
\begin{subfigure}[b]{.3\linewidth}
|
||||||
\includegraphics[width=\linewidth]{img/predict/predict06.png}
|
\includegraphics[width=\linewidth]{img/predict/predict06.png}
|
||||||
\subcaption{预测:O}
|
\subcaption{预测:O}
|
||||||
\end{subfigure}
|
\end{subfigure}
|
||||||
|
|
||||||
\hfill
|
\hfill
|
||||||
\begin{subfigure}[b]{.3\linewidth}
|
\begin{subfigure}[b]{.3\linewidth}
|
||||||
\includegraphics[width=\linewidth]{img/predict/predict07.png}
|
\includegraphics[width=\linewidth]{img/predict/predict07.png}
|
||||||
\subcaption{预测:B}
|
\subcaption{预测:B}
|
||||||
\end{subfigure}
|
\end{subfigure}
|
||||||
\hfill
|
\hfill
|
||||||
\begin{subfigure}[b]{.3\linewidth}
|
\begin{subfigure}[b]{.3\linewidth}
|
||||||
\includegraphics[width=\linewidth]{img/predict/predict08.png}
|
\includegraphics[width=\linewidth]{img/predict/predict08.png}
|
||||||
\subcaption{预测:W}
|
\subcaption{预测:W}
|
||||||
\end{subfigure}
|
\end{subfigure}
|
||||||
\hfill
|
\hfill
|
||||||
\end{figure}
|
\end{figure}
|
||||||
\item 遇到的问题及解决方法
|
\item 遇到的问题及解决方法
|
||||||
\begin{enumerate}
|
\begin{enumerate}
|
||||||
\item 代码中对灰度图像的矩阵进行标准化时,\lstinline{numpy}显示不能对\lstinline{NumpyGenericArray}进行对\lstinline{float}的\lstinline{/}操作。改用\lstinline{np.divide()}解决了这个问题。
|
\item 代码中对灰度图像的矩阵进行标准化时,\lstinline{numpy}显示不能对\lstinline{NumpyGenericArray}进行对\lstinline{float}的\lstinline{/}操作。改用\lstinline{np.divide()}解决了这个问题。
|
||||||
\item 在利用训练好的模型进行预测时,发现自己找到的大部分图片都预测错误;最后与训练集的图片进行了对比,发现主要问题是裁切字母时留下了过大的边距,导致模型不能正确理解输入。重新裁剪边框后,得到正确的结果。
|
\item 在利用训练好的模型进行预测时,发现自己找到的大部分图片都预测错误;最后与训练集的图片进行了对比,发现主要问题是裁切字母时留下了过大的边距,导致模型不能正确理解输入。重新裁剪边框后,得到正确的结果。
|
||||||
\end{enumerate}
|
\end{enumerate}
|
||||||
\item 建议:希望下次发布作业代码可以利用清华的git。
|
\item 建议:希望下次发布作业代码可以利用清华的git。
|
||||||
\end{enumerate}
|
\end{enumerate}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
% \section{自选课题开题报告}
|
% \section{自选课题开题报告}
|
||||||
% 请在此处介绍自选课题
|
% 请在此处介绍自选课题
|
||||||
|
|
||||||
\end{document}
|
\end{document}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
%%% Local Variables:
|
%%% Local Variables:
|
||||||
%%% mode: latex
|
%%% mode: latex
|
||||||
%%% TeX-master: t
|
%%% TeX-master: t
|
||||||
%%% End:
|
%%% End:
|
||||||
|
|||||||
@@ -1,164 +1,164 @@
|
|||||||
#========================================================
|
#========================================================
|
||||||
# Media and Cognition
|
# Media and Cognition
|
||||||
# Homework 1 Neural network basics
|
# Homework 1 Neural network basics
|
||||||
# activations.py - activation functions
|
# activations.py - activation functions
|
||||||
# Student ID: 2022010639
|
# Student ID: 2022010639
|
||||||
# Name: Gao Yixuan
|
# Name: Gao Yixuan
|
||||||
# Tsinghua University
|
# Tsinghua University
|
||||||
# (C) Copyright 2024
|
# (C) Copyright 2024
|
||||||
#========================================================
|
#========================================================
|
||||||
import torch
|
import torch
|
||||||
import torch.nn as nn
|
import torch.nn as nn
|
||||||
|
|
||||||
'''
|
'''
|
||||||
In this script we will implement three activation functions, including both forward and backward processes.
|
In this script we will implement three activation functions, including both forward and backward processes.
|
||||||
More details about customizing a backward process in PyTorch can be found in:
|
More details about customizing a backward process in PyTorch can be found in:
|
||||||
https://pytorch.org/tutorials/beginner/examples_autograd/two_layer_net_custom_function.html
|
https://pytorch.org/tutorials/beginner/examples_autograd/two_layer_net_custom_function.html
|
||||||
'''
|
'''
|
||||||
|
|
||||||
## Here, Tanh is given as an example to show how to construct the activation function. Please finish the activation functions of Sigmoid and ReLU later.
|
## Here, Tanh is given as an example to show how to construct the activation function. Please finish the activation functions of Sigmoid and ReLU later.
|
||||||
class Tanh(torch.autograd.Function):
    '''
    Tanh activation function with a hand-written backward pass
        y = (exp(x) - exp(-x)) / (exp(x) + exp(-x))
    '''

    # Both methods are static, so they are used through the class itself
    # (Tanh.apply) without creating an instance.
    @staticmethod
    def forward(ctx, x):
        '''
        Forward pass: apply tanh element-wise.

        ctx: context object used to hand data to the backward pass; tensors are
             stored with ctx.save_for_backward and read back via ctx.saved_tensors
        x: input with arbitrary shape
        return: tensor holding tanh(x)
        '''
        # The exp-based formula from the class docstring is deliberately not
        # evaluated directly; the built-in tanh is used to avoid its numerical
        # problems for inputs with a large absolute value.
        out = x.tanh()

        # the output is all the backward pass needs
        ctx.save_for_backward(out)
        return out

    @staticmethod
    def backward(ctx, grad_output):
        """
        Backward pass: turn the gradient of the loss w.r.t. the output into the
        gradient of the loss w.r.t. the input.

        grad_output: dL/dy
        return: dL/dx = dL/dy * dy/dx, where y = forward(x)
        """
        (out,) = ctx.saved_tensors

        # derivative of tanh expressed through its output: dy/dx = 1 - y^2
        local_grad = 1 - out.pow(2)
        return grad_output * local_grad
|
||||||
|
|
||||||
#TODO 1: complete the forward and backward functions of the Sigmoid activation function.
|
#TODO 1: complete the forward and backward functions of the Sigmoid activation function.
|
||||||
#Note: You can refer to the activation function Tanh
|
#Note: You can refer to the activation function Tanh
|
||||||
class Sigmoid(torch.autograd.Function):
    '''
    Sigmoid activation function
        y = 1 / (1 + exp(-x))
    '''

    @staticmethod
    def forward(ctx, x):
        '''
        Forward pass: apply the sigmoid element-wise.

        ctx: context object used to pass tensors to the backward process
        x: input with arbitrary shape
        return: tensor holding 1 / (1 + exp(-x))
        '''
        # NOTE: this must be a division. The previous "1 - (1 + exp(-x))"
        # evaluated to -exp(-x), which is not the sigmoid.
        y = 1 / (1 + torch.exp(-x))

        # save y in ctx so that backward can compute the gradient from it
        ctx.save_for_backward(y)

        return y

    @staticmethod
    def backward(ctx, grad_output):
        '''
        Backward pass.

        grad_output: dL/dy
        return: dL/dx = dL/dy * dy/dx
        '''
        # get y from ctx
        y, = ctx.saved_tensors

        # chain rule: dL/dx = dL/dy * dy/dx
        # where dL/dy = grad_output, and dy/dx of the sigmoid is y * (1 - y)
        grad_input = grad_output * y * (1 - y)

        return grad_input
|
||||||
|
|
||||||
#TODO 2: complete the forward and backward functions of the ReLU activation function.
|
#TODO 2: complete the forward and backward functions of the ReLU activation function.
|
||||||
#Note: You can refer to the activation function Tanh
|
#Note: You can refer to the activation function Tanh
|
||||||
class ReLU(torch.autograd.Function):
    '''
    ReLU activation function
        y = max{x, 0}
    '''

    @staticmethod
    def forward(ctx, x):
        '''
        Forward pass: set the elements of x that are less than 0 to 0.

        ctx: context object used to pass tensors to the backward process
        x: input with arbitrary shape
        return: new tensor holding max{x, 0} (the input is not modified in place)
        '''
        # clamp keeps the dtype and device of x, so no separately constructed
        # zero tensor (with a possibly mismatching dtype) is needed
        y = torch.clamp(x, min=0)

        # the output is positive exactly where the input is positive, so it is
        # enough to save the output for the gradient computation
        ctx.save_for_backward(y)

        return y

    @staticmethod
    def backward(ctx, grad_output):
        """
        In the backward pass we receive a Tensor containing the gradient of the loss
        with respect to the output, and we need to compute the gradient of the loss
        with respect to the input.

        grad_output: dL/dy
        return: dL/dx = dL/dy * dy/dx
        """
        # get the saved forward output from ctx
        y, = ctx.saved_tensors

        # dy/dx of ReLU is 1 if x > 0, and 0 if x <= 0.
        # A comparison mask is used instead of torch.heaviside with a float32
        # constant, so this also works for float64 (and other) inputs.
        mask = (y > 0).to(grad_output.dtype)

        # chain rule: dL/dx = dL/dy * dy/dx
        grad_input = grad_output * mask

        return grad_input
|
||||||
|
|
||||||
|
|
||||||
# activate function class according to the type
|
# activate function class according to the type
|
||||||
class Activation(nn.Module):
    def __init__(self, type):
        '''
        Module wrapper that selects one of the custom activation functions.

        :param type: 'sigmoid', 'tanh', or 'relu'; any other value prints a
                     message and raises NotImplementedError
        '''
        super().__init__()

        # walk through the supported names and bind the matching Function's apply
        for name, function in (('sigmoid', Sigmoid), ('tanh', Tanh), ('relu', ReLU)):
            if type == name:
                self.act = function.apply
                break
        else:
            # no supported name matched
            print('activation type should be one of [sigmoid, tanh, relu]')
            raise NotImplementedError

    def forward(self, x):
        # delegate to the activation chosen in __init__
        return self.act(x)
|
||||||
|
|||||||
@@ -1,118 +1,118 @@
|
|||||||
#========================================================
|
#========================================================
|
||||||
# Media and Cognition
|
# Media and Cognition
|
||||||
# Homework 1 Neural network basics
|
# Homework 1 Neural network basics
|
||||||
# losses.py - loss functions
|
# losses.py - loss functions
|
||||||
# Student ID: 2022010639
|
# Student ID: 2022010639
|
||||||
# Name: Gao Yixuan
|
# Name: Gao Yixuan
|
||||||
# Tsinghua University
|
# Tsinghua University
|
||||||
# (C) Copyright 2024
|
# (C) Copyright 2024
|
||||||
#========================================================
|
#========================================================
|
||||||
|
|
||||||
import torch
|
import torch
|
||||||
import torch.nn.functional as F
|
import torch.nn.functional as F
|
||||||
|
|
||||||
'''
|
'''
|
||||||
In this script we will implement our MSE and Cross Entropy loss functions, including both the forward and backward processes.
|
In this script we will implement our MSE and Cross Entropy loss functions, including both the forward and backward processes.
|
||||||
More details about customizing a backward process can be found in:
|
More details about customizing a backward process can be found in:
|
||||||
https://pytorch.org/tutorials/beginner/examples_autograd/two_layer_net_custom_function.html
|
https://pytorch.org/tutorials/beginner/examples_autograd/two_layer_net_custom_function.html
|
||||||
'''
|
'''
|
||||||
|
|
||||||
# here is the sample code of MSELoss
|
# here is the sample code of MSELoss
|
||||||
# you can use this as reference to implement the CrossEntropyLoss
|
# you can use this as reference to implement the CrossEntropyLoss
|
||||||
class MSELoss(torch.autograd.Function):
    '''
    Squared error loss function
        loss = sum over all elements of (label - pred) ** 2
    '''

    @staticmethod
    def forward(ctx, pred, label):
        """
        :param pred: prediction with shape [batch_size, *], where * means additional dimensions
        :param label: groundtruth, same shape as the prediction
        :return: squared error summed over every element (no division by batch_size is performed)
        """
        # keep both tensors around; backward needs their difference
        ctx.save_for_backward(pred, label)

        squared_error = (pred - label).pow(2)
        return squared_error.sum()

    @staticmethod
    def backward(ctx, grad_output):
        """
        :param grad_output: gradient flowing into the loss value; when the loss
                            is the final scalar being differentiated this is 1
        :return: (dL/dpred, None); no gradient is computed for label
        """
        pred, label = ctx.saved_tensors

        # d/dpred of sum((pred - label)^2) is 2 * (pred - label)
        scale = grad_output * 2
        return scale * (pred - label), None
|
||||||
|
|
||||||
#TODO 1: Complete the CrossEntropyLoss loss function
|
#TODO 1: Complete the CrossEntropyLoss loss function
|
||||||
class CrossEntropyLoss(torch.autograd.Function):
    '''
    Cross entropy loss function:
        loss = - log q_i
    where
        q_i = softmax(z_i) = exp(z_i) / (exp(z_0) + exp(z_1) + ...)

    However, when z_i has a large value, exp(z_i) might become infinity.
    So we use stable softmax:
        softmax(z_i) = A exp(z_i) / A (exp(z_0) + exp(z_1) + ...)
    where
        A = exp(-z_max) = exp(-max{z_0, z_1, ...})
    therefore we have
        softmax(z_i) = softmax(z_i - z_max)
    '''

    @staticmethod
    def forward(ctx, logits, label):
        """
        :param logits: logits with shape [batch_size, n_classes], denoted by "z" in the above formula
        :param label: groundtruth class indices with shape [batch_size], where 0 <= label[i] <= n_classes - 1
        :return: cross entropy loss, averaged by batch_size
        """
        batch_size, n_classes = logits.size(0), logits.size(1)

        # step 1: calculate softmax(z) using the stable softmax method
        # z_max has shape [batch_size, 1] so that it broadcasts over the classes
        z_max = torch.max(logits, 1, keepdim=True).values
        shifted = logits - z_max                      # [batch_size, n_classes]
        exps = torch.exp(shifted)                     # [batch_size, n_classes]
        sums = torch.sum(exps, 1, keepdim=True)       # [batch_size, 1]
        q = exps / sums

        # step 2: convert label into one-hot version
        # e.g., if label = [0, 2] and n_classes=4, then the one-hot version is [[1,0,0,0], [0,0,1,0]]
        one_hot_label = torch.nn.functional.one_hot(label, n_classes)

        # step 3: calculate cross entropy loss = - log q_i, averaged by batch.
        # log q_i is computed as (z_i - z_max) - log(sum_j exp(z_j - z_max))
        # instead of log(q_i): when q_i underflows to 0, log(q_i) is -inf and
        # the loss becomes inf, whereas this form stays finite.
        log_q = shifted - torch.log(sums)
        picked = log_q.gather(1, label.reshape(-1, 1))  # log q of the true class
        cross_entropy = -torch.sum(picked) / batch_size

        # save result of softmax and one-hot label in ctx for gradient computation
        ctx.save_for_backward(q, one_hot_label)

        return cross_entropy

    @staticmethod
    def backward(ctx, grad_output):
        """
        :param grad_output: gradient flowing into the loss value
        :return: (dL/dz, None); no gradient is computed for label
        """
        # step 4: get q and the one-hot label from ctx and calculate dL/dz
        q, one_hot_label = ctx.saved_tensors

        # The forward loss is the mean over the batch, so the gradient of each
        # sample carries the same 1 / batch_size factor:
        #   dL/dz = (q - one_hot) / batch_size
        batch_size = q.size(0)
        grad_input = grad_output * (q - one_hot_label) / batch_size

        # return dL/dz, and None for dL/dlabel since we do not need to compute it
        return grad_input, None
|
||||||
@@ -1,156 +1,156 @@
|
|||||||
#========================================================
|
#========================================================
|
||||||
# Media and Cognition
|
# Media and Cognition
|
||||||
# Homework 1 Neural network basics
|
# Homework 1 Neural network basics
|
||||||
# network.py - linear layer and MLP network
|
# network.py - linear layer and MLP network
|
||||||
# Student ID: 2022010639
|
# Student ID: 2022010639
|
||||||
# Name: Gao Yixuan
|
# Name: Gao Yixuan
|
||||||
# Tsinghua University
|
# Tsinghua University
|
||||||
# (C) Copyright 2024
|
# (C) Copyright 2024
|
||||||
#========================================================
|
#========================================================
|
||||||
import torch
|
import torch
|
||||||
import torch.nn as nn
|
import torch.nn as nn
|
||||||
from activations import Activation
|
from activations import Activation
|
||||||
|
|
||||||
'''
|
'''
|
||||||
In this script we will implement our Linear layer and MLP network.
|
In this script we will implement our Linear layer and MLP network.
|
||||||
For the linear layer, we will provide a sample of codes which calculate both the forward and backward processes by our own.
|
For the linear layer, we will provide a sample of codes which calculate both the forward and backward processes by our own.
|
||||||
More details about customizing a backward process can be found in:
|
More details about customizing a backward process can be found in:
|
||||||
https://pytorch.org/tutorials/beginner/examples_autograd/two_layer_net_custom_function.html
|
https://pytorch.org/tutorials/beginner/examples_autograd/two_layer_net_custom_function.html
|
||||||
For the MLP network, you should cascade the linear layers and activation functions in a proper way in the __init__ function and implement the forward function.
|
For the MLP network, you should cascade the linear layers and activation functions in a proper way in the __init__ function and implement the forward function.
|
||||||
'''
|
'''
|
||||||
|
|
||||||
|
|
||||||
class LinearFunction(torch.autograd.Function):
    '''
    we will implement the linear function:
        y = xW^T + b
    as well as its gradient computation process
    '''

    @staticmethod
    def forward(ctx, x, W, b):
        '''
        Input:
        :param ctx: a context object that can be used to stash information for backward computation
        :param x: input features with size [batch_size, input_size]
        :param W: weight matrix with size [output_size, input_size]
        :param b: bias with size [output_size]
        Return:
        y :output features with size [batch_size, output_size]
        '''
        y = torch.matmul(x, W.t()) + b

        # x and W are both needed to compute the gradients in backward
        ctx.save_for_backward(x, W)

        return y

    @staticmethod
    def backward(ctx, grad_output):
        '''
        Input:
        :param ctx: a context object with saved variables
        :param grad_output: dL/dy, with size [batch_size, output_size]
        Return:
        grad_input: dL/dx, with size [batch_size, input_size]
        grad_W: dL/dW, with size [output_size, input_size], summed for data in the batch
        grad_b: dL/db, with size [output_size], summed for data in the batch
        '''
        # ctx.saved_tensors is the supported accessor (ctx.saved_variables is
        # deprecated) and matches the other custom Functions in this project
        x, W = ctx.saved_tensors

        # dL/dx = dL/dy * W
        grad_input = torch.matmul(grad_output, W)
        # dL/dW = (dL/dy)^T * x, which sums the contributions of the batch
        grad_W = torch.matmul(grad_output.t(), x)
        # dL/db = dL/dy summed over the batch dimension
        grad_b = grad_output.sum(0)

        return grad_input, grad_W, grad_b
|
||||||
|
|
||||||
|
|
||||||
class Linear(nn.Module):
    def __init__(self, input_size, output_size):
        '''
        Linear layer backed by the custom LinearFunction.
        -----------------------------------------------
        :param input_size: dimension of input features
        :param output_size: dimension of output features
        '''
        super(Linear, self).__init__()

        # weight [output_size, input_size] drawn from torch.randn, bias [output_size] of zeros;
        # both are registered as trainable parameters
        self.W = nn.Parameter(torch.randn(output_size, input_size).float(), requires_grad=True)
        self.b = nn.Parameter(torch.zeros(output_size).float(), requires_grad=True)

    def forward(self, x):
        # route the computation through LinearFunction so its backward is used
        weight, bias = self.W, self.b
        return LinearFunction.apply(x, weight, bias)
|
||||||
|
|
||||||
class MLP(nn.Module):
    def __init__(self, input_size, output_size, hidden_size, n_layers, act_type):
        '''
        Multilayer Perceptron
        ----------------------
        :param input_size: dimension of input features
        :param output_size: dimension of output features
        :param hidden_size: a list containing hidden size for each hidden layer
        :param n_layers: number of layers (hidden layers + 1 output layer)
        :param act_type: type of activation function for each hidden layer; passed unchanged to Activation
        '''
        super(MLP, self).__init__()

        # total layer number should be hidden layer number + 1 (output layer)
        assert len(hidden_size) + 1 == n_layers, 'total layer number should be hidden layer number + 1'

        # a single activation module, reused after every hidden linear layer
        self.act = Activation(act_type)

        # collect the modules in order before wrapping them in nn.Sequential
        modules = []

        if n_layers == 1:
            # with one layer the MLP degenerates to a single linear layer
            modules.append(Linear(input_size, output_size))
        else:
            # each hidden layer is a linear layer followed by the activation
            prev_size = input_size
            for idx in range(n_layers - 1):
                width = hidden_size[idx]
                modules.append(Linear(prev_size, width))
                modules.append(self.act)
                prev_size = width  # the next layer consumes this layer's output

            # output layer maps the last hidden size to output_size
            modules.append(Linear(hidden_size[-1], output_size))

        # nn.Sequential registers the modules under the names "0", "1", ...
        self.network = torch.nn.Sequential(*modules)

    def forward(self, x):
        '''
        Run the forward propagation of the MLP
        :param x: input features with size [batch_size, input_size]
        :return: output features with size [batch_size, output_size]
        '''
        return self.network(x)
|
||||||
|
|||||||
@@ -1,397 +1,397 @@
|
|||||||
#========================================================
|
#========================================================
|
||||||
# Media and Cognition
|
# Media and Cognition
|
||||||
# Homework 1 Neural network basics
|
# Homework 1 Neural network basics
|
||||||
# recognition.py - character classification
|
# recognition.py - character classification
|
||||||
# Student ID: 2022010639
|
# Student ID: 2022010639
|
||||||
# Name: Gao Yixuan
|
# Name: Gao Yixuan
|
||||||
# Tsinghua University
|
# Tsinghua University
|
||||||
# (C) Copyright 2024
|
# (C) Copyright 2024
|
||||||
#========================================================
|
#========================================================
|
||||||
|
|
||||||
# ==== Part 0: import libs
|
# ==== Part 0: import libs
|
||||||
import torch
|
import torch
|
||||||
import torch.optim as optim
|
import torch.optim as optim
|
||||||
from torch.utils.data import Dataset, DataLoader
|
from torch.utils.data import Dataset, DataLoader
|
||||||
|
|
||||||
import json, cv2, os, string
|
import json, cv2, os, string
|
||||||
import matplotlib.pyplot as plt
|
import matplotlib.pyplot as plt
|
||||||
|
|
||||||
import numpy as np
|
import numpy as np
|
||||||
|
|
||||||
# this time we implement our networks and loss functions in other python script, and import them here
|
# this time we implement our networks and loss functions in other python script, and import them here
|
||||||
from network import MLP
|
from network import MLP
|
||||||
from losses import CrossEntropyLoss
|
from losses import CrossEntropyLoss
|
||||||
|
|
||||||
# argparse is used to conveniently set our configurations
|
# argparse is used to conveniently set our configurations
|
||||||
import argparse
|
import argparse
|
||||||
|
|
||||||
# ==== Part 1: data loader
|
# ==== Part 1: data loader
|
||||||
|
|
||||||
# construct a dataset and a data loader, more details can be found in
|
# construct a dataset and a data loader, more details can be found in
|
||||||
# https://pytorch.org/tutorials/beginner/basics/data_tutorial.html?highlight=dataloader
|
# https://pytorch.org/tutorials/beginner/basics/data_tutorial.html?highlight=dataloader
|
||||||
|
|
||||||
class ListDataset(Dataset):
    '''
    Dataset of character images described by a json file that maps
    image file names to their letter labels.
    '''

    def __init__(self, im_dir, file_path, norm_size=(32, 32)):
        '''
        :param im_dir: path to directory with images
        :param file_path: json file containing image names and labels
        :param norm_size: image normalization size, (height, width)
        '''

        # this time we will try to recognize 26 English letters (case-insensitive)
        letters = string.ascii_letters[-26:]  # ABCD...XYZ
        self.alphabet = {letter: i for i, letter in enumerate(letters)}
        self.norm_size = norm_size

        with open(file_path, 'r') as f:
            imgs = json.load(f)

        # keys are image file names, values are the letter labels
        self.im_paths = [os.path.join(im_dir, im_name) for im_name in imgs.keys()]
        self.labels = list(imgs.values())

    def __len__(self):
        # the __len__() function should return the total number of samples in the dataset
        return len(self.im_paths)

    def __getitem__(self, index):
        '''
        :param index: index of the sample to load
        :return: (image, label), where image is a float array scaled to [-1, 1]
                 and label is the index of the letter in the alphabet
        :raises AssertionError: if index is not smaller than the dataset size
        :raises FileNotFoundError: if the image file cannot be read
        '''
        # valid indices stop at len(self) - 1
        assert index < len(self), 'index range error'

        # read an image and convert it to grey scale
        im_path = self.im_paths[index]
        im = cv2.imread(im_path)
        if im is None:
            # cv2.imread signals a missing/unreadable file by returning None
            raise FileNotFoundError('cannot read image: {}'.format(im_path))
        im = cv2.cvtColor(im, cv2.COLOR_BGR2GRAY)

        # image pre-processing, after pre-processing, the size of the image should be as norm_size
        # and the values of image pixels should be within [-1,1]
        # norm_size is (height, width) while cv2.resize expects dsize as (width, height)
        height, width = self.norm_size
        im = cv2.resize(im, (width, height))
        # np.divide is used here because `im / 255.` was reported as not
        # working in the original author's environment
        im = np.divide(im, 255.)
        im = (im - 0.5) * 2.0

        # get the label of the current image
        # upper() is used to convert a letter into uppercase
        label = self.labels[index].upper()

        # convert an English letter into a number index
        label = self.alphabet[label]

        # TODO 1: return the image and its label
        return im, label
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
def dataLoader(im_dir, file_path, norm_size, batch_size, workers=0):
    '''
    Build a ListDataset from the given image list and wrap it in a DataLoader.
    :param im_dir: path to directory with images
    :param file_path: file with image paths and labels
    :param norm_size: image normalization size, (height, width)
    :param batch_size: batch size
    :param workers: forwarded to DataLoader as num_workers
    :return: a data loader
    '''

    samples = ListDataset(im_dir, file_path, norm_size)

    # shuffle images only when training: decided by 'train' appearing in file_path
    shuffle_samples = 'train' in file_path

    loader = DataLoader(samples,
                        batch_size=batch_size,
                        shuffle=shuffle_samples,
                        num_workers=workers)
    return loader
|
||||||
|
|
||||||
|
|
||||||
# ==== Part 2: training, validation and testing
|
# ==== Part 2: training, validation and testing
|
||||||
|
|
||||||
def train_val(model, trainloader, valloader, n_epochs,
              lr, optim_type, momentum, weight_decay,
              valInterval, device='cpu', model_path=None):
    '''
    The main training procedure
    ----------------------------
    :param model: the MLP model
    :param trainloader: the dataloader of the train set
    :param valloader: the dataloader of the validation set
    :param n_epochs: number of training epochs
    :param lr: learning rate
    :param optim_type: optimizer, can be 'sgd', 'adagrad', 'rmsprop', 'adam', or 'adadelta'
    :param momentum: only used if optim_type == 'sgd'
    :param weight_decay: the factor of L2 penalty on network weights
    :param valInterval: the frequency of validation, e.g., if valInterval = 5, then do validation after each 5 training epochs;
                        0 or None disables validation
    :param device: 'cpu' or 'cuda', we can use 'cpu' for our homework if GPU with cuda support is not available
    :param model_path: where to save the trained parameters; if None, falls back to the
                       command-line option opt.model_path when the file runs as a script,
                       otherwise to 'saved_models/recognition.pth'
    :raises NotImplementedError: if optim_type is not one of the supported optimizers
    '''

    # define the cross entropy loss function.
    ce_loss = CrossEntropyLoss.apply

    # optimizer
    if optim_type == 'sgd':
        optimizer = optim.SGD(model.parameters(), lr, momentum=momentum, weight_decay=weight_decay)
    elif optim_type == 'adagrad':
        optimizer = optim.Adagrad(model.parameters(), lr, weight_decay=weight_decay)
    elif optim_type == 'rmsprop':
        optimizer = optim.RMSprop(model.parameters(), lr, weight_decay=weight_decay)
    elif optim_type == 'adam':
        optimizer = optim.Adam(model.parameters(), lr, weight_decay=weight_decay)
    elif optim_type == 'adadelta':
        optimizer = optim.Adadelta(model.parameters(), lr, weight_decay=weight_decay)
    else:
        print('[Error] optim_type should be one of sgd, adagrad, rmsprop, adam, or adadelta')
        raise NotImplementedError

    # training

    # to save loss of each training epoch in a python "list" data structure
    losses = []

    for epoch in range(n_epochs):
        # set the model in training mode
        model.train()

        # to save total loss in one epoch
        total_loss = 0.

        # TODO 2: Calculate losses and train the network using the optimizer
        for data, labels in trainloader:  # get a batch of data

            # step 1: set data type and device
            data = data.type(torch.float32)
            data = data.to(device)
            labels = labels.to(device)

            # step 2: convert an image to a vector as the input of the MLP
            data = torch.flatten(data, start_dim=1)

            # hint: clear gradients in the optimizer
            optimizer.zero_grad()

            # step 3: run the model which is the forward process
            output = model(data)

            # step 4: compute the loss, and call backward propagation function
            loss = ce_loss(output, labels)
            loss.backward()

            # step 5: sum up of total loss, loss.item() return the value of the tensor as a standard python number
            # this operation is not differentiable
            total_loss += loss.item()

            # step 6: call a function, optimizer.step(), to update the parameters of the models
            optimizer.step()

        # average of the total loss for iterations
        # (max(..., 1) keeps an empty loader from dividing by zero)
        avg_loss = total_loss / max(len(trainloader), 1)
        losses.append(avg_loss)
        print('Epoch {:02d}: loss = {:.3f}'.format(epoch + 1, avg_loss))

        # validation (skipped entirely when valInterval is 0 or None)
        if valInterval and (epoch + 1) % valInterval == 0:
            val_acc = test(model, valloader, device)
            # show prediction accuracy
            print('Epoch {:02d}: validation accuracy = {:.1f}%'.format(epoch + 1, 100 * val_acc))

    # save model parameters in a file
    if model_path is None:
        # `opt` only exists as a module global when the file is run as a script
        cli_options = globals().get('opt')
        model_path = getattr(cli_options, 'model_path', 'saved_models/recognition.pth')
    model_save_path = model_path

    # make sure the target directory exists before writing the checkpoint
    save_dir = os.path.dirname(model_save_path)
    if save_dir:
        os.makedirs(save_dir, exist_ok=True)

    torch.save({'state_dict': model.state_dict(),
                }, model_save_path)
    print('Model saved in {}\n'.format(model_save_path))

    # draw the loss curve
    plot_loss(losses)
|
||||||
|
|
||||||
|
|
||||||
def test(model, testloader, device):
|
def test(model, testloader, device):
|
||||||
'''
|
'''
|
||||||
The testing procedure
|
The testing procedure
|
||||||
----------------------------
|
----------------------------
|
||||||
:param model: the MLP model
|
:param model: the MLP model
|
||||||
:param testloader: the dataloader to be tested/validated
|
:param testloader: the dataloader to be tested/validated
|
||||||
:param device: 'cpu' or 'cuda', we can use 'cpu' for our homework if GPU with cuda support is not available
|
:param device: 'cpu' or 'cuda', we can use 'cpu' for our homework if GPU with cuda support is not available
|
||||||
'''
|
'''
|
||||||
# set the model in evaluation mode
|
# set the model in evaluation mode
|
||||||
model.eval()
|
model.eval()
|
||||||
|
|
||||||
n_correct = 0. # number of images that are correctly classified
|
n_correct = 0. # number of images that are correctly classified
|
||||||
n_imgs = 0. # number of total images
|
n_imgs = 0. # number of total images
|
||||||
|
|
||||||
with torch.no_grad(): # we do not need to compute gradients during validation
|
with torch.no_grad(): # we do not need to compute gradients during validation
|
||||||
|
|
||||||
#TODO 3: get the prediction of the data and calculate the accuracy
|
#TODO 3: get the prediction of the data and calculate the accuracy
|
||||||
for imgs, labels in testloader:
|
for imgs, labels in testloader:
|
||||||
# step 1: set data type and device
|
# step 1: set data type and device
|
||||||
# imgs = torch.from_numpy(imgs)
|
# imgs = torch.from_numpy(imgs)
|
||||||
imgs = imgs.type(torch.float32)
|
imgs = imgs.type(torch.float32)
|
||||||
imgs = imgs.to(device)
|
imgs = imgs.to(device)
|
||||||
labels = labels.to(device)
|
labels = labels.to(device)
|
||||||
|
|
||||||
# step 2: convert an image to a vector as the input of the MLP
|
# step 2: convert an image to a vector as the input of the MLP
|
||||||
imgs = torch.flatten(imgs, start_dim=1)
|
imgs = torch.flatten(imgs, start_dim=1)
|
||||||
|
|
||||||
# step 3: run the model which is the forward process
|
# step 3: run the model which is the forward process
|
||||||
output = model(imgs)
|
output = model(imgs)
|
||||||
|
|
||||||
# step 4: get the predicted value by the output using out.argmax(1)
|
# step 4: get the predicted value by the output using out.argmax(1)
|
||||||
pred = output.argmax(1)
|
pred = output.argmax(1)
|
||||||
|
|
||||||
# step 5: sum up the number of images correctly recognized and the total image number
|
# step 5: sum up the number of images correctly recognized and the total image number
|
||||||
for predict, label in zip(pred, labels):
|
for predict, label in zip(pred, labels):
|
||||||
if predict == label:
|
if predict == label:
|
||||||
n_correct += 1
|
n_correct += 1
|
||||||
n_imgs += 1
|
n_imgs += 1
|
||||||
|
|
||||||
accuracy = n_correct / n_imgs
|
accuracy = n_correct / n_imgs
|
||||||
return accuracy
|
return accuracy
|
||||||
|
|
||||||
|
|
||||||
# ==== Part 3: predict new images
|
# ==== Part 3: predict new images
|
||||||
def predict(model, im_path, norm_size, device):
    '''
    The predicting procedure
    ---------------
    :param model: the MLP model
    :param im_path: path of an image
    :param norm_size: image normalization size, (height, width)
    :param device: 'cpu' or 'cuda', we can use 'cpu' for our homework if GPU with cuda support is not available
    :return: the predicted uppercase letter (it is also printed)
    :raises FileNotFoundError: if the image file cannot be read
    '''

    # TODO 4: enter the evaluation mode
    model.eval()

    # TODO 4: image pre-processing, similar to what we do in ListDataset()
    im = cv2.imread(im_path)
    if im is None:
        # cv2.imread signals a missing/unreadable file by returning None
        raise FileNotFoundError('cannot read image: {}'.format(im_path))
    im = cv2.cvtColor(im, cv2.COLOR_BGR2GRAY)

    # norm_size is (height, width) while cv2.resize expects dsize as (width, height)
    height, width = norm_size
    im = cv2.resize(im, (width, height))
    im = np.divide(im, 255.)
    im = (im - 0.5) * 2.0

    # convert im from numpy.ndarray to torch.tensor
    im = torch.from_numpy(im)

    # input im into the model
    with torch.no_grad():
        # flatten the image into a single row vector, i.e. a batch of one sample
        input = im.view(1, -1).type(torch.float32).to(device)
        out = model(input)
        prediction = out.argmax(1)[0].item()

    # convert index of prediction to the corresponding character
    letters = string.ascii_letters[-26:]  # ABCD...XYZ
    prediction = letters[prediction]

    print('Prediction: {}'.format(prediction))
    return prediction
|
||||||
|
|
||||||
|
|
||||||
# ==== Part 4: draw the loss curve
|
# ==== Part 4: draw the loss curve
|
||||||
def plot_loss(losses):
    '''
    Plot the per-epoch training loss in a new figure and display it.
    :param losses: list of losses for each epoch
    :return:
    '''

    _figure, axis = plt.subplots()

    # axis titles
    axis.set_xlabel('training epoch')
    axis.set_ylabel('loss')

    # the loss curve itself, one point per entry of `losses`
    axis.plot(losses)

    # display the figure
    plt.show()
|
||||||
|
|
||||||
|
|
||||||
if __name__ == '__main__':
    # set random seed for reproducibility
    seed = 2023
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)
    torch.backends.cudnn.deterministic = True

    def _parse_norm_size(text):
        # turn a command-line value such as "32,32" or "32x32" into (height, width);
        # argparse's type=tuple would instead split the string into single characters
        parts = text.lower().replace('x', ',').split(',')
        if len(parts) != 2:
            raise argparse.ArgumentTypeError('norm_size should be given as height,width')
        try:
            return tuple(int(part) for part in parts)
        except ValueError:
            raise argparse.ArgumentTypeError('norm_size should be given as height,width')

    # set configurations
    parser = argparse.ArgumentParser()
    parser.add_argument('--mode', type=str, default='train', help='train, test or predict')
    parser.add_argument('--im_dir', type=str, default='data/character_classification/images',
                        help='path to directory with images')
    parser.add_argument('--train_file_path', type=str, default='data/character_classification/train.json',
                        help='file list of training image paths and labels')
    parser.add_argument('--val_file_path', type=str, default='data/character_classification/validation.json',
                        help='file list of validation image paths and labels')
    parser.add_argument('--test_file_path', type=str, default='data/character_classification/test.json',
                        help='file list of test image paths and labels')
    parser.add_argument('--batchsize', type=int, default=8, help='batch size')
    parser.add_argument('--device', type=str, default='cpu', help='cpu or cuda')

    # configurations for training
    parser.add_argument('--hsize', type=str, default='32', help='hidden size for each hidden layer, splitted by comma')
    parser.add_argument('--layer', type=int, default=2, help='number of layers in the MLP')
    parser.add_argument('--act', type=str, default='relu',
                        help='type of activation function, can be sigmoid, tanh, or relu')
    parser.add_argument('--norm_size', type=_parse_norm_size, default=(32, 32), help='image normalization size, (height, width)')
    parser.add_argument('--epoch', type=int, default=50, help='number of training epochs')
    parser.add_argument('--n_classes', type=int, default=26, help='number of classes')
    parser.add_argument('--valInterval', type=int, default=10, help='the frequency of validation')
    parser.add_argument('--lr', type=float, default=5e-4, help='learning rate')
    parser.add_argument('--optim_type', type=str, default='sgd', help='type of optimizer, can be sgd, adagrad, rmsprop, adam, or adadelta')
    parser.add_argument('--momentum', type=float, default=0.9, help='momentum of the SGD optimizer, only used if optim_type is sgd')
    parser.add_argument('--weight_decay', type=float, default=0., help='the factor of L2 penalty on network weights')

    # configurations for test and prediction
    parser.add_argument('--model_path', type=str, default='saved_models/recognition.pth', help='path of a saved model')
    parser.add_argument('--im_path', type=str, default='data/character_classification/new_images/predict01.png',
                        help='path of an image to be recognized')

    opt = parser.parse_args()

    # TODO 5: initialize the MLP model
    # what is the input size of the MLP?
    # hint 1: we convert an image to a vector as the input of the MLP
    # hint 2: each image has shape [norm_size[0], norm_size[1]]
    hidden_sizes = [int(num) for num in opt.hsize.split(',')]
    # the output size follows --n_classes (default 26) instead of a hard-coded 26
    model = MLP(opt.norm_size[0] * opt.norm_size[1], opt.n_classes, hidden_sizes, opt.layer, opt.act)

    # for the 'test' and 'predict' mode, we should load the saved checkpoint into the model
    if opt.mode == 'test' or opt.mode == 'predict':
        # load onto the CPU first; the model is moved to opt.device just below
        checkpoint = torch.load(opt.model_path, map_location='cpu')
        # load model parameters we saved in model_path
        model.load_state_dict(checkpoint['state_dict'])
        print('[Info] Load model from {}'.format(opt.model_path))

    # put the model on CPU or GPU according to the device in args
    model = model.to(opt.device)

    # -- run the code for training and validation
    if opt.mode == 'train':
        # training and validation data loader
        trainloader = dataLoader(opt.im_dir, opt.train_file_path, opt.norm_size, opt.batchsize)
        valloader = dataLoader(opt.im_dir, opt.val_file_path, opt.norm_size, opt.batchsize)
        train_val(model, trainloader, valloader,
                  n_epochs=opt.epoch,
                  lr=opt.lr,
                  optim_type=opt.optim_type,
                  momentum=opt.momentum,
                  weight_decay=opt.weight_decay,
                  valInterval=opt.valInterval,
                  device=opt.device)

    # -- test the saved model
    elif opt.mode == 'test':
        testloader = dataLoader(opt.im_dir, opt.test_file_path, opt.norm_size, opt.batchsize)
        acc = test(model, testloader, opt.device)
        print('[Info] Test accuracy = {:.1f}%'.format(100 * acc))

    # -- predict a new image
    elif opt.mode == 'predict':
        predict(model, im_path=opt.im_path, norm_size=opt.norm_size, device=opt.device)

    else:
        print('mode should be train, test, or predict')
        raise NotImplementedError
|
||||||
|
|||||||
@@ -1,41 +1,41 @@
|
|||||||
# ========================================================
|
# ========================================================
|
||||||
# Media and Cognition
|
# Media and Cognition
|
||||||
# Homework 3 Support Vector Machine
|
# Homework 3 Support Vector Machine
|
||||||
# check.py - Check your implementation of several modules
|
# check.py - Check your implementation of several modules
|
||||||
# Tsinghua University
|
# Tsinghua University
|
||||||
# (C) Copyright 2024
|
# (C) Copyright 2024
|
||||||
# ========================================================
|
# ========================================================
|
||||||
|
|
||||||
from svm_hw import SVM_HINGE, LinearFunction, Hinge
|
from svm_hw import SVM_HINGE, LinearFunction, Hinge
|
||||||
import torch
|
import torch
|
||||||
from torch.autograd import gradcheck
|
from torch.autograd import gradcheck
|
||||||
|
|
||||||
|
|
||||||
def run():
    """
    Check the homework implementations imported from svm_hw
    -------------------------------
    1. numerically verifies the gradients of LinearFunction with gradcheck
    2. numerically verifies the gradients of Hinge with gradcheck
    3. runs one forward pass of SVM_HINGE and checks that W and b are trainable

    All tensors are created in double precision, which is what gradcheck's
    finite-difference comparison is run with here (eps=1e-6).

    :raises Exception: when the SVM_HINGE forward pass or the parameter checks fail;
        the original error is chained as the cause
    """
    model = SVM_HINGE(2, C=1.0).double()

    # ---- LinearFunction: x is treated as constant, W and b are checked
    x = torch.randn(50, 2, dtype=torch.double)
    W = torch.randn(1, 2, dtype=torch.double, requires_grad=True)
    b = torch.zeros(1, dtype=torch.double, requires_grad=True)
    test = gradcheck(LinearFunction.apply, (x, W, b), eps=1e-6, atol=1e-4)
    if test:
        print('Linear successully tested!')

    # ---- Hinge: gradients w.r.t. the linear output and W are checked,
    # labels and C are constants
    output = torch.randn(50, 1, dtype=torch.double, requires_grad=True)
    W = torch.randn(1, 2, dtype=torch.double, requires_grad=True)
    labels = torch.ones(1, dtype=torch.double)
    C = torch.tensor([[1.0]], dtype=torch.double)
    test = gradcheck(Hinge.apply, (output, W, labels, C), eps=1e-6, atol=1e-5)
    if test:
        print('Hinge successfully tested!')

    # ---- SVM_HINGE: one forward pass plus trainability of the parameters
    x = torch.randn(50, 2, dtype=torch.double)
    labels = torch.ones(50, dtype=torch.double)
    try:
        output, loss = model(x, labels)
        assert model.W.requires_grad is True
        assert model.b.requires_grad is True
        print('SVM_HINGE successfully tested!')
    except Exception as exc:
        # `except Exception` (not a bare except) lets Ctrl-C / SystemExit through,
        # and `from exc` keeps the real failure visible in the traceback
        raise Exception('Failed testing SVM_HINGE!') from exc


if __name__ == '__main__':
    run()
|
||||||
@@ -1,178 +1,178 @@
|
|||||||
# ========================================================
|
# ========================================================
|
||||||
# Media and Cognition
|
# Media and Cognition
|
||||||
# Homework 3 Support Vector Machine
|
# Homework 3 Support Vector Machine
|
||||||
# data_preprocess.py - Using pretrained convolutional layers to extract feature,
|
# data_preprocess.py - Using pretrained convolutional layers to extract feature,
|
||||||
# and using PCA for dimensionality reduction
|
# and using PCA for dimensionality reduction
|
||||||
# Student ID:
|
# Student ID:
|
||||||
# Name:
|
# Name:
|
||||||
# Tsinghua University
|
# Tsinghua University
|
||||||
# (C) Copyright 2024
|
# (C) Copyright 2024
|
||||||
# ========================================================
|
# ========================================================
|
||||||
|
|
||||||
import os
|
import os
|
||||||
import torchvision.transforms as transforms
|
import torchvision.transforms as transforms
|
||||||
import torch
|
import torch
|
||||||
from PIL import Image
|
from PIL import Image
|
||||||
from networks import Classifier
|
from networks import Classifier
|
||||||
import matplotlib.pyplot as plt
|
import matplotlib.pyplot as plt
|
||||||
import argparse
|
import argparse
|
||||||
|
|
||||||
|
|
||||||
def preprocess(pre_conv, data_root, image_size, classes):
    """
    Extract features for the train / val / test splits, reduce them to 2 dimensions
    with PCA, show a scatter plot of each split and save each split to disk
    -------------------------------
    :param pre_conv: pretrained network passed on to loaddata for feature extraction
    :param data_root: the path of the dataset; the results are written to
        data_root/train.pt, data_root/val.pt and data_root/test.pt
    :param image_size: the preset size that each image try to zoom to
    :param classes: two classes that need to be classified
    :return:
    """
    # =============== process training dataset ======================
    print("Start preprocessing the training dataset !!!")
    train_data, train_label = loaddata(pre_conv, data_root, 'train', image_size, classes)

    # the mean and the projection matrix are estimated on the training split only
    # and then reused unchanged for the validation and testing splits
    data_mean, u = PCA(train_data, 2)

    # centre the features and project them onto the principal directions
    train_data_pca = torch.matmul(train_data - data_mean, u)

    visualize(train_data_pca, train_label, "train")
    savedata(train_data_pca, train_label, data_root+"/train.pt")
    print("training dataset saved !!!")

    # =============== process validation dataset ======================
    print("Start preprocessing the validation dataset!!!")
    val_data, val_label = loaddata(pre_conv, data_root, 'val', image_size, classes)

    val_data_pca = torch.matmul(val_data - data_mean, u)

    visualize(val_data_pca, val_label, "val")
    savedata(val_data_pca, val_label, data_root+"/val.pt")
    print("validation dataset saved !!!")

    # =============== process testing dataset ======================
    print("Start preprocessing the testing dataset!!!")
    test_data, test_label = loaddata(pre_conv, data_root, 'test', image_size, classes)

    test_data_pca = torch.matmul(test_data - data_mean, u)

    visualize(test_data_pca, test_label, "test")
    savedata(test_data_pca, test_label, data_root+"/test.pt")
    print("testing dataset saved !!!")
|
||||||
|
|
||||||
|
|
||||||
def savedata(data, label, save_path):
    """
    Save one split to disk with torch.save
    -------------------------------
    :param data: the samples to store, written under the key 'data'
    :param label: the corresponding labels, written under the key 'label'
    :param save_path: destination file path
    :return:
    """
    torch.save({'data': data, 'label': label}, save_path)
|
||||||
|
|
||||||
|
|
||||||
def visualize(datas, labels, mode):
    """
    Display feature points after dimensionality reduction
    -------------------------------
    :param datas: the samples after dimensionality reduction, with the shape of [N, 2]
    :param labels: the labels (chosen from {-1, +1}) corresponding to the samples
    :param mode: chosen from {'train', 'val', 'test'}, used as the figure title
    :return:
    """
    plt.figure()
    # one scatter series per class label; iterate over the labels themselves rather
    # than over the number of feature columns, which only coincidentally equals 2
    for class_label in (-1, 1):
        selected = labels == class_label
        plt.scatter(datas[selected, 0], datas[selected, 1], label=class_label)
    plt.legend()
    plt.title(mode)
    plt.show()
|
||||||
|
|
||||||
|
|
||||||
def PCA(data, dim=2):
    """
    calculate the mean value of the data and the projection matrix for PCA
    :param data: the sample features extracted by the pretrained network in homework2, with the shape of [N, 2048]
    :param dim: the data dimension after projection
    :return:
        data_mean: the mean value of the data, one entry per feature
        u: the projection matrix for PCA, with the shape of [2048, dim]
    """
    # per-feature mean over the N samples
    data_mean = data.mean(dim=0)

    # unbiased covariance of the centred data; max(..., 1) avoids dividing by
    # zero when only a single sample is given
    centered = data - data_mean
    n_samples = data.shape[0]
    data_cov = torch.matmul(centered.t(), centered) / max(n_samples - 1, 1)

    # torch.linalg.svd returns singular values in descending order, so the leading
    # columns of the left singular vectors span the directions of largest variance
    # reference: https://pytorch.org/docs/1.11/generated/torch.linalg.svd.html
    u, _, _ = torch.linalg.svd(data_cov)

    return data_mean, u[:, :dim]
|
||||||
|
|
||||||
|
|
||||||
def loaddata(pre_conv, data_root, mode, image_size, classes):
    """
    load one dataset, and use pretrained network in homework 2 to extract feature
    :param pre_conv: pretrained network in homework 2
    :param data_root: the path of the dataset
    :param mode: chosen from {'train', 'val', 'test'}
    :param image_size: the preset size that each image try to zoom to
    :param classes: two classes that need to be classified; images are read from
        data_root/mode/<class name>/
    :return:
        datas: the samples of extracted features with the shape of [N, 2048]
        labels: the corresponding labels for each sample (chosen from {-1, +1}), with the shape of [N]
    """
    assert len(classes) == 2
    datas = []
    labels = []
    for idx, class_name in enumerate(classes):
        class_dir = os.path.join(data_root, mode, class_name)
        # first class -> -1, second class -> +1
        label = 2 * idx - 1
        # os.listdir gives no ordering guarantee; sorting keeps the sample order
        # (and therefore the saved dataset) reproducible between runs
        for img in sorted(os.listdir(class_dir)):
            datas.append(readimg(pre_conv, os.path.join(class_dir, img), image_size))
            labels.append(label)
    return torch.stack(datas), torch.tensor(labels)
|
||||||
|
|
||||||
|
|
||||||
def readimg(pre_conv, filepath, image_size):
    """
    Read one image and use pretrained network to extract the feature
    --------------------------
    :param pre_conv: pretrained network in homework 2
    :param filepath: the file path of one image
    :param image_size: the preset size that each image try to zoom to
    :return:
        data: the extracted feature with the length of 2048
    """
    # the context manager closes the underlying file as soon as the pixel data
    # has been converted, instead of leaving one open handle per image
    with Image.open(filepath) as img_file:
        img_pil = img_file.convert('RGB')
    img_pil = img_pil.resize(image_size)

    # ToTensor followed by Normalize(0.5, 0.5), i.e. (value - 0.5) / 0.5
    img_transform = transforms.Compose([transforms.ToTensor(),
                                        transforms.Normalize(0.5, 0.5),
                                        ])
    img_tensor = img_transform(img_pil)

    # add a batch dimension for the network, then flatten its output to 1-D
    data = pre_conv(img_tensor.unsqueeze(0)).reshape(-1)

    return data
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
    # ---- command line options
    parser = argparse.ArgumentParser()
    parser.add_argument("--pretrained_net", type=str, default="checkpoints/bn/ckpt_epoch_15.pth",
                        help="the filepath of the pretrained network in homework 2")
    parser.add_argument("--data_root", type=str, default="data", help="the path of all datasets")
    # `type=tuple` would split the raw command line string into single characters;
    # read two integers instead, e.g. `--image_size 32 32`
    parser.add_argument("--image_size", type=int, nargs=2, default=(32, 32),
                        help="the preset size that each image try to zoom to")
    # NOTE(review): without `nargs`, a value given on the command line arrives as a
    # single string rather than a list of two names -- confirm the intended usage
    parser.add_argument("--classes", default=["B", "C"], help="two classes that need to be classified")

    args = parser.parse_args()

    # ---- rebuild the pretrained classifier from the configs stored in the checkpoint
    pretrained_checkpoint = torch.load(args.pretrained_net, map_location="cpu")
    configs = pretrained_checkpoint["configs"]
    cls = Classifier(
        configs["in_channels"],
        configs["num_classes"],
        configs["use_batch_norm"],
        configs["use_stn"],
        configs["dropout_prob"],
    )
    cls.load_state_dict(pretrained_checkpoint["model_state"])

    # the network is only used as a fixed feature extractor: freeze its parameters
    # and switch to evaluation mode so that any batch-norm / dropout layers selected
    # by the configs do not behave as in training
    for param in cls.parameters():
        param.requires_grad = False
    cls.eval()
    conv = cls.conv_net

    preprocess(conv, args.data_root, tuple(args.image_size), args.classes)
|
||||||
|
|||||||
@@ -1,139 +1,139 @@
|
|||||||
# ========================================================
|
# ========================================================
|
||||||
# Media and Cognition
|
# Media and Cognition
|
||||||
# Homework 3 Support Vector Machine
|
# Homework 3 Support Vector Machine
|
||||||
# svm_hw.py - The implementation of SVM using hinge loss
|
# svm_hw.py - The implementation of SVM using hinge loss
|
||||||
# Student ID:
|
# Student ID:
|
||||||
# Name:
|
# Name:
|
||||||
# Tsinghua University
|
# Tsinghua University
|
||||||
# (C) Copyright 2024
|
# (C) Copyright 2024
|
||||||
# ========================================================
|
# ========================================================
|
||||||
|
|
||||||
import torch
|
import torch
|
||||||
import torch.nn as nn
|
import torch.nn as nn
|
||||||
import torch.nn.functional as F
|
import torch.nn.functional as F
|
||||||
|
|
||||||
|
|
||||||
# TODO 1: complete the forward and backward propagation processes of the linear layer
|
# TODO 1: complete the forward and backward propagation processes of the linear layer
|
||||||
class LinearFunction(torch.autograd.Function):
    '''
    we will implement the linear function:
    y = xW^T + b
    as well as its gradient computation process
    '''

    @staticmethod
    def forward(ctx, x, W, b):
        '''
        Input:
        :param ctx: a context object that can be used to stash information for backward computation
        :param x: input features with size [batch_size, input_size]
        :param W: weight matrix with size [output_size, input_size]
        :param b: bias with size [output_size]
        Return:
        y :output features with size [batch_size, output_size]
        '''

        # [batch_size, input_size] x [input_size, output_size]; b is broadcast over the batch
        y = torch.matmul(x, W.t()) + b
        # only x and W are needed for the gradients (dL/db does not depend on b)
        ctx.save_for_backward(x, W)

        return y

    @staticmethod
    def backward(ctx, grad_output):
        '''
        Input:
        :param ctx: a context object with saved variables
        :param grad_output: dL/dy, with size [batch_size, output_size]
        Return:
        grad_input: dL/dx, with size [batch_size, input_size]
        grad_W: dL/dW, with size [output_size, input_size], summed for data in the batch
        grad_b: dL/db, with size [output_size], summed for data in the batch
        '''

        x, W = ctx.saved_tensors

        # dL/dx = dL/dy * W
        grad_input = torch.matmul(grad_output, W)
        # dL/dW = (dL/dy)^T * x, the matrix product sums over the batch dimension
        grad_W = torch.matmul(grad_output.t(), x)
        # dL/db = sum of dL/dy over the batch dimension
        grad_b = grad_output.sum(dim=0)

        return grad_input, grad_W, grad_b
|
||||||
|
|
||||||
|
|
||||||
# TODO 2: complete the forward and backward propagation processes of the hinge loss
|
# TODO 2: complete the forward and backward propagation processes of the hinge loss
|
||||||
class Hinge(torch.autograd.Function):
    '''
    hinge loss with L2 regularization for SVM:
    loss = 0.5*||w||^2 + C*\\sum_i{max(0, 1 - y_i*output_i)}
    as well as its gradient computation process
    '''

    @staticmethod
    def forward(ctx, output, W, label, C):
        """
        Compute the hinge loss
        --------------------------------------
        :param ctx: a context object that can be used to stash information for backward computation
        :param output: the output of the linear layer with size [batch_size, 1], i.e. output = W^T*x + b
        :param W: weight matrix with size [1, input_size]
        :param label: the ground truth y in the equation for loss calculation, with size [batch_size]
        :param C: the regularization coefficient of hinge loss with size [1, 1]
        :return: the hinge loss with size [1, 1]
        """
        C = C.type_as(W)

        # reshape the labels to a column so that they line up with `output`,
        # and match its dtype (labels may be stored as integers)
        y = label.reshape(-1, 1).type_as(output)

        # F.relu implements max(0, .); multiplying by C ([1, 1]) gives a [1, 1] result
        loss = 0.5 * torch.sum(W ** 2) + C * torch.sum(F.relu(1.0 - y * output))
        ctx.save_for_backward(output, W, label, C)

        return loss

    @staticmethod
    def backward(ctx, grad_loss):
        """
        Compute the gradient of hinge loss
        :param ctx: a context object with saved variables
        :param grad_loss: dL/dloss, with size [1, 1], the gradient of the final target loss with respect to the output (variable 'loss') of the forward function
        :return:
            grad_output: dL/doutput, with size [batch_size, 1]
            grad_W: dL/dW, with size [1, channels]
            None, None: label and C receive no gradient
        """
        output, W, label, C = ctx.saved_tensors

        y = label.reshape(-1, 1).type_as(output)
        # only samples with 1 - y*output > 0 contribute to the hinge term
        active = ((1.0 - y * output) > 0).type_as(output)

        # d/doutput of C*max(0, 1 - y*output) is -C*y for the contributing samples
        grad_output = grad_loss * C * (-y) * active
        # d/dW of 0.5*||W||^2 is W
        grad_W = grad_loss * W
        return grad_output, grad_W, None, None
|
||||||
|
|
||||||
|
|
||||||
# TODO 3: complete the structure of SVM model
|
# TODO 3: complete the structure of SVM model
|
||||||
class SVM_HINGE(nn.Module):
    '''
    linear SVM trained with the hinge loss:
    a LinearFunction with a single output followed by the Hinge loss
    '''

    def __init__(self, in_channels, C):
        """
        :param in_channels: number of feature channels for SVM input
        :param C: regularization coefficient of hinge loss, given as a number and
            stored as a tensor with size [1, 1]
        """
        super().__init__()

        # trainable parameters, randomly initialized:
        # W has the shape [1, channels] and b has the shape [1, ]
        self.W = nn.Parameter(torch.randn(1, in_channels), requires_grad=True)
        self.b = nn.Parameter(torch.randn(1), requires_grad=True)
        # C is a constant kept as a plain tensor attribute, not a parameter
        self.C = torch.tensor([[C]], requires_grad=False)

    def forward(self, x, label=None):
        """
        :param x: input features; passed to LinearFunction together with W and b
        :param label: ground truth labels; when None no loss is computed
        :return:
            output: the predicted class, +1 where the linear output is positive and -1 otherwise
            loss: the hinge loss, or None when no label is given
        """
        # SVM calculation
        output = LinearFunction.apply(x, self.W, self.b)
        if label is not None:
            loss = Hinge.apply(output, self.W, label, self.C)
        else:
            loss = None
        # map the sign of the linear output to {-1, +1}
        output = (output > 0.0).type_as(x) * 2.0 - 1.0
        return output, loss
|
||||||
|
|||||||
@@ -1,106 +1,106 @@
|
|||||||
# ========================================================
|
# ========================================================
|
||||||
# Media and Cognition
|
# Media and Cognition
|
||||||
# Homework 3 Support Vector Machine
|
# Homework 3 Support Vector Machine
|
||||||
# test_svm.py - Test svm model for traffic sign
|
# test_svm.py - Test svm model for traffic sign
|
||||||
# Student ID:
|
# Student ID:
|
||||||
# Name:
|
# Name:
|
||||||
# Tsinghua University
|
# Tsinghua University
|
||||||
# (C) Copyright 2024
|
# (C) Copyright 2024
|
||||||
# ========================================================
|
# ========================================================
|
||||||
|
|
||||||
# ==== Part 1: import libs
|
# ==== Part 1: import libs
|
||||||
import argparse
|
import argparse
|
||||||
import torch
|
import torch
|
||||||
from datasets import Traffic_Dataset
|
from datasets import Traffic_Dataset
|
||||||
from svm_hw import SVM_HINGE
|
from svm_hw import SVM_HINGE
|
||||||
from torch.utils.data import DataLoader
|
from torch.utils.data import DataLoader
|
||||||
|
|
||||||
|
|
||||||
# ==== Part 2: testing
|
# ==== Part 2: testing
|
||||||
def test(
|
def test(
|
||||||
data_root,
|
data_root,
|
||||||
model_save_path,
|
model_save_path,
|
||||||
device,
|
device,
|
||||||
):
|
):
|
||||||
"""
|
"""
|
||||||
The main testing procedure of SVM model
|
The main testing procedure of SVM model
|
||||||
----------------------------
|
----------------------------
|
||||||
:param data_root: path to the root directory of dataset
|
:param data_root: path to the root directory of dataset
|
||||||
:param model_save_path: path to pretrained SVM model
|
:param model_save_path: path to pretrained SVM model
|
||||||
:param device: device: 'cpu' or 'cuda', we can use 'cpu' for our homework if GPU with cuda support is not available
|
:param device: device: 'cpu' or 'cuda', we can use 'cpu' for our homework if GPU with cuda support is not available
|
||||||
"""
|
"""
|
||||||
|
|
||||||
# TODO 1: =================== load the pretrained SVM model ==================================
|
# TODO 1: =================== load the pretrained SVM model ==================================
|
||||||
|
|
||||||
# TODO: construct testing data loader with 'Traffic_Dataset' and DataLoader, and set 'batch_size=1' and 'shuffle=False'
|
# TODO: construct testing data loader with 'Traffic_Dataset' and DataLoader, and set 'batch_size=1' and 'shuffle=False'
|
||||||
test_data = ???
|
test_data = ???
|
||||||
test_loader = ???
|
test_loader = ???
|
||||||
|
|
||||||
# TODO: load state dictionary of pretrained SVM model
|
# TODO: load state dictionary of pretrained SVM model
|
||||||
model_svm = ???
|
model_svm = ???
|
||||||
|
|
||||||
# TODO: initialize the SVM model using 'model_svm["configs"]["feature_channel"]' and 'model_svm["configs"]["C"]'
|
# TODO: initialize the SVM model using 'model_svm["configs"]["feature_channel"]' and 'model_svm["configs"]["C"]'
|
||||||
svm = ???
|
svm = ???
|
||||||
|
|
||||||
# TODO: load model parameters (model_svm['state_dict']) we saved in model_path using svm.load_state_dict()
|
# TODO: load model parameters (model_svm['state_dict']) we saved in model_path using svm.load_state_dict()
|
||||||
???
|
???
|
||||||
|
|
||||||
# TODO: put the model on CPU or GPU
|
# TODO: put the model on CPU or GPU
|
||||||
???
|
???
|
||||||
|
|
||||||
# TODO 2 : ================================ testing ==============================================
|
# TODO 2 : ================================ testing ==============================================
|
||||||
|
|
||||||
# TODO: set the model in evaluation mode
|
# TODO: set the model in evaluation mode
|
||||||
???
|
???
|
||||||
|
|
||||||
# to calculate and save the testing accuracy
|
# to calculate and save the testing accuracy
|
||||||
n_correct = 0. # number of images that are correctly classified
|
n_correct = 0. # number of images that are correctly classified
|
||||||
n_feas = 0. # number of total images
|
n_feas = 0. # number of total images
|
||||||
|
|
||||||
with torch.no_grad(): # we do not need to compute gradients during validation
|
with torch.no_grad(): # we do not need to compute gradients during validation
|
||||||
# TODO: inference on the testing dataset, similar to the training stage but use 'test_loader'.
|
# TODO: inference on the testing dataset, similar to the training stage but use 'test_loader'.
|
||||||
for ??? in ???:
|
for ??? in ???:
|
||||||
# TODO: set data type (.float()) and device (.to())
|
# TODO: set data type (.float()) and device (.to())
|
||||||
???
|
???
|
||||||
|
|
||||||
# TODO: run the model; at the validation step, the model only needs one input: feas
|
# TODO: run the model; at the validation step, the model only needs one input: feas
|
||||||
# _ refers to a placeholder, which means we do not need the second returned value during validating
|
# _ refers to a placeholder, which means we do not need the second returned value during validating
|
||||||
???
|
???
|
||||||
|
|
||||||
# TODO: sum up the number of images correctly recognized. note the shapes of 'out' and 'labels' are different
|
# TODO: sum up the number of images correctly recognized. note the shapes of 'out' and 'labels' are different
|
||||||
n_correct += ???
|
n_correct += ???
|
||||||
|
|
||||||
# TODO:sum up the total image number
|
# TODO:sum up the total image number
|
||||||
n_feas += ???
|
n_feas += ???
|
||||||
|
|
||||||
# show prediction accuracy
|
# show prediction accuracy
|
||||||
acc = 100 * n_correct / n_feas
|
acc = 100 * n_correct / n_feas
|
||||||
print('Test accuracy = {:.1f}%'.format(acc))
|
print('Test accuracy = {:.1f}%'.format(acc))
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
# set configurations of the testing process
|
# set configurations of the testing process
|
||||||
parser = argparse.ArgumentParser()
|
parser = argparse.ArgumentParser()
|
||||||
parser.add_argument("--data_root", type=str, default="data", help="file list of training image paths and labels")
|
parser.add_argument("--data_root", type=str, default="data", help="file list of training image paths and labels")
|
||||||
parser.add_argument("--device", type=str, help="cpu or cuda")
|
parser.add_argument("--device", type=str, help="cpu or cuda")
|
||||||
parser.add_argument("--model_save_path", type=str, default="checkpoints/svm.pth", help="path to save SVM model")
|
parser.add_argument("--model_save_path", type=str, default="checkpoints/svm.pth", help="path to save SVM model")
|
||||||
|
|
||||||
args = parser.parse_args()
|
args = parser.parse_args()
|
||||||
if args.device is None:
|
if args.device is None:
|
||||||
args.device = "cuda" if torch.cuda.is_available() else "cpu"
|
args.device = "cuda" if torch.cuda.is_available() else "cpu"
|
||||||
|
|
||||||
# run the testing procedure
|
# run the testing procedure
|
||||||
test(
|
test(
|
||||||
data_root=args.data_root,
|
data_root=args.data_root,
|
||||||
model_save_path=args.model_save_path,
|
model_save_path=args.model_save_path,
|
||||||
device=args.device,
|
device=args.device,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
@@ -1,289 +1,289 @@
|
|||||||
# ========================================================
|
# ========================================================
|
||||||
# Media and Cognition
|
# Media and Cognition
|
||||||
# Homework 3 Support Vector Machine
|
# Homework 3 Support Vector Machine
|
||||||
# train_svm.py - Train svm model for traffic sign
|
# train_svm.py - Train svm model for traffic sign
|
||||||
# Student ID:
|
# Student ID:
|
||||||
# Name:
|
# Name:
|
||||||
# Tsinghua University
|
# Tsinghua University
|
||||||
# (C) Copyright 2024
|
# (C) Copyright 2024
|
||||||
# ========================================================
|
# ========================================================
|
||||||
|
|
||||||
# ==== Part 1: import libs
|
# ==== Part 1: import libs
|
||||||
import argparse
|
import argparse
|
||||||
import matplotlib.pyplot as plt
|
import matplotlib.pyplot as plt
|
||||||
import torch
|
import torch
|
||||||
import numpy as np
|
import numpy as np
|
||||||
import random
|
import random
|
||||||
from datasets import Traffic_Dataset
|
from datasets import Traffic_Dataset
|
||||||
from svm_hw import SVM_HINGE
|
from svm_hw import SVM_HINGE
|
||||||
from torch.utils.data import DataLoader
|
from torch.utils.data import DataLoader
|
||||||
|
|
||||||
|
|
||||||
# ==== Part 2: training and validation
|
# ==== Part 2: training and validation
|
||||||
def train(
|
def train(
|
||||||
data_root,
|
data_root,
|
||||||
feature_channel,
|
feature_channel,
|
||||||
batch_size,
|
batch_size,
|
||||||
n_epoch,
|
n_epoch,
|
||||||
lr,
|
lr,
|
||||||
C,
|
C,
|
||||||
model_save_path,
|
model_save_path,
|
||||||
device,
|
device,
|
||||||
):
|
):
|
||||||
"""
|
"""
|
||||||
The main training procedure of SVM model
|
The main training procedure of SVM model
|
||||||
----------------------------
|
----------------------------
|
||||||
:param data_root: path to the root directory of dataset
|
:param data_root: path to the root directory of dataset
|
||||||
:param feature_channel: number of feature channels for SVM input
|
:param feature_channel: number of feature channels for SVM input
|
||||||
:param batch_size: batch size of training
|
:param batch_size: batch size of training
|
||||||
:param n_epoch: number of training epochs
|
:param n_epoch: number of training epochs
|
||||||
:param lr: learning rate
|
:param lr: learning rate
|
||||||
:param C: regularization coefficient in hinge loss
|
:param C: regularization coefficient in hinge loss
|
||||||
:param model_save_path: path to save SVM model
|
:param model_save_path: path to save SVM model
|
||||||
:param device: 'cpu' or 'cuda', we can use 'cpu' for our homework if GPU with cuda support is not available
|
:param device: 'cpu' or 'cuda', we can use 'cpu' for our homework if GPU with cuda support is not available
|
||||||
"""
|
"""
|
||||||
|
|
||||||
# TODO 1: construct training and validation data loader with 'Traffic_Dataset' and DataLoader, and set proper values for 'batch_size' and 'shuffle'
|
# TODO 1: construct training and validation data loader with 'Traffic_Dataset' and DataLoader, and set proper values for 'batch_size' and 'shuffle'
|
||||||
train_data = ???
|
train_data = ???
|
||||||
train_loader = ???
|
train_loader = ???
|
||||||
val_data = ???
|
val_data = ???
|
||||||
val_loader = ???
|
val_loader = ???
|
||||||
|
|
||||||
# scale the regularization coefficient
|
# scale the regularization coefficient
|
||||||
C = C * len(train_loader)
|
C = C * len(train_loader)
|
||||||
|
|
||||||
# TODO: initialize the SVM model
|
# TODO: initialize the SVM model
|
||||||
svm = ???
|
svm = ???
|
||||||
|
|
||||||
# TODO: put the model on CPU or GPU
|
# TODO: put the model on CPU or GPU
|
||||||
???
|
???
|
||||||
|
|
||||||
# TODO: define the Adam optimizer
|
# TODO: define the Adam optimizer
|
||||||
optimizer = ???
|
optimizer = ???
|
||||||
|
|
||||||
# to save the training loss, training accuracy, validation accuracy, and the epoch index of each training epoch
|
# to save the training loss, training accuracy, validation accuracy, and the epoch index of each training epoch
|
||||||
train_loss = []
|
train_loss = []
|
||||||
train_acc = []
|
train_acc = []
|
||||||
val_acc = []
|
val_acc = []
|
||||||
epochs = []
|
epochs = []
|
||||||
|
|
||||||
for epoch in range(n_epoch):
|
for epoch in range(n_epoch):
|
||||||
# TODO: save the index of current epoch in the array 'epochs'
|
# TODO: save the index of current epoch in the array 'epochs'
|
||||||
???
|
???
|
||||||
|
|
||||||
# TODO 2: ========================= training =======================
|
# TODO 2: ========================= training =======================
|
||||||
# TODO: set the model in training mode
|
# TODO: set the model in training mode
|
||||||
???
|
???
|
||||||
|
|
||||||
# to calculate and save the training loss and training accuracy
|
# to calculate and save the training loss and training accuracy
|
||||||
total_loss = 0. # to save total training loss in one epoch
|
total_loss = 0. # to save total training loss in one epoch
|
||||||
n_correct = 0. # number of images that are correctly classified
|
n_correct = 0. # number of images that are correctly classified
|
||||||
n_feas = 0. # number of total images
|
n_feas = 0. # number of total images
|
||||||
|
|
||||||
# TODO: get a batch of data; you may need enumerate() to iteratively get data from 'train_loader'.
|
# TODO: get a batch of data; you may need enumerate() to iteratively get data from 'train_loader'.
|
||||||
# you can refer to previous homework, for example hw2
|
# you can refer to previous homework, for example hw2
|
||||||
for ??? in ???:
|
for ??? in ???:
|
||||||
# TODO: set data type (.float()) and device (.to())
|
# TODO: set data type (.float()) and device (.to())
|
||||||
???
|
???
|
||||||
|
|
||||||
# TODO: clear gradients in the optimizer
|
# TODO: clear gradients in the optimizer
|
||||||
???
|
???
|
||||||
|
|
||||||
# TODO: run the model with hinge loss; the model needs two inputs: feas and labels
|
# TODO: run the model with hinge loss; the model needs two inputs: feas and labels
|
||||||
???
|
???
|
||||||
|
|
||||||
# TODO: back-propagation on the computation graph
|
# TODO: back-propagation on the computation graph
|
||||||
???
|
???
|
||||||
|
|
||||||
# TODO: sum up of total loss, loss.item() return the value of the tensor as a standard python number
|
# TODO: sum up of total loss, loss.item() return the value of the tensor as a standard python number
|
||||||
total_loss += ???
|
total_loss += ???
|
||||||
|
|
||||||
# TODO: call a function to update the parameters of the models
|
# TODO: call a function to update the parameters of the models
|
||||||
???
|
???
|
||||||
|
|
||||||
# TODO: sum up the number of images correctly recognized. note the shapes of 'out' and 'labels' are different
|
# TODO: sum up the number of images correctly recognized. note the shapes of 'out' and 'labels' are different
|
||||||
n_correct += ???
|
n_correct += ???
|
||||||
|
|
||||||
# TODO: sum up the total image number
|
# TODO: sum up the total image number
|
||||||
n_feas += ???
|
n_feas += ???
|
||||||
|
|
||||||
# average of the total loss for iterations
|
# average of the total loss for iterations
|
||||||
acc = 100 * n_correct / n_feas
|
acc = 100 * n_correct / n_feas
|
||||||
avg_loss = total_loss / len(train_loader)
|
avg_loss = total_loss / len(train_loader)
|
||||||
train_acc.append(acc.cpu().numpy())
|
train_acc.append(acc.cpu().numpy())
|
||||||
train_loss.append(avg_loss)
|
train_loss.append(avg_loss)
|
||||||
print('Epoch {:02d}: loss = {:.3f}, training accuracy = {:.1f}%'.format(epoch + 1, avg_loss, acc))
|
print('Epoch {:02d}: loss = {:.3f}, training accuracy = {:.1f}%'.format(epoch + 1, avg_loss, acc))
|
||||||
|
|
||||||
# TODO 3: ========================== Validation ======================================
|
# TODO 3: ========================== Validation ======================================
|
||||||
|
|
||||||
# TODO: set the model in evaluation mode
|
# TODO: set the model in evaluation mode
|
||||||
???
|
???
|
||||||
|
|
||||||
# to calculate and save the validation accuracy
|
# to calculate and save the validation accuracy
|
||||||
n_correct = 0. # number of images that are correctly classified
|
n_correct = 0. # number of images that are correctly classified
|
||||||
n_feas = 0. # number of total images
|
n_feas = 0. # number of total images
|
||||||
|
|
||||||
with torch.no_grad(): # we do not need to compute gradients during validation
|
with torch.no_grad(): # we do not need to compute gradients during validation
|
||||||
# TODO: inference on the validation dataset, similar to the training stage but use 'val_loader'.
|
# TODO: inference on the validation dataset, similar to the training stage but use 'val_loader'.
|
||||||
for ??? in ???:
|
for ??? in ???:
|
||||||
# TODO: set data type (.float()) and device (.to())
|
# TODO: set data type (.float()) and device (.to())
|
||||||
???
|
???
|
||||||
|
|
||||||
# TODO: run the model; at the validation step, the model only needs one input: feas
|
# TODO: run the model; at the validation step, the model only needs one input: feas
|
||||||
# _ refers to a placeholder, which means we do not need the second returned value during validating
|
# _ refers to a placeholder, which means we do not need the second returned value during validating
|
||||||
???
|
???
|
||||||
|
|
||||||
# TODO: sum up the number of images correctly recognized. note the shapes of 'out' and 'labels' are different
|
# TODO: sum up the number of images correctly recognized. note the shapes of 'out' and 'labels' are different
|
||||||
n_correct += ???
|
n_correct += ???
|
||||||
|
|
||||||
# TODO: sum up the total image number
|
# TODO: sum up the total image number
|
||||||
n_feas += ???
|
n_feas += ???
|
||||||
|
|
||||||
# show prediction accuracy
|
# show prediction accuracy
|
||||||
acc = 100 * n_correct / n_feas
|
acc = 100 * n_correct / n_feas
|
||||||
print('Epoch {:02d}: validation accuracy = {:.1f}%'.format(epoch + 1, acc))
|
print('Epoch {:02d}: validation accuracy = {:.1f}%'.format(epoch + 1, acc))
|
||||||
val_acc.append(acc.cpu().numpy())
|
val_acc.append(acc.cpu().numpy())
|
||||||
|
|
||||||
# save model parameters in a file
|
# save model parameters in a file
|
||||||
torch.save({'state_dict': svm.state_dict(),
|
torch.save({'state_dict': svm.state_dict(),
|
||||||
'configs': {
|
'configs': {
|
||||||
'feature_channel': feature_channel,
|
'feature_channel': feature_channel,
|
||||||
'C': C}
|
'C': C}
|
||||||
}, model_save_path)
|
}, model_save_path)
|
||||||
print('Model saved in {}\n'.format(model_save_path))
|
print('Model saved in {}\n'.format(model_save_path))
|
||||||
|
|
||||||
W = svm.W.data.cpu()
|
W = svm.W.data.cpu()
|
||||||
b = svm.b.data.cpu()
|
b = svm.b.data.cpu()
|
||||||
|
|
||||||
# TODO 4: calculate the index of support vectors in training samples using 'train_data.datas' and 'train_data.labels'
|
# TODO 4: calculate the index of support vectors in training samples using 'train_data.datas' and 'train_data.labels'
|
||||||
# 'sv' should be a list in python structure with the shape of [K], where K is the number of support vectors.
|
# 'sv' should be a list in python structure with the shape of [K], where K is the number of support vectors.
|
||||||
sv = ???
|
sv = ???
|
||||||
|
|
||||||
plot(train_loss, train_acc, val_acc, epochs)
|
plot(train_loss, train_acc, val_acc, epochs)
|
||||||
plot_feature(train_features=train_data.datas, val_features=val_data.datas, train_labels=train_data.labels,
|
plot_feature(train_features=train_data.datas, val_features=val_data.datas, train_labels=train_data.labels,
|
||||||
val_labels=val_data.labels, sv=sv, W=W, b=b)
|
val_labels=val_data.labels, sv=sv, W=W, b=b)
|
||||||
|
|
||||||
|
|
||||||
def plot_feature(train_features, val_features, train_labels, val_labels, sv, W, b):
|
def plot_feature(train_features, val_features, train_labels, val_labels, sv, W, b):
|
||||||
"""
|
"""
|
||||||
Draw the samples,SVM decision boundary, and support vectors
|
Draw the samples,SVM decision boundary, and support vectors
|
||||||
---------------------
|
---------------------
|
||||||
:param train_features: training samples with the shape of [B, 2]
|
:param train_features: training samples with the shape of [B, 2]
|
||||||
:param val_features: validation samples with the shape of [B, 2]
|
:param val_features: validation samples with the shape of [B, 2]
|
||||||
:param train_labels: the labels (chosen from{-1, +1}) corresponding to training samples, with the shape of [B, 1]
|
:param train_labels: the labels (chosen from{-1, +1}) corresponding to training samples, with the shape of [B, 1]
|
||||||
:param val_labels: the labels (chosen from{-1, +1}) corresponding to validation samples, with the shape of [B, 1]
|
:param val_labels: the labels (chosen from{-1, +1}) corresponding to validation samples, with the shape of [B, 1]
|
||||||
:param sv: a list with the index of support vectors in training samples, with the shape of [K] (K is the number of support vectors)
|
:param sv: a list with the index of support vectors in training samples, with the shape of [K] (K is the number of support vectors)
|
||||||
:param W: the weight vector of SVM decision boundary (W^Tx + b), with the shape of [1, feature_channel]
|
:param W: the weight vector of SVM decision boundary (W^Tx + b), with the shape of [1, feature_channel]
|
||||||
:param b: the bias of SVM decision boundary (W^Tx + b), with the shape of [1,]
|
:param b: the bias of SVM decision boundary (W^Tx + b), with the shape of [1,]
|
||||||
"""
|
"""
|
||||||
train_labels = (train_labels > 0.0).int()
|
train_labels = (train_labels > 0.0).int()
|
||||||
val_labels = (val_labels > 0.0).int()
|
val_labels = (val_labels > 0.0).int()
|
||||||
train_labels[sv] = 2
|
train_labels[sv] = 2
|
||||||
foreground = list(set([i for i in range(train_labels.shape[0] // 2)]) - set(sv))
|
foreground = list(set([i for i in range(train_labels.shape[0] // 2)]) - set(sv))
|
||||||
foreground_sv = list(set([i for i in range(train_labels.shape[0] // 2)]) - set(foreground))
|
foreground_sv = list(set([i for i in range(train_labels.shape[0] // 2)]) - set(foreground))
|
||||||
background = list(set([i + train_labels.shape[0] // 2 for i in range(train_labels.shape[0] // 2)]) - set(sv))
|
background = list(set([i + train_labels.shape[0] // 2 for i in range(train_labels.shape[0] // 2)]) - set(sv))
|
||||||
background_sv = list(set([i + train_labels.shape[0] // 2 for i in range(train_labels.shape[0] // 2)]) - set(background))
|
background_sv = list(set([i + train_labels.shape[0] // 2 for i in range(train_labels.shape[0] // 2)]) - set(background))
|
||||||
f, ax = plt.subplots()
|
f, ax = plt.subplots()
|
||||||
plt.title("training dataset")
|
plt.title("training dataset")
|
||||||
ax.scatter(train_features[foreground, 0], train_features[foreground, 1], marker='.', c='r', label="-1")
|
ax.scatter(train_features[foreground, 0], train_features[foreground, 1], marker='.', c='r', label="-1")
|
||||||
ax.scatter(train_features[foreground_sv, 0], train_features[foreground_sv, 1], marker='.', c='darkorange',
|
ax.scatter(train_features[foreground_sv, 0], train_features[foreground_sv, 1], marker='.', c='darkorange',
|
||||||
label="-1 (support vector)")
|
label="-1 (support vector)")
|
||||||
ax.scatter(train_features[background, 0], train_features[background, 1], marker='x', c='b', label="+1")
|
ax.scatter(train_features[background, 0], train_features[background, 1], marker='x', c='b', label="+1")
|
||||||
ax.scatter(train_features[background_sv, 0], train_features[background_sv, 1], marker='x', c='c',
|
ax.scatter(train_features[background_sv, 0], train_features[background_sv, 1], marker='x', c='c',
|
||||||
label="+1 (support vector)")
|
label="+1 (support vector)")
|
||||||
x = np.linspace(-20, 20, 100)
|
x = np.linspace(-20, 20, 100)
|
||||||
ax.plot(x, -W[0, 0] / W[0, 1] * x - b / W[0, 1], c='y')
|
ax.plot(x, -W[0, 0] / W[0, 1] * x - b / W[0, 1], c='y')
|
||||||
ax.legend(loc="best")
|
ax.legend(loc="best")
|
||||||
plt.ylim([-30, 30])
|
plt.ylim([-30, 30])
|
||||||
plt.show()
|
plt.show()
|
||||||
f, ax = plt.subplots()
|
f, ax = plt.subplots()
|
||||||
plt.title("validation dataset")
|
plt.title("validation dataset")
|
||||||
foreground_val = [i for i in range(val_labels.shape[0] // 2)]
|
foreground_val = [i for i in range(val_labels.shape[0] // 2)]
|
||||||
background_val = [i + val_labels.shape[0] // 2 for i in range(val_labels.shape[0] // 2)]
|
background_val = [i + val_labels.shape[0] // 2 for i in range(val_labels.shape[0] // 2)]
|
||||||
ax.scatter(val_features[foreground_val, 0], val_features[foreground_val, 1], marker='.', c='r', label="-1")
|
ax.scatter(val_features[foreground_val, 0], val_features[foreground_val, 1], marker='.', c='r', label="-1")
|
||||||
ax.scatter(val_features[background_val, 0], val_features[background_val, 1], marker='x', c='b', label="+1")
|
ax.scatter(val_features[background_val, 0], val_features[background_val, 1], marker='x', c='b', label="+1")
|
||||||
x = np.linspace(-20, 20, 100)
|
x = np.linspace(-20, 20, 100)
|
||||||
ax.plot(x, -W[0, 0] / W[0, 1] * x - b / W[0, 1], c='y')
|
ax.plot(x, -W[0, 0] / W[0, 1] * x - b / W[0, 1], c='y')
|
||||||
ax.legend(loc="best")
|
ax.legend(loc="best")
|
||||||
plt.ylim([-30, 30])
|
plt.ylim([-30, 30])
|
||||||
plt.show()
|
plt.show()
|
||||||
|
|
||||||
|
|
||||||
def plot(train_loss, train_acc, val_acc, epochs):
|
def plot(train_loss, train_acc, val_acc, epochs):
|
||||||
"""
|
"""
|
||||||
Draw loss and accuracy curve
|
Draw loss and accuracy curve
|
||||||
------------------
|
------------------
|
||||||
:param train_loss: a list with loss of each training epoch
|
:param train_loss: a list with loss of each training epoch
|
||||||
:param train_acc: a list with accuracy on training dataset of each training epoch
|
:param train_acc: a list with accuracy on training dataset of each training epoch
|
||||||
:param val_acc: a list with accuracy on validation dataset of each training epoch
|
:param val_acc: a list with accuracy on validation dataset of each training epoch
|
||||||
:param epochs: a list with the index of all training epochs
|
:param epochs: a list with the index of all training epochs
|
||||||
"""
|
"""
|
||||||
|
|
||||||
# draw the training loss curve
|
# draw the training loss curve
|
||||||
f, ax = plt.subplots()
|
f, ax = plt.subplots()
|
||||||
plt.title("Training Loss")
|
plt.title("Training Loss")
|
||||||
ax.plot(epochs, train_loss, color="tab:blue")
|
ax.plot(epochs, train_loss, color="tab:blue")
|
||||||
ax.set_xlabel("Training epoch")
|
ax.set_xlabel("Training epoch")
|
||||||
ax.set_ylabel("Loss")
|
ax.set_ylabel("Loss")
|
||||||
ax.legend(["training loss"], loc="best")
|
ax.legend(["training loss"], loc="best")
|
||||||
plt.show()
|
plt.show()
|
||||||
|
|
||||||
# draw the accuracy curve
|
# draw the accuracy curve
|
||||||
f, ax = plt.subplots()
|
f, ax = plt.subplots()
|
||||||
plt.title("Training and Validation Accuracy")
|
plt.title("Training and Validation Accuracy")
|
||||||
ax.plot(epochs, train_acc, color="tab:orange")
|
ax.plot(epochs, train_acc, color="tab:orange")
|
||||||
ax.plot(epochs, val_acc, color="tab:green")
|
ax.plot(epochs, val_acc, color="tab:green")
|
||||||
ax.legend(["training accuracy","validation accuracy"], loc="best")
|
ax.legend(["training accuracy","validation accuracy"], loc="best")
|
||||||
ax.set_xlabel("Training epoch")
|
ax.set_xlabel("Training epoch")
|
||||||
ax.set_ylabel("Accuracy")
|
ax.set_ylabel("Accuracy")
|
||||||
ax.set_ylim(0, 101)
|
ax.set_ylim(0, 101)
|
||||||
plt.show()
|
plt.show()
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
# set random seed for reproducibility
|
# set random seed for reproducibility
|
||||||
seed = 2024
|
seed = 2024
|
||||||
random.seed(seed)
|
random.seed(seed)
|
||||||
np.random.seed(seed)
|
np.random.seed(seed)
|
||||||
torch.manual_seed(seed)
|
torch.manual_seed(seed)
|
||||||
torch.cuda.manual_seed(seed)
|
torch.cuda.manual_seed(seed)
|
||||||
torch.cuda.manual_seed_all(seed)
|
torch.cuda.manual_seed_all(seed)
|
||||||
torch.backends.cudnn.deterministic = True
|
torch.backends.cudnn.deterministic = True
|
||||||
|
|
||||||
# set configurations of the model and training process
|
# set configurations of the model and training process
|
||||||
parser = argparse.ArgumentParser()
|
parser = argparse.ArgumentParser()
|
||||||
parser.add_argument("--data_root", type=str, default="data", help="file list of training image paths and labels",)
|
parser.add_argument("--data_root", type=str, default="data", help="file list of training image paths and labels",)
|
||||||
parser.add_argument("--n_epoch", type=int, default=50, help="number of training epochs")
|
parser.add_argument("--n_epoch", type=int, default=50, help="number of training epochs")
|
||||||
parser.add_argument("--batch_size", type=int, default=20, help="training batch size")
|
parser.add_argument("--batch_size", type=int, default=20, help="training batch size")
|
||||||
parser.add_argument("--lr", type=float, default=1e-2, help="learning rate")
|
parser.add_argument("--lr", type=float, default=1e-2, help="learning rate")
|
||||||
parser.add_argument("--C", type=float, default=1e-3, help="regularization coefficient in hinge loss")
|
parser.add_argument("--C", type=float, default=1e-3, help="regularization coefficient in hinge loss")
|
||||||
parser.add_argument("--device", type=str, help="cpu or cuda")
|
parser.add_argument("--device", type=str, help="cpu or cuda")
|
||||||
parser.add_argument("--feature_channel", type=int, default=2, help="number of pre-extracted feature channel by pretrained network")
|
parser.add_argument("--feature_channel", type=int, default=2, help="number of pre-extracted feature channel by pretrained network")
|
||||||
parser.add_argument("--model_save_path", type=str, default="checkpoints/svm.pth", help="path to save SVM model")
|
parser.add_argument("--model_save_path", type=str, default="checkpoints/svm.pth", help="path to save SVM model")
|
||||||
|
|
||||||
args = parser.parse_args()
|
args = parser.parse_args()
|
||||||
if args.device is None:
|
if args.device is None:
|
||||||
args.device = "cuda" if torch.cuda.is_available() else "cpu"
|
args.device = "cuda" if torch.cuda.is_available() else "cpu"
|
||||||
|
|
||||||
# run the training procedure
|
# run the training procedure
|
||||||
train(
|
train(
|
||||||
data_root=args.data_root,
|
data_root=args.data_root,
|
||||||
feature_channel=args.feature_channel,
|
feature_channel=args.feature_channel,
|
||||||
batch_size=args.batch_size,
|
batch_size=args.batch_size,
|
||||||
n_epoch=args.n_epoch,
|
n_epoch=args.n_epoch,
|
||||||
lr=args.lr,
|
lr=args.lr,
|
||||||
C=args.C,
|
C=args.C,
|
||||||
model_save_path=args.model_save_path,
|
model_save_path=args.model_save_path,
|
||||||
device=args.device,
|
device=args.device,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
@@ -20,21 +20,21 @@
|
|||||||
\begin{document}
|
\begin{document}
|
||||||
\courseheader
|
\courseheader
|
||||||
% 请在YOUR NAME处填写自己的姓名
|
% 请在YOUR NAME处填写自己的姓名
|
||||||
\name{YOUR NAME}
|
\name{高艺轩}
|
||||||
\vspace{3mm}
|
\vspace{3mm}
|
||||||
\centerline{\textbf{\Large{理论部分}}}
|
\centerline{\textbf{\Large{理论部分}}}
|
||||||
|
|
||||||
\section{单选题(15分)}
|
\section{单选题(15分)}
|
||||||
% 请在?处填写答案
|
% 请在?处填写答案
|
||||||
\subsection{\underline{?}}
|
\subsection{\underline{D}}
|
||||||
|
|
||||||
\subsection{\underline{?}}
|
\subsection{\underline{C}}
|
||||||
|
|
||||||
\subsection{\underline{?}}
|
\subsection{\underline{D}}
|
||||||
|
|
||||||
\subsection{\underline{?}}
|
\subsection{\underline{D}}
|
||||||
|
|
||||||
\subsection{\underline{?}}
|
\subsection{\underline{B}}
|
||||||
|
|
||||||
\section{计算题(15 分)}
|
\section{计算题(15 分)}
|
||||||
|
|
||||||
@@ -47,17 +47,117 @@
|
|||||||
试利用LDA,将样本特征维数压缩为一维。
|
试利用LDA,将样本特征维数压缩为一维。
|
||||||
}
|
}
|
||||||
|
|
||||||
|
\begin{proof}[解]
|
||||||
|
首先计算$\mu_1 = (3, 2), \mu_2 = (0, 2), \mu = (1.5, 2)$。因此
|
||||||
|
\[S_1 = \frac{1}{4}
|
||||||
|
\left(
|
||||||
|
\begin{bmatrix}
|
||||||
|
0 & 0\\
|
||||||
|
0 & 1
|
||||||
|
\end{bmatrix}
|
||||||
|
+
|
||||||
|
\begin{bmatrix}
|
||||||
|
1 & 0\\
|
||||||
|
0 & 0
|
||||||
|
\end{bmatrix}
|
||||||
|
+
|
||||||
|
\begin{bmatrix}
|
||||||
|
1 & 1\\
|
||||||
|
1 & 1
|
||||||
|
\end{bmatrix}
|
||||||
|
+
|
||||||
|
\begin{bmatrix}
|
||||||
|
0 & 0\\
|
||||||
|
0 & 0
|
||||||
|
\end{bmatrix}
|
||||||
|
\right)
|
||||||
|
=
|
||||||
|
\begin{bmatrix}
|
||||||
|
0.5 & 0.25\\
|
||||||
|
0.25 & 0.5
|
||||||
|
\end{bmatrix}\]
|
||||||
|
\[S_2 = \frac{1}{4}
|
||||||
|
\left(
|
||||||
|
\begin{bmatrix}
|
||||||
|
0 & 0\\
|
||||||
|
0 & 1
|
||||||
|
\end{bmatrix}
|
||||||
|
+
|
||||||
|
\begin{bmatrix}
|
||||||
|
1 & 0\\
|
||||||
|
0 & 0
|
||||||
|
\end{bmatrix}
|
||||||
|
+
|
||||||
|
\begin{bmatrix}
|
||||||
|
1 & 1\\
|
||||||
|
1 & 1
|
||||||
|
\end{bmatrix}
|
||||||
|
+
|
||||||
|
\begin{bmatrix}
|
||||||
|
1 & 0\\
|
||||||
|
0 & 0
|
||||||
|
\end{bmatrix}
|
||||||
|
\right)
|
||||||
|
=
|
||||||
|
\begin{bmatrix}
|
||||||
|
0.75 & 0.25\\
|
||||||
|
0.25 & 0.5
|
||||||
|
\end{bmatrix}\]
|
||||||
|
进一步地,
|
||||||
|
\[S_w = \frac{1}{2} (S_1 + S_2) =
|
||||||
|
\begin{bmatrix}
|
||||||
|
0.625 & 0.25\\
|
||||||
|
0.25 & 0.5
|
||||||
|
\end{bmatrix}\]
|
||||||
|
\[S_b = \frac{1}{2} \left(
|
||||||
|
\begin{bmatrix}
|
||||||
|
2.25 & 0\\
|
||||||
|
0 & 0
|
||||||
|
\end{bmatrix}
|
||||||
|
+
|
||||||
|
\begin{bmatrix}
|
||||||
|
2.25 & 0\\
|
||||||
|
0 & 0
|
||||||
|
\end{bmatrix}
|
||||||
|
\right)
|
||||||
|
=
|
||||||
|
\begin{bmatrix}
|
||||||
|
2.25 & 0\\
|
||||||
|
0 & 0
|
||||||
|
\end{bmatrix}\]
|
||||||
|
求解广义特征值问题$S_b v = \lambda S_w v$,得到最大特征值$\lambda = 4.5$及对应的单位特征向量$v = (0.8944, -0.4472)$。按$y = v^\mathrm{T} x$投影后的样本为
|
||||||
|
\[\omega_1: \left\{2.2360, 0.8944, 2.2360, 1.7888\right\}\]
|
||||||
|
\[\omega_2: \left\{-0.4472, 0, -1.3416, -1.7888\right\}\]
|
||||||
|
\end{proof}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
\vspace{3mm}
|
\vspace{3mm}
|
||||||
\subsection{模型训练通常需要大量的数据,假设某采集的数据集包含80\%的有效数据和20\%的无效数据。采用一种算法判断数据是否有效,其中无效数据被成功判别为无效数据的概率为90\%,而有效数据被误判为无效数据的概率为5\%。如果某条数据经过该算法被判别为无效数据,则根据贝叶斯定理,这条数据是无效数据的概率是多少?(提示:全概率公式$P(Y)=\sum^{N}_{i=1}P(Y|X_i)P(X_i)$)\\}
|
\subsection{模型训练通常需要大量的数据,假设某采集的数据集包含80\%的有效数据和20\%的无效数据。采用一种算法判断数据是否有效,其中无效数据被成功判别为无效数据的概率为90\%,而有效数据被误判为无效数据的概率为5\%。如果某条数据经过该算法被判别为无效数据,则根据贝叶斯定理,这条数据是无效数据的概率是多少?(提示:全概率公式$P(Y)=\sum^{N}_{i=1}P(Y|X_i)P(X_i)$)\\}
|
||||||
|
|
||||||
|
\begin{proof}[解]
|
||||||
|
\begin{align*}
|
||||||
|
& P(\text{无效数据} \mid \text{判定无效})\\
|
||||||
|
= & \frac{P(\text{判定无效} \mid \text{无效数据})P(\text{无效数据})}{P(\text{判定无效} \mid \text{无效数据})P(\text{无效数据}) + P(\text{判定无效} \mid \text{有效数据})P(\text{有效数据})}\\
|
||||||
|
= & \frac{0.9 \times 0.2}{0.9 \times 0.2 + 0.05 \times 0.8}\\
|
||||||
|
= & \frac{0.18}{0.18 + 0.04}\\
|
||||||
|
= & \frac{9}{11}
|
||||||
|
\end{align*}
|
||||||
|
\end{proof}
|
||||||
|
|
||||||
\vspace{3mm}
|
\vspace{3mm}
|
||||||
\subsection{设有两类正态分布的样本集,第一类均值为$\mu_1=[2,-1]^T$,第二类均值为$\mu_2=[1,1]^T$。两类样本集的协方差矩阵和出现的先验概率都相等:$\Sigma_1=\Sigma_2=\Sigma=\left[ \begin{array}{cc}
|
\subsection{设有两类正态分布的样本集,第一类均值为$\mu_1=[2,-1]^T$,第二类均值为$\mu_2=[1,1]^T$。两类样本集的协方差矩阵和出现的先验概率都相等:$\Sigma_1=\Sigma_2=\Sigma=\left[ \begin{array}{cc}
|
||||||
4 & 2 \\
|
4 & 2 \\
|
||||||
2 & \frac{4}{3}
|
2 & \frac{4}{3}
|
||||||
\end{array} \right]$,$p(\omega_1)=p(\omega_2)$。试计算分类界面,并对特征向量$x=[6,2]^T$分类。}
|
\end{array} \right]$,$p(\omega_1)=p(\omega_2)$。试计算分类界面,并对特征向量$x=[6,2]^T$分类。}
|
||||||
|
|
||||||
|
\begin{proof}[解]
|
||||||
|
\[g_1(\boldsymbol{x}) = -\frac{1}{2}(\boldsymbol{x} - \boldsymbol{\mu}_1)^\mathrm{T} \Sigma^{-1} (\boldsymbol{x} - \boldsymbol{\mu}_1) + \ln p(\omega_1)\]
|
||||||
|
\[g_2(\boldsymbol{x}) = -\frac{1}{2}(\boldsymbol{x} - \boldsymbol{\mu}_2)^\mathrm{T} \Sigma^{-1} (\boldsymbol{x} - \boldsymbol{\mu}_2) + \ln p(\omega_2)\]
|
||||||
|
决策方程
|
||||||
|
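\[g(\boldsymbol{x}) = g_1(\boldsymbol{x}) - g_2(\boldsymbol{x}) = (\boldsymbol{\mu}_1 - \boldsymbol{\mu}_2)^\mathrm{T} \Sigma^{-1} \boldsymbol{x} - \frac{1}{2}\left(\boldsymbol{\mu}_1^\mathrm{T} \Sigma^{-1} \boldsymbol{\mu}_1 - \boldsymbol{\mu}_2^\mathrm{T} \Sigma^{-1} \boldsymbol{\mu}_2\right) = 0\]
其中先验概率相等,$\ln p(\omega_1)$与$\ln p(\omega_2)$相互抵消。由$\Sigma^{-1} = \begin{bmatrix} 1 & -1.5\\ -1.5 & 3 \end{bmatrix}$,得$\Sigma^{-1}(\boldsymbol{\mu}_1 - \boldsymbol{\mu}_2) = (4, -7.5)^\mathrm{T}$,$\boldsymbol{\mu}_1^\mathrm{T} \Sigma^{-1} \boldsymbol{\mu}_1 = 13$,$\boldsymbol{\mu}_2^\mathrm{T} \Sigma^{-1} \boldsymbol{\mu}_2 = 1$,因此分类界面为
\[g(\boldsymbol{x}) = 4x_1 - 7.5x_2 - 6 = 0\]
对$\boldsymbol{x} = [6, 2]^\mathrm{T}$,$g(\boldsymbol{x}) = 24 - 15 - 6 = 3 > 0$,故$\boldsymbol{x}$属于$\omega_1$类。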
|
||||||
|
\end{proof}
|
||||||
|
|
||||||
\vspace{3mm}
|
\vspace{3mm}
|
||||||
\subsection{给定异或的样本集$D=\left\{\left((0,0)^T,-1\right),\left((0,1)^T,1\right),\left((1,0)^T,1\right),\left((1,1)^T,-1\right)\right\}$该样本集是线性不可分的,可采用如下所示的多项式函数$\phi(\mathbf{x})$将样本$D=\left\{(\mathbf{x}_n,y_n)\right\}$映射为$D_\phi=\left\{(\phi(\mathbf{x}_n),y_n)\right\}$,其中$\phi(\mathbf{x})$满足
|
\subsection{给定异或的样本集$D=\left\{\left((0,0)^T,-1\right),\left((0,1)^T,1\right),\left((1,0)^T,1\right),\left((1,1)^T,-1\right)\right\}$该样本集是线性不可分的,可采用如下所示的多项式函数$\phi(\mathbf{x})$将样本$D=\left\{(\mathbf{x}_n,y_n)\right\}$映射为$D_\phi=\left\{(\phi(\mathbf{x}_n),y_n)\right\}$,其中$\phi(\mathbf{x})$满足
|
||||||
\begin{equation*}
|
\begin{equation*}
|
||||||
|
|||||||
Reference in New Issue
Block a user