298 lines
8.1 KiB
Plaintext
298 lines
8.1 KiB
Plaintext
{
|
|
"cells": [
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 2,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"import torch\n",
|
|
"import torch.nn as nn\n",
|
|
"import torch.nn.functional as F\n",
|
|
"\n",
|
|
"import torchvision.transforms as transforms\n",
|
|
"\n",
|
|
"import numpy as np"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 2,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
class ConvBlock(nn.Module):
    """Single convolution followed by optional batch norm and a ReLU.

    When ``use_residual`` is set, the block input is added to the activated
    output, so the input must be broadcast-compatible with the convolution
    output (in practice: same channel count and a shape-preserving
    stride/padding).
    """

    def __init__(
        self,
        in_channels,
        out_channels,
        kernel_size,
        stride,
        padding,
        use_batch_norm=False,
        use_residual=False,
    ):
        """
        Convolutional block with batch normalization and ReLU activation
        ----------------------
        :param in_channels: channel number of input image
        :param out_channels: channel number of output image
        :param kernel_size: size of convolutional kernel
        :param stride: stride of convolutional operation
        :param padding: padding of convolutional operation
        :param use_batch_norm: whether to use batch normalization in convolutional layers
        :param use_residual: whether to use residual connection
        """
        super().__init__()

        if use_batch_norm:
            bn2d = nn.BatchNorm2d
        else:
            # use identity function to replace batch normalization
            # (nn.Identity accepts and ignores the `out_channels` argument)
            bn2d = nn.Identity

        self.use_residual = use_residual

        # >>> TODO 2.1: complete a convolutional block with batch normalization and ReLU activation
        # Hint: use the `bn2d` defined above for batch normalization to adapt to the input parameter `use_batch_norm`
        # Network structure:
        # conv -> batchnorm -> relu
        self.conv = nn.Conv2d(in_channels, out_channels, kernel_size, stride=stride, padding=padding)
        self.bn = bn2d(out_channels)
        self.relu = nn.ReLU()
        # <<< TODO 2.1

    def forward(self, x):
        """Apply conv -> batchnorm -> relu and, if enabled, add the input back.

        :param x: input tensor fed to the convolution
        :return: activated feature map, plus ``x`` when ``use_residual`` is True
        """
        # >>> TODO 2.2: forward process
        # Hint: apply residual connection if `self.use_residual` is True
        out = self.relu(self.bn(self.conv(x)))
        if self.use_residual:
            # Out-of-place add on purpose: autograd keeps the ReLU output for
            # its backward pass, so `out += x` would invalidate it and make
            # `.backward()` raise a RuntimeError.
            out = out + x

        # <<< TODO 2.2
        return out
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 3,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
# Convolutional feature extractor: five ConvBlocks, two 2x2 max-pools and a
# final channel-wise dropout. Batch norm is left at its default (disabled).
in_channels = 3
dropout_prob = 0.5

conv_net = nn.Sequential(
    # Stem: 5x5 convolutions; the second one uses stride 2.
    ConvBlock(in_channels=in_channels, out_channels=32, kernel_size=5, stride=1, padding=2),
    ConvBlock(in_channels=32, out_channels=64, kernel_size=5, stride=2, padding=2),
    nn.MaxPool2d(kernel_size=2, stride=2, padding=0),
    # Residual block at 64 channels (kernel 3, stride 1, padding 1).
    ConvBlock(in_channels=64, out_channels=64, kernel_size=3, stride=1, padding=1, use_residual=True),
    ConvBlock(in_channels=64, out_channels=128, kernel_size=3, stride=1, padding=1),
    nn.MaxPool2d(kernel_size=2, stride=2, padding=0),
    # Residual block at 128 channels, then dropout over whole feature maps.
    ConvBlock(in_channels=128, out_channels=128, kernel_size=3, stride=1, padding=1, use_residual=True),
    nn.Dropout2d(p=dropout_prob),
)
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 8,
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"name": "stdout",
|
|
"output_type": "stream",
|
|
"text": [
|
|
"torch.Size([10, 128, 4, 4])\n",
|
|
"ConvBlock(\n",
|
|
" (conv): Conv2d(32, 64, kernel_size=(5, 5), stride=(2, 2), padding=(2, 2))\n",
|
|
" (bn): Identity()\n",
|
|
" (relu): ReLU()\n",
|
|
")\n"
|
|
]
|
|
}
|
|
],
|
|
"source": [
|
# Smoke test: run a random batch of ten 3-channel 32x32 inputs through the
# network and report the resulting feature-map shape.
a = torch.randn(10, 3, 32, 32)
print(conv_net(a).shape)

# Inspect the second layer (index 1) of the Sequential.
print(conv_net[1])
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 24,
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"name": "stdout",
|
|
"output_type": "stream",
|
|
"text": [
|
|
"torch.Size([10, 8, 16, 16])\n",
|
|
"torch.Size([10, 16, 8, 8])\n"
|
|
]
|
|
}
|
|
],
|
|
"source": [
|
# Two stride-2 blocks with batch norm enabled; each should halve H and W.
# NOTE(review): relies on `a` from an earlier cell; presumably the
# (10, 3, 32, 32) random batch, since later cells rebind `a` -- confirm
# execution order.
conv_1 = ConvBlock(
    in_channels=3,
    out_channels=8,
    kernel_size=9,
    stride=2,
    padding=4,
    use_batch_norm=True,
)
conv_2 = ConvBlock(
    in_channels=8,
    out_channels=16,
    kernel_size=5,
    stride=2,
    padding=2,
    use_batch_norm=True,
)

print(conv_1(a).shape)
print(conv_2(conv_1(a)).shape)
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 10,
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"name": "stdout",
|
|
"output_type": "stream",
|
|
"text": [
|
|
"tensor([0., 1.])\n",
|
|
"1\n"
|
|
]
|
|
}
|
|
],
|
|
"source": [
|
# Element-wise comparisons: cast a boolean mask back to float, and count
# matching entries.
a = torch.tensor([1.0, 2.0])
b = torch.tensor([1.0, 1.0])

greater_mask = a > b
print(greater_mask.to(a.dtype))

equal_count = (a == b).sum()
print(equal_count.item())
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 19,
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"name": "stdout",
|
|
"output_type": "stream",
|
|
"text": [
|
|
"tensor(2.5000)\n"
|
|
]
|
|
}
|
|
],
|
|
"source": [
|
# Column-wise mean (reduce over dim 0) and the centred matrix obtained by
# broadcasting the mean back over the rows.
# NOTE(review): the output stored with this cell (`tensor(2.5000)`) does not
# match this code and looks stale -- re-run the cell.
a = torch.tensor([[1.0, 2.0], [3.0, 4.0]])
mu = torch.mean(a, dim=0)
print(mu, a.sub(mu))
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 8,
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"name": "stdout",
|
|
"output_type": "stream",
|
|
"text": [
|
|
"tensor([[5.],\n",
|
|
" [4.]])\n"
|
|
]
|
|
}
|
|
],
|
|
"source": [
|
# Scale a (2, 1) column by a one-element tensor via transpose -> multiply ->
# transpose back.
a = torch.tensor([[5.0], [4.0]])
b = torch.tensor([1.0])
print(a.T.mul(b).T)
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 8,
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"name": "stdout",
|
|
"output_type": "stream",
|
|
"text": [
|
|
"tensor([[False, True, True, True, True],\n",
|
|
" [False, False, True, True, True],\n",
|
|
" [False, False, False, True, True],\n",
|
|
" [False, False, False, False, True],\n",
|
|
" [False, False, False, False, False]])\n",
|
|
"tensor([[-0.1170, 0.6130, 0.9644, -1.2733, -0.9671],\n",
|
|
" [-0.7806, 0.5082, -0.2731, 0.1660, -0.5451],\n",
|
|
" [-2.1527, -0.5059, -0.0079, -0.5796, -1.1107],\n",
|
|
" [-1.8357, -0.8010, -0.0424, 0.1491, -1.5009],\n",
|
|
" [-1.3666, -0.8209, 0.0483, -1.3165, -0.9222]])\n",
|
|
"tensor([[-0.1170, -inf, -inf, -inf, -inf],\n",
|
|
" [-0.7806, 0.5082, -inf, -inf, -inf],\n",
|
|
" [-2.1527, -0.5059, -0.0079, -inf, -inf],\n",
|
|
" [-1.8357, -0.8010, -0.0424, 0.1491, -inf],\n",
|
|
" [-1.3666, -0.8209, 0.0483, -1.3165, -0.9222]])\n"
|
|
]
|
|
}
|
|
],
|
|
"source": [
|
# Strictly-upper-triangular boolean mask: True marks positions above the
# diagonal, which are overwritten with -inf below.
mask = torch.ones(5, 5).triu(diagonal=1).bool()
print(mask)

# Random scores (unseeded, so values differ from run to run).
attn = torch.randn(5, 5)
print(attn)

# masked_fill returns a new tensor; `attn` itself is left untouched.
print(attn.masked_fill(mask, float("-inf")))
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 6,
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"name": "stdout",
|
|
"output_type": "stream",
|
|
"text": [
|
|
"tensor([0.1402, 0.2312, 0.6285])\n"
|
|
]
|
|
}
|
|
],
|
|
"source": [
|
# Softmax over scaled dot products of one query against three keys.
# The divisor 2 equals sqrt(4), the square root of the length of each key row.
Q = torch.tensor([1.0, 0.0, 1.0, 1.0])
K = torch.tensor(
    [
        [0.0, 0.0, 0.0, 2.0],
        [2.0, 0.0, 1.0, 0.0],
        [2.0, 1.0, 2.0, 1.0],
    ]
)

print(torch.matmul(Q, K.T).div(2).softmax(dim=0))
|
|
]
|
|
}
|
|
],
|
|
"metadata": {
|
|
"kernelspec": {
|
|
"display_name": "media_cognition",
|
|
"language": "python",
|
|
"name": "python3"
|
|
},
|
|
"language_info": {
|
|
"codemirror_mode": {
|
|
"name": "ipython",
|
|
"version": 3
|
|
},
|
|
"file_extension": ".py",
|
|
"mimetype": "text/x-python",
|
|
"name": "python",
|
|
"nbconvert_exporter": "python",
|
|
"pygments_lexer": "ipython3",
|
|
"version": "3.12.2"
|
|
}
|
|
},
|
|
"nbformat": 4,
|
|
"nbformat_minor": 2
|
|
}
|