class ConvBlock(nn.Module):
    """Conv2d -> optional BatchNorm2d -> ReLU, with an optional additive skip connection."""

    def __init__(
        self,
        in_channels,
        out_channels,
        kernel_size,
        stride,
        padding,
        use_batch_norm=False,
        use_residual=False,
    ):
        """
        Convolutional block: convolution, optional batch normalization, ReLU activation
        ----------------------
        :param in_channels: channel number of input image
        :param out_channels: channel number of output image
        :param kernel_size: size of convolutional kernel
        :param stride: stride of convolutional operation
        :param padding: padding of convolutional operation
        :param use_batch_norm: whether to use batch normalization in convolutional layers
        :param use_residual: whether to add the block input to the activated output.
            The skip is a plain element-wise add, so it is only meaningful when the
            convolution leaves the tensor shape unchanged (same channel count,
            stride 1, "same" padding).
        """
        super().__init__()

        # nn.Identity accepts and ignores constructor arguments, so it can be
        # instantiated with the same call as BatchNorm2d when normalization is off.
        bn2d = nn.BatchNorm2d if use_batch_norm else nn.Identity

        self.use_residual = use_residual

        # >>> TODO 2.1: complete a convolutional block with batch normalization and ReLU activation
        # Network structure:
        # conv -> batchnorm -> relu
        self.conv = nn.Conv2d(in_channels, out_channels, kernel_size, stride=stride, padding=padding)
        self.bn = bn2d(out_channels)
        self.relu = nn.ReLU()
        # <<< TODO 2.1

    def forward(self, x):
        """
        Apply conv -> norm -> ReLU and, if enabled, add the input back on.
        ----------------------
        :param x: input tensor fed to the convolution
        :return: activated feature map, plus `x` when `use_residual` is True
        """
        # >>> TODO 2.2: forward process
        out = self.relu(self.bn(self.conv(x)))
        if self.use_residual:
            # Must be out-of-place: ReLU keeps its output for the backward pass,
            # so `out += x` would overwrite a tensor autograd still needs and
            # make `backward()` raise a RuntimeError.
            out = out + x
        # <<< TODO 2.2
        return out


# ---------------------------------------------------------------------------
# Assemble the convolutional feature extractor.
# ---------------------------------------------------------------------------
in_channels = 3
dropout_prob = 0.5
conv_net = nn.Sequential(
    ConvBlock(in_channels=in_channels, out_channels=32, kernel_size=5, stride=1, padding=2),
    ConvBlock(in_channels=32, out_channels=64, kernel_size=5, stride=2, padding=2),
    nn.MaxPool2d(kernel_size=2, stride=2, padding=0),
    ConvBlock(in_channels=64, out_channels=64, kernel_size=3, stride=1, padding=1, use_residual=True),
    ConvBlock(in_channels=64, out_channels=128, kernel_size=3, stride=1, padding=1),
    nn.MaxPool2d(kernel_size=2, stride=2, padding=0),
    ConvBlock(in_channels=128, out_channels=128, kernel_size=3, stride=1, padding=1, use_residual=True),
    nn.Dropout2d(p=dropout_prob),
)

# ---------------------------------------------------------------------------
# Shape smoke tests on a random batch of ten 3x32x32 inputs.
# The batch gets its own name so later scratch cells cannot clobber it.
# ---------------------------------------------------------------------------
image_batch = torch.randn(10, 3, 32, 32)
print(conv_net(image_batch).size())
print(conv_net[1])

conv_1 = ConvBlock(in_channels=3, out_channels=8, kernel_size=9, stride=2, padding=4, use_batch_norm=True)
conv_2 = ConvBlock(in_channels=8, out_channels=16, kernel_size=5, stride=2, padding=2, use_batch_norm=True)

print(conv_1(image_batch).size())
print(conv_2(conv_1(image_batch)).size())
# ---------------------------------------------------------------------------
# Scratch experiments with basic tensor operations.
# Every experiment uses its own variable names so that cells stay re-runnable
# in any order, and every result is bound to a name before it is printed.
# ---------------------------------------------------------------------------

# Element-wise comparison: cast the boolean result back to the input dtype,
# and count the positions where both tensors agree.
lhs = torch.tensor([1.0, 2.0])
rhs = torch.tensor([1.0, 1.0])
is_greater = (lhs > rhs).type_as(lhs)
num_equal = (lhs == rhs).sum().item()
print(is_greater)
print(num_equal)

# Mean over dim 0 (one mean per column); subtracting it broadcasts across rows.
samples = torch.tensor([[1.0, 2.0], [3.0, 4.0]])
mu = samples.mean(dim=0)
centered = samples - mu
print(mu, centered)

# Broadcasting a one-element tensor against a column vector via transposes.
column = torch.tensor([[5.0], [4.0]])
factor = torch.tensor([1.0])
scaled_column = (column.T * factor).T
print(scaled_column)

# Mask of the strictly-upper triangle: True marks the entries to blank out.
mask = torch.triu(torch.ones(5, 5), diagonal=1).bool()
print(mask)

# Seed the generator so the printed scores are reproducible across runs.
torch.manual_seed(0)
attn = torch.randn(5, 5)
print(attn)

# Masked entries become -inf; everything on or below the diagonal is kept.
masked_attn = attn.masked_fill(mask, float("-inf"))
print(masked_attn)
# ---------------------------------------------------------------------------
# Worked example: softmax over scaled dot products of one query against
# three keys.
# ---------------------------------------------------------------------------
Q = torch.tensor([1.0, 0.0, 1.0, 1.0])
K = torch.tensor(
    [
        [0.0, 0.0, 0.0, 2.0],
        [2.0, 0.0, 1.0, 0.0],
        [2.0, 1.0, 2.0, 1.0],
    ]
)

# Derive the divisor from the query width instead of hard-coding it. For this
# 4-dimensional query it evaluates to 2.0, the constant used originally.
# NOTE(review): presumably the usual 1/sqrt(d_k) attention scaling - confirm.
scale = Q.size(-1) ** 0.5

# One dot product per key row, scaled, then normalised into weights.
attn_weights = torch.softmax((Q @ K.T) / scale, dim=0)
print(attn_weights)