{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "import torch\n",
    "import torch.nn as nn\n",
    "import torch.nn.functional as F\n",
    "\n",
    "import torchvision.transforms as transforms\n",
    "\n",
    "import numpy as np"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "class ConvBlock(nn.Module):\n",
    "    def __init__(\n",
    "        self,\n",
    "        in_channels,\n",
    "        out_channels,\n",
    "        kernel_size,\n",
    "        stride,\n",
    "        padding,\n",
    "        use_batch_norm=False,\n",
    "        use_residual=False,\n",
    "    ):\n",
    "        \"\"\"\n",
    "        Convolutional block: conv -> (optional) batch normalization -> ReLU,\n",
    "        with an optional residual connection that adds the input to the output\n",
    "        ----------------------\n",
    "        :param in_channels: channel number of input image\n",
    "        :param out_channels: channel number of output image\n",
    "        :param kernel_size: size of convolutional kernel\n",
    "        :param stride: stride of convolutional operation\n",
    "        :param padding: padding of convolutional operation\n",
    "        :param use_batch_norm: whether to use batch normalization in convolutional layers\n",
    "        :param use_residual: whether to use residual connection; the input is added\n",
    "            to the activated output, so both must have compatible shapes\n",
    "        \"\"\"\n",
    "        super().__init__()\n",
    "\n",
    "        if use_batch_norm:\n",
    "            bn2d = nn.BatchNorm2d\n",
    "        else:\n",
    "            # use identity function to replace batch normalization\n",
    "            bn2d = nn.Identity\n",
    "\n",
    "        self.use_residual = use_residual\n",
    "\n",
    "        # >>> TODO 2.1: complete a convolutional block with batch normalization and ReLU activation\n",
    "        # Hint: use the `bn2d` defined above for batch normalization to adapt to the input parameter `use_batch_norm`\n",
    "        # Network structure:\n",
    "        # conv -> batchnorm -> relu\n",
    "        self.conv = nn.Conv2d(in_channels, out_channels, kernel_size, stride=stride, padding=padding)\n",
    "        self.bn = bn2d(out_channels)\n",
    "        self.relu = nn.ReLU()\n",
    "        # <<< TODO 2.1\n",
    "\n",
    "    def forward(self, x):\n",
    "        \"\"\"\n",
    "        Apply conv -> batchnorm -> relu, then add the input if `self.use_residual` is True\n",
    "        ----------------------\n",
    "        :param x: input tensor fed to the convolution\n",
    "        :return: activated feature map, plus `x` when the residual connection is enabled\n",
    "        \"\"\"\n",
    "        # >>> TODO 2.2: forward process\n",
    "        # Hint: apply residual connection if `self.use_residual` is True\n",
    "        out = self.relu(self.bn(self.conv(x)))\n",
    "        if self.use_residual:\n",
    "            # Add out-of-place: ReLU's backward pass reads its own output, so an\n",
    "            # in-place `out += x` overwrites a tensor autograd still needs and\n",
    "            # makes `backward()` raise a RuntimeError.\n",
    "            out = out + x\n",
    "\n",
    "        # <<< TODO 2.2\n",
    "        return out\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "in_channels = 3\n",
    "dropout_prob = 0.5\n",
    "\n",
    "# Layers are instantiated in network order and then chained with nn.Sequential.\n",
    "# Both residual blocks keep the channel count and use a 3x3 kernel with\n",
    "# stride 1 and padding 1, so their output can be added to their input.\n",
    "feature_layers = [\n",
    "    ConvBlock(in_channels=in_channels, out_channels=32, kernel_size=5, stride=1, padding=2),\n",
    "    ConvBlock(in_channels=32, out_channels=64, kernel_size=5, stride=2, padding=2),\n",
    "    nn.MaxPool2d(kernel_size=2, stride=2, padding=0),\n",
    "    ConvBlock(in_channels=64, out_channels=64, kernel_size=3, stride=1, padding=1, use_residual=True),\n",
    "    ConvBlock(in_channels=64, out_channels=128, kernel_size=3, stride=1, padding=1),\n",
    "    nn.MaxPool2d(kernel_size=2, stride=2, padding=0),\n",
    "    ConvBlock(in_channels=128, out_channels=128, kernel_size=3, stride=1, padding=1, use_residual=True),\n",
    "    nn.Dropout2d(p=dropout_prob),\n",
    "]\n",
    "conv_net = nn.Sequential(*feature_layers)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "torch.Size([10, 128, 4, 4])\n",
      "ConvBlock(\n",
      "  (conv): Conv2d(32, 64, kernel_size=(5, 5), stride=(2, 2), padding=(2, 2))\n",
      "  (bn): Identity()\n",
      "  (relu): ReLU()\n",
      ")\n"
     ]
    }
   ],
   "source": [
    "# Sanity check: run one random batch through the network, then inspect its second layer\n",
    "a = torch.randn((10, 3, 32, 32))\n",
    "features = conv_net(a)\n",
    "print(features.shape)\n",
    "print(conv_net[1])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 24,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "torch.Size([10, 8, 16, 16])\n",
      "torch.Size([10, 16, 8, 8])\n"
     ]
    }
   ],
   "source": [
    "# Two strided blocks with batch norm enabled; print the feature-map size after each one\n",
    "conv_1 = ConvBlock(\n",
    "    in_channels=3, out_channels=8, kernel_size=9, stride=2, padding=4, use_batch_norm=True\n",
    ")\n",
    "conv_2 = ConvBlock(\n",
    "    in_channels=8, out_channels=16, kernel_size=5, stride=2, padding=2, use_batch_norm=True\n",
    ")\n",
    "\n",
    "print(conv_1(a).shape)\n",
    "print(conv_2(conv_1(a)).shape)\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "tensor([0., 1.])\n",
      "1\n"
     ]
    }
   ],
   "source": [
    "a = torch.Tensor([1.0, 2.0])\n",
    "b = torch.Tensor([1.0, 1.0])\n",
    "\n",
    "# Element-wise comparison yields a boolean tensor; cast it back to the type of `a`\n",
    "is_greater = a > b\n",
    "print(is_greater.type_as(a))\n",
    "\n",
    "# Count the positions where both tensors hold the same value\n",
    "is_equal = a == b\n",
    "print(is_equal.sum().item())"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "tensor(2.5000)\n"
     ]
    }
   ],
   "source": [
    "a = torch.Tensor([[1.0, 2.0], [3.0, 4.0]])\n",
    "\n",
    "# Mean over dim 0 (one value per column); broadcasting subtracts it from every row\n",
    "mu = a.mean(dim=0)\n",
    "centered = a - mu\n",
    "print(mu, centered)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "tensor([[5.],\n",
      "        [4.]])\n"
     ]
    }
   ],
   "source": [
    "a = torch.Tensor([[5], [4]])\n",
    "b = torch.Tensor([1])\n",
    "\n",
    "# Multiply the transposed tensor by `b` (broadcast), then transpose back\n",
    "scaled_t = a.T * b\n",
    "print(scaled_t.T)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "tensor([[False,  True,  True,  True,  True],\n",
      "        [False, False,  True,  True,  True],\n",
      "        [False, False, False,  True,  True],\n",
      "        [False, False, False, False,  True],\n",
      "        [False, False, False, False, False]])\n",
      "tensor([[-0.1170,  0.6130,  0.9644, -1.2733, -0.9671],\n",
      "        [-0.7806,  0.5082, -0.2731,  0.1660, -0.5451],\n",
      "        [-2.1527, -0.5059, -0.0079, -0.5796, -1.1107],\n",
      "        [-1.8357, -0.8010, -0.0424,  0.1491, -1.5009],\n",
      "        [-1.3666, -0.8209,  0.0483, -1.3165, -0.9222]])\n",
      "tensor([[-0.1170,    -inf,    -inf,    -inf,    -inf],\n",
      "        [-0.7806,  0.5082,    -inf,    -inf,    -inf],\n",
      "        [-2.1527, -0.5059, -0.0079,    -inf,    -inf],\n",
      "        [-1.8357, -0.8010, -0.0424,  0.1491,    -inf],\n",
      "        [-1.3666, -0.8209,  0.0483, -1.3165, -0.9222]])\n"
     ]
    }
   ],
   "source": [
    "# Boolean mask that is True strictly above the main diagonal\n",
    "all_ones = torch.ones(5, 5)\n",
    "mask = torch.triu(all_ones, diagonal=1).bool()\n",
    "print(mask)\n",
    "\n",
    "attn = torch.randn(5, 5)\n",
    "print(attn)\n",
    "\n",
    "# Entries selected by the mask are replaced with -inf; the rest are kept\n",
    "masked_attn = attn.masked_fill(mask, -np.inf)\n",
    "print(masked_attn)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "tensor([0.1402, 0.2312, 0.6285])\n"
     ]
    }
   ],
   "source": [
    "Q = torch.Tensor([1, 0, 1, 1])\n",
    "K = torch.Tensor([[0, 0, 0, 2], [2, 0, 1, 0], [2, 1, 2, 1]])\n",
    "\n",
    "# Dot product of Q with every row of K, divided by 2\n",
    "# (presumably sqrt of the vector length 4 - confirm), then softmax over the rows of K\n",
    "scores = (Q @ K.T) / 2\n",
    "weights = torch.softmax(scores, dim=0)\n",
    "print(weights)"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "media_cognition",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.12.2"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}