diff --git a/.gitignore b/.gitignore index 13d9ea4..a2e4f14 100644 --- a/.gitignore +++ b/.gitignore @@ -8,4 +8,5 @@ __pycache__/ *.out *.pdf .DS_Store -hw2/code/checkpoints/ \ No newline at end of file +hw2/code/checkpoints/ +hw2/code/visualized/ \ No newline at end of file diff --git a/hw2/code/datasets.py b/hw2/code/datasets.py index 21153cb..8caef0a 100644 --- a/hw2/code/datasets.py +++ b/hw2/code/datasets.py @@ -52,7 +52,9 @@ def get_data_loader( # Consider what is an appropriate data augmentation technique for traffic sign classification. if mode == "train" and augment: # pass # TODO - data_transforms.append(transforms.AutoAugment()) + # data_transforms.append(transforms.AutoAugment()) + data_transforms.append(transforms.RandomAffine(degrees=30,shear=10)) + data_transforms.append(transforms.RandomAutocontrast()) # Else, the `data_transforms` should be left unchanged # <<< TODO 1.1 # Use `transforms.Compose` to compose the list of transforms into a single transform diff --git a/hw2/code/networks.py b/hw2/code/networks.py index 138f756..934f9fa 100644 --- a/hw2/code/networks.py +++ b/hw2/code/networks.py @@ -229,8 +229,9 @@ class STN(nn.Module): # this network. # Suggested structure: 3 down-sampling convolutional layers with doubling output channels, using BN and ReLU. 
self.localization_conv = nn.Sequential( - ConvBlock(in_channels=in_channels, out_channels=8, kernel_size=3, stride=2, padding=1, use_batch_norm=True), - ConvBlock(in_channels=8, out_channels=16, kernel_size=3, stride=2, padding=1, use_batch_norm=True), + ConvBlock(in_channels=in_channels, out_channels=8, kernel_size=9, stride=2, padding=4, use_batch_norm=True), + # 8 * 16 * 16 (for a 32 * 32 input) + ConvBlock(in_channels=8, out_channels=16, kernel_size=5, stride=2, padding=2, use_batch_norm=True), ConvBlock(in_channels=16, out_channels=32, kernel_size=3, stride=2, padding=1, use_batch_norm=True), # 32 * 4 * 4 ) @@ -240,10 +241,10 @@ class STN(nn.Module): # Hint: Combine linear layers and ReLU activation functions to build this network. # Suggested structure: 2 linear layers with one BN and ReLU. self.localization_fc = nn.Sequential( - nn.Linear(16, 256), - nn.Linear(256, 6), - nn.BatchNorm1d(6), - nn.ReLU() + nn.Linear(32 * 4 * 4, 256), + nn.ReLU(), + nn.BatchNorm1d(256), + nn.Linear(256, 6) ) # <<< TODO 4.1 @@ -251,7 +252,7 @@ class STN(nn.Module): # Hint: The STN should generate the identity transformation by default before training. # How to initialize the weight/bias of the last linear layer of the fully connected network to # achieve this goal?
- nn.init.zeros_(self.localization_fc[1].weight) + nn.init.zeros_(self.localization_fc[3].weight) # <<< TODO 4.2 def forward(self, x): diff --git a/hw2/report/img/augmentation.jpg b/hw2/report/img/augmentation.jpg new file mode 100644 index 0000000..7501321 Binary files /dev/null and b/hw2/report/img/augmentation.jpg differ diff --git a/hw2/report/img/feature/image.jpg b/hw2/report/img/feature/image.jpg new file mode 100644 index 0000000..937738b Binary files /dev/null and b/hw2/report/img/feature/image.jpg differ diff --git a/hw2/report/img/feature/layer_0/feature_map.jpg b/hw2/report/img/feature/layer_0/feature_map.jpg new file mode 100644 index 0000000..9e00a99 Binary files /dev/null and b/hw2/report/img/feature/layer_0/feature_map.jpg differ diff --git a/hw2/report/img/feature/layer_1/feature_map.jpg b/hw2/report/img/feature/layer_1/feature_map.jpg new file mode 100644 index 0000000..c131eae Binary files /dev/null and b/hw2/report/img/feature/layer_1/feature_map.jpg differ diff --git a/hw2/report/img/feature/layer_2/feature_map.jpg b/hw2/report/img/feature/layer_2/feature_map.jpg new file mode 100644 index 0000000..ecf51ec Binary files /dev/null and b/hw2/report/img/feature/layer_2/feature_map.jpg differ diff --git a/hw2/report/img/feature/layer_3/feature_map.jpg b/hw2/report/img/feature/layer_3/feature_map.jpg new file mode 100644 index 0000000..18dc43b Binary files /dev/null and b/hw2/report/img/feature/layer_3/feature_map.jpg differ diff --git a/hw2/report/img/feature/layer_4/feature_map.jpg b/hw2/report/img/feature/layer_4/feature_map.jpg new file mode 100644 index 0000000..b52994a Binary files /dev/null and b/hw2/report/img/feature/layer_4/feature_map.jpg differ diff --git a/hw2/report/img/filter/filter_layer_0.jpg b/hw2/report/img/filter/filter_layer_0.jpg new file mode 100644 index 0000000..e9b69d4 Binary files /dev/null and b/hw2/report/img/filter/filter_layer_0.jpg differ diff --git a/hw2/report/img/filter/filter_layer_1.jpg 
b/hw2/report/img/filter/filter_layer_1.jpg new file mode 100644 index 0000000..7b6006e Binary files /dev/null and b/hw2/report/img/filter/filter_layer_1.jpg differ diff --git a/hw2/report/img/filter/filter_layer_2.jpg b/hw2/report/img/filter/filter_layer_2.jpg new file mode 100644 index 0000000..135e4eb Binary files /dev/null and b/hw2/report/img/filter/filter_layer_2.jpg differ diff --git a/hw2/report/img/filter/filter_layer_3.jpg b/hw2/report/img/filter/filter_layer_3.jpg new file mode 100644 index 0000000..04d29d8 Binary files /dev/null and b/hw2/report/img/filter/filter_layer_3.jpg differ diff --git a/hw2/report/img/filter/filter_layer_4.jpg b/hw2/report/img/filter/filter_layer_4.jpg new file mode 100644 index 0000000..ea4e9c5 Binary files /dev/null and b/hw2/report/img/filter/filter_layer_4.jpg differ diff --git a/hw2/report/img/models/bn/loss_and_acc.jpg b/hw2/report/img/models/bn/loss_and_acc.jpg new file mode 100644 index 0000000..4e29fac Binary files /dev/null and b/hw2/report/img/models/bn/loss_and_acc.jpg differ diff --git a/hw2/report/img/models/bn_aug/loss_and_acc.jpg b/hw2/report/img/models/bn_aug/loss_and_acc.jpg new file mode 100644 index 0000000..c9ccf0a Binary files /dev/null and b/hw2/report/img/models/bn_aug/loss_and_acc.jpg differ diff --git a/hw2/report/img/models/default/loss_and_acc.jpg b/hw2/report/img/models/default/loss_and_acc.jpg new file mode 100644 index 0000000..1bcd548 Binary files /dev/null and b/hw2/report/img/models/default/loss_and_acc.jpg differ diff --git a/hw2/report/img/models/dropout/loss_and_acc.jpg b/hw2/report/img/models/dropout/loss_and_acc.jpg new file mode 100644 index 0000000..d6db9a6 Binary files /dev/null and b/hw2/report/img/models/dropout/loss_and_acc.jpg differ diff --git a/hw2/report/img/models/stn/loss_and_acc.jpg b/hw2/report/img/models/stn/loss_and_acc.jpg new file mode 100644 index 0000000..52be00c Binary files /dev/null and b/hw2/report/img/models/stn/loss_and_acc.jpg differ diff --git 
a/hw2/report/img/stn/stn.jpg b/hw2/report/img/stn/stn.jpg new file mode 100644 index 0000000..d38e164 Binary files /dev/null and b/hw2/report/img/stn/stn.jpg differ diff --git a/hw2/report/img/tsne/tsne.jpg b/hw2/report/img/tsne/tsne.jpg new file mode 100644 index 0000000..d083d3e Binary files /dev/null and b/hw2/report/img/tsne/tsne.jpg differ diff --git a/hw2/report/main.tex b/hw2/report/main.tex index d3801d4..4daa941 100644 --- a/hw2/report/main.tex +++ b/hw2/report/main.tex @@ -10,39 +10,48 @@ \usepackage{enumitem} \usepackage{graphicx} \usepackage{listings} -\usepackage{color} +\usepackage{fontspec} +\usepackage{xcolor} +\usepackage{float} +% \usepackage{color} + +\newfontfamily\codefont[Ligatures=ResetAll]{Fira Code}[Contextuals={Alternate}] +\newfontfamily\cascadia{Cascadia Code} \lstset{ - basicstyle = \sffamily, % 基本代码风格 - keywordstyle = \bfseries, % 关键字风格 - commentstyle = \rmfamily\itshape, % 注释的风格,斜体 - stringstyle = \ttfamily, % 字符串风格 - flexiblecolumns, % 别问为什么,加上这个 - numbers = left, % 行号的位置在左边 - showspaces = false, % 是否显示空格,显示了有点乱,所以不现实了 - numberstyle = \zihao{-5}\ttfamily, % 行号的样式,小五号,tt等宽字体 + basicstyle = \small\codefont, + % --- + tabsize = 4, showstringspaces = false, - captionpos = t, % 这段代码的名字所呈现的位置,t指的是top上面 - frame = lrtb, % 显示边框 + numbers = left, + numberstyle = \codefont, + % --- + breaklines = true, + captionpos = t, + % --- + frame = l, + flexiblecolumns, } \lstdefinestyle{Python}{ language = Python, % 语言选Python - basicstyle = \zihao{-5}\ttfamily, - numberstyle = \zihao{-5}\ttfamily, keywordstyle = \color{blue}, keywordstyle = [2] \color{teal}, - stringstyle = \color{magenta}, - commentstyle = \color{red}\ttfamily, - breaklines = true, % 自动换行,建议不要写太长的行 - columns = fixed, % 如果不加这一句,字间距就不固定,很丑,必须加 - basewidth = 0.5em, + stringstyle = \color{orange!80!black}, + commentstyle = \color{red}, + identifierstyle = \color{blue!80!white}, +} + +\lstdefinestyle{Bash}{ + language = bash } \usepackage{subcaption} \usepackage{booktabs} % toprule 
\usepackage[mathcal]{eucal} \usepackage[thehwcnt = 2]{iidef} +\allowdisplaybreaks + \thecourseinstitute{清华大学电子工程系} \thecoursename{\textbf{媒体与认知} \space 课堂2} \theterm{2023-2024学年春季学期} @@ -54,13 +63,13 @@ \centerline{\textbf{\Large{理论部分}}} \section{单选题(15分)} -\subsection{\underline{A}} +\subsection{\underline{C}} \subsection{\underline{D}} \subsection{\underline{D}} -\subsection{\underline{D}} +\subsection{\underline{C}} \subsection{\underline{B}} @@ -118,57 +127,58 @@ W=\left[ \begin{array}{cc} \begin{align*} \frac{\partial L}{\partial X} & = \begin{bmatrix} - 0.3 & 0.1 & 0\\ - -0.4 & 0.2 & 0\\ + 0.1 & -0.2 & 0\\ + -0.3 & 0.4 & 0\\ 0 & 0 & 0 \end{bmatrix} \frac{\partial L}{\partial Y_{11}} + \begin{bmatrix} - 0 & 0.3 & 0.1\\ - 0 & -0.4 & 0.2\\ + 0 & 0.1 & -0.2\\ + 0 & -0.3 & 0.4\\ 0 & 0 & 0 \end{bmatrix} \frac{\partial L}{\partial Y_{12}}\\ & \quad + \begin{bmatrix} 0 & 0 & 0\\ - 0.3 & 0.1 & 0\\ - -0.4 & 0.2 & 0 + 0.1 & -0.2 & 0\\ + -0.3 & 0.4 & 0 \end{bmatrix} \frac{\partial L}{\partial Y_{21}} + \begin{bmatrix} 0 & 0 & 0\\ - 0 & 0.3 & 0.1\\ - 0 & -0.4 & 0.2 + 0 & 0.1 & -0.2\\ + 0 & -0.3 & 0.4 \end{bmatrix} \frac{\partial L}{\partial Y_{22}}\\ + & = \mathrm{zeropad}(W) \ast \frac{\partial L}{\partial Y}\\ & = \begin{bmatrix} - 0.09 & 0.03 & 0\\ - -0.12 & 0.06 & 0\\ + 0.03 & -0.06 & 0\\ + -0.09 & 0.12 & 0\\ 0 & 0 & 0 \end{bmatrix} + \begin{bmatrix} - 0 & 0.03 & 0.01\\ - 0 & -0.04 & 0.02\\ + 0 & 0.01 & -0.02\\ + 0 & -0.03 & 0.04\\ 0 & 0 & 0 \end{bmatrix}\\ & \quad + \begin{bmatrix} 0 & 0 & 0\\ - -0.12 & -0.04 & 0\\ - 0.16 & -0.08 & 0 + -0.04 & 0.08 & 0\\ + 0.12 & -0.16 & 0 \end{bmatrix} + \begin{bmatrix} 0 & 0 & 0\\ - 0 & 0.06 & 0.02\\ - 0 & -0.08 & 0.04 + 0 & 0.02 & -0.04\\ + 0 & -0.06 & 0.08 \end{bmatrix}\\ & = \begin{bmatrix} - 0.09 & 0.06 & 0.01\\ - -0.24 & 0.04 & 0.04\\ - 0.16 & -0.16 & 0.04 + 0.03 & -0.05 & -0.02\\ + -0.13 & 0.19 & 0\\ + 0.12 & -0.22 & 0.08 \end{bmatrix} \qedhere \end{align*} \end{proof} @@ -178,7 +188,153 @@ W=\left[ \begin{array}{cc} % 
请根据是否选择自选课题的情况选择“编程作业报告”或“自选课题开题报告”中的一项完成 \section{编程作业报告} -\section{自选课题工作进度汇报} +\subsection{探究batch normalization和dropout的作用} +\begin{enumerate} + \item 使用默认配置训练模型: + \begin{lstlisting}[style=Bash] +python train.py --ckpt_path checkpoints/default + \end{lstlisting} + \begin{figure}[H] + \centering + \includegraphics[width=\linewidth]{img/models/default/loss_and_acc.jpg} + \end{figure} + 之后测试得到的正确率为90.8\%。 + \item 启用batch normalization: + \begin{lstlisting}[style=Bash] +python train.py --ckpt_path checkpoints/bn --bn + \end{lstlisting} + \begin{figure}[H] + \centering + \includegraphics[width=\linewidth]{img/models/bn/loss_and_acc.jpg} + \end{figure} + 测试得到的正确率为95.9\%。 + \item 启用dropout并设置概率为0.3: + \begin{lstlisting}[style=Bash] +python train.py --ckpt_path checkpoints/dropout --dropout 0.3 + \end{lstlisting} + \begin{figure}[H] + \centering + \includegraphics[width=\linewidth]{img/models/dropout/loss_and_acc.jpg} + \end{figure} + 测试后得到的正确率为94.1\%。 +\end{enumerate} + +\subsection{探究数据增广的作用} +考虑到在不同的视角下,交通标志可能有旋转或者变形,因此使用 +\begin{lstlisting}[style=Python] +transforms.RandomAffine(degrees=30,shear=10) +\end{lstlisting} +来对数据进行随机的形变与旋转;另外,考虑到可能在不同的光线条件下导致对比度变化,因此使用 +\begin{lstlisting}[style=Python] +transforms.RandomAutocontrast() +\end{lstlisting} +来对数据进行随机的对比度调整。 + +执行 +\begin{lstlisting}[style=Bash] +python unit_test.py data_loader +\end{lstlisting} +得到 +\begin{figure}[H] + \centering + \includegraphics[width=\linewidth]{img/augmentation.jpg} + \caption{数据增广后的结果} +\end{figure} + +训练最优模型使用的命令为 +\begin{lstlisting}[style=Bash] +python train.py --ckpt_path checkpoints/bn_aug --bn --augment --epoch 20 +\end{lstlisting} +\begin{figure}[H] + \centering + \includegraphics[width=\linewidth]{img/models/bn_aug/loss_and_acc.jpg} +\end{figure} +测试得到的正确率为96.0\%,略微高于不使用数据增强时的结果。 + +\subsection{探究空间变换网络(STN)的作用} +运行 +\begin{lstlisting}[style=Bash] +python train.py --ckpt_path checkpoints/stn --bn --stn +\end{lstlisting} +\begin{figure}[H] + \centering + 
\includegraphics[width=\linewidth]{img/models/stn/loss_and_acc.jpg} +\end{figure} +测试得到的正确率为94.6\%。正确率比不使用STN时反而有所降低,可能是设计的网络结构不够理想导致的。 + +\subsection{可视化} +\begin{enumerate} + \item 可视化各层卷积核: + \begin{figure}[H] + \centering + \includegraphics[width=\linewidth]{img/filter/filter_layer_0.jpg} + \caption{第0层的卷积核} + \end{figure} + \begin{figure}[H] + \centering + \includegraphics[width=\linewidth]{img/filter/filter_layer_1.jpg} + \caption{第1层的卷积核} + \end{figure} + \begin{figure}[H] + \centering + \includegraphics[width=\linewidth]{img/filter/filter_layer_2.jpg} + \caption{第2层的卷积核} + \end{figure} + \begin{figure}[H] + \centering + \includegraphics[width=\linewidth]{img/filter/filter_layer_3.jpg} + \caption{第3层的卷积核} + \end{figure} + \begin{figure}[H] + \centering + \includegraphics[width=\linewidth]{img/filter/filter_layer_4.jpg} + \caption{第4层的卷积核} + \end{figure} + \item 可视化各层卷积层的输出特征图 + \begin{figure}[H] + \centering + \includegraphics[width=\linewidth]{img/feature/layer_0/feature_map.jpg} + \caption{第0层卷积层的输出特征图} + \end{figure} + \begin{figure}[H] + \centering + \includegraphics[width=\linewidth]{img/feature/layer_1/feature_map.jpg} + \caption{第1层卷积层的输出特征图} + \end{figure} + \begin{figure}[H] + \centering + \includegraphics[width=\linewidth]{img/feature/layer_2/feature_map.jpg} + \caption{第2层卷积层的输出特征图} + \end{figure} + \begin{figure}[H] + \centering + \includegraphics[width=\linewidth]{img/feature/layer_3/feature_map.jpg} + \caption{第3层卷积层的输出特征图} + \end{figure} + \begin{figure}[H] + \centering + \includegraphics[width=\linewidth]{img/feature/layer_4/feature_map.jpg} + \caption{第4层卷积层的输出特征图} + \end{figure} + \item t-SNE可视化最后一层隐藏层的输出特征 + \begin{figure}[H] + \centering + \includegraphics[width=\linewidth]{img/tsne/tsne.jpg} + \end{figure} + 对最后一层隐藏层输出特征的t-SNE可视化表明,不同类别的输入经过非线性变换后已被映射到不同的聚类中。 + \item STN学习到的变换 + \begin{figure}[H] + \centering + \includegraphics[width=\linewidth]{img/stn/stn.jpg} + \end{figure} + 网络尽可能将所有的路牌都变换到了同样的倾斜角度。 +\end{enumerate} + +\section{遇到的问题与解决办法}
+在自定义STN网络时,我最初使用了比较小的卷积核,STN的效果很差,启用STN后正确率只有80\%;随后,我分析认为STN主要需要感知整张图片的倾斜以及旋转情况,需要较大的感受野,因此改用了较大的卷积核,此后得到了比较理想的效果。 + +完成作业没有使用大模型。 +% \section{自选课题工作进度汇报} \end{document} diff --git a/j.ps1 b/j.ps1 new file mode 100644 index 0000000..69a4272 --- /dev/null +++ b/j.ps1 @@ -0,0 +1 @@ +cd ./hw2/code \ No newline at end of file diff --git a/testtorch.ipynb b/testtorch.ipynb new file mode 100644 index 0000000..37d57d5 --- /dev/null +++ b/testtorch.ipynb @@ -0,0 +1,178 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "import torch\n", + "import torch.nn as nn\n", + "import torch.nn.functional as F\n", + "\n", + "import torchvision.transforms as transforms" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "class ConvBlock(nn.Module):\n", + " def __init__(\n", + " self,\n", + " in_channels,\n", + " out_channels,\n", + " kernel_size,\n", + " stride,\n", + " padding,\n", + " use_batch_norm=False,\n", + " use_residual=False,\n", + " ):\n", + " \"\"\"\n", + " Convolutional block with batch normalization and ReLU activation\n", + " ----------------------\n", + " :param in_channels: channel number of input image\n", + " :param out_channels: channel number of output image\n", + " :param kernel_size: size of convolutional kernel\n", + " :param stride: stride of convolutional operation\n", + " :param padding: padding of convolutional operation\n", + " :param use_batch_norm: whether to use batch normalization in convolutional layers\n", + " :param use_residual: whether to use residual connection\n", + " \"\"\"\n", + " super().__init__()\n", + "\n", + " if use_batch_norm:\n", + " bn2d = nn.BatchNorm2d\n", + " else:\n", + " # use identity function to replace batch normalization\n", + " bn2d = nn.Identity\n", + "\n", + " self.use_residual = use_residual\n", + "\n", + " # >>> TODO 2.1: complete a convolutional block with batch normalization and ReLU
activation\n", + " # Hint: use the `bn2d` defined above for batch normalization to adapt to the input parameter `use_batch_norm`\n", + " # Network structure:\n", + " # conv -> batchnorm -> relu\n", + " self.conv = nn.Conv2d(in_channels, out_channels, kernel_size, stride=stride, padding=padding)\n", + " self.bn = bn2d(out_channels)\n", + " self.relu = nn.ReLU()\n", + " # <<< TODO 2.1\n", + "\n", + " def forward(self, x):\n", + " # >>> TODO 2.2: forward process\n", + " # Hint: apply residual connection if `self.use_residual` is True\n", + " out = self.relu(self.bn(self.conv(x)))\n", + " if self.use_residual:\n", + " out += x\n", + "\n", + " # <<< TODO 2.2\n", + " return out\n" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [], + "source": [ + "in_channels = 3\n", + "dropout_prob = 0.5\n", + "conv_net = nn.Sequential(\n", + " ConvBlock(\n", + " in_channels=in_channels, out_channels=32, kernel_size=5, stride=1, padding=2\n", + " ),\n", + " ConvBlock(in_channels=32, out_channels=64, kernel_size=5, stride=2, padding=2),\n", + " nn.MaxPool2d(kernel_size=2, stride=2, padding=0),\n", + " ConvBlock(\n", + " in_channels=64,\n", + " out_channels=64,\n", + " kernel_size=3,\n", + " stride=1,\n", + " padding=1,\n", + " use_residual=True,\n", + " ),\n", + " ConvBlock(in_channels=64, out_channels=128, kernel_size=3, stride=1, padding=1),\n", + " nn.MaxPool2d(kernel_size=2, stride=2, padding=0),\n", + " ConvBlock(\n", + " in_channels=128,\n", + " out_channels=128,\n", + " kernel_size=3,\n", + " stride=1,\n", + " padding=1,\n", + " use_residual=True,\n", + " ),\n", + " nn.Dropout2d(p=dropout_prob),\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "torch.Size([10, 128, 4, 4])\n", + "ConvBlock(\n", + " (conv): Conv2d(32, 64, kernel_size=(5, 5), stride=(2, 2), padding=(2, 2))\n", + " (bn): Identity()\n", + " (relu): ReLU()\n", 
+ ")\n" + ] + } + ], + "source": [ + "a = torch.randn(10, 3, 32, 32)\n", + "print(conv_net(a).size())\n", + "print(conv_net[1])" + ] + }, + { + "cell_type": "code", + "execution_count": 24, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "torch.Size([10, 8, 16, 16])\n", + "torch.Size([10, 16, 8, 8])\n" + ] + } + ], + "source": [ + "conv_1 = ConvBlock(in_channels=3, out_channels=8, kernel_size=9, stride=2, padding=4, use_batch_norm=True)\n", + "conv_2 = ConvBlock(in_channels=8, out_channels=16, kernel_size=5, stride=2, padding=2, use_batch_norm=True)\n", + "\n", + "print(conv_1(a).size())\n", + "print(conv_2(conv_1(a)).size())\n" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "media_cognition", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.12.2" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +}