Files
MediaNCognition/hw1/HW1-code/network.py
unlockable 8b657be441 Mac Sync
2024-05-15 20:05:18 +08:00

157 lines
6.1 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
#========================================================
# Media and Cognition
# Homework 1 Neural network basics
# network.py - linear layer and MLP network
# Student ID: 2022010639
# Name: Gao Yixuan
# Tsinghua University
# (C) Copyright 2024
#========================================================
import torch
import torch.nn as nn
from activations import Activation
'''
In this script we will implement our Linear layer and MLP network.
For the linear layer, we provide sample code that computes both the forward and backward passes by hand.
More details about customizing a backward process can be found in:
https://pytorch.org/tutorials/beginner/examples_autograd/two_layer_net_custom_function.html
For the MLP network, you should cascade the linear layers and activation functions in a proper way in the __init__ function and implement the forward function.
'''
class LinearFunction(torch.autograd.Function):
    '''
    Custom autograd function implementing the affine map
        y = xW^T + b
    together with a hand-written gradient computation.
    '''

    @staticmethod
    def forward(ctx, x, W, b):
        '''
        Compute y = xW^T + b and stash the tensors needed by backward.

        Input:
        :param ctx: a context object that can be used to stash information for backward computation
        :param x: input features with size [batch_size, input_size]
                  (extra leading batch dimensions are also accepted)
        :param W: weight matrix with size [output_size, input_size]
        :param b: bias with size [output_size]
        Return:
        y: output features with size [batch_size, output_size]
        '''
        # The bias gradient depends only on grad_output, so only x and W are saved.
        ctx.save_for_backward(x, W)
        return torch.matmul(x, W.T) + b

    @staticmethod
    def backward(ctx, grad_output):
        '''
        Propagate dL/dy back to the three inputs of forward.

        Input:
        :param ctx: a context object with saved tensors
        :param grad_output: dL/dy, with size [batch_size, output_size]
        Return:
        grad_input: dL/dx, same size as x
        grad_W: dL/dW, with size [output_size, input_size], summed for data in the batch
        grad_b: dL/db, with size [output_size], summed for data in the batch
        An entry is None when the corresponding input does not require a gradient.
        '''
        # saved_tensors is the supported accessor; saved_variables is a deprecated alias.
        x, W = ctx.saved_tensors
        need_x, need_W, need_b = ctx.needs_input_grad

        # Collapse any leading batch dimensions so that the batch sums below
        # reduce to a single 2-D matrix product / column sum.
        flat_grad = grad_output.reshape(-1, grad_output.shape[-1])

        grad_input = grad_W = grad_b = None
        if need_x:
            # dL/dx = dL/dy * W
            grad_input = torch.matmul(grad_output, W)
        if need_W:
            # dL/dW = (dL/dy)^T * x, which also sums over the batch
            grad_W = torch.matmul(flat_grad.T, x.reshape(-1, x.shape[-1]))
        if need_b:
            # dL/db = sum over the batch of dL/dy
            grad_b = flat_grad.sum(0)
        return grad_input, grad_W, grad_b
class Linear(nn.Module):
    '''
    A linear layer whose forward pass is delegated to LinearFunction.
    '''

    def __init__(self, input_size, output_size):
        '''
        Create the trainable weight and bias of the layer.
        -----------------------------------------------
        :param input_size: dimension of input features
        :param output_size: dimension of output features
        '''
        super().__init__()
        # Weight: standard-normal draw of size [output_size, input_size], cast to float32.
        self.W = nn.Parameter(
            torch.randn(output_size, input_size).float(),
            requires_grad=True,
        )
        # Bias: zeros of size [output_size], cast to float32.
        self.b = nn.Parameter(
            torch.zeros(output_size).float(),
            requires_grad=True,
        )

    def forward(self, x):
        '''
        Apply LinearFunction to x with this layer's weight and bias.
        :param x: input features
        :return: the result of LinearFunction.apply(x, W, b)
        '''
        weight, bias = self.W, self.b
        return LinearFunction.apply(x, weight, bias)
class MLP(nn.Module):
    '''
    Multilayer perceptron assembled from Linear layers and one shared activation module.
    '''

    def __init__(self, input_size, output_size, hidden_size, n_layers, act_type):
        '''
        Build the layer stack and wrap it in an nn.Sequential.
        ----------------------
        :param input_size: dimension of input features
        :param output_size: dimension of output features
        :param hidden_size: a list containing hidden size for each hidden layer
        :param n_layers: number of layers (hidden layers + 1 output layer)
        :param act_type: type of activation function for each hidden layer, forwarded to
                         Activation (described upstream as none, sigmoid, tanh, or relu)
        '''
        super().__init__()

        # One output layer on top of the hidden layers.
        assert len(hidden_size) + 1 == n_layers, 'total layer number should be hidden layer number + 1'

        # A single activation instance is created and reused after every hidden layer.
        self.act = Activation(act_type)

        if n_layers == 1:
            # Degenerate case: the MLP is just one linear layer.
            stack = [Linear(input_size, output_size)]
        else:
            stack = []
            fan_in = input_size
            for idx in range(n_layers - 1):
                fan_out = hidden_size[idx]
                # Each hidden layer is a linear map followed by the activation.
                stack += [Linear(fan_in, fan_out), self.act]
                fan_in = fan_out
            # The output layer maps the last hidden width to output_size.
            stack.append(Linear(hidden_size[-1], output_size))

        # Children are registered under consecutive indices "0", "1", ...
        self.network = torch.nn.Sequential(*stack)

    def forward(self, x):
        '''
        Run the input through the sequential network.
        :param x: input features with size [batch_size, input_size]
        :return: output features with size [batch_size, output_size]
        '''
        return self.network(x)