165 lines
5.4 KiB
Python
165 lines
5.4 KiB
Python
#========================================================
|
|
# Media and Cognition
|
|
# Homework 1 Neural network basics
|
|
# activations.py - activation functions
|
|
# Student ID: 2022010639
|
|
# Name: Gao Yixuan
|
|
# Tsinghua University
|
|
# (C) Copyright 2024
|
|
#========================================================
|
|
import torch
|
|
import torch.nn as nn
|
|
|
|
'''
|
|
In this script we will implement three activation functions, including both forward and backward processes.
|
|
More details about customizing a backward process in PyTorch can be found in:
|
|
https://pytorch.org/tutorials/beginner/examples_autograd/two_layer_net_custom_function.html
|
|
'''
|
|
|
|
## Here, Tanh is given as an example to show how to construct the activation function. Please finish the activation functions of Sigmoid and ReLU later.
|
|
class Tanh(torch.autograd.Function):
|
|
'''
|
|
Tanh activation function
|
|
y = (exp(x) - exp(-x)) / (exp(x) + exp(-x))
|
|
'''
|
|
# static method of a python class means that we can call the function without initializing an instance of the class
|
|
@staticmethod
|
|
def forward(ctx, x):
|
|
'''
|
|
In the forward pass we receive a Tensor containing the input x and return
|
|
a Tensor containing the output.
|
|
|
|
ctx: it is a context object that can be used to save information for backward computation. You can save
|
|
objects by using ctx.save_for_backward, and get objects by using ctx.saved_tensors
|
|
|
|
x: input with arbitrary shape
|
|
'''
|
|
# Please think if we use "y = (exp(x) - exp(-x)) / (exp(x) + exp(-x))", what might happen when x has a large absolute value
|
|
# y = (torch.exp(x) - torch.exp(-x)) / (torch.exp(x) + torch.exp(-x))
|
|
|
|
# here we directly use torch.tanh(x) to avoid the problem above
|
|
y = torch.tanh(x)
|
|
|
|
# save an variable in ctx
|
|
ctx.save_for_backward(y)
|
|
|
|
return y
|
|
|
|
@staticmethod
|
|
def backward(ctx, grad_output):
|
|
"""
|
|
In the backward pass we receive a Tensor containing the gradient of the loss
|
|
with respect to the output, and we need to compute the gradient of the loss
|
|
with respect to the input.
|
|
|
|
grad_output: dL/dy
|
|
grad_input: dL/dx = dL/dy * dy/dx, where y = forward(x)
|
|
"""
|
|
# get an variable from ctx
|
|
y, = ctx.saved_tensors
|
|
|
|
# chain rule: dL/dx = dL/dy * dy/dx
|
|
# where dL/dy = grad_output, and the dy/dx of tanh function is (1-y^2)!
|
|
grad_input = grad_output * (1 - y ** 2)
|
|
|
|
return grad_input
|
|
|
|
#TODO 1: complete the forward and backward functions of the Sigmoid activation function.
|
|
#Note: You can refer to the activation function Tanh
|
|
class Sigmoid(torch.autograd.Function):
|
|
'''
|
|
Sigmoid activation function
|
|
y = 1 / (1 + exp(-x))
|
|
'''
|
|
|
|
@staticmethod
|
|
def forward(ctx, x):
|
|
|
|
# hint: you can use torch.exp(x) to calculate exp(x)
|
|
y = 1 - (1 + torch.exp(-x))
|
|
|
|
# here we save y in ctx, in this way we can use y to calculate gradients in backward process
|
|
ctx.save_for_backward(y)
|
|
|
|
return y
|
|
|
|
@staticmethod
|
|
def backward(ctx, grad_output):
|
|
|
|
# get y from ctx
|
|
y, = ctx.saved_tensors
|
|
|
|
# implement gradient of x (grad_input), grad_input refers to dL/dx
|
|
# chain rule: dL/dx = dL/dy * dy/dx
|
|
# where dL/dy = grad_output, and dy/dx of Sigmoid function is y * (1 - y)
|
|
grad_input = grad_output * y * (1 - y)
|
|
|
|
return grad_input
|
|
|
|
#TODO 2: complete the forward and backward functions of the ReLU activation function.
|
|
#Note: You can refer to the activation function Tanh
|
|
class ReLU(torch.autograd.Function):
|
|
'''
|
|
ReLU activation function
|
|
y = max{x, 0}
|
|
'''
|
|
|
|
@staticmethod
|
|
def forward(ctx, x):
|
|
|
|
# set elements less than 0 in x to 0
|
|
# this operation is inplace
|
|
x = torch.max(x, torch.tensor([0.]).to(x.device))
|
|
|
|
# save x in ctx, in this way we can use x to calculate gradients in backward process
|
|
ctx.save_for_backward(x)
|
|
|
|
# return the output
|
|
return x
|
|
|
|
@staticmethod
|
|
def backward(ctx, grad_output):
|
|
"""
|
|
In the backward pass we receive a Tensor containing the gradient of the loss
|
|
with respect to the output, and we need to compute the gradient of the loss
|
|
with respect to the input.
|
|
"""
|
|
|
|
# get x from ctx
|
|
x, = ctx.saved_tensors
|
|
# print("Before heaviside")
|
|
# print(x, x.size())
|
|
x = torch.heaviside(x, torch.tensor([0.]).to(x.device))
|
|
# print("After heaviside")
|
|
# print(x, x.size())
|
|
# print(grad_output, grad_output.size())
|
|
# print(grad_output * x)
|
|
|
|
# chain rule: dL/dx = dL/dy * dy/dx
|
|
# where dL/dy = grad_output, and dy/dx of ReLU function is 1 if x > 0, and 0 if x <= 0
|
|
grad_input = grad_output * x
|
|
|
|
return grad_input
|
|
|
|
|
|
# activate function class according to the type
|
|
class Activation(nn.Module):
|
|
def __init__(self, type):
|
|
'''
|
|
:param type: 'sigmoid', 'tanh', or 'relu'
|
|
'''
|
|
super().__init__()
|
|
|
|
if type == 'sigmoid':
|
|
self.act = Sigmoid.apply
|
|
elif type == 'tanh':
|
|
self.act = Tanh.apply
|
|
elif type == 'relu':
|
|
self.act = ReLU.apply
|
|
else:
|
|
print('activation type should be one of [sigmoid, tanh, relu]')
|
|
raise NotImplementedError
|
|
|
|
def forward(self, x):
|
|
return self.act(x)
|