Files
MediaNCognition/hw3/code/data_preprocess.py
unlockable 8b657be441 Mac Sync
2024-05-15 20:05:18 +08:00

179 lines
7.1 KiB
Python

# ========================================================
# Media and Cognition
# Homework 3 Support Vector Machine
# data_preprocess.py - Using pretrained convolutional layers to extract features,
# and using PCA for dimensionality reduction
# Student ID:
# Name:
# Tsinghua University
# (C) Copyright 2024
# ========================================================
import os
import torchvision.transforms as transforms
import torch
from PIL import Image
from networks import Classifier
import matplotlib.pyplot as plt
import argparse
def preprocess(pre_conv, data_root, image_size, classes):
    """
    Extract features for the train/val/test splits, reduce them to 2 dimensions
    with PCA, plot them and save them under data_root
    -------------------------------
    The mean vector and the projection matrix are estimated on the training
    split only and then reused unchanged for the validation and testing splits,
    so that all three splits live in the same projected space.

    :param pre_conv: pretrained network in homework 2, used as feature extractor
    :param data_root: the path of all datasets; the results are written to
        data_root/train.pt, data_root/val.pt and data_root/test.pt
    :param image_size: the preset size that each image try to zoom to
    :param classes: two classes that need to be classified
    :return:
    """
    # =============== process training dataset ======================
    print("Start preprocessing the training dataset !!!")
    train_data, train_label = loaddata(pre_conv, data_root, 'train', image_size, classes)

    # calculate the mean and PCA projection matrix
    data_mean, u = PCA(train_data, 2)

    # center with the training mean, then project onto the principal directions
    train_data_pca = torch.matmul(train_data - data_mean, u)
    visualize(train_data_pca, train_label, "train")
    savedata(train_data_pca, train_label, data_root+"/train.pt")
    print("training dataset saved !!!")

    # =============== process validation dataset ======================
    print("Start preprocessing the validation dataset!!!")
    val_data, val_label = loaddata(pre_conv, data_root, 'val', image_size, classes)

    # reuse the training mean and projection matrix (no re-fitting on val data)
    val_data_pca = torch.matmul(val_data - data_mean, u)
    visualize(val_data_pca, val_label, "val")
    savedata(val_data_pca, val_label, data_root+"/val.pt")
    print("validation dataset saved !!!")

    # =============== process testing dataset ======================
    print("Start preprocessing the testing dataset!!!")
    test_data, test_label = loaddata(pre_conv, data_root, 'test', image_size, classes)

    # reuse the training mean and projection matrix (no re-fitting on test data)
    test_data_pca = torch.matmul(test_data - data_mean, u)
    visualize(test_data_pca, test_label, "test")
    savedata(test_data_pca, test_label, data_root+"/test.pt")
    print("testing dataset saved !!!")
def savedata(data, label, save_path):
    """
    Save the samples and their labels into one file
    -------------------------------
    :param data: the samples to store, saved under the key 'data'
    :param label: the labels to store, saved under the key 'label'
    :param save_path: the file path handed to torch.save
    :return:
    """
    torch.save(dict(data=data, label=label), save_path)
def visualize(datas, labels, mode):
    """
    Display feature points after dimensionality reduction
    -------------------------------
    :param datas: the samples after dimensionality reduction, with the shape of [N, 2]
    :param labels: the labels (chosen from {-1, +1}) corresponding to the samples
    :param mode: chosen from {'train', 'val', 'test'}
    :return:
    """
    plt.figure()
    # One scatter series per class label. The labels are enumerated directly
    # instead of being derived from the feature dimension of `datas`, which
    # only coincidentally matches the number of classes.
    for class_label in (-1, 1):
        selected = labels == class_label
        plt.scatter(datas[selected, 0], datas[selected, 1], label=class_label)
    plt.legend()
    plt.title(mode)
    plt.show()
def PCA(data, dim=2):
    """
    calculate the mean value of the data and the projection matrix for PCA
    :param data: the sample features extracted by the pretrained network in homework2, with the shape of [N, 2048]
    :param dim: the data dimension after projection
    :return:
        data_mean: the mean value of the data
        u: the projection matrix for PCA, with the shape of [2048, dim]
    """
    # mean over the sample axis, one value per feature
    data_mean = data.mean(dim=0)

    # covariance matrix of the centered data, normalized by the number of
    # samples; the normalization constant only scales the singular values and
    # does not change the projection directions
    centered = data - data_mean
    data_cov = torch.matmul(centered.T, centered) / data.shape[0]

    # SVD of the covariance matrix: the columns of `u` are the principal
    # directions, ordered by decreasing singular value
    # reference: https://pytorch.org/docs/1.11/generated/torch.linalg.svd.html
    u, _, _ = torch.linalg.svd(data_cov)

    # keep only the `dim` leading directions
    return data_mean, u[:, :dim]
def loaddata(pre_conv, data_root, mode, image_size, classes):
    """
    load one dataset, and use pretrained network in homework 2 to extract feature
    :param pre_conv: pretrained network in homework 2
    :param data_root: the path of the dataset
    :param mode: chosen from {'train', 'val', 'test'}
    :param image_size: the preset size that each image try to zoom to
    :param classes: two classes that need to be classified
    :return:
        datas: the samples of extracted features with the shape of [N, 2048]
        labels: the corresponding labels for each sample (chosen from {-1, +1}), with the shape of [N]
    """
    assert len(classes) == 2
    datas = []
    labels = []
    for idx, class_name in enumerate(classes):
        class_dir = os.path.join(data_root, mode, class_name)
        # the first class is labelled -1, the second one +1
        label = 2 * idx - 1
        # os.listdir returns entries in arbitrary order; sorting keeps the
        # sample order reproducible between runs and machines
        for img in sorted(os.listdir(class_dir)):
            # skip hidden files (e.g. the '.DS_Store' created by macOS),
            # which are not images and would make the image decoding fail
            if img.startswith('.'):
                continue
            datas.append(readimg(pre_conv, os.path.join(class_dir, img), image_size))
            labels.append(label)
    return torch.stack(datas), torch.tensor(labels)
def readimg(pre_conv, filepath, image_size):
    """
    Read one image and use pretrained network to extract the feature
    --------------------------
    :param pre_conv: pretrained network in homework 2
    :param filepath: the file path of one image
    :param image_size: the preset size that each image try to zoom to
    :return:
        data: the extracted feature with the length of 2048
    """
    # the context manager closes the underlying file as soon as the pixels
    # have been converted, instead of leaving the handle to the garbage collector
    with Image.open(filepath) as img_file:
        img_pil = img_file.convert('RGB').resize(image_size)

    img_transform = transforms.Compose([transforms.ToTensor(),
                                        transforms.Normalize(0.5, 0.5),
                                        ])
    img_tensor = img_transform(img_pil)

    # pure feature extraction: no gradient is needed, so no autograd graph
    # is recorded even when the network parameters are not frozen
    with torch.no_grad():
        # add a batch dimension for the network, then flatten the output to 1-D
        data = pre_conv(img_tensor.unsqueeze(0)).reshape(-1)
    return data
if __name__ == "__main__":
    # Script entry point: load the pretrained classifier of homework 2 and use
    # its convolutional part to preprocess the train/val/test datasets.
    parser = argparse.ArgumentParser()
    parser.add_argument("--pretrained_net", type=str, default="checkpoints/bn/ckpt_epoch_15.pth",
                        help="the filepath of the pretrained network in homework 2")
    parser.add_argument("--data_root", type=str, default="data", help="the path of all datasets")
    # two integers are expected; `type=tuple` would split the command-line
    # string into a tuple of single characters
    parser.add_argument("--image_size", type=int, nargs=2, default=(32, 32),
                        help="the preset size that each image try to zoom to")
    # NOTE: without `nargs`, a value given on the command line arrives as one
    # single string; kept as is for backward compatibility
    parser.add_argument("--classes", default=["B", "C"], help="two classes that need to be classified")
    args = parser.parse_args()

    pretrained_checkpoint = torch.load(args.pretrained_net, map_location="cpu")
    configs = pretrained_checkpoint["configs"]
    cls = Classifier(
        configs["in_channels"],
        configs["num_classes"],
        configs["use_batch_norm"],
        configs["use_stn"],
        configs["dropout_prob"],
    )
    cls.load_state_dict(pretrained_checkpoint["model_state"])

    # inference mode: batch normalization uses its stored running statistics
    # and dropout is disabled, so the features of one image do not depend on
    # the (single-sample) batch it is processed in
    cls.eval()
    for param in cls.parameters():
        param.requires_grad = False

    conv = cls.conv_net
    # argparse yields a list when the option is given explicitly
    preprocess(conv, args.data_root, tuple(args.image_size), args.classes)