# MediaNCognition/hw3/code/data_preprocess.py

# ========================================================
#             Media and Cognition
#             Homework 3 Support Vector Machine
#             data_preprocess.py - Using pretrained convolutional layers to extract features,
#                                   and using PCA for dimensionality reduction
#             Student ID: 2022010639
#             Name: Yixuan Gao
#             Tsinghua University
#             (C) Copyright 2024
# ========================================================

import os
import torchvision.transforms as transforms
import torch
from PIL import Image
from networks import Classifier
import matplotlib.pyplot as plt
import argparse


def preprocess(pre_conv, data_root, image_size, classes):
    """
    Extract features for every split, project them onto a 2-D PCA basis, plot and save them
    -------------------------------
    The mean vector and the projection matrix are computed from the training split only
    and are then reused unchanged for the validation and testing splits.
    :param pre_conv: the feature extractor, forwarded to loaddata
    :param data_root: the path of all datasets; results are written to
                      <data_root>/train.pt, <data_root>/val.pt and <data_root>/test.pt
    :param image_size: forwarded to loaddata
    :param classes: forwarded to loaddata
    :return:
    """
    # (split name, message printed before processing, message printed after saving)
    stages = (
        ('train', "Start preprocessing the training dataset !!!", "training dataset saved !!!"),
        ('val', "Start preprocessing the validation dataset!!!", "validation dataset saved !!!"),
        ('test', "Start preprocessing the testing dataset!!!", "testing dataset saved !!!"),
    )

    for split, begin_msg, end_msg in stages:
        print(begin_msg)
        feats, targets = loaddata(pre_conv, data_root, split, image_size, classes)

        if split == 'train':
            # 'train' is the first stage, so the statistics exist before val/test need them
            center, basis = PCA(feats, 2)
            # the projection matrix is multiplied by a constant factor of 20
            basis = basis * 20

        # subtract the training mean, then project onto the (scaled) principal directions
        reduced = (feats - center) @ basis

        visualize(reduced, targets, split)
        savedata(reduced, targets, data_root + "/" + split + ".pt")
        print(end_msg)


def savedata(data, label, save_path):
    """
    Store the samples and their labels together in one file
    -------------------------------
    :param data: the samples, saved under the key 'data'
    :param label: the labels, saved under the key 'label'
    :param save_path: the destination handed to torch.save
    :return:
    """
    torch.save(dict(data=data, label=label), save_path)


def visualize(datas, labels, mode):
    """
    Display feature points after dimensionality reduction
    -------------------------------
    One scatter series is drawn per label value; the first two columns of `datas`
    are used as the x and y coordinates.
    :param datas: the samples after dimensionality reduction, with the shape of [N, 2]
    :param labels: the labels (chosen from {-1, +1}) corresponding to the samples
    :param mode: chosen from {'train', 'val', 'test'}, used as the figure title
    :return:
    """
    plt.figure()
    # Loop over the label values themselves rather than over datas.shape[1]: the number
    # of feature columns only coincides with the number of classes when both equal 2.
    for cls_label in (-1, 1):
        mask = labels == cls_label
        plt.scatter(datas[mask, 0], datas[mask, 1], label=cls_label)
    plt.legend()
    plt.title(mode)
    plt.show()


def PCA(data, dim=2):
    """
    Compute the sample mean and the PCA projection matrix of a feature matrix
    :param data: the sample features, one sample per row, with the shape of [N, D]
                 (D is 2048 for the features extracted by the pretrained network in homework2)
    :param dim: the number of leading principal directions to keep
    :return:
        data_mean: the per-feature mean of the data, with the shape of [D]
        u: the projection matrix for PCA, with the shape of [D, dim]
    """
    # per-feature mean over all samples
    data_mean = data.mean(dim=0)

    # torch.cov treats rows as variables and columns as observations, hence the transpose
    centered = data - data_mean
    covariance = torch.cov(centered.T)

    # only the left singular vectors are needed; torch.linalg.svd returns the singular
    # values in descending order, so the leading columns are the principal directions
    left_vectors = torch.linalg.svd(covariance)[0]

    return data_mean, left_vectors[:, :dim]

def loaddata(pre_conv, data_root, mode, image_size, classes):
    """
    Load one split and turn every image into a feature vector with the pretrained network
    :param pre_conv: pretrained network in homework 2, forwarded to readimg
    :param data_root: the path of the dataset
    :param mode: chosen from {'train', 'val', 'test'}; images are read from
                 <data_root>/<mode>/<class name>/
    :param image_size: the preset size that each image try to zoom to, forwarded to readimg
    :param classes: two classes that need to be classified; the first one is labelled -1,
                    the second one +1
    :return:
        datas: the extracted features stacked along a new first dimension, [N, feature length]
        labels: the corresponding labels for each sample (chosen from {-1, +1}), with the shape of [N]
    """
    assert len(classes) == 2
    datas, labels = [], []
    for idx, cls_name in enumerate(classes):
        folder = data_root + '/' + mode + '/' + cls_name
        # map the class position 0 / 1 to the label -1 / +1
        label = 2 * idx - 1
        for img in os.listdir(folder):
            datas.append(readimg(pre_conv, folder + '/' + img, image_size))
            labels.append(label)
    return torch.stack(datas), torch.tensor(labels)


def readimg(pre_conv, filepath, image_size):
    """
    Read one image and use pretrained network to extract the feature
    --------------------------
    The image is converted to RGB, resized, turned into a tensor, normalized with
    mean 0.5 and std 0.5, and passed through `pre_conv` as a batch of one.
    :param pre_conv: pretrained network in homework 2
    :param filepath: the file path of one image
    :param image_size: the preset size that each image try to zoom to
    :return:
        data: the extracted feature flattened to one dimension
              (length 2048 for the homework 2 network)
    """
    # open through a context manager so the file handle is released right after decoding;
    # convert() returns an independent image that stays valid once the file is closed
    with Image.open(filepath) as img_file:
        img_pil = img_file.convert('RGB')
    img_pil = img_pil.resize(image_size)
    img_transform = transforms.Compose([transforms.ToTensor(),
                                        transforms.Normalize(0.5, 0.5),
                                        ])
    img_tensor = img_transform(img_pil)

    # feature extraction only: never record an autograd graph, even when the caller
    # has not frozen the network's parameters
    with torch.no_grad():
        data = pre_conv(img_tensor.unsqueeze(0)).reshape(-1)

    return data


# Script entry point: rebuild the homework 2 classifier from a checkpoint, freeze it,
# and run the preprocessing pipeline with its convolutional part as feature extractor.
if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument("--pretrained_net", type=str, default="checkpoints/bn/ckpt_epoch_15.pth",
                        help="the filepath of the pretrained network in homework 2")
    parser.add_argument("--data_root", type=str, default="data", help="the path of all datasets")
    # two integers, e.g. `--image_size 32 32`; type=tuple would have split a command-line
    # string into single characters instead of producing a pair of numbers
    parser.add_argument("--image_size", type=int, nargs=2, default=(32, 32),
                        help="the preset size that each image try to zoom to")
    # NOTE: without nargs a command-line value arrives as one string; loaddata indexes
    # classes[0] and classes[1], so only a two-character string such as "BC" works there
    parser.add_argument("--classes", default=["B", "C"], help="two classes that need to be classified")

    args = parser.parse_args()

    pretrained_checkpoint = torch.load(args.pretrained_net, map_location="cpu")
    configs = pretrained_checkpoint["configs"]
    cls = Classifier(
        configs["in_channels"],
        configs["num_classes"],
        configs["use_batch_norm"],
        configs["use_stn"],
        configs["dropout_prob"],
    )
    cls.load_state_dict(pretrained_checkpoint["model_state"], strict=False)
    # the network is only used for inference, so no parameter needs a gradient
    for param in cls.parameters():
        param.requires_grad = False
    # inference mode: batch-norm layers use their stored running statistics (instead of
    # the statistics of each single-image batch) and dropout layers are disabled
    cls.eval()
    conv = cls.conv_net

    # nargs=2 yields a list when the option is given; pass a tuple in every case
    preprocess(conv, args.data_root, tuple(args.image_size), args.classes)