TA Release homework4.
This commit is contained in:
104
hw4/code/attnvis.ipynb
Normal file
104
hw4/code/attnvis.ipynb
Normal file
@@ -0,0 +1,104 @@
|
|||||||
|
{
|
||||||
|
"cells": [
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {
|
||||||
|
"collapsed": false
|
||||||
|
},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"import os\n",
|
||||||
|
"from contextlib import nullcontext\n",
|
||||||
|
"import torch\n",
|
||||||
|
"from model import GPTConfig, GPT\n",
|
||||||
|
"from bertviz import head_view\n",
|
||||||
|
"from dataset import Converter, LMDataset\n",
|
||||||
|
"\n",
|
||||||
|
"# set random seed for reproducibility\n",
|
||||||
|
"seed = 2024\n",
|
||||||
|
"torch.manual_seed(seed)\n",
|
||||||
|
"torch.cuda.manual_seed(seed)\n",
|
||||||
|
"torch.cuda.manual_seed_all(seed)\n",
|
||||||
|
"torch.backends.cudnn.deterministic = True\n",
|
||||||
|
"torch.backends.cuda.matmul.allow_tf32 = True # allow tf32 on matmul\n",
|
||||||
|
"torch.backends.cudnn.allow_tf32 = True # allow tf32 on cudnn\n",
|
||||||
|
"\n",
|
||||||
|
"#################################################\n",
|
||||||
|
"# \n",
|
||||||
|
"model_name = 'mygpt'\n",
|
||||||
|
"ckpt_path = 'workdirs/quansongci'\n",
|
||||||
|
"data_root = 'data/quansongci'\n",
|
||||||
|
"vis_text_path = 'data/vis/vis_1.txt'\n",
|
||||||
|
"#################################################\n",
|
||||||
|
"\n",
|
||||||
|
"device = 'cpu'\n",
|
||||||
|
"\n",
|
||||||
|
"dataset = LMDataset(data_root, 'train')\n",
|
||||||
|
"converter = Converter(dataset.stoi, dataset.itos)\n",
|
||||||
|
"\n",
|
||||||
|
"\n",
|
||||||
|
"with open(vis_text_path, 'r', encoding='utf-8') as f:\n",
|
||||||
|
" start = f.read()\n",
|
||||||
|
"start_ids = converter.single_encode(start)\n",
|
||||||
|
"start_texts = [c for c in start]\n",
|
||||||
|
"x = (torch.tensor(start_ids, dtype=torch.long, device=device)[None, ...])\n",
|
||||||
|
"print(f\"Input texts: {start}\")"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"id": "c0792738",
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"# model\n",
|
||||||
|
"dtype = 'float16' # 'float32' or 'bfloat16' or 'float16'\n",
|
||||||
|
"ptdtype = {'float32': torch.float32, 'bfloat16': torch.bfloat16, 'float16': torch.float16}[dtype]\n",
|
||||||
|
"ctx = nullcontext() if device == 'cpu' else torch.autocast(device_type=device, dtype=ptdtype)\n",
|
||||||
|
"# init from a model saved in a specific directory\n",
|
||||||
|
"ckpt_path = os.path.join(ckpt_path, 'best.pth')\n",
|
||||||
|
"print(\"loading model params from %s\"%ckpt_path)\n",
|
||||||
|
"checkpoint = torch.load(ckpt_path, map_location=device)\n",
|
||||||
|
"gptconf = GPTConfig[model_name]\n",
|
||||||
|
"if 'model_args' in checkpoint:\n",
|
||||||
|
" gptconf = checkpoint['model_args']\n",
|
||||||
|
"model = GPT(**gptconf)\n",
|
||||||
|
"state_dict = checkpoint['state_dict']\n",
|
||||||
|
"model.load_state_dict(state_dict)\n",
|
||||||
|
"\n",
|
||||||
|
"model.eval()\n",
|
||||||
|
"model.to(device)\n",
|
||||||
|
"\n",
|
||||||
|
"# run generation\n",
|
||||||
|
"with torch.no_grad():\n",
|
||||||
|
" with ctx:\n",
|
||||||
|
" _, attn_weights = model(x)\n",
|
||||||
|
"\n",
|
||||||
|
"head_view(attn_weights, start_texts)"
|
||||||
|
]
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"metadata": {
|
||||||
|
"kernelspec": {
|
||||||
|
"display_name": "Python 3 (ipykernel)",
|
||||||
|
"language": "python",
|
||||||
|
"name": "python3"
|
||||||
|
},
|
||||||
|
"language_info": {
|
||||||
|
"codemirror_mode": {
|
||||||
|
"name": "ipython",
|
||||||
|
"version": 3
|
||||||
|
},
|
||||||
|
"file_extension": ".py",
|
||||||
|
"mimetype": "text/x-python",
|
||||||
|
"name": "python",
|
||||||
|
"nbconvert_exporter": "python",
|
||||||
|
"pygments_lexer": "ipython3",
|
||||||
|
"version": "3.8.16"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"nbformat": 4,
|
||||||
|
"nbformat_minor": 5
|
||||||
|
}
|
||||||
13426
hw4/code/data/quansongci/data.json
Normal file
13426
hw4/code/data/quansongci/data.json
Normal file
File diff suppressed because it is too large
Load Diff
2
hw4/code/data/vis/vis_1.txt
Normal file
2
hw4/code/data/vis/vis_1.txt
Normal file
@@ -0,0 +1,2 @@
|
|||||||
|
+++如梦令
|
||||||
|
昨夜雨疏风骤。浓睡不消残酒。试问卷帘人,却道海棠依旧。知否。知否。应是绿肥红瘦。
|
||||||
3
hw4/code/data/vis/vis_2.txt
Normal file
3
hw4/code/data/vis/vis_2.txt
Normal file
@@ -0,0 +1,3 @@
|
|||||||
|
+++鹧鸪天(秋思)
|
||||||
|
红蓼烟收尽暮鸦。秋风吹叶荻花秋。断肠天色连云散,黄叶荻花秋水流。
|
||||||
|
楼上角,笛声悠。兴王莫上叹人头。明朝归去无消息,只有当时一望流。
|
||||||
75
hw4/code/dataset.py
Normal file
75
hw4/code/dataset.py
Normal file
@@ -0,0 +1,75 @@
|
|||||||
|
import torch
|
||||||
|
from torch.utils.data import Dataset
|
||||||
|
import numpy as np
|
||||||
|
import os
|
||||||
|
import json
|
||||||
|
|
||||||
|
class LMDataset(Dataset):
    """Dataset of text samples read from ``<data_dir>/<split>.json``.

    The JSON file must provide the keys ``data``, ``stoi``, ``itos`` and
    ``vocab_size``; they are exposed as attributes of the same name.
    """

    def __init__(self, data_dir, split):
        """Load the metadata file for the given split.

        Args:
            data_dir: directory that contains the ``<split>.json`` files.
            split: name of the split, used as the file stem (e.g. 'train').

        Raises:
            OSError: if the file cannot be opened.
            KeyError: if one of the expected keys is missing.
        """
        super().__init__()
        # load the data
        with open(os.path.join(data_dir, '%s.json' % split), 'r', encoding='utf-8') as f:
            meta = json.load(f)

        self.data = meta['data']  # list of samples
        self.stoi = meta['stoi']  # a dict that maps character to integer
        self.itos = meta['itos']  # a dict that maps string of integer to character
        self.vocab_size = meta['vocab_size']  # vocab size

    def __len__(self):
        """Return the number of samples."""
        return len(self.data)

    def __getitem__(self, index):
        """Return the sample stored at ``index`` unchanged."""
        return self.data[index]
|
||||||
|
|
||||||
|
class Converter:
    '''
    This class helps us convert strings to integers and back.
    We use "0" to denote the padding character '<pad>', and "1" to denote the end of the sequence '<eos>'.
    '''

    def __init__(self, stoi, itos):
        self.stoi = stoi  # a dict that maps character to integer
        self.itos = itos  # a dict that maps string of integer to character

    def single_encode(self, s):
        '''
        Encode one string into a 1-D int64 numpy array, one id per character.

        Raises KeyError if a character is missing from `self.stoi`.
        '''
        return np.array([self.stoi[ch] for ch in s], dtype=np.int64)

    def single_decode(self, l):
        '''
        Decode an iterable of integer ids into a string.

        Decoding stops at the first end-of-sequence id (1), which is not
        included in the result. Ids are looked up in `self.itos` by their
        string form.
        '''
        chars = []
        for i in l:
            # if we meet the end of the sequence (the value of integer is equal to 1), break
            if i == 1:
                break
            # convert string of the integer into a character
            chars.append(self.itos[str(i)])
        # join once at the end instead of growing a string in the loop
        return ''.join(chars)

    def encode(self, data):
        '''
        Encode a list of strings into a pair of integer tensors (x, y).

        Every string is followed by the <eos> id (1) and padded with the
        <pad> id (0) up to the longest string. `x` drops the last column and
        `y` drops the first one, so `y` is `x` shifted left by one position.
        Both have shape (len(data), max_len).

        Raises ValueError if `data` is empty.
        '''
        max_len = max(len(s) for s in data)
        out = np.zeros((len(data), max_len + 1), dtype=np.int64)
        for row, s in enumerate(data):
            out[row, :len(s)] = self.single_encode(s)
            out[row, len(s)] = 1
        x = torch.from_numpy(out[:, :-1])
        y = torch.from_numpy(out[:, 1:])
        return x, y

    def decode(self, data):
        '''
        Decode a tensor of integer ids, one sequence per row, into a list of strings.
        '''
        data = data.cpu().numpy().astype(np.int64)
        return [self.single_decode(row) for row in data]
|
||||||
356
hw4/code/model.py
Normal file
356
hw4/code/model.py
Normal file
@@ -0,0 +1,356 @@
|
|||||||
|
# ========================================================
|
||||||
|
# Media and Cognition
|
||||||
|
# Homework 4 Sequence Modeling
|
||||||
|
# model.py - Model definition
|
||||||
|
# Student ID:
|
||||||
|
# Name:
|
||||||
|
# Tsinghua University
|
||||||
|
# (C) Copyright 2024
|
||||||
|
# ========================================================
|
||||||
|
|
||||||
|
|
||||||
|
# Import required libraries
|
||||||
|
############################################################
|
||||||
|
import math
|
||||||
|
import torch
|
||||||
|
import torch.nn as nn
|
||||||
|
from torch.nn import functional as F
|
||||||
|
import numpy as np
|
||||||
|
|
||||||
|
############################################################
|
||||||
|
|
||||||
|
# Define the GELU activation function used in OpenAI GPT
|
||||||
|
############################################################
|
||||||
|
def gelu(z):
    """
    Apply the tanh approximation of the GELU activation element-wise.

    Reference: Gaussian Error Linear Units (GELU) paper: https://arxiv.org/abs/1606.08415
    0.5z(1 + tanh[(2/π)^(1/2) * (z + 0.044715 z^3)])

    Args:
        z: input tensor.

    Returns:
        A tensor with the same shape as `z`.
    """
    inner = math.sqrt(2.0 / math.pi) * (z + 0.044715 * torch.pow(z, 3.0))
    return 0.5 * z * (1.0 + torch.tanh(inner))
|
||||||
|
|
||||||
|
############################################################
|
||||||
|
|
||||||
|
# Define the Multi-Head SelfAttention module
|
||||||
|
############################################################
|
||||||
|
class SelfAttention(nn.Module):
|
||||||
|
|
||||||
|
def __init__(self, embed_dim, num_head, dropout):
|
||||||
|
super().__init__()
|
||||||
|
|
||||||
|
# define three linear layers for q, k, v generation separately
|
||||||
|
self.q_layer = nn.Linear(embed_dim, embed_dim)
|
||||||
|
self.k_layer = nn.Linear(embed_dim, embed_dim)
|
||||||
|
self.v_layer = nn.Linear(embed_dim, embed_dim)
|
||||||
|
|
||||||
|
# define the projection layer for output
|
||||||
|
self.proj_layer = nn.Linear(embed_dim, embed_dim)
|
||||||
|
|
||||||
|
# define the dropout layer for attention and output calculation
|
||||||
|
self.attn_drop = nn.Dropout(dropout)
|
||||||
|
self.proj_drop = nn.Dropout(dropout)
|
||||||
|
|
||||||
|
self.num_head = num_head
|
||||||
|
self.head_dim = embed_dim // num_head
|
||||||
|
|
||||||
|
def forward(self, x):
|
||||||
|
|
||||||
|
batch_size, seq_len, dim = x.shape
|
||||||
|
|
||||||
|
# >>> TODO 1: complete the forward process of the Multi-Head SelfAttention module.
|
||||||
|
# Step 1.1: obtain q, k, v via self.q_layer(), self.k_layer(), self.v_layer() respectively.
|
||||||
|
# the shape of q, k, v: (batch_size, seq_len, num_heads * head_dim)
|
||||||
|
# num_heads denotes the number of heads in multi-head attention, head_dim denotes the dimension of each head
|
||||||
|
q = ???
|
||||||
|
k = ???
|
||||||
|
v = ???
|
||||||
|
|
||||||
|
# Step 1.2: in order to calculate multi-head attention in parallel, reshape q, k, v first.
|
||||||
|
# first use `Tensor.reshape()` to reshape q, k, v to: (batch_size, seq_len, num_heads, head_dim)
|
||||||
|
q = ???
|
||||||
|
k = ???
|
||||||
|
v = ???
|
||||||
|
|
||||||
|
# then use `Tensor.transpose()` or `Tensor.permute()` to exchange the dim of q, k, v for matrix multiplication
|
||||||
|
# the shape of q, k, v from (batch_size, seq_len, num_heads, head_dim) to (batch_size, num_heads, seq_len, head_dim)
|
||||||
|
q = ???
|
||||||
|
k = ???
|
||||||
|
v = ???
|
||||||
|
|
||||||
|
# Step 1.3: calculate multi-head attention in parallel: Attention(q, k, v) = softmax(qk^T / sqrt(head_dim)) v
|
||||||
|
# Step 1.3.1: do matrix multiplication via `torch.matmul()`: attn = qk^T / sqrt(head_dim)
|
||||||
|
# the shape of `attn`: (batch_size, num_heads, seq_len, seq_len)
|
||||||
|
attn = ???
|
||||||
|
|
||||||
|
# Step 1.3.2: for Auto-Regressive Language Model, the predictions for position i can depend only on the input at positions less than i.
|
||||||
|
# Therefore, a mask is used to prevent positions from attending to subsequent positions
|
||||||
|
# attn_mask = (0,1,...,1; 0,0,1,...,1; ...; 0,...,0) is a strictly upper triangular matrix with shape (seq_len, seq_len)
|
||||||
|
# Hint:
|
||||||
|
# use torch.ones(?, device=x.device) to generate a matrix filled with value 1 with shape (seq_len, seq_len)
|
||||||
|
attn_mask = ???
|
||||||
|
# use torch.triu(?, diagonal=1) to obtain a upper triangular matrix where elements on the main diagonal are 0
|
||||||
|
attn_mask = ???
|
||||||
|
# use Tensor.bool() to convert the matrix to a boolean matrix
|
||||||
|
attn_mask = ???
|
||||||
|
# fill the position of `attn` where `attn_mask==True` with value `float('-inf')` via `Tensor.masked_fill()`
|
||||||
|
attn = ???
|
||||||
|
|
||||||
|
# Step 1.3.3: normalize `attn` via softmax function: attn = Softmax(attn) = Softmax(qk^T / sqrt(head_dim))
|
||||||
|
attn = ???
|
||||||
|
# Step 1.3.4: apply dropout to `attn` via self.attn_drop()
|
||||||
|
attn = ???
|
||||||
|
# Step 1.3.5: multiply v by `attn` via torch.matmul(): out = Attention(q, k, v) = attn v
|
||||||
|
# the shape of `out`: (batch_size, num_heads, seq_len, head_dim)
|
||||||
|
out = ???
|
||||||
|
|
||||||
|
# Step 1.4: use `Tensor.transpose()` and `Tensor.reshape()` to concatenate output of different heads
|
||||||
|
# the shape of `out` from (batch_size, num_heads, seq_len, head_dim) to (batch_size, seq_len, num_heads*head_dim)
|
||||||
|
out = ???
|
||||||
|
|
||||||
|
# Step 1.5: obtain the final results via self.proj_layer() and self.proj_drop(): result = Dropout(MultiHead(Q, K, V)) = Dropout(out W^O)
|
||||||
|
result = ???
|
||||||
|
# <<< TODO 1
|
||||||
|
|
||||||
|
# return the final results `result` and attention weights `attn`
|
||||||
|
return result, attn
|
||||||
|
|
||||||
|
############################################################
|
||||||
|
|
||||||
|
# Define the feed forward network (FFN)
|
||||||
|
############################################################
|
||||||
|
class FFN(nn.Module):
    """Feed forward network: Linear -> GELU -> Linear -> Dropout."""

    def __init__(self, embed_dim, feedforward_dim, dropout):
        """
        Args:
            embed_dim: size of the input and output features.
            feedforward_dim: size of the hidden layer.
            dropout: dropout ratio applied to the output.
        """
        super().__init__()
        self.fc1 = nn.Linear(embed_dim, feedforward_dim)
        self.fc2 = nn.Linear(feedforward_dim, embed_dim)
        self.drop = nn.Dropout(dropout)

    def forward(self, x):
        """Apply fc1, the module-level `gelu`, fc2 and dropout in that order."""
        x = self.fc1(x)
        x = gelu(x)
        x = self.fc2(x)
        x = self.drop(x)
        return x
|
||||||
|
############################################################
|
||||||
|
|
||||||
|
# Define the TransformerLayer
|
||||||
|
############################################################
|
||||||
|
class TransformerLayer(nn.Module):
|
||||||
|
def __init__(self, embed_dim, num_head, feedforward_dim, dropout, no_res):
|
||||||
|
super().__init__()
|
||||||
|
self.norm1 = nn.LayerNorm(embed_dim)
|
||||||
|
self.attn = SelfAttention(embed_dim, num_head, dropout)
|
||||||
|
self.norm2 = nn.LayerNorm(embed_dim)
|
||||||
|
self.ffn = FFN(embed_dim, feedforward_dim, dropout)
|
||||||
|
self.no_res = no_res # whether to use residual connection
|
||||||
|
|
||||||
|
def forward(self, x):
|
||||||
|
# >>> TODO 2: complete the forward process of the TransformerLayer module.
|
||||||
|
# Step 2.1: calculate the output of multi-head self-attention
|
||||||
|
# normalize the input via `self.norm1()`: x_norm = LayerNorm(x)
|
||||||
|
x_norm = ???
|
||||||
|
|
||||||
|
# calculate the output of multi-head self-attention via `self.attn()`: x_attn, attn = SelfAttention(x_norm)
|
||||||
|
x_attn, attn = ???
|
||||||
|
|
||||||
|
# add the input 'x' to the output of attention (x_attn) if self.no_res is False: x_attn = x + x_attn if no_res is False else x_attn
|
||||||
|
if ???:
|
||||||
|
x_attn = ???
|
||||||
|
|
||||||
|
# Step 2.2: calculate the output of feed forward network
|
||||||
|
# calculate the output of feed forward network via `self.ffn()` and `self.norm2()`: x_ffn = FFN(LayerNorm(x_attn))
|
||||||
|
x_ffn = ???
|
||||||
|
|
||||||
|
# add the output of attention (x_attn) to the output of feed forward network (x_ffn) if self.no_res is False: out = x_attn + x_ffn if no_res is False else x_ffn
|
||||||
|
if ???:
|
||||||
|
out = ???
|
||||||
|
else:
|
||||||
|
out = ???
|
||||||
|
# <<< TODO 2
|
||||||
|
|
||||||
|
return out, attn
|
||||||
|
############################################################
|
||||||
|
|
||||||
|
# Define the GPT module
|
||||||
|
############################################################
|
||||||
|
class GPT(nn.Module):
|
||||||
|
def __init__(self, vocab_size, max_seq_len, num_layer, embed_dim, num_head, feedforward_dim, dropout, no_res=False, no_pos=False):
|
||||||
|
'''
|
||||||
|
vocab_size: the size of vocabulary
|
||||||
|
max_seq_len: the maximum length of input texts
|
||||||
|
num_layer: the number of transformer layers
|
||||||
|
embed_dim: the embedding dimension
|
||||||
|
num_head: the number of heads in Multi-Head Self Attention
|
||||||
|
feedforward_dim: the dimension in the feed forward network
|
||||||
|
dropout: dropout ratio
|
||||||
|
no_res: if True, disable the residual connections in transformer layers
|
||||||
|
no_pos: if True, disable the position embeddings
|
||||||
|
'''
|
||||||
|
super().__init__()
|
||||||
|
self.num_layer = num_layer
|
||||||
|
self.max_seq_len = max_seq_len
|
||||||
|
self.no_pos = no_pos
|
||||||
|
|
||||||
|
# Define Embedding Layer to transfer input text tokens and positions to embeddings
|
||||||
|
self.word_token_embedding = nn.Embedding(vocab_size, embed_dim)
|
||||||
|
self.word_pos_embedding = nn.Embedding(max_seq_len, embed_dim)
|
||||||
|
|
||||||
|
self.drop = nn.Dropout(dropout)
|
||||||
|
# Define the transformer layers
|
||||||
|
self.transformer = nn.ModuleList([TransformerLayer(embed_dim, num_head, feedforward_dim, dropout, no_res) for _ in range(num_layer)])
|
||||||
|
|
||||||
|
# Define the head layer to predict output
|
||||||
|
self.norm = nn.LayerNorm(embed_dim)
|
||||||
|
self.language_model_head = nn.Linear(embed_dim, vocab_size, bias=False)
|
||||||
|
|
||||||
|
"""
|
||||||
|
Weight tying improves the performance of language models by tying (sharing) the weights of the embedding and softmax layers.
|
||||||
|
Reference: https://paperswithcode.com/method/weight-tying
|
||||||
|
"""
|
||||||
|
self.word_token_embedding.weight = self.language_model_head.weight
|
||||||
|
|
||||||
|
self.init_weights()
|
||||||
|
|
||||||
|
def init_weights(self):
    """
    Initialize the weights of all Linear and Embedding sub-modules.

    Weights are drawn from N(0, 0.02^2) and Linear biases are set to zero.
    Parameters whose name ends with 'proj_layer.weight' are then re-drawn
    with std 0.02 / sqrt(2 * self.num_layer).
    """
    for m in self.modules():
        if isinstance(m, nn.Linear):
            torch.nn.init.normal_(m.weight, mean=0.0, std=0.02)
            if m.bias is not None:
                torch.nn.init.zeros_(m.bias)
        elif isinstance(m, nn.Embedding):
            torch.nn.init.normal_(m.weight, mean=0.0, std=0.02)

    # apply special scaled init to the residual projections, per GPT-2 paper
    scaled_std = 0.02 / math.sqrt(2 * self.num_layer)
    for pn, p in self.named_parameters():
        if pn.endswith('proj_layer.weight'):
            torch.nn.init.normal_(p, mean=0.0, std=scaled_std)
|
||||||
|
|
||||||
|
|
||||||
|
def forward(self, word_idx, targets=None):
|
||||||
|
batch_size, seq_len = word_idx.shape
|
||||||
|
|
||||||
|
# >>> TODO 3: complete the forward process of GPT
|
||||||
|
# Step 3.1: use torch.arange(?, dtype=torch.long, device=word_idx.device) to generate the position sequence `pos` [0, 1, ..., seq_len-1]
|
||||||
|
pos = ???
|
||||||
|
|
||||||
|
# Step 3.2: use self.word_token_embedding() and self.word_pos_embedding() to transfer `word_idx` and `pos` to embeddings (`token_embed` and `pos_embed`)
|
||||||
|
token_embed = ???
|
||||||
|
pos_embed = ???
|
||||||
|
|
||||||
|
# Step 3.3: initialize the input embeddings `x` of transformer layers
|
||||||
|
# add the token embeddings and position embeddings to obtain the input embeddings `x` if self.no_pos is False
|
||||||
|
if ???:
|
||||||
|
x = ???
|
||||||
|
else:
|
||||||
|
x = ???
|
||||||
|
|
||||||
|
# apply dropout to the input embeddings via `self.drop()`
|
||||||
|
x = ???
|
||||||
|
|
||||||
|
# Step 3.4: use for loop to obtain the output and attention weights of multiple transformer layers
|
||||||
|
# define a list `attention_weights` and append the attention weights of each transformer layer into the list
|
||||||
|
attention_weights = ???
|
||||||
|
for ???:
|
||||||
|
# Step 3.4.1: obtain the output and attention weights of the current transformer layer
|
||||||
|
x, attn = ???
|
||||||
|
# Step 3.4.2: append the attention weights of the current transformer layer to the list `attention_weights`
|
||||||
|
???
|
||||||
|
|
||||||
|
# Step 3.5: use self.norm() to normalize the output of transformer layers and then use self.language_model_head() to obtain the `logits` for prediction
|
||||||
|
# self.language_model_head() is a linear layer defined in __init__() function
|
||||||
|
# Note: do not add softmax here since it is included in the cross entropy loss function
|
||||||
|
x = ???
|
||||||
|
logits = ???
|
||||||
|
# <<< TODO 3
|
||||||
|
|
||||||
|
# return logits and loss or attention weights
|
||||||
|
if targets is not None:
|
||||||
|
loss = torch.nn.functional.cross_entropy(logits.reshape(-1, logits.size(-1)), targets.reshape(-1), ignore_index=0)
|
||||||
|
return logits, loss
|
||||||
|
assert isinstance(attention_weights, list), "attention_weights must be a list, please check whether to append the attention weights of all transformer layers into it!"
|
||||||
|
return logits, attention_weights
|
||||||
|
|
||||||
|
def configure_optimizers(self, weight_decay):
    """
    Split the model parameters into two optimizer parameter groups.

    This long function is unfortunately doing something very simple and is being very defensive:
    we are separating out all parameters of the model into two buckets: those that will experience
    weight decay for regularization (Linear weights) and those that won't (biases, and
    layernorm/embedding weights).

    Args:
        weight_decay: weight decay value assigned to the decayed group.

    Returns:
        A list of two dicts in the format accepted by PyTorch optimizers:
        the decayed group (with `weight_decay`) followed by the non-decayed
        group (with weight decay 0.0). No optimizer object is created here.

    Raises:
        KeyError: if 'language_model_head.weight' was not collected.
        AssertionError: if a parameter lands in both groups or in neither.
    """

    # separate out all parameters to those that will and won't experience regularizing weight decay
    decay = set()
    no_decay = set()
    whitelist_weight_modules = (nn.Linear, )
    blacklist_weight_modules = (nn.LayerNorm, torch.nn.Embedding)
    for mn, m in self.named_modules():
        for pn, _ in m.named_parameters():
            fpn = '%s.%s' % (mn, pn) if mn else pn  # full param name
            # random note: because named_modules and named_parameters are recursive
            # we will see the same tensors p many many times. but doing it this way
            # allows us to know which parent module any tensor p belongs to...
            if pn.endswith('bias'):
                # all biases will not be decayed
                no_decay.add(fpn)
            elif pn.endswith('weight') and isinstance(m, whitelist_weight_modules):
                # weights of whitelist modules will be weight decayed
                decay.add(fpn)
            elif pn.endswith('weight') and isinstance(m, blacklist_weight_modules):
                # weights of blacklist modules will NOT be weight decayed
                no_decay.add(fpn)

    # subtle: 'word_token_embedding.weight' and 'language_model_head.weight' are tied
    # (one shared tensor), so after the loop above the tensor is listed under both
    # names: the embedding name in no_decay and the head name in decay.
    # named_parameters() doesn't return duplicates, so param_dict below holds the
    # tensor under a single name only (presumably 'word_token_embedding.weight',
    # the one registered first). Manually remove the head name from the decay set
    # so the tensor is optimized once, via the embedding name, and not decayed.
    decay.remove('language_model_head.weight')

    # validate that we considered every parameter
    param_dict = {pn: p for pn, p in self.named_parameters()}
    inter_params = decay & no_decay
    union_params = decay | no_decay
    assert len(inter_params) == 0, "parameters %s made it into both decay/no_decay sets!" % (str(inter_params), )
    assert len(param_dict.keys() - union_params) == 0, "parameters %s were not separated into either decay/no_decay set!" \
                                                % (str(param_dict.keys() - union_params), )

    # create the parameter groups; names are sorted so the order is deterministic
    optim_groups = [
        {"params": [param_dict[pn] for pn in sorted(decay)], "weight_decay": weight_decay},
        {"params": [param_dict[pn] for pn in sorted(no_decay)], "weight_decay": 0.0},
    ]
    return optim_groups
|
||||||
|
|
||||||
|
@torch.no_grad()
def generate(self, idx, max_new_tokens, temperature=1.0, top_k=None):
    """
    Take a conditioning sequence of indices idx (LongTensor of shape (b,t)) and complete
    the sequence max_new_tokens times, feeding the predictions back into the model each time.
    Most likely you'll want to make sure to be in model.eval() mode of operation for this.

    Args:
        idx: LongTensor of shape (b, t) holding the conditioning token ids.
        max_new_tokens: number of tokens to sample and append.
        temperature: logits are divided by this value before the softmax.
        top_k: if given, only the `top_k` most likely tokens can be sampled.

    Returns:
        The conditioning sequence followed by the sampled tokens, squeezed
        and converted to a numpy array on the CPU.
    """
    for _ in range(max_new_tokens):
        # if the sequence context is growing too long we must crop it to the last
        # max_seq_len tokens, otherwise the position indices run past the
        # position embedding table
        idx_cond = idx if idx.size(1) <= self.max_seq_len else idx[:, -self.max_seq_len:]
        # forward the model to get the logits for the index in the sequence
        logits, _ = self(idx_cond)
        # pluck the logits at the final step and scale by desired temperature
        logits = logits[:, -1, :] / temperature
        # optionally crop the logits to only the top k options
        if top_k is not None:
            v, _ = torch.topk(logits, min(top_k, logits.size(-1)))
            logits[logits < v[:, [-1]]] = -float('Inf')
        # apply softmax to convert logits to (normalized) probabilities
        probs = F.softmax(logits, dim=-1)
        # sample from the distribution
        idx_next = torch.multinomial(probs, num_samples=1)
        # append sampled index to the running sequence and continue
        idx = torch.cat((idx, idx_next), dim=1)

    return idx.squeeze().cpu().numpy()
|
||||||
|
############################################################
|
||||||
|
|
||||||
|
# Named presets of architecture hyper-parameters, looked up by model name.
# Each preset gives the number of layers, embedding size, number of attention
# heads, feed-forward size (4x the embedding size) and dropout ratio.
GPTConfig = {
    'mygpt': dict(num_layer=4, embed_dim=128, num_head=4, feedforward_dim=128*4, dropout=0.0),
    'gpt2-mini': dict(num_layer=6, embed_dim=384, num_head=6, feedforward_dim=384*4, dropout=0.2),
    'gpt2': dict(num_layer=12, embed_dim=768, num_head=12, feedforward_dim=768*4, dropout=0.2),
}
|
||||||
61
hw4/code/prepare.py
Normal file
61
hw4/code/prepare.py
Normal file
@@ -0,0 +1,61 @@
|
|||||||
|
"""
|
||||||
|
Prepare the dataset for character-level language modeling.
|
||||||
|
So instead of encoding with GPT-2 BPE tokens, we just map characters to ints.
|
||||||
|
"""
|
||||||
|
import os
|
||||||
|
import numpy as np
|
||||||
|
import argparse
|
||||||
|
import json
|
||||||
|
|
||||||
|
parser = argparse.ArgumentParser()
parser.add_argument('--data_root', type=str, default='data/quansongci', help='data directory')
args = parser.parse_args()

# set the input file path
input_file_path = os.path.join(args.data_root, 'data.json')

with open(input_file_path, 'r', encoding='utf-8') as f:
    data = json.load(f)['data']
print(f"length of dataset: {len(data):,}")

# get all the unique characters that occur in this text
chars = sorted(set(''.join(data)))
vocab_size = len(chars) + 2  # for <pad> and <eos>
print("all the unique characters:", ''.join(chars))
print(f"vocab size: {vocab_size:,}")

# create a mapping from characters to integers; ids 0 and 1 are reserved
# for the special tokens, so real characters start at 2
stoi = {ch: i + 2 for i, ch in enumerate(chars)}
itos = {i + 2: ch for i, ch in enumerate(chars)}
stoi['<pad>'] = 0
itos[0] = '<pad>'
stoi['<eos>'] = 1
itos[1] = '<eos>'

# create the train and test splits: first 90% for training, the rest for validation
n = len(data)
train_data = data[:int(n*0.9)]
val_data = data[int(n*0.9):]
print(f"train has {len(train_data):,} samples")
print(f"val has {len(val_data):,} samples")

# save each split together with the meta information, to help us encode/decode later
# (json.dump writes the integer keys of `itos` as strings)
for split_name, split_data in (('train', train_data), ('val', val_data)):
    split_meta = {
        'data': split_data,
        'vocab_size': vocab_size,
        'itos': itos,
        'stoi': stoi,
    }
    with open(os.path.join(args.data_root, '%s.json' % split_name), 'w', encoding='utf-8') as f:
        json.dump(split_meta, f, ensure_ascii=False, indent=4)
|
||||||
|
|
||||||
76
hw4/code/sample.py
Normal file
76
hw4/code/sample.py
Normal file
@@ -0,0 +1,76 @@
|
|||||||
|
"""
|
||||||
|
Sample from a trained model
|
||||||
|
"""
|
||||||
|
import os
|
||||||
|
import pickle
|
||||||
|
from contextlib import nullcontext
|
||||||
|
import torch
|
||||||
|
from model import GPTConfig, GPT
|
||||||
|
import argparse
|
||||||
|
from dataset import Converter, LMDataset
|
||||||
|
|
||||||
|
def sample(start, num_samples, max_new_tokens, model_name, ckpt_path, data_root, device):
    """
    Load a trained model and print `num_samples` generated texts.

    Args:
        start: prompt string that every sample starts with.
        num_samples: number of samples to generate and print.
        max_new_tokens: number of tokens generated after the prompt.
        model_name: key into `GPTConfig`, used when the checkpoint carries no 'model_args'.
        ckpt_path: directory that contains the checkpoint file 'best.pth'.
        data_root: directory of the dataset files; the 'train' split provides the vocabulary.
        device: device string such as 'cpu', 'cuda' or 'cuda:0'.
    """
    dataset = LMDataset(data_root, 'train')
    converter = Converter(dataset.stoi, dataset.itos)
    temperature = 0.8  # 1.0 = no change, < 1.0 = less random, > 1.0 = more random, in predictions
    top_k = 200  # retain only the top_k most likely tokens, clamp others to have 0 probability

    # model
    dtype = 'float16'  # 'float32' or 'bfloat16' or 'float16'
    ptdtype = {'float32': torch.float32, 'bfloat16': torch.bfloat16, 'float16': torch.float16}[dtype]
    # torch.autocast wants the bare device type ('cuda'), not an indexed device ('cuda:0')
    device_type = torch.device(device).type
    ctx = nullcontext() if device_type == 'cpu' else torch.autocast(device_type=device_type, dtype=ptdtype)

    # init from a model saved in a specific directory
    ckpt_path = os.path.join(ckpt_path, 'best.pth')
    print("sample from %s" % ckpt_path)
    # NOTE(security): torch.load unpickles the file; only load checkpoints you trust
    checkpoint = torch.load(ckpt_path, map_location=device)
    gptconf = GPTConfig[model_name]
    if 'model_args' in checkpoint:
        # the arguments stored with the checkpoint take precedence over the preset
        gptconf = checkpoint['model_args']
    model = GPT(**gptconf)
    state_dict = checkpoint['state_dict']
    # NOTE: the original carried commented-out code that stripped an '_orig_mod.'
    # prefix from the state-dict keys; restore it if your checkpoints have that prefix
    model.load_state_dict(state_dict)

    model.eval()
    model.to(device)

    # encode the beginning of the prompt and add a batch dimension
    start_ids = converter.single_encode(start)
    x = torch.from_numpy(start_ids)[None, ...].to(device).long()

    # run generation
    with torch.no_grad():
        with ctx:
            for _ in range(num_samples):
                y = model.generate(x, max_new_tokens, temperature=temperature, top_k=top_k)
                print(converter.single_decode(y))
                print('---------------')
|
||||||
|
|
||||||
|
if __name__ == '__main__':

    # set random seed for reproducibility
    seed = 2024
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)
    torch.backends.cudnn.deterministic = True
    torch.backends.cuda.matmul.allow_tf32 = True  # allow tf32 on matmul
    torch.backends.cudnn.allow_tf32 = True  # allow tf32 on cudnn

    # set configurations of the model and sampling process
    parser = argparse.ArgumentParser()
    parser.add_argument('--start', type=str, default='+++', help='start of the sample, e.g. "+++" or "+++清平乐"')
    # the default is an int to match `type=int` (it used to be the string '10')
    parser.add_argument('--num_samples', type=int, default=10, help='the number of samples')
    parser.add_argument('--model_name', type=str, default='mygpt', help='name of the model')
    parser.add_argument('--ckpt_path', type=str, default='workdirs/quansongci', help='path to load checkpoints')
    parser.add_argument('--data_root', type=str, default='data/quansongci', help='file of training and validation data')
    parser.add_argument('--device', type=str, help='cpu or cuda')

    opt = parser.parse_args()
    if opt.device is None:
        # fall back to the GPU when one is available
        opt.device = 'cuda' if torch.cuda.is_available() else 'cpu'

    # prompt plus generated tokens add up to 128 tokens in total
    # NOTE(review): 128 presumably mirrors the model's max_seq_len — confirm
    sample(opt.start, opt.num_samples, 128-len(opt.start), opt.model_name, opt.ckpt_path, opt.data_root, opt.device)
|
||||||
219
hw4/code/train.py
Normal file
219
hw4/code/train.py
Normal file
@@ -0,0 +1,219 @@
|
|||||||
|
import os
|
||||||
|
import time
|
||||||
|
import math
|
||||||
|
import pickle
|
||||||
|
from contextlib import nullcontext
|
||||||
|
import argparse
|
||||||
|
|
||||||
|
import numpy as np
|
||||||
|
import torch
|
||||||
|
from torch.utils.data import DataLoader
|
||||||
|
|
||||||
|
from model import GPT, GPTConfig
|
||||||
|
from dataset import LMDataset, Converter
|
||||||
|
import matplotlib.pyplot as plt
|
||||||
|
|
||||||
|
# learning rate decay scheduler (cosine with warmup)
|
||||||
|
def get_lr(it, learning_rate, min_lr=1e-4, warmup_iters=100, lr_decay_iters=6000):
    """Return the learning rate for iteration ``it``.

    The schedule has three phases:

    1. linear warmup from 0 to ``learning_rate`` over ``warmup_iters`` steps,
    2. cosine decay from ``learning_rate`` down to ``min_lr`` until
       ``lr_decay_iters``,
    3. constant ``min_lr`` from ``lr_decay_iters`` onwards.

    Args:
        it: Current iteration number (0-based).
        learning_rate: Peak learning rate reached at the end of warmup.
        min_lr: Floor the schedule decays to.
        warmup_iters: Number of linear warmup iterations.
        lr_decay_iters: Iteration at which the cosine decay reaches ``min_lr``.

    Returns:
        The learning rate to use at iteration ``it``.
    """
    # 1) linear warmup for warmup_iters steps
    if it < warmup_iters:
        return learning_rate * it / warmup_iters
    # 2) at or past lr_decay_iters, return the min learning rate.
    # `>=` (rather than `>`) yields the same value at the boundary, where the
    # cosine coefficient is exactly 0, and avoids a 0/0 division below when
    # warmup_iters == lr_decay_iters.
    if it >= lr_decay_iters:
        return min_lr
    # 3) in between, use cosine decay down to min learning rate.
    # Here warmup_iters <= it < lr_decay_iters, so the ratio lies in [0, 1).
    decay_ratio = (it - warmup_iters) / (lr_decay_iters - warmup_iters)
    coeff = 0.5 * (1.0 + math.cos(math.pi * decay_ratio))  # coeff ranges 0..1
    return min_lr + coeff * (learning_rate - min_lr)
|
||||||
|
|
||||||
|
def train(data_root, model_name, batch_size, n_iters, ckpt_path, val_interval, device='cpu', no_res=False, no_pos=False):
    """Train a GPT model from scratch, validating and checkpointing periodically.

    Every ``val_interval`` iterations the validation loss is estimated and a
    checkpoint is written to ``<ckpt_path>/latest.pth``; when the validation
    loss improves the same checkpoint is also written to
    ``<ckpt_path>/best.pth``. After training, the loss curves are plotted.

    Args:
        data_root: Passed to ``LMDataset`` to load the 'train' and 'val' splits.
        model_name: Key into ``GPTConfig`` selecting the model hyper-parameters.
        batch_size: Training batch size (cast to ``int``).
        n_iters: Total number of optimizer steps to run.
        ckpt_path: Directory that receives the checkpoints and the plot.
        val_interval: Run validation/checkpointing every this many iterations.
        device: Device string such as 'cpu', 'cuda' or 'cuda:0'.
        no_res: Forwarded to ``GPT`` as the ``no_res`` keyword argument.
        no_pos: Forwarded to ``GPT`` as the ``no_pos`` keyword argument.
    """
    train_dataset = LMDataset(data_root, 'train')
    val_dataset = LMDataset(data_root, 'val')
    train_loader = DataLoader(train_dataset, batch_size=int(batch_size), shuffle=True)
    val_loader = DataLoader(val_dataset, batch_size=1, shuffle=False)
    converter = Converter(train_dataset.stoi, train_dataset.itos)

    # adamw optimizer
    learning_rate = 5e-3  # max learning rate
    weight_decay = 1e-1
    beta1 = 0.9
    beta2 = 0.99
    grad_clip = 1.0  # clip gradients at this value, or disable if == 0.0

    # system
    # torch.autocast needs the bare device type ('cuda'), not an indexed
    # device string such as 'cuda:0', so normalise it first.
    device_type = torch.device(device).type
    # 'float32', 'bfloat16', or 'float16'; the latter enables the GradScaler
    dtype = 'bfloat16' if device_type == 'cpu' else 'float16'
    ptdtype = {'float32': torch.float32, 'bfloat16': torch.bfloat16, 'float16': torch.float16}[dtype]
    ctx = nullcontext() if device_type == 'cpu' else torch.autocast(device_type=device_type, dtype=ptdtype)
    best_val_loss = 1e9
    iter_num = 0  # number of iterations in the lifetime of this process

    # model init
    # Copy the config so the shared GPTConfig entry is not mutated in place.
    model_args = dict(GPTConfig[model_name])
    model_args['vocab_size'] = train_dataset.vocab_size
    model_args['max_seq_len'] = 128
    model_args['no_res'] = no_res
    model_args['no_pos'] = no_pos

    # init a new model from scratch
    print("Initializing a new model from scratch")
    model = GPT(**model_args)
    model.to(device)

    # initialize a GradScaler. If enabled=False scaler is a no-op
    scaler = torch.cuda.amp.GradScaler(enabled=(dtype == 'float16'))

    # optimizer
    optim_groups = model.configure_optimizers(weight_decay)
    optimizer = torch.optim.AdamW(optim_groups, lr=learning_rate, betas=(beta1, beta2))

    print('training...')
    # training loop: enough epochs to cover n_iters batches
    epoch_num = math.ceil(n_iters * int(batch_size) / float(len(train_dataset)))
    t0 = time.time()
    model.train()
    train_losses = []
    val_losses = []
    for epoch in range(epoch_num):
        for step, inputs in enumerate(train_loader):
            if iter_num >= n_iters:
                break
            X, Y = converter.encode(inputs)
            X, Y = X.to(device), Y.to(device)

            # set the scheduled learning rate for this iteration
            lr = get_lr(iter_num, learning_rate, lr_decay_iters=n_iters)
            for param_group in optimizer.param_groups:
                param_group['lr'] = lr

            # forward pass (under autocast when not on CPU)
            with ctx:
                logits, loss = model(X, Y)

            # backward pass, with gradient scaling if training in fp16
            scaler.scale(loss).backward()
            # clip the gradient
            if grad_clip != 0.0:
                scaler.unscale_(optimizer)
                torch.nn.utils.clip_grad_norm_(model.parameters(), grad_clip)
            # step the optimizer and scaler if training in fp16
            scaler.step(optimizer)
            scaler.update()
            # flush the gradients as soon as we can, no need for this memory anymore
            optimizer.zero_grad(set_to_none=True)

            iter_num += 1
            lossf = loss.item()
            train_losses.append(lossf)

            # evaluate the loss on the val set and write checkpoints
            if iter_num % val_interval == 0:
                # timing and logging
                t1 = time.time()
                dt = t1 - t0
                t0 = t1
                print(f"iter {iter_num}: loss {lossf:.4f}, time {dt*1000:.2f}ms")
                losses = estimate_loss(model, val_loader, converter, ctx, device)
                val_losses.append(losses['val'])
                print(f"iter {iter_num}: val loss {losses['val']:.4f}")

                # Update the running best BEFORE building the checkpoint so
                # the saved 'best_val_loss' reflects this evaluation.
                improved = losses['val'] < best_val_loss
                if improved:
                    best_val_loss = losses['val']

                print(f"saving latest checkpoint to {ckpt_path}")
                checkpoint = {
                    'state_dict': model.state_dict(),
                    'optimizer': optimizer.state_dict(),
                    'model_args': model_args,
                    'iter_num': iter_num,
                    'best_val_loss': best_val_loss,
                }
                torch.save(checkpoint, os.path.join(ckpt_path, 'latest.pth'))

                if improved:
                    print(f"saving best checkpoint to {ckpt_path}")
                    torch.save(checkpoint, os.path.join(ckpt_path, 'best.pth'))

    plot(n_iters, train_losses, val_losses, val_interval, ckpt_path)
|
||||||
|
|
||||||
|
def plot(n_iters, train_losses, val_losses, val_interval, ckpt_path):
    """Plot loss and perplexity curves, save them under ``ckpt_path`` and show them.

    Args:
        n_iters: Total number of training iterations. Kept for interface
            compatibility; the x axes are derived from the list lengths.
        train_losses: One training loss per iteration.
        val_losses: One validation loss per validation run, i.e. recorded
            every ``val_interval`` iterations.
        val_interval: Number of iterations between validation runs.
        ckpt_path: Directory that receives 'loss&perplexity.jpg'.
    """
    # create a plot
    fig, ax = plt.subplots(1, 2, figsize=(18, 6))
    # x positions are 1-based iteration numbers. Validation point k was
    # recorded after iteration k * val_interval; deriving the axis from
    # len(val_losses) keeps x and y the same length even when n_iters is
    # not a multiple of val_interval.
    train_iters = np.arange(1, len(train_losses) + 1)
    val_iters = val_interval * np.arange(1, len(val_losses) + 1)

    # draw loss
    ax[0].plot(train_iters, train_losses)
    ax[0].plot(val_iters, val_losses, 'r')

    # set labels
    ax[0].set_xlabel('training iters')
    ax[0].legend(['training loss', 'validation loss'])

    # perplexity is exp(loss)
    train_perplexity = [np.exp(x) for x in train_losses]
    val_perplexity = [np.exp(x) for x in val_losses]
    # draw perplexity
    ax[1].plot(train_iters, train_perplexity)
    ax[1].plot(val_iters, val_perplexity, 'r')

    # set labels
    ax[1].set_xlabel('training iters')
    ax[1].legend(['training perplexity', 'validation perplexity'])
    plt.tight_layout()

    # save the image, then show it
    plt.savefig(os.path.join(ckpt_path, 'loss&perplexity.jpg'), dpi=300)
    plt.show()
|
||||||
|
|
||||||
|
# estimates the validation loss as the mean over at most 100 batches
@torch.no_grad()
def estimate_loss(model, val_loader, converter, ctx, device):
    """Return ``{'val': mean_loss}`` over up to 100 batches of ``val_loader``.

    The model is switched to eval mode for the measurement and back to train
    mode before returning.

    Args:
        model: Callable as ``model(X, Y)`` returning ``(logits, loss)``.
        val_loader: Iterable of raw batches accepted by ``converter.encode``.
        converter: Object whose ``encode(inputs)`` returns the ``(X, Y)`` pair.
        ctx: Context manager wrapping the forward pass (e.g. autocast).
        device: Device the encoded batch is moved to.

    Returns:
        A dict with key 'val' holding the average loss over the batches that
        were actually evaluated, or NaN when the loader yields no batch.
    """
    max_iters = 100
    model.eval()
    total = 0.0
    n_batches = 0
    for inputs in val_loader:
        if n_batches >= max_iters:
            break
        n_batches += 1
        X, Y = converter.encode(inputs)
        X, Y = X.to(device), Y.to(device)
        with ctx:
            logits, loss = model(X, Y)
        total += loss.item()
    model.train()
    # Average over the batches that were really seen; dividing by the fixed
    # cap would under-report the loss for loaders shorter than max_iters.
    # NaN (not 0.0) for an empty loader so it never counts as a "best" loss.
    mean_loss = total / n_batches if n_batches else float('nan')
    return {'val': mean_loss}
|
||||||
|
|
||||||
|
if __name__ == '__main__':
    # Script entry point: seed the RNGs, parse the command line, then train.

    # set random seed for reproducibility
    seed = 2024
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)
    torch.backends.cudnn.deterministic = True
    torch.backends.cuda.matmul.allow_tf32 = True  # allow tf32 on matmul
    torch.backends.cudnn.allow_tf32 = True  # allow tf32 on cudnn

    # set configurations of the model and training process
    # (help texts corrected: --iters counts iterations, not epochs; the
    # --no_* switches disable a feature when given; the model is trained
    # from scratch, not pretrained)
    parser = argparse.ArgumentParser()
    parser.add_argument('--data_root', type=str, default='data/quansongci', help='file of training and validation data')
    parser.add_argument('--model_name', type=str, default='mygpt', help='name of the model')
    parser.add_argument('--iters', type=int, default=1000, help='number of training iterations')
    parser.add_argument('--batchsize', type=int, default=16, help='training batch size')
    parser.add_argument('--ckpt_path', type=str, default='workdirs/quansongci', help='path to save checkpoints')
    parser.add_argument('--val_interval', type=int, default=20, help='iter intervals of validation')
    parser.add_argument('--no_res', action='store_true', help='train without residual connection')
    parser.add_argument('--no_pos', action='store_true', help='train without positional encoding')
    parser.add_argument('--device', type=str, help='cpu or cuda')

    opt = parser.parse_args()
    # Fall back to CUDA when available if the user did not choose a device.
    if opt.device is None:
        opt.device = 'cuda' if torch.cuda.is_available() else 'cpu'

    # The checkpoint directory must exist before train() saves into it.
    os.makedirs(opt.ckpt_path, exist_ok=True)
    train(opt.data_root, opt.model_name, opt.batchsize, opt.iters, opt.ckpt_path, opt.val_interval, opt.device, opt.no_res, opt.no_pos)
|
||||||
|
|
||||||
|
|
||||||
132
hw4/report/dtx-style.sty
Normal file
132
hw4/report/dtx-style.sty
Normal file
@@ -0,0 +1,132 @@
|
|||||||
|
%%
|
||||||
|
%% This is file `dtx-style.sty',
|
||||||
|
%% generated with the docstrip utility.
|
||||||
|
%%
|
||||||
|
%% The original source files were:
|
||||||
|
%%
|
||||||
|
%% thucoursework.dtx (with options: `dtx-style')
|
||||||
|
%%
|
||||||
|
%% This is a generated file.
|
||||||
|
%%
|
||||||
|
%% Copyright (C) 2021 by zhaofeng-shu33 <616545598@qq.com>
|
||||||
|
%%
|
||||||
|
%% This work may be distributed and/or modified under the
|
||||||
|
%% conditions of the LaTeX Project Public License, either version 1.3
|
||||||
|
%% of this license or (at your option) any later version.
|
||||||
|
%% The latest version of this license is in
|
||||||
|
%% http://www.latex-project.org/lppl.txt
|
||||||
|
%% and version 1.3 or later is part of all distributions of LaTeX
|
||||||
|
%% version 2005/12/01 or later.
|
||||||
|
%%
|
||||||
|
%% To produce the documentation run the original source files ending with `.dtx'
|
||||||
|
%% through LaTeX.
|
||||||
|
%%
|
||||||
|
|
||||||
|
\ProvidesPackage{dtx-style}
|
||||||
|
\RequirePackage{hypdoc}
|
||||||
|
\RequirePackage[UTF8,scheme=chinese]{ctex}
|
||||||
|
\RequirePackage{newpxtext}
|
||||||
|
\RequirePackage{newpxmath}
|
||||||
|
\RequirePackage[
|
||||||
|
top=2.5cm, bottom=2.5cm,
|
||||||
|
left=4cm, right=2cm,
|
||||||
|
headsep=3mm]{geometry}
|
||||||
|
\RequirePackage{array,longtable,booktabs}
|
||||||
|
\RequirePackage{listings}
|
||||||
|
\RequirePackage{fancyhdr}
|
||||||
|
\RequirePackage{xcolor}
|
||||||
|
\RequirePackage{enumitem}
|
||||||
|
\RequirePackage{etoolbox}
|
||||||
|
\RequirePackage{metalogo}
|
||||||
|
|
||||||
|
\colorlet{thu@macro}{blue!60!black}
|
||||||
|
\colorlet{thu@env}{blue!70!black}
|
||||||
|
\colorlet{thu@option}{purple}
|
||||||
|
\patchcmd{\PrintMacroName}{\MacroFont}{\MacroFont\bfseries\color{thu@macro}}{}{}
|
||||||
|
\patchcmd{\PrintDescribeMacro}{\MacroFont}{\MacroFont\bfseries\color{thu@macro}}{}{}
|
||||||
|
\patchcmd{\PrintDescribeEnv}{\MacroFont}{\MacroFont\bfseries\color{thu@env}}{}{}
|
||||||
|
\patchcmd{\PrintEnvName}{\MacroFont}{\MacroFont\bfseries\color{thu@env}}{}{}
|
||||||
|
|
||||||
|
\def\DescribeOption{%
|
||||||
|
\leavevmode\@bsphack\begingroup\MakePrivateLetters%
|
||||||
|
\Describe@Option}
|
||||||
|
\def\Describe@Option#1{\endgroup
|
||||||
|
\marginpar{\raggedleft\PrintDescribeOption{#1}}%
|
||||||
|
\thu@special@index{option}{#1}\@esphack\ignorespaces}
|
||||||
|
\def\PrintDescribeOption#1{\strut \MacroFont\bfseries\sffamily\color{thu@option} #1\ }
|
||||||
|
\def\thu@special@index#1#2{\@bsphack
|
||||||
|
\begingroup
|
||||||
|
\HD@target
|
||||||
|
\let\HDorg@encapchar\encapchar
|
||||||
|
\edef\encapchar usage{%
|
||||||
|
\HDorg@encapchar hdclindex{\the\c@HD@hypercount}{usage}%
|
||||||
|
}%
|
||||||
|
\index{#2\actualchar{\string\ttfamily\space#2}
|
||||||
|
(#1)\encapchar usage}%
|
||||||
|
\index{#1:\levelchar#2\actualchar
|
||||||
|
{\string\ttfamily\space#2}\encapchar usage}%
|
||||||
|
\endgroup
|
||||||
|
\@esphack}
|
||||||
|
|
||||||
|
\lstdefinestyle{lstStyleBase}{%
|
||||||
|
basicstyle=\small\ttfamily,
|
||||||
|
aboveskip=\medskipamount,
|
||||||
|
belowskip=\medskipamount,
|
||||||
|
lineskip=0pt,
|
||||||
|
boxpos=c,
|
||||||
|
showlines=false,
|
||||||
|
extendedchars=true,
|
||||||
|
upquote=true,
|
||||||
|
tabsize=2,
|
||||||
|
showtabs=false,
|
||||||
|
showspaces=false,
|
||||||
|
showstringspaces=false,
|
||||||
|
numbers=none,
|
||||||
|
linewidth=\linewidth,
|
||||||
|
xleftmargin=4pt,
|
||||||
|
xrightmargin=0pt,
|
||||||
|
resetmargins=false,
|
||||||
|
breaklines=true,
|
||||||
|
breakatwhitespace=false,
|
||||||
|
breakindent=0pt,
|
||||||
|
breakautoindent=true,
|
||||||
|
columns=flexible,
|
||||||
|
keepspaces=true,
|
||||||
|
gobble=2,
|
||||||
|
framesep=3pt,
|
||||||
|
rulesep=1pt,
|
||||||
|
framerule=1pt,
|
||||||
|
backgroundcolor=\color{gray!5},
|
||||||
|
stringstyle=\color{green!40!black!100},
|
||||||
|
keywordstyle=\bfseries\color{blue!50!black},
|
||||||
|
commentstyle=\slshape\color{black!60}}
|
||||||
|
|
||||||
|
\lstdefinestyle{lstStyleShell}{%
|
||||||
|
style=lstStyleBase,
|
||||||
|
frame=l,
|
||||||
|
rulecolor=\color{purple},
|
||||||
|
language=bash}
|
||||||
|
|
||||||
|
\lstdefinestyle{lstStyleLaTeX}{%
|
||||||
|
style=lstStyleBase,
|
||||||
|
frame=l,
|
||||||
|
rulecolor=\color{violet},
|
||||||
|
language=[LaTeX]TeX}
|
||||||
|
|
||||||
|
\lstnewenvironment{latex}{\lstset{style=lstStyleLaTeX}}{}
|
||||||
|
\lstnewenvironment{shell}{\lstset{style=lstStyleShell}}{}
|
||||||
|
|
||||||
|
\setlist{nosep}
|
||||||
|
|
||||||
|
\DeclareDocumentCommand{\option}{m}{\textsf{#1}}
|
||||||
|
\DeclareDocumentCommand{\env}{m}{\texttt{#1}}
|
||||||
|
\DeclareDocumentCommand{\pkg}{s m}{%
|
||||||
|
\texttt{#2}\IfBooleanF#1{\thu@special@index{package}{#2}}}
|
||||||
|
\DeclareDocumentCommand{\file}{s m}{%
|
||||||
|
\texttt{#2}\IfBooleanF#1{\thu@special@index{file}{#2}}}
|
||||||
|
\newcommand{\myentry}[1]{%
|
||||||
|
\marginpar{\raggedleft\color{purple}\bfseries\strut #1}}
|
||||||
|
\newcommand{\note}[2][Note]{{%
|
||||||
|
\color{magenta}{\bfseries #1}\emph{#2}}}
|
||||||
|
|
||||||
|
\def\thucoursework{\textsc{Thu}\-\textsc{Coursework}}
|
||||||
153
hw4/report/iidef.sty
Normal file
153
hw4/report/iidef.sty
Normal file
@@ -0,0 +1,153 @@
|
|||||||
|
%%
|
||||||
|
%% This is file `iidef.sty',
|
||||||
|
%% generated with the docstrip utility.
|
||||||
|
%%
|
||||||
|
%% The original source files were:
|
||||||
|
%%
|
||||||
|
%% thucoursework.dtx (with options: `sty')
|
||||||
|
%%
|
||||||
|
%% This is a generated file.
|
||||||
|
%%
|
||||||
|
%% Copyright (C) 2021 by zhaofeng-shu33 <616545598@qq.com>
|
||||||
|
%%
|
||||||
|
%% This work may be distributed and/or modified under the
|
||||||
|
%% conditions of the LaTeX Project Public License, either version 1.3
|
||||||
|
%% of this license or (at your option) any later version.
|
||||||
|
%% The latest version of this license is in
|
||||||
|
%% http://www.latex-project.org/lppl.txt
|
||||||
|
%% and version 1.3 or later is part of all distributions of LaTeX
|
||||||
|
%% version 2005/12/01 or later.
|
||||||
|
%%
|
||||||
|
%% To produce the documentation run the original source files ending with `.dtx'
|
||||||
|
%% through LaTeX.
|
||||||
|
%%
|
||||||
|
|
||||||
|
\NeedsTeXFormat{LaTeX2e}[1999/12/01]
|
||||||
|
\ProvidesClass{iidef}
|
||||||
|
[2020/09/09 2.6 Tsinghua University Coursework Template]
|
||||||
|
%% configuration of nested enumerate env
|
||||||
|
\RequirePackage{enumitem}
|
||||||
|
%% set hwcount key-value option
|
||||||
|
\RequirePackage{kvoptions}
|
||||||
|
%% required by macro DeclareMathOperator
|
||||||
|
\RequirePackage{amsmath}
|
||||||
|
%% Set up page headers using with fancyhdr
|
||||||
|
\@ifundefined{lhead}{\RequirePackage{fancyhdr}}
|
||||||
|
{\def\@thulhead{thulhead}}
|
||||||
|
\RequirePackage{amsthm}
|
||||||
|
%% semester
|
||||||
|
\def\@term{term}
|
||||||
|
\newcommand{\theterm}[1]{\renewcommand\@term{#1}}
|
||||||
|
%% institute
|
||||||
|
%% The default takes no argument: \courseheader and the running header use
%% \@courseinstitute bare, and \thecourseinstitute redefines it with none.
\newcommand{\@courseinstitute}{institute}
|
||||||
|
\newcommand{\thecourseinstitute}[1]{\renewcommand\@courseinstitute{#1}}
|
||||||
|
%% coursename
|
||||||
|
%% The default takes no argument: \courseheader and the running header use
%% \@coursename bare, and \thecoursename redefines it with none.
\newcommand{\@coursename}{coursename}
|
||||||
|
\newcommand{\thecoursename}[1]{\renewcommand\@coursename{\textsc{#1}}}
|
||||||
|
%% user can rewrite homework name
|
||||||
|
\def\@hwname{Homework}
|
||||||
|
\def\hwname#1{\renewcommand\@hwname{#1}}
|
||||||
|
%% \iidef@thehwcnt = 1
|
||||||
|
\DeclareStringOption[1]{thehwcnt}
|
||||||
|
\ProcessKeyvalOptions*
|
||||||
|
\def\thehwcnt{\iidef@thehwcnt}
|
||||||
|
%% page header setup, distinguish between first page(plain style)
|
||||||
|
%% and second page on (runningpage style)
|
||||||
|
%%***************************************************************************
|
||||||
|
\newcommand{\courseheader}{
|
||||||
|
\thispagestyle{plain}%first page use native plain style to suppress header
|
||||||
|
\vspace*{-1in}
|
||||||
|
\begin{center}
|
||||||
|
\@courseinstitute\\
|
||||||
|
\@coursename\\
|
||||||
|
\@term
|
||||||
|
\vspace*{0.1in}
|
||||||
|
\hrule
|
||||||
|
\end{center}
|
||||||
|
\begin{center}
|
||||||
|
\underline{\bf \@hwname\;\thehwcnt} \\
|
||||||
|
\end{center}
|
||||||
|
}
|
||||||
|
\@ifundefined{@thulhead}{
|
||||||
|
\fancypagestyle{runningpage}
|
||||||
|
{
|
||||||
|
\fancyhead[L]{\small\@coursename}
|
||||||
|
\fancyhead[R]{\small\@courseinstitute}
|
||||||
|
}
|
||||||
|
%% use runningpage style from second page on
|
||||||
|
\pagestyle{runningpage}
|
||||||
|
}{}
|
||||||
|
%% *********************************************************************************************
|
||||||
|
%%name command macro
|
||||||
|
%%*************************
|
||||||
|
\newcommand{\name}[1]{
|
||||||
|
\begin{flushleft}
|
||||||
|
#1\hfill
|
||||||
|
\today
|
||||||
|
\end{flushleft}
|
||||||
|
\hrule
|
||||||
|
|
||||||
|
\vspace{2em}
|
||||||
|
|
||||||
|
\flushleft
|
||||||
|
}
|
||||||
|
%%*************************
|
||||||
|
%% enumitem related configuration
|
||||||
|
\setlist[enumerate,1]{label=\thehwcnt.\arabic*.}
|
||||||
|
\setlist[enumerate,2]{label=(\alph*)}
|
||||||
|
\setlist[enumerate,3]{label=\roman*.}
|
||||||
|
\setlist[enumerate,4]{label=\greek*}
|
||||||
|
%%******************************
|
||||||
|
\def\@slname{Solution}
|
||||||
|
\def\slname#1{\renewcommand\@slname{#1}}
|
||||||
|
|
||||||
|
\@ifundefined{solution}{
|
||||||
|
\newenvironment{solution}
|
||||||
|
{
|
||||||
|
\proof[\@slname]
|
||||||
|
}
|
||||||
|
{
|
||||||
|
%% no qed symbol in solution env
|
||||||
|
\renewcommand{\qedsymbol}{}
|
||||||
|
\endproof
|
||||||
|
}
|
||||||
|
}{}
|
||||||
|
%%******************************
|
||||||
|
%%common math symbols go here
|
||||||
|
%%*************************************************
|
||||||
|
\def\v#1{\underline{#1}}
|
||||||
|
\newcommand{\uc}{\underline{c}} % c, vec
|
||||||
|
\newcommand{\uv}{\underline{v}} % v, vec
|
||||||
|
\newcommand{\uw}{\underline{w}} % w, vec
|
||||||
|
\newcommand{\ux}{\underline{x}} % x, vec
|
||||||
|
\newcommand{\uy}{\underline{y}} % y, vec
|
||||||
|
\newcommand{\uz}{\underline{z}} % z, vec
|
||||||
|
\newcommand{\um}{\underline{m}} % m, vec
|
||||||
|
\newcommand{\rvx}{\mathsf{x}} % x, r.v.
|
||||||
|
\newcommand{\rvy}{\mathsf{y}} % y, r.v.
|
||||||
|
\newcommand{\rvz}{\mathsf{z}} % z, r.v.
|
||||||
|
\newcommand{\rvw}{\mathsf{w}} % w, r.v.
|
||||||
|
\newcommand{\rvH}{\mathsf{H}} % H, r.v.
|
||||||
|
\newcommand{\urvx}{\underline{\mathsf{x}}} % x, r.v. vec
|
||||||
|
\newcommand{\urvy}{\underline{\mathsf{y}}} % y, r.v. vec
|
||||||
|
\newcommand{\urvz}{\underline{\mathsf{z}}} % z, r.v. vec
|
||||||
|
\newcommand{\urvw}{\underline{\mathsf{w}}} % w, r.v. vec
|
||||||
|
|
||||||
|
\newcommand{\defas}{\triangleq} %\coloneqq
|
||||||
|
\newcommand{\reals}{\mathbb{R}}
|
||||||
|
\newcommand{\TT}{\mathrm{T}} % transpose
|
||||||
|
\DeclareMathOperator*{\argmax}{arg\,max}
|
||||||
|
\DeclareMathOperator*{\argmin}{arg\,min}
|
||||||
|
\DeclareMathOperator*{\argsup}{arg\,sup}
|
||||||
|
\DeclareMathOperator*{\arginf}{arg\,inf}
|
||||||
|
\DeclareMathOperator{\diag}{diag}
|
||||||
|
\DeclareMathOperator{\Var}{Var}
|
||||||
|
\DeclareMathOperator{\Cov}{Cov}
|
||||||
|
\DeclareMathOperator{\MSE}{MSE}
|
||||||
|
\DeclareMathOperator{\1}{\mathds{1}}
|
||||||
|
\DeclareMathOperator{\In}{\mathbb{I}}
|
||||||
|
\DeclareMathOperator{\E}{\mathbb{E}}
|
||||||
|
\DeclareMathOperator{\Prob}{\mathbb{P}}
|
||||||
|
\newcommand\independent{\protect\mathpalette{\protect\independenT}{\perp}}
|
||||||
|
\def\independenT#1#2{\mathrel{\rlap{$#1#2$}\mkern2mu{#1#2}}}
|
||||||
|
%%************************************************************************************
|
||||||
100
hw4/report/main.tex
Normal file
100
hw4/report/main.tex
Normal file
@@ -0,0 +1,100 @@
|
|||||||
|
% Homework template for Inference and Information
|
||||||
|
% UPDATE: September 26, 2017 by Xiangxiang
|
||||||
|
\documentclass[a4paper]{article}
|
||||||
|
\usepackage{ctex}
|
||||||
|
\usepackage{amsmath, amssymb, amsthm}
|
||||||
|
\usepackage{moreenum}
|
||||||
|
\usepackage{mathtools}
|
||||||
|
\usepackage{url}
|
||||||
|
\usepackage{bm}
|
||||||
|
\usepackage{enumitem}
|
||||||
|
\usepackage{graphicx}
|
||||||
|
\usepackage{listings}
|
||||||
|
\usepackage{color}
|
||||||
|
|
||||||
|
\lstset{
|
||||||
|
basicstyle = \sffamily, % 基本代码风格
|
||||||
|
keywordstyle = \bfseries, % 关键字风格
|
||||||
|
commentstyle = \rmfamily\itshape, % 注释的风格,斜体
|
||||||
|
stringstyle = \ttfamily, % 字符串风格
|
||||||
|
flexiblecolumns, % 别问为什么,加上这个
|
||||||
|
numbers = left, % 行号的位置在左边
|
||||||
|
showspaces = false, % 是否显示空格,显示了有点乱,所以不现实了
|
||||||
|
numberstyle = \zihao{-5}\ttfamily, % 行号的样式,小五号,tt等宽字体
|
||||||
|
showstringspaces = false,
|
||||||
|
captionpos = t, % 这段代码的名字所呈现的位置,t指的是top上面
|
||||||
|
frame = lrtb, % 显示边框
|
||||||
|
}
|
||||||
|
|
||||||
|
\lstdefinestyle{Python}{
|
||||||
|
language = Python, % 语言选Python
|
||||||
|
basicstyle = \zihao{-5}\ttfamily,
|
||||||
|
numberstyle = \zihao{-5}\ttfamily,
|
||||||
|
keywordstyle = \color{blue},
|
||||||
|
keywordstyle = [2] \color{teal},
|
||||||
|
stringstyle = \color{magenta},
|
||||||
|
commentstyle = \color{red}\ttfamily,
|
||||||
|
breaklines = true, % 自动换行,建议不要写太长的行
|
||||||
|
columns = fixed, % 如果不加这一句,字间距就不固定,很丑,必须加
|
||||||
|
basewidth = 0.5em,
|
||||||
|
}
|
||||||
|
\usepackage{subcaption}
|
||||||
|
\usepackage{booktabs} % toprule
|
||||||
|
\usepackage[mathcal]{eucal}
|
||||||
|
\usepackage[thehwcnt = 4]{iidef}
|
||||||
|
|
||||||
|
\thecourseinstitute{清华大学电子工程系}
|
||||||
|
\thecoursename{\textbf{媒体与认知}}
|
||||||
|
\theterm{2023-2024学年春季学期}
|
||||||
|
\hwname{作业}
|
||||||
|
\begin{document}
|
||||||
|
\courseheader
|
||||||
|
\name{YOUR NAME}
|
||||||
|
\vspace{3mm}
|
||||||
|
\centerline{\textbf{\Large{理论部分}}}
|
||||||
|
|
||||||
|
\section{单选题(15分)}
|
||||||
|
\subsection{\underline{?}}
|
||||||
|
|
||||||
|
\subsection{\underline{?}}
|
||||||
|
|
||||||
|
\subsection{\underline{?}}
|
||||||
|
|
||||||
|
\subsection{\underline{?}}
|
||||||
|
|
||||||
|
\subsection{\underline{?}}
|
||||||
|
|
||||||
|
\section{计算题(15 分)}
|
||||||
|
% 计算题1
|
||||||
|
\subsection{隐含马尔可夫模型}
|
||||||
|
|
||||||
|
\hspace{2em}暑假中,小E每天进行一项体育活动,包括跑步(R)、游泳(S)和打球(B),所选择的体育活动受某种潜在因素(如心情)的影响。小E每天把进行体育活动的照片发至微信朋友圈,我们可以根据观测信息推测该潜在因素的状态。
|
||||||
|
|
||||||
|
\hspace{2em}假设该潜在因素分为$S_1$和$S_2$两种状态。在$S_1$时,小E选择三种体育活动的概率分别为0.6,0.2,0.2;在$S_2$时,小E选择三种体育活动的概率分别为0.1,0.6,0.3。
|
||||||
|
|
||||||
|
\hspace{2em}该潜在因素的变化也有一定规律,若某天处于$S_1$的状态,第二天处于$S_1$和$S_2$的状态的概率分别为0.5,0.5;若某天处于$S_2$的状态,第二天处于$S_1$和$S_2$的状态的概率分别为0.6,0.4。
|
||||||
|
|
||||||
|
\hspace{2em}暑假第一天处于$S_1$和$S_2$的状态的概率均为0.5。
|
||||||
|
|
||||||
|
\vspace{3mm}
|
||||||
|
(1) 采用隐含马尔可夫模型(HMM)对小E暑假体育活动安排进行建模,{\color{blue}请写出HMM对应的参数$\lambda=\{\pi, A, B\}$}。
|
||||||
|
|
||||||
|
\vspace{3mm}
|
||||||
|
(2) 假设暑假第1、2、3天小E所进行的体育活动依次为跑步(R)、打球(B)和游泳(S),{\color{blue}请计算出现该观测序列的概率}。
|
||||||
|
|
||||||
|
\vspace{3mm}
|
||||||
|
(3) 在(2)的条件下,{\color{blue}请利用Viterbi算法推测暑假第1、2、3天最可能的隐含状态序列}。
|
||||||
|
|
||||||
|
|
||||||
|
% 请根据是否选择自选课题的情况选择“编程作业报告”或“自选课题工作进度汇报”中的一项完成
|
||||||
|
\section{编程作业报告}
|
||||||
|
\section{自选课题工作进度汇报}
|
||||||
|
|
||||||
|
\end{document}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
%%% Local Variables:
|
||||||
|
%%% mode: latex
|
||||||
|
%%% TeX-master: t
|
||||||
|
%%% End:
|
||||||
Reference in New Issue
Block a user