diff --git a/assignment-2/submission/19210680053/.keep b/assignment-2/submission/19210680053/.keep
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/assignment-2/submission/19210680053/README.md b/assignment-2/submission/19210680053/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..abdc5462b52fc1d754752b8f1c3b62442d70e009
--- /dev/null
+++ b/assignment-2/submission/19210680053/README.md
@@ -0,0 +1,191 @@
+# Experiment Report
+### 1. Model Design
+The network is a three-layer fully connected model: the 28×28 input is flattened to 784 dimensions and passed through linear layers of size 784→256, 256→64, and 64→10. The first two linear layers are followed by ReLU, and the output layer is followed by Softmax and Log.
+### 2. Operator Implementations
+**Matmul**
+
+**Forward**
+$$
+h = XW
+$$
+
+**Backward**
+
+With $L$ the loss and $\frac{\partial L}{\partial Y}$ the upstream gradient (`grad_y`):
+$$
+\frac{\partial L}{\partial X} = \frac{\partial L}{\partial Y} W^{T}
+$$
+$$
+\frac{\partial L}{\partial W} = X^{T} \frac{\partial L}{\partial Y}
+$$
+The dimension bookkeeping and Python implementation are as follows:
+```
+    """
+    grad_y: shape(N, d')
+    W.T: shape(d', d)
+    """
+    grad_x = np.matmul(grad_y, W.T)
+    """
+    grad_y: shape(N, d')
+    x.T: shape(d, N)
+    """
+    grad_W = np.matmul(x.T, grad_y)
+```
+**Relu**
+
+**Forward**
+$$
+Y=\begin{cases}
+X&X>0\\\\
+0&\text{otherwise}
+\end{cases}
+$$
+**Backward**
+$$
+\frac{\partial Y}{\partial X}=\begin{cases}1&X>0\\\\
+0&\text{otherwise}
+\end{cases}
+$$
+The Python implementation is as follows:
+```
+    def forward(self, x):
+        self.memory['x'] = x
+        return np.where(x > 0, x, np.zeros_like(x))
+
+    def backward(self, grad_y):
+        """
+        grad_y: same shape as x
+        """
+        x = self.memory['x']
+        grad_x = grad_y * np.where(x > 0, 1, 0)
+        return grad_x
+```
+**Log**
+
+**Forward**
+$$
+Y = \log(X + \epsilon)
+$$
+**Backward**
+$$
+\frac{\partial Y}{\partial X} = \frac{1}{X + \epsilon}
+$$
+The Python implementation is as follows:
+```
+    def forward(self, x):
+        """
+        x: shape(N, c)
+        """
+        out = np.log(x + self.epsilon)
+        self.memory['x'] = x
+
+        return out
+
+    def backward(self, grad_y):
+        """
+        grad_y: same shape as x
+        """
+        x = self.memory['x']
+        grad_x = grad_y * (1. / (x + self.epsilon))
+        return grad_x
+```
+**Softmax**
+
+**Forward**
+$$
+Y_i = \frac{e^{X_i}}{\sum_{k=1}^n e^{X_k}}
+$$
+
+**Backward**
+$$
+\frac{\partial Y_i}{\partial X_j} =
+  \begin{cases}
+  Y_i \times (1 - Y_i) & i = j\\\\
+  -Y_i \times Y_j & i \neq j
+  \end{cases}
+$$
+The Python implementation is as follows. The Softmax backward pass is computed element by element, accumulating each term of the Jacobian-vector product with explicit loops:
+```
+    def forward(self, x):
+        """
+        x: shape(N, c)
+        """
+        ex = np.exp(x)
+        rowsum = np.sum(ex, axis=1)
+        rowsum = rowsum[:, np.newaxis]
+        softmax = ex / rowsum
+        self.memory['softmax'] = softmax
+        return softmax
+
+    def backward(self, grad_y):
+        softmax = self.memory['softmax']
+        rows, cols = softmax.shape
+        grad_x = [[0 for _ in range(cols)] for _ in range(rows)]
+        for i in range(rows):
+            for j in range(cols):
+                for k in range(cols):
+                    if j == k:
+                        grad_x[i][j] += (1 - softmax[i][k]) * softmax[i][k] * grad_y[i][k]
+                    else:
+                        grad_x[i][j] += -softmax[i][j] * softmax[i][k] * grad_y[i][k]
+        grad_x = np.array(grad_x)
+        return grad_x
+```
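+
+As an optional note (not part of the graded code): the triple loop above visits every pair of output classes, so it costs O(N·c²) per call. The same Jacobian-vector product can also be written with a few array operations; the sketch below assumes `softmax` is the cached forward output and `grad_y` is the upstream gradient, exactly as in the loop version:
+```
+    # grad_x[i, j] = softmax[i, j] * (grad_y[i, j] - sum_k grad_y[i, k] * softmax[i, k])
+    inner = np.sum(grad_y * softmax, axis=1, keepdims=True)
+    grad_x = softmax * (grad_y - inner)
+```
+A related numerical detail: `np.exp(x)` can overflow for large logits, so the forward pass is often written as `np.exp(x - x.max(axis=1, keepdims=True))`, which produces the same softmax values more stably.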
+
+### 3. mini_batch Function Optimization
+The original mini_batch method shuffles and regroups the dataset elements themselves before training. The updated version works as follows:
+
+The data and labels are first collected into separate lists and converted to NumPy arrays.
+
+Only the index array is shuffled.
+
+According to batch_size, consecutive slices of the shuffled indices are then used to pull out one batch of data and labels at a time.
+
+The Python implementation is as follows:
+```
+def mini_batch(dataset, batch_size=128):
+    data = []
+    label = []
+    for each in dataset:
+        data.append(np.array(each[0]))
+        label.append(each[1])
+    data = np.array(data)
+    label = np.array(label)
+    index = data.shape[0]
+    index = list(np.random.permutation(index))
+    return [(data[index[i:i + batch_size]], label[index[i:i + batch_size]]) for i in range(0, len(data), batch_size)]
+```
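+
+As a quick illustration (not part of the training script, and assuming `train_dataset` has been obtained from `download_mnist()` as in numpy_mnist.py), each element returned by mini_batch is a (data, label) pair whose leading dimension is batch_size, except possibly for the last, smaller batch. For the 60,000-image MNIST training set and the default batch_size of 128 this gives 469 batches; the exact image shape depends on how the dataset is loaded (e.g. (128, 28, 28) or (128, 1, 28, 28)):
+```
+batches = mini_batch(train_dataset, batch_size=128)
+x, y = batches[0]
+print(len(batches), x.shape, y.shape)
+```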
+
+### 4. Experimental Results
+**Accuracy results**
+
+Using the **updated mini_batch function**:
+
+[0] Accuracy: 0.9367
+
+[1] Accuracy: 0.9607
+
+[2] Accuracy: 0.9687
+
+![](./img/loss_value%20mini%20batch.png)
+
+Using the **mini_batch function from utils.py**:
+
+[0] Accuracy: 0.9441
+
+[1] Accuracy: 0.9635
+
+[2] Accuracy: 0.9721
+
+![](./img/mini_batch_orig.png)
+
+After comparison, the two versions reach essentially the same accuracy.
+
+Using the **updated mini_batch function** with a smaller **batch size**:
+
+[0] Accuracy: 0.9594
+
+[1] Accuracy: 0.9702
+
+[2] Accuracy: 0.9771
+
+![](./img/sma_bat.png)
\ No newline at end of file
diff --git a/assignment-2/submission/19210680053/img/.keep b/assignment-2/submission/19210680053/img/.keep
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/assignment-2/submission/19210680053/img/loss_value mini batch.png b/assignment-2/submission/19210680053/img/loss_value mini batch.png
new file mode 100644
index 0000000000000000000000000000000000000000..ab0658bf3f37b62a6bf629bdd9cb5b9a42dbad80
Binary files /dev/null and b/assignment-2/submission/19210680053/img/loss_value mini batch.png differ
diff --git a/assignment-2/submission/19210680053/img/mini_batch_orig.png b/assignment-2/submission/19210680053/img/mini_batch_orig.png
new file mode 100644
index 0000000000000000000000000000000000000000..c4225c0e01087cb4abe343c4c8d295cb35ec7e22
Binary files /dev/null and b/assignment-2/submission/19210680053/img/mini_batch_orig.png differ
diff --git a/assignment-2/submission/19210680053/img/sma_bat.png b/assignment-2/submission/19210680053/img/sma_bat.png
new file mode 100644
index 0000000000000000000000000000000000000000..a32722a6b75cbad3c5c8765cd4a6da9830e249ac
Binary files /dev/null and b/assignment-2/submission/19210680053/img/sma_bat.png differ
diff --git a/assignment-2/submission/19210680053/numpy_fnn.py b/assignment-2/submission/19210680053/numpy_fnn.py
new file mode 100644
index 0000000000000000000000000000000000000000..34a4a698fb55a49f5a9e680654d62081c9d8454a
--- /dev/null
+++ b/assignment-2/submission/19210680053/numpy_fnn.py
@@ -0,0 +1,214 @@
+import numpy as np
+
+
+class NumpyOp:
+
+    def __init__(self):
+        self.memory = {}
+        self.epsilon = 1e-12
+
+
+class Matmul(NumpyOp):
+
+    def forward(self, x, W):
+        """
+        x: shape(N, d)
+        W: shape(d, d')
+        """
+        self.memory['x'] = x
+        self.memory['W'] = W
+        h = np.matmul(x, W)
+        return h
+
+    def backward(self, grad_y):
+        """
+        grad_y: shape(N, d')
+        """
+        ####################
+        #      code 1      #
+        ####################
+        x = self.memory['x']
+        W = self.memory['W']
+        """
+        grad_y: shape(N, d')
+        W.T: shape(d', d)
+        """
+        grad_x = np.matmul(grad_y, W.T)
+        """
+        grad_y: shape(N, d')
+        x.T: shape(d, N)
+        """
+        grad_W = np.matmul(x.T, grad_y)
+
+        return grad_x, grad_W
+
+
+class Relu(NumpyOp):
+
+    def forward(self, x):
+        self.memory['x'] = x
+        return np.where(x > 0, x, np.zeros_like(x))
+
+    def backward(self, grad_y):
+        """
+        grad_y: same shape as x
+        """
+        ####################
+        #      code 2      #
+        ####################
+        x = self.memory['x']
+        grad_x = grad_y * np.where(x > 0, 1, 0)
+        return grad_x
+
+
+class Log(NumpyOp):
+
+    def forward(self, x):
+        """
+        x: shape(N, c)
+        """
+        out = np.log(x + self.epsilon)
+        self.memory['x'] = x
+
+        return out
+
+    def backward(self, grad_y):
+        """
+        grad_y: same shape as x
+        """
+        ####################
+        #      code 3      #
+        ####################
+        x = self.memory['x']
+        grad_x = grad_y * (1. / (x + self.epsilon))
+        return grad_x
+
+
+class Softmax(NumpyOp):
+    """
+    softmax over last dimension
+    """
+
+    def forward(self, x):
+        """
+        x: shape(N, c)
+        """
+        ####################
+        #      code 4      #
+        ####################
+        ex = np.exp(x)
+        rowsum = np.sum(ex, axis=1)
+        rowsum = rowsum[:, np.newaxis]
+        softmax = ex / rowsum
+        self.memory['softmax'] = softmax
+        return softmax
+
+    def backward(self, grad_y):
+        """
+        grad_y: same shape as x
+        """
+        ####################
+        #      code 5      #
+        ####################
+        softmax = self.memory['softmax']
+        rows, cols = softmax.shape
+        # accumulate the Jacobian-vector product element by element
+        grad_x = [[0 for _ in range(cols)] for _ in range(rows)]
+        for i in range(rows):
+            for j in range(cols):
+                for k in range(cols):
+                    if j == k:
+                        grad_x[i][j] += (1 - softmax[i][k]) * softmax[i][k] * grad_y[i][k]
+                    else:
+                        grad_x[i][j] += -softmax[i][j] * softmax[i][k] * grad_y[i][k]
+        grad_x = np.array(grad_x)
+
+        return grad_x
+
+
+class NumpyLoss:
+
+    def __init__(self):
+        self.target = None
+
+    def get_loss(self, pred, target):
+        self.target = target
+        return (-pred * target).sum(axis=1).mean()
+
+    def backward(self):
+        return -self.target / self.target.shape[0]
+
+
+class NumpyModel:
+    def __init__(self):
+        self.W1 = np.random.normal(size=(28 * 28, 256))
+        self.W2 = np.random.normal(size=(256, 64))
+        self.W3 = np.random.normal(size=(64, 10))
+
+        # The following operators are used in forward and backward
+        self.matmul_1 = Matmul()
+        self.relu_1 = Relu()
+        self.matmul_2 = Matmul()
+        self.relu_2 = Relu()
+        self.matmul_3 = Matmul()
+        self.softmax = Softmax()
+        self.log = Log()
+
+        # The following variables are updated in backward
+        self.x1_grad, self.W1_grad = None, None
+        self.relu_1_grad = None
+        self.x2_grad, self.W2_grad = None, None
+        self.relu_2_grad = None
+        self.x3_grad, self.W3_grad = None, None
+        self.softmax_grad = None
+        self.log_grad = None
+
+    def forward(self, x):
+        x = x.reshape(-1, 28 * 28)
+
+        ####################
+        #      code 6      #
+        ####################
+        A1 = self.matmul_1.forward(x, self.W1)   # shape(N, 256)
+        z1 = self.relu_1.forward(A1)
+        A2 = self.matmul_2.forward(z1, self.W2)  # shape(N, 64)
+        z2 = self.relu_2.forward(A2)
+        A3 = self.matmul_3.forward(z2, self.W3)  # shape(N, 10)
+        z3 = self.softmax.forward(A3)
+        R = self.log.forward(z3)
+        return R
+
+    def backward(self, y):
+        ####################
+        #      code 7      #
+        ####################
+        self.log_grad = self.log.backward(y)
+        self.softmax_grad = self.softmax.backward(self.log_grad)
+        self.x3_grad, self.W3_grad = self.matmul_3.backward(self.softmax_grad)
+        self.relu_2_grad = self.relu_2.backward(self.x3_grad)
+        self.x2_grad, self.W2_grad = self.matmul_2.backward(self.relu_2_grad)
+        self.relu_1_grad = self.relu_1.backward(self.x2_grad)
+        self.x1_grad, self.W1_grad = self.matmul_1.backward(self.relu_1_grad)
+
+    def optimize(self, learning_rate):
+        self.W1 -= learning_rate * self.W1_grad
+        self.W2 -= learning_rate * self.W2_grad
+        self.W3 -= learning_rate * self.W3_grad
diff --git a/assignment-2/submission/19210680053/numpy_mnist.py b/assignment-2/submission/19210680053/numpy_mnist.py
new file mode 100644
index 0000000000000000000000000000000000000000..73e875088a352d2a35dc2a0e9a14357d3e5229aa
--- /dev/null
+++ b/assignment-2/submission/19210680053/numpy_mnist.py
@@ -0,0 +1,57 @@
+# -*- coding: utf-8 -*-
+"""
+Created on Wed Apr 28 22:11:32 2021
+
+@author: hyt
+"""
+
+import numpy as np
+from numpy_fnn import NumpyModel, NumpyLoss
+from utils import download_mnist, batch, get_torch_initialization, plot_curve, one_hot
+
+
+def mini_batch(dataset, batch_size=128):
+    data = []
+    label = []
+    for each in dataset:
+        data.append(np.array(each[0]))
+        label.append(each[1])
+    data = np.array(data)
+    label = np.array(label)
+    # shuffle indices rather than the data itself, then slice out batches
+    index = data.shape[0]
+    index = list(np.random.permutation(index))
+    return [(data[index[i:i + batch_size]], label[index[i:i + batch_size]]) for i in range(0, len(data), batch_size)]
+
+
+def numpy_run():
+    train_dataset, test_dataset = download_mnist()
+
+    model = NumpyModel()
+    numpy_loss = NumpyLoss()
+    model.W1, model.W2, model.W3 = get_torch_initialization()
+
+    train_loss = []
+
+    epoch_number = 3
+    learning_rate = 0.1
+
+    for epoch in range(epoch_number):
+        for x, y in mini_batch(train_dataset):
+            y = one_hot(y)
+
+            y_pred = model.forward(x)
+            loss = numpy_loss.get_loss(y_pred, y)
+
+            model.backward(numpy_loss.backward())
+            model.optimize(learning_rate)
+
+            train_loss.append(loss.item())
+
+        x, y = batch(test_dataset)[0]
+        accuracy = np.mean((model.forward(x).argmax(axis=1) == y))
+        print('[{}] Accuracy: {:.4f}'.format(epoch, accuracy))
+
+    plot_curve(train_loss)
+
+
+if __name__ == "__main__":
+    numpy_run()
\ No newline at end of file
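
As an optional check (not part of the submitted files), the hand-derived backward passes can be compared against a finite-difference estimate. The sketch below tests the Softmax operator; it assumes it is run from a directory where numpy_fnn.py is importable, and the printed tolerance is only indicative:

```
import numpy as np
from numpy_fnn import Softmax

np.random.seed(0)
x = np.random.randn(4, 10)
grad_y = np.random.randn(4, 10)

op = Softmax()
op.forward(x)
analytic = op.backward(grad_y)

# numerical gradient of f(x) = sum(softmax(x) * grad_y) via central differences
eps = 1e-6
numeric = np.zeros_like(x)
for i in range(x.shape[0]):
    for j in range(x.shape[1]):
        x_pos, x_neg = x.copy(), x.copy()
        x_pos[i, j] += eps
        x_neg[i, j] -= eps
        f_pos = np.sum(Softmax().forward(x_pos) * grad_y)
        f_neg = np.sum(Softmax().forward(x_neg) * grad_y)
        numeric[i, j] = (f_pos - f_neg) / (2 * eps)

print(np.max(np.abs(analytic - numeric)))  # expected to be tiny, on the order of 1e-7 or less
```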