diff --git a/assignment-2/submission/17307110367/.keep b/assignment-2/submission/17307110367/.keep
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/assignment-2/submission/17307110367/README.md b/assignment-2/submission/17307110367/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..9b99779240f17d85b2217a7a0600698deb047d32
--- /dev/null
+++ b/assignment-2/submission/17307110367/README.md
@@ -0,0 +1,186 @@
+# Course Report
+
+## Training and Testing the Model
+
+Running numpy_mnist.py directly with batch_size = 128, learning_rate = 0.1 and epoch_number = 3 gives the following results:
+
+| Epoch | Accuracy |
+| ----- | -------- |
+| [0]   | 0.9474   |
+| [1]   | 0.9654   |
+| [2]   | 0.9704   |
+
+The training loss evolves as shown below:
+
+![loss](img/loss_value1.png)
+
+**Reducing batch_size to 64** while keeping the other parameters unchanged gives:
+
+| Epoch | Accuracy |
+| ----- | -------- |
+| [0]   | 0.9548   |
+| [1]   | 0.9682   |
+| [2]   | 0.9679   |
+
+With the smaller batch size the model reaches a higher accuracy after the first epoch (it performs more parameter updates per epoch), but the per-epoch improvement afterwards is smaller and the accuracy even fluctuates slightly.
+
+## Replacing mini_batch
+
+```
+def mini_batch(dataset, batch_size=128, seed=0):
+    np.random.seed(seed)
+    x_train = dataset.train_data
+    y_train = dataset.train_labels
+    m = y_train.shape[0]
+    # m is the total number of samples
+    mini_batchs = []
+    permutation = list(np.random.permutation(m))
+    # shuffle the sample order
+    shuffle_X = x_train[permutation, :, :]
+    shuffle_Y = y_train[permutation]
+    num_mini_batch = int(m // batch_size)
+    # num_mini_batch is the number of full mini-batches
+    for i in range(num_mini_batch):
+        mini_batch_x = shuffle_X[i * batch_size:(i + 1) * batch_size, :, :]
+        mini_batch_y = shuffle_Y[i * batch_size:(i + 1) * batch_size]
+        mini_batch = (mini_batch_x, mini_batch_y)
+        mini_batchs.append(mini_batch)
+    if m % batch_size != 0:
+        # if the sample count is not divisible by batch_size, keep the remainder as a final batch
+        mini_batch_X = shuffle_X[num_mini_batch * batch_size:m, :, :]
+        mini_batch_Y = shuffle_Y[num_mini_batch * batch_size:m]
+        mini_batch = (mini_batch_X, mini_batch_Y)
+        mini_batchs.append(mini_batch)
+    return mini_batchs
+```
+
+The overall idea is to shuffle the samples and then repeatedly take batch_size samples at a time to form one mini-batch; the mini_batch function finally returns the list of all mini-batches.
+
+After reimplementing mini_batch with NumPy, running numpy_mnist.py with batch_size = 128, learning_rate = 0.1 and epoch_number = 3 gives:
+
+| Epoch | Accuracy |
+| ----- | -------- |
+| [0]   | 0.8780   |
+| [1]   | 0.8994   |
+| [2]   | 0.9099   |
+
+## Deriving the Backpropagation Formulas
+
+**Matmul**
+
+Matmul computes
+$$
+Y = XW
+$$
+Writing the upstream gradient as $\frac{\partial L}{\partial Y}$ (grad_y in the code), the chain rule for matrix products gives
+$$
+\frac{\partial L}{\partial X} = \frac{\partial L}{\partial Y}W^{T}
+$$
+
+$$
+\frac{\partial L}{\partial W} = X^{T}\frac{\partial L}{\partial Y}
+$$
+
+The corresponding code is:
+
+```
+grad_W = np.matmul(self.memory['x'].T, grad_y)
+grad_x = np.matmul(grad_y, self.memory['W'].T)
+```
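+
+These two formulas can be sanity-checked against a numerical finite-difference estimate; a minimal sketch, assuming the scalar objective L = sum(Y * grad_y) so that grad_y plays the role of the upstream gradient:
+
+```
+import numpy as np
+
+np.random.seed(0)
+x = np.random.randn(4, 3)
+W = np.random.randn(3, 2)
+grad_y = np.random.randn(4, 2)               # upstream gradient dL/dY
+
+# analytic gradients, same formulas as above
+grad_W = x.T @ grad_y
+grad_x = grad_y @ W.T
+
+# numerical estimate of dL/dW[i, j] for L = sum((x @ W) * grad_y)
+eps, i, j = 1e-6, 1, 0
+W_pos, W_neg = W.copy(), W.copy()
+W_pos[i, j] += eps
+W_neg[i, j] -= eps
+numeric = (np.sum((x @ W_pos) * grad_y) - np.sum((x @ W_neg) * grad_y)) / (2 * eps)
+print(np.isclose(numeric, grad_W[i, j]))     # expected: True
+```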
+
+**Relu**
+
+Relu computes
+$$
+Y=\begin{cases}
+X&X>0\\\\
+0&\text{otherwise}
+\end{cases}
+$$
+so
+$$
+\frac{\partial Y}{\partial X}=\begin{cases}
+1&X>0\\\\
+0&\text{otherwise}
+\end{cases}
+$$
+The corresponding code is:
+
+```
+grad_x = np.where(self.memory['x'] > 0, grad_y, np.zeros_like(self.memory['x']))
+```
+
+That is, wherever x is greater than 0, grad_x simply passes the incoming grad_y through; everywhere else the result is 0.
+
+**Log**
+
+Log computes
+$$
+Y=\ln(X+\epsilon)
+$$
+and therefore
+$$
+\frac{\partial Y}{\partial X} = \frac1{X+\epsilon}
+$$
+The corresponding code is:
+
+```
+grad_x = np.multiply(1./(self.memory['x'] + self.epsilon), grad_y)
+```
+
+**Softmax**
+
+Softmax computes, for each component,
+$$
+y_i=\frac{\exp(x_i)}{\sum_{k=1}^c\exp(x_k)}
+$$
+Following the derivation of its Jacobian on pages 411-412 of Prof. Qiu's textbook (the result is quoted directly here),
+$$
+\frac{\partial Y}{\partial X} = \mathrm{diag}(\mathrm{softmax}(x))-\mathrm{softmax}(x)\,\mathrm{softmax}(x)^{T}
+$$
+The corresponding code is:
+
+```
+out = self.memory['out']
+grad_x = []
+for idx in range(out.shape[0]):
+    dout = np.diag(out[idx]) - np.outer(out[idx], out[idx])
+    grad = np.matmul(dout, grad_y[idx])
+    grad_x.append(grad)
+grad_x = np.array(grad_x)
+```
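+
+**A remark on Softmax + Log + the loss**
+
+When Softmax and Log are followed by the loss used here (the batch mean of the negative target-weighted log-probabilities), the chained gradient with respect to the pre-softmax input collapses to the well-known closed form (softmax(x) - target) / N. A minimal sketch that mirrors the chain NumpyLoss.backward -> Log.backward -> Softmax.backward (with the epsilon term omitted) and checks the closed form numerically:
+
+```
+import numpy as np
+
+np.random.seed(0)
+N, c = 5, 10
+logits = np.random.randn(N, c)
+target = np.eye(c)[np.random.randint(0, c, N)]      # one-hot labels
+
+p = np.exp(logits - logits.max(axis=1, keepdims=True))
+p /= p.sum(axis=1, keepdims=True)                    # softmax output
+
+# chained backward: loss -> Log -> Softmax (epsilon omitted)
+grad_log_in = (-target / N) * (1.0 / p)
+grad_logits = np.stack([
+    (np.diag(p[i]) - np.outer(p[i], p[i])) @ grad_log_in[i]
+    for i in range(N)
+])
+
+# closed form for log-softmax combined with negative log-likelihood
+print(np.allclose(grad_logits, (p - target) / N))    # expected: True
+```
+
+This identity is why frameworks usually fuse log-softmax with the negative log-likelihood loss; the per-sample Jacobian loop could be replaced by the closed form, although the code here keeps the explicit Jacobian.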
diff --git a/assignment-2/submission/17307110367/img/.keep b/assignment-2/submission/17307110367/img/.keep
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/assignment-2/submission/17307110367/img/loss_value1.png b/assignment-2/submission/17307110367/img/loss_value1.png
new file mode 100644
index 0000000000000000000000000000000000000000..dbb3c56e9d1036c37ea3aa3258a7ae0acd9eb79b
Binary files /dev/null and b/assignment-2/submission/17307110367/img/loss_value1.png differ
diff --git a/assignment-2/submission/17307110367/numpy_fnn.py b/assignment-2/submission/17307110367/numpy_fnn.py
new file mode 100644
index 0000000000000000000000000000000000000000..ea2d68f88793d261f1ead5fa836c421f4b3ed140
--- /dev/null
+++ b/assignment-2/submission/17307110367/numpy_fnn.py
@@ -0,0 +1,173 @@
+import numpy as np
+
+
+class NumpyOp:
+
+    def __init__(self):
+        self.memory = {}
+        self.epsilon = 1e-12
+
+
+class Matmul(NumpyOp):
+
+    def forward(self, x, W):
+        """
+        x: shape(N, d)
+        W: shape(d, d')
+        """
+        self.memory['x'] = x
+        self.memory['W'] = W
+        h = np.matmul(x, W)
+        return h
+
+    def backward(self, grad_y):
+        """
+        grad_y: shape(N, d')
+        """
+        grad_W = np.matmul(self.memory['x'].T, grad_y)
+        grad_x = np.matmul(grad_y, self.memory['W'].T)
+
+        return grad_x, grad_W
+
+
+class Relu(NumpyOp):
+
+    def forward(self, x):
+        self.memory['x'] = x
+        return np.where(x > 0, x, np.zeros_like(x))
+
+    def backward(self, grad_y):
+        """
+        grad_y: same shape as x
+        """
+        grad_x = np.where(self.memory['x'] > 0, grad_y, np.zeros_like(self.memory['x']))
+
+        return grad_x
+
+
+class Log(NumpyOp):
+
+    def forward(self, x):
+        """
+        x: shape(N, c)
+        """
+        out = np.log(x + self.epsilon)
+        self.memory['x'] = x
+
+        return out
+
+    def backward(self, grad_y):
+        """
+        grad_y: same shape as x
+        """
+        grad_x = np.multiply(1./(self.memory['x'] + self.epsilon), grad_y)
+
+        return grad_x
+
+
+class Softmax(NumpyOp):
+    """
+    softmax over last dimension
+    """
+
+    def forward(self, x):
+        """
+        x: shape(N, c)
+        """
+        out = []
+        for index in range(x.shape[0]):
+            temp = x[index]
+            temp = temp - max(temp)
+            temp = np.exp(temp)
+            out.append(temp / sum(temp))
+        out = np.array(out)
+        self.memory['out'] = out
+        return out
+
+    def backward(self, grad_y):
+        """
+        grad_y: same shape as x
+        """
+        out = self.memory['out']
+        grad_x = []
+        for idx in range(out.shape[0]):
+            dout = np.diag(out[idx]) - np.outer(out[idx], out[idx])
+            grad = np.matmul(dout, grad_y[idx])
+            grad_x.append(grad)
+        grad_x = np.array(grad_x)
+
+        return grad_x
+
+
+class NumpyLoss:
+
+    def __init__(self):
+        self.target = None
+
+    def get_loss(self, pred, target):
+        self.target = target
+        return (-pred * target).sum(axis=1).mean()
+
+    def backward(self):
+        return -self.target / self.target.shape[0]
+
+
+class NumpyModel:
+    def __init__(self):
+        self.W1 = np.random.normal(size=(28 * 28, 256))
+        self.W2 = np.random.normal(size=(256, 64))
+        self.W3 = np.random.normal(size=(64, 10))
+
+        # operators used in forward and backward
+        self.matmul_1 = Matmul()
+        self.relu_1 = Relu()
+        self.matmul_2 = Matmul()
+        self.relu_2 = Relu()
+        self.matmul_3 = Matmul()
+        self.softmax = Softmax()
+        self.log = Log()
+
+        # the following variables are filled in during backward; softmax_grad, log_grad, etc. are the
+        # gradients propagated back through each operator (partial derivatives of the loss w.r.t. the operator's input)
+        self.x1_grad, self.W1_grad = None, None
+        self.relu_1_grad = None
+        self.x2_grad, self.W2_grad = None, None
+        self.relu_2_grad = None
+        self.x3_grad, self.W3_grad = None, None
+        self.softmax_grad = None
+        self.log_grad = None
+
+    def forward(self, x):
+        x = x.reshape(-1, 28 * 28)
+        x = self.relu_1.forward(self.matmul_1.forward(x, self.W1))
+        x = self.relu_2.forward(self.matmul_2.forward(x, self.W2))
+        x = self.matmul_3.forward(x, self.W3)
+        x = self.softmax.forward(x)
+        x = self.log.forward(x)
+        return x
+
+    def backward(self, y):
+        self.log_grad = self.log.backward(y)
+        grad_y = self.log_grad
+        self.softmax_grad = self.softmax.backward(grad_y)
+        grad_y = self.softmax_grad
+        self.x3_grad, self.W3_grad = self.matmul_3.backward(grad_y)
+        grad_y = self.x3_grad
+        self.relu_2_grad = self.relu_2.backward(grad_y)
+        grad_y = self.relu_2_grad
+        self.x2_grad, self.W2_grad = self.matmul_2.backward(grad_y)
+        grad_y = self.x2_grad
+        self.relu_1_grad = self.relu_1.backward(grad_y)
+        grad_y = self.relu_1_grad
+        self.x1_grad, self.W1_grad = self.matmul_1.backward(grad_y)
+
+    def optimize(self, learning_rate):
+        self.W1 -= learning_rate * self.W1_grad
+        self.W2 -= learning_rate * self.W2_grad
+        self.W3 -= learning_rate * self.W3_grad
diff --git a/assignment-2/submission/17307110367/numpy_mnist.py b/assignment-2/submission/17307110367/numpy_mnist.py
new file mode 100644
index 0000000000000000000000000000000000000000..daea871d110daf90996073d606bef96da6d2f827
--- /dev/null
+++ b/assignment-2/submission/17307110367/numpy_mnist.py
@@ -0,0 +1,62 @@
+import numpy as np
+from numpy_fnn import NumpyModel, NumpyLoss
+from utils import download_mnist, batch, get_torch_initialization, plot_curve, one_hot
+
+def mini_batch(dataset, batch_size=128, seed=0):
+    np.random.seed(seed)
+    x_train = dataset.train_data
+    y_train = dataset.train_labels
+    m = y_train.shape[0]
+    # m is the total number of samples
+    mini_batchs = []
+    permutation = list(np.random.permutation(m))
+    # shuffle the sample order
+    shuffle_X = x_train[permutation, :, :]
+    shuffle_Y = y_train[permutation]
+    num_mini_batch = int(m // batch_size)
+    # num_mini_batch is the number of full mini-batches
+    for i in range(num_mini_batch):
+        mini_batch_x = shuffle_X[i * batch_size:(i + 1) * batch_size, :, :]
+        mini_batch_y = shuffle_Y[i * batch_size:(i + 1) * batch_size]
+        mini_batch = (mini_batch_x, mini_batch_y)
+        mini_batchs.append(mini_batch)
+    if m % batch_size != 0:
+        # if the sample count is not divisible by batch_size, keep the remainder as a final batch
+        mini_batch_X = shuffle_X[num_mini_batch * batch_size:m, :, :]
+        mini_batch_Y = shuffle_Y[num_mini_batch * batch_size:m]
+        mini_batch = (mini_batch_X, mini_batch_Y)
+        mini_batchs.append(mini_batch)
+    return mini_batchs
+
+def numpy_run():
+    train_dataset, test_dataset = download_mnist()
+    model = NumpyModel()
+    numpy_loss = NumpyLoss()
+    model.W1, model.W2, model.W3 = get_torch_initialization()
+
+    train_loss = []
+
+    epoch_number = 3
+    learning_rate = 0.1
+
+    for epoch in range(epoch_number):
+        for x, y in mini_batch(train_dataset):
+            y = one_hot(y)
+            y_pred = model.forward(x.numpy()/255)
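+            # dividing by 255 scales the raw pixel values from [0, 255] to [0, 1];
+            # the commented-out call below is the unnormalized variant, kept for comparison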
+ #y_pred = model.forward(x.numpy()) + loss = numpy_loss.get_loss(y_pred, y) + + model.backward(numpy_loss.backward()) + model.optimize(learning_rate) + + train_loss.append(loss.item()) + + x, y = batch(test_dataset)[0] + accuracy = np.mean((model.forward(x).argmax(axis=1) == y)) + print('[{}] Accuracy: {:.4f}'.format(epoch, accuracy)) + + plot_curve(train_loss) + + +if __name__ == "__main__": + numpy_run()