diff --git a/assignment-2/submission/19307130062/README.md b/assignment-2/submission/19307130062/README.md
index 6615e86187728bd84f94ff46c3ef55d6595e1b00..79d593179f6bcd306650008b80a89cf352bc2ba0 100644
--- a/assignment-2/submission/19307130062/README.md
+++ b/assignment-2/submission/19307130062/README.md
@@ -520,4 +520,37 @@ def mini_batch(dataset, batch_size = 128, numpy = False):
 ### Weight Initialization
 
-To be filled in...
\ No newline at end of file
+#### The default initialization of torch.nn.Linear
+
+First, let us check how the `torch.nn.Linear` layers in the original code initialize their parameters. Consulting the PyTorch documentation reveals the following:
+
+![Default initialization of torch.nn.Linear, from the PyTorch documentation](img/Pytorch.png)
+
+Here $k$ denotes the reciprocal of the number of neurons in the previous layer (in_features), while out_features is the number of neurons in the current layer; the parameters are drawn from the uniform distribution $\mathcal U(-\sqrt{k},\ \sqrt{k})$. This is similar in form to He initialization, but the constant differs: the He uniform bound is $\sqrt{6/\text{in\_features}}$ rather than $\sqrt{1/\text{in\_features}}$.
+
+#### Exploring other initialization methods
+
+Four methods were tested; a minimal sketch of their sampling rules follows the list:
+
+- Xavier initialization with uniform sampling
+- He (Kaiming) initialization with uniform sampling
+- Xavier initialization with Gaussian sampling
+- He (Kaiming) initialization with Gaussian sampling
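+
+The sketch below summarizes the four sampling rules under the weight-shape convention `(in_features, out_features)` used in this repository. The helper `init_weight` is hypothetical; the actual implementation lives in `get_torch_initialization` in `numpy_mnist.py`, which additionally applies an extra gain to the output layer:
+
+```python
+import numpy as np
+
+def init_weight(fan_in, fan_out, method):
+    # Xavier targets variance 2 / (fan_in + fan_out); He targets 2 / fan_in.
+    # A uniform U(-r, r) sample has variance r^2 / 3, hence the sqrt(6 / ...) bounds.
+    if method == 'Xavier_Uniform':
+        r = np.sqrt(6.0 / (fan_in + fan_out))
+        return np.random.uniform(-r, r, (fan_in, fan_out))
+    if method == 'HeKaiming_Uniform':
+        r = np.sqrt(6.0 / fan_in)
+        return np.random.uniform(-r, r, (fan_in, fan_out))
+    if method == 'Xavier_Normal':
+        return np.random.normal(0., np.sqrt(2.0 / (fan_in + fan_out)), (fan_in, fan_out))
+    if method == 'HeKaiming_Normal':
+        return np.random.normal(0., np.sqrt(2.0 / fan_in), (fan_in, fan_out))
+    raise ValueError('unknown method: ' + method)
+```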
+
+#### $\mathrm{epoch} = 3,\ \alpha = 0.1$
+
+![Training loss curves under the five initialization schemes](img/6.1.png)
+
+Test accuracy after each training epoch:
+
+| Epoch | Torch | Xavier_Uniform | HeKaiming_Uniform | Xavier_Normal | HeKaiming_Normal |
+| :---: | :-------: | :------------: | :---------------: | :-----------: | :--------------: |
+| $0$ | $94.38\%$ | $95.59\%$ | $95.66\%$ | $95.55\%$ | $95.58\%$ |
+| $1$ | $95.47\%$ | $96.19\%$ | $96.72\%$ | $96.68\%$ | $96.83\%$ |
+| $2$ | $96.91\%$ | $97.20\%$ | $96.81\%$ | $97.12\%$ | $96.97\%$ |
+
+#### Summary
+
+- Xavier and He initialization do have an edge over the PyTorch default here, but in these tests He initialization was occasionally less stable, and choosing its constants required some tuning.
\ No newline at end of file
diff --git a/assignment-2/submission/19307130062/img/6.1.png b/assignment-2/submission/19307130062/img/6.1.png
new file mode 100644
index 0000000000000000000000000000000000000000..6ce20298bc07a084ce60460219d0ec97c6da753f
Binary files /dev/null and b/assignment-2/submission/19307130062/img/6.1.png differ
diff --git a/assignment-2/submission/19307130062/img/6.2.png b/assignment-2/submission/19307130062/img/6.2.png
new file mode 100644
index 0000000000000000000000000000000000000000..37fe7567104f9c5d6aff3c637cf351ab372f4721
Binary files /dev/null and b/assignment-2/submission/19307130062/img/6.2.png differ
diff --git a/assignment-2/submission/19307130062/img/6.3.png b/assignment-2/submission/19307130062/img/6.3.png
new file mode 100644
index 0000000000000000000000000000000000000000..ca5379b9a083780667237d99a41eee14bc4479cb
Binary files /dev/null and b/assignment-2/submission/19307130062/img/6.3.png differ
diff --git a/assignment-2/submission/19307130062/img/Pytorch.png b/assignment-2/submission/19307130062/img/Pytorch.png
new file mode 100644
index 0000000000000000000000000000000000000000..bac6ce64671b021b2420d290d7cfa67bf6519298
Binary files /dev/null and b/assignment-2/submission/19307130062/img/Pytorch.png differ
diff --git a/assignment-2/submission/19307130062/numpy_fnn.py b/assignment-2/submission/19307130062/numpy_fnn.py
index c5c95457e32cddf3ea2cb9effb780331e632d2bd..0f05d097653cfde4eb20a9749462e20d5a190aed 100644
--- a/assignment-2/submission/19307130062/numpy_fnn.py
+++ b/assignment-2/submission/19307130062/numpy_fnn.py
@@ -66,7 +66,7 @@ class Log(NumpyOp):
         """
 
         # code3
-        grad_x = grad_y * (1.0 / self.memory['x'])
+        grad_x = grad_y * (1.0 / (self.memory['x'] + self.epsilon))  # epsilon avoids division by zero
         return grad_x
 
diff --git a/assignment-2/submission/19307130062/numpy_mnist.py b/assignment-2/submission/19307130062/numpy_mnist.py
index 4e2bf7a7f624444ede1e6b317d46b0052f1ac675..d6cf155da24ea5279c4c71dfc25bfec6b4c39fcc 100644
--- a/assignment-2/submission/19307130062/numpy_mnist.py
+++ b/assignment-2/submission/19307130062/numpy_mnist.py
@@ -58,14 +58,67 @@ def numpy_run():
     plot_curve(train_loss)
 
 
+def get_torch_initialization(numpy = True, init_type = 'Torch'):
+    import torch
+
+    if init_type == 'Torch':
+        # PyTorch default: U(-sqrt(k), sqrt(k)) with k = 1 / in_features
+        fc1 = torch.nn.Linear(28 * 28, 256)
+        fc2 = torch.nn.Linear(256, 64)
+        fc3 = torch.nn.Linear(64, 10)
+
+        if numpy:
+            W1 = fc1.weight.T.detach().clone().numpy()
+            W2 = fc2.weight.T.detach().clone().numpy()
+            W3 = fc3.weight.T.detach().clone().numpy()
+        else:
+            W1 = fc1.weight.T.detach().clone().data
+            W2 = fc2.weight.T.detach().clone().data
+            W3 = fc3.weight.T.detach().clone().data
+
+    elif init_type == 'Xavier_Uniform':
+        print('Xavier_Uniform')
+        # U(-r, r) with r = sqrt(6 / (fan_in + fan_out))
+        r = np.sqrt(6.0 / (28 * 28 + 256))
+        W1 = np.random.uniform(-r, r, (28 * 28, 256))
+        r = np.sqrt(6.0 / (256 + 64))
+        W2 = np.random.uniform(-r, r, (256, 64))
+        r = np.sqrt(6.0 / (64 + 10))
+        W3 = np.random.uniform(-r, r, (64, 10))
+
+    elif init_type == 'HeKaiming_Uniform':
+        print('HeKaiming_Uniform')
+        # U(-r, r) with r = sqrt(6 / fan_in); the output layer gets an extra gain of 4
+        r = np.sqrt(6.0 / (28 * 28))
+        W1 = np.random.uniform(-r, r, (28 * 28, 256))
+        r = np.sqrt(6.0 / 256)
+        W2 = np.random.uniform(-r, r, (256, 64))
+        r = 4 * np.sqrt(6.0 / 64)
+        W3 = np.random.uniform(-r, r, (64, 10))
+
+    elif init_type == 'Xavier_Normal':
+        print('Xavier_Normal')
+        # N(0, sigma^2) with sigma^2 = 2 / (fan_in + fan_out)
+        sigma = 2.0 / (28 * 28 + 256)
+        W1 = np.random.normal(0., np.sqrt(sigma), (28 * 28, 256))
+        sigma = 2.0 / (256 + 64)
+        W2 = np.random.normal(0., np.sqrt(sigma), (256, 64))
+        sigma = 2.0 / (64 + 10)
+        W3 = np.random.normal(0., np.sqrt(sigma), (64, 10))
+
+    elif init_type == 'HeKaiming_Normal':
+        print('HeKaiming_Normal')
+        # N(0, sigma^2) with sigma^2 = 2 / fan_in; the output layer's variance
+        # gets an extra gain of 16 (= 4^2)
+        sigma = 2.0 / (28 * 28)
+        W1 = np.random.normal(0., np.sqrt(sigma), (28 * 28, 256))
+        sigma = 2.0 / 256
+        W2 = np.random.normal(0., np.sqrt(sigma), (256, 64))
+        sigma = 16 * 2.0 / 64
+        W3 = np.random.normal(0., np.sqrt(sigma), (64, 10))
+
+    return W1, W2, W3
+
 
-def my_numpy_run(learning_rate = 0.01, epoch_number = 3, update_type = None):
+def my_numpy_run(learning_rate = 0.1, epoch_number = 3, update_type = None, init_type = 'Torch'):
+    print('learning rate = ' + str(learning_rate))
     train_dataset, test_dataset = download_mnist()
     model = NumpyModel(learning_rate, update_type, iter_times = 1407)
     numpy_loss = NumpyLoss()
-    model.W1, model.W2, model.W3 = get_torch_initialization()
+    model.W1, model.W2, model.W3 = get_torch_initialization(init_type = init_type)
 
     train_loss = []
 
@@ -136,8 +189,47 @@ def multi_test():
     plt.savefig("5.4.png", format = 'png', dpi = 1000)
 
 
+def multi_test_2():
+    from matplotlib import pyplot as plt
+    cases = ['HeKaiming_Normal',
+             'HeKaiming_Uniform',
+             'Torch',
+             'Xavier_Uniform',
+             'Xavier_Normal',
+             ]
+
+    colors = ['#1f77b4', '#ff7f0e', '#2ca02c', '#d62728', '#9467bd', '#8c564b',
+              '#e377c2', '#7f7f7f', '#bcbd22', '#17becf', '#1a55FF']
+
+    # rcParams axes.prop_cycle could cycle through the colors automatically, e.g.:
+    # mpl.rcParams['axes.prop_cycle'] = cycler(color = colors)
+    plt.rcParams['figure.figsize'] = (10.0, 4.0)  # set the figure size
+    fig = plt.figure()
+    ax = fig.add_axes([0.1, 0.1, 0.6, 0.75])
+    plt.xlabel('step')
+    plt.ylabel('loss value')
+    for i in range(len(cases)):
+        print('Test ' + str(cases[i]) + ' :')
+        data = my_numpy_run(init_type = cases[i])  # per-step training loss
+        print('-------------\n')
+        ax.plot(range(len(data)), data, linewidth = 0.5, label = str(cases[i]))
+    ax.legend(bbox_to_anchor = (1.05, 1), loc = 'upper left', borderaxespad = 0.)
+
+    # note: this reuses the filename written by multi_test(); rename one of the two
+    # if both functions are run
+    plt.savefig("5.4.png", format = 'png', dpi = 1000)
+
 
 if __name__ == "__main__":
     numpy_run()
     # my_numpy_run(learning_rate = 0.05, update_type = 'RMSprop')
     # multi_test()
+    # multi_test_2()
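+    # A hypothetical single-scheme run matching the defaults behind the README
+    # table would be:
+    # my_numpy_run(learning_rate = 0.1, epoch_number = 3, init_type = 'HeKaiming_Normal')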