diff --git a/assignment-1/submission/18340246016/README.md b/assignment-1/submission/18340246016/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..c998960bf2dcbd812771d18b990d086ef231b740
--- /dev/null
+++ b/assignment-1/submission/18340246016/README.md
@@ -0,0 +1,485 @@
+ **1. KNN实现过程**
+ 
+*1.1 辅助方法的实现*
+
+```
+# 这里我们用train_test_split实现训练集与验证集以给定的比例划分与打乱
+def train_test_split(self,x,y,rate):
+        shuffled_indexes = np.random.permutation(len(x))
+        test_size = int(len(x) * rate)
+        train_index = shuffled_indexes[test_size:]
+        test_index = shuffled_indexes[:test_size]
+        return x[train_index], x[test_index], y[train_index], y[test_index]
+```
+
+
+```
+# 用distance方法计算两组向量之间的欧式距离
+def distance(self,v1,v2):
+    
+        weight_array = (v1-v2)**2
+        weight_array_sum = np.sum(weight_array)
+        return weight_array_sum**(0.5)
+```
+
+
+
+*1.2 fit方法的实现*
+
+
+```
+# 我们用fit方法实现 1. 对训练数据的归一化 2. 训练数据内部subdivide为训练集与测试集，取最优K值 3. 将训练数据赋值self.data
+def fit(self, train_data, train_label):
+        
+        # 归一化
+        mu = np.mean(train_data, axis=0)
+        sigma = np.std(train_data, axis=0)
+        train_data = (train_data - mu) / sigma
+        
+        # 划分训练集/验证集 with rate =0.3
+        X_train, X_test, Y_train, Y_test = self.train_test_split(train_data,train_label,0.3)
+        
+        # 对于不同的K[1-0.5*len(train)]，计算验证集到训练集的欧氏距离
+        best_k=0
+        k_candi=0;
+        for k in range(1,int(0.5*len(X_train))+1):
+
+            true_couter=0
+            for test_counter in range(0,len(X_test)):
+                pos_vec_list=[]
+                
+                for train_counter in range(0,len(X_train)):
+                    pos_vec = np.array([self.distance(X_test[test_counter],X_train[train_counter]),Y_train[train_counter]])
+                    pos_vec_list.append(pos_vec)     
+                    
+                #对距离list根据距离排序
+                pos_vec_list = np.array(pos_vec_list)
+                pos_vec_list_sorted = pos_vec_list[np.lexsort(pos_vec_list[:,::-1].T)]
+                #k-近邻结果列表
+                result_list = pos_vec_list_sorted[:k][:,1]
+            
+
+            
+                #test预测结果
+                label = int(result_list[np.argmax(result_list)])
+            
+            
+                #检验本次test在给定k下是否正确
+                if (label == Y_test[test_counter] ):
+                    true_couter=true_couter+1
+            
+
+            #最优K取值
+            if (true_couter >= best_k):
+                best_k = true_couter
+                k_candi = k
+                
+        self.k = k_candi
+        self.train_data = train_data
+        self.train_labels = train_label
+        return self.k
+```
+
+
+*1.3 predict方法的实现*
+
+```
+    # 根据fit方法带入的数据data以及训练结果K，预测test_label
+    def predict(self, test_data):
+        test_label=[]
+        result_list=[]
+        
+        # 归一化测试数据
+        mu = np.mean(test_data, axis=0)
+        sigma = np.std(test_data, axis=0)
+        test_data = (test_data - mu) / sigma
+        
+        for i in range (0,len(test_data)):
+            pos_vec_list=[]    
+            for m in range(0,len(self.train_data)):
+                pos_vec = np.array([self.distance(self.train_data[m],test_data[i]),self.train_labels[m]])
+                pos_vec_list.append(pos_vec)
+            
+            
+            # KNN结果列表
+            pos_vec_list = np.array(pos_vec_list)
+            pos_vec_list_sorted = pos_vec_list[np.lexsort(pos_vec_list[:,::-1].T)]
+
+            result_list = pos_vec_list_sorted[:(self.k)][:,1]
+            test_label.append(result_list[np.argmax(result_list)])
+            
+        return test_label
+```
+
+
+
+ **2. 高斯分布探究试验** 
+
+*2.0 生成以及展示函数*
+
+
+```
+def generate (amount_1,amount_2,amount_3):
+    
+    mean = (2, 2)
+    cov = np.array([[1,0], [0, 1]])
+    x = np.random.multivariate_normal(mean, cov, (amount_1,))  
+    
+    mean = (4, 6)
+    cov = np.array([[2, 0], [0, 2]])
+    y = np.random.multivariate_normal(mean, cov, (amount_2,))  
+
+    mean = (10, 10)
+    cov = np.array([[2,1],[1,3]])
+    z = np.random.multivariate_normal(mean, cov, (amount_3,))  
+    
+    
+    data = np.concatenate([x,y,z])
+
+    label = np.concatenate([
+            np.zeros((amount_1,),dtype=int),
+            np.ones((amount_2,),dtype=int),
+            np.ones((amount_3,),dtype=int)*2
+        ])
+
+    return model.train_test_split(data,label,0.2)
+
+```
+
+
+```
+# 展示函数
+def display(x,y):
+    type1_x = []; type1_y = []
+    type2_x = []; type2_y = []
+    type3_x = []; type3_y = []
+
+    plt.figure(figsize=(8,6))
+
+    for i in range(0,len(x)):
+        if(y[i]==0):
+            type1_x.append(x[i][0])
+            type1_y.append(x[i][1])
+        if(y[i]==1):
+            type2_x.append(x[i][0])
+            type2_y.append(x[i][1])
+        if(y[i]==2):
+            type3_x.append(x[i][0])
+            type3_y.append(x[i][1])
+        
+    fig = plt.figure(figsize = (10, 6))
+    ax = fig.add_subplot(111)
+
+    type1 = ax.scatter(type1_x, type1_y, s = 30, c = 'brown')
+    type2 = ax.scatter(type2_x, type2_y, s = 30, c = 'lime')
+    type3 = ax.scatter(type3_x, type3_y, s = 30, c = "darkviolet")
+
+
+
+    ax.legend((type1, type2, type3), ("A", "B", "C"), loc = 0)
+
+    plt.show()
+
+```
+
+
+
+***2.1 均值集中 + xy分布分散 = 对应的k***
+
+$$
+\begin{array}{l}
+\Sigma=\left[\begin{array}{cc}
+4 & 0 \\\\
+0 & 2
+\end{array}\right] 
+\mu=\left[\begin{array}{ll}
+10 & 10
+\end{array}\right]
+\end{array}
+$$
+
+
+$$
+\begin{array}{l}
+\Sigma=\left[\begin{array}{cc}
+2 & 0 \\\\
+0 & 4
+\end{array}\right] 
+\mu=\left[\begin{array}{ll}
+8 & 8
+\end{array}\right]
+\end{array}
+$$
+
+$$
+\begin{array}{l}
+\Sigma=\left[\begin{array}{cc}
+8 & 1 \\\\
+1 & 6
+\end{array}\right] 
+\mu=\left[\begin{array}{ll}
+12 & 12
+\end{array}\right]
+\end{array}
+$$
+
+
+*训练集分布*
+
+![输入图片说明](https://images.gitee.com/uploads/images/2021/0401/111859_687c068b_8823823.png "屏幕截图.png")
+
+*测试集分布*
+
+![输入图片说明](https://images.gitee.com/uploads/images/2021/0401/111906_d0e2e134_8823823.png "屏幕截图.png")
+
+*K与准确率结果*
+| k                | 准确率  |
+| ---------------- | ------ |
+| 4 | 62.5% | 
+
+***2.2 mean集中 + xy集中 = 对应的k***
+
+$$
+\begin{array}{l}
+\Sigma=\left[\begin{array}{cc}
+1 & 0 \\\\
+0 & 1
+\end{array}\right] 
+\mu=\left[\begin{array}{ll}
+10 & 10
+\end{array}\right]
+\end{array}
+$$
+
+
+$$
+\begin{array}{l}
+\Sigma=\left[\begin{array}{cc}
+2 & 0 \\\\
+0 & 1
+\end{array}\right] 
+\mu=\left[\begin{array}{ll}
+8 & 8
+\end{array}\right]
+\end{array}
+$$
+
+$$
+\begin{array}{l}
+\Sigma=\left[\begin{array}{cc}
+2 & 1 \\\\
+1 & 1
+\end{array}\right] 
+\mu=\left[\begin{array}{ll}
+12 & 12
+\end{array}\right]
+\end{array}
+$$
+
+*训练集分布*
+
+![训练集分布](https://images.gitee.com/uploads/images/2021/0401/105433_e7ec4619_8823823.png "屏幕截图.png")
+
+*测试集分布*
+
+![测试集分布](https://images.gitee.com/uploads/images/2021/0401/105459_56af4a90_8823823.png "屏幕截图.png")
+
+*K与准确率结果*
+| k                | 准确率  |
+| ---------------- | ------ |
+| 1 | 78.75% | 
+
+
+***2.3 mean分散 + xy分散 = 对应的k***
+
+
+$$
+\begin{array}{l}
+\Sigma=\left[\begin{array}{cc}
+4 & 0 \\\\
+0 & 2
+\end{array}\right] 
+\mu=\left[\begin{array}{ll}
+2 & 2
+\end{array}\right]
+\end{array}
+$$
+
+
+$$
+\begin{array}{l}
+\Sigma=\left[\begin{array}{cc}
+2 & 0 \\\\
+0 & 4
+\end{array}\right] 
+\mu=\left[\begin{array}{ll}
+4 & 6
+\end{array}\right]
+\end{array}
+$$
+
+$$
+\begin{array}{l}
+\Sigma=\left[\begin{array}{cc}
+8 & 1 \\\\
+1 & 6
+\end{array}\right] 
+\mu=\left[\begin{array}{ll}
+10 & 10
+\end{array}\right]
+\end{array}
+$$
+
+
+*训练集分布*
+
+![输入图片说明](https://images.gitee.com/uploads/images/2021/0401/112426_09535d36_8823823.png "屏幕截图.png")
+
+*测试集分布*
+
+![输入图片说明](https://images.gitee.com/uploads/images/2021/0401/112437_53a32eec_8823823.png "屏幕截图.png")
+
+*K与准确率结果*
+
+
+| k                | 准确率  |
+| ---------------- | ------ |
+| 2 | 86.25% | 
+
+
+***2.4 mean分散 + xy集中 = 对应的k***
+
+
+
+
+$$
+\begin{array}{l}
+\Sigma=\left[\begin{array}{cc}
+1 & 0 \\\\
+0 & 1
+\end{array}\right] 
+\mu=\left[\begin{array}{ll}
+2 & 2
+\end{array}\right]
+\end{array}
+$$
+
+
+$$
+\begin{array}{l}
+\Sigma=\left[\begin{array}{cc}
+2 & 0 \\\\
+0 & 2
+\end{array}\right] 
+\mu=\left[\begin{array}{ll}
+4 & 6
+\end{array}\right]
+\end{array}
+$$
+
+$$
+\begin{array}{l}
+\Sigma=\left[\begin{array}{cc}
+2 & 1 \\\\
+1 & 3
+\end{array}\right] 
+\mu=\left[\begin{array}{ll}
+10 & 10
+\end{array}\right]
+\end{array}
+$$
+
+
+*训练集分布*
+
+![输入图片说明](https://images.gitee.com/uploads/images/2021/0401/112759_754208e4_8823823.png "屏幕截图.png")
+
+*测试集分布*
+
+![输入图片说明](https://images.gitee.com/uploads/images/2021/0401/112810_ca9d4230_8823823.png "屏幕截图.png")
+
+*K与准确率结果*
+
+
+| k                | 准确率  |
+| ---------------- | ------ |
+| 1 | 95.625% | 
+
+
+***2.5 有关高斯分布的结论***
+1. 与直观体会到的类似，模型准确率与高斯分布均值离散程度正相关，与xy方差负相关
+2. 模型在均值分散，方差集中的情况下表现最好，在均值集中，方差分散的情况下表现最差
+3. 最佳K的取值与准确率无直接联系，准确性更多取决于分布情况
+
+
+
+**3. K值**
+
+***3.1 不同量下的best_k***
+
+（这里以2.4高斯分布为例）
+
+```
+# 准备数据集维度
+amount_list = [[10,10,10],[50,50,50],[100,100,100],[150,50,200],[200,200,200],[250,300,400]]
+k_list=[]
+aug_amount = [30,150,300,400,600,950]
+
+# 在不同数据集量下训练最佳k值
+for i in range (0,len(amount_list)):
+    model = KNN()
+    X_train, X_test, Y_train, Y_test = generate(amount_list[i][0],amount_list[i][1],amount_list[i][2])
+    k_list.append(model.fit(X_train, Y_train))
+
+# 画图
+l1=plt.plot(aug_amount,k_list,'r--',label='Best-K-Value')
+plt.title('The relationship between # elements and |K|')
+plt.xlabel('Total # of elements')
+plt.ylabel('K-value')
+plt.legend()
+
+```
+![输入图片说明](https://images.gitee.com/uploads/images/2021/0401/120033_088f8aa7_8823823.png "屏幕截图.png")
+
+
+1. K取值与元素数量无关
+2. 最佳K会趋向于一个较小的值，所以在fit时考虑遍历[1,10]，无需遍历更大的K（导致额外无用运算）
+
+
+**3.2 不同K下的acc**
+
+
+```
+# 因为我实现的KNN类不接受K参数（最优K由fit自动学习），这里采用scikit-learn库中的KNeighborsClassifier来做本次探究
+from sklearn.datasets import load_iris
+from sklearn.model_selection import train_test_split
+from sklearn.neighbors import KNeighborsClassifier
+
+# 载入sklearn自带的iris数据集，并划分为训练集与测试集
+iris = load_iris()  
+X_train, X_test, y_train, y_test = train_test_split(iris.data, iris.target, test_size = 0.25, random_state = 33)
+ 
+# 在不同k值下计算accuracy
+acc_list=[]
+
+for i in range (1,100):
+    
+
+    knc = KNeighborsClassifier(n_neighbors=i)
+    knc.fit(X_train, y_train)
+    y_predict = knc.predict(X_test)
+ 
+    acc_list.append(knc.score(X_test, y_test))
+
+# 画图
+l1=plt.plot(range(1,100),acc_list,'green',label='Accuracy')
+plt.title('The relationship between K-choice and Accuracy')
+plt.xlabel('K-Value')
+plt.ylabel('Accuracy')
+plt.legend()
+
+```
+![输入图片说明](https://images.gitee.com/uploads/images/2021/0401/122104_30809a7a_8823823.png "屏幕截图.png")
+
+1. 同3.1结论[2]，最佳K的取值会聚焦于一个较小的值
+2. iris数据集大小为150，可以看到超过0.5*len 之后准确率锁定在50%（几乎约等于瞎猜）
diff --git a/assignment-1/submission/18340246016/img/.keep b/assignment-1/submission/18340246016/img/.keep
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/assignment-1/submission/18340246016/img/105433_e7ec4619_8823823.png b/assignment-1/submission/18340246016/img/105433_e7ec4619_8823823.png
new file mode 100644
index 0000000000000000000000000000000000000000..2b3ebe23a4c696d374084525296b704bc1e94461
Binary files /dev/null and b/assignment-1/submission/18340246016/img/105433_e7ec4619_8823823.png differ
diff --git a/assignment-1/submission/18340246016/img/105459_56af4a90_8823823.png b/assignment-1/submission/18340246016/img/105459_56af4a90_8823823.png
new file mode 100644
index 0000000000000000000000000000000000000000..8ecab29162d04ce0e20d91129786ace0133752dd
Binary files /dev/null and b/assignment-1/submission/18340246016/img/105459_56af4a90_8823823.png differ
diff --git a/assignment-1/submission/18340246016/img/111859_687c068b_8823823.png b/assignment-1/submission/18340246016/img/111859_687c068b_8823823.png
new file mode 100644
index 0000000000000000000000000000000000000000..38182b7678184b3e73071c3a0b77496698405b6c
Binary files /dev/null and b/assignment-1/submission/18340246016/img/111859_687c068b_8823823.png differ
diff --git a/assignment-1/submission/18340246016/img/111906_d0e2e134_8823823.png b/assignment-1/submission/18340246016/img/111906_d0e2e134_8823823.png
new file mode 100644
index 0000000000000000000000000000000000000000..b5fd60ce1e7218c3359d9739ae304ead9ee6e16b
Binary files /dev/null and b/assignment-1/submission/18340246016/img/111906_d0e2e134_8823823.png differ
diff --git a/assignment-1/submission/18340246016/img/112426_09535d36_8823823.png b/assignment-1/submission/18340246016/img/112426_09535d36_8823823.png
new file mode 100644
index 0000000000000000000000000000000000000000..a1db91fe434924aafbc734b235647d73fb4229dd
Binary files /dev/null and b/assignment-1/submission/18340246016/img/112426_09535d36_8823823.png differ
diff --git a/assignment-1/submission/18340246016/img/112437_53a32eec_8823823.png b/assignment-1/submission/18340246016/img/112437_53a32eec_8823823.png
new file mode 100644
index 0000000000000000000000000000000000000000..7731028adc12f28c461efc7240b6e5fa95c12702
Binary files /dev/null and b/assignment-1/submission/18340246016/img/112437_53a32eec_8823823.png differ
diff --git a/assignment-1/submission/18340246016/img/112759_754208e4_8823823.png b/assignment-1/submission/18340246016/img/112759_754208e4_8823823.png
new file mode 100644
index 0000000000000000000000000000000000000000..913e3801971de18d4f609181dad97957b03a142b
Binary files /dev/null and b/assignment-1/submission/18340246016/img/112759_754208e4_8823823.png differ
diff --git a/assignment-1/submission/18340246016/img/112810_ca9d4230_8823823.png b/assignment-1/submission/18340246016/img/112810_ca9d4230_8823823.png
new file mode 100644
index 0000000000000000000000000000000000000000..34ea7339a55877d026ae45bc7d32ee349f4b6533
Binary files /dev/null and b/assignment-1/submission/18340246016/img/112810_ca9d4230_8823823.png differ
diff --git a/assignment-1/submission/18340246016/img/120033_088f8aa7_8823823.png b/assignment-1/submission/18340246016/img/120033_088f8aa7_8823823.png
new file mode 100644
index 0000000000000000000000000000000000000000..48293c8c963882364c7642e833597be03e206027
Binary files /dev/null and b/assignment-1/submission/18340246016/img/120033_088f8aa7_8823823.png differ
diff --git a/assignment-1/submission/18340246016/img/122104_30809a7a_8823823.png b/assignment-1/submission/18340246016/img/122104_30809a7a_8823823.png
new file mode 100644
index 0000000000000000000000000000000000000000..0343da404b4c87d03aebfde3016b60e6b55cb836
Binary files /dev/null and b/assignment-1/submission/18340246016/img/122104_30809a7a_8823823.png differ
diff --git a/assignment-1/submission/18340246016/source.py b/assignment-1/submission/18340246016/source.py
new file mode 100644
index 0000000000000000000000000000000000000000..c43b0cc36b1573f5a16c096738d5547454904ca6
--- /dev/null
+++ b/assignment-1/submission/18340246016/source.py
@@ -0,0 +1,154 @@
+import numpy as np
+
+
+class KNN:
+    """k-nearest-neighbour classifier that selects k on a held-out split.
+
+    Features are standardised with the mean and std of the training data.
+    """
+
+    def __init__(self):
+        self.train_data = None    # standardised training features
+        self.train_labels = None  # labels aligned with train_data
+        self.k = None             # neighbour count chosen by fit()
+        self.mu = None            # per-feature mean of the training data
+        self.sigma = None         # per-feature std of the training data
+
+    def train_test_split(self, x, y, rate):
+        """Shuffle x and y together and split them.
+
+        rate is the fraction of samples that goes to the test part.
+        Returns x_train, x_test, y_train, y_test.
+        """
+        shuffled_indexes = np.random.permutation(len(x))
+        test_size = int(len(x) * rate)
+        train_index = shuffled_indexes[test_size:]
+        test_index = shuffled_indexes[:test_size]
+        return x[train_index], x[test_index], y[train_index], y[test_index]
+
+    def distance(self, v1, v2):
+        """Return the Euclidean distance between vectors v1 and v2."""
+        return np.sum((v1 - v2) ** 2) ** 0.5
+
+    @staticmethod
+    def _neighbour_labels(point, data, labels, k):
+        """Return the labels of the k rows of data closest to point."""
+        dists = np.sqrt(np.sum((data - point) ** 2, axis=1))
+        # Stable sort: equidistant rows keep their order, so runs repeat.
+        return labels[np.argsort(dists, kind="stable")[:k]]
+
+    @staticmethod
+    def _vote(neighbour_labels):
+        """Return the most frequent label; the smallest label wins a tie."""
+        values, counts = np.unique(neighbour_labels, return_counts=True)
+        return values[np.argmax(counts)]
+
+    def fit(self, train_data, train_label):
+        """Standardise the data, pick k on a 30% validation split, return k.
+
+        Candidate k runs from 1 to 19, capped at the number of rows left
+        for training. Equal validation accuracy favours the larger k.
+        """
+        train_data = np.asarray(train_data, dtype=float)
+        train_label = np.asarray(train_label)
+        self.mu = np.mean(train_data, axis=0)
+        sigma = np.std(train_data, axis=0)
+        # A constant feature has std 0; divide by 1 instead of yielding NaN.
+        self.sigma = np.where(sigma == 0, 1.0, sigma)
+        train_data = (train_data - self.mu) / self.sigma
+
+        X_train, X_test, Y_train, Y_test = self.train_test_split(
+            train_data, train_label, 0.3)
+        max_k = max(1, min(19, len(X_train)))
+        # Rank each validation point's neighbours once; every k reuses it
+        # instead of recomputing all distances per candidate.
+        ranked = [self._neighbour_labels(p, X_train, Y_train, max_k)
+                  for p in X_test]
+
+        best_hits, best_k = 0, 0
+        for k in range(1, max_k + 1):
+            hits = sum(self._vote(r[:k]) == t
+                       for r, t in zip(ranked, Y_test))
+            if hits >= best_hits:
+                best_hits, best_k = hits, k
+
+        self.k = best_k
+        self.train_data = train_data
+        self.train_labels = train_label
+        return self.k
+
+    def predict(self, test_data):
+        """Return a list holding the predicted label of each test row.
+
+        Rows are standardised with the mean/std learned in fit(), so they
+        share the feature space of the stored training data; the label is
+        the majority vote of the k nearest training rows.
+
+        Raises ValueError when called before fit().
+        """
+        if self.train_data is None:
+            raise ValueError("fit() must be called before predict()")
+        test_data = (np.asarray(test_data, dtype=float) - self.mu) / self.sigma
+        return [
+            self._vote(self._neighbour_labels(
+                row, self.train_data, self.train_labels, self.k))
+            for row in test_data
+        ]
+    
+def generate(amount_1, amount_2, amount_3):
+    """Sample three 2-D Gaussian classes and split them 80/20.
+
+    amount_1, amount_2 and amount_3 are the number of points drawn for
+    classes 0, 1 and 2. Returns x_train, x_test, y_train, y_test.
+    """
+    # (mean, covariance) of classes 0, 1 and 2.
+    params = [
+        ((2, 2), [[1, 0], [0, 1]]),
+        ((4, 6), [[2, 0], [0, 2]]),
+        ((10, 10), [[2, 1], [1, 3]]),
+    ]
+    amounts = (amount_1, amount_2, amount_3)
+
+    data = np.concatenate([
+        np.random.multivariate_normal(mean, np.array(cov), (n,))
+        for (mean, cov), n in zip(params, amounts)
+    ])
+    label = np.concatenate([
+        np.full((n,), cls, dtype=int) for cls, n in enumerate(amounts)
+    ])
+
+    # train_test_split keeps no state, so a throwaway KNN is enough; the
+    # old code called a global `model` that this file never defines.
+    return KNN().train_test_split(data, label, 0.2)
+
+
+def display(x, y):
+    """Scatter-plot the 2-D points x, coloured by class label y (0, 1, 2).
+
+    x is indexed as x[i][0] / x[i][1] and y[i] is the label of x[i];
+    points carrying any other label are not drawn.
+
+    NOTE(review): relies on a module-level `plt` (presumably
+    matplotlib.pyplot) that this file never imports - confirm that the
+    caller provides it.
+    """
+    # label -> (colour, legend name), listed in legend order
+    styles = {
+        0: ('brown', "A"),
+        1: ('lime', "B"),
+        2: ("darkviolet", "C"),
+    }
+
+    # Open a single figure: the old code first created an 8x6 figure that
+    # was never drawn on, which left an empty extra figure behind.
+    fig = plt.figure(figsize=(10, 6))
+    ax = fig.add_subplot(111)
+    handles = []
+    for cls, (colour, _) in styles.items():
+        members = [i for i in range(len(x)) if y[i] == cls]
+        xs = [x[i][0] for i in members]
+        ys = [x[i][1] for i in members]
+        handles.append(ax.scatter(xs, ys, s=30, c=colour))
+
+    ax.legend(handles, [name for _, name in styles.values()], loc=0)
+    plt.show()
diff --git a/assignment-2/.keep b/assignment-2/.keep
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/assignment-2/README.md b/assignment-2/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..827aa9f21f075ba56cb82e13340d2f480679e478
--- /dev/null
+++ b/assignment-2/README.md
@@ -0,0 +1,183 @@
+**1. Mini-batch 函数的实现** 
+
+```
+def mini_batch(dataset, batch_size=128, numpy=False):
+
+# 仿照batch函数分离data与label 
+    data = []
+    label = []
+    for each in dataset:
+        data.append(np.array(each[0]))
+        label.append(each[1])
+
+    data = np.array(data)
+    label = np.array(label)
+
+
+    res = []
+
+# 以batch_size为单位，从[0,len(data)]分割mini-batch
+    for start_idx in range(0, data.shape[0], batch_size):
+        end_idx = min(start_idx + batch_size, len(data))
+        res.append((data[start_idx:end_idx],label[start_idx:end_idx]))
+
+    return res
+```
+
+ **2. 模型的训练和测试** 
+
+![输入图片说明](https://images.gitee.com/uploads/images/2021/0430/113123_f204953a_8823823.png "processon.png")
+
+```
+    # 前向传播过程
+    def forward(self, x):
+        x = x.reshape(-1, 28 * 28)
+        
+        ####################
+        #      code 6      #
+
+        #x = torch.relu(torch.matmul(x, self.W1))
+        # 模仿torch的relu(Matmul.(x,w1))操作，下同
+        x = self.relu_1.forward(self.matmul_1.forward(x,self.W1))
+        
+        #x = torch.relu(torch.matmul(x, self.W2))
+        x = self.relu_2.forward(self.matmul_2.forward(x,self.W2))
+        
+        #x = torch.matmul(x, self.W3)
+        #x = torch.softmax(x, 1)
+        x =  self.softmax.forward(self.matmul_3.forward(x,self.W3))
+        
+        #x = torch.log(x)
+        x = self.log.forward(x)
+
+        ####################
+        
+        return x
+    
+    # 反向传播过程
+    def backward(self, y):
+        
+        ####################
+        #      code 7      #
+        
+        self.log_grad = self.log.backward(y)
+        
+        self.softmax_grad = self.softmax.backward(self.log_grad)
+        
+        self.x3_grad, self.W3_grad = self.matmul_3.backward(self.softmax_grad)
+        
+        self.relu_2_grad = self.relu_2.backward(self.x3_grad)
+        
+        self.x2_grad, self.W2_grad = self.matmul_2.backward(self.relu_2_grad)
+        
+        self.relu_1_grad = self.relu_1.backward(self.x2_grad)
+        
+        self.x1_grad, self.W1_grad = self.matmul_1.backward(self.relu_1_grad)
+        
+        
+        
+        ####################
+        
+        pass
+```
+
+
+
+
+
+| Epoch                | Batch_size  |
+| ---------------- | ------ |
+| 3 | 16 | 
+| 10 | 128 |
+
+
+* Epoch = 3 Batch_size=128
+
+[0] Accuracy: 0.9373
+<br>[1] Accuracy: 0.9583
+<br>[2] Accuracy: 0.9683
+
+![输入图片说明](https://images.gitee.com/uploads/images/2021/0430/102618_2f8a1661_8823823.png "3-128.png")
+
+
+
+* Epoch = 3 Batch_size=16
+
+[0] Accuracy: 0.9640
+<br>[1] Accuracy: 0.9676
+<br>[2] Accuracy: 0.9707
+
+![输入图片说明](https://images.gitee.com/uploads/images/2021/0430/103502_131ca59f_8823823.png "3-16.png")
+
+* Epoch = 10 Batch_size=16
+
+[0] Accuracy: 0.9602
+<br>[1] Accuracy: 0.9657
+<br>[2] Accuracy: 0.9741
+<br>[3] Accuracy: 0.9747
+<br>[4] Accuracy: 0.9701
+<br>[5] Accuracy: 0.9731
+<br>[6] Accuracy: 0.9760
+<br>[7] Accuracy: 0.9763
+<br>[8] Accuracy: 0.9760
+<br>[9] Accuracy: 0.9780
+
+![输入图片说明](https://images.gitee.com/uploads/images/2021/0430/105531_82cafce0_8823823.png "10-16.png")
+
+
+* Epoch = 10 Batch_size=128
+
+[0] Accuracy: 0.9389
+<br>[1] Accuracy: 0.9585
+<br>[2] Accuracy: 0.9679
+<br>[3] Accuracy: 0.9706
+<br>[4] Accuracy: 0.9746
+<br>[5] Accuracy: 0.9760
+<br>[6] Accuracy: 0.9769
+<br>[7] Accuracy: 0.9777
+<br>[8] Accuracy: 0.9781
+<br>[9] Accuracy: 0.9781
+
+![输入图片说明](https://images.gitee.com/uploads/images/2021/0430/111052_ddb4ecda_8823823.png "10-128.png")
+
+
+
+*探究发现*
+1. mini-batch size太小的情况下，模型的波动率非常高，并不是对batch_size越细分模型的准确率就越高
+2. 从epoch的数量上来看，epoch的数量与模型accuracy有正相关性
+
+
+ **3.Momentum 优化函数下的对比试验** 
+
+
+```
+    # optimize with momentum
+    def optimize(self, learning_rate):
+        """Update W1, W2 and W3 by gradient descent with momentum.
+
+            v <- mu * v - learning_rate * grad;  W <- W + v
+
+        The velocities are stored on the model. Resetting them to 0 on
+        every call, as before, cancels the mu * v term, so the update
+        degenerates into plain gradient descent.
+        """
+        mu = 0.9
+
+        for name in ('W1', 'W2', 'W3'):
+            grad = getattr(self, name + '_grad')
+            # First call: start from zero velocity.
+            velocity = getattr(self, 'v_' + name, None)
+            if velocity is None:
+                velocity = np.zeros_like(grad)
+
+            # momentum update
+            velocity = mu * velocity - learning_rate * grad
+            setattr(self, 'v_' + name, velocity)
+            setattr(self, name, getattr(self, name) + velocity)
+```
+
+![输入图片说明](https://images.gitee.com/uploads/images/2021/0430/115301_1fa39f2b_8823823.png "屏幕截图.png")
+
+1. 在我们的测试数据下，momentum与梯度下降模型准确率差异不大
+2. momentum模型主要用来解决全局最优与局部最优之间的差异，但在本测试集下不存在全局与局部的差异
+3. 理论上来说，当 momentum 动量越大时，其转换为势能的能量也就越大，就越有可能摆脱局部凹域的束缚，进入全局凹域
\ No newline at end of file
diff --git a/assignment-2/numpy_fnn.py b/assignment-2/numpy_fnn.py
new file mode 100644
index 0000000000000000000000000000000000000000..7f48bcd98d7ecd450bb8cbeb2735f2919a109668
--- /dev/null
+++ b/assignment-2/numpy_fnn.py
@@ -0,0 +1,249 @@
+import numpy as np
+
+
+class NumpyOp:
+    """Base class of the operators below: a cache dict plus an epsilon."""
+    def __init__(self):
+        self.memory = {}  # subclasses store their forward() inputs here
+        self.epsilon = 1e-12  # small constant that Log adds to its input
+
+
+class Matmul(NumpyOp):
+    """Matrix product h = x W together with its two gradients."""
+
+    def forward(self, x, W):
+        """Cache both operands and return their matrix product.
+
+        x: shape(N, d)
+        W: shape(d, d')
+        """
+        self.memory['x'], self.memory['W'] = x, W
+        return np.matmul(x, W)
+
+    def backward(self, grad_y):
+        """Return (grad_x, grad_W) for the operands cached by forward.
+
+        grad_y: shape(N, d')
+        """
+
+        ####################
+        #      code 1      #
+        cached = self.memory
+        # h = x W  =>  dL/dx = dL/dh W^T  and  dL/dW = x^T dL/dh
+        grad_wrt_x = np.matmul(grad_y, cached['W'].T)
+        grad_wrt_W = np.matmul(cached['x'].T, grad_y)
+        ####################
+
+        return grad_wrt_x, grad_wrt_W
+
+
+class Relu(NumpyOp):
+    """Element-wise rectifier: positive entries pass, the rest become 0."""
+
+    def forward(self, x):
+        """Cache x and return it with every non-positive entry zeroed."""
+        self.memory['x'] = x
+        zeros = np.zeros_like(x)
+        return np.where(x > 0, x, zeros)
+
+    def backward(self, grad_y):
+        """Return the gradient w.r.t. the input cached by forward.
+
+        grad_y: same shape as x
+        """
+
+        ####################
+        #      code 2      #
+        # The derivative is 1 where x > 0 and 0 elsewhere (0/1 int mask).
+        gate = np.where(self.memory['x'] > 0, 1, 0)
+        ####################
+
+        return gate * grad_y
+
+
+class Log(NumpyOp):
+    """Natural logarithm, shifted by epsilon so that log(0) stays finite."""
+
+    def forward(self, x):
+        """Cache x and return log(x + epsilon).
+
+        x: shape(N, c)
+        """
+        self.memory['x'] = x
+        shifted = x + self.epsilon
+        return np.log(shifted)
+
+    def backward(self, grad_y):
+        """Return the gradient w.r.t. the input cached by forward.
+
+        grad_y: same shape as x
+        """
+
+        ####################
+        #      code 3      #
+        # d log(x + epsilon) / dx = 1 / (x + epsilon); the epsilon used in
+        # forward is added here too.
+        shifted = self.memory['x'] + self.epsilon
+        reciprocal = 1 / shifted
+        upstream_scaled = reciprocal * grad_y
+
+        ####################
+
+        return upstream_scaled
+
+
+class Softmax(NumpyOp):
+    """
+    softmax over last dimension
+    """
+
+    def forward(self, x):
+        """Return the row-wise softmax of x and cache it for backward.
+
+        x: shape(N, c)
+
+        The result is floating point even for integer input (the old
+        in-place loop wrote the probabilities back into an integer copy).
+        """
+
+        ####################
+        #      code 4      #
+
+        logits = np.asarray(x)
+        if logits.dtype.kind in 'biu':
+            logits = logits.astype(float)
+
+        # Subtracting the row maximum leaves the quotient unchanged but
+        # keeps exp() from overflowing to inf, which turned the result
+        # into NaN for large logits.
+        shifted = logits - np.max(logits, axis=-1, keepdims=True)
+        exps = np.exp(shifted)
+        out = exps / np.sum(exps, axis=-1, keepdims=True)
+
+        self.memory['x'] = x
+        # Cached so that backward need not repeat the forward pass.
+        self.memory['out'] = out
+
+        ####################
+
+        return out
+
+    def backward(self, grad_y):
+        """Return the gradient w.r.t. the input of the last forward call.
+
+        grad_y: same shape as x
+
+        With s = softmax(x) the Jacobian is
+
+            ds_k / dx_j = s_k * (delta_jk - s_j)
+
+        so that
+
+            grad_x_j = sum_k grad_y_k * s_k * (delta_jk - s_j)
+                     = s_j * (grad_y_j - sum_k grad_y_k * s_k)
+        """
+
+        ####################
+        #      code 5      #
+
+        s = self.memory['out']
+        # Row-wise inner product <grad_y, s>, kept as a column so that it
+        # broadcasts over the classes.
+        weighted = np.sum(grad_y * s, axis=-1, keepdims=True)
+        # One vectorised expression replaces the former triple Python
+        # loop over (row, class, class).
+        grad_x = s * (grad_y - weighted)
+
+        ####################
+
+        return grad_x
+
+
+class NumpyLoss:
+    """Mean over the batch of -sum(pred * target) along axis 1."""
+    def __init__(self):
+        self.target = None  # target of the latest get_loss() call
+
+    def get_loss(self, pred, target):
+        self.target = target  # kept for backward()
+        return np.mean(np.sum(-pred * target, axis=1))
+
+    def backward(self):
+        return -self.target / len(self.target)  # d loss / d pred
+
+
+class NumpyModel:
+    """Three-layer fully connected classifier (784-256-64-10) in NumPy."""
+
+    def __init__(self):
+        # Weights, drawn from a standard normal distribution.
+        self.W1 = np.random.normal(size=(28 * 28, 256))
+        self.W2 = np.random.normal(size=(256, 64))
+        self.W3 = np.random.normal(size=(64, 10))
+
+        # The operators below are used by forward and backward.
+        self.matmul_1, self.relu_1 = Matmul(), Relu()
+        self.matmul_2, self.relu_2 = Matmul(), Relu()
+        self.matmul_3 = Matmul()
+        self.softmax = Softmax()
+        self.log = Log()
+
+        # The attributes below are filled in by backward. softmax_grad,
+        # log_grad, ... hold the gradient each operator sends back (the
+        # partial derivative of the loss w.r.t. the operator's input).
+        self.x1_grad = self.W1_grad = None
+        self.relu_1_grad = None
+        self.x2_grad = self.W2_grad = None
+        self.relu_2_grad = None
+        self.x3_grad = self.W3_grad = None
+        self.softmax_grad = None
+        self.log_grad = None
+
+    def forward(self, x):
+        """Return the log-probabilities, shape (N, 10), for the images x."""
+        x = x.reshape(-1, 28 * 28)
+
+        ####################
+        #      code 6      #
+        # x = torch.relu(torch.matmul(x, self.W1))
+        hidden_1 = self.matmul_1.forward(x, self.W1)
+        hidden_1 = self.relu_1.forward(hidden_1)
+
+        # x = torch.relu(torch.matmul(x, self.W2))
+        hidden_2 = self.matmul_2.forward(hidden_1, self.W2)
+        hidden_2 = self.relu_2.forward(hidden_2)
+
+        # x = torch.matmul(x, self.W3)
+        # x = torch.softmax(x, 1)
+        logits = self.matmul_3.forward(hidden_2, self.W3)
+        probabilities = self.softmax.forward(logits)
+
+        # x = torch.log(x)
+        log_probabilities = self.log.forward(probabilities)
+        ####################
+
+        return log_probabilities
+
+    def backward(self, y):
+        """Back-propagate y, the gradient of the loss w.r.t. forward's output.
+
+        Walks the operators in reverse order and stores every intermediate
+        gradient on the model; optimize reads W1_grad, W2_grad and W3_grad.
+        """
+        ####################
+        #      code 7      #
+        self.log_grad = self.log.backward(y)
+        self.softmax_grad = self.softmax.backward(self.log_grad)
+        self.x3_grad, self.W3_grad = self.matmul_3.backward(self.softmax_grad)
+        self.relu_2_grad = self.relu_2.backward(self.x3_grad)
+        self.x2_grad, self.W2_grad = self.matmul_2.backward(self.relu_2_grad)
+        self.relu_1_grad = self.relu_1.backward(self.x2_grad)
+        self.x1_grad, self.W1_grad = self.matmul_1.backward(self.relu_1_grad)
+        ####################
+
+    def optimize(self, learning_rate):
+        """Take one gradient-descent step on W1, W2 and W3 (in place)."""
+        for name in ('W1', 'W2', 'W3'):
+            weights = getattr(self, name)
+            weights -= learning_rate * getattr(self, name + '_grad')
+            setattr(self, name, weights)
\ No newline at end of file
diff --git a/assignment-2/numpy_mnist.py b/assignment-2/numpy_mnist.py
new file mode 100644
index 0000000000000000000000000000000000000000..833e5aa2a37f0a5b38b147a0bb05e81aa8d787e1
--- /dev/null
+++ b/assignment-2/numpy_mnist.py
@@ -0,0 +1,58 @@
+import numpy as np
+from numpy_fnn import NumpyModel, NumpyLoss
+from utils import download_mnist, batch, get_torch_initialization, plot_curve, one_hot
+
+def mini_batch(dataset, batch_size=128, numpy=False):
+    """Split dataset into consecutive (data, label) batches.
+
+    dataset yields (sample, label) pairs. All samples are stacked into one
+    array and all labels into another; both are then cut, in their original
+    order, into slices of batch_size rows (the last one may be shorter).
+    The numpy argument is accepted but not used.
+    """
+    samples, targets = [], []
+    for sample in dataset:
+        samples.append(np.array(sample[0]))
+        targets.append(sample[1])
+    data, label = np.array(samples), np.array(targets)
+
+    total = data.shape[0]
+    return [
+        (data[lo:lo + batch_size], label[lo:lo + batch_size])
+        for lo in range(0, total, batch_size)
+    ]
+
+
+def numpy_run():
+    """Train NumpyModel on MNIST and report its progress.
+
+    Prints the test accuracy after every epoch and finally hands the
+    per-batch training losses to plot_curve.
+    """
+    epoch_number, learning_rate = 3, 0.1
+
+    train_dataset, test_dataset = download_mnist()
+    model = NumpyModel()
+    numpy_loss = NumpyLoss()
+    # Replace the random weights by the ones get_torch_initialization returns.
+    model.W1, model.W2, model.W3 = get_torch_initialization()
+
+    train_loss = []
+    for epoch in range(epoch_number):
+        for images, labels in mini_batch(train_dataset):
+            targets = one_hot(labels)
+            loss = numpy_loss.get_loss(model.forward(images), targets)
+
+            model.backward(numpy_loss.backward())
+            model.optimize(learning_rate)
+            train_loss.append(loss.item())
+
+        test_x, test_y = batch(test_dataset)[0]
+        hits = model.forward(test_x).argmax(axis=1) == test_y
+        accuracy = np.mean(hits)
+        print('[{}] Accuracy: {:.4f}'.format(epoch, accuracy))
+
+    plot_curve(train_loss)
+
+if __name__ == "__main__":
+    numpy_run()
\ No newline at end of file