diff --git a/ assignment-3/submission/18340246016/img/.keep b/ assignment-3/submission/18340246016/img/.keep
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/ assignment-3/submission/18340246016/img/Cluster_algo.png b/ assignment-3/submission/18340246016/img/Cluster_algo.png
new file mode 100644
index 0000000000000000000000000000000000000000..d597d85c9497f59dfaa86a3127b4372f4ec70b4e
Binary files /dev/null and b/ assignment-3/submission/18340246016/img/Cluster_algo.png differ
diff --git a/ assignment-3/submission/18340246016/img/GMM_res.png b/ assignment-3/submission/18340246016/img/GMM_res.png
new file mode 100644
index 0000000000000000000000000000000000000000..517b0570f48530330f9a9fe424fbdf30f3a2934b
Binary files /dev/null and b/ assignment-3/submission/18340246016/img/GMM_res.png differ
diff --git a/ assignment-3/submission/18340246016/img/KMeans_res.png b/ assignment-3/submission/18340246016/img/KMeans_res.png
new file mode 100644
index 0000000000000000000000000000000000000000..9958ea1eaf7ecb6af3d6499e78799930c7578331
Binary files /dev/null and b/ assignment-3/submission/18340246016/img/KMeans_res.png differ
diff --git a/ assignment-3/submission/18340246016/img/Vis_data_2.png b/ assignment-3/submission/18340246016/img/Vis_data_2.png
new file mode 100644
index 0000000000000000000000000000000000000000..1c2b76342b7cc39b6bed19f3ebb14c4f30ded45a
Binary files /dev/null and b/ assignment-3/submission/18340246016/img/Vis_data_2.png differ
diff --git a/ assignment-3/submission/18340246016/img/visual_data_1.png b/ assignment-3/submission/18340246016/img/visual_data_1.png
new file mode 100644
index 0000000000000000000000000000000000000000..2591b361a69de753e3119feadf0e159dc5d448c9
Binary files /dev/null and b/ assignment-3/submission/18340246016/img/visual_data_1.png differ
diff --git a/ assignment-3/submission/18340246016/readme.md b/ assignment-3/submission/18340246016/readme.md
new file mode 100644
index 0000000000000000000000000000000000000000..df9cbec3f470f5dc0a8f7354f7a144190aab39d7
--- /dev/null
+++ b/ assignment-3/submission/18340246016/readme.md
@@ -0,0 +1,226 @@
+ **1. Basic Experiments**
+
+ _1.1 Data Generation and Visualization_
+
+
+```
+def data_generator():
+    # draw three multivariate-normal clusters, 2000 points in total
+    mean = (1, 2)
+    cov = np.array([[73, 0], [0, 22]])
+    x = np.random.multivariate_normal(mean, cov, (800,))
+
+    mean = (16, -5)
+    cov = np.array([[21.2, 0], [0, 32.1]])
+    y = np.random.multivariate_normal(mean, cov, (200,))
+
+    mean = (10, 22)
+    cov = np.array([[10, 5], [5, 10]])
+    z = np.random.multivariate_normal(mean, cov, (1000,))
+
+    data, _ = shuffle(x, y, z)
+    # train and test share the same samples; 3 is the number of clusters
+    return (data, data), 3
+```
+
+```
+def shuffle(*datas):
+    # concatenate the groups, then shuffle, keeping a source label per point
+ data = np.concatenate(datas)
+ label = np.concatenate([
+ np.ones((d.shape[0],), dtype=int)*i
+ for (i, d) in enumerate(datas)
+ ])
+ N = data.shape[0]
+ idx = np.arange(N)
+ np.random.shuffle(idx)
+ data = data[idx]
+ label = label[idx]
+ return data, label
+```
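+
+For example (an illustrative check, not part of the submission), the shuffled output for the three groups above has these shapes:
+
+```
+data, label = shuffle(x, y, z)
+print(data.shape, label.shape)   # (2000, 2) (2000,)
+```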
+
+```
+# scatter-plot helper: split points by label and draw the three groups
+def display(x, y):
+    type1_x = []; type1_y = []
+    type2_x = []; type2_y = []
+    type3_x = []; type3_y = []
+
+    for i in range(0, len(x)):
+        if y[i] == 0:
+            type1_x.append(x[i][0])
+            type1_y.append(x[i][1])
+        if y[i] == 1:
+            type2_x.append(x[i][0])
+            type2_y.append(x[i][1])
+        if y[i] == 2:
+            type3_x.append(x[i][0])
+            type3_y.append(x[i][1])
+
+    fig = plt.figure(figsize=(10, 6))
+    ax = fig.add_subplot(111)
+
+    type1 = ax.scatter(type1_x, type1_y, s=30, c='brown')
+    type2 = ax.scatter(type2_x, type2_y, s=30, c='lime')
+    type3 = ax.scatter(type3_x, type3_y, s=30, c='darkviolet')
+
+    ax.legend((type1, type2, type3), ("A", "B", "C"), loc=0)
+
+    plt.show()
+
+```
+
+The three Gaussian components are sampled with the following parameters:
+
+$$
+\begin{array}{l}
+\Sigma=\left[\begin{array}{cc}
+73 & 0 \\\\
+0 & 22
+\end{array}\right]
+\mu=\left[\begin{array}{ll}
+1 & 2
+\end{array}\right]
+\end{array}
+$$
+
+
+$$
+\begin{array}{l}
+\Sigma=\left[\begin{array}{cc}
+21.2 & 0 \\\\
+0 & 32.1
+\end{array}\right]
+\mu=\left[\begin{array}{ll}
+16 & -5
+\end{array}\right]
+\end{array}
+$$
+
+$$
+\begin{array}{l}
+\Sigma=\left[\begin{array}{cc}
+10 & 5 \\\\
+5 & 10
+\end{array}\right]
+\mu=\left[\begin{array}{ll}
+10 & 22
+\end{array}\right]
+\end{array}
+$$
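+
+As a quick sanity check (a sketch, assuming `numpy` is imported as `np`), the empirical statistics of each generated component should approximate these parameters:
+
+```
+x = np.random.multivariate_normal((1, 2), np.array([[73, 0], [0, 22]]), (800,))
+print(x.mean(axis=0))             # ≈ [1, 2]
+print(np.cov(x, rowvar=False))    # ≈ [[73, 0], [0, 22]]
+```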
+
+ _1.2 Sample Visualization_
+
+
+
+
+
+ _1.3 K-Means Clustering Results_
+
+
+```
+(train_data, test_data), n_clusters = data_generator()
+exp = KMeans(n_clusters)
+exp.fit(train_data)
+result = exp.predict(test_data)
+display(test_data, result)
+```
+
+
+
+ _1.4 GaussianMixture Clustering Results_
+
+
+```
+exp = GaussianMixture(n_clusters)
+exp.fit(train_data)
+result = exp.predict(test_data)
+display(test_data, result)
+```
+
+
+
+ **2. Automatic Selection of the Number of Clusters (Elbow Method with K-Means)**
+
+ _2.1 Implementation of fit_
+
+
+```
+    def fit(self, train_data):
+        train_data = self.normal(train_data)
+        dist_list = []
+
+        upper_bound = min(len(train_data) + 1, 20)
+
+        # run K-Means for every k in [1, upper_bound) and record the
+        # total within-cluster distance
+        for cluster in range(1, upper_bound):
+            KMean = KMeans(cluster)
+            Centroids, label = KMean.fit(train_data)
+            temp_length = 0
+            for n in range(cluster):
+                temp_length = temp_length + self.length_cal(Centroids[n], train_data[label == n])
+            dist_list.append(temp_length)
+
+        # pick the first k whose marginal improvement drops below 5%
+        percent_change = []
+        for i in range(len(dist_list) - 1):
+            percent_change.append((dist_list[i] - dist_list[i + 1]) / dist_list[i] * 100)
+        for i in range(len(percent_change)):
+            if percent_change[i] < 5:
+                self.optimal = i + 1
+                break
+        return dist_list, percent_change
+```
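+
+A minimal usage sketch (assuming the `data_generator` and `display` helpers from section 1; the printed value is illustrative):
+
+```
+(train_data, test_data), _ = data_generator()
+algo = ClusteringAlgorithm()
+dist_list, percent_change = algo.fit(train_data)   # elbow search over k
+print(algo.optimal)                                # selected number of clusters
+display(test_data, algo.predict(test_data))
+```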
+
+ _2.2 Data Visualization_
+
+Raw data
+
+$$
+\begin{array}{l}
+\Sigma=\left[\begin{array}{cc}
+1 & 0 \\\\
+0 & 1
+\end{array}\right]
+\mu=\left[\begin{array}{ll}
+2 & 2
+\end{array}\right]
+\end{array}
+$$
+
+
+$$
+\begin{array}{l}
+\Sigma=\left[\begin{array}{cc}
+2 & 0 \\\\
+0 & 2
+\end{array}\right]
+\mu=\left[\begin{array}{ll}
+4 & 6
+\end{array}\right]
+\end{array}
+$$
+
+$$
+\begin{array}{l}
+\Sigma=\left[\begin{array}{cc}
+2 & 1 \\\\
+1 & 3
+\end{array}\right]
+\mu=\left[\begin{array}{ll}
+10 & 10
+\end{array}\right]
+\end{array}
+$$
+
+
+
+
+
+Clustering results
+
+
+
diff --git a/ assignment-3/submission/18340246016/source.py b/ assignment-3/submission/18340246016/source.py
new file mode 100644
index 0000000000000000000000000000000000000000..c1fb49925b35afbeec88676539d3d14cd02fcc3e
--- /dev/null
+++ b/ assignment-3/submission/18340246016/source.py
@@ -0,0 +1,197 @@
+import numpy as np
+
+class KMeans:
+
+    # initialize centroids with the first k points (copied so that
+    # updating them cannot mutate the data set)
+    def initCentroids(self, dataSet, k):
+        centers = dataSet[:k].copy()
+        return centers
+
+    # assign each point in dataSet to its nearest centroid
+    def cluster_classifier(self, centroids, dataSet):
+        distances = np.sqrt(((dataSet - centroids[:, np.newaxis])**2).sum(axis=2))
+        return np.argmin(distances, axis=0)
+
+    def __init__(self, n_clusters):
+        self.n_clusters = n_clusters
+
+    def fit(self, train_data):
+
+        # initialize the centroids and the initial assignment
+        Centroids = self.initCentroids(train_data, self.n_clusters)
+        temp_label = self.cluster_classifier(Centroids, train_data)
+
+        # iterate until the assignment no longer changes
+        while True:
+            for i in range(self.n_clusters):
+                Centroids[i] = np.mean(train_data[temp_label == i], axis=0)
+            updated_label = self.cluster_classifier(Centroids, train_data)
+            if np.array_equal(updated_label, temp_label):
+                # store the final centroids
+                self.Centroids = Centroids
+                return Centroids, updated_label
+            else:
+                temp_label = updated_label
+
+    def predict(self, test_data):
+        # classify test_data with the centroids fitted above
+ return self.cluster_classifier(self.Centroids,test_data)
+
+
+
+class GaussianMixture:
+
+    # normalize: scale the whole data set by its Frobenius norm
+ def normal(self,raw_data):
+ norm = np.linalg.norm(raw_data)
+ normal_array = raw_data/norm
+ return normal_array
+
+    # initialize the parameters mu, var, Pi, W
+    def ini_para(self, n_clusters, train_data):
+        self.epsilon = 1e-12
+        dim = train_data.shape[1]
+        mu = []
+        var = []
+
+        for i in range(n_clusters):
+            mu.append(np.random.normal(0, 1, size=(dim,)))
+            var.append([1] * dim)
+        W = np.ones((len(train_data), n_clusters)) / n_clusters
+        Pi = W.sum(axis=0) / W.sum()
+        self.dim = dim
+
+        return np.array(mu), np.array(var), W, Pi
+
+ def __init__(self, n_clusters):
+ self.n_clusters = n_clusters
+
+ # multivariate_normal density function
+ def multivariate_normal(self,x, mu, cov):
+ part1 = 1 / ( ((2* np.pi)**(len(mu)/2)) * (np.linalg.det(cov)**(1/2)) )
+ part2 = (-1/2) * ((x-mu).T.dot(np.linalg.inv(cov))).dot((x-mu))
+ return float(part1 * np.exp(part2))
+
+
+ def update_W(self,X, Mu, Var, Pi):
+ n_points, n_clusters = len(X), len(Pi)
+ pdfs = np.zeros(((n_points, n_clusters)))
+ for m in range(n_points):
+ for i in range(n_clusters):
+ pdfs[m, i] = Pi[i] * self.multivariate_normal(X[m], Mu[i], (np.diag(Var[i])))
+ W = pdfs / pdfs.sum(axis=1).reshape(-1, 1)
+ return W
+
+
+ def update_Pi(self,W):
+ Pi = W.sum(axis=0) / W.sum()
+ return Pi
+
+ def update_Mu(self,X, W):
+ n_clusters = W.shape[1]
+ Mu = np.zeros((n_clusters, self.dim))
+ for i in range(n_clusters):
+ Mu[i] = np.average(X, axis=0, weights=W[:, i])
+ return Mu
+
+ def update_Var(self,X, Mu, W):
+ n_clusters = W.shape[1]
+ Var = np.zeros((n_clusters, self.dim))
+ for i in range(n_clusters):
+ Var[i] = np.average((X - Mu[i]) ** 2, axis=0, weights=W[:, i])+self.epsilon
+ return Var
+
+
+    def fit(self, train_data):
+        train_data = self.normal(train_data)
+        n_clusters = self.n_clusters
+
+        Mu, Var, W, Pi = self.ini_para(n_clusters, train_data)
+        X = train_data
+
+        Mu_init = Mu
+        Var_init = Var
+
+        # run at most 10 EM iterations, stopping early once the
+        # parameters no longer change
+        for i in range(10):
+            W = self.update_W(X, Mu, Var, Pi)
+            Pi = self.update_Pi(W)
+            Mu = self.update_Mu(X, W)
+            Var = self.update_Var(X, Mu, W)
+
+            if np.allclose(Mu_init, Mu) and np.allclose(Var_init, Var):
+                break
+            else:
+                Mu_init = Mu
+                Var_init = Var
+
+        self.Pi = Pi
+        self.W = W
+        self.Mu = Mu
+        self.Var = Var
+
+
+ def predict(self, test_data):
+ test_data = self.normal(test_data)
+ prob= self.update_W(test_data, self.Mu, self.Var, self.Pi)
+ return np.argmax(prob,axis=1)
+
+class ClusteringAlgorithm:
+
+    # normalize: scale the whole data set by its Frobenius norm
+    def normal(self, raw_data):
+ norm = np.linalg.norm(raw_data)
+ normal_array = raw_data/norm
+ return normal_array
+
+ def __init__(self):
+ pass
+
+ def length_cal(self,Centroids,points):
+ total=0
+ for point in points:
+ weight_array = (point-Centroids)**2
+ weight_array_sum = np.sum(weight_array)
+ total = total + weight_array_sum**(0.5)
+
+ return total
+
+    def fit(self, train_data):
+        train_data = self.normal(train_data)
+        dist_list = []
+
+        upper_bound = min(len(train_data) + 1, 20)
+
+        # run K-Means for every k in [1, upper_bound) and record the
+        # total within-cluster distance
+        for cluster in range(1, upper_bound):
+            KMean = KMeans(cluster)
+            Centroids, label = KMean.fit(train_data)
+            temp_length = 0
+            for n in range(cluster):
+                temp_length = temp_length + self.length_cal(Centroids[n], train_data[label == n])
+            dist_list.append(temp_length)
+
+        # pick the first k whose marginal improvement drops below 5%
+        percent_change = []
+        for i in range(len(dist_list) - 1):
+            percent_change.append((dist_list[i] - dist_list[i + 1]) / dist_list[i] * 100)
+        for i in range(len(percent_change)):
+            if percent_change[i] < 5:
+                self.optimal = i + 1
+                break
+        return dist_list, percent_change
+
+ def predict(self, test_data):
+ test_data = self.normal(test_data)
+ KMean_ready = KMeans(self.optimal)
+ KMean_ready.fit(test_data)
+ return KMean_ready.predict(test_data)
\ No newline at end of file
diff --git a/assignment-1/submission/18340246016/README.md b/assignment-1/submission/18340246016/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..c998960bf2dcbd812771d18b990d086ef231b740
--- /dev/null
+++ b/assignment-1/submission/18340246016/README.md
@@ -0,0 +1,485 @@
+ **1. KNN Implementation**
+
+*1.1 Helper Methods*
+
+```
+# train_test_split shuffles the data and splits it into training/validation sets at the given rate
+def train_test_split(self,x,y,rate):
+ shuffled_indexes = np.random.permutation(len(x))
+ test_size = int(len(x) * rate)
+ train_index = shuffled_indexes[test_size:]
+ test_index = shuffled_indexes[:test_size]
+ return x[train_index], x[test_index], y[train_index], y[test_index]
+```
+
+
+```
+# distance computes the Euclidean distance between two vectors
+def distance(self,v1,v2):
+
+ weight_array = (v1-v2)**2
+ weight_array_sum = np.sum(weight_array)
+ return weight_array_sum**(0.5)
+```
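+
+For reference, numpy's built-in norm computes the same quantity (an equivalent alternative, not the submitted implementation):
+
+```
+def distance(self, v1, v2):
+    # Euclidean distance via numpy's built-in norm
+    return np.linalg.norm(v1 - v2)
+```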
+
+
+
+*1.2 The fit Method*
+
+
+```
+# fit (1) normalizes the training data, (2) subdivides it into an internal
+# training/validation split to search for the best K, and (3) stores the
+# training data on self
+def fit(self, train_data, train_label):
+
+    # normalization
+    mu = np.mean(train_data, axis=0)
+    sigma = np.std(train_data, axis=0)
+    train_data = (train_data - mu) / sigma
+
+    # split into training/validation sets with rate = 0.3
+    X_train, X_test, Y_train, Y_test = self.train_test_split(train_data, train_label, 0.3)
+
+    # for each K in [1, 0.5*len(X_train)], score the validation split
+    # against the training split
+    best_correct = 0
+    k_candi = 0
+    for k in range(1, int(0.5 * len(X_train)) + 1):
+
+        true_counter = 0
+        for test_counter in range(0, len(X_test)):
+            pos_vec_list = []
+
+            for train_counter in range(0, len(X_train)):
+                pos_vec = np.array([self.distance(X_test[test_counter], X_train[train_counter]), Y_train[train_counter]])
+                pos_vec_list.append(pos_vec)
+
+            # sort the (distance, label) pairs by distance
+            pos_vec_list = np.array(pos_vec_list)
+            pos_vec_list_sorted = pos_vec_list[np.lexsort(pos_vec_list[:, ::-1].T)]
+            # labels of the k nearest neighbours
+            result_list = pos_vec_list_sorted[:k][:, 1]
+
+            # prediction: majority vote among the k nearest neighbours
+            label = int(np.bincount(result_list.astype(int)).argmax())
+
+            # check whether this validation point is classified correctly
+            if label == Y_test[test_counter]:
+                true_counter = true_counter + 1
+
+        # keep the K with the most correct predictions
+        if true_counter >= best_correct:
+            best_correct = true_counter
+            k_candi = k
+
+    self.k = k_candi
+    self.train_data = train_data
+    self.train_labels = train_label
+    return self.k
+```
+
+
+*1.3 The predict Method*
+
+```
+    # predict test labels from the stored training data and the fitted K
+    def predict(self, test_data):
+        test_label = []
+
+        # normalize the test data (using the test set's own statistics)
+        mu = np.mean(test_data, axis=0)
+        sigma = np.std(test_data, axis=0)
+        test_data = (test_data - mu) / sigma
+
+        for i in range(0, len(test_data)):
+            pos_vec_list = []
+            for m in range(0, len(self.train_data)):
+                pos_vec = np.array([self.distance(self.train_data[m], test_data[i]), self.train_labels[m]])
+                pos_vec_list.append(pos_vec)
+
+            # sort by distance and take the labels of the k nearest neighbours
+            pos_vec_list = np.array(pos_vec_list)
+            pos_vec_list_sorted = pos_vec_list[np.lexsort(pos_vec_list[:, ::-1].T)]
+
+            result_list = pos_vec_list_sorted[:self.k][:, 1]
+            # majority vote
+            test_label.append(int(np.bincount(result_list.astype(int)).argmax()))
+
+        return test_label
+```
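+
+A minimal end-to-end sketch (assuming the `generate` helper from section 2.0 below and `numpy` imported as `np`; the sizes are illustrative):
+
+```
+model = KNN()
+X_train, X_test, Y_train, Y_test = generate(100, 100, 100)
+best_k = model.fit(X_train, Y_train)
+pred = model.predict(X_test)
+print(best_k, np.mean(np.array(pred) == Y_test))
+```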
+
+
+
+ **2. Gaussian Distribution Experiments**
+
+*2.0 Generation and Display Functions*
+
+
+```
+def generate (amount_1,amount_2,amount_3):
+
+ mean = (2, 2)
+ cov = np.array([[1,0], [0, 1]])
+ x = np.random.multivariate_normal(mean, cov, (amount_1,))
+
+ mean = (4, 6)
+ cov = np.array([[2, 0], [0, 2]])
+ y = np.random.multivariate_normal(mean, cov, (amount_2,))
+
+ mean = (10, 10)
+ cov = np.array([[2,1],[1,3]])
+ z = np.random.multivariate_normal(mean, cov, (amount_3,))
+
+
+ data = np.concatenate([x,y,z])
+
+ label = np.concatenate([
+ np.zeros((amount_1,),dtype=int),
+ np.ones((amount_2,),dtype=int),
+ np.ones((amount_3,),dtype=int)*2
+ ])
+
+    # reuse the KNN instance's train_test_split (a KNN object named `model`
+    # must exist before generate is called)
+    return model.train_test_split(data,label,0.2)
+
+```
+
+
+```
+# scatter-plot helper: split points by label and draw the three groups
+def display(x, y):
+    type1_x = []; type1_y = []
+    type2_x = []; type2_y = []
+    type3_x = []; type3_y = []
+
+    for i in range(0, len(x)):
+        if y[i] == 0:
+            type1_x.append(x[i][0])
+            type1_y.append(x[i][1])
+        if y[i] == 1:
+            type2_x.append(x[i][0])
+            type2_y.append(x[i][1])
+        if y[i] == 2:
+            type3_x.append(x[i][0])
+            type3_y.append(x[i][1])
+
+    fig = plt.figure(figsize=(10, 6))
+    ax = fig.add_subplot(111)
+
+    type1 = ax.scatter(type1_x, type1_y, s=30, c='brown')
+    type2 = ax.scatter(type2_x, type2_y, s=30, c='lime')
+    type3 = ax.scatter(type3_x, type3_y, s=30, c='darkviolet')
+
+    ax.legend((type1, type2, type3), ("A", "B", "C"), loc=0)
+
+    plt.show()
+
+```
+
+
+
+***2.1 Close means + dispersed xy = resulting k***
+
+$$
+\begin{array}{l}
+\Sigma=\left[\begin{array}{cc}
+4 & 0 \\\\
+0 & 2
+\end{array}\right]
+\mu=\left[\begin{array}{ll}
+10 & 10
+\end{array}\right]
+\end{array}
+$$
+
+
+$$
+\begin{array}{l}
+\Sigma=\left[\begin{array}{cc}
+2 & 0 \\\\
+0 & 4
+\end{array}\right]
+\mu=\left[\begin{array}{ll}
+8 & 8
+\end{array}\right]
+\end{array}
+$$
+
+$$
+\begin{array}{l}
+\Sigma=\left[\begin{array}{cc}
+8 & 1 \\\\
+1 & 6
+\end{array}\right]
+\mu=\left[\begin{array}{ll}
+12 & 12
+\end{array}\right]
+\end{array}
+$$
+
+
+*Training set distribution*
+
+
+
+*Test set distribution*
+
+
+
+*K and accuracy*
+| k | Accuracy |
+| ---------------- | ------ |
+| 4 | 62.5% |
+
+***2.2 Close means + concentrated xy = resulting k***
+
+$$
+\begin{array}{l}
+\Sigma=\left[\begin{array}{cc}
+1 & 0 \\\\
+0 & 1
+\end{array}\right]
+\mu=\left[\begin{array}{ll}
+10 & 10
+\end{array}\right]
+\end{array}
+$$
+
+
+$$
+\begin{array}{l}
+\Sigma=\left[\begin{array}{cc}
+2 & 0 \\\\
+0 & 1
+\end{array}\right]
+\mu=\left[\begin{array}{ll}
+8 & 8
+\end{array}\right]
+\end{array}
+$$
+
+$$
+\begin{array}{l}
+\Sigma=\left[\begin{array}{cc}
+2 & 1 \\\\
+1 & 1
+\end{array}\right]
+\mu=\left[\begin{array}{ll}
+12 & 12
+\end{array}\right]
+\end{array}
+$$
+
+*Training set distribution*
+
+
+
+*Test set distribution*
+
+
+
+*K and accuracy*
+| k | Accuracy |
+| ---------------- | ------ |
+| 1 | 78.75% |
+
+
+***2.3 Dispersed means + dispersed xy = resulting k***
+
+
+$$
+\begin{array}{l}
+\Sigma=\left[\begin{array}{cc}
+4 & 0 \\\\
+0 & 2
+\end{array}\right]
+\mu=\left[\begin{array}{ll}
+2 & 2
+\end{array}\right]
+\end{array}
+$$
+
+
+$$
+\begin{array}{l}
+\Sigma=\left[\begin{array}{cc}
+2 & 0 \\\\
+0 & 4
+\end{array}\right]
+\mu=\left[\begin{array}{ll}
+4 & 6
+\end{array}\right]
+\end{array}
+$$
+
+$$
+\begin{array}{l}
+\Sigma=\left[\begin{array}{cc}
+8 & 1 \\\\
+1 & 6
+\end{array}\right]
+\mu=\left[\begin{array}{ll}
+10 & 10
+\end{array}\right]
+\end{array}
+$$
+
+
+*Training set distribution*
+
+
+
+*Test set distribution*
+
+
+
+*K and accuracy*
+
+
+| k | Accuracy |
+| ---------------- | ------ |
+| 2 | 86.25% |
+
+
+***2.4 Dispersed means + concentrated xy = resulting k***
+
+
+
+
+$$
+\begin{array}{l}
+\Sigma=\left[\begin{array}{cc}
+1 & 0 \\\\
+0 & 1
+\end{array}\right]
+\mu=\left[\begin{array}{ll}
+2 & 2
+\end{array}\right]
+\end{array}
+$$
+
+
+$$
+\begin{array}{l}
+\Sigma=\left[\begin{array}{cc}
+2 & 0 \\\\
+0 & 2
+\end{array}\right]
+\mu=\left[\begin{array}{ll}
+4 & 6
+\end{array}\right]
+\end{array}
+$$
+
+$$
+\begin{array}{l}
+\Sigma=\left[\begin{array}{cc}
+2 & 1 \\\\
+1 & 3
+\end{array}\right]
+\mu=\left[\begin{array}{ll}
+10 & 10
+\end{array}\right]
+\end{array}
+$$
+
+
+*Training set distribution*
+
+
+
+*Test set distribution*
+
+
+
+*K and accuracy*
+
+
+| k | Accuracy |
+| ---------------- | ------ |
+| 1 | 95.625% |
+
+
+***2.5 Conclusions on the Gaussian settings***
+1. As intuition suggests, model accuracy correlates positively with how far apart the Gaussian means are, and negatively with the xy variances.
+2. The model performs best with dispersed means and concentrated variances, and worst with close means and dispersed variances.
+3. The best K value has no direct link to accuracy; accuracy depends mostly on how the data is distributed.
+
+
+
+**3. The Value of K**
+
+***3.1 best_k for different data set sizes***
+
+(using the Gaussian setting of 2.4 as an example)
+
+```
+# data set sizes to test
+amount_list = [[10,10,10],[50,50,50],[100,100,100],[150,50,200],[200,200,200],[250,300,400]]
+k_list=[]
+aug_amount = [30,150,300,400,600,950]
+
+# fit the best k for each data set size
+for i in range (0,len(amount_list)):
+    model = KNN()
+    X_train, X_test, Y_train, Y_test = generate(amount_list[i][0],amount_list[i][1],amount_list[i][2])
+    k_list.append(model.fit(X_train, Y_train))
+
+# plot
+l1=plt.plot(aug_amount,k_list,'r--',label='Best-K-Value')
+plt.title('The relationship between # elements and |K|')
+plt.xlabel('Total # of elements')
+plt.ylabel('K-value')
+plt.legend()
+plt.show()
+```
+
+
+
+1. The best K does not depend on the number of samples.
+2. The best K tends toward a small value, so fit could restrict its search to [1, 10]; scanning larger K only adds useless computation.
+
+
+**3.2 Accuracy for different K**
+
+
+```
+# since my KNN class has no K parameter (it learns the best K itself),
+# this experiment uses sklearn's KNeighborsClassifier instead
+from sklearn.datasets import load_iris
+from sklearn.model_selection import train_test_split
+from sklearn.neighbors import KNeighborsClassifier
+
+# load the sklearn iris data set and split it
+iris = load_iris()
+X_train, X_test, y_train, y_test = train_test_split(iris.data, iris.target, test_size = 0.25, random_state = 33)
+
+# compute the accuracy for each k
+acc_list=[]
+
+for i in range (1,100):
+    knc = KNeighborsClassifier(n_neighbors=i)
+    knc.fit(X_train, y_train)
+    acc_list.append(knc.score(X_test, y_test))
+
+# plot
+l1=plt.plot(range(1,100),acc_list,'green',label='Accuracy')
+plt.title('The relationship between K-choice and Accuracy')
+plt.xlabel('K-Value')
+plt.ylabel('Accuracy')
+plt.legend()
+plt.show()
+```
+
+
+1. As in conclusion 2 of section 3.1, the best K concentrates at a small value.
+2. The iris data set has 150 samples; once K exceeds 0.5*len the accuracy locks in around 50%, barely better than guessing.
diff --git a/assignment-1/submission/18340246016/img/.keep b/assignment-1/submission/18340246016/img/.keep
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/assignment-1/submission/18340246016/img/105433_e7ec4619_8823823.png b/assignment-1/submission/18340246016/img/105433_e7ec4619_8823823.png
new file mode 100644
index 0000000000000000000000000000000000000000..2b3ebe23a4c696d374084525296b704bc1e94461
Binary files /dev/null and b/assignment-1/submission/18340246016/img/105433_e7ec4619_8823823.png differ
diff --git a/assignment-1/submission/18340246016/img/105459_56af4a90_8823823.png b/assignment-1/submission/18340246016/img/105459_56af4a90_8823823.png
new file mode 100644
index 0000000000000000000000000000000000000000..8ecab29162d04ce0e20d91129786ace0133752dd
Binary files /dev/null and b/assignment-1/submission/18340246016/img/105459_56af4a90_8823823.png differ
diff --git a/assignment-1/submission/18340246016/img/111859_687c068b_8823823.png b/assignment-1/submission/18340246016/img/111859_687c068b_8823823.png
new file mode 100644
index 0000000000000000000000000000000000000000..38182b7678184b3e73071c3a0b77496698405b6c
Binary files /dev/null and b/assignment-1/submission/18340246016/img/111859_687c068b_8823823.png differ
diff --git a/assignment-1/submission/18340246016/img/111906_d0e2e134_8823823.png b/assignment-1/submission/18340246016/img/111906_d0e2e134_8823823.png
new file mode 100644
index 0000000000000000000000000000000000000000..b5fd60ce1e7218c3359d9739ae304ead9ee6e16b
Binary files /dev/null and b/assignment-1/submission/18340246016/img/111906_d0e2e134_8823823.png differ
diff --git a/assignment-1/submission/18340246016/img/112426_09535d36_8823823.png b/assignment-1/submission/18340246016/img/112426_09535d36_8823823.png
new file mode 100644
index 0000000000000000000000000000000000000000..a1db91fe434924aafbc734b235647d73fb4229dd
Binary files /dev/null and b/assignment-1/submission/18340246016/img/112426_09535d36_8823823.png differ
diff --git a/assignment-1/submission/18340246016/img/112437_53a32eec_8823823.png b/assignment-1/submission/18340246016/img/112437_53a32eec_8823823.png
new file mode 100644
index 0000000000000000000000000000000000000000..7731028adc12f28c461efc7240b6e5fa95c12702
Binary files /dev/null and b/assignment-1/submission/18340246016/img/112437_53a32eec_8823823.png differ
diff --git a/assignment-1/submission/18340246016/img/112759_754208e4_8823823.png b/assignment-1/submission/18340246016/img/112759_754208e4_8823823.png
new file mode 100644
index 0000000000000000000000000000000000000000..913e3801971de18d4f609181dad97957b03a142b
Binary files /dev/null and b/assignment-1/submission/18340246016/img/112759_754208e4_8823823.png differ
diff --git a/assignment-1/submission/18340246016/img/112810_ca9d4230_8823823.png b/assignment-1/submission/18340246016/img/112810_ca9d4230_8823823.png
new file mode 100644
index 0000000000000000000000000000000000000000..34ea7339a55877d026ae45bc7d32ee349f4b6533
Binary files /dev/null and b/assignment-1/submission/18340246016/img/112810_ca9d4230_8823823.png differ
diff --git a/assignment-1/submission/18340246016/img/120033_088f8aa7_8823823.png b/assignment-1/submission/18340246016/img/120033_088f8aa7_8823823.png
new file mode 100644
index 0000000000000000000000000000000000000000..48293c8c963882364c7642e833597be03e206027
Binary files /dev/null and b/assignment-1/submission/18340246016/img/120033_088f8aa7_8823823.png differ
diff --git a/assignment-1/submission/18340246016/img/122104_30809a7a_8823823.png b/assignment-1/submission/18340246016/img/122104_30809a7a_8823823.png
new file mode 100644
index 0000000000000000000000000000000000000000..0343da404b4c87d03aebfde3016b60e6b55cb836
Binary files /dev/null and b/assignment-1/submission/18340246016/img/122104_30809a7a_8823823.png differ
diff --git a/assignment-1/submission/18340246016/source.py b/assignment-1/submission/18340246016/source.py
new file mode 100644
index 0000000000000000000000000000000000000000..c43b0cc36b1573f5a16c096738d5547454904ca6
--- /dev/null
+++ b/assignment-1/submission/18340246016/source.py
@@ -0,0 +1,154 @@
+import numpy as np
+import matplotlib.pyplot as plt
+
+
+class KNN:
+
+ def __init__(self):
+ self.train_data = None
+ self.train_labels = None
+ self.k = None
+
+ def train_test_split(self,x,y,rate):
+ shuffled_indexes = np.random.permutation(len(x))
+ test_size = int(len(x) * rate)
+ train_index = shuffled_indexes[test_size:]
+ test_index = shuffled_indexes[:test_size]
+ return x[train_index], x[test_index], y[train_index], y[test_index]
+
+ def distance(self,v1,v2):
+
+ weight_array = (v1-v2)**2
+ weight_array_sum = np.sum(weight_array)
+ return weight_array_sum**(0.5)
+
+    # fit (1) normalizes the training data, (2) subdivides it into an internal
+    # training/validation split to search for the best K, and (3) stores the
+    # training data on self
+    def fit(self, train_data, train_label):
+
+        # normalization
+        mu = np.mean(train_data, axis=0)
+        sigma = np.std(train_data, axis=0)
+        train_data = (train_data - mu) / sigma
+
+        # split into training/validation sets with rate = 0.3
+        X_train, X_test, Y_train, Y_test = self.train_test_split(train_data, train_label, 0.3)
+
+        # for each K in [1, 20), score the validation split against the
+        # training split
+        best_correct = 0
+        k_candi = 0
+        for k in range(1, 20):
+
+            true_counter = 0
+            for test_counter in range(0, len(X_test)):
+                pos_vec_list = []
+
+                for train_counter in range(0, len(X_train)):
+                    pos_vec = np.array([self.distance(X_test[test_counter], X_train[train_counter]), Y_train[train_counter]])
+                    pos_vec_list.append(pos_vec)
+
+                # sort the (distance, label) pairs by distance
+                pos_vec_list = np.array(pos_vec_list)
+                pos_vec_list_sorted = pos_vec_list[np.lexsort(pos_vec_list[:, ::-1].T)]
+                # labels of the k nearest neighbours
+                result_list = pos_vec_list_sorted[:k][:, 1]
+
+                # prediction: majority vote among the k nearest neighbours
+                label = int(np.bincount(result_list.astype(int)).argmax())
+
+                # check whether this validation point is classified correctly
+                if label == Y_test[test_counter]:
+                    true_counter = true_counter + 1
+
+            # keep the K with the most correct predictions
+            if true_counter >= best_correct:
+                best_correct = true_counter
+                k_candi = k
+
+        self.k = k_candi
+        self.train_data = train_data
+        self.train_labels = train_label
+        return self.k
+
+    # predict test labels from the stored training data and the fitted K
+    def predict(self, test_data):
+        test_label = []
+
+        # normalize the test data (using the test set's own statistics)
+        mu = np.mean(test_data, axis=0)
+        sigma = np.std(test_data, axis=0)
+        test_data = (test_data - mu) / sigma
+
+        for i in range(0, len(test_data)):
+            pos_vec_list = []
+            for m in range(0, len(self.train_data)):
+                pos_vec = np.array([self.distance(self.train_data[m], test_data[i]), self.train_labels[m]])
+                pos_vec_list.append(pos_vec)
+
+            # sort by distance and take the labels of the k nearest neighbours
+            pos_vec_list = np.array(pos_vec_list)
+            pos_vec_list_sorted = pos_vec_list[np.lexsort(pos_vec_list[:, ::-1].T)]
+
+            result_list = pos_vec_list_sorted[:self.k][:, 1]
+            # majority vote
+            test_label.append(int(np.bincount(result_list.astype(int)).argmax()))
+
+        return test_label
+
+def generate (amount_1,amount_2,amount_3):
+
+
+ mean = (2, 2)
+ cov = np.array([[1,0], [0, 1]])
+ x = np.random.multivariate_normal(mean, cov, (amount_1,))
+
+ mean = (4, 6)
+ cov = np.array([[2, 0], [0, 2]])
+ y = np.random.multivariate_normal(mean, cov, (amount_2,))
+
+ mean = (10, 10)
+ cov = np.array([[2,1],[1,3]])
+ z = np.random.multivariate_normal(mean, cov, (amount_3,))
+
+
+ data = np.concatenate([x,y,z])
+
+ label = np.concatenate([
+ np.zeros((amount_1,),dtype=int),
+ np.ones((amount_2,),dtype=int),
+ np.ones((amount_3,),dtype=int)*2
+ ])
+
+    # reuse the KNN instance's train_test_split (a KNN object named `model`
+    # must exist before generate is called)
+    return model.train_test_split(data,label,0.2)
+
+
+# scatter-plot helper: split points by label and draw the three groups
+def display(x, y):
+    type1_x = []; type1_y = []
+    type2_x = []; type2_y = []
+    type3_x = []; type3_y = []
+
+    for i in range(0, len(x)):
+        if y[i] == 0:
+            type1_x.append(x[i][0])
+            type1_y.append(x[i][1])
+        if y[i] == 1:
+            type2_x.append(x[i][0])
+            type2_y.append(x[i][1])
+        if y[i] == 2:
+            type3_x.append(x[i][0])
+            type3_y.append(x[i][1])
+
+    fig = plt.figure(figsize=(10, 6))
+    ax = fig.add_subplot(111)
+
+    type1 = ax.scatter(type1_x, type1_y, s=30, c='brown')
+    type2 = ax.scatter(type2_x, type2_y, s=30, c='lime')
+    type3 = ax.scatter(type3_x, type3_y, s=30, c='darkviolet')
+
+    ax.legend((type1, type2, type3), ("A", "B", "C"), loc=0)
+
+    plt.show()
diff --git a/assignment-2/submission/18340246016/README.md b/assignment-2/submission/18340246016/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..be11b1ddc0e6d6648014b9e637b8dbf07fd0f8e9
--- /dev/null
+++ b/assignment-2/submission/18340246016/README.md
@@ -0,0 +1,183 @@
+**1. Implementation of the Mini-batch Function**
+
+```
+def mini_batch(dataset, batch_size=128, numpy=False):
+
+# separate data and labels, mirroring the provided batch helper
+ data = []
+ label = []
+ for each in dataset:
+ data.append(np.array(each[0]))
+ label.append(each[1])
+
+ data = np.array(data)
+ label = np.array(label)
+
+
+ res = []
+
+# slice [0, len(data)) into chunks of batch_size
+ for start_idx in range(0, data.shape[0], batch_size):
+ end_idx = min(start_idx + batch_size, len(data))
+ res.append((data[start_idx:end_idx],label[start_idx:end_idx]))
+
+ return res
+```
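+
+One possible refinement (a sketch, not part of the submitted code): without shuffling, every epoch visits the batches in exactly the same order. Permuting the indices on each call randomizes the batch order per epoch:
+
+```
+def mini_batch_shuffled(dataset, batch_size=128):
+    data, label = [], []
+    for each in dataset:
+        data.append(np.array(each[0]))
+        label.append(each[1])
+    data, label = np.array(data), np.array(label)
+
+    # fresh permutation on every call, so each epoch sees a new batch order
+    idx = np.random.permutation(len(data))
+    data, label = data[idx], label[idx]
+
+    return [(data[i:i + batch_size], label[i:i + batch_size])
+            for i in range(0, len(data), batch_size)]
+```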
+
+ **2. Model Training and Testing**
+
+
+
+```
+    # forward pass
+    def forward(self, x):
+        x = x.reshape(-1, 28 * 28)
+
+        ####################
+        # code 6 #
+
+        # x = torch.relu(torch.matmul(x, self.W1))
+        # mirror torch's relu(matmul(x, W1)) with our numpy operators; same below
+        x = self.relu_1.forward(self.matmul_1.forward(x, self.W1))
+
+        # x = torch.relu(torch.matmul(x, self.W2))
+        x = self.relu_2.forward(self.matmul_2.forward(x, self.W2))
+
+        # x = torch.matmul(x, self.W3)
+        # x = torch.softmax(x, 1)
+        x = self.softmax.forward(self.matmul_3.forward(x, self.W3))
+
+        # x = torch.log(x)
+        x = self.log.forward(x)
+
+        ####################
+
+        return x
+
+    # backward pass: propagate gradients through the operators in reverse order
+    def backward(self, y):
+
+        ####################
+        # code 7 #
+
+        self.log_grad = self.log.backward(y)
+
+        self.softmax_grad = self.softmax.backward(self.log_grad)
+
+        self.x3_grad, self.W3_grad = self.matmul_3.backward(self.softmax_grad)
+
+        self.relu_2_grad = self.relu_2.backward(self.x3_grad)
+
+        self.x2_grad, self.W2_grad = self.matmul_2.backward(self.relu_2_grad)
+
+        self.relu_1_grad = self.relu_1.backward(self.x2_grad)
+
+        self.x1_grad, self.W1_grad = self.matmul_1.backward(self.relu_1_grad)
+
+        ####################
+```
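+
+A quick way to sanity-check the hand-written backward pass (a sketch using only numpy and the `NumpyModel`/`NumpyLoss` defined in this submission; the probed indices are arbitrary) is to compare a few entries of `W3_grad` against central finite differences:
+
+```
+def numerical_grad_W3(model, x, y, eps=1e-6):
+    # x: a batch of images, y: one-hot labels
+    loss = NumpyLoss()
+    g = np.zeros_like(model.W3)
+    for idx in [(0, 0), (1, 2), (5, 7)]:
+        model.W3[idx] += eps
+        lp = loss.get_loss(model.forward(x), y)
+        model.W3[idx] -= 2 * eps
+        lm = loss.get_loss(model.forward(x), y)
+        model.W3[idx] += eps        # restore the weight
+        g[idx] = (lp - lm) / (2 * eps)
+    return g                        # compare with model.W3_grad at those entries
+```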
+
+
+
+
+
+| Epoch | Batch_size |
+| ---------------- | ------ |
+| 3 | 16 |
+| 10 | 128 |
+
+
+* Epoch = 3 Batch_size=128
+
+[0] Accuracy: 0.9373
+
+[1] Accuracy: 0.9583
+
+[2] Accuracy: 0.9683
+
+
+
+
+
+* Epoch = 3 Batch_size=16
+
+[0] Accuracy: 0.9640
+
+[1] Accuracy: 0.9676
+
+[2] Accuracy: 0.9707
+
+
+
+* Epoch = 10 Batch_size=16
+
+[0] Accuracy: 0.9602
+
+[1] Accuracy: 0.9657
+
+[2] Accuracy: 0.9741
+
+[3] Accuracy: 0.9747
+
+[4] Accuracy: 0.9701
+
+[5] Accuracy: 0.9731
+
+[6] Accuracy: 0.9760
+
+[7] Accuracy: 0.9763
+
+[8] Accuracy: 0.9760
+
+[9] Accuracy: 0.9780
+
+
+
+
+* Epoch = 10 Batch_size=128
+
+[0] Accuracy: 0.9389
+
+[1] Accuracy: 0.9585
+
+[2] Accuracy: 0.9679
+
+[3] Accuracy: 0.9706
+
+[4] Accuracy: 0.9746
+
+[5] Accuracy: 0.9760
+
+[6] Accuracy: 0.9769
+
+[7] Accuracy: 0.9777
+
+[8] Accuracy: 0.9781
+
+[9] Accuracy: 0.9781
+
+
+
+
+
+*Findings*
+1. With a very small mini-batch size the model fluctuates a lot; finer batching does not automatically raise accuracy.
+2. The number of epochs correlates positively with model accuracy.
+
+
+ **3. Comparison Experiment with the Momentum Optimizer**
+
+
+```
+    # optimize with momentum
+    def optimize(self, learning_rate):
+
+        mu = 0.9
+
+        # the velocity buffers must persist across calls; create them once
+        if not hasattr(self, 'v1'):
+            self.v1, self.v2, self.v3 = 0, 0, 0
+
+        # momentum update
+        self.v1 = mu * self.v1 - learning_rate * self.W1_grad
+        self.W1 = self.W1 + self.v1
+
+        self.v2 = mu * self.v2 - learning_rate * self.W2_grad
+        self.W2 = self.W2 + self.v2
+
+        self.v3 = mu * self.v3 - learning_rate * self.W3_grad
+        self.W3 = self.W3 + self.v3
+
+        # plain SGD baseline for comparison:
+        # self.W1 -= learning_rate * self.W1_grad
+        # self.W2 -= learning_rate * self.W2_grad
+        # self.W3 -= learning_rate * self.W3_grad
+```
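+
+Note that the velocity buffers have to persist across `optimize` calls (here they are created lazily as attributes); re-initializing them to zero on every call would collapse the update back to plain gradient descent.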
+
+
+
+1. On our test data, momentum and plain gradient descent reach similar accuracy.
+2. Momentum mainly helps trade a local optimum for the global one, but on this test set there is no such local/global gap to exploit.
+3. In theory, the larger the momentum, the more energy is carried over between steps, and the more likely the update escapes a local basin and settles in the global one.
\ No newline at end of file
diff --git a/assignment-2/submission/18340246016/img/10-128.png b/assignment-2/submission/18340246016/img/10-128.png
new file mode 100644
index 0000000000000000000000000000000000000000..62352ed1838be0f2b7ded908e7cf94416690e663
Binary files /dev/null and b/assignment-2/submission/18340246016/img/10-128.png differ
diff --git a/assignment-2/submission/18340246016/img/10-16.png b/assignment-2/submission/18340246016/img/10-16.png
new file mode 100644
index 0000000000000000000000000000000000000000..47c83d74dd1d85afc8f27bb53d21ad210b47a543
Binary files /dev/null and b/assignment-2/submission/18340246016/img/10-16.png differ
diff --git a/assignment-2/submission/18340246016/img/3-128.png b/assignment-2/submission/18340246016/img/3-128.png
new file mode 100644
index 0000000000000000000000000000000000000000..3bcc33c65c07a1c12625ab0193e69588eb7cc5a8
Binary files /dev/null and b/assignment-2/submission/18340246016/img/3-128.png differ
diff --git a/assignment-2/submission/18340246016/img/3-16.png b/assignment-2/submission/18340246016/img/3-16.png
new file mode 100644
index 0000000000000000000000000000000000000000..e85d4d2d4c3cc2d62bcfd80dd2199c0ff26dcc85
Binary files /dev/null and b/assignment-2/submission/18340246016/img/3-16.png differ
diff --git a/assignment-2/submission/18340246016/img/momentum.png b/assignment-2/submission/18340246016/img/momentum.png
new file mode 100644
index 0000000000000000000000000000000000000000..f1f3afdf95cb49bd4f84f7c2bfb68fdbd13ddde3
Binary files /dev/null and b/assignment-2/submission/18340246016/img/momentum.png differ
diff --git a/assignment-2/submission/18340246016/img/processon.png b/assignment-2/submission/18340246016/img/processon.png
new file mode 100644
index 0000000000000000000000000000000000000000..0b41c289c3621142bc38c400ba1a566f6945f523
Binary files /dev/null and b/assignment-2/submission/18340246016/img/processon.png differ
diff --git a/assignment-2/submission/18340246016/numpy_fnn.py b/assignment-2/submission/18340246016/numpy_fnn.py
new file mode 100644
index 0000000000000000000000000000000000000000..7f48bcd98d7ecd450bb8cbeb2735f2919a109668
--- /dev/null
+++ b/assignment-2/submission/18340246016/numpy_fnn.py
@@ -0,0 +1,249 @@
+import numpy as np
+
+
+class NumpyOp:
+
+ def __init__(self):
+ self.memory = {}
+ self.epsilon = 1e-12
+
+
+class Matmul(NumpyOp):
+
+ def forward(self, x, W):
+ """
+ x: shape(N, d)
+ w: shape(d, d')
+ """
+ self.memory['x'] = x
+ self.memory['W'] = W
+ h = np.matmul(x, W)
+ return h
+
+ def backward(self, grad_y):
+ """
+ grad_y: shape(N, d')
+ """
+
+ ####################
+ # code 1 #
+ x = self.memory['x']
+ W = self.memory['W']
+
+ grad_W = np.matmul(x.T,grad_y)
+ grad_x = np.matmul(grad_y,W.T)
+ ####################
+
+ return grad_x, grad_W
+
+
+class Relu(NumpyOp):
+
+ def forward(self, x):
+ self.memory['x'] = x
+ return np.where(x > 0, x, np.zeros_like(x))
+
+ def backward(self, grad_y):
+ """
+ grad_y: same shape as x
+ """
+
+ ####################
+ # code 2 #
+ x = self.memory['x']
+
+ grad_x = np.where(x > 0, 1, 0)
+
+ grad_x = grad_x * grad_y
+
+ ####################
+
+ return grad_x
+
+
+class Log(NumpyOp):
+
+ def forward(self, x):
+ """
+ x: shape(N, c)
+ """
+
+ out = np.log(x + self.epsilon)
+ self.memory['x'] = x
+
+ return out
+
+ def backward(self, grad_y):
+ """
+ grad_y: same shape as x
+ """
+
+ ####################
+ # code 3 #
+ x = self.memory['x']
+
+ grad_x = (1/(x + self.epsilon))
+
+ grad_x = grad_x*grad_y
+
+
+ ####################
+
+ return grad_x
+
+
+class Softmax(NumpyOp):
+ """
+ softmax over last dimension
+ """
+
+ def forward(self, x):
+ """
+ x: shape(N, c)
+ """
+
+        ####################
+        # code 4 #
+
+        # row-wise softmax; subtracting the row max keeps exp() numerically stable
+        shifted = x - x.max(axis=1, keepdims=True)
+        exp = np.exp(shifted)
+        out = exp / exp.sum(axis=1, keepdims=True)
+
+        self.memory['x'] = x
+
+        ####################
+
+        return out
+
+
+
+ def backward(self, grad_y):
+
+ """
+ grad_y: same shape as x
+ """
+
+
+
+        ####################
+        # code 5 #
+
+        # Jacobian-vector product of the softmax:
+        # grad_x_j = s_j * (grad_y_j - sum_k grad_y_k * s_k)
+        softx = self.forward(self.memory['x'])
+        grad_x = softx * (grad_y - (grad_y * softx).sum(axis=1, keepdims=True))
+
+        ####################
+
+        return grad_x
+
+
+class NumpyLoss:
+
+ def __init__(self):
+ self.target = None
+
+ def get_loss(self, pred, target):
+ self.target = target
+ return (-pred * target).sum(axis=1).mean()
+
+ def backward(self):
+ return -self.target / self.target.shape[0]
+
+
+class NumpyModel:
+ def __init__(self):
+ self.W1 = np.random.normal(size=(28 * 28, 256))
+ self.W2 = np.random.normal(size=(256, 64))
+ self.W3 = np.random.normal(size=(64, 10))
+
+        # the operators below are used in forward and backward
+ self.matmul_1 = Matmul()
+ self.relu_1 = Relu()
+ self.matmul_2 = Matmul()
+ self.relu_2 = Relu()
+ self.matmul_3 = Matmul()
+ self.softmax = Softmax()
+ self.log = Log()
+
+        # the variables below are updated in backward; softmax_grad, log_grad, etc. hold each operator's backward gradient (the partial derivative of the loss w.r.t. that operator's input)
+ self.x1_grad, self.W1_grad = None, None
+ self.relu_1_grad = None
+ self.x2_grad, self.W2_grad = None, None
+ self.relu_2_grad = None
+ self.x3_grad, self.W3_grad = None, None
+ self.softmax_grad = None
+ self.log_grad = None
+
+ def forward(self, x):
+ x = x.reshape(-1, 28 * 28)
+
+ ####################
+ # code 6 #
+ #x = torch.relu(torch.matmul(x, self.W1))
+ x = self.relu_1.forward(self.matmul_1.forward(x,self.W1))
+
+ #x = torch.relu(torch.matmul(x, self.W2))
+ x = self.relu_2.forward(self.matmul_2.forward(x,self.W2))
+
+ #x = torch.matmul(x, self.W3)
+ #x = torch.softmax(x, 1)
+ x = self.softmax.forward(self.matmul_3.forward(x,self.W3))
+
+ #x = torch.log(x)
+ x = self.log.forward(x)
+
+ ####################
+
+ return x
+
+ def backward(self, y):
+
+ ####################
+ # code 7 #
+
+ self.log_grad = self.log.backward(y)
+
+ self.softmax_grad = self.softmax.backward(self.log_grad)
+
+ self.x3_grad, self.W3_grad = self.matmul_3.backward(self.softmax_grad)
+
+ self.relu_2_grad = self.relu_2.backward(self.x3_grad)
+
+ self.x2_grad, self.W2_grad = self.matmul_2.backward(self.relu_2_grad)
+
+ self.relu_1_grad = self.relu_1.backward(self.x2_grad)
+
+ self.x1_grad, self.W1_grad = self.matmul_1.backward(self.relu_1_grad)
+
+
+
+ ####################
+
+
+ def optimize(self, learning_rate):
+ self.W1 -= learning_rate * self.W1_grad
+ self.W2 -= learning_rate * self.W2_grad
+ self.W3 -= learning_rate * self.W3_grad
\ No newline at end of file
diff --git a/assignment-2/submission/18340246016/numpy_mnist.py b/assignment-2/submission/18340246016/numpy_mnist.py
new file mode 100644
index 0000000000000000000000000000000000000000..833e5aa2a37f0a5b38b147a0bb05e81aa8d787e1
--- /dev/null
+++ b/assignment-2/submission/18340246016/numpy_mnist.py
@@ -0,0 +1,58 @@
+import numpy as np
+from numpy_fnn import NumpyModel, NumpyLoss
+from utils import download_mnist, batch, get_torch_initialization, plot_curve, one_hot
+
+def mini_batch(dataset, batch_size=128, numpy=False):
+
+ data = []
+ label = []
+ for each in dataset:
+ data.append(np.array(each[0]))
+ label.append(each[1])
+
+ data = np.array(data)
+ label = np.array(label)
+
+ res = []
+
+
+ for start_idx in range(0, data.shape[0], batch_size):
+ end_idx = min(start_idx + batch_size, len(data))
+ res.append((data[start_idx:end_idx],label[start_idx:end_idx]))
+
+ return res
+
+
+def numpy_run():
+ train_dataset, test_dataset = download_mnist()
+
+ model = NumpyModel()
+ numpy_loss = NumpyLoss()
+ model.W1, model.W2, model.W3 = get_torch_initialization()
+
+ train_loss = []
+
+ epoch_number = 3
+ learning_rate = 0.1
+
+ for epoch in range(epoch_number):
+ for x, y in mini_batch(train_dataset):
+ y = one_hot(y)
+
+ y_pred = model.forward(x)
+ loss = numpy_loss.get_loss(y_pred, y)
+
+ model.backward(numpy_loss.backward())
+ model.optimize(learning_rate)
+
+ train_loss.append(loss.item())
+
+ x, y = batch(test_dataset)[0]
+ accuracy = np.mean((model.forward(x).argmax(axis=1) == y))
+ print('[{}] Accuracy: {:.4f}'.format(epoch, accuracy))
+
+ plot_curve(train_loss)
+
+
+if __name__ == "__main__":
+ numpy_run()
\ No newline at end of file