diff --git a/assignment-1/submission/18340246016/README.md b/assignment-1/submission/18340246016/README.md new file mode 100644 index 0000000000000000000000000000000000000000..c998960bf2dcbd812771d18b990d086ef231b740 --- /dev/null +++ b/assignment-1/submission/18340246016/README.md @@ -0,0 +1,485 @@ + **1. KNN实现过程** + +*1.1 辅助方法的实现* + +``` +# 这里我们用train_test_split实现训练集与验证集以给定的比例划分与打乱 +def train_test_split(self,x,y,rate): + shuffled_indexes = np.random.permutation(len(x)) + test_size = int(len(x) * rate) + train_index = shuffled_indexes[test_size:] + test_index = shuffled_indexes[:test_size] + return x[train_index], x[test_index], y[train_index], y[test_index] +``` + + +``` +# 用distance方法计算两组向量之间的欧式距离 +def distance(self,v1,v2): + + weight_array = (v1-v2)**2 + weight_array_sum = np.sum(weight_array) + return weight_array_sum**(0.5) +``` + + + +*1.2 fit方法的实现* + + +``` +# 我们用fit方法实现 1. 对训练数据的归一化 2. 训练数据内部subdivide为训练集与测试集,取最优K值 3. 将训练数据赋值self.data +def fit(self, train_data, train_label): + + # 归一化 + mu = np.mean(train_data, axis=0) + sigma = np.std(train_data, axis=0) + train_data = (train_data - mu) / sigma + + # 划分训练集/验证集 with rate =0.3 + X_train, X_test, Y_train, Y_test = self.train_test_split(train_data,train_label,0.3) + + # 对于不同的K[1-0.5*len(train)],计算验证集到训练集的欧氏距离 + best_k=0 + k_candi=0; + for k in range(1,int(0.5*len(X_train))+1): + + true_couter=0 + for test_counter in range(0,len(X_test)): + pos_vec_list=[] + + for train_counter in range(0,len(X_train)): + pos_vec = np.array([self.distance(X_test[test_counter],X_train[train_counter]),Y_train[train_counter]]) + pos_vec_list.append(pos_vec) + + #对距离list根据距离排序 + pos_vec_list = np.array(pos_vec_list) + pos_vec_list_sorted = pos_vec_list[np.lexsort(pos_vec_list[:,::-1].T)] + #k-近邻结果列表 + result_list = pos_vec_list_sorted[:k][:,1] + + + + #test预测结果 + label = int(result_list[np.argmax(result_list)]) + + + #检验本次test在给定k下是否正确 + if (label == Y_test[test_counter] ): + true_couter=true_couter+1 + + + #最优K取值 + if 
(true_couter >= best_k): + best_k = true_couter + k_candi = k + + self.k = k_candi + self.train_data = train_data + self.train_labels = train_label + return self.k +``` + + +*1.3 predict方法的实现* + +``` + # 根据fit方法带入的数据data以及训练结果K,预测test_label + def predict(self, test_data): + test_label=[] + result_list=[] + + # 归一化原始训练数据 + mu = np.mean(test_data, axis=0) + sigma = np.std(test_data, axis=0) + test_data = (test_data - mu) / sigma + + for i in range (0,len(test_data)): + pos_vec_list=[] + for m in range(0,len(self.train_data)): + pos_vec = np.array([self.distance(self.train_data[m],test_data[i]),self.train_labels[m]]) + pos_vec_list.append(pos_vec) + + + # KNN结果列表 + pos_vec_list = np.array(pos_vec_list) + pos_vec_list_sorted = pos_vec_list[np.lexsort(pos_vec_list[:,::-1].T)] + + result_list = pos_vec_list_sorted[:(self.k)][:,1] + test_label.append(result_list[np.argmax(result_list)]) + + return test_label +``` + + + + **2. 高斯分布探究试验** + +*2.0 生成以及展示函数* + + +``` +def generate (amount_1,amount_2,amount_3): + + mean = (2, 2) + cov = np.array([[1,0], [0, 1]]) + x = np.random.multivariate_normal(mean, cov, (amount_1,)) + + mean = (4, 6) + cov = np.array([[2, 0], [0, 2]]) + y = np.random.multivariate_normal(mean, cov, (amount_2,)) + + mean = (10, 10) + cov = np.array([[2,1],[1,3]]) + z = np.random.multivariate_normal(mean, cov, (amount_3,)) + + + data = np.concatenate([x,y,z]) + + label = np.concatenate([ + np.zeros((amount_1,),dtype=int), + np.ones((amount_2,),dtype=int), + np.ones((amount_3,),dtype=int)*2 + ]) + + return model.train_test_split(data,label,0.2) + +``` + + +``` +# 展示函数 +def display(x,y): + type1_x = []; type1_y = [] + type2_x = []; type2_y = [] + type3_x = []; type3_y = [] + + plt.figure(figsize=(8,6)) + + for i in range(0,len(x)): + if(y[i]==0): + type1_x.append(x[i][0]) + type1_y.append(x[i][1]) + if(y[i]==1): + type2_x.append(x[i][0]) + type2_y.append(x[i][1]) + if(y[i]==2): + type3_x.append(x[i][0]) + type3_y.append(x[i][1]) + + fig = plt.figure(figsize = 
(10, 6)) + ax = fig.add_subplot(111) + + type1 = ax.scatter(type1_x, type1_y, s = 30, c = 'brown') + type2 = ax.scatter(type2_x, type2_y, s = 30, c = 'lime') + type3 = ax.scatter(type3_x, type3_y, s = 30, c = "darkviolet") + + + + ax.legend((type1, type2, type3), ("A", "B", "C"), loc = 0) + + plt.show() + +``` + + + +***2.1 均值集中 + xy分布分散 = 对应的k*** + +$$ +\begin{array}{l} +\Sigma=\left[\begin{array}{cc} +4 & 0 \\\\ +0 & 2 +\end{array}\right] +\mu=\left[\begin{array}{ll} +10 & 10 +\end{array}\right] +\end{array} +$$ + + +$$ +\begin{array}{l} +\Sigma=\left[\begin{array}{cc} +2 & 0 \\\\ +0 & 4 +\end{array}\right] +\mu=\left[\begin{array}{ll} +8 & 8 +\end{array}\right] +\end{array} +$$ + +$$ +\begin{array}{l} +\Sigma=\left[\begin{array}{cc} +8 & 1 \\\\ +1 & 6 +\end{array}\right] +\mu=\left[\begin{array}{ll} +12 & 12 +\end{array}\right] +\end{array} +$$ + + +*训练集分布* + +![输入图片说明](https://images.gitee.com/uploads/images/2021/0401/111859_687c068b_8823823.png "屏幕截图.png") + +*测试集分布* + +![输入图片说明](https://images.gitee.com/uploads/images/2021/0401/111906_d0e2e134_8823823.png "屏幕截图.png") + +*K与准确率结果* +| k | 准确率 | +| ---------------- | ------ | +| 4 | 62.5% | + +***2.2 mean集中 + xy集中 = 对应的k*** + +$$ +\begin{array}{l} +\Sigma=\left[\begin{array}{cc} +1 & 0 \\\\ +0 & 1 +\end{array}\right] +\mu=\left[\begin{array}{ll} +10 & 10 +\end{array}\right] +\end{array} +$$ + + +$$ +\begin{array}{l} +\Sigma=\left[\begin{array}{cc} +2 & 0 \\\\ +0 & 1 +\end{array}\right] +\mu=\left[\begin{array}{ll} +8 & 8 +\end{array}\right] +\end{array} +$$ + +$$ +\begin{array}{l} +\Sigma=\left[\begin{array}{cc} +2 & 1 \\\\ +1 & 1 +\end{array}\right] +\mu=\left[\begin{array}{ll} +12 & 12 +\end{array}\right] +\end{array} +$$ + +*训练集分布* + +![训练集分布](https://images.gitee.com/uploads/images/2021/0401/105433_e7ec4619_8823823.png "屏幕截图.png") + +*测试集分布* + +![测试集分布](https://images.gitee.com/uploads/images/2021/0401/105459_56af4a90_8823823.png "屏幕截图.png") + +*K与准确率结果* +| k | 准确率 | +| ---------------- | ------ | +| 1 | 
78.75% | + + +***2.3 mean分散 + xy分散 = 对应的k*** + + +$$ +\begin{array}{l} +\Sigma=\left[\begin{array}{cc} +4 & 0 \\\\ +0 & 2 +\end{array}\right] +\mu=\left[\begin{array}{ll} +2 & 2 +\end{array}\right] +\end{array} +$$ + + +$$ +\begin{array}{l} +\Sigma=\left[\begin{array}{cc} +2 & 0 \\\\ +0 & 4 +\end{array}\right] +\mu=\left[\begin{array}{ll} +4 & 6 +\end{array}\right] +\end{array} +$$ + +$$ +\begin{array}{l} +\Sigma=\left[\begin{array}{cc} +8 & 1 \\\\ +1 & 6 +\end{array}\right] +\mu=\left[\begin{array}{ll} +10 & 10 +\end{array}\right] +\end{array} +$$ + + +*训练集分布* + +![输入图片说明](https://images.gitee.com/uploads/images/2021/0401/112426_09535d36_8823823.png "屏幕截图.png") + +*测试集分布* + +![输入图片说明](https://images.gitee.com/uploads/images/2021/0401/112437_53a32eec_8823823.png "屏幕截图.png") + +*K与准确率结果* + + +| k | 准确率 | +| ---------------- | ------ | +| 2 | 86.25% | + + +***2.4 mean分散 + xy集中 = 对应的k*** + + + + +$$ +\begin{array}{l} +\Sigma=\left[\begin{array}{cc} +1 & 0 \\\\ +0 & 1 +\end{array}\right] +\mu=\left[\begin{array}{ll} +2 & 2 +\end{array}\right] +\end{array} +$$ + + +$$ +\begin{array}{l} +\Sigma=\left[\begin{array}{cc} +2 & 0 \\\\ +0 & 2 +\end{array}\right] +\mu=\left[\begin{array}{ll} +4 & 6 +\end{array}\right] +\end{array} +$$ + +$$ +\begin{array}{l} +\Sigma=\left[\begin{array}{cc} +2 & 1 \\\\ +1 & 3 +\end{array}\right] +\mu=\left[\begin{array}{ll} +10 & 10 +\end{array}\right] +\end{array} +$$ + + +*训练集分布* + +![输入图片说明](https://images.gitee.com/uploads/images/2021/0401/112759_754208e4_8823823.png "屏幕截图.png") + +*测试集分布* + +![输入图片说明](https://images.gitee.com/uploads/images/2021/0401/112810_ca9d4230_8823823.png "屏幕截图.png") + +*K与准确率结果* + + +| k | 准确率 | +| ---------------- | ------ | +| 1 | 95.625% | + + +***2.5 有关高斯分布的结论*** +1. 与直观体会到的类似,模型准确率与高斯分布均值离散程度正相关,与xy方差负相关 +2. 模型在均值分散,方差集中的情况下表现最好,在均值集中,方差分散的情况下表现最差 +3. 最佳K的取值与准确率无直接联系,准确性更多取决于分布情况 + + + +**3. 
K值** + +***3.1 不同量下的best_k*** + +(这里以2.4高斯分布为例) + +``` +# 准备数据集维度 +amount_list = [[10,10,10],[50,50,50],[100,100,100],[150,50,200],[200,200,200],[250,300,400]] +k_list=[] +aug_amount = [30,150,300,400,600,950] + +# 在不同数据集量下训练最佳k值 +for i in range (0,len(amount_list)): + model = KNN() + X_train, X_test, Y_train, Y_test = generate(amount_list[i][0],amount_list[i][1],amount_list[i][2]) + k_list.append(model.fit(X_train, Y_train)) + +# 画图 +l1=plt.plot(aug_amount,k_list,'r--',label='Best-K-Value') +plt.title('The relationship between # elements and |K|') +plt.xlabel('Total # of elements') +plt.ylabel('K-value') +plt.legend() + +``` +![输入图片说明](https://images.gitee.com/uploads/images/2021/0401/120033_088f8aa7_8823823.png "屏幕截图.png") + + +1. K取值与元素数量无关 +2. 最佳K会趋向于一个较小的值,所以在fit时考虑遍历[1,10],无需遍历更大的K(导致额外无用运算) + + +**3.2 不同K下的acc** + + +``` +# 因为我实现的KNN函数没有K参数(学习最优K),这里采用sklearn库中的KNeighborsClassifier来做本次探究 +from sklearn.datasets import load_iris +from sklearn.model_selection import train_test_split +from sklearn.neighbors import KNeighborsClassifier + +# 导入sklearn的iris数据集并划分训练集/测试集 +iris = load_iris() +X_train, X_test, y_train, y_test = train_test_split(iris.data, iris.target, test_size = 0.25, random_state = 33) + +# 在不同k值下计算accuracy +acc_list=[] + +for i in range (1,100): + + + knc = KNeighborsClassifier(n_neighbors=i) + knc.fit(X_train, y_train) + y_predict = knc.predict(X_test) + + acc_list.append(knc.score(X_test, y_test)) + +# 画图 +l1=plt.plot(range(1,100),acc_list,'green',label='Accuracy') +plt.title('The relationship between K-choice and Accuracy') +plt.xlabel('K-Value') +plt.ylabel('Accuracy') +plt.legend() + +``` +![输入图片说明](https://images.gitee.com/uploads/images/2021/0401/122104_30809a7a_8823823.png "屏幕截图.png") + +1. 同3.1结论[2],最佳K的取值会聚焦于一个较小的值 +2. 
iris数据集大小为150,可以看到超过0.5*len 之后准确率锁定在50%(几乎约等于瞎猜) diff --git a/assignment-1/submission/18340246016/img/.keep b/assignment-1/submission/18340246016/img/.keep new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/assignment-1/submission/18340246016/img/105433_e7ec4619_8823823.png b/assignment-1/submission/18340246016/img/105433_e7ec4619_8823823.png new file mode 100644 index 0000000000000000000000000000000000000000..2b3ebe23a4c696d374084525296b704bc1e94461 Binary files /dev/null and b/assignment-1/submission/18340246016/img/105433_e7ec4619_8823823.png differ diff --git a/assignment-1/submission/18340246016/img/105459_56af4a90_8823823.png b/assignment-1/submission/18340246016/img/105459_56af4a90_8823823.png new file mode 100644 index 0000000000000000000000000000000000000000..8ecab29162d04ce0e20d91129786ace0133752dd Binary files /dev/null and b/assignment-1/submission/18340246016/img/105459_56af4a90_8823823.png differ diff --git a/assignment-1/submission/18340246016/img/111859_687c068b_8823823.png b/assignment-1/submission/18340246016/img/111859_687c068b_8823823.png new file mode 100644 index 0000000000000000000000000000000000000000..38182b7678184b3e73071c3a0b77496698405b6c Binary files /dev/null and b/assignment-1/submission/18340246016/img/111859_687c068b_8823823.png differ diff --git a/assignment-1/submission/18340246016/img/111906_d0e2e134_8823823.png b/assignment-1/submission/18340246016/img/111906_d0e2e134_8823823.png new file mode 100644 index 0000000000000000000000000000000000000000..b5fd60ce1e7218c3359d9739ae304ead9ee6e16b Binary files /dev/null and b/assignment-1/submission/18340246016/img/111906_d0e2e134_8823823.png differ diff --git a/assignment-1/submission/18340246016/img/112426_09535d36_8823823.png b/assignment-1/submission/18340246016/img/112426_09535d36_8823823.png new file mode 100644 index 0000000000000000000000000000000000000000..a1db91fe434924aafbc734b235647d73fb4229dd Binary files 
/dev/null and b/assignment-1/submission/18340246016/img/112426_09535d36_8823823.png differ diff --git a/assignment-1/submission/18340246016/img/112437_53a32eec_8823823.png b/assignment-1/submission/18340246016/img/112437_53a32eec_8823823.png new file mode 100644 index 0000000000000000000000000000000000000000..7731028adc12f28c461efc7240b6e5fa95c12702 Binary files /dev/null and b/assignment-1/submission/18340246016/img/112437_53a32eec_8823823.png differ diff --git a/assignment-1/submission/18340246016/img/112759_754208e4_8823823.png b/assignment-1/submission/18340246016/img/112759_754208e4_8823823.png new file mode 100644 index 0000000000000000000000000000000000000000..913e3801971de18d4f609181dad97957b03a142b Binary files /dev/null and b/assignment-1/submission/18340246016/img/112759_754208e4_8823823.png differ diff --git a/assignment-1/submission/18340246016/img/112810_ca9d4230_8823823.png b/assignment-1/submission/18340246016/img/112810_ca9d4230_8823823.png new file mode 100644 index 0000000000000000000000000000000000000000..34ea7339a55877d026ae45bc7d32ee349f4b6533 Binary files /dev/null and b/assignment-1/submission/18340246016/img/112810_ca9d4230_8823823.png differ diff --git a/assignment-1/submission/18340246016/img/120033_088f8aa7_8823823.png b/assignment-1/submission/18340246016/img/120033_088f8aa7_8823823.png new file mode 100644 index 0000000000000000000000000000000000000000..48293c8c963882364c7642e833597be03e206027 Binary files /dev/null and b/assignment-1/submission/18340246016/img/120033_088f8aa7_8823823.png differ diff --git a/assignment-1/submission/18340246016/img/122104_30809a7a_8823823.png b/assignment-1/submission/18340246016/img/122104_30809a7a_8823823.png new file mode 100644 index 0000000000000000000000000000000000000000..0343da404b4c87d03aebfde3016b60e6b55cb836 Binary files /dev/null and b/assignment-1/submission/18340246016/img/122104_30809a7a_8823823.png differ diff --git a/assignment-1/submission/18340246016/source.py 
"""K-nearest-neighbours classifier and Gaussian toy-data helpers."""
from collections import Counter

import numpy as np


class KNN:
    """k-NN classifier that standardises features and selects k on a hold-out split.

    ``fit`` stores the standardised training set together with the mean and
    standard deviation used for the standardisation, so that ``predict`` can
    apply exactly the same transformation to unseen data.
    """

    # Largest k tried during model selection (the search covers 1..19).
    MAX_K = 19

    def __init__(self):
        self.train_data = None
        self.train_labels = None
        self.k = None
        # Per-feature statistics of the training data, set by ``fit``.
        self.mu = None
        self.sigma = None

    def train_test_split(self, x, y, rate):
        """Shuffle ``x``/``y`` together and split off a fraction ``rate``.

        Returns ``(x_train, x_test, y_train, y_test)`` where the test part
        holds ``int(len(x) * rate)`` randomly chosen samples.
        """
        shuffled_indexes = np.random.permutation(len(x))
        test_size = int(len(x) * rate)
        train_index = shuffled_indexes[test_size:]
        test_index = shuffled_indexes[:test_size]
        return x[train_index], x[test_index], y[train_index], y[test_index]

    def distance(self, v1, v2):
        """Return the Euclidean distance between two vectors."""
        difference = np.asarray(v1) - np.asarray(v2)
        return np.sum(difference ** 2) ** 0.5

    @staticmethod
    def _nearest(query, references, count):
        """Return indices of the ``count`` references closest to ``query``.

        A stable sort is used so that equidistant references keep their
        original order, which makes the result deterministic.
        """
        squared = (references - query) ** 2
        # Flatten per-sample so scalar and multi-dimensional features both work.
        distances = np.sqrt(np.sum(squared.reshape(len(references), -1), axis=1))
        return np.argsort(distances, kind="stable")[:count]

    @staticmethod
    def _vote(labels):
        """Return the most frequent label; ties go to the nearest neighbour.

        ``labels`` must be ordered nearest-first. ``Counter`` preserves
        insertion order, so among equally frequent labels the one seen first
        (i.e. belonging to the closest neighbour) wins.
        """
        return Counter(np.asarray(labels).tolist()).most_common(1)[0][0]

    def fit(self, train_data, train_label):
        """Standardise the data, choose k on a 30% validation split, store state.

        Returns the selected ``k``. Raises ``ValueError`` for empty input.
        """
        train_data = np.asarray(train_data, dtype=float)
        train_label = np.asarray(train_label)
        if len(train_data) == 0:
            raise ValueError("train_data must contain at least one sample")

        self.mu = np.mean(train_data, axis=0)
        sigma = np.std(train_data, axis=0)
        # A constant feature has sigma == 0; dividing by 1 keeps it at zero
        # instead of producing NaN/inf.
        self.sigma = np.where(sigma > 0, sigma, 1.0)
        train_data = (train_data - self.mu) / self.sigma

        x_fit, x_val, y_fit, y_val = self.train_test_split(
            train_data, train_label, 0.3
        )

        # With fewer than four samples the validation split is empty and
        # there is nothing to tune against, so fall back to 1-NN.
        best_k = 1
        if len(x_val) > 0:
            # k can never exceed the number of available neighbours.
            max_k = min(self.MAX_K, len(x_fit))
            # Rank the neighbours of every validation sample once; each
            # candidate k then only needs a prefix of that ranking.
            ranked_labels = [
                y_fit[self._nearest(query, x_fit, max_k)] for query in x_val
            ]
            best_hits = 0
            for k in range(1, max_k + 1):
                hits = sum(
                    self._vote(ranked[:k]) == truth
                    for ranked, truth in zip(ranked_labels, y_val)
                )
                # ">=" keeps the existing policy: on ties the larger k wins.
                if hits >= best_hits:
                    best_hits, best_k = hits, k

        self.k = best_k
        self.train_data = train_data
        self.train_labels = train_label
        return self.k

    def predict(self, test_data):
        """Return a list with the majority-vote label for every test sample.

        Test data is standardised with the *training* statistics. Raises
        ``RuntimeError`` if the model has not been fitted.
        """
        if self.train_data is None or self.k is None:
            raise RuntimeError("fit must be called before predict")

        test_data = np.asarray(test_data, dtype=float)
        if self.mu is not None:
            test_data = (test_data - self.mu) / self.sigma

        references = np.asarray(self.train_data, dtype=float)
        labels = np.asarray(self.train_labels)
        k = min(self.k, len(references))
        return [
            self._vote(labels[self._nearest(query, references, k)])
            for query in test_data
        ]


def generate(amount_1, amount_2, amount_3):
    """Sample three 2-D Gaussian classes and return an 80/20 random split.

    Returns ``(x_train, x_test, y_train, y_test)`` with labels 0, 1 and 2.
    """
    mean = (2, 2)
    cov = np.array([[1, 0], [0, 1]])
    x = np.random.multivariate_normal(mean, cov, (amount_1,))

    mean = (4, 6)
    cov = np.array([[2, 0], [0, 2]])
    y = np.random.multivariate_normal(mean, cov, (amount_2,))

    mean = (10, 10)
    cov = np.array([[2, 1], [1, 3]])
    z = np.random.multivariate_normal(mean, cov, (amount_3,))

    data = np.concatenate([x, y, z])
    label = np.concatenate([
        np.zeros((amount_1,), dtype=int),
        np.ones((amount_2,), dtype=int),
        np.ones((amount_3,), dtype=int) * 2,
    ])

    # The split helper is stateless, so a throwaway instance is enough and
    # the function no longer depends on a global ``model`` being defined.
    return KNN().train_test_split(data, label, 0.2)


def display(x, y):
    """Scatter-plot 2-D points ``x`` coloured by their class label ``y`` (0/1/2)."""
    # Imported lazily so the classifier stays usable without matplotlib.
    import matplotlib.pyplot as plt

    x = np.asarray(x)
    y = np.asarray(y)

    fig = plt.figure(figsize=(10, 6))
    ax = fig.add_subplot(111)

    handles = [
        ax.scatter(x[y == cls, 0], x[y == cls, 1], s=30, c=colour)
        for cls, colour in enumerate(("brown", "lime", "darkviolet"))
    ]
    ax.legend(handles, ("A", "B", "C"), loc=0)

    plt.show()
a/assignment-2/.keep b/assignment-2/.keep new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/assignment-2/README.md b/assignment-2/README.md new file mode 100644 index 0000000000000000000000000000000000000000..827aa9f21f075ba56cb82e13340d2f480679e478 --- /dev/null +++ b/assignment-2/README.md @@ -0,0 +1,183 @@ +**1. Mini-batch 函数的实现** + +``` +def mini_batch(dataset, batch_size=128, numpy=False): + +# 仿照batch函数分离data与label + data = [] + label = [] + for each in dataset: + data.append(np.array(each[0])) + label.append(each[1]) + + data = np.array(data) + label = np.array(label) + + + res = [] + +# 以batch_size为单位,从[0,len(data)]分割mini-batch + for start_idx in range(0, data.shape[0], batch_size): + end_idx = min(start_idx + batch_size, len(data)) + res.append((data[start_idx:end_idx],label[start_idx:end_idx])) + + return res +``` + + **2. 模型的训练和测试** + +![输入图片说明](https://images.gitee.com/uploads/images/2021/0430/113123_f204953a_8823823.png "processon.png") + +``` + # 前向传播过程 + def forward(self, x): + x = x.reshape(-1, 28 * 28) + + #################### + # code 6 # + + #x = torch.relu(torch.matmul(x, self.W1)) + # 模仿torch的relu(Matmul.(x,w1))操作,下同 + x = self.relu_1.forward(self.matmul_1.forward(x,self.W1)) + + #x = torch.relu(torch.matmul(x, self.W2)) + x = self.relu_2.forward(self.matmul_2.forward(x,self.W2)) + + #x = torch.matmul(x, self.W3) + #x = torch.softmax(x, 1) + x = self.softmax.forward(self.matmul_3.forward(x,self.W3)) + + #x = torch.log(x) + x = self.log.forward(x) + + #################### + + return x + + # 反向传播过程 + def backward(self, y): + + #################### + # code 7 # + + self.log_grad = self.log.backward(y) + + self.softmax_grad = self.softmax.backward(self.log_grad) + + self.x3_grad, self.W3_grad = self.matmul_3.backward(self.softmax_grad) + + self.relu_2_grad = self.relu_2.backward(self.x3_grad) + + self.x2_grad, self.W2_grad = self.matmul_2.backward(self.relu_2_grad) + + 
self.relu_1_grad = self.relu_1.backward(self.x2_grad) + + self.x1_grad, self.W1_grad = self.matmul_1.backward(self.relu_1_grad) + + + + #################### + + pass +``` + + + + + +| Epoch | Batch_size | +| ---------------- | ------ | +| 3 | 16 | +| 10 | 128 | + + +* Epoch = 3 Batch_size=128 + +[0] Accuracy: 0.9373 +
[1] Accuracy: 0.9583 +
[2] Accuracy: 0.9683 + +![输入图片说明](https://images.gitee.com/uploads/images/2021/0430/102618_2f8a1661_8823823.png "3-128.png") + + + +* Epoch = 3 Batch_size=16 + +[0] Accuracy: 0.9640 +
[1] Accuracy: 0.9676 +
[2] Accuracy: 0.9707 + +![输入图片说明](https://images.gitee.com/uploads/images/2021/0430/103502_131ca59f_8823823.png "3-16.png") + +* Epoch = 10 Batch_size=16 + +[0] Accuracy: 0.9602 +
[1] Accuracy: 0.9657 +
[2] Accuracy: 0.9741 +
[3] Accuracy: 0.9747 +
[4] Accuracy: 0.9701 +
[5] Accuracy: 0.9731 +
[6] Accuracy: 0.9760 +
[7] Accuracy: 0.9763 +
[8] Accuracy: 0.9760 +
[9] Accuracy: 0.9780 + +![输入图片说明](https://images.gitee.com/uploads/images/2021/0430/105531_82cafce0_8823823.png "10-16.png") + + +* Epoch = 10 Batch_size=128 + +[0] Accuracy: 0.9389 +
[1] Accuracy: 0.9585 +
[2] Accuracy: 0.9679 +
[3] Accuracy: 0.9706 +
[4] Accuracy: 0.9746 +
[5] Accuracy: 0.9760 +
[6] Accuracy: 0.9769 +
[7] Accuracy: 0.9777 +
[8] Accuracy: 0.9781 +
"""NumPy operators with manual back-propagation and a three-layer MLP."""
import numpy as np


class NumpyOp:
    """Base class for operators: a cache for forward inputs plus an epsilon."""

    def __init__(self):
        # Values saved by ``forward`` that ``backward`` needs.
        self.memory = {}
        # Small constant used to keep log/division away from zero.
        self.epsilon = 1e-12


class Matmul(NumpyOp):
    """Matrix product ``h = x @ W``."""

    def forward(self, x, W):
        """
        x: shape(N, d)
        w: shape(d, d')
        """
        self.memory['x'] = x
        self.memory['W'] = W
        h = np.matmul(x, W)
        return h

    def backward(self, grad_y):
        """
        grad_y: shape(N, d')

        Returns ``(grad_x, grad_W)`` with the shapes of ``x`` and ``W``.
        """
        x = self.memory['x']
        W = self.memory['W']

        grad_W = np.matmul(x.T, grad_y)
        grad_x = np.matmul(grad_y, W.T)

        return grad_x, grad_W


class Relu(NumpyOp):
    """Element-wise ``max(x, 0)``."""

    def forward(self, x):
        self.memory['x'] = x
        return np.where(x > 0, x, np.zeros_like(x))

    def backward(self, grad_y):
        """
        grad_y: same shape as x
        """
        x = self.memory['x']
        # The derivative is 1 where the input was positive and 0 elsewhere.
        grad_x = np.where(x > 0, grad_y, 0)
        return grad_x


class Log(NumpyOp):
    """Element-wise ``log(x + epsilon)``."""

    def forward(self, x):
        """
        x: shape(N, c)
        """
        out = np.log(x + self.epsilon)
        self.memory['x'] = x

        return out

    def backward(self, grad_y):
        """
        grad_y: same shape as x
        """
        x = self.memory['x']
        grad_x = grad_y / (x + self.epsilon)
        return grad_x


class Softmax(NumpyOp):
    """
    softmax over last dimension
    """

    def forward(self, x):
        """
        x: shape(N, c)
        """
        x = np.asarray(x)
        # Integer input would otherwise truncate every probability to 0.
        if not np.issubdtype(x.dtype, np.floating):
            x = x.astype(np.float64)

        # Subtracting the row maximum leaves the result unchanged but keeps
        # exp() from overflowing on large logits.
        shifted = x - np.max(x, axis=-1, keepdims=True)
        exponentials = np.exp(shifted)
        out = exponentials / np.sum(exponentials, axis=-1, keepdims=True)

        self.memory['x'] = x
        # Cached so that backward does not have to recompute the forward pass.
        self.memory['out'] = out

        return out

    def backward(self, grad_y):
        """
        grad_y: same shape as x
        """
        out = self.memory['out']
        # Row-wise Jacobian-vector product with J = diag(s) - s s^T:
        #   (J g)_j = s_j * (g_j - sum_k s_k g_k)
        weighted_sum = np.sum(grad_y * out, axis=-1, keepdims=True)
        grad_x = out * (grad_y - weighted_sum)

        return grad_x


class NumpyLoss:
    """Negative log-likelihood on log-probabilities with one-hot targets."""

    def __init__(self):
        self.target = None

    def get_loss(self, pred, target):
        self.target = target
        return (-pred * target).sum(axis=1).mean()

    def backward(self):
        return -self.target / self.target.shape[0]


class NumpyModel:
    """784-256-64-10 feed-forward network producing log-probabilities."""

    def __init__(self):
        self.W1 = np.random.normal(size=(28 * 28, 256))
        self.W2 = np.random.normal(size=(256, 64))
        self.W3 = np.random.normal(size=(64, 10))

        # Operators used by forward and backward.
        self.matmul_1 = Matmul()
        self.relu_1 = Relu()
        self.matmul_2 = Matmul()
        self.relu_2 = Relu()
        self.matmul_3 = Matmul()
        self.softmax = Softmax()
        self.log = Log()

        # Updated by backward. softmax_grad, log_grad, ... hold the gradient
        # of the loss with respect to the input of the respective operator.
        self.x1_grad, self.W1_grad = None, None
        self.relu_1_grad = None
        self.x2_grad, self.W2_grad = None, None
        self.relu_2_grad = None
        self.x3_grad, self.W3_grad = None, None
        self.softmax_grad = None
        self.log_grad = None

    def forward(self, x):
        """Return log-softmax scores of shape (N, 10) for a batch of images."""
        x = x.reshape(-1, 28 * 28)

        x = self.relu_1.forward(self.matmul_1.forward(x, self.W1))
        x = self.relu_2.forward(self.matmul_2.forward(x, self.W2))
        x = self.softmax.forward(self.matmul_3.forward(x, self.W3))
        x = self.log.forward(x)

        return x

    def backward(self, y):
        """Back-propagate ``y`` (gradient of the loss w.r.t. the output).

        Walks the operators in reverse order of ``forward`` and stores every
        intermediate gradient on the instance.
        """
        self.log_grad = self.log.backward(y)
        self.softmax_grad = self.softmax.backward(self.log_grad)
        self.x3_grad, self.W3_grad = self.matmul_3.backward(self.softmax_grad)
        self.relu_2_grad = self.relu_2.backward(self.x3_grad)
        self.x2_grad, self.W2_grad = self.matmul_2.backward(self.relu_2_grad)
        self.relu_1_grad = self.relu_1.backward(self.x2_grad)
        self.x1_grad, self.W1_grad = self.matmul_1.backward(self.relu_1_grad)

    def optimize(self, learning_rate):
        """Apply one plain gradient-descent step to the three weight matrices."""
        self.W1 -= learning_rate * self.W1_grad
        self.W2 -= learning_rate * self.W2_grad
        self.W3 -= learning_rate * self.W3_grad
"""Train and evaluate the NumPy feed-forward network on MNIST."""
import numpy as np

from numpy_fnn import NumpyModel, NumpyLoss
from utils import download_mnist, batch, get_torch_initialization, plot_curve, one_hot


def mini_batch(dataset, batch_size=128, numpy=False):
    """Split ``dataset`` into consecutive ``(data, label)`` NumPy batches.

    ``dataset`` is iterated once; element 0 of every sample is converted with
    ``np.array`` and element 1 is taken as the label. The last batch may be
    smaller than ``batch_size``. The order of the samples is preserved (no
    shuffling).

    ``numpy`` is accepted for signature compatibility and is not used here.
    NOTE(review): presumably it mirrors the flag of ``utils.batch`` -- confirm.

    Raises ``ValueError`` if ``batch_size`` is not positive.
    """
    if batch_size <= 0:
        # A negative step would silently produce no batches at all.
        raise ValueError("batch_size must be a positive integer")

    data = []
    label = []
    for sample in dataset:
        data.append(np.array(sample[0]))
        label.append(sample[1])

    data = np.array(data)
    label = np.array(label)

    # Slicing past the end is clamped by NumPy, so no explicit min() is needed.
    return [
        (data[start:start + batch_size], label[start:start + batch_size])
        for start in range(0, data.shape[0], batch_size)
    ]


def numpy_run():
    """Train the NumPy model on MNIST, print test accuracy per epoch, plot loss."""
    train_dataset, test_dataset = download_mnist()

    model = NumpyModel()
    numpy_loss = NumpyLoss()
    model.W1, model.W2, model.W3 = get_torch_initialization()

    train_loss = []

    epoch_number = 3
    learning_rate = 0.1

    # mini_batch is deterministic, so the batches are identical in every
    # epoch; converting the whole training set once avoids repeating that
    # work per epoch.
    train_batches = mini_batch(train_dataset)

    for epoch in range(epoch_number):
        for x, y in train_batches:
            y = one_hot(y)

            y_pred = model.forward(x)
            loss = numpy_loss.get_loss(y_pred, y)

            model.backward(numpy_loss.backward())
            model.optimize(learning_rate)

            train_loss.append(loss.item())

        x, y = batch(test_dataset)[0]
        accuracy = np.mean((model.forward(x).argmax(axis=1) == y))
        print('[{}] Accuracy: {:.4f}'.format(epoch, accuracy))

    plot_curve(train_loss)


if __name__ == "__main__":
    numpy_run()