diff --git a/ assignment-3/submission/18340246016/img/.keep b/ assignment-3/submission/18340246016/img/.keep new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/ assignment-3/submission/18340246016/img/Cluster_algo.png b/ assignment-3/submission/18340246016/img/Cluster_algo.png new file mode 100644 index 0000000000000000000000000000000000000000..d597d85c9497f59dfaa86a3127b4372f4ec70b4e Binary files /dev/null and b/ assignment-3/submission/18340246016/img/Cluster_algo.png differ diff --git a/ assignment-3/submission/18340246016/img/GMM_res.png b/ assignment-3/submission/18340246016/img/GMM_res.png new file mode 100644 index 0000000000000000000000000000000000000000..517b0570f48530330f9a9fe424fbdf30f3a2934b Binary files /dev/null and b/ assignment-3/submission/18340246016/img/GMM_res.png differ diff --git a/ assignment-3/submission/18340246016/img/KMeans_res.png b/ assignment-3/submission/18340246016/img/KMeans_res.png new file mode 100644 index 0000000000000000000000000000000000000000..9958ea1eaf7ecb6af3d6499e78799930c7578331 Binary files /dev/null and b/ assignment-3/submission/18340246016/img/KMeans_res.png differ diff --git a/ assignment-3/submission/18340246016/img/Vis_data_2.png b/ assignment-3/submission/18340246016/img/Vis_data_2.png new file mode 100644 index 0000000000000000000000000000000000000000..1c2b76342b7cc39b6bed19f3ebb14c4f30ded45a Binary files /dev/null and b/ assignment-3/submission/18340246016/img/Vis_data_2.png differ diff --git a/ assignment-3/submission/18340246016/img/visual_data_1.png b/ assignment-3/submission/18340246016/img/visual_data_1.png new file mode 100644 index 0000000000000000000000000000000000000000..2591b361a69de753e3119feadf0e159dc5d448c9 Binary files /dev/null and b/ assignment-3/submission/18340246016/img/visual_data_1.png differ diff --git a/ assignment-3/submission/18340246016/readme.md b/ assignment-3/submission/18340246016/readme.md new file mode 100644 
index 0000000000000000000000000000000000000000..df9cbec3f470f5dc0a8f7354f7a144190aab39d7 --- /dev/null +++ b/ assignment-3/submission/18340246016/readme.md @@ -0,0 +1,226 @@ + **1.基础实验部分** + + _1.1 数据生成与可视化实现_ + + +``` +def data_generator(): + # 生成三组multivariate_normal,数据长度为2000 + mean = (1, 2) + cov = np.array([[73, 0], [0, 22]]) + x = np.random.multivariate_normal(mean, cov, (800,)) + + mean = (16, -5) + cov = np.array([[21.2, 0], [0, 32.1]]) + y = np.random.multivariate_normal(mean, cov, (200,)) + + mean = (10, 22) + cov = np.array([[10, 5], [5, 10]]) + z = np.random.multivariate_normal(mean, cov, (1000,)) + + data, _ = shuffle(x, y, z) + return (data, data), 3 +``` + +``` +def shuffle(*datas): + # 随机打乱算法 + data = np.concatenate(datas) + label = np.concatenate([ + np.ones((d.shape[0],), dtype=int)*i + for (i, d) in enumerate(datas) + ]) + N = data.shape[0] + idx = np.arange(N) + np.random.shuffle(idx) + data = data[idx] + label = label[idx] + return data, label +``` + +``` +# 展示函数 +def display(x,y): + type1_x = []; type1_y = [] + type2_x = []; type2_y = [] + type3_x = []; type3_y = [] + + plt.figure(figsize=(8,6)) + + for i in range(0,len(x)): + if(y[i]==0): + type1_x.append(x[i][0]) + type1_y.append(x[i][1]) + if(y[i]==1): + type2_x.append(x[i][0]) + type2_y.append(x[i][1]) + if(y[i]==2): + type3_x.append(x[i][0]) + type3_y.append(x[i][1]) + + fig = plt.figure(figsize = (10, 6)) + ax = fig.add_subplot(111) + + type1 = ax.scatter(type1_x, type1_y, s = 30, c = 'brown') + type2 = ax.scatter(type2_x, type2_y, s = 30, c = 'lime') + type3 = ax.scatter(type3_x, type3_y, s = 30, c = "darkviolet") + + + + ax.legend((type1, type2, type3), ("A", "B", "C"), loc = 0) + + plt.show() + +``` + +$$ +\begin{array}{l} +\Sigma=\left[\begin{array}{cc} +73 & 0 \\\\ +0 & 22 +\end{array}\right] +\mu=\left[\begin{array}{ll} +1 & 2 +\end{array}\right] +\end{array} +$$ + + +$$ +\begin{array}{l} +\Sigma=\left[\begin{array}{cc} +21.2 & 0 \\\\ +0 & 32.1 +\end{array}\right] 
+\mu=\left[\begin{array}{ll} +16 & -5 +\end{array}\right] +\end{array} +$$ + +$$ +\begin{array}{l} +\Sigma=\left[\begin{array}{cc} +10 & 5 \\\\ +5 & 10 +\end{array}\right] +\mu=\left[\begin{array}{ll} +10 & 22 +\end{array}\right] +\end{array} +$$ + + _1.2 样本可视化结果_ + + +![输入图片说明](https://images.gitee.com/uploads/images/2021/0614/122623_858b78c2_8823823.png "屏幕截图.png") + + + _1.3 K-Means分类结果_ + + +``` +(train_data, test_data), n_clusters = data_generator() +exp =KMeans(n_clusters) +exp.fit(train_data) +result = exp.predict(test_data) +display(test_data,exp.predict(test_data)) +``` +![输入图片说明](https://images.gitee.com/uploads/images/2021/0614/122208_5966ec9c_8823823.png "屏幕截图.png") + + + _1.4 GaussianMixture分类结果_ + + +``` +exp =GaussianMixture(n_clusters) +exp.fit(train_data) +result = exp.predict(test_data) +display(test_data,exp.predict(test_data)) +``` + +![输入图片说明](https://images.gitee.com/uploads/images/2021/0614/123050_f971e80a_8823823.png "屏幕截图.png") + + **2.自动选择聚簇数量(Elbow Method 配合 K-Means 算法)** + + _2.1 fit算法实现_ + + +``` + def fit(self, train_data): + train_data=self.normal(train_data) + numpoint = len(train_data) + dist_list =[] + + upper_bound = min([len(train_data)+1,20]) + + # 遍历(1,upperbound)计算distance + for cluster in range(1,upper_bound): + KMean = KMeans(cluster) + Centroids,label = KMean.fit(train_data) + temp_length=0 + for n in range(cluster): + temp_length = temp_length+self.length_cal(Centroids[n],train_data[label==n]) + dist_list.append(temp_length) + + # 当变化率小于5%,选取最佳K + percent_change=[] + for i in range(len(dist_list)-1): + percent_change.append((dist_list[i]-dist_list[i+1])/dist_list[i]*100) + for i in range(len(percent_change)): + if (percent_change[i]<5): + self.optimal = i+1 + print(i+1) + return dist_list,percent_change + break +``` + + _2.2 数据可视化_ + +原始数据 + +$$ +\begin{array}{l} +\Sigma=\left[\begin{array}{cc} +1 & 0 \\\\ +0 & 1 +\end{array}\right] +\mu=\left[\begin{array}{ll} +2 & 2 +\end{array}\right] +\end{array} +$$ + + +$$ 
import numpy as np


class KMeans:
    """Lloyd's k-means clustering with deterministic initialisation.

    The first ``n_clusters`` samples of the training data are used as the
    initial centroids, so the result is reproducible for a fixed sample order.
    """

    def __init__(self, n_clusters, max_iter=300):
        """Store the number of clusters and the iteration cap.

        Args:
            n_clusters: number of centroids to fit.
            max_iter: upper bound on assignment/update rounds; guards against
                label oscillation that would otherwise loop forever.
        """
        self.n_clusters = n_clusters
        self.max_iter = max_iter

    def initCentroids(self, dataSet, k):
        """Return a float *copy* of the first ``k`` rows of ``dataSet``.

        A copy is required: a plain slice is a view, and updating the
        centroids in ``fit`` would silently overwrite the caller's samples.
        """
        return np.array(dataSet[:k], dtype=float)

    def cluster_classifier(self, centroids, dataSet):
        """Return, for every sample, the index of its nearest centroid."""
        centroids = np.asarray(centroids, dtype=float)
        dataSet = np.asarray(dataSet, dtype=float)
        # Squared Euclidean distance, shape (n_centroids, n_samples).  The
        # square root is monotonic, so it is not needed to find the minimum.
        distances = ((dataSet - centroids[:, np.newaxis]) ** 2).sum(axis=2)
        return np.argmin(distances, axis=0)

    def fit(self, train_data):
        """Run k-means until the assignment stops changing.

        Args:
            train_data: array-like of shape (n_samples, n_features).

        Returns:
            Tuple ``(centroids, labels)``: the final centroids and the cluster
            index of every training sample.  The centroids are also stored in
            ``self.Centroids`` for ``predict``.
        """
        data = np.asarray(train_data, dtype=float)
        centroids = self.initCentroids(data, self.n_clusters)
        labels = self.cluster_classifier(centroids, data)

        for _ in range(self.max_iter):
            for i in range(len(centroids)):
                members = data[labels == i]
                # An empty cluster keeps its previous centroid instead of
                # becoming NaN (mean of an empty selection).
                if len(members):
                    # axis=0 gives the per-feature mean, i.e. a real centroid.
                    centroids[i] = members.mean(axis=0)
            updated_labels = self.cluster_classifier(centroids, data)
            converged = np.array_equal(updated_labels, labels)
            labels = updated_labels
            if converged:
                break

        self.Centroids = centroids
        return centroids, labels

    def predict(self, test_data):
        """Assign every test sample to the nearest fitted centroid."""
        return self.cluster_classifier(self.Centroids, test_data)


class GaussianMixture:
    """Gaussian mixture model with diagonal covariances, fitted by EM."""

    # Variance floor / minimum effective cluster weight.
    epsilon = 1e-12

    def __init__(self, n_clusters, max_iter=10, tol=0.0):
        """Store the model hyper-parameters.

        Args:
            n_clusters: number of mixture components.
            max_iter: maximum number of EM rounds.
            tol: EM stops early once neither the means nor the variances move
                by more than ``tol`` (the default 0.0 means "exactly
                unchanged").
        """
        self.n_clusters = n_clusters
        self.max_iter = max_iter
        self.tol = tol

    def normal(self, raw_data, norm=None):
        """Scale ``raw_data`` by ``norm`` (default: its own Frobenius norm).

        Data whose norm is zero is returned unchanged instead of producing
        NaN through a division by zero.
        """
        data = np.asarray(raw_data, dtype=float)
        if norm is None:
            norm = np.linalg.norm(data)
        return data / norm if norm > 0 else data.copy()

    def ini_para(self, n_clusters, train_data):
        """Create the initial parameters ``(Mu, Var, W, Pi)``.

        Means are drawn from a standard normal, variances start at one, and
        both the responsibilities ``W`` and the mixing weights ``Pi`` start
        uniform.
        """
        self.epsilon = 1e-12
        dim = train_data.shape[1]
        self.dim = dim
        mu = np.random.normal(0, 1, size=(n_clusters, dim))
        var = np.ones((n_clusters, dim))
        W = np.ones((len(train_data), n_clusters)) / n_clusters
        Pi = W.sum(axis=0) / W.sum()
        return mu, var, W, Pi

    def multivariate_normal(self, x, mu, cov):
        """Return the multivariate normal density of ``x`` for ``(mu, cov)``."""
        diff = np.asarray(x, dtype=float) - np.asarray(mu, dtype=float)
        scale = 1 / (((2 * np.pi) ** (len(mu) / 2)) * (np.linalg.det(cov) ** 0.5))
        exponent = -0.5 * (diff @ np.linalg.solve(cov, diff))
        return float(scale * np.exp(exponent))

    def update_W(self, X, Mu, Var, Pi):
        """E-step: return the responsibilities, shape (n_points, n_clusters).

        Computed in log space so that points far from every component do not
        underflow to an all-zero row (which would yield 0/0 = NaN).
        """
        X = np.asarray(X, dtype=float)
        Mu = np.asarray(Mu, dtype=float)
        Var = np.asarray(Var, dtype=float)
        Pi = np.asarray(Pi, dtype=float)

        diff = X[:, np.newaxis, :] - Mu[np.newaxis, :, :]
        # Log density of a Gaussian with covariance diag(Var[i]).
        log_pdf = -0.5 * (
            np.log(2 * np.pi * Var).sum(axis=1)[np.newaxis, :]
            + (diff ** 2 / Var[np.newaxis, :, :]).sum(axis=2)
        )
        with np.errstate(divide="ignore"):  # log(0) for an extinct component
            log_w = np.log(Pi)[np.newaxis, :] + log_pdf
        log_w -= log_w.max(axis=1, keepdims=True)
        W = np.exp(log_w)
        W /= W.sum(axis=1, keepdims=True)
        return W

    def update_Pi(self, W):
        """M-step: return the mixing weights (column mass of ``W``)."""
        return W.sum(axis=0) / W.sum()

    def update_Mu(self, X, W):
        """M-step: return the responsibility-weighted mean of each component."""
        X = np.asarray(X, dtype=float)
        # Floor the effective count so an extinct component cannot divide by 0.
        weight = np.maximum(W.sum(axis=0), self.epsilon)
        return (W.T @ X) / weight[:, np.newaxis]

    def update_Var(self, X, Mu, W):
        """M-step: return the weighted per-feature variance of each component."""
        X = np.asarray(X, dtype=float)
        weight = np.maximum(W.sum(axis=0), self.epsilon)
        sq_diff = (X[:, np.newaxis, :] - Mu[np.newaxis, :, :]) ** 2
        weighted = (W[:, :, np.newaxis] * sq_diff).sum(axis=0)
        # epsilon keeps every variance strictly positive.
        return weighted / weight[:, np.newaxis] + self.epsilon

    def fit(self, train_data):
        """Fit the mixture with EM; results go to ``Pi``, ``W``, ``Mu``, ``Var``.

        The training norm is remembered so that ``predict`` can apply the
        same scaling to unseen data.
        """
        X = np.asarray(train_data, dtype=float)
        self._norm = np.linalg.norm(X)
        X = self.normal(X, self._norm)

        Mu, Var, W, Pi = self.ini_para(self.n_clusters, X)

        for _ in range(self.max_iter):
            prev_Mu, prev_Var = Mu, Var
            W = self.update_W(X, Mu, Var, Pi)
            Pi = self.update_Pi(W)
            Mu = self.update_Mu(X, W)
            Var = self.update_Var(X, Mu, W)

            unchanged = (
                np.abs(Mu - prev_Mu).max() <= self.tol
                and np.abs(Var - prev_Var).max() <= self.tol
            )
            if unchanged:
                break

        self.Pi = Pi
        self.W = W
        self.Mu = Mu
        self.Var = Var

    def predict(self, test_data):
        """Return the most probable component index for every test sample."""
        # Use the training scale; rescaling by the test set's own norm would
        # move the samples relative to the fitted means.
        test_data = self.normal(test_data, getattr(self, "_norm", None))
        prob = self.update_W(test_data, self.Mu, self.Var, self.Pi)
        return np.argmax(prob, axis=1)


class ClusteringAlgorithm:
    """K-means with the number of clusters chosen by the elbow method."""

    def __init__(self, max_clusters=19, threshold=5.0):
        """Store the search settings.

        Args:
            max_clusters: largest cluster count that is tried.
            threshold: relative improvement, in percent, below which adding
                one more cluster is considered not worthwhile.
        """
        self.max_clusters = max_clusters
        self.threshold = threshold

    def normal(self, raw_data):
        """Scale ``raw_data`` by its Frobenius norm (unchanged if the norm is 0)."""
        data = np.asarray(raw_data, dtype=float)
        norm = np.linalg.norm(data)
        return data / norm if norm > 0 else data.copy()

    def length_cal(self, Centroids, points):
        """Return the summed Euclidean distance from ``points`` to one centroid."""
        points = np.asarray(points, dtype=float)
        if points.size == 0:
            return 0.0
        return float(np.sqrt(((points - Centroids) ** 2).sum(axis=-1)).sum())

    def fit(self, train_data):
        """Choose the cluster count and store it in ``self.optimal``.

        For K = 1 .. min(n_samples, max_clusters) the total within-cluster
        distance is measured; the first K whose successor improves it by less
        than ``threshold`` percent is selected.  When no step falls below the
        threshold, the largest K tried is used.

        Returns:
            Tuple ``(dist_list, percent_change)``: the distortion per K and
            the percentage drop between consecutive K values.

        Raises:
            ValueError: if ``train_data`` is empty.
        """
        data = self.normal(train_data)
        if len(data) == 0:
            raise ValueError("train_data must contain at least one sample")

        dist_list = []
        for cluster in range(1, min(len(data), self.max_clusters) + 1):
            centroids, label = KMeans(cluster).fit(data)
            dist_list.append(sum(
                self.length_cal(centroids[n], data[label == n])
                for n in range(cluster)
            ))

        # A distortion of zero cannot improve any further: report 0 % instead
        # of dividing by zero.
        percent_change = [
            (prev - nxt) / prev * 100 if prev > 0 else 0.0
            for prev, nxt in zip(dist_list, dist_list[1:])
        ]

        self.optimal = next(
            (i + 1 for i, change in enumerate(percent_change)
             if change < self.threshold),
            len(dist_list),
        )
        return dist_list, percent_change

    def predict(self, test_data):
        """Cluster ``test_data`` with k-means using the selected cluster count."""
        test_data = self.normal(test_data)
        model = KMeans(self.optimal)
        model.fit(test_data)
        return model.predict(test_data)
+++ b/assignment-1/submission/18340246016/README.md @@ -0,0 +1,485 @@ + **1. KNN实现过程** + +*1.1 辅助方法的实现* + +``` +# 这里我们用train_test_split实现训练集与验证集以给定的比例划分与打乱 +def train_test_split(self,x,y,rate): + shuffled_indexes = np.random.permutation(len(x)) + test_size = int(len(x) * rate) + train_index = shuffled_indexes[test_size:] + test_index = shuffled_indexes[:test_size] + return x[train_index], x[test_index], y[train_index], y[test_index] +``` + + +``` +# 用distance方法计算两组向量之间的欧式距离 +def distance(self,v1,v2): + + weight_array = (v1-v2)**2 + weight_array_sum = np.sum(weight_array) + return weight_array_sum**(0.5) +``` + + + +*1.2 fit方法的实现* + + +``` +# 我们用fit方法实现 1. 对训练数据的归一化 2. 训练数据内部subdivide为训练集与测试集,取最优K值 3. 将训练数据赋值self.data +def fit(self, train_data, train_label): + + # 归一化 + mu = np.mean(train_data, axis=0) + sigma = np.std(train_data, axis=0) + train_data = (train_data - mu) / sigma + + # 划分训练集/验证集 with rate =0.3 + X_train, X_test, Y_train, Y_test = self.train_test_split(train_data,train_label,0.3) + + # 对于不同的K[1-0.5*len(train)],计算验证集到训练集的欧氏距离 + best_k=0 + k_candi=0; + for k in range(1,int(0.5*len(X_train))+1): + + true_couter=0 + for test_counter in range(0,len(X_test)): + pos_vec_list=[] + + for train_counter in range(0,len(X_train)): + pos_vec = np.array([self.distance(X_test[test_counter],X_train[train_counter]),Y_train[train_counter]]) + pos_vec_list.append(pos_vec) + + #对距离list根据距离排序 + pos_vec_list = np.array(pos_vec_list) + pos_vec_list_sorted = pos_vec_list[np.lexsort(pos_vec_list[:,::-1].T)] + #k-近邻结果列表 + result_list = pos_vec_list_sorted[:k][:,1] + + + + #test预测结果 + label = int(result_list[np.argmax(result_list)]) + + + #检验本次test在给定k下是否正确 + if (label == Y_test[test_counter] ): + true_couter=true_couter+1 + + + #最优K取值 + if (true_couter >= best_k): + best_k = true_couter + k_candi = k + + self.k = k_candi + self.train_data = train_data + self.train_labels = train_label + return self.k +``` + + +*1.3 predict方法的实现* + +``` + # 根据fit方法带入的数据data以及训练结果K,预测test_label + 
def predict(self, test_data): + test_label=[] + result_list=[] + + # 归一化原始训练数据 + mu = np.mean(test_data, axis=0) + sigma = np.std(test_data, axis=0) + test_data = (test_data - mu) / sigma + + for i in range (0,len(test_data)): + pos_vec_list=[] + for m in range(0,len(self.train_data)): + pos_vec = np.array([self.distance(self.train_data[m],test_data[i]),self.train_labels[m]]) + pos_vec_list.append(pos_vec) + + + # KNN结果列表 + pos_vec_list = np.array(pos_vec_list) + pos_vec_list_sorted = pos_vec_list[np.lexsort(pos_vec_list[:,::-1].T)] + + result_list = pos_vec_list_sorted[:(self.k)][:,1] + test_label.append(result_list[np.argmax(result_list)]) + + return test_label +``` + + + + **2. 高斯分布探究试验** + +*2.0 生成以及展示函数* + + +``` +def generate (amount_1,amount_2,amount_3): + + mean = (2, 2) + cov = np.array([[1,0], [0, 1]]) + x = np.random.multivariate_normal(mean, cov, (amount_1,)) + + mean = (4, 6) + cov = np.array([[2, 0], [0, 2]]) + y = np.random.multivariate_normal(mean, cov, (amount_2,)) + + mean = (10, 10) + cov = np.array([[2,1],[1,3]]) + z = np.random.multivariate_normal(mean, cov, (amount_3,)) + + + data = np.concatenate([x,y,z]) + + label = np.concatenate([ + np.zeros((amount_1,),dtype=int), + np.ones((amount_2,),dtype=int), + np.ones((amount_3,),dtype=int)*2 + ]) + + return model.train_test_split(data,label,0.2) + +``` + + +``` +# 展示函数 +def display(x,y): + type1_x = []; type1_y = [] + type2_x = []; type2_y = [] + type3_x = []; type3_y = [] + + plt.figure(figsize=(8,6)) + + for i in range(0,len(x)): + if(y[i]==0): + type1_x.append(x[i][0]) + type1_y.append(x[i][1]) + if(y[i]==1): + type2_x.append(x[i][0]) + type2_y.append(x[i][1]) + if(y[i]==2): + type3_x.append(x[i][0]) + type3_y.append(x[i][1]) + + fig = plt.figure(figsize = (10, 6)) + ax = fig.add_subplot(111) + + type1 = ax.scatter(type1_x, type1_y, s = 30, c = 'brown') + type2 = ax.scatter(type2_x, type2_y, s = 30, c = 'lime') + type3 = ax.scatter(type3_x, type3_y, s = 30, c = "darkviolet") + + + + 
ax.legend((type1, type2, type3), ("A", "B", "C"), loc = 0) + + plt.show() + +``` + + + +***2.1 均值集中 + xy分布分散 = 对应的k*** + +$$ +\begin{array}{l} +\Sigma=\left[\begin{array}{cc} +4 & 0 \\\\ +0 & 2 +\end{array}\right] +\mu=\left[\begin{array}{ll} +10 & 10 +\end{array}\right] +\end{array} +$$ + + +$$ +\begin{array}{l} +\Sigma=\left[\begin{array}{cc} +2 & 0 \\\\ +0 & 4 +\end{array}\right] +\mu=\left[\begin{array}{ll} +8 & 8 +\end{array}\right] +\end{array} +$$ + +$$ +\begin{array}{l} +\Sigma=\left[\begin{array}{cc} +8 & 1 \\\\ +1 & 6 +\end{array}\right] +\mu=\left[\begin{array}{ll} +12 & 12 +\end{array}\right] +\end{array} +$$ + + +*训练集分布* + +![输入图片说明](https://images.gitee.com/uploads/images/2021/0401/111859_687c068b_8823823.png "屏幕截图.png") + +*测试集分布* + +![输入图片说明](https://images.gitee.com/uploads/images/2021/0401/111906_d0e2e134_8823823.png "屏幕截图.png") + +*K与准确率结果* +| k | 准确率 | +| ---------------- | ------ | +| 4 | 62.5% | + +***2.2 mean集中 + xy集中 = 对应的k*** + +$$ +\begin{array}{l} +\Sigma=\left[\begin{array}{cc} +1 & 0 \\\\ +0 & 1 +\end{array}\right] +\mu=\left[\begin{array}{ll} +10 & 10 +\end{array}\right] +\end{array} +$$ + + +$$ +\begin{array}{l} +\Sigma=\left[\begin{array}{cc} +2 & 0 \\\\ +0 & 1 +\end{array}\right] +\mu=\left[\begin{array}{ll} +8 & 8 +\end{array}\right] +\end{array} +$$ + +$$ +\begin{array}{l} +\Sigma=\left[\begin{array}{cc} +2 & 1 \\\\ +1 & 1 +\end{array}\right] +\mu=\left[\begin{array}{ll} +12 & 12 +\end{array}\right] +\end{array} +$$ + +*训练集分布* + +![训练集分布](https://images.gitee.com/uploads/images/2021/0401/105433_e7ec4619_8823823.png "屏幕截图.png") + +*测试集分布* + +![测试集分布](https://images.gitee.com/uploads/images/2021/0401/105459_56af4a90_8823823.png "屏幕截图.png") + +*K与准确率结果* +| k | 准确率 | +| ---------------- | ------ | +| 1 | 78.75% | + + +***2.3 mean分散 + xy分散 = 对应的k*** + + +$$ +\begin{array}{l} +\Sigma=\left[\begin{array}{cc} +4 & 0 \\\\ +0 & 2 +\end{array}\right] +\mu=\left[\begin{array}{ll} +2 & 2 +\end{array}\right] +\end{array} +$$ + + +$$ 
+\begin{array}{l} +\Sigma=\left[\begin{array}{cc} +2 & 0 \\\\ +0 & 4 +\end{array}\right] +\mu=\left[\begin{array}{ll} +4 & 6 +\end{array}\right] +\end{array} +$$ + +$$ +\begin{array}{l} +\Sigma=\left[\begin{array}{cc} +8 & 1 \\\\ +1 & 6 +\end{array}\right] +\mu=\left[\begin{array}{ll} +10 & 10 +\end{array}\right] +\end{array} +$$ + + +*训练集分布* + +![输入图片说明](https://images.gitee.com/uploads/images/2021/0401/112426_09535d36_8823823.png "屏幕截图.png") + +*测试集分布* + +![输入图片说明](https://images.gitee.com/uploads/images/2021/0401/112437_53a32eec_8823823.png "屏幕截图.png") + +*K与准确率结果* + + +| k | 准确率 | +| ---------------- | ------ | +| 2 | 86.25% | + + +***2.4 mean分散 + xy集中 = 对应的k*** + + + + +$$ +\begin{array}{l} +\Sigma=\left[\begin{array}{cc} +1 & 0 \\\\ +0 & 1 +\end{array}\right] +\mu=\left[\begin{array}{ll} +2 & 2 +\end{array}\right] +\end{array} +$$ + + +$$ +\begin{array}{l} +\Sigma=\left[\begin{array}{cc} +2 & 0 \\\\ +0 & 2 +\end{array}\right] +\mu=\left[\begin{array}{ll} +4 & 6 +\end{array}\right] +\end{array} +$$ + +$$ +\begin{array}{l} +\Sigma=\left[\begin{array}{cc} +2 & 1 \\\\ +1 & 3 +\end{array}\right] +\mu=\left[\begin{array}{ll} +10 & 10 +\end{array}\right] +\end{array} +$$ + + +*训练集分布* + +![输入图片说明](https://images.gitee.com/uploads/images/2021/0401/112759_754208e4_8823823.png "屏幕截图.png") + +*测试集分布* + +![输入图片说明](https://images.gitee.com/uploads/images/2021/0401/112810_ca9d4230_8823823.png "屏幕截图.png") + +*K与准确率结果* + + +| k | 准确率 | +| ---------------- | ------ | +| 1 | 95.625% | + + +***2.5 有关高斯分布的结论*** +1. 与直观体会到的类似,模型准确率与高斯分布均值离散程度正相关,与xy方差负相关 +2. 模型在均值分散,方差集中的情况下表现最好,在均值集中,方差分散的情况下表现最差 +3. 最佳K的取值与准确率无直接联系,准确性更多取决于分布情况 + + + +**3. 
K值 ** + +***3.1 不同量下的best_k*** + +(这里以2.4高斯分布为例) + +``` +# 准备数据集维度 +amount_list = [[10,10,10],[50,50,50],[100,100,100],[150,50,200],[200,200,200],[250,300,400]] +k_list=[] +aug_amount = [30,150,300,400,600,950] + +# 在不同数据集量下训练最佳k值 +for i in range (0,len(amount_list)): + model = KNN() + X_train, X_test, Y_train, Y_test = generate(amount_list[i][0],amount_list[i][1],amount_list[i][2]) + k_list.append(model.fit(X_train, Y_train)) + +# 画图 +l1=plt.plot(aug_amount,k_list,'r--',label='Best-K-Value') +plt.title('The relationship between # elements and |K|') +plt.xlabel('Total # of elements') +plt.ylabel('K-value') +plt.legend() + +``` +![输入图片说明](https://images.gitee.com/uploads/images/2021/0401/120033_088f8aa7_8823823.png "屏幕截图.png") + + +1. K取值与元素数量无关 +2. 最佳K会趋向于一个较小的值,所以在fit时考虑遍历[1,10],无需遍历更大的K(导致额外无用运算) + + +**3.2 不同K下的acc** + + +``` +# 因为我实现的KNN函数没有K参数(学习最优K),这里采用SKlearn数据库中的KNeighborsClassifier来做本次探究 +from sklearn.datasets import load_iris +from sklearn.model_selection import train_test_split +from sklearn.neighbors import KNeighborsClassifier + +# 导入sklearn iris数据库并分类 +iris = load_iris() +X_train, X_test, y_train, y_test = train_test_split(iris.data, iris.target, test_size = 0.25, random_state = 33) + +# 在不同k值下计算accuracy +acc_list=[] + +for i in range (1,100): + + + knc = KNeighborsClassifier(n_neighbors=i) + knc.fit(X_train, y_train) + y_predict = knc.predict(X_test) + + acc_list.append(knc.score(X_test, y_test)) + +# 画图 +l1=plt.plot(range(1,100),acc_list,'green',label='Accuracy') +plt.title('The relationship between K-choice and Accuracy') +plt.xlabel('K-Value') +plt.ylabel('Accuracy') +plt.legend() + +``` +![输入图片说明](https://images.gitee.com/uploads/images/2021/0401/122104_30809a7a_8823823.png "屏幕截图.png") + +1. 同3.1结论[2],最佳K的取值会聚焦于一个较小的值 +2. 
iris数据集大小为150,可以看到超过0.5*len 之后准确率锁定在50%(几乎约等于瞎猜) diff --git a/assignment-1/submission/18340246016/img/.keep b/assignment-1/submission/18340246016/img/.keep new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/assignment-1/submission/18340246016/img/105433_e7ec4619_8823823.png b/assignment-1/submission/18340246016/img/105433_e7ec4619_8823823.png new file mode 100644 index 0000000000000000000000000000000000000000..2b3ebe23a4c696d374084525296b704bc1e94461 Binary files /dev/null and b/assignment-1/submission/18340246016/img/105433_e7ec4619_8823823.png differ diff --git a/assignment-1/submission/18340246016/img/105459_56af4a90_8823823.png b/assignment-1/submission/18340246016/img/105459_56af4a90_8823823.png new file mode 100644 index 0000000000000000000000000000000000000000..8ecab29162d04ce0e20d91129786ace0133752dd Binary files /dev/null and b/assignment-1/submission/18340246016/img/105459_56af4a90_8823823.png differ diff --git a/assignment-1/submission/18340246016/img/111859_687c068b_8823823.png b/assignment-1/submission/18340246016/img/111859_687c068b_8823823.png new file mode 100644 index 0000000000000000000000000000000000000000..38182b7678184b3e73071c3a0b77496698405b6c Binary files /dev/null and b/assignment-1/submission/18340246016/img/111859_687c068b_8823823.png differ diff --git a/assignment-1/submission/18340246016/img/111906_d0e2e134_8823823.png b/assignment-1/submission/18340246016/img/111906_d0e2e134_8823823.png new file mode 100644 index 0000000000000000000000000000000000000000..b5fd60ce1e7218c3359d9739ae304ead9ee6e16b Binary files /dev/null and b/assignment-1/submission/18340246016/img/111906_d0e2e134_8823823.png differ diff --git a/assignment-1/submission/18340246016/img/112426_09535d36_8823823.png b/assignment-1/submission/18340246016/img/112426_09535d36_8823823.png new file mode 100644 index 0000000000000000000000000000000000000000..a1db91fe434924aafbc734b235647d73fb4229dd Binary files 
/dev/null and b/assignment-1/submission/18340246016/img/112426_09535d36_8823823.png differ diff --git a/assignment-1/submission/18340246016/img/112437_53a32eec_8823823.png b/assignment-1/submission/18340246016/img/112437_53a32eec_8823823.png new file mode 100644 index 0000000000000000000000000000000000000000..7731028adc12f28c461efc7240b6e5fa95c12702 Binary files /dev/null and b/assignment-1/submission/18340246016/img/112437_53a32eec_8823823.png differ diff --git a/assignment-1/submission/18340246016/img/112759_754208e4_8823823.png b/assignment-1/submission/18340246016/img/112759_754208e4_8823823.png new file mode 100644 index 0000000000000000000000000000000000000000..913e3801971de18d4f609181dad97957b03a142b Binary files /dev/null and b/assignment-1/submission/18340246016/img/112759_754208e4_8823823.png differ diff --git a/assignment-1/submission/18340246016/img/112810_ca9d4230_8823823.png b/assignment-1/submission/18340246016/img/112810_ca9d4230_8823823.png new file mode 100644 index 0000000000000000000000000000000000000000..34ea7339a55877d026ae45bc7d32ee349f4b6533 Binary files /dev/null and b/assignment-1/submission/18340246016/img/112810_ca9d4230_8823823.png differ diff --git a/assignment-1/submission/18340246016/img/120033_088f8aa7_8823823.png b/assignment-1/submission/18340246016/img/120033_088f8aa7_8823823.png new file mode 100644 index 0000000000000000000000000000000000000000..48293c8c963882364c7642e833597be03e206027 Binary files /dev/null and b/assignment-1/submission/18340246016/img/120033_088f8aa7_8823823.png differ diff --git a/assignment-1/submission/18340246016/img/122104_30809a7a_8823823.png b/assignment-1/submission/18340246016/img/122104_30809a7a_8823823.png new file mode 100644 index 0000000000000000000000000000000000000000..0343da404b4c87d03aebfde3016b60e6b55cb836 Binary files /dev/null and b/assignment-1/submission/18340246016/img/122104_30809a7a_8823823.png differ diff --git a/assignment-1/submission/18340246016/source.py 
from collections import Counter

import numpy as np


class KNN:
    """k-nearest-neighbours classifier that selects k on a hold-out split."""

    def __init__(self):
        self.train_data = None    # standardised training samples
        self.train_labels = None  # labels aligned with train_data
        self.k = None             # number of neighbours chosen by fit
        self.mu = None            # per-feature mean of the training data
        self.sigma = None         # per-feature std of the training data

    def train_test_split(self, x, y, rate):
        """Shuffle ``x``/``y`` together and split off a ``rate`` fraction.

        Returns:
            ``(x_train, x_test, y_train, y_test)``.
        """
        x = np.asarray(x)
        y = np.asarray(y)
        shuffled_indexes = np.random.permutation(len(x))
        test_size = int(len(x) * rate)
        train_index = shuffled_indexes[test_size:]
        test_index = shuffled_indexes[:test_size]
        return x[train_index], x[test_index], y[train_index], y[test_index]

    def distance(self, v1, v2):
        """Return the Euclidean distance between two vectors."""
        return np.sum((np.asarray(v1) - np.asarray(v2)) ** 2) ** 0.5

    def _ranked_labels(self, queries, refs, ref_labels, max_k):
        """Return, per query, the labels of its ``max_k`` nearest references.

        Each row is ordered nearest first.  Distances are computed once for
        all queries instead of inside a Python double loop.
        """
        queries = np.asarray(queries, dtype=float)
        refs = np.asarray(refs, dtype=float)
        ref_labels = np.asarray(ref_labels)
        diff = queries[:, np.newaxis] - refs[np.newaxis]
        # Sum over the feature axes only (axes 0/1 index query/reference).
        sq_dist = (diff ** 2).sum(axis=tuple(range(2, diff.ndim)))
        order = np.argsort(sq_dist, axis=1, kind="stable")[:, :max_k]
        return ref_labels[order].tolist()

    @staticmethod
    def _vote(labels):
        """Return the most frequent label; ties go to the nearest neighbour."""
        return Counter(labels).most_common(1)[0][0]

    def fit(self, train_data, train_label, val_rate=0.3, max_k=19):
        """Standardise the data and pick k by hold-out validation.

        Args:
            train_data: array-like of shape (n_samples, n_features).
            train_label: labels aligned with ``train_data``.
            val_rate: fraction of the samples held out for validation.
            max_k: largest neighbourhood size that is tried.

        Returns:
            The selected k.  On equal validation accuracy the larger k wins.

        Raises:
            ValueError: if ``train_data`` is empty.
        """
        train_data = np.asarray(train_data, dtype=float)
        train_label = np.asarray(train_label)
        if len(train_data) == 0:
            raise ValueError("train_data must contain at least one sample")

        # Standardise; the statistics are kept so predict can reuse them.
        self.mu = train_data.mean(axis=0)
        sigma = train_data.std(axis=0)
        # A constant feature has zero std; dividing by 1 avoids NaN.
        self.sigma = np.where(sigma > 0, sigma, 1.0)
        train_data = (train_data - self.mu) / self.sigma

        X_train, X_val, Y_train, Y_val = self.train_test_split(
            train_data, train_label, val_rate
        )

        max_k = max(1, min(max_k, len(X_train)))
        ranked = self._ranked_labels(X_val, X_train, Y_train, max_k)
        truths = Y_val.tolist()

        best_hits, best_k = -1, 1
        for k in range(1, max_k + 1):
            hits = sum(
                self._vote(row[:k]) == truth
                for row, truth in zip(ranked, truths)
            )
            if hits >= best_hits:
                best_hits, best_k = hits, k

        self.k = best_k
        self.train_data = train_data
        self.train_labels = train_label
        return self.k

    def predict(self, test_data):
        """Return the majority-vote label of the k nearest training samples.

        Test samples are standardised with the *training* mean and std, so a
        single sample (whose own std is zero) is handled correctly.

        Returns:
            A list with one label per test sample.
        """
        test_data = np.asarray(test_data, dtype=float)
        if self.mu is not None:
            test_data = (test_data - self.mu) / self.sigma
        ranked = self._ranked_labels(
            test_data, self.train_data, self.train_labels, self.k
        )
        return [self._vote(row) for row in ranked]


def generate(amount_1, amount_2, amount_3):
    """Sample three 2-D Gaussian classes and return an 80/20 split.

    Returns:
        ``(x_train, x_test, y_train, y_test)`` with labels 0, 1 and 2.
    """
    components = (
        ((2, 2), np.array([[1, 0], [0, 1]]), amount_1),
        ((4, 6), np.array([[2, 0], [0, 2]]), amount_2),
        ((10, 10), np.array([[2, 1], [1, 3]]), amount_3),
    )
    data = np.concatenate([
        np.random.multivariate_normal(mean, cov, (amount,))
        for mean, cov, amount in components
    ])
    label = np.concatenate([
        np.full((amount,), index, dtype=int)
        for index, (_, _, amount) in enumerate(components)
    ])
    # The split does not depend on any fitted state, so a fresh KNN is used
    # rather than an externally defined global model.
    return KNN().train_test_split(data, label, 0.2)


def display(x, y):
    """Scatter-plot the 2-D samples ``x`` coloured by their label ``y``.

    NOTE(review): relies on ``plt`` (presumably ``matplotlib.pyplot``) being
    bound at module level; this file does not import it - confirm and add
    the import where the plotting dependency is available.
    """
    x = np.asarray(x)
    y = np.asarray(y)

    fig = plt.figure(figsize=(10, 6))
    ax = fig.add_subplot(111)

    handles = [
        ax.scatter(x[y == label, 0], x[y == label, 1], s=30, c=colour)
        for label, colour in enumerate(("brown", "lime", "darkviolet"))
    ]
    ax.legend(handles, ("A", "B", "C"), loc=0)

    plt.show()
a/assignment-2/submission/18340246016/README.md b/assignment-2/submission/18340246016/README.md new file mode 100644 index 0000000000000000000000000000000000000000..be11b1ddc0e6d6648014b9e637b8dbf07fd0f8e9 --- /dev/null +++ b/assignment-2/submission/18340246016/README.md @@ -0,0 +1,183 @@ +**1. Mini-batch 函数的实现** + +``` +def mini_batch(dataset, batch_size=128, numpy=False): + +# 仿照batch函数分离data与label + data = [] + label = [] + for each in dataset: + data.append(np.array(each[0])) + label.append(each[1]) + + data = np.array(data) + label = np.array(label) + + + res = [] + +# 以batch_size为单位,从[0,len(data)]分割mini-batch + for start_idx in range(0, data.shape[0], batch_size): + end_idx = min(start_idx + batch_size, len(data)) + res.append((data[start_idx:end_idx],label[start_idx:end_idx])) + + return res +``` + + **2. 模型的训练和测试** + +![输入图片说明](https://images.gitee.com/uploads/images/2021/0430/113123_f204953a_8823823.png "processon.png") + +``` + # 前向传播过程 + def forward(self, x): + x = x.reshape(-1, 28 * 28) + + #################### + # code 6 # + + #x = torch.relu(torch.matmul(x, self.W1)) + # 模仿torch的relu(Matmul.(x,w1))操作,下同 + x = self.relu_1.forward(self.matmul_1.forward(x,self.W1)) + + #x = torch.relu(torch.matmul(x, self.W2)) + x = self.relu_2.forward(self.matmul_2.forward(x,self.W2)) + + #x = torch.matmul(x, self.W3) + #x = torch.softmax(x, 1) + x = self.softmax.forward(self.matmul_3.forward(x,self.W3)) + + #x = torch.log(x) + x = self.log.forward(x) + + #################### + + return x + + # 反向传播过程 + def backward(self, y): + + #################### + # code 7 # + + self.log_grad = self.log.backward(y) + + self.softmax_grad = self.softmax.backward(self.log_grad) + + self.x3_grad, self.W3_grad = self.matmul_3.backward(self.softmax_grad) + + self.relu_2_grad = self.relu_2.backward(self.x3_grad) + + self.x2_grad, self.W2_grad = self.matmul_2.backward(self.relu_2_grad) + + self.relu_1_grad = self.relu_1.backward(self.x2_grad) + + self.x1_grad, self.W1_grad = 
self.matmul_1.backward(self.relu_1_grad) + + + + #################### + + pass +``` + + + + + +| Epoch | Batch_size | +| ---------------- | ------ | +| 3 | 16 | +| 10 | 128 | + + +* Epoch = 3 Batch_size=128 + +[0] Accuracy: 0.9373 +
[1] Accuracy: 0.9583 +
[2] Accuracy: 0.9683 + +![输入图片说明](https://images.gitee.com/uploads/images/2021/0430/102618_2f8a1661_8823823.png "3-128.png") + + + +* Epoch = 3 Batch_size=16 + +[0] Accuracy: 0.9640 +
[1] Accuracy: 0.9676 +
[2] Accuracy: 0.9707 + +![输入图片说明](https://images.gitee.com/uploads/images/2021/0430/103502_131ca59f_8823823.png "3-16.png") + +* Epoch = 10 Batch_size=16 + +[0] Accuracy: 0.9602 +
[1] Accuracy: 0.9657 +
[2] Accuracy: 0.9741 +
[3] Accuracy: 0.9747 +
[4] Accuracy: 0.9701 +
[5] Accuracy: 0.9731 +
[6] Accuracy: 0.9760 +
[7] Accuracy: 0.9763 +
[8] Accuracy: 0.9760 +
[9] Accuracy: 0.9780 + +![输入图片说明](https://images.gitee.com/uploads/images/2021/0430/105531_82cafce0_8823823.png "10-16.png") + + +* Epoch = 10 Batch_size=128 + +[0] Accuracy: 0.9389 +
[1] Accuracy: 0.9585 +
[2] Accuracy: 0.9679 +
[3] Accuracy: 0.9706 +
[4] Accuracy: 0.9746 +
[5] Accuracy: 0.9760 +
[6] Accuracy: 0.9769 +
[7] Accuracy: 0.9777 +
[8] Accuracy: 0.9781 +
[9] Accuracy: 0.9781 + +![输入图片说明](https://images.gitee.com/uploads/images/2021/0430/111052_ddb4ecda_8823823.png "10-128.png") + + + +*探究发现* +1. mini-batch size太小的情况下,模型的波动率非常高,并不是对batch_size越细分模型的准确率就越高 +2. 从epoch的数量上来看,epoch的数量与模型accuracy有正相关性 + + + **3.Momentum 优化函数下的对比试验** + + +``` + # 利用momentum优化下的optimize函数 + def optimize(self, learning_rate): + + # 初始化V值 + v1=0 + v2=0 + v3=0 + mu=0.9 + + # momentum下的更新算法 + v1 = mu * v1 - learning_rate * self.W1_grad + self.W1 = self.W1 + v1 + + v2 = mu * v2 - learning_rate * self.W2_grad + self.W2 = self.W2 + v2 + + v3 = mu * v3 - learning_rate * self.W3_grad + self.W3 = self.W3 + v3 + + + #self.W1 -= learning_rate * self.W1_grad + # self.W2 -= learning_rate * self.W2_grad + #self.W3 -= learning_rate * self.W3_grad +``` + +![输入图片说明](https://images.gitee.com/uploads/images/2021/0430/115301_1fa39f2b_8823823.png "屏幕截图.png") + +1. 在我们的测试数据下,momentum与梯度下降模型准确率差异不大。需要注意:上面的 optimize 在每次调用时都会把 v1、v2、v3 重新置为 0,因此动量项 mu * v 恒为 0,实际的参数更新与普通梯度下降完全相同;要真正比较 momentum 的效果,需要把 v 保存为在多次调用之间持续累积的成员变量 +2. momentum主要用来帮助优化过程越过局部最优和平坦区域、加快收敛,但在本实验中没有观察到由此带来的准确率差异 +3. 理论上来说,当 momentum 动量越大时,其转换为势能的能量也就越大,就越有可能摆脱局部凹域的束缚,进入全局凹域 \ No newline at end of file diff --git a/assignment-2/submission/18340246016/img/10-128.png b/assignment-2/submission/18340246016/img/10-128.png new file mode 100644 index 0000000000000000000000000000000000000000..62352ed1838be0f2b7ded908e7cf94416690e663 Binary files /dev/null and b/assignment-2/submission/18340246016/img/10-128.png differ diff --git a/assignment-2/submission/18340246016/img/10-16.png b/assignment-2/submission/18340246016/img/10-16.png new file mode 100644 index 0000000000000000000000000000000000000000..47c83d74dd1d85afc8f27bb53d21ad210b47a543 Binary files /dev/null and b/assignment-2/submission/18340246016/img/10-16.png differ diff --git a/assignment-2/submission/18340246016/img/3-128.png b/assignment-2/submission/18340246016/img/3-128.png new file mode 100644 index 0000000000000000000000000000000000000000..3bcc33c65c07a1c12625ab0193e69588eb7cc5a8 Binary files /dev/null and b/assignment-2/submission/18340246016/img/3-128.png differ diff
# File: assignment-2/submission/18340246016/numpy_fnn.py
"""NumPy operators with hand-written backward passes, plus a 3-layer network."""

import numpy as np


class NumpyOp:
    """Base class for operators that cache forward values for backward()."""

    def __init__(self):
        # Values stored by forward() and read back by backward().
        self.memory = {}
        # Small constant that keeps Log away from log(0) and division by zero.
        self.epsilon = 1e-12


class Matmul(NumpyOp):
    """Matrix product ``h = x @ W``."""

    def forward(self, x, W):
        """Return ``x @ W`` and cache both operands.

        x: shape(N, d)
        W: shape(d, d')
        """
        self.memory['x'] = x
        self.memory['W'] = W
        return np.matmul(x, W)

    def backward(self, grad_y):
        """Return ``(grad_x, grad_W)`` for the cached operands.

        grad_y: shape(N, d')
        """
        x = self.memory['x']
        W = self.memory['W']

        grad_W = np.matmul(x.T, grad_y)
        grad_x = np.matmul(grad_y, W.T)
        return grad_x, grad_W


class Relu(NumpyOp):
    """Element-wise rectified linear unit."""

    def forward(self, x):
        """Return ``x`` where it is positive and 0 elsewhere."""
        self.memory['x'] = x
        return np.where(x > 0, x, np.zeros_like(x))

    def backward(self, grad_y):
        """Pass ``grad_y`` through where the cached input was positive.

        grad_y: same shape as x
        """
        x = self.memory['x']
        # The derivative is 1 for x > 0 and 0 otherwise (including x == 0).
        return grad_y * (x > 0)


class Log(NumpyOp):
    """Element-wise natural logarithm, offset by ``epsilon`` for stability."""

    def forward(self, x):
        """Return ``log(x + epsilon)``.

        x: shape(N, c)
        """
        self.memory['x'] = x
        return np.log(x + self.epsilon)

    def backward(self, grad_y):
        """Return ``grad_y / (x + epsilon)`` for the cached input.

        grad_y: same shape as x
        """
        x = self.memory['x']
        return grad_y / (x + self.epsilon)


class Softmax(NumpyOp):
    """
    softmax over last dimension
    """

    def forward(self, x):
        """Return the softmax of ``x`` along its last axis.

        x: shape(N, c)

        The row maximum is subtracted before exponentiating. This leaves the
        result unchanged mathematically but prevents ``exp`` from overflowing
        for large logits. ``np.exp`` also promotes integer input to floating
        point, so integer logits are no longer truncated.
        """
        x = np.asarray(x)
        shifted = x - np.max(x, axis=-1, keepdims=True)
        exp = np.exp(shifted)
        out = exp / np.sum(exp, axis=-1, keepdims=True)

        self.memory['x'] = x
        # Cached so backward() does not have to recompute the forward pass.
        self.memory['out'] = out
        return out

    def backward(self, grad_y):
        """Return the gradient with respect to the softmax input.

        grad_y: same shape as x

        With ``s = softmax(x)`` the Jacobian is ``diag(s) - s s^T``, so the
        vector-Jacobian product is ``s * (grad_y - sum(grad_y * s))`` per row.
        """
        out = self.memory['out']
        weighted = grad_y * out
        return weighted - out * np.sum(weighted, axis=-1, keepdims=True)


class NumpyLoss:
    """Loss ``mean_over_rows(sum(-pred * target))``.

    With log-probability predictions and one-hot targets this is the mean
    negative log-likelihood.
    """

    def __init__(self):
        self.target = None

    def get_loss(self, pred, target):
        """Cache ``target`` and return the scalar loss."""
        self.target = target
        return (-pred * target).sum(axis=1).mean()

    def backward(self):
        """Return the gradient of the loss with respect to ``pred``."""
        return -self.target / self.target.shape[0]


class NumpyModel:
    """784 -> 256 -> 64 -> 10 network: two ReLU layers, then log-softmax."""

    def __init__(self):
        self.W1 = np.random.normal(size=(28 * 28, 256))
        self.W2 = np.random.normal(size=(256, 64))
        self.W3 = np.random.normal(size=(64, 10))

        # Operators used by forward() and backward().
        self.matmul_1 = Matmul()
        self.relu_1 = Relu()
        self.matmul_2 = Matmul()
        self.relu_2 = Relu()
        self.matmul_3 = Matmul()
        self.softmax = Softmax()
        self.log = Log()

        # Filled in by backward(). softmax_grad, log_grad, etc. hold each
        # operator's backward output (d loss / d operator input).
        self.x1_grad, self.W1_grad = None, None
        self.relu_1_grad = None
        self.x2_grad, self.W2_grad = None, None
        self.relu_2_grad = None
        self.x3_grad, self.W3_grad = None, None
        self.softmax_grad = None
        self.log_grad = None

        # Per-weight velocity used by optimize() when momentum is enabled.
        self._velocity = {}

    def forward(self, x):
        """Return log-probabilities of shape (N, 10) for a batch of inputs.

        ``x`` is reshaped to (N, 784) first.
        """
        x = x.reshape(-1, 28 * 28)

        # Equivalent to torch.relu(torch.matmul(x, W1)); same for layer 2.
        x = self.relu_1.forward(self.matmul_1.forward(x, self.W1))
        x = self.relu_2.forward(self.matmul_2.forward(x, self.W2))

        # Equivalent to torch.log(torch.softmax(torch.matmul(x, W3), 1)).
        x = self.softmax.forward(self.matmul_3.forward(x, self.W3))
        x = self.log.forward(x)

        return x

    def backward(self, y):
        """Back-propagate ``y`` (d loss / d output) and store every gradient.

        Operators are visited in the reverse order of forward(). Results are
        stored on the model; nothing is returned.
        """
        self.log_grad = self.log.backward(y)
        self.softmax_grad = self.softmax.backward(self.log_grad)

        self.x3_grad, self.W3_grad = self.matmul_3.backward(self.softmax_grad)
        self.relu_2_grad = self.relu_2.backward(self.x3_grad)

        self.x2_grad, self.W2_grad = self.matmul_2.backward(self.relu_2_grad)
        self.relu_1_grad = self.relu_1.backward(self.x2_grad)

        self.x1_grad, self.W1_grad = self.matmul_1.backward(self.relu_1_grad)

    def optimize(self, learning_rate, momentum=0.0):
        """Update the weights in place from the stored gradients.

        Args:
            learning_rate: Step size.
            momentum: Momentum coefficient. The default 0.0 is plain gradient
                descent. A non-zero value applies ``v = momentum * v -
                learning_rate * grad`` then ``W += v``, with ``v`` kept
                between calls.
        """
        if not momentum:
            self.W1 -= learning_rate * self.W1_grad
            self.W2 -= learning_rate * self.W2_grad
            self.W3 -= learning_rate * self.W3_grad
            return

        for name in ('W1', 'W2', 'W3'):
            grad = getattr(self, name + '_grad')
            weight = getattr(self, name)
            velocity = self._velocity.get(name)
            # Start from rest, also when the weights were replaced by arrays
            # of a different shape since the last step.
            if velocity is None or velocity.shape != grad.shape:
                velocity = np.zeros_like(grad)
            velocity = momentum * velocity - learning_rate * grad
            self._velocity[name] = velocity
            weight += velocity
# File: assignment-2/submission/18340246016/numpy_mnist.py
"""Train the NumPy feed-forward network on MNIST and report test accuracy."""

import numpy as np
from numpy_fnn import NumpyModel, NumpyLoss
from utils import download_mnist, batch, get_torch_initialization, plot_curve, one_hot


def mini_batch(dataset, batch_size=128, numpy=False):
    """Split ``dataset`` into consecutive mini-batches of NumPy arrays.

    Args:
        dataset: Iterable of items whose element 0 is the sample and whose
            element 1 is the label.
        batch_size: Maximum number of samples per batch; must be positive.
        numpy: Unused. Kept so the existing call signature still works.

    Returns:
        A list of ``(data, label)`` tuples in dataset order. The last batch
        is shorter when the dataset size is not a multiple of ``batch_size``.

    Raises:
        ValueError: If ``batch_size`` is not positive. A negative value
            previously produced an empty list, which made training silently
            do nothing.
    """
    if batch_size <= 0:
        raise ValueError(
            'batch_size must be a positive integer, got {!r}'.format(batch_size))

    # Separate samples from labels in one pass over the dataset.
    data = []
    label = []
    for each in dataset:
        data.append(np.array(each[0]))
        label.append(each[1])

    data = np.array(data)
    label = np.array(label)

    # Slicing clamps at the end of the array, so the last batch needs no
    # special handling.
    return [
        (data[start:start + batch_size], label[start:start + batch_size])
        for start in range(0, data.shape[0], batch_size)
    ]


def numpy_run():
    """Train for a fixed number of epochs, print accuracy, plot the loss."""
    train_dataset, test_dataset = download_mnist()

    model = NumpyModel()
    numpy_loss = NumpyLoss()
    model.W1, model.W2, model.W3 = get_torch_initialization()

    train_loss = []

    epoch_number = 3
    learning_rate = 0.1

    # mini_batch() does not shuffle, so the batches are built once instead of
    # re-converting the whole dataset every epoch.
    # NOTE(review): assumes iterating train_dataset yields the same samples
    # each time (no random transforms) - confirm against download_mnist().
    train_batches = mini_batch(train_dataset)

    for epoch in range(epoch_number):
        for x, y in train_batches:
            y = one_hot(y)

            y_pred = model.forward(x)
            loss = numpy_loss.get_loss(y_pred, y)

            model.backward(numpy_loss.backward())
            model.optimize(learning_rate)

            train_loss.append(loss.item())

        # Evaluate on the first batch returned by batch() for the test set.
        x, y = batch(test_dataset)[0]
        accuracy = np.mean((model.forward(x).argmax(axis=1) == y))
        print('[{}] Accuracy: {:.4f}'.format(epoch, accuracy))

    plot_curve(train_loss)


if __name__ == "__main__":
    numpy_run()