diff --git a/assignment-1/submission/18307130341/README.md b/assignment-1/submission/18307130341/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..e2d7dbd1aa21f08d8f510d067bd8855b49136783
--- /dev/null
+++ b/assignment-1/submission/18307130341/README.md
@@ -0,0 +1,102 @@
+# Lab Report: ASS1 - KNN Classifier
+
+18307130341 黄韵澄
+
+[toc]
+
+### 1.1 Overview
+
+This experiment implements classification of labelled 2-D points with the k-nearest-neighbour (KNN) method.
+
+In **1.2**, three groups of 2-D points following Gaussian distributions are generated and labelled 0 to 2. The dataset is split into training data (`train_data`, 80%) and test data (`test_data`, 20%), and the training data is used to fit the KNN model.
+
+**1.3** describes the implementation of the KNN model: initialization, model training, and prediction.
+
+**1.4** tests the KNN model and explores its behaviour experimentally.
+
+**1.5** covers the automated testing of the submission.
+
+### 1.2 Dataset Generation
+
+The dataset is generated in the function `data_generate`. For each of the three labels, `num = 400` points are drawn from a 2-D Gaussian distribution defined by its mean `mean` and covariance `cov`. The parameters are:
+$$
+\mu_0 = [6, 4],\ \Sigma_0 = \begin{bmatrix}35&4\\4&11\\\end{bmatrix}\\
+\mu_1 = [11, 14],\ \Sigma_1 = \begin{bmatrix}21&6\\6&24 \\\end{bmatrix}\\
+\mu_2 = [22, 6],\ \Sigma_2 = \begin{bmatrix}25&5\\5&10\\\end{bmatrix}\\
+$$
+The generated points are concatenated into `data`, shuffled with a random permutation, and split: the first 80% becomes `train_data` and the remaining 20% becomes `test_data`.
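+
+A minimal, self-contained sketch of this procedure (the function name `generate` and the combined return value are illustrative; the actual implementation is `data_generate` in `source.py`):
+
+```python
+import numpy as np
+
+def generate(num=400):
+    # One 2-D Gaussian per class, parameters as listed above.
+    means = [(6, 4), (11, 14), (22, 6)]
+    covs = [[[35, 4], [4, 11]], [[21, 6], [6, 24]], [[25, 5], [5, 10]]]
+    data = np.concatenate([np.random.multivariate_normal(m, c, num)
+                           for m, c in zip(means, covs)])
+    label = np.repeat([0, 1, 2], num)
+    # Shuffle points and labels together, then split 80% / 20%.
+    idx = np.random.permutation(3 * num)
+    data, label = data[idx], label[idx]
+    split = int(0.8 * len(data))
+    return (data[:split], label[:split]), (data[split:], label[split:])
+```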
+
+The training and test sets are plotted with `matplotlib`; the scatter plots are shown below:
+
+![Fig 1](img/train_data.png)
+
+Fig 1: Training set
+
+![Fig 2](img/test_data.png)
+
+Fig 2: Test set
+
+### 1.3 Implementation of the KNN Model
+
+#### 1.3.1 Initialization
+
+The KNN model is initialized in the `__init__` method. `data` and `label` hold the candidate points for the nearest-neighbour search, `num` is the number of candidate points, and `k` is the best k selected during training.
+
+#### 1.3.2 Model Training: the fit Function
+
+`train_data` is further split into `train_set_data` and `dev_set_data`. `train_set_data` serves as the candidate point set, and `dev_set_data` is a development set used to select the hyperparameter `k`.
+
+The candidate range for `k` is 1 to 15.
+
+For each candidate k, the development set is classified: each development point is assigned the most frequent label among its k nearest neighbours in the candidate set. The mean prediction accuracy `acc` is computed for every k and plotted below:
+
+![Fig 3](img/k_acc.png)
+
+Fig 3: Accuracy (acc) vs. k
+
+Within a 1% tolerance, the smallest k with the highest accuracy is chosen as the final hyperparameter `k`, which completes training. In the example above, the selected `k` is 6.
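+
+A sketch of the selection loop inside `fit` (names as in `source.py`; at this point `self.data`/`self.label` hold `train_set_data`/`train_set_label`, so `self.predict` searches only the candidate set):
+
+```python
+# Evaluate each candidate k on the development set and keep the smallest k
+# whose accuracy beats the current best by at least 0.01 (the 1% tolerance).
+max_acc, select_k = -1.0, 1
+for k in range(1, 16):
+    self.k = k
+    predict_label = self.predict(dev_set_data)
+    acc = np.mean(np.equal(predict_label, dev_set_label))
+    if acc >= max_acc + 0.01:
+        max_acc, select_k = acc, k
+self.k = select_k
+```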
+
+#### 1.3.3 Prediction: the predict Function
+
+For each point to be classified, the `k` nearest points in the candidate set are found, using Euclidean distance.
+
+The predicted label of each point is the most frequent label among its `k` nearest neighbours, where `k` is the hyperparameter selected in 1.3.2.
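+
+A slightly vectorized restatement of this step (the helper name `knn_predict` and the arguments `candidates`/`candidate_labels`, standing for the stored training points and their labels, are illustrative):
+
+```python
+import numpy as np
+
+def knn_predict(x, candidates, candidate_labels, k):
+    # Euclidean distance from the query point x to every candidate point.
+    dist = np.sqrt(((candidates - x) ** 2).sum(axis=1))
+    # Indices of the k nearest candidates, then a majority vote on their labels.
+    nearest = np.argsort(dist)[:k]
+    return np.argmax(np.bincount(candidate_labels[nearest]))
+```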
+
+### 1.4 Model Testing and Experiments
+
+#### 1.4.1 Model Testing
+
+The model is evaluated on `test_data` and its accuracy is reported. The experiment is repeated 10 times; the results are:
+$$
+\begin{array}{c|cccccccccc} \text{Run}&1&2&3&4&5&6&7&8&9&10\\ \hline k&5&8&9&3&11&6&8&6&9&7\\ \text{acc}&0.83&0.88&0.80&0.83&0.87&0.85&0.88&0.88&0.86&0.88\\ \end{array}
+$$
+
+Table 1: Model accuracy over 10 runs
+
+The selected hyperparameter `k` averages between 7 and 8, and the mean accuracy `acc` is `0.856`.
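+
+Each run in the table is one fit/predict cycle over a freshly generated dataset, essentially mirroring the `__main__` block of `source.py`:
+
+```python
+model = KNN()
+model.fit(train_data, train_label)                    # selects k on an internal dev split
+test_predict = model.predict(test_data)
+acc = np.mean(np.equal(test_predict, test_label))     # fraction of correctly labelled test points
+```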
+
+#### 1.4.2 Further Experiments
+
+(1) Increase the distance between the Gaussian means so that the three clusters are more spread out (less overlap):
+
+![Fig 4](img/Fig4.png)
+
+Fig 4: Point sets after increasing the distance between the Gaussian means
+
+On this dataset the model reaches an accuracy of 92.92%. Repeating the experiment while further increasing the distance between the means supports the conclusion: the farther apart the Gaussian means, the more separated the clusters and the higher the model accuracy.
+
+(2) Reduce the variances of the Gaussian distributions so that each cluster is more concentrated (less overlap):
+
+![Fig 5](img/Fig5.png)
+
+Fig 5: Point sets after reducing the Gaussian variances
+
+On this dataset the model reaches an accuracy of 97.5%. Repeating the experiment while reducing the coordinate variances (the diagonal entries of the covariance matrices) supports the conclusion: the smaller the Gaussian variances, the more concentrated each cluster and the higher the model accuracy.
+
+### 1.5 Automated Testing
+
+The program imports only the `numpy` and `matplotlib` packages.
+
+It passes the automated tests when run in a configured conda environment.
+
+
\ No newline at end of file
diff --git a/assignment-1/submission/18307130341/img/Fig4.png b/assignment-1/submission/18307130341/img/Fig4.png
new file mode 100644
index 0000000000000000000000000000000000000000..73f0b16e63c730206ad6ef5e0e7ae0357edd87e3
Binary files /dev/null and b/assignment-1/submission/18307130341/img/Fig4.png differ
diff --git a/assignment-1/submission/18307130341/img/Fig5.png b/assignment-1/submission/18307130341/img/Fig5.png
new file mode 100644
index 0000000000000000000000000000000000000000..9e268c13558259d449fb5bb068592089c7b8424c
Binary files /dev/null and b/assignment-1/submission/18307130341/img/Fig5.png differ
diff --git a/assignment-1/submission/18307130341/img/k_acc.png b/assignment-1/submission/18307130341/img/k_acc.png
new file mode 100644
index 0000000000000000000000000000000000000000..9be3895d0efe8698d2ad4d4059ea016b7023b36f
Binary files /dev/null and b/assignment-1/submission/18307130341/img/k_acc.png differ
diff --git a/assignment-1/submission/18307130341/img/test_data.png b/assignment-1/submission/18307130341/img/test_data.png
new file mode 100644
index 0000000000000000000000000000000000000000..4542a24e3ccdaa56269acd5055648ada456894e8
Binary files /dev/null and b/assignment-1/submission/18307130341/img/test_data.png differ
diff --git a/assignment-1/submission/18307130341/img/train_data.png b/assignment-1/submission/18307130341/img/train_data.png
new file mode 100644
index 0000000000000000000000000000000000000000..bff7980984d6f66e8d5e9855a95af7f742189149
Binary files /dev/null and b/assignment-1/submission/18307130341/img/train_data.png differ
diff --git a/assignment-1/submission/18307130341/source.py b/assignment-1/submission/18307130341/source.py
new file mode 100644
index 0000000000000000000000000000000000000000..3b0187b0fb635e667cd73d24b168d09004fea303
--- /dev/null
+++ b/assignment-1/submission/18307130341/source.py
@@ -0,0 +1,112 @@
+import numpy as np
+import matplotlib.pyplot as plt
+
+class KNN:
+
+    def __init__(self):
+        self.data = []    # candidate points for the nearest-neighbour search
+        self.label = []   # labels of the candidate points
+        self.num = 0      # number of candidate points
+        self.k = 1        # best k, selected in fit()
+
+    def fit(self, train_data, train_label):
+        self.num, _ = train_data.shape
+
+        # Shuffle the training data, then hold out 20% of it as a development
+        # set for selecting the hyperparameter k.
+        ratio = 0.8
+        idx = np.random.permutation(self.num)
+        train_data = train_data[idx]
+        train_label = train_label[idx]
+
+        train_set_num = int(ratio * self.num)
+        train_set_data = train_data[:train_set_num]
+        train_set_label = train_label[:train_set_num]
+        dev_set_data = train_data[train_set_num:]
+        dev_set_label = train_label[train_set_num:]
+
+        # While tuning k, only the training subset acts as candidate points.
+        self.data = train_set_data
+        self.label = train_set_label
+        self.num = train_set_num
+        max_acc = -1
+        max_k = 15
+
+        acc_k = []
+
+        # Try k = 1 .. max_k; keep the smallest k whose dev accuracy beats the
+        # current best by at least 0.01 (a 1% tolerance).
+        for k in range(1, max_k + 1):
+            self.k = k
+            predict_label = self.predict(dev_set_data)
+            acc = np.mean(np.equal(predict_label, dev_set_label))
+            acc_k.append(acc)
+            if acc >= max_acc + 0.01:
+                max_acc = acc
+                select_k = k
+
+        # Graph_Plot(acc_k, "k_acc")
+
+        # Keep the selected k and use the full training data as candidates.
+        self.k = select_k
+        self.num, _ = train_data.shape
+        self.data = train_data
+        self.label = train_label
+
+    def predict(self, test_data):
+        predict_label = []
+        for x in test_data:
+            # Euclidean distance from x to every candidate point.
+            dis = np.array([np.sqrt(np.sum((x - y) ** 2)) for y in self.data])
+            # Indices of the k nearest candidates, then a majority vote
+            # over their labels.
+            knn = np.argsort(dis)[:self.k]
+            result = np.argmax(np.bincount(self.label[knn]))
+            predict_label.append(result)
+        return predict_label
+
+def data_generate(num):
+    # Draw `num` points per class from three 2-D Gaussians, then shuffle
+    # points and labels together.
+    mean = [(6, 4), (11, 14), (22, 6)]
+    cov = [[[35, 4], [4, 11]], [[21, 6], [6, 24]], [[25, 5], [5, 10]]]
+    data0 = np.random.multivariate_normal(mean[0], cov[0], num)
+    data1 = np.random.multivariate_normal(mean[1], cov[1], num)
+    data2 = np.random.multivariate_normal(mean[2], cov[2], num)
+    data = np.concatenate([data0, data1, data2])
+    label = np.array([0] * num + [1] * num + [2] * num)
+
+    idx = np.random.permutation(3 * num)
+    data = data[idx]
+    label = label[idx]
+
+    return data, label
+
+def Graph_Plot(acc, name):
+    # Plot dev accuracy against k (acc[0] corresponds to k = 1).
+    plt.plot(range(1, len(acc) + 1), acc)
+    plt.xlabel('k')
+    plt.ylabel('acc')
+    plt.savefig(f'img/{name}')
+    plt.close()
+
+def Graph_Scatter(data, label, name):
+    # Group the points by label and draw one scatter series per class.
+    points = [[], [], []]
+    for i in range(len(data)):
+        points[label[i]].append(data[i])
+    for points_set in points:
+        points_set = np.array(points_set)
+        plt.scatter(points_set[:, 0], points_set[:, 1])
+    # plt.show()
+    plt.savefig(f'img/{name}')
+    plt.close()
+
+
+if __name__ == "__main__":
+    num = 400
+    data, label = data_generate(num)
+
+    # First 80% of the shuffled data for training, the remaining 20% for testing.
+    train_num = int(num * 3 * 0.8)
+    train_data = data[:train_num]
+    train_label = label[:train_num]
+    test_data = data[train_num:]
+    test_label = label[train_num:]
+
+    # Graph_Scatter(train_data, train_label, "train_data")
+    # Graph_Scatter(test_data, test_label, "test_data")
+
+    model = KNN()
+    model.fit(train_data, train_label)
+    test_predict = model.predict(test_data)
+    acc = np.mean(np.equal(test_predict, test_label))
+    print("k = ", model.k)
+    print("acc = ", acc)
+