diff --git a/assignment-1/submission/17307100038/README.md b/assignment-1/submission/17307100038/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..7e209eba20b4f93de8019d642a8fdc6979914d38
--- /dev/null
+++ b/assignment-1/submission/17307100038/README.md
@@ -0,0 +1,362 @@
+# Course Report
+
+## KNN Class Implementation
+
+### fit()
+
+fit(X, y, cate='euclidean', metric='accuracy', preprocess=None)
+
+X: training data
+
+y: training labels
+
+cate: distance metric, e.g. euclidean or manhattan distance
+
+metric: evaluation metric, e.g. accuracy
+
+preprocess: preprocessing mode: min-max normalization ('Min_Max'), z-score standardization ('Z_score'), or None for no preprocessing
+
+fit() does the following:
+
+1. preprocessing;
+2. randomly shuffling the data set;
+3. splitting the data into train_data and dev_data at a ratio of 8:2 and picking the k with the best evaluation score on the dev set.
+
+### predict()
+
+predict() predicts the labels of the test samples.
+
+### Helper functions
+
+distance(d1, d2)
+
+d1 and d2 are the two points whose distance is computed; the metric stored in cate is used, euclidean by default, with manhattan as an alternative.
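+
+As a quick illustration of the interface above, here is a minimal usage sketch (assuming the data file produced by `python source.py g`; the variable names are only examples):
+
+```python
+import numpy as np
+from source import KNN, read
+
+# load the dataset previously saved to data.npy
+(train_data, train_label), (test_data, test_label) = read()
+
+# fit() internally shuffles, splits off a dev set and searches k in [2, 15)
+model = KNN()
+model.fit(train_data, train_label, cate='manhattan',
+          metric='accuracy', preprocess='Z_score')
+
+y_pred = model.predict(test_data)
+print("accuracy =", np.mean(y_pred == test_label))
+```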
+
+## Experiment 1
+
+### Group 1: the classes are far apart and arranged roughly along a line
+
+$$
+\Sigma_1 =
+ \left[
+ \begin{matrix}
+  52 & 0 \\
+  0 & 22
+ \end{matrix}
+ \right]
+\quad
+\Sigma_2 =
+ \left[
+ \begin{matrix}
+  21.1 & 0 \\
+  0 & 32.1
+ \end{matrix}
+ \right]
+\quad
+\Sigma_3 =
+ \left[
+ \begin{matrix}
+  10 & 0 \\
+  0 & 10
+ \end{matrix}
+ \right]
+$$
+
+$$
+\mu_1 =
+ \left[
+ \begin{matrix}
+  2 & 5
+ \end{matrix}
+ \right]
+\quad
+\mu_2 =
+ \left[
+ \begin{matrix}
+  20 & -5
+ \end{matrix}
+ \right]
+\quad
+\mu_3 =
+ \left[
+ \begin{matrix}
+  -5 & 22
+ \end{matrix}
+ \right]
+$$
+
+Training set:
+
+![train_g1](img/train_g1.png)
+
+Test set:
+
+![test_g1](img/test_g1.png)
+
+Accuracy under the two distance metrics:
+
+| k    | distance  | acc     |
+| ---- | --------- | ------- |
+| 8    | euclidean | 96.250% |
+| 9    | euclidean | 95.625% |
+| 3    | euclidean | 95.833% |
+| 13   | euclidean | 96.458% |
+| 3    | manhattan | 95.417% |
+| 13   | manhattan | 96.250% |
+| 5    | manhattan | 95.625% |
+| 5    | manhattan | 95.625% |
+
+### Group 2: the classes are far apart and scattered more evenly
+
+$$
+\Sigma_1 =
+ \left[
+ \begin{matrix}
+  52 & 0 \\
+  0 & 22
+ \end{matrix}
+ \right]
+\quad
+\Sigma_2 =
+ \left[
+ \begin{matrix}
+  21.1 & 0 \\
+  0 & 32.1
+ \end{matrix}
+ \right]
+\quad
+\Sigma_3 =
+ \left[
+ \begin{matrix}
+  10 & 0 \\
+  0 & 10
+ \end{matrix}
+ \right]
+$$
+
+$$
+\mu_1 =
+ \left[
+ \begin{matrix}
+  2 & 5
+ \end{matrix}
+ \right]
+\quad
+\mu_2 =
+ \left[
+ \begin{matrix}
+  20 & 16
+ \end{matrix}
+ \right]
+\quad
+\mu_3 =
+ \left[
+ \begin{matrix}
+  -5 & 22
+ \end{matrix}
+ \right]
+$$
+
+Training set:
+
+![train_g2](img/train_g2.png)
+
+Test set:
+
+![test_g2](img/test_g2.png)
+
+Accuracy under the two distance metrics:
+
+| k    | distance  | acc     |
+| ---- | --------- | ------- |
+| 7    | euclidean | 96.875% |
+| 7    | euclidean | 96.875% |
+| 9    | euclidean | 97.083% |
+| 8    | euclidean | 97.083% |
+| 12   | manhattan | 97.708% |
+| 14   | manhattan | 97.500% |
+| 5    | manhattan | 97.083% |
+| 12   | manhattan | 97.708% |
+
+*The geometric arrangement of the clusters has no obvious effect on KNN accuracy.*
+
+## Experiment 2
+
+Keeping the means fixed, every entry of the covariance matrices is scaled by a factor ranging from 1x up to 2x.
+
+$$
+\Sigma_1 =
+ \left[
+ \begin{matrix}
+  52 & 0 \\
+  0 & 22
+ \end{matrix}
+ \right]
+\quad
+\Sigma_2 =
+ \left[
+ \begin{matrix}
+  21.1 & 0 \\
+  0 & 32.1
+ \end{matrix}
+ \right]
+\quad
+\Sigma_3 =
+ \left[
+ \begin{matrix}
+  10 & 0 \\
+  0 & 10
+ \end{matrix}
+ \right]
+$$
+
+$$
+\mu_1 =
+ \left[
+ \begin{matrix}
+  2 & 5
+ \end{matrix}
+ \right]
+\quad
+\mu_2 =
+ \left[
+ \begin{matrix}
+  20 & 16
+ \end{matrix}
+ \right]
+\quad
+\mu_3 =
+ \left[
+ \begin{matrix}
+  -5 & 22
+ \end{matrix}
+ \right]
+$$
+
+The resulting change in accuracy:
+
+![change_cov](img/change_cov.png)
+
+*The variance has a significant effect on KNN accuracy: as the variance grows, the accuracy drops.*
+
+## Experiment 3
+
+Compare the results with and without normalization / standardization.
+
+$$
+\Sigma_1 =
+ \left[
+ \begin{matrix}
+  20 & 0 \\
+  0 & 1250
+ \end{matrix}
+ \right]
+\quad
+\Sigma_2 =
+ \left[
+ \begin{matrix}
+  25 & 0 \\
+  0 & 2500
+ \end{matrix}
+ \right]
+\quad
+\Sigma_3 =
+ \left[
+ \begin{matrix}
+  10 & 0 \\
+  0 & 950
+ \end{matrix}
+ \right]
+$$
+
+$$
+\mu_1 =
+ \left[
+ \begin{matrix}
+  2 & 5
+ \end{matrix}
+ \right]
+\quad
+\mu_2 =
+ \left[
+ \begin{matrix}
+  10 & -60
+ \end{matrix}
+ \right]
+\quad
+\mu_3 =
+ \left[
+ \begin{matrix}
+  -5 & 72
+ \end{matrix}
+ \right]
+$$
+
+No preprocessing:
+
+![data_original](img/data_original.png)
+
+Min-max normalization:
+
+![data_minmax](img/data_minmax.png)
+
+Z-score standardization:
+
+![data_zscore](img/data_zscore.png)
+
+The corresponding accuracies:
+
+| preprocessing | accuracy |
+| ------------- | -------- |
+| None          | 82.917%  |
+| min_max       | 83.542%  |
+| z_score       | 84.17%   |
+
+Reducing the gap between the means and variances and re-running the experiment gives:
+
+$$
+\Sigma_1 =
+ \left[
+ \begin{matrix}
+  20 & 0 \\
+  0 & 750
+ \end{matrix}
+ \right]
+\quad
+\Sigma_2 =
+ \left[
+ \begin{matrix}
+  25 & 0 \\
+  0 & 1200
+ \end{matrix}
+ \right]
+\quad
+\Sigma_3 =
+ \left[
+ \begin{matrix}
+  10 & 0 \\
+  0 & 650
+ \end{matrix}
+ \right]
+$$
+
+$$
+\mu_1 =
+ \left[
+ \begin{matrix}
+  2 & 5
+ \end{matrix}
+ \right]
+\quad
+\mu_2 =
+ \left[
+ \begin{matrix}
+  10 & -50
+ \end{matrix}
+ \right]
+\quad
+\mu_3 =
+ \left[
+ \begin{matrix}
+  -5 & 55
+ \end{matrix}
+ \right]
+$$
+
+| preprocessing | accuracy |
+| ------------- | -------- |
+| None          | 90.417%  |
+| min_max       | 90.625%  |
+| z_score       | 90.833%  |
+
+*Standardization and normalization give a modest accuracy improvement; the larger the difference in scale between features, the more pronounced the effect.*
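+
+For reference, the two preprocessing schemes compared above amount to the following feature-wise transformations (a minimal NumPy sketch, not the submitted implementation; the statistics are computed on the training set and reused on the test set, as the KNN class does):
+
+```python
+import numpy as np
+
+def min_max(train_X, test_X):
+    # min-max normalization: rescale every feature to [0, 1]
+    lo, hi = train_X.min(axis=0), train_X.max(axis=0)
+    return (train_X - lo) / (hi - lo), (test_X - lo) / (hi - lo)
+
+def z_score(train_X, test_X):
+    # z-score standardization: zero mean and unit variance per feature
+    mu, sigma = train_X.mean(axis=0), train_X.std(axis=0)
+    return (train_X - mu) / sigma, (test_X - mu) / sigma
+```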
+
+## Summary
+
+1. The geometric arrangement of the different classes has little effect on KNN prediction accuracy.
+
+2. The variance has a significant effect on accuracy: as the variance grows, the accuracy drops.
+
+3. Standardization and normalization give a modest accuracy improvement; the larger the difference in scale between features, the more pronounced the effect, while the gain is negligible when the feature scales are similar.
\ No newline at end of file
diff --git a/assignment-1/submission/17307100038/img/change_cov.png b/assignment-1/submission/17307100038/img/change_cov.png
new file mode 100644
index 0000000000000000000000000000000000000000..90c6e3d31b490ac4e6f2e9a05f21f24bc71627ea
Binary files /dev/null and b/assignment-1/submission/17307100038/img/change_cov.png differ
diff --git a/assignment-1/submission/17307100038/img/data_minmax.png b/assignment-1/submission/17307100038/img/data_minmax.png
new file mode 100644
index 0000000000000000000000000000000000000000..2bf4c70c5448506cd1bb4c074e8a1a9e569c7716
Binary files /dev/null and b/assignment-1/submission/17307100038/img/data_minmax.png differ
diff --git a/assignment-1/submission/17307100038/img/data_original.png b/assignment-1/submission/17307100038/img/data_original.png
new file mode 100644
index 0000000000000000000000000000000000000000..76b9b4aa00c3807e7eb0c973d717e15b8f6ebdc4
Binary files /dev/null and b/assignment-1/submission/17307100038/img/data_original.png differ
diff --git a/assignment-1/submission/17307100038/img/data_zscore.png b/assignment-1/submission/17307100038/img/data_zscore.png
new file mode 100644
index 0000000000000000000000000000000000000000..c79fe49fa23ed2cf8aec87519e4770fd9b3930aa
Binary files /dev/null and b/assignment-1/submission/17307100038/img/data_zscore.png differ
diff --git a/assignment-1/submission/17307100038/img/test_g1.png b/assignment-1/submission/17307100038/img/test_g1.png
new file mode 100644
index 0000000000000000000000000000000000000000..6ba84cf0de903969371c4bb50b7dd8da40b2f1e4
Binary files /dev/null and b/assignment-1/submission/17307100038/img/test_g1.png differ
diff --git a/assignment-1/submission/17307100038/img/test_g2.png b/assignment-1/submission/17307100038/img/test_g2.png
new file mode 100644
index 0000000000000000000000000000000000000000..2155370c1ac0fa5544e7e9e4c9baee3b53fb834e
Binary files /dev/null and b/assignment-1/submission/17307100038/img/test_g2.png differ
diff --git a/assignment-1/submission/17307100038/img/train_g1.png b/assignment-1/submission/17307100038/img/train_g1.png
new file mode 100644
index 0000000000000000000000000000000000000000..1b1c264c47eadb1f85822cf8ab1364ced2405f8d
Binary files /dev/null and b/assignment-1/submission/17307100038/img/train_g1.png differ
diff --git a/assignment-1/submission/17307100038/img/train_g2.png b/assignment-1/submission/17307100038/img/train_g2.png
new file mode 100644
index 0000000000000000000000000000000000000000..5530bce8dde2a7a3787fa58ee3e9a37b45726b02
Binary files /dev/null and b/assignment-1/submission/17307100038/img/train_g2.png differ
diff --git a/assignment-1/submission/17307100038/source.py b/assignment-1/submission/17307100038/source.py
new file mode 100644
index 0000000000000000000000000000000000000000..be07e0492b7b13cde2148ce694ddd252ad0426dc
--- /dev/null
+++ b/assignment-1/submission/17307100038/source.py
@@ -0,0 +1,227 @@
+import sys
+import numpy as np
+import matplotlib.pyplot as plt
+
+class KNN:
+    def __init__(self):
+        self.X = None
+        self.y = None
+        self.k = None
+        self.cate = None        # distance metric
+        self.metric = None      # evaluation metric, e.g. accuracy
+        self.preprocess = None
+        self.min = None
+        self.max = None
+        self.mean = None
+        self.std = None
+
+    def distance(self, d1, d2):
+        '''Distance between two points, e.g. euclidean or manhattan.'''
+        if self.cate == 'euclidean':
+            # squared euclidean distance; the sqrt is omitted because it does not change the neighbour ranking
+            dist = np.sum(np.square(d1 - d2))
+        elif self.cate == 'manhattan':
+            dist = np.sum(np.abs(d1 - d2))
+        return dist
+
+    def score(self, y_pred, test_label):
+        '''Score the predictions, e.g. accuracy (macro_f1 / micro_f1 could be added here).'''
+        if self.metric == 'accuracy':
+            cnt = 0
+            for i in range(len(y_pred)):
+                if y_pred[i] == test_label[i]:
+                    cnt += 1
+            score = cnt / len(y_pred)
+        return score
+
+    def fit(self, X, y, cate='euclidean', metric='accuracy', preprocess=None):
+        '''Preprocess the data, select k and store the training set.'''
+        self.cate = cate
+        self.metric = metric
+        self.preprocess = preprocess
+
+        # 1. preprocessing
+        if preprocess == 'Min_Max':    # min-max normalization
+            self.min = X.min(axis=0)
+            self.max = X.max(axis=0)
+            X = (X - self.min) / (self.max - self.min)
+        elif preprocess == 'Z_score':  # z-score standardization
+            self.mean = X.mean(axis=0)
+            self.std = X.std(axis=0)
+            X = (X - self.mean) / self.std
+        else:
+            X = X
+
+        # 2. shuffle the data
+        random_index = np.random.permutation(len(X))
+        X = X[random_index]
+        y = y[random_index]
+
+        # 3. split into train_data and dev_data
+        N = X.shape[0]
+        cut = int(N * 0.8)  # guard against a non-integer split point
+        train_data, dev_data = X[:cut, ], X[cut:, ]
+        train_label, dev_label = y[:cut, ], y[cut:, ]
+
+        # 4. select k
+        max_score = 0
+        max_score_K = 0
+        for k in range(2, 15):
+            # For each k, compute the dev accuracy:
+            # 1. for every dev point, compute its distance to every training point
+            # 2. sort the distances and take the k nearest indices
+            # 3. predict the dev point's label by majority vote
+            # 4. compute the accuracy
+            y_pred = []
+            for i in range(len(dev_data)):
+                dist_arr = [self.distance(dev_data[i], train_data[j]) for j in range(len(train_data))]  # distances from this dev point to every training point
+                sorted_index = np.argsort(dist_arr)      # indices sorted by distance
+                first_k_index = sorted_index[:k]         # indices of the k nearest neighbours
+                first_k_label = train_label[first_k_index]
+                y_pred.append(np.argmax(np.bincount(first_k_label)))  # majority vote
+            y_pred = np.array(y_pred)
+            score = self.score(y_pred, dev_label)
+
+            if score > max_score:
+                max_score, max_score_K = score, k
+
+        # 5. store the parameters
+        self.X = X
+        self.y = y
+        self.k = max_score_K
+        # print('k:%d' % self.k)
+
+    def predict(self, test_data):
+        # preprocessing
+        if self.preprocess == 'Min_Max':    # min-max normalization
+            test_data = (test_data - self.min) / (self.max - self.min)
+        elif self.preprocess == 'Z_score':  # z-score standardization
+            test_data = (test_data - self.mean) / self.std
+        else:
+            test_data = test_data
+
+        y_pred = []
+        for i in range(len(test_data)):
+            dist_arr = [self.distance(test_data[i], self.X[j]) for j in range(len(self.X))]
+            first_k_index = np.argsort(dist_arr)[:self.k]
+            first_k_label = self.y[first_k_index]
+            y_pred.append(np.argmax(np.bincount(first_k_label)))
+        return np.array(y_pred)
+
+
+def generate():
+    mean = (2, 5)
+    cov = np.array([[20, 0], [0, 750]])
+    x = np.random.multivariate_normal(mean, cov, (800,))
+
+    mean = (10, -60)
+    cov = np.array([[25, 0], [0, 2500]])
+    y = np.random.multivariate_normal(mean, cov, (600,))
+
+    mean = (-5, 72)
+    cov = np.array([[10, 0], [0, 650]])
+    z = np.random.multivariate_normal(mean, cov, (1000,))
+
+    idx = np.arange(2400)
+    np.random.shuffle(idx)
+    data = np.concatenate([x, y, z])
+    label = np.concatenate([
+        np.zeros((800,), dtype=int),
+        np.ones((600,), dtype=int),
+        np.ones((1000,), dtype=int) * 2
+    ])
+    data = data[idx]
+    label = label[idx]
+
+    train_data, test_data = data[:1920, ], data[1920:, ]
+    train_label, test_label = label[:1920, ], label[1920:, ]
+    np.save("data.npy", (
+        (train_data, train_label), (test_data, test_label)
+    ))
+
+
+def read():
+    (train_data, train_label), (test_data, test_label) = np.load("data.npy", allow_pickle=True)
+    return (train_data, train_label), (test_data, test_label)
+
+
+def display(data, label, name):
+    # group the points by class label
+    datas = [[], [], []]
+    for i in range(len(data)):
+        datas[label[i]].append(data[i])
+
+    for each in datas:
+        each = np.array(each)
+        plt.scatter(each[:, 0], each[:, 1])
+    # save and show once, after all three classes have been plotted
+    plt.savefig(f'img/{name}')
+    plt.show()
+
+
+# Test the effect of scaling the covariance on the accuracy.
+def generate_ball(r=1):
+    mean = (2, 5)
+    cov = np.array([[40, 0], [0, 30]])
+    x = np.random.multivariate_normal(mean, cov * r, (800,))
+
+    mean = (20, 16)
+    cov = np.array([[25, 0], [0, 35.1]])
+    y = np.random.multivariate_normal(mean, cov * r, (600,))
+
+    mean = (-5, 22)
+    cov = np.array([[30, 0], [0, 25]])
+    z = np.random.multivariate_normal(mean, cov * r, (1000,))
+
+    idx = np.arange(2400)
+    np.random.shuffle(idx)
+    data = np.concatenate([x, y, z])
+    label = np.concatenate([
+        np.zeros((800,), dtype=int),
+        np.ones((600,), dtype=int),
+        np.ones((1000,), dtype=int) * 2
+    ])
+    data = data[idx]
+    label = label[idx]
+
+    train_data, test_data = data[:1920, ], data[1920:, ]
+    train_label, test_label = label[:1920, ], label[1920:, ]
+    return train_data, train_label, test_data, test_label
+
+
+def change_cov():
+    acc_1 = []
+    acc_2 = []
+    for each in np.arange(1, 2.1, 0.1):
+        train_data, train_label, test_data, test_label = generate_ball(r=each)
+        # euclidean
+        model = KNN()
+        model.fit(train_data, train_label, cate='euclidean', metric='accuracy')
+        res = model.predict(test_data)
+        acc1 = np.mean(np.equal(res, test_label))
+        acc_1.append(acc1)
+        # manhattan
+        model = KNN()
+        model.fit(train_data, train_label, cate='manhattan', metric='accuracy')
+        res = model.predict(test_data)
+        acc2 = np.mean(np.equal(res, test_label))
+        acc_2.append(acc2)
+    plt.plot(np.arange(1, 2.1, 0.1), acc_1, color='r')
+    plt.plot(np.arange(1, 2.1, 0.1), acc_2, color='b')
+    plt.title('accuracy at different cov')
+    plt.legend(['euclidean', 'manhattan'])
+    plt.savefig('change_cov.png')
+
+
+if __name__ == "__main__":
+    if len(sys.argv) > 1 and sys.argv[1] == "g":
+        generate()
+    if len(sys.argv) > 1 and sys.argv[1] == "d":
+        (train_data, train_label), (test_data, test_label) = read()
+        display(train_data, train_label, 'train')
+        display(test_data, test_label, 'test')
+    else:
+        (train_data, train_label), (test_data, test_label) = read()
+
+        model = KNN()
+        # choose the distance metric and the evaluation metric
+        model.fit(train_data, train_label, cate='manhattan', metric='accuracy')
+        res = model.predict(test_data)
+        print("acc =", np.mean(np.equal(res, test_label)))
\ No newline at end of file
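
As an optional follow-up to the per-point Python loops in `fit()` and `predict()` above (not part of the submitted patch), here is a hedged sketch of a vectorized alternative; `pairwise_distances` and `knn_predict` are hypothetical helper names:

```python
import numpy as np

def pairwise_distances(A, B, cate='euclidean'):
    """Distance matrix between the rows of A (n, d) and the rows of B (m, d)."""
    diff = A[:, None, :] - B[None, :, :]       # shape (n, m, d)
    if cate == 'euclidean':
        return np.sum(diff ** 2, axis=-1)      # squared distances; the ranking is unchanged
    return np.sum(np.abs(diff), axis=-1)       # manhattan

def knn_predict(train_X, train_y, test_X, k, cate='euclidean'):
    dist = pairwise_distances(test_X, train_X, cate)
    nearest = np.argsort(dist, axis=1)[:, :k]  # indices of the k nearest training points
    votes = train_y[nearest]                   # neighbour labels, shape (n_test, k)
    return np.array([np.bincount(row).argmax() for row in votes])
```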