diff --git a/assignment-3/submission/16307130040/README.md b/assignment-3/submission/16307130040/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..dfaae6f5b35787f02947646ab7d719e8290fc268
--- /dev/null
+++ b/assignment-3/submission/16307130040/README.md
@@ -0,0 +1,271 @@
+# 实验报告3
+
+
+
+## 1，KMeans 和 GaussianMixture 聚类算法的实现
+
+
+
+#### 1，KMeans的实现
+
+
+
+首先，是KMeans的实现。在fit之前要初始化，我的做法是随机选择数据中的n_clusters个点作为初始的聚簇中心。
+
+训练的实现较为简单，在limit变量规定的次数中，反复进行以下两步：
+
+
+
+1，为数据的每一个点寻找最近的聚簇中心，并将它归于聚簇中。最终，输出一个表示每个点所属的聚簇的数组idx。这一步由方法FindClosestCenters(self,train_data)实现。
+
+
+
+2，将每个聚簇中所有点的坐标进行平均，最终得出新的聚簇中心。这一步由方法ComputeNewCenters(self,idx,train_data)实现。
+
+其中，聚簇中心存储在类的私有变量centers中。
+
+
+
+预测的时候，使用的逻辑与FindClosestCenters相似。不同的是，输入的数据由训练数据变成了测试数据。
+
+
+
+#### 2，GaussianMixture的实现
+
+
+
+之后，是 GaussianMixture 的实现。初始的方法与Kmeans相同。
+
+
+
+在训练中，GaussianMixture类使用了以下数据结构：
+
+- `self.mean`：每个聚簇的均值，形状为 `k×n`。
+- `self.var`：每个聚簇在各维度上的方差，形状为 `k×n`。在计算概率的时候，会将方差展开为对角矩阵。
+- `self.W`：每个数据属于不同聚簇的可能性，形状为 `m×k`。`W[i][j]` 为第 i 个数据属于第 j 个聚簇的概率。
+- `self.Pi`：对随机的一个数据，它属于各个聚簇的可能性构成的数组。可以由 W 计算得出，但是为了计算方便，使用了独立的 Pi 变量。
+
+
+
+在训练中，GaussianMixture类会在limit变量规定的次数中，反复进行以下两步：
+
+
+
+##### 1，E步
+
+在E步中，我们会更新聚簇的分布。使用贝叶斯公式来重新计算每个数据属于各个聚簇的概率：
+
+![](./img/1.PNG)
+
+之后，再更新pi变量：
+
+![](./img/2.PNG)
+
+代码如下：
+
+```python
+    def step_E(self,train_data):
+        #update W matrix
+        m, n = train_data.shape
+        new_W=np.zeros((m,self.k))
+
+        for i in range(self.k):
+            for j in range(m):
+                new_W[j,i]=self.Pi[i]*self.gauss_prob(train_data[j],self.mean[i],np.diag(self.var[i]))
+
+        for j in range(m):
+            sum=new_W[j].sum()
+            if sum != 0:
+                self.W[j]=new_W[j]/new_W[j].sum()
+        self.Pi = self.W.sum(axis=0) / self.W.sum()
+```
+
+这里用到了高斯分布的概率公式。这里使用了numpy进行实现，并在计算前提前考虑方差为0的情况：
+
+```python
+    def gauss_prob(self,x,mean,var):
+        #calculate the probability of gauss distribution by np
+        if not np.any(var):
+            n=var.shape[0]
+            var=np.diag(np.ones(n)/100)
+
+        y = np.exp((-1 / 2) * (x - mean).T.dot(np.linalg.inv(var)).dot(x - mean)) / np.sqrt(
+            np.power(2 * np.pi, len(x)) * np.linalg.det(var)
+        )
+        return y
+```
+
+
+
+##### 2，M步
+
+在M步中，更新每个聚簇的均值和方差。具体说来，实现方法是以W矩阵中对应聚簇的那一列作为权重，计算所有数据坐标的加权均值，以及数据到该均值的偏差平方的加权均值（即加权方差）:
+
+![](./img/3.PNG)
+
+![](./img/4.PNG)
+
+以下是代码：
+
+```python
+    def step_M(self,train_data):
+        m, n = train_data.shape
+
+        #update the mean and var of each cluster
+        new_mean=np.zeros((self.k,n))
+        new_var=np.zeros((self.k,n))
+        for i in range(self.k):
+            new_mean[i]=np.average(train_data,axis=0,weights=np.array(self.W[:,i]))
+            new_var[i]=np.average((train_data-new_mean[i])**2, axis=0, weights=np.array(self.W[:, i]))
+        self.mean=new_mean
+        self.var=new_var
+```
+
+
+
+预测的时候，将测试数据代入每一个聚簇的高斯概率公式中进行计算，得出该数据点属于各类的概率。最终，选取概率最大的聚簇。
+
+
+
+## 2，基础实验
+
+基础实验中，我生成了如下2700个数据，属于三个二维高斯分布模型：
+
+```python
+    mean = (1, 2)
+    cov = np.array([[80, 0], [0, 22]])
+    data_x = np.random.multivariate_normal(mean, cov, (800,))
+
+    mean = (16, -5)
+    cov = np.array([[40, 0], [0, 32.1]])
+    data_y = np.random.multivariate_normal(mean, cov, (900,))
+
+    mean = (-6, 10)
+    cov = np.array([[20, 10], [10, 20]])
+    data_z = np.random.multivariate_normal(mean, cov, (1000,))
+```
+
+之后，混合，贴上标签，并洗匀之后，将其中80%作为训练数据，20%作为测试数据。
+
+这是训练数据：
+
+![](./img/test1_1.png)
+
+这是测试数据：
+
+![](./img/test1_2.png)
+
+然后，通过KMeans模型进行聚类操作：
+
+![](./img/test1_3.png)
+
+之后，用GaussianMixture模型进行聚类：
+
+![](./img/test1_4.png)
+
+通过上方的准确率可以看出，高斯混合模型比KMeans的表现要好。
+
+因为聚类模型输出的标签编号与数据原来的标签编号很有可能不一致，所以如果简单地进行一一比对的话，是无法得到有意义的准确率的。所以，使用这样的方法来计算准确率：枚举聚类标签到真实标签的所有排列（三个类共6种），对每一种排列逐一比较并计算准确率，并将其中最高的准确率作为这个模型的准确率。
+
+
+
+```python
+def calculate_accu(label_predict,label_test):
+    #when calculate the accu ,try all probables of label combination
+    rotates=np.array([[0,1,2],[0,2,1],[1,0,2],[1,2,0],[2,0,1],[2,1,0]])
+    label_predict_rotated=[]
+    for rotate in rotates:
+        label_predict_temp=np.array(label_predict)
+        label_predict_temp=np.where(label_predict_temp != 0, label_predict_temp, -1)
+        label_predict_temp=np.where(label_predict_temp != 1, label_predict_temp, -2)
+        label_predict_temp=np.where(label_predict_temp != 2, label_predict_temp, -3)
+
+        label_predict_temp=np.where(label_predict_temp != -1, label_predict_temp, rotate[0])
+        label_predict_temp=np.where(label_predict_temp != -2, label_predict_temp, rotate[1])
+        label_predict_temp=np.where(label_predict_temp != -3, label_predict_temp, rotate[2])
+        label_predict_rotated.append(label_predict_temp)
+
+    correct_max = np.count_nonzero((label_predict == label_test))
+    for label_predict_temp in label_predict_rotated:
+        correct = np.count_nonzero((label_predict_temp == label_test))
+        if correct>correct_max:
+            correct_max=correct
+
+    accurate= correct_max/len(label_test)
+    return  accurate
+```
+
+该实验所有的代码在test1.py中。
+
+## 3， 自动选择聚簇数量的实验
+
+
+
+首先说明选择聚簇数的思路。每当选择聚簇数，并训练数据之后，计算出每个聚簇的直径的平均数。一般来说，随着聚簇的数量增加，平均直径会减小。但是，在达到某一个值之后，直径的下降会明显缓慢，有时反而会微量地增加。这个临界值就是最佳的聚簇数。
+
+该实验中，生成了如下2700个数据，属于三个二维高斯分布模型：
+
+```python
+    mean = (-10, 2)
+    cov = np.array([[6, 0], [0, 12]])
+    data_x = np.random.multivariate_normal(mean, cov, (800,))
+
+    mean = (-10, -5)
+    cov = np.array([[14, 0], [0, 14]])
+    data_y = np.random.multivariate_normal(mean, cov, (900,))
+
+    mean = (0, 0)
+    cov = np.array([[15, 14], [14, 15]])
+    data_z = np.random.multivariate_normal(mean, cov, (1000,))
+
+```
+
+之后，混合，贴上标签，并洗匀之后，将其中80%作为训练数据，20%作为测试数据。
+
+这是训练数据：
+
+![](./img/test2_1.png)
+
+这是测试数据：
+
+![](./img/test2_2.png)
+
+之后，用自动选择聚簇数的KMeans模型进行聚类：
+
+![](./img/test2_3.png)
+
+这里模型自动认为这组数据可以分为三类。
+
+
+
+不过，当三个类靠得更近一些的时候，会发生一些不同的情况：
+
+```python
+    mean = (2, 2)
+    cov = np.array([[6, 0], [0, 12]])
+    data_x = np.random.multivariate_normal(mean, cov, (800,))
+
+    mean = (-5, -5)
+    cov = np.array([[7, 2], [2, 7]])
+    data_y = np.random.multivariate_normal(mean, cov, (900,))
+
+    mean = (0, 0)
+    cov = np.array([[5, 0], [0, 5]])
+    data_z = np.random.multivariate_normal(mean, cov, (1000,))
+```
+
+![](./img/test2_4.png)
+
+![](./img/test2_5.png)
+
+此时，模型将数据分为了四个聚簇：
+
+![](./img/test2_7.png)
+
+
+
+在观察后，发现这种分类方式的确更为合适。
+
+
+
+该实验所有的代码在test2.py中。
\ No newline at end of file
diff --git a/assignment-3/submission/16307130040/img/1.PNG b/assignment-3/submission/16307130040/img/1.PNG
new file mode 100644
index 0000000000000000000000000000000000000000..ba4e5cfd1bf0c264ad2bade06bd601e7c7c29f34
Binary files /dev/null and b/assignment-3/submission/16307130040/img/1.PNG differ
diff --git a/assignment-3/submission/16307130040/img/2.PNG b/assignment-3/submission/16307130040/img/2.PNG
new file mode 100644
index 0000000000000000000000000000000000000000..6d08cca9311064e3c4eef0f332e89a55c734e2ae
Binary files /dev/null and b/assignment-3/submission/16307130040/img/2.PNG differ
diff --git a/assignment-3/submission/16307130040/img/3.PNG b/assignment-3/submission/16307130040/img/3.PNG
new file mode 100644
index 0000000000000000000000000000000000000000..8ae5b5c31d972a554f4f1b8431b5296b76481c7f
Binary files /dev/null and b/assignment-3/submission/16307130040/img/3.PNG differ
diff --git a/assignment-3/submission/16307130040/img/4.PNG b/assignment-3/submission/16307130040/img/4.PNG
new file mode 100644
index 0000000000000000000000000000000000000000..6c5aac59293cf8f48fdfedb635fbfddaa83d52aa
Binary files /dev/null and b/assignment-3/submission/16307130040/img/4.PNG differ
diff --git a/assignment-3/submission/16307130040/img/test1_1.png b/assignment-3/submission/16307130040/img/test1_1.png
new file mode 100644
index 0000000000000000000000000000000000000000..d584f2519c65b8174b97eed171408275c91694ed
Binary files /dev/null and b/assignment-3/submission/16307130040/img/test1_1.png differ
diff --git a/assignment-3/submission/16307130040/img/test1_2.png b/assignment-3/submission/16307130040/img/test1_2.png
new file mode 100644
index 0000000000000000000000000000000000000000..1196ccae6d9aefd14e5a000692aa12456fcfbbea
Binary files /dev/null and b/assignment-3/submission/16307130040/img/test1_2.png differ
diff --git a/assignment-3/submission/16307130040/img/test1_3.png b/assignment-3/submission/16307130040/img/test1_3.png
new file mode 100644
index 0000000000000000000000000000000000000000..9e2270b9996c24d06342387d753c76e2c7e8bde2
Binary files /dev/null and b/assignment-3/submission/16307130040/img/test1_3.png differ
diff --git a/assignment-3/submission/16307130040/img/test1_4.png b/assignment-3/submission/16307130040/img/test1_4.png
new file mode 100644
index 0000000000000000000000000000000000000000..a93fbb5b131e476033aef945880543ddc4a739db
Binary files /dev/null and b/assignment-3/submission/16307130040/img/test1_4.png differ
diff --git a/assignment-3/submission/16307130040/img/test2_1.png b/assignment-3/submission/16307130040/img/test2_1.png
new file mode 100644
index 0000000000000000000000000000000000000000..7298a4bc4f355326ce96ba5b4aa74725d08f7a9e
Binary files /dev/null and b/assignment-3/submission/16307130040/img/test2_1.png differ
diff --git a/assignment-3/submission/16307130040/img/test2_2.png b/assignment-3/submission/16307130040/img/test2_2.png
new file mode 100644
index 0000000000000000000000000000000000000000..c954d088696d3bee2203fbc292f898685053b336
Binary files /dev/null and b/assignment-3/submission/16307130040/img/test2_2.png differ
diff --git a/assignment-3/submission/16307130040/img/test2_3.png b/assignment-3/submission/16307130040/img/test2_3.png
new file mode 100644
index 0000000000000000000000000000000000000000..b6d27ac32507dbb22615c2b91e8ab6b0aa05c938
Binary files /dev/null and b/assignment-3/submission/16307130040/img/test2_3.png differ
diff --git a/assignment-3/submission/16307130040/img/test2_4.png b/assignment-3/submission/16307130040/img/test2_4.png
new file mode 100644
index 0000000000000000000000000000000000000000..83c567792cb6df19bbce2ef15432f631989d1b14
Binary files /dev/null and b/assignment-3/submission/16307130040/img/test2_4.png differ
diff --git a/assignment-3/submission/16307130040/img/test2_5.png b/assignment-3/submission/16307130040/img/test2_5.png
new file mode 100644
index 0000000000000000000000000000000000000000..8a5c30ad7292e8fbe2727eea18054b9bb1cf7c52
Binary files /dev/null and b/assignment-3/submission/16307130040/img/test2_5.png differ
diff --git a/assignment-3/submission/16307130040/img/test2_7.png b/assignment-3/submission/16307130040/img/test2_7.png
new file mode 100644
index 0000000000000000000000000000000000000000..7771ab521d8ff17afc995c59d8c867fc4785fda7
Binary files /dev/null and b/assignment-3/submission/16307130040/img/test2_7.png differ
diff --git a/assignment-3/submission/16307130040/source.py b/assignment-3/submission/16307130040/source.py
new file mode 100644
index 0000000000000000000000000000000000000000..84f7c4a5a8c245b06954a30f886683f85ce21113
--- /dev/null
+++ b/assignment-3/submission/16307130040/source.py
@@ -0,0 +1,239 @@
+import numpy as np
+
+class KMeans:
+
+    def __init__(self, n_clusters):
+        self.limit=50
+        self.labels=[]
+        self.centers=[]
+        self.k=n_clusters
+
+
+
+    def FindClosestCenters(self,train_data):
+
+        m=train_data.shape[0]
+        idx=np.zeros(m)
+        # for each point,find the closest center
+        for i in range(m):
+            min_dist=np.sum((train_data[i] - self.centers[0]) ** 2)
+            idx[i]=0
+            for j in range(self.k):
+                dist=np.sum((train_data[i] - self.centers[j]) ** 2)
+                if dist < min_dist:
+                    min_dist=dist
+                    idx[i]=j
+        return idx
+
+
+    def ComputeNewCenters(self,idx,train_data):
+        m, n = train_data.shape
+        for i in range(self.k):
+            indices = np.where(idx == i)
+            # for each center ,find the points select it
+
+            if len(indices[0]) != 0:
+                self.centers[i]=np.sum(train_data[indices], axis=0) / len(indices[0])
+
+
+
+    def ComputeTheDiameter(self,train_data):
+        # calculate the average of each clusters after fit the train data
+        diameters=[]
+        idx=self.FindClosestCenters(train_data)
+        for i in range(self.k):
+            indices = np.where(idx == i)
+            data_temp=train_data[indices]
+            if len(indices[0]) != 0:
+                    diameter_max=0
+                    for data1 in data_temp:
+                        for data2 in data_temp:
+                            diameter=sum((data1 - data2) ** 2)
+                            if diameter>diameter_max:
+                                diameter_max=diameter
+
+                    diameters.append(diameter_max)
+        return np.average(diameters)
+
+
+
+
+    def fit(self, train_data):
+        # initialize the centers, which are  just same to k random points of  data at first
+        m, n = train_data.shape
+        self.centers = np.zeros((self.k, n))
+        select_idx = np.random.randint(0, m, self.k)
+        for i in range(self.k):
+            self.centers[i] = train_data[select_idx[i]]
+
+        for i in range(self.limit):
+            idx=self.FindClosestCenters(train_data)
+            self.ComputeNewCenters(idx,train_data)
+
+
+
+
+    
+    def predict(self, test_data):
+        m,n=test_data.shape
+        predict_idx=np.zeros(m)
+        # similar to function above
+        for i in range(m):
+            min_dist=np.sum((test_data[i] - self.centers[0]) ** 2)
+            predict_idx[i]=0
+            for j in range(self.k):
+                dist=np.sum((test_data[i] - self.centers[j]) ** 2)
+                if dist < min_dist:
+                    min_dist=dist
+                    predict_idx[i]=j
+        return  predict_idx
+
+class GaussianMixture:
+
+    def __init__(self, n_clusters):
+        # k is the number of clusters , each one set of mean and var is related to one cluster
+        # W is for hidden variable ,W（i，j) is the probability for Xi belongs to cluster j
+        self.k=n_clusters
+        self.mean=[]
+        self.var=[]
+        self.W=[]
+        self.Pi=[]
+        self.limit=5
+
+    def gauss_prob(self,x,mean,var):
+        #calculate the probability of gauss distribution by np
+        if not np.any(var):
+            n=var.shape[0]
+            var=np.diag(np.ones(n)/100)
+
+        y = np.exp((-1 / 2) * (x - mean).T.dot(np.linalg.inv(var)).dot(x - mean)) / np.sqrt(
+            np.power(2 * np.pi, len(x)) * np.linalg.det(var)
+        )
+        return y
+
+    def step_E(self,train_data):
+        #update W matrix
+        m, n = train_data.shape
+        new_W=np.zeros((m,self.k))
+
+        for i in range(self.k):
+            for j in range(m):
+                new_W[j,i]=self.Pi[i]*self.gauss_prob(train_data[j],self.mean[i],np.diag(self.var[i]))
+
+        for j in range(m):
+            sum=new_W[j].sum()
+            if sum != 0:
+                self.W[j]=new_W[j]/new_W[j].sum()
+        self.Pi = self.W.sum(axis=0) / self.W.sum()
+        #print(self.W)
+
+
+
+    def step_M(self,train_data):
+        m, n = train_data.shape
+
+        #update the mean and var of each cluster
+        new_mean=np.zeros((self.k,n))
+        new_var=np.zeros((self.k,n))
+        for i in range(self.k):
+            new_mean[i]=np.average(train_data,axis=0,weights=np.array(self.W[:,i]))
+            new_var[i]=np.average((train_data-new_mean[i])**2, axis=0, weights=np.array(self.W[:, i]))
+        self.mean=new_mean
+        self.var=new_var
+        #print(self.mean)
+
+    def ComputeTheDiameter(self,train_data):
+        diameters=[]
+        idx=self.predict(train_data)
+        for i in range(self.k):
+            indices = np.where(idx == i)
+            data_temp=train_data[indices]
+            if len(indices[0]) != 0:
+                    diameter_max=0
+                    for data1 in data_temp:
+                        for data2 in data_temp:
+                            diameter=sum((data1 - data2) ** 2)
+                            if diameter>diameter_max:
+                                diameter_max=diameter
+                    #print(diameter_max)
+                    diameters.append(diameter_max)
+        return np.average(diameters)
+
+
+    def fit(self, train_data):
+        # initialize the variables above
+        m,n=train_data.shape
+        self.mean=np.zeros((self.k,n))
+        select_idx = np.random.randint(0, m, self.k)
+        for i in range(self.k):
+            self.mean[i] = train_data[select_idx[i]]
+        self.var=np.ones((self.k,n))
+        self.W=np.ones((m,self.k))/self.k
+        self.Pi = self.W.sum(axis=0) / self.W.sum()
+
+        for i in range(self.limit):
+            self.step_E(train_data)
+        #    if(m==3):
+        #        print('W: ',self.W)
+            self.step_M(train_data)
+        #    if (m == 3):
+        #        print('mean：',self.mean)
+        #        print('var: ',self.var)
+
+
+
+
+    
+    def predict(self, test_data):
+        m,n=test_data.shape
+        predict_idx=np.zeros(m)
+        # similar to function above
+        for i in range(m):
+            max_prob=self.gauss_prob(test_data[i],self.mean[0],np.diag(self.var[0]))
+            predict_idx[i]=0
+            for j in range(self.k):
+                prob=self.gauss_prob(test_data[i],self.mean[j],np.diag(self.var[j]))
+                if max_prob < prob:
+                    max_prob=prob
+                    predict_idx[i]=j
+        #print(predict_idx)
+        return  predict_idx
+
+class ClusteringAlgorithm:
+
+
+    def __init__(self):
+        self.k=2
+        self.model_Kmeans=KMeans(2)
+
+    def fit(self, train_data):
+        last_diameter=0
+        last_model = KMeans(2)
+        selected_k=2
+        selected_model=KMeans(2)
+        # try the differnet n_clusters in order
+        for i in range(2,31):
+            #print(i)
+            model_Kmeans= KMeans(i)
+            model_Kmeans.fit(train_data)
+            if i == 2:
+                last_diameter=model_Kmeans.ComputeTheDiameter(train_data)
+                last_model=model_Kmeans
+
+            else:
+                diameter=model_Kmeans.ComputeTheDiameter(train_data)
+                # when the average diameter of clusters is not decreased sharply , then select the previous n_cluster and Kmeans model
+                if diameter>last_diameter*0.80:
+                    selected_k=i-1
+                    selected_model=last_model
+                    break
+                else:
+                    last_diameter=diameter
+                    last_model=model_Kmeans
+        self.k=selected_k
+        self.model_Kmeans=selected_model
+    
+    def predict(self, test_data):
+        return self.k,self.model_Kmeans.predict(test_data)
+
+
diff --git a/assignment-3/submission/16307130040/test1.py b/assignment-3/submission/16307130040/test1.py
new file mode 100644
index 0000000000000000000000000000000000000000..4ce2d08de582ff7a476c810f825558335f90a038
--- /dev/null
+++ b/assignment-3/submission/16307130040/test1.py
@@ -0,0 +1,105 @@
+import numpy as np
+import matplotlib.pyplot as plt
+from source import KMeans, GaussianMixture
+
+
+def train_test_split(data, label):
+    offset = int(len(data) * 0.8)
+    data_train = data[:offset]
+    data_test = data[offset:]
+    label_train = label[:offset]
+    label_test = label[offset:]
+
+
+    return np.array(data_train), np.array(data_test), np.array(label_train), np.array(label_test)
+
+def display(data, label,name,accu):
+    datas =[[],[],[]]
+    colors=['b','r','y']
+    for i in range(len(data)):
+        datas[label[i]].append(data[i])
+    for i,each in enumerate(datas):
+        each = np.array(each)
+        plt.scatter(each[:, 0], each[:, 1],c=colors[i])
+    if not accu==0:
+        plt.text(-10, 20, 'accurate=%f'%accu)
+    plt.title(name)
+    plt.show()
+
+def shuffle(*datas):
+    data = np.concatenate(datas)
+    label = np.concatenate([
+        np.ones((d.shape[0],), dtype=int)*i
+        for (i, d) in enumerate(datas)
+    ])
+    N = data.shape[0]
+    idx = np.arange(N)
+    np.random.shuffle(idx)
+    data = data[idx]
+    label = label[idx]
+    return data, label
+
+def calculate_accu(label_predict,label_test):
+    #when calculate the accu ,try all probables of label combination
+    rotates=np.array([[0,1,2],[0,2,1],[1,0,2],[1,2,0],[2,0,1],[2,1,0]])
+    label_predict_rotated=[]
+    for rotate in rotates:
+        label_predict_temp=np.array(label_predict)
+        label_predict_temp=np.where(label_predict_temp != 0, label_predict_temp, -1)
+        label_predict_temp=np.where(label_predict_temp != 1, label_predict_temp, -2)
+        label_predict_temp=np.where(label_predict_temp != 2, label_predict_temp, -3)
+
+        label_predict_temp=np.where(label_predict_temp != -1, label_predict_temp, rotate[0])
+        label_predict_temp=np.where(label_predict_temp != -2, label_predict_temp, rotate[1])
+        label_predict_temp=np.where(label_predict_temp != -3, label_predict_temp, rotate[2])
+        label_predict_rotated.append(label_predict_temp)
+
+    correct_max = np.count_nonzero((label_predict == label_test))
+    for label_predict_temp in label_predict_rotated:
+        correct = np.count_nonzero((label_predict_temp == label_test))
+        if correct>correct_max:
+            correct_max=correct
+
+    accurate= correct_max/len(label_test)
+    return  accurate
+
+
+def test1():
+    mean = (1, 2)
+    cov = np.array([[80, 0], [0, 22]])
+    data_x = np.random.multivariate_normal(mean, cov, (800,))
+
+    mean = (16, -5)
+    cov = np.array([[40, 0], [0, 32.1]])
+    data_y = np.random.multivariate_normal(mean, cov, (900,))
+
+    mean = (-6, 10)
+    cov = np.array([[20, 10], [10, 20]])
+    data_z = np.random.multivariate_normal(mean, cov, (1000,))
+
+    data, label = shuffle(data_x,data_y,data_z)
+    data_train,data_test,label_train,label_test=train_test_split(data,label)
+
+    display(data_train,label_train,'train',0)
+    display(data_test,label_test,'test',0)
+
+    model_KMeans=KMeans(3)
+    model_KMeans.fit(data_train)
+    label_predict_KMeans=np.array(model_KMeans.predict(data_test),dtype=np.int32)
+    accurate_Kmeans=calculate_accu(label_predict_KMeans,label_test)
+    display(data_test,label_predict_KMeans,'predict_Kmeans',accurate_Kmeans)
+
+
+    model_GaussianMixture=GaussianMixture(3)
+    model_GaussianMixture.fit(data_train)
+    label_predict_GaussianMixture=np.array(model_GaussianMixture.predict(data_test),dtype=np.int32)
+    accurate_GaussianMixture=calculate_accu(label_predict_GaussianMixture,label_test)
+    display(data_test,label_predict_GaussianMixture,'predict_GaussianMixture',accurate_GaussianMixture)
+
+
+
+
+
+
+# Run the basic experiment only when this file is executed as a script.
+if __name__ == "__main__":
+    test1()
\ No newline at end of file
diff --git a/assignment-3/submission/16307130040/test2.py b/assignment-3/submission/16307130040/test2.py
new file mode 100644
index 0000000000000000000000000000000000000000..18425d22d0d57e911490f9c3eff39e6bc37ecc25
--- /dev/null
+++ b/assignment-3/submission/16307130040/test2.py
@@ -0,0 +1,86 @@
+import numpy as np
+import matplotlib.pyplot as plt
+from source import KMeans, GaussianMixture,ClusteringAlgorithm
+
+
+def train_test_split(data, label):
+    offset = int(len(data) * 0.8)
+    data_train = data[:offset]
+    data_test = data[offset:]
+    label_train = label[:offset]
+    label_test = label[offset:]
+
+
+    return np.array(data_train), np.array(data_test), np.array(label_train), np.array(label_test)
+
+def display(data, label,name,n_clusters):
+    # the display function of test2,which is vaild in different numbers of clusters
+    datas =[[] for i in range(n_clusters)]
+
+    colors=['b','r','y','xkcd:neon purple','xkcd:deep green','xkcd:reddish pink','aliceblue','xkcd:electric green','xkcd:royal']
+    for i in range(len(data)):
+        datas[label[i]].append(data[i])
+    for i,each in enumerate(datas):
+        each = np.array(each)
+        plt.scatter(each[:, 0], each[:, 1],c=colors[i])
+    #if not accu==0:
+    #    plt.text(-10, 20, 'accurate=%f'%accu)
+    plt.title(name)
+    plt.show()
+
+
+def shuffle(*datas):
+    data = np.concatenate(datas)
+    label = np.concatenate([
+        np.ones((d.shape[0],), dtype=int)*i
+        for (i, d) in enumerate(datas)
+    ])
+    N = data.shape[0]
+    idx = np.arange(N)
+    np.random.shuffle(idx)
+    data = data[idx]
+    label = label[idx]
+    return data, label
+
+
+
+
+def test2():
+    mean = (2, 2)
+    cov = np.array([[6, 0], [0, 12]])
+    data_x = np.random.multivariate_normal(mean, cov, (800,))
+
+    mean = (-5, -5)
+    cov = np.array([[7, 2], [2, 7]])
+    data_y = np.random.multivariate_normal(mean, cov, (900,))
+
+    mean = (0, 0)
+    cov = np.array([[5, 0], [0, 5]])
+    data_z = np.random.multivariate_normal(mean, cov, (1000,))
+
+    data, label = shuffle(data_x,data_y,data_z)
+    data_train,data_test,label_train,label_test=train_test_split(data,label)
+
+    display(data_train,label_train,'train',3)
+    display(data_test,label_test,'test',3)
+
+    model_KMeans=ClusteringAlgorithm()
+    model_KMeans.fit(data_train)
+    n_clusters, label_predict_KMeans = model_KMeans.predict(data_test)
+    label_predict_KMeans=np.array(label_predict_KMeans,dtype=np.int32)
+    display(data_test,label_predict_KMeans,'predict_Kmeans',n_clusters)
+
+
+
+
+
+
+
+
+
+
+
+
+
+# Run the cluster-count selection experiment only when executed as a script.
+if __name__ == "__main__":
+    test2()
\ No newline at end of file