# [Web page notice captured with the source] Code pull complete; the page will refresh automatically.
import math
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from collections import Counter
def accuracy(test, prediction):
    """Return the fraction of labels in ``prediction`` that match ``test``.

    Args:
        test: Sequence of ground-truth labels.
        prediction: Sequence of predicted labels, compared position by
            position with ``test``.

    Returns:
        The share of matching positions as a float between 0.0 and 1.0.
        An empty ``test`` yields 0.0 instead of dividing by zero.

    Raises:
        ValueError: If the two sequences differ in length, since a
            position-wise comparison would then be meaningless.
    """
    total = len(test)
    if total != len(prediction):
        raise ValueError(
            "test and prediction must have the same length "
            f"(got {total} and {len(prediction)})"
        )
    if total == 0:
        # Nothing to score; avoid ZeroDivisionError.
        return 0.0
    correct = sum(
        1 for actual, predicted in zip(test, prediction) if actual == predicted
    )
    return correct / float(total)
def euclidean_distance(x1, x2):
    """Return the Euclidean (L2) distance between two points.

    Args:
        x1: Coordinates of the first point (NumPy array or any array-like
            of numbers).
        x2: Coordinates of the second point; must be broadcast-compatible
            with ``x1``.

    Returns:
        The distance as a Python float.
    """
    # Work in floating point: plain lists become arrays, and narrow or
    # unsigned integer dtypes cannot wrap around when differenced and squared.
    diff = np.asarray(x1, dtype=float) - np.asarray(x2, dtype=float)
    return math.sqrt(np.sum(diff ** 2))
# Load the Iris dataset object; the script reads its .data, .target and
# .feature_names attributes below.
iris_data = load_iris()
# Uncomment any of these to inspect the dataset object interactively:
# print(dir(iris_data))
# print(iris_data.DESCR)
# print(iris_data)

# Tabular view of the samples: one column per feature, plus a 'class'
# column holding the target label of each row.
iris_data_pd = pd.DataFrame(
    data=iris_data.data,
    columns=iris_data.feature_names,
).assign(**{'class': iris_data.target})

# Show the assembled table, then the feature names.
for shown in (iris_data_pd, iris_data.feature_names):
    print(shown)
print('可视化展示')

# Disabled exploratory scatter plot of feature columns 2 and 3, one marker
# style per class. Kept for reference only. The original snippet referred to
# an undefined name `data`; it is written against `iris_data` here so that it
# runs if re-enabled.
# plt.style.use('ggplot')
# xx = iris_data.data
# yy = iris_data.target
# features = iris_data.feature_names
# targets = iris_data.target_names
# print(xx)
# print(yy)
# print(features)
# print(targets)
# plt.figure(figsize=(10, 4))
# plt.plot(xx[:, 2][yy == 0], xx[:, 3][yy == 0], 'bs', label=targets[0])
# plt.plot(xx[:, 2][yy == 1], xx[:, 3][yy == 1], 'kx', label=targets[1])
# plt.plot(xx[:, 2][yy == 2], xx[:, 3][yy == 2], 'ro', label=targets[2])
# plt.xlabel(features[2])
# plt.ylabel(features[3])
# plt.title('Iris Data Set')
# plt.legend()
# plt.savefig('Iris data Set.png', dpi=300)
# plt.show()

# Replace the numeric class codes (0, 1, 2) with species names in one pass.
# Mapping builds a fresh string column; writing strings into the integer
# column through .loc is deprecated by pandas (FutureWarning since 2.1,
# slated to become an error).
iris_data_pd['class'] = iris_data_pd['class'].map(
    {0: "setosa", 1: "versicolor", 2: "virginica"}
)

# Quick look at the relabelled table and its summary statistics.
print(iris_data_pd.head())
print(iris_data_pd.describe())

# Pairwise feature plots, coloured by species.
sns.pairplot(iris_data_pd.dropna(), hue='class')
plt.show()
# Hold out 30% of the samples for evaluation; the fixed random_state keeps
# the split the same from run to run.
train_x, test_x, train_y, test_y = train_test_split(
    iris_data.data, iris_data.target, test_size=0.3, random_state=3
)

# Disabled reference run: scikit-learn decision tree on the same split.
# dtc_model = DecisionTreeClassifier()
# dtc_model.fit(train_x, train_y)
# predict_y = dtc_model.predict(test_x)
# score = dtc_model.score(test_x, test_y)
# print(predict_y)
# print(test_y)
# print('score: ', score)

# Hand-rolled k-nearest-neighbours classifier: each test sample takes the
# most common label among its k closest training samples.
k = 10
predict_y = []
for t in test_x:
    # Distance from this test sample to every training sample.
    distance = [euclidean_distance(x, t) for x in train_x]
    # Training indices ordered from closest to farthest.
    nearest = np.argsort(distance)
    # Labels of the k closest training samples.
    top_k = [train_y[i] for i in nearest[:k]]
    votes = Counter(top_k)
    # most_common(1) returns only the winning (label, count) pair; ties keep
    # the label encountered first.
    predict_y.append(votes.most_common(1)[0][0])

# NOTE(review): indentation was lost in the source. These four debug prints
# are assumed to follow the loop, so they show the intermediate values for
# the last test sample only. Confirm against the original file.
print(distance)
print(nearest)
print(top_k)
print(votes)

# Predictions, ground truth, and the resulting accuracy on the held-out set.
print(predict_y)
print(test_y)
print(accuracy(test_y, predict_y))
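# [Web page notice captured with the source] This section may contain content unsuitable for display, so the page does not show it. You can review and edit it with the relevant editing functions.
# If you confirm the content involves no inappropriate language, pure advertising, violence, vulgar or pornographic material, infringement, piracy, false or worthless content, or anything that violates national laws and regulations, you may click submit to appeal and it will be handled as soon as possible.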