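# java调用python返回聚类树形图和分类词(二进制保存图片).py -- Java calls Python; returns the cluster dendrogram and the classified words (image kept as binary).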
#!/usr/bin/env python
# -*- coding:utf-8 -*-
# Author:hua
import ast
import importlib
import io
import sys

import cv2  # pip install opencv_python
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from pandas.core.frame import DataFrame
from PIL import Image
from scipy import cluster
from scipy.cluster import hierarchy

# Command-line arguments:
#   argv[1] -- base name of the CSV file (".csv" is appended below)
#   argv[2] -- cut height for the cluster tree, written as a Python literal
#              (for example "42")
filename = sys.argv[1]

# SECURITY: this argument used to go through eval(), which executes whatever
# code the caller passes on the command line. ast.literal_eval accepts the
# same literals (int, float, list, ...) without executing anything.
heights = ast.literal_eval(sys.argv[2])

# Raw string: "\z", "\w" and "\{" are not valid escape sequences, so the
# non-raw literal only produced the right path by accident (and triggers a
# SyntaxWarning on Python 3.12+). The resulting path is unchanged.
csv_path = r"E:\zhihui\work_code\wordcloud\{}.csv".format(filename)

# Load the data source as a numeric matrix. A textual header row is parsed
# as NaN here; the script removes row 0 further down.
case_train = np.genfromtxt(csv_path, delimiter=',')

# NOTE(review): the script previously did `keywords = keywords.columns`
# without ever assigning `keywords`, which raises NameError. Presumably the
# keywords are the header row of the same CSV -- confirm (and confirm the
# file encoding; pandas assumes UTF-8 by default).
keywords = list(pd.read_csv(csv_path, nrows=0).columns)

# Drop the first row of the matrix (the row that corresponds to the CSV field
# names, if the file has any).
# NOTE(review): the earlier comment also mentioned dropping the first column,
# but the code has never done that -- confirm which is intended.
case_train = np.delete(case_train, 0, axis=0)

case_train1 = np.asarray(case_train)
# Transpose so that every row of `a` is one column of the input file.
# A contiguous copy keeps the row-wise reductions below on the fast axis.
a = np.ascontiguousarray(case_train1.T)

# Module-level containers used by the rest of the script. The former
# `global` statements were removed: at module level they have no effect.
list3 = []
list4 = []
list5 = []
list6 = []

# For every ordered pair of rows (i, k) take the element-wise minimum and sum
# it. One vectorised call per row replaces the former inner Python loop and
# appends the same values to list3 in the same (row-major) order.
for row in a:
    list3.extend(np.minimum(a, row).sum(axis=1))

n = len(a)
# Fold the flat list of n * n sums into n rows of n values each.
data_two = [list3[i:i + n] for i in range(0, len(list3), n)]
# Label both axes of the pairwise matrix with the keywords.
data_two = DataFrame(data_two, index=keywords, columns=keywords)
# Build the cluster dendrogram.
# NOTE(review): `Z` was used below (and again for cut_tree later on) without
# ever being assigned, which raises NameError. Ward linkage on Euclidean
# distances between the rows of data_two is an assumption -- confirm the
# intended method and metric.
Z = hierarchy.linkage(data_two.values, method='ward', metric='euclidean')

fig = plt.figure(figsize=(30, 25), dpi=80)
hierarchy.dendrogram(Z, orientation='right',
                     show_leaf_counts=False,
                     leaf_font_size=15.,
                     labels=data_two.index)

# Render the figure into an in-memory PNG. Both prints are kept because the
# script reports its results on stdout.
canvas = fig.canvas
buffer = io.BytesIO()
print(buffer)
canvas.print_png(buffer)
# Binary content of the dendrogram image.
data = buffer.getvalue()
print(data)

# Check that the binary data can be turned back into an image. A fresh buffer
# is used: writing `data` into `buffer` again appended a second copy of the
# PNG after the first one.
img = Image.open(io.BytesIO(data))
img = np.asarray(img)

# PIL yields RGB(A) channel order while cv2.imwrite expects BGR(A); without
# the conversion red and blue are swapped in the saved file.
if img.ndim == 3 and img.shape[2] == 4:
    img = cv2.cvtColor(img, cv2.COLOR_RGBA2BGRA)
elif img.ndim == 3 and img.shape[2] == 3:
    img = cv2.cvtColor(img, cv2.COLOR_RGB2BGR)

# Raw string: "\z", "\w" and "\d" are not valid escapes; the path itself is
# unchanged.
cv2.imwrite(r"E:\zhihui\work_code\wordcloud\datas55.png", img)

# Cut the cluster tree at the requested height and flatten the result into a
# 1-D array with one cluster id per keyword. `Z` (the linkage matrix) and
# `heights` are expected to be defined earlier in the script.
label = cluster.hierarchy.cut_tree(Z, height=heights)
label = label.reshape(label.size, )

# Pair every keyword with its cluster id: one [keyword, id] row per keyword.
# Both columns live in a single NumPy array, so the id is converted back with
# int() when it is used for grouping below.
list6 = np.array([keywords, label]).T
n = len(list6)

print(333333333333333333)
print(label)
label2 = list(set(label))
# One bucket per distinct cluster id, in the order the ids appear in label2:
# list7 collects the [keyword, id] rows, cluster_list only the keywords.
list7 = [[] for _ in label2]
cluster_list = [[] for _ in label2]
print(list7)
print(label2)

# Single pass over the rows instead of rescanning every row once per cluster
# id (and calling label2.index() for every hit). Rows keep their original
# relative order inside each bucket.
bucket_of = {int(cluster_id): pos for pos, cluster_id in enumerate(label2)}
for pair in list6:
    pos = bucket_of[int(pair[1])]
    list7[pos].append(pair)
    cluster_list[pos].append(pair[0])

print(len(list7))
print(cluster_list)

print(8888888888888888888)