1 Star 0 Fork 0

ShaunJM / FANE

加入 Gitee
与超过 1200万 开发者一起发现、参与优秀开源项目,私有仓库也完全免费 :)
免费加入
克隆/下载
main.py 37.23 KB
一键复制 编辑 原始数据 按行查看 历史
Enya Shen 提交于 2018-11-19 15:50 . fane
12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981
import os
# import sys
import time
# import matplotlib.pyplot as plt
# from matplotlib import collections as mc
import networkx as nx
from copy import copy, deepcopy
# import pydot
# from networkx.drawing.nx_pydot import graphviz_layout
from forceatlas2 import ForceAtlas2
import numpy as np
from node2vec import Node2Vec
from attri2vec import Attri2Vec
import graph
import struc2vec
from gensim.models import Word2Vec
from gensim.models.word2vec import LineSentence
# import node2vec
import nltk
from nltk.cluster import KMeansClusterer
from sklearn import svm
from sklearn.model_selection import train_test_split
from sklearn.metrics import f1_score
from sklearn.manifold import TSNE
# from sklearn.cluster import KMeans
import logging
from color import color, shape
# from zoompan import ZoomPan
import matplotlib.patches as patches
# from draggablerectangle import DraggableRectangle
# import itertools
# Appearance used for every node when a mapping is set to 'Unitary'.
NODE_COLOR, NODE_SIZE, NODE_SHAPE = 'lightseagreen', 150, 'o'

# Records at WARNING level and above go to system.log; the experiment code
# below uses logging.warning() to record its parameters and scores.
logging.basicConfig(
    filename="system.log",
    level=logging.WARNING,
    format="%(asctime)s:%(levelname)s:%(message)s",
)
class Attri2vecTest:
    # Headless driver for attributed-graph embedding experiments: dataset
    # readers, embedding runners, an SVM evaluation and appearance mappings.
    # Everything below is a class-level default; '' is used as the
    # "not set yet" sentinel that the methods compare against.

    # --- input files ---
    dataFile = ''
    attriFile = ''
    classFile = ''

    # --- graphs ---
    # nodeGraph holds the plain edge list; attriGraph is a copy of it with
    # extra 'attri-*' nodes and node-to-attribute edges; graph is whichever
    # of the two was assigned last.
    graph = ''
    attriGraph = ''
    nodeGraph = ''
    # NOTE(review): the list defaults below are mutable objects created once
    # in the class body, so they are shared by every instance that does not
    # rebind them.
    attriNode = []
    attriEdges = []
    attriLines = []
    nodeEdges = []
    nodeLines = []
    nodeLabels = []

    # --- results of the pipeline stages ---
    model = ''
    cluster = ''
    nodePosition = ''
    nodeXs = []
    nodeYs = []

    # --- node appearance ---
    # The sa* attributes hold either a single value (the NODE_* default) or a
    # dict keyed by node, as assigned by the *2Color / *2Size / *2Shape methods.
    clusterColors = color
    saNodeColors = NODE_COLOR
    nodeColors = []
    saNodeSizes = NODE_SIZE
    nodeSizes = []
    saNodeShapes = NODE_SHAPE
    nodeShapes = []
    nodeFactor = 1.0
    # edgeColor = ''
    selectedNode = []
    isShowLabel = False
    isShowAttriNode = False
    isShowEdge = False

    # --- embedding configuration ---
    embeddingMethodList = ['node2vec', 'struc2vec', 'attri2vec', 'HSCA', 'TADW']
    embeddingMethod = 'node2vec'
    # P, Q and R are forwarded as p/q/r to the walkers; D is the embedding
    # dimension handed to the embedding models.
    P = 1.0
    Q = 1.0
    R = 1.0
    D = 16

    # --- clustering configuration ---
    clusteringMethodList = ['K-means', 'EM', 'GAA']
    clusteringMethod = 'K-means'
    clusteringNum = 6

    # --- dimensionality-reduction configuration ---
    reductionMethodList = ['t-SNE', 'NONE', 'Isomap', 'LLE', 'MLLE', 'HLLE', 'Spectral', 'LTSA', 'MDS']
    reductionMethod = 't-SNE'

    # --- appearance-mapping configuration ---
    colorMethodList = ['Unitary', 'Attribute', 'Cluster', 'Class']
    colorMethod = 'Unitary'
    colorCombox = ''
    sizeMethodList = ['Unitary', 'Degree', 'Class']
    sizeMethod = 'Unitary'
    shapeMethodList = ['Unitary', 'Value', 'Class']
    shapeMethod = 'Unitary'

    # --- plotting handles (never assigned a real object in the visible code) ---
    ax = ''
    cur_xlim = ''
    cur_ylim = ''
    oax = ''
    overviewRec = ''
    overViewWidget = ''
def __init__(self, dir=None):
    """Run the experiment for a dataset folder, or the scalability benchmark.

    Args:
        dir: Path of the dataset folder. When omitted (``None``) or passed
            as the legacy empty list, ``scalabilityTest`` runs instead of
            ``openfolder`` followed by ``batch``.
    """
    # The class body declares these containers once, so without rebinding
    # them here every instance would append to (and clear) the same lists.
    self.attriNode = []
    self.attriEdges = []
    self.attriLines = []
    self.nodeEdges = []
    self.nodeLines = []
    self.nodeLabels = []
    self.nodeXs = []
    self.nodeYs = []
    self.nodeColors = []
    self.nodeSizes = []
    self.nodeShapes = []
    self.selectedNode = []

    # None replaces the former mutable default; [] is still honoured so
    # existing callers that pass it explicitly behave as before.
    if dir is None or dir == []:
        self.scalabilityTest()
    else:
        self.openfolder(dir)
        self.batch()
def getHeaderAndSeperator(self, file):
    """Inspect the first line of *file* to find its separator and header flag.

    Args:
        file: Path of the text file to inspect.

    Returns:
        ``[has_header, separator]`` where ``separator`` is the first of
        ``','``, tab, or space that splits the first line into more than
        one field, and ``has_header`` is True when the first field is
        exactly ``'Source'``. An empty list when none of them splits it.
    """
    with open(file, 'rb') as handle:
        first_line = handle.readline().decode()

    # Candidates are tried in priority order: comma, then tab, then space.
    for candidate in (',', '\t', ' '):
        fields = first_line.split(candidate)
        if len(fields) > 1:
            return [fields[0] == 'Source', candidate]
    return []
def readEdgeFile(self, edgeFile):
    """Read an edge list into ``self.nodeGraph`` and make it the active graph.

    Args:
        edgeFile: Path of the edge-list file; it is remembered in
            ``self.dataFile``.

    Nothing else is changed when the separator cannot be detected.
    """
    self.dataFile = edgeFile
    with open(self.dataFile, 'rb') as stream:
        layout = self.getHeaderAndSeperator(self.dataFile)
        if len(layout) == 0:
            return
        has_header, separator = layout[0], layout[1]
        if has_header:
            next(stream, '')  # the header row is not an edge
        parsed = nx.read_edgelist(stream, delimiter=separator, nodetype=str, encoding="utf-8")

    # Only the edges are copied over, into a fresh undirected graph.
    self.nodeGraph = nx.Graph()
    self.nodeGraph.add_edges_from(parsed.edges())
    self.graph = self.nodeGraph
def snapReader(self, dir):
    """Load a dataset folder laid out as ``edgelist`` / ``Class_info`` / ``vertex2aid``.

    Builds ``self.nodeGraph`` from the edge list, stores the per-node class
    lists under the ``'class'`` node attribute and the per-node attribute
    ids under ``'value'``, then builds ``self.attriGraph`` as a copy with
    an extra ``'attri-<id>'`` node per attribute.

    Args:
        dir: Folder containing the three files.

    Returns:
        True when the dataset was loaded. False when ``dir`` has no
        ``edgelist`` file, or when the attribute file's separator cannot
        be detected (this case used to return None; both are falsy).
    """
    edgeFile = dir + '/edgelist'
    vertex2attriFile = dir + '/vertex2aid'
    classFile = dir + '/Class_info'
    if not os.path.isfile(edgeFile):
        return False
    print(time.ctime())
    beginTime = time.time()
    print('0: Open SNAP Begin')
    self.readEdgeFile(edgeFile)

    # todo ground true class file
    self.classFile = classFile
    with open(self.classFile, 'rb') as inf:
        next(inf, '')  # skip a line
        na = {node: [] for node in self.nodeGraph.nodes()}
        cls = 0
        for line in inf:
            line = line.decode()
            if line.startswith('Circle'):
                # A 'Circle ... #<n>' line switches the class applied to the
                # member lines that follow it.
                cls = int(line[line.index('#') + 1:])
                continue
            for x in line.split(' '):
                if x == '\n':
                    continue
                # Membership is tested on the dict itself (O(1)) instead of
                # rebuilding a list of its keys for every token.
                if x in na:
                    na[x].append(cls)
                else:
                    print('class file node not in edge file', x)
                    self.nodeGraph.add_node(x)
                    na[x] = [cls]
    nx.set_node_attributes(self.nodeGraph, 'class', na)

    self.attriFile = vertex2attriFile
    with open(self.attriFile, 'rb') as inf:
        hinf = self.getHeaderAndSeperator(self.attriFile)
        if len(hinf) == 0:
            return False
        if hinf[0]:
            next(inf, '')  # skip a line
        e = nx.read_edgelist(inf, delimiter=hinf[1], create_using=nx.DiGraph(), nodetype=str,
                             encoding="utf-8")
    na = {node: [] for node in self.nodeGraph.nodes()}
    for x in e.edges():
        if x[0] in na:
            na[x[0]].append(x[1])
        else:
            print('attribute file node not in edge file', x)
            self.nodeGraph.add_node(x[0])
            na[x[0]] = [x[1]]
    nx.set_node_attributes(self.nodeGraph, 'value', na)

    ea = [(x[0], 'attri-' + x[1]) for x in e.edges()]
    # Append each attribute node once, in first-seen order; the set keeps
    # the duplicate check constant-time.
    known = set(self.attriNode)
    for _, attri_name in ea:
        if attri_name not in known:
            known.add(attri_name)
            self.attriNode.append(attri_name)

    self.attriGraph = deepcopy(self.nodeGraph)
    self.attriGraph.add_edges_from(ea)
    self.graph = self.attriGraph
    print('Nodes', len(self.nodeGraph.nodes()), 'Edges', len(self.nodeGraph.edges()))
    print('Nodes + AttriNode', len(self.attriGraph.nodes()), 'Edges + AttriEdge',
          len(self.attriGraph.edges()))
    print('Time of Open', time.time() - beginTime)
    return True
def linqsReader(self, dir):
    """Load a dataset folder holding a ``*.cites`` and a ``*.content`` file.

    The ``.cites`` file is read as the edge list. Each ``.content`` row is
    split into a node id (first field), feature columns, and a class label
    (last field); every feature column whose text is not ``'0'`` becomes an
    edge from the node to ``'attri-<column index>'``.

    Args:
        dir: Folder to scan.

    Returns:
        True when the dataset was loaded. False when ``dir`` has no
        ``.cites`` file, or when the content file's separator cannot be
        detected (this case used to return None; both are falsy).
    """
    edgeFile = ''
    vertex2attriFile = ''
    for filename in os.listdir(dir):
        if filename.endswith(".cites"):
            edgeFile = filename
        elif filename.endswith(".content"):
            vertex2attriFile = filename
    if edgeFile == '':
        return False
    edgeFile = dir + '/' + edgeFile
    vertex2attriFile = dir + '/' + vertex2attriFile
    print(time.ctime())
    beginTime = time.time()
    print('0: Open LINQS Begin')
    self.readEdgeFile(edgeFile)

    self.attriFile = vertex2attriFile
    with open(self.attriFile, 'rb') as inf:
        hinf = self.getHeaderAndSeperator(self.attriFile)
        if len(hinf) == 0:
            return False
        if hinf[0]:
            next(inf, '')  # skip a line
        ea = []
        nal = []
        na = {node: [] for node in self.nodeGraph.nodes()}
        nc = dict.fromkeys(self.nodeGraph.nodes())
        for line in inf:
            # Strip both line-ending characters so a CRLF file does not leave
            # a '\r' glued to the class label in the last field.
            el = line.decode().rstrip('\r\n').split(hinf[1])
            eleSize = len(el)
            if nal == []:
                nal = ['attri-' + str(i) for i in range(1, eleSize - 1)]
            nc[el[0]] = el[eleSize - 1]
            for i in range(1, eleSize - 1):
                if el[i] == '0':
                    continue
                ea.append((el[0], 'attri-' + str(i)))
                # Dict membership is O(1); the old code rebuilt a key list
                # for every non-zero cell.
                if el[0] in na:
                    na[el[0]].append(i)
                else:
                    print('class file node not in edge file', el[0])
                    self.nodeGraph.add_node(el[0])
                    na[el[0]] = [i]
    nx.set_node_attributes(self.nodeGraph, 'value', na)
    nx.set_node_attributes(self.nodeGraph, 'class', nc)
    self.attriGraph = deepcopy(self.nodeGraph)
    self.attriNode = nal
    self.attriGraph.add_edges_from(ea)
    self.graph = self.attriGraph
    print('Nodes', len(self.nodeGraph.nodes()), 'Edges', len(self.nodeGraph.edges()))
    print('Nodes + AttriNode', len(self.attriGraph.nodes()), 'Edges + AttriEdge',
          len(self.attriGraph.edges()))
    print('Time of Open', time.time() - beginTime)
    return True
def tadwReader(self, dir):
    """Load a dataset folder laid out as ``graph.txt`` / ``feature.txt`` / ``group.txt``.

    ``group.txt`` rows are ``<node> <class>``. Row number ``n`` (0-based)
    of ``feature.txt`` describes node ``str(n)``; every column whose text
    is not ``'0'`` becomes an edge from that node to ``'attri-<column>'``.

    Args:
        dir: Folder containing the three files.

    Returns:
        True when the dataset was loaded; False when the separator of the
        class or feature file cannot be detected (these cases used to
        return None; both are falsy).
    """
    edgeFile = dir + '/graph.txt'
    vertex2attriFile = dir + '/feature.txt'
    classFile = dir + '/group.txt'
    print(time.ctime())
    beginTime = time.time()
    print('0: Open TADW Begin')
    self.readEdgeFile(edgeFile)

    # todo ground true class file
    self.classFile = classFile
    with open(self.classFile, 'rb') as inf:
        hinf = self.getHeaderAndSeperator(self.classFile)
        if len(hinf) == 0:
            return False
        if hinf[0]:
            next(inf, '')  # skip a line
        na = {node: [] for node in self.nodeGraph.nodes()}
        for line in inf:
            nc = line.decode().rstrip('\r\n').split(hinf[1])
            # Dict membership is O(1); no need to rebuild a key list per row.
            if nc[0] in na:
                na[nc[0]].append(int(nc[1]))
            else:
                print('class file node not in edge file', nc[0])
                self.nodeGraph.add_node(nc[0])
                na[nc[0]] = [int(nc[1])]
    nx.set_node_attributes(self.nodeGraph, 'class', na)

    self.attriFile = vertex2attriFile
    with open(self.attriFile, 'rb') as inf:
        hinf = self.getHeaderAndSeperator(self.attriFile)
        if len(hinf) == 0:
            return False
        if hinf[0]:
            next(inf, '')  # skip a line
        ea = []
        nal = []
        na = {node: [] for node in self.nodeGraph.nodes()}
        for num, line in enumerate(inf, 0):
            # Lines read in binary mode end with '\n' (or '\r\n'). Stripping
            # only '\r' left the newline on the last field, so the last
            # column never compared equal to '0' and every node was linked
            # to the last attribute.
            nc = line.decode().rstrip('\r\n').split(hinf[1])
            eleSize = len(nc)
            if nal == []:
                nal = ['attri-' + str(i) for i in range(0, eleSize)]
            for i in range(0, eleSize):
                if nc[i] != '0':
                    ea.append((str(num), 'attri-' + str(i)))
                    na[str(num)].append(str(i))
    nx.set_node_attributes(self.nodeGraph, 'value', na)
    self.attriGraph = deepcopy(self.nodeGraph)
    self.attriNode = nal
    self.attriGraph.add_edges_from(ea)
    self.graph = self.attriGraph
    print('Nodes', len(self.nodeGraph.nodes()), 'Edges', len(self.nodeGraph.edges()))
    print('Nodes + AttriNode', len(self.attriGraph.nodes()), 'Edges + AttriEdge',
          len(self.attriGraph.edges()))
    print('Time of Open', time.time() - beginTime)
    return True
def openfolder(self, dir):
    """Log the dataset path, load it with the first reader that accepts it, then plot.

    Readers are tried in the order snapReader, linqsReader, tadwReader; a
    later reader runs only when the earlier ones returned a falsy value.
    Nothing is plotted when ``dir`` is empty or no reader succeeds.
    """
    logging.warning('Dataset {}'.format(dir))
    if dir == '':
        return
    loaded = self.snapReader(dir) or self.linqsReader(dir) or self.tadwReader(dir)
    if not loaded:
        return
    self.plot()
def embedding(self):
    """Run the embedding selected by ``self.embeddingMethod`` and the follow-up stages.

    'HSCA' calls ``hsca`` and ``plot`` and stops there. Every other value
    runs its embedding method (if it has one) and then ``reducting`` and
    ``clustering``.
    """
    chosen = self.embeddingMethod
    if chosen == 'HSCA':
        self.hsca()
        self.plot()
        return

    runner_names = {
        'node2vec': 'node2vec',
        'struc2vec': 'struc2vec',
        'attri2vec': 'attri2vec',
    }
    if chosen in runner_names:
        getattr(self, runner_names[chosen])()
    self.reducting()
    self.clustering()  # includes plot
def batch(self):
    """Run struc2vec followed by the SVM evaluation for each (P, Q, R, D) combination.

    Each axis currently holds a single value, so exactly one run happens;
    widen the lists to sweep more settings.
    """
    p_values = [1.0]
    q_values = [1.0]
    r_values = [0.01]
    d_values = [8]
    # Same nesting order as four nested loops: P outermost, D innermost.
    settings = [(p, q, r, d)
                for p in p_values
                for q in q_values
                for r in r_values
                for d in d_values]
    for p, q, r, d in settings:
        self.P, self.Q, self.R, self.D = p, q, r, d
        self.struc2vec()
        self.svm()
def scalabilityTest(self):
    """Time ``attri2vec`` on random graphs of growing size and log each timing.

    For each size an Erdos-Renyi graph (edge probability 0.05) is created,
    every node receives ``ap`` random attribute ids drawn from ``[0, ar)``,
    the attributed graph is built, and the wall-clock time of
    ``self.attri2vec()`` is written to the log.
    """
    ap = 3  # attribute num per node
    for a_num in range(1, 2):
        ar = 10 * pow(10, a_num)  # total attribute number
        for n_power in range(1, 7):
            node_count = 10 * pow(10, n_power)  # computed once, reused in the log line
            self.nodeGraph = nx.erdos_renyi_graph(node_count, 0.05)
            na = [np.random.randint(ar, size=(1, ap)) for x in self.nodeGraph.nodes()]
            print('na', na)
            na = dict(zip(self.nodeGraph.nodes(), na))
            print('na', na)
            nx.set_node_attributes(self.nodeGraph, 'value', na)
            ea = [(key, 'attri-' + str(x))
                  for key, value in na.items()
                  for x in value.ravel()]

            # Rebuild the attribute-node list in first-seen order; the set
            # makes the duplicate check O(1) instead of scanning the list.
            self.attriNode.clear()
            seen = set()
            for _, attri_name in ea:
                if attri_name not in seen:
                    seen.add(attri_name)
                    self.attriNode.append(attri_name)

            self.attriGraph = deepcopy(self.nodeGraph)
            self.attriGraph.add_edges_from(ea)
            self.graph = self.attriGraph
            beginTime = time.time()
            self.attri2vec()
            logging.warning('- scalarTest ar={} ap={} n={} t={}'.format(ar, ap, node_count,
                                                                       time.time() - beginTime))
def node2vec(self):
    """Train a node2vec embedding on ``self.nodeGraph`` and store it in ``self.model``.

    Does nothing when no node graph has been loaded. The embedding
    dimension follows ``self.D`` (class default 16, the value that was
    previously hard-coded here), matching the other embedding methods.
    """
    if self.nodeGraph == '':
        return
    beginTime = time.time()
    print('1: node2vec Begin')
    # Precompute probabilities and generate walks
    node2vec = Node2Vec(self.nodeGraph, dimensions=self.D, walk_length=30, num_walks=200,
                        p=self.P, q=self.Q, workers=4)
    print('Time of Node2Vec', time.time() - beginTime)
    beginTime = time.time()
    # Embed
    # Any keywords acceptable by gensim.Word2Vec can be passed,
    # `dimensions` and `workers` are automatically passed (from the Node2Vec constructor)
    self.model = node2vec.fit(window=10, min_count=1, batch_words=4)
    print('Time of FIT', time.time() - beginTime)
def struc2vec(self):
    """Train a struc2vec embedding on ``self.nodeGraph`` and store it in ``self.model``.

    Does nothing when no node graph has been loaded. The walks are read
    back from ``random_walks.txt`` and embedded with Word2Vec using
    ``self.D`` dimensions.
    """
    if self.nodeGraph == '':
        return
    beginTime = time.time()
    print('1: struc2vec Begin')
    strucGraph = graph.from_networkx(self.nodeGraph)
    strucGraph = struc2vec.Graph(strucGraph, 'undirected', 4, untilLayer=None)

    # The non-compact alternatives (preprocess_neighbors_with_bfs and
    # calc_distances_all_vertices) sat behind `if True: ... else:` and
    # could never run, so only the compact path is kept.
    strucGraph.preprocess_neighbors_with_bfs_compact()
    strucGraph.create_vectors()
    strucGraph.calc_distances(compactDegree=True)

    strucGraph.create_distances_network()
    strucGraph.preprocess_parameters_random_walk()
    strucGraph.simulate_walks(10, 80)
    walks = LineSentence('random_walks.txt')
    self.model = Word2Vec(walks, size=self.D, window=10, min_count=0, hs=1, sg=1, workers=4, iter=5)
    print('Time of Struc2Vec', time.time() - beginTime)
def attri2vec(self):
    """Train an attri2vec embedding on ``self.attriGraph`` and store it in ``self.model``.

    Does nothing when no attributed graph has been built. The attributed
    graph also becomes the active ``self.graph``.
    """
    if self.attriGraph == '':
        return
    self.graph = self.attriGraph
    started = time.time()
    print('1: attri2vec Begin')

    # Walk generation: p, q and r come from the instance's P, Q and R.
    walker_options = dict(dimensions=self.D, walk_length=30, num_walks=200,
                          p=self.P, q=self.Q, r=self.R, workers=4)
    walker = Attri2Vec(self.graph, **walker_options)
    print('Time of Attri2Vec', time.time() - started)

    # Embedding: the keyword arguments are handed to the walker's fit().
    started = time.time()
    self.model = walker.fit(window=10, min_count=1, batch_words=4)
    print('Time of FIT', time.time() - started)
def geri(self):
    """Train a GERI embedding on ``self.attriGraph`` and store it in ``self.model``.

    Does nothing when no attributed graph has been built. The run's
    parameters are written to the log first.
    """
    if self.attriGraph == '':
        return
    self.graph = self.attriGraph
    started = time.time()
    print('1: GERI Begin')
    logging.warning('- gemi p={} q={} r={} d={}'.format(self.P, self.Q, self.R, self.D))

    # NOTE(review): no import of GERI is visible in this file - confirm
    # where the name is expected to come from.
    walker_options = dict(dimensions=self.D, walk_length=30, num_walks=200,
                          p=self.P, q=self.Q, r=self.R, workers=4)
    walker = GERI(self.graph, **walker_options)
    print('Time of GERI', time.time() - started)

    started = time.time()
    self.model = walker.fit(window=10, min_count=1, batch_words=4)
    print('Time of FIT', time.time() - started)
def clustering(self):
    """Cluster the embedding with the selected method, then refresh the colour mapping.

    Does nothing when no model has been trained. ``kmeans``, ``em`` or
    ``gaa`` is chosen by ``self.clusteringMethod``.
    """
    if self.model == '':
        return
    # Clustering
    if self.clusteringMethod == 'K-means':
        self.kmeans()
    elif self.clusteringMethod == 'EM':
        self.em()
    elif self.clusteringMethod == 'GAA':
        self.gaa()

    # colorCombox defaults to '' in this class, so calling findText() on it
    # unconditionally always raised AttributeError. Only drive the combo box
    # when a widget-like object has actually been attached.
    combo = self.colorCombox
    if hasattr(combo, 'findText'):
        index = combo.findText('Cluster', QtCore.Qt.MatchFixedString)
        if index >= 0:
            combo.setCurrentIndex(index)
    self.colorMapping()
def svm(self):
    """Score the current embedding with linear SVMs and print/log the F1 results.

    Attribute nodes are removed from the model vocabulary, the remaining
    vectors are paired with each node's 'class' attribute from
    ``self.nodeGraph``, and for every train fraction 0.1..0.9 and every
    C value 0.1..1.0 a linear SVC is fitted. Train and test accuracy are
    printed; micro and macro F1 are printed and logged.
    """
    beginTime = time.time()
    print('3: svm Begin')
    # mv is self.model.vocab itself, not a copy, so the deletions below are
    # made on that object.
    # NOTE(review): whether this permanently shrinks the model's vocabulary
    # depends on how the model exposes `vocab` - confirm.
    mv = self.model.vocab
    # print('mv', mv)
    for x in self.attriNode:
        if x in mv:
            del mv[x]
        else:
            print('x', x) # why we lost attri-445 in cora data
    # print('mv', mv)
    # Index the model with the remaining vocabulary to get the feature vectors.
    model = self.model[mv]
    # print('model', model)
    realClass = nx.get_node_attributes(self.nodeGraph, 'class')
    # Labels are collected by iterating the vocabulary again, skipping
    # attribute nodes.
    y = []
    [y.append(realClass[v]) for v in self.model.vocab if v not in self.attriNode]
    # print('y', y)
    for x in range(1, 10):
        # x * .1 is the fraction of samples used for training.
        # print('train_size', x*.1)
        # logging.warning('- attri2vec p={} q={} r={} d={} t={}'.format(self.P, self.Q, self.R, self.D, x*.1))
        # logging.warning('-- train_size: {}'.format(x*.1))
        x_train, x_test, y_train, y_test = train_test_split(model, y, random_state=1, train_size=x*.1)
        # print('train', x_train, y_train)
        # C: Penalty parameter C of the error term. (default=1.0)
        # decision_function_shape: 'ovo', 'ovr'
        # random_state: The seed of the pseudo random number generator to use when shuffling the data
        for c in range(1, 11):
            logging.warning('- p={} q={} r={} d={} t={} c={}'.format(
                self.P, self.Q, self.R, self.D, x * .1, c * .1))
            # logging.warning('--- svm-c: {}'.format(c * .1))
            clf = svm.SVC(C=c*.1, kernel='linear', decision_function_shape='ovr', random_state=0)
            # clf = svm.LinearSVC(random_state=0)
            clf.fit(x_train, y_train) # .ravel()
            # clf.fit(model, y)
            # logging.warning('Class: {}'.format(clusters))
            # --- scores on the training split ---
            print('accuracy train', clf.score(x_train, y_train))
            y_preds = clf.predict(x_train)
            print('y_hat_train', y_preds)
            # calculate f1
            mean_f1 = f1_score(y_train, y_preds, average='micro')
            print('mean_f1 train micro', mean_f1)
            logging.warning('--- mean_f1 train micro: {}'.format(mean_f1))
            mean_f1 = f1_score(y_train, y_preds, average='macro')
            print('mean_f1 train macro', mean_f1)
            logging.warning('--- mean_f1 train macro: {}'.format(mean_f1))
            # --- scores on the held-out split ---
            print('accuracy test', clf.score(x_test, y_test))
            y_preds = clf.predict(x_test)
            # print('y_hat_test', y_preds)
            mean_f1 = f1_score(y_test, y_preds, average='micro')
            print('mean_f1 test micro', mean_f1)
            logging.warning('--- mean_f1 test micro: {}'.format(mean_f1))
            mean_f1 = f1_score(y_test, y_preds, average='macro')
            print('mean_f1 test macro', mean_f1)
            logging.warning('--- mean_f1 test macro: {}'.format(mean_f1))
    print('Time of svm', time.time() - beginTime)
def kmeans(self):
    """Cluster the embedding vectors with cosine-distance k-means.

    Uses ``self.clusteringNum`` clusters and stores the result in
    ``self.cluster`` as a dict from vocabulary entry to cluster index.
    """
    started = time.time()
    print('3: kmeans Begin')
    vectors = self.model[self.model.vocab]
    print('model', vectors)

    clusterer = KMeansClusterer(
        self.clusteringNum,
        distance=nltk.cluster.util.cosine_distance,
        repeats=25,
        avoid_empty_clusters=True,
    )
    assignments = clusterer.cluster(vectors, assign_clusters=True)
    print('clusters', assignments)

    vocabulary = list(self.model.vocab)
    self.cluster = dict(zip(vocabulary, assignments))
    print('self.clusters', self.cluster)
    # The earlier experiments that matched cluster ids against the
    # ground-truth 'class' attribute were commented out and are not run here.
    print('Time of kmeans', time.time() - started)
def em(self):
    """Placeholder for Gaussian EM clustering; only prints the elapsed time."""
    started = time.time()
    # todo
    elapsed = time.time() - started
    print('Time of Gaussian EM', elapsed)
def gaa(self):
    """Placeholder for group-average agglomerative clustering; only prints the elapsed time."""
    started = time.time()
    # todo
    elapsed = time.time() - started
    print('Time of Group Average Agglomerative', elapsed)
def reducting(self):
    """Run the reduction selected by ``self.reductionMethod`` and reset cached plot data.

    'NONE' returns immediately without resetting anything. Any other value
    runs its reduction method (if it has one) and then clears ``nodeXs``,
    ``nodeEdges`` and ``attriEdges`` and resets ``cur_xlim``.
    """
    print('reductionMethod', self.reductionMethod)
    chosen = self.reductionMethod
    if chosen == 'NONE':
        return

    method_names = {
        't-SNE': 'tsne',
        'Isomap': 'isomap',
        'LLE': 'lle',
        'MLLE': 'mlle',
        'HLLE': 'hlle',
        'Spectral': 'spectral',
        'LTSA': 'ltsa',
        'MDS': 'mds',
    }
    if chosen in method_names:
        getattr(self, method_names[chosen])()

    for cached in (self.nodeXs, self.nodeEdges, self.attriEdges):
        cached.clear()
    self.cur_xlim = ''
def hsca(self):
    """Load precomputed HSCA features, project them to 2-D with t-SNE, and store positions.

    Reads comma-separated rows from ``HSCA_D128_FEATURES_CORA.txt`` and
    pairs the projected rows with ``self.attriGraph.nodes()`` in iteration
    order.
    NOTE(review): this assumes the file's row order matches the graph's
    node order - confirm against the file's producer.
    """
    # The context manager closes the file; it used to be left open.
    with open("HSCA_D128_FEATURES_CORA.txt", "r") as crs:
        rows = [row.strip().split(',') for row in crs]
    # np.float was only an alias of the builtin float and no longer exists
    # in current NumPy releases.
    embeddings = np.array(rows).astype(float)
    print('hsca embedding', embeddings)
    tsne = TSNE(n_components=2, perplexity=30, early_exaggeration=30)  # random_state=7,
    embeddings_2d = tsne.fit_transform(embeddings)
    print('embedding_2d', embeddings_2d)
    nodePosition = dict(zip(self.attriGraph.nodes(), embeddings_2d))
    print('nodePosition', nodePosition)
    # nodePosition starts out as '' on the class, which has no update().
    if not isinstance(self.nodePosition, dict):
        self.nodePosition = {}
    self.nodePosition.update(nodePosition)
def tsne(self):
    """Project the model's vectors to 2-D with t-SNE and merge them into ``self.nodePosition``.

    When no model has been trained, a warning is logged and nothing else
    happens.
    """
    beginTime = time.time()
    print('2: TSNE Begin')
    if self.model == '':
        # This class has no Qt import, so the former QMessageBox pop-up
        # raised NameError; the warning is reported through the log instead.
        logging.warning('There is no model.')
        return
    gn = [x for x in self.model.vocab if len(x) > 0]
    embeddings = np.array([self.model[x] for x in gn])
    tsne = TSNE(n_components=2, perplexity=30, early_exaggeration=30)  # random_state=7,
    embeddings_2d = tsne.fit_transform(embeddings)
    # todo: fix reducting (position) or fix embedding method
    # Pair positions with the same filtered names the vectors were built
    # from; zipping against the unfiltered vocab would shift every position
    # after a skipped (empty) name.
    nodePosition = dict(zip(gn, embeddings_2d))
    # nodePosition starts out as '' on the class, which has no update().
    if not isinstance(self.nodePosition, dict):
        self.nodePosition = {}
    self.nodePosition.update(nodePosition)
    print('Time of TSNE', time.time() - beginTime)
def isomap(self):
    """Placeholder for the Isomap reduction; only prints the elapsed time."""
    started = time.time()
    # todo
    elapsed = time.time() - started
    print('Time of Isomap', elapsed)
def lle(self):
    """Placeholder for locally linear embedding; only prints the elapsed time."""
    started = time.time()
    # todo
    elapsed = time.time() - started
    print('Time of Locally linear embedding', elapsed)
def mlle(self):
    """Placeholder for modified locally linear embedding; only prints the elapsed time."""
    started = time.time()
    # todo
    elapsed = time.time() - started
    print('Time of Modified Locally Linear Embedding', elapsed)
def hlle(self):
    """Placeholder for Hessian eigenmapping; only prints the elapsed time."""
    started = time.time()
    # todo
    elapsed = time.time() - started
    print('Time of Hessian Eigenmapping', elapsed)
def spectral(self):
    """Placeholder for spectral embedding; only prints the elapsed time."""
    started = time.time()
    # todo
    elapsed = time.time() - started
    print('Time of Spectral Embedding', elapsed)
def ltsa(self):
    """Placeholder for local tangent space alignment; only prints the elapsed time."""
    started = time.time()
    # todo
    elapsed = time.time() - started
    print('Time of Local tangent space alignment', elapsed)
def mds(self):
    """Placeholder for multi-dimensional scaling; only prints the elapsed time."""
    started = time.time()
    # todo
    elapsed = time.time() - started
    print('Time of Multi-dimensional Scaling', elapsed)
def colorMapping(self):
    """Recompute node colours according to ``self.colorMethod`` and replot if they changed.

    'Unitary' resets every node to the default colour; 'Attribute',
    'Cluster' and 'Class' delegate to the matching ``*2Color`` method,
    which reports whether a new mapping was produced.
    """
    needRefresh = False
    if self.colorMethod == 'Unitary':
        # This branch used to assign NODE_SIZE (a number) as the colour.
        self.saNodeColors = NODE_COLOR
        needRefresh = True
    elif self.colorMethod == 'Attribute':
        needRefresh = self.attri2Color()
    elif self.colorMethod == 'Cluster':
        needRefresh = self.cluster2Color()
    elif self.colorMethod == 'Class':
        needRefresh = self.class2Color()
    if needRefresh:
        self.nodeColors.clear()
        self.plot()
def cluster2Color(self):
    """Colour each clustered node by its cluster index, cycling through the palette.

    Returns:
        True when ``self.cluster`` is non-empty and ``self.saNodeColors``
        was rebuilt; False otherwise.
    """
    print('4: cluster2Color Begin')
    if not self.cluster:
        return False
    self.saNodeColors = {
        node: self.clusterColors[label % len(self.clusterColors)]
        for node, label in self.cluster.items()
    }
    return True
def attriMean(self, v):
    """Reduce an attribute value to a single representative.

    A non-list value is returned unchanged. A list yields its first
    element, or 0 when it is empty.
    """
    if not isinstance(v, list):
        return v
    # todo multi label color mapping
    return v[0] if len(v) > 0 else 0
def attri2Color(self):
    """Colour nodes by their 'value' attribute, cycling through the palette.

    Returns:
        True when at least one node carries a 'value' attribute and
        ``self.saNodeColors`` was rebuilt; False otherwise (including when
        no attributed graph exists).
    """
    if self.attriGraph == '':
        return False
    self.graph = self.attriGraph
    print('4: attri2Color Begin')
    values = nx.get_node_attributes(self.graph, 'value')  # looked up once
    if not values:
        return False
    # todo value is not num. v.isnumeric()
    palette = self.clusterColors
    c2c = [palette[int(self.attriMean(v)) % len(palette)] for v in values.values()]
    print('c2c', c2c)
    # Key each colour by the node that owns the attribute. Zipping against
    # every node of the graph only lines up while the attribute-less
    # 'attri-*' nodes happen to come last.
    self.saNodeColors = dict(zip(values.keys(), c2c))
    return True
def class2Color(self):
    """Colour nodes by their 'class' attribute, cycling through the palette.

    Returns:
        True when at least one node carries a 'class' attribute and
        ``self.saNodeColors`` was rebuilt; False otherwise (including when
        no attributed graph exists).
    """
    if self.attriGraph == '':
        return False
    self.graph = self.attriGraph
    print('4: class2Color Begin')
    classes = nx.get_node_attributes(self.graph, 'class')  # looked up once
    if not classes:
        return False
    # todo value is not num. v.isnumeric()
    palette = self.clusterColors
    c2c = [palette[int(self.attriMean(v)) % len(palette)] for v in classes.values()]
    print('c2c', c2c)
    # Key each colour by the node that owns the attribute. Zipping against
    # every node of the graph only lines up while the attribute-less
    # 'attri-*' nodes happen to come last.
    self.saNodeColors = dict(zip(classes.keys(), c2c))
    return True
def sizeMapping(self):
    """Recompute node sizes according to ``self.sizeMethod`` and replot if they changed.

    'Unitary' resets every node to the default size; 'Degree' and 'Class'
    delegate to ``degree2Size`` / ``class2Size``, which report whether a
    new mapping was produced.
    """
    method = self.sizeMethod
    if method == 'Unitary':
        self.saNodeSizes = NODE_SIZE
        changed = True
    elif method == 'Degree':
        changed = self.degree2Size()
    elif method == 'Class':
        changed = self.class2Size()
    else:
        changed = False

    if not changed:
        return
    self.nodeSizes.clear()
    self.plot()
def degree2Size(self):
    """Size each node of ``self.nodeGraph`` by ten times its degree, with a floor of 50.

    Degrees are taken from ``self.attriGraph`` when it exists and is
    non-empty, otherwise from ``self.graph`` when a node graph exists.

    Returns:
        True when ``self.saNodeSizes`` was rebuilt; False when neither
        graph is available.
    """
    if self.attriGraph:
        degree_source = self.attriGraph
    elif self.nodeGraph:
        degree_source = self.graph
    else:
        return False
    # Compute the degree table once; the old code called nx.degree() twice
    # per node. dict() accepts both a mapping and (node, degree) pairs.
    degrees = dict(nx.degree(degree_source))
    # Look each node up by name rather than relying on the two graphs
    # listing their shared nodes in the same order.
    self.saNodeSizes = {node: max(degrees[node] * 10, 50) for node in self.nodeGraph.nodes()}
    return True
def class2Size(self):
    """Size nodes by how many entries their 'class' attribute holds.

    A node with ``k > 0`` entries gets size ``10 * k``; an empty value
    gets size 5.

    Returns:
        True when at least one node carries a 'class' attribute and
        ``self.saNodeSizes`` was rebuilt; False otherwise (including when
        no attributed graph exists).
    """
    if self.attriGraph == '':
        return False
    self.graph = self.attriGraph
    # The progress message used to say 'attri2Color', copied from that method.
    print('4: class2Size Begin')
    classes = nx.get_node_attributes(self.graph, 'class')  # looked up once
    if not classes:
        return False
    # todo value is not num. v.isnumeric()
    c2s = [len(v) * 10 if len(v) > 0 else 5 for v in classes.values()]
    print('c2s', c2s)
    # Key each size by the node that owns the attribute instead of zipping
    # against every node of the graph.
    self.saNodeSizes = dict(zip(classes.keys(), c2s))
    return True
def shapeMethodChanged(self):
    """Adopt the sender's current text as the shape method and re-apply the shape mapping."""
    source = self.sender()
    self.shapeMethod = source.currentText()
    self.shapeMapping()
def shapeMapping(self):
    """Recompute node shapes according to ``self.shapeMethod`` and replot if they changed.

    'Unitary' resets every node to the default shape; 'Value' and 'Class'
    delegate to ``attri2Shape`` / ``class2Shape``, which report whether a
    new mapping was produced.
    """
    method = self.shapeMethod
    if method == 'Unitary':
        self.saNodeShapes = NODE_SHAPE
        changed = True
    elif method == 'Value':
        changed = self.attri2Shape()
    elif method == 'Class':
        changed = self.class2Shape()
    else:
        changed = False

    if not changed:
        return
    self.nodeShapes.clear()
    self.plot()
def attri2Shape(self):
    """Pick each node's marker from ``shape`` using its 'value' attribute.

    Returns:
        True when at least one node carries a 'value' attribute and
        ``self.saNodeShapes`` was rebuilt; False otherwise (including when
        no attributed graph exists).
    """
    if self.attriGraph == '':
        return False
    self.graph = self.attriGraph
    print('4: attri2Shape Begin')
    values = nx.get_node_attributes(self.graph, 'value')  # looked up once
    if not values:
        return False
    # todo value is not num. v.isnumeric()
    # The readers store 'value' as a list, which int() rejects; attriMean
    # reduces a list to one representative and passes other values through,
    # the same way the colour mappings do.
    v2s = [shape[int(self.attriMean(v))] for v in values.values()]
    # Key each shape by the node that owns the attribute instead of zipping
    # against every node of the graph.
    self.saNodeShapes = dict(zip(values.keys(), v2s))
    return True
def class2Shape(self):
    """Pick each node's marker from ``shape`` using its 'class' attribute.

    Returns:
        True when at least one node carries a 'class' attribute and
        ``self.saNodeShapes`` was rebuilt; False otherwise (including when
        no attributed graph exists).
    """
    if self.attriGraph == '':
        return False
    self.graph = self.attriGraph
    print('4: class2Shape Begin')
    classes = nx.get_node_attributes(self.graph, 'class')  # looked up once
    if not classes:
        return False
    # todo value is not num. v.isnumeric()
    # Two of the readers store 'class' as a list, which int() rejects;
    # attriMean reduces a list to one representative and passes other
    # values through, the same way the colour mappings do.
    c2s = [shape[int(self.attriMean(v))] for v in classes.values()]
    print('c2s', c2s)
    # Key each shape by the node that owns the attribute instead of zipping
    # against every node of the graph.
    self.saNodeShapes = dict(zip(classes.keys(), c2s))
    return True
def graphLayout(self):
    """Compute node positions for ``self.graph`` with ForceAtlas2 and store them.

    Does nothing when no graph has been loaded. The result is written to
    ``self.nodePosition``.
    """
    if self.graph == '':
        return
    # The spring_layout alternative sat behind `if 0:` and could never run,
    # so only the ForceAtlas2 path is kept.
    forceatlas2 = ForceAtlas2(
        # Behavior alternatives
        outboundAttractionDistribution=False,  # Dissuade hubs
        linLogMode=False,  # NOT IMPLEMENTED
        adjustSizes=False,  # Prevent overlap (NOT IMPLEMENTED)
        edgeWeightInfluence=1.0,
        # Performance
        jitterTolerance=1.0,  # Tolerance
        barnesHutOptimize=True,
        barnesHutTheta=1.2,
        multiThreaded=False,  # NOT IMPLEMENTED
        # Tuning
        scalingRatio=2.0,
        strongGravityMode=False,
        gravity=1.0,
        # Log
        verbose=True)
    self.nodePosition = forceatlas2.forceatlas2_networkx_layout(self.graph, pos=None, iterations=1)
def curFunction(self, xlim, ylim):
    """Remember the current view limits and redraw the view rectangle on the overview axes.

    Args:
        xlim: ``(min, max)`` of the visible x range.
        ylim: ``(min, max)`` of the visible y range.

    Only the limits are stored when no overview axes (``self.oax``) is set.
    """
    self.cur_xlim = xlim
    self.cur_ylim = ylim
    if not self.oax:
        return

    # Drop the previous rectangle before drawing the new one.
    if self.oax.patches:
        self.overviewRec.remove()
    origin = (xlim[0], ylim[0])
    width = xlim[1] - xlim[0]
    height = ylim[1] - ylim[0]
    self.overviewRec = patches.Rectangle(origin, width, height, fill=False)
    self.oax.add_patch(self.overviewRec)
    self.overViewWidget.plot()
def plot(self):
    """Do nothing; drawing is a no-op in this headless class."""
    return None
# Run the experiment only when this file is executed as a script, so that
# importing the module does not start loading and training.
if __name__ == '__main__':
    Attri2vecTest('./datasets/webkb')
    # Attri2vecTest()
    print('Finish.')
1
https://gitee.com/shaunjm/FANE.git
git@gitee.com:shaunjm/FANE.git
shaunjm
FANE
FANE
master

搜索帮助