From 415064ff3686499a057cf502ea1f8069d7f00d2c Mon Sep 17 00:00:00 2001
From: Hanyan Yin
Date: Thu, 7 Aug 2025 10:07:15 +0800
Subject: [PATCH 1/3] fix: Topic 2: resolve the SWFD operator test timeout
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 StreamLearn/Algorithm/SlidingWindowFD/DSFD.py | 5 +++--
 StreamLearn/Dataset/FDDataset.py              | 2 +-
 2 files changed, 4 insertions(+), 3 deletions(-)

diff --git a/StreamLearn/Algorithm/SlidingWindowFD/DSFD.py b/StreamLearn/Algorithm/SlidingWindowFD/DSFD.py
index 3bf9a01..8551ffc 100644
--- a/StreamLearn/Algorithm/SlidingWindowFD/DSFD.py
+++ b/StreamLearn/Algorithm/SlidingWindowFD/DSFD.py
@@ -123,8 +123,9 @@ class SeqDSFD(StreamEstimator):
             direction = direction / linalg.norm(direction)
             predict = self.predict(direction)
             ground_truth = linalg.norm(A @ direction, 2) ** 2
-            loss = self.evaluate(predict, ground_truth)
-            print("GDRO", i, loss)
+
+            loss = self.evaluate(predict, ground_truth)
+            print("DSFD", i, loss)
 
     def get(self):
         j = 0
diff --git a/StreamLearn/Dataset/FDDataset.py b/StreamLearn/Dataset/FDDataset.py
index 0c94189..5c0b9ec 100644
--- a/StreamLearn/Dataset/FDDataset.py
+++ b/StreamLearn/Dataset/FDDataset.py
@@ -14,7 +14,7 @@ class FDDataset(StreamDataset):
                     "StreamLearn/Dataset/subject103.dat", delim_whitespace=True
                 )
                 A = df.values.astype(np.float64)
-                A = A[:, 2:]
+                A = A[:20000, 2:]
                 A[np.isnan(A)] = 1
                 self.m, self.d = A.shape
                 Rs = np.linalg.norm(A, axis=1) ** 2
--
Gitee

From fa5d798127913bcafe88a36b343e00d6c2eb4de6 Mon Sep 17 00:00:00 2001
From: Hanyan Yin
Date: Fri, 8 Aug 2025 15:31:00 +0800
Subject: [PATCH 2/3] fix: Topic 2: DGNN
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 StreamLearn/Algorithm/DecoupledDGNN/DGNN.py  | 238 +++++-------
 .../Algorithm/DecoupledDGNN/graph_embs.py    |   0
 StreamLearn/Algorithm/DecoupledDGNN/model.py |  23 +-
 StreamLearn/Algorithm/DecoupledDGNN/utils.py |  35 +--
 StreamLearn/Config/DGNN.py                   |   2 +-
 StreamLearn/Dataset/DTDGsDataset.py          |   3 +-
 6 files changed, 99 insertions(+), 202 deletions(-)
 mode change 100755 => 100644 StreamLearn/Algorithm/DecoupledDGNN/DGNN.py
 mode change 100755 => 100644 StreamLearn/Algorithm/DecoupledDGNN/graph_embs.py
 mode change 100755 => 100644 StreamLearn/Algorithm/DecoupledDGNN/model.py
 mode change 100755 => 100644 StreamLearn/Algorithm/DecoupledDGNN/utils.py
 mode change 100755 => 100644 StreamLearn/Dataset/DTDGsDataset.py

diff --git a/StreamLearn/Algorithm/DecoupledDGNN/DGNN.py b/StreamLearn/Algorithm/DecoupledDGNN/DGNN.py
old mode 100755
new mode 100644
index e6ec604..263332f
--- a/StreamLearn/Algorithm/DecoupledDGNN/DGNN.py
+++ b/StreamLearn/Algorithm/DecoupledDGNN/DGNN.py
@@ -14,6 +14,7 @@ from torch.utils.data import DataLoader
 import pickle
 import math
 import random
+import os
 
 from StreamLearn.Base.SemiEstimator import StreamEstimator
 from StreamLearn.Algorithm.DecoupledDGNN.utils import (
@@ -24,37 +25,6 @@ from StreamLearn.Algorithm.DecoupledDGNN.model import (
     linkPredictor,
 )
 
-def edge_index_difference(edge_all, edge_except, num_nodes):
-    """Set difference operator, return edges in edge_all but not
-    in edge_except.
-    """
-    idx_all = edge_all[0] * num_nodes + edge_all[1]
-    idx_except = edge_except[0] * num_nodes + edge_except[1]
-    mask=np.isin(idx_all, idx_except)
-    idx_kept = idx_all[~mask]
-    ii = idx_kept // num_nodes
-    jj = idx_kept % num_nodes
-    return np.vstack((ii,jj)).astype(np.int64)
-
-def gen_negative_edges(sources, destinations, num_nodes, num_neg_per_node):
-    """Generates a fixed number of negative edges for each node.
-
-    Args:
-        sources: (E) array of positive edges' sources.
-        destinations: (E) array of positive edges' destinations.
-        num_nodes: total number of nodes.
-        num_neg_per_node: approximate number of negative edges generated for
-            each source node in edge_index.
-    """
-    src_lst = np.unique(sources) # get unique senders.
-    pos_edge_index = np.vstack((sources, destinations))
-    num_neg_per_node = int(1.5 * num_neg_per_node) # add some redundancy.
-    ii = src_lst.repeat(num_neg_per_node)
-    jj = np.random.choice(num_nodes, len(ii), replace=True)
-    candidates = np.vstack((ii, jj)).astype(np.int64)
-    neg_edge_index = edge_index_difference(candidates, pos_edge_index, num_nodes)
-    return neg_edge_index[0], neg_edge_index[1]
-
 class DGNN(StreamEstimator):
     def __init__(self, args):
         self.args = args
@@ -80,12 +50,18 @@ class DGNN(StreamEstimator):
         self.shuffle = args.shuffle
 
         self.checkpt_path = args.checkpt_path
+        os.makedirs(self.checkpt_path, exist_ok=True)
         self.checkpt_file = self.checkpt_path + '/' + self.data + '_ws'+str(self.window_size)+'_best.pt'
         self.patience = args.patience
 
     def fit(self, stream_dataset):
        self.train()
 
+    def test(self, stream_dataset):
+        print('begin test...load model at epoch ', self.best_epoch)
+        test_loss, test_acc, test_auc, test_ap = self.eval(self.test_data)
+        print("Test loss: %.5f, Test accuracy: %.5f, Test AUC: %.5f, Test AP: %.5f" % (test_loss, test_acc, test_auc, test_ap))
+
     def predict(self, X):
         pass
 
@@ -93,15 +69,15 @@ class DGNN(StreamEstimator):
         pass
 
     def train(self):
-        full_data, train_data, val_data, test_data, _, _ = get_data_transductive('./dataset', self.data, shuffle=self.shuffle, disperse=True)
+        self.full_data, train_data, val_data, self.test_data, _, _ = get_data_transductive('./dataset', self.data, shuffle=self.shuffle, disperse=True)
         print('window_size: ', self.window_size)
-        self.edge_helper = EdgeHelper(self.data,randomize=True, disperse=True, use_neg=self.use_neg, inductive=self.inductive, split=False)
+        self.edge_helper = EdgeHelper('./dataset', self.data,randomize=True, disperse=True, use_neg=self.use_neg, inductive=self.inductive, split=False)
 
         criterion = nn.BCELoss()
-        optimizer = optim.RMSprop(self.model.parameters(), lr=args.learning_rate)
-        best_ap, best_epoch, best_loss = 0, 0, 10000.
+        optimizer = optim.RMSprop(self.model.parameters(), lr=self.args.learning_rate)
+        best_ap, self.best_epoch, best_loss = 0, 0, 10000.
         bad_counter = 0
-        for epoch in range(args.epochs):
+        for epoch in range(self.args.epochs):
             self.model.train()
             correct = 0
             num_samples = 0
@@ -116,53 +92,55 @@ class DGNN(StreamEstimator):
                 for batch_idx in range(num_batch):
                     start_idx = batch_idx * self.batch_size
                     end_idx = min(num_instance, start_idx + self.batch_size)
-                    sources_batch, destinations_batch = edges_snap[start_idx:end_idx, 0], edges_snap[start_idx:end_idx, 1]
+                    sources_batch, destinations_batch = edges_snap[start_idx:end_idx, 0], edges_snap[start_idx:end_idx,
+                                                                                                      1]
                    size = len(sources_batch)
                     timestamps_batch = np.repeat(time, size)
-
-                    if args.seq_model in ['lstm', 'gru']:
-                        ### get pos features (sources_batch, destinations_batch, timestamps_batch)
-                        src_features, dst_features = self.edge_helper.get_edges_feats(sources_batch, destinations_batch, timestamps_batch, window_size=self.window_size, concat=False)
-                        pos_preds = self.model.get_edges_embedding(src_features.to(self.device), dst_features.to(self.device))
-                    elif args.seq_model == 'transformer':
-                        pos_features = self.edge_helper.get_edges_feats(sources_batch, destinations_batch, timestamps_batch, window_size=self.window_size, concat=True)
-                        pos_preds = self.model(pos_features.to(self.device))
+                    pos_features = self.edge_helper.get_edges_feats(sources_batch, destinations_batch, timestamps_batch,
+                                                                    window_size=self.window_size, concat=True)
+                    pos_preds = self.model(pos_features.to(self.device))
                     pos_labels = torch.ones(pos_preds.shape[0], dtype=torch.float, device=self.device)
-                    pos_loss = criterion(pos_preds.squeeze(dim=1), pos_labels) ##pos_loss = -torch.log(pos_preds[:, 1])
+                    pos_loss = criterion(pos_preds.squeeze(dim=1), pos_labels)
 
                     ### get negtive sample and features
                     neg_destinations_batch = np.random.randint(0, self.edge_helper.node_num, size)
-                    if args.seq_model in ['lstm', 'gru']:
-                        neg_src_features, neg_dst_features = self.edge_helper.get_edges_feats(sources_batch, neg_destinations_batch, timestamps_batch, window_size=self.window_size, concat=False)
-                        neg_preds = self.model.get_edges_embedding(neg_src_features.to(self.device), neg_dst_features.to(self.device))
-                    elif args.seq_model == 'transformer':
-                        neg_features = self.edge_helper.get_edges_feats(sources_batch, neg_destinations_batch, timestamps_batch, window_size=self.window_size, concat=True)
-                        neg_preds = self.model(neg_features.to(self.device))
+                    neg_features = self.edge_helper.get_edges_feats(sources_batch, neg_destinations_batch,
+                                                                    timestamps_batch, window_size=self.window_size,
+                                                                    concat=True)
+                    neg_preds = self.model(neg_features.to(self.device))
                     neg_labels = torch.zeros(neg_preds.shape[0], dtype=torch.float, device=self.device)
-                    neg_loss = criterion(neg_preds.squeeze(dim=1), neg_labels) ##neg_loss = -torch.log(neg_preds[:, 0])
+                    neg_loss = criterion(neg_preds.squeeze(dim=1), neg_labels)
 
                     # backward
-                    loss = pos_loss + neg_loss ##loss = torch.mean(pos_loss + neg_loss)
+                    loss = pos_loss + neg_loss
                     all_loss.append(loss.item())
-
+
                     optimizer.zero_grad()
                     loss.backward()
                     optimizer.step()
 
-                    TP = torch.sum(pos_preds>=0.5)
-                    TN = torch.sum(neg_preds<0.5)
+                    TP = torch.sum(pos_preds >= 0.5)
+                    TN = torch.sum(neg_preds < 0.5)
                     correct += (TP + TN).item()
                     num_samples += (pos_preds.shape[0] + neg_preds.shape[0])
 
             train_loss = np.mean(all_loss)
             train_acc = correct / num_samples
 
-            valid_loss, valid_acc, valid_auc, valid_ap = self.valid(val_data)
-            print("Epoch: %d, loss: %.5f, Train accuracy: %.5f, Valid loss: %.5f, Valid accuracy: %.5f, Valid AUC: %.5f, Valid AP: %.5f" % (epoch+1, train_loss, train_acc, valid_loss, valid_acc, valid_auc, valid_ap))
+            if epoch > 999:
+                valid_loss, valid_acc, valid_auc, valid_ap, valid_mrr = self.valid(val_data, cal_mrr=True)
+                print(
+                    "Epoch: %d, loss: %.5f, Train accuracy: %.5f, Valid loss: %.5f, Valid accuracy: %.5f, Valid AUC: %.5f, Valid AP: %.5f, MRR: %.5f" % (
+                        epoch + 1, train_loss, train_acc, valid_loss, valid_acc, valid_auc, valid_ap, valid_mrr))
+            else:
+                valid_loss, valid_acc, valid_auc, valid_ap = self.valid(val_data)
+                print(
+                    "Epoch: %d, loss: %.5f, Train accuracy: %.5f, Valid loss: %.5f, Valid accuracy: %.5f, Valid AUC: %.5f, Valid AP: %.5f" % (
+                        epoch + 1, train_loss, train_acc, valid_loss, valid_acc, valid_auc, valid_ap))
 
             if valid_ap > best_ap:
                 best_ap = valid_ap
-                best_epoch = epoch
+                self.best_epoch = epoch
                 torch.save(self.model.state_dict(), self.checkpt_file)
                 bad_counter = 0
             else:
@@ -170,19 +148,14 @@ class DGNN(StreamEstimator):
                 bad_counter += 1
                 if bad_counter == self.patience:
                     break
 
-        print('begin test...')
-        print(best_epoch)
-        test_loss, test_acc, test_auc, test_ap, test_mrr = self.test(test_data)
-        print("Test loss: %.5f, Test accuracy: %.5f, Test AUC: %.5f, Test AP: %.5f, MRR: %.5f" % (test_loss, test_acc, test_auc, test_ap, test_mrr))
-
-    def valid(self, val_data, cal_mrr=False):
+    def valid(self, val_data):
         criterion = nn.BCELoss()
         self.model.eval()
         correct = 0
         num_samples = 0
         all_loss = []
-        val_batchsize = 2*self.batch_size
+        val_batchsize = 2 * self.batch_size
         y_true, y_pred = [], []
         mrr_hist = []
         for time in val_data.unique_times:
@@ -196,32 +169,26 @@ class DGNN(StreamEstimator):
             size = len(sources_batch)
             timestamps_batch = np.repeat(time, size)
 
-            if args.seq_model in ['lstm', 'gru']:
-                src_features, dst_features = self.edge_helper.get_edges_feats(sources_batch, destinations_batch, timestamps_batch, window_size=self.window_size, concat=False)
-                pos_preds = self.model.get_edges_embedding(src_features.to(self.device), dst_features.to(self.device))
-            elif args.seq_model == 'transformer':
-                pos_features = self.edge_helper.get_edges_feats(sources_batch, destinations_batch, timestamps_batch, window_size=self.window_size, concat=True)
-                pos_preds = self.model(pos_features.to(self.device))
+            pos_features = self.edge_helper.get_edges_feats(sources_batch, destinations_batch, timestamps_batch,
+                                                            window_size=self.window_size, concat=True)
+            pos_preds = self.model(pos_features.to(self.device))
             pos_labels = torch.ones(pos_preds.shape[0], dtype=torch.float, device=self.device)
             pos_loss = criterion(pos_preds.squeeze(dim=1), pos_labels)
 
             ### get negtive sample and features
             neg_destinations_batch = np.random.randint(0, self.edge_helper.node_num, size)
-            if args.seq_model in ['lstm', 'gru']:
-                neg_src_features, neg_dst_features = self.edge_helper.get_edges_feats(sources_batch, neg_destinations_batch, timestamps_batch, window_size=self.window_size, concat=False)
-                neg_preds = self.model.get_edges_embedding(neg_src_features.to(self.device), neg_dst_features.to(self.device))
-            elif args.seq_model == 'transformer':
-                neg_features = self.edge_helper.get_edges_feats(sources_batch, neg_destinations_batch, timestamps_batch, window_size=self.window_size, concat=True)
-                neg_preds = self.model(neg_features.to(self.device))
+            neg_features = self.edge_helper.get_edges_feats(sources_batch, neg_destinations_batch, timestamps_batch,
+                                                            window_size=self.window_size, concat=True)
+            neg_preds = self.model(neg_features.to(self.device))
             neg_labels = torch.zeros(neg_preds.shape[0], dtype=torch.float, device=self.device)
             neg_loss = criterion(neg_preds.squeeze(dim=1), neg_labels)
 
-            loss = pos_loss + neg_loss ##loss = torch.mean(pos_loss + neg_loss)
+            loss = pos_loss + neg_loss
             all_loss.append(loss.item())
-
-            TP = torch.sum(pos_preds>=0.5)
-            TN = torch.sum(neg_preds<0.5)
+
+            TP = torch.sum(pos_preds >= 0.5)
+            TN = torch.sum(neg_preds < 0.5)
             correct += (TP + TN).item()
             num_samples += (pos_preds.shape[0] + neg_preds.shape[0])
@@ -232,19 +199,12 @@ class DGNN(StreamEstimator):
                 y_pred.append(neg_preds[i].item())
                 y_true.append(0)
 
-            if cal_mrr:
-                # calculate MRR for each snap
-                mrr, recall_at = self.eval_mrr_and_recall(edges_snap, np.repeat(time, edges_snap.shape[0]), self.edge_helper.node_num)
-                mrr_hist.append(mrr)
+
         auc = self.auc_score(y_true, y_pred)
         ap = self.ap_score(y_true, y_pred)
         valid_loss = np.mean(all_loss)
         valid_acc = correct / num_samples
-        if cal_mrr:
-            valid_mrr = np.mean(mrr_hist)
-            return valid_loss, valid_acc, auc, ap, valid_mrr
-        else:
-            return valid_loss, valid_acc, auc, ap
+        return valid_loss, valid_acc, auc, ap
 
     def auc_score(self, y_true, y_score):
         ''' use sklearn roc_auc_score API
@@ -262,68 +222,14 @@ class DGNN(StreamEstimator):
         ap = average_precision_score(y_true, y_score)
         return ap
 
-    def eval_mrr_and_recall(self, eval_edges, eval_timestamps, num_nodes, num_neg_per_node=1000):
-        from datetime import datetime
-        start = datetime.now()
-        eval_sources, eval_destinations = eval_edges[:, 0], eval_edges[:, 1]
-
-        # A list of source nodes to consider.
-        src_lst = np.unique(eval_sources) # get unique senders.
-        num_users = len(src_lst)
-
-        src_features, dst_features = self.edge_helper.get_edges_feats(eval_sources, eval_destinations, eval_timestamps, window_size=self.window_size, concat=False)
-        pos_preds = self.model.get_edges_embedding(src_features.to(self.device), dst_features.to(self.device))
-        pos_preds = pos_preds.squeeze(dim=1)
-        pos_labels = torch.ones(pos_preds.shape[0], dtype=torch.float, device=self.device)
-
-        # generate negtive samples
-        neg_sources, neg_destinations = gen_negative_edges(eval_sources, eval_destinations, num_nodes, num_neg_per_node)
-        neg_timestamps = np.resize(eval_timestamps, neg_sources.shape)
-        neg_src_features, neg_dst_features = self.edge_helper.get_edges_feats(neg_sources, neg_destinations, neg_timestamps, window_size=self.window_size, concat=False)
-        neg_preds = self.model.get_edges_embedding(neg_src_features.to(self.device), neg_dst_features.to(self.device))
-        neg_preds = neg_preds.squeeze(dim=1)
-        neg_labels = torch.zeros(neg_preds.shape[0], dtype=torch.float, device=self.device)
-
-        # The default setting, consider the rank of the most confident edge.
-        from torch_scatter import scatter_max
-        best_p_pos, _ = scatter_max(src=pos_preds, index=torch.from_numpy(eval_sources).to(self.device), dim_size=num_nodes)
-        # best_p_pos has shape (num_nodes), for nodes not in src_lst has value 0.
-        best_p_pos_by_user = best_p_pos[src_lst]
-
-        uni, counts = np.unique(neg_sources,return_counts=True)
-        # find index of first occurrence of each src in neg_sources
-        first_occ_idx = np.cumsum(counts,axis=0) - counts
-        add = np.arange(num_neg_per_node)
-        # take the first $num_neg_per_node$ negative edges from each src.
-        score_idx = first_occ_idx.reshape(-1, 1) + add.reshape(1, -1)
-        score_idx = torch.from_numpy(score_idx).long()
-        p_neg_by_user = neg_preds[score_idx] # (num_users, num_neg_per_node)
-
-        compare = (p_neg_by_user >= best_p_pos_by_user.reshape(num_users, 1)).float()
-        assert compare.shape == (num_users, num_neg_per_node)
-        # compare[i, j], for node i, the j-th negative edge's score > p_best.
-
-        # counts 1 + how many negative edge from src has higher score than p_best.
-        # if there's no such negative edge, rank is 1.
-        rank_by_user = compare.sum(axis=1) + 1 # (num_users,)
-        assert rank_by_user.shape == (num_users,)
-
-        mrr = float(torch.mean(1 / rank_by_user))
-        print(f'MRR={mrr}, time taken: {(datetime.now() - start).seconds} s')
-
-        recall_at = dict()
-        for k in [1, 3, 10]:
-            recall_at[k] = float((rank_by_user <= k).float().mean())
-        return mrr, recall_at
-
-    def test(self, test_data):
+    def eval(self, test_data):
         self.model.load_state_dict(torch.load(self.checkpt_file))
         self.model.eval()
         criterion = nn.BCELoss()
         correct = 0
         num_samples = 0
         all_loss = []
-        test_batchsize = 2*self.batch_size
+        test_batchsize = 2 * self.batch_size
         y_true, y_pred = [], []
         mrr_hist = []
         for time in test_data.unique_times:
@@ -337,31 +243,24 @@ class DGNN(StreamEstimator):
             sources_batch, destinations_batch = edges_snap[start_idx:end_idx, 0], edges_snap[start_idx:end_idx, 1]
             size = len(sources_batch)
             timestamps_batch = np.repeat(time, size)
-            if args.seq_model in ['lstm', 'gru']:
-                ### get pos features (sources_batch, destinations_batch, timestamps_batch)
-                src_features, dst_features = self.edge_helper.get_edges_feats(sources_batch, destinations_batch, timestamps_batch, window_size=self.window_size, concat=False)
-                pos_preds = self.model.get_edges_embedding(src_features.to(self.device), dst_features.to(self.device))
-            elif args.seq_model == 'transformer':
-                pos_features = self.edge_helper.get_edges_feats(sources_batch, destinations_batch, timestamps_batch, window_size=self.window_size, concat=True)
-                pos_preds = self.model(pos_features.to(self.device))
+            pos_features = self.edge_helper.get_edges_feats(sources_batch, destinations_batch, timestamps_batch,
+                                                            window_size=self.window_size, concat=True)
+            pos_preds = self.model(pos_features.to(self.device))
             pos_labels = torch.ones(pos_preds.shape[0], dtype=torch.float, device=self.device)
-            pos_loss = criterion(pos_preds.squeeze(dim=1), pos_labels) ##pos_loss = -torch.log(pos_preds[:, 1])
+            pos_loss = criterion(pos_preds.squeeze(dim=1), pos_labels)
 
             ### get negtive sample and features
             neg_destinations_batch = np.random.randint(0, self.edge_helper.node_num, size)
-            if args.seq_model in ['lstm', 'gru']:
-                neg_src_features, neg_dst_features = self.edge_helper.get_edges_feats(sources_batch, neg_destinations_batch, timestamps_batch, window_size=self.window_size, concat=False)
-                neg_preds = self.model.get_edges_embedding(neg_src_features.to(self.device), neg_dst_features.to(self.device))
-            elif args.seq_model == 'transformer':
-                neg_features = self.edge_helper.get_edges_feats(sources_batch, neg_destinations_batch, timestamps_batch, window_size=self.window_size, concat=True)
-                neg_preds = self.model(neg_features.to(self.device))
+            neg_features = self.edge_helper.get_edges_feats(sources_batch, neg_destinations_batch, timestamps_batch,
+                                                            window_size=self.window_size, concat=True)
+            neg_preds = self.model(neg_features.to(self.device))
             neg_labels = torch.zeros(neg_preds.shape[0], dtype=torch.float, device=self.device)
             neg_loss = criterion(neg_preds.squeeze(dim=1), neg_labels)
-            loss = pos_loss + neg_loss ##loss = torch.mean(pos_loss + neg_loss)
+            loss = pos_loss + neg_loss
             all_loss.append(loss.item())
-
-            TP = torch.sum(pos_preds>=0.5)
-            TN = torch.sum(neg_preds<0.5)
+
+            TP = torch.sum(pos_preds >= 0.5)
+            TN = torch.sum(neg_preds < 0.5)
             correct += (TP + TN).item()
             num_samples += (pos_preds.shape[0] + neg_preds.shape[0])
@@ -372,14 +271,9 @@ class DGNN(StreamEstimator):
                 y_pred.append(neg_preds[i].item())
                 y_true.append(0)
 
-            # calculate MRR for each snap
-            mrr, recall_at = self.eval_mrr_and_recall(edges_snap, np.repeat(time, edges_snap.shape[0]), self.edge_helper.node_num)
-            mrr_hist.append(mrr)
-
         test_auc = self.auc_score(y_true, y_pred)
         test_ap = self.ap_score(y_true, y_pred)
         test_loss = np.mean(all_loss)
         test_acc = correct / num_samples
-        test_mrr = np.mean(mrr_hist)
-        return test_loss, test_acc, test_auc, test_ap, test_mrr
+        return test_loss, test_acc, test_auc, test_ap
 
diff --git a/StreamLearn/Algorithm/DecoupledDGNN/graph_embs.py b/StreamLearn/Algorithm/DecoupledDGNN/graph_embs.py
old mode 100755
new mode 100644
diff --git a/StreamLearn/Algorithm/DecoupledDGNN/model.py b/StreamLearn/Algorithm/DecoupledDGNN/model.py
old mode 100755
new mode 100644
index 258a8a3..c6f43b0
--- a/StreamLearn/Algorithm/DecoupledDGNN/model.py
+++ b/StreamLearn/Algorithm/DecoupledDGNN/model.py
@@ -65,9 +65,8 @@ class MergeLayer(torch.nn.Module):
         torch.nn.init.xavier_normal_(self.fc1.weight)
         torch.nn.init.xavier_normal_(self.fc2.weight)
 
-    def forward(self, x1, x2):
-        x = torch.cat([x1, x2], dim=-1)
-        h = self.act(self.fc1(x[:,-1,:]))
+    def forward(self, x):
+        h = self.act(self.fc1(x[:,-1,:])) #x[:,-1,:] for lstm(with out shape: [BS, 20, 64])
         out = torch.nn.Sigmoid()(self.fc2(h))
         return out
 
@@ -142,19 +141,21 @@ class linkPredictor(nn.ModuleList):
         self.dropout = args.dropout
         self.embedding_module_type = args.seq_model
         print('self.embedding_module_type: ', self.embedding_module_type)
-        if self.embedding_module_type=='lstm':
-            self.embedding_model = LSTM_Emb(self.batch_size, self.hidden_dim, self.LSTM_layers, self.nodes_embedding_size, self.dropout, device)
-        elif self.embedding_module_type=='gru':
-            self.embedding_model = GRU_Emb(self.batch_size, self.hidden_dim, self.LSTM_layers, self.nodes_embedding_size, self.dropout, device)
+        if self.embedding_module_type == 'lstm':
+            self.embedding_model = LSTM_Emb(self.batch_size, self.hidden_dim * 2, self.LSTM_layers,
+                                            self.nodes_embedding_size * 2, self.dropout, device)
+        elif self.embedding_module_type == 'gru':
+            self.embedding_model = GRU_Emb(self.batch_size, self.hidden_dim * 2, self.LSTM_layers,
+                                           self.nodes_embedding_size * 2, self.dropout, device)
         self.decoder = MergeLayer(self.hidden_dim, self.hidden_dim, self.hidden_dim, 1)
 
     def get_nodes_embedding(self, source_feats, destination_feats):
         src_emb = self.embedding_model(source_feats)
         dst_emb = self.embedding_model(destination_feats)
         return src_emb, dst_emb
-
-    def get_edges_embedding(self, source_feats, destination_feats):
-        src_emb, dst_emb = self.get_nodes_embedding(source_feats, destination_feats)
-        edge_prob = self.decoder(src_emb, dst_emb)
+
+    def forward(self, edge_feats):
+        edge_emb = self.embedding_model(edge_feats)
+        edge_prob = self.decoder(edge_emb)
         return edge_prob
diff --git a/StreamLearn/Algorithm/DecoupledDGNN/utils.py b/StreamLearn/Algorithm/DecoupledDGNN/utils.py
old mode 100755
new mode 100644
index 3062806..178547d
--- a/StreamLearn/Algorithm/DecoupledDGNN/utils.py
+++ b/StreamLearn/Algorithm/DecoupledDGNN/utils.py
@@ -38,7 +38,7 @@ class EdgeHelper():
         self.get_nodes_seq_lst(randomize,disperse,inductive)
 
     def get_time_edges(self,disperse=False, inductive=False):
-        disperse_str = '_disperse' if self.disperse else ''
+        disperse_str = '_disperse' if disperse else ''
         if self.split:
             history = 1
             for i, ss in enumerate(['train', 'valid', 'test']):
@@ -66,8 +66,8 @@ class EdgeHelper():
                 self.time_lst[idx+1] = time
 
     def get_nodes_seq_lst(self, randomize=True, disperse=False, inductive=False):
-        rand_str = '_randomize' if self.randomize_features else ''
-        disperse_str = '_disperse' if self.disperse else ''
+        rand_str = '_randomize' if randomize else ''
+        disperse_str = '_disperse' if disperse else ''
         if self.dataset_name in ['CollegeMsg', 'bitcoinalpha', 'bitcoinotc'] and self.use_neg:
             file1 = os.path.join(self.path, self.dataset_name, f"{self.dataset_name}_nodes_seq_lst{rand_str}_mul{disperse_str}.pkl") # 1-alpha
             file2 = os.path.join(self.path, self.dataset_name, f"{self.dataset_name}_nodes_seq_lst{rand_str}_mul{disperse_str}_alpha-1.pkl") # alpha-1
@@ -84,31 +84,34 @@ class EdgeHelper():
                 nodes_seq_lst_file = os.path.join(self.path, self.dataset_name, f"{self.dataset_name}_nodes_seq_lst{rand_str}_mul_inductive{disperse_str}.pkl")
             else:
                 nodes_seq_lst_file = os.path.join(self.path, self.dataset_name, f"{self.dataset_name}_nodes_seq_lst{rand_str}_mul{disperse_str}.pkl")
+        print("nodes_seq_lst_file: ",nodes_seq_lst_file)
         with open(nodes_seq_lst_file, 'rb') as f:
             self.nodes_seq_lst = pickle.load(f)
         self.node_num = len(self.nodes_seq_lst)
 
     def get_edges_feats(self, sources, destinations, timestamps, window_size=5, concat=True):
-        src_feat_lst = dst_feat_lst = torch.zeros((len(sources), window_size, self.nodes_seq_lst[0].shape[1]))
-        src_dts_lst = dts_dts_lst = torch.zeros((len(sources), window_size))
-        for i, (src, dst, ts) in enumerate(zip(sources, destinations, timestamps)):
+        src_feat_lst, dst_feat_lst = [], []
+        for src, dst, ts in zip(sources, destinations, timestamps):
             ts = round(ts, 3)
             ts_id = self.time_edge_dict[ts]['idx']
+            if ts_id < window_size:
+                continue
 
-            src_feat, src_prev_ts = self.get_node_feats(src, ts_id, window_size)
-            dst_feat, dst_prev_ts = self.get_node_feats(dst, ts_id, window_size)
-            src_feat_lst[i][-len(src_feat):] = src_feat
-            dst_feat_lst[i][-len(dst_feat):] = dst_feat
+            src_feat = self.nodes_seq_lst[src][(ts_id-window_size):ts_id, :]
+            src_feat = torch.tensor(src_feat.toarray(), dtype=torch.float32)
 
-            src_dts = ts - src_prev_ts
-            dst_dts = ts - dst_prev_ts
-            src_dts_lst[i][-len(src_dts):] = torch.tensor(src_dts, dtype=torch.float32)
-            dts_dts_lst[i][-len(dst_dts):] = torch.tensor(dst_dts, dtype=torch.float32)
+            dst_feat = self.nodes_seq_lst[dst][(ts_id-window_size):ts_id, :]
+            dst_feat = torch.tensor(dst_feat.toarray(), dtype=torch.float32)
+
+            src_feat_lst.append(src_feat)
+            dst_feat_lst.append(dst_feat)
 
+        src_feat_lst=torch.stack(src_feat_lst)
+        dst_feat_lst=torch.stack(dst_feat_lst)
         if concat:
             edges_feats = torch.cat((src_feat_lst, dst_feat_lst), dim=-1)
-            return edges_feats, src_dts_lst, dts_dts_lst
+            return edges_feats
         else:
-            return src_feat_lst, dst_feat_lst, src_dts_lst, dts_dts_lst
+            return src_feat_lst, dst_feat_lst
 
     def get_node_feats(self, node, tsid, window_size):
         mask = np.array(self.nodes_seq_lst[node].sum(-1)) != 0
diff --git a/StreamLearn/Config/DGNN.py b/StreamLearn/Config/DGNN.py
index 0b423bc..f3dafb6 100644
--- a/StreamLearn/Config/DGNN.py
+++ b/StreamLearn/Config/DGNN.py
@@ -13,7 +13,7 @@ def parameter_parser():
     parser.add_argument("--max_len", type=int, default=20, help="Maximum sequence length per tweet.")
     parser.add_argument("--seq_model", default='lstm', help="Sequence model type.")
     parser.add_argument("--window_size", type=int, default=20, help="Window size.")
-    parser.add_argument("--emb_size", type=int, default=256, help="Embedding size.")
+    parser.add_argument("--emb_size", type=int, default=128, help="Embedding size.")
     parser.add_argument("--seed", type=int, default=1024, help="Random seed.")
     parser.add_argument("--gpu", type=int, default=0, help="ID number of GPU.")
     parser.add_argument("--patience", type=int, default=3, help="Max number of bad count.")
diff --git a/StreamLearn/Dataset/DTDGsDataset.py b/StreamLearn/Dataset/DTDGsDataset.py
old mode 100755
new mode 100644
index 6d9aca6..7f7fbf1
--- a/StreamLearn/Dataset/DTDGsDataset.py
+++ b/StreamLearn/Dataset/DTDGsDataset.py
@@ -4,7 +4,6 @@ import pandas as pd
 from pathlib import Path
 import argparse
 import pickle
-import pdb
 import time
 import os
 import subprocess
@@ -155,7 +154,7 @@ class DTDGsDataset(StreamDataset):
         print('num of nodes: ', max_idx+1)
 
         newnew_df = disperse_dataset(data_name, new_df) ## split into snapshots
-        newnew_df.to_csv(OUT_DF)
+        newnew_df.to_csv(OUT_DF, index=True, index_label="idx")
         generate_init_graph(OUT_INIT_GRAPH, node_num = max_idx+1) # generate empty graph that contains only self-loop edges
         build_time_edge_map(newnew_df,OUT_TIME_EDGE_MAP, disperse=disperse)
 
--
Gitee

From 90bf79f79f2187ec5208f4c13248ede69f3131b0 Mon Sep 17 00:00:00 2001
From: bwnzheng
Date: Fri, 8 Aug 2025 08:04:38 +0000
Subject: [PATCH 3/3] update StreamLearn/Dataset/FDDataset.py.

Signed-off-by: bwnzheng
---
 StreamLearn/Dataset/FDDataset.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/StreamLearn/Dataset/FDDataset.py b/StreamLearn/Dataset/FDDataset.py
index 5c0b9ec..ffc1b98 100644
--- a/StreamLearn/Dataset/FDDataset.py
+++ b/StreamLearn/Dataset/FDDataset.py
@@ -11,7 +11,7 @@ class FDDataset(StreamDataset):
         match name:
             case "pamap":
                 df = pd.read_csv(
-                    "StreamLearn/Dataset/subject103.dat", delim_whitespace=True
+                    "./dataset/subject103.dat", delim_whitespace=True
                 )
                 A = df.values.astype(np.float64)
                 A = A[:20000, 2:]
--
Gitee