r1.6

分支 (17)

标签 (5)

管理

管理

master

r2.7.rc1

r2.3

r2.2

r2.1

r2.0

r1.8

r1.9

r2.0.0-alpha

r1.10

r1.7

r1.6

dynamic_shape

r1.5

r1.3

r1.4

r1.2

v2.2.11

v2.2.10

v2.2.1

v1.6.0

v1.5.0

models
/
official
/
gnn
/
gcn
/
src
/
dataset.py

# Copyright 2020 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
"""
create adjacency matrix, node features, labels, and mask for training.
Cora and Citeseer datasets are supported by our example, the original versions of these datasets are as follows:
@inproceedings{nr,
    title={The Network Data Repository with Interactive Graph Analytics and Visualization},
    author={Ryan A. Rossi and Nesreen K. Ahmed},
    booktitle={AAAI},
    url={http://networkrepository.com},
    year={2015}
}
In this example, we use dataset splits provided by https://github.com/kimiyoung/planetoid (Zhilin Yang, William W. Cohen, Ruslan Salakhutdinov, [Revisiting Semi-Supervised Learning with Graph Embeddings](https://arxiv.org/abs/1603.08861), ICML 2016).
"""
import numpy as np
import scipy.sparse as sp
import mindspore.dataset as ds


def normalize_adj(adj):
    """Symmetrically normalize adjacency matrix."""
    rowsum = np.array(adj.sum(1))
    d_inv_sqrt = np.power(rowsum, -0.5).flatten()
    d_inv_sqrt[np.isinf(d_inv_sqrt)] = 0.
    d_mat_inv_sqrt = sp.diags(d_inv_sqrt)
    return adj.dot(d_mat_inv_sqrt).transpose().dot(d_mat_inv_sqrt).tocoo()


def get_adj_features_labels(data_dir):
    """Get adjacency matrix, node features and labels from dataset."""
    g = ds.GraphData(data_dir)
    nodes = g.get_all_nodes(0)
    nodes_list = nodes.tolist()
    row_tensor = g.get_node_feature(nodes_list, [1, 2])
    features = row_tensor[0]
    labels = row_tensor[1]

    nodes_num = labels.shape[0]
    class_num = labels.max() + 1
    labels_onehot = np.eye(nodes_num, class_num)[labels].astype(np.float32)

    neighbor = g.get_all_neighbors(nodes_list, 0)
    node_map = {node_id: index for index, node_id in enumerate(nodes_list)}
    adj = np.zeros([nodes_num, nodes_num], dtype=np.float32)
    for index, value in np.ndenumerate(neighbor):
        # The first column of neighbor is node_id, second column to last column are neighbors of the first column.
        # So we only care index[1] > 1.
        # If the node does not have that many neighbors, -1 is padded. So if value < 0, we will not deal with it.
        if value >= 0 and index[1] > 0:
            adj[node_map[neighbor[index[0], 0]], node_map[value]] = 1
    adj = sp.coo_matrix(adj)
    adj = adj + adj.T.multiply(adj.T > adj) + sp.eye(nodes_num)
    nor_adj = normalize_adj(adj)
    nor_adj = np.array(nor_adj.todense())
    return nor_adj, features, labels_onehot, labels


def get_mask(total, begin, end):
    """Generate mask."""
    mask = np.zeros([total]).astype(np.float32)
    mask[begin:end] = 1
    return mask