Source code for cogdl.models.emb.prone

import numpy as np
import scipy.sparse as sp
from scipy.special import iv
import networkx as nx
from sklearn import preprocessing
from sklearn.utils.extmath import randomized_svd

from cogdl.utils.prone_utils import get_embedding_dense
from cogdl.data import Graph
from .. import BaseModel


class ProNE(BaseModel):
    r"""The ProNE model from the `"ProNE: Fast and Scalable Network Representation Learning"
    <https://www.ijcai.org/Proceedings/2019/0594.pdf>`_ paper.

    Args:
        hidden_size (int) : The dimension of node representation.
        step (int) : The number of terms in the Chebyshev expansion.
        mu (float) : Modulation parameter of the spectral filter used in propagation.
        theta (float) : Decay parameter of the spectral filter (the Bessel-series argument).
    """
    @staticmethod
    def add_args(parser):
        """Add model-specific arguments to the parser."""
        # fmt: off
        parser.add_argument("--step", type=int, default=5,
                            help="Number of terms in the Chebyshev expansion")
        parser.add_argument("--mu", type=float, default=0.2)
        parser.add_argument("--theta", type=float, default=0.5)
        parser.add_argument("--hidden-size", type=int, default=128)
        # fmt: on
    @classmethod
    def build_model_from_args(cls, args):
        return cls(args.hidden_size, args.step, args.mu, args.theta)
    def __init__(self, dimension, step, mu, theta):
        super(ProNE, self).__init__()
        self.dimension = dimension
        self.step = step
        self.mu = mu
        self.theta = theta
    def forward(self, graph: Graph, return_dict=False):
        nx_g = graph.to_networkx()
        self.matrix0 = sp.csr_matrix(nx.adjacency_matrix(nx_g))
        features_matrix = self._pre_factorization(self.matrix0, self.matrix0)
        embeddings_matrix = self._chebyshev_gaussian(
            self.matrix0, features_matrix, self.step, self.mu, self.theta
        )
        embeddings = embeddings_matrix

        if return_dict:
            features_matrix = dict()
            for vid, node in enumerate(nx_g.nodes()):
                features_matrix[node] = embeddings[vid]
        else:
            features_matrix = np.zeros((graph.num_nodes, embeddings.shape[1]))
            # Node ids index rows directly; materialize the NodeView as a list
            # so NumPy receives an integer index array.
            nx_nodes = list(nx_g.nodes())
            features_matrix[nx_nodes] = embeddings[np.arange(graph.num_nodes)]
        return features_matrix
    def _get_embedding_rand(self, matrix):
        # Sparse randomized tSVD for fast embedding
        smat = sp.csc_matrix(matrix)  # convert to sparse CSC format
        U, Sigma, VT = randomized_svd(smat, n_components=self.dimension, n_iter=5, random_state=None)
        U = U * np.sqrt(Sigma)
        U = preprocessing.normalize(U, "l2")
        return U

    def _pre_factorization(self, tran, mask):
        # Network Embedding as Sparse Matrix Factorization
        l1 = 0.75
        C1 = preprocessing.normalize(tran, "l1")
        neg = np.array(C1.sum(axis=0))[0] ** l1
        neg = neg / neg.sum()
        neg = sp.diags(neg, format="csr")
        neg = mask.dot(neg)

        C1.data[C1.data <= 0] = 1
        neg.data[neg.data <= 0] = 1
        C1.data = np.log(C1.data)
        neg.data = np.log(neg.data)

        C1 -= neg
        F = C1
        features_matrix = self._get_embedding_rand(F)
        return features_matrix

    def _chebyshev_gaussian(self, A, a, order=5, mu=0.5, s=0.2, plus=False, nn=False):
        # NE Enhancement via Spectral Propagation
        num_node = a.shape[0]
        if order == 1:
            return a

        A = sp.eye(num_node) + A
        DA = preprocessing.normalize(A, norm="l1")
        L = sp.eye(num_node) - DA
        M = L - mu * sp.eye(num_node)

        Lx0 = a
        Lx1 = M.dot(a)
        Lx1 = 0.5 * M.dot(Lx1) - a

        conv = iv(0, s) * Lx0
        conv -= 2 * iv(1, s) * Lx1
        for i in range(2, order):
            Lx2 = M.dot(Lx1)
            Lx2 = (M.dot(Lx2) - 2 * Lx1) - Lx0
            # Lx2 = 2 * L.dot(Lx1) - Lx0
            if i % 2 == 0:
                conv += 2 * iv(i, s) * Lx2
            else:
                conv -= 2 * iv(i, s) * Lx2
            Lx0 = Lx1
            Lx1 = Lx2
            del Lx2

        emb = mm = conv
        if not plus:
            mm = A.dot(a - conv)
        if not nn:
            emb = get_embedding_dense(mm, self.dimension)
        return emb
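A note on ``_chebyshev_gaussian``: the ``iv(i, s)`` coefficients come from the standard expansion of an exponential in Chebyshev polynomials with modified-Bessel weights. As a sketch, writing :math:`I_k` for ``scipy.special.iv`` and :math:`T_k` for the Chebyshev polynomials of the first kind (the implementation folds extra scaling into its ``Lx`` recurrence, so the correspondence holds only up to that modification):

.. math::

    e^{-s\lambda} = I_0(s) + 2 \sum_{k=1}^{\infty} (-1)^k I_k(s)\, T_k(\lambda),
    \qquad \lambda \in [-1, 1].

Truncating this series after ``order`` terms and applying it to the shifted, row-normalized Laplacian ``M`` yields the ``conv`` accumulator built in the loop above.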
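A minimal usage sketch (editorial, not part of the module): it builds a toy ring graph and runs the model end to end. The ``Graph(edge_index=...)`` construction is an assumption about cogdl's data API and may need adjusting for your version.

import torch
from cogdl.data import Graph
from cogdl.models.emb.prone import ProNE

# Hypothetical toy input: a 10-node ring, with both edge directions listed.
num_nodes = 10
src = torch.arange(num_nodes)
dst = (src + 1) % num_nodes
edge_index = torch.stack([torch.cat([src, dst]), torch.cat([dst, src])])
graph = Graph(edge_index=edge_index)  # assumed constructor; check your cogdl version

model = ProNE(dimension=4, step=5, mu=0.2, theta=0.5)
emb = model.forward(graph)  # NumPy array of shape (num_nodes, 4)
print(emb.shape)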