Source code for cogdl.utils.prone_utils

import math
import numpy as np
import scipy
import scipy.linalg
import scipy.sparse as sp
from scipy.special import iv
from sklearn import preprocessing


class HeatKernel(object):
    def __init__(self, t=0.5, theta0=0.6, theta1=0.4):
        self.t = t
        self.theta0 = theta0
        self.theta1 = theta1

    def prop_adjacency(self, mx):
        mx_norm = preprocessing.normalize(mx.transpose(), "l1").transpose()
        adj = self.t * mx_norm
        adj.data = np.exp(adj.data)
        return adj / np.exp(self.t)

    def prop(self, mx, emb):
        adj = self.prop_adjacency(mx)
        return self.theta0 * emb + self.theta1 * adj.dot(emb)
class HeatKernelApproximation(object):
    def __init__(self, t=0.2, k=5):
        self.t = t
        self.k = k

    def taylor(self, mx, emb):
        mx_norm = preprocessing.normalize(mx, "l1")
        result = [math.exp(self.t) * emb]
        for i in range(self.k - 1):
            temp_mx = self.t * mx_norm.dot(result[-1]) / (i + 1)
            result.append(temp_mx)
        return sum(result)

    def chebyshev(self, mx, emb):
        mx = mx + sp.eye(emb.shape[0])
        mx = preprocessing.normalize(mx, "l1")
        conv = iv(0, self.t) * emb
        laplacian = sp.eye(emb.shape[0]) - mx
        Lx0 = emb
        Lx1 = laplacian.dot(emb)
        conv -= 2 * iv(1, self.t) * Lx1
        for i in range(2, self.k):
            Lx2 = 2 * laplacian.dot(Lx1) - Lx0
            conv += (-1) ** i * 2 * iv(i, self.t) * Lx2
            Lx0 = Lx1
            Lx1 = Lx2
        return conv

    def prop(self, mx, emb):
        return self.chebyshev(mx, emb)
class Gaussian(object):
    def __init__(self, mu=0.5, theta=1, rescale=False, k=3):
        self.theta = theta
        self.mu = mu
        self.k = k
        self.rescale = rescale
        self.coefs = [(-1) ** i * iv(i, self.theta) for i in range(k + 3)]
        self.coefs[0] = self.coefs[0] / 2
        # adj: 1 mul + 3 add, emb: 2*k mul, 3*k add

    def prop(self, mx, emb):
        row_num, col_num = mx.shape
        mx = mx + sp.eye(row_num)
        mx_norm = preprocessing.normalize(mx, "l1")
        mx_hat = (1 - self.mu) * sp.eye(row_num) - mx_norm

        Lx0 = emb
        Lx1 = mx_hat.dot(emb)
        Lx1 = 0.5 * mx_hat.dot(Lx1) - emb

        conv = iv(0, self.theta) * Lx0
        conv -= 2 * iv(1, self.theta) * Lx1
        for i in range(2, self.k):
            Lx2 = mx_hat.dot(Lx1)
            Lx2 = (mx_hat.dot(Lx2) - 2 * Lx1) - Lx0
            # Lx2 = 2 * mx_hat.dot(Lx1) - Lx0
            conv += (-1) ** i * 2 * iv(i, self.theta) * Lx2
            Lx0 = Lx1
            Lx1 = Lx2
        if self.rescale:
            conv = mx.dot(emb - conv)
        return conv
class PPR(object):
    """
    applying sparsification to accelerate computation
    """

    def __init__(self, alpha=0.5, k=10):
        self.alpha = alpha
        self.k = k
        self.alpha_list = [self.alpha * (1 - self.alpha) ** i for i in range(self.k)]
        self.epsilon = 1e-3

    def prop(self, mx, emb):
        mx_norm = preprocessing.normalize(mx, "l1")
        Lx = emb
        conv = self.alpha * Lx
        for i in range(1, self.k):
            Lx = (1 - self.alpha) * mx_norm.dot(Lx)
            conv += Lx
        return conv
class SignalRescaling(object):
    """
    - rescale signal of each node according to the degree of the node:
        - sigmoid(degree)
        - sigmoid(1/degree)
    """

    def __init__(self):
        pass

    def prop(self, mx, emb):
        mx = preprocessing.normalize(mx, "l1")
        degree = mx.sum(1).A.squeeze()

        degree_inv = 1.0 / degree
        signal_val = 1.0 / (1 + np.exp(-degree_inv))

        row_num, col_num = mx.shape
        q_ = sp.csc_matrix((signal_val, (np.arange(row_num), np.arange(col_num))), shape=(row_num, col_num))
        adj_norm = mx.dot(q_)
        adj_norm = preprocessing.normalize(adj_norm, "l1")
        conv = adj_norm.dot(emb)
        return conv
class ProNE(object):
    def __call__(self, A, a, order=10, mu=0.1, s=0.5):
        # NE Enhancement via Spectral Propagation
        print("Chebyshev Series -----------------")
        if order == 1:
            return a

        node_number = a.shape[0]
        A = sp.eye(node_number) + A
        DA = preprocessing.normalize(A, norm="l1")
        L = sp.eye(node_number) - DA

        M = L - mu * sp.eye(node_number)

        Lx0 = a
        Lx1 = M.dot(a)
        Lx1 = 0.5 * M.dot(Lx1) - a

        conv = iv(0, s) * Lx0
        conv -= 2 * iv(1, s) * Lx1
        for i in range(2, order):
            Lx2 = M.dot(Lx1)
            Lx2 = (M.dot(Lx2) - 2 * Lx1) - Lx0
            # Lx2 = 2*L.dot(Lx1) - Lx0
            if i % 2 == 0:
                conv += 2 * iv(i, s) * Lx2
            else:
                conv -= 2 * iv(i, s) * Lx2
            Lx0 = Lx1
            Lx1 = Lx2
        del Lx2
        mm = A.dot(a - conv)
        return mm
class NodeAdaptiveEncoder(object):
    """
    - shrink negative values in signal/feature matrix
    - no learning
    """

    @staticmethod
    def prop(signal):
        mean_signal = signal.mean(1)
        mean_signal = 1.0 / (1 + np.exp(-mean_signal))
        sel_row, sel_col = np.where(signal < 0)
        mean_signal = mean_signal[sel_row]
        signal[sel_row, sel_col] = signal[sel_row, sel_col] * mean_signal
        return signal
def propagate(mx, emb, stype, space=None):
    if space is not None:
        if stype == "heat":
            heat_kernel = HeatKernelApproximation(t=space["t"])
            result = heat_kernel.prop(mx, emb)
        elif stype == "ppr":
            ppr = PPR(alpha=space["alpha"])
            result = ppr.prop(mx, emb)
        elif stype == "gaussian":
            gaussian = Gaussian(mu=space["mu"], theta=space["theta"])
            result = gaussian.prop(mx, emb)
        elif stype == "sc":
            signal_rs = SignalRescaling()
            result = signal_rs.prop(mx, emb)
        else:
            raise ValueError("please use a filter in ['heat', 'ppr', 'gaussian', 'sc'], got {}".format(stype))
    else:
        if stype == "heat":
            heat_kernel = HeatKernelApproximation()
            result = heat_kernel.prop(mx, emb)
        elif stype == "ppr":
            ppr = PPR()
            result = ppr.prop(mx, emb)
        elif stype == "gaussian":
            gaussian = Gaussian()
            result = gaussian.prop(mx, emb)
        elif stype == "sc":
            signal_rs = SignalRescaling()
            result = signal_rs.prop(mx, emb)
        elif stype == "prone":
            signal_pro = ProNE()
            result = signal_pro(mx, emb)
        else:
            raise ValueError("please use a filter in ['heat', 'ppr', 'gaussian', 'sc', 'prone'], got {}".format(stype))
    return result
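# Usage sketch (illustrative, not part of the original module): when a ``space``
# dict is passed, ``propagate`` reads per-filter hyper-parameters from it; the
# expected keys ("t", "alpha", "mu", "theta") follow the constructor calls above.
# The adjacency matrix and embedding below are hypothetical placeholders.
def _propagate_example():  # hypothetical helper, for illustration only
    adj = sp.random(100, 100, density=0.05, format="csr")  # random non-negative adjacency
    emb = np.random.randn(100, 16)  # random base node embedding
    out_heat = propagate(adj, emb, "heat", space={"t": 0.3})
    out_ppr = propagate(adj, emb, "ppr", space={"alpha": 0.2})
    out_gauss = propagate(adj, emb, "gaussian", space={"mu": 0.2, "theta": 1.0})
    return out_heat, out_ppr, out_gauss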
def get_embedding_dense(matrix, dimension):
    # get dense embedding via SVD
    U, s, Vh = scipy.linalg.svd(matrix, full_matrices=False, check_finite=False, overwrite_a=True)
    U = np.array(U)
    U = U[:, :dimension]
    s = s[:dimension]
    s = np.sqrt(s)
    U = U * s
    U = preprocessing.normalize(U, "l2")
    return U
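# Usage sketch (illustrative, not part of the original module): combining
# ``propagate`` with ``get_embedding_dense`` mirrors the ProNE-style spectral
# enhancement of a precomputed embedding. The adjacency matrix and base
# embedding below are hypothetical placeholders.
if __name__ == "__main__":
    n, dim = 100, 32
    adj = sp.random(n, n, density=0.05, format="csr")  # hypothetical sparse adjacency
    emb = np.random.randn(n, dim)  # hypothetical base node embedding
    smoothed = propagate(adj, emb, "prone")  # spectral propagation with default ProNE settings
    enhanced = get_embedding_dense(smoothed, dim)  # dense, l2-normalized embedding via truncated SVD
    print(enhanced.shape)  # (100, 32)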