Source code for cogdl.layers.pprgo_modules

import numba
import numpy as np
import scipy.sparse as sp
import torch


@numba.njit(cache=True, locals={"_val": numba.float32, "res": numba.float32, "res_vnode": numba.float32})
def _calc_ppr_node(inode, indptr, indices, deg, alpha, epsilon):
    alpha_eps = alpha * epsilon
    f32_0 = numba.float32(0)
    p = {inode: f32_0}
    r = {inode: alpha}
    q = [inode]
    while len(q) > 0:
        unode = q.pop()

        res = r[unode] if unode in r else f32_0
        if unode in p:
            p[unode] += res
        else:
            p[unode] = res
        r[unode] = f32_0
        for vnode in indices[indptr[unode] : indptr[unode + 1]]:
            _val = (1 - alpha) * res / deg[unode]
            if vnode in r:
                r[vnode] += _val
            else:
                r[vnode] = _val

            res_vnode = r[vnode] if vnode in r else f32_0
            if res_vnode >= alpha_eps * deg[vnode]:
                if vnode not in q:
                    q.append(vnode)

    return list(p.keys()), list(p.values())


[docs]@numba.njit(cache=True, parallel=True) def calc_ppr_topk_parallel(indptr, indices, deg, alpha, epsilon, nodes, topk): js = [np.zeros(0, dtype=np.int64)] * len(nodes) vals = [np.zeros(0, dtype=np.float32)] * len(nodes) for i in numba.prange(len(nodes)): j, val = _calc_ppr_node(nodes[i], indptr, indices, deg, alpha, epsilon) j_np, val_np = np.array(j), np.array(val) idx_topk = np.argsort(val_np)[-topk:] js[i] = j_np[idx_topk] vals[i] = val_np[idx_topk] return js, vals
[docs]def ppr_topk(adj_matrix, alpha, epsilon, nodes, topk): """Calculate the PPR matrix approximately using Anderson.""" out_degree = np.sum(adj_matrix > 0, axis=1).A1 nnodes = adj_matrix.shape[0] neighbors, weights = calc_ppr_topk_parallel( adj_matrix.indptr, adj_matrix.indices, out_degree, numba.float32(alpha), numba.float32(epsilon), nodes, topk ) return construct_sparse(neighbors, weights, (len(nodes), nnodes))
[docs]def construct_sparse(neighbors, weights, shape): i = np.repeat(np.arange(len(neighbors)), np.fromiter(map(len, neighbors), dtype=np.int)) j = np.concatenate(neighbors) return sp.coo_matrix((np.concatenate(weights), (i, j)), shape)
[docs]def topk_ppr_matrix(adj_matrix, alpha, eps, idx, topk, normalization="row"): """Create a sparse matrix where each node has up to the topk PPR neighbors and their weights.""" topk_matrix = ppr_topk(adj_matrix, alpha, eps, idx, topk).tocsr() if normalization == "sym": # Assume undirected (symmetric) adjacency matrix deg = adj_matrix.sum(1).A1 deg_sqrt = np.sqrt(np.maximum(deg, 1e-12)) deg_inv_sqrt = 1.0 / deg_sqrt row, col = topk_matrix.nonzero() topk_matrix.data = deg_sqrt[idx[row]] * topk_matrix.data * deg_inv_sqrt[col] elif normalization == "col": # Assume undirected (symmetric) adjacency matrix deg = adj_matrix.sum(1).A1 deg_inv = 1.0 / np.maximum(deg, 1e-12) row, col = topk_matrix.nonzero() topk_matrix.data = deg[idx[row]] * topk_matrix.data * deg_inv[col] elif normalization == "row": pass else: raise ValueError(f"Unknown PPR normalization: {normalization}") return topk_matrix
[docs]def build_topk_ppr_matrix_from_data(edge_index, *args, **kwargs): if isinstance(edge_index, torch.Tensor): num_node = int(torch.max(edge_index)) + 1 edge_index = edge_index.cpu().numpy() val = np.ones(edge_index.shape[1]) adj_matrix = sp.csr_matrix((val, (edge_index[0], edge_index[1])), shape=(num_node, num_node)) else: adj_matrix = edge_index return topk_ppr_matrix(adj_matrix, *args, **kwargs)
[docs]class PPRGoDataset(torch.utils.data.Dataset): def __init__( self, features: torch.Tensor, ppr_matrix: sp.csr_matrix, node_indices: torch.Tensor, labels_all: torch.Tensor = None, ): self.features = features self.matrix = ppr_matrix self.node_indices = node_indices self.labels_all = labels_all self.cache = dict() def __len__(self): return self.node_indices.shape[0] def __getitem__(self, items): key = str(items) if key not in self.cache: sample_matrix = self.matrix[items] source, neighbor = sample_matrix.nonzero() ppr_scores = torch.from_numpy(sample_matrix.data).float() features = self.features[neighbor].float() targets = torch.from_numpy(source).long() labels = self.labels_all[self.node_indices[items]] self.cache[key] = (features, targets, ppr_scores, labels) return self.cache[key]