Source code for distanceclosure.utils

# -*- coding: utf-8 -*-
"""
Utils
==========================

Utility functions for the Distance Closure package
"""

import numpy as np
import pandas as pd
from scipy.sparse import csr_matrix
import networkx as nx

# Author metadata for the module. Joining a one-element list with a newline
# separator yields just that single author string.
__author__ = """\n""".join(['Rion Brattig Correia <rionbr@gmail.com>'])

# Public API: the names exported by a star-import of this module. Every entry
# corresponds to a function defined further down in this file.
__all__ = [
    'prox2dist',
    'dist2prox',
    'dict2matrix',
    'matrix2dict',
    'dict2sparse',
    'from_networkx_to_dijkstra_format'
]


def prox2dist(p):
    """Convert a proximity value into a distance value.

    Maps a non-negative proximity in the ``[0,1]`` interval onto a distance
    in the ``[0,inf]`` interval:

    .. math::

        d = \\frac{1}{p} - 1

    Parameters
    ----------
    p : float
        Proximity value.

    Returns
    -------
    d : float
        Distance value; ``np.inf`` when ``p`` compares equal to zero.

    Examples
    --------
    >>> prox2dist(0.5)
    1.0
    >>> prox2dist(0)
    inf

    See Also
    --------
    dist2prox
    """
    # A proximity of zero has no finite distance, so it is mapped to infinity
    # before the division below could be attempted with a zero denominator.
    return np.inf if p == 0 else (1 / float(p)) - 1
def dist2prox(d):
    """Convert a distance value into a proximity/similarity value.

    Maps a non-negative distance ``d`` in the ``[0,inf]`` interval onto a
    proximity in the ``[0,1]`` interval:

    .. math::

        p = \\frac{1}{(d+1)}

    The function works on a single scalar value (Python or NumPy number).
    The ``d == np.inf`` test below is a scalar truth test, so whole arrays
    or sparse matrices cannot be passed in directly.

    Parameters
    ----------
    d : int or float
        Distance value.

    Returns
    -------
    p : float
        Proximity value; ``0.0`` when ``d`` is infinite.

    Examples
    --------
    >>> dist2prox(1)
    0.5
    >>> dist2prox(float('inf'))
    0.0

    See Also
    --------
    prox2dist
    """
    if d == np.inf:
        return 0.0
    # Use a true division rather than ``(d + 1) ** -1``: NumPy refuses to raise
    # integer scalars to a negative integer power (ValueError), which made the
    # power form fail for integer distances taken from NumPy arrays.
    return 1 / (d + 1)
def dict2matrix(d):
    """Transform a 2D dictionary into a NumPy array.

    Useful when converting Dijkstra results.

    Parameters
    ----------
    d : dict
        2D dictionary (a dictionary of dictionaries).

    Returns
    -------
    m : numpy.ndarray
        Array holding the values of ``d``.

    Warning
    -------
    If your nodes are identified by names instead of numbers, make sure to
    keep a mapping.

    Examples
    --------
    >>> d = {0: {0: 0, 1: 1, 2: 3}, 1: {0: 1, 1: 0, 2: 2}, 2: {0: 3, 1: 2, 2: 0}}
    >>> dict2matrix(d)
    array([[0, 1, 3],
           [1, 0, 2],
           [3, 2, 0]])

    Note
    ----
    Uses pandas for the conversion. The outer keys of ``d`` become the
    columns of the intermediate DataFrame, so the inner dictionary stored
    under outer key ``k`` ends up as a column, not a row, of the result.
    For symmetric input this makes no difference.

    See Also
    --------
    matrix2dict
    """
    # Outer keys -> columns, inner keys -> rows (pandas' default orientation).
    frame = pd.DataFrame.from_dict(d)
    return frame.values
def matrix2dict(m):
    """Transform a NumPy matrix into a 2D dictionary.

    Useful when comparing dense metric and Dijkstra results.

    Parameters
    ----------
    m : array-like
        2D matrix (e.g. a NumPy array or a list of lists).

    Returns
    -------
    d : dict
        2D dictionary (a dictionary of dictionaries).

    Examples
    --------
    >>> m = [[0, 1, 3], [1, 0, 2], [3, 2, 0]]
    >>> matrix2dict(m)
    {0: {0: 0, 1: 1, 2: 3}, 1: {0: 1, 1: 0, 2: 2}, 2: {0: 3, 1: 2, 2: 0}}

    Note
    ----
    Uses pandas for the conversion. The outer keys of the result are the
    column labels of the intermediate DataFrame and the inner keys are the
    row labels. For symmetric input this makes no difference.

    See Also
    --------
    dict2matrix
    """
    # DataFrame.to_dict() groups values by column first, then by row.
    frame = pd.DataFrame(m)
    return frame.to_dict()
def dict2sparse(d):
    """Transform a 2D dictionary into a SciPy sparse matrix.

    Parameters
    ----------
    d : dict
        2D dictionary (a dictionary of dictionaries).

    Returns
    -------
    m : scipy.sparse.csr_matrix
        CSR sparse matrix.

    Examples
    --------
    >>> d = {0: {0: 0, 1: 1, 2: 3}, 1: {0: 1, 1: 0, 2: 2}, 2: {0: 3, 1: 2, 2: 0}}
    >>> dict2sparse(d).toarray()
    array([[0, 1, 3],
           [1, 0, 2],
           [3, 2, 0]])

    Note
    ----
    Uses pandas to convert the dictionary into a DataFrame, with the outer
    keys as rows, and then feeds its values to ``csr_matrix``.

    See Also
    --------
    dict2matrix
    matrix2dict
    """
    # Outer keys -> rows, inner keys -> columns.
    frame = pd.DataFrame.from_dict(d, orient='index')
    dense = frame.values
    return csr_matrix(dense)
def from_networkx_to_dijkstra_format(D, weight='weight'):
    """Convert a ``networkx.Graph`` into the inputs used by ``cython.dijkstra``.

    Parameters
    ----------
    D : networkx.Graph
        The distance graph. Must be undirected.
    weight : string
        The edge property to use as distance weight.

    Returns
    -------
    nodes : list
        List of all nodes converted to sequential integers
        (``0 .. number_of_nodes - 1``, in ``D.nodes()`` order).
    edges : dict
        Maps ``(i, j)`` integer node pairs to the edge weight. Every edge is
        stored in both directions, ``(i, j)`` and ``(j, i)``.
    neighbors : dict
        Maps every integer node to the list of its integer neighbors.
    dict_int_nodes : dict
        The mapping from the integer node names back to the original node
        names.

    Raises
    ------
    NotImplementedError
        If ``D`` is not a ``networkx.Graph`` or is a directed graph.
    KeyError
        If an edge has no ``weight`` attribute.

    Notes
    -----
    Multigraphs are not rejected; if several parallel edges join the same
    pair of nodes, only one weight is kept (the last one iterated).

    Examples
    --------
    >>> G = nx.path_graph(5)
    >>> nx.set_edge_attributes(G, name='distance', values=1)
    >>> nodes, edges, neighbors, dict_int_nodes = from_networkx_to_dijkstra_format(G, weight='distance')
    """
    # ``nx.DiGraph`` is a subclass of ``nx.Graph``, so the isinstance test alone
    # lets directed graphs through. They would then be silently symmetrized in
    # ``edges`` while ``neighbors`` would only hold successors. Reject them
    # explicitly, as the error message already promises.
    if not isinstance(D, nx.classes.graph.Graph) or D.is_directed():
        raise NotImplementedError("This is on the TODO list. For now, only undirected nx.Graphs() are accepted.")

    # Original node name -> sequential integer, and the inverse mapping.
    dict_nodes_int = {u: i for i, u in enumerate(D.nodes())}
    dict_int_nodes = {i: u for u, i in dict_nodes_int.items()}
    nodes = list(dict_nodes_int.values())

    # Forward direction, straight from the graph's edge view.
    edges_ij = {(dict_nodes_int[i], dict_nodes_int[j]): d[weight] for i, j, d in D.edges(data=True)}
    # Reverse direction, derived from the forward mapping so the edge view is
    # only walked once and the node lookups are not repeated.
    edges_ji = {(j, i): w for (i, j), w in edges_ij.items()}
    edges = {**edges_ij, **edges_ji}

    neighbors = {dict_nodes_int[i]: [dict_nodes_int[j] for j in D.neighbors(i)] for i in D.nodes()}
    return nodes, edges, neighbors, dict_int_nodes