Discuss whether to use `graph_tool`

barneydobson commented 8 months ago

Also, there's this library called graph-tool that is freakishly fast, but the caveat is that it's not available on PyPi and can only be installed from conda-forge.

Originally posted by @cheginit in https://github.com/ImperialCollegeLondon/SWMManywhere/issues/74#issuecomment-1986583374

Discuss if we find that the topological metrics are taking a long time to evaluate

cheginit commented 8 months ago

For reference, here's the test suite that I have:

from collections import defaultdict
from time import perf_counter

import cytoolz.curried as tlz
import joblib
import networkx as nx
import numpy as np
from graph_tool import Graph
from graph_tool.centrality import betweenness as gt_betweenness

def edge_betweenness_centrality(
    graph: nx.Graph, normalized: bool = True, weight: str = "weight", njobs: int = -1
):
    """Compute edge betweenness centrality function in parallel."""
    njobs = joblib.cpu_count(True) if njobs == -1 else njobs
    node_chunks = tlz.partition_all(graph.order() // njobs, graph.nodes())
    bt_func = tlz.partial(
        nx.edge_betweenness_centrality_subset,
        G=graph,
        normalized=normalized,
        weight=weight,
    )
    bt_sc = joblib.Parallel(n_jobs=njobs)(
        joblib.delayed(bt_func)(sources=nodes, targets=graph.nodes()) for nodes in node_chunks
    )

    bt_c = defaultdict(float)
    for bt in bt_sc:
        for n, v in bt.items():
            bt_c[n] += v
    return bt_c

G_ba = nx.barabasi_albert_graph(2000, 3, seed=42)
G_er = nx.gnp_random_graph(2000, 0.01, directed=True, seed=42)
G_ws = nx.connected_watts_strogatz_graph(2000, 4, 0.1, seed=42)
nx.set_edge_attributes(
    G_ws,
    dict(zip(G_ws.edges(), np.random.uniform(0, 1, len(G_ws.edges())), strict=False)),
    "weight",
)

for graph in (G_ba, G_er, G_ws):
    gt_graph = Graph(directed=graph.is_directed())
    eweight, weight, eprops = None, None, None
    edges = np.asarray(list(graph.edges()))
    if next(iter(graph.edges(data="weight")), None)[2]:
        eweight = gt_graph.new_ep("double")
        weight = "weight"
        eprops = [eweight]
        edges = np.c_[edges, list(nx.get_edge_attributes(graph, weight).values())]
    gt_graph.add_edge_list(edges, eprops=eprops)
    print("")
    print("Computing betweenness centrality for:")
    print(graph)
    print("\tNon-Parallel version")
    start = perf_counter()
    bt_ser = nx.edge_betweenness_centrality(graph, weight=weight)
    print(f"\t\tTime: {(perf_counter() - start):.4f} seconds")
    print("\tParallel version")
    start = perf_counter()
    bt_par = edge_betweenness_centrality(graph, weight=weight)
    print(f"\t\tTime: {(perf_counter() - start):.4f} seconds")
    print("\tGraph-tool version")
    start = perf_counter()
    _, bt_gt = gt_betweenness(gt_graph, weight=eweight)
    print(f"\t\tTime: {(perf_counter() - start):.4f} seconds")
    assert np.allclose(np.asarray(list(bt_ser.values())), np.asarray(list(bt_par.values())))
    assert np.allclose(np.asarray(bt_gt.fa), np.asarray(list(bt_par.values())))

On my machine with 12-core M2 Max, I get:

Computing betweenness centrality for:
Graph with 2000 nodes and 5991 edges
    Non-Parallel version
        Time: 5.1456 seconds
    Parallel version
        Time: 1.0575 seconds
    Graph-tool version
        Time: 0.0671 seconds

Computing betweenness centrality for:
DiGraph with 2000 nodes and 39778 edges
    Non-Parallel version
        Time: 9.4499 seconds
    Parallel version
        Time: 1.9615 seconds
    Graph-tool version
        Time: 0.1008 seconds

Computing betweenness centrality for:
Graph with 2000 nodes and 4000 edges
    Non-Parallel version
        Time: 9.3623 seconds
    Parallel version
        Time: 1.3033 seconds
    Graph-tool version
        Time: 0.0697 seconds

barneydobson commented 8 months ago

thanks!

barneydobson commented 1 month ago

may also link with igraph in #319

ImperialCollegeLondon / SWMManywhere

Discuss whether to use `graph_tool` #80