For reference, here's the test suite that I have:

```python
from collections import defaultdict
from time import perf_counter

import cytoolz.curried as tlz
import joblib
import networkx as nx
import numpy as np
from graph_tool import Graph
from graph_tool.centrality import betweenness as gt_betweenness


def edge_betweenness_centrality(
    graph: nx.Graph, normalized: bool = True, weight: str = "weight", njobs: int = -1
):
    """Compute edge betweenness centrality in parallel."""
    njobs = joblib.cpu_count(only_physical_cores=True) if njobs == -1 else njobs
    # Split the source nodes into one chunk per worker; each worker computes the
    # contribution of its sources to every edge, and the per-edge values are summed.
    node_chunks = tlz.partition_all(graph.order() // njobs, graph.nodes())
    bt_func = tlz.partial(
        nx.edge_betweenness_centrality_subset,
        G=graph,
        normalized=normalized,
        weight=weight,
    )
    bt_sc = joblib.Parallel(n_jobs=njobs)(
        joblib.delayed(bt_func)(sources=nodes, targets=graph.nodes())
        for nodes in node_chunks
    )
    bt_c = defaultdict(float)
    for bt in bt_sc:
        for n, v in bt.items():
            bt_c[n] += v
    return bt_c
G_ba = nx.barabasi_albert_graph(2000, 3, seed=42)
G_er = nx.gnp_random_graph(2000, 0.01, directed=True, seed=42)
G_ws = nx.connected_watts_strogatz_graph(2000, 4, 0.1, seed=42)
nx.set_edge_attributes(
    G_ws,
    dict(zip(G_ws.edges(), np.random.uniform(0, 1, len(G_ws.edges())), strict=False)),
    "weight",
)

for graph in (G_ba, G_er, G_ws):
    # Build the equivalent graph-tool graph, copying edge weights if present.
    gt_graph = Graph(directed=graph.is_directed())
    eweight, weight, eprops = None, None, None
    edges = np.asarray(list(graph.edges()))
    if next(iter(graph.edges(data="weight")), None)[2]:
        eweight = gt_graph.new_ep("double")
        weight = "weight"
        eprops = [eweight]
        edges = np.c_[edges, list(nx.get_edge_attributes(graph, weight).values())]
    gt_graph.add_edge_list(edges, eprops=eprops)
print("")
print("Computing betweenness centrality for:")
print(graph)
print("\tNon-Parallel version")
start = perf_counter()
bt_ser = nx.edge_betweenness_centrality(graph, weight=weight)
print(f"\t\tTime: {(perf_counter() - start):.4f} seconds")
print("\tParallel version")
start = perf_counter()
bt_par = edge_betweenness_centrality(graph, weight=weight)
print(f"\t\tTime: {(perf_counter() - start):.4f} seconds")
print("\tGraph-tool version")
start = perf_counter()
_, bt_gt = gt_betweenness(gt_graph, weight=eweight)
print(f"\t\tTime: {(perf_counter() - start):.4f} seconds")
assert np.allclose(np.asarray(list(bt_ser.values())), np.asarray(list(bt_par.values())))
assert np.allclose(np.asarray(bt_gt.fa), np.asarray(list(bt_par.values())))
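The parallel helper relies on edge betweenness being additive over source nodes: splitting `sources` into disjoint chunks while keeping `targets` as all nodes, then summing the per-edge results, reproduces the serial computation, which is what the `allclose` asserts in the script check. A minimal sketch of that identity on a toy graph (the graph and chunking below are illustrative and not part of the original test suite):

```python
import networkx as nx
import numpy as np

g = nx.karate_club_graph()
full = nx.edge_betweenness_centrality(g, normalized=True)

# Split the sources into two disjoint chunks and sum the per-edge contributions.
nodes = list(g.nodes())
chunks = (nodes[: len(nodes) // 2], nodes[len(nodes) // 2 :])
summed = {e: 0.0 for e in g.edges()}
for chunk in chunks:
    part = nx.edge_betweenness_centrality_subset(
        g, sources=chunk, targets=nodes, normalized=True
    )
    for e in summed:
        summed[e] += part[e]

assert np.allclose([full[e] for e in g.edges()], [summed[e] for e in g.edges()])
```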
On my machine with a 12-core M2 Max, I get:

```text
Computing betweenness centrality for:
Graph with 2000 nodes and 5991 edges
    Non-Parallel version
        Time: 5.1456 seconds
    Parallel version
        Time: 1.0575 seconds
    Graph-tool version
        Time: 0.0671 seconds

Computing betweenness centrality for:
DiGraph with 2000 nodes and 39778 edges
    Non-Parallel version
        Time: 9.4499 seconds
    Parallel version
        Time: 1.9615 seconds
    Graph-tool version
        Time: 0.1008 seconds

Computing betweenness centrality for:
Graph with 2000 nodes and 4000 edges
    Non-Parallel version
        Time: 9.3623 seconds
    Parallel version
        Time: 1.3033 seconds
    Graph-tool version
        Time: 0.0697 seconds
```
Thanks!

May also link with igraph in #319; a rough sketch of what that could look like is below.
Originally posted by @cheginit in https://github.com/ImperialCollegeLondon/SWMManywhere/issues/74#issuecomment-1986583374
Discuss if we find that the topological metrics are taking a long time to evaluate.
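Regarding the igraph link mentioned above, a rough, untested sketch of the conversion and timing, using python-igraph's `Graph.from_networkx` and `edge_betweenness`. The wiring below is an assumption, and igraph returns unnormalized values, so a rescale would be needed before comparing with the normalized networkx output:

```python
from time import perf_counter

import igraph as ig
import networkx as nx

nx_graph = nx.barabasi_albert_graph(2000, 3, seed=42)

# Convert to igraph; the original node labels end up in the "_nx_name" vertex attribute.
ig_graph = ig.Graph.from_networkx(nx_graph)

start = perf_counter()
ebt = ig_graph.edge_betweenness(directed=nx_graph.is_directed())
print(f"igraph time: {perf_counter() - start:.4f} seconds")

# Map the values (returned in igraph edge order) back to node pairs.
names = ig_graph.vs["_nx_name"]
ebt_by_edge = {(names[e.source], names[e.target]): v for e, v in zip(ig_graph.es, ebt)}
```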