graphistry / pygraphistry

PyGraphistry is a Python library to quickly load, shape, embed, and explore big graphs with the GPU-accelerated Graphistry visual graph analyzer
BSD 3-Clause "New" or "Revised" License
2.17k stars 206 forks source link

[FEA] modularly-weighted layouts #576

Closed lmeyerov closed 3 months ago

lmeyerov commented 4 months ago

Is your feature request related to a problem? Please describe.

Often a graph comes annotated with modular structure, such as community scores, and we'd like the layout to reflect that structure more strongly than a basic force-directed layout would (see image below).

Describe the solution you'd like

# auto modes: no node_attr is passed, so the community labels are computed
# by the layout call itself (the proposed default algorithm is 'leiden')
g.layout_modular().plot()
g.layout_modular(algorithm='leiden').plot()

# controls: point node_attr at an existing node column (here 'leiden',
# presumably added by compute_cugraph), disable the automatic community
# computation with algorithm=None, and override engine and intra-community weight
g.compute_cugraph('leiden').layout_modular(
  node_attr='leiden',
  algorithm=None,
  engine='gpu',
  intra_edge_weight=3.0
).plot()

This would translate to something like:

def layout_modular(
  g: Plottable,
  node_attr: Optional[str] = None,
  algorithm: Optional[str] = 'leiden',
  engine: Literal['cpu', 'gpu', 'any'] = 'any',
  intra_edge_weight=2.0,
  inter_edge_weight=0.5,
  edge_influence=2.0
) -> Plottable:
  """Sketch: weight edges by whether both endpoints share a node attribute.

  Edges whose source and destination carry the same ``node_attr`` value get
  ``intra_edge_weight``; all other edges get ``inter_edge_weight``. The
  weights are stored in a ``weight`` edge column and ``edge_influence`` is
  forwarded as the ``edgeInfluence`` URL parameter.

  :param g: Graph whose ``_edges`` table is re-weighted.
  :param node_attr: Node column holding the module/community label. When
    ``None``, the label is meant to be computed with ``algorithm`` (that step
    is still elided in this sketch).
  :param algorithm: Community algorithm used when ``node_attr`` is ``None``;
    its name doubles as the node column name.
  :param engine: ``'cpu'``, ``'gpu'`` or ``'any'``; intended to select the
    backend for the elided community computation.
  :param intra_edge_weight: Weight for edges inside one community.
  :param inter_edge_weight: Weight for edges between communities.
  :param edge_influence: Value sent as the ``edgeInfluence`` URL parameter.
  :returns: A new graph with weighted edges and the layout setting applied.
  """
  if node_attr is None:
    # compute community via igraph or cugraph algorithm depending on env/engine/etc
    node_attr = algorithm
    ...
  # NOTE(review): annotate_src_dst_with_node_attr is a placeholder helper; it
  # is assumed to return the edges with each endpoint's node_attr value in
  # 'src_attr' / 'dst_attr', index-aligned with g._edges.
  e_annotated = annotate_src_dst_with_node_attr(g._edges, node_attr, 'src_attr', 'dst_attr')
  is_intra = e_annotated['src_attr'] == e_annotated['dst_attr']
  e_weighted = g._edges.assign(weight=is_intra.map({
    True: intra_edge_weight,
    False: inter_edge_weight
  }))
  # Return the re-weighted graph (not the input) and honor the caller's
  # edge_influence instead of a hard-coded constant.
  return g.edges(e_weighted).settings(url_params={'edgeInfluence': edge_influence})

It should work in both pandas and cuDF modes.

Additional context

lmeyerov commented 3 months ago

WIP, fully vectorized:

from typing import Any, Dict, Optional

def modular_layout(
  g,
  community_col: Optional[str] = None,
  community_alg: Optional[str] = None,
  community_params: Optional[Dict[str, Any]] = None,
  same_community_weight: float = 4.0,
  cross_community_weight: float = 0.2,
  edge_influence: float = 10.0,
  engine=None
):
  """Weight edges by community membership and bind the result as edge weight.

  Every edge gets a ``weight`` column (``same_community_weight`` when both
  endpoints share a community label, ``cross_community_weight`` otherwise)
  and a boolean ``same_community`` column. Existing columns with those names
  are overwritten.

  :param g: Graph object exposing ``_edges``, ``_nodes``, ``_node``,
    ``_source``, ``_destination`` and the ``materialize_nodes``,
    ``compute_igraph``, ``layout_settings``, ``edges`` and ``bind`` methods.
  :param community_col: Existing node column holding community labels. When
    ``None``, communities are computed with ``community_alg``.
  :param community_alg: Algorithm name passed to ``g.compute_igraph``;
    defaults to ``'community_infomap'``. Only used when ``community_col`` is
    ``None``.
  :param community_params: Extra keyword arguments for ``g.compute_igraph``.
  :param same_community_weight: Weight for intra-community edges.
  :param cross_community_weight: Weight for inter-community edges.
  :param edge_influence: Forwarded to ``g.layout_settings``.
  :param engine: Accepted but not used by this implementation yet.
  :returns: ``g`` with the annotated edges and ``edge_weight='weight'`` bound.
  :raises AssertionError: If edges are unset, ``community_col`` is missing
    from the nodes, or the edges already use a reserved column name.

  Endpoints are matched to nodes with inner merges, so edges whose source or
  destination is absent from the node table are dropped from the result.
  """
  assert g._edges is not None, 'Expected edges to be set'
  if community_col is None:
    g = g.materialize_nodes()
    if community_alg is None:
      community_alg = 'community_infomap'
      # NOTE(review): defaulting community_params to {'directed': False} was
      # considered here but left disabled.
    community_params = community_params or {}
    g = g.compute_igraph(community_alg, **community_params)
    # presumably compute_igraph stores its result in a node column named
    # after the algorithm - confirm against the library
    community_col = community_alg
  else:
    assert community_col in g._nodes, f'Expected community column {community_col} in nodes, only available are {g._nodes.columns}'

  g = g.layout_settings(edge_influence=edge_influence)

  # Temporary join keys; dropped again right after each merge.
  src_key = '__modular_layout_source_id__'
  dst_key = '__modular_layout_destination_id__'
  for reserved in ('source_community', 'destination_community', src_key, dst_key):
    assert reserved not in g._edges, f'Expected no {reserved} column in edges'

  # Rename the lookup columns *before* merging. Merging the raw node frame
  # breaks when an edge column shares a name with the node-id or community
  # column (pandas adds _x/_y suffixes, so the later rename/drop fails) and
  # would drop the endpoint column itself when it is named like the node id.
  node_communities = g._nodes[[g._node, community_col]]
  src_lookup = node_communities.rename(
    columns={g._node: src_key, community_col: 'source_community'})
  dst_lookup = node_communities.rename(
    columns={g._node: dst_key, community_col: 'destination_community'})

  edges = (
    g._edges
    .merge(src_lookup, left_on=g._source, right_on=src_key)
    .drop(columns=[src_key])
    .merge(dst_lookup, left_on=g._destination, right_on=dst_key)
    .drop(columns=[dst_key])
  )

  same_community = (edges['source_community'] == edges['destination_community'])
  edges = edges.assign(
    weight=same_community.map({
      True: same_community_weight,
      False: cross_community_weight
    }),
    same_community=same_community
  )
  # The per-endpoint labels were only needed to derive the weights.
  edges = edges.drop(columns=['source_community', 'destination_community'])

  return g.edges(edges).bind(edge_weight='weight')