Graphegon / pygraphblas

GraphBLAS for Python
https://graphegon.github.io/pygraphblas/pygraphblas/index.html
Apache License 2.0
343 stars 27 forks source link

Louvain clustering implementation #104

Open scottyler89 opened 1 year ago

scottyler89 commented 1 year ago

Hi all, I'm new to pygraphblas & was just trying to re-implement the Louvain modularity tutorial, but the code there gives an error, traceable to k.reduce() returning a traditional float, but I'm guessing it was supposed to by another pygraphblas object.

import random
from random import choice
from time import time

from pygraphblas import *
from pygraphblas import lib
from pygraphblas.gviz import draw

from collections import defaultdict
from itertools import groupby
from operator import itemgetter

def group_labels(T):
    d = defaultdict(list)
    for k,v in groupby(T, itemgetter(1)):
        d[k].append(list(v)[0][0])
    return d

def compare_groups(left, right):
    left = {k: set(v) for k, v in left.items()}
    right = {k: set(v) for k, v in right.items()}
    return sorted(left.values()) == sorted(right.values())

def get_louvain_cluster_assignments(cluster_matrix):
    return cluster_matrix.cast(INT64).apply(INT64.POSITIONJ).reduce_vector()

######################################################
## make a dummy adj matrix
I = [0, 0, 0, 0,
    1, 1, 1, 1,
    2, 2, 2, 2,
    3, 3, 3, 3,
    4, 4, 4, 4,
    5, 5, 5, 5, 5,
    6, 6, 6,
    7, 7, 7, 7]

J = [0, 2, 3, 6,
    1, 2, 3, 7,
    0, 2, 4, 6,
    0, 1, 3, 5,
    0, 2, 4, 6,
    1, 3, 5, 6, 7,
    0, 4, 6,
    1, 3, 5, 7]

M = Matrix.from_lists(I, J, [1.0] * len(I), typ=FP64)
#####################################################

def louvain_cluster_easy(A, max_iters=10):
    S = Matrix.identity(BOOL, A.nrows)
    empty = Vector.sparse(BOOL, A.nrows)
    i = 0
    changed = True
    start = time()
    G = A.T + A
    k = A.reduce_vector()
    m = k.reduce_int() / 2.0
    k = (-k) / m
    while changed and i < max_iters:
        changed = False
        for j in set(k.indexes):
            Sj = S[j]
            S[j] = empty
            v = G[j] + k[j]
            q = v @ S
            t = q.select('max')
            if t:
                r = choice(list(t.indexes))
                S[j, r] = True
                if Sj.get(r) is None:
                    changed = True
        i += 1
    return S, time() - start

def louvain_cluster(A, max_iters=10):
    An = A.nrows
    S = Matrix.identity(BOOL, An)
    empty = Vector.sparse(BOOL, An)
    Sj = Vector.sparse(BOOL, An)
    v = Vector.sparse(FP64, An)
    q = Vector.sparse(FP64, An)
    t = Vector.sparse(FP64, An)
    i = 0
    changed = True
    start = time()
    G = A.T + A
    k = A.reduce()
    m = k.reduce_int() / 2.0
    k = (-k) / m
    kI = set(k.I)
    while changed and i < max_iters:
        changed = False
        for j in kI:
            S.extract_row(j, out=Sj)                  # Sj = S[j]
            S.assign_row(j, empty)                    # S[j] = empty
            G.extract_row(j, out=v)                   # v = G[j] + nkm[j]
            v.apply_second(FP64.PLUS, k[j], out=v)
            v.vxm(S, out=q)                           # q = v @ S
            if not q:
                continue
            q.select('max', out=t)                    # t = q.select('max')
            if t:
                r = choice(list(t.indexes))
                S[j, r] = True
                if Sj.get(r) is None:
                    changed = True
        i += 1
    return S, time() - start

ans, took = louvain_cluster(M, 5)

Gives the following error:

Traceback (most recent call last):
  File "<stdin>", line 1, in <module>
  File "<stdin>", line 14, in louvain_cluster
AttributeError: 'float' object has no attribute 'reduce_int'

If anyone has a working implementation of Louvain modularity, or can help troubleshoot, I would be super appreciative!