kfoynt / LocalGraphClustering

MIT License
132 stars 45 forks source link

Graph drawing tools #68

Open dgleich opened 5 years ago

dgleich commented 5 years ago

We have a number of common visualization patterns that we'd like to do. I've been using NetworkX for this, but this seems like overkill as there is a big translation between their ids and our ids, which makes the process slightly tedious and error-prone.

  1. standard graph drawing given xy or xyz coordinates for each vertex.
  2. a standard graph drawing (xy or xyz coords) for each vertex and a subset of nodes highlighted.
  3. a standard graph drawing (xy or xyz coords) for each vertex and a vector of data highlighted (e.g. a float value for each node).

G = GraphLocal()

Here xy and xyz are arrays with one row per vertex, holding 2 (xy) or 3 (xyz) coordinates.

G.draw(xy)
G.draw(xyz)
G.draw(xy, nodemarkersize=0) 
G.draw(xy, set=S)
G.draw(xy, set=S)
G.draw(xy, values=f)
G.draw(xyz, groups=g) # this is a partition. 

Parameters


G.draw(coords, ...) 

coords: an n-by-2 or n-by-3 array with coordinates for each node of the graph.

Optional parameters:

alpha: [0, 1] the overall alpha scaling of the plot
nodealpha: [0, 1]
edgealpha:
setalpha: 

nodecolor:
edgecolor:
setcolor:

nodesize:
linewidth:

ax=None (default) will create a new figure, or this will plot in ax if not None.

Return a dictionary with: 
fig, ax, nodes, edges, setnodes, setedges, groupnodes, groupedges
these are the handles to the actual plot elements, so that you could change 
values after the fact. 
"""
dgleich commented 5 years ago

Here is some of the code I'm using to do this now.

def ourncp(N, rholist=None):
    """Run ``approxPageRank`` on the NCP data of a networkx graph.

    ``N`` is converted with ``lgc.GraphLocal().from_networkx`` and wrapped in
    ``lgc.NCPData``; the result of ``approxPageRank`` is returned unchanged.

    Parameters
    ----------
    N : graph accepted by ``GraphLocal.from_networkx``.
    rholist : list of rho values forwarded to ``approxPageRank``.
        ``None`` (the default) means ``[1e-2, 1e-3, 1e-4]``; a fresh list is
        built on every call so the default can never be shared or mutated
        across calls.
    """
    if rholist is None:
        rholist = [1e-2, 1e-3, 1e-4]
    graph = lgc.GraphLocal().from_networkx(N)
    # The remaining settings are fixed for this helper.
    return lgc.NCPData(graph).approxPageRank(
        rholist=rholist, deep=False,
        neighborhoods=False, localmins=False, nthreads=64, timeout=30)

def ncp_min_feature_by_group_binned(df, feature, group, edges=None, nbins=50, log=False):
    """Bin the rows of ``df`` by ``df[group]`` and reduce every bin.

    Each bin is reduced with ``lgc.ncpplots._ncp_min(rows, feature)``
    (presumably the row minimising ``feature`` -- confirm against lgc).

    Parameters
    ----------
    df : pandas.DataFrame holding the ``group`` column.
    feature : column name forwarded to ``_ncp_min``.
    group : column whose values are cut into bins.
    edges : optional bin edges. When ``None`` they are derived from the data
        with ``np.histogram`` using ``nbins`` bins.
    nbins : number of bins used when ``edges`` is ``None``.
    log : when truthy, computed edges are equally spaced in ``log10`` of the
        group values instead of in the values themselves.

    Returns
    -------
    (reduced, edges) : the grouped/applied result and the bin edges used, so
        the same edges can be passed back in for another data set.
    """
    if edges is None:
        # Only the value range matters for the edges, so no sort is needed.
        xs = df[group].values.astype(np.float64)
        if log:
            # [1] is the second output of np.histogram: the bin edges.
            edges = np.power(10.0, np.histogram(np.log10(xs), bins=nbins)[1])
        else:
            edges = np.histogram(xs, bins=nbins)[1]
    # The lowest edge equals the smallest value when edges come from
    # np.histogram; without include_lowest the right-closed first bin would
    # silently drop the row(s) holding that minimum.
    buckets = pd.cut(df[group], edges, include_lowest=True)
    return df.groupby(buckets).apply(lambda x: lgc.ncpplots._ncp_min(x, feature)), edges

def minline(ncp, feature, group, nbins=100, edges=None, log=True):
    """Return the binned minimum line of ``feature`` against ``group``.

    The data frame from ``ncp.as_data_frame()`` is binned with
    ``ncp_min_feature_by_group_binned``; bins that produced NaN rows are
    dropped and the remaining points are ordered by their ``group`` value.

    Parameters
    ----------
    ncp : object exposing ``as_data_frame()``.
    feature, group : column names (y and x of the line respectively).
    nbins, edges, log : forwarded to ``ncp_min_feature_by_group_binned``.

    Returns
    -------
    (x, y, edges) : two lists of equal length sorted by ``x`` and the bin
        edges that were used.
    """
    ncpdata = ncp.as_data_frame()
    dfmin, edges = ncp_min_feature_by_group_binned(
        ncpdata, feature, group, nbins=nbins, edges=edges, log=log)
    dfmin = dfmin.dropna(axis=0)
    # Stable sort on the group value only, so ties keep their bin order.
    points = sorted(zip(dfmin[group], dfmin[feature]), key=lambda point: point[0])
    x = [point[0] for point in points]
    y = [point[1] for point in points]
    return x, y, edges

def ncpplot(N,**kwargs):
    """Compute the NCP of ``N`` via ``ourncp`` and plot it with ``cond_by_size``.

    Extra keyword arguments are forwarded to ``ourncp``. The NCP object is
    returned so it can be reused for further plots.
    """
    profile = ourncp(N, **kwargs)
    plotter = lgc.NCPPlots(profile)
    plotter.cond_by_size()
    return profile

def ncpplotline(ax,N,name,nbins=20, **kwargs):
    """Draw the min line of ``output_cond`` by ``output_sizeeff`` for ``N`` on ``ax``.

    The NCP is computed with ``ourncp(N, **kwargs)``; the line is labelled
    ``name`` and both axes of ``ax`` are switched to log scale afterwards.

    Returns the NCP object together with whatever
    ``feature_by_group_min_line`` returned.
    """
    profile = ourncp(N, **kwargs)
    plotter = lgc.NCPPlots(profile)
    linedata = plotter.feature_by_group_min_line(
        "output_cond", "output_sizeeff", ax=ax, label=name, nbins=nbins)
    ax.loglog()
    return profile, linedata

def matrix2dict(A,G):
    """Map every node of ``G`` to the matching row of ``A`` as a list.

    Row ``i`` of ``A`` is paired with the ``i``-th node yielded by
    ``G.nodes()``, giving the ``{node: coordinates}`` layout form.
    """
    layout = {}
    for row, node in enumerate(G.nodes()):
        layout[node] = list(A[row])
    return layout

def fiedler_view(N):
    """Draw ``N`` in 2D using coordinates from ``lgc.algorithms.eig2_nL``.

    The first output of ``eig2_nL(..., dim=2)`` is used as one coordinate row
    per node (presumably a spectral embedding -- confirm against lgc).

    Returns ``(result of nx.draw, F)`` where ``F`` is the coordinate matrix.
    """
    graph = lgc.GraphLocal().from_networkx(N)
    F = lgc.algorithms.eig2_nL(graph, dim=2)[0]
    layout = matrix2dict(F, N)
    drawn = nx.draw(N, layout, node_size=12, width=0.5, alpha=0.5)
    return drawn, F

def ncpsetview(N,pos,ax=None,ncp=None,nbins=20,rholist=None):
    """Draw one small 2D panel per set on the NCP min line of ``N``.

    Parameters
    ----------
    N : networkx graph to draw.
    pos : node layout passed to the ``nx.draw_networkx_*`` functions.
    ax : axes that receive the min line. When ``None`` the line is drawn on
        the first panel and wiped again (see the comment in the body).
    ncp : precomputed NCP data; computed with ``approxPageRank`` when ``None``.
    nbins : forwarded to ``feature_by_group_min_line``.
    rholist : rho values used only when ``ncp`` is computed here. ``None``
        (the default) means ``[1e-2, 1e-3, 1e-4]``, built fresh per call.

    Returns
    -------
    The figure holding the fixed 4x5 grid of panels. Only 20 panels exist,
    so more than 20 sets raises ``IndexError``.
    """
    if rholist is None:
        rholist = [1e-2, 1e-3, 1e-4]

    fig, axs = plt.subplots(4, 5, figsize=(8, 8))
    axs = axs.flatten()

    # feature_by_group_min_line always plots. When the caller gave no axes
    # we let it draw on the first panel (which receives a graph later) and
    # clear that panel right afterwards.
    lineax = axs[0] if ax is None else ax
    if ncp is None:
        ncp = lgc.NCPData(lgc.GraphLocal().from_networkx(N)).approxPageRank(rholist=rholist)
    # Position i holds the networkx node for graph index i.
    id2nx = list(N.nodes())
    setdata = lgc.NCPPlots(ncp).feature_by_group_min_line(
        "output_cond", "output_sizeeff", ax=lineax, label="", nbins=nbins)
    if ax is None:
        lineax.clear()
    for panel in axs:
        panel.axis('off')
    for i, d in enumerate(setdata):
        panel = axs[i]
        S, _ = ncp.output_set(int(d[2]))  # d[2] identifies the output set
        SN = [id2nx[v] for v in S]
        R = set(N) - set(SN)
        # Always highlight the smaller side of the cut.
        if len(R) < len(SN):
            R, SN = SN, R
        nx.draw_networkx_nodes(N, pos, ax=panel, alpha=0.5, node_size=2, node_color='k', nodelist=list(R))
        nx.draw_networkx_nodes(N, pos, ax=panel, node_size=14, nodelist=list(SN), node_color='r')
        nx.draw_networkx_edges(N, pos, ax=panel, alpha=0.5, width=0.5)
        # The title reports the size of the set as returned by output_set.
        panel.set_title('|S|=' + str(len(S)) + '\ncond=%.4f'%(d[1]),fontsize=10)
        panel.axis('tight')
    return fig
dgleich commented 5 years ago

There is some additional stuff in the above, but it has the things you need :)

dgleich commented 5 years ago

And in 3d

# From https://www.idtools.com.au/3d-network-graphs-python-mplot3d-toolkit/
from mpl_toolkits.mplot3d import Axes3D
def draw3d(G, pos, **kwargs):
    """Draw ``G`` in a new 3D matplotlib figure and return the figure.

    ``pos`` maps each node to an indexable of three coordinates. All
    positions are scattered as points and every edge of ``G`` is drawn as a
    thin black segment. ``**kwargs`` is accepted but not used.
    """
    fig = plt.figure()
    ax = fig.add_subplot(111, projection='3d')

    points = list(pos.values())
    ax.scatter(
        [p[0] for p in points],
        [p[1] for p in points],
        [p[2] for p in points],
        alpha=0.5,
    )

    for edge in G.edges():
        tail = pos[edge[0]]
        head = pos[edge[1]]
        # One two-point line per edge, coordinate by coordinate.
        segment = [np.array((tail[k], head[k])) for k in range(3)]
        ax.plot(segment[0], segment[1], segment[2],
                c='black', alpha=0.5, linewidth=0.5)

    ax.set_axis_off()
    return fig

def fiedler_view3(N):
    """Draw ``N`` in 3D using coordinates from ``lgc.algorithms.eig2_nL``.

    The first output of ``eig2_nL(..., dim=3)`` supplies one coordinate row
    per node. Returns ``(figure from draw3d, F)``.
    """
    graph = lgc.GraphLocal().from_networkx(N)
    F = lgc.algorithms.eig2_nL(graph, dim=3)[0]
    figure = draw3d(N, matrix2dict(F, N))
    return figure, F

# This is still not done... 
def ncpsetview3(N, pos, ax=None,ncp=None,nbins=20,rholist=None):
    """Draw one small 3D panel per set on the NCP min line of ``N``.

    Parameters
    ----------
    N : networkx graph whose nodes are plotted.
    pos : mapping from node to an indexable of three coordinates.
    ax : axes that receive the min line. When ``None`` the line is drawn on
        the first panel and wiped again (see the comment in the body).
    ncp : precomputed NCP data; computed with ``approxPageRank`` when ``None``.
    nbins : forwarded to ``feature_by_group_min_line``; must currently be 20.
    rholist : rho values used only when ``ncp`` is computed here. ``None``
        (the default) means ``[1e-2, 1e-3, 1e-4]``, built fresh per call.

    Returns
    -------
    The figure holding the fixed 4x5 grid of 3D panels. Edges are not drawn.
    """
    assert(nbins == 20) # todo make this flexible

    if rholist is None:
        rholist = [1e-2, 1e-3, 1e-4]

    fig = plt.figure(figsize=(8, 8))
    axs = [fig.add_subplot(4, 5, i + 1, projection='3d') for i in range(4 * 5)]

    # feature_by_group_min_line always plots. When the caller gave no axes
    # we let it draw on the first panel (which receives a graph later) and
    # clear that panel right afterwards.
    lineax = axs[0] if ax is None else ax
    if ncp is None:
        ncp = lgc.NCPData(lgc.GraphLocal().from_networkx(N)).approxPageRank(rholist=rholist)

    setdata = lgc.NCPPlots(ncp).feature_by_group_min_line(
        "output_cond", "output_sizeeff", ax=lineax, label="", nbins=nbins)
    if ax is None:
        lineax.clear()
    for panel in axs:
        panel.axis('off')

    # Coordinates listed in N.nodes() order, so they can be looked up by
    # integer graph index below.
    nodes = list(N.nodes())
    xs = [pos[v][0] for v in nodes]
    ys = [pos[v][1] for v in nodes]
    zs = [pos[v][2] for v in nodes]

    for i, d in enumerate(setdata):
        panel = axs[i]
        S, _ = ncp.output_set(int(d[2]))  # d[2] identifies the output set

        # Work with integer indices 0..len(pos)-1 here.
        R = set(range(len(pos))) - set(S)
        # Always highlight the smaller side of the cut.
        if len(R) < len(S):
            R, S = S, R

        panel.scatter([xs[v] for v in R], [ys[v] for v in R], [zs[v] for v in R], alpha=0.1, c='k')
        panel.scatter([xs[v] for v in S], [ys[v] for v in S], [zs[v] for v in S], alpha=0.5, c='r')

        # After the swap above, len(S) is the size of the highlighted side.
        panel.set_title('|S|=' + str(len(S)) + '\ncond=%.4f'%(d[1]),fontsize=10)
        panel.axis('tight')
    return fig