Source code for autocnet.graph.markov_cluster
import numpy as np
import networkx as nx
[docs]def mcl(g, expand_factor=2, inflate_factor=2, max_loop=10, mult_factor=1):
"""
Markov Cluster Algorithm
Implementation modified from: https://github.com/koteth/python_mcl
Originally released under the MIT license (https://opensource.org/licenses/MIT)
Parameters
----------
g : object or ndarray
NetworkX graph object or adjacency matrix
inflate_factor : float
Parameter to strengthen and weaken flow between nodes. The larger the value
the more granular the resultant clusters are.
expand_factor : int
Parameter to manage flow connection between different regions of the graph.
mult_factor : int
Value to set for self loops. That is, the flow between a node and itself.
max_loop : int
Number of iterations to perform before terminating (or convergence).
Returns
-------
arr : ndarray
arr normalized flow matrix computed after convergence or max_loop is exceeded.
clusters : dict
of clusters where the key is an arbitrary cluster identifier and
the value is a list of node identifiers.
References
----------
[Stijn2000]_
[Stijn2000a]_
"""
def _normalize(arr):
"""
Column normalize an array
Parameters
----------
arr : ndarray
array to be normalized
Returns
-------
new_matrix : ndarray
normalized array
"""
column_sums = arr.sum(axis=0)
new_matrix = arr / column_sums[np.newaxis, :]
return new_matrix
def _inflate(arr, inflate_factor):
return _normalize(np.power(arr, inflate_factor))
def _expand(arr, expand_factor):
return np.linalg.matrix_power(arr, expand_factor)
def _add_diag(arr, mult_factor):
return arr + mult_factor * np.identity(arr.shape[0])
def _stop(arr, i):
if i % 5 == 4:
m = np.max(arr ** 2 - arr) - np.min(arr ** 2 - arr)
if m == 0:
return True
return False
def _get_clusters(arr):
clusters = {}
cid = 0
for j in arr:
row_positive = np.nonzero(j)[0].tolist()
if row_positive and not row_positive in clusters.values():
clusters[cid] = row_positive
cid += 1
return clusters
# Create a dense adjacency matrix
if isinstance(g, nx.Graph):
arr = np.array(nx.adjacency_matrix(g).todense())
elif isinstance(g, np.ndarray):
arr = g
arr = _add_diag(arr, mult_factor)
arr = _normalize(arr)
for i in range(max_loop):
arr = _inflate(arr, inflate_factor)
arr = _expand(arr, expand_factor)
# Check for convergence
if _stop(arr, i):
break
clusters = _get_clusters(arr)
return arr, clusters