Source code for autocnet.graph.markov_cluster

import numpy as np
import networkx as nx


[docs]def mcl(g, expand_factor=2, inflate_factor=2, max_loop=10, mult_factor=1):
    """
    Markov Cluster Algorithm

    Implementation modified from: https://github.com/koteth/python_mcl
    Originally released under the MIT license (https://opensource.org/licenses/MIT)

    Parameters
    ----------
    g : object or ndarray
        NetworkX graph object or adjacency matrix

    inflate_factor : float
                     Parameter to strengthen and weaken flow between nodes.  The larger the value
                     the more granular the resultant clusters are.

    expand_factor : int
                    Parameter to manage flow connection between different regions of the graph.

    mult_factor : int
                  Value to set for self loops.  That is, the flow between a node and itself.

    max_loop : int
               Number of iterations to perform before terminating (or convergence).

    Returns
    -------
    arr : ndarray
          arr normalized flow matrix computed after convergence or max_loop is exceeded.

    clusters : dict
               of clusters where the key is an arbitrary cluster identifier and
               the value is a list of node identifiers.

    References
    ----------
    [Stijn2000]_
    [Stijn2000a]_

    """

    def _normalize(arr):
        """
        Column normalize an array
        Parameters
        ----------
        arr : ndarray
              array to be normalized
        Returns
        -------
        new_matrix : ndarray
                     normalized array
        """
        column_sums = arr.sum(axis=0)
        new_matrix = arr / column_sums[np.newaxis, :]
        return new_matrix

    def _inflate(arr, inflate_factor):
        return _normalize(np.power(arr, inflate_factor))

    def _expand(arr, expand_factor):
        return np.linalg.matrix_power(arr, expand_factor)

    def _add_diag(arr, mult_factor):
        return arr + mult_factor * np.identity(arr.shape[0])

    def _stop(arr, i):

        if i % 5 == 4:
            m = np.max(arr ** 2 - arr) - np.min(arr ** 2 - arr)
            if m == 0:
                return True

        return False

    def _get_clusters(arr):
        clusters = {}
        cid = 0
        for j in arr:
            row_positive = np.nonzero(j)[0].tolist()
            if row_positive and not row_positive in clusters.values():
                clusters[cid] = row_positive
                cid += 1
        return clusters

    # Create a dense adjacency matrix
    if isinstance(g, nx.Graph):
        arr = np.array(nx.adjacency_matrix(g).todense())
    elif isinstance(g, np.ndarray):
        arr = g

    arr = _add_diag(arr, mult_factor)
    arr = _normalize(arr)

    for i in range(max_loop):
        arr = _inflate(arr, inflate_factor)
        arr = _expand(arr, expand_factor)

        # Check for convergence
        if _stop(arr, i):
            break

    clusters = _get_clusters(arr)
    return arr, clusters