# Source code for autocnet.matcher.cpu_outlier_detector

from collections import deque
import math
import warnings

import numpy as np
import pandas as pd


def distance_ratio(edge, matches, ratio=0.8, single=False):
    """
    Compute and return a mask for a matches dataframe
    using Lowe's ratio test, Lowe (2004) [Lowe2004]_.

    The test is run twice: once with the matches grouped by ``source_idx``
    and once grouped by ``destination_idx``.  A row is True in the returned
    mask only if it is True in both passes.

    Within a group only the first row can pass, and it passes when its
    distance is strictly less than ``ratio`` times the distance of the
    second row of the group.

    NOTE(review): nothing here sorts the groups, so this presumably relies
    on the caller supplying matches ordered by ascending distance within
    each group - confirm against the caller.

    Parameters
    ----------
    edge : object
           Not used by this function.

    matches : dataframe
              The matches to test. Must contain the columns ``source_idx``,
              ``destination_idx`` and ``distance``.

    ratio : float
            the ratio between the first and second-best match distances
            for each keypoint to use as a bound for marking the first keypoint
            as "good". Default: 0.8

    single : bool
             The mask value given to a row that is the only member of its
             ``source_idx`` group. Rows that are the only member of their
             ``destination_idx`` group always pass that half of the test.

    Returns
    -------
    mask : pd.Series
           A boolean Pandas Series aligned with ``matches`` with those
           failing the ratio test set to False.
    """
    def ratio_flags(distances, lone_value):
        # One flag per row of the group; only the first row may pass.
        n_rows = len(distances)
        if n_rows == 1:
            return [lone_value]
        flags = [False] * n_rows
        if distances.iloc[0] < distances.iloc[1] * ratio:
            flags[0] = True
        return flags

    def grouped_mask(key, lone_value):
        # transform() keeps the result aligned with the rows of `matches`.
        flagged = matches.groupby(key)['distance'].transform(
            lambda distances: ratio_flags(distances, lone_value))
        return flagged.astype('bool')

    mask_s = grouped_mask('source_idx', single)
    # A destination keypoint matched only once is never rejected by the
    # destination pass, whatever `single` is.
    mask_d = grouped_mask('destination_idx', True)

    return mask_s & mask_d


def spatial_suppression(df, bounds, xkey='x', ykey='y', k=60, error_k=0.05, nsteps=250):
    """
    Apply the spatial suppression algorithm over an arbitrary domain for all of the spatial
    data in the provided data frame.

    The domain is repeatedly gridded and, for each grid, the lowest
    ``strength`` point falling in every occupied cell is kept.  A binary
    search over ``nsteps`` candidate cell sizes looks for a grid that keeps
    approximately ``k`` points.

    Parameters
    ----------

    df : object
         Pandas data frame with coordinates. Must also contain a
         ``strength`` column; lower strength (cost) is treated as better.

    bounds : list
             In the form xmin, ymin, xmax, ymax

    xkey : str
           The column name for the x coordinates

    ykey : str
           The column name for the y coordinates

    k : int
        The desired number of points after suppression

    error_k : float
              The percentage of allowable error in the domain [0,1]

    nsteps : int
             The granularity of the search. This controls the number of
             candidate cell sizes that are searched. More granular search adds
             processing time, but can result in a more accurate solution.

    Returns
    -------
    mask : pd.Series
           A boolean mask of the valid points. The mask is indexed
           0..len(df)-1 and follows the rows of ``df`` after sorting by
           ascending ``strength``, not the index of the input frame.

    len(result) : int
                  The number of valid points

    Warns
    -----
    UserWarning
        If the search runs out of candidate cell sizes before the number of
        retained points falls within the requested tolerance.  The points
        retained for the last grid tried are returned in that case.
    """
    # Compute the bounding area inside of which the suppression will be applied
    minx = min(bounds[0], bounds[2])
    maxx = max(bounds[0], bounds[2])
    miny = min(bounds[1], bounds[3])
    maxy = max(bounds[1], bounds[3])
    domain = (maxx - minx), (maxy - miny)

    min_radius = min(domain) / 20
    max_radius = max(domain)
    search_space = np.linspace(min_radius, max_radius, nsteps)
    cell_sizes = search_space / math.sqrt(2)
    min_idx = 0
    max_idx = len(search_space) - 1

    # Acceptable number of retained points
    lower = k - k * error_k
    upper = k + k * error_k

    # Sort the dataframe (hard coded to ascending as lower strength (cost) is better)
    df = df.sort_values(by=['strength'], ascending=True).copy()
    df = df.reset_index(drop=True)
    mask = pd.Series(False, index=df.index)

    # The coordinates do not change between iterations; pull them out once.
    xs = np.asarray(df[xkey])
    ys = np.asarray(df[ykey])

    result = []
    while True:
        # Binary search over the candidate cell sizes
        mid_idx = (min_idx + max_idx) // 2
        exhausted = mid_idx == min_idx or mid_idx == max_idx
        if exhausted:
            # No untested cell size is left between the two ends of the
            # search; evaluate this final candidate and stop afterwards.
            warnings.warn('Unable to optimally solve.')

        # Get the current cell size and grid the domain
        cell_size = cell_sizes[mid_idx]
        n_x_cells = max(int(round(domain[0] / cell_size, 0)) - 1, 1)
        n_y_cells = max(int(round(domain[1] / cell_size, 0)) - 1, 1)

        # Assign all points to bins
        x_edges = np.linspace(minx, maxx, n_x_cells)
        y_edges = np.linspace(miny, maxy, n_y_cells)
        # digitize() returns 0 for values below the first edge; the modulo
        # sends those to the last cell, as negative indexing would.
        cols = (np.digitize(xs, bins=x_edges) - 1) % n_x_cells
        rows = (np.digitize(ys, bins=y_edges) - 1) % n_y_cells

        # Rows are ordered best-first, so the first row seen in each cell
        # is the one that claims it.  The result is rebuilt from scratch on
        # every iteration so it never mixes points from different grids.
        _, first_seen = np.unique(rows * n_x_cells + cols, return_index=True)
        result = np.sort(first_seen).tolist()
        n_found = len(result)

        # Check to see if the algorithm is completed, or if the grid size needs to be larger or smaller
        if lower <= n_found <= upper:
            # Success, in bounds
            break

        if n_found < lower:
            # The radius is too large
            max_idx = mid_idx
            if max_idx == 0:
                warnings.warn('Unable to retrieve {} points. Consider reducing the amount of points you request(k)'.format(k))
        else:
            # Too many points, the radius is too small
            min_idx = mid_idx

        if exhausted:
            break

    mask.loc[result] = True
    return mask, len(result)


def self_neighbors(matches):
    """
    Returns a pandas data series intended to be used as a mask. Each row
    is True if it is not matched to a point in the same image (good) and
    False if it is (bad).

    Parameters
    ----------
    matches : dataframe
              the matches dataframe stored along the edge of the graph.
              Must contain the columns ``source_image`` and
              ``destination_image``; no other column is read.

    Returns
    -------
    : dataseries
      Intended to mask the matches dataframe. True where the row's
      ``source_image`` differs from its ``destination_image``, False where
      the two are equal.
    """
    return matches['source_image'] != matches['destination_image']


def mirroring_test(matches):
    """
    Compute and return a mask for the matches dataframe on each edge of the graph which
    will keep only entries in which there is both a source -> destination match and a destination ->
    source match.

    NOTE(review): the check is done by looking for rows that repeat, so
    this presumably expects both match directions to be stored as rows with
    identical ``source_idx``, ``destination_idx`` and ``distance`` values -
    confirm against the code that builds ``matches``.

    Parameters
    ----------
    matches : dataframe
              the matches dataframe stored along the edge of the graph.
              Must contain the columns ``source_idx``, ``destination_idx``
              and ``distance``.

    Returns
    -------
    duplicates : dataseries
                 Intended to mask the matches dataframe. A row is True when a
                 later row has the same ``source_idx``, ``destination_idx``
                 and ``distance``. The last row of every such set is False,
                 so applying the mask leaves one fewer row than the set
                 contained. Rows that are never repeated are False.
    """
    # keep='last' leaves the final occurrence unflagged, so a pair of
    # identical rows yields exactly one True.
    duplicate_mask = matches.duplicated(
        subset=['source_idx', 'destination_idx', 'distance'], keep='last')
    return duplicate_mask