Source code for viscid.calculator.cluster

#!/usr/bin/env python
"""Functions for clustering clouds of neighboring points"""

from __future__ import print_function, division
from itertools import count

import numpy as np


__all__ = ['distance_to_clusters', 'find_clusters']


[docs]def distance_to_clusters(point, clusters, alt=()): """L2 distance between point and clusters""" x, y = point dists = np.zeros((len(clusters),), dtype='f') wraps = np.zeros((len(clusters), len(point)), dtype='int') for i, (clx, cly) in enumerate(clusters): clx, cly = np.asarray(clx), np.asarray(cly) dists[i] = np.min(np.sqrt((clx - x)**2 + (cly - y)**2)) for altx, alty in alt: newdist = np.min(np.sqrt((clx - (x + altx))**2 + (cly - (y + alty))**2)) if newdist < dists[i]: dists[i] = newdist wraps[i, :] = (altx, alty) wraps = np.sign(wraps) return dists, wraps
[docs]def find_clusters(indx, indy, x, y, multiple=True, periodic=(False, False), diagonals=True): """Cluster and average groups of neighboring points TODO: If absolutely necessary, could do some K-means clustering here by calling into scikit-learn. Args: indx (sequence): list of x indices indy (sequence): list of y indices x (sequence): x coordinate array y (sequence): y coordinate array multiple (bool): If False, average all points as a single cluster periodic (sequence): indicate whether that direction is periodic, and if so, whether the coordinate arrays are overlapped or not. Values can be True, False, or '+'. '+' indicates that x[0] and x[-1] are not colocated, so assume they're dx apart where dx = x[-1] - x[-2]. diagonals (bool): if true, then diagonal points are considered neighbors Returns: ndarray: 2xN for N clusters """ assert len(indx) == len(indy) inds = np.array([indx, indy]).T crds = [np.asarray(x), np.asarray(y)] thresh = 1.5 if diagonals else 1.1 # setup some bookkeeping about periodicity pd = [False if pi == "0" else bool(pi) for pi in periodic] pdN = [len(_x) for _x in crds] pdL = np.array([_x[-1] - _x[0] for _x in crds]) xh = np.array([_x[-1] for _x in crds]) xl = np.array([_x[0] for _x in crds]) for i, p in enumerate(periodic): if not pd[i]: pdN[i] = 0 pdL[i] = 0.0 elif str(p).strip() == '+': pdL[i] += crds[i][-1] - crds[i][-2] alt = [] if pd[0]: alt += [(-pdN[0], 0), (pdN[0], 0)] if pd[1]: alt += [(0, -pdN[1]), (0, pdN[1])] if pd[0] and pd[1]: alt += [(-pdN[0], -pdN[1]), (pdN[0], pdN[1])] # cli == cluster indices, clx == cluster locations clx = [] if multiple: clusters = [] cli = [] for i, (ix, iy) in enumerate(inds): ind = inds[i] dists, wraps = distance_to_clusters((ix, iy), cli, alt=alt) touching = dists < thresh ntouching = np.sum(touching) if ntouching > 1: # if this point touches > 1 cluster, then merge the other # clusters clusters.append([i]) cli.append([[ix], [iy]]) clx.append([[x[ix]], [y[iy]]]) for k in reversed(np.flatnonzero(touching)): clusters[-1] += clusters.pop(k) icli = cli.pop(k) iclx = clx.pop(k) for j, N, L in zip(count(), pdN, pdL): iclij = np.asarray(icli[j]) - wraps[k][j] * N iclxj = np.asarray(iclx[j]) - wraps[k][j] * L cli[-1][j] += iclij.tolist() clx[-1][j] += iclxj.tolist() elif ntouching == 1: icluster = np.argmax(touching) wrap = wraps[icluster] clusters[icluster].append(i) for j, N, L in zip(count(), pdN, pdL): cli[icluster][j].append(ind[j] + wrap[j] * N) clx[icluster][j].append(crds[j][ind[j]] + wrap[j] * L) else: clusters.append([i]) cli.append([[ix], [iy]]) clx.append([[x[ix]], [y[iy]]]) else: clx = [[x[inds[0, :]], y[inds[1, :]]]] pts = np.array([np.average(clxi, axis=1) for clxi in clx]).reshape((-1, 2)) pts -= pdL * np.floor((pts - xl) / (xh - xl)) return pts.T
def _main(): # test clustering; x == theta && y == phi Nx, Lx = 16, 180.0 Ny, Ly = 32, 360.0 x = np.linspace(0, Lx, Nx) y = np.linspace(0, Ly, Ny, endpoint=True) ix = [0, 0, Nx - 1, Nx - 1, Nx - 4] # pylint: disable=bad-whitespace iy = [0, Ny - 1, 0, Ny - 1, Ny - 4] # pylint: disable=bad-whitespace pts = fin_clusters(ix, iy, x, y, multiple=True, periodic="01") print(pts.shape) print(pts) if __name__ == "__main__": import sys sys.exit(_main()) ## ## EOF ##