''' Implementation of DBSCAN* '''

import matplotlib.pyplot as plt
import random
import math

def dbscan(points, epsilon, min_points):
    def dist(p, q):
        return sum((pi - qi) ** 2 for pi, qi in zip(p, q))

    def close(p, q):
        return dist(p, q) <= epsilon ** 2

    core, noise, clusters, edges = [], [], [], []
    for p in points:
        if sum(close(p, q) for q in points) >= min_points:
            core.append(p)
        else:
            noise.append(p)

    unclustered = list(core)
    while unclustered:
        cluster = [unclustered.pop()]
        for p in cluster:  # cluster grows while scanning
            for q in list(unclustered):
                if close(p, q):
                    cluster.append(q)
                    unclustered.remove(q)
                    edges.append((p, q))        
        clusters.append(cluster)

    if False:
        # Draw edges explored for connecting core points
        for p in noise:
            q = min(core, key=lambda q: dist(p, q))
            if close(p, q):
                plt.plot(*zip(p, q), 'k:')
        if noise:
            plt.plot(*zip(*noise), 'k.')
        for edge in edges:
            plt.plot(*zip(*edge), 'r-')
        for cluster in clusters:
            plt.plot(*zip(*cluster), 'o')
            plt.plot(*cluster[0], 'ks')
    else:
        # Draw all close edge
        for i, p in enumerate(points):
            for q in points[:i]:
                if close(p, q):
                    if p in noise or q in noise:
                        plt.plot(*zip(p, q), 'k:', alpha=0.5)
                    else:
                        plt.plot(*zip(p, q), 'k-', alpha=0.5)
        if noise:
            plt.plot(*zip(*noise), 'k.')
        for cluster in clusters:
            plt.plot(*zip(*cluster), '.')
        
    plt.gca().set_aspect('equal')
    plt.show()
    
    return clusters, noise


def random_points(N, mu=1, sigma=1):
    points = []
    for _ in range(N):
        angle = 2 * math.pi * random.random()
        radius = random.normalvariate(mu, sigma)  # normal distribution
        points.append((radius * math.cos(angle), radius * math.sin(angle)))
    return points


points = (random_points(300, mu=30, sigma=4) +
          random_points(100, mu=10, sigma=4))

clusters, noise = dbscan(points, epsilon=5, min_points=5)
