Java Program to Implement Lloyd’s Algorithm

This is a Java Program to implement Lloyd’s Algorithm. The LBG-algorithm or Lloyd’s algorithm allows clustering of vectors of any dimension. This is helpful for example for image classification when using the SIFT or SURF algorithms. It might be also useful if you want to cluster a large amount of points on a map.

Here is the source code of the Java Program to Implement Lloyd’s Algorithm. The Java program is successfully compiled and run on a Windows system. The program output is also shown below.

//This is a java program to implement Lloyd’s Algorithm
import java.util.ArrayList;
 
public class GenLloyd
{
    protected double[][] samplePoints;
    protected double[][] clusterPoints;
 
    int[] pointApproxIndices;
    int pointDimension = 0;
    protected double epsilon = 0.0005;
    protected double avgDistortion = 0.0;
 
    /**
     * Create Generalized Lloyd object with an array of sample points
     */
    public GenLloyd(double[][] samplePoints)
    {
        this.setSamplePoints(samplePoints);
    }
 
    /**
     * Return epsilon parameter (accuracy)
     */
    public double getEpsilon()
    {
        return epsilon;
    }
 
    /**
     * Set epsilon parameter (accuracy). Should be a small number 0.0 < epsilon
     * < 0.1
     */
    public void setEpsilon(double epsilon)
    {
        this.epsilon = epsilon;
    }
 
    /**
     * Set array of sample points
     */
    public void setSamplePoints(double[][] samplePoints)
    {
        if (samplePoints.length > 0)
        {
            this.samplePoints = samplePoints;
            this.pointDimension = samplePoints[0].length;
        }
    }
 
    /**
     * Get array of sample points
     */
    public double[][] getSamplePoints()
    {
        return samplePoints;
    }
 
    /**
     * Get calculated cluster points. <numClusters> cluster points will be
     * calculated and returned
     */
    public double[][] getClusterPoints(int numClusters)
    {
        this.calcClusters(numClusters);
 
        return clusterPoints;
    }
 
    protected void calcClusters(int numClusters)
    {
        // initialize with first cluster
        clusterPoints = new double[1][pointDimension];
 
        double[] newClusterPoint = initializeClusterPoint(samplePoints);
        clusterPoints[0] = newClusterPoint;
 
        if (numClusters > 1)
        {
            // calculate initial average distortion
            avgDistortion = 0.0;
            for (double[] samplePoint : samplePoints)
            {
                avgDistortion += calcDist(samplePoint, newClusterPoint);
            }
 
            avgDistortion /= (double) (samplePoints.length * pointDimension);
 
            // set up array of point approximization indices
            pointApproxIndices = new int[samplePoints.length];
 
            // split the clusters
            int i = 1;
            do
            {
                i = splitClusters();
            } while (i < numClusters);
        }
    }
 
    protected int splitClusters()
    {
        int newClusterPointSize = 2;
        if (clusterPoints.length != 1)
        {
            newClusterPointSize = clusterPoints.length * 2;
        }
 
        // split clusters
        double[][] newClusterPoints = new double[newClusterPointSize][pointDimension];
        int newClusterPointIdx = 0;
        for (double[] clusterPoint : clusterPoints)
        {
            newClusterPoints[newClusterPointIdx] = createNewClusterPoint(
                    clusterPoint, -1);
            newClusterPoints[newClusterPointIdx + 1] = createNewClusterPoint(
                    clusterPoint, +1);
 
            newClusterPointIdx += 2;
        }
 
        clusterPoints = newClusterPoints;
 
        // iterate to approximate cluster points
        // int iteration = 0;
        double curAvgDistortion = 0.0;
        do
        {
            curAvgDistortion = avgDistortion;
 
            // find the min values
            for (int pointIdx = 0; pointIdx < samplePoints.length; pointIdx++)
            {
                double minDist = Double.MAX_VALUE;
                for (int clusterPointIdx = 0; clusterPointIdx < clusterPoints.length; clusterPointIdx++)
                {
                    double newMinDist = calcDist(samplePoints[pointIdx],
                            clusterPoints[clusterPointIdx]);
                    if (newMinDist < minDist)
                    {
                        minDist = newMinDist;
                        pointApproxIndices[pointIdx] = clusterPointIdx;
                    }
                }
            }
 
            // update codebook
            for (int clusterPointIdx = 0; clusterPointIdx < clusterPoints.length; clusterPointIdx++)
            {
                double[] newClusterPoint = new double[pointDimension];
                int num = 0;
                for (int pointIdx = 0; pointIdx < samplePoints.length; pointIdx++)
                {
                    if (pointApproxIndices[pointIdx] == clusterPointIdx)
                    {
                        addPointValues(newClusterPoint, samplePoints[pointIdx]);
                        num++;
                    }
                }
 
                if (num > 0)
                {
                    multiplyPointValues(newClusterPoint, 1.0 / (double) num);
                    clusterPoints[clusterPointIdx] = newClusterPoint;
                }
            }
 
            // update average distortion
            avgDistortion = 0.0;
            for (int pointIdx = 0; pointIdx < samplePoints.length; pointIdx++)
            {
                avgDistortion += calcDist(samplePoints[pointIdx],
                        clusterPoints[pointApproxIndices[pointIdx]]);
            }
 
            avgDistortion /= (double) (samplePoints.length * pointDimension);
 
        } while (((curAvgDistortion - avgDistortion) / curAvgDistortion) > epsilon);
 
        return clusterPoints.length;
    }
 
    protected double[] initializeClusterPoint(double[][] pointsInCluster)
    {
        // calculate point sum
        double[] clusterPoint = new double[pointDimension];
        for (int numPoint = 0; numPoint < pointsInCluster.length; numPoint++)
        {
            addPointValues(clusterPoint, pointsInCluster[numPoint]);
        }
 
        // calculate average
        multiplyPointValues(clusterPoint, 1.0 / (double) pointsInCluster.length);
 
        return clusterPoint;
    }
 
    protected double[] createNewClusterPoint(double[] clusterPoint,
            int epsilonFactor)
    {
        double[] newClusterPoint = new double[pointDimension];
        addPointValues(newClusterPoint, clusterPoint);
        multiplyPointValues(newClusterPoint, 1.0 + (double) epsilonFactor
                * epsilon);
 
        return newClusterPoint;
    }
 
    protected double calcDist(double[] v1, double[] v2)
    {
        double distSum = 0.0;
        for (int pointIdx = 0; pointIdx < v1.length; pointIdx++)
        {
            double absDist = Math.abs(v1[pointIdx] - v2[pointIdx]);
            distSum += absDist * absDist;
        }
 
        return distSum;
    }
 
    protected void addPointValues(double[] v1, double[] v2)
    {
        for (int pointIdx = 0; pointIdx < v1.length; pointIdx++)
        {
            v1[pointIdx] += v2[pointIdx];
        }
    }
 
    protected void multiplyPointValues(double[] v1, double f)
    {
        for (int pointIdx = 0; pointIdx < v1.length; pointIdx++)
        {
            v1[pointIdx] *= f;
        }
    }
 
    public static void main(String[] args)
    {
        ArrayList<double[]> points = new ArrayList<double[]>();
 
        // points.add(arrayOf(-1.5, -1.5));
        points.add(arrayOf(-1.5, 2.0, 5.0));
        points.add(arrayOf(-2.0, -2.0, 0.0));
        points.add(arrayOf(1.0, 1.0, 2.0));
        points.add(arrayOf(1.5, 1.5, 1.2));
        points.add(arrayOf(1.0, 2.0, 5.6));
        points.add(arrayOf(1.0, -2.0, -2.0));
        points.add(arrayOf(1.0, -3.0, -2.0));
        points.add(arrayOf(1.0, -2.5, -4.5));
 
        GenLloyd gl = new GenLloyd(points.toArray(new double[points.size()][2]));
 
        double[][] results = gl.getClusterPoints(4);
        for (double[] point : results)
        {
            System.out.println("Cluster " + point[0] + ", " + point[1] + ", "
                    + point[2]);
        }
    }
 
    private static double[] arrayOf(double x, double y, double z)
    {
        double[] a = new double[3];
        a[0] = x;
        a[1] = y;
        a[2] = z;
 
        return a;
    }
}

Output:

$ javac GenLloyd.java
$ java GenLloyd
 
Cluster -2.0, -2.0, 0.0
Cluster 1.0, -2.5, -2.833333333333333
Cluster 1.25, 1.25, 1.6
Cluster -0.25, 2.0, 5.3

Related posts:

Tính đóng gói (Encapsulation) trong java
The SpringJUnitConfig and SpringJUnitWebConfig Annotations in Spring 5
REST Web service: HTTP Status Code và xử lý ngoại lệ RESTful web service với Jersey 2.x
Java Program to Generate a Graph for a Given Fixed Degree Sequence
Giới thiệu về Stream API trong Java 8
Java – Byte Array to Reader
Hướng dẫn Java Design Pattern – DAO
Java Program to Use Above Below Primitive to Test Whether Two Lines Intersect
Guava – Join and Split Collections
Apache Commons Collections MapUtils
Template Engines for Spring
Java 8 – Powerful Comparison with Lambdas
Allow user:password in URL
Java Map With Case-Insensitive Keys
Java Program to Check whether Graph is a Bipartite using DFS
Exploring the New Spring Cloud Gateway
Java String to InputStream
Java Program to Check whether Directed Graph is Connected using DFS
Java Program to Find SSSP (Single Source Shortest Path) in DAG (Directed Acyclic Graphs)
Java Program to Generate All Subsets of a Given Set in the Gray Code Order
Java Program to Implement Bresenham Line Algorithm
Hướng dẫn Java Design Pattern – Bridge
Giới thiệu thư viện Apache Commons Chain
Using JWT with Spring Security OAuth (legacy stack)
Java Program to Compute the Volume of a Tetrahedron Using Determinants
Creating a Custom Starter with Spring Boot
Java Program to Implement LinkedTransferQueue API
Default Password Encoder in Spring Security 5
Guide to the Synchronized Keyword in Java
Guava CharMatcher
Java Program to Test Using DFS Whether a Directed Graph is Strongly Connected or Not
Vòng lặp for, while, do-while trong Java