This is a Java Program to implement Lloyd’s Algorithm. The LBG-algorithm or Lloyd’s algorithm allows clustering of vectors of any dimension. This is helpful for example for image classification when using the SIFT or SURF algorithms. It might be also useful if you want to cluster a large amount of points on a map.
Here is the source code of the Java Program to Implement Lloyd’s Algorithm. The Java program is successfully compiled and run on a Windows system. The program output is also shown below.
//This is a java program to implement Lloyd’s Algorithm import java.util.ArrayList; public class GenLloyd { protected double[][] samplePoints; protected double[][] clusterPoints; int[] pointApproxIndices; int pointDimension = 0; protected double epsilon = 0.0005; protected double avgDistortion = 0.0; /** * Create Generalized Lloyd object with an array of sample points */ public GenLloyd(double[][] samplePoints) { this.setSamplePoints(samplePoints); } /** * Return epsilon parameter (accuracy) */ public double getEpsilon() { return epsilon; } /** * Set epsilon parameter (accuracy). Should be a small number 0.0 < epsilon * < 0.1 */ public void setEpsilon(double epsilon) { this.epsilon = epsilon; } /** * Set array of sample points */ public void setSamplePoints(double[][] samplePoints) { if (samplePoints.length > 0) { this.samplePoints = samplePoints; this.pointDimension = samplePoints[0].length; } } /** * Get array of sample points */ public double[][] getSamplePoints() { return samplePoints; } /** * Get calculated cluster points. <numClusters> cluster points will be * calculated and returned */ public double[][] getClusterPoints(int numClusters) { this.calcClusters(numClusters); return clusterPoints; } protected void calcClusters(int numClusters) { // initialize with first cluster clusterPoints = new double[1][pointDimension]; double[] newClusterPoint = initializeClusterPoint(samplePoints); clusterPoints[0] = newClusterPoint; if (numClusters > 1) { // calculate initial average distortion avgDistortion = 0.0; for (double[] samplePoint : samplePoints) { avgDistortion += calcDist(samplePoint, newClusterPoint); } avgDistortion /= (double) (samplePoints.length * pointDimension); // set up array of point approximization indices pointApproxIndices = new int[samplePoints.length]; // split the clusters int i = 1; do { i = splitClusters(); } while (i < numClusters); } } protected int splitClusters() { int newClusterPointSize = 2; if (clusterPoints.length != 1) { newClusterPointSize = clusterPoints.length * 2; } // split clusters double[][] newClusterPoints = new double[newClusterPointSize][pointDimension]; int newClusterPointIdx = 0; for (double[] clusterPoint : clusterPoints) { newClusterPoints[newClusterPointIdx] = createNewClusterPoint( clusterPoint, -1); newClusterPoints[newClusterPointIdx + 1] = createNewClusterPoint( clusterPoint, +1); newClusterPointIdx += 2; } clusterPoints = newClusterPoints; // iterate to approximate cluster points // int iteration = 0; double curAvgDistortion = 0.0; do { curAvgDistortion = avgDistortion; // find the min values for (int pointIdx = 0; pointIdx < samplePoints.length; pointIdx++) { double minDist = Double.MAX_VALUE; for (int clusterPointIdx = 0; clusterPointIdx < clusterPoints.length; clusterPointIdx++) { double newMinDist = calcDist(samplePoints[pointIdx], clusterPoints[clusterPointIdx]); if (newMinDist < minDist) { minDist = newMinDist; pointApproxIndices[pointIdx] = clusterPointIdx; } } } // update codebook for (int clusterPointIdx = 0; clusterPointIdx < clusterPoints.length; clusterPointIdx++) { double[] newClusterPoint = new double[pointDimension]; int num = 0; for (int pointIdx = 0; pointIdx < samplePoints.length; pointIdx++) { if (pointApproxIndices[pointIdx] == clusterPointIdx) { addPointValues(newClusterPoint, samplePoints[pointIdx]); num++; } } if (num > 0) { multiplyPointValues(newClusterPoint, 1.0 / (double) num); clusterPoints[clusterPointIdx] = newClusterPoint; } } // update average distortion avgDistortion = 0.0; for (int pointIdx = 0; pointIdx < samplePoints.length; pointIdx++) { avgDistortion += calcDist(samplePoints[pointIdx], clusterPoints[pointApproxIndices[pointIdx]]); } avgDistortion /= (double) (samplePoints.length * pointDimension); } while (((curAvgDistortion - avgDistortion) / curAvgDistortion) > epsilon); return clusterPoints.length; } protected double[] initializeClusterPoint(double[][] pointsInCluster) { // calculate point sum double[] clusterPoint = new double[pointDimension]; for (int numPoint = 0; numPoint < pointsInCluster.length; numPoint++) { addPointValues(clusterPoint, pointsInCluster[numPoint]); } // calculate average multiplyPointValues(clusterPoint, 1.0 / (double) pointsInCluster.length); return clusterPoint; } protected double[] createNewClusterPoint(double[] clusterPoint, int epsilonFactor) { double[] newClusterPoint = new double[pointDimension]; addPointValues(newClusterPoint, clusterPoint); multiplyPointValues(newClusterPoint, 1.0 + (double) epsilonFactor * epsilon); return newClusterPoint; } protected double calcDist(double[] v1, double[] v2) { double distSum = 0.0; for (int pointIdx = 0; pointIdx < v1.length; pointIdx++) { double absDist = Math.abs(v1[pointIdx] - v2[pointIdx]); distSum += absDist * absDist; } return distSum; } protected void addPointValues(double[] v1, double[] v2) { for (int pointIdx = 0; pointIdx < v1.length; pointIdx++) { v1[pointIdx] += v2[pointIdx]; } } protected void multiplyPointValues(double[] v1, double f) { for (int pointIdx = 0; pointIdx < v1.length; pointIdx++) { v1[pointIdx] *= f; } } public static void main(String[] args) { ArrayList<double[]> points = new ArrayList<double[]>(); // points.add(arrayOf(-1.5, -1.5)); points.add(arrayOf(-1.5, 2.0, 5.0)); points.add(arrayOf(-2.0, -2.0, 0.0)); points.add(arrayOf(1.0, 1.0, 2.0)); points.add(arrayOf(1.5, 1.5, 1.2)); points.add(arrayOf(1.0, 2.0, 5.6)); points.add(arrayOf(1.0, -2.0, -2.0)); points.add(arrayOf(1.0, -3.0, -2.0)); points.add(arrayOf(1.0, -2.5, -4.5)); GenLloyd gl = new GenLloyd(points.toArray(new double[points.size()][2])); double[][] results = gl.getClusterPoints(4); for (double[] point : results) { System.out.println("Cluster " + point[0] + ", " + point[1] + ", " + point[2]); } } private static double[] arrayOf(double x, double y, double z) { double[] a = new double[3]; a[0] = x; a[1] = y; a[2] = z; return a; } }
Output:
$ javac GenLloyd.java $ java GenLloyd Cluster -2.0, -2.0, 0.0 Cluster 1.0, -2.5, -2.833333333333333 Cluster 1.25, 1.25, 1.6 Cluster -0.25, 2.0, 5.3
Related posts:
Converting a Stack Trace to a String in Java
REST Web service: Basic Authentication trong Jersey 2.x
Java Program to Implement Direct Addressing Tables
Java Program to Test Using DFS Whether a Directed Graph is Weakly Connected or Not
@Before vs @BeforeClass vs @BeforeEach vs @BeforeAll
Spring REST API + OAuth2 + Angular
Spring Security – Reset Your Password
Java Program to Solve Tower of Hanoi Problem using Stacks
Java Program to Implement Binomial Heap
Hướng dẫn Java Design Pattern – MVC
New Stream Collectors in Java 9
New Features in Java 13
Java Program to Implement Unrolled Linked List
A Guide to the ViewResolver in Spring MVC
Introduction to Spring Security Expressions
Spring Boot - Sending Email
Thao tác với tập tin và thư mục trong Java
Java Program to Implement ConcurrentSkipListMap API
Java Program to Find kth Largest Element in a Sequence
Spring Cloud – Tracing Services with Zipkin
Java Program to Generate Random Numbers Using Multiply with Carry Method
Map to String Conversion in Java
Apache Camel with Spring Boot
Java Program to Implement EnumMap API
Java – Reader to String
Debug a JavaMail Program
Java Program to Check for balanced parenthesis by using Stacks
Guide to Apache Commons CircularFifoQueue
Java Program to Implement Insertion Sort
Java Program to Implement an Algorithm to Find the Global min Cut in a Graph
Tính đóng gói (Encapsulation) trong java
Java Program to Implement Fisher-Yates Algorithm for Array Shuffling