/// <summary>
/// Complete-linkage distance: the largest <c>Point.Distance</c> between any point of this
/// cluster and any point of <paramref name="c"/>.
/// </summary>
/// <param name="c">The cluster to measure against.</param>
/// <returns>
/// The maximum cross-cluster pairwise distance, or <see cref="double.MinValue"/> when
/// either cluster has no points (the loops never run and the sentinel is returned).
/// </returns>
public double CompleteLink(Cluster c)
{
    var farthest = double.MinValue;

    foreach (var mine in Points)
    {
        foreach (var theirs in c.Points)
        {
            var gap = Point.Distance(mine, theirs);

            // Keep the current value unless the new pair is strictly farther apart.
            farthest = gap > farthest ? gap : farthest;
        }
    }

    return farthest;
}
/// <summary>
/// Mean-linkage distance: the <c>Point.Distance</c> between the coordinate mean of this
/// cluster's points and the coordinate mean of the points of <paramref name="c"/>.
/// </summary>
/// <param name="c">The cluster to measure against.</param>
/// <returns>The distance between the two mean points.</returns>
public double MeanLink(Cluster c)
{
    var ownMean = MeanPointOf(this);
    var otherMean = MeanPointOf(c);

    return Point.Distance(ownMean, otherMean);
}

/// <summary>
/// Builds a new point whose X and Y are the averages of the X and Y of every point in
/// <paramref name="cluster"/>.
/// </summary>
/// <remarks>
/// NOTE(review): for a cluster with no points this divides by a Count of zero; what that
/// yields depends on the numeric type of Point.X / Point.Y, which is not visible here.
/// </remarks>
private static Point MeanPointOf(Cluster cluster)
{
    var mean = new Point { X = 0, Y = 0 };

    // Accumulate the coordinate sums directly in the result point...
    foreach (var p in cluster.Points)
    {
        mean.X += p.X;
        mean.Y += p.Y;
    }

    // ...then turn the sums into averages.
    mean.X /= cluster.Points.Count;
    mean.Y /= cluster.Points.Count;

    return mean;
}
/// <summary>
/// Single-linkage distance: the smallest <c>Point.Distance</c> between any point of this
/// cluster and any point of <paramref name="c"/>.
/// </summary>
/// <param name="c">The cluster to measure against.</param>
/// <returns>
/// The minimum cross-cluster pairwise distance, or <see cref="double.MaxValue"/> when
/// either cluster has no points (the loops never run and the sentinel is returned).
/// </returns>
public double SingleLink(Cluster c)
{
    var nearest = double.MaxValue;

    foreach (var mine in Points)
    {
        foreach (var theirs in c.Points)
        {
            var gap = Point.Distance(mine, theirs);

            // Keep the current value unless the new pair is strictly closer together.
            nearest = gap < nearest ? gap : nearest;
        }
    }

    return nearest;
}
/// <summary>
/// Absorbs another cluster: enumerates the points of <paramref name="c"/> in order and
/// passes each one to <c>AddPoint</c> on this cluster.
/// </summary>
/// <param name="c">The cluster whose points are added to this one.</param>
/// <remarks>
/// NOTE(review): if AddPoint appends to Points, passing this cluster itself would modify
/// the collection while it is being enumerated - confirm callers never do that.
/// </remarks>
public void MergeClusters(Cluster c)
{
    foreach (var incoming in c.Points)
    {
        AddPoint(incoming);
    }
}
/// <summary>
/// Seeds three separate lists of one-point clusters from <paramref name="points"/> and
/// hands them to the single-, complete- and mean-link clustering routines, in that order.
/// </summary>
/// <param name="points">The points to cluster; enumerated exactly once.</param>
private static void Links(IEnumerable<Point> points)
{
    var forSingleLink = new List<Cluster>();
    var forCompleteLink = new List<Cluster>();
    var forMeanLink = new List<Cluster>();
    var targets = new[] { forSingleLink, forCompleteLink, forMeanLink };

    foreach (var p in points)
    {
        for (var t = 0; t < targets.Length; t++)
        {
            var seed = new Cluster();

            // The single-link list holds the caller's Point instance itself; the other
            // two lists each get their own copy (same Id, X and Y).
            seed.AddPoint(t == 0 ? p : new Point { Id = p.Id, X = p.X, Y = p.Y });
            targets[t].Add(seed);
        }
    }

    SingleLinkCluster(forSingleLink);
    CompleLinkCluster(forCompleteLink);
    MeanLinkCluster(forMeanLink);
}
/// <summary>
/// Chooses three centers with k-means++ seeding, assigns every point to its nearest
/// center and reports the outcome.
/// </summary>
/// <param name="points">
/// The points to cluster. The first element is always used as the first center, so the
/// list must not be empty.
/// </param>
/// <returns>
/// A tuple of:
/// (1) the sum, over the second and third centers, of the squared distance each had to
///     its nearest already-chosen center at the moment it was drawn;
/// (2) a text listing of the points grouped by assigned center (one point per line,
///     each group followed by an empty line);
/// (3) three clusters, each containing only its center point.
/// </returns>
/// <remarks>
/// Each new center is drawn with probability proportional to the squared distance to the
/// nearest center chosen so far. Points that coincide with an existing center have zero
/// weight and are never drawn while any other point is available. Exactly one random
/// number is consumed per drawn center.
/// </remarks>
private static Tuple<double, string, List<Cluster>> KMeansPlusPlus(IList<Point> points)
{
    const int centerCount = 3;

    // phi[i] is the index of the center that points[i] is assigned to.
    var phi = new int[points.Count];

    // Choose the first center arbitrarily: the first input point.
    var clusters = new Point[centerCount];
    clusters[0] = points[0];

    var threeCenterCost = 0.0;
    for (var i = 1; i < centerCount; i++)
    {
        // weights[j] = squared distance from points[j] to the nearest of the i centers
        // chosen so far (not merely to the most recently chosen one).
        var weights = new double[points.Count];
        var total = 0.0;
        for (var j = 0; j < points.Count; j++)
        {
            double nearest = Point.Distance(points[j], clusters[0]);
            for (var k = 1; k < i; k++)
            {
                var dist = Point.Distance(points[j], clusters[k]);
                if (dist < nearest)
                {
                    nearest = dist;
                }
            }

            weights[j] = nearest * nearest;
            total += weights[j];
        }

        // Walk the cumulative distribution until it reaches the random draw.
        var nextCluster = Random.NextDouble();
        var chosen = -1;
        var lastCandidate = -1;
        var runningTotal = 0.0;
        for (var j = 0; j < points.Count; j++)
        {
            // A zero-weight point sits on an existing center; it must not be drawn,
            // not even when the random draw is exactly 0.
            if (!(weights[j] > 0))
            {
                continue;
            }

            lastCandidate = j;
            runningTotal += weights[j] / total;
            if (nextCluster <= runningTotal)
            {
                chosen = j;
                break;
            }
        }

        if (chosen < 0)
        {
            // Round-off can leave the cumulative sum slightly below the draw; fall back
            // to the last eligible point. If no point is eligible (every point coincides
            // with a chosen center) any point is as good as another, so take the first.
            chosen = lastCandidate >= 0 ? lastCandidate : 0;
        }

        clusters[i] = points[chosen];
        threeCenterCost += weights[chosen];
    }

    // Assign every point, centers included, to its nearest center. A center is at
    // distance zero from itself and therefore lands in its own group.
    for (var i = 0; i < points.Count; i++)
    {
        var m = double.MaxValue;
        for (var j = 0; j < clusters.Length; j++)
        {
            var dist = Point.Distance(points[i], clusters[j]);
            if (dist < m)
            {
                phi[i] = j;
                m = dist;
            }
        }
    }

    // Render the groups: all points of center 0, blank line, center 1, blank line, ...
    var sb = new StringBuilder();
    for (var i = 0; i < clusters.Length; i++)
    {
        for (var j = 0; j < points.Count; j++)
        {
            if (phi[j] == i)
            {
                sb.AppendLine(points[j].ToString());
            }
        }

        sb.AppendLine();
    }

    var c1 = new Cluster();
    var c2 = new Cluster();
    var c3 = new Cluster();
    c1.AddPoint(clusters[0]);
    c2.AddPoint(clusters[1]);
    c3.AddPoint(clusters[2]);

    return new Tuple<double, string, List<Cluster>>(
        threeCenterCost,
        sb.ToString(),
        new List<Cluster> { c1, c2, c3 });
}
/// <summary>
/// Picks three seed points, makes each the first member of its own cluster, assigns every
/// remaining point to the cluster whose current centroid is nearest, and prints the result
/// via <c>PrintClusters</c>.
/// </summary>
/// <param name="points">The points to cluster.</param>
/// <remarks>
/// The first seed is the point whose Id is "a"; <c>First</c> throws
/// <see cref="InvalidOperationException"/> when no such point exists. The method returns
/// without printing when either call to FindLargestDistanceFromPointTuple yields no point.
/// Points are assigned in a single pass, reading each cluster's Centroid at the time the
/// point is considered.
/// </remarks>
private static void KMeans(ICollection<Point> points)
{
    // Seed 1 is fixed to point "a" so that repeated runs start from the same place.
    var seedA = points.First(t => t.Id == "a");

    // Seed 2: the point reported for seed 1 by FindLargestDistanceFromPointTuple
    // (presumably the point farthest from it - helper not visible here).
    var fromSeedA = FindLargestDistanceFromPointTuple(points, seedA);
    if (fromSeedA.Item1 == null)
    {
        return;
    }

    var seedB = fromSeedA.Item1;

    // Seed 3: the same query, made from the midpoint of the first two seeds.
    var midpoint = new Point
    {
        X = (seedA.X + seedB.X) / 2,
        Y = (seedA.Y + seedB.Y) / 2,
    };
    var fromMidpoint = FindLargestDistanceFromPointTuple(points, midpoint);
    if (fromMidpoint.Item1 == null)
    {
        return;
    }

    var seedC = fromMidpoint.Item1;

    // One cluster per seed.
    var seeds = new[] { seedA, seedB, seedC };
    var clusters = new List<Cluster>();
    foreach (var seed in seeds)
    {
        var seeded = new Cluster();
        seeded.AddPoint(seed);
        clusters.Add(seeded);
    }

    foreach (var point in points)
    {
        // Seeds (matched by Id) are already placed.
        if (seeds.Any(t => t.Id == point.Id))
        {
            continue;
        }

        Cluster nearest = null;
        var nearestDistance = double.MaxValue;
        foreach (var candidate in clusters)
        {
            var dist = Point.Distance(candidate.Centroid, point);
            if (dist < nearestDistance)
            {
                nearest = candidate;
                nearestDistance = dist;
            }
        }

        if (nearest != null)
        {
            nearest.AddPoint(point);
        }
    }

    PrintClusters(clusters);
}
/// <summary>
/// Greedy three-center selection: starts from the first point, takes the point farthest
/// from it as the second center, obtains the third from GonzalezFurthestCluster, then
/// assigns every point to its nearest center.
/// </summary>
/// <param name="points">
/// The points to cluster; the first element is used as the first center, so the list must
/// not be empty.
/// </param>
/// <returns>
/// A tuple of: (1) a cost equal to twice the distance between the first and second
/// centers, (2) a text listing of the points grouped by assigned center (one point per
/// line, each group followed by an empty line), and (3) three clusters, each containing
/// only its center point.
/// </returns>
private static Tuple<double, string, List<Cluster>> Gonzales3Center(IList<Point> points)
{
    // assignment[i] is the index of the center for points[i]. A new int[] is all zeros,
    // so every point starts out assigned to center 0.
    var assignment = new int[points.Count];

    var centers = new Point[3];
    centers[0] = points[0];
    centers[1] = points[0];

    // Second center: the point farthest from the first one.
    var farthestFromFirst = 0.0;
    foreach (var candidate in points)
    {
        var d = Point.Distance(candidate, centers[0]);
        if (d > farthestFromFirst)
        {
            farthestFromFirst = d;
            centers[1] = candidate;
        }
    }

    // Split the points between the first two centers; ties stay with center 0.
    for (var p = 0; p < points.Count; p++)
    {
        var toFirst = Point.Distance(points[p], centers[0]);
        var toSecond = Point.Distance(points[p], centers[1]);
        if (toFirst > toSecond)
        {
            assignment[p] = 1;
        }
    }

    // Third center: whatever GonzalezFurthestCluster reports for the midpoint of the
    // first two centers, given the two-way assignment above. Item1 is used as an index
    // into points. (An alternative would be to query it once per existing center and keep
    // the result with the larger Item2.)
    var midpoint = new Point
    {
        X = (centers[0].X + centers[1].X) / 2,
        Y = (centers[0].Y + centers[1].Y) / 2
    };
    var third = GonzalezFurthestCluster(points, assignment, midpoint);
    centers[2] = points[third.Item1];

    // Final assignment: nearest of the three centers, lowest index winning ties.
    for (var p = 0; p < points.Count; p++)
    {
        var best = double.MaxValue;
        for (var k = 0; k < 3; k++)
        {
            var d = Point.Distance(points[p], centers[k]);
            if (d < best)
            {
                best = d;
                assignment[p] = k;
            }
        }
    }

    // The reported cost counts the first-to-second-center distance twice.
    // NOTE(review): the second term may have been meant to be a different quantity
    // (e.g. something derived from the third center) - confirm before relying on it.
    var cost = farthestFromFirst + farthestFromFirst;

    // Render the groups: all points of center 0, blank line, center 1, blank line, ...
    var listing = new StringBuilder();
    for (var k = 0; k < centers.Length; k++)
    {
        for (var p = 0; p < points.Count; p++)
        {
            if (assignment[p] == k)
            {
                listing.AppendLine(points[p].ToString());
            }
        }

        listing.AppendLine();
    }

    // One result cluster per center, holding just that center.
    var centerClusters = new List<Cluster>();
    foreach (var center in centers)
    {
        var holder = new Cluster();
        holder.AddPoint(center);
        centerClusters.Add(holder);
    }

    return new Tuple<double, string, List<Cluster>>(cost, listing.ToString(), centerClusters);
}