/// <summary>
/// Calculates the silhouette width of a single assignment:
///
///     s(i)    = (b(i) - a(i)) / max(a(i), b(i))
///     d(i, c) = avg(all j in cluster c)( | x(i) - x(j) | )
///     a(i)    = d(i, c) where c is i's cluster
///     b(i)    = lowest d(i, c) for all c which aren't i's cluster
///
/// This is computed here rather than delegated to R because R can't handle big matrices.
/// </summary>
/// <param name="ass">The assignment being scored; its own cluster is taken from <c>ass.Assignment.Cluster</c>.</param>
/// <param name="clusters">The clusters to compare against. The assignment's own cluster is skipped.</param>
/// <param name="silhouette">
/// Receives s(i). Receives 0 when a(i) and b(i) are both 0, and <see cref="double.NaN"/> when
/// no other cluster yields a usable distance (so b(i) is undefined).
/// </param>
/// <param name="nearestCluster">
/// Receives the other cluster with the lowest d(i, c), or <c>null</c> if there is none.
/// </param>
/// <remarks>
/// NOTE(review): a(i) averages over every assignment in the own cluster's list; if that list
/// contains the assignment itself, its self-distance is part of the average, which differs
/// from the textbook definition - confirm this is intended.
/// </remarks>
public static void CalculateSilhouette(ResultClusterer.ForStat ass, IReadOnlyList<Cluster> clusters, out double silhouette, out Cluster nearestCluster)
{
    Cluster ownCluster = ass.Assignment.Cluster;

    // a = d(i, c) where c is i's cluster
    double a = CalculateSilhouette_CalculateD(ass, ownCluster.Assignments.List);

    // b = lowest d(i, c) over every cluster that isn't i's cluster
    Cluster nearest = null;
    double b = double.MaxValue;

    foreach (Cluster cluster in clusters)
    {
        if (cluster != ownCluster)
        {
            double d = CalculateSilhouette_CalculateD(ass, cluster.Assignments.List);

            // An empty cluster yields NaN, which never compares less-than and is therefore skipped.
            if (d < b)
            {
                b = d;
                nearest = cluster;
            }
        }
    }

    nearestCluster = nearest;

    if (nearest == null)
    {
        // No other cluster was found, so b is still the sentinel value. Dividing by it would
        // report a meaningless score of roughly 1; report "undefined" instead.
        silhouette = double.NaN;
        return;
    }

    double scale = Math.Max(a, b);

    // Exact comparison is intentional: it guards only the 0 / 0 case (a == b == 0), for which
    // the silhouette is 0 just as it is for any other a == b.
    silhouette = scale == 0.0 ? 0.0 : (b - a) / scale;
}
/// <summary>
/// Used by CalculateSilhouette.
/// Calculates d(i, c): the mean of the distance-matrix entries between the vector of
/// <paramref name="stat"/>'s assignment and the vector of each assignment in <paramref name="ass2s"/>.
/// </summary>
/// <param name="stat">Supplies the distance matrix and the assignment whose vector is "i".</param>
/// <param name="ass2s">The assignments of the cluster "c" to average over.</param>
/// <returns>The mean distance, or <see cref="double.NaN"/> if <paramref name="ass2s"/> is empty.</returns>
private static double CalculateSilhouette_CalculateD(ResultClusterer.ForStat stat, IReadOnlyList<Assignment> ass2s)
{
    int count = ass2s.Count;

    // Nothing to average over.
    if (count == 0)
    {
        return double.NaN;
    }

    double total = 0.0;

    foreach (Assignment other in ass2s)
    {
        total += stat.DistanceMatrix.Values[stat.Assignment.Vector.Index, other.Vector.Index];
    }

    return total / count;
}