/// <summary>
/// Creates a solution for <paramref name="model"/> on <paramref name="problemData"/> and adds
/// the intra-cluster sum of squares of the training and of the test partition as results.
/// </summary>
/// <param name="model">Clustering model; forwarded to the base constructor and used to evaluate both partitions.</param>
/// <param name="problemData">Supplies the dataset as well as the training and test row indices.</param>
public KMeansClusteringSolution(KMeansClusteringModel model, IClusteringProblemData problemData)
  : base(model, problemData) {
  // Both quality values are computed up front, before any result is added.
  double trainingSumOfSquares = KMeansClusteringUtil.CalculateIntraClusterSumOfSquares(
    model, problemData.Dataset, problemData.TrainingIndices);
  double testSumOfSquares = KMeansClusteringUtil.CalculateIntraClusterSumOfSquares(
    model, problemData.Dataset, problemData.TestIndices);

  var trainingResult = new Result(
    TrainingIntraClusterSumOfSquaresResultName,
    "The sum of squared distances of points of the training partition to the cluster center (is minimized by k-Means).",
    new DoubleValue(trainingSumOfSquares));
  this.Add(trainingResult);

  var testResult = new Result(
    TestIntraClusterSumOfSquaresResultName,
    "The sum of squared distances of points of the test partition to the cluster center (is minimized by k-Means).",
    new DoubleValue(testSumOfSquares));
  this.Add(testResult);
}
/// <summary>
/// Calculates the intra-cluster sum of squares for the given rows: the squared Euclidean distance
/// of every row to the mean of the cluster it was assigned to, summed over all rows.
/// </summary>
/// <remarks>
/// The cluster means are recomputed from the points of <paramref name="rows"/> that share a cluster
/// value; the model is only used for the cluster assignment and the list of input variables.
/// </remarks>
/// <param name="model">Provides the cluster value for each row and the names of the input variables.</param>
/// <param name="dataset">Dataset from which the input variable values are read.</param>
/// <param name="rows">Rows to evaluate. The sequence is enumerated exactly once.</param>
/// <returns>The summed squared distances over all clusters; 0 if <paramref name="rows"/> is empty.</returns>
public static double CalculateIntraClusterSumOfSquares(KMeansClusteringModel model, IDataset dataset, IEnumerable<int> rows) {
  // Materialize the rows once. They are needed for the cluster assignment and again for the value
  // lookup; a lazily evaluated sequence would otherwise be re-evaluated for the second pass.
  List<int> rowList = rows.ToList();
  List<int> clusterValues = model.GetClusterValues(dataset, rowList).ToList();
  List<string> allowedInputVariables = model.AllowedInputVariables.ToList();
  int nCols = allowedInputVariables.Count;

  // collect points of clusters; clusterValues[r] is used as the cluster of rowList[r]
  Dictionary<int, List<double[]>> clusterPoints = new Dictionary<int, List<double[]>>();
  for (int r = 0; r < rowList.Count; r++) {
    double[] p = new double[nCols];
    for (int i = 0; i < nCols; i++) {
      p[i] = dataset.GetDoubleValue(allowedInputVariables[i], rowList[r]);
    }

    int clusterValue = clusterValues[r];
    List<double[]> bucket;
    if (!clusterPoints.TryGetValue(clusterValue, out bucket)) {
      bucket = new List<double[]>();
      clusterPoints.Add(clusterValue, bucket);
    }
    bucket.Add(p);
  }

  double allCenterDistances = 0;
  foreach (List<double[]> points in clusterPoints.Values) {
    // calculate cluster mean; a cluster only exists once a point was added, so Count is never 0
    double[] mean = new double[nCols];
    foreach (double[] p in points) {
      for (int i = 0; i < nCols; i++) {
        mean[i] += p[i];
      }
    }
    for (int i = 0; i < nCols; i++) {
      mean[i] /= points.Count;
    }

    // calculate squared distances of the cluster's points to that mean
    double centerDistances = 0;
    foreach (double[] p in points) {
      double centerDistance = 0;
      for (int i = 0; i < nCols; i++) {
        double d = mean[i] - p[i];
        centerDistance += d * d;
      }
      centerDistances += centerDistance;
    }
    allCenterDistances += centerDistances;
  }
  return allCenterDistances;
}
/// <summary>
/// Cloning constructor: initializes this model from <paramref name="original"/>.
/// </summary>
/// <param name="original">Model whose allowed input variables and centers are copied.</param>
/// <param name="cloner">Cloner that is forwarded to the base-class constructor.</param>
private KMeansClusteringModel(KMeansClusteringModel original, Cloner cloner)
  : base(original, cloner) {
  // The variable names are copied into a new array.
  string[] variableNames = (string[])original.allowedInputVariables.Clone();
  allowedInputVariables = variableNames;

  // A new list is built over the original's centers: the list itself is new, but the double[]
  // elements are the same array instances as in the original.
  // NOTE(review): presumably the center arrays are never mutated after construction - confirm.
  var centerList = new List<double[]>(original.Centers);
  centers = centerList;
}
/// <summary>
/// Calculates the intra-cluster sum of squares for the given rows: the squared Euclidean distance
/// of every row to the mean of the cluster it was assigned to, summed over all rows.
/// </summary>
/// <remarks>
/// The cluster means are recomputed from the points of <paramref name="rows"/> that share a cluster
/// value; the model is only used for the cluster assignment and the list of input variables.
/// </remarks>
/// <param name="model">Provides the cluster value for each row and the names of the input variables.</param>
/// <param name="dataset">Dataset from which the input variable values are read.</param>
/// <param name="rows">Rows to evaluate. The sequence is enumerated exactly once.</param>
/// <returns>The summed squared distances over all clusters; 0 if <paramref name="rows"/> is empty.</returns>
public static double CalculateIntraClusterSumOfSquares(KMeansClusteringModel model, IDataset dataset, IEnumerable<int> rows) {
  // Materialize the rows once. They are needed for the cluster assignment and again for the value
  // lookup; a lazily evaluated sequence would otherwise be re-evaluated for the second pass.
  List<int> rowList = rows.ToList();
  List<int> clusterValues = model.GetClusterValues(dataset, rowList).ToList();
  List<string> allowedInputVariables = model.AllowedInputVariables.ToList();
  int nCols = allowedInputVariables.Count;

  // collect points of clusters; clusterValues[r] is used as the cluster of rowList[r]
  Dictionary<int, List<double[]>> clusterPoints = new Dictionary<int, List<double[]>>();
  for (int r = 0; r < rowList.Count; r++) {
    double[] p = new double[nCols];
    for (int i = 0; i < nCols; i++) {
      p[i] = dataset.GetDoubleValue(allowedInputVariables[i], rowList[r]);
    }

    int clusterValue = clusterValues[r];
    List<double[]> bucket;
    if (!clusterPoints.TryGetValue(clusterValue, out bucket)) {
      bucket = new List<double[]>();
      clusterPoints.Add(clusterValue, bucket);
    }
    bucket.Add(p);
  }

  double allCenterDistances = 0;
  foreach (List<double[]> points in clusterPoints.Values) {
    // calculate cluster mean; a cluster only exists once a point was added, so Count is never 0
    double[] mean = new double[nCols];
    foreach (double[] p in points) {
      for (int i = 0; i < nCols; i++) {
        mean[i] += p[i];
      }
    }
    for (int i = 0; i < nCols; i++) {
      mean[i] /= points.Count;
    }

    // calculate squared distances of the cluster's points to that mean
    double centerDistances = 0;
    foreach (double[] p in points) {
      double centerDistance = 0;
      for (int i = 0; i < nCols; i++) {
        double d = mean[i] - p[i];
        centerDistance += d * d;
      }
      centerDistances += centerDistance;
    }
    allCenterDistances += centerDistances;
  }
  return allCenterDistances;
}
/// <summary>
/// Cloning constructor: initializes this model from <paramref name="original"/>.
/// </summary>
/// <param name="original">Model whose allowed input variables and centers are copied.</param>
/// <param name="cloner">Cloner that is forwarded to the base-class constructor.</param>
private KMeansClusteringModel(KMeansClusteringModel original, Cloner cloner)
  : base(original, cloner) {
  // The variable names are copied into a new array.
  string[] variableNames = (string[])original.allowedInputVariables.Clone();
  allowedInputVariables = variableNames;

  // A new list is built over the original's centers: the list itself is new, but the double[]
  // elements are the same array instances as in the original.
  // NOTE(review): presumably the center arrays are never mutated after construction - confirm.
  var centerList = new List<double[]>(original.Centers);
  centers = centerList;
}