/// <summary>
/// Creates a solution for the given model and problem data and registers two results: the
/// intra-cluster sum of squares of the training partition and that of the test partition.
/// </summary>
/// <param name="model">The k-Means model that is evaluated.</param>
/// <param name="problemData">Supplies the dataset and the training and test row indices.</param>
public KMeansClusteringSolution(KMeansClusteringModel model, IClusteringProblemData problemData)
   : base(model, problemData) {
   // Both partitions are evaluated before the first result is registered.
   double trainingSumOfSquares = KMeansClusteringUtil.CalculateIntraClusterSumOfSquares(
      model, problemData.Dataset, problemData.TrainingIndices);
   double testSumOfSquares = KMeansClusteringUtil.CalculateIntraClusterSumOfSquares(
      model, problemData.Dataset, problemData.TestIndices);

   var trainingResult = new Result(
      TrainingIntraClusterSumOfSquaresResultName,
      "The sum of squared distances of points of the training partition to the cluster center (is minimized by k-Means).",
      new DoubleValue(trainingSumOfSquares));
   this.Add(trainingResult);

   var testResult = new Result(
      TestIntraClusterSumOfSquaresResultName,
      "The sum of squared distances of points of the test partition to the cluster center (is minimized by k-Means).",
      new DoubleValue(testSumOfSquares));
   this.Add(testResult);
}
        /// <summary>
        /// Builds the solution from a model and its problem data. The intra-cluster sum of squares is
        /// calculated for the training rows and for the test rows, and each value is added as a result.
        /// </summary>
        /// <param name="model">The k-Means model that is evaluated.</param>
        /// <param name="problemData">Supplies the dataset and the training and test row indices.</param>
        public KMeansClusteringSolution(KMeansClusteringModel model, IClusteringProblemData problemData)
            : base(model, problemData)
        {
            // Training partition first, then test partition; nothing is added until both are known.
            double onTraining = KMeansClusteringUtil.CalculateIntraClusterSumOfSquares(
                model,
                problemData.Dataset,
                problemData.TrainingIndices);
            double onTest = KMeansClusteringUtil.CalculateIntraClusterSumOfSquares(
                model,
                problemData.Dataset,
                problemData.TestIndices);

            var trainingValue = new DoubleValue(onTraining);
            this.Add(new Result(
                TrainingIntraClusterSumOfSquaresResultName,
                "The sum of squared distances of points of the training partition to the cluster center (is minimized by k-Means).",
                trainingValue));

            var testValue = new DoubleValue(onTest);
            this.Add(new Result(
                TestIntraClusterSumOfSquaresResultName,
                "The sum of squared distances of points of the test partition to the cluster center (is minimized by k-Means).",
                testValue));
        }
    /// <summary>
    /// Calculates the intra-cluster sum of squares for the given rows. The rows are grouped by the
    /// cluster value that <paramref name="model"/> assigns to them, the mean of every group is computed
    /// from the data itself, and the squared Euclidean distances of all points to the mean of their
    /// group are summed up.
    /// </summary>
    /// <param name="model">Supplies the cluster value of every row and the input variables to use.</param>
    /// <param name="dataset">Dataset from which the values of the input variables are read.</param>
    /// <param name="rows">Rows to evaluate; the sequence is enumerated exactly once.</param>
    /// <returns>The summed squared distances over all groups; 0 when <paramref name="rows"/> is empty.</returns>
    public static double CalculateIntraClusterSumOfSquares(KMeansClusteringModel model, IDataset dataset, IEnumerable<int> rows) {
      // Materialize once: the rows are needed for the cluster lookup and again for reading the points.
      // Enumerating a lazy sequence twice repeats its work and may pair points with the wrong cluster.
      List<int> rowList = rows.ToList();
      List<int> clusterValues = model.GetClusterValues(dataset, rowList).ToList();
      List<string> allowedInputVariables = model.AllowedInputVariables.ToList();
      int nCols = allowedInputVariables.Count;

      // collect points of clusters; a group is created when its cluster value is first seen
      Dictionary<int, List<double[]>> clusterPoints = new Dictionary<int, List<double[]>>();
      for (int r = 0; r < rowList.Count; r++) {
        double[] point = new double[nCols];
        for (int i = 0; i < nCols; i++) {
          point[i] = dataset.GetDoubleValue(allowedInputVariables[i], rowList[r]);
        }
        List<double[]> bucket;
        if (!clusterPoints.TryGetValue(clusterValues[r], out bucket)) {
          bucket = new List<double[]>();
          clusterPoints.Add(clusterValues[r], bucket);
        }
        bucket.Add(point);
      }

      double allCenterDistances = 0;
      foreach (List<double[]> points in clusterPoints.Values) {
        // calculate cluster mean (every group holds at least one point, so Count is never 0)
        double[] mean = new double[nCols];
        foreach (double[] p in points) {
          for (int i = 0; i < nCols; i++) {
            mean[i] += p[i];
          }
        }
        for (int i = 0; i < nCols; i++) {
          mean[i] /= points.Count;
        }

        // calculate squared distances of the group's points to that mean
        double centerDistances = 0;
        foreach (double[] p in points) {
          double centerDistance = 0;
          for (int i = 0; i < nCols; i++) {
            double d = mean[i] - p[i];
            centerDistance += d * d;
          }
          centerDistances += centerDistance;
        }
        allCenterDistances += centerDistances;
      }
      return allCenterDistances;
    }
 /// <summary>
 /// Creates a copy of <paramref name="original"/>. The allowed input variables and every
 /// center vector are duplicated, so the copy shares no arrays with the original.
 /// </summary>
 /// <param name="original">The model to copy.</param>
 /// <param name="cloner">Passed on to the base class constructor.</param>
 private KMeansClusteringModel(KMeansClusteringModel original, Cloner cloner)
     : base(original, cloner)
 {
     this.allowedInputVariables = (string[])original.allowedInputVariables.Clone();

     // Copying only the list would leave both models pointing at the same double[] instances,
     // so a write to one model's center would show up in the other. Clone each vector instead.
     List<double[]> copiedCenters = new List<double[]>();
     foreach (double[] center in original.Centers)
     {
         copiedCenters.Add(center == null ? null : (double[])center.Clone());
     }
     this.centers = copiedCenters;
 }
Example #5
0
        /// <summary>
        /// Calculates the intra-cluster sum of squares for the given rows. Each row is assigned to the
        /// group of the cluster value that <paramref name="model"/> reports for it; per group the mean
        /// point is computed from the data, and the squared Euclidean distances of the group's points
        /// to that mean are accumulated into the returned total.
        /// </summary>
        /// <param name="model">Supplies the cluster value of every row and the input variables to use.</param>
        /// <param name="dataset">Dataset from which the values of the input variables are read.</param>
        /// <param name="rows">Rows to evaluate; the sequence is enumerated exactly once.</param>
        /// <returns>The summed squared distances over all groups; 0 when <paramref name="rows"/> is empty.</returns>
        public static double CalculateIntraClusterSumOfSquares(KMeansClusteringModel model, IDataset dataset, IEnumerable <int> rows)
        {
            // Take a snapshot of the rows first. The original enumerated the sequence twice (cluster
            // lookup and point collection), which is wrong for lazy sequences that are not repeatable.
            List <int>    rowList               = rows.ToList();
            List <int>    clusterValues         = model.GetClusterValues(dataset, rowList).ToList();
            List <string> allowedInputVariables = model.AllowedInputVariables.ToList();
            int           nCols = allowedInputVariables.Count;

            // collect points of clusters; a group is created when its cluster value is first seen
            Dictionary <int, List <double[]> > clusterPoints = new Dictionary <int, List <double[]> >();

            for (int r = 0; r < rowList.Count; r++)
            {
                double[] point = new double[nCols];
                for (int i = 0; i < nCols; i++)
                {
                    point[i] = dataset.GetDoubleValue(allowedInputVariables[i], rowList[r]);
                }

                List <double[]> bucket;
                if (!clusterPoints.TryGetValue(clusterValues[r], out bucket))
                {
                    bucket = new List <double[]>();
                    clusterPoints.Add(clusterValues[r], bucket);
                }
                bucket.Add(point);
            }

            double allCenterDistances = 0;

            foreach (List <double[]> points in clusterPoints.Values)
            {
                // calculate cluster mean (every group holds at least one point, so Count is never 0)
                double[] mean = new double[nCols];
                foreach (double[] p in points)
                {
                    for (int i = 0; i < nCols; i++)
                    {
                        mean[i] += p[i];
                    }
                }
                for (int i = 0; i < nCols; i++)
                {
                    mean[i] /= points.Count;
                }

                // calculate squared distances of the group's points to that mean
                double centerDistances = 0;
                foreach (double[] p in points)
                {
                    double centerDistance = 0;
                    for (int i = 0; i < nCols; i++)
                    {
                        double d = mean[i] - p[i];
                        centerDistance += d * d;
                    }
                    centerDistances += centerDistance;
                }
                allCenterDistances += centerDistances;
            }
            return allCenterDistances;
        }
 /// <summary>
 /// Creates a copy of <paramref name="original"/>. Both the array of allowed input variables and
 /// each individual center vector are duplicated, so the copy shares no arrays with the original.
 /// </summary>
 /// <param name="original">The model to copy.</param>
 /// <param name="cloner">Passed on to the base class constructor.</param>
 private KMeansClusteringModel(KMeansClusteringModel original, Cloner cloner)
   : base(original, cloner) {
   this.allowedInputVariables = (string[])original.allowedInputVariables.Clone();

   // A plain list copy would still reference the original's double[] instances; clone every
   // center so that a later write to one model cannot leak into the other.
   List<double[]> copiedCenters = new List<double[]>();
   foreach (double[] center in original.Centers) {
     copiedCenters.Add(center == null ? null : (double[])center.Clone());
   }
   this.centers = copiedCenters;
 }