Example #1
0
        /// <summary>
        /// Reassigns every data row to the cluster selected by <c>FindMinimumIndex</c>
        /// over the row's <c>GetDistance</c> values to each cluster mean.
        /// </summary>
        /// <param name="result">Supplies <c>ClusterMeanValues</c> and <c>NormalizedData</c>; its <c>ClusterAssignment</c> entries are overwritten in place.</param>
        /// <param name="data">Supplies the row count and the attribute weights handed to <c>GetDistance</c>.</param>
        /// <param name="options">Supplies the number of clusters.</param>
        /// <returns><c>true</c> if at least one row moved to a different cluster; otherwise <c>false</c>.</returns>
        internal static bool UpdateClusterAssignment(ClusterResult result, ClusterData data, ClusterAlgorithmOptions options)
        {
            int clusterCount = options.NumberOfClusters;

            //one scratch buffer shared by all rows: every slot is rewritten below
            //before it is read, so allocating a fresh array per row is unnecessary
            double[] distances = new double[clusterCount];
            bool updated = false;

            for (int row = 0; row < data.RowCount; row++)
            {
                //determine distance from this row to each cluster mean
                for (int cluster = 0; cluster < clusterCount; cluster++)
                {
                    distances[cluster] = GetDistance(
                        result.ClusterMeanValues[cluster],
                        result.NormalizedData[row],
                        data.AttributeWeights);
                }

                //update assignment only when the best cluster differs from the current one
                int closestCluster = FindMinimumIndex(distances);
                if (closestCluster != result.ClusterAssignment[row])
                {
                    result.ClusterAssignment[row] = closestCluster;
                    updated = true;
                }
            }
            return updated;
        }
Example #2
0
        /// <summary>
        /// Recomputes <c>result.ClusterMeanValues</c> from the current cluster assignment.
        /// </summary>
        /// <remarks>
        /// Each mean component is the sum of <c>AttributeWeights[col] * NormalizedData[row][col]</c>
        /// over the rows assigned to the cluster, divided by the number of those rows.
        /// Clusters are processed in index order; when an empty cluster is encountered the method
        /// returns immediately, so clusters with a lower index have already received new means
        /// while the remaining ones keep their previous values.
        /// NOTE(review): the weights are applied here and are also passed to <c>GetDistance</c>
        /// by the callers visible in this file - confirm that this is the intended weighting.
        /// </remarks>
        /// <param name="result">Supplies the assignment and normalized data; receives the new means.</param>
        /// <param name="data">Supplies the column count and attribute weights.</param>
        /// <param name="options">Supplies the number of clusters.</param>
        /// <returns><c>false</c> if some cluster has no assigned row; otherwise <c>true</c>.</returns>
        internal static bool UpdateClusterMeanValues(ClusterResult result, ClusterData data, ClusterAlgorithmOptions options)
        {
            int clusterCount = options.NumberOfClusters;
            int columnCount = data.ColumnCount;

            //per-cluster weighted column sums, plus a separate member counter per cluster
            double[][] weightedSums = new double[clusterCount][];
            int[] memberCounts = new int[clusterCount];
            for (int i = 0; i < clusterCount; i++)
            {
                weightedSums[i] = new double[columnCount];
            }

            //accumulate every assigned row into the sums of its cluster
            for (int row = 0; row < result.ClusterAssignment.Length; row++)
            {
                int assigned = result.ClusterAssignment[row];
                memberCounts[assigned]++;

                double[] sums = weightedSums[assigned];
                for (int col = 0; col < columnCount; col++)
                {
                    sums[col] += data.AttributeWeights[col] * result.NormalizedData[row][col];
                }
            }

            //turn sums into means; stop at the first cluster without members
            for (int cluster = 0; cluster < clusterCount; cluster++)
            {
                int members = memberCounts[cluster];
                if (members == 0)
                {
                    return false;
                }

                double[] sums = weightedSums[cluster];
                for (int col = 0; col < columnCount; col++)
                {
                    result.ClusterMeanValues[cluster][col] = sums[col] / members;
                }
            }
            return true;
        }
Example #3
0
        /// <summary>
        /// Creates a <c>ClusterResult</c> with an initial cluster assignment and a zero-filled mean matrix.
        /// </summary>
        /// <remarks>
        /// The first <c>NumberOfClusters</c> rows are assigned to clusters 0, 1, 2, ... in order so that
        /// no cluster starts empty; the remaining rows are assigned using a <see cref="Random"/>
        /// seeded with <c>options.RandomSeed</c>, so equal seeds yield equal assignments.
        /// </remarks>
        /// <param name="input">Supplies the row count and the raw data (used for the column count).</param>
        /// <param name="options">Supplies the number of clusters and the random seed.</param>
        /// <returns>The initialized result; <c>NormalizedData</c> is not set here.</returns>
        /// <exception cref="ArgumentException">
        /// More clusters are requested than there are data rows.
        /// </exception>
        internal static ClusterResult InitializeClusters(ClusterData input, ClusterAlgorithmOptions options)
        {
            int clusterCount = options.NumberOfClusters;
            int rowCount = input.RowCount;

            //seeding one row per cluster is impossible with fewer rows than clusters;
            //fail with a clear message instead of running past the assignment array
            if (clusterCount > rowCount)
            {
                throw new ArgumentException(
                    "The number of clusters (" + clusterCount + ") must not exceed the number of data rows (" + rowCount + ").",
                    "options");
            }

            ClusterResult result = new ClusterResult();
            Random        random = new Random(options.RandomSeed);

            result.ClusterAssignment = new int[rowCount];
            //ensure each cluster gets one data point
            for (int row = 0; row < clusterCount; row++)
            {
                result.ClusterAssignment[row] = row;
            }
            //assign rest of data points randomly
            for (int row = clusterCount; row < rowCount; row++)
            {
                result.ClusterAssignment[row] = random.Next(0, clusterCount);
            }

            //initialize mean value matrix (without data); width is taken from the first raw data row
            result.ClusterMeanValues = new double[clusterCount][];
            int colCount = input.RawData[0].Length;

            for (int cluster = 0; cluster < clusterCount; cluster++)
            {
                result.ClusterMeanValues[cluster] = new double[colCount];
            }
            return result;
        }
Example #4
0
        /// <summary>
        /// Runs the clustering loop: initializes the clusters, then alternates between updating the
        /// cluster means and reassigning rows until a stop condition is met.
        /// </summary>
        /// <remarks>
        /// The loop stops when <c>UpdateClusterMeanValues</c> reports an empty cluster, when a pass
        /// changes no assignment, or when <c>result.Iterations</c> reaches
        /// <c>options.MaximumIterationCount</c>. If the iteration limit is reached in the same pass in
        /// which no assignment changed, <c>Status.MaxIterationsReached</c> is reported.
        /// The objective is only calculated when no empty cluster was encountered.
        /// </remarks>
        /// <param name="input">The data to cluster.</param>
        /// <param name="options">Algorithm settings (cluster count, normalization flag, iteration limit, ...).</param>
        /// <returns>The result, including its <c>TerminationStatus</c>.</returns>
        public static ClusterResult Analyze(ClusterData input, ClusterAlgorithmOptions options)
        {
            ClusterResult result = InitializeClusters(input, options);

            //work on normalized values only when requested, otherwise directly on the raw data
            if (options.NormalizeData)
            {
                result.NormalizedData = Normalize(input);
            }
            else
            {
                result.NormalizedData = input.RawData;
            }

            bool meansUpdated = false;
            bool assignmentChanged = false;
            bool iterationLimitHit = false;

            while (true)
            {
                meansUpdated = UpdateClusterMeanValues(result, input, options);
                if (!meansUpdated)
                {
                    break;
                }

                assignmentChanged = UpdateClusterAssignment(result, input, options);
                result.Iterations++;
                iterationLimitHit = result.Iterations >= options.MaximumIterationCount;

                if (!assignmentChanged || iterationLimitHit)
                {
                    break;
                }
            }

            //an empty cluster ends the analysis without an objective value
            if (!meansUpdated)
            {
                result.TerminationStatus = Status.EmptyClusters;
                return result;
            }

            //the loop can only get here via the limit or via an unchanged assignment;
            //the limit takes precedence when both apply
            result.TerminationStatus = iterationLimitHit
                ? Status.MaxIterationsReached
                : Status.Convergence;

            CalculateObjective(result, input, options);
            return result;
        }
Example #5
0
        /// <summary>
        /// Calculates the clustering objective and stores it in <c>result.TotalClusterToPointDistance</c>.
        /// </summary>
        /// <remarks>
        /// For every cluster the <c>GetDistance</c> values between the cluster mean and each assigned
        /// row are summed; that sum is multiplied by the number of rows in the cluster, and the
        /// products of all clusters are added up.
        /// </remarks>
        /// <param name="result">Supplies assignment, means and normalized data; receives the objective.</param>
        /// <param name="input">Supplies the row count and the attribute weights.</param>
        /// <param name="options">Supplies the number of clusters.</param>
        private static void CalculateObjective(ClusterResult result, ClusterData input, ClusterAlgorithmOptions options)
        {
            double[] distanceSums = new double[options.NumberOfClusters];
            int[]    memberCounts = new int[options.NumberOfClusters];

            //accumulate mean-to-point distance and member count per cluster
            for (int row = 0; row < input.RowCount; row++)
            {
                int assigned = result.ClusterAssignment[row];
                distanceSums[assigned] += GetDistance(
                    result.ClusterMeanValues[assigned],
                    result.NormalizedData[row],
                    input.AttributeWeights);
                memberCounts[assigned]++;
            }

            //weight each cluster's distance sum by its size and add everything up
            double objective = 0;
            for (int cluster = 0; cluster < options.NumberOfClusters; cluster++)
            {
                objective += memberCounts[cluster] * distanceSums[cluster];
            }
            result.TotalClusterToPointDistance = objective;
        }