// Define the performance function of the
        // system under study (in this context,
        // the Davies-Bouldin index of the data subset
        // whose columns are selected by the combination
        // represented by parameter x).
        static double Performance(DoubleMatrix x)
        {
            IndexCollection selected = x.FindNonzero();

            double performance =
                IndexPartition.DaviesBouldinIndex(
                    data: data[":", selected],
                    partition: partition);

            return performance;
        }
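
A function shaped like Performance is meant to serve as the objective of a Cross-Entropy optimizer. The sketch below shows one plausible wiring, assuming a CombinationOptimizationContext whose parameters mirror the PartitionOptimizationContext of Example #4; the combinationDimension value and the data.NumberOfColumns call are illustrative assumptions, not taken from this snippet.

        // A sketch only: CombinationOptimizationContext is assumed to
        // parallel the PartitionOptimizationContext shown in Example #4.
        var context = new CombinationOptimizationContext(
            objectiveFunction: Performance,
            stateDimension: data.NumberOfColumns, // one 0/1 entry per feature
            combinationDimension: 2,              // features to select (assumed)
            probabilitySmoothingCoefficient: .8,
            optimizationGoal: OptimizationGoal.Minimization,
            minimumNumberOfIterations: 3,
            maximumNumberOfIterations: 1000);

        var optimizer = new SystemPerformanceOptimizer();

        // rarity and sampleSize as in Example #4.
        var results = optimizer.Optimize(context, 0.01, 2000);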
Example #2
        public void IndexPartitionDaviesBouldinTest()
        {
            // data is null
            {
                var target    = IrisDataSet.GetClassPredictions();
                var partition = IndexPartition.Create(target);

                ArgumentExceptionAssert.Throw(
                    () =>
                {
                    var actual = IndexPartition.DaviesBouldinIndex(null, partition);
                },
                    expectedType: typeof(ArgumentNullException),
                    expectedPartialMessage: ArgumentExceptionAssert.NullPartialMessage,
                    expectedParameterName: "data");
            }

            // partition is null
            {
                var data = IrisDataSet.GetAttributesAsDoubleMatrix();

                ArgumentExceptionAssert.Throw(
                    () =>
                {
                    var actual = IndexPartition.DaviesBouldinIndex(data, null);
                },
                    expectedType: typeof(ArgumentNullException),
                    expectedPartialMessage: ArgumentExceptionAssert.NullPartialMessage,
                    expectedParameterName: "partition");
            }

            // The first part contains an invalid index
            {
                var data      = IrisDataSet.GetAttributesAsDoubleMatrix();
                var target    = IrisDataSet.GetClassPredictions();
                var partition = IndexPartition.Create(target);

                partition[1][0] = 100000;

                ArgumentExceptionAssert.Throw(
                    () =>
                {
                    var actual = IndexPartition.DaviesBouldinIndex(data, partition);
                },
                    expectedType: typeof(ArgumentException),
                    expectedPartialMessage: ImplementationServices.GetResourceString(
                        "STR_EXCEPT_INP_PART_CONTAINS_INVALID_INDEX"),
                    expectedParameterName: "partition");
            }

            // The second part contains an invalid index
            {
                var data      = IrisDataSet.GetAttributesAsDoubleMatrix();
                var target    = IrisDataSet.GetClassPredictions();
                var partition = IndexPartition.Create(target);

                partition[2][0] = 100000;

                ArgumentExceptionAssert.Throw(
                    () =>
                {
                    var actual = IndexPartition.DaviesBouldinIndex(data, partition);
                },
                    expectedType: typeof(ArgumentException),
                    expectedPartialMessage: ImplementationServices.GetResourceString(
                        "STR_EXCEPT_INP_PART_CONTAINS_INVALID_INDEX"),
                    expectedParameterName: "partition");
            }

            // Valid input
            {
                var data      = IrisDataSet.GetAttributesAsDoubleMatrix();
                var target    = IrisDataSet.GetClassPredictions();
                var partition = IndexPartition.Create(target);

                var actual = IndexPartition.DaviesBouldinIndex(
                    data, partition);

                // The expected value below was obtained in R as follows:
                //
                // library(clv)
                // data(iris)
                // iris.data <- iris[, 1:4]
                // # cluster data
                // agnes.mod <- agnes(iris.data) # create cluster tree
                // v.pred <- as.integer(cutree(agnes.mod, 5)) # "cut" the tree
                // intraclust = c("complete", "average", "centroid")
                // interclust = c("single", "complete", "average", "centroid", "aveToCent", "hausdorff")
                // cls.scatt <- cls.scatt.data(iris.data, v.pred, dist = "euclidean")
                // davies1 <- clv.Davies.Bouldin(cls.scatt, intraclust, interclust)

                // This is davies1[4,3], corresponding to
                // intra = "centroid",
                // inter = "centroid".
                double expected = 0.685838025870551;

                Assert.AreEqual(expected, actual, 1e-3);
            }
        }
Example #3
        public void Main()
        {
            // Set the number of items and features under study.
            const int numberOfItems    = 12;
            int       numberOfFeatures = 7;

            // Define a partition that must be explained.
            // Three parts (clusters) are included,
            // containing, respectively, items 0 to 3,
            // 4 to 7, and 8 to 11.
            var partition = IndexPartition.Create(
                new double[numberOfItems]
            {
                0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2
            });
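            // The resulting partition indexes its parts by the
            // distinct values in the array: partition[0] collects the
            // positions of the zeros (items 0 to 3), partition[1]
            // those of the ones (items 4 to 7), and partition[2]
            // those of the twos (items 8 to 11).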

            // Create a matrix that will represent
            // an artificial data set,
            // having 12 items (rows) and 7 features (columns).
            // This will store the observations on which
            // the explanation will be based.
            var data = DoubleMatrix.Dense(
                numberOfRows: numberOfItems,
                numberOfColumns: numberOfFeatures);

            // The first 5 features are built to be almost
            // surely non-informative, since they are all
            // sampled from the same distribution.
            var g = new GaussianDistribution(mu: 0, sigma: .01);

            for (int j = 0; j < 5; j++)
            {
                data[":", j] = g.Sample(sampleSize: numberOfItems);
            }

            // Features 5 and 6 are instead built to be informative,
            // since their entries are sampled from different
            // distributions depending on the part of the partition
            // to which the corresponding row indexes belong.
            var    partIdentifiers = partition.Identifiers;
            double mu = 1.0;

            for (int i = 0; i < partIdentifiers.Count; i++)
            {
                var part     = partition[partIdentifiers[i]];
                int partSize = part.Count;
                g.Mu          = mu;
                data[part, 5] = g.Sample(sampleSize: partSize);
                mu           += 2.0;
                g.Mu          = mu;
                data[part, 6] = g.Sample(sampleSize: partSize);
                mu           += 2.0;
            }

            Console.WriteLine("The data set:");
            Console.WriteLine(data);

            // Define how many features must be selected
            // for explanation.
            int numberOfExplanatoryFeatures = 2;

            // Select the best features.
            IndexCollection optimalExplanatoryFeatureIndexes =
                Clusters.Explain(
                    data,
                    partition,
                    numberOfExplanatoryFeatures);
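            // Note: as the final evaluation below suggests, the
            // selection can be read as searching for the feature
            // subset minimizing the Davies-Bouldin index of the
            // corresponding data subset (compare the Performance
            // function of Example #1). This reading is inferred from
            // these examples, not a claim about library internals.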

            // Show the results.
            Console.WriteLine();
            Console.WriteLine(
                "The {0} features best explaining the given partition have column indexes:",
                numberOfExplanatoryFeatures);
            Console.WriteLine(optimalExplanatoryFeatureIndexes);

            Console.WriteLine();
            Console.WriteLine("The Davies-Bouldin Index for the selected features:");
            var dbi = IndexPartition.DaviesBouldinIndex(
                data[":", optimalExplanatoryFeatureIndexes],
                partition);

            Console.WriteLine(dbi);
        }
Example #4

        public void Main()
        {
            // Set the number of items and features under study.
            const int numberOfItems    = 12;
            int       numberOfFeatures = 7;

            // Create a matrix that will represent
            // an artificial data set,
            // having 12 items (rows) and 7 features (columns).
            // This will store the observations on which
            // partition discovery will be based.
            var data = DoubleMatrix.Dense(
                numberOfRows: numberOfItems,
                numberOfColumns: numberOfFeatures);

            // Fill the data rows by sampling from a different
            // distribution while, respectively, drawing observations
            // for items 0 to 3, 4 to 7, and 8 to 11: these will be the
            // three different parts expected to be included in the
            // optimal partition.
            double mu = 1.0;
            var    g  = new GaussianDistribution(mu: mu, sigma: .01);

            IndexCollection range = IndexCollection.Range(0, 3);

            for (int j = 0; j < numberOfFeatures; j++)
            {
                data[range, j] = g.Sample(sampleSize: range.Count);
            }

            mu   += 5.0;
            g.Mu  = mu;
            range = IndexCollection.Range(4, 7);
            for (int j = 0; j < numberOfFeatures; j++)
            {
                data[range, j] = g.Sample(sampleSize: range.Count);
            }

            mu   += 5.0;
            g.Mu  = mu;
            range = IndexCollection.Range(8, 11);
            for (int j = 0; j < numberOfFeatures; j++)
            {
                data[range, j] = g.Sample(sampleSize: range.Count);
            }

            Console.WriteLine("The data set:");
            Console.WriteLine(data);

            // Define the optimization problem as
            // the minimization of the Davies-Bouldin Index
            // of a candidate partition.
            double objectiveFunction(DoubleMatrix x)
            {
                // The argument x has 12 entries, each belonging
                // to the set {0,...,k-1}, where k is the
                // maximum number of allowed parts, so that
                // x[j]==i signals that, at x, item j
                // is assigned to part i.
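                // For example, given how the data were generated,
                // an optimal x is expected to be, up to a relabeling
                // of the parts, [0,0,0,0,1,1,1,1,2,2,2,2].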
                IndexPartition<double> selected =
                    IndexPartition.Create(x);

                var performance = IndexPartition.DaviesBouldinIndex(
                    data: data,
                    partition: selected);

                return performance;
            }

            var optimizationGoal = OptimizationGoal.Minimization;

            // Define the maximum number of parts allowed in the
            // partition to be discovered.
            int maximumNumberOfParts = 3;

            // Create the required context.
            var context = new PartitionOptimizationContext(
                objectiveFunction: objectiveFunction,
                stateDimension: numberOfItems,
                partitionDimension: maximumNumberOfParts,
                probabilitySmoothingCoefficient: .8,
                optimizationGoal: optimizationGoal,
                minimumNumberOfIterations: 3,
                maximumNumberOfIterations: 1000);

            // Create the optimizer.
            var optimizer = new SystemPerformanceOptimizer()
            {
                PerformanceEvaluationParallelOptions = { MaxDegreeOfParallelism = -1 },
                SampleGenerationParallelOptions      = { MaxDegreeOfParallelism = -1 }
            };

            // Set optimization parameters.
            double rarity     = 0.01;
            int    sampleSize = 2000;
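
            // In the Cross-Entropy method, sampleSize is the number
            // of candidate states sampled at each iteration, while
            // rarity is the fraction of best-performing samples (the
            // elite set) exploited to update the sampling
            // distribution (standard CE semantics, assumed here).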

            // Solve the problem.
            var results = optimizer.Optimize(
                context,
                rarity,
                sampleSize);

            IndexPartition<double> optimalPartition =
                IndexPartition.Create(results.OptimalState);

            // Show the results.
            Console.WriteLine(
                "The Cross-Entropy optimizer has converged: {0}.",
                results.HasConverged);

            Console.WriteLine();
            Console.WriteLine("Initial guess parameter:");
            Console.WriteLine(context.InitialParameter);

            Console.WriteLine();
            Console.WriteLine("The minimizer of the performance is:");
            Console.WriteLine(results.OptimalState);

            Console.WriteLine();
            Console.WriteLine(
                "The optimal partition is:");
            Console.WriteLine(optimalPartition);

            Console.WriteLine();
            Console.WriteLine("The minimum performance is:");
            Console.WriteLine(results.OptimalPerformance);

            Console.WriteLine();
            Console.WriteLine("The Dunn Index for the optimal partition is:");
            var di = IndexPartition.DunnIndex(
                data,
                optimalPartition);

            Console.WriteLine(di);
        }
Example #5
        public void Main()
        {
            // Set the number of items and features under study.
            const int numberOfItems    = 12;
            int       numberOfFeatures = 7;

            // Create a matrix that will represent
            // an artificial data set,
            // having 12 items (rows) and 7 features (columns).
            // This will store the observations on which
            // partition discovery will be based.
            var data = DoubleMatrix.Dense(
                numberOfRows: numberOfItems,
                numberOfColumns: numberOfFeatures);

            // Fill the data rows by sampling from a different
            // distribution while, respectively, drawing observations
            // for items 0 to 3, 4 to 7, and 8 to 11: these will be the
            // three different parts expected to be included in the
            // optimal partition.
            double mu = 1.0;
            var    g  = new GaussianDistribution(mu: mu, sigma: .01);

            IndexCollection range = IndexCollection.Range(0, 3);

            for (int j = 0; j < numberOfFeatures; j++)
            {
                data[range, j] = g.Sample(sampleSize: range.Count);
            }

            mu   += 5.0;
            g.Mu  = mu;
            range = IndexCollection.Range(4, 7);
            for (int j = 0; j < numberOfFeatures; j++)
            {
                data[range, j] = g.Sample(sampleSize: range.Count);
            }

            mu   += 5.0;
            g.Mu  = mu;
            range = IndexCollection.Range(8, 11);
            for (int j = 0; j < numberOfFeatures; j++)
            {
                data[range, j] = g.Sample(sampleSize: range.Count);
            }

            Console.WriteLine("The data set:");
            Console.WriteLine(data);

            // Define the maximum number of parts allowed in the
            // partition to be discovered.
            int maximumNumberOfParts = 3;

            // Select the best partition.
            IndexPartition<double> optimalPartition =
                Clusters.Discover(
                    data,
                    maximumNumberOfParts);

            // Show the results.
            Console.WriteLine();
            Console.WriteLine(
                "The optimal partition:");
            Console.WriteLine(optimalPartition);

            Console.WriteLine();
            Console.WriteLine("The Davies-Bouldin Index for the optimal partition:");
            var dbi = IndexPartition.DaviesBouldinIndex(
                data,
                optimalPartition);

            Console.WriteLine(dbi);
        }