// Defines the performance of the system under study:
// the Dunn index of the data subset whose columns are
// selected by the combination represented by parameter x.
static double Performance(DoubleMatrix x)
{
    // Nonzero entries of x mark the selected features (columns).
    IndexCollection selectedFeatures = x.FindNonzero();

    // Evaluate the Dunn index on the selected data columns,
    // given the partition under study.
    return IndexPartition.DunnIndex(
        data: data[":", selectedFeatures],
        partition: partition);
}
/// <summary>
/// Tests the <see cref="IndexPartition.DunnIndex"/> method:
/// null arguments, partitions containing invalid row indexes,
/// and a valid input whose expected value was computed in R.
/// </summary>
public void IndexPartitionDunnTest()
{
    // data is null
    {
        var target = IrisDataSet.GetClassPredictions();
        var partition = IndexPartition.Create(target);

        ArgumentExceptionAssert.Throw(
            () =>
            {
                var actual = IndexPartition.DunnIndex(null, partition);
            },
            expectedType: typeof(ArgumentNullException),
            expectedPartialMessage: ArgumentExceptionAssert.NullPartialMessage,
            expectedParameterName: "data");
    }

    // partition is null
    {
        var data = IrisDataSet.GetAttributesAsDoubleMatrix();

        ArgumentExceptionAssert.Throw(
            () =>
            {
                var actual = IndexPartition.DunnIndex(data, null);
            },
            expectedType: typeof(ArgumentNullException),
            expectedPartialMessage: ArgumentExceptionAssert.NullPartialMessage,
            expectedParameterName: "partition");
    }

    // The first part contains an invalid index
    {
        var data = IrisDataSet.GetAttributesAsDoubleMatrix();
        var target = IrisDataSet.GetClassPredictions();
        var partition = IndexPartition.Create(target);

        // 100000 is far beyond the row range of the Iris data,
        // hence an invalid item index.
        partition[1][0] = 100000;

        ArgumentExceptionAssert.Throw(
            () =>
            {
                var actual = IndexPartition.DunnIndex(data, partition);
            },
            expectedType: typeof(ArgumentException),
            expectedPartialMessage: ImplementationServices.GetResourceString(
                "STR_EXCEPT_INP_PART_CONTAINS_INVALID_INDEX"),
            expectedParameterName: "partition");
    }

    // The second part contains an invalid index
    {
        var data = IrisDataSet.GetAttributesAsDoubleMatrix();
        var target = IrisDataSet.GetClassPredictions();
        var partition = IndexPartition.Create(target);

        partition[2][0] = 100000;

        ArgumentExceptionAssert.Throw(
            () =>
            {
                var actual = IndexPartition.DunnIndex(data, partition);
            },
            expectedType: typeof(ArgumentException),
            expectedPartialMessage: ImplementationServices.GetResourceString(
                "STR_EXCEPT_INP_PART_CONTAINS_INVALID_INDEX"),
            expectedParameterName: "partition");
    }

    // Valid input
    {
        var data = IrisDataSet.GetAttributesAsDoubleMatrix();
        var target = IrisDataSet.GetClassPredictions();
        var partition = IndexPartition.Create(target);

        var actual = IndexPartition.DunnIndex(
            data,
            partition);

        // The expected value below was obtained in R as follows:
        //
        // library(clv)
        // data(iris)
        // iris.data <- iris[, 1:4]
        // # cluster data
        // agnes.mod <- agnes(iris.data) # create cluster tree
        // v.pred <- as.integer(cutree(agnes.mod, 5)) # "cut" the tree
        // intraclust = c("complete", "average", "centroid")
        // interclust = c("single", "complete", "average", "centroid", "aveToCent", "hausdorff")
        // cls.scatt <- cls.scatt.data(iris.data, v.pred, dist = "euclidean")
        // dunn1 <- clv.Dunn(cls.scatt, intraclust, interclust)
        //
        // This is dunn1[1,1], corresponding to
        // intra = "complete",
        // inter = "single".
        double expected = 0.154041597;

        // Compare with a tolerance: never use exact equality on doubles.
        Assert.AreEqual(expected, actual, 1e-4);
    }
}
/// <summary>
/// Example: discovers the partition of an artificial data set
/// that minimizes the Davies-Bouldin index, using the
/// Cross-Entropy <see cref="SystemPerformanceOptimizer"/>.
/// </summary>
public void Main()
{
    // Set the number of items and features under study.
    const int numberOfItems = 12;
    int numberOfFeatures = 7;

    // Create a matrix that will represent
    // an artificial data set,
    // having 12 items (rows) and 7 features (columns).
    // This will store the observations which
    // partition discovery will be based on.
    var data = DoubleMatrix.Dense(
        numberOfRows: numberOfItems,
        numberOfColumns: numberOfFeatures);

    // Fill the data rows by sampling from a different
    // distribution while, respectively, drawing observations
    // for items 0 to 3, 4 to 7, and 8 to 11: these will be the
    // three different parts expected to be included in the
    // optimal partition.
    double mu = 1.0;
    var g = new GaussianDistribution(mu: mu, sigma: .01);

    // Rows 0..3: cluster centered at mu = 1.
    IndexCollection range = IndexCollection.Range(0, 3);
    for (int j = 0; j < numberOfFeatures; j++)
    {
        data[range, j] = g.Sample(sampleSize: range.Count);
    }

    // Rows 4..7: shift the mean by 5 so this cluster is
    // well separated from the previous one.
    mu += 5.0;
    g.Mu = mu;
    range = IndexCollection.Range(4, 7);
    for (int j = 0; j < numberOfFeatures; j++)
    {
        data[range, j] = g.Sample(sampleSize: range.Count);
    }

    // Rows 8..11: shift the mean by another 5.
    mu += 5.0;
    g.Mu = mu;
    range = IndexCollection.Range(8, 11);
    for (int j = 0; j < numberOfFeatures; j++)
    {
        data[range, j] = g.Sample(sampleSize: range.Count);
    }

    Console.WriteLine("The data set:");
    Console.WriteLine(data);

    // Define the optimization problem as
    // the minimization of the Davies-Bouldin Index
    // of a candidate partition.
    double objectiveFunction(DoubleMatrix x)
    {
        // An argument x has 12 entries, each belonging
        // to the set {0,...,k-1}, where k is the
        // maximum number of allowed parts, so
        // x[j]==i signals that, at x, item j
        // has been assigned to part i.
        IndexPartition<double> selected = IndexPartition.Create(x);
        var performance = IndexPartition.DaviesBouldinIndex(
            data: data,
            partition: selected);
        return (performance);
    }

    // Lower Davies-Bouldin values indicate better clusterings.
    var optimizationGoal = OptimizationGoal.Minimization;

    // Define the maximum number of parts allowed in the
    // partition to be discovered.
    int maximumNumberOfParts = 3;

    // Create the required context.
    var context = new PartitionOptimizationContext(
        objectiveFunction: objectiveFunction,
        stateDimension: numberOfItems,
        partitionDimension: maximumNumberOfParts,
        probabilitySmoothingCoefficient: .8,
        optimizationGoal: optimizationGoal,
        minimumNumberOfIterations: 3,
        maximumNumberOfIterations: 1000);

    // Create the optimizer, allowing unlimited parallelism
    // for both performance evaluation and sample generation.
    var optimizer = new SystemPerformanceOptimizer()
    {
        PerformanceEvaluationParallelOptions = { MaxDegreeOfParallelism = -1 },
        SampleGenerationParallelOptions = { MaxDegreeOfParallelism = -1 }
    };

    // Set optimization parameters:
    // rarity is the fraction of elite samples retained per
    // iteration; sampleSize is the number of candidate
    // partitions drawn per iteration.
    double rarity = 0.01;
    int sampleSize = 2000;

    // Solve the problem.
    var results = optimizer.Optimize(
        context,
        rarity,
        sampleSize);

    // Decode the optimal state into an index partition.
    IndexPartition<double> optimalPartition =
        IndexPartition.Create(results.OptimalState);

    // Show the results.
    Console.WriteLine(
        "The Cross-Entropy optimizer has converged: {0}.",
        results.HasConverged);

    Console.WriteLine();
    Console.WriteLine("Initial guess parameter:");
    Console.WriteLine(context.InitialParameter);

    Console.WriteLine();
    Console.WriteLine("The minimizer of the performance is:");
    Console.WriteLine(results.OptimalState);

    Console.WriteLine();
    Console.WriteLine(
        "The optimal partition is:");
    Console.WriteLine(optimalPartition);

    Console.WriteLine();
    Console.WriteLine("The minimum performance is:");
    Console.WriteLine(results.OptimalPerformance);

    // Also report the Dunn index of the discovered partition
    // as an additional quality measure (higher is better).
    Console.WriteLine();
    Console.WriteLine("The Dunn Index for the optimal partition is:");
    var di = IndexPartition.DunnIndex(
        data,
        optimalPartition);
    Console.WriteLine(di);
}
/// <summary>
/// Example: selects the features that best explain a given
/// partition by maximizing the Dunn index, using the
/// Cross-Entropy <see cref="SystemPerformanceOptimizer"/>.
/// </summary>
public void Main()
{
    // Set the number of items and features under study.
    const int numberOfItems = 12;
    int numberOfFeatures = 7;

    // Define a partition that must be explained.
    // Three parts (clusters) are included,
    // containing, respectively, items 0 to 3,
    // 4 to 7, and 8 to 11.
    var partition = IndexPartition.Create(
        new double[numberOfItems]
        {
            0, 0, 0, 0,
            1, 1, 1, 1,
            2, 2, 2, 2
        });

    // Create a matrix that will represent
    // an artificial data set,
    // having 12 items (rows) and 7 features (columns).
    // This will store the observations which
    // explanation will be based on.
    var data = DoubleMatrix.Dense(
        numberOfRows: numberOfItems,
        numberOfColumns: numberOfFeatures);

    // The first 5 features are built to be almost
    // surely non informative, since they result
    // as samples drawn from a same distribution.
    var g = new GaussianDistribution(mu: 0, sigma: .01);
    for (int j = 0; j < 5; j++)
    {
        data[":", j] = g.Sample(sampleSize: numberOfItems);
    }

    // Features 5 to 6 are instead built to be informative,
    // since they are sampled from different distributions
    // while filling rows whose indexes are in different parts
    // of the partition to be explained.
    var partIdentifiers = partition.Identifiers;
    double mu = 1.0;
    for (int i = 0; i < partIdentifiers.Count; i++)
    {
        var part = partition[partIdentifiers[i]];
        int partSize = part.Count;

        // Column 5 for this part, then shift the mean by 2
        // and fill column 6, so each part occupies a distinct
        // region in the informative feature space.
        g.Mu = mu;
        data[part, 5] = g.Sample(sampleSize: partSize);
        mu += 2.0;
        g.Mu = mu;
        data[part, 6] = g.Sample(sampleSize: partSize);
        mu += 2.0;
    }

    Console.WriteLine("The data set:");
    Console.WriteLine(data);

    // Define the selection problem as
    // the maximization of the Dunn Index.
    double objectiveFunction(DoubleMatrix x)
    {
        // An argument x has entries equal to one,
        // signaling that the corresponding features
        // are selected at x. Otherwise, the entries
        // are zero.
        IndexCollection selected = x.FindNonzero();
        double performance = IndexPartition.DunnIndex(
            data: data[":", selected],
            partition: partition);
        return (performance);
    }

    // Higher Dunn values indicate better separated,
    // more compact clusterings.
    var optimizationGoal = OptimizationGoal.Maximization;

    // Define how many features must be selected
    // for explanation.
    int numberOfExplanatoryFeatures = 2;

    // Create the required context.
    var context = new CombinationOptimizationContext(
        objectiveFunction: objectiveFunction,
        stateDimension: numberOfFeatures,
        combinationDimension: numberOfExplanatoryFeatures,
        probabilitySmoothingCoefficient: .8,
        optimizationGoal: optimizationGoal,
        minimumNumberOfIterations: 3,
        maximumNumberOfIterations: 1000);

    // Create the optimizer, allowing unlimited parallelism
    // for both performance evaluation and sample generation.
    var optimizer = new SystemPerformanceOptimizer()
    {
        PerformanceEvaluationParallelOptions = { MaxDegreeOfParallelism = -1 },
        SampleGenerationParallelOptions = { MaxDegreeOfParallelism = -1 }
    };

    // Set optimization parameters:
    // rarity is the fraction of elite samples retained per
    // iteration; sampleSize is the number of candidate
    // combinations drawn per iteration.
    double rarity = 0.01;
    int sampleSize = 1000;

    // Solve the problem.
    var results = optimizer.Optimize(
        context,
        rarity,
        sampleSize);

    // Nonzero entries of the optimal state mark the
    // selected feature columns.
    IndexCollection optimalExplanatoryFeatureIndexes =
        results.OptimalState.FindNonzero();

    // Show the results.
    Console.WriteLine(
        "The Cross-Entropy optimizer has converged: {0}.",
        results.HasConverged);

    Console.WriteLine();
    Console.WriteLine("Initial guess parameter:");
    Console.WriteLine(context.InitialParameter);

    Console.WriteLine();
    Console.WriteLine("The maximizer of the performance is:");
    Console.WriteLine(results.OptimalState);

    Console.WriteLine();
    Console.WriteLine(
        "The {0} features best explaining the given partition have column indexes:",
        numberOfExplanatoryFeatures);
    Console.WriteLine(optimalExplanatoryFeatureIndexes);

    Console.WriteLine();
    Console.WriteLine("The maximum performance is:");
    Console.WriteLine(results.OptimalPerformance);

    // Report the Dunn index achieved on the selected features.
    Console.WriteLine();
    Console.WriteLine("This is the Dunn Index for the selected features:");
    var di = IndexPartition.DunnIndex(
        data[":", optimalExplanatoryFeatureIndexes],
        partition);
    Console.WriteLine(di);
}