/// <summary>
/// Checks that the specified <see cref="IndexPartition{T}"/>
/// instances have equal identifiers, irrespective of the corresponding
/// parts.
/// </summary>
/// <param name="expected">The partition containing the expected identifiers.</param>
/// <param name="actual">The partition containing the actual identifiers.</param>
public static void HaveEqualIdentifiers<T>(
    IndexPartition<T> expected,
    IndexPartition<T> actual)
{
    // Both null: trivially equal.
    if (expected is null && actual is null)
    {
        return;
    }

    // Exactly one is null (the both-null case was handled above).
    if (expected is null || actual is null)
    {
        throw new AssertFailedException(
            "One IndexPartition instance is null, the other is not.");
    }

    if (expected.Count != actual.Count)
    {
        throw new AssertFailedException(
            "IndexPartition instances have not the same number of parts.");
    }

    // Identifiers must agree position by position.
    for (int position = 0; position < expected.Count; position++)
    {
        Assert.AreEqual(
            expected.Identifiers[position],
            actual.Identifiers[position],
            "Wrong part identifier at position: {0}",
            position);
    }
}
public void Main()
{
    // Build a 2-by-4 matrix from row-major data.
    var entries = new double[8] { 0, 1, -2, -3, 0, -1, 2, 3 };
    var matrix = DoubleMatrix.Dense(2, 4, entries, StorageOrder.RowMajor);

    // Compute the sign of every entry.
    var signs = DoubleMatrix.Dense(matrix.NumberOfRows, matrix.NumberOfColumns);
    for (int l = 0; l < matrix.Count; l++)
    {
        signs[l] = Math.Sign(matrix[l]);
    }

    // Partition the matrix linear indexes by entry sign.
    // Three parts result: the zero part (identifier 0), the negative
    // part (identifier -1), and the positive part (identifier 1).
    var partition = IndexPartition.Create(signs);

    Console.WriteLine();
    foreach (var identifier in partition.Identifiers)
    {
        Console.WriteLine("Part identifier: {0}", identifier);
        Console.WriteLine("    indexes: {0}", partition[identifier]);
        Console.WriteLine();
    }
}
public void Main()
{
    // Matrix whose column 0 holds two distinct values (0 and 1).
    var entries = new double[18]
    {
        0, 0, 1,
        0, 0, 1,
        0, 1, 0,
        0, 1, 0,
        1, 0, 0,
        1, 0, 0
    };
    var matrix = DoubleMatrix.Dense(6, 3, entries, StorageOrder.RowMajor);

    // Partition the matrix row indexes by the contents of column 0:
    // one part is created for each distinct value found there.
    var partition = IndexPartition.Create(matrix[":", 0]);

    // A part identifier is the value itself; the part collects the
    // indexes of the rows where that value sits in column 0.
    Console.WriteLine();
    foreach (var identifier in partition.Identifiers)
    {
        Console.WriteLine("Part identifier: {0}", identifier);
        Console.WriteLine("    indexes: {0}", partition[identifier]);
        Console.WriteLine();
    }
}
public void Main()
{
    // 3-by-2 matrix holding the values 0, 1, 2, and 3.
    var entries = new double[6] { 1, 3, 0, 2, 2, 1 };
    var matrix = DoubleMatrix.Dense(3, 2, entries, StorageOrder.RowMajor);

    // Partition the matrix linear indexes by entry value:
    // one part per distinct value occurring in the matrix.
    var partition = IndexPartition.Create(matrix);

    // Each part is identified by a value and collects the linear
    // indexes of the entries equal to that value.
    Console.WriteLine();
    foreach (var identifier in partition.Identifiers)
    {
        Console.WriteLine("Part identifier: {0}", identifier);
        Console.WriteLine("    indexes: {0}", partition[identifier]);
        Console.WriteLine();
    }

    // A partition is convertible back to a matrix whose entries are
    // the identifiers of the parts containing each linear position.
    var fromPartition = (DoubleMatrix)partition;
    Console.WriteLine("Conversion of a partition to a matrix:");
    Console.WriteLine(fromPartition);
}
public void Main()
{
    // Matrix with three distinct rows, each repeated twice.
    var entries = new double[18]
    {
        0, 0, 1,
        0, 0, 1,
        0, 1, 0,
        0, 1, 0,
        1, 0, 0,
        1, 0, 0
    };
    var matrix = DoubleMatrix.Dense(6, 3, entries, StorageOrder.RowMajor);

    // Partition the row indexes by whole-row content:
    // one part per distinct row.
    var partition = IndexPartition.Create(matrix.AsRowCollection());

    // Each part is identified by a row and collects the indexes of
    // the rows that are equal to that identifier.
    Console.WriteLine();
    foreach (var identifier in partition.Identifiers)
    {
        Console.WriteLine("Part identifier: {0}", identifier);
        Console.WriteLine("    indexes: {0}", partition[identifier]);
        Console.WriteLine();
    }
}
static TestablePartitionOptimizationContext00()
{
    const int numberOfItems = 12;
    const int numberOfFeatures = 7;

    // Target partition: items 0-3 in part 0, 4-7 in part 2, 8-11 in part 1.
    var target = DoubleMatrix.Dense(numberOfItems, 1,
        new double[numberOfItems] { 0, 0, 0, 0, 2, 2, 2, 2, 1, 1, 1, 1 });
    partition = IndexPartition.Create(target);

    data = DoubleMatrix.Dense(numberOfItems, numberOfFeatures);

    // Make the parts separable: add a distinct, increasing constant
    // to every row belonging to the same part.
    double offset = 1.0;
    foreach (var identifier in partition.Identifiers)
    {
        var part = partition[identifier];
        for (int j = 0; j < part.Count; j++)
        {
            data[part[j], ":"] += offset;
        }
        offset += 5.0;
    }
}
public void Main()
{
    // Array whose positions will be partitioned by content.
    var elements = new string[6] { "one", "two", "one", "one", "three", "three" };

    // One part per distinct string: "one", "two", and "three".
    var partition = IndexPartition.Create(elements);

    Console.WriteLine();
    foreach (var identifier in partition.Identifiers)
    {
        Console.WriteLine("Part identifier: {0}", identifier);
        Console.WriteLine("    indexes: {0}", partition[identifier]);
        Console.WriteLine();
    }
}
public void ToDoubleMatrixTest()
{
    // Partition under test:
    //   0 -> { 1 }, 1 -> { 0, 5 }, 2 -> { 2, 4 }, 3 -> { 3 }
    var target = new IndexPartition<double>
    {
        partIndetifiers = new List<double>(3) { 0.0, 1.0, 2.0, 3.0 },
        parts = new Dictionary<double, IndexCollection>(3)
        {
            { 0.0, IndexCollection.FromArray(new int[] { 1 }) },
            { 1.0, IndexCollection.FromArray(new int[] { 0, 5 }) },
            { 2.0, IndexCollection.FromArray(new int[] { 2, 4 }) },
            { 3.0, IndexCollection.FromArray(new int[] { 3 }) }
        }
    };

    // Converting the partition yields, at each linear position, the
    // identifier of the part that contains that position.
    var actual = (DoubleMatrix)target;

    // Positions 0..5 belong to parts 1, 0, 2, 3, 2, 1 respectively.
    var expected = DoubleMatrix.Dense(6, 1,
        new double[] { 1, 0, 2, 3, 2, 1 });

    DoubleMatrixAssert.AreEqual(expected, actual, 1e-2);
}
/// <summary>
/// Checks that the specified <see cref="IndexPartition{T}"/> instances
/// contain the same parts, irrespective of the identifiers under which
/// the parts are stored.
/// </summary>
/// <param name="expected">The partition containing the expected parts.</param>
/// <param name="actual">The partition containing the actual parts.</param>
public static void HaveEqualParts<T>(
    IndexPartition<T> expected,
    IndexPartition<T> actual)
{
    // Both null: trivially equal.
    if (expected is null && actual is null)
    {
        return;
    }

    // Exactly one is null (the both-null case was handled above).
    if (expected is null || actual is null)
    {
        throw new AssertFailedException(
            "One IndexPartition instance is null, the other is not.");
    }

    if (expected.Count != actual.Count)
    {
        throw new AssertFailedException(
            "IndexPartition instances have not the same number of parts.");
    }

    // Greedy matching: each expected part must match exactly one
    // not-yet-consumed actual part.
    var availableIds = new List<T>(actual.Identifiers);

    foreach (var expectedId in expected.Identifiers)
    {
        var expectedPart = expected[expectedId];
        bool matched = false;

        for (int j = 0; j < availableIds.Count; j++)
        {
            try
            {
                // Throws AssertFailedException when the parts differ.
                IndexCollectionAssert.AreEqual(
                    expectedPart,
                    actual[availableIds[j]]);

                // Match found: consume the actual part and move on.
                availableIds.RemoveAt(j);
                matched = true;
                break;
            }
            catch (AssertFailedException)
            {
                // Not a match: try the next available actual part.
            }
        }

        if (!matched)
        {
            throw new AssertFailedException(
                string.Format(
                    "Missing expected part {0}.",
                    expectedPart));
        }
    }
}
// Performance of the system under study: the Dunn index of the
// data subset defined by the combination represented by parameter x.
static double Performance(DoubleMatrix x)
{
    // Nonzero entries of x mark the selected features (columns).
    IndexCollection selectedFeatures = x.FindNonzero();

    return IndexPartition.DunnIndex(
        data: data[":", selectedFeatures],
        partition: partition);
}
// Performance of the system under study: the total sum of squared
// errors of the partition represented by parameter x.
static double Performance(DoubleMatrix x)
{
    var candidate = IndexPartition.Create(x);

    // Accumulate, part by part, the column-wise squared deviations
    // of the rows belonging to that part.
    double sumOfSquaredErrors = 0.0;
    foreach (double identifier in candidate.Identifiers)
    {
        sumOfSquaredErrors += Stat.Sum(
            Stat.SumOfSquaredDeviations(
                data[candidate[identifier], ":"],
                DataOperation.OnColumns));
    }

    return sumOfSquaredErrors;
}
public void RunTest()
{
    // Valid input - Minimization
    {
        var optimizer = new SystemPerformanceOptimizer();

        // Retrieve the testable context under study.
        var testableContext = TestablePartitionOptimizationContext00.Get();
        var context = testableContext.Context;

        // Cross-Entropy parameters.
        int sampleSize = 2000;
        double rarity = 0.01;

        // Run the optimization.
        var results = optimizer.Optimize(context, rarity, sampleSize);

        Assert.AreEqual(
            expected: true,
            actual: results.HasConverged);

        // The optimal state must induce the expected partition,
        // both in identifiers and in parts.
        var expectedPartition = IndexPartition.Create(
            testableContext.OptimalState);
        var actualPartition = IndexPartition.Create(
            results.OptimalState);

        IndexPartitionAssert.HaveEqualIdentifiers(
            expected: expectedPartition,
            actual: actualPartition);

        IndexPartitionAssert.HaveEqualParts(
            expected: expectedPartition,
            actual: actualPartition);

        Assert.AreEqual(
            expected: testableContext.OptimalPerformance,
            actual: results.OptimalPerformance,
            DoubleMatrixTest.Accuracy);
    }
}
public void TryGetPartTest()
{
    // Partition under test:
    //   "false" -> { 0, 10 }
    //   "true"  -> { 5, 15 }
    var falsePart = IndexCollection.FromArray(new int[2] { 0, 10 });
    var truePart = IndexCollection.FromArray(new int[2] { 5, 15 });

    var target = new IndexPartition<string>
    {
        partIndetifiers = new List<string>(2) { "false", "true" },
        parts = new Dictionary<string, IndexCollection>(2)
        {
            { "false", falsePart },
            { "true", truePart }
        }
    };

    // Unknown identifier: no part is returned.
    bool partFound = target.TryGetPart("unknown", out IndexCollection part);
    Assert.AreEqual(expected: false, actual: partFound);
    Assert.IsNull(part);

    // Existing identifiers: the matching parts are returned.
    partFound = target.TryGetPart("false", out part);
    Assert.AreEqual(expected: true, actual: partFound);
    IndexCollectionAssert.AreEqual(expected: falsePart, actual: part);

    partFound = target.TryGetPart("true", out part);
    Assert.AreEqual(expected: true, actual: partFound);
    IndexCollectionAssert.AreEqual(expected: truePart, actual: part);
}
public void ToStringTest()
{
    // Matrix whose distinct rows are (0 0 1), (0 1 0), and (1 0 0),
    // each occurring in two consecutive positions.
    var entries = new double[18]
    {
        0, 0, 1,
        0, 0, 1,
        0, 1, 0,
        0, 1, 0,
        1, 0, 0,
        1, 0, 0
    };
    var matrix = DoubleMatrix.Dense(6, 3, entries, StorageOrder.RowMajor);

    // Partition the row indexes by whole-row content:
    // one part per distinct row.
    var partition = IndexPartition.Create(matrix.AsRowCollection());

    // Each rendered line has the form "[(identifier ), indexes]".
    var expected =
        "[(0 0 1 ), 0, 1]" + Environment.NewLine +
        "[(0 1 0 ), 2, 3]" + Environment.NewLine +
        "[(1 0 0 ), 4, 5]" + Environment.NewLine;

    Assert.AreEqual(expected, partition.ToString());
}
public void Main()
{
    // Build a 4-by-4 matrix from row-major data.
    var entries = new double[16]
    {
        -3,  3,  3, -1,
         0,  2, -2,  2,
         2,  1, -4, -5,
        -8,  2,  7, -1
    };
    var matrix = DoubleMatrix.Dense(4, 4, entries, StorageOrder.RowMajor);

    // Linear indexes of the main diagonal entries:
    // the stride between consecutive diagonal entries is rows + 1.
    var diagonalIndexes =
        IndexCollection.Sequence(0, 1 + matrix.NumberOfRows, matrix.Count);

    // True when the entry at the given linear index has an
    // absolute value smaller than 3, otherwise false.
    bool partitioner(int linearIndex)
    {
        return Math.Abs(matrix[linearIndex]) < 3.0;
    }

    // Split the diagonal indexes into two parts: entries below 3
    // in absolute value, and entries not satisfying that condition.
    var partition = IndexPartition.Create(diagonalIndexes, partitioner);

    Console.WriteLine();
    foreach (var identifier in partition.Identifiers)
    {
        Console.WriteLine("Part identifier: {0}", identifier);
        Console.WriteLine("    indexes: {0}", partition[identifier]);
        Console.WriteLine();
    }
}
static TestableCombinationOptimizationContext01()
{
    const int numberOfItems = 12;

    // Partition to be explained: items 0-3, 4-7, and 8-11.
    var target = DoubleMatrix.Dense(numberOfItems, 1,
        new double[numberOfItems] { 0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2 });
    partition = IndexPartition.Create(target);

    data = DoubleMatrix.Dense(numberOfItems, 7);

    // Features 0 to 4: noise, all drawn from one common distribution.
    var sampler = new GaussianDistribution(mu: 0, sigma: .1);
    for (int j = 0; j < 5; j++)
    {
        data[":", j] = sampler.Sample(sampleSize: numberOfItems);
    }

    // Features 5 and 6: informative, each part is sampled around a
    // distinct, increasing mean.
    double currentMu = 1.0;
    foreach (var identifier in partition.Identifiers)
    {
        var part = partition[identifier];
        int partSize = part.Count;

        sampler.Mu = currentMu;
        data[part, 5] = sampler.Sample(sampleSize: partSize);
        currentMu += 2.0;

        sampler.Mu = currentMu;
        data[part, 6] = sampler.Sample(sampleSize: partSize);
        currentMu += 2.0;
    }
}
/// <summary>
/// Tests that method
/// <see cref="RandomIndexPermutation.Next"/>
/// terminates successfully as expected.
/// </summary>
/// <param name="indexes">
/// The indexes to permute.
/// </param>
/// <param name="numberOfRandomPermutations">
/// The number of permutations to draw.
/// </param>
/// <param name="criticalValue">
/// A quantile of the chi-squared distribution with a number of
/// degrees of freedom equal to the <see cref="IndexCollection.Count"/>
/// of <paramref name="indexes"/>
/// minus <c>1</c>.
/// To serve as the critical value for the Pearson's
/// chi-squared test whose null hypothesis assumes that the
/// distinct possible permutations
/// are equiprobable.
/// </param>
/// <param name="delta">The required accuracy.
/// Defaults to <c>.01</c>.</param>
public static void Succeed(
    IndexCollection indexes,
    int numberOfRandomPermutations,
    double criticalValue,
    double delta = .01)
{
    var randomPermutation = new RandomIndexPermutation(indexes);

    // Generate permutations
    var permutations = new IndexCollection[numberOfRandomPermutations];
    for (int i = 0; i < numberOfRandomPermutations; i++)
    {
        permutations[i] = randomPermutation.Next();
    }

    // Check the number of distinct generated permutations.
    // Draw positions are partitioned by the drawn permutation itself,
    // so each part collects the draws that produced the same permutation.
    var permutationIdentifiers =
        IndexCollection.Default(numberOfRandomPermutations - 1);

    var actualDistinctPermutations = IndexPartition.Create(
        permutationIdentifiers,
        (i) => { return (permutations[i]); });

    int numberOfActualDistinctPermutations =
        actualDistinctPermutations.Count;

    // Every one of the Count! possible permutations must be observed
    // at least once.
    Assert.AreEqual(
        expected: SpecialFunctions.Factorial(indexes.Count),
        actual: numberOfActualDistinctPermutations);

    // Compute the actual permutation probabilities
    // (the relative frequency of each distinct permutation).
    DoubleMatrix actualPermutationProbabilities =
        DoubleMatrix.Dense(
            numberOfActualDistinctPermutations, 1);

    int j = 0;
    foreach (var identifier in actualDistinctPermutations.Identifiers)
    {
        actualPermutationProbabilities[j] =
            (double)actualDistinctPermutations[identifier].Count
            /
            (double)numberOfRandomPermutations;
        j++;
    }

    // Check that the Chebyshev Inequality holds true
    // for each permutation probability
    var expectedPermutationProbabilities =
        DoubleMatrix.Dense(
            numberOfActualDistinctPermutations,
            1,
            1.0 / (double)numberOfActualDistinctPermutations);

    for (int i = 0; i < numberOfActualDistinctPermutations; i++)
    {
        ProbabilityDistributionTest.CheckChebyshevInequality(
            new BernoulliDistribution(expectedPermutationProbabilities[i]),
            actualPermutationProbabilities[i],
            numberOfRandomPermutations,
            delta);
    }

    // Check how good the actual permutation probabilities fit
    // the expected (uniform) ones
    ProbabilityDistributionTest.CheckGoodnessOfFit(
        expectedPermutationProbabilities,
        actualPermutationProbabilities,
        criticalValue);
}
public void Main()
{
    // Problem size: 12 items described by 7 features.
    const int numberOfItems = 12;
    int numberOfFeatures = 7;

    // The partition to be explained: three parts (clusters),
    // holding items 0 to 3, 4 to 7, and 8 to 11, respectively.
    var partition = IndexPartition.Create(
        new double[numberOfItems] { 0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2 });

    // Artificial data set: 12 items (rows) by 7 features (columns).
    // Observations stored here will drive the explanation.
    var data = DoubleMatrix.Dense(
        numberOfRows: numberOfItems,
        numberOfColumns: numberOfFeatures);

    // Features 0 to 4 are almost surely non informative:
    // all are sampled from one common distribution.
    var sampler = new GaussianDistribution(mu: 0, sigma: .01);
    for (int feature = 0; feature < 5; feature++)
    {
        data[":", feature] = sampler.Sample(sampleSize: numberOfItems);
    }

    // Features 5 and 6 are informative: rows lying in different parts
    // of the target partition are sampled around distinct, increasing means.
    var partIdentifiers = partition.Identifiers;
    double currentMu = 1.0;
    for (int i = 0; i < partIdentifiers.Count; i++)
    {
        var part = partition[partIdentifiers[i]];
        int partSize = part.Count;

        sampler.Mu = currentMu;
        data[part, 5] = sampler.Sample(sampleSize: partSize);
        currentMu += 2.0;

        sampler.Mu = currentMu;
        data[part, 6] = sampler.Sample(sampleSize: partSize);
        currentMu += 2.0;
    }

    Console.WriteLine("The data set:");
    Console.WriteLine(data);

    // Selection problem: maximize the Dunn Index of the data set
    // restricted to the features selected by a candidate combination x
    // (x has unit entries at selected features, zero elsewhere).
    double objectiveFunction(DoubleMatrix x)
    {
        IndexCollection selected = x.FindNonzero();
        return IndexPartition.DunnIndex(
            data: data[":", selected],
            partition: partition);
    }

    var optimizationGoal = OptimizationGoal.Maximization;

    // Number of features the explanation must select.
    int numberOfExplanatoryFeatures = 2;

    // Cross-Entropy context for combination optimization.
    var context = new CombinationOptimizationContext(
        objectiveFunction: objectiveFunction,
        stateDimension: numberOfFeatures,
        combinationDimension: numberOfExplanatoryFeatures,
        probabilitySmoothingCoefficient: .8,
        optimizationGoal: optimizationGoal,
        minimumNumberOfIterations: 3,
        maximumNumberOfIterations: 1000);

    // Optimizer running evaluation and sampling fully in parallel.
    var optimizer = new SystemPerformanceOptimizer()
    {
        PerformanceEvaluationParallelOptions = { MaxDegreeOfParallelism = -1 },
        SampleGenerationParallelOptions = { MaxDegreeOfParallelism = -1 }
    };

    // Cross-Entropy parameters.
    double rarity = 0.01;
    int sampleSize = 1000;

    // Solve the problem.
    var results = optimizer.Optimize(
        context,
        rarity,
        sampleSize);

    IndexCollection optimalExplanatoryFeatureIndexes =
        results.OptimalState.FindNonzero();

    // Report the results.
    Console.WriteLine(
        "The Cross-Entropy optimizer has converged: {0}.",
        results.HasConverged);

    Console.WriteLine();
    Console.WriteLine("Initial guess parameter:");
    Console.WriteLine(context.InitialParameter);

    Console.WriteLine();
    Console.WriteLine("The maximizer of the performance is:");
    Console.WriteLine(results.OptimalState);

    Console.WriteLine();
    Console.WriteLine(
        "The {0} features best explaining the given partition have column indexes:",
        numberOfExplanatoryFeatures);
    Console.WriteLine(optimalExplanatoryFeatureIndexes);

    Console.WriteLine();
    Console.WriteLine("The maximum performance is:");
    Console.WriteLine(results.OptimalPerformance);

    Console.WriteLine();
    Console.WriteLine("This is the Dunn Index for the selected features:");
    var di = IndexPartition.DunnIndex(
        data[":", optimalExplanatoryFeatureIndexes],
        partition);
    Console.WriteLine(di);
}
public void GetOptimalStateTest()
{
    // valid input - random ties resolution
    {
        // Context over one feature having 6 categories and 4 response
        // categories; the objective function is irrelevant here, since
        // only GetOptimalState is exercised.
        var context = new CategoricalEntailmentEnsembleOptimizationContext(
            objectiveFunction:
                (DoubleMatrix state) => { return (Double.PositiveInfinity); },
            featureCategoryCounts: new List<int>(1) { 6 },
            numberOfResponseCategories: 4,
            numberOfCategoricalEntailments: 1,
            allowEntailmentPartialTruthValues: true,
            probabilitySmoothingCoefficient: .9,
            optimizationGoal: OptimizationGoal.Maximization,
            minimumNumberOfIterations: 5,
            maximumNumberOfIterations: 1000);

        int numberOfEvaluations = 10000;
        double delta = .01;

        // Parameter: .5 for each of the 6 feature-category premises,
        // and a uniform .25 for each of the 4 response categories,
        // so response ties must be resolved at random.
        var parameter = DoubleMatrix.Dense(1, 10,
            new double[10] { .5, .5, .5, .5, .5, .5, .25, .25, .25, .25 });

        // Generate states
        var states = new int[numberOfEvaluations];
        var responseIndexes = IndexCollection.Range(6, 9);
        for (int i = 0; i < numberOfEvaluations; i++)
        {
            var state = context.GetOptimalState(parameter);

            // Record which response category was selected at this draw.
            states[i] = state.Vec(responseIndexes).FindNonzero()[0];
        }

        // Compute the actual inclusion probabilities
        // (relative frequency of each response category).
        DoubleMatrix actualInclusionProbabilities =
            DoubleMatrix.Dense(context.NumberOfResponseCategories, 1);

        var stateIndexes =
            IndexCollection.Default(numberOfEvaluations - 1);

        for (int j = 0; j < context.NumberOfResponseCategories; j++)
        {
            var samplesContainingCurrentUnit =
                IndexPartition.Create(
                    stateIndexes,
                    (i) => { return (states[i] == j); });

            actualInclusionProbabilities[j] =
                (double)samplesContainingCurrentUnit[true].Count
                /
                (double)numberOfEvaluations;
        }

        // Check the number of distinct generated states
        var distinctStates =
            IndexPartition.Create(
                states);

        int numberOfDistinctStates =
            distinctStates.Count;

        Assert.AreEqual(
            expected: context.NumberOfResponseCategories,
            actual: numberOfDistinctStates);

        // Check that the Chebyshev Inequality holds true
        // for each inclusion probability
        var expectedInclusionProbabilities =
            DoubleMatrix.Dense(context.NumberOfResponseCategories, 1,
                1.0 / context.NumberOfResponseCategories);

        for (int j = 0; j < context.NumberOfResponseCategories; j++)
        {
            ProbabilityDistributionTest.CheckChebyshevInequality(
                new BernoulliDistribution(expectedInclusionProbabilities[j]),
                actualInclusionProbabilities[j],
                numberOfEvaluations,
                delta);
        }

        // Check how good the actual inclusion probabilities fit
        // the expected ones
        // The following assumes a number of response
        // categories equal to 4.
        //
        // The quantile of order .9 for
        // the chi-squared distribution having 4-1
        // degrees of freedom is 6.251389
        // (as from R function qchisq(.9, 3))
        var goodnessOfFitCriticalValue = 6.251389;

        ProbabilityDistributionTest.CheckGoodnessOfFit(
            expectedInclusionProbabilities,
            actualInclusionProbabilities,
            goodnessOfFitCriticalValue);
    }
}
public void Main()
{
    // Problem size: 12 items described by 7 features.
    const int numberOfItems = 12;
    int numberOfFeatures = 7;

    // Artificial data set: 12 items (rows) by 7 features (columns).
    // Observations stored here will drive the partition discovery.
    var data = DoubleMatrix.Dense(
        numberOfRows: numberOfItems,
        numberOfColumns: numberOfFeatures);

    // Rows 0-3, 4-7, and 8-11 are drawn around increasing means:
    // these are the three parts the optimal partition should recover.
    double currentMu = 1.0;
    var sampler = new GaussianDistribution(mu: currentMu, sigma: .01);

    IndexCollection rows = IndexCollection.Range(0, 3);
    for (int feature = 0; feature < numberOfFeatures; feature++)
    {
        data[rows, feature] = sampler.Sample(sampleSize: rows.Count);
    }

    currentMu += 5.0;
    sampler.Mu = currentMu;
    rows = IndexCollection.Range(4, 7);
    for (int feature = 0; feature < numberOfFeatures; feature++)
    {
        data[rows, feature] = sampler.Sample(sampleSize: rows.Count);
    }

    currentMu += 5.0;
    sampler.Mu = currentMu;
    rows = IndexCollection.Range(8, 11);
    for (int feature = 0; feature < numberOfFeatures; feature++)
    {
        data[rows, feature] = sampler.Sample(sampleSize: rows.Count);
    }

    Console.WriteLine("The data set:");
    Console.WriteLine(data);

    // Optimization problem: minimize the Davies-Bouldin Index of the
    // partition encoded by x, where x has 12 entries in {0,...,k-1}
    // (k = maximum number of parts) and x[j] == i means that, at x,
    // item j is assigned to part i.
    double objectiveFunction(DoubleMatrix x)
    {
        IndexPartition<double> candidate = IndexPartition.Create(x);
        return IndexPartition.DaviesBouldinIndex(
            data: data,
            partition: candidate);
    }

    var optimizationGoal = OptimizationGoal.Minimization;

    // Maximum number of parts allowed in the discovered partition.
    int maximumNumberOfParts = 3;

    // Cross-Entropy context for partition optimization.
    var context = new PartitionOptimizationContext(
        objectiveFunction: objectiveFunction,
        stateDimension: numberOfItems,
        partitionDimension: maximumNumberOfParts,
        probabilitySmoothingCoefficient: .8,
        optimizationGoal: optimizationGoal,
        minimumNumberOfIterations: 3,
        maximumNumberOfIterations: 1000);

    // Optimizer running evaluation and sampling fully in parallel.
    var optimizer = new SystemPerformanceOptimizer()
    {
        PerformanceEvaluationParallelOptions = { MaxDegreeOfParallelism = -1 },
        SampleGenerationParallelOptions = { MaxDegreeOfParallelism = -1 }
    };

    // Cross-Entropy parameters.
    double rarity = 0.01;
    int sampleSize = 2000;

    // Solve the problem.
    var results = optimizer.Optimize(
        context,
        rarity,
        sampleSize);

    IndexPartition<double> optimalPartition =
        IndexPartition.Create(results.OptimalState);

    // Report the results.
    Console.WriteLine(
        "The Cross-Entropy optimizer has converged: {0}.",
        results.HasConverged);

    Console.WriteLine();
    Console.WriteLine("Initial guess parameter:");
    Console.WriteLine(context.InitialParameter);

    Console.WriteLine();
    Console.WriteLine("The minimizer of the performance is:");
    Console.WriteLine(results.OptimalState);

    Console.WriteLine();
    Console.WriteLine(
        "The optimal partition is:");
    Console.WriteLine(optimalPartition);

    Console.WriteLine();
    Console.WriteLine("The minimum performance is:");
    Console.WriteLine(results.OptimalPerformance);

    Console.WriteLine();
    Console.WriteLine("The Dunn Index for the optimal partition is:");
    var di = IndexPartition.DunnIndex(
        data,
        optimalPartition);
    Console.WriteLine(di);
}
public void IndexPartitionDaviesBouldinTest()
{
    // data is null
    {
        var predictions = IrisDataSet.GetClassPredictions();
        var partition = IndexPartition.Create(predictions);

        ArgumentExceptionAssert.Throw(
            () =>
            {
                var actual = IndexPartition.DaviesBouldinIndex(null, partition);
            },
            expectedType: typeof(ArgumentNullException),
            expectedPartialMessage: ArgumentExceptionAssert.NullPartialMessage,
            expectedParameterName: "data");
    }

    // partition is null
    {
        var data = IrisDataSet.GetAttributesAsDoubleMatrix();

        ArgumentExceptionAssert.Throw(
            () =>
            {
                var actual = IndexPartition.DaviesBouldinIndex(data, null);
            },
            expectedType: typeof(ArgumentNullException),
            expectedPartialMessage: ArgumentExceptionAssert.NullPartialMessage,
            expectedParameterName: "partition");
    }

    // The first part contains an invalid index
    {
        var data = IrisDataSet.GetAttributesAsDoubleMatrix();
        var predictions = IrisDataSet.GetClassPredictions();
        var partition = IndexPartition.Create(predictions);

        // Corrupt part 1 with an out-of-range row index.
        partition[1][0] = 100000;

        ArgumentExceptionAssert.Throw(
            () =>
            {
                var actual = IndexPartition.DaviesBouldinIndex(data, partition);
            },
            expectedType: typeof(ArgumentException),
            expectedPartialMessage: ImplementationServices.GetResourceString(
                "STR_EXCEPT_INP_PART_CONTAINS_INVALID_INDEX"),
            expectedParameterName: "partition");
    }

    // The second part contains an invalid index
    {
        var data = IrisDataSet.GetAttributesAsDoubleMatrix();
        var predictions = IrisDataSet.GetClassPredictions();
        var partition = IndexPartition.Create(predictions);

        // Corrupt part 2 with an out-of-range row index.
        partition[2][0] = 100000;

        ArgumentExceptionAssert.Throw(
            () =>
            {
                var actual = IndexPartition.DaviesBouldinIndex(data, partition);
            },
            expectedType: typeof(ArgumentException),
            expectedPartialMessage: ImplementationServices.GetResourceString(
                "STR_EXCEPT_INP_PART_CONTAINS_INVALID_INDEX"),
            expectedParameterName: "partition");
    }

    // Valid input
    {
        var data = IrisDataSet.GetAttributesAsDoubleMatrix();
        var predictions = IrisDataSet.GetClassPredictions();
        var partition = IndexPartition.Create(predictions);

        var actual = IndexPartition.DaviesBouldinIndex(
            data,
            partition);

        // The expected below was obtained in R as follows:
        //
        // library(clv)
        // data(iris)
        // iris.data < -iris[, 1:4]
        // # cluster data
        // agnes.mod < -agnes(iris.data) # create cluster tree
        // v.pred < - as.integer(cutree(agnes.mod, 5)) # "cut" the tree
        // intraclust = c("complete", "average", "centroid")
        // interclust = c("single", "complete", "average", "centroid", "aveToCent", "hausdorff")
        // cls.scatt < -cls.scatt.data(iris.data, v.pred, dist = "euclidean")
        // davies1 <- clv.Davies.Bouldin(cls.scatt, intraclust, interclust)

        // This is davies1[4,3], corresponding to
        // intra = "centroid",
        // inter = "centroid".
        double expected = 0.685838025870551;

        Assert.AreEqual(expected, actual, 1e-3);
    }
}
public void IndexPartitionMinimumCentroidTest()
{
    // data is null
    {
        var predictions = IrisDataSet.GetClassPredictions();
        var partition = IndexPartition.Create(predictions);

        ArgumentExceptionAssert.Throw(
            () =>
            {
                var actual = IndexPartition.MinimumCentroidLinkage(null, partition);
            },
            expectedType: typeof(ArgumentNullException),
            expectedPartialMessage: ArgumentExceptionAssert.NullPartialMessage,
            expectedParameterName: "data");
    }

    // partition is null
    {
        var data = IrisDataSet.GetAttributesAsDoubleMatrix();

        ArgumentExceptionAssert.Throw(
            () =>
            {
                var actual = IndexPartition.MinimumCentroidLinkage(data, null);
            },
            expectedType: typeof(ArgumentNullException),
            expectedPartialMessage: ArgumentExceptionAssert.NullPartialMessage,
            expectedParameterName: "partition");
    }

    // The first part contains an invalid index
    {
        var data = IrisDataSet.GetAttributesAsDoubleMatrix();
        var predictions = IrisDataSet.GetClassPredictions();
        var partition = IndexPartition.Create(predictions);

        // Corrupt part 1 with an out-of-range row index.
        partition[1][0] = 100000;

        ArgumentExceptionAssert.Throw(
            () =>
            {
                var actual = IndexPartition.MinimumCentroidLinkage(data, partition);
            },
            expectedType: typeof(ArgumentException),
            expectedPartialMessage: ImplementationServices.GetResourceString(
                "STR_EXCEPT_INP_PART_CONTAINS_INVALID_INDEX"),
            expectedParameterName: "partition");
    }

    // The second part contains an invalid index
    {
        var data = IrisDataSet.GetAttributesAsDoubleMatrix();
        var predictions = IrisDataSet.GetClassPredictions();
        var partition = IndexPartition.Create(predictions);

        // Corrupt part 2 with an out-of-range row index.
        partition[2][0] = 100000;

        ArgumentExceptionAssert.Throw(
            () =>
            {
                var actual = IndexPartition.MinimumCentroidLinkage(data, partition);
            },
            expectedType: typeof(ArgumentException),
            expectedPartialMessage: ImplementationServices.GetResourceString(
                "STR_EXCEPT_INP_PART_CONTAINS_INVALID_INDEX"),
            expectedParameterName: "partition");
    }

    // Valid input
    {
        var data = IrisDataSet.GetAttributesAsDoubleMatrix();
        var predictions = IrisDataSet.GetClassPredictions();
        var partition = IndexPartition.Create(predictions);

        var actual = IndexPartition.MinimumCentroidLinkage(
            data,
            partition);

        // Recompute the expected value independently: the minimum
        // centroid linkage over every ordered pair of distinct parts.
        var linkages = new List<double>();
        foreach (var leftId in partition.Identifiers)
        {
            var left = data[partition[leftId], ":"];
            foreach (var rightId in partition.Identifiers)
            {
                if (rightId != leftId)
                {
                    var right = data[partition[rightId], ":"];
                    linkages.Add(Distance.CentroidLinkage(left, right));
                }
            }
        }

        double expected = linkages.Min();

        Assert.AreEqual(expected, actual, 1e-4);
    }
}
public void IndexerGetTest()
{
    // Partition the positions of a string array by content.
    // Resulting parts:
    //   "one"   -> { 0, 2, 3 }
    //   "three" -> { 4, 5 }
    //   "two"   -> { 1 }
    var elements = new string[6] { "one", "two", "one", "one", "three", "three" };
    var target = IndexPartition.Create(elements);

    // partIdentifier is null
    {
        ArgumentExceptionAssert.Throw(
            () =>
            {
                var part = target[(string)null];
            },
            expectedType: typeof(ArgumentNullException),
            expectedPartialMessage: ArgumentExceptionAssert.NullPartialMessage,
            expectedParameterName: "partIdentifier");
    }

    // partIdentifier is not a key
    {
        ArgumentExceptionAssert.Throw(
            () =>
            {
                var part = target["four"];
            },
            expectedType: typeof(ArgumentException),
            expectedPartialMessage: ImplementationServices.GetResourceString(
                "STR_EXCEPT_PAR_IS_NOT_A_PART_IDENTIFIER"),
            expectedParameterName: "partIdentifier");
    }

    // Valid partIdentifier
    {
        var actual = target["one"];
        var expected = IndexCollection.FromArray(new int[3] { 0, 2, 3 });
        IndexCollectionAssert.AreEqual(expected, actual);
    }
}
public void CreateFromRowCollectionTest()
{
    // elements is null
    {
        ArgumentExceptionAssert.Throw(
            () =>
            {
                var partition = IndexPartition.Create(
                    (DoubleMatrixRowCollection)null);
            },
            expectedType: typeof(ArgumentNullException),
            expectedPartialMessage: ArgumentExceptionAssert.NullPartialMessage,
            expectedParameterName: "elements");
    }

    // elements is not null
    {
        // A 6 x 3 matrix whose rows come in three distinct patterns:
        //
        //   0 0 1   (rows 0, 1)
        //   0 1 0   (rows 2, 3)
        //   1 0 0   (rows 4, 5)
        var data = new double[18] {
            0, 0, 1,
            0, 0, 1,
            0, 1, 0,
            0, 1, 0,
            1, 0, 0,
            1, 0, 0 };
        var matrix = DoubleMatrix.Dense(6, 3, data, StorageOrder.RowMajor);

        // Partition the row indexes by row content: one part per
        // distinct row, each part identified by that row and
        // containing the indexes of the rows equal to it.
        var elements = matrix.AsRowCollection();
        var actual = IndexPartition.Create(elements);

        var expected = new IndexPartition<DoubleMatrixRow>
        {
            partIndetifiers = new List<DoubleMatrixRow>(3)
            {
                elements[0],
                elements[2],
                elements[4]
            },
            parts = new Dictionary<DoubleMatrixRow, IndexCollection>(3)
            {
                { elements[0], IndexCollection.Default(1) },
                { elements[2], IndexCollection.Range(2, 3) },
                { elements[4], IndexCollection.Range(4, 5) }
            }
        };

        IndexPartitionAssert.AreEqual(expected, actual);
    }
}
public void AverageLinkageTest()
{
    // left is null
    {
        ArgumentExceptionAssert.Throw(
            () =>
            {
                Distance.AverageLinkage(
                    left: (DoubleMatrix)null,
                    right: DoubleMatrix.Identity(2));
            },
            expectedType: typeof(ArgumentNullException),
            expectedPartialMessage: ArgumentExceptionAssert.NullPartialMessage,
            expectedParameterName: "left");
    }

    // right is null
    {
        ArgumentExceptionAssert.Throw(
            () =>
            {
                Distance.AverageLinkage(
                    left: DoubleMatrix.Identity(2),
                    right: (DoubleMatrix)null);
            },
            expectedType: typeof(ArgumentNullException),
            expectedPartialMessage: ArgumentExceptionAssert.NullPartialMessage,
            expectedParameterName: "right");
    }

    // right and left have not the same number of columns
    {
        ArgumentExceptionAssert.Throw(
            () =>
            {
                Distance.AverageLinkage(
                    left: DoubleMatrix.Identity(2),
                    right: DoubleMatrix.Identity(3));
            },
            expectedType: typeof(ArgumentException),
            expectedPartialMessage: string.Format(
                ImplementationServices.GetResourceString(
                    "STR_EXCEPT_PAR_MUST_HAVE_SAME_NUM_OF_COLUMNS"),
                "left"),
            expectedParameterName: "right");
    }

    // input is valid
    {
        // Compare the first 4 "virginica" items with the first
        // 4 "setosa" items of the Iris data set.
        // (The unused local numberOfItems was removed.)
        var items = IndexCollection.Range(0, 3);

        var attributes = IrisDataSet.GetAttributesAsDoubleMatrix();
        var classes = IrisDataSet.GetClasses();
        var partition = IndexPartition.Create(classes);

        var left = attributes[partition["virginica"], ":"][items, ":"];
        var right = attributes[partition["setosa"], ":"][items, ":"];

        var actual = Distance.AverageLinkage(left, right);

        // Reference value computed externally for this fixture.
        var expected = 4.932325;

        Assert.AreEqual(expected, actual, DoubleMatrixTest.Accuracy);
    }
}
public void CreateFromDoubleMatrixTest()
{
    // elements is null
    {
        ArgumentExceptionAssert.Throw(
            () =>
            {
                var partition = IndexPartition.Create((DoubleMatrix)null);
            },
            expectedType: typeof(ArgumentNullException),
            expectedPartialMessage: ArgumentExceptionAssert.NullPartialMessage,
            expectedParameterName: "elements");
    }

    // elements is a vector
    {
        // Create a matrix
        var data = new double[18] {
            0, 0, 1,
            0, 0, 1,
            0, 1, 0,
            0, 1, 0,
            1, 0, 0,
            1, 0, 0 };
        var matrix = DoubleMatrix.Dense(6, 3, data, StorageOrder.RowMajor);

        // Partition the matrix row indexes by the contents of column 0:
        // a part is created for each distinct value in column 0
        var elements = matrix[":", 0];
        var actual = IndexPartition.Create(elements);

        // Each part is identified by its corresponding value and contains
        // the indexes of the rows in which the identifier
        // is positioned in column 0
        //
        // Expected:
        //
        // Part identifier: 0
        //      indexes: 0, 1, 2, 3
        //
        // Part identifier: 1
        //      indexes: 4, 5
        //
        IndexPartition<double> expected = new()
        {
            partIndetifiers = new List<double>(2) { 0.0, 1.0 },
            parts = new Dictionary<double, IndexCollection>(2)
            {
                { 0.0, IndexCollection.Default(3) },
                { 1.0, IndexCollection.Range(4, 5) }
            }
        };

        IndexPartitionAssert.AreEqual(expected, actual);
    }

    // elements is a matrix of signs
    {
        // Create a matrix.
        var data = new double[8] { 0, 1, -2, -3, 0, -1, 2, 3 };
        var matrix = DoubleMatrix.Dense(2, 4, data, StorageOrder.RowMajor);

        // Check the sign of its entries
        var signs = DoubleMatrix.Dense(
            matrix.NumberOfRows, matrix.NumberOfColumns);
        for (int i = 0; i < matrix.Count; i++)
        {
            signs[i] = Math.Sign(matrix[i]);
        }

        // Partition the matrix linear indexes by the sign of each entry
        var actual = IndexPartition.Create(signs);

        // The partition contains three parts, the zero part, identified by 0,
        // the negative part (identified by -1), and the positive one
        // (identified by 1).
        //
        // Expected:
        //
        // Part identifier: -1
        //      indexes: 3, 4, 6
        //
        // Part identifier: 0
        //      indexes: 0, 1
        //
        // Part identifier: 1
        //      indexes: 2, 5, 7
        //
        IndexPartition<double> expected = new()
        {
            partIndetifiers = new List<double>(3) { -1.0, 0.0, 1.0 },
            parts = new Dictionary<double, IndexCollection>(3)
            {
                { -1.0, IndexCollection.FromArray(new int[] { 3, 4, 6 }) },
                { 0.0, IndexCollection.Default(1) },
                { 1.0, IndexCollection.FromArray(new int[] { 2, 5, 7 }) }
            }
        };

        IndexPartitionAssert.AreEqual(expected, actual);
    }

    // elements is a matrix of data
    {
        // Create a matrix
        var data = new double[6] { 1, 3, 0, 2, 2, 1 };
        var elements = DoubleMatrix.Dense(3, 2, data, StorageOrder.RowMajor);

        // Partition the matrix linear indexes by the content of
        // matrix entries: a part is created for each distinct matrix value
        var actual = IndexPartition.Create(elements);

        // Each part is identified by its corresponding value and contains
        // the linear indexes of the entries in which the identifier
        // is positioned.
        //
        // Expected:
        //
        // Part identifier: 0
        //      indexes: 1
        //
        // Part identifier: 1
        //      indexes: 0, 5
        //
        // Part identifier: 2
        //      indexes: 2, 4
        //
        // Part identifier: 3
        //      indexes: 3
        //
        // Fix: the capacity hints below were 3 in the original,
        // although four parts (identifiers 0, 1, 2, 3) are stored.
        var expected = new IndexPartition<double>
        {
            partIndetifiers = new List<double>(4) { 0.0, 1.0, 2.0, 3.0 },
            parts = new Dictionary<double, IndexCollection>(4)
            {
                { 0.0, IndexCollection.FromArray(new int[] { 1 }) },
                { 1.0, IndexCollection.FromArray(new int[] { 0, 5 }) },
                { 2.0, IndexCollection.FromArray(new int[] { 2, 4 }) },
                { 3.0, IndexCollection.FromArray(new int[] { 3 }) }
            }
        };

        IndexPartitionAssert.AreEqual(expected, actual);
    }
}
/// <summary>
/// Tests that method
/// <see cref="RandomSampling.NextDoubleMatrix"/>
/// terminates successfully as expected.
/// </summary>
/// <param name="testableRandomSampling">
/// The testable random sampling providing the instance
/// on which to invoke the methods to test and their expected
/// behaviors.
/// </param>
/// <param name="numberOfSamples">
/// The number of samples to draw.
/// </param>
/// <param name="delta">The required accuracy.
/// Defaults to <c>.01</c>.</param>
public static void Succeed(
    TestableRandomSampling testableRandomSampling,
    int numberOfSamples,
    double delta = .01)
{
    var randomSampling = testableRandomSampling.RandomSampling;
    int populationSize = randomSampling.PopulationSize;

    // Draw the requested number of samples, one per row.
    var samples = DoubleMatrix.Dense(numberOfSamples, populationSize);
    for (int i = 0; i < numberOfSamples; i++)
    {
        samples[i, ":"] = randomSampling.NextDoubleMatrix();
    }

    // Estimate the inclusion probability of each population unit
    // as the fraction of samples that contain it (entry equal to 1.0).
    var actualInclusionProbabilities =
        DoubleMatrix.Dense(populationSize, 1);
    var sampleIndexes = IndexCollection.Default(numberOfSamples - 1);
    for (int j = 0; j < populationSize; j++)
    {
        var samplesContainingUnit = IndexPartition.Create(
            sampleIndexes,
            i => samples[i, j] == 1.0);

        actualInclusionProbabilities[j] =
            (double)samplesContainingUnit[true].Count
            / (double)numberOfSamples;
    }

    // Every distinct sample must have been generated: their number
    // equals the count of size-SampleSize subsets of the population.
    var distinctSamples =
        IndexPartition.Create(samples.AsRowCollection());
    Assert.AreEqual(
        SpecialFunctions.BinomialCoefficient(
            populationSize,
            randomSampling.SampleSize),
        distinctSamples.Count);

    // The Chebyshev Inequality must hold true for each
    // inclusion probability.
    var expectedInclusionProbabilities =
        testableRandomSampling.InclusionProbabilities;
    for (int j = 0; j < populationSize; j++)
    {
        ProbabilityDistributionTest.CheckChebyshevInequality(
            new BernoulliDistribution(expectedInclusionProbabilities[j]),
            actualInclusionProbabilities[j],
            numberOfSamples,
            delta);
    }

    // Check how good the actual inclusion probabilities fit
    // the expected ones.
    ProbabilityDistributionTest.CheckGoodnessOfFit(
        expectedInclusionProbabilities,
        actualInclusionProbabilities,
        testableRandomSampling.GoodnessOfFitCriticalValue);
}
public void Main()
{
    // Set the number of items and features under study.
    const int numberOfItems = 12;
    int numberOfFeatures = 7;

    // Define a partition that must be explained.
    // Three parts (clusters) are included, containing,
    // respectively, items 0 to 3, 4 to 7, and 8 to 11.
    var partition = IndexPartition.Create(
        new double[numberOfItems]
            { 0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2 });

    // Create an artificial data set of 12 items (rows)
    // and 7 features (columns), storing the observations
    // on which the explanation will be based.
    var data = DoubleMatrix.Dense(
        numberOfRows: numberOfItems,
        numberOfColumns: numberOfFeatures);

    // The first 5 features are built to be almost surely
    // non informative: they are all sampled from one and
    // the same distribution.
    var g = new GaussianDistribution(mu: 0, sigma: .01);
    for (int j = 0; j < 5; j++)
    {
        data[":", j] = g.Sample(sampleSize: numberOfItems);
    }

    // Features 5 and 6 are instead built to be informative:
    // rows belonging to different parts of the partition
    // receive samples drawn with different means.
    double mu = 1.0;
    foreach (var identifier in partition.Identifiers)
    {
        var part = partition[identifier];
        int partSize = part.Count;

        g.Mu = mu;
        data[part, 5] = g.Sample(sampleSize: partSize);
        mu += 2.0;

        g.Mu = mu;
        data[part, 6] = g.Sample(sampleSize: partSize);
        mu += 2.0;
    }

    Console.WriteLine("The data set:");
    Console.WriteLine(data);

    // Define how many features must be selected
    // for explanation.
    int numberOfExplanatoryFeatures = 2;

    // Select the best features.
    IndexCollection optimalExplanatoryFeatureIndexes =
        Clusters.Explain(
            data,
            partition,
            numberOfExplanatoryFeatures);

    // Show the results.
    Console.WriteLine();
    Console.WriteLine(
        "The {0} features best explaining the given partition have column indexes:",
        numberOfExplanatoryFeatures);
    Console.WriteLine(optimalExplanatoryFeatureIndexes);

    Console.WriteLine();
    Console.WriteLine("The Davies-Bouldin Index for the selected features:");
    var dbi = IndexPartition.DaviesBouldinIndex(
        data[":", optimalExplanatoryFeatureIndexes],
        partition);
    Console.WriteLine(dbi);
}
public void CreateFromEnumerableTest()
{
    // elements is null
    {
        ArgumentExceptionAssert.Throw(
            () =>
            {
                var partition = IndexPartition.Create((string[])null);
            },
            expectedType: typeof(ArgumentNullException),
            expectedPartialMessage: ArgumentExceptionAssert.NullPartialMessage,
            expectedParameterName: "elements");
    }

    // elements is not null
    {
        // Partition the positions of a string array by content.
        // Three parts arise, identified by "one", "two", and "three":
        //
        //   Part identifier: one     indexes: 0, 2, 3
        //   Part identifier: three   indexes: 4, 5
        //   Part identifier: two     indexes: 1
        var elements = new string[6]
            { "one", "two", "one", "one", "three", "three" };
        var actual = IndexPartition.Create(elements);

        var expected = new IndexPartition<string>
        {
            partIndetifiers = new List<string>(3)
            {
                "one",
                "three",
                "two"
            },
            parts = new Dictionary<string, IndexCollection>(3)
            {
                { "one", IndexCollection.FromArray(new int[] { 0, 2, 3 }) },
                { "three", IndexCollection.Range(4, 5) },
                { "two", IndexCollection.FromArray(new int[] { 1 }) }
            }
        };

        IndexPartitionAssert.AreEqual(expected, actual);
    }
}
public void CreateFromIndexCollectionTest()
{
    // elements is null
    {
        Func<int, bool> partitioner =
            linearIndex => linearIndex < 3;

        ArgumentExceptionAssert.Throw(
            () =>
            {
                var partition = IndexPartition.Create(
                    (IndexCollection)null,
                    partitioner);
            },
            expectedType: typeof(ArgumentNullException),
            expectedPartialMessage: ArgumentExceptionAssert.NullPartialMessage,
            expectedParameterName: "elements");
    }

    // partitioner is null
    {
        Func<int, bool> partitioner = null;

        ArgumentExceptionAssert.Throw(
            () =>
            {
                var partition = IndexPartition.Create(
                    IndexCollection.Default(3),
                    partitioner);
            },
            expectedType: typeof(ArgumentNullException),
            expectedPartialMessage: ArgumentExceptionAssert.NullPartialMessage,
            expectedParameterName: "partitioner");
    }

    // Valid parameters
    {
        // Create a matrix.
        var data = new double[16] {
            -3,  3,  3, -1,
             0,  2, -2,  2,
             2,  1, -4, -5,
            -8,  2,  7, -1 };
        var matrix = DoubleMatrix.Dense(4, 4, data, StorageOrder.RowMajor);

        // Linear indexes of the main diagonal entries:
        // 0, 5, 10, 15 for this 4 x 4 matrix (stride is
        // 1 + NumberOfRows).
        var elements = IndexCollection.Sequence(
            0, 1 + matrix.NumberOfRows, matrix.Count);

        // The partitioner returns true for diagonal entries
        // whose absolute value is less than 3, false otherwise.
        Func<int, bool> partitioner =
            linearIndex => Math.Abs(matrix[linearIndex]) < 3.0;

        // Partition the diagonal linear indexes: one part per
        // distinct partitioner outcome.
        var actual = IndexPartition.Create(elements, partitioner);

        // Expected:
        //
        //   Part identifier: False   indexes: 0, 10
        //   Part identifier: True    indexes: 5, 15
        var expected = new IndexPartition<bool>
        {
            partIndetifiers = new List<bool>(2)
            {
                false,
                true
            },
            parts = new Dictionary<bool, IndexCollection>(2)
            {
                { false, IndexCollection.FromArray(new int[2] { 0, 10 }) },
                { true, IndexCollection.FromArray(new int[2] { 5, 15 }) }
            }
        };

        IndexPartitionAssert.AreEqual(expected, actual);
    }
}