/// <summary>
/// Builds an artificial data set, asks <c>Clusters.Explain</c> for the
/// features that best explain a given partition of its rows, and writes
/// the data, the selected column indexes and the corresponding
/// Davies-Bouldin index to the console.
/// </summary>
public void Main()
{
    // Size of the artificial data set: 12 items (rows), 7 features (columns).
    const int itemCount = 12;
    const int featureCount = 7;

    // Columns below this index are filled with noise;
    // this column and the next one carry the cluster signal.
    const int firstInformativeColumn = 5;

    // The partition to be explained. The labels below put
    // items 0 to 3, 4 to 7, and 8 to 11 in three distinct parts.
    var partition = IndexPartition.Create(
        new double[itemCount] { 0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2 });

    // Matrix storing the observations the explanation is based on.
    var data = DoubleMatrix.Dense(
        numberOfRows: itemCount,
        numberOfColumns: featureCount);

    // Columns 0 to 4: every row is sampled from one and the same
    // distribution, so these features should carry no information
    // about the partition.
    var gaussian = new GaussianDistribution(mu: 0, sigma: .01);
    for (int column = 0; column < firstInformativeColumn; column++)
    {
        data[":", column] = gaussian.Sample(sampleSize: itemCount);
    }

    // Columns 5 and 6: rows belonging to different parts are sampled
    // with different means (1 and 3, then 5 and 7, then 9 and 11),
    // so these features separate the parts.
    var identifiers = partition.Identifiers;
    double partMean = 1.0;
    for (int k = 0; k < identifiers.Count; k++)
    {
        var members = partition[identifiers[k]];
        int memberCount = members.Count;

        gaussian.Mu = partMean;
        data[members, firstInformativeColumn] =
            gaussian.Sample(sampleSize: memberCount);

        gaussian.Mu = partMean + 2.0;
        data[members, firstInformativeColumn + 1] =
            gaussian.Sample(sampleSize: memberCount);

        partMean += 4.0;
    }

    Console.WriteLine("The data set:");
    Console.WriteLine(data);

    // How many features the explanation must consist of.
    int numberOfExplanatoryFeatures = 2;

    // Select the features best explaining the partition.
    IndexCollection selectedColumns = Clusters.Explain(
        data,
        partition,
        numberOfExplanatoryFeatures);

    // Report the selection.
    Console.WriteLine();
    Console.WriteLine(
        "The {0} features best explaining the given partition have column indexes:",
        numberOfExplanatoryFeatures);
    Console.WriteLine(selectedColumns);

    // Report the Davies-Bouldin index computed on the selected columns only.
    Console.WriteLine();
    Console.WriteLine("The Davies-Bouldin Index for the selected features:");
    var daviesBouldinIndex = IndexPartition.DaviesBouldinIndex(
        data[":", selectedColumns],
        partition);
    Console.WriteLine(daviesBouldinIndex);
}
/// <summary>
/// Exercises <c>Clusters.Explain</c>: checks the exceptions asserted for
/// invalid arguments, then checks that columns 5 and 6 are selected on an
/// artificial data set in which only those two columns are sampled with
/// part-dependent means.
/// </summary>
public void ExplainTest()
{
    // data is null
    ArgumentExceptionAssert.Throw(
        () =>
        {
            Clusters.Explain(
                data: null,
                partition: IndexPartition.Create(
                    DoubleMatrix.Dense(10, 1)),
                numberOfExplanatoryFeatures: 2);
        },
        expectedType: typeof(ArgumentNullException),
        expectedPartialMessage: ArgumentExceptionAssert.NullPartialMessage,
        expectedParameterName: "data");

    // partition is null
    ArgumentExceptionAssert.Throw(
        () =>
        {
            Clusters.Explain(
                data: DoubleMatrix.Dense(10, 5),
                partition: null,
                numberOfExplanatoryFeatures: 2);
        },
        expectedType: typeof(ArgumentNullException),
        expectedPartialMessage: ArgumentExceptionAssert.NullPartialMessage,
        expectedParameterName: "partition");

    // numberOfExplanatoryFeatures is zero, then negative
    foreach (int nonPositiveCount in new[] { 0, -1 })
    {
        var mustBePositiveMessage =
            ImplementationServices.GetResourceString(
                "STR_EXCEPT_PAR_MUST_BE_POSITIVE");

        ArgumentExceptionAssert.Throw(
            () =>
            {
                Clusters.Explain(
                    data: DoubleMatrix.Dense(10, 5),
                    partition: IndexPartition.Create(
                        DoubleMatrix.Dense(10, 1)),
                    numberOfExplanatoryFeatures: nonPositiveCount);
            },
            expectedType: typeof(ArgumentOutOfRangeException),
            expectedPartialMessage: mustBePositiveMessage,
            expectedParameterName: "numberOfExplanatoryFeatures");
    }

    // numberOfExplanatoryFeatures is equal to, then greater than,
    // the number of columns in data
    foreach (int tooManyFeatures in new[] { 5, 6 })
    {
        var mustBeLessThanColumnsMessage = string.Format(
            ImplementationServices.GetResourceString(
                "STR_EXCEPT_PAR_MUST_BE_LESS_THAN_OTHER_COLUMNS"),
            "numberOfExplanatoryFeatures",
            "data");

        ArgumentExceptionAssert.Throw(
            () =>
            {
                Clusters.Explain(
                    data: DoubleMatrix.Dense(10, 5),
                    partition: IndexPartition.Create(
                        DoubleMatrix.Dense(10, 1)),
                    numberOfExplanatoryFeatures: tooManyFeatures);
            },
            expectedType: typeof(ArgumentException),
            expectedPartialMessage: mustBeLessThanColumnsMessage,
            expectedParameterName: "numberOfExplanatoryFeatures");
    }

    // Valid input
    {
        const int itemCount = 12;

        // Three parts: items 0 to 3, 4 to 7, and 8 to 11.
        var labels = DoubleMatrix.Dense(itemCount, 1,
            new double[itemCount] { 0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2 });
        var partition = IndexPartition.Create(labels);

        var data = DoubleMatrix.Dense(itemCount, 7);

        // Features 0 to 4: every row sampled from the same distribution.
        var gaussian = new GaussianDistribution(mu: 0, sigma: .1);
        for (int column = 0; column < 5; column++)
        {
            data[":", column] = gaussian.Sample(sampleSize: itemCount);
        }

        // Features 5 and 6: the mean changes from part to part
        // (1 and 3, then 5 and 7, then 9 and 11).
        var identifiers = partition.Identifiers;
        double partMean = 1.0;
        for (int k = 0; k < identifiers.Count; k++)
        {
            var members = partition[identifiers[k]];
            int memberCount = members.Count;

            gaussian.Mu = partMean;
            data[members, 5] = gaussian.Sample(sampleSize: memberCount);

            gaussian.Mu = partMean + 2.0;
            data[members, 6] = gaussian.Sample(sampleSize: memberCount);

            partMean += 4.0;
        }

        IndexCollection selectedColumns = Clusters.Explain(
            data: data,
            partition: partition,
            numberOfExplanatoryFeatures: 2);

        IndexCollectionAssert.AreEqual(
            expected: IndexCollection.Range(5, 6),
            actual: selectedColumns);
    }
}