/// <summary>
/// Trains a <see cref="CategoricalEntailmentEnsembleClassifier" />
/// by exploiting the specified <paramref name="trainer"/>
/// to select the given
/// <paramref name="numberOfTrainedCategoricalEntailments"/>.
/// </summary>
/// <param name="trainer">
/// An object whose state contains the information needed to
/// train the classifier.
/// </param>
/// <param name="numberOfTrainedCategoricalEntailments">
/// The number of categorical entailments to be trained.
/// </param>
/// <returns>
/// A classifier whose ensemble contains the trained
/// categorical entailments.
/// </returns>
/// <remarks>
/// <para>
/// The <paramref name="trainer"/> can be equipped with a
/// nonempty initial ensemble of categorical entailments.
/// Training always adds further entailments to such ensemble,
/// as many as requested by
/// <paramref name="numberOfTrainedCategoricalEntailments"/>.
/// The returned classifier, however, is a partial one: its
/// ensemble contains the additionally trained entailments only
/// (no initial entailments).
/// </para>
/// </remarks>
private static CategoricalEntailmentEnsembleClassifier Train(
    CategoricalEntailmentEnsembleTrainer trainer,
    int numberOfTrainedCategoricalEntailments)
{
    var performanceOptimizer = new SystemPerformanceOptimizer();

    int responseCategoryCount =
        trainer.ResponseVariable.NumberOfCategories;

    // The context maximizes the performance reported by the trainer.
    var optimizationContext =
        new CategoricalEntailmentEnsembleOptimizationContext(
            objectiveFunction: trainer.Performance,
            featureCategoryCounts: trainer.featureCategoryCounts,
            numberOfResponseCategories: responseCategoryCount,
            numberOfCategoricalEntailments: numberOfTrainedCategoricalEntailments,
            allowEntailmentPartialTruthValues: trainer.allowEntailmentPartialTruthValues,
            probabilitySmoothingCoefficient: 0.9,
            optimizationGoal: OptimizationGoal.Maximization,
            minimumNumberOfIterations: 10,
            maximumNumberOfIterations: 1000);

    // Each entailment contributes one parameter per feature category
    // plus one per response category.
    int parametersPerEntailment =
        trainer.featureCategoryCounts.Sum() + responseCategoryCount;
    int parameterCount =
        numberOfTrainedCategoricalEntailments * parametersPerEntailment;

    // The sample drawn by the optimizer has 100 points per parameter.
    int sampleSize = 100 * parameterCount;
    const double rarity = 0.01;

    var optimizationResults = performanceOptimizer.Optimize(
        optimizationContext,
        rarity,
        sampleSize);

    // Convert the optimal state into the corresponding classifier.
    return optimizationContext.GetCategoricalEntailmentEnsembleClassifier(
        optimizationResults.OptimalState,
        new List<CategoricalVariable>(trainer.FeatureVariables),
        trainer.ResponseVariable);
}
// Samples GetOptimalState repeatedly with a parameter whose response
// entries are all equal (.25 each) and checks that the selected
// response categories are consistent with equal inclusion
// probabilities.
public void GetOptimalStateTest()
{
    // valid input - random ties resolution
    {
        var context = new CategoricalEntailmentEnsembleOptimizationContext(
            objectiveFunction: (DoubleMatrix state) => double.PositiveInfinity,
            featureCategoryCounts: new List<int>(1) { 6 },
            numberOfResponseCategories: 4,
            numberOfCategoricalEntailments: 1,
            allowEntailmentPartialTruthValues: true,
            probabilitySmoothingCoefficient: .9,
            optimizationGoal: OptimizationGoal.Maximization,
            minimumNumberOfIterations: 5,
            maximumNumberOfIterations: 1000);

        int numberOfEvaluations = 10000;
        double delta = .01;

        // Six entries for the feature categories (.5 each), followed by
        // four entries for the response categories (.25 each).
        var parameter = DoubleMatrix.Dense(1, 10,
            new double[]
            {
                .5, .5, .5, .5, .5, .5,
                .25, .25, .25, .25
            });

        // Generate states: for each evaluation, record the position,
        // among the response entries (linear indexes 6 to 9), of the
        // first nonzero one.
        var selectedCategories = new int[numberOfEvaluations];
        var responseIndexes = IndexCollection.Range(6, 9);
        for (int evaluation = 0; evaluation < numberOfEvaluations; evaluation++)
        {
            var optimalState = context.GetOptimalState(parameter);
            var nonzeroResponses =
                optimalState.Vec(responseIndexes).FindNonzero();
            selectedCategories[evaluation] = nonzeroResponses[0];
        }

        // Compute the actual inclusion probabilities
        var actualInclusionProbabilities =
            DoubleMatrix.Dense(context.NumberOfResponseCategories, 1);
        var evaluationIndexes =
            IndexCollection.Default(numberOfEvaluations - 1);
        for (int j = 0; j < context.NumberOfResponseCategories; j++)
        {
            int category = j;
            var evaluationsByMembership = IndexPartition.Create(
                evaluationIndexes,
                (i) => selectedCategories[i] == category);

            int hits = evaluationsByMembership[true].Count;
            actualInclusionProbabilities[j] =
                hits / (double)numberOfEvaluations;
        }

        // Check the number of distinct generated states
        var distinctCategories = IndexPartition.Create(selectedCategories);
        int numberOfDistinctStates = distinctCategories.Count;

        Assert.AreEqual(
            expected: context.NumberOfResponseCategories,
            actual: numberOfDistinctStates);

        // Check that the Chebyshev Inequality holds true
        // for each inclusion probability
        var expectedInclusionProbabilities = DoubleMatrix.Dense(
            context.NumberOfResponseCategories,
            1,
            1.0 / context.NumberOfResponseCategories);

        for (int j = 0; j < context.NumberOfResponseCategories; j++)
        {
            var inclusionDistribution =
                new BernoulliDistribution(expectedInclusionProbabilities[j]);

            ProbabilityDistributionTest.CheckChebyshevInequality(
                inclusionDistribution,
                actualInclusionProbabilities[j],
                numberOfEvaluations,
                delta);
        }

        // Check how good the actual inclusion probabilities fit
        // the expected ones.
        //
        // The following assumes a number of response
        // categories equal to 4.
        //
        // The quantile of order .9 for
        // the chi-squared distribution having 4-1
        // degrees of freedom is 6.251389
        // (as from R function qchisq(.9, 3))
        var goodnessOfFitCriticalValue = 6.251389;

        ProbabilityDistributionTest.CheckGoodnessOfFit(
            expectedInclusionProbabilities,
            actualInclusionProbabilities,
            goodnessOfFitCriticalValue);
    }
}