Esempio n. 1
0
        /// <summary>
        /// Trains a
        /// <see cref="CategoricalEntailmentEnsembleClassifier" />
        /// by relying on the specified <paramref name="trainer"/>
        /// to select as many categorical entailments as stated by
        /// <paramref name="numberOfTrainedCategoricalEntailments"/>.
        /// </summary>
        /// <param name="trainer">
        /// The object whose state holds what is needed for training:
        /// the feature and response variables, the feature category
        /// counts, the partial truth value setting, and the
        /// performance function to be optimized.
        /// </param>
        /// <param name="numberOfTrainedCategoricalEntailments">
        /// How many categorical entailments must be trained.
        /// </param>
        /// <returns>
        /// The classifier whose ensemble consists of the newly trained
        /// categorical entailments.
        /// </returns>
        /// <remarks>
        /// <para>
        /// A <paramref name="trainer"/> may hold an initial, nonempty
        /// ensemble of categorical entailments. Training always
        /// extends such ensemble with
        /// <paramref name="numberOfTrainedCategoricalEntailments"/>
        /// further entailments. Nevertheless, what is returned is the
        /// partial classifier: its ensemble includes the additional
        /// trained entailments only, not the initial ones.
        /// </para>
        /// <para>
        /// The performance of the <paramref name="trainer"/> is
        /// maximized using a probability smoothing coefficient
        /// of .9, a rarity of .01, a number of iterations between
        /// 10 and 1000, and a sample size equal to 100 times the
        /// number of parameters, where each entailment contributes
        /// one parameter for every feature category and one for every
        /// response category.
        /// </para>
        /// </remarks>
        private static CategoricalEntailmentEnsembleClassifier Train(
            CategoricalEntailmentEnsembleTrainer trainer,
            int numberOfTrainedCategoricalEntailments)
        {
            int responseCategoryCount =
                trainer.ResponseVariable.NumberOfCategories;

            // The context describes the problem to be solved: maximize
            // the trainer performance over ensembles of the requested size.
            var optimizationContext =
                new CategoricalEntailmentEnsembleOptimizationContext(
                    objectiveFunction: trainer.Performance,
                    featureCategoryCounts: trainer.featureCategoryCounts,
                    numberOfResponseCategories: responseCategoryCount,
                    numberOfCategoricalEntailments: numberOfTrainedCategoricalEntailments,
                    allowEntailmentPartialTruthValues: trainer.allowEntailmentPartialTruthValues,
                    probabilitySmoothingCoefficient: .9,
                    optimizationGoal: OptimizationGoal.Maximization,
                    minimumNumberOfIterations: 10,
                    maximumNumberOfIterations: 1000);

            // Each entailment needs one parameter per feature category
            // plus one parameter per response category.
            int parametersPerEntailment =
                trainer.featureCategoryCounts.Sum() + responseCategoryCount;

            int parameterCount =
                numberOfTrainedCategoricalEntailments * parametersPerEntailment;

            // The sample size grows linearly with the number of parameters.
            int sampleSize = 100 * parameterCount;

            const double rarity = .01;

            var optimizer = new SystemPerformanceOptimizer();

            var optimizationResults = optimizer.Optimize(
                optimizationContext,
                rarity,
                sampleSize);

            // Translate the optimal state into the corresponding
            // classifier; the feature variables are passed as a new list.
            var optimalState = optimizationResults.OptimalState;

            var featureVariables =
                new List<CategoricalVariable>(trainer.FeatureVariables);

            return optimizationContext.GetCategoricalEntailmentEnsembleClassifier(
                optimalState,
                featureVariables,
                trainer.ResponseVariable);
        }
Esempio n. 2
0
        /// <summary>
        /// Checks that, when the objective function is constant,
        /// repeated calls to <c>GetOptimalState</c> select every
        /// response category with a relative frequency compatible
        /// with a uniform distribution over the categories.
        /// </summary>
        public void GetOptimalStateTest()
        {
            // valid input - random ties resolution
            {
                const int sampleCount = 10000;
                const double chebyshevDelta = .01;

                // A constant objective function cannot distinguish
                // among states on the basis of their performance.
                var optimizationContext =
                    new CategoricalEntailmentEnsembleOptimizationContext(
                        objectiveFunction: (DoubleMatrix state) => Double.PositiveInfinity,
                        featureCategoryCounts: new List<int>(1) { 6 },
                        numberOfResponseCategories: 4,
                        numberOfCategoricalEntailments: 1,
                        allowEntailmentPartialTruthValues: true,
                        probabilitySmoothingCoefficient: .9,
                        optimizationGoal: OptimizationGoal.Maximization,
                        minimumNumberOfIterations: 5,
                        maximumNumberOfIterations: 1000);

                int categoryCount = optimizationContext.NumberOfResponseCategories;

                // Ten entries: six equal to .5 followed by four equal
                // to .25. Positions 6 to 9 are the ones inspected below
                // to identify the selected response category.
                var samplingParameter = DoubleMatrix.Dense(1, 10, new[] {
                    .5, .5, .5, .5, .5, .5, .25, .25, .25, .25
                });

                // Draw the states, keeping for each one the position of
                // the first nonzero entry among the response positions.

                var responsePositions = IndexCollection.Range(6, 9);

                var selectedCategories = new int[sampleCount];

                for (int s = 0; s < selectedCategories.Length; s++)
                {
                    var optimalState =
                        optimizationContext.GetOptimalState(samplingParameter);

                    var responsePart = optimalState.Vec(responsePositions);

                    selectedCategories[s] = responsePart.FindNonzero()[0];
                }

                // Relative frequency with which each category was selected

                var observedFrequencies = DoubleMatrix.Dense(categoryCount, 1);

                var sampleIndexes = IndexCollection.Default(sampleCount - 1);

                for (int category = 0; category < categoryCount; category++)
                {
                    // Split the sample indexes according to whether the
                    // corresponding state selected the current category.
                    var samplesByMatch = IndexPartition.Create(
                        sampleIndexes,
                        s => selectedCategories[s] == category);

                    double matchCount = samplesByMatch[true].Count;

                    observedFrequencies[category] = matchCount / sampleCount;
                }

                // Every response category must have been selected
                // at least once

                int distinctCategoryCount =
                    IndexPartition.Create(selectedCategories).Count;

                Assert.AreEqual(
                    expected: categoryCount,
                    actual: distinctCategoryCount);

                // The Chebyshev Inequality must hold true for each
                // relative frequency, the expected value being the same
                // for all categories

                var uniformFrequencies = DoubleMatrix.Dense(
                    categoryCount,
                    1,
                    1.0 / categoryCount);

                for (int category = 0; category < categoryCount; category++)
                {
                    var expectedDistribution =
                        new BernoulliDistribution(uniformFrequencies[category]);

                    ProbabilityDistributionTest.CheckChebyshevInequality(
                        expectedDistribution,
                        observedFrequencies[category],
                        sampleCount,
                        chebyshevDelta);
                }

                // Goodness of fit of the observed frequencies
                // to the expected ones.
                //
                // The critical value below is valid for 4 response
                // categories only: it is the quantile of order .9 of
                // the chi-squared distribution having 4-1 degrees of
                // freedom, i.e. 6.251389
                // (as from R function qchisq(.9, 3)).
                const double chiSquaredCriticalValue = 6.251389;

                ProbabilityDistributionTest.CheckGoodnessOfFit(
                    uniformFrequencies,
                    observedFrequencies,
                    chiSquaredCriticalValue);
            }
        }