Esempio n. 1
0
        /// <summary>
        /// Trains the given number of categorical entailments by
        /// optimizing the performance exposed by the specified
        /// <paramref name="trainer"/>, and wraps them in a new
        /// <see cref="CategoricalEntailmentEnsembleClassifier" />.
        /// </summary>
        /// <param name="trainer">
        /// The object holding the state required for training: the
        /// performance function to maximize, the feature and response
        /// variables, and the training options.
        /// </param>
        /// <param name="numberOfTrainedCategoricalEntailments">
        /// How many categorical entailments must be trained.
        /// </param>
        /// <returns>
        /// The classifier built from the optimal state found by
        /// the optimizer, i.e. the one whose ensemble consists of the
        /// trained categorical entailments.
        /// </returns>
        /// <remarks>
        /// <para>
        /// The ensemble initially stored in <paramref name="trainer"/>
        /// is allowed to be nonempty. Training always proceeds by
        /// enlarging that ensemble with as many further entailments as
        /// requested through
        /// <paramref name="numberOfTrainedCategoricalEntailments"/>.
        /// Nevertheless, the returned classifier is a partial one:
        /// its ensemble holds the newly trained entailments only,
        /// without the initial ones.
        /// </para>
        /// </remarks>
        private static CategoricalEntailmentEnsembleClassifier Train(
            CategoricalEntailmentEnsembleTrainer trainer,
            int numberOfTrainedCategoricalEntailments)
        {
            // Tuning constants passed to the optimizer.
            const double Rarity = .01;
            const int SamplesPerParameter = 100;

            var optimizer = new SystemPerformanceOptimizer();

            int responseCategoryCount =
                trainer.ResponseVariable.NumberOfCategories;

            var optimizationContext =
                new CategoricalEntailmentEnsembleOptimizationContext(
                    objectiveFunction: trainer.Performance,
                    trainer.featureCategoryCounts,
                    responseCategoryCount,
                    numberOfTrainedCategoricalEntailments,
                    trainer.allowEntailmentPartialTruthValues,
                    probabilitySmoothingCoefficient: .9,
                    optimizationGoal: OptimizationGoal.Maximization,
                    minimumNumberOfIterations: 10,
                    maximumNumberOfIterations: 1000);

            // The parameter count is the number of entailments times the
            // sum of all feature category counts and the response
            // category count; the sample size scales linearly with it.
            int parametersPerEntailment =
                trainer.featureCategoryCounts.Sum() + responseCategoryCount;
            int parameterCount =
                numberOfTrainedCategoricalEntailments * parametersPerEntailment;

            var optimizationResults = optimizer.Optimize(
                optimizationContext,
                Rarity,
                SamplesPerParameter * parameterCount);

            // Convert the optimal state back into a classifier defined on
            // the same feature and response variables used by the trainer.
            var optimalState = optimizationResults.OptimalState;
            var featureVariables =
                new List<CategoricalVariable>(trainer.FeatureVariables);

            return optimizationContext.GetCategoricalEntailmentEnsembleClassifier(
                optimalState,
                featureVariables,
                trainer.ResponseVariable);
        }
Esempio n. 2
0
        /// <summary>
        /// Enlarges the ensemble of this instance with newly trained
        /// categorical entailments. The new entailments are trained
        /// together with those already included in this instance,
        /// using the specified feature and response categorical
        /// variables of the given data set.
        /// </summary>
        /// <param name="dataSet">
        /// The categorical data set providing observations about
        /// the feature and response variables.
        /// </param>
        /// <param name="featureVariableIndexes">
        /// The zero-based indexes of the <paramref name="dataSet"/> columns
        /// that contain observations about the feature variables
        /// on which premises must be defined.
        /// </param>
        /// <param name="responseVariableIndex">
        /// The zero-based index of the <paramref name="dataSet"/> column
        /// that contains observations about the response variable.
        /// </param>
        /// <param name="numberOfTrainedCategoricalEntailments">
        /// How many categorical entailments must be trained and added.
        /// </param>
        /// <param name="allowEntailmentPartialTruthValues">
        /// When <c>true</c>, the truth value of a categorical entailment
        /// must equal the homogeneity of the probability distribution
        /// from which its conclusion has been drawn. When <c>false</c>,
        /// the truth value is unity.
        /// </param>
        /// <param name="trainSequentially">
        /// When <c>true</c>, the new entailments are trained one at a
        /// time: at each step a single entailment is trained given the
        /// ensemble obtained so far, and is then added to it before the
        /// next step starts. When <c>false</c>, all the new entailments
        /// are trained simultaneously.
        /// </param>
        /// <remarks>
        /// <para>
        /// The trained entailments are appended to the
        /// <see cref="Entailments"/> of this instance, so that the
        /// collection grows by
        /// <paramref name="numberOfTrainedCategoricalEntailments"/> items.
        /// </para>
        /// <para>
        /// Parameter <paramref name="featureVariableIndexes"/> is expected
        /// to have the same count as the <see cref="FeatureVariables"/>
        /// of this instance, with its <latex>l</latex>-th position being
        /// the index of the <paramref name="dataSet"/> column that contains
        /// observations about the <latex>l</latex>-th feature variable of
        /// the classifier.
        /// Similarly, <paramref name="responseVariableIndex"/> must be the
        /// index of the <paramref name="dataSet"/> column storing
        /// observations about the <see cref="ResponseVariable"/>
        /// of this instance.
        /// </para>
        /// </remarks>
        /// <exception cref="ArgumentNullException">
        /// <paramref name="dataSet"/> is <b>null</b>.<br/>
        /// -or-<br/>
        /// <paramref name="featureVariableIndexes"/> is <b>null</b>.
        /// </exception>
        /// <exception cref="ArgumentOutOfRangeException">
        /// <paramref name="numberOfTrainedCategoricalEntailments"/> is not positive.<br/>
        /// -or-<br/>
        /// <paramref name="featureVariableIndexes"/> contains values which
        /// are not valid column indexes for the
        /// <see cref="CategoricalDataSet.Data"/> of
        /// <paramref name="dataSet"/>.<br/>
        /// -or-<br/>
        /// <paramref name="responseVariableIndex"/> is
        /// not a valid column index for the
        /// <see cref="CategoricalDataSet.Data"/> of
        /// <paramref name="dataSet"/>.
        /// </exception>
        /// <exception cref="ArgumentException">
        /// The <see cref="IndexCollection.Count"/> of
        /// <paramref name="featureVariableIndexes"/> differs from the count
        /// of the <see cref="FeatureVariables"/> of this instance.
        /// </exception>
        public void AddTrained(
            CategoricalDataSet dataSet,
            IndexCollection featureVariableIndexes,
            int responseVariableIndex,
            int numberOfTrainedCategoricalEntailments,
            bool allowEntailmentPartialTruthValues,
            bool trainSequentially)
        {
            #region Input validation

            // Builds the message shared by both column-index range failures;
            // evaluated only when an exception is actually thrown.
            string GetInvalidColumnIndexMessage()
            {
                return string.Format(
                    CultureInfo.InvariantCulture,
                    ImplementationServices.GetResourceString(
                        "STR_EXCEPT_PAR_INDEX_EXCEEDS_OTHER_PAR_DIMS"),
                    "column", nameof(dataSet));
            }

            if (dataSet is null)
            {
                throw new ArgumentNullException(nameof(dataSet));
            }

            if (featureVariableIndexes is null)
            {
                throw new ArgumentNullException(nameof(featureVariableIndexes));
            }

            int numberOfDataColumns = dataSet.Data.NumberOfColumns;

            if (featureVariableIndexes.Max >= numberOfDataColumns)
            {
                throw new ArgumentOutOfRangeException(
                    nameof(featureVariableIndexes),
                    GetInvalidColumnIndexMessage());
            }

            if (featureVariableIndexes.Count != this.FeatureVariables.Count)
            {
                throw new ArgumentException(
                    ImplementationServices.GetResourceString(
                        "STR_EXCEPT_CEE_MUST_HAVE_SAME_FEATURES_COUNT"),
                    nameof(featureVariableIndexes));
            }

            if (responseVariableIndex < 0
                || responseVariableIndex >= numberOfDataColumns)
            {
                throw new ArgumentOutOfRangeException(
                    nameof(responseVariableIndex),
                    GetInvalidColumnIndexMessage());
            }

            if (numberOfTrainedCategoricalEntailments < 1)
            {
                throw new ArgumentOutOfRangeException(
                    nameof(numberOfTrainedCategoricalEntailments),
                    ImplementationServices.GetResourceString(
                        "STR_EXCEPT_PAR_MUST_BE_POSITIVE"));
            }

            #endregion

            var features = dataSet[":", featureVariableIndexes];
            var response = dataSet[":", responseVariableIndex];

            // The trainer works on its own copy of the current ensemble,
            // so this instance is only modified when trained entailments
            // are explicitly added below.
            var trainer = new CategoricalEntailmentEnsembleTrainer(
                new List<CategoricalEntailment>(this.entailments),
                numberOfTrainedCategoricalEntailments,
                features,
                response,
                allowEntailmentPartialTruthValues,
                trainSequentially);

            if (!trainSequentially)
            {
                // Simultaneous training: a single optimization yields
                // all the requested entailments at once.
                var jointClassifier = Train(
                    trainer,
                    numberOfTrainedCategoricalEntailments);

                for (int j = 0; j < numberOfTrainedCategoricalEntailments; j++)
                {
                    this.entailments.Add(jointClassifier.entailments[j]);
                }

                return;
            }

            // Sequential training: one entailment per step, each one being
            // made available to the trainer before the next step starts.
            for (int step = 0; step < numberOfTrainedCategoricalEntailments; step++)
            {
                var stepClassifier = Train(
                    trainer,
                    numberOfTrainedCategoricalEntailments: 1);

                var newEntailment = stepClassifier.Entailments[0];

                trainer.entailments.Add(newEntailment);

                this.entailments.Add(newEntailment);
            }
        }