/// <summary>
/// Trains a <see cref="CategoricalEntailmentEnsembleClassifier" />
/// by using the specified <paramref name="trainer"/> to select
/// as many categorical entailments as specified by
/// <paramref name="numberOfTrainedCategoricalEntailments"/>.
/// </summary>
/// <param name="trainer">
/// The object whose state holds the information required to
/// train the classifier.
/// </param>
/// <param name="numberOfTrainedCategoricalEntailments">
/// How many categorical entailments must be trained.
/// </param>
/// <returns>
/// A classifier whose ensemble consists of the trained
/// categorical entailments.
/// </returns>
/// <remarks>
/// <para>
/// The <paramref name="trainer"/> may hold a nonempty initial
/// ensemble of categorical entailments. Training always works by
/// adding to that ensemble as many further entailments as requested
/// through
/// <paramref name="numberOfTrainedCategoricalEntailments"/>.
/// Nevertheless, the returned classifier is a partial one: its
/// ensemble contains the additional trained entailments only,
/// and none of the initial ones.
/// </para>
/// </remarks>
private static CategoricalEntailmentEnsembleClassifier Train(
    CategoricalEntailmentEnsembleTrainer trainer,
    int numberOfTrainedCategoricalEntailments)
{
    int responseCategoryCount =
        trainer.ResponseVariable.NumberOfCategories;
    var categoryCounts = trainer.featureCategoryCounts;

    // The trainer's performance is the objective to be maximized.
    var optimizationContext =
        new CategoricalEntailmentEnsembleOptimizationContext(
            objectiveFunction: trainer.Performance,
            categoryCounts,
            responseCategoryCount,
            numberOfTrainedCategoricalEntailments,
            trainer.allowEntailmentPartialTruthValues,
            probabilitySmoothingCoefficient: .9,
            optimizationGoal: OptimizationGoal.Maximization,
            minimumNumberOfIterations: 10,
            maximumNumberOfIterations: 1000);

    // For each entailment to be trained: the total count of feature
    // categories plus the number of response categories.
    int parameterCount = numberOfTrainedCategoricalEntailments
        * (categoryCounts.Sum() + responseCategoryCount);

    // The sample size grows linearly with the number of parameters.
    int sampleSize = 100 * parameterCount;
    const double rarity = .01;

    var performanceOptimizer = new SystemPerformanceOptimizer();
    var optimizationResults = performanceOptimizer.Optimize(
        optimizationContext,
        rarity,
        sampleSize);

    // Convert the optimal state into a classifier defined on the
    // trainer's feature and response variables.
    return optimizationContext.GetCategoricalEntailmentEnsembleClassifier(
        optimizationResults.OptimalState,
        new List<CategoricalVariable>(trainer.FeatureVariables),
        trainer.ResponseVariable);
}
/// <summary>
/// Enlarges the ensemble of this instance with a number of new
/// categorical entailments, trained together with the entailments
/// it currently includes.
/// Training is based on the specified feature and response
/// categorical variables of a given data set.
/// </summary>
/// <param name="dataSet">
/// The categorical data set containing information about
/// the available feature and response variables.
/// </param>
/// <param name="featureVariableIndexes">
/// The zero-based indexes of the columns in <paramref name="dataSet"/>
/// containing observations about
/// the feature variables on which premises must be defined.
/// </param>
/// <param name="responseVariableIndex">
/// The zero-based index of the column in <paramref name="dataSet"/>
/// containing observations about the response variable.
/// </param>
/// <param name="numberOfTrainedCategoricalEntailments">
/// The number of categorical entailments to be trained.
/// </param>
/// <param name="allowEntailmentPartialTruthValues">
/// If set to <c>true</c>, signals that the truth value of a
/// categorical entailment must be equal to the homogeneity
/// of the probability distribution from which its conclusion has been
/// drawn. Otherwise, the truth value is unity.
/// </param>
/// <param name="trainSequentially">
/// If set to <c>true</c>, signals that the ensemble is trained
/// sequentially, i.e. it starts as an empty collection, and new
/// categorical entailments are added to the trained ensemble
/// through a step-by-step procedure that selects, at each step,
/// the entailment that better improves the system's performance
/// of the current ensemble.
/// Otherwise, the categorical entailments are trained simultaneously.
/// </param>
/// <remarks>
/// <para>
/// The trained entailments are added to the
/// <see cref="Entailments"/>
/// in order to optimally enlarge that collection by the specified
/// <paramref name="numberOfTrainedCategoricalEntailments"/>.
/// </para>
/// <para>
/// It is expected that <paramref name="featureVariableIndexes"/>
/// has the same count as the <see cref="FeatureVariables"/>
/// of this instance,
/// and that the <latex>l</latex>-th position
/// of <paramref name="featureVariableIndexes"/> is the index of the
/// column that, in <paramref name="dataSet"/>, contains observations
/// about the <latex>l</latex>-th feature variable of the classifier.
/// Furthermore, <paramref name="responseVariableIndex"/> must be the
/// index of the column that, in <paramref name="dataSet"/>, stores
/// observations about
/// the <see cref="ResponseVariable"/>
/// of this instance.
/// </para>
/// </remarks>
/// <exception cref="ArgumentNullException">
/// <paramref name="dataSet"/> is <b>null</b>.<br/>
/// -or-<br/>
/// <paramref name="featureVariableIndexes"/> is <b>null</b>.
/// </exception>
/// <exception cref="ArgumentOutOfRangeException">
/// <paramref name="numberOfTrainedCategoricalEntailments"/> is not positive.<br/>
/// -or-<br/>
/// <paramref name="featureVariableIndexes"/> contains values which
/// are not valid column indexes for the
/// <see cref="CategoricalDataSet.Data"/> of
/// <paramref name="dataSet"/>.<br/>
/// -or-<br/>
/// <paramref name="responseVariableIndex"/> is
/// not a valid column index for the
/// <see cref="CategoricalDataSet.Data"/> of
/// <paramref name="dataSet"/>.
/// </exception>
/// <exception cref="ArgumentException">
/// <paramref name="featureVariableIndexes"/> does not have the same
/// <see cref="IndexCollection.Count"/> as the
/// <see cref="FeatureVariables"/> of this instance.
/// </exception>
public void AddTrained(
    CategoricalDataSet dataSet,
    IndexCollection featureVariableIndexes,
    int responseVariableIndex,
    int numberOfTrainedCategoricalEntailments,
    bool allowEntailmentPartialTruthValues,
    bool trainSequentially)
{
    // Builds, only when needed, the message shared by the exceptions
    // thrown for column indexes exceeding the data set dimensions.
    string GetColumnIndexExceedsDataSetMessage() =>
        string.Format(
            CultureInfo.InvariantCulture,
            ImplementationServices.GetResourceString(
                "STR_EXCEPT_PAR_INDEX_EXCEEDS_OTHER_PAR_DIMS"),
            "column",
            nameof(dataSet));

    #region Input validation

    if (dataSet is null)
    {
        throw new ArgumentNullException(nameof(dataSet));
    }

    if (featureVariableIndexes is null)
    {
        throw new ArgumentNullException(nameof(featureVariableIndexes));
    }

    var columnCount = dataSet.Data.NumberOfColumns;

    if (featureVariableIndexes.Max >= columnCount)
    {
        throw new ArgumentOutOfRangeException(
            nameof(featureVariableIndexes),
            GetColumnIndexExceedsDataSetMessage());
    }

    if (this.FeatureVariables.Count != featureVariableIndexes.Count)
    {
        throw new ArgumentException(
            ImplementationServices.GetResourceString(
                "STR_EXCEPT_CEE_MUST_HAVE_SAME_FEATURES_COUNT"),
            nameof(featureVariableIndexes));
    }

    if (responseVariableIndex < 0 || responseVariableIndex >= columnCount)
    {
        throw new ArgumentOutOfRangeException(
            nameof(responseVariableIndex),
            GetColumnIndexExceedsDataSetMessage());
    }

    if (numberOfTrainedCategoricalEntailments < 1)
    {
        throw new ArgumentOutOfRangeException(
            nameof(numberOfTrainedCategoricalEntailments),
            ImplementationServices.GetResourceString(
                "STR_EXCEPT_PAR_MUST_BE_POSITIVE"));
    }

    #endregion

    // Select all rows of the columns involved in the training.
    var featureData = dataSet[":", featureVariableIndexes];
    var responseData = dataSet[":", responseVariableIndex];

    // The trainer works on a copy of the current ensemble, so that
    // this instance is modified only through the explicit additions
    // executed below.
    var ensembleTrainer = new CategoricalEntailmentEnsembleTrainer(
        new List<CategoricalEntailment>(this.entailments),
        numberOfTrainedCategoricalEntailments,
        featureData,
        responseData,
        allowEntailmentPartialTruthValues,
        trainSequentially);

    if (!trainSequentially)
    {
        // Simultaneous training: all the requested entailments are
        // obtained through a single training session.
        var jointClassifier = Train(
            ensembleTrainer,
            numberOfTrainedCategoricalEntailments);

        for (int j = 0; j < numberOfTrainedCategoricalEntailments; j++)
        {
            this.entailments.Add(jointClassifier.entailments[j]);
        }

        return;
    }

    // Sequential training: one entailment is trained at a time,
    // and it is also added to the trainer's ensemble before
    // the next entailment is trained.
    for (int step = 0; step < numberOfTrainedCategoricalEntailments; step++)
    {
        var stepClassifier = Train(
            ensembleTrainer,
            numberOfTrainedCategoricalEntailments: 1);

        var newEntailment = stepClassifier.Entailments[0];

        ensembleTrainer.entailments.Add(newEntailment);
        this.entailments.Add(newEntailment);
    }
}