/// <summary>
/// Partitions the positions of the entries in a collection of
/// <see cref="System.Double"/> elements, gathering in a same part
/// all the positions at which a same element occurs.
/// </summary>
/// <param name="elements">The collection of elements whose
/// positions are to be partitioned.</param>
/// <returns>A partition having one part for each distinct element
/// found in <paramref name="elements"/>. Each part is identified by
/// its element and collects the positions, as accepted by the
/// single-index accessor of <paramref name="elements"/>, at which
/// that element occurs.</returns>
/// <remarks>
/// <para>
/// Positions are visited from 0 up to the count of
/// <paramref name="elements"/>, excluded, and parts are added to the
/// partition in ascending order of their identifiers.
/// </para>
/// </remarks>
/// <example>
/// <inheritdoc cref="IndexPartition"
/// path="para[@id='IndexPartitionEx0Intro']"/>
/// <inheritdoc cref="IndexPartition"
/// path="para[@id='IndexPartitionEx0Code']"/>
/// <inheritdoc cref="IndexPartition"
/// path="para[@id='IndexPartitionEx2Intro']"/>
/// <inheritdoc cref="IndexPartition"
/// path="para[@id='IndexPartitionEx2Code']"/>
/// </example>
/// <exception cref="ArgumentNullException">
/// <paramref name="elements"/> is <b>null</b>.
/// </exception>
public static IndexPartition<double> Create(DoubleMatrix elements)
{
    if (elements is null)
    {
        throw new ArgumentNullException(nameof(elements));
    }

    // One ordered map plays both roles: it detects values already seen
    // and keeps, for each value, the positions collected so far.
    var positionsByValue = new SortedDictionary<double, List<int>>();

    for (int position = 0; position < elements.Count; position++)
    {
        double value = elements[position];

        if (!positionsByValue.TryGetValue(value, out var positions))
        {
            positions = new List<int>();
            positionsByValue.Add(value, positions);
        }

        positions.Add(position);
    }

    var partition = new IndexPartition<double>();

    // Enumerating the sorted map yields the identifiers in ascending order.
    foreach (var pair in positionsByValue)
    {
        partition[pair.Key] = new IndexCollection(
            pair.Value.ToArray(), false);
        partition.partIndetifiers.Add(pair.Key);
    }

    return partition;
}
/// <summary>
/// Creates a partition of positions in a collection of elements by
/// aggregating those positions occupied by a same element.
/// </summary>
/// <typeparam name="T">The type of the
/// elements whose positions are to be partitioned.
/// </typeparam>
/// <param name="elements">The collection of elements whose
/// positions are to be partitioned.</param>
/// <remarks>
/// <para>
/// Two elements are treated as the same element if and only if
/// their <see cref="IComparable{T}"/> comparison returns zero.
/// Positions are zero-based and follow the enumeration order of
/// <paramref name="elements"/>, which is enumerated exactly once.
/// Parts are added to the partition in ascending order of their
/// identifiers; when several elements compare as equal, the first one
/// enumerated is used as the part identifier.
/// </para>
/// </remarks>
/// <example>
/// <inheritdoc cref="IndexPartition"
/// path="para[@id='IndexPartitionEx3Intro']"/>
/// <inheritdoc cref="IndexPartition"
/// path="para[@id='IndexPartitionEx3Code']"/>
/// </example>
/// <returns>The partition of element positions in the
/// specified collection.</returns>
/// <exception cref="ArgumentNullException">
/// <paramref name="elements"/> is <b>null</b>.
/// </exception>
public static IndexPartition<T> Create<T>(
    IEnumerable<T> elements)
    where T : IComparable<T>
{
    if (elements is null)
    {
        throw new ArgumentNullException(nameof(elements));
    }

    // A single comparison-ordered map is used both to detect elements
    // already seen and to store their positions. Testing membership with
    // the IComparable<T> ordering while looking positions up in a
    // hash-based dictionary would fail with a KeyNotFoundException for
    // any T whose Equals/GetHashCode disagree with CompareTo (for
    // example, a class implementing IComparable<T> without overriding
    // Equals).
    var positionsByElement = new SortedDictionary<T, List<int>>();

    int position = 0;
    foreach (var element in elements)
    {
        if (!positionsByElement.TryGetValue(element, out var positions))
        {
            positions = new List<int>();
            positionsByElement.Add(element, positions);
        }

        positions.Add(position++);
    }

    IndexPartition<T> partition = new();

    // Enumerating the sorted map yields the identifiers in ascending order.
    foreach (var pair in positionsByElement)
    {
        partition[pair.Key] = new IndexCollection(
            pair.Value.ToArray(), false);
        partition.partIndetifiers.Add(pair.Key);
    }

    return partition;
}
/// <summary>
/// Determines whether this instance is equal to another index collection.
/// </summary>
/// <param name="other">The index collection to compare with
/// this instance.</param>
/// <returns><c>true</c> if comparing this instance
/// to <paramref name="other" /> yields zero, that is if the two
/// objects occupy the same position in the sort order;
/// otherwise, <c>false</c>.</returns>
/// <remarks>
/// <inheritdoc cref="IndexCollection"
/// path="para[@id='quasi.lexicographic.order']"/>
/// <inheritdoc cref="IndexCollection"
/// path="para[@id='quasi.lexicographic.equality']"/>
/// </remarks>
/// <seealso href="https://en.wikipedia.org/wiki/Lexicographical_order"/>
public bool Equals(IndexCollection other)
    => this.CompareTo(other) == 0;
/// <summary>
/// Explains existing clusters by selecting
/// a number of features from the specified corresponding data set.
/// </summary>
/// <param name="data">
/// The matrix whose columns contain the features observed at the
/// items under study.
/// </param>
/// <param name="partition">
/// A partition of the row indexes valid for <paramref name="data"/>.
/// </param>
/// <param name="numberOfExplanatoryFeatures">
/// The number of features to be selected.
/// </param>
/// <remarks>
/// <para>
/// Method <see cref="Explain(
/// DoubleMatrix, IndexPartition{double}, int)"/>
/// selects <paramref name="numberOfExplanatoryFeatures"/> columns
/// of <paramref name="data"/> by minimizing the Davies-Bouldin
/// Index that the given <paramref name="partition"/> attains
/// on the selected columns only.
/// </para>
/// <para>
/// The search is carried out by a
/// <see cref="SystemPerformanceOptimizer"/> running on a default
/// Cross-Entropy context of
/// type <see cref="CombinationOptimizationContext"/>.
/// If different selection criteria need to be applied,
/// or extra control on the
/// parameters of the underlying algorithm is required,
/// a specialized <see cref="CombinationOptimizationContext"/>
/// can be instantiated and hence exploited executing
/// method <see cref="SystemPerformanceOptimizer.Optimize(
/// SystemPerformanceOptimizationContext, double, int)">Optimize</see>
/// on a <see cref="SystemPerformanceOptimizer"/> object.
/// See the documentation about <see cref="CombinationOptimizationContext"/>
/// for additional examples.
/// </para>
/// </remarks>
/// <example>
/// <para>
/// In the following example, an existing partition of 12 items is explained
/// by selecting 2 features out of the seven ones available in
/// an artificial data set regarding the items under study.
/// </para>
/// <para>
/// <code title="Selecting features from a data set to explain a given partition."
/// source="..\Novacta.Analytics.CodeExamples\ClustersExplainExample0.cs.txt"
/// language="cs" />
/// </para>
/// </example>
/// <returns>
/// The collection of column indexes, valid for <paramref name="data"/>, that
/// correspond to the features selected to explain the
/// given <paramref name="partition"/> of row indexes.
/// </returns>
/// <exception cref="ArgumentNullException">
/// <paramref name="data"/> is <b>null</b>.<br/>
/// -or-<br/>
/// <paramref name="partition"/> is <b>null</b>.
/// </exception>
/// <exception cref="ArgumentOutOfRangeException">
/// <paramref name="numberOfExplanatoryFeatures"/> is not positive.
/// </exception>
/// <exception cref="ArgumentException">
/// <paramref name="numberOfExplanatoryFeatures"/> is not less than
/// the number of columns in <paramref name="data"/>.<br/>
/// -or-<br/>
/// A part in <paramref name="partition"/> contains a position
/// which is not valid as a row index of <paramref name="data"/>.
/// </exception>
/// <seealso cref="IndexPartition.DaviesBouldinIndex(
/// DoubleMatrix, IndexPartition{double})"/>
/// <seealso cref="CombinationOptimizationContext"/>
/// <seealso cref="SystemPerformanceOptimizer"/>
public static IndexCollection Explain(
    DoubleMatrix data,
    IndexPartition<double> partition,
    int numberOfExplanatoryFeatures)
{
    #region Input validation

    if (data is null)
    {
        throw new ArgumentNullException(nameof(data));
    }

    if (numberOfExplanatoryFeatures < 1)
    {
        string mustBePositive =
            ImplementationServices.GetResourceString(
                "STR_EXCEPT_PAR_MUST_BE_POSITIVE");

        throw new ArgumentOutOfRangeException(
            nameof(numberOfExplanatoryFeatures),
            mustBePositive);
    }

    int numberOfAvailableFeatures = data.NumberOfColumns;

    // At least one feature must be left out of the selection.
    if (numberOfExplanatoryFeatures >= numberOfAvailableFeatures)
    {
        string mustBeLessThanColumns =
            ImplementationServices.GetResourceString(
                "STR_EXCEPT_PAR_MUST_BE_LESS_THAN_OTHER_COLUMNS");

        throw new ArgumentException(
            string.Format(
                CultureInfo.InvariantCulture,
                mustBeLessThanColumns,
                nameof(numberOfExplanatoryFeatures),
                nameof(data)),
            nameof(numberOfExplanatoryFeatures));
    }

    if (partition is null)
    {
        throw new ArgumentNullException(nameof(partition));
    }

    #endregion

    // Performance of a candidate state: the Davies-Bouldin index of the
    // partition evaluated on the columns at which the state is nonzero.
    double EvaluateSelection(DoubleMatrix state) =>
        IndexPartition.DaviesBouldinIndex(
            data: data[":", state.FindNonzero()],
            partition: partition);

    var optimizer = new SystemPerformanceOptimizer();

    var context = new CombinationOptimizationContext(
        objectiveFunction: EvaluateSelection,
        stateDimension: numberOfAvailableFeatures,
        combinationDimension: numberOfExplanatoryFeatures,
        probabilitySmoothingCoefficient: .8,
        optimizationGoal: OptimizationGoal.Minimization,
        minimumNumberOfIterations: 3,
        maximumNumberOfIterations: 1000);

    const double rarity = .01;
    int sampleSize = 1000 * numberOfAvailableFeatures;

    var optimizationResults = optimizer.Optimize(
        context,
        rarity,
        sampleSize);

    // The nonzero positions of the optimal state are the selected columns.
    return optimizationResults.OptimalState.FindNonzero();
}
/// <summary>
/// Initializes a new instance bound to the specified index collection.
/// </summary>
/// <param name="indexCollection">
/// The index collection stored by this instance. The argument is
/// stored as is: no validation is performed here.
/// </param>
public IndexCollectionEnumerator(IndexCollection indexCollection)
    => this.indexCollection = indexCollection;
/// <summary>
/// Classifies the categorical items in the specified data set.
/// </summary>
/// <param name="dataSet">
/// The data set whose rows contain the specified items.
/// </param>
/// <param name="featureVariableIndexes">
/// The zero-based indexes of the data set columns that contain the
/// data about the features involved in the premises of the
/// entailments defined by this instance.
/// </param>
/// <remarks>
/// <para>
/// Let <latex>L</latex> be the <see cref="IReadOnlyCollection{T}.Count"/> of
/// the <see cref="CategoricalEntailment.FeaturePremises"/>
/// defined by the <see cref="Entailments"/> exploited by this instance.
/// It is expected
/// that <paramref name="featureVariableIndexes"/> has the same count,
/// and that the <latex>l</latex>-th position
/// of <paramref name="featureVariableIndexes"/>, say <latex>k_l</latex>,
/// is the index of the
/// column that, in <paramref name="dataSet"/>, contains observations
/// about the same feature variable on which is built the <latex>l</latex>-th
/// premise of the <see cref="Entailments"/> of this instance.
/// </para>
/// <para>
/// For each item, every entailment whose premises are validated by
/// the item adds its truth value to the vote of the response category
/// it concludes. The item is assigned the category having the
/// maximum vote; if several categories share the maximum vote, one of
/// them is drawn using the random number generator of this instance.
/// </para>
/// </remarks>
/// <returns>
/// A data set containing one variable only, the
/// <see cref="ResponseVariable"/> of this instance, whose
/// single data column stores, for each row
/// of <paramref name="dataSet"/>, the code of the response category
/// assigned to the corresponding item.
/// </returns>
/// <exception cref="ArgumentNullException">
/// <paramref name="dataSet"/> is <b>null</b>.<br/>
/// -or-<br/>
/// <paramref name="featureVariableIndexes"/> is <b>null</b>.
/// </exception>
/// <exception cref="ArgumentOutOfRangeException">
/// <paramref name="featureVariableIndexes"/> contains values which
/// are not valid column indexes for the
/// <see cref="CategoricalDataSet.Data"/> of
/// <paramref name="dataSet"/>.
/// </exception>
/// <exception cref="ArgumentException">
/// <paramref name="featureVariableIndexes"/> has not the same
/// <see cref="IndexCollection.Count"/> of the
/// <see cref="FeatureVariables"/> of this instance.
/// </exception>
public CategoricalDataSet Classify(
    CategoricalDataSet dataSet,
    IndexCollection featureVariableIndexes)
{
    #region Input validation

    if (dataSet is null)
    {
        throw new ArgumentNullException(nameof(dataSet));
    }

    if (featureVariableIndexes is null)
    {
        throw new ArgumentNullException(nameof(featureVariableIndexes));
    }

    if (dataSet.NumberOfColumns <= featureVariableIndexes.Max)
    {
        string indexExceedsDimensions = string.Format(
            CultureInfo.InvariantCulture,
            ImplementationServices.GetResourceString(
                "STR_EXCEPT_PAR_INDEX_EXCEEDS_OTHER_PAR_DIMS"),
            "column",
            nameof(dataSet));

        throw new ArgumentOutOfRangeException(
            nameof(featureVariableIndexes),
            indexExceedsDimensions);
    }

    if (this.FeatureVariables.Count != featureVariableIndexes.Count)
    {
        throw new ArgumentException(
            ImplementationServices.GetResourceString(
                "STR_EXCEPT_CEE_MUST_HAVE_SAME_FEATURES_COUNT"),
            nameof(featureVariableIndexes));
    }

    #endregion

    var featureData = dataSet.Data[":", featureVariableIndexes];
    int itemCount = featureData.NumberOfRows;

    DoubleMatrix assignedCodes = DoubleMatrix.Dense(itemCount, 1);
    assignedCodes.SetColumnName(0, this.ResponseVariable.Name);

    int categoryCount = this.responseCodeIndexPairs.Count;

    for (int row = 0; row < itemCount; row++)
    {
        // Fresh tally for the current item: one entry per response category.
        DoubleMatrix tally = DoubleMatrix.Dense(1, categoryCount);
        DoubleMatrix observation = featureData[row, ":"];

        for (int k = 0; k < this.entailments.Count; k++)
        {
            var entailment = this.entailments[k];
            if (!entailment.ValidatePremises(observation))
            {
                continue;
            }

            var categoryIndex =
                this.responseCodeIndexPairs[entailment.ResponseConclusion];
            tally[categoryIndex] += entailment.TruthValue;
        }

        double topVote = Stat.Max(tally).value;
        var topVoteIndexes = tally.Find(topVote);
        int numberOfTopVotes = topVoteIndexes.Count;

        // With a single winner its position is 0; otherwise a position
        // among the tied ones is picked at random. The generator is
        // consulted only when there is a tie.
        int chosenPosition = 0;
        if (numberOfTopVotes != 1)
        {
            chosenPosition = Convert.ToInt32(
                Math.Floor(numberOfTopVotes *
                    this.randomNumberGenerator.DefaultUniform()));
        }

        assignedCodes[row] = this.ResponseVariable.Categories[
            topVoteIndexes[chosenPosition]].Code;
    }

    var responseVariables =
        new List<CategoricalVariable> { this.ResponseVariable };

    return new CategoricalDataSet(responseVariables, assignedCodes);
}
/// <summary>
/// Adds a number of new categorical entailments
/// by training them together with
/// the entailments currently included in this instance.
/// Training happens on the
/// specified features and response categorical
/// variables in a given data set.
/// </summary>
/// <param name="dataSet">The categorical data set containing
/// information about
/// the available feature and response variables.
/// </param>
/// <param name="featureVariableIndexes">
/// The zero-based indexes of the columns in <paramref name="dataSet"/>
/// containing observations about
/// the feature variables on which premises must be defined.
/// </param>
/// <param name="responseVariableIndex">
/// The zero-based index of the column in <paramref name="dataSet"/>
/// containing observations about the response variable.
/// </param>
/// <param name="numberOfTrainedCategoricalEntailments">
/// The number of categorical entailments to be trained.
/// </param>
/// <param name="allowEntailmentPartialTruthValues">
/// If set to <c>true</c> signals that the truth value of a
/// categorical entailment must be equal to the homogeneity
/// of the probability distribution from which its conclusion has been
/// drawn. Otherwise, the truth value is unity.
/// </param>
/// <param name="trainSequentially">
/// If set to <c>true</c> signals that the new entailments are trained
/// sequentially: they are added one at a time through a step-by-step
/// procedure in which each step trains a single entailment
/// given those already in the ensemble, including the ones added at
/// previous steps.
/// Otherwise, the categorical entailments are trained simultaneously.
/// </param>
/// <remarks>
/// <para>
/// The entailments to be trained are added to the
/// <see cref="Entailments"/>
/// to optimally enlarge such collection by the specified
/// <paramref name="numberOfTrainedCategoricalEntailments"/>.
/// This method returns no value: the trained entailments are
/// appended to this instance.
/// </para>
/// <para>
/// It is expected that <paramref name="featureVariableIndexes"/> has the same count
/// of the <see cref="FeatureVariables"/>
/// of this instance,
/// and that the <latex>l</latex>-th position
/// of <paramref name="featureVariableIndexes"/> is the index of the
/// column that, in <paramref name="dataSet"/>, contains observations
/// about the <latex>l</latex>-th feature variable of the classifier.
/// Furthermore, <paramref name="responseVariableIndex"/> must be the index
/// of the column where, in <paramref name="dataSet"/>, are stored
/// observations about
/// the <see cref="ResponseVariable"/>
/// of this instance.
/// </para>
/// </remarks>
/// <exception cref="ArgumentNullException">
/// <paramref name="dataSet"/> is <b>null</b>.<br/>
/// -or-<br/>
/// <paramref name="featureVariableIndexes"/> is <b>null</b>.
/// </exception>
/// <exception cref="ArgumentOutOfRangeException">
/// <paramref name="numberOfTrainedCategoricalEntailments"/> is not positive.<br/>
/// -or-<br/>
/// <paramref name="featureVariableIndexes"/> contains values which
/// are not valid column indexes for the
/// <see cref="CategoricalDataSet.Data"/> of
/// <paramref name="dataSet"/>.<br/>
/// -or-<br/>
/// <paramref name="responseVariableIndex"/> is
/// not a valid column index for the
/// <see cref="CategoricalDataSet.Data"/> of
/// <paramref name="dataSet"/>.
/// </exception>
/// <exception cref="ArgumentException">
/// <paramref name="featureVariableIndexes"/> has not the same
/// <see cref="IndexCollection.Count"/> of the
/// <see cref="FeatureVariables"/> of this instance.
/// </exception>
public void AddTrained(
    CategoricalDataSet dataSet,
    IndexCollection featureVariableIndexes,
    int responseVariableIndex,
    int numberOfTrainedCategoricalEntailments,
    bool allowEntailmentPartialTruthValues,
    bool trainSequentially)
{
    #region Input validation

    // Message shared by the two column index checks below; it is built
    // only when an exception is actually going to be thrown.
    static string ColumnIndexExceedsDimensions() =>
        string.Format(
            CultureInfo.InvariantCulture,
            ImplementationServices.GetResourceString(
                "STR_EXCEPT_PAR_INDEX_EXCEEDS_OTHER_PAR_DIMS"),
            "column",
            nameof(dataSet));

    if (dataSet is null)
    {
        throw new ArgumentNullException(nameof(dataSet));
    }

    if (featureVariableIndexes is null)
    {
        throw new ArgumentNullException(nameof(featureVariableIndexes));
    }

    if (featureVariableIndexes.Max >= dataSet.Data.NumberOfColumns)
    {
        throw new ArgumentOutOfRangeException(
            nameof(featureVariableIndexes),
            ColumnIndexExceedsDimensions());
    }

    if (this.FeatureVariables.Count != featureVariableIndexes.Count)
    {
        throw new ArgumentException(
            ImplementationServices.GetResourceString(
                "STR_EXCEPT_CEE_MUST_HAVE_SAME_FEATURES_COUNT"),
            nameof(featureVariableIndexes));
    }

    if (responseVariableIndex < 0
        || responseVariableIndex >= dataSet.Data.NumberOfColumns)
    {
        throw new ArgumentOutOfRangeException(
            nameof(responseVariableIndex),
            ColumnIndexExceedsDimensions());
    }

    if (numberOfTrainedCategoricalEntailments < 1)
    {
        throw new ArgumentOutOfRangeException(
            nameof(numberOfTrainedCategoricalEntailments),
            ImplementationServices.GetResourceString(
                "STR_EXCEPT_PAR_MUST_BE_POSITIVE"));
    }

    #endregion

    var features = dataSet[":", featureVariableIndexes];
    var response = dataSet[":", responseVariableIndex];

    // The trainer works on a copy of the current entailments, so that
    // this instance is modified only by the explicit additions below.
    var trainer = new CategoricalEntailmentEnsembleTrainer(
        new List<CategoricalEntailment>(this.entailments),
        numberOfTrainedCategoricalEntailments,
        features,
        response,
        allowEntailmentPartialTruthValues,
        trainSequentially);

    if (!trainSequentially)
    {
        // Simultaneous training: all the new entailments come from
        // a single training run.
        var jointClassifier = Train(
            trainer,
            numberOfTrainedCategoricalEntailments);

        for (int k = 0; k < numberOfTrainedCategoricalEntailments; k++)
        {
            this.entailments.Add(jointClassifier.entailments[k]);
        }

        return;
    }

    // Sequential training: one entailment per step, each step seeing
    // the entailments added by the previous ones.
    for (int step = 0; step < numberOfTrainedCategoricalEntailments; step++)
    {
        var stepClassifier = Train(
            trainer,
            numberOfTrainedCategoricalEntailments: 1);

        var newEntailment = stepClassifier.Entailments[0];

        trainer.entailments.Add(newEntailment);
        this.entailments.Add(newEntailment);
    }
}
/// <summary>
/// Returns the accuracy of a predicted classification with respect
/// to an actual one.
/// </summary>
/// <param name="predictedDataSet">
/// The data set containing the predicted classification.
/// </param>
/// <param name="predictedResponseVariableIndex">
/// The zero-based index of the column
/// in <paramref name="predictedDataSet"/> containing the
/// predictions about the response variable applied
/// for classifying.
/// </param>
/// <param name="actualDataSet">
/// The data set containing the actual classification.
/// </param>
/// <param name="actualResponseVariableIndex">
/// The zero-based index of the column
/// in <paramref name="actualDataSet"/> containing the
/// observations about the response variable applied
/// for classifying.
/// </param>
/// <remarks>
/// <para>
/// The accuracy is the number of exact predictions divided by the
/// number of rows in <paramref name="actualDataSet"/>. A prediction
/// is counted as exact when the predicted and the actual responses in
/// a same row share a same code, among the category codes of the actual
/// response variable.
/// </para>
/// </remarks>
/// <returns>
/// The accuracy of a predicted classification with respect to an
/// actual one.
/// </returns>
/// <exception cref="ArgumentNullException">
/// <paramref name="predictedDataSet"/> is <b>null</b>.<br/>
/// -or-<br/>
/// <paramref name="actualDataSet"/> is <b>null</b>.
/// </exception>
/// <exception cref="ArgumentOutOfRangeException">
/// <paramref name="predictedResponseVariableIndex"/> is
/// not a valid column index for the
/// <see cref="CategoricalDataSet.Data"/> of
/// <paramref name="predictedDataSet"/>.<br/>
/// -or-<br/>
/// <paramref name="actualResponseVariableIndex"/> is
/// not a valid column index for the
/// <see cref="CategoricalDataSet.Data"/> of
/// <paramref name="actualDataSet"/>.
/// </exception>
/// <exception cref="ArgumentException">
/// <paramref name="actualDataSet"/> has not the same
/// <see cref="CategoricalDataSet.NumberOfRows"/> of
/// parameter <paramref name="predictedDataSet"/>.
/// </exception>
/// <seealso href="https://en.wikipedia.org/wiki/Confusion_matrix"/>
public static double EvaluateAccuracy(
    CategoricalDataSet predictedDataSet,
    int predictedResponseVariableIndex,
    CategoricalDataSet actualDataSet,
    int actualResponseVariableIndex)
{
    #region Input validation

    // Message for a column index which is not valid for the data set
    // passed through the parameter having the specified name; it is
    // built only when an exception is actually going to be thrown.
    static string ColumnIndexExceedsDimensions(string dataSetParameterName) =>
        string.Format(
            CultureInfo.InvariantCulture,
            ImplementationServices.GetResourceString(
                "STR_EXCEPT_PAR_INDEX_EXCEEDS_OTHER_PAR_DIMS"),
            "column",
            dataSetParameterName);

    if (predictedDataSet is null)
    {
        throw new ArgumentNullException(nameof(predictedDataSet));
    }

    if (predictedResponseVariableIndex < 0
        || predictedResponseVariableIndex
            >= predictedDataSet.Data.NumberOfColumns)
    {
        throw new ArgumentOutOfRangeException(
            nameof(predictedResponseVariableIndex),
            ColumnIndexExceedsDimensions(nameof(predictedDataSet)));
    }

    if (actualDataSet is null)
    {
        throw new ArgumentNullException(nameof(actualDataSet));
    }

    if (actualResponseVariableIndex < 0
        || actualResponseVariableIndex
            >= actualDataSet.Data.NumberOfColumns)
    {
        throw new ArgumentOutOfRangeException(
            nameof(actualResponseVariableIndex),
            ColumnIndexExceedsDimensions(nameof(actualDataSet)));
    }

    if (predictedDataSet.Data.NumberOfRows
        != actualDataSet.Data.NumberOfRows)
    {
        string mustHaveSameRows = string.Format(
            CultureInfo.InvariantCulture,
            ImplementationServices.GetResourceString(
                "STR_EXCEPT_PAR_MUST_HAVE_SAME_NUM_OF_ROWS"),
            nameof(predictedDataSet));

        throw new ArgumentException(
            mustHaveSameRows,
            nameof(actualDataSet));
    }

    #endregion

    var actualColumn =
        actualDataSet.Data[":", actualResponseVariableIndex];
    var predictedColumn =
        predictedDataSet.Data[":", predictedResponseVariableIndex];

    var categoryCodes =
        actualDataSet.Variables[actualResponseVariableIndex].CategoryCodes;

    double exactPredictions = 0.0;

    foreach (var code in categoryCodes)
    {
        // Positions at which the current code has been predicted;
        // null signals that the code has never been predicted.
        IndexCollection predictedPositions = predictedColumn.Find(code);
        if (predictedPositions is null)
        {
            continue;
        }

        // Among those positions, count the ones at which the code
        // is the actual response too.
        var actualAtPredicted = actualColumn.Vec(predictedPositions);
        var matchingPositions = actualAtPredicted.Find(code);

        exactPredictions += matchingPositions?.Count ?? 0;
    }

    return exactPredictions / (double)(actualDataSet.NumberOfRows);
}