/// <summary>
/// Initializes the categorical data set shared by the tests
/// exploiting this testable multiple correspondence.
/// </summary>
static TestableMultipleCorrespondence00()
{
    // Write the raw CSV records to an in-memory stream.
    string[] records = new string[6]
    {
        "COLOR,NUMBER",
        "Red,Negative",
        "Green,Zero",
        "Red,Negative",
        "Blue,Negative",
        "Blue,Positive"
    };
    MemoryStream memoryStream = new();
    StreamWriter streamWriter = new(memoryStream);
    foreach (string record in records)
    {
        streamWriter.WriteLine(record.ToCharArray());
        streamWriter.Flush();
    }
    memoryStream.Position = 0;

    // Encode the categorical data set: extract both columns,
    // treating the first line as a header.
    StreamReader streamReader = new(memoryStream);
    char columnDelimiter = ',';
    IndexCollection extractedColumns = IndexCollection.Range(0, 1);
    bool firstLineContainsColumnHeaders = true;
    dataSet = CategoricalDataSet.Encode(
        streamReader,
        columnDelimiter,
        extractedColumns,
        firstLineContainsColumnHeaders);
}
/// <summary>
/// Verifies that specified categorical data sets are equal.
/// </summary>
/// <param name="expected">The expected categorical data set.</param>
/// <param name="actual">The actual categorical data set.</param>
/// <exception cref="AssertFailedException">
/// The data sets differ in nullity, name, variables, or encoded data.
/// </exception>
public static void AreEqual(
    CategoricalDataSet expected,
    CategoricalDataSet actual)
{
    // Both null: trivially equal.
    if (expected is null && actual is null)
    {
        return;
    }

    // After the previous check, a single null operand
    // signals inequality.
    if (expected is null || actual is null)
    {
        throw new AssertFailedException(
            "One categorical data set is null, the other is not.");
    }

    if (expected.Name != actual.Name)
    {
        throw new AssertFailedException(
            "Categorical data sets have different names.");
    }

    // Compare the publicly exposed variables.
    if (expected.Variables.Count != actual.Variables.Count)
    {
        throw new AssertFailedException(
            "Categorical data sets have different numbers of variables.");
    }

    for (int v = 0; v < expected.Variables.Count; v++)
    {
        CategoricalVariableAssert.AreEqual(
            expected.Variables[v],
            actual.Variables[v]);
    }

    // Also compare the private backing fields, so that the public
    // views are checked for consistency with the internal state.
    var expectedInnerVariables = (List<CategoricalVariable>)
        Reflector.GetField(expected, "variables");
    var actualInnerVariables = (List<CategoricalVariable>)
        Reflector.GetField(actual, "variables");

    if (expectedInnerVariables.Count != actualInnerVariables.Count)
    {
        throw new AssertFailedException(
            "Categorical data sets have different numbers of variables.");
    }

    for (int v = 0; v < expectedInnerVariables.Count; v++)
    {
        CategoricalVariableAssert.AreEqual(
            expectedInnerVariables[v],
            actualInnerVariables[v]);
    }

    // Compare both the private data field and the public Data view.
    DoubleMatrixAssert.AreEqual(
        (DoubleMatrix)Reflector.GetField(expected, "data"),
        (DoubleMatrix)Reflector.GetField(actual, "data"),
        1e-4);

    DoubleMatrixAssert.AreEqual(expected.Data, actual.Data, 1e-4);
}
/// <summary>
/// Analyzes the multiple correspondence of the specified
/// categorical data set.
/// </summary>
/// <param name="dataSet">The data set to analyze.</param>
/// <returns>The multiple correspondence of the specified data set.</returns>
/// <exception cref="ArgumentNullException">
/// <paramref name="dataSet"/> is <b>null</b>.
/// </exception>
/// <exception cref="ArgumentOutOfRangeException">
/// The disjoint form of parameter <paramref name="dataSet"/> has at least a non
/// positive marginal sum.
/// </exception>
/// <exception cref="InvalidOperationException">
/// The Singular Value Decomposition needed to acquire
/// the correspondence cannot be executed or does not converge.<br/>
/// -or-<br/>
/// No principal variable has positive variance.
/// The principal information cannot be acquired.
/// </exception>
public static MultipleCorrespondence Analyze(
    CategoricalDataSet dataSet)
{
    if (dataSet is null)
    {
        throw new ArgumentNullException(nameof(dataSet));
    }

    // The multiple correspondence is obtained as the simple
    // correspondence of the disjunctive (indicator) form
    // of the data set.
    var disjunctiveProtocol = dataSet.Disjoin();

    Correspondence correspondence;
    try
    {
        correspondence = Correspondence.Analyze(
            disjunctiveProtocol);
    }
    catch (ArgumentOutOfRangeException)
    {
        // Translate the failure so that the reported offender is
        // this method's own parameter.
        throw new ArgumentOutOfRangeException(nameof(dataSet),
            ImplementationServices.GetResourceString(
                "STR_EXCEPT_GDA_MCA_NON_POSITIVE_MARGINAL_SUMS"));
    }
    // NOTE: a redundant "catch (Exception) { throw; }" clause was
    // removed here: it only rethrew and had no effect other than
    // obscuring the control flow. Other exceptions (e.g. the
    // InvalidOperationException documented above) still propagate
    // unchanged.

    var multipleCorrespondence = new MultipleCorrespondence
    {
        correspondence = correspondence
    };

    return multipleCorrespondence;
}
/// <summary>
/// Initializes a new instance of the
/// <see cref="CategoricalEntailmentEnsembleTrainer"/> class,
/// aimed to train the specified number of categorical entailments
/// by exploiting the specified feature and response data sets.
/// </summary>
/// <param name="initialCategoricalEntailments">
/// The collection of categorical entailments initially included
/// in the ensemble. It can be empty: see <see cref="entailments"/>.
/// </param>
/// <param name="numberOfTrainedCategoricalEntailments">
/// The number of categorical entailments to be trained.
/// </param>
/// <param name="features">
/// The data set containing the training features.
/// </param>
/// <param name="response">
/// The data set containing the training response.
/// </param>
/// <param name="allowEntailmentPartialTruthValues">
/// If set to <c>true</c>, signals that the truth value of a
/// categorical entailment must be equal to the homogeneity of the
/// probability distribution from which its conclusion has been
/// drawn; otherwise, the truth value is unity.
/// </param>
/// <param name="trainSequentially">
/// If set to <c>true</c>, signals that the ensemble is trained
/// sequentially, i.e. it starts as an empty collection and new
/// categorical entailments are added through a step-by-step
/// procedure, by selecting, at each step, the entailment that
/// better improves the performance of the current ensemble.
/// Otherwise, the categorical entailments are trained simultaneously.
/// </param>
public CategoricalEntailmentEnsembleTrainer(
    IReadOnlyList<CategoricalEntailment> initialCategoricalEntailments,
    int numberOfTrainedCategoricalEntailments,
    CategoricalDataSet features,
    CategoricalDataSet response,
    bool allowEntailmentPartialTruthValues,
    bool trainSequentially)
{
    this.trainSequentially = trainSequentially;
    this.entailments =
        new List<CategoricalEntailment>(initialCategoricalEntailments);
    this.numberOfTrainedCategoricalEntailments =
        numberOfTrainedCategoricalEntailments;

    // Only the first variable of the response data set is exploited.
    var responseVariable = response.Variables[0];
    var featureVariables = features.Variables;

    // Cache the number of categories of each feature variable,
    // accumulating the overall number of feature categories.
    List<int> categoryCountsPerFeature = new(featureVariables.Count);
    int totalFeatureCategories = 0;
    foreach (var featureVariable in featureVariables)
    {
        int featureCategoryCount = featureVariable.NumberOfCategories;
        categoryCountsPerFeature.Add(featureCategoryCount);
        totalFeatureCategories += featureCategoryCount;
    }
    this.featureCategoryCounts = categoryCountsPerFeature;

    this.numberOfResponseCategories =
        responseVariable.NumberOfCategories;
    this.overallNumberOfCategories =
        totalFeatureCategories + this.numberOfResponseCategories;

    // Each entailment representation spans one slot more than the
    // overall number of categories.
    this.entailmentRepresentationLength =
        this.overallNumberOfCategories + 1;

    // Map each response category code to its positional index.
    this.responseCodeIndexPairs = new SortedList<double, int>();
    int position = 0;
    foreach (var code in responseVariable.CategoryCodes)
    {
        this.responseCodeIndexPairs[code] = position;
        position++;
    }

    this.randomNumberGeneratorPool =
        new ConcurrentDictionary<int, RandomNumberGenerator>();

    // Split the feature data into one row matrix per training item.
    this.featuresData = new DoubleMatrix[features.NumberOfRows];
    for (int i = 0; i < this.featuresData.Length; i++)
    {
        this.featuresData[i] = features.Data[i, ":"];
    }

    this.allowEntailmentPartialTruthValues =
        allowEntailmentPartialTruthValues;
    this.responseData = response.Data;

    this.FeatureVariables = features.Variables;
    this.ResponseVariable = response.Variables[0];
}
public void Main()
{
    // Write the raw CSV records to an in-memory stream.
    string[] records = new string[6]
    {
        "COLOR,NUMBER",
        "Red,Negative",
        "Green,Zero",
        "Red,Negative",
        "Black,Negative",
        "Black,Positive"
    };
    MemoryStream memoryStream = new();
    StreamWriter streamWriter = new(memoryStream);
    foreach (string record in records)
    {
        streamWriter.WriteLine(record.ToCharArray());
        streamWriter.Flush();
    }
    memoryStream.Position = 0;

    // Encode the categorical data set: extract both columns,
    // treating the first line as a header.
    StreamReader streamReader = new(memoryStream);
    CategoricalDataSet dataset = CategoricalDataSet.Encode(
        streamReader,
        ',',
        IndexCollection.Range(0, 1),
        true);

    // Decode and show the data set.
    Console.WriteLine("Decoded data set:");
    Console.WriteLine();
    var decodedDataSet = dataset.Decode();
    int instanceCount = dataset.Data.NumberOfRows;
    int variableCount = dataset.Data.NumberOfColumns;

    // Header row: variable names.
    foreach (var variable in dataset.Variables)
    {
        Console.Write(variable.Name + ",");
    }
    Console.WriteLine();

    // One row of decoded category labels per instance.
    for (int i = 0; i < instanceCount; i++)
    {
        for (int j = 0; j < variableCount; j++)
        {
            Console.Write(decodedDataSet[i][j] + ",");
        }
        Console.WriteLine();
    }
}
public void Main()
{
    // Write the raw CSV records to an in-memory stream.
    string[] records = new string[7]
    {
        "COLOR,NUMBER",
        "Red,Negative",
        "Green,Zero",
        "White,Positive",
        "Red,Negative",
        "Blue,Negative",
        "Blue,Positive"
    };
    MemoryStream memoryStream = new();
    StreamWriter streamWriter = new(memoryStream);
    foreach (string record in records)
    {
        streamWriter.WriteLine(record.ToCharArray());
        streamWriter.Flush();
    }
    memoryStream.Position = 0;

    // Encode the categorical data set: extract both columns,
    // treating the first line as a header.
    StreamReader streamReader = new(memoryStream);
    var dataSet = CategoricalDataSet.Encode(
        streamReader,
        ',',
        IndexCollection.Range(0, 1),
        true);

    // Get the NUMBER-by-COLOR table: the categories of variable
    // NUMBER (column 1) are assigned to the table rows, those of
    // variable COLOR (column 0) to the table columns.
    int rowVariableIndex = 1;
    int columnVariableIndex = 0;
    var table = dataSet.GetContingencyTable(
        rowVariableIndex,
        columnVariableIndex);

    // Show the table.
    Console.WriteLine("Contingency table:");
    Console.WriteLine(table);
}
/// <summary>
/// Evaluates the specified state, interpreted as a representation
/// of candidate categorical entailments, by classifying the
/// training items and returning the resulting classification
/// accuracy.
/// </summary>
/// <param name="state">
/// A matrix whose first row encodes the candidate categorical
/// entailments, one contiguous segment per entailment.
/// </param>
/// <returns>
/// The proportion of training items whose predicted response
/// category equals the actual one.
/// </returns>
public double Performance(DoubleMatrix state)
{
    #region Create the ensemble of categorical entailments

    // Start from the entailments already stored in this trainer.
    List<CategoricalEntailment> entailments = new(this.entailments);

    // When training sequentially, the state encodes a single
    // candidate entailment; otherwise, it encodes all of them.
    int numberOfSelectedCategoricalEntailments =
        this.trainSequentially
        ? 1
        : this.numberOfTrainedCategoricalEntailments;

    int numberOfResponseCategories = this.numberOfResponseCategories;

    for (int e = 0; e < numberOfSelectedCategoricalEntailments; e++)
    {
        // Each candidate occupies a contiguous segment of the state.
        int entailmentRepresentationIndex =
            e * this.entailmentRepresentationLength;
        entailments.Add(new CategoricalEntailment(
            state[0, IndexCollection.Range(
                entailmentRepresentationIndex,
                entailmentRepresentationIndex
                    + this.overallNumberOfCategories)],
            this.FeatureVariables,
            this.ResponseVariable));
    }

    #endregion

    #region Exploit the ensemble to classify observed items

    int numberOfItems = this.featuresData.Length;
    DoubleMatrix itemClassifications =
        DoubleMatrix.Dense(numberOfItems, 1);

    DoubleMatrix item, votes;

    for (int r = 0; r < this.featuresData.Length; r++)
    {
        // Accumulate, for each response category, the truth values
        // of the entailments whose premises the item satisfies.
        votes = DoubleMatrix.Dense(1, numberOfResponseCategories);
        item = this.featuresData[r];
        for (int e = 0; e < entailments.Count; e++)
        {
            if (entailments[e].ValidatePremises(item))
            {
                votes[this.responseCodeIndexPairs[
                    entailments[e].ResponseConclusion]]
                    += entailments[e].TruthValue;
            }
        }

        double maximumVote = Stat.Max(votes).value;

        var maximumVoteIndexes = votes.Find(maximumVote);

        int numberOfMaximumVoteIndexes = maximumVoteIndexes.Count;
        if (numberOfMaximumVoteIndexes == 1)
        {
            // Unambiguous winner: assign its category code.
            itemClassifications[r] =
                this.ResponseVariable.Categories[
                    maximumVoteIndexes[0]].Code;
        }
        else
        {
            // Pick a position corresponding to a maximum vote at random,
            // drawing from a per-thread generator in the pool so that
            // concurrent evaluations do not share generator state.
            int randomMaximumVotePosition = Convert.ToInt32(
                Math.Floor(numberOfMaximumVoteIndexes *
                    this.randomNumberGeneratorPool.GetOrAdd(
                        Environment.CurrentManagedThreadId,
                        (threadId) =>
                        {
                            var localRandomNumberGenerator =
                                RandomNumberGenerator
                                    .CreateNextMT2203(7777777);
                            return (localRandomNumberGenerator);
                        }).DefaultUniform()));

            itemClassifications[r] =
                this.ResponseVariable.Categories[
                    maximumVoteIndexes[randomMaximumVotePosition]].Code;
        }
    }

    var predictedResponses = new CategoricalDataSet(
        new List<CategoricalVariable>(1) { this.ResponseVariable },
        itemClassifications);

    #endregion

    #region Evaluate classification accuracy

    var actualResponses = this.responseData;

    var responseCodes = this.ResponseVariable.CategoryCodes;

    double numberOfExactPredictions = 0.0;
    foreach (var code in responseCodes)
    {
        // Among the items predicted as the current code, count
        // those whose actual response is the same code.
        IndexCollection codePredictedIndexes =
            predictedResponses.Data.Find(code);
        if (codePredictedIndexes is not null)
        {
            DoubleMatrix correspondingActualResponses =
                actualResponses.Vec(codePredictedIndexes);
            numberOfExactPredictions +=
                correspondingActualResponses.Find(code)?.Count ?? 0;
        }
    }

    // Compute the overall confusion
    double totalConfusion = actualResponses.Count;

    var accuracy = numberOfExactPredictions / totalConfusion;

    return (accuracy);

    #endregion
}
public void Main()
{
    // Write the raw CSV records to an in-memory stream.
    const int numberOfInstances = 27;
    string[] records = new string[numberOfInstances + 1]
    {
        "NUMERICAL,TARGET",
        "0,A", "0,A", "0,A",
        "1,B", "1,B", "1,B", "1,B",
        "2,B", "2,B",
        "3,C", "3,C", "3,C",
        "4,B", "4,B", "4,B", "4,C",
        "5,A", "5,A",
        "6,A",
        "7,C", "7,C", "7,C",
        "8,C", "8,C",
        "9,C", "9,C", "9,C"
    };
    MemoryStream memoryStream = new();
    StreamWriter streamWriter = new(memoryStream);
    foreach (string record in records)
    {
        streamWriter.WriteLine(record.ToCharArray());
        streamWriter.Flush();
    }
    memoryStream.Position = 0;

    // Identify the special categorizer for variable NUMERICAL.
    StreamReader streamReader = new(memoryStream);
    char columnDelimiter = ',';
    IndexCollection numericalColumns = IndexCollection.Range(0, 0);
    bool firstLineContainsColumnHeaders = true;
    int targetColumn = 1;
    IFormatProvider provider = CultureInfo.InvariantCulture;
    var specialCategorizers =
        CategoricalDataSet.CategorizeByEntropyMinimization(
            streamReader,
            columnDelimiter,
            numericalColumns,
            firstLineContainsColumnHeaders,
            targetColumn,
            provider);

    // Rewind the stream, then encode the categorical data set
    // using the special categorizer.
    memoryStream.Position = 0;
    IndexCollection extractedColumns = IndexCollection.Range(0, 1);
    CategoricalDataSet dataset = CategoricalDataSet.Encode(
        streamReader,
        columnDelimiter,
        extractedColumns,
        firstLineContainsColumnHeaders,
        specialCategorizers,
        provider);

    // Decode and show the data set.
    Console.WriteLine("Decoded data set:");
    Console.WriteLine();
    var decodedDataSet = dataset.Decode();
    int numberOfVariables = dataset.Data.NumberOfColumns;

    // Header row: variable names.
    foreach (var variable in dataset.Variables)
    {
        Console.Write(variable.Name + ",");
    }
    Console.WriteLine();

    // One row of decoded category labels per instance.
    for (int i = 0; i < numberOfInstances; i++)
    {
        for (int j = 0; j < numberOfVariables; j++)
        {
            Console.Write(decodedDataSet[i][j] + ",");
        }
        Console.WriteLine();
    }
}
/// <summary>
/// Tests <c>CategoricalDataSet.FromEncodedData</c>: the expected
/// exceptions on invalid arguments, then a valid-input scenario.
/// </summary>
public void FromEncodedDataTest()
{
    // variables is null
    {
        List<CategoricalVariable> variables = null;
        DoubleMatrix data = DoubleMatrix.Identity(3);

        ArgumentExceptionAssert.Throw(
            () =>
            {
                var actual = CategoricalDataSet.FromEncodedData(
                    variables: variables,
                    data: data);
            },
            expectedType: typeof(ArgumentNullException),
            expectedPartialMessage:
                ArgumentExceptionAssert.NullPartialMessage,
            expectedParameterName: "variables");
    }

    // data is null
    {
        List<CategoricalVariable> variables = new();
        DoubleMatrix data = null;

        ArgumentExceptionAssert.Throw(
            () =>
            {
                var actual = CategoricalDataSet.FromEncodedData(
                    variables: variables,
                    data: data);
            },
            expectedType: typeof(ArgumentNullException),
            expectedPartialMessage:
                ArgumentExceptionAssert.NullPartialMessage,
            expectedParameterName: "data");
    }

    // variables count unequal to data number of columns
    {
        List<CategoricalVariable> variables = new()
        {
            new CategoricalVariable("var0"),
            new CategoricalVariable("var1")
        };
        // 3 columns vs. 2 variables: mismatch must be rejected.
        DoubleMatrix data = DoubleMatrix.Dense(6, 3);

        ArgumentExceptionAssert.Throw(
            () =>
            {
                var actual = CategoricalDataSet.FromEncodedData(
                    variables: variables,
                    data: data);
            },
            expectedType: typeof(ArgumentOutOfRangeException),
            expectedPartialMessage: String.Format(
                ImplementationServices.GetResourceString(
                    "STR_EXCEPT_CAT_COLUMNS_NOT_EQUAL_TO_VARIABLES_COUNT"),
                "variables"),
            expectedParameterName: "data");
    }

    // category not included in variable
    {
        List<CategoricalVariable> variables = new()
        {
            new CategoricalVariable("var0"),
            new CategoricalVariable("var1")
        };
        variables[0].Add(0.0);
        variables[1].Add(1.0);

        // The dense matrix is all zeros, but var1 only admits
        // category code 1.0: entry (0,1) is not a valid category.
        DoubleMatrix data = DoubleMatrix.Dense(1, 2);

        ArgumentExceptionAssert.Throw(
            () =>
            {
                var actual = CategoricalDataSet.FromEncodedData(
                    variables: variables,
                    data: data);
            },
            expectedType: typeof(ArgumentException),
            expectedPartialMessage:
                ImplementationServices.GetResourceString(
                    "STR_EXCEPT_CAT_MATRIX_ENTRY_NOT_IN_VARIABLE_LIST"),
            expectedParameterName: "data");
    }

    // Valid input
    {
        // Create a data stream
        string[] data = new string[6]
        {
            "COLOR,NUMBER",
            "Red, -2.2",
            "Green, 0.0",
            "Red, -3.3",
            "Black,-1.1",
            "Black, 4.4"
        };
        MemoryStream stream = new();
        StreamWriter writer = new(stream);
        for (int i = 0; i < data.Length; i++)
        {
            writer.WriteLine(data[i].ToCharArray());
            writer.Flush();
        }
        stream.Position = 0;

        // Define a special categorizer for variable NUMBER:
        // maps a numeric token to "Zero", "Negative", or "Positive".
        string numberCategorizer(string token, IFormatProvider provider)
        {
            double datum = Convert.ToDouble(token, provider);
            if (datum == 0)
            {
                return ("Zero");
            }
            else if (datum < 0)
            {
                return ("Negative");
            }
            else
            {
                return ("Positive");
            }
        }

        // Attach the special categorizer to variable NUMBER
        int numberColumnIndex = 1;
        var specialCategorizers = new Dictionary<int, Categorizer>
        {
            { numberColumnIndex, numberCategorizer }
        };

        // Encode the categorical data set
        StreamReader streamReader = new(stream);
        char columnDelimiter = ',';
        IndexCollection extractedColumns = IndexCollection.Range(0, 1);
        bool firstLineContainsColumnHeaders = true;
        CategoricalDataSet actual = CategoricalDataSet.Encode(
            streamReader,
            columnDelimiter,
            extractedColumns,
            firstLineContainsColumnHeaders,
            specialCategorizers,
            CultureInfo.InvariantCulture);

        // NOTE(review): this block is truncated at this point in the
        // visible chunk; the expected COLOR variable continues below.
        CategoricalVariable color = new("COLOR")
        {
            { 0, "Red" },
/// <summary>
/// Classifies the categorical items in the specified data set.
/// </summary>
/// <param name="dataSet">
/// The data set whose rows contain the specified items.
/// </param>
/// <param name="featureVariableIndexes">
/// The zero-based indexes of the data set columns that contain the
/// data about the features involved in the premises of the
/// entailments defined by this instance.
/// </param>
/// <remarks>
/// <para>
/// Let <latex>L</latex> be the <see cref="IReadOnlyCollection{T}.Count"/> of
/// the <see cref="CategoricalEntailment.FeaturePremises"/>
/// defined by the <see cref="Entailments"/> exploited by this instance.
/// It is expected
/// that <paramref name="featureVariableIndexes"/> has the same count,
/// and that the <latex>l</latex>-th position
/// of <paramref name="featureVariableIndexes"/>, say <latex>k_l</latex>,
/// is the index of the
/// column that, in <paramref name="dataSet"/>, contains observations
/// about the same feature variable on which is built the <latex>l</latex>-th
/// premise of the <see cref="Entailments"/> of this instance.
/// </para>
/// </remarks>
/// <returns>
/// A categorical data set having a single column, which contains
/// the response categories assigned, item by item, by the
/// entailments defined by this instance.
/// </returns>
/// <exception cref="ArgumentNullException">
/// <paramref name="dataSet"/> is <b>null</b>.<br/>
/// -or-<br/>
/// <paramref name="featureVariableIndexes"/> is <b>null</b>.
/// </exception>
/// <exception cref="ArgumentOutOfRangeException">
/// <paramref name="featureVariableIndexes"/> contains values which
/// are not valid column indexes for the
/// <see cref="CategoricalDataSet.Data"/> of
/// <paramref name="dataSet"/>.
/// </exception>
/// <exception cref="ArgumentException">
/// <paramref name="featureVariableIndexes"/> has not the same
/// <see cref="IndexCollection.Count"/> of the
/// <see cref="FeatureVariables"/> of this instance.
/// </exception>
public CategoricalDataSet Classify(
    CategoricalDataSet dataSet,
    IndexCollection featureVariableIndexes)
{
    #region Input validation

    if (dataSet is null)
    {
        throw new ArgumentNullException(nameof(dataSet));
    }

    if (featureVariableIndexes is null)
    {
        throw new ArgumentNullException(nameof(featureVariableIndexes));
    }

    // Every feature index must be a valid column index of the data set.
    if (dataSet.NumberOfColumns <= featureVariableIndexes.Max)
    {
        throw new ArgumentOutOfRangeException(
            nameof(featureVariableIndexes),
            string.Format(
                CultureInfo.InvariantCulture,
                ImplementationServices.GetResourceString(
                    "STR_EXCEPT_PAR_INDEX_EXCEEDS_OTHER_PAR_DIMS"),
                "column",
                nameof(dataSet)));
    }

    // One index is required for each feature variable of this instance.
    if (this.FeatureVariables.Count != featureVariableIndexes.Count)
    {
        throw new ArgumentException(
            ImplementationServices.GetResourceString(
                "STR_EXCEPT_CEE_MUST_HAVE_SAME_FEATURES_COUNT"),
            nameof(featureVariableIndexes));
    }

    #endregion

    // Restrict the data to the feature columns, in premise order.
    var itemData = dataSet.Data[":", featureVariableIndexes];
    int numberOfItems = itemData.NumberOfRows;
    DoubleMatrix itemResponses = DoubleMatrix.Dense(numberOfItems, 1);
    itemResponses.SetColumnName(0, this.ResponseVariable.Name);

    DoubleMatrix item, votes;
    int numberOfResponseCategories = this.responseCodeIndexPairs.Count;

    for (int r = 0; r < itemData.NumberOfRows; r++)
    {
        // Accumulate, for each response category, the truth values
        // of the entailments whose premises the item satisfies.
        votes = DoubleMatrix.Dense(1, numberOfResponseCategories);
        item = itemData[r, ":"];
        for (int e = 0; e < this.entailments.Count; e++)
        {
            if (this.entailments[e].ValidatePremises(item))
            {
                votes[this.responseCodeIndexPairs[
                    this.entailments[e].ResponseConclusion]]
                    += this.entailments[e].TruthValue;
            }
        }

        double maximumVote = Stat.Max(votes).value;

        var maximumVoteIndexes = votes.Find(maximumVote);

        int numberOfMaximumVoteIndexes = maximumVoteIndexes.Count;
        if (numberOfMaximumVoteIndexes == 1)
        {
            // Unambiguous winner: assign its category code.
            itemResponses[r] =
                this.ResponseVariable.Categories[maximumVoteIndexes[0]].Code;
        }
        else
        {
            // Pick a position corresponding to a maximum vote at random
            int randomMaximumVotePosition = Convert.ToInt32(
                Math.Floor(numberOfMaximumVoteIndexes *
                    this.randomNumberGenerator.DefaultUniform()));
            itemResponses[r] =
                this.ResponseVariable.Categories[
                    maximumVoteIndexes[randomMaximumVotePosition]].Code;
        }
    }

    return (new CategoricalDataSet(
        new List<CategoricalVariable>() { this.ResponseVariable },
        itemResponses));
}
/// <summary>
/// Adds a number of new categorical entailments
/// by training them together with
/// the entailments currently included in this instance.
/// Training happens on the
/// specified features and response categorical
/// variables in a given data set.
/// </summary>
/// <param name="dataSet">The categorical data set containing
/// information about
/// the available feature and response variables.
/// </param>
/// <param name="featureVariableIndexes">
/// The zero-based indexes of the columns in <paramref name="dataSet"/>
/// containing observations about
/// the feature variables on which premises must be defined.
/// </param>
/// <param name="responseVariableIndex">
/// The zero-based index of the column in <paramref name="dataSet"/>
/// containing observations about the response variable.
/// </param>
/// <param name="numberOfTrainedCategoricalEntailments">
/// The number of categorical entailments to be trained.
/// </param>
/// <param name="allowEntailmentPartialTruthValues">
/// If set to <c>true</c> signals that the truth value of a
/// categorical entailment must be equal to the homogeneity
/// of the probability distribution from which its conclusion has been
/// drawn. Otherwise, the truth value is unity.
/// </param>
/// <param name="trainSequentially">
/// If set to <c>true</c> signals that the ensemble is trained
/// sequentially, i.e. it starts as an empty collection, and new
/// categorical entailments are added through a step-by-step
/// procedure to the trained ensemble,
/// by selecting, at each step, the entailment that better
/// improves the system's performance of the current ensemble.
/// Otherwise, the categorical entailments are trained simultaneously.
/// </param>
/// <returns>
/// The instance of the
/// <see cref="CategoricalEntailmentEnsembleClassifier" /> class
/// based on the trained ensemble of categorical entailments.
/// </returns>
/// <remarks>
/// <para>
/// The entailments to be trained are added to the
/// <see cref="Entailments"/>
/// to optimally enlarge such collection by the specified
/// <paramref name="numberOfTrainedCategoricalEntailments"/>.
/// </para>
/// <para>
/// It is expected that <paramref name="featureVariableIndexes"/> has the same count
/// of the <see cref="FeatureVariables"/>
/// of this instance,
/// and that the <latex>l</latex>-th position
/// of <paramref name="featureVariableIndexes"/> is the index of the
/// column that, in <paramref name="dataSet"/>, contains observations
/// about the <latex>l</latex>-th feature variable of the classifier.
/// Furthermore, <paramref name="responseVariableIndex"/> must be the index
/// of the column where, in <paramref name="dataSet"/>, are stored
/// observations about
/// the <see cref="ResponseVariable"/>
/// of this instance.
/// </para>
/// </remarks>
/// <exception cref="ArgumentNullException">
/// <paramref name="dataSet"/> is <b>null</b>.<br/>
/// -or-<br/>
/// <paramref name="featureVariableIndexes"/> is <b>null</b>.
/// </exception>
/// <exception cref="ArgumentOutOfRangeException">
/// <paramref name="numberOfTrainedCategoricalEntailments"/> is not positive.<br/>
/// -or-<br/>
/// <paramref name="featureVariableIndexes"/> contains values which
/// are not valid column indexes for the
/// <see cref="CategoricalDataSet.Data"/> of
/// <paramref name="dataSet"/>.<br/>
/// -or-<br/>
/// <paramref name="responseVariableIndex"/> is
/// not a valid column index for the
/// <see cref="CategoricalDataSet.Data"/> of
/// <paramref name="dataSet"/>.
/// </exception>
/// <exception cref="ArgumentException">
/// <paramref name="featureVariableIndexes"/> has not the same
/// <see cref="IndexCollection.Count"/> of the
/// <see cref="FeatureVariables"/> of this instance.
/// </exception>
public void AddTrained(
    CategoricalDataSet dataSet,
    IndexCollection featureVariableIndexes,
    int responseVariableIndex,
    int numberOfTrainedCategoricalEntailments,
    bool allowEntailmentPartialTruthValues,
    bool trainSequentially)
{
    #region Input validation

    if (dataSet is null)
    {
        throw new ArgumentNullException(nameof(dataSet));
    }

    if (featureVariableIndexes is null)
    {
        throw new ArgumentNullException(nameof(featureVariableIndexes));
    }

    // Every feature index must be a valid column index of the data set.
    if (featureVariableIndexes.Max >= dataSet.Data.NumberOfColumns)
    {
        throw new ArgumentOutOfRangeException(
            nameof(featureVariableIndexes),
            string.Format(
                CultureInfo.InvariantCulture,
                ImplementationServices.GetResourceString(
                    "STR_EXCEPT_PAR_INDEX_EXCEEDS_OTHER_PAR_DIMS"),
                "column",
                nameof(dataSet)));
    }

    // One index is required per feature variable of this instance.
    if (this.FeatureVariables.Count != featureVariableIndexes.Count)
    {
        throw new ArgumentException(
            ImplementationServices.GetResourceString(
                "STR_EXCEPT_CEE_MUST_HAVE_SAME_FEATURES_COUNT"),
            nameof(featureVariableIndexes));
    }

    // The response index must be a valid column index of the data set.
    if (responseVariableIndex >= dataSet.Data.NumberOfColumns
        ||
        responseVariableIndex < 0)
    {
        throw new ArgumentOutOfRangeException(
            nameof(responseVariableIndex),
            string.Format(
                CultureInfo.InvariantCulture,
                ImplementationServices.GetResourceString(
                    "STR_EXCEPT_PAR_INDEX_EXCEEDS_OTHER_PAR_DIMS"),
                "column",
                nameof(dataSet)));
    }

    if (numberOfTrainedCategoricalEntailments < 1)
    {
        throw new ArgumentOutOfRangeException(
            nameof(numberOfTrainedCategoricalEntailments),
            ImplementationServices.GetResourceString(
                "STR_EXCEPT_PAR_MUST_BE_POSITIVE"));
    }

    #endregion

    // Project the data set onto the feature and response columns.
    var features = dataSet[":", featureVariableIndexes];
    var response = dataSet[":", responseVariableIndex];

    if (trainSequentially)
    {
        // Train one entailment per step; each trained entailment is
        // fed back into the trainer, so subsequent steps optimize
        // the performance of the enlarged ensemble.
        var trainer = new CategoricalEntailmentEnsembleTrainer(
            new List<CategoricalEntailment>(this.entailments),
            numberOfTrainedCategoricalEntailments,
            features,
            response,
            allowEntailmentPartialTruthValues,
            trainSequentially);

        for (int i = 0; i < numberOfTrainedCategoricalEntailments; i++)
        {
            var partialClassifier = Train(
                trainer,
                numberOfTrainedCategoricalEntailments: 1);

            var trainedEntailment = partialClassifier.Entailments[0];

            trainer.entailments.Add(trainedEntailment);
            this.entailments.Add(trainedEntailment);
        }
    }
    else
    {
        // Train all the requested entailments simultaneously.
        var trainer = new CategoricalEntailmentEnsembleTrainer(
            new List<CategoricalEntailment>(this.entailments),
            numberOfTrainedCategoricalEntailments,
            features,
            response,
            allowEntailmentPartialTruthValues,
            trainSequentially: false);

        var partialClassifier = Train(
            trainer,
            numberOfTrainedCategoricalEntailments);

        for (int i = 0; i < numberOfTrainedCategoricalEntailments; i++)
        {
            this.entailments.Add(partialClassifier.entailments[i]);
        }
    }
}
/// <summary>
/// Returns the accuracy of a predicted classification with respect
/// to an actual one.
/// </summary>
/// <param name="predictedDataSet">
/// The data set containing the predicted classification.
/// </param>
/// <param name="predictedResponseVariableIndex">
/// The zero-based index of the column
/// in <paramref name="predictedDataSet"/> containing the
/// predictions about the response variable applied
/// for classifying.
/// </param>
/// <param name="actualDataSet">
/// The data set containing the actual classification.
/// </param>
/// <param name="actualResponseVariableIndex">
/// The zero-based index of the column
/// in <paramref name="actualDataSet"/> containing the
/// observations about the response variable applied
/// for classifying.
/// </param>
/// <returns>
/// The proportion of items whose predicted response category
/// matches the actual one.
/// </returns>
/// <exception cref="ArgumentNullException">
/// <paramref name="predictedDataSet"/> is <b>null</b>.<br/>
/// -or-<br/>
/// <paramref name="actualDataSet"/> is <b>null</b>.
/// </exception>
/// <exception cref="ArgumentOutOfRangeException">
/// <paramref name="predictedResponseVariableIndex"/> is
/// not a valid column index for the
/// <see cref="CategoricalDataSet.Data"/> of
/// <paramref name="predictedDataSet"/>.<br/>
/// -or-<br/>
/// <paramref name="actualResponseVariableIndex"/> is
/// not a valid column index for the
/// <see cref="CategoricalDataSet.Data"/> of
/// <paramref name="actualDataSet"/>.
/// </exception>
/// <exception cref="ArgumentException">
/// <paramref name="actualDataSet"/> has not the same
/// <see cref="CategoricalDataSet.NumberOfRows"/> of
/// parameter <paramref name="predictedDataSet"/>.
/// </exception>
/// <seealso href="https://en.wikipedia.org/wiki/Confusion_matrix"/>
public static double EvaluateAccuracy(
    CategoricalDataSet predictedDataSet,
    int predictedResponseVariableIndex,
    CategoricalDataSet actualDataSet,
    int actualResponseVariableIndex)
{
    #region Input validation

    if (predictedDataSet is null)
    {
        throw new ArgumentNullException(nameof(predictedDataSet));
    }

    if (predictedResponseVariableIndex < 0
        ||
        predictedResponseVariableIndex >= predictedDataSet.Data.NumberOfColumns)
    {
        throw new ArgumentOutOfRangeException(
            nameof(predictedResponseVariableIndex),
            string.Format(
                CultureInfo.InvariantCulture,
                ImplementationServices.GetResourceString(
                    "STR_EXCEPT_PAR_INDEX_EXCEEDS_OTHER_PAR_DIMS"),
                "column",
                nameof(predictedDataSet)));
    }

    if (actualDataSet is null)
    {
        throw new ArgumentNullException(nameof(actualDataSet));
    }

    if (actualResponseVariableIndex < 0
        ||
        actualResponseVariableIndex >= actualDataSet.Data.NumberOfColumns)
    {
        throw new ArgumentOutOfRangeException(
            nameof(actualResponseVariableIndex),
            string.Format(
                CultureInfo.InvariantCulture,
                ImplementationServices.GetResourceString(
                    "STR_EXCEPT_PAR_INDEX_EXCEEDS_OTHER_PAR_DIMS"),
                "column",
                nameof(actualDataSet)));
    }

    if (predictedDataSet.Data.NumberOfRows != actualDataSet.Data.NumberOfRows)
    {
        throw new ArgumentException(
            string.Format(
                CultureInfo.InvariantCulture,
                ImplementationServices.GetResourceString(
                    "STR_EXCEPT_PAR_MUST_HAVE_SAME_NUM_OF_ROWS"),
                nameof(predictedDataSet)),
            nameof(actualDataSet));
    }

    #endregion

    // Extract the predicted and actual response columns.
    var predictedResponses =
        predictedDataSet.Data[":", predictedResponseVariableIndex];
    var actualResponses =
        actualDataSet.Data[":", actualResponseVariableIndex];
    var responseCodes =
        actualDataSet.Variables[actualResponseVariableIndex].CategoryCodes;

    // For each response code, count the items predicted as that code
    // whose actual response is the same code (the diagonal of the
    // confusion matrix).
    double numberOfExactPredictions = 0.0;
    foreach (var code in responseCodes)
    {
        IndexCollection indexesPredictedAsCode =
            predictedResponses.Find(code);
        if (indexesPredictedAsCode is not null)
        {
            var matchingActualResponses =
                actualResponses.Vec(indexesPredictedAsCode);
            numberOfExactPredictions +=
                matchingActualResponses.Find(code)?.Count ?? 0;
        }
    }

    var accuracy =
        numberOfExactPredictions / (double)(actualDataSet.NumberOfRows);

    return accuracy;
}