static TestableMultipleCorrespondence00()
        {
            // Comma-separated observations; the first row holds the
            // column headers.
            string[] lines =
            {
                "COLOR,NUMBER",
                "Red,Negative",
                "Green,Zero",
                "Red,Negative",
                "Blue,Negative",
                "Blue,Positive"
            };

            // Copy the rows into an in-memory stream, then rewind it so
            // that it can be read back from its beginning.
            var stream = new MemoryStream();
            var writer = new StreamWriter(stream);

            foreach (string line in lines)
            {
                writer.WriteLine(line);
            }
            writer.Flush();
            stream.Position = 0;

            // Encode the columns selected by IndexCollection.Range(0, 1),
            // treating the first line as headers, and keep the result
            // in the dataSet member.
            var reader = new StreamReader(stream);
            char delimiter = ',';
            bool hasHeaders = true;
            IndexCollection columns = IndexCollection.Range(0, 1);

            dataSet = CategoricalDataSet.Encode(
                reader,
                delimiter,
                columns,
                hasHeaders);
        }
Пример #2
0
        /// <summary>
        /// Asserts that two categorical data sets are equal.
        /// </summary>
        /// <remarks>
        /// Names, variables and data are compared. Variables and data are
        /// checked twice: through the public properties and through the
        /// fields named <c>variables</c> and <c>data</c>, which are read
        /// via <c>Reflector.GetField</c>.
        /// Two <b>null</b> references are considered equal.
        /// </remarks>
        /// <param name="expected">The expected categorical data set.</param>
        /// <param name="actual">The actual categorical data set.</param>
        /// <exception cref="AssertFailedException">
        /// Exactly one of the data sets is <b>null</b>.<br/>
        /// -or-<br/>
        /// The data sets have different names.<br/>
        /// -or-<br/>
        /// The data sets have different numbers of variables.
        /// </exception>
        public static void AreEqual(
            CategoricalDataSet expected,
            CategoricalDataSet actual)
        {
            bool expectedIsMissing = expected is null;
            bool actualIsMissing = actual is null;

            if (expectedIsMissing && actualIsMissing)
            {
                // Nothing to compare.
                return;
            }

            if (expectedIsMissing || actualIsMissing)
            {
                throw new AssertFailedException(
                    "One categorical data set is null, the other is not.");
            }

            if (expected.Name != actual.Name)
            {
                throw new AssertFailedException(
                    "Categorical data sets have different names.");
            }

            // Variables, as exposed by the public property.
            var expectedPublicVariables = expected.Variables;
            var actualPublicVariables = actual.Variables;

            if (expectedPublicVariables.Count != actualPublicVariables.Count)
            {
                throw new AssertFailedException(
                    "Categorical data sets have different numbers of variables.");
            }

            for (int v = 0; v < expectedPublicVariables.Count; v++)
            {
                CategoricalVariableAssert.AreEqual(
                    expectedPublicVariables[v],
                    actualPublicVariables[v]);
            }

            // Variables, as stored in the backing field.
            var expectedFieldVariables =
                (List<CategoricalVariable>)Reflector.GetField(expected, "variables");
            var actualFieldVariables =
                (List<CategoricalVariable>)Reflector.GetField(actual, "variables");

            if (expectedFieldVariables.Count != actualFieldVariables.Count)
            {
                throw new AssertFailedException(
                    "Categorical data sets have different numbers of variables.");
            }

            for (int v = 0; v < expectedFieldVariables.Count; v++)
            {
                CategoricalVariableAssert.AreEqual(
                    expectedFieldVariables[v],
                    actualFieldVariables[v]);
            }

            // Data: backing field first, then the public property.
            const double tolerance = 1e-4;

            var expectedFieldData = (DoubleMatrix)Reflector.GetField(expected, "data");
            var actualFieldData = (DoubleMatrix)Reflector.GetField(actual, "data");

            DoubleMatrixAssert.AreEqual(
                expectedFieldData,
                actualFieldData,
                tolerance);

            DoubleMatrixAssert.AreEqual(expected.Data, actual.Data, tolerance);
        }
Пример #3
0
        /// <summary>
        /// Analyzes the multiple correspondence of the specified
        /// categorical data set.
        /// </summary>
        /// <remarks>
        /// The data set is disjoined, and the result is submitted to
        /// <c>Correspondence.Analyze</c>.
        /// </remarks>
        /// <param name="dataSet">The data set to analyze.</param>
        /// <returns>The multiple correspondence of the specified data set.</returns>
        /// <exception cref="ArgumentNullException">
        /// <paramref name="dataSet"/> is <b>null</b>.
        /// </exception>
        /// <exception cref="ArgumentOutOfRangeException">
        /// The disjoint form of parameter <paramref name="dataSet"/> has at least a non
        /// positive marginal sum.
        /// </exception>
        /// <exception cref="InvalidOperationException">
        /// The Singular Value Decomposition needed to acquire
        /// the correspondence cannot be executed or does not converge.<br/>
        /// -or-<br/>
        /// No principal variable has positive variance.
        /// The principal information cannot be acquired.
        /// </exception>
        public static MultipleCorrespondence Analyze(
            CategoricalDataSet dataSet)
        {
            if (dataSet is null)
            {
                throw new ArgumentNullException(nameof(dataSet));
            }

            var disjunctiveProtocol = dataSet.Disjoin();

            Correspondence correspondence;

            try
            {
                correspondence = Correspondence.Analyze(
                    disjunctiveProtocol);
            }
            catch (ArgumentOutOfRangeException)
            {
                // The inner analysis reports the failure in terms of its own
                // argument: re-target it at the parameter of this method.
                // Any other exception propagates unchanged to the caller.
                throw new ArgumentOutOfRangeException(
                    nameof(dataSet),
                    ImplementationServices.GetResourceString(
                        "STR_EXCEPT_GDA_MCA_NON_POSITIVE_MARGINAL_SUMS"));
            }

            return new MultipleCorrespondence
            {
                correspondence = correspondence
            };
        }
Пример #4
0
        /// <summary>
        /// Initializes a new instance of
        /// the <see cref="CategoricalEntailmentEnsembleTrainer"/> class
        /// that trains the specified number of categorical
        /// entailments on the specified data sets.
        /// </summary>
        /// <param name="initialCategoricalEntailments">
        /// The categorical entailments from which training starts.
        /// The collection is copied and can be empty:
        /// see <see cref="entailments"/>.
        /// </param>
        /// <param name="numberOfTrainedCategoricalEntailments">
        /// The number of categorical entailments
        /// to be trained.
        /// </param>
        /// <param name="features">
        /// The data set containing the training features.
        /// Each of its rows is stored as a separate item.
        /// </param>
        /// <param name="response">
        /// The data set containing the training response.
        /// Only its first variable is taken as the response variable.
        /// </param>
        /// <param name="allowEntailmentPartialTruthValues">
        /// If set to <c>true</c> signals that the truth value of a
        /// categorical entailment must be equal to the homogeneity
        /// of the probability distribution from which its conclusion has been
        /// drawn. Otherwise, the truth value is unity.
        /// </param>
        /// <param name="trainSequentially">
        /// If set to <c>true</c> signals that the ensemble is trained
        /// sequentially: it starts as an empty collection, and one
        /// categorical entailment is added at each step, selecting
        /// the entailment that better improves the performance of the
        /// current ensemble.
        /// Otherwise, the categorical entailments are trained simultaneously.
        /// </param>
        public CategoricalEntailmentEnsembleTrainer(
            IReadOnlyList<CategoricalEntailment> initialCategoricalEntailments,
            int numberOfTrainedCategoricalEntailments,
            CategoricalDataSet features,
            CategoricalDataSet response,
            bool allowEntailmentPartialTruthValues,
            bool trainSequentially)
        {
            // Training configuration.
            this.trainSequentially = trainSequentially;
            this.entailments =
                new List<CategoricalEntailment>(initialCategoricalEntailments);
            this.numberOfTrainedCategoricalEntailments =
                numberOfTrainedCategoricalEntailments;

            var responseVariable = response.Variables[0];
            var featureVariables = features.Variables;

            // Record the number of categories of each feature and
            // accumulate their total.
            var categoryCounts = new List<int>(featureVariables.Count);
            int featureCategoriesTotal = 0;

            foreach (var featureVariable in featureVariables)
            {
                int categoriesInFeature = featureVariable.NumberOfCategories;
                categoryCounts.Add(categoriesInFeature);
                featureCategoriesTotal += categoriesInFeature;
            }

            this.featureCategoryCounts = categoryCounts;
            this.numberOfResponseCategories = responseVariable.NumberOfCategories;
            this.overallNumberOfCategories =
                featureCategoriesTotal + this.numberOfResponseCategories;

            // An entailment representation has one more entry than
            // the overall number of categories.
            this.entailmentRepresentationLength =
                this.overallNumberOfCategories + 1;

            // Map each response category code to its position in the
            // enumeration of the response codes.
            this.responseCodeIndexPairs = new SortedList<double, int>();
            int position = 0;

            foreach (var code in responseVariable.CategoryCodes)
            {
                this.responseCodeIndexPairs[code] = position++;
            }

            // Random number generators are added to the pool on demand.
            this.randomNumberGeneratorPool =
                new ConcurrentDictionary<int, RandomNumberGenerator>();

            // Store each row of the features data as a separate matrix.
            this.featuresData = new DoubleMatrix[features.NumberOfRows];
            for (int row = 0; row < this.featuresData.Length; row++)
            {
                this.featuresData[row] = features.Data[row, ":"];
            }

            this.allowEntailmentPartialTruthValues =
                allowEntailmentPartialTruthValues;
            this.responseData = response.Data;
            this.FeatureVariables = features.Variables;
            this.ResponseVariable = response.Variables[0];
        }
Пример #5
0
        /// <summary>
        /// Encodes a small comma-separated data set held in memory,
        /// decodes it back, and writes the decoded variable names and
        /// rows to the console.
        /// </summary>
        public void Main()
        {
            // Comma-separated observations; the first row holds the
            // column headers.
            string[] lines =
            {
                "COLOR,NUMBER",
                "Red,Negative",
                "Green,Zero",
                "Red,Negative",
                "Black,Negative",
                "Black,Positive"
            };

            // Copy the rows into an in-memory stream and rewind it.
            var stream = new MemoryStream();
            var writer = new StreamWriter(stream);

            foreach (string line in lines)
            {
                writer.WriteLine(line);
            }
            writer.Flush();
            stream.Position = 0;

            // Encode the categorical data set.
            var reader = new StreamReader(stream);
            char delimiter = ',';
            bool hasHeaders = true;
            IndexCollection columns = IndexCollection.Range(0, 1);

            CategoricalDataSet dataset = CategoricalDataSet.Encode(
                reader,
                delimiter,
                columns,
                hasHeaders);

            // Decode and show the data set.
            Console.WriteLine("Decoded data set:");
            Console.WriteLine();

            var decoded = dataset.Decode();
            int rowCount = dataset.Data.NumberOfRows;
            int columnCount = dataset.Data.NumberOfColumns;

            // Header line: each name is followed by a comma.
            foreach (var variable in dataset.Variables)
            {
                Console.Write(variable.Name + ",");
            }
            Console.WriteLine();

            // One line per instance, with the same trailing-comma layout.
            for (int row = 0; row < rowCount; row++)
            {
                for (int column = 0; column < columnCount; column++)
                {
                    Console.Write(decoded[row][column] + ",");
                }
                Console.WriteLine();
            }
        }
        /// <summary>
        /// Encodes a small comma-separated data set held in memory and
        /// writes to the console the contingency table having the
        /// categories of NUMBER on its rows and those of COLOR on its
        /// columns.
        /// </summary>
        public void Main()
        {
            // Comma-separated observations; the first row holds the
            // column headers.
            string[] lines =
            {
                "COLOR,NUMBER",
                "Red,Negative",
                "Green,Zero",
                "White,Positive",
                "Red,Negative",
                "Blue,Negative",
                "Blue,Positive"
            };

            // Copy the rows into an in-memory stream and rewind it.
            var stream = new MemoryStream();
            var writer = new StreamWriter(stream);

            foreach (string line in lines)
            {
                writer.WriteLine(line);
            }
            writer.Flush();
            stream.Position = 0;

            // Encode the categorical data set.
            var reader = new StreamReader(stream);
            char delimiter = ',';
            bool hasHeaders = true;
            IndexCollection columns = IndexCollection.Range(0, 1);

            var dataSet = CategoricalDataSet.Encode(
                reader,
                delimiter,
                columns,
                hasHeaders);

            // Variable NUMBER (column 1) supplies the table rows,
            // variable COLOR (column 0) supplies the table columns.
            int numberVariableIndex = 1;
            int colorVariableIndex = 0;

            // Get the NUMBER-by-COLOR table.
            var table = dataSet.GetContingencyTable(
                numberVariableIndex,
                colorVariableIndex);

            // Show the table.
            Console.WriteLine("Contingency table:");
            Console.WriteLine(table);
        }
Пример #7
0
        /// <summary>
        /// Computes the classification accuracy achieved on the stored
        /// items by the ensemble formed by the current entailments plus
        /// those represented in the specified state.
        /// </summary>
        /// <param name="state">
        /// The matrix whose first row stores the representations of the
        /// entailments to add. Representation <c>e</c> starts at
        /// position <c>e * entailmentRepresentationLength</c>.
        /// </param>
        /// <returns>
        /// The number of items whose predicted response code matches the
        /// actual one, divided by the number of entries in the response
        /// data.
        /// </returns>
        public double Performance(DoubleMatrix state)
        {
            #region Create the ensemble of categorical entailments

            // Work on a copy, so that the stored entailments are not altered.
            var ensemble = new List<CategoricalEntailment>(this.entailments);

            // A sequential training evaluates one additional entailment
            // at a time.
            int candidateCount;
            if (this.trainSequentially)
            {
                candidateCount = 1;
            }
            else
            {
                candidateCount = this.numberOfTrainedCategoricalEntailments;
            }

            int responseCategoryCount = this.numberOfResponseCategories;

            for (int k = 0; k < candidateCount; k++)
            {
                int firstPosition = k * this.entailmentRepresentationLength;
                int lastPosition = firstPosition + this.overallNumberOfCategories;

                var representation =
                    state[0, IndexCollection.Range(firstPosition, lastPosition)];

                ensemble.Add(new CategoricalEntailment(
                    representation,
                    this.FeatureVariables,
                    this.ResponseVariable));
            }

            #endregion

            #region Exploit the ensemble to classify observed items

            int itemCount = this.featuresData.Length;
            DoubleMatrix predictedCodes = DoubleMatrix.Dense(itemCount, 1);

            for (int row = 0; row < itemCount; row++)
            {
                DoubleMatrix votes = DoubleMatrix.Dense(1, responseCategoryCount);
                DoubleMatrix item = this.featuresData[row];

                // Each entailment whose premises hold for the item votes
                // for its conclusion with a weight equal to its truth value.
                foreach (var entailment in ensemble)
                {
                    if (!entailment.ValidatePremises(item))
                    {
                        continue;
                    }

                    var votedIndex =
                        this.responseCodeIndexPairs[entailment.ResponseConclusion];
                    votes[votedIndex] += entailment.TruthValue;
                }

                double topVote = Stat.Max(votes).value;
                var topVoteIndexes = votes.Find(topVote);
                int topVoteCount = topVoteIndexes.Count;

                // With a single maximum the first position is the winner;
                // otherwise a position is picked at random.
                int winningPosition = 0;
                if (topVoteCount != 1)
                {
                    // One generator per managed thread, created on first use.
                    var generator = this.randomNumberGeneratorPool.GetOrAdd(
                        Environment.CurrentManagedThreadId,
                        threadId => RandomNumberGenerator.CreateNextMT2203(7777777));

                    winningPosition = Convert.ToInt32(
                        Math.Floor(topVoteCount * generator.DefaultUniform()));
                }

                predictedCodes[row] =
                    this.ResponseVariable.Categories[
                        topVoteIndexes[winningPosition]].Code;
            }

            var predictedResponses = new CategoricalDataSet(
                new List<CategoricalVariable>(1) { this.ResponseVariable },
                predictedCodes);

            #endregion

            #region Evaluate classification accuracy

            var actualResponses = this.responseData;

            // For each response code, count the items predicted with that
            // code whose actual response has the same code.
            double exactPredictionCount = 0.0;
            foreach (var code in this.ResponseVariable.CategoryCodes)
            {
                IndexCollection predictedWithCode =
                    predictedResponses.Data.Find(code);
                if (predictedWithCode is null)
                {
                    continue;
                }

                DoubleMatrix matchingActualResponses =
                    actualResponses.Vec(predictedWithCode);
                exactPredictionCount +=
                    matchingActualResponses.Find(code)?.Count ?? 0;
            }

            // Compute the overall confusion
            double totalConfusion = actualResponses.Count;

            return exactPredictionCount / totalConfusion;

            #endregion
        }
        /// <summary>
        /// Categorizes the numerical column of a small comma-separated
        /// data set by entropy minimization with respect to the TARGET
        /// column, encodes the data set with the resulting categorizers,
        /// and writes the decoded data to the console.
        /// </summary>
        public void Main()
        {
            // Create a data stream.
            const int numberOfInstances = 27;

            string[] data = new string[numberOfInstances + 1] {
                "NUMERICAL,TARGET",
                "0,A",
                "0,A",
                "0,A",
                "1,B",
                "1,B",
                "1,B",
                "1,B",
                "2,B",
                "2,B",
                "3,C",
                "3,C",
                "3,C",
                "4,B",
                "4,B",
                "4,B",
                "4,C",
                "5,A",
                "5,A",
                "6,A",
                "7,C",
                "7,C",
                "7,C",
                "8,C",
                "8,C",
                "9,C",
                "9,C",
                "9,C"
            };

            MemoryStream stream = new();
            StreamWriter writer = new(stream);

            for (int i = 0; i < data.Length; i++)
            {
                writer.WriteLine(data[i]);
            }
            writer.Flush();
            stream.Position = 0;

            // Identify the special categorizer for variable NUMERICAL.
            StreamReader    streamReader     = new(stream);
            char            columnDelimiter  = ',';
            IndexCollection numericalColumns = IndexCollection.Range(0, 0);
            bool            firstLineContainsColumnHeaders = true;
            int             targetColumn        = 1;
            IFormatProvider provider            = CultureInfo.InvariantCulture;
            var             specialCategorizers = CategoricalDataSet.CategorizeByEntropyMinimization(
                streamReader,
                columnDelimiter,
                numericalColumns,
                firstLineContainsColumnHeaders,
                targetColumn,
                provider);

            // Encode the categorical data set using the special categorizer.
            // The same reader is used for a second pass: after rewinding the
            // underlying stream, its buffered data must be discarded so that
            // the reader is in sync with the new stream position.
            stream.Position = 0;
            streamReader.DiscardBufferedData();
            IndexCollection    extractedColumns = IndexCollection.Range(0, 1);
            CategoricalDataSet dataset          = CategoricalDataSet.Encode(
                streamReader,
                columnDelimiter,
                extractedColumns,
                firstLineContainsColumnHeaders,
                specialCategorizers,
                provider);

            // Decode and show the data set.
            Console.WriteLine("Decoded data set:");
            Console.WriteLine();
            var decodedDataSet    = dataset.Decode();
            int numberOfVariables = dataset.Data.NumberOfColumns;

            foreach (var variable in dataset.Variables)
            {
                Console.Write(variable.Name + ",");
            }
            Console.WriteLine();

            for (int i = 0; i < numberOfInstances; i++)
            {
                for (int j = 0; j < numberOfVariables; j++)
                {
                    Console.Write(decodedDataSet[i][j] + ",");
                }
                Console.WriteLine();
            }
        }
Пример #9
0
        public void FromEncodedDataTest()
        {
            // variables is null
            {
                List <CategoricalVariable> variables = null;
                DoubleMatrix data = DoubleMatrix.Identity(3);

                ArgumentExceptionAssert.Throw(
                    () =>
                {
                    var actual = CategoricalDataSet.FromEncodedData(
                        variables: variables,
                        data: data);
                },
                    expectedType: typeof(ArgumentNullException),
                    expectedPartialMessage: ArgumentExceptionAssert.NullPartialMessage,
                    expectedParameterName: "variables");
            }

            // data is null
            {
                List <CategoricalVariable> variables =
                    new();
                DoubleMatrix data = null;

                ArgumentExceptionAssert.Throw(
                    () =>
                {
                    var actual = CategoricalDataSet.FromEncodedData(
                        variables: variables,
                        data: data);
                },
                    expectedType: typeof(ArgumentNullException),
                    expectedPartialMessage: ArgumentExceptionAssert.NullPartialMessage,
                    expectedParameterName: "data");
            }

            // variables count unequal to data number of columns
            {
                List <CategoricalVariable> variables =
                    new()
                {
                    new CategoricalVariable("var0"),
                    new CategoricalVariable("var1")
                };

                DoubleMatrix data = DoubleMatrix.Dense(6, 3);

                ArgumentExceptionAssert.Throw(
                    () =>
                {
                    var actual = CategoricalDataSet.FromEncodedData(
                        variables: variables,
                        data: data);
                },
                    expectedType: typeof(ArgumentOutOfRangeException),
                    expectedPartialMessage: String.Format(
                        ImplementationServices.GetResourceString(
                            "STR_EXCEPT_CAT_COLUMNS_NOT_EQUAL_TO_VARIABLES_COUNT"),
                        "variables"),
                    expectedParameterName: "data");
            }

            // category not included in variable
            {
                List <CategoricalVariable> variables =
                    new()
                {
                    new CategoricalVariable("var0"),
                    new CategoricalVariable("var1")
                };

                variables[0].Add(0.0);
                variables[1].Add(1.0);

                DoubleMatrix data = DoubleMatrix.Dense(1, 2);

                ArgumentExceptionAssert.Throw(
                    () =>
                {
                    var actual = CategoricalDataSet.FromEncodedData(
                        variables: variables,
                        data: data);
                },
                    expectedType: typeof(ArgumentException),
                    expectedPartialMessage:
                    ImplementationServices.GetResourceString(
                        "STR_EXCEPT_CAT_MATRIX_ENTRY_NOT_IN_VARIABLE_LIST"),
                    expectedParameterName: "data");
            }

            // Valid input
            {
                // Create a data stream
                string[] data = new string[6] {
                    "COLOR,NUMBER",
                    "Red,  -2.2",
                    "Green, 0.0",
                    "Red,  -3.3",
                    "Black,-1.1",
                    "Black, 4.4"
                };

                MemoryStream stream = new();
                StreamWriter writer = new(stream);
                for (int i = 0; i < data.Length; i++)
                {
                    writer.WriteLine(data[i].ToCharArray());
                    writer.Flush();
                }
                stream.Position = 0;

                // Define a special categorizer for variable NUMBER
                string numberCategorizer(string token, IFormatProvider provider)
                {
                    double datum = Convert.ToDouble(token, provider);

                    if (datum == 0)
                    {
                        return("Zero");
                    }
                    else if (datum < 0)
                    {
                        return("Negative");
                    }
                    else
                    {
                        return("Positive");
                    }
                }

                // Attach the special categorizer to variable NUMBER
                int numberColumnIndex   = 1;
                var specialCategorizers = new Dictionary <int, Categorizer>
                {
                    { numberColumnIndex, numberCategorizer }
                };

                // Encode the categorical data set
                StreamReader       streamReader     = new(stream);
                char               columnDelimiter  = ',';
                IndexCollection    extractedColumns = IndexCollection.Range(0, 1);
                bool               firstLineContainsColumnHeaders = true;
                CategoricalDataSet actual = CategoricalDataSet.Encode(
                    streamReader,
                    columnDelimiter,
                    extractedColumns,
                    firstLineContainsColumnHeaders,
                    specialCategorizers,
                    CultureInfo.InvariantCulture);

                CategoricalVariable color = new("COLOR")
                {
                    { 0, "Red" },
Пример #10
0
        /// <summary>
        /// Classifies the categorical items in the specified data set.
        /// </summary>
        /// <param name="dataSet">
        /// The data set whose rows contain the items to classify.
        /// </param>
        /// <param name="featureVariableIndexes">
        /// The zero-based indexes of the data set columns that contain the
        /// data about the features involved in the premises of the
        /// entailments defined by this instance.
        /// </param>
        /// <remarks>
        /// <para>
        /// Let <latex>L</latex> be the <see cref="IReadOnlyCollection{T}.Count"/> of
        /// the <see cref="CategoricalEntailment.FeaturePremises"/>
        /// defined by the <see cref="Entailments"/> exploited by this instance.
        /// It is expected
        /// that <paramref name="featureVariableIndexes"/> has the same count,
        /// and that the <latex>l</latex>-th position
        /// of <paramref name="featureVariableIndexes"/>, say <latex>k_l</latex>,
        /// is the index of the
        /// column that, in <paramref name="dataSet"/>, contains observations
        /// about the same feature variable on which the <latex>l</latex>-th
        /// premise of the <see cref="Entailments"/> of this instance is built.
        /// </para>
        /// <para>
        /// For each item, every entailment whose premises are validated by
        /// the item votes for its response conclusion with a weight equal
        /// to its truth value. The item is assigned the response category
        /// that collects the maximum vote; ties are broken at random.
        /// </para>
        /// </remarks>
        /// <returns>
        /// A categorical data set whose only variable is the response
        /// variable of this instance, and whose data consist of one
        /// column, named after that variable, containing in each row the
        /// category code assigned to the corresponding item.
        /// </returns>
        /// <exception cref="ArgumentNullException">
        /// <paramref name="dataSet"/> is <b>null</b>.<br/>
        /// -or-<br/>
        /// <paramref name="featureVariableIndexes"/> is <b>null</b>.
        /// </exception>
        /// <exception cref="ArgumentOutOfRangeException">
        /// <paramref name="featureVariableIndexes"/> contains values which
        /// are not valid column indexes for the
        /// <see cref="CategoricalDataSet.Data"/> of
        /// <paramref name="dataSet"/>.
        /// </exception>
        /// <exception cref="ArgumentException">
        /// <paramref name="featureVariableIndexes"/> has not the same
        /// <see cref="IndexCollection.Count"/> of the
        /// <see cref="FeatureVariables"/> of this instance.
        /// </exception>
        public CategoricalDataSet Classify(
            CategoricalDataSet dataSet,
            IndexCollection featureVariableIndexes)
        {
            #region Input validation

            if (dataSet is null)
            {
                throw new ArgumentNullException(nameof(dataSet));
            }

            if (featureVariableIndexes is null)
            {
                throw new ArgumentNullException(nameof(featureVariableIndexes));
            }

            // Every requested index must address an existing column.
            if (featureVariableIndexes.Max >= dataSet.NumberOfColumns)
            {
                string message = string.Format(
                    CultureInfo.InvariantCulture,
                    ImplementationServices.GetResourceString(
                        "STR_EXCEPT_PAR_INDEX_EXCEEDS_OTHER_PAR_DIMS"),
                    "column",
                    nameof(dataSet));

                throw new ArgumentOutOfRangeException(
                    nameof(featureVariableIndexes),
                    message);
            }

            // One column index is needed for each feature variable.
            if (featureVariableIndexes.Count != this.FeatureVariables.Count)
            {
                throw new ArgumentException(
                    ImplementationServices.GetResourceString(
                        "STR_EXCEPT_CEE_MUST_HAVE_SAME_FEATURES_COUNT"),
                    nameof(featureVariableIndexes));
            }

            #endregion

            // Select the feature columns: each row is an item to classify.
            var itemData = dataSet.Data[":", featureVariableIndexes];
            int itemCount = itemData.NumberOfRows;

            DoubleMatrix itemResponses = DoubleMatrix.Dense(itemCount, 1);
            itemResponses.SetColumnName(0, this.ResponseVariable.Name);

            int responseCategoryCount = this.responseCodeIndexPairs.Count;

            for (int row = 0; row < itemCount; row++)
            {
                DoubleMatrix votes = DoubleMatrix.Dense(1, responseCategoryCount);
                DoubleMatrix item = itemData[row, ":"];

                // Collect the votes of the entailments whose premises
                // are validated by the item.
                for (int e = 0; e < this.entailments.Count; e++)
                {
                    var entailment = this.entailments[e];
                    if (!entailment.ValidatePremises(item))
                    {
                        continue;
                    }

                    var votedIndex =
                        this.responseCodeIndexPairs[entailment.ResponseConclusion];
                    votes[votedIndex] += entailment.TruthValue;
                }

                double topVote = Stat.Max(votes).value;
                var topVoteIndexes = votes.Find(topVote);
                int topVoteCount = topVoteIndexes.Count;

                // With a single maximum the first position is the winner;
                // otherwise a position is picked at random.
                int winningPosition = 0;
                if (topVoteCount != 1)
                {
                    winningPosition = Convert.ToInt32(
                        Math.Floor(
                            topVoteCount *
                            this.randomNumberGenerator.DefaultUniform()));
                }

                itemResponses[row] =
                    this.ResponseVariable.Categories[
                        topVoteIndexes[winningPosition]].Code;
            }

            var responseVariables = new List<CategoricalVariable>()
            {
                this.ResponseVariable
            };

            return new CategoricalDataSet(responseVariables, itemResponses);
        }
Пример #11
0
        /// <summary>
        /// Enlarges the ensemble of this instance with a given number
        /// of newly trained categorical entailments.
        /// The new entailments are trained together with those
        /// already included in this instance, exploiting the
        /// specified feature and response categorical
        /// variables of a given data set.
        /// </summary>
        /// <param name="dataSet">
        /// The categorical data set containing information about
        /// the available feature and response variables.
        /// </param>
        /// <param name="featureVariableIndexes">
        /// The zero-based indexes of the columns in <paramref name="dataSet"/>
        /// containing observations about
        /// the feature variables on which premises must be defined.
        /// </param>
        /// <param name="responseVariableIndex">
        /// The zero-based index of the column in <paramref name="dataSet"/>
        /// containing observations about the response variable.
        /// </param>
        /// <param name="numberOfTrainedCategoricalEntailments">
        /// The number of categorical entailments to be trained.
        /// </param>
        /// <param name="allowEntailmentPartialTruthValues">
        /// If set to <c>true</c> signals that the truth value of a
        /// categorical entailment must be equal to the homogeneity
        /// of the probability distribution from which its conclusion has been
        /// drawn. Otherwise, the truth value is unity.
        /// </param>
        /// <param name="trainSequentially">
        /// If set to <c>true</c> signals that the entailments are trained
        /// sequentially: one entailment is trained at each step, and it is
        /// added to the ensemble before the next step is executed.
        /// Otherwise, the categorical entailments are trained simultaneously.
        /// </param>
        /// <remarks>
        /// <para>
        /// The trained entailments are appended to the
        /// <see cref="Entailments"/> of this instance, so that
        /// such collection is enlarged by the specified
        /// <paramref name="numberOfTrainedCategoricalEntailments"/>.
        /// </para>
        /// <para>
        /// It is expected that <paramref name="featureVariableIndexes"/>
        /// has the same count of the <see cref="FeatureVariables"/>
        /// of this instance,
        /// and that the <latex>l</latex>-th position
        /// of <paramref name="featureVariableIndexes"/> is the index of the
        /// column that, in <paramref name="dataSet"/>, contains observations
        /// about the <latex>l</latex>-th feature variable of the classifier.
        /// Furthermore, <paramref name="responseVariableIndex"/> must be
        /// the index of the column where, in <paramref name="dataSet"/>,
        /// are stored observations about
        /// the <see cref="ResponseVariable"/> of this instance.
        /// </para>
        /// </remarks>
        /// <exception cref="ArgumentNullException">
        /// <paramref name="dataSet"/> is <b>null</b>.<br/>
        /// -or-<br/>
        /// <paramref name="featureVariableIndexes"/> is <b>null</b>.
        /// </exception>
        /// <exception cref="ArgumentOutOfRangeException">
        /// <paramref name="numberOfTrainedCategoricalEntailments"/> is not positive.<br/>
        /// -or-<br/>
        /// <paramref name="featureVariableIndexes"/> contains values which
        /// are not valid column indexes for the
        /// <see cref="CategoricalDataSet.Data"/> of
        /// <paramref name="dataSet"/>.<br/>
        /// -or-<br/>
        /// <paramref name="responseVariableIndex"/> is
        /// not a valid column index for the
        /// <see cref="CategoricalDataSet.Data"/> of
        /// <paramref name="dataSet"/>.
        /// </exception>
        /// <exception cref="ArgumentException">
        /// <paramref name="featureVariableIndexes"/> has not the same
        /// <see cref="IndexCollection.Count"/> of the
        /// <see cref="FeatureVariables"/> of this instance.
        /// </exception>
        public void AddTrained(
            CategoricalDataSet dataSet,
            IndexCollection featureVariableIndexes,
            int responseVariableIndex,
            int numberOfTrainedCategoricalEntailments,
            bool allowEntailmentPartialTruthValues,
            bool trainSequentially)
        {
            #region Input validation

            if (dataSet is null)
            {
                throw new ArgumentNullException(nameof(dataSet));
            }

            if (featureVariableIndexes is null)
            {
                throw new ArgumentNullException(nameof(featureVariableIndexes));
            }

            int numberOfDataColumns = dataSet.Data.NumberOfColumns;

            // Builds, only when needed, the message shared by both
            // column index checks.
            string GetInvalidColumnIndexMessage()
            {
                return string.Format(
                    CultureInfo.InvariantCulture,
                    ImplementationServices.GetResourceString(
                        "STR_EXCEPT_PAR_INDEX_EXCEEDS_OTHER_PAR_DIMS"),
                    "column", nameof(dataSet));
            }

            if (numberOfDataColumns <= featureVariableIndexes.Max)
            {
                throw new ArgumentOutOfRangeException(
                    nameof(featureVariableIndexes),
                    GetInvalidColumnIndexMessage());
            }

            if (featureVariableIndexes.Count != this.FeatureVariables.Count)
            {
                throw new ArgumentException(
                    ImplementationServices.GetResourceString(
                        "STR_EXCEPT_CEE_MUST_HAVE_SAME_FEATURES_COUNT"),
                    nameof(featureVariableIndexes));
            }

            if (responseVariableIndex < 0
                ||
                numberOfDataColumns <= responseVariableIndex)
            {
                throw new ArgumentOutOfRangeException(
                    nameof(responseVariableIndex),
                    GetInvalidColumnIndexMessage());
            }

            if (numberOfTrainedCategoricalEntailments <= 0)
            {
                throw new ArgumentOutOfRangeException(
                    nameof(numberOfTrainedCategoricalEntailments),
                    ImplementationServices.GetResourceString(
                        "STR_EXCEPT_PAR_MUST_BE_POSITIVE"));
            }

            #endregion

            var trainingFeatures = dataSet[":", featureVariableIndexes];
            var trainingResponse = dataSet[":", responseVariableIndex];

            // Whatever the training mode, the trainer starts from a copy
            // of the entailments currently included in this instance.
            var ensembleTrainer = new CategoricalEntailmentEnsembleTrainer(
                new List<CategoricalEntailment>(this.entailments),
                numberOfTrainedCategoricalEntailments,
                trainingFeatures,
                trainingResponse,
                allowEntailmentPartialTruthValues,
                trainSequentially);

            if (!trainSequentially)
            {
                // Simultaneous training: all the new entailments are
                // obtained through a single training session.
                var simultaneouslyTrained = Train(
                    ensembleTrainer,
                    numberOfTrainedCategoricalEntailments);

                for (int j = 0; j < numberOfTrainedCategoricalEntailments; j++)
                {
                    this.entailments.Add(simultaneouslyTrained.entailments[j]);
                }

                return;
            }

            // Sequential training: one entailment is trained at each step,
            // and it is made available to the trainer before the
            // next step starts.
            for (int step = 1; step <= numberOfTrainedCategoricalEntailments; step++)
            {
                var stepClassifier = Train(
                    ensembleTrainer,
                    numberOfTrainedCategoricalEntailments: 1);

                var stepEntailment = stepClassifier.Entailments[0];

                ensembleTrainer.entailments.Add(stepEntailment);
                this.entailments.Add(stepEntailment);
            }
        }
Пример #12
0
        /// <summary>
        /// Evaluates how accurate a predicted classification is
        /// when compared to an actual one.
        /// </summary>
        /// <param name="predictedDataSet">
        /// The data set containing the predicted classification.
        /// </param>
        /// <param name="predictedResponseVariableIndex">
        /// The zero-based index of the column
        /// in <paramref name="predictedDataSet"/> containing the
        /// predictions about the response variable applied
        /// for classifying.
        /// </param>
        /// <param name="actualDataSet">
        /// The data set containing the actual classification.
        /// </param>
        /// <param name="actualResponseVariableIndex">
        /// The zero-based index of the column
        /// in <paramref name="actualDataSet"/> containing the
        /// observations about the response variable applied
        /// for classifying.
        /// </param>
        /// <returns>
        /// The accuracy of the predicted classification with respect to the
        /// actual one, computed as the number of exact predictions
        /// divided by the number of rows
        /// in <paramref name="actualDataSet"/>.
        /// </returns>
        /// <remarks>
        /// <para>
        /// Only the category codes of the response variable
        /// in <paramref name="actualDataSet"/> are taken into account
        /// while counting the exact predictions.
        /// </para>
        /// </remarks>
        /// <exception cref="ArgumentNullException">
        /// <paramref name="predictedDataSet"/> is <b>null</b>.<br/>
        /// -or-<br/>
        /// <paramref name="actualDataSet"/> is <b>null</b>.
        /// </exception>
        /// <exception cref="ArgumentOutOfRangeException">
        /// <paramref name="predictedResponseVariableIndex"/> is
        /// not a valid column index for the
        /// <see cref="CategoricalDataSet.Data"/> of
        /// <paramref name="predictedDataSet"/>.<br/>
        /// -or-<br/>
        /// <paramref name="actualResponseVariableIndex"/> is
        /// not a valid column index for the
        /// <see cref="CategoricalDataSet.Data"/> of
        /// <paramref name="actualDataSet"/>.
        /// </exception>
        /// <exception cref="ArgumentException">
        /// <paramref name="actualDataSet"/> has not the same
        /// <see cref="CategoricalDataSet.NumberOfRows"/> of
        /// parameter <paramref name="predictedDataSet"/>.
        /// </exception>
        /// <seealso href="https://en.wikipedia.org/wiki/Confusion_matrix"/>
        public static double EvaluateAccuracy(
            CategoricalDataSet predictedDataSet,
            int predictedResponseVariableIndex,
            CategoricalDataSet actualDataSet,
            int actualResponseVariableIndex)
        {
            #region Input validation

            // Builds, only when needed, the message reporting that an index
            // is not a valid column index for the named data set parameter.
            string GetInvalidColumnIndexMessage(string dataSetParameterName)
            {
                return string.Format(
                    CultureInfo.InvariantCulture,
                    ImplementationServices.GetResourceString(
                        "STR_EXCEPT_PAR_INDEX_EXCEEDS_OTHER_PAR_DIMS"),
                    "column",
                    dataSetParameterName);
            }

            if (predictedDataSet is null)
            {
                throw new ArgumentNullException(nameof(predictedDataSet));
            }

            if (predictedResponseVariableIndex < 0
                ||
                predictedDataSet.Data.NumberOfColumns <= predictedResponseVariableIndex)
            {
                throw new ArgumentOutOfRangeException(
                    nameof(predictedResponseVariableIndex),
                    GetInvalidColumnIndexMessage(nameof(predictedDataSet)));
            }

            if (actualDataSet is null)
            {
                throw new ArgumentNullException(nameof(actualDataSet));
            }

            if (actualResponseVariableIndex < 0
                ||
                actualDataSet.Data.NumberOfColumns <= actualResponseVariableIndex)
            {
                throw new ArgumentOutOfRangeException(
                    nameof(actualResponseVariableIndex),
                    GetInvalidColumnIndexMessage(nameof(actualDataSet)));
            }

            if (actualDataSet.Data.NumberOfRows != predictedDataSet.Data.NumberOfRows)
            {
                throw new ArgumentException(
                    string.Format(
                        CultureInfo.InvariantCulture,
                        ImplementationServices.GetResourceString(
                            "STR_EXCEPT_PAR_MUST_HAVE_SAME_NUM_OF_ROWS"),
                        nameof(predictedDataSet)),
                    nameof(actualDataSet));
            }

            #endregion

            var observedColumn =
                actualDataSet.Data[":", actualResponseVariableIndex];
            var predictedColumn =
                predictedDataSet.Data[":", predictedResponseVariableIndex];

            double exactPredictionCount = 0.0;

            foreach (var categoryCode in
                actualDataSet.Variables[actualResponseVariableIndex].CategoryCodes)
            {
                // Positions at which the current code has been predicted
                IndexCollection predictedPositions =
                    predictedColumn.Find(categoryCode);

                if (predictedPositions is null)
                {
                    // The current code has never been predicted
                    continue;
                }

                // Among such positions, those at which the current code
                // has been also observed correspond to exact predictions
                var exactPositions =
                    observedColumn.Vec(predictedPositions).Find(categoryCode);

                if (exactPositions is not null)
                {
                    exactPredictionCount += exactPositions.Count;
                }
            }

            return exactPredictionCount / actualDataSet.NumberOfRows;
        }