/// <summary>
/// Initializes the shared <c>dataSet</c> member by writing a small
/// COLOR/NUMBER sample to an in-memory stream and encoding it
/// through <c>CategoricalDataSet.Encode</c>.
/// </summary>
static TestableMultipleCorrespondence00()
        {
            // Sample data: one header line followed by five observations.
            string[] data = {
                "COLOR,NUMBER",
                "Red,Negative",
                "Green,Zero",
                "Red,Negative",
                "Blue,Negative",
                "Blue,Positive"
            };

            // Using declarations release the stream and its adapters
            // when the constructor exits, even if encoding throws.
            using MemoryStream stream = new();
            using StreamWriter writer = new(stream);

            foreach (string line in data)
            {
                writer.WriteLine(line);
            }

            // One flush after the last line pushes everything into the
            // stream; it also leaves the writer with nothing buffered,
            // so its later disposal has no pending data to write.
            writer.Flush();
            stream.Position = 0;

            // Encode the categorical data set: comma-delimited, columns
            // 0 and 1 are extracted, and the first line holds the headers.
            // NOTE(review): disposing the reader at scope exit assumes
            // Encode has fully consumed it before returning - confirm.
            using StreamReader streamReader = new(stream);
            char columnDelimiter = ',';
            IndexCollection extractedColumns = IndexCollection.Range(0, 1);
            bool firstLineContainsColumnHeaders = true;

            dataSet = CategoricalDataSet.Encode(
                streamReader,
                columnDelimiter,
                extractedColumns,
                firstLineContainsColumnHeaders);
        }
Пример #2
0
        /// <summary>
        /// Encodes a small COLOR/NUMBER sample read from an in-memory
        /// stream, decodes the resulting data set, and writes the
        /// variable names and decoded rows to the console.
        /// </summary>
        public void Main()
        {
            // Sample data: one header line followed by five observations.
            string[] data = {
                "COLOR,NUMBER",
                "Red,Negative",
                "Green,Zero",
                "Red,Negative",
                "Black,Negative",
                "Black,Positive"
            };

            // Using declarations release the stream and its adapters
            // when the method exits, even if an exception is thrown.
            using MemoryStream stream = new();
            using StreamWriter writer = new(stream);

            foreach (string line in data)
            {
                writer.WriteLine(line);
            }

            // One flush after the last line pushes everything into the
            // stream; it also leaves the writer with nothing buffered,
            // so its later disposal has no pending data to write.
            writer.Flush();
            stream.Position = 0;

            // Encode the categorical data set: comma-delimited, columns
            // 0 and 1 are extracted, and the first line holds the headers.
            using StreamReader streamReader = new(stream);
            char columnDelimiter = ',';
            IndexCollection extractedColumns = IndexCollection.Range(0, 1);
            bool firstLineContainsColumnHeaders = true;
            CategoricalDataSet dataset = CategoricalDataSet.Encode(
                streamReader,
                columnDelimiter,
                extractedColumns,
                firstLineContainsColumnHeaders);

            // Decode and show the data set.
            Console.WriteLine("Decoded data set:");
            Console.WriteLine();
            var decodedDataSet = dataset.Decode();
            int numberOfInstances = dataset.Data.NumberOfRows;
            int numberOfVariables = dataset.Data.NumberOfColumns;

            // Header line: each variable name followed by a comma.
            foreach (var variable in dataset.Variables)
            {
                Console.Write(variable.Name + ",");
            }
            Console.WriteLine();

            // One output line per instance, each entry followed by a comma.
            for (int i = 0; i < numberOfInstances; i++)
            {
                for (int j = 0; j < numberOfVariables; j++)
                {
                    Console.Write(decodedDataSet[i][j] + ",");
                }
                Console.WriteLine();
            }
        }
        /// <summary>
        /// Encodes a small COLOR/NUMBER sample read from an in-memory
        /// stream and writes its NUMBER-by-COLOR contingency table
        /// to the console.
        /// </summary>
        public void Main()
        {
            // Sample data: one header line followed by six observations.
            string[] data = {
                "COLOR,NUMBER",
                "Red,Negative",
                "Green,Zero",
                "White,Positive",
                "Red,Negative",
                "Blue,Negative",
                "Blue,Positive"
            };

            // Using declarations release the stream and its adapters
            // when the method exits, even if an exception is thrown.
            using MemoryStream stream = new();
            using StreamWriter writer = new(stream);

            foreach (string line in data)
            {
                writer.WriteLine(line);
            }

            // One flush after the last line pushes everything into the
            // stream; it also leaves the writer with nothing buffered,
            // so its later disposal has no pending data to write.
            writer.Flush();
            stream.Position = 0;

            // Encode the categorical data set: comma-delimited, columns
            // 0 and 1 are extracted, and the first line holds the headers.
            using StreamReader streamReader = new(stream);
            char columnDelimiter = ',';
            IndexCollection extractedColumns = IndexCollection.Range(0, 1);
            bool firstLineContainsColumnHeaders = true;
            var dataSet = CategoricalDataSet.Encode(
                streamReader,
                columnDelimiter,
                extractedColumns,
                firstLineContainsColumnHeaders);

            // Assign the categories of variable NUMBER
            // to the rows of the table.
            int rowVariableIndex = 1;

            // Assign the categories of variable COLOR
            // to the columns of the table.
            int columnVariableIndex = 0;

            // Get the NUMBER-by-COLOR table.
            var table = dataSet.GetContingencyTable(
                rowVariableIndex,
                columnVariableIndex);

            // Show the table.
            Console.WriteLine("Contingency table:");
            Console.WriteLine(table);
        }
        /// <summary>
        /// Writes a NUMERICAL/TARGET sample to an in-memory stream,
        /// obtains special categorizers for the NUMERICAL column through
        /// <c>CategoricalDataSet.CategorizeByEntropyMinimization</c>
        /// (with column 1 as the target), encodes the data set with those
        /// categorizers, and writes the decoded rows to the console.
        /// </summary>
        public void Main()
        {
            // Create a data stream.
            const int numberOfInstances = 27;

            // The explicit length makes the compiler check that the
            // initializer holds the header plus numberOfInstances rows.
            string[] data = new string[numberOfInstances + 1] {
                "NUMERICAL,TARGET",
                "0,A",
                "0,A",
                "0,A",
                "1,B",
                "1,B",
                "1,B",
                "1,B",
                "2,B",
                "2,B",
                "3,C",
                "3,C",
                "3,C",
                "4,B",
                "4,B",
                "4,B",
                "4,C",
                "5,A",
                "5,A",
                "6,A",
                "7,C",
                "7,C",
                "7,C",
                "8,C",
                "8,C",
                "9,C",
                "9,C",
                "9,C"
            };

            // Using declarations release the stream and its adapters
            // when the method exits, even if an exception is thrown.
            using MemoryStream stream = new();
            using StreamWriter writer = new(stream);

            foreach (string line in data)
            {
                writer.WriteLine(line);
            }

            // One flush after the last line pushes everything into the
            // stream; it also leaves the writer with nothing buffered,
            // so its later disposal has no pending data to write.
            writer.Flush();
            stream.Position = 0;

            // Identify the special categorizer for variable NUMERICAL.
            using StreamReader streamReader = new(stream);
            char columnDelimiter = ',';
            IndexCollection numericalColumns = IndexCollection.Range(0, 0);
            bool firstLineContainsColumnHeaders = true;
            int targetColumn = 1;
            IFormatProvider provider = CultureInfo.InvariantCulture;
            var specialCategorizers = CategoricalDataSet.CategorizeByEntropyMinimization(
                streamReader,
                columnDelimiter,
                numericalColumns,
                firstLineContainsColumnHeaders,
                targetColumn,
                provider);

            // Rewind so the same data can be read a second time. The
            // reader keeps its own buffer, so after repositioning the
            // underlying stream that buffer is discarded to bring the
            // reader back in sync with the stream position.
            stream.Position = 0;
            streamReader.DiscardBufferedData();

            // Encode the categorical data set using the special categorizer.
            IndexCollection extractedColumns = IndexCollection.Range(0, 1);
            CategoricalDataSet dataset = CategoricalDataSet.Encode(
                streamReader,
                columnDelimiter,
                extractedColumns,
                firstLineContainsColumnHeaders,
                specialCategorizers,
                provider);

            // Decode and show the data set.
            Console.WriteLine("Decoded data set:");
            Console.WriteLine();
            var decodedDataSet = dataset.Decode();
            int numberOfVariables = dataset.Data.NumberOfColumns;

            // Header line: each variable name followed by a comma.
            foreach (var variable in dataset.Variables)
            {
                Console.Write(variable.Name + ",");
            }
            Console.WriteLine();

            // One output line per instance, each entry followed by a comma.
            for (int i = 0; i < numberOfInstances; i++)
            {
                for (int j = 0; j < numberOfVariables; j++)
                {
                    Console.Write(decodedDataSet[i][j] + ",");
                }
                Console.WriteLine();
            }
        }
Пример #5
0
        public void FromEncodedDataTest()
        {
            // variables is null
            {
                List <CategoricalVariable> variables = null;
                DoubleMatrix data = DoubleMatrix.Identity(3);

                ArgumentExceptionAssert.Throw(
                    () =>
                {
                    var actual = CategoricalDataSet.FromEncodedData(
                        variables: variables,
                        data: data);
                },
                    expectedType: typeof(ArgumentNullException),
                    expectedPartialMessage: ArgumentExceptionAssert.NullPartialMessage,
                    expectedParameterName: "variables");
            }

            // data is null
            {
                List <CategoricalVariable> variables =
                    new();
                DoubleMatrix data = null;

                ArgumentExceptionAssert.Throw(
                    () =>
                {
                    var actual = CategoricalDataSet.FromEncodedData(
                        variables: variables,
                        data: data);
                },
                    expectedType: typeof(ArgumentNullException),
                    expectedPartialMessage: ArgumentExceptionAssert.NullPartialMessage,
                    expectedParameterName: "data");
            }

            // variables count unequal to data number of columns
            {
                List <CategoricalVariable> variables =
                    new()
                {
                    new CategoricalVariable("var0"),
                    new CategoricalVariable("var1")
                };

                DoubleMatrix data = DoubleMatrix.Dense(6, 3);

                ArgumentExceptionAssert.Throw(
                    () =>
                {
                    var actual = CategoricalDataSet.FromEncodedData(
                        variables: variables,
                        data: data);
                },
                    expectedType: typeof(ArgumentOutOfRangeException),
                    expectedPartialMessage: String.Format(
                        ImplementationServices.GetResourceString(
                            "STR_EXCEPT_CAT_COLUMNS_NOT_EQUAL_TO_VARIABLES_COUNT"),
                        "variables"),
                    expectedParameterName: "data");
            }

            // category not included in variable
            {
                List <CategoricalVariable> variables =
                    new()
                {
                    new CategoricalVariable("var0"),
                    new CategoricalVariable("var1")
                };

                variables[0].Add(0.0);
                variables[1].Add(1.0);

                DoubleMatrix data = DoubleMatrix.Dense(1, 2);

                ArgumentExceptionAssert.Throw(
                    () =>
                {
                    var actual = CategoricalDataSet.FromEncodedData(
                        variables: variables,
                        data: data);
                },
                    expectedType: typeof(ArgumentException),
                    expectedPartialMessage:
                    ImplementationServices.GetResourceString(
                        "STR_EXCEPT_CAT_MATRIX_ENTRY_NOT_IN_VARIABLE_LIST"),
                    expectedParameterName: "data");
            }

            // Valid input
            {
                // Create a data stream
                string[] data = new string[6] {
                    "COLOR,NUMBER",
                    "Red,  -2.2",
                    "Green, 0.0",
                    "Red,  -3.3",
                    "Black,-1.1",
                    "Black, 4.4"
                };

                MemoryStream stream = new();
                StreamWriter writer = new(stream);
                for (int i = 0; i < data.Length; i++)
                {
                    writer.WriteLine(data[i].ToCharArray());
                    writer.Flush();
                }
                stream.Position = 0;

                // Define a special categorizer for variable NUMBER
                string numberCategorizer(string token, IFormatProvider provider)
                {
                    double datum = Convert.ToDouble(token, provider);

                    if (datum == 0)
                    {
                        return("Zero");
                    }
                    else if (datum < 0)
                    {
                        return("Negative");
                    }
                    else
                    {
                        return("Positive");
                    }
                }

                // Attach the special categorizer to variable NUMBER
                int numberColumnIndex   = 1;
                var specialCategorizers = new Dictionary <int, Categorizer>
                {
                    { numberColumnIndex, numberCategorizer }
                };

                // Encode the categorical data set
                StreamReader       streamReader     = new(stream);
                char               columnDelimiter  = ',';
                IndexCollection    extractedColumns = IndexCollection.Range(0, 1);
                bool               firstLineContainsColumnHeaders = true;
                CategoricalDataSet actual = CategoricalDataSet.Encode(
                    streamReader,
                    columnDelimiter,
                    extractedColumns,
                    firstLineContainsColumnHeaders,
                    specialCategorizers,
                    CultureInfo.InvariantCulture);

                CategoricalVariable color = new("COLOR")
                {
                    { 0, "Red" },