public void ApplyTest1()
        {
            // Verifies that a Discretization filter configured for the "Cost (M)"
            // column produces a 5-column, 5-row table with the expected value in
            // that column for every row.
            DataTable source = ProjectionFilterTest.CreateTable();

            // Only the "Cost (M)" column is registered with the filter.
            var discretizer = new Discretization("Cost (M)");
            DataTable discretized = discretizer.Apply(source);

            Assert.AreEqual(5, discretized.Columns.Count);
            Assert.AreEqual(5, discretized.Rows.Count);

            // Expected "Cost (M)" cell for each row, in row order.
            string[] expectedCosts = { "213", "4", "3", "3", "2" };

            for (int row = 0; row < expectedCosts.Length; row++)
            {
                Assert.AreEqual(expectedCosts[row], discretized.Rows[row]["Cost (M)"]);
            }
        }
        public void ApplyTest1()
        {
            // Applies a Discretization filter to the "Cost (M)" column of the
            // shared sample table and checks the resulting table's dimensions
            // and the value found in that column for each of the five rows.
            const string costColumn = "Cost (M)";

            DataTable original = ProjectionFilterTest.CreateTable();
            DataTable result = new Discretization(costColumn).Apply(original);

            Assert.AreEqual(5, result.Columns.Count);
            Assert.AreEqual(5, result.Rows.Count);

            DataRowCollection rows = result.Rows;

            Assert.AreEqual("213", rows[0][costColumn]);
            Assert.AreEqual("4", rows[1][costColumn]);
            Assert.AreEqual("3", rows[2][costColumn]);
            Assert.AreEqual("3", rows[3][costColumn]);
            Assert.AreEqual("2", rows[4][costColumn]);
        }
Example #3
0
        public void rule_matching_test()
        {
            // Checks that per-column mapping rules registered on a generic
            // Discretization<double, int> filter are applied to "x" and "y",
            // while the unregistered "z" column is expected to come back unchanged.
            DataTable input = new DataTable("Sample data");

            input.Columns.Add("x", typeof(double));
            input.Columns.Add("y", typeof(double));
            input.Columns.Add("z", typeof(double));

            input.Rows.Add(0.02, 60.6, 24.2);
            input.Rows.Add(0.92, 50.2, 21.1);
            input.Rows.Add(0.32, 60.9, 19.8);
            input.Rows.Add(2.02, 61.8, 92.4);


            // Create a discretization filter to operate on the first 2 columns
            var target = new Discretization<double, int>("x", "y");

            // "x": the rule matches every value and rounds to the nearest integer,
            // with midpoints going away from zero.
            target.Columns["x"].Mapping[x => true] = x => (int)System.Math.Round(x, MidpointRounding.AwayFromZero);

            // "y": the rule matches every value and rounds up only when the
            // fractional part reaches 0.7999999999999 (just under 0.8, leaving
            // room for floating-point error); otherwise it truncates.
            target.Columns["y"].Mapping[x => true] = x => ((x - (int)x) >= 0.7999999999999) ? ((int)x + 1) : (int)x;

            DataTable expected = new DataTable("Sample data");

            expected.Columns.Add("x", typeof(double));
            expected.Columns.Add("y", typeof(double));
            expected.Columns.Add("z", typeof(double));

            expected.Rows.Add(0, 60, 24.2);
            expected.Rows.Add(1, 50, 21.1);
            expected.Rows.Add(0, 61, 19.8);
            expected.Rows.Add(2, 62, 92.4);


            DataTable actual = target.Apply(input);

            // The loop below is bounded by the actual row count, so without this
            // check an empty or truncated result would pass without comparing anything.
            Assert.AreEqual(expected.Rows.Count, actual.Rows.Count);

            for (int i = 0; i < actual.Rows.Count; i++)
            {
                double ex = (double)expected.Rows[i][0];
                double ey = (double)expected.Rows[i][1];
                double ez = (double)expected.Rows[i][2];

                // The mapped columns are unboxed as int (the filter's output type
                // argument); "z" is still read as double.
                double ax = (int)actual.Rows[i][0];
                double ay = (int)actual.Rows[i][1];
                double az = (double)actual.Rows[i][2];

                Assert.AreEqual(ex, ax);
                Assert.AreEqual(ey, ay);
                Assert.AreEqual(ez, az);
            }
        }
Example #4
0
        public void ApplyTest()
        {
            // Applies a Discretization filter to columns "x" and "y" of a small
            // table and compares every cell of the result against a hand-built
            // expected table. Column "z" is not registered with the filter and
            // is expected to come back unchanged.
            DataTable input = new DataTable("Sample data");

            input.Columns.Add("x", typeof(double));
            input.Columns.Add("y", typeof(double));
            input.Columns.Add("z", typeof(double));

            input.Rows.Add(0.02, 60.6, 24.2);
            input.Rows.Add(0.92, 50.2, 21.1);
            input.Rows.Add(0.32, 60.9, 19.8);
            input.Rows.Add(2.02, 61.8, 92.4);


            // Create a discretization filter to operate on the first 2 columns
            Discretization target = new Discretization("x", "y");

            // NOTE(review): judging by the expected values below (60.6 -> 60,
            // 60.9 -> 61, 61.8 -> 62), Threshold looks like the fractional part
            // at which a value is rounded up - confirm against Discretization.
            target.Columns["y"].Threshold = 0.8;

            DataTable expected = new DataTable("Sample data");

            expected.Columns.Add("x", typeof(double));
            expected.Columns.Add("y", typeof(double));
            expected.Columns.Add("z", typeof(double));

            expected.Rows.Add(0, 60, 24.2);
            expected.Rows.Add(1, 50, 21.1);
            expected.Rows.Add(0, 61, 19.8);
            expected.Rows.Add(2, 62, 92.4);


            DataTable actual = target.Apply(input);

            // The loop below is bounded by the actual row count, so without this
            // check an empty or truncated result would pass without comparing anything.
            Assert.AreEqual(expected.Rows.Count, actual.Rows.Count);

            for (int i = 0; i < actual.Rows.Count; i++)
            {
                double ex = (double)expected.Rows[i][0];
                double ey = (double)expected.Rows[i][1];
                double ez = (double)expected.Rows[i][2];

                double ax = (double)actual.Rows[i][0];
                double ay = (double)actual.Rows[i][1];
                double az = (double)actual.Rows[i][2];

                Assert.AreEqual(ex, ax);
                Assert.AreEqual(ey, ay);
                Assert.AreEqual(ez, az);
            }
        }
        public void ApplyTest()
        {
            // Runs a Discretization filter over columns "x" and "y" of a
            // four-row table and checks each resulting cell against an expected
            // table; "z" is not registered with the filter and is expected to
            // be left as it was.
            DataTable input = new DataTable("Sample data");
            input.Columns.Add("x", typeof(double));
            input.Columns.Add("y", typeof(double));
            input.Columns.Add("z", typeof(double));

            input.Rows.Add(0.02, 60.6, 24.2);
            input.Rows.Add(0.92, 50.2, 21.1);
            input.Rows.Add(0.32, 60.9, 19.8);
            input.Rows.Add(2.02, 61.8, 92.4);

            // Create a discretization filter to operate on the first 2 columns
            Discretization target = new Discretization("x", "y");

            // NOTE(review): the expected "y" values (60.6 -> 60, 60.9 -> 61,
            // 61.8 -> 62) suggest Threshold is the fractional part from which a
            // value is rounded up - confirm against Discretization.
            target.Columns["y"].Threshold = 0.8;

            DataTable expected = new DataTable("Sample data");
            expected.Columns.Add("x", typeof(double));
            expected.Columns.Add("y", typeof(double));
            expected.Columns.Add("z", typeof(double));

            expected.Rows.Add(0, 60, 24.2);
            expected.Rows.Add(1, 50, 21.1);
            expected.Rows.Add(0, 61, 19.8);
            expected.Rows.Add(2, 62, 92.4);

            DataTable actual = target.Apply(input);

            // Fail fast on a missing or extra row: the loop is driven by the
            // actual row count, so an empty result would otherwise skip every
            // comparison and the test would pass.
            Assert.AreEqual(expected.Rows.Count, actual.Rows.Count);

            for (int i = 0; i < actual.Rows.Count; i++)
            {
                double ex = (double)expected.Rows[i][0];
                double ey = (double)expected.Rows[i][1];
                double ez = (double)expected.Rows[i][2];

                double ax = (double)actual.Rows[i][0];
                double ay = (double)actual.Rows[i][1];
                double az = (double)actual.Rows[i][2];

                Assert.AreEqual(ex, ax);
                Assert.AreEqual(ey, ay);
                Assert.AreEqual(ez, az);
            }
        }
Example #6
0
        public void missing_values_thresholds_test()
        {
            // Discretizes the integer "Temperature" column of a Play Tennis table
            // that contains null (missing) cells into "Hot" / "Mild" / "Cool"
            // labels, then compares the whole result cell by cell against an
            // expected table in which missing temperatures remain null.
            var source = new DataTable("Tennis Example with Missing Values");

            source.Columns.Add("Day", typeof(string));
            source.Columns.Add("Outlook", typeof(string));
            source.Columns.Add("Temperature", typeof(int));
            source.Columns.Add("Humidity", typeof(string));
            source.Columns.Add("Wind", typeof(string));
            source.Columns.Add("PlayTennis", typeof(string));

            // Note that D7 supplies its temperature as the string "18".
            object[][] sourceRows =
            {
                new object[] { "D1", "Sunny", 35, "High", "Weak", "No" },
                new object[] { "D2", null, 32, "High", "Strong", "No" },
                new object[] { "D3", null, null, "High", null, "Yes" },
                new object[] { "D4", "Rain", 25, "High", "Weak", "Yes" },
                new object[] { "D5", "Rain", 16, null, "Weak", "Yes" },
                new object[] { "D6", "Rain", 12, "Normal", "Strong", "No" },
                new object[] { "D7", "Overcast", "18", "Normal", "Strong", "Yes" },
                new object[] { "D8", null, 27, "High", null, "No" },
                new object[] { "D9", null, 17, "Normal", "Weak", "Yes" },
                new object[] { "D10", null, null, "Normal", null, "Yes" },
                new object[] { "D11", null, 23, "Normal", null, "Yes" },
                new object[] { "D12", "Overcast", 25, null, "Strong", "Yes" },
                new object[] { "D13", "Overcast", 33, null, "Weak", "Yes" },
                new object[] { "D14", "Rain", 24, "High", "Strong", "No" },
            };

            foreach (object[] values in sourceRows)
            {
                source.Rows.Add(values);
            }

            Assert.AreEqual(14, source.Rows.Count);
            Assert.AreEqual(6, source.Columns.Count);

            // Three half-open temperature ranges, each mapped to a label.
            var discretization = new Discretization<double, string>()
            {
                { "Temperature", x => x >= 30 && x < 50, "Hot" },
                { "Temperature", x => x >= 20 && x < 30, "Mild" },
                { "Temperature", x => x >= 00 && x < 20, "Cool" },
            };

            DataTable actual = discretization.Apply(source);

            Assert.AreEqual(14, actual.Rows.Count);
            Assert.AreEqual(6, actual.Columns.Count);

            // Same layout as the source, except "Temperature" now holds strings.
            var expected = new DataTable("Tennis Example with Missing Values");

            expected.Columns.Add("Day", typeof(string));
            expected.Columns.Add("Outlook", typeof(string));
            expected.Columns.Add("Temperature", typeof(string));
            expected.Columns.Add("Humidity", typeof(string));
            expected.Columns.Add("Wind", typeof(string));
            expected.Columns.Add("PlayTennis", typeof(string));

            object[][] expectedRows =
            {
                new object[] { "D1", "Sunny", "Hot", "High", "Weak", "No" },
                new object[] { "D2", null, "Hot", "High", "Strong", "No" },
                new object[] { "D3", null, null, "High", null, "Yes" },
                new object[] { "D4", "Rain", "Mild", "High", "Weak", "Yes" },
                new object[] { "D5", "Rain", "Cool", null, "Weak", "Yes" },
                new object[] { "D6", "Rain", "Cool", "Normal", "Strong", "No" },
                new object[] { "D7", "Overcast", "Cool", "Normal", "Strong", "Yes" },
                new object[] { "D8", null, "Mild", "High", null, "No" },
                new object[] { "D9", null, "Cool", "Normal", "Weak", "Yes" },
                new object[] { "D10", null, null, "Normal", null, "Yes" },
                new object[] { "D11", null, "Mild", "Normal", null, "Yes" },
                new object[] { "D12", "Overcast", "Mild", null, "Strong", "Yes" },
                new object[] { "D13", "Overcast", "Hot", null, "Weak", "Yes" },
                new object[] { "D14", "Rain", "Mild", "High", "Strong", "No" },
            };

            foreach (object[] values in expectedRows)
            {
                expected.Rows.Add(values);
            }

            // Compare every cell, walking the tables row by row.
            for (int rowIndex = 0; rowIndex < expected.Rows.Count; rowIndex++)
            {
                DataRow expectedRow = expected.Rows[rowIndex];
                DataRow actualRow = actual.Rows[rowIndex];

                for (int columnIndex = 0; columnIndex < expected.Columns.Count; columnIndex++)
                {
                    Assert.AreEqual(expectedRow[columnIndex], actualRow[columnIndex]);
                }
            }
        }
Example #7
0
        public void missing_values_thresholds_test()
        {
            // End-to-end sample: discretize a numeric column, codify the table,
            // learn a C4.5 decision tree from data with missing values, convert
            // the tree to rules, and verify the rule text, codebook sizes and
            // classification error. The code inside the region below appears to
            // double as a documentation sample, so it is kept self-explanatory.
            #region doc_missing_thresholds
            // In this example, we will be using a modified version of the famous Play Tennis
            // example by Tom Mitchell (1998), where some values have been replaced by missing
            // values. Missing cells are entered as null in the table below; further down, the
            // codification codebook is configured with Double.NaN as its replacement value
            // for missing entries.

            // Note: this example uses DataTables to represent the input data,
            // but this is not required. The same could be performed using plain
            // double[][] matrices and vectors instead.
            DataTable data = new DataTable("Tennis Example with Missing Values");

            data.Columns.Add("Day", typeof(string));
            data.Columns.Add("Outlook", typeof(string));
            data.Columns.Add("Temperature", typeof(int));
            data.Columns.Add("Humidity", typeof(string));
            data.Columns.Add("Wind", typeof(string));
            data.Columns.Add("PlayTennis", typeof(string));

            data.Rows.Add("D1", "Sunny", 35, "High", "Weak", "No");
            data.Rows.Add("D2", null, 32, "High", "Strong", "No");
            data.Rows.Add("D3", null, null, "High", null, "Yes");
            data.Rows.Add("D4", "Rain", 25, "High", "Weak", "Yes");
            data.Rows.Add("D5", "Rain", 16, null, "Weak", "Yes");
            data.Rows.Add("D6", "Rain", 12, "Normal", "Strong", "No");
            data.Rows.Add("D7", "Overcast", "18", "Normal", "Strong", "Yes");
            data.Rows.Add("D8", null, 27, "High", null, "No");
            data.Rows.Add("D9", null, 17, "Normal", "Weak", "Yes");
            data.Rows.Add("D10", null, null, "Normal", null, "Yes");
            data.Rows.Add("D11", null, 23, "Normal", null, "Yes");
            data.Rows.Add("D12", "Overcast", 25, null, "Strong", "Yes");
            data.Rows.Add("D13", "Overcast", 33, null, "Weak", "Yes");
            data.Rows.Add("D14", "Rain", 24, "High", "Strong", "No");

            // The columns used as inputs to the learner ("Day" is excluded,
            // and "PlayTennis" is the output):
            string[] inputNames = new[] { "Outlook", "Temperature", "Humidity", "Wind" };

            // Create a new discretization codebook to convert
            // the numbers above into discrete, string labels:
            var discretization = new Discretization <double, string>()
            {
                { "Temperature", x => x >= 30 && x < 50, "Hot" },
                { "Temperature", x => x >= 20 && x < 30, "Mild" },
                { "Temperature", x => x >= 00 && x < 20, "Cool" },
            };

            // Use the discretization to convert all the data
            DataTable discrete = discretization.Apply(data);

            // Create a new codification codebook to convert
            // the strings above into numeric, integer labels,
            // using NaN as the replacement for missing values:
            var codebook = new Codification()
            {
                DefaultMissingValueReplacement = Double.NaN
            };

            // Use the codebook to convert all the data
            DataTable symbols = codebook.Apply(discrete);

            // Grab the training input and output instances:
            double[][] inputs  = symbols.ToJagged(inputNames);
            int[]      outputs = symbols.ToArray <int>("PlayTennis");

            // Create a new learning algorithm, describing the
            // input attributes through the codebook built above:
            var teacher = new C45Learning()
            {
                Attributes = DecisionVariable.FromCodebook(codebook, inputNames)
            };

            // Use the learning algorithm to induce a new tree:
            DecisionTree tree = teacher.Learn(inputs, outputs);

            // To get the estimated class labels, we can use
            int[] predicted = tree.Decide(inputs);

            // The classification error (~0.214) can be computed as
            double error = new ZeroOneLoss(outputs).Loss(predicted);

            // Moreover, we may decide to convert our tree to a set of rules:
            DecisionSet rules = tree.ToRules();

            // And using the codebook, we can inspect the tree reasoning:
            string ruleText = rules.ToString(codebook, "PlayTennis",
                                             System.Globalization.CultureInfo.InvariantCulture);

            // The output should be:
            string expected = @"No =: (Outlook == Sunny)
No =: (Outlook == Rain) && (Wind == Strong)
Yes =: (Outlook == Overcast)
Yes =: (Outlook == Rain) && (Wind == Weak)
";
            #endregion

            // The verbatim literal above takes its line breaks from the source
            // file; map "\r\n" to the platform newline before comparing.
            expected = expected.Replace("\r\n", Environment.NewLine);
            Assert.AreEqual(expected, ruleText);

            // Number of distinct symbols the codebook holds for each column.
            Assert.AreEqual(14, codebook["Day"].NumberOfSymbols);
            Assert.AreEqual(3, codebook["Outlook"].NumberOfSymbols);
            Assert.AreEqual(3, codebook["Temperature"].NumberOfSymbols);
            Assert.AreEqual(2, codebook["Humidity"].NumberOfSymbols);
            Assert.AreEqual(2, codebook["Wind"].NumberOfSymbols);
            Assert.AreEqual(2, codebook["PlayTennis"].NumberOfSymbols);

            // Every codebook column should report the NaN replacement configured
            // above and be an ordinal variable.
            foreach (var col in codebook)
            {
                Assert.AreEqual(Double.NaN, col.MissingValueReplacement);
                Assert.AreEqual(CodificationVariable.Ordinal, col.VariableType);
            }

            // 0.2142857... is 3/14, i.e. three misclassified samples out of fourteen.
            Assert.AreEqual(0.21428571428571427, error, 1e-10);
            Assert.AreEqual(4, tree.NumberOfInputs);
            Assert.AreEqual(2, tree.NumberOfOutputs);

            // ComputeError is defined outside this view; presumably it evaluates
            // the rule set on the same data, which should give the same error as
            // the tree - TODO confirm.
            double newError = ComputeError(rules, inputs, outputs);
            Assert.AreEqual(0.21428571428571427, newError, 1e-10);
        }