Пример #1
0
        /// <summary>
        ///   Loads a serialized discrete Naive Bayes model from the <c>Resources.nb</c>
        ///   bytes and checks that it classifies an instance of the tennis example.
        /// </summary>
        public void SerializationTest()
        {
            DataTable data = new DataTable("Mitchell's Tennis Example");

            data.Columns.Add("Day", "Outlook", "Temperature", "Humidity", "Wind", "PlayTennis");

            data.Rows.Add("D1", "Sunny", "Hot", "High", "Weak", "No");
            data.Rows.Add("D2", "Sunny", "Hot", "High", "Strong", "No");
            data.Rows.Add("D3", "Overcast", "Hot", "High", "Weak", "Yes");
            data.Rows.Add("D4", "Rain", "Mild", "High", "Weak", "Yes");
            data.Rows.Add("D5", "Rain", "Cool", "Normal", "Weak", "Yes");
            data.Rows.Add("D6", "Rain", "Cool", "Normal", "Strong", "No");
            data.Rows.Add("D7", "Overcast", "Cool", "Normal", "Strong", "Yes");
            data.Rows.Add("D8", "Sunny", "Mild", "High", "Weak", "No");
            data.Rows.Add("D9", "Sunny", "Cool", "Normal", "Weak", "Yes");
            data.Rows.Add("D10", "Rain", "Mild", "Normal", "Weak", "Yes");
            data.Rows.Add("D11", "Sunny", "Mild", "Normal", "Strong", "Yes");
            data.Rows.Add("D12", "Overcast", "Mild", "High", "Strong", "Yes");
            data.Rows.Add("D13", "Overcast", "Hot", "Normal", "Weak", "Yes");
            data.Rows.Add("D14", "Rain", "Mild", "High", "Strong", "No");

            // Create a new codification codebook to
            // convert strings into discrete symbols
            Codification codebook = new Codification(data,
                                                     "Outlook", "Temperature", "Humidity", "Wind", "PlayTennis");

            // Release the stream as soon as the model has been deserialized.
            NaiveBayes<GeneralDiscreteDistribution> target;
            using (MemoryStream stream = new MemoryStream(Resources.nb))
            {
                target = Serializer.Load<NaiveBayes<GeneralDiscreteDistribution>>(stream);
            }

            // Expected value goes first so that failure messages read correctly.
            Assert.AreEqual(4, target.InputCount);
            Assert.AreEqual(2, target.ClassCount);

            // Compute the result for a sunny, cool, humid and windy day:
            double[] instance = codebook.Translate("Sunny", "Cool", "High", "Strong").ToDouble();

            double logLikelihood;
            double[] responses;
            int c = target.Compute(instance, out logLikelihood, out responses);

            string result = codebook.Translate("PlayTennis", c);

            Assert.AreEqual("No", result);
            Assert.AreEqual(0, c);

            // Check the length before indexing into the responses.
            Assert.AreEqual(2, responses.Length);
            Assert.IsFalse(double.IsNaN(responses[0]));
            Assert.AreEqual(0.795, responses[0], 1e-3);
            Assert.AreEqual(1, responses.Sum(), 1e-10);
        }
Пример #2
0
        /// <summary>
        ///   Formats this node as "name operator value", or "Root" for the root node.
        /// </summary>
        /// <param name="codebook">
        ///   Optional codebook. When it is given, the node has a value, and the codebook
        ///   contains a column named after the attribute, the value is translated through it.
        /// </param>
        private string toString(Codification codebook)
        {
            if (IsRoot)
            {
                return "Root";
            }

            int attributeIndex = Parent.Branches.AttributeIndex;

            // Fall back to a positional name when the attribute is unnamed.
            String attributeName = Owner.Attributes[attributeIndex].Name;
            if (String.IsNullOrEmpty(attributeName))
            {
                attributeName = "x" + attributeIndex;
            }

            String symbol = ComparisonExtensions.ToString(Comparison);

            bool translatable = codebook != null
                                && Value.HasValue
                                && codebook.Columns.Contains(attributeName);

            String text = translatable
                ? codebook.Translate(attributeName, (int)Value.Value)
                : Value.ToString();

            return String.Format("{0} {1} {2}", attributeName, symbol, text);
        }
Пример #3
0
        /// <summary>
        ///   Trains a C4.5 tree on the iris resource, converts it to a rule set and
        ///   checks both the error rates and the rendered rule text.
        /// </summary>
        public void IrisDatasetTest()
        {
            char[] lineBreak = { '\n' };
            string[] lines = Resources.iris_data.Split(lineBreak, StringSplitOptions.RemoveEmptyEntries);
            string[][] cells = lines.Apply(line => line.Split(','));

            // The first four fields of every row are the numeric features.
            double[][] features = new double[cells.Length][];
            for (int row = 0; row < features.Length; row++)
            {
                features[row] = cells[row].First(4).Convert(s => Double.Parse(s, System.Globalization.CultureInfo.InvariantCulture));
            }

            // The fifth field is the class label, encoded as an integer symbol.
            string[] classNames = cells.GetColumn(4);
            Codification codebook = new Codification("Label", classNames);
            int[] classes = codebook.Translate("Label", classNames);

            DecisionVariable[] variables =
            {
                new DecisionVariable("sepal length", DecisionVariableKind.Continuous),
                new DecisionVariable("sepal width",  DecisionVariableKind.Continuous),
                new DecisionVariable("petal length", DecisionVariableKind.Continuous),
                new DecisionVariable("petal width",  DecisionVariableKind.Continuous),
            };

            DecisionTree tree = new DecisionTree(variables, codebook.Columns[0].Symbols);
            C45Learning teacher = new C45Learning(tree);

            double treeError = teacher.Run(features, classes);
            Assert.AreEqual(0.026666666666666668, treeError, 1e-10);

            // The rule set derived from the tree must make the same mistakes.
            DecisionSet rules = tree.ToRules();
            double rulesError = ComputeError(rules, features, classes);
            Assert.AreEqual(0.026666666666666668, rulesError, 1e-10);

            string rendered = rules.ToString(codebook,
                                             System.Globalization.CultureInfo.InvariantCulture);

            // Length check kept from the original test; the full text is compared below.
            Assert.AreEqual(596, rendered.Length);

            string expected = @"0 =: (petal length <= 2.45)
1 =: (petal length > 2.45) && (petal width <= 1.75) && (sepal length <= 7.05) && (sepal width <= 2.85)
1 =: (petal length > 2.45) && (petal width <= 1.75) && (sepal length <= 7.05) && (sepal width > 2.85)
1 =: (petal length > 2.45) && (petal width > 1.75) && (sepal length <= 5.95) && (sepal width > 3.05)
2 =: (petal length > 2.45) && (petal width <= 1.75) && (sepal length > 7.05)
2 =: (petal length > 2.45) && (petal width > 1.75) && (sepal length > 5.95)
2 =: (petal length > 2.45) && (petal width > 1.75) && (sepal length <= 5.95) && (sepal width <= 3.05)
";

            Assert.AreEqual(expected, rendered);
        }
        /// <summary>
        ///   Checks that a random forest saved to bytes and loaded back produces
        ///   the same decisions and transforms as the forest it was saved from.
        /// </summary>
        public void test_serialization()
        {
            // Fixed seed so the learned forest is reproducible.
            Accord.Math.Random.Generator.Seed = 1;

            string[] lineSeparators = { "\r\n" };
            string[] lines = Resources.iris_data.Split(lineSeparators, StringSplitOptions.RemoveEmptyEntries);
            string[][] cells = lines.Apply(line => line.Split(','));

            double[][] features = cells.GetColumns(0, 1, 2, 3).To<double[][]>();
            string[] classNames = cells.GetColumn(4);

            var codebook = new Codification("Output", classNames);
            int[] classes = codebook.Translate("Output", classNames);

            var teacher = new RandomForestLearning();
            teacher.NumberOfTrees = 10; // use 10 trees in the forest

            var original = teacher.Learn(features, classes);

            // Round-trip the forest through its serialized form.
            byte[] serialized = original.Save();
            var restored = Serializer.Load<RandomForest>(serialized);

            // Evaluate both forests single-threaded.
            restored.ParallelOptions.MaxDegreeOfParallelism = 1;
            original.ParallelOptions.MaxDegreeOfParallelism = 1;

            Assert.IsTrue(original.Decide(features).IsEqual(restored.Decide(features)));
            Assert.IsTrue(original.Transform(features).IsEqual(restored.Transform(features)));
        }
Пример #5
0
        /// <summary>
        ///   Formats a single antecedent as "name operator value".
        /// </summary>
        /// <param name="antecedent">The antecedent to format.</param>
        /// <param name="codebook">
        ///   Optional codebook. When it contains a column named after the variable,
        ///   the antecedent value is translated through it.
        /// </param>
        /// <param name="culture">Culture used to format untranslated values and the final string.</param>
        private string toString(Antecedent antecedent, Codification codebook, CultureInfo culture)
        {
            int position = antecedent.Index;

            // Fall back to a positional name when the variable is unnamed.
            String label = Variables[position].Name;
            if (String.IsNullOrEmpty(label))
            {
                label = "x[" + position + "]";
            }

            String symbol = ComparisonExtensions.ToString(antecedent.Comparison);

            bool hasColumn = codebook != null && codebook.Columns.Contains(label);

            String text = hasColumn
                ? codebook.Translate(label, (int)antecedent.Value)
                : antecedent.Value.ToString(culture);

            return String.Format(culture, "{0} {1} {2}", label, symbol, text);
        }
    /// <summary>
    ///   Click handler: trains a C4.5 tree on the rows returned by the "dataset" query
    ///   and writes the accuracy on that same data, as a percentage, to <c>Label1</c>.
    ///   Does nothing when the query returns no rows.
    /// </summary>
    /// <param name="sender">Event source (unused).</param>
    /// <param name="e">Event arguments (unused).</param>
    protected void btnchangepassword0_Click(object sender, EventArgs e)
    {
        // Query directly; no placeholder DataTable is needed before the assignment.
        DataTable data = f1.getrecord1("select * from dataset");

        if (data.Rows.Count <= 0)
        {
            return;
        }

        double[][] inputs = data.ToJagged<double>("n", "p", "k", "ph", "ec");

        string[] labels = new string[data.Rows.Count];
        for (int i = 0; i < data.Rows.Count; i++)
        {
            labels[i] = data.Rows[i]["fertility"].ToString();
        }

        // With the codebook, we can convert the text labels to integer classes.
        var codebook = new Codification("fertility", labels);
        int[] outputs = codebook.Translate("fertility", labels);

        C45Learning teacher = new C45Learning();
        var tree = teacher.Learn(inputs, outputs);
        int[] predicted = tree.Decide(inputs);

        // The rule-set text built here previously was never used, so it is no longer computed.
        var confusion = new GeneralConfusionMatrix(classes: 3, expected: outputs, predicted: predicted);

        double accuracyPercent = confusion.Accuracy * 100;
        Label1.Text = accuracyPercent.ToString();
    }
        /// <summary>
        ///   Estimates a discrete Naive Bayes model from <paramref name="tbl"/> and
        ///   returns the "Class" codeword predicted for the nine values in <c>inputlar</c>.
        /// </summary>
        /// <param name="tbl">Training table holding the nine feature columns and the "Class" column.</param>
        /// <returns>The predicted class, translated back through the codebook.</returns>
        private string bayes(DataTable tbl)
        {
            // Feature columns, in the order used for both training and querying.
            string[] featureColumns =
            {
                "Clump Thickness", "Uniformity of Cell Size", "Uniformity of Cell Shape",
                "Marginal Adhesion", "Single Epithelial Cell Size", "Bare Nuclei",
                "Bland Chromatin", "Normal Nucleoli", "Mitoses"
            };

            // The codebook covers every feature column followed by the class column.
            string[] codedColumns = new string[featureColumns.Length + 1];
            for (int i = 0; i < featureColumns.Length; i++)
            {
                codedColumns[i] = featureColumns[i];
            }
            codedColumns[featureColumns.Length] = "Class";

            Codification codebook = new Codification(tbl, codedColumns);

            // Translate the training data into integer symbols using the codebook.
            DataTable symbols = codebook.Apply(tbl);

            int[][] inputs = symbols.ToIntArray(featureColumns);
            int[] outputs = symbols.ToIntArray("Class").GetColumn(0);

            // Number of distinct symbols the codebook holds for each feature column.
            int[] symbolCounts = new int[featureColumns.Length];
            for (int i = 0; i < featureColumns.Length; i++)
            {
                symbolCounts[i] = codebook[featureColumns[i]].Symbols;
            }

            // Number of distinct class symbols.
            int classCount = codebook["Class"].Symbols;

            // Create and estimate the Naive Bayes model.
            NaiveBayes target = new NaiveBayes(classCount, symbolCounts);
            target.Estimate(inputs, outputs);

            // Encode the query values held in inputlar.
            int[] instance = codebook.Translate(inputlar[0], inputlar[1], inputlar[2], inputlar[3],
                                                inputlar[4], inputlar[5], inputlar[6], inputlar[7], inputlar[8]);

            // Classify the instance and translate the class symbol back to its codeword.
            int output = target.Compute(instance);
            return codebook.Translate("Class", output);
        }
Пример #8
0
        /// <summary>
        ///   Creates the generator for the given instrument. The constructor also trains a
        ///   hidden Markov model on a fixed set of example phrases and samples a short
        ///   word sequence from it.
        /// </summary>
        /// <param name="instrument">The instrument patch stored on this generator.</param>
        public HMMGenerator(PatchNames instrument)
        {
            this.book = new Codebook<Note>();
            this.instrument = instrument;

            Accord.Math.Tools.SetupGenerator(10);

            // Consider some phrases:
            string[] sentences =
            {
                "The Big Brown Fox Jumps Over the Ugly Dog",
                "This is too hot to handle",
                "I am flying away like a gold eagle",
                "Onamae wa nan desu ka",
                "And then she asked, why is it so small?",
                "Great stuff John! Now you will surely be promoted",
                "Jayne was taken aback when she found out her son was gay",
            };

            // Split every phrase into its words.
            char[] space = { ' ' };
            string[][] phrases = new string[sentences.Length][];
            for (int i = 0; i < sentences.Length; i++)
            {
                phrases[i] = sentences[i].Split(space, StringSplitOptions.RemoveEmptyEntries);
            }

            // Transform the phrases into sequences of integer labels using a codebook.
            var codebook = new Codification("Words", phrases);

            // Now we can create the training data for the model:
            int[][] sequence = codebook.Translate("Words", phrases);

            // Number of symbols the codebook holds for the "Words" column.
            int symbols = codebook["Words"].Symbols;

            // Forward topology with one state per symbol,
            // as the sequences have definite start and ending points.
            var topology = new Forward(states: symbols);

            // Create the hidden Markov model
            HiddenMarkovModel hmm = new HiddenMarkovModel(topology, symbols);

            // Teach the model about the phrases
            var teacher = new ViterbiLearning(hmm);
            teacher.Run(sequence);

            // Generate a first symbol, then keep extending the sample
            // with the model's prediction until it holds `count` symbols.
            List<int> sample = new List<int>();
            int count = 10;
            sample.Add(hmm.Generate(1)[0]);
            while (sample.Count < count)
            {
                var k = hmm.Predict(sample.ToArray(), 1);
                sample.AddRange(k);
            }

            // Translate the sampled symbols back into words.
            // NOTE(review): the generated words are not stored or used anywhere — confirm intent.
            string[] result = codebook.Translate("Words", sample.ToArray());
        }
Пример #9
0
        /// <summary>
        ///   Formats this node as "name operator value", or "Root" for the root node.
        ///   Returns "Unexpected comparison type." for an unknown comparison kind.
        /// </summary>
        /// <param name="codebook">
        ///   Optional codebook. When it is given, the node has a value, and the codebook
        ///   contains a column named after the attribute, the value is translated through it.
        /// </param>
        private string toString(Codification codebook)
        {
            if (IsRoot)
            {
                return "Root";
            }

            int attributeIndex = Parent.Branches.AttributeIndex;

            // Fall back to a positional name when the attribute is unnamed.
            String attributeName = Owner.Attributes[attributeIndex].Name;
            if (String.IsNullOrEmpty(attributeName))
            {
                attributeName = "x" + attributeIndex;
            }

            // Map the comparison kind to its operator symbol.
            String symbol;
            switch (Comparison)
            {
                case ComparisonKind.Equal:
                    symbol = "==";
                    break;

                case ComparisonKind.NotEqual:
                    symbol = "!=";
                    break;

                case ComparisonKind.GreaterThan:
                    symbol = ">";
                    break;

                case ComparisonKind.GreaterThanOrEqual:
                    symbol = ">=";
                    break;

                case ComparisonKind.LessThan:
                    symbol = "<";
                    break;

                case ComparisonKind.LessThanOrEqual:
                    symbol = "<=";
                    break;

                default:
                    return "Unexpected comparison type.";
            }

            bool translatable = codebook != null
                                && Value.HasValue
                                && codebook.Columns.Contains(attributeName);

            String text = translatable
                ? codebook.Translate(attributeName, (int)Value.Value)
                : Value.ToString();

            return String.Format("{0} {1} {2}", attributeName, symbol, text);
        }
Пример #10
0
        /// <summary>
        ///   Trains a Naive Bayes classifier that predicts the category of a word from
        ///   the spreadsheet data, then classifies the word "helpful".
        /// </summary>
        /// <param name="args">Command-line arguments (unused).</param>
        static void Main(string[] args)
        {
            DataTable data = new DataTable("Categories of words");

            data.Columns.Add("Category", "Word");
            List<InputData> words = ExcelDataProvider.GetData(@"C:\Users\Дарья\Desktop\AdverbNoun.xlsx", 0);

            foreach (var word in words)
            {
                data.Rows.Add(word.Category, word.Word);
            }

            Codification codebook = new Codification(data, "Category", "Word");

            DataTable symbols = codebook.Apply(data);

            // The word is the observed feature and the category is the class to predict.
            // (These were swapped before, so the model predicted a Word symbol that was
            // then decoded as a Category.)
            int[][] inputs  = symbols.ToJagged<int>("Word");
            int[]   outputs = symbols.ToArray<int>("Category");

            var        learner = new NaiveBayesLearning();
            NaiveBayes nb      = learner.Learn(inputs, outputs);

            // NOTE(review): this second table is filled from the test spreadsheet
            // but is not used afterwards — confirm whether an evaluation loop is missing.
            data = new DataTable("Categories of words");
            data.Columns.Add("Category", "Word");
            words = ExcelDataProvider.GetData(@"C:\Users\Дарья\Desktop\TestAdverbNoun.xlsx", 0);

            foreach (var word in words)
            {
                data.Rows.Add(word.Category, word.Word);
            }

            // Encode the query word through the "Word" column, which matches the model's input.
            int[] instance = { codebook.Translate("Word", "helpful") };

            int c = nb.Decide(instance);

            // The decided class is a category symbol, so decode it through "Category".
            string result = codebook.Translate("Category", c);

            double[] probs = nb.Probabilities(instance);

            // NOTE(review): result and probs are computed but never printed — confirm intent.
            Console.WriteLine(0);
        }
        /// <summary>
        ///   Checks that a codebook saved to bytes and loaded back keeps its settings,
        ///   its column and its label-to-symbol mapping in both directions.
        /// </summary>
        public void SerializationTest()
        {
            string[] names = { "child", "adult", "elder" };

            Codification codebook = new Codification("Label", names);

            // Labels are numbered in the order they were given, and map back again.
            for (int symbol = 0; symbol < names.Length; symbol++)
            {
                Assert.AreEqual(symbol, codebook.Translate("Label", names[symbol]));
            }
            for (int symbol = 0; symbol < names.Length; symbol++)
            {
                Assert.AreEqual(names[symbol], codebook.Translate("Label", symbol));
            }

            // Round-trip the codebook through its serialized form.
            byte[] serialized = codebook.Save();
            Codification reloaded = Serializer.Load<Codification>(serialized);

            Assert.AreEqual(codebook.Active, reloaded.Active);
            Assert.AreEqual(codebook.Columns.Count, reloaded.Columns.Count);
            Assert.AreEqual(codebook.Columns[0].ColumnName, reloaded.Columns[0].ColumnName);

            // The reloaded codebook must translate exactly like the original one.
            for (int symbol = 0; symbol < names.Length; symbol++)
            {
                Assert.AreEqual(symbol, reloaded.Translate("Label", names[symbol]));
            }
            for (int symbol = 0; symbol < names.Length; symbol++)
            {
                Assert.AreEqual(names[symbol], reloaded.Translate("Label", symbol));
            }
        }
Пример #12
0
        /// <summary>
        ///   Checks that a codebook built from three labels numbers them 0, 1, 2 in the
        ///   order given, and translates those numbers back to the same labels.
        /// </summary>
        public void ApplyTest3()
        {
            string[] names = { "child", "adult", "elder" };

            Codification codebook = new Codification("Label", names);

            // Translate every text label into its discrete symbol...
            int[] codes = new int[names.Length];
            for (int i = 0; i < names.Length; i++)
            {
                codes[i] = codebook.Translate("Label", names[i]);
            }

            // ...and every symbol back into its text label.
            string[] decoded = new string[names.Length];
            for (int i = 0; i < names.Length; i++)
            {
                decoded[i] = codebook.Translate("Label", i);
            }

            for (int i = 0; i < names.Length; i++)
            {
                Assert.AreEqual(i, codes[i]);
            }
            for (int i = 0; i < names.Length; i++)
            {
                Assert.AreEqual(names[i], decoded[i]);
            }
        }
Пример #13
0
        /// <summary>
        ///   Learns a C4.5 tree on the iris resource with the <c>Learn</c> API, converts
        ///   it to rules and checks the error rates and the rendered rule text.
        /// </summary>
        public void new_method_create_tree()
        {
            string[][] text = Resources.iris_data.Split(new[] { '\n' }, StringSplitOptions.RemoveEmptyEntries).Apply(x => x.Split(','));

            // Columns 0-3 hold the features, column 4 the class label.
            double[][] inputs = text.GetColumns(0, 1, 2, 3).To<double[][]>();

            string[] labels = text.GetColumn(4);

            var codebook = new Codification("Output", labels);

            int[] outputs = codebook.Translate("Output", labels);

            // And we can use the C4.5 for learning:
            var teacher = new C45Learning();

            // And finally induce the tree:
            var tree = teacher.Learn(inputs, outputs);

            // To get the estimated class labels, we can use
            int[] predicted = tree.Decide(inputs);

            // And the classification error can be computed from those same
            // predictions, without running the tree a second time.
            double error = new ZeroOneLoss(outputs) // 0.0266
            {
                Mean = true
            }.Loss(predicted);

            // Moreover, we may decide to convert our tree to a set of rules:
            DecisionSet rules = tree.ToRules();

            // And using the codebook, we can inspect the tree reasoning:
            string ruleText = rules.ToString(codebook, "Output",
                                             System.Globalization.CultureInfo.InvariantCulture);

            // The output is:
            string expected = @"Iris-setosa =: (2 <= 2.45)
Iris-versicolor =: (2 > 2.45) && (3 <= 1.75) && (0 <= 7.05) && (1 <= 2.85)
Iris-versicolor =: (2 > 2.45) && (3 <= 1.75) && (0 <= 7.05) && (1 > 2.85)
Iris-versicolor =: (2 > 2.45) && (3 > 1.75) && (0 <= 5.95) && (1 > 3.05)
Iris-virginica =: (2 > 2.45) && (3 <= 1.75) && (0 > 7.05)
Iris-virginica =: (2 > 2.45) && (3 > 1.75) && (0 > 5.95)
Iris-virginica =: (2 > 2.45) && (3 > 1.75) && (0 <= 5.95) && (1 <= 3.05)
";

            Assert.AreEqual(0.026666666666666668, error, 1e-10);

            double newError = ComputeError(rules, inputs, outputs);

            Assert.AreEqual(0.026666666666666668, newError, 1e-10);
            Assert.AreEqual(expected, ruleText);
        }
Пример #14
0
        /// <summary>
        ///   Learns a ten-tree random forest on the iris resource and checks that its
        ///   error on that same data stays below 0.015.
        /// </summary>
        public void test_learn()
        {
            #region doc_iris
            // Fix the random seed so that results are reproducible
            Accord.Math.Random.Generator.Seed = 1;

            // This example uses Fisher's Iris dataset, where the task is to decide
            // whether the features of an Iris flower belong to an Iris setosa,
            // an Iris versicolor, or an Iris virginica:
            //
            //  - https://en.wikipedia.org/wiki/Iris_flower_data_set
            //

            // Load the dataset as rows of comma-separated text fields
            string[] lineSeparators = { "\r\n" };
            string[] lines = Resources.iris_data.Split(lineSeparators, StringSplitOptions.RemoveEmptyEntries);
            string[][] cells = lines.Apply(line => line.Split(','));

            // The first four columns contain the flower features
            double[][] features = cells.GetColumns(0, 1, 2, 3).To<double[][]>();

            // The last column contains the expected flower type
            string[] classNames = cells.GetColumn(4);

            // The labels are text, so a codebook is created
            // to encode them as integer class labels:
            //
            var codebook = new Codification("Output", classNames);

            // With the codebook, we can convert the labels:
            int[] classes = codebook.Translate("Output", classNames);

            // Create the forest learning algorithm
            var teacher = new RandomForestLearning();
            teacher.NumberOfTrees = 10; // use 10 trees in the forest

            // Finally, learn a random forest from data
            var forest = teacher.Learn(features, classes);

            // We can estimate class labels using
            int[] predicted = forest.Decide(features);

            // And the classification error can be computed as
            var loss = new ZeroOneLoss(classes);
            double error = loss.Loss(forest.Decide(features));
            #endregion

            Assert.IsTrue(error < 0.015);
        }
        /// <summary>
        ///   Trains an ID3 decision tree on <paramref name="tbl"/> and returns the
        ///   "Class" codeword predicted for the nine values in <c>inputlar</c>.
        /// </summary>
        /// <param name="tbl">Training table holding the nine feature columns and the "Class" column.</param>
        /// <returns>The predicted class, translated back through the codebook.</returns>
        public string kararAgaci(DataTable tbl)
        {
            // Feature columns, in the order used for both training and querying.
            string[] featureColumns =
            {
                "Clump Thickness", "Uniformity of Cell Size", "Uniformity of Cell Shape",
                "Marginal Adhesion", "Single Epithelial Cell Size", "Bare Nuclei",
                "Bland Chromatin", "Normal Nucleoli", "Mitoses"
            };

            const int classCount = 2;
            const int symbolsPerFeature = 10;

            Codification codebook = new Codification(tbl);

            // Every feature is declared with the same number of symbols.
            DecisionVariable[] attributes = new DecisionVariable[featureColumns.Length];
            for (int i = 0; i < featureColumns.Length; i++)
            {
                attributes[i] = new DecisionVariable(featureColumns[i], symbolsPerFeature);
            }

            DecisionTree tree = new DecisionTree(attributes, classCount);
            ID3Learning id3learning = new ID3Learning(tree);

            // Translate the training data into integer symbols using the codebook.
            DataTable symbols = codebook.Apply(tbl);

            int[][] inputs = symbols.ToIntArray(featureColumns);
            int[] outputs = symbols.ToIntArray("Class").GetColumn(0);

            id3learning.Run(inputs, outputs);

            // Encode the query values held in inputlar, then classify them.
            int[] query = codebook.Translate(inputlar[0], inputlar[1], inputlar[2], inputlar[3],
                                             inputlar[4], inputlar[5], inputlar[6], inputlar[7], inputlar[8]);
            int output = tree.Compute(query);

            return codebook.Translate("Class", output);
        }
        /// <summary>
        ///   Classifies a (news type, company) pair with the shared Naive Bayes model and
        ///   returns the decoded "Percent" value.
        /// </summary>
        /// <param name="newsType">News type, encoded through the "NewsType" column.</param>
        /// <param name="companyId">Company identifier, encoded through the "CompanyId" column.</param>
        /// <returns>
        ///   The "Percent" codeword for the decided class, or "-1" when the model is
        ///   unavailable, <paramref name="newsType"/> is null, or any step throws.
        /// </returns>
        public static string getNaiveBayesResult(string newsType, Guid companyId)
        {
            try
            {
                // Run the lazy initialisation before anything touches the shared state.
                // NOTE(review): presumably AppNaiveBayes.Instance also sets up `codebook`;
                // if so, using the codebook first made the very first call fail — confirm.
                if (naiveBayes == null)
                {
                    instance = AppNaiveBayes.Instance;
                }

                // A null newsType previously failed inside the try block and produced "-1".
                if (naiveBayes == null || newsType == null)
                {
                    return "-1";
                }

                int[] info = codebook.Translate(
                    new string[] { "NewsType", "CompanyId" },
                    new string[] { newsType, companyId.ToString() });

                int c = naiveBayes.Decide(info);
                return codebook.Translate("Percent", c);
            }
            catch (Exception)
            {
                // Best effort by design: any failure is reported to the caller as "-1".
                return "-1";
            }
        }
    /// <summary>
    ///   Click handler: trains a C4.5 tree on the rows returned by the "dataset" query,
    ///   classifies the values typed into the form and shows the result in <c>quality</c>.
    /// </summary>
    /// <param name="sender">Event source (unused).</param>
    /// <param name="e">Event arguments (unused).</param>
    protected void btnshow_Click(object sender, EventArgs e)
    {
        quality.Visible = true;
        Panel1.Visible = true;

        // Read the sample to classify from the form fields.
        n  = Convert.ToDouble(txtN.Text);
        p  = Convert.ToDouble(txtP.Text);
        k  = Convert.ToDouble(txtK.Text);
        ph = Convert.ToDouble(txtPh.Text);
        ec = Convert.ToDouble(txtec.Text);

        // Query directly; no placeholder DataTable is needed before the assignment.
        DataTable _dataTable = f1.getrecord1("select * from dataset");

        double[][] inputs = _dataTable.ToJagged<double>("n", "p", "k", "ph", "ec");

        string[] labels = new string[_dataTable.Rows.Count];
        for (int i = 0; i < _dataTable.Rows.Count; i++)
        {
            labels[i] = _dataTable.Rows[i]["fertility"].ToString();
        }

        // With the codebook, we can convert the text labels to integer classes.
        var codebook = new Codification("fertility", labels);
        int[] outputs = codebook.Translate("fertility", labels);

        C45Learning teacher = new C45Learning();
        var tree = teacher.Learn(inputs, outputs);

        double[][] input_test =
        {
            new double[] { n, p, k, ph, ec },
        };

        int[] val = tree.Decide(input_test);

        // NOTE(review): this maps class indices to fixed names instead of decoding them
        // through the codebook — confirm that the index order matches the data.
        switch (val[0])
        {
            case 0:
                quality.Text = "Low";
                break;

            case 1:
                quality.Text = "Medium";
                break;

            default:
                quality.Text = "High";
                break;
        }
    }
Пример #18
0
        /// <summary>
        ///   Formats this rule as "output =: (a1) &amp;&amp; (a2) ...", joining every antecedent.
        /// </summary>
        /// <param name="codebook">
        ///   Optional codebook used to translate antecedent values and, together with
        ///   <paramref name="outputColumn"/>, the rule output.
        /// </param>
        /// <param name="outputColumn">
        ///   Codebook column used to translate the output. When it is null or empty, or when
        ///   no codebook is given, the raw output value is written instead.
        /// </param>
        /// <param name="culture">Culture used for formatting.</param>
        /// <returns>The rule text; the right-hand side is empty when there are no antecedents.</returns>
        private string toString(Codification codebook, string outputColumn, CultureInfo culture)
        {
            var expr = expressions.ToArray();

            // Join the antecedents with " && ". Starting from index 0 also copes with an
            // empty list, which used to index expr[-1] and throw.
            StringBuilder sb = new StringBuilder();
            for (int i = 0; i < expr.Length; i++)
            {
                if (i > 0)
                {
                    sb.Append(" && ");
                }

                sb.Append('(').Append(toString(expr[i], codebook, culture)).Append(')');
            }

            // Without a codebook or an output column there is nothing to translate the
            // output with, so write the raw value (a null codebook used to throw here).
            if (codebook == null || String.IsNullOrEmpty(outputColumn))
            {
                return String.Format(culture, "{0} =: {1}", Output, sb);
            }

            string name = codebook.Translate(outputColumn, (int)Output);

            return String.Format(culture, "{0} =: {1}", name, sb);
        }
Пример #19
0
        /// <summary>
        ///   Builds a table from <paramref name="records"/> (first record = column names,
        ///   remaining records = rows) and learns a C4.5 decision tree from it.
        /// </summary>
        /// <param name="records">Header record followed by the data records.</param>
        /// <param name="columnNamesWithoutResult">Names of the input columns, each declared as a continuous variable.</param>
        /// <param name="codebook">Codebook used to translate the result-column labels into class indices.</param>
        /// <returns>The learned decision tree.</returns>
        public Accord.MachineLearning.DecisionTrees.DecisionTree Learn(string[][] records, string[] columnNamesWithoutResult, ref Codification codebook)
        {
            DataTable table = new DataTable();

            // The first record supplies the column names.
            string[] header = records[0];
            for (int c = 0; c < header.Length; c++)
            {
                table.Columns.Add(header[c]);
            }

            // Every record after the header becomes a row.
            for (int r = 1; r < records.Length; r++)
            {
                table.Rows.Add(records[r]);
            }

            double[][] inputs = table.ToJagged(columnNamesWithoutResult);
            string[] labels = table.ToArray<string>(Constants.RESULT_COLUMN_NAME);
            int[] outputs = codebook.Translate(Constants.RESULT_COLUMN_NAME, labels);

            // Declare every input column as a continuous decision variable.
            var teacher = new C45Learning();
            for (int c = 0; c < columnNamesWithoutResult.Length; c++)
            {
                teacher.Add(new DecisionVariable(columnNamesWithoutResult[c], DecisionVariableKind.Continuous));
            }

            return teacher.Learn(inputs, outputs);
        }
Пример #20
0
        /// <summary>
        ///   Recursively converts a decision node into a <c>TreeNode</c>. Non-leaf nodes get
        ///   one child per branch; leaf nodes get a single child showing the node output.
        /// </summary>
        /// <param name="node">The decision node to convert.</param>
        /// <returns>The tree-view node for <paramref name="node"/>.</returns>
        private TreeNode convert(DecisionNode node)
        {
            string caption = (codebook != null)
                ? node.ToString(codebook)
                : node.ToString();

            TreeNode treeNode = new TreeNode(caption);

            if (!node.IsLeaf)
            {
                foreach (var child in node.Branches)
                {
                    treeNode.Nodes.Add(convert(child));
                }

                return treeNode;
            }

            // Leaf: show the raw output unless it can be translated through the codebook.
            string leafText = node.Output.ToString();

            if (codebook != null && node.Output.HasValue)
            {
                int index = node.Parent.Branches.AttributeIndex;
                var attrib = treeSource.Attributes[index];

                // Only outputs under a discrete attribute are translated.
                if (attrib.Nature == DecisionVariableKind.Discrete)
                {
                    leafText = codebook.Translate(attrib.Name, node.Output.Value);
                }
            }

            treeNode.Nodes.Add(new TreeNode(leafText));
            return treeNode;
        }
Пример #21
0
        /// <summary>
        ///   Estimates a Naive Bayes model that mixes discrete and normally
        ///   distributed inputs on the tennis data and checks the decision
        ///   made for one new observation.
        /// </summary>
        public void ComputeTest2()
        {
            DataTable data = new DataTable("Mitchell's Tennis Example");

            data.Columns.Add("Day", "Outlook", "Temperature", "Humidity", "Wind", "PlayTennis");

            // Temperature and Humidity are stored as continuous values
            data.Columns["Temperature"].DataType = typeof(double);
            data.Columns["Humidity"].DataType    = typeof(double);

            // Day, Outlook, Temperature, Humidity, Wind, PlayTennis
            object[][] observations =
            {
                new object[] { "D1", "Sunny", 38.0, 96.0, "Weak", "No" },
                new object[] { "D2", "Sunny", 39.0, 90.0, "Strong", "No" },
                new object[] { "D3", "Overcast", 38.0, 75.0, "Weak", "Yes" },
                new object[] { "D4", "Rain", 25.0, 87.0, "Weak", "Yes" },
                new object[] { "D5", "Rain", 12.0, 30.0, "Weak", "Yes" },
                new object[] { "D6", "Rain", 11.0, 35.0, "Strong", "No" },
                new object[] { "D7", "Overcast", 10.0, 40.0, "Strong", "Yes" },
                new object[] { "D8", "Sunny", 24.0, 90.0, "Weak", "No" },
                new object[] { "D9", "Sunny", 12.0, 26.0, "Weak", "Yes" },
                new object[] { "D10", "Rain", 25, 30.0, "Weak", "Yes" },
                new object[] { "D11", "Sunny", 26.0, 40.0, "Strong", "Yes" },
                new object[] { "D12", "Overcast", 27.0, 97.0, "Strong", "Yes" },
                new object[] { "D13", "Overcast", 39.0, 41.0, "Weak", "Yes" },
                new object[] { "D14", "Rain", 23.0, 98.0, "Strong", "No" },
            };

            foreach (object[] observation in observations)
            {
                data.Rows.Add(observation);
            }

            // The codebook maps the textual values to integer symbols
            Codification codebook = new Codification(data);

            int numberOfClasses = codebook["PlayTennis"].Symbols; // yes / no
            int numberOfInputs  = 4;                              // Outlook, Temperature, Humidity, Wind

            // One initial distribution per input variable, in column order
            IUnivariateDistribution[] priors = new IUnivariateDistribution[]
            {
                new GeneralDiscreteDistribution(codebook["Outlook"].Symbols), // sunny, overcast, rain
                new NormalDistribution(),                                     // temperature
                new NormalDistribution(),                                     // humidity
                new GeneralDiscreteDistribution(codebook["Wind"].Symbols)     // weak, strong
            };

            // The classifier under test
            var target = new NaiveBayes<IUnivariateDistribution>(numberOfClasses, numberOfInputs, priors);

            // Encode the table and split it into inputs and expected outputs
            DataTable symbols = codebook.Apply(data);

            double[][] inputs  = symbols.ToArray("Outlook", "Temperature", "Humidity", "Wind");
            int[]      outputs = symbols.ToArray<int>("PlayTennis");

            // Fit the model
            target.Estimate(inputs, outputs);

            // Query: sunny outlook, 12 degrees, 90 humidity, strong wind
            int sunny  = codebook.Translate(columnName: "Outlook", value: "Sunny");
            int strong = codebook.Translate(columnName: "Wind", value: "Strong");

            double[] instance = { sunny, 12.0, 90.0, strong };

            double logLikelihood;
            double[] responses;

            int c = target.Compute(instance, out logLikelihood, out responses);

            // Decode the numeric class back into its label
            string result = codebook.Translate("PlayTennis", c);

            Assert.AreEqual("No", result);
            Assert.AreEqual(0, c);
            Assert.AreEqual(0.840, responses[0], 1e-3);
            Assert.AreEqual(1, responses.Sum(), 1e-10);
            Assert.IsFalse(double.IsNaN(responses[0]));
            Assert.AreEqual(2, responses.Length);
        }
Пример #22
0
        /// <summary>
        ///   Learns a C4.5 decision tree on the Iris data, checks its training
        ///   error, and verifies the rule set obtained from the tree, both its
        ///   error and its textual form.
        /// </summary>
        public void IrisDatasetTest()
        {
            #region doc_iris
            // In this example, we will process the famous Fisher's Iris dataset in 
            // which the task is to classify whether the features of an Iris flower 
            // belongs to an Iris setosa, an Iris versicolor, or an Iris virginica:
            //
            //  - https://en.wikipedia.org/wiki/Iris_flower_data_set
            //

            // First, let's load the dataset into an array of text that we can process
            string[][] text = Resources.iris_data.Split(new[] { '\n' }, StringSplitOptions.RemoveEmptyEntries).Apply(x => x.Split(','));

            // The first four columns contain the flower features
            double[][] inputs = text.GetColumns(0, 1, 2, 3).To<double[][]>();

            // The last column contains the expected flower type
            string[] labels = text.GetColumn(4);

            // Since the labels are represented as text, the first step is to convert
            // those text labels into integer class labels, so we can process them
            // more easily. For this, we will create a codebook to encode class labels:
            //
            var codebook = new Codification("Output", labels);

            // With the codebook, we can convert the labels:
            int[] outputs = codebook.Translate("Output", labels);

            // Let's declare the names of our input variables:
            DecisionVariable[] features =
            {
                new DecisionVariable("sepal length", DecisionVariableKind.Continuous), 
                new DecisionVariable("sepal width", DecisionVariableKind.Continuous), 
                new DecisionVariable("petal length", DecisionVariableKind.Continuous), 
                new DecisionVariable("petal width", DecisionVariableKind.Continuous), 
            };

            // Now, we can finally create our tree for the 3 classes:
            var tree = new DecisionTree(inputs: features, classes: 3);

            // And we can use the C4.5 for learning:
            var teacher = new C45Learning(tree);

            // And finally induce the tree:
            teacher.Learn(inputs, outputs);

            // To get the estimated class labels, we can use
            int[] predicted = tree.Decide(inputs);
            
            // And the classification error can be computed from those
            // predictions (there is no need to run the tree a second time):
            double error = new ZeroOneLoss(outputs) // 0.0266
            {
                Mean = true
            }.Loss(predicted);

            // Moreover, we may decide to convert our tree to a set of rules:
            DecisionSet rules = tree.ToRules();

            // And using the codebook, we can inspect the tree reasoning:
            string ruleText = rules.ToString(codebook, "Output",
                System.Globalization.CultureInfo.InvariantCulture);

            // The output is:
            string expected = @"Iris-setosa =: (petal length <= 2.45)
Iris-versicolor =: (petal length > 2.45) && (petal width <= 1.75) && (sepal length <= 7.05) && (sepal width <= 2.85)
Iris-versicolor =: (petal length > 2.45) && (petal width <= 1.75) && (sepal length <= 7.05) && (sepal width > 2.85)
Iris-versicolor =: (petal length > 2.45) && (petal width > 1.75) && (sepal length <= 5.95) && (sepal width > 3.05)
Iris-virginica =: (petal length > 2.45) && (petal width <= 1.75) && (sepal length > 7.05)
Iris-virginica =: (petal length > 2.45) && (petal width > 1.75) && (sepal length > 5.95)
Iris-virginica =: (petal length > 2.45) && (petal width > 1.75) && (sepal length <= 5.95) && (sepal width <= 3.05)
";
            #endregion

            Assert.AreEqual(0.026666666666666668, error, 1e-10);
            Assert.AreEqual(4, tree.NumberOfInputs);
            Assert.AreEqual(3, tree.NumberOfOutputs);

            // The rule set must make exactly the same mistakes as the tree
            double newError = ComputeError(rules, inputs, outputs);
            Assert.AreEqual(0.026666666666666668, newError, 1e-10);
            Assert.AreEqual(expected, ruleText);
        }
Пример #23
0
        /// <summary>
        ///   Builds the description of this node in the form
        ///   "attribute operator value".
        /// </summary>
        /// <param name="codebook">
        ///   Optional codebook. When it is given, the node has a value and the
        ///   codebook has a column named after the attribute, the value is
        ///   translated through the codebook; otherwise the value is printed as is.
        /// </param>
        /// <returns>
        ///   "Root" for the root node, "Unexpected comparison type." when the
        ///   comparison kind is not recognized, and the formatted description
        ///   in every other case.
        /// </returns>
        private string toString(Codification codebook)
        {
            if (IsRoot)
            {
                return "Root";
            }

            // The attribute this node tests is the one its parent branches on.
            int attributeIndex = Parent.Branches.AttributeIndex;
            string name = Owner.Attributes[attributeIndex].Name;

            if (string.IsNullOrEmpty(name))
            {
                // Unnamed attributes are shown by position
                name = "x" + attributeIndex;
            }

            ComparisonKind kind = Comparison;
            string op;

            if (kind == ComparisonKind.Equal)
            {
                op = "==";
            }
            else if (kind == ComparisonKind.GreaterThan)
            {
                op = ">";
            }
            else if (kind == ComparisonKind.GreaterThanOrEqual)
            {
                op = ">=";
            }
            else if (kind == ComparisonKind.LessThan)
            {
                op = "<";
            }
            else if (kind == ComparisonKind.LessThanOrEqual)
            {
                op = "<=";
            }
            else if (kind == ComparisonKind.NotEqual)
            {
                op = "!=";
            }
            else
            {
                return "Unexpected comparison type.";
            }

            bool translatable = codebook != null
                && Value.HasValue
                && codebook.Columns.Contains(name);

            string value = translatable
                ? codebook.Translate(name, (int)Value.Value)
                : Value.ToString();

            return String.Format("{0} {1} {2}", name, op, value);
        }
Пример #24
0
        /// <summary>
        ///   Trains a discrete hidden Markov model on a handful of phrases and
        ///   checks the three-word sample it generates under a fixed seed.
        /// </summary>
        public void GenerateTest2()
        {
            // Fix the seed so that learning and sampling are reproducible
            Accord.Math.Tools.SetupGenerator(42);

            // The training phrases, one word sequence per entry
            string[][] phrases =
            {
                new[] { "those", "are", "sample", "words", "from", "a", "dictionary" },
                new[] { "those", "are", "sample", "words" },
                new[] { "sample", "words", "are", "words" },
                new[] { "those", "words" },
                new[] { "those", "are", "words" },
                new[] { "words", "from", "a", "dictionary" },
                new[] { "those", "are", "words", "from", "a", "dictionary" }
            };

            // Words become integer labels through a codebook
            var codebook = new Codification("Words", phrases);
            int[][] observations = codebook.Translate("Words", phrases);

            // Phrases have a definite beginning and end, hence a forward topology
            var topology = new Forward(states: 4);

            // Number of distinct words known to the codebook
            int vocabularySize = codebook["Words"].Symbols;

            HiddenMarkovModel model = new HiddenMarkovModel(topology, vocabularySize);

            // Fit the model to the phrases using Baum-Welch
            BaumWelchLearning learner = new BaumWelchLearning(model);
            learner.Run(observations);

            // Draw a three-symbol sample and decode it back into words
            int[] sample = model.Generate(3);
            string[] result = codebook.Translate("Words", sample);

            string[] expected = { "those", "are", "words" };

            for (int i = 0; i < expected.Length; i++)
            {
                Assert.AreEqual(expected[i], result[i]);
            }
        }
Пример #25
0
        /// <summary>
        ///   Loads a previously serialized Naive Bayes model from the embedded
        ///   resource and checks that it still classifies a known instance
        ///   of the tennis example as expected.
        /// </summary>
        public void SerializationTest()
        {
            DataTable data = new DataTable("Mitchell's Tennis Example");

            data.Columns.Add("Day", "Outlook", "Temperature", "Humidity", "Wind", "PlayTennis");

            data.Rows.Add("D1", "Sunny", "Hot", "High", "Weak", "No");
            data.Rows.Add("D2", "Sunny", "Hot", "High", "Strong", "No");
            data.Rows.Add("D3", "Overcast", "Hot", "High", "Weak", "Yes");
            data.Rows.Add("D4", "Rain", "Mild", "High", "Weak", "Yes");
            data.Rows.Add("D5", "Rain", "Cool", "Normal", "Weak", "Yes");
            data.Rows.Add("D6", "Rain", "Cool", "Normal", "Strong", "No");
            data.Rows.Add("D7", "Overcast", "Cool", "Normal", "Strong", "Yes");
            data.Rows.Add("D8", "Sunny", "Mild", "High", "Weak", "No");
            data.Rows.Add("D9", "Sunny", "Cool", "Normal", "Weak", "Yes");
            data.Rows.Add("D10", "Rain", "Mild", "Normal", "Weak", "Yes");
            data.Rows.Add("D11", "Sunny", "Mild", "Normal", "Strong", "Yes");
            data.Rows.Add("D12", "Overcast", "Mild", "High", "Strong", "Yes");
            data.Rows.Add("D13", "Overcast", "Hot", "Normal", "Weak", "Yes");
            data.Rows.Add("D14", "Rain", "Mild", "High", "Strong", "No");

            // Create a new codification codebook to
            // convert strings into discrete symbols
            Codification codebook = new Codification(data,
                "Outlook", "Temperature", "Humidity", "Wind", "PlayTennis");

            // Load the stored model, releasing the stream as soon as it has been read
            NaiveBayes<GeneralDiscreteDistribution> target;
            using (var stream = new MemoryStream(Resources.nb))
            {
                target = Serializer.Load<NaiveBayes<GeneralDiscreteDistribution>>(stream);
            }

            // Expected value goes first so that failure messages read correctly
            Assert.AreEqual(4, target.InputCount);
            Assert.AreEqual(2, target.ClassCount);

            double logLikelihood;
            double[] responses;

            // Compute the result for a sunny, cool, humid and windy day:
            double[] instance = codebook.Translate("Sunny", "Cool", "High", "Strong").ToDouble();

            int c = target.Compute(instance, out logLikelihood, out responses);

            string result = codebook.Translate("PlayTennis", c);

            Assert.AreEqual("No", result);
            Assert.AreEqual(0, c);

            // Check the number of responses before looking at individual entries
            Assert.AreEqual(2, responses.Length);
            Assert.IsFalse(double.IsNaN(responses[0]));
            Assert.AreEqual(0.795, responses[0], 1e-3);
            Assert.AreEqual(1, responses.Sum(), 1e-10);
        }
Пример #26
0
        /// <summary>
        ///   Exercises the non-generic <c>NaiveBayes</c> constructor together with
        ///   <c>Estimate</c> and <c>Compute</c> on the tennis example.
        /// </summary>
        public void ComputeTest_Obsolete()
        {
            DataTable data = new DataTable("Mitchell's Tennis Example");

            data.Columns.Add("Day", "Outlook", "Temperature", "Humidity", "Wind", "PlayTennis");

            // Day, Outlook, Temperature, Humidity, Wind, PlayTennis
            object[][] observations =
            {
                new object[] { "D1", "Sunny", "Hot", "High", "Weak", "No" },
                new object[] { "D2", "Sunny", "Hot", "High", "Strong", "No" },
                new object[] { "D3", "Overcast", "Hot", "High", "Weak", "Yes" },
                new object[] { "D4", "Rain", "Mild", "High", "Weak", "Yes" },
                new object[] { "D5", "Rain", "Cool", "Normal", "Weak", "Yes" },
                new object[] { "D6", "Rain", "Cool", "Normal", "Strong", "No" },
                new object[] { "D7", "Overcast", "Cool", "Normal", "Strong", "Yes" },
                new object[] { "D8", "Sunny", "Mild", "High", "Weak", "No" },
                new object[] { "D9", "Sunny", "Cool", "Normal", "Weak", "Yes" },
                new object[] { "D10", "Rain", "Mild", "Normal", "Weak", "Yes" },
                new object[] { "D11", "Sunny", "Mild", "Normal", "Strong", "Yes" },
                new object[] { "D12", "Overcast", "Mild", "High", "Strong", "Yes" },
                new object[] { "D13", "Overcast", "Hot", "Normal", "Weak", "Yes" },
                new object[] { "D14", "Rain", "Mild", "High", "Strong", "No" },
            };

            foreach (object[] observation in observations)
            {
                data.Rows.Add(observation);
            }

            // Codebook for every column except the day identifier
            Codification codebook = new Codification(data,
                "Outlook", "Temperature", "Humidity", "Wind", "PlayTennis");

            // Number of symbols each input variable can take
            int[] symbolCounts =
            {
                codebook["Outlook"].Symbols,     // sunny, overcast, rain
                codebook["Temperature"].Symbols, // hot, mild, cool
                codebook["Humidity"].Symbols,    // high, normal
                codebook["Wind"].Symbols         // weak, strong
            };

            int classCount = codebook["PlayTennis"].Symbols; // yes, no

            // The classifier under test
            NaiveBayes target = new NaiveBayes(classCount, symbolCounts);

            // Encode the table and split it into inputs and expected outputs
            DataTable symbols = codebook.Apply(data);

            int[][] inputs  = symbols.ToArray<int>("Outlook", "Temperature", "Humidity", "Wind");
            int[]   outputs = symbols.ToArray<int>("PlayTennis");

            // Fit the model
            target.Estimate(inputs, outputs);

            // Query: a sunny, cool, humid and windy day
            int[] instance = codebook.Translate("Sunny", "Cool", "High", "Strong");

            double logLikelihood;
            double[] responses;

            int c = target.Compute(instance, out logLikelihood, out responses);

            // Decode the numeric class back into its label
            string result = codebook.Translate("PlayTennis", c);

            Assert.AreEqual("No", result);
            Assert.AreEqual(0, c);
            Assert.AreEqual(0.795, responses[0], 1e-3);
            Assert.AreEqual(1, responses.Sum(), 1e-10);
            Assert.IsFalse(double.IsNaN(responses[0]));
            Assert.AreEqual(2, responses.Length);
        }
Пример #27
0
        /// <summary>
        ///   Checks that a codebook built from a whole table translates complete
        ///   rows, and leading parts of rows, into the expected symbols, and that
        ///   passing more values than there are columns raises an exception.
        /// </summary>
        public void TranslateTest3()
        {
            string[] colNames = { "col1", "col2", "col3" };
            DataTable table = new DataTable("TranslateTest1 Table");
            table.Columns.Add(colNames);

            table.Rows.Add(1, 2, 3);
            table.Rows.Add(1, 3, 5);
            table.Rows.Add(1, 4, 7);
            table.Rows.Add(2, 4, 6);
            table.Rows.Add(2, 5, 8);
            table.Rows.Add(2, 6, 10);
            table.Rows.Add(3, 4, 5);
            table.Rows.Add(3, 5, 7);
            table.Rows.Add(3, 6, 9);

            // ok, so values 1,2,3 are in column 1
            // values 2,3,4,5,6 in column 2
            // values 3,5,6,7,8,9,10 in column 3
            var codeBook = new Codification(table);

            // The comparison results used to be discarded, so nothing was actually
            // verified. Each one is now asserted.
            Assert.IsTrue(Matrix.IsEqual(new int[] { 0, 0, 0 }, codeBook.Translate(new[] { "1", "2", "3" })));
            Assert.IsTrue(Matrix.IsEqual(new int[] { 0, 1, 1 }, codeBook.Translate(new[] { "1", "3", "5" })));
            Assert.IsTrue(Matrix.IsEqual(new int[] { 0, 2, 2 }, codeBook.Translate(new[] { "1", "4", "7" })));
            Assert.IsTrue(Matrix.IsEqual(new int[] { 1, 2, 3 }, codeBook.Translate(new[] { "2", "4", "6" })));
            Assert.IsTrue(Matrix.IsEqual(new int[] { 1, 3, 4 }, codeBook.Translate(new[] { "2", "5", "8" })));
            Assert.IsTrue(Matrix.IsEqual(new int[] { 1, 4, 5 }, codeBook.Translate(new[] { "2", "6", "10" })));
            Assert.IsTrue(Matrix.IsEqual(new int[] { 2, 2, 1 }, codeBook.Translate(new[] { "3", "4", "5" })));
            Assert.IsTrue(Matrix.IsEqual(new int[] { 2, 3, 2 }, codeBook.Translate(new[] { "3", "5", "7" })));
            Assert.IsTrue(Matrix.IsEqual(new int[] { 2, 4, 6 }, codeBook.Translate(new[] { "3", "6", "9" })));

            // Fewer values than columns: only the leading columns are translated
            Assert.IsTrue(Matrix.IsEqual(new int[] { 2 }, codeBook.Translate(new[] { "3" })));
            Assert.IsTrue(Matrix.IsEqual(new int[] { 2, 4 }, codeBook.Translate(new[] { "3", "6" })));
            Assert.IsTrue(Matrix.IsEqual(new int[] { 2, 4, 6 }, codeBook.Translate(new[] { "3", "6", "9" })));

            // More values than columns must be rejected
            bool thrown = false;

            try { codeBook.Translate(new[] { "3", "6", "9", "10" }); }
            catch (Exception) { thrown = true; }

            Assert.IsTrue(thrown);
        }
Пример #28
0
        /// <summary>
        ///   Fits a Naive Bayes classifier with two discrete and two normally
        ///   distributed inputs to the tennis data, then verifies the class and
        ///   the responses computed for a single new day.
        /// </summary>
        public void ComputeTest2()
        {
            DataTable data = new DataTable("Mitchell's Tennis Example");

            data.Columns.Add("Day", "Outlook", "Temperature", "Humidity", "Wind", "PlayTennis");

            // These two columns carry numeric measurements rather than labels
            data.Columns["Temperature"].DataType = typeof(double);
            data.Columns["Humidity"].DataType = typeof(double);

            // Day, Outlook, Temperature, Humidity, Wind, PlayTennis
            object[][] rows =
            {
                new object[] { "D1", "Sunny", 38.0, 96.0, "Weak", "No" },
                new object[] { "D2", "Sunny", 39.0, 90.0, "Strong", "No" },
                new object[] { "D3", "Overcast", 38.0, 75.0, "Weak", "Yes" },
                new object[] { "D4", "Rain", 25.0, 87.0, "Weak", "Yes" },
                new object[] { "D5", "Rain", 12.0, 30.0, "Weak", "Yes" },
                new object[] { "D6", "Rain", 11.0, 35.0, "Strong", "No" },
                new object[] { "D7", "Overcast", 10.0, 40.0, "Strong", "Yes" },
                new object[] { "D8", "Sunny", 24.0, 90.0, "Weak", "No" },
                new object[] { "D9", "Sunny", 12.0, 26.0, "Weak", "Yes" },
                new object[] { "D10", "Rain", 25, 30.0, "Weak", "Yes" },
                new object[] { "D11", "Sunny", 26.0, 40.0, "Strong", "Yes" },
                new object[] { "D12", "Overcast", 27.0, 97.0, "Strong", "Yes" },
                new object[] { "D13", "Overcast", 39.0, 41.0, "Weak", "Yes" },
                new object[] { "D14", "Rain", 23.0, 98.0, "Strong", "No" },
            };

            for (int i = 0; i < rows.Length; i++)
            {
                data.Rows.Add(rows[i]);
            }

            // Textual values are turned into integer symbols by the codebook
            Codification codebook = new Codification(data);

            int classCount = codebook["PlayTennis"].Symbols; // yes, no
            int inputCount = 4;                              // Outlook, Temperature, Humidity, Wind

            // Initial distribution of each input, listed in column order
            var outlookPrior = new GeneralDiscreteDistribution(codebook["Outlook"].Symbols); // sunny, overcast, rain
            var temperaturePrior = new NormalDistribution();                                 // continuous
            var humidityPrior = new NormalDistribution();                                    // continuous
            var windPrior = new GeneralDiscreteDistribution(codebook["Wind"].Symbols);       // weak, strong

            IUnivariateDistribution[] priors =
            {
                outlookPrior, temperaturePrior, humidityPrior, windPrior
            };

            var target = new NaiveBayes<IUnivariateDistribution>(classCount, inputCount, priors);

            // Encode the table, then pull out the training inputs and outputs
            DataTable symbols = codebook.Apply(data);
            double[][] inputs = symbols.ToArray("Outlook", "Temperature", "Humidity", "Wind");
            int[] outputs = symbols.ToArray<int>("PlayTennis");

            target.Estimate(inputs, outputs);

            // The day to classify: sunny, 12 degrees, 90 humidity, strong wind
            int outlookSymbol = codebook.Translate(columnName: "Outlook", value: "Sunny");
            int windSymbol = codebook.Translate(columnName: "Wind", value: "Strong");

            double[] instance = new double[4];
            instance[0] = outlookSymbol;
            instance[1] = 12.0;
            instance[2] = 90.0;
            instance[3] = windSymbol;

            double logLikelihood;
            double[] responses;

            int c = target.Compute(instance, out logLikelihood, out responses);

            // Turn the class index back into "Yes" or "No"
            string result = codebook.Translate("PlayTennis", c);

            Assert.AreEqual("No", result);
            Assert.AreEqual(0, c);
            Assert.AreEqual(0.840, responses[0], 1e-3);
            Assert.AreEqual(1, responses.Sum(), 1e-10);
            Assert.IsFalse(double.IsNaN(responses[0]));
            Assert.AreEqual(2, responses.Length);
        }
        /// <summary>
        ///   Learns an ID3 decision tree from the given table and uses it to
        ///   classify the nine values held in <c>inputlar</c>.
        /// </summary>
        /// <param name="tbl">
        ///   Training table. It must contain the nine feature columns named
        ///   below and a "Class" column.
        /// </param>
        /// <returns>The "Class" label the tree assigns to the query.</returns>
        public string kararAgaci(DataTable tbl)
        {
            // The input columns, in the order the tree sees them
            string[] featureNames =
            {
                "Clump Thickness",
                "Uniformity of Cell Size",
                "Uniformity of Cell Shape",
                "Marginal Adhesion",
                "Single Epithelial Cell Size",
                "Bare Nuclei",
                "Bland Chromatin",
                "Normal Nucleoli",
                "Mitoses"
            };

            int classCount = 2;
            Codification codebook = new Codification(tbl);

            // Every feature is declared with 10 symbols
            DecisionVariable[] attributes = new DecisionVariable[featureNames.Length];
            for (int i = 0; i < attributes.Length; i++)
            {
                attributes[i] = new DecisionVariable(featureNames[i], 10);
            }

            DecisionTree tree = new DecisionTree(attributes, classCount);
            ID3Learning id3learning = new ID3Learning(tree);

            // Encode the training table into integer symbols
            DataTable symbols = codebook.Apply(tbl);

            int[][] inputs = symbols.ToIntArray(featureNames);
            int[] outputs = symbols.ToIntArray("Class").GetColumn(0);

            // Induce the tree from the encoded data
            id3learning.Run(inputs, outputs);

            // Encode the query, classify it, and decode the answer
            int[] query = codebook.Translate(
                inputlar[0], inputlar[1], inputlar[2],
                inputlar[3], inputlar[4], inputlar[5],
                inputlar[6], inputlar[7], inputlar[8]);

            int output = tree.Compute(query);

            return codebook.Translate("Class", output);
        }
Пример #30
0
        /// <summary>
        ///   Builds the tennis example table, learns an ID3 decision tree from it
        ///   and prints the answer for one query, the generated code and the
        ///   rule set to the console.
        /// </summary>
        public void Run()
        {
            DataTable data = new DataTable("Mitchell's Tennis Example");

            string[] columnNames = { "Day", "Outlook", "Temperature", "Humidity", "Wind", "PlayTennis" };
            foreach (string columnName in columnNames)
            {
                data.Columns.Add(columnName);
            }

            // Day, Outlook, Temperature, Humidity, Wind, PlayTennis
            object[][] observations =
            {
                new object[] { "D1", "Sunny", "Hot", "High", "Weak", "No" },
                new object[] { "D2", "Sunny", "Hot", "High", "Strong", "No" },
                new object[] { "D3", "Overcast", "Hot", "High", "Weak", "Yes" },
                new object[] { "D4", "Rain", "Mild", "High", "Weak", "Yes" },
                new object[] { "D5", "Rain", "Cool", "Normal", "Weak", "Yes" },
                new object[] { "D6", "Rain", "Cool", "Normal", "Strong", "No" },
                new object[] { "D7", "Overcast", "Cool", "Normal", "Strong", "Yes" },
                new object[] { "D8", "Sunny", "Mild", "High", "Weak", "No" },
                new object[] { "D9", "Sunny", "Cool", "Normal", "Weak", "Yes" },
                new object[] { "D10", "Rain", "Mild", "Normal", "Weak", "Yes" },
                new object[] { "D11", "Sunny", "Mild", "Normal", "Strong", "Yes" },
                new object[] { "D12", "Overcast", "Mild", "High", "Strong", "Yes" },
                new object[] { "D13", "Overcast", "Hot", "Normal", "Weak", "Yes" },
                new object[] { "D14", "Rain", "Mild", "High", "Strong", "No" },
            };

            foreach (object[] observation in observations)
            {
                data.Rows.Add(observation);
            }

            // Every column except the day identifier is codified
            string[] codedColumns = { "Outlook", "Temperature", "Humidity", "Wind", "PlayTennis" };

            Codification codebook = new Codification(data, codedColumns);

            // Encode the training data into integer symbols
            DataTable symbols = codebook.Apply(data);

            // Prepare the per-column lookups used by GetIndex and GetCount
            foreach (string column in codedColumns)
            {
                CreateDic(column, symbols);
            }

            // One index vector per row for the four input variables
            int[][] inputs = symbols.AsEnumerable()
                .Select(row => new int[]
                {
                    GetIndex("Outlook", row["Outlook"].ToString()),
                    GetIndex("Temperature", row["Temperature"].ToString()),
                    GetIndex("Humidity", row["Humidity"].ToString()),
                    GetIndex("Wind", row["Wind"].ToString())
                })
                .ToArray();

            // One index per row for the decision
            int[] outputs = symbols.AsEnumerable()
                .Select(row => GetIndex("PlayTennis", row["PlayTennis"].ToString()))
                .Cast<int>()
                .ToArray();

            // Each decision variable takes as many symbols as GetCount reports
            DecisionVariable[] attributes =
            {
                new DecisionVariable("Outlook", GetCount("Outlook")),
                new DecisionVariable("Temperature", GetCount("Temperature")),
                new DecisionVariable("Humidity", GetCount("Humidity")),
                new DecisionVariable("Wind", GetCount("Wind"))
            };

            int classCount = GetCount("PlayTennis");

            // Build the tree and induce it with ID3
            DecisionTree tree = new DecisionTree(attributes, classCount);
            ID3Learning id3learning = new ID3Learning(tree);
            id3learning.Run(inputs, outputs);

            // Classify one query and decode the decision
            int[] query = codebook.Translate("Sunny", "Hot", "High", "Strong");
            int decision = tree.Compute(query);
            string answer = codebook.Translate("PlayTennis", decision);

            Console.WriteLine("Calculate for: Sunny, Hot, High, Strong");
            Console.WriteLine("Answer: " + answer);

            // Other representations of the learned tree
            var expression = tree.ToExpression();
            Console.WriteLine(tree.ToCode("ClassTest"));

            DecisionSet ruleSet = tree.ToRules();
            Console.WriteLine(ruleSet.ToString());

            // Compile the expression; the resulting delegate is not used further
            var compiled = expression.Compile();
        }
Пример #31
0
        /// <summary>
        ///   Learns a discrete Naive Bayes model from the tennis example and
        ///   checks the decision and probabilities for one new day.
        /// </summary>
        public void ComputeTest()
        {
            #region doc_mitchell
            DataTable data = new DataTable("Mitchell's Tennis Example");

            data.Columns.Add("Day", "Outlook", "Temperature", "Humidity", "Wind", "PlayTennis");

            // Day, Outlook, Temperature, Humidity, Wind, PlayTennis
            object[][] observations =
            {
                new object[] { "D1", "Sunny", "Hot", "High", "Weak", "No" },
                new object[] { "D2", "Sunny", "Hot", "High", "Strong", "No" },
                new object[] { "D3", "Overcast", "Hot", "High", "Weak", "Yes" },
                new object[] { "D4", "Rain", "Mild", "High", "Weak", "Yes" },
                new object[] { "D5", "Rain", "Cool", "Normal", "Weak", "Yes" },
                new object[] { "D6", "Rain", "Cool", "Normal", "Strong", "No" },
                new object[] { "D7", "Overcast", "Cool", "Normal", "Strong", "Yes" },
                new object[] { "D8", "Sunny", "Mild", "High", "Weak", "No" },
                new object[] { "D9", "Sunny", "Cool", "Normal", "Weak", "Yes" },
                new object[] { "D10", "Rain", "Mild", "Normal", "Weak", "Yes" },
                new object[] { "D11", "Sunny", "Mild", "Normal", "Strong", "Yes" },
                new object[] { "D12", "Overcast", "Mild", "High", "Strong", "Yes" },
                new object[] { "D13", "Overcast", "Hot", "Normal", "Weak", "Yes" },
                new object[] { "D14", "Rain", "Mild", "High", "Strong", "No" },
            };

            foreach (object[] observation in observations)
            {
                data.Rows.Add(observation);
            }
            #endregion

            #region doc_codebook
            // The codebook turns the textual values of the
            // listed columns into integer symbols
            Codification codebook = new Codification(data,
                "Outlook", "Temperature", "Humidity", "Wind", "PlayTennis");

            // Encode the table and separate inputs from outputs
            DataTable symbols = codebook.Apply(data);
            int[][] inputs = symbols.ToArray<int>("Outlook", "Temperature", "Humidity", "Wind");
            int[] outputs = symbols.ToArray<int>("PlayTennis");
            #endregion

            #region doc_learn
            // The learning algorithm for Naive Bayes
            NaiveBayesLearning learner = new NaiveBayesLearning();

            // Estimate the model from the encoded examples
            NaiveBayes nb = learner.Learn(inputs, outputs);
            #endregion


            #region doc_test
            // Should one play tennis on a sunny, cool, humid and
            // windy day? Start by encoding that day as symbols
            int[] instance = codebook.Translate("Sunny", "Cool", "High", "Strong");

            // Ask the model for the numeric class
            int c = nb.Decide(instance); // answer will be 0

            // Decode the class into the actual "Yes" or "No" answer
            string result = codebook.Translate("PlayTennis", c); // answer will be "No"

            // The probability of each possible answer is also available
            double[] probs = nb.Probabilities(instance); // { 0.795, 0.205 }
            #endregion

            Assert.AreEqual("No", result);
            Assert.AreEqual(0, c);
            Assert.AreEqual(0.795, probs[0], 1e-3);
            Assert.AreEqual(0.205, probs[1], 1e-3);
            Assert.AreEqual(1, probs.Sum(), 1e-10);
            Assert.IsFalse(double.IsNaN(probs[0]));
            Assert.AreEqual(2, probs.Length);
        }
Пример #32
0
        // Trains a C4.5 tree (with Join set to 3) on the Iris data, converts the
        // tree into a rule set, and checks that both report the same error.
        public void AttributeReuseTest1()
        {
            // One sample per line, fields separated by commas.
            string[] lines = Resources.iris_data.Split(
                new[] { '\n' }, StringSplitOptions.RemoveEmptyEntries);
            string[][] text = lines.Apply(line => line.Split(','));

            // The first four fields of every record are the numeric inputs; they
            // are parsed with the invariant culture so '.' is the decimal mark.
            double[][] inputs = Array.ConvertAll(text, fields => fields.First(4).Convert(
                s => Double.Parse(s, System.Globalization.CultureInfo.InvariantCulture)));

            // The fifth field is the textual class label, encoded as integers.
            string[] labels = text.GetColumn(4);
            Codification codebook = new Codification("Label", labels);
            int[] outputs = codebook.Translate("Label", labels);

            // All four input attributes are continuous.
            string[] featureNames = { "sepal length", "sepal width", "petal length", "petal width" };
            DecisionVariable[] features = Array.ConvertAll(featureNames,
                name => new DecisionVariable(name, DecisionVariableKind.Continuous));

            DecisionTree tree = new DecisionTree(features, codebook.Columns[0].Symbols);
            C45Learning teacher = new C45Learning(tree) { Join = 3 };

            double treeError = teacher.Run(inputs, outputs);
            Assert.AreEqual(0.02, treeError, 1e-10);

            // The rule set derived from the tree must reproduce the tree's error.
            DecisionSet rules = tree.ToRules();
            double rulesError = ComputeError(rules, inputs, outputs);
            Assert.AreEqual(0.02, rulesError, 1e-10);

            string ruleText = rules.ToString(codebook,
                System.Globalization.CultureInfo.InvariantCulture);

            // TODO: replace this length check with an assertion on the actual
            // rule text once the feature is completely finished.
            Assert.AreEqual(600, ruleText.Length);
        }
Пример #33
0
        // Learns a C4.5 tree on the Iris data through the Learn() API, then checks
        // the training error and the textual form of the rules derived from it.
        public void new_method_create_tree()
        {
            // One sample per line, fields separated by commas.
            string[][] text = Resources.iris_data
                .Split(new[] { '\n' }, StringSplitOptions.RemoveEmptyEntries)
                .Apply(line => line.Split(','));

            // Columns 0-3 hold the inputs, column 4 the class label.
            double[][] inputs = text.GetColumns(0, 1, 2, 3).To<double[][]>();
            string[] labels = text.GetColumn(4);

            // Encode the textual labels as integer class indices.
            var codebook = new Codification("Output", labels);
            int[] outputs = codebook.Translate("Output", labels);

            // Induce the tree with C4.5.
            var teacher = new C45Learning();
            var tree = teacher.Learn(inputs, outputs);

            // Estimated class labels for the training inputs.
            int[] predicted = tree.Decide(inputs);

            // Mean zero-one loss of the tree on the training set (about 0.0266).
            var zeroOne = new ZeroOneLoss(outputs)
            {
                Mean = true
            };
            double error = zeroOne.Loss(tree.Decide(inputs));

            // Convert the tree into a rule set and render it using the codebook
            // so that class indices appear as their original labels.
            DecisionSet rules = tree.ToRules();
            string ruleText = rules.ToString(codebook, "Output",
                System.Globalization.CultureInfo.InvariantCulture);

            // Expected rendering of the rule set:
            string expected = @"Iris-setosa =: (2 <= 2.45)
Iris-versicolor =: (2 > 2.45) && (3 <= 1.75) && (0 <= 7.05) && (1 <= 2.85)
Iris-versicolor =: (2 > 2.45) && (3 <= 1.75) && (0 <= 7.05) && (1 > 2.85)
Iris-versicolor =: (2 > 2.45) && (3 > 1.75) && (0 <= 5.95) && (1 > 3.05)
Iris-virginica =: (2 > 2.45) && (3 <= 1.75) && (0 > 7.05)
Iris-virginica =: (2 > 2.45) && (3 > 1.75) && (0 > 5.95)
Iris-virginica =: (2 > 2.45) && (3 > 1.75) && (0 <= 5.95) && (1 <= 3.05)
";

            Assert.AreEqual(0.026666666666666668, error, 1e-10);

            // The rules must classify exactly as the tree does.
            double rulesError = ComputeError(rules, inputs, outputs);
            Assert.AreEqual(0.026666666666666668, rulesError, 1e-10);
            Assert.AreEqual(expected, ruleText);
        }
Пример #34
0
        // Builds a codebook from a plain array of labels and checks that labels
        // translate to consecutive integer codes and back again.
        public void ApplyTest3()
        {
            string[] names = { "child", "adult", "elder" };

            Codification codebook = new Codification("Label", names);

            // Forward direction: each label becomes a discrete symbol
            // ("child" -> 0, "adult" -> 1, "elder" -> 2).
            int[] codes = new int[names.Length];
            for (int i = 0; i < names.Length; i++)
            {
                codes[i] = codebook.Translate("Label", names[i]);
            }

            // Reverse direction: each symbol is mapped back to its label.
            string[] decoded = new string[names.Length];
            for (int i = 0; i < names.Length; i++)
            {
                decoded[i] = codebook.Translate("Label", i);
            }

            // Codes follow the order in which the labels were given.
            for (int i = 0; i < names.Length; i++)
            {
                Assert.AreEqual(i, codes[i]);
            }

            for (int i = 0; i < names.Length; i++)
            {
                Assert.AreEqual(names[i], decoded[i]);
            }
        }
        /// <summary>
        /// Trains a C4.5 decision tree on <paramref name="tbl"/> and returns the
        /// "Class" label the tree predicts for the query stored in <c>inputlar</c>.
        /// </summary>
        /// <param name="tbl">
        /// Training table. The code reads the nine attribute columns listed in
        /// the method body plus a "Class" column from it.
        /// </param>
        /// <returns>The predicted class, translated back to its original label.</returns>
        private string C45(DataTable tbl)
        {
            const int classCount = 2;

            // The nine input attributes; each is declared with 10 discrete symbols.
            string[] attributeNames =
            {
                "Clump Thickness",
                "Uniformity of Cell Size",
                "Uniformity of Cell Shape",
                "Marginal Adhesion",
                "Single Epithelial Cell Size",
                "Bare Nuclei",
                "Bland Chromatin",
                "Normal Nucleoli",
                "Mitoses"
            };

            Codification codebook = new Codification(tbl);

            DecisionVariable[] attributes = new DecisionVariable[attributeNames.Length];
            for (int i = 0; i < attributeNames.Length; i++)
            {
                attributes[i] = new DecisionVariable(attributeNames[i], 10);
            }

            DecisionTree tree = new DecisionTree(attributes, classCount);

            // Translate the training data into integer symbols using the codebook.
            DataTable symbols = codebook.Apply(tbl);

            double[][] inputs = symbols.ToIntArray(attributeNames).ToDouble();
            int[] outputs = symbols.ToIntArray("Class").GetColumn(0);

            // Train the tree. The training error returned by Run is not needed here.
            C45Learning c45 = new C45Learning(tree);
            c45.Run(inputs, outputs);

            // NOTE(review): inputlar is declared outside this method; it is read
            // here as the nine attribute values of the sample to classify, in the
            // same order as attributeNames. Confirm against its declaration.
            int[] query = codebook.Translate(inputlar[0], inputlar[1], inputlar[2], inputlar[3],
                inputlar[4], inputlar[5], inputlar[6], inputlar[7], inputlar[8]);

            int output = tree.Compute(query);
            return codebook.Translate("Class", output);
        }
Пример #36
0
        // Checks that a codebook built from a table assigns, within each column,
        // consecutive codes to values in the order in which they first appear.
        public void TranslateTest1()
        {
            string[] colNames = { "col1", "col2", "col3" };
            int[][] rows =
            {
                new[] { 1, 2, 3 },
                new[] { 1, 3, 5 },
                new[] { 1, 4, 7 },
                new[] { 2, 4, 6 },
                new[] { 2, 5, 8 },
                new[] { 2, 6, 10 },
                new[] { 3, 4, 5 },
                new[] { 3, 5, 7 },
                new[] { 3, 6, 9 }
            };

            DataTable table = new DataTable("TranslateTest1 Table");
            table.Columns.Add(colNames);
            foreach (int[] row in rows)
            {
                table.Rows.Add(row[0], row[1], row[2]);
            }

            // Column 1 holds the values 1,2,3; column 2 holds 2,3,4,5,6;
            // column 3 holds 3,5,6,7,8,9,10.
            var codeBook = new Codification(table);

            // For each column, the distinct values listed in first-appearance
            // order; the value at index i is expected to receive the code i.
            string[] firstColumn = { "1", "2", "3" };
            string[] secondColumn = { "2", "3", "4", "5", "6" };
            string[] thirdColumn = { "3", "5", "7", "6", "8", "10", "9" };

            for (int code = 0; code < firstColumn.Length; code++)
            {
                Assert.AreEqual(code, codeBook.Translate("col1", firstColumn[code]));
            }

            for (int code = 0; code < secondColumn.Length; code++)
            {
                Assert.AreEqual(code, codeBook.Translate("col2", secondColumn[code]));
            }

            for (int code = 0; code < thirdColumn.Length; code++)
            {
                Assert.AreEqual(code, codeBook.Translate("col3", thirdColumn[code]));
            }
        }
Пример #37
0
        // Builds a codebook over a three-column table and verifies the integer
        // code assigned to every distinct value of every column.
        public void TranslateTest1()
        {
            string[] colNames = { "col1", "col2", "col3" };
            var table = new DataTable("TranslateTest1 Table");

            table.Columns.Add(colNames);

            int[][] cells =
            {
                new[] { 1, 2, 3 }, new[] { 1, 3, 5 }, new[] { 1, 4, 7 },
                new[] { 2, 4, 6 }, new[] { 2, 5, 8 }, new[] { 2, 6, 10 },
                new[] { 3, 4, 5 }, new[] { 3, 5, 7 }, new[] { 3, 6, 9 }
            };

            for (int r = 0; r < cells.Length; r++)
            {
                table.Rows.Add(cells[r][0], cells[r][1], cells[r][2]);
            }

            // Values present: 1,2,3 in col1; 2,3,4,5,6 in col2;
            // 3,5,6,7,8,9,10 in col3.
            var codeBook = new Codification(table);

            // expectedOrder[c][k] is the value of column c expected to map to k.
            string[][] expectedOrder =
            {
                new[] { "1", "2", "3" },
                new[] { "2", "3", "4", "5", "6" },
                new[] { "3", "5", "7", "6", "8", "10", "9" }
            };

            for (int c = 0; c < colNames.Length; c++)
            {
                for (int k = 0; k < expectedOrder[c].Length; k++)
                {
                    Assert.AreEqual(k, codeBook.Translate(colNames[c], expectedOrder[c][k]));
                }
            }
        }
        // Walkthrough of the Codification filter: text labels of an age table are
        // turned into integer symbols, a multi-class SVM is trained on the encoded
        // data, and its predictions are translated back to the original labels.
        public void ApplyTest2()
        {
            // The sample table relates a person's age to a categorical label.
            // The filter assigns an integer to each distinct label, here
            // "child" = 0, "adult" = 1 and "elder" = 2.
            int[] ages = { 10, 7, 4, 21, 27, 12, 79, 40, 30 };
            string[] groups =
            {
                "child", "child", "child", "adult", "adult",
                "child", "elder", "adult", "adult"
            };

            var table = new DataTable("Sample data");
            table.Columns.Add("Age", typeof(int));
            table.Columns.Add("Label", typeof(string));

            for (int row = 0; row < ages.Length; row++)
            {
                table.Rows.Add(ages[row], groups[row]);
            }

            // Build the codebook from the whole table.
            var codebook = new Codification(table);

            // Labels to symbols ...
            int childCode = codebook.Translate("Label", "child"); // 0
            int adultCode = codebook.Translate("Label", "adult"); // 1
            int elderCode = codebook.Translate("Label", "elder"); // 2

            // ... and symbols back to labels.
            string firstLabel = codebook.Translate("Label", 0);  // "child"
            string secondLabel = codebook.Translate("Label", 1); // "adult"
            string thirdLabel = codebook.Translate("Label", 2);  // "elder"

            // A complete table can be encoded in a single call and then
            // converted into a jagged array.
            DataTable encoded = codebook.Apply(table);
            double[][] matrix = Matrix.ToJagged(encoded);

            // Rendered as C# source, the matrix reads:
            //
            //   new double[][]
            //   {
            //       new double[] { 10, 0 },
            //       new double[] {  7, 0 },
            //       new double[] {  4, 0 },
            //       new double[] { 21, 1 },
            //       new double[] { 27, 1 },
            //       new double[] { 12, 0 },
            //       new double[] { 79, 2 },
            //       new double[] { 40, 1 },
            //       new double[] { 30, 1 }
            //   };
            string rendered = matrix.ToString(CSharpJaggedMatrixFormatProvider.InvariantCulture);

            // The encoded matrix can now be fed to a learning algorithm.
            int classes = codebook["Label"].Symbols; // child, adult, elder

            // Column 0 (age) is the input, column 1 (label code) the output.
            double[][] inputs = matrix.GetColumns(new[] { 0 });
            int[] outputs = matrix.GetColumn(1).ToInt32();

            // Multi-class SVM with one input (Age) and one class per label.
            var machine = new MulticlassSupportVectorMachine(inputs: 1, classes: classes);

            // Each binary sub-problem is trained with SMO, using Complexity = 1.
            var teacher = new MulticlassSupportVectorLearning(machine, inputs, outputs)
            {
                Algorithm = (svm, classInputs, classOutputs, i, j) =>
                    new SequentialMinimalOptimization(svm, classInputs, classOutputs)
                    {
                        Complexity = 1
                    }
            };

            // Train the machine.
            double trainingError = teacher.Run();

            // Classify new ages and translate the decisions back into labels.
            string labelFor10 = codebook.Translate("Label", machine.Compute(10)); // child
            string labelFor40 = codebook.Translate("Label", machine.Compute(40)); // adult
            string labelFor70 = codebook.Translate("Label", machine.Compute(70)); // elder

            Assert.AreEqual(0, childCode);
            Assert.AreEqual(1, adultCode);
            Assert.AreEqual(2, elderCode);
            Assert.AreEqual("child", firstLabel);
            Assert.AreEqual("adult", secondLabel);
            Assert.AreEqual("elder", thirdLabel);

            Assert.AreEqual("child", labelFor10);
            Assert.AreEqual("adult", labelFor40);
            Assert.AreEqual("elder", labelFor70);
        }
Пример #39
0
        // Checks the Translate(string[]) overload, which translates the i-th value
        // using the i-th column of the codebook, for full rows, for shorter
        // prefixes, and for an input with more values than there are columns.
        public void TranslateTest3()
        {
            string[]  colNames = { "col1", "col2", "col3" };
            DataTable table    = new DataTable("TranslateTest1 Table");

            table.Columns.Add(colNames);

            table.Rows.Add(1, 2, 3);
            table.Rows.Add(1, 3, 5);
            table.Rows.Add(1, 4, 7);
            table.Rows.Add(2, 4, 6);
            table.Rows.Add(2, 5, 8);
            table.Rows.Add(2, 6, 10);
            table.Rows.Add(3, 4, 5);
            table.Rows.Add(3, 5, 7);
            table.Rows.Add(3, 6, 9);

            // ok, so values 1,2,3 are in column 1
            // values 2,3,4,5,6 in column 2
            // values 3,5,6,7,8,9,10 in column 3
            var codeBook = new Codification(table);

            // Matrix.IsEqual only returns a bool; its result has to be asserted,
            // otherwise a wrong translation would go unnoticed.
            Assert.IsTrue(Matrix.IsEqual(new int[] { 0, 0, 0 }, codeBook.Translate(new[] { "1", "2", "3" })));
            Assert.IsTrue(Matrix.IsEqual(new int[] { 0, 1, 1 }, codeBook.Translate(new[] { "1", "3", "5" })));
            Assert.IsTrue(Matrix.IsEqual(new int[] { 0, 2, 2 }, codeBook.Translate(new[] { "1", "4", "7" })));
            Assert.IsTrue(Matrix.IsEqual(new int[] { 1, 2, 3 }, codeBook.Translate(new[] { "2", "4", "6" })));
            Assert.IsTrue(Matrix.IsEqual(new int[] { 1, 3, 4 }, codeBook.Translate(new[] { "2", "5", "8" })));
            Assert.IsTrue(Matrix.IsEqual(new int[] { 1, 4, 5 }, codeBook.Translate(new[] { "2", "6", "10" })));
            Assert.IsTrue(Matrix.IsEqual(new int[] { 2, 2, 1 }, codeBook.Translate(new[] { "3", "4", "5" })));
            Assert.IsTrue(Matrix.IsEqual(new int[] { 2, 3, 2 }, codeBook.Translate(new[] { "3", "5", "7" })));
            Assert.IsTrue(Matrix.IsEqual(new int[] { 2, 4, 6 }, codeBook.Translate(new[] { "3", "6", "9" })));

            // Inputs shorter than the number of columns use the leading columns.
            Assert.IsTrue(Matrix.IsEqual(new int[] { 2 }, codeBook.Translate(new[] { "3" })));
            Assert.IsTrue(Matrix.IsEqual(new int[] { 2, 4 }, codeBook.Translate(new[] { "3", "6" })));
            Assert.IsTrue(Matrix.IsEqual(new int[] { 2, 4, 6 }, codeBook.Translate(new[] { "3", "6", "9" })));

            // Passing more values than there are columns is expected to throw.
            bool thrown = false;

            try { codeBook.Translate(new[] { "3", "6", "9", "10" }); }
            catch (Exception) { thrown = true; }

            Assert.IsTrue(thrown);
        }
Пример #40
0
        // Learns a Naive Bayes classifier from Mitchell's tennis table and checks
        // the decision and the class probabilities obtained for one query day.
        public void ComputeTest()
        {
            #region doc_mitchell
            var table = new DataTable("Mitchell's Tennis Example");

            table.Columns.Add("Day", "Outlook", "Temperature", "Humidity", "Wind", "PlayTennis");

            table.Rows.Add("D1", "Sunny", "Hot", "High", "Weak", "No");
            table.Rows.Add("D2", "Sunny", "Hot", "High", "Strong", "No");
            table.Rows.Add("D3", "Overcast", "Hot", "High", "Weak", "Yes");
            table.Rows.Add("D4", "Rain", "Mild", "High", "Weak", "Yes");
            table.Rows.Add("D5", "Rain", "Cool", "Normal", "Weak", "Yes");
            table.Rows.Add("D6", "Rain", "Cool", "Normal", "Strong", "No");
            table.Rows.Add("D7", "Overcast", "Cool", "Normal", "Strong", "Yes");
            table.Rows.Add("D8", "Sunny", "Mild", "High", "Weak", "No");
            table.Rows.Add("D9", "Sunny", "Cool", "Normal", "Weak", "Yes");
            table.Rows.Add("D10", "Rain", "Mild", "Normal", "Weak", "Yes");
            table.Rows.Add("D11", "Sunny", "Mild", "Normal", "Strong", "Yes");
            table.Rows.Add("D12", "Overcast", "Mild", "High", "Strong", "Yes");
            table.Rows.Add("D13", "Overcast", "Hot", "Normal", "Weak", "Yes");
            table.Rows.Add("D14", "Rain", "Mild", "High", "Strong", "No");
            #endregion

            #region doc_codebook
            // The codebook maps the strings of the listed
            // columns to discrete integer symbols
            var codebook = new Codification(table,
                "Outlook", "Temperature", "Humidity", "Wind", "PlayTennis");

            // Encode the table and split it into training inputs and outputs
            DataTable encoded = codebook.Apply(table);
            int[][] inputs = encoded.ToArray<int>("Outlook", "Temperature", "Humidity", "Wind");
            int[] outputs = encoded.ToArray<int>("PlayTennis");
            #endregion

            #region doc_learn
            // Set up the Naive Bayes learning algorithm
            NaiveBayesLearning teacher = new NaiveBayesLearning();

            // Estimate a Naive Bayes model from the encoded examples
            NaiveBayes classifier = teacher.Learn(inputs, outputs);
            #endregion


            #region doc_test
            // Should one play tennis on a sunny, cool, humid and windy day?
            // First, the day is encoded with the same codebook
            int[] query = codebook.Translate("Sunny", "Cool", "High", "Strong");

            // The classifier answers with a class index
            int decision = classifier.Decide(query); // answer will be 0

            // The class index is translated back into a "Yes" or "No" answer
            string answer = codebook.Translate("PlayTennis", decision); // answer will be "No"

            // The probability of each possible answer is also available
            double[] probabilities = classifier.Probabilities(query); // { 0.795, 0.205 }
            #endregion

            Assert.AreEqual("No", answer);
            Assert.AreEqual(0, decision);
            Assert.AreEqual(0.795, probabilities[0], 1e-3);
            Assert.AreEqual(0.205, probabilities[1], 1e-3);
            Assert.AreEqual(1, probabilities.Sum(), 1e-10);
            Assert.IsFalse(double.IsNaN(probabilities[0]));
            Assert.AreEqual(2, probabilities.Length);
        }
Пример #41
0
        // 
        //You can use the following additional attributes as you write your tests:
        //
        //Use ClassInitialize to run code before running the first test in the class
        //[ClassInitialize()]
        //public static void MyClassInitialize(TestContext testContext)
        //{
        //}
        //
        //Use ClassCleanup to run code after all tests in a class have run
        //[ClassCleanup()]
        //public static void MyClassCleanup()
        //{
        //}
        //
        //Use TestInitialize to run code before running each test
        //[TestInitialize()]
        //public void MyTestInitialize()
        //{
        //}
        //
        //Use TestCleanup to run code after each test has run
        //[TestCleanup()]
        //public void MyTestCleanup()
        //{
        //}
        //
        #endregion


        /// <summary>
        /// Builds Mitchell's tennis data set, trains an ID3 decision tree on it,
        /// and checks that the tree reproduces every training row.
        /// </summary>
        /// <param name="tree">Receives the trained decision tree.</param>
        /// <param name="inputs">Receives the encoded Outlook/Temperature/Humidity/Wind values.</param>
        /// <param name="outputs">Receives the encoded PlayTennis values.</param>
        public static void CreateMitchellExample(out DecisionTree tree, out int[][] inputs, out int[] outputs)
        {
            // Columns: Day, Outlook, Temperature, Humidity, Wind, PlayTennis.
            string[][] days =
            {
                new[] { "D1", "Sunny", "Hot", "High", "Weak", "No" },
                new[] { "D2", "Sunny", "Hot", "High", "Strong", "No" },
                new[] { "D3", "Overcast", "Hot", "High", "Weak", "Yes" },
                new[] { "D4", "Rain", "Mild", "High", "Weak", "Yes" },
                new[] { "D5", "Rain", "Cool", "Normal", "Weak", "Yes" },
                new[] { "D6", "Rain", "Cool", "Normal", "Strong", "No" },
                new[] { "D7", "Overcast", "Cool", "Normal", "Strong", "Yes" },
                new[] { "D8", "Sunny", "Mild", "High", "Weak", "No" },
                new[] { "D9", "Sunny", "Cool", "Normal", "Weak", "Yes" },
                new[] { "D10", "Rain", "Mild", "Normal", "Weak", "Yes" },
                new[] { "D11", "Sunny", "Mild", "Normal", "Strong", "Yes" },
                new[] { "D12", "Overcast", "Mild", "High", "Strong", "Yes" },
                new[] { "D13", "Overcast", "Hot", "Normal", "Weak", "Yes" },
                new[] { "D14", "Rain", "Mild", "High", "Strong", "No" }
            };

            var data = new DataTable("Mitchell's Tennis Example");
            data.Columns.Add("Day", "Outlook", "Temperature", "Humidity", "Wind", "PlayTennis");

            foreach (string[] day in days)
            {
                data.Rows.Add(day[0], day[1], day[2], day[3], day[4], day[5]);
            }

            // The codebook converts the table's strings into integer symbols.
            // NOTE(review): it is built over the whole table, so it also covers
            // the "Day" column; confirm that the positional Translate(...) call
            // at the end of this method maps its values to the intended columns.
            var codebook = new Codification(data);

            // One decision variable per input column, sized by its symbol count.
            int outlookSymbols = codebook["Outlook"].Symbols;         // Sunny, overcast, rain
            int temperatureSymbols = codebook["Temperature"].Symbols; // Hot, mild, cool
            int humiditySymbols = codebook["Humidity"].Symbols;       // High, normal
            int windSymbols = codebook["Wind"].Symbols;               // Weak, strong

            DecisionVariable[] attributes =
            {
                new DecisionVariable("Outlook", outlookSymbols),
                new DecisionVariable("Temperature", temperatureSymbols),
                new DecisionVariable("Humidity", humiditySymbols),
                new DecisionVariable("Wind", windSymbols)
            };

            int classCount = codebook["PlayTennis"].Symbols; // yes, no

            tree = new DecisionTree(attributes, classCount);
            var id3 = new ID3Learning(tree);

            // Encode the table and train the tree on it.
            DataTable symbols = codebook.Apply(data);
            inputs = symbols.ToArray<int>("Outlook", "Temperature", "Humidity", "Wind");
            outputs = symbols.ToArray<int>("PlayTennis");

            double trainingError = id3.Run(inputs, outputs);
            Assert.AreEqual(0, trainingError);

            // Every training row must be classified with its own label.
            foreach (DataRow row in data.Rows)
            {
                var x = codebook.Translate(row, "Outlook", "Temperature", "Humidity", "Wind");

                int y = tree.Compute(x);

                string expected = row["PlayTennis"] as string;
                string actual = codebook.Translate("PlayTennis", y);

                Assert.AreEqual(expected, actual);
            }

            // A sunny, hot, humid and windy day is classified as "No".
            string answer = codebook.Translate("PlayTennis",
                tree.Compute(codebook.Translate("Sunny", "Hot", "High", "Strong")));

            Assert.AreEqual("No", answer);
        }
Пример #42
0
        /// <summary>
        /// Builds Mitchell's tennis data set, trains an ID3 decision tree on it,
        /// and checks the tree against a sample query and every training row.
        /// </summary>
        /// <param name="tree">Receives the trained decision tree.</param>
        /// <param name="inputs">Receives the encoded Outlook/Temperature/Humidity/Wind values.</param>
        /// <param name="outputs">Receives the encoded PlayTennis values.</param>
        public static void CreateMitchellExample(out DecisionTree tree, out int[][] inputs, out int[] outputs)
        {
            // Columns: Day, Outlook, Temperature, Humidity, Wind, PlayTennis.
            string[][] observations =
            {
                new[] { "D1", "Sunny", "Hot", "High", "Weak", "No" },
                new[] { "D2", "Sunny", "Hot", "High", "Strong", "No" },
                new[] { "D3", "Overcast", "Hot", "High", "Weak", "Yes" },
                new[] { "D4", "Rain", "Mild", "High", "Weak", "Yes" },
                new[] { "D5", "Rain", "Cool", "Normal", "Weak", "Yes" },
                new[] { "D6", "Rain", "Cool", "Normal", "Strong", "No" },
                new[] { "D7", "Overcast", "Cool", "Normal", "Strong", "Yes" },
                new[] { "D8", "Sunny", "Mild", "High", "Weak", "No" },
                new[] { "D9", "Sunny", "Cool", "Normal", "Weak", "Yes" },
                new[] { "D10", "Rain", "Mild", "Normal", "Weak", "Yes" },
                new[] { "D11", "Sunny", "Mild", "Normal", "Strong", "Yes" },
                new[] { "D12", "Overcast", "Mild", "High", "Strong", "Yes" },
                new[] { "D13", "Overcast", "Hot", "Normal", "Weak", "Yes" },
                new[] { "D14", "Rain", "Mild", "High", "Strong", "No" }
            };

            var data = new DataTable("Mitchell's Tennis Example");
            data.Columns.Add("Day", "Outlook", "Temperature", "Humidity", "Wind", "PlayTennis");

            for (int r = 0; r < observations.Length; r++)
            {
                string[] o = observations[r];
                data.Rows.Add(o[0], o[1], o[2], o[3], o[4], o[5]);
            }

            // The codebook converts the strings of the listed columns into
            // integer symbols; the "Day" column is not part of it.
            var codebook = new Codification(data,
                "Outlook", "Temperature", "Humidity", "Wind", "PlayTennis");

            // One decision variable per input column, sized by its symbol count.
            int outlookSymbols = codebook["Outlook"].Symbols;         // Sunny, overcast, rain
            int temperatureSymbols = codebook["Temperature"].Symbols; // Hot, mild, cool
            int humiditySymbols = codebook["Humidity"].Symbols;       // High, normal
            int windSymbols = codebook["Wind"].Symbols;               // Weak, strong

            DecisionVariable[] attributes =
            {
                new DecisionVariable("Outlook", outlookSymbols),
                new DecisionVariable("Temperature", temperatureSymbols),
                new DecisionVariable("Humidity", humiditySymbols),
                new DecisionVariable("Wind", windSymbols)
            };

            int classCount = codebook["PlayTennis"].Symbols; // yes, no

            tree = new DecisionTree(attributes, classCount);
            var id3 = new ID3Learning(tree);

            // Encode the table and train the tree on it.
            DataTable symbols = codebook.Apply(data);

            inputs = symbols.ToArray<int>("Outlook", "Temperature", "Humidity", "Wind");
            outputs = symbols.ToArray<int>("PlayTennis");

            double trainingError = id3.Run(inputs, outputs);

            Assert.AreEqual(0, trainingError);

            // Step-by-step query: a sunny, hot, humid and windy day gives "No".
            int[] query = codebook.Translate("Sunny", "Hot", "High", "Strong");
            int output = tree.Compute(query);
            string stepwiseAnswer = codebook.Translate("PlayTennis", output);

            Assert.AreEqual("No", stepwiseAnswer);

            // Every training row must be classified with its own label.
            foreach (DataRow row in data.Rows)
            {
                var x = codebook.Translate(row, "Outlook", "Temperature", "Humidity", "Wind");

                int y = tree.Compute(x);

                string actual = codebook.Translate("PlayTennis", y);
                string expected = row["PlayTennis"] as string;

                Assert.AreEqual(expected, actual);
            }

            // The same query again, written as a single nested expression.
            string nestedAnswer = codebook.Translate("PlayTennis",
                tree.Compute(codebook.Translate("Sunny", "Hot", "High", "Strong")));

            Assert.AreEqual("No", nestedAnswer);
        }
        public void ApplyTest2()
        {
            // Suppose we have a data table relating the age of
            // a person and its categorical classification, as 
            // in "child", "adult" or "elder".

            // The Codification filter is able to extract those
            // string labels and transform them into discrete
            // symbols, assigning integer labels to each of them
            // such as "child" = 0, "adult" = 1, and "elder" = 3.

            // Create the aforementioned sample table
            DataTable table = new DataTable("Sample data");
            table.Columns.Add("Age", typeof(int));
            table.Columns.Add("Label", typeof(string));

            //            age   label
            table.Rows.Add(10, "child");
            table.Rows.Add(07, "child");
            table.Rows.Add(04, "child");
            table.Rows.Add(21, "adult");
            table.Rows.Add(27, "adult");
            table.Rows.Add(12, "child");
            table.Rows.Add(79, "elder");
            table.Rows.Add(40, "adult");
            table.Rows.Add(30, "adult");


            // Now, let's say we need to translate those text labels
            // into integer symbols. Let's use a Codification filter:

            Codification codebook = new Codification(table);


            // After that, we can use the codebook to "translate"
            // the text labels into discrete symbols, such as:

            int a = codebook.Translate("Label", "child"); // returns 0
            int b = codebook.Translate("Label", "adult"); // returns 1
            int c = codebook.Translate("Label", "elder"); // returns 2

            // We can also do the reverse:
            string labela = codebook.Translate("Label", 0); // returns "child"
            string labelb = codebook.Translate("Label", 1); // returns "adult"
            string labelc = codebook.Translate("Label", 2); // returns "elder"


            // We can also process an entire data table at once:
            DataTable result = codebook.Apply(table);

            // The resulting table can be transformed to jagged array:
            double[][] matrix = Matrix.ToArray(result);

            // and the resulting matrix will be given by
            string str = matrix.ToString(CSharpJaggedMatrixFormatProvider.InvariantCulture);

            // str == new double[][] 
            // {
            //     new double[] { 10, 0 },
            //     new double[] {  7, 0 },
            //     new double[] {  4, 0 },
            //     new double[] { 21, 1 },
            //     new double[] { 27, 1 },
            //     new double[] { 12, 0 },
            //     new double[] { 79, 2 },
            //     new double[] { 40, 1 },
            //     new double[] { 30, 1 } 
            // };



            // Now we will be able to feed this matrix to any machine learning
            // algorithm without having to worry about text labels in our data:

            int classes = codebook["Label"].Symbols; // 3 classes (child, adult, elder)

            // Use the first column as input variables,
            // and the second column as outputs classes
            //
            double[][] inputs = matrix.GetColumns(0);
            int[] outputs = matrix.GetColumn(1).ToInt32();


            // Create a multi-class SVM for 1 input (Age) and 3 classes (Label)
            var machine = new MulticlassSupportVectorMachine(inputs: 1, classes: classes);

            // Create a Multi-class learning algorithm for the machine
            var teacher = new MulticlassSupportVectorLearning(machine, inputs, outputs);

            // Configure the learning algorithm to use SMO to train the
            //  underlying SVMs in each of the binary class subproblems.
            teacher.Algorithm = (svm, classInputs, classOutputs, i, j) =>
            {
                return new SequentialMinimalOptimization(svm, classInputs, classOutputs)
                {
                    Complexity = 1
                };
            };

            // Run the learning algorithm
            double error = teacher.Run();


            // After we have learned the machine, we can use it to classify
            // new data points, and use the codebook to translate the machine
            // outputs to the original text labels:

            string result1 = codebook.Translate("Label", machine.Compute(10)); // child
            string result2 = codebook.Translate("Label", machine.Compute(40)); // adult
            string result3 = codebook.Translate("Label", machine.Compute(70)); // elder


            Assert.AreEqual(0, a);
            Assert.AreEqual(1, b);
            Assert.AreEqual(2, c);
            Assert.AreEqual("child", labela);
            Assert.AreEqual("adult", labelb);
            Assert.AreEqual("elder", labelc);

            Assert.AreEqual("child", result1);
            Assert.AreEqual("adult", result2);
            Assert.AreEqual("elder", result3);

        }
Пример #44
0
        public void learn_test_mitchell()
        {
            #region doc_mitchell_1
            // Mitchell's Tennis example is kept in a DataTable here, although a
            // DataTable is not required in order to use Naive Bayes. The other
            // examples below show simpler approaches.
            DataTable data = new DataTable("Mitchell's Tennis Example");
            data.Columns.Add("Day", "Outlook", "Temperature", "Humidity", "Wind", "PlayTennis");

            // Temperature and Humidity are stored as continuous values
            data.Columns["Temperature"].DataType = typeof(double);
            data.Columns["Humidity"].DataType = typeof(double);

            // One entry per observed day:
            //             day    outlook     temp  humid  wind      play
            object[][] observations =
            {
                new object[] { "D1", "Sunny", 38.0, 96.0, "Weak", "No" },
                new object[] { "D2", "Sunny", 39.0, 90.0, "Strong", "No" },
                new object[] { "D3", "Overcast", 38.0, 75.0, "Weak", "Yes" },
                new object[] { "D4", "Rain", 25.0, 87.0, "Weak", "Yes" },
                new object[] { "D5", "Rain", 12.0, 30.0, "Weak", "Yes" },
                new object[] { "D6", "Rain", 11.0, 35.0, "Strong", "No" },
                new object[] { "D7", "Overcast", 10.0, 40.0, "Strong", "Yes" },
                new object[] { "D8", "Sunny", 24.0, 90.0, "Weak", "No" },
                new object[] { "D9", "Sunny", 12.0, 26.0, "Weak", "Yes" },
                new object[] { "D10", "Rain", 25, 30.0, "Weak", "Yes" },
                new object[] { "D11", "Sunny", 26.0, 40.0, "Strong", "Yes" },
                new object[] { "D12", "Overcast", 27.0, 97.0, "Strong", "Yes" },
                new object[] { "D13", "Overcast", 39.0, 41.0, "Weak", "Yes" },
                new object[] { "D14", "Rain", 23.0, 98.0, "Strong", "No" }
            };

            foreach (object[] observation in observations)
            {
                data.Rows.Add(observation);
            }
            #endregion

            #region doc_mitchell_2
            // Build a codebook that converts the string
            // values of the table into discrete symbols
            Codification codebook = new Codification(data);
            #endregion

            #region doc_mitchell_3
            // Distributions whose constructors take parameters cannot be created
            // automatically by the learning algorithm, so we state explicitly how
            // the component for each input variable should be initialized:
            IUnivariateFittableDistribution[] priors =
            {
                new GeneralDiscreteDistribution(codebook["Outlook"].Symbols),   // 3 possible values (Sunny, overcast, rain)
                new NormalDistribution(),                                       // Continuous value (Celsius)
                new NormalDistribution(),                                       // Continuous value (percentage)
                new GeneralDiscreteDistribution(codebook["Wind"].Symbols)       // 2 possible values (Weak, strong)
            };

            // Create the learning algorithm and tell it where to
            // find the initial distribution of each input variable
            var teacher = new NaiveBayesLearning<IUnivariateFittableDistribution>();
            teacher.Distribution = (classIdx, variableIdx) => priors[variableIdx];

            // Encode the table, then pull out the training inputs and outputs
            DataTable encoded = codebook.Apply(data);
            double[][] inputs = encoded.ToArray("Outlook", "Temperature", "Humidity", "Wind");
            int[] outputs = encoded.ToArray<int>("PlayTennis");

            // Fit the Naive Bayes model to the training data
            var model = teacher.Learn(inputs, outputs);
            #endregion

            #region doc_mitchell_4
            // Describe a "sunny, cool, humid and windy day":
            int sunny = codebook.Translate(columnName: "Outlook", value: "Sunny"); // symbol 0
            int strong = codebook.Translate(columnName: "Wind", value: "Strong");  // symbol 1
            double[] instance = { sunny, 12.0, 90.0, strong };

            // A class prediction is obtained with
            int predicted = model.Decide(instance);

            // The probability of each class is given by
            double[] probabilities = model.Probabilities(instance);

            // And the log-likelihood of the prediction by
            double logLikelihood = model.LogLikelihood(instance);

            // The predicted symbol is translated back into its label using
            string answer = codebook.Translate("PlayTennis", predicted); // Should be "No"
            #endregion

            Assert.AreEqual("No", answer);
            Assert.AreEqual(0, predicted);
            Assert.AreEqual(0.840, probabilities[0], 1e-3);
            Assert.AreEqual(-10.493243476691351, logLikelihood, 1e-6);
            Assert.AreEqual(1, probabilities.Sum(), 1e-10);
            Assert.AreEqual(2, probabilities.Length);
        }
        /// <summary>
        /// Trains a discrete Naive Bayes classifier on <paramref name="tbl"/> and
        /// returns the "Class" label it predicts for the sample stored in
        /// <c>inputlar</c>.
        /// </summary>
        /// <param name="tbl">
        /// Training table. It must contain the nine feature columns listed in the
        /// method body plus a "Class" column.
        /// </param>
        /// <returns>
        /// The "Class" codeword predicted for the first nine entries of <c>inputlar</c>.
        /// </returns>
        private string bayes(DataTable tbl)
        {
            // Single source of truth for the feature columns. The same list drives the
            // codebook, the input extraction and the per-variable symbol counts, so the
            // three can no longer drift apart.
            string[] featureColumns =
            {
                "Clump Thickness",
                "Uniformity of Cell Size",
                "Uniformity of Cell Shape",
                "Marginal Adhesion",
                "Single Epithelial Cell Size",
                "Bare Nuclei",
                "Bland Chromatin",
                "Normal Nucleoli",
                "Mitoses"
            };
            const string classColumn = "Class";

            // The codebook covers every feature column followed by the class column
            string[] codedColumns = new string[featureColumns.Length + 1];
            featureColumns.CopyTo(codedColumns, 0);
            codedColumns[featureColumns.Length] = classColumn;

            Codification codebook = new Codification(tbl, codedColumns);

            // Translate our training data into integer symbols using our codebook:
            DataTable symbols = codebook.Apply(tbl);
            int[][] inputs = symbols.ToIntArray(featureColumns);
            int[] outputs = symbols.ToIntArray(classColumn).GetColumn(0);

            // Number of distinct symbols the codebook found for each feature
            int[] symbolCounts = new int[featureColumns.Length];
            for (int i = 0; i < featureColumns.Length; i++)
            {
                symbolCounts[i] = codebook[featureColumns[i]].Symbols;
            }

            // Number of distinct class labels found in the "Class" column
            int classCount = codebook[classColumn].Symbols;

            // Create a new Naive Bayes classifier for those classes
            NaiveBayes target = new NaiveBayes(classCount, symbolCounts);

            // Compute the Naive Bayes model
            target.Estimate(inputs, outputs);

            // Encode the sample to classify, one value per feature column.
            // NOTE(review): inputlar is declared outside this method; presumably it
            // holds the raw feature values in the same order as featureColumns - confirm.
            int[] instance = codebook.Translate(inputlar[0], inputlar[1], inputlar[2], inputlar[3],
                inputlar[4], inputlar[5], inputlar[6], inputlar[7], inputlar[8]);

            // Now, we can feed this instance to our model
            int output = target.Compute(instance);

            // Finally, the result is translated back to one of the class codewords
            string result = codebook.Translate(classColumn, output);
            return result;
        }
Пример #46
0
        public void IrisDatasetTest()
        {
            #region doc_iris
            // In this example, we will process the famous Fisher's Iris dataset in
            // which the task is to classify whether the features of an Iris flower
            // belong to an Iris setosa, an Iris versicolor, or an Iris virginica:
            //
            //  - https://en.wikipedia.org/wiki/Iris_flower_data_set
            //

            // First, let's load the dataset into an array of text that we can process
            string[][] text = Resources.iris_data.Split(new[] { '\n' }, StringSplitOptions.RemoveEmptyEntries).Apply(x => x.Split(','));

            // The first four columns contain the flower features
            double[][] inputs = text.GetColumns(0, 1, 2, 3).To <double[][]>();

            // The last column contains the expected flower type
            string[] labels = text.GetColumn(4);

            // Since the labels are represented as text, the first step is to convert
            // those text labels into integer class labels, so we can process them
            // more easily. For this, we will create a codebook to encode class labels:
            //
            var codebook = new Codification("Output", labels);

            // With the codebook, we can convert the labels:
            int[] outputs = codebook.Translate("Output", labels);

            // Let's declare the names of our input variables:
            DecisionVariable[] features =
            {
                new DecisionVariable("sepal length", DecisionVariableKind.Continuous),
                new DecisionVariable("sepal width",  DecisionVariableKind.Continuous),
                new DecisionVariable("petal length", DecisionVariableKind.Continuous),
                new DecisionVariable("petal width",  DecisionVariableKind.Continuous),
            };

            // Now, we can finally create our tree for the 3 classes:
            var tree = new DecisionTree(inputs: features, classes: 3);

            // And we can use the C4.5 for learning:
            var teacher = new C45Learning(tree);

            // And finally induce the tree:
            teacher.Learn(inputs, outputs);

            // To get the estimated class labels, we can use
            int[] predicted = tree.Decide(inputs);

            // And the classification error can be computed from those
            // predictions (no need to run the tree a second time):
            double error = new ZeroOneLoss(outputs) // 0.0266
            {
                Mean = true
            }.Loss(predicted);

            // Moreover, we may decide to convert our tree to a set of rules:
            DecisionSet rules = tree.ToRules();

            // And using the codebook, we can inspect the tree reasoning:
            string ruleText = rules.ToString(codebook, "Output",
                                             System.Globalization.CultureInfo.InvariantCulture);

            // The output is:
            string expected = @"Iris-setosa =: (petal length <= 2.45)
Iris-versicolor =: (petal length > 2.45) && (petal width <= 1.75) && (sepal length <= 7.05) && (sepal width <= 2.85)
Iris-versicolor =: (petal length > 2.45) && (petal width <= 1.75) && (sepal length <= 7.05) && (sepal width > 2.85)
Iris-versicolor =: (petal length > 2.45) && (petal width > 1.75) && (sepal length <= 5.95) && (sepal width > 3.05)
Iris-virginica =: (petal length > 2.45) && (petal width <= 1.75) && (sepal length > 7.05)
Iris-virginica =: (petal length > 2.45) && (petal width > 1.75) && (sepal length > 5.95)
Iris-virginica =: (petal length > 2.45) && (petal width > 1.75) && (sepal length <= 5.95) && (sepal width <= 3.05)
";
            #endregion

            // The training error reported by the tree ...
            Assert.AreEqual(0.026666666666666668, error, 1e-10);
            Assert.AreEqual(4, tree.NumberOfInputs);
            Assert.AreEqual(3, tree.NumberOfOutputs);

            // ... must be matched by the rule set derived from it
            double newError = ComputeError(rules, inputs, outputs);
            Assert.AreEqual(0.026666666666666668, newError, 1e-10);
            Assert.AreEqual(expected, ruleText);
        }
Пример #47
0
        public void learn_test_mitchell()
        {
            #region doc_mitchell_1
            // The data for Mitchell's Tennis example lives in a DataTable below.
            // Naive Bayes does not require a DataTable, though; please see the
            // other examples below for simpler approaches.
            DataTable data = new DataTable("Mitchell's Tennis Example");
            data.Columns.Add("Day", "Outlook", "Temperature", "Humidity", "Wind", "PlayTennis");

            // Mark Temperature and Humidity as continuous columns
            data.Columns["Temperature"].DataType = typeof(double);
            data.Columns["Humidity"].DataType = typeof(double);

            // Training samples, one array per day
            object[][] samples =
            {
                new object[] { "D1", "Sunny", 38.0, 96.0, "Weak", "No" },
                new object[] { "D2", "Sunny", 39.0, 90.0, "Strong", "No" },
                new object[] { "D3", "Overcast", 38.0, 75.0, "Weak", "Yes" },
                new object[] { "D4", "Rain", 25.0, 87.0, "Weak", "Yes" },
                new object[] { "D5", "Rain", 12.0, 30.0, "Weak", "Yes" },
                new object[] { "D6", "Rain", 11.0, 35.0, "Strong", "No" },
                new object[] { "D7", "Overcast", 10.0, 40.0, "Strong", "Yes" },
                new object[] { "D8", "Sunny", 24.0, 90.0, "Weak", "No" },
                new object[] { "D9", "Sunny", 12.0, 26.0, "Weak", "Yes" },
                new object[] { "D10", "Rain", 25, 30.0, "Weak", "Yes" },
                new object[] { "D11", "Sunny", 26.0, 40.0, "Strong", "Yes" },
                new object[] { "D12", "Overcast", 27.0, 97.0, "Strong", "Yes" },
                new object[] { "D13", "Overcast", 39.0, 41.0, "Weak", "Yes" },
                new object[] { "D14", "Rain", 23.0, 98.0, "Strong", "No" }
            };

            foreach (object[] sample in samples)
            {
                data.Rows.Add(sample);
            }
            #endregion

            #region doc_mitchell_2
            // The codification codebook converts
            // the strings into discrete symbols
            Codification codebook = new Codification(data);
            #endregion

            #region doc_mitchell_3
            // The learning algorithm cannot automatically initialize distributions
            // that require constructor parameters. Because of that, we specify the
            // initial component to be used for each of the input variables:
            IUnivariateFittableDistribution[] priors =
            {
                new GeneralDiscreteDistribution(codebook["Outlook"].Symbols),   // 3 possible values (Sunny, overcast, rain)
                new NormalDistribution(),                                       // Continuous value (Celsius)
                new NormalDistribution(),                                       // Continuous value (percentage)
                new GeneralDiscreteDistribution(codebook["Wind"].Symbols)       // 2 possible values (Weak, strong)
            };

            // Create the Naive Bayes learning algorithm, then
            // tell it how to initialize the distributions
            var learning = new NaiveBayesLearning<IUnivariateFittableDistribution>();
            learning.Distribution = (classIdx, variableIdx) => priors[variableIdx];

            // Convert the table into symbols and split it into inputs and outputs
            DataTable coded = codebook.Apply(data);
            double[][] inputs = coded.ToArray("Outlook", "Temperature", "Humidity", "Wind");
            int[] outputs = coded.ToArray<int>("PlayTennis");

            // Learn the Naive Bayes model
            var classifier = learning.Learn(inputs, outputs);
            #endregion

            #region doc_mitchell_4
            // Build the query for a "sunny, cool, humid and windy day":
            int outlookSymbol = codebook.Translate(columnName: "Outlook", value: "Sunny"); // symbol 0
            int windSymbol = codebook.Translate(columnName: "Wind", value: "Strong");      // symbol 1
            double[] instance = { outlookSymbol, 12.0, 90.0, windSymbol };

            // Ask the model for a class prediction
            int predicted = classifier.Decide(instance);

            // Ask for the probabilities of each class
            double[] probabilities = classifier.Probabilities(instance);

            // Ask for the log-likelihood of the prediction
            double ll = classifier.LogLikelihood(instance);

            // Translate the predicted symbol back into its text label
            string label = codebook.Translate("PlayTennis", predicted); // Should be "No"
            #endregion

            Assert.AreEqual("No", label);
            Assert.AreEqual(0, predicted);
            Assert.AreEqual(0.840, probabilities[0], 1e-3);
            Assert.AreEqual(-10.493243476691351, ll, 1e-6);
            Assert.AreEqual(1, probabilities.Sum(), 1e-10);
            Assert.AreEqual(2, probabilities.Length);
        }
Пример #48
0
        public void Run()
        {
            DataTable data = new DataTable("Mitchell's Tennis Example");

            // Every column is declared individually
            string[] columnNames = { "Day", "Outlook", "Temperature", "Humidity", "Wind", "PlayTennis" };
            foreach (string columnName in columnNames)
            {
                data.Columns.Add(columnName);
            }

            // Training samples, one array per day
            object[][] days =
            {
                new object[] { "D1", "Sunny", "Hot", "High", "Weak", "No" },
                new object[] { "D2", "Sunny", "Hot", "High", "Strong", "No" },
                new object[] { "D3", "Overcast", "Hot", "High", "Weak", "Yes" },
                new object[] { "D4", "Rain", "Mild", "High", "Weak", "Yes" },
                new object[] { "D5", "Rain", "Cool", "Normal", "Weak", "Yes" },
                new object[] { "D6", "Rain", "Cool", "Normal", "Strong", "No" },
                new object[] { "D7", "Overcast", "Cool", "Normal", "Strong", "Yes" },
                new object[] { "D8", "Sunny", "Mild", "High", "Weak", "No" },
                new object[] { "D9", "Sunny", "Cool", "Normal", "Weak", "Yes" },
                new object[] { "D10", "Rain", "Mild", "Normal", "Weak", "Yes" },
                new object[] { "D11", "Sunny", "Mild", "Normal", "Strong", "Yes" },
                new object[] { "D12", "Overcast", "Mild", "High", "Strong", "Yes" },
                new object[] { "D13", "Overcast", "Hot", "Normal", "Weak", "Yes" },
                new object[] { "D14", "Rain", "Mild", "High", "Strong", "No" }
            };

            foreach (object[] day in days)
            {
                data.Rows.Add(day);
            }

            // The codebook converts the strings of the
            // listed columns into integer symbols
            Codification codebook = new Codification(data, "Outlook", "Temperature", "Humidity", "Wind", "PlayTennis");

            // Encode the training data with the codebook
            DataTable symbols = codebook.Apply(data);

            // Register every coded column with the CreateDic helper; the
            // GetIndex and GetCount helpers are queried for them below
            string[] codedColumns = { "Outlook", "Temperature", "Humidity", "Wind", "PlayTennis" };
            foreach (string codedColumn in codedColumns)
            {
                CreateDic(codedColumn, symbols);
            }

            // Input vectors: one index per attribute, looked up through GetIndex
            int[][] inputs = symbols.AsEnumerable()
                .Select(row => new int[]
                {
                    GetIndex("Outlook", row["Outlook"].ToString()),
                    GetIndex("Temperature", row["Temperature"].ToString()),
                    GetIndex("Humidity", row["Humidity"].ToString()),
                    GetIndex("Wind", row["Wind"].ToString())
                })
                .Cast <int[]>()
                .ToArray();

            // Output labels, looked up the same way
            int[] outputs = symbols.AsEnumerable()
                .Select(row => GetIndex("PlayTennis", row["PlayTennis"].ToString()))
                .Cast <int>()
                .ToArray();

            // Describe the decision variables. The number of values of each one is
            // taken from GetCount rather than being written out by hand.
            DecisionVariable[] attributes =
            {
                new DecisionVariable("Outlook", GetCount("Outlook")),         // 3 possible values (Sunny, overcast, rain)
                new DecisionVariable("Temperature", GetCount("Temperature")), // 3 possible values (Hot, mild, cool)
                new DecisionVariable("Humidity", GetCount("Humidity")),       // 2 possible values (High, normal)
                new DecisionVariable("Wind", GetCount("Wind"))                // 2 possible values (Weak, strong)
            };

            // 2 possible output values for playing tennis: yes or no
            int classCount = GetCount("PlayTennis");

            // Build the decision tree from the attributes and the class count
            DecisionTree tree = new DecisionTree(attributes, classCount);

            // Train it with the ID3 algorithm
            ID3Learning id3learning = new ID3Learning(tree);
            id3learning.Run(inputs, outputs);

            // Classify one sample and translate the decision back into text
            int[] query = codebook.Translate("Sunny", "Hot", "High", "Strong");
            int decision = tree.Compute(query);
            string answer = codebook.Translate("PlayTennis", decision);

            Console.WriteLine("Calculate for: Sunny, Hot, High, Strong");
            Console.WriteLine("Answer: " + answer);

            // The tree can also be turned into an expression tree ...
            var expression = tree.ToExpression();

            // ... into source code ...
            Console.WriteLine(tree.ToCode("ClassTest"));

            // ... or into a set of decision rules
            DecisionSet ruleSet = tree.ToRules();
            Console.WriteLine(ruleSet.ToString());

            // Compiles the expression to IL
            var func = expression.Compile();
        }
Пример #49
0
        /// <summary>
        /// Builds the "attribute operator value" text for this node, or "Root" when
        /// the node is the root.
        /// </summary>
        /// <param name="codebook">
        /// Optional codebook. When it is not null, the node has a value and the
        /// codebook contains a column named after the attribute, the value is shown
        /// as the codebook's translation of it; otherwise the raw value is shown.
        /// </param>
        /// <returns>The formatted description of the node.</returns>
        private string toString(Codification codebook)
        {
            if (IsRoot)
            {
                return "Root";
            }

            // Attribute tested by the parent's branches; fall back to "x<index>"
            // when the attribute has no name.
            int attributeIndex = Parent.Branches.AttributeIndex;
            string attributeName = Owner.Attributes[attributeIndex].Name;
            if (String.IsNullOrEmpty(attributeName))
            {
                attributeName = "x" + attributeIndex;
            }

            string comparisonText = ComparisonExtensions.ToString(Comparison);

            // The value can only be decoded if a codebook is available, the node
            // holds a value, and the codebook knows the attribute's column.
            bool canDecode = codebook != null
                && Value.HasValue
                && codebook.Columns.Contains(attributeName);

            string valueText = canDecode
                ? codebook.Translate(attributeName, (int)Value.Value)
                : Value.ToString();

            return String.Format("{0} {1} {2}", attributeName, comparisonText, valueText);
        }
Пример #50
0
        public static void Exemplo01()
        {
            //LINK: http://accord-framework.net/docs/html/T_Accord_MachineLearning_DecisionTrees_Learning_ID3Learning.htm
            //LINK: http://accord-framework.net/docs/html/T_Accord_MachineLearning_DecisionTrees_DecisionTree.htm

            DataTable data = new DataTable("Mitchell's Tennis Example");
            data.Columns.Add("Day", "Outlook", "Temperature", "Humidity", "Wind", "PlayTennis");

            // Training samples, one array per day
            object[][] days =
            {
                new object[] { "D1", "Sunny", "Hot", "High", "Weak", "No" },
                new object[] { "D2", "Sunny", "Hot", "High", "Strong", "No" },
                new object[] { "D3", "Overcast", "Hot", "High", "Weak", "Yes" },
                new object[] { "D4", "Rain", "Mild", "High", "Weak", "Yes" },
                new object[] { "D5", "Rain", "Cool", "Normal", "Weak", "Yes" },
                new object[] { "D6", "Rain", "Cool", "Normal", "Strong", "No" },
                new object[] { "D7", "Overcast", "Cool", "Normal", "Strong", "Yes" },
                new object[] { "D8", "Sunny", "Mild", "High", "Weak", "No" },
                new object[] { "D9", "Sunny", "Cool", "Normal", "Weak", "Yes" },
                new object[] { "D10", "Rain", "Mild", "Normal", "Weak", "Yes" },
                new object[] { "D11", "Sunny", "Mild", "Normal", "Strong", "Yes" },
                new object[] { "D12", "Overcast", "Mild", "High", "Strong", "Yes" },
                new object[] { "D13", "Overcast", "Hot", "Normal", "Weak", "Yes" },
                new object[] { "D14", "Rain", "Mild", "High", "Strong", "No" }
            };

            foreach (object[] day in days)
            {
                data.Rows.Add(day);
            }

            // The codebook converts the strings of the
            // listed columns into integer symbols
            Codification codebook = new Codification(data,
                                                     "Outlook", "Temperature", "Humidity", "Wind", "PlayTennis");

            // Encode the training data with the codebook
            DataTable encoded = codebook.Apply(data);

            int[][] inputs = encoded.ToArray <int>("Outlook", "Temperature", "Humidity", "Wind");
            int[] outputs = encoded.ToArray <int>("PlayTennis");

            // Describe the decision variables and how many values each one takes
            DecisionVariable[] attributes =
            {
                new DecisionVariable("Outlook", 3),     // 3 possible values (Sunny, overcast, rain)
                new DecisionVariable("Temperature", 3), // 3 possible values (Hot, mild, cool)
                new DecisionVariable("Humidity", 2),    // 2 possible values (High, normal)
                new DecisionVariable("Wind", 2)         // 2 possible values (Weak, strong)
            };

            // 2 possible output values for playing tennis: yes or no
            int classCount = 2;

            // Build the decision tree from the attributes and the class count
            DecisionTree tree = new DecisionTree(attributes, classCount);

            // Train it with the ID3 algorithm
            ID3Learning id3learning = new ID3Learning(tree);
            id3learning.Run(inputs, outputs);

            // As shown at the end of the guide's example, the tree can be turned
            // into an expression tree and compiled:
            var expression = tree.ToExpression();
            var func = expression.Compile();

            // The compiled delegate can then be executed directly
            // to obtain the decision for a given input vector:
            double[] sample = { 1.0, 1.0, 1.0, 1.0 };
            bool willPlayTennis = func(sample) == 1;

            // Alternatively, encode a query with the codebook, let the tree
            // decide, and translate the decision back into its text label
            int[] query = codebook.Translate("Sunny", "Hot", "High", "Strong");
            int output = tree.Decide(query);
            string answer = codebook.Translate("PlayTennis", output);

            Console.WriteLine(answer);
            Console.ReadLine();

            //RESULT:
            //In the above example, answer will be "No".
        }
Пример #51
0
        public void ComputeTest()
        {
            DataTable data = new DataTable("Mitchell's Tennis Example");
            data.Columns.Add("Day", "Outlook", "Temperature", "Humidity", "Wind", "PlayTennis");

            // Training samples, one array per day
            object[][] days =
            {
                new object[] { "D1", "Sunny", "Hot", "High", "Weak", "No" },
                new object[] { "D2", "Sunny", "Hot", "High", "Strong", "No" },
                new object[] { "D3", "Overcast", "Hot", "High", "Weak", "Yes" },
                new object[] { "D4", "Rain", "Mild", "High", "Weak", "Yes" },
                new object[] { "D5", "Rain", "Cool", "Normal", "Weak", "Yes" },
                new object[] { "D6", "Rain", "Cool", "Normal", "Strong", "No" },
                new object[] { "D7", "Overcast", "Cool", "Normal", "Strong", "Yes" },
                new object[] { "D8", "Sunny", "Mild", "High", "Weak", "No" },
                new object[] { "D9", "Sunny", "Cool", "Normal", "Weak", "Yes" },
                new object[] { "D10", "Rain", "Mild", "Normal", "Weak", "Yes" },
                new object[] { "D11", "Sunny", "Mild", "Normal", "Strong", "Yes" },
                new object[] { "D12", "Overcast", "Mild", "High", "Strong", "Yes" },
                new object[] { "D13", "Overcast", "Hot", "Normal", "Weak", "Yes" },
                new object[] { "D14", "Rain", "Mild", "High", "Strong", "No" }
            };

            foreach (object[] day in days)
            {
                data.Rows.Add(day);
            }

            // The codebook converts the strings
            // of the table into discrete symbols
            Codification codebook = new Codification(data);

            // 4 variables (Outlook, Temperature, Humidity, Wind)
            int inputCount = 4;

            // 2 possible values (yes, no)
            int classCount = codebook["PlayTennis"].Symbols;

            // One discrete distribution per input variable, sized
            // by the number of symbols found for that variable
            GeneralDiscreteDistribution[] priors =
            {
                new GeneralDiscreteDistribution(codebook["Outlook"].Symbols),     // 3 possible values (Sunny, overcast, rain)
                new GeneralDiscreteDistribution(codebook["Temperature"].Symbols), // 3 possible values (Hot, mild, cool)
                new GeneralDiscreteDistribution(codebook["Humidity"].Symbols),    // 2 possible values (High, normal)
                new GeneralDiscreteDistribution(codebook["Wind"].Symbols)         // 2 possible values (Weak, strong)
            };

            // Create the Naive Bayes classifier for the two classes
            var target = new NaiveBayes<GeneralDiscreteDistribution>(classCount, inputCount, priors);

            // Encode the table, then pull out the training inputs and outputs
            DataTable encoded = codebook.Apply(data);
            double[][] inputs = encoded.ToArray("Outlook", "Temperature", "Humidity", "Wind");
            int[] outputs = encoded.ToArray<int>("PlayTennis");

            // Estimate the Naive Bayes model from the training data
            target.Estimate(inputs, outputs);

            // Encode a sunny, cool, humid and windy day:
            int[] daySymbols = codebook.Translate("Sunny", "Cool", "High", "Strong");
            double[] instance = daySymbols.ToDouble();

            // Classify it, also retrieving the log-likelihood and class responses
            double logLikelihood;
            double[] responses;
            int c = target.Compute(instance, out logLikelihood, out responses);

            // Translate the decision back into its text label
            string label = codebook.Translate("PlayTennis", c);

            Assert.AreEqual("No", label);
            Assert.AreEqual(0, c);
            Assert.AreEqual(0.795, responses[0], 1e-3);
            Assert.AreEqual(1, responses.Sum(), 1e-10);
            Assert.IsFalse(double.IsNaN(responses[0]));
            Assert.AreEqual(2, responses.Length);
        }