예제 #1
0
        /// <summary>
        /// Scores the library decision tree (<c>decisionTreeLib</c>) against the generated testing table.
        /// </summary>
        /// <returns>
        /// <c>1 - loss</c>, where <c>loss</c> is the value reported by <c>ZeroOneLoss</c> for the
        /// "TYPE" column. The value is not multiplied by 100.
        /// </returns>
        public double DecisionTreeAccuracyPercentageLib()
        {
            // Feature columns, in the order they are handed to the tree.
            string[] featureColumns =
            {
                "CAP SHAPE", "CAP SURFACE", "CAP COLOR",
                "BRUISES", "ODOR", "GILL ATTACHMENT",
                "GILL SPACING", "GILL SIZE", "GILL COLOR",
                "STALK SHAPE", "STALK ROOT", "STALK SURFACE ABOVE RING",
                "STALK SURFACE BELOW RING", "STALK COLOR ABOVE RING", "STALK COLOR BELOW RING",
                "VEIL TYPE", "VEIL COLOR", "RING NUMBER",
                "RING TYPE", "SPORE PRINT COLOR", "POPULATION",
                "HABITAT"
            };

            DataTable testingTable = GenerateTestingDataTableLib();
            DataTable encoded      = codebook.Apply(testingTable);

            int[][] features    = DataTableToMatrix(encoded, featureColumns);
            int[][] labelMatrix = DataTableToMatrix(encoded, new string[] { "TYPE" });

            // Flatten the single-column label matrix into a vector.
            int[] labels = new int[labelMatrix.Length];
            int   next   = 0;
            foreach (int[] labelRow in labelMatrix)
            {
                labels[next++] = labelRow[0];
            }

            var    scorer = new ZeroOneLoss(labels);
            double loss   = scorer.Loss(decisionTreeLib.Decide(features));

            return 1 - loss;
        }
예제 #2
0
        /// <summary>
        /// Trains a Naive Bayes classifier on <c>trainingData</c>, checks one hand-picked instance,
        /// and prints the accuracy obtained on a built-in sample table.
        /// </summary>
        /// <remarks>
        /// The codebook is built from the training table only and is then reused for the sample table.
        /// NOTE(review): sample values that never occur in <c>trainingData</c> may not be translatable
        /// by the codebook — confirm how <c>Codification.Apply</c> treats unseen symbols.
        /// </remarks>
        public void runNaiveBayes()
        {
            codebook = new Codification(trainingData, "Feature1", "Feature2", "GeneratedByProgram");

            // Training data to symbols
            DataTable trainingsymbols = codebook.Apply(trainingData);

            int[][] trainingInputs  = trainingsymbols.ToJagged <int>("Feature1", "Feature2");
            int[]   trainingOutputs = trainingsymbols.ToArray <int>("GeneratedByProgram");

            // Create a new Naive Bayes learning
            var learner = new NaiveBayesLearning();

            learner.Options.InnerOption.UseLaplaceRule = true;

            // We learn the algorithm:
            NaiveBayes nb = learner.Learn(trainingInputs, trainingOutputs);

            // Sample rows: { Feature1, Feature2, GeneratedByProgram }.
            string[,] samples =
            {
                { "This", " is real", "No" },
                { "a", "8", "Yes" },
                { "b", "2000", "Yes" },
                { "a", "9", "Yes" },
                { "a", "90", "Yes" },
                { "a", "12", "Yes" },
                { "b", "15", "Yes" },
                { "b", "18", "Yes" },
                { "b", "200", "Yes" },
                { "a", "5", "Yes" },
                { "a", "62", "Yes" },
                { "b", "5000", "Yes" },
                { "b", "17", "Yes" },
                { "b", "62", "Yes" },
                { "b", "90", "Yes" },
                { "b", "123", "Yes" },
                { "This", " is Ok", "Yes" },
                { "b", "1", "Yes" },
                { "b", "64", "Yes" },
                { "I ", "am god", "No" },
                { "b", "33", "Yes" }
            };

            DataTable testdata = new DataTable("Sample Data");

            testdata.Columns.Add("Feature1", "Feature2", "GeneratedByProgram");

            for (int i = 0; i < samples.GetLength(0); i++)
            {
                testdata.Rows.Add(samples[i, 0], samples[i, 1], samples[i, 2]);
            }

            String[] inst = { "b", "15" };
            testForInstance(nb, inst, "Yes");

            DataTable testsymbols = codebook.Apply(testdata);

            int[][] testInput  = testsymbols.ToJagged <int>("Feature1", "Feature2");
            int[]   testOutput = testsymbols.ToArray <int>("GeneratedByProgram");
            int[]   answers    = nb.Decide(testInput);

            // Report the real sample count; the message used to hard-code 20 although the table has 21 rows.
            Console.WriteLine("\n Accuracy (Tested on " + testdata.Rows.Count + " data set): " + calculateAccuracy(answers, testOutput));
        }
예제 #3
0
        /// <summary>
        /// Scores the trained <c>tree</c> against a labelled table.
        /// </summary>
        /// <param name="test">Table to evaluate; it is encoded with <c>codeBook</c> before use.</param>
        /// <returns>
        /// <c>1 - loss</c>, where <c>loss</c> is the value reported by <c>ZeroOneLoss</c> for the "G3" column.
        /// </returns>
        public double Test(DataTable test)
        {
            DataTable encoded = codeBook.Apply(test);

            // Split the encoded values into the feature matrix and the expected labels.
            double[][] features = encoded.ToJagged(headers);
            int[]      expected = encoded.ToArray <int>("G3");

            var    scorer = new ZeroOneLoss(expected);
            double loss   = scorer.Loss(tree.Decide(features));

            return 1 - loss;
        }
예제 #4
0
        /// <summary>
        /// Codifies the "Category" column of the projection-filter sample table and checks that the
        /// result still has 5 columns and 5 rows and carries the expected integer symbol in each row.
        /// </summary>
        public void ApplyTest1()
        {
            DataTable source = ProjectionFilterTest.CreateTable();

            // Codify only the "Category" column and apply the filter to the same table.
            var       codification = new Codification(source, "Category");
            DataTable encoded      = codification.Apply(source);

            Assert.AreEqual(5, encoded.Columns.Count);
            Assert.AreEqual(5, encoded.Rows.Count);

            // Expected "Category" symbol for rows 0..4, in row order.
            int[] expectedSymbols = { 0, 1, 1, 0, 2 };
            for (int row = 0; row < expectedSymbols.Length; row++)
            {
                Assert.AreEqual(expectedSymbols[row], encoded.Rows[row]["Category"]);
            }
        }
        /// <summary>
        /// Classifies the sample held in <c>inputlar</c> with a 4-nearest-neighbour model trained on
        /// the codified contents of <paramref name="tbl"/>.
        /// </summary>
        /// <param name="tbl">Training table with the nine feature columns and the "Class" column.</param>
        /// <returns>"4" when the classifier answers 0, otherwise "2".</returns>
        public string knn(DataTable tbl)
        {
            Codification codebook = new Codification(tbl);

            DataTable symbols = codebook.Apply(tbl);

            double[][] inputs = symbols.ToIntArray("Clump Thickness", "Uniformity of Cell Size", "Uniformity of Cell Shape", "Marginal Adhesion", "Single Epithelial Cell Size", "Bare Nuclei", "Bland Chromatin", "Normal Nucleoli", "Mitoses").ToDouble();

            int[] outputs = symbols.ToIntArray("Class").GetColumn(0);

            KNearestNeighbors classifier = new KNearestNeighbors(k: 4, classes: 2,
                                                                 inputs: inputs, outputs: outputs);

            // Build the query vector from the first nine entries of inputlar.
            double[] query = new double[9];
            for (int i = 0; i < query.Length; i++)
            {
                query[i] = Convert.ToInt32(inputlar[i]);
            }

            int answer = classifier.Compute(query);

            // NOTE(review): the 0 -> 4 / other -> 2 mapping is hard-coded; presumably it mirrors the
            // order in which the "Class" values were codified — confirm against the training data.
            answer = (answer == 0) ? 4 : 2;

            return answer.ToString();
        }
예제 #6
0
        /// <summary>
        /// Trains a C4.5 decision tree on the codified contents of <paramref name="dt"/>.
        /// </summary>
        /// <param name="dt">Table holding the feature columns "v0".."v15" and the label column "classification".</param>
        /// <returns>The tree returned by <c>C45Learning.Learn</c>.</returns>
        private static DecisionTree TrainModel(DataTable dt)
        {
            // Translate the table into integer symbols.
            Codification codification = new Codification(dt);
            DataTable    table        = codification.Apply(dt);

            int[][] features = table.ToArray <int>("v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9", "v10", "v11", "v12", "v13", "v14", "v15");
            int[]   labels   = table.ToArray <int>("classification");

            // The learner is created without explicit attributes, as before. The hand-built
            // 16-variable DecisionTree that used to be constructed here was never passed to any
            // learner, so it has been removed as dead code.
            C45Learning teacher = new C45Learning();

            return teacher.Learn(features, labels, null);
        }
예제 #7
0
        /// <summary>
        /// Learns a C4.5 decision tree that predicts "Result" from "Strike", "MarketPrice" and
        /// "Notional" in <c>tradeTable</c>, and shows the extracted rules in <c>textBoxInferredRules</c>.
        /// </summary>
        private void ComputeInference()
        {
            var codebook = new Codification();

            codebook.Learn(tradeTable);

            DataTable symbols = codebook.Apply(tradeTable);

            string[] inputNames = new[] { "Strike", "MarketPrice", "Notional" };

            // Read the training arrays from the codified table. They used to be read from the raw
            // tradeTable, which left the Apply() result unused and bypassed the codebook for any
            // column it had translated.
            double[][] inputs  = symbols.ToJagged(inputNames);
            int[]      outputs = symbols.ToArray <int>("Result");

            var teacher = new C45Learning()
            {
                Attributes = DecisionVariable.FromCodebook(codebook, inputNames)
            };

            DecisionTree tree  = teacher.Learn(inputs, outputs);
            DecisionSet  rules = tree.ToRules();

            textBoxInferredRules.Text = rules.ToString();
        }
예제 #8
0
파일: Class1.cs 프로젝트: 50417/BotvsBot
        /// <summary>
        /// Loads worksheet "T1" from example.xlsx in the current directory, trains a 1-nearest-neighbour
        /// string classifier that uses Levenshtein distance, and classifies the string "Chars".
        /// The classification result is not used.
        /// </summary>
        public void foo()
        {
            // Read the Excel worksheet into a DataTable.
            String    workbookPath = Environment.CurrentDirectory + "\\example.xlsx";
            DataTable sheet        = new ExcelReader(workbookPath).GetWorksheet("T1");

            // The raw strings of the "Output" column are the training inputs.
            String[] texts = sheet.Columns["Output"].ToArray <String>();

            // Codify the "GeneratedByProgram" column so its values become integer class labels.
            Codification labelCodebook = new Codification(sheet, "GeneratedByProgram");
            DataTable    encoded       = labelCodebook.Apply(sheet);
            int[]        labels        = encoded.ToArray <int>("GeneratedByProgram");

            // Strings are compared with Levenshtein's string distance; k = 1.
            var classifier = new KNearestNeighbors <string>(k: 1, distance: new Levenshtein());

            classifier.Learn(texts, labels);

            int answer = classifier.Decide("Chars");
        }
        /// <summary>
        /// Applies a <c>Codification</c> filter for the "Category" column to the projection-filter
        /// sample table and verifies the table dimensions (5 x 5) and the symbol stored in every row.
        /// </summary>
        public void ApplyTest1()
        {
            DataTable original = ProjectionFilterTest.CreateTable();

            // Build the filter for the "Category" column only, then run it over the table.
            var       categoryFilter = new Codification(original, "Category");
            DataTable codified       = categoryFilter.Apply(original);

            Assert.AreEqual(5, codified.Columns.Count);
            Assert.AreEqual(5, codified.Rows.Count);

            // Symbols expected in the "Category" column, listed in row order.
            int[] expected = { 0, 1, 1, 0, 2 };
            for (int i = 0; i < expected.Length; i++)
            {
                Assert.AreEqual(expected[i], codified.Rows[i]["Category"]);
            }
        }
예제 #10
0
    /// <summary>
    /// Predicts the "result" label for one sample with a 1-nearest-neighbour model trained on
    /// every row of <c>Dataset_tbl</c>.
    /// </summary>
    /// <param name="p1">Value for column "p1".</param>
    /// <param name="p2">Value for column "p2".</param>
    /// <param name="p3">Value for column "p3".</param>
    /// <param name="p4">Value for column "p4".</param>
    /// <param name="p5">Value for column "p5".</param>
    /// <param name="p6">Value for column "p6".</param>
    /// <param name="p7">Accepted for interface compatibility; not used by this method.</param>
    /// <param name="p8">Accepted for interface compatibility; not used by this method.</param>
    /// <param name="p9">Accepted for interface compatibility; not used by this method.</param>
    /// <returns>
    /// The label translated back through the codebook, or "Nill" when translating the sample or
    /// classifying it throws. The value is also stored in <c>answer</c>.
    /// </returns>
    public string predict(string p1, string p2, string p3, string p4, string p5, string p6, string p7, string p8, string p9)
    {
        DataTable dt;

        // Release the command as soon as the table has been fetched.
        using (SqlCommand cmd = new SqlCommand())
        {
            cmd.CommandText = "select * from Dataset_tbl";
            dt = db.get(cmd);
        }

        Codification codebook = new Codification(dt, "p1", "p2", "p3", "p4", "p5", "p6", "result");
        DataTable    symbols  = codebook.Apply(dt);

        double[][] inputs = symbols.ToArray <double>("p1", "p2", "p3", "p4", "p5", "p6");

        int[] outputs = symbols.ToArray <int>("result");
        int   K       = 1;

        try
        {
            int[] sample = codebook.Translate(p1, p2, p3, p4, p5, p6);

            KNearestNeighbors knn         = new KNearestNeighbors(k: K, classes: 2, inputs: inputs, outputs: outputs);
            double[]          doubleArray = Array.ConvertAll(sample, x => (double)x);

            answer = codebook.Translate("result", knn.Compute(doubleArray));
        }
        catch
        {
            // Deliberate best effort: any failure while translating or classifying the sample
            // is reported to the caller as "Nill".
            answer = "Nill";
        }
        return answer;
    }
예제 #11
0
 /// <summary>
 /// Creates an adapter and prepares codified training arrays from <paramref name="originalData"/>.
 /// </summary>
 /// <param name="treeName">Value stored in <c>CountryName</c>.</param>
 /// <param name="originalData">Table containing the "year", "generation", "sex" and "risk" columns.</param>
 public AccordAdapter(string treeName, DataTable originalData)
 {
     CountryName = treeName;

     // Build the codebook from the table, then encode that same table with it.
     Codification book = new Codification(originalData);
     codebook = book;

     DataTable encoded = book.Apply(originalData);
     symbols = encoded;

     // Feature matrix and label vector taken from the encoded table.
     inputs  = encoded.ToJagged <int>("year", "generation", "sex");
     outputs = encoded.ToArray <int>("risk");
 }
예제 #12
0
        /// <summary>
        /// Recommends a genre for a customer by training an ID3 tree on the populated table
        /// ("Age", "Gender" -> "Best Genre") and querying it with the customer's age group and gender.
        /// </summary>
        /// <param name="id">Username of the customer to look up.</param>
        /// <returns>
        /// The predicted "Best Genre" value, or a fixed "no matches" message when the customer does
        /// not exist or any step of the lookup, training or prediction throws.
        /// </returns>
        public string getRecommendationsByUsers(string id = "user4")
        {
            // Fallback text returned to the caller; kept byte-for-byte as callers may compare it.
            const string noMatchMessage = "Unfortunatly No Matches Were Found";

            DataTable data = new DataTable("dataTable");

            PopulateHead(data);
            PopulateTable(data, id);

            Codification codification = new Codification(data);
            DataTable    codifiedData = codification.Apply(data);

            int[][] input = codifiedData.ToJagged <int>("Age", "Gender");

            int[] predictions = codifiedData.ToArray <int>("Best Genre");

            ID3Learning decisionTreeLearningAlgorithm = new ID3Learning();

            try
            {
                var customer = _context.Customers.Where(c => c.Username == id).FirstOrDefault();

                // Unknown user: answer explicitly instead of relying on a caught NullReferenceException.
                if (customer == null)
                {
                    return noMatchMessage;
                }

                // Map the age onto the bucket labels used in the table.
                string ageGroup = customer.Age <= 12 ? "0-12"
                                : customer.Age <= 25 ? "13-25"
                                : customer.Age < 40  ? "26-39"
                                : "40+";

                int[] query = codification.Transform(new[,] {
                    { "Age", ageGroup }, { "Gender", customer.Gender.ToString() }
                });

                DecisionTree decisionTree = decisionTreeLearningAlgorithm.Learn(input, predictions);
                int          result       = decisionTree.Decide(query);

                return codification.Revert("Best Genre", result);
            }
            catch (Exception)
            {
                // Best effort by design: every failure is reported as "no matches".
                return noMatchMessage;
            }
        }
예제 #13
0
        /// <summary>
        /// Builds a codebook for <paramref name="data"/>, trains an ID3 tree on the 22 mushroom
        /// feature columns against the "TYPE" column, and stores the tree in <c>decisionTreeLib</c>.
        /// </summary>
        /// <param name="data">Training table with the feature columns and the "TYPE" column.</param>
        /// <returns>The codebook created from <paramref name="data"/>.</returns>
        public Codification GenerateDecisionTreeLib(DataTable data)
        {
            // Feature columns, in the order they are handed to the learner.
            string[] featureColumns =
            {
                "CAP SHAPE", "CAP SURFACE", "CAP COLOR",
                "BRUISES", "ODOR", "GILL ATTACHMENT",
                "GILL SPACING", "GILL SIZE", "GILL COLOR",
                "STALK SHAPE", "STALK ROOT", "STALK SURFACE ABOVE RING",
                "STALK SURFACE BELOW RING", "STALK COLOR ABOVE RING", "STALK COLOR BELOW RING",
                "VEIL TYPE", "VEIL COLOR", "RING NUMBER",
                "RING TYPE", "SPORE PRINT COLOR", "POPULATION",
                "HABITAT"
            };

            Codification book    = new Codification(data);
            DataTable    encoded = book.Apply(data);

            int[][] features    = DataTableToMatrix(encoded, featureColumns);
            int[][] labelMatrix = DataTableToMatrix(encoded, new string[] { "TYPE" });

            // Flatten the single-column label matrix into a vector.
            int[] labels = new int[labelMatrix.Length];
            int   next   = 0;
            foreach (int[] labelRow in labelMatrix)
            {
                labels[next++] = labelRow[0];
            }

            // One decision variable per feature; the symbol count comes from the Mushroom value tables.
            ID3Learning learner = new ID3Learning()
            {
                // Cap
                new DecisionVariable("CAP SHAPE", Mushroom.CAP_SHAPE.Length),
                new DecisionVariable("CAP SURFACE", Mushroom.CAP_SURFACE.Length),
                new DecisionVariable("CAP COLOR", Mushroom.CAP_COLOR.Length),

                new DecisionVariable("BRUISES", Mushroom.BRUISES.Length),
                new DecisionVariable("ODOR", Mushroom.ODOR.Length),

                // Gill
                new DecisionVariable("GILL ATTACHMENT", Mushroom.GILL_ATTACHMENT.Length),
                new DecisionVariable("GILL SPACING", Mushroom.GILL_SPACING.Length),
                new DecisionVariable("GILL SIZE", Mushroom.GILL_SIZE.Length),
                new DecisionVariable("GILL COLOR", Mushroom.GILL_COLOR.Length),

                // Stalk
                new DecisionVariable("STALK SHAPE", Mushroom.STALK_SHAPE.Length),
                new DecisionVariable("STALK ROOT", Mushroom.STALK_ROOT.Length),
                new DecisionVariable("STALK SURFACE ABOVE RING", Mushroom.STALK_SURFACE_ABOVE_RING.Length),
                new DecisionVariable("STALK SURFACE BELOW RING", Mushroom.STALK_SURFACE_BELOW_RING.Length),
                new DecisionVariable("STALK COLOR ABOVE RING", Mushroom.STALK_COLOR_ABOVE_RING.Length),
                new DecisionVariable("STALK COLOR BELOW RING", Mushroom.STALK_COLOR_BELOW_RING.Length),

                // Veil
                new DecisionVariable("VEIL TYPE", Mushroom.VEIL_TYPE.Length),
                new DecisionVariable("VEIL COLOR", Mushroom.VEIL_COLOR.Length),

                // Ring
                new DecisionVariable("RING NUMBER", Mushroom.RING_NUMBER.Length),
                new DecisionVariable("RING TYPE", Mushroom.RING_TYPE.Length),

                new DecisionVariable("SPORE PRINT COLOR", Mushroom.SPORE_PRINT_COLOR.Length),
                new DecisionVariable("POPULATION", Mushroom.POPULATION.Length),
                new DecisionVariable("HABITAT", Mushroom.HABITAT.Length)
            };

            decisionTreeLib = learner.Learn(features, labels);

            return book;
        }
예제 #14
0
        /// <summary>
        /// Trains an ID3 tree on <paramref name="data"/> ("Categoria", "Precio" -> "recomendar") and
        /// returns the products whose predicted "recomendar" value is not "no".
        /// </summary>
        /// <param name="data">Training table with the "Categoria", "Precio" and "recomendar" columns.</param>
        /// <returns>All products from <c>db.productos</c> except those predicted as "no".</returns>
        private List <productos> revisarProductos(DataTable data)
        {
            var codebook = new Codification(data);

            int numCategorias = db.categorias.Count();

            DecisionVariable[] attributes =
            {
                new DecisionVariable("Categoria", numCategorias), // one symbol per entry in db.categorias
                new DecisionVariable("Precio", 5)                 // five price symbols
            };

            // NOTE(review): assumes "recomendar" takes exactly two values — confirm against the data.
            int classCount = 2;

            DecisionTree tree = new DecisionTree(attributes, classCount);

            // Create a new instance of the ID3 algorithm
            ID3Learning id3learning = new ID3Learning(tree);

            // Translate our training data into integer symbols using our codebook:
            DataTable symbols = codebook.Apply(data);

            int[][] inputs  = symbols.ToIntArray("Categoria", "Precio");
            int[]   outputs = symbols.ToIntArray("recomendar").GetColumn(0);

            // Learn the training instances!
            id3learning.Run(inputs, outputs);

            // Load the products once and filter that same list in place. The list used to be
            // queried twice, with items from the second query removed from the first, which only
            // worked if both queries returned the same object instances.
            List <productos> product = db.productos.ToList();

            product.RemoveAll(item =>
            {
                int[] query = codebook.Transform(new[,] {
                    { "Categoria", Convert.ToString(item.fkCategoria) },
                    { "Precio", devolverTipoPrecio(item.precio) }
                });

                // Predict the label and translate it back to its string form.
                int    predicted = tree.Decide(query);
                string answer    = codebook.Revert("recomendar", predicted);

                return answer.Equals("no");
            });

            return product;
        }
        /// <summary>
        /// Loads the hsbdemo CSV resource, configures variable types and symbol remappings on a
        /// <c>Codification</c>, and checks the column names and first rows produced for the
        /// "write"/"ses" inputs and the "prog" output.
        /// </summary>
        public void remapping_test()
        {
            // Data source: https://web.archive.org/web/20170210050820/http://www.ats.ucla.edu/stat/stata/dae/mlogit.htm
            CsvReader csv = CsvReader.FromText(Properties.Resources.hsbdemo, hasHeaders: true);
            var       hsb = csv.ToTable();

            var codebook = new Codification(hsb);

            // Declare how each variable should be encoded.
            codebook["ses"].VariableType   = CodificationVariable.CategoricalWithBaseline;
            codebook["prog"].VariableType  = CodificationVariable.Categorical;
            codebook["write"].VariableType = CodificationVariable.Discrete;

            // Pin the symbol values of selected categories.
            codebook["ses"].Remap("low", 0);
            codebook["ses"].Remap("middle", 1);
            codebook["prog"].Remap("academic", 0);
            codebook["prog"].Remap("general", 1);

            Assert.AreEqual(CodificationVariable.Discrete, codebook["write"].VariableType);

            var inputTable  = codebook.Apply(hsb, "write", "ses");
            var outputTable = codebook.Apply(hsb, "prog");

            // Extract the encoded inputs together with their column names.
            string[] inputNames;
            var      x = inputTable.ToArray(out inputNames);

            // Extract the encoded outputs together with their column names.
            string[] outputNames;
            var      y = outputTable.ToArray(out outputNames);

            Assert.AreEqual(new[] { "write", "ses: middle", "ses: high" }, inputNames);
            Assert.AreEqual(new[] { "prog: academic", "prog: general", "prog: vocation" }, outputNames);

            // First three input rows.
            Assert.AreEqual(new double[] { 35, 0, 0 }, x[0]);
            Assert.AreEqual(new double[] { 33, 1, 0 }, x[1]);
            Assert.AreEqual(new double[] { 39, 0, 1 }, x[2]);

            // Selected output rows.
            Assert.AreEqual(new double[] { 0, 0, 1 }, y[0]);
            Assert.AreEqual(new double[] { 0, 1, 0 }, y[1]);
            Assert.AreEqual(new double[] { 0, 0, 1 }, y[2]);
            Assert.AreEqual(new double[] { 1, 0, 0 }, y[11]);
        }
예제 #16
0
        /// <summary>
        /// Detects the codification of a small age/classification table and checks that applying it
        /// leaves "Age" unchanged and turns "Classification" into the symbols child=0, adult=1, elder=2.
        /// </summary>
        public void ApplyTest()
        {
            // Fixture data, one entry per row.
            int[]    ages    = { 10, 7, 4, 21, 27, 12, 79, 40, 30 };
            string[] classes = { "child", "child", "child", "adult", "adult", "child", "elder", "adult", "adult" };
            int[]    symbols = { 0, 0, 0, 1, 1, 0, 2, 1, 1 };

            Codification target = new Codification();

            DataTable input = new DataTable("Sample data");
            input.Columns.Add("Age", typeof(int));
            input.Columns.Add("Classification", typeof(string));

            DataTable expected = new DataTable("Sample data");
            expected.Columns.Add("Age", typeof(int));
            expected.Columns.Add("Classification", typeof(int));

            for (int k = 0; k < ages.Length; k++)
            {
                input.Rows.Add(ages[k], classes[k]);
                expected.Rows.Add(ages[k], symbols[k]);
            }

            // Detect the mappings, then apply the categorization.
            target.Detect(input);
            DataTable actual = target.Apply(input);

            // Compare the two tables cell by cell.
            for (int row = 0; row < actual.Rows.Count; row++)
            {
                for (int col = 0; col < actual.Columns.Count; col++)
                {
                    Assert.AreEqual(expected.Rows[row][col], actual.Rows[row][col]);
                }
            }
        }
예제 #17
0
        /// <summary>
        /// Fits a Naive Bayes classifier to <c>data</c> and stores it in <c>nativeBayes</c>.
        /// Inputs are the <c>headers</c> columns; the label is the <c>headerToPredict</c> column.
        /// </summary>
        public void Learn()
        {
            // Encode the table with the existing codebook.
            DataTable encoded = codeBook.Apply(data);

            int[][] features = encoded.ToJagged <int>(headers);
            int[]   labels   = encoded.ToArray <int>(headerToPredict);

            nativeBayes = new NaiveBayesLearning().Learn(features, labels);
        }
예제 #18
0
        /// <summary>
        /// Trains a 1-nearest-neighbour string classifier (Levenshtein distance) on the "Output"
        /// column of <c>trainingData</c>, checks one hand-picked instance, and prints the accuracy
        /// obtained on a built-in 20-row sample table.
        /// </summary>
        public void runKNN()
        {
            // k = 1: only the nearest neighbour is used. Strings are compared with Levenshtein distance.
            var classifier = new KNearestNeighbors <string>(k: 1, distance: new Levenshtein());

            String[]  trainingInput   = trainingData.ToArray <String>("Output");
            DataTable trainingsymbols = convertStringDataToDiscreteSymbol();
            int[]     trainingOutput  = trainingsymbols.ToArray <int>("GeneratedByProgram");

            classifier.Learn(trainingInput, trainingOutput);

            // Single-string query against the trained model; the result is not used.
            int answer = classifier.Decide("Chars");

            // Sample rows: { Output, GeneratedByProgram }.
            string[,] samples =
            {
                { "a8", "Yes" },
                { "b5", "Yes" },
                { "This is real", "No" },
                { "a9", "Yes" },
                { "b15", "Yes" },
                { "b15", "Yes" },
                { "b18", "Yes" },
                { "b200", "Yes" },
                { "b17", "Yes" },
                { "b62", "Yes" },
                { "b90", "Yes" },
                { "b123", "Yes" },
                { "This is Ok", "Yes" },
                { "b1", "Yes" },
                { "b64", "Yes" },
                { "I am god", "No" },
                { "b14", "Yes" },
                { "b1", "Yes" },
                { "b64", "Yes" },
                { "b100000000000", "Yes" }
            };

            DataTable testdata = new DataTable("Sample Data");
            testdata.Columns.Add("Output", "GeneratedByProgram");
            for (int row = 0; row < samples.GetLength(0); row++)
            {
                testdata.Rows.Add(samples[row, 0], samples[row, 1]);
            }

            testForInstance(classifier, "b15", "Yes");

            // Labels go through the codebook; the input strings are used as they are.
            DataTable testsymbols = codebook.Apply(testdata);

            String[] testInput  = testdata.ToArray <String>("Output");
            int[]    testOutput = testsymbols.ToArray <int>("GeneratedByProgram");
            int[]    answers    = classifier.Decide(testInput);

            Console.WriteLine("\n Accuracy (Tested on 20 data set): " + calculateAccuracy(answers, testOutput));
        }
예제 #19
0
        /// <summary>
        /// Trains an ID3 tree (maximum height 7) that predicts the "Cannabis" column of
        /// drug_consumption.txt and evaluates it on drug_consumption_test2.txt.
        /// </summary>
        /// <param name="show">
        /// When true, prints the target name, the extracted rules and the error, waiting for a key
        /// press after the rules and again after the error.
        /// </param>
        /// <returns>The value reported by <c>ZeroOneLoss</c> on the test file (unscaled).</returns>
        static double Decision_Tree(bool show)
        {
            DataTable    data       = DataController.MakeDataTable("../../drug_consumption.txt");
            DataTable    entireData = DataController.MakeDataTable("../../drug_consumption.txt");
            DataTable    tests      = DataController.MakeDataTable("../../drug_consumption_test2.txt");
            Codification codebook   = new Codification(entireData);

            DecisionVariable[] attributes = DataController.GetAttributes();
            int classCount = 7; // (7) "Never Used", "Used over a Decade Ago", "Used in Last Decade", "Used in Last Year", "Used in Last Month", "Used in Last Week", and "Used in Last Day"

            DecisionTree tree        = new DecisionTree(attributes, classCount);
            ID3Learning  id3learning = new ID3Learning(tree);

            id3learning.MaxHeight = 7;

            string LookingFor = "Cannabis";

            // Input columns, shared by the training and the test extraction.
            string[] inputColumns = { "Age", "Gender", "Education", "Country", "Eticnity", "Nscore", "Escore", "Oscore", "Ascore", "Cscore", "Impulsive", "SS" };

            DataTable symbols = codebook.Apply(data);
            int[][]   inputs  = symbols.ToJagged <int>(inputColumns);
            int[]     outputs = symbols.ToArray <int>(LookingFor);

            // Learn() trains the tree that was handed to the learner's constructor.
            id3learning.Learn(inputs, outputs);

            DataTable testSymbols = codebook.Apply(tests);
            int[][]   testIn      = testSymbols.ToJagged <int>(inputColumns);
            int[]     testOut     = testSymbols.ToArray <int>(LookingFor);

            DecisionSet rules    = tree.ToRules();
            string      ruleText = rules.ToString(codebook, LookingFor, System.Globalization.CultureInfo.InvariantCulture);
            double      error    = new ZeroOneLoss(testOut).Loss(tree.Decide(testIn));

            if (show)
            {
                Console.WriteLine(LookingFor);
                Console.WriteLine();
                Console.WriteLine(ruleText);
                Console.ReadKey();

                // The loss is treated as a fraction, so scale it before adding the "%" suffix.
                // NOTE(review): assumes ZeroOneLoss reports the mean loss — confirm.
                Console.WriteLine("Blad - " + Math.Round(error * 100, 2) + "%");
                Console.ReadKey();
            }
            return error;
        }
예제 #20
0
        /// <summary>
        /// Trains a C4.5 decision tree on <paramref name="data"/>.
        /// </summary>
        /// <param name="data">Training table; it is codified before learning.</param>
        /// <param name="inputColumns">Names of the columns used as inputs.</param>
        /// <param name="outputColumn">Name of the column holding the label to predict.</param>
        /// <returns>The tree returned by <c>C45Learning.Learn</c>.</returns>
        public static DecisionTree Learn(DataTable data, string[] inputColumns, string outputColumn)
        {
            // Build the codebook from the table and encode the same table with it.
            Codification codebook = new Codification(data);
            DataTable    encoded  = codebook.Apply(data);

            double[][] features = encoded.ToJagged(inputColumns);
            int[]      labels   = encoded.ToArray <int>(outputColumn);

            // Decision attributes are derived from the codebook for the chosen input columns.
            C45Learning  learner = new C45Learning(DecisionVariable.FromCodebook(codebook, inputColumns));
            DecisionTree trained = learner.Learn(features, labels);

            return trained;
        }
예제 #21
0
        /// <summary>
        /// Trains an ID3 decision tree on <c>theData</c>.
        /// </summary>
        /// <remarks>
        /// The column at position <paramref name="index"/> is the one to predict; every other column
        /// is an input. The table is codified to integers first, with missing values replaced by 0.
        /// The learned tree is stored in <c>tree</c>, and its rules are stored in <c>ruleText</c> and
        /// written to the debug output.
        /// </remarks>
        /// <param name="index">Zero-based position of the column to predict.</param>
        public void Train(int index)
        {
            DataTable source = this.theData;

            // Collect every column name, then split off the one to predict.
            ArrayList names = new ArrayList();
            for (int c = 0; c < source.Columns.Count; c++)
            {
                names.Add(source.Columns[c].ColumnName);
            }

            this.toPredict = (string)names[index];
            names.RemoveAt(index);
            this.inputNamesArr = (string[])names.ToArray(typeof(string));

            // The codebook converts strings to integers; null/missing values default to 0.
            this.codebook = new Codification(source)
            {
                DefaultMissingValueReplacement = 0
            };
            DataTable encoded = codebook.Apply(source);

            int[][] features = encoded.ToJagged <int>(inputNamesArr);
            int[]   labels   = encoded.ToArray <int>(toPredict);

            // The decision attributes come from the codebook, restricted to the input columns.
            var learner = new ID3Learning();
            learner.Attributes = DecisionVariable.FromCodebook(codebook, inputNamesArr);

            this.tree = learner.Learn(features, labels);

            // Render the tree's rules as text.
            DecisionSet rules = tree.ToRules();

            ruleText = rules.ToString(codebook, toPredict,
                                      System.Globalization.CultureInfo.InvariantCulture);
            Debug.WriteLine(ruleText);
        }
예제 #22
0
        /// <summary>
        /// Builds Mitchell's tennis data set with numeric temperature and humidity columns
        /// and trains a C4.5 decision tree on it.
        /// </summary>
        /// <param name="tree">Receives the decision tree that was trained.</param>
        /// <param name="inputs">Receives the encoded Outlook, Temperature, Humidity and Wind columns.</param>
        /// <param name="outputs">Receives the encoded PlayTennis column.</param>
        public static void CreateMitchellExample(out DecisionTree tree, out double[][] inputs, out int[] outputs)
        {
            DataTable data = new DataTable("Mitchell's Tennis Example");

            data.Columns.Add("Day", typeof(string));
            data.Columns.Add("Outlook", typeof(string));
            data.Columns.Add("Temperature", typeof(double));
            data.Columns.Add("Humidity", typeof(double));
            data.Columns.Add("Wind", typeof(string));
            data.Columns.Add("PlayTennis", typeof(string));

            // Day, Outlook, Temperature, Humidity, Wind, PlayTennis
            object[][] observations =
            {
                new object[] { "D1", "Sunny", 85, 85, "Weak", "No" },
                new object[] { "D2", "Sunny", 80, 90, "Strong", "No" },
                new object[] { "D3", "Overcast", 83, 78, "Weak", "Yes" },
                new object[] { "D4", "Rain", 70, 96, "Weak", "Yes" },
                new object[] { "D5", "Rain", 68, 80, "Weak", "Yes" },
                new object[] { "D6", "Rain", 65, 70, "Strong", "No" },
                new object[] { "D7", "Overcast", 64, 65, "Strong", "Yes" },
                new object[] { "D8", "Sunny", 72, 95, "Weak", "No" },
                new object[] { "D9", "Sunny", 69, 70, "Weak", "Yes" },
                new object[] { "D10", "Rain", 75, 80, "Weak", "Yes" },
                new object[] { "D11", "Sunny", 75, 70, "Strong", "Yes" },
                new object[] { "D12", "Overcast", 72, 90, "Strong", "Yes" },
                new object[] { "D13", "Overcast", 81, 75, "Weak", "Yes" },
                new object[] { "D14", "Rain", 71, 80, "Strong", "No" }
            };

            foreach (object[] observation in observations)
            {
                data.Rows.Add(observation);
            }

            // The codebook converts the string columns into integer symbols.
            Codification codebook = new Codification(data);

            int outlookSymbols = codebook["Outlook"].Symbols;    // Sunny, Overcast, Rain
            int windSymbols    = codebook["Wind"].Symbols;       // Weak, Strong
            int classCount     = codebook["PlayTennis"].Symbols; // Yes, No

            DecisionVariable[] attributes = new DecisionVariable[]
            {
                new DecisionVariable("Outlook", outlookSymbols),
                new DecisionVariable("Temperature", DecisionVariableKind.Continuous),
                new DecisionVariable("Humidity", DecisionVariableKind.Continuous),
                new DecisionVariable("Wind", windSymbols)
            };

            tree = new DecisionTree(attributes, classCount);
            C45Learning teacher = new C45Learning(tree);

            // Encode the table and hand the selected columns to the learner.
            DataTable encoded = codebook.Apply(data);

            inputs  = encoded.ToArray("Outlook", "Temperature", "Humidity", "Wind");
            outputs = encoded.ToArray<int>("PlayTennis");

            double trainingError = teacher.Run(inputs, outputs);
        }
        /// <summary>
        /// Runs a multinomial logistic regression analysis on the hsbdemo resource,
        /// with "write" and "ses" as inputs and "prog" as output, then passes the
        /// computed analysis to <c>testmlr</c>.
        /// </summary>
        /// <remarks>
        /// Per the original author's note, the results match the example at
        /// https://web.archive.org/web/20170210050820/http://www.ats.ucla.edu/stat/stata/dae/mlogit.htm
        /// </remarks>
        public void AnalyzeExample1()
        {
            var table = CsvReader.FromText(Properties.Resources.hsbdemo, hasHeaders: true).ToTable();

            var codebook = new Codification(table);

            // "ses": categorical with a baseline, "low" and "middle" pinned to 0 and 1.
            codebook["ses"].VariableType = CodificationVariable.CategoricalWithBaseline;
            codebook["ses"].Remap("low", 0);
            codebook["ses"].Remap("middle", 1);

            // "prog": categorical, "academic" and "general" pinned to 0 and 1.
            codebook["prog"].VariableType = CodificationVariable.Categorical;
            codebook["prog"].Remap("academic", 0);
            codebook["prog"].Remap("general", 1);

            // "write": continuous.
            codebook["write"].VariableType = CodificationVariable.Continuous;

            // Encode the input columns and collect the resulting column names.
            string[] inputNames;
            var inputsData = codebook.Apply(table, "write", "ses").ToJagged(out inputNames);

            // Same for the output column.
            string[] outputNames;
            var outputData = codebook.Apply(table, "prog").ToJagged(out outputNames);

            var analysis = new MultinomialLogisticRegressionAnalysis(inputsData, outputData, inputNames, outputNames);
            analysis.Compute();

            testmlr(analysis);
        }
예제 #24
0
        /// <summary>
        /// Creates Mitchell's tennis example (numeric temperature and humidity) and
        /// trains a C4.5 decision tree on the encoded data.
        /// </summary>
        /// <param name="tree">Receives the decision tree that was trained.</param>
        /// <param name="inputs">Receives the encoded Outlook, Temperature, Humidity and Wind columns.</param>
        /// <param name="outputs">Receives the encoded PlayTennis column.</param>
        public static void CreateMitchellExample(out DecisionTree tree, out double[][] inputs, out int[] outputs)
        {
            DataTable data = new DataTable("Mitchell's Tennis Example");

            data.Columns.Add("Day", typeof(string));
            data.Columns.Add("Outlook", typeof(string));
            data.Columns.Add("Temperature", typeof(double));
            data.Columns.Add("Humidity", typeof(double));
            data.Columns.Add("Wind", typeof(string));
            data.Columns.Add("PlayTennis", typeof(string));

            // One entry per day: Day, Outlook, Temperature, Humidity, Wind, PlayTennis
            object[][] days =
            {
                new object[] { "D1", "Sunny", 85, 85, "Weak", "No" },
                new object[] { "D2", "Sunny", 80, 90, "Strong", "No" },
                new object[] { "D3", "Overcast", 83, 78, "Weak", "Yes" },
                new object[] { "D4", "Rain", 70, 96, "Weak", "Yes" },
                new object[] { "D5", "Rain", 68, 80, "Weak", "Yes" },
                new object[] { "D6", "Rain", 65, 70, "Strong", "No" },
                new object[] { "D7", "Overcast", 64, 65, "Strong", "Yes" },
                new object[] { "D8", "Sunny", 72, 95, "Weak", "No" },
                new object[] { "D9", "Sunny", 69, 70, "Weak", "Yes" },
                new object[] { "D10", "Rain", 75, 80, "Weak", "Yes" },
                new object[] { "D11", "Sunny", 75, 70, "Strong", "Yes" },
                new object[] { "D12", "Overcast", 72, 90, "Strong", "Yes" },
                new object[] { "D13", "Overcast", 81, 75, "Weak", "Yes" },
                new object[] { "D14", "Rain", 71, 80, "Strong", "No" }
            };

            for (int d = 0; d < days.Length; d++)
            {
                data.Rows.Add(days[d]);
            }

            // Codebook used to turn the string columns into integer symbols.
            Codification codebook = new Codification(data);

            int classCount = codebook["PlayTennis"].Symbols; // Yes, No

            DecisionVariable outlook     = new DecisionVariable("Outlook", codebook["Outlook"].Symbols); // Sunny, Overcast, Rain
            DecisionVariable temperature = new DecisionVariable("Temperature", DecisionVariableKind.Continuous);
            DecisionVariable humidity    = new DecisionVariable("Humidity", DecisionVariableKind.Continuous);
            DecisionVariable wind        = new DecisionVariable("Wind", codebook["Wind"].Symbols);       // Weak, Strong

            DecisionVariable[] attributes = new DecisionVariable[] { outlook, temperature, humidity, wind };

            tree = new DecisionTree(attributes, classCount);
            C45Learning learner = new C45Learning(tree);

            // Encode the table, extract the training arrays and run the learner.
            DataTable encoded = codebook.Apply(data);
            inputs  = encoded.ToArray("Outlook", "Temperature", "Humidity", "Wind");
            outputs = encoded.ToArray<int>("PlayTennis");

            double trainingError = learner.Run(inputs, outputs);
        }
예제 #25
0
파일: Program.cs 프로젝트: volend/ML
        /// <summary>
        /// Loads the training records into a table, builds a codebook over it and
        /// returns the encoded inputs and outputs.
        /// </summary>
        /// <param name="trainingSet">Records whose <c>Values</c> become the table rows.</param>
        /// <param name="table">Supplies the column names; the last one is the output column.</param>
        /// <param name="inputs">Receives the encoded values of the columns selected by <c>ExcludeLast</c>.</param>
        /// <param name="outputs">Receives the encoded values of the last column.</param>
        /// <returns>The codebook built over the table.</returns>
        Codification BuildCodebook(List <Record> trainingSet, ReferenceTable table, out int[][] inputs, out int[] outputs)
        {
            DataTable data = new DataTable("Diabetes dataset");

            // One column per name in the reference table.
            DataColumn[] columns = Array.ConvertAll(table.Columns, name => new DataColumn(name));
            data.Columns.AddRange(columns);

            // One row per training record.
            foreach (Record record in trainingSet)
            {
                data.Rows.Add(record.Values);
            }

            Codification codebook = new Codification(data);
            DataTable    encoded  = codebook.Apply(data);

            inputs  = encoded.ToArray<int>(ExcludeLast(table.Columns));
            outputs = encoded.ToArray<int>(table.Columns.Last());

            return codebook;
        }
예제 #26
0
        /// <summary>
        /// Encodes <paramref name="dataTable"/>, asks <c>Model</c> for a decision and returns
        /// the result of <c>GetFirstString</c> on the first decoded move string.
        /// </summary>
        /// <param name="dataTable">Table containing the columns "v0" through "v15".</param>
        /// <returns>
        /// The first direction of the decoded move, or <c>null</c> when there is no decision
        /// (the model produced no codewords, or the first codeword is -1).
        /// </returns>
        public string Predict(DataTable dataTable)
        {
            // NOTE(review): the codebook is built from the table being predicted, not from the
            // data the model was trained on - confirm that both yield the same symbol assignment.
            Codification codification = new Codification(dataTable);

            int[] codewords = this.Model.Decide(codification.Apply(dataTable).ToArray <int>("v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9", "v10", "v11", "v12", "v13", "v14", "v15"));

            // A table without rows yields no codewords; treat it like "no decision"
            // instead of failing on codewords[0].
            if (codewords.Length == 0 || codewords[0] == -1)
            {
                return(null);
            }

            string moveString = codification.Revert(codewords)[0];

            return(GetFirstString(moveString));
        }
        /// <summary>
        /// Trains a discrete Naive Bayes classifier on <paramref name="tbl"/> and classifies
        /// the sample held in the first nine entries of <c>inputlar</c>.
        /// </summary>
        /// <param name="tbl">Training table containing the nine feature columns and the "Class" column.</param>
        /// <returns>The predicted "Class" value, translated back through the codebook.</returns>
        private string bayes(DataTable tbl)
        {
            string[] featureColumns =
            {
                "Clump Thickness", "Uniformity of Cell Size", "Uniformity of Cell Shape",
                "Marginal Adhesion", "Single Epithelial Cell Size", "Bare Nuclei",
                "Bland Chromatin", "Normal Nucleoli", "Mitoses"
            };

            // Codebook over the feature columns plus the class column.
            Codification codebook = new Codification(tbl,
                                                     "Clump Thickness", "Uniformity of Cell Size", "Uniformity of Cell Shape", "Marginal Adhesion", "Single Epithelial Cell Size", "Bare Nuclei", "Bland Chromatin", "Normal Nucleoli", "Mitoses", "Class");

            // Translate the training data into integer symbols.
            DataTable encoded = codebook.Apply(tbl);

            int[][] inputs  = encoded.ToIntArray(featureColumns);
            int[]   outputs = encoded.ToIntArray("Class").GetColumn(0);

            // Number of distinct symbols for each feature, in feature order.
            int[] symbolCounts = new int[featureColumns.Length];
            for (int f = 0; f < featureColumns.Length; f++)
            {
                symbolCounts[f] = codebook[featureColumns[f]].Symbols;
            }

            // Number of distinct class symbols.
            int classCount = codebook["Class"].Symbols;

            NaiveBayes classifier = new NaiveBayes(classCount, symbolCounts);
            classifier.Estimate(inputs, outputs);

            // Encode the sample to classify.
            int[] instance = codebook.Translate(inputlar[0], inputlar[1], inputlar[2], inputlar[3],
                                                inputlar[4], inputlar[5], inputlar[6], inputlar[7], inputlar[8]);

            int predicted = classifier.Compute(instance);

            // Map the predicted symbol back to the original class label.
            return codebook.Translate("Class", predicted);
        }
        /// <summary>
        /// Learns a codification of <paramref name="table"/> and trains a Naive Bayes model with
        /// normal distributions that predicts <paramref name="columnName"/> from the other columns.
        /// </summary>
        /// <param name="table">Training data.</param>
        /// <param name="columnName">Name of the column to predict.</param>
        /// <returns>
        /// A helper holding the input column names, the codification, the encoded table and the
        /// trained model. The model is only set when there is at least one input row.
        /// </returns>
        public TrainerHelper Train(System.Data.DataTable table, string columnName)
        {
            var result = new TrainerHelper();

            var codification = new Codification();
            codification.DefaultMissingValueReplacement = Double.NaN;
            codification.Learn(table);

            DataTable encoded = codification.Apply(table);

            // Every column except the one to predict is an input.
            result.columnNamesArray = table.Columns
                                           .Cast<DataColumn>()
                                           .Select(column => column.ColumnName)
                                           .Where(name => name != columnName)
                                           .ToArray();

            var targetOrdinal = table.Columns[columnName].Ordinal;

            // Each encoded input row is passed through ExpandRow before training.
            double[][] rawInputs = encoded.ToJagged(result.columnNamesArray);
            double[][] inputs    = new double[rawInputs.Length][];
            for (var row = 0; row < rawInputs.Length; row++)
            {
                inputs[row] = this.ExpandRow(codification, rawInputs[row], targetOrdinal);
            }

            int[] outputs = encoded.ToArray<int>(columnName);

            var teacher = new NaiveBayesLearning<NormalDistribution>();

            // Regularize the component distributions (original note: to avoid zero variances).
            teacher.Options.InnerOption = new NormalOptions { Regularization = 1e-5 };

            if (inputs.Length > 0)
            {
                result.trainer = teacher.Learn(inputs, outputs);
            }

            result.codification = codification;
            result.symbols      = encoded;
            return result;
        }
예제 #29
0
        /// <summary>
        /// Loads the nursery data set from resources, encodes it and trains a C4.5 decision tree.
        /// </summary>
        /// <param name="inputs">Receives the encoded values of the eight input columns.</param>
        /// <param name="outputs">Receives the encoded values of the "output" column.</param>
        /// <returns>The trained decision tree.</returns>
        private static DecisionTree createTree(out double[][] inputs, out int[] outputs)
        {
            string nurseryData = Resources.nursery;

            string[] inputColumns =
            {
                "parents", "has_nurs", "form",   "children",
                "housing", "finance",  "social", "health"
            };

            string outputColumn = "output";

            DataTable table = new DataTable("Nursery");

            table.Columns.Add(inputColumns);
            table.Columns.Add(outputColumn);

            // Split on either line ending and skip blank lines. Splitting on
            // Environment.NewLine alone depends on the platform, and keeping empty
            // entries would add a bogus row for a blank or trailing line.
            string[] lines = nurseryData.Split(
                new[] { "\r\n", "\n" }, StringSplitOptions.RemoveEmptyEntries);

            foreach (var line in lines)
            {
                table.Rows.Add(line.Split(','));
            }

            // Convert the string values into integer symbols.
            Codification codebook = new Codification(table);
            DataTable    symbols  = codebook.Apply(table);

            inputs  = symbols.ToArray(inputColumns);
            outputs = symbols.ToArray <int>(outputColumn);

            var attributes = DecisionVariable.FromCodebook(codebook, inputColumns);
            var tree       = new DecisionTree(attributes, classes: 5);

            C45Learning c45 = new C45Learning(tree);

            c45.Run(inputs, outputs);

            return(tree);
        }
예제 #30
0
        //
        //You can use the following additional attributes as you write your tests:
        //
        //Use ClassInitialize to run code before running the first test in the class
        //[ClassInitialize()]
        //public static void MyClassInitialize(TestContext testContext)
        //{
        //}
        //
        //Use ClassCleanup to run code after all tests in a class have run
        //[ClassCleanup()]
        //public static void MyClassCleanup()
        //{
        //}
        //
        //Use TestInitialize to run code before running each test
        //[TestInitialize()]
        //public void MyTestInitialize()
        //{
        //}
        //
        //Use TestCleanup to run code after each test has run
        //[TestCleanup()]
        //public void MyTestCleanup()
        //{
        //}
        //
        #endregion


        /// <summary>
        /// Builds Mitchell's tennis data set with purely categorical columns and
        /// trains an ID3 decision tree on it.
        /// </summary>
        /// <param name="tree">Receives the decision tree that was trained.</param>
        /// <param name="inputs">Receives the encoded Outlook, Temperature, Humidity and Wind columns.</param>
        /// <param name="outputs">Receives the encoded PlayTennis column.</param>
        public static void CreateMitchellExample(out DecisionTree tree, out int[][] inputs, out int[] outputs)
        {
            DataTable data = new DataTable("Mitchell's Tennis Example");

            data.Columns.Add("Day", "Outlook", "Temperature", "Humidity", "Wind", "PlayTennis");

            // Day, Outlook, Temperature, Humidity, Wind, PlayTennis
            object[][] observations =
            {
                new object[] { "D1", "Sunny", "Hot", "High", "Weak", "No" },
                new object[] { "D2", "Sunny", "Hot", "High", "Strong", "No" },
                new object[] { "D3", "Overcast", "Hot", "High", "Weak", "Yes" },
                new object[] { "D4", "Rain", "Mild", "High", "Weak", "Yes" },
                new object[] { "D5", "Rain", "Cool", "Normal", "Weak", "Yes" },
                new object[] { "D6", "Rain", "Cool", "Normal", "Strong", "No" },
                new object[] { "D7", "Overcast", "Cool", "Normal", "Strong", "Yes" },
                new object[] { "D8", "Sunny", "Mild", "High", "Weak", "No" },
                new object[] { "D9", "Sunny", "Cool", "Normal", "Weak", "Yes" },
                new object[] { "D10", "Rain", "Mild", "Normal", "Weak", "Yes" },
                new object[] { "D11", "Sunny", "Mild", "Normal", "Strong", "Yes" },
                new object[] { "D12", "Overcast", "Mild", "High", "Strong", "Yes" },
                new object[] { "D13", "Overcast", "Hot", "Normal", "Weak", "Yes" },
                new object[] { "D14", "Rain", "Mild", "High", "Strong", "No" }
            };

            foreach (object[] observation in observations)
            {
                data.Rows.Add(observation);
            }

            // The codebook converts the strings into integer symbols.
            Codification codebook = new Codification(data);

            int classCount = codebook["PlayTennis"].Symbols; // Yes, No

            DecisionVariable outlook     = new DecisionVariable("Outlook", codebook["Outlook"].Symbols);         // Sunny, Overcast, Rain
            DecisionVariable temperature = new DecisionVariable("Temperature", codebook["Temperature"].Symbols); // Hot, Mild, Cool
            DecisionVariable humidity    = new DecisionVariable("Humidity", codebook["Humidity"].Symbols);       // High, Normal
            DecisionVariable wind        = new DecisionVariable("Wind", codebook["Wind"].Symbols);               // Weak, Strong

            DecisionVariable[] attributes = new DecisionVariable[] { outlook, temperature, humidity, wind };

            tree = new DecisionTree(attributes, classCount);
            ID3Learning learner = new ID3Learning(tree);

            // Encode the table and train the classifier on the selected columns.
            DataTable encoded = codebook.Apply(data);

            inputs  = encoded.ToIntArray("Outlook", "Temperature", "Humidity", "Wind");
            outputs = encoded.ToIntArray("PlayTennis").GetColumn(0);

            learner.Run(inputs, outputs);
        }
예제 #31
0
        /// <summary>
        /// Loads the nursery data set, checks its size and first/last lines, and trains a
        /// C4.5 decision tree on the first <paramref name="first"/> samples, asserting a
        /// training error of zero.
        /// </summary>
        /// <param name="inputs">Receives the encoded values of all input rows.</param>
        /// <param name="outputs">Receives the encoded values of the "output" column.</param>
        /// <param name="first">Number of leading samples passed to the learner.</param>
        /// <returns>The trained decision tree.</returns>
        public static DecisionTree createNurseryExample(out double[][] inputs, out int[] outputs, int first)
        {
            string[] inputColumns =
            {
                "parents", "has_nurs", "form",   "children",
                "housing", "finance",  "social", "health"
            };
            string outputColumn = "output";

            string nurseryData = Resources.nursery;

            DataTable table = new DataTable("Nursery");
            table.Columns.Add(inputColumns);
            table.Columns.Add(outputColumn);

            string[] separators = new[] { "\r\n" };
            string[] lines = nurseryData.Split(separators, StringSplitOptions.RemoveEmptyEntries);

            // Sanity checks on the embedded resource.
            Assert.AreEqual(12960, lines.Length);
            Assert.AreEqual("usual,proper,complete,1,convenient,convenient,nonprob,recommended,recommend", lines[0]);
            Assert.AreEqual("great_pret,very_crit,foster,more,critical,inconv,problematic,not_recom,not_recom", lines[lines.Length - 1]);

            for (int i = 0; i < lines.Length; i++)
            {
                table.Rows.Add(lines[i].Split(','));
            }

            // Convert the string values into integer symbols.
            Codification codebook = new Codification(table);
            DataTable    encoded  = codebook.Apply(table);

            inputs  = encoded.ToArray(inputColumns);
            outputs = encoded.ToArray<int>(outputColumn);

            var attributes = DecisionVariable.FromCodebook(codebook, inputColumns);
            var tree       = new DecisionTree(attributes, classes: 5);

            // Train on the leading samples only.
            C45Learning learner = new C45Learning(tree);
            double trainingError = learner.Run(inputs.First(first), outputs.First(first));

            Assert.AreEqual(0, trainingError);

            return tree;
        }
예제 #32
0
        /// <summary>
        /// Fills <c>dtStatic</c> with the play-tennis samples, builds <c>myCodeBook</c> from it,
        /// trains <c>myTreeModel</c> with ID3 and prints the training accuracy as a percentage.
        /// </summary>
        private static void initDecisionTreeModel()
        {
            dtStatic.Columns.Add("Day", "Outlook", "Temperature", "Humidity", "Wind", "PlayTennis");
            dtStatic.Rows.Add("D1", "Sunny", "Hot", "High", "Weak", "No");
            dtStatic.Rows.Add("D2", "Sunny", "Hot", "High", "Strong", "No");
            dtStatic.Rows.Add("D3", "Overcast", "Hot", "High", "Weak", "Yes");
            dtStatic.Rows.Add("D4", "Rain", "Mild", "High", "Weak", "Yes");
            dtStatic.Rows.Add("D5", "Rain", "Cool", "Normal", "Weak", "Yes");
            dtStatic.Rows.Add("D6", "Rain", "Cool", "Normal", "Strong", "No");
            dtStatic.Rows.Add("D7", "Overcast", "Cool", "Normal", "Strong", "Yes");
            dtStatic.Rows.Add("D8", "Sunny", "Mild", "High", "Weak", "No");
            dtStatic.Rows.Add("D9", "Sunny", "Cool", "Normal", "Weak", "Yes");
            dtStatic.Rows.Add("D10", "Rain", "Mild", "Normal", "Weak", "Yes");
            dtStatic.Rows.Add("D11", "Sunny", "Mild", "Normal", "Strong", "Yes");
            dtStatic.Rows.Add("D12", "Overcast", "Mild", "High", "Strong", "Yes");
            dtStatic.Rows.Add("D13", "Overcast", "Hot", "Normal", "Weak", "Yes");
            dtStatic.Rows.Add("D14", "Rain", "Mild", "High", "Strong", "No");
            dtStatic.Rows.Add("D15", "Rain", "Cool", "High", "Strong", "No");
            dtStatic.Rows.Add("D16", "Rain", "Hot", "High", "Strong", "Yes");
            dtStatic.Rows.Add("D17", "Rain", "Hot", "High", "Weak", "Yes");
            dtStatic.Rows.Add("D18", "Rain", "Cool", "High", "Weak", "No");
            dtStatic.Rows.Add("D19", "Rain", "Cool", "High", "Weak", "Yes");
            dtStatic.Rows.Add("D20", "Rain", "Mild", "High", "Strong", "Yes");

            // Convert the string values into integer symbols.
            myCodeBook = new Codification(dtStatic);

            DataTable symbols = myCodeBook.Apply(dtStatic);

            int[][] inputs      = symbols.ToJagged <int>("Outlook", "Temperature", "Humidity", "Wind");
            int[]   outputs     = symbols.ToArray <int>("PlayTennis");
            var     id3learning = new ID3Learning()
            {
                new DecisionVariable("Outlook", 3),     // 3 possible values (Sunny, overcast, rain)
                new DecisionVariable("Temperature", 3), // 3 possible values (Hot, mild, cool)
                new DecisionVariable("Humidity", 2),    // 2 possible values (High, normal)
                new DecisionVariable("Wind", 2)         // 2 possible values (Weak, strong)
            };

            myTreeModel = id3learning.Learn(inputs, outputs);

            double error = new ZeroOneLoss(outputs).Loss(myTreeModel.Decide(inputs));

            // The loss is used as an error rate between 0 and 1 (other code in this file
            // reports accuracy as 1 - error), so scale it to get a percentage. The previous
            // "100 - error" always printed a value between 99 and 100.
            double accuracyPercent = (1 - error) * 100;

            Console.WriteLine("learnt model training accuracy is: " + accuracyPercent.ToString("N2"));
        }
        /// <summary>
        /// Classifies the sample held in the first nine entries of <c>inputlar</c> with a
        /// 4-nearest-neighbours model built from <paramref name="tbl"/>.
        /// </summary>
        /// <param name="tbl">Training table containing the nine feature columns and the "Class" column.</param>
        /// <returns>"4" when the classifier answers 0, otherwise "2".</returns>
        public string knn(DataTable tbl)
        {
            Codification codebook = new Codification(tbl);

            DataTable symbols = codebook.Apply(tbl);

            double[][] inputs = symbols.ToIntArray("Clump Thickness", "Uniformity of Cell Size", "Uniformity of Cell Shape", "Marginal Adhesion", "Single Epithelial Cell Size", "Bare Nuclei", "Bland Chromatin", "Normal Nucleoli", "Mitoses").ToDouble();

            int[] outputs = symbols.ToIntArray("Class").GetColumn(0);

            KNearestNeighbors classifier = new KNearestNeighbors(k: 4, classes: 2,
                                                                 inputs: inputs, outputs: outputs);

            // NOTE(review): the query uses the raw numeric values of inputlar while the
            // training inputs went through the codebook - confirm both use the same scale.
            double[] sample = new double[]
            {
                Convert.ToInt32(inputlar[0]), Convert.ToInt32(inputlar[1]), Convert.ToInt32(inputlar[2]),
                Convert.ToInt32(inputlar[3]), Convert.ToInt32(inputlar[4]), Convert.ToInt32(inputlar[5]),
                Convert.ToInt32(inputlar[6]), Convert.ToInt32(inputlar[7]), Convert.ToInt32(inputlar[8])
            };

            int answer = classifier.Compute(sample);

            // NOTE(review): hard-coded mapping from class index to label (0 -> 4, anything
            // else -> 2); verify it against the codebook's symbols for the "Class" column.
            int label = (answer == 0) ? 4 : 2;

            return label.ToString();
        }
        /// <summary>
        /// Walks through the Codification filter on a small age/label table: translating labels
        /// to symbols and back, encoding the whole table, and training a multi-class support
        /// vector machine whose outputs are translated back into labels.
        /// </summary>
        public void ApplyTest2()
        {
            // Sample table relating a person's age to a categorical
            // label: "child", "adult" or "elder".
            DataTable table = new DataTable("Sample data");
            table.Columns.Add("Age", typeof(int));
            table.Columns.Add("Label", typeof(string));

            int[]    ages   = { 10, 7, 4, 21, 27, 12, 79, 40, 30 };
            string[] labels = { "child", "child", "child", "adult", "adult", "child", "elder", "adult", "adult" };

            for (int row = 0; row < ages.Length; row++)
            {
                table.Rows.Add(ages[row], labels[row]);
            }

            // The Codification filter extracts the string labels and assigns an integer
            // symbol to each of them: "child" = 0, "adult" = 1 and "elder" = 2.
            Codification codebook = new Codification(table);

            // Label -> symbol
            int a = codebook.Translate("Label", "child"); // 0
            int b = codebook.Translate("Label", "adult"); // 1
            int c = codebook.Translate("Label", "elder"); // 2

            // Symbol -> label
            string labela = codebook.Translate("Label", 0); // "child"
            string labelb = codebook.Translate("Label", 1); // "adult"
            string labelc = codebook.Translate("Label", 2); // "elder"

            // A whole table can be processed at once and converted to a jagged array.
            DataTable  result = codebook.Apply(table);
            double[][] matrix = Matrix.ToArray(result);

            // Per the original author's note, the formatted matrix holds one
            // { age, label symbol } pair per row:
            //
            //     { 10, 0 }, {  7, 0 }, {  4, 0 },
            //     { 21, 1 }, { 27, 1 }, { 12, 0 },
            //     { 79, 2 }, { 40, 1 }, { 30, 1 }
            //
            string str = matrix.ToString(CSharpJaggedMatrixFormatProvider.InvariantCulture);

            // The encoded matrix can be fed to a learning algorithm directly.
            int classes = codebook["Label"].Symbols; // child, adult, elder

            // First column: input variable. Second column: output class.
            double[][] inputs  = matrix.GetColumns(0);
            int[]      outputs = matrix.GetColumn(1).ToInt32();

            // Multi-class SVM for 1 input (Age) and one class per label.
            var machine = new MulticlassSupportVectorMachine(inputs: 1, classes: classes);
            var teacher = new MulticlassSupportVectorLearning(machine, inputs, outputs);

            // Each binary sub-problem is trained with SMO.
            teacher.Algorithm = (svm, classInputs, classOutputs, i, j) =>
                new SequentialMinimalOptimization(svm, classInputs, classOutputs) { Complexity = 1 };

            double error = teacher.Run();

            // Classify new ages and translate the machine outputs back to labels.
            string result1 = codebook.Translate("Label", machine.Compute(10));
            string result2 = codebook.Translate("Label", machine.Compute(40));
            string result3 = codebook.Translate("Label", machine.Compute(70));

            Assert.AreEqual(0, a);
            Assert.AreEqual(1, b);
            Assert.AreEqual(2, c);
            Assert.AreEqual("child", labela);
            Assert.AreEqual("adult", labelb);
            Assert.AreEqual("elder", labelc);

            Assert.AreEqual("child", result1);
            Assert.AreEqual("adult", result2);
            Assert.AreEqual("elder", result3);
        }
        /// <summary>
        /// Trains a discrete Naive Bayes classifier on <paramref name="tbl"/> and classifies
        /// the sample held in the first nine entries of <c>inputlar</c>.
        /// </summary>
        /// <param name="tbl">Training table containing the nine feature columns and the "Class" column.</param>
        /// <returns>The predicted "Class" value, translated back through the codebook.</returns>
        private string bayes(DataTable tbl)
        {
            string[] featureColumns =
            {
                "Clump Thickness", "Uniformity of Cell Size", "Uniformity of Cell Shape",
                "Marginal Adhesion", "Single Epithelial Cell Size", "Bare Nuclei",
                "Bland Chromatin", "Normal Nucleoli", "Mitoses"
            };

            // Codebook over the feature columns plus the class column.
            Codification codebook = new Codification(tbl,
             "Clump Thickness", "Uniformity of Cell Size", "Uniformity of Cell Shape", "Marginal Adhesion", "Single Epithelial Cell Size", "Bare Nuclei", "Bland Chromatin", "Normal Nucleoli", "Mitoses", "Class");

            // Translate the training data into integer symbols.
            DataTable encoded = codebook.Apply(tbl);

            int[][] inputs  = encoded.ToIntArray(featureColumns);
            int[]   outputs = encoded.ToIntArray("Class").GetColumn(0);

            // Number of distinct symbols for each feature, in feature order.
            int[] symbolCounts = new int[featureColumns.Length];
            for (int f = 0; f < featureColumns.Length; f++)
            {
                symbolCounts[f] = codebook[featureColumns[f]].Symbols;
            }

            // Number of distinct class symbols.
            int classCount = codebook["Class"].Symbols;

            NaiveBayes classifier = new NaiveBayes(classCount, symbolCounts);
            classifier.Estimate(inputs, outputs);

            // Encode the sample to classify.
            int[] instance = codebook.Translate(inputlar[0], inputlar[1], inputlar[2], inputlar[3],
                inputlar[4], inputlar[5], inputlar[6], inputlar[7], inputlar[8]);

            int predicted = classifier.Compute(instance);

            // Map the predicted symbol back to the original class label.
            return codebook.Translate("Class", predicted);
        }
예제 #36
0
        /// <summary>
        /// Learns a C4.5 decision tree from the embedded Nursery data and asserts that the
        /// tree (and, outside NET35 builds, its compiled expression) reproduces every
        /// training label.
        /// </summary>
        public void LargeRunTest()
        {
            #region doc_nursery
            // The data used here is the Nursery Database from the UCI machine
            // learning repository, described at
            //
            //   http://archive.ics.uci.edu/ml/machine-learning-databases/nursery/nursery.names
            //
            // According to that description, the database was derived from a
            // hierarchical decision model built to rank applications for nursery
            // schools in Ljubljana, Slovenia, during the 1980's. The final decision
            // depended on three subproblems: occupation of parents and child's
            // nursery, family structure and financial standing, and the social and
            // health picture of the family. The model was developed within the DEX
            // expert system shell (M. Bohanec, V. Rajkovic: Expert system for
            // decision making. Sistemica 1(1), pp. 145-157, 1990.).
            //

            // Names of the input columns, in the order they appear in each record,
            // followed by the name given to the last (decision) column.
            //
            string[] inputColumns =
            {
                "parents", "has_nurs", "form", "children",
                "housing", "finance", "social", "health"
            };

            string outputColumn = "output";

            // The whole nursery.data file is stored as a single string resource.
            //
            string nurseryData = Resources.nursery;

            // Build a table with one column per attribute and one row per record.
            //
            DataTable table = new DataTable("Nursery");
            table.Columns.Add(inputColumns);
            table.Columns.Add(outputColumn);

            string[] lineSeparators = { Environment.NewLine };
            string[] records = nurseryData.Split(lineSeparators, StringSplitOptions.None);

            foreach (string record in records)
            {
                table.Rows.Add(record.Split(','));
            }

            // The table holds text categories; a codebook maps each of
            // them to a discrete integer symbol the learner can work with.
            //
            Codification codebook = new Codification(table);

            // Apply the codebook and pull the symbols out as arrays.
            //
            DataTable symbols = codebook.Apply(table);
            double[][] inputs = symbols.ToArray(inputColumns);
            int[] outputs = symbols.ToArray<int>(outputColumn);

            // Describe the inputs to the tree using the codebook itself.
            //
            var attributes = DecisionVariable.FromCodebook(codebook, inputColumns);
            DecisionTree tree = new DecisionTree(attributes, classes: 5);

            // Train with C4.5. The returned training
            //   error is expected to be 0 (asserted below).
            //
            C45Learning c45 = new C45Learning(tree);
            double error = c45.Run(inputs, outputs);

            // A single decision, here for the sample at index 25,
            //   is obtained directly from the tree:
            //
            int y = tree.Compute(inputs[25]);
            #endregion

            Assert.AreEqual(0, error);

            for (int row = 0; row < inputs.Length; row++)
            {
                int predicted = tree.Compute(inputs[row]);
                int expected = outputs[row];

                Assert.AreEqual(expected, predicted);
            }


#if !NET35

            // The tree can also be turned into an expression and compiled
            // into a delegate, which is then called like any other function.
            //
            Func<double[], int> func = tree.ToExpression().Compile();

            // Decision for the sample at index 25 using the compiled delegate.
            //
            int z = func(inputs[25]);


            for (int row = 0; row < inputs.Length; row++)
            {
                int predicted = func(inputs[row]);
                int expected = outputs[row];

                Assert.AreEqual(expected, predicted);
            }
#endif
        }
예제 #37
0
        /// <summary>
        /// Builds Mitchell's tennis table in memory, trains an ID3 decision tree on it and
        /// prints the decision for one sample along with the generated code and rule set.
        /// </summary>
        public void Run()
        {
            DataTable data = new DataTable("Mitchell's Tennis Example");

            string[] columnNames = { "Day", "Outlook", "Temperature", "Humidity", "Wind", "PlayTennis" };
            foreach (string columnName in columnNames)
            {
                data.Columns.Add(columnName);
            }

            string[][] days =
            {
                new[] { "D1", "Sunny", "Hot", "High", "Weak", "No" },
                new[] { "D2", "Sunny", "Hot", "High", "Strong", "No" },
                new[] { "D3", "Overcast", "Hot", "High", "Weak", "Yes" },
                new[] { "D4", "Rain", "Mild", "High", "Weak", "Yes" },
                new[] { "D5", "Rain", "Cool", "Normal", "Weak", "Yes" },
                new[] { "D6", "Rain", "Cool", "Normal", "Strong", "No" },
                new[] { "D7", "Overcast", "Cool", "Normal", "Strong", "Yes" },
                new[] { "D8", "Sunny", "Mild", "High", "Weak", "No" },
                new[] { "D9", "Sunny", "Cool", "Normal", "Weak", "Yes" },
                new[] { "D10", "Rain", "Mild", "Normal", "Weak", "Yes" },
                new[] { "D11", "Sunny", "Mild", "Normal", "Strong", "Yes" },
                new[] { "D12", "Overcast", "Mild", "High", "Strong", "Yes" },
                new[] { "D13", "Overcast", "Hot", "Normal", "Weak", "Yes" },
                new[] { "D14", "Rain", "Mild", "High", "Strong", "No" }
            };
            foreach (string[] day in days)
            {
                data.Rows.Add(day);
            }

            // Every column except "Day" gets a string -> integer symbol mapping.
            string[] codedColumns = { "Outlook", "Temperature", "Humidity", "Wind", "PlayTennis" };
            Codification codebook = new Codification(data, codedColumns);

            // Replace the strings of the training data by their symbols.
            DataTable symbols = codebook.Apply(data);

            foreach (string codedColumn in codedColumns)
            {
                CreateDic(codedColumn, symbols);
            }

            // One index vector per row, looked up through GetIndex.
            int[][] inputs = symbols.AsEnumerable()
                .Select(row => new int[]
                {
                    GetIndex("Outlook", row["Outlook"].ToString()),
                    GetIndex("Temperature", row["Temperature"].ToString()),
                    GetIndex("Humidity", row["Humidity"].ToString()),
                    GetIndex("Wind", row["Wind"].ToString())
                })
                .Cast<int[]>()
                .ToArray();

            int[] outputs = symbols.AsEnumerable()
                .Select(row => GetIndex("PlayTennis", row["PlayTennis"].ToString()))
                .Cast<int>()
                .ToArray();

            // The number of values of each decision variable comes from GetCount
            // instead of being written out by hand.
            DecisionVariable[] attributes =
            {
                new DecisionVariable("Outlook", GetCount("Outlook")),
                new DecisionVariable("Temperature", GetCount("Temperature")),
                new DecisionVariable("Humidity", GetCount("Humidity")),
                new DecisionVariable("Wind", GetCount("Wind"))
            };

            // Number of distinct output labels for PlayTennis.
            int classCount = GetCount("PlayTennis");

            // Build the tree and train it with ID3.
            DecisionTree tree = new DecisionTree(attributes, classCount);
            ID3Learning id3learning = new ID3Learning(tree);
            id3learning.Run(inputs, outputs);

            // Classify one sample and translate the decision back to its label.
            int[] query = codebook.Translate("Sunny", "Hot", "High", "Strong");
            int decision = tree.Compute(query);
            string answer = codebook.Translate("PlayTennis", decision);

            Console.WriteLine("Calculate for: Sunny, Hot, High, Strong");
            Console.WriteLine("Answer: " + answer);

            var expression = tree.ToExpression();
            Console.WriteLine(tree.ToCode("ClassTest"));

            DecisionSet rules = tree.ToRules();
            Console.WriteLine(rules.ToString());

            // Compile the expression tree into a delegate (the result is not used here).
            var func = expression.Compile();
        }
예제 #38
0
        /// <summary>
        /// Reads <paramref name="filename"/> through ReadFile, trains an ID3 decision tree on
        /// the configured input and output columns, and prints the generated code and rules.
        /// </summary>
        /// <param name="filename">Path handed to ReadFile.</param>
        public void Run(String filename)
        {
            ReadFile(filename);

            // Codebook that maps the strings of the input columns to integer symbols.
            Codification codebook = new Codification(data, inputColumns.ToArray());

            // Training data with the strings replaced by their symbols.
            DataTable symbols = codebook.Apply(data);

            foreach (String column in inputColumns)
            {
                CreateDic(column, symbols);
            }

            CreateDic(outputColumn, symbols);

            int[][] inputs = symbols.AsEnumerable()
                .Select(row => GetInputRow(row))
                .Cast<int[]>()
                .ToArray();

            int[] outputs = symbols.AsEnumerable()
                .Select(row => GetIndex(outputColumn, row[outputColumn].ToString()))
                .Cast<int>()
                .ToArray();

            // Decision variables and number of output labels.
            DecisionVariable[] attributes = GetDecisionVariables();
            int classCount = GetCount(outputColumn);

            // Build the tree and train it with ID3.
            DecisionTree tree = new DecisionTree(attributes, classCount);
            ID3Learning id3learning = new ID3Learning(tree);
            id3learning.Run(inputs, outputs);

            var expression = tree.ToExpression();
            Console.WriteLine(tree.ToCode("ClassTest"));

            DecisionSet rules = tree.ToRules();
            Console.WriteLine(rules.ToString());

            // Compile the expression tree into a delegate (the result is not used here).
            var func = expression.Compile();
        }
예제 #39
0
        /// <summary>
        /// Reads <paramref name="filename"/> through ReadFile, trains a C4.5 decision tree on
        /// the configured input and output columns, and prints the generated code and rules.
        /// </summary>
        /// <param name="filename">Path handed to ReadFile.</param>
        public void Run(String filename)
        {
            ReadFile(filename);

            // The codebook translates the text categories found in
            // the table into discrete integer symbols.
            //
            Codification codebook = new Codification(data);

            // Table with the codebook applied.
            //
            DataTable symbols = codebook.Apply(data);

            // Only the inputs declared as "string" get a dictionary.
            for (int column = 0; column < inputColumns.Count; column++)
            {
                if (inputTypes[column] == "string")
                {
                    CreateDic(inputColumns[column], symbols);
                }
            }

            CreateDic(outputColumn, symbols);

            double[][] inputs = symbols.AsEnumerable()
                .Select(row => GetInputRow(row))
                .Cast<double[]>()
                .ToArray();

            int[] outputs = symbols.AsEnumerable()
                .Select(row => GetIndex(outputColumn, row[outputColumn].ToString()))
                .Cast<int>()
                .ToArray();

            // Describe the inputs from the codebook and create the tree.
            // NOTE(review): the number of output classes is fixed at 5 regardless of
            // the file that was read - confirm this matches the data being loaded.
            //
            var attributes = DecisionVariable.FromCodebook(codebook, inputColumns.ToArray());
            DecisionTree tree = new DecisionTree(attributes, 5);

            // Train with C4.5; Run returns the training error.
            //
            C45Learning c45 = new C45Learning(tree);
            double error = c45.Run(inputs, outputs);

            var expression = tree.ToExpression();
            Console.WriteLine(tree.ToCode("ClassTest"));

            DecisionSet rules = tree.ToRules();
            Console.WriteLine(rules.ToString());
        }
        /// <summary>
        /// Trains an ID3 decision tree on <paramref name="tbl"/> and classifies the sample
        /// held in <c>inputlar</c>.
        /// </summary>
        /// <param name="tbl">Table with the nine feature columns listed below and a "Class" column.</param>
        /// <returns>The "Class" codeword the tree assigns to the sample.</returns>
        public string kararAgaci(DataTable tbl)
        {
            const int classCount = 2;

            string[] featureColumns =
            {
                "Clump Thickness", "Uniformity of Cell Size", "Uniformity of Cell Shape",
                "Marginal Adhesion", "Single Epithelial Cell Size", "Bare Nuclei",
                "Bland Chromatin", "Normal Nucleoli", "Mitoses"
            };

            Codification codebook = new Codification(tbl);

            // Every feature is declared with 10 possible symbols.
            DecisionVariable[] attributes = new DecisionVariable[featureColumns.Length];
            for (int i = 0; i < attributes.Length; i++)
            {
                attributes[i] = new DecisionVariable(featureColumns[i], 10);
            }

            DecisionTree tree = new DecisionTree(attributes, classCount);
            ID3Learning id3learning = new ID3Learning(tree);

            // Replace the table's values by their integer symbols.
            DataTable symbols = codebook.Apply(tbl);

            int[][] inputs = symbols.ToIntArray(featureColumns);
            int[] outputs = symbols.ToIntArray("Class").GetColumn(0);

            id3learning.Run(inputs, outputs);

            // Translate the sample, classify it, and turn the decision back into a codeword.
            int[] query = codebook.Translate(
                inputlar[0], inputlar[1], inputlar[2],
                inputlar[3], inputlar[4], inputlar[5],
                inputlar[6], inputlar[7], inputlar[8]);

            int output = tree.Compute(query);
            return codebook.Translate("Class", output);
        }
예제 #41
0
        /// <summary>
        /// Trains a Naive Bayes classifier that mixes discrete and normally distributed
        /// inputs on Mitchell's tennis data and checks the answer for one sample.
        /// </summary>
        public void ComputeTest2()
        {
            DataTable data = new DataTable("Mitchell's Tennis Example");

            data.Columns.Add("Day", "Outlook", "Temperature", "Humidity", "Wind", "PlayTennis");

            // Temperature and Humidity are stored as continuous values.
            foreach (string continuousColumn in new[] { "Temperature", "Humidity" })
            {
                data.Columns[continuousColumn].DataType = typeof(double);
            }

            object[][] days =
            {
                new object[] { "D1", "Sunny", 38.0, 96.0, "Weak", "No" },
                new object[] { "D2", "Sunny", 39.0, 90.0, "Strong", "No" },
                new object[] { "D3", "Overcast", 38.0, 75.0, "Weak", "Yes" },
                new object[] { "D4", "Rain", 25.0, 87.0, "Weak", "Yes" },
                new object[] { "D5", "Rain", 12.0, 30.0, "Weak", "Yes" },
                new object[] { "D6", "Rain", 11.0, 35.0, "Strong", "No" },
                new object[] { "D7", "Overcast", 10.0, 40.0, "Strong", "Yes" },
                new object[] { "D8", "Sunny", 24.0, 90.0, "Weak", "No" },
                new object[] { "D9", "Sunny", 12.0, 26.0, "Weak", "Yes" },
                new object[] { "D10", "Rain", 25, 30.0, "Weak", "Yes" },
                new object[] { "D11", "Sunny", 26.0, 40.0, "Strong", "Yes" },
                new object[] { "D12", "Overcast", 27.0, 97.0, "Strong", "Yes" },
                new object[] { "D13", "Overcast", 39.0, 41.0, "Weak", "Yes" },
                new object[] { "D14", "Rain", 23.0, 98.0, "Strong", "No" }
            };
            foreach (object[] day in days)
            {
                data.Rows.Add(day);
            }

            // Codebook mapping the table's strings to discrete symbols.
            Codification codebook = new Codification(data);

            int inputCount = 4; // Outlook, Temperature, Humidity, Wind
            int classCount = codebook["PlayTennis"].Symbols;

            // One prior per input: discrete for the coded columns,
            // normal for the two continuous ones.
            IUnivariateDistribution[] priors =
            {
                new GeneralDiscreteDistribution(codebook["Outlook"].Symbols),
                new NormalDistribution(),
                new NormalDistribution(),
                new GeneralDiscreteDistribution(codebook["Wind"].Symbols)
            };

            var target = new NaiveBayes<IUnivariateDistribution>(classCount, inputCount, priors);

            // Convert the table to symbols and fit the model.
            DataTable symbols = codebook.Apply(data);
            double[][] inputs = symbols.ToArray("Outlook", "Temperature", "Humidity", "Wind");
            int[] outputs = symbols.ToArray<int>("PlayTennis");

            target.Estimate(inputs, outputs);

            // Sample to classify: Sunny outlook, temperature 12, humidity 90, Strong wind.
            int sunny = codebook.Translate(columnName: "Outlook", value: "Sunny");
            int strong = codebook.Translate(columnName: "Wind", value: "Strong");
            double[] instance = { sunny, 12.0, 90.0, strong };

            double logLikelihood;
            double[] responses;
            int c = target.Compute(instance, out logLikelihood, out responses);

            string result = codebook.Translate("PlayTennis", c);

            Assert.AreEqual("No", result);
            Assert.AreEqual(0, c);
            Assert.AreEqual(0.840, responses[0], 1e-3);
            Assert.AreEqual(1, responses.Sum(), 1e-10);
            Assert.IsFalse(double.IsNaN(responses[0]));
            Assert.AreEqual(2, responses.Length);
        }
예제 #42
0
        /// <summary>
        /// Loads the embedded Nursery data, codifies it, and trains a C4.5 tree on the
        /// portion of the samples selected by <c>Submatrix(first)</c>.
        /// </summary>
        /// <param name="inputs">Receives the codified input columns of the whole table.</param>
        /// <param name="outputs">Receives the codified output column of the whole table.</param>
        /// <param name="first">Argument passed to <c>Submatrix</c> to pick the training samples.</param>
        /// <returns>The trained decision tree; a zero training error is asserted first.</returns>
        public static DecisionTree createNurseryExample(out double[][] inputs, out int[] outputs, int first)
        {
            string outputColumn = "output";
            string[] inputColumns =
            {
                "parents", "has_nurs", "form", "children",
                "housing", "finance", "social", "health"
            };

            string nurseryData = Resources.nursery;

            // One column per attribute, one row per comma-separated record.
            DataTable table = new DataTable("Nursery");
            table.Columns.Add(inputColumns);
            table.Columns.Add(outputColumn);

            string[] lineSeparators = { Environment.NewLine };
            foreach (string record in nurseryData.Split(lineSeparators, StringSplitOptions.None))
            {
                table.Rows.Add(record.Split(','));
            }

            // Turn the text categories into integer symbols.
            Codification codebook = new Codification(table);
            DataTable symbols = codebook.Apply(table);
            inputs = symbols.ToArray(inputColumns);
            outputs = symbols.ToArray<int>(outputColumn);

            var attributes = DecisionVariable.FromCodebook(codebook, inputColumns);
            var tree = new DecisionTree(attributes, outputClasses: 5);

            // Train on the selected subset only.
            var trainingInputs = inputs.Submatrix(first);
            var trainingOutputs = outputs.Submatrix(first);

            C45Learning c45 = new C45Learning(tree);
            double error = c45.Run(trainingInputs, trainingOutputs);

            Assert.AreEqual(0, error);

            return tree;
        }
예제 #43
0
        /// <summary>
        /// Trains a discrete Naive Bayes classifier on Mitchell's tennis data and checks the
        /// answer for a Sunny / Cool / High / Strong sample.
        /// </summary>
        public void ComputeTest()
        {
            DataTable data = new DataTable("Mitchell's Tennis Example");

            data.Columns.Add("Day", "Outlook", "Temperature", "Humidity", "Wind", "PlayTennis");

            string[][] days =
            {
                new[] { "D1", "Sunny", "Hot", "High", "Weak", "No" },
                new[] { "D2", "Sunny", "Hot", "High", "Strong", "No" },
                new[] { "D3", "Overcast", "Hot", "High", "Weak", "Yes" },
                new[] { "D4", "Rain", "Mild", "High", "Weak", "Yes" },
                new[] { "D5", "Rain", "Cool", "Normal", "Weak", "Yes" },
                new[] { "D6", "Rain", "Cool", "Normal", "Strong", "No" },
                new[] { "D7", "Overcast", "Cool", "Normal", "Strong", "Yes" },
                new[] { "D8", "Sunny", "Mild", "High", "Weak", "No" },
                new[] { "D9", "Sunny", "Cool", "Normal", "Weak", "Yes" },
                new[] { "D10", "Rain", "Mild", "Normal", "Weak", "Yes" },
                new[] { "D11", "Sunny", "Mild", "Normal", "Strong", "Yes" },
                new[] { "D12", "Overcast", "Mild", "High", "Strong", "Yes" },
                new[] { "D13", "Overcast", "Hot", "Normal", "Weak", "Yes" },
                new[] { "D14", "Rain", "Mild", "High", "Strong", "No" }
            };
            foreach (string[] day in days)
            {
                data.Rows.Add(day);
            }

            // Codebook mapping the table's strings to discrete symbols.
            Codification codebook = new Codification(data);

            int inputCount = 4; // Outlook, Temperature, Humidity, Wind
            int classCount = codebook["PlayTennis"].Symbols;

            // One discrete prior per input, sized by the number of symbols of its column.
            GeneralDiscreteDistribution[] priors =
            {
                new GeneralDiscreteDistribution(codebook["Outlook"].Symbols),
                new GeneralDiscreteDistribution(codebook["Temperature"].Symbols),
                new GeneralDiscreteDistribution(codebook["Humidity"].Symbols),
                new GeneralDiscreteDistribution(codebook["Wind"].Symbols)
            };

            var target = new NaiveBayes<GeneralDiscreteDistribution>(classCount, inputCount, priors);

            // Convert the table to symbols and fit the model.
            DataTable symbols = codebook.Apply(data);
            double[][] inputs = symbols.ToArray("Outlook", "Temperature", "Humidity", "Wind");
            int[] outputs = symbols.ToArray<int>("PlayTennis");

            target.Estimate(inputs, outputs);

            // Sample to classify, expressed in codebook symbols.
            int[] codedInstance = codebook.Translate("Sunny", "Cool", "High", "Strong");
            double[] instance = codedInstance.ToDouble();

            double logLikelihood;
            double[] responses;
            int c = target.Compute(instance, out logLikelihood, out responses);

            string result = codebook.Translate("PlayTennis", c);

            Assert.AreEqual("No", result);
            Assert.AreEqual(0, c);
            Assert.AreEqual(0.795, responses[0], 1e-3);
            Assert.AreEqual(1, responses.Sum(), 1e-10);
            Assert.IsFalse(double.IsNaN(responses[0]));
            Assert.AreEqual(2, responses.Length);
        }
예제 #44
0
        // 
        //You can use the following additional attributes as you write your tests:
        //
        //Use ClassInitialize to run code before running the first test in the class
        //[ClassInitialize()]
        //public static void MyClassInitialize(TestContext testContext)
        //{
        //}
        //
        //Use ClassCleanup to run code after all tests in a class have run
        //[ClassCleanup()]
        //public static void MyClassCleanup()
        //{
        //}
        //
        //Use TestInitialize to run code before running each test
        //[TestInitialize()]
        //public void MyTestInitialize()
        //{
        //}
        //
        //Use TestCleanup to run code after each test has run
        //[TestCleanup()]
        //public void MyTestCleanup()
        //{
        //}
        //
        #endregion


        /// <summary>
        /// Builds Mitchell's tennis table, codifies it, and trains an ID3 decision tree on it.
        /// </summary>
        /// <param name="tree">Receives the trained decision tree.</param>
        /// <param name="inputs">Receives the codified Outlook, Temperature, Humidity and Wind columns.</param>
        /// <param name="outputs">Receives the codified PlayTennis column.</param>
        public static void CreateMitchellExample(out DecisionTree tree, out int[][] inputs, out int[] outputs)
        {
            DataTable data = new DataTable("Mitchell's Tennis Example");

            data.Columns.Add("Day", "Outlook", "Temperature", "Humidity", "Wind", "PlayTennis");

            string[][] days =
            {
                new[] { "D1", "Sunny", "Hot", "High", "Weak", "No" },
                new[] { "D2", "Sunny", "Hot", "High", "Strong", "No" },
                new[] { "D3", "Overcast", "Hot", "High", "Weak", "Yes" },
                new[] { "D4", "Rain", "Mild", "High", "Weak", "Yes" },
                new[] { "D5", "Rain", "Cool", "Normal", "Weak", "Yes" },
                new[] { "D6", "Rain", "Cool", "Normal", "Strong", "No" },
                new[] { "D7", "Overcast", "Cool", "Normal", "Strong", "Yes" },
                new[] { "D8", "Sunny", "Mild", "High", "Weak", "No" },
                new[] { "D9", "Sunny", "Cool", "Normal", "Weak", "Yes" },
                new[] { "D10", "Rain", "Mild", "Normal", "Weak", "Yes" },
                new[] { "D11", "Sunny", "Mild", "Normal", "Strong", "Yes" },
                new[] { "D12", "Overcast", "Mild", "High", "Strong", "Yes" },
                new[] { "D13", "Overcast", "Hot", "Normal", "Weak", "Yes" },
                new[] { "D14", "Rain", "Mild", "High", "Strong", "No" }
            };
            foreach (string[] day in days)
            {
                data.Rows.Add(day);
            }

            // Codebook mapping the table's strings to integer symbols.
            Codification codebook = new Codification(data);

            // Each decision variable takes as many values as its column has symbols.
            DecisionVariable[] attributes =
            {
                new DecisionVariable("Outlook", codebook["Outlook"].Symbols),
                new DecisionVariable("Temperature", codebook["Temperature"].Symbols),
                new DecisionVariable("Humidity", codebook["Humidity"].Symbols),
                new DecisionVariable("Wind", codebook["Wind"].Symbols)
            };

            // Number of distinct PlayTennis labels.
            int classCount = codebook["PlayTennis"].Symbols;

            tree = new DecisionTree(attributes, classCount);
            ID3Learning id3 = new ID3Learning(tree);

            // Convert the table to symbols and train on all rows.
            DataTable symbols = codebook.Apply(data);
            inputs = symbols.ToIntArray("Outlook", "Temperature", "Humidity", "Wind");

            int[][] outputColumn = symbols.ToIntArray("PlayTennis");
            outputs = outputColumn.GetColumn(0);

            id3.Run(inputs, outputs);
        }
예제 #45
0
        /// <summary>
        /// Trains a C4.5 tree on a tennis table whose "Wind" column holds a single value and
        /// checks that the tree reproduces every training label.
        /// </summary>
        public void ConstantDiscreteVariableTest()
        {
            DataTable data = new DataTable("Degenerated Tennis Example");

            data.Columns.Add("Day", typeof(string));
            data.Columns.Add("Outlook", typeof(string));
            data.Columns.Add("Temperature", typeof(double));
            data.Columns.Add("Humidity", typeof(double));
            data.Columns.Add("Wind", typeof(string));
            data.Columns.Add("PlayTennis", typeof(string));

            // Every row has Wind = "Weak".
            object[][] days =
            {
                new object[] { "D1", "Sunny", 50, 85, "Weak", "No" },
                new object[] { "D2", "Sunny", 50, 90, "Weak", "No" },
                new object[] { "D3", "Overcast", 83, 78, "Weak", "Yes" },
                new object[] { "D4", "Rain", 70, 96, "Weak", "Yes" },
                new object[] { "D5", "Rain", 68, 80, "Weak", "Yes" },
                new object[] { "D6", "Rain", 65, 70, "Weak", "No" },
                new object[] { "D7", "Overcast", 64, 65, "Weak", "Yes" },
                new object[] { "D8", "Sunny", 50, 95, "Weak", "No" },
                new object[] { "D9", "Sunny", 69, 70, "Weak", "Yes" },
                new object[] { "D10", "Rain", 75, 80, "Weak", "Yes" },
                new object[] { "D11", "Sunny", 75, 70, "Weak", "Yes" },
                new object[] { "D12", "Overcast", 72, 90, "Weak", "Yes" },
                new object[] { "D13", "Overcast", 81, 75, "Weak", "Yes" },
                new object[] { "D14", "Rain", 50, 80, "Weak", "No" }
            };
            foreach (object[] day in days)
            {
                data.Rows.Add(day);
            }

            // Codebook mapping the table's strings to integer symbols.
            Codification codebook = new Codification(data);

            // Wind is declared with one symbol more than the codebook found for it.
            DecisionVariable[] attributes =
            {
                new DecisionVariable("Outlook", codebook["Outlook"].Symbols),
                new DecisionVariable("Temperature", DecisionVariableKind.Continuous),
                new DecisionVariable("Humidity", DecisionVariableKind.Continuous),
                new DecisionVariable("Wind", codebook["Wind"].Symbols + 1)
            };

            // Number of distinct PlayTennis labels.
            int classCount = codebook["PlayTennis"].Symbols;

            DecisionTree tree = new DecisionTree(attributes, classCount);
            C45Learning c45 = new C45Learning(tree);

            // Convert the table to symbols and train on all rows.
            DataTable symbols = codebook.Apply(data);
            double[][] inputs = symbols.ToArray("Outlook", "Temperature", "Humidity", "Wind");
            int[] outputs = symbols.ToArray<int>("PlayTennis");

            double error = c45.Run(inputs, outputs);

            for (int row = 0; row < inputs.Length; row++)
            {
                int predicted = tree.Compute(inputs[row]);
                Assert.AreEqual(outputs[row], predicted);
            }
        }
        /// <summary>
        /// Clusters the codified feature columns of <paramref name="tbl"/> into two groups with
        /// k-means and reports which cluster the sample held in <c>inputlar</c> is nearest to.
        /// </summary>
        /// <param name="tbl">Table containing the nine feature columns listed below.</param>
        /// <returns>The index of the nearest cluster, as a string.</returns>
        public string kmeans(DataTable tbl)
        {
            string[] featureColumns =
            {
                "Clump Thickness", "Uniformity of Cell Size", "Uniformity of Cell Shape",
                "Marginal Adhesion", "Single Epithelial Cell Size", "Bare Nuclei",
                "Bland Chromatin", "Normal Nucleoli", "Mitoses"
            };

            Codification codebook = new Codification(tbl);
            DataTable symbols = codebook.Apply(tbl);

            double[][] inputs = symbols.ToIntArray(featureColumns).ToDouble();

            KMeans clustering = new KMeans(2);
            clustering.Compute(inputs);

            // The clusters are learned on the values produced by the codebook, so the
            // query point must use the same encoding. A column the codebook did not
            // codify keeps its raw values in the training data, so the sample value is
            // parsed directly in that case.
            double[] query = new double[featureColumns.Length];
            for (int i = 0; i < featureColumns.Length; i++)
            {
                string column = featureColumns[i];

                query[i] = codebook.Columns.Contains(column)
                    ? codebook.Translate(column, inputlar[i])
                    : Convert.ToInt32(inputlar[i]);
            }

            int c = clustering.Clusters.Nearest(query);
            return c.ToString();
        }
        /// <summary>
        /// Trains a C4.5 decision tree on <paramref name="tbl"/> and classifies the
        /// sample stored in <c>inputlar</c>.
        /// </summary>
        /// <param name="tbl">Table holding the nine feature columns and the "Class" column.</param>
        /// <returns>The "Class" label the codebook associates with the tree's decision.</returns>
        private string C45(DataTable tbl)
        {
            int classCount = 2;
            Codification codebook = new Codification(tbl);

            // Every feature is declared as a discrete variable with 10 symbols.
            DecisionVariable[] attributes =
            {
                new DecisionVariable("Clump Thickness", 10),
                new DecisionVariable("Uniformity of Cell Size", 10),
                new DecisionVariable("Uniformity of Cell Shape", 10),
                new DecisionVariable("Marginal Adhesion", 10),
                new DecisionVariable("Single Epithelial Cell Size", 10),
                new DecisionVariable("Bare Nuclei", 10),
                new DecisionVariable("Bland Chromatin", 10),
                new DecisionVariable("Normal Nucleoli", 10),
                new DecisionVariable("Mitoses", 10)
            };

            DecisionTree tree = new DecisionTree(attributes, classCount);

            // Translate our training data into integer symbols using our codebook:
            DataTable symbols = codebook.Apply(tbl);

            double[][] inputs = symbols.ToIntArray("Clump Thickness", "Uniformity of Cell Size", "Uniformity of Cell Shape", "Marginal Adhesion", "Single Epithelial Cell Size", "Bare Nuclei", "Bland Chromatin", "Normal Nucleoli", "Mitoses").ToDouble();
            int[] outputs = symbols.ToIntArray("Class").GetColumn(0);

            // Learn the tree with C4.5. The value returned by Run is not used here.
            C45Learning c45 = new C45Learning(tree);
            c45.Run(inputs, outputs);

            // Encode the sample with the same codebook, classify it, and translate
            // the numeric decision back into its "Class" label.
            int[] query = codebook.Translate(inputlar[0], inputlar[1], inputlar[2], inputlar[3],
                inputlar[4], inputlar[5], inputlar[6], inputlar[7], inputlar[8]);
            int output = tree.Compute(query);
            return codebook.Translate("Class", output);
        }
예제 #48
0
        /// <summary>
        /// Learns a Naive Bayes model with mixed discrete and continuous inputs on
        /// Mitchell's tennis data, then checks the decision, the class probabilities
        /// and the log-likelihood obtained for one hand-built instance.
        /// </summary>
        public void learn_test_mitchell()
        {
            #region doc_mitchell_1
            // Mitchell's Tennis example is kept in a DataTable below. A DataTable
            // is not required in order to use the Naive Bayes; it is just one way
            // of holding the data.
            var data = new DataTable("Mitchell's Tennis Example");
            data.Columns.Add("Day", "Outlook", "Temperature", "Humidity", "Wind", "PlayTennis");

            // Temperature and Humidity are stored as continuous values
            data.Columns["Temperature"].DataType = typeof(double);
            data.Columns["Humidity"].DataType = typeof(double);

            // One entry per day: Day, Outlook, Temperature, Humidity, Wind, PlayTennis
            object[][] days =
            {
                new object[] { "D1", "Sunny", 38.0, 96.0, "Weak", "No" },
                new object[] { "D2", "Sunny", 39.0, 90.0, "Strong", "No" },
                new object[] { "D3", "Overcast", 38.0, 75.0, "Weak", "Yes" },
                new object[] { "D4", "Rain", 25.0, 87.0, "Weak", "Yes" },
                new object[] { "D5", "Rain", 12.0, 30.0, "Weak", "Yes" },
                new object[] { "D6", "Rain", 11.0, 35.0, "Strong", "No" },
                new object[] { "D7", "Overcast", 10.0, 40.0, "Strong", "Yes" },
                new object[] { "D8", "Sunny", 24.0, 90.0, "Weak", "No" },
                new object[] { "D9", "Sunny", 12.0, 26.0, "Weak", "Yes" },
                new object[] { "D10", "Rain", 25, 30.0, "Weak", "Yes" },
                new object[] { "D11", "Sunny", 26.0, 40.0, "Strong", "Yes" },
                new object[] { "D12", "Overcast", 27.0, 97.0, "Strong", "Yes" },
                new object[] { "D13", "Overcast", 39.0, 41.0, "Weak", "Yes" },
                new object[] { "D14", "Rain", 23.0, 98.0, "Strong", "No" }
            };

            foreach (object[] day in days)
            {
                data.Rows.Add(day);
            }
            #endregion

            #region doc_mitchell_2
            // The codebook converts the string columns into discrete symbols
            var codebook = new Codification(data);
            #endregion

            #region doc_mitchell_3
            // The learner is told explicitly which distribution to start from
            // for each input variable:
            IUnivariateFittableDistribution[] initialDistributions =
            {
                new GeneralDiscreteDistribution(codebook["Outlook"].Symbols),   // Outlook: Sunny, overcast, rain
                new NormalDistribution(),                                       // Temperature: continuous
                new NormalDistribution(),                                       // Humidity: continuous
                new GeneralDiscreteDistribution(codebook["Wind"].Symbols)       // Wind: Weak, strong
            };

            // Create the Naive Bayes learning algorithm and point it to the
            // distributions above, selected by variable index
            var learner = new NaiveBayesLearning<IUnivariateFittableDistribution>();
            learner.Distribution = (cls, variable) => initialDistributions[variable];

            // Convert the table to symbols and split it into inputs and outputs
            DataTable symbols = codebook.Apply(data);
            double[][] inputs = symbols.ToArray("Outlook", "Temperature", "Humidity", "Wind");
            int[] outputs = symbols.ToArray<int>("PlayTennis");

            // Learn the Naive Bayes model
            var naiveBayes = learner.Learn(inputs, outputs);
            #endregion

            #region doc_mitchell_4
            // Build an instance representing a "sunny, cool, humid and windy day":
            double outlook = codebook.Translate(columnName: "Outlook", value: "Sunny");
            double wind = codebook.Translate(columnName: "Wind", value: "Strong");
            double[] instance = { outlook, 12.0, 90.0, wind };

            // The class decision for the instance
            int predicted = naiveBayes.Decide(instance);

            // The probability of each class
            double[] probabilities = naiveBayes.Probabilities(instance);

            // The log-likelihood of the prediction
            double ll = naiveBayes.LogLikelihood(instance);

            // The decision translated back to its label; should be "No"
            string result = codebook.Translate("PlayTennis", predicted);
            #endregion

            Assert.AreEqual("No", result);
            Assert.AreEqual(0, predicted);
            Assert.AreEqual(0.840, probabilities[0], 1e-3);
            Assert.AreEqual(-10.493243476691351, ll, 1e-6);
            Assert.AreEqual(1, probabilities.Sum(), 1e-10);
            Assert.AreEqual(2, probabilities.Length);
        }
예제 #49
0
        /// <summary>
        /// Expects the decision tree constructor to throw while "Temperature" is
        /// declared with the single symbol found in the data, then re-declares it
        /// with two symbols and checks that ID3 reproduces every training output.
        /// </summary>
        public void ConstantDiscreteVariableTest()
        {
            var data = new DataTable("Degenerated Tennis Example");
            data.Columns.Add("Day", "Outlook", "Temperature", "Humidity", "Wind", "PlayTennis");

            // Day, Outlook, Temperature, Humidity, Wind, PlayTennis.
            // Temperature is "Hot" on every single day.
            object[][] days =
            {
                new object[] { "D1", "Sunny", "Hot", "High", "Weak", "No" },
                new object[] { "D2", "Sunny", "Hot", "High", "Strong", "No" },
                new object[] { "D3", "Overcast", "Hot", "High", "Weak", "Yes" },
                new object[] { "D4", "Rain", "Hot", "High", "Weak", "Yes" },
                new object[] { "D5", "Rain", "Hot", "Normal", "Weak", "Yes" },
                new object[] { "D6", "Rain", "Hot", "Normal", "Strong", "No" },
                new object[] { "D7", "Overcast", "Hot", "Normal", "Strong", "Yes" },
                new object[] { "D8", "Sunny", "Hot", "High", "Weak", "No" },
                new object[] { "D9", "Sunny", "Hot", "Normal", "Weak", "Yes" },
                new object[] { "D10", "Rain", "Hot", "Normal", "Weak", "Yes" },
                new object[] { "D11", "Sunny", "Hot", "Normal", "Strong", "Yes" },
                new object[] { "D12", "Overcast", "Hot", "High", "Strong", "Yes" },
                new object[] { "D13", "Overcast", "Hot", "Normal", "Weak", "Yes" },
                new object[] { "D14", "Rain", "Hot", "High", "Strong", "No" }
            };

            foreach (object[] day in days)
            {
                data.Rows.Add(day);
            }

            // The codebook converts the strings into integer symbols
            var codebook = new Codification(data);

            DecisionVariable[] attributes =
            {
               new DecisionVariable("Outlook",     codebook["Outlook"].Symbols),     // Sunny, overcast, rain
               new DecisionVariable("Temperature", codebook["Temperature"].Symbols), // Hot only
               new DecisionVariable("Humidity",    codebook["Humidity"].Symbols),    // High, normal
               new DecisionVariable("Wind",        codebook["Wind"].Symbols)         // Weak, strong
            };

            int classCount = codebook["PlayTennis"].Symbols; // yes, no

            // First attempt: construction is expected to fail
            DecisionTree tree;
            bool rejected = false;
            try
            {
                tree = new DecisionTree(attributes, classCount);
            }
            catch
            {
                rejected = true;
            }

            Assert.IsTrue(rejected);

            // Second attempt: same variables, but Temperature now has two symbols
            attributes[1] = new DecisionVariable("Temperature", 2);
            tree = new DecisionTree(attributes, classCount);
            var id3 = new ID3Learning(tree);

            // Convert the table to symbols and train the classifier
            DataTable symbols = codebook.Apply(data);
            int[][] inputs = symbols.ToArray<int>("Outlook", "Temperature", "Humidity", "Wind");
            int[] outputs = symbols.ToArray<int>("PlayTennis");

            double trainingError = id3.Run(inputs, outputs);

            // Every training sample must be classified as its own label
            for (int sample = 0; sample < inputs.Length; sample++)
            {
                int decision = tree.Compute(inputs[sample]);
                Assert.AreEqual(outputs[sample], decision);
            }
        }
예제 #50
0
        /// <summary>
        /// Applies a codebook built from the "test" worksheet of the intrusion
        /// spreadsheet and checks that the result has rows and no string columns.
        /// </summary>
        public void ApplyTest4()
        {
            string path = @"Resources\intrusion.xls";

            // NOTE(review): the meaning of the two boolean flags is not visible here; confirm against ExcelReader.
            ExcelReader db = new ExcelReader(path, false, true);

            DataTable table = db.GetWorksheet("test");

            Codification codebook = new Codification(table);

            DataTable result = codebook.Apply(table);

            Assert.IsNotNull(result);

            // The value that must NOT appear goes first, so that a failure
            // message reports the column's actual type as the actual value.
            foreach (DataColumn col in result.Columns)
            {
                Assert.AreNotEqual(typeof(string), col.DataType);
            }

            Assert.IsTrue(result.Rows.Count > 0);
        }
예제 #51
0
        /// <summary>
        /// Declares "Outlook" with 200 more symbols than the data contains, trains
        /// with ID3, and checks that the root has 203 branches, that branch 100 is
        /// an output-less leaf, and that every training sample is still reproduced.
        /// </summary>
        public void IncompleteDiscreteVariableTest()
        {
            var data = new DataTable("Degenerated Tennis Example");
            data.Columns.Add("Day", "Outlook", "Temperature", "Humidity", "Wind", "PlayTennis");

            // Day, Outlook, Temperature, Humidity, Wind, PlayTennis
            object[][] days =
            {
                new object[] { "D1", "Sunny", "Hot", "High", "Weak", "No" },
                new object[] { "D2", "Sunny", "Hot", "High", "Strong", "No" },
                new object[] { "D3", "Overcast", "Hot", "High", "Weak", "Yes" },
                new object[] { "D4", "Rain", "Mild", "High", "Weak", "Yes" },
                new object[] { "D5", "Rain", "Cool", "Normal", "Weak", "Yes" },
                new object[] { "D6", "Rain", "Cool", "Normal", "Strong", "No" },
                new object[] { "D7", "Overcast", "Cool", "Normal", "Strong", "Yes" },
                new object[] { "D8", "Sunny", "Mild", "High", "Weak", "No" },
                new object[] { "D9", "Sunny", "Cool", "Normal", "Weak", "Yes" },
                new object[] { "D10", "Rain", "Mild", "Normal", "Weak", "Yes" },
                new object[] { "D11", "Sunny", "Mild", "Normal", "Strong", "Yes" },
                new object[] { "D12", "Overcast", "Mild", "High", "Strong", "Yes" },
                new object[] { "D13", "Overcast", "Hot", "Normal", "Weak", "Yes" },
                new object[] { "D14", "Rain", "Mild", "High", "Strong", "No" }
            };

            foreach (object[] day in days)
            {
                data.Rows.Add(day);
            }

            // The codebook converts the strings into integer symbols
            var codebook = new Codification(data);

            DecisionVariable[] attributes =
            {
               new DecisionVariable("Outlook",     codebook["Outlook"].Symbols+200), // 203 symbols, 200 of them never observed
               new DecisionVariable("Temperature", codebook["Temperature"].Symbols), // Hot, mild, cool
               new DecisionVariable("Humidity",    codebook["Humidity"].Symbols),    // High, normal
               new DecisionVariable("Wind",        codebook["Wind"].Symbols)         // Weak, strong
            };

            int classCount = codebook["PlayTennis"].Symbols; // yes, no

            var tree = new DecisionTree(attributes, classCount);
            var id3 = new ID3Learning(tree);

            // Convert the table to symbols and train the classifier
            DataTable symbols = codebook.Apply(data);
            int[][] inputs = symbols.ToArray<int>("Outlook", "Temperature", "Humidity", "Wind");
            int[] outputs = symbols.ToArray<int>("PlayTennis");

            double trainingError = id3.Run(inputs, outputs);

            // Branch 100 belongs to one of the symbols that never occurs in the data
            Assert.AreEqual(203, tree.Root.Branches.Count);
            Assert.IsTrue(tree.Root.Branches[100].IsLeaf);
            Assert.IsNull(tree.Root.Branches[100].Output);

            // Every training sample must be classified as its own label
            for (int sample = 0; sample < inputs.Length; sample++)
            {
                int decision = tree.Compute(inputs[sample]);
                Assert.AreEqual(outputs[sample], decision);
            }
        }
예제 #52
0
        /// <summary>
        /// Detects a codebook on a small age/classification table and checks that
        /// applying it replaces the classification strings with the expected integers.
        /// </summary>
        public void ApplyTest()
        {
            Codification target = new Codification();

            DataTable input = new DataTable("Sample data");

            input.Columns.Add("Age", typeof(int));
            input.Columns.Add("Classification", typeof(string));

            input.Rows.Add(10, "child");
            input.Rows.Add(7, "child");
            input.Rows.Add(4, "child");
            input.Rows.Add(21, "adult");
            input.Rows.Add(27, "adult");
            input.Rows.Add(12, "child");
            input.Rows.Add(79, "elder");
            input.Rows.Add(40, "adult");
            input.Rows.Add(30, "adult");

            // Same table with the labels replaced by integers, numbered in the
            // order they first appear above: child = 0, adult = 1, elder = 2.
            DataTable expected = new DataTable("Sample data");

            expected.Columns.Add("Age", typeof(int));
            expected.Columns.Add("Classification", typeof(int));

            expected.Rows.Add(10, 0);
            expected.Rows.Add(7, 0);
            expected.Rows.Add(4, 0);
            expected.Rows.Add(21, 1);
            expected.Rows.Add(27, 1);
            expected.Rows.Add(12, 0);
            expected.Rows.Add(79, 2);
            expected.Rows.Add(40, 1);
            expected.Rows.Add(30, 1);

            // Detect the mappings
            target.Detect(input);

            // Apply the categorization
            DataTable actual = target.Apply(input);

            // Check the shape first: without this, an empty or truncated result
            // would make the cell-by-cell comparison below pass vacuously.
            Assert.AreEqual(expected.Rows.Count, actual.Rows.Count);
            Assert.AreEqual(expected.Columns.Count, actual.Columns.Count);

            for (int i = 0; i < expected.Rows.Count; i++)
            {
                for (int j = 0; j < expected.Columns.Count; j++)
                {
                    Assert.AreEqual(expected.Rows[i][j], actual.Rows[i][j]);
                }
            }
        }
예제 #53
0
        // 
        //You can use the following additional attributes as you write your tests:
        //
        //Use ClassInitialize to run code before running the first test in the class
        //[ClassInitialize()]
        //public static void MyClassInitialize(TestContext testContext)
        //{
        //}
        //
        //Use ClassCleanup to run code after all tests in a class have run
        //[ClassCleanup()]
        //public static void MyClassCleanup()
        //{
        //}
        //
        //Use TestInitialize to run code before running each test
        //[TestInitialize()]
        //public void MyTestInitialize()
        //{
        //}
        //
        //Use TestCleanup to run code after each test has run
        //[TestCleanup()]
        //public void MyTestCleanup()
        //{
        //}
        //
        #endregion


        /// <summary>
        /// Builds Mitchell's tennis data set, trains an ID3 decision tree on it and
        /// asserts that the tree reproduces every row of the table.
        /// </summary>
        /// <param name="tree">Receives the trained decision tree.</param>
        /// <param name="inputs">Receives the codified Outlook, Temperature, Humidity and Wind values.</param>
        /// <param name="outputs">Receives the codified PlayTennis values.</param>
        public static void CreateMitchellExample(out DecisionTree tree, out int[][] inputs, out int[] outputs)
        {
            var data = new DataTable("Mitchell's Tennis Example");
            data.Columns.Add("Day", "Outlook", "Temperature", "Humidity", "Wind", "PlayTennis");

            // Day, Outlook, Temperature, Humidity, Wind, PlayTennis
            object[][] days =
            {
                new object[] { "D1", "Sunny", "Hot", "High", "Weak", "No" },
                new object[] { "D2", "Sunny", "Hot", "High", "Strong", "No" },
                new object[] { "D3", "Overcast", "Hot", "High", "Weak", "Yes" },
                new object[] { "D4", "Rain", "Mild", "High", "Weak", "Yes" },
                new object[] { "D5", "Rain", "Cool", "Normal", "Weak", "Yes" },
                new object[] { "D6", "Rain", "Cool", "Normal", "Strong", "No" },
                new object[] { "D7", "Overcast", "Cool", "Normal", "Strong", "Yes" },
                new object[] { "D8", "Sunny", "Mild", "High", "Weak", "No" },
                new object[] { "D9", "Sunny", "Cool", "Normal", "Weak", "Yes" },
                new object[] { "D10", "Rain", "Mild", "Normal", "Weak", "Yes" },
                new object[] { "D11", "Sunny", "Mild", "Normal", "Strong", "Yes" },
                new object[] { "D12", "Overcast", "Mild", "High", "Strong", "Yes" },
                new object[] { "D13", "Overcast", "Hot", "Normal", "Weak", "Yes" },
                new object[] { "D14", "Rain", "Mild", "High", "Strong", "No" }
            };

            foreach (object[] day in days)
            {
                data.Rows.Add(day);
            }

            // The codebook converts the strings into integer symbols
            var codebook = new Codification(data);

            DecisionVariable[] attributes =
            {
               new DecisionVariable("Outlook",     codebook["Outlook"].Symbols),     // Sunny, overcast, rain
               new DecisionVariable("Temperature", codebook["Temperature"].Symbols), // Hot, mild, cool
               new DecisionVariable("Humidity",    codebook["Humidity"].Symbols),    // High, normal
               new DecisionVariable("Wind",        codebook["Wind"].Symbols)         // Weak, strong
            };

            int classCount = codebook["PlayTennis"].Symbols; // yes, no

            tree = new DecisionTree(attributes, classCount);
            var id3 = new ID3Learning(tree);

            // Convert the table to symbols and train the classifier
            DataTable symbols = codebook.Apply(data);
            inputs = symbols.ToArray<int>("Outlook", "Temperature", "Humidity", "Wind");
            outputs = symbols.ToArray<int>("PlayTennis");

            double error = id3.Run(inputs, outputs);
            Assert.AreEqual(0, error);

            // Going through the codebook in both directions, every row of the
            // original table must come back with its own PlayTennis label
            foreach (DataRow row in data.Rows)
            {
                var encoded = codebook.Translate(row, "Outlook", "Temperature", "Humidity", "Wind");
                int decision = tree.Compute(encoded);

                string expected = row["PlayTennis"] as string;
                string actual = codebook.Translate("PlayTennis", decision);

                Assert.AreEqual(expected, actual);
            }

            // A sunny, hot, humid and windy day must be answered with "No"
            int[] sunnyHotHighStrong = codebook.Translate("Sunny", "Hot", "High", "Strong");
            int sunnyDecision = tree.Compute(sunnyHotHighStrong);
            string sunnyAnswer = codebook.Translate("PlayTennis", sunnyDecision);

            Assert.AreEqual("No", sunnyAnswer);
        }
예제 #54
0
        /// <summary>
        /// Learns a discrete Naive Bayes model on Mitchell's tennis data and checks
        /// the decision and class probabilities obtained for a sunny, cool, humid
        /// and windy day.
        /// </summary>
        public void ComputeTest()
        {
            #region doc_mitchell
            var data = new DataTable("Mitchell's Tennis Example");
            data.Columns.Add("Day", "Outlook", "Temperature", "Humidity", "Wind", "PlayTennis");

            // Day, Outlook, Temperature, Humidity, Wind, PlayTennis
            object[][] days =
            {
                new object[] { "D1", "Sunny", "Hot", "High", "Weak", "No" },
                new object[] { "D2", "Sunny", "Hot", "High", "Strong", "No" },
                new object[] { "D3", "Overcast", "Hot", "High", "Weak", "Yes" },
                new object[] { "D4", "Rain", "Mild", "High", "Weak", "Yes" },
                new object[] { "D5", "Rain", "Cool", "Normal", "Weak", "Yes" },
                new object[] { "D6", "Rain", "Cool", "Normal", "Strong", "No" },
                new object[] { "D7", "Overcast", "Cool", "Normal", "Strong", "Yes" },
                new object[] { "D8", "Sunny", "Mild", "High", "Weak", "No" },
                new object[] { "D9", "Sunny", "Cool", "Normal", "Weak", "Yes" },
                new object[] { "D10", "Rain", "Mild", "Normal", "Weak", "Yes" },
                new object[] { "D11", "Sunny", "Mild", "Normal", "Strong", "Yes" },
                new object[] { "D12", "Overcast", "Mild", "High", "Strong", "Yes" },
                new object[] { "D13", "Overcast", "Hot", "Normal", "Weak", "Yes" },
                new object[] { "D14", "Rain", "Mild", "High", "Strong", "No" }
            };

            foreach (object[] day in days)
            {
                data.Rows.Add(day);
            }
            #endregion

            #region doc_codebook
            // The codebook converts the listed string columns into discrete symbols
            var codebook = new Codification(data,
                "Outlook", "Temperature", "Humidity", "Wind", "PlayTennis");

            // Split the codified table into training inputs and outputs
            DataTable symbols = codebook.Apply(data);
            int[][] inputs = symbols.ToArray<int>("Outlook", "Temperature", "Humidity", "Wind");
            int[] outputs = symbols.ToArray<int>("PlayTennis");
            #endregion

            #region doc_learn
            // Learn a Naive Bayes model from the examples
            var learner = new NaiveBayesLearning();
            NaiveBayes model = learner.Learn(inputs, outputs);
            #endregion


            #region doc_test
            // Should one play tennis on a sunny, cool, humid and windy day?
            // The question is first encoded with the codebook
            int[] instance = codebook.Translate("Sunny", "Cool", "High", "Strong");

            // The numeric decision for that instance; it will be 0
            int decision = model.Decide(instance);

            // The decision converted back to a label; it will be "No"
            string result = codebook.Translate("PlayTennis", decision);

            // The probability of each possible answer: { 0.795, 0.205 }
            double[] probabilities = model.Probabilities(instance);
            #endregion

            Assert.AreEqual("No", result);
            Assert.AreEqual(0, decision);
            Assert.AreEqual(0.795, probabilities[0], 1e-3);
            Assert.AreEqual(0.205, probabilities[1], 1e-3);
            Assert.AreEqual(1, probabilities.Sum(), 1e-10);
            Assert.IsFalse(double.IsNaN(probabilities[0]));
            Assert.AreEqual(2, probabilities.Length);
        }
        // Converts the DataTable of strings into double[] inputs and int outputs,
        // and builds the symbol table (codebook) used for the conversion.
        // Writes codebook, Symbols, Inputs, Outputs, IntInputs, DoubleOutputs,
        // SymbolCounts and ClassCount.
        public void Codify()
        {
            string[] inputColumns = { "Array Size", "Runs" };
            string outputColumn = "Selected Sorting Algorithm";
            string[] allColumns = { inputColumns[0], inputColumns[1], outputColumn };

            codebook = new Codification(Table, allColumns);
            Symbols = codebook.Apply(Table);

            // The same codified columns are exposed both as double and as int arrays.
            Inputs = Symbols.ToArray(inputColumns);
            Outputs = Symbols.ToArray<int>(outputColumn);

            IntInputs = Symbols.ToArray<int>(inputColumns);
            DoubleOutputs = Symbols.ToArray<double>(outputColumn);

            // Symbol counts of the two input columns, then of the output column.
            SymbolCounts = new int[] { codebook[inputColumns[0]].Symbols, codebook[inputColumns[1]].Symbols };
            ClassCount = codebook[outputColumn].Symbols;
        }
        /// <summary>
        /// Runs a multinomial logistic regression analysis of "prog" on "ses" and
        /// "write" from the hsbdemo data and checks the nine reported coefficients.
        /// </summary>
        public void AnalyzeExample1()
        {
            // http://www.ats.ucla.edu/stat/stata/dae/mlogit.htm
            CsvReader reader = CsvReader.FromText(Properties.Resources.hsbdemo, hasHeaders: true);

            var table = reader.ToTable();

            var codification = new Codification(table);
            codification["ses"].VariableType = CodificationVariable.CategoricalWithBaseline;
            codification["prog"].VariableType = CodificationVariable.Categorical;
            codification["prog"].Remap("academic", 0);

            var inputs = codification.Apply(table, "ses", "write");
            var output = codification.Apply(table, "prog");

            // Get inputs
            string[] inputNames;
            var inputsData = inputs.ToArray(out inputNames);

            // Get outputs
            string[] outputNames;
            var outputData = output.ToArray(out outputNames);

            var analysis = new MultinomialLogisticRegressionAnalysis(inputsData, outputData, inputNames, outputNames);

            analysis.Compute();

            // Expected coefficients, in the order the analysis reports them.
            string[] expectedNames =
            {
                "(baseline)",
                "Intercept", "write", "ses: middle", "ses: high",
                "Intercept", "write", "ses: middle", "ses: high"
            };

            string[] expectedClasses =
            {
                "prog: academic",
                "prog: general", "prog: general", "prog: general", "prog: general",
                "prog: vocation", "prog: vocation", "prog: vocation", "prog: vocation"
            };

            double[] expectedValues =
            {
                0,
                1.0302662690579185, -0.083689163424126883, -0.58217998138556049, -1.1112048569892283,
                1.2715455854613191, -0.13231057837059781, 0.20451187629162043, -0.93207938490449849
            };

            Assert.AreEqual(9, analysis.Coefficients.Count);

            for (int i = 0; i < expectedValues.Length; i++)
            {
                Assert.AreEqual(expectedNames[i], analysis.Coefficients[i].Name);
                Assert.AreEqual(expectedClasses[i], analysis.Coefficients[i].Class);

                // The values are fitted numerically, so they are compared with a
                // small tolerance instead of exact floating-point equality.
                Assert.AreEqual(expectedValues[i], analysis.Coefficients[i].Value, 1e-8);
            }
        }
예제 #57
-1
        /// <summary>
        /// The main entry point for the program
        /// </summary>
        public static void Main()
        {
            try
            {
                #region Exploratory Data Analysis Explanation
                /*
                    John Tukey coined the term Exploratory Data Analysis in his seminal book of the same name.  There really is not a prescribed way to do an EDA.
                    Tools I use for EDA include Microsoft Excel, plots and visual inspection of the data.  Without creating an early bias, gut feelings do play a role in a good EDA.
                    Some objectives of EDA are to:
                        •	Identify the types of data in the dataset
                        •	Examine the statistical properties of the data
                        •	Look for invalid data (may need Domain or Subject Matter experts)
                        •	Understand the provenance of the data
                        •	Aide in the selection of appropriate statistical tools and techniques

                    For our diabetes dataset, notice that there is both quantitative and qualitative data.  Note that the result or outcome variable (which indicates if the person has
                    diabetes) is nominal data with only two states.  This is called dichotomous or binary categorical data which rules out some machine learning algorithms and directs
                    us to others.
                */
                #endregion
                // Because of time constraints, the loading of the DataTables and EDA is complete.
                XmlConfigurator.Configure();

                Logger.Info("Exploratory Data Analysis");

                FileInfo fi = new FileInfo("training.csv");
                DataTable training = DataTableCsvConvertor.GetDataTableFromCsv(fi);

                fi = new FileInfo("test.csv");
                DataTable test = DataTableCsvConvertor.GetDataTableFromCsv(fi);

                // Print out the first few table rows.
                Head.PrintHead(training);

                //Logger.Info(string.Empty);
                //BasicStatistics.BasicStats(training); // For most EDA's Basic Descriptive statistics are important, but this outputs a lot of information

                #region Data Imputation & Cleanup Explanation
                /*
                    Keep in mind that Machine Learning algorithms operate on numerical data only, so something will have to be done if the data is text or NULL.  Also, predictor
                    variables (aka features or columns of data) that do not vary will not be predictive and may need to be removed.  Due to time constraints, the EDA, ETL (Extract, Transform and Load)
                    and data cleaning are already completed in the solution.  For this analysis, we remove the HeartRate column because it is all NULL, and remove any rows of data that contain NULLs.
                */
                #endregion
                // Delete any columns that are not needed.
                training.Columns.Remove("HeartRate");
                test.Columns.Remove("HeartRate");

                // How to handle rows containing missing or NA data - data imputation or deletion?
                training = DataImputation.RemoveMissing(training);
                test = DataImputation.RemoveMissing(test);

                Codification codebook = new Codification(training);
                int outputClasses = 2;

                string[] inputColumns =
                {
                    "Gender", "YearOfBirth", "SmokingEffectiveYear", "NISTcode", "Height", "Weight", "BMI", "SystolicBP", "DiastolicBP", "RespiratoryRate", "Temperature"
                };

                string outputColumn = "DMIndicator";

                // Translate our training data into integer symbols using our codebook:
                DataTable symbols = codebook.Apply(training);
                double[][] inputs = symbols.ToArray(inputColumns);
                int[] outputs = Matrix.ToArray<int>(training, outputColumn);

                #region Decision Tree Overview
                /*
                    Decision Trees are very powerful, especially with a binary classification model, and are somewhat resistant to over-fitting the data.
                    Additionally, they are intuitive to explain to stakeholders.
                */
                #endregion
                Logger.Info(string.Empty);
                Logger.Info("Decision Tree");

                DecisionVariable[] attributes =
                {
                    new DecisionVariable("Gender", 2), // 2 possible values (Male, Female)
                    new DecisionVariable("YearOfBirth", DecisionVariableKind.Continuous),
                    new DecisionVariable("SmokingEffectiveYear", DecisionVariableKind.Continuous),
                    new DecisionVariable("NISTcode", DecisionVariableKind.Continuous),
                    new DecisionVariable("Height", DecisionVariableKind.Continuous),
                    new DecisionVariable("Weight", DecisionVariableKind.Continuous),
                    new DecisionVariable("BMI", DecisionVariableKind.Continuous),
                    new DecisionVariable("SystolicBP", DecisionVariableKind.Continuous),
                    new DecisionVariable("DiastolicBP", DecisionVariableKind.Continuous),
                    new DecisionVariable("RespiratoryRate", DecisionVariableKind.Continuous),
                    new DecisionVariable("Temperature", DecisionVariableKind.Continuous)
                };

                DecisionTree tree = new DecisionTree(attributes, outputClasses);

                C45Learning c45learning = new C45Learning(tree);

                // Learn the training instances!
                c45learning.Run(inputs, outputs);

                // The next two lines are optional to save the model into IL for future use.
                // Convert to an expression tree
                var expression = tree.ToExpression();
                // Compiles the expression to IL
                var func = expression.Compile();

                #region Evaluation Explanation
                /*
                    To evaluate the model, now use each row of the test dataset to predict the output variable (DMIndicator) using the DecisionTree’s compute method passing in the same
                    variables that were used to train the model.  Store the test dataset’s value of DMIndicator and the predicted value in a DataTable and integer collection for future
                    validation of the model.
                */
                #endregion
                Evaluator.Evaluate(test, tree);

                #region Validation Explanation
                /*
                    There are many ways to validate models, but we will use a confusion matrix because it is intuitive and a widely accepted way to validate binary classification models.
                    Conveniently, Accord.NET has a ConfusionMatrix class to create this matrix for you.  Pass the collections of predicted and actual integer values
                    stored earlier to the ConfusionMatrix class and output the matrix and accuracy.
                */
                #endregion
                Validator.Validate(test, tree);

                #region Support Vector Machine Overview
                /*
                    Support Vector Machines are powerful classification machine learning algorithms with very few knobs to turn.  The kernel of the SVM can be exchanged to use
                    a number of different mathematical algorithms including polynomials, neural networks and Gaussian functions.
                */
                #endregion
                Logger.Info(string.Empty);
                Logger.Info("Support Vector Machine");

                // Select the kernel for the SVM; a linear kernel is used here.
                IKernel kernel = new Linear();

                // Create the Multi-class Support Vector Machine using the selected Kernel
                int inputDimension = inputs[0].Length;
                var ksvm = new MulticlassSupportVectorMachine(inputDimension, kernel, outputClasses);

                // Create the learning algorithm using the machine and the training data
                var ml = new MulticlassSupportVectorLearning(ksvm, inputs, outputs)
                {
                    Algorithm = (svm, classInputs, classOutputs, i, j) =>
                    {
                        return new SequentialMinimalOptimization(svm, classInputs, classOutputs)
                        {
                            CacheSize = 0
                        };
                    }
                };

                double svmError = ml.Run();

                #region Evaluation Explanation
                /*
                    To evaluate the model, now use each row of the test dataset to predict the output variable (DMIndicator) using the trained Support Vector Machine, passing in the same
                    variables that were used to train the model.  Store the test dataset’s value of DMIndicator and the predicted value in a DataTable and integer collection for future
                    validation of the model.
                */
                #endregion
                Evaluator.Evaluate(test, ksvm);

                #region Validation Explanation
                /*
                    There are many ways to validate models, but we will use a confusion matrix because it is intuitive and a widely accepted way to validate binary classification models.
                    Conveniently, Accord.NET has a ConfusionMatrix class to create this matrix for you.  Pass the collections of predicted and actual integer values
                    stored earlier to the ConfusionMatrix class and output the matrix and accuracy.
                */
                #endregion
                Validator.Validate(test, ksvm);
            }
            catch (Exception ex)
            {
                Logger.Error(ex.ToString());
            }
        }