Exemplo n.º 1
0
        /// <summary>
        ///   Encodes the given table with the given codebook and caches the training
        ///   inputs, the class labels and the integer codes of the positive and
        ///   negative class values on this instance.
        /// </summary>
        /// <param name="dataTable">Raw table to encode; every column except the last one is used as a training attribute.</param>
        /// <param name="codification">Codebook applied to <paramref name="dataTable"/>; it is also stored on this instance.</param>
        private void Initialize(DataTable dataTable, Codification codification)
        {
            this.codificationData = codification;

            DataTable discreteValueDatatable = this.codificationData.Apply(dataTable);

            // Collect the names of the training columns: all columns but the last one.
            int attributeCount = discreteValueDatatable.Columns.Count - 1;
            List<string> columnNames = new List<string>();

            for (int columnIndex = 0; columnIndex < attributeCount; columnIndex++)
            {
                columnNames.Add(discreteValueDatatable.Columns[columnIndex].ColumnName);
            }

            // Training inputs.
            this.trainningAttributes = discreteValueDatatable.ToArray(columnNames.ToArray());

            string classificationColumnName = TableMetaData.ClassAttribute;

            // Class labels: the single class column flattened into a vector.
            this.classificationAttribute = discreteValueDatatable.ToIntArray(classificationColumnName).GetColumn(0);

            // Resolve the codes of the positive/negative class strings with a single
            // lookup each. When a string is not present in the mapping, the
            // corresponding field is left untouched.
            var classMapping = this.codificationData.Columns[classificationColumnName].Mapping;

            int code;

            if (classMapping.TryGetValue(TableMetaData.PositiveString, out code))
            {
                this.positiveValue = code;
            }

            if (classMapping.TryGetValue(TableMetaData.NegativeString, out code))
            {
                this.negativeValue = code;
            }
        }
Exemplo n.º 2
0
        /// <summary>
        ///   Handles a tab change on the main tab control. Only the tab at index 1
        ///   triggers work: when the attribute count reported by the table adapter
        ///   plus one equals the number of configured attributes, the "create model"
        ///   tab is refreshed and the codification is created if it does not exist
        ///   yet; otherwise the first tab is selected again and an error is shown.
        /// </summary>
        /// <param name="sender">The tab control that raised the event.</param>
        /// <param name="e">Event data (not used).</param>
        private void superTabControlMain_SelectedTabChanged(object sender, SuperTabStripSelectedTabChangedEventArgs e)
        {
            SuperTabControl tabControl = sender as SuperTabControl;

            // Every tab other than index 1 needs no handling.
            if (tabControl.SelectedTabIndex != 1)
            {
                return;
            }

            int attributesInDatabase = (int)discreteIntervalTableAdapter.TotalAttributes();
            int attributesInSettings = TableMetaData.AllAttributes.Length;

            if (attributesInDatabase + 1 != attributesInSettings)
            {
                // Counts do not match: go back to the first tab and report the error.
                tabControl.SelectedTabIndex = 0;
                MessageBox.Show("Chưa có dữ liệu rời rạc!", "Lỗi", MessageBoxButtons.OK, MessageBoxIcon.Error);
                return;
            }

            refreshTabCreateModel();

            // Build the codification lazily, only once.
            if (this.codification == null)
            {
                this.codification = new Codification(getDataTableForCodification());
            }
        }
        /// <summary>
        ///   Checks that a codification restricted to the "Category" column keeps
        ///   the table at 5 columns by 5 rows and replaces each category with the
        ///   expected integer symbol.
        /// </summary>
        public void ApplyTest1()
        {
            DataTable source = ProjectionFilterTest.CreateTable();

            // Codebook built for the "Category" column only.
            var codification = new Codification(source, "Category");

            DataTable encoded = codification.Apply(source);

            Assert.AreEqual(5, encoded.Columns.Count);
            Assert.AreEqual(5, encoded.Rows.Count);

            // Expected "Category" symbol of each row, in row order.
            int[] expectedCodes = { 0, 1, 1, 0, 2 };

            for (int row = 0; row < expectedCodes.Length; row++)
            {
                Assert.AreEqual(expectedCodes[row], encoded.Rows[row]["Category"]);
            }
        }
Exemplo n.º 4
0
        /// <summary>
        ///   Creates a generator for the given instrument. Besides storing the
        ///   instrument and creating an empty note codebook, the constructor trains a
        ///   hidden Markov model on a fixed set of sample phrases and generates a
        ///   sequence of word symbols from it. The generated sequence is decoded but
        ///   not stored anywhere.
        /// </summary>
        /// <param name="instrument">Instrument patch stored on this instance.</param>
        public HMMGenerator(PatchNames instrument)
        {
            this.book = new Codebook<Note>();
            this.instrument = instrument;

            // Fixed seed -- presumably to make the generated sequence reproducible.
            Accord.Math.Tools.SetupGenerator(10);

            // Sample phrases, each split into its words.
            char[] separators = { ' ' };

            string[][] phrases =
            {
                "The Big Brown Fox Jumps Over the Ugly Dog".Split(separators, StringSplitOptions.RemoveEmptyEntries),
                "This is too hot to handle".Split(separators, StringSplitOptions.RemoveEmptyEntries),
                "I am flying away like a gold eagle".Split(separators, StringSplitOptions.RemoveEmptyEntries),
                "Onamae wa nan desu ka".Split(separators, StringSplitOptions.RemoveEmptyEntries),
                "And then she asked, why is it so small?".Split(separators, StringSplitOptions.RemoveEmptyEntries),
                "Great stuff John! Now you will surely be promoted".Split(separators, StringSplitOptions.RemoveEmptyEntries),
                "Jayne was taken aback when she found out her son was gay".Split(separators, StringSplitOptions.RemoveEmptyEntries),
            };

            // Transform the phrases into sequences of integer labels
            // using a codification codebook.
            var codebook = new Codification("Words", phrases);

            // Training data for the model.
            int[][] sequence = codebook.Translate("Words", phrases);

            // One symbol per distinct word found in the phrases; the same
            // number is used as the number of states of the forward topology.
            int symbols = codebook["Words"].Symbols;
            var topology = new Forward(states: symbols);

            // Create the hidden Markov model.
            HiddenMarkovModel hmm = new HiddenMarkovModel(topology, symbols);

            // Teach the model about the phrases.
            var teacher = new ViterbiLearning(hmm);
            teacher.Run(sequence);

            // Generate a first symbol, then keep extending the sequence with
            // the model's prediction until it holds `count` symbols.
            List<int> sample = new List<int>();
            int count = 10;
            sample.Add(hmm.Generate(1)[0]);

            while (sample.Count < count)
            {
                var k = hmm.Predict(sample.ToArray(), 1);
                sample.AddRange(k);
            }

            // Decode the generated symbols back into words.
            // NOTE(review): the decoded words are discarded -- confirm whether
            // they were meant to be stored or returned.
            codebook.Translate("Words", sample.ToArray());
        }
Exemplo n.º 5
0
        /// <summary>
        ///   Creates the form, keeps the codebook and shows the given tree together
        ///   with the given rule in the decision tree view.
        /// </summary>
        /// <param name="tree">Tree passed to the decision tree view.</param>
        /// <param name="codification">Codebook stored on the form and passed to the view.</param>
        /// <param name="rule">Rule passed to the view's <c>viewRule</c> method.</param>
        public FormTreeRule(DecisionTree tree, Codification codification, string rule)
            : this()
        {
            this.codification = codification;

            // Load the tree into the view first, then hand it the rule.
            this.decisionTreeView.SetTree(tree, codification);
            this.decisionTreeView.viewRule(rule);
        }
Exemplo n.º 6
0
        /// <summary>
        ///   Checks that detecting the mappings on a small age/classification table
        ///   and applying them yields the same table with each classification
        ///   string replaced by its integer symbol (child = 0, adult = 1, elder = 2).
        /// </summary>
        public void ApplyTest()
        {
            // Raw samples: age and textual classification.
            object[][] rawRows =
            {
                new object[] { 10, "child" },
                new object[] { 7, "child" },
                new object[] { 4, "child" },
                new object[] { 21, "adult" },
                new object[] { 27, "adult" },
                new object[] { 12, "child" },
                new object[] { 79, "elder" },
                new object[] { 40, "adult" },
                new object[] { 30, "adult" },
            };

            // The same samples with the classification given as its expected symbol.
            object[][] encodedRows =
            {
                new object[] { 10, 0 },
                new object[] { 7, 0 },
                new object[] { 4, 0 },
                new object[] { 21, 1 },
                new object[] { 27, 1 },
                new object[] { 12, 0 },
                new object[] { 79, 2 },
                new object[] { 40, 1 },
                new object[] { 30, 1 },
            };

            DataTable input = new DataTable("Sample data");
            input.Columns.Add("Age", typeof(int));
            input.Columns.Add("Classification", typeof(string));

            foreach (object[] values in rawRows)
            {
                input.Rows.Add(values);
            }

            DataTable expected = new DataTable("Sample data");
            expected.Columns.Add("Age", typeof(int));
            expected.Columns.Add("Classification", typeof(int));

            foreach (object[] values in encodedRows)
            {
                expected.Rows.Add(values);
            }

            // Detect the mappings, then apply the categorization.
            Codification target = new Codification();
            target.Detect(input);
            DataTable actual = target.Apply(input);

            // Compare cell by cell.
            for (int row = 0; row < actual.Rows.Count; row++)
            {
                for (int column = 0; column < actual.Columns.Count; column++)
                {
                    Assert.AreEqual(expected.Rows[row][column], actual.Rows[row][column]);
                }
            }
        }
Exemplo n.º 7
0
        /// <summary>
        ///   Creates the form, keeps the codebook, shows the given tree in the
        ///   decision tree view and, when the tree has a root node, builds the rule
        ///   list starting from that root.
        /// </summary>
        /// <param name="tree">Tree to display; may be null.</param>
        /// <param name="codification">Codebook stored on the form and passed to the view.</param>
        public FormTreeView(DecisionTree tree, Codification codification)
            : this()
        {
            this.codification = codification;

            // Hand the learned tree to the view.
            decisionTreeView.SetTree(tree, codification);

            // Without a root there are no rules to list.
            if (tree == null || tree.Root == null)
            {
                return;
            }

            CreateRuleList(tree.Root, "");
        }
Exemplo n.º 8
0
        /// <summary>
        ///   Replaces the displayed tree: stores the tree and the codebook, clears
        ///   the tree view and, when the tree has a root, adds the converted root
        ///   node to the view.
        /// </summary>
        /// <param name="tree">Tree to display; may be null.</param>
        /// <param name="codification">Codebook stored alongside the tree.</param>
        public void SetTree(DecisionTree tree, Codification codification)
        {
            this.codification = codification;
            this.treeSource = tree;

            treeView1.Nodes.Clear();

            bool hasRoot = treeSource != null && treeSource.Root != null;

            if (!hasRoot)
            {
                return;
            }

            treeView1.Nodes.Add(convert(TreeSource.Root));
        }
Exemplo n.º 9
0
        /// <summary>
        ///   Builds the "play tennis" data set with continuous temperature and
        ///   humidity columns, encodes it with a codebook and trains a decision
        ///   tree on it with C4.5.
        /// </summary>
        /// <param name="tree">Receives the decision tree after the learning run.</param>
        /// <param name="inputs">Receives the encoded Outlook, Temperature, Humidity and Wind values.</param>
        /// <param name="outputs">Receives the encoded PlayTennis values.</param>
        public static void CreateMitchellExample(out DecisionTree tree, out double[][] inputs, out int[] outputs)
        {
            DataTable table = new DataTable("Mitchell's Tennis Example");

            table.Columns.Add("Day", typeof(string));
            table.Columns.Add("Outlook", typeof(string));
            table.Columns.Add("Temperature", typeof(double));
            table.Columns.Add("Humidity", typeof(double));
            table.Columns.Add("Wind", typeof(string));
            table.Columns.Add("PlayTennis", typeof(string));

            // Day, Outlook, Temperature, Humidity, Wind, PlayTennis
            object[][] days =
            {
                new object[] { "D1", "Sunny", 85, 85, "Weak", "No" },
                new object[] { "D2", "Sunny", 80, 90, "Strong", "No" },
                new object[] { "D3", "Overcast", 83, 78, "Weak", "Yes" },
                new object[] { "D4", "Rain", 70, 96, "Weak", "Yes" },
                new object[] { "D5", "Rain", 68, 80, "Weak", "Yes" },
                new object[] { "D6", "Rain", 65, 70, "Strong", "No" },
                new object[] { "D7", "Overcast", 64, 65, "Strong", "Yes" },
                new object[] { "D8", "Sunny", 72, 95, "Weak", "No" },
                new object[] { "D9", "Sunny", 69, 70, "Weak", "Yes" },
                new object[] { "D10", "Rain", 75, 80, "Weak", "Yes" },
                new object[] { "D11", "Sunny", 75, 70, "Strong", "Yes" },
                new object[] { "D12", "Overcast", 72, 90, "Strong", "Yes" },
                new object[] { "D13", "Overcast", 81, 75, "Weak", "Yes" },
                new object[] { "D14", "Rain", 71, 80, "Strong", "No" },
            };

            foreach (object[] day in days)
            {
                table.Rows.Add(day);
            }

            // The codebook converts the string columns into integer symbols.
            Codification codebook = new Codification(table);

            int outlookSymbols = codebook["Outlook"].Symbols; // Sunny, Overcast, Rain
            int windSymbols = codebook["Wind"].Symbols;       // Weak, Strong

            DecisionVariable[] attributes =
            {
                new DecisionVariable("Outlook", outlookSymbols),
                new DecisionVariable("Temperature", DecisionVariableKind.Continuous),
                new DecisionVariable("Humidity", DecisionVariableKind.Continuous),
                new DecisionVariable("Wind", windSymbols)
            };

            int classCount = codebook["PlayTennis"].Symbols; // Yes, No

            tree = new DecisionTree(attributes, classCount);
            C45Learning c45 = new C45Learning(tree);

            // Encode the table and split it into inputs and outputs.
            DataTable symbols = codebook.Apply(table);
            inputs = symbols.ToArray("Outlook", "Temperature", "Humidity", "Wind");
            outputs = symbols.ToArray<int>("PlayTennis");

            // Train the tree; the reported error is not used.
            c45.Run(inputs, outputs);
        }
Exemplo n.º 10
0
        /// <summary>
        ///   Creates an untrained two-class decision tree whose input variables are
        ///   taken from every codification column except the last one.
        /// </summary>
        /// <param name="codification">Codebook providing the column names and symbol counts.</param>
        /// <returns>A new decision tree with one discrete variable per input column and 2 classes.</returns>
        protected DecisionTree CreateDecisionTree(Codification codification)
        {
            var variables = new List<DecisionVariable>();

            // The last column is left out of the input variables.
            int inputColumnCount = codification.Columns.Count - 1;
            int column = 0;

            while (column < inputColumnCount)
            {
                string name = codification.Columns[column].ColumnName;
                int symbolCount = codification[column].Symbols;

                variables.Add(new DecisionVariable(name, symbolCount));
                column++;
            }

            return new DecisionTree(variables.ToArray(), 2);
        }
Exemplo n.º 11
0
        /// <summary>
        ///   Walks through the ID3 decision tree example from
        ///   http://crsouza.com/2012/01/decision-trees-in-c/ : builds the
        ///   "play tennis" table, encodes it with a codebook, trains a decision tree
        ///   and writes the decision for one query to the console.
        /// </summary>
        public static void TestAccord()
        {
            DataTable data = new DataTable("Memory");

            /* add People names/ID to columns dynamically */
            data.Columns.Add("Day", "Outlook", "Temperature", "Humidity", "Wind", "PlayTennis");

            /* possibly add sentences to this?
             * maybe keywords */
            // One row per day, D1 to D14.
            data.Rows.Add("D1", "Sunny", "Hot", "High", "Weak", "No");
            data.Rows.Add("D2", "Sunny", "Hot", "High", "Strong", "No");
            data.Rows.Add("D3", "Overcast", "Hot", "High", "Weak", "Yes");
            data.Rows.Add("D4", "Rain", "Mild", "High", "Weak", "Yes");
            data.Rows.Add("D5", "Rain", "Cool", "Normal", "Weak", "Yes");
            data.Rows.Add("D6", "Rain", "Cool", "Normal", "Strong", "No");
            data.Rows.Add("D7", "Overcast", "Cool", "Normal", "Strong", "Yes");
            data.Rows.Add("D8", "Sunny", "Mild", "High", "Weak", "No");
            data.Rows.Add("D9", "Sunny", "Cool", "Normal", "Weak", "Yes");
            data.Rows.Add("D10", "Rain", "Mild", "Normal", "Weak", "Yes");
            data.Rows.Add("D11", "Sunny", "Mild", "Normal", "Strong", "Yes");
            data.Rows.Add("D12", "Overcast", "Mild", "High", "Strong", "Yes");
            data.Rows.Add("D13", "Overcast", "Hot", "Normal", "Weak", "Yes");
            data.Rows.Add("D14", "Rain", "Mild", "High", "Strong", "No");

            // Create a new codification codebook to
            // convert strings into integer symbols
            Codification codebook = new Codification(data, "Outlook", "Temperature", "Humidity", "Wind", "PlayTennis");

            // One discrete variable per input column. The number of symbols is read
            // from the codebook so it always matches the data above.
            DecisionVariable[] attributes =
            {
                new DecisionVariable("Outlook", codebook["Outlook"].Symbols),         // Sunny, Overcast, Rain
                new DecisionVariable("Temperature", codebook["Temperature"].Symbols), // Hot, Mild, Cool
                new DecisionVariable("Humidity", codebook["Humidity"].Symbols),       // High, Normal
                new DecisionVariable("Wind", codebook["Wind"].Symbols)                // Weak, Strong
            };

            /* For possible values, make it one so it narrows to one individual fact about a word */
            // Number of possible output values for playing tennis (Yes, No).
            int classCount = codebook["PlayTennis"].Symbols;

            DecisionTree tree = new DecisionTree(attributes, classCount);

            // Create a new instance of the ID3 algorithm
            ID3Learning id3learning = new ID3Learning(tree);

            // Translate our training data into integer symbols using our codebook:
            DataTable symbols = codebook.Apply(data);

            int[][] inputs = symbols.ToArray<int>("Outlook", "Temperature", "Humidity", "Wind");
            int[] outputs = symbols.ToIntArray("PlayTennis").GetColumn(0);

            // Learn the training instances!
            id3learning.Run(inputs, outputs);

            /* This is how we will query the memory */
            int[] query = codebook.Translate("Sunny", "Hot", "High", "Strong");
            int output = tree.Compute(query);

            /* Respond to user */
            string answer = codebook.Translate("PlayTennis", output); // answer will be "No".

            Console.WriteLine(answer);
        }
        /// <summary>
        ///   Trains a C4.5 decision tree on the given table and classifies the nine
        ///   values held in <c>inputlar</c> (a member declared outside this method;
        ///   presumably the values entered by the user -- TODO confirm).
        /// </summary>
        /// <param name="tbl">
        ///   Training table. It must contain the nine attribute columns listed below
        ///   and a "Class" column.
        /// </param>
        /// <returns>The "Class" label decoded from the tree's decision for the query.</returns>
        private string C45(DataTable tbl)
        {
            int classCount = 2;
            Codification codebook = new Codification(tbl);

            // Nine discrete attributes, each declared with 10 possible symbols.
            DecisionVariable[] attributes =
            {
                new DecisionVariable("Clump Thickness", 10),
                new DecisionVariable("Uniformity of Cell Size", 10),
                new DecisionVariable("Uniformity of Cell Shape", 10),
                new DecisionVariable("Marginal Adhesion", 10),
                new DecisionVariable("Single Epithelial Cell Size", 10),
                new DecisionVariable("Bare Nuclei", 10),
                new DecisionVariable("Bland Chromatin", 10),
                new DecisionVariable("Normal Nucleoli", 10),
                new DecisionVariable("Mitoses", 10),
            };

            DecisionTree tree = new DecisionTree(attributes, classCount);

            // Translate our training data into integer symbols using our codebook:
            DataTable symbols = codebook.Apply(tbl);

            double[][] inputs = symbols.ToIntArray("Clump Thickness", "Uniformity of Cell Size", "Uniformity of Cell Shape", "Marginal Adhesion", "Single Epithelial Cell Size", "Bare Nuclei", "Bland Chromatin", "Normal Nucleoli", "Mitoses").ToDouble();
            int[] outputs = symbols.ToIntArray("Class").GetColumn(0);

            // Create the C4.5 algorithm and learn the decision tree.
            // The reported training error is not used.
            C45Learning c45 = new C45Learning(tree);
            c45.Run(inputs, outputs);

            // Encode the query values, let the tree decide and decode the decision.
            int[] query = codebook.Translate(inputlar[0], inputlar[1], inputlar[2], inputlar[3],
                                             inputlar[4], inputlar[5], inputlar[6], inputlar[7], inputlar[8]);
            int output = tree.Compute(query);
            string answer = codebook.Translate("Class", output);

            return answer;
        }
Exemplo n.º 13
0
        /// <summary>
        ///   Checks that a codebook built from a three-column table translates full
        ///   rows and row prefixes into the expected symbols, and that translating
        ///   more values than there are columns throws.
        /// </summary>
        public void TranslateTest3()
        {
            string[] colNames = { "col1", "col2", "col3" };
            DataTable table = new DataTable("TranslateTest1 Table");
            table.Columns.Add(colNames);

            table.Rows.Add(1, 2, 3);
            table.Rows.Add(1, 3, 5);
            table.Rows.Add(1, 4, 7);
            table.Rows.Add(2, 4, 6);
            table.Rows.Add(2, 5, 8);
            table.Rows.Add(2, 6, 10);
            table.Rows.Add(3, 4, 5);
            table.Rows.Add(3, 5, 7);
            table.Rows.Add(3, 6, 9);

            // ok, so values 1,2,3 are in column 1
            // values 2,3,4,5,6 in column 2
            // values 3,5,6,7,8,9,10 in column 3
            var codeBook = new Codification(table);

            // The comparison result has to be asserted: a bare Matrix.IsEqual call
            // only returns a bool and would never fail the test.
            Assert.IsTrue(Matrix.IsEqual(new int[] { 0, 0, 0 }, codeBook.Translate(new[] { "1", "2", "3" })));
            Assert.IsTrue(Matrix.IsEqual(new int[] { 0, 1, 1 }, codeBook.Translate(new[] { "1", "3", "5" })));
            Assert.IsTrue(Matrix.IsEqual(new int[] { 0, 2, 2 }, codeBook.Translate(new[] { "1", "4", "7" })));
            Assert.IsTrue(Matrix.IsEqual(new int[] { 1, 2, 3 }, codeBook.Translate(new[] { "2", "4", "6" })));
            Assert.IsTrue(Matrix.IsEqual(new int[] { 1, 3, 4 }, codeBook.Translate(new[] { "2", "5", "8" })));
            Assert.IsTrue(Matrix.IsEqual(new int[] { 1, 4, 5 }, codeBook.Translate(new[] { "2", "6", "10" })));
            Assert.IsTrue(Matrix.IsEqual(new int[] { 2, 2, 1 }, codeBook.Translate(new[] { "3", "4", "5" })));
            Assert.IsTrue(Matrix.IsEqual(new int[] { 2, 3, 2 }, codeBook.Translate(new[] { "3", "5", "7" })));
            Assert.IsTrue(Matrix.IsEqual(new int[] { 2, 4, 6 }, codeBook.Translate(new[] { "3", "6", "9" })));

            // Prefixes of a row are translated column by column as well.
            Assert.IsTrue(Matrix.IsEqual(new int[] { 2 }, codeBook.Translate(new[] { "3" })));
            Assert.IsTrue(Matrix.IsEqual(new int[] { 2, 4 }, codeBook.Translate(new[] { "3", "6" })));
            Assert.IsTrue(Matrix.IsEqual(new int[] { 2, 4, 6 }, codeBook.Translate(new[] { "3", "6", "9" })));

            // Four values for a three-column codebook must be rejected.
            bool thrown = false;

            try { codeBook.Translate(new[] { "3", "6", "9", "10" }); }
            catch (Exception) { thrown = true; }

            Assert.IsTrue(thrown);
        }
Exemplo n.º 14
0
        /// <summary>
        ///   Builds, trains and serializes the decision-support tree of a task:
        ///   collects the task's fields and their symbol counts, loads the training
        ///   data, trains an ID3 decision tree on the relevant fields and stores the
        ///   result under the task code.
        /// </summary>
        /// <param name="codTask">Code of the task the tree is built for.</param>
        /// <param name="totalExecutions">Total number of executions of the task; used when rating field relevance.</param>
        /// <param name="classCount">Number of possible results (classes) of the task.</param>
        /// <returns>The populated <c>Data.DecisionTree</c>, including the trained tree and the codebook.</returns>
        public Data.DecisionTree runDecisionSupport(int codTask, int totalExecutions, int classCount)
        {
            Data.DecisionTree decisionTree = new Data.DecisionTree();

            // Used when a previously created decision tree exists; intended for
            // reusing some of its parameters.
            Data.DecisionTree serializedTree = null;

            // Check whether a tree has been generated before.
            bool hasSerializedTree = decisionTree.Serialization.hasSerializedTree(codTask);

            // Load the existing tree.
            if (hasSerializedTree)
            {
                serializedTree = decisionTree.deserializeTree(codTask);
            }

            Dictionary<dynamic, int> fieldSymbols = new Dictionary<dynamic, int>();

            // Fill the dictionary with the process fields and the number of symbols of each field.
            fillSymbolsCount(codTask, ref fieldSymbols);

            // Set the relevance of the fields according to their variation rate and null rate.
            setSymbolsRelevance(ref fieldSymbols, totalExecutions, codTask);

            StringBuilder CodFieldListSB = new StringBuilder();
            StringBuilder CodFieldListSBIsNull = new StringBuilder();
            StringBuilder DsFieldNameList = new StringBuilder();
            List<string> codFieldList = new List<string>();

            // Prepare the field lists.
            List<string> codFieldListComDsFlowResult = prepareFieldList(ref fieldSymbols, ref CodFieldListSB, ref CodFieldListSBIsNull, ref DsFieldNameList, ref codFieldList);

            // Query the training data.
            DataTable data = getData(codTask, CodFieldListSB, CodFieldListSBIsNull);

            decisionTree.Data = data;

            // Set up the TRAINING object.
            decisionTree.Training.setTrainingData(data);

            // Fill the VALIDATION part of the DecisionTree.
            decisionTree.Validation.setValidationData(data);

            // ############################################# Moved to the Training class.

            // Build the codebook from the data; it converts the strings into integers.
            Codification codebook = new Codification(data, codFieldListComDsFlowResult.ToArray());

            // Build the list of decision variables from the relevant fields.
            List<DecisionVariable> decisionaVariableList = new List<DecisionVariable>();

            foreach (KeyValuePair<dynamic, int> entry in fieldSymbols.Where(p => ((DecisionSupportField)p.Key).relevante == true))
            {
                DecisionSupportField c = entry.Key;
                c.simbolos = entry.Value;
                decisionaVariableList.Add(new DecisionVariable(c.codigo.ToString(), c.simbolos + 1)); // + 1 for the possible null
            }

            // Variables that influence the decision.
            DecisionVariable[] attributes = decisionaVariableList.ToArray();

            // Create the decision tree.
            Accord.MachineLearning.DecisionTrees.DecisionTree tree = new Accord.MachineLearning.DecisionTrees.DecisionTree(attributes, classCount);

            // Create an instance of the learning algorithm in use, ID3.
            ID3Learning id3learning = new ID3Learning(tree);

            // Translate the training data into integer symbols using the codebook.
            DataTable symbols = codebook.Apply(decisionTree.Training.TrainingData);

            // Input columns.
            // *** The number of input columns must equal the number of DecisionVariables (attributes).
            int[][] inputs = symbols.ToArray<int>(codFieldList.ToArray());

            // Output column.
            int[] outputs = symbols.ToArray<int>("DsFlowResult");

            // Learn from the training instances.
            id3learning.Run(inputs, outputs);

            // ############################################# Moved to the Training class.

            decisionTree.Tree = tree;

            // Hand the field code/name lists over to the decisionTree object.
            // Kept this way to preserve the use of the 'ref' keyword.
            decisionTree.CodFieldListSB = CodFieldListSB;
            decisionTree.CodFieldListSBIsNull = CodFieldListSBIsNull;
            decisionTree.DsFieldNameList = DsFieldNameList;
            decisionTree.codFieldList = codFieldList;
            decisionTree.codFieldListComDsFlowResult = codFieldListComDsFlowResult;
            decisionTree.fieldSymbols = fieldSymbols;
            decisionTree.Codebook = codebook;

            decisionTree.serializeTree(codTask);

            return decisionTree;
        }
Exemplo n.º 15
0
        /// <summary>
        ///   Trains a discrete Naive Bayes classifier on the "play tennis" data set
        ///   and checks the decision and the class responses for a sunny, cool,
        ///   humid and windy day.
        /// </summary>
        public void ComputeTest()
        {
            DataTable table = new DataTable("Mitchell's Tennis Example");

            table.Columns.Add("Day", "Outlook", "Temperature", "Humidity", "Wind", "PlayTennis");

            // Day, Outlook, Temperature, Humidity, Wind, PlayTennis
            object[][] days =
            {
                new object[] { "D1", "Sunny", "Hot", "High", "Weak", "No" },
                new object[] { "D2", "Sunny", "Hot", "High", "Strong", "No" },
                new object[] { "D3", "Overcast", "Hot", "High", "Weak", "Yes" },
                new object[] { "D4", "Rain", "Mild", "High", "Weak", "Yes" },
                new object[] { "D5", "Rain", "Cool", "Normal", "Weak", "Yes" },
                new object[] { "D6", "Rain", "Cool", "Normal", "Strong", "No" },
                new object[] { "D7", "Overcast", "Cool", "Normal", "Strong", "Yes" },
                new object[] { "D8", "Sunny", "Mild", "High", "Weak", "No" },
                new object[] { "D9", "Sunny", "Cool", "Normal", "Weak", "Yes" },
                new object[] { "D10", "Rain", "Mild", "Normal", "Weak", "Yes" },
                new object[] { "D11", "Sunny", "Mild", "Normal", "Strong", "Yes" },
                new object[] { "D12", "Overcast", "Mild", "High", "Strong", "Yes" },
                new object[] { "D13", "Overcast", "Hot", "Normal", "Weak", "Yes" },
                new object[] { "D14", "Rain", "Mild", "High", "Strong", "No" },
            };

            foreach (object[] day in days)
            {
                table.Rows.Add(day);
            }

            // The codebook converts the strings into discrete symbols.
            Codification codebook = new Codification(table);

            int classCount = codebook["PlayTennis"].Symbols; // Yes, No
            int inputCount = 4;                              // Outlook, Temperature, Humidity, Wind

            // One prior distribution per input variable, sized by its symbol count.
            GeneralDiscreteDistribution[] priors =
            {
                new GeneralDiscreteDistribution(codebook["Outlook"].Symbols),
                new GeneralDiscreteDistribution(codebook["Temperature"].Symbols),
                new GeneralDiscreteDistribution(codebook["Humidity"].Symbols),
                new GeneralDiscreteDistribution(codebook["Wind"].Symbols)
            };

            var target = new NaiveBayes<GeneralDiscreteDistribution>(classCount, inputCount, priors);

            // Encode the table and estimate the model from it.
            DataTable symbols = codebook.Apply(table);
            double[][] inputs = symbols.ToArray("Outlook", "Temperature", "Humidity", "Wind");
            int[] outputs = symbols.ToArray<int>("PlayTennis");

            target.Estimate(inputs, outputs);

            // Query: a sunny, cool, humid and windy day.
            double[] instance = codebook.Translate("Sunny", "Cool", "High", "Strong").ToDouble();

            double logLikelihood;
            double[] responses;
            int decision = target.Compute(instance, out logLikelihood, out responses);

            string label = codebook.Translate("PlayTennis", decision);

            Assert.AreEqual("No", label);
            Assert.AreEqual(0, decision);
            Assert.AreEqual(0.795, responses[0], 1e-3);
            Assert.AreEqual(1, responses.Sum(), 1e-10);
            Assert.IsFalse(double.IsNaN(responses[0]));
            Assert.AreEqual(2, responses.Length);
        }
Exemplo n.º 16
0
 /// <summary>
 ///   Builds the string representation of this instance using the given
 ///   codebook and culture.
 /// </summary>
 ///
 /// <param name="codebook">Codebook forwarded to the internal formatter.</param>
 /// <param name="cultureInfo">Culture forwarded to the internal formatter.</param>
 ///
 /// <returns>
 ///   A <see cref="System.String"/> that represents this instance.
 /// </returns>
 ///
 public string ToString(Codification codebook, CultureInfo cultureInfo)
 {
     string text = toString(codebook, cultureInfo);
     return text;
 }
Exemplo n.º 17
0
 /// <summary>
 ///   Builds the string representation of this instance using the given
 ///   codebook and the current UI culture.
 /// </summary>
 ///
 /// <param name="codebook">Codebook forwarded to the internal formatter.</param>
 ///
 /// <returns>
 ///   A <see cref="System.String"/> that represents this instance.
 /// </returns>
 ///
 public string ToString(Codification codebook)
 {
     CultureInfo culture = CultureInfo.CurrentUICulture;
     return toString(codebook, culture);
 }
Exemplo n.º 18
0
        /// <summary>
        ///   Trains a C4.5 decision tree on the nursery data set embedded in the test
        ///   resources, asserting the expected shape of the data and of the tree on the way.
        /// </summary>
        ///
        /// <param name="inputs">Receives the codified values of the eight input columns.</param>
        /// <param name="outputs">Receives the codified values of the "output" column.</param>
        ///
        /// <returns>The tree after it has been trained by <see cref="C45Learning"/>.</returns>
        ///
        private static DecisionTree createTree(out double[][] inputs, out int[] outputs)
        {
            string nurseryData = Resources.nursery;

            string[] inputColumns =
            {
                "parents", "has_nurs", "form",   "children",
                "housing", "finance",  "social", "health"
            };

            string outputColumn = "output";

            DataTable table = new DataTable("Nursery");

            table.Columns.Add(inputColumns);
            table.Columns.Add(outputColumn);

            // Accept both Windows and Unix line endings so the test does not depend
            // on how the resource file happened to be checked out.
            string[] lines = nurseryData.Split(
                new[] { "\r\n", "\n" }, StringSplitOptions.RemoveEmptyEntries);

            foreach (var line in lines)
            {
                table.Rows.Add(line.Split(','));
            }

            Assert.AreEqual(12960, lines.Length);
            Assert.AreEqual("usual,proper,complete,1,convenient,convenient,nonprob,recommended,recommend", lines[0]);
            Assert.AreEqual("great_pret,very_crit,foster,more,critical,inconv,problematic,not_recom,not_recom", lines[lines.Length - 1]);

            // Convert every string column into integer symbols
            Codification codebook = new Codification(table);

            DataTable symbols = codebook.Apply(table);

            inputs  = symbols.ToArray(inputColumns);
            outputs = symbols.ToArray <int>(outputColumn);

            Assert.AreEqual(12960, inputs.Rows());
            Assert.AreEqual(8, inputs.Columns());
            Assert.AreEqual(12960, outputs.Length);
            Assert.AreEqual(4, outputs.Max());
            Assert.AreEqual(0, outputs.Min());
            Assert.AreEqual(5, outputs.DistinctCount());

            var attributes = DecisionVariable.FromCodebook(codebook, inputColumns);
            var tree       = new DecisionTree(attributes, classes: 5);

            Assert.AreEqual(8, tree.NumberOfInputs);
            Assert.AreEqual(5, tree.NumberOfOutputs);

            C45Learning c45 = new C45Learning(tree);

            double error = c45.Run(inputs, outputs);

            // Expected upper bound of each attribute's range, in inputColumns order.
            int[] expectedMax = { 2, 4, 3, 3, 2, 1, 2, 2 };

            // All assertions below pass the expected value first so that failure
            // messages report "expected"/"actual" the right way round.
            Assert.AreEqual(8, tree.Attributes.Count);
            for (int i = 0; i < tree.Attributes.Count; i++)
            {
                Assert.AreEqual(DecisionVariableKind.Discrete, tree.Attributes[i].Nature);
                Assert.AreEqual(0, tree.Attributes[i].Range.Min);
            }

            for (int i = 0; i < expectedMax.Length; i++)
            {
                Assert.AreEqual(inputColumns[i], tree.Attributes[i].Name);
                Assert.AreEqual(expectedMax[i], tree.Attributes[i].Range.Max);
            }

            Assert.AreEqual(8, tree.NumberOfInputs);
            Assert.AreEqual(5, tree.NumberOfOutputs);
            Assert.AreEqual(0, error);

            return tree;
        }
Exemplo n.º 19
0
        // Learns a Naive Bayes model on Mitchell's tennis data and checks the decision
        // and class probabilities it produces for one unseen day.
        public void ComputeTest()
        {
            #region doc_mitchell
            var data = new DataTable("Mitchell's Tennis Example");

            data.Columns.Add("Day", "Outlook", "Temperature", "Humidity", "Wind", "PlayTennis");

            data.Rows.Add("D1", "Sunny", "Hot", "High", "Weak", "No");
            data.Rows.Add("D2", "Sunny", "Hot", "High", "Strong", "No");
            data.Rows.Add("D3", "Overcast", "Hot", "High", "Weak", "Yes");
            data.Rows.Add("D4", "Rain", "Mild", "High", "Weak", "Yes");
            data.Rows.Add("D5", "Rain", "Cool", "Normal", "Weak", "Yes");
            data.Rows.Add("D6", "Rain", "Cool", "Normal", "Strong", "No");
            data.Rows.Add("D7", "Overcast", "Cool", "Normal", "Strong", "Yes");
            data.Rows.Add("D8", "Sunny", "Mild", "High", "Weak", "No");
            data.Rows.Add("D9", "Sunny", "Cool", "Normal", "Weak", "Yes");
            data.Rows.Add("D10", "Rain", "Mild", "Normal", "Weak", "Yes");
            data.Rows.Add("D11", "Sunny", "Mild", "Normal", "Strong", "Yes");
            data.Rows.Add("D12", "Overcast", "Mild", "High", "Strong", "Yes");
            data.Rows.Add("D13", "Overcast", "Hot", "Normal", "Weak", "Yes");
            data.Rows.Add("D14", "Rain", "Mild", "High", "Strong", "No");
            #endregion

            #region doc_codebook
            // The codebook turns the strings found in the
            // listed columns into discrete integer symbols
            var codebook = new Codification(data,
                "Outlook", "Temperature", "Humidity", "Wind", "PlayTennis");

            // Codify the table, then split it into training inputs and outputs
            DataTable symbols = codebook.Apply(data);
            int[][] inputs = symbols.ToArray <int>("Outlook", "Temperature", "Humidity", "Wind");
            int[] outputs = symbols.ToArray <int>("PlayTennis");
            #endregion

            #region doc_learn
            // Set up the Naive Bayes learning algorithm
            var learner = new NaiveBayesLearning();

            // Fit a Naive Bayes model to the examples
            NaiveBayes nb = learner.Learn(inputs, outputs);
            #endregion


            #region doc_test
            // Should one play tennis on a sunny, cool, humid and windy day?
            // Encode that day with the same codebook used for training
            int[] instance = codebook.Translate("Sunny", "Cool", "High", "Strong");

            // Ask the model for the numeric class label
            int c = nb.Decide(instance); // answer will be 0

            // Map the numeric label back to its "Yes" or "No" string
            string result = codebook.Translate("PlayTennis", c); // answer will be "No"

            // The probability of each possible answer is available as well
            double[] probs = nb.Probabilities(instance); // { 0.795, 0.205 }
            #endregion

            Assert.AreEqual("No", result);
            Assert.AreEqual(0, c);
            Assert.AreEqual(0.795, probs[0], 1e-3);
            Assert.AreEqual(0.205, probs[1], 1e-3);
            Assert.AreEqual(1, probs.Sum(), 1e-10);
            Assert.IsFalse(double.IsNaN(probs[0]));
            Assert.AreEqual(2, probs.Length);
        }
Exemplo n.º 20
0
        /// <summary>
        ///   Fits a multinomial logistic regression to the "hsbdemo" resource and checks
        ///   the name, class and value of each of the nine reported coefficients.
        /// </summary>
        public void AnalyzeExample1()
        {
            // http://www.ats.ucla.edu/stat/stata/dae/mlogit.htm
            CsvReader reader = CsvReader.FromText(Properties.Resources.hsbdemo, hasHeaders: true);

            var table = reader.ToTable();

            var codification = new Codification(table);

            codification["ses"].VariableType  = CodificationVariable.CategoricalWithBaseline;
            codification["prog"].VariableType = CodificationVariable.Categorical;
            codification["prog"].Remap("academic", 0);

            var inputs = codification.Apply(table, "ses", "write");
            var output = codification.Apply(table, "prog");

            // Get inputs
            string[] inputNames;
            var      inputsData = inputs.ToArray(out inputNames);

            // Get outputs
            string[] outputNames;
            var      outputData = output.ToArray(out outputNames);

            var analysis = new MultinomialLogisticRegressionAnalysis(inputsData, outputData, inputNames, outputNames);

            analysis.Compute();

            // Expected coefficients, in the order in which the analysis reports them.
            string[] expectedNames =
            {
                "(baseline)",
                "Intercept", "write", "ses: middle", "ses: high",
                "Intercept", "write", "ses: middle", "ses: high"
            };

            string[] expectedClasses =
            {
                "prog: academic",
                "prog: general", "prog: general", "prog: general", "prog: general",
                "prog: vocation", "prog: vocation", "prog: vocation", "prog: vocation"
            };

            double[] expectedValues =
            {
                0,
                1.0302662690579185, -0.083689163424126883, -0.58217998138556049, -1.1112048569892283,
                1.2715455854613191, -0.13231057837059781, 0.20451187629162043, -0.93207938490449849
            };

            // The values come out of an iterative fit, so they are compared with a small
            // tolerance rather than bit-for-bit equality, which can differ between runtimes.
            const double tolerance = 1e-10;

            Assert.AreEqual(9, analysis.Coefficients.Count);

            for (int i = 0; i < expectedNames.Length; i++)
            {
                Assert.AreEqual(expectedNames[i], analysis.Coefficients[i].Name);
                Assert.AreEqual(expectedClasses[i], analysis.Coefficients[i].Class);
                Assert.AreEqual(expectedValues[i], analysis.Coefficients[i].Value, tolerance);
            }
        }
Exemplo n.º 21
0
        // Interactive demo: trains a C4.5 decision tree on overtake samples, scores it
        // on freshly generated samples and prints the rules the tree has learned.
        public void Run()
        {
            WriteLine("Decision Tree - C45 Learning");

            // Ask how many samples the tree should be trained on
            int trainAmount = GetUserInput("Amount of data to train");

            var trainInputs  = new double[trainAmount][];
            var trainOutputs = new int[trainAmount];

            // Fill the training arrays with samples from the OvertakeAI data source
            for (int sample = 0; sample < trainAmount; sample++)
            {
                Library.Overtake overtake = OvertakeData.GetData();

                trainInputs[sample] = new double[3]
                {
                    overtake.InitialSeparationM,
                    overtake.OvertakingSpeedMPS,
                    overtake.OncomingSpeedMPS
                };

                trainOutputs[sample] = ToInt32(overtake.Success);
            }

            // Induce the decision tree from the training pairs with the C4.5 algorithm
            var learningAlgorithm = new C45Learning();
            DecisionTree tree = learningAlgorithm.Learn(trainInputs, trainOutputs);

            // Ask how many fresh samples should be predicted with the tree
            int testAmount = GetUserInput("Amount of data to predict");

            var scoreCard = new List <bool>();

            // Index in this array is the class label returned by the tree
            string[] possibleOutcomes = { "Won't Pass", "Will Pass" };

            WriteLine($"\n{"Initial Seperation (m)",21}" +
                      $"{"Overtaking Speed (m/s)",28}" +
                      $"{"Oncoming Speed (m/s)",26}" +
                      $"{"Outcome",14}" +
                      $"{"Prediction",17}");

            // One iteration per sample to predict
            for (int sample = 0; sample < testAmount; sample++)
            {
                // Fetch a new sample from OvertakeAI
                Library.Overtake overtake = OvertakeData.GetData();

                double[] testInputs = new double[3]
                {
                    overtake.InitialSeparationM,
                    overtake.OvertakingSpeedMPS,
                    overtake.OncomingSpeedMPS
                };

                string actualOutcome = overtake.Success ? "Will Pass" : "Won't Pass";

                // Predict the result using the decision tree
                int outcomeIndex = tree.Decide(testInputs);

                // Record whether the prediction matches what actually happened
                bool isCorrect = actualOutcome == possibleOutcomes[outcomeIndex];
                scoreCard.Add(isCorrect);

                // Print one table row for this sample
                string predictedOutcome = scoreCard[sample] ? "Correct" : "Incorrect";
                WriteLine($"{Round(testInputs[0], 2).ToString("F"),14}" +
                          $"{Round(testInputs[1], 2).ToString("F"),27}" +
                          $"{Round(testInputs[2], 2).ToString("F"),27}" +
                          $"{actualOutcome,22}" +
                          $"{predictedOutcome,17}");
            }

            // Accuracy is the share of correct entries in the score card, as a percentage
            int correctCount = scoreCard.Count(x => x);
            WriteLine($"\nAccuracy: {Round((correctCount / ToDouble(scoreCard.Count)) * 100, 2)}%");

            // Evaluate the tree on the data it was trained on
            int[] predicted = tree.Decide(trainInputs);
            var trainingLoss = new ZeroOneLoss(trainOutputs);
            double error = trainingLoss.Loss(predicted);

            WriteLine($"Training Error: {Round(error, 2)}\n");

            // Print the rules the decision tree came up with, labelled via a codebook
            WriteLine("Decision Tree Rules:");
            DecisionSet rules = tree.ToRules();
            var codebook = new Codification("Possible Results", possibleOutcomes);
            var encodedRules = rules.ToString(codebook, "Possible Results", CultureInfo.InvariantCulture);

            WriteLine($"{encodedRules}");
        }
Exemplo n.º 22
0
        /// <summary>
        ///   Learns a C4.5 tree from the iris resource, then checks its mean zero-one
        ///   error and the textual form of the rules derived from the tree.
        /// </summary>
        public void new_method_create_tree()
        {
            // Each non-empty line of the resource is one comma-separated record
            string[][] text = Resources.iris_data.Split(new[] { '\n' }, StringSplitOptions.RemoveEmptyEntries).Apply(x => x.Split(','));

            // The first four fields are the numeric inputs
            double[][] inputs = text.GetColumns(0, 1, 2, 3).To<double[][]>();

            // The fifth field is the class label
            string[] labels = text.GetColumn(4);

            var codebook = new Codification("Output", labels);
            int[] outputs = codebook.Translate("Output", labels);

            // And we can use the C4.5 for learning:
            var teacher = new C45Learning();

            // And finally induce the tree:
            var tree = teacher.Learn(inputs, outputs);

            // To get the estimated class labels, we can use
            int[] predicted = tree.Decide(inputs);

            // And the classification error can be computed from those same
            // predictions, so the tree is not evaluated a second time
            double error = new ZeroOneLoss(outputs) // 0.0266
            {
                Mean = true
            }.Loss(predicted);

            // Moreover, we may decide to convert our tree to a set of rules:
            DecisionSet rules = tree.ToRules();

            // And using the codebook, we can inspect the tree reasoning:
            string ruleText = rules.ToString(codebook, "Output",
                System.Globalization.CultureInfo.InvariantCulture);

            // The output is:
            string expected = @"Iris-setosa =: (2 <= 2.45)
Iris-versicolor =: (2 > 2.45) && (3 <= 1.75) && (0 <= 7.05) && (1 <= 2.85)
Iris-versicolor =: (2 > 2.45) && (3 <= 1.75) && (0 <= 7.05) && (1 > 2.85)
Iris-versicolor =: (2 > 2.45) && (3 > 1.75) && (0 <= 5.95) && (1 > 3.05)
Iris-virginica =: (2 > 2.45) && (3 <= 1.75) && (0 > 7.05)
Iris-virginica =: (2 > 2.45) && (3 > 1.75) && (0 > 5.95)
Iris-virginica =: (2 > 2.45) && (3 > 1.75) && (0 <= 5.95) && (1 <= 3.05)
";

            Assert.AreEqual(0.026666666666666668, error, 1e-10);

            double newError = ComputeError(rules, inputs, outputs);
            Assert.AreEqual(0.026666666666666668, newError, 1e-10);
            Assert.AreEqual(expected, ruleText);
        }
Exemplo n.º 23
0
 /// <summary>
 ///   Builds the textual representation of this instance by forwarding
 ///   the given codebook to the <c>toString</c> helper.
 /// </summary>
 /// 
 /// <param name="codebook">The codification codebook handed to the helper.</param>
 /// 
 /// <returns>
 ///   A <see cref="System.String"/> that represents this instance.
 /// </returns>
 /// 
 public string ToString(Codification codebook)
 {
     string text = toString(codebook);

     return text;
 }
Exemplo n.º 24
0
        // Builds Mitchell's tennis data set, trains an ID3 tree on it and verifies that
        // the tree reproduces the "PlayTennis" answer of every row. The trained tree and
        // the codified training data are handed back through the out parameters.
        public static void CreateMitchellExample(out DecisionTree tree, out int[][] inputs, out int[] outputs)
        {
            var data = new DataTable("Mitchell's Tennis Example");

            data.Columns.Add("Day", "Outlook", "Temperature", "Humidity", "Wind", "PlayTennis");

            data.Rows.Add("D1", "Sunny", "Hot", "High", "Weak", "No");
            data.Rows.Add("D2", "Sunny", "Hot", "High", "Strong", "No");
            data.Rows.Add("D3", "Overcast", "Hot", "High", "Weak", "Yes");
            data.Rows.Add("D4", "Rain", "Mild", "High", "Weak", "Yes");
            data.Rows.Add("D5", "Rain", "Cool", "Normal", "Weak", "Yes");
            data.Rows.Add("D6", "Rain", "Cool", "Normal", "Strong", "No");
            data.Rows.Add("D7", "Overcast", "Cool", "Normal", "Strong", "Yes");
            data.Rows.Add("D8", "Sunny", "Mild", "High", "Weak", "No");
            data.Rows.Add("D9", "Sunny", "Cool", "Normal", "Weak", "Yes");
            data.Rows.Add("D10", "Rain", "Mild", "Normal", "Weak", "Yes");
            data.Rows.Add("D11", "Sunny", "Mild", "Normal", "Strong", "Yes");
            data.Rows.Add("D12", "Overcast", "Mild", "High", "Strong", "Yes");
            data.Rows.Add("D13", "Overcast", "Hot", "Normal", "Weak", "Yes");
            data.Rows.Add("D14", "Rain", "Mild", "High", "Strong", "No");

            // The codebook converts the strings of the
            // listed columns into integer symbols
            var codebook = new Codification(data,
                "Outlook", "Temperature", "Humidity", "Wind", "PlayTennis");

            // Predictor columns, in the order the tree receives them
            string[] inputNames = { "Outlook", "Temperature", "Humidity", "Wind" };

            // One decision variable per predictor, sized by the number of
            // symbols the codebook holds for that column:
            //   Outlook     - 3 possible values (Sunny, overcast, rain)
            //   Temperature - 3 possible values (Hot, mild, cool)
            //   Humidity    - 2 possible values (High, normal)
            //   Wind        - 2 possible values (Weak, strong)
            var attributes = new DecisionVariable[inputNames.Length];
            for (int k = 0; k < inputNames.Length; k++)
            {
                attributes[k] = new DecisionVariable(inputNames[k], codebook[inputNames[k]].Symbols);
            }

            int classCount = codebook["PlayTennis"].Symbols; // 2 possible values (yes, no)

            tree = new DecisionTree(attributes, classCount);
            var id3 = new ID3Learning(tree);

            // Codify the table and train the classifier on it
            DataTable symbols = codebook.Apply(data);

            inputs  = symbols.ToArray <int>(inputNames);
            outputs = symbols.ToArray <int>("PlayTennis");

            double error = id3.Run(inputs, outputs);

            Assert.AreEqual(0, error);

            // Single query, evaluated step by step
            int[] query = codebook.Translate("Sunny", "Hot", "High", "Strong");
            int queryOutput = tree.Compute(query);
            string queryAnswer = codebook.Translate("PlayTennis", queryOutput);

            Assert.AreEqual("No", queryAnswer);

            // Every training row must be classified as the table says
            foreach (DataRow row in data.Rows)
            {
                int[] codes = codebook.Translate(row, inputNames);

                int label = tree.Compute(codes);

                string actual   = codebook.Translate("PlayTennis", label);
                string expected = row["PlayTennis"] as string;

                Assert.AreEqual(expected, actual);
            }

            // Same query again, this time as one nested expression
            string nestedAnswer = codebook.Translate("PlayTennis",
                tree.Compute(codebook.Translate("Sunny", "Hot", "High", "Strong")));

            Assert.AreEqual("No", nestedAnswer);
        }
Exemplo n.º 25
0
        // Saves a codebook to bytes, loads it back and checks that the reloaded copy
        // translates labels in both directions like the one it was built from.
        public void SerializationTest()
        {
            string[] names = { "child", "adult", "elder" };

            var codebook = new Codification("Label", names);

            byte[] bytes = codebook.Save();

            var reloaded = Serializer.Load<Codification>(bytes);

            Assert.AreEqual(codebook.Active, reloaded.Active);
            Assert.AreEqual(codebook.Columns.Count, reloaded.Columns.Count);
            Assert.AreEqual(codebook.Columns[0].ColumnName, reloaded.Columns[0].ColumnName);

            // String -> symbol: each name maps to its position in the names array
            for (int code = 0; code < names.Length; code++)
            {
                Assert.AreEqual(code, reloaded.Translate("Label", names[code]));
            }

            // Symbol -> string: each position maps back to its name
            for (int code = 0; code < names.Length; code++)
            {
                Assert.AreEqual(names[code], reloaded.Translate("Label", code));
            }
        }
Exemplo n.º 26
0
        // Trains an ID3 tree in which "Outlook" is declared with 200 more symbols than
        // the data contains, and checks that the unused branches are empty leaves.
        public void IncompleteDiscreteVariableTest()
        {
            var data = new DataTable("Degenerated Tennis Example");

            data.Columns.Add("Day", "Outlook", "Temperature", "Humidity", "Wind", "PlayTennis");

            data.Rows.Add("D1", "Sunny", "Hot", "High", "Weak", "No");
            data.Rows.Add("D2", "Sunny", "Hot", "High", "Strong", "No");
            data.Rows.Add("D3", "Overcast", "Hot", "High", "Weak", "Yes");
            data.Rows.Add("D4", "Rain", "Mild", "High", "Weak", "Yes");
            data.Rows.Add("D5", "Rain", "Cool", "Normal", "Weak", "Yes");
            data.Rows.Add("D6", "Rain", "Cool", "Normal", "Strong", "No");
            data.Rows.Add("D7", "Overcast", "Cool", "Normal", "Strong", "Yes");
            data.Rows.Add("D8", "Sunny", "Mild", "High", "Weak", "No");
            data.Rows.Add("D9", "Sunny", "Cool", "Normal", "Weak", "Yes");
            data.Rows.Add("D10", "Rain", "Mild", "Normal", "Weak", "Yes");
            data.Rows.Add("D11", "Sunny", "Mild", "Normal", "Strong", "Yes");
            data.Rows.Add("D12", "Overcast", "Mild", "High", "Strong", "Yes");
            data.Rows.Add("D13", "Overcast", "Hot", "Normal", "Weak", "Yes");
            data.Rows.Add("D14", "Rain", "Mild", "High", "Strong", "No");

            // The codebook converts the table's strings into integer symbols
            var codebook = new Codification(data);

            // Symbol counts taken from the codebook; Outlook is deliberately oversized
            int outlookSymbols     = codebook["Outlook"].Symbols + 200; // 203 possible values, 200 undefined
            int temperatureSymbols = codebook["Temperature"].Symbols;   // 3 possible values (Hot, mild, cool)
            int humiditySymbols    = codebook["Humidity"].Symbols;      // 2 possible values (High, normal)
            int windSymbols        = codebook["Wind"].Symbols;          // 2 possible values (Weak, strong)

            DecisionVariable[] attributes =
            {
                new DecisionVariable("Outlook", outlookSymbols),
                new DecisionVariable("Temperature", temperatureSymbols),
                new DecisionVariable("Humidity", humiditySymbols),
                new DecisionVariable("Wind", windSymbols)
            };

            int classCount = codebook["PlayTennis"].Symbols; // 2 possible values (yes, no)

            var tree = new DecisionTree(attributes, classCount);
            var id3 = new ID3Learning(tree);

            // Codify the table and train the classifier on it
            DataTable symbols = codebook.Apply(data);

            int[][] inputs = symbols.ToArray <int>("Outlook", "Temperature", "Humidity", "Wind");
            int[] outputs = symbols.ToArray <int>("PlayTennis");

            double error = id3.Run(inputs, outputs);

            Assert.AreEqual(0, error);

            // The root has one branch per declared symbol; a branch for a
            // symbol that never occurs is a leaf without an output
            Assert.AreEqual(203, tree.Root.Branches.Count);
            Assert.IsTrue(tree.Root.Branches[100].IsLeaf);
            Assert.IsNull(tree.Root.Branches[100].Output);

            // The tree must still reproduce every training label
            for (int row = 0; row < inputs.Length; row++)
            {
                int predicted = tree.Compute(inputs[row]);
                Assert.AreEqual(outputs[row], predicted);
            }
        }
Exemplo n.º 27
0
        // Codifies the "test" worksheet of the intrusion spreadsheet and checks that
        // the result has rows and that none of its columns is still typed as string.
        public void ApplyTest4()
        {
            string path = @"Resources\intrusion.xls";

            var db = new ExcelReader(path, false, true);

            DataTable table = db.GetWorksheet("test");

            var codebook = new Codification(table);

            DataTable result = codebook.Apply(table);

            Assert.IsNotNull(result);

            foreach (DataColumn col in result.Columns)
            {
                Assert.AreNotEqual(col.DataType, typeof(string));
            }

            bool hasRows = result.Rows.Count > 0;
            Assert.IsTrue(hasRows);
        }
Exemplo n.º 28
0
        // Documentation sample: learns an ID3 decision tree on Mitchell's tennis data
        // and checks the prediction made for one query.
        public void learn_doc2()
        {
            #region doc_learn_mitchell
            // This example uses the well-known Play Tennis data set by Tom Mitchell (1998).
            // The goal is to infer whether a person would play tennis on a given day from
            // four input variables only. All four are categorical: their values carry no
            // order (Sunny is neither bigger nor smaller than Rain, the two are merely
            // distinct). Each row below is one day on which the person's behavior was
            // registered and annotated; together the rows form the set of observations
            // we will learn from:

            // Note: this example uses DataTables to represent the input data, but this is not required.
            DataTable data = new DataTable("Mitchell's Tennis Example");

            data.Columns.Add("Day", "Outlook", "Temperature", "Humidity", "Wind", "PlayTennis");
            data.Rows.Add("D1", "Sunny", "Hot", "High", "Weak", "No");
            data.Rows.Add("D2", "Sunny", "Hot", "High", "Strong", "No");
            data.Rows.Add("D3", "Overcast", "Hot", "High", "Weak", "Yes");
            data.Rows.Add("D4", "Rain", "Mild", "High", "Weak", "Yes");
            data.Rows.Add("D5", "Rain", "Cool", "Normal", "Weak", "Yes");
            data.Rows.Add("D6", "Rain", "Cool", "Normal", "Strong", "No");
            data.Rows.Add("D7", "Overcast", "Cool", "Normal", "Strong", "Yes");
            data.Rows.Add("D8", "Sunny", "Mild", "High", "Weak", "No");
            data.Rows.Add("D9", "Sunny", "Cool", "Normal", "Weak", "Yes");
            data.Rows.Add("D10", "Rain", "Mild", "Normal", "Weak", "Yes");
            data.Rows.Add("D11", "Sunny", "Mild", "Normal", "Strong", "Yes");
            data.Rows.Add("D12", "Overcast", "Mild", "High", "Strong", "Yes");
            data.Rows.Add("D13", "Overcast", "Hot", "Normal", "Weak", "Yes");
            data.Rows.Add("D14", "Rain", "Mild", "High", "Strong", "No");

            // Before learning a decision tree, we first convert the problem to a simpler
            // representation. Since every variable is a category, it makes no difference
            // whether its values are written as strings or as numbers: both are just symbols
            // for the event they stand for. Numbers are easier to work with than text, so we
            // switch to a discrete alphabet by means of an
            // Accord.Statistics.Filters.Codification codebook.

            // A codebook maps each distinct value of a variable to an integer symbol. For
            // example, "Sunny" could be represented by the integer label 0, "Overcast" by 1,
            // "Rain" by 2, and likewise for the other variables. So:

            // Create the codebook that converts
            // the strings into integer symbols
            Codification codebook = new Codification(data);

            // Use the codebook to translate the training data into integer symbols:
            DataTable symbols = codebook.Apply(data);
            int[][] inputs = symbols.ToArray <int>("Outlook", "Temperature", "Humidity", "Wind");
            int[] outputs = symbols.ToArray <int>("PlayTennis");

            // With only categorical variables at hand, the simplest way to induce
            // a decision tree is Quinlan's ID3 algorithm. Let's use it:

            // Create the ID3 teacher
            var id3learning = new ID3Learning
            {
                // With the input/output pairs ready, we describe the decision tree.
                // The tree will predict the last column, "PlayTennis", using "Outlook",
                // "Temperature", "Humidity" and "Wind" as predictors (the variables the
                // decision is based on). Because they are categorical, the characteristics
                // of each variable must be given when the tree is created. So:

                new DecisionVariable("Outlook", 3),     // 3 possible values (Sunny, overcast, rain)
                new DecisionVariable("Temperature", 3), // 3 possible values (Hot, mild, cool)
                new DecisionVariable("Humidity", 2),    // 2 possible values (High, normal)
                new DecisionVariable("Wind", 2)         // 2 possible values (Weak, strong)

                // Note: It is also possible to create a DecisionVariable[] from a codebook:
                // DecisionVariable[] attributes = DecisionVariable.FromCodebook(codebook);
            };

            // Learn from the training instances
            DecisionTree tree = id3learning.Learn(inputs, outputs);

            // Measure the training error by predicting the training instances themselves
            int[] trainingPredictions = tree.Decide(inputs);
            double error = new ZeroOneLoss(outputs).Loss(trainingPredictions);

            // New examples are classified through the tree's
            // Decide method. First we build a query:

            int[] query = codebook.Transform(new string[,]
            {
                { "Outlook", "Sunny" },
                { "Temperature", "Hot" },
                { "Humidity", "High" },
                { "Wind", "Strong" }
            });

            // Then we predict its label
            int predicted = tree.Decide(query);  // result will be 0

            // And finally translate the label back into a string
            string answer = codebook.Revert("PlayTennis", predicted); // Answer will be: "No"
            #endregion

            Assert.AreEqual(0, predicted);
            Assert.AreEqual("No", answer);
            Assert.AreEqual(0, error);
        }
Exemplo n.º 29
0
        // Trains a hidden Markov model on a handful of phrases and checks the
        // three-word sample it generates with the random generator seeded to 42.
        public void GenerateTest2()
        {
            Accord.Math.Tools.SetupGenerator(42);

            // The phrases the model will learn from:
            //
            string[][] phrases =
            {
                new[] { "those", "are", "sample", "words", "from", "a", "dictionary" },
                new[] { "those", "are", "sample", "words" },
                new[] { "sample", "words", "are", "words" },
                new[] { "those", "words" },
                new[] { "those", "are", "words" },
                new[] { "words", "from", "a", "dictionary" },
                new[] { "those", "are", "words", "from", "a", "dictionary" }
            };

            // A codification codebook turns the words
            // into sequences of integer labels:
            var codebook = new Codification("Words", phrases);

            // These label sequences are the training data for the model:
            int[][] sequence = codebook.Translate("Words", phrases);

            // The sequences have definite start and ending points,
            // so the model is given a forward topology.
            //
            var topology = new Forward(states: 4);
            int symbols = codebook["Words"].Symbols; // We have 7 different words

            // Build the hidden Markov model
            var hmm = new HiddenMarkovModel(topology, symbols);

            // Build the learning algorithm for it
            var teacher = new BaumWelchLearning(hmm);

            // Fit the model to the phrases
            double error = teacher.Run(sequence);

            // The model can now generate new samples from
            // the word distributions it has just learned:
            //
            int[] sample = hmm.Generate(3);

            // Translated back to words, the sample reads: "those", "are", "words".
            string[] result = codebook.Translate("Words", sample);

            string[] expected = { "those", "are", "words" };
            for (int w = 0; w < expected.Length; w++)
            {
                Assert.AreEqual(expected[w], result[w]);
            }
        }
        /// <summary>
        /// Regression test / documentation sample: codifies a heterogeneous DataTable with a
        /// Codification filter and fits a MultipleLinearRegressionAnalysis on the result,
        /// then checks the prediction, R², ANOVA statistic, information matrix and Z-test.
        /// </summary>
        public void gh_937()
        {
            #region doc_learn_database
            // Note: this example uses a System.Data.DataTable to represent input data,
            // but note that this is not required. The data could have been represented
            // as jagged double matrices (double[][]) directly.

            // If you have to handle heterogeneous data in your application, such as user records
            // in a database, this data is best represented within the framework using a .NET
            // DataTable object. In order to try to learn a classification or regression model
            // using this datatable, first we will need to convert the table into a representation
            // that the machine learning model can understand. Such a representation is quite often
            // a matrix of doubles (double[][]).
            var data = new DataTable("Customer Revenue Example");

            data.Columns.Add("Day", "CustomerId", "Time (hour)", "Weather", "Revenue");
            data.Rows.Add("D1", 0, 8, "Sunny", 101.2);
            data.Rows.Add("D2", 1, 10, "Sunny", 24.1);
            data.Rows.Add("D3", 2, 10, "Rain", 107);
            data.Rows.Add("D4", 3, 16, "Rain", 223);
            data.Rows.Add("D5", 4, 15, "Rain", 1);
            data.Rows.Add("D6", 5, 20, "Rain", 42);
            data.Rows.Add("D7", 6, 12, "Cloudy", 123);
            data.Rows.Add("D8", 7, 12, "Sunny", 64);

            // One way to perform this conversion is by using a Codification filter. The Codification
            // filter can take care of converting variables that actually denote symbols (i.e. the
            // weather in the example above) into representations that make more sense given the assumption
            // of a real vector-based classifier.

            // Create a codification codebook
            var codebook = new Codification()
            {
                { "Weather", CodificationVariable.Categorical },
                { "Time (hour)", CodificationVariable.Continuous },
                { "Revenue", CodificationVariable.Continuous },
            };

            // Learn from the data
            codebook.Learn(data);

            // Now, we will use the codebook to transform the DataTable into double[][] vectors. Due to
            // the way the conversion works, we can end up with more columns in the output vectors
            // than the ones we started with. If you would like more details about what those columns
            // represent, you can pass them as 'out' parameters in the methods that follow below.
            string[] inputNames;  // (note: if you do not want to run this example yourself, you
            string   outputName;  // can see below the new variable names that will be generated)

            // Now, we can translate our training data into numeric vectors using our codebook:
            double[][] inputs  = codebook.Apply(data, "Weather", "Time (hour)").ToJagged(out inputNames);
            double[]   outputs = codebook.Apply(data, "Revenue").ToVector(out outputName);
            // (note: the Apply method transforms a DataTable into another DataTable containing the codified
            //  variables. The ToJagged and ToVector methods are then used to transform those tables into
            //  double[][] matrices and double[] vectors, respectively.)

            // If we would like to learn a linear regression model for this data, there are two possible
            // ways depending on which aspect of the linear regression we are interested in the most. If we
            // are interested in interpreting the linear regression, performing hypothesis tests with the
            // coefficients and performing an actual _linear regression analysis_, then we can use the
            // MultipleLinearRegressionAnalysis class for this. If however we are only interested in using
            // the learned model directly to predict new values for the dataset, then we could be using the
            // MultipleLinearRegression and OrdinaryLeastSquares classes directly instead.

            // This example deals with the former case. For the latter, please see the documentation page
            // for the MultipleLinearRegression class.

            // We can create a new multiple linear analysis for the variables
            var mlra = new MultipleLinearRegressionAnalysis(intercept: true)
            {
                // We can also inform the names of the new variables that have been created by the
                // codification filter. Those can help in visualizing the analysis once it is
                // data-bound to a visual control such as a Windows.Forms.DataGridView or WPF DataGrid:

                Inputs = inputNames, // will be { "Weather: Sunny", "Weather: Rain", "Weather: Cloudy", "Time (hour)" }
                Output = outputName  // will be "Revenue"
            };

            // To overcome linear dependency errors
            mlra.OrdinaryLeastSquares.IsRobust = true;

            // Compute the analysis and obtain the estimated regression
            MultipleLinearRegression regression = mlra.Learn(inputs, outputs);

            // And then predict the label using
            double predicted = mlra.Transform(inputs[0]); // result will be ~72.3

            // Because we opted for doing a MultipleLinearRegressionAnalysis instead of a simple
            // linear regression, we will have further information about the regression available:
            int    inputCount       = mlra.NumberOfInputs;    // should be 4
            int    outputCount      = mlra.NumberOfOutputs;   // should be 1
            double r2               = mlra.RSquared;          // should be 0.12801838425195311
            AnovaSourceCollection a = mlra.Table;             // ANOVA table (bind to a visual control for quick inspection)
            double[][]            h = mlra.InformationMatrix; // should contain Fisher's information matrix for the problem
            ZTest z = mlra.ZTest;                             // statistic should be 0 (p = 1, non-significant)
            #endregion

            Assert.AreEqual(72.279574468085144d, predicted, 1e-8);

            // The counts are integers, so they are compared exactly rather than with a tolerance.
            Assert.AreEqual(4, inputCount);
            Assert.AreEqual(1, outputCount);
            Assert.AreEqual(0.12801838425195311, r2, 1e-8);
            Assert.AreEqual(0.11010987669344097, a[0].Statistic, 1e-8);

            // Expected information matrix: 5x5 (four codified inputs plus the intercept term).
            double[][] expectedH = new double[][]
            {
                new double[] { 0.442293243337911, -0.069833718526197, -0.228692384542512, -0.0141758263063635, 0.143767140269202 },
                new double[] { -0.0698337185261971, 0.717811616891116, -0.112258662892007, -0.0655549422852099, 0.535719235472913 },
                new double[] { -0.228692384542512, -0.112258662892007, 0.717434922237013, -0.0232803210243207, 0.376483874802496 },
                new double[] { -0.0141758263063635, -0.0655549422852099, -0.0232803210243207, 0.0370082984668314, -0.103011089615894 },
                new double[] { 0.143767140269202, 0.535719235472913, 0.376483874802496, -0.103011089615894, 1.05597025054461 }
            };

            Assert.IsTrue(expectedH.IsEqual(h, 1e-8));
            Assert.AreEqual(0, z.Statistic, 1e-8);
            Assert.AreEqual(1, z.PValue, 1e-8);
        }
Exemplo n.º 31
0
        /// <summary>
        /// Click handler: trains a C4.5 decision tree on the custom Iris data set, shows the
        /// rules extracted from the tree and the number of flowers predicted for each class.
        /// </summary>
        /// <param name="sender">The control that raised the event (not used).</param>
        /// <param name="e">Event data (not used).</param>
        private void button_Click(object sender, RoutedEventArgs e)
        {
            // The custom Iris resource is used here instead of the Iris set that ships with Accord.
            // Both Windows and Unix line endings are accepted so the parsing does not depend on
            // how the resource file was saved.
            string[][] data = DataSet.CustomIris.iris_values
                              .Split(new[] { "\r\n", "\n" }, StringSplitOptions.RemoveEmptyEntries)
                              .Apply(x => x.Split(','));

            // Features: the first four columns
            double[][] inputs = data.GetColumns(0, 1, 2, 3).To <double[][]>();

            // Labels: the fifth column
            string[] labels = data.GetColumn(4);

            // The codebook translates the textual labels into integer class symbols for the tree
            var cb = new Codification("Output", labels);

            int[] outputs = cb.Transform("Output", labels);

            DecisionVariable[] features =
            {
                new DecisionVariable("sepal length", DecisionVariableKind.Continuous),
                new DecisionVariable("sepal width",  DecisionVariableKind.Continuous),
                new DecisionVariable("petal length", DecisionVariableKind.Continuous),
                new DecisionVariable("petal width",  DecisionVariableKind.Continuous),
            };

            var decisionTree = new DecisionTree(inputs: features, classes: 3);
            var c45learner   = new C45Learning(decisionTree);

            c45learner.Learn(inputs, outputs);

            // Classify the training set once; the result feeds the per-class counters below.
            int[] estimated = decisionTree.Decide(inputs);

            // Convert the tree into a rule set so it can be displayed as readable text
            DecisionSet decisionSet = decisionTree.ToRules();

            string ruleText = decisionSet.ToString(cb, "Output",
                                                   System.Globalization.CultureInfo.InvariantCulture);

            #region UI
            // Set output to UI
            tb_output.Text = ruleText;

            // Count the predicted flowers per class and show them in the UI -> TODO Bindings
            // NOTE(review): assumes the codebook assigned 0/1/2 to setosa/versicolor/virginica
            // (i.e. the order in which the labels occur in the data) - confirm against the resource.
            var setosaCount     = 0;
            var versicolorCount = 0;
            var virginicaCount  = 0;

            foreach (int label in estimated)
            {
                switch (label)
                {
                    case 0:
                        setosaCount++;
                        break;
                    case 1:
                        versicolorCount++;
                        break;
                    case 2:
                        virginicaCount++;
                        break;
                }
            }

            tb_setosa.Text = setosaCount.ToString();
            tb_versi.Text  = versicolorCount.ToString();
            tb_virgi.Text  = virginicaCount.ToString();
            #endregion UI
        }
Exemplo n.º 32
0
        public void LargeRunTest()
        {
            #region doc_nursery
            // Make the run reproducible by fixing the random seed
            Accord.Math.Random.Generator.Seed = 1;

            // The data used here is the Nursery Database from the UCI machine
            // learning repository:
            //
            //   http://archive.ics.uci.edu/ml/machine-learning-databases/nursery/nursery.names
            //
            // According to its description, the database was derived from a
            // hierarchical decision model originally developed to rank applications
            // for nursery schools in Ljubljana, Slovenia, during the 1980's, when
            // rejected applications frequently needed an objective explanation. The
            // final decision depended on three subproblems: occupation of parents and
            // child's nursery, family structure and financial standing, and social and
            // health picture of the family. The model was developed within the expert
            // system shell DEX (M. Bohanec, V. Rajkovic: Expert system for decision
            // making. Sistemica 1(1), pp. 145-157, 1990.).
            //

            // The whole nursery.data file is available as a single string resource
            string nurseryData = Resources.nursery;

            // Names of the eight input columns, in file order
            string[] inputColumns =
            {
                "parents", "has_nurs", "form",   "children",
                "housing", "finance",  "social", "health"
            };

            // Name given to the last column, which holds the decision
            string outputColumn = "output";

            // Build a table holding the inputs followed by the output column
            DataTable table = new DataTable("Nursery");
            table.Columns.Add(inputColumns);
            table.Columns.Add(outputColumn);

            // One record per line, fields separated by commas
            string[] lines = nurseryData.Split(
                new[] { "\r\n" }, StringSplitOptions.RemoveEmptyEntries);

            for (int row = 0; row < lines.Length; row++)
            {
                string[] fields = lines[row].Split(',');
                table.Rows.Add(fields);
            }

            // The table only contains text; a codebook maps every distinct
            // value of each column to a discrete integer symbol
            Codification codebook = new Codification(table);

            // Encode the whole table and split it into inputs and outputs
            DataTable  symbols = codebook.Apply(table);
            double[][] inputs  = symbols.ToArray(inputColumns);
            int[]      outputs = symbols.ToArray <int>(outputColumn);

            // Describe the input attributes of the trees from the codebook
            var attributes = DecisionVariable.FromCodebook(codebook, inputColumns);

            // Configure the random forest learning algorithm
            var teacher = new RandomForestLearning(attributes)
            {
                NumberOfTrees = 1,
                SampleRatio   = 1.0
            };

            // Learn the forest from the encoded data
            var forest = teacher.Learn(inputs, outputs);

            // Class labels estimated for the training inputs
            int[] predicted = forest.Decide(inputs);

            // Classification error over the training inputs (expected to be 0)
            double error = new ZeroOneLoss(outputs).Loss(forest.Decide(inputs));
            #endregion

            Assert.AreEqual(0, error, 1e-10);
            Assert.IsTrue(outputs.IsEqual(predicted));

            // Sanity checks on the parsed resource
            Assert.AreEqual(12960, lines.Length);
            Assert.AreEqual("usual,proper,complete,1,convenient,convenient,nonprob,recommended,recommend", lines[0]);
            Assert.AreEqual("great_pret,very_crit,foster,more,critical,inconv,problematic,not_recom,not_recom", lines[lines.Length - 1]);

            Assert.AreEqual(0, error);

            // Every single sample must also be classified correctly through Compute
            for (int sample = 0; sample < inputs.Length; sample++)
            {
                Assert.AreEqual(outputs[sample], forest.Compute(inputs[sample]));
            }
        }
Exemplo n.º 33
0
        public void ConstantDiscreteVariableTest()
        {
            // Tennis data set in which the "Temperature" column holds a single value ("Hot")
            var data = new DataTable("Degenerated Tennis Example");

            data.Columns.Add("Day", "Outlook", "Temperature", "Humidity", "Wind", "PlayTennis");

            data.Rows.Add("D1", "Sunny", "Hot", "High", "Weak", "No");
            data.Rows.Add("D2", "Sunny", "Hot", "High", "Strong", "No");
            data.Rows.Add("D3", "Overcast", "Hot", "High", "Weak", "Yes");
            data.Rows.Add("D4", "Rain", "Hot", "High", "Weak", "Yes");
            data.Rows.Add("D5", "Rain", "Hot", "Normal", "Weak", "Yes");
            data.Rows.Add("D6", "Rain", "Hot", "Normal", "Strong", "No");
            data.Rows.Add("D7", "Overcast", "Hot", "Normal", "Strong", "Yes");
            data.Rows.Add("D8", "Sunny", "Hot", "High", "Weak", "No");
            data.Rows.Add("D9", "Sunny", "Hot", "Normal", "Weak", "Yes");
            data.Rows.Add("D10", "Rain", "Hot", "Normal", "Weak", "Yes");
            data.Rows.Add("D11", "Sunny", "Hot", "Normal", "Strong", "Yes");
            data.Rows.Add("D12", "Overcast", "Hot", "High", "Strong", "Yes");
            data.Rows.Add("D13", "Overcast", "Hot", "Normal", "Weak", "Yes");
            data.Rows.Add("D14", "Rain", "Hot", "High", "Strong", "No");

            // The codebook maps each distinct string of a column to an integer symbol
            var codebook = new Codification(data);

            var attributes = new DecisionVariable[]
            {
                new DecisionVariable("Outlook", codebook["Outlook"].Symbols),         // Sunny, Overcast, Rain
                new DecisionVariable("Temperature", codebook["Temperature"].Symbols), // constant: Hot only
                new DecisionVariable("Humidity", codebook["Humidity"].Symbols),       // High, Normal
                new DecisionVariable("Wind", codebook["Wind"].Symbols)                // Weak, Strong
            };

            int classCount = codebook["PlayTennis"].Symbols; // Yes, No

            // First part: the test requires tree construction to throw
            // while "Temperature" is declared with its single-symbol count.
            DecisionTree tree;
            bool constructionFailed = false;

            try
            {
                tree = new DecisionTree(attributes, classCount);
            }
            catch
            {
                constructionFailed = true;
            }

            Assert.IsTrue(constructionFailed);

            // Second part: declare "Temperature" with two symbols and train normally
            attributes[1] = new DecisionVariable("Temperature", 2);
            tree          = new DecisionTree(attributes, classCount);
            var id3       = new ID3Learning(tree);

            // Encode the table and extract the training inputs and outputs
            DataTable encoded = codebook.Apply(data);

            int[][] inputs  = encoded.ToArray <int>("Outlook", "Temperature", "Humidity", "Wind");
            int[]   outputs = encoded.ToArray <int>("PlayTennis");

            id3.Run(inputs, outputs);

            // The trained tree must reproduce every training label
            for (int row = 0; row < inputs.Length; row++)
            {
                Assert.AreEqual(outputs[row], tree.Compute(inputs[row]));
            }
        }
Exemplo n.º 34
0
        /// <summary>
        /// Learns an ID3 decision tree that predicts the "Result" column of the "Overtake"
        /// table from the "Separation", "Speed" and "OncomingSpeed" columns, then prints the
        /// prediction for a sample query, the training error and the extracted rules.
        /// </summary>
        public static void Run()
        {
            // Note: this example uses a DataTable to represent the input data, but this is not required.
            var example = "Overtake";

            Console.WriteLine(example);

            DataTable data = new DataTable(example);

            data.Columns.Add("Separation", typeof(String));
            data.Columns.Add("Speed", typeof(String));
            data.Columns.Add("OncomingSpeed", typeof(String));
            data.Columns.Add("Result", typeof(String));
            var shuffledInputs = GetInputs(100);

            // Each generated sample supplies the four column values in table order.
            for (int index = 0; index < shuffledInputs.Length; index++)
            {
                data.Rows.Add(shuffledInputs[index][0], shuffledInputs[index][1], shuffledInputs[index][2], shuffledInputs[index][3]);
            }

            // All columns are handled as categories here, so it does not matter whether they
            // are represented as strings or numbers: both are just symbols. A codification
            // codebook transforms every distinct value of a column into an integer symbol.

            // Create a new codification codebook to convert strings into integer symbols
            var codebook = new Codification(data);

            // Translate our training data into integer symbols using our codebook:
            DataTable symbols = codebook.Apply(data);

            // Only the three predictor columns are inputs. The label column ("Result") is the
            // output; feeding it in as an input would leak the answer into the tree. Note that
            // "Overtake" is the name of the table, not of a column.
            int[][] inputs  = symbols.ToJagged <int>(new string[] { "Separation", "Speed", "OncomingSpeed" });
            int[]   outputs = symbols.ToArray <int>("Result");

            // With only categorical variables, the simplest choice to induce
            // a decision tree is the ID3 algorithm by Quinlan.

            // Create an ID3 algorithm
            var id3learning = new ID3Learning()
            {
                // One decision variable per predictor column, in the same order as the inputs.
                // NOTE(review): 150 is the hard-coded number of symbols per variable; confirm it
                // covers the distinct values produced by GetInputs.
                new DecisionVariable("Separation", 150),
                new DecisionVariable("Speed", 150),
                new DecisionVariable("OncomingSpeed", 150)
            };

            // Learn the training instances!
            DecisionTree tree = id3learning.Learn(inputs, outputs);

            // The tree can now be queried for new examples through
            // its Decide method. For example, we can create a query
            // holding one value for each predictor column:

            int[] query = codebook.Transform(new[, ]
            {
                { "Separation", "150" },
                { "Speed", "150" },
                { "OncomingSpeed", "150" }
            });

            // And then predict the label using
            int predicted = tree.Decide(query);

            // Convert the predicted symbol back into the original "Result" text
            var answer = codebook.Revert("Result", predicted);

            Console.WriteLine("");

            Console.WriteLine(answer);

            // Training error, printed as a percentage
            double error = new ZeroOneLoss(outputs).Loss(tree.Decide(inputs));

            Console.WriteLine($"{error * 100:F10}");

            DecisionSet rules        = tree.ToRules();
            var         encodedRules = rules.ToString();

            Console.WriteLine(encodedRules);



            Console.ReadKey(); // Keep the window open till a key is pressed
        }
Exemplo n.º 35
0
        public void ConstantDiscreteVariableTest()
        {
            // Tennis data set with continuous temperature/humidity and a "Wind" column
            // that only ever takes the value "Weak"
            var data = new DataTable("Degenerated Tennis Example");

            data.Columns.Add("Day", typeof(string));
            data.Columns.Add("Outlook", typeof(string));
            data.Columns.Add("Temperature", typeof(double));
            data.Columns.Add("Humidity", typeof(double));
            data.Columns.Add("Wind", typeof(string));
            data.Columns.Add("PlayTennis", typeof(string));

            data.Rows.Add("D1", "Sunny", 50, 85, "Weak", "No");
            data.Rows.Add("D2", "Sunny", 50, 90, "Weak", "No");
            data.Rows.Add("D3", "Overcast", 83, 78, "Weak", "Yes");
            data.Rows.Add("D4", "Rain", 70, 96, "Weak", "Yes");
            data.Rows.Add("D5", "Rain", 68, 80, "Weak", "Yes");
            data.Rows.Add("D6", "Rain", 65, 70, "Weak", "No");
            data.Rows.Add("D7", "Overcast", 64, 65, "Weak", "Yes");
            data.Rows.Add("D8", "Sunny", 50, 95, "Weak", "No");
            data.Rows.Add("D9", "Sunny", 69, 70, "Weak", "Yes");
            data.Rows.Add("D10", "Rain", 75, 80, "Weak", "Yes");
            data.Rows.Add("D11", "Sunny", 75, 70, "Weak", "Yes");
            data.Rows.Add("D12", "Overcast", 72, 90, "Weak", "Yes");
            data.Rows.Add("D13", "Overcast", 81, 75, "Weak", "Yes");
            data.Rows.Add("D14", "Rain", 50, 80, "Weak", "No");

            // The codebook maps the string columns to integer symbols
            var codebook = new Codification(data);

            var attributes = new DecisionVariable[]
            {
               new DecisionVariable("Outlook",     codebook["Outlook"].Symbols),      // Sunny, Overcast, Rain
               new DecisionVariable("Temperature", DecisionVariableKind.Continuous), // real-valued
               new DecisionVariable("Humidity",    DecisionVariableKind.Continuous), // real-valued
               new DecisionVariable("Wind",        codebook["Wind"].Symbols + 1)      // only "Weak" occurs; declared with one extra symbol
            };

            int classCount = codebook["PlayTennis"].Symbols; // Yes, No

            var tree = new DecisionTree(attributes, classCount);
            var c45  = new C45Learning(tree);

            // Encode the table and extract the training inputs and outputs
            DataTable encoded = codebook.Apply(data);
            double[][] inputs = encoded.ToArray("Outlook", "Temperature", "Humidity", "Wind");
            int[] outputs = encoded.ToArray<int>("PlayTennis");

            c45.Run(inputs, outputs);

            // The trained tree must reproduce every training label
            for (int row = 0; row < inputs.Length; row++)
            {
                Assert.AreEqual(outputs[row], tree.Compute(inputs[row]));
            }
        }
        /// <summary>
        /// Trains an ID3 decision tree on a small job-offer table and prints whether the
        /// queried offer (good pay, good benefits, good culture, work from home) should be taken.
        /// </summary>
        /// <param name="args">Command-line arguments (not used).</param>
        static void Main(string[] args)
        {
            DataTable data = new DataTable("Should I Go To Work For Company X");

            data.Columns.Add("Scenario");
            data.Columns.Add("Pay");
            data.Columns.Add("Benefits");
            data.Columns.Add("Culture");
            data.Columns.Add("WorkFromHome");
            data.Columns.Add("ShouldITakeJob");

            data.Rows.Add("D1", "Good", "Good", "Mean", "Yes", "Yes");
            data.Rows.Add("D2", "Good", "Good", "Mean", "No", "Yes");
            data.Rows.Add("D3", "Average", "Good", "Good", "Yes", "Yes");
            data.Rows.Add("D4", "Average", "Good", "Good", "No", "Yes");
            data.Rows.Add("D5", "Bad", "Good", "Good", "Yes", "No");
            data.Rows.Add("D6", "Bad", "Good", "Good", "No", "No");
            data.Rows.Add("D7", "Good", "Average", "Mean", "Yes", "Yes");
            data.Rows.Add("D8", "Good", "Average", "Mean", "No", "Yes");
            data.Rows.Add("D9", "Average", "Average", "Good", "Yes", "No");
            data.Rows.Add("D10", "Average", "Average", "Good", "No", "No");
            data.Rows.Add("D11", "Bad", "Average", "Good", "Yes", "No");
            data.Rows.Add("D12", "Bad", "Average", "Good", "No", "No");
            data.Rows.Add("D13", "Good", "Bad", "Mean", "Yes", "Yes");
            data.Rows.Add("D14", "Good", "Bad", "Mean", "No", "Yes");
            data.Rows.Add("D15", "Average", "Bad", "Good", "Yes", "No");
            data.Rows.Add("D16", "Average", "Bad", "Good", "No", "No");
            data.Rows.Add("D17", "Bad", "Bad", "Good", "Yes", "No");
            data.Rows.Add("D18", "Bad", "Bad", "Good", "No", "No");
            data.Rows.Add("D19", "Good", "Good", "Good", "Yes", "Yes");
            data.Rows.Add("D20", "Good", "Good", "Good", "No", "Yes");


            // Create a new codification codebook to
            // convert strings into integer symbols
            Codification codebook = new Codification(data);

            // One decision variable per input column, with its number of distinct values
            DecisionVariable[] attributes =
            {
                new DecisionVariable("Pay",          3),
                new DecisionVariable("Benefits",     3),
                new DecisionVariable("Culture",      3),
                new DecisionVariable("WorkFromHome", 2)
            };

            int          outputValues = 2; // 2 possible output values: yes or no
            DecisionTree tree         = new DecisionTree(attributes, outputValues);
            ID3Learning  id3          = new ID3Learning(tree);

#pragma warning disable CS0618 // Type or member is obsolete
            // Translate our training data into integer symbols using our codebook:
            DataTable symbols = codebook.Apply(data);
            int[][]   inputs  = symbols.ToArray <int>("Pay", "Benefits", "Culture", "WorkFromHome");
            int[]     outputs = symbols.ToIntArray("ShouldITakeJob").GetColumn(0);

            // Learn the training instances!
            id3.Run(inputs, outputs);

            // Build the query with exactly the four tree inputs, each encoded through its own
            // column. The codebook also covers the "Scenario" column, so encoding a positional
            // list that starts with "D19" would prepend the scenario symbol and shift every
            // value by one relative to the inputs the tree was trained on.
            int[] query =
            {
                codebook.Translate("Pay",          "Good"),
                codebook.Translate("Benefits",     "Good"),
                codebook.Translate("Culture",      "Good"),
                codebook.Translate("WorkFromHome", "Yes")
            };

            int    output = tree.Compute(query);
            string answer = codebook.Translate("ShouldITakeJob", output); // answer will be "Yes".

#pragma warning restore CS0618                                            // Type or member is obsolete

            Console.WriteLine("Answer is: " + answer);
            Console.ReadKey();
        }
Exemplo n.º 37
0
        /// <summary>
        ///   Learns a C4.5 decision tree on Fisher's Iris dataset, then checks the training
        ///   error, the tree dimensions and the textual rules extracted from the tree.
        /// </summary>
        public void IrisDatasetTest()
        {
            #region doc_iris
            // In this example, we will process the famous Fisher's Iris dataset in
            // which the task is to classify whether the features of an Iris flower
            // belong to an Iris setosa, an Iris versicolor, or an Iris virginica:
            //
            //  - https://en.wikipedia.org/wiki/Iris_flower_data_set
            //

            // First, let's load the dataset into an array of text that we can process
            string[][] text = Resources.iris_data.Split(new[] { '\n' }, StringSplitOptions.RemoveEmptyEntries).Apply(x => x.Split(','));

            // The first four columns contain the flower features
            double[][] inputs = text.GetColumns(0, 1, 2, 3).To<double[][]>();

            // The last column contains the expected flower type
            string[] labels = text.GetColumn(4);

            // Since the labels are represented as text, the first step is to convert
            // those text labels into integer class labels, so we can process them
            // more easily. For this, we will create a codebook to encode class labels:
            //
            var codebook = new Codification("Output", labels);

            // With the codebook, we can convert the labels:
            int[] outputs = codebook.Translate("Output", labels);

            // Let's declare the names of our input variables:
            DecisionVariable[] features =
            {
                new DecisionVariable("sepal length", DecisionVariableKind.Continuous),
                new DecisionVariable("sepal width", DecisionVariableKind.Continuous),
                new DecisionVariable("petal length", DecisionVariableKind.Continuous),
                new DecisionVariable("petal width", DecisionVariableKind.Continuous),
            };

            // Now, we can finally create our tree for the 3 classes:
            var tree = new DecisionTree(inputs: features, classes: 3);

            // And we can use the C4.5 for learning:
            var teacher = new C45Learning(tree);

            // And finally induce the tree:
            teacher.Learn(inputs, outputs);

            // To get the estimated class labels, we can use
            int[] predicted = tree.Decide(inputs);

            // And the classification error can be computed from those
            // predictions (no need to run the tree a second time):
            double error = new ZeroOneLoss(outputs) // 0.0266
            {
                Mean = true
            }.Loss(predicted);

            // Moreover, we may decide to convert our tree to a set of rules:
            DecisionSet rules = tree.ToRules();

            // And using the codebook, we can inspect the tree reasoning:
            string ruleText = rules.ToString(codebook, "Output",
                System.Globalization.CultureInfo.InvariantCulture);

            // The output is:
            string expected = @"Iris-setosa =: (petal length <= 2.45)
Iris-versicolor =: (petal length > 2.45) && (petal width <= 1.75) && (sepal length <= 7.05) && (sepal width <= 2.85)
Iris-versicolor =: (petal length > 2.45) && (petal width <= 1.75) && (sepal length <= 7.05) && (sepal width > 2.85)
Iris-versicolor =: (petal length > 2.45) && (petal width > 1.75) && (sepal length <= 5.95) && (sepal width > 3.05)
Iris-virginica =: (petal length > 2.45) && (petal width <= 1.75) && (sepal length > 7.05)
Iris-virginica =: (petal length > 2.45) && (petal width > 1.75) && (sepal length > 5.95)
Iris-virginica =: (petal length > 2.45) && (petal width > 1.75) && (sepal length <= 5.95) && (sepal width <= 3.05)
";
            #endregion

            // The mean zero-one loss on the training set must match the documented value
            Assert.AreEqual(0.026666666666666668, error, 1e-10);
            Assert.AreEqual(4, tree.NumberOfInputs);
            Assert.AreEqual(3, tree.NumberOfOutputs);

            // The extracted rule set is expected to give the same error as the tree
            double newError = ComputeError(rules, inputs, outputs);
            Assert.AreEqual(0.026666666666666668, newError, 1e-10);
            Assert.AreEqual(expected, ruleText);
        }
Exemplo n.º 38
0
        // The original example only dealt with categorical attributes; review how it behaves
        // once the numeric ones are added, or whether it works just as well.
        /// <summary>
        ///   Codifies the textual columns of <paramref name="data"/>, stores the column headers
        ///   through <c>setHeaders</c> and trains a C4.5 decision tree that predicts column "G3".
        /// </summary>
        /// <param name="data">
        ///   Table whose first columns line up, by position, with the decision variables
        ///   declared below, and which also contains the "G3" output column.
        /// </param>
        public void TrainTree(DataTable data)
        {
            // The codebook converts the text columns into numeric labels
            // (this **could** interfere with the numeric columns)
            codeBook = new Codification(data, "school", "sex", "address", "famsize", "Pstatus", "Mjob", "Fjob", "reason", "guardian",
                                        "schoolsup", "famsup", "paid", "activities", "nursery", "higher", "internet", "romantic");

            DataTable convertedData = codeBook.Apply(data);

            DecisionVariable[] attributes =
            {
                new DecisionVariable("school",                                   2),
                new DecisionVariable("sex",                                      2),
                new DecisionVariable("age",        DecisionVariableKind.Continuous),// to review: i=2 || i=6 || i=7 || (i>=12 && i<=14) || i>=23
                new DecisionVariable("address",                                  2),
                new DecisionVariable("famsize",                                  2),
                new DecisionVariable("Pstatus",                                  2),
                new DecisionVariable("Medu",       DecisionVariableKind.Continuous),
                new DecisionVariable("Fedu",       DecisionVariableKind.Continuous),
                new DecisionVariable("Mjob",                                     5),
                new DecisionVariable("Fjob",                                     5),
                new DecisionVariable("reason",                                   4),
                new DecisionVariable("guardian",                                 3),
                new DecisionVariable("traveltime", DecisionVariableKind.Continuous),
                new DecisionVariable("studytime",  DecisionVariableKind.Continuous),
                new DecisionVariable("failures",   DecisionVariableKind.Continuous),
                new DecisionVariable("schoolsup",                                2),
                new DecisionVariable("famsup",                                   2),
                new DecisionVariable("paid",                                     2),
                new DecisionVariable("activities",                               2),
                new DecisionVariable("nursery",                                  2),
                new DecisionVariable("higher",                                   2),
                new DecisionVariable("internet",                                 2),
                new DecisionVariable("romantic",                                 2),
                new DecisionVariable("famrel",     DecisionVariableKind.Continuous),
                new DecisionVariable("freetime",   DecisionVariableKind.Continuous),
                new DecisionVariable("goout",      DecisionVariableKind.Continuous),
                new DecisionVariable("Dalc",       DecisionVariableKind.Continuous),
                new DecisionVariable("Walc",       DecisionVariableKind.Continuous),
                new DecisionVariable("health",     DecisionVariableKind.Continuous),
                new DecisionVariable("absences",   DecisionVariableKind.Continuous),
                new DecisionVariable("G1",         DecisionVariableKind.Continuous),
                new DecisionVariable("G2",         DecisionVariableKind.Continuous)
            };

            // One header per decision variable, read positionally from the table. The count
            // is derived from the attribute list so both can never silently drift apart.
            string[] headers = new string[attributes.Length];
            for (int i = 0; i < headers.Length; i++)
            {
                headers[i] = data.Columns[i].ColumnName;
            }

            setHeaders(headers);

            // NOTE(review): the tree is declared with 11 output classes, so G3 is presumably
            // expected to take values 0..10 here - confirm against the data being loaded.
            tree = new DecisionTree(attributes, 11);
            C45Learning c45 = new C45Learning(tree);

            // Convert the codified values into the inputs and the expected output.
            double[][] inputs  = convertedData.ToJagged(headers);
            int[]      outputs = convertedData.ToArray<int>("G3");

            // Train the tree
            c45.Learn(inputs, outputs);
        }
Exemplo n.º 39
0
        /// <summary>
        ///   Trains a C4.5 tree on the Iris resource with <c>Join</c> set to 3 and checks
        ///   the error reported by the learner, by the extracted rules, and the length
        ///   of the rule text.
        /// </summary>
        public void AttributeReuseTest1()
        {
            var invariant = System.Globalization.CultureInfo.InvariantCulture;

            // Break the embedded dataset into rows of comma-separated fields
            string[][] rows = Resources.iris_data
                .Split(new[] { '\n' }, StringSplitOptions.RemoveEmptyEntries)
                .Apply(line => line.Split(','));

            // The first four fields of each row are parsed as the numeric features
            double[][] inputs = new double[rows.Length][];
            for (int r = 0; r < inputs.Length; r++)
            {
                inputs[r] = rows[r].First(4).Convert(field => Double.Parse(field, invariant));
            }

            // The fifth field is the textual class label, codified into integers
            string[] labels = rows.GetColumn(4);
            Codification codebook = new Codification("Label", labels);
            int[] outputs = codebook.Translate("Label", labels);

            DecisionVariable[] features =
            {
                new DecisionVariable("sepal length", DecisionVariableKind.Continuous),
                new DecisionVariable("sepal width", DecisionVariableKind.Continuous),
                new DecisionVariable("petal length", DecisionVariableKind.Continuous),
                new DecisionVariable("petal width", DecisionVariableKind.Continuous),
            };

            // The number of classes comes from the symbols found by the codebook
            int classCount = codebook.Columns[0].Symbols;
            DecisionTree tree = new DecisionTree(features, classCount);

            C45Learning teacher = new C45Learning(tree)
            {
                Join = 3
            };

            double error = teacher.Run(inputs, outputs);
            Assert.AreEqual(0.02, error, 1e-10);

            // The rule set extracted from the tree must reproduce the same error
            DecisionSet rules = tree.ToRules();
            double newError = ComputeError(rules, inputs, outputs);
            Assert.AreEqual(0.02, newError, 1e-10);

            string ruleText = rules.ToString(codebook, invariant);

            // TODO: implement this assertion properly, actually checking
            // the text contents once the feature is completely finished.
            Assert.AreEqual(600, ruleText.Length);
        }
Exemplo n.º 40
0
        /// <summary>
        ///   Documentation example and test: downloads the hsbdemo CSV, codifies the
        ///   "write", "ses" and "prog" columns, learns a multinomial logistic regression
        ///   analysis and asserts on the resulting names, counts and statistics.
        /// </summary>
        public void learn_test()
        {
            // http://www.ats.ucla.edu/stat/stata/dae/mlogit.htm
            #region doc_learn_1
            // This example downloads an example dataset from the web and learns a multinomial logistic
            // regression on it. However, please keep in mind that the Multinomial Logistic Regression
            // can also work without many of the elements that will be shown below, like the codebook,
            // DataTables, and a CsvReader.

            // Let's download an example dataset from the web to learn a multinomial logistic regression:
            CsvReader reader = CsvReader.FromUrl("https://raw.githubusercontent.com/rlowrance/re/master/hsbdemo.csv", hasHeaders: true);

            // Let's read the CSV into a DataTable. As mentioned above, this step
            // can help, but is not necessarily required for learning the model:
            DataTable table = reader.ToTable();

            // We will learn a MLR regression between the following input and output fields of this table:
            string[] inputNames  = new[] { "write", "ses" };
            string[] outputNames = new[] { "prog" };

            // Now let's create a codification codebook to convert the string fields in the data
            // into integer symbols. This is required because the MLR model can only learn from
            // numeric data, so strings have to be transformed first. We can force a particular
            // interpretation for those columns if needed, as shown in the initializer below:
            var codification = new Codification()
            {
                { "write", CodificationVariable.Continuous },
                { "ses", CodificationVariable.CategoricalWithBaseline, new[] { "low", "middle", "high" } },
                { "prog", CodificationVariable.Categorical, new[] { "academic", "general" } },
            };

            // Learn the codification
            codification.Learn(table);

            // Now, transform symbols into a vector representation, growing the number of inputs.
            // Note that the name arrays are overwritten through the out parameters:
            double[][] x = codification.Transform(table, inputNames, out inputNames).ToDouble();
            double[][] y = codification.Transform(table, outputNames, out outputNames).ToDouble();

            // Create a new Multinomial Logistic Regression Analysis:
            var analysis = new MultinomialLogisticRegressionAnalysis()
            {
                InputNames  = inputNames,
                OutputNames = outputNames,
            };

            // Learn the regression from the input and output pairs:
            MultinomialLogisticRegression regression = analysis.Learn(x, y);

            // Let's retrieve some information about what we just learned:
            int coefficients    = analysis.Coefficients.Count; // should be 9
            int numberOfInputs  = analysis.NumberOfInputs;     // should be 3
            int numberOfOutputs = analysis.NumberOfOutputs;    // should be 3

            inputNames  = analysis.InputNames;                 // should be "write", "ses: middle", "ses: high"
            outputNames = analysis.OutputNames;                // should be "prog: academic", "prog: general", "prog: vocation"

            // The regression is best visualized when it is data-bound to a
            // Windows.Forms DataGridView or WPF DataGrid. You can get the
            // values for all different coefficients and discrete values:

            // DataGridBox.Show(regression.Coefficients); // uncomment this line

            // You can get the matrix of coefficients:
            double[][] coef = analysis.CoefficientValues;

            // Should be equal to (shown for documentation only; not asserted in this method):
            double[][] expectedCoef = new double[][]
            {
                new double[] { 2.85217775752471, -0.0579282723520426, -0.533293368378012, -1.16283850605289 },
                new double[] { 5.21813357698422, -0.113601186660817, 0.291387041358367, -0.9826369387481 }
            };

            // And their associated standard errors:
            double[][] stdErr = analysis.StandardErrors;

            // Should be equal to (shown for documentation only; not asserted in this method):
            double[][] expectedErr = new double[][]
            {
                new double[] { -2.02458003380033, -0.339533576505471, -1.164084923948, -0.520961533343425, 0.0556314901718 },
                new double[] { -3.73971589217449, -1.47672790071382, -1.76795568348094, -0.495032307980058, 0.113563519656386 }
            };

            // We can also get statistics and hypothesis tests:
            WaldTest[][]  wald          = analysis.WaldTests;     // should all have p < 0.05
            ChiSquareTest chiSquare     = analysis.ChiSquare;     // should be p=1.06300120956871E-08
            double        logLikelihood = analysis.LogLikelihood; // should be -179.98172637136295

            // You can use the regression to predict the values:
            int[] pred = regression.Transform(x);

            // And get the accuracy of the prediction if needed:
            var cm = GeneralConfusionMatrix.Estimate(regression, x, y.ArgMax(dimension: 1));

            double acc   = cm.Accuracy; // should be 0.61
            double kappa = cm.Kappa;    // should be 0.2993487536492252
            #endregion


            // Dimensions reported by the analysis
            Assert.AreEqual(9, coefficients);
            Assert.AreEqual(3, numberOfInputs);
            Assert.AreEqual(3, numberOfOutputs);

            // Names after the codification expanded the categorical columns
            Assert.AreEqual(new[] { "write", "ses: middle", "ses: high" }, inputNames);
            Assert.AreEqual(new[] { "prog: academic", "prog: general", "prog: vocation" }, outputNames);

            // Goodness-of-fit figures
            Assert.AreEqual(0.61, acc, 1e-10);
            Assert.AreEqual(0.2993487536492252, kappa, 1e-10);
            Assert.AreEqual(1.06300120956871E-08, chiSquare.PValue, 1e-8);
            Assert.AreEqual(-179.98172637136295, logLikelihood, 1e-8);

            // Further checks on the analysis are delegated to the testmlr helper
            testmlr(analysis);
        }
Exemplo n.º 41
0
        /// <summary>
        ///   Builds the text "name comparison value" for this node, or "Root" for the root node.
        /// </summary>
        /// <param name="codebook">
        ///   Optional codebook; when it is given and contains a column named after the
        ///   attribute, the node value is translated through it instead of printed as-is.
        /// </param>
        /// <returns>The textual description of this node.</returns>
        private string toString(Codification codebook)
        {
            if (IsRoot)
            {
                return "Root";
            }

            // Attribute tested by the parent; fall back to "x<index>" when it has no name
            int attributeIndex = Parent.Branches.AttributeIndex;
            string attributeName = Owner.Attributes[attributeIndex].Name;
            if (String.IsNullOrEmpty(attributeName))
            {
                attributeName = "x" + attributeIndex;
            }

            string comparisonText = ComparisonExtensions.ToString(Comparison);

            // Only translate through the codebook when there is a value and a matching column
            bool canTranslate = codebook != null
                && Value.HasValue
                && codebook.Columns.Contains(attributeName);

            string valueText = canTranslate
                ? codebook.Translate(attributeName, (int)Value.Value)
                : Value.ToString();

            return String.Format("{0} {1} {2}", attributeName, comparisonText, valueText);
        }
Exemplo n.º 42
0
        /// <summary>
        ///   Trains ID3 on the tennis example with the "Outlook" variable declared with
        ///   200 more symbols than the data contains, then checks the shape of the root
        ///   node and that every training sample is still classified as expected.
        /// </summary>
        public void IncompleteDiscreteVariableTest()
        {
            DataTable data = new DataTable("Degenerated Tennis Example");
            data.Columns.Add("Day", "Outlook", "Temperature", "Humidity", "Wind", "PlayTennis");

            // Day, Outlook, Temperature, Humidity, Wind, PlayTennis
            string[][] observations =
            {
                new[] { "D1", "Sunny", "Hot", "High", "Weak", "No" },
                new[] { "D2", "Sunny", "Hot", "High", "Strong", "No" },
                new[] { "D3", "Overcast", "Hot", "High", "Weak", "Yes" },
                new[] { "D4", "Rain", "Mild", "High", "Weak", "Yes" },
                new[] { "D5", "Rain", "Cool", "Normal", "Weak", "Yes" },
                new[] { "D6", "Rain", "Cool", "Normal", "Strong", "No" },
                new[] { "D7", "Overcast", "Cool", "Normal", "Strong", "Yes" },
                new[] { "D8", "Sunny", "Mild", "High", "Weak", "No" },
                new[] { "D9", "Sunny", "Cool", "Normal", "Weak", "Yes" },
                new[] { "D10", "Rain", "Mild", "Normal", "Weak", "Yes" },
                new[] { "D11", "Sunny", "Mild", "Normal", "Strong", "Yes" },
                new[] { "D12", "Overcast", "Mild", "High", "Strong", "Yes" },
                new[] { "D13", "Overcast", "Hot", "Normal", "Weak", "Yes" },
                new[] { "D14", "Rain", "Mild", "High", "Strong", "No" },
            };

            foreach (string[] observation in observations)
            {
                data.Rows.Add(observation);
            }

            // The codebook maps every distinct string to an integer symbol
            Codification codebook = new Codification(data);

            int outlookSymbols     = codebook["Outlook"].Symbols + 200; // 203 possible values, 200 undefined
            int temperatureSymbols = codebook["Temperature"].Symbols;   // 3 possible values (Hot, mild, cool)
            int humiditySymbols    = codebook["Humidity"].Symbols;      // 2 possible values (High, normal)
            int windSymbols        = codebook["Wind"].Symbols;          // 2 possible values (Weak, strong)

            DecisionVariable[] attributes =
            {
                new DecisionVariable("Outlook",     outlookSymbols),
                new DecisionVariable("Temperature", temperatureSymbols),
                new DecisionVariable("Humidity",    humiditySymbols),
                new DecisionVariable("Wind",        windSymbols)
            };

            int classCount = codebook["PlayTennis"].Symbols; // 2 possible values (yes, no)

            DecisionTree tree = new DecisionTree(attributes, classCount);
            ID3Learning id3 = new ID3Learning(tree);

            // Codify the table and pull out the training inputs and outputs
            DataTable symbols = codebook.Apply(data);
            int[][] inputs = symbols.ToArray<int>("Outlook", "Temperature", "Humidity", "Wind");
            int[] outputs = symbols.ToArray<int>("PlayTennis");

            id3.Run(inputs, outputs);

            // One branch per declared Outlook symbol; a symbol absent
            // from the data ends in a leaf that carries no output
            Assert.AreEqual(203, tree.Root.Branches.Count);
            Assert.IsTrue(tree.Root.Branches[100].IsLeaf);
            Assert.IsNull(tree.Root.Branches[100].Output);

            // Every training sample must be classified as its own label
            for (int sample = 0; sample < inputs.Length; sample++)
            {
                int decision = tree.Compute(inputs[sample]);
                Assert.AreEqual(outputs[sample], decision);
            }
        }
Exemplo n.º 43
0
        /// <summary>
        ///   Estimates a Naive Bayes classifier mixing discrete and normal distributions
        ///   on the tennis example and checks the decision and the responses computed
        ///   for one new observation.
        /// </summary>
        public void ComputeTest2()
        {
            DataTable data = new DataTable("Mitchell's Tennis Example");
            data.Columns.Add("Day", "Outlook", "Temperature", "Humidity", "Wind", "PlayTennis");

            // Temperature and Humidity are stored as continuous (double) columns
            data.Columns["Temperature"].DataType = typeof(double);
            data.Columns["Humidity"].DataType = typeof(double);

            // Day, Outlook, Temperature, Humidity, Wind, PlayTennis
            object[][] observations =
            {
                new object[] { "D1", "Sunny", 38.0, 96.0, "Weak", "No" },
                new object[] { "D2", "Sunny", 39.0, 90.0, "Strong", "No" },
                new object[] { "D3", "Overcast", 38.0, 75.0, "Weak", "Yes" },
                new object[] { "D4", "Rain", 25.0, 87.0, "Weak", "Yes" },
                new object[] { "D5", "Rain", 12.0, 30.0, "Weak", "Yes" },
                new object[] { "D6", "Rain", 11.0, 35.0, "Strong", "No" },
                new object[] { "D7", "Overcast", 10.0, 40.0, "Strong", "Yes" },
                new object[] { "D8", "Sunny", 24.0, 90.0, "Weak", "No" },
                new object[] { "D9", "Sunny", 12.0, 26.0, "Weak", "Yes" },
                new object[] { "D10", "Rain", 25, 30.0, "Weak", "Yes" },
                new object[] { "D11", "Sunny", 26.0, 40.0, "Strong", "Yes" },
                new object[] { "D12", "Overcast", 27.0, 97.0, "Strong", "Yes" },
                new object[] { "D13", "Overcast", 39.0, 41.0, "Weak", "Yes" },
                new object[] { "D14", "Rain", 23.0, 98.0, "Strong", "No" },
            };

            foreach (object[] observation in observations)
            {
                data.Rows.Add(observation);
            }

            // The codebook maps the string columns to discrete symbols
            Codification codebook = new Codification(data);

            int outlookSymbols = codebook["Outlook"].Symbols; // 3 possible values (Sunny, overcast, rain)
            int windSymbols    = codebook["Wind"].Symbols;    // 2 possible values (Weak, strong)
            int classCount     = codebook["PlayTennis"].Symbols; // 2 possible values (yes, no)
            int inputCount     = 4; // 4 variables (Outlook, Temperature, Humidity, Wind)

            // One prior distribution per input variable, in column order
            IUnivariateDistribution[] priors =
            {
                new GeneralDiscreteDistribution(outlookSymbols),
                new NormalDistribution(), // Continuous value (celsius)
                new NormalDistribution(), // Continuous value (percentage)
                new GeneralDiscreteDistribution(windSymbols)
            };

            var target = new NaiveBayes<IUnivariateDistribution>(classCount, inputCount, priors);

            // Codify the table and extract the training inputs and outputs
            DataTable symbols = codebook.Apply(data);
            double[][] inputs = symbols.ToArray("Outlook", "Temperature", "Humidity", "Wind");
            int[] outputs = symbols.ToArray<int>("PlayTennis");

            // Fit the Naive Bayes model
            target.Estimate(inputs, outputs);

            // Query: Outlook = Sunny, Temperature = 12, Humidity = 90, Wind = Strong
            int sunny = codebook.Translate(columnName:"Outlook", value:"Sunny");
            int strong = codebook.Translate(columnName:"Wind", value:"Strong");
            double[] instance = new double[] { sunny, 12.0, 90.0, strong };

            double logLikelihood;
            double[] responses;
            int c = target.Compute(instance, out logLikelihood, out responses);

            // Translate the decision back into its original text label
            string result = codebook.Translate("PlayTennis", c);

            Assert.AreEqual("No", result);
            Assert.AreEqual(0, c);
            Assert.AreEqual(0.840, responses[0], 1e-3);
            Assert.AreEqual(1, responses.Sum(), 1e-10);
            Assert.IsFalse(double.IsNaN(responses[0]));
            Assert.AreEqual(2, responses.Length);
        }
        /// <summary>
        ///   Shows how a Codification filter turns the text labels of a table into integer
        ///   symbols (and back), then feeds the codified data to a multi-class SVM and
        ///   checks both the translations and the labels predicted for new ages.
        /// </summary>
        public void ApplyTest2()
        {
            // The sample table relates the age of a person to a categorical
            // label: "child", "adult" or "elder".
            //
            // The Codification filter extracts those string labels and assigns an
            // integer symbol to each of them in order of appearance, which for this
            // table gives "child" = 0, "adult" = 1 and "elder" = 2.

            DataTable table = new DataTable("Sample data");
            table.Columns.Add("Age", typeof(int));
            table.Columns.Add("Label", typeof(string));

            int[] ages = { 10, 07, 04, 21, 27, 12, 79, 40, 30 };
            string[] labels =
            {
                "child", "child", "child", "adult", "adult",
                "child", "elder", "adult", "adult"
            };

            for (int row = 0; row < ages.Length; row++)
            {
                table.Rows.Add(ages[row], labels[row]);
            }

            // Build the codebook from the table
            Codification codebook = new Codification(table);

            // Text label -> integer symbol
            int a = codebook.Translate("Label", "child"); // returns 0
            int b = codebook.Translate("Label", "adult"); // returns 1
            int c = codebook.Translate("Label", "elder"); // returns 2

            // Integer symbol -> text label
            string labela = codebook.Translate("Label", 0); // returns "child"
            string labelb = codebook.Translate("Label", 1); // returns "adult"
            string labelc = codebook.Translate("Label", 2); // returns "elder"

            // A whole table can be codified at once...
            DataTable result = codebook.Apply(table);

            // ...and the codified table converted into a jagged array
            double[][] matrix = Matrix.ToArray(result);

            // whose C# representation is
            string str = matrix.ToString(CSharpJaggedMatrixFormatProvider.InvariantCulture);

            // str == new double[][] 
            // {
            //     new double[] { 10, 0 },
            //     new double[] {  7, 0 },
            //     new double[] {  4, 0 },
            //     new double[] { 21, 1 },
            //     new double[] { 27, 1 },
            //     new double[] { 12, 0 },
            //     new double[] { 79, 2 },
            //     new double[] { 40, 1 },
            //     new double[] { 30, 1 } 
            // };

            // The matrix can now be handed to a learning algorithm
            // without any text labels left in the data.

            int classes = codebook["Label"].Symbols; // 3 classes (child, adult, elder)

            // First column holds the input variable, second column the output class
            double[][] inputs = matrix.GetColumns(0);
            int[] outputs = matrix.GetColumn(1).ToInt32();

            // Multi-class SVM for 1 input (Age) and 3 classes (Label)
            var machine = new MulticlassSupportVectorMachine(inputs: 1, classes: classes);

            // Learning algorithm for the machine; SMO trains the
            // underlying SVM of each binary class subproblem
            var teacher = new MulticlassSupportVectorLearning(machine, inputs, outputs);
            teacher.Algorithm = (svm, classInputs, classOutputs, i, j) =>
                new SequentialMinimalOptimization(svm, classInputs, classOutputs)
                {
                    Complexity = 1
                };

            // Run the learning algorithm
            double error = teacher.Run();

            // Classify new data points and translate the machine
            // outputs back into the original text labels
            string result1 = codebook.Translate("Label", machine.Compute(10)); // child
            string result2 = codebook.Translate("Label", machine.Compute(40)); // adult
            string result3 = codebook.Translate("Label", machine.Compute(70)); // elder

            // Translations in both directions
            Assert.AreEqual(0, a);
            Assert.AreEqual(1, b);
            Assert.AreEqual(2, c);
            Assert.AreEqual("child", labela);
            Assert.AreEqual("adult", labelb);
            Assert.AreEqual("elder", labelc);

            // Predictions of the trained machine
            Assert.AreEqual("child", result1);
            Assert.AreEqual("adult", result2);
            Assert.AreEqual("elder", result3);
        }
Exemplo n.º 45
0
        /// <summary>
        ///   Documentation example and test: loads the Nursery database from the embedded
        ///   resource, codifies it, learns a C4.5 tree and checks that the tree (and, outside
        ///   NET35, its compiled expression) reproduces every training label.
        /// </summary>
        public void LargeRunTest()
        {
            #region doc_nursery
            // This example uses the Nursery Database available from the University of
            // California Irvine repository of machine learning databases, available at
            //
            //   http://archive.ics.uci.edu/ml/machine-learning-databases/nursery/nursery.names
            //
            // The description paragraph is listed as follows.
            //
            //   Nursery Database was derived from a hierarchical decision model
            //   originally developed to rank applications for nursery schools. It
            //   was used during several years in 1980's when there was excessive
            //   enrollment to these schools in Ljubljana, Slovenia, and the
            //   rejected applications frequently needed an objective
            //   explanation. The final decision depended on three subproblems:
            //   occupation of parents and child's nursery, family structure and
            //   financial standing, and social and health picture of the family.
            //   The model was developed within expert system shell for decision
            //   making DEX (M. Bohanec, V. Rajkovic: Expert system for decision
            //   making. Sistemica 1(1), pp. 145-157, 1990.).
            //

            // Let's begin by loading the raw data. This string variable contains
            // the contents of the nursery.data file as a single, continuous text.
            //
            string nurseryData = Resources.nursery;

            // Those are the input columns available in the data
            //
            string[] inputColumns =
            {
                "parents", "has_nurs", "form",   "children",
                "housing", "finance",  "social", "health"
            };

            // And this is the output, the last column of the data.
            //
            string outputColumn = "output";


            // Let's populate a data table with this information.
            //
            DataTable table = new DataTable("Nursery");
            table.Columns.Add(inputColumns);
            table.Columns.Add(outputColumn);

            // Accept both Windows (\r\n) and Unix (\n) line endings, so the example
            // does not depend on how the resource file was checked out. "\r\n" is
            // listed first so that a CRLF pair is consumed as a single delimiter.
            string[] lines = nurseryData.Split(
                new[] { "\r\n", "\n" }, StringSplitOptions.RemoveEmptyEntries);

            foreach (var line in lines)
            {
                table.Rows.Add(line.Split(','));
            }


            // Now, we have to convert the textual, categorical data found
            // in the table to a more manageable discrete representation.
            //
            // For this, we will create a codebook to translate text to
            // discrete integer symbols:
            //
            Codification codebook = new Codification(table);

            // And then convert all data into symbols
            //
            DataTable  symbols = codebook.Apply(table);
            double[][] inputs  = symbols.ToArray(inputColumns);
            int[]      outputs = symbols.ToArray<int>(outputColumn);

            // We can either specify the decision attributes we want
            // manually, or we can ask the codebook to do it for us:
            DecisionVariable[] attributes = DecisionVariable.FromCodebook(codebook, inputColumns);

            // Now, let's create the C4.5 algorithm:
            C45Learning c45 = new C45Learning(attributes);

            // and induce a decision tree from the data:
            DecisionTree tree = c45.Learn(inputs, outputs);

            // To get the estimated class labels, we can use
            int[] predicted = tree.Decide(inputs);

            // And the classification error (of 0.0) can be computed
            // from those predictions, without running the tree again:
            double error = new ZeroOneLoss(outputs).Loss(predicted);

            // To compute a decision for one of the input points,
            //   such as the example at index 25 in the set, we can use
            //
            int y = tree.Decide(inputs[25]); // should be 1
            #endregion

            // The resource must have been split into the expected records
            Assert.AreEqual(12960, lines.Length);
            Assert.AreEqual("usual,proper,complete,1,convenient,convenient,nonprob,recommended,recommend", lines[0]);
            Assert.AreEqual("great_pret,very_crit,foster,more,critical,inconv,problematic,not_recom,not_recom", lines[lines.Length - 1]);

            Assert.AreEqual(0, error);
            Assert.AreEqual(1, y);

            // The tree must reproduce every training label
            for (int i = 0; i < inputs.Length; i++)
            {
                int expected = outputs[i];
                int actual   = tree.Compute(inputs[i]);

                Assert.AreEqual(expected, actual);
            }

#if !NET35
            #region doc_nursery_native
            // Finally, we can also convert our tree to a native
            // function, improving efficiency considerably, with
            //
            Func<double[], int> func = tree.ToExpression().Compile();

            // Again, to compute a new decision, we can just use
            //
            int z = func(inputs[25]);
            #endregion

            Assert.AreEqual(z, y);

            // The compiled function must agree with the training labels as well
            for (int i = 0; i < inputs.Length; i++)
            {
                int expected = outputs[i];
                int actual   = func(inputs[i]);

                Assert.AreEqual(expected, actual);
            }
#endif
        }
Exemplo n.º 46
0
        // Learns a Naive Bayes model with mixed discrete and continuous inputs from
        // Mitchell's tennis data and verifies the decision for one hand-built day.
        public void learn_test_mitchell()
        {
            #region doc_mitchell_1
            // Mitchell's Tennis example is kept in a DataTable here. A DataTable is
            // not required in order to use Naive Bayes; it is only a convenient
            // container for this example.
            DataTable tennis = new DataTable("Mitchell's Tennis Example");
            tennis.Columns.Add("Day", "Outlook", "Temperature", "Humidity", "Wind", "PlayTennis");

            // Temperature and Humidity are stored as continuous (double) columns
            tennis.Columns["Temperature"].DataType = typeof(double);
            tennis.Columns["Humidity"].DataType    = typeof(double);

            // Fill in the fourteen observations
            tennis.Rows.Add("D1", "Sunny", 38.0, 96.0, "Weak", "No");
            tennis.Rows.Add("D2", "Sunny", 39.0, 90.0, "Strong", "No");
            tennis.Rows.Add("D3", "Overcast", 38.0, 75.0, "Weak", "Yes");
            tennis.Rows.Add("D4", "Rain", 25.0, 87.0, "Weak", "Yes");
            tennis.Rows.Add("D5", "Rain", 12.0, 30.0, "Weak", "Yes");
            tennis.Rows.Add("D6", "Rain", 11.0, 35.0, "Strong", "No");
            tennis.Rows.Add("D7", "Overcast", 10.0, 40.0, "Strong", "Yes");
            tennis.Rows.Add("D8", "Sunny", 24.0, 90.0, "Weak", "No");
            tennis.Rows.Add("D9", "Sunny", 12.0, 26.0, "Weak", "Yes");
            tennis.Rows.Add("D10", "Rain", 25, 30.0, "Weak", "Yes");
            tennis.Rows.Add("D11", "Sunny", 26.0, 40.0, "Strong", "Yes");
            tennis.Rows.Add("D12", "Overcast", 27.0, 97.0, "Strong", "Yes");
            tennis.Rows.Add("D13", "Overcast", 39.0, 41.0, "Weak", "Yes");
            tennis.Rows.Add("D14", "Rain", 23.0, 98.0, "Strong", "No");
            #endregion

            #region doc_mitchell_2
            // The codebook maps every text value to a discrete integer symbol
            Codification codes = new Codification(tennis);
            #endregion

            #region doc_mitchell_3
            // One distribution per input variable. They are supplied explicitly
            // because some of them need constructor arguments (the number of
            // symbols) that the learner is not given any other way here.
            var components = new IUnivariateFittableDistribution[]
            {
                new GeneralDiscreteDistribution(codes["Outlook"].Symbols),   // Outlook: Sunny, Overcast, Rain
                new NormalDistribution(),                                    // Temperature: continuous
                new NormalDistribution(),                                    // Humidity: continuous
                new GeneralDiscreteDistribution(codes["Wind"].Symbols)       // Wind: Weak, Strong
            };

            // The learner asks this callback for the distribution of each variable
            var teacher = new NaiveBayesLearning <IUnivariateFittableDistribution>();
            teacher.Distribution = (cls, variable) => components[variable];

            // Codify the table, then split it into input columns and class labels
            DataTable  encoded = codes.Apply(tennis);
            double[][] x       = encoded.ToArray("Outlook", "Temperature", "Humidity", "Wind");
            int[]      y       = encoded.ToArray <int>("PlayTennis");

            // Fit the Naive Bayes model
            var model = teacher.Learn(x, y);
            #endregion

            #region doc_mitchell_4
            // Query: Outlook = Sunny, Temperature = 12, Humidity = 90, Wind = Strong
            double[] query =
            {
                codes.Translate(columnName: "Outlook", value: "Sunny"),
                12.0,
                90.0,
                codes.Translate(columnName: "Wind", value: "Strong")
            };

            // Class decision for the query
            int decision = model.Decide(query);

            // Per-class probabilities for the query
            double[] posterior = model.Probabilities(query);

            // Log-likelihood of the prediction
            double logLikelihood = model.LogLikelihood(query);

            // Map the integer decision back to its text label (expected: "No")
            string answer = codes.Translate("PlayTennis", decision);
            #endregion

            Assert.AreEqual("No", answer);
            Assert.AreEqual(0, decision);
            Assert.AreEqual(0.840, posterior[0], 1e-3);
            Assert.AreEqual(-10.493243476691351, logLikelihood, 1e-6);
            Assert.AreEqual(1, posterior.Sum(), 1e-10);
            Assert.AreEqual(2, posterior.Length);
        }
Exemplo n.º 47
0
        // Builds a C4.5 decision tree from the embedded Nursery data set.
        //
        // inputs  - receives the codified input columns of the whole data set.
        // outputs - receives the codified class labels of the whole data set.
        // first   - forwarded to Submatrix(first) to select the training subset
        //           (presumably the leading `first` samples - confirm against
        //           the Submatrix extension).
        //
        // Asserts that the training error reported by the learner is zero and
        // returns the trained tree.
        public static DecisionTree createNurseryExample(out double[][] inputs, out int[] outputs, int first)
        {
            string nurseryData = Resources.nursery;

            string[] inputColumns = 
            {
                "parents", "has_nurs", "form", "children",
                "housing", "finance", "social", "health"
            };

            string outputColumn = "output";

            DataTable table = new DataTable("Nursery");
            table.Columns.Add(inputColumns);
            table.Columns.Add(outputColumn);

            // Accept both Windows and Unix line endings and skip blank lines (such
            // as the one left by a trailing newline), so that every row added below
            // has one value per column regardless of the platform running the test.
            string[] lines = nurseryData.Split(
                new[] { "\r\n", "\n" }, StringSplitOptions.RemoveEmptyEntries);

            foreach (var line in lines)
            {
                table.Rows.Add(line.Split(','));
            }

            // Translate the text values into integer symbols
            Codification codebook = new Codification(table);
            DataTable symbols = codebook.Apply(table);
            inputs = symbols.ToArray(inputColumns);
            outputs = symbols.ToArray<int>(outputColumn);

            // The tree is created with five output classes
            var attributes = DecisionVariable.FromCodebook(codebook, inputColumns);
            var tree = new DecisionTree(attributes, outputClasses: 5);

            // Train only on the subset selected by `first`
            C45Learning c45 = new C45Learning(tree);
            double error = c45.Run(inputs.Submatrix(first), outputs.Submatrix(first));

            Assert.AreEqual(0, error);

            return tree;
        }
Exemplo n.º 48
0
        // Trains C4.5 on a tennis table whose "Wind" column holds a single value
        // ("Weak") and checks that the tree reproduces every training label.
        public void ConstantDiscreteVariableTest()
        {
            DataTable table = new DataTable("Degenerated Tennis Example");

            table.Columns.Add("Day", typeof(string));
            table.Columns.Add("Outlook", typeof(string));
            table.Columns.Add("Temperature", typeof(double));
            table.Columns.Add("Humidity", typeof(double));
            table.Columns.Add("Wind", typeof(string));
            table.Columns.Add("PlayTennis", typeof(string));

            table.Rows.Add("D1", "Sunny", 50, 85, "Weak", "No");
            table.Rows.Add("D2", "Sunny", 50, 90, "Weak", "No");
            table.Rows.Add("D3", "Overcast", 83, 78, "Weak", "Yes");
            table.Rows.Add("D4", "Rain", 70, 96, "Weak", "Yes");
            table.Rows.Add("D5", "Rain", 68, 80, "Weak", "Yes");
            table.Rows.Add("D6", "Rain", 65, 70, "Weak", "No");
            table.Rows.Add("D7", "Overcast", 64, 65, "Weak", "Yes");
            table.Rows.Add("D8", "Sunny", 50, 95, "Weak", "No");
            table.Rows.Add("D9", "Sunny", 69, 70, "Weak", "Yes");
            table.Rows.Add("D10", "Rain", 75, 80, "Weak", "Yes");
            table.Rows.Add("D11", "Sunny", 75, 70, "Weak", "Yes");
            table.Rows.Add("D12", "Overcast", 72, 90, "Weak", "Yes");
            table.Rows.Add("D13", "Overcast", 81, 75, "Weak", "Yes");
            table.Rows.Add("D14", "Rain", 50, 80, "Weak", "No");

            // The codebook maps the text columns to integer symbols
            Codification codes = new Codification(table);

            // "Wind" only ever takes the value "Weak"; the variable is declared
            // with one more symbol than the codebook found for it.
            int windSymbols = codes["Wind"].Symbols + 1;

            var variables = new DecisionVariable[]
            {
                new DecisionVariable("Outlook", codes["Outlook"].Symbols),            // Sunny, Overcast, Rain
                new DecisionVariable("Temperature", DecisionVariableKind.Continuous),
                new DecisionVariable("Humidity", DecisionVariableKind.Continuous),
                new DecisionVariable("Wind", windSymbols)
            };

            // Two classes: Yes and No
            int numberOfClasses = codes["PlayTennis"].Symbols;

            DecisionTree tree = new DecisionTree(variables, numberOfClasses);
            C45Learning learner = new C45Learning(tree);

            // Codify the table and pull out the training arrays
            DataTable encoded = codes.Apply(table);

            double[][] inputs  = encoded.ToArray("Outlook", "Temperature", "Humidity", "Wind");
            int[]      outputs = encoded.ToArray <int>("PlayTennis");

            double error = learner.Run(inputs, outputs);

            // Every training sample must be classified as its own label
            for (int row = 0; row < inputs.Length; row++)
            {
                int decision = tree.Compute(inputs[row]);
                Assert.AreEqual(outputs[row], decision);
            }
        }
Exemplo n.º 49
0
        // ID3 variant: "Temperature" is constant ("Hot"). Building a tree with the
        // single-symbol variable must throw; after declaring the variable with two
        // symbols the tree must learn and reproduce every training label.
        public void ConstantDiscreteVariableTest()
        {
            DataTable table = new DataTable("Degenerated Tennis Example");

            table.Columns.Add("Day", "Outlook", "Temperature", "Humidity", "Wind", "PlayTennis");

            table.Rows.Add("D1", "Sunny", "Hot", "High", "Weak", "No");
            table.Rows.Add("D2", "Sunny", "Hot", "High", "Strong", "No");
            table.Rows.Add("D3", "Overcast", "Hot", "High", "Weak", "Yes");
            table.Rows.Add("D4", "Rain", "Hot", "High", "Weak", "Yes");
            table.Rows.Add("D5", "Rain", "Hot", "Normal", "Weak", "Yes");
            table.Rows.Add("D6", "Rain", "Hot", "Normal", "Strong", "No");
            table.Rows.Add("D7", "Overcast", "Hot", "Normal", "Strong", "Yes");
            table.Rows.Add("D8", "Sunny", "Hot", "High", "Weak", "No");
            table.Rows.Add("D9", "Sunny", "Hot", "Normal", "Weak", "Yes");
            table.Rows.Add("D10", "Rain", "Hot", "Normal", "Weak", "Yes");
            table.Rows.Add("D11", "Sunny", "Hot", "Normal", "Strong", "Yes");
            table.Rows.Add("D12", "Overcast", "Hot", "High", "Strong", "Yes");
            table.Rows.Add("D13", "Overcast", "Hot", "Normal", "Weak", "Yes");
            table.Rows.Add("D14", "Rain", "Hot", "High", "Strong", "No");

            // The codebook maps the text columns to integer symbols
            Codification codes = new Codification(table);

            var variables = new DecisionVariable[]
            {
               new DecisionVariable("Outlook",     codes["Outlook"].Symbols),     // Sunny, Overcast, Rain
               new DecisionVariable("Temperature", codes["Temperature"].Symbols), // Hot only
               new DecisionVariable("Humidity",    codes["Humidity"].Symbols),    // High, Normal
               new DecisionVariable("Wind",        codes["Wind"].Symbols)         // Weak, Strong
            };

            // Two classes: Yes and No
            int numberOfClasses = codes["PlayTennis"].Symbols;

            // Step 1: constructing the tree with the constant variable must fail
            bool constructionFailed = false;
            try
            {
                new DecisionTree(variables, numberOfClasses);
            }
            catch
            {
                constructionFailed = true;
            }

            Assert.IsTrue(constructionFailed);

            // Step 2: declare "Temperature" with two symbols and train normally
            variables[1] = new DecisionVariable("Temperature", 2);
            DecisionTree tree = new DecisionTree(variables, numberOfClasses);
            ID3Learning learner = new ID3Learning(tree);

            // Codify the table and pull out the training arrays
            DataTable encoded = codes.Apply(table);
            int[][] inputs = encoded.ToArray<int>("Outlook", "Temperature", "Humidity", "Wind");
            int[] outputs = encoded.ToArray<int>("PlayTennis");

            double error = learner.Run(inputs, outputs);

            // Every training sample must be classified as its own label
            for (int row = 0; row < inputs.Length; row++)
            {
                int decision = tree.Compute(inputs[row]);
                Assert.AreEqual(outputs[row], decision);
            }
        }
Exemplo n.º 50
0
        // Learns a C4.5 tree over named continuous features on the Iris data and
        // checks its dimensions, its mean zero-one loss and the text of its rules.
        public void iris_new_method_create_tree()
        {
            // Parse the embedded CSV: one flower per line, five fields per flower
            string[] rows = Resources.iris_data.Split(new[] { "\r\n" }, StringSplitOptions.RemoveEmptyEntries);
            string[][] fields = rows.Apply(x => x.Split(','));

            // Fields 0..3 are the measurements, field 4 is the species name
            double[][] inputs = fields.GetColumns(0, 1, 2, 3).To<double[][]>();
            string[] species = fields.GetColumn(4);

            // Encode the species names as integer class labels
            var codes = new Codification("Output", species);
            int[] outputs = codes.Translate("Output", species);

            var variables = new DecisionVariable[]
            {
                new DecisionVariable("sepal length", DecisionVariableKind.Continuous),
                new DecisionVariable("sepal width",  DecisionVariableKind.Continuous),
                new DecisionVariable("petal length", DecisionVariableKind.Continuous),
                new DecisionVariable("petal width",  DecisionVariableKind.Continuous),
            };

            var learner = new C45Learning(variables);
            var tree = learner.Learn(inputs, outputs);

            Assert.AreEqual(4, tree.NumberOfInputs);
            Assert.AreEqual(3, tree.NumberOfOutputs);

            // Estimated class labels for the training set
            int[] predicted = tree.Decide(inputs);

            // Mean zero-one loss over the training set (about 0.0266)
            var loss = new ZeroOneLoss(outputs) { Mean = true };
            double error = loss.Loss(tree.Decide(inputs));

            // Flatten the tree into a rule set
            DecisionSet rules = tree.ToRules();

            // Render the rules with the species names taken from the codebook
            var invariant = System.Globalization.CultureInfo.InvariantCulture;
            string ruleText = rules.ToString(codes, "Output", invariant);

            // Expected rendering of the rule set
            string expected = @"Iris-setosa =: (petal length <= 2.45)
Iris-versicolor =: (petal length > 2.45) && (petal width <= 1.75) && (sepal length <= 7.05) && (sepal width <= 2.85)
Iris-versicolor =: (petal length > 2.45) && (petal width <= 1.75) && (sepal length <= 7.05) && (sepal width > 2.85)
Iris-versicolor =: (petal length > 2.45) && (petal width > 1.75) && (sepal length <= 5.95) && (sepal width > 3.05)
Iris-virginica =: (petal length > 2.45) && (petal width <= 1.75) && (sepal length > 7.05)
Iris-virginica =: (petal length > 2.45) && (petal width > 1.75) && (sepal length > 5.95)
Iris-virginica =: (petal length > 2.45) && (petal width > 1.75) && (sepal length <= 5.95) && (sepal width <= 3.05)
";

            // Align the literal's line breaks with the platform's line terminator
            expected = expected.Replace("\r\n", Environment.NewLine);

            Assert.AreEqual(0.026666666666666668, error, 1e-10);

            // The rule set must give the same error as the tree
            double ruleError = ComputeError(rules, inputs, outputs);

            Assert.AreEqual(0.026666666666666668, ruleError, 1e-10);
            Assert.AreEqual(expected, ruleText);
        }
Exemplo n.º 51
0
        // 
        //You can use the following additional attributes as you write your tests:
        //
        //Use ClassInitialize to run code before running the first test in the class
        //[ClassInitialize()]
        //public static void MyClassInitialize(TestContext testContext)
        //{
        //}
        //
        //Use ClassCleanup to run code after all tests in a class have run
        //[ClassCleanup()]
        //public static void MyClassCleanup()
        //{
        //}
        //
        //Use TestInitialize to run code before running each test
        //[TestInitialize()]
        //public void MyTestInitialize()
        //{
        //}
        //
        //Use TestCleanup to run code after each test has run
        //[TestCleanup()]
        //public void MyTestCleanup()
        //{
        //}
        //
        #endregion


        // Builds Mitchell's tennis table, trains an ID3 tree on it and hands back
        // the tree together with the codified inputs and outputs. Asserts a zero
        // training error and that every table row is classified as its own label.
        public static void CreateMitchellExample(out DecisionTree tree, out int[][] inputs, out int[] outputs)
        {
            DataTable table = new DataTable("Mitchell's Tennis Example");

            table.Columns.Add("Day", "Outlook", "Temperature", "Humidity", "Wind", "PlayTennis");

            table.Rows.Add("D1", "Sunny", "Hot", "High", "Weak", "No");
            table.Rows.Add("D2", "Sunny", "Hot", "High", "Strong", "No");
            table.Rows.Add("D3", "Overcast", "Hot", "High", "Weak", "Yes");
            table.Rows.Add("D4", "Rain", "Mild", "High", "Weak", "Yes");
            table.Rows.Add("D5", "Rain", "Cool", "Normal", "Weak", "Yes");
            table.Rows.Add("D6", "Rain", "Cool", "Normal", "Strong", "No");
            table.Rows.Add("D7", "Overcast", "Cool", "Normal", "Strong", "Yes");
            table.Rows.Add("D8", "Sunny", "Mild", "High", "Weak", "No");
            table.Rows.Add("D9", "Sunny", "Cool", "Normal", "Weak", "Yes");
            table.Rows.Add("D10", "Rain", "Mild", "Normal", "Weak", "Yes");
            table.Rows.Add("D11", "Sunny", "Mild", "Normal", "Strong", "Yes");
            table.Rows.Add("D12", "Overcast", "Mild", "High", "Strong", "Yes");
            table.Rows.Add("D13", "Overcast", "Hot", "Normal", "Weak", "Yes");
            table.Rows.Add("D14", "Rain", "Mild", "High", "Strong", "No");

            // The codebook maps the text columns to integer symbols
            Codification codes = new Codification(table);

            var variables = new DecisionVariable[]
            {
               new DecisionVariable("Outlook",     codes["Outlook"].Symbols),     // Sunny, Overcast, Rain
               new DecisionVariable("Temperature", codes["Temperature"].Symbols), // Hot, Mild, Cool
               new DecisionVariable("Humidity",    codes["Humidity"].Symbols),    // High, Normal
               new DecisionVariable("Wind",        codes["Wind"].Symbols)         // Weak, Strong
            };

            // Two classes: Yes and No
            int numberOfClasses = codes["PlayTennis"].Symbols;

            tree = new DecisionTree(variables, numberOfClasses);
            ID3Learning learner = new ID3Learning(tree);

            // Codify the table and pull out the training arrays
            DataTable encoded = codes.Apply(table);
            inputs = encoded.ToArray<int>("Outlook", "Temperature", "Humidity", "Wind");
            outputs = encoded.ToArray<int>("PlayTennis");

            double trainingError = learner.Run(inputs, outputs);
            Assert.AreEqual(0, trainingError);

            // Each original row, translated through the codebook, must be
            // classified as the label stored in that row.
            foreach (DataRow row in table.Rows)
            {
                var features = codes.Translate(row, "Outlook", "Temperature", "Humidity", "Wind");

                int decision = tree.Compute(features);

                string actual = codes.Translate("PlayTennis", decision);
                string expected = row["PlayTennis"] as string;

                Assert.AreEqual(expected, actual);
            }

            // A query written directly as text values must be answered with "No"
            var query = codes.Translate("Sunny", "Hot", "High", "Strong");
            string answer = codes.Translate("PlayTennis", tree.Compute(query));

            Assert.AreEqual("No", answer);
        }
Exemplo n.º 52
0
        // Learns a C4.5 tree on the Iris data without declaring any variables and
        // checks the training error and the text of the extracted rules.
        public void new_method_create_tree()
        {
            #region doc_simplest
            // This example uses Fisher's Iris dataset, where the task is to decide
            // whether the measurements of a flower belong to an Iris setosa, an
            // Iris versicolor, or an Iris virginica:
            //
            //  - https://en.wikipedia.org/wiki/Iris_flower_data_set
            //

            // Load the dataset as text: one flower per line, five fields per flower
            string[] rows = Resources.iris_data.Split(new[] { "\r\n" }, StringSplitOptions.RemoveEmptyEntries);
            string[][] fields = rows.Apply(x => x.Split(','));

            // Fields 0..3 hold the flower measurements
            double[][] inputs = fields.GetColumns(0, 1, 2, 3).To<double[][]>();

            // Field 4 holds the flower type as text
            string[] species = fields.GetColumn(4);

            // The class labels are text, so a codebook is created to encode them
            // as integer class labels:
            //
            var codes = new Codification("Output", species);

            // Encode the labels with the codebook
            int[] outputs = codes.Translate("Output", species);

            // Create the C4.5 learning algorithm
            C45Learning learner = new C45Learning();

            // Induce the tree from the data
            var tree = learner.Learn(inputs, outputs);

            // Estimated class labels for the training set
            int[] predicted = tree.Decide(inputs);

            // Classification error on the training set (about 0.0266)
            var loss = new ZeroOneLoss(outputs);
            double error = loss.Loss(tree.Decide(inputs));

            // The tree can also be converted to a set of rules
            DecisionSet rules = tree.ToRules();

            // The codebook lets the rules be printed with the original label text
            var invariant = System.Globalization.CultureInfo.InvariantCulture;
            string ruleText = rules.ToString(codes, "Output", invariant);

            // Expected output (the variables appear by index):
            string expected = @"Iris-setosa =: (2 <= 2.45)
Iris-versicolor =: (2 > 2.45) && (3 <= 1.75) && (0 <= 7.05) && (1 <= 2.85)
Iris-versicolor =: (2 > 2.45) && (3 <= 1.75) && (0 <= 7.05) && (1 > 2.85)
Iris-versicolor =: (2 > 2.45) && (3 > 1.75) && (0 <= 5.95) && (1 > 3.05)
Iris-virginica =: (2 > 2.45) && (3 <= 1.75) && (0 > 7.05)
Iris-virginica =: (2 > 2.45) && (3 > 1.75) && (0 > 5.95)
Iris-virginica =: (2 > 2.45) && (3 > 1.75) && (0 <= 5.95) && (1 <= 3.05)
";
            #endregion

            // Align the literal's line breaks with the platform's line terminator
            expected = expected.Replace("\r\n", Environment.NewLine);

            Assert.AreEqual(0.026666666666666668, error, 1e-10);

            // The rule set must give the same error as the tree
            double ruleError = ComputeError(rules, inputs, outputs);
            Assert.AreEqual(0.026666666666666668, ruleError, 1e-10);
            Assert.AreEqual(expected, ruleText);
        }
Exemplo n.º 53
0
        // Builds a codebook from a plain array of labels and checks the
        // label -> symbol and symbol -> label translations.
        public void ApplyTest3()
        {
            string[] ageGroups = { "child", "adult", "elder" };

            var codes = new Codification("Label", ageGroups);

            // Text labels translate to discrete symbols...
            int childCode = codes.Translate("Label", "child"); // 0
            int adultCode = codes.Translate("Label", "adult"); // 1
            int elderCode = codes.Translate("Label", "elder"); // 2

            // ...and symbols translate back to their text labels
            string childLabel = codes.Translate("Label", 0); // "child"
            string adultLabel = codes.Translate("Label", 1); // "adult"
            string elderLabel = codes.Translate("Label", 2); // "elder"

            Assert.AreEqual(0, childCode);
            Assert.AreEqual(1, adultCode);
            Assert.AreEqual(2, elderCode);
            Assert.AreEqual("child", childLabel);
            Assert.AreEqual("adult", adultLabel);
            Assert.AreEqual("elder", elderLabel);
        }
Exemplo n.º 54
0
        // Trains C4.5 on the Iris data with Join = 3 and checks the training
        // error, the error of the extracted rules and the text of those rules.
        public void AttributeReuseTest1()
        {
            // Parse the embedded CSV: one flower per line, five fields per flower
            string[] rows = Resources.iris_data.Split(new[] { "\r\n" }, StringSplitOptions.RemoveEmptyEntries);
            string[][] fields = rows.Apply(x => x.Split(','));

            Assert.AreEqual(150, fields.Rows());
            Assert.AreEqual(5, fields.Columns());
            Assert.AreEqual("Iris-setosa", fields[0].Get(-1));
            Assert.AreEqual("Iris-virginica", fields.Get(-1).Get(-1));

            // The first four fields of every row are parsed as invariant-culture doubles
            var invariant = System.Globalization.CultureInfo.InvariantCulture;
            double[][] inputs = new double[fields.Length][];
            for (int row = 0; row < inputs.Length; row++)
            {
                inputs[row] = fields[row].First(4).Convert(s => Double.Parse(s, invariant));
            }

            // Field 4 is the species name, encoded as an integer class label
            string[] species = fields.GetColumn(4);
            var codes = new Codification("Label", species);
            int[] outputs = codes.Translate("Label", species);

            var variables = new DecisionVariable[]
            {
                new DecisionVariable("sepal length", DecisionVariableKind.Continuous),
                new DecisionVariable("sepal width",  DecisionVariableKind.Continuous),
                new DecisionVariable("petal length", DecisionVariableKind.Continuous),
                new DecisionVariable("petal width",  DecisionVariableKind.Continuous),
            };

            // One output class per symbol of the codebook's only column
            var tree = new DecisionTree(variables, codes.Columns[0].Symbols);

            var learner = new C45Learning(tree) { Join = 3 };

            double error = learner.Run(inputs, outputs);

            Assert.AreEqual(0.02, error, 1e-10);

            // The rule set must give the same error as the tree
            DecisionSet rules = tree.ToRules();

            double ruleError = ComputeError(rules, inputs, outputs);

            Assert.AreEqual(0.02, ruleError, 1e-10);

            string ruleText = rules.ToString(codes, invariant);

            string expected = @"0 =: (petal length <= 2.45)
1 =: (petal length > 2.45) && (petal width <= 1.75) && (sepal length <= 7.05) && (petal length <= 4.95)
1 =: (petal length > 2.45) && (petal width > 1.75) && (petal length <= 4.85) && (sepal length <= 5.95)
2 =: (petal length > 2.45) && (petal width <= 1.75) && (sepal length > 7.05)
2 =: (petal length > 2.45) && (petal width > 1.75) && (petal length > 4.85)
2 =: (petal length > 2.45) && (petal width <= 1.75) && (sepal length <= 7.05) && (petal length > 4.95)
2 =: (petal length > 2.45) && (petal width > 1.75) && (petal length <= 4.85) && (sepal length > 5.95)
";

            // Align the literal's line breaks with the platform's line terminator
            expected = expected.Replace("\r\n", Environment.NewLine);

            Assert.AreEqual(expected, ruleText);
        }
Exemplo n.º 55
0
        // Checks that a codebook built from a table numbers the distinct values
        // of each column in the order in which they first appear.
        public void TranslateTest1()
        {
            string[] columnNames = { "col1", "col2", "col3" };
            DataTable table = new DataTable("TranslateTest1 Table");
            table.Columns.Add(columnNames);

            table.Rows.Add(1, 2, 3);
            table.Rows.Add(1, 3, 5);
            table.Rows.Add(1, 4, 7);
            table.Rows.Add(2, 4, 6);
            table.Rows.Add(2, 5, 8);
            table.Rows.Add(2, 6, 10);
            table.Rows.Add(3, 4, 5);
            table.Rows.Add(3, 5, 7);
            table.Rows.Add(3, 6, 9);

            var codeBook = new Codification(table);

            // Distinct values per column, listed in order of first appearance;
            // the position of a value in its array is the symbol expected for it.
            string[] firstColumn  = { "1", "2", "3" };
            string[] secondColumn = { "2", "3", "4", "5", "6" };
            string[] thirdColumn  = { "3", "5", "7", "6", "8", "10", "9" };

            for (int symbol = 0; symbol < firstColumn.Length; symbol++)
            {
                Assert.AreEqual(symbol, codeBook.Translate("col1", firstColumn[symbol]));
            }

            for (int symbol = 0; symbol < secondColumn.Length; symbol++)
            {
                Assert.AreEqual(symbol, codeBook.Translate("col2", secondColumn[symbol]));
            }

            for (int symbol = 0; symbol < thirdColumn.Length; symbol++)
            {
                Assert.AreEqual(symbol, codeBook.Translate("col3", thirdColumn[symbol]));
            }
        }
Exemplo n.º 56
0
        // End-to-end Codification example: encodes an age/label table, trains a
        // multi-class SVM on the encoded data and translates the machine's
        // decisions back to the original text labels.
        public void ApplyTest2()
        {
            // The table relates a person's age to a categorical label: "child",
            // "adult" or "elder". The Codification filter turns those labels into
            // integer symbols: "child" = 0, "adult" = 1 and "elder" = 2.

            DataTable people = new DataTable("Sample data");

            people.Columns.Add("Age", typeof(int));
            people.Columns.Add("Label", typeof(string));

            //             age   label
            people.Rows.Add(10, "child");
            people.Rows.Add(07, "child");
            people.Rows.Add(04, "child");
            people.Rows.Add(21, "adult");
            people.Rows.Add(27, "adult");
            people.Rows.Add(12, "child");
            people.Rows.Add(79, "elder");
            people.Rows.Add(40, "adult");
            people.Rows.Add(30, "adult");

            // Build the codebook from the table
            var codes = new Codification(people);

            // Text labels translate to discrete symbols...
            int a = codes.Translate("Label", "child"); // 0
            int b = codes.Translate("Label", "adult"); // 1
            int c = codes.Translate("Label", "elder"); // 2

            // ...and symbols translate back to their text labels
            string labela = codes.Translate("Label", 0); // "child"
            string labelb = codes.Translate("Label", 1); // "adult"
            string labelc = codes.Translate("Label", 2); // "elder"

            // A whole table can be encoded in one call
            DataTable encoded = codes.Apply(people);

            // The encoded table converts to a jagged array
            double[][] matrix = Matrix.ToArray(encoded);

            // Text rendering of that array; per the example it is
            //
            //   { 10, 0 }, { 7, 0 }, { 4, 0 }, { 21, 1 }, { 27, 1 },
            //   { 12, 0 }, { 79, 2 }, { 40, 1 }, { 30, 1 }
            //
            string str = matrix.ToString(CSharpJaggedMatrixFormatProvider.InvariantCulture);

            // The numeric matrix can now be fed to a learning algorithm.
            // Three classes: child, adult, elder
            int classes = codes["Label"].Symbols;

            // Column 0 (Age) is the input, column 1 (Label) is the output class
            double[][] inputs  = matrix.GetColumns(new[] { 0 });
            int[]      outputs = matrix.GetColumn(1).ToInt32();

            // Multi-class SVM with one input (Age) and one class per label
            var machine = new MulticlassSupportVectorMachine(inputs: 1, classes: classes);

            // Learning algorithm for the machine
            var teacher = new MulticlassSupportVectorLearning(machine, inputs, outputs);

            // Each binary sub-problem is trained by SMO with Complexity = 1
            teacher.Algorithm = (svm, classInputs, classOutputs, i, j) =>
                new SequentialMinimalOptimization(svm, classInputs, classOutputs)
                {
                    Complexity = 1
                };

            // Train the machine
            double error = teacher.Run();

            // Classify new ages and translate the decisions back to text labels
            string result1 = codes.Translate("Label", machine.Compute(10)); // child
            string result2 = codes.Translate("Label", machine.Compute(40)); // adult
            string result3 = codes.Translate("Label", machine.Compute(70)); // elder

            Assert.AreEqual(0, a);
            Assert.AreEqual(1, b);
            Assert.AreEqual(2, c);
            Assert.AreEqual("child", labela);
            Assert.AreEqual("adult", labelb);
            Assert.AreEqual("elder", labelc);

            Assert.AreEqual("child", result1);
            Assert.AreEqual("adult", result2);
            Assert.AreEqual("elder", result3);
        }
Exemplo n.º 57
0
        /// <summary>
        ///   Trains a Naive Bayes classifier on Mitchell's tennis data where Outlook and
        ///   Wind are discrete symbols and Temperature and Humidity are continuous values,
        ///   then checks the decision and class responses for a single new instance.
        /// </summary>
        public void ComputeTest2()
        {
            var data = new DataTable("Mitchell's Tennis Example");

            data.Columns.Add("Day", "Outlook", "Temperature", "Humidity", "Wind", "PlayTennis");

            // Temperature and Humidity are stored as doubles (continuous variables)
            foreach (string continuousColumn in new[] { "Temperature", "Humidity" })
            {
                data.Columns[continuousColumn].DataType = typeof(double);
            }

            // One entry per observed day: Day, Outlook, Temperature, Humidity, Wind, PlayTennis
            object[][] observations =
            {
                new object[] { "D1", "Sunny", 38.0, 96.0, "Weak", "No" },
                new object[] { "D2", "Sunny", 39.0, 90.0, "Strong", "No" },
                new object[] { "D3", "Overcast", 38.0, 75.0, "Weak", "Yes" },
                new object[] { "D4", "Rain", 25.0, 87.0, "Weak", "Yes" },
                new object[] { "D5", "Rain", 12.0, 30.0, "Weak", "Yes" },
                new object[] { "D6", "Rain", 11.0, 35.0, "Strong", "No" },
                new object[] { "D7", "Overcast", 10.0, 40.0, "Strong", "Yes" },
                new object[] { "D8", "Sunny", 24.0, 90.0, "Weak", "No" },
                new object[] { "D9", "Sunny", 12.0, 26.0, "Weak", "Yes" },
                new object[] { "D10", "Rain", 25, 30.0, "Weak", "Yes" },
                new object[] { "D11", "Sunny", 26.0, 40.0, "Strong", "Yes" },
                new object[] { "D12", "Overcast", 27.0, 97.0, "Strong", "Yes" },
                new object[] { "D13", "Overcast", 39.0, 41.0, "Weak", "Yes" },
                new object[] { "D14", "Rain", 23.0, 98.0, "Strong", "No" }
            };

            foreach (object[] observation in observations)
            {
                data.Rows.Add(observation);
            }

            // The codebook maps the text labels of the table to discrete symbols
            var codebook = new Codification(data);

            int classCount = codebook["PlayTennis"].Symbols; // 2 possible values (yes, no)
            int inputCount = 4;                              // Outlook, Temperature, Humidity, Wind

            // One prior per input variable, in the same order as the input columns
            var outlookPrior     = new GeneralDiscreteDistribution(codebook["Outlook"].Symbols); // Sunny, overcast, rain
            var temperaturePrior = new NormalDistribution();                                     // Continuous (Celsius)
            var humidityPrior    = new NormalDistribution();                                     // Continuous (percentage)
            var windPrior        = new GeneralDiscreteDistribution(codebook["Wind"].Symbols);    // Weak, strong

            var priors = new IUnivariateDistribution[]
            {
                outlookPrior, temperaturePrior, humidityPrior, windPrior
            };

            // Naive Bayes classifier over the two PlayTennis classes
            var target = new NaiveBayes<IUnivariateDistribution>(classCount, inputCount, priors);

            // Convert the table into symbols and split it into inputs and outputs
            DataTable symbols = codebook.Apply(data);

            double[][] inputs  = symbols.ToArray("Outlook", "Temperature", "Humidity", "Wind");
            int[]      outputs = symbols.ToArray<int>("PlayTennis");

            // Fit the model to the training data
            target.Estimate(inputs, outputs);

            // Query instance: Outlook = Sunny, Temperature = 12, Humidity = 90, Wind = Strong
            int sunny  = codebook.Translate(columnName: "Outlook", value: "Sunny");
            int strong = codebook.Translate(columnName: "Wind", value: "Strong");

            double[] instance = { sunny, 12.0, 90.0, strong };

            double logLikelihood;
            double[] responses;

            int decision = target.Compute(instance, out logLikelihood, out responses);

            // Translate the numeric decision back into its original text label
            string result = codebook.Translate("PlayTennis", decision);

            Assert.AreEqual("No", result);
            Assert.AreEqual(0, decision);
            Assert.AreEqual(0.840, responses[0], 1e-3);
            Assert.AreEqual(1, responses.Sum(), 1e-10);
            Assert.IsFalse(double.IsNaN(responses[0]));
            Assert.AreEqual(2, responses.Length);
        }
Exemplo n.º 58
0
        /// <summary>
        ///   Trains a fully discrete Naive Bayes classifier on Mitchell's tennis data
        ///   and checks the decision and class responses for a single new instance.
        /// </summary>
        public void ComputeTest()
        {
            var data = new DataTable("Mitchell's Tennis Example");

            data.Columns.Add("Day", "Outlook", "Temperature", "Humidity", "Wind", "PlayTennis");

            // One entry per observed day: Day, Outlook, Temperature, Humidity, Wind, PlayTennis
            object[][] observations =
            {
                new object[] { "D1", "Sunny", "Hot", "High", "Weak", "No" },
                new object[] { "D2", "Sunny", "Hot", "High", "Strong", "No" },
                new object[] { "D3", "Overcast", "Hot", "High", "Weak", "Yes" },
                new object[] { "D4", "Rain", "Mild", "High", "Weak", "Yes" },
                new object[] { "D5", "Rain", "Cool", "Normal", "Weak", "Yes" },
                new object[] { "D6", "Rain", "Cool", "Normal", "Strong", "No" },
                new object[] { "D7", "Overcast", "Cool", "Normal", "Strong", "Yes" },
                new object[] { "D8", "Sunny", "Mild", "High", "Weak", "No" },
                new object[] { "D9", "Sunny", "Cool", "Normal", "Weak", "Yes" },
                new object[] { "D10", "Rain", "Mild", "Normal", "Weak", "Yes" },
                new object[] { "D11", "Sunny", "Mild", "Normal", "Strong", "Yes" },
                new object[] { "D12", "Overcast", "Mild", "High", "Strong", "Yes" },
                new object[] { "D13", "Overcast", "Hot", "Normal", "Weak", "Yes" },
                new object[] { "D14", "Rain", "Mild", "High", "Strong", "No" }
            };

            foreach (object[] observation in observations)
            {
                data.Rows.Add(observation);
            }

            // The codebook maps the text labels of the table to discrete symbols
            var codebook = new Codification(data);

            int classCount = codebook["PlayTennis"].Symbols; // 2 possible values (yes, no)

            // The four input variables, in the order they are fed to the classifier
            string[] inputNames = { "Outlook", "Temperature", "Humidity", "Wind" };
            int inputCount = inputNames.Length;

            // One discrete prior per input, sized by the number of symbols in its column:
            // Outlook (3), Temperature (3), Humidity (2), Wind (2)
            var priors = new GeneralDiscreteDistribution[inputNames.Length];
            for (int k = 0; k < priors.Length; k++)
            {
                priors[k] = new GeneralDiscreteDistribution(codebook[inputNames[k]].Symbols);
            }

            // Naive Bayes classifier over the two PlayTennis classes
            var target = new NaiveBayes<GeneralDiscreteDistribution>(classCount, inputCount, priors);

            // Convert the table into symbols and split it into inputs and outputs
            DataTable symbols = codebook.Apply(data);

            double[][] inputs  = symbols.ToArray(inputNames);
            int[]      outputs = symbols.ToArray<int>("PlayTennis");

            // Fit the model to the training data
            target.Estimate(inputs, outputs);

            // Query instance: a sunny, cool, humid and windy day
            var codes = codebook.Translate("Sunny", "Cool", "High", "Strong");
            double[] instance = codes.ToDouble();

            double logLikelihood;
            double[] responses;

            int decision = target.Compute(instance, out logLikelihood, out responses);

            // Translate the numeric decision back into its original text label
            string result = codebook.Translate("PlayTennis", decision);

            Assert.AreEqual("No", result);
            Assert.AreEqual(0, decision);
            Assert.AreEqual(0.795, responses[0], 1e-3);
            Assert.AreEqual(1, responses.Sum(), 1e-10);
            Assert.IsFalse(double.IsNaN(responses[0]));
            Assert.AreEqual(2, responses.Length);
        }
        /// <summary>
        ///   Runs a multinomial logistic regression analysis on the "hsbdemo" resource,
        ///   using "ses" and "write" as inputs and "prog" as the output, and checks the
        ///   name, class and value of each of the nine resulting coefficients.
        /// </summary>
        public void AnalyzeExample1()
        {
            // http://www.ats.ucla.edu/stat/stata/dae/mlogit.htm
            CsvReader reader = CsvReader.FromText(Properties.Resources.hsbdemo, hasHeaders: true);

            var table = reader.ToTable();

            // "ses" is encoded as categorical with a baseline level; "prog" is encoded
            // as categorical, with "academic" remapped to symbol 0.
            var codification = new Codification(table);
            codification["ses"].VariableType = CodificationVariable.CategoricalWithBaseline;
            codification["prog"].VariableType = CodificationVariable.Categorical;
            codification["prog"].Remap("academic", 0);

            var inputs = codification.Apply(table, "ses", "write");
            var output = codification.Apply(table, "prog");


            // Get inputs
            string[] inputNames;
            var inputsData = inputs.ToArray(out inputNames);

            // Get outputs
            string[] outputNames;
            var outputData = output.ToArray(out outputNames);


            var analysis = new MultinomialLogisticRegressionAnalysis(inputsData, outputData, inputNames, outputNames);

            analysis.Compute();

            // Expected coefficients, in the order the analysis reports them.
            var expected = new[]
            {
                new { Name = "(baseline)",  Class = "prog: academic", Value = 0.0 },
                new { Name = "Intercept",   Class = "prog: general",  Value = 1.0302662690579185 },
                new { Name = "write",       Class = "prog: general",  Value = -0.083689163424126883 },
                new { Name = "ses: middle", Class = "prog: general",  Value = -0.58217998138556049 },
                new { Name = "ses: high",   Class = "prog: general",  Value = -1.1112048569892283 },
                new { Name = "Intercept",   Class = "prog: vocation", Value = 1.2715455854613191 },
                new { Name = "write",       Class = "prog: vocation", Value = -0.13231057837059781 },
                new { Name = "ses: middle", Class = "prog: vocation", Value = 0.20451187629162043 },
                new { Name = "ses: high",   Class = "prog: vocation", Value = -0.93207938490449849 }
            };

            // The coefficients come out of a numerical fit, so they are compared with a
            // small tolerance instead of bit-for-bit equality, which can differ between
            // runtimes and platforms.
            const double tolerance = 1e-10;

            Assert.AreEqual(9, analysis.Coefficients.Count);

            for (int i = 0; i < expected.Length; i++)
            {
                var coefficient = analysis.Coefficients[i];
                string where = "coefficient #" + i;

                Assert.AreEqual(expected[i].Name, coefficient.Name, where);
                Assert.AreEqual(expected[i].Class, coefficient.Class, where);
                Assert.AreEqual(expected[i].Value, coefficient.Value, tolerance, where);
            }
        }
Exemplo n.º 60
0
 /// <summary>
 ///   Returns a <see cref="System.String"/> that represents this instance,
 ///   built from the given codebook and output column using the given culture.
 /// </summary>
 ///
 /// <param name="codebook">
 ///   The codification codebook passed on to the internal string builder.
 /// </param>
 /// <param name="outputColumn">
 ///   The name of the output column passed on to the internal string builder.
 /// </param>
 /// <param name="cultureInfo">
 ///   The culture to be used when building the string. When <c>null</c>,
 ///   <see cref="CultureInfo.CurrentUICulture"/> is used instead.
 /// </param>
 ///
 /// <returns>
 ///   A <see cref="System.String"/> that represents this instance.
 /// </returns>
 ///
 public string ToString(Codification <string> codebook, string outputColumn, CultureInfo cultureInfo)
 {
     // Honor the culture requested by the caller; previously this argument was
     // ignored and CurrentUICulture was always used. The fallback keeps callers
     // that pass null working exactly as before.
     return toString(codebook, outputColumn, cultureInfo ?? CultureInfo.CurrentUICulture);
 }