コード例 #1
0
ファイル: C45LearningTest.cs プロジェクト: xadxxadx/framework
        public void ConsistencyTest1()
        {
            // 1000 samples with 10 random features each (generator bounds 0.0 and 1.0).
            double[][] samples = Matrix.Random(1000, 10, 0.0, 1.0).ToJagged();

            // The class label depends only on the first feature: 1 above 0.8, 0 otherwise.
            int[] outputs = new int[1000];
            for (int row = 0; row < samples.Length; row++)
            {
                outputs[row] = samples[row][0] > 0.8 ? 1 : 0;
            }

            // Ten continuous decision variables, named after their index.
            var vars = new DecisionVariable[10];
            for (int index = 0; index < vars.Length; index++)
            {
                vars[index] = new DecisionVariable(index.ToString(), DecisionVariableKind.Continuous);
            }

            var tree    = new DecisionTree(vars, 2);
            var teacher = new C45Learning(tree);

            double error = teacher.Run(samples, outputs);

            // A single split on the first feature should separate the classes perfectly.
            Assert.AreEqual(0, error);

            var branches = tree.Root.Branches;
            Assert.AreEqual(2, branches.Count);
            Assert.IsTrue(branches[0].IsLeaf);
            Assert.IsTrue(branches[1].IsLeaf);
        }
コード例 #2
0
ファイル: C45LearningTest.cs プロジェクト: xadxxadx/framework
        public void LargeSampleTest2()
        {
            // Fixed generator setup so the random data below is repeatable
            // (presumably a seed; confirm against Accord.Math.Tools).
            Accord.Math.Tools.SetupGenerator(0);

            // 500 samples x 3 features, plus one integer-truncated random target column.
            double[][] dataSamples = Matrix.Random(500, 3, 0.0, 10.0).ToJagged();
            int[]      target      = Matrix.Random(500, 1, 0.0, 2.0).ToInt32().GetColumn(0);

            var features = new DecisionVariable[]
            {
                new DecisionVariable("Outlook",     DecisionVariableKind.Continuous),
                new DecisionVariable("Temperature", DecisionVariableKind.Continuous),
                new DecisionVariable("Humidity",    DecisionVariableKind.Continuous),
            };

            var tree    = new DecisionTree(features, 2);
            var teacher = new C45Learning(tree);

            double error = teacher.Run(dataSamples, target);

            // Every leaf of the learned tree must carry an output value.
            foreach (var node in tree)
            {
                if (!node.IsLeaf)
                {
                    continue;
                }

                Assert.IsNotNull(node.Output);
            }

            Assert.IsTrue(error < 0.50);
        }
コード例 #3
0
ファイル: C45LearningTest.cs プロジェクト: xadxxadx/framework
        public void ArgumentCheck1()
        {
            // The third sample is deliberately null: learning must reject it.
            double[][] samples = new double[][]
            {
                new double[] { 0, 2, 4.0 },
                new double[] { 1, 5, 2.0 },
                null,
                new double[] { 1, 5, 6.0 },
            };

            int[] outputs = new int[] { 1, 1, 0, 0 };

            // Three continuous variables named "0", "1" and "2".
            var vars = new DecisionVariable[3];
            for (int index = 0; index < vars.Length; index++)
            {
                vars[index] = DecisionVariable.Continuous(index.ToString());
            }

            var tree    = new DecisionTree(vars, 2);
            var teacher = new C45Learning(tree);

            // Record whether Run raised ArgumentNullException; any other
            // exception type propagates and fails the test.
            bool thrown;
            try
            {
                teacher.Run(samples, outputs);
                thrown = false;
            }
            catch (ArgumentNullException)
            {
                thrown = true;
            }

            Assert.IsTrue(thrown);
        }
コード例 #4
0
        public void IrisDatasetTest()
        {
            // Parse the embedded iris data: one comma-separated record per line.
            string[][] text = Resources.iris_data.Split(
                new[] { '\n' }, StringSplitOptions.RemoveEmptyEntries)
                              .Apply(x => x.Split(','));

            // The first four columns are the numeric features.
            double[][] inputs = new double[text.Length][];
            for (int i = 0; i < inputs.Length; i++)
            {
                inputs[i] = text[i].First(4).Convert(s => Double.Parse(s, System.Globalization.CultureInfo.InvariantCulture));
            }

            // The fifth column is the class label, encoded to integers by the codebook.
            string[] labels = text.GetColumn(4);

            Codification codebook = new Codification("Label", labels);

            int[] outputs = codebook.Translate("Label", labels);


            DecisionVariable[] features =
            {
                new DecisionVariable("sepal length", DecisionVariableKind.Continuous),
                new DecisionVariable("sepal width",  DecisionVariableKind.Continuous),
                new DecisionVariable("petal length", DecisionVariableKind.Continuous),
                new DecisionVariable("petal width",  DecisionVariableKind.Continuous),
            };


            DecisionTree tree = new DecisionTree(features, codebook.Columns[0].Symbols);

            C45Learning teacher = new C45Learning(tree);

            double error = teacher.Run(inputs, outputs);

            Assert.AreEqual(0.026666666666666668, error, 1e-10);

            // The rule set extracted from the tree must make the same mistakes as the tree.
            DecisionSet rules = tree.ToRules();

            double newError = ComputeError(rules, inputs, outputs);

            Assert.AreEqual(0.026666666666666668, newError, 1e-10);

            string ruleText = rules.ToString(codebook,
                                             System.Globalization.CultureInfo.InvariantCulture);

            string expected = @"0 =: (petal length <= 2.45)
1 =: (petal length > 2.45) && (petal width <= 1.75) && (sepal length <= 7.05) && (sepal width <= 2.85)
1 =: (petal length > 2.45) && (petal width <= 1.75) && (sepal length <= 7.05) && (sepal width > 2.85)
1 =: (petal length > 2.45) && (petal width > 1.75) && (sepal length <= 5.95) && (sepal width > 3.05)
2 =: (petal length > 2.45) && (petal width <= 1.75) && (sepal length > 7.05)
2 =: (petal length > 2.45) && (petal width > 1.75) && (sepal length > 5.95)
2 =: (petal length > 2.45) && (petal width > 1.75) && (sepal length <= 5.95) && (sepal width <= 3.05)
";

            // The verbatim literal inherits whatever line endings this source file
            // was saved with, so normalize it to the platform newline before
            // comparing (assumes the rule text is emitted with Environment.NewLine).
            // The former hard-coded length check (596) was only valid for CRLF
            // endings and is fully covered by the text comparison below.
            expected = expected.Replace("\r\n", "\n").Replace("\n", Environment.NewLine);

            Assert.AreEqual(expected, ruleText);
        }
コード例 #5
0
        private static void decisionTree(double[][] inputs, int[] outputs)
        {
            // Two continuous inputs (X, Y) and two classes (positive / negative).
            var variables = new[]
            {
                DecisionVariable.Continuous("X"),
                DecisionVariable.Continuous("Y")
            };

            var tree    = new DecisionTree(inputs: variables, classes: 2);
            var teacher = new C45Learning(tree);

            // Negative class labels are remapped to zero before learning;
            // non-negative labels are passed through unchanged.
            outputs = outputs.Apply(label => label >= 0 ? label : 0);

            double error = teacher.Run(inputs, outputs);

            // Run every sample back through the learned tree.
            int[] answers = inputs.Apply(tree.Compute);

            // Show expected labels first, then the tree's answers; the
            // second plot window is held open.
            ScatterplotBox.Show("Expected results", inputs, outputs);

            var resultsPlot = ScatterplotBox.Show("Decision Tree results", inputs, answers);
            resultsPlot.Hold();
        }
コード例 #6
0
ファイル: MainForm.cs プロジェクト: haf/Accord.Net
        /// <summary>
        ///   Creates and learns a Decision Tree to recognize the
        ///   previously loaded dataset using the current settings,
        ///   then plots the resulting decision surface.
        /// </summary>
        /// <param name="sender">The control that raised the event.</param>
        /// <param name="e">Event data (unused).</param>
        ///
        private void btnCreate_Click(object sender, EventArgs e)
        {
            // The grid must be bound to a DataTable. A missing source, or a
            // source of another type, is reported to the user instead of
            // surfacing as a NullReferenceException further down.
            DataTable source = dgvLearningSource.DataSource as DataTable;
            if (source == null)
            {
                MessageBox.Show("Please load some data first.");
                return;
            }

            // Finishes and save any pending changes to the given data
            dgvLearningSource.EndEdit();

            // Creates a matrix from the entire source data table
            double[,] table = source.ToMatrix(out columnNames);

            // Get only the input vector values (first two columns)
            double[][] inputs = table.GetColumns(0, 1).ToArray();

            // Get only the output labels (last column)
            int[] outputs = table.GetColumn(2).ToInt32();


            // Specify the input variables
            DecisionVariable[] variables =
            {
                new DecisionVariable("x", DecisionVariableKind.Continuous),
                new DecisionVariable("y", DecisionVariableKind.Continuous),
            };

            // Create the decision tree for two output classes
            tree = new DecisionTree(variables, 2);

            // Create the C4.5 learning algorithm
            C45Learning c45 = new C45Learning(tree);

            // Learn the decision tree using C4.5
            double error = c45.Run(inputs, outputs);

            // Show the learned tree in the view
            decisionTreeView1.TreeSource = tree;


            // Get the ranges for each variable (X and Y)
            DoubleRange[] ranges = Matrix.Range(table, 0);

            // Generate a Cartesian coordinate system covering both ranges in 0.05 steps
            double[][] map = Matrix.CartesianProduct(
                Matrix.Interval(ranges[0], 0.05),
                Matrix.Interval(ranges[1], 0.05));

            // Classify each point in the Cartesian coordinate system
            double[] result = map.Apply(tree.Compute).ToDouble();
            double[,] surface = map.ToMatrix().InsertColumn(result);

            CreateScatterplot(zedGraphControl2, surface);

            lbStatus.Text = "Learning finished! Click the other tabs to explore results!";
        }
コード例 #7
0
        /// <summary>
        /// Learns a C4.5 decision tree from the students whose ids are listed as
        /// normal (class 1) or anomalous (class 0), classifies every student with
        /// it, and builds three training sets from the result.
        /// </summary>
        /// <param name="studentsAndMarks">Students paired with their feature vectors.</param>
        /// <param name="normalRecords">OpenIds of students labelled as normal.</param>
        /// <param name="anomalies">OpenIds of students labelled as anomalies.</param>
        /// <returns>
        /// Three training sets, in order: the first two thirds of the records
        /// classified as normal, the remaining third, and the records classified
        /// as anomalies.
        /// </returns>
        public static TrainingSet[] GenerateTrainingSets(IEnumerable <KeyValuePair <User, double[]> > studentsAndMarks, string[] normalRecords, string[] anomalies)
        {
            // Materialize once: the sequence is walked several times below and
            // may be expensive (or unsafe) to enumerate repeatedly.
            var students = studentsAndMarks.ToList();

            // Hash sets give O(1) membership checks instead of scanning the arrays per student.
            var normalIds  = new HashSet<string>(normalRecords);
            var anomalyIds = new HashSet<string>(anomalies);

            // Collect only the rows that were actually matched. Pre-sizing the
            // arrays to normalRecords.Length + anomalies.Length left null rows
            // when an id had no matching student, and overflowed when ids
            // matched more than once.
            var inputRows  = new List<double[]>();
            var outputRows = new List<int>();

            foreach (var studentAndMarks in students)
            {
                var openId = studentAndMarks.Key.OpenId;

                if (normalIds.Contains(openId))
                {
                    inputRows.Add(studentAndMarks.Value);
                    outputRows.Add(1);
                }

                if (anomalyIds.Contains(openId))
                {
                    inputRows.Add(studentAndMarks.Value);
                    outputRows.Add(0);
                }
            }

            var inputData  = inputRows.ToArray();
            var outputData = outputRows.ToArray();

            // The feature count is taken from the first student; as before, an
            // empty sequence fails here with ArgumentOutOfRangeException.
            var countOfFeatures = students[0].Value.Length;
            var features        = new DecisionVariable[countOfFeatures];

            // The first feature uses the range [80, 1200]; all others use [0, 10].
            features[0] = new DecisionVariable("0", DecisionAttributeKind.Continuous, new AForge.DoubleRange(80, 1200));

            for (var i = 1; i < countOfFeatures; i++)
            {
                features[i] = new DecisionVariable(i.ToString(), DecisionAttributeKind.Continuous, new AForge.DoubleRange(0, 10));
            }

            // Create the Decision tree with only 2 result values
            var tree = new DecisionTree(features, 2);

            // Creates a new instance of the C4.5 learning algorithm
            var c45 = new C45Learning(tree);

            // Learn the decision tree
            c45.Run(inputData, outputData);

            // Split all data into normal and anomalies using the learned tree
            var setOfNormalRecordsList = students.Where(x => tree.Compute(x.Value) == 1).ToList();
            var setOfAnomalies         = students.Where(x => tree.Compute(x.Value) == 0);

            // Split normal records into 2 groups: two thirds for the first
            // training set, the rest for the second
            var splitCount            = setOfNormalRecordsList.Count * 2 / 3;
            var setOfNormalRecordsTr1 = setOfNormalRecordsList.GetRange(0, splitCount);
            var setOfNormalRecordsTr2 = setOfNormalRecordsList.GetRange(splitCount, setOfNormalRecordsList.Count - splitCount);

            // Create Training Sets
            var trSetNormalFirst  = CreateTrainingSetFromResources(setOfNormalRecordsTr1);
            var trSetNormalSecond = CreateTrainingSetFromResources(setOfNormalRecordsTr2);
            var trSetAnomalies    = CreateTrainingSetFromResources(setOfAnomalies);

            return(new[] { trSetNormalFirst, trSetNormalSecond, trSetAnomalies });
        }
コード例 #8
0
ファイル: MainForm.cs プロジェクト: kamranamini61/Accord
        /// <summary>
        ///   Learns a C4.5 decision tree from the data currently loaded in the
        ///   grid, shows the tree, and plots its separating surface.
        /// </summary>
        /// <param name="sender">The control that raised the event.</param>
        /// <param name="e">Event data (unused).</param>
        private void btnCreate_Click(object sender, EventArgs e)
        {
            // The grid must be bound to a DataTable. A missing source, or a
            // source of another type, is reported to the user instead of
            // surfacing as a NullReferenceException further down.
            DataTable source = dgvLearningSource.DataSource as DataTable;
            if (source == null)
            {
                MessageBox.Show("Please load some data first.");
                return;
            }

            // Finishes and save any pending changes to the given data
            dgvLearningSource.EndEdit();

            // Creates a matrix from the source data table
            double[,] sourceMatrix = source.ToMatrix(out sourceColumns);

            // Get only the input vector values (first two columns)
            double[][] inputs = sourceMatrix.Submatrix(null, 0, 1).ToArray();

            // Get only the label outputs (third column)
            int[] outputs = sourceMatrix.GetColumn(2).ToInt32();


            DecisionVariable[] attributes =
            {
                new DecisionVariable("x", DecisionAttributeKind.Continuous),
                new DecisionVariable("y", DecisionAttributeKind.Continuous),
            };

            // Create the Decision tree for two output classes
            tree = new DecisionTree(attributes, 2);

            // Creates a new instance of the C4.5 learning algorithm
            C45Learning c45 = new C45Learning(tree);

            // Learn the decision tree
            double error = c45.Run(inputs, outputs);

            // Show the learned tree in the view
            decisionTreeView1.TreeSource = tree;


            // Draw the separating surface over a grid spanning both input ranges in 0.05 steps
            var ranges = Matrix.Range(sourceMatrix);

            double[][] map = Matrix.CartesianProduct(
                Matrix.Interval(ranges[0], 0.05),
                Matrix.Interval(ranges[1], 0.05));

            // Classify every grid point and keep only the sign of the answer
            var result = map.Apply(tree.Compute).Apply(Math.Sign);

            var graph = map.ToMatrix().InsertColumn(result.ToDouble());

            CreateScatterplot(zedGraphControl2, graph);
        }
コード例 #9
0
        /*************************** Primary Methods *******************************/
        public double learnDecisionTreeModel(DataSet trainSet)
        {
            // Convert the training DataSet into the internal training tables
            // (presumably fills trainInputArray / trainOutputVector; see the helper).
            this.convertToTrainIntputTable(trainSet);

            // Run C4.5 on the stored decision tree and hand back the error it reports.
            var learner = new C45Learning(this.descisionTree);

            return learner.Run(this.trainInputArray, this.trainOutputVector);
        }
コード例 #10
0
ファイル: C45LearningTest.cs プロジェクト: xadxxadx/framework
        public static void CreateMitchellExample(out DecisionTree tree, out double[][] inputs, out int[] outputs)
        {
            var data = new DataTable("Mitchell's Tennis Example");

            // Schema: categorical columns are strings, measurements are doubles.
            data.Columns.Add("Day", typeof(string));
            data.Columns.Add("Outlook", typeof(string));
            data.Columns.Add("Temperature", typeof(double));
            data.Columns.Add("Humidity", typeof(double));
            data.Columns.Add("Wind", typeof(string));
            data.Columns.Add("PlayTennis", typeof(string));

            // The fourteen observations of the example, in day order.
            object[][] observations =
            {
                new object[] { "D1", "Sunny", 85, 85, "Weak", "No" },
                new object[] { "D2", "Sunny", 80, 90, "Strong", "No" },
                new object[] { "D3", "Overcast", 83, 78, "Weak", "Yes" },
                new object[] { "D4", "Rain", 70, 96, "Weak", "Yes" },
                new object[] { "D5", "Rain", 68, 80, "Weak", "Yes" },
                new object[] { "D6", "Rain", 65, 70, "Strong", "No" },
                new object[] { "D7", "Overcast", 64, 65, "Strong", "Yes" },
                new object[] { "D8", "Sunny", 72, 95, "Weak", "No" },
                new object[] { "D9", "Sunny", 69, 70, "Weak", "Yes" },
                new object[] { "D10", "Rain", 75, 80, "Weak", "Yes" },
                new object[] { "D11", "Sunny", 75, 70, "Strong", "Yes" },
                new object[] { "D12", "Overcast", 72, 90, "Strong", "Yes" },
                new object[] { "D13", "Overcast", 81, 75, "Weak", "Yes" },
                new object[] { "D14", "Rain", 71, 80, "Strong", "No" },
            };

            foreach (object[] observation in observations)
            {
                data.Rows.Add(observation);
            }

            // The codebook maps each distinct string to an integer symbol.
            var codebook = new Codification(data);

            // Outlook and Wind are discrete (symbol counts come from the
            // codebook); Temperature and Humidity are continuous.
            var attributes = new DecisionVariable[]
            {
                new DecisionVariable("Outlook", codebook["Outlook"].Symbols),
                new DecisionVariable("Temperature", DecisionVariableKind.Continuous),
                new DecisionVariable("Humidity", DecisionVariableKind.Continuous),
                new DecisionVariable("Wind", codebook["Wind"].Symbols)
            };

            // Number of distinct PlayTennis answers.
            int classCount = codebook["PlayTennis"].Symbols;

            tree = new DecisionTree(attributes, classCount);
            var c45 = new C45Learning(tree);

            // Translate the table to symbols, then split it into inputs and outputs.
            DataTable symbols = codebook.Apply(data);

            inputs  = symbols.ToArray("Outlook", "Temperature", "Humidity", "Wind");
            outputs = symbols.ToArray <int>("PlayTennis");

            // Train the classifier on the encoded data.
            c45.Run(inputs, outputs);
        }
コード例 #11
0
        /// <summary>
        /// Builds a decision tree over continuous input variables and trains it
        /// with C4.5 on the given data, optionally overriding learner settings.
        /// </summary>
        /// <param name="trainingData">Data used to train the classifier.</param>
        /// <param name="maxJoin">Value assigned to the learner's <c>Join</c>
        /// setting; ignored unless greater than zero.</param>
        /// <param name="maxHeight">Value assigned to the learner's
        /// <c>MaxHeight</c> setting; ignored unless greater than zero.</param>
        /// <returns>The error value returned by the learning run.</returns>
        public double TrainClassifierWithParameters(
            ClassificationData trainingData,
            int maxJoin   = 0,
            int maxHeight = 0)
        {
            var decisionVariables = new List <DecisionVariable>();

            // One continuous variable per input attribute. Use the configured
            // names when available, otherwise generate "variable_1", "variable_2", ...
            for (int n = 0; n < trainingData.InputAttributeNumber; ++n)
            {
                string variableName = DecisionVariableNames != null
                    ? DecisionVariableNames[n]
                    : "variable_" + (n + 1).ToString();

                decisionVariables.Add(
                    new DecisionVariable(variableName, DecisionVariableKind.Continuous));
            }

            // Create the tree and the C4.5 learner bound to it.
            ClassificationDecisionTree = new DecisionTree(decisionVariables, trainingData.OutputPossibleValues);
            C45LearningTree            = new C45Learning(ClassificationDecisionTree);

            // Only strictly positive values override the learner's defaults.
            if (maxJoin > 0)
            {
                C45LearningTree.Join = maxJoin;
            }

            if (maxHeight > 0)
            {
                C45LearningTree.MaxHeight = maxHeight;
            }

            // Train the tree and report the resulting error.
            return C45LearningTree.Run(trainingData.InputData, trainingData.OutputData);
        }
コード例 #12
0
        /// <summary>
        /// Loads the embedded nursery data, encodes it with a codebook and
        /// learns a C4.5 decision tree with five output classes from it.
        /// </summary>
        /// <param name="inputs">Receives the encoded input columns.</param>
        /// <param name="outputs">Receives the encoded output column.</param>
        /// <returns>The learned decision tree.</returns>
        private static DecisionTree createTree(out double[][] inputs, out int[] outputs)
        {
            string nurseryData = Resources.nursery;

            string[] inputColumns =
            {
                "parents", "has_nurs", "form",   "children",
                "housing", "finance",  "social", "health"
            };

            string outputColumn = "output";

            DataTable table = new DataTable("Nursery");

            table.Columns.Add(inputColumns);
            table.Columns.Add(outputColumn);

            // Accept both CRLF and LF line endings and skip blank lines.
            // Splitting on Environment.NewLine alone depends on the platform
            // the test runs on, and keeping empty entries turned a trailing
            // newline into a bogus, mostly empty table row.
            string[] lines = nurseryData.Split(
                new[] { "\r\n", "\n" }, StringSplitOptions.RemoveEmptyEntries);

            foreach (var line in lines)
            {
                table.Rows.Add(line.Split(','));
            }

            // Encode every string column as integer symbols.
            Codification codebook = new Codification(table);

            DataTable symbols = codebook.Apply(table);

            inputs  = symbols.ToArray(inputColumns);
            outputs = symbols.ToArray <int>(outputColumn);

            var attributes = DecisionVariable.FromCodebook(codebook, inputColumns);
            var tree       = new DecisionTree(attributes, classes: 5);

            C45Learning c45 = new C45Learning(tree);

            c45.Run(inputs, outputs);

            return(tree);
        }
コード例 #13
0
        public static DecisionTree createNurseryExample(out double[][] inputs, out int[] outputs, int first)
        {
            string[] inputColumns =
            {
                "parents", "has_nurs", "form",   "children",
                "housing", "finance",  "social", "health"
            };

            string outputColumn = "output";

            // Split the embedded resource into records and sanity-check it
            // before building anything on top of it.
            string[] lines = Resources.nursery.Split(
                new[] { "\r\n" }, StringSplitOptions.RemoveEmptyEntries);

            Assert.AreEqual(12960, lines.Length);
            Assert.AreEqual("usual,proper,complete,1,convenient,convenient,nonprob,recommended,recommend", lines[0]);
            Assert.AreEqual("great_pret,very_crit,foster,more,critical,inconv,problematic,not_recom,not_recom", lines[lines.Length - 1]);

            // Load every record into a table with the eight inputs plus the output column.
            var table = new DataTable("Nursery");

            table.Columns.Add(inputColumns);
            table.Columns.Add(outputColumn);

            foreach (string record in lines)
            {
                string[] fields = record.Split(',');
                table.Rows.Add(fields);
            }

            // Encode the strings as integer symbols.
            var       codebook = new Codification(table);
            DataTable symbols  = codebook.Apply(table);

            inputs  = symbols.ToArray(inputColumns);
            outputs = symbols.ToArray <int>(outputColumn);

            var attributes = DecisionVariable.FromCodebook(codebook, inputColumns);
            var tree       = new DecisionTree(attributes, classes: 5);

            // Learn from the leading `first` samples only; they must be fit without error.
            var    c45   = new C45Learning(tree);
            double error = c45.Run(inputs.First(first), outputs.First(first));

            Assert.AreEqual(0, error);

            return tree;
        }
コード例 #14
0
ファイル: Program.cs プロジェクト: musicnova/BotModernAI
        static void Main(string[] args)
        {
            // Load the "titanic3" worksheet and drop its last row.
            var reader = new ExcelReader(@"c:\temp\accordemo\accordemo\titanic.xls");
            var sheet  = reader.GetWorksheet("titanic3");

            sheet.Rows.RemoveAt(sheet.Rows.Count - 1);

            // Apply the "age" elimination filter, then encode "sex" as integer symbols.
            var cleaned = new Elimination("age").Apply(sheet);
            var encoded = new Codification(cleaned, "sex").Apply(cleaned);

            int[]      survived = encoded.Columns["survived"].ToArray <int>();
            double[][] features = encoded.ToArray <double>("pclass", "sex", "age", "parch", "sibsp");

            // Two-class tree whose variables are inferred from the feature data.
            var tree    = new DecisionTree(DecisionVariable.FromData(features), 2);
            var learner = new C45Learning(tree);

            learner.Run(features, survived);

            // Query the tree for two hand-written passengers.
            var first  = tree.Decide(new double[] { 0, 1, 23, 0, 0 });
            var second = tree.Decide(new double[] { 1, 0, 30, 1, 1 });

            Console.WriteLine($"Male={first}, Female={second}");
            Console.ReadKey();
        }
コード例 #15
0
        //public static C45Model CreateC45Model(Codification codification)
        //{
        //    int lastIndex = codification.Columns.Count - 1;

        //    List<DecisionVariable> attributes = new List<DecisionVariable>();

        //    for (int indexColumn = 0; indexColumn < lastIndex; indexColumn++)
        //    {
        //        attributes.Add(new DecisionVariable(codification.Columns[indexColumn].ColumnName,
        //            codification[indexColumn].Symbols));
        //    }

        //    C45Model model = new C45Model(new DecisionTree(attributes.ToArray(), 2));

        //    return model;
        //}

        //public C45Model(DecisionTree tree)
        //{
        //    this.Tree = tree;
        //}

        // Trains the decision tree with the C4.5 algorithm.
        public override void TrainningModel(TrainningData trainningData)
        {
            // Pull everything needed out of the training data up front.
            Codification codebook = trainningData.CodificationData;
            double[][]   features = trainningData.TrainningAttributes;
            int[]        classes  = trainningData.ClassificationAttribute;

            // Build the tree from the codification.
            this.Tree = this.CreateDecisionTree(codebook);

            // Fit the tree with a fresh C4.5 learner; the error it reports is not used.
            var learner = new C45Learning(this.Tree);
            learner.Run(features, classes);
        }
コード例 #16
0
ファイル: C45LearningTest.cs プロジェクト: xadxxadx/framework
        public void same_input_different_output_minimal()
        {
            // Two identical samples carrying contradictory labels: learning
            // must cope with data that cannot be separated.
            double[][] inputs = new double[][] {
                new double[] { 0 },
                new double[] { 0 }
            };

            int[] outputs = new int[] {
                1,
                0
            };


            DecisionVariable[] variables = { new DecisionVariable("x", DecisionVariableKind.Continuous) };

            DecisionTree decisionTree = new DecisionTree(variables, 2);
            C45Learning  c45Learning  = new C45Learning(decisionTree);

            // Regression scenario: this call was reported to throw System.AggregateException.
            c45Learning.Run(inputs, outputs);

            // Expected value goes first so that a failure message reads correctly.
            Assert.AreEqual(0, decisionTree.Decide(new[] { 0 }));
        }
コード例 #17
0
ファイル: C45LearningTest.cs プロジェクト: xadxxadx/framework
        public void same_input_different_output()
        {
            // The input value 0 appears twice (rows 1 and 4) with different labels.
            double[][] inputs =
            {
                new double[] { 1 },
                new double[] { 0 },
                new double[] { 2 },
                new double[] { 3 },
                new double[] { 0 },
            };

            int[] outputs = { 11, 0, 22, 33, 1 };

            var variables = new[] { new DecisionVariable("x", DecisionVariableKind.Continuous) };

            var decisionTree = new DecisionTree(variables, 34);
            var c45Learning  = new C45Learning(decisionTree);
            c45Learning.Join      = 10;
            c45Learning.MaxHeight = 10;

            // Regression scenario: this call was reported to throw System.AggregateException.
            c45Learning.Run(inputs, outputs);

            int[] actual = decisionTree.Decide(inputs);

            // Both rows with input 0 are expected to be classified as 0.
            int[] expected = { 11, 0, 22, 33, 0 };
            for (int row = 0; row < expected.Length; row++)
            {
                Assert.AreEqual(expected[row], actual[row]);
            }
        }
コード例 #18
0
        public override IModelDiscrete <double, int> GenerateModelDiscrete(IDataSet <double, int> training_set)
        {
            // Feature rows, and the labels taken at index 0 of the label data.
            double[][] features = training_set.FeatureData;
            int[]      labels   = ToolsCollection.ConvertToArray2D(training_set.LabelData).Select1DIndex1(0);

            // Every feature descriptor becomes a continuous decision variable.
            var context   = training_set.DataContext;
            var variables = new List <DecisionVariable>();

            foreach (VariableDescriptor descriptor in context.FeatureDescriptors)
            {
                var variable = new DecisionVariable(descriptor.Name, DecisionVariableKind.Continuous);
                variables.Add(variable);
            }

            // The tree has one output class per value of the first label descriptor.
            int classCount = training_set.DataContext.GetLabelDescriptor(0).ValueCount;
            var tree       = new DecisionTree(variables, classCount);

            // Learn the tree with C4.5; the error it reports is not used.
            var learner = new C45Learning(tree);
            learner.Run(features, labels);

            return new ModelDiscreteC45 <int>(training_set.DataContext, tree);
        }
コード例 #19
0
ファイル: C45LearningTest.cs プロジェクト: sujitzzz/framework
        public void AttributeReuseTest1()
        {
            // Parse the embedded iris data into rows of five text fields.
            string[] records = Resources.iris_data
                               .Split(new[] { "\r\n" }, StringSplitOptions.RemoveEmptyEntries);
            string[][] text = records.Apply(x => x.Split(','));

            // Sanity-check the shape and the first / last labels.
            Assert.AreEqual(150, text.Rows());
            Assert.AreEqual(5, text.Columns());
            Assert.AreEqual("Iris-setosa", text[0].Get(-1));
            Assert.AreEqual("Iris-virginica", text.Get(-1).Get(-1));

            // The first four fields of each row are the numeric features.
            var inputs = new double[text.Length][];
            for (int row = 0; row < inputs.Length; row++)
            {
                string[] fields = text[row].First(4);
                inputs[row] = fields.Convert(s => Double.Parse(s, System.Globalization.CultureInfo.InvariantCulture));
            }

            // The fifth field is the class label, encoded to integers by the codebook.
            string[] labels   = text.GetColumn(4);
            var      codebook = new Codification("Label", labels);
            int[]    outputs  = codebook.Translate("Label", labels);

            var features = new DecisionVariable[]
            {
                new DecisionVariable("sepal length", DecisionVariableKind.Continuous),
                new DecisionVariable("sepal width",  DecisionVariableKind.Continuous),
                new DecisionVariable("petal length", DecisionVariableKind.Continuous),
                new DecisionVariable("petal width",  DecisionVariableKind.Continuous),
            };

            var tree = new DecisionTree(features, codebook.Columns[0].Symbols);

            // Join is set to 3 for this test.
            var teacher = new C45Learning(tree) { Join = 3 };

            double error = teacher.Run(inputs, outputs);

            Assert.AreEqual(0.02, error, 1e-10);

            // The extracted rule set must reproduce the tree's error.
            DecisionSet rules    = tree.ToRules();
            double      newError = ComputeError(rules, inputs, outputs);

            Assert.AreEqual(0.02, newError, 1e-10);

            var    culture  = System.Globalization.CultureInfo.InvariantCulture;
            string ruleText = rules.ToString(codebook, culture);

            string expected = @"0 =: (petal length <= 2.45)
1 =: (petal length > 2.45) && (petal width <= 1.75) && (sepal length <= 7.05) && (petal length <= 4.95)
1 =: (petal length > 2.45) && (petal width > 1.75) && (petal length <= 4.85) && (sepal length <= 5.95)
2 =: (petal length > 2.45) && (petal width <= 1.75) && (sepal length > 7.05)
2 =: (petal length > 2.45) && (petal width > 1.75) && (petal length > 4.85)
2 =: (petal length > 2.45) && (petal width <= 1.75) && (sepal length <= 7.05) && (petal length > 4.95)
2 =: (petal length > 2.45) && (petal width > 1.75) && (petal length <= 4.85) && (sepal length > 5.95)
";

            // Convert CRLF sequences in the literal to the platform newline before comparing.
            expected = expected.Replace("\r\n", Environment.NewLine);

            Assert.AreEqual(expected, ruleText);
        }
コード例 #20
0
ファイル: Program.cs プロジェクト: bnantz/ML-MKEDOTNET
        /// <summary>
        /// The main entry point for the program.
        /// </summary>
        /// <remarks>
        /// Loads <c>training.csv</c> and <c>test.csv</c>, drops the <c>HeartRate</c> column, passes both tables
        /// through <c>DataImputation.RemoveMissing</c>, trains a C4.5 decision tree and then a linear multi-class
        /// support vector machine on the training data, and hands each trained model together with the test table
        /// to <c>Evaluator</c> and <c>Validator</c>. Any exception thrown along the way is caught and written to
        /// <c>Logger.Error</c>; nothing is rethrown.
        /// </remarks>
        public static void Main()
        {
            try
            {
                #region Exploratory Data Analysis Explanation

                /*
                 *  John Tukey coined the term Exploratory Data Analysis in his seminal book of the same name.  There really is not a prescribed way to do an EDA.
                 *  Tools I use for EDA include Microsoft Excel, plots and visual inspection of the data.  Without creating an early bias, gut feelings do play a role in a good EDA.
                 *  Some objectives of EDA are to:
                 *      - Identify the types of data in the dataset
                 *      - Examine the statistical properties of the data
                 *      - Look for invalid data (may need Domain or Subject Matter experts)
                 *      - Understand the provenance of the data
                 *      - Aid in the selection of appropriate statistical tools and techniques
                 *
                 *  For our diabetes dataset, notice that there is both quantitative and qualitative data.  Note that the result or outcome variable (which indicates if the person has
                 *  diabetes) is nominal data with only two states.  This is called dichotomous or binary categorical data, which rules out some machine learning algorithms and directs
                 *  us to others.
                 */
                #endregion
                // Because of time constraints, the loading of the DataTables and the EDA are already complete.
                XmlConfigurator.Configure();

                Logger.Info("Exploratory Data Analysis");

                // Load the training and test sets from CSV files given as relative paths.
                FileInfo  fi       = new FileInfo("training.csv");
                DataTable training = DataTableCsvConvertor.GetDataTableFromCsv(fi);

                fi = new FileInfo("test.csv");
                DataTable test = DataTableCsvConvertor.GetDataTableFromCsv(fi);

                // Print out the first few table rows.
                Head.PrintHead(training);

                // Disabled: basic descriptive statistics are important for most EDAs, but this outputs a lot of information.
                //Logger.Info(string.Empty);
                //BasicStatistics.BasicStats(training);

                #region Data Imputation & Cleanup Explanation

                /*
                 *  Keep in mind that Machine Learning algorithms operate on numerical data only, so something will have to be done when the data is text or NULL.  Also, predictor
                 *  variables (aka features or columns of data) that do not vary will not be predictive and may need to be removed.  Due to time constraints the EDA, ETL (Extract, Transform and Load)
                 *  and data cleaning are already completed in the solution.  For this analysis, remove the HeartRate column because it is all NULL, and remove any rows of data that contain NULLs.
                 */
                #endregion
                // Delete any columns that are not needed.
                training.Columns.Remove("HeartRate");
                test.Columns.Remove("HeartRate");

                // How to handle rows containing missing or NA data - data imputation or deletion?  Here both tables are passed through RemoveMissing.
                training = DataImputation.RemoveMissing(training);
                test     = DataImputation.RemoveMissing(test);

                // The codebook is built from the training table only.
                Codification codebook      = new Codification(training);
                int          outputClasses = 2;

                // Predictor columns, in the same order as the DecisionVariable list declared further down.
                string[] inputColumns =
                {
                    "Gender", "YearOfBirth", "SmokingEffectiveYear", "NISTcode", "Height", "Weight", "BMI", "SystolicBP", "DiastolicBP", "RespiratoryRate", "Temperature"
                };

                string outputColumn = "DMIndicator";

                // Translate our training data into integer symbols using our codebook:
                // NOTE(review): inputs are read from the codified 'symbols' table, but outputs are read from the raw
                // 'training' table - presumably DMIndicator is already numeric; confirm this is intended.
                DataTable  symbols = codebook.Apply(training);
                double[][] inputs  = symbols.ToArray(inputColumns);
                int[]      outputs = Matrix.ToArray <int>(training, outputColumn);


                #region Decision Tree Overview

                /*
                 *  Decision Trees are very powerful, especially with a binary classification model, and are somewhat resistant to over-fitting the data.
                 *  Additionally, they are intuitive to explain to stakeholders.
                 */
                #endregion
                Logger.Info(string.Empty);
                Logger.Info("Decision Tree");

                // One decision variable per input column: Gender is discrete, every other predictor is continuous.
                DecisionVariable[] attributes =
                {
                    new DecisionVariable("Gender",                                             2), // 2 possible values (Male, Female)
                    new DecisionVariable("YearOfBirth",          DecisionVariableKind.Continuous),
                    new DecisionVariable("SmokingEffectiveYear", DecisionVariableKind.Continuous),
                    new DecisionVariable("NISTcode",             DecisionVariableKind.Continuous),
                    new DecisionVariable("Height",               DecisionVariableKind.Continuous),
                    new DecisionVariable("Weight",               DecisionVariableKind.Continuous),
                    new DecisionVariable("BMI",                  DecisionVariableKind.Continuous),
                    new DecisionVariable("SystolicBP",           DecisionVariableKind.Continuous),
                    new DecisionVariable("DiastolicBP",          DecisionVariableKind.Continuous),
                    new DecisionVariable("RespiratoryRate",      DecisionVariableKind.Continuous),
                    new DecisionVariable("Temperature",          DecisionVariableKind.Continuous)
                };

                DecisionTree tree = new DecisionTree(attributes, outputClasses);

                C45Learning c45learning = new C45Learning(tree);

                // Learn the training instances!  The value returned by Run is not captured here.
                c45learning.Run(inputs, outputs);

                // The next two lines are optional to save the model into IL for future use.
                // Neither 'expression' nor 'func' is used again in this method.
                // Convert to an expression tree
                var expression = tree.ToExpression();
                // Compiles the expression to IL
                var func = expression.Compile();

                #region Evaluation Explanation

                /*
                 *  To evaluate the model, now use each row of the test dataset to predict the output variable (DMIndicator) using the DecisionTree's compute method, passing in the same
                 *  variables that were used to train the model.  Store the test dataset's value of DMIndicator and the predicted value in a DataTable and integer collection for future
                 *  validation of the model.
                 */
                #endregion
                Evaluator.Evaluate(test, tree);

                #region Validation Explanation

                /*
                 *  There are many ways to validate models, but we will use a confusion matrix because it is intuitive and a widely accepted way to validate binary classification models.
                 *  Conveniently, Accord.Net has a ConfusionMatrix class to create this matrix for you.  Pass the collections of predicted and actual integer values
                 *  stored earlier to the ConfusionMatrix class and output the matrix and accuracy.
                 */
                #endregion
                Validator.Validate(test, tree);


                #region Support Vector Machine Overview

                /*
                 *  Support Vector Machines are powerful classification machine learning algorithms with very few knobs to turn.  The kernel of the SVM can be exchanged to use
                 *  a number of different mathematical algorithms including polynomials, neural networks and Gaussian functions.
                 */
                #endregion
                Logger.Info(string.Empty);
                Logger.Info("Support Vector Machine");

                // A linear kernel is used for the SVM.
                IKernel kernel = new Linear();

                // Create the Multi-class Support Vector Machine using the selected Kernel
                int inputDimension = inputs[0].Length;
                var ksvm           = new MulticlassSupportVectorMachine(inputDimension, kernel, outputClasses);

                // Create the learning algorithm using the machine and the training data.
                // The Algorithm callback supplies a SequentialMinimalOptimization instance for the
                // machine, inputs and outputs it is handed.
                var ml = new MulticlassSupportVectorLearning(ksvm, inputs, outputs)
                {
                    Algorithm = (svm, classInputs, classOutputs, i, j) =>
                    {
                        return(new SequentialMinimalOptimization(svm, classInputs, classOutputs)
                        {
                            CacheSize = 0
                        });
                    }
                };

                // Train the SVM; 'svmError' is not used again in this method.
                double svmError = ml.Run();

                #region Evaluation Explanation

                /*
                 *  To evaluate the model, now use each row of the test dataset to predict the output variable (DMIndicator) using the trained model (here the support vector machine),
                 *  passing in the same variables that were used to train the model.  Store the test dataset's value of DMIndicator and the predicted value in a DataTable and integer
                 *  collection for future validation of the model.
                 */
                #endregion
                Evaluator.Evaluate(test, ksvm);

                #region Validation Explanation

                /*
                 *  There are many ways to validate models, but we will use a confusion matrix because it is intuitive and a widely accepted way to validate binary classification models.
                 *  Conveniently, Accord.Net has a ConfusionMatrix class to create this matrix for you.  Pass the collections of predicted and actual integer values
                 *  stored earlier to the ConfusionMatrix class and output the matrix and accuracy.
                 */
                #endregion
                Validator.Validate(test, ksvm);
            }
            catch (Exception ex)
            {
                // Top-level handler: log the full exception text and let the program end normally.
                Logger.Error(ex.ToString());
            }
        }
コード例 #21
0
ファイル: Program.cs プロジェクト: LidaTomsk/MusicDataMining
        // Builds a table from the learning songs, codifies it, trains a C4.5 decision
        // tree that predicts "Popularity", and finally classifies one hand-written row.
        private static void DoYourWork()
        {
            // This factory method returns only songs for which ALL data is available.
            var songs = SongsFactory.GetSongsForLearning();

            // One column per LearnModel property, sorted by name; the argument order
            // of Rows.Add below follows this alphabetical order.
            var columnNames = typeof(LearnModel).GetProperties()
                                                .Select(p => p.Name)
                                                .OrderBy(x => x)
                                                .ToList();

            var data = new DataTable("Songs Example");
            data.Columns.AddRange(columnNames.Select(name => new DataColumn(name)).ToArray());

            foreach (var song in songs)
            {
                data.Rows.Add(
                    song.ArtistBeginYear,
                    song.ArtistType,
                    song.Duration,
                    song.GenreType,
                    song.LyricCharsCount,
                    song.LyricWordsCount,
                    song.Negative,
                    song.Popularity.ToString(),
                    song.Positive,
                    song.SongDateYear);
            }

            // Every column except the target is a feature.
            var featureColumns = columnNames.Where(x => x != "Popularity").ToArray();

            var codebook = new Codification(data, columnNames.ToArray());
            var symbols  = codebook.Apply(data);
            var input    = symbols.ToArray <double>(featureColumns);
            var output   = symbols.ToArray <int>("Popularity");

            // Per-feature minimum and maximum of the codified inputs (nine feature columns).
            const int featureCount = 9;
            var mins = new int[featureCount];
            var maxs = new int[featureCount];

            for (int column = 0; column < featureCount; column++)
            {
                int lowest  = int.MaxValue;
                int highest = int.MinValue;

                foreach (var row in input)
                {
                    int value = (int)row[column];

                    lowest  = value < lowest  ? value : lowest;
                    highest = value > highest ? value : highest;
                }

                mins[column] = lowest;
                maxs[column] = highest;
            }

            // Variable order matches the feature column order used for 'input'.
            DecisionVariable[] variables =
            {
                new DecisionVariable("ArtistBeginYear", new IntRange(mins[0], maxs[0])),
                new DecisionVariable("ArtistType",      songs.Select(x => x.ArtistType).Distinct().Count()),
                new DecisionVariable("Duration",        new IntRange(mins[2], maxs[2])),
                new DecisionVariable("GenreType",       songs.Select(x => x.GenreType).Distinct().Count()),
                new DecisionVariable("LyricCharsCount", new IntRange(mins[4], maxs[4])),
                new DecisionVariable("LyricWordsCount", new IntRange(mins[5], maxs[5])),
                new DecisionVariable("Negative",        new DoubleRange(songs.Min(x => x.Negative), songs.Max(x => x.Negative))),
                new DecisionVariable("Positive",        new DoubleRange(songs.Min(x => x.Positive), songs.Max(x => x.Positive))),
                new DecisionVariable("SongDateYear",    new DoubleRange(songs.Min(x => x.SongDateYear), songs.Max(x => x.SongDateYear))),
            };

            // Two classes: popular and unpopular.
            const int classCount = 2;

            var tree    = new DecisionTree(variables, classCount);
            var learner = new C45Learning(tree);

            learner.Run(input, output);

            // Check the trained tree against a row of our own data.
            data.Rows.Add(1966, 1, 302, 1, 1354, 255, 92.944470512297059, 0 /* popularity: value is irrelevant here */, 7.05552948770294, 2009);

            var probeRow       = data.Rows[data.Rows.Count - 1];
            var probe          = codebook.Translate(probeRow, featureColumns);
            var answer         = tree.Compute(probe);
            var readableAnswer = codebook.Translate("Popularity", answer);
        }
コード例 #22
0
 // Trains a new C45Learning instance on the current tree using Inputs/Outputs and
 // returns the error reported by Run (per the original note: best is 0.0, worst is 1.0).
 public double TreeLearn()
 {
     c45 = new C45Learning(tree);

     double trainingError = c45.Run(Inputs, Outputs);
     return trainingError;
 }
コード例 #23
0
        // Reads the given file, codifies its table, trains a C4.5 decision tree on it and
        // prints the tree both as generated code and as a rule set to the console.
        public void Run(String filename)
        {
            ReadFile(filename);

            // The textual, categorical data in the table is converted to a discrete
            // representation: the codebook translates text into integer symbols.
            Codification codebook = new Codification(data);
            DataTable    symbols  = codebook.Apply(data);

            // Build a dictionary for every input column declared as "string" ...
            for (int column = 0; column < inputColumns.Count; column++)
            {
                if (inputTypes[column] != "string")
                {
                    continue;
                }

                CreateDic(inputColumns[column], symbols);
            }

            // ... and one for the output column.
            CreateDic(outputColumn, symbols);

            // One input vector and one output index per row of the codified table.
            double[][] inputs = symbols.AsEnumerable()
                                       .Select(p => GetInputRow(p))
                                       .Cast <double[]>()
                                       .ToArray();

            int[] outputs = symbols.AsEnumerable()
                                   .Select(p => GetIndex(outputColumn, p[outputColumn].ToString()))
                                   .Cast <int>()
                                   .ToArray();

            // Decision variables come straight from the codebook; the tree has 5 output classes.
            var          attributes = DecisionVariable.FromCodebook(codebook, inputColumns.ToArray());
            DecisionTree tree       = new DecisionTree(attributes, 5);

            // Train the tree with C4.5. The returned training error is not used further.
            C45Learning c45   = new C45Learning(tree);
            double      error = c45.Run(inputs, outputs);

            // The tree can also be turned into an expression (and compiled to a native
            // function); the expression is built here but not used afterwards.
            var expression = tree.ToExpression();

            // Emit the tree as source code for a class named "ClassTest".
            Console.WriteLine(tree.ToCode("ClassTest"));

            // Emit the tree as a set of decision rules.
            DecisionSet rules = tree.ToRules();

            Console.WriteLine(rules.ToString());
        }
コード例 #24
0
ファイル: C45LearningTest.cs プロジェクト: xadxxadx/framework
        // Trains C4.5 on a tennis data set whose "Wind" column never varies and checks
        // that the learned tree reproduces the expected output for every training sample.
        public void ConstantDiscreteVariableTest()
        {
            DataTable data = new DataTable("Degenerated Tennis Example");

            data.Columns.Add("Day", typeof(string));
            data.Columns.Add("Outlook", typeof(string));
            data.Columns.Add("Temperature", typeof(double));
            data.Columns.Add("Humidity", typeof(double));
            data.Columns.Add("Wind", typeof(string));
            data.Columns.Add("PlayTennis", typeof(string));

            // Day, Outlook, Temperature, Humidity, Wind, PlayTennis
            object[][] rows =
            {
                new object[] { "D1",  "Sunny",    50, 85, "Weak", "No"  },
                new object[] { "D2",  "Sunny",    50, 90, "Weak", "No"  },
                new object[] { "D3",  "Overcast", 83, 78, "Weak", "Yes" },
                new object[] { "D4",  "Rain",     70, 96, "Weak", "Yes" },
                new object[] { "D5",  "Rain",     68, 80, "Weak", "Yes" },
                new object[] { "D6",  "Rain",     65, 70, "Weak", "No"  },
                new object[] { "D7",  "Overcast", 64, 65, "Weak", "Yes" },
                new object[] { "D8",  "Sunny",    50, 95, "Weak", "No"  },
                new object[] { "D9",  "Sunny",    69, 70, "Weak", "Yes" },
                new object[] { "D10", "Rain",     75, 80, "Weak", "Yes" },
                new object[] { "D11", "Sunny",    75, 70, "Weak", "Yes" },
                new object[] { "D12", "Overcast", 72, 90, "Weak", "Yes" },
                new object[] { "D13", "Overcast", 81, 75, "Weak", "Yes" },
                new object[] { "D14", "Rain",     50, 80, "Weak", "No"  },
            };

            foreach (object[] row in rows)
            {
                data.Rows.Add(row);
            }

            // The codebook converts the string columns into integer symbols.
            Codification codebook = new Codification(data);

            DecisionVariable[] attributes =
            {
                // Sunny, Overcast or Rain
                new DecisionVariable("Outlook", codebook["Outlook"].Symbols),
                new DecisionVariable("Temperature", DecisionVariableKind.Continuous),
                new DecisionVariable("Humidity", DecisionVariableKind.Continuous),
                // "Weak" is the only value that occurs; the variable is declared with
                // one symbol more than the codebook holds for this column.
                new DecisionVariable("Wind", codebook["Wind"].Symbols + 1)
            };

            // Yes or No
            int classCount = codebook["PlayTennis"].Symbols;

            DecisionTree tree    = new DecisionTree(attributes, classCount);
            C45Learning  teacher = new C45Learning(tree);

            // Extract the symbols from the data and train the classifier.
            DataTable symbols = codebook.Apply(data);

            double[][] inputs  = symbols.ToArray("Outlook", "Temperature", "Humidity", "Wind");
            int[]      outputs = symbols.ToArray <int>("PlayTennis");

            // The returned training error is not asserted; predictions are checked instead.
            teacher.Run(inputs, outputs);

            for (int sample = 0; sample < inputs.Length; sample++)
            {
                int predicted = tree.Compute(inputs[sample]);
                Assert.AreEqual(outputs[sample], predicted);
            }
        }
コード例 #25
0
ファイル: C45LearningTest.cs プロジェクト: xadxxadx/framework
        // Trains a C4.5 decision tree on the UCI Nursery data set and verifies that the tree,
        // and (outside NET35) its compiled expression, reproduce every training label.
        public void LargeRunTest()
        {
            #region doc_nursery
            // This example uses the Nursery Database available from the University of
            // California Irvine repository of machine learning databases, available at
            //
            //   http://archive.ics.uci.edu/ml/machine-learning-databases/nursery/nursery.names
            //
            // The description paragraph is listed as follows.
            //
            //   Nursery Database was derived from a hierarchical decision model
            //   originally developed to rank applications for nursery schools. It
            //   was used during several years in 1980's when there was excessive
            //   enrollment to these schools in Ljubljana, Slovenia, and the
            //   rejected applications frequently needed an objective
            //   explanation. The final decision depended on three subproblems:
            //   occupation of parents and child's nursery, family structure and
            //   financial standing, and social and health picture of the family.
            //   The model was developed within expert system shell for decision
            //   making DEX (M. Bohanec, V. Rajkovic: Expert system for decision
            //   making. Sistemica 1(1), pp. 145-157, 1990.).
            //

            // Let's begin by loading the raw data. This string variable contains
            // the contents of the nursery.data file as a single, continuous text.
            //
            string nurseryData = Resources.nursery;

            // Those are the input columns available in the data
            //
            string[] inputColumns =
            {
                "parents", "has_nurs", "form",   "children",
                "housing", "finance",  "social", "health"
            };

            // And this is the output, the last column of the data.
            //
            string outputColumn = "output";


            // Let's populate a data table with this information.
            //
            DataTable table = new DataTable("Nursery");
            table.Columns.Add(inputColumns);
            table.Columns.Add(outputColumn);

            // Split on both Windows and Unix line endings so the result does not depend
            // on the platform's Environment.NewLine, and drop empty entries so that a
            // trailing line break does not add a bogus, empty row to the table.
            //
            string[] lines = nurseryData.Split(
                new[] { "\r\n", "\n" }, StringSplitOptions.RemoveEmptyEntries);

            foreach (var line in lines)
            {
                table.Rows.Add(line.Split(','));
            }


            // Now, we have to convert the textual, categorical data found
            // in the table to a more manageable discrete representation.
            //
            // For this, we will create a codebook to translate text to
            // discrete integer symbols:
            //
            Codification codebook = new Codification(table);

            // And then convert all data into symbols
            //
            DataTable  symbols = codebook.Apply(table);
            double[][] inputs  = symbols.ToArray(inputColumns);
            int[]      outputs = symbols.ToArray <int>(outputColumn);

            // From now on, we can start creating the decision tree.
            //
            var          attributes = DecisionVariable.FromCodebook(codebook, inputColumns);
            DecisionTree tree       = new DecisionTree(attributes, classes: 5);


            // Now, let's create the C4.5 algorithm
            C45Learning c45 = new C45Learning(tree);

            // and learn a decision tree. The value of
            //   the error variable below should be 0.
            //
            double error = c45.Run(inputs, outputs);


            // To compute a decision for one of the input points,
            //   such as the one at index 25 of the set, we can use
            //
            int y = tree.Compute(inputs[25]);
            #endregion

            Assert.AreEqual(0, error);

            // The tree must reproduce the label of every training sample.
            for (int i = 0; i < inputs.Length; i++)
            {
                int expected = outputs[i];
                int actual   = tree.Compute(inputs[i]);

                Assert.AreEqual(expected, actual);
            }


#if !NET35
            // Finally, we can also convert our tree to a native
            // function, improving efficiency considerably, with
            //
            Func<double[], int> func = tree.ToExpression().Compile();

            // Again, to compute a new decision, we can just use
            //
            int z = func(inputs[25]);


            // The compiled function must agree with the training labels as well.
            for (int i = 0; i < inputs.Length; i++)
            {
                int expected = outputs[i];
                int actual   = func(inputs[i]);

                Assert.AreEqual(expected, actual);
            }
#endif
        }
コード例 #26
0
        // Loads the Nursery data set from the embedded resource, codifies it, trains a C4.5
        // decision tree on it and returns the tree. The codified input vectors and output
        // labels are handed back through the 'inputs' and 'outputs' out parameters.
        // Sanity assertions on the data and on the resulting tree are made along the way.
        private static DecisionTree createTree(out double[][] inputs, out int[] outputs)
        {
            string nurseryData = Resources.nursery;

            string[] inputColumns =
            {
                "parents", "has_nurs", "form",   "children",
                "housing", "finance",  "social", "health"
            };

            string outputColumn = "output";

            DataTable table = new DataTable("Nursery");

            table.Columns.Add(inputColumns);
            table.Columns.Add(outputColumn);

            // Accept both Windows and Unix line endings so the line count below does not
            // depend on how the resource file was checked out.
            string[] lines = nurseryData.Split(
                new[] { "\r\n", "\n" }, StringSplitOptions.RemoveEmptyEntries);

            foreach (var line in lines)
            {
                table.Rows.Add(line.Split(','));
            }

            Assert.AreEqual(12960, lines.Length);
            Assert.AreEqual("usual,proper,complete,1,convenient,convenient,nonprob,recommended,recommend", lines[0]);
            Assert.AreEqual("great_pret,very_crit,foster,more,critical,inconv,problematic,not_recom,not_recom", lines[lines.Length - 1]);


            // Translate the textual data into integer symbols.
            Codification codebook = new Codification(table);

            DataTable symbols = codebook.Apply(table);

            inputs  = symbols.ToArray(inputColumns);
            outputs = symbols.ToArray <int>(outputColumn);

            Assert.AreEqual(12960, inputs.Rows());
            Assert.AreEqual(8, inputs.Columns());
            Assert.AreEqual(12960, outputs.Length);
            Assert.AreEqual(4, outputs.Max());
            Assert.AreEqual(0, outputs.Min());
            Assert.AreEqual(5, outputs.DistinctCount());


            var attributes = DecisionVariable.FromCodebook(codebook, inputColumns);
            var tree       = new DecisionTree(attributes, classes: 5);

            Assert.AreEqual(8, tree.NumberOfInputs);
            Assert.AreEqual(5, tree.NumberOfOutputs);

            C45Learning c45 = new C45Learning(tree);

            double error = c45.Run(inputs, outputs);

            // Expected upper bound of each attribute's range, in input column order.
            int[] expectedMax = { 2, 4, 3, 3, 2, 1, 2, 2 };

            // Assert.AreEqual takes (expected, actual); keeping that order makes the
            // failure messages report the right value as "expected".
            Assert.AreEqual(8, tree.Attributes.Count);
            for (int i = 0; i < tree.Attributes.Count; i++)
            {
                Assert.AreEqual(DecisionVariableKind.Discrete, tree.Attributes[i].Nature);
                Assert.AreEqual(0, tree.Attributes[i].Range.Min);
                Assert.AreEqual(inputColumns[i], tree.Attributes[i].Name);
                Assert.AreEqual(expectedMax[i], tree.Attributes[i].Range.Max);
            }


            // Training must not change the tree's dimensions and must fit the data exactly.
            Assert.AreEqual(8, tree.NumberOfInputs);
            Assert.AreEqual(5, tree.NumberOfOutputs);
            Assert.AreEqual(0, error);

            return tree;
        }
コード例 #27
0
        // Trains a two-class C4.5 decision tree on the given table (nine discrete features
        // plus a "Class" column) and returns the class label the tree predicts for the
        // values currently held in 'inputlar'.
        private string C45(DataTable tbl)
        {
            // Feature columns; the same order is used for the decision variables
            // and for the extracted input vectors.
            string[] featureNames =
            {
                "Clump Thickness",
                "Uniformity of Cell Size",
                "Uniformity of Cell Shape",
                "Marginal Adhesion",
                "Single Epithelial Cell Size",
                "Bare Nuclei",
                "Bland Chromatin",
                "Normal Nucleoli",
                "Mitoses"
            };

            int          classCount = 2;
            Codification codebook   = new Codification(tbl);

            // Every feature is declared as a discrete variable with 10 symbols.
            DecisionVariable[] attributes = new DecisionVariable[featureNames.Length];
            for (int i = 0; i < attributes.Length; i++)
            {
                attributes[i] = new DecisionVariable(featureNames[i], 10);
            }

            DecisionTree tree = new DecisionTree(attributes, classCount);

            // Translate our training data into integer symbols using our codebook.
            DataTable symbols = codebook.Apply(tbl);

            double[][] inputs  = symbols.ToIntArray(featureNames).ToDouble();
            int[]      outputs = symbols.ToIntArray("Class").GetColumn(0);

            // Learn the decision tree with C4.5; the returned error is not inspected.
            C45Learning c45 = new C45Learning(tree);
            c45.Run(inputs, outputs);

            // Compute a decision for one of the training points (the sample at index 5) ...
            tree.Compute(inputs[5]);

            // ... and the same decision through the tree compiled to a native function.
            Func <double[], int> func = tree.ToExpression().Compile();
            func(inputs[5]);

            // Codify the nine query values, classify them and translate the answer back.
            int[] query = codebook.Translate(inputlar[0], inputlar[1], inputlar[2], inputlar[3],
                                             inputlar[4], inputlar[5], inputlar[6], inputlar[7], inputlar[8]);
            int predicted = tree.Compute(query);

            return codebook.Translate("Class", predicted);
        }