Ejemplo n.º 1
0
        /// <summary>
        ///   Builds a random forest of C4.5 decision trees from the training data.
        /// </summary>
        /// <param name="datalist">Training samples; each entry exposes its attribute count via <c>d</c>.</param>
        public void RandomForestBuild(List <train> datalist)
        {
            int d = datalist[0].d; // number of attributes per sample

            forest = new List <DecisionTree>();

            int n = datalist.Count; // bootstrap sample size (all rows)
            int k = d;              // attributes per tree; sqrt(d) is the usual heuristic //(int)Math.Sqrt(d);
            int m = 100;            // number of trees in the forest

            for (int i = 0; i < m; ++i)
            {
                double[][] inputs;
                int[]      outputs;
                int[]      indexs;
                GetData(out inputs, out outputs, datalist, n, k, out indexs);

                // One continuous variable per sampled attribute, named after
                // its original 1-based column index.
                DecisionVariable[] variables = new DecisionVariable[k];
                for (int j = 0; j < k; ++j)
                {
                    variables[j] = new DecisionVariable("attribute" + (indexs[j] + 1), DecisionVariableKind.Continuous);
                }

                // Create the C4.5 learning algorithm
                var c45 = new C45Learning(variables);

                // Learn the decision tree using C4.5
                DecisionTree dtmp = c45.Learn(inputs, outputs);
                forest.Add(dtmp);
            }
            log("The random forest model has been trained");
        }
Ejemplo n.º 2
0
        /// <summary>
        ///   Retrains every tree in the forest in parallel, each on its own
        ///   bootstrap sample of the given data. Always returns 0.
        /// </summary>
        /// <param name="inputs">Training feature vectors.</param>
        /// <param name="output">Training class labels, one per row of <paramref name="inputs"/>.</param>
        private double run(double[][] inputs, int[] output)
        {
            int cols = inputs[0].Length;

            // Variables each tree may consider per split: sqrt(cols) by
            // default, otherwise a fixed fraction of the columns.
            int colsPerTree = CoverageRatio == 0
                ? (int)System.Math.Sqrt(cols)
                : (int)(cols * CoverageRatio);

            var trees = forest.Trees;

            Parallel.For(0, trees.Length, ParallelOptions, i =>
            {
                // Draw a bootstrap sample of the training rows for this tree.
                int[] idx = Vector.Sample(SampleRatio, output.Length);
                var x     = inputs.Get(idx);
                var y     = output.Get(idx);

                var c45 = new C45Learning(forest.Trees[i])
                {
                    MaxVariables = colsPerTree,
                    Join         = 100
                };

                c45.Learn(x, y);
            });

            return(0);
        }
Ejemplo n.º 3
0
        /// <summary>
        ///   Trains a binary C4.5 decision tree on the training data (label 1
        ///   when PercentMatch > 0), logs match counts for the training and
        ///   test sets, and logs the total learning time.
        /// </summary>
        public void Learn()
        {
            var stopWatch = Stopwatch.StartNew();

            // One continuous [-1, 1] variable per vector component, named by
            // index; only the component count of the first sample matters.
            // (The original foreach discarded its loop variable entirely.)
            int componentCount = LearningData.TrainingData.First().ToVectorArray(Metadata, PropertiesToSkip).Length;
            var variables = new List <DecisionVariable>();
            for (int i = 0; i < componentCount; i++)
            {
                variables.Add(new DecisionVariable(i.ToString(), new DoubleRange(-1, 1)));
            }

            Tree = new DecisionTree(variables, 2);
            var learner = new C45Learning(Tree);

            learner.Learn(LearningData.TrainingData.Select(data => data.ToVectorArray(Metadata, PropertiesToSkip)).ToArray(),
                          LearningData.TrainingData.Select(data => data.PercentMatch > 0 ? 1 : 0).ToArray());

            // Log how well the learned tree matches both data sets.
            var matcher = new LoggingDecisionTreeMatcher(LearningData.TrainingData);
            matcher.LogMatchCount($"{Name} TrainingData", Tree, Metadata, PropertiesToSkip);

            matcher = new LoggingDecisionTreeMatcher(LearningData.TestData);
            matcher.LogMatchCount($"{Name} TestData", Tree, Metadata, PropertiesToSkip);

            stopWatch.Stop();
            Logger.InfoFormat("DecisionTreeLearning took {0}", stopWatch.Elapsed);
        }
Ejemplo n.º 4
0
        static void Main(string[] args)
        {
            // Load the monthly stock data from disk (first row is a header).
            DataTable table = new Accord.IO.CsvReader("C:\\Users\\michael\\Downloads\\JulyToOct2015Test.csv", true).ToTable();

            // Split the table into feature vectors and class labels.
            double[][] inputs  = table.ToJagged <double>("BookToPrice", "DividendYield", "DebtToEquity", "MarketBeta", "SectorID");
            int[]      outputs = table.Columns["MonthlyReturn"].ToArray <int>();

            // Available columns:
            // SecurityID BookToPrice DividendYield EarningsYield SalesGrowth AssetsToEquity
            // MarketCap MarketBeta DebtToEquity 1YrVol 5YrVol 3YrVol ExposureToCurrencyGain SectorID countryID

            // Four continuous factors plus the 11-valued sector code; two classes.
            var variables = new List <DecisionVariable>
            {
                DecisionVariable.Continuous("BookToPrice"),
                DecisionVariable.Continuous("DividendYield"),
                DecisionVariable.Continuous("DebtToEquity"),
                DecisionVariable.Continuous("MarketBeta"),
                DecisionVariable.Discrete("SectorID", 11)
            };

            DecisionTree tree = new DecisionTree(inputs: variables, classes: 2);

            // Train with C4.5, then classify the training set itself.
            C45Learning teacher = new C45Learning(tree);
            teacher.Learn(inputs, outputs);

            int[] answers = tree.Decide(inputs);

            // Plot the results
            // ScatterplotBox.Show("Expected results", inputs, outputs);
            //ScatterplotBox.Show("Ans", inputs, answers)
            //    .Hold();
        }
Ejemplo n.º 5
0
        /// <summary>
        ///   Trains a two-class C4.5 decision tree over the two continuous
        ///   inputs X and Y, then plots expected vs. predicted labels.
        /// </summary>
        private static void decisionTree(double[][] inputs, int[] outputs)
        {
            // Two classes; two continuous-valued attributes.
            var attributes = new[]
            {
                DecisionVariable.Continuous("X"),
                DecisionVariable.Continuous("Y")
            };
            DecisionTree tree = new DecisionTree(attributes: attributes, outputClasses: 2);

            C45Learning teacher = new C45Learning(tree);

            // C4.5 expects class labels in 0..k, so map the -1 labels to zero.
            outputs = outputs.Apply(x => x < 0 ? 0 : x);

            double error = teacher.Run(inputs, outputs);

            // Classify the samples with the learned model.
            int[] answers = inputs.Apply(tree.Compute);

            // Plot the results
            ScatterplotBox.Show("Expected results", inputs, outputs);
            ScatterplotBox.Show("Decision Tree results", inputs, answers)
            .Hold();
        }
Ejemplo n.º 6
0
        /// <summary>
        ///   Builds and trains a C4.5 decision tree whose last two input
        ///   variables are discrete (neighbourhood count and land-use type)
        ///   and all preceding ones continuous.
        /// </summary>
        /// <param name="inputsCount">Total number of input variables.</param>
        /// <param name="inputs">Training feature vectors.</param>
        /// <param name="outputs">Training class labels.</param>
        /// <param name="outputClassNum">Number of output classes.</param>
        /// <param name="listVariablesName">One name per input variable.</param>
        /// <param name="neiWindowSize">Side length of the neighbourhood window.</param>
        /// <param name="landuseTypesCount">Number of discrete land-use types.</param>
        /// <returns>The trained decision tree.</returns>
        public DecisionTree GenerateDecisionTree(int inputsCount, ref double[][] inputs, ref int[] outputs,
                                                 int outputClassNum, List <string> listVariablesName, int neiWindowSize, int landuseTypesCount)
        {
            DecisionVariable[] variable = new DecisionVariable[inputsCount];

            // All but the last two variables are continuous.
            for (int i = 0; i < inputsCount - 2; i++)
            {
                variable[i] = new DecisionVariable(listVariablesName[i], DecisionVariableKind.Continuous);
            }

            // Second-to-last: discrete neighbour count, range [0, window^2].
            DecisionVariable dv = new DecisionVariable(listVariablesName[inputsCount - 2], neiWindowSize * neiWindowSize + 1);
            variable[inputsCount - 2] = dv;

            // Last: discrete land-use type code.
            DecisionVariable dv2 = new DecisionVariable(listVariablesName[inputsCount - 1], landuseTypesCount);
            variable[inputsCount - 1] = dv2;

            DecisionTree tree = new DecisionTree(variable, outputClassNum);
            C45Learning  c45  = new C45Learning(tree);

            tree = c45.Learn(inputs, outputs);

            return(tree);
        }
Ejemplo n.º 7
0
        /// <summary>
        ///   Verifies that C45Learning.Run rejects input containing a null sample
        ///   by throwing an ArgumentNullException.
        /// </summary>
        public void ArgumentCheck1()
        {
            // The third sample is null on purpose.
            double[][] samples =
            {
                new [] { 0, 2, 4.0 },
                new [] { 1, 5, 2.0 },
                null,
                new [] { 1, 5, 6.0 },
            };

            int[] outputs = { 1, 1, 0, 0 };

            // Three anonymous continuous variables named "0".."2".
            var vars = new DecisionVariable[3];
            for (int i = 0; i < vars.Length; i++)
            {
                vars[i] = DecisionVariable.Continuous(i.ToString());
            }

            var tree    = new DecisionTree(vars, 2);
            var teacher = new C45Learning(tree);

            bool thrown = false;
            try
            {
                teacher.Run(samples, outputs);
            }
            catch (ArgumentNullException)
            {
                thrown = true;
            }

            Assert.IsTrue(thrown);
        }
Ejemplo n.º 8
0
        /// <summary>
        ///   Trains a tree on 500 random 3-feature samples and checks that every
        ///   leaf carries an output and the training error stays below 0.5.
        /// </summary>
        public void LargeSampleTest2()
        {
            Accord.Math.Tools.SetupGenerator(0); // deterministic random data

            double[][] dataSamples = Matrix.Random(500, 3, 0.0, 10.0).ToJagged();
            int[]      target      = Matrix.Random(500, 1, 0.0, 2.0).ToInt32().GetColumn(0);

            DecisionVariable[] features =
            {
                new DecisionVariable("Outlook",     DecisionVariableKind.Continuous),
                new DecisionVariable("Temperature", DecisionVariableKind.Continuous),
                new DecisionVariable("Humidity",    DecisionVariableKind.Continuous),
            };

            var tree    = new DecisionTree(features, 2);
            var teacher = new C45Learning(tree);

            double error = teacher.Run(dataSamples, target);

            // Every leaf the learner produced must carry an output label.
            foreach (var node in tree)
            {
                if (node.IsLeaf)
                {
                    Assert.IsNotNull(node.Output);
                }
            }

            Assert.IsTrue(error < 0.50);
        }
    /// <summary>
    ///   Trains a C4.5 decision tree on the fertility dataset fetched from the
    ///   database and shows the training accuracy (percent) in Label1.
    /// </summary>
    protected void btnchangepassword0_Click(object sender, EventArgs e)
    {
        // The original allocated a DataTable and immediately overwrote it.
        DataTable data = f1.getrecord1("select * from dataset");
        if (data.Rows.Count > 0)
        {
            double[][] inputs = data.ToJagged <double>("n", "p", "k", "ph", "ec");

            // Collect the string class labels from the fertility column.
            string[] labels = new string[data.Rows.Count];
            for (int i = 0; i < data.Rows.Count; i++)
            {
                labels[i] = data.Rows[i]["fertility"].ToString();
            }

            // With the codebook, we can convert the labels to class indices:
            var   codebook = new Codification("fertility", labels);
            int[] outputs  = codebook.Translate("fertility", labels);

            C45Learning teacher = new C45Learning();
            var         tree    = teacher.Learn(inputs, outputs);

            int[]       predicted = tree.Decide(inputs);
            DecisionSet rules     = tree.ToRules();
            string      ruleText  = rules.ToString(codebook, "fertility", System.Globalization.CultureInfo.InvariantCulture);

            // Accuracy measured on the training data itself, shown as a percentage.
            var    cm1 = new GeneralConfusionMatrix(classes: 3, expected: outputs, predicted: predicted);
            double accuracyPercent = cm1.Accuracy * 100;
            Label1.Text = accuracyPercent.ToString();
        }
    }
Ejemplo n.º 10
0
        /// <summary>
        ///   A single threshold on the first column should be learned exactly:
        ///   training error is zero and the root splits once into two leaves.
        /// </summary>
        public void ConsistencyTest1()
        {
            double[,] random = Matrix.Random(1000, 10, 0.0, 1.0);

            double[][] samples = random.ToJagged();

            // Label is 1 exactly when the first feature exceeds 0.8.
            int[] outputs = new int[1000];
            for (int i = 0; i < samples.Length; i++)
            {
                if (samples[i][0] > 0.8)
                {
                    outputs[i] = 1;
                }
            }

            // Ten continuous variables named "0".."9".
            var vars = new DecisionVariable[10];
            for (int i = 0; i < vars.Length; i++)
            {
                vars[i] = new DecisionVariable(i.ToString(), DecisionVariableKind.Continuous);
            }

            var tree    = new DecisionTree(vars, 2);
            var teacher = new C45Learning(tree);

            double error = teacher.Run(samples, outputs);
            Assert.AreEqual(0, error);

            // The learned tree is a single split with two leaf children.
            Assert.AreEqual(2, tree.Root.Branches.Count);
            Assert.IsTrue(tree.Root.Branches[0].IsLeaf);
            Assert.IsTrue(tree.Root.Branches[1].IsLeaf);
        }
Ejemplo n.º 11
0
        /// <summary>
        ///   Creates and learns a Decision Tree to recognize the
        ///   previously loaded dataset using the current settings.
        /// </summary>
        ///
        private void btnCreate_Click(object sender, EventArgs e)
        {
            if (dgvLearningSource.DataSource == null)
            {
                MessageBox.Show("Please load some data first.");
                return;
            }

            // Commit any pending grid edits before reading the data.
            dgvLearningSource.EndEdit();

            // The entire source table as a numeric matrix.
            double[,] table = (dgvLearningSource.DataSource as DataTable).ToMatrix(out columnNames);

            // First two columns are the inputs, the third the class label.
            double[][] inputs  = table.GetColumns(0, 1).ToArray();
            int[]      outputs = table.GetColumn(2).ToInt32();

            // Two continuous input variables.
            DecisionVariable[] variables =
            {
                new DecisionVariable("x", DecisionVariableKind.Continuous),
                new DecisionVariable("y", DecisionVariableKind.Continuous),
            };

            // Two-class tree trained with C4.5.
            tree = new DecisionTree(variables, 2);
            C45Learning c45 = new C45Learning(tree);

            double error = c45.Run(inputs, outputs);

            // Display the learned tree in the view.
            decisionTreeView1.TreeSource = tree;

            // Sample the input plane on a 0.05-spaced grid and classify each
            // point to draw the decision surface.
            DoubleRange[] ranges = Matrix.Range(table, 0);

            double[][] map = Matrix.CartesianProduct(
                Matrix.Interval(ranges[0], 0.05),
                Matrix.Interval(ranges[1], 0.05));

            double[] result = map.Apply(tree.Compute).ToDouble();
            double[,] surface = map.ToMatrix().InsertColumn(result);

            CreateScatterplot(zedGraphControl2, surface);

            lbStatus.Text = "Learning finished! Click the other tabs to explore results!";
        }
Ejemplo n.º 12
0
        /// <summary>
        ///   Labels each student as normal (1) or anomalous (0) by OpenId membership,
        ///   trains a C4.5 tree on those labels, then uses the tree's own predictions
        ///   to partition all students into three training sets.
        /// </summary>
        /// <returns>Two normal-record sets (a 2/3 and a 1/3 split) plus one anomaly set.</returns>
        public static TrainingSet[] GenerateTrainingSets(IEnumerable <KeyValuePair <User, double[]> > studentsAndMarks, string[] normalRecords, string[] anomalies)
        {
            var countOfEntries = normalRecords.Length + anomalies.Length;
            var inputData      = new double[countOfEntries][];
            var outputData     = new int[countOfEntries];
            var counter        = 0;

            // NOTE(review): a student listed in BOTH normalRecords and anomalies is
            // added twice (once per label); one in neither list is skipped — confirm
            // the two lists are meant to be disjoint and exhaustive.
            foreach (var studentAndMarks in studentsAndMarks)
            {
                if (normalRecords.Contains(studentAndMarks.Key.OpenId))
                {
                    inputData[counter]    = studentAndMarks.Value;
                    outputData[counter++] = 1; // labeled normal
                }

                if (!anomalies.Contains(studentAndMarks.Key.OpenId))
                {
                    continue;
                }

                inputData[counter]    = studentAndMarks.Value;
                outputData[counter++] = 0; // labeled anomalous
            }

            // One continuous variable per mark; the first feature is given the range
            // 80-1200 while the rest get 0-10 — presumably a total vs. per-item scale,
            // TODO confirm against the marks source.
            var countOfFeatures = studentsAndMarks.ElementAt(0).Value.Length;
            var features        = new DecisionVariable[countOfFeatures];

            features[0] = new DecisionVariable("0", DecisionAttributeKind.Continuous, new AForge.DoubleRange(80, 1200));

            for (var i = 1; i < countOfFeatures; i++)
            {
                features[i] = new DecisionVariable(i.ToString(), DecisionAttributeKind.Continuous, new AForge.DoubleRange(0, 10));
            }

            // Create the Decision tree with only 2 result values
            var tree = new DecisionTree(features, 2);

            // Creates a new instance of the C4.5 learning algorithm
            var c45 = new C45Learning(tree);

            // Learn the decision tree
            var error = c45.Run(inputData, outputData);

            // Split all data into normal and anomalies
            var setOfNormalRecords = studentsAndMarks.Where(x => tree.Compute(x.Value) == 1);
            var setOfAnomalies     = studentsAndMarks.Where(x => tree.Compute(x.Value) == 0);

            // Split normal records into 2 groups (one for training set and one for anomaly detection ocurency detection)
            var setOfNormalRecordsList = setOfNormalRecords.ToList();
            var splitCount             = setOfNormalRecordsList.Count * 2 / 3;
            var setOfNormalRecordsTr1  = setOfNormalRecordsList.GetRange(0, splitCount);
            var setOfNormalRecordsTr2  = setOfNormalRecordsList.GetRange(splitCount, setOfNormalRecordsList.Count - splitCount);
            // Create Training Sets
            var trSetNormalFirst  = CreateTrainingSetFromResources(setOfNormalRecordsTr1);
            var trSetNormalSecond = CreateTrainingSetFromResources(setOfNormalRecordsTr2);
            var trSetAnomalies    = CreateTrainingSetFromResources(setOfAnomalies);

            return(new[] { trSetNormalFirst, trSetNormalSecond, trSetAnomalies });
        }
Ejemplo n.º 13
0
        /// <summary>
        ///   Learns a two-class C4.5 decision tree from the loaded grid data and
        ///   draws its decision surface.
        /// </summary>
        private void btnCreate_Click(object sender, EventArgs e)
        {
            if (dgvLearningSource.DataSource == null)
            {
                MessageBox.Show("Please load some data first.");
                return;
            }

            // Commit any pending grid edits before reading the data.
            dgvLearningSource.EndEdit();

            // The source table as a numeric matrix.
            double[,] sourceMatrix = (dgvLearningSource.DataSource as DataTable).ToMatrix(out sourceColumns);

            // Perform classification
            C45Learning c45;

            // Columns 0-1 are the inputs, column 2 the class label.
            double[][] inputs  = sourceMatrix.Submatrix(null, 0, 1).ToArray();
            int[]      outputs = sourceMatrix.GetColumn(2).ToInt32();

            // Two continuous input variables.
            DecisionVariable[] attributes =
            {
                new DecisionVariable("x", DecisionAttributeKind.Continuous),
                new DecisionVariable("y", DecisionAttributeKind.Continuous),
            };

            // Two-class tree trained with C4.5.
            tree = new DecisionTree(attributes, 2);
            c45  = new C45Learning(tree);

            double error = c45.Run(inputs, outputs);

            // Display the learned tree in the view.
            decisionTreeView1.TreeSource = tree;

            // Sample the input plane on a 0.05-spaced grid and classify each
            // point to draw the separating surface.
            var ranges = Matrix.Range(sourceMatrix);

            double[][] map = Matrix.CartesianProduct(
                Matrix.Interval(ranges[0], 0.05),
                Matrix.Interval(ranges[1], 0.05));

            var result = map.Apply(tree.Compute).Apply(Math.Sign);
            var graph  = map.ToMatrix().InsertColumn(result.ToDouble());

            CreateScatterplot(zedGraphControl2, graph);
        }
Ejemplo n.º 14
0
        /// <summary>
        ///   Trains a C4.5 tree (Join=2, MaxHeight=5) on the patient training
        ///   set and estimates its accuracy with 5-fold cross-validation.
        /// </summary>
        private void trainingC45lib()
        {
            // Fixed seed so training and cross-validation are reproducible.
            Accord.Math.Random.Generator.Seed = 0;
            c45Learning = new C45Learning()
            {
                Join      = 2,
                MaxHeight = 5
            };
            int size = trainingSets.Count;

            double[][] inputs1  = new double[size][];
            int[]      outputs1 = new int[size];
            int        i        = 0;

            // Build one 9-feature row per patient; attribute 10 is the label.
            foreach (Patient patient in trainingSets)
            {
                double[] aux = new double[9];
                for (int j = 1; j <= 9; j++)
                {
                    if (j == 1)
                    {
                        // First attribute is binned into three buckets:
                        // <30 -> 0, 30-59 -> 1, >=60 -> 2 (presumably age — confirm).
                        aux[j - 1] = patient.get(j) < 30 ? 0 : patient.get(j) < 60 ? 1 : 2;
                    }
                    else
                    {
                        aux[j - 1] = patient.get(j);
                    }
                }
                inputs1[i]  = aux;
                outputs1[i] = patient.get(10);
                i++;
            }

            // 5-fold cross-validation, each fold training a fresh C4.5 learner
            // with the same hyperparameters and scored by zero-one loss.
            var crossValidation = CrossValidation.Create(

                k: 5,

                learner: (p) => new C45Learning()
            {
                Join      = 2,
                MaxHeight = 5
            },
                loss: (actual, expected, p) => new ZeroOneLoss(expected).Loss(actual),

                fit: (teacher, x, y, w) => teacher.Learn(x, y, w),

                x: inputs1, y: outputs1
                );

            // The final model is trained on the full set; the CV result only
            // supplies the accuracy estimate.
            decisionTreeLib = c45Learning.Learn(inputs1, outputs1);
            var result = crossValidation.Learn(inputs1, outputs1);

            GeneralConfusionMatrix gcm = result.ToConfusionMatrix(inputs1, outputs1);

            accuracyC45lib = Math.Round(gcm.Accuracy, 3);
        }
Ejemplo n.º 15
0
        public static void Main(string[] args)
        {
            //getting example data
            Iris iris = new Iris();

            // Training arrays hold 147 of the 150 irises; one sample of each
            // class (indices 0, 50 and 100) is held out for testing below.
            double[][] input  = new double[147][];
            int[]      output = new int[147];

            int j = 0;

            // BUG FIX: the original condition used || (always true) and only
            // looped to 147, so the three held-out samples were never skipped
            // and the last three irises were never used.
            for (int i = 0; i < 150; i++)
            {
                if (i != 0 && i != 50 && i != 100)
                {
                    input[j]  = new double[4];
                    output[j] = iris.ClassLabels[i];
                    for (int k = 0; k < 4; k++)
                    {
                        input[j][k] = iris.Instances[i][k];
                    }
                    j++;
                }
            }

            //learning algorithm for decision tree
            C45Learning teacher = new C45Learning(new[] {
                DecisionVariable.Continuous(iris.VariableNames[0]),
                DecisionVariable.Continuous(iris.VariableNames[1]),
                DecisionVariable.Continuous(iris.VariableNames[2]),
                DecisionVariable.Continuous(iris.VariableNames[3]),
            });

            //model learning
            DecisionTree tree = teacher.Learn(input, output);

            //If we would have some other irises we could just wrote like this
            //DecisionTree tree = teacher.Learn(iris.Instances, iris.ClassLabels);
            //but we prefer to left some for test purpose (to check if our programm is working fine)

            //testing our model on the three held-out samples
            double[][] test    = { iris.Instances[0], iris.Instances[50], iris.Instances[100] };
            int[]      answers = tree.Decide(test);

            Console.WriteLine("Answer should be as follow:\n0,1,2,\nAnswer is:");

            foreach (int ans in answers)
            {
                Console.Write(ans + ",");
            }

            Console.Write("\nPress any key to continue . . . ");
            Console.ReadKey(true);
        }
Ejemplo n.º 16
0
        /// <summary>
        ///   Runs k-fold cross-validation of a C4.5 decision tree over the
        ///   model's feature vectors and returns one confusion matrix per fold.
        /// </summary>
        /// <param name="classificationModel">Source of feature vectors, classes, and band names.</param>
        /// <param name="folds">Number of cross-validation folds.</param>
        public override Task <List <GeneralConfusionMatrix> > ComputeFoldedConfusionMatrixAsync(ClassificationModel classificationModel, int folds)
        {
            return(Task.Factory.StartNew(() =>
            {
                int numFeatures = classificationModel.FeatureVectors.Count;
                // One continuous decision variable per spectral band.
                DecisionVariable[] decisionVariables = Enumerable.ToArray(classificationModel.Bands.Select(b => DecisionVariable.Continuous(b.ToString())));

                double[][] input = new double[numFeatures][];
                int[] responses = new int[numFeatures];

                // Band intensities are normalized from ushort range into [0, 1].
                for (int featureIndex = 0; featureIndex < classificationModel.FeatureVectors.Count; ++featureIndex)
                {
                    var featureVector = classificationModel.FeatureVectors[featureIndex];

                    input[featureIndex] = Array.ConvertAll(featureVector.FeatureVector.BandIntensities, s => (double)s / ushort.MaxValue);
                    responses[featureIndex] = featureVector.FeatureClass;
                }

                List <GeneralConfusionMatrix> confusionMatrices = new List <GeneralConfusionMatrix>();

                // Create a new Cross-validation algorithm passing the data set size and the number of folds
                var crossvalidation = new CrossValidation(input.Length, folds);

                crossvalidation.Fitting = delegate(int k, int[] indicesTrain, int[] indicesValidation)
                {
                    // Lets now grab the training data:
                    var trainingInputs = input.Get(indicesTrain);
                    var trainingOutputs = responses.Get(indicesTrain);

                    // And now the validation data:
                    var validationInputs = input.Get(indicesValidation);
                    var validationOutputs = responses.Get(indicesValidation);

                    // Train a fresh tree on this fold's training partition.
                    var tree = new DecisionTree(decisionVariables, Enum.GetValues(typeof(LandcoverTypeViewModel)).Length);
                    C45Learning id3Learning = new C45Learning(tree);
                    id3Learning.Learn(trainingInputs, trainingOutputs);

                    var predictedTraining = tree.Decide(trainingInputs);
                    var predictedValidation = tree.Decide(validationInputs);

                    double trainingError = new ZeroOneLoss(trainingOutputs).Loss(predictedTraining);
                    double validationError = new ZeroOneLoss(validationOutputs).Loss(predictedValidation);

                    // NOTE(review): the tree is built with Length classes but the
                    // confusion matrix with Length - 1 — confirm this off-by-one
                    // is intentional (e.g. one enum member is not a real class).
                    GeneralConfusionMatrix confusionMatrix = new GeneralConfusionMatrix(Enum.GetValues(typeof(LandcoverTypeViewModel)).Length - 1, validationOutputs, predictedValidation);
                    confusionMatrices.Add(confusionMatrix);

                    // Return a new information structure containing the model and the errors achieved.
                    return new CrossValidationValues(trainingError, validationError);
                };

                // Compute runs all folds; its return value is unused because the
                // per-fold delegate has already collected the matrices.
                var result = crossvalidation.Compute();

                return confusionMatrices;
            }));
        }
Ejemplo n.º 17
0
        /// <summary>
        ///   Trains the requested classifier ("Tree", "SVM", "Logistic", or "GA")
        ///   on the document-rule matrix and saves the model to "&lt;algorithm&gt;.model".
        /// </summary>
        /// <param name="train_docrule">Document-by-rule feature matrix.</param>
        /// <param name="label">Class label per document.</param>
        /// <param name="algorithm">Which classifier to train.</param>
        public void Classification_Train(double[,] train_docrule, int[] label, string algorithm)
        {
            string classmodelpath;
            int    attrSize = eclatlitems.Count; // one discrete variable per mined rule

            // Specify the input variables
            DecisionVariable[] variables = new DecisionVariable[attrSize];
            for (int i = 0; i < attrSize; i++)
            {
                variables[i] = new DecisionVariable((i + 1).ToString(), DecisionVariableKind.Discrete);
            }

            if (algorithm == "Tree")
            {
                classmodelpath = algorithm + ".model";
                DecisionTree tree    = new DecisionTree(variables, 2);
                C45Learning  teacher = new C45Learning(tree);
                var          model   = teacher.Learn(train_docrule.ToJagged(), label);
                //save model
                teacher.Save(Path.Combine("", classmodelpath));
            }
            if (algorithm == "SVM")
            {
                classmodelpath = algorithm + ".model";
                var learn = new SequentialMinimalOptimization()
                {
                    UseComplexityHeuristic = true,
                    UseKernelEstimation    = false
                };
                SupportVectorMachine teacher = learn.Learn(train_docrule.ToJagged(), label);
                //save model
                teacher.Save(Path.Combine("", classmodelpath));
            }

            if (algorithm == "Logistic")
            {
                classmodelpath = algorithm + ".model";
                var learner = new IterativeReweightedLeastSquares <LogisticRegression>()
                {
                    Tolerance      = 1e-4, // Let's set some convergence parameters
                    Iterations     = 1,    // maximum number of iterations to perform
                    Regularization = 0
                };
                LogisticRegression teacher = learner.Learn(train_docrule.ToJagged(), label);
                teacher.Save(Path.Combine("", classmodelpath));
            }

            if (algorithm == "GA")
            {
                // Genetic-algorithm weights are computed externally via MATLAB.
                weights_ga_matlab();
            }
        }
Ejemplo n.º 18
0
        /*************************** Primary Methods *******************************/
        public double learnDecisionTreeModel(DataSet trainSet)
        {
            // Convert TrainSet --> TrainDataTable
            this.convertToTrainIntputTable(trainSet);
            // C4.5 Decision Tree Algorithm
            double      learningError;
            C45Learning c45 = new C45Learning(this.descisionTree);

            learningError = c45.Run(this.trainInputArray, this.trainOutputVector);

            return(learningError);
        }
Ejemplo n.º 19
0
        /// <summary>
        ///   Trains the classifier asynchronously: either a single C4.5 decision
        ///   tree, or (when Boosting is set) an AdaBoost.M1 ensemble of trees.
        /// </summary>
        public override Task TrainAsync(ClassificationModel classificationModel)
        {
            int numFeatures = classificationModel.FeatureVectors.Count;

            // One continuous decision variable per spectral band.
            DecisionVariable[] decisionVariables = Enumerable.ToArray(classificationModel.Bands.Select(b => DecisionVariable.Continuous(b.ToString())));

            // Band intensities normalized from ushort range into [0, 1].
            double[][] input     = new double[numFeatures][];
            int[]      responses = new int[numFeatures];

            for (int featureIndex = 0; featureIndex < classificationModel.FeatureVectors.Count; ++featureIndex)
            {
                var featureVector = classificationModel.FeatureVectors[featureIndex];
                input[featureIndex]     = Array.ConvertAll(featureVector.FeatureVector.BandIntensities, s => (double)s / ushort.MaxValue);
                responses[featureIndex] = featureVector.FeatureClass;
            }

            if (Boosting)
            {
                return(Task.Factory.StartNew(() =>
                {
                    var classifier = new Boost <Weak <DecisionTree> >();

                    var teacher = new AdaBoostM1 <Weak <DecisionTree> >(classifier)
                    {
                        // Each boosting round trains a fresh weighted C4.5 tree.
                        Creation = (weights) =>
                        {
                            var weakTree = new DecisionTree(decisionVariables, Enum.GetValues(typeof(LandcoverTypeViewModel)).Length);
                            var weakLearner = new C45Learning(weakTree);
                            weakLearner.Learn(input, responses, weights);
                            return new Weak <DecisionTree>(weakTree, (s, x) => s.Decide(x));
                        },

                        Iterations = Iterations,
                        Tolerance = 1e-2
                    };

                    teacher.Run(input, responses);
                    _tree = Either.Right <DecisionTree, Boost <Weak <DecisionTree> > >(classifier);
                }));
            }
            else
            {
                return(Task.Factory.StartNew(() =>
                {
                    // Single tree trained on the full data set.
                    var tree = new DecisionTree(decisionVariables, Enum.GetValues(typeof(LandcoverTypeViewModel)).Length);
                    var c45Learner = new C45Learning(tree);
                    c45Learner.Learn(input, responses);

                    _tree = Either.Left <DecisionTree, Boost <Weak <DecisionTree> > >(tree);
                }));
            }
        }
Ejemplo n.º 20
0
        /// <summary>
        ///   Codifies the table's symbolic values and learns a C4.5 decision
        ///   tree mapping the input columns to the output column.
        /// </summary>
        /// <param name="data">Source table with symbolic values.</param>
        /// <param name="inputColumns">Names of the feature columns.</param>
        /// <param name="outputColumn">Name of the class-label column.</param>
        /// <returns>The trained decision tree.</returns>
        public static DecisionTree Learn(DataTable data, string[] inputColumns, string outputColumn)
        {
            // Translate symbolic values into integer codes.
            var codebook = new Codification(data);
            var symbols  = codebook.Apply(data);

            double[][] inputs  = symbols.ToJagged(inputColumns);
            int[]      outputs = symbols.ToArray <int>(outputColumn);

            // Decision variables are derived straight from the codebook.
            var attributes = DecisionVariable.FromCodebook(codebook, inputColumns);

            return(new C45Learning(attributes).Learn(inputs, outputs));
        }
Ejemplo n.º 21
0
        /// <summary>
        ///   Trains a two-variable C4.5 decision tree from the loaded training
        ///   data and shows it in the tree view.
        /// </summary>
        private void btnSampleRunAnalysis_Click(object sender, EventArgs e)
        {
            if (!isTrainingDataLoaded)
            {
                MessageBox.Show("Please load your training data first");
                return;
            }

            // Creates a matrix from the entire source data table
            double[,] table = (dgvLearningSource.DataSource as DataTable).ToMatrix(out columnNames);

            // Get only the input vector values (in the first two columns)
            double[][] inputs = ConvertDataTableToMatrix(TrainingData.Tables["InterestedTrainingDataValues"]);

            // Get only the output labels (last column)
            // NOTE(review): labels come from `table` (the grid) while inputs come
            // from TrainingData — confirm the two sources are row-aligned.
            int[] outputs = table.GetColumn(2).ToInt32();

            // Two continuous input variables.
            DecisionVariable[] variables =
             {
                new DecisionVariable("x", DecisionVariableKind.Continuous),
                new DecisionVariable("y", DecisionVariableKind.Continuous),
            };

            // Create the C4.5 learning algorithm
            var c45 = new C45Learning(variables);

            // Learn the decision tree using C4.5
            tree = c45.Learn(inputs, outputs);

            // Show the learned tree in the view
            decisionTreeView1.TreeSource = tree;

            // Get the ranges for each variable (X and Y)
            DoubleRange[] ranges = table.GetRange(0);

            // Generate a Cartesian coordinate system
            double[][] map = Matrix.Cartesian(
                Vector.Interval(ranges[0], 0.05),
                Vector.Interval(ranges[1], 0.05));

            // Classify each point in the Cartesian coordinate system
            // NOTE(review): `result` and `surface` are computed but never used or
            // plotted here — possibly leftover from a decision-surface plot.
            double[] result = map.Apply(tree.Compute).ToDouble();
            double[,] surface = map.ToMatrix().InsertColumn(result);
            
            MessageBox.Show("Training Complete");

        }
Ejemplo n.º 22
0
        /// <summary>
        /// Verifies that C4.5 with attribute reuse (Join = 3) reaches the
        /// expected error on iris, and that converting the tree to rules
        /// preserves its decisions.
        /// </summary>
        public void AttributeReuseTest1()
        {
            // Parse the iris dataset: one comma-separated record per line.
            string[][] records = Resources.iris_data.Split(
                new[] { '\n' }, StringSplitOptions.RemoveEmptyEntries)
                              .Apply(r => r.Split(','));

            // Columns 0..3 are the numeric measurements.
            double[][] samples = new double[records.Length][];
            for (int r = 0; r < samples.Length; r++)
            {
                samples[r] = records[r].First(4).Convert(s => Double.Parse(s, System.Globalization.CultureInfo.InvariantCulture));
            }

            // Column 4 is the class name; codify it into integer labels.
            string[] classNames = records.GetColumn(4);
            Codification codebook = new Codification("Label", classNames);
            int[] classLabels = codebook.Translate("Label", classNames);


            // The four continuous iris attributes.
            DecisionVariable[] features =
            {
                new DecisionVariable("sepal length", DecisionVariableKind.Continuous),
                new DecisionVariable("sepal width",  DecisionVariableKind.Continuous),
                new DecisionVariable("petal length", DecisionVariableKind.Continuous),
                new DecisionVariable("petal width",  DecisionVariableKind.Continuous),
            };


            DecisionTree tree = new DecisionTree(features, codebook.Columns[0].Symbols);

            // Join = 3 configures attribute reuse for this test.
            C45Learning teacher = new C45Learning(tree) { Join = 3 };

            double error = teacher.Run(samples, classLabels);
            Assert.AreEqual(0.02, error, 1e-10);

            // Rule extraction must not change the classification error.
            DecisionSet rules = tree.ToRules();
            double newError = ComputeError(rules, samples, classLabels);
            Assert.AreEqual(0.02, newError, 1e-10);

            string ruleText = rules.ToString(codebook,
                                             System.Globalization.CultureInfo.InvariantCulture);

            // TODO: implement this assertion properly, actually checking
            // the text contents once the feature is completely finished.
            Assert.AreEqual(600, ruleText.Length);
        }
Ejemplo n.º 23
0
        /// <summary>
        /// Learns a C4.5 tree on iris without declaring attributes up front,
        /// then checks the error and the textual rule representation.
        /// </summary>
        public void new_method_create_tree()
        {
            // Load the iris dataset, one comma-separated record per line.
            string[][] records = Resources.iris_data.Split(new[] { '\n' }, StringSplitOptions.RemoveEmptyEntries).Apply(r => r.Split(','));

            // Columns 0..3 are measurements; column 4 is the class name.
            double[][] samples = records.GetColumns(0, 1, 2, 3).To<double[][]>();
            string[] classNames = records.GetColumn(4);

            var codebook = new Codification("Output", classNames);
            int[] classLabels = codebook.Translate("Output", classNames);

            // And we can use the C4.5 for learning:
            var teacher = new C45Learning();

            // And finally induce the tree:
            var tree = teacher.Learn(samples, classLabels);

            // To get the estimated class labels, we can use
            int[] predicted = tree.Decide(samples);

            // Mean zero-one loss over the training data (expected 0.0266...).
            double error = new ZeroOneLoss(classLabels) { Mean = true }.Loss(tree.Decide(samples));

            // The tree can also be expressed as a rule set:
            DecisionSet rules = tree.ToRules();

            // And using the codebook, we can inspect the tree reasoning:
            string ruleText = rules.ToString(codebook, "Output",
                                             System.Globalization.CultureInfo.InvariantCulture);

            // The output is:
            string expected = @"Iris-setosa =: (2 <= 2.45)
Iris-versicolor =: (2 > 2.45) && (3 <= 1.75) && (0 <= 7.05) && (1 <= 2.85)
Iris-versicolor =: (2 > 2.45) && (3 <= 1.75) && (0 <= 7.05) && (1 > 2.85)
Iris-versicolor =: (2 > 2.45) && (3 > 1.75) && (0 <= 5.95) && (1 > 3.05)
Iris-virginica =: (2 > 2.45) && (3 <= 1.75) && (0 > 7.05)
Iris-virginica =: (2 > 2.45) && (3 > 1.75) && (0 > 5.95)
Iris-virginica =: (2 > 2.45) && (3 > 1.75) && (0 <= 5.95) && (1 <= 3.05)
";

            Assert.AreEqual(0.026666666666666668, error, 1e-10);

            // Rule extraction must preserve the tree's decisions.
            double newError = ComputeError(rules, samples, classLabels);
            Assert.AreEqual(0.026666666666666668, newError, 1e-10);
            Assert.AreEqual(expected, ruleText);
        }
Ejemplo n.º 24
0
        /// <summary>
        /// Creates Mitchell's classic "play tennis" example: builds the data
        /// table, trains a C4.5 decision tree on it, and returns the tree
        /// together with the codified inputs and outputs.
        /// </summary>
        /// <param name="tree">The trained decision tree.</param>
        /// <param name="inputs">Codified input vectors (Outlook, Temperature, Humidity, Wind).</param>
        /// <param name="outputs">Codified class labels (PlayTennis).</param>
        public static void CreateMitchellExample(out DecisionTree tree, out double[][] inputs, out int[] outputs)
        {
            DataTable data = new DataTable("Mitchell's Tennis Example");

            data.Columns.Add("Day", typeof(string));
            data.Columns.Add("Outlook", typeof(string));
            data.Columns.Add("Temperature", typeof(double));
            data.Columns.Add("Humidity", typeof(double));
            data.Columns.Add("Wind", typeof(string));
            data.Columns.Add("PlayTennis", typeof(string));

            data.Rows.Add("D1", "Sunny", 85, 85, "Weak", "No");
            data.Rows.Add("D2", "Sunny", 80, 90, "Strong", "No");
            data.Rows.Add("D3", "Overcast", 83, 78, "Weak", "Yes");
            data.Rows.Add("D4", "Rain", 70, 96, "Weak", "Yes");
            data.Rows.Add("D5", "Rain", 68, 80, "Weak", "Yes");
            data.Rows.Add("D6", "Rain", 65, 70, "Strong", "No");
            data.Rows.Add("D7", "Overcast", 64, 65, "Strong", "Yes");
            data.Rows.Add("D8", "Sunny", 72, 95, "Weak", "No");
            data.Rows.Add("D9", "Sunny", 69, 70, "Weak", "Yes");
            data.Rows.Add("D10", "Rain", 75, 80, "Weak", "Yes");
            data.Rows.Add("D11", "Sunny", 75, 70, "Strong", "Yes");
            data.Rows.Add("D12", "Overcast", 72, 90, "Strong", "Yes");
            data.Rows.Add("D13", "Overcast", 81, 75, "Weak", "Yes");
            data.Rows.Add("D14", "Rain", 71, 80, "Strong", "No");

            // Create a new codification codebook to
            // convert strings into integer symbols
            Codification codebook = new Codification(data);

            DecisionVariable[] attributes =
            {
                new DecisionVariable("Outlook", codebook["Outlook"].Symbols),         // 3 possible values (Sunny, overcast, rain)
                new DecisionVariable("Temperature", DecisionVariableKind.Continuous), // continuous values
                new DecisionVariable("Humidity", DecisionVariableKind.Continuous),    // continuous values
                new DecisionVariable("Wind", codebook["Wind"].Symbols)                // 2 possible values (Weak, strong)
            };

            int classCount = codebook["PlayTennis"].Symbols; // 2 possible values (yes, no)

            tree = new DecisionTree(attributes, classCount);
            C45Learning c45 = new C45Learning(tree);

            // Extract symbols from data and train the classifier
            DataTable symbols = codebook.Apply(data);

            inputs  = symbols.ToArray("Outlook", "Temperature", "Humidity", "Wind");
            outputs = symbols.ToArray <int>("PlayTennis");

            // Train in place; the training error returned by Run is not needed
            // by callers, so it is deliberately discarded (previously it was
            // stored in an unused local).
            c45.Run(inputs, outputs);
        }
        /// <summary>
        /// Train the classifier with some data, using parameters.
        /// </summary>
        /// <param name="trainingData">Data used to train the classifier.</param>
        /// <param name="maxJoin">How many times a variable can join
        /// the decision process.</param>
        /// <param name="maxHeight">Maximum height when learning the tree.</param>
        /// <returns>Classifier prediction error.</returns>
        public double TrainClassifierWithParameters(
            ClassificationData trainingData,
            int maxJoin   = 0,
            int maxHeight = 0)
        {
            // Build one continuous decision variable per input attribute,
            // using the supplied names or auto-generated ones when none exist.
            var decisionVariables = new List<DecisionVariable>();
            for (int i = 0; i < trainingData.InputAttributeNumber; ++i)
            {
                string variableName = DecisionVariableNames != null
                    ? DecisionVariableNames[i]
                    : "variable_" + (i + 1).ToString();
                decisionVariables.Add(
                    new DecisionVariable(variableName, DecisionVariableKind.Continuous));
            }

            // Create a new Decision Tree classifier.
            ClassificationDecisionTree = new DecisionTree(decisionVariables, trainingData.OutputPossibleValues);

            // Create a new instance of the C45 algorithm to be learned by the tree.
            C45LearningTree = new C45Learning(ClassificationDecisionTree);

            // Only override the learner's defaults when valid values were given.
            if (maxJoin > 0)
            {
                C45LearningTree.Join = maxJoin;
            }
            if (maxHeight > 0)
            {
                C45LearningTree.MaxHeight = maxHeight;
            }

            // Train the tree; Run returns the classifier's prediction error.
            return C45LearningTree.Run(trainingData.InputData, trainingData.OutputData);
        }
        /// <summary>
        /// Induces the decision tree from the current learning samples using
        /// C4.5, with one continuous attribute per controller output property.
        /// </summary>
        public void Learn()
        {
            var samples = GetLearnInputs();
            var labels = GetOutputs();

            // Join = 0, as in the original configuration.
            var learner = new C45Learning();
            learner.Join = 0;

            // Register one continuous attribute per controller output property.
            foreach (var propertyName in GetControllerOutputProperties())
            {
                learner.Attributes.Add(DecisionVariable.Continuous(propertyName));
            }

            DecisionTree = learner.Learn(samples, labels);
        }
    /// <summary>
    /// Reads the soil measurements from the form, trains a C4.5 tree on the
    /// "dataset" table, and displays the predicted fertility class
    /// (Low / Medium / High).
    /// </summary>
    protected void btnshow_Click(object sender, EventArgs e)
    {
        quality.Visible = true;
        Panel1.Visible = true;

        // NOTE(review): Convert.ToDouble uses the current culture; if users may
        // enter '.' vs ',' decimal separators this can misparse or throw —
        // consider double.TryParse with an explicit CultureInfo.
        n  = Convert.ToDouble(txtN.Text);
        p  = Convert.ToDouble(txtP.Text);
        k  = Convert.ToDouble(txtK.Text);
        ph = Convert.ToDouble(txtPh.Text);
        ec = Convert.ToDouble(txtec.Text);

        // Fix: the previous version allocated a DataTable that was immediately
        // overwritten by the query result, so the allocation was dead code.
        DataTable _dataTable = f1.getrecord1("select * from dataset");

        // Training inputs: the five measurement columns.
        double[][] inputs = _dataTable.ToJagged <double>("n", "p", "k", "ph", "ec");

        // Training labels: the fertility column, as strings.
        string[] labels = new string[_dataTable.Rows.Count];
        for (int i = 0; i < _dataTable.Rows.Count; i++)
        {
            labels[i] = _dataTable.Rows[i]["fertility"].ToString();
        }

        // Map the label strings to integer class codes.
        var codebook = new Codification("fertility", labels);
        int[] outputs = codebook.Translate("fertility", labels);

        // Train the tree and classify the user-entered sample.
        C45Learning teacher = new C45Learning();
        var tree = teacher.Learn(inputs, outputs);

        double[][] input_test =
        {
            new double[] { n, p, k, ph, ec },
        };

        int[] val = tree.Decide(input_test);

        // NOTE(review): assumes the codebook assigns 0 = Low and 1 = Medium —
        // confirm against the order labels appear in the dataset.
        if (val[0] == 0)
        {
            quality.Text = "Low";
        }
        else if (val[0] == 1)
        {
            quality.Text = "Medium";
        }
        else
        {
            quality.Text = "High";
        }
    }
Ejemplo n.º 28
0
    /// <summary>
    /// Trains a C4.5 decision tree over four continuous features (X, Y, Z, W).
    /// </summary>
    /// <param name="inputs">Training feature vectors, four values each.</param>
    /// <param name="outputs">Class label for each training vector.</param>
    /// <returns>The induced decision tree.</returns>
    public DecisionTree DecisionThrust(double[][] inputs, int[] outputs)
    {
        DecisionVariable[] features =
        {
            DecisionVariable.Continuous("X"),
            DecisionVariable.Continuous("Y"),
            DecisionVariable.Continuous("Z"),
            DecisionVariable.Continuous("W"),
        };

        var learner = new C45Learning(features);

        // Keep learning single-threaded, as in the original implementation.
        learner.ParallelOptions.MaxDegreeOfParallelism = 1;

        // Use the learning algorithm to induce the tree.
        return learner.Learn(inputs, outputs);
    }
Ejemplo n.º 29
0
        /// <summary>
        /// Loads the nursery dataset from resources, codifies its symbolic
        /// columns, trains a C4.5 tree, and returns the tree along with the
        /// codified inputs and outputs.
        /// </summary>
        private static DecisionTree createTree(out double[][] inputs, out int[] outputs)
        {
            string nurseryData = Resources.nursery;

            string[] inputColumns =
            {
                "parents", "has_nurs", "form",   "children",
                "housing", "finance",  "social", "health"
            };

            string outputColumn = "output";

            // Build the raw table, one row per CSV line.
            DataTable table = new DataTable("Nursery");
            table.Columns.Add(inputColumns);
            table.Columns.Add(outputColumn);

            foreach (string record in nurseryData.Split(
                new[] { Environment.NewLine }, StringSplitOptions.None))
            {
                table.Rows.Add(record.Split(','));
            }

            // Codify symbolic values into integer codes.
            Codification codebook = new Codification(table);
            DataTable symbols = codebook.Apply(table);

            inputs = symbols.ToArray(inputColumns);
            outputs = symbols.ToArray <int>(outputColumn);

            // Induce the tree (the nursery dataset has 5 output classes).
            var attributes = DecisionVariable.FromCodebook(codebook, inputColumns);
            var tree = new DecisionTree(attributes, classes: 5);
            new C45Learning(tree).Run(inputs, outputs);

            return tree;
        }
Ejemplo n.º 30
0
        /// <summary>
        /// Trains the classifier selected by the <c>type</c> field on the given
        /// samples. Features are continuous; labels are binary (state 0/1).
        /// </summary>
        /// <param name="trainingData">Samples providing features and a state label.</param>
        public void Train(List <TrainingValue> trainingData)
        {
            // One continuous decision variable per feature dimension.
            var variables = new List <DecisionVariable>();
            for (int d = 0; d < featureSize; d++)
            {
                variables.Add(DecisionVariable.Continuous(d.ToString()));
            }

            // The tree is (re)created unconditionally, matching the original
            // behavior even when another classifier type is selected.
            tree = new DecisionTree(inputs: variables, classes: 2);

            // Flatten the samples into parallel feature/label arrays.
            int sampleCount = trainingData.Count;
            double[][] features = new double[sampleCount][];
            int[] labels = new int[sampleCount];
            for (int i = 0; i < sampleCount; i++)
            {
                features[i] = trainingData[i].Features;
                labels[i] = Convert.ToInt32(trainingData[i].State);
            }

            // Dispatch to the configured learner; each stores its model in the
            // corresponding field.
            switch (type)
            {
            case ClassifierType.DecisionTree:
                new C45Learning(tree).Learn(features, labels);
                break;

            case ClassifierType.LDA:
                pipeline = new LinearDiscriminantAnalysis().Learn(features, labels);
                break;

            case ClassifierType.SVM:
                svm = new LinearCoordinateDescent().Learn(features, labels);
                break;

            case ClassifierType.Bayes:
                bayes = new NaiveBayesLearning <NormalDistribution>().Learn(features, labels);
                break;
            }

            Trained = true;
        }