public void RandomForestBuild(List<train> datalist)
{
    int length = datalist.Count;
    int d = datalist[0].d;
    forest = new List<DecisionTree>();
    int n = datalist.Count;
    int k = d; //(int)Math.Sqrt(d);
    int m = 100;

    for (int i = 0; i < m; ++i)
    {
        double[][] inputs;
        int[] outputs;
        int[] indexs;
        GetData(out inputs, out outputs, datalist, n, k, out indexs);

        DecisionVariable[] variables = new DecisionVariable[k];
        for (int j = 0; j < k; ++j)
        {
            variables[j] = new DecisionVariable("attribute" + (indexs[j] + 1), DecisionVariableKind.Continuous);
        }

        // Create the C4.5 learning algorithm
        var c45 = new C45Learning(variables);

        // Learn the decision tree using C4.5
        DecisionTree dtmp = c45.Learn(inputs, outputs);
        forest.Add(dtmp);
    }

    log("The random forest model has been trained");
}
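RandomForestBuild only trains the trees; using the forest requires aggregating their outputs. Below is a minimal majority-vote sketch (not part of the original project), assuming the forest field populated above and that each tree sees the full attribute vector (k == d, as in the code). It needs System.Collections.Generic and System.Linq.

public int RandomForestDecide(double[] input)
{
    // One vote per tree; the label with the most votes wins.
    var votes = new Dictionary<int, int>();
    foreach (DecisionTree tree in forest)
    {
        int label = tree.Decide(input);
        votes[label] = votes.ContainsKey(label) ? votes[label] + 1 : 1;
    }

    // Ties resolve to whichever label OrderByDescending yields first.
    return votes.OrderByDescending(v => v.Value).First().Key;
}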
private double run(double[][] inputs, int[] output)
{
    int rows = inputs.Length;
    int cols = inputs[0].Length;

    int colsPerTree = 0;
    if (CoverageRatio == 0)
    {
        colsPerTree = (int)(System.Math.Sqrt(cols));
    }
    else
    {
        colsPerTree = (int)(cols * CoverageRatio);
    }

    var trees = forest.Trees;
    Parallel.For(0, trees.Length, ParallelOptions, i =>
    {
        int[] idx = Vector.Sample(SampleRatio, output.Length);
        var x = inputs.Get(idx);
        var y = output.Get(idx);

        var c45 = new C45Learning(forest.Trees[i])
        {
            MaxVariables = colsPerTree,
            Join = 100
        };

        c45.Learn(x, y);
    });

    return 0;
}
public void Learn()
{
    var stopWatch = new Stopwatch();
    stopWatch.Start();

    var variables = new List<DecisionVariable>();
    foreach (var vector in LearningData.TrainingData.First().ToVectorArray(Metadata, PropertiesToSkip))
    {
        variables.Add(new DecisionVariable(variables.Count.ToString(), new DoubleRange(-1, 1)));
    }

    Tree = new DecisionTree(variables, 2);
    var learner = new C45Learning(Tree);
    learner.Learn(
        LearningData.TrainingData.Select(data => data.ToVectorArray(Metadata, PropertiesToSkip)).ToArray(),
        LearningData.TrainingData.Select(data => data.PercentMatch > 0 ? 1 : 0).ToArray());

    var matcher = new LoggingDecisionTreeMatcher(LearningData.TrainingData);
    matcher.LogMatchCount($"{Name} TrainingData", Tree, Metadata, PropertiesToSkip);
    matcher = new LoggingDecisionTreeMatcher(LearningData.TestData);
    matcher.LogMatchCount($"{Name} TestData", Tree, Metadata, PropertiesToSkip);

    stopWatch.Stop();
    Logger.InfoFormat("DecisionTreeLearning took {0}", stopWatch.Elapsed);
}
static void Main(string[] args)
{
    DataTable table = new Accord.IO.CsvReader("C:\\Users\\michael\\Downloads\\JulyToOct2015Test.csv", true).ToTable();

    // Convert the DataTable to input and output vectors
    double[][] inputs = table.ToJagged<double>("BookToPrice", "DividendYield", "DebtToEquity", "MarketBeta", "SectorID");
    int[] outputs = table.Columns["MonthlyReturn"].ToArray<int>();

    //SecurityID  BookToPrice  DividendYield  EarningsYield  SalesGrowth  AssetsToEquity  MarketCap
    //MarketBeta  DebtToEquity  1YrVol  5YrVol  3YrVol  ExposureToCurrencyGain  SectorID  countryID
    DecisionTree tree = new DecisionTree(
        inputs: new List<DecisionVariable>
        {
            DecisionVariable.Continuous("BookToPrice"),
            DecisionVariable.Continuous("DividendYield"),
            DecisionVariable.Continuous("DebtToEquity"),
            DecisionVariable.Continuous("MarketBeta"),
            DecisionVariable.Discrete("SectorID", 11)
        },
        classes: 2);

    C45Learning teacher = new C45Learning(tree);
    teacher.Learn(inputs, outputs);

    int[] answers = tree.Decide(inputs);

    // Plot the results
    // ScatterplotBox.Show("Expected results", inputs, outputs);
    // ScatterplotBox.Show("Ans", inputs, answers)
    //     .Hold();
}
private static void decisionTree(double[][] inputs, int[] outputs)
{
    // In our problem, we have 2 classes (samples can be either
    // positive or negative), and 2 continuous-valued inputs.
    DecisionTree tree = new DecisionTree(
        attributes: new[]
        {
            DecisionVariable.Continuous("X"),
            DecisionVariable.Continuous("Y")
        },
        outputClasses: 2);

    C45Learning teacher = new C45Learning(tree);

    // The C4.5 algorithm expects the class labels to
    // range from 0 to k, so we convert -1 to be zero:
    // outputs = outputs.Apply(x => x < 0 ? 0 : x);

    double error = teacher.Run(inputs, outputs);

    // Classify the samples using the model
    int[] answers = inputs.Apply(tree.Compute);

    // Plot the results
    ScatterplotBox.Show("Expected results", inputs, outputs);
    ScatterplotBox.Show("Decision Tree results", inputs, answers)
        .Hold();
}
public DecisionTree GenerateDecisionTree(int inputsCount, ref double[][] inputs, ref int[] outputs,
    int outputClassNum, List<string> listVariablesName, int neiWindowSize, int landuseTypesCount)
{
    DecisionVariable[] variable = new DecisionVariable[inputsCount];
    for (int i = 0; i < inputsCount - 2; i++)
    {
        DecisionVariable v = new DecisionVariable(listVariablesName[i], DecisionVariableKind.Continuous);
        variable[i] = v;
    }

    DecisionVariable dv = new DecisionVariable(listVariablesName[inputsCount - 2], neiWindowSize * neiWindowSize + 1);
    variable[inputsCount - 2] = dv;
    DecisionVariable dv2 = new DecisionVariable(listVariablesName[inputsCount - 1], landuseTypesCount);
    variable[inputsCount - 1] = dv2;

    DecisionTree tree = new DecisionTree(variable, outputClassNum);
    C45Learning c45 = new C45Learning(tree);
    //double error = c45.Run(inputs, outputs);
    tree = c45.Learn(inputs, outputs);
    return tree;
}
public void ArgumentCheck1()
{
    double[][] samples =
    {
        new[] { 0, 2, 4.0 },
        new[] { 1, 5, 2.0 },
        null,
        new[] { 1, 5, 6.0 },
    };

    int[] outputs = { 1, 1, 0, 0 };

    DecisionVariable[] vars = new DecisionVariable[3];
    for (int i = 0; i < vars.Length; i++)
    {
        vars[i] = DecisionVariable.Continuous(i.ToString());
    }

    DecisionTree tree = new DecisionTree(vars, 2);
    var teacher = new C45Learning(tree);

    bool thrown = false;

    try
    {
        double error = teacher.Run(samples, outputs);
    }
    catch (ArgumentNullException)
    {
        thrown = true;
    }

    Assert.IsTrue(thrown);
}
public void LargeSampleTest2()
{
    Accord.Math.Tools.SetupGenerator(0);

    double[][] dataSamples = Matrix.Random(500, 3, 0.0, 10.0).ToJagged();
    int[] target = Matrix.Random(500, 1, 0.0, 2.0).ToInt32().GetColumn(0);

    DecisionVariable[] features =
    {
        new DecisionVariable("Outlook", DecisionVariableKind.Continuous),
        new DecisionVariable("Temperature", DecisionVariableKind.Continuous),
        new DecisionVariable("Humidity", DecisionVariableKind.Continuous),
    };

    DecisionTree tree = new DecisionTree(features, 2);
    C45Learning teacher = new C45Learning(tree);

    double error = teacher.Run(dataSamples, target);

    foreach (var node in tree)
    {
        if (node.IsLeaf)
        {
            Assert.IsNotNull(node.Output);
        }
    }

    Assert.IsTrue(error < 0.50);
}
protected void btnchangepassword0_Click(object sender, EventArgs e)
{
    DataTable data = new DataTable();
    data = f1.getrecord1("select * from dataset");

    if (data.Rows.Count > 0)
    {
        double[][] inputs = data.ToJagged<double>("n", "p", "k", "ph", "ec");

        string[] labels = new string[data.Rows.Count];
        for (int i = 0; i < data.Rows.Count; i++)
        {
            labels[i] = data.Rows[i]["fertility"].ToString();
        }

        var codebook = new Codification("fertility", labels);

        // With the codebook, we can convert the labels:
        int[] outputs = codebook.Translate("fertility", labels);

        C45Learning teacher = new C45Learning();
        var tree = teacher.Learn(inputs, outputs);

        int[] predicted = tree.Decide(inputs);

        DecisionSet rules = tree.ToRules();
        string ruleText = rules.ToString(codebook, "fertility", System.Globalization.CultureInfo.InvariantCulture);

        var cm1 = new GeneralConfusionMatrix(classes: 3, expected: outputs, predicted: predicted);
        //int[,] matrix = cm.Matrix;
        double cm = cm1.Accuracy;
        double cm2 = cm * 100;
        Label1.Text = cm2.ToString();
    }
}
public void ConsistencyTest1()
{
    double[,] random = Matrix.Random(1000, 10, 0.0, 1.0);

    double[][] samples = random.ToJagged();
    int[] outputs = new int[1000];

    for (int i = 0; i < samples.Length; i++)
    {
        if (samples[i][0] > 0.8)
        {
            outputs[i] = 1;
        }
    }

    DecisionVariable[] vars = new DecisionVariable[10];
    for (int i = 0; i < vars.Length; i++)
    {
        vars[i] = new DecisionVariable(i.ToString(), DecisionVariableKind.Continuous);
    }

    DecisionTree tree = new DecisionTree(vars, 2);
    C45Learning teacher = new C45Learning(tree);

    double error = teacher.Run(samples, outputs);

    Assert.AreEqual(0, error);
    Assert.AreEqual(2, tree.Root.Branches.Count);
    Assert.IsTrue(tree.Root.Branches[0].IsLeaf);
    Assert.IsTrue(tree.Root.Branches[1].IsLeaf);
}
/// <summary>
///   Creates and learns a Decision Tree to recognize the
///   previously loaded dataset using the current settings.
/// </summary>
///
private void btnCreate_Click(object sender, EventArgs e)
{
    if (dgvLearningSource.DataSource == null)
    {
        MessageBox.Show("Please load some data first.");
        return;
    }

    // Finish and save any pending changes to the given data
    dgvLearningSource.EndEdit();

    // Create a matrix from the entire source data table
    double[,] table = (dgvLearningSource.DataSource as DataTable).ToMatrix(out columnNames);

    // Get only the input vector values (first two columns)
    double[][] inputs = table.GetColumns(0, 1).ToArray();

    // Get only the output labels (last column)
    int[] outputs = table.GetColumn(2).ToInt32();

    // Specify the input variables
    DecisionVariable[] variables =
    {
        new DecisionVariable("x", DecisionVariableKind.Continuous),
        new DecisionVariable("y", DecisionVariableKind.Continuous),
    };

    // Create the Decision tree
    tree = new DecisionTree(variables, 2);

    // Create the C4.5 learning algorithm
    C45Learning c45 = new C45Learning(tree);

    // Learn the decision tree using C4.5
    double error = c45.Run(inputs, outputs);

    // Show the learned tree in the view
    decisionTreeView1.TreeSource = tree;

    // Get the ranges for each variable (X and Y)
    DoubleRange[] ranges = Matrix.Range(table, 0);

    // Generate a Cartesian coordinate system
    double[][] map = Matrix.CartesianProduct(
        Matrix.Interval(ranges[0], 0.05),
        Matrix.Interval(ranges[1], 0.05));

    // Classify each point in the Cartesian coordinate system
    double[] result = map.Apply(tree.Compute).ToDouble();

    double[,] surface = map.ToMatrix().InsertColumn(result);

    CreateScatterplot(zedGraphControl2, surface);

    lbStatus.Text = "Learning finished! Click the other tabs to explore results!";
}
public static TrainingSet[] GenerateTrainingSets(IEnumerable<KeyValuePair<User, double[]>> studentsAndMarks,
    string[] normalRecords, string[] anomalies)
{
    var countOfEntries = normalRecords.Length + anomalies.Length;
    var inputData = new double[countOfEntries][];
    var outputData = new int[countOfEntries];
    var counter = 0;

    foreach (var studentAndMarks in studentsAndMarks)
    {
        if (normalRecords.Contains(studentAndMarks.Key.OpenId))
        {
            inputData[counter] = studentAndMarks.Value;
            outputData[counter++] = 1;
        }

        if (!anomalies.Contains(studentAndMarks.Key.OpenId))
        {
            continue;
        }

        inputData[counter] = studentAndMarks.Value;
        outputData[counter++] = 0;
    }

    var countOfFeatures = studentsAndMarks.ElementAt(0).Value.Length;
    var features = new DecisionVariable[countOfFeatures];
    features[0] = new DecisionVariable("0", DecisionAttributeKind.Continuous, new AForge.DoubleRange(80, 1200));

    for (var i = 1; i < countOfFeatures; i++)
    {
        features[i] = new DecisionVariable(i.ToString(), DecisionAttributeKind.Continuous, new AForge.DoubleRange(0, 10));
    }

    // Create the Decision tree with only 2 result values
    var tree = new DecisionTree(features, 2);

    // Create a new instance of the C4.5 learning algorithm
    var c45 = new C45Learning(tree);

    // Learn the decision tree
    var error = c45.Run(inputData, outputData);

    // Split all data into normal records and anomalies
    var setOfNormalRecords = studentsAndMarks.Where(x => tree.Compute(x.Value) == 1);
    var setOfAnomalies = studentsAndMarks.Where(x => tree.Compute(x.Value) == 0);

    // Split the normal records into 2 groups (one for the training
    // set and one for anomaly-occurrence detection)
    var setOfNormalRecordsList = setOfNormalRecords.ToList();
    var splitCount = setOfNormalRecordsList.Count * 2 / 3;
    var setOfNormalRecordsTr1 = setOfNormalRecordsList.GetRange(0, splitCount);
    var setOfNormalRecordsTr2 = setOfNormalRecordsList.GetRange(splitCount, setOfNormalRecordsList.Count - splitCount);

    // Create the training sets
    var trSetNormalFirst = CreateTrainingSetFromResources(setOfNormalRecordsTr1);
    var trSetNormalSecond = CreateTrainingSetFromResources(setOfNormalRecordsTr2);
    var trSetAnomalies = CreateTrainingSetFromResources(setOfAnomalies);

    return new[] { trSetNormalFirst, trSetNormalSecond, trSetAnomalies };
}
private void btnCreate_Click(object sender, EventArgs e)
{
    if (dgvLearningSource.DataSource == null)
    {
        MessageBox.Show("Please load some data first.");
        return;
    }

    // Finish and save any pending changes to the given data
    dgvLearningSource.EndEdit();

    // Create a matrix from the source data table
    double[,] sourceMatrix = (dgvLearningSource.DataSource as DataTable).ToMatrix(out sourceColumns);

    // Perform classification
    C45Learning c45;

    // Get only the input vector values
    double[][] inputs = sourceMatrix.Submatrix(null, 0, 1).ToArray();

    // Get only the label outputs
    int[] outputs = sourceMatrix.GetColumn(2).ToInt32();

    DecisionVariable[] attributes =
    {
        new DecisionVariable("x", DecisionAttributeKind.Continuous),
        new DecisionVariable("y", DecisionAttributeKind.Continuous),
    };

    // Create the Decision tree
    tree = new DecisionTree(attributes, 2);

    // Create a new instance of the C4.5 learning algorithm
    c45 = new C45Learning(tree);

    // Learn the decision tree
    double error = c45.Run(inputs, outputs);

    // Show the learned tree in the view
    decisionTreeView1.TreeSource = tree;

    // Draw the separating surface
    var ranges = Matrix.Range(sourceMatrix);

    double[][] map = Matrix.CartesianProduct(
        Matrix.Interval(ranges[0], 0.05),
        Matrix.Interval(ranges[1], 0.05));

    var result = map.Apply(tree.Compute).Apply(Math.Sign);

    var graph = map.ToMatrix().InsertColumn(result.ToDouble());

    CreateScatterplot(zedGraphControl2, graph);
}
private void trainingC45lib()
{
    Accord.Math.Random.Generator.Seed = 0;

    c45Learning = new C45Learning()
    {
        Join = 2,
        MaxHeight = 5
    };

    int size = trainingSets.Count;
    double[][] inputs1 = new double[size][];
    int[] outputs1 = new int[size];

    int i = 0;
    foreach (Patient patient in trainingSets)
    {
        double[] aux = new double[9];
        for (int j = 1; j <= 9; j++)
        {
            if (j == 1)
            {
                // Bin the first attribute into three ranges: < 30, 30-59, >= 60
                aux[j - 1] = patient.get(j) < 30 ? 0 : patient.get(j) < 60 ? 1 : 2;
            }
            else
            {
                aux[j - 1] = patient.get(j);
            }
        }
        inputs1[i] = aux;
        outputs1[i] = patient.get(10);
        i++;
    }

    var crossValidation = CrossValidation.Create(
        k: 5,
        learner: (p) => new C45Learning()
        {
            Join = 2,
            MaxHeight = 5
        },
        loss: (actual, expected, p) => new ZeroOneLoss(expected).Loss(actual),
        fit: (teacher, x, y, w) => teacher.Learn(x, y, w),
        x: inputs1,
        y: outputs1
    );

    decisionTreeLib = c45Learning.Learn(inputs1, outputs1);

    var result = crossValidation.Learn(inputs1, outputs1);
    GeneralConfusionMatrix gcm = result.ToConfusionMatrix(inputs1, outputs1);
    accuracyC45lib = Math.Round(gcm.Accuracy, 3);
}
public static void Main(string[] args)
{
    // Get the example data
    Iris iris = new Iris();

    // Create the training data arrays
    double[][] input = new double[147][];
    int[] output = new int[147];

    // Process the Iris data, holding out one instance of each class
    // (indices 0, 50 and 100) for testing later
    int j = 0;
    for (int i = 0; i < 150; i++)
    {
        if (i != 0 && i != 50 && i != 100)
        {
            input[j] = new double[4];
            output[j] = iris.ClassLabels[i];
            for (int k = 0; k < 4; k++)
            {
                input[j][k] = iris.Instances[i][k];
            }
            j++;
        }
    }

    // Learning algorithm for the decision tree
    C45Learning teacher = new C45Learning(new[]
    {
        DecisionVariable.Continuous(iris.VariableNames[0]),
        DecisionVariable.Continuous(iris.VariableNames[1]),
        DecisionVariable.Continuous(iris.VariableNames[2]),
        DecisionVariable.Continuous(iris.VariableNames[3]),
    });

    // Learn the model
    DecisionTree tree = teacher.Learn(input, output);

    // If we had other irises we could simply write:
    // DecisionTree tree = teacher.Learn(iris.Instances, iris.ClassLabels);
    // but we prefer to leave some instances out for testing
    // (to check that our program is working correctly).

    // Test the model on the held-out instances
    double[][] test =
    {
        iris.Instances[0],
        iris.Instances[50],
        iris.Instances[100]
    };

    int[] answers = tree.Decide(test);

    Console.WriteLine("Answer should be as follows:\n0,1,2,\nAnswer is:");
    foreach (int ans in answers)
    {
        Console.Write(ans + ",");
    }

    Console.Write("\nPress any key to continue . . . ");
    Console.ReadKey(true);
}
public override Task<List<GeneralConfusionMatrix>> ComputeFoldedConfusionMatrixAsync(ClassificationModel classificationModel, int folds)
{
    return Task.Factory.StartNew(() =>
    {
        int numFeatures = classificationModel.FeatureVectors.Count;
        DecisionVariable[] decisionVariables = Enumerable.ToArray(classificationModel.Bands.Select(b => DecisionVariable.Continuous(b.ToString())));

        double[][] input = new double[numFeatures][];
        int[] responses = new int[numFeatures];

        for (int featureIndex = 0; featureIndex < classificationModel.FeatureVectors.Count; ++featureIndex)
        {
            var featureVector = classificationModel.FeatureVectors[featureIndex];

            input[featureIndex] = Array.ConvertAll(featureVector.FeatureVector.BandIntensities, s => (double)s / ushort.MaxValue);
            responses[featureIndex] = featureVector.FeatureClass;
        }

        List<GeneralConfusionMatrix> confusionMatrices = new List<GeneralConfusionMatrix>();

        // Create a new Cross-validation algorithm passing the data set size and the number of folds
        var crossvalidation = new CrossValidation(input.Length, folds);

        crossvalidation.Fitting = delegate(int k, int[] indicesTrain, int[] indicesValidation)
        {
            // Let's now grab the training data:
            var trainingInputs = input.Get(indicesTrain);
            var trainingOutputs = responses.Get(indicesTrain);

            // And now the validation data:
            var validationInputs = input.Get(indicesValidation);
            var validationOutputs = responses.Get(indicesValidation);

            var tree = new DecisionTree(decisionVariables, Enum.GetValues(typeof(LandcoverTypeViewModel)).Length);
            C45Learning c45Learning = new C45Learning(tree);
            c45Learning.Learn(trainingInputs, trainingOutputs);

            var predictedTraining = tree.Decide(trainingInputs);
            var predictedValidation = tree.Decide(validationInputs);

            double trainingError = new ZeroOneLoss(trainingOutputs).Loss(predictedTraining);
            double validationError = new ZeroOneLoss(validationOutputs).Loss(predictedValidation);

            GeneralConfusionMatrix confusionMatrix = new GeneralConfusionMatrix(
                Enum.GetValues(typeof(LandcoverTypeViewModel)).Length - 1, validationOutputs, predictedValidation);
            confusionMatrices.Add(confusionMatrix);

            // Return a new information structure containing the model and the errors achieved.
            return new CrossValidationValues(trainingError, validationError);
        };

        var result = crossvalidation.Compute();

        return confusionMatrices;
    });
}
public void Classification_Train(double[,] train_docrule, int[] label, string algorithm)
{
    string classmodelpath;
    int attrSize = eclatlitems.Count;
    int attrSizeTest = eclatlitems.Count;

    // Specify the input variables
    DecisionVariable[] variables = new DecisionVariable[attrSize];
    for (int i = 0; i < attrSize; i++)
    {
        variables[i] = new DecisionVariable((i + 1).ToString(), DecisionVariableKind.Discrete);
    }

    if (algorithm == "Tree")
    {
        classmodelpath = algorithm + ".model";
        //RandomForest tree2 = new RandomForest(2, variables);
        DecisionTree tree = new DecisionTree(variables, 2);
        C45Learning teacher = new C45Learning(tree);
        var model = teacher.Learn(train_docrule.ToJagged(), label);

        // Save the model
        teacher.Save(Path.Combine("", classmodelpath));
    }

    if (algorithm == "SVM")
    {
        classmodelpath = algorithm + ".model";
        var learn = new SequentialMinimalOptimization()
        {
            UseComplexityHeuristic = true,
            UseKernelEstimation = false
        };

        SupportVectorMachine teacher = learn.Learn(train_docrule.ToJagged(), label);

        // Save the model
        teacher.Save(Path.Combine("", classmodelpath));
    }

    if (algorithm == "Logistic")
    {
        classmodelpath = algorithm + ".model";
        var learner = new IterativeReweightedLeastSquares<LogisticRegression>()
        {
            Tolerance = 1e-4,  // Let's set some convergence parameters
            Iterations = 1,    // maximum number of iterations to perform
            Regularization = 0
        };

        LogisticRegression teacher = learner.Learn(train_docrule.ToJagged(), label);
        teacher.Save(Path.Combine("", classmodelpath));
    }

    if (algorithm == "GA")
    {
        weights_ga_matlab();
    }
}
/*************************** Primary Methods *******************************/

public double learnDecisionTreeModel(DataSet trainSet)
{
    // Convert TrainSet --> TrainDataTable
    this.convertToTrainIntputTable(trainSet);

    // C4.5 Decision Tree Algorithm
    double learningError;
    C45Learning c45 = new C45Learning(this.descisionTree);
    learningError = c45.Run(this.trainInputArray, this.trainOutputVector);

    return learningError;
}
public override Task TrainAsync(ClassificationModel classificationModel)
{
    int numFeatures = classificationModel.FeatureVectors.Count;
    DecisionVariable[] decisionVariables = Enumerable.ToArray(classificationModel.Bands.Select(b => DecisionVariable.Continuous(b.ToString())));

    double[][] input = new double[numFeatures][];
    int[] responses = new int[numFeatures];

    for (int featureIndex = 0; featureIndex < classificationModel.FeatureVectors.Count; ++featureIndex)
    {
        var featureVector = classificationModel.FeatureVectors[featureIndex];

        input[featureIndex] = Array.ConvertAll(featureVector.FeatureVector.BandIntensities, s => (double)s / ushort.MaxValue);
        responses[featureIndex] = featureVector.FeatureClass;
    }

    if (Boosting)
    {
        return Task.Factory.StartNew(() =>
        {
            var classifier = new Boost<Weak<DecisionTree>>();

            var teacher = new AdaBoostM1<Weak<DecisionTree>>(classifier)
            {
                Creation = (weights) =>
                {
                    var tree = new DecisionTree(decisionVariables, Enum.GetValues(typeof(LandcoverTypeViewModel)).Length);
                    var c45Learning = new C45Learning(tree);
                    c45Learning.Learn(input, responses, weights);
                    return new Weak<DecisionTree>(tree, (s, x) => s.Decide(x));
                },

                Iterations = Iterations,
                Tolerance = 1e-2
            };

            teacher.Run(input, responses);

            _tree = Either.Right<DecisionTree, Boost<Weak<DecisionTree>>>(classifier);
        });
    }
    else
    {
        return Task.Factory.StartNew(() =>
        {
            var tree = new DecisionTree(decisionVariables, Enum.GetValues(typeof(LandcoverTypeViewModel)).Length);
            C45Learning c45Learning = new C45Learning(tree);
            c45Learning.Learn(input, responses);

            _tree = Either.Left<DecisionTree, Boost<Weak<DecisionTree>>>(tree);
        });
    }
}
public static DecisionTree Learn(DataTable data, string[] inputColumns, string outputColumn)
{
    var codebook = new Codification(data);
    var symbols = codebook.Apply(data);

    double[][] inputs = symbols.ToJagged(inputColumns);
    int[] outputs = symbols.ToArray<int>(outputColumn);

    var attributes = DecisionVariable.FromCodebook(codebook, inputColumns);
    var c45 = new C45Learning(attributes);

    return c45.Learn(inputs, outputs);
}
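A hypothetical call site for the helper above; the table, column names, and values are invented purely for illustration:

DataTable data = new DataTable("Weather");
data.Columns.Add("Outlook");
data.Columns.Add("Wind");
data.Columns.Add("PlayTennis");
data.Rows.Add("Sunny", "Strong", "No");
data.Rows.Add("Overcast", "Weak", "Yes");
data.Rows.Add("Rain", "Weak", "Yes");

// Learn a tree that predicts PlayTennis from Outlook and Wind;
// the helper codifies the string columns internally.
DecisionTree tree = Learn(data, new[] { "Outlook", "Wind" }, "PlayTennis");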
private void btnSampleRunAnalysis_Click(object sender, EventArgs e)
{
    if (!isTrainingDataLoaded)
    {
        MessageBox.Show("Please load your training data first");
        return;
    }

    // Create a matrix from the entire source data table
    double[,] table = (dgvLearningSource.DataSource as DataTable).ToMatrix(out columnNames);

    // Get only the input vector values (in the first two columns)
    double[][] inputs = ConvertDataTableToMatrix(TrainingData.Tables["InterestedTrainingDataValues"]);

    // Get only the output labels (last column)
    int[] outputs = table.GetColumn(2).ToInt32();

    // Specify the input variables
    DecisionVariable[] variables =
    {
        new DecisionVariable("x", DecisionVariableKind.Continuous),
        new DecisionVariable("y", DecisionVariableKind.Continuous),
    };

    // Create the C4.5 learning algorithm
    var c45 = new C45Learning(variables);

    // Learn the decision tree using C4.5
    tree = c45.Learn(inputs, outputs);

    // Show the learned tree in the view
    decisionTreeView1.TreeSource = tree;

    // Get the ranges for each variable (X and Y)
    DoubleRange[] ranges = table.GetRange(0);

    // Generate a Cartesian coordinate system
    double[][] map = Matrix.Cartesian(
        Vector.Interval(ranges[0], 0.05),
        Vector.Interval(ranges[1], 0.05));

    // Classify each point in the Cartesian coordinate system
    double[] result = map.Apply(tree.Compute).ToDouble();

    double[,] surface = map.ToMatrix().InsertColumn(result);

    MessageBox.Show("Training Complete");
}
public void AttributeReuseTest1()
{
    string[][] text = Resources.iris_data.Split(
        new[] { '\n' }, StringSplitOptions.RemoveEmptyEntries)
        .Apply(x => x.Split(','));

    double[][] inputs = new double[text.Length][];
    for (int i = 0; i < inputs.Length; i++)
    {
        inputs[i] = text[i].First(4).Convert(s => Double.Parse(s, System.Globalization.CultureInfo.InvariantCulture));
    }

    string[] labels = text.GetColumn(4);

    Codification codebook = new Codification("Label", labels);
    int[] outputs = codebook.Translate("Label", labels);

    DecisionVariable[] features =
    {
        new DecisionVariable("sepal length", DecisionVariableKind.Continuous),
        new DecisionVariable("sepal width", DecisionVariableKind.Continuous),
        new DecisionVariable("petal length", DecisionVariableKind.Continuous),
        new DecisionVariable("petal width", DecisionVariableKind.Continuous),
    };

    DecisionTree tree = new DecisionTree(features, codebook.Columns[0].Symbols);

    C45Learning teacher = new C45Learning(tree);
    teacher.Join = 3;

    double error = teacher.Run(inputs, outputs);
    Assert.AreEqual(0.02, error, 1e-10);

    DecisionSet rules = tree.ToRules();

    double newError = ComputeError(rules, inputs, outputs);
    Assert.AreEqual(0.02, newError, 1e-10);

    string ruleText = rules.ToString(codebook, System.Globalization.CultureInfo.InvariantCulture);

    // TODO: implement this assertion properly, actually checking
    // the text contents once the feature is completely finished.
    Assert.AreEqual(600, ruleText.Length);
}
public void new_method_create_tree()
{
    string[][] text = Resources.iris_data.Split(new[] { '\n' },
        StringSplitOptions.RemoveEmptyEntries).Apply(x => x.Split(','));

    double[][] inputs = text.GetColumns(0, 1, 2, 3).To<double[][]>();

    string[] labels = text.GetColumn(4);

    var codebook = new Codification("Output", labels);
    int[] outputs = codebook.Translate("Output", labels);

    // And we can use the C4.5 for learning:
    var teacher = new C45Learning();

    // And finally induce the tree:
    var tree = teacher.Learn(inputs, outputs);

    // To get the estimated class labels, we can use
    int[] predicted = tree.Decide(inputs);

    // And the classification error can be computed as
    double error = new ZeroOneLoss(outputs) // 0.0266
    {
        Mean = true
    }.Loss(tree.Decide(inputs));

    // Moreover, we may decide to convert our tree to a set of rules:
    DecisionSet rules = tree.ToRules();

    // And using the codebook, we can inspect the tree reasoning:
    string ruleText = rules.ToString(codebook, "Output",
        System.Globalization.CultureInfo.InvariantCulture);

    // The output is:
    string expected = @"Iris-setosa =: (2 <= 2.45)
Iris-versicolor =: (2 > 2.45) && (3 <= 1.75) && (0 <= 7.05) && (1 <= 2.85)
Iris-versicolor =: (2 > 2.45) && (3 <= 1.75) && (0 <= 7.05) && (1 > 2.85)
Iris-versicolor =: (2 > 2.45) && (3 > 1.75) && (0 <= 5.95) && (1 > 3.05)
Iris-virginica =: (2 > 2.45) && (3 <= 1.75) && (0 > 7.05)
Iris-virginica =: (2 > 2.45) && (3 > 1.75) && (0 > 5.95)
Iris-virginica =: (2 > 2.45) && (3 > 1.75) && (0 <= 5.95) && (1 <= 3.05)
";

    Assert.AreEqual(0.026666666666666668, error, 1e-10);

    double newError = ComputeError(rules, inputs, outputs);
    Assert.AreEqual(0.026666666666666668, newError, 1e-10);
    Assert.AreEqual(expected, ruleText);
}
public static void CreateMitchellExample(out DecisionTree tree, out double[][] inputs, out int[] outputs)
{
    DataTable data = new DataTable("Mitchell's Tennis Example");

    data.Columns.Add("Day", typeof(string));
    data.Columns.Add("Outlook", typeof(string));
    data.Columns.Add("Temperature", typeof(double));
    data.Columns.Add("Humidity", typeof(double));
    data.Columns.Add("Wind", typeof(string));
    data.Columns.Add("PlayTennis", typeof(string));

    data.Rows.Add("D1", "Sunny", 85, 85, "Weak", "No");
    data.Rows.Add("D2", "Sunny", 80, 90, "Strong", "No");
    data.Rows.Add("D3", "Overcast", 83, 78, "Weak", "Yes");
    data.Rows.Add("D4", "Rain", 70, 96, "Weak", "Yes");
    data.Rows.Add("D5", "Rain", 68, 80, "Weak", "Yes");
    data.Rows.Add("D6", "Rain", 65, 70, "Strong", "No");
    data.Rows.Add("D7", "Overcast", 64, 65, "Strong", "Yes");
    data.Rows.Add("D8", "Sunny", 72, 95, "Weak", "No");
    data.Rows.Add("D9", "Sunny", 69, 70, "Weak", "Yes");
    data.Rows.Add("D10", "Rain", 75, 80, "Weak", "Yes");
    data.Rows.Add("D11", "Sunny", 75, 70, "Strong", "Yes");
    data.Rows.Add("D12", "Overcast", 72, 90, "Strong", "Yes");
    data.Rows.Add("D13", "Overcast", 81, 75, "Weak", "Yes");
    data.Rows.Add("D14", "Rain", 71, 80, "Strong", "No");

    // Create a new codification codebook to
    // convert strings into integer symbols
    Codification codebook = new Codification(data);

    DecisionVariable[] attributes =
    {
        new DecisionVariable("Outlook", codebook["Outlook"].Symbols),          // 3 possible values (Sunny, overcast, rain)
        new DecisionVariable("Temperature", DecisionVariableKind.Continuous),  // continuous values
        new DecisionVariable("Humidity", DecisionVariableKind.Continuous),     // continuous values
        new DecisionVariable("Wind", codebook["Wind"].Symbols)                 // 2 possible values (Weak, strong)
    };

    int classCount = codebook["PlayTennis"].Symbols; // 2 possible values (yes, no)

    tree = new DecisionTree(attributes, classCount);
    C45Learning c45 = new C45Learning(tree);

    // Extract symbols from data and train the classifier
    DataTable symbols = codebook.Apply(data);
    inputs = symbols.ToArray("Outlook", "Temperature", "Humidity", "Wind");
    outputs = symbols.ToArray<int>("PlayTennis");

    double error = c45.Run(inputs, outputs);
}
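A sketch of how the tree and codebook produced above could be queried for a new day. It follows the usual Accord.NET pattern; Codification.Revert assumes a 3.x-era version of the library, and the sample values are invented:

// Encode a new day: discrete columns go through the codebook,
// continuous columns (Temperature, Humidity) are used directly.
double[] query =
{
    codebook.Translate("Outlook", "Sunny"),
    85,  // Temperature
    90,  // Humidity
    codebook.Translate("Wind", "Strong")
};

int predicted = tree.Compute(query);

// Translate the integer class back into its label
string answer = codebook.Revert("PlayTennis", predicted); // e.g. "No"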
/// <summary>
///   Train the classifier with some data, using parameters.
/// </summary>
/// <param name="trainingData">Data used to train the classifier.</param>
/// <param name="maxJoin">How many times a variable can join
/// the decision process.</param>
/// <param name="maxHeight">Maximum height when learning the tree.</param>
/// <returns>Classifier prediction error.</returns>
public double TrainClassifierWithParameters(
    ClassificationData trainingData,
    int maxJoin = 0,
    int maxHeight = 0)
{
    double classifierError = 0;

    List<DecisionVariable> decisionVariables = new List<DecisionVariable>();
    if (DecisionVariableNames != null)
    {
        for (int n = 0; n < trainingData.InputAttributeNumber; ++n)
        {
            decisionVariables.Add(
                new DecisionVariable(DecisionVariableNames[n], DecisionVariableKind.Continuous));
        }
    }
    // Generate automatic names for the variables if no names are provided.
    else
    {
        for (int n = 0; n < trainingData.InputAttributeNumber; ++n)
        {
            decisionVariables.Add(
                new DecisionVariable("variable_" + (n + 1).ToString(), DecisionVariableKind.Continuous));
        }
    }

    // Create a new Decision Tree classifier.
    ClassificationDecisionTree = new DecisionTree(decisionVariables, trainingData.OutputPossibleValues);

    // Create a new instance of the C4.5 algorithm to train the tree.
    C45LearningTree = new C45Learning(ClassificationDecisionTree);

    // Change some of the classifier's parameters if valid new values are provided.
    if (maxJoin > 0)
    {
        C45LearningTree.Join = maxJoin;
    }
    if (maxHeight > 0)
    {
        C45LearningTree.MaxHeight = maxHeight;
    }

    // Use the data to train the tree.
    classifierError = C45LearningTree.Run(trainingData.InputData, trainingData.OutputData);

    return classifierError;
}
public void Learn()
{
    var inputs = GetLearnInputs();
    var outputs = GetOutputs();

    var teacher = new C45Learning
    {
        Join = 0
    };

    foreach (var controllerOutputProperty in GetControllerOutputProperties())
    {
        teacher.Attributes.Add(DecisionVariable.Continuous(controllerOutputProperty));
    }

    DecisionTree = teacher.Learn(inputs, outputs);
}
protected void btnshow_Click(object sender, EventArgs e)
{
    quality.Visible = true;
    DataTable _dataTable = new DataTable();
    Panel1.Visible = true;

    n = Convert.ToDouble(txtN.Text);
    p = Convert.ToDouble(txtP.Text);
    k = Convert.ToDouble(txtK.Text);
    ph = Convert.ToDouble(txtPh.Text);
    ec = Convert.ToDouble(txtec.Text);

    _dataTable = f1.getrecord1("select * from dataset");

    double[][] inputs = _dataTable.ToJagged<double>("n", "p", "k", "ph", "ec");

    string[] labels = new string[_dataTable.Rows.Count];
    for (int i = 0; i < _dataTable.Rows.Count; i++)
    {
        labels[i] = _dataTable.Rows[i]["fertility"].ToString();
    }

    var codebook = new Codification("fertility", labels);

    // With the codebook, we can convert the labels:
    int[] outputs = codebook.Translate("fertility", labels);

    C45Learning teacher = new C45Learning();
    var tree = teacher.Learn(inputs, outputs);

    double[][] input_test =
    {
        new double[] { n, p, k, ph, ec },
    };

    int[] val = tree.Decide(input_test);

    if (val[0] == 0)
    {
        quality.Text = "Low";
    }
    else if (val[0] == 1)
    {
        quality.Text = "Medium";
    }
    else
    {
        quality.Text = "High";
    }
}
// Decision Tree
public DecisionTree DecisionThrust(double[][] inputs, int[] outputs)
{
    C45Learning teacher = new C45Learning(new[]
    {
        DecisionVariable.Continuous("X"),
        DecisionVariable.Continuous("Y"),
        DecisionVariable.Continuous("Z"),
        DecisionVariable.Continuous("W")
    });

    teacher.ParallelOptions.MaxDegreeOfParallelism = 1;

    // Use the learning algorithm to induce the tree
    DecisionTree tree = teacher.Learn(inputs, outputs);

    return tree;
}
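A hypothetical call showing the shapes DecisionThrust expects: four continuous features per sample and integer class labels (all data below is invented for illustration):

double[][] inputs =
{
    new double[] { 0.0, 1.0, 2.0, 3.0 },
    new double[] { 1.0, 0.0, 0.5, 1.0 },
    new double[] { 2.0, 2.0, 1.0, 0.0 },
    new double[] { 3.0, 1.5, 0.0, 2.0 },
};
int[] outputs = { 0, 1, 1, 0 };

DecisionTree tree = DecisionThrust(inputs, outputs);
int[] predictions = tree.Decide(inputs); // classify the training samples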
private static DecisionTree createTree(out double[][] inputs, out int[] outputs)
{
    string nurseryData = Resources.nursery;

    string[] inputColumns =
    {
        "parents", "has_nurs", "form", "children",
        "housing", "finance", "social", "health"
    };

    string outputColumn = "output";

    DataTable table = new DataTable("Nursery");
    table.Columns.Add(inputColumns);
    table.Columns.Add(outputColumn);

    string[] lines = nurseryData.Split(
        new[] { Environment.NewLine }, StringSplitOptions.None);

    foreach (var line in lines)
    {
        table.Rows.Add(line.Split(','));
    }

    Codification codebook = new Codification(table);
    DataTable symbols = codebook.Apply(table);

    inputs = symbols.ToArray(inputColumns);
    outputs = symbols.ToArray<int>(outputColumn);

    var attributes = DecisionVariable.FromCodebook(codebook, inputColumns);
    var tree = new DecisionTree(attributes, classes: 5);

    C45Learning c45 = new C45Learning(tree);
    c45.Run(inputs, outputs);

    return tree;
}
public void Train(List<TrainingValue> trainingData)
{
    List<DecisionVariable> trainingVariables = new List<DecisionVariable>();

    for (int i = 0; i < featureSize; i++)
    {
        trainingVariables.Add(DecisionVariable.Continuous(i.ToString()));
    }

    tree = new DecisionTree(inputs: trainingVariables, classes: 2);

    double[][] featuresArray = new double[trainingData.Count][];
    int[] labels = new int[trainingData.Count];

    for (int i = 0; i < featuresArray.Length; i++)
    {
        featuresArray[i] = trainingData[i].Features;
        labels[i] = Convert.ToInt32(trainingData[i].State);
    }

    switch (type)
    {
        case ClassifierType.DecisionTree:
            C45Learning teacher = new C45Learning(tree);
            teacher.Learn(featuresArray, labels);
            break;

        case ClassifierType.LDA:
            LinearDiscriminantAnalysis lda = new LinearDiscriminantAnalysis();
            pipeline = lda.Learn(featuresArray, labels);
            break;

        case ClassifierType.SVM:
            LinearCoordinateDescent svmLearner = new LinearCoordinateDescent();
            svm = svmLearner.Learn(featuresArray, labels);
            break;

        case ClassifierType.Bayes:
            NaiveBayesLearning<NormalDistribution> learner = new NaiveBayesLearning<NormalDistribution>();
            bayes = learner.Learn(featuresArray, labels);
            break;
    }

    Trained = true;
}