/// <summary>
/// Trains the random forest on the supplied people, stores the forest's
/// predictions for the training set and writes them to a text file
/// (one predicted label per line).
/// </summary>
/// <param name="people">People from which the data point service generates inputs and expected labels.</param>
/// <param name="numberOfTrees">Number of trees the forest should contain.</param>
/// <param name="skillSetSize">Skill-set size forwarded to the data point service.</param>
public void Train(List<Person> people, int numberOfTrees, int skillSetSize)
{
    // NOTE(review): machine-specific absolute path; consider making it configurable.
    const string predictionsPath =
        @"C:\Users\Niall\Documents\Visual Studio 2015\Projects\LinkedInSearchUi\LinkedIn Dataset\XML\random_forest_predictions.txt";

    double[][] inputs = _dataPointService.GenerateDataPointsFromPeople(people, skillSetSize);
    int[] expectedResults = _dataPointService.GenerateExpectedResultFromPeople(people);

    // Create the forest learning algorithm
    var teacher = new RandomForestLearning()
    {
        NumberOfTrees = numberOfTrees,
    };

    // Learn a random forest from the data
    _randomForest = teacher.Learn(inputs, expectedResults);

    // Classify the training set exactly once. The previous version ran Decide a
    // second time only to compute a loss value that was never read.
    trainingPredictions = _randomForest.Decide(inputs);

    File.WriteAllLines(
        predictionsPath,
        trainingPredictions.Select(d => d.ToString()).ToArray());
}
/// <summary>
/// Regression test for https://github.com/accord-net/framework/issues/576:
/// a single-tree forest trained with a sample ratio of 0.5 on the Nursery
/// data set must reproduce a known classification error.
/// </summary>
public void sample_ratio_less_than_1()
{
    string localPath = Path.Combine(NUnit.Framework.TestContext.CurrentContext.TestDirectory, "gh576");

    // Fix the random seed so the expected error below is reproducible
    Accord.Math.Random.Generator.Seed = 1;

    var nursery = new DataSets.Nursery(localPath);
    int[][] inputs = nursery.Instances;
    int[] outputs = nursery.ClassLabels;

    var teacher = new RandomForestLearning(nursery.VariableNames)
    {
        NumberOfTrees = 1,
        SampleRatio = 0.5
    };

    // Both learning and prediction are restricted to a single thread
    teacher.ParallelOptions.MaxDegreeOfParallelism = 1;

    var forest = teacher.Learn(inputs, outputs);
    forest.ParallelOptions.MaxDegreeOfParallelism = 1;

    // Classify once and reuse the result; the previous version discarded
    // these predictions and classified the same inputs a second time.
    int[] predicted = forest.Decide(inputs);
    double error = new ZeroOneLoss(outputs).Loss(predicted);

    Assert.AreEqual(0.0023148148148148147d, error, 1e-10);
}
/// <summary>
/// Trains one random forest per fold and measures how many test instances
/// of that fold are classified correctly.
/// </summary>
/// <param name="trees">Number of trees in each forest.</param>
/// <param name="ratio">Sample ratio handed to the learning algorithm.</param>
/// <returns>
/// The SUM of the per-fold accuracies (each in [0, 1]) over all <c>Folds</c>.
/// NOTE(review): the value is not divided by <c>Folds</c>; confirm whether
/// callers perform the averaging themselves.
/// </returns>
public double CheckAccuracy(int trees, double ratio)
{
    // Every feature is declared as a continuous variable named after its index
    var variables = new DecisionVariable[Data.FeatureCount];
    for (int i = 0; i < Data.FeatureCount; i++)
    {
        variables[i] = new DecisionVariable(i.ToString(), DecisionVariableKind.Continuous);
    }

    double accuracy = 0;

    for (int k = 0; k < Folds; k++)
    {
        // The teacher's ParallelOptions are not overridden here (the override
        // was already commented out in the previous version).
        RandomForestLearning teacher = new RandomForestLearning(variables);
        teacher.SampleRatio = ratio;
        teacher.NumberOfTrees = trees;

        // A stray "teacher." token in front of this statement used to make
        // the method uncompilable; it has been removed.
        var model = teacher.Learn(TrainInput[k], TrainOutput[k]);

        int correct = 0;
        for (int i = 0; i < Data.InstancesPerFold; i++)
        {
            var label = model.Decide(TestInput[k][i]);
            if (label == TestOutput[k][i])
            {
                correct++;
            }
        }

        accuracy += (double)correct / Data.InstancesPerFold;
    }

    return accuracy;
}
/// <summary>
/// Verifies that a random forest survives a save/load round trip: the
/// restored forest must produce the same decisions and transforms as the
/// original one on the Iris data.
/// </summary>
public void test_serialization()
{
    // Deterministic seed so the learned forest is reproducible
    Accord.Math.Random.Generator.Seed = 1;

    // Parse the embedded Iris CSV: four feature columns followed by the label
    string[] rows = Resources.iris_data.Split(new[] { "\r\n" }, StringSplitOptions.RemoveEmptyEntries);
    string[][] text = rows.Apply(x => x.Split(','));

    double[][] inputs = text.GetColumns(0, 1, 2, 3).To<double[][]>();
    string[] labels = text.GetColumn(4);

    // Encode the textual labels as integer class indices
    var codebook = new Codification("Output", labels);
    int[] outputs = codebook.Translate("Output", labels);

    var teacher = new RandomForestLearning();
    teacher.NumberOfTrees = 10;

    var original = teacher.Learn(inputs, outputs);

    // Round trip through the serializer
    byte[] bytes = original.Save();
    var restored = Serializer.Load<RandomForest>(bytes);

    // Run both forests single-threaded
    restored.ParallelOptions.MaxDegreeOfParallelism = 1;
    original.ParallelOptions.MaxDegreeOfParallelism = 1;

    bool sameDecisions = original.Decide(inputs).IsEqual(restored.Decide(inputs));
    Assert.IsTrue(sameDecisions);

    bool sameTransforms = original.Transform(inputs).IsEqual(restored.Transform(inputs));
    Assert.IsTrue(sameTransforms);
}
/// <summary>
/// Trains a 10-tree random forest on the Iris data set, evaluates it on the
/// same data and prints the resulting zero-one loss to the console.
/// </summary>
public void Run()
{
    // Load the Iris data set
    var iris = new Accord.DataSets.Iris();

    // Feature vectors and their class labels
    double[][] inputs = iris.Instances;
    int[] outputs = iris.ClassLabels;

    // Configure the learner: 10 trees in the forest
    var teacher = new RandomForestLearning();
    teacher.NumberOfTrees = 10;

    // Train the forest
    var forest = teacher.Learn(inputs, outputs);

    // Evaluate on the training inputs themselves
    int[] predicted = forest.Decide(inputs);

    // Compute the error: 'outputs' holds the expected labels, 'predicted' the forest's guesses
    var loss = new ZeroOneLoss(outputs);
    double error = loss.Loss(predicted);

    // The original author expected this to print 0
    System.Console.WriteLine(error);
}
// Disabled normalization helper, kept for reference:
//private static double[] NormalizeData(double[] data, int min, int max)
//{
//    var sorted = data.OrderBy(d => d);
//    double dataMax = sorted.First();
//    double dataMin = sorted.Last();
//    double[] ret = new double[data.Length];
//    double avgIn = (double)((min + max) / 2.0);
//    double avgOut = ((dataMax + dataMin) / 2.0);
//    for (int i = 0; i < data.Length; i++)
//    {
//        ret[i] = (double) Math.Round(avgOut * (data[i] + avgIn) / 2);
//    }
//    return ret;
//}

/// <summary>
/// Trains a random forest on the stored feature rows and labels, classifies
/// the first row of <c>x</c>, enters a long or short position depending on
/// the predicted class, and prints the false negative / false positive
/// counts of the model on its training data.
/// </summary>
private void Classifier()
{
    Generator.Seed = 1;

    // Training inputs (normalization is currently disabled):
    //for (int i = 0; i < 5; i++)
    //{
    //    z[i] = NormalizeData(z[i], 0, 1);
    //}
    double[][] trainingInputs = z;

    // Training labels
    int[] trainingLabels = Label;

    // Learner configuration; SampleRatio, Join and CoverageRatio are
    // intentionally not set (they were commented out by the author).
    var teacher = new RandomForestLearning();
    teacher.NumberOfTrees = 1000;

    var model = teacher.Learn(trainingInputs, trainingLabels);

    // Classify the sample to be predicted
    int prediction = model.Decide(x[0]);

    switch (prediction)
    {
        case 1:
            // Predicted class 1 -> go long
            EnterLong();
            break;
        case 0:
            // Predicted class 0 -> go short
            EnterShort();
            break;
    }

    // Confusion matrix of the model on its own training data
    ConfusionMatrix cm = ConfusionMatrix.Estimate(model, trainingInputs, trainingLabels);
    false_neg = cm.FalseNegatives;
    false_pos = cm.FalsePositives;

    // Report false negatives and false positives
    Print(false_neg + ", " + false_pos);
}
/// <summary>
/// Trains a 4-tree random forest on the training wines and predicts the
/// quality class of every wine in the testing set.
/// </summary>
/// <param name="testingSet">Wines to classify.</param>
/// <param name="trainingSet">Wines used for learning; <c>Quality</c> is the label.</param>
/// <returns>The predicted class of each testing wine, converted to <c>double</c>.</returns>
private static double[] RandomForest(List<Wine> testingSet, List<Wine> trainingSet)
{
    var teacher = new RandomForestLearning();
    teacher.NumberOfTrees = 4;

    var trainingInputs = trainingSet.Select(wine => wine.GetParams()).ToArray();
    var trainingLabels = trainingSet.Select(wine => wine.Quality).ToArray();
    var forest = teacher.Learn(trainingInputs, trainingLabels);

    var testingInputs = testingSet.Select(wine => wine.GetParams()).ToArray();
    var predictions = forest.Decide(testingInputs);

    return predictions.Select(label => (double)label).ToArray();
}
/// <summary>
/// Learns a single-tree random forest from the stored <c>inputs</c> and
/// <c>outputs</c> and keeps it in <c>forest</c>.
/// </summary>
public void buildModel()
{
    // Derive the decision variables from the training inputs
    var attributes = DecisionVariable.FromData(inputs);

    // One tree, trained with a sample ratio of 1.0
    var teacher = new RandomForestLearning(attributes);
    teacher.NumberOfTrees = 1;
    teacher.SampleRatio = 1.0;

    this.forest = teacher.Learn(inputs, outputs);
}
/// <summary>
/// Documentation sample and test: learns a 10-tree random forest on
/// Fisher's Iris data (parsed from the embedded CSV resource) and checks
/// that the training error stays below 0.015.
/// </summary>
public void test_learn()
{
    #region doc_iris
    // Use a fixed random seed so the example is reproducible
    Accord.Math.Random.Generator.Seed = 1;

    // This example works on the famous Fisher's Iris dataset, where the
    // task is to decide whether the features of an Iris flower belong to
    // an Iris setosa, an Iris versicolor, or an Iris virginica:
    //
    //  - https://en.wikipedia.org/wiki/Iris_flower_data_set
    //
    // Start by turning the raw dataset into an array of text fields
    string[] rows = Resources.iris_data.Split(new[] { "\r\n" }, StringSplitOptions.RemoveEmptyEntries);
    string[][] text = rows.Apply(x => x.Split(','));

    // Columns 0..3 hold the flower features
    double[][] inputs = text.GetColumns(0, 1, 2, 3).To<double[][]>();

    // Column 4 holds the expected flower type
    string[] labels = text.GetColumn(4);

    // The labels are text, so they are first converted into integer class
    // labels with the help of a codebook:
    var codebook = new Codification("Output", labels);

    // Translate the textual labels through the codebook
    int[] outputs = codebook.Translate("Output", labels);

    // Set up the forest learning algorithm with 10 trees
    var teacher = new RandomForestLearning();
    teacher.NumberOfTrees = 10;

    // Learn the random forest from the data
    var forest = teacher.Learn(inputs, outputs);

    // Class labels can be estimated with Decide
    int[] predicted = forest.Decide(inputs);

    // And the classification error (0.0006) is obtained from a zero-one loss
    double error = new ZeroOneLoss(outputs).Loss(forest.Decide(inputs));
    #endregion

    Assert.IsTrue(error < 0.015);
}
/// <summary>
/// Trains a fresh random forest on the given training data and returns the
/// class predicted for the first row of <paramref name="inputToPredict"/>.
/// </summary>
/// <param name="trainingInputs">Training feature vectors.</param>
/// <param name="trainingOutputs">Class label of each training vector.</param>
/// <param name="inputToPredict">Rows to classify; only the first prediction is returned.</param>
/// <returns>The predicted class label of the first input row.</returns>
private int predict(double[][] trainingInputs, int[] trainingOutputs, double[][] inputToPredict)
{
    // Tree count and join value come from the enclosing type's settings
    var teacher = new RandomForestLearning();
    teacher.NumberOfTrees = forestNumberOfTrees;
    teacher.Join = forestJoin;

    var forest = teacher.Learn(trainingInputs, trainingOutputs);

    return forest.Decide(inputToPredict)[0];
}
/// <summary>
/// Builds a random forest using the hyper-parameters selected by a grid
/// search and trains it on the supplied data.
/// </summary>
/// <param name="decisionVariables">Description of the input variables.</param>
/// <param name="bestParameters">Grid search result; must contain "trees", "sampleRatio" and "join".</param>
/// <param name="trainingInputs">Training feature vectors.</param>
/// <param name="trainingOutputs">Class label of each training vector.</param>
/// <returns>The trained random forest.</returns>
private static RandomForest CreateRandomForestModel(
    DecisionVariable[] decisionVariables,
    GridSearchParameterCollection bestParameters,
    double[][] trainingInputs,
    int[] trainingOutputs)
{
    // The grid search stores values as doubles; tree count and join are truncated to int
    int trees = (int)bestParameters["trees"].Value;
    double sampleRatio = bestParameters["sampleRatio"].Value;
    int join = (int)bestParameters["join"].Value;

    var teacher = new RandomForestLearning(decisionVariables);
    teacher.NumberOfTrees = trees;
    teacher.SampleRatio = sampleRatio;
    teacher.Join = join;

    // Learn the training data and hand back the resulting model
    return teacher.Learn(trainingInputs, trainingOutputs);
}
/// <summary>
/// Trains a random forest classifier, prints its zero-one loss on the test
/// data to the console and saves the trained model to disk.
/// </summary>
/// <param name="train_data">Feature vectors used to train the classifier.</param>
/// <param name="test_data">Feature vectors used to test the classifier.</param>
/// <param name="train_label">Labels of the training data.</param>
/// <param name="test_label">Labels of the test data.</param>
/// <param name="Classifier_Path">Directory in which the classifier is saved.</param>
/// <param name="Classifier_Name">File name under which the classifier is saved.</param>
/// <param name="NumOfTrees">Number of trees used in the random forest.</param>
public void RandomForestLearning(double[][] train_data, double[][] test_data, int[] train_label, int[] test_label, String Classifier_Path, String Classifier_Name, int NumOfTrees = 20)
{
    var teacher = new RandomForestLearning();
    teacher.NumberOfTrees = NumOfTrees;

    var forest = teacher.Learn(train_data, train_label);

    // Evaluate on the held-out test data and report the error rate
    int[] predicted = forest.Decide(test_data);
    var loss = new ZeroOneLoss(test_label);
    double error = loss.Loss(predicted);
    Console.WriteLine(error);

    // Persist the trained forest
    string targetFile = Path.Combine(Classifier_Path, Classifier_Name);
    forest.Save(targetFile);
}
/// <summary>
/// Encodes the textual table into integer symbols, trains a random forest
/// on it and stores the forest's predictions for the training rows.
/// </summary>
/// <param name="naglowki">Column headers of the table.</param>
/// <param name="dane">Table rows as text.</param>
public void Uczenie(string[] naglowki, string[][] dane)
{
    // Build a codebook for the table and convert every cell to its symbol
    var codebook = new Codification(naglowki, dane);
    int[][] symbols = codebook.Transform(dane);

    // NOTE(review): presumably Get(null, 0, -1) selects every column except
    // the last one as inputs - confirm against the Accord.Math documentation.
    int[][] trainingInputs = symbols.Get(null, 0, -1);

    // Column index -1 is stored as the result column
    KolumnaWynikow = symbols.GetColumn(-1);

    var teacher = new RandomForestLearning();
    teacher.SampleRatio = IloscDanychModelu;

    RandomForest forest = teacher.Learn(trainingInputs, KolumnaWynikow);

    // Predictions for the same rows that were used for training
    Rezultaty = forest.Decide(trainingInputs);
}
/// <summary>
/// Repeatedly trains a 100-tree random forest, each time testing on a
/// different window of <paramref name="testingData"/>, and keeps the forest
/// with the highest test accuracy.
/// </summary>
/// <param name="trainingData">Data forwarded to <c>PrepareInputOutput</c> for building the training set.</param>
/// <param name="testingData">Data from which the test window [start, end) is taken; one tenth of it is used per iteration.</param>
/// <param name="precision">Receives 1 minus the mean zero-one loss over all iterations.</param>
/// <returns>
/// The forest with the best test accuracy, or <c>null</c> when no iteration
/// achieved an accuracy greater than zero.
/// </returns>
static RandomForest RandomForestClassification(List<int[]> trainingData, List<int[]> testingData, out double precision)
{
    // The test window is one tenth of the data and starts at the tail
    int testingCount = testingData.Count / 10;
    int indexTestingStart = testingData.Count - testingCount;
    int indexTestingEnd = testingData.Count;

    double errorSum = 0;
    double bestAccuracy = 0;
    RandomForest bestforest = null;

    Console.WriteLine("Random Forest Classification");

    for (int i = 0; i < iterations; i++)
    {
        var watch = System.Diagnostics.Stopwatch.StartNew();
        Console.WriteLine("Testing from: {0} to {1}", indexTestingStart, indexTestingEnd);

        int[][] inputData, testinputData;
        int[] outputData, testoutputData;
        PrepareInputOutput(out inputData, out outputData, out testinputData, out testoutputData, trainingData, testingData, indexTestingStart, indexTestingEnd);

        var learner = new RandomForestLearning()
        {
            NumberOfTrees = 100,
        };
        var forest = learner.Learn(inputData, outputData);
        Console.WriteLine("Medis sukurtas - ismokta");

        double er = new ZeroOneLoss(testoutputData).Loss(forest.Decide(testinputData));
        double accuracy = 1 - er;
        Console.WriteLine("Apmokymo tikslumas: {0}", accuracy);

        // Remember the best-performing forest seen so far
        if (accuracy > bestAccuracy)
        {
            bestAccuracy = accuracy;
            bestforest = forest;
        }

        watch.Stop();
        Console.WriteLine("Iteracija baigta per: {0}ms", watch.ElapsedMilliseconds);

        // Slide the test window one step towards the beginning of the data
        indexTestingEnd = indexTestingStart;
        indexTestingStart -= testingCount;
        errorSum += er;

        Console.WriteLine("------------------------------------------------------------------------------");
    }

    precision = 1 - (errorSum / iterations);
    return bestforest;
}
/// <summary>
/// 10-fold cross-validation smoke test: for every fold and every solver
/// column a random forest is trained to predict whether the solver finished
/// in under 5000 time units, and the shape of the resulting prediction
/// table is verified.
/// </summary>
public void constructor_test()
{
    CultureInfo.DefaultThreadCurrentCulture = CultureInfo.InvariantCulture;

    var times = ReadCSV(Properties.Resources.times);
    var features = ReadCSV(Properties.Resources.features);

    // One row per instance, one flag per solver: true when solved in < 5000
    var didSolve = times.Select(list => list.Select(d => d < 5000).ToList()).ToList();

    const int foldCount = 10;
    int elementsPerFold = didSolve.Count / foldCount;

    for (int i = 0; i < foldCount; i++)
    {
        // Materialize the fold split once. The previous version kept these as
        // deferred queries and re-evaluated them for every solver column.
        var y_test = didSolve.Skip(i * elementsPerFold).Take(elementsPerFold).ToList();
        var y_train = didSolve.Except(y_test).ToList();

        var x_test = features.Skip(i * elementsPerFold).Take(elementsPerFold).ToList();
        var x_train = features.Except(x_test).Select(list => list.ToArray()).ToArray();
        var x_test_instances = x_test.Select(list => list.ToArray()).ToArray();

        var allSolverPredictions = new List<bool[]>();
        int solverCount = y_train.First().Count;

        for (int j = 0; j < solverCount; j++)
        {
            // Binary labels of the current solver (1 = solved in time).
            // The lambda runs immediately, so capturing 'j' is safe.
            int[] y_train_current_solver = y_train.Select(list => list[j] ? 1 : 0).ToArray();

            var randomForestLearning = new RandomForestLearning()
            {
                Trees = 10
            };
            var randomForest = randomForestLearning.Learn(x_train, y_train_current_solver);

            var currentSolverPredictions = new List<bool>();
            foreach (var test_instance in x_test_instances)
            {
                var compute = randomForest.Compute(test_instance);
                currentSolverPredictions.Add(compute != 0);
            }

            allSolverPredictions.Add(currentSolverPredictions.ToArray());
        }

        // Expected value goes first so failure messages read correctly
        Assert.AreEqual(29, allSolverPredictions.Count);
        foreach (var p in allSolverPredictions)
        {
            Assert.AreEqual(424, p.Length);
        }
    }
}
/// <summary>
/// Trains a 100-tree random forest on the given samples, classifies the
/// same samples again and computes the accuracy from a confusion matrix.
/// The accuracy is only computed locally; nothing is returned or stored.
/// </summary>
/// <param name="inputs">Feature vectors.</param>
/// <param name="outputs">Class label of each feature vector.</param>
private static void randomForest(double[][] inputs, int[] outputs)
{
    // 100 decision trees are used to cover this problem
    var teacher = new RandomForestLearning();
    teacher.NumberOfTrees = 100;

    // Induce the forest with the learning algorithm
    RandomForest rf = teacher.Learn(inputs, outputs);

    // Classify the training samples with the forest
    int[] predicted = rf.Decide(inputs);

    // Confusion matrix for judging the quality of the predictions
    var cm = new ConfusionMatrix(predicted: predicted, expected: outputs);

    // Accuracy measure (the original author expected 1.0, i.e. 100%)
    double accuracy = cm.Accuracy;
}
/// <summary>
/// Trains the random forest classifier and, when requested, computes its
/// zero-one loss on the training data.
/// </summary>
/// <param name="trainingData">The training data that will be used to train the classifier.</param>
/// <param name="trainingLabels">The training labels related to the provided training data.</param>
/// <param name="calculateError">Whether the training error should be calculated and stored in <c>TrainingError</c>.</param>
public override void Train(List<double[]> trainingData, List<int> trainingLabels, bool calculateError = true)
{
    LearningAlgorithm = new RandomForestLearning();

    // Non-positive settings mean "keep the learner's own default"
    if (NumTrees > 0)
    {
        LearningAlgorithm.NumberOfTrees = NumTrees;
    }

    if (SamplePropotion > 0)
    {
        LearningAlgorithm.SampleRatio = SamplePropotion;
    }

    // Convert the lists once; the previous version copied each list to a
    // new array twice (once for learning, once for the error computation).
    double[][] inputs = trainingData.ToArray();
    int[] labels = trainingLabels.ToArray();

    Model = LearningAlgorithm.Learn(inputs, labels);

    if (calculateError)
    {
        TrainingError = new ZeroOneLoss(labels).Loss(Model.Decide(inputs));
    }
}
/// <summary>
/// Splits <paramref name="data"/> into a test set (the first
/// <paramref name="size"/> people) and a learning set (the rest), trains a
/// 10-tree random forest to predict <c>Survived</c> and prints the
/// percentage of correctly classified test people.
/// </summary>
/// <param name="data">All available people.</param>
/// <param name="size">Number of leading entries reserved for testing; also the divisor of the printed percentage.</param>
public Classifier(List<Person> data, int size)
{
    // Single definition of the feature vector, shared by training and testing
    Func<Person, double[]> toFeatures = p => new double[6] { p.Parch, p.Pclass, p.SibSp, p.Sex, p.Fare, p.Embarked };

    var testSet = data.Take(size).ToList();
    var learnSet = data.Skip(size).ToList();

    double[][] inputs = learnSet.Select(toFeatures).ToArray();
    int[] outputs = learnSet.Select(x => x.Survived).ToArray();

    // NOTE(review): the previous version declared DecisionVariable metadata
    // (Parch, Pclass, SibSp, Sex and Embarked as Discrete, Fare as Continuous)
    // but never passed it to the learner, so it had no effect and was removed.
    // Confirm whether it was meant to be given to RandomForestLearning.
    var teacher = new RandomForestLearning()
    {
        NumberOfTrees = 10,
    };

    var forest = teacher.Learn(inputs, outputs);
    var result = forest.Decide(testSet.Select(toFeatures).ToArray());

    int good = 0;
    for (int i = 0; i < result.Length; ++i)
    {
        if (result[i] == testSet[i].Survived)
        {
            good++;
        }
    }

    // Float arithmetic is kept so the printed value matches the previous output
    Console.WriteLine($"Good: {(float)good / size * 100}%");
}
/// <inheritdoc />
/// <remarks>
/// Trains a 20-tree random forest on the selected input features, treating
/// every feature as a continuous variable in the range [0, 1], and then
/// calls <c>Save()</c>.
/// </remarks>
public override void Train()
{
    var inputs = data.GetSelectedInput(features);
    var outputs = data.GetExpectedClassificationOutput();

    // One continuous variable per input column, named after its index.
    // The column count is taken from the first row.
    int featureCount = inputs[0].Length;
    var decisionVariables = new DecisionVariable[featureCount];
    for (int index = 0; index < featureCount; index++)
    {
        decisionVariables[index] = DecisionVariable.Continuous(index.ToString(), new DoubleRange(0.0, 1.0));
    }

    var teacher = new RandomForestLearning(decisionVariables);
    teacher.NumberOfTrees = 20;

    forest = teacher.Learn(inputs, outputs);

    Save();
}
/// <summary>
/// Trains the random forest classifier on data supplied as doubles. Inputs
/// and labels are first converted to integers through <c>TypeCasters</c>;
/// the training error is computed on request.
/// </summary>
/// <param name="trainingData">Training feature vectors.</param>
/// <param name="trainingLabels">Label of each training vector.</param>
/// <param name="calculateError">Whether the zero-one loss on the training data is stored in <c>TrainingError</c>.</param>
public override void Train(List<double[]> trainingData, List<double> trainingLabels, bool calculateError = true)
{
    LearningAlgorithm = new RandomForestLearning();

    // Only positive settings are applied to the learner
    if (NumTrees > 0)
        LearningAlgorithm.NumberOfTrees = NumTrees;

    if (SamplePropotion > 0)
        LearningAlgorithm.SampleRatio = SamplePropotion;

    // The forest is trained on the integer representation of the data
    int[][] integerInputs = TypeCasters.DoubleMultiArrayToInt(trainingData).ToArray();
    int[] integerLabels = TypeCasters.DoubleArrayToInt(trainingLabels).ToArray();

    Model = LearningAlgorithm.Learn(integerInputs, integerLabels);

    if (!calculateError)
        return;

    var loss = new ZeroOneLoss(integerLabels);
    TrainingError = loss.Loss(Model.Decide(integerInputs));
}
/// <summary>
/// Documentation sample and test: learns a 10-tree random forest on
/// Fisher's Iris data (loaded through <c>DataSets.Iris</c>) and checks the
/// number of trees and that the training error stays below 0.015.
/// </summary>
public void test_learn()
{
    #region doc_iris
    // Use a fixed random seed so the example is reproducible
    Accord.Math.Random.Generator.Seed = 1;

    // This example works on the famous Fisher's Iris dataset, where the
    // task is to decide whether the features of an Iris flower belong to
    // an Iris setosa, an Iris versicolor, or an Iris virginica:
    //
    //  - https://en.wikipedia.org/wiki/Iris_flower_data_set
    //
    // Start by loading the dataset:
    var iris = new DataSets.Iris();
    double[][] inputs = iris.Instances; // flower features
    int[] outputs = iris.ClassLabels;   // flower categories

    // Set up the forest learning algorithm with 10 trees
    var teacher = new RandomForestLearning();
    teacher.NumberOfTrees = 10;

    // Learn the random forest from the data
    var forest = teacher.Learn(inputs, outputs);

    // Class labels can be estimated with Decide
    int[] predicted = forest.Decide(inputs);

    // And the classification error (0.0006) is obtained from a zero-one loss
    double error = new ZeroOneLoss(outputs).Loss(forest.Decide(inputs));
    #endregion

    Assert.AreEqual(10, forest.Trees.Length);
    Assert.IsTrue(error < 0.015);
}
/// <summary>
/// Demo entry point: trains six different classifiers on a tiny XOR-style
/// data set and prints the predictions of each one to the console.
/// </summary>
/// <param name="args">Command-line arguments (unused).</param>
static void Main(string[] args)
{
    // sample input
    double[][] inputs =
    {
        new double[] { 0, 0 },
        new double[] { 1, 0 },
        new double[] { 0, 1 },
        new double[] { 1, 1 },
    };

    // sample binary output
    int[] outputs = { 0, 1, 1, 0, };

    // sample binary output for the neural network (one-hot encoded)
    double[][] nnOutputs =
    {
        new double[] { 1, 0 },
        new double[] { 0, 1 },
        new double[] { 0, 1 },
        new double[] { 1, 0 },
    };

    // sample multinomial output
    int[] multiOutputs = { 0, 1, 1, 2, };

    // 1. Binary Logistic Regression
    var learner = new IterativeReweightedLeastSquares<LogisticRegression>()
    {
        MaxIterations = 100
    };
    var model = learner.Learn(inputs, outputs);
    var preds = model.Decide(inputs);
    Console.WriteLine("\n\n*Binary Logistic Regression Predictions: {0}", String.Join(", ", preds));

    // 2. Multinomial Logistic Regression
    var learner2 = new MultinomialLogisticLearning<GradientDescent>()
    {
        MiniBatchSize = 4
    };
    var model2 = learner2.Learn(inputs, multiOutputs);
    var preds2 = model2.Decide(inputs);
    Console.WriteLine("\n\n*Multinomial Logistic Regression Predictions: {0}", String.Join(", ", preds2));

    // 3. Binary Naive Bayes Classifier
    var learner3 = new NaiveBayesLearning<NormalDistribution>();
    var model3 = learner3.Learn(inputs, outputs);
    // Fixed: predictions were previously taken from model2 (the multinomial
    // logistic regression), so the Naive Bayes model was never evaluated.
    var preds3 = model3.Decide(inputs);
    Console.WriteLine("\n\n*Binary Naive Bayes Predictions: {0}", String.Join(", ", preds3));

    // 4. RandomForest
    var learner4 = new RandomForestLearning()
    {
        NumberOfTrees = 3,
        CoverageRatio = 0.9,
        SampleRatio = 0.9
    };
    var model4 = learner4.Learn(inputs, outputs);
    var preds4 = model4.Decide(inputs);
    Console.WriteLine("\n\n*Binary RandomForest Classifier Predictions: {0}", String.Join(", ", preds4));

    // 5. SVM
    var learner5 = new SequentialMinimalOptimization<Gaussian>();
    // Fixed: the model was previously trained with 'learner' (logistic
    // regression), so the printed "SVM" predictions did not come from an SVM.
    var model5 = learner5.Learn(inputs, outputs);
    var preds5 = model5.Decide(inputs);
    Console.WriteLine("\n\n*Binary SVM Predictions: {0}", String.Join(", ", preds5));

    // 6. Neural Network
    var network = new ActivationNetwork(
        new BipolarSigmoidFunction(2),
        2,
        1,
        2
    );
    var teacher = new LevenbergMarquardtLearning(network);

    Console.WriteLine("\n-- Training Neural Network");
    int numEpoch = 3;
    double error = Double.PositiveInfinity;
    for (int i = 0; i < numEpoch; i++)
    {
        error = teacher.RunEpoch(inputs, nnOutputs);
        Console.WriteLine("* Epoch {0} - error: {1:0.0000}", i + 1, error);
    }

    // The predicted class is the index of the strongest output
    double[][] nnPreds = inputs.Select(x => network.Compute(x)).ToArray();
    int[] preds6 = nnPreds.Select(x => Array.IndexOf(x, x.Max())).ToArray();
    Console.WriteLine("\n\n*Binary Neural Network Predictions: {0}", String.Join(", ", preds6));

    Console.WriteLine("\n\n\n\nDONE!!");
    Console.ReadKey();
}
/// <summary>
/// Trains the multinomial logistic regression, random forest and minimum
/// mean distance classifiers on <c>PredictorPointsTrain</c> /
/// <c>FrequencyLabelsInt</c> and stores them on <c>Predictor</c>.
/// The support vector machine grid search is currently disabled.
/// </summary>
public static void TrainClassifiers()
{
    // ---- Logistic regression ----
    var logisticTeacher = new MultinomialLogisticLearning<GradientDescent>();
    Predictor.MultinomialLogisticRegression = logisticTeacher.Learn(PredictorPointsTrain, FrequencyLabelsInt);

    // ---- Random forest ----
    var forestTeacher = new RandomForestLearning();
    forestTeacher.NumberOfTrees = NumTrees;
    Predictor.RandomForest = forestTeacher.Learn(PredictorPointsTrain, FrequencyLabelsInt);

    // ---- Minimum mean distance ----
    Predictor.MinimumMeanDistance = new MinimumMeanDistanceClassifier();

    // Run the analysis, which fits the classifier in place
    Predictor.MinimumMeanDistance.Learn(PredictorPointsTrain, FrequencyLabelsInt);

    // ---- Support vector machine (disabled) ----
    // The grid search below is kept for reference only.
    //
    // // Declare the parameters and ranges to be searched
    // GridSearchRange[] ranges =
    // {
    //     new GridSearchRange("complexity", new double[] { 0.00000001, 5.20, 0.30, 0.50 } ),
    // };
    //
    // // Instantiate a new Grid Search algorithm for Kernel Support Vector Machines
    // var gridsearch = new GridSearch<SupportVectorMachine>(ranges);
    //
    // // Set the fitting function for the algorithm
    // gridsearch.Fitting = delegate (GridSearchParameterCollection parameters, out double error)
    // {
    //     // The parameters to be tried will be passed as a function parameter.
    //     double complexity = parameters["complexity"].Value;
    //
    //     // Use the parameters to build the SVM model
    //     SupportVectorMachine ksvm = new SupportVectorMachine( 2);
    //
    //     // Create a new learning algorithm for SVMs
    //     SequentialMinimalOptimization smo = new SequentialMinimalOptimization(ksvm, PredictorPointsTrain, FrequencyLabelsInt);
    //     smo.Complexity = complexity;
    //
    //     // Measure the model performance to return as an out parameter
    //     error = smo.Run();
    //
    //     return ksvm; // Return the current model
    // };
    //
    // // Declare some out variables to pass to the grid search algorithm
    // GridSearchParameterCollection bestParameters; double minError;
    //
    // // Compute the grid search to find the best Support Vector Machine
    // Predictor.SVM = gridsearch.Compute(out bestParameters, out minError);
}
/// <summary>
/// Documentation sample and test: learns a single-tree random forest on the
/// Nursery data set (loaded through <c>DataSets.Nursery</c>) and verifies
/// that it reproduces every training label.
/// </summary>
public void LargeRunTest()
{
    string localPath = Path.Combine(NUnit.Framework.TestContext.CurrentContext.TestDirectory, "rf");

    #region doc_nursery
    // Use a fixed random seed so the example is reproducible
    Accord.Math.Random.Generator.Seed = 1;

    // This example uses the Nursery Database available from the University of
    // California Irvine repository of machine learning databases, available at
    //
    //   http://archive.ics.uci.edu/ml/machine-learning-databases/nursery/nursery.names
    //
    // The description paragraph is listed as follows.
    //
    //   Nursery Database was derived from a hierarchical decision model
    //   originally developed to rank applications for nursery schools. It
    //   was used during several years in 1980's when there was excessive
    //   enrollment to these schools in Ljubljana, Slovenia, and the
    //   rejected applications frequently needed an objective
    //   explanation. The final decision depended on three subproblems:
    //   occupation of parents and child's nursery, family structure and
    //   financial standing, and social and health picture of the family.
    //   The model was developed within expert system shell for decision
    //   making DEX (M. Bohanec, V. Rajkovic: Expert system for decision
    //   making. Sistemica 1(1), pp. 145-157, 1990.).
    //
    // Begin by loading the data set through the Nursery helper class,
    // which is given the local path computed above.
    //
    var nursery = new DataSets.Nursery(path: localPath);
    int[][] inputs = nursery.Instances;
    int[] outputs = nursery.ClassLabels;

    // Set up the forest learning algorithm: one tree, sample ratio 1.0
    var teacher = new RandomForestLearning(nursery.VariableNames);
    teacher.NumberOfTrees = 1;
    teacher.SampleRatio = 1.0;

    // Learn the random forest from the data
    var forest = teacher.Learn(inputs, outputs);

    // Class labels can be estimated with Decide
    int[] predicted = forest.Decide(inputs);

    // And the classification error (0) is obtained from a zero-one loss
    double error = new ZeroOneLoss(outputs).Loss(forest.Decide(inputs));
    #endregion

    Assert.AreEqual(0, error, 1e-10);
    Assert.IsTrue(outputs.IsEqual(predicted));
    Assert.AreEqual(0, error);

    // Every single sample must also be classified correctly through Compute
    for (int row = 0; row < inputs.Length; row++)
    {
        int expected = outputs[row];
        int actual = forest.Compute(inputs[row].ToDouble());

        Assert.AreEqual(expected, actual);
    }
}
/// <summary>
/// Learns a forest from the given samples with the configured teacher and
/// stores it in <c>_forest</c>.
/// </summary>
/// <param name="inputs">Training feature vectors.</param>
/// <param name="outputs">Class label of each training vector.</param>
/// <returns>Always <c>0.0</c>; no training error is computed here.</returns>
public double Train(double[][] inputs, int[] outputs)
{
    var trained = _teacher.Learn(inputs, outputs);
    _forest = trained;

    return 0.0;
}
/// <summary>
/// Documentation sample and test: parses the embedded Nursery data into a
/// <c>DataTable</c>, encodes it with a codebook, learns a single-tree
/// random forest and verifies that it reproduces every training label.
/// </summary>
public void LargeRunTest()
{
    #region doc_nursery
    // Use a fixed random seed so the example is reproducible
    Accord.Math.Random.Generator.Seed = 1;

    // This example uses the Nursery Database available from the University of
    // California Irvine repository of machine learning databases, available at
    //
    //   http://archive.ics.uci.edu/ml/machine-learning-databases/nursery/nursery.names
    //
    // The description paragraph is listed as follows.
    //
    //   Nursery Database was derived from a hierarchical decision model
    //   originally developed to rank applications for nursery schools. It
    //   was used during several years in 1980's when there was excessive
    //   enrollment to these schools in Ljubljana, Slovenia, and the
    //   rejected applications frequently needed an objective
    //   explanation. The final decision depended on three subproblems:
    //   occupation of parents and child's nursery, family structure and
    //   financial standing, and social and health picture of the family.
    //   The model was developed within expert system shell for decision
    //   making DEX (M. Bohanec, V. Rajkovic: Expert system for decision
    //   making. Sistemica 1(1), pp. 145-157, 1990.).
    //
    // Begin by loading the raw data. This string variable holds the
    // contents of the nursery.data file as a single, continuous text.
    //
    string nurseryData = Resources.nursery;

    // The input columns available in the data
    //
    string[] inputColumns =
    {
        "parents", "has_nurs", "form", "children",
        "housing", "finance", "social", "health"
    };

    // The output is the last column of the data.
    //
    string outputColumn = "output";

    // Populate a data table with this information.
    //
    DataTable table = new DataTable("Nursery");
    table.Columns.Add(inputColumns);
    table.Columns.Add(outputColumn);

    string[] lines = nurseryData.Split(
        new[] { "\r\n" }, StringSplitOptions.RemoveEmptyEntries);

    foreach (var row in lines)
    {
        string[] cells = row.Split(',');
        table.Rows.Add(cells);
    }

    // The textual, categorical data in the table now has to be converted
    // into a more manageable discrete representation.
    //
    // A codebook takes care of translating text into discrete
    // integer symbols:
    //
    Codification codebook = new Codification(table);

    // Convert all data into symbols
    //
    DataTable symbols = codebook.Apply(table);
    double[][] inputs = symbols.ToArray(inputColumns);
    int[] outputs = symbols.ToArray<int>(outputColumn);

    // From here on the decision variables can be described.
    //
    var attributes = DecisionVariable.FromCodebook(codebook, inputColumns);

    // Set up the forest learning algorithm: one tree, sample ratio 1.0
    var teacher = new RandomForestLearning(attributes);
    teacher.NumberOfTrees = 1;
    teacher.SampleRatio = 1.0;

    // Learn the random forest from the data
    var forest = teacher.Learn(inputs, outputs);

    // Class labels can be estimated with Decide
    int[] predicted = forest.Decide(inputs);

    // And the classification error (0) is obtained from a zero-one loss
    double error = new ZeroOneLoss(outputs).Loss(forest.Decide(inputs));
    #endregion

    Assert.AreEqual(0, error, 1e-10);
    Assert.IsTrue(outputs.IsEqual(predicted));

    // Sanity checks on the parsed resource
    Assert.AreEqual(12960, lines.Length);
    Assert.AreEqual("usual,proper,complete,1,convenient,convenient,nonprob,recommended,recommend", lines[0]);
    Assert.AreEqual("great_pret,very_crit,foster,more,critical,inconv,problematic,not_recom,not_recom", lines[lines.Length - 1]);

    Assert.AreEqual(0, error);

    // Every single sample must also be classified correctly through Compute
    for (int row = 0; row < inputs.Length; row++)
    {
        int expected = outputs[row];
        int actual = forest.Compute(inputs[row]);

        Assert.AreEqual(expected, actual);
    }
}
/// <summary>
/// Work-in-progress collaborator selection: loads the historical requests
/// from a CSV file, encodes them with a codebook and trains a single-tree
/// random forest that predicts the "Colaborador" column.
/// </summary>
/// <remarks>
/// The trained forest and its error measures are currently only computed
/// locally; none of the parameters is used yet and the returned list is
/// always empty.
/// </remarks>
/// <param name="pGerencia">Currently unused.</param>
/// <param name="pClassificacao">Currently unused.</param>
/// <param name="pDificuldade">Currently unused.</param>
/// <param name="pPrioridade">Currently unused.</param>
/// <param name="pTipoFalha">Currently unused.</param>
/// <returns>An empty list of collaborators.</returns>
private List<Colaborador> metodoquedefineoscolaboradores(int pGerencia, int pClassificacao, int pDificuldade, int pPrioridade, int pTipoFalha)
{
    // NOTE(review): machine-specific absolute path; consider making it configurable.
    string filename = "C:/Users/Oscar/Downloads/solics-completo.csv";

    // Dispose the reader as soon as the table is loaded; it was previously
    // never disposed, leaving the underlying file handle open.
    DataTable table;
    using (CsvReader reader = new CsvReader(filename, hasHeaders: true))
    {
        table = reader.ToTable();
    }

    // Single definition of the input columns (previously repeated inline)
    string[] inputColumns = new[] { "Id", "Gerencia", "Cliente", "Sistema", "Modulo", "Classificacao", "TipoFalha", "Prioridade", "Dificuldade" };

    /*var codification1 = new Codification()
     * {
     *      { "Id", CodificationVariable.Ordinal},
     *      { "Gerencia", CodificationVariable.Discrete },
     *      { "Cliente", CodificationVariable.Discrete },
     *      { "Sistema", CodificationVariable.Discrete },
     *      { "Modulo", CodificationVariable.Discrete },
     *      { "Classificacao", CodificationVariable.Discrete},
     *      { "TipoFalha", CodificationVariable.Discrete },
     *      { "Prioridade", CodificationVariable.Discrete },
     *      { "Dificuldade", CodificationVariable.Discrete },
     * };
     *
     * var codification2 = new Codification()
     * {
     *     DefaultMissingValueReplacement = Double.NaN
     * };
     *
     * codification1.Learn(table);
     * codification2.Learn(table);
     */

    Codification codebook = new Codification(table);

    //DataTable symbols = codification.Apply(table);
    //int[] outputis = symbols.ToArray<int>("COLABORADOR");

    // Transform the symbols into a jagged integer matrix; inputNames receives
    // the column names reported by the conversion.
    //double[][] x = codification.Transform(table, inputNames, out inputNames).ToDouble();
    string[] inputNames;
    int[][] x = codebook.Apply(table, inputColumns).ToJagged(out inputNames).ToInt32();

    //double[][] y = codification.Transform(table, outputNames, out outputNames).ToDouble();
    //int[] y = codification.Apply(table, "COLABORADOR").ToJagged(out outputName);
    string outputName; // receives the name of the generated output variable
    int[] outputs = codebook.Apply(table, "Colaborador").ToVector(out outputName).ToInt32();

    /*
     * // Create the multi-class learning algorithm for the machine
     * var teacher = new MultilabelSupportVectorLearning<Gaussian>()
     * {
     *  // Configure the learning algorithm to use SMO to train the
     *  //  underlying SVMs in each of the binary class subproblems.
     *  Learner = (param) => new SequentialMinimalOptimization<Gaussian>()
     *  {
     *      // Estimate a suitable guess for the Gaussian kernel's parameters.
     *      // This estimate can serve as a starting point for a grid search.
     *      UseKernelEstimation = true
     *  }
     * };
     *
     * // Learn a machine
     * var machine = teacher.Learn(x, outputs);
     *
     * // Create the multi-class learning algorithm for the machine
     * var calibration = new MultilabelSupportVectorLearning<Gaussian>()
     * {
     *  Model = machine, // We will start with an existing machine
     *
     *  // Configure the learning algorithm to use SMO to train the
     *  //  underlying SVMs in each of the binary class subproblems.
     *  Learner = (param) => new ProbabilisticOutputCalibration<Gaussian>()
     *  {
     *      Model = param.Model // Start with an existing machine
     *  }
     * };
     *
     *
     * // Configure parallel execution options
     * calibration.ParallelOptions.MaxDegreeOfParallelism = 1;
     *
     * // Learn a machine
     * calibration.Learn(x, outputs);
     *
     * // Obtain class predictions for each sample
     * bool[][] predicted = machine.Decide(x);
     *
     * // Get class scores for each sample
     * double[][] scores = machine.Scores(x);
     *
     * // Get log-likelihoods (should be same as scores)
     * double[][] logl = machine.LogLikelihoods(x);
     *
     * // Get probability for each sample
     * double[][] prob = machine.Probabilities(x);
     *
     * // Compute classification error using mean accuracy (mAcc)
     * double error = new HammingLoss(outputs).Loss(predicted);
     * double loss = new CategoryCrossEntropyLoss(outputs).Loss(prob);*/

    Accord.Math.Random.Generator.Seed = 1;

    DecisionVariable[] Attributes = DecisionVariable.FromCodebook(codebook, inputNames);

    // Create the forest learning algorithm
    var teacher = new RandomForestLearning(Attributes)
    {
        NumberOfTrees = 1,
        SampleRatio = 1.0
    };

    // Learn a random forest from the data
    var forest = teacher.Learn(x, outputs);

    // Estimate the class labels once and reuse them for both loss measures
    // (the forest was previously evaluated twice on the same inputs).
    int[] predicted = forest.Decide(x);

    // Classification error as zero-one loss
    double error = new ZeroOneLoss(outputs).Loss(predicted);

    // Classification error as Hamming loss
    double error2 = new HammingLoss(outputs).Loss(predicted);

    List<Colaborador> lLista = new List<Colaborador>();
    //lLista.AddRange(db.Colaborador.Where(x => x.Nome.StartsWith("J")).OrderBy(x => x.Nome));

    return lLista;
}