/// <summary>
/// Trains a random forest on the training set, prints its zero-one loss on the
/// test set to the console and saves the trained forest to disk.
/// </summary>
/// <param name="train_data">Feature vectors used to train the forest.</param>
/// <param name="test_data">Feature vectors used to evaluate the forest.</param>
/// <param name="train_label">Class labels matching <paramref name="train_data"/>.</param>
/// <param name="test_label">Class labels matching <paramref name="test_data"/>.</param>
/// <param name="Classifier_Path">Directory part of the location the forest is saved to.</param>
/// <param name="Classifier_Name">File-name part of the location the forest is saved to.</param>
/// <param name="NumOfTrees">Number of trees the forest is built with.</param>
public void RandomForestLearning(double[][] train_data, double[][] test_data, int[] train_label, int[] test_label, String Classifier_Path, String Classifier_Name, int NumOfTrees = 20)
{
    var learner = new RandomForestLearning();
    learner.NumberOfTrees = NumOfTrees;

    var trainedForest = learner.Learn(train_data, train_label);

    // Evaluate on the held-out samples and report the loss on the console.
    int[] testPredictions = trainedForest.Decide(test_data);
    var loss = new ZeroOneLoss(test_label);
    double testError = loss.Loss(testPredictions);
    Console.WriteLine(testError);

    string targetFile = Path.Combine(Classifier_Path, Classifier_Name);
    trainedForest.Save(targetFile);
}
public void LearnTest()
{
    // XOR problem with both the inputs and the class labels encoded as -1 / +1.
    double[][] inputs =
    {
        new double[] { -1, -1 },
        new double[] { -1, 1 },
        new double[] { 1, -1 },
        new double[] { 1, 1 }
    };
    int[] xor = { -1, 1, 1, -1 };

    // Pass every sample through the Transform of the Polynomial(2, 0.0) kernel.
    var kernel = new Polynomial(2, 0.0);
    var augmented = new double[inputs.Length][];
    int row = 0;
    foreach (double[] sample in inputs)
    {
        augmented[row] = kernel.Transform(sample);
        row++;
    }

    // Train with stochastic gradient descent on the transformed samples.
    var learn = new StochasticGradientDescent();
    learn.LearningRate = 1e-3;
    var svm = learn.Learn(augmented, xor);

    // The machine must classify its own training set without mistakes.
    bool[] predicted = svm.Decide(augmented);
    var loss = new ZeroOneLoss(xor);
    double error = loss.Loss(predicted);
    Assert.AreEqual(0, error);

    // The sign of the raw machine output must also agree with every label.
    int[] output = augmented.Apply(p => Math.Sign(svm.Compute(p)));
    int index = 0;
    foreach (int sign in output)
    {
        Assert.AreEqual(System.Math.Sign(xor[index]), System.Math.Sign(sign));
        index++;
    }
}
/// <summary>
/// Click handler that, on a background task, builds the training set from
/// <c>Images</c>, trains a multi-class support vector machine on it and shows
/// the resulting training error in the <c>Error</c> control.
/// </summary>
/// <param name="sender">Event source (not used).</param>
/// <param name="args">Event arguments (not used).</param>
public void Learn_Clicked(object sender, EventArgs args)
{
    Task.Factory.StartNew(() =>
    {
        var bow = CreateBow();
        foreach (var image in Images)
        {
            TrainingData.Add(GetData(image, bow));
        }

        // Snapshot both collections once; learning and scoring use the same arrays.
        var inputs = TrainingData.ToArray();
        var outputs = Tags.ToArray();

        var kernel = new Polynomial();
        var teacher = new MulticlassSupportVectorLearning<IKernel>()
        {
            Kernel = kernel,
            Learner = (param) => new SequentialMinimalOptimization<IKernel>()
            {
                Kernel = kernel
            }
        };

        var svm = teacher.Learn(inputs, outputs);
        var error = new ZeroOneLoss(outputs).Loss(svm.Decide(inputs));

        // The control is updated through its dispatcher because this code runs off the UI thread.
        Error.Dispatcher.Invoke(() => Error.Text = error.ToString());
    });
}
// Use this for initialization
void Start()
{
    // Training inputs: truth table of two logical values (1 is true, 0 is false).
    int[][] inputs =
    {
        new int[] { 1, 0 },
        new int[] { 0, 1 },
        new int[] { 0, 0 },
        new int[] { 1, 1 },
    };

    // Training outputs: logical AND of each input pair.
    int[] outputs = { 0, 0, 0, 1 };

    // Samples the learned tree is queried with after training.
    int[][] exampleData =
    {
        new int[] { 1, 1 },
        new int[] { 0, 0 },
        new int[] { 1, 0 },
        new int[] { 0, 1 },
    };

    // Learn a decision tree for the AND problem with the ID3 algorithm.
    var teacher = new ID3Learning();
    var tree = teacher.Learn(inputs, outputs);

    // Training error of the learned tree.
    var loss = new ZeroOneLoss(outputs);
    double error = loss.Loss(tree.Decide(inputs));
    Debug.Log("Houve erro?" + error);

    // Query the tree with the example data; for the AND function the
    // answers should be { 1, 0, 0, 0 }.
    int[] predicted = tree.Decide(exampleData);
    foreach (int answer in predicted)
    {
        Debug.Log(answer);
    }
}
/// <summary>
/// Learns a C4.5 decision tree from <c>DataSets[1]</c>, scores it on
/// <c>DataSets[0]</c>, hands the result to <c>PrintReport</c> and logs the
/// error and the elapsed time to the console.
/// </summary>
public void TreeLearning()
{
    Console.WriteLine("SottoProgramma chiamato: TreeLearning.");
    this.clock = DateTime.Now;

    var teacher = new C45Learning();
    DecisionTree TreeAlgorithm = teacher.Learn(DataSets[1].ItemsFeatures, DataSets[1].CatIDs);

    int[] predicted = TreeAlgorithm.Decide(DataSets[0].ItemsFeatures);
    var loss = new ZeroOneLoss(DataSets[0].CatIDs);
    double error = loss.Loss(predicted);

    PrintReport(predicted, error, "Tree");
    Console.WriteLine("SottoProgramma TreeLearning terminato.\nErrore: {0}", error);

    // Elapsed wall-clock time since the start stamp stored in the clock field.
    var elapsedSeconds = (DateTime.Now - clock).TotalSeconds;
    Console.WriteLine("Tempo richiesto per l'operazione: " + elapsedSeconds + " secondi.");
}
public void kaggle_digits_with_compress()
{
    string root = Environment.CurrentDirectory;
    var training = Properties.Resources.trainingsample;
    var validation = Properties.Resources.validationsample;

    var trainingSet = readData(training);
    var observations = trainingSet.Item1;
    var labels = trainingSet.Item2;

    var teacher = new MulticlassSupportVectorLearning<Linear>();
    var svm = teacher.Learn(observations, labels);

    // Support vector counts of the first two machines before compression.
    Assert.AreEqual(50, svm.Models[0][0].SupportVectors.Length);
    Assert.AreEqual(127, svm.Models[1][0].SupportVectors.Length);

    svm.Compress();

    // After compression each of them holds a single support vector.
    Assert.AreEqual(1, svm.Models[0][0].SupportVectors.Length);
    Assert.AreEqual(1, svm.Models[1][0].SupportVectors.Length);

    // Mean zero-one loss on the training sample.
    var trainingLoss = new ZeroOneLoss(labels);
    trainingLoss.Mean = true;
    double trainingError = trainingLoss.Loss(svm.Decide(observations));
    Assert.AreEqual(0.054, trainingError);

    // Mean zero-one loss on the validation sample.
    var validationSet = readData(validation);
    var validationData = validationSet.Item1;
    var validationLabels = validationSet.Item2;
    var validationLoss = new ZeroOneLoss(validationLabels);
    validationLoss.Mean = true;
    double validationError = validationLoss.Loss(svm.Decide(validationData));
    Assert.AreEqual(0.082, validationError);
}
/// <summary>
/// Fills <c>dtStatic</c> with the "play tennis" sample records, builds the
/// codebook <c>myCodeBook</c> from it, learns the ID3 decision tree
/// <c>myTreeModel</c> and prints the training accuracy as a percentage.
/// </summary>
private static void initDecisionTreeModel()
{
    dtStatic.Columns.Add("Day", "Outlook", "Temperature", "Humidity", "Wind", "PlayTennis");

    dtStatic.Rows.Add("D1", "Sunny", "Hot", "High", "Weak", "No");
    dtStatic.Rows.Add("D2", "Sunny", "Hot", "High", "Strong", "No");
    dtStatic.Rows.Add("D3", "Overcast", "Hot", "High", "Weak", "Yes");
    dtStatic.Rows.Add("D4", "Rain", "Mild", "High", "Weak", "Yes");
    dtStatic.Rows.Add("D5", "Rain", "Cool", "Normal", "Weak", "Yes");
    dtStatic.Rows.Add("D6", "Rain", "Cool", "Normal", "Strong", "No");
    dtStatic.Rows.Add("D7", "Overcast", "Cool", "Normal", "Strong", "Yes");
    dtStatic.Rows.Add("D8", "Sunny", "Mild", "High", "Weak", "No");
    dtStatic.Rows.Add("D9", "Sunny", "Cool", "Normal", "Weak", "Yes");
    dtStatic.Rows.Add("D10", "Rain", "Mild", "Normal", "Weak", "Yes");
    dtStatic.Rows.Add("D11", "Sunny", "Mild", "Normal", "Strong", "Yes");
    dtStatic.Rows.Add("D12", "Overcast", "Mild", "High", "Strong", "Yes");
    dtStatic.Rows.Add("D13", "Overcast", "Hot", "Normal", "Weak", "Yes");
    dtStatic.Rows.Add("D14", "Rain", "Mild", "High", "Strong", "No");
    dtStatic.Rows.Add("D15", "Rain", "Cool", "High", "Strong", "No");
    dtStatic.Rows.Add("D16", "Rain", "Hot", "High", "Strong", "Yes");
    dtStatic.Rows.Add("D17", "Rain", "Hot", "High", "Weak", "Yes");
    dtStatic.Rows.Add("D18", "Rain", "Cool", "High", "Weak", "No");
    dtStatic.Rows.Add("D19", "Rain", "Cool", "High", "Weak", "Yes");
    dtStatic.Rows.Add("D20", "Rain", "Mild", "High", "Strong", "Yes");

    // Translate the string records into integer symbols through the codebook.
    myCodeBook = new Codification(dtStatic);
    DataTable symbols = myCodeBook.Apply(dtStatic);
    int[][] inputs = symbols.ToJagged<int>("Outlook", "Temperature", "Humidity", "Wind");
    int[] outputs = symbols.ToArray<int>("PlayTennis");

    var id3learning = new ID3Learning()
    {
        new DecisionVariable("Outlook", 3),     // 3 possible values (Sunny, overcast, rain)
        new DecisionVariable("Temperature", 3), // 3 possible values (Hot, mild, cool)
        new DecisionVariable("Humidity", 2),    // 2 possible values (High, normal)
        new DecisionVariable("Wind", 2)         // 2 possible values (Weak, strong)
    };

    myTreeModel = id3learning.Learn(inputs, outputs);

    // The loss is a fraction between 0 and 1 (share of misclassified rows), so the
    // accuracy percentage is 100 * (1 - error), not 100 - error.
    double error = new ZeroOneLoss(outputs).Loss(myTreeModel.Decide(inputs));
    double accuracyPercent = 100 * (1 - error);
    Console.WriteLine("learnt model training accuracy is: " + accuracyPercent.ToString("N2"));
}
/// <summary>
/// Trains one random forest per iteration, each time scoring it on a test
/// window that slides backwards through the data, and returns the forest
/// with the highest test accuracy.
/// </summary>
/// <param name="trainingData">Passed through to <c>PrepareInputOutput</c>.</param>
/// <param name="testingData">Passed through to <c>PrepareInputOutput</c>; its count fixes the window size.</param>
/// <param name="precision">Receives one minus the error averaged over all iterations.</param>
/// <returns>The best-scoring forest, or null when no iteration scored above zero.</returns>
static RandomForest RandomForestClassification(List<int[]> trainingData, List<int[]> testingData, out double precision)
{
    int testingCount = testingData.Count / 10;
    int trainingCount = testingData.Count - testingCount; // not read below
    int indexTestingEnd = testingData.Count;
    int indexTestingStart = indexTestingEnd - testingCount;

    double errorSum = 0;
    double bestAccuracy = 0;
    RandomForest bestforest = null;

    Console.WriteLine("Random Forest Classification");

    for (int iteration = 0; iteration < iterations; iteration++)
    {
        var watch = System.Diagnostics.Stopwatch.StartNew();
        Console.WriteLine("Testing from: {0} to {1}", indexTestingStart, indexTestingEnd);

        int[][] inputData, testinputData;
        int[] outputData, testoutputData;
        PrepareInputOutput(out inputData, out outputData, out testinputData, out testoutputData,
                           trainingData, testingData, indexTestingStart, indexTestingEnd);

        var learner = new RandomForestLearning();
        learner.NumberOfTrees = 100;
        var forest = learner.Learn(inputData, outputData);
        Console.WriteLine("Medis sukurtas - ismokta");

        double er = new ZeroOneLoss(testoutputData).Loss(forest.Decide(testinputData));
        double accuracy = 1 - er;
        Console.WriteLine("Apmokymo tikslumas: {0}", accuracy);

        // Remember the forest that scored best so far.
        if (accuracy > bestAccuracy)
        {
            bestAccuracy = accuracy;
            bestforest = forest;
        }

        watch.Stop();
        Console.WriteLine("Iteracija baigta per: {0}ms", watch.ElapsedMilliseconds);

        // Slide the test window one block towards the start of the data.
        indexTestingEnd = indexTestingStart;
        indexTestingStart -= testingCount;
        errorSum += er;

        Console.WriteLine("------------------------------------------------------------------------------");
    }

    precision = 1 - (errorSum / iterations);
    return bestforest;
}
/// <summary>
/// Flattens <c>trainingVects</c> into input/output arrays, learns a C4.5
/// decision tree with two continuous attributes from them and evaluates the
/// tree. All results are kept in locals only (useful under a debugger).
/// </summary>
private void ParameterLearning()
{
    // Count the samples across all lists; the lists need not have equal lengths.
    int sampleCount = 0;
    for (int i = 0; i < trainingVects.Length; i++)
    {
        sampleCount += trainingVects[i].Count;
    }

    if (sampleCount == 0)
    {
        return; // nothing to learn from
    }

    int[][] inputs = new int[sampleCount][];
    int[] outputs = new int[sampleCount];

    // A running index keeps the flattened arrays dense whatever the list lengths are.
    int next = 0;
    for (int i = 0; i < trainingVects.Length; i++)
    {
        for (int j = 0; j < trainingVects[i].Count; j++)
        {
            // Elements 0 and 1 are the two features; element 2 minus one is the class label.
            inputs[next] = new int[] { (int)trainingVects[i][j][0], (int)trainingVects[i][j][1] };
            outputs[next] = (int)trainingVects[i][j][2] - 1;
            next++;
        }
    }

    // Create a C4.5 learning algorithm with two continuous attributes.
    C45Learning teacher = new C45Learning();
    DecisionVariable var1 = new DecisionVariable("A", new Accord.DoubleRange(0, 100));
    DecisionVariable var2 = new DecisionVariable("B", new Accord.DoubleRange(0, 100));
    var1.Nature = DecisionVariableKind.Continuous;
    var2.Nature = DecisionVariableKind.Continuous;
    teacher.Attributes.Add(var1);
    teacher.Attributes.Add(var2);

    var tree = teacher.Learn(inputs, outputs);

    // Walk the rules derived from the tree (values are only inspected, not stored).
    var r = tree.ToRules();
    for (int i = 0; i < r.Count; i++)
    {
        double o = r.ElementAt(i).Output;
        string name1 = r.ElementAt(i).Variables.ElementAt(0).Name;
        string name2 = r.ElementAt(i).Variables.ElementAt(1).Name;
    }

    // Predict the training set once and derive the training error from it.
    int[] predicted = tree.Decide(inputs);
    double error = new ZeroOneLoss(outputs).Loss(predicted);

    // Query the tree with a single hand-written sample.
    int[][] inputs2 = new int[1][];
    inputs2[0] = new int[2] { 80, 81 };
    var tmp = tree.Decide(inputs2);
}
static void Main(string[] args)
{
    // Learn a decision tree directly from integer matrices that define the
    // inputs and outputs of the learning problem.
    int[][] inputs =
    {
        new int[] { 0, 0 },
        new int[] { 0, 1 },
        new int[] { 1, 0 },
        new int[] { 1, 1 },
    };

    // One class label (0, 1 or 2) per input row.
    int[] outputs = { 0, 2, 1, 2 };

    // Create a C4.5 learning algorithm with two continuous attributes.
    var teacher = new C45Learning();
    var firstAttribute = new DecisionVariable("0", new Accord.DoubleRange(0, 999));
    var secondAttribute = new DecisionVariable("1", new Accord.DoubleRange(0, 999));
    firstAttribute.Nature = DecisionVariableKind.Continuous;
    secondAttribute.Nature = DecisionVariableKind.Continuous;
    teacher.Attributes.Add(firstAttribute);
    teacher.Attributes.Add(secondAttribute);

    // Learn the decision tree.
    var tree = teacher.Learn(inputs, outputs);

    // Walk the rules derived from the tree (values are only inspected, not stored).
    var rules = tree.ToRules();
    for (int ruleIndex = 0; ruleIndex < rules.Count; ruleIndex++)
    {
        double o = rules.ElementAt(ruleIndex).Output;
        string name1 = rules.ElementAt(ruleIndex).Variables.ElementAt(0).Name;
        string name2 = rules.ElementAt(ruleIndex).Variables.ElementAt(1).Name;
    }

    // Compute the error in the learning.
    var loss = new ZeroOneLoss(outputs);
    double error = loss.Loss(tree.Decide(inputs));

    // The tree can now be queried; here it is asked about the training rows again.
    int[] predicted = tree.Decide(inputs);
}
/// <summary>
/// Reads the comma-separated run data from "../runData2_1.txt", encodes it
/// with a codebook (stored in <c>codebook</c>) and learns an ID3 decision tree
/// (stored in <c>tree</c>) that predicts the "GoRun" column.
/// </summary>
public ClassifierWReview()
{
    // "../runData2.txt" is the other data file this classifier has been run with.
    string filedata = System.IO.File.ReadAllText("../runData2_1.txt");

    string[] inputColumns =
    {
        "Day", "Outlook", "Temperature", "Humidity", "Wind", "SprintReview"
    };
    string outputColumn = "GoRun";

    DataTable data = new DataTable("Internet Services Run Calculator");
    data.Columns.Add(inputColumns);
    data.Columns.Add(outputColumn);

    // Accept Windows, Unix and old-Mac line endings; splitting on "\r\n" alone
    // would turn an LF-only file into a single oversized row.
    string[] lines = filedata.Split(
        new[] { "\r\n", "\n", "\r" }, StringSplitOptions.RemoveEmptyEntries);

    foreach (var line in lines)
    {
        data.Rows.Add(line.Split(','));
    }

    // Create the codebook that turns the strings into number representations.
    codebook = new Accord.Statistics.Filters.Codification(data);

    // Translate the training data into integer symbols using the codebook.
    DataTable symbols = codebook.Apply(data);
    int[][] inputs = symbols.ToJagged<int>("Outlook", "Temperature", "Humidity", "Wind", "SprintReview");
    int[] outputs = symbols.ToArray<int>("GoRun");

    string[] decisionVariables = { "Outlook", "Temperature", "Humidity", "Wind", "SprintReview" };
    DecisionVariable[] attributes = DecisionVariable.FromCodebook(codebook, decisionVariables);

    // Create the ID3 teacher and learn the tree.
    var id3learning = new ID3Learning(attributes);
    tree = id3learning.Learn(inputs, outputs);

    // Training error when predicting the training instances (kept in a local only).
    double error = new ZeroOneLoss(outputs).Loss(tree.Decide(inputs));
}
/// <summary>
/// Runs probabilistic output calibration on the machines of <c>_svm</c> using
/// the given samples, then prints accuracy, variance, kappa and zero-one error
/// of the calibrated machine on those samples and stores the confusion matrix
/// in <c>_cm</c>.
/// </summary>
/// <param name="inputs">Calibration samples.</param>
/// <param name="outputs">Class labels of the calibration samples.</param>
/// <param name="weights">Not read by this method.</param>
public void CalibrateSVM(double[][] inputs, int[] outputs, double[] weights)
{
    // Z-score the new samples together with the stored ones, then keep only
    // the rows that belong to the new samples.
    var combined = _inputs.Take(_inputs.Count).Concat(inputs).ToArray();
    combined = Accord.Statistics.Tools.ZScores(combined);
    inputs = combined.Skip(_inputs.Count).Take(inputs.Length).ToArray();

    if (_pcaTransform)
    {
        inputs = _pca.Transform(inputs);
    }

    var calibration = new MulticlassSupportVectorLearning<Gaussian>();
    calibration.Model = _svm;
    calibration.Learner = (param) => new ProbabilisticOutputCalibration<Gaussian>()
    {
        Model = param.Model
    };

    //calibration.ParallelOptions.MaxDegreeOfParallelism = 4;
    calibration.Learn(inputs, outputs);

    _svm.Method = MulticlassComputeMethod.Elimination;

    var predicted = _svm.Decide(inputs);
    var loss = new ZeroOneLoss(outputs);
    var error = loss.Loss(predicted);

    // The confusion matrix is built for 3 classes.
    var cm = new GeneralConfusionMatrix(3, outputs, predicted);
    Console.WriteLine("Accuracy: {0} Variance: {1} Kappa: {2} Error: {3}", cm.Accuracy, cm.Variance, cm.Kappa, error);
    _cm = cm;
}
/// <summary>
/// Trains one ID3 decision tree per iteration, each time scoring it on a test
/// window that slides backwards through the data, and returns the tree with
/// the highest test accuracy.
/// </summary>
/// <param name="trainingData">Passed through to <c>PrepareInputOutput</c>.</param>
/// <param name="testingData">Passed through to <c>PrepareInputOutput</c>; its count fixes the window size.</param>
/// <param name="precision">Receives one minus the error averaged over all iterations.</param>
/// <returns>The best-scoring tree, or null when no iteration ran.</returns>
static DecisionTree DecisionTreeClassification(List<int[]> trainingData, List<int[]> testingData, out double precision)
{
    int testingCount = testingData.Count / 10;
    double errorAverage = 0;
    int indexTestingStart = testingData.Count - testingCount;
    int indexTestingEnd = testingData.Count;
    double prec = 0;

    Console.WriteLine("Decision Tree Classification");
    DecisionTree bestDecision = null;

    for (int i = 0; i < iterations; i++)
    {
        var watch = System.Diagnostics.Stopwatch.StartNew();
        Console.WriteLine("Testing from: {0} to {1}", indexTestingStart, indexTestingEnd);

        int[][] inputData, testinputData;
        int[] outputData, testoutputData;
        PrepareInputOutput(out inputData, out outputData, out testinputData, out testoutputData,
                           trainingData, testingData, indexTestingStart, indexTestingEnd);

        ID3Learning teacher = new ID3Learning();
        var decision = teacher.Learn(inputData, outputData);
        Console.WriteLine("Medis sukurtas - ismokta");

        double error = new ZeroOneLoss(testoutputData).Loss(decision.Decide(testinputData));
        double accuracy = 1 - error;
        Console.WriteLine("Apmokymo tikslumas: {0}", accuracy);

        // Keep the best tree seen so far. The first tree is always taken so that a
        // run in which every tree scores zero still returns a model. The tree must
        // NOT be reassigned unconditionally afterwards, or the last iteration wins.
        if (bestDecision == null || accuracy > prec)
        {
            prec = accuracy;
            bestDecision = decision;
        }

        watch.Stop();
        var elapsedMs = watch.ElapsedMilliseconds;
        Console.WriteLine("Iteracija baigta per: {0}ms", elapsedMs);

        // Slide the test window one block towards the start of the data.
        indexTestingEnd = indexTestingStart;
        indexTestingStart -= testingCount;
        errorAverage += error;

        Console.WriteLine("------------------------------------------------------------------------------");
    }

    precision = 1 - (errorAverage / iterations);
    return bestDecision;
}
/// <summary>
/// Trains a random forest on the given data and, when requested, stores the
/// zero-one loss on that same data in <c>TrainingError</c>.
/// </summary>
/// <param name="trainingData">Feature vectors the forest is trained on.</param>
/// <param name="trainingLabels">Class labels matching <paramref name="trainingData"/>.</param>
/// <param name="calculateError">When true, the training error is computed after learning.</param>
public override void Train(List<double[]> trainingData, List<int> trainingLabels, bool calculateError = true)
{
    LearningAlgorithm = new RandomForestLearning();

    // Non-positive settings leave the learner's own values untouched.
    if (NumTrees > 0)
    {
        LearningAlgorithm.NumberOfTrees = NumTrees;
    }

    if (SamplePropotion > 0)
    {
        LearningAlgorithm.SampleRatio = SamplePropotion;
    }

    Model = LearningAlgorithm.Learn(trainingData.ToArray(), trainingLabels.ToArray());

    if (!calculateError)
    {
        return;
    }

    var loss = new ZeroOneLoss(trainingLabels.ToArray());
    TrainingError = loss.Loss(Model.Decide(trainingData.ToArray()));
}
void Start()
{
    // Add the attribute columns and the class column to the table.
    keyTable.Columns.Add("First key", typeof(string));
    keyTable.Columns.Add("Second key", typeof(string));
    keyTable.Columns.Add("Third key", typeof(string));
    keyTable.Columns.Add("Exit", typeof(string));

    // Add the records to the table.
    keyTable.Rows.Add("Yellow", "Purple", "Blue", "First");
    keyTable.Rows.Add("Yellow", "Blue", "Purple", "Second");
    keyTable.Rows.Add("Purple", "Yellow", "Blue", "First");
    keyTable.Rows.Add("Purple", "Blue", "Yellow", "Second");
    keyTable.Rows.Add("Blue", "Purple", "Yellow", "First");
    keyTable.Rows.Add("Blue", "Yellow", "Purple", "Second");

    // To keep the computation cheap, Accord works on integer symbols instead of
    // strings; the codebook performs that conversion.
    codebook = new Codification(keyTable);

    // Convert the table data to integer symbols using the codebook.
    DataTable symbols = codebook.Apply(keyTable);
    int[][] inputs = symbols.ToJagged<int>("First key", "Second key", "Third key");
    int[] outputs = symbols.ToArray<int>("Exit");

    // Create the ID3 algorithm. The number is the count of distinct values in each
    // column; every key has three possible values: 1. yellow 2. purple 3. blue.
    var id3Learning = new ID3Learning()
    {
        new DecisionVariable("First key", 3),
        new DecisionVariable("Second key", 3),
        new DecisionVariable("Third key", 3)
    };

    // Train the tree.
    tree = id3Learning.Learn(inputs, outputs);

    // Check whether the tree makes errors on its own training data.
    var loss = new ZeroOneLoss(outputs);
    double errorTraining = loss.Loss(tree.Decide(inputs));
    Debug.Log("Tree error? (0 = no, 1 = yes) \n" + errorTraining);
}
public void sparse_zero_vector_test()
{
    // Linear SVM learner; the very large complexity asks for a hard-margin model.
    var teacher = new LinearNewtonMethod<Linear, Sparse<double>>();
    teacher.Tolerance = 1e-10;
    teacher.Complexity = 1e+10;

    // Training points in sparse form; the last one is the all-zero vector.
    double[][] dense =
    {
        new double[] { 1, 1, 2 },
        new double[] { 0, 1, 6 },
        new double[] { 1, 0, 8 },
        new double[] { 0, 0, 0 },
    };
    Sparse<double>[] inputs = Sparse.FromDense(dense);
    int[] outputs = { 1, -1, 1, -1 };

    // Learn the support vector machine.
    var svm = teacher.Learn(inputs, outputs);

    // Predict the training points and measure the zero-one loss.
    bool[] predicted = svm.Decide(inputs);
    var loss = new ZeroOneLoss(outputs);
    double error = loss.Loss(predicted);

    // Shape of the learned machine.
    Assert.AreEqual(3, svm.NumberOfInputs);
    Assert.AreEqual(1, svm.NumberOfOutputs);
    Assert.AreEqual(2, svm.NumberOfClasses);
    Assert.AreEqual(1, svm.Weights.Length);
    Assert.AreEqual(1, svm.SupportVectors.Length);

    // Learned weight and the single support vector.
    Assert.AreEqual(1.0, svm.Weights[0], 1e-6);
    var supportVector = svm.SupportVectors[0];
    Assert.AreEqual(2.0056922148257597, supportVector[0], 1e-6);
    Assert.AreEqual(-0.0085361347231909836, supportVector[1], 1e-6);
    Assert.AreEqual(0.0014225721169379331, supportVector[2], 1e-6);

    Assert.AreEqual(0.0, error);
}
public void kaggle_digits()
{
    string root = TestContext.CurrentContext.TestDirectory;
    var training = Properties.Resources.trainingsample;
    var validation = Properties.Resources.validationsample;

    var trainingSet = readData(training);
    var observations = trainingSet.Item1;
    var labels = trainingSet.Item2;

    var teacher = new MulticlassSupportVectorLearning<Linear>();

#if MONO
    teacher.ParallelOptions.MaxDegreeOfParallelism = 1;
#endif

    var svm = teacher.Learn(observations, labels);

    // Mean zero-one loss on the training sample.
    var trainingLoss = new ZeroOneLoss(labels);
    trainingLoss.Mean = true;
    double trainingError = trainingLoss.Loss(svm.Decide(observations));
    Assert.AreEqual(0.054, trainingError);

    // Mean zero-one loss on the validation sample.
    var validationSet = readData(validation);
    var validationData = validationSet.Item1;
    var validationLabels = validationSet.Item2;
    var validationLoss = new ZeroOneLoss(validationLabels);
    validationLoss.Mean = true;
    double validationError = validationLoss.Loss(svm.Decide(validationData));
    Assert.AreEqual(0.082, validationError);
}
/// <summary>
/// Trains a linear multi-class support vector machine on the given samples and
/// optionally serializes it to disk.
/// </summary>
/// <param name="features">Feature vectors to train on.</param>
/// <param name="labels">Class labels matching <paramref name="features"/>.</param>
/// <param name="modelSaveLocation">Path the machine is saved to; null or empty skips saving.</param>
/// <returns>The trained machine.</returns>
public static MulticlassSupportVectorMachine<Linear> RunSVM(double[][] features, int[] labels, string modelSaveLocation = null)
{
    var teacher = new MulticlassSupportVectorLearning<Linear>();

    // "Learner" is the factory that creates the trainer for each inner binary machine.
    teacher.Learner = (p) => new LinearDualCoordinateDescent()
    {
        Loss = Loss.L2
    };

    var svm = teacher.Learn(features, labels);

    // Training error (computed but not reported).
    var output = svm.Decide(features);
    var loss = new ZeroOneLoss(labels);
    double error = loss.Loss(output);

    if (string.IsNullOrEmpty(modelSaveLocation))
    {
        return svm;
    }

    Serializer.Save(obj: svm, path: modelSaveLocation);
    return svm;
}
/// <summary>
/// Trains a random forest on the given data after converting it to integers
/// and, when requested, stores the zero-one loss on that same data in
/// <c>TrainingError</c>.
/// </summary>
/// <param name="trainingData">Feature vectors; converted through <c>TypeCasters.DoubleMultiArrayToInt</c>.</param>
/// <param name="trainingLabels">Labels; converted through <c>TypeCasters.DoubleArrayToInt</c>.</param>
/// <param name="calculateError">When true, the training error is computed after learning.</param>
public override void Train(List<double[]> trainingData, List<double> trainingLabels, bool calculateError = true)
{
    LearningAlgorithm = new RandomForestLearning();

    // Non-positive settings leave the learner's own values untouched.
    if (NumTrees > 0)
    {
        LearningAlgorithm.NumberOfTrees = NumTrees;
    }

    if (SamplePropotion > 0)
    {
        LearningAlgorithm.SampleRatio = SamplePropotion;
    }

    int[][] integerData = TypeCasters.DoubleMultiArrayToInt(trainingData).ToArray();
    int[] integerLabels = TypeCasters.DoubleArrayToInt(trainingLabels).ToArray();

    Model = LearningAlgorithm.Learn(integerData, integerLabels);

    if (!calculateError)
    {
        return;
    }

    var loss = new ZeroOneLoss(integerLabels);
    TrainingError = loss.Loss(Model.Decide(integerData));
}
public void RegressTest2()
{
    // Fixed seed so the run is repeatable.
    Accord.Math.Random.Generator.Seed = 0;

    double[][] inputs;
    int[] outputs;
    MultinomialLogisticRegressionTest.CreateInputOutputsExample1(out inputs, out outputs);

    // Estimate the multinomial logistic regression with conjugate gradient.
    var teacher = new MultinomialLogisticLearning<ConjugateGradient>();
    MultinomialLogisticRegression regression = teacher.Learn(inputs, outputs);

    // Zero-one loss of the model on its own training data.
    int[] predicted = regression.Decide(inputs);
    var loss = new ZeroOneLoss(outputs);
    double error = loss.Loss(predicted);

    Assert.AreEqual(0.61088435374149663, error, 1e-8);
}
/// <summary>
/// Loads the training table, encodes it with a codebook (stored in
/// <c>codebook</c>), learns an ID3 decision tree (stored in <c>tree</c>) that
/// predicts the "POSITION" column and saves the tree to disk.
/// </summary>
private void Train()
{
    string trainingFile = Application.dataPath + "/" + trainData;
    DataTable data = GetDataTable(trainingFile);
    //DebugTable(data);

    // Translate the table into integer symbols.
    codebook = new Codification(data);
    DataTable symbols = codebook.Apply(data);

    int[][] inputs = symbols.ToArray<int>("LIFE", "TOWERS", "MELIANTS", "TIME", "ENEMY_COINS");
    int[] outputs = symbols.ToArray<int>("POSITION");

    // The decision variables are derived from the input data itself.
    var teacher = new ID3Learning();
    teacher.Attributes = DecisionVariable.FromData(inputs);
    tree = teacher.Learn(inputs, outputs);

    // Training error (computed but not reported).
    var loss = new ZeroOneLoss(outputs);
    double error = loss.Loss(tree.Decide(inputs));

    string treeFile = Application.dataPath + "/" + treeLocation;
    tree.Save(treeFile);
}
/// <summary>
/// Learns an ID3 decision tree (maximum height 7) that predicts the "Cannabis"
/// column of the drug consumption data and measures its zero-one loss on the
/// separate test file.
/// </summary>
/// <param name="show">When true, the target name, the derived rules and the error are
/// printed to the console, waiting for a key press after the rules and after the error.</param>
/// <returns>The zero-one loss on the test data, as a fraction (not a percentage).</returns>
static double Decision_Tree(bool show)
{
    DataTable data = DataController.MakeDataTable("../../drug_consumption.txt");
    DataTable entireData = DataController.MakeDataTable("../../drug_consumption.txt");
    DataTable tests = DataController.MakeDataTable("../../drug_consumption_test2.txt");

    Codification codebook = new Codification(entireData);
    DecisionVariable[] attributes = DataController.GetAttributes();

    // (7) "Never Used", "Used over a Decade Ago", "Used in Last Decade", "Used in Last Year",
    // "Used in Last Month", "Used in Last Week", and "Used in Last Day"
    int classCount = 7;

    DecisionTree tree = new DecisionTree(attributes, classCount);
    ID3Learning id3learning = new ID3Learning(tree);
    id3learning.MaxHeight = 7;

    string LookingFor = "Cannabis";

    // The same feature columns are read from the training and the test table.
    string[] featureColumns =
    {
        "Age", "Gender", "Education", "Country", "Eticnity", "Nscore", "Escore",
        "Oscore", "Ascore", "Cscore", "Impulsive", "SS"
    };

    DataTable symbols = codebook.Apply(data);
    int[][] inputs = symbols.ToJagged<int>(featureColumns);
    int[] outputs = symbols.ToArray<int>(LookingFor);

    id3learning.Learn(inputs, outputs);

    DataTable testSymbols = codebook.Apply(tests);
    int[][] testIn = testSymbols.ToJagged<int>(featureColumns);
    int[] testOut = testSymbols.ToArray<int>(LookingFor);

    DecisionSet rules = tree.ToRules();
    string ruleText = rules.ToString(codebook, LookingFor, System.Globalization.CultureInfo.InvariantCulture);

    double error = new ZeroOneLoss(testOut).Loss(tree.Decide(testIn));

    if (show)
    {
        Console.WriteLine(LookingFor);
        Console.WriteLine();
        Console.WriteLine(ruleText);
        Console.ReadKey();

        // The loss is a fraction between 0 and 1; scale it before printing it with '%'.
        Console.WriteLine("Blad - " + Math.Round(error * 100, 2) + "%");
        Console.ReadKey();
    }

    return error;
}
/// <summary>
/// Cross-validates a random forest that is created with the given grid-search
/// parameters on the samples of <paramref name="appIdentAcordSource"/>.
/// </summary>
/// <param name="appIdentAcordSource">Source of the samples, integer labels and decision variables.</param>
/// <param name="bestParameters">Parameters handed to <c>CreateRandomForestModel</c> for every fold.</param>
/// <param name="folds">Number of cross-validation folds.</param>
/// <returns>The result computed by the cross-validation.</returns>
public CrossValidationResult<RandomForest> GetCrossValidationResultsOfRandomForestModel(AppIdentAcordSource appIdentAcordSource, GridSearchParameterCollection bestParameters, int folds = 10)
{
    var samples = appIdentAcordSource.Samples;
    var labels = appIdentAcordSource.LabelsAsIntegers;
    var decisionVariables = appIdentAcordSource.DecisionVariables;

    // The cross-validation is sized by the number of samples and the number of folds.
    var crossvalidation = new CrossValidation<RandomForest>(samples.Length, folds);

    // For every fold the fitting function receives the indices of the original
    // set to train on and the indices to validate on.
    crossvalidation.Fitting = (k, indicesTrain, indicesValidation) =>
    {
        Console.WriteLine($"{DateTime.Now} RandomForest cross validation.");

        // Training part of this fold.
        var trainingInputs = samples.Get(indicesTrain);
        var trainingOutputs = labels.Get(indicesTrain);

        // Validation part of this fold.
        var validationInputs = samples.Get(indicesValidation);
        var validationOutputs = labels.Get(indicesValidation);

        // Random forest created with the best parameters from the grid search.
        var rfcModel = CreateRandomForestModel(decisionVariables, bestParameters, trainingInputs, trainingOutputs);

        // Zero-one loss on the training part and on the validation part.
        var trainingLoss = new ZeroOneLoss(trainingOutputs);
        var trainingError = trainingLoss.Loss(rfcModel.Decide(trainingInputs));
        var validationLoss = new ZeroOneLoss(validationOutputs);
        var validationError = validationLoss.Loss(rfcModel.Decide(validationInputs));

        // Hand back the model with both errors; the validation data travels along in Tag.
        var tag = new ValidationDataSource(validationInputs, validationOutputs);
        var foldValues = new CrossValidationValues<RandomForest>(rfcModel, trainingError, validationError);
        foldValues.Tag = tag;
        return foldValues;
    };

    return crossvalidation.Compute();
}
public void missing_values()
{
    // The features are nullable integers, so a value may be missing.
    var dataset = new WisconsinOriginalBreastCancer();
    int?[][] inputs = dataset.Features;
    int[] outputs = dataset.ClassLabels;

    var teacher = new C45Learning();
    var tree = teacher.Learn(inputs, outputs);

    Assert.AreEqual(4, tree.GetHeight());

    // Zero-one loss of the tree on its own training data.
    int[] predicted = tree.Decide(inputs);
    var loss = new ZeroOneLoss(outputs);
    double error = loss.Loss(predicted);
    Assert.AreEqual(0.0028612303290414878, error, 1e-8);
}
/// <summary>
/// Learns a C4.5 decision tree from the rows of <c>dt</c> (columns 0..29 are
/// the sensor inputs, column 30 is the class) and returns the rules derived
/// from the tree as text.
/// </summary>
/// <returns>The rule set formatted with the invariant culture.</returns>
public string Rules2String()
{
    const int sensorCount = 30;

    int rowCount = dt.Rows.Count;
    var inputs = new int[rowCount][];
    var labels = new string[rowCount];

    int rowIndex = 0;
    foreach (DataRow dr in dt.Rows)
    {
        // The class sits in the column right after the sensor columns.
        int classId = Convert.ToInt32(dr[sensorCount]);

        var readings = new int[sensorCount];
        for (int sensor = 0; sensor < sensorCount; sensor++)
        {
            readings[sensor] = Convert.ToInt32(dr[sensor]);
        }

        inputs[rowIndex] = readings;
        labels[rowIndex] = "class-" + classId.ToString();
        rowIndex++;
    }

    // Encode the textual class labels as integer symbols.
    var codebook = new Codification("Output", labels);
    int[] outputs = codebook.Transform("Output", labels);

    // One continuous decision variable per sensor, named sensor_1 .. sensor_30.
    var variables = new DecisionVariable[sensorCount];
    for (int sensor = 0; sensor < sensorCount; sensor++)
    {
        string name = "sensor_" + (sensor + 1).ToString();
        variables[sensor] = new DecisionVariable(name, DecisionVariableKind.Continuous);
    }

    // Learn the tree with the C4.5 algorithm.
    var teacher = new C45Learning(variables);
    DecisionTree tree = teacher.Learn(inputs, outputs);

    // Training error (computed but not reported).
    int[] predicted = tree.Decide(inputs);
    var loss = new ZeroOneLoss(outputs);
    double error = loss.Loss(predicted);

    DecisionSet rules = tree.ToRules();
    return rules.ToString(codebook, "Output", System.Globalization.CultureInfo.InvariantCulture);
}
public void test_learn()
{
    #region doc_iris
    // Fix the random seed so that the example is reproducible.
    Accord.Math.Random.Generator.Seed = 1;

    // This example processes Fisher's Iris dataset, where the task is to
    // decide from the features of an Iris flower whether it is an Iris
    // setosa, an Iris versicolor or an Iris virginica:
    //
    //  - https://en.wikipedia.org/wiki/Iris_flower_data_set
    //
    // First, load the dataset:
    var iris = new DataSets.Iris();
    double[][] inputs = iris.Instances;  // flower features
    int[] outputs = iris.ClassLabels;    // flower categories

    // Create the forest learning algorithm with 10 trees in the forest.
    var teacher = new RandomForestLearning();
    teacher.NumberOfTrees = 10;

    // Learn a random forest from the data.
    var forest = teacher.Learn(inputs, outputs);

    // Class labels can be estimated with Decide.
    int[] predicted = forest.Decide(inputs);

    // The classification error is the zero-one loss of the forest's decisions.
    var loss = new ZeroOneLoss(outputs);
    double error = loss.Loss(forest.Decide(inputs));
    #endregion

    Assert.AreEqual(10, forest.Trees.Length);
    Assert.IsTrue(error < 0.015);
}
public void learn_doc()
{
    #region doc_learn_simplest
    // This example learns a decision tree directly from integer matrices
    // that define the inputs and outputs of the learning problem.
    int[][] inputs =
    {
        new int[] { 0, 0 },
        new int[] { 0, 1 },
        new int[] { 1, 0 },
        new int[] { 1, 1 },
    };

    // xor between inputs[0] and inputs[1]
    int[] outputs = { 0, 1, 1, 0 };

    // Create an ID3 learning algorithm and learn a tree for the XOR problem.
    var teacher = new ID3Learning();
    var tree = teacher.Learn(inputs, outputs);

    // Compute the error in the learning.
    var loss = new ZeroOneLoss(outputs);
    double error = loss.Loss(tree.Decide(inputs));

    // The tree can now be queried for new examples; the answer should be { 0, 1, 1, 0 }.
    int[] predicted = tree.Decide(inputs);
    #endregion

    Assert.AreEqual(0, error);

    Assert.AreEqual(0, predicted[0]);
    Assert.AreEqual(1, predicted[1]);
    Assert.AreEqual(1, predicted[2]);
    Assert.AreEqual(0, predicted[3]);
}
/// <summary>
/// Scores <c>decisionTreeLib</c> on the given table after encoding it with the
/// codebook.
/// </summary>
/// <param name="data">Table holding the feature columns and the "TYPE" column.</param>
/// <param name="codebook">Codebook used to translate the table into integer symbols.</param>
/// <returns>One minus the zero-one loss of the tree's decisions on the table.</returns>
public double DecisionTreeAccuracyPercentageLib(DataTable data, Codification codebook)
{
    string[] featureColumns =
    {
        "CAP SHAPE", "CAP SURFACE", "CAP COLOR", "BRUISES", "ODOR", "GILL ATTACHMENT",
        "GILL SPACING", "GILL SIZE", "GILL COLOR", "STALK SHAPE", "STALK ROOT",
        "STALK SURFACE ABOVE RING", "STALK SURFACE BELOW RING", "STALK COLOR ABOVE RING",
        "STALK COLOR BELOW RING", "VEIL TYPE", "VEIL COLOR", "RING NUMBER", "RING TYPE",
        "SPORE PRINT COLOR", "POPULATION", "HABITAT"
    };
    string[] labelColumn = { "TYPE" };

    DataTable symbols = codebook.Apply(data);
    int[][] inputs = DataTableToMatrix(symbols, featureColumns);
    int[][] labelRows = DataTableToMatrix(symbols, labelColumn);

    // The label matrix has a single column; take its first entry of every row.
    int[] outputs = new int[labelRows.Length];
    int position = 0;
    foreach (int[] labelRow in labelRows)
    {
        outputs[position] = labelRow[0];
        position++;
    }

    var loss = new ZeroOneLoss(outputs);
    double error = loss.Loss(decisionTreeLib.Decide(inputs));
    return 1 - error;
}
public void multiclass_precomputed_matrix_smo()
{
    // Trains a one-vs-one multiclass SVM on a precomputed polynomial kernel
    // matrix and checks that it learns the same weights as a machine trained
    // directly on the input vectors with the same kernel.

    #region doc_precomputed
    // Let's say we have the following data to be classified
    // into three possible classes. Those are the samples:
    //
    double[][] trainInputs =
    {
        //               input         output
        new double[] { 0, 1, 1, 0 }, //  0
        new double[] { 0, 1, 0, 0 }, //  0
        new double[] { 0, 0, 1, 0 }, //  0
        new double[] { 0, 1, 1, 0 }, //  0
        new double[] { 0, 1, 0, 0 }, //  0
        new double[] { 1, 0, 0, 0 }, //  1
        new double[] { 1, 0, 0, 0 }, //  1
        new double[] { 1, 0, 0, 1 }, //  1
        new double[] { 0, 0, 0, 1 }, //  1
        new double[] { 0, 0, 0, 1 }, //  1
        new double[] { 1, 1, 1, 1 }, //  2
        new double[] { 1, 0, 1, 1 }, //  2
        new double[] { 1, 1, 0, 1 }, //  2
        new double[] { 0, 1, 1, 1 }, //  2
        new double[] { 1, 1, 1, 1 }, //  2
    };

    int[] trainOutputs = // those are the training set class labels
    {
        0, 0, 0, 0, 0,
        1, 1, 1, 1, 1,
        2, 2, 2, 2, 2,
    };

    // Let's choose a kernel function
    Polynomial kernel = new Polynomial(2);

    // Get the kernel matrix for the training set
    double[][] K = kernel.ToJagged(trainInputs);

    // Create a pre-computed kernel
    var pre = new Precomputed(K);

    // Create a one-vs-one learning algorithm using SMO
    var teacher = new MulticlassSupportVectorLearning<Precomputed, int>()
    {
        Learner = (p) => new SequentialMinimalOptimization<Precomputed, int>()
        {
            Kernel = pre
        }
    };

#if DEBUG
    teacher.ParallelOptions.MaxDegreeOfParallelism = 1;
#endif

    // Learn a machine
    var machine = teacher.Learn(pre.Indices, trainOutputs);

    // Compute the machine's prediction for the training set
    int[] trainPrediction = machine.Decide(pre.Indices);

    // Evaluate prediction error for the training set
    double trainingError = new ZeroOneLoss(trainOutputs).Loss(trainPrediction);

    // Now let's compute the machine's prediction for a test set
    double[][] testInputs = // test-set inputs
    {
        //               input         output
        new double[] { 0, 1, 1, 0 }, //  0
        new double[] { 0, 1, 0, 0 }, //  0
        new double[] { 0, 0, 0, 1 }, //  1
        new double[] { 1, 1, 1, 1 }, //  2
    };

    int[] testOutputs = // those are the test set class labels
    {
        0, 0, 1, 2,
    };

    // Compute precomputed matrix between train and testing
    pre.Values = kernel.ToJagged2(trainInputs, testInputs);

    // Update the kernel
    machine.Kernel = pre;

    // Compute the machine's prediction for the test set
    int[] testPrediction = machine.Decide(pre.Indices);

    // Evaluate prediction error for the test set
    double testError = new ZeroOneLoss(testOutputs).Loss(testPrediction);
    #endregion

    Assert.AreEqual(0, trainingError);
    Assert.AreEqual(0, testError);

    // Reference machine: same kernel and labels, but trained on the raw
    // vectors instead of on the precomputed kernel matrix.
    var teacher2 = new MulticlassSupportVectorLearning<Polynomial>()
    {
        Learner = (p) => new SequentialMinimalOptimization<Polynomial>()
        {
            Kernel = kernel
        }
    };

#if DEBUG
    // This must configure teacher2: the first teacher has already finished
    // training, so changing its options here would have no effect on this run.
    teacher2.ParallelOptions.MaxDegreeOfParallelism = 1;
#endif

    // Learn the reference machine
    var expected = teacher2.Learn(trainInputs, trainOutputs);

    Assert.AreEqual(4, expected.NumberOfInputs);
    Assert.AreEqual(3, expected.NumberOfOutputs);
    Assert.AreEqual(0, machine.NumberOfInputs);
    Assert.AreEqual(3, machine.NumberOfOutputs);

    // Pair up the binary machines of both models and compare them one by one.
    var machines = Enumerable.Zip(machine, expected, (a, b) => Tuple.Create(a.Value, b.Value));

    foreach (var pair in machines)
    {
        var a = pair.Item1;
        var e = pair.Item2;

        Assert.AreEqual(0, a.NumberOfInputs);
        Assert.AreEqual(2, a.NumberOfOutputs);
        Assert.AreEqual(4, e.NumberOfInputs);
        Assert.AreEqual(2, e.NumberOfOutputs);

        Assert.IsTrue(a.Weights.IsEqual(e.Weights));
    }
}
public void multiclass_precomputed_matrix_smo()
{
    // Trains a one-vs-one multiclass SVM on a precomputed polynomial kernel
    // matrix and checks that it learns the same weights as a machine trained
    // directly on the input vectors with the same kernel.

    #region doc_precomputed
    // Let's say we have the following data to be classified
    // into three possible classes. Those are the samples:
    //
    double[][] trainInputs =
    {
        //               input         output
        new double[] { 0, 1, 1, 0 }, //  0
        new double[] { 0, 1, 0, 0 }, //  0
        new double[] { 0, 0, 1, 0 }, //  0
        new double[] { 0, 1, 1, 0 }, //  0
        new double[] { 0, 1, 0, 0 }, //  0
        new double[] { 1, 0, 0, 0 }, //  1
        new double[] { 1, 0, 0, 0 }, //  1
        new double[] { 1, 0, 0, 1 }, //  1
        new double[] { 0, 0, 0, 1 }, //  1
        new double[] { 0, 0, 0, 1 }, //  1
        new double[] { 1, 1, 1, 1 }, //  2
        new double[] { 1, 0, 1, 1 }, //  2
        new double[] { 1, 1, 0, 1 }, //  2
        new double[] { 0, 1, 1, 1 }, //  2
        new double[] { 1, 1, 1, 1 }, //  2
    };

    int[] trainOutputs = // those are the training set class labels
    {
        0, 0, 0, 0, 0,
        1, 1, 1, 1, 1,
        2, 2, 2, 2, 2,
    };

    // Let's choose a kernel function
    Polynomial kernel = new Polynomial(2);

    // Get the kernel matrix for the training set
    double[][] K = kernel.ToJagged(trainInputs);

    // Create a pre-computed kernel
    var pre = new Precomputed(K);

    // Create a one-vs-one learning algorithm using SMO
    var teacher = new MulticlassSupportVectorLearning<Precomputed, int>()
    {
        Learner = (p) => new SequentialMinimalOptimization<Precomputed, int>()
        {
            Kernel = pre
        }
    };

#if DEBUG
    teacher.ParallelOptions.MaxDegreeOfParallelism = 1;
#endif

    // Learn a machine
    var machine = teacher.Learn(pre.Indices, trainOutputs);

    // Compute the machine's prediction for the training set
    int[] trainPrediction = machine.Decide(pre.Indices);

    // Evaluate prediction error for the training set
    double trainingError = new ZeroOneLoss(trainOutputs).Loss(trainPrediction);

    // Now let's compute the machine's prediction for a test set
    double[][] testInputs = // test-set inputs
    {
        //               input         output
        new double[] { 0, 1, 1, 0 }, //  0
        new double[] { 0, 1, 0, 0 }, //  0
        new double[] { 0, 0, 0, 1 }, //  1
        new double[] { 1, 1, 1, 1 }, //  2
    };

    int[] testOutputs = // those are the test set class labels
    {
        0, 0, 1, 2,
    };

    // Compute precomputed matrix between train and testing
    pre.Values = kernel.ToJagged2(trainInputs, testInputs);

    // Update the kernel
    machine.Kernel = pre;

    // Compute the machine's prediction for the test set
    int[] testPrediction = machine.Decide(pre.Indices);

    // Evaluate prediction error for the test set
    double testError = new ZeroOneLoss(testOutputs).Loss(testPrediction);
    #endregion

    Assert.AreEqual(0, trainingError);
    Assert.AreEqual(0, testError);

    // Reference machine: same kernel and labels, but trained on the raw
    // vectors instead of on the precomputed kernel matrix.
    var teacher2 = new MulticlassSupportVectorLearning<Polynomial>()
    {
        Learner = (p) => new SequentialMinimalOptimization<Polynomial>()
        {
            Kernel = kernel
        }
    };

#if DEBUG
    // This must configure teacher2: the first teacher has already finished
    // training, so changing its options here would have no effect on this run.
    teacher2.ParallelOptions.MaxDegreeOfParallelism = 1;
#endif

    // Learn the reference machine
    var expected = teacher2.Learn(trainInputs, trainOutputs);

    Assert.AreEqual(4, expected.NumberOfInputs);
    Assert.AreEqual(3, expected.NumberOfOutputs);
    Assert.AreEqual(0, machine.NumberOfInputs);
    Assert.AreEqual(3, machine.NumberOfOutputs);

    // Pair up the binary machines of both models and compare them one by one.
    var machines = Enumerable.Zip(machine, expected, (a, b) => Tuple.Create(a.Value, b.Value));

    foreach (var pair in machines)
    {
        var a = pair.Item1;
        var e = pair.Item2;

        Assert.AreEqual(0, a.NumberOfInputs);
        Assert.AreEqual(2, a.NumberOfOutputs);
        Assert.AreEqual(4, e.NumberOfInputs);
        Assert.AreEqual(2, e.NumberOfOutputs);

        Assert.IsTrue(a.Weights.IsEqual(e.Weights));
    }
}
public void multiclass_calibration_generic_kernel()
{
    // Trains a multiclass SVM through the IKernel interface with a Gaussian
    // kernel of gamma 0.5, calibrates it with ProbabilisticOutputCalibration,
    // and compares its scores, log-likelihoods and probabilities against
    // recorded reference values.

    // Twelve samples spread over three classes.
    double[][] samples =
    {
        //               input         output
        new double[] { 0, 1, 1, 0 }, //  0
        new double[] { 0, 1, 0, 0 }, //  0
        new double[] { 0, 0, 1, 0 }, //  0
        new double[] { 0, 1, 1, 0 }, //  0
        new double[] { 0, 1, 0, 0 }, //  0
        new double[] { 1, 0, 0, 1 }, //  1
        new double[] { 0, 0, 0, 1 }, //  1
        new double[] { 0, 0, 0, 1 }, //  1
        new double[] { 1, 0, 1, 1 }, //  2
        new double[] { 1, 1, 0, 1 }, //  2
        new double[] { 0, 1, 1, 1 }, //  2
        new double[] { 1, 1, 1, 1 }, //  2
    };

    // Class label of each sample above.
    int[] labels =
    {
        0, 0, 0, 0, 0,
        1, 1, 1,
        2, 2, 2, 2,
    };

    // Step 1: train the machine. Every binary sub-problem is solved by SMO
    // with kernel estimation switched off and a fixed Gaussian kernel.
    var trainer = new MulticlassSupportVectorLearning<IKernel>()
    {
        Learner = (param) => new SequentialMinimalOptimization<IKernel>()
        {
            UseKernelEstimation = false,
            Kernel = Gaussian.FromGamma(0.5)
        }
    };

    var machine = trainer.Learn(samples, labels);

    // Step 2: calibrate the already-trained machine. Each binary model is
    // handed to a ProbabilisticOutputCalibration instance.
    var calibrator = new MulticlassSupportVectorLearning<IKernel>(machine)
    {
        Learner = (param) => new ProbabilisticOutputCalibration<IKernel>(param.Model)
    };

    // Run the calibration on a single thread.
    calibrator.ParallelOptions.MaxDegreeOfParallelism = 1;

    calibrator.Learn(samples, labels);

    // Step 3: query the calibrated machine.
    int[] predicted = machine.Decide(samples);             // class decisions
    double[] scores = machine.Score(samples);              // score of the decided class
    double[][] logl = machine.LogLikelihoods(samples);     // per-class log-likelihoods
    double[][] prob = machine.Probabilities(samples);      // per-class probabilities

    // Step 4: summarise the quality of the answers.
    double error = new ZeroOneLoss(labels).Loss(predicted);
    double loss = new CategoryCrossEntropyLoss(labels).Loss(prob);

    // Recorded reference values.
    double[] expectedScores =
    {
        1.87436400885238, 1.81168086449304, 1.74038320983522,
        1.87436400885238, 1.81168086449304, 1.55446926953952,
        1.67016543853596, 1.67016543853596, 1.83135194001403,
        1.83135194001403, 1.59836868669125, 2.0618816310294
    };

    double[][] expectedLogL =
    {
        new double[] { 1.87436400885238, -1.87436400885238, -1.7463646841257 },
        new double[] { 1.81168086449304, -1.81168086449304, -1.73142460658826 },
        new double[] { 1.74038320983522, -1.58848669816072, -1.74038320983522 },
        new double[] { 1.87436400885238, -1.87436400885238, -1.7463646841257 },
        new double[] { 1.81168086449304, -1.81168086449304, -1.73142460658826 },
        new double[] { -1.55446926953952, 1.55446926953952, -0.573599079216229 },
        new double[] { -0.368823000428743, 1.67016543853596, -1.67016543853596 },
        new double[] { -0.368823000428743, 1.67016543853596, -1.67016543853596 },
        new double[] { -1.83135194001403, -1.20039293330558, 1.83135194001403 },
        new double[] { -1.83135194001403, -1.20039293330558, 1.83135194001403 },
        new double[] { -0.894598978116595, -1.59836868669125, 1.59836868669125 },
        new double[] { -1.87336852014759, -2.0618816310294, 2.0618816310294 }
    };

    double[][] expectedProbs =
    {
        new double[] { 0.95209908906855, 0.0224197237689656, 0.0254811871624848 },
        new double[] { 0.947314032745205, 0.0252864560196241, 0.0273995112351714 },
        new double[] { 0.937543314993345, 0.0335955309754816, 0.028861154031173 },
        new double[] { 0.95209908906855, 0.0224197237689656, 0.0254811871624848 },
        new double[] { 0.947314032745205, 0.0252864560196241, 0.0273995112351714 },
        new double[] { 0.0383670466237636, 0.859316640577158, 0.102316312799079 },
        new double[] { 0.111669460983068, 0.857937888238824, 0.0303926507781076 },
        new double[] { 0.111669460983068, 0.857937888238824, 0.0303926507781076 },
        new double[] { 0.0238971617859334, 0.0449126146360623, 0.931190223578004 },
        new double[] { 0.0238971617859334, 0.0449126146360623, 0.931190223578004 },
        new double[] { 0.0735735561383806, 0.0363980776342206, 0.890028366227399 },
        new double[] { 0.0188668069460003, 0.0156252941482294, 0.96550789890577 }
    };

    // NOTE(review): the original comment here reads "must be exactly the same
    // as test above"; that sibling test is not visible from this block.
    Assert.AreEqual(0, error);

    // Calibration must leave the Gaussian kernel of each binary machine untouched.
    Assert.AreEqual(0.5, ((Gaussian)machine[0].Value.Kernel).Gamma);
    Assert.AreEqual(0.5, ((Gaussian)machine[1].Value.Kernel).Gamma);
    Assert.AreEqual(0.5, ((Gaussian)machine[2].Value.Kernel).Gamma);

    Assert.AreEqual(1.0231652126930515, loss);
    Assert.IsTrue(predicted.IsEqual(labels));
    Assert.IsTrue(expectedScores.IsEqual(scores, 1e-10));
    Assert.IsTrue(expectedLogL.IsEqual(logl, 1e-10));
    Assert.IsTrue(expectedProbs.IsEqual(prob, 1e-10));
}
public void new_method_create_tree()
{
    // Learns a C4.5 tree from the Iris resource without declaring the
    // decision variables up front, converts the tree to rules and checks
    // both the error and the printed rule set.

    // Break the resource into records, then each record into its fields.
    string[] records = Resources.iris_data.Split(new[] { '\n' }, StringSplitOptions.RemoveEmptyEntries);
    string[][] text = records.Apply(x => x.Split(','));

    // Columns 0-3 are the inputs; column 4 is the textual class name.
    double[][] features = text.GetColumns(0, 1, 2, 3).To<double[][]>();
    string[] names = text.GetColumn(4);

    // Translate the textual class names into integer labels.
    var codebook = new Codification("Output", names);
    int[] labels = codebook.Translate("Output", names);

    // Let C4.5 create and induce the tree by itself.
    var learner = new C45Learning();
    var tree = learner.Learn(features, labels);

    // Estimated class labels for the training samples.
    int[] predicted = tree.Decide(features);

    // Mean zero-one error of the tree on the training samples.
    var zeroOne = new ZeroOneLoss(labels);
    zeroOne.Mean = true;
    double error = zeroOne.Loss(tree.Decide(features));

    // The tree can also be expressed as a set of decision rules ...
    DecisionSet rules = tree.ToRules();

    // ... which the codebook lets us print with the original class names.
    string ruleText = rules.ToString(codebook, "Output",
        System.Globalization.CultureInfo.InvariantCulture);

    // NOTE(review): this verbatim literal is reproduced exactly as it appears
    // in the reviewed text, on a single line. It presumably spans one rule per
    // line in the upstream file - confirm before relying on it.
    string expected = @"Iris-setosa =: (2 <= 2.45) Iris-versicolor =: (2 > 2.45) && (3 <= 1.75) && (0 <= 7.05) && (1 <= 2.85) Iris-versicolor =: (2 > 2.45) && (3 <= 1.75) && (0 <= 7.05) && (1 > 2.85) Iris-versicolor =: (2 > 2.45) && (3 > 1.75) && (0 <= 5.95) && (1 > 3.05) Iris-virginica =: (2 > 2.45) && (3 <= 1.75) && (0 > 7.05) Iris-virginica =: (2 > 2.45) && (3 > 1.75) && (0 > 5.95) Iris-virginica =: (2 > 2.45) && (3 > 1.75) && (0 <= 5.95) && (1 <= 3.05) ";

    Assert.AreEqual(0.026666666666666668, error, 1e-10);

    // The rule set must make the same mistakes as the tree it came from.
    double newError = ComputeError(rules, features, labels);
    Assert.AreEqual(0.026666666666666668, newError, 1e-10);
    Assert.AreEqual(expected, ruleText);
}
public void learn_test()
{
    // Trains a one-vs-one multiclass SVM with a linear kernel on four
    // samples and checks the decisions and the scores of three of them.

    #region doc_learn
    // Fix the seed so the random numbers are the same on every run.
    Accord.Math.Random.Generator.Seed = 0;

    // A simple auto-association problem: the last column of every
    // sample equals the class of that sample.
    double[][] samples =
    {
        new double[] { 1, 2, 0 },
        new double[] { 6, 2, 3 },
        new double[] { 1, 1, 1 },
        new double[] { 7, 6, 2 },
    };

    // Expected class of each sample.
    int[] labels = { 0, 3, 1, 2 };

    // The multiclass learner trains one binary SVM per sub-problem; here
    // each of them is trained by SMO. To try another kernel, replace the
    // generic argument (for example with Polynomial or Gaussian).
    var learner = new MulticlassSupportVectorLearning<Linear>()
    {
        Learner = (param) => new SequentialMinimalOptimization<Linear>()
        {
            Kernel = new Linear()
        }
    };

    // Estimate the multi-class machine.
    MulticlassSupportVectorMachine<Linear> machine = learner.Learn(samples, labels);

    // Class decision for every training sample.
    int[] predicted = machine.Decide(samples);

    // Classification error of those decisions.
    var zeroOne = new ZeroOneLoss(labels);
    double error = zeroOne.Loss(predicted);
    #endregion

    Assert.AreEqual(0, error);
    Assert.IsTrue(predicted.IsEqual(labels));

    // Per-class scores of the first three samples, to two decimal places.
    double[] scores0 = machine.Scores(samples[0]);
    Assert.IsTrue(scores0.IsEqual(new double[] { 0.62, -0.25, -0.59, -0.62 }, 1e-2));

    double[] scores1 = machine.Scores(samples[1]);
    Assert.IsTrue(scores1.IsEqual(new double[] { -0.62, -0.57, -0.13, 0.62 }, 1e-2));

    double[] scores2 = machine.Scores(samples[2]);
    Assert.IsTrue(scores2.IsEqual(new double[] { -0.25, 0.63, -0.63, -0.51 }, 1e-2));
}
public void laplace_smoothing_missing_sample()
{
    // Trains a discrete Naive Bayes model with the Laplace rule enabled on
    // data where one declared symbol never occurs, then checks a decision,
    // its probability and the training error.

    #region doc_laplace
    // The model below declares three symbols (0, 1, 2) for the second
    // column, but the training rows only ever contain 1 and 2 there:
    int[][] samples =
    {
        //      input     output
        new [] { 0, 1 }, //  0
        new [] { 0, 2 }, //  0
        new [] { 0, 1 }, //  0
        new [] { 1, 2 }, //  1
        new [] { 0, 2 }, //  1
        new [] { 0, 2 }, //  1
        new [] { 1, 1 }, //  2
        new [] { 0, 1 }, //  2
        new [] { 1, 1 }, //  2
    };

    // Class label of each row above.
    int[] labels =
    {
        0, 0, 0, 1, 1, 1, 2, 2, 2,
    };

    // The data alone cannot reveal how many symbols each column may take,
    // so the model is specified by hand: three classes, two symbols for the
    // first column and three for the second.
    var model = new NaiveBayes(classes: 3, symbols: new[] { 2, 3 });

    // Attach the hand-made model to a learning algorithm.
    var learner = new NaiveBayesLearning();
    learner.Model = model;

    // Turn the Laplace rule on.
    learner.Options.InnerOption.UseLaplaceRule = true;

    // Fit the model.
    learner.Learn(samples, labels);

    // Classify a sample holding 0 in the first column and 1 in the second.
    int answer = model.Decide(new int[] { 0, 1 });
    #endregion

    Assert.AreEqual(0, answer);

    // Ask again, this time also retrieving the probability of the answer.
    double prob = model.Probability(new int[] { 0, 1 }, out answer);
    Assert.AreEqual(0, answer);
    Assert.AreEqual(0.52173913043478259, prob, 1e-10);

    // Mean zero-one error on the training rows.
    var zeroOne = new ZeroOneLoss(labels);
    zeroOne.Mean = true;
    double error = zeroOne.Loss(model.Decide(samples));

    Assert.AreEqual(2 / 9.0, error);
}
public void IrisDatasetTest()
{
    // Learns a C4.5 tree with named continuous variables from the Iris
    // resource, converts it to rules and checks the error, the tree
    // dimensions and the printed rule set.

    #region doc_iris
    // This example uses Fisher's Iris dataset, where the task is to tell
    // whether a flower is an Iris setosa, an Iris versicolor or an Iris
    // virginica from its measured features:
    //
    //  - https://en.wikipedia.org/wiki/Iris_flower_data_set
    //
    // Break the resource into records, then each record into its fields.
    string[] records = Resources.iris_data.Split(new[] { '\n' }, StringSplitOptions.RemoveEmptyEntries);
    string[][] text = records.Apply(x => x.Split(','));

    // Columns 0-3 hold the flower features ...
    double[][] features = text.GetColumns(0, 1, 2, 3).To<double[][]>();

    // ... and column 4 holds the expected flower type as text.
    string[] names = text.GetColumn(4);

    // A codebook turns the textual flower types into integer class labels.
    var codebook = new Codification("Output", names);
    int[] labels = codebook.Translate("Output", names);

    // Name the four input variables and mark each one as continuous.
    DecisionVariable[] variables =
    {
        new DecisionVariable("sepal length", DecisionVariableKind.Continuous),
        new DecisionVariable("sepal width", DecisionVariableKind.Continuous),
        new DecisionVariable("petal length", DecisionVariableKind.Continuous),
        new DecisionVariable("petal width", DecisionVariableKind.Continuous),
    };

    // Create a tree for three classes and let C4.5 induce it.
    var tree = new DecisionTree(inputs: variables, classes: 3);
    var learner = new C45Learning(tree);
    learner.Learn(features, labels);

    // Estimated class labels for the training samples.
    int[] predicted = tree.Decide(features);

    // Mean zero-one error of the tree on the training samples.
    var zeroOne = new ZeroOneLoss(labels);
    zeroOne.Mean = true;
    double error = zeroOne.Loss(tree.Decide(features));

    // The tree can also be expressed as a set of decision rules ...
    DecisionSet rules = tree.ToRules();

    // ... which the codebook lets us print with the original class names.
    string ruleText = rules.ToString(codebook, "Output",
        System.Globalization.CultureInfo.InvariantCulture);

    // NOTE(review): this verbatim literal is reproduced exactly as it appears
    // in the reviewed text, on a single line. It presumably spans one rule per
    // line in the upstream file - confirm before relying on it.
    string expected = @"Iris-setosa =: (petal length <= 2.45) Iris-versicolor =: (petal length > 2.45) && (petal width <= 1.75) && (sepal length <= 7.05) && (sepal width <= 2.85) Iris-versicolor =: (petal length > 2.45) && (petal width <= 1.75) && (sepal length <= 7.05) && (sepal width > 2.85) Iris-versicolor =: (petal length > 2.45) && (petal width > 1.75) && (sepal length <= 5.95) && (sepal width > 3.05) Iris-virginica =: (petal length > 2.45) && (petal width <= 1.75) && (sepal length > 7.05) Iris-virginica =: (petal length > 2.45) && (petal width > 1.75) && (sepal length > 5.95) Iris-virginica =: (petal length > 2.45) && (petal width > 1.75) && (sepal length <= 5.95) && (sepal width <= 3.05) ";
    #endregion

    Assert.AreEqual(0.026666666666666668, error, 1e-10);

    Assert.AreEqual(4, tree.NumberOfInputs);
    Assert.AreEqual(3, tree.NumberOfOutputs);

    // The rule set must make the same mistakes as the tree it came from.
    double newError = ComputeError(rules, features, labels);
    Assert.AreEqual(0.026666666666666668, newError, 1e-10);
    Assert.AreEqual(expected, ruleText);
}
public void multiclass_gaussian_new_usage()
{
    // Trains a multiclass SVM with an estimated Gaussian kernel and compares
    // its scores, log-likelihoods and probabilities against recorded
    // reference values.

    #region doc_learn_gaussian
    // Fifteen samples spread evenly over three classes.
    double[][] samples =
    {
        //               input         output
        new double[] { 0, 1, 1, 0 }, //  0
        new double[] { 0, 1, 0, 0 }, //  0
        new double[] { 0, 0, 1, 0 }, //  0
        new double[] { 0, 1, 1, 0 }, //  0
        new double[] { 0, 1, 0, 0 }, //  0
        new double[] { 1, 0, 0, 0 }, //  1
        new double[] { 1, 0, 0, 0 }, //  1
        new double[] { 1, 0, 0, 1 }, //  1
        new double[] { 0, 0, 0, 1 }, //  1
        new double[] { 0, 0, 0, 1 }, //  1
        new double[] { 1, 1, 1, 1 }, //  2
        new double[] { 1, 0, 1, 1 }, //  2
        new double[] { 1, 1, 0, 1 }, //  2
        new double[] { 0, 1, 1, 1 }, //  2
        new double[] { 1, 1, 1, 1 }, //  2
    };

    // Class label of each sample above.
    int[] labels =
    {
        0, 0, 0, 0, 0,
        1, 1, 1, 1, 1,
        2, 2, 2, 2, 2,
    };

    // Every binary sub-problem is solved by SMO. Instead of fixing the
    // Gaussian kernel parameters, ask SMO to estimate a starting guess;
    // that guess can later seed a grid search.
    var learner = new MulticlassSupportVectorLearning<Gaussian>()
    {
        Learner = (param) => new SequentialMinimalOptimization<Gaussian>()
        {
            UseKernelEstimation = true
        }
    };

    // Train on a single thread.
    learner.ParallelOptions.MaxDegreeOfParallelism = 1;

    var machine = learner.Learn(samples, labels);

    // Class decisions and the score of each decision.
    int[] predicted = machine.Decide(samples);
    double[] scores = machine.Score(samples);

    // Classification error of the decisions.
    double error = new ZeroOneLoss(labels).Loss(predicted);
    #endregion

    // Per-class log-likelihoods and probabilities of every sample.
    double[][] logl = machine.LogLikelihoods(samples);
    double[][] prob = machine.Probabilities(samples);

    // Cross-entropy between the labels and the predicted probabilities.
    double loss = new CategoryCrossEntropyLoss(labels).Loss(prob);

    // C# rendering of the scores; not used by any assertion below.
    string scoresAsCode = scores.ToCSharp();

    // Recorded reference values.
    double[] expectedScores =
    {
        1.00888999727541, 1.00303259868784, 1.00068403386636, 1.00888999727541,
        1.00303259868784, 1.00831890183328, 1.00831890183328, 0.843757409449037,
        0.996768862332386, 0.996768862332386, 1.02627325826713, 1.00303259868784,
        0.996967401312164, 0.961947708617365, 1.02627325826713
    };

    double[][] expectedLogL =
    {
        new double[] { 1.00888999727541, -1.00888999727541, -1.00135670089335 },
        new double[] { 1.00303259868784, -0.991681098166717, -1.00303259868784 },
        new double[] { 1.00068403386636, -0.54983354268499, -1.00068403386636 },
        new double[] { 1.00888999727541, -1.00888999727541, -1.00135670089335 },
        new double[] { 1.00303259868784, -0.991681098166717, -1.00303259868784 },
        new double[] { -1.00831890183328, 1.00831890183328, -0.0542719287771535 },
        new double[] { -1.00831890183328, 1.00831890183328, -0.0542719287771535 },
        new double[] { -0.843757409449037, 0.843757409449037, -0.787899083913034 },
        new double[] { -0.178272229157676, 0.996768862332386, -0.996768862332386 },
        new double[] { -0.178272229157676, 0.996768862332386, -0.996768862332386 },
        new double[] { -1.02627325826713, -1.00323113766761, 1.02627325826713 },
        new double[] { -1.00303259868784, -0.38657999872922, 1.00303259868784 },
        new double[] { -0.996967401312164, -0.38657999872922, 0.996967401312164 },
        new double[] { -0.479189991343958, -0.961947708617365, 0.961947708617365 },
        new double[] { -1.02627325826713, -1.00323113766761, 1.02627325826713 }
    };

    double[][] expectedProbs =
    {
        new double[] { 0.789324598208647, 0.104940932711551, 0.105734469079803 },
        new double[] { 0.78704862182644, 0.107080012017624, 0.105871366155937 },
        new double[] { 0.74223157627093, 0.157455631737191, 0.100312791991879 },
        new double[] { 0.789324598208647, 0.104940932711551, 0.105734469079803 },
        new double[] { 0.78704862182644, 0.107080012017624, 0.105871366155937 },
        new double[] { 0.0900153422818135, 0.676287261796794, 0.233697395921392 },
        new double[] { 0.0900153422818135, 0.676287261796794, 0.233697395921392 },
        new double[] { 0.133985810363445, 0.72433118122885, 0.141683008407705 },
        new double[] { 0.213703968297751, 0.692032433073136, 0.0942635986291124 },
        new double[] { 0.213703968297751, 0.692032433073136, 0.0942635986291124 },
        new double[] { 0.10192623206507, 0.104302095948601, 0.79377167198633 },
        new double[] { 0.0972161784678357, 0.180077937396817, 0.722705884135347 },
        new double[] { 0.0981785890979593, 0.180760971768703, 0.721060439133338 },
        new double[] { 0.171157270099157, 0.105617610634377, 0.723225119266465 },
        new double[] { 0.10192623206507, 0.104302095948601, 0.79377167198633 }
    };

    Assert.AreEqual(0, error);
    Assert.AreEqual(4.5289447815997672, loss, 1e-10);
    Assert.IsTrue(predicted.IsEqual(labels));
    Assert.IsTrue(expectedScores.IsEqual(scores, 1e-10));
    Assert.IsTrue(expectedLogL.IsEqual(logl, 1e-10));
    Assert.IsTrue(expectedProbs.IsEqual(prob, 1e-10));
}
public void learn_test_with_options()
{
    // Trains a Naive Bayes model with normal component distributions, using
    // a small regularization option, and checks its decisions on the
    // training data and on one individual sample.

    #region doc_learn_options
    // Fifteen samples spread evenly over three classes.
    double[][] samples =
    {
        //               input         output
        new double[] { 0, 1, 1, 0 }, //  0
        new double[] { 0, 1, 0, 0 }, //  0
        new double[] { 0, 0, 1, 0 }, //  0
        new double[] { 0, 1, 1, 0 }, //  0
        new double[] { 0, 1, 0, 0 }, //  0
        new double[] { 1, 0, 0, 0 }, //  1
        new double[] { 1, 0, 0, 0 }, //  1
        new double[] { 1, 0, 0, 1 }, //  1
        new double[] { 0, 0, 0, 1 }, //  1
        new double[] { 0, 0, 0, 1 }, //  1
        new double[] { 1, 1, 1, 1 }, //  2
        new double[] { 1, 0, 1, 1 }, //  2
        new double[] { 1, 1, 0, 1 }, //  2
        new double[] { 0, 1, 1, 1 }, //  2
        new double[] { 1, 1, 1, 1 }, //  2
    };

    // Class label of each sample above.
    int[] labels =
    {
        0, 0, 0, 0, 0,
        1, 1, 1, 1, 1,
        2, 2, 2, 2, 2,
    };

    // Naive Bayes learner whose components are normal distributions.
    var learner = new NaiveBayesLearning<NormalDistribution, NormalOptions>();

    // A little regularization keeps the component variances away from zero.
    learner.Options.InnerOption.Regularization = 1e-5;

    // Fit the model.
    NaiveBayes<NormalDistribution> model = learner.Learn(samples, labels);

    // Predict the class of every training sample.
    int[] predicted = model.Decide(samples);

    // The error of those predictions should be zero.
    var zeroOne = new ZeroOneLoss(labels);
    double error = zeroOne.Loss(predicted);

    // Query a single sample; { 1, 0, 0, 1 } is labelled 1 in the table above.
    int answer = model.Decide(new double[] { 1, 0, 0, 1 });
    #endregion

    Assert.AreEqual(0, error);
    Assert.AreEqual(1, answer);
    Assert.IsTrue(predicted.IsEqual(labels));
}
public void kaggle_digits_with_compress()
{
    // Trains a linear multiclass SVM on the training sample resource,
    // compresses it, and checks support-vector counts before and after
    // compression plus the training and validation errors afterwards.

    string root = Environment.CurrentDirectory;

    var trainingText = Properties.Resources.trainingsample;
    var validationText = Properties.Resources.validationsample;

    // Parse the training resource into observations and their labels.
    var trainingSet = readData(trainingText);
    var observations = trainingSet.Item1;
    var labels = trainingSet.Item2;

    var learner = new MulticlassSupportVectorLearning<Linear>();
    var machine = learner.Learn(observations, labels);

    // Support-vector counts of two binary machines before compression.
    Assert.AreEqual(50, machine.Models[0][0].SupportVectors.Length);
    Assert.AreEqual(127, machine.Models[1][0].SupportVectors.Length);

    machine.Compress();

    // After compression each of them holds a single vector.
    Assert.AreEqual(1, machine.Models[0][0].SupportVectors.Length);
    Assert.AreEqual(1, machine.Models[1][0].SupportVectors.Length);

    // Mean error on the data the machine was trained with.
    var trainingLoss = new ZeroOneLoss(labels);
    trainingLoss.Mean = true;
    double trainingError = trainingLoss.Loss(machine.Decide(observations));
    Assert.AreEqual(0.054, trainingError);

    // Mean error on the separate validation resource.
    var validationSet = readData(validationText);
    var validationData = validationSet.Item1;
    var validationLabels = validationSet.Item2;

    var validationLoss = new ZeroOneLoss(validationLabels);
    validationLoss.Mean = true;
    double validationError = validationLoss.Loss(machine.Decide(validationData));
    Assert.AreEqual(0.082, validationError);
}
/// <summary>
///   Calibrates the machine stored in <c>ksvm</c> so it produces
///   probabilistic outputs, using ProbabilisticOutputCalibration
///   on the rows of the training grid.
/// </summary>
///
private void btnRunCalibration_Click(object sender, EventArgs e)
{
    // Calibration needs a machine; bail out when none is available yet.
    if (ksvm == null)
    {
        MessageBox.Show("Please train the machines first.");
        return;
    }

    // Copy the feature vectors and labels out of the training grid.
    int count = dgvTrainingSource.Rows.Count;
    double[][] input = new double[count][];
    int[] output = new int[count];

    for (int r = 0; r < count; r++)
    {
        var gridRow = dgvTrainingSource.Rows[r];
        input[r] = (double[])gridRow.Cells["colTrainingFeatures"].Value;
        output[r] = (int)gridRow.Cells["colTrainingLabel"].Value;
    }

    // The multiclass learner works on the existing machine and calibrates
    // each binary model with its own ProbabilisticOutputCalibration.
    var calibration = new MulticlassSupportVectorLearning<IKernel>()
    {
        Model = ksvm,

        Learner = (p) => new ProbabilisticOutputCalibration<IKernel>()
        {
            Model = p.Model
        }
    };

    // Let the status text repaint before the long-running call.
    lbStatus.Text = "Calibrating the classifiers. This may take a (very) significant amount of time...";
    Application.DoEvents();

    // Time the calibration run.
    Stopwatch timer = Stopwatch.StartNew();
    calibration.Learn(input, output);
    timer.Stop();

    // Zero-one loss of the calibrated machine on the same rows.
    var zeroOne = new ZeroOneLoss(output);
    double error = zeroOne.Loss(ksvm.Decide(input));

    lbStatus.Text = String.Format(
        "Calibration complete ({0}ms, {1}er). Click Classify to test the classifiers.",
        timer.ElapsedMilliseconds, error);

    btnClassifyVoting.Enabled = true;
}
public void ComputeTest3()
{
    // Trains a discrete Naive Bayes model on a three-class problem and
    // checks that it reproduces the label of every training sample.

    #region doc_multiclass
    // Fifteen samples spread evenly over three classes.
    int[][] samples =
    {
        //               input      output
        new int[] { 0, 1, 1, 0 }, //  0
        new int[] { 0, 1, 0, 0 }, //  0
        new int[] { 0, 0, 1, 0 }, //  0
        new int[] { 0, 1, 1, 0 }, //  0
        new int[] { 0, 1, 0, 0 }, //  0
        new int[] { 1, 0, 0, 0 }, //  1
        new int[] { 1, 0, 0, 0 }, //  1
        new int[] { 1, 0, 0, 1 }, //  1
        new int[] { 0, 0, 0, 1 }, //  1
        new int[] { 0, 0, 0, 1 }, //  1
        new int[] { 1, 1, 1, 1 }, //  2
        new int[] { 1, 0, 1, 1 }, //  2
        new int[] { 1, 1, 0, 1 }, //  2
        new int[] { 0, 1, 1, 1 }, //  2
        new int[] { 1, 1, 1, 1 }, //  2
    };

    // Class label of each sample above.
    int[] labels =
    {
        0, 0, 0, 0, 0,
        1, 1, 1, 1, 1,
        2, 2, 2, 2, 2,
    };

    // Create a learning algorithm and teach a model with the examples.
    var learner = new NaiveBayesLearning();
    NaiveBayes model = learner.Learn(samples, labels);

    // Query the first training sample; the table above labels it 0.
    int answer = model.Decide(new int[] { 0, 1, 1, 0 });
    #endregion

    // No training sample may be misclassified.
    var zeroOne = new ZeroOneLoss(labels);
    double error = zeroOne.Loss(model.Decide(samples));
    Assert.AreEqual(0, error);

    // The per-sample Compute call must agree with the labels as well.
    for (int s = 0; s < samples.Length; s++)
    {
        double expected = labels[s];
        double actual = model.Compute(samples[s]);
        Assert.AreEqual(expected, actual);
    }
}
/// <summary>
///   Creates a Support Vector Machine and estimates its parameters
///   using a learning algorithm configured from the user interface,
///   then refreshes the status text, buttons, machine table and size label.
/// </summary>
///
private void btnRunTraining_Click(object sender, EventArgs e)
{
    if (dgvTrainingSource.Rows.Count == 0)
    {
        MessageBox.Show("Please load the training data before clicking this button");
        return;
    }

    lbStatus.Text = "Gathering data. This may take a while...";
    Application.DoEvents();

    // Extract inputs and outputs
    int rows = dgvTrainingSource.Rows.Count;
    double[][] input = new double[rows][];
    int[] output = new int[rows];
    for (int i = 0; i < rows; i++)
    {
        input[i] = (double[])dgvTrainingSource.Rows[i].Cells["colTrainingFeatures"].Value;
        output[i] = (int)dgvTrainingSource.Rows[i].Cells["colTrainingLabel"].Value;
    }

    // Create the chosen kernel function
    // using the user interface parameters
    //
    IKernel kernel = createKernel();

    // Extract training parameters from the interface
    double complexity = (double)numComplexity.Value;
    double tolerance = (double)numTolerance.Value;
    int cacheSize = (int)numCache.Value;
    SelectionStrategy strategy = (SelectionStrategy)cbStrategy.SelectedItem;

    // Create the learning algorithm using the machine and the training data
    var ml = new MulticlassSupportVectorLearning<IKernel>()
    {
        // Configure the learning algorithm
        Learner = (param) => new SequentialMinimalOptimization<IKernel>()
        {
            Complexity = complexity,
            Tolerance = tolerance,
            CacheSize = cacheSize,
            Strategy = strategy,
            Kernel = kernel
        }
    };

    lbStatus.Text = "Training the classifiers. This may take a (very) significant amount of time...";
    Application.DoEvents();

    Stopwatch sw = Stopwatch.StartNew();

    // Train the machines. It should take a while.
    ksvm = ml.Learn(input, output);

    // If we created a linear machine, compress the support vectors
    // into one single parameter vector for increased performance:
    if (ksvm.Kernel is Linear)
    {
        ksvm.Compress();
    }

    sw.Stop();

    double error = new ZeroOneLoss(output)
    {
        Mean = true
    }.Loss(ksvm.Decide(input));

    lbStatus.Text = String.Format(
        "Training complete ({0}ms, {1}er). Click Classify to test the classifiers.",
        sw.ElapsedMilliseconds, error);

    // Update the interface status
    btnClassifyVoting.Enabled = true;
    btnClassifyElimination.Enabled = true;
    btnCalibration.Enabled = true;

    // Populate the information tab with the machines. The number of classes
    // is read from the trained machine rather than assumed to be 10, so a
    // training set with fewer classes does not index past the last machine.
    dgvMachines.Rows.Clear();
    int classes = ksvm.NumberOfOutputs;
    int k = 1;
    for (int i = 0; i < classes; i++)
    {
        for (int j = 0; j < i; j++, k++)
        {
            var machine = ksvm[i, j];

            int sv = machine.SupportVectors == null ? 0 : machine.SupportVectors.Length;

            int c = dgvMachines.Rows.Add(k, i + "-vs-" + j, sv, machine.Threshold);
            dgvMachines.Rows[c].Tag = machine;
        }
    }

    // approximate size in bytes =
    //   number of support vectors * number of doubles in a support vector * size of double
    // Computed as a long so the product cannot overflow an int.
    long bytes = (long)ksvm.SupportVectorUniqueCount * 1024 * sizeof(double);

    // Divide in floating point; an integer division here would silently
    // drop the fractional megabytes (and report 0 MB for small machines).
    float megabytes = bytes / (1024f * 1024f);
    lbSize.Text = String.Format("{0} ({1} MB)", ksvm.SupportVectorUniqueCount, megabytes);
}
public void learn_test()
{
    #region doc_learn
    // Four two-dimensional training samples
    double[][] samples =
    {
        new double[] { 0, 0 }, // class 1
        new double[] { 0, 1 }, // class 0
        new double[] { 1, 0 }, // class 0
        new double[] { 1, 1 }  // class 1
    };

    // Class label of each sample above
    int[] labels = { 1, 0, 0, 1 };

    // Set up an SMO teacher that uses a Gaussian kernel
    var teacher = new SequentialMinimalOptimization<Gaussian>();
    teacher.Kernel = new Gaussian(0.1);
    teacher.Complexity = 1.0;

    // Train the support vector machine
    var svm = teacher.Learn(samples, labels);

    // Hard decisions for every training sample
    bool[] decisions = svm.Decide(samples);

    // Zero-one loss of those decisions against the labels
    var zeroOne = new ZeroOneLoss(labels);
    double error = zeroOne.Loss(decisions);

    // Build the probabilistic output calibration for the trained machine
    var calibration = new ProbabilisticOutputCalibration<Gaussian>(svm);

    // Run the calibration on the same data; probabilities are queried from svm afterwards
    calibration.Learn(samples, labels);

    // Probability estimates for every training sample
    double[] probabilities = svm.Probability(samples);

    // Binary cross-entropy of the estimated probabilities against the labels
    var crossEntropy = new BinaryCrossEntropyLoss(labels);
    double loss = crossEntropy.Loss(probabilities);

    // Decide the first sample while also retrieving the probability of that answer
    bool decision;
    double probability = svm.Probability(samples[0], out decision);
    #endregion

    // The first sample is expected to be decided positively with roughly 75% probability
    Assert.AreEqual(true, decision);
    Assert.AreEqual(0, error);
    Assert.AreEqual(5.5451735748925355, loss);
    Assert.AreEqual(0.74999975815069375, probability, 1e-10);
    Assert.IsTrue(svm.IsProbabilistic);

    // Weights are expected to alternate in sign with the same magnitude
    double[] expectedWeights =
    {
        -1.0986109988055595,
        1.0986109988055595,
        -1.0986109988055595,
        1.0986109988055595
    };

    for (int w = 0; w < expectedWeights.Length; w++)
    {
        Assert.AreEqual(expectedWeights[w], svm.Weights[w]);
    }
}
/// <summary>
///   Trains an ID3 decision tree that predicts a review's rating from its year
///   and reviewer, then plots the expected and the predicted ratings of a
///   separate slice of reviews.
/// </summary>
/// <remarks>
///   After shuffling, reviews [0, 1500) are used for training and reviews
///   [1500, 2000) are used as the query set shown in the scatter plots.
/// </remarks>
public static void BuildDecisionTreeOnYearAndUser()
{
    DataHandler.ImportReviewData(5);
    DataHandler.Reviews.Shuffle();

    // Training table built from the first 1500 shuffled reviews
    DataTable data = CreateReviewTable(0, 1500);

    // Create a new codification codebook to
    // convert strings into integer symbols
    var codebook = new Codification(data, "Year", "User", "Review");
    DataTable symbols = codebook.Apply(data, "Year", "User", "Review");
    int[][] inputs = symbols.ToJagged<int>("Year", "User");
    int[] outputs = symbols.ToArray<int>("Review");

    // Gather information about decision variables.
    // NOTE(review): the symbol counts are hard-coded; they must be at least the
    // number of distinct years/users in the codebook - confirm against the data.
    DecisionVariable[] attributes =
    {
        new DecisionVariable("Year", 7),  // up to 7 distinct years
        new DecisionVariable("User", 18), // up to 18 distinct users
    };

    // Create a new instance of the ID3 algorithm
    var id3learning = new ID3Learning(attributes);

    // Learn the training instances!
    DecisionTree tree = id3learning.Learn(inputs, outputs);

    // Compute the training error when predicting training instances
    // (not reported anywhere in this method; available when debugging)
    double error = new ZeroOneLoss(outputs).Loss(tree.Decide(inputs));

    // The tree can now be queried for new examples through its Decide
    // method. Here the query set is the next 500 shuffled reviews.
    DataTable newData = CreateReviewTable(1500, 2000);

    // Encode the QUERY table (not the training table) with the training codebook.
    // NOTE(review): this presumably fails if the query slice contains a value
    // that never appeared in the training slice - confirm for the real dataset.
    DataTable newSymbols = codebook.Apply(newData, "Year", "User", "Review");
    int[][] newInputs = newSymbols.ToJagged<int>("Year", "User");
    int[] newOutputs = newSymbols.ToArray<int>("Review");

    int[] answers = tree.Decide(newInputs);

    // First plot: the true ratings; second plot: what the tree predicted
    ScatterplotBox.Show("Expected results", newOutputs.Select(i => (double)i).ToArray());
    ScatterplotBox.Show("Decision Tree results", answers.Select(i => (double)i).ToArray())
        .Hold();
}

/// <summary>
///   Builds a Year/User/Review table from the reviews at indices [start, end).
/// </summary>
/// <param name="start">Index of the first review to include.</param>
/// <param name="end">Index one past the last review to include.</param>
/// <returns>A table with one row per review in the requested range.</returns>
private static DataTable CreateReviewTable(int start, int end)
{
    DataTable table = new DataTable("Review Simple Input Data");
    table.Columns.Add("Year");
    table.Columns.Add("User");
    table.Columns.Add("Review");

    for (int i = start; i < end; i++)
    {
        var currentReview = DataHandler.Reviews[i];
        object[] values = new object[3];
        values[0] = currentReview.reviewTime.Year;
        values[1] = currentReview.reviewerID;
        values[2] = currentReview.overall;
        table.Rows.Add(values);
    }

    return table;
}
public void multiclass_linear_new_usage()
{
    #region doc_learn_ldcd
    // Fifteen four-dimensional samples, five for each of three classes.
    // The trailing comment on each row is the class the sample belongs to.
    double[][] samples =
    {
        new double[] { 0, 1, 1, 0 }, // 0
        new double[] { 0, 1, 0, 0 }, // 0
        new double[] { 0, 0, 1, 0 }, // 0
        new double[] { 0, 1, 1, 0 }, // 0
        new double[] { 0, 1, 0, 0 }, // 0
        new double[] { 1, 0, 0, 0 }, // 1
        new double[] { 1, 0, 0, 0 }, // 1
        new double[] { 1, 0, 0, 1 }, // 1
        new double[] { 0, 0, 0, 1 }, // 1
        new double[] { 0, 0, 0, 1 }, // 1
        new double[] { 1, 1, 1, 1 }, // 2
        new double[] { 1, 0, 1, 1 }, // 2
        new double[] { 1, 1, 0, 1 }, // 2
        new double[] { 0, 1, 1, 1 }, // 2
        new double[] { 1, 1, 1, 1 }, // 2
    };

    // Class label of each sample, in the same order
    int[] labels =
    {
        0, 0, 0, 0, 0,
        1, 1, 1, 1, 1,
        2, 2, 2, 2, 2,
    };

    // Multi-class SVM teacher with a linear kernel; each inner machine is
    // trained by linear dual coordinate descent using the L2 loss
    var learner = new MulticlassSupportVectorLearning<Linear>();
    learner.Learner = (p) =>
    {
        var inner = new LinearDualCoordinateDescent();
        inner.Loss = Loss.L2;
        return inner;
    };

    // Restrict training to a single degree of parallelism
    learner.ParallelOptions.MaxDegreeOfParallelism = 1;

    // Train the multi-class machine
    var svm = learner.Learn(samples, labels);

    // Class decisions for every training sample
    int[] predicted = svm.Decide(samples);

    // Zero-one loss of the decisions against the labels
    var zeroOne = new ZeroOneLoss(labels);
    double error = zeroOne.Loss(predicted);
    #endregion

    // Every training sample is expected to be classified correctly
    Assert.AreEqual(0, error);
    Assert.IsTrue(predicted.IsEqual(labels));
}