/// <summary>
///   Trains a C4.5 decision tree to separate normal student records from anomalies,
///   then splits the classified records into three training sets: two groups of
///   normal records (2/3 and 1/3) and the anomalies.
/// </summary>
/// <param name="studentsAndMarks">Students paired with their mark vectors.</param>
/// <param name="normalRecords">OpenIds of students known to be normal (class 1).</param>
/// <param name="anomalies">OpenIds of students known to be anomalous (class 0).</param>
/// <returns>Three training sets: [normal 2/3, normal 1/3, anomalies].</returns>
public static TrainingSet[] GenerateTrainingSets(IEnumerable<KeyValuePair<User, double[]>> studentsAndMarks, string[] normalRecords, string[] anomalies)
{
    // FIX: the sequence was previously enumerated four times (foreach, ElementAt
    // and two Where queries); a deferred LINQ/EF source would be re-executed each
    // time and could yield inconsistent results. Materialize exactly once.
    var studentList = studentsAndMarks.ToList();

    // FIX: O(1) membership tests instead of a linear Array.Contains per student.
    var normalSet = new HashSet<string>(normalRecords);
    var anomalySet = new HashSet<string>(anomalies);

    var countOfEntries = normalRecords.Length + anomalies.Length;
    var inputData = new double[countOfEntries][];
    var outputData = new int[countOfEntries];
    var counter = 0;

    foreach (var studentAndMarks in studentList)
    {
        if (normalSet.Contains(studentAndMarks.Key.OpenId))
        {
            inputData[counter] = studentAndMarks.Value;
            outputData[counter++] = 1; // class 1 = normal
        }

        if (!anomalySet.Contains(studentAndMarks.Key.OpenId))
        {
            continue;
        }

        inputData[counter] = studentAndMarks.Value;
        outputData[counter++] = 0; // class 0 = anomaly
    }

    // Feature 0 uses its own range; all remaining features share the 0..10 range.
    var countOfFeatures = studentList[0].Value.Length;
    var features = new DecisionVariable[countOfFeatures];
    features[0] = new DecisionVariable("0", DecisionAttributeKind.Continuous, new AForge.DoubleRange(80, 1200));
    for (var i = 1; i < countOfFeatures; i++)
    {
        features[i] = new DecisionVariable(i.ToString(), DecisionAttributeKind.Continuous, new AForge.DoubleRange(0, 10));
    }

    // Create the decision tree with only 2 result values
    var tree = new DecisionTree(features, 2);

    // Creates a new instance of the C4.5 learning algorithm and learn the tree
    var c45 = new C45Learning(tree);
    var error = c45.Run(inputData, outputData);

    // Split all data into normal records and anomalies by the tree's decision
    var setOfNormalRecordsList = studentList.Where(x => tree.Compute(x.Value) == 1).ToList();
    var setOfAnomalies = studentList.Where(x => tree.Compute(x.Value) == 0);

    // Split normal records into 2 groups (one for the training set and
    // one for anomaly occurrence detection)
    var splitCount = setOfNormalRecordsList.Count * 2 / 3;
    var setOfNormalRecordsTr1 = setOfNormalRecordsList.GetRange(0, splitCount);
    var setOfNormalRecordsTr2 = setOfNormalRecordsList.GetRange(splitCount, setOfNormalRecordsList.Count - splitCount);

    // Create Training Sets
    var trSetNormalFirst = CreateTrainingSetFromResources(setOfNormalRecordsTr1);
    var trSetNormalSecond = CreateTrainingSetFromResources(setOfNormalRecordsTr2);
    var trSetAnomalies = CreateTrainingSetFromResources(setOfAnomalies);

    return new[] { trSetNormalFirst, trSetNormalSecond, trSetAnomalies };
}
/// <summary>
///   Encodes the four combo-box selections with the codebook, classifies them
///   with the decision tree, and shows the decoded "Tennis" answer on the label.
/// </summary>
private void button3_Click(object sender, EventArgs e)
{
    int[] encodedInputs = codebook.Translate(comboBox6.Text, comboBox7.Text, comboBox8.Text, comboBox9.Text);
    int decision = tree.Compute(encodedInputs);
    label10.Text = codebook.Translate("Tennis", decision);
}
/// <summary>
///   Classifies a record (minus its trailing class column) with the given tree
///   and reports whether the decoded output equals the positive class "1".
/// </summary>
bool Translate(Record instance, DecisionTree tree, Codification codebook, string label)
{
    var encoded = codebook.Translate(ExcludeLast(instance.Values));
    var decoded = codebook.Translate(label, tree.Compute(encoded));
    return decoded == "1";
}
/// <summary>
///   Tests the previously created tree against a new set of data.
/// </summary>
private void btnTestingRun_Click(object sender, EventArgs e)
{
    if (tree == null || dgvTestingSource.DataSource == null)
    {
        MessageBox.Show("Please create a machine first.");
        return;
    }

    // BUG FIX: the guard above checks the *testing* grid, but the matrix was
    // previously read from dgvLearningSource, so this handler silently re-tested
    // the training data instead of the testing data.
    double[,] table = (dgvTestingSource.DataSource as DataTable).ToMatrix(out columnNames);

    // Get only the input vector values (first two columns)
    double[][] inputs = table.GetColumns(0, 1).ToArray();

    // Get the expected output labels (last column)
    int[] expected = table.GetColumn(2).ToInt32();

    // Compute the actual tree outputs
    int[] actual = new int[inputs.Length];
    for (int i = 0; i < inputs.Length; i++)
    {
        actual[i] = tree.Compute(inputs[i]);
    }

    // Use confusion matrix to compute some statistics.
    ConfusionMatrix confusionMatrix = new ConfusionMatrix(actual, expected, 1, 0);
    dgvPerformance.DataSource = new[] { confusionMatrix };

    // Create performance scatter plot
    CreateResultScatterplot(zedGraphControl1, inputs, expected.ToDouble(), actual.ToDouble());
}
/// <summary>
///   Computes the prediction error for the tree
///   over a given set of input and outputs.
/// </summary>
///
/// <param name="inputs">The input points.</param>
/// <param name="outputs">The corresponding output labels.</param>
///
/// <returns>The percentage error of the prediction.</returns>
///
public double ComputeError(double[][] inputs, int[] outputs)
{
    int misclassified = 0;

    // Count every sample whose computed class disagrees with its label.
    for (int i = 0; i < inputs.Length; i++)
    {
        bool wrong = tree.Compute(inputs[i]) != outputs[i];
        if (wrong)
        {
            misclassified++;
        }
    }

    return misclassified / (double)inputs.Length;
}
/// <summary>
///   Evaluates the tree on every input and summarizes the predictions
///   against the expected outputs as a confusion matrix (positive = 1, negative = 0).
/// </summary>
/// <remarks>The <c>ref</c> modifiers are kept for caller compatibility; the arrays are not reassigned.</remarks>
public ConfusionMatrix GetDecisionTreeAccuracy(DecisionTree tree, ref double[][] inputs, ref int[] outputs)
{
    var predictions = new int[inputs.Length];
    for (int sample = 0; sample < inputs.Length; sample++)
    {
        predictions[sample] = tree.Compute(inputs[sample]);
    }

    return new ConfusionMatrix(predictions, outputs, 1, 0);
}
/// <summary>
///   Computes the prediction error for the tree
///   over a given set of input and outputs.
/// </summary>
///
/// <param name="inputs">The input points.</param>
/// <param name="outputs">The corresponding output labels.</param>
///
/// <returns>The percentual error of the prediction.</returns>
///
public double ComputeError(int[][] inputs, int[] outputs)
{
    int wrong = 0;

    // Integer inputs are widened to double before querying the tree.
    for (int i = 0; i < inputs.Length; i++)
    {
        int predicted = tree.Compute(inputs[i].ToDouble());
        if (predicted != outputs[i])
        {
            wrong++;
        }
    }

    return (double)wrong / inputs.Length;
}
/// <summary>
///   Returns the fraction of stored samples (fields <c>inputs</c>/<c>outputs</c>)
///   that the tree misclassifies.
/// </summary>
private double computeError()
{
    int mismatches = 0;

    for (int i = 0; i < inputs.Length; i++)
    {
        if (tree.Compute(inputs[i]) != outputs[i])
        {
            mismatches++;
        }
    }

    return mismatches / (double)inputs.Length;
}
// Verifies that a tree deserialized from the embedded resource has the expected
// shape, classifies Mitchell's tennis example perfectly, and traverses
// breadth-first in the expected node order.
public void DeserializationTest1()
{
    // Load the previously serialized tree from the test resources.
    MemoryStream stream = new MemoryStream(Properties.Resources.tree);
    DecisionTree tree = Serializer.Load<DecisionTree>(stream);

    Assert.AreEqual(4, tree.InputCount);
    Assert.AreEqual(2, tree.OutputClasses);
    Assert.IsNotNull(tree.Root);

    // Build the reference data set (the freshly trained tree itself is unused;
    // only the inputs/outputs are needed to exercise the deserialized tree).
    DecisionTree newtree;
    int[][] inputs;
    int[] outputs;
    ID3LearningTest.CreateMitchellExample(out newtree, out inputs, out outputs);

    for (int i = 0; i < inputs.Length; i++)
    {
        int y = tree.Compute(inputs[i].ToDouble());
        Assert.AreEqual(outputs[i], y);
    }

    // Expected breadth-first traversal order of the deserialized tree.
    DecisionNode[] expected =
    {
        tree.Root,
        tree.Root.Branches[0], // Outlook = 0
        tree.Root.Branches[1], // Outlook = 1
        tree.Root.Branches[2], // Outlook = 2
        tree.Root.Branches[0].Branches[0], // Humidity = 0
        tree.Root.Branches[0].Branches[1], // Humidity = 1
        tree.Root.Branches[2].Branches[0], // Wind = 0
        tree.Root.Branches[2].Branches[1], // Wind = 1
    };

    int c = 0;
    foreach (var node in tree.Traverse(DecisionTreeTraversal.BreadthFirst))
    {
        Assert.AreEqual(expected[c++], node);
    }
    Assert.AreEqual(expected.Length, c);
}
// Actual Selection using Decision Tree
/// <summary>
///   Classifies the ("Array Size", "Runs") pair with the decision tree and
///   returns the selected sorting algorithm, either as the raw class index
///   or decoded through the codebook.
/// </summary>
/// <param name="input">input[0] = array size value, input[1] = runs value (codebook symbols).</param>
/// <param name="returnNonTranslatedInt">When true, return the raw class index as a string.</param>
/// <returns>The algorithm name (or index), or "Could not match inputs" when translation fails.</returns>
public string GetBestAlgorithmForInputTree(string[] input, bool returnNonTranslatedInt)
{
    try
    {
        double[] codes =
        {
            codebook.Translate("Array Size", input[0]),
            codebook.Translate("Runs", input[1])
        };

        int result = tree.Compute(codes);

        // FIX: removed the unused `ex` capture below (compiler warning) and the
        // string concatenation idiom `result + ""` in favor of ToString().
        return returnNonTranslatedInt
            ? result.ToString()
            : codebook.Translate("Selected Sorting Algorithm", result);
    }
    catch (Exception)
    {
        // Deliberate best-effort: unknown symbols make Translate throw,
        // which is reported to the caller as a sentinel string.
        return "Could not match inputs";
    }
}
// "kararAgaci" is Turkish for "decision tree": trains an ID3 tree on the
// breast-cancer style table and classifies the record held in the `inputlar`
// field ("inputs" in Turkish), returning the decoded "Class" answer.
// NOTE(review): ID3 expects discrete symbols; all nine attributes are declared
// with 10 possible values — presumably the columns hold 1..10 grades; confirm.
public string kararAgaci(DataTable tbl)
{
    int classCount = 2;
    Codification codebook = new Codification(tbl);

    DecisionVariable[] attributes =
    {
        new DecisionVariable("Clump Thickness", 10),
        new DecisionVariable("Uniformity of Cell Size", 10),
        new DecisionVariable("Uniformity of Cell Shape", 10),
        new DecisionVariable("Marginal Adhesion", 10),
        new DecisionVariable("Single Epithelial Cell Size", 10),
        new DecisionVariable("Bare Nuclei", 10),
        new DecisionVariable("Bland Chromatin", 10),
        new DecisionVariable("Normal Nucleoli", 10),
        new DecisionVariable("Mitoses", 10),
    };

    DecisionTree tree = new DecisionTree(attributes, classCount);
    ID3Learning id3learning = new ID3Learning(tree);

    // Translate our training data into integer symbols using our codebook:
    DataTable symbols = codebook.Apply(tbl);
    int[][] inputs = symbols.ToIntArray("Clump Thickness", "Uniformity of Cell Size", "Uniformity of Cell Shape", "Marginal Adhesion", "Single Epithelial Cell Size", "Bare Nuclei", "Bland Chromatin", "Normal Nucleoli", "Mitoses");
    int[] outputs = symbols.ToIntArray("Class").GetColumn(0);

    // symbols.
    id3learning.Run(inputs, outputs);

    // Classify the nine user-supplied values and decode the predicted class.
    int[] query = codebook.Translate(inputlar[0], inputlar[1], inputlar[2], inputlar[3], inputlar[4], inputlar[5], inputlar[6], inputlar[7], inputlar[8]);
    int output = tree.Compute(query);
    string answer = codebook.Translate("Class", output);
    return (answer);
}
// End-to-end test: learns a C4.5 tree on the full UCI Nursery data set,
// requires zero training error, and checks the compiled-expression form
// agrees with the tree on every sample.
public void LargeRunTest()
{
    #region doc_nursery
    // This example uses the Nursery Database available from the University of
    // California Irvine repository of machine learning databases, available at
    //
    //   http://archive.ics.uci.edu/ml/machine-learning-databases/nursery/nursery.names
    //
    // The description paragraph is listed as follows.
    //
    //   Nursery Database was derived from a hierarchical decision model
    //   originally developed to rank applications for nursery schools. It
    //   was used during several years in 1980's when there was excessive
    //   enrollment to these schools in Ljubljana, Slovenia, and the
    //   rejected applications frequently needed an objective
    //   explanation. The final decision depended on three subproblems:
    //   occupation of parents and child's nursery, family structure and
    //   financial standing, and social and health picture of the family.
    //   The model was developed within expert system shell for decision
    //   making DEX (M. Bohanec, V. Rajkovic: Expert system for decision
    //   making. Sistemica 1(1), pp. 145-157, 1990.).

    // Let's begin by loading the raw data. This string variable contains
    // the contents of the nursery.data file as a single, continuous text.
    //
    string nurseryData = Resources.nursery;

    // Those are the input columns available in the data
    //
    string[] inputColumns =
    {
        "parents", "has_nurs", "form", "children",
        "housing", "finance", "social", "health"
    };

    // And this is the output, the last column of the data.
    //
    string outputColumn = "output";

    // Let's populate a data table with this information.
    //
    DataTable table = new DataTable("Nursery");
    table.Columns.Add(inputColumns);
    table.Columns.Add(outputColumn);

    string[] lines = nurseryData.Split(
        new[] { Environment.NewLine }, StringSplitOptions.None);

    foreach (var line in lines)
    {
        table.Rows.Add(line.Split(','));
    }

    // Now, we have to convert the textual, categorical data found
    // in the table to a more manageable discrete representation.
    //
    // For this, we will create a codebook to translate text to
    // discrete integer symbols:
    //
    Codification codebook = new Codification(table);

    // And then convert all data into symbols
    //
    DataTable symbols = codebook.Apply(table);
    double[][] inputs = symbols.ToArray(inputColumns);
    int[] outputs = symbols.ToArray<int>(outputColumn);

    // From now on, we can start creating the decision tree.
    //
    var attributes = DecisionVariable.FromCodebook(codebook, inputColumns);
    DecisionTree tree = new DecisionTree(attributes, classes: 5);

    // Now, let's create the C4.5 algorithm
    C45Learning c45 = new C45Learning(tree);

    // and learn a decision tree. The value of
    //   the error variable below should be 0.
    //
    double error = c45.Run(inputs, outputs);

    // To compute a decision for one of the input points,
    //   such as the 25-th example in the set, we can use
    //
    int y = tree.Compute(inputs[25]);
    #endregion

    Assert.AreEqual(0, error);

    // The learned tree must reproduce every training label exactly.
    for (int i = 0; i < inputs.Length; i++)
    {
        int expected = outputs[i];
        int actual = tree.Compute(inputs[i]);
        Assert.AreEqual(expected, actual);
    }

#if !NET35
    // Finally, we can also convert our tree to a native
    // function, improving efficiency considerably, with
    //
    Func<double[], int> func = tree.ToExpression().Compile();

    // Again, to compute a new decision, we can just use
    //
    int z = func(inputs[25]);

    // The compiled function must agree with the tree on all samples.
    for (int i = 0; i < inputs.Length; i++)
    {
        int expected = outputs[i];
        int actual = func(inputs[i]);
        Assert.AreEqual(expected, actual);
    }
#endif
}
// Verifies ID3 behavior when a discrete variable declares far more symbols
// than appear in the data: the tree must grow a branch per declared symbol,
// and branches for unseen symbols must be leaves with a null (undefined) output.
public void IncompleteDiscreteVariableTest()
{
    DecisionTree tree;
    int[][] inputs;
    int[] outputs;

    DataTable data = new DataTable("Degenerated Tennis Example");
    data.Columns.Add("Day", "Outlook", "Temperature", "Humidity", "Wind", "PlayTennis");
    data.Rows.Add("D1", "Sunny", "Hot", "High", "Weak", "No");
    data.Rows.Add("D2", "Sunny", "Hot", "High", "Strong", "No");
    data.Rows.Add("D3", "Overcast", "Hot", "High", "Weak", "Yes");
    data.Rows.Add("D4", "Rain", "Mild", "High", "Weak", "Yes");
    data.Rows.Add("D5", "Rain", "Cool", "Normal", "Weak", "Yes");
    data.Rows.Add("D6", "Rain", "Cool", "Normal", "Strong", "No");
    data.Rows.Add("D7", "Overcast", "Cool", "Normal", "Strong", "Yes");
    data.Rows.Add("D8", "Sunny", "Mild", "High", "Weak", "No");
    data.Rows.Add("D9", "Sunny", "Cool", "Normal", "Weak", "Yes");
    data.Rows.Add("D10", "Rain", "Mild", "Normal", "Weak", "Yes");
    data.Rows.Add("D11", "Sunny", "Mild", "Normal", "Strong", "Yes");
    data.Rows.Add("D12", "Overcast", "Mild", "High", "Strong", "Yes");
    data.Rows.Add("D13", "Overcast", "Hot", "Normal", "Weak", "Yes");
    data.Rows.Add("D14", "Rain", "Mild", "High", "Strong", "No");

    // Create a new codification codebook to
    // convert strings into integer symbols
    Codification codebook = new Codification(data);

    DecisionVariable[] attributes =
    {
        // Deliberately over-declared: 200 extra symbols that never occur in the data.
        new DecisionVariable("Outlook", codebook["Outlook"].Symbols + 200), // 203 possible values, 200 undefined
        new DecisionVariable("Temperature", codebook["Temperature"].Symbols), // 3 possible values (Hot, mild, cool)
        new DecisionVariable("Humidity", codebook["Humidity"].Symbols), // 2 possible values (High, normal)
        new DecisionVariable("Wind", codebook["Wind"].Symbols) // 2 possible values (Weak, strong)
    };

    int classCount = codebook["PlayTennis"].Symbols; // 2 possible values (yes, no)

    tree = new DecisionTree(attributes, classCount);
    ID3Learning id3 = new ID3Learning(tree);

    // Extract symbols from data and train the classifier
    DataTable symbols = codebook.Apply(data);
    inputs = symbols.ToArray<int>("Outlook", "Temperature", "Humidity", "Wind");
    outputs = symbols.ToArray<int>("PlayTennis");

    double error = id3.Run(inputs, outputs);
    Assert.AreEqual(0, error);

    // One branch per declared symbol; an unseen symbol (index 100) must be an
    // undecided leaf with no output class.
    Assert.AreEqual(203, tree.Root.Branches.Count);
    Assert.IsTrue(tree.Root.Branches[100].IsLeaf);
    Assert.IsNull(tree.Root.Branches[100].Output);

    for (int i = 0; i < inputs.Length; i++)
    {
        int y = tree.Compute(inputs[i]);
        Assert.AreEqual(outputs[i], y);
    }
}
// Builds Tom Mitchell's classic "PlayTennis" data set, trains an ID3 tree on
// it (asserting zero training error), and returns the tree plus the encoded
// inputs/outputs through the out parameters for reuse by other tests.
public static void CreateMitchellExample(out DecisionTree tree, out int[][] inputs, out int[] outputs)
{
    DataTable data = new DataTable("Mitchell's Tennis Example");
    data.Columns.Add("Day", "Outlook", "Temperature", "Humidity", "Wind", "PlayTennis");
    data.Rows.Add("D1", "Sunny", "Hot", "High", "Weak", "No");
    data.Rows.Add("D2", "Sunny", "Hot", "High", "Strong", "No");
    data.Rows.Add("D3", "Overcast", "Hot", "High", "Weak", "Yes");
    data.Rows.Add("D4", "Rain", "Mild", "High", "Weak", "Yes");
    data.Rows.Add("D5", "Rain", "Cool", "Normal", "Weak", "Yes");
    data.Rows.Add("D6", "Rain", "Cool", "Normal", "Strong", "No");
    data.Rows.Add("D7", "Overcast", "Cool", "Normal", "Strong", "Yes");
    data.Rows.Add("D8", "Sunny", "Mild", "High", "Weak", "No");
    data.Rows.Add("D9", "Sunny", "Cool", "Normal", "Weak", "Yes");
    data.Rows.Add("D10", "Rain", "Mild", "Normal", "Weak", "Yes");
    data.Rows.Add("D11", "Sunny", "Mild", "Normal", "Strong", "Yes");
    data.Rows.Add("D12", "Overcast", "Mild", "High", "Strong", "Yes");
    data.Rows.Add("D13", "Overcast", "Hot", "Normal", "Weak", "Yes");
    data.Rows.Add("D14", "Rain", "Mild", "High", "Strong", "No");

    // Create a new codification codebook to
    // convert strings into integer symbols
    Codification codebook = new Codification(data, "Outlook", "Temperature", "Humidity", "Wind", "PlayTennis");

    DecisionVariable[] attributes =
    {
        new DecisionVariable("Outlook", codebook["Outlook"].Symbols), // 3 possible values (Sunny, overcast, rain)
        new DecisionVariable("Temperature", codebook["Temperature"].Symbols), // 3 possible values (Hot, mild, cool)
        new DecisionVariable("Humidity", codebook["Humidity"].Symbols), // 2 possible values (High, normal)
        new DecisionVariable("Wind", codebook["Wind"].Symbols) // 2 possible values (Weak, strong)
    };

    int classCount = codebook["PlayTennis"].Symbols; // 2 possible values (yes, no)

    tree = new DecisionTree(attributes, classCount);
    ID3Learning id3 = new ID3Learning(tree);

    // Extract symbols from data and train the classifier
    DataTable symbols = codebook.Apply(data);
    inputs = symbols.ToArray<int>("Outlook", "Temperature", "Humidity", "Wind");
    outputs = symbols.ToArray<int>("PlayTennis");

    double error = id3.Run(inputs, outputs);
    Assert.AreEqual(0, error);

    // Spot-check one query: (Sunny, Hot, High, Strong) must decode to "No".
    {
        int[] query = codebook.Translate("Sunny", "Hot", "High", "Strong");
        int output = tree.Compute(query);
        string answer = codebook.Translate("PlayTennis", output);
        Assert.AreEqual("No", answer);
    }

    // Every training row must round-trip through translate/compute/decode.
    foreach (DataRow row in data.Rows)
    {
        var x = codebook.Translate(row, "Outlook", "Temperature", "Humidity", "Wind");
        int y = tree.Compute(x);
        string actual = codebook.Translate("PlayTennis", y);
        string expected = row["PlayTennis"] as string;
        Assert.AreEqual(expected, actual);
    }

    // Same spot-check written as a single chained expression.
    {
        string answer = codebook.Translate("PlayTennis", tree.Compute(codebook.Translate("Sunny", "Hot", "High", "Strong")));
        Assert.AreEqual("No", answer);
    }
}
// Verifies that constructing a DecisionTree with a discrete variable that has
// only one possible value (constant "Temperature" = Hot) throws, and that
// re-declaring the variable with 2 symbols lets ID3 learn with zero error.
public void ConstantDiscreteVariableTest()
{
    DecisionTree tree;
    int[][] inputs;
    int[] outputs;

    DataTable data = new DataTable("Degenerated Tennis Example");
    data.Columns.Add("Day", "Outlook", "Temperature", "Humidity", "Wind", "PlayTennis");
    // Temperature is deliberately constant ("Hot") in every row.
    data.Rows.Add("D1", "Sunny", "Hot", "High", "Weak", "No");
    data.Rows.Add("D2", "Sunny", "Hot", "High", "Strong", "No");
    data.Rows.Add("D3", "Overcast", "Hot", "High", "Weak", "Yes");
    data.Rows.Add("D4", "Rain", "Hot", "High", "Weak", "Yes");
    data.Rows.Add("D5", "Rain", "Hot", "Normal", "Weak", "Yes");
    data.Rows.Add("D6", "Rain", "Hot", "Normal", "Strong", "No");
    data.Rows.Add("D7", "Overcast", "Hot", "Normal", "Strong", "Yes");
    data.Rows.Add("D8", "Sunny", "Hot", "High", "Weak", "No");
    data.Rows.Add("D9", "Sunny", "Hot", "Normal", "Weak", "Yes");
    data.Rows.Add("D10", "Rain", "Hot", "Normal", "Weak", "Yes");
    data.Rows.Add("D11", "Sunny", "Hot", "Normal", "Strong", "Yes");
    data.Rows.Add("D12", "Overcast", "Hot", "High", "Strong", "Yes");
    data.Rows.Add("D13", "Overcast", "Hot", "Normal", "Weak", "Yes");
    data.Rows.Add("D14", "Rain", "Hot", "High", "Strong", "No");

    // Create a new codification codebook to
    // convert strings into integer symbols
    Codification codebook = new Codification(data);

    DecisionVariable[] attributes =
    {
        new DecisionVariable("Outlook", codebook["Outlook"].Symbols), // 3 possible values (Sunny, overcast, rain)
        new DecisionVariable("Temperature", codebook["Temperature"].Symbols), // 1 constant value (Hot)
        new DecisionVariable("Humidity", codebook["Humidity"].Symbols), // 2 possible values (High, normal)
        new DecisionVariable("Wind", codebook["Wind"].Symbols) // 2 possible values (Weak, strong)
    };

    int classCount = codebook["PlayTennis"].Symbols; // 2 possible values (yes, no)

    // A single-valued discrete variable must be rejected by the constructor.
    bool thrown = false;
    try
    {
        tree = new DecisionTree(attributes, classCount);
    }
    catch
    {
        thrown = true;
    }
    Assert.IsTrue(thrown);

    // Re-declare Temperature with 2 symbols; construction now succeeds.
    attributes[1] = new DecisionVariable("Temperature", 2);
    tree = new DecisionTree(attributes, classCount);
    ID3Learning id3 = new ID3Learning(tree);

    // Extract symbols from data and train the classifier
    DataTable symbols = codebook.Apply(data);
    inputs = symbols.ToJagged<int>("Outlook", "Temperature", "Humidity", "Wind");
    outputs = symbols.ToArray<int>("PlayTennis");

    double error = id3.Run(inputs, outputs);
    Assert.AreEqual(0, error);

    for (int i = 0; i < inputs.Length; i++)
    {
        int y = tree.Compute(inputs[i]);
        Assert.AreEqual(outputs[i], y);
    }
}
/// <summary>
///   Trains a C4.5 decision tree on the breast-cancer style table and
///   classifies the record held in the <c>inputlar</c> field, returning
///   the decoded "Class" answer.
/// </summary>
/// <param name="tbl">Training table with the nine attribute columns and a "Class" column.</param>
/// <returns>The decoded class label for the queried record.</returns>
private string C45(DataTable tbl)
{
    int classCount = 2;
    Codification codebook = new Codification(tbl);

    DecisionVariable[] attributes =
    {
        new DecisionVariable("Clump Thickness", 10),
        new DecisionVariable("Uniformity of Cell Size", 10),
        new DecisionVariable("Uniformity of Cell Shape", 10),
        new DecisionVariable("Marginal Adhesion", 10),
        new DecisionVariable("Single Epithelial Cell Size", 10),
        new DecisionVariable("Bare Nuclei", 10),
        new DecisionVariable("Bland Chromatin", 10),
        new DecisionVariable("Normal Nucleoli", 10),
        new DecisionVariable("Mitoses", 10),
    };

    DecisionTree tree = new DecisionTree(attributes, classCount);

    // Translate our training data into integer symbols using our codebook,
    // then widen to double because C4.5 works on continuous inputs.
    DataTable symbols = codebook.Apply(tbl);
    double[][] inputs = symbols.ToIntArray("Clump Thickness", "Uniformity of Cell Size", "Uniformity of Cell Shape", "Marginal Adhesion", "Single Epithelial Cell Size", "Bare Nuclei", "Bland Chromatin", "Normal Nucleoli", "Mitoses").ToDouble();
    int[] outputs = symbols.ToIntArray("Class").GetColumn(0);

    // Learn the tree with the C4.5 algorithm.
    // (Removed: commented-out ID3 path, unused demo variables y/func/z, and an
    // obsolete NotImplementedException comment — dead code from the sample.)
    C45Learning c45 = new C45Learning(tree);
    c45.Run(inputs, outputs);

    // Classify the nine user-supplied values and decode the predicted class.
    int[] query = codebook.Translate(inputlar[0], inputlar[1], inputlar[2], inputlar[3], inputlar[4], inputlar[5], inputlar[6], inputlar[7], inputlar[8]);
    int output = tree.Compute(query);
    string answer = codebook.Translate("Class", output);
    return answer;
}
// C4.5 variant of the degenerated-tennis test: mixes continuous columns
// (Temperature, Humidity) with discrete ones, where "Wind" is constant in the
// data but declared with an extra symbol so construction succeeds; the learned
// tree must reproduce every training label.
public void ConstantDiscreteVariableTest()
{
    DecisionTree tree;
    double[][] inputs;
    int[] outputs;

    DataTable data = new DataTable("Degenerated Tennis Example");
    data.Columns.Add("Day", typeof(string));
    data.Columns.Add("Outlook", typeof(string));
    data.Columns.Add("Temperature", typeof(double));
    data.Columns.Add("Humidity", typeof(double));
    data.Columns.Add("Wind", typeof(string));
    data.Columns.Add("PlayTennis", typeof(string));

    // "Wind" is deliberately constant ("Weak") in every row.
    data.Rows.Add("D1", "Sunny", 50, 85, "Weak", "No");
    data.Rows.Add("D2", "Sunny", 50, 90, "Weak", "No");
    data.Rows.Add("D3", "Overcast", 83, 78, "Weak", "Yes");
    data.Rows.Add("D4", "Rain", 70, 96, "Weak", "Yes");
    data.Rows.Add("D5", "Rain", 68, 80, "Weak", "Yes");
    data.Rows.Add("D6", "Rain", 65, 70, "Weak", "No");
    data.Rows.Add("D7", "Overcast", 64, 65, "Weak", "Yes");
    data.Rows.Add("D8", "Sunny", 50, 95, "Weak", "No");
    data.Rows.Add("D9", "Sunny", 69, 70, "Weak", "Yes");
    data.Rows.Add("D10", "Rain", 75, 80, "Weak", "Yes");
    data.Rows.Add("D11", "Sunny", 75, 70, "Weak", "Yes");
    data.Rows.Add("D12", "Overcast", 72, 90, "Weak", "Yes");
    data.Rows.Add("D13", "Overcast", 81, 75, "Weak", "Yes");
    data.Rows.Add("D14", "Rain", 50, 80, "Weak", "No");

    // Create a new codification codebook to
    // convert strings into integer symbols
    Codification codebook = new Codification(data);

    DecisionVariable[] attributes =
    {
        new DecisionVariable("Outlook", codebook["Outlook"].Symbols), // 3 possible values (Sunny, overcast, rain)
        new DecisionVariable("Temperature", DecisionVariableKind.Continuous), // continuous values
        new DecisionVariable("Humidity", DecisionVariableKind.Continuous), // continuous values
        new DecisionVariable("Wind", codebook["Wind"].Symbols + 1) // 1 possible value (Weak)
    };

    int classCount = codebook["PlayTennis"].Symbols; // 2 possible values (yes, no)

    tree = new DecisionTree(attributes, classCount);
    C45Learning c45 = new C45Learning(tree);

    // Extract symbols from data and train the classifier
    DataTable symbols = codebook.Apply(data);
    inputs = symbols.ToArray("Outlook", "Temperature", "Humidity", "Wind");
    outputs = symbols.ToArray<int>("PlayTennis");

    double error = c45.Run(inputs, outputs);

    for (int i = 0; i < inputs.Length; i++)
    {
        int y = tree.Compute(inputs[i]);
        Assert.AreEqual(outputs[i], y);
    }
}
/// <summary>
///   Classifies a feature vector with the decision tree and
///   returns the predicted label index.
/// </summary>
public int GetLabelIndex(double[] instance_features) => tree.Compute(instance_features);
// Demo of the Accord.NET decision-tree pipeline on Mitchell's tennis data:
// codify strings, train ID3, query one case, and print the decoded answer.
public static void TestAccord()
{
    /*
     * http://crsouza.com/2012/01/decision-trees-in-c/
     */
    DataTable data = new DataTable("Memory");

    /*add People names/ID to columns dynamically*/
    data.Columns.Add("Day", "Outlook", "Temperature", "Humidity", "Wind", "PlayTennis");

    /*possibly add sentences to this?
     * maybe keywords*/
    // NOTE(review): the "D1" row is added twice — looks like an accidental
    // duplicate (the original data set has 14 unique rows); confirm intent.
    data.Rows.Add("D1", "Sunny", "Hot", "High", "Weak", "No");
    data.Rows.Add("D1", "Sunny", "Hot", "High", "Weak", "No");
    data.Rows.Add("D2", "Sunny", "Hot", "High", "Strong", "No");
    data.Rows.Add("D3", "Overcast", "Hot", "High", "Weak", "Yes");
    data.Rows.Add("D4", "Rain", "Mild", "High", "Weak", "Yes");
    data.Rows.Add("D5", "Rain", "Cool", "Normal", "Weak", "Yes");
    data.Rows.Add("D6", "Rain", "Cool", "Normal", "Strong", "No");
    data.Rows.Add("D7", "Overcast", "Cool", "Normal", "Strong", "Yes");
    data.Rows.Add("D8", "Sunny", "Mild", "High", "Weak", "No");
    data.Rows.Add("D9", "Sunny", "Cool", "Normal", "Weak", "Yes");
    data.Rows.Add("D10", "Rain", "Mild", "Normal", "Weak", "Yes");
    data.Rows.Add("D11", "Sunny", "Mild", "Normal", "Strong", "Yes");
    data.Rows.Add("D12", "Overcast", "Mild", "High", "Strong", "Yes");
    data.Rows.Add("D13", "Overcast", "Hot", "Normal", "Weak", "Yes");
    data.Rows.Add("D14", "Rain", "Mild", "High", "Strong", "No");

    // Create a new codification codebook to
    // convert strings into integer symbols
    Codification codebook = new Codification(data, "Outlook", "Temperature", "Humidity", "Wind", "PlayTennis");

    /* NO IDEA FOR THIS */
    DecisionVariable[] attributes =
    {
        new DecisionVariable("Outlook", 3), // 3 possible values (Sunny, overcast, rain)
        new DecisionVariable("Temperature", 3), // 3 possible values (Hot, mild, cool)
        new DecisionVariable("Humidity", 2), // 2 possible values (High, normal)
        new DecisionVariable("Wind", 2) // 2 possible values (Weak, strong)
    };

    /* For possible values, make it one so it narrows to one individual fact about a word*/
    int classCount = 2; // 2 possible output values for playing tennis: yes or no

    DecisionTree tree = new DecisionTree(attributes, classCount);

    // Create a new instance of the ID3 algorithm
    ID3Learning id3learning = new ID3Learning(tree);

    // Translate our training data into integer symbols using our codebook:
    DataTable symbols = codebook.Apply(data);
    int[][] inputs = symbols.ToArray<int>("Outlook", "Temperature", "Humidity", "Wind");
    int[] outputs = symbols.ToIntArray("PlayTennis").GetColumn(0);

    // Learn the training instances!
    id3learning.Run(inputs, outputs);

    /*This is how we will query the memory*/
    int[] query = codebook.Translate("Sunny", "Hot", "High", "Strong");
    int output = tree.Compute(query);

    /*Respond to user*/
    string answer = codebook.Translate("PlayTennis", output); // answer will be "No".
    Console.WriteLine(answer);
}
// Console demo: trains an ID3 tree on a hand-made "should I take the job"
// table and classifies one scenario, printing the decoded answer.
static void Main(string[] args)
{
    DataTable data = new DataTable("Should I Go To Work For Company X");
    data.Columns.Add("Scenario");
    data.Columns.Add("Pay");
    data.Columns.Add("Benefits");
    data.Columns.Add("Culture");
    data.Columns.Add("WorkFromHome");
    data.Columns.Add("ShouldITakeJob");
    data.Rows.Add("D1", "Good", "Good", "Mean", "Yes", "Yes");
    data.Rows.Add("D2", "Good", "Good", "Mean", "No", "Yes");
    data.Rows.Add("D3", "Average", "Good", "Good", "Yes", "Yes");
    data.Rows.Add("D4", "Average", "Good", "Good", "No", "Yes");
    data.Rows.Add("D5", "Bad", "Good", "Good", "Yes", "No");
    data.Rows.Add("D6", "Bad", "Good", "Good", "No", "No");
    data.Rows.Add("D7", "Good", "Average", "Mean", "Yes", "Yes");
    data.Rows.Add("D8", "Good", "Average", "Mean", "No", "Yes");
    data.Rows.Add("D9", "Average", "Average", "Good", "Yes", "No");
    data.Rows.Add("D10", "Average", "Average", "Good", "No", "No");
    data.Rows.Add("D11", "Bad", "Average", "Good", "Yes", "No");
    data.Rows.Add("D12", "Bad", "Average", "Good", "No", "No");
    data.Rows.Add("D13", "Good", "Bad", "Mean", "Yes", "Yes");
    data.Rows.Add("D14", "Good", "Bad", "Mean", "No", "Yes");
    data.Rows.Add("D15", "Average", "Bad", "Good", "Yes", "No");
    data.Rows.Add("D16", "Average", "Bad", "Good", "No", "No");
    data.Rows.Add("D17", "Bad", "Bad", "Good", "Yes", "No");
    data.Rows.Add("D18", "Bad", "Bad", "Good", "No", "No");
    data.Rows.Add("D19", "Good", "Good", "Good", "Yes", "Yes");
    data.Rows.Add("D20", "Good", "Good", "Good", "No", "Yes");

    // Create a new codification codebook to
    // convert strings into integer symbols
    Codification codebook = new Codification(data);

    DecisionVariable[] attributes =
    {
        new DecisionVariable("Pay", 3),
        new DecisionVariable("Benefits", 3),
        new DecisionVariable("Culture", 3),
        new DecisionVariable("WorkFromHome", 2)
    };

    int outputValues = 2; // 2 possible output values: yes or no
    DecisionTree tree = new DecisionTree(attributes, outputValues);
    ID3Learning id3 = new ID3Learning(tree);

#pragma warning disable CS0618 // Type or member is obsolete
    // Translate our training data into integer symbols using our codebook:
    // NOTE(review): `$"Culture"` is an interpolated string with nothing to
    // interpolate — presumably meant to be the plain literal "Culture"
    // (behavior is identical either way).
    DataTable symbols = codebook.Apply(data);
    int[][] inputs = symbols.ToArray<int>("Pay", "Benefits", $"Culture", "WorkFromHome");
    int[] outputs = symbols.ToIntArray("ShouldITakeJob").GetColumn(0);

    // Learn the training instances!
    id3.Run(inputs, outputs);

    // NOTE(review): this query passes 5 values (including the "D19" scenario
    // label) while the tree was declared with only 4 attributes — verify that
    // Codification.Translate/tree.Compute tolerate the extra leading value.
    int[] query = codebook.Translate("D19", "Good", "Good", "Good", "Yes");
    int output = tree.Compute(query);
    string answer = codebook.Translate("ShouldITakeJob", output); // answer will be "Yes".
#pragma warning restore CS0618 // Type or member is obsolete

    Console.WriteLine("Answer is: " + answer);
    Console.ReadKey();
}
// Trains an ID3 tree on Mitchell's tennis data using locally maintained
// symbol dictionaries (CreateDic/GetIndex/GetCount helpers), queries one case,
// and prints the tree's generated code and rule set.
public void Run()
{
    DataTable data = new DataTable("Mitchell's Tennis Example");
    data.Columns.Add("Day");
    data.Columns.Add("Outlook");
    data.Columns.Add("Temperature");
    data.Columns.Add("Humidity");
    data.Columns.Add("Wind");
    data.Columns.Add("PlayTennis");
    data.Rows.Add("D1", "Sunny", "Hot", "High", "Weak", "No");
    data.Rows.Add("D2", "Sunny", "Hot", "High", "Strong", "No");
    data.Rows.Add("D3", "Overcast", "Hot", "High", "Weak", "Yes");
    data.Rows.Add("D4", "Rain", "Mild", "High", "Weak", "Yes");
    data.Rows.Add("D5", "Rain", "Cool", "Normal", "Weak", "Yes");
    data.Rows.Add("D6", "Rain", "Cool", "Normal", "Strong", "No");
    data.Rows.Add("D7", "Overcast", "Cool", "Normal", "Strong", "Yes");
    data.Rows.Add("D8", "Sunny", "Mild", "High", "Weak", "No");
    data.Rows.Add("D9", "Sunny", "Cool", "Normal", "Weak", "Yes");
    data.Rows.Add("D10", "Rain", "Mild", "Normal", "Weak", "Yes");
    data.Rows.Add("D11", "Sunny", "Mild", "Normal", "Strong", "Yes");
    data.Rows.Add("D12", "Overcast", "Mild", "High", "Strong", "Yes");
    data.Rows.Add("D13", "Overcast", "Hot", "Normal", "Weak", "Yes");
    data.Rows.Add("D14", "Rain", "Mild", "High", "Strong", "No");

    // Create a new codification codebook to
    // convert strings into integer symbols
    Codification codebook = new Codification(data, "Outlook", "Temperature", "Humidity", "Wind", "PlayTennis");

    // Translate our training data into integer symbols using our codebook:
    DataTable symbols = codebook.Apply(data);

    // Populate the local per-column symbol dictionaries used by GetIndex/GetCount.
    CreateDic("Outlook", symbols);
    CreateDic("Temperature", symbols);
    CreateDic("Humidity", symbols);
    CreateDic("Wind", symbols);
    CreateDic("PlayTennis", symbols);

    int[][] inputs = (from p in symbols.AsEnumerable()
                      select new int[]
                      {
                          GetIndex("Outlook", p["Outlook"].ToString()),
                          GetIndex("Temperature", p["Temperature"].ToString()),
                          GetIndex("Humidity", p["Humidity"].ToString()),
                          GetIndex("Wind", p["Wind"].ToString())
                      }).Cast<int[]>().ToArray();

    int[] outputs = (from p in symbols.AsEnumerable()
                     select GetIndex("PlayTennis", p["PlayTennis"].ToString())).Cast<int>().ToArray();

    /*
     * // Gather information about decision variables
     * DecisionVariable[] attributes =
     * {
     *     new DecisionVariable("Outlook", 3),     // 3 possible values (Sunny, overcast, rain)
     *     new DecisionVariable("Temperature", 3), // 3 possible values (Hot, mild, cool)
     *     new DecisionVariable("Humidity", 2),    // 2 possible values (High, normal)
     *     new DecisionVariable("Wind", 2)         // 2 possible values (Weak, strong)
     * };
     */
    DecisionVariable[] attributes =
    {
        new DecisionVariable("Outlook", GetCount("Outlook")), // 3 possible values (Sunny, overcast, rain)
        new DecisionVariable("Temperature", GetCount("Temperature")), // 3 possible values (Hot, mild, cool)
        new DecisionVariable("Humidity", GetCount("Humidity")), // 2 possible values (High, normal)
        new DecisionVariable("Wind", GetCount("Wind")) // 2 possible values (Weak, strong)
    };

    int classCount = GetCount("PlayTennis"); // 2 possible output values for playing tennis: yes or no

    //Create the decision tree using the attributes and classes
    DecisionTree tree = new DecisionTree(attributes, classCount);

    // Create a new instance of the ID3 algorithm
    ID3Learning id3learning = new ID3Learning(tree);

    // Learn the training instances!
    id3learning.Run(inputs, outputs);

    string answer = codebook.Translate("PlayTennis", tree.Compute(codebook.Translate("Sunny", "Hot", "High", "Strong")));
    Console.WriteLine("Calculate for: Sunny, Hot, High, Strong");
    Console.WriteLine("Answer: " + answer);

    // Emit the tree as source code and as a rule set for inspection.
    var expression = tree.ToExpression();
    Console.WriteLine(tree.ToCode("ClassTest"));
    DecisionSet s = tree.ToRules();
    Console.WriteLine(s.ToString());

    // Compiles the expression to IL
    var func = expression.Compile();
}
// Trains a C4.5 tree to predict song "Popularity" from nine features, then
// classifies one hand-made record. Columns are the LearnModel property names
// sorted alphabetically, so Rows.Add values must follow that same order.
private static void DoYourWork()
{
    var songs = SongsFactory.GetSongsForLearning(); // this method returns the songs that have ALL data present

    var data = new DataTable("Songs Example");
    var columnNames = typeof(LearnModel).GetProperties()
        .Select(p => p.Name)
        .OrderBy(x => x)
        .ToList();
    data.Columns.AddRange(columnNames.Select(name => new DataColumn(name)).ToArray());

    foreach (var song in songs)
    {
        // Alphabetical column order: ArtistBeginYear, ArtistType, Duration, GenreType,
        // LyricCharsCount, LyricWordsCount, Negative, Popularity, Positive, SongDateYear.
        data.Rows.Add(song.ArtistBeginYear, song.ArtistType, song.Duration, song.GenreType, song.LyricCharsCount, song.LyricWordsCount, song.Negative, song.Popularity.ToString(), song.Positive, song.SongDateYear);
    }

    var codebook = new Codification(data, columnNames.ToArray());
    var symbols = codebook.Apply(data);
    var input = symbols.ToArray<double>(columnNames.Where(x => x != "Popularity").ToArray());
    var output = symbols.ToArray<int>("Popularity");

    // Compute per-feature min/max over the 9 input columns for the IntRanges below.
    var mins = new int[9];
    var maxs = new int[9];
    for (int i = 0; i < 9; i++)
    {
        var curMinForColumn = int.MaxValue;
        var curMaxForColumn = int.MinValue;
        for (int j = 0; j < input.GetLength(0); j++)
        {
            var curValue = (int)input[j][i];
            if (curValue < curMinForColumn)
            {
                curMinForColumn = curValue;
            }
            if (curValue > curMaxForColumn)
            {
                curMaxForColumn = curValue;
            }
        }
        mins[i] = curMinForColumn;
        maxs[i] = curMaxForColumn;
    }

    DecisionVariable[] attributes =
    {
        new DecisionVariable("ArtistBeginYear", new IntRange(mins[0], maxs[0])),
        new DecisionVariable("ArtistType", songs.Select(x => x.ArtistType).Distinct().Count()),
        new DecisionVariable("Duration", new IntRange(mins[2], maxs[2])),
        new DecisionVariable("GenreType", songs.Select(x => x.GenreType).Distinct().Count()),
        new DecisionVariable("LyricCharsCount", new IntRange(mins[4], maxs[4])),
        new DecisionVariable("LyricWordsCount", new IntRange(mins[5], maxs[5])),
        new DecisionVariable("Negative", new DoubleRange(songs.Min(x => x.Negative), songs.Max(x => x.Negative))),
        new DecisionVariable("Positive", new DoubleRange(songs.Min(x => x.Positive), songs.Max(x => x.Positive))),
        new DecisionVariable("SongDateYear", new DoubleRange(songs.Min(x => x.SongDateYear), songs.Max(x => x.SongDateYear))),
    };

    var classCount = 2; // popular, unpopular
    var tree = new DecisionTree(attributes, classCount);
    var algo = new C45Learning(tree);
    algo.Run(input, output);

    // check with our own data
    data.Rows.Add(1966, 1, 302, 1, 1354, 255, 92.944470512297059, 0 /*doesn't matter at all*/, 7.05552948770294, 2009);
    var lastItem = data.Rows[data.Rows.Count - 1];
    var input0 = codebook.Translate(lastItem, columnNames.Where(x => x != "Popularity").ToArray());
    var answer = tree.Compute(input0);
    var readableAnswer = codebook.Translate("Popularity", answer);
}