/// <summary>
/// Finds the best split available for the given dataset. It is meant to be
/// called recursively while the decision tree is being built.
/// </summary>
/// <param name="dataset">The rows to evaluate candidate questions against.</param>
/// <returns>
/// The highest information gain found together with the question that
/// produced it. When no candidate yields a gain above 0, the result is
/// a gain of 0 and a <c>null</c> question.
/// </returns>
public static (double gain, Question question) FindBestSplit(Dataset dataset)
{
    // Gini of the unsplit dataset; every candidate's information gain is
    // measured against this baseline.
    double baselineGini = CalculateGini(dataset);

    // Running winner of the search.
    double topGain = 0;
    Question topQuestion = null;

    // Per the original note: MaxFeatureCount is the number of columns minus
    // one, because the last column is always used as the label.
    int columnCount = dataset.MaxFeatureCount;

    for (int column = 0; column < columnCount; column++)
    {
        // Try one question per distinct value found in this feature column.
        foreach (Feature candidate in dataset.GetUniqueValues(column))
        {
            Question probe = new Question(column, dataset.Headers[column], candidate);
            (Dataset matching, Dataset remaining) = PartitionDataset(dataset, probe);

            // A question that leaves one side empty does not partition the
            // dataset at all, so there is nothing to score.
            bool splitsData = matching.Count != 0 && remaining.Count != 0;
            if (!splitsData)
            {
                continue;
            }

            double candidateGain = CalculateInfoGain(matching, remaining, baselineGini);
            if (candidateGain > topGain)
            {
                topGain = candidateGain;
                topQuestion = probe;
            }
        }
    }

    return (topGain, topQuestion);
}
/// <summary>
/// Rebuilds the input fields inside <c>fieldsPanel</c> so that there is one
/// labelled control per feature of the current tree's dataset: a
/// <see cref="NumericUpDown"/> for numeric features and a
/// <see cref="ComboBox"/> (filled with the column's unique values) for
/// categoric ones. Also resets <c>features</c> to a fresh row and refreshes
/// the enabled state of <c>predictionBtn</c>.
/// Does nothing when there is no tree to analyze.
/// </summary>
private void CreateFieldsFromDataset()
{
    // Prevent running if we don't have a tree to analyze
    if (tree == null)
    {
        return;
    }

    FlowLayoutPanel panel = fieldsPanel;
    Dataset dataset = tree.Dataset;
    int totalFeatures = dataset.MaxFeatureCount;

    // Start from a row with one empty (null) slot per feature
    features = new Row(Enumerable.Repeat<Feature>(null, totalFeatures).ToList());

    // Suspend the layout to prevent re-renders while the fields are rebuilt
    panel.SuspendLayout();

    try
    {
        // Controls.Clear() only detaches the controls from the panel; it does
        // not release their window handles or their event handlers. Dispose
        // the previous fields explicitly so repeated rebuilds do not leak.
        Control[] staleControls = panel.Controls.Cast<Control>().ToArray();
        panel.Controls.Clear();
        foreach (Control stale in staleControls)
        {
            stale.Dispose();
        }

        // Collect every new control first so they can be added in one batch
        List<Control> controls = new List<Control>();

        // Create a field for each feature
        for (int i = 0; i < totalFeatures; ++i)
        {
            // Per-iteration copy of the index for the event handler closures
            int staticIndex = i;

            // Create the Label for this field
            Label label = new Label();
            label.Text = dataset.Headers[i].FirstCharToUpper();
            label.Margin = new Padding(7, 7, 3, 0);
            label.AutoSize = true;
            controls.Add(label);

            // We need different Control types for categorical and numeric features
            if (dataset.IsNumeric(i))
            {
                // Create a NumericUpDown for numeric features
                NumericUpDown numericUpDown = new NumericUpDown();
                numericUpDown.Minimum = decimal.MinValue;
                numericUpDown.Maximum = decimal.MaxValue;
                numericUpDown.DecimalPlaces = 2;
                numericUpDown.TabStop = true;
                numericUpDown.TabIndex = 0;
                numericUpDown.Margin = new Padding(10, 3, 3, 3);

                // Initialize this feature in 0, matching the control's initial value
                features[i] = new Feature(0);

                // Keep the row and the prediction button in sync with the control
                numericUpDown.ValueChanged += (object sender, EventArgs e) =>
                {
                    NumericUpDown control = (NumericUpDown)sender;

                    if (features != null)
                    {
                        features[staticIndex] = new Feature(control.Value);
                    }

                    predictionBtn.Enabled = IsValidPredictionRow(features);
                };

                panel.SetFlowBreak(numericUpDown, true);
                controls.Add(numericUpDown);
            }
            else
            {
                // Create the ComboBox for categoric features
                ComboBox comboBox = new ComboBox();
                comboBox.AutoCompleteMode = AutoCompleteMode.Append;
                comboBox.AutoCompleteSource = AutoCompleteSource.ListItems;
                comboBox.DropDownStyle = ComboBoxStyle.DropDownList;
                comboBox.FormattingEnabled = true;
                comboBox.TabStop = true;
                comboBox.TabIndex = 0;
                comboBox.Margin = new Padding(10, 3, 3, 3);

                // Keep the row and the prediction button in sync with the control
                comboBox.SelectionChangeCommitted += (object sender, EventArgs e) =>
                {
                    ComboBox control = (ComboBox)sender;

                    if (features != null && control.SelectedItem != null)
                    {
                        features[staticIndex] = new Feature(control.SelectedItem.ToString());
                    }

                    predictionBtn.Enabled = IsValidPredictionRow(features);
                };

                // Offer every unique value of this column as a choice
                HashSet<Feature> uniqueFeatures = dataset.GetUniqueValues(i);
                string[] values = uniqueFeatures.Select(item => item.ToString()).ToArray();
                comboBox.Items.AddRange(values);

                panel.SetFlowBreak(comboBox, true);
                controls.Add(comboBox);
            }
        }

        // Assign the Enabled property for the Button with the current Row
        predictionBtn.Enabled = IsValidPredictionRow(features);

        // Display every new control in the UI
        panel.Controls.AddRange(controls.ToArray());
    }
    finally
    {
        // Always resume and re-render, even if building the fields failed,
        // so the panel is never left with its layout suspended.
        panel.ResumeLayout(false);
        panel.PerformLayout();
    }
}