Пример #1
0
        /// <summary>
        /// Scans every feature column of <paramref name="dataset"/> and every unique
        /// value found in it, looking for the question whose partition produces the
        /// highest information gain. Meant to be called recursively while the tree
        /// is being built.
        /// </summary>
        /// <param name="dataset">The rows to look for a split in.</param>
        /// <returns>
        /// The highest gain found and the question that produced it. When no
        /// candidate question achieves a gain above zero, the result is
        /// <c>(0, null)</c>.
        /// </returns>
        public static (double gain, Question question) FindBestSplit(Dataset dataset)
        {
            // Gini of the unsplit dataset; it is handed to CalculateInfoGain for
            // every candidate partition below.
            double baselineGini = CalculateGini(dataset);

            // Number of feature columns to scan. Per the original author's note,
            // MaxFeatureCount leaves out the last column, which holds the label.
            int columnCount = dataset.MaxFeatureCount;

            // Best candidate seen so far; stays (0, null) if nothing improves on zero.
            double   topGain     = 0;
            Question topQuestion = null;

            for (int column = 0; column < columnCount; column++)
            {
                // Try one question per unique value present in this column.
                foreach (Feature candidate in dataset.GetUniqueValues(column))
                {
                    Question probe = new Question(column, dataset.Headers[column], candidate);
                    (Dataset matching, Dataset remaining) = PartitionDataset(dataset, probe);

                    // Only questions that actually divide the rows are worth scoring.
                    if (matching.Count != 0 && remaining.Count != 0)
                    {
                        double probeGain = CalculateInfoGain(matching, remaining, baselineGini);
                        if (probeGain > topGain)
                        {
                            topGain     = probeGain;
                            topQuestion = probe;
                        }
                    }
                }
            }

            return (topGain, topQuestion);
        }
        /// <summary>
        /// Rebuilds the contents of <c>fieldsPanel</c> from the current tree's
        /// dataset: one label plus one editor per feature column (a
        /// <see cref="NumericUpDown"/> for numeric columns, a drop-down
        /// <see cref="ComboBox"/> for the rest). Also resets <c>features</c> to a
        /// fresh row and refreshes the enabled state of <c>predictionBtn</c>.
        /// Does nothing when there is no tree.
        /// </summary>
        private void CreateFieldsFromDataset()
        {
            // Prevent running if we don't have a tree to analyze
            if (tree == null)
            {
                return;
            }

            FlowLayoutPanel panel         = fieldsPanel;
            Dataset         dataset       = tree.Dataset;
            int             totalFeatures = dataset.MaxFeatureCount;

            // Start from a row with one empty (null) slot per feature column
            features = new Row(Enumerable.Repeat <Feature>(null, totalFeatures).ToList());

            // Suspend the layout to prevent re-renders while the fields are swapped
            panel.SuspendLayout();
            try
            {
                // Remove the previous fields. Controls.Clear() only detaches them,
                // so they are disposed explicitly to release their window handles
                // and the event handlers attached to them.
                Control[] staleControls = panel.Controls.Cast <Control>().ToArray();
                panel.Controls.Clear();
                foreach (Control stale in staleControls)
                {
                    stale.Dispose();
                }

                // Collect every new control so they can be added in a single call
                List <Control> controls = new List <Control>();

                // Create a field for each feature
                for (int i = 0; i < totalFeatures; ++i)
                {
                    // Per-iteration copy: the lambdas below must capture this
                    // column's index, not the shared loop variable.
                    int staticIndex = i;

                    // Create the Label for this field
                    Label label = new Label();
                    label.Text     = dataset.Headers[i].FirstCharToUpper();
                    label.Margin   = new Padding(7, 7, 3, 0);
                    label.AutoSize = true;
                    controls.Add(label);

                    // We need different Control types for categorical and numeric features
                    if (dataset.IsNumeric(i))
                    {
                        // Create a NumericUpDown for numeric features
                        NumericUpDown numericUpDown = new NumericUpDown();
                        numericUpDown.Minimum       = decimal.MinValue;
                        numericUpDown.Maximum       = decimal.MaxValue;
                        numericUpDown.DecimalPlaces = 2;
                        numericUpDown.TabStop       = true;
                        numericUpDown.TabIndex      = 0;
                        numericUpDown.Margin        = new Padding(10, 3, 3, 3);
                        // Numeric features start at 0, matching the control's initial value
                        features[i] = new Feature(0);
                        // Keep the row in sync with the control and re-validate on every change
                        numericUpDown.ValueChanged += (object sender, EventArgs e) => {
                            NumericUpDown control = (NumericUpDown)sender;
                            if (features != null)
                            {
                                features[staticIndex] = new Feature(control.Value);
                            }
                            predictionBtn.Enabled = IsValidPredictionRow(features);
                        };
                        panel.SetFlowBreak(numericUpDown, true);
                        controls.Add(numericUpDown);
                    }
                    else
                    {
                        // Create the ComboBox for categoric features.
                        // The auto-complete properties are assigned before
                        // DropDownStyle on purpose; keep this order.
                        ComboBox comboBox = new ComboBox();
                        comboBox.AutoCompleteMode   = AutoCompleteMode.Append;
                        comboBox.AutoCompleteSource = AutoCompleteSource.ListItems;
                        comboBox.DropDownStyle      = ComboBoxStyle.DropDownList;
                        comboBox.FormattingEnabled  = true;
                        comboBox.TabStop            = true;
                        comboBox.TabIndex           = 0;
                        comboBox.Margin             = new Padding(10, 3, 3, 3);
                        // Store the committed selection in the row and re-validate
                        comboBox.SelectionChangeCommitted += (object sender, EventArgs e) => {
                            ComboBox control = (ComboBox)sender;
                            if (features != null && control.SelectedItem != null)
                            {
                                features[staticIndex] = new Feature(control.SelectedItem.ToString());
                            }
                            predictionBtn.Enabled = IsValidPredictionRow(features);
                        };
                        // Offer every unique value found in this column as a choice
                        HashSet <Feature> uniqueFeatures = dataset.GetUniqueValues(i);
                        string[]          values         = uniqueFeatures.Select(item => item.ToString()).ToArray();
                        comboBox.Items.AddRange(values);
                        panel.SetFlowBreak(comboBox, true);
                        controls.Add(comboBox);
                    }
                }

                // Assign the Enabled property for the Button with the current Row
                predictionBtn.Enabled = IsValidPredictionRow(features);

                // Display every new control in the UI
                panel.Controls.AddRange(controls.ToArray());
            }
            finally
            {
                // Always resume layout, even if building the fields failed, so the
                // panel is never left with its layout suspended.
                panel.ResumeLayout(false);
                panel.PerformLayout();
            }
        }