/// <summary>
/// Checks that <c>Split</c> on column 0 returns two-element rows, for both
/// the value 1 and the value 0.
/// </summary>
public void Can_split_dataset()
 {
     var node = new DecisionTreeNode(GetDataSet());

     // Split(0, 1) is expected to produce { {1, 1}, {1, 1}, {0, 0} }:
     // every row has two entries and both entries are equal.
     var matchingRows = node.Split(0, 1);
     Assert.IsNotNull(matchingRows);
     foreach (var entry in matchingRows)
     {
         Assert.AreEqual(2, entry.Length);
         Assert.AreEqual(entry[0], entry[1]);
     }

     // Split(0, 0) is expected to produce { {1, 0}, {1, 0} }:
     // every row has two entries and the entries differ.
     var differingRows = node.Split(0, 0);
     Assert.IsNotNull(differingRows);
     foreach (var entry in differingRows)
     {
         Assert.AreEqual(2, entry.Length);
         Assert.AreNotEqual(entry[0], entry[1]);
     }
 }
示例#2
0
 /// <summary>
 /// Reverses, in place, the order of the children held by <paramref name="casted"/>.
 /// </summary>
 /// <param name="casted">Node whose <c>Children</c> collection is cleared and refilled in reverse order.</param>
 private static void ReverseNodeOrder(DecisionTreeNode casted)
 {
     // Snapshot first: the live collection is emptied before being refilled.
     var originalOrder = casted.Children.ToList();

     casted.Children.Clear();

     // Walk the snapshot from the back so the last child is re-added first.
     for (var index = originalOrder.Count - 1; index >= 0; index--)
     {
         casted.Children.Add(originalOrder[index]);
     }
 }
        /// <summary>
        /// Checks the information-gain grid produced with
        /// <c>DisorderType.ClassificationError</c>: three entries, the first
        /// keyed 1 and the last keyed 2.
        /// </summary>
        public void Can_get_gain_grid_with_classification_error()
        {
            var node = new DecisionTreeNode(GetDataSet());

            var gains = node.GetInformationGain(DisorderType.ClassificationError);

            Assert.IsNotNull(gains);
            Assert.AreEqual(3, gains.Count);

            // Only the two ends of the grid are pinned by this test.
            Assert.AreEqual(1, gains.First().Key);
            Assert.AreEqual(2, gains.Last().Key);
        }
        /// <summary>
        /// Checks the information-gain grid produced with
        /// <c>DisorderType.GiniImpurity</c>: three entries, the first keyed 0
        /// and the last keyed 1.
        /// </summary>
        public void Can_get_gain_grid_with_gini_impurity()
        {
            var node = new DecisionTreeNode(GetDataSet());

            var gains = node.GetInformationGain(DisorderType.GiniImpurity);

            Assert.IsNotNull(gains);
            Assert.AreEqual(3, gains.Count);

            // Only the two ends of the grid are pinned by this test.
            Assert.AreEqual(0, gains.First().Key);
            Assert.AreEqual(1, gains.Last().Key);
        }
示例#5
0
        /// <summary>
        /// Recursively builds a decision tree from <paramref name="dataSet"/>.
        /// </summary>
        /// <param name="dataSet">Rows to build the (sub)tree from.</param>
        /// <param name="labels">
        /// Column labels, looked up by the column index that
        /// <c>GetInformationGain</c> reports as an entry key. The list is not modified.
        /// </param>
        /// <param name="disorderType">Disorder measure forwarded to <c>GetInformationGain</c>.</param>
        /// <returns>
        /// A <c>DecisionTreeLeafNode</c> when every outcome is identical or the data set
        /// has a single column; otherwise a <c>DecisionTreeNode</c> labelled with the
        /// chosen column and holding one child per distinct value of that column.
        /// </returns>
        private static ITreeNode CreateTree(Matrix dataSet, IList<string> labels, DisorderType disorderType)
        {
            var outcomes = dataSet.GetOutcomes();

            // All rows agree on the outcome: nothing left to decide.
            if (outcomes.Distinct().Count() == 1)
            {
                return new DecisionTreeLeafNode(outcomes[0]);
            }

            var outcome = dataSet.GetMajorityOutcome();

            // Only one column left: fall back to the majority outcome.
            if (dataSet.Columns == 1)
            {
                return new DecisionTreeLeafNode(outcome);
            }

            var node = new DecisionTreeNode(dataSet, outcome);

            // The first entry of the gain grid is the column to split on.
            var best = node.GetInformationGain(disorderType).First();

            node.Label = labels[best.Key];

            // Drop the chosen label by index, not by value: the label was read by
            // index, and removing by value would also drop any duplicate names and
            // misalign the remaining labels. Presumably Split removes the chosen
            // column from each row, so indices stay in step - TODO confirm.
            var remainingLabels = new List<string>(labels);
            remainingLabels.RemoveAt(best.Key);

            var values = dataSet.Select(row => row[best.Key]).Distinct().ToList();

            foreach (var value in values)
            {
                var split = node.Split(best.Key, value);

                // The recursive call never mutates its labels argument, so the same
                // list can be shared by every child.
                node.AddChild(CreateTree(split, remainingLabels, disorderType));
            }

            ReverseNodeOrder(node);

            return node;
        }