/// <summary>
/// Recursively builds a decision (sub)tree for the supplied examples and registers
/// every vertex and edge it creates in <paramref name="tree"/>.
/// </summary>
/// <param name="x">The Matrix of examples to split.</param>
/// <param name="y">The Vector of labels; it is sliced with the same indices as <paramref name="x"/>.</param>
/// <param name="depth">Remaining depth budget; a leaf holding the mode of <paramref name="y"/> is returned once it drops below zero.</param>
/// <param name="used">Columns already chosen as split columns. Passed to GetBestSplit and appended to;
/// the same list instance is handed to every recursive call.</param>
/// <param name="tree">The Tree that receives the created vertices and edges.</param>
/// <returns>The Node that roots the (sub)tree built for the supplied examples.</returns>
private Node BuildTree(Matrix x, Vector y, int depth, List<int> used, Tree tree)
{
    // depth budget exhausted: answer with the most common label
    if (depth < 0)
    {
        return BuildLeafNode(y.Mode());
    }

    var tuple = GetBestSplit(x, y, used);
    var col = tuple.Item1;
    var gain = tuple.Item2;
    var measure = tuple.Item3;

    // -1 is treated as "no split column found", so fall back to a leaf
    if (col == -1)
    {
        return BuildLeafNode(y.Mode());
    }

    used.Add(col);

    Node node = new Node
    {
        Column = col,
        Gain = gain,
        IsLeaf = false,
        Name = Descriptor.ColumnAt(col)
    };

    // populate edges, one candidate per segment of the winning measure
    List<Edge> edges = new List<Edge>(measure.Segments.Length);
    for (int i = 0; i < measure.Segments.Length; i++)
    {
        // working set
        var segment = measure.Segments[i];
        var edge = new Edge()
        {
            ParentId = node.Id,
            Discrete = measure.Discrete,
            Min = segment.Min,
            Max = segment.Max
        };

        // Indices of the examples that travel along this edge. The query is
        // materialised once because it is consumed up to three times below
        // (emptiness check, y.Slice, x.Slice); re-running it would evaluate
        // the predicate against the matrix again each time.
        IEnumerable<int> slice;
        if (edge.Discrete)
        {
            // get discrete label
            edge.Label = Descriptor.At(col).Convert(segment.Min).ToString();
            // do value check for matrix slicing
            slice = x.Indices(v => v[col] == segment.Min).ToArray();
        }
        else
        {
            // get range label
            edge.Label = string.Format("{0} <= x < {1}", segment.Min, segment.Max);
            // do range check for matrix slicing
            slice = x.Indices(v => v[col] >= segment.Min && v[col] < segment.Max).ToArray();
        }

        // no examples fall into this segment: the edge would lead
        // to a dead end, so it is not built
        if (!slice.Any())
        {
            continue;
        }

        Vector ySlice = y.Slice(slice);

        Node child;
        if (ySlice.Distinct().Count() == 1)
        {
            // only one answer, set leaf
            child = BuildLeafNode(ySlice[0]);
        }
        else
        {
            // otherwise continue to build tree
            child = BuildTree(x.Slice(slice), ySlice, depth - 1, used, tree);
        }

        tree.AddVertex(child);
        edge.ChildId = child.Id;
        edges.Add(edge);
    }

    if (edges.Count <= 1)
    {
        // a node with fewer than two outgoing edges does not split anything:
        // convert the parent to a terminal node answering with the mode.
        // NOTE(review): any child vertex added above stays in the tree without
        // an edge pointing at it - confirm that this is acceptable.
        node.IsLeaf = true;
        node.Value = y.Mode();
        tree.AddVertex(node);
    }
    else
    {
        tree.AddVertex(node);
        foreach (var e in edges)
        {
            tree.AddEdge(e);
        }
    }

    return node;
}
public void TreeNotEqualTest()
{
    // Both trees have seven vertices and share the edges
    // 1->2, 1->3, 2->4, 2->5 and 3->7. Vertex 6 hangs off
    // vertex 3 in the first tree but off vertex 1 in the second.
    Tree g1 = BuildSevenVertexTree(new[]
    {
        new[] { 1, 2 },
        new[] { 1, 3 },
        new[] { 2, 4 },
        new[] { 2, 5 },
        new[] { 3, 6 },
        new[] { 3, 7 }
    });

    Tree g2 = BuildSevenVertexTree(new[]
    {
        new[] { 1, 2 },
        new[] { 1, 3 },
        new[] { 2, 4 },
        new[] { 2, 5 },
        new[] { 1, 6 },
        new[] { 3, 7 }
    });

    Assert.False(g1.Equals(g2));
}

/// <summary>
/// Resets the Vertex state, then creates a Tree with a root vertex plus six
/// further vertices and one Edge per entry of <paramref name="links"/>.
/// </summary>
/// <param name="links">Pairs of { ParentId, ChildId } values, added in order.</param>
/// <returns>The populated Tree.</returns>
private static Tree BuildSevenVertexTree(int[][] links)
{
    // presumably restarts vertex numbering so ids run 1..7 - verify against Vertex
    Vertex.Reset();

    Tree tree = new Tree();
    var root = new Vertex();
    tree.Root = root;
    tree.AddVertex(root);

    // the six non-root vertices
    for (int i = 0; i < 6; i++)
    {
        tree.AddVertex(new Vertex());
    }

    foreach (var link in links)
    {
        tree.AddEdge(new Edge { ParentId = link[0], ChildId = link[1] });
    }

    return tree;
}
/// <summary>Builds a decision tree from the supplied examples and wraps it in a DecisionTreeModel.</summary>
/// <exception cref="InvalidOperationException">Thrown when no Descriptor has been set.</exception>
/// <param name="X">The Matrix of examples; it is passed to Preprocess before the tree is built.</param>
/// <param name="y">The Vector of labels.</param>
/// <returns>A DecisionTreeModel carrying the built tree, the hint and the generator's feature settings.</returns>
public override IModel Generate(Matrix X, Vector y)
{
    if (Descriptor == null)
    {
        throw new InvalidOperationException("Cannot build decision tree without type knowledge!");
    }

    this.Preprocess(X);

    var tree = new Tree();
    tree.Root = BuildTree(X, y, Depth, new List<int>(X.Cols), tree);

    // double.Epsilon is treated as "no hint supplied". The model still has
    // to guess something when it meets a case it has never seen (especially
    // when automated inside a Learner), so take a value from y via GetRandom.
    if (Hint == double.Epsilon)
    {
        Hint = y.GetRandom();
    }

    var model = new DecisionTreeModel();
    model.Descriptor = Descriptor;
    model.NormalizeFeatures = NormalizeFeatures;
    model.FeatureNormalizer = FeatureNormalizer;
    model.FeatureProperties = FeatureProperties;
    model.Tree = tree;
    model.Hint = Hint;
    return model;
}