Exemple #1
0
        /// <summary>Builds one <see cref="Measure"/> per column of the supplied matrix.</summary>
        /// <remarks>
        /// Discrete columns get one statistic per distinct value (in ascending order);
        /// all other columns get one statistic per segment produced by
        /// <c>Segment(Width)</c>. Every statistic is created with a count of 1.
        /// </remarks>
        /// <param name="x">Matrix whose columns are turned into measures.</param>
        /// <returns>An array holding one measure for each column of <paramref name="x"/>.</returns>
        private Measure[] GetBaseConditionals(Matrix x)
        {
            var measures = new Measure[x.Cols];

            for (int col = 0; col < measures.Length; col++)
            {
                Property property = Descriptor.At(col);
                var measure = new Measure
                {
                    Discrete = property.Discrete,
                    Label    = Descriptor.ColumnAt(col),
                };

                IEnumerable<Statistic> stats;
                if (!measure.Discrete)
                {
                    // continuous column: one statistic per segment, all sharing the column label
                    stats = x[col, VectorType.Col]
                            .Segment(Width)
                            .Select(segment => Statistic.Make(measure.Label, segment, 1));
                }
                else
                {
                    // discrete column: one statistic per distinct value, labelled with its converted form
                    stats = x[col, VectorType.Col]
                            .Distinct()
                            .OrderBy(value => value)
                            .Select(value => Statistic.Make(property.Convert(value).ToString(), value, 1));
                }

                measure.Probabilities = stats.ToArray();
                measures[col]         = measure;
            }

            return measures;
        }
Exemple #2
0
        /// <summary>Finds the not-yet-used column with the highest relative gain.</summary>
        /// <param name="x">Matrix whose columns are the split candidates.</param>
        /// <param name="y">Vector passed to the impurity measure alongside each column.</param>
        /// <param name="used">Column indices that must be skipped.</param>
        /// <returns>
        /// A tuple of (column index, gain, impurity measure) for the winning column.
        /// When no column qualifies the tuple is (-1, -1, null).
        /// </returns>
        private Tuple <int, double, Impurity> GetBestSplit(Matrix x, Vector y, List <int> used)
        {
            int      winner        = -1;
            double   winnerGain    = -1;
            Impurity winnerMeasure = null;

            for (int col = 0; col < x.Cols; col++)
            {
                // columns already consumed by an earlier split are not candidates
                if (used.Contains(col))
                {
                    continue;
                }

                // every candidate gets its own impurity instance
                var candidate = (Impurity)Ject.Create(ImpurityType);
                var values    = x.Col(col);

                // discrete features use the full relative gain,
                // everything else is segmented by Width first
                double score = Descriptor.At(col).Discrete
                                   ? candidate.RelativeGain(y, values)
                                   : candidate.SegmentedRelativeGain(y, values, Width);

                // written as !(a > b) so a NaN score is skipped
                if (!(score > winnerGain))
                {
                    continue;
                }

                winner        = col;
                winnerGain    = score;
                winnerMeasure = candidate;
            }

            return Tuple.Create(winner, winnerGain, winnerMeasure);
        }
Exemple #3
0
        /// <summary>
        /// Walks the tree downward from <paramref name="node"/>, following the first
        /// outgoing edge that matches the value of <paramref name="v"/> at the node's column,
        /// until a leaf is reached.
        /// </summary>
        /// <remarks>
        /// A discrete edge matches when the value equals the edge's <c>Min</c>; a
        /// non-discrete edge matches when <c>Min &lt;= value &lt; Max</c>. If no edge
        /// matches, <c>Hint</c> is returned unless it equals <see cref="double.Epsilon"/>.
        /// </remarks>
        /// <exception cref="InvalidOperationException">
        /// Thrown when a non-leaf node has a column of -1, or when no edge matches the
        /// value and <c>Hint</c> equals <see cref="double.Epsilon"/>.
        /// </exception>
        /// <param name="v">The Vector to process.</param>
        /// <param name="node">The node to start walking from.</param>
        /// <returns>The value of the leaf that was reached, or <c>Hint</c> when no edge matched.</returns>
        private double WalkNode(Vector v, Node node)
        {
            if (node.IsLeaf)
            {
                return node.Value;
            }

            // Get the index of the feature for this node.
            var col = node.Column;

            if (col == -1)
            {
                throw new InvalidOperationException("Invalid Feature encountered during node walk!");
            }

            var edges = Tree.GetOutEdges(node).ToArray();

            for (var i = 0; i < edges.Length; i++)
            {
                var edge  = (Edge)edges[i];
                var value = v[col];

                // discrete edges match on exact value, continuous edges on the half-open range [Min, Max)
                var matches = edge.Discrete
                                  ? value == edge.Min
                                  : value >= edge.Min && value < edge.Max;

                if (matches)
                {
                    return WalkNode(v, (Node)Tree.GetVertex(edge.ChildId));
                }
            }

            // no edge matched: fall back to the hint unless it holds the double.Epsilon sentinel
            if (Hint != double.Epsilon)
            {
                return Hint;
            }

            // {2} was previously written as the literal "[2]", so the column index never appeared
            throw new InvalidOperationException(
                      string.Format(
                          "Unable to match split value {0} for feature {1}[{2}]\nConsider setting a Hint in order to avoid this error.",
                          v[col],
                          Descriptor.At(col),
                          col));
        }
Exemple #4
0
        /// <summary>Selects the unused column that yields the largest relative gain.</summary>
        /// <param name="x">Matrix whose columns are evaluated as split candidates.</param>
        /// <param name="y">Vector passed to the impurity measure alongside each column.</param>
        /// <param name="used">Indices of columns to skip.</param>
        /// <returns>
        /// A tuple of (column index, gain, impurity measure) for the best column,
        /// or (-1, -1, null) when no column was selected.
        /// </returns>
        private Tuple <int, double, Impurity> GetBestSplit(Matrix x, Vector y, List <int> used)
        {
            int      topColumn  = -1;
            double   topGain    = -1;
            Impurity topMeasure = null;

            for (int column = 0; column < x.Cols; column++)
            {
                if (!used.Contains(column))
                {
                    // a fresh impurity instance is created for each candidate column
                    var impurity = (Impurity)Ject.Create(ImpurityType);
                    var values   = x.Col(column);
                    var info     = Descriptor.At(column);

                    double candidateGain;
                    if (info.Discrete)
                    {
                        // discrete feature: full relative gain
                        candidateGain = impurity.RelativeGain(y, values);
                    }
                    else
                    {
                        // otherwise segment the values by Width first
                        candidateGain = impurity.SegmentedRelativeGain(y, values, Width);
                    }

                    // strictly better than the current best?
                    if (candidateGain > topGain)
                    {
                        topColumn  = column;
                        topGain    = candidateGain;
                        topMeasure = impurity;
                    }
                }
            }

            return new Tuple <int, double, Impurity>(topColumn, topGain, topMeasure);
        }
        /// <summary>
        /// Recursively builds the node for the given examples, adding the vertices and
        /// edges it creates to <paramref name="tree"/>.
        /// </summary>
        /// <param name="x">The Matrix to process.</param>
        /// <param name="y">The Vector to process.</param>
        /// <param name="depth">Remaining depth; a negative value yields a leaf holding the mode of <paramref name="y"/>.</param>
        /// <param name="used">Column indices already chosen for a split. The column chosen here is appended to this list.</param>
        /// <param name="tree">The tree that receives the created vertices and edges.</param>
        /// <returns>
        /// The node built for this call. It is a leaf when the depth is exhausted, when no
        /// split column is found, or when at most one edge has matching rows.
        /// </returns>
        private Node BuildTree(Matrix x, Vector y, int depth, List <int> used, Tree tree)
        {
            if (depth < 0)
            {
                return BuildLeafNode(y.Mode());
            }

            var tuple   = GetBestSplit(x, y, used);
            var col     = tuple.Item1;
            var gain    = tuple.Item2;
            var measure = tuple.Item3;

            // no usable column left: fall back to a leaf holding the mode
            if (col == -1)
            {
                return BuildLeafNode(y.Mode());
            }

            used.Add(col);

            Node node = new Node
            {
                Column = col,
                Gain   = gain,
                IsLeaf = false,
                Name   = Descriptor.ColumnAt(col)
            };

            // populate edges
            List<Edge> edges = new List<Edge>(measure.Segments.Length);

            for (int i = 0; i < measure.Segments.Length; i++)
            {
                // working set
                var segment = measure.Segments[i];
                var edge    = new Edge()
                {
                    ParentId = node.Id,
                    Discrete = measure.Discrete,
                    Min      = segment.Min,
                    Max      = segment.Max
                };

                IEnumerable<int> matches;

                if (edge.Discrete)
                {
                    // get discrete label
                    edge.Label = Descriptor.At(col).Convert(segment.Min).ToString();
                    // do value check for matrix slicing
                    matches = x.Indices(v => v[col] == segment.Min);
                }
                else
                {
                    // get range label
                    edge.Label = string.Format("{0} <= x < {1}", segment.Min, segment.Max);
                    // do range check for matrix slicing
                    matches = x.Indices(v => v[col] >= segment.Min && v[col] < segment.Max);
                }

                // Materialise the matching row indices once: they are read up to three
                // times below, and re-enumerating would re-run the predicate each time.
                IEnumerable<int> slice = matches.ToArray();

                // no matching rows means this edge leads to a dead end and is not built
                if (!slice.Any())
                {
                    continue;
                }

                Vector ySlice = y.Slice(slice);

                Node child;
                if (ySlice.Distinct().Count() == 1)
                {
                    // only one answer, set leaf
                    child = BuildLeafNode(ySlice[0]);
                }
                else
                {
                    // otherwise continue to build tree
                    child = BuildTree(x.Slice(slice), ySlice, depth - 1, used, tree);
                }

                tree.AddVertex(child);
                edge.ChildId = child.Id;

                edges.Add(edge);
            }

            // With at most one edge there is nothing to split on, so the node becomes
            // a leaf holding the mode and its edges are not added to the tree.
            var isSplit = edges.Count > 1;
            if (!isSplit)
            {
                node.IsLeaf = true;
                node.Value  = y.Mode();
            }

            tree.AddVertex(node);

            if (isSplit)
            {
                foreach (var e in edges)
                {
                    tree.AddEdge(e);
                }
            }

            return node;
        }