Slice() public static method

Slices the input Matrix, keeping only the rows at the given indices.
public static Slice ( Matrix m, IEnumerable<int> indices ) : Matrix
m Matrix The input Matrix.
indices IEnumerable<int> The zero-based indices of the rows to keep.
return Matrix The sliced Matrix.
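
A minimal usage sketch (illustrative values; the namespace and the assumption that the two-argument overload keeps rows are inferred from the examples below):

 // using numl.Math.LinearAlgebra; (namespace assumed)

 // build a small 3x2 matrix cell by cell, as Example #4 does
 Matrix m = new Matrix(3, 2);
 for (int i = 0; i < m.Rows; i++)
     for (int j = 0; j < m.Cols; j++)
         m[i, j] = i * m.Cols + j;

 // keep only rows 0 and 2 via the static method
 Matrix rows = Matrix.Slice(m, new[] { 0, 2 });

 // the same call via the extension method (see Examples #6 and #7)
 Matrix sameRows = m.Slice(new[] { 0, 2 });

 // slice columns instead with the VectorType overload
 Matrix cols = m.Slice(new[] { 1 }, VectorType.Col);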
Example #1
        public override IModel Generate(Matrix x, Vector y)
        {
            int N = y.Length;
            Vector a = Vector.Zeros(N);

            // compute kernel
            Matrix K = Kernel.Compute(x);

            int n = 1;

            // iterate until no example is misclassified,
            // capped at 500 passes in case the data is not
            // separable under this kernel
            // TODO: store the SPD kernel more compactly
            bool found_error = true;
            while (n < 500 && found_error)
            {
                found_error = false;
                for (int i = 0; i < N; i++)
                {
                    // misclassified? then apply the perceptron update
                    if (y[i] * a.Dot(K[i]) <= 0)
                    {
                        a[i] += y[i];
                        found_error = true;
                    }
                }

                n++;
            }

            // keep only the indices that matter,
            // i.e. the support vectors (non-zero alpha)
            var indices = a.Indices(d => d != 0);

            // slice up examples to contain
            // only support vectors
            return new KernelPerceptronModel
            {
                Kernel = Kernel,
                A = a.Slice(indices),
                Y = y.Slice(indices),
                X = x.Slice(indices)
            };
        }
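In this example, Slice trims the training data down to the support vectors: only the rows of x (and the matching entries of a and y) whose alpha weight is non-zero survive into the KernelPerceptronModel.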
Example #2
        /// <summary>Builds a tree.</summary>
        /// <param name="x">The Matrix to process.</param>
        /// <param name="y">The Vector to process.</param>
        /// <param name="depth">The depth.</param>
        /// <param name="used">The used.</param>
        /// <returns>A Node.</returns>
        private Node BuildTree(Matrix x, Vector y, int depth, List<int> used, Tree tree)
        {
            if (depth < 0)
                return BuildLeafNode(y.Mode());

            var tuple = GetBestSplit(x, y, used);
            var col = tuple.Item1;
            var gain = tuple.Item2;
            var measure = tuple.Item3;

            // no usable split column was found,
            // so fall back to a leaf node that
            // predicts the mode of the labels
            if (col == -1)
                return BuildLeafNode(y.Mode());

            used.Add(col);

            Node node = new Node
            {
                Column = col,
                Gain = gain,
                IsLeaf = false,
                Name = Descriptor.ColumnAt(col)
            };

            // populate edges
            List<Edge> edges = new List<Edge>(measure.Segments.Length);
            for (int i = 0; i < measure.Segments.Length; i++)
            {
                // working set
                var segment = measure.Segments[i];
                var edge = new Edge()
                {
                    ParentId = node.Id,
                    Discrete = measure.Discrete,
                    Min = segment.Min,
                    Max = segment.Max
                };

                IEnumerable<int> slice;

                if (edge.Discrete)
                {
                    // get discrete label
                    edge.Label = Descriptor.At(col).Convert(segment.Min).ToString();
                    // do value check for matrix slicing
                    slice = x.Indices(v => v[col] == segment.Min);
                }
                else
                {
                    // get range label
                    edge.Label = string.Format("{0} <= x < {1}", segment.Min, segment.Max);
                    // do range check for matrix slicing
                    slice = x.Indices(v => v[col] >= segment.Min && v[col] < segment.Max);
                }

                // an empty slice means this edge leads
                // to a dead end, so the edge is not built
                if (slice.Count() > 0)
                {
                    Vector ySlice = y.Slice(slice);
                    // only one answer, set leaf
                    if (ySlice.Distinct().Count() == 1)
                    {
                        var child = BuildLeafNode(ySlice[0]);
                        tree.AddVertex(child);
                        edge.ChildId = child.Id;
                    }
                    // otherwise continue to build tree
                    else
                    {
                        var child = BuildTree(x.Slice(slice), ySlice, depth - 1, used, tree);
                        tree.AddVertex(child);
                        edge.ChildId = child.Id;
                    }

                    edges.Add(edge);
                }
            }

            // fewer than two viable edges means there was
            // no real split; convert this node to a leaf
            // that predicts the mode of the labels
            if (edges.Count <= 1)
            {
                var val = y.Mode();
                node.IsLeaf = true;
                node.Value = val;
            }

            tree.AddVertex(node);

            if(edges.Count > 1)
                foreach (var e in edges)
                    tree.AddEdge(e);

            return node;
        }
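Here the matrix overload drives the recursion: each edge's index set selects the rows that fall into that segment, and x.Slice(slice) hands the subtree only those examples, with y.Slice(slice) keeping the labels aligned.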
Example #3
        private static LearningModel GenerateModel(IGenerator generator, Matrix x, Vector y, IEnumerable<object> examples, double trainingPct)
        {
            var descriptor = generator.Descriptor;
            var total = examples.Count();
            var trainingCount = (int)System.Math.Floor(total * trainingPct);

            // hold out (1 - trainingPct) of the examples for testing
            var testingSlice = GetTestPoints(total - trainingCount, total).ToArray();

            // use the remaining trainingPct of the examples for training
            var trainingSlice = GetTrainingPoints(testingSlice, total).ToArray();

            // training
            var x_t = x.Slice(trainingSlice);
            var y_t = y.Slice(trainingSlice);

            // generate model
            var model = generator.Generate(x_t, y_t);
            model.Descriptor = descriptor;

            // testing
            object[] test = GetTestExamples(testingSlice, examples);
            double accuracy = 0;

            for (int j = 0; j < test.Length; j++)
            {
                // item under test
                object o = test[j];

                // get truth
                var truth = Ject.Get(o, descriptor.Label.Name);

                // if truth is a string, sanitize
                if (descriptor.Label.Type == typeof(string))
                    truth = StringHelpers.Sanitize(truth.ToString());

                // make prediction
                var features = descriptor.Convert(o, false).ToVector();

                var p = model.Predict(features);
                var pred = descriptor.Label.Convert(p);

                // assess accuracy
                if (truth.Equals(pred))
                    accuracy += 1;
            }

            // get percentage correct
            accuracy /= test.Length;

            return new LearningModel { Generator = generator, Model = model, Accuracy = accuracy };
        }
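In this example, Slice partitions the data for hold-out evaluation: trainingSlice picks the rows used to fit the model, while the held-out testingSlice rows are used to estimate accuracy.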
Example #4
        /// <summary>Generates.</summary>
        /// <param name="X">The Matrix to process.</param>
        /// <param name="k">The int to process.</param>
        public void Generate(Matrix X, int k)
        {
            int n = X.Rows;
            int d = X.Cols;
            
            /***********************
             * initialize parameters
             ***********************/
            // convergence params
            var log_probability = 0d;
            var probability_difference = double.MaxValue;
            var mu_difference = double.MaxValue;

            // initialize centers with KMeans
            KMeans kmeans = new KMeans();
            var asgn = kmeans.Generate(X, k, new EuclidianDistance());

            // tentative centers
            var mu_k = kmeans.Centers;

            // initial covariances (row i stores diag(cov) for cluster i)
            var sg_k = new Matrix(k, d);
            for (int i = 0; i < k; i++)
            {
                var indices = asgn.Select((a, b) => new Tuple<int, int>(a, b)).Where(t => t.Item1 == i).Select(t => t.Item2);
                var matrix = X.Slice(indices, VectorType.Row);
                sg_k[i] = matrix.CovarianceDiag();
            }

            // mixing coefficient
            var pi_k = asgn
                        .OrderBy(i => i)
                        .GroupBy(j => j)
                        .Select(g => (double)g.Count() / (double)asgn.Length)
                        .ToVector();

            int max_iter = 100;
            do
            {
                /***********************
                 * Expectation Step
                 ***********************/
                // responsibility matrix: how much is Gaussian k responsible for point x
                var z_nk = new Matrix(n, k);
                for (int i = 0; i < n; i++)
                {
                    //  pi_j * N(x_n | mu_j, sigma_j)
                    for (int j = 0; j < k; j++)
                        z_nk[i, j] = pi_k[j] * Normal(X[i], mu_k[j], sg_k[j]);

                    var dn = z_nk[i].Sum();

                    // guard against numerical underflow of all k densities
                    if (dn == 0)
                        Console.WriteLine("Warning: zero responsibility mass for point {0}", i);

                    // normalize the row (in place) so responsibilities sum to 1
                    z_nk[i].Each(z => z / dn);
                }

                /***********************
                 * Maximization Step
                 ***********************/
                var N_k = z_nk.Sum(VectorType.Row);

                var mu_k_new = new Matrix(mu_k.Rows, mu_k.Cols);
                for (int i = 0; i < k; i++)
                {
                    var sum = Vector.Zeros(d);
                    for (int j = 0; j < n; j++)
                        sum += z_nk[j, i] * X[j];
                    mu_k_new[i] = sum / N_k[i];
                }

                var sg_k_new = new Matrix(k, d);
                for (int i = 0; i < k; i++)
                {
                    var sum = Vector.Zeros(d);
                    for (int j = 0; j < n; j++)
                        sum += z_nk[j, i] * (X[j] - mu_k_new[i]).Each(s => s * s);
                    sg_k_new[i] = sum / N_k[i];
                }

                var pi_k_new = N_k / n;

                /***********************
                 * Convergence Check
                 ***********************/
                var new_log_prob = 0d;
                for (int i = 0; i < n; i++)
                {
                    var acc = 0d;
                    //  pi_j * N(x_n | mu_j, sigma_j)
                    for (int j = 0; j < k; j++)
                        acc += pi_k[j] * Normal(X[i], mu_k[j], sg_k[j]);

                    new_log_prob += System.Math.Log(acc, System.Math.E);
                }

                // log likelihood differences
                probability_difference = System.Math.Abs(log_probability - new_log_prob);
                Console.WriteLine("Log Likelihoods (Total Points: {0}, k={1}, d={2})\nO: {3}\nN: {4}\nDifference: {5}\n", n, k, d, log_probability, new_log_prob, probability_difference);
                log_probability = new_log_prob;


                // centers differences
                mu_difference = mu_k.GetRows()
                                .Zip(mu_k_new.GetRows(), (v1, v2) => new { V1 = v1, V2 = v2 })
                                .Sum(a => (a.V1 - a.V2).Norm());

                Console.WriteLine("Centers:\nO: {0}\nN: {1}\nDifference: {2}\n", mu_k, mu_k_new, mu_difference);
                mu_k = mu_k_new;

                // covariance differences
                double diff = sg_k.GetRows()
                            .Zip(sg_k_new.GetRows(), (v1, v2) => new { V1 = v1, V2 = v2 })
                            .Sum(a => (a.V1 - a.V2).Norm());

                Console.WriteLine("Covariance:\nO: {0}\nN: {1}\nDifference: {2}\n", sg_k, sg_k_new, diff);
                sg_k = sg_k_new;

                // mixing differences
                diff = (pi_k - pi_k_new).Each(s => System.Math.Abs(s)).Sum();
                Console.WriteLine("Mixing Coeffs:\nO: {0}\nN: {1}\nDifference: {2}\n", pi_k, pi_k_new, diff);
                pi_k = pi_k_new;

                Console.WriteLine("-------------------------------------------------------------");

            } while (probability_difference > .0000000001 && mu_difference > .0000000001 && --max_iter >= 0);
        }
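This example uses the three-argument overload (see Example #6) with VectorType.Row to gather the rows KMeans assigned to cluster i before computing that cluster's initial diagonal covariance.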
Example #5
        /// <summary>Generates a SVM model based on a set of examples.</summary>
        /// <param name="X">The Matrix to process.</param>
        /// <param name="y">The Vector to process.</param>
        /// <returns>Model.</returns>
        public override IModel Generate(Matrix X, Vector y)
        {
            this.Preprocess(X);

            // map labels to +1 (true) and -1 (false)
            y = y.ToBinary(k => k == 1d, falseValue: -1.0);

            // initialise variables
            int m = X.Rows, n = X.Cols, i = -1, j = -1, changes = 0, iterations = 0;
            double lagLow = 0.0, lagHigh = 0.0, cost = 0.0, tempAI = 0d, tempAJ = 0d;

            Vector gradient = Vector.Zeros(m), alpha = Vector.Zeros(m);

            // precompute kernel matrix (using the similarity function)
            Matrix K = this.KernelFunction.Compute(X);

            // synchronise SVM parameters with working set selection function.
            this.SelectionFunction.Bias = this.Bias; this.SelectionFunction.C = this.C; this.SelectionFunction.Epsilon = this.Epsilon;
            this.SelectionFunction.K = K; this.SelectionFunction.Y = y;

            bool finalise = false;

            this.SelectionFunction.Initialize(alpha, gradient);

            while (finalise == false && iterations < this.MaxIterations)
            {
                changes = 0;

                #region Training

                for (int p = 0; p < m; p++)
                {
                    // get new working set selection using heuristic function
                    Tuple<int, int> newPair = this.SelectionFunction.GetWorkingSet(i, j, gradient, alpha);

                    // check for valid i, j pairs
                    if (newPair.Item1 >= 0 && newPair.Item2 >= 0 && newPair.Item1 != newPair.Item2)
                    {
                        i = newPair.Item1; j = newPair.Item2;
                        // compute new gradients
                        gradient[i] = Bias + (alpha * y * K[i, VectorType.Col]).Sum() - y[i];

                        if ((y[i] * gradient[i] < -this.Epsilon && alpha[i] < this.C) || (y[i] * gradient[i] > this.Epsilon && alpha[i] > 0))
                        {
                            gradient[j] = Bias + (alpha * y * K[j, VectorType.Col]).Sum() - y[j];

                            // store temp working copies of alpha from both pairs (i, j)
                            tempAI = alpha[i]; tempAJ = alpha[j];

                            // update lower and upper bounds of lagrange multipliers
                            if (y[i] == y[j])
                            {
                                // pair shares the same class, so don't apply the large margin
                                lagLow = System.Math.Max(0.0, alpha[j] + alpha[i] - this.C);
                                lagHigh = System.Math.Min(this.C, alpha[j] + alpha[i]);
                            }
                            else
                            {
                                // pair spans both classes, so apply the large margin
                                lagLow = System.Math.Max(0.0, alpha[j] - alpha[i]);
                                lagHigh = System.Math.Min(this.C, this.C + alpha[j] - alpha[i]);
                            }

                            // if lagrange constraints are not diverse then get new working set
                            if (lagLow == lagHigh) continue;

                            // compute cost and if it's greater than 0 skip
                            // cost should optimise large margin where fit line intercepts <= 0
                            cost = 2.0 * K[i, j] - K[i, i] - K[j, j];
                            if (cost >= 0.0) continue;
                            else
                            {
                                // update alpha of (j) w.r.t to the relative cost difference of the i-th and j-th gradient
                                alpha[j] = alpha[j] - (y[j] * (gradient[i] - gradient[j])) / cost;

                                // clip alpha with lagrange multipliers
                                alpha[j] = System.Math.Min(lagHigh, alpha[j]);
                                alpha[j] = System.Math.Max(lagLow, alpha[j]);

                                // check alpha tolerance factor
                                if (System.Math.Abs(alpha[j] - tempAJ) < this.Epsilon)
                                {
                                    // we're optimising large margins so skip small ones
                                    alpha[j] = tempAJ; continue;
                                }

                                // update alpha of i if we have a large margin w.r.t to alpha (j)
                                alpha[i] = alpha[i] + y[i] * y[j] * (tempAJ - alpha[j]);

                                // precompute i, j into feasible region for Bias
                                double yBeta = (alpha[i] - tempAI) * K[i, j] - y[j] * (alpha[j] - tempAJ);
                                // store temp beta with gradient for i, j pairs
                                double beta_i = this.Bias - gradient[i] - y[i] * yBeta * K[i, j];
                                double beta_j = this.Bias - gradient[j] - y[i] * yBeta * K[j, j];

                                // update new bias with constrained alpha limits (0 < alpha < C)
                                if (0.0 < alpha[i] && alpha[i] < this.C) this.Bias = beta_i;
                                else if (0.0 < alpha[j] && alpha[j] < this.C) this.Bias = beta_j;
                                else this.Bias = (beta_i + beta_j) / 2.0;

                                changes++;
                            }
                        }
                    }
                    else if (newPair.Item1 == -1 || newPair.Item2 == -1)
                    {
                        // unable to find suitable sub problem (j) to optimise
                        finalise = true;
                        break;
                    }
                }

                if (changes == 0) iterations++;
                else iterations = 0;

                #endregion
            }

            // keep only the supporting parameters, i.e. the
            // support vectors, where alpha is positive
            int[] fitness = (alpha > 0d).ToArray();

            // return initialised model
            return new SVMModel()
            {
                Descriptor = this.Descriptor,
                FeatureNormalizer = base.FeatureNormalizer,
                FeatureProperties = base.FeatureProperties,
                Theta = ((alpha * y) * X).ToVector(),
                Alpha = alpha.Slice(fitness),
                Bias = this.Bias,
                X = X.Slice(fitness, VectorType.Row),
                Y = y.Slice(fitness),
                KernelFunction = this.KernelFunction
            };
        }
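As in Example #1, Slice keeps only the support vectors: the rows of X (and the matching entries of alpha and y) where alpha is positive are retained in the returned SVMModel.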
Example #6
 /// <summary>A Matrix extension method that slices.</summary>
 /// <param name="m">Matrix.</param>
 /// <param name="indices">The indices.</param>
 /// <param name="t">Whether to slice by Row or Column.</param>
 /// <returns>A Matrix.</returns>
 public static Matrix Slice(this Matrix m, IEnumerable<int> indices, VectorType t)
 {
     return Matrix.Slice(m, indices, t);
 }
Example #7
 /// <summary>A Matrix extension method that slices.</summary>
 /// <param name="m">Matrix.</param>
 /// <param name="indices">The indices.</param>
 /// <returns>A Matrix.</returns>
 public static Matrix Slice(this Matrix m, IEnumerable<int> indices)
 {
     return Matrix.Slice(m, indices);
 }
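Both extension methods simply forward to the static Matrix.Slice documented at the top of this page; the overload without a VectorType is the form used in Examples #1, #3, and #8.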
Example #8
        /// <summary>Generates a model.</summary>
        /// <param name="generator">Model generator used.</param>
        /// <param name="x">The Matrix to process.</param>
        /// <param name="y">The Vector to process.</param>
        /// <param name="examples">Source data.</param>
        /// <param name="trainingPct">The training pct.</param>
        /// <param name="total">The total number of examples.</param>
        /// <returns>The model.</returns>
        private static LearningModel GenerateModel(IGenerator generator, Matrix x, Vector y, IEnumerable<object> examples, double trainingPct, int total)
        {
            var descriptor = generator.Descriptor;
            var trainingCount = (int)System.Math.Floor(total * trainingPct);

            // hold out (1 - trainingPct) of the examples for testing
            var testingSlice = GetTestPoints(total - trainingCount, total).ToArray();

            // use the remaining trainingPct of the examples for training
            var trainingSlice = GetTrainingPoints(testingSlice, total).ToArray();

            // training
            var x_t = x.Slice(trainingSlice);
            var y_t = y.Slice(trainingSlice);

            // generate model
            var model = generator.Generate(x_t, y_t);
            model.Descriptor = descriptor;

            Score score = new Score();

            if (testingSlice.Length > 0)
            {
                // testing
                object[] test = GetTestExamples(testingSlice, examples);
                Vector y_pred = new Vector(test.Length);
                Vector y_test = descriptor.ToExamples(test).Item2;

                bool isBinary = y_test.IsBinary();
                if (isBinary)
                    y_test = y_test.ToBinary(f => f == 1d, 1.0, 0.0);

                for (int j = 0; j < test.Length; j++)
                {
                    // item under test
                    object o = test[j];

                    // make prediction
                    var features = descriptor.Convert(o, false).ToVector();
                    double val = model.Predict(features);
                    var pred = descriptor.Label.Convert(val);

                    var truth = Ject.Get(o, descriptor.Label.Name);

                    if (truth.Equals(pred))
                        y_pred[j] = y_test[j];
                    else
                        y_pred[j] = (isBinary ? (y_test[j] >= 1d ? 0d : 1d) : val);
                }

                // score predictions
                score = Score.ScorePredictions(y_pred, y_test);
            }

            return new LearningModel { Generator = generator, Model = model, Score = score };
        }
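This variant of Example #3 takes the example count as a parameter and scores the held-out predictions with Score.ScorePredictions rather than computing a raw accuracy ratio; the Slice calls partition the rows in exactly the same way.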