/// <summary>Generates a logistic regression model based on a set of examples.</summary>
/// <param name="X">The Matrix to process.</param>
/// <param name="y">The Vector to process.</param>
/// <returns>Model.</returns>
public override IModel Generate(Matrix X, Vector y)
{
    X = IncreaseDimensions(X, this.PolynomialFeatures);

    this.Preprocess(X);

    // guarantee 1/0 based label vector
    y = y.ToBinary(f => f == 1d, falseValue: 0d);

    // add intercept term
    X = X.Insert(Vector.Ones(X.Rows), 0, VectorType.Col, false);

    Vector theta = Vector.Rand(X.Cols);

    // run gradient descent
    var optimizer = new numl.Math.Optimization.Optimizer(theta, this.MaxIterations, this.LearningRate)
    {
        CostFunction = new numl.Math.Functions.Cost.LogisticCostFunction()
        {
            X = X,
            Y = y,
            Lambda = this.Lambda,
            Regularizer = new numl.Math.Functions.Regularization.L2Regularizer(),
            LogisticFunction = this.LogisticFunction
        }
    };

    optimizer.Run();

    LogisticRegressionModel model = new LogisticRegressionModel()
    {
        Descriptor = this.Descriptor,
        NormalizeFeatures = base.NormalizeFeatures,
        FeatureNormalizer = base.FeatureNormalizer,
        FeatureProperties = base.FeatureProperties,
        Theta = optimizer.Properties.Theta,
        LogisticFunction = this.LogisticFunction,
        PolynomialFeatures = this.PolynomialFeatures
    };

    return model;
}
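A minimal usage sketch for the generator above. The hyperparameter values are illustrative assumptions, not library defaults; Matrix, Vector, IModel, and LogisticRegressionGenerator are the numl types the method itself uses.

public static IModel TrainLogisticRegression(Matrix X, Vector y)
{
    var generator = new LogisticRegressionGenerator()
    {
        Lambda = 1.0,            // L2 regularization strength (assumed value)
        LearningRate = 0.01,     // gradient descent step size (assumed value)
        MaxIterations = 500,     // optimizer iteration cap (assumed value)
        PolynomialFeatures = 1   // degree passed to IncreaseDimensions (assumed value)
    };

    // labels are coerced to 1/0 inside Generate, so y only needs to
    // mark positive examples with 1d
    return generator.Generate(X, y);
}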
/// <summary>Generates an SVM model based on a set of examples.</summary>
/// <param name="X">The Matrix to process.</param>
/// <param name="y">The Vector to process.</param>
/// <returns>Model.</returns>
public override IModel Generate(Matrix X, Vector y)
{
    this.Preprocess(X);

    // expect truth = 1 and false = -1
    y = y.ToBinary(k => k == 1d, falseValue: -1.0);

    // initialise variables
    int m = X.Rows, n = X.Cols, i = -1, j = -1, changes = 0, iterations = 0;
    double lagLow = 0.0, lagHigh = 0.0, cost = 0.0, tempAI = 0d, tempAJ = 0d;

    Vector gradient = Vector.Zeros(m), alpha = Vector.Zeros(m);

    // precompute kernel matrix (using similarity function)
    Matrix K = this.KernelFunction.Compute(X);

    // synchronise SVM parameters with working set selection function
    this.SelectionFunction.Bias = this.Bias;
    this.SelectionFunction.C = this.C;
    this.SelectionFunction.Epsilon = this.Epsilon;
    this.SelectionFunction.K = K;
    this.SelectionFunction.Y = y;

    bool finalise = false;

    this.SelectionFunction.Initialize(alpha, gradient);

    while (finalise == false && iterations < this.MaxIterations)
    {
        changes = 0;

        #region Training

        for (int p = 0; p < m; p++)
        {
            // get new working set selection using heuristic function
            Tuple<int, int> newPair = this.SelectionFunction.GetWorkingSet(i, j, gradient, alpha);

            // check for valid i, j pairs
            if (newPair.Item1 >= 0 && newPair.Item2 >= 0 && newPair.Item1 != newPair.Item2)
            {
                i = newPair.Item1;
                j = newPair.Item2;

                // compute new gradients
                gradient[i] = Bias + (alpha * y * K[i, VectorType.Col]).Sum() - y[i];

                if ((y[i] * gradient[i] < -this.Epsilon && alpha[i] < this.C)
                    || (y[i] * gradient[i] > this.Epsilon && alpha[i] > 0))
                {
                    gradient[j] = Bias + (alpha * y * K[j, VectorType.Col]).Sum() - y[j];

                    // store temp working copies of alpha for both pairs (i, j)
                    tempAI = alpha[i];
                    tempAJ = alpha[j];

                    // update lower and upper bounds of lagrange multipliers
                    if (y[i] == y[j])
                    {
                        // pairs are the same class, don't apply large margin
                        lagLow = System.Math.Max(0.0, alpha[j] + alpha[i] - this.C);
                        lagHigh = System.Math.Min(this.C, alpha[j] + alpha[i]);
                    }
                    else
                    {
                        // pairs are not the same class, apply large margin
                        lagLow = System.Math.Max(0.0, alpha[j] - alpha[i]);
                        lagHigh = System.Math.Min(this.C, this.C + alpha[j] - alpha[i]);
                    }

                    // if lagrange constraints are not diverse then get a new working set
                    if (lagLow == lagHigh) continue;

                    // compute cost and skip if it's not negative;
                    // cost should optimise the large margin where the fit line intercepts <= 0
                    cost = 2.0 * K[i, j] - K[i, i] - K[j, j];
                    if (cost >= 0.0)
                    {
                        continue;
                    }
                    else
                    {
                        // update alpha of (j) w.r.t. the relative cost difference of the i-th and j-th gradients
                        alpha[j] = alpha[j] - (y[j] * (gradient[i] - gradient[j])) / cost;

                        // clip alpha with lagrange multipliers
                        alpha[j] = System.Math.Min(lagHigh, alpha[j]);
                        alpha[j] = System.Math.Max(lagLow, alpha[j]);

                        // check alpha tolerance factor
                        if (System.Math.Abs(alpha[j] - tempAJ) < this.Epsilon)
                        {
                            // we're optimising large margins so skip small ones
                            alpha[j] = tempAJ;
                            continue;
                        }

                        // update alpha of i if we have a large margin w.r.t. alpha (j)
                        alpha[i] = alpha[i] + y[i] * y[j] * (tempAJ - alpha[j]);

                        // precompute i, j into the feasible region for Bias
                        double yBeta = (alpha[i] - tempAI) * K[i, j] - y[j] * (alpha[j] - tempAJ);

                        // store temp beta with gradient for i, j pairs
                        double beta_i = this.Bias - gradient[i] - y[i] * yBeta * K[i, j];
                        double beta_j = this.Bias - gradient[j] - y[i] * yBeta * K[j, j];

                        // update new bias with constrained alpha limits (0 < alpha < C)
                        if (0.0 < alpha[i] && alpha[i] < this.C)
                            this.Bias = beta_i;
                        else if (0.0 < alpha[j] && alpha[j] < this.C)
                            this.Bias = beta_j;
                        else
                            this.Bias = (beta_i + beta_j) / 2.0;

                        changes++;
                    }
                }
            }
            else if (newPair.Item1 == -1 || newPair.Item2 == -1)
            {
                // unable to find a suitable sub problem (j) to optimise
                finalise = true;
                break;
            }
        }

        if (changes == 0) iterations++;
        else iterations = 0;

        #endregion
    }

    // keep only the support vectors, i.e. examples where alpha is positive
    // (0 < alpha <= C)
    int[] fitness = (alpha > 0d).ToArray();

    // return initialised model
    return new SVMModel()
    {
        Descriptor = this.Descriptor,
        FeatureNormalizer = base.FeatureNormalizer,
        FeatureProperties = base.FeatureProperties,
        Theta = ((alpha * y) * X).ToVector(),
        Alpha = alpha.Slice(fitness),
        Bias = this.Bias,
        X = X.Slice(fitness, VectorType.Row),
        Y = y.Slice(fitness),
        KernelFunction = this.KernelFunction
    };
}
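A matching sketch for the SVM generator. The C, MaxIterations, and KernelFunction members appear in the method above; the SVMGenerator name, the RBFKernel type and its sigma argument, and all parameter values are assumptions for illustration.

public static IModel TrainSvm(Matrix X, Vector y)
{
    var generator = new SVMGenerator()
    {
        C = 1.0,                                              // box constraint on alpha: 0 <= alpha <= C (assumed value)
        MaxIterations = 100,                                  // change-free sweeps allowed before stopping (assumed value)
        KernelFunction = new numl.Math.Kernels.RBFKernel(0.5) // kernel used to precompute K (assumed type and argument)
    };

    // labels are coerced to +1/-1 inside Generate
    return generator.Generate(X, y);
}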