/// <summary>
/// The log-likelihood of the Weibull distribution on censored and uncensored arrays
/// with features. Uncensored (organic) durations contribute log(pdf); censored
/// (inorganic) durations contribute log(survival).
/// </summary>
/// <param name="w">The matrix of parameters (2 x number of features).</param>
/// <param name="fSamples">The features corresponding to the organic recoveries.
/// Number of rows should be same as this.OrganicRecoveryDurations.Length</param>
/// <param name="fCensored">The features corresponding to the reboots.
/// Number of rows should be the same as this.InorganicRecoveryDurations.Length</param>
/// <returns>The log-likelihood of the data along with features.</returns>
public double LogLikelihood(Matrix<double> w, Matrix<double> fSamples, Matrix<double> fCensored)
{
    List<double> t = this.OrganicRecoveryDurations;
    List<double> x = this.InorganicRecoveryDurations;
    double lik = 0;

    // Sigmoids map the unconstrained linear predictors into (0, upper bound),
    // keeping the shape and scale parameters positive and bounded.
    Sigmoid sShape = new Sigmoid(this.ShapeUpperBound);
    Sigmoid sScale = new Sigmoid(this.ScaleUpperBound);

    // Uncensored (organic) observations: add log(pdf) per sample.
    for (int i = 0; i < fSamples.RowCount; i++)
    {
        Vector<double> currentRow = fSamples.Row(i);

        // theta = W.f is a 2-vector; theta[0] drives the shape, theta[1] the scale.
        Vector<double> theta = w.Multiply(currentRow);
        double shape = sShape.Transform(theta[0]);
        double scale = sScale.Transform(theta[1]);

        // List indexer instead of LINQ ElementAt(i): same value, direct O(1) access.
        lik += this.LogPdf(t[i], shape, scale);
    }

    // Censored (inorganic) observations: add log(survival) per sample.
    for (int i = 0; i < fCensored.RowCount; i++)
    {
        Vector<double> currentRow = fCensored.Row(i);
        Vector<double> theta = w.Multiply(currentRow);
        double shape = sShape.Transform(theta[0]);
        double scale = sScale.Transform(theta[1]);
        lik += this.LogSurvival(x[i], shape, scale);
    }

    return lik;
}
/// <summary>
/// End-to-end check of the featured log-logistic fit: first fits a feature-less
/// model to seed the weight matrix via inverse sigmoids, then runs gradient
/// descent with features and verifies the scale recovered for a known feature
/// vector lands near the expected value (80 +/- 2).
/// </summary>
public void TestLogLogisticWFeatures()
{
    DataGen dg = new DataGen();
    dg.GenLogLogisticWFeatures();
    double shapeMax = 5.0;
    double scaleMax = 500.0;

    // Fit a feature-less model first; its Kappa/Lambda seed the featured model.
    double[] arr = new double[] { 1.0, 150.0 };
    Vector<double> init = Vector<double>.Build.DenseOfArray(arr);
    LogLogistic modelLogLogistic = new LogLogistic(dg.organicRecoveryDurations, dg.inorganicRecoverydurations);
    modelLogLogistic.GradientDescent(init);
    Console.WriteLine("LL without features is "
        + modelLogLogistic.LogLikelihood(modelLogLogistic.Kappa, modelLogLogistic.Lambda)
        + " with Kappa " + modelLogLogistic.Kappa
        + " and Lambda " + modelLogLogistic.Lambda);

    // Only the first (bias) column is seeded from the feature-less fit; the
    // remaining feature weights start at zero.
    double[,] warr = new double[2, dg.fCensored.ColumnCount];
    warr[0, 0] = Sigmoid.InverseSigmoid(modelLogLogistic.Kappa, shapeMax);
    warr[1, 0] = Sigmoid.InverseSigmoid(modelLogLogistic.Lambda, scaleMax);
    Matrix<double> w = Matrix<double>.Build.DenseOfArray(warr);

    LogLogistic modelLogLogisticFeatured = new LogLogistic(
        dg.organicRecoveryDurations, dg.inorganicRecoverydurations, dg.fSamples, dg.fCensored);
    modelLogLogisticFeatured.ShapeUpperBound = shapeMax;
    modelLogLogisticFeatured.ScaleUpperBound = scaleMax;
    Matrix<double> logLogisticParameters = modelLogLogisticFeatured.GradientDescent(w, 2001);

    // NOTE(review): the original also computed shape/scale for the feature vector
    // {1.0, 2.0, 3.0} and the shape for the vector below, but never asserted them
    // (dead stores). Those computations are removed here; TODO: add assertions for
    // them once their expected values are confirmed.
    Vector<double> scndSample = Vector<double>.Build.DenseOfArray(
        new double[] { 1.0, 4.0, 2.0 });
    Vector<double> res = logLogisticParameters.Multiply(scndSample);
    double alphaScale = res[1];
    double scale = Sigmoid.Transform(alphaScale, scaleMax);

    Assert.IsTrue(Math.Abs(scale - 80.0) < 2.0);
}
/// <summary>
/// Calculates the gradient of the log-likelihood function for Weibull with features.
/// Provided below is the derivation in Latex. Paste it into an online
/// Latex editor like https://www.overleaf.com/7650945xdjrrdzrmsdd#/26769609/
/// to read and understand where the formulas came from.
/// Let $f_i$ be the vector of features for the $i^{th}$ data point and $x_i$ be the duration it took to recover.
/// Then, the Likelihood and log-likelihood functions are given by -
/// \[L = \prod_{i=1}^n \log(pdf(x_i, W.f_i)) \]
/// \[ll = \sum_{i=1}^n \log(pdf(x_i, W.f_i))\]
/// \[\frac{\partial(ll)}{\partial W} = \frac{1}{pdf(x_i, W.f_i)} f_i(\partial \theta_i)^T \]
/// Where,
/// \[\theta_i = W.f_i = [\kappa, \lambda]\] here $\kappa$, once the sigmoid is applied to it
/// to ensure it being positive, is the shape parameter of the Weibull and $\lambda$ once the
/// sigmoid is applied is the scale parameter.
/// \[\partial \theta_i = \frac{\partial(pdf(x,\theta_i))}{\partial(\theta_i)}\]
/// </summary>
/// <param name="w">The matrix of parameters (2 x # of features)</param>
/// <param name="fSamples">The features corresponding to the organic recoveries.
/// Number of rows should be same as this.OrganicRecoveryDurations.Length</param>
/// <param name="fCensored">The features corresponding to the reboots.
/// Number of rows should be the same as this.InorganicRecoveryDurations.Length</param>
/// <param name="eps">Since we divide by the PDF and survival functions, we need to make sure
/// they don't get lower than a threshold or the gradients blow up.
/// This parameter is a lower bound on them.</param>
/// <param name="bailOutSurvivalValue">The duration to evaluate survival at as a fallback when
/// the pdf (or survival) drops below <paramref name="eps"/> and the gradient would blow up.</param>
/// <returns>The gradient of the log-likelihood with the matrix of parameters, w</returns>
public Matrix <double> GradLL(Matrix <double> w, Matrix <double> fSamples, Matrix <double> fCensored, double eps = 1e-8, double bailOutSurvivalValue = 10.0)
{
    IReadOnlyCollection <double> t = this.OrganicRecoveryDurations;
    IReadOnlyCollection <double> x = this.InorganicRecoveryDurations;
    Matrix <double> gradW = Matrix <double> .Build.Dense(w.RowCount, w.ColumnCount);
    // Sigmoids bound the shape and scale parameters; their derivatives enter the
    // gradient below via the chain rule.
    Sigmoid sShape = new Sigmoid(this.ShapeUpperBound);
    Sigmoid sScale = new Sigmoid(this.ScaleUpperBound);
    // Uncensored (organic) observations: gradient of log(pdf) terms.
    for (int i = 0; i < fSamples.RowCount; i++)
    {
        Vector <double> currentRow = fSamples.Row(i);
        Vector <double> theta = w.Multiply(currentRow); // A 2 dim vector that will be converted to the 2 Weibull params.
        double shape = sShape.Transform(theta[0]); // To prevent the parameters from becoming negative, we use sigmoids.
        double scale = sScale.Transform(theta[1]);
        double pdf = this.PDF(t.ElementAt(i), shape, scale);
        Vector <double> pdfGrad = this.GradPDF(t.ElementAt(i), shape, scale, pdf);
        Vector <double> sigmoidGrad = Vector <double> .Build.DenseOfArray(new double[] { sShape.Grad(theta[0]), sScale.Grad(theta[1]) });
        // Since we used the sigmoids, the chain rule dictates that we point-wise multiply the derivatives.
        Vector <double> delTheta = pdfGrad.PointwiseMultiply(sigmoidGrad);
        // Since we are dividing the matrix by the pdf, we need to be careful it doesn't blow up.
        if (pdf > eps)
        {
            // del/del(W) log(lik(x,W.f)) = 1/lik(x,W.f) * f . (del(lik(x))/del(W.f))^T
            gradW = gradW.Add(delTheta.OuterProduct(currentRow).Divide(pdf));
        }
        else
        {
            // Bail-out: the pdf is numerically ~0, so substitute the survival-based
            // gradient evaluated at a fixed safe duration instead of dividing by ~0.
            double survival = this.Survival(bailOutSurvivalValue, shape, scale);
            Vector <double> survivalGrad = this.GradSurvival(bailOutSurvivalValue, shape, scale, survival);
            delTheta = survivalGrad.PointwiseMultiply(sigmoidGrad);
            gradW = gradW.Add(delTheta.OuterProduct(currentRow).Divide(survival));
        }
    }
    // Censored (inorganic) observations: gradient of log(survival) terms.
    for (int i = 0; i < fCensored.RowCount; i++)
    {
        Vector <double> currentRow = fCensored.Row(i);
        Vector <double> theta = w.Multiply(currentRow);
        double shape = sShape.Transform(theta[0]);
        double scale = sScale.Transform(theta[1]);
        double survival = this.Survival(x.ElementAt(i), shape, scale);
        Vector <double> survivalGrad = this.GradSurvival(x.ElementAt(i), shape, scale, survival);
        Vector <double> sigmoidGrad = Vector <double> .Build.DenseOfArray(new double[] { sShape.Grad(theta[0]), sScale.Grad(theta[1]) });
        Vector <double> delTheta = survivalGrad.PointwiseMultiply(sigmoidGrad);
        // Since we are dividing the matrix by the survival, we need to be careful it doesn't blow up.
        if (survival > eps)
        {
            gradW = gradW.Add(delTheta.OuterProduct(currentRow).Divide(survival));
        }
        else
        {
            // Bail-out: re-evaluate survival at the fixed safe duration.
            survival = this.Survival(bailOutSurvivalValue, shape, scale);
            survivalGrad = this.GradSurvival(bailOutSurvivalValue, shape, scale, survival);
            delTheta = survivalGrad.PointwiseMultiply(sigmoidGrad);
            gradW = gradW.Add(delTheta.OuterProduct(currentRow).Divide(survival));
        }
    }
    return(gradW);
}
/// <summary>
/// Calculating gradient of the loglikelihood using GradLPDF and GradLSurvival.
/// Unlike GradLL, this works directly with the gradients of the log-pdf and
/// log-survival, so no division by the pdf/survival (and hence no bail-out path)
/// is needed; instead it fails fast if the accumulated gradient becomes non-finite.
/// </summary>
/// <param name="w">The current weight where we want to evaluate the gradient.</param>
/// <param name="fSamples">Features for the organic recovery observations.</param>
/// <param name="fCensored">Features for the censored (inorganic) observations.</param>
/// <param name="eps">Unused in this implementation; retained for signature
/// compatibility with GradLL.</param>
/// <param name="bailOutSurvivalValue">Unused in this implementation; retained for
/// signature compatibility with GradLL.</param>
/// <returns>The gradient of the log-likelihood function with respect to w (the weights).
/// </returns>
/// <exception cref="InvalidOperationException">Thrown when the gradient becomes
/// NaN or infinite.</exception>
public Matrix<double> GradLL2(Matrix<double> w, Matrix<double> fSamples, Matrix<double> fCensored, double eps = 1e-8, double bailOutSurvivalValue = 10.0)
{
    List<double> t = this.OrganicRecoveryDurations;
    List<double> x = this.InorganicRecoveryDurations;
    Matrix<double> gradW = Matrix<double>.Build.Dense(w.RowCount, w.ColumnCount);

    // Sigmoids bound the shape and scale parameters; their derivatives enter the
    // gradient below via the chain rule.
    Sigmoid sShape = new Sigmoid(this.ShapeUpperBound);
    Sigmoid sScale = new Sigmoid(this.ScaleUpperBound);

    // Uncensored (organic) observations: gradient of log(pdf) terms.
    for (int i = 0; i < fSamples.RowCount; i++)
    {
        // currentRow is the feature vector; theta = W.f is a 2-vector that the
        // sigmoids convert into the (shape, scale) parameters.
        Vector<double> currentRow = fSamples.Row(i);
        Vector<double> theta = w.Multiply(currentRow);
        double shape = sShape.Transform(theta[0]);
        double scale = sScale.Transform(theta[1]);

        // Chain rule: point-wise multiply the log-pdf gradient by the sigmoid derivatives.
        Vector<double> lpdfGrad = this.GradLPDF(t[i], shape, scale);
        Vector<double> sigmoidGrad = Vector<double>.Build.DenseOfArray(
            new double[] { sShape.Grad(theta[0]), sScale.Grad(theta[1]) });
        Vector<double> delTheta = lpdfGrad.PointwiseMultiply(sigmoidGrad);
        gradW = gradW.Add(delTheta.OuterProduct(currentRow));

        // Fail fast if the gradient has become non-finite (spot-checked at [0, 0]).
        if (double.IsNaN(gradW[0, 0]) || double.IsInfinity(gradW[0, 0]))
        {
            throw new InvalidOperationException(
                "Gradient has blown up due to samples data. "
                + "First, try tightening the upper bounds for the shape and scale parameters "
                + "and if that doesn't work, add a break point here; the suspect is delTheta.");
        }
    }

    // Censored (inorganic) observations: gradient of log(survival) terms.
    for (int i = 0; i < fCensored.RowCount; i++)
    {
        Vector<double> currentRow = fCensored.Row(i);
        Vector<double> theta = w.Multiply(currentRow);
        double shape = sShape.Transform(theta[0]);
        double scale = sScale.Transform(theta[1]);

        Vector<double> lsurvivalGrad = this.GradLSurvival(x[i], shape, scale);
        Vector<double> sigmoidGrad = Vector<double>.Build.DenseOfArray(
            new double[] { sShape.Grad(theta[0]), sScale.Grad(theta[1]) });
        Vector<double> delTheta = lsurvivalGrad.PointwiseMultiply(sigmoidGrad);
        gradW = gradW.Add(delTheta.OuterProduct(currentRow));

        if (double.IsNaN(gradW[0, 0]) || double.IsInfinity(gradW[0, 0]))
        {
            throw new InvalidOperationException(
                "Gradient has blown up due to censored data. "
                + "First, try tightening the upper bounds for the shape and scale parameters "
                + "and if that doesn't work, add a break point here; the suspect is delTheta.");
        }
    }

    return gradW;
}