/// <summary>
/// Constructs a Gamma distribution with the given mean and mean logarithm.
/// </summary>
/// <param name="mean">Desired expected value.</param>
/// <param name="meanLog">Desired expected logarithm.</param>
/// <returns>A new Gamma distribution.</returns>
/// <remarks>This function is equivalent to maximum-likelihood estimation of a Gamma distribution
/// from data given by sufficient statistics.
/// This function is significantly slower than the other constructors since it
/// involves nonlinear optimization. The algorithm is a generalized Newton iteration,
/// described in "Estimating a Gamma distribution" by T. Minka, 2002.
/// </remarks>
public static Gamma FromMeanAndMeanLog(double mean, double meanLog)
{
    double delta = Math.Log(mean) - meanLog;
    if (delta <= 2e-16)
    {
        return Gamma.PointMass(mean);
    }
    double shape = 0.5 / delta;
    for (int iter = 0; iter < 100; iter++)
    {
        double oldShape = shape;
        double g = Math.Log(shape) - delta - MMath.Digamma(shape);
        shape /= 1 + g / (1 - shape * MMath.Trigamma(shape));
        if (Math.Abs(shape - oldShape) < 1e-8)
        {
            break;
        }
    }
    if (Double.IsNaN(shape))
    {
        throw new Exception("shape is nan");
    }
    return Gamma.FromShapeAndRate(shape, shape / mean);
}
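// An easy sanity check on FromMeanAndMeanLog is a round trip: compute the sufficient statistics of a
// known Gamma and rebuild it. This is an illustrative sketch only (not part of the library); it assumes
// using System and the Microsoft.ML.Probabilistic.Distributions namespace, and the GetMean()/GetMeanLog()
// accessors used elsewhere in this codebase.
public static void CheckFromMeanAndMeanLog()
{
    Gamma original = Gamma.FromShapeAndRate(3.5, 2.0);
    double mean = original.GetMean();       // shape/rate
    double meanLog = original.GetMeanLog(); // digamma(shape) - log(rate)
    Gamma reconstructed = Gamma.FromMeanAndMeanLog(mean, meanLog);
    // reconstructed.Shape should be close to 3.5 and reconstructed.Rate close to 2.0,
    // up to the 1e-8 convergence tolerance of the generalized Newton iteration.
    Console.WriteLine($"original = {original}, reconstructed = {reconstructed}");
}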
/// <summary>
/// Constructs a Gamma distribution with the given log mean and mean logarithm.
/// </summary>
/// <param name="logMean">Log of desired expected value.</param>
/// <param name="meanLog">Desired expected logarithm.</param>
/// <returns>A new Gamma distribution.</returns>
/// <remarks>
/// This function is significantly slower than the other constructors since it
/// involves nonlinear optimization. The algorithm is a generalized Newton iteration,
/// described in "Estimating a Gamma distribution" by T. Minka, 2002.
/// </remarks>
public static Gamma FromLogMeanAndMeanLog(double logMean, double meanLog)
{
    // logMean = log(shape)-log(rate)
    // meanLog = Psi(shape)-log(rate)
    // delta = log(shape)-Psi(shape)
    double delta = logMean - meanLog;
    if (delta <= 2e-16)
    {
        return Gamma.PointMass(Math.Exp(logMean));
    }
    double shape = 0.5 / delta;
    for (int iter = 0; iter < 100; iter++)
    {
        double oldShape = shape;
        double g = Math.Log(shape) - delta - MMath.Digamma(shape);
        shape /= 1 + g / (1 - shape * MMath.Trigamma(shape));
        if (Math.Abs(shape - oldShape) < 1e-8)
        {
            break;
        }
    }
    if (Double.IsNaN(shape))
    {
        throw new Exception("shape is nan");
    }
    Gamma result = Gamma.FromShapeAndRate(shape, shape / Math.Exp(logMean));
    return result;
}
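// The iteration used by both constructors above follows from the maximum-likelihood conditions for
// Gamma(shape, rate): mean = shape/rate and meanLog = digamma(shape) - log(rate). Eliminating rate
// leaves a single equation in shape,
//   log(shape) - digamma(shape) = log(mean) - meanLog = delta,   with rate = shape/mean.
// The generalized Newton step from Minka's note updates 1/shape rather than shape:
//   1/shape_new = 1/shape + (log(shape) - digamma(shape) - delta) / (shape^2 * (1/shape - trigamma(shape)))
// which rearranges to the line in the code,
//   shape_new = shape / (1 + g / (1 - shape*trigamma(shape)))   where g = log(shape) - delta - digamma(shape).
// The starting guess shape = 0.5/delta comes from log(shape) - digamma(shape) =approx 1/(2*shape).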
public void DigammaInvTest()
{
    for (int i = 0; i < 1000; i++)
    {
        double y = -3 + i * 0.01;
        double x = MMath.DigammaInv(y);
        double y2 = MMath.Digamma(x);
        double error = MMath.AbsDiff(y, y2, 1e-8);
        Assert.True(error < 1e-8);
    }
}
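// The test above only exercises the inverse property digamma(DigammaInv(y)) = y. For illustration, one
// standard way to invert the digamma function is a Newton iteration on digamma with trigamma as the
// derivative, started from a cheap initial guess. The sketch below is hypothetical and is not necessarily
// how MMath.DigammaInv is implemented.
private static double DigammaInvSketch(double y)
{
    // Initial guess: digamma(x) =approx log(x) - 1/(2x) for large x, and =approx -1/x - eulerGamma near 0.
    const double eulerGamma = 0.5772156649015329;
    double x = (y >= -2.22) ? Math.Exp(y) + 0.5 : -1.0 / (y + eulerGamma);
    for (int iter = 0; iter < 20; iter++)
    {
        // Newton step on digamma(x) - y = 0.
        double step = (MMath.Digamma(x) - y) / MMath.Trigamma(x);
        x -= step;
        if (Math.Abs(step) < 1e-12 * Math.Abs(x))
        {
            break;
        }
    }
    return x;
}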
private static double LogMinusDigamma(double shape)
{
    if (shape > largeShape)
    {
        // log(shape) - digamma(shape) = 1/(2*shape) + 1/(12*shape^2) - 1/(120*shape^4) + ...
        // The next term in the series is -1/120/shape^4, which bounds the error.
        return (0.5 + 1.0 / 12 / shape) / shape;
    }
    else
    {
        return Math.Log(shape) - MMath.Digamma(shape);
    }
}
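// For reference, the asymptotic expansion behind the large-shape branch is
//   digamma(x) = log(x) - 1/(2x) - 1/(12x^2) + 1/(120x^4) - ...
// so that
//   log(x) - digamma(x) = 1/(2x) + 1/(12x^2) - 1/(120x^4) + ...
// Truncating after the 1/(12x^2) term gives (0.5 + (1/12)/x)/x with absolute error at most 1/(120x^4).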
static internal double ComputeMeanLogOneMinus(double trueCount, double falseCount)
{
    if (double.IsPositiveInfinity(falseCount))
    {
        return Math.Log(1 - trueCount);
    }
    if ((trueCount == 0.0) && (falseCount == 0.0))
    {
        throw new ImproperDistributionException(new Beta(trueCount, falseCount));
    }
    return MMath.Digamma(falseCount) - MMath.Digamma(trueCount + falseCount);
}
/// <summary>
/// Used to compute log odds in the above operator
/// </summary>
/// <param name="trueCount">True count of the incoming Beta message.</param>
/// <param name="falseCount">False count of the incoming Beta message.</param>
/// <returns>E[log(p)] - E[log(1-p)] = digamma(trueCount) - digamma(falseCount)</returns>
internal static double ComputeLogOdds(double trueCount, double falseCount)
{
    if (falseCount == Double.PositiveInfinity)
    {
        // compute log odds from prob true
        return MMath.Logit(trueCount);
    }
    else if ((trueCount == 0) || (falseCount == 0))
    {
        throw new ImproperMessageException(new Beta(trueCount, falseCount));
    }
    return MMath.Digamma(trueCount) - MMath.Digamma(falseCount);
}
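// Both helpers above are instances of the standard Beta expectations: for p ~ Beta(trueCount, falseCount),
//   E[log(p)]   = digamma(trueCount)  - digamma(trueCount + falseCount)
//   E[log(1-p)] = digamma(falseCount) - digamma(trueCount + falseCount)
// The common digamma(trueCount + falseCount) term cancels in the log odds, giving
//   E[log(p)] - E[log(1-p)] = digamma(trueCount) - digamma(falseCount).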
/// <summary>
/// Computes E[log(x)]
/// </summary>
/// <returns>E[log(x)] under this distribution.</returns>
public double GetMeanLog()
{
    if (IsPointMass)
    {
        return Math.Log(Point);
    }
    else if (!IsProper())
    {
        throw new ImproperDistributionException(this);
    }
    else
    {
        return Power * (MMath.Digamma(Shape) - Math.Log(Rate));
    }
}
/// <summary>
/// The expected logarithm E[log(p)].
/// </summary>
/// <returns>E[log(p)] under this distribution.</returns>
public double GetMeanLog()
{
    if (IsPointMass)
    {
        return System.Math.Log(Point);
    }
    else if (!IsProper())
    {
        throw new ImproperDistributionException(this);
    }
    else
    {
        return MMath.Digamma(TrueCount) - MMath.Digamma(TotalCount);
    }
}
public void RandWishart()
{
    // multivariate Gamma
    double a = 2.7;
    int d = 3;
    PositiveDefiniteMatrix mTrue = new PositiveDefiniteMatrix(d, d);
    mTrue.SetToIdentity();
    mTrue.SetToProduct(mTrue, a);
    LowerTriangularMatrix L = new LowerTriangularMatrix(d, d);
    PositiveDefiniteMatrix X = new PositiveDefiniteMatrix(d, d);
    PositiveDefiniteMatrix m = new PositiveDefiniteMatrix(d, d);
    m.SetAllElementsTo(0);
    double s = 0;
    for (int i = 0; i < nsamples; i++)
    {
        Rand.Wishart(a, L);
        X.SetToProduct(L, L.Transpose());
        m = m + X;
        s = s + X.LogDeterminant();
    }
    double sTrue = 0;
    for (int i = 0; i < d; i++)
    {
        sTrue += MMath.Digamma(a - i * 0.5);
    }
    m.Scale(1.0 / nsamples);
    s = s / nsamples;
    Console.WriteLine("");
    Console.WriteLine("Multivariate Gamma");
    Console.WriteLine("-------------------");
    Console.WriteLine("m = \n{0}", m);
    double dError = m.MaxDiff(mTrue);
    if (dError > TOLERANCE)
    {
        Assert.True(false, String.Format("Wishart({0}) mean: (should be {0}*I), error = {1}", a, dError));
    }
    if (System.Math.Abs(s - sTrue) > TOLERANCE)
    {
        Assert.True(false, string.Format("E[logdet]: {0} (should be {1})", s, sTrue));
    }
}
/// <summary>
/// Gets the mean log determinant
/// </summary>
/// <returns>The mean log determinant</returns>
public double GetMeanLogDeterminant()
{
    if (IsPointMass)
    {
        return Point.LogDeterminant();
    }
    // E[logdet(X)] = -logdet(B) + sum_{i=0..d-1} digamma(a - i/2)
    double s = 0;
    int d = Dimension;
    for (int i = 0; i < d; i++)
    {
        s += MMath.Digamma(Shape - i * 0.5);
    }
    s -= rate.LogDeterminant();
    return s;
}
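// The sum over digamma(Shape - i/2) is the derivative of the multivariate log-Gamma function:
// for X ~ Wishart with shape a and rate matrix B in dimension d,
//   E[logdet(X)] = sum_{i=0..d-1} digamma(a - i/2) - logdet(B).
// The RandWishart test above checks the same identity by Monte Carlo with B = identity (logdet(B) = 0).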
/// <summary>VMP message to <c>sample</c>.</summary>
/// <param name="probTrue">Incoming message from <c>probTrue</c>. Must be a proper distribution. If uniform, the result will be uniform.</param>
/// <returns>The outgoing VMP message to the <c>sample</c> argument.</returns>
/// <remarks>
/// <para>The outgoing message is the exponential of the average log-factor value, where the average is over all arguments except <c>sample</c>. The formula is <c>exp(sum_(probTrue) p(probTrue) log(factor(sample,probTrue)))</c>.</para>
/// </remarks>
/// <exception cref="ImproperMessageException">
/// <paramref name="probTrue" /> is not a proper distribution.</exception>
public static Bernoulli SampleAverageLogarithm([SkipIfUniform] Beta probTrue)
{
    if (probTrue.IsPointMass)
    {
        return new Bernoulli(probTrue.Point);
    }
    else if (!probTrue.IsProper())
    {
        throw new ImproperMessageException(probTrue);
    }
    else
    {
        // E[x*log(p) + (1-x)*log(1-p)] = x*E[log(p)] + (1-x)*E[log(1-p)]
        // p(x=true) = exp(E[log(p)])/(exp(E[log(p)]) + exp(E[log(1-p)]))
        // log(p(x=true)/p(x=false)) = E[log(p)] - E[log(1-p)] = digamma(trueCount) - digamma(falseCount)
        return Bernoulli.FromLogOdds(MMath.Digamma(probTrue.TrueCount) - MMath.Digamma(probTrue.FalseCount));
    }
}
/// <summary>
/// The expected logarithms E[log(p)] and E[log(1-p)].
/// </summary>
/// <param name="eLogP">On return, E[log(p)].</param>
/// <param name="eLogOneMinusP">On return, E[log(1-p)].</param>
public void GetMeanLogs(out double eLogP, out double eLogOneMinusP)
{
    if (IsPointMass)
    {
        eLogP = System.Math.Log(Point);
        eLogOneMinusP = System.Math.Log(1 - Point);
    }
    else if (!IsProper())
    {
        throw new ImproperDistributionException(this);
    }
    else
    {
        double d = MMath.Digamma(TotalCount);
        eLogP = MMath.Digamma(TrueCount) - d;
        eLogOneMinusP = MMath.Digamma(FalseCount) - d;
    }
}
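// A small usage sketch for GetMeanLogs, checking the digamma identities directly. Illustrative only;
// assumes using System plus the Microsoft.ML.Probabilistic.Distributions and Microsoft.ML.Probabilistic.Math
// namespaces for Beta and MMath.
public static void GetMeanLogsExample()
{
    Beta b = new Beta(2, 3);
    b.GetMeanLogs(out double eLogP, out double eLogOneMinusP);
    // Same quantities computed directly from the digamma identities:
    double expectedLogP = MMath.Digamma(2) - MMath.Digamma(5);          // = -13/12
    double expectedLogOneMinusP = MMath.Digamma(3) - MMath.Digamma(5);  // = -7/12
    Console.WriteLine($"{eLogP} vs {expectedLogP}, {eLogOneMinusP} vs {expectedLogOneMinusP}");
}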
public static void GetDerivLogZ(GammaPower sum, GammaPower toSum, double ds, double dds, double dr, double ddr, out double dlogZ, out double ddlogZ) { if (sum.Power != toSum.Power) { throw new ArgumentException($"sum.Power ({sum.Power}) != toSum.Power ({toSum.Power})"); } if (toSum.IsPointMass) { throw new NotSupportedException(); } if (toSum.IsUniform()) { dlogZ = 0; ddlogZ = 0; return; } if (sum.IsPointMass) { // Z = toSum.GetLogProb(sum.Point) // log(Z) = (toSum.Shape/toSum.Power - 1)*log(sum.Point) - toSum.Rate*sum.Point^(1/toSum.Power) + toSum.Shape*log(toSum.Rate) - GammaLn(toSum.Shape) if (sum.Point == 0) { throw new NotSupportedException(); } double logSumOverPower = Math.Log(sum.Point) / toSum.Power; double powSum = Math.Exp(logSumOverPower); double logRate = Math.Log(toSum.Rate); double digammaShape = MMath.Digamma(toSum.Shape); double shapeOverRate = toSum.Shape / toSum.Rate; dlogZ = ds * logSumOverPower - dr * powSum + ds * logRate + shapeOverRate * dr - digammaShape * ds; ddlogZ = dds * logSumOverPower - ddr * powSum + dds * logRate + 2 * ds * dr / toSum.Rate + shapeOverRate * ddr - MMath.Trigamma(toSum.Shape) * ds - digammaShape * dds; } else { GammaPower product = sum * toSum; double cs = (MMath.Digamma(product.Shape) - Math.Log(product.Shape)) - (MMath.Digamma(toSum.Shape) - Math.Log(toSum.Shape)); double cr = toSum.Shape / toSum.Rate - product.Shape / product.Rate; double css = MMath.Trigamma(product.Shape) - MMath.Trigamma(toSum.Shape); double csr = 1 / toSum.Rate - 1 / product.Rate; double crr = product.Shape / (product.Rate * product.Rate) - toSum.Shape / (toSum.Rate * toSum.Rate); dlogZ = cs * ds + cr * dr; ddlogZ = cs * dds + cr * ddr + css * ds * ds + 2 * csr * ds * dr + crr * dr * dr; } }
public static Gaussian DAverageConditional([SkipIfUniform] Gamma exp, [Proper] Gaussian d) { // as a function of d, the factor is Ga(exp(d); shape, rate) = exp(d*(shape-1) -rate*exp(d)) if (exp.IsUniform()) { return(Gaussian.Uniform()); } if (exp.IsPointMass) { return(ExpOp.DAverageConditional(exp.Point)); } if (exp.Rate < 0) { throw new ImproperMessageException(exp); } if (exp.Rate == 0) { return(Gaussian.FromNatural(exp.Shape - 1, 0)); } if (d.IsUniform()) { if (exp.Shape <= 1) { throw new ArgumentException("The posterior has infinite variance due to input of Exp distributed as " + d + " and output of Exp distributed as " + exp + " (shape <= 1)"); } // posterior for d is a shifted log-Gamma distribution: // exp((a-1)*d - b*exp(d)) =propto exp(a*(d+log(b)) - exp(d+log(b))) // we find the Gaussian with same moments. // u = d+log(b) // E[u] = digamma(a-1) // E[d] = E[u]-log(b) = digamma(a-1)-log(b) // var(d) = var(u) = trigamma(a-1) double lnRate = Math.Log(exp.Rate); return(new Gaussian(MMath.Digamma(exp.Shape - 1) - lnRate, MMath.Trigamma(exp.Shape - 1))); } double aMinus1 = exp.Shape - 1; double b = exp.Rate; if (d.IsPointMass) { double x = d.Point; double expx = Math.Exp(x); double dlogf = aMinus1 - b * expx; double ddlogf = -b * expx; return(Gaussian.FromDerivatives(x, dlogf, ddlogf, true)); } double dmode, dmin, dmax; GetIntegrationBounds(exp, d, out dmode, out dmin, out dmax); double expmode = Math.Exp(dmode); int n = QuadratureNodeCount; double inc = (dmax - dmin) / (n - 1); MeanVarianceAccumulator mva = new MeanVarianceAccumulator(); for (int i = 0; i < n; i++) { double x = dmin + i * inc; double xMinusMode = x - dmode; double diff = aMinus1 * xMinusMode - b * (Math.Exp(x) - expmode) - 0.5 * ((x * x - dmode * dmode) * d.Precision - 2 * xMinusMode * d.MeanTimesPrecision); double p = Math.Exp(diff); mva.Add(x, p); if (double.IsNaN(mva.Variance)) { throw new Exception(); } } double dMean = mva.Mean; double dVariance = mva.Variance; Gaussian result = Gaussian.FromMeanAndVariance(dMean, dVariance); result.SetToRatio(result, d, true); return(result); }
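// The uniform-d branch above uses the moments of a shifted log-Gamma variable. If the factor, viewed as
// an unnormalized density in d, is exp((a-1)*d - b*exp(d)), substituting u = d + log(b) gives a density
// proportional to exp((a-1)*u - exp(u)), i.e. exp(u) ~ Gamma(a-1, 1). Hence
//   E[d]   = digamma(a-1) - log(b)
//   var(d) = trigamma(a-1)
// which is the Gaussian used as the outgoing message here and as the quadrature proposal below.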
/// <summary> /// EP message to 'exp' /// </summary> /// <param name="exp">Incoming message from 'exp'.</param> /// <param name="d">Incoming message from 'd'. Must be a proper distribution. If uniform, the result will be uniform.</param> /// <param name="to_d">Previous outgoing message to 'd'.</param> /// <returns>The outgoing EP message to the 'exp' argument</returns> /// <remarks><para> /// The outgoing message is a distribution matching the moments of 'exp' as the random arguments are varied. /// The formula is <c>proj[p(exp) sum_(d) p(d) factor(exp,d)]/p(exp)</c>. /// </para></remarks> /// <exception cref="ImproperMessageException"><paramref name="d"/> is not a proper distribution</exception> public static Gamma ExpAverageConditional(Gamma exp, [Proper] Gaussian d, Gaussian to_d) { if (d.IsPointMass) { return(Gamma.PointMass(Math.Exp(d.Point))); } if (d.IsUniform()) { return(Gamma.FromShapeAndRate(0, 0)); } if (exp.IsPointMass) { // Z = int_y delta(x - exp(y)) N(y; my, vy) dy // = int_u delta(x - u) N(log(u); my, vy)/u du // = N(log(x); my, vy)/x // logZ = -log(x) -0.5/vy*(log(x)-my)^2 // dlogZ/dx = -1/x -1/vy*(log(x)-my)/x // d2logZ/dx2 = -dlogZ/dx/x -1/vy/x^2 // log Ga(x;a,b) = (a-1)*log(x) - bx // dlogGa/dx = (a-1)/x - b // d2logGa/dx2 = -(a-1)/x^2 // match derivatives and solve for (a,b) double shape = (1 + d.GetMean() - Math.Log(exp.Point)) * d.Precision; double rate = d.Precision / exp.Point; return(Gamma.FromShapeAndRate(shape, rate)); } if (exp.IsUniform()) { return(ExpAverageLogarithm(d)); } if (to_d.IsUniform() && exp.Shape > 1) { to_d = new Gaussian(MMath.Digamma(exp.Shape - 1) - Math.Log(exp.Rate), MMath.Trigamma(exp.Shape - 1)); } double mD, vD; Gaussian dMarginal = d * to_d; dMarginal.GetMeanAndVariance(out mD, out vD); double Z = 0; double sumy = 0; double sumexpy = 0; if (vD < 1e-6) { double m, v; d.GetMeanAndVariance(out m, out v); return(Gamma.FromLogMeanAndMeanLog(m + v / 2.0, m)); } //if (vD < 10) if (true) { // Use Gauss-Hermite quadrature double[] nodes = new double[QuadratureNodeCount]; double[] weights = new double[QuadratureNodeCount]; Quadrature.GaussianNodesAndWeights(mD, vD, nodes, weights); for (int i = 0; i < weights.Length; i++) { weights[i] = Math.Log(weights[i]); } if (!to_d.IsUniform()) { // modify the weights to include q(y_k)/N(y_k;m,v) for (int i = 0; i < weights.Length; i++) { weights[i] += d.GetLogProb(nodes[i]) - dMarginal.GetLogProb(nodes[i]); } } double maxLogF = Double.NegativeInfinity; // f(x,y) = Ga(exp(y); shape, rate) = exp(y*(shape-1) -rate*exp(y)) // Z E[x] = int_y int_x x Ga(x;a,b) delta(x - exp(y)) N(y;my,vy) dx dy // = int_y exp(y) Ga(exp(y);a,b) N(y;my,vy) dy // Z E[log(x)] = int_y y Ga(exp(y);a,b) N(y;my,vy) dy for (int i = 0; i < weights.Length; i++) { double y = nodes[i]; double logf = weights[i] + (exp.Shape - 1) * y - exp.Rate * Math.Exp(y); if (logf > maxLogF) { maxLogF = logf; } weights[i] = logf; } for (int i = 0; i < weights.Length; i++) { double y = nodes[i]; double f = Math.Exp(weights[i] - maxLogF); double f_y = f * y; double fexpy = f * Math.Exp(y); Z += f; sumy += f_y; sumexpy += fexpy; } } else { Converter <double, double> p = delegate(double y) { return(d.GetLogProb(y) + (exp.Shape - 1) * y - exp.Rate * Math.Exp(y)); }; double sc = Math.Sqrt(vD); double offset = p(mD); Z = Quadrature.AdaptiveClenshawCurtis(z => Math.Exp(p(sc * z + mD) - offset), 1, 16, 1e-6); sumy = Quadrature.AdaptiveClenshawCurtis(z => (sc * z + mD) * Math.Exp(p(sc * z + mD) - offset), 1, 16, 1e-6); sumexpy = Quadrature.AdaptiveClenshawCurtis(z => 
Math.Exp(sc * z + mD + p(sc * z + mD) - offset), 1, 16, 1e-6); } if (Z == 0) { throw new ApplicationException("Z==0"); } double s = 1.0 / Z; if (Double.IsPositiveInfinity(s)) { throw new ApplicationException("s is -inf"); } double meanLog = sumy * s; double mean = sumexpy * s; Gamma result = Gamma.FromMeanAndMeanLog(mean, meanLog); if (ForceProper) { result.SetToRatioProper(result, exp); } else { result.SetToRatio(result, exp); } if (Double.IsNaN(result.Shape) || Double.IsNaN(result.Rate)) { throw new ApplicationException("result is nan"); } return(result); }
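// Filling in the "match derivatives and solve for (a,b)" step in the point-mass branch of
// ExpAverageConditional: with m = d.GetMean() and v = 1/d.Precision,
//   dlogZ/dx   = -1/x - (log(x) - m)/(v*x)
//   d2logZ/dx2 = -dlogZ/dx / x - 1/(v*x^2)
// Matching d2logGa/dx2 = -(a-1)/x^2 gives a - 1 = x*dlogZ/dx + 1/v, i.e. a = (1 + m - log(x))/v,
// and matching dlogGa/dx = (a-1)/x - b then gives b = 1/(v*x),
// which are exactly the shape = (1 + d.GetMean() - log(exp.Point)) * d.Precision and
// rate = d.Precision / exp.Point used in the code.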
/// <summary> /// EP message to 'd' /// </summary> /// <param name="exp">Incoming message from 'exp'. Must be a proper distribution. If uniform, the result will be uniform.</param> /// <param name="d">Incoming message from 'd'. Must be a proper distribution. If uniform, the result will be uniform.</param> /// <param name="result">Modified to contain the outgoing message</param> /// <returns><paramref name="result"/></returns> /// <remarks><para> /// The outgoing message is a distribution matching the moments of 'd' as the random arguments are varied. /// The formula is <c>proj[p(d) sum_(exp) p(exp) factor(exp,d)]/p(d)</c>. /// </para></remarks> /// <exception cref="ImproperMessageException"><paramref name="exp"/> is not a proper distribution</exception> /// <exception cref="ImproperMessageException"><paramref name="d"/> is not a proper distribution</exception> //internal static Gaussian DAverageConditional_slow([SkipIfUniform] Gamma exp, [Proper] Gaussian d) //{ // Gaussian to_d = exp.Shape<=1 || exp.Rate==0 ? // Gaussian.Uniform() // : new Gaussian(MMath.Digamma(exp.Shape-1) - Math.Log(exp.Rate), MMath.Trigamma(exp.Shape)); // //var to_d = Gaussian.Uniform(); // for (int i = 0; i < QuadratureIterations; i++) { // to_d = DAverageConditional(exp, d, to_d); // } // return to_d; //} // to_d does not need to be Fresh. it is only used for quadrature proposal. public static Gaussian DAverageConditional([SkipIfUniform] Gamma exp, [Proper] Gaussian d, Gaussian result) { if (exp.IsUniform() || d.IsPointMass) { return(Gaussian.Uniform()); } if (exp.IsPointMass) { return(DAverageConditional(exp.Point)); } if (exp.Rate < 0) { throw new ImproperMessageException(exp); } if (d.IsUniform()) { // posterior for d is a shifted log-Gamma distribution: // exp((a-1)*d - b*exp(d)) =propto exp(a*(d+log(b)) - exp(d+log(b))) // we find the Gaussian with same moments. // u = d+log(b) // E[u] = digamma(a-1) // E[d] = E[u]-log(b) = digamma(a-1)-log(b) // var(d) = var(u) = trigamma(a-1) double lnRate = Math.Log(exp.Rate); return(new Gaussian(MMath.Digamma(exp.Shape - 1) - lnRate, MMath.Trigamma(exp.Shape - 1))); } // We use moment matching to find the best Gaussian message. // The moments are computed via quadrature. 
// Z = int_y f(x,y) q(y) dy =approx sum_k w_k f(x,y_k) q(y_k)/N(y_k;m,v) // f(x,y) = Ga(exp(y); shape, rate) = exp(y*(shape-1) -rate*exp(y)) double[] nodes = new double[QuadratureNodeCount]; double[] weights = new double[QuadratureNodeCount]; double moD, voD; d.GetMeanAndVariance(out moD, out voD); double mD, vD; if (result.IsUniform() && exp.Shape > 1) { result = new Gaussian(MMath.Digamma(exp.Shape - 1) - Math.Log(exp.Rate), MMath.Trigamma(exp.Shape - 1)); } Gaussian dMarginal = d * result; dMarginal.GetMeanAndVariance(out mD, out vD); Quadrature.GaussianNodesAndWeights(mD, vD, nodes, weights); if (!result.IsUniform()) { // modify the weights to include q(y_k)/N(y_k;m,v) for (int i = 0; i < weights.Length; i++) { weights[i] *= Math.Exp(d.GetLogProb(nodes[i]) - Gaussian.GetLogProb(nodes[i], mD, vD)); } } double Z = 0; double sumy = 0; double sumy2 = 0; double maxLogF = Double.NegativeInfinity; for (int i = 0; i < weights.Length; i++) { double y = nodes[i]; double logf = Math.Log(weights[i]) + (exp.Shape - 1) * y - exp.Rate * Math.Exp(y); if (logf > maxLogF) { maxLogF = logf; } weights[i] = logf; } for (int i = 0; i < weights.Length; i++) { double y = nodes[i]; double f = Math.Exp(weights[i] - maxLogF); double f_y = f * y; double fyy = f_y * y; Z += f; sumy += f_y; sumy2 += fyy; } if (Z == 0) { return(Gaussian.Uniform()); } double s = 1.0 / Z; double mean = sumy * s; double var = sumy2 * s - mean * mean; if (var <= 0.0) { double quadratureGap = 0.1; var = 2 * vD * quadratureGap * quadratureGap; } result = new Gaussian(mean, var); if (ForceProper) { result.SetToRatioProper(result, d); } else { result.SetToRatio(result, d); } if (result.Precision < -1e10) { throw new ApplicationException("result has negative precision"); } if (Double.IsPositiveInfinity(result.Precision)) { throw new ApplicationException("result is point mass"); } if (Double.IsNaN(result.Precision) || Double.IsNaN(result.MeanTimesPrecision)) { throw new ApplicationException("result is nan"); } return(result); }
//internal static Gaussian DAverageConditional_slow([SkipIfUniform] Gamma exp, [Proper] Gaussian d) //{ // Gaussian to_d = exp.Shape<=1 || exp.Rate==0 ? // Gaussian.Uniform() // : new Gaussian(MMath.Digamma(exp.Shape-1) - Math.Log(exp.Rate), MMath.Trigamma(exp.Shape)); // //var to_d = Gaussian.Uniform(); // for (int i = 0; i < QuadratureIterations; i++) { // to_d = DAverageConditional(exp, d, to_d); // } // return to_d; //} // to_d does not need to be Fresh. it is only used for quadrature proposal. /// <include file='FactorDocs.xml' path='factor_docs/message_op_class[@name="ExpOp"]/message_doc[@name="DAverageConditional(Gamma, Gaussian, Gaussian)"]/*'/> public static Gaussian DAverageConditional([SkipIfUniform] Gamma exp, [Proper] Gaussian d, Gaussian result) { if (exp.IsUniform() || d.IsUniform() || d.IsPointMass || exp.IsPointMass || exp.Rate <= 0) { return(ExpOp_Slow.DAverageConditional(exp, d)); } // We use moment matching to find the best Gaussian message. // The moments are computed via quadrature. // Z = int_y f(x,y) q(y) dy =approx sum_k w_k f(x,y_k) q(y_k)/N(y_k;m,v) // f(x,y) = Ga(exp(y); shape, rate) = exp(y*(shape-1) -rate*exp(y)) double[] nodes = new double[QuadratureNodeCount]; double[] weights = new double[QuadratureNodeCount]; double moD, voD; d.GetMeanAndVariance(out moD, out voD); double mD, vD; if (result.IsUniform() && exp.Shape > 1) { result = new Gaussian(MMath.Digamma(exp.Shape - 1) - Math.Log(exp.Rate), MMath.Trigamma(exp.Shape - 1)); } Gaussian dMarginal = d * result; dMarginal.GetMeanAndVariance(out mD, out vD); if (vD == 0) { return(ExpOp_Slow.DAverageConditional(exp, d)); } Quadrature.GaussianNodesAndWeights(mD, vD, nodes, weights); if (!result.IsUniform()) { // modify the weights to include q(y_k)/N(y_k;m,v) for (int i = 0; i < weights.Length; i++) { weights[i] *= Math.Exp(d.GetLogProb(nodes[i]) - Gaussian.GetLogProb(nodes[i], mD, vD)); } } double Z = 0; double sumy = 0; double sumy2 = 0; double maxLogF = Double.NegativeInfinity; for (int i = 0; i < weights.Length; i++) { double y = nodes[i]; double logf = Math.Log(weights[i]) + (exp.Shape - 1) * y - exp.Rate * Math.Exp(y); if (logf > maxLogF) { maxLogF = logf; } weights[i] = logf; } for (int i = 0; i < weights.Length; i++) { double y = nodes[i]; double f = Math.Exp(weights[i] - maxLogF); double f_y = f * y; double fyy = f_y * y; Z += f; sumy += f_y; sumy2 += fyy; } if (Z == 0) { return(Gaussian.Uniform()); } double s = 1.0 / Z; double mean = sumy * s; double var = sumy2 * s - mean * mean; // TODO: explain this if (var <= 0.0) { double quadratureGap = 0.1; var = 2 * vD * quadratureGap * quadratureGap; } result = new Gaussian(mean, var); result.SetToRatio(result, d, ForceProper); if (result.Precision < -1e10) { throw new InferRuntimeException("result has negative precision"); } if (Double.IsPositiveInfinity(result.Precision)) { throw new InferRuntimeException("result is point mass"); } if (Double.IsNaN(result.Precision) || Double.IsNaN(result.MeanTimesPrecision)) { return(ExpOp_Slow.DAverageConditional(exp, d)); } return(result); }
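// Both DAverageConditional overloads above rely on Quadrature.GaussianNodesAndWeights to produce nodes y_k
// and weights w_k such that sum_k w_k g(y_k) =approx int g(y) N(y; m, v) dy. A minimal, illustrative check
// of that building block (not part of the operators; assumes the Quadrature class from this codebase and
// using System):
public static void GaussianQuadratureSketch()
{
    double m = 0.1, v = 0.5;
    int n = 20;
    double[] nodes = new double[n];
    double[] weights = new double[n];
    Quadrature.GaussianNodesAndWeights(m, v, nodes, weights);
    // E[exp(y)] for y ~ N(m, v) is exp(m + v/2); the quadrature estimate should agree closely.
    double estimate = 0;
    for (int k = 0; k < n; k++)
    {
        estimate += weights[k] * Math.Exp(nodes[k]);
    }
    Console.WriteLine($"quadrature = {estimate}, exact = {Math.Exp(m + v / 2)}");
}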
#pragma warning disable 162 #endif /// <summary> /// Find a Beta distribution with given integral and mean times a Beta weight function. /// </summary> /// <param name="mean">The desired value of the mean</param> /// <param name="logZ">The desired value of the integral</param> /// <param name="a">trueCount-1 of the weight function</param> /// <param name="b">falseCount-1 of the weight function</param> /// <returns></returns> private static Beta BetaFromMeanAndIntegral(double mean, double logZ, double a, double b) { // The constraints are: // 1. int_p to_p(p) p^a (1-p)^b dp = exp(logZ) // 2. int_p to_p(p) p p^a (1-p)^b dp = mean*exp(logZ) // Let to_p(p) = Beta(p; af, bf) // The LHS of (1) is gamma(af+bf)/gamma(af+bf+a+b) gamma(af+a)/gamma(af) gamma(bf+b)/gamma(bf) // The LHS of (2) is gamma(af+bf)/gamma(af+bf+a+b+1) gamma(af+a+1)/gamma(af) gamma(bf+b)/gamma(bf) // The ratio of (2)/(1) is gamma(af+a+1)/gamma(af+a) gamma(af+bf+a+b)/gamma(af+bf+a+b+1) = (af+a)/(af+bf+a+b) = mean // Solving for bf gives bf = (af+a)/mean - (af+a+b). // To solve for af, we apply a generalized Newton algorithm to solve equation (1) with bf substituted. // af0 is the smallest value of af that ensures (af >= 0, bf >= 0). if (mean <= 0) { throw new ArgumentException("mean <= 0"); } if (mean >= 1) { throw new ArgumentException("mean >= 1"); } if (double.IsNaN(mean)) { throw new ArgumentException("mean is NaN"); } // If exp(logZ) exceeds the largest possible value of (1), then we return a point mass. // gammaln(x) =approx (x-0.5)*log(x) - x + 0.5*log(2pi) // (af+x)*log(af+x) =approx (af+x)*log(af) + x + 0.5*x*x/af // For large af, logZ = (af+bf-0.5)*log(af+bf) - (af+bf+a+b-0.5)*log(af+bf+a+b) + // (af+a-0.5)*log(af+a) - (af-0.5)*log(af) + // (bf+b-0.5)*log(bf+b) - (bf-0.5)*log(bf) // =approx (af+bf-0.5)*log(af+bf) - ((af+bf+a+b-0.5)*log(af+bf) + (a+b) + 0.5*(a+b)*(a+b)/(af+bf) -0.5*(a+b)/(af+bf)) + // ((af+a-0.5)*log(af) + a + 0.5*a*a/af - 0.5*a/af) - (af-0.5)*log(af) + // ((bf+b-0.5)*log(bf) + b + 0.5*b*b/bf - 0.5*b/bf) - (bf-0.5)*log(bf) // = -(a+b)*log(af+bf) - 0.5*(a+b)*(a+b-1)/(af+bf) + a*log(af) + 0.5*a*(a-1)/af + b*log(bf) + 0.5*b*(b-1)/bf // =approx (a+b)*log(m) + b*log((1-m)/m) + 0.5*(a+b)*(a+b-1)*m/af - 0.5*a*(a+1)/af - 0.5*b*(b+1)*m/(1-m)/af // =approx (a+b)*log(mean) + b*log((1-mean)/mean) double maxLogZ = (a + b) * Math.Log(mean) + b * Math.Log((1 - mean) / mean); // slope determines whether maxLogZ is the maximum or minimum possible value of logZ double slope = (a + b) * (a + b - 1) * mean - a * (a + 1) - b * (b + 1) * mean / (1 - mean); if ((slope <= 0 && logZ >= maxLogZ) || (slope > 0 && logZ <= maxLogZ)) { // optimal af is infinite return(Beta.PointMass(mean)); } // bf = (af+bx)*(1-m)/m double bx = -(mean * (a + b) - a) / (1 - mean); // af0 is the lower bound for af // we need both af>0 and bf>0 double af0 = Math.Max(0, -bx); double x = Math.Max(0, bx); double af = af0 + 1; // initial guess for af double invMean = 1 / mean; double bf = (af + a) * invMean - (af + a + b); int numIters = 20; for (int iter = 0; iter < numIters; iter++) { double old_af = af; double f = (MMath.GammaLn(af + bf) - MMath.GammaLn(af + bf + a + b)) + (MMath.GammaLn(af + a) - MMath.GammaLn(af)) + (MMath.GammaLn(bf + b) - MMath.GammaLn(bf)); double g = (MMath.Digamma(af + bf) - MMath.Digamma(af + bf + a + b)) * invMean + (MMath.Digamma(af + a) - MMath.Digamma(af)) + (MMath.Digamma(bf + b) - MMath.Digamma(bf)) * (invMean - 1); // fit a fcn of the form: s*log((af-af0)/(af+x)) + c // whose deriv is s/(af-af0) - s/(af+x) double s = g / 
(1 / (af - af0) - 1 / (af + x)); double c = f - s * Math.Log((af - af0) / (af + x)); bool isIncreasing = (x > -af0); if ((!isIncreasing && c >= logZ) || (isIncreasing && c <= logZ)) { // the approximation doesn't fit; use Gauss-Newton instead af += (logZ - f) / g; } else { // now solve s*log((af-af0)/(af+x))+c = logz // af-af0 = exp((logz-c)/s) (af+x) af = af0 + (x + af0) / MMath.ExpMinus1((c - logZ) / s); //if (af == af0) // throw new ArgumentException("logZ is out of range"); } if (double.IsNaN(af)) { throw new InferRuntimeException("af is nan"); } bf = (af + a) / mean - (af + a + b); if (Math.Abs(af - old_af) < 1e-8 || af == af0) { break; } //if (iter == numIters-1) // throw new Exception("not converging"); } if (false) { // check that integrals are correct double f = (MMath.GammaLn(af + bf) - MMath.GammaLn(af + bf + a + b)) + (MMath.GammaLn(af + a) - MMath.GammaLn(af)) + (MMath.GammaLn(bf + b) - MMath.GammaLn(bf)); if (Math.Abs(f - logZ) > 1e-6) { throw new InferRuntimeException("wrong f"); } double f2 = (MMath.GammaLn(af + bf) - MMath.GammaLn(af + bf + a + b + 1)) + (MMath.GammaLn(af + a + 1) - MMath.GammaLn(af)) + (MMath.GammaLn(bf + b) - MMath.GammaLn(bf)); if (Math.Abs(f2 - (Math.Log(mean) + logZ)) > 1e-6) { throw new InferRuntimeException("wrong f2"); } } return(new Beta(af, bf)); }
/// <summary> /// Find a Beta distribution with given integral and mean times a Beta weight function. /// </summary> /// <param name="mean">The desired value of the mean</param> /// <param name="logZ">The desired value of the integral</param> /// <param name="a">trueCount-1 of the weight function</param> /// <param name="b">falseCount-1 of the weight function</param> /// <returns></returns> private static Beta BetaFromMeanAndIntegral(double mean, double logZ, double a, double b) { // The constraints are: // 1. int_p to_p(p) p^a (1-p)^b dp = exp(logZ) // 2. int_p to_p(p) p p^a (1-p)^b dp = mean*exp(logZ) // Let to_p(p) = Beta(p; af, bf) // The LHS of (1) is gamma(af+bf)/gamma(af+bf+a+b) gamma(af+a)/gamma(af) gamma(bf+b)/gamma(bf) // The LHS of (2) is gamma(af+bf)/gamma(af+bf+a+b+1) gamma(af+a+1)/gamma(af) gamma(bf+b)/gamma(bf) // The ratio of (2)/(1) is gamma(af+a+1)/gamma(af+a) gamma(af+bf+a+b)/gamma(af+bf+a+b+1) = (af+a)/(af+bf+a+b) = mean // Solving for bf gives bf = (af+a)/mean - (af+a+b). // To solve for af, we apply a generalized Newton algorithm to solve equation (1) with bf substituted. // af0 is the smallest value of af that ensures (af >= 0, bf >= 0). if (mean <= 0) { throw new ArgumentException("mean <= 0"); } if (mean >= 1) { throw new ArgumentException("mean >= 1"); } if (double.IsNaN(mean)) { throw new ArgumentException("mean is NaN"); } // bf = (af+bx)*(1-m)/m double bx = -(mean * (a + b) - a) / (1 - mean); // af0 is the lower bound for af // we need both af>0 and bf>0 double af0 = Math.Max(0, -bx); double x = Math.Max(0, bx); double af = af0 + 1; // initial guess for af double invMean = 1 / mean; double bf = (af + a) * invMean - (af + a + b); for (int iter = 0; iter < 20; iter++) { double old_af = af; double f = (MMath.GammaLn(af + bf) - MMath.GammaLn(af + bf + a + b)) + (MMath.GammaLn(af + a) - MMath.GammaLn(af)) + (MMath.GammaLn(bf + b) - MMath.GammaLn(bf)); double g = (MMath.Digamma(af + bf) - MMath.Digamma(af + bf + a + b)) * invMean + (MMath.Digamma(af + a) - MMath.Digamma(af)) + (MMath.Digamma(bf + b) - MMath.Digamma(bf)) * (invMean - 1); // fit a fcn of the form: s*log((af-af0)/(af+x)) + c // whose deriv is s/(af-af0) - s/(af+x) double s = g / (1 / (af - af0) - 1 / (af + x)); double c = f - s * Math.Log((af - af0) / (af + x)); bool isIncreasing = (x > -af0); if ((!isIncreasing && c >= logZ) || (isIncreasing && c <= logZ)) { // the approximation doesn't fit; use Gauss-Newton instead af += (logZ - f) / g; } else { // now solve s*log((af-af0)/(af+x))+c = logz // af-af0 = exp((logz-c)/s) (af+x) af = af0 + (x + af0) / MMath.ExpMinus1((c - logZ) / s); if (af == af0) { throw new ArgumentException("logZ is out of range"); } } if (double.IsNaN(af)) { throw new ApplicationException("af is nan"); } bf = (af + a) / mean - (af + a + b); if (Math.Abs(af - old_af) < 1e-8) { break; } } if (false) { // check that integrals are correct double f = (MMath.GammaLn(af + bf) - MMath.GammaLn(af + bf + a + b)) + (MMath.GammaLn(af + a) - MMath.GammaLn(af)) + (MMath.GammaLn(bf + b) - MMath.GammaLn(bf)); if (Math.Abs(f - logZ) > 1e-6) { throw new ApplicationException("wrong f"); } double f2 = (MMath.GammaLn(af + bf) - MMath.GammaLn(af + bf + a + b + 1)) + (MMath.GammaLn(af + a + 1) - MMath.GammaLn(af)) + (MMath.GammaLn(bf + b) - MMath.GammaLn(bf)); if (Math.Abs(f2 - (Math.Log(mean) + logZ)) > 1e-6) { throw new ApplicationException("wrong f2"); } } return(new Beta(af, bf)); }
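// Recap of the construction in BetaFromMeanAndIntegral: the outgoing message Beta(p; af, bf) must satisfy
//   (1) int Beta(p; af, bf) p^a     (1-p)^b dp = exp(logZ)
//   (2) int Beta(p; af, bf) p^(a+1) (1-p)^b dp = mean * exp(logZ)
// Dividing (2) by (1) gives (af+a)/(af+bf+a+b) = mean, hence bf = (af+a)/mean - (af+a+b), leaving a single
// nonlinear equation (1) in af. That equation is solved by repeatedly fitting a surrogate of the form
//   s*log((af-af0)/(af+x)) + c
// to the left-hand side and solving the surrogate for logZ exactly, falling back to the ordinary Newton
// step af += (logZ - f)/g whenever logZ lies outside the surrogate's range.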