/// <summary>
/// Calculate <c>\sigma'(m,v)=\int N(x;m,v)logistic'(x) dx</c>
/// </summary>
/// <param name="mean">Mean of the Gaussian. Only its absolute value is used.</param>
/// <param name="variance">Variance of the Gaussian.</param>
/// <returns>The value of this special function.</returns>
/// <remarks><para>
/// The result is pruned to zero, or taken from a truncated tail expansion, whenever the bounds below allow it.
/// Otherwise, when the variance exceeds <c>LogisticGaussianVarianceThreshold</c> the integral is evaluated with
/// <c>Quadrature.AdaptiveClenshawCurtis</c>; for smaller variance it is evaluated on the nodes and weights
/// produced by <c>Quadrature.GaussianNodesAndWeights</c> for the proposal returned by <c>BigvProposal</c>.
/// </para></remarks>
public static double LogisticGaussianDerivative(double mean, double variance)
{
    mean = Math.Abs(mean);
    double halfVariance = 0.5 * variance;

    // Upper bound 0.5 exp(-0.5 m^2/v), in the log domain.
    double q = -0.5 * mean * mean / variance - MMath.Ln2;

    // Cases that must be zero:
    //  - the upper bound exp(-|m|+v/2) is below log0,
    //  - the upper bound 0.5 exp(-0.5 m^2/v) is below log0 (only applied when |m| <= v),
    //  - the variance is infinite.
    if (-mean + halfVariance < log0
        || (mean <= variance && q < log0)
        || double.IsPositiveInfinity(variance))
    {
        return 0.0;
    }

    // Tail cases, using the exact formula
    // sigma'(m,v) = exp(-m+v/2) -2 exp(-2m+2v) +3 exp(-3m+9v/2) sigma(m-3v,v) - exp(-3m+9v/2) sigma'(m-3v,v)
    // truncated after one or two terms.
    bool oneTermSuffices = -mean + 1.5 * variance < logEpsilon;
    if (oneTermSuffices || -2 * mean + 4 * variance < logEpsilon)
    {
        double leadingTerm = Math.Exp(halfVariance - mean);
        if (oneTermSuffices)
        {
            return leadingTerm;
        }
        return leadingTerm - 2 * Math.Exp(2 * (variance - mean));
    }

    if (variance > LogisticGaussianVarianceThreshold)
    {
        // Integrand logistic(x) logistic(-x) N(x; mean, variance), assembled in the log domain.
        double integrand(double x)
        {
            double logValue = MMath.LogisticLn(x) + MMath.LogisticLn(-x) + Gaussian.GetLogProb(x, mean, variance);
            return Math.Exp(logValue);
        }

        return Quadrature.AdaptiveClenshawCurtis(integrand, 10, 32, 1e-10);
    }

    // Quadrature on nodes placed according to the BigvProposal Gaussian.
    double proposalMean, proposalVariance;
    BigvProposal(mean, variance, out proposalMean, out proposalVariance);
    Vector quadNodes = Vector.Zero(LogisticGaussianQuadratureNodeCount);
    Vector quadWeights = Vector.Zero(LogisticGaussianQuadratureNodeCount);
    Quadrature.GaussianNodesAndWeights(proposalMean, proposalVariance, quadNodes, quadWeights);

    // Same integrand, divided by the proposal density the nodes were generated for.
    double reweightedIntegrand(double z)
    {
        double logValue = MMath.LogisticLn(z) + MMath.LogisticLn(-z) + Gaussian.GetLogProb(z, mean, variance) - Gaussian.GetLogProb(z, proposalMean, proposalVariance);
        return Math.Exp(logValue);
    }

    return Integrate(reweightedIntegrand, quadNodes, quadWeights);
}
/// <summary>
/// Evaluates E[log(1+exp(x))] under a Gaussian distribution with specified mean and variance.
/// </summary>
/// <param name="mean">Mean of the Gaussian over x.</param>
/// <param name="variance">Variance of the Gaussian over x.</param>
/// <returns>
/// The weighted sum of <c>MMath.Log1PlusExp</c> over 11 nodes obtained from
/// <c>Quadrature.GaussianNodesAndWeights</c>.
/// </returns>
public static double Log1PlusExpGaussian(double mean, double variance)
{
    const int nodeCount = 11;
    double[] abscissas = new double[nodeCount];
    double[] quadWeights = new double[nodeCount];
    Quadrature.GaussianNodesAndWeights(mean, variance, abscissas, quadWeights);

    // Accumulate in node order so the floating-point sum matches a plain left-to-right loop.
    double expectation = 0;
    for (int k = 0; k < abscissas.Length; k++)
    {
        double softplus = MMath.Log1PlusExp(abscissas[k]);
        expectation += quadWeights[k] * softplus;
    }

    return expectation;
}
/// <summary>
/// Evidence message for EP
/// </summary>
/// <param name="exp">Incoming message from 'exp'.</param>
/// <param name="d">Incoming message from 'd'.</param>
/// <param name="to_d">Previous outgoing message to 'd'. Used to place the quadrature nodes.</param>
/// <returns>Logarithm of the factor's average value across the given argument distributions</returns>
/// <remarks><para>
/// The formula for the result is <c>log(sum_(exp,d) p(exp,d) factor(exp,d))</c>.
/// </para><para>
/// The quadrature sum is accumulated in the log domain (log-sum-exp), so that large values of
/// <c>(shape-1)*y - rate*exp(y)</c> do not overflow <c>Math.Exp</c> and small ones do not flush the whole sum to zero.
/// </para></remarks>
public static double LogAverageFactor(Gamma exp, Gaussian d, Gaussian to_d)
{
    if (d.IsPointMass)
    {
        return LogAverageFactor(exp, d.Point);
    }
    if (d.IsUniform())
    {
        return exp.GetLogAverageOf(new Gamma(0, 0));
    }
    if (exp.IsPointMass)
    {
        return LogAverageFactor(exp.Point, d);
    }
    if (exp.IsUniform())
    {
        return 0.0;
    }

    double[] nodes = new double[QuadratureNodeCount];
    double[] weights = new double[QuadratureNodeCount];
    double mD, vD;
    Gaussian dMarginal = d * to_d;
    dMarginal.GetMeanAndVariance(out mD, out vD);
    Quadrature.GaussianNodesAndWeights(mD, vD, nodes, weights);
    bool reweight = !to_d.IsUniform();

    // Replace each weight by the log of its full summand:
    //   log w_k + [log q(y_k) - log N(y_k;m,v)] + (shape-1)*y_k - rate*exp(y_k)
    // and remember the largest one for the log-sum-exp below.
    double maxLogF = Double.NegativeInfinity;
    for (int i = 0; i < weights.Length; i++)
    {
        double y = nodes[i];
        double logf = Math.Log(weights[i]);
        if (reweight)
        {
            // modify the weights to include q(y_k)/N(y_k;m,v)
            logf += d.GetLogProb(y) - Gaussian.GetLogProb(y, mD, vD);
        }
        logf += (exp.Shape - 1) * y - exp.Rate * Math.Exp(y);
        if (logf > maxLogF)
        {
            maxLogF = logf;
        }
        weights[i] = logf;
    }

    double logZ;
    if (Double.IsInfinity(maxLogF))
    {
        // Every summand is zero (or one is infinite): subtracting the max would give NaN,
        // so the log of the sum is the max itself.
        logZ = maxLogF;
    }
    else
    {
        double scaledSum = 0;
        for (int i = 0; i < weights.Length; i++)
        {
            scaledSum += Math.Exp(weights[i] - maxLogF);
        }
        logZ = maxLogF + Math.Log(scaledSum);
    }

    return logZ - exp.GetLogNormalizer();
}
//internal static Gaussian DAverageConditional_slow([SkipIfUniform] Gamma exp, [Proper] Gaussian d)
//{
//    Gaussian to_d = exp.Shape<=1 || exp.Rate==0 ?
//                Gaussian.Uniform()
//                : new Gaussian(MMath.Digamma(exp.Shape-1) - Math.Log(exp.Rate), MMath.Trigamma(exp.Shape));
//    //var to_d = Gaussian.Uniform();
//    for (int i = 0; i < QuadratureIterations; i++) {
//        to_d = DAverageConditional(exp, d, to_d);
//    }
//    return to_d;
//}
// to_d (passed here as 'result') does not need to be Fresh. it is only used for quadrature proposal.

/// <summary>
/// EP message to 'd'
/// </summary>
/// <param name="exp">Incoming message from 'exp'. Must be a proper distribution. If uniform, the result will be uniform.</param>
/// <param name="d">Incoming message from 'd'. Must be a proper distribution. If uniform, the result will be uniform.</param>
/// <param name="result">Modified to contain the outgoing message</param>
/// <returns><paramref name="result"/></returns>
/// <remarks><para>
/// The outgoing message is a distribution matching the moments of 'd' as the random arguments are varied.
/// The formula is <c>proj[p(d) sum_(exp) p(exp) factor(exp,d)]/p(d)</c>.
/// </para><para>
/// If the integrand evaluates to zero at every quadrature node, a uniform message is returned.
/// </para></remarks>
/// <exception cref="ImproperMessageException"><paramref name="exp"/> is not a proper distribution</exception>
/// <exception cref="ImproperMessageException"><paramref name="d"/> is not a proper distribution</exception>
public static Gaussian DAverageConditional([SkipIfUniform] Gamma exp, [Proper] Gaussian d, Gaussian result)
{
    if (exp.IsUniform() || d.IsPointMass)
    {
        return Gaussian.Uniform();
    }
    if (exp.IsPointMass)
    {
        return DAverageConditional(exp.Point);
    }
    if (exp.Rate < 0)
    {
        throw new ImproperMessageException(exp);
    }
    if (d.IsUniform())
    {
        // posterior for d is a shifted log-Gamma distribution:
        // exp((a-1)*d - b*exp(d)) =propto exp((a-1)*(d+log(b)) - exp(d+log(b)))
        // we find the Gaussian with same moments.
        // u = d+log(b)
        // E[u] = digamma(a-1)
        // E[d] = E[u]-log(b) = digamma(a-1)-log(b)
        // var(d) = var(u) = trigamma(a-1)
        double lnRate = Math.Log(exp.Rate);
        return new Gaussian(MMath.Digamma(exp.Shape - 1) - lnRate, MMath.Trigamma(exp.Shape - 1));
    }

    // We use moment matching to find the best Gaussian message.
    // The moments are computed via quadrature.
    // Z = int_y f(x,y) q(y) dy =approx sum_k w_k f(x,y_k) q(y_k)/N(y_k;m,v)
    // f(x,y) = Ga(exp(y); shape, rate) = exp(y*(shape-1) -rate*exp(y))
    double[] nodes = new double[QuadratureNodeCount];
    double[] weights = new double[QuadratureNodeCount];

    // NOTE(review): moD/voD are never read. The call is kept in case GetMeanAndVariance
    // validates d (e.g. throws for an improper message) — confirm before removing.
    double moD, voD;
    d.GetMeanAndVariance(out moD, out voD);

    double mD, vD;
    if (result.IsUniform() && exp.Shape > 1)
    {
        // Seed the proposal with the moments of the log-Gamma distribution derived above.
        result = new Gaussian(MMath.Digamma(exp.Shape - 1) - Math.Log(exp.Rate), MMath.Trigamma(exp.Shape - 1));
    }
    Gaussian dMarginal = d * result;
    dMarginal.GetMeanAndVariance(out mD, out vD);
    Quadrature.GaussianNodesAndWeights(mD, vD, nodes, weights);
    if (!result.IsUniform())
    {
        // modify the weights to include q(y_k)/N(y_k;m,v)
        for (int i = 0; i < weights.Length; i++)
        {
            weights[i] *= Math.Exp(d.GetLogProb(nodes[i]) - Gaussian.GetLogProb(nodes[i], mD, vD));
        }
    }

    double Z = 0;
    double sumy = 0;
    double sumy2 = 0;
    double maxLogF = Double.NegativeInfinity;

    // First pass: overwrite each weight with the log of its summand and track the maximum.
    for (int i = 0; i < weights.Length; i++)
    {
        double y = nodes[i];
        double logf = Math.Log(weights[i]) + (exp.Shape - 1) * y - exp.Rate * Math.Exp(y);
        if (logf > maxLogF)
        {
            maxLogF = logf;
        }
        weights[i] = logf;
    }

    // Zero total mass. This has to be detected here: once the maximum is subtracted the largest
    // term contributes exp(0) = 1, so the scaled sum Z below can never be exactly zero
    // (with every term at -inf it becomes NaN instead).
    if (Double.IsNegativeInfinity(maxLogF))
    {
        return Gaussian.Uniform();
    }

    // Second pass: accumulate the zeroth, first and second moments, scaled by exp(-maxLogF).
    for (int i = 0; i < weights.Length; i++)
    {
        double y = nodes[i];
        double f = Math.Exp(weights[i] - maxLogF);
        double f_y = f * y;
        double fyy = f_y * y;
        Z += f;
        sumy += f_y;
        sumy2 += fyy;
    }

    double s = 1.0 / Z;
    double mean = sumy * s;
    double var = sumy2 * s - mean * mean;
    if (var <= 0.0)
    {
        // Non-positive variance estimate: substitute a small variance proportional to the proposal variance.
        double quadratureGap = 0.1;
        var = 2 * vD * quadratureGap * quadratureGap;
    }

    // Divide the moment-matched marginal by the incoming message to obtain the outgoing message.
    result = new Gaussian(mean, var);
    if (ForceProper)
    {
        result.SetToRatioProper(result, d);
    }
    else
    {
        result.SetToRatio(result, d);
    }

    if (result.Precision < -1e10)
    {
        throw new ApplicationException("result has negative precision");
    }
    if (Double.IsPositiveInfinity(result.Precision))
    {
        throw new ApplicationException("result is point mass");
    }
    if (Double.IsNaN(result.Precision) || Double.IsNaN(result.MeanTimesPrecision))
    {
        throw new ApplicationException("result is nan");
    }
    return result;
}
/// <summary>
/// EP message to 'exp'
/// </summary>
/// <param name="exp">Incoming message from 'exp'.</param>
/// <param name="d">Incoming message from 'd'. Must be a proper distribution. If uniform, the result will be uniform.</param>
/// <param name="to_d">Previous outgoing message to 'd'. Used to place the quadrature nodes.</param>
/// <returns>The outgoing EP message to the 'exp' argument</returns>
/// <remarks><para>
/// The outgoing message is a distribution matching the moments of 'exp' as the random arguments are varied.
/// The formula is <c>proj[p(exp) sum_(d) p(d) factor(exp,d)]/p(exp)</c>.
/// </para><para>
/// The moments are computed by quadrature on nodes from <c>Quadrature.GaussianNodesAndWeights</c>.
/// An adaptive Clenshaw-Curtis alternative used to sit behind a disabled <c>vD &lt; 10</c> switch;
/// it was unreachable and has been removed.
/// </para></remarks>
/// <exception cref="ImproperMessageException"><paramref name="d"/> is not a proper distribution</exception>
/// <exception cref="ApplicationException">The integrand is zero at every quadrature node, or the result is NaN.</exception>
public static Gamma ExpAverageConditional(Gamma exp, [Proper] Gaussian d, Gaussian to_d)
{
    if (d.IsPointMass)
    {
        return Gamma.PointMass(Math.Exp(d.Point));
    }
    if (d.IsUniform())
    {
        return Gamma.FromShapeAndRate(0, 0);
    }
    if (exp.IsPointMass)
    {
        // Z = int_y delta(x - exp(y)) N(y; my, vy) dy
        //   = int_u delta(x - u) N(log(u); my, vy)/u du
        //   = N(log(x); my, vy)/x
        // logZ = -log(x) -0.5/vy*(log(x)-my)^2
        // dlogZ/dx = -1/x -1/vy*(log(x)-my)/x
        // d2logZ/dx2 = -dlogZ/dx/x -1/vy/x^2
        // log Ga(x;a,b) = (a-1)*log(x) - bx
        // dlogGa/dx = (a-1)/x - b
        // d2logGa/dx2 = -(a-1)/x^2
        // match derivatives and solve for (a,b)
        double shape = (1 + d.GetMean() - Math.Log(exp.Point)) * d.Precision;
        double rate = d.Precision / exp.Point;
        return Gamma.FromShapeAndRate(shape, rate);
    }
    if (exp.IsUniform())
    {
        return ExpAverageLogarithm(d);
    }
    if (to_d.IsUniform() && exp.Shape > 1)
    {
        // Seed the quadrature proposal when no previous message to 'd' is available.
        to_d = new Gaussian(MMath.Digamma(exp.Shape - 1) - Math.Log(exp.Rate), MMath.Trigamma(exp.Shape - 1));
    }

    double mD, vD;
    Gaussian dMarginal = d * to_d;
    dMarginal.GetMeanAndVariance(out mD, out vD);
    if (vD < 1e-6)
    {
        // Very narrow marginal: skip quadrature and build the Gamma directly from the mean and variance of d.
        double m, v;
        d.GetMeanAndVariance(out m, out v);
        return Gamma.FromLogMeanAndMeanLog(m + v / 2.0, m);
    }

    double[] nodes = new double[QuadratureNodeCount];
    double[] weights = new double[QuadratureNodeCount];
    Quadrature.GaussianNodesAndWeights(mD, vD, nodes, weights);
    for (int i = 0; i < weights.Length; i++)
    {
        weights[i] = Math.Log(weights[i]);
    }
    if (!to_d.IsUniform())
    {
        // modify the weights to include q(y_k)/N(y_k;m,v)
        for (int i = 0; i < weights.Length; i++)
        {
            weights[i] += d.GetLogProb(nodes[i]) - dMarginal.GetLogProb(nodes[i]);
        }
    }

    // f(x,y) = Ga(exp(y); shape, rate) = exp(y*(shape-1) -rate*exp(y))
    // Z E[x] = int_y int_x x Ga(x;a,b) delta(x - exp(y)) N(y;my,vy) dx dy
    //        = int_y exp(y) Ga(exp(y);a,b) N(y;my,vy) dy
    // Z E[log(x)] = int_y y Ga(exp(y);a,b) N(y;my,vy) dy
    double maxLogF = Double.NegativeInfinity;
    for (int i = 0; i < weights.Length; i++)
    {
        double y = nodes[i];
        double logf = weights[i] + (exp.Shape - 1) * y - exp.Rate * Math.Exp(y);
        if (logf > maxLogF)
        {
            maxLogF = logf;
        }
        weights[i] = logf;
    }

    // Zero total mass. This has to be detected here: after the maximum is subtracted the largest
    // term contributes exp(0) = 1, so the scaled sum Z can never be exactly zero and 1/Z can never
    // be infinite (with every term at -inf, Z becomes NaN instead).
    if (Double.IsNegativeInfinity(maxLogF))
    {
        throw new ApplicationException("Z==0");
    }

    double Z = 0;
    double sumy = 0;
    double sumexpy = 0;
    for (int i = 0; i < weights.Length; i++)
    {
        double y = nodes[i];
        double f = Math.Exp(weights[i] - maxLogF);
        double f_y = f * y;
        double fexpy = f * Math.Exp(y);
        Z += f;
        sumy += f_y;
        sumexpy += fexpy;
    }

    double s = 1.0 / Z;
    double meanLog = sumy * s;
    double mean = sumexpy * s;

    // Project onto a Gamma with matching E[x] and E[log x], then divide out the incoming message.
    Gamma result = Gamma.FromMeanAndMeanLog(mean, meanLog);
    if (ForceProper)
    {
        result.SetToRatioProper(result, exp);
    }
    else
    {
        result.SetToRatio(result, exp);
    }
    if (Double.IsNaN(result.Shape) || Double.IsNaN(result.Rate))
    {
        throw new ApplicationException("result is nan");
    }
    return result;
}
/// <summary>
/// Calculate sigma(m,v) = \int N(x;m,v) logistic(x) dx
/// </summary>
/// <param name="mean">Mean</param>
/// <param name="variance">Variance</param>
/// <returns>The value of this special function.</returns>
/// <remarks><para>
/// Note <c>1-LogisticGaussian(m,v) = LogisticGaussian(-m,v)</c> which is more accurate.
/// </para><para>
/// The result is pruned to 0 or 1, linearised around <c>mean = 0</c>, or taken from a truncated tail expansion
/// whenever the bounds below allow it. Otherwise, when the variance exceeds
/// <c>LogisticGaussianVarianceThreshold</c> the integral is evaluated with <c>Quadrature.AdaptiveClenshawCurtis</c>;
/// for smaller variance it is evaluated on the nodes and weights produced by
/// <c>Quadrature.GaussianNodesAndWeights</c> for the proposal returned by <c>BigvProposal</c>.
/// </para></remarks>
public static double LogisticGaussian(double mean, double variance)
{
    double halfVariance = 0.5 * variance;

    // use the upper bound exp(m+v/2) to prune cases that must be zero or one
    if (mean + halfVariance < log0)
    {
        return 0.0;
    }
    if (-mean + halfVariance < logEpsilon)
    {
        return 1.0;
    }

    // use the upper bound 0.5 exp(-0.5 m^2/v) to prune cases that must be zero or one
    double q = -0.5 * mean * mean / variance - MMath.Ln2;
    if (mean <= 0 && mean + variance >= 0 && q < log0)
    {
        return 0.0;
    }
    if (mean >= 0 && variance - mean >= 0 && q < logEpsilon)
    {
        return 1.0;
    }

    // sigma(|m|,v) <= 0.5 + |m| sigma'(0,v)
    // sigma'(0,v) <= N(0;0,v+8/pi)
    // When mean is tiny relative to sqrt(v + 8/pi), use the first-order expansion around mean = 0.
    if (mean * mean / (variance + 8 / Math.PI) < 2e-20 * Math.PI)
    {
        double deriv = LogisticGaussianDerivative(mean, variance);
        return 0.5 + mean * deriv;
    }

    // Handle tail cases using the following exact formulas:
    // sigma(m,v) = 1 - exp(-m+v/2) + exp(-2m+2v) - exp(-3m+9v/2) sigma(m-3v,v)
    if (-mean + variance < logEpsilon)
    {
        return 1.0 - Math.Exp(halfVariance - mean);
    }
    if (-3 * mean + 9 * halfVariance < logEpsilon)
    {
        return 1.0 - Math.Exp(halfVariance - mean) + Math.Exp(2 * (variance - mean));
    }

    // sigma(m,v) = exp(m+v/2) - exp(2m+2v) + exp(3m + 9v/2) (1 - sigma(m+3v,v))
    if (mean + 1.5 * variance < logEpsilon)
    {
        return Math.Exp(mean + halfVariance);
    }
    if (2 * mean + 4 * variance < logEpsilon)
    {
        return Math.Exp(mean + halfVariance) * (1 - Math.Exp(mean + 1.5 * variance));
    }

    if (variance > LogisticGaussianVarianceThreshold)
    {
        // Integrand logistic(x) N(x; mean, variance), assembled in the log domain.
        double f(double x)
        {
            return Math.Exp(MMath.LogisticLn(x) + Gaussian.GetLogProb(x, mean, variance));
        }

        double upperBound = mean + Math.Sqrt(variance);
        upperBound = Math.Max(upperBound, 10);
        return Quadrature.AdaptiveClenshawCurtis(f, upperBound, 32, 1e-10);
    }
    else
    {
        Vector nodes = Vector.Zero(LogisticGaussianQuadratureNodeCount);
        Vector weights = Vector.Zero(LogisticGaussianQuadratureNodeCount);
        double m_p, v_p;
        BigvProposal(mean, variance, out m_p, out v_p);
        Quadrature.GaussianNodesAndWeights(m_p, v_p, nodes, weights);

        // Same integrand, divided by the proposal density the nodes were generated for.
        double weightedIntegrand(double z)
        {
            return Math.Exp(MMath.LogisticLn(z) + Gaussian.GetLogProb(z, mean, variance) - Gaussian.GetLogProb(z, m_p, v_p));
        }

        return Integrate(weightedIntegrand, nodes, weights);
    }
}
//internal static Gaussian DAverageConditional_slow([SkipIfUniform] Gamma exp, [Proper] Gaussian d)
//{
//    Gaussian to_d = exp.Shape<=1 || exp.Rate==0 ?
//                Gaussian.Uniform()
//                : new Gaussian(MMath.Digamma(exp.Shape-1) - Math.Log(exp.Rate), MMath.Trigamma(exp.Shape));
//    //var to_d = Gaussian.Uniform();
//    for (int i = 0; i < QuadratureIterations; i++) {
//        to_d = DAverageConditional(exp, d, to_d);
//    }
//    return to_d;
//}
// to_d does not need to be Fresh. it is only used for quadrature proposal.
/// <include file='FactorDocs.xml' path='factor_docs/message_op_class[@name="ExpOp"]/message_doc[@name="DAverageConditional(Gamma, Gaussian, Gaussian)"]/*'/>
public static Gaussian DAverageConditional([SkipIfUniform] Gamma exp, [Proper] Gaussian d, Gaussian result)
{
    // Degenerate inputs are delegated to the slow operator.
    bool delegateToSlow = exp.IsUniform() || d.IsUniform() || d.IsPointMass || exp.IsPointMass || exp.Rate <= 0;
    if (delegateToSlow)
    {
        return ExpOp_Slow.DAverageConditional(exp, d);
    }

    // Moment matching: the best Gaussian message is found from moments computed by quadrature.
    // Z = int_y f(x,y) q(y) dy =approx sum_k w_k f(x,y_k) q(y_k)/N(y_k;m,v)
    // f(x,y) = Ga(exp(y); shape, rate) = exp(y*(shape-1) -rate*exp(y))
    double[] quadNodes = new double[QuadratureNodeCount];
    double[] quadWeights = new double[QuadratureNodeCount];

    // The outputs of this call are not read afterwards.
    double moD, voD;
    d.GetMeanAndVariance(out moD, out voD);

    // With no previous message and shape > 1, seed the proposal from digamma/trigamma of (shape - 1).
    if (result.IsUniform() && exp.Shape > 1)
    {
        double seedMean = MMath.Digamma(exp.Shape - 1) - Math.Log(exp.Rate);
        double seedVariance = MMath.Trigamma(exp.Shape - 1);
        result = new Gaussian(seedMean, seedVariance);
    }

    double mD, vD;
    Gaussian proposal = d * result;
    proposal.GetMeanAndVariance(out mD, out vD);
    if (vD == 0)
    {
        return ExpOp_Slow.DAverageConditional(exp, d);
    }

    Quadrature.GaussianNodesAndWeights(mD, vD, quadNodes, quadWeights);
    if (!result.IsUniform())
    {
        // Fold q(y_k)/N(y_k;m,v) into each weight.
        for (int k = 0; k < quadWeights.Length; k++)
        {
            double logRatio = d.GetLogProb(quadNodes[k]) - Gaussian.GetLogProb(quadNodes[k], mD, vD);
            quadWeights[k] *= Math.Exp(logRatio);
        }
    }

    // Pass 1: overwrite each weight with the log of its summand and remember the largest.
    double largestLogTerm = Double.NegativeInfinity;
    for (int k = 0; k < quadWeights.Length; k++)
    {
        double y = quadNodes[k];
        double logTerm = Math.Log(quadWeights[k]) + (exp.Shape - 1) * y - exp.Rate * Math.Exp(y);
        if (logTerm > largestLogTerm)
        {
            largestLogTerm = logTerm;
        }
        quadWeights[k] = logTerm;
    }

    // Pass 2: accumulate the zeroth, first and second moments, scaled by exp(-largestLogTerm).
    double Z = 0;
    double sumY = 0;
    double sumYSquared = 0;
    for (int k = 0; k < quadWeights.Length; k++)
    {
        double y = quadNodes[k];
        double term = Math.Exp(quadWeights[k] - largestLogTerm);
        double termTimesY = term * y;
        Z += term;
        sumY += termTimesY;
        sumYSquared += termTimesY * y;
    }
    if (Z == 0)
    {
        return Gaussian.Uniform();
    }

    double invZ = 1.0 / Z;
    double mean = sumY * invZ;
    double variance = sumYSquared * invZ - mean * mean;
    // TODO: explain this
    if (variance <= 0.0)
    {
        double quadratureGap = 0.1;
        variance = 2 * vD * quadratureGap * quadratureGap;
    }

    // Divide the moment-matched marginal by the incoming message.
    result = new Gaussian(mean, variance);
    result.SetToRatio(result, d, ForceProper);

    if (result.Precision < -1e10)
    {
        throw new InferRuntimeException("result has negative precision");
    }
    if (Double.IsPositiveInfinity(result.Precision))
    {
        throw new InferRuntimeException("result is point mass");
    }

    // A NaN message falls back to the slow operator instead of throwing.
    bool isNaN = Double.IsNaN(result.Precision) || Double.IsNaN(result.MeanTimesPrecision);
    if (isNaN)
    {
        return ExpOp_Slow.DAverageConditional(exp, d);
    }
    return result;
}