// Only disagrees with the MLE ordering in a small number of cases that are necessary to avoid gradient issues
//
// Returns the negated mean of log(Epsilon + density) over the evaluation points,
// so smaller values correspond to a better fit.
//   dist       - model whose Density is evaluated at each point
//   evalPoints - points at which the density is evaluated
public static double MLEV4(GEV dist, double[] evalPoints)
{
    // Added to every density before taking the log so that a zero density
    // contributes a finite term instead of negative infinity.
    const double Epsilon = 1E-6;

    double logLikelihood = 0;
    foreach (double point in evalPoints)
    {
        logLikelihood += Math.Log(Epsilon + dist.Density(point));
    }

    return -logLikelihood / evalPoints.Length;
}
// Compares three ways of computing the complement values for a set of randomly
// parameterized GEV distributions (Clenshaw-Curtis, trapezoid rule with 10000 steps,
// and Monte Carlo with 10^7 iterations) and logs the per-distribution values and totals.
public static void TestGEVComplementComputations()
{
    const int distributionCount = 20;

    // Each distribution is wrapped with bounds at its 2^-50 and 1 - 2^-50 quantiles.
    double lowerTail = Math.Pow(2, -50);
    double upperTail = 1.0 - lowerTail;

    // Fixed seed so that the generated parameters are reproducible.
    Random rng = new Xoshiro256StarStar(8675309);

    // Hand-picked alternative: { new GEV(0,200,-1), new GEV(0,100,-1) }
    GEV[] gevs = new GEV[distributionCount];
    for (int k = 0; k < gevs.Length; k++)
    {
        // Drawn in the same order as the constructor arguments; the third argument is negated.
        double first = rng.NextDouble();
        double second = rng.NextDouble();
        double third = -rng.NextDouble();
        gevs[k] = new GEV(first, second, third, rng);
    }

    IDistributionWrapper[] wrapped = Array.ConvertAll<GEV, IDistributionWrapper>(
        gevs,
        g => new WrappedDistribution(
            g,
            g.InverseCumulativeDistribution(lowerTail),
            g.InverseCumulativeDistribution(upperTail)));

    double[] byClenshawCurtis = DiscardProbabilityComputation.ComplementsClenshawCurtisAutomatic(wrapped);
    double[] byTrapezoid = DiscardProbabilityComputation.ComplementsTrapezoid(wrapped, 10000);
    double[] byMonteCarlo = DiscardProbabilityComputation.ComplementsMonteCarlo(wrapped, iterations: 10000000);

    double sumClenshawCurtis = 0;
    double sumMonteCarlo = 0;
    double sumTrapezoid = 0;
    for (int k = 0; k < byClenshawCurtis.Length; k++)
    {
        GEV g = gevs[k];
        Program.logger.WriteLine($"Distribution Scale: {g.scale} Loc {g.location} Shape {g.shape} 1-P(D) {byClenshawCurtis[k]} MC {byMonteCarlo[k]} Trap {byTrapezoid[k]}");

        sumClenshawCurtis += byClenshawCurtis[k];
        sumMonteCarlo += byMonteCarlo[k];
        sumTrapezoid += byTrapezoid[k];
    }

    Program.logger.WriteLine($"Total probability: {sumClenshawCurtis} Total by MC: {sumMonteCarlo} Total by Trap 10k: {sumTrapezoid}");
}
// Scratchwork / prototyping harness (not an automated test).
//
// 1. Simulates the distribution of the maximum of `sampleSize` draws from `dist` by Monte Carlo
//    and fits a GEV to its ECDF.
// 2. Draws one sample from `dist`, builds a GPD tail approximation from it, simulates maxima
//    from that approximation, and fits a second GEV to those.
// 3. Writes the resulting curves to "GEV Test A.csv" and the true CDF vs. the tail approximation
//    (plus TikZ path snippets for a figure) to "GEV Test B.csv".
//
// Both loggers are disposed even if an intermediate step throws.
public static void TestGEV()
{
    Logger output = null;
    Logger output2 = null;
    try
    {
        output = new Logger("GEV Test A.csv");
        output2 = new Logger("GEV Test B.csv");

        // Alternatives used while prototyping:
        //   new ChiSquared(4, Program.rand), new Beta(2, 5), new Beta(2, 1.5),
        //   new Exponential(2, Program.rand), new Gamma(2, 2, Program.rand)
        var dist = new Beta(2, 2);
        output.WriteLine($"Distribution: {dist.ToString().Replace(',',' ')}");

        const int sampleSize = 300;
        output.WriteLine($"Samplesize: {sampleSize}");

        // Report the 1 - 1/sampleSize and 1/sampleSize quantiles of the distribution
        double upperQuantile = dist.InverseCumulativeDistribution(1 - 1.0 / sampleSize);
        double lowerQuantile = dist.InverseCumulativeDistribution(1.0 / sampleSize);
        output.WriteLine($"1-1/samplesize quantile: {upperQuantile}");
        output.WriteLine($"1/samplesize quantile: {lowerQuantile}");

        // Monte Carlo for the true distribution of the sample maximum
        double[] observations = new double[10000];
        for (int i = 0; i < observations.Length; i++)
        {
            double max = double.NegativeInfinity;
            for (int j = 0; j < sampleSize; j++)
            {
                max = Math.Max(max, dist.Sample());
            }
            observations[i] = max;
        }
        Sorting.Sort(observations);
        ContinuousDistribution MonteCarloDistributionOfTheMaximum = ContinuousDistribution.ECDF(observations, Program.rand);

        // --- Find the best fit GEV distribution for this dataset ---
        // (Earlier commented-out experiments -- a median/variance moment estimator and two
        //  hand-rolled one-dimensional searches over the shape parameter -- were dropped;
        //  the constrained minimizer below is what the code actually uses.)

        // Sum of squared differences between the model CDF and the Monte Carlo ECDF,
        // evaluated at every simulated maximum.
        double FitnessExactModel(GEV model)
        {
            double val = 0;
            for (int i = 0; i < observations.Length; i++)
            {
                val += Math.Pow(model.CumulativeDistribution(observations[i]) - MonteCarloDistributionOfTheMaximum.CumulativeDensity(observations[i]), 2);
            }
            return val;
        }

        // Minimizes objectiveFunc over (shape, scale, location) inside a box around the initial guess
        // and returns the GEV at the minimizing point.
        GEV OptimizeBFGS(Func<Vector<double>, double> objectiveFunc, double initialShape, double initialScale, double initialLocation)
        {
            // Formatted by shape, scale, location
            // NOTE(review): the scale and location bounds are symmetric about zero (+/- 3x the
            // initial guess), so the optimizer may probe negative scale values -- confirm that
            // GEV tolerates this.
            var lowerBounds = CreateVector.DenseOfArray(new double[] { -10, Math.Min(-3 * initialScale, 3 * initialScale), Math.Min(-3 * initialLocation, 3 * initialLocation) });
            var upperBounds = CreateVector.DenseOfArray(new double[] { 10, Math.Max(-3 * initialScale, 3 * initialScale), Math.Max(-3 * initialLocation, 3 * initialLocation) });
            var initialGuess = CreateVector.DenseOfArray(new double[] { initialShape, initialScale, initialLocation });

            var min = FindMinimum.OfFunctionConstrained(objectiveFunc, lowerBounds, upperBounds, initialGuess);
            return new GEV(min[2], min[1], min[0], Program.rand);
        }

        // Initial guesses: scale from the variance estimate, location from the median
        double scaleGuess = Math.Sqrt(6 * Statistics.VarianceEstimate(observations)) / Math.PI;
        double locationGuess = Statistics.Median(observations) + scaleGuess * Math.Log(Math.Log(2));
        double shapeGuess = 0.5; // Use Pickands estimator here in the actual model

        Func<Vector<double>, double> objectiveFunction = x => FitnessExactModel(new GEV(x[2], x[1], x[0], Program.rand));
        GEV bestModelMonteCarlo = OptimizeBFGS(objectiveFunction, shapeGuess, scaleGuess, locationGuess);
        output.WriteLine($"MC Exact GEV Model: shape{bestModelMonteCarlo.shape} location{bestModelMonteCarlo.location} scale {bestModelMonteCarlo.scale}");

        double[] sample = new double[sampleSize];
        dist.Samples(sample); // Take a sample from dist
        Sorting.Sort(sample);

        // Report the sample maximum (the sample is sorted, so it is the last element)
        output.WriteLine($"Sample maximum: {sample[sample.Length - 1]}");

        // Construct a GPD tail approximation from the sample.
        // (The variable keeps its earlier "pickands" name; a smoothed-data PickandsApproximation
        //  variant was tried previously.)
        var pickandsApprox = new GPDApproximation(sample, method: GPDApproximation.FittingMethod.V4);

        // Simulate observations of the sample maximum from the tail approximation
        double[] approxObservations = new double[observations.Length];
        for (int i = 0; i < approxObservations.Length; i++)
        {
            double max = double.NegativeInfinity;
            for (int j = 0; j < sampleSize; j++)
            {
                max = Math.Max(max, pickandsApprox.Sample());
            }
            approxObservations[i] = max;
        }
        // NOTE(review): unlike `observations`, this array is not sorted before being passed to
        // ECDF -- confirm that ContinuousDistribution.ECDF does not require sorted input.
        ContinuousDistribution approxECDF = ContinuousDistribution.ECDF(approxObservations); // ECDF of the simulated observations

        // Guess location and scale
        shapeGuess = pickandsApprox.c;
        if (shapeGuess < 0)
        {
            // Moment-based guesses using Gamma(1 - shape) and Gamma(1 - 2 * shape)
            double g1 = SpecialFunctions.Gamma(1 - shapeGuess);
            double g2 = SpecialFunctions.Gamma(1 - 2 * shapeGuess);
            scaleGuess = Math.Sqrt(Statistics.Variance(approxObservations) * shapeGuess * shapeGuess / (g2 - g1 * g1));
            locationGuess = Statistics.Mean(approxObservations) - scaleGuess * (g1 - 1) / shapeGuess;
        }
        else
        {
            scaleGuess = Math.Sqrt(6 * Statistics.Variance(approxObservations)) / Math.PI;
            locationGuess = Statistics.Median(approxObservations) + scaleGuess * Math.Log(Math.Log(2));
        }

        GEV estimatedGEVUnfitted = new GEV(location: locationGuess, scale: scaleGuess, shape: pickandsApprox.c); // Using the tail approximation's shape estimate
        output.WriteLine($"UnfittedGEVModel: shape{estimatedGEVUnfitted.shape} location{estimatedGEVUnfitted.location} scale {estimatedGEVUnfitted.scale}");

        // Fit the model to the data drawn from the tail approximation
        double FitnessApproxModel(GEV model)
        {
            double val = 0;
            for (int i = 0; i < approxObservations.Length; i++)
            {
                val += Math.Pow(model.CumulativeDistribution(approxObservations[i]) - approxECDF.CumulativeDensity(approxObservations[i]), 2);
            }
            return val;
        }

        objectiveFunction = x => FitnessApproxModel(new GEV(x[2], x[1], x[0], Program.rand));
        GEV fittedApproxModel = OptimizeBFGS(objectiveFunction, pickandsApprox.c, scaleGuess, locationGuess);
        output.WriteLine($"FittedGEVModel: shape{fittedApproxModel.shape} location{fittedApproxModel.location} scale {fittedApproxModel.scale}");

        // Probabilities on a uniform grid, and the matching quantiles of the Monte Carlo maxima
        double[] proportions = Interpolation.Linspace(0.000001, 0.999999, 2000);
        double[] observationQuantiles = Interpolation.Linspace(0.000001, 0.999999, 2000);
        for (int i = 0; i < observationQuantiles.Length; i++)
        {
            observationQuantiles[i] = Statistics.Quantile(observations, observationQuantiles[i]);
        }

        // The last four header labels follow the order in which the rows are written below:
        // unfitted model first, then fitted model.
        output.WriteLine("Abscissas,Monte Carlo ECDF,GEV Fit of MC ECDF,Estimated ECDF,Estimated GEV Unfitted,Estimated GEV Fitted,,ErrDistExactAbscissas,ErrDistExactValues,ErrDistUnfittedAbscissas,ErrDistUnfittedValues,ErrDistModelAbscissas,ErrDistModelValues");
        for (int i = 0; i < observationQuantiles.Length; i++)
        {
            output.WriteLine($"{observationQuantiles[i]}," +
                $"{MonteCarloDistributionOfTheMaximum.CumulativeDensity(observationQuantiles[i])}," +
                $"{bestModelMonteCarlo.CumulativeDistribution(observationQuantiles[i])}," +
                $"{approxECDF.CumulativeDensity(observationQuantiles[i])}," +
                $"{estimatedGEVUnfitted.CumulativeDistribution(observationQuantiles[i])}," +
                $"{fittedApproxModel.CumulativeDistribution(observationQuantiles[i])}," +
                $"," + // Blank spacer column
                $"{observationQuantiles[i] - upperQuantile}," +
                $"{MonteCarloDistributionOfTheMaximum.CumulativeDensity(observationQuantiles[i])}," +
                $"{estimatedGEVUnfitted.InverseCumulativeDistribution(proportions[i]) - estimatedGEVUnfitted.location}," +
                $"{proportions[i]}," +
                $"{fittedApproxModel.InverseCumulativeDistribution(proportions[i]) - fittedApproxModel.location}," +
                $"{proportions[i]}");
        }

        // Quantiles of the underlying distribution on the same probability grid
        double[] distributionQuantiles = Interpolation.Linspace(0.000001, 0.999999, 2000);
        for (int i = 0; i < distributionQuantiles.Length; i++)
        {
            distributionQuantiles[i] = dist.InverseCumulativeDistribution(distributionQuantiles[i]);
        }

        output2.WriteLine("Abscissas,True CDF,Pickands Estimate");
        for (int i = 0; i < distributionQuantiles.Length; i++)
        {
            output2.WriteLine($"{distributionQuantiles[i]}," +
                $"{dist.CumulativeDistribution(distributionQuantiles[i])}," +
                $"{pickandsApprox.CDF(distributionQuantiles[i])}");
        }

        #region Temp for figure
        // TikZ paths: every point but the last is followed by " --", the last one by ";"
        int lastIndex = distributionQuantiles.Length - 1;

        output2.WriteLine("");
        output2.WriteLine("TrueDist");
        output2.WriteLine("\\draw[line width=1.5pt]");
        for (int i = 0; i < lastIndex; i++)
        {
            output2.WriteLine($"({distributionQuantiles[i]},{dist.CumulativeDistribution(distributionQuantiles[i])}) --");
        }
        output2.WriteLine($"({distributionQuantiles[lastIndex]},{dist.CumulativeDistribution(distributionQuantiles[lastIndex])});");

        output2.WriteLine("");
        output2.WriteLine("Approx");
        output2.WriteLine("\\draw[line width=1.5pt]");
        // Stop before the last point so that it is not emitted twice.
        for (int i = 0; i < lastIndex; i++)
        {
            output2.WriteLine($"({distributionQuantiles[i]},{pickandsApprox.CDF(distributionQuantiles[i])}) --");
        }
        output2.WriteLine($"({distributionQuantiles[lastIndex]},{pickandsApprox.CDF(distributionQuantiles[lastIndex])});");
        #endregion
    }
    finally
    {
        // Clean up, even when one of the steps above throws
        output?.Dispose();
        output2?.Dispose();
    }
}
// Builds a ParameterDistribution from a GEV error model centered at location 0.
//
//   data              - the observed sample; its largest element is passed to the result
//   monteCarloStorage - scratch buffer; overwritten with simulated maxima and then sorted
//   rand              - random source; Program.rand is used when null
//
// NOTE(review): the method name mentions the sample minimum, but the body works with
// maxima throughout -- confirm the intended naming.
public static ParameterDistribution OneOverNthQuantileViaSampleMinimumParameterDistribution(double[] data, double[] monteCarloStorage, Random rand = null)
{
    rand = rand ?? Program.rand;

    // Start by computing a tail estimate. The PBDH theorem says this should be GPD shaped.
    // A small amount of smoothing on the ECDF is used here as well.
    var tailModel = new GPDApproximation(data, GPDApproximation.FittingMethod.V4, rand);

    // Simulate maxima under the tail model, each over as many draws as the original sample has
    int drawsPerMaximum = data.Length;
    for (int trial = 0; trial < monteCarloStorage.Length; trial++)
    {
        double simulatedMax = double.NegativeInfinity;
        for (int draw = 0; draw < drawsPerMaximum; draw++)
        {
            simulatedMax = Math.Max(simulatedMax, tailModel.Sample());
        }
        monteCarloStorage[trial] = simulatedMax;
    }
    Sorting.Sort(monteCarloStorage);

    // Optimization is no longer required here. Earlier commented-out variants (a squared-error
    // fit via constrained BFGS, and moment-based location/scale guesses) were dropped; one of
    // them used data[data.Length - 1] as the location, which needs the data sorted ahead of time.

    // Compute the parameters of the GEV distribution of the observed maxima.
    // These two are pretty much exact with mild assumptions.
    double mu = tailModel.Quantile((drawsPerMaximum - 1) / (double)drawsPerMaximum);
    double xi = Math.Max(-5, Math.Min(tailModel.c, 3)); // Shape clamped to [-5, 3]

    // Sigma is computed from the simulated observations of the max
    GEV gevApprox = GEVApprox.ViaMLE(monteCarloStorage, mu, xi);

    // Location is always 0 here (a median-centered variant was tried previously)
    var errorDist = new GEV(0, gevApprox.scale, gevApprox.shape);

    // Compute the sample max
    double sampleMax = double.NegativeInfinity;
    foreach (double value in data)
    {
        sampleMax = Math.Max(sampleMax, value);
    }

    // Sqrt epsilon quantile bounds: 2^-26 is the square root of 2^-52
    double lowerBound = errorDist.InverseCumulativeDistribution(Math.Pow(2, -26));
    double upperBound = errorDist.InverseCumulativeDistribution(1 - Math.Pow(2, -26));

    return new ParameterDistribution(errorDist, sampleMax, lowerBound, upperBound);
}