public static void TestPickands() { Logger output = new Logger("Pickands Comparison.csv"); // Generate some data ChiSquared chisq = new ChiSquared(4, Program.rand); double[] raw = new double[1000]; chisq.Samples(raw); List <double> data = new List <double>(raw); data.Sort(); // Apply the new and old Pickands code and write them to the output for comparison ContinuousDistribution oldVersion = GPDApproximation.ApproximatePiecewiseDistributionWithUpperTail(data, 1000); GPDApproximation newVersion = new GPDApproximation(data); raw = new double[10000]; // Generate new data from the approximation newVersion.Samples(raw); var resample = ContinuousDistribution.ECDF(raw, Program.rand); // Make the table and write it to a file DataTable table = new DataTable("Comparison of Pickands Implementations"); table.Columns.Add("Abscissas", typeof(double)); table.Columns.Add("OldCDF", typeof(double)); table.Columns.Add("NewCDF", typeof(double)); table.Columns.Add("ResampledECDF", typeof(double)); table.Columns.Add("TrueCDF", typeof(double)); for (int i = 0; i < oldVersion.abscissas.Count; i++) { DataRow row = table.NewRow(); row["Abscissas"] = oldVersion.abscissas[i]; row["OldCDF"] = oldVersion.cumulativeDensities[i]; row["NewCDF"] = newVersion.CDF(oldVersion.abscissas[i]); row["ResampledECDF"] = resample.CumulativeDensity(oldVersion.abscissas[i]); row["TrueCDF"] = chisq.CumulativeDistribution(oldVersion.abscissas[i]); table.Rows.Add(row); } output.WriteTable(table); // Clean up output.Dispose(); table.Dispose(); /* * Console.WriteLine($"CDF(1):{newVersion.CDF(1)} QF(that): {newVersion.Quantile(newVersion.CDF(1))}"); * Console.WriteLine($"CDF(2):{newVersion.CDF(2)} QF(that): {newVersion.Quantile(newVersion.CDF(2))}"); * Console.WriteLine($"CDF(3):{newVersion.CDF(3)} QF(that): {newVersion.Quantile(newVersion.CDF(3))}"); * Console.WriteLine($"CDF(4):{newVersion.CDF(4)} QF(that): {newVersion.Quantile(newVersion.CDF(4))}"); * Console.WriteLine($"CDF(11):{newVersion.CDF(11)} QF(that): {newVersion.Quantile(newVersion.CDF(11))}"); * Console.WriteLine($"CDF(13):{newVersion.CDF(13)} QF(that): {newVersion.Quantile(newVersion.CDF(13))}"); * Console.WriteLine($"CDF(15):{newVersion.CDF(15)} QF(that): {newVersion.Quantile(newVersion.CDF(15))}"); * double cdfval = PickandsBalkemaDeHaan.TailCDF(0.4, newVersion.a, newVersion.c); * Console.WriteLine($"TailCDF(0.4):{cdfval} QF(that): {PickandsBalkemaDeHaan.TailQuantileFunction(cdfval, newVersion.a, newVersion.c)}"); */ }
public static void TestGEV() // Scratchwork, prototyping, etc. { Logger output = new Logger("GEV Test A.csv"); Logger output2 = new Logger("GEV Test B.csv"); //var dist = new ChiSquared(4, Program.rand); var dist = new Beta(2, 2); //var dist = new Beta(2, 5); //var dist = new Beta(2, 1.5); output.WriteLine($"Distribution: {dist.ToString().Replace(',',' ')}"); //var dist = new Exponential(2, Program.rand); //var dist = new Gamma(2, 2, Program.rand); const int sampleSize = 300; output.WriteLine($"Samplesize: {sampleSize}"); // Report the distribution 1-1/e quantile double upperQuantile = dist.InverseCumulativeDistribution(1 - 1.0 / sampleSize); double lowerQuantile = dist.InverseCumulativeDistribution(1.0 / sampleSize); output.WriteLine($"1-1/samplesize quantile: {upperQuantile}"); output.WriteLine($"1/samplesize quantile: {lowerQuantile}"); // Monte Carlo for the true distribution of the sample maximum double[] observations = new double[10000]; for (int i = 0; i < observations.Length; i++) { double max = double.NegativeInfinity; for (int j = 0; j < sampleSize; j++) { max = Math.Max(max, dist.Sample()); } observations[i] = max; } Sorting.Sort(observations); ContinuousDistribution MonteCarloDistributionOfTheMaximum = ContinuousDistribution.ECDF(observations, Program.rand); // --- Find the best fit GEV distribution for this dataset --- #region Old code /* * // Compute location and scale parameter estimates for a given shape parameter Xi using the median and variance * void EstimateParameters(double shape, double median, double variance, out double location, out double scale) * { * if (shape == 0) * { * scale = Math.Sqrt(6 * variance) / Math.PI; * location = median + scale * Math.Log(Math.Log(2)); * return; * } * // This scale may or may not work for Xi > 0.5 * scale = Math.Sign(shape) * shape * Math.Sqrt(variance) / Math.Sqrt(SpecialFunctions.Gamma(1 - 2 * shape) - SpecialFunctions.Gamma(1 - shape) * SpecialFunctions.Gamma(1 - shape)); * if (double.IsNaN(scale)) scale = Math.Sqrt(6 * variance) / Math.PI; * location = median - scale * (Math.Pow(Math.Log(2), -shape) - 1) / shape; * }*/ #endregion double FitnessExactModel(GEV model) { double val = 0; for (int i = 0; i < observations.Length; i++) { val += Math.Pow(model.CumulativeDistribution(observations[i]) - MonteCarloDistributionOfTheMaximum.CumulativeDensity(observations[i]), 2); } return(val); } #region Old code //double medianEst = Statistics.Median(observations); //double varianceEst = Statistics.VarianceEstimate(observations); /* * GEV Optimize(double startingval, out double fitness) * { * double locationEst; * double scaleEst; * double bestScore = double.PositiveInfinity; * GEV bestSoFar = null; * bool increasing = false; * int sinceImproved = 0; * double shapeEst = startingval; // Neg or pos will stay that way throughout the optimization * * while (true) * { * EstimateParameters(shapeEst, medianEst, varianceEst, out locationEst, out scaleEst); * GEV model = new GEV(locationEst, scaleEst, shapeEst, Program.rand); * double score = FitnessExactModel(model); * if (score < bestScore) * { * bestScore = score; * bestSoFar = model; * sinceImproved = 0; * } * else * { * increasing ^= true; * if (++sinceImproved > 10) break; * } * if (increasing) shapeEst += 0.3 * startingval; * else shapeEst *= 0.5; * } * fitness = bestScore; * return bestSoFar; * } * * GEV OptimizeV2(double initialGuess, out double fitness) * { * double locationEst, scaleEst; * double bestScore = double.PositiveInfinity; * GEV bestSoFar = null; * double shapeEst = initialGuess; * double bestShapeSoFar = initialGuess; * // Grow the estimate by doubling until it is no longer improving * while (true) * { * EstimateParameters(shapeEst, medianEst, varianceEst, out locationEst, out scaleEst); * GEV model = new GEV(locationEst, scaleEst, shapeEst, Program.rand); * double score = FitnessExactModel(model); * if (score < bestScore) // If it improved * { * bestScore = score; * bestSoFar = model; * bestShapeSoFar = shapeEst; * } * else break; * shapeEst *= 2; * } * double magnitude = bestShapeSoFar; * for (int i = 0; i < 10; i++) // 10 corresponds to 3 correct digits * { * double delta = magnitude * Math.Pow(2, -(i + 1)); // Half in size for each iteration * * // Three positions: the current one, one lower by delta, and one higher by delta * * // Lower Model * EstimateParameters(bestShapeSoFar - delta, medianEst, varianceEst, out locationEst, out scaleEst); * GEV lowerModel = new GEV(locationEst, scaleEst, bestShapeSoFar - delta, Program.rand); * double lowerScore = FitnessExactModel(lowerModel); * * // Upper Model * EstimateParameters(bestShapeSoFar + delta, medianEst, varianceEst, out locationEst, out scaleEst); * GEV upperModel = new GEV(locationEst, scaleEst, bestShapeSoFar + delta, Program.rand); * double upperScore = FitnessExactModel(upperModel); * * // Move to the best of the three * double bestfitness = Math.Min(bestScore, Math.Min(upperScore, lowerScore)); * bestScore = bestfitness; * if (lowerScore == bestfitness) * { * bestShapeSoFar = bestShapeSoFar - delta; * bestSoFar = lowerModel; * } * else if (upperScore == bestfitness) * { * bestShapeSoFar = bestShapeSoFar + delta; * bestSoFar = upperModel; * } * } * fitness = bestScore; * return bestSoFar; * } */ #endregion GEV OptimizeBFGS(Func <Vector <double>, double> objectiveFunc, double initialShape, double initialScale, double initialLocation) { // Formatted by shape, scale, location var lowerBounds = CreateVector.DenseOfArray(new double[] { -10, Math.Min(-3 * initialScale, 3 * initialScale), Math.Min(-3 * initialLocation, 3 * initialLocation) }); var upperBounds = CreateVector.DenseOfArray(new double[] { 10, Math.Max(-3 * initialScale, 3 * initialScale), Math.Max(-3 * initialLocation, 3 * initialLocation) }); var initialGuess = CreateVector.DenseOfArray(new double[] { initialShape, initialScale, initialLocation }); var min = FindMinimum.OfFunctionConstrained(objectiveFunc, lowerBounds, upperBounds, initialGuess); return(new GEV(min[2], min[1], min[0], Program.rand)); } #region Old code // Optimize for Xi /*double fitNeg, fitZero, fitPos; * GEV bestNeg = OptimizeV2(-1, out fitNeg); * GEV bestPos = OptimizeV2(1, out fitPos); * double locZero, scaleZero; * EstimateParameters(0, medianEst, varianceEst, out locZero, out scaleZero); * GEV zeroModel = new GEV(locZero, scaleZero, 0, Program.rand); * fitZero = Fitness(zeroModel); * // Choose the best model of the three * double minScore = Math.Min(fitNeg, Math.Min(fitPos, fitZero)); * GEV bestModel = null; * if (fitNeg == minScore) bestModel = bestNeg; * if (fitPos == minScore) bestModel = bestPos; * if (fitZero == minScore) bestModel = zeroModel; // Prefer zero, then pos * * Console.WriteLine($"Best Negative model: shape: {bestNeg.shape} scale: {bestNeg.scale} location: {bestNeg.location} fitness: {fitNeg}"); * Console.WriteLine($"Best Positive model: shape: {bestPos.shape} scale: {bestPos.scale} location: {bestPos.location} fitness: {fitPos}"); * Console.WriteLine($"Zero model: shape: {zeroModel.shape} scale: {zeroModel.scale} location: {zeroModel.location} fitness: {fitZero}"); */ #endregion double scaleGuess = Math.Sqrt(6 * Statistics.VarianceEstimate(observations)) / Math.PI; double locationGuess = Statistics.Median(observations) + scaleGuess * Math.Log(Math.Log(2)); double shapeGuess = 0.5; // Use Pickands estimator here in the actual model Func <Vector <double>, double> objectiveFunction = x => FitnessExactModel(new GEV(x[2], x[1], x[0], Program.rand)); GEV bestModelMonteCarlo = OptimizeBFGS(objectiveFunction, shapeGuess, scaleGuess, locationGuess); output.WriteLine($"MC Exact GEV Model: shape{bestModelMonteCarlo.shape} location{bestModelMonteCarlo.location} scale {bestModelMonteCarlo.scale}"); double[] sample = new double[sampleSize]; dist.Samples(sample); // Take a sample from dist Sorting.Sort(sample); // Report the sample min and max output.WriteLine($"Sample maximum: {sample[sample.Length - 1]}"); //var sorter = new List<double>(sample); //sorter.Sort(); //sample = sorter.ToArray(); // Smoothed version //double[] smoothedData = new double[sample.Length - 1]; //for (int i = 0; i < smoothedData.Length; i++) { smoothedData[i] = 0.5 * (sample[i] + sample[i + 1]); } //var pickandsApprox = new PickandsApproximation(smoothedData, method: PickandsApproximation.FittingMethod.Pickands_SupNorm); // Construct a Pickands tail approx from the sample var pickandsApprox = new GPDApproximation(sample, method: GPDApproximation.FittingMethod.V4); // Construct a Pickands tail approx from the sample // Bootstrap observations of the distribution of the sample maximum from the Pickands model double[] approxObservations = new double[observations.Length]; for (int i = 0; i < approxObservations.Length; i++) { double max = double.NegativeInfinity; for (int j = 0; j < sampleSize; j++) { max = Math.Max(max, pickandsApprox.Sample()); } approxObservations[i] = max; } ContinuousDistribution approxECDF = ContinuousDistribution.ECDF(approxObservations); // ECDF of the bootstrapped observations //scaleGuess = Math.Sqrt(6 * Statistics.Variance(approxObservations)) / Math.PI; //locationGuess = Statistics.Median(approxObservations) + scaleGuess * Math.Log(Math.Log(2)); // Guess location and scale shapeGuess = pickandsApprox.c; if (shapeGuess < 0) { double g1 = SpecialFunctions.Gamma(1 - shapeGuess); double g2 = SpecialFunctions.Gamma(1 - 2 * shapeGuess); scaleGuess = Math.Sqrt(Statistics.Variance(approxObservations) * shapeGuess * shapeGuess / (g2 - g1 * g1)); locationGuess = Statistics.Mean(approxObservations) - scaleGuess * (g1 - 1) / shapeGuess; } else { scaleGuess = Math.Sqrt(6 * Statistics.Variance(approxObservations)) / Math.PI; locationGuess = Statistics.Median(approxObservations) + scaleGuess * Math.Log(Math.Log(2)); } GEV estimatedGEVUnfitted = new GEV(location: locationGuess, scale: scaleGuess, shape: pickandsApprox.c); // Using the Pickands estimator for shape output.WriteLine($"UnfittedGEVModel: shape{estimatedGEVUnfitted.shape} location{estimatedGEVUnfitted.location} scale {estimatedGEVUnfitted.scale}"); // Fit the model to the data drawn from the Pickands model double FitnessApproxModel(GEV model) { double val = 0; for (int i = 0; i < approxObservations.Length; i++) { val += Math.Pow(model.CumulativeDistribution(approxObservations[i]) - approxECDF.CumulativeDensity(approxObservations[i]), 2); } return(val); } objectiveFunction = x => FitnessApproxModel(new GEV(x[2], x[1], x[0], Program.rand)); GEV fittedApproxModel = OptimizeBFGS(objectiveFunction, pickandsApprox.c, scaleGuess, locationGuess); output.WriteLine($"FittedGEVModel: shape{fittedApproxModel.shape} location{fittedApproxModel.location} scale {fittedApproxModel.scale}"); double[] proportions = Interpolation.Linspace(0.000001, 0.999999, 2000); double[] observationQuantiles = Interpolation.Linspace(0.000001, 0.999999, 2000); for (int i = 0; i < observationQuantiles.Length; i++) { observationQuantiles[i] = Statistics.Quantile(observations, observationQuantiles[i]); } output.WriteLine("Abscissas,Monte Carlo ECDF,GEV Fit of MC ECDF,Estimated ECDF,Estimated GEV Unfitted,Estimated GEV Fitted,,ErrDistExactAbscissas,ErrDistExactValues,ErrDistModelAbscissas,ErrDistModelValues,ErrDistUnfittedAbscissas,ErrDistUnfittedValues"); for (int i = 0; i < observationQuantiles.Length; i++) { output.WriteLine($"{observationQuantiles[i]}," + $"{MonteCarloDistributionOfTheMaximum.CumulativeDensity(observationQuantiles[i])}," + $"{bestModelMonteCarlo.CumulativeDistribution(observationQuantiles[i])}," + $"{approxECDF.CumulativeDensity(observationQuantiles[i])}," + $"{estimatedGEVUnfitted.CumulativeDistribution(observationQuantiles[i])}," + $"{fittedApproxModel.CumulativeDistribution(observationQuantiles[i])}," + $"," + // Space $"{observationQuantiles[i] - upperQuantile}," + $"{MonteCarloDistributionOfTheMaximum.CumulativeDensity(observationQuantiles[i])}," + //$"{quantiles[i] - sample[sample.Length - 1]}," + $"{estimatedGEVUnfitted.InverseCumulativeDistribution(proportions[i]) - estimatedGEVUnfitted.location}," + $"{proportions[i]}," + $"{fittedApproxModel.InverseCumulativeDistribution(proportions[i]) - fittedApproxModel.location}," + $"{proportions[i]}"); } double[] distributionQuantiles = Interpolation.Linspace(0.000001, 0.999999, 2000); for (int i = 0; i < distributionQuantiles.Length; i++) { distributionQuantiles[i] = dist.InverseCumulativeDistribution(distributionQuantiles[i]); } output2.WriteLine("Abscissas,True CDF,Pickands Estimate"); for (int i = 0; i < distributionQuantiles.Length; i++) { output2.WriteLine($"{distributionQuantiles[i]}," + $"{dist.CumulativeDistribution(distributionQuantiles[i])}," + $"{pickandsApprox.CDF(distributionQuantiles[i])}"); } #region Temp for figure output2.WriteLine(""); output2.WriteLine("TrueDist"); output2.WriteLine("\\draw[line width=1.5pt]"); for (int i = 0; i < distributionQuantiles.Length - 1; i++) { output2.WriteLine($"({distributionQuantiles[i]},{dist.CumulativeDistribution(distributionQuantiles[i])}) --"); } output2.WriteLine($"({distributionQuantiles[distributionQuantiles.Length - 1]},{dist.CumulativeDistribution(distributionQuantiles[distributionQuantiles.Length - 1])});"); output2.WriteLine(""); output2.WriteLine("Approx"); output2.WriteLine("\\draw[line width=1.5pt]"); for (int i = 0; i < distributionQuantiles.Length; i++) { output2.WriteLine($"({distributionQuantiles[i]},{pickandsApprox.CDF(distributionQuantiles[i])}) --"); } output2.WriteLine($"({distributionQuantiles[distributionQuantiles.Length - 1]},{pickandsApprox.CDF(distributionQuantiles[distributionQuantiles.Length - 1])});"); #endregion // Clean up output.Dispose(); output2.Dispose(); //table.Dispose(); }