Пример #1
0
        /// <summary> Builds a ContinuousDistribution approximation made of a piecewise-linear ECDF over the body of the data, joined to an upper tail whose parameters are estimated with Pickands' algorithm. </summary>
        /// <param name="data"> Observations of a random variable as an indexable collection (array, list, etc.), sorted in increasing order. </param>
        /// <param name="resolution"> Number of evenly spaced tail-probability steps used to discretise the fitted tail. </param>
        /// <returns> A ContinuousDistribution constructed from the combined abscissas and CDF values. </returns>
        /// <remarks> Handy for testing, but comparatively costly in both computation and storage. The ECDF used is the right-continuous variant rather than the left-continuous one, which makes very little practical difference. </remarks>
        public static ContinuousDistribution ApproximatePiecewiseDistributionWithUpperTail(IList<double> data, int resolution = 1000)
        {
            int sampleCount = data.Count;
            int capacity    = sampleCount + resolution;

            // --- Body: abscissas are the observations, CDF values are k/n for k = 0..n-1 ---
            var xs = new List<double>(capacity);
            xs.AddRange(data);

            var ps = new List<double>(capacity);
            for (int k = 0; k < sampleCount; k++)
            {
                ps.Add(k * 1.0 / sampleCount);
            }

            // --- Tail: estimate the parameters, then splice the fitted tail onto the body ---
            ApproximateExcessDistributionParametersPickands(data, out double a, out double c, out int m);

            // Drop the top 4m-1 points from both lists; the last remaining point is Z_4M, where the tail attaches
            int dropCount = 4 * m - 1;
            xs.RemoveRange(xs.Count - dropCount, dropCount);
            ps.RemoveRange(ps.Count - dropCount, dropCount);

            // Evenly spaced proportions of the conditional excess distribution.
            // The first one (0) is discarded: the body already supplies the point at the threshold, where the excess is 0.
            var tailProportions = new List<double>(Interpolation.Linspace(0, 1, resolution + 1));
            tailProportions.RemoveAt(0);

            // For an unbounded tail the quantile at exactly 1 is not usable, so back off to 1 - 2^-50 (1 - 4 * epsilon)
            if (c >= 0)
            {
                tailProportions[tailProportions.Count - 1] = 1 - Math.Pow(2, -50);
            }

            double attachPoint = xs[xs.Count - 1]; // Z_4M: horizontal position where the tail starts
            double baseProb    = ps[ps.Count - 1]; // Vertical offset: probability already covered by the body
            double tailMass    = 1 - baseProb;     // Probability left over for the tail

            foreach (double proportion in tailProportions)
            {
                // Turn the proportion into an excess over the threshold, then record its CDF value and shifted abscissa
                double excess = TailQuantileFunction(proportion, a, c);
                ps.Add(tailMass * TailCDF(excess, a, c) + baseProb);
                xs.Add(excess + attachPoint);
            }

            return new ContinuousDistribution(xs, ps);
        }
Пример #2
0
        /// <summary> Scratch experiment: for sample sizes n = 150, 160, ..., 490, prints the (n - 0.78)/n quantile of the base distribution, a closed-form location estimate, and a GEV location estimate obtained by method of moments from maxima bootstrapped out of a GPD tail approximation. </summary>
        /// <remarks>
        /// The moment formulas used for the bootstrap estimate involve Gamma(1 - shape) and Gamma(1 - 2 * shape) and divide by the shape,
        /// so they yield NaN or meaningless values when the fitted shape is 0 or is 0.5 or larger.
        /// </remarks>
        public static void TestGEVLocation()
        {
            const int quantileCount = 10000; // Size of the synthetic "sample" of exact quantiles fed to the tail fit
            const int replications  = 10000; // Number of bootstrapped maxima per sample size

            //var dist = new Normal(0, 1, Program.rand);
            var dist = new Exponential(2, Program.rand);

            // NOTE(review): this looks like the classical asymptotic location for the maximum of n standard normal
            // variables; it is not expected to match when dist is the Exponential above - confirm which is intended.
            double estimateLoc(int n) => Math.Sqrt(2 * Math.Log(n) - Math.Log(Math.Log(n)) - Math.Log(4 * Math.PI));

            // The quantile grid does not depend on n, so build it once instead of once per sample size
            var props  = Interpolation.Linspace(0.000000001, 0.999999999, quantileCount);
            var quants = new double[quantileCount];
            for (int i = 0; i < quants.Length; i++)
            {
                quants[i] = dist.InverseCumulativeDistribution(props[i]);
            }

            for (int n = 150; n < 500; n += 10)
            {
                double proportion = (n - 0.78) / n;
                double quant      = dist.InverseCumulativeDistribution(proportion);
                double estimate   = estimateLoc(n);

                // NOTE(review): the fit depends only on quants, so it could probably be hoisted out of this loop too,
                // provided constructing a GPDApproximation is deterministic and Sample() keeps no per-instance state.
                var tailApprox = new GPDApproximation(quants, GPDApproximation.FittingMethod.V4);

                // Bootstrap the distribution of the maximum of n draws from the tail approximation
                var sample = new double[replications];
                for (int i = 0; i < sample.Length; i++)
                {
                    // Start from -infinity so the result is a true maximum even if draws can be negative
                    double max = double.NegativeInfinity;
                    for (int j = 0; j < n; j++)
                    {
                        max = Math.Max(max, tailApprox.Sample());
                    }
                    sample[i] = max;
                }

                // Method-of-moments GEV scale and location from the bootstrapped maxima, using the fitted tail shape
                double shapeGuess    = tailApprox.c;
                double g1            = SpecialFunctions.Gamma(1 - shapeGuess);
                double g2            = SpecialFunctions.Gamma(1 - 2 * shapeGuess);
                double scaleGuess    = Math.Sqrt(Statistics.Variance(sample) * shapeGuess * shapeGuess / (g2 - g1 * g1));
                double locationGuess = Statistics.Mean(sample) - scaleGuess * (g1 - 1) / shapeGuess;
                Console.WriteLine($"Quantile: {quant} IntroEst: {estimate} Error: {Math.Abs(estimate - quant)} Bootstrap:{locationGuess} Shape {shapeGuess}");
            }
        }
Пример #3
0
        /// <summary> Scratchwork experiment: compares the Monte Carlo distribution of the maximum of a fixed-size sample with GEV models fitted to it and to maxima bootstrapped from a GPD tail approximation, writing the results to "GEV Test A.csv" and "GEV Test B.csv". </summary>
        /// <remarks> Both loggers are disposed even if the experiment throws part-way through. </remarks>
        public static void TestGEV() // Scratchwork, prototyping, etc.
        {
            Logger output = new Logger("GEV Test A.csv");
            try
            {
                Logger output2 = new Logger("GEV Test B.csv");
                try
                {
                    RunGEVExperiment(output, output2);
                }
                finally
                {
                    output2.Dispose();
                }
            }
            finally
            {
                output.Dispose();
            }
        }

        /// <summary> Draws <paramref name="replications"/> independent observations of the maximum of <paramref name="sampleSize"/> values produced by <paramref name="draw"/>. </summary>
        /// <param name="draw"> Produces one random observation per call. </param>
        /// <param name="sampleSize"> Number of draws over which each maximum is taken. </param>
        /// <param name="replications"> Number of maxima to generate. </param>
        /// <returns> The maxima, in the order they were generated (not sorted). </returns>
        private static double[] SimulateSampleMaxima(Func<double> draw, int sampleSize, int replications)
        {
            var maxima = new double[replications];

            for (int i = 0; i < maxima.Length; i++)
            {
                double max = double.NegativeInfinity;
                for (int j = 0; j < sampleSize; j++)
                {
                    max = Math.Max(max, draw());
                }
                maxima[i] = max;
            }
            return maxima;
        }

        /// <summary> Body of the TestGEV experiment. The caller owns both loggers and is responsible for disposing them. </summary>
        /// <param name="output"> Receives the distribution-of-the-maximum comparison table. </param>
        /// <param name="output2"> Receives the true CDF versus tail-approximation CDF table and the TikZ path data. </param>
        private static void RunGEVExperiment(Logger output, Logger output2)
        {
            // Alternatives tried previously: ChiSquared(4), Beta(2, 5), Beta(2, 1.5), Exponential(2), Gamma(2, 2)
            var dist = new Beta(2, 2);

            output.WriteLine($"Distribution: {dist.ToString().Replace(',',' ')}");

            const int sampleSize   = 300;
            const int replications = 10000;

            output.WriteLine($"Samplesize: {sampleSize}");

            // Report the 1 - 1/n and 1/n quantiles of the underlying distribution, where n is the sample size
            double upperQuantile = dist.InverseCumulativeDistribution(1 - 1.0 / sampleSize);
            double lowerQuantile = dist.InverseCumulativeDistribution(1.0 / sampleSize);

            output.WriteLine($"1-1/samplesize quantile: {upperQuantile}");
            output.WriteLine($"1/samplesize quantile: {lowerQuantile}");

            // Monte Carlo for the true distribution of the sample maximum
            double[] observations = SimulateSampleMaxima(() => dist.Sample(), sampleSize, replications);
            Sorting.Sort(observations);

            ContinuousDistribution monteCarloDistributionOfTheMaximum = ContinuousDistribution.ECDF(observations, Program.rand);

            // Sum of squared differences between a GEV model's CDF and a reference ECDF, evaluated at the given points
            double SquaredCdfDistance(GEV model, double[] points, ContinuousDistribution reference)
            {
                double total = 0;

                foreach (double x in points)
                {
                    double difference = model.CumulativeDistribution(x) - reference.CumulativeDensity(x);
                    total += difference * difference;
                }
                return total;
            }

            // Box-constrained minimisation of objectiveFunc over (shape, scale, location), starting from the supplied guesses.
            // NOTE(review): the scale and location boxes are +/- 3 times the initial guess, so the scale box includes
            // non-positive values and both boxes collapse to a point if the corresponding guess is 0 - confirm this is intended.
            GEV OptimizeBFGS(Func<Vector<double>, double> objectiveFunc, double initialShape, double initialScale, double initialLocation)
            {
                // Formatted by shape, scale, location
                var lowerBounds  = CreateVector.DenseOfArray(new double[] { -10, Math.Min(-3 * initialScale, 3 * initialScale), Math.Min(-3 * initialLocation, 3 * initialLocation) });
                var upperBounds  = CreateVector.DenseOfArray(new double[] { 10, Math.Max(-3 * initialScale, 3 * initialScale), Math.Max(-3 * initialLocation, 3 * initialLocation) });
                var initialGuess = CreateVector.DenseOfArray(new double[] { initialShape, initialScale, initialLocation });

                var min = FindMinimum.OfFunctionConstrained(objectiveFunc, lowerBounds, upperBounds, initialGuess);

                return new GEV(min[2], min[1], min[0], Program.rand);
            }

            // --- Find the best fit GEV distribution for the Monte Carlo maxima ---
            double scaleGuess    = Math.Sqrt(6 * Statistics.VarianceEstimate(observations)) / Math.PI;
            double locationGuess = Statistics.Median(observations) + scaleGuess * Math.Log(Math.Log(2));
            double shapeGuess    = 0.5; // Use Pickands estimator here in the actual model
            Func<Vector<double>, double> objectiveFunction =
                x => SquaredCdfDistance(new GEV(x[2], x[1], x[0], Program.rand), observations, monteCarloDistributionOfTheMaximum);
            GEV bestModelMonteCarlo = OptimizeBFGS(objectiveFunction, shapeGuess, scaleGuess, locationGuess);

            output.WriteLine($"MC Exact GEV Model: shape{bestModelMonteCarlo.shape} location{bestModelMonteCarlo.location} scale {bestModelMonteCarlo.scale}");

            // --- Take one sample from dist and build the tail approximation from it ---
            double[] sample = new double[sampleSize];
            dist.Samples(sample);
            Sorting.Sort(sample);
            // Report the sample maximum
            output.WriteLine($"Sample maximum: {sample[sample.Length - 1]}");

            var pickandsApprox = new GPDApproximation(sample, method: GPDApproximation.FittingMethod.V4);

            // Bootstrap observations of the distribution of the sample maximum from the tail approximation
            double[] approxObservations = SimulateSampleMaxima(() => pickandsApprox.Sample(), sampleSize, observations.Length);

            // The Monte Carlo maxima above are sorted before being handed to ECDF, so do the same here.
            // NOTE(review): confirm whether ContinuousDistribution.ECDF requires sorted input or sorts internally.
            Sorting.Sort(approxObservations);
            ContinuousDistribution approxECDF = ContinuousDistribution.ECDF(approxObservations); // ECDF of the bootstrapped observations

            // Guess location and scale from the bootstrapped maxima, using the tail approximation's shape
            shapeGuess = pickandsApprox.c;
            if (shapeGuess < 0)
            {
                // Method of moments using the mean and variance
                double g1 = SpecialFunctions.Gamma(1 - shapeGuess);
                double g2 = SpecialFunctions.Gamma(1 - 2 * shapeGuess);
                scaleGuess    = Math.Sqrt(Statistics.Variance(approxObservations) * shapeGuess * shapeGuess / (g2 - g1 * g1));
                locationGuess = Statistics.Mean(approxObservations) - scaleGuess * (g1 - 1) / shapeGuess;
            }
            else
            {
                // Same variance/median-based guess as used for the Monte Carlo fit above
                scaleGuess    = Math.Sqrt(6 * Statistics.Variance(approxObservations)) / Math.PI;
                locationGuess = Statistics.Median(approxObservations) + scaleGuess * Math.Log(Math.Log(2));
            }

            GEV estimatedGEVUnfitted = new GEV(location: locationGuess, scale: scaleGuess, shape: pickandsApprox.c); // Using the tail approximation's shape

            output.WriteLine($"UnfittedGEVModel: shape{estimatedGEVUnfitted.shape} location{estimatedGEVUnfitted.location} scale {estimatedGEVUnfitted.scale}");

            // Fit the model to the data drawn from the tail approximation
            objectiveFunction = x => SquaredCdfDistance(new GEV(x[2], x[1], x[0], Program.rand), approxObservations, approxECDF);
            GEV fittedApproxModel = OptimizeBFGS(objectiveFunction, pickandsApprox.c, scaleGuess, locationGuess);

            output.WriteLine($"FittedGEVModel: shape{fittedApproxModel.shape} location{fittedApproxModel.location} scale {fittedApproxModel.scale}");

            // --- First file: CDF comparison at quantiles of the Monte Carlo maxima ---
            double[] proportions          = Interpolation.Linspace(0.000001, 0.999999, 2000);
            double[] observationQuantiles = new double[proportions.Length];
            for (int i = 0; i < observationQuantiles.Length; i++)
            {
                observationQuantiles[i] = Statistics.Quantile(observations, proportions[i]);
            }

            // NOTE(review): the last four data columns below are written as unfitted-then-fitted, while this header
            // names them "Model" then "Unfitted" - confirm which order is intended before relying on the labels.
            output.WriteLine("Abscissas,Monte Carlo ECDF,GEV Fit of MC ECDF,Estimated ECDF,Estimated GEV Unfitted,Estimated GEV Fitted,,ErrDistExactAbscissas,ErrDistExactValues,ErrDistModelAbscissas,ErrDistModelValues,ErrDistUnfittedAbscissas,ErrDistUnfittedValues");
            for (int i = 0; i < observationQuantiles.Length; i++)
            {
                double x               = observationQuantiles[i];
                double p               = proportions[i];
                double monteCarloValue = monteCarloDistributionOfTheMaximum.CumulativeDensity(x);

                output.WriteLine($"{x}," +
                                 $"{monteCarloValue}," +
                                 $"{bestModelMonteCarlo.CumulativeDistribution(x)}," +
                                 $"{approxECDF.CumulativeDensity(x)}," +
                                 $"{estimatedGEVUnfitted.CumulativeDistribution(x)}," +
                                 $"{fittedApproxModel.CumulativeDistribution(x)}," +
                                 $"," + // Space
                                 $"{x - upperQuantile}," +
                                 $"{monteCarloValue}," +
                                 $"{estimatedGEVUnfitted.InverseCumulativeDistribution(p) - estimatedGEVUnfitted.location}," +
                                 $"{p}," +
                                 $"{fittedApproxModel.InverseCumulativeDistribution(p) - fittedApproxModel.location}," +
                                 $"{p}");
            }

            // --- Second file: true CDF versus the tail approximation at quantiles of dist ---
            double[] distributionQuantiles = new double[proportions.Length];
            for (int i = 0; i < distributionQuantiles.Length; i++)
            {
                distributionQuantiles[i] = dist.InverseCumulativeDistribution(proportions[i]);
            }

            output2.WriteLine("Abscissas,True CDF,Pickands Estimate");
            for (int i = 0; i < distributionQuantiles.Length; i++)
            {
                output2.WriteLine($"{distributionQuantiles[i]}," +
                                  $"{dist.CumulativeDistribution(distributionQuantiles[i])}," +
                                  $"{pickandsApprox.CDF(distributionQuantiles[i])}");
            }

            // Temp for figure: writes one TikZ \draw path through (x, cdf(x)) at every distribution quantile.
            // Every point but the last is followed by "--"; the last one closes the path with ";" and is written only once.
            void WriteTikzPath(string label, Func<double, double> cdf)
            {
                output2.WriteLine("");
                output2.WriteLine(label);
                output2.WriteLine("\\draw[line width=1.5pt]");

                int last = distributionQuantiles.Length - 1;
                for (int i = 0; i < last; i++)
                {
                    output2.WriteLine($"({distributionQuantiles[i]},{cdf(distributionQuantiles[i])}) --");
                }
                output2.WriteLine($"({distributionQuantiles[last]},{cdf(distributionQuantiles[last])});");
            }

            WriteTikzPath("TrueDist", x => dist.CumulativeDistribution(x));
            WriteTikzPath("Approx", x => pickandsApprox.CDF(x));
        }
Пример #4
0
        /// <summary> Chooses a tail threshold and the matching excess-distribution parameters by scoring candidate thresholds: for each candidate the tail parameters are estimated by method of moments and scored by a midpoint mean-squared error, and the best-scoring candidate is kept and then refined locally. </summary>
        /// <param name="sortedData"> Observations sorted in increasing order. At least 8 are required so that two or more candidate thresholds can be formed. </param>
        /// <param name="a"> Receives the scale estimate belonging to the selected threshold. </param>
        /// <param name="c"> Receives the shape estimate belonging to the selected threshold. </param>
        /// <param name="u"> Receives the selected threshold. </param>
        /// <exception cref="ArgumentNullException"> <paramref name="sortedData"/> is null. </exception>
        /// <exception cref="ArgumentException"> <paramref name="sortedData"/> has fewer than 8 elements. </exception>
        /// <remarks> If no candidate produces a score below positive infinity (for example, every score is NaN), all three outputs are returned as 0. </remarks>
        internal static void ApproximateExcessDistributionParametersV4(IList<double> sortedData, out double a, out double c, out double u)
        {
            if (sortedData == null)
            {
                throw new ArgumentNullException(nameof(sortedData));
            }
            // Count / 4 candidate thresholds are generated and the refinement step needs the spacing between the first two
            if (sortedData.Count < 8)
            {
                throw new ArgumentException("At least 8 observations are required to fit the tail.", nameof(sortedData));
            }

            // The upper tail is defined here by an ECDF interpolating linearly from (u,0) to (x_i, i/n) for data x_1, x_2, ..., x_n all greater than u.
            // This is the model from which we compute the upper tail parameters, using method of moments.
            // This midpoint version works slightly better than the plain ECDF
            double MidpointMSE(IList<double> tailData, double scaleParam, double shapeParam)
            {
                int n = tailData.Count;

                double sum = 0;

                for (int i = 0; i < n - 1; i++)
                {
                    // Compare the fitted tail CDF at the midpoint of consecutive tail points (measured from the first
                    // tail point) with the ECDF level halfway between i/n and (i+1)/n
                    double midpointExcess = 0.5 * (tailData[i] + tailData[i + 1]) - tailData[0];
                    double residual       = (2.0 * i + 1) / (2.0 * n) - TailCDF(midpointExcess, scaleParam, shapeParam);
                    sum += residual * residual;
                }
                return sum / (n - 1);
            }

            // Fits the tail above uval and returns its score (lower is better) together with the fitted parameters
            double GetScore(double uval, out double scaleParam, out double shapeParam)
            {
                var tailData = GetTailData(sortedData, uval);

                EstimateParamsMOM(tailData, out scaleParam, out shapeParam);
                return MidpointMSE(tailData, scaleParam, shapeParam);
            }

            // --- Coarse scan: candidates evenly spaced from the smallest observation up to the fifth-largest ---
            var    uValues   = Interpolation.Linspace(sortedData[0], sortedData[sortedData.Count - 5], sortedData.Count / 4);
            double bestU     = 0;
            double bestA     = 0;
            double bestC     = 0;
            double bestScore = double.PositiveInfinity;

            for (int i = 0; i < uValues.Length; i++)
            {
                double score = GetScore(uValues[i], out double scaleEst, out double shapeEst);
                if (score < bestScore)
                {
                    bestScore = score;
                    bestU     = uValues[i];
                    bestA     = scaleEst;
                    bestC     = shapeEst;
                }
            }

            // --- Refine the best so far with a step-halving search around it ---
            double delta = uValues[1] - uValues[0];

            for (int i = 0; i < 10; i++)
            {
                delta *= 0.5;

                // Fix both candidates before any update, so that each score stays paired with the threshold it was computed for
                double forwardU  = Math.Min(bestU + delta, sortedData[sortedData.Count - 3]); // Don't go so high that we don't have data to work with
                double backwardU = bestU - delta;

                double forwardScore  = GetScore(forwardU, out double forwardScale, out double forwardShape);
                double backwardScore = GetScore(backwardU, out double backwardScale, out double backwardShape);

                if (forwardScore < bestScore)
                {
                    bestScore = forwardScore;
                    bestU     = forwardU;
                    bestA     = forwardScale;
                    bestC     = forwardShape;
                }
                if (backwardScore < bestScore)
                {
                    bestScore = backwardScore;
                    bestU     = backwardU;
                    bestA     = backwardScale;
                    bestC     = backwardShape;
                }
            }

            u = bestU;
            a = bestA;
            c = bestC;
        }