public void fit_gaussian_test()
        {
            #region doc_fit_gaussian
            // Suppose we have the following data, and we would
            // like to estimate a distribution from this data

            double[][] samples =
            {
                new double[] { 0, 1 },
                new double[] { 1, 2 },
                new double[] { 5, 1 },
                new double[] { 7, 1 },
                new double[] { 6, 1 },
                new double[] { 5, 7 },
                new double[] { 2, 1 },
            };

            // Start by specifying a density kernel
            IDensityKernel kernel = new GaussianKernel(dimension: 2);

            // The density kernel gives a window function centered at a particular sample.
            // By placing one of these windows over each sample and averaging, we obtain
            // an empirical multivariate density estimate. For example, a single 2-D
            // Gaussian kernel evaluated at the point (0, 1) gives:
            double z = kernel.Function(new double[] { 0, 1 }); // should be 0.096532352630053914


            // Create a multivariate Empirical distribution from the samples
            var dist = new MultivariateEmpiricalDistribution(kernel, samples);

            // Common measures
            double[] mean   = dist.Mean;     // { 3.71, 2.00 }
            double[] median = dist.Median;   // { 3.71, 2.00 }
            double[] var    = dist.Variance; // { 7.23, 5.00 } (diagonal from cov)
            double[,] cov = dist.Covariance; // { { 7.23, 0.83 }, { 0.83, 5.00 } }

            // Probability density functions
            double pdf1 = dist.ProbabilityDensityFunction(new double[] { 2, 1 });    // 0.017657515909330332
            double pdf2 = dist.ProbabilityDensityFunction(new double[] { 4, 2 });    // 0.011581172997320841
            double pdf3 = dist.ProbabilityDensityFunction(new double[] { 5, 7 });    // 0.0072297668067630525
            double lpdf = dist.LogProbabilityDensityFunction(new double[] { 5, 7 }); // -4.929548496891365
            #endregion

            Assert.AreEqual(0.096532352630053914, z);

            Assert.AreEqual(3.7142857142857144, mean[0]);
            Assert.AreEqual(2.0, mean[1]);
            Assert.AreEqual(3.7142857142857144, median[0]);
            Assert.AreEqual(2.0, median[1]);
            Assert.AreEqual(7.2380952380952381, var[0]);
            Assert.AreEqual(5.0, var[1]);
            Assert.AreEqual(7.2380952380952381, cov[0, 0]);
            Assert.AreEqual(0.83333333333333337, cov[0, 1]);
            Assert.AreEqual(0.83333333333333337, cov[1, 0]);
            Assert.AreEqual(5.0, cov[1, 1]);
            Assert.AreEqual(0.017657515909330332, pdf1);
            Assert.AreEqual(0.011581172997320841, pdf2);
            Assert.AreEqual(0.0072297668067630525, pdf3);
            Assert.AreEqual(-4.929548496891365, lpdf);
        }
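
The fitted estimate is not limited to point densities: it also exposes the empirical CDF and can act as a sampler. A minimal follow-on sketch that could sit at the end of the region above (every member used here also appears in the other examples on this page):

            // Continuing with `dist` from the region above:
            double cdf  = dist.DistributionFunction(new double[] { 5, 2 });              // empirical CDF at (5, 2)
            double ccdf = dist.ComplementaryDistributionFunction(new double[] { 5, 2 }); // 1 - cdf
            double[][] draws = dist.Generate(1000); // sample 1000 new points from the estimate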
        public void GenerateTest1()
        {
            Accord.Math.Tools.SetupGenerator(0);

            double[] mean = { 2, 6 };

            double[,] cov =
            {
                { 2, 1 },
                { 1, 5 }
            };

            var normal = new MultivariateNormalDistribution(mean, cov);

            double[][] source = normal.Generate(10000000);

            var target = new MultivariateEmpiricalDistribution(source);

            Assert.IsTrue(mean.IsEqual(target.Mean, 0.001));
            Assert.IsTrue(cov.IsEqual(target.Covariance, 0.003));

            double[][] samples = target.Generate(10000000);

            double[] sampleMean = samples.Mean();
            double[,] sampleCov = samples.Covariance();

            Assert.AreEqual(2, sampleMean[0], 1e-2);
            Assert.AreEqual(6, sampleMean[1], 1e-2);
            Assert.AreEqual(2, sampleCov[0, 0], 1e-2);
            Assert.AreEqual(1, sampleCov[0, 1], 1e-2);
            Assert.AreEqual(1, sampleCov[1, 0], 1e-2);
            Assert.AreEqual(5, sampleCov[1, 1], 2e-2);
        }
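
With 10,000,000 draws, the standard error of each sample mean is about sqrt(var / n) (for the second coordinate, sqrt(5 / 1e7) ≈ 7e-4), so the 1e-2 tolerances above leave a wide margin.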
        public void FitTest()
        {
            double[][] observations =
            {
                new double[] { 0.1000, -0.2000 },
                new double[] { 0.4000,  0.6000 },
                new double[] { 2.0000,  0.2000 },
                new double[] { 2.0000,  0.3000 }
            };

            var target = new MultivariateEmpiricalDistribution(observations);

            double[] weights = { 0.25, 0.25, 0.25, 0.25 };

            bool thrown = false;

            try
            {
                // fitting with fractional weights is not supported and should throw
                target.Fit(observations, weights);
            }
            catch (ArgumentException)
            {
                thrown = true;
            }

            Assert.IsTrue(thrown);
        }
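
Fractional weights are rejected by Fit, but the same weights are accepted at construction time (see the weighted constructor tests below). A minimal sketch, assuming the same `observations`:

            // Weights can instead be supplied when the distribution is created:
            double[] w = { 0.25, 0.25, 0.25, 0.25 };
            var weighted = new MultivariateEmpiricalDistribution(observations, w);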
        // Helper functions

        // Estimates, for the given interval (in minutes), the probability that a
        // trip exceeds the given pickup delay or duration (one minus the joint
        // empirical CDF), scaled by the recent per-minute trip frequency.
        private static double ComputeDensityEstimation(double pickupDelay, double duration, double interval, IEnumerable<Leg> legs)
        {
            MultivariateEmpiricalDistribution dist = new MultivariateEmpiricalDistribution(legs
                .Where(l => l.NumOfPassengersPickedUp > 0)
                .Select(l => new double[]
                {
                    GetPickupDelay(l),
                    l.ArrivalTime.Subtract(l.StartTime).TotalMinutes
                }).ToArray());

            double probDist = 1 - dist.DistributionFunction(new double[] { pickupDelay, duration });

            // If the pickup delay has essentially zero variance, fall back to a
            // univariate distribution over the trip duration alone
            if (Math.Abs(dist.Variance[0]) < Double.Epsilon)
            {
                dist = new MultivariateEmpiricalDistribution(legs
                    .Where(l => l.NumOfPassengersPickedUp > 0)
                    .Select(l => new double[]
                    {
                        l.ArrivalTime.Subtract(l.StartTime).TotalMinutes
                    }).ToArray());
                probDist = 1 - dist.DistributionFunction(new double[] { duration });
            }

            // Trips per minute over the last +/- 2 days (a 4-day window = 5760 minutes)
            double frequency = legs.Count(l => Math.Abs(l.StartTime.Subtract(DateTime.Now).TotalDays) < 2 &&
                                               l.NumOfPassengersPickedUp > 0) / 5760.0;

            return probDist * frequency * interval;
        }
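
A worked example of the returned product, with hypothetical numbers: if 30% of comparable trips exceed the given pickup delay or duration (probDist = 0.30), and about 0.05 trips per minute were observed over the last ±2 days, then over a 10-minute interval the estimate is 0.30 × 0.05 × 10 = 0.15.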
        public void WeightedEmpiricalDistributionConstructorTest3()
        {
            double[] weights = { 2, 1, 1, 1, 2, 3, 1, 3, 1, 1, 1, 1 };
            double[] samples = { 5, 1, 4, 1, 2, 3, 4, 3, 4, 3, 2, 3 };

            weights = weights.Divide(weights.Sum());

            var target = new MultivariateEmpiricalDistribution(samples.ToJagged(), weights);

            Assert.AreEqual(1.2377597081667415, target.Smoothing[0, 0]);
        }
        public void ConstructorTest4()
        {
            #region doc_fit_epanechnikov
            // Suppose we have the following data, and we would
            // like to estimate a distribution from this data

            double[][] samples =
            {
                new double[] { 0, 1 },
                new double[] { 1, 2 },
                new double[] { 5, 1 },
                new double[] { 7, 1 },
                new double[] { 6, 1 },
                new double[] { 5, 7 },
                new double[] { 2, 1 },
            };

            // Start by specifying a density kernel
            IDensityKernel kernel = new EpanechnikovKernel(dimension: 2);

            // Create a multivariate Empirical distribution from the samples
            var dist = new MultivariateEmpiricalDistribution(kernel, samples);


            // Common measures
            double[] mean   = dist.Mean;     // { 3.71, 2.00 }
            double[] median = dist.Median;   // { 3.71, 2.00 }
            double[] var    = dist.Variance; // { 7.23, 5.00 } (diagonal from cov)
            double[,] cov = dist.Covariance; // { { 7.23, 0.83 }, { 0.83, 5.00 } }

            // Probability density functions
            double pdf1 = dist.ProbabilityDensityFunction(new double[] { 2, 1 });    // 0.039131176997318849
            double pdf2 = dist.ProbabilityDensityFunction(new double[] { 4, 2 });    // 0.010212109770266639
            double pdf3 = dist.ProbabilityDensityFunction(new double[] { 5, 7 });    // 0.02891906722705221
            double lpdf = dist.LogProbabilityDensityFunction(new double[] { 5, 7 }); // -3.5432541357714742
            #endregion

            Assert.AreEqual(3.7142857142857144, mean[0]);
            Assert.AreEqual(2.0, mean[1]);
            Assert.AreEqual(3.7142857142857144, median[0]);
            Assert.AreEqual(2.0, median[1]);
            Assert.AreEqual(7.2380952380952381, var[0]);
            Assert.AreEqual(5.0, var[1]);
            Assert.AreEqual(7.2380952380952381, cov[0, 0]);
            Assert.AreEqual(0.83333333333333337, cov[0, 1]);
            Assert.AreEqual(0.83333333333333337, cov[1, 0]);
            Assert.AreEqual(5.0, cov[1, 1]);
            Assert.AreEqual(0.039131176997318849, pdf1);
            Assert.AreEqual(0.010212109770266639, pdf2);
            Assert.AreEqual(0.02891906722705221, pdf3);
            Assert.AreEqual(-3.5432541357714742, lpdf);
        }
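
Both regions fit the same seven samples, so the two kernels can be compared head to head: only the kernel choice changes the density estimate. A minimal side-by-side sketch, reusing `samples` (the expected values are the ones already shown above):

            var gauss = new MultivariateEmpiricalDistribution(new GaussianKernel(dimension: 2), samples);
            var epan  = new MultivariateEmpiricalDistribution(new EpanechnikovKernel(dimension: 2), samples);
            double pg = gauss.ProbabilityDensityFunction(new double[] { 2, 1 }); // 0.017657515909330332
            double pe = epan.ProbabilityDensityFunction(new double[] { 2, 1 });  // 0.039131176997318849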
        // Set an appropriate distribution for the given data subset: when the first
        // dimension is essentially constant, fall back to a univariate distribution
        // over the second dimension.
        private void SetInputDistribution(MultivariateEmpiricalDistribution inputDistribution,
                                          double[][] dataSubsetSamples, out MultivariateEmpiricalDistribution distToSet, out bool distUnivariate)
        {
            if (Math.Abs(inputDistribution.Variance[0]) < double.Epsilon)
            {
                // essentially a univariate distribution: keep only the second column
                MultivariateEmpiricalDistribution univInputDistribution
                    = new MultivariateEmpiricalDistribution(dataSubsetSamples.Select(a => new double[] { a[1] }).ToArray());
                distToSet      = univInputDistribution;
                distUnivariate = true;
            }
            else
            {
                distToSet      = inputDistribution;
                distUnivariate = false;
            }
        }
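
A hedged usage sketch with made-up data (the helper is private, so this would run inside the owning class): a constant first column triggers the univariate fallback.

        double[][] data =
        {
            new double[] { 3, 10 },
            new double[] { 3, 12 },
            new double[] { 3,  9 },
        };
        var bivariate = new MultivariateEmpiricalDistribution(data);
        SetInputDistribution(bivariate, data, out var chosen, out bool univariate);
        // univariate == true; `chosen` now models only the second column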
        public void WeightedEmpiricalDistributionConstructorTest2()
        {
            double[] original     = { 5, 5, 1, 4, 1, 2, 2, 3, 3, 3, 4, 3, 3, 3, 4, 3, 2, 3 };
            var      distribution = new MultivariateEmpiricalDistribution(original.ToJagged());

            double[]   weights = { 2, 1, 1, 1, 2, 3, 1, 3, 1, 1, 1, 1 };
            double[]   source  = { 5, 1, 4, 1, 2, 3, 4, 3, 4, 3, 2, 3 };
            double[][] samples = source.ToJagged();

            weights = weights.Divide(weights.Sum());

            var target = new MultivariateEmpiricalDistribution(samples,
                                                               weights, distribution.Smoothing);

            Assert.AreEqual(distribution.Mean[0], target.Mean[0]);
            Assert.AreEqual(distribution.Median[0], target.Median[0]);
            Assert.AreEqual(distribution.Mode[0], target.Mode[0]);
            Assert.AreEqual(distribution.Smoothing[0, 0], target.Smoothing[0, 0]);
            Assert.AreEqual(1.3655172413793104, target.Variance[0]);
            Assert.AreEqual(target.Weights, weights);
            Assert.AreEqual(target.Samples, samples);

            for (double x = 0; x < 6; x += 0.1)
            {
                double actual, expected;
                expected = distribution.ComplementaryDistributionFunction(x);
                actual   = target.ComplementaryDistributionFunction(x);
                Assert.AreEqual(expected, actual, 1e-15);

                expected = distribution.DistributionFunction(x);
                actual   = target.DistributionFunction(x);
                Assert.AreEqual(expected, actual, 1e-15);

                expected = distribution.LogProbabilityDensityFunction(x);
                actual   = target.LogProbabilityDensityFunction(x);
                Assert.AreEqual(expected, actual, 1e-15);

                expected = distribution.ProbabilityDensityFunction(x);
                actual   = target.ProbabilityDensityFunction(x);
                Assert.AreEqual(expected, actual, 1e-15);
            }
        }
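
The agreement to within 1e-15 is no coincidence: the weights encode the same multiset as `original`. The twelve weights sum to 18, the length of `original`, and each value's total weight matches its number of occurrences there (the value 3, for instance, carries weight 3 + 3 + 1 + 1 = 8 and appears 8 times in `original`).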
        public void FitTest2()
        {
            double[][] observations =
            {
                new double[] { 0.1000, -0.2000 },
                new double[] { 0.4000,  0.6000 },
                new double[] { 2.0000,  0.2000 },
                new double[] { 2.0000,  0.3000 }
            };

            double[] mean = Accord.Statistics.Tools.Mean(observations);
            double[,] cov = Accord.Statistics.Tools.Covariance(observations);

            var target = new MultivariateEmpiricalDistribution(observations);

            target.Fit(observations);

            Assert.IsTrue(Matrix.IsEqual(mean, target.Mean));
            Assert.IsTrue(Matrix.IsEqual(cov, target.Covariance, 1e-10));
        }
        public void FitTest()
        {
            double[] original     = { 5, 5, 1, 4, 1, 2, 2, 3, 3, 3, 4, 3, 3, 3, 4, 3, 2, 3 };
            var      distribution = new MultivariateEmpiricalDistribution(original.ToJagged());

            int[]      weights = { 2, 1, 1, 1, 2, 3, 1, 3, 1, 1, 1, 1 };
            double[]   sources = { 5, 1, 4, 1, 2, 3, 4, 3, 4, 3, 2, 3 };
            double[][] samples = sources.ToJagged();
            var        target  = new MultivariateEmpiricalDistribution(Jagged.Zeros(1, 1));

            target.Fit(samples, weights);

            Assert.AreEqual(distribution.Mean[0], target.Mean[0]);
            Assert.AreEqual(distribution.Median[0], target.Median[0]);
            Assert.AreEqual(distribution.Mode[0], target.Mode[0]);
            Assert.AreEqual(distribution.Smoothing[0, 0], target.Smoothing[0, 0]);
            Assert.AreEqual(distribution.Variance[0], target.Variance[0]);
            Assert.IsTrue(target.Weights.IsEqual(weights.Divide(weights.Sum())));
            Assert.AreEqual(target.Samples, samples);

            for (double x = 0; x < 6; x += 0.1)
            {
                double actual, expected;
                expected = distribution.ComplementaryDistributionFunction(x);
                actual   = target.ComplementaryDistributionFunction(x);
                Assert.AreEqual(expected, actual);

                expected = distribution.DistributionFunction(x);
                actual   = target.DistributionFunction(x);
                Assert.AreEqual(expected, actual);

                expected = distribution.LogProbabilityDensityFunction(x);
                actual   = target.LogProbabilityDensityFunction(x);
                Assert.AreEqual(expected, actual, 1e-15);

                expected = distribution.ProbabilityDensityFunction(x);
                actual   = target.ProbabilityDensityFunction(x);
                Assert.AreEqual(expected, actual, 1e-15);
            }
        }
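
Here the weights are integers and act as repetition counts, which is why `target` reproduces `distribution` exactly and the CDF comparisons above need no tolerance. Note the contrast with FitTest earlier: Fit rejects fractional double[] weights but accepts integer counts.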
        public void WeightedEmpiricalDistribution_DistributionFunction()
        {
            double[][] samples =
            {
                new double[] { 5, 2 },
                new double[] { 1, 5 },
                new double[] { 4, 7 },
                new double[] { 1, 6 },
                new double[] { 2, 2 },
                new double[] { 3, 4 },
                new double[] { 4, 8 },
                new double[] { 3, 2 },
                new double[] { 4, 4 },
                new double[] { 3, 7 },
                new double[] { 2, 4 },
                new double[] { 3, 1 },
            };


            var target = new MultivariateEmpiricalDistribution(samples);

            double[] expected =
            {
                0.33333333333333331, 0.083333333333333329, 0.83333333333333337,
                0.16666666666666666, 0.083333333333333329, 0.41666666666666669,
                0.91666666666666663,                 0.25,                 0.5,
                0.66666666666666663,  0.16666666666666666, 0.083333333333333329
            };

            for (int i = 0; i < samples.Length; i++)
            {
                double e = expected[i];
                double a = target.DistributionFunction(samples[i]);
                Assert.AreEqual(e, a);
            }
        }
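
Each expected value above is a multiple of 1/12: DistributionFunction here returns the plain empirical CDF, i.e. the fraction of samples whose coordinates are all less than or equal to the query point. A minimal cross-check sketch, assuming the same `samples`:

        private static double EmpiricalCdf(double[][] data, double[] x)
        {
            int count = 0;
            foreach (double[] s in data)
            {
                // x "dominates" s when every coordinate of s is <= that of x
                bool dominated = true;
                for (int d = 0; d < x.Length; d++)
                {
                    if (s[d] > x[d]) { dominated = false; break; }
                }
                if (dominated)
                    count++;
            }
            return count / (double)data.Length;
        }
        // e.g. EmpiricalCdf(samples, new double[] { 5, 2 }) gives 4/12 = 0.3333...,
        // matching expected[0] above.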
        public async Task LearnFromDates(DateTime from, DateTime to)
        {
            int maxPickups = await GetMaxNumberOfPickups();

            // train clustering algorithm
            await _locationClustering.RetrainAsync(from, to);

            // initialize storage arrays
            int pickupElementSize = maxPickups + 1;

            InitStorageArray(ref clusterFareClassRegressions, pickupElementSize - 1, NumberOfFareClassIntervals - 1);
            InitStorageArray(ref clusterFareClassInputDensityKernels, pickupElementSize - 1, NumberOfFareClassIntervals);
            InitStorageArray(ref clusterFareClassDistributionsUnivariate, pickupElementSize - 1, NumberOfFareClassIntervals);
            InitStorageArray(ref clusterPickupFrequencies, pickupElementSize);
            InitStorageArray(ref clusterPickupInputDensityKernels, pickupElementSize);
            InitStorageArray(ref clusterPickupInputDensityKernelsUnivariate, pickupElementSize);

            // for each cluster
            for (int i = 0; i < _locationClustering.NumberOfClusters; i++)
            {
                // obtain data set
                IEnumerable<Task<Pair<Leg, bool>>> decisionTasks = (await _legRepository.ListAsync())
                    .Where(leg => leg.StartTime.CompareTo(from) > 0 && leg.StartTime.CompareTo(to) < 0)
                    .Select(async leg =>
                    {
                        LegCoordinates coords = await _geocodingDbSync.GetLegCoordinatesAsync(leg.LegID);
                        double[] dp = (new decimal[] { coords.StartLatitude, coords.StartLongitude, coords.DestLatitude, coords.DestLongitude })
                            .Select(Convert.ToDouble).ToArray();
                        return new Pair<Leg, bool>(leg, _locationClustering.ClusterCollection.Decide(dp) == i);
                    });
                Pair<Leg, bool>[] decisions = await Task.WhenAll(decisionTasks);

                // Data input values (pickup delay, travel time) in this cluster
                IEnumerable<Leg> dataLegs = decisions.Where(pair => pair.Second).Select(pair => pair.First);
                double[][] dataset = dataLegs
                    .Select(leg => new double[]
                    {
                        leg.PickupRequestTime.HasValue
                            ? leg.StartTime.Subtract(leg.PickupRequestTime.Value).TotalMinutes
                            : 0,
                        leg.ArrivalTime.Subtract(leg.StartTime).TotalMinutes
                    }).ToArray();
                // Fare classes in this cluster: the index of the first interval
                // boundary strictly greater than the fare, or Count() if none
                int[] fareClasses = dataLegs
                    .Select(leg =>
                    {
                        for (int j = 0; j < FareClassIntervals.Count(); j++)
                        {
                            if (Convert.ToDecimal(FareClassIntervals.ElementAt(j)) > leg.Fare)
                                return j;
                        }
                        return FareClassIntervals.Count();
                    }).ToArray();
                // Pickup numbers in this cluster
                int[] pickupNumbers = dataLegs.Select(leg => leg.NumOfPassengersPickedUp).ToArray();


                // for each possible number of pickups
                for (int n = 1; n <= maxPickups; n++)
                {
                    double[][] dataSubset        = dataset.Where((dp, k) => pickupNumbers[k] == n).ToArray();
                    int[]      fareClassesSubset = fareClasses.Where((fc, k) => pickupNumbers[k] == n).ToArray();

                    if (dataSubset.Length == 0)
                    {
                        throw new ApplicationException("Insufficient data to make a reliable prediction");
                    }

                    // for each fare class interval boundary
                    for (int j = 0; j < NumberOfFareClassIntervals; j++)
                    {
                        // train logistic regression
                        if (j > 0 && clusterFareClassRegressions[i][n - 1][j - 1] == null)
                        {
                            clusterFareClassRegressions[i][n - 1][j - 1] = _logisticRegressionAnalysis
                                                                           .Learn(dataSubset, fareClassesSubset.Select(fc => fc >= j ? 1.0 : 0.0).ToArray());
                        }

                        // train empirical density functions
                        if (fareClassesSubset.Any(fc => fc >= j))
                        {
                            double[][] dataSubsetSamples = dataSubset.Where((dp, k) => fareClassesSubset[k] >= j).ToArray();
                            MultivariateEmpiricalDistribution fareClassInputDistribution
                                = new MultivariateEmpiricalDistribution(dataSubsetSamples);

                            SetInputDistribution(fareClassInputDistribution, dataSubsetSamples,
                                                 out clusterFareClassInputDensityKernels[i][n - 1][j],
                                                 out clusterFareClassDistributionsUnivariate[i][n - 1][j]);
                        }
                    }
                }

                // compute pickup frequencies
                for (int l = 0; l < pickupElementSize; l++)
                {
                    clusterPickupFrequencies[i][l] = Convert.ToDouble(dataLegs.Count(leg => leg.NumOfPassengersPickedUp == l))
                                                     / to.Subtract(from).TotalMinutes;

                    if (pickupNumbers.Any(pn => pn == l))
                    {
                        double[][] samples = dataset.Where((dp, k) => pickupNumbers[k] == l).ToArray();
                        MultivariateEmpiricalDistribution pickupInputDistribution
                            = new MultivariateEmpiricalDistribution(samples);

                        SetInputDistribution(pickupInputDistribution, samples,
                                             out clusterPickupInputDensityKernels[i][l],
                                             out clusterPickupInputDensityKernelsUnivariate[i][l]);
                    }
                }
            }
        }