public void fit_gaussian_test()
{
    #region doc_fit_gaussian
    // Suppose we have the following data, and we would
    // like to estimate a distribution from this data
    double[][] samples =
    {
        new double[] { 0, 1 },
        new double[] { 1, 2 },
        new double[] { 5, 1 },
        new double[] { 7, 1 },
        new double[] { 6, 1 },
        new double[] { 5, 7 },
        new double[] { 2, 1 },
    };

    // Start by specifying a density kernel
    IDensityKernel kernel = new GaussianKernel(dimension: 2);

    // The density kernel gives a window function centered at a particular sample.
    // By creating one of those windows for each sample, we can achieve an empirical
    // multivariate distribution function. An output example for a single Gaussian
    // kernel would be:
    double z = kernel.Function(new double[] { 0, 1 }); // should be 0.096532352630053914

    // Create a multivariate Empirical distribution from the samples
    var dist = new MultivariateEmpiricalDistribution(kernel, samples);

    // Common measures
    double[] mean = dist.Mean;       // { 3.71, 2.00 }
    double[] median = dist.Median;   // { 3.71, 2.00 }
    double[] var = dist.Variance;    // { 7.23, 5.00 } (diagonal from cov)
    double[,] cov = dist.Covariance; // { { 7.23, 0.83 }, { 0.83, 5.00 } }

    // Probability density functions
    double pdf1 = dist.ProbabilityDensityFunction(new double[] { 2, 1 }); // 0.017657515909330332
    double pdf2 = dist.ProbabilityDensityFunction(new double[] { 4, 2 }); // 0.011581172997320841
    double pdf3 = dist.ProbabilityDensityFunction(new double[] { 5, 7 }); // 0.0072297668067630525
    double lpdf = dist.LogProbabilityDensityFunction(new double[] { 5, 7 }); // -4.929548496891365
    #endregion

    Assert.AreEqual(0.096532352630053914, z);
    Assert.AreEqual(3.7142857142857144, mean[0]);
    Assert.AreEqual(2.0, mean[1]);
    Assert.AreEqual(3.7142857142857144, median[0]);
    Assert.AreEqual(2.0, median[1]);
    Assert.AreEqual(7.2380952380952381, var[0]);
    Assert.AreEqual(5.0, var[1]);
    Assert.AreEqual(7.2380952380952381, cov[0, 0]);
    Assert.AreEqual(0.83333333333333337, cov[0, 1]);
    Assert.AreEqual(0.83333333333333337, cov[1, 0]);
    Assert.AreEqual(5.0, cov[1, 1]);
    Assert.AreEqual(0.017657515909330332, pdf1);
    Assert.AreEqual(0.011581172997320841, pdf2);
    Assert.AreEqual(0.0072297668067630525, pdf3);
    Assert.AreEqual(-4.929548496891365, lpdf);
}
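// The kernel value checked above can be verified by hand: a standard d-dimensional
// Gaussian kernel is K(x) = (2*pi)^(-d/2) * exp(-x'x / 2). A minimal sketch using
// only System.Math (the helper name below is ours, not part of Accord's API):
private static double StandardGaussianKernel(double[] x)
{
    // squared Euclidean norm of the input point
    double squaredNorm = 0;
    foreach (double xi in x)
        squaredNorm += xi * xi;

    return Math.Pow(2 * Math.PI, -x.Length / 2.0) * Math.Exp(-squaredNorm / 2);
}

// StandardGaussianKernel(new double[] { 0, 1 })
//   = (1 / (2*pi)) * exp(-0.5) = 0.096532352630053914,
// matching the kernel.Function result asserted in the test above.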
public void GenerateTest1()
{
    Accord.Math.Tools.SetupGenerator(0);

    double[] mean = { 2, 6 };
    double[,] cov =
    {
        { 2, 1 },
        { 1, 5 }
    };

    var normal = new MultivariateNormalDistribution(mean, cov);
    double[][] source = normal.Generate(10000000);

    var target = new MultivariateEmpiricalDistribution(source);

    Assert.IsTrue(mean.IsEqual(target.Mean, 0.001));
    Assert.IsTrue(cov.IsEqual(target.Covariance, 0.003));

    double[][] samples = target.Generate(10000000);

    double[] sampleMean = samples.Mean();
    double[,] sampleCov = samples.Covariance();

    Assert.AreEqual(2, sampleMean[0], 1e-2);
    Assert.AreEqual(6, sampleMean[1], 1e-2);
    Assert.AreEqual(2, sampleCov[0, 0], 1e-2);
    Assert.AreEqual(1, sampleCov[0, 1], 1e-2);
    Assert.AreEqual(1, sampleCov[1, 0], 1e-2);
    Assert.AreEqual(5, sampleCov[1, 1], 2e-2);
}
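// GenerateTest1 round-trips samples through the empirical distribution. One common
// way to draw from a Gaussian kernel density estimate is the smoothed bootstrap:
// pick a stored sample uniformly at random, then perturb it with kernel noise.
// The sketch below illustrates that idea for a diagonal smoothing matrix; it is
// an assumption that Accord's Generate follows this exact scheme.
private static double[] SampleFromKde(double[][] samples, double[] bandwidth, Random rng)
{
    // pick one of the stored samples uniformly at random
    double[] center = samples[rng.Next(samples.Length)];

    var point = new double[center.Length];
    for (int d = 0; d < center.Length; d++)
    {
        // Box-Muller transform: standard Gaussian noise, scaled by the bandwidth
        double u1 = 1.0 - rng.NextDouble(); // in (0, 1], avoids Log(0)
        double u2 = rng.NextDouble();
        double gauss = Math.Sqrt(-2.0 * Math.Log(u1)) * Math.Sin(2.0 * Math.PI * u2);
        point[d] = center[d] + bandwidth[d] * gauss;
    }
    return point;
}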
public void FitTest()
{
    double[][] observations =
    {
        new double[] { 0.1000, -0.2000 },
        new double[] { 0.4000,  0.6000 },
        new double[] { 2.0000,  0.2000 },
        new double[] { 2.0000,  0.3000 }
    };

    var target = new MultivariateEmpiricalDistribution(observations);

    double[] weights = { 0.25, 0.25, 0.25, 0.25 };

    bool thrown = false;

    try
    {
        target.Fit(observations, weights);
    }
    catch (ArgumentException)
    {
        thrown = true;
    }

    Assert.IsTrue(thrown);
}
// Helper functions
private static double ComputeDensityEstimation(double pickupDelay, double duration,
    double interval, IEnumerable<Leg> legs)
{
    // estimate the joint (pickup delay, trip duration) distribution from
    // legs that actually picked up passengers
    MultivariateEmpiricalDistribution dist = new MultivariateEmpiricalDistribution(legs
        .Where(l => l.NumOfPassengersPickedUp > 0)
        .Select(l => new double[]
        {
            GetPickupDelay(l),
            l.ArrivalTime.Subtract(l.StartTime).TotalMinutes
        }).ToArray());

    double probDist = 1 - dist.DistributionFunction(new double[] { pickupDelay, duration });

    // if the pickup-delay component is degenerate, fall back to the appropriate
    // univariate distribution over trip duration alone
    if (Math.Abs(dist.Variance[0]) < Double.Epsilon)
    {
        dist = new MultivariateEmpiricalDistribution(legs
            .Where(l => l.NumOfPassengersPickedUp > 0)
            .Select(l => new double[] { l.ArrivalTime.Subtract(l.StartTime).TotalMinutes })
            .ToArray());

        probDist = 1 - dist.DistributionFunction(new double[] { duration });
    }

    // pickups per minute over the window of two days either side of now
    // (4 days = 5760 minutes)
    double frequency = legs.Count(l =>
        Math.Abs(l.StartTime.Subtract(DateTime.Now).TotalDays) < 2 &&
        l.NumOfPassengersPickedUp > 0) / 5760.0;

    return probDist * frequency * interval;
}
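// GetPickupDelay is called above but not shown in this section. A minimal sketch
// consistent with how the pickup delay is computed inline in LearnFromDates below
// (minutes between the pickup request and the actual start, zero when no request
// time was recorded) would be:
private static double GetPickupDelay(Leg leg)
{
    return leg.PickupRequestTime.HasValue
        ? leg.StartTime.Subtract(leg.PickupRequestTime.Value).TotalMinutes
        : 0;
}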
public void WeightedEmpiricalDistributionConstructorTest3()
{
    double[] weights = { 2, 1, 1, 1, 2, 3, 1, 3, 1, 1, 1, 1 };
    double[] samples = { 5, 1, 4, 1, 2, 3, 4, 3, 4, 3, 2, 3 };

    weights = weights.Divide(weights.Sum());

    var target = new MultivariateEmpiricalDistribution(samples.ToArray(), weights);

    Assert.AreEqual(1.2377597081667415, target.Smoothing[0, 0]);
}
public void ConstructorTest4()
{
    #region doc_fit_epanechnikov
    // Suppose we have the following data, and we would
    // like to estimate a distribution from this data
    double[][] samples =
    {
        new double[] { 0, 1 },
        new double[] { 1, 2 },
        new double[] { 5, 1 },
        new double[] { 7, 1 },
        new double[] { 6, 1 },
        new double[] { 5, 7 },
        new double[] { 2, 1 },
    };

    // Start by specifying a density kernel
    IDensityKernel kernel = new EpanechnikovKernel(dimension: 2);

    // Create a multivariate Empirical distribution from the samples
    var dist = new MultivariateEmpiricalDistribution(kernel, samples);

    // Common measures
    double[] mean = dist.Mean;       // { 3.71, 2.00 }
    double[] median = dist.Median;   // { 3.71, 2.00 }
    double[] var = dist.Variance;    // { 7.23, 5.00 } (diagonal from cov)
    double[,] cov = dist.Covariance; // { { 7.23, 0.83 }, { 0.83, 5.00 } }

    // Probability density functions
    double pdf1 = dist.ProbabilityDensityFunction(new double[] { 2, 1 }); // 0.039131176997318849
    double pdf2 = dist.ProbabilityDensityFunction(new double[] { 4, 2 }); // 0.010212109770266639
    double pdf3 = dist.ProbabilityDensityFunction(new double[] { 5, 7 }); // 0.02891906722705221
    double lpdf = dist.LogProbabilityDensityFunction(new double[] { 5, 7 }); // -3.5432541357714742
    #endregion

    Assert.AreEqual(3.7142857142857144, mean[0]);
    Assert.AreEqual(2.0, mean[1]);
    Assert.AreEqual(3.7142857142857144, median[0]);
    Assert.AreEqual(2.0, median[1]);
    Assert.AreEqual(7.2380952380952381, var[0]);
    Assert.AreEqual(5.0, var[1]);
    Assert.AreEqual(7.2380952380952381, cov[0, 0]);
    Assert.AreEqual(0.83333333333333337, cov[0, 1]);
    Assert.AreEqual(0.83333333333333337, cov[1, 0]);
    Assert.AreEqual(5.0, cov[1, 1]);
    Assert.AreEqual(0.039131176997318849, pdf1);
    Assert.AreEqual(0.010212109770266639, pdf2);
    Assert.AreEqual(0.02891906722705221, pdf3);
    Assert.AreEqual(-3.5432541357714742, lpdf);
}
// set an appropriate distribution for the given data subset
private void SetInputDistribution(MultivariateEmpiricalDistribution inputDistribution,
    double[][] dataSubsetSamples, out MultivariateEmpiricalDistribution distToSet,
    out bool distUnivariate)
{
    if (Math.Abs(inputDistribution.Variance[0]) < double.Epsilon)
    {
        // if we have essentially a univariate distribution, keep only the second component
        MultivariateEmpiricalDistribution univInputDistribution =
            new MultivariateEmpiricalDistribution(
                dataSubsetSamples.Select(a => new double[] { a[1] }).ToArray());

        distToSet = univInputDistribution;
        distUnivariate = true;
    }
    else
    {
        distToSet = inputDistribution;
        distUnivariate = false;
    }
}
public void WeightedEmpiricalDistributionConstructorTest2()
{
    double[] original = { 5, 5, 1, 4, 1, 2, 2, 3, 3, 3, 4, 3, 3, 3, 4, 3, 2, 3 };
    var distribution = new MultivariateEmpiricalDistribution(original.ToArray());

    double[] weights = { 2, 1, 1, 1, 2, 3, 1, 3, 1, 1, 1, 1 };
    double[] source = { 5, 1, 4, 1, 2, 3, 4, 3, 4, 3, 2, 3 };
    double[][] samples = source.ToArray();

    weights = weights.Divide(weights.Sum());

    var target = new MultivariateEmpiricalDistribution(samples, weights, distribution.Smoothing);

    Assert.AreEqual(distribution.Mean[0], target.Mean[0]);
    Assert.AreEqual(distribution.Median[0], target.Median[0]);
    Assert.AreEqual(distribution.Mode[0], target.Mode[0]);
    Assert.AreEqual(distribution.Smoothing[0, 0], target.Smoothing[0, 0]);
    Assert.AreEqual(1.3655172413793104, target.Variance[0]);
    Assert.AreEqual(target.Weights, weights);
    Assert.AreEqual(target.Samples, samples);

    for (double x = 0; x < 6; x += 0.1)
    {
        double actual, expected;

        expected = distribution.ComplementaryDistributionFunction(x);
        actual = target.ComplementaryDistributionFunction(x);
        Assert.AreEqual(expected, actual, 1e-15);

        expected = distribution.DistributionFunction(x);
        actual = target.DistributionFunction(x);
        Assert.AreEqual(expected, actual, 1e-15);

        expected = distribution.LogProbabilityDensityFunction(x);
        actual = target.LogProbabilityDensityFunction(x);
        Assert.AreEqual(expected, actual, 1e-15);

        expected = distribution.ProbabilityDensityFunction(x);
        actual = target.ProbabilityDensityFunction(x);
        Assert.AreEqual(expected, actual, 1e-15);
    }
}
public void FitTest2()
{
    double[][] observations =
    {
        new double[] { 0.1000, -0.2000 },
        new double[] { 0.4000,  0.6000 },
        new double[] { 2.0000,  0.2000 },
        new double[] { 2.0000,  0.3000 }
    };

    double[] mean = Accord.Statistics.Tools.Mean(observations);
    double[,] cov = Accord.Statistics.Tools.Covariance(observations);

    var target = new MultivariateEmpiricalDistribution(observations);

    target.Fit(observations);

    Assert.IsTrue(Matrix.IsEqual(mean, target.Mean));
    Assert.IsTrue(Matrix.IsEqual(cov, target.Covariance, 1e-10));
}
public void FitTest()
{
    double[] original = { 5, 5, 1, 4, 1, 2, 2, 3, 3, 3, 4, 3, 3, 3, 4, 3, 2, 3 };
    var distribution = new MultivariateEmpiricalDistribution(original.ToJagged());

    int[] weights = { 2, 1, 1, 1, 2, 3, 1, 3, 1, 1, 1, 1 };
    double[] sources = { 5, 1, 4, 1, 2, 3, 4, 3, 4, 3, 2, 3 };
    double[][] samples = sources.ToJagged();

    var target = new MultivariateEmpiricalDistribution(Jagged.Zeros(1, 1));
    target.Fit(samples, weights);

    Assert.AreEqual(distribution.Mean[0], target.Mean[0]);
    Assert.AreEqual(distribution.Median[0], target.Median[0]);
    Assert.AreEqual(distribution.Mode[0], target.Mode[0]);
    Assert.AreEqual(distribution.Smoothing[0, 0], target.Smoothing[0, 0]);
    Assert.AreEqual(distribution.Variance[0], target.Variance[0]);
    Assert.IsTrue(target.Weights.IsEqual(weights.Divide(weights.Sum())));
    Assert.AreEqual(target.Samples, samples);

    for (double x = 0; x < 6; x += 0.1)
    {
        double actual, expected;

        expected = distribution.ComplementaryDistributionFunction(x);
        actual = target.ComplementaryDistributionFunction(x);
        Assert.AreEqual(expected, actual);

        expected = distribution.DistributionFunction(x);
        actual = target.DistributionFunction(x);
        Assert.AreEqual(expected, actual);

        expected = distribution.LogProbabilityDensityFunction(x);
        actual = target.LogProbabilityDensityFunction(x);
        Assert.AreEqual(expected, actual, 1e-15);

        expected = distribution.ProbabilityDensityFunction(x);
        actual = target.ProbabilityDensityFunction(x);
        Assert.AreEqual(expected, actual, 1e-15);
    }
}
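// Why the weighted fit above reproduces the unweighted distribution: the `original`
// array is exactly `sources` with each value repeated `weights[k]` times (18 values
// in total). A quick way to verify that equivalence in plain LINQ:
//
//     double[] expanded = sources
//         .SelectMany((v, k) => Enumerable.Repeat(v, weights[k]))
//         .ToArray();
//     // expanded == { 5, 5, 1, 4, 1, 2, 2, 3, 3, 3, 4, 3, 3, 3, 4, 3, 2, 3 } == original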
public void WeightedEmpiricalDistribution_DistributionFunction()
{
    double[][] samples =
    {
        new double[] { 5, 2 },
        new double[] { 1, 5 },
        new double[] { 4, 7 },
        new double[] { 1, 6 },
        new double[] { 2, 2 },
        new double[] { 3, 4 },
        new double[] { 4, 8 },
        new double[] { 3, 2 },
        new double[] { 4, 4 },
        new double[] { 3, 7 },
        new double[] { 2, 4 },
        new double[] { 3, 1 },
    };

    var target = new MultivariateEmpiricalDistribution(samples);

    double[] expected =
    {
        0.33333333333333331, 0.083333333333333329, 0.83333333333333337,
        0.16666666666666666, 0.083333333333333329, 0.41666666666666669,
        0.91666666666666663, 0.25,                 0.5,
        0.66666666666666663, 0.16666666666666666,  0.083333333333333329
    };

    for (int i = 0; i < samples.Length; i++)
    {
        double e = expected[i];
        double a = target.DistributionFunction(samples[i]);
        Assert.AreEqual(e, a);
    }
}
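// The expected values above are plain empirical CDF evaluations: the fraction of
// stored samples that are dominated componentwise by the query point. A minimal
// sketch of that computation (the helper name is ours, not Accord's):
private static double EmpiricalCdf(double[][] samples, double[] x)
{
    int count = 0;
    foreach (double[] s in samples)
    {
        // a sample counts if every coordinate is <= the query point
        bool dominated = true;
        for (int d = 0; d < x.Length; d++)
        {
            if (s[d] > x[d]) { dominated = false; break; }
        }
        if (dominated) count++;
    }
    return count / (double)samples.Length;
}

// For example, 4 of the 12 samples are componentwise <= { 5, 2 }
// ({5,2}, {2,2}, {3,2} and {3,1}), giving 4/12 = 0.3333..., the first expected value.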
public async Task LearnFromDates(DateTime from, DateTime to)
{
    int maxPickups = await GetMaxNumberOfPickups();

    // train clustering algorithm
    await _locationClustering.RetrainAsync(from, to);

    // initialize storage arrays
    int pickupElementSize = await GetMaxNumberOfPickups() + 1;
    InitStorageArray(ref clusterFareClassRegressions, pickupElementSize - 1, NumberOfFareClassIntervals - 1);
    InitStorageArray(ref clusterFareClassInputDensityKernels, pickupElementSize - 1, NumberOfFareClassIntervals);
    InitStorageArray(ref clusterFareClassDistributionsUnivariate, pickupElementSize - 1, NumberOfFareClassIntervals);
    InitStorageArray(ref clusterPickupFrequencies, pickupElementSize);
    InitStorageArray(ref clusterPickupInputDensityKernels, pickupElementSize);
    InitStorageArray(ref clusterPickupInputDensityKernelsUnivariate, pickupElementSize);

    // for each cluster
    for (int i = 0; i < _locationClustering.NumberOfClusters; i++)
    {
        // obtain data set: decide for each leg in the date range whether it belongs to this cluster
        IEnumerable<Task<Pair<Leg, bool>>> decisionTasks = (await _legRepository.ListAsync())
            .Where(leg => leg.StartTime.CompareTo(from) > 0 && leg.StartTime.CompareTo(to) < 0)
            .Select(async (leg) =>
            {
                LegCoordinates coords = await _geocodingDbSync.GetLegCoordinatesAsync(leg.LegID);
                double[] dp = (new decimal[] { coords.StartLatitude, coords.StartLongitude,
                        coords.DestLatitude, coords.DestLongitude })
                    .Select(Convert.ToDouble).ToArray();
                return new Pair<Leg, bool>(leg, _locationClustering.ClusterCollection.Decide(dp) == i);
            });

        Pair<Leg, bool>[] decisions = await Task.WhenAll(decisionTasks);

        // Data input values (pickup delay, travel time) in this cluster
        IEnumerable<Leg> dataLegs = decisions.Where(pair => pair.Second).Select(pair => pair.First);
        double[][] dataset = dataLegs
            .Select(leg => new double[]
            {
                leg.PickupRequestTime.HasValue
                    ? leg.StartTime.Subtract(leg.PickupRequestTime.Value).TotalMinutes
                    : 0,
                leg.ArrivalTime.Subtract(leg.StartTime).TotalMinutes
            }).ToArray();

        // Fare classes in this cluster: the index of the first interval boundary above the fare
        int[] fareClasses = dataLegs
            .Select(leg =>
            {
                for (int j = 0; j < FareClassIntervals.Count(); j++)
                {
                    if (Convert.ToDecimal(FareClassIntervals.ElementAt(j)) > leg.Fare)
                        return j;
                }
                return FareClassIntervals.Count();
            }).ToArray();

        // Pickup numbers in this cluster
        int[] pickupNumbers = dataLegs.Select(leg => leg.NumOfPassengersPickedUp).ToArray();

        // for each possible number of pickups
        for (int n = 1; n <= maxPickups; n++)
        {
            double[][] dataSubset = dataset.Where((dp, k) => pickupNumbers[k] == n).ToArray();
            int[] fareClassesSubset = fareClasses.Where((fc, k) => pickupNumbers[k] == n).ToArray();

            if (dataSubset.Length == 0)
                throw new ApplicationException("Insufficient data to make a reliable prediction");

            // for each fare class interval boundary
            for (int j = 0; j < NumberOfFareClassIntervals; j++)
            {
                // train logistic regression
                if (j > 0 && clusterFareClassRegressions[i][n - 1][j - 1] == null)
                {
                    clusterFareClassRegressions[i][n - 1][j - 1] = _logisticRegressionAnalysis
                        .Learn(dataSubset, fareClassesSubset.Select(fc => fc >= j ? 1.0 : 0.0).ToArray());
                }

                // train empirical density functions
                if (fareClassesSubset.Count(fc => fc >= j) > 0)
                {
                    double[][] dataSubsetSamples = dataSubset
                        .Where((dp, k) => fareClassesSubset[k] >= j).ToArray();

                    MultivariateEmpiricalDistribution fareClassInputDistribution =
                        new MultivariateEmpiricalDistribution(dataSubsetSamples);

                    SetInputDistribution(fareClassInputDistribution, dataSubsetSamples,
                        out clusterFareClassInputDensityKernels[i][n - 1][j],
                        out clusterFareClassDistributionsUnivariate[i][n - 1][j]);
                }
            }
        }

        // compute pickup frequencies
        for (int l = 0; l < pickupElementSize; l++)
        {
            clusterPickupFrequencies[i][l] =
                Convert.ToDouble(dataLegs.Count(leg => leg.NumOfPassengersPickedUp == l)) /
                to.Subtract(from).TotalMinutes;

            if (pickupNumbers.Any(pn => pn == l))
            {
                double[][] samples = dataset.Where((dp, k) => pickupNumbers[k] == l).ToArray();

                MultivariateEmpiricalDistribution pickupInputDistribution =
                    new MultivariateEmpiricalDistribution(samples);

                SetInputDistribution(pickupInputDistribution, samples,
                    out clusterPickupInputDensityKernels[i][l],
                    out clusterPickupInputDensityKernelsUnivariate[i][l]);
            }
        }
    }
}
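// InitStorageArray is referenced above but not shown in this section. Judging from
// the call sites, it allocates one inner array (or matrix) per cluster. A purely
// hypothetical sketch of the two overloads used above could look like this; the
// generic signatures are our assumption, not the project's actual code:
private void InitStorageArray<T>(ref T[][] storage, int innerSize)
{
    // one row per cluster, each with innerSize slots
    storage = new T[_locationClustering.NumberOfClusters][];
    for (int i = 0; i < storage.Length; i++)
        storage[i] = new T[innerSize];
}

private void InitStorageArray<T>(ref T[][][] storage, int size1, int size2)
{
    // one size1 x size2 matrix per cluster
    storage = new T[_locationClustering.NumberOfClusters][][];
    for (int i = 0; i < storage.Length; i++)
    {
        storage[i] = new T[size1][];
        for (int n = 0; n < size1; n++)
            storage[i][n] = new T[size2];
    }
}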