Example #1
0
        /// <summary>
        /// Recomputes the company-wide aggregate fields (driver count, total
        /// pickups, miles driven, average pickup delay, total fares, total fuel
        /// costs and net profit) from the current contents of the driver and
        /// leg repositories.
        /// </summary>
        public async Task ComputeCompanyStatistics()
        {
            numOfDrivers = await _driverRepository.CountAsync();

            // Materialize once: ListAsync may hand back a deferred sequence,
            // and we aggregate it several times below.
            List<Leg> legs = (await _legRepository.ListAsync()).ToList();

            pickups     = legs.Sum(leg => leg.NumOfPassengersPickedUp);
            milesDriven = legs.Sum(leg => leg.Distance);

            // Average() throws InvalidOperationException on an empty sequence;
            // guard with the list we already fetched instead of issuing a
            // second CountAsync() round trip to the repository.
            if (legs.Count > 0)
            {
                // Legs without a pickup request time contribute a zero delay
                // (GetValueOrDefault falls back to the leg's own start time).
                averagePickupDelay = legs.Average(leg =>
                    leg.StartTime.Subtract(leg.PickupRequestTime.GetValueOrDefault(leg.StartTime)).TotalMinutes);
            }

            totalFares = legs.Sum(leg => leg.Fare * leg.NumOfPassengersAboard);
            totalCosts = legs.Sum(leg => leg.GetTotalFuelCost());
            netProfit  = totalFares - totalCosts;
        }
Example #2
0
 /// <summary>
 /// Returns every leg currently stored in the repository.
 /// </summary>
 public async Task <IEnumerable <Leg> > Get()
     => await _legRepository.ListAsync();
Example #3
0
        /// <summary>
        /// Train on number of clusters using gap statistic
        /// (Tibshirani, Walther and Hastie, 2001): for each k = 1..maxK the
        /// legs' coordinate dataset is clustered with K-means, its within-cluster
        /// dispersion Wk is compared to the mean dispersion of B uniformly
        /// sampled reference datasets, and the search stops at the first k - 1
        /// whose gap satisfies Gap(k-1) >= Gap(k) - sd(k).
        /// </summary>
        /// <param name="maxK">Largest number of clusters to try.</param>
        /// <param name="B">Number of uniform reference datasets generated per k.</param>
        /// <param name="driverID">When non-zero, only legs for this driver are used.</param>
        /// <param name="startDate">Inclusive lower bound on leg start time; null means unbounded.</param>
        /// <param name="endDate">Exclusive upper bound on leg start time; null means unbounded.</param>
        private async Task ComputeK(int maxK = 100, int B = 10, int driverID = 0, DateTime?startDate = null, DateTime?endDate = null)
        {
            double[]   Wk      = new double[maxK];     // within-cluster dispersion of the real data, per k
            double[][] Wref_kb = new double[maxK][];   // dispersion of each of the B reference sets, per k
            double[]   Gap     = new double[maxK];     // gap statistic per k
            double[]   sd      = new double[maxK];     // std-dev of log(Wref_kb) per k

            KMeansClusterCollection[] clusterCollections = new KMeansClusterCollection[maxK];

            // obtain dataset
            IEnumerable <Leg> legs = driverID == 0 ? await _legRepository.ListAsync()
                : await _legRepository.ListForDriverAsync(driverID);

            if (startDate == null)
            {
                startDate = DateTime.MinValue;
            }
            if (endDate == null)
            {
                endDate = DateTime.MaxValue;
            }
            // Keep only legs whose StartTime lies in [startDate, endDate).
            legs = legs.Where(leg => leg.StartTime.CompareTo(startDate) >= 0 && leg.StartTime.CompareTo(endDate) < 0);
            // Each data point has 4 features; indices 1 and 3 are treated as
            // longitudes below — presumably [startLat, startLon, destLat, destLon].
            // TODO confirm against GetDataset.
            double[][] dataset = GetDataset(legs);

            // first cluster the dataset varying K
            for (int k = 1; k <= maxK; k++)
            {
                KMeans kMeans = new KMeans(k)
                {
                    // distance function for geographic coordinates
                    Distance = new GeographicDistance()
                };

                clusterCollections[k - 1] = kMeans.Learn(dataset);
                double[][][] clusterData = ClusterPoints(dataset, k, clusterCollections[k - 1]);

                // sum of pairwise distances
                Wk[k - 1] = ComputeWk(clusterData, clusterCollections[k - 1]);
            }

            // then generate the reference data sets
            // Axis-aligned bounding box of the data: one (lowerBound, dimension)
            // pair per feature.
            double[] lowerBounds   = new double[4];
            double[] boxDimensions = new double[4];
            for (int i = 0; i < 4; i++)
            {
                lowerBounds[i]   = dataset.Select(l => l[i]).Min();
                boxDimensions[i] = dataset.Select(l => l[i]).Max() - lowerBounds[i];
            }
            // Longitude features (indices 1 and 3) may wrap across the antimeridian.
            CorrectLongitudeBounds(lowerBounds, boxDimensions, 1);
            CorrectLongitudeBounds(lowerBounds, boxDimensions, 3);

            // NOTE(review): unseeded Random makes the chosen k non-deterministic
            // between runs; consider a fixed seed if reproducibility matters.
            Random random = new Random();

            for (int k = 1; k <= maxK; k++)
            {
                Wref_kb[k - 1] = new double[B];
                for (int c = 0; c < B; c++)
                {
                    // Draw a reference dataset of the same size as the real one,
                    // uniform over the bounding box; longitude components that
                    // overshoot 180 are wrapped back into (-180, 180].
                    double[][] refDataset = new double[dataset.Length][];
                    for (int i = 0; i < refDataset.Length; i++)
                    {
                        double[] dataPoint = new double[4];
                        for (int j = 0; j < 4; j++)
                        {
                            dataPoint[j] = random.NextDouble() * boxDimensions[j] + lowerBounds[j];
                            if ((j == 1 || j == 3) && dataPoint[j] > 180)
                            {
                                dataPoint[j] -= 360;
                            }
                        }
                        refDataset[i] = dataPoint;
                    }

                    // cluster reference dataset
                    KMeans refKmeans = new KMeans(k);
                    refKmeans.Distance = new GeographicDistance();
                    KMeansClusterCollection refClusters = refKmeans.Learn(refDataset);

                    // points in each cluster
                    double[][][] refClusterData = ClusterPoints(refDataset, k, refClusters);

                    // compute pairwise distance sum for refDataset
                    Wref_kb[k - 1][c] = ComputeWk(refClusterData, refClusters);
                }

                // compute gap statistic
                // Gap(k) = mean_b log(Wref_kb) - log(Wk); sd(k) = population
                // std-dev of log(Wref_kb). NOTE(review): the published statistic
                // scales sd by sqrt(1 + 1/B) — confirm the omission is intended.
                double l_avg = Wref_kb[k - 1].Select(x => Log(x)).Average();
                Gap[k - 1] = l_avg - Log(Wk[k - 1]);
                sd[k - 1]  = Sqrt(Wref_kb[k - 1].Select(x => (Log(x) - l_avg) * (Log(x) - l_avg)).Average());

                // decide optimal k
                // Stop at the first k - 1 clusters satisfying the gap criterion.
                if (k > 1 && Gap[k - 2] >= Gap[k - 1] - sd[k - 1])
                {
                    ClusterCollection           = clusterCollections[k - 2];
                    NumberOfClustersLastChanged = DateTime.Now;
                    return;
                }
            }
            // NOTE(review): if no k up to maxK satisfies the stopping rule,
            // ClusterCollection is left unchanged — confirm callers expect that.
        }
        /// <summary>
        /// Trains the per-cluster models from legs whose start time lies
        /// strictly between <paramref name="from"/> and <paramref name="to"/>:
        /// retrains the location clustering, then for each cluster fits
        /// fare-class logistic regressions, empirical input-density estimates,
        /// and per-minute pickup frequencies.
        /// </summary>
        /// <param name="from">Exclusive lower bound on leg start time.</param>
        /// <param name="to">Exclusive upper bound on leg start time; also the end of the window used to normalize pickup frequencies.</param>
        /// <exception cref="ApplicationException">
        /// Thrown when a cluster has no legs for some pickup count between 1 and
        /// the maximum, so no reliable model can be fitted.
        /// </exception>
        public async Task LearnFromDates(DateTime from, DateTime to)
        {
            int maxPickups = await GetMaxNumberOfPickups();

            // train clustering algorithm
            await _locationClustering.RetrainAsync(from, to);

            // initialize storage arrays
            // NOTE(review): GetMaxNumberOfPickups() is queried again after the
            // retrain; if its value can change in between, maxPickups and
            // pickupElementSize - 1 may disagree — confirm this is intended.
            int pickupElementSize = await GetMaxNumberOfPickups() + 1;

            InitStorageArray(ref clusterFareClassRegressions, pickupElementSize - 1, NumberOfFareClassIntervals - 1);
            InitStorageArray(ref clusterFareClassInputDensityKernels, pickupElementSize - 1, NumberOfFareClassIntervals);
            InitStorageArray(ref clusterFareClassDistributionsUnivariate, pickupElementSize - 1, NumberOfFareClassIntervals);
            InitStorageArray(ref clusterPickupFrequencies, pickupElementSize);
            InitStorageArray(ref clusterPickupInputDensityKernels, pickupElementSize);
            InitStorageArray(ref clusterPickupInputDensityKernelsUnivariate, pickupElementSize);

            // for each cluster
            for (int i = 0; i < _locationClustering.NumberOfClusters; i++)
            {
                // obtain data set
                // Pair every in-window leg with a flag saying whether its
                // geocoded endpoints are assigned to cluster i. Coordinates are
                // fetched per leg, concurrently, via Task.WhenAll below.
                IEnumerable <Task <Pair <Leg, bool> > > decisionTasks = (await _legRepository.ListAsync())
                                                                        .Where(leg => leg.StartTime.CompareTo(from) > 0 && leg.StartTime.CompareTo(to) < 0)
                                                                        .Select(async(leg) =>
                {
                    LegCoordinates coords = await _geocodingDbSync.GetLegCoordinatesAsync(leg.LegID);
                    double[] dp           = (new decimal[] { coords.StartLatitude, coords.StartLongitude, coords.DestLatitude, coords.DestLongitude })
                                            .Select(Convert.ToDouble).ToArray();
                    return(new Pair <Leg, bool>(leg, _locationClustering.ClusterCollection.Decide(dp) == i));
                });
                Pair <Leg, bool>[] decisions = await Task.WhenAll(decisionTasks);

                // Data input values (pickup delay, travel time) in this cluster
                IEnumerable <Leg> dataLegs = decisions.Where(pair => pair.Second).Select(pair => pair.First);
                // Per-leg input vector: [pickup delay in minutes (0 when no
                // request time was recorded), travel time in minutes].
                double[][]        dataset  = dataLegs
                                             .Select(leg => new double[]
                {
                    leg.PickupRequestTime.HasValue
                        ? leg.StartTime.Subtract(leg.PickupRequestTime.Value).TotalMinutes
                        : 0,

                    leg.ArrivalTime.Subtract(leg.StartTime).TotalMinutes
                }).ToArray();
                // Fare classes in this cluster
                // Class = index of the first interval boundary strictly greater
                // than the fare; Count() when the fare exceeds every boundary.
                int[] fareClasses = dataLegs
                                    .Select(leg =>
                {
                    for (int j = 0; j < FareClassIntervals.Count(); j++)
                    {
                        // NOTE(review): this j < Count() check is always true
                        // given the loop bound — likely leftover, harmless.
                        if (j < FareClassIntervals.Count() &&
                            Convert.ToDecimal(FareClassIntervals.ElementAt(j)) > leg.Fare)
                        {
                            return(j);
                        }
                    }
                    return(FareClassIntervals.Count());
                }).ToArray();
                // Pickup numbers in this cluster
                int[] pickupNumbers = dataLegs.Select(leg => leg.NumOfPassengersPickedUp).ToArray();


                // for each possible number of pickups
                for (int n = 1; n <= maxPickups; n++)
                {
                    // Restrict inputs and fare classes to legs with exactly n pickups.
                    double[][] dataSubset        = dataset.Where((dp, k) => pickupNumbers[k] == n).ToArray();
                    int[]      fareClassesSubset = fareClasses.Where((fc, k) => pickupNumbers[k] == n).ToArray();

                    if (dataSubset.Length == 0)
                    {
                        throw new ApplicationException("Insufficient data to make a reliable prediction");
                    }

                    // for each fare class interval boundary
                    for (int j = 0; j < NumberOfFareClassIntervals; j++)
                    {
                        // train logistic regression
                        // Binary target: 1.0 when the leg's fare class is >= j.
                        // Only trained once per (cluster, pickups, boundary).
                        if (j > 0 && clusterFareClassRegressions[i][n - 1][j - 1] == null)
                        {
                            clusterFareClassRegressions[i][n - 1][j - 1] = _logisticRegressionAnalysis
                                                                           .Learn(dataSubset, fareClassesSubset.Select(fc => fc >= j ? 1.0 : 0.0).ToArray());
                        }

                        // train empirical density functions
                        if (fareClassesSubset.Count(fc => fc >= j) > 0.0)
                        {
                            double[][] dataSubsetSamples = dataSubset.Where((dp, k)
                                                                            => fareClassesSubset[k] >= j).ToArray();
                            MultivariateEmpiricalDistribution fareClassInputDistribution
                                = new MultivariateEmpiricalDistribution(dataSubsetSamples);

                            SetInputDistribution(fareClassInputDistribution, dataSubsetSamples,
                                                 out clusterFareClassInputDensityKernels[i][n - 1][j],
                                                 out clusterFareClassDistributionsUnivariate[i][n - 1][j]);
                        }
                    }
                }

                // compute pickup frequencies
                // Rate of legs with exactly l pickups, per minute of the
                // [from, to] training window.
                for (int l = 0; l < pickupElementSize; l++)
                {
                    clusterPickupFrequencies[i][l] = Convert.ToDouble(dataLegs.Count(leg => leg.NumOfPassengersPickedUp == l))
                                                     / to.Subtract(from).TotalMinutes;

                    if (pickupNumbers.Any(pn => pn == l))
                    {
                        double[][] samples = dataset.Where((dp, k) => pickupNumbers[k] == l).ToArray();
                        MultivariateEmpiricalDistribution pickupInputDistribution
                            = new MultivariateEmpiricalDistribution(samples);

                        SetInputDistribution(pickupInputDistribution, samples,
                                             out clusterPickupInputDensityKernels[i][l],
                                             out clusterPickupInputDensityKernelsUnivariate[i][l]);
                    }
                }
            }
        }