public async Task ComputeCompanyStatistics()
{
    numOfDrivers = await _driverRepository.CountAsync();

    // Materialize the legs once so the aggregations below don't re-enumerate
    // (and don't require a second repository round trip for the count).
    List<Leg> legs = (await _legRepository.ListAsync()).ToList();
    pickups = legs.Sum(leg => leg.NumOfPassengersPickedUp);
    milesDriven = legs.Sum(leg => leg.Distance);

    if (legs.Count > 0)
    {
        // Average delay between the pickup request and the start of the leg;
        // legs without a request time contribute zero.
        averagePickupDelay = legs
            .Select(leg => leg.StartTime
                .Subtract(leg.PickupRequestTime.GetValueOrDefault(leg.StartTime))
                .TotalMinutes)
            .Average();
    }

    totalFares = legs.Sum(leg => leg.Fare * leg.NumOfPassengersAboard);
    totalCosts = legs.Sum(leg => leg.GetTotalFuelCost());
    netProfit = totalFares - totalCosts;
}
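// The statistics above assume a Leg entity shaped roughly as sketched below.
// This is a hypothetical illustration, not the project's actual model: the
// property names come from the calls made in this section, but the types and
// the body of GetTotalFuelCost() are assumptions.
public class Leg
{
    public int LegID { get; set; }
    public DateTime StartTime { get; set; }
    public DateTime ArrivalTime { get; set; }
    public DateTime? PickupRequestTime { get; set; }  // null when no pickup was requested
    public double Distance { get; set; }              // leg length in miles (assumed unit)
    public decimal Fare { get; set; }                 // fare charged per passenger (assumed)
    public int NumOfPassengersAboard { get; set; }
    public int NumOfPassengersPickedUp { get; set; }
    public decimal FuelCost { get; set; }             // hypothetical backing property

    // Hypothetical body: the section only shows that this method exists.
    public decimal GetTotalFuelCost() => FuelCost;
}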
public async Task<IEnumerable<Leg>> Get()
{
    return await _legRepository.ListAsync();
}
/// <summary>
/// Train the number of clusters K using the gap statistic
/// (Tibshirani, Walther and Hastie, 2001).
/// </summary>
private async Task ComputeK(int maxK = 100, int B = 10, int driverID = 0,
                            DateTime? startDate = null, DateTime? endDate = null)
{
    double[] Wk = new double[maxK];
    double[][] Wref_kb = new double[maxK][];
    double[] Gap = new double[maxK];
    double[] sd = new double[maxK];
    KMeansClusterCollection[] clusterCollections = new KMeansClusterCollection[maxK];

    // Obtain the data set, optionally restricted to one driver and a date window.
    IEnumerable<Leg> legs = driverID == 0
        ? await _legRepository.ListAsync()
        : await _legRepository.ListForDriverAsync(driverID);
    if (startDate == null)
    {
        startDate = DateTime.MinValue;
    }
    if (endDate == null)
    {
        endDate = DateTime.MaxValue;
    }
    legs = legs.Where(leg => leg.StartTime.CompareTo(startDate.Value) >= 0
                          && leg.StartTime.CompareTo(endDate.Value) < 0);
    double[][] dataset = GetDataset(legs);

    // First, cluster the observed data set for every K.
    for (int k = 1; k <= maxK; k++)
    {
        KMeans kMeans = new KMeans(k)
        {
            // Distance function for geographic coordinates.
            Distance = new GeographicDistance()
        };
        clusterCollections[k - 1] = kMeans.Learn(dataset);
        double[][][] clusterData = ClusterPoints(dataset, k, clusterCollections[k - 1]);

        // W_k: normalized sum of pairwise distances within each cluster.
        Wk[k - 1] = ComputeWk(clusterData, clusterCollections[k - 1]);
    }

    // Then generate B uniform reference data sets over the bounding box of the
    // data; each point is (startLat, startLon, destLat, destLon).
    double[] lowerBounds = new double[4];
    double[] boxDimensions = new double[4];
    for (int i = 0; i < 4; i++)
    {
        lowerBounds[i] = dataset.Select(l => l[i]).Min();
        boxDimensions[i] = dataset.Select(l => l[i]).Max() - lowerBounds[i];
    }
    CorrectLongitudeBounds(lowerBounds, boxDimensions, 1);
    CorrectLongitudeBounds(lowerBounds, boxDimensions, 3);

    Random random = new Random();
    for (int k = 1; k <= maxK; k++)
    {
        Wref_kb[k - 1] = new double[B];
        for (int c = 0; c < B; c++)
        {
            double[][] refDataset = new double[dataset.Length][];
            for (int i = 0; i < refDataset.Length; i++)
            {
                double[] dataPoint = new double[4];
                for (int j = 0; j < 4; j++)
                {
                    dataPoint[j] = random.NextDouble() * boxDimensions[j] + lowerBounds[j];

                    // Wrap longitudes that cross the antimeridian back into (-180, 180].
                    if ((j == 1 || j == 3) && dataPoint[j] > 180)
                    {
                        dataPoint[j] -= 360;
                    }
                }
                refDataset[i] = dataPoint;
            }

            // Cluster the reference data set.
            KMeans refKmeans = new KMeans(k)
            {
                Distance = new GeographicDistance()
            };
            KMeansClusterCollection refClusters = refKmeans.Learn(refDataset);

            // Points in each cluster.
            double[][][] refClusterData = ClusterPoints(refDataset, k, refClusters);

            // Compute the pairwise distance sum for the reference data set.
            Wref_kb[k - 1][c] = ComputeWk(refClusterData, refClusters);
        }

        // Gap(k) = E*[log(W_k)] - log(W_k), with its standard deviation.
        // Log and Sqrt come from "using static System.Math;".
        double l_avg = Wref_kb[k - 1].Select(x => Log(x)).Average();
        Gap[k - 1] = l_avg - Log(Wk[k - 1]);
        sd[k - 1] = Sqrt(Wref_kb[k - 1]
            .Select(x => (Log(x) - l_avg) * (Log(x) - l_avg))
            .Average());

        // Pick the smallest k such that Gap(k) >= Gap(k+1) - sd(k+1).
        if (k > 1 && Gap[k - 2] >= Gap[k - 1] - sd[k - 1])
        {
            ClusterCollection = clusterCollections[k - 2];
            NumberOfClustersLastChanged = DateTime.Now;
            return;
        }
    }
}
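// Three helpers used by ComputeK are referenced but not shown in this
// section. The sketches below are assumptions consistent with their call
// sites, not the project's actual code; they assume "using static
// System.Math;" and Accord.NET's Accord.Math.Distances.IDistance<double[]>.

// GeographicDistance plugs into KMeans.Distance. A plausible implementation
// sums the haversine distances between the start pair and the destination
// pair of two data points { startLat, startLon, destLat, destLon }.
public class GeographicDistance : IDistance<double[]>
{
    private const double EarthRadiusMiles = 3958.8; // assumed unit

    public double Distance(double[] x, double[] y)
    {
        return Haversine(x[0], x[1], y[0], y[1]) + Haversine(x[2], x[3], y[2], y[3]);
    }

    private static double Haversine(double lat1, double lon1, double lat2, double lon2)
    {
        double dLat = (lat2 - lat1) * PI / 180;
        double dLon = (lon2 - lon1) * PI / 180;
        double a = Sin(dLat / 2) * Sin(dLat / 2)
                 + Cos(lat1 * PI / 180) * Cos(lat2 * PI / 180) * Sin(dLon / 2) * Sin(dLon / 2);
        return 2 * EarthRadiusMiles * Asin(Sqrt(a));
    }
}

// ClusterPoints presumably groups data points by assigned cluster, and
// ComputeWk presumably computes W_k = sum_r D_r / (2 n_r), where D_r is the
// sum of pairwise distances (both orders) within cluster r.
private static double[][][] ClusterPoints(double[][] dataset, int k, KMeansClusterCollection clusters)
{
    int[] labels = dataset.Select(p => clusters.Decide(p)).ToArray();
    return Enumerable.Range(0, k)
        .Select(r => dataset.Where((p, idx) => labels[idx] == r).ToArray())
        .ToArray();
}

private static double ComputeWk(double[][][] clusterData, KMeansClusterCollection clusters)
{
    // Assumption: the same geographic metric as the clustering step.
    IDistance<double[]> distance = new GeographicDistance();
    double wk = 0;
    foreach (double[][] points in clusterData)
    {
        if (points.Length == 0) continue;
        double dr = 0;
        for (int a = 0; a < points.Length; a++)
            for (int b = a + 1; b < points.Length; b++)
                dr += 2 * distance.Distance(points[a], points[b]); // D_r counts each pair twice
        wk += dr / (2.0 * points.Length);
    }
    return wk;
}

// CorrectLongitudeBounds presumably handles data straddling the antimeridian:
// when the naive longitude span exceeds 180°, the complementary (wrapped) box
// is tighter, and the >180 wrap in the reference-set generator above maps the
// shifted samples back into (-180, 180]. A minimal sketch:
private static void CorrectLongitudeBounds(double[] lowerBounds, double[] boxDimensions, int index)
{
    if (boxDimensions[index] > 180)
    {
        lowerBounds[index] += boxDimensions[index];       // old maximum becomes the new lower bound
        boxDimensions[index] = 360 - boxDimensions[index];
    }
}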
public async Task LearnFromDates(DateTime from, DateTime to)
{
    int maxPickups = await GetMaxNumberOfPickups();

    // Train the clustering algorithm.
    await _locationClustering.RetrainAsync(from, to);

    // Initialize the storage arrays; pickup counts range over 0..maxPickups.
    int pickupElementSize = maxPickups + 1;
    InitStorageArray(ref clusterFareClassRegressions, pickupElementSize - 1, NumberOfFareClassIntervals - 1);
    InitStorageArray(ref clusterFareClassInputDensityKernels, pickupElementSize - 1, NumberOfFareClassIntervals);
    InitStorageArray(ref clusterFareClassDistributionsUnivariate, pickupElementSize - 1, NumberOfFareClassIntervals);
    InitStorageArray(ref clusterPickupFrequencies, pickupElementSize);
    InitStorageArray(ref clusterPickupInputDensityKernels, pickupElementSize);
    InitStorageArray(ref clusterPickupInputDensityKernelsUnivariate, pickupElementSize);

    // Load the legs in the date range once; they are re-partitioned per cluster.
    Leg[] legsInRange = (await _legRepository.ListAsync())
        .Where(leg => leg.StartTime.CompareTo(from) > 0 && leg.StartTime.CompareTo(to) < 0)
        .ToArray();

    // For each cluster:
    for (int i = 0; i < _locationClustering.NumberOfClusters; i++)
    {
        // Decide for every leg whether its coordinates fall into cluster i.
        IEnumerable<Task<Pair<Leg, bool>>> decisionTasks = legsInRange
            .Select(async leg =>
            {
                LegCoordinates coords = await _geocodingDbSync.GetLegCoordinatesAsync(leg.LegID);
                double[] dp = new decimal[] { coords.StartLatitude, coords.StartLongitude,
                                              coords.DestLatitude, coords.DestLongitude }
                    .Select(Convert.ToDouble).ToArray();
                return new Pair<Leg, bool>(leg, _locationClustering.ClusterCollection.Decide(dp) == i);
            });
        Pair<Leg, bool>[] decisions = await Task.WhenAll(decisionTasks);

        // Data input values (pickup delay, travel time) for the legs in this cluster.
        Leg[] dataLegs = decisions.Where(pair => pair.Second).Select(pair => pair.First).ToArray();
        double[][] dataset = dataLegs
            .Select(leg => new double[]
            {
                leg.PickupRequestTime.HasValue
                    ? leg.StartTime.Subtract(leg.PickupRequestTime.Value).TotalMinutes
                    : 0,
                leg.ArrivalTime.Subtract(leg.StartTime).TotalMinutes
            }).ToArray();

        // Fare class of each leg: the index of the first interval boundary
        // above the fare, or Count() when the fare exceeds every boundary.
        int[] fareClasses = dataLegs
            .Select(leg =>
            {
                for (int j = 0; j < FareClassIntervals.Count(); j++)
                {
                    if (Convert.ToDecimal(FareClassIntervals.ElementAt(j)) > leg.Fare)
                    {
                        return j;
                    }
                }
                return FareClassIntervals.Count();
            }).ToArray();

        // Pickup counts for the legs in this cluster.
        int[] pickupNumbers = dataLegs.Select(leg => leg.NumOfPassengersPickedUp).ToArray();

        // For each possible number of pickups:
        for (int n = 1; n <= maxPickups; n++)
        {
            double[][] dataSubset = dataset.Where((dp, k) => pickupNumbers[k] == n).ToArray();
            int[] fareClassesSubset = fareClasses.Where((fc, k) => pickupNumbers[k] == n).ToArray();
            if (dataSubset.Length == 0)
            {
                throw new ApplicationException("Insufficient data to make a reliable prediction");
            }

            // For each fare class interval boundary:
            for (int j = 0; j < NumberOfFareClassIntervals; j++)
            {
                // Train a logistic regression for P(fare class >= j).
                if (j > 0 && clusterFareClassRegressions[i][n - 1][j - 1] == null)
                {
                    clusterFareClassRegressions[i][n - 1][j - 1] = _logisticRegressionAnalysis
                        .Learn(dataSubset, fareClassesSubset.Select(fc => fc >= j ? 1.0 : 0.0).ToArray());
                }

                // Train empirical density functions over the inputs of this fare class.
                if (fareClassesSubset.Any(fc => fc >= j))
                {
                    double[][] dataSubsetSamples = dataSubset
                        .Where((dp, k) => fareClassesSubset[k] >= j).ToArray();
                    MultivariateEmpiricalDistribution fareClassInputDistribution =
                        new MultivariateEmpiricalDistribution(dataSubsetSamples);
                    SetInputDistribution(fareClassInputDistribution, dataSubsetSamples,
                        out clusterFareClassInputDensityKernels[i][n - 1][j],
                        out clusterFareClassDistributionsUnivariate[i][n - 1][j]);
                }
            }
        }

        // Compute pickup frequencies (pickups of each size per minute) and the
        // input distributions conditioned on the pickup count.
        for (int l = 0; l < pickupElementSize; l++)
        {
            clusterPickupFrequencies[i][l] =
                Convert.ToDouble(dataLegs.Count(leg => leg.NumOfPassengersPickedUp == l))
                / to.Subtract(from).TotalMinutes;
            if (pickupNumbers.Any(pn => pn == l))
            {
                double[][] samples = dataset.Where((dp, k) => pickupNumbers[k] == l).ToArray();
                MultivariateEmpiricalDistribution pickupInputDistribution =
                    new MultivariateEmpiricalDistribution(samples);
                SetInputDistribution(pickupInputDistribution, samples,
                    out clusterPickupInputDensityKernels[i][l],
                    out clusterPickupInputDensityKernelsUnivariate[i][l]);
            }
        }
    }
}
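// InitStorageArray is referenced above but not shown. Its call sites imply
// jagged arrays keyed by cluster index first, then by the remaining sizes.
// A minimal sketch under that assumption (the real code may differ, e.g. it
// might preserve previously trained entries instead of reallocating):
private void InitStorageArray<T>(ref T[][] array, int size)
{
    array = new T[_locationClustering.NumberOfClusters][];
    for (int i = 0; i < array.Length; i++)
    {
        array[i] = new T[size];
    }
}

private void InitStorageArray<T>(ref T[][][] array, int size1, int size2)
{
    array = new T[_locationClustering.NumberOfClusters][][];
    for (int i = 0; i < array.Length; i++)
    {
        array[i] = new T[size1][];
        for (int n = 0; n < size1; n++)
        {
            array[i][n] = new T[size2];
        }
    }
}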