/// <summary>
/// Compares an old and a new download data set and collects the new download count of every
/// package ID whose count differs between the two.
/// </summary>
/// <param name="oldData">The previously known download data.</param>
/// <param name="newData">The latest download data. Must contain at least one entry.</param>
/// <returns>
/// A case-insensitive sorted dictionary mapping each changed package ID to its count in
/// <paramref name="newData"/>.
/// </returns>
/// <exception cref="InvalidOperationException">
/// Thrown when <paramref name="newData"/> is empty, or when the number of detected download count
/// decreases exceeds the configured <c>MaxDownloadCountDecreases</c>.
/// </exception>
public SortedDictionary<string, long> Compare(
    DownloadData oldData,
    DownloadData newData)
{
    if (newData.Count == 0)
    {
        throw new InvalidOperationException("The new data should not be empty.");
    }

    var timer = Stopwatch.StartNew();

    // Deliberately simple approach: gather every ID seen on either side (ignoring case),
    // then compare the counts ID by ID.
    var idsFromBothSides = oldData.Keys.Concat(newData.Keys);
    var allIds = new HashSet<string>(idsFromBothSides, StringComparer.OrdinalIgnoreCase);

    _logger.LogInformation(
        "There are {OldCount} IDs in the old data, {NewCount} IDs in the new data, and {TotalCount} IDs in total.",
        oldData.Count,
        newData.Count,
        allIds.Count);

    var changes = new SortedDictionary<string, long>(StringComparer.OrdinalIgnoreCase);
    var decreases = 0;

    foreach (var packageId in allIds)
    {
        // A decrease is reported but is not an error on its own: spoofed download counts have
        // been removed by hand in the past. Only the total number of decreases is limited below.
        DetectDownloadCountDecreases(oldData, newData, packageId, ref decreases);

        var before = oldData.GetDownloadCount(packageId);
        var after = newData.GetDownloadCount(packageId);
        if (before == after)
        {
            continue;
        }

        changes.Add(packageId, after);
    }

    _logger.LogInformation("There are {Count} package IDs with download count changes.", changes.Count);
    _logger.LogInformation("There are {Count} package versions with download count decreases.", decreases);

    var maxAllowedDecreases = _options.Value.MaxDownloadCountDecreases;
    if (decreases > maxAllowedDecreases)
    {
        throw new InvalidOperationException("Too many download count decreases are occurring.");
    }

    // Telemetry is only emitted when the comparison completes without throwing.
    timer.Stop();
    _telemetryService.TrackDownloadSetComparison(oldData.Count, newData.Count, changes.Count, timer.Elapsed);

    return changes;
}
/// <summary>
/// Cleans the provided download data in place: download counts recorded against an invalid package
/// ID or an unparsable version are set to zero, and counts recorded against a non-normalized
/// version string are moved to the normalized form of that version.
/// </summary>
/// <param name="data">The download data to clean. It is mutated by this method.</param>
private void CleanDownloadData(DownloadData data)
{
    var invalidIdCount = 0;
    var invalidVersionCount = 0;
    var nonNormalizedVersionCount = 0;

    // Snapshot the keys because the loop body modifies the data while it is being walked.
    foreach (var id in data.Keys.ToList())
    {
        // The length is checked first so the pattern-based validation only runs on IDs of acceptable length.
        var isValidId = id.Length <= PackageIdValidator.MaxPackageIdLength
            && PackageIdValidator.IsValidPackageIdWithTimeout(id);
        if (!isValidId)
        {
            invalidIdCount++;
        }

        // Snapshot the versions for the same reason as the IDs above.
        foreach (var version in data[id].Keys.ToList())
        {
            // Versions are still parsed when the ID is invalid so that the invalid version count is complete.
            var isValidVersion = NuGetVersion.TryParse(version, out var parsedVersion);
            if (!isValidVersion)
            {
                invalidVersionCount++;
            }

            if (!isValidId || !isValidVersion)
            {
                // Clear the download count if the ID or version is invalid.
                data.SetDownloadCount(id, version, 0);
                continue;
            }

            var normalizedVersion = parsedVersion.ToNormalizedString();
            var isNormalizedVersion = StringComparer.OrdinalIgnoreCase.Equals(version, normalizedVersion);

            if (!isNormalizedVersion)
            {
                nonNormalizedVersionCount++;

                // Use the normalized version string if the original was not normalized.
                // NOTE(review): if the normalized version already has its own entry, this presumably
                // replaces that entry's count instead of adding to it - confirm this is intended.
                var downloads = data.GetDownloadCount(id, version);
                data.SetDownloadCount(id, version, 0);
                data.SetDownloadCount(id, normalizedVersion, downloads);
            }
        }
    }

    // The last value counts versions, not IDs, so the message text says "versions".
    _logger.LogInformation(
        "There were {InvalidIdCount} invalid IDs, {InvalidVersionCount} invalid versions, and " +
        "{NonNormalizedVersionCount} non-normalized versions.",
        invalidIdCount,
        invalidVersionCount,
        nonNormalizedVersionCount);
}
/// <summary>
/// Computes the download count of a single package after popularity transfers are applied.
/// </summary>
/// <param name="packageId">The ID of the package to compute the download count for.</param>
/// <param name="outgoingTransfers">Maps a package giving away popularity to the packages receiving it.</param>
/// <param name="incomingTransfers">Maps a package receiving popularity to the packages giving it away.</param>
/// <param name="downloads">The download data holding the original, untransferred counts.</param>
/// <returns>The package's download count with transfers applied.</returns>
private long TransferPackageDownloads(
    string packageId,
    PopularityTransferData outgoingTransfers,
    SortedDictionary<string, SortedSet<string>> incomingTransfers,
    DownloadData downloads)
{
    var ownDownloads = downloads.GetDownloadCount(packageId);
    var transferRatio = _options.Value.Scoring.PopularityTransfer;

    // Outgoing transfers take precedence: a package that gives downloads away keeps only the
    // remaining share and ignores anything that would be transferred into it.
    if (outgoingTransfers.ContainsKey(packageId))
    {
        return (long)(ownDownloads * (1 - transferRatio));
    }

    // With no outgoing and no incoming transfers the count is left untouched.
    if (!incomingTransfers.TryGetValue(packageId, out var senderIds))
    {
        return ownDownloads;
    }

    // Each sender splits its transferred share equally among all of its recipients, so this
    // package receives one such slice from every sender.
    var total = ownDownloads;
    foreach (var senderId in senderIds)
    {
        var senderDownloads = downloads.GetDownloadCount(senderId);
        var recipientCount = outgoingTransfers[senderId].Count;

        total += (long)(senderDownloads * transferRatio / recipientCount);
    }

    return total;
}
/// <summary>
/// Determines which packages are affected by the given download changes and popularity transfer
/// changes, and applies the new transfers to those packages.
/// </summary>
/// <param name="downloads">The latest download data.</param>
/// <param name="downloadChanges">
/// The changed download counts by package ID. Must use <see cref="StringComparer.OrdinalIgnoreCase"/>
/// and every value must equal the count reported by <paramref name="downloads"/>.
/// </param>
/// <param name="oldTransfers">The previous popularity transfers ("from" package to "to" packages).</param>
/// <param name="newTransfers">The latest popularity transfers ("from" package to "to" packages).</param>
/// <returns>The result of applying the new transfers to the affected packages.</returns>
public SortedDictionary<string, long> UpdateDownloadTransfers(
    DownloadData downloads,
    SortedDictionary<string, long> downloadChanges,
    PopularityTransferData oldTransfers,
    PopularityTransferData newTransfers)
{
    Guard.Assert(
        downloadChanges.Comparer == StringComparer.OrdinalIgnoreCase,
        $"Download changes should have comparer {nameof(StringComparer.OrdinalIgnoreCase)}");

    Guard.Assert(
        downloadChanges.All(change => downloads.GetDownloadCount(change.Key) == change.Value),
        "The download changes should match the latest downloads");

    // Transfers flow from a "from" package to one or more "to" packages. Both transfer inputs are
    // keyed by the "from" package; the map built here is the inverse view of the new transfers,
    // keyed by the "to" package.
    var transfersByRecipient = GetIncomingTransfers(newTransfers);

    _logger.LogInformation("Detecting changes in popularity transfers.");
    var changedTransfers = _dataComparer.ComparePopularityTransfers(oldTransfers, newTransfers);
    _logger.LogInformation("{Count} popularity transfers have changed.", changedTransfers.Count);

    // Work out which packages are touched by the download changes and the transfer changes.
    var impactedPackages = GetPackagesAffectedByChanges(
        oldTransfers,
        newTransfers,
        transfersByRecipient,
        changedTransfers,
        downloadChanges);

    var updatedDownloads = ApplyDownloadTransfers(
        downloads,
        newTransfers,
        transfersByRecipient,
        impactedPackages);

    return updatedDownloads;
}