Beispiel #1
0
        /// <summary>
        /// Compares two download data sets and collects every package ID whose download count differs between
        /// them, mapped to the count found in <paramref name="newData"/>. IDs are compared case-insensitively.
        /// </summary>
        /// <param name="oldData">The previously known download data.</param>
        /// <param name="newData">The latest download data. It must contain at least one ID.</param>
        /// <returns>A sorted, case-insensitive map from each changed package ID to its new download count.</returns>
        /// <exception cref="InvalidOperationException">
        /// Thrown when <paramref name="newData"/> is empty, or when the number of detected download count
        /// decreases is greater than the configured maximum.
        /// </exception>
        public SortedDictionary<string, long> Compare(
            DownloadData oldData,
            DownloadData newData)
        {
            if (newData.Count == 0)
            {
                throw new InvalidOperationException("The new data should not be empty.");
            }

            var timer = Stopwatch.StartNew();

            // Deliberately simple: build the union of the IDs from both sides, then check each ID one by one.
            var combinedKeys = oldData.Keys.Concat(newData.Keys);
            var allIds = new HashSet<string>(combinedKeys, StringComparer.OrdinalIgnoreCase);

            _logger.LogInformation(
                "There are {OldCount} IDs in the old data, {NewCount} IDs in the new data, and {TotalCount} IDs in total.",
                oldData.Count,
                newData.Count,
                allIds.Count);

            var changes = new SortedDictionary<string, long>(StringComparer.OrdinalIgnoreCase);
            var decreases = 0;

            foreach (var packageId in allIds)
            {
                // A decrease is counted rather than rejected outright: spoofed download counts have been
                // manually deleted in the past, so a lower count can be legitimate.
                DetectDownloadCountDecreases(oldData, newData, packageId, ref decreases);

                var previous = oldData.GetDownloadCount(packageId);
                var latest = newData.GetDownloadCount(packageId);
                if (previous == latest)
                {
                    continue;
                }

                changes.Add(packageId, latest);
            }

            _logger.LogInformation("There are {Count} package IDs with download count changes.", changes.Count);
            _logger.LogInformation("There are {Count} package versions with download count decreases.", decreases);

            // Abort when more decreases were seen than the configuration allows.
            var maxDecreases = _options.Value.MaxDownloadCountDecreases;
            if (decreases > maxDecreases)
            {
                throw new InvalidOperationException("Too many download count decreases are occurring.");
            }

            timer.Stop();
            _telemetryService.TrackDownloadSetComparison(oldData.Count, newData.Count, changes.Count, timer.Elapsed);

            return changes;
        }
Beispiel #2
0
        /// <summary>
        /// Cleans the provided download data in place. Download counts for invalid package IDs or unparsable
        /// versions are set to 0, and counts recorded under a non-normalized version string are moved to the
        /// normalized version string. A summary of what was found is logged at the end.
        /// </summary>
        /// <param name="data">The download data to clean. It is modified by this method.</param>
        private void CleanDownloadData(DownloadData data)
        {
            var invalidIdCount            = 0;
            var invalidVersionCount       = 0;
            var nonNormalizedVersionCount = 0;

            // Iterate over snapshots of the keys (ToList) because SetDownloadCount is called on the data while
            // looping.
            foreach (var id in data.Keys.ToList())
            {
                // The length is checked before the validator call so that over-long IDs are rejected without
                // invoking the validator at all.
                var isValidId = id.Length <= PackageIdValidator.MaxPackageIdLength &&
                                PackageIdValidator.IsValidPackageIdWithTimeout(id);
                if (!isValidId)
                {
                    invalidIdCount++;
                }

                foreach (var version in data[id].Keys.ToList())
                {
                    var isValidVersion = NuGetVersion.TryParse(version, out var parsedVersion);
                    if (!isValidVersion)
                    {
                        invalidVersionCount++;
                    }

                    if (!isValidId || !isValidVersion)
                    {
                        // Clear the download count if the ID or version is invalid.
                        data.SetDownloadCount(id, version, 0);
                        continue;
                    }

                    var normalizedVersion   = parsedVersion.ToNormalizedString();
                    var isNormalizedVersion = StringComparer.OrdinalIgnoreCase.Equals(version, normalizedVersion);

                    if (!isNormalizedVersion)
                    {
                        nonNormalizedVersionCount++;

                        // Use the normalized version string if the original was not normalized.
                        // NOTE(review): if the normalized version already has its own count, this call presumably
                        // replaces it rather than adding to it - confirm against SetDownloadCount.
                        var downloads = data.GetDownloadCount(id, version);
                        data.SetDownloadCount(id, version, 0);
                        data.SetDownloadCount(id, normalizedVersion, downloads);
                    }
                }
            }

            // The last counter tracks non-normalized versions, so the message labels it as versions (not IDs).
            _logger.LogInformation(
                "There were {InvalidIdCount} invalid IDs, {InvalidVersionCount} invalid versions, and " +
                "{NonNormalizedVersionCount} non-normalized versions.",
                invalidIdCount,
                invalidVersionCount,
                nonNormalizedVersionCount);
        }
        /// <summary>
        /// Calculates the download count for a single package after popularity transfers are taken into account.
        /// </summary>
        /// <param name="packageId">The ID of the package to calculate downloads for.</param>
        /// <param name="outgoingTransfers">Transfers keyed by the package that gives downloads away.</param>
        /// <param name="incomingTransfers">Transfers keyed by the package that receives downloads.</param>
        /// <param name="downloads">The download data that the counts are read from.</param>
        /// <returns>
        /// The kept share of the package's downloads when it has outgoing transfers; otherwise its downloads plus
        /// its share from each incoming transfer; otherwise its downloads unchanged.
        /// </returns>
        private long TransferPackageDownloads(
            string packageId,
            PopularityTransferData outgoingTransfers,
            SortedDictionary<string, SortedSet<string>> incomingTransfers,
            DownloadData downloads)
        {
            var ownDownloads = downloads.GetDownloadCount(packageId);
            var transferRatio = _options.Value.Scoring.PopularityTransfer;

            // Outgoing transfers take precedence: such a package gives away a percentage of its downloads,
            // split equally among the "incoming" packages, and "rejects" any transfers directed at itself.
            if (outgoingTransfers.ContainsKey(packageId))
            {
                return (long)(ownDownloads * (1 - transferRatio));
            }

            // Without outgoing or incoming transfers the downloads are returned unchanged.
            if (!incomingTransfers.TryGetValue(packageId, out var donorIds))
            {
                return ownDownloads;
            }

            // The package receives downloads from one or more "outgoing" packages. Each donor's transferred
            // portion is divided by the number of packages that donor transfers to.
            var total = ownDownloads;
            foreach (var donorId in donorIds)
            {
                var donorDownloads = downloads.GetDownloadCount(donorId);
                var recipientCount = outgoingTransfers[donorId].Count;

                total += (long)(donorDownloads * transferRatio / recipientCount);
            }

            return total;
        }
        /// <summary>
        /// Determines which packages are affected by the given download changes and by the differences between
        /// the old and new popularity transfers, then applies the new transfers to those packages.
        /// </summary>
        /// <param name="downloads">The latest download data.</param>
        /// <param name="downloadChanges">
        /// The changed download counts by package ID. It must use <see cref="StringComparer.OrdinalIgnoreCase"/>
        /// and every value must equal the package's count in <paramref name="downloads"/>.
        /// </param>
        /// <param name="oldTransfers">The previous popularity transfers.</param>
        /// <param name="newTransfers">The latest popularity transfers.</param>
        /// <returns>The result of applying the new transfers to the affected packages.</returns>
        public SortedDictionary<string, long> UpdateDownloadTransfers(
            DownloadData downloads,
            SortedDictionary<string, long> downloadChanges,
            PopularityTransferData oldTransfers,
            PopularityTransferData newTransfers)
        {
            // Precondition: the changes must be keyed with the case-insensitive ordinal comparer.
            var hasExpectedComparer = downloadChanges.Comparer == StringComparer.OrdinalIgnoreCase;
            Guard.Assert(
                hasExpectedComparer,
                $"Download changes should have comparer {nameof(StringComparer.OrdinalIgnoreCase)}");

            // Precondition: no change may disagree with the latest download data.
            var hasMismatch = downloadChanges.Any(change => downloads.GetDownloadCount(change.Key) != change.Value);
            Guard.Assert(
                !hasMismatch,
                "The download changes should match the latest downloads");

            // A transfer moves downloads from a "from" package to one or more "to" packages. Both
            // "oldTransfers" and "newTransfers" are keyed by the "from" package; the map built here is the
            // reverse view of the new transfers, keyed by the "to" package.
            var transfersByRecipient = GetIncomingTransfers(newTransfers);

            _logger.LogInformation("Detecting changes in popularity transfers.");
            var changedTransfers = _dataComparer.ComparePopularityTransfers(oldTransfers, newTransfers);

            _logger.LogInformation("{Count} popularity transfers have changed.", changedTransfers.Count);

            // Work out which packages are touched by either the download changes or the transfer changes.
            var packagesToUpdate = GetPackagesAffectedByChanges(
                oldTransfers,
                newTransfers,
                transfersByRecipient,
                changedTransfers,
                downloadChanges);

            var updatedDownloads = ApplyDownloadTransfers(
                downloads,
                newTransfers,
                transfersByRecipient,
                packagesToUpdate);

            return updatedDownloads;
        }