/// <summary>
        /// Writes new scoring parameters into the first scoring profile of <paramref name="index"/> and
        /// submits the modified index definition through <c>client.Indexes.CreateOrUpdateAsync</c>.
        /// Progress and the applied values are written to the console.
        /// </summary>
        /// <param name="client">Search service client used to submit the updated index.</param>
        /// <param name="settings">Supplies the service and index names used in console output and validation.</param>
        /// <param name="index">Index definition to modify in place; validated before it is changed.</param>
        /// <param name="packageIdWeight">Text weight stored under <c>PackageIdFieldName</c>.</param>
        /// <param name="tokenizedPackageIdWeight">Text weight stored under <c>TokenizedPackageIdFieldName</c>.</param>
        /// <param name="tagsWeight">Text weight stored under <c>TagsFieldName</c>.</param>
        /// <param name="downloadScoreBoost">Boost assigned to the profile's first scoring function.</param>
        public static async Task UpdateNuGetSearchIndexAsync(
            this ISearchServiceClient client,
            SearchScorerSettings settings,
            Index index,
            double packageIdWeight,
            double tokenizedPackageIdWeight,
            double tagsWeight,
            double downloadScoreBoost)
        {
            Console.WriteLine($"Updating Azure Search service '{settings.AzureSearchServiceName}', index '{settings.AzureSearchIndexName}'");

            Console.WriteLine($"Package ID weight: {packageIdWeight}");
            Console.WriteLine($"Tokenized package ID weight: {tokenizedPackageIdWeight}");
            Console.WriteLine($"Tags weight: {tagsWeight}");
            Console.WriteLine($"Download score boost: {downloadScoreBoost}");

            // Fail before touching anything if the index does not have the expected scoring layout.
            index.EnsureValidNuGetSearchIndex(settings);

            var profile = index.ScoringProfiles[0];
            var weights = profile.TextWeights.Weights;
            var boostFunction = profile.Functions[0];

            // Drop whatever weights were there so only the three fields below remain weighted.
            weights.Clear();
            weights[PackageIdFieldName] = packageIdWeight;
            weights[TokenizedPackageIdFieldName] = tokenizedPackageIdWeight;
            weights[TagsFieldName] = tagsWeight;

            boostFunction.Boost = downloadScoreBoost;

            await client.Indexes.CreateOrUpdateAsync(index);

            Console.WriteLine($"Updated Azure Search service '{settings.AzureSearchServiceName}', index '{settings.AzureSearchIndexName}'");
        }
Beispiel #2
0
        /// <summary>
        /// Builds relevancy scores from the curated search queries CSV, processes them against
        /// <paramref name="baseUrl"/>, and weights the results by top-query counts from which the
        /// matching search-referral counts have been subtracted.
        /// </summary>
        /// <param name="baseUrl">Base URL passed to <c>ProcessAsync</c>.</param>
        /// <param name="settings">Settings passed to the curated search queries score builder.</param>
        /// <param name="topQueries">Query text mapped to its count.</param>
        /// <param name="topSearchReferrals">Query text mapped to the referral count to subtract.</param>
        /// <returns>The processed results weighted by the adjusted top-query counts.</returns>
        private async Task<SearchQueriesReport<CuratedSearchQuery>> GetCuratedSearchQueriesScoreAsync(
            string baseUrl,
            SearchScorerSettings settings,
            IReadOnlyDictionary<string, int> topQueries,
            IReadOnlyDictionary<string, int> topSearchReferrals)
        {
            // The smallest count in the input is the floor: subtracting referrals never pushes a
            // query below it.
            var floor = topQueries.Min(pair => pair.Value);

            var adjustedCounts = new Dictionary<string, int>();
            foreach (var pair in topQueries)
            {
                var count = pair.Value;
                if (topSearchReferrals.TryGetValue(pair.Key, out var referralCount))
                {
                    count = Math.Max(count - referralCount, floor);
                }

                adjustedCounts.Add(pair.Key, count);
            }

            var processed = await ProcessAsync(
                RelevancyScoreBuilder.FromCuratedSearchQueriesCsv(settings),
                baseUrl);

            return WeightByTopQueries(adjustedCounts, processed);
        }
Beispiel #3
0
        /// <summary>
        /// Builds relevancy scores from the feedback search queries CSV, processes them against
        /// <paramref name="baseUrl"/>, and returns the results as weighted by <c>WeightEvently</c>.
        /// </summary>
        /// <param name="baseUrl">Base URL passed to <c>ProcessAsync</c>.</param>
        /// <param name="settings">Settings passed to the feedback search queries score builder.</param>
        private async Task<SearchQueriesReport<FeedbackSearchQuery>> GetFeedbackSearchQueriesScoreAsync(
            string baseUrl,
            SearchScorerSettings settings)
        {
            var processed = await ProcessAsync(
                RelevancyScoreBuilder.FromFeedbackSearchQueriesCsv(settings),
                baseUrl);

            return WeightEvently(processed);
        }
Beispiel #4
0
        /// <summary>
        /// Fetches the relevancy report for <paramref name="settings"/> and writes three console
        /// sections, each a heading followed by the biggest winners and losers: curated search
        /// queries, client curated search queries, and feedback.
        /// </summary>
        /// <param name="settings">Settings used to produce the report.</param>
        public async Task RunAsync(SearchScorerSettings settings)
        {
            var relevancyReport = await GetReportAsync(settings);

            // Section 1: curated queries.
            ConsoleUtility.WriteHeading("Curated Search Queries", '=');
            WriteBiggestWinnersAndLosersToConsole(relevancyReport, variant => variant.CuratedSearchQueries);

            // Section 2: client curated queries.
            ConsoleUtility.WriteHeading("Client Curated Search Queries", '=');
            WriteBiggestWinnersAndLosersToConsole(relevancyReport, variant => variant.ClientCuratedSearchQueries);

            // Section 3: feedback queries.
            ConsoleUtility.WriteHeading("Feedback", '=');
            WriteBiggestWinnersAndLosersToConsole(relevancyReport, variant => variant.FeedbackSearchQueries);
        }
Beispiel #5
0
        /// <summary>
        /// Builds relevancy scores from the client curated search queries CSV, processes them
        /// against <paramref name="baseUrl"/>, and weights the results by
        /// <paramref name="topClientQueries"/>.
        /// </summary>
        /// <param name="baseUrl">Base URL passed to <c>ProcessAsync</c>.</param>
        /// <param name="settings">Settings passed to the client curated search queries score builder.</param>
        /// <param name="topClientQueries">Query text mapped to its count, used as the weighting input.</param>
        private async Task<SearchQueriesReport<CuratedSearchQuery>> GetClientCuratedSearchQueriesScoreAsync(
            string baseUrl,
            SearchScorerSettings settings,
            IReadOnlyDictionary<string, int> topClientQueries)
        {
            var processed = await ProcessAsync(
                RelevancyScoreBuilder.FromClientCuratedSearchQueriesCsv(settings),
                baseUrl);

            return WeightByTopQueries(topClientQueries, processed);
        }
Beispiel #6
0
        /// <summary>
        /// Reads the top search queries, top client search queries, and Google Analytics search
        /// referrals CSV files named in <paramref name="settings"/>, then builds a variant report
        /// for <paramref name="customVariantUrl"/>.
        /// </summary>
        /// <param name="settings">Supplies the CSV paths and is forwarded to the report builder.</param>
        /// <param name="customVariantUrl">Base URL of the variant to report on.</param>
        /// <returns>The variant report for the given URL.</returns>
        public async Task<VariantReport> GetCustomVariantReportAsync(
            SearchScorerSettings settings,
            string customVariantUrl)
        {
            var queryCounts = TopSearchQueriesCsvReader.Read(settings.TopSearchQueriesCsvPath);
            var clientQueryCounts = TopClientSearchQueriesCsvReader.Read(settings.TopClientSearchQueriesCsvPath);
            var referralCounts = GoogleAnalyticsSearchReferralsCsvReader.Read(settings.GoogleAnalyticsSearchReferralsCsvPath);

            var variantReport = await GetVariantReport(
                customVariantUrl,
                settings,
                queryCounts,
                clientQueryCounts,
                referralCounts);

            return variantReport;
        }
        /// <summary>
        /// Verifies that <paramref name="index"/> has exactly one scoring profile named
        /// <c>ScoringProfileName</c>, and that this profile has exactly one scoring function whose
        /// field name is <c>DownloadScoreBoostName</c>.
        /// </summary>
        /// <param name="index">Index definition to check.</param>
        /// <param name="settings">Supplies the index name used in error messages.</param>
        /// <exception cref="InvalidOperationException">Either expectation is not met.</exception>
        private static void EnsureValidNuGetSearchIndex(this Index index, SearchScorerSettings settings)
        {
            var profiles = index.ScoringProfiles;
            var hasExpectedProfile = profiles.Count == 1 && profiles[0].Name == ScoringProfileName;
            if (!hasExpectedProfile)
            {
                throw new InvalidOperationException(
                    $"Azure Search index '{settings.AzureSearchIndexName}' should have one scoring profile named '{ScoringProfileName}'");
            }

            var functions = profiles[0].Functions;
            var hasExpectedFunction = functions.Count == 1 && functions[0].FieldName == DownloadScoreBoostName;
            if (!hasExpectedFunction)
            {
                throw new InvalidOperationException(
                    $"Azure Search index '{settings.AzureSearchIndexName}' should have one scoring function on the '{DownloadScoreBoostName}' field");
            }
        }
        /// <summary>
        /// Checks whether a package ID, or a package ID prefix pattern ending in <c>*</c>, is found
        /// in both the control and the treatment search endpoints.
        /// </summary>
        /// <param name="packageIdPattern">
        /// Either a valid package ID, or a valid package ID followed by a trailing <c>*</c> wildcard.
        /// </param>
        /// <param name="settings">Supplies the control and treatment base URLs.</param>
        /// <returns>The existence result, which is the same for control and treatment.</returns>
        /// <exception cref="ArgumentException">
        /// The pattern ends with a wildcard but the part before it is not a valid package ID.
        /// </exception>
        /// <exception cref="NotSupportedException">
        /// The pattern is neither a valid package ID nor a wildcard-terminated pattern.
        /// </exception>
        /// <exception cref="ArgumentNullException">
        /// Control and treatment disagree about whether the package exists. The exception type is
        /// kept for compatibility with existing callers even though the argument is not null.
        /// </exception>
        public async Task<bool> DoesPackageIdExistAsync(
            string packageIdPattern,
            SearchScorerSettings settings)
        {
            string query;
            int take;

            if (PackageIdValidator.IsValidPackageId(packageIdPattern))
            {
                // Exact ID: a field-scoped query needs only a single result.
                query = $"packageid:{packageIdPattern}";
                take = 1;
            }
            else if (packageIdPattern.EndsWith("*", StringComparison.Ordinal))
            {
                var prefix = packageIdPattern.Substring(0, packageIdPattern.Length - 1).TrimEnd(Separators);
                if (!PackageIdValidator.IsValidPackageId(prefix))
                {
                    throw new ArgumentException($"The package ID '{packageIdPattern}' looks like a pattern but the part before the wildcard is not a valid package ID.");
                }

                // Prefix pattern: search for the non-empty pieces of the prefix and request a
                // larger page of results to match against the pattern.
                var pieces = prefix
                    .Split(Separators)
                    .Where(x => !string.IsNullOrWhiteSpace(x));
                query = string.Join(" ", pieces);
                take = 1000;
            }
            else
            {
                throw new NotSupportedException(
                    $"The package ID pattern '{packageIdPattern}' is neither a valid package ID nor a pattern ending with a wildcard.");
            }

            // Start both lookups before awaiting so they run concurrently.
            Task<bool> controlExistsTask = DoesPackageIdExistInQueryAsync(packageIdPattern, settings.ControlBaseUrl, query, take: take);
            Task<bool> treatmentExistsTask = DoesPackageIdExistInQueryAsync(packageIdPattern, settings.TreatmentBaseUrl, query, take: take);

            var results = await Task.WhenAll(controlExistsTask, treatmentExistsTask);
            var existsInControl = results[0];
            var existsInTreatment = results[1];

            if (existsInControl != existsInTreatment)
            {
                // The single-string constructor treats its argument as the parameter name, so pass
                // the parameter name and the message separately.
                throw new ArgumentNullException(
                    nameof(packageIdPattern),
                    $"The package ID '{packageIdPattern}' has inconsistent availability. " +
                    $"Exists in control: {existsInControl}. " +
                    $"Exists in treatment: {existsInTreatment}.");
            }

            return existsInControl;
        }
Beispiel #9
0
        /// <summary>
        /// Produces the curated, client curated, and feedback search query reports for
        /// <paramref name="baseUrl"/>, awaiting each before starting the next, and bundles them
        /// into a <c>VariantReport</c>.
        /// </summary>
        /// <param name="baseUrl">Base URL of the variant being scored.</param>
        /// <param name="settings">Settings forwarded to each scoring step.</param>
        /// <param name="topQueries">Top query counts used by the curated scoring step.</param>
        /// <param name="topClientQueries">Top client query counts used by the client curated scoring step.</param>
        /// <param name="topSearchReferrals">Search referral counts used by the curated scoring step.</param>
        private async Task<VariantReport> GetVariantReport(
            string baseUrl,
            SearchScorerSettings settings,
            IReadOnlyDictionary<string, int> topQueries,
            IReadOnlyDictionary<string, int> topClientQueries,
            IReadOnlyDictionary<string, int> topSearchReferrals)
        {
            var curated = await GetCuratedSearchQueriesScoreAsync(baseUrl, settings, topQueries, topSearchReferrals);
            var clientCurated = await GetClientCuratedSearchQueriesScoreAsync(baseUrl, settings, topClientQueries);
            var feedback = await GetFeedbackSearchQueriesScoreAsync(baseUrl, settings);

            return new VariantReport(curated, clientCurated, feedback);
        }
Beispiel #10
0
        /// <summary>
        /// Builds relevancy scores from the top search selections CSV, keeps those whose query
        /// appears in <paramref name="topQueries"/> (up to 1000, highest query count first),
        /// processes them against <paramref name="baseUrl"/>, and weights the results by
        /// <paramref name="topQueries"/>.
        /// </summary>
        /// <param name="baseUrl">Base URL passed to <c>ProcessAsync</c>.</param>
        /// <param name="settings">Supplies the top search selections CSV path.</param>
        /// <param name="topQueries">Query text mapped to its count.</param>
        private async Task<SearchQueriesReport<SearchQueryWithSelections>> GetTopSearchSelectionsScoreAsync(
            string baseUrl,
            SearchScorerSettings settings,
            IReadOnlyDictionary<string, int> topQueries)
        {
            const int maxSelections = 1000;

            var allSelections = RelevancyScoreBuilder.FromTopSearchSelectionsCsv(settings.TopSearchSelectionsCsvPath);

            // Take the top search selection data by query frequency.
            var mostFrequent =
                (from selection in allSelections
                 where topQueries.ContainsKey(selection.SearchQuery)
                 orderby topQueries[selection.SearchQuery] descending
                 select selection)
                .Take(maxSelections);

            var processed = await ProcessAsync(
                mostFrequent,
                baseUrl);

            return WeightByTopQueries(topQueries, processed);
        }
Beispiel #11
0
        /// <summary>
        /// Reads the top search queries, top client search queries, and Google Analytics search
        /// referrals CSV files named in <paramref name="settings"/>, builds a variant report for
        /// the control base URL and then for the treatment base URL, and combines the two into a
        /// <c>RelevancyReport</c>.
        /// </summary>
        /// <param name="settings">Supplies the CSV paths and the control and treatment base URLs.</param>
        private async Task<RelevancyReport> GetReportAsync(SearchScorerSettings settings)
        {
            var queryCounts = TopSearchQueriesCsvReader.Read(settings.TopSearchQueriesCsvPath);
            var clientQueryCounts = TopClientSearchQueriesCsvReader.Read(settings.TopClientSearchQueriesCsvPath);
            var referralCounts = GoogleAnalyticsSearchReferralsCsvReader.Read(settings.GoogleAnalyticsSearchReferralsCsvPath);

            // The control report is awaited to completion before the treatment report is started.
            var control = await GetVariantReport(
                settings.ControlBaseUrl,
                settings,
                queryCounts,
                clientQueryCounts,
                referralCounts);

            var treatment = await GetVariantReport(
                settings.TreatmentBaseUrl,
                settings,
                queryCounts,
                clientQueryCounts,
                referralCounts);

            return new RelevancyReport(control, treatment);
        }
        /// <summary>
        /// Checks each distinct (case-insensitive) entry of <paramref name="packageIds"/> with
        /// <c>DoesPackageIdExistAsync</c> and returns those reported as not existing, sorted
        /// case-insensitively. A <c>.</c> is written to the console after each check.
        /// </summary>
        /// <param name="packageIds">Package IDs or package ID patterns to check.</param>
        /// <param name="settings">Settings forwarded to the existence check.</param>
        /// <returns>The entries that do not exist, in ordinal case-insensitive order.</returns>
        public async Task<List<string>> GetNonExistentPackageIdsAsync(IEnumerable<string> packageIds, SearchScorerSettings settings)
        {
            const int workerCount = 16;

            var pending = new ConcurrentBag<string>(packageIds.Distinct(StringComparer.OrdinalIgnoreCase));
            var missing = new ConcurrentBag<string>();

            // Each worker keeps pulling from the shared bag until it is empty.
            async Task DrainAsync()
            {
                while (pending.TryTake(out var packageIdPattern))
                {
                    var exists = await DoesPackageIdExistAsync(packageIdPattern, settings);
                    Console.Write(".");
                    if (exists)
                    {
                        continue;
                    }

                    missing.Add(packageIdPattern);
                }
            }

            // Start all worker loops up front, then wait for every one of them to finish.
            var workers = new List<Task>(workerCount);
            for (var i = 0; i < workerCount; i++)
            {
                workers.Add(DrainAsync());
            }

            await Task.WhenAll(workers);

            return missing
                .OrderBy(id => id, StringComparer.OrdinalIgnoreCase)
                .ToList();
        }
        /// <summary>
        /// Reads the feedback search queries CSV named in <paramref name="settings"/> and produces
        /// one relevancy score entry per feedback row, in file order.
        /// </summary>
        /// <param name="settings">Supplies the feedback search queries CSV path.</param>
        /// <returns>
        /// For each feedback row, its search query, a case-insensitive map giving every most
        /// relevant package ID the score <c>MaximumRelevancyScore</c>, and the row itself.
        /// </returns>
        public static IReadOnlyList<SearchQueryRelevancyScores<FeedbackSearchQuery>> FromFeedbackSearchQueriesCsv(SearchScorerSettings settings)
        {
            return FeedbackSearchQueriesCsvReader
                .Read(settings.FeedbackSearchQueriesCsvPath)
                .Select(feedback => new SearchQueryRelevancyScores<FeedbackSearchQuery>(
                    feedback.SearchQuery,
                    // Give expected package IDs the maximum relevancy score.
                    feedback.MostRelevantPackageIds.ToDictionary(
                        packageId => packageId,
                        packageId => MaximumRelevancyScore,
                        StringComparer.OrdinalIgnoreCase),
                    feedback))
                .ToList();
        }
        /// <summary>
        /// Reads curated search queries by passing the client curated CSV path and the curated CSV
        /// path from <paramref name="settings"/> to the curated queries reader, then converts the
        /// queries to relevancy scores.
        /// </summary>
        /// <param name="settings">Supplies both CSV paths.</param>
        public static IReadOnlyList<SearchQueryRelevancyScores<CuratedSearchQuery>> FromClientCuratedSearchQueriesCsv(SearchScorerSettings settings)
        {
            var clientCuratedPath = settings.ClientCuratedSearchQueriesCsvPath;
            var curatedPath = settings.CuratedSearchQueriesCsvPath;

            return FromCuratedSearchQueries(
                CuratedSearchQueriesCsvReader.Read(clientCuratedPath, curatedPath));
        }
        /// <summary>
        /// Fetches the index named by <c>settings.AzureSearchIndexName</c> and validates its scoring
        /// layout before returning it.
        /// </summary>
        /// <param name="client">Search service client used to fetch the index.</param>
        /// <param name="settings">Supplies the index name; also forwarded to validation.</param>
        /// <returns>The fetched index, once validation has passed.</returns>
        public static async Task<Index> GetNuGetSearchIndexAsync(this ISearchServiceClient client, SearchScorerSettings settings)
        {
            var fetched = await client.Indexes.GetAsync(settings.AzureSearchIndexName);

            // Validation throws if the index does not have the expected scoring profile/function.
            fetched.EnsureValidNuGetSearchIndex(settings);

            return fetched;
        }