/// <summary>
/// Rewrites the text weights and the download score boost on the index's first scoring
/// profile and sends the modified index definition to the Azure Search service.
/// </summary>
/// <param name="client">The search service client used to create or update the index.</param>
/// <param name="settings">Settings supplying the service and index names used in log output and validation.</param>
/// <param name="index">The index definition to modify in place and upload.</param>
/// <param name="packageIdWeight">Text weight assigned to the package ID field.</param>
/// <param name="tokenizedPackageIdWeight">Text weight assigned to the tokenized package ID field.</param>
/// <param name="tagsWeight">Text weight assigned to the tags field.</param>
/// <param name="downloadScoreBoost">Boost assigned to the first scoring function of the profile.</param>
/// <exception cref="InvalidOperationException">
/// Thrown by the validation step when the index does not have the expected scoring profile or function.
/// </exception>
public static async Task UpdateNuGetSearchIndexAsync(
    this ISearchServiceClient client,
    SearchScorerSettings settings,
    Index index,
    double packageIdWeight,
    double tokenizedPackageIdWeight,
    double tagsWeight,
    double downloadScoreBoost)
{
    Console.WriteLine($"Updating Azure Search service '{settings.AzureSearchServiceName}', index '{settings.AzureSearchIndexName}'");
    Console.WriteLine($"Package ID weight: {packageIdWeight}");
    Console.WriteLine($"Tokenized package ID weight: {tokenizedPackageIdWeight}");
    Console.WriteLine($"Tags weight: {tagsWeight}");
    Console.WriteLine($"Download score boost: {downloadScoreBoost}");

    // Fail before touching anything if the index does not have the expected shape.
    index.EnsureValidNuGetSearchIndex(settings);

    var scoringProfile = index.ScoringProfiles[0];
    var textWeights = scoringProfile.TextWeights.Weights;
    var downloadFunction = scoringProfile.Functions[0];

    // Start from an empty weight table so only the three fields below carry a weight.
    textWeights.Clear();
    textWeights[PackageIdFieldName] = packageIdWeight;
    textWeights[TokenizedPackageIdFieldName] = tokenizedPackageIdWeight;
    textWeights[TagsFieldName] = tagsWeight;

    downloadFunction.Boost = downloadScoreBoost;

    await client.Indexes.CreateOrUpdateAsync(index);

    Console.WriteLine($"Updated Azure Search service '{settings.AzureSearchServiceName}', index '{settings.AzureSearchIndexName}'");
}
/// <summary>
/// Scores the curated search queries against <paramref name="baseUrl"/> and weights the
/// results by the top query counts after discounting search referrals.
/// </summary>
/// <param name="baseUrl">Base URL of the search service variant being scored.</param>
/// <param name="settings">Settings passed to the curated search queries CSV loader.</param>
/// <param name="topQueries">Query text mapped to its count.</param>
/// <param name="topSearchReferrals">Query text mapped to the referral count to subtract.</param>
/// <returns>The curated search queries report weighted by the adjusted top query counts.</returns>
private async Task<SearchQueriesReport<CuratedSearchQuery>> GetCuratedSearchQueriesScoreAsync(
    string baseUrl,
    SearchScorerSettings settings,
    IReadOnlyDictionary<string, int> topQueries,
    IReadOnlyDictionary<string, int> topSearchReferrals)
{
    // The smallest observed count acts as a floor so that subtracting referrals
    // never pushes a query below the least frequent top query.
    var floor = topQueries.Min(pair => pair.Value);

    var adjustedTopQueries = topQueries.ToDictionary(
        pair => pair.Key,
        pair => topSearchReferrals.TryGetValue(pair.Key, out var referralCount)
            ? Math.Max(pair.Value - referralCount, floor)
            : pair.Value);

    var curatedScores = RelevancyScoreBuilder.FromCuratedSearchQueriesCsv(settings);
    var processed = await ProcessAsync(curatedScores, baseUrl);

    return WeightByTopQueries(adjustedTopQueries, processed);
}
/// <summary>
/// Scores the feedback search queries against <paramref name="baseUrl"/> and combines the
/// results using <c>WeightEvently</c>.
/// </summary>
/// <param name="baseUrl">Base URL of the search service variant being scored.</param>
/// <param name="settings">Settings passed to the feedback search queries CSV loader.</param>
/// <returns>The feedback search queries report.</returns>
private async Task<SearchQueriesReport<FeedbackSearchQuery>> GetFeedbackSearchQueriesScoreAsync(
    string baseUrl,
    SearchScorerSettings settings)
{
    var feedbackScores = RelevancyScoreBuilder.FromFeedbackSearchQueriesCsv(settings);
    var processed = await ProcessAsync(feedbackScores, baseUrl);

    return WeightEvently(processed);
}
/// <summary>
/// Builds the relevancy report and prints the biggest winners and losers for each
/// query category to the console.
/// </summary>
/// <param name="settings">Settings used to build the report.</param>
public async Task RunAsync(SearchScorerSettings settings)
{
    var relevancyReport = await GetReportAsync(settings);

    // Curated queries.
    ConsoleUtility.WriteHeading("Curated Search Queries", '=');
    WriteBiggestWinnersAndLosersToConsole(relevancyReport, variant => variant.CuratedSearchQueries);

    // Client curated queries.
    ConsoleUtility.WriteHeading("Client Curated Search Queries", '=');
    WriteBiggestWinnersAndLosersToConsole(relevancyReport, variant => variant.ClientCuratedSearchQueries);

    // Feedback queries.
    ConsoleUtility.WriteHeading("Feedback", '=');
    WriteBiggestWinnersAndLosersToConsole(relevancyReport, variant => variant.FeedbackSearchQueries);
}
/// <summary>
/// Scores the client curated search queries against <paramref name="baseUrl"/> and weights
/// the results by the top client query counts.
/// </summary>
/// <param name="baseUrl">Base URL of the search service variant being scored.</param>
/// <param name="settings">Settings passed to the client curated search queries CSV loader.</param>
/// <param name="topClientQueries">Query text mapped to its count, used for weighting.</param>
/// <returns>The client curated search queries report.</returns>
private async Task<SearchQueriesReport<CuratedSearchQuery>> GetClientCuratedSearchQueriesScoreAsync(
    string baseUrl,
    SearchScorerSettings settings,
    IReadOnlyDictionary<string, int> topClientQueries)
{
    var clientCuratedScores = RelevancyScoreBuilder.FromClientCuratedSearchQueriesCsv(settings);
    var processed = await ProcessAsync(clientCuratedScores, baseUrl);

    return WeightByTopQueries(topClientQueries, processed);
}
/// <summary>
/// Reads the top queries, top client queries and search referrals CSV files named in
/// <paramref name="settings"/> and builds a variant report for the given URL.
/// </summary>
/// <param name="settings">Settings supplying the CSV paths.</param>
/// <param name="customVariantUrl">Base URL of the variant to score.</param>
/// <returns>The variant report for <paramref name="customVariantUrl"/>.</returns>
public async Task<VariantReport> GetCustomVariantReportAsync(
    SearchScorerSettings settings,
    string customVariantUrl)
{
    var topQueries = TopSearchQueriesCsvReader.Read(settings.TopSearchQueriesCsvPath);
    var topClientQueries = TopClientSearchQueriesCsvReader.Read(settings.TopClientSearchQueriesCsvPath);
    var topSearchReferrals = GoogleAnalyticsSearchReferralsCsvReader.Read(settings.GoogleAnalyticsSearchReferralsCsvPath);

    var variantReport = await GetVariantReport(
        customVariantUrl,
        settings,
        topQueries,
        topClientQueries,
        topSearchReferrals);

    return variantReport;
}
/// <summary>
/// Verifies that the index has exactly one scoring profile with the expected name and that
/// the profile has exactly one scoring function on the expected field.
/// </summary>
/// <param name="index">The index definition to check.</param>
/// <param name="settings">Settings supplying the index name used in error messages.</param>
/// <exception cref="InvalidOperationException">Thrown when either expectation is not met.</exception>
private static void EnsureValidNuGetSearchIndex(this Index index, SearchScorerSettings settings)
{
    var profiles = index.ScoringProfiles;
    var hasExpectedProfile = profiles.Count == 1 && profiles[0].Name == ScoringProfileName;
    if (!hasExpectedProfile)
    {
        throw new InvalidOperationException(
            $"Azure Search index '{settings.AzureSearchIndexName}' should have one scoring profile named '{ScoringProfileName}'");
    }

    var functions = profiles[0].Functions;
    var hasExpectedFunction = functions.Count == 1 && functions[0].FieldName == DownloadScoreBoostName;
    if (!hasExpectedFunction)
    {
        throw new InvalidOperationException(
            $"Azure Search index '{settings.AzureSearchIndexName}' should have one scoring function on the '{DownloadScoreBoostName}' field");
    }
}
/// <summary>
/// Checks a package ID, or a package ID prefix pattern ending in <c>*</c>, against both
/// the control and the treatment search services.
/// </summary>
/// <param name="packageIdPattern">
/// Either a valid package ID or a valid package ID followed by a trailing <c>*</c> wildcard.
/// </param>
/// <param name="settings">Settings supplying the control and treatment base URLs.</param>
/// <returns>The existence result shared by the control and treatment services.</returns>
/// <exception cref="ArgumentException">
/// Thrown when the part before the wildcard is not a valid package ID, or when the control
/// and treatment services disagree about whether the package ID exists.
/// </exception>
/// <exception cref="NotSupportedException">
/// Thrown when the input is neither a valid package ID nor a pattern ending in <c>*</c>.
/// </exception>
public async Task<bool> DoesPackageIdExistAsync(
    string packageIdPattern,
    SearchScorerSettings settings)
{
    string query;
    int take;

    if (PackageIdValidator.IsValidPackageId(packageIdPattern))
    {
        // Exact package ID: a single result is enough to decide.
        query = $"packageid:{packageIdPattern}";
        take = 1;
    }
    else if (packageIdPattern.EndsWith("*", StringComparison.Ordinal))
    {
        // Prefix pattern: strip the wildcard and any trailing separators, then validate the rest.
        var prefix = packageIdPattern.Substring(0, packageIdPattern.Length - 1).TrimEnd(Separators);
        if (!PackageIdValidator.IsValidPackageId(prefix))
        {
            throw new ArgumentException($"The package ID '{packageIdPattern}' looks like a pattern but the part before the wildcard is not a valid package ID.");
        }

        // Search using the separator-delimited pieces of the prefix as individual terms.
        var pieces = prefix
            .Split(Separators)
            .Where(x => !string.IsNullOrWhiteSpace(x));
        query = string.Join(" ", pieces);
        take = 1000;
    }
    else
    {
        throw new NotSupportedException();
    }

    // Start both lookups before awaiting so they run concurrently.
    Task<bool> controlExistsTask = DoesPackageIdExistInQueryAsync(packageIdPattern, settings.ControlBaseUrl, query, take: take);
    Task<bool> treatmentExistsTask = DoesPackageIdExistInQueryAsync(packageIdPattern, settings.TreatmentBaseUrl, query, take: take);
    await Task.WhenAll(controlExistsTask, treatmentExistsTask);

    // Both tasks have completed successfully here, so these awaits do not block.
    var existsInControl = await controlExistsTask;
    var existsInTreatment = await treatmentExistsTask;

    if (existsInControl != existsInTreatment)
    {
        // ArgumentException, not ArgumentNullException: the argument is not null, and the
        // single-string ArgumentNullException constructor would treat this message as a parameter name.
        throw new ArgumentException(
            $"The package ID '{packageIdPattern}' has inconsistent availability. " +
            $"Exists in control: {existsInControl}. " +
            $"Exists in treatment: {existsInTreatment}.");
    }

    return existsInControl;
}
/// <summary>
/// Builds the curated, client curated and feedback reports for one search service variant,
/// one after another, and bundles them into a <see cref="VariantReport"/>.
/// </summary>
/// <param name="baseUrl">Base URL of the variant being scored.</param>
/// <param name="settings">Settings forwarded to each report builder.</param>
/// <param name="topQueries">Top query counts used by the curated report.</param>
/// <param name="topClientQueries">Top client query counts used by the client curated report.</param>
/// <param name="topSearchReferrals">Search referral counts used by the curated report.</param>
/// <returns>The combined report for the variant.</returns>
private async Task<VariantReport> GetVariantReport(
    string baseUrl,
    SearchScorerSettings settings,
    IReadOnlyDictionary<string, int> topQueries,
    IReadOnlyDictionary<string, int> topClientQueries,
    IReadOnlyDictionary<string, int> topSearchReferrals)
{
    // Each report is awaited before the next one starts.
    var curated = await GetCuratedSearchQueriesScoreAsync(baseUrl, settings, topQueries, topSearchReferrals);
    var clientCurated = await GetClientCuratedSearchQueriesScoreAsync(baseUrl, settings, topClientQueries);
    var feedback = await GetFeedbackSearchQueriesScoreAsync(baseUrl, settings);

    return new VariantReport(curated, clientCurated, feedback);
}
/// <summary>
/// Scores the top search selections against <paramref name="baseUrl"/>, limited to the
/// 1000 selections whose queries have the highest counts in <paramref name="topQueries"/>.
/// </summary>
/// <param name="baseUrl">Base URL of the search service variant being scored.</param>
/// <param name="settings">Settings supplying the top search selections CSV path.</param>
/// <param name="topQueries">Query text mapped to its count, used for filtering, ordering and weighting.</param>
/// <returns>The search selections report weighted by the top query counts.</returns>
private async Task<SearchQueriesReport<SearchQueryWithSelections>> GetTopSearchSelectionsScoreAsync(
    string baseUrl,
    SearchScorerSettings settings,
    IReadOnlyDictionary<string, int> topQueries)
{
    var allSelectionScores = RelevancyScoreBuilder.FromTopSearchSelectionsCsv(settings.TopSearchSelectionsCsvPath);

    // Take the top search selection data by query frequency.
    var mostFrequentSelections = allSelectionScores
        .Where(selection => topQueries.ContainsKey(selection.SearchQuery))
        .OrderByDescending(selection => topQueries[selection.SearchQuery])
        .Take(1000);

    var processed = await ProcessAsync(mostFrequentSelections, baseUrl);

    return WeightByTopQueries(topQueries, processed);
}
/// <summary>
/// Reads the shared CSV inputs once, builds a variant report for the control URL and then
/// for the treatment URL, and pairs them in a <see cref="RelevancyReport"/>.
/// </summary>
/// <param name="settings">Settings supplying the CSV paths and the control and treatment base URLs.</param>
/// <returns>The report containing both variants.</returns>
private async Task<RelevancyReport> GetReportAsync(SearchScorerSettings settings)
{
    var topQueries = TopSearchQueriesCsvReader.Read(settings.TopSearchQueriesCsvPath);
    var topClientQueries = TopClientSearchQueriesCsvReader.Read(settings.TopClientSearchQueriesCsvPath);
    var topSearchReferrals = GoogleAnalyticsSearchReferralsCsvReader.Read(settings.GoogleAnalyticsSearchReferralsCsvPath);

    // Control is scored to completion before treatment starts.
    var control = await GetVariantReport(
        settings.ControlBaseUrl,
        settings,
        topQueries,
        topClientQueries,
        topSearchReferrals);

    var treatment = await GetVariantReport(
        settings.TreatmentBaseUrl,
        settings,
        topQueries,
        topClientQueries,
        topSearchReferrals);

    return new RelevancyReport(control, treatment);
}
/// <summary>
/// Checks each distinct (case-insensitive) package ID pattern with
/// <c>DoesPackageIdExistAsync</c>, using 16 concurrent workers, and returns the patterns
/// reported as not existing.
/// </summary>
/// <param name="packageIds">Package IDs or package ID patterns to check.</param>
/// <param name="settings">Settings forwarded to the existence check.</param>
/// <returns>The non-existent patterns, sorted case-insensitively.</returns>
public async Task<List<string>> GetNonExistentPackageIdsAsync(IEnumerable<string> packageIds, SearchScorerSettings settings)
{
    const int workerCount = 16;

    var pending = new ConcurrentBag<string>(packageIds.Distinct(StringComparer.OrdinalIgnoreCase));
    var missing = new ConcurrentBag<string>();

    // Each worker pulls patterns from the shared bag until it is empty.
    async Task DrainAsync()
    {
        while (pending.TryTake(out var pattern))
        {
            var exists = await DoesPackageIdExistAsync(pattern, settings);

            // Progress indicator: one dot per checked pattern.
            Console.Write(".");

            if (!exists)
            {
                missing.Add(pattern);
            }
        }
    }

    // ToList forces every worker to start before any of them is awaited.
    var workers = Enumerable
        .Range(0, workerCount)
        .Select(_ => DrainAsync())
        .ToList();

    await Task.WhenAll(workers);

    return missing
        .OrderBy(id => id, StringComparer.OrdinalIgnoreCase)
        .ToList();
}
/// <summary>
/// Reads the feedback search queries CSV and produces one relevancy score entry per
/// feedback row, in which every expected package ID gets the maximum relevancy score.
/// </summary>
/// <param name="settings">Settings supplying the feedback search queries CSV path.</param>
/// <returns>One score entry per feedback row, in the order the rows were read.</returns>
public static IReadOnlyList<SearchQueryRelevancyScores<FeedbackSearchQuery>> FromFeedbackSearchQueriesCsv(SearchScorerSettings settings)
{
    return FeedbackSearchQueriesCsvReader
        .Read(settings.FeedbackSearchQueriesCsvPath)
        .Select(feedback =>
        {
            // Give expected package IDs the maximum relevancy score (keys compare case-insensitively).
            var maxScoreByPackageId = feedback
                .MostRelevantPackageIds
                .ToDictionary(id => id, id => MaximumRelevancyScore, StringComparer.OrdinalIgnoreCase);

            return new SearchQueryRelevancyScores<FeedbackSearchQuery>(
                feedback.SearchQuery,
                maxScoreByPackageId,
                feedback);
        })
        .ToList();
}
/// <summary>
/// Reads curated search queries using both the client curated and the curated CSV paths
/// from <paramref name="settings"/> and converts them to relevancy scores.
/// </summary>
/// <param name="settings">Settings supplying the two CSV paths passed to the reader.</param>
/// <returns>The relevancy scores built from the queries that were read.</returns>
public static IReadOnlyList<SearchQueryRelevancyScores<CuratedSearchQuery>> FromClientCuratedSearchQueriesCsv(SearchScorerSettings settings)
{
    // NOTE(review): the reader receives the client curated path first and the curated path
    // second; how it combines the two is not visible here.
    var clientCuratedQueries = CuratedSearchQueriesCsvReader.Read(
        settings.ClientCuratedSearchQueriesCsvPath,
        settings.CuratedSearchQueriesCsvPath);

    return FromCuratedSearchQueries(clientCuratedQueries);
}
/// <summary>
/// Fetches the index named in <paramref name="settings"/> from the search service and
/// validates it before returning it.
/// </summary>
/// <param name="client">The search service client used to fetch the index.</param>
/// <param name="settings">Settings supplying the index name.</param>
/// <returns>The fetched index definition.</returns>
/// <exception cref="InvalidOperationException">
/// Thrown by the validation step when the index does not have the expected scoring profile or function.
/// </exception>
public static async Task<Index> GetNuGetSearchIndexAsync(this ISearchServiceClient client, SearchScorerSettings settings)
{
    var fetchedIndex = await client.Indexes.GetAsync(settings.AzureSearchIndexName);

    // Reject indexes that do not have the expected scoring profile shape.
    fetchedIndex.EnsureValidNuGetSearchIndex(settings);

    return fetchedIndex;
}