Exemplo n.º 1
0
        /// <summary>
        /// Gathers the package IDs referenced by the feedback, curated and client curated
        /// search query CSV files and prints every ID that
        /// <c>PackageIdPatternValidator.GetNonExistentPackageIdsAsync</c> reports back.
        /// </summary>
        /// <param name="settings">Supplies the three CSV paths; also passed through to the validator.</param>
        /// <param name="httpClient">HTTP client wrapped by the <c>SearchClient</c> the validator uses.</param>
        private static async Task VerifyPackageIdsExistAsync(SearchScorerSettings settings, HttpClient httpClient)
        {
            var searchClient = new SearchClient(httpClient);
            var validator    = new PackageIdPatternValidator(searchClient);

            // Verify all desired package IDs exist.
            var feedback = FeedbackSearchQueriesCsvReader
                           .Read(settings.FeedbackSearchQueriesCsvPath)
                           .SelectMany(x => x.MostRelevantPackageIds);

            var curated = CuratedSearchQueriesCsvReader
                          .Read(settings.CuratedSearchQueriesCsvPath)
                          .SelectMany(x => x.PackageIdToScore.Keys);

            var clientCurated = CuratedSearchQueriesCsvReader
                                .Read(settings.ClientCuratedSearchQueriesCsvPath)
                                .SelectMany(x => x.PackageIdToScore.Keys);

            Console.WriteLine("Searching for non-existent package IDs");

            // All three sources must be checked; the client curated IDs were previously
            // read but never included in the set handed to the validator.
            var allPackageIds = feedback
                                .Concat(curated)
                                .Concat(clientCurated);
            var nonExistentPackageIds = await validator.GetNonExistentPackageIdsAsync(allPackageIds, settings);

            Console.WriteLine();
            Console.WriteLine($"Found {nonExistentPackageIds.Count}.");
            foreach (var packageId in nonExistentPackageIds)
            {
                Console.WriteLine($" - {packageId}");
            }
        }
Exemplo n.º 2
0
 /// <summary>
 /// Re-emits two data sets in more convenient formats: the GitHub usage JSON and the
 /// top search selections CSV are each read and handed to their CSV writer.
 /// </summary>
 /// <param name="settings">Supplies the input and output paths.</param>
 private static void WriteConvenientCsvs(SearchScorerSettings settings)
 {
     // GitHub usage: JSON reader feeds the CSV writer.
     var gitHubUsage = GitHubUsageJsonReader.Read(settings.GitHubUsageJsonPath);
     GitHubUsageCsvWriter.Write(settings.GitHubUsageCsvPath, gitHubUsage);

     // Top search selections: CSV reader feeds the V2 CSV writer.
     var topSelections = TopSearchSelectionsCsvReader.Read(settings.TopSearchSelectionsCsvPath);
     TopSearchSelectionsV2CsvWriter.Write(settings.TopSearchSelectionsV2CsvPath, topSelections);
 }
Exemplo n.º 3
0
        /// <summary>
        /// Reads the hasher key file and the top V3 search queries, then passes both to
        /// <c>HashedSearchQueryCsvWriter.Write</c> along with the lookup CSV path.
        /// Progress is reported on the console before each step.
        /// </summary>
        /// <param name="settings">Supplies the key file, the query path pattern and the output path.</param>
        private static void HashQueries(SearchScorerSettings settings)
        {
            Console.WriteLine("Reading hasher key file...");
            var rawKey = File.ReadAllText(settings.HasherKeyFile);
            // Surrounding whitespace (e.g. a trailing newline) is not part of the key.
            var key = rawKey.Trim();

            Console.WriteLine("Reading search queries...");
            var queries = TopV3SearchQueriesCsvReader.Read(settings.TopV3SearchQueriesPathPattern);

            Console.WriteLine("Writing hashed search queries...");
            HashedSearchQueryCsvWriter.Write(key, settings.HashedSearchQueryLookupCsvPath, queries);
        }
Exemplo n.º 4
0
        /// <summary>
        /// Builds the scorer settings and runs the command selected by the first argument:
        /// <c>score</c> (also used when no argument is given) or <c>probe</c>.
        /// Any other first argument runs nothing.
        /// </summary>
        /// <param name="args">Command-line arguments; only <c>args[0]</c> is inspected.</param>
        static async Task MainAsync(string[] args)
        {
            ServicePointManager.DefaultConnectionLimit = 64;

            var assemblyDir = Path.GetDirectoryName(typeof(Program).Assembly.Location);
            var settings    = new SearchScorerSettings
            {
                ControlBaseUrl                        = "https://azuresearch-usnc.nuget.org/",
                TreatmentBaseUrl                      = "https://azuresearch-usnc-perf.nuget.org/",
                FeedbackSearchQueriesCsvPath          = Path.Combine(assemblyDir, "FeedbackSearchQueries.csv"),
                CuratedSearchQueriesCsvPath           = Path.Combine(assemblyDir, "CuratedSearchQueries.csv"),
                ClientCuratedSearchQueriesCsvPath     = Path.Combine(assemblyDir, "ClientCuratedSearchQueries.csv"),
                TopSearchQueriesCsvPath               = @"C:\Users\jver\Desktop\search-scorer\TopSearchQueries-2019-08-05.csv",
                TopClientSearchQueriesCsvPath         = @"C:\Users\jver\Desktop\search-scorer\TopClientSearchQueries-60d-2019-10-24.csv",
                TopSearchSelectionsCsvPath            = @"C:\Users\jver\Desktop\search-scorer\TopSearchSelections-2019-08-05.csv",
                TopSearchSelectionsV2CsvPath          = @"C:\Users\jver\Desktop\search-scorer\TopSearchSelectionsV2-2019-08-05.csv",
                GoogleAnalyticsSearchReferralsCsvPath = @"C:\Users\jver\Desktop\search-scorer\GoogleAnalyticsSearchReferrals-2019-07-03-2019-08-04.csv",
                GitHubUsageJsonPath                   = @"C:\Users\jver\Desktop\search-scorer\GitHubUsage.v1-2019-08-06.json",
                GitHubUsageCsvPath                    = @"C:\Users\jver\Desktop\search-scorer\GitHubUsage.v1-2019-08-06.csv",

                // The following settings are only necessary if running the "probe" command.
                AzureSearchServiceName = "",
                AzureSearchIndexName   = "",
                AzureSearchApiKey      = "",
                ProbeResultsCsvPath    = @"C:\Users\jver\Desktop\search-scorer\ProbeResults.csv",

                PackageIdWeights          = CreateRange(lower: 1, upper: 10, increments: 3),
                TokenizedPackageIdWeights = CreateRange(lower: 1, upper: 10, increments: 3),
                TagsWeights     = CreateRange(lower: 1, upper: 10, increments: 3),
                DownloadWeights = CreateRange(lower: 1000, upper: 30000, increments: 5000),
            };

            // WriteConvenientCsvs(settings);

            using (var httpClientHandler = new HttpClientHandler
            {
                AutomaticDecompression = DecompressionMethods.GZip
            })
            // The handler has to be given to the client; a parameterless HttpClient would
            // ignore it and the GZip decompression setting would never take effect.
            using (var httpClient = new HttpClient(httpClientHandler))
            {
                if (args.Length == 0 || args[0] == "score")
                {
                    // await VerifyPackageIdsExistAsync(settings, httpClient);
                    await RunScoreCommandAsync(settings, httpClient);
                }
                else if (args[0] == "probe")
                {
                    await RunProbeCommandAsync(settings, httpClient);
                }
            }
        }
Exemplo n.º 5
0
        /// <summary>
        /// Prints two curation coverage sections to the console: one for the top search
        /// queries against the curated queries, and one for the top client search queries
        /// against the client curated plus curated queries.
        /// </summary>
        /// <param name="settings">Supplies the CSV paths that are read.</param>
        private static void ShowCurationCoverage(SearchScorerSettings settings)
        {
            Console.WriteLine("Search query curation");
            Console.WriteLine("=====================");
            var topQueries     = TopSearchQueriesCsvReader.Read(settings.TopSearchQueriesCsvPath);
            var curatedQueries = CuratedSearchQueriesCsvReader.Read(settings.CuratedSearchQueriesCsvPath);
            ShowCurationCoverage(topQueries, curatedQueries);

            Console.WriteLine();

            Console.WriteLine("Client search query curation");
            Console.WriteLine("============================");
            var topClientQueries = TopClientSearchQueriesCsvReader.Read(settings.TopClientSearchQueriesCsvPath);
            // The client section reads both curated files in a single call.
            var clientCuratedQueries = CuratedSearchQueriesCsvReader.Read(
                settings.ClientCuratedSearchQueriesCsvPath,
                settings.CuratedSearchQueriesCsvPath);
            ShowCurationCoverage(topClientQueries, clientCuratedQueries);
        }
Exemplo n.º 6
0
        /// <summary>
        /// Runs every probe test from <c>GetProbeTests</c>: for each weight combination the
        /// Azure Search index is updated, the treatment URL is scored, and one record is
        /// appended to the probe results CSV.
        /// </summary>
        /// <param name="settings">Azure Search credentials and names, weight ranges and output path.</param>
        /// <param name="httpClient">HTTP client wrapped by the <c>SearchClient</c> used for scoring.</param>
        private static async Task RunProbeCommandAsync(SearchScorerSettings settings, HttpClient httpClient)
        {
            var credentials       = new SearchCredentials(settings.AzureSearchApiKey);
            var azureSearchClient = new SearchServiceClient(settings.AzureSearchServiceName, credentials);

            var index = await azureSearchClient.GetNuGetSearchIndexAsync(settings);

            // GetProbeTests returns a deferred query; materialize it once so the count and
            // the loop below share one enumeration instead of rebuilding the product twice.
            var tests = GetProbeTests(settings).ToList();

            Console.WriteLine("Running {0} tests.", tests.Count);

            foreach (var test in tests)
            {
                var searchClient   = new SearchClient(httpClient);
                var scoreEvaluator = new IREvalutation.RelevancyScoreEvaluator(searchClient);

                // Update the Azure Search index
                await azureSearchClient.UpdateNuGetSearchIndexAsync(
                    settings,
                    index,
                    test.PackageIdWeight,
                    test.TokenizedPackageIdWeight,
                    test.TagsWeight,
                    test.DownloadScoreBoost);

                // Score the new index.
                var report = await scoreEvaluator.GetCustomVariantReportAsync(
                    settings,
                    customVariantUrl: settings.TreatmentBaseUrl);

                // Save the result to the output path
                SearchProbesCsvWriter.Append(
                    settings.ProbeResultsCsvPath,
                    new SearchProbesRecord
                    {
                        PackageIdWeight          = test.PackageIdWeight,
                        TokenizedPackageIdWeight = test.TokenizedPackageIdWeight,
                        TagsWeight               = test.TagsWeight,
                        DownloadScoreBoost       = test.DownloadScoreBoost,

                        CuratedSearchScore       = report.CuratedSearchQueries.Score,
                        ClientCuratedSearchScore = report.ClientCuratedSearchQueries.Score,
                        FeedbackScore            = report.FeedbackSearchQueries.Score
                    });
            }
        }
Exemplo n.º 7
0
        /// <summary>
        /// Runs the same search term against the control and treatment base URLs and prints
        /// the two result lists side by side in a ranked table.
        /// </summary>
        /// <param name="settings">Supplies the control and treatment base URLs.</param>
        /// <param name="httpClient">HTTP client wrapped by the <c>SearchClient</c> used for both searches.</param>
        /// <param name="searchTerm">The query text to send to both endpoints.</param>
        private static async Task CompareSearchTermAsync(SearchScorerSettings settings, HttpClient httpClient, string searchTerm)
        {
            Console.WriteLine($"Search term: {searchTerm}");

            var searchClient = new SearchClient(httpClient);
            var take         = 10;

            Console.WriteLine($"Searching on control {settings.ControlBaseUrl}");
            var control = await searchClient.SearchAsync(settings.ControlBaseUrl, searchTerm, take);

            Console.WriteLine($"Searching on treatment {settings.TreatmentBaseUrl}");
            var treatment = await searchClient.SearchAsync(settings.TreatmentBaseUrl, searchTerm, take);

            Console.WriteLine();

            // Each column is sized from its own result set; sizing the treatment column
            // from the control results would misalign the table.
            var maxControl   = GetColumnWidth("Control", control);
            var maxTreatment = GetColumnWidth("Treatment", treatment);

            Console.Write("Rank | ");
            Console.Write(DisplayHeading("Control", control).PadRight(maxControl));
            Console.Write(" | ");
            Console.Write(DisplayHeading("Treatment", treatment).PadRight(maxTreatment));
            Console.WriteLine();

            Console.Write("---- | ");
            Console.Write(new string('-', maxControl));
            Console.Write(" | ");
            Console.Write(new string('-', maxTreatment));
            Console.WriteLine();

            // Iterate to the longer of the two lists; ElementAtOrDefault yields the default
            // value for the shorter one once it runs out.
            for (var i = 0; i < control.Data.Count || i < treatment.Data.Count; i++)
            {
                Console.Write((i + 1).ToString().PadRight("Rank".Length));
                Console.Write(" | ");
                Console.Write(DisplayPackage(control.Data.ElementAtOrDefault(i)).PadRight(maxControl));
                Console.Write(" | ");
                Console.Write(DisplayPackage(treatment.Data.ElementAtOrDefault(i)).PadRight(maxTreatment));
                Console.WriteLine();
            }

            Console.WriteLine();
        }
Exemplo n.º 8
0
        /// <summary>
        /// Produces one <c>SearchProbeTest</c> per element of the Cartesian product of the
        /// four weight collections in <paramref name="settings"/>.
        /// </summary>
        /// <param name="settings">Supplies the package ID, tokenized package ID, tags and download weights.</param>
        /// <returns>A lazily projected sequence of probe tests.</returns>
        private static IEnumerable<SearchProbeTest> GetProbeTests(SearchScorerSettings settings)
        {
            // The position in this array determines which test property each value feeds below.
            var weightAxes = new[]
            {
                settings.PackageIdWeights,
                settings.TokenizedPackageIdWeights,
                settings.TagsWeights,
                settings.DownloadWeights
            };

            var combinations = CartesianProduct(weightAxes);

            return combinations.Select(combination =>
            {
                // Materialize once so indexing does not re-enumerate the combination.
                var picked = combination.ToList();

                return new SearchProbeTest
                {
                    PackageIdWeight          = picked[0],
                    TokenizedPackageIdWeight = picked[1],
                    TagsWeight               = picked[2],
                    DownloadScoreBoost       = picked[3]
                };
            });
        }
Exemplo n.º 9
0
 /// <summary>
 /// Wraps <paramref name="httpClient"/> in a <c>SearchClient</c>, builds a relevancy
 /// score evaluator on top of it and awaits its <c>RunAsync</c> with the given settings.
 /// </summary>
 /// <param name="settings">Passed unchanged to the evaluator.</param>
 /// <param name="httpClient">HTTP client used by the search client.</param>
 private static async Task RunScoreCommandAsync(SearchScorerSettings settings, HttpClient httpClient)
 {
     var evaluator = new IREvalutation.RelevancyScoreEvaluator(new SearchClient(httpClient));

     await evaluator.RunAsync(settings);
 }
Exemplo n.º 10
0
        /// <summary>
        /// Builds the scorer settings and dispatches on the first argument to one of the
        /// commands <c>score</c> (also used when no argument is given), <c>probe</c>,
        /// <c>curation-coverage</c>, <c>convert-csv</c>, <c>verify-package-ids</c>,
        /// <c>hash-queries</c> or <c>compare</c>. Any other first argument runs nothing.
        /// For <c>compare</c>, the remaining arguments are joined with spaces to form the search term.
        /// </summary>
        /// <param name="args">Command-line arguments; <c>args[0]</c> selects the command.</param>
        /// <remarks>
        /// NOTE(review): the TreatmentBaseUrl initializer below appears to have been damaged by
        /// credential redaction - the URL literal runs straight into a later verbatim path and the
        /// settings that presumably sat between them are missing. Restore it from the real file.
        /// NOTE(review): httpClientHandler is configured for GZip but the HttpClient is created
        /// with its parameterless constructor, so the handler is never used - confirm whether
        /// it should be passed to the client.
        /// </remarks>
        static async Task MainAsync(string[] args)
        {
            // Sets the process-wide default connection limit to 64 before any request is made.
            ServicePointManager.DefaultConnectionLimit = 64;

            var assemblyDir = Path.GetDirectoryName(typeof(Program).Assembly.Location);
            var settings    = new SearchScorerSettings
            {
                ControlBaseUrl   = "https://azuresearch-usnc.nuget.org/",
                TreatmentBaseUrl = "http://*****:*****@"C:\Users\jver\OneDrive - Microsoft\search-scorer\TopSearchQueries-90d-organic-2020-10-19.csv",
                TopClientSearchQueriesCsvPath         = @"C:\Users\jver\OneDrive - Microsoft\search-scorer\TopClientSearchQueries-45d-2020-10-19.csv",
                GoogleAnalyticsSearchReferralsCsvPath = @"C:\Users\jver\OneDrive - Microsoft\search-scorer\GoogleAnalyticsSearchReferrals-empty.csv",

                // Used for the "convert-csv" command
                TopSearchSelectionsCsvPath   = @"C:\Users\jver\OneDrive - Microsoft\search-scorer\TopSearchSelections-90d-2020-10-19.csv",
                GitHubUsageJsonPath          = @"C:\Users\jver\OneDrive - Microsoft\search-scorer\GitHubUsage.v1-2019-08-06.json",
                GitHubUsageCsvPath           = @"C:\Users\jver\OneDrive - Microsoft\search-scorer\GitHubUsage.v1-2019-08-06.csv",
                TopSearchSelectionsV2CsvPath = @"C:\Users\jver\OneDrive - Microsoft\search-scorer\TopSearchSelectionsV2-90d-2020-10-19.csv",

                // Used for the "hash-queries" command.
                TopV3SearchQueriesPathPattern = @"C:\Users\jver\OneDrive - Microsoft\search-scorer\TopV3SearchQueries-90d-p*-2020-10-19.csv",
                HasherKeyFile = @"C:\Users\jver\OneDrive - Microsoft\search-scorer\HasherKey.txt",
                HashedSearchQueryLookupCsvPath = @"C:\Users\jver\OneDrive - Microsoft\search-scorer\HashedSearchQueries-2020-10-19.csv",

                // The following settings are only necessary if running the "probe" command.
                AzureSearchServiceName = "",
                AzureSearchIndexName   = "",
                AzureSearchApiKey      = "",
                ProbeResultsCsvPath    = @"C:\Users\jver\OneDrive - Microsoft\search-scorer\ProbeResults.csv",

                PackageIdWeights          = CreateRange(lower: 1, upper: 10, increments: 3),
                TokenizedPackageIdWeights = CreateRange(lower: 1, upper: 10, increments: 3),
                TagsWeights     = CreateRange(lower: 1, upper: 10, increments: 3),
                DownloadWeights = CreateRange(lower: 1000, upper: 30000, increments: 5000),
            };

            using (var httpClientHandler = new HttpClientHandler {
                AutomaticDecompression = DecompressionMethods.GZip
            })
                using (var httpClient = new HttpClient())
                {
                    if (args.Length == 0 || args[0] == "score")
                    {
                        await RunScoreCommandAsync(settings, httpClient);
                    }
                    else if (args[0] == "probe")
                    {
                        await RunProbeCommandAsync(settings, httpClient);
                    }
                    else if (args[0] == "curation-coverage")
                    {
                        ShowCurationCoverage(settings);
                    }
                    else if (args[0] == "convert-csv")
                    {
                        WriteConvenientCsvs(settings);
                    }
                    else if (args[0] == "verify-package-ids")
                    {
                        await VerifyPackageIdsExistAsync(settings, httpClient);
                    }
                    else if (args[0] == "hash-queries")
                    {
                        HashQueries(settings);
                    }
                    else if (args[0] == "compare")
                    {
                        var searchTerm = string.Join(" ", args.Skip(1).ToArray());
                        await CompareSearchTermAsync(settings, httpClient, searchTerm);
                    }
                }
        }