/// <summary>
/// Searches a single GitHub repository for files named <c>project.json</c>, returning the results
/// already held in storage when the repository has been searched before.
/// </summary>
/// <param name="repo">The repository to search.</param>
/// <param name="handleRenamedRepos">
/// When <c>true</c>, an <see cref="ApiValidationException"/> raised by the search triggers a lookup of
/// the repository. If the lookup reports a different owner or name, the rename is saved and the search
/// is retried against the new name with this flag set to <c>false</c>.
/// </param>
/// <returns>
/// The search results for the repository. An empty sequence is returned when the repository is marked
/// as not found, when cancellation has been requested, or when any exception occurs (the exception is
/// logged rather than rethrown).
/// </returns>
async Task<IEnumerable<SearchResult>> SearchRepoAsync(GitHubRepo repo, bool handleRenamedRepos = true)
{
    // The GitHub search API serves at most this many results per query. Requesting a page beyond the
    // limit fails validation, which would otherwise be mistaken for a renamed repository below and
    // cause everything collected so far to be thrown away.
    const int MaxSearchResults = 1000;

    object operation = null;
    try
    {
        string repoName = repo.Owner + "/" + repo.Name;

        if (_storage.IsNotFound(repo.Owner, repo.Name))
        {
            _logger.Verbose("{Repo} previously not found, skipping", repoName);
            return Enumerable.Empty<SearchResult>();
        }

        if (_storage.HasRepoResults(repo.Owner, repo.Name))
        {
            _logger.Verbose("{Repo} already downloaded", repoName);
            return _storage.GetRepoResults(repo.Owner, repo.Name).ToList();
        }

        List<SearchResult> ret = new List<SearchResult>();

        var request = new SearchCodeRequest()
        {
            FileName = "project.json",
        };
        request.Repos.Add(repo.Owner, repo.Name);

        int totalResultsReturned = 0;
        while (true)
        {
            if (_cancelToken.IsCancellationRequested)
            {
                return Enumerable.Empty<SearchResult>();
            }

            operation = new { Operation = "Search", Repo = repoName, Page = request.Page };

            ApiValidationException validationException = null;
            ExceptionDispatchInfo validationExceptionDispatchInfo = null;
            SearchCodeResult result = await _searchThrottler.RunAsync<SearchCodeResult>(
                async () =>
                {
                    // Do a try/catch inside here so that renamed repos don't get logged as failures by the throttler
                    try
                    {
                        return await _client.Search.SearchCode(request);
                    }
                    catch (ApiValidationException ex) when (handleRenamedRepos)
                    {
                        validationException = ex;
                        validationExceptionDispatchInfo = ExceptionDispatchInfo.Capture(ex);
                        return null;
                    }
                },
                operation);

            if (result == null && validationException != null)
            {
                _logger.Debug(validationException, "Api validation exception for {Operation}, checking for renamed repo", operation);

                var renameOperation = new { Operation = "RenameCheck", Repo = repoName };
                var potentiallyRenamedRepo = await _throttler.RunAsync<Repository>(
                    async () =>
                    {
                        try
                        {
                            return await _client.Repository.Get(repo.Owner, repo.Name);
                        }
                        catch (NotFoundException)
                        {
                            return null;
                        }
                    },
                    renameOperation);

                if (potentiallyRenamedRepo == null)
                {
                    _logger.Information("Repo {Repo} not found", renameOperation.Repo);
                    _storage.SaveNotFound(repo.Owner, repo.Name, true);
                    return Enumerable.Empty<SearchResult>();
                }

                if (potentiallyRenamedRepo.Owner.Login == repo.Owner && potentiallyRenamedRepo.Name == repo.Name)
                {
                    _logger.Error("Repo was not renamed, Api validation must have failed for some other reason for {Operation}", operation);

                    // Rethrows the original validation exception with its stack trace intact; the
                    // outer catch then logs it and returns an empty result.
                    validationExceptionDispatchInfo.Throw();
                }

                var newRepo = repo.Clone();
                newRepo.Owner = potentiallyRenamedRepo.Owner.Login;
                newRepo.Name = potentiallyRenamedRepo.Name;

                _logger.Information("Repo {OldRepo} has been renamed to {Repo}", renameOperation.Repo, newRepo.Owner + "/" + newRepo.Name);
                _storage.SaveRenamedRepo(repo.Owner, repo.Name, newRepo);

                // Rename handling is disabled for the retry so that a second validation failure
                // cannot start another round of rename checks.
                return await SearchRepoAsync(newRepo, false);
            }

            if (result == null)
            {
                // No result and no captured validation exception: report it explicitly instead of
                // failing with a NullReferenceException on result.Items below.
                _logger.Error("{Operation} returned no result", operation);
                return Enumerable.Empty<SearchResult>();
            }

            foreach (var item in result.Items)
            {
                string destFile = _storage.GetFilePath(repo.Owner, repo.Name, item.Path);
                if (Path.GetFileName(destFile).Equals("project.json", StringComparison.OrdinalIgnoreCase))
                {
                    ret.Add(new SearchResult(item));
                }
                else
                {
                    _logger.Information("{Path} was not a project.json file in {Repo}, ignoring", item.Path, repoName);
                }
            }

            if (result.IncompleteResults)
            {
                _logger.Error("Incomplete search results for {Repo}", repoName);
                break;
            }

            totalResultsReturned += result.Items.Count;
            if (totalResultsReturned >= result.TotalCount)
            {
                break;
            }

            if (result.Items.Count == 0)
            {
                // An empty page makes no progress towards TotalCount, so asking for further pages
                // would never terminate.
                _logger.Error("Empty search page for {Operation}, stopping before all results were received", operation);
                break;
            }

            if (totalResultsReturned >= MaxSearchResults)
            {
                _logger.Error("Search result limit reached for {Operation}, remaining results cannot be retrieved", operation);
                break;
            }

            request.Page += 1;
        }

        _storage.RecordRepoResults(repo.Owner, repo.Name, ret);
        _logger.Information("Completed searching repo {Repo}", repoName);

        return ret;
    }
    catch (Exception ex)
    {
        _logger.Error(ex, "{Operation} failed", operation);
        return Enumerable.Empty<SearchResult>();
    }
}
// TODO:
//  - Frameworks that are being targeted in each project.json

/// <summary>
/// Analyzes every stored project.json file and reports the findings.
/// </summary>
/// <remarks>
/// Writes one tab-separated row per analyzed file to <c>stats.txt</c> and one row per interesting value
/// to <c>properties.txt</c> (both relative paths), then prints summary counts and the 20 owners with
/// the most search results to the console. Files that fail to read or analyze are logged and skipped.
/// </remarks>
void Analyze()
{
    // GitHub treats owner logins case-insensitively, so the lookup and the per-owner tally ignore case.
    HashSet<string> microsoftOrgs = new HashSet<string>(StringComparer.OrdinalIgnoreCase)
    {
        "dotnet",
        "aspnet",
        "xamarin",
        "Microsoft",
        "Windows-Readiness",
        "NuGet",
        "NuGetArchive",
        "Microsoft-Build-2016",
    };

    int totalRepos = 0;
    int totalReposSearched = 0;
    int notFoundRepos = 0;
    int remainingRepos = 0;
    int totalResults = 0;
    int downloadedFiles = 0;
    int remainingFiles = 0;
    Dictionary<string, int> ownerCounts = new Dictionary<string, int>(StringComparer.OrdinalIgnoreCase);

    using (var sw = new StreamWriter("stats.txt"))
    using (var propertiesWriter = new StreamWriter("properties.txt"))
    {
        sw.WriteLine("Owner\tRepo name\tPath\tIsMicrosoftRepo\tFrameworkCount\tTopLevelDependencies\tFrameworkSpecificDependencies\t" + string.Join("\t", ProjectJsonAnalysis.PropertyNames) + "\tParsing error");
        propertiesWriter.WriteLine("Owner\tRepo name\tPath\tIsMicrosoftRepo\tProperty Name\tProperty Path\tFramework\tValue");

        foreach (var repo in _storage.GetAllRepos())
        {
            totalRepos++;

            if (_storage.HasRepoResults(repo.Owner, repo.Name))
            {
                totalReposSearched++;
                string isMicrosoftRepo = microsoftOrgs.Contains(repo.Owner) ? "Yes" : "No";

                foreach (var result in _storage.GetRepoResults(repo.Owner, repo.Name))
                {
                    // TryGetValue leaves the count at 0 for an owner that has not been seen yet.
                    int ownerCount;
                    ownerCounts.TryGetValue(repo.Owner, out ownerCount);
                    ownerCounts[repo.Owner] = ownerCount + 1;

                    totalResults++;

                    if (!_storage.HasFile(repo.Owner, repo.Name, result.ResultPath))
                    {
                        remainingFiles++;
                        continue;
                    }

                    downloadedFiles++;
                    string filePath = _storage.GetFilePath(repo.Owner, repo.Name, result.ResultPath);

                    try
                    {
                        // Reading happens inside the try so that one unreadable file is logged and
                        // skipped instead of aborting the whole report.
                        var json = File.ReadAllText(filePath);
                        var analysis = ProjectJsonAnalysis.Analyze(json);

                        // Build the complete row before writing anything, so a failure part-way
                        // through cannot leave a truncated row in stats.txt.
                        string propertyFlags = string.Join(
                            "\t",
                            ProjectJsonAnalysis.PropertyNames.Select(pn => analysis.PropertiesDefined.Contains(pn) ? "Yes" : "No"));
                        string row =
                            string.Join(
                                "\t",
                                repo.Owner,
                                repo.Name,
                                result.ResultPath,
                                isMicrosoftRepo,
                                analysis.Frameworks.Count,
                                analysis.TopLevelDependencies,
                                analysis.FrameworkSpecificDependencies)
                            + "\t" + propertyFlags
                            + "\t" + analysis.ParsingError;
                        sw.WriteLine(row);

                        foreach (var interestingValue in analysis.InterestingValues)
                        {
                            propertiesWriter.WriteLine(string.Join(
                                "\t",
                                repo.Owner,
                                repo.Name,
                                result.ResultPath,
                                isMicrosoftRepo,
                                interestingValue.Name,
                                interestingValue.Path,
                                interestingValue.Framework,
                                interestingValue.Value));
                        }
                    }
                    catch (Exception ex)
                    {
                        _logger.Error(ex, "Error parsing {Path} in {Repo}", result.ResultPath, repo.Owner + "/" + repo.Name);
                    }
                }
            }
            else if (_storage.IsNotFound(repo.Owner, repo.Name))
            {
                notFoundRepos++;
            }
            else
            {
                remainingRepos++;
            }
        }
    }

    Console.WriteLine($"Total repos: {totalRepos}");
    Console.WriteLine($"Repos searched: {totalReposSearched}");
    Console.WriteLine($"Not found repos: {notFoundRepos}");
    Console.WriteLine($"Remaining repos: {remainingRepos}");
    Console.WriteLine($"Total results: {totalResults}");
    Console.WriteLine($"Results downloaded: {downloadedFiles}");
    Console.WriteLine($"Remaining files: {remainingFiles}");
    Console.WriteLine();

    foreach (var kvp in ownerCounts.OrderByDescending(kvp => kvp.Value).Take(20))
    {
        Console.WriteLine($"{kvp.Key}\t{kvp.Value}");
    }
}