/// <summary>
/// Feeds every repository from storage through a two-stage dataflow pipeline:
/// <c>SearchRepoAsync</c> produces search results for a repository, and each result
/// is then passed to <c>DownloadFileAsync</c>.
/// </summary>
/// <remarks>
/// Both stages allow up to <c>Environment.ProcessorCount * 4</c> concurrent operations.
/// Feeding stops early when <c>_cancelToken</c> signals cancellation or when the search
/// stage refuses further input. In every case the search stage is then marked complete
/// and the method awaits the completion of the download stage.
/// </remarks>
/// <returns>A task that finishes when the download stage has completed.</returns>
public async Task FindProjectJsonAsync()
{
    // Use 1 here to process items one at a time.
    int maxParallelism = Environment.ProcessorCount * 4;

    TransformManyBlock<GitHubRepo, SearchResult> repoSearchBlock =
        new TransformManyBlock<GitHubRepo, SearchResult>(
            repo => SearchRepoAsync(repo),
            new ExecutionDataflowBlockOptions()
            {
                MaxDegreeOfParallelism = maxParallelism
            });

    ActionBlock<SearchResult> downloadFileBlock =
        new ActionBlock<SearchResult>(
            DownloadFileAsync,
            new ExecutionDataflowBlockOptions()
            {
                MaxDegreeOfParallelism = maxParallelism
            });

    // Completion (including faults) of the search stage flows on to the download stage,
    // so awaiting the download stage below covers the whole pipeline.
    repoSearchBlock.LinkTo(downloadFileBlock, new DataflowLinkOptions() { PropagateCompletion = true });

    foreach (var repo in _storage.GetAllRepos())
    {
        if (_cancelToken.IsCancellationRequested)
        {
            break;
        }

        // Post returns false when the block declines the item (e.g. it has already
        // completed or faulted). Nothing posted afterwards would be accepted either,
        // so stop enumerating instead of walking the rest of the repositories.
        if (!repoSearchBlock.Post(repo))
        {
            break;
        }
    }

    repoSearchBlock.Complete();
    await downloadFileBlock.Completion;
}
// TODO:
//  - Frameworks that are being targeted in each project.json

/// <summary>
/// Analyzes every downloaded search result and writes two tab-separated reports,
/// <c>stats.txt</c> (one row per analyzed file) and <c>properties.txt</c> (one row per
/// interesting value found in a file), then prints summary counters to the console.
/// </summary>
/// <remarks>
/// Repositories are classified as searched (they have stored results), not found, or
/// remaining. For searched repositories each result is counted per owner; results whose
/// file is present in storage are read and passed to <c>ProjectJsonAnalysis.Analyze</c>.
/// A file counts as downloaded even when its analysis fails; such failures are logged
/// and the file is skipped. The console output ends with the 20 owners that have the
/// most results.
/// </remarks>
void Analyze()
{
    HashSet<string> microsoftOrgs = new HashSet<string>()
    {
        "dotnet",
        "aspnet",
        "xamarin",
        "Microsoft",
        "Windows-Readiness",
        "NuGet",
        "NuGetArchive",
        "Microsoft-Build-2016",
    };

    int totalRepos = 0;
    int totalReposSearched = 0;
    int notFoundRepos = 0;
    int remainingRepos = 0;
    int totalResults = 0;
    int downloadedFiles = 0;
    int remainingFiles = 0;

    // Number of search results per repository owner.
    Dictionary<string, int> ownerCounts = new Dictionary<string, int>();

    using (var sw = new StreamWriter("stats.txt"))
    using (var propertiesWriter = new StreamWriter("properties.txt"))
    {
        sw.WriteLine("Owner\tRepo name\tPath\tIsMicrosoftRepo\tFrameworkCount\tTopLevelDependencies\tFrameworkSpecificDependencies\t" +
            string.Join("\t", ProjectJsonAnalysis.PropertyNames) +
            "\tParsing error");
        propertiesWriter.WriteLine("Owner\tRepo name\tPath\tIsMicrosoftRepo\tProperty Name\tProperty Path\tFramework\tValue");

        foreach (var repo in _storage.GetAllRepos())
        {
            totalRepos++;

            if (!_storage.HasRepoResults(repo.Owner, repo.Name))
            {
                if (_storage.IsNotFound(repo.Owner, repo.Name))
                {
                    notFoundRepos++;
                }
                else
                {
                    remainingRepos++;
                }

                continue;
            }

            totalReposSearched++;

            // Depends only on the owner, so evaluate it once per repository rather than
            // once per result and once per interesting value.
            string isMicrosoftRepo = microsoftOrgs.Contains(repo.Owner) ? "Yes" : "No";

            foreach (var result in _storage.GetRepoResults(repo.Owner, repo.Name))
            {
                // TryGetValue leaves the count at 0 for an owner seen for the first time.
                int ownerCount;
                ownerCounts.TryGetValue(repo.Owner, out ownerCount);
                ownerCounts[repo.Owner] = ownerCount + 1;

                totalResults++;

                if (!_storage.HasFile(repo.Owner, repo.Name, result.ResultPath))
                {
                    remainingFiles++;
                    continue;
                }

                downloadedFiles++;
                string filePath = _storage.GetFilePath(repo.Owner, repo.Name, result.ResultPath);
                var json = File.ReadAllText(filePath);

                try
                {
                    var analysis = ProjectJsonAnalysis.Analyze(json);

                    // Build the complete row before touching the writer so that an
                    // exception while computing any column cannot leave a truncated,
                    // unterminated row in stats.txt.
                    string statsRow =
                        string.Join("\t",
                            repo.Owner,
                            repo.Name,
                            result.ResultPath,
                            isMicrosoftRepo,
                            analysis.Frameworks.Count,
                            analysis.TopLevelDependencies,
                            analysis.FrameworkSpecificDependencies)
                        + "\t"
                        + string.Join("\t", ProjectJsonAnalysis.PropertyNames.Select(pn => analysis.PropertiesDefined.Contains(pn) ? "Yes" : "No"))
                        + "\t" + analysis.ParsingError;
                    sw.WriteLine(statsRow);

                    foreach (var interestingValue in analysis.InterestingValues)
                    {
                        propertiesWriter.WriteLine(string.Join("\t",
                            repo.Owner,
                            repo.Name,
                            result.ResultPath,
                            isMicrosoftRepo,
                            interestingValue.Name,
                            interestingValue.Path,
                            interestingValue.Framework,
                            interestingValue.Value));
                    }
                }
                catch (Exception ex)
                {
                    // Best effort: one unreadable project.json must not abort the whole report.
                    _logger.Error(ex, "Error parsing {Path} in {Repo}", result.ResultPath, repo.Owner + "/" + repo.Name);
                }
            }
        }
    }

    Console.WriteLine($"Total repos: {totalRepos}");
    Console.WriteLine($"Repos searched: {totalReposSearched}");
    Console.WriteLine($"Not found repos: {notFoundRepos}");
    Console.WriteLine($"Remaining repos: {remainingRepos}");
    Console.WriteLine($"Total results: {totalResults}");
    Console.WriteLine($"Results downloaded: {downloadedFiles}");
    Console.WriteLine($"Remaining files: {remainingFiles}");
    Console.WriteLine();

    // Top 20 owners by number of search results.
    foreach (var kvp in ownerCounts.OrderByDescending(entry => entry.Value).Take(20))
    {
        Console.WriteLine($"{kvp.Key}\t{kvp.Value}");
    }
}