/// <summary>
/// Looks for a GitHub repository URL in a package's JSON metadata.
/// </summary>
/// <remarks>
/// Packages whose name starts with an underscore are treated as internal modules and are mapped
/// to the <c>cpython/tree/master/Lib/</c> subpath without inspecting <paramref name="metadata"/>.
/// Otherwise the <c>info</c> object of the metadata is scanned for a <c>homepage</c> / <c>home_page</c>
/// member (case-insensitive) and the first GitHub URL extracted from it is returned.
/// The lookup is best-effort: missing, malformed or unexpectedly shaped metadata yields an empty map.
/// </remarks>
/// <param name="purl">The package being looked up.</param>
/// <param name="metadata">The raw JSON metadata for the package; may be null or empty.</param>
/// <returns>
/// A dictionary with at most one entry, mapping the discovered repository <see cref="PackageURL"/>
/// to a confidence of 1.0; empty when nothing was found.
/// </returns>
protected async override Task<Dictionary<PackageURL, double>> SearchRepoUrlsInPackageMetadata(PackageURL purl, string metadata)
{
    var mapping = new Dictionary<PackageURL, double>();

    // TODO: there are internal modules which do not start with _
    if (purl.Name?.StartsWith('_') ?? false)
    {
        // TODO: internal modules could also be in https://github.com/python/cpython/tree/master/Modules/
        mapping.Add(new PackageURL(purl.Type, purl.Namespace, purl.Name, null, null, "cpython/tree/master/Lib/"), 1.0);
        return mapping;
    }

    if (string.IsNullOrEmpty(metadata))
    {
        return mapping;
    }

    JsonDocument contentJSON;
    try
    {
        contentJSON = JsonDocument.Parse(metadata);
    }
    catch (JsonException)
    {
        // Metadata that is not valid JSON is treated like metadata without a repository link.
        return mapping;
    }

    // JsonDocument rents pooled buffers, so it has to be disposed once we are done reading from it.
    using (contentJSON)
    {
        JsonElement root = contentJSON.RootElement;

        // TryGetProperty/EnumerateObject throw on non-object elements, hence the explicit kind checks.
        if (root.ValueKind != JsonValueKind.Object
            || !root.TryGetProperty("info", out JsonElement infoJSON)
            || infoJSON.ValueKind != JsonValueKind.Object)
        {
            return mapping;
        }

        // there are a couple of possibilities where the repository url might be present - check all of them
        string[] possibleProperties = { "homepage", "home_page" };

        foreach (JsonProperty property in infoJSON.EnumerateObject())
        {
            if (!possibleProperties.Contains(property.Name, StringComparer.OrdinalIgnoreCase))
            {
                continue;
            }

            try
            {
                string homepage = property.Value.ToString() ?? string.Empty;

                // if we were able to extract a github url, return
                if (GitHubProjectManager.ExtractGitHubPackageURLs(homepage)?.FirstOrDefault() is PackageURL repoPurl)
                {
                    mapping.Add(repoPurl, 1.0);
                    return mapping;
                }
            }
            catch (Exception)
            {
                // Best-effort: a failure while extracting from one property must not
                // prevent the remaining candidate properties from being tried.
                continue;
            }
        }
    }

    return mapping;
}
/// <summary>
/// Looks for a GitHub repository URL on the package's NuGet home page.
/// </summary>
/// <remarks>
/// The page at <c>{ENV_NUGET_HOMEPAGE}/{purl.Name}</c> is downloaded and two links are checked in
/// order: the "View the source code for this package" link, then the "Visit the project site to learn
/// more about this package" link. The first link from which a GitHub package URL can be extracted wins.
/// A link that is absent, has no <c>href</c>, or does not yield a GitHub URL is skipped so the next
/// candidate still gets a chance. Any exception is logged and results in an empty map.
/// </remarks>
/// <param name="purl">The package being looked up; its <c>Name</c> is used to build the page URL.</param>
/// <param name="metadata">Not used by this implementation.</param>
/// <returns>
/// A dictionary with at most one entry, mapping the discovered repository <see cref="PackageURL"/>
/// to a confidence of 1.0; empty when nothing was found or an error occurred.
/// </returns>
protected async override Task<Dictionary<PackageURL, double>> PackageMetadataSearch(PackageURL purl, string metadata)
{
    var mapping = new Dictionary<PackageURL, double>();

    try
    {
        var packageName = purl.Name;

        // nuget doesnt provide repository information in the json metadata; we have to extract it from the html home page
        var web = new HtmlWeb();
        HtmlDocument doc = web.Load($"{ENV_NUGET_HOMEPAGE}/{packageName}");

        var paths = new[]
        {
            "//a[@title=\"View the source code for this package\"]/@href",
            "//a[@title=\"Visit the project site to learn more about this package\"]/@href"
        };

        foreach (string path in paths)
        {
            // SelectSingleNode returns null when nothing matches; that only means this
            // particular link is not on the page, so move on to the next candidate.
            var linkNode = doc.DocumentNode.SelectSingleNode(path);
            string repoCandidate = linkNode?.GetAttributeValue("href", string.Empty);
            if (string.IsNullOrEmpty(repoCandidate))
            {
                continue;
            }

            // A link that does not point at GitHub yields no package URL; skip it rather
            // than attempting to add a null key to the dictionary.
            PackageURL repoPurl = GitHubProjectManager.ExtractGitHubPackageURLs(repoCandidate)?.FirstOrDefault();
            if (repoPurl is null)
            {
                continue;
            }

            mapping.Add(repoPurl, 1.0);
            return mapping;
        }
    }
    catch (Exception ex)
    {
        Logger.Error(ex, $"Error fetching/parsing NuGet homepage: {ex.Message}");
        return mapping;
    }

    // if nothing worked, return empty
    return mapping;
}