/// <summary>
/// Looks for a GitHub repository URL in the "info" object of the package metadata JSON.
/// </summary>
/// <param name="purl">The package whose source repository is being looked up.</param>
/// <param name="metadata">The raw JSON metadata of the package; may be null or empty.</param>
/// <returns>
/// A dictionary mapping each candidate repository to its score. It holds at most one entry
/// (scored 1.0) and is empty when nothing could be extracted.
/// </returns>
/// <exception cref="JsonException">Thrown when <paramref name="metadata"/> is not valid JSON.</exception>
protected async override Task<Dictionary<PackageURL, double>> SearchRepoUrlsInPackageMetadata(PackageURL purl, string metadata)
{
    var mapping = new Dictionary<PackageURL, double>();

    // TODO: there are internal modules which do not start with _
    if (purl.Name?.StartsWith('_') ?? false)
    {
        // TODO: internal modules could also be in https://github.com/python/cpython/tree/master/Modules/
        mapping.Add(new PackageURL(purl.Type, purl.Namespace, purl.Name, null, null, "cpython/tree/master/Lib/"), 1.0F);
        return mapping;
    }

    if (string.IsNullOrEmpty(metadata))
    {
        return mapping;
    }

    // JsonDocument rents pooled buffers, so it has to be disposed once we are done reading it.
    using JsonDocument contentJSON = JsonDocument.Parse(metadata);

    // Both TryGetProperty and EnumerateObject throw InvalidOperationException on non-object
    // elements, so verify the shape up front instead of relying on exceptions.
    JsonElement root = contentJSON.RootElement;
    if (root.ValueKind != JsonValueKind.Object
        || !root.TryGetProperty("info", out JsonElement infoJSON)
        || infoJSON.ValueKind != JsonValueKind.Object)
    {
        return mapping;
    }

    // there are a couple of possibilities where the repository url might be present - check all of them
    foreach (var property in infoJSON.EnumerateObject())
    {
        bool isHomepageProperty =
            string.Equals(property.Name, "homepage", StringComparison.OrdinalIgnoreCase)
            || string.Equals(property.Name, "home_page", StringComparison.OrdinalIgnoreCase);
        if (!isHomepageProperty)
        {
            continue;
        }

        try
        {
            string homepage = property.Value.ToString() ?? string.Empty;
            var packageUrls = GitHubProjectManager.ExtractGitHubPackageURLs(homepage);

            // if we were able to extract a github url, return
            if (packageUrls != null && packageUrls.Any())
            {
                mapping.Add(packageUrls.First(), 1.0F);
                return mapping;
            }
        }
        catch (Exception)
        {
            // best effort: extraction failed for this property, try the next one
            continue;
        }
    }

    return mapping;
}
/// <summary>
/// Searches the package manager metadata to figure out the source code repository
/// </summary>
/// <param name="purl">the package for which we need to find the source code repository</param>
/// <param name="metadata">the raw JSON metadata of the package; may be null or empty</param>
/// <returns>
/// A dictionary, mapping each possible repo source entry to its probability. The dictionary is
/// empty when no repository could be determined. NOTE: <c>null</c> (not an empty dictionary) is
/// returned when <paramref name="metadata"/> is null or empty; this is kept as-is so existing
/// callers that test for null keep working.
/// </returns>
protected async override Task<Dictionary<PackageURL, float>> PackageMetadataSearch(PackageURL purl, string metadata)
{
    var mapping = new Dictionary<PackageURL, float>();

    // A purl without a name cannot be an internal module; guard so we do not dereference null.
    string packageName = purl.Name;
    if (packageName != null && (packageName.StartsWith('_') || npm_internal_modules.Contains(packageName)))
    {
        // url = 'https://github.com/nodejs/node/tree/master/lib' + package.name,
        mapping.Add(new PackageURL(purl.Type, purl.Namespace, purl.Name, null, null, "node/tree/master/lib"), 1.0F);
        return mapping;
    }

    if (string.IsNullOrEmpty(metadata))
    {
        return null;
    }

    // JsonDocument rents pooled buffers, so dispose it when the method finishes.
    using JsonDocument contentJSON = JsonDocument.Parse(metadata);

    // if a version is provided, search that JSONElement, otherwise, just search the latest version,
    // which is more likely best maintained
    // TODO: If the latest version JSONElement doesn't have the repo info, should we search all elements
    // on that chance that one of them might have it?
    JsonElement versionJSON = string.IsNullOrEmpty(purl.Version)
        ? GetLatestVersion(contentJSON)
        : GetVersion(contentJSON, new Version(purl.Version));

    // TryGetProperty throws InvalidOperationException on non-object elements (for example when
    // "repository" is given as a plain string), so check the kind before every lookup.
    if (versionJSON.ValueKind != JsonValueKind.Object
        || !versionJSON.TryGetProperty("repository", out JsonElement repositoryJSON)
        || repositoryJSON.ValueKind != JsonValueKind.Object
        || !repositoryJSON.TryGetProperty("type", out JsonElement repoTypeJSON)
        || !repositoryJSON.TryGetProperty("url", out JsonElement repoUrlJSON))
    {
        return mapping;
    }

    // right now we deal with only github repos
    if (!string.Equals(repoTypeJSON.ToString(), "git", StringComparison.OrdinalIgnoreCase))
    {
        return mapping;
    }

    try
    {
        PackageURL gitPURL = GitHubProjectManager.ParseUri(new Uri(repoUrlJSON.ToString()));

        // we got a repository value the author specified in the metadata -
        // so no further processing needed
        if (gitPURL != null)
        {
            mapping.Add(gitPURL, 1.0F);
        }
    }
    catch (KeyNotFoundException)
    {
        /* continue onwards */
    }
    catch (UriFormatException)
    {
        /* the uri specified in the metadata invalid */
    }

    return mapping;
}
/// <summary>
/// Looks up the source repository declared in the "repository" object of the selected
/// version element of the already-parsed package metadata.
/// </summary>
/// <param name="purl">The package whose source repository is being looked up.</param>
/// <param name="contentJSON">The parsed metadata document. It is owned by the caller and is not disposed here.</param>
/// <returns>
/// A dictionary mapping each candidate repository to its score. It holds at most one entry
/// (scored 1.0) and is empty when no repository could be determined.
/// </returns>
protected async Task<Dictionary<PackageURL, double>> SearchRepoUrlsInPackageMetadata(PackageURL purl, JsonDocument contentJSON)
{
    var mapping = new Dictionary<PackageURL, double>();

    if (purl.Name is string purlName && (purlName.StartsWith('_') || npm_internal_modules.Contains(purlName)))
    {
        // url = 'https://github.com/nodejs/node/tree/master/lib' + package.name,
        mapping.Add(new PackageURL(purl.Type, purl.Namespace, purl.Name, null, null, "node/tree/master/lib"), 1.0F);
        return mapping;
    }

    // if a version is provided, search that JSONElement, otherwise, just search the latest version,
    // which is more likely best maintained
    // TODO: If the latest version JSONElement doesn't have the repo info, should we search all elements
    // on that chance that one of them might have it?
    // (purl was already dereferenced above, so no null-conditional access is needed here.)
    JsonElement? versionJSON = string.IsNullOrEmpty(purl.Version)
        ? GetLatestVersionElement(contentJSON)
        : GetVersionElement(contentJSON, new Version(purl.Version));

    // TryGetProperty throws InvalidOperationException on non-object elements, so check the kind
    // of both the version element and the repository element before reading from them.
    if (versionJSON is JsonElement versionElement
        && versionElement.ValueKind == JsonValueKind.Object
        && versionElement.TryGetProperty("repository", out JsonElement repositoryJSON)
        && repositoryJSON.ValueKind == JsonValueKind.Object)
    {
        try
        {
            string? repoType = Utilities.GetJSONPropertyStringIfExists(repositoryJSON, "type");
            string? repoURL = Utilities.GetJSONPropertyStringIfExists(repositoryJSON, "url");

            // right now we deal with only github repos
            if (string.Equals(repoType, "git", StringComparison.OrdinalIgnoreCase) && repoURL is not null)
            {
                PackageURL gitPURL = GitHubProjectManager.ParseUri(new Uri(repoURL));

                // we got a repository value the author specified in the metadata - so no further
                // processing needed
                if (gitPURL != null)
                {
                    mapping.Add(gitPURL, 1.0F);
                    return mapping;
                }
            }
        }
        catch (KeyNotFoundException)
        {
            /* continue onwards */
        }
        catch (UriFormatException)
        {
            /* the uri specified in the metadata invalid */
        }
    }

    return mapping;
}
/// <summary>
/// Return all github repo patterns in the searchText which have the same name as the package repo
/// </summary>
/// <param name="purl">the package whose name the extracted repositories must match</param>
/// <param name="searchText">the text to scan for GitHub URLs; may be null or empty</param>
/// <returns>
/// the repositories found in <paramref name="searchText"/> whose name equals the name of
/// <paramref name="purl"/>; empty when there is no match
/// </returns>
public static IEnumerable<PackageURL> ExtractGitHubUris(PackageURL purl, string searchText)
{
    List<PackageURL> repos = new List<PackageURL>();
    if (string.IsNullOrEmpty(searchText))
    {
        return repos;
    }

    foreach (Match match in GithubExtractorRegex.Matches(searchText))
    {
        // Handle failures per match: one malformed URL must not discard the matches after it.
        try
        {
            PackageURL repo = GitHubProjectManager.ParseUri(new Uri(match.Value));

            // ParseUri may yield null for URLs it cannot map to a repository; skip those.
            if (repo != null && repo.Name == purl.Name)
            {
                repos.Add(repo);
            }
        }
        catch (UriFormatException)
        {
            /* that was an invalid url, ignore */
        }
    }

    return repos;
}
/// <summary>
/// Determines the source repository of a package by scraping the links on its home page.
/// </summary>
/// <param name="purl">The package whose source repository is being looked up.</param>
/// <param name="metadata">Not used by this implementation; the home page is fetched instead.</param>
/// <returns>
/// A dictionary mapping each candidate repository to its score. It holds at most one entry
/// (scored 1.0) and is empty when no repository link was found or the page could not be
/// fetched or parsed (the error is logged).
/// </returns>
protected async override Task<Dictionary<PackageURL, double>> PackageMetadataSearch(PackageURL purl, string metadata)
{
    Dictionary<PackageURL, double> mapping = new Dictionary<PackageURL, double>();
    try
    {
        var packageName = purl.Name;

        // nuget doesn't provide repository information in the json metadata; we have to extract it from the html home page
        // NOTE(review): HtmlWeb.Load blocks the calling thread inside an async method.
        HtmlWeb web = new HtmlWeb();
        HtmlDocument doc = web.Load($"{ENV_NUGET_HOMEPAGE}/{packageName}");

        var paths = new List<string>()
        {
            "//a[@title=\"View the source code for this package\"]/@href",
            "//a[@title=\"Visit the project site to learn more about this package\"]/@href"
        };

        foreach (string path in paths)
        {
            // SelectSingleNode returns null when nothing matches; move on to the next
            // candidate link instead of failing the whole lookup.
            var node = doc.DocumentNode.SelectSingleNode(path);
            if (node == null)
            {
                continue;
            }

            string repoCandidate = node.GetAttributeValue("href", string.Empty);
            if (string.IsNullOrEmpty(repoCandidate))
            {
                continue;
            }

            // The link may not point at GitHub at all; only record it when a repository was extracted.
            PackageURL repoPurl = GitHubProjectManager.ExtractGitHubPackageURLs(repoCandidate)?.FirstOrDefault();
            if (repoPurl == null)
            {
                continue;
            }

            mapping.Add(repoPurl, 1.0F);
            return mapping;
        }
    }
    catch (Exception ex)
    {
        Logger.Error(ex, $"Error fetching/parsing NuGet homepage: {ex.Message}");
        return mapping;
    }

    // if nothing worked, return empty
    return mapping;
}