/// <summary>
        /// Searches the "info" object of the package metadata JSON for a homepage entry
        /// that contains a GitHub repository URL.
        /// </summary>
        /// <param name="purl">the package for which we need to find the source code repository</param>
        /// <param name="metadata">the package metadata as a JSON string; may be null or empty</param>
        /// <returns>
        /// A dictionary mapping the discovered repository to its probability. It is empty when
        /// nothing was found, and it contains a single entry with the sub path
        /// "cpython/tree/master/Lib/" when the package name starts with an underscore.
        /// </returns>
        /// <remarks>
        /// Malformed JSON is not handled here: JsonDocument.Parse still throws for it.
        /// </remarks>
        protected async override Task <Dictionary <PackageURL, double> > SearchRepoUrlsInPackageMetadata(PackageURL purl, string metadata)
        {
            var mapping = new Dictionary <PackageURL, double>();

            if (purl.Name?.StartsWith('_') ?? false) // TODO: there are internal modules which do not start with _
            {
                // TODO: internal modules could also be in https://github.com/python/cpython/tree/master/Modules/
                mapping.Add(new PackageURL(purl.Type, purl.Namespace, purl.Name, null, null, "cpython/tree/master/Lib/"), 1.0);
                return mapping;
            }
            if (string.IsNullOrEmpty(metadata))
            {
                return mapping;
            }

            // JsonDocument rents pooled buffers, so it has to be disposed once we are done with it.
            using JsonDocument contentJSON = JsonDocument.Parse(metadata);

            // Both the root and "info" must be JSON objects; anything else means there is nothing to search.
            // Checking ValueKind up front avoids the InvalidOperationException that TryGetProperty /
            // EnumerateObject raise on non-object elements.
            JsonElement root = contentJSON.RootElement;
            if (root.ValueKind != JsonValueKind.Object ||
                !root.TryGetProperty("info", out JsonElement infoJSON) ||
                infoJSON.ValueKind != JsonValueKind.Object)
            {
                return mapping;
            }

            // there are a couple of possibilities where the repository url might be present - check all of them
            string[] possibleProperties = { "homepage", "home_page" };

            foreach (JsonProperty property in infoJSON.EnumerateObject())
            {
                // Ordinal comparison: property names are identifiers, not culture-dependent text.
                if (!possibleProperties.Contains(property.Name, StringComparer.OrdinalIgnoreCase))
                {
                    continue;
                }

                try
                {
                    string homepage    = property.Value.ToString() ?? string.Empty;
                    var    packageUrls = GitHubProjectManager.ExtractGitHubPackageURLs(homepage);

                    // if we were able to extract a github url, return
                    var firstUrl = packageUrls?.FirstOrDefault();
                    if (firstUrl != null)
                    {
                        mapping.Add(firstUrl, 1.0);
                        return mapping;
                    }
                }
                catch (Exception)
                {
                    // Deliberately best-effort: a failure on one candidate property must not
                    // prevent the remaining properties from being examined.
                }
            }

            return mapping;
        }
        /// <summary>
        /// Searches the package manager metadata to figure out the source code repository.
        /// </summary>
        /// <param name="purl">the package for which we need to find the source code repository</param>
        /// <param name="metadata">the package metadata as a JSON string; may be null or empty</param>
        /// <returns>
        /// A dictionary, mapping each possible repo source entry to its probability.
        /// The dictionary is empty (never null) when no repository could be determined.
        /// </returns>
        /// <remarks>
        /// Malformed JSON (JsonDocument.Parse) and a <c>purl.Version</c> that System.Version cannot
        /// parse are not handled here and still surface as exceptions.
        /// </remarks>
        protected async override Task <Dictionary <PackageURL, float> > PackageMetadataSearch(PackageURL purl,
                                                                                              string metadata)
        {
            var mapping = new Dictionary <PackageURL, float>();

            string packageName = purl.Name;
            if (packageName != null && (packageName.StartsWith('_') || npm_internal_modules.Contains(packageName)))
            {
                // url = 'https://github.com/nodejs/node/tree/master/lib' + package.name,

                mapping.Add(new PackageURL(purl.Type, purl.Namespace, purl.Name,
                                           null, null, "node/tree/master/lib"), 1.0F);
                return mapping;
            }
            if (string.IsNullOrEmpty(metadata))
            {
                // Return the empty mapping rather than null so that every exit path hands back a dictionary.
                return mapping;
            }

            // JsonDocument rents pooled buffers, so it has to be disposed once we are done with it.
            // Only PackageURL instances leave this method, never JsonElements backed by the document.
            using JsonDocument contentJSON = JsonDocument.Parse(metadata);

            // if a version is provided, search that JSONElement, otherwise, just search the latest version,
            // which is more likely best maintained
            // TODO: If the latest version JSONElement doesnt have the repo infor, should we search all elements
            // on that chance that one of them might have it?
            JsonElement versionJSON = string.IsNullOrEmpty(purl.Version) ? GetLatestVersion(contentJSON) :
                                      GetVersion(contentJSON, new Version(purl.Version));

            // "repository" must be an object carrying both "type" and "url". Checking ValueKind first
            // avoids the InvalidOperationException raised when a property is requested from a
            // non-object element (for example a default JsonElement or a plain string value).
            if (versionJSON.ValueKind != JsonValueKind.Object ||
                !versionJSON.TryGetProperty("repository", out JsonElement repositoryJSON) ||
                repositoryJSON.ValueKind != JsonValueKind.Object ||
                !repositoryJSON.TryGetProperty("type", out JsonElement repoTypeJSON) ||
                !repositoryJSON.TryGetProperty("url", out JsonElement repoUrlJSON))
            {
                return mapping;
            }

            // right now we deal with only github repos
            if (!string.Equals(repoTypeJSON.ToString(), "git", StringComparison.OrdinalIgnoreCase))
            {
                return mapping;
            }

            try
            {
                PackageURL gitPURL = GitHubProjectManager.ParseUri(new Uri(repoUrlJSON.ToString()));
                // we got a repository value the author specified in the metadata -
                // so no further processing needed
                if (gitPURL != null)
                {
                    mapping.Add(gitPURL, 1.0F);
                }
            }
            catch (UriFormatException) { /* the uri specified in the metadata invalid */ }

            return mapping;
        }
        /// <summary>
        /// Searches already-parsed package metadata for the source code repository declared
        /// in the "repository" object of the selected version entry.
        /// </summary>
        /// <param name="purl">the package for which we need to find the source code repository</param>
        /// <param name="contentJSON">the parsed package metadata document</param>
        /// <returns>
        /// A dictionary mapping the discovered repository to its probability; empty when
        /// no repository could be determined.
        /// </returns>
        /// <remarks>
        /// A non-empty <c>purl.Version</c> is passed to the System.Version constructor, which
        /// throws for strings it cannot parse; that exception is not handled here.
        /// </remarks>
        protected async Task <Dictionary <PackageURL, double> > SearchRepoUrlsInPackageMetadata(PackageURL purl,
                                                                                                JsonDocument contentJSON)
        {
            var mapping = new Dictionary <PackageURL, double>();

            if (purl.Name is string purlName && (purlName.StartsWith('_') || npm_internal_modules.Contains(purlName)))
            {
                // url = 'https://github.com/nodejs/node/tree/master/lib' + package.name,

                mapping.Add(new PackageURL(purl.Type, purl.Namespace, purl.Name,
                                           null, null, "node/tree/master/lib"), 1.0);
                return mapping;
            }

            // if a version is provided, search that JSONElement, otherwise, just search the latest version,
            // which is more likely best maintained
            // TODO: If the latest version JSONElement doesnt have the repo infor, should we search all elements
            // on that chance that one of them might have it?
            // Copy the version into a local so the null check and the Version constructor see the same value.
            string? requestedVersion = purl.Version;
            JsonElement? versionJSON = string.IsNullOrEmpty(requestedVersion)
                                       ? GetLatestVersionElement(contentJSON)
                                       : GetVersionElement(contentJSON, new Version(requestedVersion));

            // Only an object-valued "repository" can carry "type" and "url". Checking ValueKind first
            // avoids InvalidOperationException on non-object elements and replaces the previous
            // exception-driven lookup of the "repository" property.
            if (versionJSON is JsonElement versionElement &&
                versionElement.ValueKind == JsonValueKind.Object &&
                versionElement.TryGetProperty("repository", out JsonElement repositoryJSON) &&
                repositoryJSON.ValueKind == JsonValueKind.Object)
            {
                try
                {
                    string? repoType = Utilities.GetJSONPropertyStringIfExists(repositoryJSON, "type");
                    string? repoURL  = Utilities.GetJSONPropertyStringIfExists(repositoryJSON, "url");

                    // right now we deal with only github repos
                    if (string.Equals(repoType, "git", StringComparison.OrdinalIgnoreCase) && repoURL is not null)
                    {
                        var gitPURL = GitHubProjectManager.ParseUri(new Uri(repoURL));
                        // we got a repository value the author specified in the metadata - so no further
                        // processing needed
                        if (gitPURL != null)
                        {
                            mapping.Add(gitPURL, 1.0);
                            return mapping;
                        }
                    }
                }
                catch (KeyNotFoundException) { /* continue onwards */ }
                catch (UriFormatException) { /* the uri specified in the metadata invalid */ }
            }

            return mapping;
        }
Beispiel #4
0
        /// <summary>
        /// Return all github repo patterns in the searchText which have the same name as the package repo
        /// </summary>
        /// <param name="purl">the package whose name the extracted repositories must match</param>
        /// <param name="searchText">the text to scan for GitHub URLs; may be null or empty</param>
        /// <returns>
        /// The repositories found in <paramref name="searchText"/> whose name equals the name of
        /// <paramref name="purl"/>; empty when there are none.
        /// </returns>
        public static IEnumerable <PackageURL> ExtractGitHubUris(PackageURL purl, string searchText)
        {
            List <PackageURL> repos = new List <PackageURL>();

            if (string.IsNullOrEmpty(searchText))
            {
                return repos;
            }

            foreach (Match match in GithubExtractorRegex.Matches(searchText))
            {
                // Handle each match on its own, so that one malformed URL only skips that
                // match instead of discarding every match that follows it.
                try
                {
                    var repo = GitHubProjectManager.ParseUri(new Uri(match.Value));

                    // ParseUri may yield null for URLs it cannot map to a package; skip those
                    // rather than dereferencing them in the name comparison.
                    if (repo != null && repo.Name == purl.Name)
                    {
                        repos.Add(repo);
                    }
                }
                catch (UriFormatException) { /* that was an invalid url, ignore */ }
            }

            return repos;
        }
Beispiel #5
0
        /// <summary>
        /// Looks for the source code repository of a package by loading its HTML home page
        /// (<c>{ENV_NUGET_HOMEPAGE}/{package name}</c>) and inspecting the "source code" and
        /// "project site" links on it.
        /// </summary>
        /// <param name="purl">the package for which we need to find the source code repository</param>
        /// <param name="metadata">not used by this implementation</param>
        /// <returns>
        /// A dictionary mapping the discovered repository to its probability; empty when no
        /// GitHub repository was found or when fetching/parsing the page failed (the failure is logged).
        /// </returns>
        protected async override Task <Dictionary <PackageURL, double> > PackageMetadataSearch(PackageURL purl, string metadata)
        {
            Dictionary <PackageURL, double> mapping = new Dictionary <PackageURL, double>();

            try
            {
                var packageName = purl.Name;

                // nuget doesnt provide repository information in the json metadata; we have to extract it from the html home page
                HtmlWeb      web = new HtmlWeb();
                HtmlDocument doc = web.Load($"{ENV_NUGET_HOMEPAGE}/{packageName}");

                var paths = new List <string>()
                {
                    "//a[@title=\"View the source code for this package\"]/@href",
                    "//a[@title=\"Visit the project site to learn more about this package\"]/@href"
                };

                foreach (string path in paths)
                {
                    // SelectSingleNode returns null when the XPath matches nothing. In that case move on
                    // to the next candidate path instead of failing the whole search.
                    var    linkNode      = doc.DocumentNode.SelectSingleNode(path);
                    string repoCandidate = linkNode?.GetAttributeValue("href", string.Empty);
                    if (string.IsNullOrEmpty(repoCandidate))
                    {
                        continue;
                    }

                    // The link may point somewhere other than GitHub; only record it when a
                    // package URL could actually be extracted.
                    PackageURL repoPurl = GitHubProjectManager.ExtractGitHubPackageURLs(repoCandidate)?.FirstOrDefault();
                    if (repoPurl != null)
                    {
                        mapping.Add(repoPurl, 1.0);
                        return mapping;
                    }
                }
            }
            catch (Exception ex)
            {
                Logger.Error(ex, $"Error fetching/parsing NuGet homepage: {ex.Message}");
                return mapping;
            }

            // if nothing worked, return empty
            return mapping;
        }