Exemplo n.º 1
0
        /// <summary>
        ///     Identifies the base URL for package source files by scanning the pool's package
        ///     listing for the first link to a <c>.dsc</c> file.
        /// </summary>
        /// <param name="purl"> Package URL to look up (only the name is used to build the request). </param>
        /// <param name="pool"> Pool name placed in the request path in front of the package name. </param>
        /// <returns>
        ///     The part of the first matching link that precedes the <c>.dsc</c> file name, or
        ///     <c>null</c> when the page could not be fetched, no such link exists, or an error occurred.
        /// </returns>
        private async Task<string?> GetArchiveBaseUrlForProject(PackageURL purl, string pool)
        {
            try
            {
                HttpClient httpClient = CreateHttpClient();
                string? html = await GetHttpStringCache(httpClient, $"{ENV_UBUNTU_ENDPOINT}/{pool}/{purl.Name}", neverThrow: true);
                if (html is null)
                {
                    return null;
                }

                AngleSharp.Html.Dom.IHtmlDocument document = await new HtmlParser().ParseDocumentAsync(html);
                foreach (AngleSharp.Dom.IElement anchor in document.QuerySelectorAll("a"))
                {
                    string? href = anchor.GetAttribute("href");

                    // URLs are machine-readable text, so compare ordinally rather than with the current culture.
                    if (href is null || !href.EndsWith(".dsc", StringComparison.Ordinal))
                    {
                        continue;
                    }

                    // Group 1 is everything in front of the final "/<file>.dsc" segment.
                    Match match = Regex.Match(href, "(.+)/[^/]+\\.dsc");
                    if (match.Success)
                    {
                        return match.Groups[1].Value.Trim();
                    }
                }
            }
            catch (Exception ex)
            {
                // Best effort: failures are logged and reported to the caller as "not found".
                Logger.Debug(ex, "Error fetching Ubuntu archive base URL for {0}: {1}", purl.ToString(), ex.Message);
            }

            return null;
        }
Exemplo n.º 2
0
        /// <summary>
        ///     Identifies the available pools for a given Ubuntu project. For example, 'xenial'.
        /// </summary>
        /// <param name="purl"> Package URL to look up (only name is used). </param>
        /// <returns>
        ///     The distinct pool names found in the search results; empty when the search page
        ///     could not be fetched or an error occurred.
        /// </returns>
        private async Task<IEnumerable<string>> GetPoolsForProject(PackageURL purl)
        {
            HashSet<string> pools = new();

            try
            {
                HttpClient httpClient = CreateHttpClient();

                // Escape the name so characters such as '+' or '&' cannot alter the query string.
                string keywords = Uri.EscapeDataString(purl.Name ?? string.Empty);
                string? searchResults = await GetHttpStringCache(httpClient, $"{ENV_UBUNTU_ENDPOINT}/search?keywords={keywords}&searchon=names&exact=1&suite=all&section=all", neverThrow: true);

                // With neverThrow the fetch can hand back null; there is nothing to parse then.
                if (searchResults is null)
                {
                    return pools;
                }

                AngleSharp.Html.Dom.IHtmlDocument document = await new HtmlParser().ParseDocumentAsync(searchResults);
                foreach (AngleSharp.Dom.IElement anchor in document.QuerySelectorAll("a.resultlink"))
                {
                    string? href = anchor.GetAttribute("href");
                    if (href is null)
                    {
                        continue;
                    }

                    // The pool is the first path segment of the result link.
                    Match match = Regex.Match(href, "^/([^/]+)/.+");
                    if (match.Success)
                    {
                        string pool = match.Groups[1].Value.Trim();
                        Logger.Debug("Identified pool: {0}", pool);
                        pools.Add(pool);
                    }
                }
            }
            catch (Exception ex)
            {
                // Best effort: failures are logged and whatever was collected so far is returned.
                Logger.Debug(ex, "Error fetching Ubuntu pools for {0}: {1}", purl.ToString(), ex.Message);
            }

            return pools;
        }
Exemplo n.º 3
0
        /// <summary>
        /// Picks the tracker category description for a release and maps it through <c>_categories</c>.
        /// </summary>
        /// <param name="dom">Document queried for the type info block.</param>
        /// <param name="t">Element whose text is used as the release name.</param>
        /// <param name="tr">Element queried for the release description heading.</param>
        /// <returns>The result of <c>MapTrackerCatDescToNewznab</c> for the chosen description.</returns>
        private ICollection <IndexerCategory> MapCategories(AngleSharp.Html.Dom.IHtmlDocument dom, AngleSharp.Dom.IElement t, AngleSharp.Dom.IElement tr)
        {
            var releaseName        = t.TextContent;
            var releaseDescription = tr.QuerySelector("h3.tracker_info_bold").TextContent;
            var typeInfo           = dom.QuerySelector("div.releases-date:contains('Тип:')").TextContent;

            string categoryDescription;

            if (CategorieOVARegex.IsMatch(releaseName) || CategorieOVARegex.IsMatch(releaseDescription))
            {
                // OVA goes first: an OVA looks like regular anime with "OVA" in its name or description.
                categoryDescription = "OVA/ONA/Special";
            }
            else if (CategorieMovieRegex.IsMatch(releaseName) || CategorieMovieRegex.IsMatch(releaseDescription))
            {
                // Movies next: some releases are dorama movies and belong in the movies category.
                categoryDescription = "Movies";
            }
            else if (CategorieDoramaRegex.IsMatch(releaseName) || CategorieDoramaRegex.IsMatch(typeInfo))
            {
                // Dorama is usually flagged in the type info, which may list several types at once.
                categoryDescription = "Dorama";
            }
            else
            {
                categoryDescription = "TV Anime";
            }

            return _categories.MapTrackerCatDescToNewznab(categoryDescription);
        }
Exemplo n.º 4
0
        /// <summary>
        /// Download one CPAN package and extract it to the target directory.
        /// </summary>
        /// <param name="purl">Package URL of the package to download.</param>
        /// <returns>n/a</returns>
        public override async Task <IEnumerable <string> > DownloadVersionAsync(PackageURL purl, bool doExtract, bool cached = false)
        {
            Logger.Trace("DownloadVersion {0}", purl?.ToString());

            string?       packageName     = purl?.Name;
            string?       packageVersion  = purl?.Version;
            List <string> downloadedPaths = new();

            if (string.IsNullOrWhiteSpace(packageName) || string.IsNullOrWhiteSpace(packageVersion))
            {
                Logger.Debug("Unable to download [{0} {1}]. Both must be defined.", packageName, packageVersion);
                return(downloadedPaths);
            }
            // Locate the URL
            HttpClient httpClient        = CreateHttpClient();
            string?    packageVersionUrl = null;
            string?    html = await GetHttpStringCache(httpClient, $"{ENV_CPAN_ENDPOINT}/release/{packageName}");

            HtmlParser parser = new();

            AngleSharp.Html.Dom.IHtmlDocument document = await parser.ParseDocumentAsync(html);

            foreach (AngleSharp.Dom.IElement option in document.QuerySelectorAll("div.release select.extend option"))
            {
                if (!option.HasAttribute("value"))
                {
                    continue;
                }
                string?value   = option.GetAttribute("value");
                string version = value.Split('-').Last();
                if (version.StartsWith("v", StringComparison.InvariantCultureIgnoreCase))
                {
                    version = version[1..];
Exemplo n.º 5
0
        /// <summary>
        /// Downloads pages one after another by following the "next" pagination link
        /// and returns the number of the last page reached.
        /// </summary>
        /// <param name="url">Address of the page to start from.</param>
        /// <param name="page">Number assigned to the starting page.</param>
        /// <returns>The number of the first page that has no "next" link.</returns>
        static int DownloadAndFindNext(string url, int page)
        {
            const string NextPageSelector = "div.row > div > div.card > div.card-body > div.paginator-top > ul > li.page-item > a[rel=\"next\"]";

            while (true)
            {
                Console.WriteLine("Downloading page " + page);

                // Each page is stored on disk and only fetched when missing or when overwriting is requested.
                string path = Path.GetFullPath(@"work\page" + page + ".html");
                if (!File.Exists(path) || OverwritePages)
                {
                    using (WebClient client = new WebClient())
                    {
                        client.DownloadFile(url, path);
                    }
                }

                AngleSharp.Html.Dom.IHtmlDocument document = Parser.ParseDocument(File.ReadAllText(path));
                IHtmlCollection <IElement> nextLinks = document.QuerySelectorAll(NextPageSelector);

                if (nextLinks.Length == 0)
                {
                    return page;
                }

                // Follow the first "next" link and continue with the following page number.
                url  = ((AngleSharp.Html.Dom.IHtmlAnchorElement)nextLinks[0]).Href;
                page = page + 1;
            }
        }
Exemplo n.º 6
0
 /// <summary>
 /// Collects all tasks listed on a category page.
 /// </summary>
 /// <param name="link">Link to the page without the domain; it is appended to <c>Domain</c>.</param>
 /// <returns>
 /// The tasks that <c>GetTaskFromLink</c> could load, or <c>null</c> when fetching or parsing the page failed.
 /// </returns>
 public List <Objects.Task> GetTasksFromPage(string link)
 {
     Log.ProcessMessage("Пытаемся получить список заданий со страницы " + link);
     try
     {
         // The request is awaited synchronously, exactly as the rest of this class does.
         var    response = http.GetAsync(Domain + link).Result;
         string markup   = response.Content.ReadAsStringAsync().Result;

         AngleSharp.Html.Dom.IHtmlDocument page = new HtmlParser().ParseDocument(markup);

         var collected = new List <Objects.Task>();
         foreach (AngleSharp.Dom.IElement anchor in page.QuerySelectorAll("div.row.set_href .title a"))
         {
             // Links that cannot be turned into a task come back as null and are left out.
             Objects.Task parsed = GetTaskFromLink(anchor.GetAttribute("href"));
             if (parsed != null)
             {
                 collected.Add(parsed);
             }
         }

         Log.GoodMessage("Получили список заданий со страницы " + link);
         return collected;
     }
     catch
     {
         Log.ExMessage("Не удалось получить список заданий со страницы " + link);
         return null;
     }
 }
        /// <summary>
        /// Reads the text of the last matching span and stores it, converted with <c>ToInt</c>, in <c>LatestPage</c>.
        /// </summary>
        /// <param name="document">Parsed page to inspect.</param>
        protected override void OnPageCrawl(AngleSharp.Html.Dom.IHtmlDocument document)
        {
            var latest = document.QuerySelectorAll("tbody tr td table tbody tr td span")
                         .Select(span => span.TextContent.Trim())
                         .LastOrDefault();

            // No span, or an empty one, means the latest page is unknown.
            if (string.IsNullOrEmpty(latest))
            {
                LatestPage = null;
                return;
            }

            LatestPage = latest.ToInt();
        }
Exemplo n.º 8
0
 /// <summary>
 /// Refreshes the list of categories from the "/jobs/" page.
 /// </summary>
 /// <returns><c>true</c> when the page was fetched and processed; <c>false</c> when anything failed.</returns>
 public bool UpdateWorkCategory()
 {
     Log.ProcessMessage("Пытаемся обновить список категорий");
     try
     {
         var    response = http.GetAsync(Domain + "/jobs/").Result;
         string markup   = response.Content.ReadAsStringAsync().Result;

         AngleSharp.Html.Dom.IHtmlDocument page = new HtmlParser().ParseDocument(markup);

         // Every matching anchor becomes one entry appended to the shared category list.
         foreach (AngleSharp.Dom.IElement anchor in page.QuerySelectorAll(".collapse li a[data-category_id]"))
         {
             var category = new Objects.Category
             {
                 Name = anchor.TextContent,
                 Href = anchor.GetAttribute("href")
             };
             Objects.Category.Categories.Add(category);
         }

         Log.GoodMessage("Обновили список категорий");
         return true;
     }
     catch
     {
         Log.ExMessage("Не удалось обновить список категорий");
         return false;
     }
 }
Exemplo n.º 9
0
        /// <summary>
        /// Loads a single task page and extracts its data.
        /// </summary>
        /// <param name="link">Link to the task without the domain; it is appended to <c>Domain</c>.</param>
        /// <returns>
        /// The parsed task, or <c>null</c> when the link contains "vacancies" or when fetching or parsing failed.
        /// </returns>
        public Objects.Task GetTaskFromLink(string link)
        {
            Log.ProcessMessage("Пытаемся получить задание " + link);
            try
            {
                // Vacancy links are not tasks and are skipped.
                if (link.Contains("vacancies"))
                {
                    return null;
                }

                string     get    = http.GetAsync(Domain + link).Result.Content.ReadAsStringAsync().Result;
                HtmlParser parser = new HtmlParser();
                AngleSharp.Html.Dom.IHtmlDocument html = parser.ParseDocument(get);

                // The amount block is optional; look it up once and fall back to the placeholder text.
                var amountElement = html.QuerySelector(".page_header_content .title.amount");
                var timestampText = html.QuerySelector(".cols_table.no_hover .text-muted  span[data-timestamp]").GetAttribute("data-timestamp");

                var task = new Objects.Task
                {
                    Title        = html.QuerySelector(".col h1").TextContent,
                    Discription  = html.QuerySelector(".text_field p").TextContent,
                    Price        = amountElement != null ? amountElement.TextContent : "Бюджет не определен",
                    // The attribute is machine-generated, so it is parsed independently of the current culture.
                    TimeStamp    = double.Parse(timestampText, System.Globalization.CultureInfo.InvariantCulture),
                    Applications = html.QuerySelector(".block-content .title").TextContent,
                    Link         = Domain + link
                };

                Log.GoodMessage("Получили задание " + link);
                return task;
            }
            catch
            {
                // Any missing element or malformed value ends up here and is reported as "no task".
                Log.ExMessage("Не удалось получить задание " + link);
                return null;
            }
        }
Exemplo n.º 10
0
        /// <summary>
        /// Reads the release date text from a show page and converts it to UTC.
        /// </summary>
        /// <param name="content">Parsed show page.</param>
        /// <returns>
        /// The parsed date shifted from the fixed +03:00 offset to UTC, or <see cref="DateTime.UtcNow" />
        /// when the text cannot be parsed.
        /// </returns>
        private DateTime GetDateFromShowPage(AngleSharp.Html.Dom.IHtmlDocument content)
        {
            const string dateFormat     = "d-MM-yyyy";
            const string dateTimeFormat = dateFormat + ", HH:mm";

            // Would be better to use AssumeLocal and provide "ru-RU" culture,
            // but doesn't work cross-platform
            const DateTimeStyles style = DateTimeStyles.AssumeUniversal;

            var culture = CultureInfo.InvariantCulture;

            var dateText = GetDateFromDocument(content);

            //Correct way but will not always work on cross-platform
            //var localTimeZone = TimeZoneInfo.FindSystemTimeZoneById("Russian Standard Time");
            //var nowLocal = TimeZoneInfo.ConvertTime(DateTime.UtcNow, localTimeZone);

            // Russian Standard Time is +03:00, no DST
            const int russianStandardTimeDiff = 3;
            var       nowLocal = DateTime.UtcNow.AddHours(russianStandardTimeDiff);

            // Format the substituted dates with the same culture that parses them below; the
            // current culture may use another calendar or digit set and would then fail to parse.
            dateText = dateText
                       .Replace("Вчера", nowLocal.AddDays(-1).ToString(dateFormat, culture))
                       .Replace("Сегодня", nowLocal.ToString(dateFormat, culture));

            if (DateTime.TryParseExact(dateText, dateTimeFormat, culture, style, out var date))
            {
                // The text is site-local time (+03:00) read as if it were UTC, so shift it back.
                var utcDate = date.ToUniversalTime();
                return utcDate.AddHours(-russianStandardTimeDiff);
            }

            Logger.Warn($"[AniDub] Date time couldn't be parsed on. Date text: {dateText}");

            return DateTime.UtcNow;
        }
Exemplo n.º 11
0
        /// <summary>
        /// Reads the board table of a page and reports each row through <c>OnCrawlData</c>.
        /// </summary>
        /// <param name="document">Parsed page to inspect.</param>
        protected override void OnPageCrawl(AngleSharp.Html.Dom.IHtmlDocument document)
        {
            var headers = document.QuerySelectorAll("thead tr th")
                          .Select(header => header.TextContent.Trim())
                          .ToArray();

            // Flattened cell texts of the data rows; a cell's link text wins over its own text.
            var cells = document.QuerySelectorAll("tbody tr")
                        .Where(row => row.ClassName == "view list_tr_humordata")
                        .SelectMany(row => row.QuerySelectorAll("td"))
                        .Select(cell => (cell.QuerySelector("a") ?? cell).TextContent.Trim())
                        .ToArray();

            var links = document.QuerySelectorAll("tbody tr td")
                        .Where(cell => cell.ClassName == "subject")
                        .Select(cell => cell.QuerySelector("a").GetAttribute("href"))
                        .ToArray();

            if (!(headers.Any() && cells.Any()))
            {
                return;
            }

            // Two-digit years are read against a window ending 30 years after the current year.
            var dateCulture  = (CultureInfo)Thread.CurrentThread.CurrentCulture.Clone();
            var dateCalendar = dateCulture.Calendar;

            dateCalendar.TwoDigitYearMax        = DateTime.Now.Year + 30;
            dateCulture.DateTimeFormat.Calendar = dateCalendar;

            var columnCount = headers.Length;
            var rowCount    = cells.Length / columnCount;

            Parallel.For(0, rowCount, rowIndex =>
            {
                // First cell of this row inside the flattened array.
                var offset = rowIndex * columnCount;

                var rowId       = cells[offset + 0].ToInt();
                var rowTitle    = cells[offset + 2];
                var rowAuthor   = cells[offset + 3];
                var postedAt    = DateTime.ParseExact(cells[offset + 4], "yy/MM/dd HH:mm", dateCulture);
                var viewCount   = cells[offset + 5].ToInt();
                var recommended = cells[offset + 6].ToInt();

                var rowHref = UrlCompositeHref(links[rowIndex]);

                var data = new CrawlingData
                {
                    Type      = Source.Type,
                    BoardId   = Source.BoardId,
                    BoardName = Source.Name,
                    RowId     = rowId,
                    Title     = rowTitle,
                    Author    = rowAuthor,
                    Recommend = recommended,
                    Count     = viewCount,
                    DateTime  = postedAt,
                    Href      = rowHref,
                    SourceId  = Source.Id
                };

                _ = OnCrawlData(data);
            });
        }
Exemplo n.º 12
0
        /// <summary>
        /// Reads a seven-column board table and reports each data row through <c>OnCrawlData</c>.
        /// </summary>
        /// <param name="document">Parsed page to inspect.</param>
        protected override void OnPageCrawl(AngleSharp.Html.Dom.IHtmlDocument document)
        {
            // Texts of all classed cells except "g6"; a cell's link text wins over its own text.
            var cellTexts = document.QuerySelectorAll("tbody tr td")
                            .Where(cell => !string.IsNullOrEmpty(cell.ClassName) && cell.ClassName != "g6")
                            .Select(cell => (cell.QuerySelector("a") ?? cell).TextContent.Trim())
                            .ToArray();

            var links = document.QuerySelectorAll("tbody tr td a")
                        .Where(anchor => !string.IsNullOrEmpty(anchor.ClassName))
                        .Select(anchor => anchor.GetAttribute("href"))
                        .ToArray();

            if (!cellTexts.Any())
            {
                return;
            }

            const int columnCount = 7;

            // The first seven entries are skipped before the data rows are read.
            var rows = cellTexts.Skip(columnCount).ToArray();

            Parallel.For(0, rows.Length / columnCount, rowIndex =>
            {
                var offset = rowIndex * columnCount;

                var rawTitle = rows[offset + 1];

                var rowTitle       = rawTitle.Substring("\n");
                var rowAuthor      = rows[offset + 2];
                var postedAt       = DateTime.Parse(rows[offset + 3]);
                var viewCount      = rows[offset + 4].ToInt();
                var recommended    = rows[offset + 5].ToInt();
                var notRecommended = rows[offset + 6].ToInt();

                var rowHref = UrlCompositeHref(links[rowIndex]);

                var data = new CrawlingData
                {
                    Type      = Source.Type,
                    BoardId   = Source.BoardId,
                    BoardName = Source.Name,
                    Title     = rowTitle,
                    Author    = rowAuthor,
                    // Reported as the net score: recommendations minus non-recommendations.
                    Recommend = recommended - notRecommended,
                    Count     = viewCount,
                    DateTime  = postedAt,
                    Href      = rowHref,
                    SourceId  = Source.Id
                };

                _ = OnCrawlData(data);
            });
        }
Exemplo n.º 13
0
        /// <summary>
        /// Parses the given markup and serializes it again with <c>PrettyMarkupFormatter</c>.
        /// </summary>
        /// <param name="newContent">Markup to parse.</param>
        /// <returns>The document's markup as written by the pretty formatter.</returns>
        public static string PrettifyHtml(string newContent)
        {
            AngleSharp.Html.Dom.IHtmlDocument parsed = new AngleSharp.Html.Parser.HtmlParser().ParseDocument(newContent);

            using (var output = new StringWriter())
            {
                parsed.ToHtml(output, new PrettyMarkupFormatter());
                return output.ToString();
            }
        }
Exemplo n.º 14
0
        /// <summary>
        /// Replaces each text node in <paramref name="childs" /> that contains link targets with a
        /// sequence of text nodes and <c>a</c> elements produced from <c>WordSplitor.SplitWords</c>.
        /// </summary>
        /// <param name="document">Document used to create the replacement nodes.</param>
        /// <param name="childs">Nodes to inspect; only text nodes are touched.</param>
        /// <param name="dicKeywordMap">Keyword map handed to <c>WordSplitor.SplitWords</c>.</param>
        private static void ProcessChildNode(AngleSharp.Html.Dom.IHtmlDocument document, INodeList childs, Dictionary <string, string> dicKeywordMap)
        {
            for (var index = 0; index < childs.Length; index++)
            {
                var node = childs[index];

                // Only plain text nodes with readable text are processed.
                if (node.NodeType != NodeType.Text || string.IsNullOrWhiteSpace(node.TextContent))
                {
                    continue;
                }

                // Split the text into plain and link segments.
                var segments = WordSplitor.SplitWords(node.TextContent, dicKeywordMap);

                // Nothing to do when no link target was found.
                if (!segments.Any(segment => segment.NodeType == TextNodeType.Link))
                {
                    continue;
                }

                var replacements = new List <INode>();

                foreach (var segment in segments)
                {
                    if (segment is TextNode)
                    {
                        replacements.Add(document.CreateTextNode(segment.Context));
                        continue;
                    }

                    var linkSegment = segment as LinkNode;

                    // Links created here are tagged with data-autolink="bot".
                    var anchor = document.CreateElement("a");
                    anchor.SetAttribute("href", linkSegment.LinkUrl.ToString());
                    anchor.SetAttribute("data-autolink", "bot");
                    anchor.TextContent = linkSegment.Context;

                    replacements.Add(anchor);
                }

                node.ReplaceWith(replacements.ToArray());
            }
        }
Exemplo n.º 15
0
        /// <summary>
        /// Extracts the raw date text from the second item of the <c>.story_inf</c> list.
        /// </summary>
        /// <param name="content">Parsed page to inspect.</param>
        /// <returns>
        /// The trimmed value of the item's last child when that child is a text node; otherwise
        /// <see cref="string.Empty" /> (also when the list item is missing).
        /// </returns>
        private static string GetDateFromDocument(AngleSharp.Html.Dom.IHtmlDocument content)
        {
            const string DateSelector = ".story_inf > li:nth-child(2)";

            // QuerySelector yields null when nothing matches; treat that like "no text node".
            var domDate = content.QuerySelector(DateSelector)?.LastChild;

            if (domDate?.NodeName != "#text")
            {
                return string.Empty;
            }

            return domDate.NodeValue.Trim();
        }
Exemplo n.º 16
0
        /// <summary>
        /// Builds the release title from the page's <c>#news-title</c> element, appending the
        /// quality reported by <c>GetQuality</c> in square brackets when there is one.
        /// </summary>
        /// <param name="content">Parsed page that holds the title element.</param>
        /// <param name="tabNode">Element whose parent is passed to <c>GetQuality</c>.</param>
        /// <returns>The trimmed title, optionally followed by " [quality]".</returns>
        private static string GetTitle(AngleSharp.Html.Dom.IHtmlDocument content, AngleSharp.Dom.IElement tabNode)
        {
            var pageTitle    = content.QuerySelector("#news-title").TextContent.Trim();
            var qualityLabel = GetQuality(tabNode.ParentElement);

            return string.IsNullOrWhiteSpace(qualityLabel)
                ? pageTitle
                : $"{pageTitle} [{qualityLabel}]";
        }
Exemplo n.º 17
0
        /// <summary>
        /// Chooses the crawl routine for a page: the table routine when the page has header
        /// cells, the list routine otherwise.
        /// </summary>
        /// <param name="document">Parsed page to inspect.</param>
        protected override void OnPageCrawl(AngleSharp.Html.Dom.IHtmlDocument document)
        {
            var headers = document.QuerySelectorAll("thead tr th")
                          .Select(header => header.TextContent.Trim())
                          .ToArray();

            if (!headers.Any())
            {
                OnPageCrawlList(document);
                return;
            }

            OnPageCrawlTable(document, headers);
        }
Exemplo n.º 18
0
        /// <summary>
        /// Returns the list of proxies found on a site that the search engine located for us.
        /// </summary>
        /// <param name="_url">Address of the page to download and scan.</param>
        /// <returns>
        /// Distinct "ip:port" strings in the order they were found; an empty array when the page
        /// yields no HTML or no proxies.
        /// </returns>
        public static string[] GetProxys(string _url)
        {
            string code = GetHtml(_url);

            // Nothing to parse when the download produced no HTML (null is handled like "").
            if (string.IsNullOrEmpty(code))
            {
                return new string[0];
            }

            // The list keeps discovery order; the set provides constant-time duplicate checks.
            var proxies = new List <string>();
            var seen    = new HashSet <string>();

            AngleSharp.Html.Dom.IHtmlDocument doc = new AngleSharp.Html.Parser.HtmlParser().ParseDocument(code);

            foreach (var row in doc.GetElementsByTagName("TR"))
            {
                try
                {
                    // Join the cell texts, space-separated, so an address and its port end up adjacent.
                    var cellText = new System.Text.StringBuilder();
                    foreach (var cell in row.GetElementsByTagName("TD"))
                    {
                        cellText.Append(' ').Append(cell.TextContent).Append(' ');
                    }

                    // Every run of characters other than digits and dots collapses to one space.
                    string normalized = Regex.Replace(cellText.ToString(), @"[^0-9\.]+", " ");

                    foreach (Match match in Regex.Matches(normalized, @"(?<prx>\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3} \d{1,5} )+"))
                    {
                        // The group repeats inside one match; Captures holds every repetition,
                        // whereas Group.Value would only expose the last one.
                        foreach (Capture capture in match.Groups["prx"].Captures)
                        {
                            string proxy = capture.Value.Trim().Replace(' ', ':');
                            if (seen.Add(proxy))
                            {
                                proxies.Add(proxy);
                            }
                        }
                    }
                }
                catch
                {
                    // Best effort: a row that cannot be processed is skipped.
                }
            }

            return proxies.ToArray();
        }
Exemplo n.º 19
0
        /// <summary>
        /// Reads every table row's classed spans and links and reports each row through <c>OnCrawlData</c>.
        /// </summary>
        /// <param name="document">Parsed page to inspect.</param>
        protected override void OnPageCrawl(AngleSharp.Html.Dom.IHtmlDocument document)
        {
            // Per row: (span class, span text) pairs without the "category" span, plus all link targets.
            var rows = document.QuerySelectorAll("tbody tr")
                       .Select(tableRow =>
            {
                var spanValues = tableRow.QuerySelectorAll("span")
                                 .Where(span => span.ClassName != "category")
                                 .Select(span => Tuple.Create(span.ClassName, span.TextContent.Trim()))
                                 .ToList();

                var linkTargets = tableRow.QuerySelectorAll("a")
                                  .Select(anchor => anchor.GetAttribute("href"))
                                  .ToList();

                return Tuple.Create(spanValues, linkTargets);
            })
                       .ToArray();

            Parallel.ForEach(rows, row =>
            {
                var spanValues  = row.Item1;
                var linkTargets = row.Item2;

                var commentText = spanValues.FindValue("cmtnum");
                var rawTitle    = spanValues.FindValue("title");

                // The info span holds "|"-separated fields: category, author, date.
                var infoParts = spanValues.FindValue("info").Split("|");

                string rowTitle;
                if (string.IsNullOrEmpty(commentText))
                {
                    rowTitle = rawTitle;
                }
                else
                {
                    rowTitle = rawTitle.Substring(commentText);
                }

                var rowCategory = infoParts[0].Substring("\n");
                var rowAuthor   = infoParts[1];
                var postedAt    = DateTime.Parse(infoParts[2]);
                var recommended = string.IsNullOrEmpty(commentText) ? 0 : commentText.ToIntRegx();

                var rowHref = linkTargets[0];

                var data = new CrawlingData
                {
                    Type      = Source.Type,
                    BoardId   = Source.BoardId,
                    BoardName = Source.Name,
                    Title     = rowTitle,
                    Category  = rowCategory,
                    Author    = rowAuthor,
                    Recommend = recommended,
                    DateTime  = postedAt,
                    Href      = rowHref,
                    SourceId  = Source.Id
                };

                _ = OnCrawlData(data);
            });
        }
Exemplo n.º 20
0
        /// <summary>
        /// Returns the list of sites with proxies found on the given search page (Yandex, etc.).
        /// </summary>
        /// <param name="_url">Address of the search results page to download.</param>
        /// <returns>
        /// Distinct link targets that start with "http" and do not contain any of the excluded
        /// names; an empty array when the page yields no HTML.
        /// </returns>
        public static string[] GetYandexHrefs(string _url)
        {
            string code = GetHtml(_url);

            // Nothing to parse when the download produced no HTML (null is handled like "").
            if (string.IsNullOrEmpty(code))
            {
                return new string[0];
            }

            // Links containing any of these names are dropped from the result.
            string[] excluded = { "yandex", "google", "mail", "rambler", "youtube" };

            AngleSharp.Html.Dom.IHtmlDocument doc = new AngleSharp.Html.Parser.HtmlParser().ParseDocument(code);

            return doc.Links
                   .Select(link => link.GetAttribute("HREF"))
                   // URLs are machine-readable text: compare ordinally, and skip links without a target.
                   .Where(href => href != null && href.StartsWith("http", System.StringComparison.Ordinal))
                   .Distinct()
                   .Where(href => !excluded.Any(name => href.Contains(name)))
                   .ToArray();
        }
Exemplo n.º 21
0
        /// <inheritdoc />
        public override async Task<IEnumerable<string>> EnumerateVersionsAsync(PackageURL purl, bool useCache = true, bool includePrerelease = true)
        {
            Logger.Trace("EnumerateVersions {0}", purl?.ToString());
            if (purl == null || purl.Name is null)
            {
                return Array.Empty<string>();
            }

            try
            {
                string     packageName = purl.Name;
                HttpClient httpClient  = CreateHttpClient();

                // The response owns the connection and content stream, so release it when done.
                using System.Net.Http.HttpResponseMessage response = await httpClient.GetAsync($"{ENV_HACKAGE_ENDPOINT}/package/{packageName}");
                response.EnsureSuccessStatusCode();

                HtmlParser parser = new();
                AngleSharp.Html.Dom.IHtmlDocument document = await parser.ParseDocumentAsync(await response.Content.ReadAsStringAsync());

                List<string> versionList = new();
                foreach (AngleSharp.Dom.IElement th in document.QuerySelectorAll("th"))
                {
                    if (!th.TextContent.StartsWith("Versions", StringComparison.Ordinal))
                    {
                        continue;
                    }

                    // The versions are the links and bold entries in the cell next to the header.
                    // A header without a following cell yields no versions instead of a null dereference.
                    AngleSharp.Dom.IElement? td = th.NextElementSibling;
                    if (td is not null)
                    {
                        foreach (AngleSharp.Dom.IElement version in td.QuerySelectorAll("a,strong"))
                        {
                            // Version text is an identifier, so lower-case it culture-independently.
                            string versionString = version.TextContent.ToLowerInvariant().Trim();
                            Logger.Debug("Identified {0} version {1}.", packageName, versionString);
                            versionList.Add(versionString);
                        }
                    }

                    // Only the first "Versions" header is considered.
                    break;
                }

                return SortVersions(versionList.Distinct());
            }
            catch (Exception ex)
            {
                Logger.Debug("Unable to enumerate versions: {0}", ex.Message);
                throw;
            }
        }
Exemplo n.º 22
0
        /// <summary>
        /// Builds the release title from the page names, the tab name, the episode range found
        /// in the tracker heading, and the resolution reported by <c>getResolution</c>.
        /// </summary>
        /// <param name="dom">Document queried for the three name fields.</param>
        /// <param name="t">Element whose text is used as the tab name.</param>
        /// <param name="tr">Element queried for the tracker heading and passed to <c>getResolution</c>.</param>
        /// <returns>A string of the form "names - heading [resolution p]".</returns>
        private string composeTitle(AngleSharp.Html.Dom.IHtmlDocument dom, AngleSharp.Dom.IElement t, AngleSharp.Dom.IElement tr)
        {
            var russianName  = dom.QuerySelector("div.media__post__header > h1").TextContent.Trim();
            var englishName  = dom.QuerySelector("div.media__panel > div:nth-of-type(1) > div.col-l:nth-of-type(1) > div > span").TextContent.Trim();
            var originalName = dom.QuerySelector("div.media__panel > div:nth-of-type(1) > div.col-l:nth-of-type(2) > div > span").TextContent.Trim();

            // The original name is appended only when it differs from the English one.
            var title = englishName != originalName
                ? russianName + " / " + englishName + " / " + originalName
                : russianName + " / " + englishName;

            // A tab name mentioning "Серии" is dropped entirely.
            var tabName = t.TextContent.Replace("Сезон", "Season");
            if (tabName.Contains("Серии"))
            {
                tabName = "";
            }

            // Parse episodes info from the tracker heading if it is present.
            var episodes = EpisodesInfoQueryRegex.Match(tr.QuerySelector("h3.tracker_info_bold").TextContent);

            var heading = tabName;
            if (episodes.Success)
            {
                var firstEpisode = episodes.Groups[1].Value;
                var lastEpisode  = episodes.Groups[2].Value;

                heading += string.IsNullOrEmpty(lastEpisode)
                    ? " E" + firstEpisode
                    : string.Format(" E{0}-{1}", firstEpisode, lastEpisode);
            }

            return title + " - " + heading + " [" + getResolution(tr) + "p]";
        }
Exemplo n.º 23
0
        /// <summary>
        /// Reads the board table of a page and reports each row through <c>OnCrawlData</c>.
        /// </summary>
        /// <param name="document">Parsed page to inspect.</param>
        /// <param name="thContent">Header cell texts; their count is used as the number of columns per row.</param>
        private void OnPageCrawlTable(AngleSharp.Html.Dom.IHtmlDocument document, string[] thContent)
        {
            var tableCells = document.QuerySelectorAll("tbody tr td");

            var cellTexts = tableCells
                            .Select(cell => cell.TextContent.Trim())
                            .ToArray();

            // One link per row, taken from the cell whose class contains "title".
            var links = tableCells
                        .Where(cell => !string.IsNullOrEmpty(cell.ClassName) && cell.ClassName.Contains("title"))
                        .Select(cell => cell.QuerySelector("a").GetAttribute("href"))
                        .ToArray();

            if (!(thContent.Any() && cellTexts.Any()))
            {
                return;
            }

            Parallel.For(0, cellTexts.Length / thContent.Length, rowIndex =>
            {
                // First cell of this row inside the flattened array.
                var offset = rowIndex * thContent.Length;

                var rowCategory = cellTexts[offset + 0];
                var rowTitle    = cellTexts[offset + 1];
                var rowAuthor   = cellTexts[offset + 2];
                var postedAt    = DateTime.Parse(cellTexts[offset + 3]);
                var viewCount   = cellTexts[offset + 4].ToInt();
                var recommended = cellTexts[offset + 5].ToInt();

                var rowHref = UrlCompositeHref(links[rowIndex]);

                var data = new CrawlingData
                {
                    Type      = Source.Type,
                    BoardId   = Source.BoardId,
                    BoardName = Source.Name,
                    Category  = rowCategory,
                    Title     = rowTitle.Substring("\t"),
                    Author    = rowAuthor,
                    Recommend = recommended,
                    Count     = viewCount,
                    DateTime  = postedAt,
                    Href      = rowHref,
                    SourceId  = Source.Id
                };

                _ = OnCrawlData(data);
            });
        }
Exemplo n.º 24
0
        /// <summary>
        ///     Extracts zero or more strings from the parse target using the strategy selected by
        ///     <c>Type</c>: left/right delimiters (LR), a CSS selector, a JSON field or a regular
        ///     expression. XPATH is declared but not implemented and yields an empty list.
        /// </summary>
        /// <remarks>
        ///     Most failures are swallowed by empty catch blocks, in which case the list holds whatever
        ///     was collected before the failure. The JSON branch and the non-recursive regex-LR branch
        ///     are not wrapped in a try/catch, so exceptions raised there reach the caller.
        /// </remarks>
        /// <param name="data"> Bot data forwarded to every <c>ReplaceValues</c> call (presumably the source of the substituted values; confirm against <c>ReplaceValues</c>). </param>
        /// <returns> The parsed values, in the order they were found; possibly empty. </returns>
        private List <string> Parse(BotData data)
        {
            var original = ReplaceValues(parseTarget, data);
            // Working copy; the non-regex LR parsing consumes it from the left as it goes.
            var partial  = original;
            var list     = new List <string>();

            // Parse the value
            switch (Type)
            {
            case ParseType.LR:
                var ls    = ReplaceValues(leftString, data);
                var rs    = ReplaceValues(rightString, data);
                var pFrom = 0;
                var pTo   = 0;

                // No L and R = return full input
                if (ls == "" && rs == "")
                {
                    list.Add(original);
                    break;
                }

                // L or R not present and not empty = nothing to parse, return the empty list
                else if (((!partial.Contains(ls) && ls != "") || (!partial.Contains(rs) && rs != "")))
                {
                    break;
                }

                // Instead of the mess below, we could simply use Extreme.NET's Substring extensions
                // return original.Substrings(ls, rs); // Recursive
                // return original.Substring(ls, rs); // Not recursive

                // Recursive: collect every occurrence
                if (recursive)
                {
                    if (useRegexLR)
                    {
                        try
                        {
                            // Every regex match of the pattern built from the two delimiters is a result
                            var             pattern = BuildLRPattern(ls, rs);
                            MatchCollection mc      = Regex.Matches(partial, pattern);
                            foreach (Match m in mc)
                            {
                                list.Add(m.Value);
                            }
                        }
                        catch { }
                    }
                    else
                    {
                        try
                        {
                            // Keep going while both delimiters (when not empty) still occur in the remaining text
                            while ((partial.Contains(ls) || ls == "") && (partial.Contains(rs) || rs == ""))
                            {
                                // Search for left delimiter and Calculate offset
                                pFrom = ls == "" ? 0 : partial.IndexOf(ls) + ls.Length;
                                // Move right of offset
                                partial = partial.Substring(pFrom);
                                // Search for right delimiter and Calculate length to parse
                                // NOTE(review): with an empty right delimiter this takes Length - 1 (dropping the
                                // last character) while the non-recursive branch below takes Length; confirm intended.
                                // If the right delimiter no longer occurs after the left one, IndexOf yields -1,
                                // Substring throws and the empty catch below ends the loop.
                                pTo = rs == "" ? (partial.Length - 1) : partial.IndexOf(rs);
                                // Parse it
                                var parsed = partial.Substring(0, pTo);
                                list.Add(parsed);
                                // Move right of parsed + right
                                partial = partial.Substring(parsed.Length + rs.Length);
                            }
                        }
                        catch { }
                    }
                }

                // Non-recursive: only the first occurrence
                else
                {
                    if (useRegexLR)
                    {
                        // Not guarded by try/catch, unlike the recursive regex branch above
                        var             pattern = BuildLRPattern(ls, rs);
                        MatchCollection mc      = Regex.Matches(partial, pattern);
                        if (mc.Count > 0)
                        {
                            list.Add(mc[0].Value);
                        }
                    }
                    else
                    {
                        try
                        {
                            // Skip past the left delimiter, then take everything up to the right delimiter
                            // (or up to the end of the text when the right delimiter is empty)
                            pFrom   = ls == "" ? 0 : partial.IndexOf(ls) + ls.Length;
                            partial = partial.Substring(pFrom);
                            pTo     = rs == "" ? partial.Length : partial.IndexOf(rs);
                            list.Add(partial.Substring(0, pTo));
                        }
                        catch { }
                    }
                }

                break;

            case ParseType.CSS:

                HtmlParser parser = new HtmlParser();
                AngleSharp.Html.Dom.IHtmlDocument document = null;
                // If parsing throws, document stays null; the resulting exception further down is
                // swallowed by the catch at the end of this case and the list stays empty.
                try { document = parser.ParseDocument(original); } catch {  }

                try
                {
                    if (recursive)
                    {
                        // Take the requested value from every element matching the selector
                        foreach (var element in document.QuerySelectorAll(ReplaceValues(cssSelector, data)))
                        {
                            switch (ReplaceValues(attributeName, data))
                            {
                            case "innerHTML":
                                list.Add(element.InnerHtml);
                                break;

                            case "outerHTML":
                                list.Add(element.OuterHtml);
                                break;

                            default:
                                // Any other name is looked up among the element's attributes; first match wins
                                foreach (var attr in element.Attributes)
                                {
                                    if (attr.Name == ReplaceValues(attributeName, data))
                                    {
                                        list.Add(attr.Value);
                                        break;
                                    }
                                }
                                break;
                            }
                        }
                    }
                    else
                    {
                        // Only the element at cssElementIndex among the matches is used
                        switch (ReplaceValues(attributeName, data))
                        {
                        case "innerHTML":
                            list.Add(document.QuerySelectorAll(ReplaceValues(cssSelector, data))[cssElementIndex].InnerHtml);
                            break;

                        case "outerHTML":
                            list.Add(document.QuerySelectorAll(ReplaceValues(cssSelector, data))[cssElementIndex].OuterHtml);
                            break;

                        default:
                            foreach (var attr in document.QuerySelectorAll(ReplaceValues(cssSelector, data))[cssElementIndex].Attributes)
                            {
                                if (attr.Name == ReplaceValues(attributeName, data))
                                {
                                    list.Add(attr.Value);
                                    break;
                                }
                            }
                            break;
                        }
                    }
                }
                catch { }

                break;

            case ParseType.JSON:
                // No try/catch in this case: parse errors propagate to the caller.
                if (JTokenParsing)
                {
                    // NOTE(review): jsonField is used as-is here, without ReplaceValues, unlike the
                    // non-JToken path below; confirm that this is intended.
                    if (original.Trim().StartsWith("["))
                    {
                        // Input that starts with '[' is parsed as a JSON array
                        JArray json     = JArray.Parse(original);
                        var    jsonlist = json.SelectTokens(jsonField, false);
                        foreach (var j in jsonlist)
                        {
                            list.Add(j.ToString());
                        }
                    }
                    else
                    {
                        // Everything else is parsed as a JSON object
                        JObject json     = JObject.Parse(original);
                        var     jsonlist = json.SelectTokens(jsonField, false);
                        foreach (var j in jsonlist)
                        {
                            list.Add(j.ToString());
                        }
                    }
                }
                else
                {
                    // parseJSON fills jsonlist with key/value pairs; keep the values whose key equals the requested field
                    var jsonlist = new List <KeyValuePair <string, string> >();
                    parseJSON("", original, jsonlist);
                    foreach (var j in jsonlist)
                    {
                        if (j.Key == ReplaceValues(jsonField, data))
                        {
                            list.Add(j.Value);
                        }
                    }
                }

                break;

            case ParseType.XPATH:

                // NOT IMPLEMENTED YET
                break;

            case ParseType.REGEX:
                try
                {
                    var matches = Regex.Matches(partial, ReplaceValues(regexString, data));
                    foreach (Match match in matches)
                    {
                        // Build one output per match: each "[i]" placeholder in the output
                        // template is replaced by the value of capture group i
                        var output = ReplaceValues(regexOutput, data);
                        for (var i = 0; i < match.Groups.Count; i++)
                        {
                            output = output.Replace("[" + i + "]", match.Groups[i].Value);
                        }
                        list.Add(output);
                    }
                }
                catch { }
                break;
            }

            return(list);
        }
Exemplo n.º 25
0
        /// <summary>
        ///     Crawls a list-style board page. Every <c>ul li div</c> whose class name contains "li" is
        ///     reduced to a list of (key, text) pairs plus the hrefs of its anchors; each such row is
        ///     then turned into a <c>CrawlingData</c> and handed to <c>OnCrawlData</c>.
        /// </summary>
        /// <param name="document"> Parsed board page. </param>
        private void OnPageCrawlList(AngleSharp.Html.Dom.IHtmlDocument document)
        {
            var rows = document.QuerySelectorAll("ul li div")
                       .Where(item => !string.IsNullOrEmpty(item.ClassName) && item.ClassName.Contains("li"))
                       .Select(item =>
            {
                var fields = new List <Tuple <string, string> >();

                // Headings become "title" entries, cut at the last '[' when there is one.
                foreach (var heading in item.QuerySelectorAll("h3"))
                {
                    var headingText = heading.TextContent.Trim();

                    var bracketAt = headingText.LastIndexOf("[");
                    if (bracketAt != -1)
                    {
                        headingText = headingText.Substring(0, bracketAt);
                    }

                    fields.Add(Tuple.Create("title", headingText));
                }

                // Spans are keyed by their own class name.
                foreach (var span in item.QuerySelectorAll("span"))
                {
                    fields.Add(Tuple.Create(span.ClassName, span.TextContent.Trim()));
                }

                // The "hotdeal_info" div becomes the "info" entry, with tabs removed.
                foreach (var div in item.QuerySelectorAll("div"))
                {
                    if (div.ClassName == "hotdeal_info")
                    {
                        fields.Add(Tuple.Create("info", div.TextContent.Replace("\t", string.Empty)));
                    }
                }

                var links = item.QuerySelectorAll("a")
                            .Select(anchor => anchor.GetAttribute("href"))
                            .ToList();

                return(Tuple.Create(fields, links));
            }).ToArray();

            Parallel.ForEach(rows, row =>
            {
                var fields = row.Item1;
                var links  = row.Item2;

                var rowCategory = fields.FindValue("category").Replace(" /", string.Empty);
                var rowTitle    = fields.FindValue("title").TrimEnd();

                // Append the extra info, when present, in square brackets.
                var extraInfo = fields.FindValue("info");
                if (!string.IsNullOrEmpty(extraInfo))
                {
                    rowTitle += $" [{extraInfo}]";
                }

                var rowAuthor    = fields.FindValue("author").Replace("/ ", string.Empty);
                // No date is read from the page; the crawl time is used.
                var crawledAt    = DateTime.Now;
                var rowRecommend = fields.FindValue("count").ToInt();

                // The first anchor of the row is taken as the link.
                var rowHref = UrlCompositeHref(links[0]);

                // The return value of OnCrawlData is deliberately discarded.
                _ = OnCrawlData(new CrawlingData
                {
                    Type      = Source.Type,
                    BoardId   = Source.BoardId,
                    BoardName = Source.Name,
                    Category  = rowCategory,
                    Title     = rowTitle.Substring("\t"),
                    Author    = rowAuthor,
                    Recommend = rowRecommend,
                    DateTime  = crawledAt,
                    Href      = rowHref,
                    SourceId  = Source.Id
                });
            });
        }
Exemplo n.º 26
0
        /// <summary>
        ///     Fills <paramref name="descriptionMessage"/> from a lot page: one <c>LotModel</c> per data
        ///     row of the first element with class "lotInfo", plus the start/end (and optionally
        ///     bargaining start) date-times found in the body text.
        /// </summary>
        /// <remarks>
        ///     Columns are located by the caption of the header cells in the first row, so their order
        ///     on the page does not matter. A row that has fewer cells than the header leaves the
        ///     affected fields unset instead of aborting the whole parse. When no "lotInfo" element
        ///     exists the method returns without touching <paramref name="descriptionMessage"/>.
        /// </remarks>
        /// <param name="document"> Parsed lot page. </param>
        /// <param name="descriptionMessage"> Message that receives the lots and the dates. </param>
        private void ParseLot(AngleSharp.Html.Dom.IHtmlDocument document, MessageModel descriptionMessage)
        {
            var elems = document.GetElementsByClassName("lotInfo");

            if (elems.Length == 0)
            {
                return;
            }

            var table = elems[0];
            var trs   = table.GetElementsByTagName("tr");

            if (trs.Length > 0)
            {
                // Column positions resolved from the header captions; -1 means "column not present".
                int number      = -1;
                int description = -1;
                int startPrice  = -1;
                int step        = -1;
                int deposit     = -1;
                int PriceReductionInformation = -1;
                int PropertyClassification    = -1;

                var thsHeaders = trs[0].GetElementsByTagName("th");
                for (int i = 0; i < thsHeaders.Length; i++)
                {
                    switch (thsHeaders[i].TextContent)
                    {
                    case "Номер лота":
                        number = i;
                        break;

                    case "Описание":
                        description = i;
                        break;

                    case "Начальная цена, руб":
                        startPrice = i;
                        break;

                    case "Шаг":
                        step = i;
                        break;

                    case "Задаток":
                        deposit = i;
                        break;

                    case "Информация о снижении цены":
                        PriceReductionInformation = i;
                        break;

                    case "Классификация имущества":
                        PropertyClassification = i;
                        break;
                    }
                }

                // Row 0 is the header; every following row is one lot.
                for (int i = 1; i < trs.Length; i++)
                {
                    var lot = new LotModel();
                    var tds = trs[i].GetElementsByTagName("td");

                    // TryParse rejects null, so an absent cell simply leaves the default value.
                    int lotNumber;
                    if (int.TryParse(GetLotCellText(tds, number), out lotNumber))
                    {
                        lot.Number = lotNumber;
                    }

                    string descriptionText = GetLotCellText(tds, description);
                    if (descriptionText != null)
                    {
                        lot.Description = descriptionText;
                    }

                    // NOTE(review): parsed with the current culture, as before; confirm it matches the page's number format.
                    double price;
                    if (double.TryParse(GetLotCellText(tds, startPrice), out price))
                    {
                        lot.StartPrice = price;
                    }

                    string stepText = GetLotCellText(tds, step);
                    if (stepText != null)
                    {
                        lot.Step = stepText;
                    }

                    string depositText = GetLotCellText(tds, deposit);
                    if (depositText != null)
                    {
                        lot.Deposit = depositText;
                    }

                    string reductionText = GetLotCellText(tds, PriceReductionInformation);
                    if (reductionText != null)
                    {
                        lot.PriceReductionInformation = reductionText;
                    }

                    string classificationText = GetLotCellText(tds, PropertyClassification);
                    if (classificationText != null)
                    {
                        lot.PropertyClassification = classificationText;
                    }

                    descriptionMessage.Lots.Add(lot);
                }
            }

            // Date-times in the body text, in document order: digits.digits.digits, whitespace, digits:digits.
            var dateTimeRegex = new Regex(@"\d+\.\d+\.\d+\s+\d+\:\d+");
            var body          = document.GetElementsByTagName("body")[0];
            var ms            = dateTimeRegex.Matches(body.TextContent);

            // NOTE(review): DateTime.Parse uses the current culture, as before; confirm it matches the page's date format.
            if (ms.Count >= 2)
            {
                descriptionMessage.DateStart = DateTime.Parse(ms[0].Value);
                descriptionMessage.DateEnd   = DateTime.Parse(ms[1].Value);
            }
            // The third date-time is only used when there are exactly three matches.
            if (ms.Count == 3)
            {
                descriptionMessage.DateStartBargaining = DateTime.Parse(ms[2].Value);
            }
        }

        /// <summary>
        ///     Returns the text of the cell at <paramref name="index"/>, or null when the column is
        ///     absent (negative index) or the row has no cell at that position.
        /// </summary>
        /// <param name="cells"> The <c>td</c> cells of one table row. </param>
        /// <param name="index"> Column position, or -1 when the column was not found in the header. </param>
        private static string GetLotCellText(AngleSharp.Dom.IHtmlCollection <AngleSharp.Dom.IElement> cells, int index)
        {
            if (index < 0 || index >= cells.Length)
            {
                return(null);
            }

            return(cells[index].TextContent);
        }
Exemplo n.º 27
0
        /// <summary>
        ///     Entry point. Downloads the result pages starting at the base URL into the "work"
        ///     directory (via <c>DownloadAndFindNext</c>), then parses every page and downloads the
        ///     MP3 of each audio player element into the "songs" directory.
        /// </summary>
        /// <remarks>
        ///     <c>OverwritePages</c> / <c>OverwriteSongs</c> decide whether the respective directory is
        ///     wiped first. Without <c>OverwriteSongs</c>, songs whose file already exists are not
        ///     downloaded again.
        /// </remarks>
        /// <param name="args"> Optional; <c>args[0]</c> replaces the default start URL. </param>
        static void Main(string[] args)
        {
            const string AudioPlayerSelector = "div.row > div > div.card > div.card-body > div.audioplayer-wrapper > div.audioplayer";
            const string DownloadDirectory   = @"songs";

            //Get download url
            string baseUrl = args.Length > 0 ? args[0] : "https://incompetech.filmmusic.io/de/suche/";

            //If we're overwriting, delete work folder if it exists
            if (OverwritePages && Directory.Exists(Path.GetFullPath("work")))
            {
                Directory.Delete(Path.GetFullPath("work"), true);
            }
            //Create work directory if it doesn't exist
            if (!Directory.Exists("work"))
            {
                Directory.CreateDirectory("work");
            }

            //Use the default configuration for AngleSharp
            IConfiguration config = Configuration.Default;

            //Create a new context for evaluating webpages with the given config
            IBrowsingContext context = BrowsingContext.New(config);

            Parser = context.GetService <IHtmlParser>();

            Console.WriteLine("Downloading pages...\n");

            int totalPages = DownloadAndFindNext(baseUrl, 1);

            Console.WriteLine("Download complete! Found " + totalPages + " pages\n");
            Console.WriteLine("Downloading songs...");

            string songDirectory = Path.GetFullPath(DownloadDirectory);

            //If we're overwriting, delete song folder if it exists
            if (OverwriteSongs && Directory.Exists(songDirectory))
            {
                Directory.Delete(songDirectory, true);
            }
            //Create song folder if it doesn't exist
            if (!Directory.Exists(songDirectory))
            {
                Directory.CreateDirectory(songDirectory);
            }

            //Ok, now parse the downloaded pages
            for (int thisPage = 1; thisPage <= totalPages; thisPage++)
            {
                Console.WriteLine("\nPage " + thisPage + " of " + totalPages + "\n");

                // Kept verbatim: this name has to match the files already on disk
                // (presumably written by DownloadAndFindNext; verify before changing it).
                string path   = Path.GetFullPath(@"work\page" + thisPage + ".html");
                string source = File.ReadAllText(path);

                AngleSharp.Html.Dom.IHtmlDocument document = Parser.ParseDocument(source);

                foreach (IElement song in document.QuerySelectorAll(AudioPlayerSelector))
                {
                    string title  = song.GetAttribute("data-title");
                    string mp3Url = song.GetAttribute("data-mp3");

                    // A player without a download URL cannot be fetched; skip it instead of crashing.
                    if (string.IsNullOrEmpty(mp3Url))
                    {
                        Console.WriteLine("Skipping (no data-mp3): " + title);
                        continue;
                    }

                    // The title comes from the web page: replace every character that is not valid in a
                    // file name (this includes directory separators) so the file always lands inside
                    // the song directory and the path is valid on the current platform.
                    string fileName = title ?? string.Empty;
                    foreach (char invalid in Path.GetInvalidFileNameChars())
                    {
                        fileName = fileName.Replace(invalid, '_');
                    }

                    //Get path to save to
                    string savePath = Path.Combine(songDirectory, fileName + ".mp3");

                    //If we're overwriting songs, the song shouldn't already exist. If it does, there's a problem.
                    if (OverwriteSongs)
                    {
                        while (File.Exists(savePath))
                        {
                            savePath = savePath + ".CONFLICT";
                        }
                    }

                    //If we're not overwriting songs, it will only download if it doesn't exist
                    if (!File.Exists(savePath) || OverwriteSongs)
                    {
                        using (WebClient client = new WebClient())
                        {
                            // The page carries the "mp3low" URL; request the "mp3" variant instead.
                            client.DownloadFile(mp3Url.Replace("mp3low", "mp3"), savePath);
                        }
                    }

                    Console.WriteLine(title);
                }
            }


            Console.WriteLine("\n\nEnd of program, bruh");
            Console.ReadKey();
        }
Exemplo n.º 28
0
        /// <summary>
        /// Reads a value from the element(s) of an HTML page that match a CSS selector.
        /// </summary>
        /// <param name="input">The HTML source to parse</param>
        /// <param name="selector">CSS selector picking the candidate elements</param>
        /// <param name="attribute">"innerHTML", "outerHTML", or the exact name of the attribute whose value is wanted</param>
        /// <param name="index">Position of the single element to read among the matches (ignored when recursive)</param>
        /// <param name="recursive">True to read from every matching element instead of only the one at <paramref name="index"/></param>
        /// <returns>The collected value(s); an element that lacks the requested attribute contributes nothing.</returns>
        public static IEnumerable <string> CSS(string input, string selector, string attribute, int index = 0, bool recursive = false)
        {
            AngleSharp.Html.Dom.IHtmlDocument page = new HtmlParser().ParseDocument(input);
            var values = new List <string>();

            // Decide which elements to read: all matches, or just the one at the given index.
            var matches = page.QuerySelectorAll(selector);
            IEnumerable <AngleSharp.Dom.IElement> targets;
            if (recursive)
            {
                targets = matches;
            }
            else
            {
                targets = new[] { matches[index] };
            }

            foreach (var target in targets)
            {
                if (attribute == "innerHTML")
                {
                    values.Add(target.InnerHtml);
                }
                else if (attribute == "outerHTML")
                {
                    values.Add(target.OuterHtml);
                }
                else
                {
                    // Exact name comparison against the element's attributes; first match wins.
                    foreach (var candidate in target.Attributes)
                    {
                        if (candidate.Name == attribute)
                        {
                            values.Add(candidate.Value);
                            break;
                        }
                    }
                }
            }

            return(values);
        }
Exemplo n.º 29
0
        /// <summary>
        ///     Crawls a board page. Every <c>div</c> whose class name contains both "list_item" and
        ///     "symph_row" is reduced to (class name, text) pairs taken from its spans and classed
        ///     anchors, plus the hrefs of all its anchors; each such row is then turned into a
        ///     <c>CrawlingData</c> and handed to <c>OnCrawlData</c>.
        /// </summary>
        /// <param name="document"> Parsed board page. </param>
        protected override void OnPageCrawl(AngleSharp.Html.Dom.IHtmlDocument document)
        {
            var rows = document.QuerySelectorAll("div")
                       .Where(div => !string.IsNullOrEmpty(div.ClassName) && div.ClassName.Contains("list_item") && div.ClassName.Contains("symph_row"))
                       .Select(div =>
            {
                var fields = new List <Tuple <string, string> >();

                foreach (var span in div.QuerySelectorAll("span"))
                {
                    // A span without text may carry an image; use its alt text instead.
                    var spanText = span.TextContent.Trim();
                    if (string.IsNullOrEmpty(spanText))
                    {
                        spanText = span.QuerySelector("img")?.GetAttribute("alt");
                    }

                    fields.Add(Tuple.Create(span.ClassName, spanText));
                }

                var anchors = div.QuerySelectorAll("a");

                // Only anchors that have a class name contribute a field.
                foreach (var anchor in anchors)
                {
                    if (!string.IsNullOrEmpty(anchor.ClassName))
                    {
                        fields.Add(Tuple.Create(anchor.ClassName, anchor.TextContent));
                    }
                }

                var links = anchors.Select(anchor => anchor.GetAttribute("href"))
                            .ToList();

                return(Tuple.Create(fields, links));
            })
                       .ToArray();

            Parallel.ForEach(rows, row =>
            {
                var fields = row.Item1;
                var links  = row.Item2;

                // Prefer the "*_fixed" fields and fall back to the regular ones.
                var rowCategory = fields.FindValue("category_fixed");
                rowCategory = string.IsNullOrEmpty(rowCategory) ? fields.FindValue("icon_keyword") : rowCategory;

                var rowTitle = fields.FindValue("subject_fixed");
                rowTitle = string.IsNullOrEmpty(rowTitle) ? fields.FindValue("list_subject") : rowTitle;

                rowTitle = rowTitle.Substring("\n");

                var rowAuthor = fields.FindValue("nickname");
                var rowCount  = fields.FindValue("hit").ToIntShorthand();
                var rowDate   = DateTime.Parse(fields.FindValue("timestamp"));

                // The first anchor of the row is taken as the link.
                var rowHref = UrlCompositeHref(links[0]);

                // The return value of OnCrawlData is deliberately discarded.
                _ = OnCrawlData(new CrawlingData
                {
                    Type      = Source.Type,
                    BoardId   = Source.BoardId,
                    BoardName = Source.Name,
                    Category  = rowCategory,
                    Title     = rowTitle,
                    Author    = rowAuthor,
                    Count     = rowCount,
                    DateTime  = rowDate,
                    Href      = rowHref,
                    SourceId  = Source.Id
                });
            });
        }
Exemplo n.º 30
0
        /// <summary>
        ///     Download one VS Marketplace package and extract it to the target directory.
        /// </summary>
        /// <param name="purl"> Package URL of the package to download. </param>
        /// <returns> the path or file written. </returns>
        public override async Task <IEnumerable <string> > DownloadVersionAsync(PackageURL purl, bool doExtract, bool cached = false)
        {
            Logger.Trace("DownloadVersion {0}", purl?.ToString());

            List <string>    downloadedPaths = new();
            HashSet <string> downloadedUrls  = new();
            HttpClient       httpClient      = CreateHttpClient();

            if (purl == null || purl.Name == null || purl.Version == null)
            {
                return(downloadedPaths);
            }

            string packageVersion = purl.Version;

            IEnumerable <string>?availablePools = await GetPoolsForProject(purl);

            foreach (string?pool in availablePools)
            {
                string?archiveBaseUrl = await GetArchiveBaseUrlForProject(purl, pool);

                if (archiveBaseUrl == null)
                {
                    Logger.Debug("Unable to find archive base URL for {0}, pool {1}", purl.ToString(), pool);
                    continue;
                }

                try
                {
                    string?html = await GetHttpStringCache(httpClient, archiveBaseUrl, neverThrow : true);

                    if (html == null)
                    {
                        Logger.Debug("Error reading {0}", archiveBaseUrl);
                        continue;
                    }

                    AngleSharp.Html.Dom.IHtmlDocument document = await new HtmlParser().ParseDocumentAsync(html);
                    foreach (AngleSharp.Dom.IElement anchor in document.QuerySelectorAll("a"))
                    {
                        string?anchorHref = anchor.GetAttribute("href");
                        if (anchorHref.Contains(packageVersion) && anchorHref.EndsWith(".deb"))
                        {
                            string?fullDownloadUrl = archiveBaseUrl + "/" + anchorHref;
                            if (!downloadedUrls.Add(fullDownloadUrl))
                            {
                                // Never re-download the same file twice.
                                continue;
                            }
                            Logger.Debug("Downloading binary: {0}", fullDownloadUrl);

                            System.Net.Http.HttpResponseMessage downloadResult = await httpClient.GetAsync(fullDownloadUrl);

                            if (!downloadResult.IsSuccessStatusCode)
                            {
                                Logger.Debug("Error {0} downloading file {1}", downloadResult.StatusCode, fullDownloadUrl);
                                continue;
                            }

                            // TODO: Add distro version id
                            string targetName     = $"ubuntu-{purl.Name}@{packageVersion}-{anchorHref}";
                            string extractionPath = Path.Combine(TopLevelExtractionDirectory, targetName);
                            if (doExtract && Directory.Exists(extractionPath) && cached == true)
                            {
                                downloadedPaths.Add(extractionPath);
                                return(downloadedPaths);
                            }

                            if (doExtract)
                            {
                                downloadedPaths.Add(await ArchiveHelper.ExtractArchiveAsync(TopLevelExtractionDirectory, targetName, await downloadResult.Content.ReadAsStreamAsync(), cached));
                            }
                            else
                            {
                                extractionPath += Path.GetExtension(anchorHref) ?? "";
                                await File.WriteAllBytesAsync(extractionPath, await downloadResult.Content.ReadAsByteArrayAsync());

                                downloadedPaths.Add(extractionPath);
                            }
                        }

                        // Source Code URLs don't have the full version on the source files. We need to find
                        // them in the .dsc
                        else if (anchorHref.Contains(packageVersion) && anchorHref.EndsWith(".dsc"))
                        {
                            string?dscContent = await GetHttpStringCache(httpClient, archiveBaseUrl + "/" + anchorHref);

                            if (dscContent == null)
                            {
                                continue;
                            }

                            HashSet <string> seenFiles = new();
                            foreach (Match match in Regex.Matches(dscContent, "^ [a-z0-9]+ \\d+ (.*)$", RegexOptions.Multiline | RegexOptions.IgnoreCase).Where(x => x != null))
                            {
                                seenFiles.Add(match.Groups[1].Value.Trim());
                            }

                            // Now we need to go through the anchor tags again looking for the source code files
                            foreach (AngleSharp.Dom.IElement?secondAnchor in document.QuerySelectorAll("a"))
                            {
                                string?secondHref = secondAnchor.GetAttribute("href");
                                if (seenFiles.Any(f => f.Equals(secondHref) && !secondHref.EndsWith(".deb") && !secondHref.EndsWith(".dsc") && !secondHref.EndsWith(".asc")))
                                {
                                    string fullDownloadUrl = archiveBaseUrl + "/" + secondHref;
                                    if (!downloadedUrls.Add(fullDownloadUrl))
                                    {
                                        // Never re-download the same file twice.
                                        continue;
                                    }
                                    Logger.Debug("Downloading source code: {0}", fullDownloadUrl);

                                    System.Net.Http.HttpResponseMessage downloadResult = await httpClient.GetAsync(fullDownloadUrl);

                                    if (!downloadResult.IsSuccessStatusCode)
                                    {
                                        Logger.Debug("Error {0} downloading file {1}", downloadResult.StatusCode, fullDownloadUrl);
                                        continue;
                                    }

                                    // TODO: Add distro version id
                                    string targetName     = $"ubuntu-{purl.Name}@{packageVersion}-{secondHref}";
                                    string extractionPath = Path.Combine(TopLevelExtractionDirectory, targetName);

                                    if (doExtract)
                                    {
                                        downloadedPaths.Add(await ArchiveHelper.ExtractArchiveAsync(TopLevelExtractionDirectory, targetName, await downloadResult.Content.ReadAsStreamAsync(), cached));
                                    }
                                    else
                                    {
                                        extractionPath += Path.GetExtension(anchorHref) ?? "";
                                        await File.WriteAllBytesAsync(extractionPath, await downloadResult.Content.ReadAsByteArrayAsync());

                                        downloadedPaths.Add(extractionPath);
                                    }
                                }
                            }
                        }
                    }
                }
                catch (Exception ex)
                {
                    Logger.Debug("Error downloading binary for {0}: {1}", purl.ToString(), ex.Message);
                }
            }

            return(downloadedPaths);
        }