Пример #1
0
        /// <summary>
        ///     Reads a table-style board listing and hands every row to <c>OnCrawlData</c>.
        /// </summary>
        /// <remarks>
        ///     Only rows whose class is exactly <c>view list_tr_humordata</c> are read. Their cell
        ///     texts are flattened into one array, so row <c>n</c> occupies the
        ///     <c>thContent.Length</c> cells starting at <c>n * thContent.Length</c>.
        ///     Rows are processed in parallel, so the order of <c>OnCrawlData</c> calls is not defined.
        /// </remarks>
        /// <param name="document">Parsed HTML document of the listing page.</param>
        protected override void OnPageCrawl(AngleSharp.Html.Dom.IHtmlDocument document)
        {
            // The highest cell offset read per row is 6, so a row must span at least seven columns.
            const int requiredColumns = 7;

            var thContent = document.QuerySelectorAll("thead tr th")
                            .Select(x => x.TextContent.Trim())
                            .ToArray();

            var tdContent = document.QuerySelectorAll("tbody tr")
                            .Where(x => x.ClassName == "view list_tr_humordata")
                            .SelectMany(x => x.QuerySelectorAll("td"))
                            // Prefer the anchor text when the cell wraps its content in a link.
                            .Select(y => (y.QuerySelector("a") ?? y).TextContent.Trim())
                            .ToArray();

            // A subject cell without an anchor yields a null href (same as an anchor that has
            // no href attribute) instead of throwing while the array is being built.
            var tdHref = document.QuerySelectorAll("tbody tr td")
                         .Where(x => x.ClassName == "subject")
                         .Select(x => x.QuerySelector("a")?.GetAttribute("href"))
                         .ToArray();

            if (thContent.Length < requiredColumns || tdContent.Length == 0)
            {
                return;
            }

            // Two-digit years are interpreted relative to a window that ends 30 years from now.
            var cultureInfo = (CultureInfo)Thread.CurrentThread.CurrentCulture.Clone();
            var calendar    = cultureInfo.Calendar;

            calendar.TwoDigitYearMax            = DateTime.Now.Year + 30;
            cultureInfo.DateTimeFormat.Calendar = calendar;

            // Never index past the collected hrefs, even if fewer links than rows were found.
            var rowCount = Math.Min(tdContent.Length / thContent.Length, tdHref.Length);

            Parallel.For(0, rowCount, n =>
            {
                var cursor    = n * thContent.Length;
                var id        = tdContent[cursor + 0].ToInt();
                var title     = tdContent[cursor + 2];
                var author    = tdContent[cursor + 3];
                var date      = DateTime.ParseExact(tdContent[cursor + 4], "yy/MM/dd HH:mm", cultureInfo);
                var count     = tdContent[cursor + 5].ToInt();
                var recommend = tdContent[cursor + 6].ToInt();

                var href = UrlCompositeHref(tdHref[n]);

                _ = OnCrawlData(new CrawlingData
                {
                    Type      = Source.Type,
                    BoardId   = Source.BoardId,
                    BoardName = Source.Name,
                    RowId     = id,
                    Title     = title,
                    Author    = author,
                    Recommend = recommend,
                    Count     = count,
                    DateTime  = date,
                    Href      = href,
                    SourceId  = Source.Id
                });
            });
        }
Пример #2
0
        /// <summary>
        ///     Reads a seven-column board table and hands every data row to <c>OnCrawlData</c>.
        /// </summary>
        /// <remarks>
        ///     Cells are collected from every <c>tbody tr td</c> that has a class other than
        ///     <c>g6</c>. The first seven collected cells are treated as the header row and
        ///     discarded; the remaining cells are consumed seven at a time.
        ///     Rows are processed in parallel, so the order of <c>OnCrawlData</c> calls is not defined.
        /// </remarks>
        /// <param name="document">Parsed HTML document of the listing page.</param>
        protected override void OnPageCrawl(AngleSharp.Html.Dom.IHtmlDocument document)
        {
            // Number of cells per row; also the number of leading header cells to drop.
            const int thLength = 7;

            var tdContent = document.QuerySelectorAll("tbody tr td")
                            .Where(x => !string.IsNullOrEmpty(x.ClassName) && x.ClassName != "g6")
                            // Prefer the anchor text when the cell wraps its content in a link.
                            .Select(x => (x.QuerySelector("a") ?? x).TextContent.Trim())
                            .ToArray();

            var tdHref = document.QuerySelectorAll("tbody tr td a")
                         .Where(x => !string.IsNullOrEmpty(x.ClassName))
                         .Select(x => x.GetAttribute("href"))
                         .ToArray();

            if (tdContent.Length == 0)
            {
                return;
            }

            tdContent = tdContent.Skip(thLength).ToArray();

            // Never index past the collected hrefs, even if fewer links than rows were found.
            var rowCount = Math.Min(tdContent.Length / thLength, tdHref.Length);

            Parallel.For(0, rowCount, n =>
            {
                var cursor = n * thLength;

                var originTitle = tdContent[cursor + 1];

                var title        = originTitle.Substring("\n");
                var author       = tdContent[cursor + 2];
                var date         = DateTime.Parse(tdContent[cursor + 3]);
                var count        = tdContent[cursor + 4].ToInt();
                var recommend    = tdContent[cursor + 5].ToInt();
                var notRecommend = tdContent[cursor + 6].ToInt();

                var href = UrlCompositeHref(tdHref[n]);

                _ = OnCrawlData(new CrawlingData
                {
                    Type      = Source.Type,
                    BoardId   = Source.BoardId,
                    BoardName = Source.Name,
                    Title     = title,
                    Author    = author,
                    // Stored score is up-votes minus down-votes.
                    Recommend = recommend - notRecommend,
                    Count     = count,
                    DateTime  = date,
                    Href      = href,
                    SourceId  = Source.Id
                });
            });
        }
Пример #3
0
        /// <summary>
        ///     Identifies the base URL for package source files.
        /// </summary>
        /// <param name="purl"> Package URL; its name is appended to the listing URL. </param>
        /// <param name="pool"> Path segment placed between the Ubuntu endpoint and the package name. </param>
        /// <returns>
        ///     The part of the first <c>.dsc</c> link that precedes the file name, or <c>null</c> when
        ///     the page cannot be fetched, contains no such link, or an exception occurs.
        /// </returns>
        private async Task <string?> GetArchiveBaseUrlForProject(PackageURL purl, string pool)
        {
            try
            {
                HttpClient httpClient = CreateHttpClient();

                string?html = await GetHttpStringCache(httpClient, $"{ENV_UBUNTU_ENDPOINT}/{pool}/{purl.Name}", neverThrow : true);

                if (html == null)
                {
                    return(null);
                }

                AngleSharp.Html.Dom.IHtmlDocument document = await new HtmlParser().ParseDocumentAsync(html);
                foreach (AngleSharp.Dom.IElement anchor in document.QuerySelectorAll("a"))
                {
                    string?href = anchor.GetAttribute("href");

                    // URLs are not linguistic text, so compare the extension ordinally.
                    if (href == null || !href.EndsWith(".dsc", StringComparison.Ordinal))
                    {
                        continue;
                    }

                    // Group 1 is everything before the final "/<file>.dsc" segment.
                    Match match = Regex.Match(href, "(.+)/[^/]+\\.dsc");
                    if (match.Success)
                    {
                        return(match.Groups[1].Value.Trim());
                    }
                }
            }
            catch (Exception ex)
            {
                // Best effort: any failure is logged and reported to the caller as "not found".
                Logger.Debug(ex, "Error fetching Ubuntu archive base URL for {0}: {1}", purl.ToString(), ex.Message);
            }
            return(null);
        }
Пример #4
0
        /// <summary>
        ///     Identifies the available pools for a given Ubuntu project. For example, 'xenial'.
        /// </summary>
        /// <param name="purl"> Package URL to look up (only name is used). </param>
        /// <returns>
        ///     Distinct pool names taken from the first path segment of each result link; empty when
        ///     the search cannot be fetched, has no result links, or an exception occurs.
        /// </returns>
        private async Task <IEnumerable <string> > GetPoolsForProject(PackageURL purl)
        {
            HashSet <string> pools = new();

            try
            {
                HttpClient httpClient = CreateHttpClient();

                // Escape the name so characters such as '+' or '&' survive the query string.
                string keywords = Uri.EscapeDataString(purl.Name ?? string.Empty);

                string?searchResults = await GetHttpStringCache(httpClient, $"{ENV_UBUNTU_ENDPOINT}/search?keywords={keywords}&searchon=names&exact=1&suite=all&section=all", neverThrow : true);

                // neverThrow requests a null result on failure; handle it here instead of
                // letting the parser throw on a null argument.
                if (searchResults == null)
                {
                    Logger.Debug("No search results returned for {0}", purl.ToString());
                    return(pools);
                }

                AngleSharp.Html.Dom.IHtmlDocument document = await new HtmlParser().ParseDocumentAsync(searchResults);
                foreach (AngleSharp.Dom.IElement anchor in document.QuerySelectorAll("a.resultlink"))
                {
                    string?href = anchor.GetAttribute("href");
                    if (href == null)
                    {
                        continue;
                    }

                    // Group 1 is the first path segment of a link shaped like "/<pool>/<rest>".
                    Match match = Regex.Match(href, "^/([^/]+)/.+");
                    if (match.Success)
                    {
                        string pool = match.Groups[1].Value.Trim();
                        Logger.Debug("Identified pool: {0}", pool);
                        pools.Add(pool);
                    }
                }
            }
            catch (Exception ex)
            {
                // Best effort: failures are logged and whatever was collected so far is returned.
                Logger.Debug(ex, "Error fetching Ubuntu pools for {0}: {1}", purl.ToString(), ex.Message);
            }
            return(pools);
        }
Пример #5
0
        /// <summary>
        /// Download one CPAN package and extract it to the target directory.
        /// </summary>
        /// <param name="purl">Package URL of the package to download.</param>
        /// <returns>n/a</returns>
        public override async Task <IEnumerable <string> > DownloadVersionAsync(PackageURL purl, bool doExtract, bool cached = false)
        {
            Logger.Trace("DownloadVersion {0}", purl?.ToString());

            string?       packageName     = purl?.Name;
            string?       packageVersion  = purl?.Version;
            List <string> downloadedPaths = new();

            if (string.IsNullOrWhiteSpace(packageName) || string.IsNullOrWhiteSpace(packageVersion))
            {
                Logger.Debug("Unable to download [{0} {1}]. Both must be defined.", packageName, packageVersion);
                return(downloadedPaths);
            }
            // Locate the URL
            HttpClient httpClient        = CreateHttpClient();
            string?    packageVersionUrl = null;
            string?    html = await GetHttpStringCache(httpClient, $"{ENV_CPAN_ENDPOINT}/release/{packageName}");

            HtmlParser parser = new();

            AngleSharp.Html.Dom.IHtmlDocument document = await parser.ParseDocumentAsync(html);

            foreach (AngleSharp.Dom.IElement option in document.QuerySelectorAll("div.release select.extend option"))
            {
                if (!option.HasAttribute("value"))
                {
                    continue;
                }
                string?value   = option.GetAttribute("value");
                string version = value.Split('-').Last();
                if (version.StartsWith("v", StringComparison.InvariantCultureIgnoreCase))
                {
                    version = version[1..];
Пример #6
0
        /// <summary>
        /// Downloads a result page (unless it is already cached on disk) and keeps following
        /// its "next" pagination link until a page without such a link is reached.
        /// </summary>
        /// <param name="url">Address of the first page to download.</param>
        /// <param name="page">Number assigned to the first page; used in the cache file name.</param>
        /// <returns>The number of the last page that was processed.</returns>
        static int DownloadAndFindNext(string url, int page)
        {
            const string NextPageSelector = "div.row > div > div.card > div.card-body > div.paginator-top > ul > li.page-item > a[rel=\"next\"]";

            string currentUrl  = url;
            int    currentPage = page;

            while (true)
            {
                Console.WriteLine("Downloading page " + currentPage);

                // Each page is cached under the work folder, keyed by its page number.
                string cachePath = Path.GetFullPath(@"work\page" + currentPage + ".html");

                if (!File.Exists(cachePath) || OverwritePages)
                {
                    using (WebClient client = new WebClient())
                    {
                        client.DownloadFile(currentUrl, cachePath);
                    }
                }

                string markup = File.ReadAllText(cachePath);

                AngleSharp.Html.Dom.IHtmlDocument document = Parser.ParseDocument(markup);

                IHtmlCollection <IElement> nextLinks = document.QuerySelectorAll(NextPageSelector);

                // No "next" link means this was the final page.
                if (nextLinks.Length == 0)
                {
                    return(currentPage);
                }

                AngleSharp.Html.Dom.IHtmlAnchorElement nextLink = (AngleSharp.Html.Dom.IHtmlAnchorElement)nextLinks[0];

                currentUrl  = nextLink.Href;
                currentPage = currentPage + 1;
            }
        }
        /// <summary>
        ///     Stores in <c>LatestPage</c> the number found in the last matching pagination span,
        ///     or <c>null</c> when no span has text.
        /// </summary>
        /// <param name="document">Parsed HTML document of the page.</param>
        protected override void OnPageCrawl(AngleSharp.Html.Dom.IHtmlDocument document)
        {
            var spanTexts = document.QuerySelectorAll("tbody tr td table tbody tr td span")
                            .Select(span => span.TextContent.Trim())
                            .ToArray();

            var lastText = spanTexts.LastOrDefault();

            if (string.IsNullOrEmpty(lastText))
            {
                LatestPage = null;
            }
            else
            {
                LatestPage = lastText.ToInt();
            }
        }
Пример #8
0
        /// <summary>
        ///     Chooses the parsing strategy for a page: table parsing when header cells are
        ///     present, list parsing otherwise.
        /// </summary>
        /// <param name="document">Parsed HTML document of the page.</param>
        protected override void OnPageCrawl(AngleSharp.Html.Dom.IHtmlDocument document)
        {
            var headers = document.QuerySelectorAll("thead tr th")
                          .Select(th => th.TextContent.Trim())
                          .ToArray();

            // No header cells at all: the page is not laid out as a table.
            if (headers.Length == 0)
            {
                OnPageCrawlList(document);
                return;
            }

            OnPageCrawlTable(document, headers);
        }
Пример #9
0
        /// <summary>
        ///     Reads a table-style listing and hands every row to <c>OnCrawlData</c>.
        /// </summary>
        /// <remarks>
        ///     Cell texts of all <c>tbody tr td</c> elements are flattened into one array, so row
        ///     <c>n</c> occupies the <c>thContent.Length</c> cells starting at
        ///     <c>n * thContent.Length</c>. Rows are processed in parallel, so the order of
        ///     <c>OnCrawlData</c> calls is not defined.
        /// </remarks>
        /// <param name="document">Parsed HTML document of the listing page.</param>
        /// <param name="thContent">Trimmed header cell texts; their count is used as the row width.</param>
        private void OnPageCrawlTable(AngleSharp.Html.Dom.IHtmlDocument document, string[] thContent)
        {
            // The highest cell offset read per row is 5, so a row must span at least six columns.
            const int requiredColumns = 6;

            var tdContent = document.QuerySelectorAll("tbody tr td")
                            .Select(x => x.TextContent.Trim())
                            .ToArray();

            // A title cell without an anchor yields a null href (same as an anchor that has
            // no href attribute) instead of throwing while the array is being built.
            var tdHref = document.QuerySelectorAll("tbody tr td")
                         .Where(x => !string.IsNullOrEmpty(x.ClassName) && x.ClassName.Contains("title"))
                         .Select(x => x.QuerySelector("a")?.GetAttribute("href"))
                         .ToArray();

            if (thContent.Length < requiredColumns || tdContent.Length == 0)
            {
                return;
            }

            // Never index past the collected hrefs, even if fewer links than rows were found.
            var rowCount = Math.Min(tdContent.Length / thContent.Length, tdHref.Length);

            Parallel.For(0, rowCount, n =>
            {
                var cursor    = n * thContent.Length;
                var category  = tdContent[cursor + 0];
                var title     = tdContent[cursor + 1];
                var author    = tdContent[cursor + 2];
                var date      = DateTime.Parse(tdContent[cursor + 3]);
                var count     = tdContent[cursor + 4].ToInt();
                var recommend = tdContent[cursor + 5].ToInt();

                var href = UrlCompositeHref(tdHref[n]);

                _ = OnCrawlData(new CrawlingData
                {
                    Type      = Source.Type,
                    BoardId   = Source.BoardId,
                    BoardName = Source.Name,
                    Category  = category,
                    Title     = title.Substring("\t"),
                    Author    = author,
                    Recommend = recommend,
                    Count     = count,
                    DateTime  = date,
                    Href      = href,
                    SourceId  = Source.Id
                });
            });
        }
Пример #10
0
        /// <summary>
        ///     Reads a listing whose rows describe themselves through classed <c>span</c> elements
        ///     and hands every usable row to <c>OnCrawlData</c>.
        /// </summary>
        /// <remarks>
        ///     For each <c>tbody tr</c> the method collects (class, text) pairs of its spans, except
        ///     spans whose class is <c>category</c>, plus the hrefs of all anchors. The <c>info</c>
        ///     span is split on <c>|</c> into category, author and date. Rows that have no anchor or
        ///     fewer than three info parts are skipped. Rows are processed in parallel.
        /// </remarks>
        /// <param name="document">Parsed HTML document of the listing page.</param>
        protected override void OnPageCrawl(AngleSharp.Html.Dom.IHtmlDocument document)
        {
            var tdContents = document.QuerySelectorAll("tbody tr")
                             .Select(tr =>
            {
                var stringTuples = tr.QuerySelectorAll("span")
                                   .Where(span => span.ClassName != "category")
                                   .Select(span => new Tuple <string, string>(span.ClassName, span.TextContent.Trim()))
                                   .ToList();

                var hrefs = tr.QuerySelectorAll("a")
                            .Select(anchor => anchor.GetAttribute("href"))
                            .ToList();

                return(new Tuple <List <Tuple <string, string> >, List <string> >(stringTuples, hrefs));
            })
                             .ToArray();

            Parallel.ForEach(tdContents, row =>
            {
                var stringTuples = row.Item1;
                var hrefs        = row.Item2;

                // A row without any link cannot produce an item; skip it rather than let
                // hrefs[0] throw and abort the whole parallel loop.
                if (hrefs.Count == 0)
                {
                    return;
                }

                var info = stringTuples.FindValue("info");
                if (string.IsNullOrEmpty(info))
                {
                    return;
                }

                // Expected layout: "<category>|<author>|<date>".
                var infos = info.Split("|");
                if (infos.Length < 3)
                {
                    return;
                }

                var cmtnum      = stringTuples.FindValue("cmtnum");
                var originTitle = stringTuples.FindValue("title");

                var title     = string.IsNullOrEmpty(cmtnum) ? originTitle : originTitle.Substring(cmtnum);
                var category  = infos[0].Substring("\n");
                var author    = infos[1];
                var date      = DateTime.Parse(infos[2]);
                var recommend = string.IsNullOrEmpty(cmtnum) ? 0 : cmtnum.ToIntRegx();

                var href = hrefs[0];

                _ = OnCrawlData(new CrawlingData
                {
                    Type      = Source.Type,
                    BoardId   = Source.BoardId,
                    BoardName = Source.Name,
                    Title     = title,
                    Category  = category,
                    Author    = author,
                    Recommend = recommend,
                    DateTime  = date,
                    Href      = href,
                    SourceId  = Source.Id
                });
            });
        }
Пример #11
0
        /// <inheritdoc />
        /// <remarks>
        ///     Versions are read from the package page: the cell that follows the first header
        ///     cell whose text starts with "Versions" is scanned for <c>a</c> and <c>strong</c>
        ///     elements. HTTP and parsing failures are logged and rethrown.
        /// </remarks>
        public override async Task <IEnumerable <string> > EnumerateVersionsAsync(PackageURL purl, bool useCache = true, bool includePrerelease = true)
        {
            Logger.Trace("EnumerateVersions {0}", purl?.ToString());
            if (purl == null || purl.Name is null)
            {
                return(Array.Empty <string>());
            }

            try
            {
                string     packageName = purl.Name;
                HttpClient httpClient  = CreateHttpClient();

                // Dispose the response once the body has been read.
                using System.Net.Http.HttpResponseMessage response = await httpClient.GetAsync($"{ENV_HACKAGE_ENDPOINT}/package/{packageName}");

                response.EnsureSuccessStatusCode();
                HtmlParser parser = new();
                AngleSharp.Html.Dom.IHtmlDocument document = await parser.ParseDocumentAsync(await response.Content.ReadAsStringAsync());

                AngleSharp.Dom.IHtmlCollection <AngleSharp.Dom.IElement> ths = document.QuerySelectorAll("th");
                List <string> versionList = new();
                foreach (AngleSharp.Dom.IElement th in ths)
                {
                    // Page markup is not linguistic text, so compare ordinally.
                    if (!th.TextContent.StartsWith("Versions", StringComparison.Ordinal))
                    {
                        continue;
                    }

                    // The header may be the last element of its row; then there is nothing to read.
                    AngleSharp.Dom.IElement? td = th.NextElementSibling;
                    if (td is not null)
                    {
                        foreach (AngleSharp.Dom.IElement version in td.QuerySelectorAll("a,strong"))
                        {
                            // Version identifiers are machine-readable; lower-case them culture-independently.
                            string versionString = version.TextContent.ToLowerInvariant().Trim();
                            Logger.Debug("Identified {0} version {1}.", packageName, versionString);
                            versionList.Add(versionString);
                        }
                    }

                    // Only the first "Versions" header is considered.
                    break;
                }

                return(SortVersions(versionList.Distinct()));
            }
            catch (Exception ex)
            {
                Logger.Debug("Unable to enumerate versions: {0}", ex.Message);
                throw;
            }
        }
Пример #12
0
        /// <summary>
        ///     Reads a board table whose columns are located by header text and hands every row
        ///     to <c>OnCrawlData</c>.
        /// </summary>
        /// <remarks>
        ///     Only rows whose class is exactly <c>table_body</c> are read. Cell values are looked
        ///     up through <c>GetValue</c> using the header list and the row's start offset. Links
        ///     come from <c>subject</c> cells and are kept only when they start with "http".
        ///     Rows are processed in parallel, so the order of <c>OnCrawlData</c> calls is not defined.
        /// </remarks>
        /// <param name="document">Parsed HTML document of the listing page.</param>
        protected override void OnPageCrawl(AngleSharp.Html.Dom.IHtmlDocument document)
        {
            var thContent = document.QuerySelectorAll("thead tr th")
                            .Select(x => x.TextContent.Trim())
                            .ToList();

            var tdContent = document.QuerySelectorAll("tbody tr")
                            .Where(x => x.ClassName == "table_body")
                            .SelectMany(x => x.QuerySelectorAll("td"))
                            .Select(y => y.TextContent.Trim())
                            .ToArray();

            var tdHref = document.QuerySelectorAll("tbody tr")
                         .Where(x => x.ClassName == "table_body")
                         .SelectMany(x => x.QuerySelectorAll("td"))
                         .Where(y => y.ClassName == "subject")
                         .Select(y => y.QuerySelector("a")?.GetAttribute("href"))
                         // Drop missing hrefs up front; the prefix is compared ordinally because
                         // a URL scheme is not linguistic text.
                         .Where(href => href != null && href.StartsWith("http", StringComparison.Ordinal))
                         .ToArray();

            if (thContent.Count == 0 || tdContent.Length == 0)
            {
                return;
            }

            // Two-digit years are interpreted relative to a window that ends 30 years from now.
            var cultureInfo = (CultureInfo)Thread.CurrentThread.CurrentCulture.Clone();
            var calendar    = cultureInfo.Calendar;

            calendar.TwoDigitYearMax            = DateTime.Now.Year + 30;
            cultureInfo.DateTimeFormat.Calendar = calendar;

            // Never index past the collected hrefs, even if fewer links than rows were found.
            var rowCount = Math.Min(tdContent.Length / thContent.Count, tdHref.Length);

            Parallel.For(0, rowCount, n =>
            {
                var cursor   = n * thContent.Count;
                var id       = tdContent.GetValue(thContent, "ID", cursor).ToIntNullable();
                var category = tdContent.GetValue(thContent, "구분", cursor);
                if (string.IsNullOrEmpty(category))
                {
                    // Fall back to the board-name column when there is no category column value.
                    category = tdContent.GetValue(thContent, "게시판", cursor);
                }

                var title     = tdContent.GetValue(thContent, "제목", cursor).Substring("\n");
                var author    = tdContent.GetValue(thContent, "글쓴이", cursor);
                var recommend = tdContent.GetValue(thContent, "추천", cursor).ToIntNullable();
                var count     = tdContent.GetValue(thContent, "조회", cursor).ToInt();

                var dateTimeStr = tdContent.GetValue(thContent, "날짜", cursor);
                DateTime date;
                if (dateTimeStr.Contains('.'))
                {
                    // A first dot at index 4 or later means the year was written with four digits.
                    date = dateTimeStr.IndexOf('.') >= 4 ?
                           DateTime.ParseExact(dateTimeStr, "yyyy.MM.dd", cultureInfo) :
                           DateTime.ParseExact(dateTimeStr, "yy.MM.dd", cultureInfo);
                }
                else
                {
                    date = DateTime.Parse(dateTimeStr);
                }

                var href = tdHref[n];

                _ = OnCrawlData(new CrawlingData
                {
                    Type      = Source.Type,
                    BoardId   = Source.BoardId,
                    BoardName = Source.Name,
                    Category  = category,
                    Title     = title,
                    Author    = author,
                    Recommend = recommend.GetValueOrDefault(0),
                    Count     = count,
                    DateTime  = date,
                    RowId     = id,
                    Href      = href,
                    SourceId  = Source.Id
                });
            });
        }
Пример #13
0
 /// <summary>
 /// Fetches all tasks listed on a category page.
 /// </summary>
 /// <param name="link">Link to the page, without the domain.</param>
 /// <returns>
 /// The tasks that could be loaded from the page's task links, or <c>null</c> when any
 /// step throws.
 /// </returns>
 public List <Objects.Task> GetTasksFromPage(string link)
 {
     Log.ProcessMessage("Пытаемся получить список заданий со страницы " + link);
     try
     {
         // The request is awaited synchronously; this method has no async counterpart.
         string     pageSource = http.GetAsync(Domain + link).Result.Content.ReadAsStringAsync().Result;
         HtmlParser pageParser = new HtmlParser();
         AngleSharp.Html.Dom.IHtmlDocument page = pageParser.ParseDocument(pageSource);

         var collected = new List <Objects.Task>();

         foreach (var anchor in page.QuerySelectorAll("div.row.set_href .title a"))
         {
             var loaded = GetTaskFromLink(anchor.GetAttribute("href"));

             // Links that did not yield a task are left out of the result.
             if (loaded != null)
             {
                 collected.Add(loaded);
             }
         }

         Log.GoodMessage("Получили список заданий со страницы " + link);
         return(collected);
     }
     catch
     {
         Log.ExMessage("Не удалось получить список заданий со страницы " + link);
         return(null);
     }
 }
Пример #14
0
        /// <summary>
        /// Downloads all search result pages into the "work" folder, then downloads every song
        /// referenced by an audio player element on those pages into the "songs" folder.
        /// </summary>
        /// <param name="args">Optional; the first argument replaces the default search URL.</param>
        static void Main(string[] args)
        {
            const string AudioPlayerSelector = "div.row > div > div.card > div.card-body > div.audioplayer-wrapper > div.audioplayer";
            const string DownloadDirectory   = @"songs";

            //Get download url
            string baseUrl = args.Length > 0 ? args[0] : "https://incompetech.filmmusic.io/de/suche/";

            //If we're overwriting, delete work folder if it exists
            if (OverwritePages)
            {
                if (Directory.Exists(Path.GetFullPath("work")))
                {
                    Directory.Delete(Path.GetFullPath("work"), true);
                }
            }
            //Create work directory if it doesn't exist
            if (!Directory.Exists("work"))
            {
                Directory.CreateDirectory("work");
            }

            //Use the default configuration for AngleSharp
            IConfiguration config = Configuration.Default;

            //Create a new context for evaluating webpages with the given config
            IBrowsingContext context = BrowsingContext.New(config);

            IHtmlParser thisParser = context.GetService <IHtmlParser>();

            Parser = thisParser;

            Console.WriteLine("Downloading pages...\n");

            int totalPages = DownloadAndFindNext(baseUrl, 1);

            Console.WriteLine("Download complete! Found " + totalPages + " pages\n");
            Console.WriteLine("Downloading songs...");

            //If we're overwriting, delete song folder if it exists
            if (OverwriteSongs)
            {
                if (Directory.Exists(Path.GetFullPath(DownloadDirectory)))
                {
                    Directory.Delete(Path.GetFullPath(DownloadDirectory), true);
                }
            }
            //Create song folder if it doesn't exist
            if (!Directory.Exists(Path.GetFullPath(DownloadDirectory)))
            {
                Directory.CreateDirectory(Path.GetFullPath(DownloadDirectory));
            }

            //Ok, now parse the downloaded pages
            for (int thisPage = 1; thisPage <= totalPages; thisPage++)
            {
                Console.WriteLine("\nPage " + thisPage + " of " + totalPages + "\n");

                string path   = Path.GetFullPath(@"work\page" + thisPage + ".html");
                string source = File.ReadAllText(path);

                AngleSharp.Html.Dom.IHtmlDocument document = Parser.ParseDocument(source);

                IHtmlCollection <IElement> players = document.QuerySelectorAll(AudioPlayerSelector);

                foreach (IElement playerElement in players)
                {
                    AngleSharp.Html.Dom.IHtmlDivElement song = (AngleSharp.Html.Dom.IHtmlDivElement)playerElement;

                    string title = song.GetAttribute("data-title");

                    //Get path to save to. The title comes from the downloaded page, so strip
                    //characters that are not allowed in a file name before using it as one.
                    string savePath = Path.GetFullPath(DownloadDirectory + "\\" + ToSafeFileName(title) + ".mp3");

                    //If we're overwriting songs, the song shouldn't already exist. If it does, there's a problem.
                    if (OverwriteSongs)
                    {
                        while (File.Exists(savePath))
                        {
                            savePath = savePath + ".CONFLICT";
                        }
                    }

                    //If we're not overwriting songs, it will only download if it doesn't exist
                    if (!File.Exists(savePath) || OverwriteSongs)
                    {
                        using (WebClient client = new WebClient())
                        {
                            //Request the "mp3" variant instead of the "mp3low" one named in the attribute
                            client.DownloadFile(song.GetAttribute("data-mp3").Replace("mp3low", "mp3"), savePath);
                        }
                    }

                    Console.WriteLine(title);
                }
            }


            Console.WriteLine("\n\nEnd of program, bruh");
            Console.ReadKey();
        }

        /// <summary>
        /// Replaces every character that is invalid in a file name with an underscore.
        /// A null name is treated as an empty string.
        /// </summary>
        /// <param name="name">Raw name taken from the page.</param>
        /// <returns>The name with invalid file-name characters replaced.</returns>
        private static string ToSafeFileName(string name)
        {
            return(string.Join("_", (name ?? string.Empty).Split(Path.GetInvalidFileNameChars())));
        }
Пример #15
0
        /// <summary>
        /// Reads an attribute value from one element, or from every element, matched by a
        /// CSS selector in an HTML page.
        /// </summary>
        /// <param name="input">The HTML page</param>
        /// <param name="selector">The CSS selector that targets the desired elements</param>
        /// <param name="attribute">
        /// The attribute to read; "innerHTML" and "outerHTML" return the element's markup instead
        /// </param>
        /// <param name="index">Position of the element to read among the matches (ignored when recursive)</param>
        /// <param name="recursive">Whether to read from all the elements that match the selector</param>
        /// <returns>The collected value(s); elements that lack the attribute contribute nothing.</returns>
        public static IEnumerable <string> CSS(string input, string selector, string attribute, int index = 0, bool recursive = false)
        {
            AngleSharp.Html.Dom.IHtmlDocument document = new HtmlParser().ParseDocument(input);
            var values = new List <string>();

            if (!recursive)
            {
                // Single element: the indexer is applied directly to the match collection.
                CollectCssValue(document.QuerySelectorAll(selector)[index], attribute, values);
                return(values);
            }

            foreach (var element in document.QuerySelectorAll(selector))
            {
                CollectCssValue(element, attribute, values);
            }

            return(values);
        }

        /// <summary>
        /// Appends the requested value of a single element to <paramref name="values"/>.
        /// </summary>
        /// <param name="element">Element to read from</param>
        /// <param name="attribute">"innerHTML", "outerHTML", or an exact attribute name</param>
        /// <param name="values">List that receives the value, if any</param>
        private static void CollectCssValue(AngleSharp.Dom.IElement element, string attribute, List <string> values)
        {
            if (attribute == "innerHTML")
            {
                values.Add(element.InnerHtml);
                return;
            }

            if (attribute == "outerHTML")
            {
                values.Add(element.OuterHtml);
                return;
            }

            // Exact, case-sensitive name match; only the first matching attribute is taken.
            foreach (var attr in element.Attributes)
            {
                if (attr.Name == attribute)
                {
                    values.Add(attr.Value);
                    break;
                }
            }
        }
Пример #16
0
        /// <summary>
        ///     Reads a div-based listing and hands every usable row to <c>OnCrawlData</c>.
        /// </summary>
        /// <remarks>
        ///     A row is any <c>div</c> whose class contains both <c>list_item</c> and
        ///     <c>symph_row</c>. For each row the method collects (class, text) pairs from its
        ///     spans (falling back to the <c>alt</c> of a nested image when a span has no text) and
        ///     from its classed anchors, plus the hrefs of all anchors. Rows without any anchor
        ///     are skipped. Rows are processed in parallel.
        /// </remarks>
        /// <param name="document">Parsed HTML document of the listing page.</param>
        protected override void OnPageCrawl(AngleSharp.Html.Dom.IHtmlDocument document)
        {
            var tdContent = document.QuerySelectorAll("div")
                            .Where(div => !string.IsNullOrEmpty(div.ClassName) && div.ClassName.Contains("list_item") && div.ClassName.Contains("symph_row"))
                            .Select(div =>
            {
                var stringTuples = div.QuerySelectorAll("span")
                                   .Select(span =>
                {
                    var text = span.TextContent.Trim();
                    if (string.IsNullOrEmpty(text))
                    {
                        // Icon-only spans carry their meaning in the image's alt text.
                        text = span.QuerySelector("img")?.GetAttribute("alt");
                    }

                    return(new Tuple <string, string>(span.ClassName, text));
                }).ToList();

                var anchors = div.QuerySelectorAll("a");

                stringTuples.AddRange(anchors.Where(anchor => !string.IsNullOrEmpty(anchor.ClassName))
                                      .Select(anchor => new Tuple <string, string>(anchor.ClassName, anchor.TextContent)));

                var hrefs = anchors.Select(anchor => anchor.GetAttribute("href"))
                            .ToList();

                return(new Tuple <List <Tuple <string, string> >, List <string> >(stringTuples, hrefs));
            })
                            .ToArray();

            Parallel.ForEach(tdContent, row =>
            {
                var stringTuples = row.Item1;
                var hrefs        = row.Item2;

                // A row without any link cannot produce an item; skip it rather than let
                // hrefs[0] throw and abort the whole parallel loop.
                if (hrefs.Count == 0)
                {
                    return;
                }

                var category = stringTuples.FindValue("category_fixed");
                if (string.IsNullOrEmpty(category))
                {
                    category = stringTuples.FindValue("icon_keyword");
                }

                var title = stringTuples.FindValue("subject_fixed");
                if (string.IsNullOrEmpty(title))
                {
                    title = stringTuples.FindValue("list_subject");
                }

                title = title.Substring("\n");

                var author = stringTuples.FindValue("nickname");
                var count  = stringTuples.FindValue("hit").ToIntShorthand();
                var date   = DateTime.Parse(stringTuples.FindValue("timestamp"));

                var href = UrlCompositeHref(hrefs[0]);

                _ = OnCrawlData(new CrawlingData
                {
                    Type      = Source.Type,
                    BoardId   = Source.BoardId,
                    BoardName = Source.Name,
                    Category  = category,
                    Title     = title,
                    Author    = author,
                    Count     = count,
                    DateTime  = date,
                    Href      = href,
                    SourceId  = Source.Id
                });
            });
        }
Пример #17
0
        /// <summary>
        ///     Download the files of one Ubuntu package version and optionally extract them to the
        ///     target directory.
        /// </summary>
        /// <remarks>
        ///     For every pool returned by <c>GetPoolsForProject</c> the archive index page is scanned
        ///     for links that contain the requested version. Matching <c>.deb</c> files are
        ///     downloaded directly; for a matching <c>.dsc</c> file, the files listed inside it are
        ///     looked up among the links of the same page and downloaded as well. Every URL is
        ///     fetched at most once. An exception raised while handling a pool is logged and the
        ///     remaining pools are still processed.
        /// </remarks>
        /// <param name="purl"> Package URL of the package to download; name and version are required. </param>
        /// <param name="doExtract"> When true, downloads are extracted through <c>ArchiveHelper</c>; otherwise the raw bytes are written to disk. </param>
        /// <param name="cached"> When true (together with <paramref name="doExtract"/>), an already existing extraction directory of a binary package is returned immediately; the flag is also passed on to the extractor. </param>
        /// <returns> The paths (directories or files) that were written; empty when the package URL is incomplete. </returns>
        public override async Task<IEnumerable<string>> DownloadVersionAsync(PackageURL purl, bool doExtract, bool cached = false)
        {
            Logger.Trace("DownloadVersion {0}", purl?.ToString());

            List<string> downloadedPaths = new();
            HashSet<string> downloadedUrls = new();
            HttpClient httpClient = CreateHttpClient();

            if (purl == null || purl.Name == null || purl.Version == null)
            {
                return downloadedPaths;
            }

            string packageVersion = purl.Version;

            IEnumerable<string>? availablePools = await GetPoolsForProject(purl);
            if (availablePools == null)
            {
                // Nothing to scan; enumerating a null sequence would throw outside the try block.
                return downloadedPaths;
            }

            foreach (string? pool in availablePools)
            {
                string? archiveBaseUrl = await GetArchiveBaseUrlForProject(purl, pool);

                if (archiveBaseUrl == null)
                {
                    Logger.Debug("Unable to find archive base URL for {0}, pool {1}", purl.ToString(), pool);
                    continue;
                }

                try
                {
                    string? html = await GetHttpStringCache(httpClient, archiveBaseUrl, neverThrow: true);

                    if (html == null)
                    {
                        Logger.Debug("Error reading {0}", archiveBaseUrl);
                        continue;
                    }

                    AngleSharp.Html.Dom.IHtmlDocument document = await new HtmlParser().ParseDocumentAsync(html);
                    foreach (AngleSharp.Dom.IElement anchor in document.QuerySelectorAll("a"))
                    {
                        string? anchorHref = anchor.GetAttribute("href");

                        // Anchors without an href, or for another version, are of no interest. Skipping
                        // them here keeps a single bare <a> from aborting the whole pool via the catch.
                        if (anchorHref == null || !anchorHref.Contains(packageVersion))
                        {
                            continue;
                        }

                        if (anchorHref.EndsWith(".deb", StringComparison.Ordinal))
                        {
                            string fullDownloadUrl = archiveBaseUrl + "/" + anchorHref;
                            if (!downloadedUrls.Add(fullDownloadUrl))
                            {
                                // Never re-download the same file twice.
                                continue;
                            }
                            Logger.Debug("Downloading binary: {0}", fullDownloadUrl);

                            // Dispose the response (and its content stream) once this file is handled.
                            using System.Net.Http.HttpResponseMessage downloadResult = await httpClient.GetAsync(fullDownloadUrl);

                            if (!downloadResult.IsSuccessStatusCode)
                            {
                                Logger.Debug("Error {0} downloading file {1}", downloadResult.StatusCode, fullDownloadUrl);
                                continue;
                            }

                            // TODO: Add distro version id
                            string targetName = $"ubuntu-{purl.Name}@{packageVersion}-{anchorHref}";
                            string extractionPath = Path.Combine(TopLevelExtractionDirectory, targetName);
                            if (doExtract && cached && Directory.Exists(extractionPath))
                            {
                                downloadedPaths.Add(extractionPath);
                                return downloadedPaths;
                            }

                            if (doExtract)
                            {
                                downloadedPaths.Add(await ArchiveHelper.ExtractArchiveAsync(TopLevelExtractionDirectory, targetName, await downloadResult.Content.ReadAsStreamAsync(), cached));
                            }
                            else
                            {
                                extractionPath += Path.GetExtension(anchorHref) ?? "";
                                await File.WriteAllBytesAsync(extractionPath, await downloadResult.Content.ReadAsByteArrayAsync());

                                downloadedPaths.Add(extractionPath);
                            }
                        }

                        // Source Code URLs don't have the full version on the source files. We need to find
                        // them in the .dsc
                        else if (anchorHref.EndsWith(".dsc", StringComparison.Ordinal))
                        {
                            string? dscContent = await GetHttpStringCache(httpClient, archiveBaseUrl + "/" + anchorHref);

                            if (dscContent == null)
                            {
                                continue;
                            }

                            // Collect the file names listed in the .dsc (lines of the form " <hash> <size> <name>").
                            HashSet<string> seenFiles = new();
                            foreach (Match match in Regex.Matches(dscContent, "^ [a-z0-9]+ \\d+ (.*)$", RegexOptions.Multiline | RegexOptions.IgnoreCase))
                            {
                                seenFiles.Add(match.Groups[1].Value.Trim());
                            }

                            // Now we need to go through the anchor tags again looking for the source code files
                            foreach (AngleSharp.Dom.IElement secondAnchor in document.QuerySelectorAll("a"))
                            {
                                string? secondHref = secondAnchor.GetAttribute("href");
                                if (secondHref == null
                                    || !seenFiles.Contains(secondHref)
                                    || secondHref.EndsWith(".deb", StringComparison.Ordinal)
                                    || secondHref.EndsWith(".dsc", StringComparison.Ordinal)
                                    || secondHref.EndsWith(".asc", StringComparison.Ordinal))
                                {
                                    continue;
                                }

                                string fullDownloadUrl = archiveBaseUrl + "/" + secondHref;
                                if (!downloadedUrls.Add(fullDownloadUrl))
                                {
                                    // Never re-download the same file twice.
                                    continue;
                                }
                                Logger.Debug("Downloading source code: {0}", fullDownloadUrl);

                                using System.Net.Http.HttpResponseMessage downloadResult = await httpClient.GetAsync(fullDownloadUrl);

                                if (!downloadResult.IsSuccessStatusCode)
                                {
                                    Logger.Debug("Error {0} downloading file {1}", downloadResult.StatusCode, fullDownloadUrl);
                                    continue;
                                }

                                // TODO: Add distro version id
                                string targetName = $"ubuntu-{purl.Name}@{packageVersion}-{secondHref}";
                                string extractionPath = Path.Combine(TopLevelExtractionDirectory, targetName);

                                if (doExtract)
                                {
                                    downloadedPaths.Add(await ArchiveHelper.ExtractArchiveAsync(TopLevelExtractionDirectory, targetName, await downloadResult.Content.ReadAsStreamAsync(), cached));
                                }
                                else
                                {
                                    // Use the extension of the file actually downloaded (mirroring the
                                    // binary branch), not the extension of the .dsc link that listed it.
                                    extractionPath += Path.GetExtension(secondHref) ?? "";
                                    await File.WriteAllBytesAsync(extractionPath, await downloadResult.Content.ReadAsByteArrayAsync());

                                    downloadedPaths.Add(extractionPath);
                                }
                            }
                        }
                    }
                }
                catch (Exception ex)
                {
                    Logger.Debug("Error downloading binary for {0}: {1}", purl.ToString(), ex.Message);
                }
            }

            return downloadedPaths;
        }
Пример #18
0
        /// <summary>
        ///     Reads every table-body row that is not marked "notice", collects its cell texts and
        ///     link targets, and reports one <c>CrawlingData</c> per row through <c>OnCrawlData</c>.
        /// </summary>
        /// <param name="document"> Parsed HTML page to crawl. </param>
        protected override void OnPageCrawl(AngleSharp.Html.Dom.IHtmlDocument document)
        {
            // Per row: the (cell class name, cell text) pairs and the href of every anchor in the row.
            var rows = document.QuerySelectorAll("tbody tr")
                       .Where(tr => tr.ClassName != "notice")
                       .Select(tr =>
            {
                var cells = new List<Tuple<string, string>>();

                foreach (var td in tr.QuerySelectorAll("td"))
                {
                    var cellText = td.TextContent.Trim();

                    if (string.IsNullOrEmpty(cellText))
                    {
                        // Fall back to an image title. LastOrDefault is used because, in the author
                        // cell, the first image is the level icon.
                        cellText = td.QuerySelectorAll("img")
                                   .Where(img => img.GetAttribute("src") != null)
                                   .Select(img => img.GetAttribute("title"))
                                   .LastOrDefault();
                    }

                    cells.Add(new Tuple<string, string>(td.ClassName, cellText));
                }

                var links = tr.QuerySelectorAll("a")
                            .Select(a => a.GetAttribute("href"))
                            .ToList();

                return new Tuple<List<Tuple<string, string>>, List<string>>(cells, links);
            })
                       .ToArray();

            Parallel.ForEach(rows, entry =>
            {
                var cells = entry.Item1;
                var links = entry.Item2;

                var title = cells.FindValue("title").Substring("\n");

                var category = cells.FindValue("cate");
                var author   = cells.FindValue("author");
                var postedAt = DateTime.Parse(cells.FindValue("time"));

                // View and recommendation counters are taken by cell position (8th and 9th cell).
                var views      = cells[7].Item2.ToInt();
                var recommends = cells[8].Item2.ToInt();

                var firstLink = links[0];

                _ = OnCrawlData(new CrawlingData
                {
                    Type      = Source.Type,
                    BoardId   = Source.BoardId,
                    BoardName = Source.Name,
                    Title     = title,
                    Category  = category,
                    Author    = author,
                    Count     = views,
                    Recommend = recommends,
                    DateTime  = postedAt,
                    Href      = UrlCompositeHref(firstLink),
                    SourceId  = Source.Id
                });
            });
        }
Пример #19
0
        /// <summary>
        ///     Collects metadata text for a package: the contents of every .dsc file linked from the
        ///     package's base URLs or, while nothing has been collected yet, the pages linked from a
        ///     name search on the endpoint configured in <c>ENV_UBUNTU_ENDPOINT</c>.
        /// </summary>
        /// <param name="purl"> Package URL; its name is required. </param>
        /// <param name="useCache"> Passed on to the HTTP string cache for the index and search requests. </param>
        /// <returns> The concatenated metadata text (possibly empty), or null when the package URL has no name. </returns>
        public override async Task<string?> GetMetadataAsync(PackageURL purl, bool useCache = true)
        {
            Logger.Trace("GetMetadata {0}", purl?.ToString());

            if (purl == null || purl.Name == null)
            {
                return null;
            }

            StringBuilder metadataContent = new();
            HttpClient httpClient = CreateHttpClient();

            foreach (string distroUrlPrefix in GetBaseURLs(purl))
            {
                try
                {
                    string? html = await GetHttpStringCache(httpClient, distroUrlPrefix, useCache: useCache, neverThrow: true);

                    if (html != null)
                    {
                        AngleSharp.Html.Dom.IHtmlDocument document = await new HtmlParser().ParseDocumentAsync(html);
                        foreach (AngleSharp.Dom.IElement anchor in document.QuerySelectorAll("a"))
                        {
                            string? anchorHref = anchor.GetAttribute("href");

                            // Skip anchors without an href instead of letting a null dereference abort
                            // the scan of the remaining links through the catch below.
                            if (anchorHref == null || !anchorHref.EndsWith(".dsc", StringComparison.Ordinal))
                            {
                                continue;
                            }

                            Logger.Debug("Found a .dsc file: {0}", anchorHref);
                            string? dscContent = await GetHttpStringCache(httpClient, distroUrlPrefix + anchorHref, neverThrow: true);

                            if (dscContent != null)
                            {
                                metadataContent.AppendLine(dscContent);
                            }
                        }
                    }
                }
                catch (Exception ex)
                {
                    Logger.Debug("Error obtaining .dsc file for {0}: {1}", purl.ToString(), ex.Message);
                }

                // Fallback to packages.ubuntu.com if we haven't seen any .dsc files
                // NOTE(review): this check runs after each base URL, not once after all of them, so the
                // fallback can fire before later base URLs have been tried - confirm this is intended.
                if (metadataContent.Length == 0)
                {
                    try
                    {
                        // Escape the name so characters such as '+' survive inside the query string.
                        string keywords = Uri.EscapeDataString(purl.Name);
                        string? searchResults = await GetHttpStringCache(httpClient, $"{ENV_UBUNTU_ENDPOINT}/search?keywords={keywords}&searchon=names&exact=1&suite=all&section=all", useCache);

                        if (searchResults == null)
                        {
                            Logger.Debug("No search results returned for {0}", purl.ToString());
                            continue;
                        }

                        AngleSharp.Html.Dom.IHtmlDocument document = await new HtmlParser().ParseDocumentAsync(searchResults);

                        foreach (AngleSharp.Dom.IElement resultLink in document.QuerySelectorAll("a.resultlink"))
                        {
                            string? metadataUrl = resultLink.GetAttribute("href");

                            // A result link without a target would only re-fetch the endpoint root.
                            if (string.IsNullOrEmpty(metadataUrl))
                            {
                                continue;
                            }

                            metadataContent.AppendLine(await GetHttpStringCache(httpClient, $"{ENV_UBUNTU_ENDPOINT}/{metadataUrl}"));
                        }
                    }
                    catch (Exception ex)
                    {
                        Logger.Debug(ex, "Error fetching Ubuntu metadata: {0}", ex.Message);
                    }
                }
            }

            return metadataContent.ToString();
        }
Пример #20
0
        /// <summary>
        /// Retrieves the list of all contacts.
        /// </summary>
        /// <returns>Objects.Contact[]; null when re-authorization fails or any error occurs.</returns>
        public Objects.Contact[] GetContacts()
        {
            Log.ProcessMessage("Получаем список контактов");
            if (Login == "" || Password == "")
            {
                // Only reported; the request below is still attempted.
                Log.ExMessage("Не назначен логин/пароль");
            }

            try
            {
                // Blocking download of the contacts page.
                string FetchContactsHtml() =>
                    http.GetAsync(Domain + "/account/contacts/").Result.Content.ReadAsStringAsync().Result;

                string page = FetchContactsHtml();

                // The page reports a missing session: authorize and fetch once more.
                if (page.Contains("Вы не авторизованы"))
                {
                    if (!Auth())
                    {
                        return null;
                    }

                    page = FetchContactsHtml();
                }

                AngleSharp.Html.Dom.IHtmlDocument html = new HtmlParser().ParseDocument(page);

                var result = new List<Objects.Contact>();

                // Three parallel collections; entries are matched up by position.
                var links = html.QuerySelectorAll(".page_content .row .buttons a.pm_link");
                var names = html.QuerySelectorAll(".page_content .row .name a");
                var nicks = html.QuerySelectorAll(".page_content .row .nickname");

                for (int index = 0; index < links.Length; index++)
                {
                    // A contact counts as new when its link carries the "btn-success" class.
                    bool isNew = links[index].GetAttribute("class").Contains("btn-success");

                    result.Add(new Objects.Contact
                    {
                        Link  = links[index].GetAttribute("href"),
                        Name  = names[index].TextContent,
                        Nick  = nicks[index].TextContent,
                        IsNew = isNew
                    });
                }

                Log.GoodMessage("Получили список контактов");
                return result.ToArray();
            }
            catch
            {
                Log.ExMessage("Не удалось получить список контактов");
                return null;
            }
        }
Пример #21
0
        /// <summary>
        ///     Crawls a list-style page: every "ul li div" element whose class name contains "li" is
        ///     turned into one <c>CrawlingData</c> that is reported through <c>OnCrawlData</c>.
        /// </summary>
        /// <param name="document"> Parsed HTML page to crawl. </param>
        private void OnPageCrawlList(AngleSharp.Html.Dom.IHtmlDocument document)
        {
            var entries = document.QuerySelectorAll("ul li div")
                          .Where(item => !string.IsNullOrEmpty(item.ClassName) && item.ClassName.Contains("li"))
                          .Select(item =>
            {
                var fields = new List<Tuple<string, string>>();

                // Headings become "title" fields, cut off at the last '[' when there is one.
                foreach (var heading in item.QuerySelectorAll("h3"))
                {
                    var headingText = heading.TextContent.Trim();
                    var bracketAt   = headingText.LastIndexOf("[");

                    fields.Add(new Tuple<string, string>(
                                   "title",
                                   bracketAt != -1 ? headingText.Substring(0, bracketAt) : headingText));
                }

                // Every span contributes a field keyed by its class name.
                foreach (var span in item.QuerySelectorAll("span"))
                {
                    fields.Add(new Tuple<string, string>(span.ClassName, span.TextContent.Trim()));
                }

                // The "hotdeal_info" block is kept as "info" with tab characters removed.
                foreach (var block in item.QuerySelectorAll("div"))
                {
                    if (block.ClassName == "hotdeal_info")
                    {
                        fields.Add(new Tuple<string, string>("info", block.TextContent.Replace("\t", string.Empty)));
                    }
                }

                var links = item.QuerySelectorAll("a")
                            .Select(a => a.GetAttribute("href"))
                            .ToList();

                return new Tuple<List<Tuple<string, string>>, List<string>>(fields, links);
            }).ToArray();

            Parallel.ForEach(entries, entry =>
            {
                var fields = entry.Item1;
                var links  = entry.Item2;

                var category = fields.FindValue("category").Replace(" /", string.Empty);
                var title    = fields.FindValue("title").TrimEnd();

                // Append the extra info, when present, in square brackets.
                var info = fields.FindValue("info");
                if (!string.IsNullOrEmpty(info))
                {
                    title += $" [{info}]";
                }

                var author     = fields.FindValue("author").Replace("/ ", string.Empty);
                var crawledAt  = DateTime.Now;
                var recommends = fields.FindValue("count").ToInt();

                var target = UrlCompositeHref(links[0]);

                _ = OnCrawlData(new CrawlingData
                {
                    Type      = Source.Type,
                    BoardId   = Source.BoardId,
                    BoardName = Source.Name,
                    Category  = category,
                    Title     = title.Substring("\t"),
                    Author    = author,
                    Recommend = recommends,
                    DateTime  = crawledAt,
                    Href      = target,
                    SourceId  = Source.Id
                });
            });
        }
Пример #22
0
        /// <inheritdoc />
        /// <remarks>
        ///     The current version is read from the table cell that follows the "Version:" cell on
        ///     the package index page; older versions are derived from the .tar.gz links of the
        ///     package's archive listing. Any failure is logged and rethrown.
        /// </remarks>
        public override async Task<IEnumerable<string>> EnumerateVersionsAsync(PackageURL purl, bool useCache = true, bool includePrerelease = true)
        {
            Logger.Trace("EnumerateVersions {0}", purl?.ToString());
            if (purl == null || purl.Name is null)
            {
                return new List<string>();
            }

            try
            {
                string packageName = purl.Name;
                List<string> versionList = new();
                HttpClient httpClient = CreateHttpClient();
                HtmlParser parser = new();

                // Get the latest version
                using System.Net.Http.HttpResponseMessage indexResponse = await httpClient.GetAsync($"{ENV_CRAN_ENDPOINT}/web/packages/{packageName}/index.html");
                indexResponse.EnsureSuccessStatusCode();

                AngleSharp.Html.Dom.IHtmlDocument indexDocument = await parser.ParseDocumentAsync(await indexResponse.Content.ReadAsStringAsync());
                AngleSharp.Dom.IHtmlCollection<AngleSharp.Dom.IElement> cells = indexDocument.QuerySelectorAll("td");

                for (int i = 0; i < cells.Length; i++)
                {
                    if (cells[i].TextContent != "Version:")
                    {
                        continue;
                    }

                    // The value sits in the following cell; make sure that cell exists so the index
                    // stays in range when the label happens to be the last cell on the page.
                    if (i + 1 < cells.Length)
                    {
                        string? value = cells[i + 1].TextContent?.Trim();
                        if (!string.IsNullOrEmpty(value))
                        {
                            versionList.Add(value);
                        }
                    }
                    break;
                }

                // Get the remaining versions
                using System.Net.Http.HttpResponseMessage archiveResponse = await httpClient.GetAsync($"{ENV_CRAN_ENDPOINT}/src/contrib/Archive/{packageName}/");
                archiveResponse.EnsureSuccessStatusCode();

                AngleSharp.Html.Dom.IHtmlDocument archiveDocument = await parser.ParseDocumentAsync(await archiveResponse.Content.ReadAsStringAsync());

                foreach (AngleSharp.Dom.IElement anchor in archiveDocument.QuerySelectorAll("a"))
                {
                    string? href = anchor.GetAttribute("href");
                    if (href == null || !href.Contains(".tar.gz"))
                    {
                        continue;
                    }

                    // "<package>_<version>.tar.gz" -> "<version>"
                    string version = href.Replace(".tar.gz", "")
                                         .Replace(packageName + "_", "")
                                         .Trim();
                    Logger.Debug("Identified {0} version {1}.", packageName, version);
                    versionList.Add(version);
                }

                return SortVersions(versionList.Distinct());
            }
            catch (Exception ex)
            {
                Logger.Debug("Unable to enumerate versions: {0}", ex.Message);
                throw;
            }
        }
Пример #23
0
        /// <summary>
        ///     Crawls a table-style board page. The header row ("title_bg") determines the number
        ///     of columns; the cells of the "list0"/"list1" rows are flattened into one array and
        ///     consumed column-count entries at a time, one <c>CrawlingData</c> per row.
        /// </summary>
        /// <param name="document"> Parsed HTML page to crawl. </param>
        protected override void OnPageCrawl(AngleSharp.Html.Dom.IHtmlDocument document)
        {
            var tableRows = document.QuerySelectorAll("tbody tr");

            // Column captions taken from the header row.
            var headers = tableRows
                          .Where(tr => tr.ClassName == "title_bg")
                          .SelectMany(tr => tr.QuerySelectorAll("td"))
                          .Where(td => td.ClassName == "list_tspace")
                          .Select(td => td.TextContent.Trim())
                          .ToArray();

            var dataRows = tableRows
                           .Where(tr => tr.ClassName == "list0" || tr.ClassName == "list1")
                           .ToArray();

            // Flattened cell texts of all data rows.
            var cells = dataRows
                        .SelectMany(tr => tr.QuerySelectorAll("td"))
                        .Where(td => !string.IsNullOrEmpty(td.ClassName) && td.ClassName.Contains("list_vspace"))
                        .Select(td =>
            {
                var trimmed = td.TextContent.Trim();

                // Cells without text fall back to the alt text of their image, if any.
                if (string.IsNullOrEmpty(trimmed))
                {
                    return td.QuerySelector("img")?.GetAttribute("alt");
                }

                // Cells wrapping a link yield the link text (not trimmed).
                var link = td.QuerySelector("a");
                return link != null ? link.TextContent : trimmed;
            })
                        .ToArray();

            // Link targets of the data rows, ignoring "#" placeholders.
            var hrefs = dataRows
                        .SelectMany(tr => tr.QuerySelectorAll("td a"))
                        .Select(a => a.GetAttribute("href"))
                        .Where(target => target != "#")
                        .ToArray();

            if (headers.Length == 0 || cells.Length == 0)
            {
                return;
            }

            var columns = headers.Length;

            Parallel.For(0, cells.Length / columns, rowIndex =>
            {
                var offset   = rowIndex * columns;
                var rowId    = cells[offset + 0].ToInt();
                var author   = cells[offset + 1];
                var title    = cells[offset + 2];
                var postedAt = DateTime.Parse(cells[offset + 3]);

                // Only the part before " - " of the recommendation cell is parsed; an empty cell means zero.
                var recommendText = cells[offset + 4];
                var recommends    = string.IsNullOrEmpty(recommendText) ? 0 : recommendText.Split(" - ")[0].ToInt();

                var views = cells[offset + 5].ToInt();

                var target = UrlCompositeHref("/" + hrefs[rowIndex]);

                _ = OnCrawlData(new CrawlingData
                {
                    Type      = Source.Type,
                    BoardId   = Source.BoardId,
                    BoardName = Source.Name,
                    RowId     = rowId,
                    Title     = title,
                    Author    = author,
                    Recommend = recommends,
                    Count     = views,
                    DateTime  = postedAt,
                    Href      = target,
                    SourceId  = Source.Id
                });
            });
        }
Пример #24
0
        private List <string> Parse(BotData data)
        {
            var original = ReplaceValues(parseTarget, data);
            var partial  = original;
            var list     = new List <string>();

            // Parse the value
            switch (Type)
            {
            case ParseType.LR:
                var ls    = ReplaceValues(leftString, data);
                var rs    = ReplaceValues(rightString, data);
                var pFrom = 0;
                var pTo   = 0;

                // No L and R = return full input
                if (ls == "" && rs == "")
                {
                    list.Add(original);
                    break;
                }

                // L or R not present and not empty
                else if (((!partial.Contains(ls) && ls != "") || (!partial.Contains(rs) && rs != "")))
                {
                    break;
                }

                // Instead of the mess below, we could simply use Extreme.NET's Substring extensions
                // return original.Substrings(ls, rs); // Recursive
                // return original.Substring(ls, rs); // Not recursive

                if (recursive)
                {
                    if (useRegexLR)
                    {
                        try
                        {
                            var             pattern = BuildLRPattern(ls, rs);
                            MatchCollection mc      = Regex.Matches(partial, pattern);
                            foreach (Match m in mc)
                            {
                                list.Add(m.Value);
                            }
                        }
                        catch { }
                    }
                    else
                    {
                        try
                        {
                            while ((partial.Contains(ls) || ls == "") && (partial.Contains(rs) || rs == ""))
                            {
                                // Search for left delimiter and Calculate offset
                                pFrom = ls == "" ? 0 : partial.IndexOf(ls) + ls.Length;
                                // Move right of offset
                                partial = partial.Substring(pFrom);
                                // Search for right delimiter and Calculate length to parse
                                pTo = rs == "" ? (partial.Length - 1) : partial.IndexOf(rs);
                                // Parse it
                                var parsed = partial.Substring(0, pTo);
                                list.Add(parsed);
                                // Move right of parsed + right
                                partial = partial.Substring(parsed.Length + rs.Length);
                            }
                        }
                        catch { }
                    }
                }

                // Non-recursive
                else
                {
                    if (useRegexLR)
                    {
                        var             pattern = BuildLRPattern(ls, rs);
                        MatchCollection mc      = Regex.Matches(partial, pattern);
                        if (mc.Count > 0)
                        {
                            list.Add(mc[0].Value);
                        }
                    }
                    else
                    {
                        try
                        {
                            pFrom   = ls == "" ? 0 : partial.IndexOf(ls) + ls.Length;
                            partial = partial.Substring(pFrom);
                            pTo     = rs == "" ? partial.Length : partial.IndexOf(rs);
                            list.Add(partial.Substring(0, pTo));
                        }
                        catch { }
                    }
                }

                break;

            case ParseType.CSS:

                HtmlParser parser = new HtmlParser();
                AngleSharp.Html.Dom.IHtmlDocument document = null;
                try { document = parser.ParseDocument(original); } catch {  }

                try
                {
                    if (recursive)
                    {
                        foreach (var element in document.QuerySelectorAll(ReplaceValues(cssSelector, data)))
                        {
                            switch (ReplaceValues(attributeName, data))
                            {
                            case "innerHTML":
                                list.Add(element.InnerHtml);
                                break;

                            case "outerHTML":
                                list.Add(element.OuterHtml);
                                break;

                            default:
                                foreach (var attr in element.Attributes)
                                {
                                    if (attr.Name == ReplaceValues(attributeName, data))
                                    {
                                        list.Add(attr.Value);
                                        break;
                                    }
                                }
                                break;
                            }
                        }
                    }
                    else
                    {
                        switch (ReplaceValues(attributeName, data))
                        {
                        case "innerHTML":
                            list.Add(document.QuerySelectorAll(ReplaceValues(cssSelector, data))[cssElementIndex].InnerHtml);
                            break;

                        case "outerHTML":
                            list.Add(document.QuerySelectorAll(ReplaceValues(cssSelector, data))[cssElementIndex].OuterHtml);
                            break;

                        default:
                            foreach (var attr in document.QuerySelectorAll(ReplaceValues(cssSelector, data))[cssElementIndex].Attributes)
                            {
                                if (attr.Name == ReplaceValues(attributeName, data))
                                {
                                    list.Add(attr.Value);
                                    break;
                                }
                            }
                            break;
                        }
                    }
                }
                catch { }

                break;

            case ParseType.JSON:
                if (JTokenParsing)
                {
                    if (original.Trim().StartsWith("["))
                    {
                        JArray json     = JArray.Parse(original);
                        var    jsonlist = json.SelectTokens(jsonField, false);
                        foreach (var j in jsonlist)
                        {
                            list.Add(j.ToString());
                        }
                    }
                    else
                    {
                        JObject json     = JObject.Parse(original);
                        var     jsonlist = json.SelectTokens(jsonField, false);
                        foreach (var j in jsonlist)
                        {
                            list.Add(j.ToString());
                        }
                    }
                }
                else
                {
                    var jsonlist = new List <KeyValuePair <string, string> >();
                    parseJSON("", original, jsonlist);
                    foreach (var j in jsonlist)
                    {
                        if (j.Key == ReplaceValues(jsonField, data))
                        {
                            list.Add(j.Value);
                        }
                    }
                }

                break;

            case ParseType.XPATH:

                // NOT IMPLEMENTED YET
                break;

            case ParseType.REGEX:
                try
                {
                    var matches = Regex.Matches(partial, ReplaceValues(regexString, data));
                    foreach (Match match in matches)
                    {
                        var output = ReplaceValues(regexOutput, data);
                        for (var i = 0; i < match.Groups.Count; i++)
                        {
                            output = output.Replace("[" + i + "]", match.Groups[i].Value);
                        }
                        list.Add(output);
                    }
                }
                catch { }
                break;
            }

            return(list);
        }
Пример #25
0
 /// <summary>
 /// Refreshes the category list: downloads the "/jobs/" page under <c>Domain</c>,
 /// parses it as HTML, and appends one <c>Objects.Category</c> (link text and
 /// <c>href</c>) to <c>Objects.Category.Categories</c> for every element matching
 /// <c>.collapse li a[data-category_id]</c>.
 /// </summary>
 /// <remarks>
 /// Every exception (network failure, non-success HTTP status, parsing error) is
 /// caught, logged through <c>Log.ExMessage</c>, and reported as <c>false</c>.
 /// NOTE(review): entries are appended and the existing list is not cleared here,
 /// so repeated calls may accumulate duplicates - confirm against the callers.
 /// </remarks>
 /// <returns><c>true</c> if the page was fetched and processed; otherwise <c>false</c>.</returns>
 public bool UpdateWorkCategory()
 {
     Log.ProcessMessage("Пытаемся обновить список категорий");
     try
     {
         string pageSource;

         // The method is synchronous by contract, so the request is awaited by blocking.
         // The response is disposed deterministically to release its resources.
         using (var response = http.GetAsync(Domain + "/jobs/").Result)
         {
             // Fail fast on 4xx/5xx instead of parsing an error page and
             // reporting a successful update with no categories.
             response.EnsureSuccessStatusCode();
             pageSource = response.Content.ReadAsStringAsync().Result;
         }

         var parser = new HtmlParser();
         var html   = parser.ParseDocument(pageSource);

         // Each matching anchor describes one category: its text is the name,
         // its href is the link to the category page.
         foreach (var elem in html.QuerySelectorAll(".collapse li a[data-category_id]"))
         {
             Objects.Category.Categories.Add(new Objects.Category
             {
                 Name = elem.TextContent,
                 Href = elem.GetAttribute("href")
             });
         }

         Log.GoodMessage("Обновили список категорий");
         return(true);
     }
     catch
     {
         // Best-effort update: any failure is logged and signalled via the return value.
         Log.ExMessage("Не удалось обновить список категорий");
         return(false);
     }
 }