C# (CSharp) HtmlAgilityPackExtensions Examples

Programming Language: C# (CSharp)

Examples at hotexamples.com: 2

C# (CSharp) HtmlAgilityPackExtensions - 2 examples found. These are the top-rated real-world C# (CSharp) examples of HtmlAgilityPackExtensions, extracted from open-source projects. You can rate the examples to help us improve their quality.

Frequently Used Methods

Show Hide

RemoveHtmlTags(2)

Example #1

Show file

        /// <summary>
        /// Builds a <see cref="LinkedinItem"/> from the "top-card" section of a page.
        /// </summary>
        /// <param name="page">Page whose parsed document is queried.</param>
        /// <returns>The item with its Name, Description, Image, Email and Phone members assigned.</returns>
        private LinkedinItem parse_details(Page page)
        {
            var profile = new LinkedinItem();

            // All lookups below are relative to the first div whose id contains 'top-card'.
            var card = page.Document.DocumentNode.SelectSingleNode("//div[contains(@id, 'top-card')]");

            var nameNode = card.SelectSingleNode(".//div[contains(@id, 'name')]//span[contains(@class, 'full-name')]");
            profile.Name = nameNode.Extract();

            // The extracted headline is passed through RemoveHtmlTags before it is stored.
            var headline = card.SelectSingleNode(".//div[contains(@id, 'headline')]/p[contains(@class, 'title')]").Extract();
            profile.Description = HtmlAgilityPackExtensions.RemoveHtmlTags(headline);

            // NOTE(review): Extract("src") presumably reads the img 'src' attribute - confirm against the helper.
            var pictureNode = card.SelectSingleNode(".//div[contains(@class, 'profile-picture')]//img");
            profile.Image = pictureNode.Extract("src");

            var emailNode = card.SelectSingleNode(".//div[contains(@id, 'email')]//ul//li");
            profile.Email = emailNode.Extract();

            // Phone uses SelectNodes (a node collection), unlike the single-node lookups above.
            var phoneNodes = card.SelectNodes(".//div[contains(@id, 'phone')]//ul//li");
            profile.Phone = phoneNodes.Extract();

            return profile;
        }

Example #2

Show file

        /// <summary>
        /// Processes one page. A root page (depth -1) is scanned for search results whose
        /// headline passes <c>CheckString</c>, and its next pagination URL is queued; any
        /// other page is handed to <c>parse_details</c>.
        /// </summary>
        /// <param name="page">The page to process.</param>
        /// <returns><c>null</c> for a root page; otherwise the result of <c>parse_details</c>.</returns>
        protected override LinkedinItem Crawl(Page page)
        {
            // Non-root pages go straight to detail parsing.
            if (page.Url.depth != -1)
            {
                return parse_details(page);
            }

            var codeNode = page.Document.DocumentNode.SelectSingleNode("//code[contains(@id, 'voltron_srp_main-content')]");
            var payload = codeNode.ExtractDecode(false);

            // Strip the first 4 and last 3 characters (the "<!--" and "-->" comment wrapper).
            payload = payload.Remove(0, 4);
            payload = payload.Remove(payload.Length - 3, 3);

            dynamic json = JsonSerializer.Deserialize<dynamic>(payload);
            var search = json.content.page.voltron_unified_search_json.search;

            foreach (var hit in search.results)
            {
                var headline = HtmlAgilityPackExtensions.RemoveHtmlTags(hit.person.fmt_headline.Value);

                if (CheckString(headline, _searchWord))
                {
                    // Queue the profile (details) page of the matching person.
                    AddProcess(hit.person.link_nprofile_view_headless.Value, page);
                }
            }

            // Per the original author: root pages are run one at a time, so the check on
            // the counter itself is not locked.
            // NOTE(review): "<= 10" admits an eleventh page if _count starts at 0 - confirm the initial value.
            if (_count <= 10)
            {
                Interlocked.Increment(ref _count);

                // Queue the next pagination page.
                var nextPageUrl = page.CleanUrl(search.baseData.resultPagination.nextPage.pageURL.Value);
                AddProcess(nextPageUrl);
            }

            return null;
        }