ParseHtml() статический приватный Метод

Instantiates an HtmlParser element and calls the parsing function on the given input string
static private ParseHtml ( string htmlString ) : XmlElement,
htmlString string /// Input string of possibly badly-formed HTML to be parsed into well-formed HTML ///
Результат XmlElement,
Пример #1
0
        /// <summary>
        /// Downloads a single page, queues its not-yet-seen links one depth level deeper,
        /// starts storing the page text and records the page URL as scanned.
        /// </summary>
        /// <param name="currentUrl">URL of the page to download and scan.</param>
        /// <param name="currentDepth">Depth of <paramref name="currentUrl"/>; queued links get this value plus one.</param>
        /// <param name="baseUrl">Base URL handed to the HTML parser and to the URL filter.</param>
        /// <param name="urlsNotToScan">URLs that must not be queued.</param>
        /// <param name="urlsToScan">Queue of URLs to scan, mapped to their depth.</param>
        /// <param name="scannedUrls">URLs that were already scanned; <paramref name="currentUrl"/> is added at the end.</param>
        /// <param name="maxLinksOnPageCount">Upper bound on the number of links queued from this page.</param>
        private async Task ScanPageAsync(
            Uri currentUrl,
            int currentDepth,
            Uri baseUrl,
            ConcurrentDictionary <Uri, byte> urlsNotToScan,
            ConcurrentDictionary <Uri, int> urlsToScan,
            ConcurrentDictionary <Uri, byte> scannedUrls,
            int maxLinksOnPageCount)
        {
            var downloadedPage = await _downloader.DownloadPageAsync(currentUrl);
            var parseResult = HtmlParser.ParseHtml(downloadedPage, baseUrl);

            // A link is queued only when it is neither already scanned nor excluded.
            _urlFilter.Filter(parseResult.Links, baseUrl)
                .Where(link => !(scannedUrls.ContainsKey(link) || urlsNotToScan.ContainsKey(link)))
                .Take(maxLinksOnPageCount)
                .ForEach(link => urlsToScan.TryAdd(link, currentDepth + 1));

            // The insert is started but not awaited; the discard makes that explicit.
            _ = _database.InsertAsync(new ScannedPage(currentUrl, parseResult.Text));

            scannedUrls.TryAdd(currentUrl, default);
        }
Пример #2
0
        /// <summary>
        /// Creates a top-to-bottom widget and fills it by parsing <paramref name="htmlContent"/>.
        /// </summary>
        /// <param name="htmlContent">HTML text passed to the parser.</param>
        /// <param name="aboutTextColor">Text color assigned to the link button factory.</param>
        public HtmlWidget(string htmlContent, RGBA_Bytes aboutTextColor)
            : base(FlowDirection.TopToBottom)
        {
            this.Name = "HtmlWidget";
            // This widget is pushed onto the construction stack before parsing starts.
            // NOTE(review): presumably AddContent/CloseContent use the stack top as the current parent - confirm.
            elementsUnderConstruction.Push(this);
            linkButtonFactory.fontSize  = 12;
            linkButtonFactory.textColor = aboutTextColor;

            textImageButtonFactory.normalFillColor = RGBA_Bytes.Gray;
            textImageButtonFactory.normalTextColor = ActiveTheme.Instance.PrimaryTextColor;

            // The parser is given AddContent and CloseContent as callbacks.
            htmlParser.ParseHtml(htmlContent, AddContent, CloseContent);

            // Anchors are assigned only after the parse call has returned.
            VAnchor = VAnchor.Max_FitToChildren_ParentHeight;
            HAnchor = HAnchor.Max_FitToChildren_ParentWidth;
        }
Пример #3
0
        /// <summary>
        /// Builds the about page: an update control, a spacer and a widget filled from
        /// the about-page HTML, all stacked top to bottom.
        /// </summary>
        public AboutPage()
        {
            HAnchor = HAnchor.ParentLeftRight;
            VAnchor = VAnchor.ParentTop;

            Padding         = new BorderDouble(5);
            BackgroundColor = ActiveTheme.Instance.PrimaryBackgroundColor;

            linkButtonFactory.fontSize  = 12;
            linkButtonFactory.textColor = aboutTextColor;

            textImageButtonFactory.normalFillColor = RGBA_Bytes.Gray;
            textImageButtonFactory.normalTextColor = ActiveTheme.Instance.PrimaryTextColor;

            // Column that holds every child of this page.
            var infoColumn = new FlowLayoutWidget(FlowDirection.TopToBottom)
            {
                Name    = "AboutPageCustomInfo",
                HAnchor = HAnchor.ParentLeftRight,
                VAnchor = VAnchor.Max_FitToChildren_ParentHeight,
                Padding = new BorderDouble(5, 10, 5, 0),
            };

            infoColumn.AddChild(new UpdateControlView());
            //AddMatterHackersInfo(infoColumn);
            infoColumn.AddChild(new GuiWidget(1, 10));

            var parser = new HtmlParser();

            // Load the HTML from static data only when no content has been set yet.
            if (htmlContent == null)
            {
                string aboutPagePath = Path.Combine("OEMSettings", "AboutPage.html");
                htmlContent = StaticData.Instance.ReadAllText(aboutPagePath);
            }

            // The target widget is created before parsing, which reports through AddContent/CloseContent.
            htmlWidget          = new FlowLayoutWidget(FlowDirection.TopToBottom);
            htmlWidget.VAnchor  = VAnchor.Max_FitToChildren_ParentHeight;
            htmlWidget.HAnchor |= HAnchor.ParentCenter;

            parser.ParseHtml(htmlContent, AddContent, CloseContent);

            infoColumn.AddChild(htmlWidget);

            AddChild(infoColumn);
        }
Пример #4
0
        /// <summary>
        /// Converts HTML text containing a table into a <see cref="DataTable"/>.
        /// </summary>
        /// <remarks>
        /// When the parsed root element is <c>html</c>, the first <c>table</c> element below it
        /// is used; otherwise the root element itself is treated as the table. Rows with
        /// <c>th</c> cells add columns; rows with <c>td</c> cells add data rows.
        /// </remarks>
        /// <param name="htmlText">HTML text to parse.</param>
        /// <returns>
        /// The populated table, or an empty table when the document is rooted at <c>html</c>
        /// and contains no <c>table</c> element.
        /// </returns>
        public static DataTable HtmlTextToDataTable(this string htmlText)
        {
            var table = new DataTable();

            XmlElement tableElem = HtmlParser.ParseHtml(htmlText);

            // Ordinal comparison: the tag name is an identifier, not culture-sensitive text.
            if (string.Equals(tableElem.LocalName, "html", System.StringComparison.OrdinalIgnoreCase))
            {
                // GetElementsByTagName never returns null; it returns an empty list when nothing
                // matches, and indexing that list yields null. Check Count instead of null.
                XmlNodeList tables = tableElem.GetElementsByTagName("table");
                if (tables.Count == 0)
                {
                    return table;
                }

                tableElem = (XmlElement)tables[0];
            }

            foreach (var trElem in tableElem.GetXmlElementNodes("tr"))
            {
                // A row with th children is a header row: each th becomes a column.
                if (trElem.ContainsNodes("th"))
                {
                    foreach (var thElem in trElem.GetXmlElementNodes("th"))
                    {
                        table.Columns.Add(new DataColumn(thElem.InnerText));
                    }
                }
                else if (trElem.ContainsNodes("td"))
                {
                    var tdTextArray =
                        trElem.GetXmlElementNodes("td").Select(c => c.InnerText).ToArray();
                    table.Rows.Add(tdTextArray);
                }
            }

            return table;
        }
Пример #5
0
        //////////////////////////////////////////////////////////////////////////
        /// <summary>
        /// Parses the HHC file and generates the contents HTML from it.
        /// </summary>
        /// <param name="HHCPath">Path of the HHC file to parse; it must exist.</param>
        /// <param name="HtmlPath">Path of the HTML output; its directory is created when missing.</param>
        /// <param name="BasePath">Base path forwarded to the contents generator.</param>
        /// <returns>
        /// <c>true</c> on success; <c>false</c> when the input file is missing, the output
        /// directory cannot be created, or parsing/generation throws. Failures are reported
        /// on the console.
        /// </returns>
        private bool Generate(string HHCPath, string HtmlPath, string BasePath)
        {
            if (!File.Exists(HHCPath))
            {
                Console.WriteLine("File {0} doesn't exist.", HHCPath);
                return(false);
            }
            try
            {
                // GetDirectoryName yields an empty string for a bare file name (output in the
                // current directory) and null for a root path. CreateDirectory rejects both,
                // so only create a directory when there is one to create.
                string outputDirectory = Path.GetDirectoryName(HtmlPath);
                if (!string.IsNullOrEmpty(outputDirectory))
                {
                    Directory.CreateDirectory(outputDirectory);
                }
            }
            catch (Exception e)
            {
                Console.WriteLine("Error creating output directory.");
                Console.WriteLine(e.Message);
                return(false);
            }

            try
            {
                HtmlParser    Parser   = new HtmlParser();
                HtmlElement[] Elements = Parser.ParseHtml(HHCPath);

                Topic RootTopic = ParseHHC(Elements);
                GenerateContentsHtml(RootTopic, HtmlPath, BasePath);
            }
            catch (Exception e)
            {
                Console.WriteLine("Error parsing file {0}.", HHCPath);
                Console.WriteLine(e.Message);
                return(false);
            }

            return(true);
        }
        /// <summary>
        /// Queries up to ten result pages for <paramref name="searchFor"/> and returns the keys
        /// of the collected entries that are flagged as Infotrack.
        /// </summary>
        /// <param name="searchFor">Search term passed to the search service.</param>
        /// <param name="urlOfInterest">URL handed to the parser together with each result page.</param>
        /// <returns>Keys of the entries in the search dictionary whose value has <c>IsInfotrack</c> set.</returns>
        public List <int> Search(string searchFor, string urlOfInterest)
        {
            var keyCount = 1;

            for (int i = 1; i <= 10; i++)
            {
                var result = _service.Search(searchFor, i);

                // A blank page ends the paging early.
                if (string.IsNullOrWhiteSpace(result))
                {
                    break;
                }

                // Double quotes are replaced with '#' before the page is handed to the parser.
                HtmlParser.ParseHtml(result.Replace('"', '#'), urlOfInterest, ref keyCount, searchDictionary);
            }

            var retVal = new List <int>();

            foreach (var searchResult in searchDictionary.Where(p => p.Value.IsInfotrack))
            {
                retVal.Add(searchResult.Key);
            }

            return(retVal);
        }
Пример #7
0
 /// <summary>
 /// Extension shortcut that forwards <paramref name="body"/> to <c>HtmlParser.ParseHtml</c>.
 /// </summary>
 /// <param name="body">Text to parse.</param>
 /// <returns>The document returned by the parser.</returns>
 public static HtmlDocument _ParseHtml(this string body)
 {
     return HtmlParser.ParseHtml(body);
 }
Пример #8
0
 /// <summary>
 /// Creates a parser for <c>DocumentHtml</c> with <c>Configuration</c>, runs it and
 /// stores the parser's root node in <c>RootNode</c>.
 /// </summary>
 public void Parse()
 {
     var parser = new HtmlParser(DocumentHtml, Configuration);

     // The field is assigned before parsing starts, as in the original.
     _htmlParser = parser;

     parser.ParseHtml();
     RootNode = parser.RootNode;
 }