static private ParseHtml ( string htmlString ) : XmlElement, | ||
htmlString | string | /// Input string of possibly badly-formed HTML to be parsed into well-formed HTML /// |
Returns | XmlElement, |
/// <summary>
/// Downloads and parses one page, queues its not-yet-seen links for the next depth level,
/// starts storing the page text, and finally records the page as scanned.
/// </summary>
/// <param name="currentUrl">Address of the page to download and scan.</param>
/// <param name="currentDepth">Depth of <paramref name="currentUrl"/>; queued links are stored with this value plus one.</param>
/// <param name="baseUrl">Base address passed to the HTML parser and to the URL filter.</param>
/// <param name="urlsNotToScan">Addresses that must not be queued.</param>
/// <param name="urlsToScan">Addresses waiting to be scanned, mapped to their depth.</param>
/// <param name="scannedUrls">Addresses already scanned; <paramref name="currentUrl"/> is added at the end.</param>
/// <param name="maxLinksOnPageCount">Upper bound on the number of links queued from this page.</param>
private async Task ScanPageAsync(
    Uri currentUrl,
    int currentDepth,
    Uri baseUrl,
    ConcurrentDictionary<Uri, byte> urlsNotToScan,
    ConcurrentDictionary<Uri, int> urlsToScan,
    ConcurrentDictionary<Uri, byte> scannedUrls,
    int maxLinksOnPageCount)
{
    var downloadedPage = await _downloader.DownloadPageAsync(currentUrl);
    var document = HtmlParser.ParseHtml(downloadedPage, baseUrl);

    // Keep only links that are neither scanned already nor excluded, capped per page.
    var linksToQueue = _urlFilter
        .Filter(document.Links, baseUrl)
        .Where(link => !scannedUrls.ContainsKey(link) && !urlsNotToScan.ContainsKey(link))
        .Take(maxLinksOnPageCount);

    foreach (var link in linksToQueue)
    {
        urlsToScan.TryAdd(link, currentDepth + 1);
    }

    // The insert is started but not awaited; warning 4014 is suppressed for exactly this call.
#pragma warning disable 4014
    _database.InsertAsync(new ScannedPage(currentUrl, document.Text));
#pragma warning restore 4014

    scannedUrls.TryAdd(currentUrl, default);
}
/// <summary>
/// Builds a top-to-bottom flow widget and populates it by parsing <paramref name="htmlContent"/>.
/// </summary>
/// <param name="htmlContent">HTML text handed to the parser together with the AddContent and CloseContent callbacks.</param>
/// <param name="aboutTextColor">Color assigned to the link button factory's text.</param>
public HtmlWidget(string htmlContent, RGBA_Bytes aboutTextColor)
    : base(FlowDirection.TopToBottom)
{
    Name = "HtmlWidget";

    // Appearance of the buttons produced by the two factories.
    linkButtonFactory.fontSize = 12;
    linkButtonFactory.textColor = aboutTextColor;
    textImageButtonFactory.normalFillColor = RGBA_Bytes.Gray;
    textImageButtonFactory.normalTextColor = ActiveTheme.Instance.PrimaryTextColor;

    // This widget is put on the construction stack before parsing starts.
    elementsUnderConstruction.Push(this);
    htmlParser.ParseHtml(htmlContent, AddContent, CloseContent);

    VAnchor = VAnchor.Max_FitToChildren_ParentHeight;
    HAnchor = HAnchor.Max_FitToChildren_ParentWidth;
}
/// <summary>
/// Builds the about page: an update control followed by content parsed from HTML.
/// When <c>htmlContent</c> is null it is read from "OEMSettings/AboutPage.html" via StaticData.
/// </summary>
public AboutPage()
{
    HAnchor = HAnchor.ParentLeftRight;
    VAnchor = VAnchor.ParentTop;
    Padding = new BorderDouble(5);
    BackgroundColor = ActiveTheme.Instance.PrimaryBackgroundColor;

    // Appearance of the buttons produced by the two factories.
    linkButtonFactory.fontSize = 12;
    linkButtonFactory.textColor = aboutTextColor;
    textImageButtonFactory.normalFillColor = RGBA_Bytes.Gray;
    textImageButtonFactory.normalTextColor = ActiveTheme.Instance.PrimaryTextColor;

    // Container that stacks the update control, a spacer and the HTML content.
    var container = new FlowLayoutWidget(FlowDirection.TopToBottom)
    {
        Name = "AboutPageCustomInfo",
        HAnchor = HAnchor.ParentLeftRight,
        VAnchor = VAnchor.Max_FitToChildren_ParentHeight,
        Padding = new BorderDouble(5, 10, 5, 0),
    };
    container.AddChild(new UpdateControlView());
    //AddMatterHackersInfo(container);
    container.AddChild(new GuiWidget(1, 10));

    if (htmlContent == null)
    {
        string aboutHtmlFile = Path.Combine("OEMSettings", "AboutPage.html");
        htmlContent = StaticData.Instance.ReadAllText(aboutHtmlFile);
    }

    // htmlWidget is assigned before parsing starts.
    htmlWidget = new FlowLayoutWidget(FlowDirection.TopToBottom);
    htmlWidget.VAnchor = VAnchor.Max_FitToChildren_ParentHeight;
    htmlWidget.HAnchor |= HAnchor.ParentCenter;

    HtmlParser parser = new HtmlParser();
    parser.ParseHtml(htmlContent, AddContent, CloseContent);

    container.AddChild(htmlWidget);
    AddChild(container);
}
/// <summary>
/// Converts HTML text that contains a table into a <see cref="DataTable"/>.
/// Rows with <c>th</c> cells add columns (named by cell text); rows with <c>td</c> cells add data rows.
/// </summary>
/// <param name="htmlText">HTML text; either a full document rooted at <c>html</c> or a fragment rooted at the table itself.</param>
/// <returns>
/// The populated table. An empty table is returned when parsing yields no root element
/// or when an <c>html</c> document contains no <c>table</c> element.
/// </returns>
public static DataTable HtmlTextToDataTable(this string htmlText)
{
    var table = new DataTable();

    XmlElement tableElem = HtmlParser.ParseHtml(htmlText);
    if (tableElem == null)
    {
        return table;
    }

    // Ordinal comparison: the tag name is an identifier, not culture-dependent text.
    if (string.Equals(tableElem.LocalName, "html", System.StringComparison.OrdinalIgnoreCase))
    {
        // GetElementsByTagName returns an empty list (never null) when nothing matches,
        // so the count must be checked before taking the first table.
        XmlNodeList tables = tableElem.GetElementsByTagName("table");
        tableElem = tables.Count > 0 ? tables[0] as XmlElement : null;
        if (tableElem == null)
        {
            return table;
        }
    }

    foreach (var trElem in tableElem.GetXmlElementNodes("tr"))
    {
        if (trElem.ContainsNodes("th"))
        {
            // Header row: every th cell becomes a column.
            foreach (var thElem in trElem.GetXmlElementNodes("th"))
            {
                table.Columns.Add(new DataColumn(thElem.InnerText));
            }
        }
        else if (trElem.ContainsNodes("td"))
        {
            // Data row: the td cell texts become one row.
            var tdTextArray = trElem.GetXmlElementNodes("td").Select(c => c.InnerText).ToArray();
            table.Rows.Add(tdTextArray);
        }
    }

    return table;
}
//////////////////////////////////////////////////////////////////////////
/// <summary>
/// Parses the HHC file at <paramref name="HHCPath"/> and generates the contents HTML at <paramref name="HtmlPath"/>.
/// Failures are reported on the console rather than thrown.
/// </summary>
/// <param name="HHCPath">Path of the HHC file to parse; must exist.</param>
/// <param name="HtmlPath">Path of the HTML file to generate; its directory is created when the path names one.</param>
/// <param name="BasePath">Base path forwarded to GenerateContentsHtml.</param>
/// <returns><c>true</c> on success; <c>false</c> when the input is missing, the output directory cannot be created, or parsing/generation fails.</returns>
private bool Generate(string HHCPath, string HtmlPath, string BasePath)
{
    if (!File.Exists(HHCPath))
    {
        Console.WriteLine("File {0} doesn't exist.", HHCPath);
        return false;
    }

    try
    {
        // GetDirectoryName yields an empty string (or null) when HtmlPath has no directory part;
        // CreateDirectory rejects both, so only create a directory when one is actually named.
        string outputDirectory = Path.GetDirectoryName(HtmlPath);
        if (!string.IsNullOrEmpty(outputDirectory))
        {
            Directory.CreateDirectory(outputDirectory);
        }
    }
    catch (Exception e)
    {
        Console.WriteLine("Error creating output directory.");
        Console.WriteLine(e.Message);
        return false;
    }

    try
    {
        HtmlParser Parser = new HtmlParser();
        HtmlElement[] Elements = Parser.ParseHtml(HHCPath);
        Topic RootTopic = ParseHHC(Elements);
        GenerateContentsHtml(RootTopic, HtmlPath, BasePath);
    }
    catch (Exception e)
    {
        Console.WriteLine("Error parsing file {0}.", HHCPath);
        Console.WriteLine(e.Message);
        return false;
    }

    return true;
}
/// <summary>
/// Queries the search service page by page (at most ten pages), feeds every non-blank result
/// to the HTML parser, and returns the keys of the collected entries flagged as IsInfotrack.
/// </summary>
/// <param name="searchFor">Search term passed to the service.</param>
/// <param name="urlOfInterest">URL forwarded to the parser.</param>
/// <returns>Keys of the entries in searchDictionary whose value has IsInfotrack set.</returns>
public List<int> Search(string searchFor, string urlOfInterest)
{
    const int maxPages = 10;

    // Running counter handed to the parser by reference across all pages.
    var keyCount = 1;

    for (int page = 1; page <= maxPages; page++)
    {
        var result = _service.Search(searchFor, page);
        if (string.IsNullOrWhiteSpace(result))
        {
            // A blank page ends the paging loop.
            break;
        }

        // Double quotes are replaced with '#' before the text is handed to the parser.
        HtmlParser.ParseHtml(result.Replace('"', '#'), urlOfInterest, ref keyCount, searchDictionary);
    }

    // NOTE(review): searchDictionary is not cleared in this method; if it outlives a single call,
    // entries from earlier searches are included here as well — confirm this is intended.
    return searchDictionary
        .Where(entry => entry.Value.IsInfotrack)
        .Select(entry => entry.Key)
        .ToList();
}
/// <summary>
/// Extension shortcut that hands <paramref name="body"/> to <c>HtmlParser.ParseHtml</c>.
/// </summary>
/// <param name="body">Text to parse.</param>
/// <returns>The document produced by <c>HtmlParser.ParseHtml</c>.</returns>
public static HtmlDocument _ParseHtml(this string body)
{
    return HtmlParser.ParseHtml(body);
}
/// <summary>
/// Creates a parser for <c>DocumentHtml</c> with <c>Configuration</c>, runs it,
/// and exposes the parser's root node through <c>RootNode</c>.
/// </summary>
public void Parse()
{
    var parser = new HtmlParser(DocumentHtml, Configuration);

    // The field is assigned before parsing starts, so it is set even if parsing throws.
    _htmlParser = parser;

    parser.ParseHtml();
    RootNode = parser.RootNode;
}