/// <summary>
/// Downloads the page at <paramref name="URL"/> with the shared client <c>hc</c> and parses
/// the response body into an AngleSharp HTML document.
/// </summary>
/// <param name="URL">Address of the page to download.</param>
/// <returns>
/// The parsed document, or <c>null</c> when an exception occurred. Every failure is logged;
/// unexpected exception types are additionally shown to the user in a message box.
/// </returns>
public static async Task<AngleSharp.Html.Dom.IHtmlDocument> GetHtml(string URL)
{
    try
    {
        // Dispose the response as soon as its body has been read into a string,
        // so the underlying connection resources are released deterministically.
        using (var response = await hc.GetAsync(URL))
        {
            var source = await response.Content.ReadAsStringAsync();
            var parser = new AngleSharp.Html.Parser.HtmlParser();
            return parser.ParseDocument(source);
        }
    }
    catch (HttpRequestException e)
    {
        // Request-level failures such as name resolution errors.
        // NOTE(review): GetAsync by itself does not throw for HTTP error statuses (e.g. 404);
        // the error page body is parsed and returned instead. Confirm whether
        // EnsureSuccessStatusCode() is wanted here.
        Log.Error("例外発生", e);
    }
    catch (TaskCanceledException e)
    {
        // The task was cancelled (typically a timeout).
        Log.Error("例外発生", e);
    }
    catch (Exception e)
    {
        Log.Error("例外発生", e);
        MessageBox.Show(e.Message, "謎のエラー", MessageBoxButtons.OK, MessageBoxIcon.Error);
    }

    return null;
}
/// <summary>
/// Lazily yields the text content of every element with the given tag name found in
/// the content of one chapter.
/// </summary>
/// <param name="chapter">Index handed to this object's indexer to pick the chapter.</param>
/// <param name="element">Tag name to look for; defaults to <c>p</c>.</param>
/// <returns>The <c>TextContent</c> of each matching element, in the order the DOM returns them.</returns>
public IEnumerable<string> GetContentElements(int chapter, string element = "p")
{
    var chapterDocument = Parser.ParseDocument(this[chapter].Content);
    var matches = chapterDocument.GetElementsByTagName(element);

    // The explicit IHtmlElement iteration type casts every match, exactly as before.
    foreach (IHtmlElement match in matches)
    {
        yield return match.TextContent;
    }
}
/// <summary>
/// Console benchmark: parses <c>youku-home-page.txt</c> once with the local HtmlParser,
/// once with HtmlAgilityPack and once with AngleSharp, dumps two of the parse results
/// and prints the elapsed milliseconds of each parser.
/// </summary>
/// <param name="args">Command-line arguments (not used).</param>
static void Main(string[] args)
{
    var aparser = new AngleSharp.Html.Parser.HtmlParser();
    var parser = new HtmlParser();
    var page = File.ReadAllText("youku-home-page.txt");

    // Each measurement is captured immediately after the parse call so later
    // console output cannot leak into the reported numbers.
    var psrWatch = Stopwatch.StartNew();
    var root = parser.Parse(page);
    psrWatch.Stop();
    var htmlParserElapsed = psrWatch.ElapsedMilliseconds;

    var doc = new HtmlDocument();
    var hapWatch = Stopwatch.StartNew();
    doc.LoadHtml(page);
    hapWatch.Stop();
    var hapElapsed = hapWatch.ElapsedMilliseconds;

    var aWtach = Stopwatch.StartNew();
    var adom = aparser.ParseDocument(page);
    aWtach.Stop();
    var aElapsed = aWtach.ElapsedMilliseconds;

    Console.WriteLine(root);
    for (var i = 0; i < 3; i++)
    {
        Console.WriteLine();
    }

    Console.Write(adom);
    for (var i = 0; i < 10; i++)
    {
        Console.WriteLine();
    }

    Console.WriteLine("**************************************************");
    Console.WriteLine("**************************************************");
    Console.WriteLine($"Tset parsing speed of youku-home page, page length : {page.Length}");
    // Fixed: previously printed hapWatch.Elapsed.Milliseconds, which is only the 0-999 ms
    // component of the elapsed time and was read from a still-running stopwatch.
    Console.WriteLine($"HtmlAgilityPack Time-Spend:{hapElapsed}ms");
    Console.WriteLine($"HtmlParser Time-Spend:{htmlParserElapsed}ms");
    Console.WriteLine($"AngleSharp Time-Spend:{aElapsed}ms");
    Console.Read();
}
/// <summary>
/// Builds the "grafik" page: loads <c>grafik.html</c> from the front-end folder, swaps its body
/// for the body of the rewritten blank template page, moves the original grafik content into
/// the element with id <c>page</c>, copies the template's <c>link</c>/<c>script</c> head
/// elements over and returns the pretty-printed result as <c>text/html</c>.
/// </summary>
/// <returns>A content result holding the combined HTML.</returns>
public async Task<IActionResult> Get()
{
    var grafikPath = Path.Combine(env.ContentRootPath, "../../frontend/public/grafik.html");
    var grafikSource = await System.IO.File.ReadAllTextAsync(grafikPath);

    var htmlParser = new AngleSharp.Html.Parser.HtmlParser();
    var grafik = htmlParser.ParseDocument(grafikSource);
    var kspTemplate = htmlParser.ParseDocument(await FetchBlankPage());
    pageRewriter.ModifyTree(kspTemplate, "kurz");

    // Keep a reference to the original grafik body before it is replaced in the tree;
    // its child nodes are re-attached under #page below.
    var originalBody = grafik.Body;
    originalBody.Replace(kspTemplate.Body);

    var pageContainer = grafik.Body.QuerySelector("#page");
    pageContainer.InnerHtml = "";
    pageContainer.AppendNodes(originalBody.ChildNodes.ToArray());

    if (KspAuthCookie is object)
    {
        // Expose the id parsed from the auth cookie through a <meta name="x-ksp-uid"> tag.
        var user = KspAuthenticator.ParseAuthCookie(KspAuthCookie);
        var uidMeta = grafik.CreateElement("meta");
        uidMeta.SetAttribute("name", "x-ksp-uid");
        uidMeta.SetAttribute("content", user.Id.Value.ToString());
        grafik.Head.AppendChild(uidMeta);
    }

    var templateAssets = kspTemplate.Head.QuerySelectorAll("link, script");
    foreach (var asset in templateAssets)
    {
        asset.RemoveFromParent();
        grafik.Head.AppendChild(asset);
    }

    var formatter = new PrettyMarkupFormatter { Indentation = "\t", NewLine = "\n" };
    var outputHtml = new StringWriter();
    grafik.ToHtml(outputHtml, formatter);
    return this.Content(outputHtml.ToString(), "text/html");
}
/// <summary>
/// Parses the given HTML and serialises it again with AngleSharp's <c>PrettyMarkupFormatter</c>.
/// </summary>
/// <param name="newContent">HTML text to reformat.</param>
/// <returns>The document markup as written by the pretty formatter.</returns>
public static string PrettifyHtml(string newContent)
{
    var document = new AngleSharp.Html.Parser.HtmlParser().ParseDocument(newContent);

    var output = new StringWriter();
    document.ToHtml(output, new PrettyMarkupFormatter());
    return output.ToString();
}
/// <summary>
/// Reads the season number from the <c>value</c> attribute of the first option of the
/// <c>select[name=season]</c> element in the given page.
/// </summary>
/// <param name="htmlBody">HTML of the page that contains the season selector.</param>
/// <returns>The attribute value parsed as an integer.</returns>
public int ParseSeasonNumber(string htmlBody)
{
    IHtmlDocument page = new AngleSharp.Html.Parser.HtmlParser().ParseDocument(htmlBody);

    string seasonValue = page
        .QuerySelector("select[name=season] option:first-child")
        .GetAttribute("value");

    return Int32.Parse(seasonValue);
}
/// <summary>
/// Downloads the goldapple.ru search results for "syoss", collects the product tiles,
/// times <c>GetProductListAsync</c> over them and prints every resulting product.
/// </summary>
/// <param name="args">Command-line arguments (not used).</param>
static void Main(string[] args)
{
    string content;

    // Note for building other queries: %20 encodes a space.
    // The client and the response are disposed once the body has been read.
    using (var client = new HttpClient())
    using (var result = client.GetAsync("https://goldapple.ru/catalogsearch/result/?q=syoss").Result)
    {
        content = result.Content.ReadAsStringAsync().Result;
    }

    var domParser = new AngleSharp.Html.Parser.HtmlParser();
    var document = domParser.ParseDocument(content);

    // Take every <ol> element that carries the "products" class.
    var olList = document.QuerySelectorAll("ol").Where(item => item.ClassList.Contains("products"));

    var tempProductList = new List<IElement>();
    foreach (var ol in olList)
    {
        tempProductList.AddRange(ol.QuerySelectorAll("div").Where(x => x.ClassList.Contains("product-item-info")));
    }

    // Benchmark 1 (synchronous GetProductList) was measured at roughly 18.7 s and is disabled.
    // Benchmark 2 - async variant, measured at roughly 6.5 s.
    // The result is materialised inside the timed section so that a lazily evaluated
    // sequence is both fully measured and not executed a second time by the code below.
    var stopWathc = Stopwatch.StartNew();
    var productListAsync = GetProductListAsync(tempProductList).ToList();
    stopWathc.Stop();
    Console.WriteLine("Async: " + stopWathc.ElapsedMilliseconds);

    // TODO: bring product retrieval down to 2-3 seconds, ideally even faster.

    // Output results
    Console.WriteLine("TemProducts Count: " + tempProductList.Count);
    Console.WriteLine("Products Count: " + productListAsync.Count);

    var separator = new string('=', 8);
    foreach (var product in productListAsync)
    {
        Console.WriteLine();
        Console.WriteLine(separator);
        Console.WriteLine();
        Console.WriteLine(product);
    }
}
/// <summary>
/// Parses <paramref name="source"/>, lets <c>ModifyTree</c> rewrite the document for the
/// current request path (with surrounding slashes trimmed) and serialises the result.
/// </summary>
/// <param name="source">HTML markup to rewrite.</param>
/// <param name="context">HTTP context whose request path is handed to <c>ModifyTree</c>.</param>
/// <returns>The rewritten document as written by <c>HtmlMarkupFormatter</c>.</returns>
public string RewriteHtml(string source, HttpContext context)
{
    var document = new AngleSharp.Html.Parser.HtmlParser().ParseDocument(source);

    var trimmedPath = context.Request.Path.Value.Trim('/');
    ModifyTree(document, trimmedPath);

    var rendered = new StringWriter();
    document.ToHtml(rendered, new HtmlMarkupFormatter());
    return rendered.ToString();
}
/// <summary>
/// Loads a small page into a window backed by a Jint engine, runs jQuery plus a click
/// handler script, and checks the <c>#message</c> text before and after a triggered click.
/// </summary>
public async Task jQueryClickTest()
{
    // Fixed: the handler now increments numClicks before writing the message; without the
    // increment the text after the first click would read "0 times", never the expected "1 times".
    string script = "var numClicks = 0;\n" +
                    "$('#btn').click(function() {\n" +
                    "numClicks++;\n" +
                    "$('#message').text(\"hello there you clicked the button \" + numClicks + \" times\");\n" +
                    "});";

    // NOTE(review): this is a verbatim string, so the "\n" after the doctype is a literal
    // backslash followed by 'n', not a line break - confirm that is intended.
    string html = @"<!DOCTYPE html>\n<html> <head> </head> <body> <div id='message'>empty</div> <input type='button' id='btn'>Click me please</input> </body> </html>";

    Jint.Engine engine = new Jint.Engine();
    IDocument document = new Document();
    var parser = new AngleSharp.Html.Parser.HtmlParser(new AngleSharp.Html.Parser.HtmlParserOptions()
    {
        IsScripting = true
    });

    // Fixed: the wrapper handed to CheckMessage previously never received the parsed DOM
    // (only the window did). Both now share the same parsed document.
    // NOTE(review): assumes CheckMessage inspects document.HtmlDocument - verify against its definition.
    document.HtmlDocument = parser.ParseDocument(html);

    Window.Window window1 = new Window.Window(engine);
    window1.document = document.HtmlDocument;
    window1.InitializeEngine();

    var jquery = System.IO.File.ReadAllText(@"../../../../BrowseSharpPlayground/jquery.js");
    engine.Execute(jquery);
    engine.Execute("var $ = window.jQuery;");
    engine.Execute(script);

    CheckMessage(engine, document, "empty");
    engine.Execute("$('#btn').trigger('click');");
    CheckMessage(engine, document, "hello there you clicked the button 1 times");
}
/// <summary>
/// Converts an HTML string to plain text.
/// </summary>
/// <param name="html">The HTML string to convert.</param>
/// <returns>
/// The text written by <c>ConvertContentToText</c> for the parsed document's child nodes,
/// with leading and trailing white space removed.
/// </returns>
public string ToPlainText(string html)
{
    var document = new AngleSharp.Html.Parser.HtmlParser().ParseDocument(html);

    using var writer = new StringWriter();
    ConvertContentToText(document.ChildNodes, writer);
    writer.Flush();

    // Only the ends of the text are trimmed here; nothing in this method collapses
    // consecutive line breaks inside the text.
    var plainText = writer.ToString();
    return plainText.Trim();
}
/// <summary>
/// Returns the list of proxies found on a site that the search engine located for us.
/// </summary>
/// <param name="_url">Address of the page to scan for proxies.</param>
/// <returns>
/// Distinct <c>ip:port</c> strings in the order they were first seen; an empty array when
/// the page could not be downloaded or contains no matching table rows.
/// </returns>
public static string[] GetProxys(string _url)
{
    var proxies = new List<string>();
    // Tracks what has already been added so de-duplication does not rescan the list.
    var seen = new HashSet<string>();

    string code = GetHtml(_url);
    if (string.IsNullOrEmpty(code))
    {
        return proxies.ToArray();
    }

    var browser = new AngleSharp.Html.Parser.HtmlParser();
    AngleSharp.Html.Dom.IHtmlDocument doc = browser.ParseDocument(code);

    foreach (var row in doc.GetElementsByTagName("TR"))
    {
        try
        {
            // Concatenate the text of all cells of the row, padded with spaces.
            var cells = new System.Text.StringBuilder();
            foreach (var cell in row.GetElementsByTagName("TD"))
            {
                cells.Append(' ').Append(cell.TextContent).Append(' ');
            }

            // Every run of characters other than digits and dots becomes ONE space.
            // This replaces the former "replace, then loop while spaces remain" code,
            // whose loop condition could never become false.
            string normalized = Regex.Replace(cells.ToString(), @"[^0-9\.]+", " ");

            MatchCollection matches = Regex.Matches(normalized, @"(?<prx>\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3} \d{1,5} )+");
            foreach (Match match in matches)
            {
                // The group is quantified with '+', so one match may contain several
                // "ip port " captures; Group.Value would only expose the last of them.
                foreach (Capture capture in match.Groups["prx"].Captures)
                {
                    string proxy = capture.Value.Trim().Replace(' ', ':');
                    if (seen.Add(proxy))
                    {
                        proxies.Add(proxy);
                    }
                }
            }
        }
        catch (System.Exception ex)
        {
            // Best effort: a row that cannot be processed must not abort the whole scan,
            // but the failure is at least surfaced to an attached debugger.
            System.Diagnostics.Debug.WriteLine(ex);
        }
    }

    return proxies.ToArray();
}
/// <summary>
/// Downloads a product page and returns the text of its first <c>span</c> element that
/// carries the <c>swatch-simple__view</c> class.
/// </summary>
/// <param name="linq">Address of the product page.</param>
/// <returns>The span's text content, or an empty string when no such span exists.</returns>
static string GetProductVolume(string linq)
{
    string content;

    // Dispose the client and the response once the body has been read; previously both
    // were left for the finalizer on every call.
    // NOTE(review): a single shared HttpClient for the whole class would be preferable
    // if this is called once per product.
    using (var client = new HttpClient())
    using (var result = client.GetAsync(linq).Result)
    {
        content = result.Content.ReadAsStringAsync().Result;
    }

    var domParser = new AngleSharp.Html.Parser.HtmlParser();
    var document = domParser.ParseDocument(content);

    // Look for the span that holds the description value.
    var volume = document
        .QuerySelectorAll("span")
        .FirstOrDefault(item => item.ClassName != null && item.ClassList.Contains("swatch-simple__view"));

    return volume == null ? "" : volume.TextContent;
}
/// <summary>
/// Parses <paramref name="htmlString"/> as a fragment inside an empty body element,
/// removes every <c>script</c> element from it and returns the remaining markup.
/// </summary>
/// <param name="htmlString">HTML fragment to clean.</param>
/// <returns>The fragment's HTML without script elements.</returns>
private static string RemoveScripts(string htmlString)
{
    var parser = new AngleSharp.Html.Parser.HtmlParser();

    // An empty host document supplies the <body> context element for fragment parsing.
    var hostDocument = parser.ParseDocument("<html><body></body></html>");
    var fragmentNodes = parser.ParseFragment(htmlString, hostDocument.Body);

    var scripts = fragmentNodes.QuerySelectorAll("script");
    foreach (var script in scripts)
    {
        script.Remove();
    }

    return fragmentNodes.ToHtml();
}
/// <summary>
/// Downloads a product page and returns the text of the <c>section</c> inside the
/// second-to-last <c>li.info-tabs__item</c> element, split on the " · " separator.
/// </summary>
/// <param name="linq">Address of the product page.</param>
/// <returns>
/// The separated parts, or an empty sequence when the page has fewer than two info tabs
/// or the chosen tab contains no <c>section</c> element.
/// </returns>
static IEnumerable<string> GetProductComposition(string linq)
{
    string content;

    // Dispose the client and the response once the body has been read.
    using (var client = new HttpClient())
    using (var result = client.GetAsync(linq).Result)
    {
        content = result.Content.ReadAsStringAsync().Result;
    }

    var domParser = new AngleSharp.Html.Parser.HtmlParser();
    var document = domParser.ParseDocument(content);

    // Collect the info tabs; the description is read from the second-to-last one.
    var tabs = document
        .QuerySelectorAll("li")
        .Where(x => x.ClassList.Contains("info-tabs__item"))
        .ToList();

    // Guard: indexing Count - 2 used to throw when fewer than two tabs were present.
    if (tabs.Count < 2)
    {
        return Enumerable.Empty<string>();
    }

    // Guard: the tab may not contain a <section>; this used to be a null dereference.
    var section = tabs[tabs.Count - 2].QuerySelector("section");
    if (section == null)
    {
        return Enumerable.Empty<string>();
    }

    return section.TextContent.Split(" · ");
}
/// <summary>
/// Returns the proxy-list sites found on the search results page behind the given link
/// (Yandex and similar search engines).
/// </summary>
/// <param name="_url">Address of the search results page.</param>
/// <returns>
/// Distinct link targets that start with "http" and do not contain "yandex", "google",
/// "mail", "rambler" or "youtube"; an empty array when the page could not be downloaded.
/// </returns>
public static string[] GetYandexHrefs(string _url)
{
    string code = GetHtml(_url);

    // Treat a null download result like an empty one instead of failing inside the parser.
    if (string.IsNullOrEmpty(code))
    {
        return new string[0];
    }

    var browser = new AngleSharp.Html.Parser.HtmlParser();
    AngleSharp.Html.Dom.IHtmlDocument doc = browser.ParseDocument(code);

    // Links pointing back at search engines / portals are of no interest.
    string[] excludedWords = { "yandex", "google", "mail", "rambler", "youtube" };

    return doc.Links
        .Select(link => link.GetAttribute("HREF"))
        // Ordinal comparison: this is a protocol prefix check, not linguistic text.
        .Where(href => href != null && href.StartsWith("http", System.StringComparison.Ordinal))
        .Distinct()
        .Where(href => !excludedWords.Any(word => href.Contains(word)))
        .ToArray();
}
/// <summary>
/// Builds a <c>PlayerExport</c> from a player detail page by walking the children of the
/// <c>.main_left</c> element and passing each one to the detail, quality and stats parsers.
/// </summary>
/// <param name="htmlBody">HTML of the player detail page.</param>
/// <returns>The populated player export.</returns>
public PlayerExport ParsePlayerDetail(string htmlBody)
{
    IHtmlDocument document = new AngleSharp.Html.Parser.HtmlParser().ParseDocument(htmlBody);
    var player = new PlayerExport();
    IElement mainContent = document.QuerySelector(".main_left");

    int index = 0;
    while (index < mainContent.Children.Length)
    {
        IElement current = mainContent.Children[index];

        // ParseDetail and ParseQuality return the index to continue from, so each of them
        // may move the cursor; 'current' still refers to the element read at the top.
        index = ParseDetail(player, mainContent, index, current);
        index = ParseQuality(player.Quality, mainContent, index, current);
        ParseStats(player, current);

        index++;
    }

    return player;
}
/// <summary>
/// Reads and preprocesses the page file at <c>Path</c> and returns a render fragment for it.
/// Markdown files (<c>.md</c>) are first converted to HTML with Markdig; <c>.md</c>,
/// <c>.html</c> and <c>.htm</c> pages are parsed and compiled into content, any other
/// extension renders a "Not found page" message.
/// </summary>
/// <param name="NM">Navigation manager handed to <c>compile</c>.</param>
/// <param name="user">User the page is preprocessed and compiled for.</param>
/// <returns>A fragment that renders the page, or a not-found message.</returns>
public async Task<RenderFragment> RenderAsync(NavigationManager NM, Entities.User user)
{
    try
    {
        var rawText = Lecture.ReadPageFile(Path, Branch);
        var source = await preprocessAsync(rawText, user);

        var extension = new System.IO.FileInfo(Path).Extension;
        var isMarkdown = extension == ".md";

        if (isMarkdown)
        {
            var pipeline = Markdig.MarkdownExtensions
                .UseAdvancedExtensions(new Markdig.MarkdownPipelineBuilder())
                .Build();

            // Double the backslash in front of brackets, parentheses and braces before
            // the text is handed to Markdig.
            var escaped = source
                .Replace("\\[", "\\\\[").Replace("\\]", "\\\\]")
                .Replace("\\(", "\\\\(").Replace("\\)", "\\\\)")
                .Replace("\\{", "\\\\{").Replace("\\}", "\\\\}");

            source = Markdig.Markdown.ToHtml(escaped, pipeline);
        }

        var isHtml = extension == ".html" || extension == ".htm";
        if (!isMarkdown && !isHtml)
        {
            return (builder) => builder.AddMarkupContent(0, $"Not found page `{Path}'");
        }

        // Parsing and compiling are deferred until the fragment is actually rendered.
        return (builder) =>
        {
            var options = new AngleSharp.Html.Parser.HtmlParserOptions();
            var parser = new AngleSharp.Html.Parser.HtmlParser(options);
            var doc = parser.ParseDocument(source);
            var content = compile(NM, doc.Body, user);
            builder.AddContent(0, content);
        };
    }
    catch (FileNotFoundException e)
    {
        return (builder) => builder.AddMarkupContent(0, $"Not found page `{Path}': {e.Message}");
    }
}
/// <summary>
/// Pages through the career listing for the given player type and year range with the
/// web driver and collects the JSON produced by <c>GetPageAsJson</c> for every page.
/// </summary>
/// <param name="playerType">Player type to retrieve; also used to look up the position string.</param>
/// <param name="startYear">First year passed to <c>GetCareerUrl</c>.</param>
/// <param name="endYear">Last year passed to <c>GetCareerUrl</c>.</param>
/// <returns>The JSON strings of all pages, in retrieval order.</returns>
private IEnumerable<string> GetPlayersJson(PlayerType playerType, int startYear, int endYear)
{
    // A page holding exactly this many rows is taken as a sign that another page follows.
    const int pageSize = 100;

    int offset = 0;
    Extensions.PlayerPositions.TryGetValue(playerType, out string pos);

    driver.Navigate().GoToUrl(GetCareerUrl(playerType, offset, startYear, endYear));
    Console.WriteLine($"Now retrieving {playerType} players.");
    Console.WriteLine("Please wait... (This may take a while)");

    var jsons = new List<string>();
    var parser = new AngleSharp.Html.Parser.HtmlParser();
    bool paginate;

    do
    {
        var document = parser.ParseDocument(driver.PageSource);

        // Materialise the rows once: the query was previously enumerated twice
        // (once for the JSON conversion and once more for counting).
        var playerRows = document
            .QuerySelectorAll("tbody > tr:not(.thead)")
            .Select(el => el.Children.ToList())
            .ToList();

        // Append in place instead of rebuilding the whole list for every page.
        jsons.AddRange(GetPageAsJson(playerRows, pos));

        paginate = playerRows.Count == pageSize;
        if (paginate)
        {
            offset += pageSize;
            driver.Navigate().GoToUrl(GetCareerUrl(playerType, offset, startYear, endYear));
        }
    }
    while (paginate);

    return jsons;
}
/// <summary>
/// Extracts the http/https targets of all <c>a</c> elements in the given HTML, with the
/// query string cut off. Targets that are not absolute URIs are resolved against the
/// scheme-and-authority part of <paramref name="baseUrl"/>.
/// </summary>
/// <param name="htmlText">HTML to scan for anchors.</param>
/// <param name="baseUrl">Address whose authority is used to resolve non-absolute targets.</param>
/// <returns>The collected links in document order (duplicates are kept).</returns>
private static List<Uri> GetLinks(string htmlText, Uri baseUrl)
{
    var document = new AngleSharp.Html.Parser.HtmlParser().ParseDocument(htmlText);
    var links = new List<Uri>();
    var hostUrl = new Uri(baseUrl.GetLeftPart(UriPartial.Authority));

    foreach (var anchor in document.QuerySelectorAll("a"))
    {
        var hrefAttribute = anchor.GetAttribute("href");
        if (hrefAttribute == null)
        {
            continue;
        }

        // Try the value as an absolute URI first, then relative to the host root.
        Uri link;
        var parsed = Uri.TryCreate(hrefAttribute, UriKind.Absolute, out link)
                     || Uri.TryCreate(hostUrl, hrefAttribute, out link);
        if (!parsed)
        {
            continue;
        }

        var isWebLink = link.Scheme == Uri.UriSchemeHttp || link.Scheme == Uri.UriSchemeHttps;
        if (!isWebLink)
        {
            continue;
        }

        if (!string.IsNullOrEmpty(link.Query))
        {
            // Keep only the part in front of the first '?'.
            var linkString = link.ToString();
            var queryStart = linkString.IndexOf('?');
            link = new Uri(linkString.Substring(0, queryStart));
        }

        Debug.Assert(string.IsNullOrEmpty(link.Query), "Query параметры в ссылке не пусты");
        links.Add(link);
    }

    return links;
}
/// <summary>
/// Builds a nested list table of contents from the H2, H3 and H4 headings of the given HTML.
/// </summary>
/// <param name="htmlHelper">The HTML helper this extension method is attached to (not used).</param>
/// <param name="content">HTML whose headings are collected.</param>
/// <returns>The tag left on the stack after the final unwind to level 0.</returns>
public static IHtmlContent TableOfContents(this IHtmlHelper htmlHelper, string content)
{
    var document = new AngleSharp.Html.Parser.HtmlParser().ParseDocument(content);

    // Stack of open tags together with the level each one was pushed at; the Unwind
    // extension closes entries down to the requested level.
    var toc = new Stack<(TagBuilder tag, int level)>();
    toc.Push((new TagBuilder("ul"), 0));

    // Creates the <li> entry for a heading from the heading's inner HTML.
    TagBuilder BuildItem(string innerHtml)
    {
        var item = new TagBuilder("li");
        item.InnerHtml.AppendHtml(innerHtml);
        return item;
    }

    foreach (var heading in EnumerateHeadings(document.Children))
    {
        var tagName = heading.TagName;

        if (tagName == "H2")
        {
            toc.Unwind(0);
            toc.Push((BuildItem(heading.InnerHtml), 1));
        }
        else if (tagName == "H3")
        {
            toc.Unwind(2);
            if (toc.Peek().tag.TagName != "ul")
            {
                toc.Push((new TagBuilder("ul"), 2));
            }

            toc.Push((BuildItem(heading.InnerHtml), 3));
        }
        else if (tagName == "H4")
        {
            toc.Unwind(4);
            if (toc.Peek().tag.TagName != "ul")
            {
                toc.Push((new TagBuilder("ul"), 3));
            }

            toc.Push((BuildItem(heading.InnerHtml), 4));
        }

        // H1 (which should not occur) and every other tag are ignored.
    }

    // Unwind whatever is still open and hand back the remaining tag.
    toc.Unwind(0);
    return toc.Pop().tag;
}
// AngleSharp 0.12.1
//
// Playground entry point. It exercises, in order:
//   1. parsing the htmlContent field and running a one-line DOM script through Jint;
//   2. navigating the Browser to a test page, attaching the resulting document to a
//      Window bound to the same engine, and running jQuery-based scripts against it;
//   3. a second, independent engine/Window pair used to poke at Window members
//      (onappinstalled, DevicePixelRatio, myValue).
// Several locals are assigned but not read again inside this method (window1d, newValue,
// newValueFromDOM, vals, w); they look like debugger inspection points.
static void Main(string[] args)
{
    // --- Part 1: parse static content and run a script against it ---
    IDocument htmlDocument = new Document();
    var parser = new AngleSharp.Html.Parser.HtmlParser();
    htmlDocument.HtmlDocument = parser.ParseDocument(htmlContent);

    // NOTE(review): loadedToken is declared but never assigned in this method, so nothing
    // here ever requests cancellation; the task below loops until the process ends.
    CancellationToken loadedToken;
    ((AngleSharp.Dom.Document)htmlDocument.HtmlDocument).DelayLoad(Task.Run(async() =>
    {
        while (true)
        {
            await Task.Delay(10000, loadedToken); // <- await with cancellation
        }
    }, loadedToken));

    string script1 = "document.getElementById('content').textContent = 'this is the content';";
    Jint.Engine engine = new Jint.Engine();
    engine.SetValue("document", htmlDocument.HtmlDocument);
    engine.Execute(script1);

    // --- Part 2: navigate to the test page and bind its document to a Window ---
    Browser browser = new Browser();
    IDocument doc = browser.Navigate("https://browsesharp.org/testsitesjqueryrender.html");
    Window window1 = new Window(engine);
    window1.document = doc.HtmlDocument;
    window1.InitializeEngine();

    // Expose a "write" function to scripts that prints to the console.
    Action <string> console = (val) => { Console.WriteLine(val); };
    engine.SetValue("write", console);

    // Accumulates page script text that is executed near the end of this part.
    var scripts = "";
    // Counter bumped for every page script that mentions jQuery; only written, never read here.
    int skipFirst = -3;
    var jquery = System.IO.File.ReadAllText(@"../../jquery.js");
    engine.Execute("window.test = 'test';");
    engine.Execute("var noGlobal = false; ");
    dynamic window1d = window1;

    // Load jQuery and register a ready handler; failures are only reported on the console.
    try
    {
        engine.Execute(jquery + "\nvar document = window.document;\nvar $ = window.jQuery;\n$(document).ready(function(){\n$('h3.mt-4').text('hello there');\n});\n var document = window.document;\n$(document).ready() ");
    }
    catch (Exception ex)
    {
        Console.Write(String.Format("Execption: {0}", ex.StackTrace));
    }

    engine.Execute("window.jQuery.ready();");
    engine.Execute("(function($, window, document){$('#Area1').text('hello there');})(window.jQuery, window, window.document);");
    string newValue = engine.Execute("$('#Area1').text()").GetCompletionValue().ToString(); // This is the value set in the previous line with jquery
    string newValueFromDOM = window1.document.GetElementById("Area1").TextContent; // Get new value from DOM

    // NOTE(review): the script text below ends in an unterminated string literal and does
    // not look like valid JavaScript - confirm whether this call is meant to be live.
    engine.Execute("var $ = window.jQuery()('");

    // NOTE(review): index 6 is hard-coded; presumably tied to the layout of the test page.
    scripts += doc.Scripts[6].JavascriptString;
    foreach (var script in doc.Scripts)
    {
        if (script.Content.Contains("jQuery"))
        {
            skipFirst++;
            continue;
        }
        // Scripts without "jQuery" are currently skipped as well: nothing is executed or collected here.
    }

    // Run the collected script text; any exception is swallowed (an empty string is written).
    try
    {
        var result = engine.Execute(scripts);
    }
    catch (Exception ex)
    {
        Console.Write("");
    }

    string[] vals = { "div" };
    Console.Write("!");

    // --- Part 3: more experiments with a fresh engine and Window ---
    Jint.Engine engine2 = new Jint.Engine();
    Window window = new Window(engine2);
    window.InitializeEngine();

    // Assign and invoke the handler from C# first ...
    window.onappinstalled = Console.Write;
    window.onappinstalled("on app installed works");
    Console.WriteLine(window.DevicePixelRatio);
    var pixRatio = engine2.Execute("window.devicePixelRatio");

    // ... then invoke and reassign it from script.
    engine2.Execute("window.onappinstalled('asd');");
    engine2.Execute("window.onappinstalled = window.LogArray");
    Console.WriteLine(pixRatio);
    Console.WriteLine(window.DevicePixelRatio);

    // Set a property from script and read the window object back out of the engine.
    var windowVal = engine2.Execute("window.myValue = 35;");
    var w = (Window)windowVal.GetValue("window").ToObject();
    engine2.Execute("window.myValue = 'MyValue';");
}
/// <summary>
/// Walks every page of a forum topic and collects, per poster, the topic ids linked from
/// that poster's messages.
/// </summary>
/// <param name="topicid">
/// Id of the topic to read. When the topic is not found, the id is retried as a post id and,
/// if the topic title link can be extracted from that page, replaced by the real topic id.
/// </param>
/// <param name="categoryId">Category id stored with every poster entry.</param>
/// <returns>
/// Poster name mapped to (categoryId, linked topic ids). Whatever has been collected so far
/// is returned when the topic cannot be found (a warning box is shown in that case).
/// </returns>
public Dictionary<string, Tuple<int, List<int>>> GetKeeps(int topicid, int categoryId)
{
    const string topicNotFoundMarker = "<div class=\"mrg_16\">Тема не найдена</div>";

    var dictionary = new Dictionary<string, Tuple<int, List<int>>>();
    var parser = new AngleSharp.Html.Parser.HtmlParser();
    // Built once instead of once per link.
    var topicLinkPattern = new Regex(@"viewtopic.php\?t=([0-9]+)$");

    // Paging offset passed as "&start=" (omitted for the first page); advanced by 30 per page.
    var num = 0;
    string str1;

    do
    {
        // Fetch the current page. Runs again only when the id turned out to be a post id
        // and the real topic id could be recovered (formerly a goto back to the download).
        while (true)
        {
            var _url = string.Format("https://{2}/forum/viewtopic.php?t={0}{1}", topicid, num == 0 ? "" : "&start=" + num, Settings.Current.HostRuTrackerOrg);
            str1 = DownloadWebPage(_url);
            if (!str1.Contains(topicNotFoundMarker))
            {
                break;
            }

            // Not found as a topic: retry the same number as a post id.
            Thread.Sleep(500);
            str1 = DownloadWebPage($"https://{Settings.Current.HostRuTrackerOrg}/forum/viewtopic.php?p={topicid}");
            if (str1.Contains(topicNotFoundMarker))
            {
                MessageBox.Show("Тема не найдена, или неправильно указана ссылка на раздел: " + _url, "Ошибка", icon: MessageBoxIcon.Warning, buttons: MessageBoxButtons.OK);
                return dictionary;
            }

            // Pull the topic id out of the link found on the line(s) carrying id="topic-title".
            var topicUrlPrefix = $"https://{Settings.Current.HostRuTrackerOrg}/forum/viewtopic.php?t=";
            var s = string.Join("\r\n", str1.Split('\r', '\n').Where(x => x.Contains("id=\"topic-title\"")))
                .Split(new char[4] { '"', '<', '>', ' ' }, StringSplitOptions.RemoveEmptyEntries)
                .Where(x => x.Contains(topicUrlPrefix))
                .Select(x => x.Replace(topicUrlPrefix, ""))
                .FirstOrDefault();

            if (string.IsNullOrWhiteSpace(s))
            {
                // No topic id recoverable: continue with the page fetched by post id.
                break;
            }

            topicid = int.Parse(s);
        }

        var document = parser.ParseDocument(str1);
        foreach (var post in document.QuerySelectorAll("table#topic_main > tbody"))
        {
            if (!post.ClassList.Contains("row1") && !post.ClassList.Contains("row2"))
            {
                continue;
            }

            // Skip posts without a linked nick instead of failing on a null element.
            var nickLink = post.QuerySelector("td.poster_info > p.nick > a");
            if (nickLink == null)
            {
                continue;
            }

            var keeperName = nickLink.Text().Trim();
            Tuple<int, List<int>> keeperEntry;
            if (!dictionary.TryGetValue(keeperName, out keeperEntry))
            {
                keeperEntry = new Tuple<int, List<int>>(categoryId, new List<int>());
                dictionary.Add(keeperName, keeperEntry);
            }

            foreach (var link in post.QuerySelectorAll("td.message div.post_body a"))
            {
                // Anchors without an href attribute used to cause a null dereference here.
                var href = link.GetAttribute("href");
                if (href == null)
                {
                    continue;
                }

                var match = topicLinkPattern.Match(href.Trim());
                if (!match.Success)
                {
                    continue;
                }

                var topicId = match.Groups[1].Value;
                try
                {
                    keeperEntry.Item2.Add(int.Parse(topicId));
                }
                catch (Exception ex)
                {
                    _logger.Warn(topicid + "\t" + topicId + "\t" + ex.Message);
                }
            }
        }

        num += 30;
    }
    // Continue while the page offers a "next" link. The former "|| num == 0" check was
    // dropped: num has always been advanced by the time the condition is evaluated.
    while (str1.Contains("\">След.</a></b></p>"));

    return dictionary;
}
/// <summary>
/// Initialises the instance by parsing the string form of <paramref name="html"/> with
/// AngleSharp and storing the resulting document in <c>_doc</c>.
/// </summary>
/// <param name="html">HTML value whose <c>ToString()</c> output is parsed.</param>
protected HtmlParser(Html html)
{
    _doc = new AngleSharp.Html.Parser.HtmlParser().ParseDocument(html.ToString());
}