/// <summary>
/// Manual test harness. Parses a fixed HTML snippet, dumps it via <c>Dfs</c>,
/// renders it through <c>RichTextBlockVisitor</c>, then feeds every line read from
/// standard input to <c>GetIPAddr</c> and prints each returned entry.
/// </summary>
/// <param name="args">Command-line arguments (not used).</param>
static async Task Main(string[] args)
{
    string str1 = "<font color=\\\"#00a1d6\\\">>>Po.6214</font><br />另外已知的是套壳客户端发不了图片,因为就是个套壳网页所以没办法唤醒相册[^o^]ノ";

    var config = Configuration.Default;
    var context = new BrowsingContext(config);
    var document = await context.OpenAsync(req => req.Content(str1));
    var visitor = new RichTextBlockVisitor();

    Dfs(document.DocumentElement, 1);

    var documentNode = new HTMLNode((IHtmlHtmlElement)document.DocumentElement);
    var renderer = documentNode.Accept(visitor);
    renderer.Print();

    // Wait for a key press before entering the interactive loop.
    Console.Read();

    // Console.ReadLine() returns null once standard input is exhausted; stop there
    // instead of passing null to GetIPAddr in an endless loop.
    string str;
    while ((str = Console.ReadLine()) != null)
    {
        var ans = GetIPAddr(str);
        ans.ForEach(Console.WriteLine);
    }

    /*
     * Disabled API walkthrough, kept for reference:
     *
     * var config = new ApiConfig();
     * ForumController f = new ForumController(config);
     * var res = await f.GetForumsAsync();
     * var p = new PostController(config);
     * var posts = await p.GetPostAsync(res[0].Id, 1);
     * var t = new ThreadController(config);
     * var threads = await t.GetThreadsAsync(posts[0].No, 1);
     * Console.ReadLine();
     */
}
/// <summary>
/// See http://www.w3.org/TR/html-imports/#dfn-import-request.
/// </summary>
/// <param name="configuration">Configuration handed to the options used for opening the imported document.</param>
/// <param name="loader">Resource loader used to start the download of the import request.</param>
/// <returns>A task that completes once the import has been opened, or immediately when a cycle was detected.</returns>
public override async Task LoadAsync(IConfiguration configuration, IResourceLoader loader)
{
    var linkElement = Link;
    var owner = linkElement.Owner;
    var imports = ImportLists.GetOrCreateValue(owner);
    var target = Url;
    var importRequest = linkElement.CreateRequestFor(target);
    var entry = new ImportEntry
    {
        Relation = this,
        IsCycle = CheckCycle(owner, target)
    };

    _isasync = linkElement.HasAttribute(AttributeNames.Async);

    // The entry is registered even for a cycle; only the download is skipped.
    imports.Add(entry);

    if (entry.IsCycle)
    {
        return;
    }

    // Completed by the response callback once the nested document has been opened.
    // NOTE(review): if the callback is never invoked or OpenAsync throws, this source
    // is never completed and the final await would not finish - confirm against ProcessResponse.
    var opened = new TaskCompletionSource<Boolean>();
    var pending = loader.DownloadAsync(importRequest);
    SetDownload(pending);

    await linkElement.ProcessResponse(pending, async response =>
    {
        var nestedContext = new BrowsingContext(owner.Context, Sandboxes.None);
        var nestedOptions = new CreateDocumentOptions(response, configuration)
        {
            ImportAncestor = owner
        };

        _import = await nestedContext.OpenAsync(nestedOptions, CancellationToken.None).ConfigureAwait(false);
        opened.SetResult(true);
    }).ConfigureAwait(false);

    await opened.Task.ConfigureAwait(false);
}
/// <summary>
/// Quick and dirty scraping. Trying to throw an exception if the format of the website changes, so that no wrong values are returned.
/// </summary>
/// <returns>
/// <c>null</c> when the page contains no <c>.card</c> elements; otherwise a <c>Result</c> built from the
/// status date, the values of the first three cards, the "Fully vaccinated" value of the fourth card and
/// the "Difference since" label of the first card.
/// </returns>
/// <exception cref="InvalidFormatException">
/// A title, key or the status line does not match the expected text, or the status date cannot be parsed.
/// </exception>
/// <exception cref="InvalidOperationException">
/// An expected element is missing or not unique (raised by the LINQ <c>Single</c>/<c>First</c> lookups).
/// </exception>
public static async Task<Result?> Scrape()
{
    var context = new BrowsingContext(Configuration.Default.WithDefaultLoader());
    var document = await context.OpenAsync("https://www.covid19.admin.ch/en/overview");
    var cards = document.QuerySelectorAll(".card");

    if (cards.Length == 0)
    {
        return null;
    }

    // The first three cards carry cases, hospitalisations and deaths, in that order.
    var values = cards.Take(3).Select(card =>
    {
        var title = card.Descendents<IElement>().Single(e => e.ClassList.Contains("card__title"));
        var key = card.Descendents<IElement>().First(e => e.ClassList.Contains("bag-key-value-list__entry-key"));

        // Ordinal: this is a fixed marker, not linguistic text.
        if (!key.TextContent.StartsWith("Difference since", StringComparison.Ordinal))
        {
            throw new InvalidFormatException();
        }

        var value = card.Descendents<IElement>().First(e => e.ClassList.Contains("bag-key-value-list__entry-value"));
        return (Title: title.TextContent, Value: value.TextContent, DifferenceSince: key.TextContent);
    }).ToArray();

    if (values.Length != 3
        || values[0].Title != "Laboratory-confirmed cases"
        || values[1].Title != "Laboratory-confirmed hospitalisations"
        || values[2].Title != "Laboratory-confirmed deaths")
    {
        throw new InvalidFormatException();
    }

    // The fourth card is expected to be the vaccination card; its fourth key is "Fully vaccinated".
    var vaccinated = cards.Skip(3).Take(1).Select(card =>
    {
        var title = card.Descendents<IElement>().Single(e => e.ClassList.Contains("card__title"));
        if (title.TextContent != "Vaccinated people")
        {
            throw new InvalidFormatException();
        }

        var key = card.Descendents<IElement>()
            .Where(e => e.ClassList.Contains("bag-key-value-list__entry-key"))
            .Skip(3)
            .First();
        if (key.TextContent != "Fully vaccinated")
        {
            throw new InvalidFormatException();
        }

        // The value is looked up two levels above the key element.
        var value = key.Parent.Parent.Descendents<IElement>()
            .Where(e => e.ClassList.Contains("bag-key-value-list__entry-value"))
            .First();
        return value.TextContent;
    }).First();

    // Date is in the subtitle of every card
    const string prefix = "Source: FOPH – Status: ";
    const string dateFormat = "dd.MM.yyyy";
    var encodedDate = document.QuerySelector(".card__subtitle")?.TextContent.Trim();

    // A missing subtitle or a status line that is too short counts as a format change,
    // rather than surfacing as a NullReferenceException or ArgumentOutOfRangeException.
    if (encodedDate == null
        || !encodedDate.StartsWith(prefix, StringComparison.Ordinal)
        || encodedDate.Length < prefix.Length + dateFormat.Length
        || !DateTime.TryParseExact(
            encodedDate.Substring(prefix.Length, dateFormat.Length),
            dateFormat,
            CultureInfo.InvariantCulture,
            DateTimeStyles.AssumeLocal,
            out var date))
    {
        throw new InvalidFormatException();
    }

    return new Result(date, values[0].Value, values[1].Value, values[2].Value, vaccinated, values[0].DifferenceSince);
}
/// <summary>
/// Opens the given response as a document in a new browsing context derived from the
/// parent document's context and stores the result in <c>_childDocument</c>.
/// </summary>
/// <param name="response">The response to open as a document.</param>
/// <returns>A task that completes once the child document has been assigned.</returns>
protected override async Task ProcessResponseAsync(IResponse response)
{
    var childContext = new BrowsingContext(_parentDocument.Context, Sandboxes.None);
    var childOptions = new CreateDocumentOptions(response, _options)
    {
        // Marks the parent document as the import ancestor of the new document.
        ImportAncestor = _parentDocument
    };

    var opening = childContext.OpenAsync(childOptions, CancellationToken.None);
    _childDocument = await opening.ConfigureAwait(false);
}
/// <summary>
/// Parses the given markup with a default-configured browsing context and wraps the
/// resulting document element in an <c>HTMLNode</c>.
/// </summary>
/// <param name="str">The HTML source; <c>null</c> is treated as an empty string.</param>
/// <returns>An <c>HTMLNode</c> created from the parsed document's root element.</returns>
public static async Task<HTMLNode> LoadHtmlAsync(string str)
{
    var markup = str ?? string.Empty;
    var browsing = new BrowsingContext(Configuration.Default);
    var parsed = await browsing.OpenAsync(request => request.Content(markup));
    var root = (IHtmlHtmlElement)parsed.DocumentElement;

    return new HTMLNode(root);
}
/// <summary>
/// See http://www.w3.org/TR/html-imports/#dfn-import-request.
/// </summary>
/// <param name="configuration">Configuration handed to the options used for opening the imported document.</param>
/// <param name="loader">Resource loader used to start the download of the import request.</param>
/// <returns>A task that completes once the import has been opened, or immediately when a cycle was detected.</returns>
public override async Task LoadAsync(IConfiguration configuration, IResourceLoader loader)
{
    var source = Link;
    var parentDocument = source.Owner;
    var knownImports = ImportLists.GetOrCreateValue(parentDocument);
    var address = Url;
    var outgoing = source.CreateRequestFor(address);
    var record = new ImportEntry
    {
        Relation = this,
        IsCycle = CheckCycle(parentDocument, address)
    };

    _isasync = source.HasAttribute(AttributeNames.Async);

    // Cyclic imports are still recorded; they just do not trigger a download.
    knownImports.Add(record);

    if (record.IsCycle)
    {
        return;
    }

    // Signalled from inside the response callback after the nested document is available.
    // NOTE(review): nothing completes this source if the callback is skipped or OpenAsync
    // fails, in which case the last await would never finish - confirm against ProcessResponse.
    var importReady = new TaskCompletionSource<Boolean>();
    var transfer = loader.DownloadAsync(outgoing);
    SetDownload(transfer);

    await source.ProcessResponse(transfer, async response =>
    {
        var importContext = new BrowsingContext(parentDocument.Context, Sandboxes.None);
        var importOptions = new CreateDocumentOptions(response, configuration)
        {
            ImportAncestor = parentDocument
        };

        _import = await importContext.OpenAsync(importOptions, CancellationToken.None).ConfigureAwait(false);
        importReady.SetResult(true);
    }).ConfigureAwait(false);

    await importReady.Task.ConfigureAwait(false);
}
/// <summary>
/// Downloads the Russian Wikipedia "IMDb top 250" page, shows the raw HTML in
/// <c>textBoxFilms</c>, parses the table rows into formatted film entries and returns
/// the text selected by the check boxes: a random entry, the entries matching the
/// filter box, or the full list.
/// </summary>
/// <returns>
/// The formatted text, or <c>null</c> when any step failed (the error is shown in a message box).
/// </returns>
async Task<string> LoadData()
{
    try
    {
        string response;

        using (HttpClient hc = new HttpClient())
        using (HttpResponseMessage hrm = await hc.GetAsync("https://ru.wikipedia.org/wiki/250_%D0%BB%D1%83%D1%87%D1%88%D0%B8%D1%85_%D1%84%D0%B8%D0%BB%D1%8C%D0%BC%D0%BE%D0%B2_%D0%BF%D0%BE_%D0%B2%D0%B5%D1%80%D1%81%D0%B8%D0%B8_IMDb"))
        {
            hrm.EnsureSuccessStatusCode();
            response = await hrm.Content.ReadAsStringAsync();
            textBoxFilms.Text = response;
        }

        using (BrowsingContext bc = new BrowsingContext(Configuration.Default))
        {
            var document = await bc.OpenAsync(m => m.Content(response));
            List<string> data = new List<string>();

            // The first two <tr> elements are skipped, as in the original layout assumption.
            foreach (var row in document.QuerySelectorAll("tr").Skip(2))
            {
                var cells = row.QuerySelectorAll("td");

                // Rows without the five expected cells are ignored
                // instead of aborting the whole load.
                if (cells.Length < 5)
                {
                    continue;
                }

                data.Add($"{cells[0].TextContent}. {cells[1].TextContent}\r\n Год выпуска: {cells[2].TextContent}\r\n Режиссер: {cells[3].TextContent}\r\n Жанры: {cells[4].TextContent}\r\n\r\n");
            }

            // Random selection takes precedence over the filter when both boxes are checked.
            if (checkBoxRandom.Checked)
            {
                // Bound the index by the number of parsed entries rather than a fixed 250.
                // An empty list still raises an exception here, which the catch below reports.
                return $"Случайный фильм:\r\n\r\n{data[new Random().Next(data.Count)]}";
            }

            if (checkBox1.Checked)
            {
                // Filter terms are separated by '-'; an entry matches when it contains any term.
                var ab = textBoxFilters.Text.Split('-');
                var matching = string.Concat(data.Where(entry => ab.Any(term => entry.Contains(term))));
                return $"Фильтр: {string.Join(", ", ab)}\r\n\r\n{matching}";
            }

            // Neither box is checked: return the complete list under the page's status headline.
            var headline = document.QuerySelectorAll("span")
                .First(m => m.ClassName == "mw-headline" && m.TextContent.StartsWith("Состояние", StringComparison.Ordinal))
                .TextContent;
            return $"Рейтинг фильмов по версии IMDb\r\n{headline}:\r\n{string.Concat(data)}";
        }
    }
    catch (Exception e)
    {
        MessageBox.Show($"Попробуйте еще раз!\nОшибка: {e.Message}");
    }

    return null;
}
/// <summary>
/// Builds the top-level <c>obj</c> and registers its built-in handlers in <c>funcs</c>:
/// <c>true</c>, <c>false</c>, <c>out</c>, <c>all</c>, <c>any</c>, <c>none</c>,
/// <c>has-text</c>, <c>sel</c>, <c>id</c> and <c>site</c>. The handlers share one browsing
/// context, the most recently opened document and the most recently selected element.
/// </summary>
/// <returns>The populated top-level object.</returns>
public static obj TopLevel()
{
    var root = new obj();
    BrowsingContext browser = new BrowsingContext(Configuration.Default.WithDefaultLoader());

    // State shared by the handlers below through closure capture.
    IDocument page = null;
    IElement selected = null;

    // Literal results.
    root.funcs["true"] = async (invocation) =>
    {
        Ensure.ArityMatches(invocation, 0).NoBlock();
        root.result = true;
        return true;
    };

    root.funcs["false"] = async (invocation) =>
    {
        Ensure.ArityMatches(invocation, 0).NoBlock();
        root.result = false;
        return true;
    };

    // Stores the current result under the given output name and clears it.
    root.funcs["out"] = async (invocation) =>
    {
        Ensure.ArityMatches(invocation, 1);
        root.Output[invocation.args[0]] = root.result;
        root.result = null;
        return false;
    };

    // Result is false as soon as one child sets a false result, otherwise true.
    root.funcs["all"] = async (invocation) =>
    {
        Ensure.HasBlock(invocation);
        foreach (call child in invocation.block.calls)
        {
            await root.eval(child);
            if (root.resultSet && root.result is false)
            {
                root.result = false;
                return false;
            }
        }

        root.result = true;
        return false;
    };

    // Stops at the first child that sets a true result (leaving it in place), otherwise false.
    root.funcs["any"] = async (invocation) =>
    {
        Ensure.HasBlock(invocation);
        foreach (call child in invocation.block.calls)
        {
            await root.eval(child);
            if (root.resultSet && root.result is true)
            {
                return false;
            }
        }

        root.result = false;
        return false;
    };

    // Result is false as soon as one child sets a true result, otherwise true.
    root.funcs["none"] = async (invocation) =>
    {
        Ensure.ArityMatches(invocation, 0).HasBlock();
        foreach (call child in invocation.block.calls)
        {
            await root.eval(child);
            if (root.resultSet && root.result is true)
            {
                root.result = false;
                return false;
            }
        }

        root.result = true;
        return false;
    };

    // Tests whether the selected element's text contains the argument.
    root.funcs["has-text"] = async (invocation) =>
    {
        Ensure.ArityMatches(invocation, 1).NoBlock().ElementSelected(root);
        root.result = selected.TextContent.Contains(invocation.args[0]);
        return true;
    };

    // Selects an element of the current document by CSS selector.
    root.funcs["sel"] = async (invocation) =>
    {
        Ensure.ArityMatches(invocation, 1).NoBlock().DomBuilt(page);
        selected = page.QuerySelector(invocation.args[0]);
        return false;
    };

    // Selects an element of the current document by id.
    root.funcs["id"] = async (invocation) =>
    {
        Ensure.ArityMatches(invocation, 1).DomBuilt(page).NoBlock();
        selected = page.GetElementById(invocation.args[0]);
        return false;
    };

    // Opens the given address and evaluates the attached block (if any) against it.
    root.funcs["site"] = async (invocation) =>
    {
        Ensure.ArityMatches(invocation, 1);
        page = await browser.OpenAsync(invocation.args[0]);

        // Output key: the argument of the block's first "out" call, falling back to the address.
        var outName = invocation.block?.calls?.FirstOrDefault(x => x.name == "out")?.args?[0] ?? invocation.args[0];

        if (page == null)
        {
            var failMsg = ObjException.UrlFailed(invocation).Message;
            root.Output[$"{outName}-error"] = failMsg;
            root.Output[outName] = false;
            return false;
        }

        if (invocation.block == null)
        {
            return false;
        }

        try
        {
            await invocation.block.eval(root);
        }
        // ObjException is deliberately not handled here: it indicates incorrectly
        // constructed code and has to pass through to the caller.
        catch (Exception ex) when (!(ex is ObjException))
        {
            root.Output[$"{outName}-error"] = ex.Message;

            // In this case, we don't know if the site is up or down,
            // just that something went wrong
            root.Output[outName] = null;
            return false;
        }

        return false;
    };

    return root;
}