Esempio n. 1
0
        /// <summary>
        /// Demo entry point. Parses a fixed HTML fragment, prints the structure produced by
        /// <c>RichTextBlockVisitor</c>, then feeds every line typed on standard input to
        /// <c>GetIPAddr</c> and prints each returned entry.
        /// </summary>
        /// <param name="args">Command-line arguments; not used.</param>
        static async Task Main(string[] args)
        {
            string str1     = "<font color=\\\"#00a1d6\\\">&gt;&gt;Po.6214</font><br />另外已知的是套壳客户端发不了图片,因为就是个套壳网页所以没办法唤醒相册[^o^]ノ";
            var    config   = Configuration.Default;
            var    context  = new BrowsingContext(config);
            var    document = await context.OpenAsync(req => req.Content(str1));

            var visitor = new RichTextBlockVisitor();

            Dfs(document.DocumentElement, 1);
            var documentNode = new HTMLNode((IHtmlHtmlElement)document.DocumentElement);
            var renderer     = documentNode.Accept(visitor);

            renderer.Print();
            Console.Read();

            // Console.ReadLine returns null once standard input is closed (EOF or a
            // redirected file running out). Stop there instead of looping forever and
            // handing null to GetIPAddr.
            string str;
            while ((str = Console.ReadLine()) != null)
            {
                var ans = GetIPAddr(str);
                ans.ForEach(Console.WriteLine);
            }

            /*
             * var config = new ApiConfig();
             * ForumController f = new ForumController(config);
             * var res = await f.GetForumsAsync();
             * var p = new PostController(config);
             * var posts = await p.GetPostAsync(res[0].Id, 1);
             * var t = new ThreadController(config);
             * var threads = await t.GetThreadsAsync(posts[0].No, 1);
             * Console.ReadLine();
             */
        }
        /// <summary>
        /// Registers this relation in the owner document's import list and, unless a cycle
        /// was detected, downloads the linked resource and opens it as a document.
        /// See http://www.w3.org/TR/html-imports/#dfn-import-request.
        /// </summary>
        /// <param name="configuration">Configuration handed to the options of the opened document.</param>
        /// <param name="loader">Loader used to start the download of the request.</param>
        /// <returns>A task that finishes once the import was opened, or right away for a cycle.</returns>
        public override async Task LoadAsync(IConfiguration configuration, IResourceLoader loader)
        {
            var element = Link;
            var owner = element.Owner;
            var imports = ImportLists.GetOrCreateValue(owner);
            var target = Url;
            var request = element.CreateRequestFor(target);
            var entry = new ImportEntry { Relation = this, IsCycle = CheckCycle(owner, target) };

            _isasync = element.HasAttribute(AttributeNames.Async);
            imports.Add(entry);

            // The entry is recorded even for a cycle, but nothing is downloaded then.
            if (entry.IsCycle)
            {
                return;
            }

            var opened = new TaskCompletionSource<Boolean>();
            var pending = loader.DownloadAsync(request);
            SetDownload(pending);

            await element.ProcessResponse(pending, async response =>
            {
                var nestedContext = new BrowsingContext(owner.Context, Sandboxes.None);
                var nestedOptions = new CreateDocumentOptions(response, configuration) { ImportAncestor = owner };

                _import = await nestedContext.OpenAsync(nestedOptions, CancellationToken.None).ConfigureAwait(false);
                opened.SetResult(true);
            }).ConfigureAwait(false);

            // Completed by the callback above once the imported document is available.
            await opened.Task.ConfigureAwait(false);
        }
Esempio n. 3
0
        /// <summary>
        /// Quick and dirty scraping of the overview page at https://www.covid19.admin.ch/en/overview.
        /// The layout is validated along the way so that a changed page raises
        /// <c>InvalidFormatException</c> instead of silently yielding wrong values.
        /// </summary>
        /// <returns>
        /// The scraped figures together with the status date, or <c>null</c> when the page
        /// contains no <c>.card</c> elements at all.
        /// </returns>
        /// <exception cref="InvalidFormatException">
        /// An expected card, title, key, value or the status date is missing or has an
        /// unexpected text.
        /// </exception>
        public static async Task <Result?> Scrape()
        {
            var context  = new BrowsingContext(Configuration.Default.WithDefaultLoader());
            var document = await context.OpenAsync("https://www.covid19.admin.ch/en/overview");

            var cards = document.QuerySelectorAll(".card");

            if (cards.Length == 0)
            {
                return(null);
            }

            // The first three cards hold cases, hospitalisations and deaths.
            var values = cards.Take(3).Select(card =>
            {
                var title = card.Descendents <IElement>().Single(e => e.ClassList.Contains("card__title"));
                var key   = card.Descendents <IElement>().First(e => e.ClassList.Contains("bag-key-value-list__entry-key"));
                // Ordinal: this is a fixed machine-readable label, not linguistic text.
                if (!key.TextContent.StartsWith("Difference since", StringComparison.Ordinal))
                {
                    throw new InvalidFormatException();
                }
                var value = card.Descendents <IElement>().First(e => e.ClassList.Contains("bag-key-value-list__entry-value"));
                return(Title: title.TextContent, Value: value.TextContent, DifferenceSince: key.TextContent);
            }).ToArray();

            if (values.Length != 3 || values[0].Title != "Laboratory-⁠confirmed cases" || values[1].Title != "Laboratory-⁠confirmed hospitalisations" || values[2].Title != "Laboratory-⁠confirmed deaths")
            {
                throw new InvalidFormatException();
            }

            // The fourth card holds the vaccination figures; a missing card is a format change.
            var vaccinatedCard = cards.Skip(3).FirstOrDefault();
            if (vaccinatedCard == null)
            {
                throw new InvalidFormatException();
            }

            var vaccinatedTitle = vaccinatedCard.Descendents <IElement>().Single(e => e.ClassList.Contains("card__title"));
            if (vaccinatedTitle.TextContent != "Vaccinated people")
            {
                throw new InvalidFormatException();
            }

            // The fourth key entry of that card is expected to be "Fully vaccinated".
            var fullyVaccinatedKey = vaccinatedCard.Descendents <IElement>()
                                     .Where(e => e.ClassList.Contains("bag-key-value-list__entry-key"))
                                     .Skip(3)
                                     .FirstOrDefault();
            if (fullyVaccinatedKey == null || fullyVaccinatedKey.TextContent != "Fully vaccinated")
            {
                throw new InvalidFormatException();
            }

            // The matching value is the first value entry below the key's grandparent.
            var vaccinated = fullyVaccinatedKey.Parent?.Parent?
                             .Descendents <IElement>()
                             .FirstOrDefault(e => e.ClassList.Contains("bag-key-value-list__entry-value"))?
                             .TextContent;
            if (vaccinated == null)
            {
                throw new InvalidFormatException();
            }

            // Date is in the subtitle of every card
            var          encodedDate = document.QuerySelector(".card__subtitle")?.TextContent.Trim();
            var          prefix      = "Source: FOPH – Status: ";
            const string dateFormat  = "dd.MM.yyyy";

            // The length check keeps Substring from throwing on a truncated subtitle.
            if (encodedDate == null ||
                !encodedDate.StartsWith(prefix, StringComparison.Ordinal) ||
                encodedDate.Length < prefix.Length + dateFormat.Length ||
                !DateTime.TryParseExact(encodedDate.Substring(prefix.Length, dateFormat.Length), dateFormat, CultureInfo.InvariantCulture, DateTimeStyles.AssumeLocal, out var date))
            {
                throw new InvalidFormatException();
            }

            return(new Result(date, values[0].Value, values[1].Value, values[2].Value, vaccinated, values[0].DifferenceSince));
        }
Esempio n. 4
0
        /// <summary>
        /// Opens the given response as a document in a fresh browsing context derived from
        /// the parent document's context and stores the result in <c>_childDocument</c>.
        /// </summary>
        /// <param name="response">The response to turn into a document.</param>
        protected override async Task ProcessResponseAsync(IResponse response)
        {
            var nestedContext = new BrowsingContext(_parentDocument.Context, Sandboxes.None);
            var openTask      = nestedContext.OpenAsync(
                new CreateDocumentOptions(response, _options) { ImportAncestor = _parentDocument },
                CancellationToken.None);

            _childDocument = await openTask.ConfigureAwait(false);
        }
Esempio n. 5
0
        /// <summary>
        /// Parses the given markup with a default browsing context and wraps the resulting
        /// document element in an <c>HTMLNode</c>.
        /// </summary>
        /// <param name="str">The markup to parse; <c>null</c> is treated as an empty string.</param>
        /// <returns>The node wrapping the parsed document element.</returns>
        public static async Task <HTMLNode> LoadHtmlAsync(string str)
        {
            var browsing = new BrowsingContext(Configuration.Default);
            var parsed   = await browsing.OpenAsync(request => request.Content(str ?? string.Empty));
            var root     = (IHtmlHtmlElement)parsed.DocumentElement;

            return new HTMLNode(root);
        }
Esempio n. 6
0
        /// <summary>
        /// Adds an import entry for this relation to the owning document's list and, when
        /// the entry is not a cycle, downloads the resource and opens it as the import.
        /// See http://www.w3.org/TR/html-imports/#dfn-import-request.
        /// </summary>
        /// <param name="configuration">Configuration passed on to the document options.</param>
        /// <param name="loader">Loader that performs the download.</param>
        /// <returns>A task completing after the import has been opened (immediately for cycles).</returns>
        public override async Task LoadAsync(IConfiguration configuration, IResourceLoader loader)
        {
            var source   = Link;
            var parent   = source.Owner;
            var registry = ImportLists.GetOrCreateValue(parent);
            var address  = Url;
            var request  = source.CreateRequestFor(address);
            var record   = new ImportEntry
            {
                Relation = this,
                IsCycle  = CheckCycle(parent, address)
            };

            _isasync = source.HasAttribute(AttributeNames.Async);
            registry.Add(record);

            var isCyclic = record.IsCycle;

            if (isCyclic == false)
            {
                // Signals that the callback below has finished opening the document.
                var completion = new TaskCompletionSource <Boolean>();
                var transfer   = loader.DownloadAsync(request);
                SetDownload(transfer);

                var processing = source.ProcessResponse(transfer, async response =>
                {
                    var childContext = new BrowsingContext(parent.Context, Sandboxes.None);
                    var childOptions = new CreateDocumentOptions(response, configuration)
                    {
                        ImportAncestor = parent
                    };
                    var opening = childContext.OpenAsync(childOptions, CancellationToken.None);
                    _import = await opening.ConfigureAwait(false);
                    completion.SetResult(true);
                });

                await processing.ConfigureAwait(false);
                await completion.Task.ConfigureAwait(false);
            }
        }
Esempio n. 7
0
        /// <summary>
        /// Downloads the Russian Wikipedia page with the IMDb top-250 list, shows the raw
        /// HTML in <c>textBoxFilms</c>, parses the table rows and builds the text selected
        /// by the check boxes: one random film, the films matching the filter terms, or
        /// the complete list.
        /// </summary>
        /// <returns>
        /// The formatted text, or <c>null</c> when anything failed (the error is shown in
        /// a message box).
        /// </returns>
        async Task <string> LoadData()
        {
            try
            {
                string        response = null;
                List <string> data     = new List <string>();
                using (HttpClient hc = new HttpClient())
                {
                    HttpResponseMessage hrm = await hc.GetAsync("https://ru.wikipedia.org/wiki/250_%D0%BB%D1%83%D1%87%D1%88%D0%B8%D1%85_%D1%84%D0%B8%D0%BB%D1%8C%D0%BC%D0%BE%D0%B2_%D0%BF%D0%BE_%D0%B2%D0%B5%D1%80%D1%81%D0%B8%D0%B8_IMDb");

                    hrm.EnsureSuccessStatusCode();
                    response = await hrm.Content.ReadAsStringAsync();

                    textBoxFilms.Text = response;
                }
                using (BrowsingContext bc = new BrowsingContext(Configuration.Default))
                {
                    var document = await bc.OpenAsync(m => m.Content(response));

                    foreach (var row in document.QuerySelectorAll("tr").Skip(2))
                    {
                        var cells = row.QuerySelectorAll("td");
                        // Rows without the five expected cells (header rows, rows of other
                        // tables) are skipped instead of aborting the whole load.
                        if (cells.Length < 5)
                        {
                            continue;
                        }
                        data.Add($"{cells[0].TextContent}. {cells[1].TextContent}\r\n   Год выпуска: {cells[2].TextContent}\r\n   Режиссер: {cells[3].TextContent}\r\n   Жанры: {cells[4].TextContent}\r\n\r\n");
                    }

                    if (checkBoxRandom.Checked)
                    {
                        // Bound the index by the number of rows actually parsed rather than
                        // a hard-coded 250. An empty list still throws and is reported by
                        // the catch block below.
                        return($"Случайный фильм:\r\n\r\n{data[new Random().Next(data.Count)]}");
                    }

                    if (checkBox1.Checked)
                    {
                        // Filter terms are separated by '-'; a film is kept when it contains any of them.
                        var ab      = textBoxFilters.Text.Split('-');
                        var matches = data.Where(film => ab.Any(term => film.Contains(term)));
                        return($"Фильтр: {string.Join(", ", ab)}\r\n\r\n{string.Concat(matches)}");
                    }

                    // Neither option is checked: return the complete list.
                    return($"Рейтинг фильмов по версии IMDb\r\n{document.QuerySelectorAll("span").First(m => m.ClassName == "mw-headline" && m.TextContent.StartsWith("Состояние")).TextContent}:\r\n{string.Concat(data)}");
                }
            }
            catch (Exception e)
            {
                MessageBox.Show($"Попробуйте еще раз!\nОшибка: {e.Message}");
            }
            return(null);
        }
Esempio n. 8
0
        /// <summary>
        /// Creates the root <c>obj</c> and registers its built-in functions
        /// (<c>true</c>, <c>false</c>, <c>out</c>, <c>all</c>, <c>any</c>, <c>none</c>,
        /// <c>has-text</c>, <c>sel</c>, <c>id</c>, <c>site</c>). The handlers share one
        /// browsing context, the most recently opened document and the most recently
        /// selected element through captured locals.
        /// </summary>
        /// <returns>The root object with all functions registered.</returns>
        /// <remarks>
        /// NOTE(review): the meaning of the boolean each handler returns is defined by the
        /// evaluator, which is not visible here; the values are kept exactly as they were.
        /// </remarks>
        public static obj TopLevel()
        {
            var root = new obj();

            BrowsingContext browser = new BrowsingContext(Configuration.Default.WithDefaultLoader());

            // State shared by the handlers below.
            IDocument page     = null;
            IElement  selected = null;

            // Literal results.
            root.funcs["true"] = async invocation =>
            {
                Ensure.ArityMatches(invocation, 0).NoBlock();
                root.result = true;
                return true;
            };

            root.funcs["false"] = async invocation =>
            {
                Ensure.ArityMatches(invocation, 0).NoBlock();
                root.result = false;
                return true;
            };

            // Stores the current result under the given output name and clears it.
            root.funcs["out"] = async invocation =>
            {
                Ensure.ArityMatches(invocation, 1);
                var outputKey = invocation.args[0];
                root.Output[outputKey] = root.result;
                root.result = null;
                return false;
            };

            // False as soon as one call of the block yields false, true otherwise.
            root.funcs["all"] = async invocation =>
            {
                Ensure.HasBlock(invocation);
                foreach (call step in invocation.block.calls)
                {
                    await root.eval(step);

                    if (!root.resultSet || !(root.result is false))
                    {
                        continue;
                    }

                    root.result = false;
                    return false;
                }

                // No call produced false, so the overall result is true.
                root.result = true;
                return false;
            };

            // Stops at the first call of the block that yields true; false otherwise.
            root.funcs["any"] = async invocation =>
            {
                Ensure.HasBlock(invocation);
                foreach (call step in invocation.block.calls)
                {
                    await root.eval(step);

                    var foundTrue = root.resultSet && root.result is true;
                    if (foundTrue)
                    {
                        return false;
                    }
                }

                // No call produced true.
                root.result = false;
                return false;
            };

            // False as soon as one call of the block yields true, true otherwise.
            root.funcs["none"] = async invocation =>
            {
                Ensure.ArityMatches(invocation, 0).HasBlock();
                foreach (call step in invocation.block.calls)
                {
                    await root.eval(step);

                    if (!root.resultSet || !(root.result is true))
                    {
                        continue;
                    }

                    root.result = false;
                    return false;
                }

                // No call produced true, so "none" holds.
                root.result = true;
                return false;
            };

            // Result is whether the selected element's text contains the argument.
            root.funcs["has-text"] = async invocation =>
            {
                Ensure.ArityMatches(invocation, 1).NoBlock().ElementSelected(root);
                var needle = invocation.args[0];
                root.result = selected.TextContent.Contains(needle);
                return true;
            };

            // Selects an element of the current document by CSS selector.
            root.funcs["sel"] = async invocation =>
            {
                Ensure.ArityMatches(invocation, 1).NoBlock().DomBuilt(page);
                selected = page.QuerySelector(invocation.args[0]);
                return false;
            };

            // Selects an element of the current document by its id.
            root.funcs["id"] = async invocation =>
            {
                Ensure.ArityMatches(invocation, 1).DomBuilt(page).NoBlock();
                selected = page.GetElementById(invocation.args[0]);
                return false;
            };

            // Opens the given address as the current document and evaluates the block against it.
            root.funcs["site"] = async invocation =>
            {
                Ensure.ArityMatches(invocation, 1);
                page = await browser.OpenAsync(invocation.args[0]);

                // Output name: argument of the block's first "out" call, else the address itself.
                var outName = invocation.block?.calls?.FirstOrDefault(x => x.name == "out")?.args?[0] ?? invocation.args[0];
                var errorKey = $"{outName}-error";

                if (page == null)
                {
                    var failMsg = ObjException.UrlFailed(invocation).Message;
                    root.Output[errorKey] = failMsg;
                    root.Output[outName]  = false;
                    return false;
                }

                if (invocation.block == null)
                {
                    return false;
                }

                try
                {
                    await invocation.block.eval(root);
                }
                catch (ObjException)
                {
                    // This sort of exception needs to pass through, since it indicates code is constructed incorrectly.
                    throw;
                }
                catch (Exception ex)
                {
                    root.Output[errorKey] = ex.Message;
                    // In this case, we don't know if the site is up or down,
                    // just that something went wrong
                    root.Output[outName] = null;
                    return false;
                }

                return false;
            };

            return root;
        }