コード例 #1
0
        /// <summary>
        /// Downloads the page at <paramref name="productUrl"/> and maps the first
        /// <c>li.item</c> found inside <c>ol.products-list</c> to a product.
        /// </summary>
        /// <param name="productUrl">Address of the page to download.</param>
        /// <param name="identifier">Not used by this implementation.</param>
        /// <returns>
        /// The mapped product, or <c>null</c> when the request is unsuccessful or the
        /// page contains no product entry (a warning is logged in both cases).
        /// </returns>
        public override async Task<ProductModel> GetProductDetailsAsync(string productUrl, string identifier = null)
        {
            string source;

            // Release the response (and its connection) as soon as the body has been read.
            using (var response = await _httpClient.GetAsync(productUrl))
            {
                if (!response.IsSuccessStatusCode)
                {
                    _logger.LogWarning($"StatusCode: {response.StatusCode}");
                    return null;
                }

                source = await response.Content.ReadAsStringAsync();
            }

            var document = await _browsingContext.OpenAsync(req => req.Content(source));

            // A missing list and an empty list are reported the same way, so both
            // cases collapse into a single null check.
            var productSection = document
                .QuerySelector<IHtmlOrderedListElement>("ol.products-list")
                ?.QuerySelectorAll<IHtmlListItemElement>("li.item")
                .FirstOrDefault();

            if (productSection == null)
            {
                _logger.LogWarning("No Product Found");
                return null;
            }

            return _mapper.Map<Product>(productSection);
        }
コード例 #2
0
        /// <summary>
        /// Posts a search for <paramref name="searchName"/> and returns the result
        /// entries whose normalized name equals the normalized search name, either
        /// as given or after <c>ReverseName</c>.
        /// </summary>
        /// <param name="searchName">Name to search for.</param>
        /// <returns>
        /// The matching entries as (name, id, cover) tuples. The sequence is fully
        /// materialized before it is returned.
        /// </returns>
        public async Task<IEnumerable<(string name, string id, Uri cover)>> Search(string searchName)
        {
            const string BaseAddress = "http://warashi-asian-pornstars.fr";

            string html;

            // Both the request content and the response hold resources; dispose them
            // once the body has been read.
            using (var form = new FormUrlEncodedContent(new Dictionary<string, string>
            {
                { "recherche_critere", "f" },
                { "recherche_valeur", searchName },
                { "x", "0" },
                { "y", "0" }
            }))
            using (var response = await httpClient.PostAsync(BaseAddress + "/en/s-12/search", form))
            {
                html = await response.Content.ReadAsStringAsync();
            }

            var doc = await context.OpenAsync(req => req.Content(html));

            // These do not depend on the element being inspected, so compute them once
            // instead of once per result.
            var wantedName = NormalizeName(searchName);
            var wantedReversedName = NormalizeName(ReverseName(searchName));

            return doc.QuerySelectorAll(".resultat-pornostar")
                .Select(n =>
                {
                    var name = NormalizeName(n.QuerySelector(".correspondance-lien")?.TextContent);
                    var id = ExtractId(n.QuerySelector("a")?.GetAttribute("href"));
                    var cover = BaseAddress + n.QuerySelector("img")?.GetAttribute("src");

                    return (name, id, cover: new Uri(cover));
                })
                .Where(r => string.Equals(wantedName, r.name) || string.Equals(wantedReversedName, r.name))
                // Materialize so callers that enumerate more than once do not re-run the query.
                .ToList();
        }
コード例 #3
0
        /// <summary>Searches for a video by jav code.</summary>
        /// <param name="searchCode">The jav code. Ex: ABP-001.</param>
        /// <returns>
        /// A list of every matched video. Result items that lack an image
        /// <c>alt</c> or <c>data-original</c> attribute are skipped.
        /// </returns>
        public static async Task<IEnumerable<VideoResult>> Search(string searchCode)
        {
            // The code becomes part of the URL path, so escape it; a null code keeps
            // producing an empty search word as before.
            var url = $"https://www.r18.com/common/search/order=match/searchword={Uri.EscapeDataString(searchCode ?? string.Empty)}";

            using var response = await HttpClient.GetAsync(url).ConfigureAwait(false);

            var html = await response.Content.ReadAsStringAsync().ConfigureAwait(false);

            var doc = await Context.OpenAsync(req => req.Content(html)).ConfigureAwait(false);

            var videos = new List<VideoResult>();

            foreach (var n in doc.QuerySelectorAll(".item-list"))
            {
                // Code and cover come from the same <img>, so look it up once.
                var image = n.QuerySelector("img");
                var code  = image?.GetAttribute("alt");
                var cover = image?.GetAttribute("data-original");

                if (code is not null && cover is not null)
                {
                    videos.Add(new VideoResult
                    {
                        Code  = code,
                        Id    = n.GetAttribute("data-content_id"),
                        Cover = new Uri(cover),
                    });
                }
            }

            return videos;
        }
コード例 #4
0
        /// <summary>
        /// Downloads the product page and maps it, together with inventory data,
        /// to a product.
        /// </summary>
        /// <param name="productUrl">Address of the product page.</param>
        /// <param name="identifier">
        /// Two values joined by <c>-</c>: the product id followed by the catalog
        /// entry id. Any further segments are ignored.
        /// </param>
        /// <returns>
        /// The mapped product, or <c>null</c> when <paramref name="identifier"/> does
        /// not contain both parts or the request is unsuccessful (a warning is logged).
        /// </returns>
        public override async Task<ProductModel> GetProductDetailsAsync(string productUrl, string identifier = null)
        {
            // The parameter defaults to null and may lack the separator; validate it
            // up front instead of failing with a NullReferenceException or
            // IndexOutOfRangeException after the page has been downloaded.
            var identifiers = identifier?.Split('-');

            if (identifiers == null || identifiers.Length < 2)
            {
                _logger.LogWarning($"Invalid identifier: {identifier}");
                return null;
            }

            var productId  = identifiers[0];
            var catEntryId = identifiers[1];

            string source;

            // With ResponseHeadersRead the connection stays in use until the response
            // is disposed, so scope it tightly.
            using (var response = await _httpClient.GetAsync(productUrl, HttpCompletionOption.ResponseHeadersRead))
            {
                if (!response.IsSuccessStatusCode)
                {
                    _logger.LogWarning($"StatusCode: {response.StatusCode}");
                    return null;
                }

                source = await response.Content.ReadAsStringAsync();
            }

            var document = await _browsingContext.OpenAsync(req => req.Content(source));

            var inventoryData = await GetInventoryData(productId);

            // A missing entry leaves `inventory` at its default value, which is passed on as-is.
            inventoryData.TryGetValue(catEntryId, out var inventory);

            return _mapper.Map<Product>(Tuple.Create(document, new MapperData(productId, catEntryId, productUrl, inventory)));
        }
コード例 #5
0
ファイル: JavDBExtractor.cs プロジェクト: weloveloli/NetAVLib
        /// <summary>
        /// Resolves the data for <paramref name="metaData"/>: returns the cached entry
        /// for its number when one exists, otherwise loads the page at its web site
        /// URL, parses it, and stores the result in the cache.
        /// </summary>
        /// <param name="metaData">The metaData<see cref="AvMetaData"/>.</param>
        /// <returns>
        /// The <see cref="Task{AvData}"/>. The result is <c>null</c> when
        /// <paramref name="metaData"/> is <c>null</c>, when nothing is cached and no
        /// URL is set, when the page content could not be loaded, or when parsing
        /// yields nothing.
        /// </returns>
        public async Task<AvData> ResolveFromMetaData(AvMetaData metaData)
        {
            if (metaData == null)
            {
                return null;
            }

            var data = await cacheProvider.GetDataAsync(metaData.Number);

            if (data != null)
            {
                return data;
            }

            // metaData is known to be non-null here, so only the URL needs checking.
            if (string.IsNullOrEmpty(metaData.WebSiteUrl))
            {
                return null;
            }

            var detailContent = await htmlContentReader.LoadFromUrlAsync(metaData.WebSiteUrl);

            if (detailContent == null)
            {
                return null;
            }

            var document = await context.OpenAsync(req => req.Content(detailContent));

            data = ResolveContent(document, metaData);

            // Only successful parses are cached.
            if (data != null)
            {
                await cacheProvider.StoreDataAsync(data);
            }

            return data;
        }
コード例 #6
0
        /// <summary>
        /// Opens a new document loaded from the specified request
        /// asynchronously in the given context.
        /// </summary>
        /// <remarks>
        /// When the context has a loader and its response cache holds an entry for the
        /// request's target address, the cached response is used and no download is
        /// started. When there is no loader, or the download yields no response, a new
        /// empty document for the target address is opened instead.
        /// </remarks>
        /// <param name="context">The browsing context to use.</param>
        /// <param name="request">The request to issue.</param>
        /// <param name="cancel">The cancellation token.</param>
        /// <returns>The task that creates the document.</returns>
        /// <exception cref="ArgumentNullException"><paramref name="request"/> is <c>null</c>.</exception>
        public static async Task<IDocument> OpenAsync(this IBrowsingContext context, DocumentRequest request, CancellationToken cancel)
        {
            if (request == null)
            {
                throw new ArgumentNullException(nameof(request));
            }

            var loader = context.Loader;

            if (loader != null)
            {
                var uri = request.Target.Href;
                var cache = context.ResponseCache;

                // Consult the page cache BEFORE starting the download; otherwise a
                // cache hit would leave a live request running that nobody awaits.
                if (cache != null && cache.ContainsKey(uri))
                {
                    return await context.OpenAsync(cache[uri], cancel).ConfigureAwait(false);
                }

                var download = loader.DownloadAsync(request);

                // Drop the cancellation callback once the download is finished so it
                // does not accumulate on long-lived tokens.
                using (cancel.Register(download.Cancel))
                using (var response = await download.Task.ConfigureAwait(false))
                {
                    if (response != null)
                    {
                        return await context.OpenAsync(response, cancel).ConfigureAwait(false);
                    }
                }
            }

            return await context.OpenNewAsync(request.Target.Href).ConfigureAwait(false);
        }
コード例 #7
0
        /// <summary>
        /// Requests the Steam app list modified during the last day and returns the
        /// ids of the apps whose <c>PriceChangeNumber</c> is greater than zero.
        /// </summary>
        /// <param name="token">Cancellation token passed on to the request.</param>
        /// <returns>
        /// The matching app ids; an empty list when the response contains no apps;
        /// <c>null</c> when the response status is not <see cref="HttpStatusCode.OK"/>.
        /// </returns>
        private async Task<List<long>> GetModifiedGames(CancellationToken token)
        {
            DateTimeOffset lastScrapeStamp = DateTimeOffset.Now.Subtract(TimeSpan.FromDays(1));

            logger.Info("Getting modified games since {time}", lastScrapeStamp);
            long epoch = lastScrapeStamp.ToUnixTimeSeconds();
            Url url = Url.Create(
                $"https://api.steampowered.com/IStoreService/GetAppList/v1/?key={apiKey}&if_modified_since={epoch}&include_games=1");
            DocumentRequest request = DocumentRequest.Get(url);

            // The document is only needed to read the body text; dispose it afterwards.
            using (IDocument response = await context.OpenAsync(request, token))
            {
                if (response.StatusCode != HttpStatusCode.OK)
                {
                    // null (rather than an empty list) signals a failed request.
                    return null;
                }

                string json = response.Body.Text();
                SteamModifiedGamesData modifiedGamesData = JsonConvert.DeserializeObject<SteamModifiedGamesData>(json);

                // Deserialization can yield null (e.g. for the JSON literal "null") and
                // the payload may lack the "response" object; treat both as "no apps".
                return modifiedGamesData?.Response?.Apps?
                           .Where(x => x.PriceChangeNumber > 0)
                           .Select(x => x.Appid)
                           .ToList() ?? new List<long>();
            }
        }
コード例 #8
0
        /// <summary>
        /// Downloads the foodpanda restaurant listing for the given coordinates and
        /// returns the store names and links found on the page as JSON.
        /// </summary>
        /// <param name="lat">Latitude inserted into the listing URL.</param>
        /// <param name="lng">Longitude inserted into the listing URL.</param>
        /// <returns>A JSON array of <c>Store</c> objects.</returns>
        public async Task<string> GetStores(string lat, string lng)
        {
            // Address of the store listing to scrape.
            string locationUrl = $"https://www.foodpanda.com.tw/restaurants/lat/{lat}/lng/{lng}";
            string storeInfoResult;

            // NOTE(review): a shared HttpClient instance would be preferable to one per
            // call; at minimum the client and the response are disposed here.
            using (var httpClient = new HttpClient())
            {
                httpClient.DefaultRequestHeaders.Add("User-Agent", "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/84.0.4147.89 Safari/537.36");

                using (var storeInfoResponse = await httpClient.GetAsync(locationUrl))
                {
                    // Await instead of blocking on .Result inside an async method.
                    storeInfoResult = await storeInfoResponse.Content.ReadAsStringAsync();
                }
            }

            var storeDocument = await context.OpenAsync(res => res.Content(storeInfoResult));

            // Elements carrying the information to scrape.
            var storeNames = storeDocument.QuerySelectorAll(".vendor-list-section .name");
            var storeLinks = storeDocument.QuerySelectorAll(".vendor-list-section li a");

            // The two selectors are independent, so their match counts can differ;
            // only pair up entries that exist in both lists.
            var pairCount = System.Math.Min(storeNames.Length, storeLinks.Length);

            // Collected search results.
            var resultStores = new List<Store>(pairCount);

            for (var index = 0; index < pairCount; index++)
            {
                resultStores.Add(new Store
                {
                    Store_Name = storeNames[index].TextContent,
                    Store_Url  = "https://www.foodpanda.com.tw" + storeLinks[index].GetAttribute("href")
                });
            }

            return JsonConvert.SerializeObject(resultStores);
        }
コード例 #9
0
        /// <summary>
        /// Parses <paramref name="html"/> into a document and passes that document to
        /// <c>ParseHtml</c>.
        /// </summary>
        /// <param name="html">HTML markup to parse.</param>
        /// <returns>The value produced by <c>ParseHtml</c> for the parsed document.</returns>
        public async Task<WikipediaTextResult> GetWikipediaTextFromString(string html)
        {
            var parsedPage = await _angleSharpContext.OpenAsync(response => response.Content(html));

            return ParseHtml(parsedPage);
        }
コード例 #10
0
        /// <summary>
        /// Opens the article page at <paramref name="url"/> and reads the
        /// <c>href</c> of its PowerPress MP3 download link.
        /// </summary>
        /// <param name="url">Address of the article page.</param>
        /// <returns>
        /// The link's <c>href</c> attribute value, or <c>null</c> when the page has no
        /// matching link.
        /// </returns>
        public async Task<string> GetDownloadUrlFromArticlePage(string url)
        {
            const string Mp3LinkSelector = "p.powerpress_links.powerpress_links_mp3 > a.powerpress_link_d";

            var page = await _context.OpenAsync(url);
            var anchor = page.QuerySelector(Mp3LinkSelector);

            if (anchor == null)
            {
                return null;
            }

            return anchor.GetAttribute("href");
        }
コード例 #11
0
        /// <summary>
        /// Opens the RuneHQ quest guide for <paramref name="id"/> and parses the second
        /// <c>.content-body</c> element of the page into a quest.
        /// </summary>
        /// <param name="id">Quest id placed in the <c>id</c> query parameter.</param>
        /// <returns>The quest produced by <c>ParseQuest</c>.</returns>
        public async Task<Quest> LoadQuest(string id)
        {
            // Escape the id so characters such as '&' or '#' cannot alter the query string.
            var address = "http://www.runehq.com/guide.php?type=quest&id=" + System.Uri.EscapeDataString(id ?? string.Empty);
            var document = await _browsingContext.OpenAsync(address);

            // Index 1 = the second ".content-body" element; the indexer fails when the
            // page has fewer than two of them.
            var questContent = document.QuerySelectorAll(".content-body")[1];

            return ParseQuest((IHtmlElement)questContent);
        }
コード例 #12
0
        /// <summary>Loads a specific JAV by url.</summary>
        /// <param name="url">The JAV url.</param>
        /// <returns>The parsed video, or null if no video at <c>url</c> exists.</returns>
        /// <example>
        /// <code>
        /// var result = await Javlibrary.Client.LoadVideo(new Uri("http://www.javlibrary.com/en/?v=javlijazsu"));
        /// result.Id // javlijazsu
        /// result.Title // Fan Fan PRESTIGE Large Thanksgiving Soil And Shiro To Spree Yamakawa Blue Sky Meets Escalate! Basutsua ~
        /// </code>
        /// </example>
        public static async Task<Video?> LoadVideo(Uri url)
        {
            string html;

            // Dispose the response once its body has been read.
            using (var response = await HttpClient.GetAsync(url).ConfigureAwait(false))
            {
                html = await response.Content.ReadAsStringAsync().ConfigureAwait(false);
            }

            var doc = await Context.OpenAsync(req => req.Content(html)).ConfigureAwait(false);

            return ParseVideoPage(doc);
        }
コード例 #13
0
        /// <summary>
        /// Fetches the Weibo page at <c>weibo_url</c>, locates the comment entry link
        /// embedded in the page's <c>pl.content.weiboDetail.index</c> script, and hands
        /// it to <c>CommentHandle</c>. Any exception is logged together with the
        /// downloaded text. <c>exit</c> is set to <c>true</c> when the method finishes.
        /// </summary>
        public async Task Run()
        {
            string res_str = "";

            try
            {
                Log4Net.LogInfo($"正在抓取微博[{weibo_url}]的评论");
                res_str = await httpService.GetAsync(new Uri(weibo_url).PathAndQuery, cookies);
                await ProcessPageAsync(res_str);
            }
            catch (Exception e)
            {
                Log4Net.ErrorInfo($"处理微博[{weibo_url}]异常", e);
                Log4Net.LogInfo($"本次异常的字符串:{res_str}");
            }

            exit = true;

            // Walks from the raw page text down to the comment entry link; returns
            // silently as soon as an expected piece is missing.
            async Task ProcessPageAsync(string pageHtml)
            {
                if (string.IsNullOrEmpty(pageHtml))
                {
                    return;
                }

                var page = await context.OpenAsync(req => req.Content(pageHtml));

                var pageScripts = page.Scripts;
                if (pageScripts.Length <= 0)
                {
                    return;
                }

                var detailScript = pageScripts.FirstOrDefault(x => x.InnerHtml.Contains(@"""ns"":""pl.content.weiboDetail.index""", StringComparison.OrdinalIgnoreCase));
                if (detailScript == null)
                {
                    return;
                }

                var scriptText = detailScript.TextContent;
                if (scriptText == null)
                {
                    return;
                }

                // Strip the "FM.view(" prefix and the trailing ")" characters to leave the JSON argument.
                string json_str = scriptText.Replace("FM.view(", "").TrimEnd(')');

                var json = JsonConvert.DeserializeObject<dynamic>(json_str);

                var temp_doc = await context.OpenAsync(req => req.Content(Convert.ToString(json.html)));

                var commentTab = temp_doc.QuerySelectorAll("*").FirstOrDefault(x => "feed_list_commentTabAll".Equals(x.GetAttribute("node-type")));
                if (commentTab == null)
                {
                    return;
                }

                string weibo_link = commentTab.GetAttribute("action-data");
                if (string.IsNullOrEmpty(weibo_link))
                {
                    return;
                }

                string url = $"{Link(weibo_link)}";
                Log4Net.LogInfo($"获得评论入口URL[{url}]");
                Uri uri = new Uri(url);
                Log4Net.LogInfo($"开始抓取评论");
                await CommentHandle(uri, weibo_link);  // process the comments
            }
        }
コード例 #14
0
        /// <summary>
        /// Opens a document from <paramref name="path"/>: when <c>IsLocalPath</c>
        /// reports a local path the file's text is parsed as the document content,
        /// otherwise the path is opened as an address.
        /// </summary>
        /// <param name="browsingContext">The browsing context to open the document in.</param>
        /// <param name="path">A local file path or an address.</param>
        /// <returns>The opened document.</returns>
        public static async Task<IDocument> OpenPageAsync(this IBrowsingContext browsingContext, string path)
        {
            // IsLocalPath is evaluated once; the previous unused local called it twice.
            if (IsLocalPath(path))
            {
                return await browsingContext.OpenAsync(req => req.Content(File.ReadAllText(path)));
            }

            return await browsingContext.OpenAsync(path);
        }
コード例 #15
0
            /// <summary>
            /// Parses an HTML string and collects name/value pairs from the elements
            /// matched by a CSS selector.
            /// </summary>
            /// <param name="str_Document">HTML markup to parse.</param>
            /// <param name="QuerySelectorAll">
            /// CSS selector choosing the elements. When it contains "form input"
            /// (compared case-insensitively) each match contributes its own
            /// <c>value</c> attribute; otherwise each match contributes the
            /// <c>value</c> of its first checked <c>option</c>, falling back to its
            /// first <c>option</c>, and matches without any option are skipped.
            /// </param>
            /// <returns>
            /// One pair per contributing element: the element's <c>name</c> attribute
            /// and the value, with a missing value replaced by an empty string.
            /// </returns>
            public static async Task<List<KeyValuePair<string, string>>> GetHtmlDocument(string str_Document, string QuerySelectorAll)
            {
                var input = new List<KeyValuePair<string, string>>();

                var _document = await context.OpenAsync(res => res.Content(str_Document));
                var contents = _document?.QuerySelectorAll(QuerySelectorAll);

                if (contents == null)
                {
                    return input;
                }

                // Ordinal, case-insensitive match: ToLower() is culture-sensitive and
                // would miss "FORM INPUT" under e.g. the Turkish culture.
                bool readsInputValues = QuerySelectorAll.IndexOf("form input", System.StringComparison.OrdinalIgnoreCase) >= 0;

                foreach (var item in contents)
                {
                    // Inputs carry their own value; other elements take it from the
                    // checked option, or the first option when none is checked.
                    var valueSource = readsInputValues
                        ? item
                        : item.QuerySelector("option:checked") ?? item.QuerySelector("option");

                    if (valueSource == null)
                    {
                        continue;
                    }

                    var name  = item.GetAttribute("name");
                    var value = valueSource.GetAttribute("value") ?? string.Empty;
                    input.Add(new KeyValuePair<string, string>(name, value));
                }

                return input;
            }
コード例 #16
0
ファイル: Scraper.cs プロジェクト: bdaniel7/scraperel
        /// <summary>
        /// Opens the menu page at <paramref name="url"/> and builds one
        /// <c>MenuItem</c> per dish, following each dish's link to read its
        /// description from the details page.
        /// </summary>
        /// <param name="url">Address of the menu page.</param>
        /// <returns>The scraped menu items, in page order.</returns>
        /// <remarks>
        /// All selectors come from <c>parserConfig</c>. Elements the selectors are
        /// expected to match are dereferenced without null checks, so a page that
        /// lacks one of them makes the method throw.
        /// </remarks>
        public async Task<List<MenuItem>> Scrape(string url)
        {
            var list = new List<MenuItem>();

            using (var document = await context.OpenAsync(url))
            {
                var menuSections    = document.QuerySelectorAll(parserConfig.MenuSectionRoot);
                var menuDescription = document.QuerySelector(parserConfig.MenuDescriptionPath).TextContent
                                      .Replace("<br>", "", StringComparison.Ordinal);
                var menuTitle = document.QuerySelector(parserConfig.MenuTitlePath).TextContent;

                foreach (var menuSection in menuSections)
                {
                    var menuSectionTitle = menuSection.QuerySelector("span").TextContent;

                    // The section link is "#<id>"; drop the leading '#' to get the
                    // id of the container holding that section's dishes.
                    var menuSectionId = menuSection.Attributes["href"].Value.Substring(1);

                    var dishesPerSection = document.QuerySelectorAll($"div[id='{menuSectionId}']");

                    foreach (var dishPerSection in dishesPerSection)
                    {
                        var dishes = dishPerSection.QuerySelectorAll(parserConfig.DishRootPath);

                        foreach (var dish in dishes)
                        {
                            var dishDetailsUrl = ((IHtmlAnchorElement)dish.QuerySelector("a")).Href;

                            Log.Logger.Debug($"dishDetailsUrl {dishDetailsUrl}");

                            string dishDescription;

                            // One details page is opened per dish; dispose each as soon as
                            // the description has been copied out so they do not pile up.
                            using (var dishDetailsPage = await context.OpenAsync(dishDetailsUrl))
                            {
                                dishDescription = dishDetailsPage.QuerySelector(parserConfig.DishDescriptionPath).TextContent;
                            }

                            // The dish name is whatever follows the SPAN marker (plus one
                            // character) in the name element's markup.
                            var dishHtml = dish.QuerySelector(parserConfig.DishNamePath).InnerHtml;
                            var dishName = dishHtml.Substring(dishHtml.IndexOf(SPAN, StringComparison.Ordinal)
                                                              + SPAN.Length + 1);

                            list.Add(new MenuItem
                            {
                                MenuTitle        = menuTitle,
                                MenuSectionTitle = menuSectionTitle,
                                DishName         = dishName,
                                MenuDescription  = menuDescription,
                                DishDescription  = dishDescription
                            });
                        }
                    }
                }
            }

            return list;
        }
コード例 #17
0
        /// <summary>
        /// Requests the career page of the current battletag for region
        /// <paramref name="r"/> and, when the response status is 200, records the
        /// region, the page, and the profile URL.
        /// </summary>
        /// <param name="r">Region to probe.</param>
        /// <returns><c>true</c> when the page was returned with status 200; otherwise <c>false</c>.</returns>
        private async Task<bool> DetectRegionSpecificAsync(Region r)
        {
            string careerUrl = $"http://playoverwatch.com/en-gb/career/pc/{r}/{battletagUrlFriendly}";
            var    careerPage = await browsingContext.OpenAsync(careerUrl);

            if (careerPage.StatusCode != System.Net.HttpStatusCode.OK)
            {
                return false;
            }

            Region     = r;
            userPage   = careerPage;
            ProfileURL = careerUrl;
            return true;
        }
コード例 #18
0
        /// <summary>
        /// Opens the page at <paramref name="address"/>, converts every row of its
        /// <c>.table</c> element except the first into a <c>PalicoSkill</c>, and
        /// inserts the results into the database.
        /// </summary>
        /// <param name="address">Address of the page to open.</param>
        public async Task GetPalicoSkills(string address)
        {
            var skillPage = await context.OpenAsync(address);

            await db.CreateTableAsync<PalicoSkill>();

            // Skip(1) leaves out the table's first row.
            var dataRows = skillPage.QuerySelector(".table").QuerySelectorAll("tr").Skip(1);

            var parsedSkills = new List<PalicoSkill>();
            parsedSkills.AddRange(dataRows.Select(row => GetPalicoSkill(row)));

            await db.InsertAllAsync(parsedSkills);
        }
コード例 #19
0
        /// <summary>
        /// Opens the page at <c>_address</c> and collects its beers: first those read
        /// from the <c>ul.menu-section-list</c> element, then those returned by
        /// <c>GrabLastBeersFromShowMore</c> for the page's HTML.
        /// </summary>
        /// <returns>The combined list of beers.</returns>
        public async Task<List<Beer>> GetBeers()
        {
            const string MenuListSelector = "ul.menu-section-list";

            var page = await _browsingContext.OpenAsync(_address);
            var menuList = page.QuerySelector(MenuListSelector);

            var beers = new List<Beer>();
            beers.AddRange(GrabBeerFromHtml(menuList));

            var remainingBeers = await GrabLastBeersFromShowMore(page.ToHtml());
            beers.AddRange(remainingBeers);

            return beers;
        }
コード例 #20
0
        /// <summary>
        /// Opens the tag page and the tag archive page for <paramref name="tagName"/>
        /// and combines them through <c>GetFullTag</c>.
        /// </summary>
        /// <param name="tagName">Tag name placed in the <c>/tag/{tagName}</c> path.</param>
        /// <returns>The tag information produced by <c>GetFullTag</c>.</returns>
        /// <exception cref="Exception">
        /// Any failure is wrapped; the original exception is available through
        /// <see cref="Exception.InnerException"/>.
        /// </exception>
        public async Task<TagFullDto> GetFullTagInfoByName(string tagName)
        {
            try
            {
                var tagUrl = _baseUrl + $"/tag/{tagName}";

                var mainDocument = await _context.OpenAsync(tagUrl);

                var archiveDocument = await _context.OpenAsync(tagUrl + "/archive");

                return await GetFullTag(mainDocument, archiveDocument);
            }
            catch (Exception e)
            {
                // Keep the message unchanged for existing consumers, but also attach the
                // original exception so its type and stack trace are not lost.
                throw new Exception($"Error during getting data from 'medium.com' by tag '{tagName}':\r\n {e}", e);
            }
        }
コード例 #21
0
        /// <summary>
        /// Opens the template's start page, then for every category link in its menu
        /// extracts that category's data into <paramref name="resultados"/>, persists
        /// the batch, and clears the list. The search is consolidated at the end.
        /// </summary>
        /// <param name="resultados">
        /// Working list filled per category; it is cleared after each category has
        /// been persisted.
        /// </param>
        private async Task ExtrairDadosPagina(List<ResultadoBusca> resultados)
        {
            // Browsing context built from the default configuration plus the default loader.
            _browsingContext = BrowsingContext.New(Configuration.Default.WithDefaultLoader());

            // Main search page, which holds the category menu.
            var paginaInicial = await _browsingContext.OpenAsync(_template.UrlInicial);

            var buscaId = 0;

            // Each menu entry carries the URL of the product grid for one category.
            foreach (var linkCategoria in paginaInicial.QuerySelectorAll(_template.SeletorMenuCategorias))
            {
                var urlGridCategoria = _template.UrlSite + linkCategoria.GetAttribute("href");

                // The category name is taken from the product-grid menu; it was not found
                // on the product details page, and the other three sites showed a
                // similar pattern.
                var nomeCategoria = linkCategoria.QuerySelector(_template.SelectorCategoria).InnerHtml.Trim();

                await ExtrairDadosPorCategoria(urlGridCategoria, nomeCategoria, resultados);

                buscaId = await _busca.PersistirBusca(buscaId, resultados);

                resultados.Clear();
            }

            await _busca.ConsolidarBusca(buscaId);
        }
コード例 #22
0
        /// <summary>
        /// Generates an Excel file from the returned string from the supplied URI and returns the byte array of the file data.
        /// </summary>
        /// <param name="uri">URI to download the HTML string from. Will throw an error if the server cannot be reached, there are more than one tables or if a table cannot be found.</param>
        /// <returns>Byte array of the Excel file data.</returns>
        /// <remarks>
        /// This method blocks the calling thread until the document has been loaded.
        /// </remarks>
        public byte[] FromUri(Uri uri)
        {
            // The default configuration ships without a loader, in which case opening
            // an address yields an empty document; the default loader is required for
            // the URI to actually be requested.
            IBrowsingContext context = BrowsingContext.New(Configuration.Default.WithDefaultLoader());
            var document             = context.OpenAsync(uri.ToString()).Result;

            return ProcessDocument(document.DocumentElement);
        }
コード例 #23
0
        /// <summary>
        /// Opens <paramref name="source"/> and returns up to 20 distinct paragraphs
        /// from it.
        /// </summary>
        /// <param name="source">Address of the page to open.</param>
        /// <returns>
        /// View models for <c>p</c> elements that have text, are not rejected by
        /// <c>IsAncestorLink</c>, and pass <c>IsSentenceMoreThanNWords(text, 4)</c>;
        /// carriage returns are stripped from the text and duplicates (by text) are
        /// removed.
        /// </returns>
        /// <exception cref="WebScrappingException">No document could be opened.</exception>
        protected async Task<List<DocumentViewModel>> ParseHTML(string source)
        {
            IConfiguration   config  = Configuration.Default.WithDefaultLoader();
            IBrowsingContext context = BrowsingContext.New(config);
            IDocument        dom     = await context.OpenAsync(source);

            if (dom == null)
            {
                throw new WebScrappingException("The website might not exist");
            }

            return dom.All
                   // Cheapest test first: only paragraphs are of interest.
                   .Where(element => element.LocalName == "p")
                   .Select(element => new { Element = element, Text = element.TextContent })
                   .Where(item => !string.IsNullOrEmpty(item.Text))
                   .Where(item => !IsAncestorLink(item.Element))
                   .Where(item => IsSentenceMoreThanNWords(item.Text, 4))
                   .Select(item => new DocumentViewModel
                   {
                       // Strings are immutable, so the cleaned text has to be stored;
                       // calling Replace and discarding the result changes nothing.
                       textContent    = item.Text.Replace("\r\n", "").Replace("\r", ""),
                       name           = item.Element.LocalName,
                       // Elements with an ancestor link were filtered out above.
                       isAncestorLink = false,
                   })
                   .GroupBy(document => document.textContent)
                   .Select(grp => grp.First())
                   .Take(20)
                   .ToList();
        }
コード例 #24
0
        /// <summary>
        /// Requests the phone-number endpoint for <paramref name="tempId"/> and
        /// extracts the numbers from the <c>"value":"…"</c> field of the response.
        /// </summary>
        /// <param name="tempId">Id inserted into the phone endpoint URL.</param>
        /// <param name="brContext">Browsing context used to issue the request.</param>
        /// <returns>
        /// The numbers after <c>PrepareNumber</c>; empty when the response has no
        /// value field (in which case "No number" is written to the console) or the
        /// field is empty.
        /// </returns>
        /// <remarks>This method blocks the calling thread until the response has been loaded.</remarks>
        private static List<string> GetPhoneNumbers(string tempId, IBrowsingContext brContext)
        {
            var result             = new List<string>();
            var regexNumberPattern = "\"value\":\"(.*)\"";
            var numberLinkPattern  = "http://www.olx.ua/ajax/misc/contact/phone/{0}/white/";
            var docNumber          = brContext.OpenAsync(string.Format(numberLinkPattern, tempId)).Result;
            var values             = Regex.Match(docNumber.Body.InnerHtml, regexNumberPattern);

            // Groups.Count reflects the groups declared in the pattern (always 2 here),
            // not whether anything matched, so Success is the correct test.
            if (!values.Success)
            {
                System.Console.WriteLine("No number");
                return result;
            }

            var numbersString = values.Groups[1].Value;

            if (numbersString.Contains("span"))
            {
                // Several numbers arrive as escaped markup; undo the escaping and read
                // one number per ".block" element.
                var parser = new HtmlParser();
                numbersString = numbersString.Replace("&lt;", "<").Replace("&gt;", ">").Replace("\\\"", "\"").Replace("\\/", "/");
                var document = parser.Parse(numbersString);
                var numbers  = document.QuerySelectorAll(".block").Select(s => PrepareNumber(s.InnerHtml));
                result.AddRange(numbers);
            }
            else if (numbersString != string.Empty)
            {
                result.Add(PrepareNumber(numbersString));
            }

            return result;
        }
コード例 #25
0
        /// <summary>
        /// Downloads the article at <c>oneNews.Url</c>, maps <paramref name="oneNews"/>
        /// to a view model, and appends the article's text elements to it.
        /// </summary>
        /// <param name="oneNews">News entry whose <c>Url</c> is downloaded.</param>
        /// <param name="mapper">Mapper used to create the view model.</param>
        /// <returns>
        /// The view model with one <c>HtmlElement</c> per matching element:
        /// <c>p.box-paragraph__text</c>, <c>h2.box-paragraph__subtitle</c> and
        /// <c>b.box-paragraph__text</c>, in document order.
        /// </returns>
        public async Task<OneNewsVM> GetOneNews(OneNews oneNews, IMapper mapper)
        {
            string source;

            // NOTE(review): a shared HttpClient instance would be preferable to one per
            // call; at minimum the client and the response are disposed here.
            using (HttpClient httpClient = new HttpClient())
            using (HttpResponseMessage httpResponseMessage = await httpClient.GetAsync(oneNews.Url))
            {
                source = await httpResponseMessage.Content.ReadAsStringAsync();
            }

            IConfiguration config = Configuration.Default;

            IBrowsingContext context = BrowsingContext.New(config);

            IDocument document = await context.OpenAsync(req => req.Content(source));

            IElement[] textItems = document.All.Where(m => (m.LocalName == "p" && m.ClassList.Contains("box-paragraph__text")) ||
                                                      (m.LocalName == "h2" && m.ClassList.Contains("box-paragraph__subtitle")) ||
                                                      (m.LocalName == "b" && m.ClassList.Contains("box-paragraph__text"))).ToArray();

            OneNewsVM oneNewsViewModel = mapper.Map<OneNewsVM>(oneNews);

            foreach (IElement item in textItems)
            {
                oneNewsViewModel.HtmlElements.Add(new HtmlElement
                {
                    Name = item.LocalName,
                    Text = item.Text()
                });
            }

            return oneNewsViewModel;
        }
コード例 #26
0
        /// <summary>
        /// Opens a new document loaded from the specified request
        /// asynchronously in the given context.
        /// </summary>
        /// <remarks>
        /// When the context has no loader, or the download yields no response, a new
        /// empty document for the request's target address is opened instead.
        /// </remarks>
        /// <param name="context">The browsing context to use.</param>
        /// <param name="request">The request to issue.</param>
        /// <param name="cancel">The cancellation token.</param>
        /// <returns>The task that creates the document.</returns>
        /// <exception cref="ArgumentNullException"><paramref name="request"/> is <c>null</c>.</exception>
        public static async Task<IDocument> OpenAsync(this IBrowsingContext context, DocumentRequest request, CancellationToken cancel)
        {
            if (request == null)
            {
                throw new ArgumentNullException(nameof(request));
            }

            var loader = context.Loader;

            if (loader != null)
            {
                var download = loader.DownloadAsync(request);

                // Drop the cancellation callback once the download is finished so it
                // does not accumulate on long-lived tokens.
                using (cancel.Register(download.Cancel))
                using (var response = await download.Task.ConfigureAwait(false))
                {
                    if (response != null)
                    {
                        return await context.OpenAsync(response, cancel).ConfigureAwait(false);
                    }
                }
            }

            return await context.OpenNewAsync(request.Target.Href).ConfigureAwait(false);
        }
コード例 #27
0
        /// <summary>
        /// Returns the document for <paramref name="url"/>, serving it from
        /// <c>caches</c> when present and otherwise opening it and caching the result.
        /// </summary>
        /// <param name="url">Address of the page; also used as the cache key.</param>
        /// <param name="cancellationToken">Cancellation token passed on to the request.</param>
        /// <returns>The cached or freshly opened document.</returns>
        private async Task<IDocument> BrowseAsync(string url, CancellationToken cancellationToken)
        {
            IDocument document;

            // Single lookup instead of ContainsKey followed by the indexer.
            if (caches.TryGetValue(url, out document))
            {
                return document;
            }

            document = await browsingContext.OpenAsync(url, cancellationToken);

            // TODO: remove idle documents
            if (caches.Count > 100)
            {
                Console.WriteLine("Cache reaches maximum limit, resetting...");
                caches = new Dictionary<string, IDocument>();
            }

            // Cache the new document in every case, including right after a reset.
            // The indexer (unlike Add) does not throw if the key was inserted while
            // this call was awaiting the download.
            caches[url] = document;

            return document;
        }
コード例 #28
0
ファイル: WebHtml.cs プロジェクト: StarTrekRules/WebRenderer
        /// <summary>
        /// Parses <paramref name="html"/>, appends one <c>HtmlNode</c> per element of
        /// the document to <c>Elements</c>, and rebuilds <c>Root</c> from the
        /// document body.
        /// </summary>
        /// <param name="html">HTML markup to parse.</param>
        /// <param name="parent">Not used by this method.</param>
        /// <remarks>This method blocks the calling thread until parsing has finished.</remarks>
        private void Parse(string html, HtmlNode parent = null)
        {
            // https://regexr.com/4hr80 — regex referenced by the original author; this
            // method parses with AngleSharp and does not use it.

            IDocument parsed = BrowsingContext.New(Configuration.Default)
                .OpenAsync(req => req.Content(html))
                .Result;

            // Flat list: every element of the document in the order `All` yields them.
            foreach (var element in parsed.All)
            {
                var flatNode = new HtmlNode();
                flatNode.SetAttributes(element.Attributes);
                flatNode.Tag     = element.TagName;
                flatNode.TagBody = element.Text();

                Elements.Add(flatNode);
            }

            // Tree: starts at <body> and is filled in by PopulateChildren.
            var body = parsed.Body;

            var bodyNode = new HtmlNode
            {
                Tag     = body.TagName,
                TagBody = body.Text()
            };

            PopulateChildren(body, bodyNode);

            Root = bodyNode;
        }
コード例 #29
0
ファイル: StockScraper.cs プロジェクト: coosmiin/BVBTrading
        /// <summary>
        /// Opens the composition page of <paramref name="index"/> and reads one stock
        /// per row of the <c>table#gvC</c> body.
        /// </summary>
        /// <param name="index">Index name inserted into <c>INDEX_COMPOSITION_URL_FORMAT</c>.</param>
        /// <returns>
        /// The stocks of all rows whose first cell contains a link. Symbol, name,
        /// price and weight are read from cells 0, 1, 3 and 7; the weight is divided
        /// by 100.
        /// </returns>
        public async Task<IEnumerable<BvbStock>> ScrapeIndexdComposition(string index)
        {
            var compositionPage = await _browsingContext.OpenAsync(string.Format(INDEX_COMPOSITION_URL_FORMAT, index));

            var tableRows = compositionPage
                            .QuerySelectorAll("table#gvC tbody tr")
                            .OfType<IHtmlTableRowElement>();

            var parsedStocks = new List<BvbStock>();

#pragma warning disable S3267 // Loops should be simplified with "LINQ" expressions
            foreach (IHtmlTableRowElement tableRow in tableRows)
            {
                var cells = tableRow.Cells;
                var stockSymbol = cells[0].QuerySelector("a")?.TextContent;

                // Rows without a symbol link are not stock rows.
                if (stockSymbol != null)
                {
                    // NOTE(review): decimal.Parse uses the current culture here — confirm it
                    // matches the number format the page is served in.
                    parsedStocks.Add(new BvbStock
                    {
                        Symbol = stockSymbol,
                        Name   = cells[1].TextContent,
                        Price  = decimal.Parse(cells[3].TextContent),
                        Weight = decimal.Parse(cells[7].TextContent) / 100
                    });
                }
            }
#pragma warning restore S3267 // Loops should be simplified with "LINQ" expressions

            return parsedStocks.ToArray();
        }
コード例 #30
0
ファイル: JobScrapper.cs プロジェクト: zmitov/JobsScrapper
        /// <summary>
        /// Loads the page at <paramref name="url"/> and builds a <see cref="Job"/> from the
        /// content found under a fixed set of CSS selectors.
        /// </summary>
        /// <remarks>
        /// The page load blocks the calling thread. The id is parsed from the text following
        /// the last '/' in the URL, so <c>long.Parse</c> throws when that text is not an integer.
        /// </remarks>
        /// <param name="url">Address of the job page.</param>
        /// <returns>A new <see cref="Job"/> with <c>Created</c> set to the current local time.</returns>
        public Job ScrapeJob(string url)
        {
            const string salarySelector =
                "body > table:nth-child(3) > tbody > tr > td > table > tbody > tr:nth-child(2) > td > table > tbody > tr:nth-child(3) > td > span";
            const string titleSelector =
                "body > table:nth-child(3) > tbody > tr > td > table > tbody > tr:nth-child(2) > td > table > tbody > tr:nth-child(2) > td > b";
            const string companySelector =
                "body > table:nth-child(3) > tbody > tr > td > table > tbody > tr:nth-child(2) > td > table > tbody > tr:nth-child(2) > td > a";
            const string descriptionSelector =
                "body > table:nth-child(3) > tbody > tr > td > table > tbody > tr:nth-child(5) > td";
            const string descriptionTailSelector =
                "body > table:nth-child(3) > tbody > tr > td > table:nth-child(2) > tbody > tr:nth-child(4)";

            var page = _context.OpenAsync(url).Result;

            // Everything after the last slash is taken as the numeric job id.
            var lastSlash = url.LastIndexOf("/", StringComparison.Ordinal);
            var jobId = long.Parse(url.Substring(lastSlash + 1));

            var salaryText = page.GetContent(salarySelector);
            var titleText = page.GetContent(titleSelector);
            var companyText = page.GetContent(companySelector);

            // The description is the concatenation of two separate page fragments.
            var descriptionText = page.GetContent(descriptionSelector) +
                                  page.GetContent(descriptionTailSelector);

            var job = new Job
            {
                Company = companyText,
                Description = descriptionText,
                Salary = salaryText,
                Title = titleText,
                Id = jobId,
                Created = DateTime.Now
            };

            return(job);
        }
コード例 #31
0
ファイル: W3CCreator.cs プロジェクト: Wojdav/AngleSharp
        /// <summary>
        /// Opens the document at <paramref name="url"/> and appends a generated test-method
        /// stub for it to the file <c>test.cs</c>.
        /// </summary>
        /// <param name="context">Browsing context used to open the document.</param>
        /// <param name="url">Address of the document; its first <c>title</c>, <c>content</c> and <c>css</c> elements are read.</param>
        /// <param name="methods">Names generated so far; consulted to pick a unique name, which is then added to the list.</param>
        static void CreateCssSelectorTest(IBrowsingContext context, String url, List<String> methods)
        {
            Console.Write("Loading " + url + " ... ");
            var page = context.OpenAsync(url).Result;
            var methodName = Sanatize(page.GetElementsByTagName("title")[0].TextContent);
            // Quotes are doubled because the markup is placed inside a verbatim string literal.
            var markup = page.GetElementsByTagName("content")[0].InnerHtml.Trim().Replace("\"", "\"\"");
            var css = page.GetElementsByTagName("css")[0].TextContent;
            var styleSheet = new CssParser().ParseStylesheet(css);
            var assertions = new StringBuilder();
            var selectorNumber = 1;

            // On a name clash, append the first letter (starting at 'A') that makes it unique.
            if (methods.Contains(methodName))
            {
                var suffix = 'A';

                while (methods.Contains(methodName + suffix))
                {
                    suffix = (Char)(suffix + 1);
                }

                methodName += suffix.ToString();
            }

            foreach (var rule in styleSheet.Rules)
            {
                var styleRule = rule as ICssStyleRule;

                // Only style rules carry a selector to emit an assertion for.
                if (styleRule == null)
                {
                    continue;
                }

                assertions.Append(@"
	        var selectorINDEX = doc.QuerySelectorAll(""SELECTOR"");
	        Assert.AreEqual(0, selectorINDEX.Length);"
                    .Replace("SELECTOR", styleRule.SelectorText)
                    .Replace("INDEX", selectorNumber.ToString()));
                selectorNumber++;
            }

            // Placeholders are substituted in this fixed order: URL, TITLE, HTML, SELECTORS.
            var stub = @"
        /// <summary>
        /// Test taken from URL
        /// </summary>
        public void TITLE()
        {
	        var source = @""HTML"";
	        var doc = DocumentBuilder.Html(source);
	        SELECTORS
        }
"
                .Replace("URL", url)
                .Replace("TITLE", methodName)
                .Replace("HTML", markup)
                .Replace("SELECTORS", assertions.ToString());

            File.AppendAllText("test.cs", stub);
            Console.WriteLine("success.");
            methods.Add(methodName);
        }