/// <summary>
/// Downloads the page at <paramref name="productUrl"/> and maps the first
/// <c>li.item</c> found inside <c>ol.products-list</c> to a product.
/// </summary>
/// <param name="productUrl">Address of the page to download.</param>
/// <param name="identifier">Not read by this implementation.</param>
/// <returns>
/// The mapped product, or <c>null</c> when the request is not successful or the
/// page contains no matching product element (a warning is logged in both cases).
/// </returns>
public override async Task<ProductModel> GetProductDetailsAsync(string productUrl, string identifier = null)
{
    string source;

    // Dispose the response once the body has been read so the connection is released.
    using (var response = await _httpClient.GetAsync(productUrl))
    {
        if (!response.IsSuccessStatusCode)
        {
            _logger.LogWarning($"StatusCode: {response.StatusCode}");
            return null;
        }

        source = await response.Content.ReadAsStringAsync();
    }

    var document = await _browsingContext.OpenAsync(req => req.Content(source));

    // A missing list and a list without items are reported identically.
    var productSection = document
        .QuerySelector<IHtmlOrderedListElement>("ol.products-list")
        ?.QuerySelectorAll<IHtmlListItemElement>("li.item")
        .FirstOrDefault();

    if (productSection == null)
    {
        _logger.LogWarning("No Product Found");
        return null;
    }

    return _mapper.Map<Product>(productSection);
}
/// <summary>
/// Posts <paramref name="searchName"/> to the site's search form and returns the
/// results whose normalised name equals the normalised search name, either as
/// given or after <c>ReverseName</c> has been applied to it.
/// </summary>
/// <param name="searchName">The name to search for.</param>
/// <returns>The matching results as (name, id, cover) tuples, fully materialised.</returns>
public async Task<IEnumerable<(string name, string id, Uri cover)>> Search(string searchName)
{
    string html;

    // Both the form content and the response hold resources; release them after the body is read.
    using (var form = new FormUrlEncodedContent(new Dictionary<string, string>
    {
        { "recherche_critere", "f" },
        { "recherche_valeur", searchName },
        { "x", "0" },
        { "y", "0" }
    }))
    using (var response = await httpClient.PostAsync(
        "http://warashi-asian-pornstars.fr/en/s-12/search",
        form))
    {
        html = await response.Content.ReadAsStringAsync();
    }

    var doc = await context.OpenAsync(req => req.Content(html));

    // The comparison targets do not depend on the individual result, so compute them once.
    var wanted = NormalizeName(searchName);
    var wantedReversed = NormalizeName(ReverseName(searchName));

    // Materialise so callers enumerating more than once do not re-run the selectors.
    return doc.QuerySelectorAll(".resultat-pornostar")
        .Select(n =>
        {
            var name = NormalizeName(n.QuerySelector(".correspondance-lien")?.TextContent);
            var id = ExtractId(n.QuerySelector("a")?.GetAttribute("href"));
            var cover = "http://warashi-asian-pornstars.fr" + n.QuerySelector("img")?.GetAttribute("src");
            return (name: name, id: id, cover: new Uri(cover));
        })
        .Where(n => String.Equals(wanted, n.name) || String.Equals(wantedReversed, n.name))
        .ToList();
}
/// <summary>Searches for a video by jav code.</summary>
/// <param name="searchCode">The jav code. Ex: ABP-001.</param>
/// <returns>
/// A list of every matched video. Items that lack either the image <c>alt</c>
/// text or the <c>data-original</c> attribute are skipped.
/// </returns>
public static async Task<IEnumerable<VideoResult>> Search(string searchCode)
{
    // Escape the code so characters such as '/', '?' or spaces cannot alter the request path.
    var escapedCode = Uri.EscapeDataString(searchCode ?? string.Empty);

    string html;
    using (var response = await HttpClient.GetAsync($"https://www.r18.com/common/search/order=match/searchword={escapedCode}").ConfigureAwait(false))
    {
        html = await response.Content.ReadAsStringAsync().ConfigureAwait(false);
    }

    var doc = await Context.OpenAsync(req => req.Content(html)).ConfigureAwait(false);

    var videos = new List<VideoResult>();
    foreach (var n in doc.QuerySelectorAll(".item-list"))
    {
        var image = n.QuerySelector("img");
        var code = image?.GetAttribute("alt");
        var id = n.GetAttribute("data-content_id");
        var cover = image?.GetAttribute("data-original");

        if (code is not null && cover is not null)
        {
            videos.Add(new VideoResult
            {
                Code = code,
                Id = id,
                Cover = new Uri(cover),
            });
        }
    }

    return videos;
}
/// <summary>
/// Downloads the product page and maps it, together with the inventory entry
/// looked up for the product, to a product.
/// </summary>
/// <param name="productUrl">Address of the product page.</param>
/// <param name="identifier">
/// Two ids separated by '-': the first is used as the product id, the second as
/// the catalog entry id.
/// </param>
/// <returns>
/// The mapped product, or <c>null</c> (after logging a warning) when
/// <paramref name="identifier"/> does not contain both ids or the request is not successful.
/// </returns>
public override async Task<ProductModel> GetProductDetailsAsync(string productUrl, string identifier = null)
{
    // The parameter defaults to null, so validate it before dereferencing or indexing.
    var identifiers = identifier?.Split("-");
    if (identifiers == null || identifiers.Length < 2)
    {
        _logger.LogWarning($"Invalid identifier: {identifier}");
        return null;
    }

    var productId = identifiers[0];
    var catEntryId = identifiers[1];

    string source;

    // With ResponseHeadersRead the connection stays in use until the response is disposed.
    using (var response = await _httpClient.GetAsync(productUrl, HttpCompletionOption.ResponseHeadersRead))
    {
        if (!response.IsSuccessStatusCode)
        {
            _logger.LogWarning($"StatusCode: {response.StatusCode}");
            return null;
        }

        source = await response.Content.ReadAsStringAsync();
    }

    var document = await _browsingContext.OpenAsync(req => req.Content(source));

    var inventoryData = await GetInventoryData(productId);

    // A missing entry leaves 'inventory' at its default value, which is passed on as is.
    inventoryData.TryGetValue(catEntryId, out var inventory);

    return _mapper.Map<Product>(Tuple.Create(document, new MapperData(productId, catEntryId, productUrl, inventory)));
}
/// <summary>
/// Resolves the data for <paramref name="metaData"/>: returns the cached entry
/// for its number when one exists, otherwise loads the page at its website URL,
/// resolves the content and stores a non-null result in the cache.
/// </summary>
/// <param name="metaData">The metadata to resolve; see <see cref="AvMetaData"/>.</param>
/// <returns>
/// The resolved <see cref="AvData"/>, or <c>null</c> when <paramref name="metaData"/>
/// is null, has no website URL, the page content could not be loaded, or the
/// content could not be resolved.
/// </returns>
public async Task<AvData> ResolveFromMetaData(AvMetaData metaData)
{
    if (metaData == null)
    {
        return null;
    }

    var data = await cacheProvider.GetDataAsync(metaData.Number);
    if (data != null)
    {
        return data;
    }

    // metaData is known to be non-null here, so only the URL needs checking.
    if (string.IsNullOrEmpty(metaData.WebSiteUrl))
    {
        return null;
    }

    var detailContent = await this.htmlContentReader.LoadFromUrlAsync(metaData.WebSiteUrl);
    if (detailContent == null)
    {
        return null;
    }

    var document = await context.OpenAsync(req => req.Content(detailContent));
    data = ResolveContent(document, metaData);

    if (data != null)
    {
        await cacheProvider.StoreDataAsync(data);
    }

    return data;
}
/// <summary>
/// Opens a new document loaded from the specified request
/// asynchronously in the given context. When the context's response
/// cache holds an entry for the request address, that entry is used
/// and no download is started.
/// </summary>
/// <param name="context">The browsing context to use.</param>
/// <param name="request">The request to issue.</param>
/// <param name="cancel">The cancellation token.</param>
/// <returns>The task that creates the document.</returns>
/// <exception cref="ArgumentNullException"><paramref name="request"/> is null.</exception>
public static async Task<IDocument> OpenAsync(this IBrowsingContext context, DocumentRequest request, CancellationToken cancel)
{
    if (request == null)
    {
        throw new ArgumentNullException(nameof(request));
    }

    var loader = context.Loader;

    if (loader != null)
    {
        var uri = request.Target.Href;
        var cache = context.ResponseCache;

        // Consult the page cache before issuing the request; starting the download
        // first would send a request that is then abandoned on a cache hit.
        if (cache != null && cache.ContainsKey(uri))
        {
            // Cached responses are shared, so they must not be disposed here.
            return await context.OpenAsync(cache[uri], cancel).ConfigureAwait(false);
        }

        var download = loader.DownloadAsync(request);

        // Dispose the registration so the token does not keep a callback to a finished download.
        using (cancel.Register(download.Cancel))
        using (var response = await download.Task.ConfigureAwait(false))
        {
            if (response != null)
            {
                return await context.OpenAsync(response, cancel).ConfigureAwait(false);
            }
        }
    }

    // No loader or no response: fall back to a new, empty document at the target address.
    return await context.OpenNewAsync(request.Target.Href).ConfigureAwait(false);
}
/// <summary>
/// Requests the Steam app list restricted to games modified within the last
/// day and returns the ids of the apps whose price change number is positive.
/// </summary>
/// <param name="token">Cancellation token passed to the request.</param>
/// <returns>
/// The matching app ids (empty when the response carries no apps), or
/// <c>null</c> when the response status is not OK.
/// </returns>
private async Task<List<long>> GetModifiedGames(CancellationToken token)
{
    DateTimeOffset lastScrapeStamp = DateTimeOffset.Now.Subtract(TimeSpan.FromDays(1));
    logger.Info("Getting modified games since {time}", lastScrapeStamp);
    long epoch = lastScrapeStamp.ToUnixTimeSeconds();

    Url url = Url.Create(
        $"https://api.steampowered.com/IStoreService/GetAppList/v1/?key={apiKey}&if_modified_since={epoch}&include_games=1");
    DocumentRequest request = DocumentRequest.Get(url);

    // The document is only needed to read the body text; dispose it afterwards.
    using (IDocument response = await context.OpenAsync(request, token))
    {
        if (response.StatusCode != HttpStatusCode.OK)
        {
            return null;
        }

        string json = response.Body.Text();
        SteamModifiedGamesData modifiedGamesData = JsonConvert.DeserializeObject<SteamModifiedGamesData>(json);

        // Deserialisation yields null for an empty body and may leave Response unset;
        // treat both like an absent app list.
        return modifiedGamesData?.Response?.Apps?.Where(x => x.PriceChangeNumber > 0)
                   .Select(x => x.Appid)
                   .ToList()
               ?? new List<long>();
    }
}
/// <summary>
/// Downloads the foodpanda restaurant listing for the given coordinates and
/// returns the stores found on the page as JSON.
/// </summary>
/// <param name="lat">Latitude inserted into the listing URL.</param>
/// <param name="lng">Longitude inserted into the listing URL.</param>
/// <returns>The serialised list of stores, each with its name and absolute URL.</returns>
public async Task<string> GetStores(string lat, string lng)
{
    // Address of the store listing to scrape.
    string locationUrl = $"https://www.foodpanda.com.tw/restaurants/lat/{lat}/lng/{lng}";

    string storeInfoHtml;

    // NOTE(review): a client is still created per call, as before; it is now at least
    // disposed. Consider sharing one instance if this method is called frequently.
    using (HttpClient httpClient = new HttpClient())
    {
        httpClient.DefaultRequestHeaders.Add("User-Agent", "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/84.0.4147.89 Safari/537.36");

        using (var storeInfoResponse = await httpClient.GetAsync(locationUrl))
        {
            // Await instead of blocking on .Result inside an async method.
            storeInfoHtml = await storeInfoResponse.Content.ReadAsStringAsync();
        }
    }

    var storeDocument = await context.OpenAsync(res => res.Content(storeInfoHtml));

    // Elements carrying the information to extract.
    var storeNames = storeDocument.QuerySelectorAll(".vendor-list-section .name");
    var storeLinks = storeDocument.QuerySelectorAll(".vendor-list-section li a");

    // The two collections are paired by position; stop at the shorter one so a
    // mismatch between them cannot index past the end.
    int pairCount = Math.Min(storeNames.Length, storeLinks.Length);

    List<Store> resultStores = new List<Store>(pairCount);
    for (var index = 0; index < pairCount; index++)
    {
        resultStores.Add(new Store
        {
            Store_Name = storeNames[index].TextContent,
            Store_Url = "https://www.foodpanda.com.tw" + storeLinks[index].GetAttribute("href")
        });
    }

    return JsonConvert.SerializeObject(resultStores);
}
/// <summary>
/// Parses the given HTML string into a document and extracts the text result from it.
/// </summary>
/// <param name="html">The HTML markup to parse.</param>
/// <returns>The result produced by <c>ParseHtml</c> for the parsed document.</returns>
public async Task<WikipediaTextResult> GetWikipediaTextFromString(string html)
{
    var parsedPage = await _angleSharpContext.OpenAsync(response => response.Content(html));
    return ParseHtml(parsedPage);
}
/// <summary>
/// Opens the article page and reads the <c>href</c> of its MP3 download link.
/// </summary>
/// <param name="url">Address of the article page.</param>
/// <returns>The link's <c>href</c> value, or <c>null</c> when the page has no such link.</returns>
public async Task<string> GetDownloadUrlFromArticlePage(string url)
{
    const string Mp3LinkSelector = "p.powerpress_links.powerpress_links_mp3 > a.powerpress_link_d";

    var articlePage = await _context.OpenAsync(url);
    var anchor = articlePage.QuerySelector(Mp3LinkSelector);

    if (anchor == null)
    {
        return null;
    }

    return anchor.GetAttribute("href");
}
/// <summary>
/// Opens the quest guide page for <paramref name="id"/> and parses the second
/// <c>.content-body</c> element on it.
/// </summary>
/// <param name="id">Quest id appended to the guide URL.</param>
/// <returns>The quest produced by <c>ParseQuest</c>.</returns>
public async Task<Quest> LoadQuest(string id)
{
    var guidePage = await _browsingContext.OpenAsync("http://www.runehq.com/guide.php?type=quest&id=" + id);

    // Index 1: the second element matching the selector is the one that gets parsed.
    var contentBodies = guidePage.QuerySelectorAll(".content-body");
    var questBody = (IHtmlElement)contentBodies[1];

    return ParseQuest(questBody);
}
/// <summary>Loads a specific JAV by url.</summary>
/// <param name="url">The JAV url.</param>
/// <returns>The parsed video, or null if no video at <c>url</c> exists.</returns>
/// <example>
/// <code>
/// var result = await Javlibrary.Client.LoadVideo(new Uri("http://www.javlibrary.com/en/?v=javlijazsu"));
/// result.Id // javlijazsu
/// result.Title // Fan Fan PRESTIGE Large Thanksgiving Soil And Shiro To Spree Yamakawa Blue Sky Meets Escalate! Basutsua ~
/// </code>
/// </example>
public static async Task<Video?> LoadVideo(Uri url)
{
    string html;

    // Release the response (and its connection) as soon as the body has been read.
    using (var response = await HttpClient.GetAsync(url).ConfigureAwait(false))
    {
        html = await response.Content.ReadAsStringAsync().ConfigureAwait(false);
    }

    var doc = await Context.OpenAsync(req => req.Content(html)).ConfigureAwait(false);
    return ParseVideoPage(doc);
}
/// <summary>
/// Fetches the weibo page, locates the comment entry point embedded in its
/// scripts and hands it to <c>CommentHandle</c>. Any exception is logged
/// together with the fetched text; <c>exit</c> is set to true afterwards.
/// </summary>
public async Task Run()
{
    string res_str = "";

    try
    {
        Log4Net.LogInfo($"正在抓取微博[{weibo_url}]的评论");
        Uri Weibo_Uri = new Uri(weibo_url);
        res_str = await httpService.GetAsync(Weibo_Uri.PathAndQuery, cookies);
        await HandleWeiboDetailPageAsync(res_str);
    }
    catch (Exception e)
    {
        Log4Net.ErrorInfo($"处理微博[{weibo_url}]异常", e);
        Log4Net.LogInfo($"本次异常的字符串:{res_str}");
    }

    exit = true;
}

/// <summary>
/// Walks from the fetched page text to the comment entry URL and starts comment
/// handling; returns without doing anything as soon as a required piece is missing.
/// </summary>
/// <param name="pageText">The text returned for the weibo page.</param>
private async Task HandleWeiboDetailPageAsync(string pageText)
{
    if (string.IsNullOrEmpty(pageText))
    {
        return;
    }

    var page = await context.OpenAsync(req => req.Content(pageText));
    var pageScripts = page.Scripts;
    if (pageScripts.Length <= 0)
    {
        return;
    }

    // The detail view is delivered as JSON inside the script that mentions this namespace.
    var detailScript = pageScripts.FirstOrDefault(x => x.InnerHtml.Contains(@"""ns"":""pl.content.weiboDetail.index""", StringComparison.OrdinalIgnoreCase));
    if (detailScript == null)
    {
        return;
    }

    var scriptText = detailScript.TextContent;
    if (scriptText == null)
    {
        return;
    }

    // Strip the FM.view( ... ) wrapper so only the JSON payload remains.
    string json_str = scriptText.Replace("FM.view(", "").TrimEnd(')');
    var json = JsonConvert.DeserializeObject<dynamic>(json_str);
    var temp_doc = await context.OpenAsync(req => req.Content(Convert.ToString(json.html)));

    var commentTab = temp_doc.QuerySelectorAll("*")
        .Where(x => "feed_list_commentTabAll".Equals(x.GetAttribute("node-type")))
        .FirstOrDefault();
    if (commentTab == null)
    {
        return;
    }

    string weibo_link = commentTab.GetAttribute("action-data");
    if (string.IsNullOrEmpty(weibo_link))
    {
        return;
    }

    string url = $"{Link(weibo_link)}";
    Log4Net.LogInfo($"获得评论入口URL[{url}]");
    Uri uri = new Uri(url);
    Log4Net.LogInfo($"开始抓取评论");

    // Process the comments.
    await CommentHandle(uri, weibo_link);
}
/// <summary>
/// Opens a page either from a local file or from an address: when
/// <c>IsLocalPath</c> accepts <paramref name="path"/>, the file's text is parsed
/// as the document content; otherwise <paramref name="path"/> is opened as an address.
/// </summary>
/// <param name="browsingContext">The browsing context to use.</param>
/// <param name="path">A local file path or an address.</param>
/// <returns>The opened document.</returns>
public static async Task<IDocument> OpenPageAsync(this IBrowsingContext browsingContext, string path)
{
    // Evaluate the check once; the previous unused local repeated the call.
    if (IsLocalPath(path))
    {
        return await browsingContext.OpenAsync(req => req.Content(File.ReadAllText(path)));
    }

    return await browsingContext.OpenAsync(path);
}
/// <summary>
/// Collects name/value pairs from the elements of an HTML document that match a selector.
/// </summary>
/// <param name="str_Document">The HTML document text.</param>
/// <param name="QuerySelectorAll">
/// The element selector. When it contains "form input" (ignoring case) each
/// matched element's own <c>value</c> attribute is used; otherwise the value is
/// taken from the element's first checked <c>option</c>, or its first
/// <c>option</c> when none is checked.
/// </param>
/// <returns>
/// One pair per contributing element: its <c>name</c> attribute and the value
/// (empty string when the attribute is missing). Elements without any
/// <c>option</c> are skipped in the option mode.
/// </returns>
public static async Task<List<KeyValuePair<string, string>>> GetHtmlDocument(string str_Document, string QuerySelectorAll)
{
    var input = new List<KeyValuePair<string, string>>();

    var _document = await context.OpenAsync(res => res.Content(str_Document));
    if (_document == null)
    {
        return input;
    }

    var contents = _document.QuerySelectorAll(QuerySelectorAll);
    if (contents == null)
    {
        return input;
    }

    // Ordinal, case-insensitive search: ToLower() is culture-sensitive and, for
    // example under Turkish casing rules, maps 'I' to a dotless 'ı' so "FORM INPUT"
    // would not be recognised.
    bool readsInputValues = QuerySelectorAll.IndexOf("form input", StringComparison.OrdinalIgnoreCase) >= 0;

    foreach (var item in contents)
    {
        var valueSource = item;

        if (!readsInputValues)
        {
            // Prefer the checked option, fall back to the first option.
            valueSource = item.QuerySelector("option:checked") ?? item.QuerySelector("option");
            if (valueSource == null)
            {
                continue;
            }
        }

        var name = item.GetAttribute("name");
        var value = valueSource.GetAttribute("value") ?? string.Empty;
        input.Add(new KeyValuePair<string, string>(name, value));
    }

    return input;
}
/// <summary>
/// Opens the menu page and builds one <c>MenuItem</c> per dish, opening each
/// dish's detail page to read its description.
/// </summary>
/// <param name="url">Address of the menu page.</param>
/// <returns>The menu items in page order.</returns>
public async Task<List<MenuItem>> Scrape(string url)
{
    var menuPage = await context.OpenAsync(url);
    var items = new List<MenuItem>();

    var sectionLinks = menuPage.QuerySelectorAll(parserConfig.MenuSectionRoot);
    var descriptionElement = menuPage.QuerySelector(parserConfig.MenuDescriptionPath);
    var menuDescription = descriptionElement.TextContent.Replace("<br>", "", StringComparison.Ordinal);
    var menuTitle = menuPage.QuerySelector(parserConfig.MenuTitlePath).TextContent;

    foreach (var sectionLink in sectionLinks)
    {
        var sectionTitle = sectionLink.QuerySelector("span").TextContent;

        // The href is a fragment reference; dropping its first character leaves the target id.
        var sectionId = sectionLink.Attributes["href"].Value.Substring(1);

        var sectionDishes = menuPage
            .QuerySelectorAll($"div[id='{sectionId}']")
            .SelectMany(container => container.QuerySelectorAll($"{parserConfig.DishRootPath}"));

        foreach (var dish in sectionDishes)
        {
            var detailsUrl = ((IHtmlAnchorElement)dish.QuerySelector("a")).Href;
            Log.Logger.Debug($"dishDetailsUrl {detailsUrl}");

            var detailsPage = await context.OpenAsync(detailsUrl);
            var dishDescription = detailsPage.QuerySelector(parserConfig.DishDescriptionPath).TextContent;

            // The dish name is whatever follows the SPAN marker (plus one character) in the markup.
            var nameMarkup = dish.QuerySelector(parserConfig.DishNamePath).InnerHtml;
            var nameStart = nameMarkup.IndexOf(SPAN, StringComparison.Ordinal) + SPAN.Length + 1;
            var dishName = nameMarkup.Substring(nameStart);

            items.Add(new MenuItem
            {
                MenuTitle = menuTitle,
                MenuSectionTitle = sectionTitle,
                DishName = dishName,
                MenuDescription = menuDescription,
                DishDescription = dishDescription
            });
        }
    }

    return items;
}
/// <summary>
/// Opens the career page for the given region and, when the response status is
/// 200, records the region, the page and its URL on this instance.
/// </summary>
/// <param name="r">The region to probe.</param>
/// <returns><c>true</c> when the page responded with status 200; otherwise <c>false</c>.</returns>
private async Task<bool> DetectRegionSpecificAsync(Region r)
{
    string careerUrl = $"http://playoverwatch.com/en-gb/career/pc/{r}/{battletagUrlFriendly}";
    var careerPage = await browsingContext.OpenAsync(careerUrl);

    if (careerPage.StatusCode != System.Net.HttpStatusCode.OK)
    {
        return false;
    }

    Region = r;
    userPage = careerPage;
    ProfileURL = careerUrl;
    return true;
}
/// <summary>
/// Opens the page, converts every row of its <c>.table</c> element except the
/// first into a <c>PalicoSkill</c> and inserts them into the database.
/// </summary>
/// <param name="address">Address of the page to read.</param>
public async Task GetPalicoSkills(string address)
{
    var skillPage = await context.OpenAsync(address);
    await db.CreateTableAsync<PalicoSkill>();

    // Skip(1) leaves out the first row of the table.
    var dataRows = skillPage.QuerySelector(".table").QuerySelectorAll("tr").Skip(1);

    var parsedSkills = new List<PalicoSkill>();
    parsedSkills.AddRange(dataRows.Select(row => GetPalicoSkill(row)));

    await db.InsertAllAsync(parsedSkills);
}
/// <summary>
/// Opens the configured address and collects beers from the first
/// <c>ul.menu-section-list</c> element, followed by the beers obtained via
/// <c>GrabLastBeersFromShowMore</c> from the page's HTML.
/// </summary>
/// <returns>The combined list of beers.</returns>
public async Task<List<Beer>> GetBeers()
{
    var menuPage = await _browsingContext.OpenAsync(_address);
    var menuList = menuPage.QuerySelector("ul.menu-section-list");

    // Beers present in the initial markup come first.
    var beers = new List<Beer>(GrabBeerFromHtml(menuList));

    var remainingBeers = await GrabLastBeersFromShowMore(menuPage.ToHtml());
    beers.AddRange(remainingBeers);

    return beers;
}
/// <summary>
/// Opens the tag page and the tag archive page for <paramref name="tagName"/>
/// and builds the full tag information from both documents.
/// </summary>
/// <param name="tagName">The tag name appended to the tag URLs.</param>
/// <returns>The result of <c>GetFullTag</c> for the two documents.</returns>
/// <exception cref="Exception">
/// Any failure is rethrown wrapped, with the original exception available as
/// <see cref="Exception.InnerException"/>.
/// </exception>
public async Task<TagFullDto> GetFullTagInfoByName(string tagName)
{
    try
    {
        var mainDocument = await _context.OpenAsync(_baseUrl + $"/tag/{tagName}");
        var archiveDocument = await _context.OpenAsync(_baseUrl + $"/tag/{tagName}/archive");
        return await GetFullTag(mainDocument, archiveDocument);
    }
    catch (Exception e)
    {
        // Keep the message unchanged, but also attach the cause so its type and
        // stack trace stay accessible to callers and loggers.
        throw new Exception($"Error during getting data from 'medium.com' by tag '{tagName}':\r\n {e}", e);
    }
}
/// <summary>
/// Opens the template's start page, walks every category link in its menu,
/// extracts the data for each category into <paramref name="resultados"/>,
/// persists and clears that list after each category, and finally consolidates
/// the search.
/// </summary>
/// <param name="resultados">Working list that receives each category's results; cleared after every category.</param>
private async Task ExtrairDadosPagina(List<ResultadoBusca> resultados)
{
    // New browsing context using the default configuration with the default loader.
    _browsingContext = BrowsingContext.New(Configuration.Default.WithDefaultLoader());

    // Main search page that carries the category menu.
    var paginaInicial = await _browsingContext.OpenAsync(_template.UrlInicial);

    // Elements holding the URL of each category's product grid.
    var linksCategorias = paginaInicial.QuerySelectorAll(_template.SeletorMenuCategorias);

    int idBusca = 0;

    foreach (var linkCategoria in linksCategorias)
    {
        string urlCategoria = _template.UrlSite + linkCategoria.GetAttribute("href");

        // The category name is listed in the product-grid menu; it was not found on the
        // product detail page, and a similar pattern was seen on the other 3 sites.
        string nome = linkCategoria.QuerySelector(_template.SelectorCategoria).InnerHtml.Trim();

        await ExtrairDadosPorCategoria(urlCategoria, nome, resultados);
        idBusca = await _busca.PersistirBusca(idBusca, resultados);
        resultados.Clear();
    }

    await _busca.ConsolidarBusca(idBusca);
}
/// <summary>
/// Generates an Excel file from the HTML document loaded from the supplied URI
/// and returns the byte array of the file data.
/// </summary>
/// <param name="uri">URI to download the HTML from.</param>
/// <returns>Byte array of the Excel file data.</returns>
/// <exception cref="ArgumentNullException"><paramref name="uri"/> is null.</exception>
/// <remarks>
/// This method blocks the calling thread until the document has been loaded.
/// NOTE(review): the conditions under which <c>ProcessDocument</c> rejects a
/// document (e.g. number of tables) are defined there — confirm.
/// </remarks>
public byte[] FromUri(Uri uri)
{
    if (uri == null)
    {
        throw new ArgumentNullException(nameof(uri));
    }

    // A context built from the bare default configuration has no loader, so opening
    // an address would yield an empty document instead of downloading the page.
    IBrowsingContext context = BrowsingContext.New(Configuration.Default.WithDefaultLoader());

    // Kept synchronous (.Result) because the method's signature is synchronous.
    var document = context.OpenAsync(uri.ToString()).Result;
    return ProcessDocument(document.DocumentElement);
}
/// <summary>
/// Opens <paramref name="source"/> and returns up to 20 distinct paragraphs
/// from it.
/// </summary>
/// <param name="source">The address passed to the browsing context.</param>
/// <returns>
/// View models for <c>p</c> elements that have text, are not rejected by
/// <c>IsAncestorLink</c> and pass <c>IsSentenceMoreThanNWords(text, 4)</c>;
/// "\r\n" and "\r" are removed from the text, duplicates (by text) are dropped
/// and at most the first 20 are returned.
/// </returns>
/// <exception cref="WebScrappingException">The browsing context returned no document.</exception>
protected async Task<List<DocumentViewModel>> ParseHTML(string source)
{
    IConfiguration config = Configuration.Default.WithDefaultLoader();
    IBrowsingContext context = BrowsingContext.New(config);
    IDocument dom = await context.OpenAsync(source);

    if (dom == null)
    {
        throw new WebScrappingException("The website might not exist");
    }

    return dom.All
        .Where(element => !string.IsNullOrEmpty(element.TextContent))
        .Where(element => !IsAncestorLink(element))
        .Where(element => element.LocalName == "p")
        .Where(element => IsSentenceMoreThanNWords(element.TextContent, 4))
        .Select(element => new DocumentViewModel
        {
            // Strings are immutable: the previous ForEach(... Replace ...) threw its
            // result away, so the line breaks were never actually removed.
            textContent = element.TextContent.Replace("\r\n", "").Replace("\r", ""),
            name = element.LocalName,
            // Elements for which IsAncestorLink is true were filtered out above.
            isAncestorLink = false,
        })
        .GroupBy(document => document.textContent)
        .Select(grp => grp.First())
        .Take(20)
        .ToList();
}
/// <summary>
/// Requests the phone contact data for an advert and extracts the phone numbers from it.
/// </summary>
/// <param name="tempId">Id inserted into the phone contact URL.</param>
/// <param name="brContext">Browsing context used to issue the request.</param>
/// <returns>
/// The numbers after <c>PrepareNumber</c> has been applied; empty when the
/// response carries no <c>"value"</c> entry or the entry is empty.
/// </returns>
/// <remarks>Blocks the calling thread while the request completes.</remarks>
private static List<string> GetPhoneNumbers(string tempId, IBrowsingContext brContext)
{
    var result = new List<string>();
    var regexNumberPattern = "\"value\":\"(.*)\"";
    var numberLinkPattern = "http://www.olx.ua/ajax/misc/contact/phone/{0}/white/";

    var docNumber = brContext.OpenAsync(string.Format(numberLinkPattern, tempId)).Result;
    var match = Regex.Match(docNumber.Body.InnerHtml, regexNumberPattern);

    // Groups.Count reflects the pattern (always 2 here), not whether it matched,
    // so the outcome has to be read from Success.
    if (!match.Success)
    {
        System.Console.WriteLine("No number");
        return result;
    }

    var numbersString = match.Groups[1].Value;

    if (numbersString.Contains("span"))
    {
        // Several numbers arrive as escaped markup; undo the escaping and parse it.
        // NOTE(review): the previous code replaced "<" with "<" and ">" with ">",
        // which did nothing; HTML-escaped angle brackets are presumably what the
        // serialised body contains — confirm against a real response.
        var parser = new HtmlParser();
        numbersString = numbersString
            .Replace("&lt;", "<")
            .Replace("&gt;", ">")
            .Replace("\\\"", "\"")
            .Replace("\\/", "/");
        var document = parser.Parse(numbersString);
        var numbers = document.QuerySelectorAll(".block").Select(s => PrepareNumber(s.InnerHtml));
        result.AddRange(numbers);
    }
    else if (numbersString != string.Empty)
    {
        result.Add(PrepareNumber(numbersString));
    }

    return result;
}
/// <summary>
/// Downloads the article at <c>oneNews.Url</c> and maps <paramref name="oneNews"/>
/// to a view model that additionally lists the article's text elements.
/// </summary>
/// <param name="oneNews">The news item; its <c>Url</c> is downloaded.</param>
/// <param name="mapper">Mapper used to create the view model from <paramref name="oneNews"/>.</param>
/// <returns>
/// The view model, with one <c>HtmlElement</c> (local name and text) appended per
/// <c>p.box-paragraph__text</c>, <c>h2.box-paragraph__subtitle</c> or
/// <c>b.box-paragraph__text</c> element, in document order.
/// </returns>
public async Task<OneNewsVM> GetOneNews(OneNews oneNews, IMapper mapper)
{
    string source;

    // NOTE(review): a client is still created per call, as before; it and the
    // response are now disposed. Consider injecting a shared instance.
    using (HttpClient httpClient = new HttpClient())
    using (HttpResponseMessage httpResponseMessage = await httpClient.GetAsync(oneNews.Url))
    {
        source = await httpResponseMessage.Content.ReadAsStringAsync();
    }

    IConfiguration config = Configuration.Default;
    IBrowsingContext context = BrowsingContext.New(config);
    IDocument document = await context.OpenAsync(req => req.Content(source));

    IElement[] textItems = document.All
        .Where(m => (m.LocalName == "p" && m.ClassList.Contains("box-paragraph__text"))
                 || (m.LocalName == "h2" && m.ClassList.Contains("box-paragraph__subtitle"))
                 || (m.LocalName == "b" && m.ClassList.Contains("box-paragraph__text")))
        .ToArray();

    OneNewsVM oneNewsViewModel = mapper.Map<OneNewsVM>(oneNews);

    foreach (IElement item in textItems)
    {
        oneNewsViewModel.HtmlElements.Add(new HtmlElement
        {
            Name = item.LocalName,
            Text = item.Text()
        });
    }

    return oneNewsViewModel;
}
/// <summary>
/// Opens a new document loaded from the specified request
/// asynchronously in the given context.
/// </summary>
/// <param name="context">The browsing context to use.</param>
/// <param name="request">The request to issue.</param>
/// <param name="cancel">The cancellation token.</param>
/// <returns>The task that creates the document.</returns>
/// <exception cref="ArgumentNullException"><paramref name="request"/> is null.</exception>
public static async Task<IDocument> OpenAsync(this IBrowsingContext context, DocumentRequest request, CancellationToken cancel)
{
    if (request == null)
    {
        throw new ArgumentNullException(nameof(request));
    }

    var loader = context.Loader;

    if (loader != null)
    {
        var download = loader.DownloadAsync(request);

        // Dispose the registration so the token does not keep a callback to a finished download.
        using (cancel.Register(download.Cancel))
        using (var response = await download.Task.ConfigureAwait(false))
        {
            if (response != null)
            {
                return await context.OpenAsync(response, cancel).ConfigureAwait(false);
            }
        }
    }

    // No loader or no response: fall back to a new, empty document at the target address.
    return await context.OpenNewAsync(request.Target.Href).ConfigureAwait(false);
}
/// <summary>
/// Returns the document for <paramref name="url"/>, serving it from the
/// in-memory cache when present and loading and caching it otherwise.
/// </summary>
/// <param name="url">Address of the document; also the cache key.</param>
/// <param name="cancellationToken">Cancellation token passed to the load.</param>
/// <returns>The cached or newly loaded document.</returns>
private async Task<IDocument> BrowseAsync(string url, CancellationToken cancellationToken)
{
    // Single lookup instead of ContainsKey followed by the indexer.
    IDocument cached;
    if (caches.TryGetValue(url, out cached))
    {
        return cached;
    }

    var document = await browsingContext.OpenAsync(url, cancellationToken);

    // TODO: remove idle documents
    if (caches.Count > 100)
    {
        Console.WriteLine("Cache reaches maximum limit, resetting...");
        caches = new Dictionary<string, IDocument>();
    }

    // Indexer assignment rather than Add: another call may have cached the same
    // address while this one was awaiting, and Add would throw on the duplicate
    // key. The freshly loaded document is also kept after a reset.
    caches[url] = document;

    return document;
}
/// <summary>
/// Parses the markup, appends one flat node per element to <c>Elements</c> and
/// rebuilds <c>Root</c> as a tree starting at the document body.
/// </summary>
/// <param name="html">The markup to parse.</param>
/// <param name="parent">Not read by this method.</param>
private void Parse(string html, HtmlNode parent = null)
{
    // https://regexr.com/4hr80 if you want the regex I ingeniously created
    var browsing = BrowsingContext.New(Configuration.Default);
    var parsed = browsing.OpenAsync(response => response.Content(html)).Result;

    // Flat list: every element of the document, in document order.
    foreach (var element in parsed.All)
    {
        var flatNode = new HtmlNode();
        flatNode.SetAttributes(element.Attributes);
        flatNode.Tag = element.TagName;
        flatNode.TagBody = element.Text();
        Elements.Add(flatNode);
    }

    // Tree: starts at the body and is filled in by PopulateChildren.
    var body = parsed.Body;
    var tree = new HtmlNode
    {
        Tag = body.TagName,
        TagBody = body.Text()
    };
    PopulateChildren(body, tree);
    Root = tree;
}
/// <summary>
/// Opens the composition page of an index and reads one stock per row of the
/// <c>table#gvC</c> body.
/// </summary>
/// <param name="index">Index name inserted into the composition URL format.</param>
/// <returns>
/// The stocks as an array. Rows whose first cell has no link are skipped; the
/// weight is the value of the eighth cell divided by 100.
/// </returns>
public async Task<IEnumerable<BvbStock>> ScrapeIndexdComposition(string index)
{
    var compositionPage = await _browsingContext.OpenAsync(string.Format(INDEX_COMPOSITION_URL_FORMAT, index));
    var rows = compositionPage.QuerySelectorAll("table#gvC tbody tr").OfType<IHtmlTableRowElement>();

    var parsedStocks = new List<BvbStock>();

#pragma warning disable S3267 // Loops should be simplified with "LINQ" expressions
    foreach (IHtmlTableRowElement tableRow in rows)
    {
        var ticker = tableRow.Cells[0].QuerySelector("a")?.TextContent;

        if (ticker != null)
        {
            var stock = new BvbStock
            {
                Symbol = ticker,
                Name = tableRow.Cells[1].TextContent,
                Price = decimal.Parse(tableRow.Cells[3].TextContent),
                Weight = decimal.Parse(tableRow.Cells[7].TextContent) / 100
            };
            parsedStocks.Add(stock);
        }
    }
#pragma warning restore S3267 // Loops should be simplified with "LINQ" expressions

    return parsedStocks.ToArray();
}
/// <summary>
/// Loads a job page (blocking until it is available) and reads the job's fields
/// from fixed positions in the page's table layout.
/// </summary>
/// <param name="url">Address of the job page; the text after its last '/' is parsed as the job id.</param>
/// <returns>A job with the scraped fields and <c>Created</c> set to the current local time.</returns>
public Job ScrapeJob(string url)
{
    var page = _context.OpenAsync(url).Result;

    var idText = url.Substring(url.LastIndexOf("/", StringComparison.Ordinal) + 1);
    var jobId = long.Parse(idText);

    var salaryText = page.GetContent(
        "body > table:nth-child(3) > tbody > tr > td > table > tbody > tr:nth-child(2) > td > table > tbody > tr:nth-child(3) > td > span");
    var titleText = page.GetContent(
        "body > table:nth-child(3) > tbody > tr > td > table > tbody > tr:nth-child(2) > td > table > tbody > tr:nth-child(2) > td > b");
    var companyText = page.GetContent(
        "body > table:nth-child(3) > tbody > tr > td > table > tbody > tr:nth-child(2) > td > table > tbody > tr:nth-child(2) > td > a");

    // The description is the concatenation of two separate parts of the page.
    var descriptionFirstPart = page.GetContent(
        "body > table:nth-child(3) > tbody > tr > td > table > tbody > tr:nth-child(5) > td");
    var descriptionSecondPart = page.GetContent(
        "body > table:nth-child(3) > tbody > tr > td > table:nth-child(2) > tbody > tr:nth-child(4)");

    var job = new Job
    {
        Company = companyText,
        Description = descriptionFirstPart + descriptionSecondPart,
        Salary = salaryText,
        Title = titleText,
        Id = jobId,
        Created = DateTime.Now
    };
    return job;
}
/// <summary>
/// Loads the document at <paramref name="url"/> (blocking on the result) and
/// appends a generated test method for it to the file "test.cs".
/// </summary>
/// <remarks>
/// The first <c>title</c>, <c>content</c> and <c>css</c> elements of the document
/// are read. The title is passed through <c>Sanatize</c> and, when it is already
/// in <paramref name="methods"/>, extended with the first letter from 'A' onwards
/// that makes it unused. The <c>css</c> text is parsed as a stylesheet; for each
/// style rule one snippet is emitted that queries the rule's selector text and
/// asserts zero matches, numbered from 1. Double quotes in the content are
/// doubled because it is placed inside a verbatim string in the generated code.
/// NOTE(review): placeholders are substituted by successive string replacement
/// (SELECTOR before INDEX; URL, TITLE, HTML, then SELECTORS), so a substituted
/// value that itself contains a later placeholder word would be altered too.
/// NOTE(review): the layout inside the two verbatim templates appears
/// whitespace-collapsed in this copy of the file — confirm against the
/// original before relying on the generated output's formatting.
/// </remarks>
/// <param name="context">Browsing context used to load the document.</param>
/// <param name="url">Address of the document; also written into the generated summary.</param>
/// <param name="methods">Names of the methods generated so far; the name used here is added to it.</param>
static void CreateCssSelectorTest(IBrowsingContext context, String url, List<String> methods) { Console.Write("Loading " + url + " ... "); var document = context.OpenAsync(url).Result; var title = Sanatize(document.GetElementsByTagName("title")[0].TextContent); var content = document.GetElementsByTagName("content")[0].InnerHtml.Trim().Replace("\"", "\"\""); var styling = document.GetElementsByTagName("css")[0].TextContent; var parser = new CssParser(); var sheet = parser.ParseStylesheet(styling); var selectors = new StringBuilder(); var i = 1; if (methods.Contains(title)) { var ltr = 'A'; while (methods.Contains(title + ltr)) ltr = (Char)(ltr + 1); title += ltr.ToString(); } foreach (var rule in sheet.Rules) { if (rule is ICssStyleRule) { selectors.Append(@" var selectorINDEX = doc.QuerySelectorAll(""SELECTOR""); Assert.AreEqual(0, selectorINDEX.Length);" .Replace("SELECTOR", ((ICssStyleRule)rule).SelectorText) .Replace("INDEX", i.ToString())); i++; } } File.AppendAllText("test.cs", @" /// <summary> /// Test taken from URL /// </summary> public void TITLE() { var source = @""HTML""; var doc = DocumentBuilder.Html(source); SELECTORS } " .Replace("URL", url) .Replace("TITLE", title) .Replace("HTML", content) .Replace("SELECTORS", selectors.ToString()) ); Console.WriteLine("success."); methods.Add(title); }