/// <summary>
/// Downloads the page at <paramref name="url"/> (authenticating with the fixed credentials below)
/// and returns the root node of the parsed document.
/// </summary>
/// <param name="url">Address of the page to download.</param>
/// <returns>
/// The document's root node, or <c>null</c> when the download raised an
/// <see cref="HtmlAgilityPack.HtmlWebException"/>.
/// </returns>
private async Task<HtmlNode> LoadWebsitesDocumentNode(string url)
{
    HtmlWeb htmlWeb = new HtmlWeb();
    HtmlDocument website = null;

    try
    {
        // NOTE(review): the credentials are hard-coded in source — consider moving them to configuration.
        website = await htmlWeb.LoadFromWebAsync(url, "gsgschueler", "vp2015_01");
    }
    catch (HtmlAgilityPack.HtmlWebException ex)
    {
        // Best effort: the caller still receives null, but the failure is no longer invisible.
        System.Diagnostics.Debug.WriteLine("LoadWebsitesDocumentNode failed for '" + url + "': " + ex.Message);
    }

    return website?.DocumentNode;
}
/// <summary>
/// Downloads the listing page at <paramref name="url"/>, converts every <c>li</c> of the
/// <c>homeListstory</c> list into a <c>Novel</c>, binds the collection to the home page list
/// and records the current page number.
/// </summary>
/// <param name="url">Address of the listing page.</param>
/// <param name="mode">When <c>Mode.Refresh</c>, previously loaded novels are cleared first.</param>
/// <remarks>
/// Any exception results in an error dialog; the loading indicator is hidden in every case.
/// </remarks>
async void getData(String url, Mode mode)
{
    fragmentLoad.Visibility = Visibility.Visible;

    if (mode == Mode.Refresh)
    {
        listNovels.Clear();
    }

    HtmlAgilityPack.HtmlWeb htmlWeb = new HtmlAgilityPack.HtmlWeb();

    try
    {
        htmlDoc = await htmlWeb.LoadFromWebAsync(url);

        HtmlNode storyList = htmlDoc.DocumentNode.SelectSingleNode(@"//ul[@class='homeListstory']");
        HtmlNodeCollection entries = storyList.SelectNodes("li");

        foreach (HtmlNode entry in entries)
        {
            // The heading link carries both the title and the target address.
            HtmlNode titleLink = entry.SelectSingleNode("h3").SelectSingleNode("a");
            String name = titleLink.GetAttributeValue("title", null);
            String imgUrl = entry.SelectSingleNode("a").SelectSingleNode("img").GetAttributeValue("src", null);
            String mainUrl = titleLink.GetAttributeValue("href", null);

            listNovels.Add(new Novel(name, imgUrl, mainUrl));
        }

        fragmentGridView.lvHomePage.ItemsSource = listNovels;

        HtmlNode currentPageLink = htmlDoc.DocumentNode.SelectSingleNode(@"//a[@title='current-page']");
        currentPage = Convert.ToInt16(currentPageLink.InnerText);
    }
    catch (Exception)
    {
        // The dialog is started but deliberately not awaited, exactly as before.
        MessageDialog errorDialog = new MessageDialog("Lỗi hệ thống , vui lòng thử lại sau");
        errorDialog.ShowAsync();
    }
    finally
    {
        fragmentLoad.Visibility = Visibility.Collapsed;
    }
}
/// <summary>
/// Searches vndb.org for <paramref name="searchString"/> and converts each row of the
/// result table into a <c>VN</c>.
/// </summary>
/// <param name="searchString">Raw (unescaped) search term; <c>null</c> is treated as empty.</param>
/// <returns>
/// The entries parsed so far; empty when nothing matched or the download/parse failed.
/// This method never throws for scraping failures.
/// </returns>
public async Task<List<VN>> GetVNSearch(string searchString)
{
    List<VN> result = new List<VN>();

    // Escape the term so characters such as '&', '#' or '+' cannot corrupt the query string.
    string url = "https://vndb.org/v/all?sq=" + Uri.EscapeDataString(searchString ?? string.Empty);

    // The VNDB.sem throttle is not taken here; those calls were commented out in the original code.
    try
    {
        var web = new HtmlAgilityPack.HtmlWeb();
        web.CaptureRedirect = true;
        HtmlDocument doc = await web.LoadFromWebAsync(url);

        // SelectNodes returns null (not an empty collection) when nothing matches.
        var rows = doc.DocumentNode.SelectNodes("//table[@class='stripe']/tr");
        if (rows == null)
        {
            return result;
        }

        foreach (var row in rows)
        {
            var titleCell = row.SelectSingleNode("./td[@class='tc1']");
            var titleLink = row.SelectSingleNode("./td[@class='tc1']/a");
            var dateCell = row.SelectSingleNode("./td[@class='tc4']");

            // Skip rows that lack the expected cells instead of aborting the whole search.
            if (titleCell == null || titleLink == null || dateCell == null)
            {
                continue;
            }

            string id = titleLink.GetAttributeValue("href", "");
            string japName = titleLink.GetAttributeValue("title", "");
            string engName = titleCell.InnerText;
            string date = dateCell.InnerText;

            result.Add(new VN(ExtractId(id), new Name(engName, japName), date));
        }
    }
    catch (Exception e)
    {
        // Best effort: return whatever was collected, but leave a trace of the failure.
        System.Diagnostics.Debug.WriteLine("GetVNSearch failed for '" + url + "': " + e.Message);
    }

    return result;
}
/// <summary>
/// Downloads the ROM detail page at <paramref name="link"/> and fills a <c>Models.rominfo</c>
/// from its info table, download link and cover image.
/// </summary>
/// <param name="link">Address of the ROM detail page.</param>
/// <returns>
/// The populated info object, or an empty <c>Models.rominfo</c> when the page text contains
/// "404 Page Not Found".
/// </returns>
public async Task<Models.rominfo> getrominfo(string link)
{
    var web = new HtmlAgilityPack.HtmlWeb();

    // Fetch the ROM's info page.
    var page = await web.LoadFromWebAsync(link);

    // A valid page must not contain the 404 text: the response still arrives OK but its tables
    // are empty, which could cause crashes further down.
    if (page.Text.Contains("404 Page Not Found"))
    {
        return(new Models.rominfo());
    }

    // Take the second div of the page.
    var secondDiv = page.DocumentNode.SelectNodes("//div")[1];

    var infoTable = secondDiv
        .SelectNodes("//*[contains(@class,'table table-striped rom-info')]")
        .Where(candidate => candidate.Name == "table")
        .First();
    var infoBody = infoTable.ChildNodes
        .Where(child => child.Name == "tbody")
        .First();

    // The table body's inner text holds the ROM information; decode it and split it into fields.
    string[] fields = desencriptar(infoBody.InnerText).Split(new[] { "^^^???**//" }, StringSplitOptions.None);

    Models.rominfo info = new Models.rominfo();

    // Look up the rom-link element by id and re-insert separators so the address can be split.
    // NOTE(review): the second Replace("&", "") is a no-op as written — possibly one of the two
    // was meant to target an HTML entity; confirm against the original source.
    string rawHref = page.GetElementbyId("rom-link").Attributes["href"].Value;
    info.linkdescarga = rawHref
        .Replace("&", "")
        .Replace("&", "")
        .Replace("token=", "&token=")
        .Replace("id=", "&id=")
        .Replace("name=", "&name=");

    // The ROM id sits between the two parameters: &id=<id>&token=<token>
    string afterIdMarker = info.linkdescarga.Split(new[] { "&id=" }, StringSplitOptions.None)[1];
    string beforeTokenMarker = afterIdMarker.Split(new[] { "&token=" }, StringSplitOptions.None)[0];
    info.id = beforeTokenMarker.Replace("&", "");

    // Copy the decoded fields into the model instance.
    info.nombre = fields[0];
    info.size = fields[1];
    info.region = fields[2];
    info.consola = fields[3];

    // Find the cover image and read its src attribute.
    var coverImages = secondDiv
        .SelectNodes("//*[contains(@class,'product__img')]")
        .Where(candidate => candidate.Name == "img");
    info.imagen = coverImages.First().Attributes["src"].Value;

    // Downloads and votes may be absent, hence the try/catch.
    try
    {
        info.descargas = fields[4];
        info.votos = secondDiv
            .ChildNodes[2]
            .ChildNodes[0]
            .ChildNodes[1]
            .ChildNodes[1]
            .ChildNodes[1]
            .ChildNodes[0]
            .ChildNodes[0]
            .InnerText
            .Replace("Out of", " De ");
    }
    catch
    {
        // Fall back to default values when they cannot be found.
        info.descargas = "0";
        info.votos = "0 de 5";
    }

    return(info);
}
/// <summary>
/// Downloads the test page, extracts its catalogue list and flattens it into three sets:
/// catalogues (level one), course types (level two) and courses.
/// </summary>
/// <returns>An <c>All</c> instance holding the three sets.</returns>
public async Task<All> GetAllSet()
{
    HtmlWeb webClient = new HtmlWeb();

    // International open courses: http://open.163.com/ocw/
    HtmlDocument page = await webClient.LoadFromWebAsync("http://localhost:8080/OpenCourse163Test.html");
    System.Diagnostics.Debug.WriteLine("after LoadFromWebAsync");

    // 1.<div class="m-t-bg">...</div>
    List<Catalogue> catalogues = this.GetCatalogueList(page.DocumentNode);

    var newCatalogues = new HashSet<NewCatalogue>();     // level-one titles
    var newCourseTypes = new HashSet<NewCourseType>();   // level-two titles
    var newCourses = new HashSet<NewCourse>();           // courses

    foreach (Catalogue catalogue in catalogues)
    {
        // Level one
        var levelOne = new NewCatalogue
        {
            ID = catalogue.CatalogueTitle.GetHashCode(),
            Title = catalogue.CatalogueTitle
        };
        newCatalogues.Add(levelOne);

        // Level two
        CourseType courseType = catalogue.CourseTypes;
        var levelTwo = new NewCourseType
        {
            ID = courseType.CourseTypeId.GetHashCode(),
            Title = courseType.CourseTypeTitle,
            Catalogue = levelOne
        };
        newCourseTypes.Add(levelTwo);

        // Courses of this course type
        foreach (var course in courseType.OCourses)
        {
            newCourses.Add(new NewCourse
            {
                CourseTitle = course.CourseTitle,
                CourseHrefUrl = course.CourseHrefUrl,
                CourseImgUrl = course.CourseImgUrl,
                CourseUpdataProgress = course.CourseUpdataProgress,
                CourseType = levelTwo
            });
        }
    }

    return new All
    {
        NewCatalogueSet = newCatalogues,
        newCourseTypeSet = newCourseTypes,
        NewCourseSet = newCourses
    };
}
/// <summary>
/// Loads the local test page, extracts its catalogue list, prints it and asserts that
/// exactly 18 catalogues were found.
/// </summary>
public void ReadSite()
{
    var webClient = new HtmlWeb();

    // International open courses: http://open.163.com/ocw/
    // The download is awaited synchronously through Task.Result.
    Task<HtmlDocument> pendingDownload = webClient.LoadFromWebAsync("http://localhost:8080/OpenCourse163Test.html");
    HtmlNode root = pendingDownload.Result.DocumentNode;

    // 1.<div class="m-t-bg">...</div>
    List<Catalogue> catalogues = GetCatalogueList(root);
    printCatalogues(catalogues);

    // Assertion
    Assert.AreEqual(18, catalogues.Count());
}
/// <summary>
/// Loads the content first and rewrites every link through <c>FixLink</c> so a click can raise
/// an event, because the WebView in Windows 8 does not support the Navigating event;
/// then navigates the WebView to <paramref name="url"/>.
/// </summary>
/// <param name="url">Address of the page to load and navigate to.</param>
async void GoTo(string url)
{
    HtmlWeb htmlWeb = new HtmlWeb();

    // Parse the page so its anchors can be rewritten.
    HtmlDocument doc = await htmlWeb.LoadFromWebAsync(url);

    foreach (HtmlNode link in doc.DocumentNode.Descendants("a"))
    {
        HtmlAttribute att = link.Attributes["href"];

        // Anchors without an href (e.g. named anchors) have nothing to rewrite; dereferencing
        // the missing attribute previously threw a NullReferenceException.
        if (att == null)
        {
            continue;
        }

        att.Value = FixLink(att);
    }

    // NOTE(review): the rewritten document is currently not displayed — the NavigateToString
    // call below is disabled and the WebView navigates to the original address instead.
    //WebViewMain.NavigateToString(doc.DocumentNode.OuterHtml);

    // Detach before attaching so repeated calls do not stack duplicate handlers, and attach
    // before navigating so the handler is in place when loading completes.
    WebViewMain.LoadCompleted -= WebViewMain_LoadCompleted;
    WebViewMain.LoadCompleted += WebViewMain_LoadCompleted;
    WebViewMain.Navigate(new Uri(url));
}
/// <summary>
/// Scrapes the celebrity list at the configured <c>Celebrities:Url</c>, builds a dictionary of
/// <c>Celebrity</c> objects keyed by a freshly generated id and writes it as JSON to the
/// configured <c>Celebrities:Path</c>.
/// </summary>
/// <returns>The dictionary that was serialized to the file.</returns>
public async Task<Dictionary<string, Celebrity>> CreateDataFile()
{
    string jsonFilePath = config["Celebrities:Path"];
    string url = config["Celebrities:Url"];
    Dictionary<string, Celebrity> celebrityDic = new Dictionary<string, Celebrity>();

    HtmlWeb web = new HtmlAgilityPack.HtmlWeb();
    HtmlDocument doc = await web.LoadFromWebAsync(url);

    var nodes = doc.DocumentNode.SelectNodes("//div[@class='lister-item mode-detail']").ToList();

    foreach (HtmlNode item in nodes)
    {
        Celebrity celebrity = new Celebrity { Id = Guid.NewGuid().ToString() };

        // ".//" keeps each query inside the current item. A leading "//" is evaluated from the
        // document root even when called on a child node, which is why the previous version had
        // to remove every processed item from the document to reach the next one.
        celebrity.Name = item.SelectSingleNode(".//h3[@class='lister-item-header']")
            .ChildNodes["a"].InnerText.Trim();

        HtmlNode professionNode = item.SelectSingleNode(".//p[@class='text-muted text-small']");
        string description = professionNode.NextSibling.NextSibling.InnerText.ToLower();

        celebrity.Profession = professionNode.InnerText
            .Split(new string[] { "\r\n", "|" }, StringSplitOptions.RemoveEmptyEntries)[0]
            .Trim();

        SetGender(celebrity, description);
        SetBirthday(celebrity, description);

        celebrity.ImageUrl = item.SelectSingleNode(".//div[@class='lister-item-image']")
            .ChildNodes["a"].ChildNodes["img"].Attributes["src"].Value;

        celebrityDic.Add(celebrity.Id, celebrity);
    }

    await System.IO.File.WriteAllTextAsync(jsonFilePath, JsonConvert.SerializeObject(celebrityDic));
    return celebrityDic;
}
/// <summary>
/// Downloads the chapter listing at <paramref name="url"/>, appends one <c>Chapter</c> per
/// table row that has a third cell, binds the list to the chapter view and updates paging state.
/// </summary>
/// <param name="url">Address of the chapter listing page.</param>
/// <param name="mode">
/// When <c>Mode.Refresh</c>, the chapter list is cleared and the info and summary sections
/// are re-read from the downloaded document.
/// </param>
/// <remarks>
/// Any exception results in an error dialog; the loading indicator is hidden in every case.
/// </remarks>
async void getData(String url, Mode mode)
{
    fragmentLoad.Visibility = Visibility.Visible;
    HtmlAgilityPack.HtmlWeb htmlWeb = new HtmlAgilityPack.HtmlWeb();

    try
    {
        htmlDoc = await htmlWeb.LoadFromWebAsync(url);

        if (mode == Mode.Refresh)
        {
            chapter.Clear();
            getInfo(htmlDoc);
            getSummary(htmlDoc);
        }

        HtmlNode chapterTable = htmlDoc.DocumentNode.SelectSingleNode(@"//table[@class='table table-striped']");
        HtmlNodeCollection rows = chapterTable.SelectNodes("tr");

        foreach (HtmlNode row in rows)
        {
            HtmlNode numberCell = row.SelectSingleNode("td[2]");
            HtmlNode titleCell = row.SelectSingleNode("td[3]");

            // Rows without a third cell carry no chapter link.
            if (titleCell == null)
            {
                continue;
            }

            HtmlNode chapterLink = titleCell.SelectSingleNode("a");
            String displayName = numberCell.SelectSingleNode("strong").InnerText + " : " + chapterLink.InnerText;
            String chapterUrl = chapterLink.GetAttributeValue("href", null);

            chapter.Add(new Chapter(displayName, chapterUrl));
        }

        HtmlNode currentPageLink = htmlDoc.DocumentNode.SelectSingleNode(@"//a[@title='current-page']");
        currentPage = Convert.ToInt16(currentPageLink.InnerText);

        lvChapter.ItemsSource = chapter;
        checkNextPage(htmlDoc);
    }
    catch (Exception)
    {
        // The dialog is started but deliberately not awaited, exactly as before.
        MessageDialog errorDialog = new MessageDialog("Lỗi hệ thống , vui lòng thử lại sau");
        errorDialog.ShowAsync();
    }
    finally
    {
        fragmentLoad.Visibility = Visibility.Collapsed;
    }
}
/// <summary>
/// Downloads the details page of one incident and extracts its name and property table.
/// </summary>
/// <param name="incidentId">Identifier of the incident; must not be negative.</param>
/// <returns>
/// An <c>Incident</c> built from the id, the text of the first <c>h3.incident_h3</c> (empty
/// when none exists) and the parsed first <c>table.incident_table</c> (<c>null</c> when none exists).
/// </returns>
/// <exception cref="ArgumentOutOfRangeException"><paramref name="incidentId"/> is negative.</exception>
public static async Task<Incident> GetIncidentDetailsAsync(int incidentId)
{
    if (incidentId < 0)
    {
        throw new ArgumentOutOfRangeException("incidentId");
    }

    // Download the web page.
    HtmlWeb web = new HtmlWeb();
    HtmlDocument doc = await web.LoadFromWebAsync(String.Format(GetIncidentDetailsUrl, incidentId));

    // Incident name: first h3 whose class attribute is exactly "incident_h3".
    string name = "";
    foreach (HtmlNode heading in doc.DocumentNode.Descendants("h3"))
    {
        if (!heading.HasAttributes
            || !heading.Attributes.Contains("class")
            || heading.Attributes["class"].Value != "incident_h3")
        {
            continue;
        }

        name = heading.InnerText;
        break;
    }

    // Incident properties: first table whose class attribute is exactly "incident_table".
    Dictionary<string, string> properties = null;
    foreach (HtmlNode table in doc.DocumentNode.Descendants("table"))
    {
        if (!table.HasAttributes
            || !table.Attributes.Contains("class")
            || table.Attributes["class"].Value != "incident_table")
        {
            continue;
        }

        properties = ParseIncidentTable(table);
        break;
    }

    return new Incident(incidentId, name, properties);
}
/// <summary>
/// Retrieves JSON data from Mashie by parsing its HTML page.
/// Mashie offers no public API, so the data is lifted from the first script element
/// whose content mentions "weekData".
/// </summary>
/// <returns>
/// The matching script content with its first 20 characters removed, or the text
/// "Couldn't find Json data from Mashie" when no script matches.
/// </returns>
private static async Task<string> GetJsonDataFromMashieAsync()
{
    // Page to parse.
    const string urlToFetchFrom = @"https://mpi.mashie.eu/public/menu/motala+kommun/af77367d";

    // Download the HTML.
    var webClient = new HtmlWeb();
    HtmlDocument document = await webClient.LoadFromWebAsync(urlToFetchFrom);

    // Take the first script that contains "weekData".
    // NOTE(review): Substring(20) presumably strips the JavaScript prefix in front of the JSON
    // payload — confirm against the live page.
    string weekDataScript = document.DocumentNode
        .Descendants("script")
        .Where(script => script.InnerHtml.Contains("weekData"))
        .Select(script => script.InnerHtml.Substring(20))
        .FirstOrDefault();

    if (weekDataScript != null)
    {
        return weekDataScript;
    }

    // The parse failed.
    return "Couldn't find Json data from Mashie";
}
/// <summary>
/// Downloads the vndb.org page of character <paramref name="id"/> and returns the address of
/// its profile image.
/// </summary>
/// <param name="id">Numeric character id appended to the "c" page address.</param>
/// <returns>
/// The <c>src</c> attribute of the character image (empty string when the attribute is
/// missing), or <c>null</c> when the page has no character image element.
/// </returns>
public async Task<string> GetCharacterImageSrc(int id)
{
    string url = "http://vndb.org/c" + id;
    HtmlDocument doc;

    await VNDB.sem.WaitAsync();
    try
    {
        var web = new HtmlAgilityPack.HtmlWeb();
        doc = await web.LoadFromWebAsync(url);
    }
    finally
    {
        // Release even when the download throws; otherwise the slot is lost for good and
        // later callers eventually wait forever.
        VNDB.sem.Release();
    }

    var profile = doc.DocumentNode.SelectSingleNode("//div[@class='charimg']/img");
    if (profile == null)
    {
        return null;
    }

    return profile.GetAttributeValue("src", "");
}
/// <summary>
/// Downloads the first <paramref name="paginas"/> listing pages of a console on emulator.games
/// and converts each table row into a <c>Models.romsinfos</c>.
/// </summary>
/// <param name="consola">Console segment used in the listing address.</param>
/// <param name="paginas">Number of listing pages to read, starting at page 1.</param>
/// <param name="portadashd">
/// When <c>true</c>, the "thumbnails/" segment is removed from the cover address;
/// otherwise the thumbnail address is used as is.
/// </param>
/// <returns>All ROM entries found on the requested pages.</returns>
public async Task<List<Models.romsinfos>> getwebdata(string consola, int paginas, bool portadashd)
{
    List<Models.romsinfos> listaroms = new List<Models.romsinfos>();
    var doc = new HtmlAgilityPack.HtmlWeb();
    string baseUrl = "https://emulator.games/roms/" + consola + "/";

    // Pages are numbered from 1. The previous loop ran from 0 and mapped both 0 and 1 to the
    // first page, so that page was downloaded (and its ROMs added) twice while the last
    // requested page was never fetched.
    for (int pagina = 1; pagina <= paginas; pagina++)
    {
        // The first page has no "page" sub-directory.
        string url = pagina == 1 ? baseUrl : baseUrl + "page/" + pagina + "/";

        // Download the page and parse it into an HtmlDocument.
        HtmlDocument htmlDoc = await doc.LoadFromWebAsync(url);

        if (htmlDoc.Text.Contains("404 Page Not Found"))
        {
            continue;
        }

        // The first table of the page holds the info of every ROM on that page.
        var node = htmlDoc.DocumentNode.SelectSingleNode("//table");

        // Collect the table rows. The absolute "//tr" query returns every row of the document;
        // the first one is skipped below.
        var rows = node.ChildNodes["tbody"].SelectNodes("//tr");

        bool isFirstRow = true;
        foreach (var row in rows)
        {
            if (isFirstRow)
            {
                isFirstRow = false;
                continue;
            }

            // When the search criteria is invalid the table shows this message instead of data.
            if (row.InnerText.ToLower().Contains("search term not found"))
            {
                continue;
            }

            // Walk the row's children to collect the ROM's data.
            Models.romsinfos elemento = new Models.romsinfos();
            elemento.nombre = row.ChildNodes[0].InnerText;

            // Use the default thumbnail unless an HD cover was requested.
            string thumbnail = row.ChildNodes[0].ChildNodes[0].ChildNodes[0].Attributes["src"].Value;
            elemento.imagen = portadashd ? thumbnail.Replace("thumbnails/", "") : thumbnail;

            elemento.link = row.ChildNodes[0].ChildNodes[0].Attributes["href"].Value;
            elemento.descargas = row.ChildNodes[1].InnerText;
            listaroms.Add(elemento);
        }
    }

    return listaroms;
}
/// <summary>
/// Downloads the page at <paramref name="url"/> and removes elements that are not wanted in
/// the returned HTML (social widgets, header, section head, meta/share blocks, related posts,
/// asides and footers).
/// </summary>
/// <param name="url">Address of the page to download and clean.</param>
/// <returns>The outer HTML of the document after the removals.</returns>
public async Task<string> RemoveUnusedElementsAsync(string url)
{
    HtmlWeb web = new HtmlWeb();
    var document = await web.LoadFromWebAsync(url);
    var root = document.DocumentNode;

    // First div whose class attribute contains the given fragment, or null when none exists.
    System.Func<string, HtmlNode> firstDivWithClass = fragment =>
        root.Descendants("div")
            .FirstOrDefault(d => d.Attributes.Contains("class")
                                 && d.Attributes["class"].Value.Contains(fragment));

    // Matches are materialized with ToList() before removal so the tree is not modified
    // while it is being enumerated.
    root.Descendants().Where(x => x.Id == "fb-root").ToList().ForEach(x => x.Remove());
    root.Descendants().Where(x => x.Id == "header").ToList().ForEach(x => x.Remove());

    // Every lookup below is optional: pages lacking one of these blocks previously caused a
    // NullReferenceException (or InvalidOperationException) for the first two removals.
    firstDivWithClass("section-head")?.Remove();
    firstDivWithClass("container inner")?.Descendants("div").FirstOrDefault()?.Remove();
    firstDivWithClass("meta")?.Remove();
    firstDivWithClass("ssba")?.Remove();

    root.Descendants().Where(x => x.Id == "jp-relatedposts").ToList().ForEach(x => x.Remove());
    root.Descendants("aside").ToList().ForEach(x => x.Remove());
    root.Descendants("footer").ToList().ForEach(x => x.Remove());

    return root.OuterHtml;
}
/// <summary>
/// Downloads the page at <c>_url</c> and hands the parsed document to <c>DownloadedPage</c>.
/// </summary>
public async void DownloadPage()
{
    HtmlWeb loader = new HtmlWeb();
    HtmlDocument page = await loader.LoadFromWebAsync(_url);

    DownloadedPage(page);
}
/// <summary>
/// Downloads the vndb.org page of <paramref name="vn"/> and fills in its play time, developer,
/// cover image and character list (including voice actors where present).
/// </summary>
/// <param name="vn">The entry to enrich; its <c>id</c> selects the page and it is modified in place.</param>
/// <returns>
/// The same <paramref name="vn"/> instance. Sections missing from the page are skipped, and a
/// download or parse failure leaves the entry with whatever was filled in up to that point.
/// </returns>
public async Task<VN> GetMoreVNInfo(VN vn)
{
    string url = "https://vndb.org/v" + vn.id;

    await VNDB.sem.WaitAsync();
    try
    {
        var web = new HtmlAgilityPack.HtmlWeb();
        HtmlDocument doc = await web.LoadFromWebAsync(url);

        // SelectNodes returns null when nothing matches; a missing details table must not
        // prevent the image and characters from being read.
        var details = doc.DocumentNode.SelectNodes("//div[@class='vndetails']/table/tr");
        if (details != null)
        {
            foreach (var detail in details)
            {
                var cells = detail.SelectNodes("./td");
                if (cells == null || cells.Count < 2)
                {
                    continue;
                }

                string key = cells[0].InnerText;
                if (key == "Length")
                {
                    vn.playTIme = cells[1].InnerText;
                }
                else if (key == "Developer")
                {
                    vn.developer = cells[1].InnerText;
                }
            }
        }

        var imagePath = doc.DocumentNode.SelectSingleNode("//div[@class='vnimg']//img");
        if (imagePath != null)
        {
            vn.image = imagePath.GetAttributeValue("src", "");
        }

        var characterList = doc.DocumentNode.SelectNodes("//div[contains(@class,'charsum_bubble')]");
        if (characterList != null)
        {
            foreach (var character in characterList)
            {
                var nameLink = character.SelectSingleNode("./div[@class='name']/a");
                if (nameLink == null)
                {
                    continue;
                }

                int id = ExtractId(nameLink.GetAttributeValue("href", ""));
                string engName = nameLink.InnerText;
                string japName = nameLink.GetAttributeValue("title", "");
                Character _character = new Character(id, new Name(engName, japName));

                var actorLink = character.SelectSingleNode("./div[@class='actor']/a");
                if (actorLink != null)
                {
                    int voiceId = ExtractId(actorLink.GetAttributeValue("href", ""));
                    string voiceJapName = actorLink.GetAttributeValue("title", "");
                    string voiceEngName = actorLink.InnerText;
                    _character.voice = new Voice(voiceId, new Name(voiceEngName, voiceJapName));

                    if (voices.ContainsKey(voiceId))
                    {
                        _character.voice.Nickname = voices[voiceId];
                    }
                }

                vn.characters.Add(_character);
            }
        }
    }
    catch (Exception e)
    {
        // Best effort: keep the partially filled entry, but leave a trace of the failure.
        System.Diagnostics.Debug.WriteLine("GetMoreVNInfo failed for '" + url + "': " + e.Message);
    }
    finally
    {
        VNDB.sem.Release();
    }

    return vn;
}
/// <summary>
/// Downloads the vndb.org staff page of voice actor <paramref name="id"/> and builds a
/// <c>Voice</c> with its name, voiced characters and the visual novels they appear in.
/// </summary>
/// <param name="id">Numeric staff id appended to the "s" page address.</param>
/// <returns>
/// The populated <c>Voice</c>. A failed attempt is logged through <c>AddLog</c> and retried
/// after one second, up to five attempts in total; if all fail, a <c>Voice</c> holding only the
/// id is returned. (The previous implementation retried without limit through recursion, so a
/// permanently failing page never returned.)
/// </returns>
public async Task<Voice> GetVoice(int id)
{
    const int maxAttempts = 5;
    string url = "https://vndb.org/s" + id;

    for (int attempt = 1; attempt <= maxAttempts; attempt++)
    {
        // Start from a fresh object so a half-parsed attempt never leaks into the result.
        Voice voice = new Voice(id);
        bool loaded = false;

        await VNDB.sem.WaitAsync();
        try
        {
            var web = new HtmlAgilityPack.HtmlWeb();
            HtmlDocument doc = await web.LoadFromWebAsync(url);

            var profile = doc.DocumentNode.SelectSingleNode("//body/div[@id='maincontent']/div[@class='mainbox staffpage']");
            if (profile == null)
            {
                throw new InvalidOperationException("Staff profile block not found at " + url);
            }

            string engName = profile.SelectSingleNode("./h1").InnerText;
            string japName = profile.SelectSingleNode("./h2").InnerText;
            Name name = new Name(engName, japName);
            voice.PushName(name);

            // Alias parsing (rows matching .//tr[@class='nostripe']) was disabled in the original code.

            // SelectNodes returns null when the staff member has no role table.
            var vns = doc.DocumentNode.SelectNodes("//div[@class='mainbox browse staffroles']/table/tr");
            if (vns != null)
            {
                foreach (var vn in vns)
                {
                    var characterTag = vn.SelectSingleNode("./td[@class='tc3']/a");
                    var dateCell = vn.SelectSingleNode("./td[@class='tc2']");
                    var titleTag = vn.SelectSingleNode("./td[@class='tc1']/a");

                    // Rows without a character link are not voice roles; rows missing the other
                    // cells cannot be converted and are skipped instead of failing the attempt.
                    if (characterTag == null || dateCell == null || titleTag == null)
                    {
                        continue;
                    }

                    var date = dateCell.InnerText;

                    int characterId = ExtractId(characterTag.GetAttributeValue("href", ""));
                    japName = characterTag.GetAttributeValue("title", "");
                    engName = characterTag.InnerText;
                    var character = new Character(characterId, new Name(engName, japName));
                    voice.PushCharacter(character);

                    int vnId = ExtractId(titleTag.GetAttributeValue("href", ""));
                    japName = titleTag.GetAttributeValue("title", "");
                    engName = titleTag.InnerText;
                    name = new Name(engName, japName);

                    // Green when the cache file for this VN/character pair exists, red otherwise.
                    string color = File.Exists(CreateCachePath(vnId, characterId)) ? "green" : "red";

                    var _vn = new VN(vnId, name, date);
                    _vn.characters.Add(character);
                    _vn.Color = color;
                    voice.PushVN(_vn);
                }
            }

            voice.vns.Reverse();
            loaded = true;
        }
        catch (Exception e)
        {
            AddLog(e.Message);
        }
        finally
        {
            VNDB.sem.Release();
        }

        if (loaded)
        {
            return voice;
        }

        // Wait outside the semaphore before trying again.
        if (attempt < maxAttempts)
        {
            await Task.Delay(1000);
        }
    }

    return new Voice(id);
}
/// <summary>
/// Completes a news item: derives plain-text content and a preview image from the feed's raw
/// HTML, then downloads the source article to build the full formatted content and to pick up
/// the "twitter:image:src" meta image when present.
/// </summary>
/// <param name="result">The item to fill in; it is modified in place and returned.</param>
/// <param name="item">The syndication entry whose <c>Id</c> is used as the source address.</param>
/// <returns>The same <paramref name="result"/> instance.</returns>
public async Task<ViewModel.NewsItem> CreateItem(ViewModel.NewsItem result, SyndicationItem item)
{
    result.URIToSource = item.Id;

    if (!string.IsNullOrEmpty(result.ContentRaw))
    {
        HtmlDocument doc = new HtmlDocument();
        doc.LoadHtml(result.ContentRaw);

        // Use the first image that actually has a src; images without one used to throw.
        foreach (HtmlNode image in doc.DocumentNode.Descendants("img"))
        {
            string src = image.Attributes["src"]?.Value;
            if (!string.IsNullOrEmpty(src))
            {
                result.Image = src;
                break;
            }
        }

        result.Content = HtmlEntity.DeEntitize(doc.DocumentNode.InnerText);
        result.ContentRaw = null;
    }

    HtmlDocument site = null;
    HtmlWeb web = new HtmlWeb();
    try
    {
        site = await web.LoadFromWebAsync(result.URIToSource);
    }
    catch (Exception ex)
    {
        // Best effort: without the article the feed-derived data is returned as is.
        System.Diagnostics.Debug.WriteLine("CreateItem: loading '" + result.URIToSource + "' failed: " + ex.Message);
    }

    if (site == null)
    {
        return result;
    }

    var contentNode = site.DocumentNode.Descendants("div")
        .FirstOrDefault(A => A.Attributes["class"]?.Value?.Contains("formatted") ?? false);

    // Replace every no-JS gallery link by a plain block containing its images.
    HtmlNode imagecontainer;
    while ((imagecontainer = site.DocumentNode.Descendants("a")
               .FirstOrDefault(A => A.Attributes["class"]?.Value?.Contains("golem-gallery2-nojs") ?? false)) != null)
    {
        string imageHtml = "<div>\n";
        foreach (var img in imagecontainer.Descendants("img"))
        {
            // Prefer the full-size source. SetAttributeValue also creates the attribute when
            // the image has no src yet; an image with neither data attribute keeps its src.
            string fullSrc = img.Attributes["data-src-full"]?.Value ?? img.Attributes["data-src"]?.Value;
            if (fullSrc != null)
            {
                img.SetAttributeValue("src", fullSrc);
            }

            img.Attributes.Append("style", "max-width:600px");
            imageHtml += img.OuterHtml + "<br/>\n";
        }
        imageHtml += "</div>\n";

        var imagesNode = HtmlNode.CreateNode(imageHtml);

        // Replace through the link's own parent: the link is not necessarily a direct child of
        // contentNode, and contentNode itself may be null.
        imagecontainer.ParentNode.ReplaceChild(imagesNode, imagecontainer);
    }

    if (contentNode != null
        && !string.IsNullOrEmpty(contentNode.InnerText)
        && !string.IsNullOrWhiteSpace(HtmlEntity.DeEntitize(contentNode.InnerText)))
    {
        // Drop the direct child divs before the content is embedded in the template.
        foreach (var delNode in contentNode.Elements("div").ToList())
        {
            contentNode.ChildNodes.Remove(delNode);
        }

        result.ContentRaw = string.Format(GenericHtml.HTML, contentNode.InnerHtml);
    }

    // Content of the first meta element with the given (lower-case) name and a non-empty content.
    Func<string, string> getMetaContentByName = (string name) =>
        site.DocumentNode.Descendants("meta")
            .Where(A => A.Attributes["name"]?.Value?.ToLower() == name
                        && !string.IsNullOrEmpty(A.Attributes["content"]?.Value))
            .FirstOrDefault()?.Attributes["content"]?.Value;

    var twitter_image_src = getMetaContentByName("twitter:image:src");
    if (!string.IsNullOrEmpty(twitter_image_src))
    {
        result.Image = twitter_image_src;
    }

    return result;
}
/// <summary>
/// Scrapes the product list at the configured <c>Products:Url</c> and converts every product
/// tile into a <c>Product</c>.
/// </summary>
/// <returns>One <c>Product</c> per matching tile, in document order.</returns>
/// <exception cref="Exception">
/// A tile could not be parsed; the message is prefixed with "ProductScraping.GetData: " and the
/// original exception is available as <c>InnerException</c>.
/// </exception>
public async Task<Product[]> GetData()
{
    string url = config["Products:Url"];
    HtmlWeb web = new HtmlAgilityPack.HtmlWeb();
    HtmlDocument doc = await web.LoadFromWebAsync(url);

    var nodes = doc.DocumentNode.SelectNodes("//div[@class='a-section a-spacing-none aok-relative']").ToList();
    Product[] products = new Product[nodes.Count];

    // Prices and ratings on the page use '.' as decimal separator; parse them independently of
    // the machine's culture (e.g. "12.99" must not become 1299 on a de-DE system).
    var invariant = System.Globalization.CultureInfo.InvariantCulture;

    for (int i = 0; i < nodes.Count; i++)
    {
        try
        {
            HtmlNode item = nodes[i];
            HtmlNode outerSpan = item.ChildNodes["span"];

            Product product = new Product { Id = Guid.NewGuid().ToString() };
            product.Name = "book";

            HtmlNode image = outerSpan.ChildNodes["a"].ChildNodes["span"].ChildNodes["div"].ChildNodes["img"];
            product.ImageUrl = image.Attributes["src"].Value;
            product.Title = image.Attributes["alt"].Value;

            // The author is either a link or a plain span.
            HtmlNode authorBlock = outerSpan.ChildNodes["div"];
            product.Author = authorBlock.ChildNodes["a"] != null
                ? authorBlock.ChildNodes["a"].InnerText
                : authorBlock.ChildNodes["span"].InnerText;

            // The kind sits at index 5 when the tile has at least six children, else at index 2.
            int kindIndex = outerSpan.ChildNodes.Count >= 6 ? 5 : 2;
            product.Kind = outerSpan.ChildNodes[kindIndex].ChildNodes["span"].InnerText;

            HtmlNode priceNode = item.SelectNodes(".//span[@class='a-size-base a-color-price']").FirstOrDefault();
            string txt = priceNode.InnerText.Split("&")[0];
            product.Price = double.Parse(txt.Substring(1), invariant);
            product.Currency = txt.Substring(0, 1);
            product.RatingUnit = "out of 5 stars";

            HtmlAttribute ratingTitle = outerSpan.ChildNodes[3].ChildNodes["a"].Attributes["title"];
            if (ratingTitle != null)
            {
                product.Rating = double.Parse(ratingTitle.Value.Split(" ")[0], invariant);
            }

            products[i] = product;
        }
        catch (Exception e)
        {
            // Keep the original exception as InnerException so its type and stack trace survive.
            throw new Exception("ProductScraping.GetData: " + e.Message, e);
        }
    }

    return products;
}
/// <summary>
/// Downloads the bandwidth usage page with the credentials stored in local settings and
/// parses it.
/// </summary>
/// <returns>
/// The <see cref="BandwidthResults" /> produced by <c>ParseBandwidthDocument</c>.
/// </returns>
public static async Task<BandwidthResults> Scrape()
{
    var web = new HtmlWeb();
    IPropertySet settings = ApplicationData.Current.LocalSettings.Values;

    // The "testuser" account is served from a static test page instead of the real tool.
    string siteToLoad;
    if ((string)settings["user"] == "testuser")
    {
        siteToLoad = "http://alexmullans.com/bandwidth.html";
    }
    else
    {
        siteToLoad = "http://netreg.rose-hulman.edu/tools/networkUsage.pl";
    }

    HtmlDocument doc = await web.LoadFromWebAsync(
        siteToLoad,
        new UTF8Encoding(),
        (string)settings["user"],
        (string)settings["pass"],
        "rose-hulman");

    return ParseBandwidthDocument(doc);
}
/// <summary>
/// Downloads one page of the incident list and converts every <c>table.incident_table</c>
/// (except the one titled "Search for a fire") into an <c>Incident</c>.
/// </summary>
/// <param name="currentPage">Page to request; must not be negative.</param>
/// <param name="itemsPerPage">Page size to request; must not be negative.</param>
/// <param name="sortOrder">Sort order; unknown values fall back to priority.</param>
/// <returns>The incidents found on the page, in document order.</returns>
/// <exception cref="ArgumentOutOfRangeException">
/// <paramref name="currentPage"/> or <paramref name="itemsPerPage"/> is negative.
/// </exception>
public static async Task<IEnumerable<Incident>> GetIncidentsAsync(int currentPage = IncidentsDefaultCurrentPage, int itemsPerPage = IncidentsDefaultItemsPerPage, IncidentsSortOrder sortOrder = IncidentsDefaultSortOrder)
{
    if (currentPage < 0)
    {
        throw new ArgumentOutOfRangeException("currentPage");
    }

    if (itemsPerPage < 0)
    {
        throw new ArgumentOutOfRangeException("itemsPerPage");
    }

    // Map the sort order to the value expected in the request address.
    string sortOrderString;
    switch (sortOrder)
    {
        case IncidentsSortOrder.Name:
            sortOrderString = "incident_name";
            break;
        case IncidentsSortOrder.County:
            sortOrderString = "incident_county";
            break;
        case IncidentsSortOrder.AdministrativeUnit:
            sortOrderString = "incident_administrative_unit";
            break;
        case IncidentsSortOrder.DateStarted:
            sortOrderString = "incident_date_created";
            break;
        case IncidentsSortOrder.DateLastUpdated:
            sortOrderString = "incident_date_last_update";
            break;
        case IncidentsSortOrder.Priority:
        default:
            sortOrderString = "incident_priority";
            break;
    }

    // Download the web page.
    HtmlWeb web = new HtmlWeb();
    HtmlDocument doc = await web.LoadFromWebAsync(String.Format(GetIncidentsUrl, currentPage, itemsPerPage, sortOrderString));

    List<Incident> incidents = new List<Incident>();

    foreach (HtmlNode tableNode in doc.DocumentNode.Descendants("table"))
    {
        // Only tables whose class attribute is exactly "incident_table" describe incidents.
        bool isIncidentTable = tableNode.HasAttributes
            && tableNode.Attributes.Contains("class")
            && tableNode.Attributes["class"].Value == "incident_table";
        if (!isIncidentTable)
        {
            continue;
        }

        string name = tableNode.Attributes["title"].Value;
        if (name == "Search for a fire")
        {
            continue;
        }

        Dictionary<string, string> details = ParseIncidentTable(tableNode);

        // Parse out the hidden id when the table provides one.
        string rawId;
        int id = details.TryGetValue(IncidentIDKey, out rawId) ? Int32.Parse(rawId) : Incident.InvalidId;

        incidents.Add(id == Incident.InvalidId
            ? new Incident(name, details)
            : new Incident(id, name, details));
    }

    return incidents;
}
/// <summary>
/// Downloads the hourly readings page matching the current local time and collects, for every
/// table row whose first cell equals <paramref name="stateRequested"/>, the region name and the
/// reading of the current hour.
/// </summary>
/// <param name="stateRequested">State name compared against the first cell of each row.</param>
/// <returns>
/// 1 when the page was downloaded and processed, 0 when an exception occurred (its message is
/// stored in <c>errorString</c>).
/// </returns>
public async Task<int> HTMLDownload (string stateRequested)
{
    Log.Debug (TAG, "HTMLDownloadRunning");

    HtmlWeb htmlWeb = new HtmlWeb ();
    int completeStatus = 0;

    if (regionsList.Count != 0) {
        regionsList.Clear ();
        APIList.Clear ();
    }
    // NOTE(review): regionEntry and latestAPI are appended to below but never cleared here
    // (their Clear() calls were commented out in the original) — confirm that is intended.

    DateTime now = DateTime.Now;

    // Midnight is published as hour 24 of the previous day. AddDays handles month and year
    // boundaries; the old "Day - 1" arithmetic could not, so on the 1st of a month it fell back
    // to the current day and requested the wrong file.
    bool isMidnight = now.Hour == 0;
    int currentHour = isMidnight ? 24 : now.Hour;
    DateTime reportDay = isMidnight ? now.AddDays (-1) : now;
    string date = reportDay.ToString ("yyyy-MM-dd", System.Globalization.CultureInfo.InvariantCulture);

    // Hours 1-6, 7-12, 13-18 and 19-24 live in files hour1..hour4; within a file the hourly
    // columns start at index 3.
    string hourRegion = "hour" + ((currentHour - 1) / 6 + 1);
    int hourIndex = 3 + (currentHour - 1) % 6;

    string urlConstruct = "http://apims.doe.gov.my/v2/" + hourRegion + "_" + date + ".html";

    try {
        HtmlDocument htmlDoc = await htmlWeb.LoadFromWebAsync (urlConstruct);
        var div = htmlDoc.GetElementbyId ("content");
        var table = div.Descendants ("table").ToList () [0].ChildNodes.ToList ();

        foreach (var tableEntry in table) {
            if (!tableEntry.HasChildNodes) {
                continue;
            }

            var rowEntry = tableEntry.ChildNodes.ToList ();
            var stateEntry = rowEntry [0].InnerText.ToString ();
            if (stateEntry == stateRequested) {
                regionEntry.Add (rowEntry [2].InnerText.ToString ());
                latestAPI.Add (rowEntry [hourIndex].InnerText.ToString ());
            }
        }

        completeStatus = 1;
    } catch (Exception e) {
        completeStatus = 0;
        errorString = e.Message.ToString ();
    }

    Log.Debug (TAG, "HTMLDownloadFinish");
    return completeStatus;
}