/// <summary>
/// Builds a <c>ClubSeasonPerformance</c> from one league-table row.
/// </summary>
/// <param name="tableRow">
/// Row whose <c>td</c> children hold the figures. Cell 1 is the club name, cells 2-7 the
/// home record and cells 10-15 the away record; cells 0, 8 and 9 are not read.
/// The row must therefore contain at least 16 cells.
/// </param>
/// <returns>The populated performance record.</returns>
private ClubSeasonPerformance ParseLeagueTableRow(HtmlNode tableRow)
{
    var columns = tableRow.Elements("td").ToArray();
    var clubName = columns[1].InnerText;

    var performance = new ClubSeasonPerformance();
    performance.Club = Clubs.GetClubFromName(clubName);

    // Home record: played, won, drawn, lost, goals for, goals against.
    performance.HomePlayed = byte.Parse(columns[2].InnerText);
    performance.HomeWon = byte.Parse(columns[3].InnerText);
    performance.HomeDrawn = byte.Parse(columns[4].InnerText);
    performance.HomeLost = byte.Parse(columns[5].InnerText);
    performance.HomeGoalsFor = byte.Parse(columns[6].InnerText);
    performance.HomeGoalsAgainst = byte.Parse(columns[7].InnerText);

    // Away record, same layout starting at cell 10.
    performance.AwayPlayed = byte.Parse(columns[10].InnerText);
    performance.AwayWon = byte.Parse(columns[11].InnerText);
    performance.AwayDrawn = byte.Parse(columns[12].InnerText);
    performance.AwayLost = byte.Parse(columns[13].InnerText);
    performance.AwayGoalsFor = byte.Parse(columns[14].InnerText);
    performance.AwayGoalsAgainst = byte.Parse(columns[15].InnerText);

    return performance;
}
/// <summary>
/// Returns the child elements of <paramref name="node"/> named <paramref name="name"/>
/// for which <c>HasClass(className)</c> is true.
/// </summary>
/// <param name="node">Parent node to search; must not be null.</param>
/// <param name="name">Tag name passed to <c>HtmlNode.Elements</c>.</param>
/// <param name="className">Class name passed to <c>HasClass</c>.</param>
/// <returns>A lazily evaluated sequence of the matching children.</returns>
/// <exception cref="ArgumentNullException"><paramref name="node"/> is null.</exception>
public static IEnumerable<HtmlNode> Elements(this HtmlNode node, string name, string className)
{
    if (node == null)
    {
        throw new ArgumentNullException(nameof(node));
    }

    return from child in node.Elements(name)
           where child.HasClass(className)
           select child;
}
/// <summary>
/// Reads the SKU option groups found under the element with id "j-product-info-sku"
/// and records them in the supplied collections.
/// </summary>
/// <param name="_doc">Parsed product page.</param>
/// <param name="PIS">Receives one <c>PropertyItem</c> per <c>dl</c> option group.</param>
/// <param name="images">Receives each option's "bigpic" URL, cut at its last '_' when one is present.</param>
/// <param name="name_and_des_title_img">
/// Receives one HTML table row per option that has either an image or a background-colour span.
/// </param>
/// <returns>
/// 0 on success; -1 when an option in a group named "color" has neither an image nor a
/// colour span; -2 when <c>BackgroundColorSelector</c> returns an empty string.
/// </returns>
/// <remarks>
/// Entries added before an early return remain in the output collections. Missing
/// structural nodes (the root element, <c>dt</c>, <c>dd</c>/<c>ul</c>, the first child of an
/// <c>li</c>) are not guarded and surface as exceptions to the caller.
/// </remarks>
private int GetProductInfoSku(HtmlAgilityPack.HtmlDocument _doc, ref ProductInfoSku PIS, ref List<string> images, ref List<String_String> name_and_des_title_img)
{
    HtmlNode skuRoot = _doc.GetElementbyId("j-product-info-sku");

    foreach (HtmlNode optionGroup in skuRoot.Elements("dl"))
    {
        string name = optionGroup.Element("dt").InnerText.Replace(":", "");
        NameValueCollection skuList = new NameValueCollection();
        int hasIMGTag = 0;

        HtmlNode optionList = optionGroup.Element("dd").Element("ul");
        foreach (HtmlNode itemSku in optionList.Elements("li"))
        {
            HtmlNode option = itemSku.FirstChild;
            string dataSkuId = option.Attributes["data-sku-id"].Value;
            string title = option.Attributes.Contains("title")
                ? option.Attributes["title"].Value
                : option.InnerText;
            skuList.Add(dataSkuId, title);

            // Prefer a direct child <img> (what the code always read), but fall back to a nested
            // one: the branch used to be entered for any descendant <img> and then dereferenced
            // Element("img"), which is null when the image is not a direct child.
            HtmlNode image = option.Element("img") ?? option.Descendants("img").FirstOrDefault();
            if (image != null)
            {
                if (image.Attributes.Contains("bigpic"))
                {
                    string bigpicWithTail = image.Attributes["bigpic"].Value;

                    // Remove(-1) throws, so only cut the tail when there is an underscore.
                    int tailStart = bigpicWithTail.LastIndexOf('_');
                    string bigpicWithoutTail = tailStart >= 0 ? bigpicWithTail.Remove(tailStart) : bigpicWithTail;
                    images.Add(bigpicWithoutTail);

                    string imageRow = "<tr><td>" + name + ": " + title + "</td><td><img width='480' height='480' src='" + bigpicWithTail + "'></td></tr>";
                    name_and_des_title_img.Add(new String_String(name, imageRow));
                }

                hasIMGTag = 1;
                continue;
            }

            // Options without an image may carry a span whose class contains "sku-color-".
            HtmlNode colorSpan = option.Descendants("span").FirstOrDefault(
                n => n.Attributes.Contains("class") && n.Attributes["class"].Value.Contains("sku-color-"));
            if (colorSpan != null)
            {
                // NOTE(review): this dialog shows raw HTML for every colour option and looks like
                // leftover debugging output; kept so user-visible behaviour is unchanged - confirm and remove.
                MessageBox.Show(option.Descendants("span").First().OuterHtml);

                string className = colorSpan.Attributes["class"].Value;
                string colorStyle = BackgroundColorSelector(className);
                if (colorStyle == "")
                {
                    return -2;
                }

                // Reuse the style looked up above instead of calling the selector a second time.
                string colorRow = "<tr><td>" + name + ": " + title + "</td><td width='480' height='480' style='" + colorStyle + "'></td></tr>";
                name_and_des_title_img.Add(new String_String(name, colorRow));
            }
            else if (name.ToLowerInvariant() == "color")
            {
                MessageBox.Show("Color not found! Liên hệ https://www.facebook.com/letruongquy96 \nOuterHTML: " + itemSku.InnerHtml);
                return -1;
            }
        }

        NameVal_String skuListWithImageFlag = new NameVal_String(skuList, hasIMGTag);
        PIS.Add(new PropertyItem(name, skuListWithImageFlag));
    }

    return 0;
}
/// <summary>
/// Converts the <c>li</c> children of a list container into <c>VideoInfo</c> entries.
/// </summary>
/// <param name="result">List the parsed videos are appended to.</param>
/// <param name="li_container">Node whose <c>li</c> children are read.</param>
/// <remarks>
/// An item whose class contains "more_reiter" is not a video: it sets <c>HasNextPage</c>
/// and <c>nextPageUrl</c> from its link instead.
/// </remarks>
void GetLatestVideos(ref List<VideoInfo> result, HtmlNode li_container)
{
    foreach (var entry in li_container.Elements("li"))
    {
        if (entry.GetAttributeValue("class", "").Contains("more_reiter"))
        {
            HasNextPage = true;
            nextPageUrl = "http://www.heise.de/video/" + entry.Element("a").GetAttributeValue("href", "") + "&hajax=1";
            continue;
        }

        // Optional tag list: link text -> absolute tag URL. Stays null when the item has no <ul>.
        SerializableDictionary<string, string> tags = null;
        var tagList = entry.Element("ul");
        if (tagList != null)
        {
            tags = new SerializableDictionary<string, string>();
            foreach (var tagItem in tagList.Elements("li"))
            {
                var tagLink = tagItem.Element("a");
                tags.Add(tagLink.InnerText, "http://www.heise.de" + tagLink.GetAttributeValue("href", ""));
            }
        }

        var titleLink = entry.Descendants("h3").First().Element("a");

        var video = new VideoInfo();
        video.Title = titleLink.InnerText;
        video.VideoUrl = "http://www.heise.de" + titleLink.GetAttributeValue("href", "");
        video.Thumb = "http://www.heise.de" + entry.Descendants("img").First().GetAttributeValue("src", "");
        video.Description = entry.Descendants("p").First().FirstChild.InnerText.Trim();
        video.Other = tags;
        result.Add(video);
    }
}
/// <summary>
/// Builds a <c>SubsceneSubtitle</c> from a result anchor and its link.
/// </summary>
/// <param name="anchor">
/// Anchor whose first <c>span</c> child carries the language text and the rating class,
/// and whose last <c>span</c> child carries the release name.
/// </param>
/// <param name="link">Value stored as <c>SubtitleLink</c>.</param>
/// <returns>
/// The subtitle, with <c>RatingType</c> 1 when the first span's class contains "r100",
/// 0 when it contains "r0", and -1 otherwise.
/// </returns>
private SubsceneSubtitle CreateSubtitleFromLink(HtmlNode anchor, string link)
{
    const string goodRatingClassName = "r100";
    const string neutralRatingClassName = "r0";

    var spans = anchor.Elements("span").ToArray();

    var languageSpan = spans.First();
    var cssClass = languageSpan.Attributes["class"].Value;

    int ratingType;
    if (cssClass.Contains(goodRatingClassName))
    {
        ratingType = 1;
    }
    else if (cssClass.Contains(neutralRatingClassName))
    {
        ratingType = 0;
    }
    else
    {
        ratingType = -1;
    }

    var language = languageSpan.InnerText.Trim();

    var releaseSpan = spans.Last();
    var rawReleaseName = releaseSpan?.InnerText.Trim();
    var releaseName = RemoveComments(rawReleaseName);
    var releaseIdentity = _releaseParser.ParseEpisodeInfo(releaseName);

    var subtitle = new SubsceneSubtitle();
    subtitle.SubtitleLink = link;
    subtitle.LanguageName = language;
    subtitle.ReleaseName = releaseName;
    subtitle.SeriesName = releaseIdentity.SeriesName;
    subtitle.Season = releaseIdentity.Season;
    subtitle.Episode = releaseIdentity.Episode;
    subtitle.EndEpisode = releaseIdentity.EndEpisode;
    subtitle.ReleaseGroup = releaseIdentity.ReleaseGroup;
    subtitle.RatingType = ratingType;
    return subtitle;
}
/// <summary>
/// Initializes a search result from one result-table row.
/// </summary>
/// <param name="tr">
/// Row whose first <c>th</c> may contain the cover image, whose last <c>td</c> contains the
/// details <c>span</c>, and which contains a <c>script</c> holding the details URL.
/// </param>
public SearchResult(HtmlNode tr)
{
    // ISBN is taken from the cover image URL; it is left unset when the row has no image.
    var cover = tr.Elements("th").First().Descendants("img").SingleOrDefault();
    if (cover != null)
    {
        var coverUrl = cover.GetAttributeValue("src", "");
        ISBN = imageUrlParser.Match(coverUrl).Groups[1].Value;
    }

    var details = tr.Elements("td").Last().Element("span");

    // The leading node holds the author as "Last, First".
    Author = details.FirstChild.CleanText().TrimEnd('.');

    // Only the part of the bold text before the first '/' is used, as the title.
    var titleNode = details.Element("b");
    var titleParts = titleNode.CleanText().Split('/');
    Title = titleParts[0].Trim();

    // The year is read from the first non-blank sibling after the bold node.
    var publisherNode = titleNode.NextSibling;
    while (string.IsNullOrWhiteSpace(publisherNode.InnerText))
    {
        publisherNode = publisherNode.NextSibling;
    }
    Year = GetYear(publisherNode.InnerText);

    var script = tr.Descendants("script").First();
    sessionDetailsUrl = detailsUrlFinder.Match(script.InnerText).Groups[1].Value;
}
/// <summary>
/// Strips a job-offer element down to its content rows.
/// </summary>
/// <param name="JobOfferElement">Element whose <c>tr</c> children are cleaned up.</param>
/// <returns>
/// The element returned by <c>RemoveDescendants</c>, without its first <c>tr</c> and without
/// the first row containing a "button_new" class and every row after it.
/// </returns>
private static HtmlNode ClearNodes(HtmlNode JobOfferElement)
{
    JobOfferElement = RemoveDescendants(JobOfferElement, new[] { "a", "img", "script", "style" });
    JobOfferElement.RemoveChild(JobOfferElement.Element("tr"));

    // Snapshot the remaining rows so removing children does not disturb the iteration.
    var rows = JobOfferElement.Elements("tr").ToList();

    // Index of the first row that has a descendant whose class contains "button_new".
    int firstButtonRow = rows.FindIndex(
        row => row.Descendants().Any(
            d => d.Attributes.Contains("class") && d.Attributes["class"].Value.Contains("button_new")));

    if (firstButtonRow >= 0)
    {
        for (int i = firstButtonRow; i < rows.Count; i++)
        {
            JobOfferElement.RemoveChild(rows[i]);
        }
    }

    return JobOfferElement;
}
/// <summary>
/// Parses every song row of a search-result table.
/// </summary>
/// <param name="table">
/// Result table. Its <c>InnerHtml</c> is rewritten before parsing (see below).
/// </param>
/// <returns>
/// All songs in document order. Songs from a <c>tbody</c> whose class contains
/// "same_song_group" have <c>DuplicateOf</c> set to the song that precedes the group.
/// Duplicates are included in the list as well, because callers rely on the full row count.
/// </returns>
internal List<SongModel> ParseSearchedSongTable(HtmlNode table)
{
    // Wrap every "<tbody ...>...</tbody>" run in "</tbody>" ... "<tbody>" - presumably so the
    // groups end up as direct children of the table once the markup is re-parsed.
    // A single Regex.Replace wraps each match exactly once; replacing match text with
    // string.Replace inside a loop wrapped identical groups once per occurrence.
    table.InnerHtml = Regex.Replace(table.InnerHtml, @"(<tbody\s)[\s\S]+?(/tbody>)", "</tbody>$&<tbody>");

    List<SongModel> res = new List<SongModel>();
    foreach (var group in table.Elements("tbody"))
    {
        bool isDuplicateGroup = group.GetAttributeValue("class", "").Contains("same_song_group");

        // Capture the original song BEFORE adding the group's rows. Looking up the last list
        // entry while rows are being appended made the second and later duplicates point at
        // the previous duplicate instead of the original. A duplicate group with no preceding
        // song simply leaves DuplicateOf untouched instead of throwing.
        bool hasOriginal = isDuplicateGroup && res.Count > 0;
        SongModel original = hasOriginal ? res[res.Count - 1] : null;

        foreach (var row in group.Elements("tr"))
        {
            var song = ParseSearchedSong(row);
            if (hasOriginal)
            {
                song.DuplicateOf = original;
            }
            res.Add(song);
        }
    }

    return res;
}
/// <summary>
/// Parses one search-result row into a <c>SongModel</c> with its album and artist.
/// </summary>
/// <param name="tr">
/// Row whose cells are read as: 0 = checkbox (song id and availability), 1 = song links,
/// 2 = artist link, 3 = album link.
/// </param>
/// <returns>The song, with <c>Album</c> and <c>Album.Artist</c> filled in.</returns>
internal SongModel ParseSearchedSong(HtmlNode tr)
{
    var cells = tr.Elements("td").ToList();

    // Cell 0: the checkbox value is the song id; a "checked" attribute marks it available.
    var selector = cells[0].Descendant("input");
    uint songId = uint.Parse(selector.GetAttributeValue("value", "0"));
    SongModel song = SongModel.GetNew(songId);
    song.Available = selector.GetAttributeValue("checked", "") == "checked";

    // Cell 1: the first link is the song name, an optional second link is the MV.
    var songLinks = cells[1].SelectNodes("./a[@target='_blank']");
    song.NameHtml = songLinks[0].InnerHtml;
    if (songLinks.Count > 1)
    {
        var mvHref = songLinks[1].GetAttributeValue("href", "/0");
        song.MV = MVModel.GetNew(ParseXiamiIDString(mvHref));
    }

    // Cell 3: album link; the surrounding book-title marks are stripped from the name.
    var albumLink = cells[3].Element("a");
    var album = AlbumModel.GetNew(ParseXiamiID(albumLink.GetAttributeValue("href", "/0")));
    album.NameHtml = albumLink.InnerHtml.Replace("《", "").Replace("》", "");

    // Cell 2: artist link.
    var artistLink = cells[2].Element("a");
    album.Artist = ArtistModel.GetNew(ParseXiamiID(artistLink.GetAttributeValue("href", "/0")));
    album.Artist.NameHtml = artistLink.InnerHtml;

    song.Album = album;
    return song;
}
/// <summary>
/// Builds a <c>VideoInfo</c> from one item <c>div</c>.
/// </summary>
/// <param name="itemDiv">
/// Item element. Its last <c>div</c> child supplies the "arte_vp_url" attribute, and its
/// own "data-description" attribute supplies the description.
/// </param>
/// <returns>The populated video entry.</returns>
/// <remarks>
/// The badge holder, the first <c>p</c>, the first <c>h3</c> and the <c>img</c> child are
/// dereferenced without null checks.
/// </remarks>
private VideoInfo getVideoInfo(HtmlNode itemDiv)
{
    var video = new VideoInfo();

    var playerDiv = itemDiv.Elements("div").Last();
    video.VideoUrl = playerDiv.GetAttributeValue("arte_vp_url", "");

    // The length is the text of the node following the first div inside the badge holder.
    var badgeHolder = itemDiv.Descendants("div")
        .FirstOrDefault(d => d.GetAttributeValue("class", "").Contains("badge-holder"));
    var lengthNode = badgeHolder.Element("div").NextSibling;
    video.Length = lengthNode.InnerText.Trim().Trim('"').Trim();

    // The air date is the last child node of the first paragraph.
    var firstParagraph = itemDiv.Descendants("p").FirstOrDefault();
    video.Airdate = firstParagraph.ChildNodes.LastOrDefault().InnerText.Trim();

    var heading = itemDiv.Descendants("h3").FirstOrDefault();
    video.Title = heading.InnerText.Trim();

    video.Thumb = itemDiv.Element("img").GetAttributeValue("src", "");
    video.Description = itemDiv.GetAttributeValue("data-description", "");
    return video;
}
/// <summary>
/// Produces one gem fetch per table row, skipping the first row.
/// </summary>
/// <param name="table">Table whose <c>tr</c> children are read.</param>
/// <returns>
/// A lazily evaluated sequence: for each row, the first child node is searched for the first
/// <c>span/a</c> link whose class does not contain "image", and that link's inner HTML is
/// passed to <c>_gemReader.FetchGemAsync</c>. Nothing is fetched until the sequence is enumerated.
/// </returns>
private IEnumerable<Task<ItemDB.Gem>> ParseGemTable(HtmlNode table)
{
    return table.Elements("tr")
        .Skip(1)
        .Select(row => row.ChildNodes[0])
        .Select(cell => cell.SelectNodes("span/a[not(contains(@class, 'image'))]")[0])
        .Select(nameNode => _gemReader.FetchGemAsync(nameNode.InnerHtml));
}
/// <summary>
/// Returns the first result of <c>node.Elements(name, className)</c>.
/// </summary>
/// <param name="node">Parent node to search.</param>
/// <param name="name">Tag name to match.</param>
/// <param name="className">Class name to match.</param>
/// <returns>The first matching element, or the default value (null) when there is none.</returns>
public static HtmlNode Element(this HtmlNode node, string name, string className)
{
    var matches = node.Elements(name, className);
    return matches.FirstOrDefault();
}
/// <summary>
/// Collects the text of the "panel-columnN" <c>p</c> children of a node into a
/// fixed ten-slot array.
/// </summary>
/// <param name="node">Node whose <c>p</c> children hold the column values.</param>
/// <param name="columnNumber">
/// "all" to read every column. Any other value makes the method return as soon as
/// column 1 has been read.
/// </param>
/// <returns>
/// A ten-element array in which slot N-1 holds the text of "panel-columnN". Columns that
/// were not encountered stay null. Columns 1 and 3 are HTML-entity decoded; column 1 is
/// also trimmed of CR, LF and space characters.
/// </returns>
/// <remarks>
/// Keeping the column tag names in this one method means they are maintained in a single place.
/// </remarks>
private string[] GetColumnValues(HtmlNode node, string columnNumber = "all")
{
    const string ColumnPrefix = "panel-column";

    // A fixed array guarantees the order of the values regardless of document order.
    var columnValues = new string[10];

    foreach (var column in node.Elements("p"))
    {
        if (column.Attributes.Count == 0)
        {
            continue;
        }

        // The column id is the first space-separated token of the element's first attribute.
        var columnId = column.Attributes[0].Value.Split(' ')[0];

        switch (columnId)
        {
            case "panel-column1":
                columnValues[0] = HtmlEntity.DeEntitize(column.InnerText.Trim('\r', '\n', ' '));

                // Callers interested only in the name column stop here.
                if (columnNumber != "all")
                {
                    return columnValues;
                }
                break;

            case "panel-column3":
                columnValues[2] = HtmlEntity.DeEntitize(column.InnerText);
                break;

            case "panel-column2":
            case "panel-column4":
            case "panel-column5":
            case "panel-column6":
            case "panel-column7":
            case "panel-column8":
            case "panel-column9":
            case "panel-column10":
                // For these ids the numeric suffix is the 1-based slot; the text is stored as-is.
                columnValues[int.Parse(columnId.Substring(ColumnPrefix.Length)) - 1] = column.InnerText;
                break;
        }
    }

    return columnValues;
}