/// <summary>
/// Builds a <see cref="ForumUserEntity"/> from the markup of a single forum post.
/// </summary>
/// <param name="postNode">Root node of the post; must not be null.</param>
/// <returns>
/// A user whose fields are filled from whichever of the expected elements are present.
/// Fields whose source element is missing are left at their defaults instead of
/// causing a <see cref="NullReferenceException"/>.
/// </returns>
/// <exception cref="FormatException">
/// The "registered" element is present but its content is not a parsable date, or the
/// user id portion of the "userinfo" class is not a number.
/// </exception>
public static ForumUserEntity FromPost(HtmlNode postNode)
{
    var user = new ForumUserEntity();

    // Username: first <dt> whose class contains "author".
    var authorNode = postNode.Descendants("dt")
        .FirstOrDefault(node => node.GetAttributeValue("class", string.Empty).Contains("author"));
    if (authorNode != null)
    {
        user.Username = WebUtility.HtmlDecode(authorNode.InnerHtml);
    }

    // Join date: first <dd> whose class contains "registered".
    var registeredNode = postNode.Descendants("dd")
        .FirstOrDefault(node => node.GetAttributeValue("class", string.Empty).Contains("registered"));
    if (registeredNode != null)
    {
        user.DateJoined = DateTime.Parse(registeredNode.InnerHtml);
    }

    // The title text comes from the <dd> whose class is exactly "title"; the avatar image is
    // looked up under the first <dd> whose class merely contains "title". These two lookups
    // are intentionally kept separate because they can resolve to different elements.
    var avatarTitle = postNode.Descendants("dd")
        .FirstOrDefault(node => node.GetAttributeValue("class", string.Empty).Equals("title"));
    var avatarContainer = postNode.Descendants("dd")
        .FirstOrDefault(node => node.GetAttributeValue("class", string.Empty).Contains("title"));
    HtmlNode avatarImage = avatarContainer == null
        ? null
        : avatarContainer.Descendants("img").FirstOrDefault();

    if (avatarTitle != null)
    {
        user.AvatarTitle = WebUtility.HtmlDecode(avatarTitle.InnerText).WithoutNewLines().Trim();
    }

    if (avatarImage != null)
    {
        user.AvatarLink = FixPostHtmlImage(avatarImage.OuterHtml);
    }

    // User id: the text after the first '-' in the class attribute of the "userinfo" cell.
    var userInfoNode = postNode.DescendantsAndSelf("td")
        .FirstOrDefault(node => node.GetAttributeValue("class", string.Empty).Contains("userinfo"));
    if (userInfoNode != null)
    {
        var classParts = userInfoNode.GetAttributeValue("class", string.Empty).Split('-');
        if (classParts.Length >= 2)
        {
            user.Id = Convert.ToInt64(classParts[1]);
        }
    }

    return user;
}
/// <summary>
/// Extracts the numeric thread id from the first node (the given node or one of its
/// descendants) that carries a non-empty "id" attribute.
/// </summary>
/// <param name="node">Node to search; must not be null.</param>
/// <returns>
/// The integer left after removing the text "thread" from the id attribute, or -1 when
/// no id attribute is found or the remainder is not a valid integer.
/// </returns>
private int GetThreadID(HtmlNode node)
{
    // GetAttributeValue("id", "") never returns null, so the previous "!= null" filter
    // matched every node. Filter on a non-empty value instead.
    var threadIDNode = node.DescendantsAndSelf()
        .FirstOrDefault(value => !string.IsNullOrEmpty(value.GetAttributeValue("id", "")));

    string id = threadIDNode == null
        ? ""
        : threadIDNode.GetAttributeValue("id", "").Trim();
    id = id.Replace("thread", "");

    // int.TryParse writes 0 to its out argument on failure, so the -1 sentinel has to be
    // restored explicitly rather than relied upon as an initial value.
    int parsedID;
    if (!int.TryParse(id, out parsedID))
    {
        parsedID = -1;
    }

    Awful.Core.Event.Logger.AddEntry(string.Format("SAThread - ThreadID: {0}", id));
    return parsedID;
}
/// <summary>
/// Gathers the trimmed text of every text node at or below <paramref name="root"/>,
/// skipping nodes that are empty after trimming, and joins the pieces with single spaces.
/// </summary>
/// <param name="root">Node whose subtree is scanned; must not be null.</param>
/// <returns>The space-separated text, or an empty string when no text node has content.</returns>
public static string ExtractViewableTextCleaned(HtmlNode root)
{
    var visibleText = root.DescendantsAndSelf()
        .Where(candidate => candidate.NodeType == HtmlNodeType.Text)
        .Select(textNode => textNode.InnerText.Trim())
        .Where(trimmed => trimmed != "");

    return String.Join(" ", visibleText);
}
/// <summary>
/// Reads the numeric link texts found in <paramref name="column"/> into
/// <paramref name="model"/>. Anchors with empty text are ignored; the first remaining
/// anchor supplies <c>Ecr</c> and every later one overwrites <c>ExpressCode</c>.
/// </summary>
/// <param name="column">Node whose subtree is searched for "a" elements.</param>
/// <param name="model">Model that receives the parsed values.</param>
private static void ParseEcrExpress(HtmlNode column, EcrModel model)
{
    var linkTexts = column.DescendantsAndSelf()
        .Where(candidate => candidate.Name == "a")
        .Select(anchor => anchor.InnerText)
        .Where(text => text != "")
        .ToList();

    for (var position = 0; position < linkTexts.Count; position++)
    {
        if (position == 0)
        {
            model.Ecr = long.Parse(linkTexts[position]);
            continue;
        }

        // Any anchor after the first is treated as the express code; the last one wins.
        model.ExpressCode = long.Parse(linkTexts[position]);
    }
}
/// <summary>
/// Fills the station image URL and station name of <paramref name="result"/> from
/// <paramref name="column"/>.
/// </summary>
/// <remarks>
/// Best-effort: when the column, the result, the "img" element or its first attribute is
/// missing, the method returns without touching <paramref name="result"/>. This matches
/// the outcome of the previous blanket try/catch, but uses explicit checks instead of
/// swallowing every exception.
/// </remarks>
/// <param name="column">Node containing the station image and name.</param>
/// <param name="result">Model that receives the parsed values.</param>
private static void ParseNameStantion(HtmlNode column, EcrModel result)
{
    if (column == null || result == null)
    {
        return;
    }

    var image = column.DescendantsAndSelf().FirstOrDefault(x => x.Name == "img");
    if (image == null)
    {
        return;
    }

    // NOTE(review): the URL is taken from the image's FIRST attribute, whatever it is;
    // presumably that is "src" in the scraped markup - confirm.
    var firstAttribute = image.Attributes.FirstOrDefault();
    if (firstAttribute == null)
    {
        return;
    }

    result.StationImageUrl = firstAttribute.Value;
    result.StationName = column.InnerText.Replace("\r\n ", "").Replace(" ", "");
}
/// <summary>
/// Builds a <see cref="ForumUserEntity"/> from the markup of a single forum post and
/// detaches the "userinfo" element from the post once the user id has been read.
/// </summary>
/// <param name="postNode">Root node of the post; must not be null.</param>
/// <returns>
/// A user whose fields are filled from whichever of the expected elements are present.
/// Missing or unparsable elements leave the corresponding field at its default, except
/// the join date, which falls back to the current UTC time when it cannot be parsed.
/// </returns>
public static ForumUserEntity FromPost(HtmlNode postNode)
{
    var user = new ForumUserEntity();

    // Username: first <dt> whose class contains "author".
    var authorNode = postNode.Descendants("dt")
        .FirstOrDefault(node => node.GetAttributeValue("class", string.Empty).Contains("author"));
    if (authorNode != null)
    {
        user.Username = WebUtility.HtmlDecode(authorNode.InnerHtml);
    }

    var dateTimeNode = postNode.Descendants("dd")
        .FirstOrDefault(node => node.GetAttributeValue("class", string.Empty).Contains("registered"));
    if (dateTimeNode != null)
    {
        // Parsing failed => say they joined today.
        DateTime joined;
        user.DateJoined = DateTime.TryParse(dateTimeNode.InnerHtml, out joined)
            ? joined
            : DateTime.UtcNow;
    }

    // The title text comes from the <dd> whose class is exactly "title"; the avatar image is
    // looked up under the first <dd> whose class merely contains "title".
    HtmlNode avatarTitle = postNode.Descendants("dd")
        .FirstOrDefault(node => node.GetAttributeValue("class", string.Empty).Equals("title"));
    HtmlNode avatarContainer = postNode.Descendants("dd")
        .FirstOrDefault(node => node.GetAttributeValue("class", string.Empty).Contains("title"));
    HtmlNode avatarImage = avatarContainer == null
        ? null
        : avatarContainer.Descendants("img").FirstOrDefault();

    if (avatarTitle != null)
    {
        user.AvatarTitle = WebUtility.HtmlDecode(avatarTitle.InnerText).WithoutNewLines().Trim();
    }

    if (avatarImage != null)
    {
        user.AvatarLink = avatarImage.GetAttributeValue("src", string.Empty);
    }

    // The user info block is a <td> in some layouts and a <div> in others.
    var userIdNode = postNode.DescendantsAndSelf("td")
                         .FirstOrDefault(node => node.GetAttributeValue("class", string.Empty).Contains("userinfo"))
                     ?? postNode.DescendantsAndSelf("div")
                         .FirstOrDefault(node => node.GetAttributeValue("class", string.Empty).Contains("userinfo"));
    if (userIdNode == null)
    {
        return user;
    }

    // The id is the text after the first '-' in the class attribute. A suffix that is not
    // a number leaves Id untouched instead of aborting the whole parse.
    var splitString = userIdNode
        .GetAttributeValue("class", string.Empty)
        .Split('-');
    long parsedId;
    if (splitString.Length >= 2 && long.TryParse(splitString[1], out parsedId))
    {
        user.Id = parsedId;
    }

    // Remove the UserInfo node after we are done with it, because
    // some forums (FYAD) use it in the body of posts.
    userIdNode.Remove();
    return user;
}
/// <summary>
/// Concatenates the inner text of <paramref name="node"/> and of each of its descendants
/// into one normalized, lower-case string.
/// </summary>
/// <param name="node">Node whose subtree supplies the text; must not be null.</param>
/// <returns>
/// The joined texts after newline replacement, <c>RemoveWhitespace</c>, trimming and
/// invariant lower-casing.
/// </returns>
protected virtual string GetAllInnerTexts(HtmlNode node)
{
    // Each node contributes its own InnerText with platform newlines turned into spaces.
    var flattenedTexts =
        from element in node.DescendantsAndSelf()
        select element.InnerText.Replace(Environment.NewLine, " ");

    var combined = string.Join(" ", flattenedTexts);
    var collapsed = RemoveWhitespace(combined);

    return collapsed.Trim().ToLowerInvariant();
}
/// <summary>
/// Marks <paramref name="thread"/> as new or previously visited, depending on whether any
/// node at or below <paramref name="node"/> has a class attribute containing "thread seen".
/// </summary>
/// <param name="thread">Thread metadata to update.</param>
/// <param name="node">Node representing the thread in the listing.</param>
/// <returns>
/// For a new thread, the same metadata with the post count hidden; otherwise the result of
/// <c>ParseThreadCount</c>.
/// </returns>
private static ThreadMetadata ParseThreadSeen(this ThreadMetadata thread, HtmlNode node)
{
    // No "thread seen" marker anywhere in the subtree means the thread was never opened.
    bool alreadyVisited = node.DescendantsAndSelf()
        .Any(candidate => candidate.GetAttributeValue("class", "").Contains("thread seen"));

    thread.IsNew = !alreadyVisited;

    if (!thread.IsNew)
    {
        // Visited before: work out the post count.
        return ParseThreadCount(thread, node);
    }

    // Every post in a brand new thread is new, so the count is not shown.
    thread.NewPostCount = -1;
    thread.ShowPostCount = false;
    return thread;
}
/// <summary>
/// Records on <paramref name="thread"/> whether it has been visited, based on whether any
/// node at or below <paramref name="node"/> has a class attribute containing "thread seen".
/// Unseen threads get their post count hidden; seen threads have it parsed.
/// </summary>
/// <param name="thread">Thread to update.</param>
/// <param name="node">Node representing the thread in the listing.</param>
private void ParseThreadSeen(SAThread thread, HtmlNode node)
{
    // No "thread seen" marker anywhere in the subtree means the thread was never opened.
    bool alreadyVisited = node.DescendantsAndSelf()
        .Any(candidate => candidate.GetAttributeValue("class", "").Contains("thread seen"));

    thread.ThreadSeen = alreadyVisited;

    if (thread.ThreadSeen)
    {
        // Visited before: work out the post count.
        this.ParseThreadCount(thread, node);
        return;
    }

    // Every post in a brand new thread is new, so the count is not shown.
    thread.NewPostCount = -1;
    thread.ShowPostCount = false;
    Awful.Core.Event.Logger.AddEntry("SAThread - This thread is brand new! Hide the post count.");
}
/// <summary>
/// Extracts the numeric post id from the first node (the given node or one of its
/// descendants) whose class attribute is exactly "post".
/// </summary>
/// <param name="postNode">Node to search; must not be null.</param>
/// <returns>
/// The integer left after removing the text "post" from that node's id attribute, or -1
/// when no such node exists or the remainder is not a valid integer.
/// </returns>
private int ParsePostID(HtmlNode postNode)
{
    const int NotFound = -1;

    var postElement = postNode.DescendantsAndSelf()
        .FirstOrDefault(node => node.GetAttributeValue("class", "").Equals("post"));
    if (postElement == null)
    {
        return NotFound;
    }

    string postID = postElement.GetAttributeValue("id", "").Replace("post", "");

    // int.TryParse writes 0 to its out argument on failure, so the sentinel must be
    // returned explicitly; otherwise a malformed id would be reported as post 0.
    int parsed;
    return int.TryParse(postID, out parsed) ? parsed : NotFound;
}
/// <summary>
/// Builds a <see cref="Podcast"/> from the markup of one podcast entry.
/// </summary>
/// <remarks>
/// Each field is extracted independently and on a best-effort basis: a failure while
/// reading one field leaves that field unset and does not prevent the others from being
/// read. Failures are written to the debug output instead of being dropped silently.
/// </remarks>
/// <param name="node">Node representing the podcast entry.</param>
/// <returns>A podcast populated with every field that could be read.</returns>
/// <exception cref="ArgumentNullException"><paramref name="node"/> is null.</exception>
private Podcast GetPodcastData(HtmlNode node)
{
    // A null node used to fail on an otherwise unused DescendantsAndSelf() call; keep
    // failing fast, but with an exception that names the offending argument.
    if (node == null)
    {
        throw new ArgumentNullException("node");
    }

    Podcast returnPodcast = new Podcast();

    try
    {
        returnPodcast.Title = GetTitle(node.ChildNodes.FindFirst("h3"));
    }
    catch (Exception ex)
    {
        System.Diagnostics.Debug.WriteLine("GetPodcastData: could not read Title: " + ex.Message);
    }

    try
    {
        returnPodcast.EpisodeUri = GetEpisodeUri(node.ChildNodes.FindFirst("h3"));
    }
    catch (Exception ex)
    {
        System.Diagnostics.Debug.WriteLine("GetPodcastData: could not read EpisodeUri: " + ex.Message);
    }

    try
    {
        returnPodcast.Uri = GetUri(node.ChildNodes.FindFirst("audio"));
    }
    catch (Exception ex)
    {
        System.Diagnostics.Debug.WriteLine("GetPodcastData: could not read Uri: " + ex.Message);
    }

    try
    {
        returnPodcast.BibleReference = GetBibleReference(node.SelectSingleNode("p[@class='metadata']"));
    }
    catch (Exception ex)
    {
        System.Diagnostics.Debug.WriteLine("GetPodcastData: could not read BibleReference: " + ex.Message);
    }

    try
    {
        // The description depends on the Bible reference read above and on the text of the
        // first child of the metadata paragraph.
        returnPodcast.Description = GetDescription(
            node,
            returnPodcast.BibleReference,
            node.SelectSingleNode("p[@class='metadata']").ChildNodes[0].InnerText);
    }
    catch (Exception ex)
    {
        System.Diagnostics.Debug.WriteLine("GetPodcastData: could not read Description: " + ex.Message);
    }

    //returnPodcast.Length = GetPodcastLength(returnPodcast.Uri);
    return returnPodcast;
}