/// <summary>
/// Builds a forum page from a loaded HTML document: reads the forum id and the
/// current page number out of the markup, then fills in the total page count
/// and the thread list.
/// </summary>
/// <param name="doc">The HTML document whose DocumentNode is scanned.</param>
/// <returns>
/// The populated AwfulForumPage. The forum id is only assigned when it could be
/// parsed; the page number is -1 when it could not be determined.
/// </returns>
public static ForumPageData ParseForumPage(HtmlDocument doc)
{
    var root = doc.DocumentNode;
    AwfulForum parsedForum = new AwfulForum();

    // Forum id: taken from the "action" attribute of the form with id "ac_timemachine".
    var timeMachineForm = root.Descendants("form")
        .FirstOrDefault(f => f.GetAttributeValue("id", "").Equals("ac_timemachine"));

    if (timeMachineForm != null)
    {
        // Drop the script path, then keep whatever follows the last '='.
        string idText = timeMachineForm.GetAttributeValue("action", "")
            .Replace("/forumdisplay.php?", "")
            .Split('=')
            .Last();

        int forumId;
        if (int.TryParse(idText, out forumId))
        {
            parsedForum.ID = forumId;
        }
    }

    // Current page number: text of the span whose class is exactly "curpage".
    int currentPage = -1;
    var currentPageSpan = root.Descendants("span")
        .FirstOrDefault(s => s.GetAttributeValue("class", "").Equals("curpage"));

    if (currentPageSpan != null && !int.TryParse(currentPageSpan.InnerText, out currentPage))
    {
        // TryParse overwrites its out argument on failure; restore the sentinel.
        currentPage = -1;
    }

    var page = new AwfulForumPage(parsedForum, currentPage);
    HandleMaxPages(page, root);
    HandleThreads(page, root);
    return page;
}
/// <summary>
/// Sets TotalPages on the page's parent from the first div whose class is
/// exactly "pages". When no such div exists, TotalPages falls back to 1.
/// The outcome is written to the log in both cases.
/// </summary>
/// <param name="page">The page whose Parent receives the total page count.</param>
/// <param name="node">The node whose descendants are searched for the pager div.</param>
private static void HandleMaxPages(AwfulForumPage page, HtmlNode node)
{
    var pagerDiv = node.Descendants("div")
        .FirstOrDefault(d => d.GetAttributeValue("class", "").Equals("pages"));

    if (pagerDiv == null)
    {
        // No pager found in the markup: log it and default to a single page.
        Logger.AddEntry("AwfulForumPage - Could not parse maxPagesNode.");
        page.Parent.TotalPages = 1;
        return;
    }

    page.Parent.TotalPages = ExtractMaxForumPages(pagerDiv);
    Logger.AddEntry(string.Format("AwfulForumPage - maxPagesNode parsed. Value: {0}", page.Parent.TotalPages));
}
/// <summary>
/// Locates the table with id "forum", takes the rows of its first tbody, and
/// stores the threads generated from those rows on the page.
/// </summary>
/// <remarks>
/// Both lookups use First(), so markup without a matching table or without a
/// tbody results in an exception rather than an empty thread list.
/// </remarks>
/// <param name="page">The page whose Threads property is assigned.</param>
/// <param name="node">The node whose descendants are searched for the thread table.</param>
private static void HandleThreads(AwfulForumPage page, HtmlNode node)
{
    var tablesNamedForum = node.Descendants("table").Where(t => t.Id.Equals("forum"));
    var tableBody = tablesNamedForum.First().Descendants("tbody").First();

    // Every tr under the body is handed over as a candidate thread row.
    page.Threads = GenerateThreadData(page, tableBody.Descendants("tr"));
}
// TODO: Remember to sort thread data by new posts
/// <summary>
/// Converts each supplied row node into a thread via AwfulThreadParser.ParseFromNode,
/// passing along the page's ForumID, and returns the results in input order.
/// </summary>
/// <param name="page">The page supplying the ForumID handed to the parser.</param>
/// <param name="threadsInfo">The row nodes to convert, one thread per node.</param>
/// <returns>A new list holding one parsed entry per input node.</returns>
private static IList<AwfulThread> GenerateThreadData(AwfulForumPage page, IEnumerable<HtmlNode> threadsInfo)
{
    Logger.AddEntry("AwfulForumPage - Generating thread data...");

    List<AwfulThread> threads = new List<AwfulThread>();
    foreach (var row in threadsInfo)
    {
        threads.Add(AwfulThreadParser.ParseFromNode(page.ForumID, row));
    }

    return threads;
}