Esempio n. 1
0
        /// <summary>
        /// Builds a <see cref="ForumPageMetadata"/> from a parsed forum page document.
        /// </summary>
        /// <param name="doc">The parsed HTML document of the forum page.</param>
        /// <returns>
        /// Metadata whose <c>ForumID</c> is set only when the "ac_timemachine" form is present,
        /// whose <c>PageNumber</c> is -1 when no parsable "curpage" span is found, and whose
        /// page count and threads are filled in by HandleMaxPages and HandleThreads.
        /// </returns>
        public static ForumPageMetadata ParseForumPage(HtmlDocument doc)
        {
            var root = doc.DocumentNode;
            var result = new ForumPageMetadata();

            // Forum id: the text after the last '=' in the action of the "ac_timemachine" form.
            var timeMachineForm = root.Descendants("form")
                .FirstOrDefault(form => form.GetAttributeValue("id", "").Equals("ac_timemachine"));

            if (timeMachineForm != null)
            {
                result.ForumID = timeMachineForm.GetAttributeValue("action", "")
                    .Replace("/forumdisplay.php?", "")
                    .Split('=')
                    .Last();
            }

            // Current page number: the text of the first span with class "curpage".
            var currentPageSpan = root.Descendants("span")
                .FirstOrDefault(span => span.GetAttributeValue("class", "").Equals("curpage"));

            int currentPage = -1;
            if (currentPageSpan != null)
            {
                int parsed;
                // TryParse zeroes its out argument on failure, so only accept a successful parse.
                if (int.TryParse(currentPageSpan.InnerText, out parsed))
                {
                    currentPage = parsed;
                }
            }

            result.PageNumber = currentPage;

            HandleMaxPages(result, root);
            HandleThreads(result, root);
            return result;
        }
Esempio n. 2
0
        // TODO: Remember to sort thread data by new posts
        /// <summary>
        /// Converts each supplied node into a <see cref="ThreadMetadata"/> via
        /// <c>ThreadParser.ParseThread</c>, preserving enumeration order.
        /// </summary>
        /// <param name="page">The page the threads belong to; not read by this method.</param>
        /// <param name="threadsInfo">The nodes to parse, one per thread.</param>
        /// <returns>A new list holding one parsed entry per input node.</returns>
        private static IList<ThreadMetadata> GenerateThreadData(ForumPageMetadata page, IEnumerable<HtmlNode> threadsInfo)
        {
            var parsedThreads = new List<ThreadMetadata>();

            foreach (HtmlNode threadRow in threadsInfo)
            {
                parsedThreads.Add(ThreadParser.ParseThread(threadRow));
            }

            return parsedThreads;
        }
Esempio n. 3
0
        /// <summary>
        /// Populates <c>page.Threads</c> from the rows of the table whose id is "forum".
        /// </summary>
        /// <param name="page">The page metadata that receives the thread list.</param>
        /// <param name="node">The node whose descendants are searched for the forum table.</param>
        /// <remarks>
        /// When the table, or a tbody inside it, cannot be found, <c>page.Threads</c> is set
        /// to an empty list instead of throwing.
        /// </remarks>
        private static void HandleThreads(ForumPageMetadata page, HtmlNode node)
        {
            var forumThreadsTable = node.Descendants("table")
                .FirstOrDefault(n => n.Id.Equals("forum"));

            // A table without a tbody is treated like a missing table: First() would
            // throw InvalidOperationException here, defeating the empty-list fallback.
            HtmlNode threadList = forumThreadsTable == null
                ? null
                : forumThreadsTable.Descendants("tbody").FirstOrDefault();

            // do we have any thread items to parse?
            if (threadList == null)
            {
                page.Threads = new List<ThreadMetadata>();
                return;
            }

            var threadsInfo = threadList.Descendants("tr");
            page.Threads = GenerateThreadData(page, threadsInfo);
        }
Esempio n. 4
0
        /// <summary>
        /// Sets <c>page.PageCount</c> from the first div whose class attribute contains "pages".
        /// </summary>
        /// <param name="page">The page metadata that receives the page count.</param>
        /// <param name="node">The node whose descendants are searched for the pager div.</param>
        private static void HandleMaxPages(ForumPageMetadata page, HtmlNode node)
        {
            var pagerDiv = node.Descendants("div")
                .FirstOrDefault(div => div.GetAttributeValue("class", "").Contains("pages"));

            if (pagerDiv != null)
            {
                // The actual count is extracted from the pager by ExtractMaxForumPages.
                page.PageCount = ExtractMaxForumPages(pagerDiv);
                return;
            }

            // No pager div on the page: fall back to a single page.
            page.PageCount = 1;
        }
Esempio n. 5
0
        /// <summary>
        /// Populates <c>page.Filters</c> from the links inside the first div with class "thread_tags".
        /// </summary>
        /// <param name="page">The page metadata that receives the filter list.</param>
        /// <param name="top">The node whose descendants are searched for the tag list.</param>
        /// <remarks>
        /// The list always starts with <c>FilterTagMetadata.NoFilter</c>. When no "thread_tags"
        /// div is found, <c>page.Filters</c> is left untouched.
        /// </remarks>
        private static void HandleFilters(ForumPageMetadata page, HtmlNode top)
        {
            var tagsListNode = top.Descendants("div")
                .FirstOrDefault(node => node.GetAttributeValue("class", "")
                    .Equals("thread_tags"));

            if (null == tagsListNode)
            {
                return;
            }

            var filterNodes = tagsListNode.Descendants("a").ToList();

            // +1 leaves room for the NoFilter entry so the list never has to grow.
            var filters = new List<FilterTagMetadata>(filterNodes.Count + 1) { FilterTagMetadata.NoFilter };
            page.Filters = filters;

            foreach (var filterNode in filterNodes)
            {
                string href = filterNode.GetAttributeValue("href", string.Empty);

                // Title and image source are read from the link's first child. An empty <a>
                // has no child, so fall back to empty strings instead of dereferencing null.
                var firstChild = filterNode.FirstChild;
                string title = string.Empty;
                string src = string.Empty;
                if (firstChild != null)
                {
                    title = firstChild.GetAttributeValue("title", string.Empty);
                    src = firstChild.GetAttributeValue("src", string.Empty);
                }

                FilterTagMetadata filter = new FilterTagMetadata()
                {
                    FilterUri = WebUtility.HtmlDecode(href),
                    Title = WebUtility.HtmlDecode(title),
                    TagUri = WebUtility.HtmlDecode(src)
                };

                filters.Add(filter);
            }
        }
Esempio n. 6
0
        /// <summary>
        /// Populates <c>page.Threads</c> from the rows of the table whose id is "forum".
        /// </summary>
        /// <param name="page">The page metadata that receives the thread list.</param>
        /// <param name="node">The node whose descendants are searched for the forum table.</param>
        /// <remarks>
        /// When the table, or a tbody inside it, cannot be found, <c>page.Threads</c> is set
        /// to an empty list rather than letting <c>First()</c> throw InvalidOperationException.
        /// </remarks>
        private static void HandleThreads(ForumPageMetadata page, HtmlNode node)
        {
            var forumThreadsTable = node.Descendants("table")
                   .FirstOrDefault(n => n.Id.Equals("forum"));

            // Only look for the tbody when the table itself was found.
            HtmlNode threadList = forumThreadsTable == null
                ? null
                : forumThreadsTable.Descendants("tbody").FirstOrDefault();

            if (threadList == null)
            {
                // Nothing to parse on this page.
                page.Threads = new List<ThreadMetadata>();
                return;
            }

            var threadsInfo = threadList.Descendants("tr");

            page.Threads = GenerateThreadData(page, threadsInfo);
        }